Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Include py.typed file & fix and/or add missing/wrong type hints in stub files #50

Merged
merged 8 commits into from
Aug 13, 2024
13 changes: 13 additions & 0 deletions fastwarc/fastwarc/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,16 @@
from .stream_io import FileStream, GZipStream, LZ4Stream
from .stream_io import FastWARCError, StreamError
from .warc import ArchiveIterator, WarcRecord, WarcRecordType

# Exposing symbols for legacy compatibility; please prefer explicit imports from submodules.

# Names re-exported at package level, listed in the order they are imported.
__all__ = [
    # from .stream_io
    "FileStream",
    "GZipStream",
    "LZ4Stream",
    "FastWARCError",
    "StreamError",
    # from .warc
    "ArchiveIterator",
    "WarcRecord",
    "WarcRecordType",
]
phoerious marked this conversation as resolved.
Show resolved Hide resolved
Empty file added fastwarc/fastwarc/py.typed
Empty file.
31 changes: 24 additions & 7 deletions fastwarc/fastwarc/stream_io.pyi
Original file line number Diff line number Diff line change
@@ -1,18 +1,34 @@
from typing import ContextManager, IO
from types import TracebackType
from typing import ContextManager, Optional, Type, Union, BinaryIO, Protocol

class _ReadableStream(Protocol):
    """Structural type for objects that can be read from.

    Any object providing compatible ``read`` and ``seek`` methods matches
    this protocol; it does not need to inherit from it.
    """

    def read(self, size: int) -> bytes: ...
    def seek(self, offset: int) -> int: ...

class _WritableStream(Protocol):
    """Structural type for objects that can be written to.

    Any object providing compatible ``write`` and ``flush`` methods matches
    this protocol; it does not need to inherit from it.
    """

    def write(self, data: bytes) -> int: ...
    def flush(self) -> None: ...

class IOStream(ContextManager):

class IOStream(ContextManager[IOStream]):
def read(self, size: int) -> bytes: ...
def write(self, data: bytes) -> int: ...
def close(self) -> None: ...
def flush(self) -> None: ...
def seek(self, offset: int) -> None: ...
def tell(self) -> int: ...
def __enter__(self) -> IOStream: ...
def __exit__(
self,
exc_type: Optional[Type[BaseException]],
exc: Optional[BaseException],
traceback: Optional[TracebackType]
) -> None: ...


class BufferedReader:
def __init__(
self, stream: IOStream, buf_size: int = 8192, negotiate_stream: bool = True
self, stream: Union[IOStream, BinaryIO, _ReadableStream], buf_size: int = 65536, negotiate_stream: bool = True
) -> None: ...
def close(self) -> None: ...
def consume(self, size: int = -1) -> int: ...
Expand All @@ -22,6 +38,7 @@ class BufferedReader:


class BytesIOStream(IOStream):
    """Typing stub for ``BytesIOStream``, an ``IOStream`` subclass."""

    # ``initial_data`` may be omitted; ``Optional`` matches the spelling used
    # by the other signatures in this stub.
    def __init__(self, initial_data: Optional[bytes] = None) -> None: ...
    def getvalue(self) -> bytes: ...


Expand All @@ -36,28 +53,28 @@ class CompressingStream(IOStream):

class BrotliStream(CompressingStream):
def __init__(
self, raw_stream: IOStream, quality: int = 11, lgwin: int = 22, lgblock: int = 0
self, raw_stream: Union[IOStream, BinaryIO, _ReadableStream, _WritableStream], quality: int = 11, lgwin: int = 22, lgblock: int = 0
) -> None: ...


class GZipStream(CompressingStream):
def __init__(
self, raw_stream: IOStream, compression_level: int = 9, zlib: bool = False
self, raw_stream: Union[IOStream, BinaryIO, _ReadableStream, _WritableStream], compression_level: int = 9, zlib: bool = False
) -> None: ...
phoerious marked this conversation as resolved.
Show resolved Hide resolved


class LZ4Stream(CompressingStream):
def __init__(
self,
raw_stream: IOStream,
raw_stream: Union[IOStream, BinaryIO, _ReadableStream, _WritableStream],
compression_level: int = 12,
favor_dec_speed: bool = True,
) -> None: ...
def prepopulate(self, initial_data: bytes) -> None: ...


class PythonIOStreamAdapter(IOStream):
def __init__(self, py_stream: IO) -> None: ...
def __init__(self, py_stream: Union[_ReadableStream, _WritableStream]) -> None: ...


class FastWARCError(Exception):
Expand Down
46 changes: 36 additions & 10 deletions fastwarc/fastwarc/warc.pyi
Original file line number Diff line number Diff line change
@@ -1,19 +1,30 @@
from datetime import datetime
from typing import (
Union,
Optional,
Iterator,
Dict,
Tuple,
MutableMapping,
Literal,
Callable,
Iterable,
ValuesView,
KeysView,
Type,
BinaryIO,
Protocol,
)
from enum import IntFlag

from .stream_io import BufferedReader, IOStream

# NOTE(review): duplicates the protocol of the same name in stream_io.pyi;
# consider importing it from there instead of redefining it.
class _ReadableStream(Protocol):
    """Structural type for objects that can be read from.

    Any object providing compatible ``read`` and ``seek`` methods matches
    this protocol; it does not need to inherit from it.
    """

    def read(self, size: int) -> bytes: ...
    def seek(self, offset: int) -> int: ...

# NOTE(review): duplicates the protocol of the same name in stream_io.pyi;
# consider importing it from there instead of redefining it.
class _WritableStream(Protocol):
    """Structural type for objects that can be written to.

    Any object providing compatible ``write`` and ``flush`` methods matches
    this protocol; it does not need to inherit from it.
    """

    def write(self, data: bytes) -> int: ...
    def flush(self) -> None: ...


class WarcRecordType(IntFlag):
warcinfo = 2
Expand All @@ -33,20 +44,25 @@ no_type = WarcRecordType.no_type
any_type = WarcRecordType.any_type


class WarcHeaderMap(MutableMapping[str, str]):
class WarcHeaderMap:
reason_phrase: Optional[str]
status_code: Optional[str]
status_line: str

def append(self, key: str, value: str) -> None: ...
def asdict(self) -> Dict[str, str]: ...
def astuples(self) -> Tuple[str, str]: ...
def astuples(self) -> Tuple[Tuple[str, str], ...]: ...
def clear(self) -> None: ...
def get(self, key: str, default: Optional[str] = None) -> Optional[str]: ...
def items(self) -> Iterator[Tuple[str, str]]: ...
def keys(self) -> KeysView[str]: ...
def values(self) -> ValuesView[str]: ...
def write(self, stream: IOStream) -> None: ...
def __getitem__(self, item: str) -> str: ...
def __iter__(self) -> Iterator[Tuple[str, str]]: ...
def __len__(self) -> int: ...
def __setitem__(self, key: str, value: str) -> None: ...
def __contains__(self, item: str) -> bool: ...


class WarcRecord:
Expand All @@ -59,32 +75,42 @@ class WarcRecord:
is_http_parsed: bool
http_headers: Optional[WarcHeaderMap]
http_content_type: Optional[str]
http_content_type: Optional[str]
http_charset: Optional[str]
http_date: Optional[datetime]
http_last_modified: Optional[datetime]
content_length: int
reader: BufferedReader
stream_pos: int

def init_headers(
self, content_length: int = 0, record_type=no_type, record_urn=None
): ...
self, content_length: int = 0, record_type: WarcRecordType = no_type, record_urn: Optional[bytes] = None
) -> None: ...
def freeze(self) -> bool: ...
def set_bytes_content(self, content: bytes) -> None: ...
def parse_http(self, strict_mode=True, auto_decode: str = "none") -> None: ...
def parse_http(self, strict_mode: bool = True, auto_decode: str = "none") -> None: ...
def verify_block_digest(self, consume: bool = False) -> bool: ...
def verify_payload_digest(self, consume: bool = False) -> bool: ...
def write(
self,
stream: Union[IOStream, BinaryIO, _WritableStream],
checksum_data: bool = False,
payload_digest: Optional[bytes] = None,
chunk_size: int = 16384
) -> int: ...



class ArchiveIterator(Iterable[WarcRecord]):
def __init__(
self,
stream: Type[IOStream],
stream: Union[IOStream, BinaryIO, _ReadableStream],
record_types: WarcRecordType = any_type,
parse_http: bool = True,
min_content_length: int = -1,
max_content_length: int = -1,
func_filter: Optional[Callable[[WarcRecord], bool]] = None,
verify_digests: bool = False,
strict_mode: bool = True,
auto_decode: Literal["none", "content", "transfer", "all"] = "none",
) -> None: ...
def __iter__(self) -> Iterator[WarcRecord]: ...
def __next__(self) -> WarcRecord: ...
Loading