diff --git a/fastwarc/fastwarc/stream_io.pyx b/fastwarc/fastwarc/stream_io.pyx
index 91d090b0..da2e4e1e 100644
--- a/fastwarc/fastwarc/stream_io.pyx
+++ b/fastwarc/fastwarc/stream_io.pyx
@@ -53,8 +53,6 @@ cdef class IOStream:
 
     def read(self, size_t size):
         """
-        read(self, size)
-
         Read ``size`` bytes from stream.
 
         :param size: bytes to read
@@ -76,8 +74,6 @@ cdef class IOStream:
 
     def write(self, bytes data):
         """
-        write(self, data)
-
         Write bytes to stream.
 
         :param data: data to write
@@ -90,8 +86,6 @@ cdef class IOStream:
 
     cpdef void seek(self, size_t offset) except *:
         """
-        seek(self, offset)
-
         Seek to specified offset.
 
         :param offset: seek offset
@@ -101,8 +95,6 @@ cdef class IOStream:
 
     cpdef size_t tell(self) except -1:
         """
-        tell(self)
-
         Return current stream offset.
 
         :return: stream offset
@@ -112,16 +104,12 @@ cdef class IOStream:
 
     cpdef void flush(self) except *:
         """
-        flush(self)
-
         Flush stream buffer.
         """
         pass
 
     cpdef void close(self) except *:
         """
-        close(self)
-
         Close the stream.
         """
         pass
@@ -131,8 +119,6 @@ cdef class IOStream:
 @cython.auto_pickle(False)
 cdef class BytesIOStream(IOStream):
     """
-    __init__(self, initial_data=None)
-
     IOStream that uses an in-memory buffer.
 
     :param initial_data: fill internal buffer with this initial data
@@ -182,8 +168,6 @@ cdef class BytesIOStream(IOStream):
 
     cpdef string getvalue(self):
         """
-        getvalue(self)
-
         Get buffer value.
 
         :return: buffer value
@@ -196,8 +180,6 @@ cdef class BytesIOStream(IOStream):
 @cython.auto_pickle(False)
 cdef class FileStream(IOStream):
     """
-    __init__(self, filename=None, mode='rb')
-
     Fast alternative to Python file objects for local files.
 
     :param filename: input filename
@@ -268,8 +250,6 @@ cdef class FileStream(IOStream):
 @cython.auto_pickle(False)
 cdef class PythonIOStreamAdapter(IOStream):
     """
-    __init__(self, py_stream)
-
     IOStream adapter for file-like Python objects.
 
     :param py_stream: input Python stream object
@@ -311,8 +291,6 @@ cdef class PythonIOStreamAdapter(IOStream):
 
 cpdef IOStream wrap_stream(raw_stream):
     """
-    wrap_stream(raw_stream)
-
     Wrap ``raw_stream`` into a :class:`PythonIOStreamAdapter` if it is a Python object or
     return ``raw_stream`` unmodified if it is a :class:`IOStream` already.
 
@@ -334,8 +312,6 @@ cdef class CompressingStream(IOStream):
 
     cpdef size_t begin_member(self):
         """
-        begin_member(self)
-
         Begin compression member / frame (if not already started).
 
         :return: bytes written
@@ -345,8 +321,6 @@ cdef class CompressingStream(IOStream):
 
     cpdef size_t end_member(self):
         """
-        end_member(self)
-
         End compression member / frame (if one has been started).
 
         :return: bytes written
@@ -359,8 +333,6 @@ cdef class CompressingStream(IOStream):
 @cython.auto_pickle(False)
 cdef class GZipStream(CompressingStream):
     """
-    __init__(self, raw_stream, compression_level=9, zlib=False)
-
     GZip :class:`IOStream` implementation.
 
     :param raw_stream: raw data stream
@@ -418,8 +390,6 @@ cdef class GZipStream(CompressingStream):
 
     cpdef void prepopulate(self, bint deflate, const string& initial_data):
         """
-        prepopulate(self, initial_data)
-
         Fill internal working buffer with initial data. Use if some initial data of the stream
         have already been consumed (e.g., for stream content negotiation).
         Has to be called before the first :meth:`read()`.
@@ -602,8 +572,6 @@ cdef class GZipStream(CompressingStream):
 @cython.auto_pickle(False)
 cdef class LZ4Stream(CompressingStream):
     """
-    __init__(self, raw_stream, compression_level=12, favor_dec_speed=True)
-
     LZ4 :class:`IOStream` implementation.
 
     :param raw_stream: raw data stream
@@ -636,8 +604,6 @@ cdef class LZ4Stream(CompressingStream):
 
     cpdef void prepopulate(self, const string& initial_data):
         """
-        prepopulate(self, initial_data)
-
         Fill internal working buffer with initial data. Use if some initial data of the stream
         have already been consumed (e.g., for stream content negotiation).
         Has to be called before the first :meth:`read()`.
@@ -771,8 +737,6 @@ cdef class LZ4Stream(CompressingStream):
 @cython.auto_pickle(False)
 cdef class BrotliStream(CompressingStream):
     """
-    __init__(self, raw_stream, quality=11, lgwin=22, lgblock=0)
-
     Brotli :class:`IOStream` implementation.
 
     Implementation relies on Google's ``brotli`` Python package, will be ported to native
@@ -861,8 +825,6 @@ cdef class BrotliStream(CompressingStream):
 @cython.auto_pickle(False)
 cdef class BufferedReader:
     """
-    __init__(self, stream, buf_size=65536, negotiate_stream=True)
-
     Buffered reader operating on an :class:`IOStream` instance.
 
     :param stream: stream to operate on
@@ -995,8 +957,6 @@ cdef class BufferedReader:
 
     cpdef bytes read(self, size_t size=strnpos):
         """
-        read(self, size=-1)
-
         Read up to ``size`` bytes from the input stream.
 
         :param size: number of bytes to read (default means read remaining stream)
@@ -1021,8 +981,6 @@ cdef class BufferedReader:
 
     cpdef string readline(self, bint crlf=True, size_t max_line_len=8192) except *:
         """
-        readline(self, crlf=True, max_line_len=8192)
-
         Read a single line from the input stream.
 
         :param crlf: whether lines are separated by CRLF or LF
@@ -1082,8 +1040,6 @@ cdef class BufferedReader:
 
     cpdef size_t tell(self) except -1:
         """
-        tell(self)
-
         Offset on the input stream.
 
         :return: offset
@@ -1104,8 +1060,6 @@ cdef class BufferedReader:
 
     cpdef size_t consume(self, size_t size=strnpos) except -1:
        """
-        consume(self, size=-1)
-
        Consume up to ``size`` bytes from the input stream without allocating a buffer for it.

        :param size: number of bytes to read (default means read remaining stream)
@@ -1130,8 +1084,6 @@ cdef class BufferedReader:
 
     cpdef void close(self) except *:
         """
-        close(self)
-
         Close stream.
         """
         if self.stream is not None:
@@ -1140,8 +1092,6 @@ cdef class BufferedReader:
 
 def _buf_reader_py_test_detect_stream_type(BufferedReader buf):
     """
-    _buf_reader_py_test_detect_stream_type(buf):
-
     Test interface for :meth:`BufferedReader.detect_stream_type`
     """
     buf.detect_stream_type()
@@ -1149,8 +1099,6 @@ def _buf_reader_py_test_detect_stream_type(BufferedReader buf):
 
 def _buf_reader_py_test_set_limit(BufferedReader buf, size_t limit):
     """
-    _buf_reader_py_test_detect_set_limit(buf, limit):
-
     Test interface for :meth:`BufferedReader.set_limit`
     """
     buf.set_limit(limit)
@@ -1158,8 +1106,6 @@ def _buf_reader_py_test_set_limit(BufferedReader buf, size_t limit):
 
 def _buf_reader_py_test_reset_limit(BufferedReader buf):
     """
-    _buf_reader_py_test_reset_limit(buf, limit):
-
     Test interface for :meth:`BufferedReader.reset_limit`
     """
     buf.reset_limit()
diff --git a/fastwarc/fastwarc/warc.pyx b/fastwarc/fastwarc/warc.pyx
index a4bfeace..a52eeafa 100644
--- a/fastwarc/fastwarc/warc.pyx
+++ b/fastwarc/fastwarc/warc.pyx
@@ -149,8 +149,6 @@ def _rebuild_warc_header_map(encoding, status_line, headers):
 # noinspection PyAttributeOutsideInit
 cdef class WarcHeaderMap:
     """
-    __init__(self, encoding='utf-8')
-
     Dict-like type representing a WARC or HTTP header block.
 
     :param encoding: header source encoding
@@ -179,8 +177,6 @@ cdef class WarcHeaderMap:
 
     def __iter__(self):
         """
-        __iter__(self)
-
         Iterate all header map items.
 
         :rtype: t.Iterable[(str, str)]
@@ -261,8 +257,6 @@ cdef class WarcHeaderMap:
 
     def append(self, key not None, value not None):
         """
-        append(self, key, value)
-
         Append header (use if header name is not unique).
 
         :param key: header key
@@ -276,8 +270,6 @@ cdef class WarcHeaderMap:
 
     def get(self, key not None, default=None) -> str:
         """
-        get(self, key, default=None)
-
         Get header value or ``default``. If multiple headers have the same key, only the last occurrence will be returned.
 
         :param key: header key
@@ -292,8 +284,6 @@ cdef class WarcHeaderMap:
 
     def items(self):
         """
-        items(self)
-
         Item view of keys and values. If multiple headers have the same key, only the last occurrence will be returned.
 
         :rtype: t.Iterable[(str, str)]
@@ -304,8 +294,6 @@ cdef class WarcHeaderMap:
 
     def keys(self):
         """
-        keys(self)
-
         Iterable of header keys. If multiple headers have the same key, only the last occurrence will be returned.
 
         :rtype: t.Iterable[str]
@@ -316,8 +304,6 @@ cdef class WarcHeaderMap:
 
     def values(self):
         """
-        values(self)
-
         Iterable of header values. If multiple headers have the same key, only the last occurrence will be returned.
 
         :rtype: t.Iterable[str]
@@ -328,8 +314,6 @@ cdef class WarcHeaderMap:
 
     def asdict(self) -> t.Dict[str, str]:
         """
-        asdict(self)
-
         Headers as Python dict. If multiple headers have the same key, only the last occurrence will be returned.
 
         :rtype: t.Dict[str, str]
@@ -351,8 +335,6 @@ cdef class WarcHeaderMap:
 
     def astuples(self) -> t.Tuple[t.Tuple[str, str]]:
         """
-        astuples(self)
-
         Headers as a series of tuples, including multiple headers with the same key.
         Use this over :meth:`asdict` if header keys are not necessarily unique.
 
         :rtype: t.Tuple[t.Tuple[str, str]]
@@ -362,8 +344,6 @@ cdef class WarcHeaderMap:
 
     cpdef void clear(self):
         """
-        clear(self)
-
         Clear all headers.
         """
         if self._dict_cache is not None:
@@ -454,8 +434,6 @@ def _rebuild_warc_record(record_type, headers, is_http, http_parsed, http_charse
 # noinspection PyProtectedMember, PyAttributeOutsideInit
 cdef class WarcRecord:
     """
-    __init__(self)
-
     A WARC record.
 
     WARC records are picklable, but pickling will :meth:`freeze()` the WARC record.
@@ -512,8 +490,6 @@ cdef class WarcRecord:
     @record_type.setter
     def record_type(self, WarcRecordType record_type):
         """
-        record_type(self, record_type)
-
         Set record type.
 
         :param record_type: record type
@@ -538,8 +514,6 @@ cdef class WarcRecord:
     @record_date.setter
     def record_date(self, date):
         """
-        record_date(self, date)
-
         Set WARC Date.
 
         :param date: datetime object
@@ -715,8 +689,6 @@ cdef class WarcRecord:
 
     cpdef void init_headers(self, size_t content_length=0, WarcRecordType record_type=no_type, bytes record_urn=None):
         """
-        init_headers(self, content_length=0, record_type=no_type, record_urn=None)
-
         Initialize mandatory headers in a fresh :class:`WarcRecord` instance.
 
         :param content_length: WARC record body length in bytes
@@ -744,8 +716,6 @@ cdef class WarcRecord:
 
     cpdef void set_bytes_content(self, bytes b):
         """
-        set_bytes_content(self, b)
-
         Set WARC body.
 
         :param b: body as bytes
@@ -758,8 +728,6 @@ cdef class WarcRecord:
 
     cpdef bint parse_http(self, bint strict_mode=True, str auto_decode='none') except 0:
         """
-        parse_http(self, strict_mode=True, auto_decode='none')
-
         Parse HTTP headers and advance content reader.
 
         It is safe to call this method multiple times, even if the record is not an HTTP record.
@@ -841,8 +809,6 @@ cdef class WarcRecord:
 
     cpdef size_t write(self, stream, bint checksum_data=False, bytes payload_digest=None, size_t chunk_size=16384) except -1:
         """
-        write(self, stream, checksum_data=False, chunk_size=16384)
-
         Write WARC record onto a stream.
 
         :param stream: output stream
@@ -995,8 +961,6 @@ cdef class WarcRecord:
 
     cpdef bint freeze(self) except 0:
         """
-        freeze(self)
-
         "Freeze" a record by baking in the remaining payload stream contents.
 
         Freezing a record makes the :class:`WarcRecord` instance copyable and reusable by decoupling
@@ -1017,8 +981,6 @@ cdef class WarcRecord:
 
     cpdef bint verify_block_digest(self, bint consume=False) except -1:
         """
-        verify_block_digest(self, consume=False)
-
         Verify whether record block digest is valid.
 
         :param consume: do not create an in-memory copy of the record stream
@@ -1031,8 +993,6 @@ cdef class WarcRecord:
 
     cpdef bint verify_payload_digest(self, bint consume=False) except -1:
         """
-        verify_payload_digest(self, consume=False)
-
         Verify whether record payload digest is valid.
 
         :param consume: do not create an in-memory copy of the record stream
@@ -1050,8 +1010,6 @@ cdef class WarcRecord:
 
 cdef size_t parse_header_block(BufferedReader reader, WarcHeaderMap target, bint has_status_line, bint strict_mode=True) except -1:
     """
-    parse_header_block(reader, target, has_status_line=False, strict_mode=True)
-
     Helper function for parsing WARC or HTTP header blocks.
 
     :param reader: input reader
@@ -1115,9 +1073,6 @@
 @cython.auto_pickle(False)
 cdef class ArchiveIterator:
     """
-    __init__(self, stream, record_types=any_type, parse_http=True, min_content_length=-1, max_content_length=-1, \
-             func_filter=None, verify_digests=False, strict_mode=True, auto_decode='none')
-
     WARC record stream iterator.
 
     :param stream: input stream (preferably an :class:`~fastwarc.stream_io.IOStream`,
@@ -1165,8 +1120,6 @@ cdef class ArchiveIterator:
 
     def __iter__(self) -> t.Iterable[WarcRecord]:
         """
-        __iter__(self)
-
         Iterate all :class:`WarcRecord` items in the current WARC stream.
 
         :rtype: t.Iterable[WarcRecord]
@@ -1182,8 +1135,6 @@ cdef class ArchiveIterator:
 
     def __next__(self) -> WarcRecord:
         """
-        __next__(self)
-
         Implements an iterator that can be used with ``next()``.
 
         :rtype: WarcRecord
@@ -1286,8 +1237,6 @@ cdef class ArchiveIterator:
 
     cdef bint _set_stream(self, stream) except 0:
         """
-        _set_stream(self, stream)
-
         Replace underlying input stream.
 
         This method is for internal use and should not be called by external users.
@@ -1307,8 +1256,6 @@ cdef class ArchiveIterator:
 # noinspection PyProtectedMember
 cpdef bint is_warc_10(WarcRecord record):
     """
-    is_warc_10(record)
-
     Filter predicate for checking if record is a WARC/1.0 record.
 
     :param record: WARC record
@@ -1321,8 +1268,6 @@ cpdef bint is_warc_10(WarcRecord record):
 # noinspection PyProtectedMember
 cpdef bint is_warc_11(WarcRecord record):
     """
-    is_warc_11(record)
-
     Filter predicate for checking if record is a WARC/1.1 record.
 
     :param record: WARC record
@@ -1335,8 +1280,6 @@ cpdef bint is_warc_11(WarcRecord record):
 # noinspection PyProtectedMember
 cpdef bint has_block_digest(WarcRecord record):
     """
-    has_block_digest(record)
-
     Filter predicate for checking if record has a block digest.
 
     :param record: WARC record
@@ -1349,8 +1292,6 @@ cpdef bint has_block_digest(WarcRecord record):
 # noinspection PyProtectedMember
 cpdef bint has_payload_digest(WarcRecord record):
     """
-    has_payload_digest(record)
-
     Filter predicate for checking if record has a payload digest.
 
     :param record: WARC record
@@ -1363,8 +1304,6 @@ cpdef bint has_payload_digest(WarcRecord record):
 # noinspection PyProtectedMember
 cpdef bint is_http(WarcRecord record):
     """
-    is_http(record)
-
     Filter predicate for checking if record is an HTTP record.
 
     :param record: WARC record
@@ -1377,8 +1316,6 @@ cpdef bint is_http(WarcRecord record):
 # noinspection PyProtectedMember
 cpdef bint is_concurrent(WarcRecord record):
     """
-    is_concurrent(record)
-
     Filter predicate for checking if record is concurrent to another record.
 
     :param record: WARC record
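
Not part of the patch: the removed first-line signatures duplicate information that Cython can embed into the generated docstrings automatically, so the public API described above is unchanged. A minimal usage sketch of the classes whose docstrings this patch touches follows; the filename is a placeholder, everything else is the FastWARC API as documented above.

    # Iterate HTTP response records from a gzip-compressed WARC file using
    # FileStream/GZipStream (stream_io) and ArchiveIterator (warc).
    from fastwarc.stream_io import FileStream, GZipStream
    from fastwarc.warc import ArchiveIterator, WarcRecordType

    stream = GZipStream(FileStream('example.warc.gz', 'rb'))  # placeholder filename
    for record in ArchiveIterator(stream, record_types=WarcRecordType.response):
        print(record.record_id, record.headers.get('WARC-Target-URI'))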