diff --git a/CHANGELOG.md b/CHANGELOG.md index 5e55f2b..9d9e296 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,12 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/) and this project adheres to [Semantic Versioning](http://semver.org/). +## [0.3.3] - 2018-09-21 + +### Fixed + +- Issue with packets >= 126 bytes on Python 2.7.3 + ## [0.3.2] - 2018-07-04 ### Changed diff --git a/compliance/runserver.sh b/compliance/runserver.sh new file mode 100755 index 0000000..640e77c --- /dev/null +++ b/compliance/runserver.sh @@ -0,0 +1 @@ +wstest -m fuzzingserver diff --git a/lomond/_version.py b/lomond/_version.py index 2f5d180..aef93d7 100644 --- a/lomond/_version.py +++ b/lomond/_version.py @@ -1,3 +1,3 @@ from __future__ import unicode_literals -__version__ = "0.3.2" +__version__ = "0.3.3" diff --git a/lomond/frame_parser.py b/lomond/frame_parser.py index 196fc0c..448ee0d 100644 --- a/lomond/frame_parser.py +++ b/lomond/frame_parser.py @@ -20,9 +20,6 @@ class FrameParser(Parser): """Parses a stream of data in to HTTP headers + WS frames.""" - unpack16 = struct.Struct(b"!H").unpack - unpack64 = struct.Struct(b"!Q").unpack - def __init__(self, parse_headers=True, validate=True): self.parse_headers = parse_headers self.validate = validate @@ -39,6 +36,17 @@ def __repr__(self): self.validate ) + # A bug in Python2.7.3 requires casting data to bytes + @classmethod + def unpack16(cls, data, _unpack16 = struct.Struct(b"!H").unpack): + """Unpack 16 bits in to an integer.""" + return _unpack16(bytes(data))[0] + + @classmethod + def unpack64(cls, data, _unpack64 = struct.Struct(b"!Q").unpack): + """Unpack 64 bits in to an integer.""" + return _unpack64(bytes(data))[0] + def enable_compression(self): """Enable compressed packets.""" self._compression = True @@ -71,9 +79,9 @@ def parse(self): payload_length = byte2 & 0b01111111 if payload_length == 126: - (payload_length,) = self.unpack16((yield self.read(2))) + payload_length = self.unpack16((yield self.read(2))) elif payload_length == 127: - (payload_length,) = self.unpack64((yield self.read(8))) + payload_length = self.unpack64((yield self.read(8))) if payload_length > 0x7fffffffffffffff: raise errors.PayloadTooLarge("payload is too large") diff --git a/lomond/parser.py b/lomond/parser.py index 14ddbf6..63f50af 100644 --- a/lomond/parser.py +++ b/lomond/parser.py @@ -18,8 +18,13 @@ class ParseEOF(ParseError): """End of Stream.""" +class ParseOverflow(Exception): + """Extra bytes in feed after parser completed.""" + + class _Awaitable(object): """An operation that effectively suspends the coroutine.""" + # Analogous to Python3 asyncio concept __slots__ = [] @@ -29,7 +34,8 @@ def validate(self, chunk): class _ReadBytes(_Awaitable): """Reads a fixed number of bytes.""" - __slots__ = ['remaining'] + + __slots__ = ["remaining"] def __init__(self, count): self.remaining = count @@ -37,7 +43,8 @@ def __init__(self, count): class _ReadUtf8(_ReadBytes): """Reads a fixed number of bytes, validates utf-8.""" - __slots__ = ['utf8_validator'] + + __slots__ = ["utf8_validator"] def __init__(self, count, utf8_validator): self.remaining = count @@ -46,12 +53,13 @@ def __init__(self, count, utf8_validator): def validate(self, data): valid, _, _, _ = self.utf8_validator.validate(bytes(data)) if not valid: - raise ParseError('invalid utf8') + raise ParseError("invalid utf8") class _ReadUntil(_Awaitable): """Read until a separator.""" - __slots__ = ['sep', 'max_bytes'] + + __slots__ = ["sep", "max_bytes"] def __init__(self, sep, max_bytes=None): self.sep = sep @@ -60,9 +68,7 @@ def __init__(self, sep, max_bytes=None): def check_length(self, pos): """Check the length is within max bytes.""" if self.max_bytes is not None and pos > self.max_bytes: - raise ParseError( - 'expected {!r}'.format(self.sep) - ) + raise ParseError("expected {!r}".format(self.sep)) class Parser(object): @@ -89,6 +95,7 @@ def __init__(self): self._awaiting = None self._buffer = bytearray() # Buffer for reads self._eof = False + self._exhausted = False self.reset() read = _ReadBytes @@ -121,76 +128,86 @@ def feed(self, data): :param bytes data: Data to parse. """ + def _check_length(pos): try: self._awaiting.check_length(pos) except ParseError as error: self._awaiting = self._gen.throw(error) + if self._exhausted: + raise ParseOverflow("extra bytes in feed(); {!r}".format(data[:100])) if self._eof: - raise ParseError('end of file reached') + raise ParseEOF( + "end of file reached; feed() has previously been called with empty bytes" + ) if not data: self._eof = True - self._gen.throw( - ParseError('unexpected eof of file') - ) - - _buffer = self._buffer - pos = 0 - while pos < len(data): - # Awaiting a read of a fixed number of bytes - if isinstance(self._awaiting, _ReadBytes): - # This many bytes left to read - remaining = self._awaiting.remaining - # Bite off remaining bytes - chunk = data[pos:pos + remaining] - chunk_size = len(chunk) - pos += chunk_size - try: - # Validate new data - self._awaiting.validate(chunk) - except ParseError as error: - # Raises an exception in parse() - self._awaiting = self._gen.throw(error) - # Add to buffer - _buffer.extend(chunk) - remaining -= chunk_size - if remaining: - # Await more bytes - self._awaiting.remaining = remaining - else: - # Send to coroutine, get new 'awaitable' - self._awaiting = self._gen.send(_buffer[:]) - del _buffer[:] - - # Awaiting a read until a terminator - elif isinstance(self._awaiting, _ReadUntil): - # Reading to separator - chunk = data[pos:] - _buffer.extend(chunk) - sep = self._awaiting.sep - sep_index = _buffer.find(sep) - - if sep_index == -1: - # Separator not found, advance position - pos += len(chunk) - _check_length(len(_buffer)) - else: - # Found separator - # Get data prior to and including separator - sep_index += len(sep) - _check_length(sep_index) - # Reset data, to continue parsing - data = _buffer[sep_index:] - pos = 0 - # Send bytes to coroutine, get new 'awaitable' - self._awaiting = self._gen.send(_buffer[:sep_index]) - del _buffer[:] - - # Yield any non-awaitables... - while not isinstance(self._awaiting, _Awaitable): - yield self._awaiting - self._awaiting = next(self._gen) + self._gen.throw(ParseEOF("unexpected eof of file")) + + try: + _buffer = self._buffer + pos = 0 + while pos < len(data): + # Awaiting a read of a fixed number of bytes + if isinstance(self._awaiting, _ReadBytes): + # This many bytes left to read + remaining = self._awaiting.remaining + # Bite off remaining bytes + chunk = data[pos : pos + remaining] + chunk_size = len(chunk) + pos += chunk_size + try: + # Validate new data + self._awaiting.validate(chunk) + except ParseError as error: + # Raises an exception in parse() + self._awaiting = self._gen.throw(error) + # Add to buffer + _buffer.extend(chunk) + remaining -= chunk_size + if remaining: + # Await more bytes + self._awaiting.remaining = remaining + else: + # Send to coroutine, get new 'awaitable' + self._awaiting = self._gen.send(_buffer[:]) + del _buffer[:] + + # Awaiting a read until a terminator + elif isinstance(self._awaiting, _ReadUntil): + # Reading to separator + chunk = data[pos:] + _buffer.extend(chunk) + sep = self._awaiting.sep + sep_index = _buffer.find(sep) + + if sep_index == -1: + # Separator not found, advance position + pos += len(chunk) + _check_length(len(_buffer)) + else: + # Found separator + # Get data prior to and including separator + sep_index += len(sep) + _check_length(sep_index) + # Reset data, to continue parsing + data = _buffer[sep_index:] + pos = 0 + # Send bytes to coroutine, get new 'awaitable' + self._awaiting = self._gen.send(_buffer[:sep_index]) + del _buffer[:] + + # Yield any non-awaitables... + while not isinstance(self._awaiting, _Awaitable): + yield self._awaiting + self._awaiting = next(self._gen) + except StopIteration: + self._exhausted = True + if pos < len(data): + raise ParseOverflow( + "extra bytes in feed(); {!r}".format(data[pos:][:100]) + ) def parse(self): """ @@ -210,9 +227,10 @@ def parse(self): if __name__ == "__main__": # pragma: no cover + class TestParser(Parser): def parse(self): - data = yield self.read_until(b'\r\n\r\n') + data = yield self.read_until(b"\r\n\r\n") yield data data = yield self.read(1) yield data @@ -222,7 +240,20 @@ def parse(self): yield data data = yield self.read(2) yield data + parser = TestParser() - for b in (b'head', b'ers: example', b'\r\n', b'\r\n', b'12', b'34', b'5', b'678', b'90'): + for b in ( + b"head", + b"ers: example", + b"\r\n", + b"\r\n", + b"12", + b"34", + b"5", + b"678", + b"9", + b"9", + b"9", + ): for frame in parser.feed(b): print(repr(frame)) diff --git a/lomond/stream.py b/lomond/stream.py index 2a33a51..bba7928 100644 --- a/lomond/stream.py +++ b/lomond/stream.py @@ -71,6 +71,13 @@ def feed(self, data): raise errors.CriticalProtocolError( text_type(error) ) + except errors.WebSocketError: + raise + except Exception as error: + log.exception('unknown error in websocket stream') + raise errors.CriticalProtocolError( + "unknown error; {}".format(error) + ) log.debug(" SRV -> CLI : %r", frame) if frame.is_control: # Control messages are never fragmented diff --git a/tests/test_parser.py b/tests/test_parser.py index 64c6a7c..faf0f9f 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -2,7 +2,7 @@ import pytest -from lomond.parser import ParseError, Parser +from lomond.parser import ParseEOF, ParseError, ParseOverflow, Parser def test_parser_reset_is_a_generator(): @@ -32,12 +32,29 @@ def parse(self): test_parser = TestParser() test_data = [b'foo', b''] assert not test_parser.is_eof - with pytest.raises(ParseError): + with pytest.raises(ParseEOF): for data in test_data: for _token in test_parser.feed(data): print(_token) assert test_parser.is_eof - test_parser.feed(b'more') - with pytest.raises(ParseError): + with pytest.raises(ParseEOF): for data in test_parser.feed('foo'): print(data) + + +def test_overflow(): + class TestParser(Parser): + def parse(self): + data = yield self.read(3) + yield data + test_parser = TestParser() + output = [] + with pytest.raises(ParseOverflow): + for data in test_parser.feed(b'foobar'): + output.append(data) + assert output == [b'foo'] + output = [] + with pytest.raises(ParseOverflow): + for data in test_parser.feed(b'foobar'): + output.append(data) + assert not output