Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

handle edge case where too many bytes are supplied #80

Open
wants to merge 11 commits into
base: master
Choose a base branch
from
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,12 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/)
and this project adheres to [Semantic Versioning](http://semver.org/).


## [0.3.3] - 2018-09-21

### Fixed

- Issue with packets >= 126 bytes on Python 2.7.3

## [0.3.2] - 2018-07-04

### Changed
Expand Down
1 change: 1 addition & 0 deletions compliance/runserver.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
wstest -m fuzzingserver
2 changes: 1 addition & 1 deletion lomond/_version.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
from __future__ import unicode_literals

__version__ = "0.3.2"
__version__ = "0.3.3"
18 changes: 13 additions & 5 deletions lomond/frame_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,6 @@
class FrameParser(Parser):
"""Parses a stream of data in to HTTP headers + WS frames."""

unpack16 = struct.Struct(b"!H").unpack
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This was the source of the strange traceback. On Python 2.7.3 struct.unpack failed with anything that wasn't a bytes object. On later versions, it would accept any object with the buffer protocol, i.e. bytearray.

unpack64 = struct.Struct(b"!Q").unpack

def __init__(self, parse_headers=True, validate=True):
self.parse_headers = parse_headers
self.validate = validate
Expand All @@ -39,6 +36,17 @@ def __repr__(self):
self.validate
)

# A bug in Python 2.7.3 requires casting data to bytes: on that
# version struct.unpack fails with anything that isn't a bytes object,
# while later versions accept any object with the buffer protocol
# (e.g. bytearray).
@classmethod
def unpack16(cls, data, _unpack16=struct.Struct(b"!H").unpack):
    """Unpack 16 bits (network byte order) into an integer.

    ``_unpack16`` is supplied as a default argument so that it is a
    local, which is faster.
    """
    return _unpack16(bytes(data))[0]

@classmethod
def unpack64(cls, data, _unpack64=struct.Struct(b"!Q").unpack):
    """Unpack 64 bits (network byte order) into an integer.

    The ``bytes()`` cast works around a Python 2.7.3 issue where
    ``struct.unpack`` rejects buffer objects such as ``bytearray``;
    ``_unpack64`` is a default argument so the lookup is a fast local.
    """
    (value,) = _unpack64(bytes(data))
    return value

def enable_compression(self):
"""Enable compressed packets."""
self._compression = True
Expand Down Expand Up @@ -71,9 +79,9 @@ def parse(self):
payload_length = byte2 & 0b01111111

if payload_length == 126:
(payload_length,) = self.unpack16((yield self.read(2)))
payload_length = self.unpack16((yield self.read(2)))
elif payload_length == 127:
(payload_length,) = self.unpack64((yield self.read(8)))
payload_length = self.unpack64((yield self.read(8)))
if payload_length > 0x7fffffffffffffff:
raise errors.PayloadTooLarge("payload is too large")

Expand Down
171 changes: 101 additions & 70 deletions lomond/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,13 @@ class ParseEOF(ParseError):
"""End of Stream."""


class ParseOverflow(Exception):
    """Extra bytes in feed after parser completed.

    Raised when data is supplied to ``feed()`` after the parser has
    finished. This hadn't ever been observed in practice, but raising
    it explicitly may catch issues in the future.
    """


class _Awaitable(object):
"""An operation that effectively suspends the coroutine."""

# Analogous to Python3 asyncio concept
__slots__ = []

Expand All @@ -29,15 +34,17 @@ def validate(self, chunk):

class _ReadBytes(_Awaitable):
    """Reads a fixed number of bytes."""

    # The diff rendering showed both the pre- and post-change
    # __slots__ lines; only the updated one belongs in the file.
    __slots__ = ["remaining"]

    def __init__(self, count):
        # Number of bytes still required before this await completes.
        self.remaining = count

class _ReadUtf8(_ReadBytes):
"""Reads a fixed number of bytes, validates utf-8."""
__slots__ = ['utf8_validator']

__slots__ = ["utf8_validator"]

def __init__(self, count, utf8_validator):
self.remaining = count
Expand All @@ -46,12 +53,13 @@ def __init__(self, count, utf8_validator):
def validate(self, data):
valid, _, _, _ = self.utf8_validator.validate(bytes(data))
if not valid:
raise ParseError('invalid utf8')
raise ParseError("invalid utf8")


class _ReadUntil(_Awaitable):
"""Read until a separator."""
__slots__ = ['sep', 'max_bytes']

__slots__ = ["sep", "max_bytes"]

def __init__(self, sep, max_bytes=None):
self.sep = sep
Expand All @@ -60,9 +68,7 @@ def __init__(self, sep, max_bytes=None):
def check_length(self, pos):
"""Check the length is within max bytes."""
if self.max_bytes is not None and pos > self.max_bytes:
raise ParseError(
'expected {!r}'.format(self.sep)
)
raise ParseError("expected {!r}".format(self.sep))


class Parser(object):
Expand All @@ -89,6 +95,7 @@ def __init__(self):
self._awaiting = None
self._buffer = bytearray() # Buffer for reads
self._eof = False
self._exhausted = False
self.reset()

read = _ReadBytes
Expand Down Expand Up @@ -121,76 +128,86 @@ def feed(self, data):
:param bytes data: Data to parse.

"""

def _check_length(pos):
try:
self._awaiting.check_length(pos)
except ParseError as error:
self._awaiting = self._gen.throw(error)

if self._exhausted:
raise ParseOverflow("extra bytes in feed(); {!r}".format(data[:100]))
if self._eof:
raise ParseError('end of file reached')
raise ParseEOF(
"end of file reached; feed() has previously been called with empty bytes"
)
if not data:
self._eof = True
self._gen.throw(
ParseError('unexpected eof of file')
)

_buffer = self._buffer
pos = 0
while pos < len(data):
# Awaiting a read of a fixed number of bytes
if isinstance(self._awaiting, _ReadBytes):
# This many bytes left to read
remaining = self._awaiting.remaining
# Bite off remaining bytes
chunk = data[pos:pos + remaining]
chunk_size = len(chunk)
pos += chunk_size
try:
# Validate new data
self._awaiting.validate(chunk)
except ParseError as error:
# Raises an exception in parse()
self._awaiting = self._gen.throw(error)
# Add to buffer
_buffer.extend(chunk)
remaining -= chunk_size
if remaining:
# Await more bytes
self._awaiting.remaining = remaining
else:
# Send to coroutine, get new 'awaitable'
self._awaiting = self._gen.send(_buffer[:])
del _buffer[:]

# Awaiting a read until a terminator
elif isinstance(self._awaiting, _ReadUntil):
# Reading to separator
chunk = data[pos:]
_buffer.extend(chunk)
sep = self._awaiting.sep
sep_index = _buffer.find(sep)

if sep_index == -1:
# Separator not found, advance position
pos += len(chunk)
_check_length(len(_buffer))
else:
# Found separator
# Get data prior to and including separator
sep_index += len(sep)
_check_length(sep_index)
# Reset data, to continue parsing
data = _buffer[sep_index:]
pos = 0
# Send bytes to coroutine, get new 'awaitable'
self._awaiting = self._gen.send(_buffer[:sep_index])
del _buffer[:]

# Yield any non-awaitables...
while not isinstance(self._awaiting, _Awaitable):
yield self._awaiting
self._awaiting = next(self._gen)
self._gen.throw(ParseEOF("unexpected eof of file"))
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This function is too weighty. At some point I will break it up a little. It's on my list.


try:
_buffer = self._buffer
pos = 0
while pos < len(data):
# Awaiting a read of a fixed number of bytes
if isinstance(self._awaiting, _ReadBytes):
# This many bytes left to read
remaining = self._awaiting.remaining
# Bite off remaining bytes
chunk = data[pos : pos + remaining]
chunk_size = len(chunk)
pos += chunk_size
try:
# Validate new data
self._awaiting.validate(chunk)
except ParseError as error:
# Raises an exception in parse()
self._awaiting = self._gen.throw(error)
# Add to buffer
_buffer.extend(chunk)
remaining -= chunk_size
if remaining:
# Await more bytes
self._awaiting.remaining = remaining
else:
# Send to coroutine, get new 'awaitable'
self._awaiting = self._gen.send(_buffer[:])
del _buffer[:]

# Awaiting a read until a terminator
elif isinstance(self._awaiting, _ReadUntil):
# Reading to separator
chunk = data[pos:]
_buffer.extend(chunk)
sep = self._awaiting.sep
sep_index = _buffer.find(sep)

if sep_index == -1:
# Separator not found, advance position
pos += len(chunk)
_check_length(len(_buffer))
else:
# Found separator
# Get data prior to and including separator
sep_index += len(sep)
_check_length(sep_index)
# Reset data, to continue parsing
data = _buffer[sep_index:]
pos = 0
# Send bytes to coroutine, get new 'awaitable'
self._awaiting = self._gen.send(_buffer[:sep_index])
del _buffer[:]

# Yield any non-awaitables...
while not isinstance(self._awaiting, _Awaitable):
yield self._awaiting
self._awaiting = next(self._gen)
except StopIteration:
self._exhausted = True
if pos < len(data):
raise ParseOverflow(
"extra bytes in feed(); {!r}".format(data[pos:][:100])
)

def parse(self):
"""
Expand All @@ -210,9 +227,10 @@ def parse(self):


if __name__ == "__main__": # pragma: no cover

class TestParser(Parser):
def parse(self):
data = yield self.read_until(b'\r\n\r\n')
data = yield self.read_until(b"\r\n\r\n")
yield data
data = yield self.read(1)
yield data
Expand All @@ -222,7 +240,20 @@ def parse(self):
yield data
data = yield self.read(2)
yield data

parser = TestParser()
for b in (b'head', b'ers: example', b'\r\n', b'\r\n', b'12', b'34', b'5', b'678', b'90'):
for b in (
b"head",
b"ers: example",
b"\r\n",
b"\r\n",
b"12",
b"34",
b"5",
b"678",
b"9",
b"9",
b"9",
):
for frame in parser.feed(b):
print(repr(frame))
7 changes: 7 additions & 0 deletions lomond/stream.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,13 @@ def feed(self, data):
raise errors.CriticalProtocolError(
text_type(error)
)
except errors.WebSocketError:
raise
except Exception as error:
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is mere paranoia.

log.exception('unknown error in websocket stream')
raise errors.CriticalProtocolError(
"unknown error; {}".format(error)
)
log.debug(" SRV -> CLI : %r", frame)
if frame.is_control:
# Control messages are never fragmented
Expand Down
25 changes: 21 additions & 4 deletions tests/test_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import pytest

from lomond.parser import ParseError, Parser
from lomond.parser import ParseEOF, ParseError, ParseOverflow, Parser


def test_parser_reset_is_a_generator():
Expand Down Expand Up @@ -32,12 +32,29 @@ def parse(self):
test_parser = TestParser()
test_data = [b'foo', b'']
assert not test_parser.is_eof
with pytest.raises(ParseError):
with pytest.raises(ParseEOF):
for data in test_data:
for _token in test_parser.feed(data):
print(_token)
assert test_parser.is_eof
test_parser.feed(b'more')
with pytest.raises(ParseError):
with pytest.raises(ParseEOF):
for data in test_parser.feed('foo'):
print(data)


def test_overflow():
    """Bytes fed after parse() has finished raise ParseOverflow."""

    class OverflowParser(Parser):
        def parse(self):
            chunk = yield self.read(3)
            yield chunk

    parser = OverflowParser()
    tokens = []
    with pytest.raises(ParseOverflow):
        tokens.extend(parser.feed(b'foobar'))
    # The first three bytes were yielded before the overflow was hit.
    assert tokens == [b'foo']
    tokens = []
    with pytest.raises(ParseOverflow):
        tokens.extend(parser.feed(b'foobar'))
    # An exhausted parser yields nothing at all.
    assert not tokens