Skip to content

Commit

Permalink
[Fix] parser: sometimes initial body payload (for streams) was missing
Browse files Browse the repository at this point in the history
  • Loading branch information
sebastien committed Oct 2, 2024
1 parent 95a919a commit 6f2f512
Showing 1 changed file with 46 additions and 9 deletions.
55 changes: 46 additions & 9 deletions src/py/extra/http/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,7 @@ def __str__(self) -> str:


class BodyEOSParser:
"""Looks for an End-Of-Strem (EOS) delimiter in the body."""

__slots__ = ["line", "data"]

Expand Down Expand Up @@ -161,6 +162,33 @@ def feed(self, chunk: bytes, start: int = 0) -> tuple[bytes | None, int]:
return data, read


class BodyRestParser:
    """Body parser that swallows every byte it is handed.

    There is no delimiter or length to look for: whatever is fed in is
    appended to an internal buffer until `flush()` is called.
    """

    __slots__ = ["buffer"]

    def __init__(self) -> None:
        # Accumulates the body bytes received since the last reset/flush.
        self.buffer: bytearray = bytearray()

    def feed(self, chunk: bytes, start: int = 0) -> tuple[bool | None, int]:
        """Append `chunk[start:]` to the buffer.

        Returns a `(True, count)` pair where `count` is
        `len(chunk) - start`, i.e. everything from `start` onwards is
        reported as read.
        """
        tail = chunk[start:]
        self.buffer += tail
        # Everything we were given went into the buffer.
        consumed: int = len(chunk) - start
        return True, consumed

    def reset(self) -> "BodyRestParser":
        """Empty the buffer in place and return this parser for chaining."""
        self.buffer.clear()
        return self

    def flush(self) -> HTTPBodyBlob:
        """Return the accumulated bytes as a blob, then reset the parser.

        The blob is built from a copy of the buffer together with its
        length, so clearing the buffer afterwards does not affect it.
        """
        # TODO: We should check it's expected
        snapshot = self.buffer[:]
        blob: HTTPBodyBlob = HTTPBodyBlob(snapshot, len(self.buffer))
        self.reset()
        return blob


class BodyLengthParser:
"""Parses the body of a request with ContentLength set"""

Expand Down Expand Up @@ -212,8 +240,10 @@ class HTTPParser:
def __init__(self) -> None:
self.message: MessageParser = MessageParser()
self.headers: HeadersParser = HeadersParser()
# FIXME: Not sure we need body EOS parser anymore.
self.bodyEOS: BodyEOSParser = BodyEOSParser()
self.bodyLength: BodyLengthParser = BodyLengthParser()
self.bodyRest: BodyRestParser = BodyRestParser()
self.parser: (
MessageParser | HeadersParser | BodyEOSParser | BodyLengthParser
) = self.message
Expand Down Expand Up @@ -267,30 +297,36 @@ def feed(self, chunk: bytes) -> Iterator[HTTPAtom]:
query=parseQuery(line.query),
headers=headers or HTTPHeaders({}),
protocol=line.protocol,
# FIXME: Is there remaining content?
body=HTTPBodyBlob(b"", 0),
)
self.parser = self.message.reset()
elif headers is not None:
if headers.contentLength is None:
self.parser = self.bodyEOS.reset(b"\n")
# FIXME: Not sure what the EOS parser was for
# self.parser = self.bodyEOS.reset(b"\n")
self.parser = self.bodyRest.reset()
yield HTTPProcessingStatus.Body
else:
self.parser = self.bodyLength.reset(
headers.contentLength
)
yield HTTPProcessingStatus.Body
elif self.parser is self.bodyEOS or self.parser is self.bodyLength:
elif (
self.parser is self.bodyEOS
or self.parser is self.bodyLength
or self.parser is self.bodyRest
):
if self.requestLine is None or self.requestHeaders is None:
yield HTTPProcessingStatus.BadFormat
else:
headers = self.requestHeaders
line = self.requestLine
# NOTE: This is an awkward dance around the type checker
body = (
self.bodyEOS.flush()
if self.parser is self.bodyEOS
else self.bodyLength.flush()
)
if self.parser is self.bodyRest:
self.parser.feed(chunk[offset:])
offset = len(chunk)
body = self.parser.flush()
yield (
HTTPRequest(
method=line.method,
Expand All @@ -313,8 +349,9 @@ def feed(self, chunk: bytes) -> Iterator[HTTPAtom]:
self.parser = self.message.reset()
else:
raise RuntimeError(f"Unsupported parser: {self.parser}")
# We increase the offset with the read bytes
offset += read
# NOTE: Need to make sure the indentation is correct here
# We increase the offset with the read bytes
offset += read


def parseQuery(text: str) -> dict[str, str]:
Expand Down

0 comments on commit 6f2f512

Please sign in to comment.