From cc585ff8d7d5133dea08c60ee2bab49aadebe5e5 Mon Sep 17 00:00:00 2001 From: roxblnfk Date: Sun, 23 Jun 2024 17:54:52 +0400 Subject: [PATCH] fix(smtp): better HTTP Multipart parsing; fix SMTP body ending detection --- src/Traffic/Message/Multipart/Part.php | 33 ++++++------ src/Traffic/Parser/Http.php | 2 +- src/Traffic/Parser/MultipartType.php | 39 ++++++++++++++ src/Traffic/Parser/Smtp.php | 23 +++++--- .../Parser/MultipartBodyParserTest.php | 53 ++++++++++++++++++- 5 files changed, 121 insertions(+), 29 deletions(-) create mode 100644 src/Traffic/Parser/MultipartType.php diff --git a/src/Traffic/Message/Multipart/Part.php b/src/Traffic/Message/Multipart/Part.php index 8c93408b..61d4e93a 100644 --- a/src/Traffic/Message/Multipart/Part.php +++ b/src/Traffic/Message/Multipart/Part.php @@ -28,26 +28,23 @@ protected function __construct( */ public static function create(array $headers): Part { - /** - * Check Content-Disposition header - * - * @var string $contentDisposition - */ - $contentDisposition = self::findHeader($headers, 'Content-Disposition')[0] - ?? throw new \RuntimeException('Missing Content-Disposition header.'); - if ($contentDisposition === '') { - throw new \RuntimeException('Missing Content-Disposition header, can\'t be empty'); - } + $contentDisposition = self::findHeader($headers, 'Content-Disposition')[0] ?? null; + + $name = $fileName = null; + if ((string) $contentDisposition !== '') { + // Get field name and file name + $name = \preg_match('/\bname=(?:(?[^" ;,]++)|"(?[^"]++)")/', $contentDisposition, $matches) === 1 + ? ($matches['a'] ?: $matches['b']) + : null; - // Get field name and file name - $name = \preg_match('/\bname=(?:(?[^" ;,]++)|"(?[^"]++)")/', $contentDisposition, $matches) === 1 - ? ($matches['a'] ?: $matches['b']) - : null; + // Decode file name + $fileName = \preg_match( + '/\bfilename=(?:(?[^" ;,]++)|"(?[^"]++)")/', + $contentDisposition, + $matches + ) === 1 ? ($matches['a'] ?: $matches['b']) : null; + } - // Decode file name - $fileName = \preg_match('/\bfilename=(?:(?[^" ;,]++)|"(?[^"]++)")/', $contentDisposition, $matches) === 1 - ? ($matches['a'] ?: $matches['b']) - : null; $fileName = $fileName !== null ? \html_entity_decode($fileName) : null; $isFile = (string) $fileName !== '' || \preg_match('/text\\/.++/', self::findHeader($headers, 'Content-Type')[0] ?? 'text/plain') !== 1; diff --git a/src/Traffic/Parser/Http.php b/src/Traffic/Parser/Http.php index 85ccee34..3441895e 100644 --- a/src/Traffic/Parser/Http.php +++ b/src/Traffic/Parser/Http.php @@ -224,7 +224,7 @@ private function parseBody(StreamClient $stream, ServerRequestInterface $request $contentType = $request->getHeaderLine('Content-Type'); return match (true) { $contentType === 'application/x-www-form-urlencoded' => self::parseUrlEncodedBody($request), - \str_contains($contentType, 'multipart/form-data') => $this->processMultipartForm($request), + \str_contains($contentType, 'multipart/') => $this->processMultipartForm($request), default => $request, }; } diff --git a/src/Traffic/Parser/MultipartType.php b/src/Traffic/Parser/MultipartType.php new file mode 100644 index 00000000..5215734b --- /dev/null +++ b/src/Traffic/Parser/MultipartType.php @@ -0,0 +1,39 @@ + self::Mixed, + 'multipart/alternative' => self::Alternative, + 'multipart/digest' => self::Digest, + 'multipart/parallel' => self::Parallel, + 'multipart/form-data' => self::FormData, + 'multipart/report' => self::Report, + 'multipart/signed' => self::Signed, + 'multipart/encrypted' => self::Encrypted, + 'multipart/related' => self::Related, + default => self::Other, + }; + } +} diff --git a/src/Traffic/Parser/Smtp.php b/src/Traffic/Parser/Smtp.php index 50310489..2cfd6e84 100644 --- a/src/Traffic/Parser/Smtp.php +++ b/src/Traffic/Parser/Smtp.php @@ -32,7 +32,7 @@ public function parseStream(array $protocol, StreamClient $stream): Message\Smtp $message = Message\Smtp::create($protocol, headers: $headers); // Defaults - $boundary = "\r\n.\r\n"; + $endOfStream = ["\r\n.\r\n"]; $isMultipart = false; // Check the message is multipart. @@ -41,10 +41,11 @@ public function parseStream(array $protocol, StreamClient $stream): Message\Smtp && \preg_match('/boundary="?([^"\\s;]++)"?/', $contentType, $matches) === 1 ) { $isMultipart = true; - $boundary = "\r\n--{$matches[1]}--\r\n\r\n"; + $endOfStream = ["\r\n--{$matches[1]}--\r\n\r\n"]; + $endOfStream[] = $endOfStream[0] . ".\r\n"; } - $stored = $this->storeBody($fileStream, $stream, $boundary); + $stored = $this->storeBody($fileStream, $stream, $endOfStream); $message = $message->withBody($fileStream); // Message's body must be seeked to the beginning of the body. $fileStream->seek(-$stored, \SEEK_CUR); @@ -62,16 +63,19 @@ public function parseStream(array $protocol, StreamClient $stream): Message\Smtp * Flush stream data into PSR stream. * Note: there can be read more data than {@see $limit} bytes but write only {@see $limit} bytes. * - * @return int Number of bytes written to the stream. + * @param non-empty-array $endings + * + * @return int<0, max> Number of bytes written to the stream. */ private function storeBody( StreamInterface $fileStream, StreamClient $stream, - string $end = "\r\n.\r\n", + array $endings = ["\r\n.\r\n"], ): int { $written = 0; - $endLen = \strlen($end); + $endLen = \min(\array_map('\strlen', $endings)); + /** @var string $chunk */ foreach ($stream->getIterator() as $chunk) { // Write chunk to the file stream. $fileStream->write($chunk); @@ -83,8 +87,11 @@ private function storeBody( $fileStream->seek(-$endLen, \SEEK_CUR); $chunk = $fileStream->read($endLen); } - if (\str_ends_with($chunk, $end)) { - return $written; + + foreach ($endings as $end) { + if (\str_ends_with($chunk, $end)) { + return $written; + } } } diff --git a/tests/Unit/Traffic/Parser/MultipartBodyParserTest.php b/tests/Unit/Traffic/Parser/MultipartBodyParserTest.php index 2f4b303c..f7ac0c6f 100644 --- a/tests/Unit/Traffic/Parser/MultipartBodyParserTest.php +++ b/tests/Unit/Traffic/Parser/MultipartBodyParserTest.php @@ -9,6 +9,7 @@ use Buggregator\Trap\Traffic\Message\Multipart\File; use Buggregator\Trap\Traffic\Message\Multipart\Part; use Buggregator\Trap\Traffic\Parser; +use Buggregator\Trap\Traffic\Parser\MultipartType; use Nyholm\Psr7\Stream; use PHPUnit\Framework\TestCase; use Psr\Http\Message\StreamInterface; @@ -159,6 +160,51 @@ public function testBase64Encoded(): void self::assertSame($file2, $file->getStream()->__toString()); } + /** + * Simple multipart/mixed message without nested parts. + */ + public function testMultipartMixed(): void + { + $body = $this->makeStream( + <<parse($body, '40ugHb8e', MultipartType::Mixed); + + + self::assertCount(2, $result); + // Field + $file = $result[0]; + self::assertInstanceOf(Field::class, $file); + self::assertNull($file->getName()); + self::assertSame('Test Body', $file->getValue()); + + // Attached file + $file = $result[1]; + self::assertInstanceOf(File::class, $file); + self::assertSame('test.txt', $file->getName()); + self::assertSame('test.txt', $file->getClientFilename()); + self::assertSame('text/plain', $file->getClientMediaType()); + self::assertNull($file->getEmbeddingId()); + self::assertSame("sdg\n", $file->getStream()->__toString()); + } + private function makeStream(string $body): StreamInterface { $stream = Stream::create($body); @@ -171,8 +217,11 @@ private function makeStream(string $body): StreamInterface * * @return iterable */ - private function parse(StreamInterface $body, string $boundary): iterable - { + private function parse( + StreamInterface $body, + string $boundary, + MultipartType $type = MultipartType::FormData, + ): iterable { return $this->runInFiber(static fn() => Parser\Http::parseMultipartBody($body, $boundary)); } }