From 53acf602f03bf5c2f7175e335d92ceb4503333b1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Michael=20Vo=C5=99=C3=AD=C5=A1ek?=
Date: Sat, 1 Jun 2024 14:29:12 +0200
Subject: [PATCH 1/3] Add empty SQL functional test

---
 tests/clihighlight.txt      | 2 ++
 tests/compress.txt          | 2 ++
 tests/format-highlight.html | 2 ++
 tests/format.txt            | 2 ++
 tests/highlight.html        | 2 ++
 tests/sql.sql               | 2 ++
 6 files changed, 12 insertions(+)

diff --git a/tests/clihighlight.txt b/tests/clihighlight.txt
index c98a763..4d4386c 100644
--- a/tests/clihighlight.txt
+++ b/tests/clihighlight.txt
@@ -13,6 +13,8 @@
 ORDER BY
   COUNT(order_id) DESC;
 ---
+
+---
 UPDATE
   customers
 SET
diff --git a/tests/compress.txt b/tests/compress.txt
index 5805fce..fd507ba 100644
--- a/tests/compress.txt
+++ b/tests/compress.txt
@@ -1,4 +1,6 @@
 SELECT customer_id, customer_name, COUNT(order_id) as total FROM customers INNER JOIN orders ON customers.customer_id = orders.customer_id GROUP BY customer_id, customer_name HAVING COUNT(order_id) > 5 ORDER BY COUNT(order_id) DESC;
+---
+
 ---
 UPDATE customers SET totalorders = ordersummary.total FROM (SELECT customer_id, count(order_id) As total FROM orders GROUP BY customer_id) As ordersummary WHERE customers.customer_id = ordersummary.customer_id
 ---
diff --git a/tests/format-highlight.html b/tests/format-highlight.html
index 8d6521c..7c9b1d9 100644
--- a/tests/format-highlight.html
+++ b/tests/format-highlight.html
@@ -13,6 +13,8 @@
 ORDER BY
   COUNT(order_id) DESC;
 ---
+
+---
 UPDATE
   customers
 SET
diff --git a/tests/format.txt b/tests/format.txt
index a41ffd3..ffa5cb7 100644
--- a/tests/format.txt
+++ b/tests/format.txt
@@ -13,6 +13,8 @@ HAVING
 ORDER BY
   COUNT(order_id) DESC;
 ---
+
+---
 UPDATE
   customers
 SET
diff --git a/tests/highlight.html b/tests/highlight.html
index 6eb6da8..6337210 100644
--- a/tests/highlight.html
+++ b/tests/highlight.html
@@ -4,6 +4,8 @@
 HAVING COUNT(order_id) > 5
 ORDER BY COUNT(order_id) DESC;
 ---
+
+---
 UPDATE customers
         SET totalorders = ordersummary.total
         FROM (SELECT customer_id, count(order_id) As total
diff --git a/tests/sql.sql b/tests/sql.sql
index 267a504..d57da6e 100644
--- a/tests/sql.sql
+++ b/tests/sql.sql
@@ -3,6 +3,8 @@ FROM customers INNER JOIN orders ON customers.customer_id = orders.customer_id
 GROUP BY customer_id, customer_name
 HAVING COUNT(order_id) > 5
 ORDER BY COUNT(order_id) DESC;
+---
+
 ---
 UPDATE customers
         SET totalorders = ordersummary.total

From 25df6af93b7396b06740a0d8930e5419a079575a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Michael=20Vo=C5=99=C3=AD=C5=A1ek?= 
Date: Sat, 1 Jun 2024 14:33:31 +0200
Subject: [PATCH 2/3] Add and reorganize unit tests

---
 tests/SqlFormatterTest.php |  39 ++++++++---
 tests/TokenizerTest.php    | 128 +++++++++++++++++++++++++++++++++++--
 2 files changed, 153 insertions(+), 14 deletions(-)

diff --git a/tests/SqlFormatterTest.php b/tests/SqlFormatterTest.php
index 05dd17f..92dd437 100644
--- a/tests/SqlFormatterTest.php
+++ b/tests/SqlFormatterTest.php
@@ -18,6 +18,7 @@
 use function defined;
 use function explode;
 use function file_get_contents;
+use function implode;
 use function pack;
 use function rtrim;
 use function sprintf;
@@ -41,6 +42,7 @@ public function testFormatHighlight(string $sql, string $html): void
     }
 
     #[DataProvider('formatData')]
+    #[DataProvider('formatLongConcatData')]
     public function testFormat(string $sql, string $html): void
     {
         $formatter = new SqlFormatter(new NullHighlighter());
@@ -99,13 +101,22 @@ public function testUsePre(): void
         $this->assertSame($actual, $expected);
     }
 
+    /** @return string[] */
+    private static function fileSqlData(): array
+    {
+        $contents = file_get_contents(__DIR__ . '/sql.sql');
+        assert($contents !== false);
+
+        return explode("\n---\n", rtrim($contents, "\n"));
+    }
+
     /** @return Generator */
     private static function fileDataProvider(string $file): Generator
     {
         $contents = file_get_contents(__DIR__ . '/' . $file);
         assert($contents !== false);
         $formatHighlightData = explode("\n---\n", rtrim($contents, "\n"));
-        $sqlData             = self::sqlData();
+        $sqlData             = self::fileSqlData();
         if (count($formatHighlightData) !== count($sqlData)) {
             throw new UnexpectedValueException(sprintf(
                 '"%s" (%d sections) and sql.sql (%d sections) should have the same number of sections',
@@ -138,6 +149,23 @@ public static function formatData(): Generator
         return self::fileDataProvider('format.txt');
     }
 
+    /** @return Generator<string, array{string, string}> */
+    public static function formatLongConcatData(): Generator
+    {
+        $sqlParts = [];
+        for ($i = 0; $i < 2_000; $i++) {
+            $sqlParts[] = 'cast(\'foo' . $i . '\' as blob)';
+        }
+
+        $inConcat  = 'concat(' . implode(', ', $sqlParts) . ')';
+        $outConcat = "concat(\n      " . implode(",\n      ", $sqlParts) . "\n    )";
+
+        yield 'long concat' => [
+            'select iif(' . $inConcat . ' = ' . $inConcat . ', 10, 20) x',
+            "select\n  iif(\n    " . $outConcat . ' = ' . $outConcat . ",\n    10,\n    20\n  ) x",
+        ];
+    }
+
     /** @return Generator */
     public static function compressData(): Generator
     {
@@ -149,13 +177,4 @@ public static function highlightData(): Generator
     {
         return self::fileDataProvider('highlight.html');
     }
-
-    /** @return mixed[] */
-    private static function sqlData(): array
-    {
-        $contents = file_get_contents(__DIR__ . '/sql.sql');
-        assert($contents !== false);
-
-        return explode("\n---\n", rtrim($contents, "\n"));
-    }
 }
diff --git a/tests/TokenizerTest.php b/tests/TokenizerTest.php
index 9f82319..71c53a1 100644
--- a/tests/TokenizerTest.php
+++ b/tests/TokenizerTest.php
@@ -4,13 +4,18 @@
 
 namespace Doctrine\SqlFormatter\Tests;
 
+use Doctrine\SqlFormatter\Cursor;
+use Doctrine\SqlFormatter\Token;
 use Doctrine\SqlFormatter\Tokenizer;
-use PHPUnit\Framework\Attributes\DoesNotPerformAssertions;
+use Generator;
+use PHPUnit\Framework\Attributes\DataProvider;
 use PHPUnit\Framework\TestCase;
 use ReflectionClass;
 
 use function array_filter;
+use function implode;
 use function preg_match;
+use function serialize;
 use function sort;
 use function strtoupper;
 
@@ -58,9 +63,124 @@ public function testKeywordsReservedAreSingleUpperWord(): void
         self::assertSame([], $kwsDiff);
     }
 
-    #[DoesNotPerformAssertions]
-    public function testThereAreNoRegressions(): void
+    /** @param list<Token> $expectedTokens */
+    public static function assertEqualsTokens(array $expectedTokens, Cursor $cursor): void
     {
-        (new Tokenizer())->tokenize('*/');
+        $tokens = [];
+
+        $cursor = $cursor->subCursor();
+
+        while ($token = $cursor->next()) {
+            $tokens[] = $token;
+        }
+
+        if (serialize($tokens) === serialize($expectedTokens)) { // optimize self::assertEquals() for large inputs
+            self::assertTrue(true);
+        } else {
+            self::assertEquals($expectedTokens, $tokens);
+        }
+    }
+
+    /** @param list<Token> $expectedTokens */
+    #[DataProvider('tokenizeData')]
+    #[DataProvider('tokenizeLongConcatData')]
+    public function testTokenize(array $expectedTokens, string $sql): void
+    {
+        self::assertEqualsTokens($expectedTokens, (new Tokenizer())->tokenize($sql));
+    }
+
+    /** @return Generator<string, array{list<Token>, string}> */
+    public static function tokenizeData(): Generator
+    {
+        yield 'empty' => [
+            [],
+            '',
+        ];
+
+        yield 'basic' => [
+            [
+                new Token(Token::TOKEN_TYPE_RESERVED_TOPLEVEL, 'select'),
+                new Token(Token::TOKEN_TYPE_WHITESPACE, ' '),
+                new Token(Token::TOKEN_TYPE_NUMBER, '1'),
+            ],
+            'select 1',
+        ];
+
+        yield 'there are no regressions' => [
+            [
+                new Token(Token::TOKEN_TYPE_BOUNDARY, '*'),
+                new Token(Token::TOKEN_TYPE_BOUNDARY, '/'),
+            ],
+            '*/',
+        ];
+
+        yield 'unclosed quoted string' => [
+            [
+                new Token(Token::TOKEN_TYPE_QUOTE, '\'foo...'),
+            ],
+            '\'foo...',
+        ];
+    }
+
+    /** @return Generator<string, array{list<Token>, string}> */
+    public static function tokenizeLongConcatData(): Generator
+    {
+        $count = 2_000;
+
+        $sqlParts = [];
+        for ($i = 0; $i < $count; $i++) {
+            $sqlParts[] = 'cast(\'foo' . $i . '\' as blob)';
+        }
+
+        $concat = 'concat(' . implode(', ', $sqlParts) . ')';
+        $sql    = 'select iif(' . $concat . ' = ' . $concat . ', 10, 20) x';
+
+        $expectedTokens = [
+            new Token(Token::TOKEN_TYPE_RESERVED_TOPLEVEL, 'select'),
+            new Token(Token::TOKEN_TYPE_WHITESPACE, ' '),
+            new Token(Token::TOKEN_TYPE_WORD, 'iif'),
+            new Token(Token::TOKEN_TYPE_BOUNDARY, '('),
+        ];
+
+        for ($j = 0; $j < 2; $j++) {
+            if ($j !== 0) {
+                $expectedTokens[] = new Token(Token::TOKEN_TYPE_WHITESPACE, ' ');
+                $expectedTokens[] = new Token(Token::TOKEN_TYPE_BOUNDARY, '=');
+                $expectedTokens[] = new Token(Token::TOKEN_TYPE_WHITESPACE, ' ');
+            }
+
+            $expectedTokens[] = new Token(Token::TOKEN_TYPE_RESERVED, 'concat');
+            $expectedTokens[] = new Token(Token::TOKEN_TYPE_BOUNDARY, '(');
+
+            for ($i = 0; $i < $count; $i++) {
+                if ($i !== 0) {
+                    $expectedTokens[] = new Token(Token::TOKEN_TYPE_BOUNDARY, ',');
+                    $expectedTokens[] = new Token(Token::TOKEN_TYPE_WHITESPACE, ' ');
+                }
+
+                $expectedTokens[] = new Token(Token::TOKEN_TYPE_RESERVED, 'cast');
+                $expectedTokens[] = new Token(Token::TOKEN_TYPE_BOUNDARY, '(');
+                $expectedTokens[] = new Token(Token::TOKEN_TYPE_QUOTE, '\'foo' . $i . '\'');
+                $expectedTokens[] = new Token(Token::TOKEN_TYPE_WHITESPACE, ' ');
+                $expectedTokens[] = new Token(Token::TOKEN_TYPE_RESERVED, 'as');
+                $expectedTokens[] = new Token(Token::TOKEN_TYPE_WHITESPACE, ' ');
+                $expectedTokens[] = new Token(Token::TOKEN_TYPE_WORD, 'blob');
+                $expectedTokens[] = new Token(Token::TOKEN_TYPE_BOUNDARY, ')');
+            }
+
+            $expectedTokens[] = new Token(Token::TOKEN_TYPE_BOUNDARY, ')');
+        }
+
+        $expectedTokens[] = new Token(Token::TOKEN_TYPE_BOUNDARY, ',');
+        $expectedTokens[] = new Token(Token::TOKEN_TYPE_WHITESPACE, ' ');
+        $expectedTokens[] = new Token(Token::TOKEN_TYPE_NUMBER, '10');
+        $expectedTokens[] = new Token(Token::TOKEN_TYPE_BOUNDARY, ',');
+        $expectedTokens[] = new Token(Token::TOKEN_TYPE_WHITESPACE, ' ');
+        $expectedTokens[] = new Token(Token::TOKEN_TYPE_NUMBER, '20');
+        $expectedTokens[] = new Token(Token::TOKEN_TYPE_BOUNDARY, ')');
+        $expectedTokens[] = new Token(Token::TOKEN_TYPE_WHITESPACE, ' ');
+        $expectedTokens[] = new Token(Token::TOKEN_TYPE_WORD, 'x');
+
+        yield 'long concat' => [$expectedTokens, $sql];
     }
 }

From 9b4d087f296546ba8d90eafc0944df04c4de0804 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Michael=20Vo=C5=99=C3=AD=C5=A1ek?= 
Date: Sat, 1 Jun 2024 14:35:32 +0200
Subject: [PATCH 3/3] Fix tokenization of unclosed block comments

---
 src/Tokenizer.php       | 7 ++++---
 tests/TokenizerTest.php | 7 +++++++
 2 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/src/Tokenizer.php b/src/Tokenizer.php
index 82ace8c..2ac597d 100644
--- a/src/Tokenizer.php
+++ b/src/Tokenizer.php
@@ -825,9 +825,10 @@ private function createNextToken(string $string, Token|null $previous = null): T
                 $last = strpos($string, "\n");
                 $type = Token::TOKEN_TYPE_COMMENT;
             } else { // Comment until closing comment tag
-                $pos = strpos($string, '*/', 2);
-                assert($pos !== false);
-                $last = $pos + 2;
+                $pos  = strpos($string, '*/', 2);
+                $last = $pos !== false
+                    ? $pos + 2
+                    : false;
                 $type = Token::TOKEN_TYPE_BLOCK_COMMENT;
             }
 
diff --git a/tests/TokenizerTest.php b/tests/TokenizerTest.php
index 71c53a1..f8a767e 100644
--- a/tests/TokenizerTest.php
+++ b/tests/TokenizerTest.php
@@ -120,6 +120,13 @@ public static function tokenizeData(): Generator
             ],
             '\'foo...',
         ];
+
+        yield 'unclosed block comment' => [
+            [
+                new Token(Token::TOKEN_TYPE_BLOCK_COMMENT, '/* foo...'),
+            ],
+            '/* foo...',
+        ];
     }
 
     /** @return Generator */