Skip to content

Commit 236b6e5

Browse files
committed
Build regexes only once
speed up non-first Tokenizer bootstrap
1 parent 98811d6 commit 236b6e5

File tree

1 file changed

+25
-22
lines changed

1 file changed

+25
-22
lines changed

src/Tokenizer.php

Lines changed: 25 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -719,14 +719,13 @@ final class Tokenizer
719719
];
720720

721721
// Regular expressions for tokenizing
722-
723-
private readonly string $nextTokenRegexNumber;
724-
private readonly string $nextTokenRegexBoundaryCharacter;
725-
private readonly string $nextTokenRegexReservedToplevel;
726-
private readonly string $nextTokenRegexReservedNewline;
727-
private readonly string $nextTokenRegexReserved;
728-
private readonly string $nextTokenRegexFunction;
729-
private readonly string $nextTokenRegexNonReserved;
722+
private static string $nextTokenRegexNumber;
723+
private static string $nextTokenRegexBoundaryCharacter;
724+
private static string $nextTokenRegexReservedToplevel;
725+
private static string $nextTokenRegexReservedNewline;
726+
private static string $nextTokenRegexReserved;
727+
private static string $nextTokenRegexFunction;
728+
private static string $nextTokenRegexNonReserved;
730729

731730
/**
732731
* Punctuation that can be used as a boundary between other tokens
@@ -762,20 +761,24 @@ final class Tokenizer
762761
*/
763762
public function __construct()
764763
{
764+
if (isset(self::$nextTokenRegexNumber)) {
765+
return;
766+
}
767+
765768
// Set up regular expressions
766769
$regexBoundaries = $this->makeRegexFromList($this->boundaries);
767770
$regexReserved = $this->makeRegexFromList($this->reserved);
768771
$regexReservedToplevel = str_replace(' ', '\s+', $this->makeRegexFromList($this->reservedToplevel));
769772
$regexReservedNewline = str_replace(' ', '\s+', $this->makeRegexFromList($this->reservedNewline));
770773
$regexFunction = $this->makeRegexFromList($this->functions);
771774

772-
$this->nextTokenRegexNumber = '/\G(?:\d+(?:\.\d+)?|0x[\da-fA-F]+|0b[01]+)(?=$|\s|"\'`|' . $regexBoundaries . ')/';
773-
$this->nextTokenRegexBoundaryCharacter = '/\G' . $regexBoundaries . '/';
774-
$this->nextTokenRegexReservedToplevel = '/\G' . $regexReservedToplevel . '(?=$|\s|' . $regexBoundaries . ')/';
775-
$this->nextTokenRegexReservedNewline = '/\G' . $regexReservedNewline . '(?=$|\s|' . $regexBoundaries . ')/';
776-
$this->nextTokenRegexReserved = '/\G' . $regexReserved . '(?=$|\s|' . $regexBoundaries . ')/';
777-
$this->nextTokenRegexFunction = '/\G' . $regexFunction . '(?=\s*\()/';
778-
$this->nextTokenRegexNonReserved = '/\G.*?(?=$|\s|["\'`]|' . $regexBoundaries . ')/';
775+
self::$nextTokenRegexNumber = '/\G(?:\d+(?:\.\d+)?|0x[\da-fA-F]+|0b[01]+)(?=$|\s|"\'`|' . $regexBoundaries . ')/';
776+
self::$nextTokenRegexBoundaryCharacter = '/\G' . $regexBoundaries . '/';
777+
self::$nextTokenRegexReservedToplevel = '/\G' . $regexReservedToplevel . '(?=$|\s|' . $regexBoundaries . ')/';
778+
self::$nextTokenRegexReservedNewline = '/\G' . $regexReservedNewline . '(?=$|\s|' . $regexBoundaries . ')/';
779+
self::$nextTokenRegexReserved = '/\G' . $regexReserved . '(?=$|\s|' . $regexBoundaries . ')/';
780+
self::$nextTokenRegexFunction = '/\G' . $regexFunction . '(?=\s*\()/';
781+
self::$nextTokenRegexNonReserved = '/\G.*?(?=$|\s|["\'`]|' . $regexBoundaries . ')/';
779782
}
780783

781784
/** @param list<string> $values */
@@ -946,7 +949,7 @@ private function createNextToken(string $string, string $upper, int $offset, Tok
946949
// Number (decimal, binary, or hex)
947950
if (
948951
preg_match(
949-
$this->nextTokenRegexNumber,
952+
self::$nextTokenRegexNumber,
950953
$string,
951954
$matches,
952955
0,
@@ -957,7 +960,7 @@ private function createNextToken(string $string, string $upper, int $offset, Tok
957960
}
958961

959962
// Boundary Character (punctuation and symbols)
960-
if (preg_match($this->nextTokenRegexBoundaryCharacter, $string, $matches, 0, $offset)) {
963+
if (preg_match(self::$nextTokenRegexBoundaryCharacter, $string, $matches, 0, $offset)) {
961964
return new Token(Token::TOKEN_TYPE_BOUNDARY, $matches[0]);
962965
}
963966

@@ -967,7 +970,7 @@ private function createNextToken(string $string, string $upper, int $offset, Tok
967970
// Top Level Reserved Word
968971
if (
969972
preg_match(
970-
$this->nextTokenRegexReservedToplevel,
973+
self::$nextTokenRegexReservedToplevel,
971974
$upper,
972975
$matches,
973976
0,
@@ -983,7 +986,7 @@ private function createNextToken(string $string, string $upper, int $offset, Tok
983986
// Newline Reserved Word
984987
if (
985988
preg_match(
986-
$this->nextTokenRegexReservedNewline,
989+
self::$nextTokenRegexReservedNewline,
987990
$upper,
988991
$matches,
989992
0,
@@ -999,7 +1002,7 @@ private function createNextToken(string $string, string $upper, int $offset, Tok
9991002
// Other Reserved Word
10001003
if (
10011004
preg_match(
1002-
$this->nextTokenRegexReserved,
1005+
self::$nextTokenRegexReserved,
10031006
$upper,
10041007
$matches,
10051008
0,
@@ -1015,15 +1018,15 @@ private function createNextToken(string $string, string $upper, int $offset, Tok
10151018

10161019
// A function must be succeeded by '('
10171020
// this makes it so "count(" is considered a function, but "count" alone is not a function
1018-
if (preg_match($this->nextTokenRegexFunction, $upper, $matches, 0, $offset)) {
1021+
if (preg_match(self::$nextTokenRegexFunction, $upper, $matches, 0, $offset)) {
10191022
return new Token(
10201023
Token::TOKEN_TYPE_RESERVED,
10211024
substr($string, $offset, strlen($matches[0])),
10221025
);
10231026
}
10241027

10251028
// Non reserved word
1026-
preg_match($this->nextTokenRegexNonReserved, $string, $matches, 0, $offset);
1029+
preg_match(self::$nextTokenRegexNonReserved, $string, $matches, 0, $offset);
10271030

10281031
return new Token(Token::TOKEN_TYPE_WORD, $matches[0]);
10291032
}

0 commit comments

Comments
 (0)