From 0ba6dd94dec24257be4f528c70d3c62d0c9eb48a Mon Sep 17 00:00:00 2001
From: Stanislav Modrak <stanislav.modrak@gmail.com>
Date: Fri, 20 Mar 2026 13:24:01 +0000
Subject: [PATCH 1/3] refactor: scan regex patterns in a single pass

---
 lib/tokenizer.ts | 326 +++++++++++++++++++++++++++++++++++++----------
 1 file changed, 256 insertions(+), 70 deletions(-)

diff --git a/lib/tokenizer.ts b/lib/tokenizer.ts
index 34a522a..dddcd81 100644
--- a/lib/tokenizer.ts
+++ b/lib/tokenizer.ts
@@ -1,8 +1,14 @@
-import * as util from './util';
-import { Group, types, Root, Token, Reference, Char } from './types';
+import { Group, types, Root, Token, Reference, Char, Position, SetTokens, Set as SetToken } from './types';
 import * as sets from './sets';
 
 type ReferenceQueue = { reference: (Reference | Char), stack: Token[], index: number }[];
+type DecodedEscape = { value: number, nextIndex: number } | null;
+type ParsedNumber = { value: number, nextIndex: number } | null;
+type EscapedToken = Char | Position | Reference | SetToken;
+type EscapedTokenResult = { token: EscapedToken, nextIndex: number };
+type ClassToken = Char | SetToken;
+type ClassTokenResult = { token: ClassToken, nextIndex: number };
+type RepetitionBounds = { min: number, max: number, nextIndex: number } | null;
 
 /**
  * Valid opening characters for capture group names.
@@ -14,7 +20,239 @@ const captureGroupFirstChar = /^[a-zA-Z_$]$/i;
  */
 const captureGroupChars = /^[a-zA-Z0-9_$]$/i;
 
-const digit = /\d/;
+const CTRL = '@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^ ?';
+
+const isDigitCode = (charCode: number) => charCode >= 48 && charCode <= 57;
+
+const isUpperHexCode = (charCode: number) =>
+  isDigitCode(charCode) || (charCode >= 65 && charCode <= 70);
+
+const hexValue = (charCode: number) => charCode <= 57 ? charCode - 48 : charCode - 55;
+
+const readNumber = (str: string, index: number): ParsedNumber => {
+  if (!isDigitCode(str.charCodeAt(index))) {
+    return null;
+  }
+
+  let value = 0;
+  let nextIndex = index;
+  while (nextIndex < str.length && isDigitCode(str.charCodeAt(nextIndex))) {
+    value = value * 10 + str.charCodeAt(nextIndex) - 48;
+    nextIndex++;
+  }
+
+  return { value, nextIndex };
+};
+
+const readEscapedChar = (str: string, slashIndex: number): DecodedEscape => {
+  const escapeType = str[slashIndex + 1];
+
+  switch (escapeType) {
+    case 'u': {
+      if (slashIndex + 5 >= str.length) {
+        return null;
+      }
+
+      let value = 0;
+      for (let index = slashIndex + 2; index <= slashIndex + 5; index++) {
+        const charCode = str.charCodeAt(index);
+        if (!isUpperHexCode(charCode)) {
+          return null;
+        }
+        value = value * 16 + hexValue(charCode);
+      }
+      return { value, nextIndex: slashIndex + 6 };
+    }
+
+    case 'x': {
+      if (slashIndex + 3 >= str.length) {
+        return null;
+      }
+
+      let value = 0;
+      for (let index = slashIndex + 2; index <= slashIndex + 3; index++) {
+        const charCode = str.charCodeAt(index);
+        if (!isUpperHexCode(charCode)) {
+          return null;
+        }
+        value = value * 16 + hexValue(charCode);
+      }
+      return { value, nextIndex: slashIndex + 4 };
+    }
+
+    case 'c': {
+      const ctrlChar = str[slashIndex + 2];
+      if (!ctrlChar) {
+        return null;
+      }
+
+      const value = CTRL.indexOf(ctrlChar);
+      return value === -1 ? null : { value, nextIndex: slashIndex + 3 };
+    }
+
+    case '0':
+      return { value: 0, nextIndex: slashIndex + 2 };
+
+    case 't':
+      return { value: 9, nextIndex: slashIndex + 2 };
+
+    case 'n':
+      return { value: 10, nextIndex: slashIndex + 2 };
+
+    case 'v':
+      return { value: 11, nextIndex: slashIndex + 2 };
+
+    case 'f':
+      return { value: 12, nextIndex: slashIndex + 2 };
+
+    case 'r':
+      return { value: 13, nextIndex: slashIndex + 2 };
+
+    default:
+      return null;
+  }
+};
+
+const readSet = (escapedType: string): SetToken | null => {
+  switch (escapedType) {
+    case 'w':
+      return sets.words();
+    case 'W':
+      return sets.notWords();
+    case 'd':
+      return sets.ints();
+    case 'D':
+      return sets.notInts();
+    case 's':
+      return sets.whitespace();
+    case 'S':
+      return sets.notWhitespace();
+    default:
+      return null;
+  }
+};
+
+const readEscapedToken = (str: string, slashIndex: number, inClass: boolean): EscapedTokenResult => {
+  const escapedType = str[slashIndex + 1];
+  const setToken = readSet(escapedType);
+  if (setToken) {
+    return { token: setToken, nextIndex: slashIndex + 2 };
+  }
+
+  if (inClass) {
+    // Character classes treat \b as backspace instead of a word boundary.
+    if (escapedType === 'b') {
+      return { token: { type: types.CHAR, value: 8 }, nextIndex: slashIndex + 2 };
+    }
+  } else {
+    if (escapedType === 'b' || escapedType === 'B') {
+      return {
+        token: { type: types.POSITION, value: escapedType },
+        nextIndex: slashIndex + 2,
+      };
+    }
+
+    // Outside character classes, decimal escapes are parsed as references first
+    // and only normalized to chars later if there are not enough capture groups.
+    if (escapedType !== '0') {
+      const reference = readNumber(str, slashIndex + 1);
+      if (reference) {
+        return {
+          token: { type: types.REFERENCE, value: reference.value },
+          nextIndex: reference.nextIndex,
+        };
+      }
+    }
+  }
+
+  const decoded = readEscapedChar(str, slashIndex);
+  return {
+    token: {
+      type: types.CHAR,
+      value: decoded ? decoded.value : str.charCodeAt(slashIndex + 1),
+    },
+    nextIndex: decoded ? decoded.nextIndex : slashIndex + 2,
+  };
+};
+
+const readClassToken = (str: string, index: number): ClassTokenResult => {
+  if (str[index] === '\\') {
+    return readEscapedToken(str, index, true) as ClassTokenResult;
+  }
+
+  return {
+    token: { type: types.CHAR, value: str.charCodeAt(index) },
+    nextIndex: index + 1,
+  };
+};
+
+const tokenizeClassAt = (str: string, index: number, regexpStr: string): [SetTokens, number] => {
+  const tokens: SetTokens = [];
+  let i = index;
+
+  while (i < str.length) {
+    if (str[i] === ']') {
+      return [tokens, i + 1];
+    }
+
+    if (str[i] === '\\' && i + 1 >= str.length) {
+      break;
+    }
+
+    const first = readClassToken(str, i);
+    const nextChar = str[first.nextIndex];
+    if (
+      first.token.type === types.CHAR &&
+      nextChar === '-' &&
+      first.nextIndex + 1 < str.length &&
+      str[first.nextIndex + 1] !== ']'
+    ) {
+      // Only char-to-char pairs become ranges; predefined sets keep the dash literal.
+      const second = readClassToken(str, first.nextIndex + 1);
+      if (second.token.type === types.CHAR) {
+        tokens.push({
+          type: types.RANGE,
+          from: first.token.value,
+          to: second.token.value,
+        });
+        i = second.nextIndex;
+        continue;
+      }
+    }
+
+    tokens.push(first.token);
+    i = first.nextIndex;
+  }
+
+  throw new SyntaxError(`Invalid regular expression: /${regexpStr}/: Unterminated character class`);
+};
+
+const readRepetition = (str: string, index: number): RepetitionBounds => {
+  const min = readNumber(str, index);
+  if (!min) {
+    return null;
+  }
+
+  if (str[min.nextIndex] === '}') {
+    return { min: min.value, max: min.value, nextIndex: min.nextIndex + 1 };
+  }
+
+  if (str[min.nextIndex] !== ',') {
+    return null;
+  }
+
+  // `{n,}` is open-ended, while `{n,m}` requires a second parsed integer.
+  if (str[min.nextIndex + 1] === '}') {
+    return { min: min.value, max: Infinity, nextIndex: min.nextIndex + 2 };
+  }
+
+  const max = readNumber(str, min.nextIndex + 1);
+  if (!max || str[max.nextIndex] !== '}') {
+    return null;
+  }
+
+  return { min: min.value, max: max.value, nextIndex: max.nextIndex + 1 };
+};
 
 /**
  * Tokenizes a regular expression (that is currently a string)
@@ -42,14 +280,13 @@ export const tokenizer = (regexpStr: string): Root => {
     );
   };
 
-  // Decode a few escaped characters.
-  let str = util.strToChars(regexpStr);
+  const str = regexpStr;
 
   // Iterate through each character in string.
   while (i < str.length) {
     switch (c = str[i++]) {
       // Handle escaped characters, inclues a few sets.
-      case '\\':
+      case '\\': {
         if (i === str.length) {
           throw new SyntaxError(
             `Invalid regular expression: /${
@@ -57,62 +294,15 @@ export const tokenizer = (regexpStr: string): Root => {
             }/: \\ at end of pattern`,
           );
         }
-        switch (c = str[i++]) {
-          case 'b':
-            last.push({ type: types.POSITION, value: 'b' });
-            break;
-
-          case 'B':
-            last.push({ type: types.POSITION, value: 'B' });
-            break;
-
-          case 'w':
-            last.push(sets.words());
-            break;
-
-          case 'W':
-            last.push(sets.notWords());
-            break;
-
-          case 'd':
-            last.push(sets.ints());
-            break;
-
-          case 'D':
-            last.push(sets.notInts());
-            break;
-
-          case 's':
-            last.push(sets.whitespace());
-            break;
-
-          case 'S':
-            last.push(sets.notWhitespace());
-            break;
-
-          default:
-            // Check if c is integer.
-            // In which case it's a reference.
-            if (digit.test(c)) {
-              let digits = c;
-
-              while (i < str.length && digit.test(str[i])) {
-                digits += str[i++];
-              }
-
-              let value = parseInt(digits, 10);
-              const reference: Reference = { type: types.REFERENCE, value };
-
-              last.push(reference);
-              referenceQueue.push({ reference, stack: last, index: last.length - 1 });
-
-            // Escaped character.
-            } else {
-              last.push({ type: types.CHAR, value: c.charCodeAt(0) });
-            }
+        const escaped = readEscapedToken(str, i - 1, false);
+        last.push(escaped.token);
+        if (escaped.token.type === types.REFERENCE) {
+          referenceQueue.push({ reference: escaped.token, stack: last, index: last.length - 1 });
         }
+        i = escaped.nextIndex;
 
         break;
+      }
 
 
       // Positionals.
@@ -137,10 +327,8 @@ export const tokenizer = (regexpStr: string): Root => {
         }
 
         // Get all the characters in class.
-        let classTokens = util.tokenizeClass(str.slice(i), regexpStr);
-
-        // Increase index by length of class.
-        i += classTokens[1];
+        let classTokens = tokenizeClassAt(str, i, regexpStr);
+        i = classTokens[1];
         last.push({
           type: types.SET,
           set: classTokens[0],
@@ -291,19 +479,17 @@ export const tokenizer = (regexpStr: string): Root => {
       // This design is chosen because there could be more than
       // one repetition symbols in a regex i.e. `a?+{2,3}`.
       case '{': {
-        let rs = /^(\d+)(,(\d+)?)?\}/.exec(str.slice(i)), min, max;
-        if (rs !== null) {
+        let repetition = readRepetition(str, i);
+        if (repetition !== null) {
           if (last.length === 0) {
             repeatErr(i);
           }
-          min = parseInt(rs[1], 10);
-          max = rs[2] ? rs[3] ? parseInt(rs[3], 10) : Infinity : min;
-          i += rs[0].length;
+          i = repetition.nextIndex;
 
           last.push({
             type: types.REPETITION,
-            min,
-            max,
+            min: repetition.min,
+            max: repetition.max,
             value: last.pop(),
           });
         } else {

From f40fbe279e2da55e27db5a8f61fd1dc397d66db0 Mon Sep 17 00:00:00 2001
From: Stanislav Modrak <stanislav.modrak@gmail.com>
Date: Fri, 20 Mar 2026 13:31:02 +0000
Subject: [PATCH 2/3] test: add benchmark suite

---
 README.md          |  15 ++++
 benchmark/index.js | 216 +++++++++++++++++++++++++++++++++++++++++++++
 package.json       |   1 +
 3 files changed, 232 insertions(+)
 create mode 100644 benchmark/index.js

diff --git a/README.md b/README.md
index 140c229..b7c72e1 100644
--- a/README.md
+++ b/README.md
@@ -208,6 +208,21 @@ The following latest JavaScript additions are not supported yet:
 }
 ```
 
+# Benchmarking
+
+Run the benchmark suite against the compiled build with:
+
+```sh
+npm run bench
+```
+
+The benchmark covers representative tokenizer, reconstruct, and roundtrip workloads.
+You can narrow it to one suite or run longer samples:
+
+```sh
+npm run bench -- --suite tokenizer --min-ms 750
+```
+
 `/[abc]/`
 
 ```js
diff --git a/benchmark/index.js b/benchmark/index.js
new file mode 100644
index 0000000..7a0f470
--- /dev/null
+++ b/benchmark/index.js
@@ -0,0 +1,216 @@
+#!/usr/bin/env node
+'use strict';
+
+const { performance } = require('perf_hooks');
+const tokenizer = require('../dist');
+
+const reconstruct = tokenizer.reconstruct;
+
+const DEFAULT_MIN_MS = 400;
+const DEFAULT_WARMUP_RUNS = 5000;
+
+const CASES = [
+  {
+    name: 'literal',
+    pattern: 'walnuts',
+  },
+  {
+    name: 'alternation',
+    pattern: '(?:foo|bar|baz){1,3}(qux|quux)',
+  },
+  {
+    name: 'named-group',
+    pattern: '(?<year>\\d{4})-(?<month>\\d{2})-(?<day>\\d{2})',
+  },
+  {
+    name: 'path-like',
+    pattern: '^(?:\\/(?:[A-Za-z0-9._~-]|%[0-9A-Fa-f]{2}){1,64}){1,24}\\/?$',
+  },
+  {
+    name: 'email-like',
+    pattern: '^(?:[A-Za-z0-9_+.-]{1,64})@(?:[A-Za-z0-9-]{1,63}\\.){1,8}[A-Za-z]{2,24}$',
+  },
+  {
+    name: 'class-heavy',
+    pattern: '^(?:[A-Za-z0-9_./-]{3,32}|\\[(?:\\d{1,3}\\.){3}\\d{1,3}\\])(?:,(?:[A-Za-z0-9_./-]{3,32}|\\[(?:\\d{1,3}\\.){3}\\d{1,3}\\])){0,50}$',
+  },
+  {
+    name: 'dense-sets',
+    pattern: '^(?:[\\w.-]{1,16}:[^\\s\\]]{1,32};?){1,40}$',
+  },
+  {
+    name: 'backrefs',
+    pattern: '<(\\w+)>(?:[^<]|<(?!\\/\\1>))*<\\/\\1>',
+  },
+];
+
+const SUITE_NAMES = ['tokenizer', 'reconstruct', 'roundtrip'];
+
+/**
+ * Turn the raw CLI arguments into benchmark options.
+ *
+ * Recognised flags: --min-ms <ms>, --warmup-runs <count>, --suite <name>, --help.
+ * --help prints the usage text and exits the process with status 0.
+ * Returns { minMs, warmupRuns, suite }, using the defaults for omitted flags.
+ * Throws an Error for an unknown flag or suite name, or when a numeric flag
+ * is rejected by parsePositiveInt.
+ */
+const parseArgs = (argv) => {
+  const options = { minMs: DEFAULT_MIN_MS, warmupRuns: DEFAULT_WARMUP_RUNS, suite: 'all' };
+  let pos = 0;
+
+  while (pos < argv.length) {
+    const flag = argv[pos++];
+    if (flag === '--min-ms') {
+      // Value flags consume the argument that follows them.
+      options.minMs = parsePositiveInt(argv[pos++], '--min-ms');
+    } else if (flag === '--warmup-runs') {
+      options.warmupRuns = parsePositiveInt(argv[pos++], '--warmup-runs');
+    } else if (flag === '--suite') {
+      const requested = argv[pos++];
+      const known = requested === 'all' || SUITE_NAMES.includes(requested);
+      if (!known) {
+        throw new Error(`Unknown suite '${requested}'`);
+      }
+      options.suite = requested;
+    } else if (flag === '--help') {
+      printHelp();
+      process.exit(0);
+    } else {
+      throw new Error(`Unknown argument '${flag}'`);
+    }
+  }
+
+  return options;
+};
+
+// Parse a CLI flag value as a positive base-10 integer. Unlike Number.parseInt,
+// input with trailing junk or a fraction ("5x", "1.5") is rejected, not truncated.
+const parsePositiveInt = (value, flagName) => {
+  const parsed = /^\d+$/.test(String(value)) ? Number(value) : NaN;
+  if (!Number.isSafeInteger(parsed) || parsed <= 0) throw new Error(`${flagName} must be a positive integer`);
+  return parsed;
+};
+
+// Print CLI usage; the suite list is derived from SUITE_NAMES so it cannot drift.
+const printHelp = () => {
+  console.log('Usage: node benchmark/index.js [--suite <name>] [--min-ms <ms>] [--warmup-runs <count>]');
+  console.log(`\nSuites: all, ${SUITE_NAMES.join(', ')}`);
+};
+
+/**
+ * Time `fn(input)`: make `warmupRuns` untimed calls, then keep calling until
+ * the sampled elapsed time reaches `minMs`. Returns { iterations, ms, opsPerSec }.
+ */
+const benchmark = (fn, input, minMs, warmupRuns) => {
+  let warmupLeft = warmupRuns;
+  while (warmupLeft-- > 0) {
+    fn(input);
+  }
+
+  let iterations = 0;
+  let sampledMs = 0;
+  const startedAt = performance.now();
+
+  // The clock is read only once per 1024 calls (low ten bits all zero) so that
+  // the timer itself does not dominate fast cases.
+  do {
+    fn(input);
+    iterations += 1;
+    if ((iterations & 0x3ff) === 0) sampledMs = performance.now() - startedAt;
+  } while (sampledMs < minMs);
+
+  const ms = performance.now() - startedAt;
+  return { iterations, ms, opsPerSec: iterations / (ms / 1000) };
+};
+
+// Render a number in en-US grouping with exactly `fractionDigits` decimal places.
+const formatNumber = (value, fractionDigits) => {
+  const fixedDigits = { minimumFractionDigits: fractionDigits, maximumFractionDigits: fractionDigits };
+  return value.toLocaleString('en-US', fixedDigits);
+};
+
+// Cap `pattern` at `maxLength` characters (assumes maxLength >= 3); longer text is cut and ends in "...".
+const truncate = (pattern, maxLength) => (pattern.length <= maxLength ? pattern : `${pattern.slice(0, maxLength - 3)}...`);
+
+// Print one suite as a fixed-width table, then the arithmetic mean of the
+// per-case ops/s and a blank separator line.
+const printSuite = (name, rows) => {
+  // Column layout: case (16, left-aligned), len (6), ops/s (15), iterations (14), sample.
+  const renderLine = (caseName, len, ops, iterations, sample) =>
+    caseName.padEnd(16) + len.padStart(6) + ops.padStart(15) + iterations.padStart(14) + `  ${sample}`;
+
+  console.log(`${name}:`);
+  console.log(renderLine('case', 'len', 'ops/s', 'iterations', 'sample'));
+  rows.forEach((row) => {
+    const [ops, iterations] = [formatNumber(row.opsPerSec, 2), formatNumber(row.iterations, 0)];
+    console.log(renderLine(row.name, String(row.length), ops, iterations, truncate(row.label, 72)));
+  });
+
+  const opsTotal = rows.reduce((total, row) => total + row.opsPerSec, 0);
+  console.log(`average ops/s: ${formatNumber(opsTotal / rows.length, 2)}`);
+  console.log('');
+};
+
+/**
+ * Entry point: parse the CLI options, build the benchmark suites, run the
+ * selected ones and print one result table per suite.
+ *
+ * Every suite iterates over the same CASES list; only the benchmarked
+ * function and the input handed to it differ:
+ *   - tokenizer:   pattern string, timed call is tokenizer(pattern)
+ *   - reconstruct: pre-built token tree, timed call is reconstruct(tree)
+ *   - roundtrip:   pattern string, timed call is reconstruct(tokenizer(pattern))
+ *
+ * Errors thrown by parseArgs propagate to the caller.
+ */
+const main = () => {
+  const options = parseArgs(process.argv.slice(2));
+
+  // Build the case list for one suite. `toInput` derives the benchmark input
+  // from the pattern, so the shared fields are spelled out only once.
+  const buildCases = (toInput) => CASES.map(({ name, pattern }) => ({
+    name,
+    label: pattern,
+    length: pattern.length,
+    input: toInput(pattern),
+  }));
+  // Suites that tokenize inside the timed call receive the raw pattern.
+  const asPattern = (pattern) => pattern;
+
+  const suites = {
+    tokenizer: {
+      cases: buildCases(asPattern),
+      fn: tokenizer,
+    },
+    // Reconstruct benchmarks reuse one token tree per pattern so they measure
+    // the serializer itself instead of folding tokenizer time into the result.
+    reconstruct: {
+      cases: buildCases(tokenizer),
+      fn: reconstruct,
+    },
+    roundtrip: {
+      cases: buildCases(asPattern),
+      fn: (pattern) => reconstruct(tokenizer(pattern)),
+    },
+  };
+
+  const suiteNames = options.suite === 'all' ? SUITE_NAMES : [options.suite];
+
+  for (const suiteName of suiteNames) {
+    const { cases, fn } = suites[suiteName];
+    const rows = cases.map(({ name, label, length, input }) => {
+      const { opsPerSec, iterations } = benchmark(fn, input, options.minMs, options.warmupRuns);
+      return { name, label, length, opsPerSec, iterations };
+    });
+
+    printSuite(suiteName, rows);
+  }
+};
+
+try { // Script entry: run the benchmark and turn any thrown error into a non-zero exit.
+  main();
+} catch (error) {
+  console.error(error.message); // Only the message is printed, not the stack trace.
+  process.exit(1);
+}
diff --git a/package.json b/package.json
index 8c46562..aea7a7d 100644
--- a/package.json
+++ b/package.json
@@ -21,6 +21,7 @@
   "scripts": {
     "test": "nyc --extension .ts --reporter=lcov --reporter=text-summary vows -- --spec test/*-test.js",
     "build": "tsc",
+    "bench": "npm run build && node benchmark/index.js",
     "prepare": "tsc",
     "lint": "eslint ./lib ./test",
     "lint:fix": "eslint --fix ./lib ./test"

From fe4865bb82f366a5f962f1820dc3c187875daa87 Mon Sep 17 00:00:00 2001
From: Stanislav Modrak <stanislav.modrak@gmail.com>
Date: Fri, 20 Mar 2026 13:58:31 +0000
Subject: [PATCH 3/3] refactor: simplify tokenizer read helpers

---
 lib/tokenizer.ts | 321 ++++++++++++++++++-----------------------------
 1 file changed, 122 insertions(+), 199 deletions(-)

diff --git a/lib/tokenizer.ts b/lib/tokenizer.ts
index dddcd81..099f3c2 100644
--- a/lib/tokenizer.ts
+++ b/lib/tokenizer.ts
@@ -2,13 +2,11 @@ import { Group, types, Root, Token, Reference, Char, Position, SetTokens, Set as
 import * as sets from './sets';
 
 type ReferenceQueue = { reference: (Reference | Char), stack: Token[], index: number }[];
-type DecodedEscape = { value: number, nextIndex: number } | null;
-type ParsedNumber = { value: number, nextIndex: number } | null;
+type ReadResult<T> = { result: T, nextIndex: number };
+type MaybeReadResult<T> = ReadResult<T> | null;
 type EscapedToken = Char | Position | Reference | SetToken;
-type EscapedTokenResult = { token: EscapedToken, nextIndex: number };
 type ClassToken = Char | SetToken;
-type ClassTokenResult = { token: ClassToken, nextIndex: number };
-type RepetitionBounds = { min: number, max: number, nextIndex: number } | null;
+type RepetitionBounds = { min: number, max: number };
 
 /**
  * Valid opening characters for capture group names.
@@ -29,7 +27,10 @@ const isUpperHexCode = (charCode: number) =>
 
 const hexValue = (charCode: number) => charCode <= 57 ? charCode - 48 : charCode - 55;
 
-const readNumber = (str: string, index: number): ParsedNumber => {
+const syntaxError = (regexpStr: string, message: string) => // Builds the error; callers `throw` it.
+  new SyntaxError(`Invalid regular expression: /${regexpStr}/: ${message}`);
+
+const readNumber = (str: string, index: number): MaybeReadResult<number> => {
   if (!isDigitCode(str.charCodeAt(index))) {
     return null;
   }
@@ -41,72 +42,56 @@ const readNumber = (str: string, index: number): ParsedNumber => {
     nextIndex++;
   }
 
-  return { value, nextIndex };
+  return { result: value, nextIndex };
 };
 
-const readEscapedChar = (str: string, slashIndex: number): DecodedEscape => {
-  const escapeType = str[slashIndex + 1];
-
-  switch (escapeType) {
-    case 'u': {
-      if (slashIndex + 5 >= str.length) {
-        return null;
-      }
+const readHex = (str: string, index: number, length: number): MaybeReadResult<number> => {
+  if (index + length > str.length) {
+    return null;
+  }
 
-      let value = 0;
-      for (let index = slashIndex + 2; index <= slashIndex + 5; index++) {
-        const charCode = str.charCodeAt(index);
-        if (!isUpperHexCode(charCode)) {
-          return null;
-        }
-        value = value * 16 + hexValue(charCode);
-      }
-      return { value, nextIndex: slashIndex + 6 };
+  let value = 0;
+  for (let i = index; i < index + length; i++) {
+    const charCode = str.charCodeAt(i);
+    if (!isUpperHexCode(charCode)) {
+      return null;
     }
+    value = value * 16 + hexValue(charCode);
+  }
 
-    case 'x': {
-      if (slashIndex + 3 >= str.length) {
-        return null;
-      }
+  return { result: value, nextIndex: index + length };
+};
 
-      let value = 0;
-      for (let index = slashIndex + 2; index <= slashIndex + 3; index++) {
-        const charCode = str.charCodeAt(index);
-        if (!isUpperHexCode(charCode)) {
-          return null;
-        }
-        value = value * 16 + hexValue(charCode);
-      }
-      return { value, nextIndex: slashIndex + 4 };
-    }
+const readEscapedChar = (str: string, slashIndex: number): MaybeReadResult<number> => {
+  switch (str[slashIndex + 1]) {
+    case 'u':
+      return readHex(str, slashIndex + 2, 4);
 
-    case 'c': {
-      const ctrlChar = str[slashIndex + 2];
-      if (!ctrlChar) {
-        return null;
-      }
+    case 'x':
+      return readHex(str, slashIndex + 2, 2);
 
-      const value = CTRL.indexOf(ctrlChar);
-      return value === -1 ? null : { value, nextIndex: slashIndex + 3 };
+    case 'c': {
+      const value = CTRL.indexOf(str[slashIndex + 2]);
+      return value === -1 ? null : { result: value, nextIndex: slashIndex + 3 };
     }
 
     case '0':
-      return { value: 0, nextIndex: slashIndex + 2 };
+      return { result: 0, nextIndex: slashIndex + 2 };
 
     case 't':
-      return { value: 9, nextIndex: slashIndex + 2 };
+      return { result: 9, nextIndex: slashIndex + 2 };
 
     case 'n':
-      return { value: 10, nextIndex: slashIndex + 2 };
+      return { result: 10, nextIndex: slashIndex + 2 };
 
     case 'v':
-      return { value: 11, nextIndex: slashIndex + 2 };
+      return { result: 11, nextIndex: slashIndex + 2 };
 
     case 'f':
-      return { value: 12, nextIndex: slashIndex + 2 };
+      return { result: 12, nextIndex: slashIndex + 2 };
 
     case 'r':
-      return { value: 13, nextIndex: slashIndex + 2 };
+      return { result: 13, nextIndex: slashIndex + 2 };
 
     default:
       return null;
@@ -132,22 +117,24 @@ const readSet = (escapedType: string): SetToken | null => {
   }
 };
 
-const readEscapedToken = (str: string, slashIndex: number, inClass: boolean): EscapedTokenResult => {
+function readEscapedToken(str: string, slashIndex: number, inClass: true): ReadResult<ClassToken>;
+function readEscapedToken(str: string, slashIndex: number, inClass: false): ReadResult<EscapedToken>;
+function readEscapedToken(str: string, slashIndex: number, inClass: boolean): ReadResult<EscapedToken> {
   const escapedType = str[slashIndex + 1];
   const setToken = readSet(escapedType);
   if (setToken) {
-    return { token: setToken, nextIndex: slashIndex + 2 };
+    return { result: setToken, nextIndex: slashIndex + 2 };
   }
 
   if (inClass) {
     // Character classes treat \b as backspace instead of a word boundary.
     if (escapedType === 'b') {
-      return { token: { type: types.CHAR, value: 8 }, nextIndex: slashIndex + 2 };
+      return { result: { type: types.CHAR, value: 8 }, nextIndex: slashIndex + 2 };
     }
   } else {
     if (escapedType === 'b' || escapedType === 'B') {
       return {
-        token: { type: types.POSITION, value: escapedType },
+        result: { type: types.POSITION, value: escapedType },
         nextIndex: slashIndex + 2,
       };
     }
@@ -158,7 +145,7 @@ const readEscapedToken = (str: string, slashIndex: number, inClass: boolean): Es
       const reference = readNumber(str, slashIndex + 1);
       if (reference) {
         return {
-          token: { type: types.REFERENCE, value: reference.value },
+          result: { type: types.REFERENCE, value: reference.result },
           nextIndex: reference.nextIndex,
         };
       }
@@ -167,21 +154,21 @@ const readEscapedToken = (str: string, slashIndex: number, inClass: boolean): Es
 
   const decoded = readEscapedChar(str, slashIndex);
   return {
-    token: {
+    result: {
       type: types.CHAR,
-      value: decoded ? decoded.value : str.charCodeAt(slashIndex + 1),
+      value: decoded ? decoded.result : str.charCodeAt(slashIndex + 1),
     },
     nextIndex: decoded ? decoded.nextIndex : slashIndex + 2,
   };
-};
+}
 
-const readClassToken = (str: string, index: number): ClassTokenResult => {
+const readClassToken = (str: string, index: number): ReadResult<ClassToken> => {
   if (str[index] === '\\') {
-    return readEscapedToken(str, index, true) as ClassTokenResult;
+    return readEscapedToken(str, index, true);
   }
 
   return {
-    token: { type: types.CHAR, value: str.charCodeAt(index) },
+    result: { type: types.CHAR, value: str.charCodeAt(index) },
     nextIndex: index + 1,
   };
 };
@@ -200,41 +187,40 @@ const tokenizeClassAt = (str: string, index: number, regexpStr: string): [SetTok
     }
 
     const first = readClassToken(str, i);
-    const nextChar = str[first.nextIndex];
     if (
-      first.token.type === types.CHAR &&
-      nextChar === '-' &&
+      first.result.type === types.CHAR &&
+      str[first.nextIndex] === '-' &&
       first.nextIndex + 1 < str.length &&
       str[first.nextIndex + 1] !== ']'
     ) {
       // Only char-to-char pairs become ranges; predefined sets keep the dash literal.
       const second = readClassToken(str, first.nextIndex + 1);
-      if (second.token.type === types.CHAR) {
+      if (second.result.type === types.CHAR) {
         tokens.push({
           type: types.RANGE,
-          from: first.token.value,
-          to: second.token.value,
+          from: first.result.value,
+          to: second.result.value,
         });
         i = second.nextIndex;
         continue;
       }
     }
 
-    tokens.push(first.token);
+    tokens.push(first.result);
     i = first.nextIndex;
   }
 
-  throw new SyntaxError(`Invalid regular expression: /${regexpStr}/: Unterminated character class`);
+  throw syntaxError(regexpStr, 'Unterminated character class');
 };
 
-const readRepetition = (str: string, index: number): RepetitionBounds => {
+const readRepetition = (str: string, index: number): MaybeReadResult<RepetitionBounds> => {
   const min = readNumber(str, index);
   if (!min) {
     return null;
   }
 
   if (str[min.nextIndex] === '}') {
-    return { min: min.value, max: min.value, nextIndex: min.nextIndex + 1 };
+    return { result: { min: min.result, max: min.result }, nextIndex: min.nextIndex + 1 };
   }
 
   if (str[min.nextIndex] !== ',') {
@@ -243,7 +229,7 @@ const readRepetition = (str: string, index: number): RepetitionBounds => {
 
   // `{n,}` is open-ended, while `{n,m}` requires a second parsed integer.
   if (str[min.nextIndex + 1] === '}') {
-    return { min: min.value, max: Infinity, nextIndex: min.nextIndex + 2 };
+    return { result: { min: min.result, max: Infinity }, nextIndex: min.nextIndex + 2 };
   }
 
   const max = readNumber(str, min.nextIndex + 1);
@@ -251,7 +237,30 @@ const readRepetition = (str: string, index: number): RepetitionBounds => {
     return null;
   }
 
-  return { min: min.value, max: max.value, nextIndex: max.nextIndex + 1 };
+  return { result: { min: min.result, max: max.result }, nextIndex: max.nextIndex + 1 };
+};
+
+// Reads the capture group name that starts at `index` (right after `<`); returns it with the
+// index just past the closing `>`, or throws when the name is malformed or unterminated.
+const readGroupName = (str: string, index: number, regexpStr: string): ReadResult<string> => {
+  const firstChar = str[index];
+  if (!captureGroupFirstChar.test(firstChar)) {
+    throw syntaxError(
+      regexpStr,
+      `Invalid capture group name, character '${firstChar}' after '<' at column ${index + 1}`,
+    );
+  }
+
+  let end = index + 1;
+  while (end < str.length && captureGroupChars.test(str[end])) end++;
+  if (str[end] !== '>') {
+    throw syntaxError(
+      regexpStr,
+      `Unclosed capture group name, expected '>', found '${str[end]}' at column ${end + 1}`,
+    );
+  }
+
+  return { result: str.slice(index, end), nextIndex: end + 1 };
+};
 
 /**
@@ -272,12 +281,21 @@ export const tokenizer = (regexpStr: string): Root => {
   let referenceQueue: ReferenceQueue = [];
   let groupCount = 0;
 
-  const repeatErr = (col: number) => {
-    throw new SyntaxError(
-      `Invalid regular expression: /${
-        regexpStr
-      }/: Nothing to repeat at column ${col - 1}`,
-    );
+  const repeatErr = (col: number): never => {
+    throw syntaxError(regexpStr, `Nothing to repeat at column ${col - 1}`);
+  };
+
+  const pushRepetition = (min: number, max: number, col: number) => {
+    if (last.length === 0) {
+      repeatErr(col);
+    }
+
+    last.push({
+      type: types.REPETITION,
+      min,
+      max,
+      value: last.pop()!,
+    });
   };
 
   const str = regexpStr;
@@ -288,16 +306,12 @@ export const tokenizer = (regexpStr: string): Root => {
       // Handle escaped characters, inclues a few sets.
       case '\\': {
         if (i === str.length) {
-          throw new SyntaxError(
-            `Invalid regular expression: /${
-              regexpStr
-            }/: \\ at end of pattern`,
-          );
+          throw syntaxError(regexpStr, '\\ at end of pattern');
         }
         const escaped = readEscapedToken(str, i - 1, false);
-        last.push(escaped.token);
-        if (escaped.token.type === types.REFERENCE) {
-          referenceQueue.push({ reference: escaped.token, stack: last, index: last.length - 1 });
+        last.push(escaped.result);
+        if (escaped.result.type === types.REFERENCE) {
+          referenceQueue.push({ reference: escaped.result, stack: last, index: last.length - 1 });
         }
         i = escaped.nextIndex;
 
@@ -307,27 +321,19 @@ export const tokenizer = (regexpStr: string): Root => {
 
       // Positionals.
       case '^':
-        last.push({ type: types.POSITION, value: '^' });
-        break;
-
       case '$':
-        last.push({ type: types.POSITION, value: '$' });
+        last.push({ type: types.POSITION, value: c });
         break;
 
 
       // Handle custom sets.
       case '[': {
         // Check if this class is 'anti' i.e. [^abc].
-        let not;
-        if (str[i] === '^') {
-          not = true;
-          i++;
-        } else {
-          not = false;
-        }
+        const not = str[i] === '^';
+        if (not) i++;
 
         // Get all the characters in class.
-        let classTokens = tokenizeClassAt(str, i, regexpStr);
+        const classTokens = tokenizeClassAt(str, i, regexpStr);
         i = classTokens[1];
         last.push({
           type: types.SET,
@@ -369,54 +375,13 @@ export const tokenizer = (regexpStr: string): Root => {
             group.notFollowedBy = true;
             group.remember = false;
           } else if (c === '<') {
-            let name = '';
-
-            if (captureGroupFirstChar.test(str[i])) {
-              name += str[i];
-              i++;
-            } else {
-              throw new SyntaxError(
-                `Invalid regular expression: /${
-                  regexpStr
-                }/: Invalid capture group name, character '${str[i]}'` +
-                ` after '<' at column ${i + 1}`,
-              );
-            }
-
-            while (i < str.length && captureGroupChars.test(str[i])) {
-              name += str[i];
-              i++;
-            }
-
-            if (!name) {
-              throw new SyntaxError(
-                `Invalid regular expression: /${
-                  regexpStr
-                }/: Invalid capture group name, character '${str[i]}'` +
-                ` after '<' at column ${i + 1}`,
-              );
-            }
-
-            if (str[i] !== '>') {
-              throw new SyntaxError(
-                `Invalid regular expression: /${
-                  regexpStr
-                }/: Unclosed capture group name, expected '>', found` +
-                ` '${str[i]}' at column ${i + 1}`,
-              );
-            }
-
-            group.name = name;
-            i++;
+            const name = readGroupName(str, i, regexpStr);
+            group.name = name.result;
+            i = name.nextIndex;
           } else if (c === ':') {
             group.remember = false;
           } else {
-            throw new SyntaxError(
-              `Invalid regular expression: /${
-                regexpStr
-              }/: Invalid group, character '${c}'` +
-              ` after '?' at column ${i - 1}`,
-            );
+            throw syntaxError(regexpStr, `Invalid group, character '${c}' after '?' at column ${i - 1}`);
           }
         } else {
           groupCount += 1;
@@ -439,11 +404,7 @@ export const tokenizer = (regexpStr: string): Root => {
       // Pop group out of stack.
       case ')':
         if (groupStack.length === 0) {
-          throw new SyntaxError(
-            `Invalid regular expression: /${
-              regexpStr
-            }/: Unmatched ) at column ${i - 1}`,
-          );
+          throw syntaxError(regexpStr, `Unmatched ) at column ${i - 1}`);
         }
         lastGroup = groupStack.pop();
 
@@ -465,7 +426,7 @@ export const tokenizer = (regexpStr: string): Root => {
           delete lastGroup.stack;
         }
         // Create a new stack and add to options for rest of clause.
-        let stack: Token[] = [];
+        const stack: Token[] = [];
         lastGroup.options.push(stack);
         last = stack;
 
@@ -479,65 +440,31 @@ export const tokenizer = (regexpStr: string): Root => {
       // This design is chosen because there could be more than
       // one repetition symbols in a regex i.e. `a?+{2,3}`.
       case '{': {
-        let repetition = readRepetition(str, i);
-        if (repetition !== null) {
-          if (last.length === 0) {
-            repeatErr(i);
-          }
-          i = repetition.nextIndex;
-
-          last.push({
-            type: types.REPETITION,
-            min: repetition.min,
-            max: repetition.max,
-            value: last.pop(),
-          });
-        } else {
+        const repetition = readRepetition(str, i);
+        if (!repetition) {
           last.push({
             type: types.CHAR,
             value: 123,
           });
+          break;
         }
 
+        pushRepetition(repetition.result.min, repetition.result.max, i);
+        i = repetition.nextIndex;
+
         break;
       }
 
       case '?':
-        if (last.length === 0) {
-          repeatErr(i);
-        }
-        last.push({
-          type: types.REPETITION,
-          min: 0,
-          max: 1,
-          value: last.pop(),
-        });
+        pushRepetition(0, 1, i);
         break;
 
       case '+':
-        if (last.length === 0) {
-          repeatErr(i);
-        }
-        last.push({
-          type: types.REPETITION,
-          min: 1,
-          max: Infinity,
-          value: last.pop(),
-        });
-
+        pushRepetition(1, Infinity, i);
         break;
 
       case '*':
-        if (last.length === 0) {
-          repeatErr(i);
-        }
-        last.push({
-          type: types.REPETITION,
-          min: 0,
-          max: Infinity,
-          value: last.pop(),
-        });
-
+        pushRepetition(0, Infinity, i);
         break;
 
 
@@ -552,11 +479,7 @@ export const tokenizer = (regexpStr: string): Root => {
 
   // Check if any groups have not been closed.
   if (groupStack.length !== 0) {
-    throw new SyntaxError(
-      `Invalid regular expression: /${
-        regexpStr
-      }/: Unterminated group`,
-    );
+    throw syntaxError(regexpStr, 'Unterminated group');
   }
 
   updateReferences(referenceQueue, groupCount);