Remove dependencies and update Vite config (#90)

* Vite config fix attempt
* export css separately
* point at dist folder
* skip sanitization on the server
* bump version
* revert sanitize
* switch to use basic html sanitize
* bump version
* decodeHtmlEntities
* lowercase entity
* bump version
* fix named entity to be case insensitive

---------

Co-authored-by: tblackwell-tm <t.blackwell@techmodal.com>
t-blackwell · Mar 1, 2025 · 9e4f131
1 parent 805557b
commit 9e4f131
Show file tree

Hide file tree

Showing 10 changed files with 191 additions and 399 deletions.
diff --git a/.nvmrc b/.nvmrc
@@ -1 +1 @@
-22.11.0
+22.13.1
diff --git a/README.md b/README.md
@@ -29,6 +29,7 @@ npm install mycrossword
 
 ```js
 import MyCrossword from 'mycrossword';
+import 'mycrossword/style.css';
 
 const data = {
   /* ... crossword data (see below) ... */

diff --git a/lib/components/AnagramHelper/ClueDisplay.tsx b/lib/components/AnagramHelper/ClueDisplay.tsx
@@ -23,15 +23,17 @@ export default function ClueDisplay({
     return (
       <span
         dangerouslySetInnerHTML={{
-          __html: sanitize(clue, { allowedTags: allowedHtmlTags }),
+          __html: sanitize(decodeHtmlEntities(clue), {
+            allowedTags: allowedHtmlTags,
+          }),
         }}
       />
     );
   }
 
   // use a Unicode-aware regex that includes accented characters
   // this matches any sequence of letters (including accented ones) as a word
-  const cleanClue = decodeHtmlEntities(sanitize(clue));
+  const cleanClue = sanitize(decodeHtmlEntities(clue));
   const words = cleanClue.split(/\b([\p{L}\p{M}]+)\b/u);
 
   return (

diff --git a/lib/components/Clue/Clue.tsx b/lib/components/Clue/Clue.tsx
@@ -106,9 +106,11 @@ function Clue({
       <span
         className={bem('Clue__text')}
         dangerouslySetInnerHTML={{
-          __html: sanitize(text, { allowedTags: allowedHtmlTags }),
+          __html: sanitize(decodeHtmlEntities(text), {
+            allowedTags: allowedHtmlTags,
+          }),
         }}
-        data-text={decodeHtmlEntities(sanitize(text))}
+        data-text={sanitize(decodeHtmlEntities(text))}
       />
     </div>
   );

diff --git a/lib/components/StickyClue/StickyClue.tsx b/lib/components/StickyClue/StickyClue.tsx
@@ -1,7 +1,7 @@
 import ChevronLeftIcon from '~/icons/ChevronLeftIcon';
 import ChevronRightIcon from '~/icons/ChevronRightIcon';
 import { getBem } from '~/utils/bem';
-import { sanitize } from '~/utils/html';
+import { decodeHtmlEntities, sanitize } from '~/utils/html';
 import './StickyClue.css';
 
 interface StickyClueProps {
@@ -40,7 +40,7 @@ export default function StickyClue({
               <span className={bem('StickyClue__num')}>{num}</span>
               <span
                 dangerouslySetInnerHTML={{
-                  __html: sanitize(text, { allowedTags }),
+                  __html: sanitize(decodeHtmlEntities(text), { allowedTags }),
                 }}
               />
             </span>

diff --git a/lib/utils/html.ts b/lib/utils/html.ts
@@ -1,22 +1,131 @@
-import DOMPurify from 'isomorphic-dompurify';
+/**
+ * Basic HTML sanitization function.
+ *
+ * Note this doesn't use DOMParser or any other browser APIs.
+ */
+export function sanitize(
+  html: string,
+  options: { allowedTags?: string[] } = {},
+) {
+  const { allowedTags = [] } = options;
 
-type SanitizeOptions = {
-  allowedAttributes?: string[];
-  allowedTags?: string[];
-};
+  // Short-circuit: if there are no < or > characters, return the input as is
+  if (!/<|>/.test(html)) {
+    return html;
+  }
+
+  let previous = '';
+  let current = html;
+
+  // Ensure allowedTags are treated as whole words using `\b`
+  const allowedPattern =
+    allowedTags.length > 0 ? `\\b(${allowedTags.join('|')})\\b` : '(?!)'; // "(?!)" ensures an empty list matches nothing
+
+  // Regex to remove disallowed tags (fully removes both opening & closing tags)
+  const tagPattern = new RegExp(`</?(?!${allowedPattern})\\w+[^>]*>`, 'gi');
+
+  // Regex to strip all attributes from allowed tags
+  const attributePattern = /<(\w+)[^>]*>/gi;
+
+  while (previous !== current) {
+    previous = current;
+
+    // Remove disallowed tags completely
+    current = current.replace(tagPattern, '');
 
-export function sanitize(html: string, options: SanitizeOptions = {}): string {
-  const domPurifyOptions = {
-    ALLOWED_ATTR: options.allowedAttributes ?? [],
-    ALLOWED_TAGS: options.allowedTags ?? [],
-  };
+    // Remove all attributes from allowed tags
+    current = current.replace(attributePattern, '<$1>');
+  }
 
-  return DOMPurify.sanitize(html, domPurifyOptions);
+  return current;
 }
 
-export function decodeHtmlEntities(html: string) {
-  const textarea = document.createElement('textarea');
-  textarea.innerHTML = html;
+const entityDictionary: Record<string, string> = {
+  '&amp;': '&',
+  '&lt;': '<',
+  '&gt;': '>',
+  '&quot;': '"',
+  '&#39;': "'",
+  '&#x27;': "'",
+  '&#x2F;': '/',
+  '&#96;': '`',
+  '&#x3D;': '=',
+  '&nbsp;': ' ',
+  '&copy;': '©',
+  '&reg;': '®',
+  '&trade;': '™',
+  '&euro;': '€',
+  '&pound;': '£',
+  '&yen;': '¥',
+  '&cent;': '¢',
+  '&apos;': "'",
+  '&sect;': '§',
+  '&para;': '¶',
+  '&plusmn;': '±',
+  '&times;': '×',
+  '&divide;': '÷',
+  '&laquo;': '«',
+  '&raquo;': '»',
+  '&ldquo;': '“',
+  '&rdquo;': '”',
+  '&lsquo;': '‘',
+  '&rsquo;': '’',
+  '&hellip;': '…',
+  '&middot;': '·',
+  '&bull;': '•',
+  '&ndash;': '–',
+  '&mdash;': '—',
+  '&alpha;': 'α',
+  '&beta;': 'β',
+  '&gamma;': 'γ',
+  '&delta;': 'δ',
+  '&pi;': 'π',
+  '&sigma;': 'σ',
+  '&omega;': 'ω',
+  '&mu;': 'μ',
+  '&tau;': 'τ',
+  '&phi;': 'φ',
+  '&chi;': 'χ',
+  '&psi;': 'ψ',
+  '&theta;': 'θ',
+};
+
+/**
+ * Decode HTML entities in a string.
+ */
+export function decodeHtmlEntities(html: string): string {
+  return html.replace(/&([^;]+);/g, (entity, entityCode) => {
+    // Check dictionary for named entities (case-insensitive)
+    const namedEntity = entityDictionary[entity.toLowerCase()];
+
+    if (namedEntity !== undefined) {
+      return namedEntity;
+    }
+
+    // Handle numeric entities (decimal and hexadecimal)
+    if (entityCode.startsWith('#')) {
+      let code: number;
+
+      // Handle hexadecimal entities (&#x...)
+      if (entityCode.startsWith('#x') || entityCode.startsWith('#X')) {
+        code = parseInt(entityCode.slice(2), 16);
+      } else {
+        // Handle decimal entities (&#...)
+        code = parseInt(entityCode.slice(1), 10);
+      }
+
+      // Use String.fromCodePoint instead of String.fromCharCode to handle all Unicode
+      if (!isNaN(code)) {
+        try {
+          return String.fromCodePoint(code);
+        } catch (e) {
+          // Return the original entity if the code point is invalid
+          return entity;
+        }
+      }
+    }
 
-  return textarea.value;
+    // Return unchanged if not recognized
+    return entity;
+  });
 }