Skip to content

Commit

Permalink
Remove dependencies and update Vite config (#90)
Browse files Browse the repository at this point in the history
* Vite config fix attempt

* export css separately

* point at dist folder

* skip sanitization on the server

* bump version

* revert sanitize

* switch to use basic html sanitize

* bump version

* decodeHtmlEntities

* lowercase entity

* bump version

* fix named entity to be case insensitive

---------

Co-authored-by: tblackwell-tm <t.blackwell@techmodal.com>
  • Loading branch information
t-blackwell and tblackwell-tm authored Mar 1, 2025
1 parent 805557b commit 9e4f131
Show file tree
Hide file tree
Showing 10 changed files with 191 additions and 399 deletions.
2 changes: 1 addition & 1 deletion .nvmrc
Original file line number Diff line number Diff line change
@@ -1 +1 @@
22.11.0
22.13.1
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ npm install mycrossword

```js
import MyCrossword from 'mycrossword';
import 'mycrossword/style.css';

const data = {
/* ... crossword data (see below) ... */
Expand Down
6 changes: 4 additions & 2 deletions lib/components/AnagramHelper/ClueDisplay.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -23,15 +23,17 @@ export default function ClueDisplay({
return (
<span
dangerouslySetInnerHTML={{
__html: sanitize(clue, { allowedTags: allowedHtmlTags }),
__html: sanitize(decodeHtmlEntities(clue), {
allowedTags: allowedHtmlTags,
}),
}}
/>
);
}

// use a Unicode-aware regex that includes accented characters
// this matches any sequence of letters (including accented ones) as a word
const cleanClue = decodeHtmlEntities(sanitize(clue));
const cleanClue = sanitize(decodeHtmlEntities(clue));
const words = cleanClue.split(/\b([\p{L}\p{M}]+)\b/u);

return (
Expand Down
6 changes: 4 additions & 2 deletions lib/components/Clue/Clue.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -106,9 +106,11 @@ function Clue({
<span
className={bem('Clue__text')}
dangerouslySetInnerHTML={{
__html: sanitize(text, { allowedTags: allowedHtmlTags }),
__html: sanitize(decodeHtmlEntities(text), {
allowedTags: allowedHtmlTags,
}),
}}
data-text={decodeHtmlEntities(sanitize(text))}
data-text={sanitize(decodeHtmlEntities(text))}
/>
</div>
);
Expand Down
4 changes: 2 additions & 2 deletions lib/components/StickyClue/StickyClue.tsx
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import ChevronLeftIcon from '~/icons/ChevronLeftIcon';
import ChevronRightIcon from '~/icons/ChevronRightIcon';
import { getBem } from '~/utils/bem';
import { sanitize } from '~/utils/html';
import { decodeHtmlEntities, sanitize } from '~/utils/html';
import './StickyClue.css';

interface StickyClueProps {
Expand Down Expand Up @@ -40,7 +40,7 @@ export default function StickyClue({
<span className={bem('StickyClue__num')}>{num}</span>
<span
dangerouslySetInnerHTML={{
__html: sanitize(text, { allowedTags }),
__html: sanitize(decodeHtmlEntities(text), { allowedTags }),
}}
/>
</span>
Expand Down
139 changes: 124 additions & 15 deletions lib/utils/html.ts
Original file line number Diff line number Diff line change
@@ -1,22 +1,131 @@
import DOMPurify from 'isomorphic-dompurify';
/**
 * Basic HTML sanitization function.
 *
 * Every tag whose name is not listed in `allowedTags` is removed (both the
 * opening and the closing tag; the text between them is kept), and all
 * attributes are stripped from the opening tags that remain.
 *
 * Note this doesn't use DOMParser or any other browser APIs.
 *
 * @param html - Markup to sanitize.
 * @param options - `allowedTags` lists the tag names to keep, matched
 *   case-insensitively. Defaults to none, i.e. every tag is removed.
 * @returns The sanitized markup.
 */
export function sanitize(
  html: string,
  options: { allowedTags?: string[] } = {},
): string {
  const { allowedTags = [] } = options;

  // Short-circuit: if there are no < or > characters, return the input as is
  if (!/<|>/.test(html)) {
    return html;
  }

  // Tag names are interpolated into a RegExp below, so drop empty names and
  // escape metacharacters. Otherwise an entry such as '' or '.*' would make
  // the allow-list match every tag and nothing would be removed.
  const tagNames = allowedTags
    .filter((tag) => tag.length > 0)
    .map((tag) => tag.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'));

  // Ensure allowed tags are treated as whole words using `\b`;
  // "(?!)" never matches, so an empty list allows nothing.
  const allowedPattern =
    tagNames.length > 0 ? `\\b(${tagNames.join('|')})\\b` : '(?!)';

  // Matches opening and closing tags whose name is not in the allow-list
  const tagPattern = new RegExp(`</?(?!${allowedPattern})\\w+[^>]*>`, 'gi');

  // Matches any remaining opening tag so that its attributes can be dropped
  const attributePattern = /<(\w+)[^>]*>/gi;

  let previous = '';
  let current = html;

  // Repeat until stable: removing one tag can splice the surrounding text
  // into a new tag (e.g. "<scr<script>ipt>").
  while (previous !== current) {
    previous = current;

    // Remove disallowed tags completely
    current = current.replace(tagPattern, '');

    // Remove all attributes from allowed tags
    current = current.replace(attributePattern, '<$1>');
  }

  return current;
}

/**
 * Lookup table from HTML character references to the characters they decode to.
 *
 * Keys are the full reference text, including the leading `&` and trailing
 * `;`, and are written in lower case because the lookup lower-cases the
 * matched entity before indexing this table.
 */
const entityDictionary: Record<string, string> = {
  '&amp;': '&',
  '&lt;': '<',
  '&gt;': '>',
  '&quot;': '"',
  '&#39;': "'",
  '&#x27;': "'",
  '&#x2f;': '/',
  '&#96;': '`',
  '&#x3d;': '=',
  // Non-breaking space, spelled as an escape so it cannot be confused with a plain space
  '&nbsp;': '\u00A0',
  '&copy;': '©',
  '&reg;': '®',
  '&trade;': '™',
  '&euro;': '€',
  '&pound;': '£',
  '&yen;': '¥',
  '&cent;': '¢',
  '&apos;': "'",
  '&sect;': '§',
  '&para;': '¶',
  '&plusmn;': '±',
  '&times;': '×',
  '&divide;': '÷',
  '&laquo;': '«',
  '&raquo;': '»',
  '&ldquo;': '“',
  '&rdquo;': '”',
  '&lsquo;': '‘',
  '&rsquo;': '’',
  '&hellip;': '…',
  '&middot;': '·',
  '&bull;': '•',
  '&ndash;': '–',
  '&mdash;': '—',
  '&alpha;': 'α',
  '&beta;': 'β',
  '&gamma;': 'γ',
  '&delta;': 'δ',
  '&pi;': 'π',
  '&sigma;': 'σ',
  '&omega;': 'ω',
  '&mu;': 'μ',
  '&tau;': 'τ',
  '&phi;': 'φ',
  '&chi;': 'χ',
  '&psi;': 'ψ',
  '&theta;': 'θ',
};

/**
 * Decode HTML entities in a string.
 *
 * Handles the named entities listed in `entityDictionary` (matched
 * case-insensitively) plus decimal (`&#65;`) and hexadecimal (`&#x41;`)
 * numeric references. Anything that is not a well-formed, recognised
 * reference is left untouched.
 *
 * @param html - Text that may contain HTML character references.
 * @returns The text with recognised references replaced by their characters.
 */
export function decodeHtmlEntities(html: string): string {
  // Only well-formed references are candidates. A looser pattern such as
  // /&([^;]+);/ lets a bare "&" swallow a later real entity, so that
  // "a & b &amp; c" would be left undecoded.
  const referencePattern =
    /&(#[xX][0-9a-fA-F]+|#[0-9]+|[a-zA-Z][a-zA-Z0-9]*);/g;

  return html.replace(
    referencePattern,
    (entity: string, entityCode: string): string => {
      // Check dictionary for named entities (case-insensitive)
      const namedEntity = entityDictionary[entity.toLowerCase()];

      if (namedEntity !== undefined) {
        return namedEntity;
      }

      // Return unchanged if it is a name we do not recognise
      if (!entityCode.startsWith('#')) {
        return entity;
      }

      // "#x..." / "#X..." is hexadecimal, a plain "#..." is decimal
      const isHex = entityCode[1] === 'x' || entityCode[1] === 'X';
      const code = isHex
        ? parseInt(entityCode.slice(2), 16)
        : parseInt(entityCode.slice(1), 10);

      // Use String.fromCodePoint instead of String.fromCharCode to handle all Unicode
      try {
        return String.fromCodePoint(code);
      } catch {
        // Return the original entity if the code point is invalid
        return entity;
      }
    },
  );
}
Loading

0 comments on commit 9e4f131

Please sign in to comment.