Skip to content

Commit

Permalink
Use wordBoundary instead of whitespace
Browse files Browse the repository at this point in the history
  • Loading branch information
jamesmaa committed Aug 23, 2024
1 parent 18c8446 commit 2ae9999
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 20 deletions.
33 changes: 16 additions & 17 deletions ext/js/dom/dom-text-scanner.js
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,15 @@ import {readCodePointsBackward, readCodePointsForward} from '../data/string-util
* A class used to scan text in a document.
*/
export class DOMTextScanner {
/**
* A regular expression used to match word delimiters, i.e. any single character
* that is NOT one of the word characters listed in the negated character class:
* \w matches ASCII letters, ASCII digits and underscore
* \p{L} matches any kind of letter from any language
* \p{N} matches any kind of numeric character in any script
* `'` is also included as a word character.
* NOTE(review): `-` is not part of the character class, so a hyphen is matched as a
* delimiter; confirm whether hyphens were meant to be word characters as well.
* The `u` flag is required for the \p{...} Unicode property escapes.
* @type {RegExp}
*/
static WORD_DELIMITER_REGEX = /[^\w\p{L}\p{N}']/u;

/**
* Creates a new instance of a DOMTextScanner.
* @param {Node} node The DOM Node to start at.
Expand All @@ -30,9 +39,9 @@ export class DOMTextScanner {
* @param {boolean} forcePreserveWhitespace Whether or not whitespace should be forced to be preserved,
* regardless of CSS styling.
* @param {boolean} generateLayoutContent Whether or not newlines should be added based on CSS styling.
* @param {boolean} stopAtWhitespaceBackwards Whether to pause scanning when whitespace is encountered when scanning backwards.
* @param {boolean} stopAtWordBoundary Whether to pause scanning when a word delimiter is encountered while scanning backwards.
*/
constructor(node, offset, forcePreserveWhitespace = false, generateLayoutContent = true, stopAtWhitespaceBackwards = false) {
constructor(node, offset, forcePreserveWhitespace = false, generateLayoutContent = true, stopAtWordBoundary = false) {
const ruby = DOMTextScanner.getParentRubyElement(node);
const resetOffset = (ruby !== null);
if (resetOffset) { node = ruby; }
Expand Down Expand Up @@ -63,9 +72,9 @@ export class DOMTextScanner {
/** @type {boolean} */
this._generateLayoutContent = generateLayoutContent;
/**
* @type {boolean} Whether or not whitespace should be scanned backwards.
* @type {boolean} Whether or not to stop scanning when word boundaries are encountered.
*/
this._stopAtWhitespaceBackwards = stopAtWhitespaceBackwards;
this._stopAtWordBoundary = stopAtWordBoundary;
}

/**
Expand Down Expand Up @@ -214,10 +223,9 @@ export class DOMTextScanner {
const nodeValueLength = nodeValue.length;
const {preserveNewlines, preserveWhitespace} = this._getWhitespaceSettings(textNode);
if (resetOffset) { this._offset = nodeValueLength; }

while (this._offset > 0) {
const char = readCodePointsBackward(nodeValue, this._offset - 1, 1);
if (this._stopAtWhitespaceBackwards && DOMTextScanner.isWhitespace(char)) {
if (this._stopAtWordBoundary && DOMTextScanner.isWordDelimiter(char)) {
return false;
}
this._offset -= char.length;
Expand Down Expand Up @@ -528,17 +536,8 @@ export class DOMTextScanner {
* @param {string} character
* @returns {boolean}
*/
static isWhitespace(character) {
switch (character.charCodeAt(0)) {
case 0x09: // Tab ('\t')
case 0x0c: // Form feed ('\f')
case 0x0d: // Carriage return ('\r')
case 0x20: // Space (' ')
case 0x0a: // Line feed ('\n')
return true;
default: // Other
return false;
}
static isWordDelimiter(character) {
return DOMTextScanner.WORD_DELIMITER_REGEX.test(character);
}

/**
Expand Down
6 changes: 3 additions & 3 deletions ext/js/dom/text-source-range.js
Original file line number Diff line number Diff line change
Expand Up @@ -161,12 +161,12 @@ export class TextSourceRange {
* @param {number} length The maximum number of codepoints to move by.
* @param {boolean} layoutAwareScan Whether or not HTML layout information should be used to generate
* the string content when scanning.
* @param {boolean} stopAtWhitespace Whether to stop at whitespace characters.
* @param {boolean} stopAtWordBoundary Whether to stop at word boundaries.
* @returns {number} The actual number of codepoints that were read.
*/
setStartOffset(length, layoutAwareScan, stopAtWhitespace = false) {
setStartOffset(length, layoutAwareScan, stopAtWordBoundary = false) {
if (this._disallowExpandSelection) { return 0; }
const state = new DOMTextScanner(this._range.startContainer, this._range.startOffset, !layoutAwareScan, layoutAwareScan, stopAtWhitespace).seek(-length);
const state = new DOMTextScanner(this._range.startContainer, this._range.startOffset, !layoutAwareScan, layoutAwareScan, stopAtWordBoundary).seek(-length);
this._range.setStart(state.node, state.offset);
this._rangeStartOffset = this._range.startOffset;
this._content = state.content + this._content;
Expand Down

0 comments on commit 2ae9999

Please sign in to comment.