Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Whole word scanning #1330

Merged
merged 13 commits into from
Aug 24, 2024
11 changes: 10 additions & 1 deletion ext/data/schemas/options-schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -473,7 +473,8 @@
"hidePopupOnCursorExit",
"hidePopupOnCursorExitDelay",
"normalizeCssZoom",
"scanWithoutMousemove"
"scanWithoutMousemove",
"scanResolution"
],
"properties": {
"inputs": {
Expand Down Expand Up @@ -764,6 +765,14 @@
"scanWithoutMousemove": {
"type": "boolean",
"default": true
},
"scanResolution": {
"type": "string",
"enum": [
"character",
"word"
],
"default": "character"
}
}
},
Expand Down
1 change: 1 addition & 0 deletions ext/js/app/frontend.js
Original file line number Diff line number Diff line change
Expand Up @@ -515,6 +515,7 @@ export class Frontend {
sentenceParsingOptions,
scanAltText: scanningOptions.scanAltText,
scanWithoutMousemove: scanningOptions.scanWithoutMousemove,
scanResolution: scanningOptions.scanResolution,
});
this._updateTextScannerEnabled();

Expand Down
11 changes: 11 additions & 0 deletions ext/js/data/options-util.js
Original file line number Diff line number Diff line change
Expand Up @@ -562,6 +562,7 @@ export class OptionsUtil {
this._updateVersion48,
this._updateVersion49,
this._updateVersion50,
this._updateVersion51,
];
/* eslint-enable @typescript-eslint/unbound-method */
if (typeof targetVersion === 'number' && targetVersion < result.length) {
Expand Down Expand Up @@ -1476,6 +1477,16 @@ export class OptionsUtil {
}
}

/**
* - Add scanning.scanResolution
* @type {import('options-util').UpdateFunction}
*/
async _updateVersion51(options) {
for (const profile of options.profiles) {
profile.options.scanning.scanResolution = 'character';
}
}

/**
* @param {string} url
* @returns {Promise<chrome.tabs.Tab>}
Expand Down
1 change: 1 addition & 0 deletions ext/js/display/display.js
Original file line number Diff line number Diff line change
Expand Up @@ -466,6 +466,7 @@ export class Display extends EventDispatcher {
sentenceParsingOptions,
scanAltText: scanningOptions.scanAltText,
scanWithoutMousemove: scanningOptions.scanWithoutMousemove,
scanResolution: scanningOptions.scanResolution,
},
});

Expand Down
71 changes: 63 additions & 8 deletions ext/js/dom/dom-text-scanner.js
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,14 @@ import {readCodePointsBackward, readCodePointsForward} from '../data/string-util
* A class used to scan text in a document.
*/
export class DOMTextScanner {
/**
* A regular expression used to match word delimiters.
* \p{L} matches any kind of letter from any language
* \p{N} matches any kind of numeric character in any script
* @type {RegExp}
*/
static WORD_DELIMITER_REGEX = /[^\w\p{L}\p{N}]/u;

/**
* Creates a new instance of a DOMTextScanner.
* @param {Node} node The DOM Node to start at.
Expand All @@ -30,8 +38,9 @@ export class DOMTextScanner {
* @param {boolean} forcePreserveWhitespace Whether or not whitespace should be forced to be preserved,
* regardless of CSS styling.
* @param {boolean} generateLayoutContent Whether or not newlines should be added based on CSS styling.
* @param {boolean} stopAtWordBoundary Whether to stop scanning when a word boundary (a word delimiter character or an element node) is encountered when scanning backwards.
*/
constructor(node, offset, forcePreserveWhitespace = false, generateLayoutContent = true) {
constructor(node, offset, forcePreserveWhitespace = false, generateLayoutContent = true, stopAtWordBoundary = false) {
const ruby = DOMTextScanner.getParentRubyElement(node);
const resetOffset = (ruby !== null);
if (resetOffset) { node = ruby; }
Expand All @@ -54,10 +63,17 @@ export class DOMTextScanner {
this._lineHasWhitespace = false;
/** @type {boolean} */
this._lineHasContent = false;
/** @type {boolean} */
/**
* @type {boolean} Whether or not whitespace should be forced to be preserved,
* regardless of CSS styling.
*/
this._forcePreserveWhitespace = forcePreserveWhitespace;
/** @type {boolean} */
this._generateLayoutContent = generateLayoutContent;
/**
* @type {boolean} Whether or not to stop scanning when word boundaries are encountered.
*/
this._stopAtWordBoundary = stopAtWordBoundary;
}

/**
Expand Down Expand Up @@ -130,6 +146,10 @@ export class DOMTextScanner {
break;
}
} else if (nodeType === ELEMENT_NODE) {
if (this._stopAtWordBoundary && !forward) {
// Element nodes are considered word boundaries when scanning backwards
break;
}
lastNode = node;
const initialNodeAtBeginningOfNodeGoingBackwards = node === this._initialNode && this._offset === 0 && !forward;
const initialNodeAtEndOfNodeGoingForwards = node === this._initialNode && this._offset === node.childNodes.length && forward;
Expand All @@ -145,7 +165,7 @@ export class DOMTextScanner {

/** @type {Node[]} */
const exitedNodes = [];
node = DOMTextScanner.getNextNode(node, forward, enterable, exitedNodes);
node = DOMTextScanner.getNextNodeToProcess(node, forward, enterable, exitedNodes);

for (const exitedNode of exitedNodes) {
if (exitedNode.nodeType !== ELEMENT_NODE) { continue; }
Expand Down Expand Up @@ -206,9 +226,19 @@ export class DOMTextScanner {
const nodeValueLength = nodeValue.length;
const {preserveNewlines, preserveWhitespace} = this._getWhitespaceSettings(textNode);
if (resetOffset) { this._offset = nodeValueLength; }

while (this._offset > 0) {
const char = readCodePointsBackward(nodeValue, this._offset - 1, 1);
if (this._stopAtWordBoundary && DOMTextScanner.isWordDelimiter(char)) {
if (DOMTextScanner.isSingleQuote(char) && this._offset > 1) {
// Check to see if char before single quote is a word character (e.g. "don't")
const prevChar = readCodePointsBackward(nodeValue, this._offset - 2, 1);
if (DOMTextScanner.isWordDelimiter(prevChar)) {
return false;
}
} else {
return false;
}
}
this._offset -= char.length;
const charAttributes = DOMTextScanner.getCharacterAttributes(char, preserveNewlines, preserveWhitespace);
if (this._checkCharacterBackward(char, charAttributes)) { break; }
Expand Down Expand Up @@ -244,7 +274,7 @@ export class DOMTextScanner {
/**
* @param {string} char
* @param {import('dom-text-scanner').CharacterAttributes} charAttributes
* @returns {boolean}
* @returns {boolean} Whether or not to stop scanning.
*/
_checkCharacterForward(char, charAttributes) {
switch (charAttributes) {
Expand Down Expand Up @@ -300,7 +330,7 @@ export class DOMTextScanner {
/**
* @param {string} char
* @param {import('dom-text-scanner').CharacterAttributes} charAttributes
* @returns {boolean}
* @returns {boolean} Whether or not to stop scanning.
*/
_checkCharacterBackward(char, charAttributes) {
switch (charAttributes) {
Expand Down Expand Up @@ -356,14 +386,14 @@ export class DOMTextScanner {
// Static helpers

/**
* Gets the next node in the document for a specified scanning direction.
* Gets the next node to process in the document for a specified scanning direction.
* @param {Node} node The current DOM Node.
* @param {boolean} forward Whether to scan forward in the document or backward.
* @param {boolean} visitChildren Whether the children of the current node should be visited.
* @param {Node[]} exitedNodes An array which stores nodes which were exited.
* @returns {?Node} The next node in the document, or `null` if there is no next node.
*/
static getNextNode(node, forward, visitChildren, exitedNodes) {
static getNextNodeToProcess(node, forward, visitChildren, exitedNodes) {
/** @type {?Node} */
let next = visitChildren ? (forward ? node.firstChild : node.lastChild) : null;
if (next === null) {
Expand Down Expand Up @@ -488,6 +518,31 @@ export class DOMTextScanner {
}
}

/**
* @param {string} character
* @returns {boolean}
*/
static isWordDelimiter(character) {
return DOMTextScanner.WORD_DELIMITER_REGEX.test(character);
}

/**
* @param {string} character
* @returns {boolean}
*/
static isSingleQuote(character) {
switch (character.charCodeAt(0)) {
case 0x27: // Single quote ('')
case 0x2019: // Right single quote (’)
case 0x2032: // Prime (′)
case 0x2035: // Reversed prime (‵)
case 0x02bc: // Modifier letter apostrophe (ʼ)
return true;
default:
return false;
}
}

/**
* Checks whether a given style is visible or not.
* This function does not check `style.display === 'none'`.
Expand Down
6 changes: 3 additions & 3 deletions ext/js/dom/text-source-generator.js
Original file line number Diff line number Diff line change
Expand Up @@ -531,19 +531,19 @@ export class TextSourceGenerator {
let previousStyles = null;
try {
let i = 0;
let startContinerPre = null;
let startContainerPre = null;
while (true) {
const range = this._caretRangeFromPoint(x, y);
if (range === null) {
return null;
}

const startContainer = range.startContainer;
if (startContinerPre !== startContainer) {
if (startContainerPre !== startContainer) {
if (this._isPointInRange(x, y, range, normalizeCssZoom, language)) {
return range;
}
startContinerPre = startContainer;
startContainerPre = startContainer;
}

if (previousStyles === null) { previousStyles = new Map(); }
Expand Down
7 changes: 4 additions & 3 deletions ext/js/dom/text-source-range.js
Original file line number Diff line number Diff line change
Expand Up @@ -157,15 +157,16 @@ export class TextSourceRange {


/**
* Moves the start offset of the text by a set amount of unicode codepoints.
* Moves the start offset of the text backwards by a set amount of unicode codepoints.
* @param {number} length The maximum number of codepoints to move by.
* @param {boolean} layoutAwareScan Whether or not HTML layout information should be used to generate
* the string content when scanning.
* @param {boolean} stopAtWordBoundary Whether to stop moving backwards when a word boundary is reached.
* @returns {number} The actual number of codepoints that were read.
*/
setStartOffset(length, layoutAwareScan) {
setStartOffset(length, layoutAwareScan, stopAtWordBoundary = false) {
if (this._disallowExpandSelection) { return 0; }
const state = new DOMTextScanner(this._range.startContainer, this._range.startOffset, !layoutAwareScan, layoutAwareScan).seek(-length);
const state = new DOMTextScanner(this._range.startContainer, this._range.startOffset, !layoutAwareScan, layoutAwareScan, stopAtWordBoundary).seek(-length);
this._range.setStart(state.node, state.offset);
this._rangeStartOffset = this._range.startOffset;
this._content = state.content + this._content;
Expand Down
9 changes: 9 additions & 0 deletions ext/js/language/text-scanner.js
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,7 @@ export class TextScanner extends EventDispatcher {
matchTypePrefix,
scanAltText,
scanWithoutMousemove,
scanResolution,
}) {
if (Array.isArray(inputs)) {
this._inputs = inputs.map((input) => this._convertInput(input));
Expand Down Expand Up @@ -299,6 +300,9 @@ export class TextScanner extends EventDispatcher {
if (typeof scanWithoutMousemove === 'boolean') {
this._scanWithoutMousemove = scanWithoutMousemove;
}
if (typeof scanResolution === 'string') {
this._scanResolution = scanResolution;
}
if (typeof sentenceParsingOptions === 'object' && sentenceParsingOptions !== null) {
const {scanExtent, terminationCharacterMode, terminationCharacters} = sentenceParsingOptions;
if (typeof scanExtent === 'number') {
Expand Down Expand Up @@ -465,6 +469,11 @@ export class TextScanner extends EventDispatcher {
null
);

if (this._scanResolution === 'word') {
// Move the start offset to the beginning of the word
textSource.setStartOffset(this._scanLength, this._layoutAwareScan, true);
}

if (this._textSourceCurrent !== null && this._textSourceCurrent.hasSameStart(textSource)) {
return;
}
Expand Down
27 changes: 27 additions & 0 deletions ext/settings.html
Original file line number Diff line number Diff line change
Expand Up @@ -370,6 +370,33 @@ <h1>Yomitan Settings</h1>
</p>
</div>
</div>
<div class="settings-item">
<div class="settings-item-inner settings-item-inner-wrappable">
<div class="settings-item-left">
<div class="settings-item-label">Scan resolution</div>
<div class="settings-item-description">
Start the lookup scan at the word or character of the cursor position.
<a tabindex="0" class="more-toggle more-only" data-parent-distance="4">More&hellip;</a>
</div>
</div>
<div class="settings-item-right">
<select data-setting="scanning.scanResolution">
<option value="character">Character</option>
<option value="word">Word</option>
</select>
</div>
</div>
<div class="settings-item-children more" hidden>
<p>
The scan resolution determines where the scan starts when the cursor is moved.
The <code>Character</code> option will start scanning at the cursor's current position,
while the <code>Word</code> option will start scanning at the beginning of the word.
</p>
<p>
<a tabindex="0" class="more-toggle" data-parent-distance="3">Less&hellip;</a>
</p>
</div>
</div>
<div class="settings-item"><div class="settings-item-inner">
<div class="settings-item-left">
<div class="settings-item-label">Scan using middle mouse button</div>
Expand Down
3 changes: 2 additions & 1 deletion test/options-util.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -356,6 +356,7 @@ function createProfileOptionsUpdatedTestData1() {
onSearchQuery: false,
},
scanWithoutMousemove: true,
scanResolution: 'character',
inputs: [
{
include: 'shift',
Expand Down Expand Up @@ -644,7 +645,7 @@ function createOptionsUpdatedTestData1() {
},
],
profileCurrent: 0,
version: 50,
version: 51,
global: {
database: {
prefixWildcardsSupported: false,
Expand Down
14 changes: 14 additions & 0 deletions types/ext/dom-text-scanner.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,15 @@ export type CharacterAttributes = 0 | 1 | 2 | 3;
* - 2 newlines corresponds to a significant visual distinction since the previous content.
*/
export type ElementSeekInfo = {
/**
* Indicates whether the content of this node should be entered
* (as opposed to the node being skipped over).
*/
enterable: boolean;
/**
* The number of newline characters that should be added for this node.
* - 1 newline corresponds to a simple new line in the layout.
* - 2 newlines corresponds to a significant visual distinction since the previous content.
*/
newlines: number;
};

Expand All @@ -43,6 +51,12 @@ export type ElementSeekInfo = {
* `preserveWhitespace` indicates whether or not sequences of whitespace characters are collapsed.
*/
export type WhitespaceSettings = {
/**
* Indicates whether or not newline characters are treated as line breaks.
*/
preserveNewlines: boolean;
/**
* Indicates whether or not sequences of whitespace characters are collapsed.
*/
preserveWhitespace: boolean;
};
1 change: 1 addition & 0 deletions types/ext/settings.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,7 @@ export type ScanningOptions = {
normalizeCssZoom: boolean;
scanAltText: boolean;
scanWithoutMousemove: boolean;
scanResolution: string;
};

export type ScanningInput = {
Expand Down
1 change: 1 addition & 0 deletions types/ext/text-scanner.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ export type Options = {
sentenceParsingOptions?: SentenceParsingOptions;
scanAltText?: boolean;
scanWithoutMousemove?: boolean;
scanResolution?: string;
};

export type InputOptionsOuter = {
Expand Down
Loading