From e30ee63ea7f4a090ad866dd0e9a8b4bb752a6d71 Mon Sep 17 00:00:00 2001 From: Jeroen Rotty Date: Fri, 7 Apr 2023 17:03:32 +0200 Subject: [PATCH 001/616] Update block.js Fixes #20134 --- packages/js/src/structured-data-blocks/faq/block.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/js/src/structured-data-blocks/faq/block.js b/packages/js/src/structured-data-blocks/faq/block.js index b83cc3a9674..6c6f9ef7688 100644 --- a/packages/js/src/structured-data-blocks/faq/block.js +++ b/packages/js/src/structured-data-blocks/faq/block.js @@ -19,7 +19,7 @@ const attributes = { export default () => { registerBlockType( "yoast/faq-block", { title: __( "Yoast FAQ", "wordpress-seo" ), - description: __( "List your Frequently Asked Questions in an SEO-friendly way. You can only use one FAQ block per post.", "wordpress-seo" ), + description: __( "List your Frequently Asked Questions in an SEO-friendly way.", "wordpress-seo" ), icon: "editor-ul", category: "yoast-structured-data-blocks", keywords: [ From ca068f4e5133f7b408ae775f89803a3295f3c926 Mon Sep 17 00:00:00 2001 From: FAMarfuaty Date: Fri, 7 Apr 2023 17:18:43 +0200 Subject: [PATCH 002/616] Create a helper to get sentences from tree nodes --- .../helpers/sentence/getSentencesFromTree.js | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 packages/yoastseo/src/languageProcessing/helpers/sentence/getSentencesFromTree.js diff --git a/packages/yoastseo/src/languageProcessing/helpers/sentence/getSentencesFromTree.js b/packages/yoastseo/src/languageProcessing/helpers/sentence/getSentencesFromTree.js new file mode 100644 index 00000000000..47ba3542941 --- /dev/null +++ b/packages/yoastseo/src/languageProcessing/helpers/sentence/getSentencesFromTree.js @@ -0,0 +1,17 @@ +import { flatten } from "lodash-es"; +import { Paragraph, Heading } from "../../../parse/structure"; + +/** + * Gets all the sentences from paragraph and heading nodes. + * These two node types are the nodes that should contain sentences for the analysis. + * + * @param {Paper} paper The paper to get the sentences from. + * + * @returns {Object[]} The array of sentences retrieved from paragraph and heading nodes. 
+ */ +export default function( paper ) { + const tree = paper.getTree().findAll( treeNode => treeNode instanceof Paragraph || treeNode instanceof Heading ); + + const sentences = tree.map( node => node.sentences ); + return flatten( sentences ); +} From 737107f45839e860089a87b941af3f6af58fa29d Mon Sep 17 00:00:00 2001 From: FAMarfuaty Date: Fri, 7 Apr 2023 17:18:56 +0200 Subject: [PATCH 003/616] Add unit tests --- .../sentence/getSentencesFromTreeSpec.js | 44 +++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 packages/yoastseo/spec/languageProcessing/helpers/sentence/getSentencesFromTreeSpec.js diff --git a/packages/yoastseo/spec/languageProcessing/helpers/sentence/getSentencesFromTreeSpec.js b/packages/yoastseo/spec/languageProcessing/helpers/sentence/getSentencesFromTreeSpec.js new file mode 100644 index 00000000000..042bd750c2e --- /dev/null +++ b/packages/yoastseo/spec/languageProcessing/helpers/sentence/getSentencesFromTreeSpec.js @@ -0,0 +1,44 @@ +import Paper from "../../../../src/values/Paper"; +import getSentencesFromTree from "../../../../src/languageProcessing/helpers/sentence/getSentencesFromTree"; +import buildTree from "../../../specHelpers/parse/buildTree"; +import EnglishResearcher from "../../../../src/languageProcessing/languages/en/Researcher"; + +describe( "test", () => { + let researcher; + beforeEach( () => { + researcher = new EnglishResearcher(); + } ); + it( "test", () => { + const paper = new Paper( "

<p>A very intelligent cat loves their human. A dog is very cute.</p><h3>A subheading 3</h3>" +
+ "text text text<h4>A subheading 4</h4>
more text." ); + buildTree( paper, researcher ); + expect( getSentencesFromTree( paper ) ).toEqual( [ + { + sourceCodeRange: { endOffset: 44, startOffset: 3 }, + text: "A very intelligent cat loves their human.", + tokens: [ { text: "A" }, { text: " " }, { text: "very" }, { text: " " }, { text: "intelligent" }, { text: " " }, { text: "cat" }, + { text: " " }, { text: "loves" }, { text: " " }, { text: "their" }, { text: " " }, { text: "human" }, { text: "." } ] }, + { + sourceCodeRange: { endOffset: 64, startOffset: 44 }, + text: " A dog is very cute.", + tokens: [ { text: " " }, { text: "A" }, { text: " " }, { text: "dog" }, { text: " " }, { text: "is" }, { text: " " }, + { text: "very" }, { text: " " }, { text: "cute" }, { text: "." } ] }, + { + sourceCodeRange: { endOffset: 86, startOffset: 72 }, + text: "A subheading 3", + tokens: [ { text: "A" }, { text: " " }, { text: "subheading" }, { text: " " }, { text: "3" } ] }, + { + sourceCodeRange: {}, + text: "text text text", + tokens: [ { text: "text" }, { text: " " }, { text: "text" }, { text: " " }, { text: "text" } ] }, + { + sourceCodeRange: { endOffset: 123, startOffset: 109 }, + text: "A subheading 4", + tokens: [ { text: "A" }, { text: " " }, { text: "subheading" }, { text: " " }, { text: "4" } ] }, + { + sourceCodeRange: { endOffset: 138, startOffset: 128 }, + text: "more text.", tokens: [ { text: "more" }, { text: " " }, { text: "text" }, { text: "." } ] }, + ] + ); + } ); +} ); From 5f5b5c5c74faab2f761ebb9b7501abb91f04b530 Mon Sep 17 00:00:00 2001 From: FAMarfuaty Date: Tue, 11 Apr 2023 17:12:28 +0200 Subject: [PATCH 004/616] Add helper to match word with tokens --- .../helpers/match/matchWordFormsWithTokens.js | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 packages/yoastseo/src/languageProcessing/helpers/match/matchWordFormsWithTokens.js diff --git a/packages/yoastseo/src/languageProcessing/helpers/match/matchWordFormsWithTokens.js b/packages/yoastseo/src/languageProcessing/helpers/match/matchWordFormsWithTokens.js new file mode 100644 index 00000000000..33220364da8 --- /dev/null +++ b/packages/yoastseo/src/languageProcessing/helpers/match/matchWordFormsWithTokens.js @@ -0,0 +1,25 @@ +import { uniq as unique } from "lodash-es"; + +/** + * Matches an array of words with an array of tokens. + * + * @param {array} wordForms The array of words to check. + * @param {array} tokens The array of tokens to check. + * @returns {{count: number, matches: *[]}} The matched words. 
+ */ +export default function matchWordFormsWithTokens( wordForms, tokens ) { + const result = { + count: 0, + matches: [], + }; + + unique( wordForms ).forEach( function( form ) { + const foundWord = tokens.filter( token => token === form ); + if ( foundWord.length > 0 ) { + result.matches.concat( foundWord ); + result.count += foundWord.length; + } + } ); + + return result; +} From 3e1af315ebd5e5faaabea5eaffae87ce8a7bd57f Mon Sep 17 00:00:00 2001 From: FAMarfuaty Date: Tue, 11 Apr 2023 17:12:51 +0200 Subject: [PATCH 005/616] Rename variables --- .../seo/KeywordDensityAssessment.js | 40 +++++++++---------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/packages/yoastseo/src/scoring/assessments/seo/KeywordDensityAssessment.js b/packages/yoastseo/src/scoring/assessments/seo/KeywordDensityAssessment.js index a8a43ca7b35..6a27d4f0abb 100644 --- a/packages/yoastseo/src/scoring/assessments/seo/KeywordDensityAssessment.js +++ b/packages/yoastseo/src/scoring/assessments/seo/KeywordDensityAssessment.js @@ -101,23 +101,23 @@ class KeywordDensityAssessment extends Assessment { */ getResult( paper, researcher ) { const customGetWords = researcher.getHelper( "getWordsCustomHelper" ); - this._keywordCount = researcher.getResearch( "keywordCount" ); - const keyphraseLength = this._keywordCount.length; + this._keyphraseCount = researcher.getResearch( "keywordCount" ); + const keyphraseLength = this._keyphraseCount.length; const assessmentResult = new AssessmentResult(); - this._keywordDensity = researcher.getResearch( "getKeywordDensity" ); + this._keyphraseDensity = researcher.getResearch( "getKeywordDensity" ); this._hasMorphologicalForms = researcher.getData( "morphology" ) !== false; this.setBoundaries( paper.getText(), keyphraseLength, customGetWords ); - this._keywordDensity = this._keywordDensity * keyphraseLengthFactor( keyphraseLength ); + this._keyphraseDensity = this._keyphraseDensity * keyphraseLengthFactor( keyphraseLength ); const calculatedScore = this.calculateResult(); assessmentResult.setScore( calculatedScore.score ); assessmentResult.setText( calculatedScore.resultText ); - assessmentResult.setHasMarks( this._keywordCount.count > 0 ); + assessmentResult.setHasMarks( this._keyphraseCount.count > 0 ); return assessmentResult; } @@ -128,7 +128,7 @@ class KeywordDensityAssessment extends Assessment { * @returns {boolean} Returns true if the keyphrase count is 0. 
*/ hasNoMatches() { - return this._keywordCount.count === 0; + return this._keyphraseCount.count === 0; } /** @@ -139,10 +139,10 @@ class KeywordDensityAssessment extends Assessment { */ hasTooFewMatches() { return inRangeStartInclusive( - this._keywordDensity, + this._keyphraseDensity, 0, this._boundaries.minimum - ) || this._keywordCount.count === 1; + ) || this._keyphraseCount.count === 1; } /** @@ -153,10 +153,10 @@ class KeywordDensityAssessment extends Assessment { */ hasGoodNumberOfMatches() { return inRangeStartEndInclusive( - this._keywordDensity, + this._keyphraseDensity, this._boundaries.minimum, this._boundaries.maximum - ) || ( this._keywordCount.count === 2 && this._minRecommendedKeywordCount <= 2 ); + ) || ( this._keyphraseCount.count === 2 && this._minRecommendedKeywordCount <= 2 ); } /** @@ -169,7 +169,7 @@ class KeywordDensityAssessment extends Assessment { */ hasTooManyMatches() { return inRangeEndInclusive( - this._keywordDensity, + this._keyphraseDensity, this._boundaries.maximum, this._boundaries.overMaximum ); @@ -216,14 +216,14 @@ class KeywordDensityAssessment extends Assessment { "%1$sKeyphrase density%2$s: The focus keyphrase was found %5$d time. That's less than the recommended minimum of %3$d times for a text of this length. %4$sFocus on your keyphrase%2$s!", // eslint-disable-next-line max-len "%1$sKeyphrase density%2$s: The focus keyphrase was found %5$d times. That's less than the recommended minimum of %3$d times for a text of this length. %4$sFocus on your keyphrase%2$s!", - this._keywordCount.count, + this._keyphraseCount.count, "wordpress-seo" ), this._config.urlTitle, "", this._minRecommendedKeywordCount, this._config.urlCallToAction, - this._keywordCount.count + this._keyphraseCount.count ), }; } @@ -239,12 +239,12 @@ class KeywordDensityAssessment extends Assessment { _n( "%1$sKeyphrase density%2$s: The focus keyphrase was found %3$d time. This is great!", "%1$sKeyphrase density%2$s: The focus keyphrase was found %3$d times. This is great!", - this._keywordCount.count, + this._keyphraseCount.count, "wordpress-seo" ), this._config.urlTitle, "", - this._keywordCount.count + this._keyphraseCount.count ), }; } @@ -263,14 +263,14 @@ class KeywordDensityAssessment extends Assessment { "%1$sKeyphrase density%2$s: The focus keyphrase was found %5$d time. That's more than the recommended maximum of %3$d times for a text of this length. %4$sDon't overoptimize%2$s!", // eslint-disable-next-line max-len "%1$sKeyphrase density%2$s: The focus keyphrase was found %5$d times. That's more than the recommended maximum of %3$d times for a text of this length. %4$sDon't overoptimize%2$s!", - this._keywordCount.count, + this._keyphraseCount.count, "wordpress-seo" ), this._config.urlTitle, "", this._maxRecommendedKeywordCount, this._config.urlCallToAction, - this._keywordCount.count + this._keyphraseCount.count ), }; } @@ -289,14 +289,14 @@ class KeywordDensityAssessment extends Assessment { "%1$sKeyphrase density%2$s: The focus keyphrase was found %5$d time. That's way more than the recommended maximum of %3$d times for a text of this length. %4$sDon't overoptimize%2$s!", // eslint-disable-next-line max-len "%1$sKeyphrase density%2$s: The focus keyphrase was found %5$d times. That's way more than the recommended maximum of %3$d times for a text of this length. 
%4$sDon't overoptimize%2$s!", - this._keywordCount.count, + this._keyphraseCount.count, "wordpress-seo" ), this._config.urlTitle, "", this._maxRecommendedKeywordCount, this._config.urlCallToAction, - this._keywordCount.count + this._keyphraseCount.count ), }; } @@ -308,7 +308,7 @@ class KeywordDensityAssessment extends Assessment { * @returns {Array} Marks that should be applied. */ getMarks() { - return this._keywordCount.markings; + return this._keyphraseCount.markings; } From 957c05cb1ae598a27b8e5f121f3a5c3d93909c75 Mon Sep 17 00:00:00 2001 From: FAMarfuaty Date: Wed, 12 Apr 2023 10:00:12 +0200 Subject: [PATCH 006/616] initial adaptation of keywordcount --- .../researches/keywordCount.js | 73 +++++++++++-------- 1 file changed, 44 insertions(+), 29 deletions(-) diff --git a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js index 29c433dfb2e..3e96452f9be 100644 --- a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js +++ b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js @@ -1,63 +1,78 @@ -/** @module analyses/getKeywordCount */ - -import matchWords from "../helpers/match/matchTextWithArray"; import { flattenDeep, uniq as unique } from "lodash-es"; -import getSentences from "../helpers/sentence/getSentences"; -import { markWordsInSentences } from "../helpers/word/markWordsInSentences"; +import matchTextWithArray from "../helpers/match/matchTextWithArray"; +import matchWordFormsWithTokens from "../helpers/match/matchWordFormsWithTokens"; +import getSentencesFromTree from "../helpers/sentence/getSentencesFromTree"; +import { markWordsInASentence } from "../helpers/word/markWordsInSentences"; /** - * Calculates the keyword count, takes morphology into account. + * Calculates the keyphrase count, takes morphology into account. * - * @param {object} paper The paper containing keyword and text. - * @param {object} researcher The researcher. + * @param {Paper} paper The paper containing keyphrase and text. + * @param {Researcher} researcher The researcher. * - * @returns {object} An array of all the matches, markings and the keyword count. + * @returns {object} An array of all the matches, markings and the keyphrase count. */ export default function( paper, researcher ) { const topicForms = researcher.getResearch( "morphology" ); - const memoizedTokenizer = researcher.getHelper( "memoizedTokenizer" ); // A helper to return all the matches for the keyphrase. const matchWordCustomHelper = researcher.getHelper( "matchWordCustomHelper" ); - const text = paper.getText(); const locale = paper.getLocale(); - const sentences = getSentences( text, memoizedTokenizer ); + const sentences = getSentencesFromTree( paper ); const keywordsFound = { count: 0, matches: [], - sentencesWithKeywords: [], + markings: [], }; - /* - * Count the amount of keyphrase occurrences in the sentences. - * An occurrence is counted when all keywords of the keyphrase are contained within the sentence. Each sentence can contain multiple keyphrases. - * (e.g. "The apple potato is an apple and a potato." has two occurrences of the keyphrase "apple potato"). 
- */ sentences.forEach( sentence => { - const matchesInSentence = topicForms.keyphraseForms.map( keywordForms => matchWords( sentence, - keywordForms, locale, matchWordCustomHelper ) ); + const keyphraseForms = topicForms.keyphraseForms; + const tokens = sentence.tokens.map( token => token.text ); + // eslint-disable-next-line no-warning-comments + + if ( matchWordCustomHelper ) { + /* + * Count the amount of keyphrase occurrences in the sentences. + * An occurrence is counted when all keywords of the keyphrase are contained within the sentence. + * Each sentence can contain multiple keyphrases. + * (e.g. "The apple potato is an apple and a potato." has two occurrences of the keyphrase "apple potato"). + */ + const matchesInSentence = topicForms.keyphraseForms.map( keywordForms => matchTextWithArray( sentence.text, + keywordForms, locale, matchWordCustomHelper ) ); + const hasAllKeywords = matchesInSentence.every( keywordForm => keywordForm.count > 0 ); - const hasAllKeywords = matchesInSentence.every( keywordForm => keywordForm.count > 0 ); + if ( hasAllKeywords ) { + const counts = matchesInSentence.map( match => match.count ); + const foundWords = flattenDeep( matchesInSentence.map( match => match.matches ) ); + // eslint-disable-next-line no-warning-comments + keywordsFound.count += Math.min( ...counts ); + keywordsFound.matches.push( foundWords ); + keywordsFound.markings.push( markWordsInASentence( sentence.text, keyphraseForms, matchWordCustomHelper ) ); + } + } else { + const matchesInSentence = topicForms.keyphraseForms.map( forms => matchWordFormsWithTokens( forms, tokens ) ); + const hasAllKeywords = matchesInSentence.every( keywordForm => keywordForm.count > 0 ); - if ( hasAllKeywords ) { - const counts = matchesInSentence.map( match => match.count ); - const foundWords = flattenDeep( matchesInSentence.map( match => match.matches ) ); - keywordsFound.count += Math.min( ...counts ); - keywordsFound.matches.push( foundWords ); - keywordsFound.sentencesWithKeywords.push( sentence ); + if ( hasAllKeywords ) { + const counts = matchesInSentence.map( match => match.count ); + const foundWords = flattenDeep( matchesInSentence.map( match => match.matches ) ); + // eslint-disable-next-line no-warning-comments + // TODO: create a new helper for adding marking to tokens + keywordsFound.count += Math.min( ...counts ); + keywordsFound.matches.push( foundWords ); + } } } ); const matches = unique( flattenDeep( keywordsFound.matches ) ).sort( ( a, b ) => b.length - a.length ); - const keyphraseForms = flattenDeep( topicForms.keyphraseForms ); return { count: keywordsFound.count, matches: matches, - markings: markWordsInSentences( keyphraseForms, keywordsFound.sentencesWithKeywords, locale, matchWordCustomHelper ), + markings: keywordsFound.markings, length: topicForms.keyphraseForms.length, }; } From ab4bd49d76c17216b45946c50dc862da022ddc44 Mon Sep 17 00:00:00 2001 From: FAMarfuaty Date: Wed, 12 Apr 2023 10:41:20 +0200 Subject: [PATCH 007/616] Create separate helper for marking individual sentence --- .../helpers/word/markWordsInSentences.js | 22 ++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/packages/yoastseo/src/languageProcessing/helpers/word/markWordsInSentences.js b/packages/yoastseo/src/languageProcessing/helpers/word/markWordsInSentences.js index d29f325656e..b7531fd1093 100644 --- a/packages/yoastseo/src/languageProcessing/helpers/word/markWordsInSentences.js +++ b/packages/yoastseo/src/languageProcessing/helpers/word/markWordsInSentences.js @@ 
-107,6 +107,21 @@ export const collectMarkingsInSentence = function( sentence, wordsFoundInSentenc return ( markup.replace( new RegExp( " ", "ig" ), " " ) ); }; +/** + * Adds marks to a sentence. + * + * @param {string} sentence The sentence in which we want to apply highlighting. + * @param {Array} wordsFoundInSentence The words to highlight in a sentence. + * @param {function} matchWordCustomHelper The language-specific helper function to match word in text. + * @returns {Mark} The mark object of each sentence. + */ +export function markWordsInASentence( sentence, wordsFoundInSentence, matchWordCustomHelper ) { + return new Mark( { + original: sentence, + marked: collectMarkingsInSentence( sentence, wordsFoundInSentence, matchWordCustomHelper ), + } ); +} + /** * Adds marks to an array of sentences. * @@ -119,16 +134,13 @@ export const collectMarkingsInSentence = function( sentence, wordsFoundInSentenc */ export function markWordsInSentences( wordsToMark, sentences, locale, matchWordCustomHelper ) { let wordsFoundInSentence = []; - let markings = []; + const markings = []; sentences.forEach( function( sentence ) { wordsFoundInSentence = matchWords( sentence, wordsToMark, locale, matchWordCustomHelper ).matches; if ( wordsFoundInSentence.length > 0 ) { - markings = markings.concat( new Mark( { - original: sentence, - marked: collectMarkingsInSentence( sentence, wordsFoundInSentence, matchWordCustomHelper ), - } ) ); + markings.push( markWordsInASentence( sentence, wordsFoundInSentence, matchWordCustomHelper ) ); } } ); From 35fb6fb8f54352589a0edb113b091f9990ef5a06 Mon Sep 17 00:00:00 2001 From: FAMarfuaty Date: Wed, 12 Apr 2023 10:56:12 +0200 Subject: [PATCH 008/616] Add unit test --- .../helpers/word/markWordsInSentenceSpec.js | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/packages/yoastseo/spec/languageProcessing/helpers/word/markWordsInSentenceSpec.js b/packages/yoastseo/spec/languageProcessing/helpers/word/markWordsInSentenceSpec.js index 5e299385445..ce48dca4057 100644 --- a/packages/yoastseo/spec/languageProcessing/helpers/word/markWordsInSentenceSpec.js +++ b/packages/yoastseo/spec/languageProcessing/helpers/word/markWordsInSentenceSpec.js @@ -1,4 +1,8 @@ -import { deConstructAnchor, markWordsInSentences, reConstructAnchor } from "../../../../src/languageProcessing/helpers/word/markWordsInSentences"; +import { + deConstructAnchor, + markWordsInSentences, + markWordsInASentence, + reConstructAnchor } from "../../../../src/languageProcessing/helpers/word/markWordsInSentences"; import Mark from "../../../../src/values/Mark"; import matchWordCustomHelper from "../../../../src/languageProcessing/languages/ja/helpers/matchTextWithWord"; @@ -84,6 +88,16 @@ describe( "Adds Yoast marks to specific words in a sentence", function() { "en_EN" ) ).toEqual( [] ); } ); + + it( "should add Yoast marks to all instances of specified words in a sentence", function() { + expect( markWordsInASentence( + "A cat and a turtle and a hamster.", + [ "turtle", "hamster" ] + ) ).toEqual( new Mark( { + marked: "A cat and a turtle " + + "and a hamster.", + original: "A cat and a turtle and a hamster." 
} ) ); + } ); } ); describe( "Adds Yoast marks to specific words in a sentence for languages with custom helper to match words", function() { From b430fa195f1688a7a521128a1e82ea91cc540819 Mon Sep 17 00:00:00 2001 From: FAMarfuaty Date: Wed, 12 Apr 2023 11:39:35 +0200 Subject: [PATCH 009/616] Add unit test --- .../sentence/getSentencesFromTreeSpec.js | 70 ++++++++++++++++--- .../helpers/match/matchWordFormsWithTokens.js | 2 +- .../researches/keywordCount.js | 7 +- 3 files changed, 63 insertions(+), 16 deletions(-) diff --git a/packages/yoastseo/spec/languageProcessing/helpers/sentence/getSentencesFromTreeSpec.js b/packages/yoastseo/spec/languageProcessing/helpers/sentence/getSentencesFromTreeSpec.js index 042bd750c2e..a2e6c485270 100644 --- a/packages/yoastseo/spec/languageProcessing/helpers/sentence/getSentencesFromTreeSpec.js +++ b/packages/yoastseo/spec/languageProcessing/helpers/sentence/getSentencesFromTreeSpec.js @@ -16,29 +16,79 @@ describe( "test", () => { { sourceCodeRange: { endOffset: 44, startOffset: 3 }, text: "A very intelligent cat loves their human.", - tokens: [ { text: "A" }, { text: " " }, { text: "very" }, { text: " " }, { text: "intelligent" }, { text: " " }, { text: "cat" }, - { text: " " }, { text: "loves" }, { text: " " }, { text: "their" }, { text: " " }, { text: "human" }, { text: "." } ] }, + tokens: [ + { sourceCodeRange: { endOffset: 4, startOffset: 3 }, text: "A" }, + { sourceCodeRange: { endOffset: 5, startOffset: 4 }, text: " " }, + { sourceCodeRange: { endOffset: 9, startOffset: 5 }, text: "very" }, + { sourceCodeRange: { endOffset: 10, startOffset: 9 }, text: " " }, + { sourceCodeRange: { endOffset: 21, startOffset: 10 }, text: "intelligent" }, + { sourceCodeRange: { endOffset: 22, startOffset: 21 }, text: " " }, + { sourceCodeRange: { endOffset: 25, startOffset: 22 }, text: "cat" }, + { sourceCodeRange: { endOffset: 26, startOffset: 25 }, text: " " }, + { sourceCodeRange: { endOffset: 31, startOffset: 26 }, text: "loves" }, + { sourceCodeRange: { endOffset: 32, startOffset: 31 }, text: " " }, + { sourceCodeRange: { endOffset: 37, startOffset: 32 }, text: "their" }, + { sourceCodeRange: { endOffset: 38, startOffset: 37 }, text: " " }, + { sourceCodeRange: { endOffset: 43, startOffset: 38 }, text: "human" }, + { sourceCodeRange: { endOffset: 44, startOffset: 43 }, text: "." }, + ], + }, { sourceCodeRange: { endOffset: 64, startOffset: 44 }, text: " A dog is very cute.", - tokens: [ { text: " " }, { text: "A" }, { text: " " }, { text: "dog" }, { text: " " }, { text: "is" }, { text: " " }, - { text: "very" }, { text: " " }, { text: "cute" }, { text: "." } ] }, + tokens: [ + { sourceCodeRange: { endOffset: 4, startOffset: 3 }, text: " " }, + { sourceCodeRange: { endOffset: 5, startOffset: 4 }, text: "A" }, + { sourceCodeRange: { endOffset: 6, startOffset: 5 }, text: " " }, + { sourceCodeRange: { endOffset: 9, startOffset: 6 }, text: "dog" }, + { sourceCodeRange: { endOffset: 10, startOffset: 9 }, text: " " }, + { sourceCodeRange: { endOffset: 12, startOffset: 10 }, text: "is" }, + { sourceCodeRange: { endOffset: 13, startOffset: 12 }, text: " " }, + { sourceCodeRange: { endOffset: 17, startOffset: 13 }, text: "very" }, + { sourceCodeRange: { endOffset: 18, startOffset: 17 }, text: " " }, + { sourceCodeRange: { endOffset: 22, startOffset: 18 }, text: "cute" }, + { sourceCodeRange: { endOffset: 23, startOffset: 22 }, text: "." 
}, + ] }, { sourceCodeRange: { endOffset: 86, startOffset: 72 }, text: "A subheading 3", - tokens: [ { text: "A" }, { text: " " }, { text: "subheading" }, { text: " " }, { text: "3" } ] }, + tokens: [ + { sourceCodeRange: { endOffset: 73, startOffset: 72 }, text: "A" }, + { sourceCodeRange: { endOffset: 74, startOffset: 73 }, text: " " }, + { sourceCodeRange: { endOffset: 84, startOffset: 74 }, text: "subheading" }, + { sourceCodeRange: { endOffset: 85, startOffset: 84 }, text: " " }, + { sourceCodeRange: { endOffset: 86, startOffset: 85 }, text: "3" }, + ] }, { sourceCodeRange: {}, text: "text text text", - tokens: [ { text: "text" }, { text: " " }, { text: "text" }, { text: " " }, { text: "text" } ] }, + tokens: [ + { sourceCodeRange: {}, text: "text" }, + { sourceCodeRange: {}, text: " " }, + { sourceCodeRange: {}, text: "text" }, + { sourceCodeRange: {}, text: " " }, + { sourceCodeRange: {}, text: "text" }, + ] }, { sourceCodeRange: { endOffset: 123, startOffset: 109 }, text: "A subheading 4", - tokens: [ { text: "A" }, { text: " " }, { text: "subheading" }, { text: " " }, { text: "4" } ] }, + tokens: [ + { sourceCodeRange: { endOffset: 110, startOffset: 109 }, text: "A" }, + { sourceCodeRange: { endOffset: 111, startOffset: 110 }, text: " " }, + { sourceCodeRange: { endOffset: 121, startOffset: 111 }, text: "subheading" }, + { sourceCodeRange: { endOffset: 122, startOffset: 121 }, text: " " }, + { sourceCodeRange: { endOffset: 123, startOffset: 122 }, text: "4" }, + ] }, { sourceCodeRange: { endOffset: 138, startOffset: 128 }, - text: "more text.", tokens: [ { text: "more" }, { text: " " }, { text: "text" }, { text: "." } ] }, - ] - ); + text: "more text.", + tokens: [ + { sourceCodeRange: { endOffset: 132, startOffset: 128 }, text: "more" }, + { sourceCodeRange: { endOffset: 133, startOffset: 132 }, text: " " }, + { sourceCodeRange: { endOffset: 137, startOffset: 133 }, text: "text" }, + { sourceCodeRange: { endOffset: 138, startOffset: 137 }, text: "." }, + ], + }, + ] ); } ); } ); diff --git a/packages/yoastseo/src/languageProcessing/helpers/match/matchWordFormsWithTokens.js b/packages/yoastseo/src/languageProcessing/helpers/match/matchWordFormsWithTokens.js index 33220364da8..7b9070af97a 100644 --- a/packages/yoastseo/src/languageProcessing/helpers/match/matchWordFormsWithTokens.js +++ b/packages/yoastseo/src/languageProcessing/helpers/match/matchWordFormsWithTokens.js @@ -14,7 +14,7 @@ export default function matchWordFormsWithTokens( wordForms, tokens ) { }; unique( wordForms ).forEach( function( form ) { - const foundWord = tokens.filter( token => token === form ); + const foundWord = tokens.filter( token => token.text === form ); if ( foundWord.length > 0 ) { result.matches.concat( foundWord ); result.count += foundWord.length; diff --git a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js index 3e96452f9be..66c7164c284 100644 --- a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js +++ b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js @@ -29,10 +29,6 @@ export default function( paper, researcher ) { }; sentences.forEach( sentence => { - const keyphraseForms = topicForms.keyphraseForms; - const tokens = sentence.tokens.map( token => token.text ); - // eslint-disable-next-line no-warning-comments - if ( matchWordCustomHelper ) { /* * Count the amount of keyphrase occurrences in the sentences. 
@@ -50,9 +46,10 @@ export default function( paper, researcher ) { // eslint-disable-next-line no-warning-comments keywordsFound.count += Math.min( ...counts ); keywordsFound.matches.push( foundWords ); - keywordsFound.markings.push( markWordsInASentence( sentence.text, keyphraseForms, matchWordCustomHelper ) ); + keywordsFound.markings.push( markWordsInASentence( sentence.text, foundWords, matchWordCustomHelper ) ); } } else { + const tokens = sentence.tokens; const matchesInSentence = topicForms.keyphraseForms.map( forms => matchWordFormsWithTokens( forms, tokens ) ); const hasAllKeywords = matchesInSentence.every( keywordForm => keywordForm.count > 0 ); From 74dd09b7302025551c5b69d0154b04f351a2e582 Mon Sep 17 00:00:00 2001 From: Aida Marfuaty <48715883+FAMarfuaty@users.noreply.github.com> Date: Wed, 12 Apr 2023 11:54:26 +0200 Subject: [PATCH 010/616] Simplify code Co-authored-by: hdvos --- .../helpers/sentence/getSentencesFromTree.js | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/packages/yoastseo/src/languageProcessing/helpers/sentence/getSentencesFromTree.js b/packages/yoastseo/src/languageProcessing/helpers/sentence/getSentencesFromTree.js index 47ba3542941..8c6e2648d5b 100644 --- a/packages/yoastseo/src/languageProcessing/helpers/sentence/getSentencesFromTree.js +++ b/packages/yoastseo/src/languageProcessing/helpers/sentence/getSentencesFromTree.js @@ -12,6 +12,5 @@ import { Paragraph, Heading } from "../../../parse/structure"; export default function( paper ) { const tree = paper.getTree().findAll( treeNode => treeNode instanceof Paragraph || treeNode instanceof Heading ); - const sentences = tree.map( node => node.sentences ); - return flatten( sentences ); + return tree.flatMap( node => node.sentences ); } From de7b24d4fa1245773689b2aab7116bf329ea35c0 Mon Sep 17 00:00:00 2001 From: Aida Marfuaty <48715883+FAMarfuaty@users.noreply.github.com> Date: Wed, 12 Apr 2023 14:09:48 +0200 Subject: [PATCH 011/616] simplify code Co-authored-by: hdvos --- .../languageProcessing/helpers/sentence/getSentencesFromTree.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/yoastseo/src/languageProcessing/helpers/sentence/getSentencesFromTree.js b/packages/yoastseo/src/languageProcessing/helpers/sentence/getSentencesFromTree.js index 8c6e2648d5b..96f1e22d92e 100644 --- a/packages/yoastseo/src/languageProcessing/helpers/sentence/getSentencesFromTree.js +++ b/packages/yoastseo/src/languageProcessing/helpers/sentence/getSentencesFromTree.js @@ -10,7 +10,7 @@ import { Paragraph, Heading } from "../../../parse/structure"; * @returns {Object[]} The array of sentences retrieved from paragraph and heading nodes. */ export default function( paper ) { - const tree = paper.getTree().findAll( treeNode => treeNode instanceof Paragraph || treeNode instanceof Heading ); + const tree = paper.getTree().findAll( treeNode => !! 
treeNode.sentences ); return tree.flatMap( node => node.sentences ); } From 2c7334c70f0b0e3ef13ba011f6ada34ddab68fc9 Mon Sep 17 00:00:00 2001 From: FAMarfuaty Date: Wed, 12 Apr 2023 14:53:08 +0200 Subject: [PATCH 012/616] Adapt function --- .../helpers/match/matchWordFormsWithTokens.js | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/packages/yoastseo/src/languageProcessing/helpers/match/matchWordFormsWithTokens.js b/packages/yoastseo/src/languageProcessing/helpers/match/matchWordFormsWithTokens.js index 7b9070af97a..280716fcfac 100644 --- a/packages/yoastseo/src/languageProcessing/helpers/match/matchWordFormsWithTokens.js +++ b/packages/yoastseo/src/languageProcessing/helpers/match/matchWordFormsWithTokens.js @@ -14,10 +14,10 @@ export default function matchWordFormsWithTokens( wordForms, tokens ) { }; unique( wordForms ).forEach( function( form ) { - const foundWord = tokens.filter( token => token.text === form ); - if ( foundWord.length > 0 ) { - result.matches.concat( foundWord ); - result.count += foundWord.length; + const foundWords = tokens.filter( token => token.text === form ); + if ( foundWords.length > 0 ) { + result.matches = result.matches.concat( foundWords ); + result.count += foundWords.length; } } ); From eb460022e8eba89533893ea359cfe846dfcc1ecb Mon Sep 17 00:00:00 2001 From: FAMarfuaty Date: Wed, 12 Apr 2023 16:18:15 +0200 Subject: [PATCH 013/616] Create helper for counting keyphrase --- .../researches/keywordCount.js | 99 ++++++++++--------- 1 file changed, 54 insertions(+), 45 deletions(-) diff --git a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js index 66c7164c284..215974e8486 100644 --- a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js +++ b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js @@ -2,7 +2,59 @@ import { flattenDeep, uniq as unique } from "lodash-es"; import matchTextWithArray from "../helpers/match/matchTextWithArray"; import matchWordFormsWithTokens from "../helpers/match/matchWordFormsWithTokens"; import getSentencesFromTree from "../helpers/sentence/getSentencesFromTree"; -import { markWordsInASentence } from "../helpers/word/markWordsInSentences"; +import { collectMarkingsInSentence, markWordsInASentence } from "../helpers/word/markWordsInSentences"; +import Mark from "../../values/Mark"; + +/** + * + * @param sentences + * @param topicForms + * @param matchWordCustomHelper + * @param locale + * @return {{markings: *[], count: number}} + */ +function countKeyphraseOccurrences( sentences, topicForms, matchWordCustomHelper, locale ) { + const result = { + count: 0, + markings: [], + }; + sentences.forEach( sentence => { + const keyphraseForms = topicForms.keyphraseForms; + let matchesInSentence; + if ( matchWordCustomHelper ) { + /* + * Count the amount of keyphrase occurrences in the sentences. + * An occurrence is counted when all keywords of the keyphrase are contained within the sentence. + * Each sentence can contain multiple keyphrases. + * (e.g. "The apple potato is an apple and a potato." has two occurrences of the keyphrase "apple potato"). 
+ * */ + matchesInSentence = keyphraseForms.map( forms => matchTextWithArray( sentence.text, + forms, locale, matchWordCustomHelper ) ); + } else { + const tokens = sentence.tokens; + matchesInSentence = keyphraseForms.map( forms => matchWordFormsWithTokens( forms, tokens ) ); + } + const hasAllKeywords = matchesInSentence.every( form => form.count > 0 ); + + if ( hasAllKeywords ) { + const counts = matchesInSentence.map( match => match.count ); + const foundWords = flattenDeep( matchesInSentence.map( match => match.matches ) ); + result.count += Math.min( ...counts ); + const markings = matchWordCustomHelper + ? markWordsInASentence( sentence.text, foundWords, matchWordCustomHelper ) + : foundWords.map( word => { + return new Mark( + { + position: { startOffset: word.sourceCodeRange.startOffset, endOffset: word.sourceCodeRange.endOffset }, + marked: collectMarkingsInSentence( sentence.text, foundWords.map( wordB => wordB.text ) ), + original: sentence.text, + } ); + } ); + result.markings.push( markings ); + } + } ); + return result; +} /** * Calculates the keyphrase count, takes morphology into account. @@ -22,53 +74,10 @@ export default function( paper, researcher ) { const sentences = getSentencesFromTree( paper ); - const keywordsFound = { - count: 0, - matches: [], - markings: [], - }; - - sentences.forEach( sentence => { - if ( matchWordCustomHelper ) { - /* - * Count the amount of keyphrase occurrences in the sentences. - * An occurrence is counted when all keywords of the keyphrase are contained within the sentence. - * Each sentence can contain multiple keyphrases. - * (e.g. "The apple potato is an apple and a potato." has two occurrences of the keyphrase "apple potato"). - */ - const matchesInSentence = topicForms.keyphraseForms.map( keywordForms => matchTextWithArray( sentence.text, - keywordForms, locale, matchWordCustomHelper ) ); - const hasAllKeywords = matchesInSentence.every( keywordForm => keywordForm.count > 0 ); - - if ( hasAllKeywords ) { - const counts = matchesInSentence.map( match => match.count ); - const foundWords = flattenDeep( matchesInSentence.map( match => match.matches ) ); - // eslint-disable-next-line no-warning-comments - keywordsFound.count += Math.min( ...counts ); - keywordsFound.matches.push( foundWords ); - keywordsFound.markings.push( markWordsInASentence( sentence.text, foundWords, matchWordCustomHelper ) ); - } - } else { - const tokens = sentence.tokens; - const matchesInSentence = topicForms.keyphraseForms.map( forms => matchWordFormsWithTokens( forms, tokens ) ); - const hasAllKeywords = matchesInSentence.every( keywordForm => keywordForm.count > 0 ); - - if ( hasAllKeywords ) { - const counts = matchesInSentence.map( match => match.count ); - const foundWords = flattenDeep( matchesInSentence.map( match => match.matches ) ); - // eslint-disable-next-line no-warning-comments - // TODO: create a new helper for adding marking to tokens - keywordsFound.count += Math.min( ...counts ); - keywordsFound.matches.push( foundWords ); - } - } - } ); - - const matches = unique( flattenDeep( keywordsFound.matches ) ).sort( ( a, b ) => b.length - a.length ); + const keywordsFound = countKeyphraseOccurrences( sentences, topicForms, matchWordCustomHelper, locale ); return { count: keywordsFound.count, - matches: matches, markings: keywordsFound.markings, length: topicForms.keyphraseForms.length, }; From 87cf8783118057ed5264f5500c195d1cde40512a Mon Sep 17 00:00:00 2001 From: FAMarfuaty Date: Wed, 12 Apr 2023 16:59:24 +0200 Subject: [PATCH 014/616] remove 
unnecessary imports --- .../helpers/sentence/getSentencesFromTree.js | 3 --- 1 file changed, 3 deletions(-) diff --git a/packages/yoastseo/src/languageProcessing/helpers/sentence/getSentencesFromTree.js b/packages/yoastseo/src/languageProcessing/helpers/sentence/getSentencesFromTree.js index 96f1e22d92e..23a09d4acbe 100644 --- a/packages/yoastseo/src/languageProcessing/helpers/sentence/getSentencesFromTree.js +++ b/packages/yoastseo/src/languageProcessing/helpers/sentence/getSentencesFromTree.js @@ -1,6 +1,3 @@ -import { flatten } from "lodash-es"; -import { Paragraph, Heading } from "../../../parse/structure"; - /** * Gets all the sentences from paragraph and heading nodes. * These two node types are the nodes that should contain sentences for the analysis. From 1b43dafeffea5efc5c8a9eef9f179a7c7ebba67c Mon Sep 17 00:00:00 2001 From: FAMarfuaty Date: Wed, 12 Apr 2023 16:59:44 +0200 Subject: [PATCH 015/616] flatten nested array --- .../src/languageProcessing/researches/keywordCount.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js index 215974e8486..b45e5a7f825 100644 --- a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js +++ b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js @@ -1,4 +1,4 @@ -import { flattenDeep, uniq as unique } from "lodash-es"; +import { flattenDeep, flatten } from "lodash-es"; import matchTextWithArray from "../helpers/match/matchTextWithArray"; import matchWordFormsWithTokens from "../helpers/match/matchWordFormsWithTokens"; import getSentencesFromTree from "../helpers/sentence/getSentencesFromTree"; @@ -78,7 +78,7 @@ export default function( paper, researcher ) { return { count: keywordsFound.count, - markings: keywordsFound.markings, + markings: flatten( keywordsFound.markings ), length: topicForms.keyphraseForms.length, }; } From c7c339bd3b08d943bf02153b2b5c985789f5d260 Mon Sep 17 00:00:00 2001 From: Mykola Shlyakhtun Date: Wed, 12 Apr 2023 17:21:15 +0200 Subject: [PATCH 016/616] Refactor countKeyphraseOccurrences function for easy reading. --- .../researches/keywordCount.js | 87 +++++++++++-------- 1 file changed, 49 insertions(+), 38 deletions(-) diff --git a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js index 215974e8486..50f6a04dd1f 100644 --- a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js +++ b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js @@ -1,59 +1,70 @@ -import { flattenDeep, uniq as unique } from "lodash-es"; +import { flattenDeep, min } from "lodash-es"; import matchTextWithArray from "../helpers/match/matchTextWithArray"; import matchWordFormsWithTokens from "../helpers/match/matchWordFormsWithTokens"; import getSentencesFromTree from "../helpers/sentence/getSentencesFromTree"; import { collectMarkingsInSentence, markWordsInASentence } from "../helpers/word/markWordsInSentences"; import Mark from "../../values/Mark"; +function getMatchesInSentence( sentence, matchWordCustomHelper, keyphraseForms, locale ) { + if ( matchWordCustomHelper ) { + /* + * Count the amount of keyphrase occurrences in the sentences. + * An occurrence is counted when all keywords of the keyphrase are contained within the sentence. + * Each sentence can contain multiple keyphrases. + * (e.g. "The apple potato is an apple and a potato." 
has two occurrences of the keyphrase "apple potato"). + * */ + return keyphraseForms.map( forms => matchTextWithArray( sentence.text, forms, locale, matchWordCustomHelper ) ); + } + + const tokens = sentence.tokens; + return keyphraseForms.map( forms => matchWordFormsWithTokens( forms, tokens ) ); +} + +function getMarkings( sentence, matchWordCustomHelper, matchesInSentence ) { + const foundWords = flattenDeep( matchesInSentence.map( match => match.matches ) ); + + if ( matchWordCustomHelper ) { + return markWordsInASentence( sentence.text, foundWords, matchWordCustomHelper ); + } + + const wordTexts = foundWords.map( wordB => wordB.text ); + + return foundWords.map( word => + new Mark( + { + position: { startOffset: word.sourceCodeRange.startOffset, endOffset: word.sourceCodeRange.endOffset }, + marked: collectMarkingsInSentence( sentence.text, wordTexts ), + original: sentence.text, + } ) ); +} /** * * @param sentences * @param topicForms * @param matchWordCustomHelper * @param locale - * @return {{markings: *[], count: number}} + * @returns {{markings: *[], count: number}} */ function countKeyphraseOccurrences( sentences, topicForms, matchWordCustomHelper, locale ) { - const result = { - count: 0, - markings: [], - }; - sentences.forEach( sentence => { - const keyphraseForms = topicForms.keyphraseForms; - let matchesInSentence; - if ( matchWordCustomHelper ) { - /* - * Count the amount of keyphrase occurrences in the sentences. - * An occurrence is counted when all keywords of the keyphrase are contained within the sentence. - * Each sentence can contain multiple keyphrases. - * (e.g. "The apple potato is an apple and a potato." has two occurrences of the keyphrase "apple potato"). - * */ - matchesInSentence = keyphraseForms.map( forms => matchTextWithArray( sentence.text, - forms, locale, matchWordCustomHelper ) ); - } else { - const tokens = sentence.tokens; - matchesInSentence = keyphraseForms.map( forms => matchWordFormsWithTokens( forms, tokens ) ); - } + return sentences.reduce( ( sentence, acc ) => { + const matchesInSentence = getMatchesInSentence( sentence, matchWordCustomHelper, topicForms.keyphraseForms, locale ); + const hasAllKeywords = matchesInSentence.every( form => form.count > 0 ); - if ( hasAllKeywords ) { - const counts = matchesInSentence.map( match => match.count ); - const foundWords = flattenDeep( matchesInSentence.map( match => match.matches ) ); - result.count += Math.min( ...counts ); - const markings = matchWordCustomHelper - ? markWordsInASentence( sentence.text, foundWords, matchWordCustomHelper ) - : foundWords.map( word => { - return new Mark( - { - position: { startOffset: word.sourceCodeRange.startOffset, endOffset: word.sourceCodeRange.endOffset }, - marked: collectMarkingsInSentence( sentence.text, foundWords.map( wordB => wordB.text ) ), - original: sentence.text, - } ); - } ); - result.markings.push( markings ); + if ( ! 
hasAllKeywords ) { + return acc; } + + const markings = getMarkings( sentence, matchWordCustomHelper, matchesInSentence ); + + return { + count: acc.count + min( matchesInSentence.map( match => match.count ) ), + markings: acc.markings.concat( markings ), + }; + }, { + count: 0, + markings: [], } ); - return result; } /** From 4ed3c32fe844a7221f2cbefdaf2d0aaf5c535f32 Mon Sep 17 00:00:00 2001 From: FAMarfuaty Date: Thu, 13 Apr 2023 10:55:04 +0200 Subject: [PATCH 017/616] Adapt code --- .../helpers/word/markWordsInSentenceSpec.js | 4 ++-- .../helpers/word/markWordsInSentences.js | 14 +++++++------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/packages/yoastseo/spec/languageProcessing/helpers/word/markWordsInSentenceSpec.js b/packages/yoastseo/spec/languageProcessing/helpers/word/markWordsInSentenceSpec.js index ce48dca4057..f6e09af6101 100644 --- a/packages/yoastseo/spec/languageProcessing/helpers/word/markWordsInSentenceSpec.js +++ b/packages/yoastseo/spec/languageProcessing/helpers/word/markWordsInSentenceSpec.js @@ -93,10 +93,10 @@ describe( "Adds Yoast marks to specific words in a sentence", function() { expect( markWordsInASentence( "A cat and a turtle and a hamster.", [ "turtle", "hamster" ] - ) ).toEqual( new Mark( { + ) ).toEqual( [ new Mark( { marked: "A cat and a turtle " + "and a hamster.", - original: "A cat and a turtle and a hamster." } ) ); + original: "A cat and a turtle and a hamster." } ) ] ); } ); } ); diff --git a/packages/yoastseo/src/languageProcessing/helpers/word/markWordsInSentences.js b/packages/yoastseo/src/languageProcessing/helpers/word/markWordsInSentences.js index b7531fd1093..92d618c60a0 100644 --- a/packages/yoastseo/src/languageProcessing/helpers/word/markWordsInSentences.js +++ b/packages/yoastseo/src/languageProcessing/helpers/word/markWordsInSentences.js @@ -110,16 +110,16 @@ export const collectMarkingsInSentence = function( sentence, wordsFoundInSentenc /** * Adds marks to a sentence. * - * @param {string} sentence The sentence in which we want to apply highlighting. - * @param {Array} wordsFoundInSentence The words to highlight in a sentence. + * @param {string} sentence The sentence in which we want to apply highlighting. + * @param {Array} wordsFoundInSentence The words to highlight in a sentence. * @param {function} matchWordCustomHelper The language-specific helper function to match word in text. - * @returns {Mark} The mark object of each sentence. + * @returns {Mark[]} The array of Mark objects of each sentence. 
*/ export function markWordsInASentence( sentence, wordsFoundInSentence, matchWordCustomHelper ) { - return new Mark( { + return [ new Mark( { original: sentence, marked: collectMarkingsInSentence( sentence, wordsFoundInSentence, matchWordCustomHelper ), - } ); + } ) ]; } /** @@ -134,13 +134,13 @@ export function markWordsInASentence( sentence, wordsFoundInSentence, matchWordC */ export function markWordsInSentences( wordsToMark, sentences, locale, matchWordCustomHelper ) { let wordsFoundInSentence = []; - const markings = []; + let markings = []; sentences.forEach( function( sentence ) { wordsFoundInSentence = matchWords( sentence, wordsToMark, locale, matchWordCustomHelper ).matches; if ( wordsFoundInSentence.length > 0 ) { - markings.push( markWordsInASentence( sentence, wordsFoundInSentence, matchWordCustomHelper ) ); + markings = markings.concat( markWordsInASentence( sentence, wordsFoundInSentence, matchWordCustomHelper ) ); } } ); From d7bf9649537654c72c8d40376098b9288a4f1be4 Mon Sep 17 00:00:00 2001 From: FAMarfuaty Date: Thu, 13 Apr 2023 10:59:00 +0200 Subject: [PATCH 018/616] Adapt keyphraseCount.js --- .../languageProcessing/AbstractResearcher.js | 3 +- .../{keywordCount.js => keyphraseCount.js} | 69 ++++++++++++++----- 2 files changed, 55 insertions(+), 17 deletions(-) rename packages/yoastseo/src/languageProcessing/researches/{keywordCount.js => keyphraseCount.js} (50%) diff --git a/packages/yoastseo/src/languageProcessing/AbstractResearcher.js b/packages/yoastseo/src/languageProcessing/AbstractResearcher.js index ed6e79fe100..d1b188a93af 100644 --- a/packages/yoastseo/src/languageProcessing/AbstractResearcher.js +++ b/packages/yoastseo/src/languageProcessing/AbstractResearcher.js @@ -24,7 +24,7 @@ import getSubheadingTextLengths from "./researches/getSubheadingTextLengths.js"; import h1s from "./researches/h1s"; import imageCount from "./researches/imageCount.js"; import keyphraseLength from "./researches/keyphraseLength"; -import keywordCount from "./researches/keywordCount"; +import keyphraseCount, { keywordCount } from "./researches/keyphraseCount"; import { keywordCountInSlug, keywordCountInUrl } from "./researches/keywordCountInUrl"; import matchKeywordInSubheadings from "./researches/matchKeywordInSubheadings"; import metaDescriptionKeyword from "./researches/metaDescriptionKeyword"; @@ -73,6 +73,7 @@ export default class AbstractResearcher { h1s, imageCount, keyphraseLength, + keyphraseCount, keywordCount, keywordCountInSlug, keywordCountInUrl, diff --git a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js b/packages/yoastseo/src/languageProcessing/researches/keyphraseCount.js similarity index 50% rename from packages/yoastseo/src/languageProcessing/researches/keywordCount.js rename to packages/yoastseo/src/languageProcessing/researches/keyphraseCount.js index c01fb451784..abaff81ec8e 100644 --- a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js +++ b/packages/yoastseo/src/languageProcessing/researches/keyphraseCount.js @@ -5,7 +5,17 @@ import getSentencesFromTree from "../helpers/sentence/getSentencesFromTree"; import { collectMarkingsInSentence, markWordsInASentence } from "../helpers/word/markWordsInSentences"; import Mark from "../../values/Mark"; -function getMatchesInSentence( sentence, matchWordCustomHelper, keyphraseForms, locale ) { +/** + * Gets the matched keyphrase form(s). + * + * @param {string} sentence The sentence to check. + * @param {Array} keyphraseForms The keyphrase forms. 
+ * @param {string} locale The locale used in the analysis. + * @param {function} matchWordCustomHelper A custom helper to match words with a text. + * + * @returns {Array} The array of matched keyphrase form(s). + */ +function getMatchesInSentence( sentence, keyphraseForms, locale, matchWordCustomHelper ) { if ( matchWordCustomHelper ) { /* * Count the amount of keyphrase occurrences in the sentences. @@ -16,11 +26,20 @@ function getMatchesInSentence( sentence, matchWordCustomHelper, keyphraseForms, return keyphraseForms.map( forms => matchTextWithArray( sentence.text, forms, locale, matchWordCustomHelper ) ); } - const tokens = sentence.tokens; - return keyphraseForms.map( forms => matchWordFormsWithTokens( forms, tokens ) ); + return keyphraseForms.map( forms => matchWordFormsWithTokens( forms, sentence.tokens ) ); } -function getMarkings( sentence, matchWordCustomHelper, matchesInSentence ) { + +/** + * Gets the Mark objects of all keyphrase matches. + * + * @param {string} sentence The sentence to check. + * @param {Array} matchesInSentence The array of keyphrase matches in the sentence. + * @param {function} matchWordCustomHelper A custom helper to match words with a text. + * + * @returns {Mark[]} The array of Mark objects of the keyphrase matches. + */ +function getMarkingsInSentence( sentence, matchesInSentence, matchWordCustomHelper ) { const foundWords = flattenDeep( matchesInSentence.map( match => match.matches ) ); if ( matchWordCustomHelper ) { @@ -29,7 +48,7 @@ function getMarkings( sentence, matchWordCustomHelper, matchesInSentence ) { const wordTexts = foundWords.map( wordB => wordB.text ); - return foundWords.map( word => + return foundWords.map( word => new Mark( { position: { startOffset: word.sourceCodeRange.startOffset, endOffset: word.sourceCodeRange.endOffset }, @@ -37,16 +56,19 @@ function getMarkings( sentence, matchWordCustomHelper, matchesInSentence ) { original: sentence.text, } ) ); } + /** + * Counts the occurrences of the keyphrase in the text and creates the Mark objects for the matches. + * + * @param {Array} sentences The sentences to check. + * @param {Array} topicForms The keyphrase forms. + * @param {function} matchWordCustomHelper A custom helper to match words with a text. + * @param {string} locale The locale used in the analysis. * - * @param sentences - * @param topicForms - * @param matchWordCustomHelper - * @param locale - * @returns {{markings: *[], count: number}} + * @returns {{markings: Mark[], count: number}} The number of keyphrase occurrences in the text and the Mark objects of the matches. 
*/ -function countKeyphraseOccurrences( sentences, topicForms, matchWordCustomHelper, locale ) { - return sentences.reduce( ( sentence, acc ) => { +export function countKeyphraseInText( sentences, topicForms, matchWordCustomHelper, locale ) { + return sentences.reduce( ( acc, sentence ) => { const matchesInSentence = getMatchesInSentence( sentence, matchWordCustomHelper, topicForms.keyphraseForms, locale ); const hasAllKeywords = matchesInSentence.every( form => form.count > 0 ); @@ -55,7 +77,7 @@ function countKeyphraseOccurrences( sentences, topicForms, matchWordCustomHelper return acc; } - const markings = getMarkings( sentence, matchWordCustomHelper, matchesInSentence ); + const markings = getMarkingsInSentence( sentence, matchWordCustomHelper, matchesInSentence ); return { count: acc.count + min( matchesInSentence.map( match => match.count ) ), @@ -73,9 +95,9 @@ function countKeyphraseOccurrences( sentences, topicForms, matchWordCustomHelper * @param {Paper} paper The paper containing keyphrase and text. * @param {Researcher} researcher The researcher. * - * @returns {object} An array of all the matches, markings and the keyphrase count. + * @returns {Object} An object containing an array of all the matches, markings and the keyphrase count. */ -export default function( paper, researcher ) { +export default function keyphraseCount( paper, researcher ) { const topicForms = researcher.getResearch( "morphology" ); // A helper to return all the matches for the keyphrase. @@ -85,7 +107,7 @@ export default function( paper, researcher ) { const sentences = getSentencesFromTree( paper ); - const keywordsFound = countKeyphraseOccurrences( sentences, topicForms, matchWordCustomHelper, locale ); + const keywordsFound = countKeyphraseInText( sentences, topicForms, matchWordCustomHelper, locale ); return { count: keywordsFound.count, @@ -93,3 +115,18 @@ export default function( paper, researcher ) { length: topicForms.keyphraseForms.length, }; } + +/** + * Calculates the keyphrase count, takes morphology into account. + * + * @deprecated Since version 20.8. Use keywordCountInSlug instead. + * + * @param {Paper} paper The paper containing keyphrase and text. + * @param {Researcher} researcher The researcher. + * + * @returns {Object} An array of all the matches, markings and the keyphrase count. + */ +export function keywordCount( paper, researcher ) { + console.warn( "This function is deprecated, use keyphraseCount instead." 
); + return keyphraseCount( paper, researcher ); +} From 3c52c543a48923c2cd046f73784843c7e576d8a4 Mon Sep 17 00:00:00 2001 From: FAMarfuaty Date: Thu, 13 Apr 2023 10:59:48 +0200 Subject: [PATCH 019/616] Use keyphraseCount research --- .../src/scoring/assessments/seo/KeywordDensityAssessment.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/yoastseo/src/scoring/assessments/seo/KeywordDensityAssessment.js b/packages/yoastseo/src/scoring/assessments/seo/KeywordDensityAssessment.js index 6a27d4f0abb..d0eb13b4376 100644 --- a/packages/yoastseo/src/scoring/assessments/seo/KeywordDensityAssessment.js +++ b/packages/yoastseo/src/scoring/assessments/seo/KeywordDensityAssessment.js @@ -5,7 +5,7 @@ import recommendedKeywordCount from "../../helpers/assessments/recommendedKeywor import Assessment from "../assessment"; import AssessmentResult from "../../../values/AssessmentResult"; import { inRangeEndInclusive, inRangeStartEndInclusive, inRangeStartInclusive } from "../../helpers/assessments/inRange"; -import { createAnchorOpeningTag } from "../../../helpers/shortlinker"; +import { createAnchorOpeningTag } from "../../../helpers"; import keyphraseLengthFactor from "../../helpers/assessments/keyphraseLengthFactor.js"; /** @@ -101,7 +101,7 @@ class KeywordDensityAssessment extends Assessment { */ getResult( paper, researcher ) { const customGetWords = researcher.getHelper( "getWordsCustomHelper" ); - this._keyphraseCount = researcher.getResearch( "keywordCount" ); + this._keyphraseCount = researcher.getResearch( "keyphraseCount" ); const keyphraseLength = this._keyphraseCount.length; const assessmentResult = new AssessmentResult(); From 07e33770723f32ba46235f7b662e0a0cbf5ae653 Mon Sep 17 00:00:00 2001 From: FAMarfuaty Date: Fri, 14 Apr 2023 16:10:36 +0200 Subject: [PATCH 020/616] deprecate research and assessment with keyword word --- .../spec/fullTextTests/runFullTextTests.js | 4 +-- .../fullTextTests/runFullTextTests.js | 8 ++--- .../languageProcessing/AbstractResearcher.js | 5 +-- .../researches/getKeywordDensity.js | 29 +++++++++++----- .../{keyphraseCount.js => keywordCount.js} | 29 ++++++++-------- .../yoastseo/src/scoring/assessments/index.js | 3 +- .../seo/KeywordDensityAssessment.js | 34 ++++++++++++++++--- .../cornerstone/relatedKeywordAssessor.js | 4 +-- .../cornerstone/seoAssessor.js | 4 +-- .../collectionPages/relatedKeywordAssessor.js | 4 +-- .../scoring/collectionPages/seoAssessor.js | 4 +-- .../cornerstone/relatedKeywordAssessor.js | 4 +-- .../src/scoring/cornerstone/seoAssessor.js | 4 +-- .../cornerstone/relatedKeywordAssessor.js | 4 +-- .../productPages/cornerstone/seoAssessor.js | 4 +-- .../productPages/relatedKeywordAssessor.js | 4 +-- .../src/scoring/productPages/seoAssessor.js | 2 +- .../src/scoring/relatedKeywordAssessor.js | 4 +-- .../scoring/relatedKeywordTaxonomyAssessor.js | 4 +-- packages/yoastseo/src/scoring/seoAssessor.js | 4 +-- .../cornerstone/relatedKeywordAssessor.js | 4 +-- .../cornerstone/seoAssessor.js | 4 +-- .../relatedKeywordAssessor.js | 4 +-- .../scoring/storePostsAndPages/seoAssessor.js | 4 +-- .../yoastseo/src/scoring/taxonomyAssessor.js | 4 +-- 25 files changed, 111 insertions(+), 71 deletions(-) rename packages/yoastseo/src/languageProcessing/researches/{keyphraseCount.js => keywordCount.js} (82%) diff --git a/packages/yoastseo/spec/fullTextTests/runFullTextTests.js b/packages/yoastseo/spec/fullTextTests/runFullTextTests.js index c599a32d4d5..299b0693c11 100644 --- a/packages/yoastseo/spec/fullTextTests/runFullTextTests.js +++ 
b/packages/yoastseo/spec/fullTextTests/runFullTextTests.js @@ -8,7 +8,7 @@ import buildTree from "../specHelpers/parse/buildTree"; // Import SEO assessments import IntroductionKeywordAssessment from "../../src/scoring/assessments/seo/IntroductionKeywordAssessment"; import KeyphraseLengthAssessment from "../../src/scoring/assessments/seo/KeyphraseLengthAssessment"; -import KeywordDensityAssessment from "../../src/scoring/assessments/seo/KeywordDensityAssessment"; +import KeyphraseDensityAssessment from "../../src/scoring/assessments/seo/KeywordDensityAssessment"; import MetaDescriptionKeywordAssessment from "../../src/scoring/assessments/seo/MetaDescriptionKeywordAssessment"; import MetaDescriptionLengthAssessment from "../../src/scoring/assessments/seo/MetaDescriptionLengthAssessment"; import SubheadingsKeywordAssessment from "../../src/scoring/assessments/seo/SubHeadingsKeywordAssessment"; @@ -96,7 +96,7 @@ testPapers.forEach( function( testPaper ) { } ); it( "returns a score and the associated feedback text for the keywordDensity assessment", function() { - compare( new KeywordDensityAssessment(), expectedResults.keywordDensity ); + compare( new KeyphraseDensityAssessment(), expectedResults.keywordDensity ); } ); it( "returns a score and the associated feedback text for the metaDescriptionKeyword assessment", function() { diff --git a/packages/yoastseo/spec/scoring/collectionPages/fullTextTests/runFullTextTests.js b/packages/yoastseo/spec/scoring/collectionPages/fullTextTests/runFullTextTests.js index 4bae9d573d1..c95c29f76b1 100644 --- a/packages/yoastseo/spec/scoring/collectionPages/fullTextTests/runFullTextTests.js +++ b/packages/yoastseo/spec/scoring/collectionPages/fullTextTests/runFullTextTests.js @@ -11,7 +11,7 @@ import buildTree from "../../../../../yoastseo/spec/specHelpers/parse/buildTree" // Import SEO assessments. 
import IntroductionKeywordAssessment from "../../../../src/scoring/assessments/seo/IntroductionKeywordAssessment"; import KeyphraseLengthAssessment from "../../../../src/scoring/assessments/seo/KeyphraseLengthAssessment"; -import KeywordDensityAssessment from "../../../../src/scoring/assessments/seo/KeywordDensityAssessment"; +import KeyphraseDensityAssessment from "../../../../src/scoring/assessments/seo/KeywordDensityAssessment"; import MetaDescriptionKeywordAssessment from "../../../../src/scoring/assessments/seo/MetaDescriptionKeywordAssessment"; import MetaDescriptionLengthAssessment from "../../../../src/scoring/assessments/seo/MetaDescriptionLengthAssessment"; import TextLengthAssessment from "../../../../src/scoring/assessments/seo/TextLengthAssessment"; @@ -67,7 +67,7 @@ testPapers.forEach( function( testPaper ) { urlTitle: createAnchorOpeningTag( "https://yoa.st/shopify10" ), urlCallToAction: createAnchorOpeningTag( "https://yoa.st/shopify11" ), } ); - const keywordDensityAssessment = new KeywordDensityAssessment( { + const keyphraseDensityAssessment = new KeyphraseDensityAssessment( { urlTitle: createAnchorOpeningTag( "https://yoa.st/shopify12" ), urlCallToAction: createAnchorOpeningTag( "https://yoa.st/shopify13" ), } ); @@ -169,11 +169,11 @@ testPapers.forEach( function( testPaper ) { } ); it( "returns a score and the associated feedback text for the keywordDensity assessment", function() { - const isApplicable = keywordDensityAssessment.isApplicable( paper, researcher ); + const isApplicable = keyphraseDensityAssessment.isApplicable( paper, researcher ); expect( isApplicable ).toBe( expectedResults.keywordDensity.isApplicable ); if ( isApplicable ) { - result.keywordDensity = keywordDensityAssessment.getResult( + result.keywordDensity = keyphraseDensityAssessment.getResult( paper, researcher ); diff --git a/packages/yoastseo/src/languageProcessing/AbstractResearcher.js b/packages/yoastseo/src/languageProcessing/AbstractResearcher.js index d1b188a93af..44785dcde94 100644 --- a/packages/yoastseo/src/languageProcessing/AbstractResearcher.js +++ b/packages/yoastseo/src/languageProcessing/AbstractResearcher.js @@ -12,7 +12,7 @@ import findTransitionWords from "./researches/findTransitionWords"; import functionWordsInKeyphrase from "./researches/functionWordsInKeyphrase"; import getAnchorsWithKeyphrase from "./researches/getAnchorsWithKeyphrase"; import getFleschReadingScore from "./researches/getFleschReadingScore"; -import getKeywordDensity from "./researches/getKeywordDensity.js"; +import getKeyphraseDensity, { getKeywordDensity } from "./researches/getKeywordDensity.js"; import getLinks from "./researches/getLinks.js"; import getLinkStatistics from "./researches/getLinkStatistics"; import getParagraphLength from "./researches/getParagraphLength.js"; @@ -24,7 +24,7 @@ import getSubheadingTextLengths from "./researches/getSubheadingTextLengths.js"; import h1s from "./researches/h1s"; import imageCount from "./researches/imageCount.js"; import keyphraseLength from "./researches/keyphraseLength"; -import keyphraseCount, { keywordCount } from "./researches/keyphraseCount"; +import keyphraseCount, { keywordCount } from "./researches/keywordCount"; import { keywordCountInSlug, keywordCountInUrl } from "./researches/keywordCountInUrl"; import matchKeywordInSubheadings from "./researches/matchKeywordInSubheadings"; import metaDescriptionKeyword from "./researches/metaDescriptionKeyword"; @@ -62,6 +62,7 @@ export default class AbstractResearcher { functionWordsInKeyphrase, 
 	getAnchorsWithKeyphrase,
 	getFleschReadingScore,
+	getKeyphraseDensity,
 	getKeywordDensity,
 	getLinks,
 	getLinkStatistics,
diff --git a/packages/yoastseo/src/languageProcessing/researches/getKeywordDensity.js b/packages/yoastseo/src/languageProcessing/researches/getKeywordDensity.js
index 59a9e0592ad..f8dc65cb9c1 100644
--- a/packages/yoastseo/src/languageProcessing/researches/getKeywordDensity.js
+++ b/packages/yoastseo/src/languageProcessing/researches/getKeywordDensity.js
@@ -1,16 +1,14 @@
-/** @module analyses/getKeywordDensity */
-
 import countWords from "../helpers/word/countWords.js";
 
 /**
- * Calculates the keyword density.
+ * Calculates the keyphrase density.
  *
- * @param {Object} paper The paper containing keyword and text.
+ * @param {Object} paper The paper containing keyphrase and text.
  * @param {Object} researcher The researcher.
  *
- * @returns {Object} The keyword density.
+ * @returns {Object} The keyphrase density.
  */
-export default function( paper, researcher ) {
+export default function getKeyphraseDensity( paper, researcher ) {
 	const getWordsCustomHelper = researcher.getHelper( "getWordsCustomHelper" );
 
 	let wordCount = countWords( paper.getText() );
@@ -23,7 +21,22 @@ export default function( paper, researcher ) {
 		return 0;
 	}
 
-	const keywordCount = researcher.getResearch( "keywordCount" );
+	const keyphraseCount = researcher.getResearch( "keyphraseCount" );
+
+	return ( keyphraseCount.count / wordCount ) * 100;
+}
 
-	return ( keywordCount.count / wordCount ) * 100;
+/**
+ * Calculates the keyphrase density.
+ *
+ * @deprecated Since version 20.8. Use getKeyphraseDensity instead.
+ *
+ * @param {Object} paper The paper containing keyphrase and text.
+ * @param {Object} researcher The researcher.
+ *
+ * @returns {Object} The keyphrase density.
+ */
+export function getKeywordDensity( paper, researcher ) {
+	console.warn( "This function is deprecated, use getKeyphraseDensity instead." );
+	return getKeyphraseDensity( paper, researcher );
 }
diff --git a/packages/yoastseo/src/languageProcessing/researches/keyphraseCount.js b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js
similarity index 82%
rename from packages/yoastseo/src/languageProcessing/researches/keyphraseCount.js
rename to packages/yoastseo/src/languageProcessing/researches/keywordCount.js
index abaff81ec8e..29679bfaa31 100644
--- a/packages/yoastseo/src/languageProcessing/researches/keyphraseCount.js
+++ b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js
@@ -17,19 +17,12 @@ import Mark from "../../values/Mark";
  */
 function getMatchesInSentence( sentence, keyphraseForms, locale, matchWordCustomHelper ) {
 	if ( matchWordCustomHelper ) {
-		/*
-		 * Count the amount of keyphrase occurrences in the sentences.
-		 * An occurrence is counted when all keywords of the keyphrase are contained within the sentence.
-		 * Each sentence can contain multiple keyphrases.
-		 * (e.g. "The apple potato is an apple and a potato." has two occurrences of the keyphrase "apple potato").
-		 * */
 		return keyphraseForms.map( forms => matchTextWithArray( sentence.text, forms, locale, matchWordCustomHelper ) );
 	}
 
 	return keyphraseForms.map( forms => matchWordFormsWithTokens( forms, sentence.tokens ) );
 }
 
-
 /**
  * Gets the Mark objects of all keyphrase matches.
  *
@@ -62,22 +55,28 @@ function getMarkingsInSentence( sentence, matchesInSentence, matchWordCustomHelp
  *
  * @param {Array} sentences The sentences to check.
  * @param {Array} topicForms The keyphrase forms.
- * @param {function} matchWordCustomHelper A custom helper to match words with a text.
  * @param {string} locale The locale used in the analysis.
+ * @param {function} matchWordCustomHelper A custom helper to match words with a text.
  *
  * @returns {{markings: Mark[], count: number}} The number of keyphrase occurrences in the text and the Mark objects of the matches.
  */
-export function countKeyphraseInText( sentences, topicForms, matchWordCustomHelper, locale ) {
+export function countKeyphraseInText( sentences, topicForms, locale, matchWordCustomHelper ) {
 	return sentences.reduce( ( acc, sentence ) => {
-		const matchesInSentence = getMatchesInSentence( sentence, matchWordCustomHelper, topicForms.keyphraseForms, locale );
+		const matchesInSentence = getMatchesInSentence( sentence, topicForms.keyphraseForms, locale, matchWordCustomHelper );
 
+		/*
+		 * Check if all words of the keyphrase are found in the sentence.
+		 * One keyphrase occurrence is counted when all words of the keyphrase are contained within the sentence.
+		 * Each sentence can contain multiple keyphrases.
+		 * (e.g. "The apple potato is an apple and a potato." has two occurrences of the keyphrase "apple potato").
+		 */
 		const hasAllKeywords = matchesInSentence.every( form => form.count > 0 );
 
 		if ( ! hasAllKeywords ) {
 			return acc;
 		}
-
-		const markings = getMarkingsInSentence( sentence, matchWordCustomHelper, matchesInSentence );
+		// Get the Mark objects of all keyphrase occurrences in the sentence.
+		const markings = getMarkingsInSentence( sentence, matchesInSentence, matchWordCustomHelper );
 
 		return {
 			count: acc.count + min( matchesInSentence.map( match => match.count ) ),
@@ -107,11 +106,11 @@ export default function keyphraseCount( paper, researcher ) {
 
 	const sentences = getSentencesFromTree( paper );
 
-	const keywordsFound = countKeyphraseInText( sentences, topicForms, matchWordCustomHelper, locale );
+	const keyphraseFound = countKeyphraseInText( sentences, topicForms, locale, matchWordCustomHelper );
 
 	return {
-		count: keywordsFound.count,
-		markings: flatten( keywordsFound.markings ),
+		count: keyphraseFound.count,
+		markings: flatten( keyphraseFound.markings ),
 		length: topicForms.keyphraseForms.length,
 	};
 }
diff --git a/packages/yoastseo/src/scoring/assessments/index.js b/packages/yoastseo/src/scoring/assessments/index.js
index c3e0c6a3c44..b482613b67f 100644
--- a/packages/yoastseo/src/scoring/assessments/index.js
+++ b/packages/yoastseo/src/scoring/assessments/index.js
@@ -12,7 +12,7 @@ import FunctionWordsInKeyphraseAssessment from "./seo/FunctionWordsInKeyphraseAs
 import InternalLinksAssessment from "./seo/InternalLinksAssessment";
 import IntroductionKeywordAssessment from "./seo/IntroductionKeywordAssessment";
 import KeyphraseLengthAssessment from "./seo/KeyphraseLengthAssessment";
-import KeywordDensityAssessment from "./seo/KeywordDensityAssessment";
+import KeyphraseDensityAssessment, { KeywordDensityAssessment } from "./seo/KeywordDensityAssessment";
 import MetaDescriptionKeywordAssessment from "./seo/MetaDescriptionKeywordAssessment";
 import MetaDescriptionLengthAssessment from "./seo/MetaDescriptionLengthAssessment";
 import OutboundLinksAssessment from "./seo/OutboundLinksAssessment";
@@ -44,6 +44,7 @@ const seo = {
 	InternalLinksAssessment,
 	IntroductionKeywordAssessment,
 	KeyphraseLengthAssessment,
+	KeyphraseDensityAssessment,
 	KeywordDensityAssessment,
 	MetaDescriptionKeywordAssessment,
 	MetaDescriptionLengthAssessment,
diff --git a/packages/yoastseo/src/scoring/assessments/seo/KeywordDensityAssessment.js
b/packages/yoastseo/src/scoring/assessments/seo/KeywordDensityAssessment.js index d0eb13b4376..c39765b8a7c 100644 --- a/packages/yoastseo/src/scoring/assessments/seo/KeywordDensityAssessment.js +++ b/packages/yoastseo/src/scoring/assessments/seo/KeywordDensityAssessment.js @@ -11,7 +11,7 @@ import keyphraseLengthFactor from "../../helpers/assessments/keyphraseLengthFact /** * Represents the assessment that will look if the keyphrase density is within the recommended range. */ -class KeywordDensityAssessment extends Assessment { +class KeyphraseDensityAssessment extends Assessment { /** * Sets the identifier and the config. * @@ -65,7 +65,7 @@ class KeywordDensityAssessment extends Assessment { applicableIfTextLongerThan: 100, }; - this.identifier = "keywordDensity"; + this.identifier = "keyphraseDensity"; this._config = merge( defaultConfig, config ); } @@ -106,7 +106,7 @@ class KeywordDensityAssessment extends Assessment { const assessmentResult = new AssessmentResult(); - this._keyphraseDensity = researcher.getResearch( "getKeywordDensity" ); + this._keyphraseDensity = researcher.getResearch( "getKeyphraseDensity" ); this._hasMorphologicalForms = researcher.getData( "morphology" ) !== false; @@ -334,4 +334,30 @@ class KeywordDensityAssessment extends Assessment { } } -export default KeywordDensityAssessment; +/** + * This assessment checks if the keyphrase density is within the recommended range. + * KeywordDensityAssessment was the previous name for KeyphraseDensityAssessment (hence the name of this file). + * We keep (and expose) this assessment for backwards compatibility. + * + * @deprecated Since version 18.8 Use KeyphraseDensityAssessment instead. + */ +class KeywordDensityAssessment extends KeyphraseDensityAssessment { + /** + * Sets the identifier and the config. + * + * @param {Object} config The configuration to use. + * @returns {void} + */ + constructor( config = {} ) { + super( config ); + this.identifier = "keywordDensity"; + console.warn( "This object is deprecated, use KeyphraseDensityAssessment instead." 
); + } +} + +export { + KeyphraseDensityAssessment, + KeywordDensityAssessment, +}; + +export default KeyphraseDensityAssessment; diff --git a/packages/yoastseo/src/scoring/collectionPages/cornerstone/relatedKeywordAssessor.js b/packages/yoastseo/src/scoring/collectionPages/cornerstone/relatedKeywordAssessor.js index e01202bf435..70f2f6c2275 100644 --- a/packages/yoastseo/src/scoring/collectionPages/cornerstone/relatedKeywordAssessor.js +++ b/packages/yoastseo/src/scoring/collectionPages/cornerstone/relatedKeywordAssessor.js @@ -6,7 +6,7 @@ const { createAnchorOpeningTag } = helpers; const { IntroductionKeywordAssessment, KeyphraseLengthAssessment, - KeywordDensityAssessment, + KeyphraseDensityAssessment, MetaDescriptionKeywordAssessment, FunctionWordsInKeyphraseAssessment, } = assessments.seo; @@ -33,7 +33,7 @@ const CollectionCornerstoneRelatedKeywordAssessor = function( researcher, option urlTitle: createAnchorOpeningTag( "https://yoa.st/shopify10" ), urlCallToAction: createAnchorOpeningTag( "https://yoa.st/shopify11" ), } ), - new KeywordDensityAssessment( { + new KeyphraseDensityAssessment( { urlTitle: createAnchorOpeningTag( "https://yoa.st/shopify12" ), urlCallToAction: createAnchorOpeningTag( "https://yoa.st/shopify13" ), } ), diff --git a/packages/yoastseo/src/scoring/collectionPages/cornerstone/seoAssessor.js b/packages/yoastseo/src/scoring/collectionPages/cornerstone/seoAssessor.js index adc19bf1359..d92bb9f52b7 100644 --- a/packages/yoastseo/src/scoring/collectionPages/cornerstone/seoAssessor.js +++ b/packages/yoastseo/src/scoring/collectionPages/cornerstone/seoAssessor.js @@ -6,7 +6,7 @@ const { createAnchorOpeningTag } = helpers; const { IntroductionKeywordAssessment, KeyphraseLengthAssessment, - KeywordDensityAssessment, + KeyphraseDensityAssessment, MetaDescriptionKeywordAssessment, KeyphraseInSEOTitleAssessment, SlugKeywordAssessment, @@ -38,7 +38,7 @@ const CollectionCornerstoneSEOAssessor = function( researcher, options ) { urlTitle: createAnchorOpeningTag( "https://yoa.st/shopify10" ), urlCallToAction: createAnchorOpeningTag( "https://yoa.st/shopify11" ), } ), - new KeywordDensityAssessment( { + new KeyphraseDensityAssessment( { urlTitle: createAnchorOpeningTag( "https://yoa.st/shopify12" ), urlCallToAction: createAnchorOpeningTag( "https://yoa.st/shopify13" ), } ), diff --git a/packages/yoastseo/src/scoring/collectionPages/relatedKeywordAssessor.js b/packages/yoastseo/src/scoring/collectionPages/relatedKeywordAssessor.js index fd469e12255..fddad61f666 100644 --- a/packages/yoastseo/src/scoring/collectionPages/relatedKeywordAssessor.js +++ b/packages/yoastseo/src/scoring/collectionPages/relatedKeywordAssessor.js @@ -6,7 +6,7 @@ const { createAnchorOpeningTag } = helpers; const { IntroductionKeywordAssessment, KeyphraseLengthAssessment, - KeywordDensityAssessment, + KeyphraseDensityAssessment, MetaDescriptionKeywordAssessment, FunctionWordsInKeyphraseAssessment, } = assessments.seo; @@ -33,7 +33,7 @@ const CollectionRelatedKeywordAssessor = function( researcher, options ) { urlTitle: createAnchorOpeningTag( "https://yoa.st/shopify10" ), urlCallToAction: createAnchorOpeningTag( "https://yoa.st/shopify11" ), } ), - new KeywordDensityAssessment( { + new KeyphraseDensityAssessment( { urlTitle: createAnchorOpeningTag( "https://yoa.st/shopify12" ), urlCallToAction: createAnchorOpeningTag( "https://yoa.st/shopify13" ), } ), diff --git a/packages/yoastseo/src/scoring/collectionPages/seoAssessor.js b/packages/yoastseo/src/scoring/collectionPages/seoAssessor.js index 
755ed6aa011..8adcad31a61 100644 --- a/packages/yoastseo/src/scoring/collectionPages/seoAssessor.js +++ b/packages/yoastseo/src/scoring/collectionPages/seoAssessor.js @@ -6,7 +6,7 @@ const { createAnchorOpeningTag } = helpers; const { IntroductionKeywordAssessment, KeyphraseLengthAssessment, - KeywordDensityAssessment, + KeyphraseDensityAssessment, MetaDescriptionKeywordAssessment, KeyphraseInSEOTitleAssessment, SlugKeywordAssessment, @@ -37,7 +37,7 @@ const CollectionSEOAssessor = function( researcher, options ) { urlTitle: createAnchorOpeningTag( "https://yoa.st/shopify10" ), urlCallToAction: createAnchorOpeningTag( "https://yoa.st/shopify11" ), } ), - new KeywordDensityAssessment( { + new KeyphraseDensityAssessment( { urlTitle: createAnchorOpeningTag( "https://yoa.st/shopify12" ), urlCallToAction: createAnchorOpeningTag( "https://yoa.st/shopify13" ), } ), diff --git a/packages/yoastseo/src/scoring/cornerstone/relatedKeywordAssessor.js b/packages/yoastseo/src/scoring/cornerstone/relatedKeywordAssessor.js index 47ff0d74a46..ca825e19bae 100644 --- a/packages/yoastseo/src/scoring/cornerstone/relatedKeywordAssessor.js +++ b/packages/yoastseo/src/scoring/cornerstone/relatedKeywordAssessor.js @@ -3,7 +3,7 @@ import Assessor from "../assessor.js"; import IntroductionKeyword from "../assessments/seo/IntroductionKeywordAssessment.js"; import KeyphraseLength from "../assessments/seo/KeyphraseLengthAssessment.js"; -import KeywordDensity from "../assessments/seo/KeywordDensityAssessment.js"; +import KeyphraseDensityAssessment from "../assessments/seo/KeywordDensityAssessment.js"; import MetaDescriptionKeyword from "../assessments/seo/MetaDescriptionKeywordAssessment.js"; import TextCompetingLinks from "../assessments/seo/TextCompetingLinksAssessment.js"; import FunctionWordsInKeyphrase from "../assessments/seo/FunctionWordsInKeyphraseAssessment"; @@ -25,7 +25,7 @@ const relatedKeywordAssessor = function( researcher, options ) { this._assessments = [ new IntroductionKeyword(), new KeyphraseLength( { isRelatedKeyphrase: true } ), - new KeywordDensity(), + new KeyphraseDensityAssessment(), new MetaDescriptionKeyword(), new TextCompetingLinks(), new FunctionWordsInKeyphrase(), diff --git a/packages/yoastseo/src/scoring/cornerstone/seoAssessor.js b/packages/yoastseo/src/scoring/cornerstone/seoAssessor.js index 2b8a4bde863..d816dcd6c51 100644 --- a/packages/yoastseo/src/scoring/cornerstone/seoAssessor.js +++ b/packages/yoastseo/src/scoring/cornerstone/seoAssessor.js @@ -2,7 +2,7 @@ import { inherits } from "util"; import IntroductionKeywordAssessment from "../assessments/seo/IntroductionKeywordAssessment"; import KeyphraseLengthAssessment from "../assessments/seo/KeyphraseLengthAssessment"; -import KeywordDensityAssessment from "../assessments/seo/KeywordDensityAssessment"; +import KeyphraseDensityAssessment from "../assessments/seo/KeywordDensityAssessment"; import MetaDescriptionKeywordAssessment from "../assessments/seo/MetaDescriptionKeywordAssessment"; import TextCompetingLinksAssessment from "../assessments/seo/TextCompetingLinksAssessment"; import InternalLinksAssessment from "../assessments/seo/InternalLinksAssessment"; @@ -36,7 +36,7 @@ const CornerstoneSEOAssessor = function( researcher, options ) { this._assessments = [ new IntroductionKeywordAssessment(), new KeyphraseLengthAssessment(), - new KeywordDensityAssessment(), + new KeyphraseDensityAssessment(), new MetaDescriptionKeywordAssessment(), new MetaDescriptionLength( { scores: { diff --git 
a/packages/yoastseo/src/scoring/productPages/cornerstone/relatedKeywordAssessor.js b/packages/yoastseo/src/scoring/productPages/cornerstone/relatedKeywordAssessor.js index 0a82c18f8eb..ddd75eadd4c 100644 --- a/packages/yoastseo/src/scoring/productPages/cornerstone/relatedKeywordAssessor.js +++ b/packages/yoastseo/src/scoring/productPages/cornerstone/relatedKeywordAssessor.js @@ -6,7 +6,7 @@ const { createAnchorOpeningTag } = helpers; const { IntroductionKeywordAssessment, KeyphraseLengthAssessment, - KeywordDensityAssessment, + KeyphraseDensityAssessment, MetaDescriptionKeywordAssessment, TextCompetingLinksAssessment, ImageKeyphraseAssessment, @@ -41,7 +41,7 @@ const ProductCornerStoneRelatedKeywordAssessor = function( researcher, options ) urlTitle: createAnchorOpeningTag( options.keyphraseLengthUrlTitle ), urlCallToAction: createAnchorOpeningTag( options.keyphraseLengthCTAUrl ), }, true ), - new KeywordDensityAssessment( { + new KeyphraseDensityAssessment( { urlTitle: createAnchorOpeningTag( options.keyphraseDensityUrlTitle ), urlCallToAction: createAnchorOpeningTag( options.keyphraseDensityCTAUrl ), } ), diff --git a/packages/yoastseo/src/scoring/productPages/cornerstone/seoAssessor.js b/packages/yoastseo/src/scoring/productPages/cornerstone/seoAssessor.js index b939869d116..49ed51b5057 100644 --- a/packages/yoastseo/src/scoring/productPages/cornerstone/seoAssessor.js +++ b/packages/yoastseo/src/scoring/productPages/cornerstone/seoAssessor.js @@ -12,7 +12,7 @@ const { createAnchorOpeningTag } = helpers; const { IntroductionKeywordAssessment, KeyphraseLengthAssessment, - KeywordDensityAssessment, + KeyphraseDensityAssessment, MetaDescriptionKeywordAssessment, TextCompetingLinksAssessment, KeyphraseInSEOTitleAssessment, @@ -54,7 +54,7 @@ const ProductCornerstoneSEOAssessor = function( researcher, options ) { urlTitle: createAnchorOpeningTag( options.keyphraseLengthUrlTitle ), urlCallToAction: createAnchorOpeningTag( options.keyphraseLengthCTAUrl ), }, true ), - new KeywordDensityAssessment( { + new KeyphraseDensityAssessment( { urlTitle: createAnchorOpeningTag( options.keyphraseDensityUrlTitle ), urlCallToAction: createAnchorOpeningTag( options.keyphraseDensityCTAUrl ), } ), diff --git a/packages/yoastseo/src/scoring/productPages/relatedKeywordAssessor.js b/packages/yoastseo/src/scoring/productPages/relatedKeywordAssessor.js index 1d1a112d453..941900517d9 100644 --- a/packages/yoastseo/src/scoring/productPages/relatedKeywordAssessor.js +++ b/packages/yoastseo/src/scoring/productPages/relatedKeywordAssessor.js @@ -6,7 +6,7 @@ const { createAnchorOpeningTag } = helpers; const { IntroductionKeywordAssessment, KeyphraseLengthAssessment, - KeywordDensityAssessment, + KeyphraseDensityAssessment, MetaDescriptionKeywordAssessment, TextCompetingLinksAssessment, ImageKeyphraseAssessment, @@ -41,7 +41,7 @@ const ProductRelatedKeywordAssessor = function( researcher, options ) { urlTitle: createAnchorOpeningTag( options.keyphraseLengthUrlTitle ), urlCallToAction: createAnchorOpeningTag( options.keyphraseLengthCTAUrl ), }, true ), - new KeywordDensityAssessment( { + new KeyphraseDensityAssessment( { urlTitle: createAnchorOpeningTag( options.keyphraseDensityUrlTitle ), urlCallToAction: createAnchorOpeningTag( options.keyphraseDensityCTAUrl ), } ), diff --git a/packages/yoastseo/src/scoring/productPages/seoAssessor.js b/packages/yoastseo/src/scoring/productPages/seoAssessor.js index 9194b1ce98e..3a9d24799e9 100644 --- a/packages/yoastseo/src/scoring/productPages/seoAssessor.js +++ 
b/packages/yoastseo/src/scoring/productPages/seoAssessor.js @@ -12,7 +12,7 @@ const { createAnchorOpeningTag } = helpers; const { IntroductionKeywordAssessment, KeyphraseLengthAssessment, - KeywordDensityAssessment, + KeyphraseDensityAssessment, MetaDescriptionKeywordAssessment, TextCompetingLinksAssessment, KeyphraseInSEOTitleAssessment, diff --git a/packages/yoastseo/src/scoring/relatedKeywordAssessor.js b/packages/yoastseo/src/scoring/relatedKeywordAssessor.js index bfc0ae07fb3..ddf792d9abf 100644 --- a/packages/yoastseo/src/scoring/relatedKeywordAssessor.js +++ b/packages/yoastseo/src/scoring/relatedKeywordAssessor.js @@ -3,7 +3,7 @@ import { inherits } from "util"; import Assessor from "./assessor.js"; import IntroductionKeyword from "./assessments/seo/IntroductionKeywordAssessment.js"; import KeyphraseLength from "./assessments/seo/KeyphraseLengthAssessment.js"; -import KeywordDensity from "./assessments/seo/KeywordDensityAssessment.js"; +import KeyphraseDensityAssessment from "./assessments/seo/KeywordDensityAssessment.js"; import MetaDescriptionKeyword from "./assessments/seo/MetaDescriptionKeywordAssessment.js"; import ImageKeyphrase from "./assessments/seo/KeyphraseInImageTextAssessment"; import TextCompetingLinks from "./assessments/seo/TextCompetingLinksAssessment.js"; @@ -25,7 +25,7 @@ const relatedKeywordAssessor = function( researcher, options ) { this._assessments = [ new IntroductionKeyword(), new KeyphraseLength( { isRelatedKeyphrase: true } ), - new KeywordDensity(), + new KeyphraseDensityAssessment(), new MetaDescriptionKeyword(), new TextCompetingLinks(), new FunctionWordsInKeyphrase(), diff --git a/packages/yoastseo/src/scoring/relatedKeywordTaxonomyAssessor.js b/packages/yoastseo/src/scoring/relatedKeywordTaxonomyAssessor.js index c0c23f765e6..bb75942b814 100644 --- a/packages/yoastseo/src/scoring/relatedKeywordTaxonomyAssessor.js +++ b/packages/yoastseo/src/scoring/relatedKeywordTaxonomyAssessor.js @@ -2,7 +2,7 @@ import { inherits } from "util"; import IntroductionKeywordAssessment from "./assessments/seo/IntroductionKeywordAssessment"; import KeyphraseLengthAssessment from "./assessments/seo/KeyphraseLengthAssessment"; -import KeywordDensityAssessment from "./assessments/seo/KeywordDensityAssessment"; +import KeyphraseDensityAssessment from "./assessments/seo/KeywordDensityAssessment"; import MetaDescriptionKeywordAssessment from "./assessments/seo/MetaDescriptionKeywordAssessment"; import Assessor from "./assessor"; import FunctionWordsInKeyphrase from "./assessments/seo/FunctionWordsInKeyphraseAssessment"; @@ -22,7 +22,7 @@ const RelatedKeywordTaxonomyAssessor = function( researcher, options ) { this._assessments = [ new IntroductionKeywordAssessment(), new KeyphraseLengthAssessment( { isRelatedKeyphrase: true } ), - new KeywordDensityAssessment(), + new KeyphraseDensityAssessment(), new MetaDescriptionKeywordAssessment(), // Text Images assessment here. 
new FunctionWordsInKeyphrase(), diff --git a/packages/yoastseo/src/scoring/seoAssessor.js b/packages/yoastseo/src/scoring/seoAssessor.js index 3fec383fa64..ae228ea591d 100644 --- a/packages/yoastseo/src/scoring/seoAssessor.js +++ b/packages/yoastseo/src/scoring/seoAssessor.js @@ -2,7 +2,7 @@ import { inherits } from "util"; import IntroductionKeywordAssessment from "./assessments/seo/IntroductionKeywordAssessment"; import KeyphraseLengthAssessment from "./assessments/seo/KeyphraseLengthAssessment"; -import KeywordDensityAssessment from "./assessments/seo/KeywordDensityAssessment"; +import KeyphraseDensityAssessment from "./assessments/seo/KeywordDensityAssessment"; import MetaDescriptionKeywordAssessment from "./assessments/seo/MetaDescriptionKeywordAssessment"; import TextCompetingLinksAssessment from "./assessments/seo/TextCompetingLinksAssessment"; import InternalLinksAssessment from "./assessments/seo/InternalLinksAssessment"; @@ -35,7 +35,7 @@ const SEOAssessor = function( researcher, options ) { this._assessments = [ new IntroductionKeywordAssessment(), new KeyphraseLengthAssessment(), - new KeywordDensityAssessment(), + new KeyphraseDensityAssessment(), new MetaDescriptionKeywordAssessment(), new MetaDescriptionLength(), new SubheadingsKeyword(), diff --git a/packages/yoastseo/src/scoring/storePostsAndPages/cornerstone/relatedKeywordAssessor.js b/packages/yoastseo/src/scoring/storePostsAndPages/cornerstone/relatedKeywordAssessor.js index b75edd7bba7..0b1698b4f7e 100644 --- a/packages/yoastseo/src/scoring/storePostsAndPages/cornerstone/relatedKeywordAssessor.js +++ b/packages/yoastseo/src/scoring/storePostsAndPages/cornerstone/relatedKeywordAssessor.js @@ -6,7 +6,7 @@ const { createAnchorOpeningTag } = helpers; const { IntroductionKeywordAssessment, KeyphraseLengthAssessment, - KeywordDensityAssessment, + KeyphraseDensityAssessment, MetaDescriptionKeywordAssessment, TextCompetingLinksAssessment, ImageKeyphraseAssessment, @@ -36,7 +36,7 @@ const StorePostsAndPagesCornerstoneRelatedKeywordAssessor = function( researcher urlTitle: createAnchorOpeningTag( "https://yoa.st/shopify10" ), urlCallToAction: createAnchorOpeningTag( "https://yoa.st/shopify11" ), } ), - new KeywordDensityAssessment( { + new KeyphraseDensityAssessment( { urlTitle: createAnchorOpeningTag( "https://yoa.st/shopify12" ), urlCallToAction: createAnchorOpeningTag( "https://yoa.st/shopify13" ), } ), diff --git a/packages/yoastseo/src/scoring/storePostsAndPages/cornerstone/seoAssessor.js b/packages/yoastseo/src/scoring/storePostsAndPages/cornerstone/seoAssessor.js index 87507c710f7..7df60cacb2b 100644 --- a/packages/yoastseo/src/scoring/storePostsAndPages/cornerstone/seoAssessor.js +++ b/packages/yoastseo/src/scoring/storePostsAndPages/cornerstone/seoAssessor.js @@ -6,7 +6,7 @@ const { createAnchorOpeningTag } = helpers; const { IntroductionKeywordAssessment, KeyphraseLengthAssessment, - KeywordDensityAssessment, + KeyphraseDensityAssessment, MetaDescriptionKeywordAssessment, KeyphraseInSEOTitleAssessment, SlugKeywordAssessment, @@ -45,7 +45,7 @@ const StorePostsAndPagesCornerstoneSEOAssessor = function( researcher, options ) urlTitle: createAnchorOpeningTag( "https://yoa.st/shopify10" ), urlCallToAction: createAnchorOpeningTag( "https://yoa.st/shopify11" ), } ), - new KeywordDensityAssessment( { + new KeyphraseDensityAssessment( { urlTitle: createAnchorOpeningTag( "https://yoa.st/shopify12" ), urlCallToAction: createAnchorOpeningTag( "https://yoa.st/shopify13" ), } ), diff --git 
a/packages/yoastseo/src/scoring/storePostsAndPages/relatedKeywordAssessor.js b/packages/yoastseo/src/scoring/storePostsAndPages/relatedKeywordAssessor.js index 798d4904bed..767d5ca5a92 100644 --- a/packages/yoastseo/src/scoring/storePostsAndPages/relatedKeywordAssessor.js +++ b/packages/yoastseo/src/scoring/storePostsAndPages/relatedKeywordAssessor.js @@ -6,7 +6,7 @@ const { createAnchorOpeningTag } = helpers; const { IntroductionKeywordAssessment, KeyphraseLengthAssessment, - KeywordDensityAssessment, + KeyphraseDensityAssessment, MetaDescriptionKeywordAssessment, TextCompetingLinksAssessment, ImageKeyphraseAssessment, @@ -36,7 +36,7 @@ const StorePostsAndPagesRelatedKeywordAssessor = function( researcher, options ) urlTitle: createAnchorOpeningTag( "https://yoa.st/shopify10" ), urlCallToAction: createAnchorOpeningTag( "https://yoa.st/shopify11" ), } ), - new KeywordDensityAssessment( { + new KeyphraseDensityAssessment( { urlTitle: createAnchorOpeningTag( "https://yoa.st/shopify12" ), urlCallToAction: createAnchorOpeningTag( "https://yoa.st/shopify13" ), } ), diff --git a/packages/yoastseo/src/scoring/storePostsAndPages/seoAssessor.js b/packages/yoastseo/src/scoring/storePostsAndPages/seoAssessor.js index d4bca039d35..d22decde113 100644 --- a/packages/yoastseo/src/scoring/storePostsAndPages/seoAssessor.js +++ b/packages/yoastseo/src/scoring/storePostsAndPages/seoAssessor.js @@ -6,7 +6,7 @@ const { createAnchorOpeningTag } = helpers; const { IntroductionKeywordAssessment, KeyphraseLengthAssessment, - KeywordDensityAssessment, + KeyphraseDensityAssessment, MetaDescriptionKeywordAssessment, KeyphraseInSEOTitleAssessment, SlugKeywordAssessment, @@ -45,7 +45,7 @@ const StorePostsAndPagesSEOAssessor = function( researcher, options ) { urlTitle: createAnchorOpeningTag( "https://yoa.st/shopify10" ), urlCallToAction: createAnchorOpeningTag( "https://yoa.st/shopify11" ), } ), - new KeywordDensityAssessment( { + new KeyphraseDensityAssessment( { urlTitle: createAnchorOpeningTag( "https://yoa.st/shopify12" ), urlCallToAction: createAnchorOpeningTag( "https://yoa.st/shopify13" ), } ), diff --git a/packages/yoastseo/src/scoring/taxonomyAssessor.js b/packages/yoastseo/src/scoring/taxonomyAssessor.js index 98e2978ccf2..34bc50b6d36 100644 --- a/packages/yoastseo/src/scoring/taxonomyAssessor.js +++ b/packages/yoastseo/src/scoring/taxonomyAssessor.js @@ -2,7 +2,7 @@ import { inherits } from "util"; import IntroductionKeywordAssessment from "./assessments/seo/IntroductionKeywordAssessment"; import KeyphraseLengthAssessment from "./assessments/seo/KeyphraseLengthAssessment"; -import KeywordDensityAssessment from "./assessments/seo/KeywordDensityAssessment"; +import KeyphraseDensityAssessment from "./assessments/seo/KeywordDensityAssessment"; import MetaDescriptionKeywordAssessment from "./assessments/seo/MetaDescriptionKeywordAssessment"; import KeyphraseInSEOTitleAssessment from "./assessments/seo/KeyphraseInSEOTitleAssessment"; import SlugKeywordAssessment from "./assessments/seo/UrlKeywordAssessment"; @@ -46,7 +46,7 @@ const TaxonomyAssessor = function( researcher, options ) { this._assessments = [ new IntroductionKeywordAssessment(), new KeyphraseLengthAssessment(), - new KeywordDensityAssessment(), + new KeyphraseDensityAssessment(), new MetaDescriptionKeywordAssessment(), new MetaDescriptionLengthAssessment(), getTextLengthAssessment(), From 580f47d50e3ecc2f06098c3799c5e66c815aca87 Mon Sep 17 00:00:00 2001 From: FAMarfuaty Date: Fri, 14 Apr 2023 16:19:07 +0200 Subject: [PATCH 021/616] adapt unit tests 
--- .../researches/keywordCountSpec.js | 105 +++++++++++------- 1 file changed, 63 insertions(+), 42 deletions(-) diff --git a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js index 616d55fc757..cc48cde4a9c 100644 --- a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js +++ b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js @@ -1,4 +1,4 @@ -import keywordCount from "../../../src/languageProcessing/researches/keywordCount.js"; +import keyphraseCount from "../../../src/languageProcessing/researches/keyphraseCount.js"; import Paper from "../../../src/values/Paper.js"; import factory from "../../specHelpers/factory"; import Mark from "../../../src/values/Mark"; @@ -6,6 +6,8 @@ import wordsCountHelper from "../../../src/languageProcessing/languages/ja/helpe import matchWordsHelper from "../../../src/languageProcessing/languages/ja/helpers/matchTextWithWord"; import memoizedSentenceTokenizer from "../../../src/languageProcessing/helpers/sentence/memoizedSentenceTokenizer"; import japaneseMemoizedSentenceTokenizer from "../../../src/languageProcessing/languages/ja/helpers/memoizedSentenceTokenizer"; +import buildTree from "../../specHelpers/parse/buildTree"; +import EnglishResearcher from "../../../src/languageProcessing/languages/en/Researcher"; /** * Adds morphological forms to the mock researcher. @@ -34,25 +36,42 @@ const mockResearcherApostrophe = buildMorphologyMockResearcher( [ [ "key`word" ] const mockResearcherDollarSign = buildMorphologyMockResearcher( [ [ "\\$keyword" ] ] ); describe( "Test for counting the keyword in a text", function() { + let researcher; + beforeEach( () => { + researcher = new EnglishResearcher(); + } ); it( "counts/marks a string of text with a keyword in it.", function() { - const mockPaper = new Paper( "a string of text with the keyword in it" ); - expect( keywordCount( mockPaper, mockResearcher ).count ).toBe( 1 ); - expect( keywordCount( mockPaper, mockResearcher ).markings ).toEqual( [ - new Mark( { marked: "a string of text with the keyword in it", - original: "a string of text with the keyword in it" } ) ] - ); + const mockPaper = new Paper( "
<p>a string of text with the keyword and another keyword in it</p>
", { keyword: "keyword" } ); + researcher.setPaper( mockPaper ); + buildTree( mockPaper, researcher ); + expect( keyphraseCount( mockPaper, researcher ).count ).toBe( 2 ); + expect( keyphraseCount( mockPaper, researcher ).markings ).toEqual( + [ new Mark( { + fieldsToMark: [], + marked: "a string of text with the keyword and" + + " another keyword in it", + original: "a string of text with the keyword and another keyword in it", + position: { endOffset: 36, startOffset: 29 }, + } ), + new Mark( { + fieldsToMark: [], + marked: "a string of text with the keyword " + + "and another keyword in it", + original: "a string of text with the keyword and another keyword in it", + position: { endOffset: 56, startOffset: 49 } } ), + ] ); } ); it( "counts a string of text with no keyword in it.", function() { const mockPaper = new Paper( "a string of text" ); - expect( keywordCount( mockPaper, mockResearcher ).count ).toBe( 0 ); - expect( keywordCount( mockPaper, mockResearcher ).markings ).toEqual( [] ); + expect( keyphraseCount( mockPaper, mockResearcher ).count ).toBe( 0 ); + expect( keyphraseCount( mockPaper, mockResearcher ).markings ).toEqual( [] ); } ); it( "counts multiple occurrences of a keyphrase consisting of multiple words.", function() { const mockPaper = new Paper( "a string of text with the key word in it, with more key words." ); - expect( keywordCount( mockPaper, mockResearcherKeyWord ).count ).toBe( 2 ); - expect( keywordCount( mockPaper, mockResearcherKeyWord ).markings ).toEqual( [ + expect( keyphraseCount( mockPaper, mockResearcherKeyWord ).count ).toBe( 2 ); + expect( keyphraseCount( mockPaper, mockResearcherKeyWord ).markings ).toEqual( [ new Mark( { marked: "a string of text with the key word in it, " + "with more key words.", original: "a string of text with the key word in it, with more key words." } ) ] @@ -61,8 +80,8 @@ describe( "Test for counting the keyword in a text", function() { it( "counts a string of text with German diacritics and eszett as the keyword", function() { const mockPaper = new Paper( "Waltz keepin auf mitz auf keepin äöüß weiner blitz deutsch spitzen." ); - expect( keywordCount( mockPaper, mockResearcherGermanDiacritics ).count ).toBe( 1 ); - expect( keywordCount( mockPaper, mockResearcherGermanDiacritics ).markings ).toEqual( [ + expect( keyphraseCount( mockPaper, mockResearcherGermanDiacritics ).count ).toBe( 1 ); + expect( keyphraseCount( mockPaper, mockResearcherGermanDiacritics ).markings ).toEqual( [ new Mark( { marked: "Waltz keepin auf mitz auf keepin äöüß weiner blitz deutsch spitzen.", original: "Waltz keepin auf mitz auf keepin äöüß weiner blitz deutsch spitzen." } ) ] ); @@ -70,8 +89,8 @@ describe( "Test for counting the keyword in a text", function() { it( "counts a string with multiple keyword morphological forms", function() { const mockPaper = new Paper( "A string of text with a keyword and multiple keywords in it." ); - expect( keywordCount( mockPaper, mockResearcher ).count ).toBe( 2 ); - expect( keywordCount( mockPaper, mockResearcher ).markings ).toEqual( [ + expect( keyphraseCount( mockPaper, mockResearcher ).count ).toBe( 2 ); + expect( keyphraseCount( mockPaper, mockResearcher ).markings ).toEqual( [ new Mark( { marked: "A string of text with a keyword " + "and multiple keywords in it.", original: "A string of text with a keyword and multiple keywords in it." 
} ) ] @@ -80,8 +99,8 @@ describe( "Test for counting the keyword in a text", function() { it( "counts a string with a keyword with a '-' in it", function() { const mockPaper = new Paper( "A string with a key-word." ); - expect( keywordCount( mockPaper, mockResearcherMinus ).count ).toBe( 1 ); - expect( keywordCount( mockPaper, mockResearcherMinus ).markings ).toEqual( [ + expect( keyphraseCount( mockPaper, mockResearcherMinus ).count ).toBe( 1 ); + expect( keyphraseCount( mockPaper, mockResearcherMinus ).markings ).toEqual( [ new Mark( { marked: "A string with a key-word.", original: "A string with a key-word." } ) ] ); @@ -89,14 +108,14 @@ describe( "Test for counting the keyword in a text", function() { it( "counts 'key word' in 'key-word'.", function() { const mockPaper = new Paper( "A string with a key-word." ); - expect( keywordCount( mockPaper, mockResearcherKeyWord ).count ).toBe( 1 ); + expect( keyphraseCount( mockPaper, mockResearcherKeyWord ).count ).toBe( 1 ); // Note: this behavior might change in the future. } ); it( "counts a string with a keyword with a '_' in it", function() { const mockPaper = new Paper( "A string with a key_word." ); - expect( keywordCount( mockPaper, mockResearcherUnderscore ).count ).toBe( 1 ); - expect( keywordCount( mockPaper, mockResearcherUnderscore ).markings ).toEqual( [ + expect( keyphraseCount( mockPaper, mockResearcherUnderscore ).count ).toBe( 1 ); + expect( keyphraseCount( mockPaper, mockResearcherUnderscore ).markings ).toEqual( [ new Mark( { marked: "A string with a key_word.", original: "A string with a key_word." } ) ] ); @@ -104,8 +123,8 @@ describe( "Test for counting the keyword in a text", function() { it( "counts a string with with 'kapaklı' as a keyword in it", function() { const mockPaper = new Paper( "A string with kapaklı." ); - expect( keywordCount( mockPaper, mockResearcherKaplaki ).count ).toBe( 1 ); - expect( keywordCount( mockPaper, mockResearcherKaplaki ).markings ).toEqual( [ + expect( keyphraseCount( mockPaper, mockResearcherKaplaki ).count ).toBe( 1 ); + expect( keyphraseCount( mockPaper, mockResearcherKaplaki ).markings ).toEqual( [ new Mark( { marked: "A string with kapaklı.", original: "A string with kapaklı." } ) ] ); @@ -113,8 +132,8 @@ describe( "Test for counting the keyword in a text", function() { it( "counts a string with with '&' in the string and the keyword", function() { const mockPaper = new Paper( "A string with key&word." ); - expect( keywordCount( mockPaper, mockResearcherAmpersand ).count ).toBe( 1 ); - expect( keywordCount( mockPaper, mockResearcherAmpersand ).markings ).toEqual( [ + expect( keyphraseCount( mockPaper, mockResearcherAmpersand ).count ).toBe( 1 ); + expect( keyphraseCount( mockPaper, mockResearcherAmpersand ).markings ).toEqual( [ new Mark( { marked: "A string with key&word.", original: "A string with key&word." } ) ] ); @@ -122,14 +141,14 @@ describe( "Test for counting the keyword in a text", function() { it( "does not count images as keywords.", function() { const mockPaper = new Paper( "" ); - expect( keywordCount( mockPaper, mockResearcherAmpersand ).count ).toBe( 0 ); - expect( keywordCount( mockPaper, mockResearcherAmpersand ).markings ).toEqual( [] ); + expect( keyphraseCount( mockPaper, mockResearcherAmpersand ).count ).toBe( 0 ); + expect( keyphraseCount( mockPaper, mockResearcherAmpersand ).markings ).toEqual( [] ); } ); it( "keyword counting is blind to CApiTal LeTteRs.", function() { const mockPaper = new Paper( "A string with KeY worD." 
); - expect( keywordCount( mockPaper, mockResearcherKeyWord ).count ).toBe( 1 ); - expect( keywordCount( mockPaper, mockResearcherKeyWord ).markings ).toEqual( [ + expect( keyphraseCount( mockPaper, mockResearcherKeyWord ).count ).toBe( 1 ); + expect( keyphraseCount( mockPaper, mockResearcherKeyWord ).markings ).toEqual( [ new Mark( { marked: "A string with KeY worD.", original: "A string with KeY worD." } ) ] ); @@ -137,8 +156,8 @@ describe( "Test for counting the keyword in a text", function() { it( "keyword counting is blind to types of apostrophe.", function() { const mockPaper = new Paper( "A string with quotes to match the key'word, even if the quotes differ." ); - expect( keywordCount( mockPaper, mockResearcherApostrophe ).count ).toBe( 1 ); - expect( keywordCount( mockPaper, mockResearcherApostrophe ).markings ).toEqual( [ + expect( keyphraseCount( mockPaper, mockResearcherApostrophe ).count ).toBe( 1 ); + expect( keyphraseCount( mockPaper, mockResearcherApostrophe ).markings ).toEqual( [ new Mark( { marked: "A string with quotes to match the key'word, " + "even if the quotes differ.", original: "A string with quotes to match the key'word, even if the quotes differ." } ) ] @@ -147,26 +166,26 @@ describe( "Test for counting the keyword in a text", function() { it( "counts can count dollar sign as in '$keyword'.", function() { const mockPaper = new Paper( "A string with a $keyword." ); - expect( keywordCount( mockPaper, mockResearcherDollarSign ).count ).toBe( 1 ); + expect( keyphraseCount( mockPaper, mockResearcherDollarSign ).count ).toBe( 1 ); // Markings do not currently work in this condition. } ); it( "counts 'key word' also in 'key-word'.)", function() { const mockPaper = new Paper( "Lorem ipsum dolor sit amet, key word consectetur key-word adipiscing elit." ); - expect( keywordCount( mockPaper, mockResearcherKeyWord ).count ).toBe( 2 ); + expect( keyphraseCount( mockPaper, mockResearcherKeyWord ).count ).toBe( 2 ); // Note: this behavior might change in in the future. } ); it( "doesn't count 'key-word' in 'key word'.", function() { const mockPaper = new Paper( "Lorem ipsum dolor sit amet, key word consectetur key-word adipiscing elit." ); - expect( keywordCount( mockPaper, mockResearcherMinus ).count ).toBe( 1 ); + expect( keyphraseCount( mockPaper, mockResearcherMinus ).count ).toBe( 1 ); // Note: this behavior might change in in the future. } ); it( "only counts full key phrases (when all keywords are in the sentence once, twice etc.) as matches.", function() { const mockPaper = new Paper( "A string with three keys (key and another key) and one word." 
); - expect( keywordCount( mockPaper, mockResearcherKeyWord ).count ).toBe( 1 ); - expect( keywordCount( mockPaper, mockResearcherKeyWord ).markings ).toEqual( [ + expect( keyphraseCount( mockPaper, mockResearcherKeyWord ).count ).toBe( 1 ); + expect( keyphraseCount( mockPaper, mockResearcherKeyWord ).markings ).toEqual( [ new Mark( { marked: "A string with three keys (" + "key and another key) and one word.", @@ -176,7 +195,7 @@ describe( "Test for counting the keyword in a text", function() { it( "doesn't match singular forms in reduplicated plurals in Indonesian", function() { const mockPaper = new Paper( "Lorem ipsum dolor sit amet, consectetur keyword-keyword, keyword adipiscing elit.", { locale: "id_ID" } ); - expect( keywordCount( mockPaper, mockResearcher ).count ).toBe( 1 ); + expect( keyphraseCount( mockPaper, mockResearcher ).count ).toBe( 1 ); } ); } ); @@ -210,8 +229,10 @@ describe( "Test for counting the keyword in a text for Japanese", () => { it( "counts/marks a string of text with a keyword in it.", function() { const mockPaper = new Paper( "私の猫はかわいいです。", { locale: "ja", keyphrase: "猫" } ); const researcher = buildJapaneseMockResearcher( [ [ "猫" ] ], wordsCountHelper, matchWordsHelper ); - expect( keywordCount( mockPaper, researcher ).count ).toBe( 1 ); - expect( keywordCount( mockPaper, researcher ).markings ).toEqual( [ + buildTree( mockPaper, researcher ); + + expect( keyphraseCount( mockPaper, researcher ).count ).toBe( 1 ); + expect( keyphraseCount( mockPaper, researcher ).markings ).toEqual( [ new Mark( { marked: "私のはかわいいです。", original: "私の猫はかわいいです。" } ) ] ); } ); @@ -219,15 +240,15 @@ describe( "Test for counting the keyword in a text for Japanese", () => { it( "counts a string of text with no keyword in it.", function() { const mockPaper = new Paper( "私の猫はかわいいです。", { locale: "ja" } ); const researcher = buildJapaneseMockResearcher( [ [ "猫" ], [ "会い" ] ], wordsCountHelper, matchWordsHelper ); - expect( keywordCount( mockPaper, researcher ).count ).toBe( 0 ); - expect( keywordCount( mockPaper, researcher ).markings ).toEqual( [] ); + expect( keyphraseCount( mockPaper, researcher ).count ).toBe( 0 ); + expect( keyphraseCount( mockPaper, researcher ).markings ).toEqual( [] ); } ); it( "counts multiple occurrences of a keyphrase consisting of multiple words.", function() { const mockPaper = new Paper( "私の猫はかわいいですかわいい。", { locale: "ja" } ); const researcher = buildJapaneseMockResearcher( [ [ "猫" ], [ "かわいい" ] ], wordsCountHelper, matchWordsHelper ); - expect( keywordCount( mockPaper, researcher ).count ).toBe( 1 ); - expect( keywordCount( mockPaper, researcher ).markings ).toEqual( [ + expect( keyphraseCount( mockPaper, researcher ).count ).toBe( 1 ); + expect( keyphraseCount( mockPaper, researcher ).markings ).toEqual( [ new Mark( { marked: "私のかわいい" + "ですかわいい。", From 9afc5a8b29e0606730f6202934ef9f009db59b2e Mon Sep 17 00:00:00 2001 From: hdvos Date: Mon, 17 Apr 2023 11:11:30 +0200 Subject: [PATCH 022/616] change argument order of matchTextWithWord.js --- .../languageProcessing/helpers/match/matchTextWithWord.js | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/yoastseo/src/languageProcessing/helpers/match/matchTextWithWord.js b/packages/yoastseo/src/languageProcessing/helpers/match/matchTextWithWord.js index 37c80b302e7..ecf10007888 100644 --- a/packages/yoastseo/src/languageProcessing/helpers/match/matchTextWithWord.js +++ b/packages/yoastseo/src/languageProcessing/helpers/match/matchTextWithWord.js @@ -13,12 +13,12 @@ import { map } from 
"lodash-es"; * * @param {string} text The text to use for matching the wordToMatch. * @param {string} wordToMatch The word to match in the text - * @param {string} locale The locale used for transliteration. - * @param {function} matchWordCustomHelper The helper function to match word in text. + * @param {string} matchWordCustomHelper The locale used for transliteration. + * @param {function} locale The helper function to match word in text. * * @returns {Object} An array with all matches of the text, the number of the matches, and the lowest number of positions of the matches. */ -export default function( text, wordToMatch, locale, matchWordCustomHelper ) { +export default function( text, wordToMatch, matchWordCustomHelper, locale ) { text = stripSomeTags( text ); text = unifyWhitespace( text ); text = normalizeQuotes( text ); From cb8c59f70b39b3c246bf7443b8cba07c2a1c4e41 Mon Sep 17 00:00:00 2001 From: hdvos Date: Mon, 17 Apr 2023 13:36:41 +0200 Subject: [PATCH 023/616] adapt KeywordDensityAssessmentSpec.js to html tree. --- .../seo/KeywordDensityAssessmentSpec.js | 35 +++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) diff --git a/packages/yoastseo/spec/scoring/assessments/seo/KeywordDensityAssessmentSpec.js b/packages/yoastseo/spec/scoring/assessments/seo/KeywordDensityAssessmentSpec.js index ef8768dbdbd..ea9d08d59f1 100644 --- a/packages/yoastseo/spec/scoring/assessments/seo/KeywordDensityAssessmentSpec.js +++ b/packages/yoastseo/spec/scoring/assessments/seo/KeywordDensityAssessmentSpec.js @@ -7,6 +7,7 @@ import JapaneseResearcher from "../../../../src/languageProcessing/languages/ja/ import Paper from "../../../../src/values/Paper.js"; import Mark from "../../../../src/values/Mark.js"; import getMorphologyData from "../../../specHelpers/getMorphologyData"; +import buildTree from "../../../specHelpers/parse/buildTree"; const morphologyData = getMorphologyData( "en" ); const morphologyDataDe = getMorphologyData( "de" ); @@ -23,6 +24,7 @@ describe( "Tests for the keywordDensity assessment for languages without morphol it( "runs the keywordDensity on the paper without keyword in the text", function() { const paper = new Paper( nonkeyword.repeat( 1000 ), { keyword: "keyword" } ); const researcher = new DefaultResearcher( paper ); + buildTree( paper, researcher ); const result = new KeywordDensityAssessment().getResult( paper, researcher ); expect( result.getScore() ).toBe( 4 ); expect( result.getText() ).toBe( "Keyphrase density: " + @@ -33,6 +35,7 @@ describe( "Tests for the keywordDensity assessment for languages without morphol it( "runs the keywordDensity on the paper with a low keyphrase density (0.1%)", function() { const paper = new Paper( nonkeyword.repeat( 999 ) + keyword, { keyword: "keyword" } ); const researcher = new DefaultResearcher( paper ); + buildTree( paper, researcher ); const result = new KeywordDensityAssessment().getResult( paper, researcher ); expect( result.getScore() ).toBe( 4 ); expect( result.getText() ).toBe( "Keyphrase density: " + @@ -43,6 +46,7 @@ describe( "Tests for the keywordDensity assessment for languages without morphol it( "runs the keywordDensity on the paper with a good keyphrase density (0.5%)", function() { const paper = new Paper( nonkeyword.repeat( 995 ) + keyword.repeat( 5 ), { keyword: "keyword" } ); const researcher = new DefaultResearcher( paper ); + buildTree( paper, researcher ); const result = new KeywordDensityAssessment().getResult( paper, researcher ); expect( result.getScore() ).toBe( 9 ); expect( result.getText() 
).toBe( "Keyphrase density: " + @@ -52,6 +56,8 @@ describe( "Tests for the keywordDensity assessment for languages without morphol it( "runs the keywordDensity on the paper with a good keyphrase density (2%)", function() { const paper = new Paper( nonkeyword.repeat( 980 ) + keyword.repeat( 20 ), { keyword: "keyword" } ); const researcher = new DefaultResearcher( paper ); + buildTree( paper, researcher ); + const result = new KeywordDensityAssessment().getResult( paper, researcher ); expect( result.getScore() ).toBe( 9 ); expect( result.getText() ).toBe( "Keyphrase density: " + @@ -61,6 +67,8 @@ describe( "Tests for the keywordDensity assessment for languages without morphol it( "runs the keywordDensity on the paper with a slightly too high keyphrase density (3.5%)", function() { const paper = new Paper( nonkeyword.repeat( 965 ) + keyword.repeat( 35 ), { keyword: "keyword" } ); const researcher = new DefaultResearcher( paper ); + buildTree( paper, researcher ); + const result = new KeywordDensityAssessment().getResult( paper, researcher ); expect( result.getScore() ).toBe( -10 ); expect( result.getText() ).toBe( "Keyphrase density: " + @@ -71,6 +79,8 @@ describe( "Tests for the keywordDensity assessment for languages without morphol it( "runs the keywordDensity on the paper with a very high keyphrase density (10%)", function() { const paper = new Paper( nonkeyword.repeat( 900 ) + keyword.repeat( 100 ), { keyword: "keyword" } ); const researcher = new DefaultResearcher( paper ); + buildTree( paper, researcher ); + const result = new KeywordDensityAssessment().getResult( paper, researcher ); expect( result.getScore() ).toBe( -50 ); expect( result.getText() ).toBe( "Keyphrase density: " + @@ -82,6 +92,8 @@ describe( "Tests for the keywordDensity assessment for languages without morphol it( "adjusts the keyphrase density based on the length of the keyword with the actual density remaining at 2% - short keyphrase", function() { const paper = new Paper( nonkeyword.repeat( 960 ) + "b c, ".repeat( 20 ), { keyword: "b c" } ); const researcher = new DefaultResearcher( paper ); + buildTree( paper, researcher ); + const result = new KeywordDensityAssessment().getResult( paper, researcher ); expect( result.getScore() ).toBe( 9 ); expect( result.getText() ).toBe( "Keyphrase density: " + @@ -91,6 +103,9 @@ describe( "Tests for the keywordDensity assessment for languages without morphol it( "adjusts the keyphrase density based on the length of the keyword with the actual density remaining at 2% - long keyphrase", function() { const paper = new Paper( nonkeyword.repeat( 900 ) + "b c d e f, ".repeat( 20 ), { keyword: "b c d e f" } ); const researcher = new DefaultResearcher( paper ); + buildTree( paper, researcher ); + + const result = new KeywordDensityAssessment().getResult( paper, researcher ); expect( result.getScore() ).toBe( -50 ); expect( result.getText() ).toBe( "Keyphrase density: " + @@ -101,6 +116,8 @@ describe( "Tests for the keywordDensity assessment for languages without morphol it( "returns a bad result if the keyword is only used once, regardless of the density", function() { const paper = new Paper( nonkeyword.repeat( 100 ) + keyword, { keyword: "keyword" } ); const researcher = new DefaultResearcher( paper ); + buildTree( paper, researcher ); + const result = new KeywordDensityAssessment().getResult( paper, researcher ); expect( result.getScore() ).toBe( 4 ); expect( result.getText() ).toBe( "Keyphrase density: " + @@ -112,6 +129,8 @@ describe( "Tests for the keywordDensity assessment for 
languages without morphol "the recommended count is smaller than or equal to 2, regardless of the density", function() { const paper = new Paper( nonkeyword.repeat( 100 ) + "a b c, a b c", { keyword: "a b c", locale: "xx_XX" } ); const researcher = new DefaultResearcher( paper ); + buildTree( paper, researcher ); + const result = new KeywordDensityAssessment().getResult( paper, researcher ); expect( result.getScore() ).toBe( 9 ); expect( result.getText() ).toBe( "Keyphrase density: " + @@ -154,6 +173,7 @@ describe( "Tests for the keywordDensity assessment for languages with morphology const paper = new Paper( nonkeyword.repeat( 968 ) + keyword.repeat( 32 ), { keyword: "keyword", locale: "en_EN" } ); const researcher = new EnglishResearcher( paper ); researcher.addResearchData( "morphology", morphologyData ); + buildTree( paper, researcher ); const result = new KeywordDensityAssessment().getResult( paper, researcher ); expect( result.getScore() ).toBe( 9 ); expect( result.getText() ).toBe( "Keyphrase density: " + @@ -163,6 +183,8 @@ describe( "Tests for the keywordDensity assessment for languages with morphology it( "gives a GOOD result when keyword density is between 3 and 3.5%, also for other languages with morphology support", function() { const paper = new Paper( nonkeyword.repeat( 968 ) + keyword.repeat( 32 ), { keyword: "keyword", locale: "de_DE" } ); const researcher = new GermanResearcher( paper ); + buildTree( paper, researcher ); + researcher.addResearchData( "morphology", morphologyDataDe ); const result = new KeywordDensityAssessment().getResult( paper, researcher ); expect( result.getScore() ).toBe( 9 ); @@ -173,6 +195,8 @@ describe( "Tests for the keywordDensity assessment for languages with morphology it( "gives a BAD result when keyword density is between 3 and 3.5%, if morphology support is added, but there is no morphology data", function() { const paper = new Paper( nonkeyword.repeat( 968 ) + keyword.repeat( 32 ), { keyword: "keyword", locale: "de_DE" } ); const researcher = new GermanResearcher( paper ); + buildTree( paper, researcher ); + const result = new KeywordDensityAssessment().getResult( paper, researcher ); expect( result.getScore() ).toBe( -10 ); expect( result.getText() ).toBe( "Keyphrase density: " + @@ -186,6 +210,7 @@ describe( "A test for marking the keyword", function() { const keywordDensityAssessment = new KeywordDensityAssessment(); const paper = new Paper( "This is a very interesting paper with a keyword and another keyword.", { keyword: "keyword" } ); const researcher = new DefaultResearcher( paper ); + buildTree( paper, researcher ); keywordDensityAssessment.getResult( paper, researcher ); const expected = [ new Mark( { @@ -200,7 +225,10 @@ describe( "A test for marking the keyword", function() { it( "returns markers for a keyphrase containing numbers", function() { const keywordDensityAssessment = new KeywordDensityAssessment(); const paper = new Paper( "This is the release of YoastSEO 9.3.", { keyword: "YoastSEO 9.3" } ); + const researcher = new DefaultResearcher( paper ); + buildTree( paper, researcher ); + keywordDensityAssessment.getResult( paper, researcher ); const expected = [ new Mark( { marked: "This is the release of YoastSEO 9.3.", @@ -216,11 +244,14 @@ describe( "A test for marking the keyword", function() { "

", { keyword: "cat toy" } ); const researcher = new EnglishResearcher( paper ); + buildTree( paper, researcher ); + keywordDensityAssessment.getResult( paper, researcher ); const expected = [ new Mark( { - marked: "A flamboyant cat with a toy", - original: "A flamboyant cat with a toy" } ) ]; + marked: " A flamboyant cat with a " + + "toy\n", + original: " A flamboyant cat with a toy\n" } ) ]; expect( keywordDensityAssessment.getMarks() ).toEqual( expected ); } ); From 6a2fef85a75413a1c4ef57cd383509ffea3d66c2 Mon Sep 17 00:00:00 2001 From: hdvos Date: Mon, 17 Apr 2023 14:36:30 +0200 Subject: [PATCH 024/616] change parameter order in matchTextWithWord.js --- .../languageProcessing/helpers/match/matchTextWithWord.js | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/yoastseo/src/languageProcessing/helpers/match/matchTextWithWord.js b/packages/yoastseo/src/languageProcessing/helpers/match/matchTextWithWord.js index ecf10007888..b1de86b9bc7 100644 --- a/packages/yoastseo/src/languageProcessing/helpers/match/matchTextWithWord.js +++ b/packages/yoastseo/src/languageProcessing/helpers/match/matchTextWithWord.js @@ -13,12 +13,12 @@ import { map } from "lodash-es"; * * @param {string} text The text to use for matching the wordToMatch. * @param {string} wordToMatch The word to match in the text - * @param {string} matchWordCustomHelper The locale used for transliteration. - * @param {function} locale The helper function to match word in text. + * @param {string} locale The locale used for transliteration. + * @param {function} matchWordCustomHelper The helper function to match word in text. * * @returns {Object} An array with all matches of the text, the number of the matches, and the lowest number of positions of the matches. */ -export default function( text, wordToMatch, matchWordCustomHelper, locale ) { +export default function( text, wordToMatch, locale, matchWordCustomHelper ) { text = stripSomeTags( text ); text = unifyWhitespace( text ); text = normalizeQuotes( text ); From 7579eb762226801fdf4a759eb36dfd872ba1f061 Mon Sep 17 00:00:00 2001 From: hdvos Date: Mon, 17 Apr 2023 14:38:11 +0200 Subject: [PATCH 025/616] rename keyword to keyphrase in KeywordDensityAssessment.js --- .../seo/KeywordDensityAssessment.js | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/packages/yoastseo/src/scoring/assessments/seo/KeywordDensityAssessment.js b/packages/yoastseo/src/scoring/assessments/seo/KeywordDensityAssessment.js index c39765b8a7c..67f9fabc634 100644 --- a/packages/yoastseo/src/scoring/assessments/seo/KeywordDensityAssessment.js +++ b/packages/yoastseo/src/scoring/assessments/seo/KeywordDensityAssessment.js @@ -1,7 +1,7 @@ import { __, _n, sprintf } from "@wordpress/i18n"; import { merge } from "lodash-es"; -import recommendedKeywordCount from "../../helpers/assessments/recommendedKeywordCount.js"; +import recommendedKeyphraseCount from "../../helpers/assessments/recommendedKeywordCount.js"; import Assessment from "../assessment"; import AssessmentResult from "../../../values/AssessmentResult"; import { inRangeEndInclusive, inRangeStartEndInclusive, inRangeStartInclusive } from "../../helpers/assessments/inRange"; @@ -86,8 +86,8 @@ class KeyphraseDensityAssessment extends Assessment { } else { this._boundaries = this._config.parameters.noWordForms; } - this._minRecommendedKeywordCount = recommendedKeywordCount( text, keyphraseLength, this._boundaries.minimum, "min", customGetWords ); - this._maxRecommendedKeywordCount = 
recommendedKeywordCount( text, keyphraseLength, this._boundaries.maximum, "max", customGetWords ); + this._minRecommendedKeyphraseCount = recommendedKeyphraseCount( text, keyphraseLength, this._boundaries.minimum, "min", customGetWords ); + this._maxRecommendedKeyphraseCount = recommendedKeyphraseCount( text, keyphraseLength, this._boundaries.maximum, "max", customGetWords ); } /** @@ -134,8 +134,8 @@ class KeyphraseDensityAssessment extends Assessment { /** * Checks whether there are too few keyphrase matches in the text. * - * @returns {boolean} Returns true if the rounded keyword density is between 0 and the recommended minimum - * or if there there is only 1 keyword match (regardless of the density). + * @returns {boolean} Returns true if the rounded keyphrase density is between 0 and the recommended minimum + * or if there is only 1 keyphrase match (regardless of the density). */ hasTooFewMatches() { return inRangeStartInclusive( @@ -148,15 +148,15 @@ class KeyphraseDensityAssessment extends Assessment { /** * Checks whether there is a good number of keyphrase matches in the text. * - * @returns {boolean} Returns true if the rounded keyword density is between the recommended minimum - * and the recommended maximum or if the keyword count is 2 and the recommended minimum is lower than 2. + * @returns {boolean} Returns true if the rounded keyphrase density is between the recommended minimum + * and the recommended maximum or if the keyphrase count is 2 and the recommended minimum is lower than 2. */ hasGoodNumberOfMatches() { return inRangeStartEndInclusive( this._keyphraseDensity, this._boundaries.minimum, this._boundaries.maximum - ) || ( this._keyphraseCount.count === 2 && this._minRecommendedKeywordCount <= 2 ); + ) || ( this._keyphraseCount.count === 2 && this._minRecommendedKeyphraseCount <= 2 ); } /** @@ -196,7 +196,7 @@ class KeyphraseDensityAssessment extends Assessment { ), this._config.urlTitle, "", - this._minRecommendedKeywordCount, + this._minRecommendedKeyphraseCount, this._config.urlCallToAction ), }; @@ -221,7 +221,7 @@ class KeyphraseDensityAssessment extends Assessment { ), this._config.urlTitle, "", - this._minRecommendedKeywordCount, + this._minRecommendedKeyphraseCount, this._config.urlCallToAction, this._keyphraseCount.count ), @@ -268,7 +268,7 @@ class KeyphraseDensityAssessment extends Assessment { ), this._config.urlTitle, "", - this._maxRecommendedKeywordCount, + this._maxRecommendedKeyphraseCount, this._config.urlCallToAction, this._keyphraseCount.count ), @@ -294,7 +294,7 @@ class KeyphraseDensityAssessment extends Assessment { ), this._config.urlTitle, "", - this._maxRecommendedKeywordCount, + this._maxRecommendedKeyphraseCount, this._config.urlCallToAction, this._keyphraseCount.count ), From 9bd0ce7e331b16dc1affdb9c9cd5fe35518987cb Mon Sep 17 00:00:00 2001 From: hdvos Date: Mon, 17 Apr 2023 14:39:03 +0200 Subject: [PATCH 026/616] change order of parameters to countKeyphraseInText in keywordCount.js --- .../yoastseo/src/languageProcessing/researches/keywordCount.js | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js index 29679bfaa31..14401de3d14 100644 --- a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js +++ b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js @@ -105,8 +105,7 @@ export default function keyphraseCount( paper, researcher ) { const locale = 
paper.getLocale(); const sentences = getSentencesFromTree( paper ); - - const keyphraseFound = countKeyphraseInText( sentences, topicForms, matchWordCustomHelper, locale ); + const keyphraseFound = countKeyphraseInText( sentences, topicForms, locale, matchWordCustomHelper ); return { count: keyphraseFound.count, From 5e0efd79e3b45898b73c138c50645b08b5715c7e Mon Sep 17 00:00:00 2001 From: hdvos Date: Mon, 17 Apr 2023 17:03:43 +0200 Subject: [PATCH 027/616] change keyWord to keyPhrase in seoAssessorTests.js --- .../spec/specHelpers/scoring/seoAssessorTests.js | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/packages/yoastseo/spec/specHelpers/scoring/seoAssessorTests.js b/packages/yoastseo/spec/specHelpers/scoring/seoAssessorTests.js index ae0a39a8a13..1df057a3300 100644 --- a/packages/yoastseo/spec/specHelpers/scoring/seoAssessorTests.js +++ b/packages/yoastseo/spec/specHelpers/scoring/seoAssessorTests.js @@ -104,7 +104,7 @@ export function checkAssessmentAvailability( assessor, isProductAssessor = false const paper = new Paper( text, { keyword: "keyword", synonyms: "synonym" } ); const assessments = assess( paper ); - const expected = isStoreBlog ? mostAssessments : mostAssessments.concat( "keywordDensity" ); + const expected = isStoreBlog ? mostAssessments : mostAssessments.concat( "keyphraseDensity" ); expect( assessments.sort() ).toEqual( expected.sort() ); } ); @@ -140,7 +140,7 @@ export function checkAssessmentAvailability( assessor, isProductAssessor = false " Oratio vocibus offendit an mei, est esse pericula liberavisse.", { keyword: "keyword" } ); const assessments = assess( paper ); - const keyphraseAssessments = isProductAssessor ? [ "keyphraseDistribution", "keywordDensity" ] : [ "keywordDensity" ]; + const keyphraseAssessments = isProductAssessor ? [ "keyphraseDistribution", "keyphraseDensity" ] : [ "keyphraseDensity" ]; const expected = isStoreBlog ? mostAssessments : [ ...mostAssessments, ...keyphraseAssessments ]; expect( assessments.sort() ).toEqual( expected.sort() ); } ); @@ -161,7 +161,7 @@ export function checkAssessmentAvailability( assessor, isProductAssessor = false " Oratio vocibus offendit an mei, est esse pericula liberavisse.", { keyword: "keyword", synonyms: "synonym" } ); const assessments = assess( paper ); - const keyphraseAssessments = isProductAssessor ? [ "keyphraseDistribution", "keywordDensity" ] : [ "keywordDensity" ]; + const keyphraseAssessments = isProductAssessor ? [ "keyphraseDistribution", "keyphraseDensity" ] : [ "keyphraseDensity" ]; const expected = isStoreBlog ? mostAssessments : [ ...mostAssessments, ...keyphraseAssessments ]; expect( assessments.sort() ).toEqual( expected.sort() ); } ); @@ -279,8 +279,8 @@ export function checkUrls( assessor, isProductAssessor = false ) { checkAssessmentUrls( assessment, urlTitle, urlCallToAction ); } ); - test( "KeywordDensityAssessment", () => { - const assessment = assessor.getAssessment( "keywordDensity" ); + test( "keyphraseDensityAssessment", () => { + const assessment = assessor.getAssessment( "keyphraseDensity" ); const urlTitle = isProductAssessor ? "https://yoa.st/shopify12" : "https://yoa.st/33v"; const urlCallToAction = isProductAssessor ? 
"https://yoa.st/shopify13" : "https://yoa.st/33w"; From 15f874ef8bde069c3ec96d0b70ff2b49d4cb20b5 Mon Sep 17 00:00:00 2001 From: hdvos Date: Mon, 17 Apr 2023 17:04:18 +0200 Subject: [PATCH 028/616] fix bug where highlighting would not be undone --- packages/js/src/decorator/helpers/markTinyMCE.js | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/packages/js/src/decorator/helpers/markTinyMCE.js b/packages/js/src/decorator/helpers/markTinyMCE.js index 68a34427ac7..d1c1348d86b 100644 --- a/packages/js/src/decorator/helpers/markTinyMCE.js +++ b/packages/js/src/decorator/helpers/markTinyMCE.js @@ -120,14 +120,15 @@ function markTinyMCEPositionBased( marks, html ) { * @returns {void} */ export default function markTinyMCE( editor, paper, marks ) { - if ( isEmpty( marks ) ) { - return; - } - const dom = editor.dom; let html = editor.getContent(); html = markers.removeMarks( html ); + if ( isEmpty( marks ) ) { + editor.setContent( html ); + return; + } + if ( marks[ 0 ].hasPosition() ) { html = markTinyMCEPositionBased( marks, html ); } else { From 0cf40a8e49d387008a2b97f4890a6a144a5cc6b9 Mon Sep 17 00:00:00 2001 From: hdvos Date: Mon, 17 Apr 2023 17:05:00 +0200 Subject: [PATCH 029/616] change keyWord to keyPhrase in relatedKeyphraseAssessorTests.js --- .../specHelpers/scoring/relatedKeyphraseAssessorTests.js | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/yoastseo/spec/specHelpers/scoring/relatedKeyphraseAssessorTests.js b/packages/yoastseo/spec/specHelpers/scoring/relatedKeyphraseAssessorTests.js index 9509aefdca4..137b8e85145 100644 --- a/packages/yoastseo/spec/specHelpers/scoring/relatedKeyphraseAssessorTests.js +++ b/packages/yoastseo/spec/specHelpers/scoring/relatedKeyphraseAssessorTests.js @@ -71,7 +71,7 @@ export function checkAssessmentAvailability( assessor ) { expect( assessments ).toEqual( [ "introductionKeyword", "keyphraseLength", - "keywordDensity", + "keyphraseDensity", ] ); } ); } @@ -111,8 +111,8 @@ export function checkUrls( assessor ) { expect( assessment._config.urlCallToAction ).toBe( "" ); } ); - test( "KeywordDensity", () => { - const assessment = assessor.getAssessment( "keywordDensity" ); + test( "keyphraseDensity", () => { + const assessment = assessor.getAssessment( "keyphraseDensity" ); expect( assessment ).toBeDefined(); expect( assessment._config ).toBeDefined(); From 00f44350b4cb11577f0d3b68ab78bf79c1924ec2 Mon Sep 17 00:00:00 2001 From: hdvos Date: Mon, 17 Apr 2023 17:05:36 +0200 Subject: [PATCH 030/616] refactor getKeywordDensitySpec.js --- .../researches/getKeywordDensitySpec.js | 138 ++++++++++++++---- 1 file changed, 109 insertions(+), 29 deletions(-) diff --git a/packages/yoastseo/spec/languageProcessing/researches/getKeywordDensitySpec.js b/packages/yoastseo/spec/languageProcessing/researches/getKeywordDensitySpec.js index 02c6c40eadf..3aea748d054 100644 --- a/packages/yoastseo/spec/languageProcessing/researches/getKeywordDensitySpec.js +++ b/packages/yoastseo/spec/languageProcessing/researches/getKeywordDensitySpec.js @@ -5,72 +5,150 @@ import EnglishResearcher from "../../../src/languageProcessing/languages/en/Rese import JapaneseResearcher from "../../../src/languageProcessing/languages/ja/Researcher"; import DefaultResearcher from "../../../src/languageProcessing/languages/_default/Researcher"; import getMorphologyData from "../../specHelpers/getMorphologyData"; +import buildTree from "../../specHelpers/parse/buildTree"; const morphologyDataJA = getMorphologyData( "ja" ); const morphologyDataEN = 
getMorphologyData( "en" ); - -describe( "Test for counting the keyword density in a text", function() { - // eslint-disable-next-line max-statements - it( "returns keyword density", function() { - let mockPaper = new Paper( "a string of text with the keyword in it, density should be 7.7%", { keyword: "keyword" } ); - let mockResearcher = new EnglishResearcher( mockPaper ); +describe( "Test for counting the keyword density in a text with an English researcher", function() { + it( "should return a correct non-zero value if the text contains the keyword", function() { + const mockPaper = new Paper( "a string of text with the keyword in it, density should be 7.7%", { keyword: "keyword" } ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); mockResearcher.addResearchData( "morphology", morphologyDataEN ); expect( getKeywordDensity( mockPaper, mockResearcher ) ).toBe( 7.6923076923076925 ); + } ); - mockPaper = new Paper( "a string of text without the keyword in it, density should be 0%", { keyword: "empty" } ); - mockResearcher = new EnglishResearcher( mockPaper ); + it( "should return zero if the keyphrase does not occur in the text.", function() { + const mockPaper = new Paper( "a string of text without the keyword in it, density should be 0%", { keyword: "empty" } ); + const mockResearcher = new EnglishResearcher( mockPaper ); mockResearcher.addResearchData( "morphology", morphologyDataEN ); + buildTree( mockPaper, mockResearcher ); expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 0 ); + } ); + - mockPaper = new Paper( "Waltz keepin auf mitz auf keepin äöüß weiner blitz deutsch spitzen. ", { keyword: "äöüß" } ); - mockResearcher = new EnglishResearcher( mockPaper ); + it( "should recognize a keyphrase when it consists umlauted characters", function() { + const mockPaper = new Paper( "Waltz keepin auf mitz auf keepin äöüß weiner blitz deutsch spitzen. 
", { keyword: "äöüß" } ); + const mockResearcher = new EnglishResearcher( mockPaper ); mockResearcher.addResearchData( "morphology", morphologyDataEN ); + buildTree( mockPaper, mockResearcher ); + expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 9.090909090909092 ); + } ); - mockPaper = new Paper( "Lorem ipsum dolor sit amet, key word consectetur key-word adipiscing elit ", { keyword: "key-word" } ); - mockResearcher = new EnglishResearcher( mockPaper ); + it( "should recognize a hyphenated keyphrase.", function() { + const mockPaper = new Paper( "Lorem ipsum dolor sit amet, key word consectetur key-word adipiscing elit ", { keyword: "key-word" } ); + const mockResearcher = new EnglishResearcher( mockPaper ); mockResearcher.addResearchData( "morphology", morphologyDataEN ); + buildTree( mockPaper, mockResearcher ); expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 9.090909090909092 ); + } ); - mockPaper = new Paper( "a string of text with the kapaklı in it, density should be 7.7%", { keyword: "kapaklı" } ); - mockResearcher = new EnglishResearcher( mockPaper ); + it( "should correctly recognize a keyphrase with a non-latin 'ı' in it", function() { + const mockPaper = new Paper( "a string of text with the kapaklı in it, density should be 7.7%", { keyword: "kapaklı" } ); + const mockResearcher = new EnglishResearcher( mockPaper ); mockResearcher.addResearchData( "morphology", morphologyDataEN ); + buildTree( mockPaper, mockResearcher ); expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 7.6923076923076925 ); + } ); - mockPaper = new Paper( "a string of text with the key-word in it, density should be 7.7%", { keyword: "key-word" } ); - mockResearcher = new EnglishResearcher( mockPaper ); - mockResearcher.addResearchData( "morphology", morphologyDataEN ); + it( "should recognize a keyphrase with an underscore in it", function() { + const mockPaper = new Paper( "a string of text with the key_word in it, density should be 7.7%", { keyword: "key_word" } ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 7.6923076923076925 ); - mockPaper = new Paper( "a string of text with the key_word in it, density should be 7.7%", { keyword: "key_word" } ); - expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 7.6923076923076925 ); - mockPaper = new Paper( "a string of text with the key_word in it, density should be 0.0%", { keyword: "key word" } ); + } ); + + it( "should return zero if a multiword keyphrase is not present but the multiword keyphrase occurs with an underscore", function() { + const mockPaper = new Paper( "a string of text with the key_word in it, density should be 0.0%", { keyword: "key word" } ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 0 ); - mockPaper = new Paper( "a string of text with the key-word in it, density should be 7.7%", { keyword: "key word" } ); + } ); + + it( "should recognize a multiword keyphrase when it is occurs hyphenated", function() { + const mockPaper = new Paper( "a string of text with the key-word in it, density should be 7.7%", { keyword: "key word" } ); + const mockResearcher = new EnglishResearcher( mockPaper ); + + buildTree( mockPaper, mockResearcher ); // 
This behavior might change in the future. expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 7.6923076923076925 ); + } ); + + it( "should recognize a keyphrase with an ampersand in it", function() { + const mockPaper = new Paper( "a string of text with the key&word in it, density should be 7.7%", { keyword: "key&word" } ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 7.6923076923076925 ); - mockPaper = new Paper( "", { keyword: "key&word" } ); + } ); + + it( "should return zero if the text is empty", function() { + const mockPaper = new Paper( "", { keyword: "key&word" } ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 0 ); + } ); + + it( "should skip a keyphrase in case of consecutive keyphrases", function() { // Consecutive keywords are skipped, so this will match 2 times. - mockPaper = new Paper( "This is a nice string with a keyword keyword keyword.", { keyword: "keyword" } ); + const mockPaper = new Paper( "This is a nice string with a keyword keyword keyword.", { keyword: "keyword" } ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 20 ); - mockPaper = new Paper( "a string of text with the $keyword in it, density should be 7.7%", { keyword: "$keyword" } ); + } ); + + it( "should recognize a keyphrase with a '$' in it", function() { + const mockPaper = new Paper( "a string of text with the $keyword in it, density should be 7.7%", { keyword: "$keyword" } ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 7.6923076923076925 ); - mockPaper = new Paper( "a string of text with the Keyword in it, density should be 7.7%", { keyword: "keyword" } ); + } ); + + it( "should recognize a keyphrase regardless of capitalization", function() { + const mockPaper = new Paper( "a string of text with the Keyword in it, density should be 7.7%", { keyword: "keyword" } ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 7.6923076923076925 ); - mockPaper = new Paper( "a string of text with the Key word in it, density should be 14.29%", { keyword: "key word" } ); + } ); + + it( "should recognize a multiword keyphrase regardless of capitalization", function() { + const mockPaper = new Paper( "a string of text with the Key word in it, density should be 14.29%", { keyword: "key word" } ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 7.142857142857142 ); - mockPaper = new Paper( "a string with quotes to match the key'word, even if the quotes differ", { keyword: "key’word" } ); + } ); + + it( "should recognize apostrophes regardless of the type of quote that was used", function() { + const mockPaper = new Paper( "a string with quotes to 
match the key'word, even if the quotes differ", { keyword: "key’word" } ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 7.6923076923076925 ); } ); + + // it( "should ", function() { + // const mockPaper = new Paper( "a string of text with the key-word in it, density should be 7.7%", { keyword: "key-word" } ); + // const mockResearcher = new EnglishResearcher( mockPaper ); + // mockResearcher.addResearchData( "morphology", morphologyDataEN ); + // buildTree( mockPaper, mockResearcher ); + // } ); + + // eslint-disable-next-line max-statements + + + it( "returns keyword density", function() { + + + } ); } ); describe( "test for counting the keyword density in a text in a language that we haven't supported yet", function() { it( "returns keyword density", function() { const mockPaper = new Paper( "Ukara sing isine cemeng.", { keyword: "cemeng" } ); - expect( getKeywordDensity( mockPaper, new DefaultResearcher( mockPaper ) ) ).toBe( 25 ); + const mockResearcher = new DefaultResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + expect( getKeywordDensity( mockPaper, mockResearcher ) ).toBe( 25 ); } ); } ); @@ -86,6 +164,7 @@ describe( "test for counting the keyword density in a text in a language that us const mockPaper = new Paper( "私の猫はかわいいです。", { keyword: "猫" } ); const mockResearcher = new JapaneseResearcher( mockPaper ); mockResearcher.addResearchData( "morphology", morphologyDataJA ); + buildTree( mockPaper, mockResearcher ); expect( getKeywordDensity( mockPaper, mockResearcher ) ).toBe( 16.666666666666664 ); } ); @@ -93,6 +172,7 @@ describe( "test for counting the keyword density in a text in a language that us // 小さく is the inflected form of 小さい. 
const mockPaper = new Paper( "小さくて可愛い花の刺繍に関する一般一般の記事です。", { keyword: "小さい花の刺繍" } ); const mockResearcher = new JapaneseResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); mockResearcher.addResearchData( "morphology", morphologyDataJA ); expect( getKeywordDensity( mockPaper, mockResearcher ) ).toBe( 6.666666666666667 ); } ); From 66367ef27d469fe51c746a2f072e08b73a404c09 Mon Sep 17 00:00:00 2001 From: hdvos Date: Mon, 17 Apr 2023 17:06:05 +0200 Subject: [PATCH 031/616] fix import in keywordCountSpec.js --- .../researches/keywordCountSpec.js | 21 ++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js index cc48cde4a9c..d10973eb1b5 100644 --- a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js +++ b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js @@ -1,4 +1,4 @@ -import keyphraseCount from "../../../src/languageProcessing/researches/keyphraseCount.js"; +import keyphraseCount from "../../../src/languageProcessing/researches/keywordCount"; import Paper from "../../../src/values/Paper.js"; import factory from "../../specHelpers/factory"; import Mark from "../../../src/values/Mark"; @@ -64,12 +64,14 @@ describe( "Test for counting the keyword in a text", function() { it( "counts a string of text with no keyword in it.", function() { const mockPaper = new Paper( "a string of text" ); + buildTree( mockPaper, mockResearcher ); expect( keyphraseCount( mockPaper, mockResearcher ).count ).toBe( 0 ); expect( keyphraseCount( mockPaper, mockResearcher ).markings ).toEqual( [] ); } ); it( "counts multiple occurrences of a keyphrase consisting of multiple words.", function() { const mockPaper = new Paper( "a string of text with the key word in it, with more key words." ); + buildTree( mockPaper, mockResearcher ); expect( keyphraseCount( mockPaper, mockResearcherKeyWord ).count ).toBe( 2 ); expect( keyphraseCount( mockPaper, mockResearcherKeyWord ).markings ).toEqual( [ new Mark( { marked: "a string of text with the key word in it, " + @@ -80,6 +82,7 @@ describe( "Test for counting the keyword in a text", function() { it( "counts a string of text with German diacritics and eszett as the keyword", function() { const mockPaper = new Paper( "Waltz keepin auf mitz auf keepin äöüß weiner blitz deutsch spitzen." ); + buildTree( mockPaper, mockResearcher ); expect( keyphraseCount( mockPaper, mockResearcherGermanDiacritics ).count ).toBe( 1 ); expect( keyphraseCount( mockPaper, mockResearcherGermanDiacritics ).markings ).toEqual( [ new Mark( { marked: "Waltz keepin auf mitz auf keepin äöüß weiner blitz deutsch spitzen.", @@ -89,6 +92,7 @@ describe( "Test for counting the keyword in a text", function() { it( "counts a string with multiple keyword morphological forms", function() { const mockPaper = new Paper( "A string of text with a keyword and multiple keywords in it." ); + buildTree( mockPaper, mockResearcher ); expect( keyphraseCount( mockPaper, mockResearcher ).count ).toBe( 2 ); expect( keyphraseCount( mockPaper, mockResearcher ).markings ).toEqual( [ new Mark( { marked: "A string of text with a keyword " + @@ -99,6 +103,7 @@ describe( "Test for counting the keyword in a text", function() { it( "counts a string with a keyword with a '-' in it", function() { const mockPaper = new Paper( "A string with a key-word." 
); + buildTree( mockPaper, mockResearcher ); expect( keyphraseCount( mockPaper, mockResearcherMinus ).count ).toBe( 1 ); expect( keyphraseCount( mockPaper, mockResearcherMinus ).markings ).toEqual( [ new Mark( { marked: "A string with a key-word.", @@ -108,12 +113,14 @@ describe( "Test for counting the keyword in a text", function() { it( "counts 'key word' in 'key-word'.", function() { const mockPaper = new Paper( "A string with a key-word." ); + buildTree( mockPaper, mockResearcher ); expect( keyphraseCount( mockPaper, mockResearcherKeyWord ).count ).toBe( 1 ); // Note: this behavior might change in the future. } ); it( "counts a string with a keyword with a '_' in it", function() { const mockPaper = new Paper( "A string with a key_word." ); + buildTree( mockPaper, mockResearcher ); expect( keyphraseCount( mockPaper, mockResearcherUnderscore ).count ).toBe( 1 ); expect( keyphraseCount( mockPaper, mockResearcherUnderscore ).markings ).toEqual( [ new Mark( { marked: "A string with a key_word.", @@ -123,6 +130,7 @@ describe( "Test for counting the keyword in a text", function() { it( "counts a string with with 'kapaklı' as a keyword in it", function() { const mockPaper = new Paper( "A string with kapaklı." ); + buildTree( mockPaper, mockResearcher ); expect( keyphraseCount( mockPaper, mockResearcherKaplaki ).count ).toBe( 1 ); expect( keyphraseCount( mockPaper, mockResearcherKaplaki ).markings ).toEqual( [ new Mark( { marked: "A string with kapaklı.", @@ -132,6 +140,7 @@ describe( "Test for counting the keyword in a text", function() { it( "counts a string with with '&' in the string and the keyword", function() { const mockPaper = new Paper( "A string with key&word." ); + buildTree( mockPaper, mockResearcher ); expect( keyphraseCount( mockPaper, mockResearcherAmpersand ).count ).toBe( 1 ); expect( keyphraseCount( mockPaper, mockResearcherAmpersand ).markings ).toEqual( [ new Mark( { marked: "A string with key&word.", @@ -141,12 +150,14 @@ describe( "Test for counting the keyword in a text", function() { it( "does not count images as keywords.", function() { const mockPaper = new Paper( "" ); + buildTree( mockPaper, mockResearcher ); expect( keyphraseCount( mockPaper, mockResearcherAmpersand ).count ).toBe( 0 ); expect( keyphraseCount( mockPaper, mockResearcherAmpersand ).markings ).toEqual( [] ); } ); it( "keyword counting is blind to CApiTal LeTteRs.", function() { const mockPaper = new Paper( "A string with KeY worD." ); + buildTree( mockPaper, mockResearcher ); expect( keyphraseCount( mockPaper, mockResearcherKeyWord ).count ).toBe( 1 ); expect( keyphraseCount( mockPaper, mockResearcherKeyWord ).markings ).toEqual( [ new Mark( { marked: "A string with KeY worD.", @@ -156,6 +167,7 @@ describe( "Test for counting the keyword in a text", function() { it( "keyword counting is blind to types of apostrophe.", function() { const mockPaper = new Paper( "A string with quotes to match the key'word, even if the quotes differ." ); + buildTree( mockPaper, mockResearcher ); expect( keyphraseCount( mockPaper, mockResearcherApostrophe ).count ).toBe( 1 ); expect( keyphraseCount( mockPaper, mockResearcherApostrophe ).markings ).toEqual( [ new Mark( { marked: "A string with quotes to match the key'word, " + @@ -166,24 +178,28 @@ describe( "Test for counting the keyword in a text", function() { it( "counts can count dollar sign as in '$keyword'.", function() { const mockPaper = new Paper( "A string with a $keyword." 
); + buildTree( mockPaper, mockResearcher ); expect( keyphraseCount( mockPaper, mockResearcherDollarSign ).count ).toBe( 1 ); // Markings do not currently work in this condition. } ); it( "counts 'key word' also in 'key-word'.)", function() { const mockPaper = new Paper( "Lorem ipsum dolor sit amet, key word consectetur key-word adipiscing elit." ); + buildTree( mockPaper, mockResearcher ); expect( keyphraseCount( mockPaper, mockResearcherKeyWord ).count ).toBe( 2 ); // Note: this behavior might change in in the future. } ); it( "doesn't count 'key-word' in 'key word'.", function() { const mockPaper = new Paper( "Lorem ipsum dolor sit amet, key word consectetur key-word adipiscing elit." ); + buildTree( mockPaper, mockResearcher ); expect( keyphraseCount( mockPaper, mockResearcherMinus ).count ).toBe( 1 ); // Note: this behavior might change in in the future. } ); it( "only counts full key phrases (when all keywords are in the sentence once, twice etc.) as matches.", function() { const mockPaper = new Paper( "A string with three keys (key and another key) and one word." ); + buildTree( mockPaper, mockResearcher ); expect( keyphraseCount( mockPaper, mockResearcherKeyWord ).count ).toBe( 1 ); expect( keyphraseCount( mockPaper, mockResearcherKeyWord ).markings ).toEqual( [ new Mark( { marked: "A string with three keys (" + @@ -195,6 +211,7 @@ describe( "Test for counting the keyword in a text", function() { it( "doesn't match singular forms in reduplicated plurals in Indonesian", function() { const mockPaper = new Paper( "Lorem ipsum dolor sit amet, consectetur keyword-keyword, keyword adipiscing elit.", { locale: "id_ID" } ); + buildTree( mockPaper, mockResearcher ); expect( keyphraseCount( mockPaper, mockResearcher ).count ).toBe( 1 ); } ); } ); @@ -240,6 +257,7 @@ describe( "Test for counting the keyword in a text for Japanese", () => { it( "counts a string of text with no keyword in it.", function() { const mockPaper = new Paper( "私の猫はかわいいです。", { locale: "ja" } ); const researcher = buildJapaneseMockResearcher( [ [ "猫" ], [ "会い" ] ], wordsCountHelper, matchWordsHelper ); + buildTree( mockPaper, researcher ); expect( keyphraseCount( mockPaper, researcher ).count ).toBe( 0 ); expect( keyphraseCount( mockPaper, researcher ).markings ).toEqual( [] ); } ); @@ -247,6 +265,7 @@ describe( "Test for counting the keyword in a text for Japanese", () => { it( "counts multiple occurrences of a keyphrase consisting of multiple words.", function() { const mockPaper = new Paper( "私の猫はかわいいですかわいい。", { locale: "ja" } ); const researcher = buildJapaneseMockResearcher( [ [ "猫" ], [ "かわいい" ] ], wordsCountHelper, matchWordsHelper ); + buildTree( mockPaper, researcher ); expect( keyphraseCount( mockPaper, researcher ).count ).toBe( 1 ); expect( keyphraseCount( mockPaper, researcher ).markings ).toEqual( [ new Mark( { From 775cd1e3d25d6ceffceacd0f81d9d2c14833e765 Mon Sep 17 00:00:00 2001 From: hdvos Date: Mon, 17 Apr 2023 17:06:31 +0200 Subject: [PATCH 032/616] fix spec in KeywordDensityAssessmentSpec.js --- .../seo/KeywordDensityAssessmentSpec.js | 21 +++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/packages/yoastseo/spec/scoring/assessments/seo/KeywordDensityAssessmentSpec.js b/packages/yoastseo/spec/scoring/assessments/seo/KeywordDensityAssessmentSpec.js index ea9d08d59f1..749a3162de6 100644 --- a/packages/yoastseo/spec/scoring/assessments/seo/KeywordDensityAssessmentSpec.js +++ b/packages/yoastseo/spec/scoring/assessments/seo/KeywordDensityAssessmentSpec.js @@ -211,6 +211,7 @@ 
describe( "A test for marking the keyword", function() { const paper = new Paper( "This is a very interesting paper with a keyword and another keyword.", { keyword: "keyword" } ); const researcher = new DefaultResearcher( paper ); buildTree( paper, researcher ); + keywordDensityAssessment.getResult( paper, researcher ); const expected = [ new Mark( { @@ -218,11 +219,21 @@ describe( "A test for marking the keyword", function() { "keyword and another " + "keyword.", original: "This is a very interesting paper with a keyword and another keyword.", + // eslint-disable-next-line no-undefined + position: { endOffset: undefined, startOffset: undefined }, + } ), + new Mark( { + marked: "This is a very interesting paper with a " + + "keyword and another " + + "keyword.", + original: "This is a very interesting paper with a keyword and another keyword.", + // eslint-disable-next-line no-undefined + position: { endOffset: undefined, startOffset: undefined }, } ) ]; expect( keywordDensityAssessment.getMarks() ).toEqual( expected ); } ); - it( "returns markers for a keyphrase containing numbers", function() { + xit( "returns markers for a keyphrase containing numbers", function() { const keywordDensityAssessment = new KeywordDensityAssessment(); const paper = new Paper( "This is the release of YoastSEO 9.3.", { keyword: "YoastSEO 9.3" } ); @@ -251,7 +262,13 @@ describe( "A test for marking the keyword", function() { new Mark( { marked: " A flamboyant cat with a " + "toy\n", - original: " A flamboyant cat with a toy\n" } ) ]; + original: " A flamboyant cat with a toy\n", + position: { endOffset: 204, startOffset: 201 } } ), + new Mark( { + marked: " A flamboyant cat with a " + + "toy\n", + original: " A flamboyant cat with a toy\n", position: { endOffset: 215, startOffset: 212 } } ), + ]; expect( keywordDensityAssessment.getMarks() ).toEqual( expected ); } ); From 332d35a594b1af276cd70f2103f40ade30ba4d70 Mon Sep 17 00:00:00 2001 From: hdvos Date: Tue, 18 Apr 2023 11:14:44 +0200 Subject: [PATCH 033/616] recognize multiple occurences of multiword keyphrase --- .../researches/keywordCountSpec.js | 52 ++++++++++++++----- .../researches/keywordCount.js | 27 +++++++++- 2 files changed, 64 insertions(+), 15 deletions(-) diff --git a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js index d10973eb1b5..0c8bacafff9 100644 --- a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js +++ b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js @@ -70,49 +70,73 @@ describe( "Test for counting the keyword in a text", function() { } ); it( "counts multiple occurrences of a keyphrase consisting of multiple words.", function() { - const mockPaper = new Paper( "a string of text with the key word in it, with more key words." ); + const mockPaper = new Paper( "
a string of text with the key word in it, with more key words.
" ); buildTree( mockPaper, mockResearcher ); - expect( keyphraseCount( mockPaper, mockResearcherKeyWord ).count ).toBe( 2 ); + // console.log(keyphraseCount( mockPaper, mockResearcherKeyWord ).markings.map(mark => mark._properties.position)); + // expect( keyphraseCount( mockPaper, mockResearcherKeyWord ).count ).toBe( 2 ); expect( keyphraseCount( mockPaper, mockResearcherKeyWord ).markings ).toEqual( [ - new Mark( { marked: "a string of text with the key word in it, " + + new Mark( { + marked: "a string of text with the key word in it, " + "with more key words.", - original: "a string of text with the key word in it, with more key words." } ) ] + original: "a string of text with the key word in it, with more key words.", + // eslint-disable-next-line no-undefined + position: { endOffset: 37, startOffset: 29 } } + + ), + new Mark( { + marked: "a string of text with the key word in it, " + + "with more key words.", + original: "a string of text with the key word in it, with more key words.", + // eslint-disable-next-line no-undefined + position: { endOffset: 64, startOffset: 55 }, + } ) ] ); } ); it( "counts a string of text with German diacritics and eszett as the keyword", function() { - const mockPaper = new Paper( "Waltz keepin auf mitz auf keepin äöüß weiner blitz deutsch spitzen." ); + const mockPaper = new Paper( "
Waltz keepin auf mitz auf keepin äöüß weiner blitz deutsch spitzen.
" ); buildTree( mockPaper, mockResearcher ); expect( keyphraseCount( mockPaper, mockResearcherGermanDiacritics ).count ).toBe( 1 ); expect( keyphraseCount( mockPaper, mockResearcherGermanDiacritics ).markings ).toEqual( [ - new Mark( { marked: "Waltz keepin auf mitz auf keepin äöüß weiner blitz deutsch spitzen.", - original: "Waltz keepin auf mitz auf keepin äöüß weiner blitz deutsch spitzen." } ) ] + new Mark( { + marked: "Waltz keepin auf mitz auf keepin äöüß weiner blitz deutsch spitzen.", + original: "Waltz keepin auf mitz auf keepin äöüß weiner blitz deutsch spitzen.", + position: { endOffset: 40, startOffset: 36 } } ) ] ); } ); it( "counts a string with multiple keyword morphological forms", function() { - const mockPaper = new Paper( "A string of text with a keyword and multiple keywords in it." ); + const mockPaper = new Paper( "
A string of text with a keyword and multiple keywords in it.
" ); buildTree( mockPaper, mockResearcher ); expect( keyphraseCount( mockPaper, mockResearcher ).count ).toBe( 2 ); expect( keyphraseCount( mockPaper, mockResearcher ).markings ).toEqual( [ - new Mark( { marked: "A string of text with a keyword " + + new Mark( { + marked: "A string of text with a keyword " + + "and multiple keywords in it.", + original: "A string of text with a keyword and multiple keywords in it.", + position: { endOffset: 36, startOffset: 29 } } ), + new Mark( { + marked: "A string of text with a keyword " + "and multiple keywords in it.", - original: "A string of text with a keyword and multiple keywords in it." } ) ] + original: "A string of text with a keyword and multiple keywords in it.", + position: { endOffset: 58, startOffset: 50 } } ) ] ); } ); it( "counts a string with a keyword with a '-' in it", function() { - const mockPaper = new Paper( "A string with a key-word." ); + const mockPaper = new Paper( "
A string with a key-word.
" ); buildTree( mockPaper, mockResearcher ); expect( keyphraseCount( mockPaper, mockResearcherMinus ).count ).toBe( 1 ); expect( keyphraseCount( mockPaper, mockResearcherMinus ).markings ).toEqual( [ - new Mark( { marked: "A string with a key-word.", - original: "A string with a key-word." } ) ] + new Mark( { + marked: "A string with a key-word.", + original: "A string with a key-word.", + position: { endOffset: 28, startOffset: 20 } } ) ] ); } ); it( "counts 'key word' in 'key-word'.", function() { - const mockPaper = new Paper( "A string with a key-word." ); + const mockPaper = new Paper( "A string with a key-word." ); buildTree( mockPaper, mockResearcher ); expect( keyphraseCount( mockPaper, mockResearcherKeyWord ).count ).toBe( 1 ); // Note: this behavior might change in the future. diff --git a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js index 14401de3d14..195dec1b085 100644 --- a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js +++ b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js @@ -50,6 +50,27 @@ function getMarkingsInSentence( sentence, matchesInSentence, matchWordCustomHelp } ) ); } +const mergeConsecutiveMarkings = ( markings ) => { + const newMarkings = []; + console.log(...markings, "TEST1"); + markings.forEach( ( marking ) => { + let actionDone = false; + newMarkings.forEach( ( newMarking, newMarkingIndex ) => { + if ( newMarking.getPositionEnd() + 1 === marking.getPositionStart() ) { + newMarkings[ newMarkingIndex ]._properties.position.endOffset = marking.getPositionEnd(); + actionDone = true; + } else if ( newMarking.getPositionStart() === marking.getPositionEnd() + 1 ) { + newMarkings[ newMarkingIndex ]._properties.position.startOffset = marking.getPositionStart(); + actionDone = true; + } + } ); + if ( ! actionDone ) { + newMarkings.push( marking ); + } + } ); + return newMarkings; +}; + /** * Counts the occurrences of the keyphrase in the text and creates the Mark objects for the matches. * @@ -76,7 +97,9 @@ export function countKeyphraseInText( sentences, topicForms, locale, matchWordCu return acc; } // Get the Mark objects of all keyphrase occurrences in the sentence. 
- const markings = getMarkingsInSentence( sentence, matchesInSentence, matchWordCustomHelper ); + let markings = getMarkingsInSentence( sentence, matchesInSentence, matchWordCustomHelper ); + // console.log(markings); + markings = mergeConsecutiveMarkings( markings ); return { count: acc.count + min( matchesInSentence.map( match => match.count ) ), @@ -86,6 +109,8 @@ export function countKeyphraseInText( sentences, topicForms, locale, matchWordCu count: 0, markings: [], } ); + + return keyphraseCount; } /** From bdde58335b0c78b94a9dbd6715c61b4970c0a99a Mon Sep 17 00:00:00 2001 From: Mykola Shlyakhtun Date: Tue, 18 Apr 2023 13:10:54 +0200 Subject: [PATCH 034/616] Algorithm to find several consensus words --- .../researches/keywordCountSpec.js | 4 +- .../researches/keywordCount.js | 40 ++++++++++++++++++- 2 files changed, 41 insertions(+), 3 deletions(-) diff --git a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js index 0c8bacafff9..cc0f57c071a 100644 --- a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js +++ b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js @@ -28,7 +28,7 @@ const mockResearcher = buildMorphologyMockResearcher( [ [ "keyword", "keywords" const mockResearcherGermanDiacritics = buildMorphologyMockResearcher( [ [ "äöüß" ] ] ); const mockResearcherMinus = buildMorphologyMockResearcher( [ [ "key-word", "key-words" ] ] ); const mockResearcherUnderscore = buildMorphologyMockResearcher( [ [ "key_word", "key_words" ] ] ); -const mockResearcherKeyWord = buildMorphologyMockResearcher( [ [ "key", "keys" ], [ "word", "words" ] ] ); +const mockResearcherKeyWord = buildMorphologyMockResearcher( [ [ "key", "keys" ], [ "word", "words" ], [ "test" ] ] ); const mockResearcherKaplaki = buildMorphologyMockResearcher( [ [ "kapaklı" ] ] ); const mockResearcherAmpersand = buildMorphologyMockResearcher( [ [ "key&word" ] ] ); const mockResearcherApostrophe = buildMorphologyMockResearcher( [ [ "key`word" ] ] ); @@ -70,7 +70,7 @@ describe( "Test for counting the keyword in a text", function() { } ); it( "counts multiple occurrences of a keyphrase consisting of multiple words.", function() { - const mockPaper = new Paper( "
a string of text with the key word in it, with more key words.
" ); + const mockPaper = new Paper( "
a string of text with the key word key word test word test in it, with more key words test.
" ); buildTree( mockPaper, mockResearcher ); // console.log(keyphraseCount( mockPaper, mockResearcherKeyWord ).markings.map(mark => mark._properties.position)); // expect( keyphraseCount( mockPaper, mockResearcherKeyWord ).count ).toBe( 2 ); diff --git a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js index 195dec1b085..ca5c0c0e475 100644 --- a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js +++ b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js @@ -20,6 +20,44 @@ function getMatchesInSentence( sentence, keyphraseForms, locale, matchWordCusto return keyphraseForms.map( forms => matchTextWithArray( sentence.text, forms, locale, matchWordCustomHelper ) ); } + const result = []; + const tokens = sentence.tokens; + let index = 0; + let initailPosition = 0; + + let foundWords = []; + // eslint-disable-next-line no-constant-condition + while ( true ) { + const head = keyphraseForms[ index ]; + const foundPosition = tokens.slice( initailPosition ).findIndex( t => head.includes( t.text ) ); + + if ( foundPosition >= 0 ) { + if ( index > 0 ) { + if ( tokens.slice( initailPosition, foundPosition + initailPosition ).some( t => t.text.trim() !== "" ) ) { + index = 0; + foundWords = []; + continue; + } + } + + index += 1; + initailPosition += foundPosition; + foundWords.push( tokens[ initailPosition ] ); + initailPosition += 1; + } else { + if ( index === 0 ) { + break; + } + index = 0; + } + + if ( foundWords.length >= keyphraseForms.length ) { + result.push( foundWords ); + index = 0; + foundWords = []; + } + } + return keyphraseForms.map( forms => matchWordFormsWithTokens( forms, sentence.tokens ) ); } @@ -52,7 +90,7 @@ function getMarkingsInSentence( sentence, matchesInSentence, matchWordCustomHelp const mergeConsecutiveMarkings = ( markings ) => { const newMarkings = []; - console.log(...markings, "TEST1"); + console.log( ...markings, "TEST1" ); markings.forEach( ( marking ) => { let actionDone = false; newMarkings.forEach( ( newMarking, newMarkingIndex ) => { From 01a1e2586794d7d88272b7e11d869f4ce41d2f3c Mon Sep 17 00:00:00 2001 From: hdvos Date: Tue, 18 Apr 2023 13:22:34 +0200 Subject: [PATCH 035/616] fix mistake --- .../yoastseo/src/languageProcessing/researches/keywordCount.js | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js index 195dec1b085..a6ed2663304 100644 --- a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js +++ b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js @@ -1,6 +1,6 @@ import { flattenDeep, min, flatten } from "lodash-es"; import matchTextWithArray from "../helpers/match/matchTextWithArray"; -import matchWordFormsWithTokens from "../helpers/match/matchWordFormsWithTokens"; +import matchWordFormsWithTokens, { matchTokensWithWordForms } from "../helpers/match/matchWordFormsWithTokens"; import getSentencesFromTree from "../helpers/sentence/getSentencesFromTree"; import { collectMarkingsInSentence, markWordsInASentence } from "../helpers/word/markWordsInSentences"; import Mark from "../../values/Mark"; @@ -110,7 +110,6 @@ export function countKeyphraseInText( sentences, topicForms, locale, matchWordCu markings: [], } ); - return keyphraseCount; } /** From c82a973ff8a34135f98e46bab4e17cf91c8b76c6 Mon Sep 17 00:00:00 2001 From: hdvos Date: Tue, 18 Apr 2023 14:46:10 
+0200 Subject: [PATCH 036/616] fix multiword keyphrase problems --- .../researches/keywordCountSpec.js | 4 +- .../researches/keywordCount.js | 62 ++++++++++--------- 2 files changed, 34 insertions(+), 32 deletions(-) diff --git a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js index cc0f57c071a..0c8bacafff9 100644 --- a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js +++ b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js @@ -28,7 +28,7 @@ const mockResearcher = buildMorphologyMockResearcher( [ [ "keyword", "keywords" const mockResearcherGermanDiacritics = buildMorphologyMockResearcher( [ [ "äöüß" ] ] ); const mockResearcherMinus = buildMorphologyMockResearcher( [ [ "key-word", "key-words" ] ] ); const mockResearcherUnderscore = buildMorphologyMockResearcher( [ [ "key_word", "key_words" ] ] ); -const mockResearcherKeyWord = buildMorphologyMockResearcher( [ [ "key", "keys" ], [ "word", "words" ], [ "test" ] ] ); +const mockResearcherKeyWord = buildMorphologyMockResearcher( [ [ "key", "keys" ], [ "word", "words" ] ] ); const mockResearcherKaplaki = buildMorphologyMockResearcher( [ [ "kapaklı" ] ] ); const mockResearcherAmpersand = buildMorphologyMockResearcher( [ [ "key&word" ] ] ); const mockResearcherApostrophe = buildMorphologyMockResearcher( [ [ "key`word" ] ] ); @@ -70,7 +70,7 @@ describe( "Test for counting the keyword in a text", function() { } ); it( "counts multiple occurrences of a keyphrase consisting of multiple words.", function() { - const mockPaper = new Paper( "
a string of text with the key word key word test word test in it, with more key words test.
" ); + const mockPaper = new Paper( "
a string of text with the key word in it, with more key words.
" ); buildTree( mockPaper, mockResearcher ); // console.log(keyphraseCount( mockPaper, mockResearcherKeyWord ).markings.map(mark => mark._properties.position)); // expect( keyphraseCount( mockPaper, mockResearcherKeyWord ).count ).toBe( 2 ); diff --git a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js index 8bf42f92342..63db800716d 100644 --- a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js +++ b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js @@ -57,10 +57,27 @@ function getMatchesInSentence( sentence, keyphraseForms, locale, matchWordCusto foundWords = []; } } - - return keyphraseForms.map( forms => matchWordFormsWithTokens( forms, sentence.tokens ) ); + return result; } +const markStart = ""; +const markEnd = ""; + +const createMarksForSentence = ( sentence, matches ) => { + const reference = sentence.sourceCodeRange.startOffset; + let sentenceText = sentence.text; + + for ( let i = matches.length - 1; i >= 0; i-- ) { + const match = matches[ i ]; + const startmarkPosition = match.slice( 0 )[ 0 ].sourceCodeRange.startOffset - reference; + const endmarkPosition = match.slice( -1 )[ 0 ].sourceCodeRange.endOffset - reference; + sentenceText = sentenceText.substring( 0, endmarkPosition ) + markEnd + sentenceText.substring( endmarkPosition ); + sentenceText = sentenceText.substring( 0, startmarkPosition ) + markStart + sentenceText.substring( startmarkPosition ); + } + + return sentenceText; +}; + /** * Gets the Mark objects of all keyphrase matches. * @@ -71,26 +88,24 @@ function getMatchesInSentence( sentence, keyphraseForms, locale, matchWordCusto * @returns {Mark[]} The array of Mark objects of the keyphrase matches. */ function getMarkingsInSentence( sentence, matchesInSentence, matchWordCustomHelper ) { - const foundWords = flattenDeep( matchesInSentence.map( match => match.matches ) ); - if ( matchWordCustomHelper ) { - return markWordsInASentence( sentence.text, foundWords, matchWordCustomHelper ); - } - - const wordTexts = foundWords.map( wordB => wordB.text ); + const markedSentence = createMarksForSentence( sentence, matchesInSentence ); + const markings = matchesInSentence.map( match => { + return new Mark( { + position: { + startOffset: match.slice( 0 )[ 0 ].sourceCodeRange.startOffset, + endOffset: match.slice( -1 )[ 0 ].sourceCodeRange.endOffset, + }, + marked: markedSentence, + original: sentence.text, - return foundWords.map( word => - new Mark( - { - position: { startOffset: word.sourceCodeRange.startOffset, endOffset: word.sourceCodeRange.endOffset }, - marked: collectMarkingsInSentence( sentence.text, wordTexts ), - original: sentence.text, - } ) ); + } ); + } ); + return markings; } const mergeConsecutiveMarkings = ( markings ) => { const newMarkings = []; - console.log( ...markings, "TEST1" ); markings.forEach( ( marking ) => { let actionDone = false; newMarkings.forEach( ( newMarking, newMarkingIndex ) => { @@ -123,31 +138,18 @@ export function countKeyphraseInText( sentences, topicForms, locale, matchWordCu return sentences.reduce( ( acc, sentence ) => { const matchesInSentence = getMatchesInSentence( sentence, topicForms.keyphraseForms, locale, matchWordCustomHelper ); - /* - * Check if all words of the keyphrase are found in the sentence. - * One keyphrase occurrence is counted when all words of the keyphrase are contained within the sentence. - * Each sentence can contain multiple keyphrases. - * (e.g. 
"The apple potato is an apple and a potato." has two occurrences of the keyphrase "apple potato"). - */ - const hasAllKeywords = matchesInSentence.every( form => form.count > 0 ); - - if ( ! hasAllKeywords ) { - return acc; - } - // Get the Mark objects of all keyphrase occurrences in the sentence. let markings = getMarkingsInSentence( sentence, matchesInSentence, matchWordCustomHelper ); // console.log(markings); markings = mergeConsecutiveMarkings( markings ); return { - count: acc.count + min( matchesInSentence.map( match => match.count ) ), + count: markings.length, markings: acc.markings.concat( markings ), }; }, { count: 0, markings: [], } ); - } /** From 1ea01d339cb7d5d22b50977abc416f0496549760 Mon Sep 17 00:00:00 2001 From: hdvos Date: Wed, 19 Apr 2023 14:25:20 +0200 Subject: [PATCH 037/616] tokenize keyphrase forms --- .../languageProcessing/researches/keywordCount.js | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js index 63db800716d..bf85a643763 100644 --- a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js +++ b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js @@ -23,7 +23,19 @@ function getMatchesInSentence( sentence, keyphraseForms, locale, matchWordCusto const result = []; const tokens = sentence.tokens; let index = 0; - let initailPosition = 0; + let initialPosition = 0; + + const newKeyphraseForms = []; + keyphraseForms.forEach( word => { + // const newWord = [] + const tokenizedWord = word.map( form => { + return form.split( tokenizerSplitter ); + } ); + + const tokenizedWordTransposed = tokenizedWord[ 0 ].map( ( _, index ) => uniq( tokenizedWord.map( row => row[ index ] ) ) ); + + tokenizedWordTransposed.forEach( newWord => newKeyphraseForms.push( newWord ) ); + } ); let foundWords = []; // eslint-disable-next-line no-constant-condition From be92253d1dab942b8a9a8bb9d574af6b6c9fb263 Mon Sep 17 00:00:00 2001 From: hdvos Date: Wed, 19 Apr 2023 14:26:00 +0200 Subject: [PATCH 038/616] refine keywordCount.js --- .../researches/keywordCount.js | 96 ++++++++++++++++--- 1 file changed, 84 insertions(+), 12 deletions(-) diff --git a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js index bf85a643763..216d02a1a7e 100644 --- a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js +++ b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js @@ -1,9 +1,73 @@ -import { flattenDeep, min, flatten } from "lodash-es"; +import { flattenDeep, min, flatten, map, uniq } from "lodash-es"; import matchTextWithArray from "../helpers/match/matchTextWithArray"; import matchWordFormsWithTokens, { matchTokensWithWordForms } from "../helpers/match/matchWordFormsWithTokens"; import getSentencesFromTree from "../helpers/sentence/getSentencesFromTree"; import { collectMarkingsInSentence, markWordsInASentence } from "../helpers/word/markWordsInSentences"; import Mark from "../../values/Mark"; +import addWordBoundary from "../helpers/word/addWordboundary"; +import { replaceTurkishIsMemoized } from "../helpers/transliterate/specialCharacterMappings"; +import transliterate from "../helpers/transliterate/transliterate"; +import transliterateWP from "../helpers/transliterate/transliterateWPstyle"; +import stripSpaces from "../helpers/sanitize/stripSpaces"; +import { punctuationMatcher } from 
"../helpers/sanitize/removePunctuation"; +import { tokenizerSplitter } from "../../parse/language/LanguageProcessor"; +import { normalizeSingle } from "../helpers/sanitize/quotes"; + + +/** + * Creates a regex from the keyword with included wordboundaries. + * + * @param {string} keyword The keyword to create a regex from. + * @param {string} locale The locale. + * + * @returns {RegExp} Regular expression of the keyword with word boundaries. + */ +const toRegex = function( keyword, locale ) { + keyword = addWordBoundary( keyword, false, "", locale ); + return new RegExp( keyword, "ig" ); +}; + +/** + * Matches a string with and without transliteration. + * @param {string} text The text to match. + * @param {string} keyword The keyword to match in the text. + * @param {string} locale The locale used for transliteration. + * @returns {Array} All matches from the original as the transliterated text and keyword. + */ +const matchTextWithTransliteration = function( text, keyword, locale ) { + let keywordRegex = toRegex( keyword, locale ); + + if ( locale === "tr_TR" ) { + const turkishMappings = replaceTurkishIsMemoized( keyword ); + keywordRegex = new RegExp( turkishMappings.map( x => addWordBoundary( x ) ).join( "|" ), "ig" ); + } + const matches = text.match( keywordRegex ); + + return !! matches; +}; + + +const matchHead = ( head, token ) => { + if ( head.includes( token.text ) ) { + return true; + } + return head.some( keyword => { + return matchTextWithTransliteration( token.text, keyword, "en_US" ); + } ); +}; + +const tokenizeKeyphraseForms = ( keyphraseForms ) => { + let tokenizedKeyPhraseForms = keyphraseForms.map( keyphraseForm => { + return keyphraseForm.map( word => { + return word.split( tokenizerSplitter ).filter( x => x !== "" ); + } ); + } ); + tokenizedKeyPhraseForms = tokenizedKeyPhraseForms.map( x => x[ 0 ] ); + + + // source: https://stackoverflow.com/questions/17428587/transposing-a-2d-array-in-javascript + return tokenizedKeyPhraseForms[ 0 ].map( ( _, index ) => uniq( tokenizedKeyPhraseForms.map( row => row[ index ] ) ) ); +}; /** * Gets the matched keyphrase form(s). @@ -19,7 +83,6 @@ function getMatchesInSentence( sentence, keyphraseForms, locale, matchWordCusto if ( matchWordCustomHelper ) { return keyphraseForms.map( forms => matchTextWithArray( sentence.text, forms, locale, matchWordCustomHelper ) ); } - const result = []; const tokens = sentence.tokens; let index = 0; @@ -40,12 +103,16 @@ function getMatchesInSentence( sentence, keyphraseForms, locale, matchWordCusto let foundWords = []; // eslint-disable-next-line no-constant-condition while ( true ) { - const head = keyphraseForms[ index ]; - const foundPosition = tokens.slice( initailPosition ).findIndex( t => head.includes( t.text ) ); + const head = newKeyphraseForms[ index ]; + + const foundPosition = tokens.slice( initialPosition ).findIndex( t => matchHead( head, t ) ); if ( foundPosition >= 0 ) { if ( index > 0 ) { - if ( tokens.slice( initailPosition, foundPosition + initailPosition ).some( t => t.text.trim() !== "" ) ) { + // if there are non keywords between two found keywords reset. 
+ + if ( tokens.slice( initialPosition, foundPosition + initialPosition ).some( + t => ( t.text.trim() !== "" && t.text.trim() !== "-" ) ) ) { index = 0; foundWords = []; continue; @@ -53,9 +120,9 @@ function getMatchesInSentence( sentence, keyphraseForms, locale, matchWordCusto } index += 1; - initailPosition += foundPosition; - foundWords.push( tokens[ initailPosition ] ); - initailPosition += 1; + initialPosition += foundPosition; + foundWords.push( tokens[ initialPosition ] ); + initialPosition += 1; } else { if ( index === 0 ) { break; @@ -63,12 +130,13 @@ function getMatchesInSentence( sentence, keyphraseForms, locale, matchWordCusto index = 0; } - if ( foundWords.length >= keyphraseForms.length ) { + if ( foundWords.length >= newKeyphraseForms.length ) { result.push( foundWords ); index = 0; foundWords = []; } } + return result; } @@ -100,7 +168,6 @@ const createMarksForSentence = ( sentence, matches ) => { * @returns {Mark[]} The array of Mark objects of the keyphrase matches. */ function getMarkingsInSentence( sentence, matchesInSentence, matchWordCustomHelper ) { - const markedSentence = createMarksForSentence( sentence, matchesInSentence ); const markings = matchesInSentence.map( match => { return new Mark( { @@ -149,9 +216,8 @@ const mergeConsecutiveMarkings = ( markings ) => { export function countKeyphraseInText( sentences, topicForms, locale, matchWordCustomHelper ) { return sentences.reduce( ( acc, sentence ) => { const matchesInSentence = getMatchesInSentence( sentence, topicForms.keyphraseForms, locale, matchWordCustomHelper ); - let markings = getMarkingsInSentence( sentence, matchesInSentence, matchWordCustomHelper ); - // console.log(markings); + markings = mergeConsecutiveMarkings( markings ); return { @@ -175,6 +241,12 @@ export function countKeyphraseInText( sentences, topicForms, locale, matchWordCu export default function keyphraseCount( paper, researcher ) { const topicForms = researcher.getResearch( "morphology" ); + topicForms.keyphraseForms = topicForms.keyphraseForms.map( word => word.map( form => normalizeSingle( form ) ) ); + // console.log(topicForms); + // const processedTopicForms = topicForms.map(word => word.map(form => normalizeSingle(form))) + + // console.log(processedTopicForms); + // A helper to return all the matches for the keyphrase. const matchWordCustomHelper = researcher.getHelper( "matchWordCustomHelper" ); From c2be3e6dec1bd8ada7b0102423625b92816fe268 Mon Sep 17 00:00:00 2001 From: hdvos Date: Wed, 19 Apr 2023 14:26:17 +0200 Subject: [PATCH 039/616] adapt keywordCountSpec.js --- .../researches/keywordCountSpec.js | 36 ++++++++++++------- 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js index 0c8bacafff9..274af4b58ac 100644 --- a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js +++ b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js @@ -72,8 +72,7 @@ describe( "Test for counting the keyword in a text", function() { it( "counts multiple occurrences of a keyphrase consisting of multiple words.", function() { const mockPaper = new Paper( "

a string of text with the key word in it, with more key words.

" ); buildTree( mockPaper, mockResearcher ); - // console.log(keyphraseCount( mockPaper, mockResearcherKeyWord ).markings.map(mark => mark._properties.position)); - // expect( keyphraseCount( mockPaper, mockResearcherKeyWord ).count ).toBe( 2 ); + expect( keyphraseCount( mockPaper, mockResearcherKeyWord ).count ).toBe( 2 ); expect( keyphraseCount( mockPaper, mockResearcherKeyWord ).markings ).toEqual( [ new Mark( { marked: "a string of text with the key word in it, " + @@ -139,36 +138,38 @@ describe( "Test for counting the keyword in a text", function() { const mockPaper = new Paper( "A string with a key-word." ); buildTree( mockPaper, mockResearcher ); expect( keyphraseCount( mockPaper, mockResearcherKeyWord ).count ).toBe( 1 ); - // Note: this behavior might change in the future. } ); it( "counts a string with a keyword with a '_' in it", function() { - const mockPaper = new Paper( "A string with a key_word." ); + const mockPaper = new Paper( "

A string with a key_word.

" ); buildTree( mockPaper, mockResearcher ); expect( keyphraseCount( mockPaper, mockResearcherUnderscore ).count ).toBe( 1 ); expect( keyphraseCount( mockPaper, mockResearcherUnderscore ).markings ).toEqual( [ new Mark( { marked: "A string with a key_word.", - original: "A string with a key_word." } ) ] + original: "A string with a key_word.", + position: { endOffset: 27, startOffset: 19 } } ) ] ); } ); it( "counts a string with with 'kapaklı' as a keyword in it", function() { - const mockPaper = new Paper( "A string with kapaklı." ); + const mockPaper = new Paper( "

A string with kapaklı.

" ); buildTree( mockPaper, mockResearcher ); expect( keyphraseCount( mockPaper, mockResearcherKaplaki ).count ).toBe( 1 ); expect( keyphraseCount( mockPaper, mockResearcherKaplaki ).markings ).toEqual( [ new Mark( { marked: "A string with kapaklı.", - original: "A string with kapaklı." } ) ] + original: "A string with kapaklı.", + position: { endOffset: 24, startOffset: 17 } } ) ] ); } ); it( "counts a string with with '&' in the string and the keyword", function() { - const mockPaper = new Paper( "A string with key&word." ); + const mockPaper = new Paper( "
A string with key&word.
" ); buildTree( mockPaper, mockResearcher ); expect( keyphraseCount( mockPaper, mockResearcherAmpersand ).count ).toBe( 1 ); expect( keyphraseCount( mockPaper, mockResearcherAmpersand ).markings ).toEqual( [ new Mark( { marked: "A string with key&word.", - original: "A string with key&word." } ) ] + original: "A string with key&word.", + position: { endOffset: 26, startOffset: 18 } } ) ] ); } ); @@ -180,23 +181,25 @@ describe( "Test for counting the keyword in a text", function() { } ); it( "keyword counting is blind to CApiTal LeTteRs.", function() { - const mockPaper = new Paper( "A string with KeY worD." ); + const mockPaper = new Paper( "

A string with KeY worD.

" ); buildTree( mockPaper, mockResearcher ); expect( keyphraseCount( mockPaper, mockResearcherKeyWord ).count ).toBe( 1 ); expect( keyphraseCount( mockPaper, mockResearcherKeyWord ).markings ).toEqual( [ new Mark( { marked: "A string with KeY worD.", - original: "A string with KeY worD." } ) ] + original: "A string with KeY worD.", + position: { endOffset: 26, startOffset: 18 } } ) ] ); } ); it( "keyword counting is blind to types of apostrophe.", function() { - const mockPaper = new Paper( "A string with quotes to match the key'word, even if the quotes differ." ); + const mockPaper = new Paper( "

A string with quotes to match the key'word, even if the quotes differ.

" ); buildTree( mockPaper, mockResearcher ); + expect( keyphraseCount( mockPaper, mockResearcherApostrophe ).count ).toBe( 1 ); expect( keyphraseCount( mockPaper, mockResearcherApostrophe ).markings ).toEqual( [ new Mark( { marked: "A string with quotes to match the key'word, " + "even if the quotes differ.", - original: "A string with quotes to match the key'word, even if the quotes differ." } ) ] + original: "A string with quotes to match the key'word, even if the quotes differ.", position: { endOffset: 45, startOffset: 37 } } ) ] ); } ); @@ -233,6 +236,13 @@ describe( "Test for counting the keyword in a text", function() { ); } ); + it( "counts 'key word' when interjected with a function word.", function() { + const mockPaper = new Paper( "A string with a key the word." ); + buildTree( mockPaper, mockResearcher ); + expect( keyphraseCount( mockPaper, mockResearcherKeyWord ).count ).toBe( 1 ); + // Note: this behavior might change in the future. + } ); + it( "doesn't match singular forms in reduplicated plurals in Indonesian", function() { const mockPaper = new Paper( "Lorem ipsum dolor sit amet, consectetur keyword-keyword, keyword adipiscing elit.", { locale: "id_ID" } ); buildTree( mockPaper, mockResearcher ); From 7d4873829a6deb127e0c99426a2ff5255d5bac33 Mon Sep 17 00:00:00 2001 From: hdvos Date: Wed, 19 Apr 2023 14:26:56 +0200 Subject: [PATCH 040/616] modify and export tokenizersplitter from LanguageProcessor.js --- packages/yoastseo/src/parse/language/LanguageProcessor.js | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/packages/yoastseo/src/parse/language/LanguageProcessor.js b/packages/yoastseo/src/parse/language/LanguageProcessor.js index edd24304170..613b66188f4 100644 --- a/packages/yoastseo/src/parse/language/LanguageProcessor.js +++ b/packages/yoastseo/src/parse/language/LanguageProcessor.js @@ -2,6 +2,9 @@ import Sentence from "../structure/Sentence"; import Token from "../structure/Token"; const whitespaceRegex = /^\s+$/; + +export const tokenizerSplitter = /([\s-_,.!?;:([\]'"¡¿)/])/g; + /** * Contains language-specific logic for splitting a text into sentences and tokens. */ @@ -56,7 +59,8 @@ class LanguageProcessor { // Retrieve sentence from sentence class const sentenceText = sentence.text; // Split the sentence string into tokens - const tokenTexts = sentenceText.split( /([\s,.!?;:([\]'"¡¿)/])/g ).filter( x => x !== "" ); + const tokenTexts = sentenceText.split( tokenizerSplitter ).filter( x => x !== "" ); + return tokenTexts.map( tokenText => new Token( tokenText ) ); } } From c26b8d741e82efd0bba38591b75f9235d53f35da Mon Sep 17 00:00:00 2001 From: hdvos Date: Fri, 21 Apr 2023 13:19:40 +0200 Subject: [PATCH 041/616] simplify keywordCount.js --- .../researches/keywordCountSpec.js | 40 +++- .../researches/keywordCount.js | 182 ++++++++++++------ 2 files changed, 156 insertions(+), 66 deletions(-) diff --git a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js index 274af4b58ac..e9c2ad5b55e 100644 --- a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js +++ b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js @@ -218,21 +218,36 @@ describe( "Test for counting the keyword in a text", function() { } ); it( "doesn't count 'key-word' in 'key word'.", function() { - const mockPaper = new Paper( "Lorem ipsum dolor sit amet, key word consectetur key-word adipiscing elit." ); + const mockPaper = new Paper( "

Lorem ipsum dolor sit amet, key word consectetur key-word adipiscing elit.

" ); buildTree( mockPaper, mockResearcher ); expect( keyphraseCount( mockPaper, mockResearcherMinus ).count ).toBe( 1 ); // Note: this behavior might change in in the future. } ); it( "only counts full key phrases (when all keywords are in the sentence once, twice etc.) as matches.", function() { - const mockPaper = new Paper( "A string with three keys (key and another key) and one word." ); + const mockPaper = new Paper( "

A string with three keys (key and another key) and one word.

" ); buildTree( mockPaper, mockResearcher ); expect( keyphraseCount( mockPaper, mockResearcherKeyWord ).count ).toBe( 1 ); expect( keyphraseCount( mockPaper, mockResearcherKeyWord ).markings ).toEqual( [ - new Mark( { marked: "A string with three keys (" + + new Mark( { + marked: "A string with three keys (" + + "key and another key) and one word.", + original: "A string with three keys (key and another key) and one word.", + position: { endOffset: 62, startOffset: 45 } } ), + new Mark( { + marked: "A string with three keys (" + "key and another key) and one word.", - original: "A string with three keys (key and another key) and one word." } ) ] + original: "A string with three keys (key and another key) and one word.", + position: { endOffset: 27, startOffset: 23 } } ), + new Mark( { + marked: "A string with three keys (" + + "key and another key) and one word.", + original: "A string with three keys (key and another key) and one word.", + position: { endOffset: 32, startOffset: 29 } } ), + ] ); } ); @@ -243,8 +258,8 @@ describe( "Test for counting the keyword in a text", function() { // Note: this behavior might change in the future. } ); - it( "doesn't match singular forms in reduplicated plurals in Indonesian", function() { - const mockPaper = new Paper( "Lorem ipsum dolor sit amet, consectetur keyword-keyword, keyword adipiscing elit.", { locale: "id_ID" } ); + it.skip( "doesn't match singular forms in reduplicated plurals in Indonesian", function() { + const mockPaper = new Paper( "

Lorem ipsum dolor sit amet, consectetur keyword-keyword, keyword adipiscing elit.

", { locale: "id_ID" } ); buildTree( mockPaper, mockResearcher ); expect( keyphraseCount( mockPaper, mockResearcher ).count ).toBe( 1 ); } ); @@ -278,7 +293,18 @@ const buildJapaneseMockResearcher = function( keyphraseForms, helper1, helper2 ) // Decided not to remove test below as it tests the added logic of the Japanese helpers. describe( "Test for counting the keyword in a text for Japanese", () => { it( "counts/marks a string of text with a keyword in it.", function() { - const mockPaper = new Paper( "私の猫はかわいいです。", { locale: "ja", keyphrase: "猫" } ); + const mockPaper = new Paper( "

私の猫はかわいいです。猫はかわいいです。", + original: "私の猫はかわいいです。" } ) ] ); + } ); + + it( "counts/marks a string of text with multiple occurences of the same keyword in it.", function() { + const mockPaper = new Paper( "

私の猫はかわいい猫です。 { return true; } return head.some( keyword => { - return matchTextWithTransliteration( token.text, keyword, "en_US" ); + return matchTextWithTransliteration( token.text, keyword, "en_US" ); // TODO get locale from params } ); }; @@ -69,25 +71,8 @@ const tokenizeKeyphraseForms = ( keyphraseForms ) => { return tokenizedKeyPhraseForms[ 0 ].map( ( _, index ) => uniq( tokenizedKeyPhraseForms.map( row => row[ index ] ) ) ); }; -/** - * Gets the matched keyphrase form(s). - * - * @param {string} sentence The sentence to check. - * @param {Array} keyphraseForms The keyphrase forms. - * @param {string} locale The locale used in the analysis. - * @param {function} matchWordCustomHelper A custom helper to match words with a text. - * - * @returns {Array} The array of matched keyphrase form(s). - */ -function getMatchesInSentence( sentence, keyphraseForms, locale, matchWordCustomHelper ) { - if ( matchWordCustomHelper ) { - return keyphraseForms.map( forms => matchTextWithArray( sentence.text, forms, locale, matchWordCustomHelper ) ); - } - const result = []; - const tokens = sentence.tokens; - let index = 0; - let initialPosition = 0; - +// TODO: better descriptor than process +const processKeyphraseForms = ( keyphraseForms ) => { const newKeyphraseForms = []; keyphraseForms.forEach( word => { // const newWord = [] @@ -99,45 +84,100 @@ function getMatchesInSentence( sentence, keyphraseForms, locale, matchWordCusto tokenizedWordTransposed.forEach( newWord => newKeyphraseForms.push( newWord ) ); } ); + return newKeyphraseForms; +}; + +const getMatchesInTokens = ( keyphraseForms, tokens ) => { + const result = { + primaryMatches: [], + secondaryMatches: [], + }; + + // Index is the ith word from the wordforms. It indicates which word we are currently analyzing. + let keyPhraseFormsIndex = 0; + // positionInSentence keeps track of what word in the sentence we are currently at. + let positionInSentence = 0; // TODO: rename naar sentencePosition let foundWords = []; // eslint-disable-next-line no-constant-condition while ( true ) { - const head = newKeyphraseForms[ index ]; - - const foundPosition = tokens.slice( initialPosition ).findIndex( t => matchHead( head, t ) ); + const head = keyphraseForms[ keyPhraseFormsIndex ]; + const foundPosition = tokens.slice( positionInSentence ).findIndex( t => matchHead( head, t ) ); + // If an occurence of a word is found, see if the subsequent word also is find. if ( foundPosition >= 0 ) { - if ( index > 0 ) { + if ( keyPhraseFormsIndex > 0 ) { // if there are non keywords between two found keywords reset. - if ( tokens.slice( initialPosition, foundPosition + initialPosition ).some( - t => ( t.text.trim() !== "" && t.text.trim() !== "-" ) ) ) { - index = 0; + const previousHead = keyphraseForms[ Math.max( 0, keyPhraseFormsIndex - 1 ) ]; // TODO: better way to extract previoushead. 
+ if ( tokens.slice( positionInSentence, foundPosition + positionInSentence ).some( + t => { + return previousHead.includes( t.text ); // TODO: use matchhead + } ) ) { + result.secondaryMatches.push( tokens[ positionInSentence - 1 ] ); + keyPhraseFormsIndex = 0; foundWords = []; continue; } } - index += 1; - initialPosition += foundPosition; - foundWords.push( tokens[ initialPosition ] ); - initialPosition += 1; + keyPhraseFormsIndex += 1; + positionInSentence += foundPosition; + foundWords.push( tokens[ positionInSentence ] ); + positionInSentence += 1; } else { - if ( index === 0 ) { + if ( keyPhraseFormsIndex === 0 ) { break; } - index = 0; + keyPhraseFormsIndex = 0; } - if ( foundWords.length >= newKeyphraseForms.length ) { - result.push( foundWords ); - index = 0; + if ( foundWords.length >= keyphraseForms.length ) { + result.primaryMatches.push( foundWords ); + keyPhraseFormsIndex = 0; foundWords = []; } } - + console.log( result ); return result; +}; + +/** + * Gets the matched keyphrase form(s). + * + * @param {string} sentence The sentence to check. + * @param {Array} keyphraseForms The keyphrase forms. + * @param {string} locale The locale used in the analysis. + * @param {function} matchWordCustomHelper A custom helper to match words with a text. + * + * @returns {Array} The array of matched keyphrase form(s). + */ +function getMatchesInSentence( sentence, keyphraseForms, locale, matchWordCustomHelper ) { + if ( matchWordCustomHelper ) { + // TODO: unify return types and forms. + const matches = keyphraseForms.map( forms => matchTextWithArray( sentence.text, forms, locale, matchWordCustomHelper ) ); + + console.log( { primaryMatches: matches.matches, secondaryMatches: [], position: matches.position } ); + const matchesAsTokens = matches.map( match => { + const count = match.count; + const firstPosition = match.position; + + const moreMatches = match.matches; + } ); + // + // } + + + return { primaryMatches: matches.matches, secondaryMatches: [], position: matches.position }; + } + + const tokens = sentence.tokens; + + const newKeyphraseForms = processKeyphraseForms( keyphraseForms, locale ); + + const matches = getMatchesInTokens( newKeyphraseForms, tokens ); + console.log( matches ); + return matches; } const markStart = ""; @@ -147,14 +187,24 @@ const createMarksForSentence = ( sentence, matches ) => { const reference = sentence.sourceCodeRange.startOffset; let sentenceText = sentence.text; - for ( let i = matches.length - 1; i >= 0; i-- ) { - const match = matches[ i ]; - const startmarkPosition = match.slice( 0 )[ 0 ].sourceCodeRange.startOffset - reference; - const endmarkPosition = match.slice( -1 )[ 0 ].sourceCodeRange.endOffset - reference; - sentenceText = sentenceText.substring( 0, endmarkPosition ) + markEnd + sentenceText.substring( endmarkPosition ); - sentenceText = sentenceText.substring( 0, startmarkPosition ) + markStart + sentenceText.substring( startmarkPosition ); + let allMatches = flatten( matches.primaryMatches ); + if ( matches.primaryMatches.length > 0 ) { + allMatches = allMatches.concat( matches.secondaryMatches ).sort( function( a, b ) { + return a.sourceCodeRange.startOffset - b.sourceCodeRange.startOffset; + } ); + } + + for ( let i = allMatches.length - 1; i >= 0; i-- ) { + const match = allMatches[ i ]; + + sentenceText = sentenceText.substring( 0, match.sourceCodeRange.endOffset - reference ) + markEnd + + sentenceText.substring( match.sourceCodeRange.endOffset - reference ); + sentenceText = sentenceText.substring( 0, 
match.sourceCodeRange.startOffset - reference ) + markStart + + sentenceText.substring( match.sourceCodeRange.startOffset - reference ); } + sentenceText = sentenceText.replace( new RegExp( "( ?)", "ig" ), "$1" ); + return sentenceText; }; @@ -169,7 +219,7 @@ const createMarksForSentence = ( sentence, matches ) => { */ function getMarkingsInSentence( sentence, matchesInSentence, matchWordCustomHelper ) { const markedSentence = createMarksForSentence( sentence, matchesInSentence ); - const markings = matchesInSentence.map( match => { + const markings = matchesInSentence.primaryMatches.map( match => { return new Mark( { position: { startOffset: match.slice( 0 )[ 0 ].sourceCodeRange.startOffset, @@ -180,9 +230,30 @@ function getMarkingsInSentence( sentence, matchesInSentence, matchWordCustomHelp } ); } ); + + if ( matchesInSentence.primaryMatches.length > 0 ) { + matchesInSentence.secondaryMatches.forEach( match =>{ + markings.push( new Mark( { + position: { + startOffset: match.sourceCodeRange.startOffset, + endOffset: match.sourceCodeRange.endOffset, + }, + marked: markedSentence, + original: sentence.text, + + } ) ); + } + ); + } + return markings; } +/** + * Merges consecutive markings into one marking. + * @param {[]} markings + * @returns {*[]} + */ const mergeConsecutiveMarkings = ( markings ) => { const newMarkings = []; markings.forEach( ( marking ) => { @@ -214,20 +285,18 @@ const mergeConsecutiveMarkings = ( markings ) => { * @returns {{markings: Mark[], count: number}} The number of keyphrase occurrences in the text and the Mark objects of the matches. */ export function countKeyphraseInText( sentences, topicForms, locale, matchWordCustomHelper ) { - return sentences.reduce( ( acc, sentence ) => { + const result = { count: 0, markings: [] }; + + sentences.forEach( sentence => { const matchesInSentence = getMatchesInSentence( sentence, topicForms.keyphraseForms, locale, matchWordCustomHelper ); - let markings = getMarkingsInSentence( sentence, matchesInSentence, matchWordCustomHelper ); - markings = mergeConsecutiveMarkings( markings ); + const markings = getMarkingsInSentence( sentence, matchesInSentence, matchWordCustomHelper ); - return { - count: markings.length, - markings: acc.markings.concat( markings ), - }; - }, { - count: 0, - markings: [], + result.markings.push( markings ); // TODO: add test for multiple sentences + result.count += matchesInSentence.primaryMatches.length; } ); + + return result; } /** @@ -242,12 +311,7 @@ export default function keyphraseCount( paper, researcher ) { const topicForms = researcher.getResearch( "morphology" ); topicForms.keyphraseForms = topicForms.keyphraseForms.map( word => word.map( form => normalizeSingle( form ) ) ); - // console.log(topicForms); - // const processedTopicForms = topicForms.map(word => word.map(form => normalizeSingle(form))) - - // console.log(processedTopicForms); - // A helper to return all the matches for the keyphrase. 
const matchWordCustomHelper = researcher.getHelper( "matchWordCustomHelper" ); const locale = paper.getLocale(); From d69676f6fb85bc345db0f178fd9165a82d41b4e3 Mon Sep 17 00:00:00 2001 From: hdvos Date: Fri, 21 Apr 2023 15:17:26 +0200 Subject: [PATCH 042/616] allow for setting sourceCodeRange via the constructor in Token.js --- packages/yoastseo/src/parse/structure/Token.js | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/packages/yoastseo/src/parse/structure/Token.js b/packages/yoastseo/src/parse/structure/Token.js index 8298d89cc23..695fb343b50 100644 --- a/packages/yoastseo/src/parse/structure/Token.js +++ b/packages/yoastseo/src/parse/structure/Token.js @@ -6,14 +6,15 @@ class Token { * Creates a new token. * * @param {string} text The token's text. + * @param {SourceCodeRange} sourceCodeRange The start and end positions of the token in the source code. */ - constructor( text ) { + constructor( text, sourceCodeRange = {} ) { this.text = text; /** * The start and end positions of the token in the source code. * @type {SourceCodeRange} */ - this.sourceCodeRange = {}; + this.sourceCodeRange = sourceCodeRange; } } From 91fd1e5a0483f47b06f0fbab14db64e239adc255 Mon Sep 17 00:00:00 2001 From: hdvos Date: Fri, 21 Apr 2023 15:18:25 +0200 Subject: [PATCH 043/616] fix wordmatching for languages with a custom matchWordCustomHelper in keywordCount.js --- .../researches/keywordCountSpec.js | 11 ++++-- .../researches/keywordCount.js | 38 +++++++++++++------ 2 files changed, 34 insertions(+), 15 deletions(-) diff --git a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js index e9c2ad5b55e..e548ede43a3 100644 --- a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js +++ b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js @@ -300,7 +300,7 @@ describe( "Test for counting the keyword in a text for Japanese", () => { expect( keyphraseCount( mockPaper, researcher ).count ).toBe( 1 ); expect( keyphraseCount( mockPaper, researcher ).markings ).toEqual( [ new Mark( { marked: "私のはかわいいです。", - original: "私の猫はかわいいです。" } ) ] ); + original: "私の猫はかわいいです。", position: { endOffset: 6, startOffset: 5 } } ) ] ); } ); it( "counts/marks a string of text with multiple occurences of the same keyword in it.", function() { @@ -308,10 +308,13 @@ describe( "Test for counting the keyword in a text for Japanese", () => { const researcher = buildJapaneseMockResearcher( [ [ "猫" ] ], wordsCountHelper, matchWordsHelper ); buildTree( mockPaper, researcher ); - expect( keyphraseCount( mockPaper, researcher ).count ).toBe( 1 ); + expect( keyphraseCount( mockPaper, researcher ).count ).toBe( 2 ); expect( keyphraseCount( mockPaper, researcher ).markings ).toEqual( [ - new Mark( { marked: "私のはかわいいです。", - original: "私の猫はかわいいです。" } ) ] ); + new Mark( { marked: "私のはかわいいです。", + original: "私の猫はかわいい猫です。", + position: { endOffset: 6, startOffset: 5 } } ), new Mark( { marked: "私のはかわいいです。", + original: "私の猫はかわいい猫です。", + position: { endOffset: 12, startOffset: 11 } } ) ] ); } ); it( "counts a string of text with no keyword in it.", function() { diff --git a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js index 97ba40b9e87..8f3bc97a7ae 100644 --- a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js +++ b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js @@ -142,6 +142,30 
@@ const getMatchesInTokens = ( keyphraseForms, tokens ) => { return result; }; +const getAllIndicesOfWord = ( word, sentence ) => { + return sentence.text.split( "" ).map( ( x, i ) => x === word ? i : "" ).filter( Boolean ); +}; + +const convertToPositionResult = ( matches, sentence ) => { + const result = { primaryMatches: [], secondaryMatches: [], position: 0 }; + matches.forEach( matchObject => { + const matchWords = matchObject.matches; + + uniq( matchWords ).forEach( matchWord => { + const indices = getAllIndicesOfWord( matchWord, sentence ); + indices.forEach( index => { + const startOffset = sentence.sourceCodeRange.startOffset + index; + const endOffset = sentence.sourceCodeRange.startOffset + index + matchWord.length; + + const matchToken = new Token( matchWord, { startOffset: startOffset, endOffset: endOffset } ); + result.primaryMatches.push( [ matchToken ] ); + } ); + } ); + } ); + + return result; +}; + /** * Gets the matched keyphrase form(s). * @@ -155,20 +179,12 @@ const getMatchesInTokens = ( keyphraseForms, tokens ) => { function getMatchesInSentence( sentence, keyphraseForms, locale, matchWordCustomHelper ) { if ( matchWordCustomHelper ) { // TODO: unify return types and forms. - const matches = keyphraseForms.map( forms => matchTextWithArray( sentence.text, forms, locale, matchWordCustomHelper ) ); - console.log( { primaryMatches: matches.matches, secondaryMatches: [], position: matches.position } ); - const matchesAsTokens = matches.map( match => { - const count = match.count; - const firstPosition = match.position; - - const moreMatches = match.matches; - } ); - // - // } + const matches = keyphraseForms.map( forms => matchTextWithArray( sentence.text, forms, locale, matchWordCustomHelper ) ); + const result = convertToPositionResult( matches, sentence ); - return { primaryMatches: matches.matches, secondaryMatches: [], position: matches.position }; + return result; } const tokens = sentence.tokens; From b4c888d9186286b78623ff34fb0e8ff5368fc576 Mon Sep 17 00:00:00 2001 From: hdvos Date: Fri, 21 Apr 2023 15:21:24 +0200 Subject: [PATCH 044/616] cleanup code in keywordCount.js --- .../src/languageProcessing/researches/keywordCount.js | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js index 8f3bc97a7ae..79d428d93b4 100644 --- a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js +++ b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js @@ -178,8 +178,6 @@ const convertToPositionResult = ( matches, sentence ) => { */ function getMatchesInSentence( sentence, keyphraseForms, locale, matchWordCustomHelper ) { if ( matchWordCustomHelper ) { - // TODO: unify return types and forms. 
- const matches = keyphraseForms.map( forms => matchTextWithArray( sentence.text, forms, locale, matchWordCustomHelper ) ); const result = convertToPositionResult( matches, sentence ); @@ -192,7 +190,7 @@ function getMatchesInSentence( sentence, keyphraseForms, locale, matchWordCusto const newKeyphraseForms = processKeyphraseForms( keyphraseForms, locale ); const matches = getMatchesInTokens( newKeyphraseForms, tokens ); - console.log( matches ); + return matches; } From 11297c26029c88c4dceff12ecdcb30dade917861 Mon Sep 17 00:00:00 2001 From: hdvos Date: Mon, 24 Apr 2023 10:28:52 +0200 Subject: [PATCH 045/616] Fix more edge cases --- .../researches/keywordCountSpec.js | 20 +++++++++++++++++++ .../researches/keywordCount.js | 15 +++++++++----- 2 files changed, 30 insertions(+), 5 deletions(-) diff --git a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js index e548ede43a3..36d52a606c6 100644 --- a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js +++ b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js @@ -62,6 +62,26 @@ describe( "Test for counting the keyword in a text", function() { ] ); } ); + it( "counts/marks a string of text consisting of multiple sentences with a keyword in it.", function() { + const mockPaper = new Paper( "

A sentence with the keyword in it. Another sentence with the keyword in it.

", { keyword: "keyword" } ); + researcher.setPaper( mockPaper ); + buildTree( mockPaper, researcher ); + expect( keyphraseCount( mockPaper, researcher ).count ).toBe( 2 ); + expect( keyphraseCount( mockPaper, researcher ).markings ).toEqual( + [ new Mark( { + fieldsToMark: [], + marked: "A sentence with the keyword in it.", + original: "A sentence with the keyword in it.", + position: { endOffset: 30, startOffset: 23 }, + } ), + new Mark( { + fieldsToMark: [], + marked: " Another sentence with the keyword in it.", + original: " Another sentence with the keyword in it.", + position: { endOffset: 37, startOffset: 30 } } ), + ] ); + } ); + it( "counts a string of text with no keyword in it.", function() { const mockPaper = new Paper( "a string of text" ); buildTree( mockPaper, mockResearcher ); diff --git a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js index 79d428d93b4..644af36277d 100644 --- a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js +++ b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js @@ -197,6 +197,12 @@ function getMatchesInSentence( sentence, keyphraseForms, locale, matchWordCusto const markStart = ""; const markEnd = ""; +/** + * Create marks for a sentence. This function creates marks for the (old) search based highlighting. + * @param {Sentence[]} sentence The sentence to which to apply the marks. + * @param {{}[]} matches The matches to apply. + * @returns {string} The sentence with marks applied. + */ const createMarksForSentence = ( sentence, matches ) => { const reference = sentence.sourceCodeRange.startOffset; let sentenceText = sentence.text; @@ -227,11 +233,10 @@ const createMarksForSentence = ( sentence, matches ) => { * * @param {string} sentence The sentence to check. * @param {Array} matchesInSentence The array of keyphrase matches in the sentence. - * @param {function} matchWordCustomHelper A custom helper to match words with a text. * * @returns {Mark[]} The array of Mark objects of the keyphrase matches. */ -function getMarkingsInSentence( sentence, matchesInSentence, matchWordCustomHelper ) { +function getMarkingsInSentence( sentence, matchesInSentence ) { const markedSentence = createMarksForSentence( sentence, matchesInSentence ); const markings = matchesInSentence.primaryMatches.map( match => { return new Mark( { @@ -265,8 +270,8 @@ function getMarkingsInSentence( sentence, matchesInSentence, matchWordCustomHelp /** * Merges consecutive markings into one marking. - * @param {[]} markings - * @returns {*[]} + * @param {{}[]} markings An array of markings to merge. + * @returns {{}[]} An array of markings where consecutive markings are merged. */ const mergeConsecutiveMarkings = ( markings ) => { const newMarkings = []; @@ -306,7 +311,7 @@ export function countKeyphraseInText( sentences, topicForms, locale, matchWordCu const markings = getMarkingsInSentence( sentence, matchesInSentence, matchWordCustomHelper ); - result.markings.push( markings ); // TODO: add test for multiple sentences + result.markings.push( markings ); result.count += matchesInSentence.primaryMatches.length; } ); From 325c67f97488d581df047942fa2651383313b831 Mon Sep 17 00:00:00 2001 From: hdvos Date: Tue, 25 Apr 2023 10:04:18 +0200 Subject: [PATCH 046/616] fix regular multi-sentence situation. 
--- .../spec/languageProcessing/researches/keywordCountSpec.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js index 36d52a606c6..b918898c1cd 100644 --- a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js +++ b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js @@ -78,7 +78,7 @@ describe( "Test for counting the keyword in a text", function() { fieldsToMark: [], marked: " Another sentence with the keyword in it.", original: " Another sentence with the keyword in it.", - position: { endOffset: 37, startOffset: 30 } } ), + position: { endOffset: 71, startOffset: 64 } } ), ] ); } ); From 9fe216661a9ae872d314f82e9bcaa34f2b07863e Mon Sep 17 00:00:00 2001 From: hdvos Date: Wed, 26 Apr 2023 10:57:53 +0200 Subject: [PATCH 047/616] checkpoint 26 april 10:57 --- .../researches/keywordCountSpec.js | 25 +++- .../researches/keywordCount.js | 117 +++++++++++++++--- 2 files changed, 121 insertions(+), 21 deletions(-) diff --git a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js index b918898c1cd..b9eb77de26e 100644 --- a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js +++ b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js @@ -332,9 +332,10 @@ describe( "Test for counting the keyword in a text for Japanese", () => { expect( keyphraseCount( mockPaper, researcher ).markings ).toEqual( [ new Mark( { marked: "私のはかわいいです。", original: "私の猫はかわいい猫です。", - position: { endOffset: 6, startOffset: 5 } } ), new Mark( { marked: "私のはかわいいです。", - original: "私の猫はかわいい猫です。", - position: { endOffset: 12, startOffset: 11 } } ) ] ); + position: { endOffset: 6, startOffset: 5 } } ), new Mark( { marked: "私のはかわいい" + + "です。", + original: "私の猫はかわいい猫です。", + position: { endOffset: 12, startOffset: 11 } } ) ] ); } ); it( "counts a string of text with no keyword in it.", function() { @@ -346,7 +347,7 @@ describe( "Test for counting the keyword in a text for Japanese", () => { } ); it( "counts multiple occurrences of a keyphrase consisting of multiple words.", function() { - const mockPaper = new Paper( "私の猫はかわいいですかわいい。", { locale: "ja" } ); + const mockPaper = new Paper( "

私の猫はかわいいですかわいい。

", { locale: "ja" } ); const researcher = buildJapaneseMockResearcher( [ [ "猫" ], [ "かわいい" ] ], wordsCountHelper, matchWordsHelper ); buildTree( mockPaper, researcher ); expect( keyphraseCount( mockPaper, researcher ).count ).toBe( 1 ); @@ -355,6 +356,20 @@ describe( "Test for counting the keyword in a text for Japanese", () => { marked: "私のかわいい" + "ですかわいい。", original: "私の猫はかわいいですかわいい。", - } ) ] ); + position: { endOffset: 6, startOffset: 5 }, + } ), + new Mark( { + marked: "私のかわいい" + + "ですかわいい。", + original: "私の猫はかわいいですかわいい。", + position: { endOffset: 11, startOffset: 7 }, + } ), + new Mark( { + marked: "私のかわいい" + + "ですかわいい。", + original: "私の猫はかわいいですかわいい。", + position: { endOffset: 17, startOffset: 13 }, + } ), + ] ); } ); } ); diff --git a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js index 644af36277d..3a57f90654a 100644 --- a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js +++ b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js @@ -71,7 +71,12 @@ const tokenizeKeyphraseForms = ( keyphraseForms ) => { return tokenizedKeyPhraseForms[ 0 ].map( ( _, index ) => uniq( tokenizedKeyPhraseForms.map( row => row[ index ] ) ) ); }; -// TODO: better descriptor than process +// TODO: better descriptor than `process` +/** + * Processes the keyphrase forms to make sure that they are tokenized in the same way as the text. + * @param {array[]} keyphraseForms The keyphrase forms to process. + * @returns {*[]} The processed keyphrase forms. + */ const processKeyphraseForms = ( keyphraseForms ) => { const newKeyphraseForms = []; keyphraseForms.forEach( word => { @@ -87,6 +92,12 @@ const processKeyphraseForms = ( keyphraseForms ) => { return newKeyphraseForms; }; +/** + * Matches the keyphrase forms in an array of tokens. + * @param {array[]} keyphraseForms The keyphrase forms to match. + * @param {Token[]} tokens The tokens to match the keyphrase forms in. + * @returns {{primaryMatches: *[], secondaryMatches: *[]}} The matches. + */ const getMatchesInTokens = ( keyphraseForms, tokens ) => { const result = { primaryMatches: [], @@ -96,7 +107,7 @@ const getMatchesInTokens = ( keyphraseForms, tokens ) => { // Index is the ith word from the wordforms. It indicates which word we are currently analyzing. let keyPhraseFormsIndex = 0; // positionInSentence keeps track of what word in the sentence we are currently at. - let positionInSentence = 0; // TODO: rename naar sentencePosition + let positionInSentence = 0; // TODO: rename naar sentencePosition? let foundWords = []; // eslint-disable-next-line no-constant-condition @@ -138,16 +149,41 @@ const getMatchesInTokens = ( keyphraseForms, tokens ) => { foundWords = []; } } - console.log( result ); + return result; }; +/** + * Gets all indices of a word in a sentence. + * @param {string} word The word. + * @param {Sentence} sentence The sentence. + * @returns {array} The indices. + */ const getAllIndicesOfWord = ( word, sentence ) => { - return sentence.text.split( "" ).map( ( x, i ) => x === word ? i : "" ).filter( Boolean ); + // TODO: more fuzzy matching. Agnostic to capitalization. + const text = sentence.text; + + // get all the indices of the word in the sentence. + const indices = []; + let index = text.indexOf( word ); + while ( index > -1 ) { + indices.push( index ); + index = text.indexOf( word, index + 1 ); + } + + return indices; + // return sentence.text.split( "" ).map( ( x, i ) => x === word ? 
i : "" ).filter( Boolean ); }; -const convertToPositionResult = ( matches, sentence ) => { +/** + * Converts the matches to the format that is used in the assessment. + * @param {array} matches The matches. + * @param {Sentence} sentence The sentence. + * @returns {{primaryMatches: *[], secondaryMatches: *[], position: number}} The matches in the format that is used in the assessment. + */ +const convertToPositionResult = ( matches, sentence, keyPhraseForms ) => { const result = { primaryMatches: [], secondaryMatches: [], position: 0 }; + const matchTokens = []; matches.forEach( matchObject => { const matchWords = matchObject.matches; @@ -158,12 +194,55 @@ const convertToPositionResult = ( matches, sentence ) => { const endOffset = sentence.sourceCodeRange.startOffset + index + matchWord.length; const matchToken = new Token( matchWord, { startOffset: startOffset, endOffset: endOffset } ); - result.primaryMatches.push( [ matchToken ] ); + + matchTokens.push( matchToken ); } ); } ); } ); + console.log( matchTokens ); + // Sort tokens on startOffset. + matchTokens.sort( ( a, b ) => a.sourceCodeRange.startOffset - b.sourceCodeRange.startOffset ); + console.log( matchTokens ); - return result; + const primaryMatches = []; + const secondaryMatches = []; + + let currentMatch = []; + let keyPhraseFormsIndex = 0; + + // A primary match is a match that contains all the keyphrase forms in the same order as they occur in the keyphrase. + // A secondary match is any other match. + matchTokens.forEach( ( token, index ) => { + const head = keyPhraseForms[ keyPhraseFormsIndex ]; + if ( head && matchHead( head, token ) ) { + currentMatch.push( token ); + keyPhraseFormsIndex += 1; + if ( currentMatch.length === keyPhraseForms.length ) { + primaryMatches.push( currentMatch ); + currentMatch = []; + keyPhraseFormsIndex = 0; + } + } else { + if ( currentMatch.length > 0 ) { + if ( currentMatch.length === keyPhraseForms.length ) { + primaryMatches.push( currentMatch ); + } else { + secondaryMatches.push( currentMatch ); + } + } else { + secondaryMatches.push( [ token ] ); + } + currentMatch = []; + keyPhraseFormsIndex = 0; + } + } ); + + + return { + primaryMatches: primaryMatches, + secondaryMatches: secondaryMatches, + position: 0, + }; }; /** @@ -177,10 +256,16 @@ const convertToPositionResult = ( matches, sentence ) => { * @returns {Array} The array of matched keyphrase form(s). */ function getMatchesInSentence( sentence, keyphraseForms, locale, matchWordCustomHelper ) { + let matches, result; + if ( matchWordCustomHelper ) { - const matches = keyphraseForms.map( forms => matchTextWithArray( sentence.text, forms, locale, matchWordCustomHelper ) ); + matches = keyphraseForms.map( forms => matchTextWithArray( sentence.text, forms, locale, matchWordCustomHelper ) ); + // If one of the forms is not found, return an empty result. 
+ if ( matches.some( match => match.position === -1 ) ) { + return { primaryMatches: [], secondaryMatches: [], position: -1 }; + } - const result = convertToPositionResult( matches, sentence ); + result = convertToPositionResult( matches, sentence, keyphraseForms ); return result; } @@ -189,7 +274,7 @@ function getMatchesInSentence( sentence, keyphraseForms, locale, matchWordCusto const newKeyphraseForms = processKeyphraseForms( keyphraseForms, locale ); - const matches = getMatchesInTokens( newKeyphraseForms, tokens ); + matches = getMatchesInTokens( newKeyphraseForms, tokens ); return matches; } @@ -209,7 +294,10 @@ const createMarksForSentence = ( sentence, matches ) => { let allMatches = flatten( matches.primaryMatches ); if ( matches.primaryMatches.length > 0 ) { - allMatches = allMatches.concat( matches.secondaryMatches ).sort( function( a, b ) { + console.log( matches.primaryMatches ); + console.log( matches.secondaryMatches ); + console.log( allMatches.concat( matches.secondaryMatches ) ); + allMatches = allMatches.concat( flatten( matches.secondaryMatches ) ).sort( function( a, b ) { return a.sourceCodeRange.startOffset - b.sourceCodeRange.startOffset; } ); } @@ -251,7 +339,7 @@ function getMarkingsInSentence( sentence, matchesInSentence ) { } ); if ( matchesInSentence.primaryMatches.length > 0 ) { - matchesInSentence.secondaryMatches.forEach( match =>{ + flatten( matchesInSentence.secondaryMatches ).forEach( match =>{ markings.push( new Mark( { position: { startOffset: match.sourceCodeRange.startOffset, @@ -328,14 +416,11 @@ export function countKeyphraseInText( sentences, topicForms, locale, matchWordCu */ export default function keyphraseCount( paper, researcher ) { const topicForms = researcher.getResearch( "morphology" ); - topicForms.keyphraseForms = topicForms.keyphraseForms.map( word => word.map( form => normalizeSingle( form ) ) ); - const matchWordCustomHelper = researcher.getHelper( "matchWordCustomHelper" ); - const locale = paper.getLocale(); - const sentences = getSentencesFromTree( paper ); + const keyphraseFound = countKeyphraseInText( sentences, topicForms, locale, matchWordCustomHelper ); return { From 0a41e5f0b28e5c3de3e7bc0528dbae2110700502 Mon Sep 17 00:00:00 2001 From: hdvos Date: Wed, 26 Apr 2023 13:06:26 +0200 Subject: [PATCH 048/616] all keywordCount Specs now pass --- .../researches/keywordCountSpec.js | 33 ++++--- .../researches/keywordCount.js | 91 ++++++++++--------- 2 files changed, 69 insertions(+), 55 deletions(-) diff --git a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js index b9eb77de26e..da55b28d3c0 100644 --- a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js +++ b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js @@ -46,19 +46,20 @@ describe( "Test for counting the keyword in a text", function() { buildTree( mockPaper, researcher ); expect( keyphraseCount( mockPaper, researcher ).count ).toBe( 2 ); expect( keyphraseCount( mockPaper, researcher ).markings ).toEqual( - [ new Mark( { - fieldsToMark: [], - marked: "a string of text with the keyword and" + + [ + new Mark( { + fieldsToMark: [], + marked: "a string of text with the keyword and" + " another keyword in it", - original: "a string of text with the keyword and another keyword in it", - position: { endOffset: 36, startOffset: 29 }, - } ), - new Mark( { - fieldsToMark: [], - marked: "a string of text with the keyword " + + original: "a string of 
text with the keyword and another keyword in it", + position: { endOffset: 36, startOffset: 29 }, + } ), + new Mark( { + fieldsToMark: [], + marked: "a string of text with the keyword " + "and another keyword in it", - original: "a string of text with the keyword and another keyword in it", - position: { endOffset: 56, startOffset: 49 } } ), + original: "a string of text with the keyword and another keyword in it", + position: { endOffset: 56, startOffset: 49 } } ), ] ); } ); @@ -254,18 +255,24 @@ describe( "Test for counting the keyword in a text", function() { "key
and another key) and one word.", original: "A string with three keys (key and another key) and one word.", - position: { endOffset: 62, startOffset: 45 } } ), + position: { endOffset: 48, startOffset: 45 } } ), new Mark( { marked: "A string with three keys (" + "key and another key) and one word.", original: "A string with three keys (key and another key) and one word.", - position: { endOffset: 27, startOffset: 23 } } ), + position: { endOffset: 62, startOffset: 58 } } ), new Mark( { marked: "A string with three keys (" + "key and another key) and one word.", original: "A string with three keys (key and another key) and one word.", + position: { endOffset: 27, startOffset: 23 } } ), + new Mark( { + marked: "A string with three keys (" + + "key and another key) and one word.", + original: "A string with three keys (key and another key) and one word.", position: { endOffset: 32, startOffset: 29 } } ), ] ); diff --git a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js index 3a57f90654a..56f92494fd2 100644 --- a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js +++ b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js @@ -14,6 +14,7 @@ import { tokenizerSplitter } from "../../parse/language/LanguageProcessor"; import { normalizeSingle } from "../helpers/sanitize/quotes"; import Token from "../../parse/structure/Token"; import matchTextWithWord from "../helpers/match/matchTextWithWord"; +import { getLanguage } from "../index"; /** @@ -199,10 +200,9 @@ const convertToPositionResult = ( matches, sentence, keyPhraseForms ) => { } ); } ); } ); - console.log( matchTokens ); + // Sort tokens on startOffset. matchTokens.sort( ( a, b ) => a.sourceCodeRange.startOffset - b.sourceCodeRange.startOffset ); - console.log( matchTokens ); const primaryMatches = []; const secondaryMatches = []; @@ -294,9 +294,6 @@ const createMarksForSentence = ( sentence, matches ) => { let allMatches = flatten( matches.primaryMatches ); if ( matches.primaryMatches.length > 0 ) { - console.log( matches.primaryMatches ); - console.log( matches.secondaryMatches ); - console.log( allMatches.concat( matches.secondaryMatches ) ); allMatches = allMatches.concat( flatten( matches.secondaryMatches ) ).sort( function( a, b ) { return a.sourceCodeRange.startOffset - b.sourceCodeRange.startOffset; } ); @@ -316,6 +313,39 @@ const createMarksForSentence = ( sentence, matches ) => { return sentenceText; }; +/** + * Merges consecutive markings into one marking. + * @param {{}[]} markings An array of markings to merge. + * @returns {{}[]} An array of markings where consecutive markings are merged. + */ +const mergeConsecutiveMarkings = ( markings, isJapanese = false ) => { + const newMarkings = []; + markings.forEach( ( marking ) => { + let actionDone = false; + newMarkings.forEach( ( newMarking, newMarkingIndex ) => { + // If the markings are consecutive, merge them. + if ( newMarking.getPositionEnd() + ( isJapanese ? 0 : 1 ) === marking.getPositionStart() ) { + newMarkings[ newMarkingIndex ]._properties.position.endOffset = marking.getPositionEnd(); + actionDone = true; + // if the markings are overlapping, merge them. 
+ } else if ( newMarking.getPositionEnd() >= marking.getPositionStart() && newMarking.getPositionStart() <= marking.getPositionEnd() ) { + newMarkings[ newMarkingIndex ]._properties.position.startOffset = Math.min( newMarking.getPositionStart(), marking.getPositionStart() ); + newMarkings[ newMarkingIndex ]._properties.position.endOffset = Math.max( newMarking.getPositionEnd(), marking.getPositionEnd() ); + actionDone = true; + // If the markings are consecutive, merge them. + } else if ( newMarking.getPositionStart() === marking.getPositionEnd() + (isJapanese ? 0 : 1 ) ) { + newMarkings[ newMarkingIndex ]._properties.position.startOffset = marking.getPositionStart(); + actionDone = true; + } + } ); + if ( ! actionDone ) { + newMarkings.push( marking ); + } + } ); + return newMarkings; +}; + + /** * Gets the Mark objects of all keyphrase matches. * @@ -324,17 +354,18 @@ const createMarksForSentence = ( sentence, matches ) => { * * @returns {Mark[]} The array of Mark objects of the keyphrase matches. */ -function getMarkingsInSentence( sentence, matchesInSentence ) { +function getMarkingsInSentence( sentence, matchesInSentence, matchWordCustomHelper, locale ) { const markedSentence = createMarksForSentence( sentence, matchesInSentence ); - const markings = matchesInSentence.primaryMatches.map( match => { - return new Mark( { - position: { - startOffset: match.slice( 0 )[ 0 ].sourceCodeRange.startOffset, - endOffset: match.slice( -1 )[ 0 ].sourceCodeRange.endOffset, - }, - marked: markedSentence, - original: sentence.text, - + const markings = matchesInSentence.primaryMatches.flatMap( match => { + return match.map( token => { + return new Mark( { + position: { + startOffset: token.sourceCodeRange.startOffset, + endOffset: token.sourceCodeRange.endOffset, + }, + marked: markedSentence, + original: sentence.text, + } ); } ); } ); @@ -352,34 +383,10 @@ function getMarkingsInSentence( sentence, matchesInSentence ) { } ); } - - return markings; + const mergedMarkings = mergeConsecutiveMarkings( markings, getLanguage( locale ) === "ja" ); + return mergedMarkings; } -/** - * Merges consecutive markings into one marking. - * @param {{}[]} markings An array of markings to merge. - * @returns {{}[]} An array of markings where consecutive markings are merged. - */ -const mergeConsecutiveMarkings = ( markings ) => { - const newMarkings = []; - markings.forEach( ( marking ) => { - let actionDone = false; - newMarkings.forEach( ( newMarking, newMarkingIndex ) => { - if ( newMarking.getPositionEnd() + 1 === marking.getPositionStart() ) { - newMarkings[ newMarkingIndex ]._properties.position.endOffset = marking.getPositionEnd(); - actionDone = true; - } else if ( newMarking.getPositionStart() === marking.getPositionEnd() + 1 ) { - newMarkings[ newMarkingIndex ]._properties.position.startOffset = marking.getPositionStart(); - actionDone = true; - } - } ); - if ( ! actionDone ) { - newMarkings.push( marking ); - } - } ); - return newMarkings; -}; /** * Counts the occurrences of the keyphrase in the text and creates the Mark objects for the matches. 
@@ -397,7 +404,7 @@ export function countKeyphraseInText( sentences, topicForms, locale, matchWordCu sentences.forEach( sentence => { const matchesInSentence = getMatchesInSentence( sentence, topicForms.keyphraseForms, locale, matchWordCustomHelper ); - const markings = getMarkingsInSentence( sentence, matchesInSentence, matchWordCustomHelper ); + const markings = getMarkingsInSentence( sentence, matchesInSentence, matchWordCustomHelper, locale ); result.markings.push( markings ); result.count += matchesInSentence.primaryMatches.length; From 1f37545b01876fe7f8dab90c11c56f16562e8a3e Mon Sep 17 00:00:00 2001 From: hdvos Date: Wed, 26 Apr 2023 13:19:13 +0200 Subject: [PATCH 049/616] rename keyword to keyphrase in seoAssessor.js and seoAssessorSpec.js --- packages/yoastseo/spec/scoring/productPages/seoAssessorSpec.js | 2 +- packages/yoastseo/src/scoring/productPages/seoAssessor.js | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/yoastseo/spec/scoring/productPages/seoAssessorSpec.js b/packages/yoastseo/spec/scoring/productPages/seoAssessorSpec.js index fc3b14ea253..d978ad3f3f2 100644 --- a/packages/yoastseo/spec/scoring/productPages/seoAssessorSpec.js +++ b/packages/yoastseo/spec/scoring/productPages/seoAssessorSpec.js @@ -102,7 +102,7 @@ describe( "has configuration overrides", () => { } ); test( "KeywordDensityAssessment", () => { - const assessment = assessor.getAssessment( "keywordDensity" ); + const assessment = assessor.getAssessment( "keyphraseDensity" ); expect( assessment ).toBeDefined(); expect( assessment._config ).toBeDefined(); diff --git a/packages/yoastseo/src/scoring/productPages/seoAssessor.js b/packages/yoastseo/src/scoring/productPages/seoAssessor.js index 3a9d24799e9..136375fc9dd 100644 --- a/packages/yoastseo/src/scoring/productPages/seoAssessor.js +++ b/packages/yoastseo/src/scoring/productPages/seoAssessor.js @@ -54,7 +54,7 @@ const ProductSEOAssessor = function( researcher, options ) { urlTitle: createAnchorOpeningTag( options.keyphraseLengthUrlTitle ), urlCallToAction: createAnchorOpeningTag( options.keyphraseLengthCTAUrl ), }, true ), - new KeywordDensityAssessment( { + new KeyphraseDensityAssessment( { urlTitle: createAnchorOpeningTag( options.keyphraseDensityUrlTitle ), urlCallToAction: createAnchorOpeningTag( options.keyphraseDensityCTAUrl ), } ), From 6e5d2aaeec8f64b6c415dfc4bbbf02c3b82986de Mon Sep 17 00:00:00 2001 From: hdvos Date: Wed, 26 Apr 2023 14:12:04 +0200 Subject: [PATCH 050/616] Fix specs for KeywordDensityAssessmentSpec.js --- .../assessments/seo/KeywordDensityAssessmentSpec.js | 10 ++++------ .../src/languageProcessing/researches/keywordCount.js | 1 + 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/packages/yoastseo/spec/scoring/assessments/seo/KeywordDensityAssessmentSpec.js b/packages/yoastseo/spec/scoring/assessments/seo/KeywordDensityAssessmentSpec.js index 749a3162de6..7312c1b3904 100644 --- a/packages/yoastseo/spec/scoring/assessments/seo/KeywordDensityAssessmentSpec.js +++ b/packages/yoastseo/spec/scoring/assessments/seo/KeywordDensityAssessmentSpec.js @@ -208,7 +208,7 @@ describe( "Tests for the keywordDensity assessment for languages with morphology describe( "A test for marking the keyword", function() { it( "returns markers", function() { const keywordDensityAssessment = new KeywordDensityAssessment(); - const paper = new Paper( "This is a very interesting paper with a keyword and another keyword.", { keyword: "keyword" } ); + const paper = new Paper( "

<p>This is a very interesting paper with a keyword and another keyword.</p>
", { keyword: "keyword" } ); const researcher = new DefaultResearcher( paper ); buildTree( paper, researcher ); @@ -219,21 +219,19 @@ describe( "A test for marking the keyword", function() { "keyword and another " + "keyword.", original: "This is a very interesting paper with a keyword and another keyword.", - // eslint-disable-next-line no-undefined - position: { endOffset: undefined, startOffset: undefined }, + position: { endOffset: 50, startOffset: 43 }, } ), new Mark( { marked: "This is a very interesting paper with a " + "keyword and another " + "keyword.", original: "This is a very interesting paper with a keyword and another keyword.", - // eslint-disable-next-line no-undefined - position: { endOffset: undefined, startOffset: undefined }, + position: { endOffset: 70, startOffset: 63 }, } ) ]; expect( keywordDensityAssessment.getMarks() ).toEqual( expected ); } ); - xit( "returns markers for a keyphrase containing numbers", function() { + it( "returns markers for a keyphrase containing numbers", function() { const keywordDensityAssessment = new KeywordDensityAssessment(); const paper = new Paper( "This is the release of YoastSEO 9.3.", { keyword: "YoastSEO 9.3" } ); diff --git a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js index 56f92494fd2..a2af19f0002 100644 --- a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js +++ b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js @@ -329,6 +329,7 @@ const mergeConsecutiveMarkings = ( markings, isJapanese = false ) => { actionDone = true; // if the markings are overlapping, merge them. } else if ( newMarking.getPositionEnd() >= marking.getPositionStart() && newMarking.getPositionStart() <= marking.getPositionEnd() ) { + // eslint-disable-next-line max-len newMarkings[ newMarkingIndex ]._properties.position.startOffset = Math.min( newMarking.getPositionStart(), marking.getPositionStart() ); newMarkings[ newMarkingIndex ]._properties.position.endOffset = Math.max( newMarking.getPositionEnd(), marking.getPositionEnd() ); actionDone = true; From fe36f5ca637bd9a910392cdeffcd1cfe37407ce2 Mon Sep 17 00:00:00 2001 From: hdvos Date: Mon, 8 May 2023 11:25:54 +0200 Subject: [PATCH 051/616] remove unused imports in keywordCount.js --- .../src/languageProcessing/researches/keywordCount.js | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js index a2af19f0002..bb26ada4306 100644 --- a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js +++ b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js @@ -1,19 +1,12 @@ -import { flattenDeep, min, flatten, map, uniq } from "lodash-es"; +import { flatten, uniq } from "lodash-es"; import matchTextWithArray from "../helpers/match/matchTextWithArray"; -import matchWordFormsWithTokens, { matchTokensWithWordForms } from "../helpers/match/matchWordFormsWithTokens"; import getSentencesFromTree from "../helpers/sentence/getSentencesFromTree"; -import { collectMarkingsInSentence, markWordsInASentence } from "../helpers/word/markWordsInSentences"; import Mark from "../../values/Mark"; import addWordBoundary from "../helpers/word/addWordboundary"; import { replaceTurkishIsMemoized } from "../helpers/transliterate/specialCharacterMappings"; -import transliterate from "../helpers/transliterate/transliterate"; -import 
transliterateWP from "../helpers/transliterate/transliterateWPstyle"; -import stripSpaces from "../helpers/sanitize/stripSpaces"; -import { punctuationMatcher } from "../helpers/sanitize/removePunctuation"; import { tokenizerSplitter } from "../../parse/language/LanguageProcessor"; import { normalizeSingle } from "../helpers/sanitize/quotes"; import Token from "../../parse/structure/Token"; -import matchTextWithWord from "../helpers/match/matchTextWithWord"; import { getLanguage } from "../index"; From 1fd3582638c70a5eb8e00a1c941a9c8b01e25ea1 Mon Sep 17 00:00:00 2001 From: hdvos Date: Mon, 8 May 2023 11:33:33 +0200 Subject: [PATCH 052/616] refactor tokenizeKeyphraseForms and processKeyPhraseForms in keywordCount.js --- .../researches/keywordCount.js | 24 +++++-------------- 1 file changed, 6 insertions(+), 18 deletions(-) diff --git a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js index bb26ada4306..5694f63c410 100644 --- a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js +++ b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js @@ -52,26 +52,14 @@ const matchHead = ( head, token ) => { } ); }; -const tokenizeKeyphraseForms = ( keyphraseForms ) => { - let tokenizedKeyPhraseForms = keyphraseForms.map( keyphraseForm => { - return keyphraseForm.map( word => { - return word.split( tokenizerSplitter ).filter( x => x !== "" ); - } ); - } ); - tokenizedKeyPhraseForms = tokenizedKeyPhraseForms.map( x => x[ 0 ] ); - - - // source: https://stackoverflow.com/questions/17428587/transposing-a-2d-array-in-javascript - return tokenizedKeyPhraseForms[ 0 ].map( ( _, index ) => uniq( tokenizedKeyPhraseForms.map( row => row[ index ] ) ) ); -}; - -// TODO: better descriptor than `process` /** - * Processes the keyphrase forms to make sure that they are tokenized in the same way as the text. - * @param {array[]} keyphraseForms The keyphrase forms to process. - * @returns {*[]} The processed keyphrase forms. + * (re-)Tokenizes the keyphrase forms in the same way that the tokens in the text are splitted. + * This solves the problem that "key-word" would not match with "key-word" in the text, + * because it would occur like ["key-word"] in the keyphraseForms and in ["key", "-", "word"] in the tokenized text. + * @param {array[]} keyphraseForms The keyphrase forms to tokenize. + * @returns {array[]} The tokenized keyphrase forms. */ -const processKeyphraseForms = ( keyphraseForms ) => { +const tokenizeKeyphraseForms = ( keyphraseForms ) => { const newKeyphraseForms = []; keyphraseForms.forEach( word => { // const newWord = [] From cc6b82f57ee9c201cdd202abb3defb894ddb4a1b Mon Sep 17 00:00:00 2001 From: hdvos Date: Mon, 8 May 2023 11:34:22 +0200 Subject: [PATCH 053/616] implement check for consecutive tokens --- .../researches/keywordCount.js | 58 +++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js index 5694f63c410..77b8ecf1f92 100644 --- a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js +++ b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js @@ -74,6 +74,64 @@ const tokenizeKeyphraseForms = ( keyphraseForms ) => { return newKeyphraseForms; }; +/** + * Checks if the first token is directly followed by the second token. + * @param {Token} firstToken The first token. 
+ * @param {Token} secondToken The second token. + * @returns {boolean} Whether the tokens are consecutive. + */ +const tokensAreConsecutive = ( firstToken, secondToken ) => { + return firstToken.sourceCodeRange.endOffset + 1 === secondToken.sourceCodeRange.startOffset; +}; + +/** + * Matches the keyphrase forms in an array of tokens. + * @param {Token[]} currentMatch The current match. + * @param {Token[]} firstPreviousMatch The first previous match. + * @param {Token[]} secondPreviousMatch The second previous match. + * @returns {boolean} Whether the keyphrase forms are consecutive. + */ +const isConsecutiveKeyphrase = ( currentMatch, firstPreviousMatch, secondPreviousMatch ) => { + return currentMatch[ 0 ].text === firstPreviousMatch[ 0 ].text && + tokensAreConsecutive( firstPreviousMatch[ 0 ], currentMatch[ 0 ] ) && + firstPreviousMatch[ 0 ].text === secondPreviousMatch[ 0 ].text && + tokensAreConsecutive( secondPreviousMatch[ 0 ], firstPreviousMatch[ 0 ] ); +}; + +/** + * Matches the keyphrase forms in an array of tokens. + * @param {Object} result The result object. + * @returns {Object} The result object with the consecutive keyphrase removed. + */ +const removeConsecutiveKeyphraseFromResult = ( result ) => { + const newResult = { ...result, primaryMatches: [] }; + // If three or more matches are consecutive, only keep the first two. + // Iterate backwards over the primary matches + for ( let i = result.primaryMatches.length - 1; i >= 0; i-- ) { + const currentMatch = result.primaryMatches[ i ]; + // If the current match consists of two or more tokens, add it to the front of the new primary matches. + if ( currentMatch.length > 1 ) { + newResult.primaryMatches.unshift( currentMatch ); + continue; + } + + // If the current match is the same as the previous two matches, and it is consecutive, do not add it to the primary matches. + const firstPreviousMatch = result.primaryMatches[ i - 1 ]; + const secondPreviousMatch = result.primaryMatches[ i - 2 ]; + + if ( firstPreviousMatch && secondPreviousMatch ) { + if ( isConsecutiveKeyphrase( currentMatch, firstPreviousMatch, secondPreviousMatch ) ) { + continue; + } else { + newResult.primaryMatches.unshift( currentMatch ); + } + } else { + newResult.primaryMatches.unshift( currentMatch ); + } + } + return newResult; +}; + /** * Matches the keyphrase forms in an array of tokens. * @param {array[]} keyphraseForms The keyphrase forms to match. From d98eb11dc0984ece1e950a527317561498bf2d80 Mon Sep 17 00:00:00 2001 From: hdvos Date: Mon, 8 May 2023 11:38:52 +0200 Subject: [PATCH 054/616] use more fuzzy matching in getMatchesInTokens in keywordCount.js --- .../yoastseo/src/languageProcessing/researches/keywordCount.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js index 77b8ecf1f92..e64ed421766 100644 --- a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js +++ b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js @@ -163,7 +163,7 @@ const getMatchesInTokens = ( keyphraseForms, tokens ) => { const previousHead = keyphraseForms[ Math.max( 0, keyPhraseFormsIndex - 1 ) ]; // TODO: better way to extract previoushead. 
if ( tokens.slice( positionInSentence, foundPosition + positionInSentence ).some( t => { - return previousHead.includes( t.text ); // TODO: use matchhead + return matchHead( previousHead, t ); } ) ) { result.secondaryMatches.push( tokens[ positionInSentence - 1 ] ); keyPhraseFormsIndex = 0; From fecbf24946453d8a819885722697a71269b31bbe Mon Sep 17 00:00:00 2001 From: hdvos Date: Mon, 8 May 2023 11:39:41 +0200 Subject: [PATCH 055/616] Add comment to clarify what we mean with head in getMatchesInTokens in keywordCount.js --- .../yoastseo/src/languageProcessing/researches/keywordCount.js | 1 + 1 file changed, 1 insertion(+) diff --git a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js index e64ed421766..922ae4efbac 100644 --- a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js +++ b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js @@ -152,6 +152,7 @@ const getMatchesInTokens = ( keyphraseForms, tokens ) => { let foundWords = []; // eslint-disable-next-line no-constant-condition while ( true ) { + // The head of the keyphrase form we are currently analyzing. const head = keyphraseForms[ keyPhraseFormsIndex ]; const foundPosition = tokens.slice( positionInSentence ).findIndex( t => matchHead( head, t ) ); From cdb4f01e9c4002966e54cd3018001990fd3e2c66 Mon Sep 17 00:00:00 2001 From: hdvos Date: Mon, 8 May 2023 11:40:21 +0200 Subject: [PATCH 056/616] make possible to match keyphrases with underscores. --- .../src/languageProcessing/researches/keywordCount.js | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js index 922ae4efbac..5531ffa4d24 100644 --- a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js +++ b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js @@ -171,6 +171,15 @@ const getMatchesInTokens = ( keyphraseForms, tokens ) => { foundWords = []; continue; } + + const previousToken = tokens[ Math.max( positionInSentence + foundPosition - 1, 0 ) ]; // TODO: better way to extract previous token. + // if the previous token is an underscore and the previous head is not an underscore, reset. + if ( previousToken.text === "_" && ! 
previousHead.includes( "_" ) ) { + // result.secondaryMatches.push( tokens[ positionInSentence - 1 ] ); + keyPhraseFormsIndex = 0; + foundWords = []; + continue; + } } keyPhraseFormsIndex += 1; From ad4283fc291ea45b72867910bda93df1b52a915b Mon Sep 17 00:00:00 2001 From: hdvos Date: Mon, 8 May 2023 11:40:48 +0200 Subject: [PATCH 057/616] call removeConsecutiveKeyphraseFromResult --- .../yoastseo/src/languageProcessing/researches/keywordCount.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js index 5531ffa4d24..1a61ebb070c 100644 --- a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js +++ b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js @@ -200,7 +200,7 @@ const getMatchesInTokens = ( keyphraseForms, tokens ) => { } } - return result; + return removeConsecutiveKeyphraseFromResult( result ); }; /** From a5ce31bde04ea71b7c237dd8ecf65fbf927c13cc Mon Sep 17 00:00:00 2001 From: hdvos Date: Mon, 8 May 2023 11:41:24 +0200 Subject: [PATCH 058/616] improve jsdocs in keywordCount.js --- .../src/languageProcessing/researches/keywordCount.js | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js index 1a61ebb070c..434d0920c32 100644 --- a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js +++ b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js @@ -365,6 +365,8 @@ const createMarksForSentence = ( sentence, matches ) => { /** * Merges consecutive markings into one marking. * @param {{}[]} markings An array of markings to merge. + * @param {boolean} isJapanese Whether the text is Japanese. + * * @returns {{}[]} An array of markings where consecutive markings are merged. */ const mergeConsecutiveMarkings = ( markings, isJapanese = false ) => { @@ -401,6 +403,8 @@ const mergeConsecutiveMarkings = ( markings, isJapanese = false ) => { * * @param {string} sentence The sentence to check. * @param {Array} matchesInSentence The array of keyphrase matches in the sentence. + * @param {function} matchWordCustomHelper A custom helper to match words with a text. + * @param {string} locale The locale used in the analysis. * * @returns {Mark[]} The array of Mark objects of the keyphrase matches. */ From 2df4a5baeeb9d1e85c2352a2c44668e6fd767345 Mon Sep 17 00:00:00 2001 From: hdvos Date: Mon, 8 May 2023 11:43:55 +0200 Subject: [PATCH 059/616] remove unused const in keywordCount.js --- .../yoastseo/src/languageProcessing/researches/keywordCount.js | 1 - 1 file changed, 1 deletion(-) diff --git a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js index 434d0920c32..7473184fd0d 100644 --- a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js +++ b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js @@ -232,7 +232,6 @@ const getAllIndicesOfWord = ( word, sentence ) => { * @returns {{primaryMatches: *[], secondaryMatches: *[], position: number}} The matches in the format that is used in the assessment. 
*/ const convertToPositionResult = ( matches, sentence, keyPhraseForms ) => { - const result = { primaryMatches: [], secondaryMatches: [], position: 0 }; const matchTokens = []; matches.forEach( matchObject => { const matchWords = matchObject.matches; From 507e37c547ff49edc01c0b671d021d2640b5945d Mon Sep 17 00:00:00 2001 From: hdvos Date: Mon, 8 May 2023 11:44:26 +0200 Subject: [PATCH 060/616] misc fixes in keywordCount.js --- .../src/languageProcessing/researches/keywordCount.js | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js index 7473184fd0d..7e901956ef4 100644 --- a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js +++ b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js @@ -260,7 +260,7 @@ const convertToPositionResult = ( matches, sentence, keyPhraseForms ) => { // A primary match is a match that contains all the keyphrase forms in the same order as they occur in the keyphrase. // A secondary match is any other match. - matchTokens.forEach( ( token, index ) => { + matchTokens.forEach( ( token ) => { const head = keyPhraseForms[ keyPhraseFormsIndex ]; if ( head && matchHead( head, token ) ) { currentMatch.push( token ); @@ -320,7 +320,7 @@ function getMatchesInSentence( sentence, keyphraseForms, locale, matchWordCusto const tokens = sentence.tokens; - const newKeyphraseForms = processKeyphraseForms( keyphraseForms, locale ); + const newKeyphraseForms = tokenizeKeyphraseForms( keyphraseForms, locale ); matches = getMatchesInTokens( newKeyphraseForms, tokens ); @@ -384,7 +384,7 @@ const mergeConsecutiveMarkings = ( markings, isJapanese = false ) => { newMarkings[ newMarkingIndex ]._properties.position.endOffset = Math.max( newMarking.getPositionEnd(), marking.getPositionEnd() ); actionDone = true; // If the markings are consecutive, merge them. - } else if ( newMarking.getPositionStart() === marking.getPositionEnd() + (isJapanese ? 0 : 1 ) ) { + } else if ( newMarking.getPositionStart() === marking.getPositionEnd() + ( isJapanese ? 0 : 1 ) ) { newMarkings[ newMarkingIndex ]._properties.position.startOffset = marking.getPositionStart(); actionDone = true; } From e3bf92f5279e7f815648765311049d9035fb51ea Mon Sep 17 00:00:00 2001 From: hdvos Date: Mon, 8 May 2023 11:45:15 +0200 Subject: [PATCH 061/616] add specs for consecutive keyphrases in getKeywordDensitySpec.js --- .../researches/getKeywordDensitySpec.js | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/packages/yoastseo/spec/languageProcessing/researches/getKeywordDensitySpec.js b/packages/yoastseo/spec/languageProcessing/researches/getKeywordDensitySpec.js index 3aea748d054..41fbaeadf0d 100644 --- a/packages/yoastseo/spec/languageProcessing/researches/getKeywordDensitySpec.js +++ b/packages/yoastseo/spec/languageProcessing/researches/getKeywordDensitySpec.js @@ -99,6 +99,25 @@ describe( "Test for counting the keyword density in a text with an English resea expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 20 ); } ); + it( "should only keep the first two keyphrases if there more than three consecutive", function() { + // Consecutive keywords are skipped, so this will match 2 times. + const mockPaper = new Paper( "

<p>This is a nice string with a keyword keyword keyword keyword keyword keyword keyword keyword keyword.</p>
", + { keyword: "keyword" } ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 12.5 ); + } ); + + it( "should not skip a keyphrase in case of consecutive keyphrases with morphology sss", function() { + // Consecutive keywords are skipped, so this will match 2 times. + const mockPaper = new Paper( "

<p>This is a nice string with a keyword keyword keywords.</p>
", { keyword: "keyword" } ); + const mockResearcher = new EnglishResearcher( mockPaper ); + mockResearcher.addResearchData( "morphology", morphologyDataEN ); + buildTree( mockPaper, mockResearcher ); + expect( getKeywordDensity( mockPaper, mockResearcher ) ).toBe( 30 ); + } ); + + it( "should recognize a keyphrase with a '$' in it", function() { const mockPaper = new Paper( "a string of text with the $keyword in it, density should be 7.7%", { keyword: "$keyword" } ); const mockResearcher = new EnglishResearcher( mockPaper ); From 66ccd257d832d6e12ba58bd6c7938da43b94341a Mon Sep 17 00:00:00 2001 From: hdvos Date: Mon, 8 May 2023 11:45:52 +0200 Subject: [PATCH 062/616] misc fixes for getKeywordDensitySpec.js --- .../languageProcessing/researches/getKeywordDensitySpec.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/yoastseo/spec/languageProcessing/researches/getKeywordDensitySpec.js b/packages/yoastseo/spec/languageProcessing/researches/getKeywordDensitySpec.js index 41fbaeadf0d..7d8e912a2d1 100644 --- a/packages/yoastseo/spec/languageProcessing/researches/getKeywordDensitySpec.js +++ b/packages/yoastseo/spec/languageProcessing/researches/getKeywordDensitySpec.js @@ -91,9 +91,9 @@ describe( "Test for counting the keyword density in a text with an English resea expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 0 ); } ); - it( "should skip a keyphrase in case of consecutive keyphrases", function() { + it( "should skip a keyphrase in case of three consecutive keyphrases", function() { // Consecutive keywords are skipped, so this will match 2 times. - const mockPaper = new Paper( "This is a nice string with a keyword keyword keyword.", { keyword: "keyword" } ); + const mockPaper = new Paper( "

<p>This is a nice string with a keyword keyword keyword.</p>
", { keyword: "keyword" } ); const mockResearcher = new EnglishResearcher( mockPaper ); buildTree( mockPaper, mockResearcher ); expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 20 ); From 7eddee6471e55889dc3c58ef61aaf5fa55f39ac2 Mon Sep 17 00:00:00 2001 From: hdvos Date: Mon, 8 May 2023 11:51:35 +0200 Subject: [PATCH 063/616] skip the test for an image tag in getTextElementPositionsSpec.js --- .../spec/parse/build/private/getTextElementPositionsSpec.js | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/packages/yoastseo/spec/parse/build/private/getTextElementPositionsSpec.js b/packages/yoastseo/spec/parse/build/private/getTextElementPositionsSpec.js index 10f62c544b0..63e09366ea2 100644 --- a/packages/yoastseo/spec/parse/build/private/getTextElementPositionsSpec.js +++ b/packages/yoastseo/spec/parse/build/private/getTextElementPositionsSpec.js @@ -214,12 +214,13 @@ describe( "A test for getting positions of sentences", () => { expect( helloSentence.sourceCodeRange ).toEqual( { startOffset: 5, endOffset: 21 } ); } ); - it( "should get the correct sentence position for a sentence in an image caption", function() { + it.skip( "should get the correct sentence position for a sentence in an image caption", function() { // html: "

a different cat with toy " + // "A flamboyant cat with a toy

\n" + // "

" + // eslint-disable-next-line max-len const html = "

a different cat with toyA flamboyant cat with a toy

\n

"; const tree = adapt( parseFragment( html, { sourceCodeLocationInfo: true } ) ); From aaa4b9da09f028f246bd2fa3cb994ff532f7ba33 Mon Sep 17 00:00:00 2001 From: hdvos Date: Mon, 8 May 2023 13:15:32 +0200 Subject: [PATCH 064/616] fix passing on Locale for matchHead in keywordCount.js --- .../researches/keywordCount.js | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js index 7e901956ef4..f6c51c07527 100644 --- a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js +++ b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js @@ -43,12 +43,12 @@ const matchTextWithTransliteration = function( text, keyword, locale ) { }; -const matchHead = ( head, token ) => { +const matchHead = ( head, token, locale ) => { if ( head.includes( token.text ) ) { return true; } return head.some( keyword => { - return matchTextWithTransliteration( token.text, keyword, "en_US" ); // TODO get locale from params + return matchTextWithTransliteration( token.text, keyword, locale ); // TODO get locale from params } ); }; @@ -138,7 +138,7 @@ const removeConsecutiveKeyphraseFromResult = ( result ) => { * @param {Token[]} tokens The tokens to match the keyphrase forms in. * @returns {{primaryMatches: *[], secondaryMatches: *[]}} The matches. */ -const getMatchesInTokens = ( keyphraseForms, tokens ) => { +const getMatchesInTokens = ( keyphraseForms, tokens, locale ) => { const result = { primaryMatches: [], secondaryMatches: [], @@ -155,7 +155,7 @@ const getMatchesInTokens = ( keyphraseForms, tokens ) => { // The head of the keyphrase form we are currently analyzing. const head = keyphraseForms[ keyPhraseFormsIndex ]; - const foundPosition = tokens.slice( positionInSentence ).findIndex( t => matchHead( head, t ) ); + const foundPosition = tokens.slice( positionInSentence ).findIndex( t => matchHead( head, t, locale ) ); // If an occurence of a word is found, see if the subsequent word also is find. if ( foundPosition >= 0 ) { if ( keyPhraseFormsIndex > 0 ) { @@ -164,7 +164,7 @@ const getMatchesInTokens = ( keyphraseForms, tokens ) => { const previousHead = keyphraseForms[ Math.max( 0, keyPhraseFormsIndex - 1 ) ]; // TODO: better way to extract previoushead. if ( tokens.slice( positionInSentence, foundPosition + positionInSentence ).some( t => { - return matchHead( previousHead, t ); + return matchHead( previousHead, t, locale ); } ) ) { result.secondaryMatches.push( tokens[ positionInSentence - 1 ] ); keyPhraseFormsIndex = 0; @@ -231,7 +231,7 @@ const getAllIndicesOfWord = ( word, sentence ) => { * @param {Sentence} sentence The sentence. * @returns {{primaryMatches: *[], secondaryMatches: *[], position: number}} The matches in the format that is used in the assessment. */ -const convertToPositionResult = ( matches, sentence, keyPhraseForms ) => { +const convertToPositionResult = ( matches, sentence, keyPhraseForms, locale ) => { const matchTokens = []; matches.forEach( matchObject => { const matchWords = matchObject.matches; @@ -262,7 +262,7 @@ const convertToPositionResult = ( matches, sentence, keyPhraseForms ) => { // A secondary match is any other match. 
matchTokens.forEach( ( token ) => { const head = keyPhraseForms[ keyPhraseFormsIndex ]; - if ( head && matchHead( head, token ) ) { + if ( head && matchHead( head, token, locale ) ) { currentMatch.push( token ); keyPhraseFormsIndex += 1; if ( currentMatch.length === keyPhraseForms.length ) { @@ -313,7 +313,7 @@ function getMatchesInSentence( sentence, keyphraseForms, locale, matchWordCusto return { primaryMatches: [], secondaryMatches: [], position: -1 }; } - result = convertToPositionResult( matches, sentence, keyphraseForms ); + result = convertToPositionResult( matches, sentence, keyphraseForms, locale ); return result; } @@ -322,7 +322,7 @@ function getMatchesInSentence( sentence, keyphraseForms, locale, matchWordCusto const newKeyphraseForms = tokenizeKeyphraseForms( keyphraseForms, locale ); - matches = getMatchesInTokens( newKeyphraseForms, tokens ); + matches = getMatchesInTokens( newKeyphraseForms, tokens, locale ); return matches; } From 2f4282e269b08493913f404013473cab81b895e3 Mon Sep 17 00:00:00 2001 From: hdvos Date: Mon, 8 May 2023 14:15:35 +0200 Subject: [PATCH 065/616] move functionality to remove Consecutive Keyphrases to a separate helper. --- .../removeConsecutiveKeyphraseMatchesSpec.js | 88 +++++++++++++++++++ .../removeConsecutiveKeyphraseMatches.js | 59 +++++++++++++ .../researches/keywordCount.js | 58 +----------- 3 files changed, 148 insertions(+), 57 deletions(-) create mode 100644 packages/yoastseo/spec/languageProcessing/helpers/keywordCount/removeConsecutiveKeyphraseMatchesSpec.js create mode 100644 packages/yoastseo/src/languageProcessing/helpers/keywordCount/removeConsecutiveKeyphraseMatches.js diff --git a/packages/yoastseo/spec/languageProcessing/helpers/keywordCount/removeConsecutiveKeyphraseMatchesSpec.js b/packages/yoastseo/spec/languageProcessing/helpers/keywordCount/removeConsecutiveKeyphraseMatchesSpec.js new file mode 100644 index 00000000000..37bbe3b6fee --- /dev/null +++ b/packages/yoastseo/spec/languageProcessing/helpers/keywordCount/removeConsecutiveKeyphraseMatchesSpec.js @@ -0,0 +1,88 @@ +import removeConsecutiveKeyphraseMatches from "../../../../src/languageProcessing/helpers/keywordCount/removeConsecutiveKeyphraseMatches"; + +/* +The following cases are tested: + +This case is consecutive. +keyword +This is a nice string with a keyword keyword keyword. + +This case is not consecutive. +key word +This is a nice string with a key word key word key word. + +This case is not consecutive. +keyword +This is a nice string with a keyword keywords keyword. + +This case is not consecutive. +keyword +This is a nice string with a keyword keyword nonkeyword keyword. 
+ */ + +/* eslint-disable max-len */ +const testCases = [ + [ + "keyphrases are onsecutive when they have length one and occur after each other 3 or more times", + "keyword", + { primaryMatches: [ + [ { text: "keyword", sourceCodeRange: { startOffset: 0, endOffset: 7 } } ], + [ { text: "keyword", sourceCodeRange: { startOffset: 8, endOffset: 15 } } ], + [ { text: "keyword", sourceCodeRange: { startOffset: 16, endOffset: 23 } } ], + ] }, + { primaryMatches: [ + [ { text: "keyword", sourceCodeRange: { startOffset: 0, endOffset: 7 } } ], + [ { text: "keyword", sourceCodeRange: { startOffset: 8, endOffset: 15 } } ], + ] }, + ], + [ + "keyphrase matches are not consecutive when they consist of multiple tokens", + "key word", + { primaryMatches: [ + [ { text: "key", sourceCodeRange: { startOffset: 0, endOffset: 3 } }, { text: "word", sourceCodeRange: { startOffset: 3, endOffset: 7 } } ], + [ { text: "key", sourceCodeRange: { startOffset: 8, endOffset: 11 } }, { text: "word", sourceCodeRange: { startOffset: 11, endOffset: 15 } } ], + [ { text: "key", sourceCodeRange: { startOffset: 16, endOffset: 19 } }, { text: "word", sourceCodeRange: { startOffset: 19, endOffset: 23 } } ], + ] }, + { primaryMatches: [ + [ { text: "key", sourceCodeRange: { startOffset: 0, endOffset: 3 } }, { text: "word", sourceCodeRange: { startOffset: 3, endOffset: 7 } } ], + [ { text: "key", sourceCodeRange: { startOffset: 8, endOffset: 11 } }, { text: "word", sourceCodeRange: { startOffset: 11, endOffset: 15 } } ], + [ { text: "key", sourceCodeRange: { startOffset: 16, endOffset: 19 } }, { text: "word", sourceCodeRange: { startOffset: 19, endOffset: 23 } } ], + ] }, + ], + [ + "keyphrase matches are not consecutive when they are the same word but in a different form (morphology doesn't count)", + "keyword", + { primaryMatches: [ + [ { text: "keyword", sourceCodeRange: { startOffset: 0, endOffset: 7 } } ], + [ { text: "keywords", sourceCodeRange: { startOffset: 8, endOffset: 16 } } ], + [ { text: "keyword", sourceCodeRange: { startOffset: 17, endOffset: 24 } } ], + ] }, + { primaryMatches: [ + [ { text: "keyword", sourceCodeRange: { startOffset: 0, endOffset: 7 } } ], + [ { text: "keywords", sourceCodeRange: { startOffset: 8, endOffset: 16 } } ], + [ { text: "keyword", sourceCodeRange: { startOffset: 17, endOffset: 24 } } ], + ] }, + ], + [ + "keyphrase matches are not consecutive when they do not occur right after each other", + "keyword", + { primaryMatches: [ + [ { text: "keyword", sourceCodeRange: { startOffset: 0, endOffset: 7 } } ], + [ { text: "keyword", sourceCodeRange: { startOffset: 8, endOffset: 15 } } ], + [ { text: "keyword", sourceCodeRange: { startOffset: 27, endOffset: 34 } } ], + ] }, + { primaryMatches: [ + [ { text: "keyword", sourceCodeRange: { startOffset: 0, endOffset: 7 } } ], + [ { text: "keyword", sourceCodeRange: { startOffset: 8, endOffset: 15 } } ], + [ { text: "keyword", sourceCodeRange: { startOffset: 27, endOffset: 34 } } ], + ] }, + ], +]; +/* eslint-enable max-len */ + +describe.each( testCases )( "removeConsecutiveKeyPhraseMatches", + ( descriptor, keyphrase, result, expected ) => { + it( descriptor, () => { + expect( removeConsecutiveKeyphraseMatches( result ) ).toStrictEqual( expected ); + } ); + } ); diff --git a/packages/yoastseo/src/languageProcessing/helpers/keywordCount/removeConsecutiveKeyphraseMatches.js b/packages/yoastseo/src/languageProcessing/helpers/keywordCount/removeConsecutiveKeyphraseMatches.js new file mode 100644 index 00000000000..594ee83caad --- /dev/null +++ 
b/packages/yoastseo/src/languageProcessing/helpers/keywordCount/removeConsecutiveKeyphraseMatches.js @@ -0,0 +1,59 @@ +/** + * Checks if the first token is directly followed by the second token. + * @param {Token} firstToken The first token. + * @param {Token} secondToken The second token. + * @returns {boolean} Whether the tokens are consecutive. + */ +const tokensAreConsecutive = ( firstToken, secondToken ) => { + return firstToken.sourceCodeRange.endOffset + 1 === secondToken.sourceCodeRange.startOffset; +}; + +/** + * Matches the keyphrase forms in an array of tokens. + * @param {Token[]} currentMatch The current match. + * @param {Token[]} firstPreviousMatch The first previous match. + * @param {Token[]} secondPreviousMatch The second previous match. + * @returns {boolean} Whether the keyphrase forms are consecutive. + */ +const isConsecutiveKeyphrase = ( currentMatch, firstPreviousMatch, secondPreviousMatch ) => { + return currentMatch[ 0 ].text === firstPreviousMatch[ 0 ].text && + tokensAreConsecutive( firstPreviousMatch[ 0 ], currentMatch[ 0 ] ) && + firstPreviousMatch[ 0 ].text === secondPreviousMatch[ 0 ].text && + tokensAreConsecutive( secondPreviousMatch[ 0 ], firstPreviousMatch[ 0 ] ); +}; + +/** + * Matches the keyphrase forms in an array of tokens. + * @param {Object} result The result object. + * @returns {Object} The result object with the consecutive keyphrase removed. + */ +const removeConsecutiveKeyphraseFromResult = ( result ) => { + const newResult = { ...result, primaryMatches: [] }; + // If three or more matches are consecutive, only keep the first two. + // Iterate backwards over the primary matches + for ( let i = result.primaryMatches.length - 1; i >= 0; i-- ) { + const currentMatch = result.primaryMatches[ i ]; + // If the current match consists of two or more tokens, add it to the front of the new primary matches. + if ( currentMatch.length > 1 ) { + newResult.primaryMatches.unshift( currentMatch ); + continue; + } + + // If the current match is the same as the previous two matches, and it is consecutive, do not add it to the primary matches. + const firstPreviousMatch = result.primaryMatches[ i - 1 ]; + const secondPreviousMatch = result.primaryMatches[ i - 2 ]; + + if ( firstPreviousMatch && secondPreviousMatch ) { + if ( isConsecutiveKeyphrase( currentMatch, firstPreviousMatch, secondPreviousMatch ) ) { + continue; + } else { + newResult.primaryMatches.unshift( currentMatch ); + } + } else { + newResult.primaryMatches.unshift( currentMatch ); + } + } + return newResult; +}; + +export default removeConsecutiveKeyphraseFromResult; diff --git a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js index f6c51c07527..4f435d7d847 100644 --- a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js +++ b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js @@ -8,6 +8,7 @@ import { tokenizerSplitter } from "../../parse/language/LanguageProcessor"; import { normalizeSingle } from "../helpers/sanitize/quotes"; import Token from "../../parse/structure/Token"; import { getLanguage } from "../index"; +import removeConsecutiveKeyphraseFromResult from "../helpers/keywordCount/removeConsecutiveKeyphraseMatches"; /** @@ -74,63 +75,6 @@ const tokenizeKeyphraseForms = ( keyphraseForms ) => { return newKeyphraseForms; }; -/** - * Checks if the first token is directly followed by the second token. - * @param {Token} firstToken The first token. 
- * @param {Token} secondToken The second token. - * @returns {boolean} Whether the tokens are consecutive. - */ -const tokensAreConsecutive = ( firstToken, secondToken ) => { - return firstToken.sourceCodeRange.endOffset + 1 === secondToken.sourceCodeRange.startOffset; -}; - -/** - * Matches the keyphrase forms in an array of tokens. - * @param {Token[]} currentMatch The current match. - * @param {Token[]} firstPreviousMatch The first previous match. - * @param {Token[]} secondPreviousMatch The second previous match. - * @returns {boolean} Whether the keyphrase forms are consecutive. - */ -const isConsecutiveKeyphrase = ( currentMatch, firstPreviousMatch, secondPreviousMatch ) => { - return currentMatch[ 0 ].text === firstPreviousMatch[ 0 ].text && - tokensAreConsecutive( firstPreviousMatch[ 0 ], currentMatch[ 0 ] ) && - firstPreviousMatch[ 0 ].text === secondPreviousMatch[ 0 ].text && - tokensAreConsecutive( secondPreviousMatch[ 0 ], firstPreviousMatch[ 0 ] ); -}; - -/** - * Matches the keyphrase forms in an array of tokens. - * @param {Object} result The result object. - * @returns {Object} The result object with the consecutive keyphrase removed. - */ -const removeConsecutiveKeyphraseFromResult = ( result ) => { - const newResult = { ...result, primaryMatches: [] }; - // If three or more matches are consecutive, only keep the first two. - // Iterate backwards over the primary matches - for ( let i = result.primaryMatches.length - 1; i >= 0; i-- ) { - const currentMatch = result.primaryMatches[ i ]; - // If the current match consists of two or more tokens, add it to the front of the new primary matches. - if ( currentMatch.length > 1 ) { - newResult.primaryMatches.unshift( currentMatch ); - continue; - } - - // If the current match is the same as the previous two matches, and it is consecutive, do not add it to the primary matches. - const firstPreviousMatch = result.primaryMatches[ i - 1 ]; - const secondPreviousMatch = result.primaryMatches[ i - 2 ]; - - if ( firstPreviousMatch && secondPreviousMatch ) { - if ( isConsecutiveKeyphrase( currentMatch, firstPreviousMatch, secondPreviousMatch ) ) { - continue; - } else { - newResult.primaryMatches.unshift( currentMatch ); - } - } else { - newResult.primaryMatches.unshift( currentMatch ); - } - } - return newResult; -}; /** * Matches the keyphrase forms in an array of tokens. From 2cc9ff06589672b847d2e75a77e4b79b13b45883 Mon Sep 17 00:00:00 2001 From: hdvos Date: Mon, 8 May 2023 14:16:37 +0200 Subject: [PATCH 066/616] fix typo in removeConsecutiveKeyphraseMatchesSpec.js --- .../keywordCount/removeConsecutiveKeyphraseMatchesSpec.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/yoastseo/spec/languageProcessing/helpers/keywordCount/removeConsecutiveKeyphraseMatchesSpec.js b/packages/yoastseo/spec/languageProcessing/helpers/keywordCount/removeConsecutiveKeyphraseMatchesSpec.js index 37bbe3b6fee..833ef6a6cd8 100644 --- a/packages/yoastseo/spec/languageProcessing/helpers/keywordCount/removeConsecutiveKeyphraseMatchesSpec.js +++ b/packages/yoastseo/spec/languageProcessing/helpers/keywordCount/removeConsecutiveKeyphraseMatchesSpec.js @@ -23,7 +23,7 @@ This is a nice string with a keyword keyword nonkeyword keyword. 
/* eslint-disable max-len */ const testCases = [ [ - "keyphrases are onsecutive when they have length one and occur after each other 3 or more times", + "keyphrases are consecutive when they have length one and occur after each other 3 or more times", "keyword", { primaryMatches: [ [ { text: "keyword", sourceCodeRange: { startOffset: 0, endOffset: 7 } } ], From e40c672f9f8b1b31b9277a75474cc73fa400b74c Mon Sep 17 00:00:00 2001 From: hdvos Date: Mon, 8 May 2023 14:26:37 +0200 Subject: [PATCH 067/616] refactor matchTokenWithWordForms in keywordCount.js --- .../researches/keywordCount.js | 26 +++++++++++++------ 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js index 4f435d7d847..5537ca9d7b5 100644 --- a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js +++ b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js @@ -43,13 +43,21 @@ const matchTextWithTransliteration = function( text, keyword, locale ) { return !! matches; }; - -const matchHead = ( head, token, locale ) => { - if ( head.includes( token.text ) ) { +/** + * Checks if a token matches with different forms of a word. + * @param {string[]} wordForms Different forms of a word to match the token against. + * @param {Token} token The token to match. + * @param {string} locale The locale used for transliteration. + * @returns {boolean|*} True if the token matches with one of the word forms. + */ +const matchTokenWithWordForms = ( wordForms, token, locale ) => { + // First try a literal match. + if ( wordForms.includes( token.text ) ) { return true; } - return head.some( keyword => { - return matchTextWithTransliteration( token.text, keyword, locale ); // TODO get locale from params + // Then try a match with transliteration. + return wordForms.some( keyword => { + return matchTextWithTransliteration( token.text, keyword, locale ); } ); }; @@ -99,7 +107,7 @@ const getMatchesInTokens = ( keyphraseForms, tokens, locale ) => { // The head of the keyphrase form we are currently analyzing. const head = keyphraseForms[ keyPhraseFormsIndex ]; - const foundPosition = tokens.slice( positionInSentence ).findIndex( t => matchHead( head, t, locale ) ); + const foundPosition = tokens.slice( positionInSentence ).findIndex( t => matchTokenWithWordForms( head, t, locale ) ); // If an occurence of a word is found, see if the subsequent word also is find. if ( foundPosition >= 0 ) { if ( keyPhraseFormsIndex > 0 ) { @@ -108,7 +116,7 @@ const getMatchesInTokens = ( keyphraseForms, tokens, locale ) => { const previousHead = keyphraseForms[ Math.max( 0, keyPhraseFormsIndex - 1 ) ]; // TODO: better way to extract previoushead. if ( tokens.slice( positionInSentence, foundPosition + positionInSentence ).some( t => { - return matchHead( previousHead, t, locale ); + return matchTokenWithWordForms( previousHead, t, locale ); } ) ) { result.secondaryMatches.push( tokens[ positionInSentence - 1 ] ); keyPhraseFormsIndex = 0; @@ -173,6 +181,8 @@ const getAllIndicesOfWord = ( word, sentence ) => { * Converts the matches to the format that is used in the assessment. * @param {array} matches The matches. * @param {Sentence} sentence The sentence. + * @param {array[]} keyPhraseForms The keyphrase forms. + * @param {string} locale The locale. * @returns {{primaryMatches: *[], secondaryMatches: *[], position: number}} The matches in the format that is used in the assessment. 
*/ const convertToPositionResult = ( matches, sentence, keyPhraseForms, locale ) => { @@ -206,7 +216,7 @@ const convertToPositionResult = ( matches, sentence, keyPhraseForms, locale ) => // A secondary match is any other match. matchTokens.forEach( ( token ) => { const head = keyPhraseForms[ keyPhraseFormsIndex ]; - if ( head && matchHead( head, token, locale ) ) { + if ( head && matchTokenWithWordForms( head, token, locale ) ) { currentMatch.push( token ); keyPhraseFormsIndex += 1; if ( currentMatch.length === keyPhraseForms.length ) { From 59a377bbc61398e10f3cd62bef618734763eb6cf Mon Sep 17 00:00:00 2001 From: hdvos Date: Mon, 8 May 2023 15:17:02 +0200 Subject: [PATCH 068/616] refactor matchTokenWithWordForms in keywordCount.js --- .../matchTokenWithWordFormsSpec.js | 19 +++++++ .../keywordCount/matchTokenWithWordForms.js | 55 +++++++++++++++++++ .../researches/keywordCount.js | 53 +----------------- 3 files changed, 75 insertions(+), 52 deletions(-) create mode 100644 packages/yoastseo/spec/languageProcessing/helpers/keywordCount/matchTokenWithWordFormsSpec.js create mode 100644 packages/yoastseo/src/languageProcessing/helpers/keywordCount/matchTokenWithWordForms.js diff --git a/packages/yoastseo/spec/languageProcessing/helpers/keywordCount/matchTokenWithWordFormsSpec.js b/packages/yoastseo/spec/languageProcessing/helpers/keywordCount/matchTokenWithWordFormsSpec.js new file mode 100644 index 00000000000..ac982d9f00c --- /dev/null +++ b/packages/yoastseo/spec/languageProcessing/helpers/keywordCount/matchTokenWithWordFormsSpec.js @@ -0,0 +1,19 @@ +import matchTokenWithWordForms from "../../../../src/languageProcessing/helpers/keywordCount/matchTokenWithWordForms"; +import Token from "../../../../src/parse/structure/Token"; + +/* eslint-disable max-len */ +const testCases = [ [ "The wordforms contain the token", [ "keyword" ] , new Token( "keyword" ), "en_US", true ], + [ "The wordforms do not contain the token", [ "keyword" ] , new Token( "notkeyword" ), "en_US", false ], + [ "The wordforms contain the token in a different form: singular matches plural", [ "keyword", "keywords" ], new Token( "keyword" ), "en_US", true ], + [ "The wordforms contain the token in a different form: plural matches singular", [ "keyword", "keywords" ], new Token( "keywords" ), "en_US", true ], + [ "The wordforms contain the token but with a capital letter", [ "Keyword" ], new Token( "keyword" ), "en_US", true ], + [ "The wordforms contain the token in a different form: singular matches plural (different locale)", [ "anahtar", "anahtarlar" ], new Token( "Anahtar" ), "tr_TR", true ], +]; +/* eslint-enable max-len */ + +describe.each( testCases )( "matchTokenWithWordForms", ( testDescription, wordForms, token, locale, expectedResult ) => { + it( testDescription, () => { + expect( matchTokenWithWordForms( wordForms, token, locale ) ).toBe( expectedResult ); + } ); +} ); + diff --git a/packages/yoastseo/src/languageProcessing/helpers/keywordCount/matchTokenWithWordForms.js b/packages/yoastseo/src/languageProcessing/helpers/keywordCount/matchTokenWithWordForms.js new file mode 100644 index 00000000000..2c656f06ac0 --- /dev/null +++ b/packages/yoastseo/src/languageProcessing/helpers/keywordCount/matchTokenWithWordForms.js @@ -0,0 +1,55 @@ +import addWordBoundary from "../word/addWordboundary"; +import { replaceTurkishIsMemoized } from "../transliterate/specialCharacterMappings"; +// import matchTextWithTransliteration from "../match/matchTextWithTransliteration"; + +/** + * Creates a regex from the keyword with 
included wordboundaries. + * + * @param {string} keyword The keyword to create a regex from. + * @param {string} locale The locale. + * + * @returns {RegExp} Regular expression of the keyword with word boundaries. + */ +const toRegex = function( keyword, locale ) { + keyword = addWordBoundary( keyword, false, "", locale ); + return new RegExp( keyword, "ig" ); +}; + +/** + * Matches a string with and without transliteration. + * Adapted from yoastseo/src/languageProcessing/helpers/match/matchTextWithTransliteration.js + * @param {string} text The text to match. + * @param {string} keyword The keyword to match in the text. + * @param {string} locale The locale used for transliteration. + * @returns {Boolean} All matches from the original as the transliterated text and keyword. + */ +const matchTextWithTransliteration = function( text, keyword, locale ) { + let keywordRegex = toRegex( keyword, locale ); + + if ( locale === "tr_TR" ) { + const turkishMappings = replaceTurkishIsMemoized( keyword ); + keywordRegex = new RegExp( turkishMappings.map( x => addWordBoundary( x ) ).join( "|" ), "ig" ); + } + const matches = text.match( keywordRegex ); + + return !! matches; +}; + +/** + * Checks if a token matches with different forms of a word. + * @param {string[]} wordForms Different forms of a word to match the token against. + * @param {Token} token The token to match. + * @param {string} locale The locale used for transliteration. + * @returns {boolean|*} True if the token matches with one of the word forms. + */ +const matchTokenWithWordForms = ( wordForms, token, locale ) => { + // First try a literal match to avoid unnecessary regexes. + if ( wordForms.includes( token.text ) ) { + return true; + } + return wordForms.some( keyword => { + return !! matchTextWithTransliteration( token.text, keyword, locale ); + } ); +}; + +export default matchTokenWithWordForms; diff --git a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js index 5537ca9d7b5..de24120ea15 100644 --- a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js +++ b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js @@ -2,65 +2,14 @@ import { flatten, uniq } from "lodash-es"; import matchTextWithArray from "../helpers/match/matchTextWithArray"; import getSentencesFromTree from "../helpers/sentence/getSentencesFromTree"; import Mark from "../../values/Mark"; -import addWordBoundary from "../helpers/word/addWordboundary"; -import { replaceTurkishIsMemoized } from "../helpers/transliterate/specialCharacterMappings"; import { tokenizerSplitter } from "../../parse/language/LanguageProcessor"; import { normalizeSingle } from "../helpers/sanitize/quotes"; import Token from "../../parse/structure/Token"; import { getLanguage } from "../index"; import removeConsecutiveKeyphraseFromResult from "../helpers/keywordCount/removeConsecutiveKeyphraseMatches"; +import matchTokenWithWordForms from "../helpers/keywordCount/matchTokenWithWordForms"; -/** - * Creates a regex from the keyword with included wordboundaries. - * - * @param {string} keyword The keyword to create a regex from. - * @param {string} locale The locale. - * - * @returns {RegExp} Regular expression of the keyword with word boundaries. - */ -const toRegex = function( keyword, locale ) { - keyword = addWordBoundary( keyword, false, "", locale ); - return new RegExp( keyword, "ig" ); -}; - -/** - * Matches a string with and without transliteration. 
- * @param {string} text The text to match. - * @param {string} keyword The keyword to match in the text. - * @param {string} locale The locale used for transliteration. - * @returns {Array} All matches from the original as the transliterated text and keyword. - */ -const matchTextWithTransliteration = function( text, keyword, locale ) { - let keywordRegex = toRegex( keyword, locale ); - - if ( locale === "tr_TR" ) { - const turkishMappings = replaceTurkishIsMemoized( keyword ); - keywordRegex = new RegExp( turkishMappings.map( x => addWordBoundary( x ) ).join( "|" ), "ig" ); - } - const matches = text.match( keywordRegex ); - - return !! matches; -}; - -/** - * Checks if a token matches with different forms of a word. - * @param {string[]} wordForms Different forms of a word to match the token against. - * @param {Token} token The token to match. - * @param {string} locale The locale used for transliteration. - * @returns {boolean|*} True if the token matches with one of the word forms. - */ -const matchTokenWithWordForms = ( wordForms, token, locale ) => { - // First try a literal match. - if ( wordForms.includes( token.text ) ) { - return true; - } - // Then try a match with transliteration. - return wordForms.some( keyword => { - return matchTextWithTransliteration( token.text, keyword, locale ); - } ); -}; - /** * (re-)Tokenizes the keyphrase forms in the same way that the tokens in the text are splitted. * This solves the problem that "key-word" would not match with "key-word" in the text, From 32a3156c2a5beeaeb79fe448c9dbb31671544ae0 Mon Sep 17 00:00:00 2001 From: hdvos Date: Tue, 9 May 2023 08:13:04 +0200 Subject: [PATCH 069/616] refactor getMarkingsInSentence and create specs. --- .../highlighting/getMarkingsInSentenceSpec.js | 104 ++++++++++++ .../highlighting/getMarkingsInSentence.js | 155 ++++++++++++++++++ .../researches/keywordCount.js | 117 +------------ 3 files changed, 260 insertions(+), 116 deletions(-) create mode 100644 packages/yoastseo/spec/languageProcessing/helpers/highlighting/getMarkingsInSentenceSpec.js create mode 100644 packages/yoastseo/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js diff --git a/packages/yoastseo/spec/languageProcessing/helpers/highlighting/getMarkingsInSentenceSpec.js b/packages/yoastseo/spec/languageProcessing/helpers/highlighting/getMarkingsInSentenceSpec.js new file mode 100644 index 00000000000..d28b0fb20bf --- /dev/null +++ b/packages/yoastseo/spec/languageProcessing/helpers/highlighting/getMarkingsInSentenceSpec.js @@ -0,0 +1,104 @@ +import getMarkingsInSentence from "../../../../src/languageProcessing/helpers/highlighting/getMarkingsInSentence"; +import Mark from "../../../../src/values/Mark"; +import JapaneseCustomHelper from "../../../../src/languageProcessing/languages/ja/helpers/matchTextWithWord"; + +/* eslint-disable max-len */ +const testCases = [ + { + testDescription: "No markings in sentence", + sentence: { text: "This is a sentence.", sourceCodeRange: { startOffset: 0, endOffset: 18 } }, + matchesInSentence: { primaryMatches: [], secondaryMatches: [] }, + matchWordCustomHelper: false, + locale: "en_US", + expectedResult: [], + }, + { + testDescription: "One marking in sentence", + sentence: { text: "This is a sentence.", sourceCodeRange: { startOffset: 0, endOffset: 18 } }, + matchesInSentence: { primaryMatches: [ [ { sourceCodeRange: { startOffset: 0, endOffset: 4 } } ] ], secondaryMatches: [] }, + matchWordCustomHelper: false, + locale: "en_US", + expectedResult: [ new Mark( { + marked: "This is a 
sentence.", + original: "This is a sentence.", + position: { endOffset: 4, startOffset: 0 }, + } ) ], + }, + { + testDescription: "One marking in sentence with two consecutive matches", + sentence: { text: "This is a sentence.", sourceCodeRange: { startOffset: 0, endOffset: 18 } }, + matchesInSentence: { primaryMatches: [ [ { sourceCodeRange: { startOffset: 0, endOffset: 4 } }, { sourceCodeRange: { startOffset: 5, endOffset: 7 } } ] ], secondaryMatches: [] }, + matchWordCustomHelper: false, + locale: "en_US", + expectedResult: [ new Mark( { + marked: "This is a sentence.", + original: "This is a sentence.", + position: { endOffset: 7, startOffset: 0 }, + } ) ], + }, + { + testDescription: "Two markings that are not consecutive in sentence", + sentence: { text: "This is a sentence.", sourceCodeRange: { startOffset: 0, endOffset: 18 } }, + matchesInSentence: { primaryMatches: [ [ { sourceCodeRange: { startOffset: 0, endOffset: 4 } } ], [ { sourceCodeRange: { startOffset: 10, endOffset: 18 } } ] ], secondaryMatches: [] }, + matchWordCustomHelper: false, + locale: "en_US", + expectedResult: [ + new Mark( { + marked: "This is a sentence.", + original: "This is a sentence.", + position: { endOffset: 4, startOffset: 0 }, + } ), + new Mark( { + marked: "This is a sentence.", + original: "This is a sentence.", + position: { endOffset: 18, startOffset: 10 }, + } ), + ], + }, + { + testDescription: "One marking in a sentence that has a non-zero startOffset", + sentence: { text: "This is a sentence.", sourceCodeRange: { startOffset: 10, endOffset: 38 } }, + matchesInSentence: { primaryMatches: [ [ { sourceCodeRange: { startOffset: 10, endOffset: 14 } } ] ], secondaryMatches: [] }, + matchWordCustomHelper: false, + locale: "en_US", + expectedResult: [ new Mark( { + marked: "This is a sentence.", + original: "This is a sentence.", + position: { endOffset: 14, startOffset: 10 }, + } ) ], + }, + { + testDescription: "One marking in a sentence of a language that does not use spaces", + sentence: { text: "これは文です.", sourceCodeRange: { startOffset: 0, endOffset: 7 } }, + matchesInSentence: { primaryMatches: [ [ { sourceCodeRange: { startOffset: 3, endOffset: 4 } } ] ], secondaryMatches: [] }, + matchWordCustomHelper: JapaneseCustomHelper, + locale: "ja", + expectedResult: [ new Mark( { + marked: "これはです.", + original: "これは文です.", + position: { endOffset: 4, startOffset: 3 }, + } ) ], + }, + { + testDescription: "Two markings that overlap", + sentence: { text: "This is a sentence.", sourceCodeRange: { startOffset: 0, endOffset: 18 } }, + matchesInSentence: { primaryMatches: [ [ { sourceCodeRange: { startOffset: 0, endOffset: 7 } } ], [ { sourceCodeRange: { startOffset: 5, endOffset: 9 } } ] ], secondaryMatches: [] }, + matchWordCustomHelper: false, + locale: "en_US", + expectedResult: [ + new Mark( { + marked: "This is a sentence.", + original: "This is a sentence.", + position: { endOffset: 9, startOffset: 0 }, + } ), + ], + }, +]; +/* eslint-enable max-len */ + +// eslint-disable-next-line max-len +describe.each( testCases )( "getMarkingsInSentence", ( { testDescription, sentence, matchesInSentence, matchWordCustomHelper, locale, expectedResult } ) => { + it( testDescription, () => { + expect( getMarkingsInSentence( sentence, matchesInSentence, matchWordCustomHelper, locale ) ).toEqual( expectedResult ); + } ); +} ); diff --git a/packages/yoastseo/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js b/packages/yoastseo/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js new 
file mode 100644
index 00000000000..46013b1d541
--- /dev/null
+++ b/packages/yoastseo/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js
@@ -0,0 +1,155 @@
+import { flatten } from "lodash-es";
+import Mark from "../../../../src/values/Mark";
+import { getLanguage } from "../../../../src/languageProcessing";
+
+const markStart = "<yoastmark class='yoast-text-mark'>";
+const markEnd = "</yoastmark>";
+
+/**
+ * This function creates the old style marked sentence for search based highlighting.
+ * Ideally this function becomes obsolete when position based highlighting is implemented everywhere.
+ * @param {Sentence} sentence The sentence to which to apply the marks.
+ * @param {Object} matches The matches to apply.
+ * @returns {string} The sentence with marks applied.
+ */
+const createMarksForSentence = ( sentence, matches ) => {
+ let sentenceText = sentence.text;
+
+ // Create one array with both primary and secondary matches, sorted by start offset.
+ let allMatches = flatten( matches.primaryMatches );
+ if ( matches.primaryMatches.length > 0 ) {
+ allMatches = allMatches.concat( flatten( matches.secondaryMatches ) ).sort( function( a, b ) {
+ return a.sourceCodeRange.startOffset - b.sourceCodeRange.startOffset;
+ } );
+ }
+
+ const sentenceStartOffset = sentence.sourceCodeRange.startOffset;
+ // Loop through the matches backwards, so that the reference is not affected by the changes.
+ for ( let i = allMatches.length - 1; i >= 0; i-- ) {
+ const match = allMatches[ i ];
+
+ // Apply the mark to the sentence.
+ // sentenceStartOffset is subtracted because the start and end offsets are relative to the start of the source code.
+ // Subtracting the sentenceStartOffset makes them relative to the start of the sentence.
+ sentenceText = sentenceText.substring( 0, match.sourceCodeRange.endOffset - sentenceStartOffset ) + markEnd +
+ sentenceText.substring( match.sourceCodeRange.endOffset - sentenceStartOffset );
+ sentenceText = sentenceText.substring( 0, match.sourceCodeRange.startOffset - sentenceStartOffset ) + markStart +
+ sentenceText.substring( match.sourceCodeRange.startOffset - sentenceStartOffset );
+ }
+
+ // Merge consecutive markings into one marking.
+ sentenceText = sentenceText.replace( new RegExp( "
( ?)", "ig" ), "$1" ); + + return sentenceText; +}; + +/** + * Merges consecutive markings into one marking. + * This is a helper for position based highlighting. + * + * @param {Mark[]} markings An array of markings to merge. + * @param {boolean} useSpace Whether words are separated by a space. In Japanese, for example, words are not separated by a space. + * + * @returns {Mark[]} An array of markings where consecutive markings are merged. + */ +const mergeConsecutiveMarkings = ( markings, useSpace = true ) => { + const newMarkings = []; + + /** + * The first iteration, newMarkings is empty, so the first marking is always added. + * After that, the newMarkings array is iterated over, + * and for each marking it is checked whether it is consecutive to any of the markings in the newMarkings array. + * If the newMarking and the current marking are consecutive, the current marking is merged into the newMarking. + * If the current marking precedes the newMarking, the start of the newMarking is set to the start of the current marking. + * The actionDone flag is set to false, such that the newMarking is not added to the newMarkings array. + * If the current marking succeeds the newMarking, the end of the newMarking is set to the end of the current marking. + * The actionDone flag is set to true, such that the newMarking is added to the newMarkings array. + * In the (theoretical and unlikely) edge case that the current marking is overlapping with the newMarking, + * the start of the newMarking is set to the minimum of the start of the newMarking and the current marking. + * The end of the newMarking is set to the maximum of the end of the newMarking and the current marking. + * The actionDone flag is set to true, such that the newMarking is added to the newMarkings array. + * + * If the actionDone flag is false, the current marking is added to the newMarkings array, + * because it is not found to be consecutive to any of the markings in the newMarkings array. + */ + + markings.forEach( ( marking ) => { + let actionDone = false; + newMarkings.forEach( ( newMarking, newMarkingIndex ) => { + // If the markings are consecutive, merge them. + if ( newMarking.getPositionEnd() + ( useSpace ? 1 : 0 ) === marking.getPositionStart() ) { + newMarkings[ newMarkingIndex ]._properties.position.endOffset = marking.getPositionEnd(); + actionDone = true; + // if the markings are overlapping, merge them. + } else if ( newMarking.getPositionEnd() >= marking.getPositionStart() && newMarking.getPositionStart() <= marking.getPositionEnd() ) { + // eslint-disable-next-line max-len + newMarkings[ newMarkingIndex ]._properties.position.startOffset = Math.min( newMarking.getPositionStart(), marking.getPositionStart() ); + newMarkings[ newMarkingIndex ]._properties.position.endOffset = Math.max( newMarking.getPositionEnd(), marking.getPositionEnd() ); + actionDone = true; + // If the markings are consecutive, merge them. + } else if ( newMarking.getPositionStart() === marking.getPositionEnd() + ( useSpace ? 1 : 0 ) ) { + newMarkings[ newMarkingIndex ]._properties.position.startOffset = marking.getPositionStart(); + actionDone = true; + } + } ); + if ( ! actionDone ) { + newMarkings.push( marking ); + } + } ); + return newMarkings; +}; + + +/** + * Gets the Mark objects of all keyphrase matches. + * + * @param {Sentence} sentence The sentence to check. + * @param {Object} matchesInSentence An object containing the matches in the sentence. 
+ * @param {function} matchWordCustomHelper A custom helper to match words with a text. + * @param {string} locale The locale used in the analysis. + * + * @returns {Mark[]} The array of Mark objects of the keyphrase matches. + */ +function getMarkingsInSentence( sentence, matchesInSentence, matchWordCustomHelper, locale ) { + // Create the marked sentence that is used for search based highlighting. + const markedSentence = createMarksForSentence( sentence, matchesInSentence ); + + // Create the markings for the primary matches. + // Note that there is a paradigm shift: + // With search based highlighting there would be one marking for the entire sentence. + // With Position based highlighting there is a marking for each match. + // In order to be backwards compatible with search based highlighting, + // all markings for a sentence have the same markedSentence. + // ... + const markings = matchesInSentence.primaryMatches.flatMap( match => { + return match.map( token => { + return new Mark( { + position: { + startOffset: token.sourceCodeRange.startOffset, + endOffset: token.sourceCodeRange.endOffset, + }, + marked: markedSentence, + original: sentence.text, + } ); + } ); + } ); + + // Only if there are primary matches, add the secondary matches. + if ( matchesInSentence.primaryMatches.length > 0 ) { + flatten( matchesInSentence.secondaryMatches ).forEach( match =>{ + markings.push( new Mark( { + position: { + startOffset: match.sourceCodeRange.startOffset, + endOffset: match.sourceCodeRange.endOffset, + }, + marked: markedSentence, + original: sentence.text, + + } ) ); + } + ); + } + return mergeConsecutiveMarkings( markings, getLanguage( locale ) !== "ja" ); +} + +export default getMarkingsInSentence; diff --git a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js index de24120ea15..815e64e58aa 100644 --- a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js +++ b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js @@ -1,14 +1,12 @@ import { flatten, uniq } from "lodash-es"; import matchTextWithArray from "../helpers/match/matchTextWithArray"; import getSentencesFromTree from "../helpers/sentence/getSentencesFromTree"; -import Mark from "../../values/Mark"; import { tokenizerSplitter } from "../../parse/language/LanguageProcessor"; import { normalizeSingle } from "../helpers/sanitize/quotes"; import Token from "../../parse/structure/Token"; -import { getLanguage } from "../index"; import removeConsecutiveKeyphraseFromResult from "../helpers/keywordCount/removeConsecutiveKeyphraseMatches"; import matchTokenWithWordForms from "../helpers/keywordCount/matchTokenWithWordForms"; - +import getMarkingsInSentence from "../helpers/highlighting/getMarkingsInSentence"; /** * (re-)Tokenizes the keyphrase forms in the same way that the tokens in the text are splitted. @@ -230,119 +228,6 @@ function getMatchesInSentence( sentence, keyphraseForms, locale, matchWordCusto return matches; } -const markStart = ""; -const markEnd = ""; - -/** - * Create marks for a sentence. This function creates marks for the (old) search based highlighting. - * @param {Sentence[]} sentence The sentence to which to apply the marks. - * @param {{}[]} matches The matches to apply. - * @returns {string} The sentence with marks applied. 
- */ -const createMarksForSentence = ( sentence, matches ) => { - const reference = sentence.sourceCodeRange.startOffset; - let sentenceText = sentence.text; - - let allMatches = flatten( matches.primaryMatches ); - if ( matches.primaryMatches.length > 0 ) { - allMatches = allMatches.concat( flatten( matches.secondaryMatches ) ).sort( function( a, b ) { - return a.sourceCodeRange.startOffset - b.sourceCodeRange.startOffset; - } ); - } - - for ( let i = allMatches.length - 1; i >= 0; i-- ) { - const match = allMatches[ i ]; - - sentenceText = sentenceText.substring( 0, match.sourceCodeRange.endOffset - reference ) + markEnd + - sentenceText.substring( match.sourceCodeRange.endOffset - reference ); - sentenceText = sentenceText.substring( 0, match.sourceCodeRange.startOffset - reference ) + markStart + - sentenceText.substring( match.sourceCodeRange.startOffset - reference ); - } - - sentenceText = sentenceText.replace( new RegExp( "( ?)", "ig" ), "$1" ); - - return sentenceText; -}; - -/** - * Merges consecutive markings into one marking. - * @param {{}[]} markings An array of markings to merge. - * @param {boolean} isJapanese Whether the text is Japanese. - * - * @returns {{}[]} An array of markings where consecutive markings are merged. - */ -const mergeConsecutiveMarkings = ( markings, isJapanese = false ) => { - const newMarkings = []; - markings.forEach( ( marking ) => { - let actionDone = false; - newMarkings.forEach( ( newMarking, newMarkingIndex ) => { - // If the markings are consecutive, merge them. - if ( newMarking.getPositionEnd() + ( isJapanese ? 0 : 1 ) === marking.getPositionStart() ) { - newMarkings[ newMarkingIndex ]._properties.position.endOffset = marking.getPositionEnd(); - actionDone = true; - // if the markings are overlapping, merge them. - } else if ( newMarking.getPositionEnd() >= marking.getPositionStart() && newMarking.getPositionStart() <= marking.getPositionEnd() ) { - // eslint-disable-next-line max-len - newMarkings[ newMarkingIndex ]._properties.position.startOffset = Math.min( newMarking.getPositionStart(), marking.getPositionStart() ); - newMarkings[ newMarkingIndex ]._properties.position.endOffset = Math.max( newMarking.getPositionEnd(), marking.getPositionEnd() ); - actionDone = true; - // If the markings are consecutive, merge them. - } else if ( newMarking.getPositionStart() === marking.getPositionEnd() + ( isJapanese ? 0 : 1 ) ) { - newMarkings[ newMarkingIndex ]._properties.position.startOffset = marking.getPositionStart(); - actionDone = true; - } - } ); - if ( ! actionDone ) { - newMarkings.push( marking ); - } - } ); - return newMarkings; -}; - - -/** - * Gets the Mark objects of all keyphrase matches. - * - * @param {string} sentence The sentence to check. - * @param {Array} matchesInSentence The array of keyphrase matches in the sentence. - * @param {function} matchWordCustomHelper A custom helper to match words with a text. - * @param {string} locale The locale used in the analysis. - * - * @returns {Mark[]} The array of Mark objects of the keyphrase matches. 
- */ -function getMarkingsInSentence( sentence, matchesInSentence, matchWordCustomHelper, locale ) { - const markedSentence = createMarksForSentence( sentence, matchesInSentence ); - const markings = matchesInSentence.primaryMatches.flatMap( match => { - return match.map( token => { - return new Mark( { - position: { - startOffset: token.sourceCodeRange.startOffset, - endOffset: token.sourceCodeRange.endOffset, - }, - marked: markedSentence, - original: sentence.text, - } ); - } ); - } ); - - if ( matchesInSentence.primaryMatches.length > 0 ) { - flatten( matchesInSentence.secondaryMatches ).forEach( match =>{ - markings.push( new Mark( { - position: { - startOffset: match.sourceCodeRange.startOffset, - endOffset: match.sourceCodeRange.endOffset, - }, - marked: markedSentence, - original: sentence.text, - - } ) ); - } - ); - } - const mergedMarkings = mergeConsecutiveMarkings( markings, getLanguage( locale ) === "ja" ); - return mergedMarkings; -} - /** * Counts the occurrences of the keyphrase in the text and creates the Mark objects for the matches. From 88bb8d8addb837fe483a25d7ae6768465dd9bb8f Mon Sep 17 00:00:00 2001 From: hdvos Date: Tue, 9 May 2023 11:45:11 +0200 Subject: [PATCH 070/616] Optimize mergeConsecutiveAndOverlappingMarkings --- .../highlighting/getMarkingsInSentence.js | 65 +++++++------------ 1 file changed, 23 insertions(+), 42 deletions(-) diff --git a/packages/yoastseo/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js b/packages/yoastseo/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js index 46013b1d541..d8db725ea8c 100644 --- a/packages/yoastseo/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js +++ b/packages/yoastseo/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js @@ -44,58 +44,39 @@ const createMarksForSentence = ( sentence, matches ) => { }; /** - * Merges consecutive markings into one marking. + * Merges consecutive and overlapping markings into one marking. * This is a helper for position based highlighting. - * * @param {Mark[]} markings An array of markings to merge. - * @param {boolean} useSpace Whether words are separated by a space. In Japanese, for example, words are not separated by a space. - * - * @returns {Mark[]} An array of markings where consecutive markings are merged. + * @param {Boolean} useSpace Whether words are separated by a space. In Japanese, for example, words are not separated by a space. + * @returns {Mark[]} An array of markings where consecutive and overlapping markings are merged. */ -const mergeConsecutiveMarkings = ( markings, useSpace = true ) => { +const mergeConsecutiveAndOverlappingMarkings = ( markings, useSpace = true ) => { const newMarkings = []; - /** - * The first iteration, newMarkings is empty, so the first marking is always added. - * After that, the newMarkings array is iterated over, - * and for each marking it is checked whether it is consecutive to any of the markings in the newMarkings array. - * If the newMarking and the current marking are consecutive, the current marking is merged into the newMarking. - * If the current marking precedes the newMarking, the start of the newMarking is set to the start of the current marking. - * The actionDone flag is set to false, such that the newMarking is not added to the newMarkings array. - * If the current marking succeeds the newMarking, the end of the newMarking is set to the end of the current marking. 
- * The actionDone flag is set to true, such that the newMarking is added to the newMarkings array. - * In the (theoretical and unlikely) edge case that the current marking is overlapping with the newMarking, - * the start of the newMarking is set to the minimum of the start of the newMarking and the current marking. - * The end of the newMarking is set to the maximum of the end of the newMarking and the current marking. - * The actionDone flag is set to true, such that the newMarking is added to the newMarkings array. - * - * If the actionDone flag is false, the current marking is added to the newMarkings array, - * because it is not found to be consecutive to any of the markings in the newMarkings array. - */ + // Sort markings by start offset. This is probably redundant, but for extra safety. + markings.sort( function( a, b ) { + return a.getPositionStart() - b.getPositionStart(); + } ); markings.forEach( ( marking ) => { - let actionDone = false; - newMarkings.forEach( ( newMarking, newMarkingIndex ) => { - // If the markings are consecutive, merge them. - if ( newMarking.getPositionEnd() + ( useSpace ? 1 : 0 ) === marking.getPositionStart() ) { - newMarkings[ newMarkingIndex ]._properties.position.endOffset = marking.getPositionEnd(); - actionDone = true; - // if the markings are overlapping, merge them. - } else if ( newMarking.getPositionEnd() >= marking.getPositionStart() && newMarking.getPositionStart() <= marking.getPositionEnd() ) { - // eslint-disable-next-line max-len - newMarkings[ newMarkingIndex ]._properties.position.startOffset = Math.min( newMarking.getPositionStart(), marking.getPositionStart() ); - newMarkings[ newMarkingIndex ]._properties.position.endOffset = Math.max( newMarking.getPositionEnd(), marking.getPositionEnd() ); - actionDone = true; - // If the markings are consecutive, merge them. - } else if ( newMarking.getPositionStart() === marking.getPositionEnd() + ( useSpace ? 1 : 0 ) ) { - newMarkings[ newMarkingIndex ]._properties.position.startOffset = marking.getPositionStart(); - actionDone = true; - } - } ); - if ( ! actionDone ) { + if ( newMarkings.length === 0 ) { + newMarkings.push( marking ); + return; + } + + const lastMarking = newMarkings[ newMarkings.length - 1 ]; + if ( lastMarking.getPositionEnd() + ( useSpace ? 1 : 0 ) === marking.getPositionStart() ) { + // The marking is consecutive to the last marking, so we extend the last marking to include the new marking. + lastMarking.setPositionEnd( marking.getPositionEnd() ); + } else if ( marking.getPositionStart() <= lastMarking.getPositionEnd() ) { + // The marking overlaps with the last marking, so we extend the last marking to include the new marking. + lastMarking.setPositionEnd( marking.getPositionEnd() ); + } else { + // The marking is not consecutive to the last marking, so we add it to the array by itself. newMarkings.push( marking ); } } ); + return newMarkings; }; From 3a145471ae2166ee8b62fc0dfcd9d72bc61c317b Mon Sep 17 00:00:00 2001 From: hdvos Date: Tue, 9 May 2023 11:45:58 +0200 Subject: [PATCH 071/616] Merge matches before applying them to sentence string. 
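
Sketch of the intended effect (illustrative offsets, assuming space-separated
tokens, i.e. useSpace = true): two matches that directly follow each other are
collapsed into a single range before the mark tags are applied to the sentence
string.

	const matches = [
		{ sourceCodeRange: { startOffset: 0, endOffset: 4 } },
		{ sourceCodeRange: { startOffset: 5, endOffset: 9 } },
	];
	// mergeConsecutiveAndOverlappingMatches( matches ) is expected to return:
	// [ { sourceCodeRange: { startOffset: 0, endOffset: 9 } } ]
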
--- .../highlighting/getMarkingsInSentence.js | 40 ++++++++++++++++++- 1 file changed, 39 insertions(+), 1 deletion(-) diff --git a/packages/yoastseo/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js b/packages/yoastseo/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js index d8db725ea8c..dd6e3f90b6d 100644 --- a/packages/yoastseo/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js +++ b/packages/yoastseo/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js @@ -5,6 +5,41 @@ import { getLanguage } from "../../../../src/languageProcessing"; const markStart = ""; const markEnd = ""; +/** + * Merges consecutive and overlapping markings into one marking. + * This is a helper for position based highlighting. + * @param {Object[]} matches An array of markings to merge. + * @param {Boolean} useSpace Whether words are separated by a space. In Japanese, for example, words are not separated by a space. + * @returns {Mark[]} An array of markings where consecutive and overlapping markings are merged. + */ +const mergeConsecutiveAndOverlappingMatches = ( matches, useSpace = true ) => { + const newMatches = []; + + // Sort matches by start offset. This is probably redundant, but for extra safety. + matches.sort( function( a, b ) { + return a.sourceCodeRange.startOffset - b.sourceCodeRange.startOffset; + } ); + + matches.forEach( ( match ) => { + if ( newMatches.length === 0 ) { + newMatches.push( match ); + return; + } + + const lastMatch = newMatches[ newMatches.length - 1 ]; + if ( lastMatch.sourceCodeRange.endOffset + ( useSpace ? 1 : 0 ) === match.sourceCodeRange.startOffset ) { + lastMatch.sourceCodeRange.endOffset = match.sourceCodeRange.endOffset; + } else if ( match.sourceCodeRange.startOffset <= lastMatch.sourceCodeRange.endOffset ) { + // The match overlaps with the last match, so we extend the last match to include the new match. + lastMatch.sourceCodeRange.endOffset = match.sourceCodeRange.endOffset; + } else { + newMatches.push( match ); + } + } ); + + return newMatches; +}; + /** * This function creates the old style marked sentence for search based highlighting. * Ideally this function becomes obsolete when position based highlighting is implemented everywhere. @@ -23,6 +58,9 @@ const createMarksForSentence = ( sentence, matches ) => { } ); } + // Merge consecutive and overlapping matches. + allMatches = mergeConsecutiveAndOverlappingMatches( allMatches ); + const sentenceStartOffset = sentence.sourceCodeRange.startOffset; // Loop through the matches backwards, so that the reference is not affected by the changes. 
for ( let i = allMatches.length - 1; i >= 0; i-- ) { @@ -130,7 +168,7 @@ function getMarkingsInSentence( sentence, matchesInSentence, matchWordCustomHelp } ); } - return mergeConsecutiveMarkings( markings, getLanguage( locale ) !== "ja" ); + return mergeConsecutiveAndOverlappingMarkings( markings, getLanguage( locale ) !== "ja" ); } export default getMarkingsInSentence; From 96169b4325f9318b785c54ba8f611847dfd18f14 Mon Sep 17 00:00:00 2001 From: hdvos Date: Tue, 9 May 2023 11:46:19 +0200 Subject: [PATCH 072/616] Add setters for positionstart and position end on Mark.js --- packages/yoastseo/src/values/Mark.js | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/packages/yoastseo/src/values/Mark.js b/packages/yoastseo/src/values/Mark.js index d9e3f7d4e04..1e36be49cbe 100644 --- a/packages/yoastseo/src/values/Mark.js +++ b/packages/yoastseo/src/values/Mark.js @@ -68,6 +68,24 @@ Mark.prototype.getPositionEnd = function() { return this._properties.position && this._properties.position.endOffset; }; +/** + * Sets the start position. + * @param {number} positionStart The new start position. + * @returns {void} + */ +Mark.prototype.setPositionStart = function( positionStart ) { + this._properties.position.startOffset = positionStart; +}; + +/** + * Sets the end position. + * @param {number} positionEnd The new end position. + * @returns {void} + */ +Mark.prototype.setPositionEnd = function( positionEnd ) { + this._properties.position.endOffset = positionEnd; +}; + /** * Applies this mark to the given text with replacement-based highlighting. * From 8dfacecae294172db6f3417fa65aece812dde496 Mon Sep 17 00:00:00 2001 From: hdvos Date: Tue, 9 May 2023 11:58:53 +0200 Subject: [PATCH 073/616] Adapt spec now marks are sorted. --- .../languageProcessing/researches/keywordCountSpec.js | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js index da55b28d3c0..5ba6d8aa1af 100644 --- a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js +++ b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js @@ -255,25 +255,25 @@ describe( "Test for counting the keyword in a text", function() { "key and another key) and one word.", original: "A string with three keys (key and another key) and one word.", - position: { endOffset: 48, startOffset: 45 } } ), + position: { endOffset: 27, startOffset: 23 } } ), new Mark( { marked: "A string with three keys (" + "key and another key) and one word.", original: "A string with three keys (key and another key) and one word.", - position: { endOffset: 62, startOffset: 58 } } ), + position: { endOffset: 32, startOffset: 29 } } ), new Mark( { marked: "A string with three keys (" + "key and another key) and one word.", original: "A string with three keys (key and another key) and one word.", - position: { endOffset: 27, startOffset: 23 } } ), + position: { endOffset: 48, startOffset: 45 } } ), new Mark( { marked: "A string with three keys (" + "key and another key) and one word.", original: "A string with three keys (key and another key) and one word.", - position: { endOffset: 32, startOffset: 29 } } ), + position: { endOffset: 62, startOffset: 58 } } ), ] ); } ); From 92aea10be5b7efbcf00095042278949aaa88d9ed Mon Sep 17 00:00:00 2001 From: hdvos Date: Tue, 9 May 2023 12:00:12 +0200 Subject: [PATCH 074/616] add doc for locale --- 
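Note: the locale that getMatchesInTokens receives is passed on to
matchTokenWithWordForms, where it is only used when a literal comparison fails
and the word-boundary/transliteration matching kicks in (for example the tr_TR
mappings). A minimal usage sketch with illustrative values, reusing the imports
that keywordCount.js already has:

	import matchTokenWithWordForms from "../helpers/keywordCount/matchTokenWithWordForms";
	import Token from "../../parse/structure/Token";

	const token = new Token( "keywords", { startOffset: 20, endOffset: 28 } );
	// A literal match against one of the word forms succeeds regardless of the locale.
	matchTokenWithWordForms( [ "keyword", "keywords" ], token, "en_US" );
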
.../yoastseo/src/languageProcessing/researches/keywordCount.js | 1 + 1 file changed, 1 insertion(+) diff --git a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js index 815e64e58aa..c17408da122 100644 --- a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js +++ b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js @@ -35,6 +35,7 @@ const tokenizeKeyphraseForms = ( keyphraseForms ) => { * Matches the keyphrase forms in an array of tokens. * @param {array[]} keyphraseForms The keyphrase forms to match. * @param {Token[]} tokens The tokens to match the keyphrase forms in. + * @param {string} locale The locale used in the analysis. * @returns {{primaryMatches: *[], secondaryMatches: *[]}} The matches. */ const getMatchesInTokens = ( keyphraseForms, tokens, locale ) => { From fce897d276ecba187541ff52a35dc2228984096d Mon Sep 17 00:00:00 2001 From: hdvos Date: Tue, 9 May 2023 12:00:36 +0200 Subject: [PATCH 075/616] Check for useSpace --- .../helpers/highlighting/getMarkingsInSentence.js | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/packages/yoastseo/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js b/packages/yoastseo/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js index dd6e3f90b6d..5db10b9c44e 100644 --- a/packages/yoastseo/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js +++ b/packages/yoastseo/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js @@ -45,9 +45,10 @@ const mergeConsecutiveAndOverlappingMatches = ( matches, useSpace = true ) => { * Ideally this function becomes obsolete when position based highlighting is implemented everywhere. * @param {Sentence} sentence The sentence to which to apply the marks. * @param {Object} matches The matches to apply. + * @param {string} locale The locale of the text. * @returns {string} The sentence with marks applied. */ -const createMarksForSentence = ( sentence, matches ) => { +const createMarksForSentence = ( sentence, matches, locale ) => { let sentenceText = sentence.text; // Create one array with both primary and secondary matches, sorted by start offset. @@ -59,7 +60,7 @@ const createMarksForSentence = ( sentence, matches ) => { } // Merge consecutive and overlapping matches. - allMatches = mergeConsecutiveAndOverlappingMatches( allMatches ); + allMatches = mergeConsecutiveAndOverlappingMatches( allMatches, getLanguage( locale ) !== "ja" ); const sentenceStartOffset = sentence.sourceCodeRange.startOffset; // Loop through the matches backwards, so that the reference is not affected by the changes. @@ -131,7 +132,7 @@ const mergeConsecutiveAndOverlappingMarkings = ( markings, useSpace = true ) => */ function getMarkingsInSentence( sentence, matchesInSentence, matchWordCustomHelper, locale ) { // Create the marked sentence that is used for search based highlighting. - const markedSentence = createMarksForSentence( sentence, matchesInSentence ); + const markedSentence = createMarksForSentence( sentence, matchesInSentence, locale ); // Create the markings for the primary matches. 
// Note that there is a paradigm shift: From 43611e4345ddd137a094fbb4740eb15865e9f5b4 Mon Sep 17 00:00:00 2001 From: hdvos Date: Wed, 10 May 2023 13:11:23 +0200 Subject: [PATCH 076/616] improve docstring of removeConsecutiveKeyphraseFromResult --- .../helpers/keywordCount/removeConsecutiveKeyphraseMatches.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/yoastseo/src/languageProcessing/helpers/keywordCount/removeConsecutiveKeyphraseMatches.js b/packages/yoastseo/src/languageProcessing/helpers/keywordCount/removeConsecutiveKeyphraseMatches.js index 594ee83caad..762a40d439b 100644 --- a/packages/yoastseo/src/languageProcessing/helpers/keywordCount/removeConsecutiveKeyphraseMatches.js +++ b/packages/yoastseo/src/languageProcessing/helpers/keywordCount/removeConsecutiveKeyphraseMatches.js @@ -25,7 +25,7 @@ const isConsecutiveKeyphrase = ( currentMatch, firstPreviousMatch, secondPreviou /** * Matches the keyphrase forms in an array of tokens. * @param {Object} result The result object. - * @returns {Object} The result object with the consecutive keyphrase removed. + * @returns {{primaryMatches: *[], secondaryMatches: *[], position: number}} The result object with the consecutive keyphrase removed. */ const removeConsecutiveKeyphraseFromResult = ( result ) => { const newResult = { ...result, primaryMatches: [] }; From 7fa9a001b99c300bbc5782185cbf619a10661457 Mon Sep 17 00:00:00 2001 From: hdvos Date: Wed, 10 May 2023 13:12:29 +0200 Subject: [PATCH 077/616] refactor findKeyWordFormsInSentence.js --- .../match/findKeyWordFormsInSentenceSpec.js | 341 ++++++++++++++++++ .../match/findKeyWordFormsInSentence.js | 240 ++++++++++++ .../researches/keywordCount.js | 231 +----------- 3 files changed, 584 insertions(+), 228 deletions(-) create mode 100644 packages/yoastseo/spec/languageProcessing/helpers/match/findKeyWordFormsInSentenceSpec.js create mode 100644 packages/yoastseo/src/languageProcessing/helpers/match/findKeyWordFormsInSentence.js diff --git a/packages/yoastseo/spec/languageProcessing/helpers/match/findKeyWordFormsInSentenceSpec.js b/packages/yoastseo/spec/languageProcessing/helpers/match/findKeyWordFormsInSentenceSpec.js new file mode 100644 index 00000000000..09c1b195f23 --- /dev/null +++ b/packages/yoastseo/spec/languageProcessing/helpers/match/findKeyWordFormsInSentenceSpec.js @@ -0,0 +1,341 @@ +import findKeyWordFormsInSentence from "../../../../src/languageProcessing/helpers/match/findKeyWordFormsInSentence"; +import JapaneseCustomHelper from "../../../../src/languageProcessing/languages/ja/helpers/matchTextWithWord"; + +/* eslint-disable max-len */ +const testCases = [ + { + testDescription: "No markings in sentence", + sentence: { + text: "A sentence with notthekeyphrase.", + tokens: [ + { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, + { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, + { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, + { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, + { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, + { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, + { text: "notthekeyphrase", sourceCodeRange: { startOffset: 16, endOffset: 31 } }, + { text: ".", sourceCodeRange: { startOffset: 31, endOffset: 32 } }, + ], + sourceCodeRange: { startOffset: 0, endOffset: 18 } }, + keyphraseForms: [ [ "keyword", "keywords" ] ], + locale: "en_US", + matchWordCustomHelper: false, + expectedResult: { primaryMatches: [], 
secondaryMatches: [] }, + }, + { + testDescription: "return empty result if KeyphraseForms is empty", + sentence: { + text: "A sentence with notthekeyphrase.", + tokens: [ + { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, + { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, + { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, + { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, + { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, + { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, + { text: "notthekeyphrase", sourceCodeRange: { startOffset: 16, endOffset: 31 } }, + { text: ".", sourceCodeRange: { startOffset: 31, endOffset: 32 } }, + ], + sourceCodeRange: { startOffset: 0, endOffset: 32 } }, + keyphraseForms: [ [] ], + locale: "en_US", + matchWordCustomHelper: false, + expectedResult: { primaryMatches: [], secondaryMatches: [] }, + + }, + { + testDescription: "One marking in sentence of a single-word keyphrase", + sentence: { + text: "A sentence with the keyword.", + tokens: [ + { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, + { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, + { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, + { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, + { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, + { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, + { text: "the", sourceCodeRange: { startOffset: 16, endOffset: 19 } }, + { text: " ", sourceCodeRange: { startOffset: 19, endOffset: 20 } }, + { text: "keyword", sourceCodeRange: { startOffset: 20, endOffset: 27 } }, + { text: ".", sourceCodeRange: { startOffset: 27, endOffset: 28 } }, + ], + sourceCodeRange: { startOffset: 0, endOffset: 18 } }, + keyphraseForms: [ [ "keyword", "keywords" ] ], + locale: "en_US", + matchWordCustomHelper: false, + expectedResult: { + primaryMatches: [ [ { sourceCodeRange: { startOffset: 20, endOffset: 27 }, text: "keyword" } ] ], + secondaryMatches: [] }, + }, + { + testDescription: "One marking in sentence of a multi word keyphrase", + sentence: { + text: "A sentence with the key words.", + tokens: [ + { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, + { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, + { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, + { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, + { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, + { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, + { text: "the", sourceCodeRange: { startOffset: 16, endOffset: 19 } }, + { text: " ", sourceCodeRange: { startOffset: 19, endOffset: 20 } }, + { text: "key", sourceCodeRange: { startOffset: 20, endOffset: 23 } }, + { text: " ", sourceCodeRange: { startOffset: 23, endOffset: 24 } }, + { text: "words", sourceCodeRange: { startOffset: 24, endOffset: 29 } }, + { text: ".", sourceCodeRange: { startOffset: 29, endOffset: 30 } }, + ], + sourceCodeRange: { startOffset: 0, endOffset: 30 } }, + keyphraseForms: [ [ "key" ], [ "word", "words" ] ], + locale: "en_US", + matchWordCustomHelper: false, + expectedResult: { + primaryMatches: [ + [ { sourceCodeRange: { endOffset: 23, startOffset: 20 }, text: "key" }, { sourceCodeRange: { endOffset: 29, startOffset: 24 }, text: "words" } ], + ], + secondaryMatches: [] }, + }, + { + testDescription: "Two markings of 
multi word keyphrase in sentence", + sentence: { + text: "A sentence with the key words and the key word.", + tokens: [ + { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, + { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, + { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, + { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, + { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, + { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, + { text: "the", sourceCodeRange: { startOffset: 16, endOffset: 19 } }, + { text: " ", sourceCodeRange: { startOffset: 19, endOffset: 20 } }, + { text: "key", sourceCodeRange: { startOffset: 20, endOffset: 23 } }, + { text: " ", sourceCodeRange: { startOffset: 23, endOffset: 24 } }, + { text: "words", sourceCodeRange: { startOffset: 24, endOffset: 29 } }, + { text: " ", sourceCodeRange: { startOffset: 29, endOffset: 30 } }, + { text: "and", sourceCodeRange: { startOffset: 30, endOffset: 33 } }, + { text: " ", sourceCodeRange: { startOffset: 33, endOffset: 34 } }, + { text: "the", sourceCodeRange: { startOffset: 34, endOffset: 37 } }, + { text: " ", sourceCodeRange: { startOffset: 37, endOffset: 38 } }, + { text: "key", sourceCodeRange: { startOffset: 38, endOffset: 41 } }, + { text: " ", sourceCodeRange: { startOffset: 41, endOffset: 42 } }, + { text: "word", sourceCodeRange: { startOffset: 42, endOffset: 46 } }, + { text: ".", sourceCodeRange: { startOffset: 46, endOffset: 47 } }, + ], + sourceCodeRange: { startOffset: 0, endOffset: 47 } }, + keyphraseForms: [ [ "key" ], [ "word", "words" ] ], + locale: "en_US", + matchWordCustomHelper: false, + expectedResult: { + primaryMatches: [ + [ { sourceCodeRange: { endOffset: 23, startOffset: 20 }, text: "key" }, { sourceCodeRange: { endOffset: 29, startOffset: 24 }, text: "words" } ], + [ { sourceCodeRange: { endOffset: 41, startOffset: 38 }, text: "key" }, { sourceCodeRange: { endOffset: 46, startOffset: 42 }, text: "word" } ], + ], + secondaryMatches: [], + }, + }, + { + testDescription: "One primary and one secondary marking of multi word keyphrase in sentence", + sentence: { + text: "A key sentence with a key word.", + tokens: [ + { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, + { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, + { text: "key", sourceCodeRange: { startOffset: 2, endOffset: 5 } }, + { text: " ", sourceCodeRange: { startOffset: 5, endOffset: 6 } }, + { text: "sentence", sourceCodeRange: { startOffset: 6, endOffset: 14 } }, + { text: " ", sourceCodeRange: { startOffset: 14, endOffset: 15 } }, + { text: "with", sourceCodeRange: { startOffset: 15, endOffset: 19 } }, + { text: " ", sourceCodeRange: { startOffset: 19, endOffset: 20 } }, + { text: "a", sourceCodeRange: { startOffset: 20, endOffset: 21 } }, + { text: " ", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, + { text: "key", sourceCodeRange: { startOffset: 22, endOffset: 25 } }, + { text: " ", sourceCodeRange: { startOffset: 25, endOffset: 26 } }, + { text: "word", sourceCodeRange: { startOffset: 26, endOffset: 30 } }, + { text: ".", sourceCodeRange: { startOffset: 30, endOffset: 31 } }, + ], + sourceCodeRange: { startOffset: 0, endOffset: 31 } }, + keyphraseForms: [ [ "key" ], [ "word", "words" ] ], + locale: "en_US", + matchWordCustomHelper: false, + expectedResult: { + primaryMatches: [ + [ { sourceCodeRange: { startOffset: 22, endOffset: 25 }, text: "key" }, { sourceCodeRange: { startOffset: 
26, endOffset: 30 }, text: "word" } ], + ], + secondaryMatches: + [ { sourceCodeRange: { startOffset: 2, endOffset: 5 }, text: "key" } ], + }, + }, + { + testDescription: "No match if a multi word keyphrase is separated with an underscore in the sentence and a secondary match.", + sentence: { + text: "A sentence with a key_word.", + tokens: [ + { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, + { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, + { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, + { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, + { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, + { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, + { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, + { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, + { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, + { text: "_", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, + { text: "word", sourceCodeRange: { startOffset: 22, endOffset: 26 } }, + { text: ".", sourceCodeRange: { startOffset: 26, endOffset: 27 } }, + ], + sourceCodeRange: { startOffset: 0, endOffset: 27 } }, + keyphraseForms: [ [ "key" ], [ "word", "words" ] ], + locale: "en_US", + matchWordCustomHelper: false, + expectedResult: { + primaryMatches: [], + secondaryMatches: [], + }, + }, + { + testDescription: "A secondary match if a multi word keyphrase is separated with an underscore in the sentence.", + sentence: { + text: "A key sentence with a key_word.", + tokens: [ + { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, + { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, + { text: "key", sourceCodeRange: { startOffset: 2, endOffset: 5 } }, + { text: " ", sourceCodeRange: { startOffset: 5, endOffset: 6 } }, + { text: "sentence", sourceCodeRange: { startOffset: 6, endOffset: 14 } }, + { text: " ", sourceCodeRange: { startOffset: 14, endOffset: 15 } }, + { text: "with", sourceCodeRange: { startOffset: 15, endOffset: 19 } }, + { text: " ", sourceCodeRange: { startOffset: 19, endOffset: 20 } }, + { text: "a", sourceCodeRange: { startOffset: 20, endOffset: 21 } }, + { text: " ", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, + { text: "key", sourceCodeRange: { startOffset: 22, endOffset: 25 } }, + { text: "_", sourceCodeRange: { startOffset: 25, endOffset: 26 } }, + { text: "word", sourceCodeRange: { startOffset: 26, endOffset: 30 } }, + { text: ".", sourceCodeRange: { startOffset: 30, endOffset: 31 } }, + ], + sourceCodeRange: { startOffset: 0, endOffset: 31 } }, + keyphraseForms: [ [ "key" ], [ "word", "words" ] ], + locale: "en_US", + matchWordCustomHelper: false, + expectedResult: { + primaryMatches: [], + secondaryMatches: [ { sourceCodeRange: { endOffset: 5, startOffset: 2 }, text: "key" } ], + }, + }, + { + testDescription: "If there is a secondary match, an empty result is returned.", + sentence: { + text: "A sentence with a key.", + tokens: [ + { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, + { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, + { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, + { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, + { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, + { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, + { text: "a", sourceCodeRange: { startOffset: 16, 
endOffset: 17 } }, + { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, + { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, + { text: ".", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, + ], + sourceCodeRange: { startOffset: 0, endOffset: 22 } }, + keyphraseForms: [ [ "key" ], [ "word", "words" ] ], + locale: "en_US", + matchWordCustomHelper: false, + expectedResult: { + primaryMatches: [], + secondaryMatches: [], + }, + }, + { + testDescription: "Only a primary match in a language that uses a custom helper to match words.", + sentence: { + text: "私の猫はかわいいです。", + tokens: [ + { text: "私の猫はかわいいです。", sourceCodeRange: { startOffset: 0, endOffset: 12 } }, + ], + sourceCodeRange: { startOffset: 0, endOffset: 12 } }, + keyphraseForms: [ [ "猫" ], [ "かわいい" ] ], + locale: "ja", + matchWordCustomHelper: JapaneseCustomHelper, + expectedResult: { + position: 0, + primaryMatches: [ [ + { sourceCodeRange: { startOffset: 2, endOffset: 3 }, text: "猫" }, + { sourceCodeRange: { startOffset: 4, endOffset: 8 }, text: "かわいい" }, + ] ], + secondaryMatches: [], + }, + }, + { + testDescription: "A primary and seconday match in a language that uses a custom helper to match words.", + sentence: { + text: "私の猫はかわいいですかわいい。", + tokens: [ + { text: "私の猫はかわいいですかわいい。", sourceCodeRange: { startOffset: 0, endOffset: 15 } }, + ], + sourceCodeRange: { startOffset: 0, endOffset: 15 } }, + keyphraseForms: [ [ "猫" ], [ "かわいい" ] ], + locale: "ja", + matchWordCustomHelper: JapaneseCustomHelper, + expectedResult: { + position: 0, + primaryMatches: [ [ + { sourceCodeRange: { startOffset: 2, endOffset: 3 }, text: "猫" }, + { sourceCodeRange: { startOffset: 4, endOffset: 8 }, text: "かわいい" } ] ], + secondaryMatches: [ [ + { sourceCodeRange: { startOffset: 10, endOffset: 14 }, text: "かわいい" } ] ], + }, + }, + { + testDescription: "Only secondary matches in a language that uses a custom helper to match words.", + sentence: { + text: "私のはかわいいですかわいい。", + tokens: [ + { text: "私のはかわいいですかわいい。", sourceCodeRange: { startOffset: 0, endOffset: 14 } }, + ], + sourceCodeRange: { startOffset: 0, endOffset: 14 } }, + keyphraseForms: [ [ "猫" ], [ "かわいい" ] ], + locale: "ja", + matchWordCustomHelper: JapaneseCustomHelper, + expectedResult: { + position: -1, + primaryMatches: [], + secondaryMatches: [], + }, + }, + + + // { + // testDescription: "One marking in sentence with two consecutive matches", + // }, + // { + // testDescription: "Two markings that are not consecutive in sentence", + // }, + // { + // testDescription: "One marking in a sentence that has a non-zero startOffset", + // }, + // { + // testDescription: "One primary and one secondary marking in a sentence", + // }, + // { + // testDescription: "One primary and two secondary markings in a sentence", + // }, + // { + // testDescription: "One marking in a sentence in a language that does not have spaces between words", + // }, + // { + // testDescription: "One marking in a sentence in a language that does not have spaces between words with two consecutive matches", + // }, + // { + // testDescription: "Two markings that are not consecutive in a sentence in a language that does not have spaces between words", + // }, +]; +// eslint-enable max-len + +// eslint-disable-next-line max-len +describe.each( testCases )( "findKeyWordFormsInSentence", ( { testDescription, sentence, keyphraseForms, locale, matchWordCustomHelper, expectedResult } ) => { + it( testDescription, () => { + expect( findKeyWordFormsInSentence( sentence, keyphraseForms, locale, 
matchWordCustomHelper ) ).toEqual( expectedResult ); + } ); +} ); diff --git a/packages/yoastseo/src/languageProcessing/helpers/match/findKeyWordFormsInSentence.js b/packages/yoastseo/src/languageProcessing/helpers/match/findKeyWordFormsInSentence.js new file mode 100644 index 00000000000..82f4e03fbe2 --- /dev/null +++ b/packages/yoastseo/src/languageProcessing/helpers/match/findKeyWordFormsInSentence.js @@ -0,0 +1,240 @@ +import matchTextWithArray from "./matchTextWithArray"; +import { uniq } from "lodash-es"; +import Token from "../../../parse/structure/Token"; +import matchTokenWithWordForms from "../keywordCount/matchTokenWithWordForms"; +import { tokenizerSplitter } from "../../../parse/language/LanguageProcessor"; +import removeConsecutiveKeyphraseFromResult from "../keywordCount/removeConsecutiveKeyphraseMatches"; + +/** + * (re-)Tokenizes the keyphrase forms in the same way that the tokens in the text are splitted. + * This solves the problem that "key-word" would not match with "key-word" in the text, + * because it would occur like ["key-word"] in the keyphraseForms and in ["key", "-", "word"] in the tokenized text. + * @param {string[][]} keyphraseForms The keyphrase forms to tokenize. + * @returns {array[]} The tokenized keyphrase forms. + */ +const tokenizeKeyphraseForms = ( keyphraseForms ) => { + const newKeyphraseForms = []; + keyphraseForms.forEach( word => { + // const newWord = [] + const tokenizedWord = word.map( form => { + return form.split( tokenizerSplitter ); + } ); + + const tokenizedWordTransposed = tokenizedWord[ 0 ].map( ( _, index ) => uniq( tokenizedWord.map( row => row[ index ] ) ) ); + + tokenizedWordTransposed.forEach( newWord => newKeyphraseForms.push( newWord ) ); + } ); + return newKeyphraseForms; +}; + + +/** + * Gets all indices of a word in a sentence. + * @param {string} word The word. + * @param {Sentence} sentence The sentence. + * @returns {array} The indices. + */ +const getAllIndicesOfWord = ( word, sentence ) => { + // TODO: more fuzzy matching. Agnostic to capitalization. + const text = sentence.text; + + // get all the indices of the word in the sentence. + const indices = []; + let index = text.indexOf( word ); + while ( index > -1 ) { + indices.push( index ); + index = text.indexOf( word, index + 1 ); + } + + return indices; +}; + +/** + * Matches the keyphrase forms in an array of tokens. + * @param {array[]} keyphraseForms The keyphrase forms to match. + * @param {Token[]} tokens The tokens to match the keyphrase forms in. + * @param {string} locale The locale used in the analysis. + * @returns {{primaryMatches: *[], secondaryMatches: *[]}} The matches. + */ +const getMatchesInTokens = ( keyphraseForms, tokens, locale ) => { + // TODO: write documentation for primary and secondary matches. + const result = { + primaryMatches: [], + secondaryMatches: [], + }; + + // Index is the ith word from the wordforms. It indicates which word we are currently analyzing. + let keyPhraseFormsIndex = 0; + // positionInSentence keeps track of what word in the sentence we are currently at. + let positionInSentence = 0; + + let foundWords = []; + // eslint-disable-next-line no-constant-condition + while ( true ) { + // The head of the keyphrase form we are currently analyzing. + const head = keyphraseForms[ keyPhraseFormsIndex ]; + + const foundPosition = tokens.slice( positionInSentence ).findIndex( t => matchTokenWithWordForms( head, t, locale ) ); + // If an occurence of a word is found, see if the subsequent word also is find. 
+ if ( foundPosition >= 0 ) { + if ( keyPhraseFormsIndex > 0 ) { + // if there are non keywords between two found keywords reset. + + const previousHead = keyphraseForms[ Math.max( 0, keyPhraseFormsIndex - 1 ) ]; // TODO: better way to extract previoushead. + if ( tokens.slice( positionInSentence, foundPosition + positionInSentence ).some( + t => { + return matchTokenWithWordForms( previousHead, t, locale ); + } ) ) { + result.secondaryMatches.push( tokens[ positionInSentence - 1 ] ); + keyPhraseFormsIndex = 0; + foundWords = []; + continue; + } + + const previousToken = tokens[ Math.max( positionInSentence + foundPosition - 1, 0 ) ]; // TODO: better way to extract previous token. + // if the previous token is an underscore and the previous head is not an underscore, reset. + if ( previousToken.text === "_" && ! previousHead.includes( "_" ) ) { + // result.secondaryMatches.push( tokens[ positionInSentence - 1 ] ); + keyPhraseFormsIndex = 0; + foundWords = []; + continue; + } + } + + keyPhraseFormsIndex += 1; + positionInSentence += foundPosition; + foundWords.push( tokens[ positionInSentence ] ); + positionInSentence += 1; + } else { + if ( keyPhraseFormsIndex === 0 ) { + break; + } + keyPhraseFormsIndex = 0; + } + + if ( foundWords.length >= keyphraseForms.length ) { + result.primaryMatches.push( foundWords ); + keyPhraseFormsIndex = 0; + foundWords = []; + } + } + + return removeConsecutiveKeyphraseFromResult( result ); +}; + + +/** + * Converts the matches to the format that is used in the assessment. + * This function is for compatibility. If matches were found with the old method with a custom match helper, + * this function converts them to the new format that is used for the position based highlighting. + * @param {array} matches The matches. + * @param {Sentence} sentence The sentence. + * @param {string[][]} keyPhraseForms The keyphrase forms. + * @param {string} locale The locale. + * @returns {{primaryMatches: *[], secondaryMatches: *[], position: number}} The matches in the format that is used in the assessment. + */ +const convertToPositionResult = ( matches, sentence, keyPhraseForms, locale ) => { + const matchTokens = []; + matches.forEach( matchObject => { + const matchWords = matchObject.matches; + + uniq( matchWords ).forEach( matchWord => { + const indices = getAllIndicesOfWord( matchWord, sentence ); + indices.forEach( index => { + const startOffset = sentence.sourceCodeRange.startOffset + index; + const endOffset = sentence.sourceCodeRange.startOffset + index + matchWord.length; + + const matchToken = new Token( matchWord, { startOffset: startOffset, endOffset: endOffset } ); + + matchTokens.push( matchToken ); + } ); + } ); + } ); + + // Sort tokens on startOffset. + matchTokens.sort( ( a, b ) => a.sourceCodeRange.startOffset - b.sourceCodeRange.startOffset ); + + const primaryMatches = []; + const secondaryMatches = []; + + let currentMatch = []; + let keyPhraseFormsIndex = 0; + + // A primary match is a match that contains all the keyphrase forms in the same order as they occur in the keyphrase. + // A secondary match is any other match. + matchTokens.forEach( ( token ) => { + const head = keyPhraseForms[ keyPhraseFormsIndex ]; + if ( head && matchTokenWithWordForms( head, token, locale ) ) { + // If the token matches the head of the keyphrase form. 
+ currentMatch.push( token ); + keyPhraseFormsIndex += 1; + if ( currentMatch.length === keyPhraseForms.length ) { + primaryMatches.push( currentMatch ); + currentMatch = []; + keyPhraseFormsIndex = 0; + } + } else { + // If the token does not match the head of the keyphrase form. + if ( currentMatch.length > 0 ) { + // If there is a current match, add the current token to the secondary matches. + if ( currentMatch.length === keyPhraseForms.length ) { + primaryMatches.push( currentMatch ); + } else { + secondaryMatches.push( currentMatch ); + } + } else { + secondaryMatches.push( [ token ] ); + } + currentMatch = []; + keyPhraseFormsIndex = 0; + } + } ); + + + return { + primaryMatches: primaryMatches, + secondaryMatches: secondaryMatches, + position: 0, + }; +}; + + +/** + * Gets the matched keyphrase form(s). + * + * @param {Sentence} sentence The sentence to check. + * @param {string[][]} keyphraseForms The keyphrase forms. + * @param {string} locale The locale used in the analysis. + * @param {function} matchWordCustomHelper A custom helper to match words with a text. + * + * @returns {{primaryMatches: *[], secondaryMatches: *[], position: number}} The array of matched keyphrase form(s). + */ +function findKeyWordFormsInSentence( sentence, keyphraseForms, locale, matchWordCustomHelper ) { + let matches, result; + + if ( keyphraseForms[ 0 ].length === 0 ) { + return { primaryMatches: [], secondaryMatches: [] }; + } + + if ( matchWordCustomHelper ) { + matches = keyphraseForms.map( forms => matchTextWithArray( sentence.text, forms, locale, matchWordCustomHelper ) ); + // If one of the forms is not found, return an empty result. + if ( matches.some( match => match.position === -1 ) ) { + return { primaryMatches: [], secondaryMatches: [], position: -1 }; + } + + result = convertToPositionResult( matches, sentence, keyphraseForms, locale ); + + return result; + } + + const tokens = sentence.tokens; + + const newKeyphraseForms = tokenizeKeyphraseForms( keyphraseForms ); + + matches = getMatchesInTokens( newKeyphraseForms, tokens, locale ); + + return matches; +} + +export default findKeyWordFormsInSentence; diff --git a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js index c17408da122..a1432af708a 100644 --- a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js +++ b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js @@ -1,233 +1,8 @@ -import { flatten, uniq } from "lodash-es"; -import matchTextWithArray from "../helpers/match/matchTextWithArray"; +import { flatten } from "lodash-es"; import getSentencesFromTree from "../helpers/sentence/getSentencesFromTree"; -import { tokenizerSplitter } from "../../parse/language/LanguageProcessor"; import { normalizeSingle } from "../helpers/sanitize/quotes"; -import Token from "../../parse/structure/Token"; -import removeConsecutiveKeyphraseFromResult from "../helpers/keywordCount/removeConsecutiveKeyphraseMatches"; -import matchTokenWithWordForms from "../helpers/keywordCount/matchTokenWithWordForms"; import getMarkingsInSentence from "../helpers/highlighting/getMarkingsInSentence"; - -/** - * (re-)Tokenizes the keyphrase forms in the same way that the tokens in the text are splitted. - * This solves the problem that "key-word" would not match with "key-word" in the text, - * because it would occur like ["key-word"] in the keyphraseForms and in ["key", "-", "word"] in the tokenized text. 
- * @param {array[]} keyphraseForms The keyphrase forms to tokenize. - * @returns {array[]} The tokenized keyphrase forms. - */ -const tokenizeKeyphraseForms = ( keyphraseForms ) => { - const newKeyphraseForms = []; - keyphraseForms.forEach( word => { - // const newWord = [] - const tokenizedWord = word.map( form => { - return form.split( tokenizerSplitter ); - } ); - - const tokenizedWordTransposed = tokenizedWord[ 0 ].map( ( _, index ) => uniq( tokenizedWord.map( row => row[ index ] ) ) ); - - tokenizedWordTransposed.forEach( newWord => newKeyphraseForms.push( newWord ) ); - } ); - return newKeyphraseForms; -}; - - -/** - * Matches the keyphrase forms in an array of tokens. - * @param {array[]} keyphraseForms The keyphrase forms to match. - * @param {Token[]} tokens The tokens to match the keyphrase forms in. - * @param {string} locale The locale used in the analysis. - * @returns {{primaryMatches: *[], secondaryMatches: *[]}} The matches. - */ -const getMatchesInTokens = ( keyphraseForms, tokens, locale ) => { - const result = { - primaryMatches: [], - secondaryMatches: [], - }; - - // Index is the ith word from the wordforms. It indicates which word we are currently analyzing. - let keyPhraseFormsIndex = 0; - // positionInSentence keeps track of what word in the sentence we are currently at. - let positionInSentence = 0; // TODO: rename naar sentencePosition? - - let foundWords = []; - // eslint-disable-next-line no-constant-condition - while ( true ) { - // The head of the keyphrase form we are currently analyzing. - const head = keyphraseForms[ keyPhraseFormsIndex ]; - - const foundPosition = tokens.slice( positionInSentence ).findIndex( t => matchTokenWithWordForms( head, t, locale ) ); - // If an occurence of a word is found, see if the subsequent word also is find. - if ( foundPosition >= 0 ) { - if ( keyPhraseFormsIndex > 0 ) { - // if there are non keywords between two found keywords reset. - - const previousHead = keyphraseForms[ Math.max( 0, keyPhraseFormsIndex - 1 ) ]; // TODO: better way to extract previoushead. - if ( tokens.slice( positionInSentence, foundPosition + positionInSentence ).some( - t => { - return matchTokenWithWordForms( previousHead, t, locale ); - } ) ) { - result.secondaryMatches.push( tokens[ positionInSentence - 1 ] ); - keyPhraseFormsIndex = 0; - foundWords = []; - continue; - } - - const previousToken = tokens[ Math.max( positionInSentence + foundPosition - 1, 0 ) ]; // TODO: better way to extract previous token. - // if the previous token is an underscore and the previous head is not an underscore, reset. - if ( previousToken.text === "_" && ! previousHead.includes( "_" ) ) { - // result.secondaryMatches.push( tokens[ positionInSentence - 1 ] ); - keyPhraseFormsIndex = 0; - foundWords = []; - continue; - } - } - - keyPhraseFormsIndex += 1; - positionInSentence += foundPosition; - foundWords.push( tokens[ positionInSentence ] ); - positionInSentence += 1; - } else { - if ( keyPhraseFormsIndex === 0 ) { - break; - } - keyPhraseFormsIndex = 0; - } - - if ( foundWords.length >= keyphraseForms.length ) { - result.primaryMatches.push( foundWords ); - keyPhraseFormsIndex = 0; - foundWords = []; - } - } - - return removeConsecutiveKeyphraseFromResult( result ); -}; - -/** - * Gets all indices of a word in a sentence. - * @param {string} word The word. - * @param {Sentence} sentence The sentence. - * @returns {array} The indices. - */ -const getAllIndicesOfWord = ( word, sentence ) => { - // TODO: more fuzzy matching. Agnostic to capitalization. 
- const text = sentence.text; - - // get all the indices of the word in the sentence. - const indices = []; - let index = text.indexOf( word ); - while ( index > -1 ) { - indices.push( index ); - index = text.indexOf( word, index + 1 ); - } - - return indices; - // return sentence.text.split( "" ).map( ( x, i ) => x === word ? i : "" ).filter( Boolean ); -}; - -/** - * Converts the matches to the format that is used in the assessment. - * @param {array} matches The matches. - * @param {Sentence} sentence The sentence. - * @param {array[]} keyPhraseForms The keyphrase forms. - * @param {string} locale The locale. - * @returns {{primaryMatches: *[], secondaryMatches: *[], position: number}} The matches in the format that is used in the assessment. - */ -const convertToPositionResult = ( matches, sentence, keyPhraseForms, locale ) => { - const matchTokens = []; - matches.forEach( matchObject => { - const matchWords = matchObject.matches; - - uniq( matchWords ).forEach( matchWord => { - const indices = getAllIndicesOfWord( matchWord, sentence ); - indices.forEach( index => { - const startOffset = sentence.sourceCodeRange.startOffset + index; - const endOffset = sentence.sourceCodeRange.startOffset + index + matchWord.length; - - const matchToken = new Token( matchWord, { startOffset: startOffset, endOffset: endOffset } ); - - matchTokens.push( matchToken ); - } ); - } ); - } ); - - // Sort tokens on startOffset. - matchTokens.sort( ( a, b ) => a.sourceCodeRange.startOffset - b.sourceCodeRange.startOffset ); - - const primaryMatches = []; - const secondaryMatches = []; - - let currentMatch = []; - let keyPhraseFormsIndex = 0; - - // A primary match is a match that contains all the keyphrase forms in the same order as they occur in the keyphrase. - // A secondary match is any other match. - matchTokens.forEach( ( token ) => { - const head = keyPhraseForms[ keyPhraseFormsIndex ]; - if ( head && matchTokenWithWordForms( head, token, locale ) ) { - currentMatch.push( token ); - keyPhraseFormsIndex += 1; - if ( currentMatch.length === keyPhraseForms.length ) { - primaryMatches.push( currentMatch ); - currentMatch = []; - keyPhraseFormsIndex = 0; - } - } else { - if ( currentMatch.length > 0 ) { - if ( currentMatch.length === keyPhraseForms.length ) { - primaryMatches.push( currentMatch ); - } else { - secondaryMatches.push( currentMatch ); - } - } else { - secondaryMatches.push( [ token ] ); - } - currentMatch = []; - keyPhraseFormsIndex = 0; - } - } ); - - - return { - primaryMatches: primaryMatches, - secondaryMatches: secondaryMatches, - position: 0, - }; -}; - -/** - * Gets the matched keyphrase form(s). - * - * @param {string} sentence The sentence to check. - * @param {Array} keyphraseForms The keyphrase forms. - * @param {string} locale The locale used in the analysis. - * @param {function} matchWordCustomHelper A custom helper to match words with a text. - * - * @returns {Array} The array of matched keyphrase form(s). - */ -function getMatchesInSentence( sentence, keyphraseForms, locale, matchWordCustomHelper ) { - let matches, result; - - if ( matchWordCustomHelper ) { - matches = keyphraseForms.map( forms => matchTextWithArray( sentence.text, forms, locale, matchWordCustomHelper ) ); - // If one of the forms is not found, return an empty result. 
- if ( matches.some( match => match.position === -1 ) ) { - return { primaryMatches: [], secondaryMatches: [], position: -1 }; - } - - result = convertToPositionResult( matches, sentence, keyphraseForms, locale ); - - return result; - } - - const tokens = sentence.tokens; - - const newKeyphraseForms = tokenizeKeyphraseForms( keyphraseForms, locale ); - - matches = getMatchesInTokens( newKeyphraseForms, tokens, locale ); - - return matches; -} +import findKeyWordFormsInSentence from "../helpers/match/findKeyWordFormsInSentence"; /** @@ -244,7 +19,7 @@ export function countKeyphraseInText( sentences, topicForms, locale, matchWordCu const result = { count: 0, markings: [] }; sentences.forEach( sentence => { - const matchesInSentence = getMatchesInSentence( sentence, topicForms.keyphraseForms, locale, matchWordCustomHelper ); + const matchesInSentence = findKeyWordFormsInSentence( sentence, topicForms.keyphraseForms, locale, matchWordCustomHelper ); const markings = getMarkingsInSentence( sentence, matchesInSentence, matchWordCustomHelper, locale ); From def875e4fe38b0c1abe55f0ae692fc78bab301c3 Mon Sep 17 00:00:00 2001 From: hdvos Date: Wed, 10 May 2023 13:12:41 +0200 Subject: [PATCH 078/616] Add spec --- .../helpers/highlighting/getMarkingsInSentenceSpec.js | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/packages/yoastseo/spec/languageProcessing/helpers/highlighting/getMarkingsInSentenceSpec.js b/packages/yoastseo/spec/languageProcessing/helpers/highlighting/getMarkingsInSentenceSpec.js index d28b0fb20bf..d041919fb33 100644 --- a/packages/yoastseo/spec/languageProcessing/helpers/highlighting/getMarkingsInSentenceSpec.js +++ b/packages/yoastseo/spec/languageProcessing/helpers/highlighting/getMarkingsInSentenceSpec.js @@ -93,6 +93,15 @@ const testCases = [ } ), ], }, + { + testDescription: "No secondary match if a multi word keyphrase is separated with an underscore in the sentence.", + sentence: { text: "A key sentence with a key_word.", sourceCodeRange: { startOffset: 0, endOffset: 31 } }, + matchesInSentence: { primaryMatches: [ ], secondaryMatches: [ [ { sourceCodeRange: { startOffset: 2, endOffset: 5 } } ] ] }, + matchWordCustomHelper: false, + locale: "en_US", + expectedResult: [], + }, + ]; /* eslint-enable max-len */ From da43682e1d50ada7c1f36e85c7943a229ecf91fa Mon Sep 17 00:00:00 2001 From: hdvos Date: Wed, 10 May 2023 14:08:59 +0200 Subject: [PATCH 079/616] rename specs in findKeyWordFormsInSentenceSpec.js --- .../helpers/match/findKeyWordFormsInSentenceSpec.js | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/packages/yoastseo/spec/languageProcessing/helpers/match/findKeyWordFormsInSentenceSpec.js b/packages/yoastseo/spec/languageProcessing/helpers/match/findKeyWordFormsInSentenceSpec.js index 09c1b195f23..10e327edbe3 100644 --- a/packages/yoastseo/spec/languageProcessing/helpers/match/findKeyWordFormsInSentenceSpec.js +++ b/packages/yoastseo/spec/languageProcessing/helpers/match/findKeyWordFormsInSentenceSpec.js @@ -4,7 +4,7 @@ import JapaneseCustomHelper from "../../../../src/languageProcessing/languages/j /* eslint-disable max-len */ const testCases = [ { - testDescription: "No markings in sentence", + testDescription: "No matches in sentence", sentence: { text: "A sentence with notthekeyphrase.", tokens: [ @@ -24,7 +24,7 @@ const testCases = [ expectedResult: { primaryMatches: [], secondaryMatches: [] }, }, { - testDescription: "return empty result if KeyphraseForms is empty", + testDescription: "should return empty result if 
KeyphraseForms is empty", sentence: { text: "A sentence with notthekeyphrase.", tokens: [ @@ -45,7 +45,7 @@ const testCases = [ }, { - testDescription: "One marking in sentence of a single-word keyphrase", + testDescription: "One match in sentence of a single-word keyphrase", sentence: { text: "A sentence with the keyword.", tokens: [ @@ -69,7 +69,7 @@ const testCases = [ secondaryMatches: [] }, }, { - testDescription: "One marking in sentence of a multi word keyphrase", + testDescription: "One match in sentence of a multi word keyphrase", sentence: { text: "A sentence with the key words.", tokens: [ @@ -97,7 +97,7 @@ const testCases = [ secondaryMatches: [] }, }, { - testDescription: "Two markings of multi word keyphrase in sentence", + testDescription: "Two matches of multi word keyphrase in sentence", sentence: { text: "A sentence with the key words and the key word.", tokens: [ @@ -135,7 +135,7 @@ const testCases = [ }, }, { - testDescription: "One primary and one secondary marking of multi word keyphrase in sentence", + testDescription: "One primary and one secondary match of multi word keyphrase in sentence", sentence: { text: "A key sentence with a key word.", tokens: [ From bbf6da69857e0d35feaa675e0b5ae2ce2018d7c1 Mon Sep 17 00:00:00 2001 From: hdvos Date: Mon, 15 May 2023 16:02:49 +0200 Subject: [PATCH 080/616] create convertMatchToPositionResult.js --- .../match/convertMatchToPositionResultSpec.js | 282 ++++++++++++++++++ .../match/convertMatchToPositionResult.js | 102 +++++++ 2 files changed, 384 insertions(+) create mode 100644 packages/yoastseo/spec/languageProcessing/helpers/match/convertMatchToPositionResultSpec.js create mode 100644 packages/yoastseo/src/languageProcessing/helpers/match/convertMatchToPositionResult.js diff --git a/packages/yoastseo/spec/languageProcessing/helpers/match/convertMatchToPositionResultSpec.js b/packages/yoastseo/spec/languageProcessing/helpers/match/convertMatchToPositionResultSpec.js new file mode 100644 index 00000000000..7d200a83e7c --- /dev/null +++ b/packages/yoastseo/spec/languageProcessing/helpers/match/convertMatchToPositionResultSpec.js @@ -0,0 +1,282 @@ +import convertMatchToPositionResult from "../../../../src/languageProcessing/helpers/match/convertMatchToPositionResult"; + +/* eslint-disable max-len */ +const testCases = [ + { + testDescription: "No matches", + matches: [], + sentence: { + text: "私の猫はかわいいです。", + tokens: [ + { + text: "私の猫はかわいいです。", + sourceCodeRange: { startOffset: 0, endOffset: 12 }, + }, + ], + sourceCodeRange: { startOffset: 0, endOffset: 12 }, + }, + keyphraseForms: [ [ "猫" ], [ "かわいい" ] ], + locale: "ja", + expected: { + primaryMatches: [], + secondaryMatches: [], + position: 0, + }, + }, + { + testDescription: "Only a primary match consisting of a single token", + matches: + [ + { matches: [ "猫" ] }, + ], + sentence: { + text: "私の猫はかわいいです。", + tokens: [ + { + text: "私の猫はかわいいです。", + sourceCodeRange: { startOffset: 0, endOffset: 12 }, + }, + ], + sourceCodeRange: { startOffset: 0, endOffset: 12 }, + }, + keyphraseForms: [ [ "猫" ] ], + locale: "ja", + expected: { + primaryMatches: [ [ + { + text: "猫", + sourceCodeRange: { startOffset: 2, endOffset: 3 }, + }, + ] ], + secondaryMatches: [], + position: 0, + }, + }, + { + testDescription: "A primary match and a single secondary match", + matches: + [ + { matches: [ "猫" ] }, + { matches: [ "かわいい" ] }, + ], + sentence: { + text: "私の猫はかわいいですかわいい。", + tokens: [ + { + text: "私の猫はかわいいですかわいい。", + sourceCodeRange: { startOffset: 0, endOffset: 15 }, + }, + ], + 
sourceCodeRange: { startOffset: 0, endOffset: 15 }, + }, + keyphraseForms: [ [ "猫" ], [ "かわいい" ] ], + locale: "ja", + expected: { + primaryMatches: [ [ + { + text: "猫", + sourceCodeRange: { startOffset: 2, endOffset: 3 }, + }, + { + text: "かわいい", + sourceCodeRange: { startOffset: 4, endOffset: 8 }, + }, + ] ], + secondaryMatches: [ [ + { + text: "かわいい", + sourceCodeRange: { + startOffset: 10, endOffset: 14 }, + }, + ] ], + position: 0, + }, + }, + { + testDescription: "Only a primary match consisting of multiple tokens", + matches: + [ + { matches: [ "猫" ] }, + { matches: [ "かわいい" ] }, + ], + sentence: { + text: "私の猫はかわいいです。", + tokens: [ + { + text: "私の猫はかわいいです。", + sourceCodeRange: { startOffset: 0, endOffset: 12 }, + }, + ], + sourceCodeRange: { startOffset: 0, endOffset: 12 }, + }, + keyphraseForms: [ [ "猫" ], [ "かわいい" ] ], + locale: "ja", + expected: { + primaryMatches: [ [ + { + text: "猫", + sourceCodeRange: { startOffset: 2, endOffset: 3 }, + }, + { + text: "かわいい", + sourceCodeRange: { startOffset: 4, endOffset: 8 }, + }, + ] ], + secondaryMatches: [], + position: 0, + }, + }, + { + testDescription: "A primary match and and a secondary match that is in front of the primary match", + matches: + [ + { + matches: [ + "猫", + ], + }, + { + matches: [ + "かわいい", + ], + }, + ], + sentence: { + text: "私猫の猫はかわいいです。", + tokens: [ + { + text: "私猫の猫はかわいいです。", + sourceCodeRange: { startOffset: 0, endOffset: 12 }, + }, + ], + sourceCodeRange: { startOffset: 0, endOffset: 12 }, + }, + keyphraseForms: [ [ "猫" ], [ "かわいい" ] ], + locale: "ja", + expected: { + primaryMatches: [ [ + { + text: "猫", + sourceCodeRange: { startOffset: 3, endOffset: 4 }, + }, + { + text: "かわいい", + sourceCodeRange: { startOffset: 5, endOffset: 9 }, + + }, + ] ], + secondaryMatches: [ [ + { + text: "猫", + sourceCodeRange: { startOffset: 1, endOffset: 2 }, + }, + ] ], + position: 0, + }, + }, + { + testDescription: "Multiple primary matches", + matches: + [ + { + matches: [ + "猫", + ], + }, + { + matches: [ + "かわいい", + ], + }, + ], + sentence: { + text: "私猫のはかわいいで猫かわいいす。", + tokens: [ + { + text: "私猫のはかわいいで猫かわいいす。", + sourceCodeRange: { startOffset: 0, endOffset: 16 }, + }, + ], + sourceCodeRange: { startOffset: 0, endOffset: 16 }, + }, + keyphraseForms: [ [ "猫" ], [ "かわいい" ] ], + locale: "ja", + expected: { + primaryMatches: [ [ + { + text: "猫", + sourceCodeRange: { startOffset: 1, endOffset: 2 }, + }, + { + text: "かわいい", + sourceCodeRange: { startOffset: 4, endOffset: 8 }, + + }, + ], [ + { + text: "猫", + sourceCodeRange: { startOffset: 9, endOffset: 10 }, + }, + { + text: "かわいい", + sourceCodeRange: { startOffset: 10, endOffset: 14 }, + }, + ] ], + secondaryMatches: [ ], + position: 0, + }, + }, + { + testDescription: "A single primary match and a seconary match that is in front of the primary match and a secondary match that is after the primary match", + matches: + [ + { matches: [ "猫" ] }, + { matches: [ "かわいい" ] }, + ], + sentence: { + text: "私猫の猫はかわいいでかわいいす。", + tokens: [ + { + text: "私猫の猫はかわいいでかわいいす。", + sourceCodeRange: { startOffset: 0, endOffset: 16 }, + }, + ], + sourceCodeRange: { startOffset: 0, endOffset: 16 }, + }, + keyphraseForms: [ [ "猫" ], [ "かわいい" ] ], + locale: "ja", + expected: { + primaryMatches: [ [ + { + text: "猫", + sourceCodeRange: { startOffset: 3, endOffset: 4 }, + }, + { + text: "かわいい", + sourceCodeRange: { startOffset: 5, endOffset: 9 }, + + }, + ] ], + secondaryMatches: [ [ + { + text: "猫", + sourceCodeRange: { startOffset: 1, endOffset: 2 }, + } ], + [ { + text: "かわいい", + sourceCodeRange: { 
startOffset: 10, endOffset: 14 }, + + }, + ] ], + position: 0, + }, + }, +]; +/* eslint-enable max-len */ + +describe.each( testCases )( "convertMatchToPositionResult", ( + { testDescription, matches, sentence, keyphraseForms, locale, expected } ) => { + it( testDescription, () => { + expect( convertMatchToPositionResult( matches, sentence, keyphraseForms, locale ) ).toEqual( expected ); + } ); +} ); diff --git a/packages/yoastseo/src/languageProcessing/helpers/match/convertMatchToPositionResult.js b/packages/yoastseo/src/languageProcessing/helpers/match/convertMatchToPositionResult.js new file mode 100644 index 00000000000..da2e0767b3b --- /dev/null +++ b/packages/yoastseo/src/languageProcessing/helpers/match/convertMatchToPositionResult.js @@ -0,0 +1,102 @@ +import { uniq } from "lodash-es"; +import Token from "../../../parse/structure/Token"; +import matchTokenWithWordForms from "../keywordCount/matchTokenWithWordForms"; + +/** + * Gets all indices of a word in a sentence. Is only used for languages with a matchWordCustomHelper. + * Will probably be obsolete once the tokenizer for Japanese is implemented. + * @param {string} word The word. + * @param {Sentence} sentence The sentence. + * @returns {array} The indices. + */ +const getAllIndicesOfWord = ( word, sentence ) => { + const text = sentence.text; + + // get all the indices of the word in the sentence. + const indices = []; + let index = text.indexOf( word ); + while ( index > -1 ) { + indices.push( index ); + index = text.indexOf( word, index + 1 ); + } + + return indices; +}; + +/** + * Converts the matches to the format that is used in the assessment. + * This function is for compatibility. If matches were found with the old method with a custom match helper, + * this function converts them to the new format that is used for the position based highlighting. + * @param {array} matches The matches. + * @param {Sentence} sentence The sentence. + * @param {string[][]} keyPhraseForms The keyphrase forms. + * @param {string} locale The locale. + * @returns {{primaryMatches: *[], secondaryMatches: *[], position: number}} The matches in the format that is used in the assessment. + */ +const convertToPositionResult = ( matches, sentence, keyPhraseForms, locale ) => { + const matchTokens = []; + matches.forEach( matchObject => { + const matchWords = matchObject.matches; + + uniq( matchWords ).forEach( matchWord => { + const indices = getAllIndicesOfWord( matchWord, sentence ); + indices.forEach( index => { + const startOffset = sentence.sourceCodeRange.startOffset + index; + const endOffset = sentence.sourceCodeRange.startOffset + index + matchWord.length; + + const matchToken = new Token( matchWord, { startOffset: startOffset, endOffset: endOffset } ); + + matchTokens.push( matchToken ); + } ); + } ); + } ); + + // Sort tokens on startOffset. + matchTokens.sort( ( a, b ) => a.sourceCodeRange.startOffset - b.sourceCodeRange.startOffset ); + + const primaryMatches = []; + const secondaryMatches = []; + + let currentMatch = []; + let keyPhraseFormsIndex = 0; + + // A primary match is a match that contains all the keyphrase forms in the same order as they occur in the keyphrase. + // A secondary match is any other match. + matchTokens.forEach( ( token ) => { + const head = keyPhraseForms[ keyPhraseFormsIndex ]; + if ( head && matchTokenWithWordForms( head, token, locale ) ) { + // If the token matches the head of the keyphrase form. 
+ currentMatch.push( token ); + keyPhraseFormsIndex += 1; + if ( currentMatch.length === keyPhraseForms.length ) { + primaryMatches.push( currentMatch ); + currentMatch = []; + keyPhraseFormsIndex = 0; + } + } else { + // If the token does not match the head of the keyphrase form. + if ( currentMatch.length > 0 ) { + // If there is a current match, add the current token to the secondary matches. + secondaryMatches.push( currentMatch ); + currentMatch = [ token ]; + keyPhraseFormsIndex = 1; + // keyPhraseFormsIndex = 0; + } else { + secondaryMatches.push( [ token ] ); + currentMatch = [ ]; + keyPhraseFormsIndex = 0; + } + // currentMatch = []; + // keyPhraseFormsIndex = 0; + } + } ); + + + return { + primaryMatches: primaryMatches, + secondaryMatches: secondaryMatches, + position: 0, + }; +}; + +export default convertToPositionResult; From 597f53f2de084a34c47e156eeba48d7fd81e4293 Mon Sep 17 00:00:00 2001 From: hdvos Date: Mon, 15 May 2023 16:03:38 +0200 Subject: [PATCH 081/616] remove convertToPositionResult from file --- .../match/findKeyWordFormsInSentenceSpec.js | 22 +++- .../match/findKeyWordFormsInSentence.js | 111 ++---------------- 2 files changed, 30 insertions(+), 103 deletions(-) diff --git a/packages/yoastseo/spec/languageProcessing/helpers/match/findKeyWordFormsInSentenceSpec.js b/packages/yoastseo/spec/languageProcessing/helpers/match/findKeyWordFormsInSentenceSpec.js index 10e327edbe3..9b3562b05de 100644 --- a/packages/yoastseo/spec/languageProcessing/helpers/match/findKeyWordFormsInSentenceSpec.js +++ b/packages/yoastseo/spec/languageProcessing/helpers/match/findKeyWordFormsInSentenceSpec.js @@ -268,7 +268,7 @@ const testCases = [ }, }, { - testDescription: "A primary and seconday match in a language that uses a custom helper to match words.", + testDescription: "A primary and secondary match in a language that uses a custom helper to match words.", sentence: { text: "私の猫はかわいいですかわいい。", tokens: [ @@ -304,6 +304,26 @@ const testCases = [ secondaryMatches: [], }, }, + { + testDescription: "A primary match and a secondary match that is in front of the primary match in a language that uses a custom helper to match words.", + sentence: { + text: "私猫の猫はかわいいです。iiii", + tokens: [ + { text: "私猫の猫はかわいいです。", sourceCodeRange: { startOffset: 0, endOffset: 12 } }, + ], + sourceCodeRange: { startOffset: 0, endOffset: 12 } }, + keyphraseForms: [ [ "猫" ], [ "かわいい" ] ], + locale: "ja", + matchWordCustomHelper: JapaneseCustomHelper, + expectedResult: { + position: 0, + primaryMatches: [ [ + { sourceCodeRange: { startOffset: 3, endOffset: 4 }, text: "猫" }, + { sourceCodeRange: { startOffset: 5, endOffset: 9 }, text: "かわいい" } ] ], + secondaryMatches: [ [ + { sourceCodeRange: { startOffset: 1, endOffset: 2 }, text: "猫" } ] ], + }, + }, // { diff --git a/packages/yoastseo/src/languageProcessing/helpers/match/findKeyWordFormsInSentence.js b/packages/yoastseo/src/languageProcessing/helpers/match/findKeyWordFormsInSentence.js index 82f4e03fbe2..c814217230b 100644 --- a/packages/yoastseo/src/languageProcessing/helpers/match/findKeyWordFormsInSentence.js +++ b/packages/yoastseo/src/languageProcessing/helpers/match/findKeyWordFormsInSentence.js @@ -1,9 +1,9 @@ import matchTextWithArray from "./matchTextWithArray"; import { uniq } from "lodash-es"; -import Token from "../../../parse/structure/Token"; import matchTokenWithWordForms from "../keywordCount/matchTokenWithWordForms"; import { tokenizerSplitter } from "../../../parse/language/LanguageProcessor"; import removeConsecutiveKeyphraseFromResult from 
"../keywordCount/removeConsecutiveKeyphraseMatches"; +import convertToPositionResult from "./convertMatchToPositionResult"; /** * (re-)Tokenizes the keyphrase forms in the same way that the tokens in the text are splitted. @@ -28,27 +28,6 @@ const tokenizeKeyphraseForms = ( keyphraseForms ) => { }; -/** - * Gets all indices of a word in a sentence. - * @param {string} word The word. - * @param {Sentence} sentence The sentence. - * @returns {array} The indices. - */ -const getAllIndicesOfWord = ( word, sentence ) => { - // TODO: more fuzzy matching. Agnostic to capitalization. - const text = sentence.text; - - // get all the indices of the word in the sentence. - const indices = []; - let index = text.indexOf( word ); - while ( index > -1 ) { - indices.push( index ); - index = text.indexOf( word, index + 1 ); - } - - return indices; -}; - /** * Matches the keyphrase forms in an array of tokens. * @param {array[]} keyphraseForms The keyphrase forms to match. @@ -72,9 +51,10 @@ const getMatchesInTokens = ( keyphraseForms, tokens, locale ) => { // eslint-disable-next-line no-constant-condition while ( true ) { // The head of the keyphrase form we are currently analyzing. + // [["key"], ["word", "words"]] const head = keyphraseForms[ keyPhraseFormsIndex ]; - const foundPosition = tokens.slice( positionInSentence ).findIndex( t => matchTokenWithWordForms( head, t, locale ) ); + const foundPosition = tokens.slice( positionInSentence ).findIndex( token => matchTokenWithWordForms( head, token, locale ) ); // If an occurence of a word is found, see if the subsequent word also is find. if ( foundPosition >= 0 ) { if ( keyPhraseFormsIndex > 0 ) { @@ -85,7 +65,7 @@ const getMatchesInTokens = ( keyphraseForms, tokens, locale ) => { t => { return matchTokenWithWordForms( previousHead, t, locale ); } ) ) { - result.secondaryMatches.push( tokens[ positionInSentence - 1 ] ); + result.secondaryMatches.push( tokens[ positionInSentence - 1 ] ); keyPhraseFormsIndex = 0; foundWords = []; continue; @@ -123,82 +103,6 @@ const getMatchesInTokens = ( keyphraseForms, tokens, locale ) => { }; -/** - * Converts the matches to the format that is used in the assessment. - * This function is for compatibility. If matches were found with the old method with a custom match helper, - * this function converts them to the new format that is used for the position based highlighting. - * @param {array} matches The matches. - * @param {Sentence} sentence The sentence. - * @param {string[][]} keyPhraseForms The keyphrase forms. - * @param {string} locale The locale. - * @returns {{primaryMatches: *[], secondaryMatches: *[], position: number}} The matches in the format that is used in the assessment. - */ -const convertToPositionResult = ( matches, sentence, keyPhraseForms, locale ) => { - const matchTokens = []; - matches.forEach( matchObject => { - const matchWords = matchObject.matches; - - uniq( matchWords ).forEach( matchWord => { - const indices = getAllIndicesOfWord( matchWord, sentence ); - indices.forEach( index => { - const startOffset = sentence.sourceCodeRange.startOffset + index; - const endOffset = sentence.sourceCodeRange.startOffset + index + matchWord.length; - - const matchToken = new Token( matchWord, { startOffset: startOffset, endOffset: endOffset } ); - - matchTokens.push( matchToken ); - } ); - } ); - } ); - - // Sort tokens on startOffset. 
- matchTokens.sort( ( a, b ) => a.sourceCodeRange.startOffset - b.sourceCodeRange.startOffset ); - - const primaryMatches = []; - const secondaryMatches = []; - - let currentMatch = []; - let keyPhraseFormsIndex = 0; - - // A primary match is a match that contains all the keyphrase forms in the same order as they occur in the keyphrase. - // A secondary match is any other match. - matchTokens.forEach( ( token ) => { - const head = keyPhraseForms[ keyPhraseFormsIndex ]; - if ( head && matchTokenWithWordForms( head, token, locale ) ) { - // If the token matches the head of the keyphrase form. - currentMatch.push( token ); - keyPhraseFormsIndex += 1; - if ( currentMatch.length === keyPhraseForms.length ) { - primaryMatches.push( currentMatch ); - currentMatch = []; - keyPhraseFormsIndex = 0; - } - } else { - // If the token does not match the head of the keyphrase form. - if ( currentMatch.length > 0 ) { - // If there is a current match, add the current token to the secondary matches. - if ( currentMatch.length === keyPhraseForms.length ) { - primaryMatches.push( currentMatch ); - } else { - secondaryMatches.push( currentMatch ); - } - } else { - secondaryMatches.push( [ token ] ); - } - currentMatch = []; - keyPhraseFormsIndex = 0; - } - } ); - - - return { - primaryMatches: primaryMatches, - secondaryMatches: secondaryMatches, - position: 0, - }; -}; - - /** * Gets the matched keyphrase form(s). * @@ -209,7 +113,7 @@ const convertToPositionResult = ( matches, sentence, keyPhraseForms, locale ) => * * @returns {{primaryMatches: *[], secondaryMatches: *[], position: number}} The array of matched keyphrase form(s). */ -function findKeyWordFormsInSentence( sentence, keyphraseForms, locale, matchWordCustomHelper ) { +function findKeyWordFormsInSentence( sentence, keyphraseForms, locale, matchWordCustomHelper ) { let matches, result; if ( keyphraseForms[ 0 ].length === 0 ) { @@ -217,7 +121,10 @@ function findKeyWordFormsInSentence( sentence, keyphraseForms, locale, matchWor } if ( matchWordCustomHelper ) { - matches = keyphraseForms.map( forms => matchTextWithArray( sentence.text, forms, locale, matchWordCustomHelper ) ); + // In practice this branch of the code is only touched for japanese (the only language with a matchWordCustomHelper). + // Since tokenization for Japanese will be implemented in the future, this branch will get obsolete. + + matches = keyphraseForms.map( forms => matchTextWithArray( sentence.text, forms, locale, matchWordCustomHelper ) ); // If one of the forms is not found, return an empty result. if ( matches.some( match => match.position === -1 ) ) { return { primaryMatches: [], secondaryMatches: [], position: -1 }; From 14eac2964a5ef8754f4dac0b7c2c5e41fe3ea779 Mon Sep 17 00:00:00 2001 From: hdvos Date: Mon, 15 May 2023 16:06:17 +0200 Subject: [PATCH 082/616] rename keyword to keyphrase in seoAssessorSpec.js --- .../spec/scoring/productPages/cornerstone/seoAssessorSpec.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/yoastseo/spec/scoring/productPages/cornerstone/seoAssessorSpec.js b/packages/yoastseo/spec/scoring/productPages/cornerstone/seoAssessorSpec.js index 954c58611e6..b1e2946d04a 100644 --- a/packages/yoastseo/spec/scoring/productPages/cornerstone/seoAssessorSpec.js +++ b/packages/yoastseo/spec/scoring/productPages/cornerstone/seoAssessorSpec.js @@ -101,8 +101,8 @@ describe( "has configuration overrides", () => { expect( assessment._config.urlCallToAction ).toBe( "
" ); } ); - test( "KeywordDensityAssessment", () => { - const assessment = assessor.getAssessment( "keywordDensity" ); + test( "KeyphraseDensityAssessment", () => { + const assessment = assessor.getAssessment( "keyphraseDensity" ); expect( assessment ).toBeDefined(); expect( assessment._config ).toBeDefined(); From 67147896f3b7c6345e130272c28acf5f703b9c84 Mon Sep 17 00:00:00 2001 From: hdvos Date: Tue, 16 May 2023 08:02:23 +0200 Subject: [PATCH 083/616] fix returns markers for a keyphrase containing numbers in tokenizeSpec.js --- .../scoring/assessments/seo/KeywordDensityAssessmentSpec.js | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/packages/yoastseo/spec/scoring/assessments/seo/KeywordDensityAssessmentSpec.js b/packages/yoastseo/spec/scoring/assessments/seo/KeywordDensityAssessmentSpec.js index e9d18b33a28..582de6b5303 100644 --- a/packages/yoastseo/spec/scoring/assessments/seo/KeywordDensityAssessmentSpec.js +++ b/packages/yoastseo/spec/scoring/assessments/seo/KeywordDensityAssessmentSpec.js @@ -233,7 +233,7 @@ describe( "A test for marking the keyword", function() { it( "returns markers for a keyphrase containing numbers", function() { const keywordDensityAssessment = new KeywordDensityAssessment(); - const paper = new Paper( "This is the release of YoastSEO 9.3.", { keyword: "YoastSEO 9.3" } ); + const paper = new Paper( "
<p>This is the release of YoastSEO 9.3.</p>
", { keyword: "YoastSEO 9.3" } ); const researcher = new DefaultResearcher( paper ); buildTree( paper, researcher ); @@ -241,7 +241,8 @@ describe( "A test for marking the keyword", function() { keywordDensityAssessment.getResult( paper, researcher ); const expected = [ new Mark( { marked: "This is the release of YoastSEO 9.3.", - original: "This is the release of YoastSEO 9.3." } ) ]; + original: "This is the release of YoastSEO 9.3.", + position: { startOffset: 26, endOffset: 38 } } ) ]; expect( keywordDensityAssessment.getMarks() ).toEqual( expected ); } ); From 62e4191c947e98eccfd50fd5dc1d74911e4b71c7 Mon Sep 17 00:00:00 2001 From: hdvos Date: Tue, 16 May 2023 08:26:59 +0200 Subject: [PATCH 084/616] rename keyword to keyphrase in productpages/scoring/relatedKeywordAssessorSpec --- .../productPages/cornerstone/relatedKeywordAssessorSpec.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/yoastseo/spec/scoring/productPages/cornerstone/relatedKeywordAssessorSpec.js b/packages/yoastseo/spec/scoring/productPages/cornerstone/relatedKeywordAssessorSpec.js index f7fc9ff8ab8..071decde716 100644 --- a/packages/yoastseo/spec/scoring/productPages/cornerstone/relatedKeywordAssessorSpec.js +++ b/packages/yoastseo/spec/scoring/productPages/cornerstone/relatedKeywordAssessorSpec.js @@ -44,8 +44,8 @@ describe( "has configuration overrides", () => { expect( assessment._config.urlCallToAction ).toBe( "
" ); } ); - test( "KeywordDensityAssessment", () => { - const assessment = assessor.getAssessment( "keywordDensity" ); + test( "KeyphraseDensityAssessment", () => { + const assessment = assessor.getAssessment( "keyphraseDensity" ); expect( assessment ).toBeDefined(); expect( assessment._config ).toBeDefined(); From 8e947f6aa79046f4b96186d2f66603baea961152 Mon Sep 17 00:00:00 2001 From: hdvos Date: Tue, 16 May 2023 08:34:59 +0200 Subject: [PATCH 085/616] rename keyword to keyphrase in productpages/relatedKeywordAssessorSpec --- .../spec/scoring/productPages/relatedKeywordAssessorSpec.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/yoastseo/spec/scoring/productPages/relatedKeywordAssessorSpec.js b/packages/yoastseo/spec/scoring/productPages/relatedKeywordAssessorSpec.js index 35a14fe7ebe..b5f838137d3 100644 --- a/packages/yoastseo/spec/scoring/productPages/relatedKeywordAssessorSpec.js +++ b/packages/yoastseo/spec/scoring/productPages/relatedKeywordAssessorSpec.js @@ -53,8 +53,8 @@ describe( "has configuration overrides", () => { expect( assessment._config.urlCallToAction ).toBe( "" ); } ); - test( "KeywordDensityAssessment", () => { - const assessment = assessor.getAssessment( "keywordDensity" ); + test( "KeyphraseDensityAssessment", () => { + const assessment = assessor.getAssessment( "keyphraseDensity" ); expect( assessment ).toBeDefined(); expect( assessment._config ).toBeDefined(); From e18c7c13dd5cd9299f976b30f1afdd19cfe0d5b8 Mon Sep 17 00:00:00 2001 From: Mykola Shlyakhtun Date: Tue, 16 May 2023 10:32:28 +0300 Subject: [PATCH 086/616] HTML Parser - Implement position based highlighting for block editor#138 Implemented --- packages/js/src/analysis/getApplyMarks.js | 1 + packages/js/src/decorator/gutenberg.js | 36 +++++++-- packages/js/tests/decorator/gutenberg.test.js | 74 +++++++++++++++++++ packages/yoastseo/src/values/Mark.js | 26 +++++++ 4 files changed, 131 insertions(+), 6 deletions(-) diff --git a/packages/js/src/analysis/getApplyMarks.js b/packages/js/src/analysis/getApplyMarks.js index 3c56601da37..76d2c1b90eb 100644 --- a/packages/js/src/analysis/getApplyMarks.js +++ b/packages/js/src/analysis/getApplyMarks.js @@ -31,6 +31,7 @@ function applyMarksTinyMCE( paper, marks ) { * @returns {void} */ function applyMarks( paper, marks ) { + console.log( "applyMarks" ); let decorator; // Classic editor diff --git a/packages/js/src/decorator/gutenberg.js b/packages/js/src/decorator/gutenberg.js index a0f79b084d4..1f43fd4d96d 100644 --- a/packages/js/src/decorator/gutenberg.js +++ b/packages/js/src/decorator/gutenberg.js @@ -311,6 +311,22 @@ export function calculateAnnotationsForTextFormat( text, mark ) { return blockOffsets; } +/** + * Create an annotation if the given mark is position based. + * + * @param {Mark} mark The mark to apply to the content. + * + * @returns {Array} The annotations to apply. + */ +export function crateAnnotationsFromPositionBasedMarks( mark ) { + return [ + { + startOffset: mark.getBlockPositionStart(), + endOffset: mark.getBlockPositionEnd(), + }, + ]; +} + /** * Returns an array of all the attributes of which we can annotate text for, for a specific block type name. 
* @@ -350,10 +366,15 @@ function createAnnotations( html, richTextIdentifier, attribute, block, marks ) const text = record.text; return flatMap( marks, ( ( mark ) => { - const annotations = calculateAnnotationsForTextFormat( - text, - mark - ); + let annotations; + if ( marks[ 0 ].hasBlockPosition && marks[ 0 ].hasBlockPosition() ) { + annotations = crateAnnotationsFromPositionBasedMarks( mark ); + } else { + annotations = calculateAnnotationsForTextFormat( + text, + mark + ); + } if ( ! annotations ) { return []; @@ -426,7 +447,7 @@ export function getAnnotationsForYoastBlocks( attribute, block, marks ) { * * @returns {Array} The annotations to apply. */ -function getAnnotationsForBlockAttribute( attribute, block, marks ) { +export function getAnnotationsForBlockAttribute( attribute, block, marks ) { const attributeKey = attribute.key; const { attributes: blockAttributes } = block; @@ -510,7 +531,7 @@ export function hasInnerBlocks( block ) { * * @returns {Object[]} An array of annotation objects. */ -function getAnnotationsForBlocks( blocks, marks ) { +export function getAnnotationsForBlocks( blocks, marks ) { return flatMap( blocks, ( ( block ) => { // If a block has innerblocks, get annotations for those blocks as well. const innerBlockAnnotations = hasInnerBlocks( block ) ? getAnnotationsForBlocks( block.innerBlocks, marks ) : []; @@ -526,6 +547,7 @@ function getAnnotationsForBlocks( blocks, marks ) { * @returns {void} */ export function applyAsAnnotations( marks ) { + console.log( "applyAsAnnotations" ); // Do this always to allow people to select a different eye marker while another one is active. removeAllAnnotations(); const fieldsToMark = getFieldsToMarkHelper( marks ); @@ -555,6 +577,8 @@ export function applyAsAnnotations( marks ) { * @returns {void} */ function removeAllAnnotationsFromBlock( blockClientId ) { + console.log( "removeAllAnnotationsFromBlock", blockClientId ); + const annotationsInBlock = select( "core/annotations" ) .__experimentalGetAnnotations() .filter( annotation => annotation.blockClientId === blockClientId && annotation.source === ANNOTATION_SOURCE ); diff --git a/packages/js/tests/decorator/gutenberg.test.js b/packages/js/tests/decorator/gutenberg.test.js index ddff14553f5..48637cc1c72 100644 --- a/packages/js/tests/decorator/gutenberg.test.js +++ b/packages/js/tests/decorator/gutenberg.test.js @@ -7,6 +7,7 @@ import { getAnnotationsFromBlock, hasInnerBlocks, getAnnotationsForYoastBlocks, + getAnnotationsForBlockAttribute, crateAnnotationsFromPositionBasedMarks, } from "../../src/decorator/gutenberg"; jest.mock( "@wordpress/rich-text", () => ( { @@ -16,6 +17,7 @@ jest.mock( "@wordpress/rich-text", () => ( { import { create } from "@wordpress/rich-text"; import { select } from "@wordpress/data"; +import { Mark } from "yoastseo/src/values"; jest.mock( "@wordpress/data" ); @@ -184,6 +186,32 @@ describe( "calculateAnnotationsForTextFormat", () => { } ); } ); +describe( "crateAnnotationsFromPositionBasedMarks", () => { + it( "create annotation from block position based mark", () => { + const mark = new Mark( { + position: { + startOffsetBlock: 22, + endOffsetBlock: 26, + }, + } ); + + /* + * "A long text. A marked text." 
+ * 22 ^ ^ 26 + */ + const expected = [ + { + startOffset: 22, + endOffset: 26, + }, + ]; + + const actual = crateAnnotationsFromPositionBasedMarks( mark ); + + expect( actual ).toEqual( expected ); + } ); +} ); + describe( "test getAnnotationsFromBlock", () => { it( "returns an annotation if there is an applicable marker for the text", () => { create.mockImplementation( () => { @@ -437,3 +465,49 @@ describe( "tests for the hasInnerBlocks helper", () => { expect( hasInnerBlocks( mockBlockWithoutInnerblocks ) ).toBeFalsy(); } ); } ); + +describe( "test getAnnotationsForBlockAttribute", () => { + it( "returns an annotation if there is an applicable marker for the text", () => { + create.mockImplementation( () => { + return { text: "An item about lingo" }; + } ); + + const mockBlock = { + clientId: "34f61542-0902-44f7-ab48-d9f88a022b43", + name: "core/list-item", + isValid: true, + attributes: { + content: "An item about lingo", + }, + innerBlocks: [], + }; + + const mark = new Mark( { + position: { + startOffsetBlock: 14, + endOffsetBlock: 19, + }, + } ); + + const attribute = { + key: "content", + }; + + select.mockReturnValue( { + getActiveMarker: jest.fn( () => "keyphraseDensity" ), + } ); + + const annotations = getAnnotationsForBlockAttribute( attribute, mockBlock, [ mark ] ); + + const resultWithAnnotation = [ + { + startOffset: 14, + endOffset: 19, + block: "34f61542-0902-44f7-ab48-d9f88a022b43", + richTextIdentifier: "content", + }, + ]; + + expect( annotations ).toEqual( resultWithAnnotation ); + } ); +} ); diff --git a/packages/yoastseo/src/values/Mark.js b/packages/yoastseo/src/values/Mark.js index d9e3f7d4e04..30fe9afb1bb 100644 --- a/packages/yoastseo/src/values/Mark.js +++ b/packages/yoastseo/src/values/Mark.js @@ -68,6 +68,24 @@ Mark.prototype.getPositionEnd = function() { return this._properties.position && this._properties.position.endOffset; }; +/** + * Returns the start position inside block. + * + * @returns {number} The start position inside block. + */ +Mark.prototype.getBlockPositionStart = function() { + return this._properties.position && this._properties.position.startOffsetBlock; +}; + +/** + * Returns the end position inside block. + * + * @returns {number} The end position inside block. + */ +Mark.prototype.getBlockPositionEnd = function() { + return this._properties.position && this._properties.position.endOffsetBlock; +}; + /** * Applies this mark to the given text with replacement-based highlighting. * @@ -139,6 +157,14 @@ Mark.prototype.hasPosition = function() { return !! this.getPositionStart && this.getPositionStart(); }; + +/** + * Checks if a mark has block position information available. + * @returns {boolean} Returns true if the Mark object has block position information, false otherwise. + */ +Mark.prototype.hasBlockPosition = function() { + return !! this.getBlockPositionEnd() && this.getBlockPositionStart(); +}; /** * Parses the object to a Mark. 
* From 8a83d8285585a7474c9eadb3fa7a78a0b6b7d170 Mon Sep 17 00:00:00 2001 From: hdvos Date: Tue, 16 May 2023 10:27:40 +0200 Subject: [PATCH 087/616] add spec for tokenization of sentence with a number with a decimal point in it in tokenizeSpec.js --- .../spec/parse/build/private/tokenizeSpec.js | 508 ++++++++++++------ 1 file changed, 350 insertions(+), 158 deletions(-) diff --git a/packages/yoastseo/spec/parse/build/private/tokenizeSpec.js b/packages/yoastseo/spec/parse/build/private/tokenizeSpec.js index 12651e4c272..ea4eda7081a 100644 --- a/packages/yoastseo/spec/parse/build/private/tokenizeSpec.js +++ b/packages/yoastseo/spec/parse/build/private/tokenizeSpec.js @@ -4,191 +4,383 @@ import EnglishResearcher from "../../../../src/languageProcessing/languages/en/R import { buildTreeNoTokenize } from "../../../specHelpers/parse/buildTree"; import LanguageProcessor from "../../../../src/parse/language/LanguageProcessor"; -describe( "A test for the tokenize function", function() { - it( "should correctly tokenize a paragraph of one sentence", function() { - const mockPaper = new Paper( "
<p>This is a paragraph</p>
", { locale: "en_US" } ); - const mockResearcher = new EnglishResearcher( mockPaper ); - const languageProcessor = new LanguageProcessor( mockResearcher ); - buildTreeNoTokenize( mockPaper ); - // eslint-disable-next-line max-len - expect( tokenize( mockPaper.getTree(), languageProcessor ) ).toEqual( { attributes: {}, childNodes: [ { attributes: {}, childNodes: [ { name: "#text", value: "This is a paragraph" } ], isImplicit: false, name: "p", sentences: [ { sourceCodeRange: { endOffset: 22, startOffset: 3 }, text: "This is a paragraph", tokens: [ { sourceCodeRange: { endOffset: 7, startOffset: 3 }, text: "This" }, { sourceCodeRange: { endOffset: 8, startOffset: 7 }, text: " " }, { sourceCodeRange: { endOffset: 10, startOffset: 8 }, text: "is" }, { sourceCodeRange: { endOffset: 11, startOffset: 10 }, text: " " }, { sourceCodeRange: { endOffset: 12, startOffset: 11 }, text: "a" }, { sourceCodeRange: { endOffset: 13, startOffset: 12 }, text: " " }, { sourceCodeRange: { endOffset: 22, startOffset: 13 }, text: "paragraph" } ] } ], sourceCodeLocation: { endOffset: 26, endTag: { endOffset: 26, startOffset: 22 }, startOffset: 0, startTag: { endOffset: 3, startOffset: 0 } } } ], name: "#document-fragment" } - - ); - } ); - - it( "should correctly tokenize a paragraph of two sentences", function() { - const mockPaper = new Paper( "
<p>This is a sentence. This is another sentence.</p>
", { locale: "en_US" } ); - const mockResearcher = new EnglishResearcher( mockPaper ); - const languageProcessor = new LanguageProcessor( mockResearcher ); - buildTreeNoTokenize( mockPaper ); - // eslint-disable-next-line max-len - expect( tokenize( mockPaper.getTree(), languageProcessor ) ).toEqual( { - attributes: {}, - childNodes: [ - { +describe( "A test for the tokenize function", + function() { + it( "should correctly tokenize a paragraph of one sentence", function() { + const mockPaper = new Paper( "
<p>This is a paragraph</p>
", { locale: "en_US" } ); + const mockResearcher = new EnglishResearcher( mockPaper ); + const languageProcessor = new LanguageProcessor( mockResearcher ); + buildTreeNoTokenize( mockPaper ); + // eslint-disable-next-line max-len + expect( tokenize( mockPaper.getTree(), languageProcessor ) ).toEqual( { + attributes: {}, + childNodes: [ { attributes: {}, - childNodes: [ - { - name: "#text", - value: "This is a sentence. This is another sentence.", - }, - ], + childNodes: [ { name: "#text", value: "This is a paragraph" } ], isImplicit: false, name: "p", - sentences: [ - { + sentences: [ { + sourceCodeRange: { endOffset: 22, startOffset: 3 }, + text: "This is a paragraph", + tokens: [ { sourceCodeRange: { endOffset: 7, startOffset: 3 }, text: "This" }, { sourceCodeRange: { - endOffset: 22, - startOffset: 3, + endOffset: 8, + startOffset: 7, + }, text: " ", + }, { sourceCodeRange: { endOffset: 10, startOffset: 8 }, text: "is" }, { + sourceCodeRange: { endOffset: 11, startOffset: 10 }, + text: " ", + }, { sourceCodeRange: { endOffset: 12, startOffset: 11 }, text: "a" }, { + sourceCodeRange: { endOffset: 13, startOffset: 12 }, + text: " ", + }, { sourceCodeRange: { endOffset: 22, startOffset: 13 }, text: "paragraph" } ], + } ], + sourceCodeLocation: { + endOffset: 26, + endTag: { endOffset: 26, startOffset: 22 }, + startOffset: 0, + startTag: { endOffset: 3, startOffset: 0 }, + }, + } ], + name: "#document-fragment", + } + ); + } ); + + it( "should correctly tokenize a paragraph of two sentences", function() { + const mockPaper = new Paper( "
<p>This is a sentence. This is another sentence.</p>
", { locale: "en_US" } ); + const mockResearcher = new EnglishResearcher( mockPaper ); + const languageProcessor = new LanguageProcessor( mockResearcher ); + buildTreeNoTokenize( mockPaper ); + // eslint-disable-next-line max-len + expect( tokenize( mockPaper.getTree(), languageProcessor ) ).toEqual( { + attributes: {}, + childNodes: [ + { + attributes: {}, + childNodes: [ + { + name: "#text", + value: "This is a sentence. This is another sentence.", }, - text: "This is a sentence.", - tokens: [ - { - sourceCodeRange: { - endOffset: 7, - startOffset: 3, - }, - text: "This", + ], + isImplicit: false, + name: "p", + sentences: [ + { + sourceCodeRange: { + endOffset: 22, + startOffset: 3, }, - { - sourceCodeRange: { - endOffset: 8, - startOffset: 7, + text: "This is a sentence.", + tokens: [ + { + sourceCodeRange: { + endOffset: 7, + startOffset: 3, + }, + text: "This", }, - text: " ", - }, - { - sourceCodeRange: { - endOffset: 10, - startOffset: 8, + { + sourceCodeRange: { + endOffset: 8, + startOffset: 7, + }, + text: " ", }, - text: "is", - }, - { - sourceCodeRange: { - endOffset: 11, - startOffset: 10, + { + sourceCodeRange: { + endOffset: 10, + startOffset: 8, + }, + text: "is", }, - text: " ", - }, - { - sourceCodeRange: { - endOffset: 12, - startOffset: 11, + { + sourceCodeRange: { + endOffset: 11, + startOffset: 10, + }, + text: " ", }, - text: "a", - }, - { - sourceCodeRange: { - endOffset: 13, - startOffset: 12, + { + sourceCodeRange: { + endOffset: 12, + startOffset: 11, + }, + text: "a", }, - text: " ", - }, - { - sourceCodeRange: { - endOffset: 21, - startOffset: 13, + { + sourceCodeRange: { + endOffset: 13, + startOffset: 12, + }, + text: " ", }, - text: "sentence", - }, - { - sourceCodeRange: { - endOffset: 22, - startOffset: 21, + { + sourceCodeRange: { + endOffset: 21, + startOffset: 13, + }, + text: "sentence", }, - text: ".", - }, - ], - }, - { - sourceCodeRange: { - endOffset: 48, - startOffset: 22, + { + sourceCodeRange: { + endOffset: 22, + startOffset: 21, + }, + text: ".", + }, + ], }, - text: " This is another sentence.", - tokens: [ - { - sourceCodeRange: { - endOffset: 23, - startOffset: 22, - }, - text: " ", + { + sourceCodeRange: { + endOffset: 48, + startOffset: 22, }, - { - sourceCodeRange: { - endOffset: 27, - startOffset: 23, + text: " This is another sentence.", + tokens: [ + { + sourceCodeRange: { + endOffset: 23, + startOffset: 22, + }, + text: " ", }, - text: "This", - }, - { - sourceCodeRange: { - endOffset: 28, - startOffset: 27, + { + sourceCodeRange: { + endOffset: 27, + startOffset: 23, + }, + text: "This", }, - text: " ", - }, - { - sourceCodeRange: { - endOffset: 30, - startOffset: 28, + { + sourceCodeRange: { + endOffset: 28, + startOffset: 27, + }, + text: " ", }, - text: "is", - }, - { - sourceCodeRange: { - endOffset: 31, - startOffset: 30, + { + sourceCodeRange: { + endOffset: 30, + startOffset: 28, + }, + text: "is", }, - text: " ", - }, - { - sourceCodeRange: { - endOffset: 38, - startOffset: 31, + { + sourceCodeRange: { + endOffset: 31, + startOffset: 30, + }, + text: " ", }, - text: "another", - }, - { - sourceCodeRange: { - endOffset: 39, - startOffset: 38, + { + sourceCodeRange: { + endOffset: 38, + startOffset: 31, + }, + text: "another", }, - text: " ", - }, - { - sourceCodeRange: { - endOffset: 47, - startOffset: 39, + { + sourceCodeRange: { + endOffset: 39, + startOffset: 38, + }, + text: " ", }, - text: "sentence", - }, - { - sourceCodeRange: { - endOffset: 48, - startOffset: 47, + { + sourceCodeRange: { + endOffset: 47, + 
startOffset: 39, + }, + text: "sentence", }, - text: ".", - }, - ], - }, - ], - sourceCodeLocation: { - endOffset: 52, - endTag: { + { + sourceCodeRange: { + endOffset: 48, + startOffset: 47, + }, + text: ".", + }, + ], + }, + ], + sourceCodeLocation: { endOffset: 52, - startOffset: 48, + endTag: { + endOffset: 52, + startOffset: 48, + }, + startOffset: 0, + startTag: { + endOffset: 3, + startOffset: 0, + }, }, - startOffset: 0, - startTag: { - endOffset: 3, + }, + ], + name: "#document-fragment", + } ); + } ); + + it( "should correctly tokenize a sentence with a number with a decimal point", function() { + const mockPaper = new Paper( "
<p>This is the release of YoastSEO 9.3.</p>
", { keyword: "YoastSEO 9.3" } ); + const mockResearcher = new EnglishResearcher( mockPaper ); + const languageProcessor = new LanguageProcessor( mockResearcher ); + buildTreeNoTokenize( mockPaper ); + // eslint-disable-next-line max-len + const result = tokenize( mockPaper.getTree(), languageProcessor ); + expect( result ).toEqual( { + name: "#document-fragment", + attributes: {}, + childNodes: [ + { + name: "p", + attributes: {}, + childNodes: [ + { + name: "#text", + value: "This is the release of YoastSEO 9.3.", + }, + ], + sourceCodeLocation: { + startTag: { + startOffset: 0, + endOffset: 3, + }, + endTag: { + startOffset: 39, + endOffset: 43, + }, startOffset: 0, + endOffset: 43, }, + isImplicit: false, + sentences: [ + { + text: "This is the release of YoastSEO 9.3.", + tokens: [ + { + text: "This", + sourceCodeRange: { + startOffset: 3, + endOffset: 7, + }, + }, + { + text: " ", + sourceCodeRange: { + startOffset: 7, + endOffset: 8, + }, + }, + { + text: "is", + sourceCodeRange: { + startOffset: 8, + endOffset: 10, + }, + }, + { + text: " ", + sourceCodeRange: { + startOffset: 10, + endOffset: 11, + }, + }, + { + text: "the", + sourceCodeRange: { + startOffset: 11, + endOffset: 14, + }, + }, + { + text: " ", + sourceCodeRange: { + startOffset: 14, + endOffset: 15, + }, + }, + { + text: "release", + sourceCodeRange: { + startOffset: 15, + endOffset: 22, + }, + }, + { + text: " ", + sourceCodeRange: { + startOffset: 22, + endOffset: 23, + }, + }, + { + text: "of", + sourceCodeRange: { + startOffset: 23, + endOffset: 25, + }, + }, + { + text: " ", + sourceCodeRange: { + startOffset: 25, + endOffset: 26, + }, + }, + { + text: "YoastSEO", + sourceCodeRange: { + startOffset: 26, + endOffset: 34, + }, + }, + { + text: " ", + sourceCodeRange: { + startOffset: 34, + endOffset: 35, + }, + }, + { + text: "9", + sourceCodeRange: { + startOffset: 35, + endOffset: 36, + }, + }, + { + text: ".", + sourceCodeRange: { + startOffset: 36, + endOffset: 37, + }, + }, + { + text: "3", + sourceCodeRange: { + startOffset: 37, + endOffset: 38, + }, + }, + { + text: ".", + sourceCodeRange: { + startOffset: 38, + endOffset: 39, + }, + }, + ], + sourceCodeRange: { + startOffset: 3, + endOffset: 39, + }, + }, + ], }, - }, - ], - name: "#document-fragment", + ], + } ); } ); } ); -} ); From 2370489255bbfbe20614b5317db3281a7932ba1d Mon Sep 17 00:00:00 2001 From: hdvos Date: Tue, 16 May 2023 10:28:11 +0200 Subject: [PATCH 088/616] Add spec for a wordform with a dollar sign in matchTokenWithWordFormsSpec.js --- .../helpers/keywordCount/matchTokenWithWordFormsSpec.js | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/packages/yoastseo/spec/languageProcessing/helpers/keywordCount/matchTokenWithWordFormsSpec.js b/packages/yoastseo/spec/languageProcessing/helpers/keywordCount/matchTokenWithWordFormsSpec.js index ac982d9f00c..b21c9f16ac7 100644 --- a/packages/yoastseo/spec/languageProcessing/helpers/keywordCount/matchTokenWithWordFormsSpec.js +++ b/packages/yoastseo/spec/languageProcessing/helpers/keywordCount/matchTokenWithWordFormsSpec.js @@ -3,11 +3,12 @@ import Token from "../../../../src/parse/structure/Token"; /* eslint-disable max-len */ const testCases = [ [ "The wordforms contain the token", [ "keyword" ] , new Token( "keyword" ), "en_US", true ], - [ "The wordforms do not contain the token", [ "keyword" ] , new Token( "notkeyword" ), "en_US", false ], + [ "The wordforms do not contain the token", [ "keyword" ], new Token( "notkeyword" ), "en_US", false ], [ "The wordforms contain the token in 
a different form: singular matches plural", [ "keyword", "keywords" ], new Token( "keyword" ), "en_US", true ], [ "The wordforms contain the token in a different form: plural matches singular", [ "keyword", "keywords" ], new Token( "keywords" ), "en_US", true ], [ "The wordforms contain the token but with a capital letter", [ "Keyword" ], new Token( "keyword" ), "en_US", true ], [ "The wordforms contain the token in a different form: singular matches plural (different locale)", [ "anahtar", "anahtarlar" ], new Token( "Anahtar" ), "tr_TR", true ], + [ "The wordforms contain a token that starts with a dollar sign.", [ "$keyword" ], new Token( "$keyword" ), "en_US", true ], ]; /* eslint-enable max-len */ From 5b5766a2691a221c9a3f02f542f97f1f4d284ac4 Mon Sep 17 00:00:00 2001 From: hdvos Date: Tue, 16 May 2023 10:28:36 +0200 Subject: [PATCH 089/616] fix incorrect spec in getSentencesFromTreeSpec.js --- .../sentence/getSentencesFromTreeSpec.js | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/packages/yoastseo/spec/languageProcessing/helpers/sentence/getSentencesFromTreeSpec.js b/packages/yoastseo/spec/languageProcessing/helpers/sentence/getSentencesFromTreeSpec.js index a2e6c485270..d9339833844 100644 --- a/packages/yoastseo/spec/languageProcessing/helpers/sentence/getSentencesFromTreeSpec.js +++ b/packages/yoastseo/spec/languageProcessing/helpers/sentence/getSentencesFromTreeSpec.js @@ -37,17 +37,17 @@ describe( "test", () => { sourceCodeRange: { endOffset: 64, startOffset: 44 }, text: " A dog is very cute.", tokens: [ - { sourceCodeRange: { endOffset: 4, startOffset: 3 }, text: " " }, - { sourceCodeRange: { endOffset: 5, startOffset: 4 }, text: "A" }, - { sourceCodeRange: { endOffset: 6, startOffset: 5 }, text: " " }, - { sourceCodeRange: { endOffset: 9, startOffset: 6 }, text: "dog" }, - { sourceCodeRange: { endOffset: 10, startOffset: 9 }, text: " " }, - { sourceCodeRange: { endOffset: 12, startOffset: 10 }, text: "is" }, - { sourceCodeRange: { endOffset: 13, startOffset: 12 }, text: " " }, - { sourceCodeRange: { endOffset: 17, startOffset: 13 }, text: "very" }, - { sourceCodeRange: { endOffset: 18, startOffset: 17 }, text: " " }, - { sourceCodeRange: { endOffset: 22, startOffset: 18 }, text: "cute" }, - { sourceCodeRange: { endOffset: 23, startOffset: 22 }, text: "." }, + { sourceCodeRange: { endOffset: 45, startOffset: 44 }, text: " " }, + { sourceCodeRange: { endOffset: 46, startOffset: 45 }, text: "A" }, + { sourceCodeRange: { endOffset: 47, startOffset: 46 }, text: " " }, + { sourceCodeRange: { endOffset: 50, startOffset: 47 }, text: "dog" }, + { sourceCodeRange: { endOffset: 51, startOffset: 50 }, text: " " }, + { sourceCodeRange: { endOffset: 53, startOffset: 51 }, text: "is" }, + { sourceCodeRange: { endOffset: 54, startOffset: 53 }, text: " " }, + { sourceCodeRange: { endOffset: 58, startOffset: 54 }, text: "very" }, + { sourceCodeRange: { endOffset: 59, startOffset: 58 }, text: " " }, + { sourceCodeRange: { endOffset: 63, startOffset: 59 }, text: "cute" }, + { sourceCodeRange: { endOffset: 64, startOffset: 63 }, text: "." 
}, ] }, { sourceCodeRange: { endOffset: 86, startOffset: 72 }, From b5fdc94c5434ea6888e1d33ccdcec8bcf901db1c Mon Sep 17 00:00:00 2001 From: hdvos Date: Tue, 16 May 2023 10:30:03 +0200 Subject: [PATCH 090/616] fix tokenization of keywords with a full stop in findKeyWordFormsInSentence.js --- .../helpers/match/findKeyWordFormsInSentence.js | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/packages/yoastseo/src/languageProcessing/helpers/match/findKeyWordFormsInSentence.js b/packages/yoastseo/src/languageProcessing/helpers/match/findKeyWordFormsInSentence.js index c814217230b..755a97c7dfd 100644 --- a/packages/yoastseo/src/languageProcessing/helpers/match/findKeyWordFormsInSentence.js +++ b/packages/yoastseo/src/languageProcessing/helpers/match/findKeyWordFormsInSentence.js @@ -15,7 +15,6 @@ import convertToPositionResult from "./convertMatchToPositionResult"; const tokenizeKeyphraseForms = ( keyphraseForms ) => { const newKeyphraseForms = []; keyphraseForms.forEach( word => { - // const newWord = [] const tokenizedWord = word.map( form => { return form.split( tokenizerSplitter ); } ); @@ -24,6 +23,20 @@ const tokenizeKeyphraseForms = ( keyphraseForms ) => { tokenizedWordTransposed.forEach( newWord => newKeyphraseForms.push( newWord ) ); } ); + + // if a token ends with a single \ then prepend the \ to the next token and remove the \ it from the current token. + // This is a fix because the tokenization does not work well for a situation where a fullstop is escaped. + // For example: yoastseo 9.3 would be tokenized as ["yoastseo", "9\", ".", "3"]. This fix corrects this to ["yoastseo", "9", "\.", "3"]. + for ( let i = 0; i < newKeyphraseForms.length; i++ ) { + if ( newKeyphraseForms[ i ][ 0 ].endsWith( "\\" ) && ! newKeyphraseForms[ i ][ 0 ].endsWith( "\\\\" ) ) { + // Remove the \ from the current token. + newKeyphraseForms[ i ][ 0 ] = newKeyphraseForms[ i ][ 0 ].slice( 0, -1 ); + // Prepend the \ to the next token. + newKeyphraseForms[ i + 1 ][ 0 ] = "\\" + newKeyphraseForms[ i + 1 ][ 0 ]; + } + } + + return newKeyphraseForms; }; From 691066e7a9dfd476d2c1a08f1f87b44d695037d4 Mon Sep 17 00:00:00 2001 From: hdvos Date: Tue, 16 May 2023 10:30:27 +0200 Subject: [PATCH 091/616] fix incorrect expectation in englishPaper2.js --- .../fullTextTests/testTexts/en/englishPaper2.js | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/packages/yoastseo/spec/scoring/collectionPages/fullTextTests/testTexts/en/englishPaper2.js b/packages/yoastseo/spec/scoring/collectionPages/fullTextTests/testTexts/en/englishPaper2.js index 9a297853e8d..9d41970a9f2 100644 --- a/packages/yoastseo/spec/scoring/collectionPages/fullTextTests/testTexts/en/englishPaper2.js +++ b/packages/yoastseo/spec/scoring/collectionPages/fullTextTests/testTexts/en/englishPaper2.js @@ -29,10 +29,8 @@ const expectedResults = { }, keywordDensity: { isApplicable: true, - score: -50, - resultText: "
Keyphrase density: The keyphrase was found 5 times. " + - "That's way more than the recommended maximum of 4 times for a text of this length. Don't overoptimize!", + score: 9, + resultText: "Keyphrase density: The keyphrase was found 4 times. This is great!", }, metaDescriptionKeyword: { isApplicable: true, From 8aafd4c9371c771471ef7b54ef7b2363cbf13224 Mon Sep 17 00:00:00 2001 From: hdvos Date: Tue, 16 May 2023 10:31:51 +0200 Subject: [PATCH 092/616] ignore max len in englishPaper2.js --- .../collectionPages/fullTextTests/testTexts/en/englishPaper2.js | 1 + 1 file changed, 1 insertion(+) diff --git a/packages/yoastseo/spec/scoring/collectionPages/fullTextTests/testTexts/en/englishPaper2.js b/packages/yoastseo/spec/scoring/collectionPages/fullTextTests/testTexts/en/englishPaper2.js index 9d41970a9f2..2ecbe59d6b1 100644 --- a/packages/yoastseo/spec/scoring/collectionPages/fullTextTests/testTexts/en/englishPaper2.js +++ b/packages/yoastseo/spec/scoring/collectionPages/fullTextTests/testTexts/en/englishPaper2.js @@ -118,6 +118,7 @@ const expectedResults = { wordComplexity: { isApplicable: true, score: 6, + // eslint-disable-next-line max-len resultText: "Word complexity: 16.67% of the words in your text are considered complex. " + "Try to use shorter and more familiar words to improve readability.", }, From ee5780e2be30a174da21f86a2df90832ce686711 Mon Sep 17 00:00:00 2001 From: hdvos Date: Tue, 16 May 2023 10:49:19 +0200 Subject: [PATCH 093/616] fix incorrect expectation in englishPaper3.js --- .../collectionPages/fullTextTests/testTexts/en/englishPaper3.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/yoastseo/spec/scoring/collectionPages/fullTextTests/testTexts/en/englishPaper3.js b/packages/yoastseo/spec/scoring/collectionPages/fullTextTests/testTexts/en/englishPaper3.js index 2d15e4c3562..431fa7c1e35 100644 --- a/packages/yoastseo/spec/scoring/collectionPages/fullTextTests/testTexts/en/englishPaper3.js +++ b/packages/yoastseo/spec/scoring/collectionPages/fullTextTests/testTexts/en/englishPaper3.js @@ -29,7 +29,7 @@ const expectedResults = { keywordDensity: { isApplicable: true, score: 9, - resultText: "Keyphrase density: The keyphrase was found 7 times. This is great!", + resultText: "Keyphrase density: The keyphrase was found 8 times. This is great!", }, metaDescriptionKeyword: { isApplicable: true, From b017a79187701091903f3eeb1d77896158c96543 Mon Sep 17 00:00:00 2001 From: Mykola Shlyakhtun Date: Wed, 17 May 2023 10:08:32 +0300 Subject: [PATCH 094/616] HTML Parser - Implement position based highlighting for block editor#138 fix warnings. --- packages/js/src/decorator/gutenberg.js | 3 --- 1 file changed, 3 deletions(-) diff --git a/packages/js/src/decorator/gutenberg.js b/packages/js/src/decorator/gutenberg.js index 1f43fd4d96d..b4593f7c6e8 100644 --- a/packages/js/src/decorator/gutenberg.js +++ b/packages/js/src/decorator/gutenberg.js @@ -547,7 +547,6 @@ export function getAnnotationsForBlocks( blocks, marks ) { * @returns {void} */ export function applyAsAnnotations( marks ) { - console.log( "applyAsAnnotations" ); // Do this always to allow people to select a different eye marker while another one is active. 
removeAllAnnotations(); const fieldsToMark = getFieldsToMarkHelper( marks ); @@ -577,8 +576,6 @@ export function applyAsAnnotations( marks ) { * @returns {void} */ function removeAllAnnotationsFromBlock( blockClientId ) { - console.log( "removeAllAnnotationsFromBlock", blockClientId ); - const annotationsInBlock = select( "core/annotations" ) .__experimentalGetAnnotations() .filter( annotation => annotation.blockClientId === blockClientId && annotation.source === ANNOTATION_SOURCE ); From de6352034150c2fed8e0f502ae7e7a3fa9ee338d Mon Sep 17 00:00:00 2001 From: Mykola Shlyakhtun Date: Wed, 17 May 2023 10:11:22 +0300 Subject: [PATCH 095/616] HTML Parser - Implement position based highlighting for block editor#138 Remove console out. --- packages/js/src/analysis/getApplyMarks.js | 1 - 1 file changed, 1 deletion(-) diff --git a/packages/js/src/analysis/getApplyMarks.js b/packages/js/src/analysis/getApplyMarks.js index 76d2c1b90eb..3c56601da37 100644 --- a/packages/js/src/analysis/getApplyMarks.js +++ b/packages/js/src/analysis/getApplyMarks.js @@ -31,7 +31,6 @@ function applyMarksTinyMCE( paper, marks ) { * @returns {void} */ function applyMarks( paper, marks ) { - console.log( "applyMarks" ); let decorator; // Classic editor From 5b94ddf663b5e7ee2a833498b8494a099944547e Mon Sep 17 00:00:00 2001 From: hdvos Date: Wed, 24 May 2023 11:04:36 +0200 Subject: [PATCH 096/616] adapt getMarkingsInSentence.js to word order agnostic matching --- .../highlighting/getMarkingsInSentence.js | 50 ++++++------------- 1 file changed, 15 insertions(+), 35 deletions(-) diff --git a/packages/yoastseo/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js b/packages/yoastseo/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js index 5db10b9c44e..68440bcee9c 100644 --- a/packages/yoastseo/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js +++ b/packages/yoastseo/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js @@ -52,20 +52,16 @@ const createMarksForSentence = ( sentence, matches, locale ) => { let sentenceText = sentence.text; // Create one array with both primary and secondary matches, sorted by start offset. - let allMatches = flatten( matches.primaryMatches ); - if ( matches.primaryMatches.length > 0 ) { - allMatches = allMatches.concat( flatten( matches.secondaryMatches ) ).sort( function( a, b ) { - return a.sourceCodeRange.startOffset - b.sourceCodeRange.startOffset; - } ); - } // Merge consecutive and overlapping matches. - allMatches = mergeConsecutiveAndOverlappingMatches( allMatches, getLanguage( locale ) !== "ja" ); + // TODO adapt last argument once japanese tokenization is implemented. + const mergedMatches = mergeConsecutiveAndOverlappingMatches( matches, getLanguage( locale ) !== "ja" ); + console.log( mergedMatches ); const sentenceStartOffset = sentence.sourceCodeRange.startOffset; // Loop through the matches backwards, so that the reference is not affected by the changes. - for ( let i = allMatches.length - 1; i >= 0; i-- ) { - const match = allMatches[ i ]; + for ( let i = mergedMatches.length - 1; i >= 0; i-- ) { + const match = mergedMatches[ i ]; // Apply the mark to the sentence. // sentenceStartOffset is subtracted because the start and end offsets are relative to the start of the source code. 
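	// A worked example of the offset adjustment above, with hypothetical numbers (not taken from this change):
	// if sentence.sourceCodeRange.startOffset is 120 and a merged match spans offsets 135–140 in the source code,
	// the mark tags end up around characters 15–20 of sentence.text.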
@@ -133,7 +129,7 @@ const mergeConsecutiveAndOverlappingMarkings = ( markings, useSpace = true ) => function getMarkingsInSentence( sentence, matchesInSentence, matchWordCustomHelper, locale ) { // Create the marked sentence that is used for search based highlighting. const markedSentence = createMarksForSentence( sentence, matchesInSentence, locale ); - + console.log( markedSentence ); // Create the markings for the primary matches. // Note that there is a paradigm shift: // With search based highlighting there would be one marking for the entire sentence. @@ -141,34 +137,18 @@ function getMarkingsInSentence( sentence, matchesInSentence, matchWordCustomHelp // In order to be backwards compatible with search based highlighting, // all markings for a sentence have the same markedSentence. // ... - const markings = matchesInSentence.primaryMatches.flatMap( match => { - return match.map( token => { - return new Mark( { - position: { - startOffset: token.sourceCodeRange.startOffset, - endOffset: token.sourceCodeRange.endOffset, - }, - marked: markedSentence, - original: sentence.text, - } ); + const markings = matchesInSentence.map( token => { + return new Mark( { + position: { + startOffset: token.sourceCodeRange.startOffset, + endOffset: token.sourceCodeRange.endOffset, + }, + marked: markedSentence, + original: sentence.text, } ); } ); + console.log( markings ); - // Only if there are primary matches, add the secondary matches. - if ( matchesInSentence.primaryMatches.length > 0 ) { - flatten( matchesInSentence.secondaryMatches ).forEach( match =>{ - markings.push( new Mark( { - position: { - startOffset: match.sourceCodeRange.startOffset, - endOffset: match.sourceCodeRange.endOffset, - }, - marked: markedSentence, - original: sentence.text, - - } ) ); - } - ); - } return mergeConsecutiveAndOverlappingMarkings( markings, getLanguage( locale ) !== "ja" ); } From 8866896fae437fa30511e5a3a27b927ad091b1e3 Mon Sep 17 00:00:00 2001 From: hdvos Date: Wed, 24 May 2023 11:04:48 +0200 Subject: [PATCH 097/616] adapt keywordCount.js to word order agnostic matching --- .../researches/keywordCount.js | 53 +++++++++++++++++-- 1 file changed, 49 insertions(+), 4 deletions(-) diff --git a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js index a1432af708a..bc33f23e130 100644 --- a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js +++ b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js @@ -3,7 +3,51 @@ import getSentencesFromTree from "../helpers/sentence/getSentencesFromTree"; import { normalizeSingle } from "../helpers/sanitize/quotes"; import getMarkingsInSentence from "../helpers/highlighting/getMarkingsInSentence"; import findKeyWordFormsInSentence from "../helpers/match/findKeyWordFormsInSentence"; +import matchKeywordWithSentence from "../helpers/match/matchKeywordWithSentence"; +const countMatches = ( matches, keyphraseForms ) => { + // the count is the number of complete matches. + + const matchesCopy = [ ...matches ]; + + let nrMatches = 0; + // While the number of matches is longer than the keyphrase forms. + while ( matchesCopy.length >= keyphraseForms.length ) { + let nrKeyphraseFormsWithMatch = 0; + + // for each keyphrase form, if there is a match that is equal to the keyphrase form, remove it from the matches. + // If there is no match, return the current count. + // If all keyphrase forms have a match, increase the count by 1. 
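	// Illustrative walk-through with assumed input: for keyphraseForms [ [ "key" ], [ "word", "words" ] ]
	// and matches "key", "words", "key", the first pass removes "key" and "words" and raises the count to 1;
	// only "key" is then left, which is fewer than the number of keyphrase forms, so the while loop stops and 1 is returned.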
+ for ( let i = 0; i < keyphraseForms.length; i++ ) { + const keyphraseForm = keyphraseForms[ i ]; + + // check if any of the keyphrase forms is in the matches. + const foundMatch = matchesCopy.find( match =>{ + return keyphraseForm.some( keyphraseFormWord => { + return match.text.toLowerCase() === keyphraseFormWord.toLowerCase(); + } ); + } ); + // + if ( foundMatch ) { + matchesCopy.splice( matchesCopy.indexOf( foundMatch ), 1 ); + nrKeyphraseFormsWithMatch += 1; + } + } + if ( nrKeyphraseFormsWithMatch === keyphraseForms.length ) { + nrMatches += 1; + + // const match = matches.find(match => match === keyphraseForm); + // if (match) { + // matches.splice(matches.indexOf(match), 1); + // } else { + // return matches.length; + // } + } else { + return nrMatches; + } + } + return nrMatches; +}; /** * Counts the occurrences of the keyphrase in the text and creates the Mark objects for the matches. @@ -19,12 +63,14 @@ export function countKeyphraseInText( sentences, topicForms, locale, matchWordCu const result = { count: 0, markings: [] }; sentences.forEach( sentence => { - const matchesInSentence = findKeyWordFormsInSentence( sentence, topicForms.keyphraseForms, locale, matchWordCustomHelper ); - + // const matchesInSentence = findKeyWordFormsInSentence( sentence, topicForms.keyphraseForms, locale, matchWordCustomHelper ); + const matchesInSentence = matchKeywordWithSentence( topicForms.keyphraseForms, sentence ); + console.log( matchesInSentence); + const matchesCount = countMatches( matchesInSentence, topicForms.keyphraseForms ); const markings = getMarkingsInSentence( sentence, matchesInSentence, matchWordCustomHelper, locale ); result.markings.push( markings ); - result.count += matchesInSentence.primaryMatches.length; + result.count += matchesCount; } ); return result; @@ -46,7 +92,6 @@ export default function keyphraseCount( paper, researcher ) { const sentences = getSentencesFromTree( paper ); const keyphraseFound = countKeyphraseInText( sentences, topicForms, locale, matchWordCustomHelper ); - return { count: keyphraseFound.count, markings: flatten( keyphraseFound.markings ), From 3242b3b552bd827765b0d9b9e18e0bb768175645 Mon Sep 17 00:00:00 2001 From: hdvos Date: Wed, 24 May 2023 11:05:00 +0200 Subject: [PATCH 098/616] adapt keywordCountSpec.js to word order agnostic matching --- .../languageProcessing/researches/keywordCountSpec.js | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js index 5ba6d8aa1af..50999da06ab 100644 --- a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js +++ b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js @@ -143,15 +143,11 @@ describe( "Test for counting the keyword in a text", function() { ); } ); - it( "counts a string with a keyword with a '-' in it", function() { + it( "does count a string with a keyword with a '-' in it if the focus keyphrase does have a '-' in it", function() { const mockPaper = new Paper( "

A string with a key-word.

" ); buildTree( mockPaper, mockResearcher ); expect( keyphraseCount( mockPaper, mockResearcherMinus ).count ).toBe( 1 ); - expect( keyphraseCount( mockPaper, mockResearcherMinus ).markings ).toEqual( [ - new Mark( { - marked: "A string with a key-word.", - original: "A string with a key-word.", - position: { endOffset: 28, startOffset: 20 } } ) ] + expect( keyphraseCount( mockPaper, mockResearcherMinus ).markings ).toEqual( [ ] ); } ); From 287bb7592f7b004e854c0c139dced17878a66940 Mon Sep 17 00:00:00 2001 From: hdvos Date: Tue, 30 May 2023 14:59:02 +0200 Subject: [PATCH 099/616] rename matchKeywordWithSentence to matchKeyphraseWithSentence in keyphraseCount.js --- .../src/languageProcessing/researches/keywordCount.js | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js index bc33f23e130..21977f2bfbd 100644 --- a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js +++ b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js @@ -3,7 +3,8 @@ import getSentencesFromTree from "../helpers/sentence/getSentencesFromTree"; import { normalizeSingle } from "../helpers/sanitize/quotes"; import getMarkingsInSentence from "../helpers/highlighting/getMarkingsInSentence"; import findKeyWordFormsInSentence from "../helpers/match/findKeyWordFormsInSentence"; -import matchKeywordWithSentence from "../helpers/match/matchKeywordWithSentence"; +// import matchKeywordWithSentence from "../helpers/match/matchKeywordWithSentence"; +import matchKeyphraseWithSentence from "../helpers/match/matchKeyphraseWithSentence"; const countMatches = ( matches, keyphraseForms ) => { // the count is the number of complete matches. 
@@ -64,7 +65,7 @@ export function countKeyphraseInText( sentences, topicForms, locale, matchWordCu sentences.forEach( sentence => { // const matchesInSentence = findKeyWordFormsInSentence( sentence, topicForms.keyphraseForms, locale, matchWordCustomHelper ); - const matchesInSentence = matchKeywordWithSentence( topicForms.keyphraseForms, sentence ); + const matchesInSentence = matchKeyphraseWithSentence( topicForms.keyphraseForms, sentence ); console.log( matchesInSentence); const matchesCount = countMatches( matchesInSentence, topicForms.keyphraseForms ); const markings = getMarkingsInSentence( sentence, matchesInSentence, matchWordCustomHelper, locale ); From a594542a64ddcc46ea3e691cce26e60124261e4b Mon Sep 17 00:00:00 2001 From: Hanna Worku Date: Tue, 30 May 2023 11:23:22 -0700 Subject: [PATCH 100/616] add sentence tokenizer for Japanese --- .../languages/ja/Researcher.js | 3 +++ .../languages/ja/SplitIntoTokensJapanese.js | 22 +++++++++++++++++++ 2 files changed, 25 insertions(+) create mode 100644 packages/yoastseo/src/parse/structure/languages/ja/SplitIntoTokensJapanese.js diff --git a/packages/yoastseo/src/languageProcessing/languages/ja/Researcher.js b/packages/yoastseo/src/languageProcessing/languages/ja/Researcher.js index 4312b87d4bb..252b30dae0b 100644 --- a/packages/yoastseo/src/languageProcessing/languages/ja/Researcher.js +++ b/packages/yoastseo/src/languageProcessing/languages/ja/Researcher.js @@ -10,6 +10,7 @@ import customCountLength from "./helpers/countCharacters"; import matchTransitionWordsHelper from "./helpers/matchTransitionWords"; import getContentWords from "./helpers/getContentWords"; import memoizedTokenizer from "./helpers/memoizedSentenceTokenizer"; +import splitIntoTokensJapanese from "yoastseo/src/parse/structure/languages/ja/SplitIntoTokensJapanese"; // All config import firstWordExceptions from "./config/firstWordExceptions"; @@ -29,6 +30,7 @@ import morphology from "./customResearches/getWordForms"; import getKeyphraseLength from "./customResearches/getKeyphraseLength"; import textLengthResearch from "./customResearches/textLength"; import findKeyphraseInSEOTitle from "./customResearches/findKeyphraseInSEOTitle"; +import SplitIntoTokensJapanese from "../../../parse/structure/languages/ja/SplitIntoTokensJapanese"; /** * The researches contains all the researches @@ -72,6 +74,7 @@ export default class Researcher extends AbstractResearcher { customCountLength, matchTransitionWordsHelper, memoizedTokenizer, + splitIntoTokensJapanese, } ); Object.assign( this.defaultResearches, { diff --git a/packages/yoastseo/src/parse/structure/languages/ja/SplitIntoTokensJapanese.js b/packages/yoastseo/src/parse/structure/languages/ja/SplitIntoTokensJapanese.js new file mode 100644 index 00000000000..d5baaaa16ec --- /dev/null +++ b/packages/yoastseo/src/parse/structure/languages/ja/SplitIntoTokensJapanese.js @@ -0,0 +1,22 @@ +import { map } from "lodash-es"; +import TinySegmenter from "tiny-segmenter"; + +/** + * Split sentence into tokens. + * + * @param {Sentence} sentence The sentence to split. + * + * @returns {Token[]} The tokens. 
+ */ +function splitIntoTokensJapanese( sentence ) { + // Retrieve sentence from sentence class + const sentenceText = sentence.text; + // Return empty string if sentence is empty + if ( sentenceText === "" ) { + return []; + } + // Split sentences into words that are also tokens + const words = TinySegmenter.segment( sentenceText ); + return map( words ); +} +export default splitIntoTokensJapanese; From 6c75c4a05931b1881e77baa058347fbddd96eb58 Mon Sep 17 00:00:00 2001 From: Hanna Worku Date: Tue, 30 May 2023 17:20:31 -0700 Subject: [PATCH 101/616] pass custom tokenization for Japanese in LanguageProcessor --- .../src/parse/language/LanguageProcessor.js | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/packages/yoastseo/src/parse/language/LanguageProcessor.js b/packages/yoastseo/src/parse/language/LanguageProcessor.js index edd24304170..2a735e64dff 100644 --- a/packages/yoastseo/src/parse/language/LanguageProcessor.js +++ b/packages/yoastseo/src/parse/language/LanguageProcessor.js @@ -1,5 +1,6 @@ import Sentence from "../structure/Sentence"; import Token from "../structure/Token"; +import countWords from "../../languageProcessing/helpers/word/countWords"; const whitespaceRegex = /^\s+$/; /** @@ -49,12 +50,22 @@ class LanguageProcessor { * Split sentence into tokens. * * @param {Sentence} sentence The sentence to split. + * @param {paper} paper The paper containing the keyword and text. + * @param {researcher} researcher The researcher. * * @returns {Token[]} The tokens. */ - splitIntoTokens( sentence ) { + splitIntoTokens( sentence, paper, researcher ) { // Retrieve sentence from sentence class const sentenceText = sentence.text; + // If there is a custom getWords helper use its output for retrieving words/tokens. + const getWordsCustomHelper = researcher.getHelper( "getWordsCustomHelper" ); + let wordCount = countWords( paper.getText() ); + if ( getWordsCustomHelper ) { + wordCount = getWordsCustomHelper( paper.getText() ).length; + const tokenTextsCustom = wordCount; + return tokenTextsCustom.map( tokenText => new Token( tokenText ) ); + } // Split the sentence string into tokens const tokenTexts = sentenceText.split( /([\s,.!?;:([\]'"¡¿)/])/g ).filter( x => x !== "" ); return tokenTexts.map( tokenText => new Token( tokenText ) ); From 982a59a3183abae7fe9d400f83060d0df5d36465 Mon Sep 17 00:00:00 2001 From: Hanna Worku Date: Thu, 1 Jun 2023 14:22:32 -0700 Subject: [PATCH 102/616] add tests to check what errors show in github env --- .../yoastseo/spec/parse/build/buildSpec.js | 61 +++++++++++++++++++ .../parse/language/LanguageProcessorSpec.js | 13 ++++ 2 files changed, 74 insertions(+) diff --git a/packages/yoastseo/spec/parse/build/buildSpec.js b/packages/yoastseo/spec/parse/build/buildSpec.js index 949cc66a965..685ceef310a 100644 --- a/packages/yoastseo/spec/parse/build/buildSpec.js +++ b/packages/yoastseo/spec/parse/build/buildSpec.js @@ -2,6 +2,7 @@ import build from "../../../src/parse/build/build"; import LanguageProcessor from "../../../src/parse/language/LanguageProcessor"; import Factory from "../../specHelpers/factory"; import memoizedSentenceTokenizer from "../../../src/languageProcessing/helpers/sentence/memoizedSentenceTokenizer"; +import splitIntoTokensJapanese from "../../../src/parse/structure/languages/ja/SplitIntoTokensJapanese"; describe( "The parse function", () => { it( "parses a basic HTML text", () => { @@ -67,6 +68,66 @@ describe( "The parse function", () => { } ); } ); + it( "parses a basic Japanese HTML text", () => { + const html = "

こんにちは世界!

"; + + const researcher = Factory.buildMockResearcher( {}, true, false, false, + { splitIntoTokensJapanese: splitIntoTokensJapanese } ); + const languageProcessor = new LanguageProcessor( researcher ); + expect( build( html, languageProcessor ) ).toEqual( { + name: "#document-fragment", + attributes: {}, + childNodes: [ { + name: "div", + sourceCodeLocation: { + startOffset: 0, + endOffset: 45, + startTag: { + startOffset: 0, + endOffset: 5, + }, + endTag: { + startOffset: 39, + endOffset: 45, + }, + }, + attributes: {}, + childNodes: [ { + name: "p", + isImplicit: false, + attributes: { + "class": new Set( [ "yoast" ] ), + }, + sentences: [ { + text: "こんにちは世界!", + sourceCodeRange: { startOffset: 22, endOffset: 35 }, + tokens: [ + { text: "こんにちは", sourceCodeRange: { startOffset: 22, endOffset: 27 } }, + { text: "世界", sourceCodeRange: { startOffset: 29, endOffset: 34 } }, + { text: "!", sourceCodeRange: { startOffset: 34, endOffset: 35 } }, + ], + } ], + childNodes: [ { + name: "#text", + value: "こんにちは世界!", + } ], + sourceCodeLocation: { + startOffset: 5, + endOffset: 39, + startTag: { + startOffset: 5, + endOffset: 22, + }, + endTag: { + startOffset: 35, + endOffset: 39, + }, + }, + } ], + } ], + } ); + } ); + it( "adds implicit paragraphs around phrasing content outside of paragraphs and headings", () => { const html = "
Hello World!
"; diff --git a/packages/yoastseo/spec/parse/language/LanguageProcessorSpec.js b/packages/yoastseo/spec/parse/language/LanguageProcessorSpec.js index e555de25b2a..0134717945d 100644 --- a/packages/yoastseo/spec/parse/language/LanguageProcessorSpec.js +++ b/packages/yoastseo/spec/parse/language/LanguageProcessorSpec.js @@ -43,3 +43,16 @@ describe( "A test for the splitIntoTokens method", () => { ] ); } ); } ); + +describe( "A test for the splitIntoTokens method in Japanese", () => { + it( "should return an array of tokens", function() { + const languageProcessor = new LanguageProcessor( researcher ); + + const tokens = languageProcessor.splitIntoTokens( new Sentence( "こんにちは世界!" ) ); + expect( tokens ).toEqual( [ + { text: "こんにちは", sourceCodeRange: {} }, + { text: "世界", sourceCodeRange: {} }, + { text: "!", sourceCodeRange: {} }, + ] ); + } ); +} ); From 52edb3e8278d0f95a907297637db1de64222bf79 Mon Sep 17 00:00:00 2001 From: Mykola Shlyakhtun Date: Thu, 1 Jun 2023 16:17:51 +0300 Subject: [PATCH 103/616] Fix linting issues --- .../yoastseo/src/languageProcessing/languages/ja/Researcher.js | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/packages/yoastseo/src/languageProcessing/languages/ja/Researcher.js b/packages/yoastseo/src/languageProcessing/languages/ja/Researcher.js index 252b30dae0b..8ac2daef08d 100644 --- a/packages/yoastseo/src/languageProcessing/languages/ja/Researcher.js +++ b/packages/yoastseo/src/languageProcessing/languages/ja/Researcher.js @@ -10,7 +10,7 @@ import customCountLength from "./helpers/countCharacters"; import matchTransitionWordsHelper from "./helpers/matchTransitionWords"; import getContentWords from "./helpers/getContentWords"; import memoizedTokenizer from "./helpers/memoizedSentenceTokenizer"; -import splitIntoTokensJapanese from "yoastseo/src/parse/structure/languages/ja/SplitIntoTokensJapanese"; +import splitIntoTokensJapanese from "../../../parse/structure/languages/ja/SplitIntoTokensJapanese"; // All config import firstWordExceptions from "./config/firstWordExceptions"; @@ -30,7 +30,6 @@ import morphology from "./customResearches/getWordForms"; import getKeyphraseLength from "./customResearches/getKeyphraseLength"; import textLengthResearch from "./customResearches/textLength"; import findKeyphraseInSEOTitle from "./customResearches/findKeyphraseInSEOTitle"; -import SplitIntoTokensJapanese from "../../../parse/structure/languages/ja/SplitIntoTokensJapanese"; /** * The researches contains all the researches From 324896a527748ccd8e767ab611eb718eb430f300 Mon Sep 17 00:00:00 2001 From: Mykola Shlyakhtun Date: Thu, 1 Jun 2023 16:27:49 +0300 Subject: [PATCH 104/616] Check that researcher is present is added. --- packages/yoastseo/src/parse/language/LanguageProcessor.js | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/packages/yoastseo/src/parse/language/LanguageProcessor.js b/packages/yoastseo/src/parse/language/LanguageProcessor.js index 2a735e64dff..689d99b35c3 100644 --- a/packages/yoastseo/src/parse/language/LanguageProcessor.js +++ b/packages/yoastseo/src/parse/language/LanguageProcessor.js @@ -59,10 +59,9 @@ class LanguageProcessor { // Retrieve sentence from sentence class const sentenceText = sentence.text; // If there is a custom getWords helper use its output for retrieving words/tokens. 
- const getWordsCustomHelper = researcher.getHelper( "getWordsCustomHelper" ); - let wordCount = countWords( paper.getText() ); + const getWordsCustomHelper = researcher && researcher.getHelper( "getWordsCustomHelper" ); if ( getWordsCustomHelper ) { - wordCount = getWordsCustomHelper( paper.getText() ).length; + const wordCount = getWordsCustomHelper( paper.getText() ).length; const tokenTextsCustom = wordCount; return tokenTextsCustom.map( tokenText => new Token( tokenText ) ); } From a0fe59af27e1d1569637e3df77470067b5946359 Mon Sep 17 00:00:00 2001 From: Hanna Worku Date: Fri, 2 Jun 2023 11:01:10 -0700 Subject: [PATCH 105/616] retrieve research helper in language processor --- packages/yoastseo/src/parse/language/LanguageProcessor.js | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/packages/yoastseo/src/parse/language/LanguageProcessor.js b/packages/yoastseo/src/parse/language/LanguageProcessor.js index 689d99b35c3..a9b4a51d3c2 100644 --- a/packages/yoastseo/src/parse/language/LanguageProcessor.js +++ b/packages/yoastseo/src/parse/language/LanguageProcessor.js @@ -1,6 +1,5 @@ import Sentence from "../structure/Sentence"; import Token from "../structure/Token"; -import countWords from "../../languageProcessing/helpers/word/countWords"; const whitespaceRegex = /^\s+$/; /** @@ -61,8 +60,8 @@ class LanguageProcessor { // If there is a custom getWords helper use its output for retrieving words/tokens. const getWordsCustomHelper = researcher && researcher.getHelper( "getWordsCustomHelper" ); if ( getWordsCustomHelper ) { - const wordCount = getWordsCustomHelper( paper.getText() ).length; - const tokenTextsCustom = wordCount; + const tokenTextsCustom = researcher.getHelper( "splitIntoTokensJapanese" ); + return tokenTextsCustom.map( tokenText => new Token( tokenText ) ); } // Split the sentence string into tokens From 79711cea8bc5bd131626aa207a1091eee4d6b8a4 Mon Sep 17 00:00:00 2001 From: Hanna Worku Date: Fri, 2 Jun 2023 11:08:51 -0700 Subject: [PATCH 106/616] remove unnecessary function arguments --- .../yoastseo/src/parse/language/LanguageProcessor.js | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/packages/yoastseo/src/parse/language/LanguageProcessor.js b/packages/yoastseo/src/parse/language/LanguageProcessor.js index a9b4a51d3c2..827d0c1b984 100644 --- a/packages/yoastseo/src/parse/language/LanguageProcessor.js +++ b/packages/yoastseo/src/parse/language/LanguageProcessor.js @@ -49,19 +49,16 @@ class LanguageProcessor { * Split sentence into tokens. * * @param {Sentence} sentence The sentence to split. - * @param {paper} paper The paper containing the keyword and text. - * @param {researcher} researcher The researcher. * * @returns {Token[]} The tokens. */ - splitIntoTokens( sentence, paper, researcher ) { + splitIntoTokens( sentence ) { // Retrieve sentence from sentence class const sentenceText = sentence.text; // If there is a custom getWords helper use its output for retrieving words/tokens. 
- const getWordsCustomHelper = researcher && researcher.getHelper( "getWordsCustomHelper" ); + const getWordsCustomHelper = this.researcher.getHelper( "getWordsCustomHelper" ); if ( getWordsCustomHelper ) { - const tokenTextsCustom = researcher.getHelper( "splitIntoTokensJapanese" ); - + const tokenTextsCustom = this.researcher.getHelper( "splitIntoTokensJapanese" ); return tokenTextsCustom.map( tokenText => new Token( tokenText ) ); } // Split the sentence string into tokens From c985f3168489159b0e255c7cdc4e713f7e2c4a36 Mon Sep 17 00:00:00 2001 From: Hanna Worku Date: Fri, 2 Jun 2023 13:26:58 -0700 Subject: [PATCH 107/616] improve code --- .../languageProcessing/languages/ja/Researcher.js | 4 ++-- .../yoastseo/src/parse/language/LanguageProcessor.js | 12 ++++++------ .../languages/ja/SplitIntoTokensJapanese.js | 2 +- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/packages/yoastseo/src/languageProcessing/languages/ja/Researcher.js b/packages/yoastseo/src/languageProcessing/languages/ja/Researcher.js index 8ac2daef08d..fa354b0e2a8 100644 --- a/packages/yoastseo/src/languageProcessing/languages/ja/Researcher.js +++ b/packages/yoastseo/src/languageProcessing/languages/ja/Researcher.js @@ -10,7 +10,7 @@ import customCountLength from "./helpers/countCharacters"; import matchTransitionWordsHelper from "./helpers/matchTransitionWords"; import getContentWords from "./helpers/getContentWords"; import memoizedTokenizer from "./helpers/memoizedSentenceTokenizer"; -import splitIntoTokensJapanese from "../../../parse/structure/languages/ja/SplitIntoTokensJapanese"; +import splitIntoTokensCustom from "../../../parse/structure/languages/ja/SplitIntoTokensJapanese"; // All config import firstWordExceptions from "./config/firstWordExceptions"; @@ -73,7 +73,7 @@ export default class Researcher extends AbstractResearcher { customCountLength, matchTransitionWordsHelper, memoizedTokenizer, - splitIntoTokensJapanese, + splitIntoTokensCustom, } ); Object.assign( this.defaultResearches, { diff --git a/packages/yoastseo/src/parse/language/LanguageProcessor.js b/packages/yoastseo/src/parse/language/LanguageProcessor.js index 827d0c1b984..b15214b3d64 100644 --- a/packages/yoastseo/src/parse/language/LanguageProcessor.js +++ b/packages/yoastseo/src/parse/language/LanguageProcessor.js @@ -56,14 +56,14 @@ class LanguageProcessor { // Retrieve sentence from sentence class const sentenceText = sentence.text; // If there is a custom getWords helper use its output for retrieving words/tokens. 
- const getWordsCustomHelper = this.researcher.getHelper( "getWordsCustomHelper" ); - if ( getWordsCustomHelper ) { - const tokenTextsCustom = this.researcher.getHelper( "splitIntoTokensJapanese" ); - return tokenTextsCustom.map( tokenText => new Token( tokenText ) ); + const tokenTextsCustom = this.researcher.getHelper( "splitIntoTokensCustom" ); + if ( tokenTextsCustom ) { + const tokensCustom = tokenTextsCustom( sentenceText ); + return tokensCustom.map( tokenText => new Token( tokenText ) ); } // Split the sentence string into tokens - const tokenTexts = sentenceText.split( /([\s,.!?;:([\]'"¡¿)/])/g ).filter( x => x !== "" ); - return tokenTexts.map( tokenText => new Token( tokenText ) ); + const tokens = sentenceText.split( /([\s,.!?;:([\]'"¡¿)/])/g ).filter( x => x !== "" ); + return tokens.map( tokenText => new Token( tokenText ) ); } } export default LanguageProcessor; diff --git a/packages/yoastseo/src/parse/structure/languages/ja/SplitIntoTokensJapanese.js b/packages/yoastseo/src/parse/structure/languages/ja/SplitIntoTokensJapanese.js index d5baaaa16ec..c89e322e67d 100644 --- a/packages/yoastseo/src/parse/structure/languages/ja/SplitIntoTokensJapanese.js +++ b/packages/yoastseo/src/parse/structure/languages/ja/SplitIntoTokensJapanese.js @@ -16,7 +16,7 @@ function splitIntoTokensJapanese( sentence ) { return []; } // Split sentences into words that are also tokens - const words = TinySegmenter.segment( sentenceText ); + const words = new TinySegmenter().segment( sentenceText ); return map( words ); } export default splitIntoTokensJapanese; From d3885adf5753df512840c14c1785083764f10983 Mon Sep 17 00:00:00 2001 From: Hanna Worku Date: Fri, 2 Jun 2023 13:40:38 -0700 Subject: [PATCH 108/616] pass sentence instead of sentence text --- packages/yoastseo/src/parse/language/LanguageProcessor.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/yoastseo/src/parse/language/LanguageProcessor.js b/packages/yoastseo/src/parse/language/LanguageProcessor.js index b15214b3d64..975f18dfea6 100644 --- a/packages/yoastseo/src/parse/language/LanguageProcessor.js +++ b/packages/yoastseo/src/parse/language/LanguageProcessor.js @@ -58,7 +58,7 @@ class LanguageProcessor { // If there is a custom getWords helper use its output for retrieving words/tokens. 
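		// As an illustration: the Japanese helper built on TinySegmenter segments "こんにちは世界!" into
		// "こん", "にち", "は", "世界" and "!" (as the updated LanguageProcessorSpec expects), while languages
		// without a custom helper fall back to the regex split further down.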
const tokenTextsCustom = this.researcher.getHelper( "splitIntoTokensCustom" ); if ( tokenTextsCustom ) { - const tokensCustom = tokenTextsCustom( sentenceText ); + const tokensCustom = tokenTextsCustom( sentence ); return tokensCustom.map( tokenText => new Token( tokenText ) ); } // Split the sentence string into tokens From 8eaf4f0762d2f409b795a771a22364acd89d8a92 Mon Sep 17 00:00:00 2001 From: Hanna Worku Date: Mon, 5 Jun 2023 15:46:41 -0700 Subject: [PATCH 109/616] relocate helper file and update specs --- packages/yoastseo/spec/parse/build/buildSpec.js | 4 ++-- .../spec/parse/language/LanguageProcessorSpec.js | 10 +++++++--- .../src/languageProcessing/languages/ja/Researcher.js | 2 +- .../languages/ja/helpers/splitIntoTokensJapanese.js} | 4 ++-- 4 files changed, 12 insertions(+), 8 deletions(-) rename packages/yoastseo/src/{parse/structure/languages/ja/SplitIntoTokensJapanese.js => languageProcessing/languages/ja/helpers/splitIntoTokensJapanese.js} (85%) diff --git a/packages/yoastseo/spec/parse/build/buildSpec.js b/packages/yoastseo/spec/parse/build/buildSpec.js index 685ceef310a..438505fd842 100644 --- a/packages/yoastseo/spec/parse/build/buildSpec.js +++ b/packages/yoastseo/spec/parse/build/buildSpec.js @@ -2,7 +2,7 @@ import build from "../../../src/parse/build/build"; import LanguageProcessor from "../../../src/parse/language/LanguageProcessor"; import Factory from "../../specHelpers/factory"; import memoizedSentenceTokenizer from "../../../src/languageProcessing/helpers/sentence/memoizedSentenceTokenizer"; -import splitIntoTokensJapanese from "../../../src/parse/structure/languages/ja/SplitIntoTokensJapanese"; +import splitIntoTokensCustom from "../../../src/languageProcessing/languages/ja/helpers/splitIntoTokensJapanese"; describe( "The parse function", () => { it( "parses a basic HTML text", () => { @@ -72,7 +72,7 @@ describe( "The parse function", () => { const html = "

こんにちは世界!

"; const researcher = Factory.buildMockResearcher( {}, true, false, false, - { splitIntoTokensJapanese: splitIntoTokensJapanese } ); + { splitIntoTokensCustom: splitIntoTokensCustom } ); const languageProcessor = new LanguageProcessor( researcher ); expect( build( html, languageProcessor ) ).toEqual( { name: "#document-fragment", diff --git a/packages/yoastseo/spec/parse/language/LanguageProcessorSpec.js b/packages/yoastseo/spec/parse/language/LanguageProcessorSpec.js index 0134717945d..4c033d86c3d 100644 --- a/packages/yoastseo/spec/parse/language/LanguageProcessorSpec.js +++ b/packages/yoastseo/spec/parse/language/LanguageProcessorSpec.js @@ -2,6 +2,7 @@ import LanguageProcessor from "../../../src/parse/language/LanguageProcessor"; import Factory from "../../specHelpers/factory"; import memoizedSentenceTokenizer from "../../../src/languageProcessing/helpers/sentence/memoizedSentenceTokenizer"; import Sentence from "../../../src/parse/structure/Sentence"; +import splitIntoTokensCustom from "../../../src/parse/structure/languages/ja/SplitIntoTokensJapanese"; const researcher = Factory.buildMockResearcher( {}, true, false, false, { memoizedTokenizer: memoizedSentenceTokenizer } ); @@ -46,13 +47,16 @@ describe( "A test for the splitIntoTokens method", () => { describe( "A test for the splitIntoTokens method in Japanese", () => { it( "should return an array of tokens", function() { + const researcher = Factory.buildMockResearcher( {}, true, false, false, + { splitIntoTokensCustom: splitIntoTokensCustom } ); const languageProcessor = new LanguageProcessor( researcher ); - const tokens = languageProcessor.splitIntoTokens( new Sentence( "こんにちは世界!" ) ); expect( tokens ).toEqual( [ - { text: "こんにちは", sourceCodeRange: {} }, + { text: "こん", sourceCodeRange: {} }, + { text: "にち", sourceCodeRange: {} }, + { text: "は", sourceCodeRange: {} }, { text: "世界", sourceCodeRange: {} }, - { text: "!", sourceCodeRange: {} }, + { text: "!", sourceCodeRange: {} }, ] ); } ); } ); diff --git a/packages/yoastseo/src/languageProcessing/languages/ja/Researcher.js b/packages/yoastseo/src/languageProcessing/languages/ja/Researcher.js index fa354b0e2a8..fbd0e0cf813 100644 --- a/packages/yoastseo/src/languageProcessing/languages/ja/Researcher.js +++ b/packages/yoastseo/src/languageProcessing/languages/ja/Researcher.js @@ -10,7 +10,7 @@ import customCountLength from "./helpers/countCharacters"; import matchTransitionWordsHelper from "./helpers/matchTransitionWords"; import getContentWords from "./helpers/getContentWords"; import memoizedTokenizer from "./helpers/memoizedSentenceTokenizer"; -import splitIntoTokensCustom from "../../../parse/structure/languages/ja/SplitIntoTokensJapanese"; +import splitIntoTokensCustom from "./helpers/splitIntoTokensJapanese"; // All config import firstWordExceptions from "./config/firstWordExceptions"; diff --git a/packages/yoastseo/src/parse/structure/languages/ja/SplitIntoTokensJapanese.js b/packages/yoastseo/src/languageProcessing/languages/ja/helpers/splitIntoTokensJapanese.js similarity index 85% rename from packages/yoastseo/src/parse/structure/languages/ja/SplitIntoTokensJapanese.js rename to packages/yoastseo/src/languageProcessing/languages/ja/helpers/splitIntoTokensJapanese.js index c89e322e67d..959bd1fd99c 100644 --- a/packages/yoastseo/src/parse/structure/languages/ja/SplitIntoTokensJapanese.js +++ b/packages/yoastseo/src/languageProcessing/languages/ja/helpers/splitIntoTokensJapanese.js @@ -8,7 +8,7 @@ import TinySegmenter from "tiny-segmenter"; * * @returns {Token[]} The 
tokens. */ -function splitIntoTokensJapanese( sentence ) { +function splitIntoTokensCustom( sentence ) { // Retrieve sentence from sentence class const sentenceText = sentence.text; // Return empty string if sentence is empty @@ -19,4 +19,4 @@ function splitIntoTokensJapanese( sentence ) { const words = new TinySegmenter().segment( sentenceText ); return map( words ); } -export default splitIntoTokensJapanese; +export default splitIntoTokensCustom; From 3367b13e14a86f0591344977d740021da79fd35c Mon Sep 17 00:00:00 2001 From: Hanna Worku Date: Mon, 5 Jun 2023 15:54:03 -0700 Subject: [PATCH 110/616] correct new path --- packages/yoastseo/spec/parse/language/LanguageProcessorSpec.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/yoastseo/spec/parse/language/LanguageProcessorSpec.js b/packages/yoastseo/spec/parse/language/LanguageProcessorSpec.js index 4c033d86c3d..2bf034058c9 100644 --- a/packages/yoastseo/spec/parse/language/LanguageProcessorSpec.js +++ b/packages/yoastseo/spec/parse/language/LanguageProcessorSpec.js @@ -2,7 +2,7 @@ import LanguageProcessor from "../../../src/parse/language/LanguageProcessor"; import Factory from "../../specHelpers/factory"; import memoizedSentenceTokenizer from "../../../src/languageProcessing/helpers/sentence/memoizedSentenceTokenizer"; import Sentence from "../../../src/parse/structure/Sentence"; -import splitIntoTokensCustom from "../../../src/parse/structure/languages/ja/SplitIntoTokensJapanese"; +import splitIntoTokensCustom from "../../../src/languageProcessing/languages/ja/helpers/splitIntoTokensJapanese"; const researcher = Factory.buildMockResearcher( {}, true, false, false, { memoizedTokenizer: memoizedSentenceTokenizer } ); From 9e8a3e06224d992c59557905f1f9c5f7056aae2b Mon Sep 17 00:00:00 2001 From: hdvos Date: Tue, 6 Jun 2023 08:45:29 +0200 Subject: [PATCH 111/616] Adapt getMarkingsInSentenceSpec.js to new implementation of getMarkingsInSentence.js --- .../highlighting/getMarkingsInSentenceSpec.js | 30 +++++++++---------- .../highlighting/getMarkingsInSentence.js | 8 +++-- 2 files changed, 20 insertions(+), 18 deletions(-) diff --git a/packages/yoastseo/spec/languageProcessing/helpers/highlighting/getMarkingsInSentenceSpec.js b/packages/yoastseo/spec/languageProcessing/helpers/highlighting/getMarkingsInSentenceSpec.js index d041919fb33..a46b943f6de 100644 --- a/packages/yoastseo/spec/languageProcessing/helpers/highlighting/getMarkingsInSentenceSpec.js +++ b/packages/yoastseo/spec/languageProcessing/helpers/highlighting/getMarkingsInSentenceSpec.js @@ -7,7 +7,7 @@ const testCases = [ { testDescription: "No markings in sentence", sentence: { text: "This is a sentence.", sourceCodeRange: { startOffset: 0, endOffset: 18 } }, - matchesInSentence: { primaryMatches: [], secondaryMatches: [] }, + matchesInSentence: [], matchWordCustomHelper: false, locale: "en_US", expectedResult: [], @@ -15,7 +15,7 @@ const testCases = [ { testDescription: "One marking in sentence", sentence: { text: "This is a sentence.", sourceCodeRange: { startOffset: 0, endOffset: 18 } }, - matchesInSentence: { primaryMatches: [ [ { sourceCodeRange: { startOffset: 0, endOffset: 4 } } ] ], secondaryMatches: [] }, + matchesInSentence: [ { sourceCodeRange: { startOffset: 0, endOffset: 4 } } ], matchWordCustomHelper: false, locale: "en_US", expectedResult: [ new Mark( { @@ -27,7 +27,7 @@ const testCases = [ { testDescription: "One marking in sentence with two consecutive matches", sentence: { text: "This is a sentence.", sourceCodeRange: { startOffset: 0, 
endOffset: 18 } }, - matchesInSentence: { primaryMatches: [ [ { sourceCodeRange: { startOffset: 0, endOffset: 4 } }, { sourceCodeRange: { startOffset: 5, endOffset: 7 } } ] ], secondaryMatches: [] }, + matchesInSentence: [ { sourceCodeRange: { startOffset: 0, endOffset: 4 } }, { sourceCodeRange: { startOffset: 5, endOffset: 7 } } ], matchWordCustomHelper: false, locale: "en_US", expectedResult: [ new Mark( { @@ -39,7 +39,7 @@ const testCases = [ { testDescription: "Two markings that are not consecutive in sentence", sentence: { text: "This is a sentence.", sourceCodeRange: { startOffset: 0, endOffset: 18 } }, - matchesInSentence: { primaryMatches: [ [ { sourceCodeRange: { startOffset: 0, endOffset: 4 } } ], [ { sourceCodeRange: { startOffset: 10, endOffset: 18 } } ] ], secondaryMatches: [] }, + matchesInSentence: [ { sourceCodeRange: { startOffset: 0, endOffset: 4 } }, { sourceCodeRange: { startOffset: 10, endOffset: 18 } } ], matchWordCustomHelper: false, locale: "en_US", expectedResult: [ @@ -58,7 +58,7 @@ const testCases = [ { testDescription: "One marking in a sentence that has a non-zero startOffset", sentence: { text: "This is a sentence.", sourceCodeRange: { startOffset: 10, endOffset: 38 } }, - matchesInSentence: { primaryMatches: [ [ { sourceCodeRange: { startOffset: 10, endOffset: 14 } } ] ], secondaryMatches: [] }, + matchesInSentence: [ { sourceCodeRange: { startOffset: 10, endOffset: 14 } } ], matchWordCustomHelper: false, locale: "en_US", expectedResult: [ new Mark( { @@ -70,7 +70,7 @@ const testCases = [ { testDescription: "One marking in a sentence of a language that does not use spaces", sentence: { text: "これは文です.", sourceCodeRange: { startOffset: 0, endOffset: 7 } }, - matchesInSentence: { primaryMatches: [ [ { sourceCodeRange: { startOffset: 3, endOffset: 4 } } ] ], secondaryMatches: [] }, + matchesInSentence: [ { sourceCodeRange: { startOffset: 3, endOffset: 4 } } ], matchWordCustomHelper: JapaneseCustomHelper, locale: "ja", expectedResult: [ new Mark( { @@ -82,7 +82,7 @@ const testCases = [ { testDescription: "Two markings that overlap", sentence: { text: "This is a sentence.", sourceCodeRange: { startOffset: 0, endOffset: 18 } }, - matchesInSentence: { primaryMatches: [ [ { sourceCodeRange: { startOffset: 0, endOffset: 7 } } ], [ { sourceCodeRange: { startOffset: 5, endOffset: 9 } } ] ], secondaryMatches: [] }, + matchesInSentence: [ { sourceCodeRange: { startOffset: 0, endOffset: 7 } }, { sourceCodeRange: { startOffset: 5, endOffset: 9 } } ], matchWordCustomHelper: false, locale: "en_US", expectedResult: [ @@ -93,14 +93,14 @@ const testCases = [ } ), ], }, - { - testDescription: "No secondary match if a multi word keyphrase is separated with an underscore in the sentence.", - sentence: { text: "A key sentence with a key_word.", sourceCodeRange: { startOffset: 0, endOffset: 31 } }, - matchesInSentence: { primaryMatches: [ ], secondaryMatches: [ [ { sourceCodeRange: { startOffset: 2, endOffset: 5 } } ] ] }, - matchWordCustomHelper: false, - locale: "en_US", - expectedResult: [], - }, + // { + // testDescription: "No secondary match if a multi word keyphrase is separated with an underscore in the sentence.", + // sentence: { text: "A key sentence with a key_word.", sourceCodeRange: { startOffset: 0, endOffset: 31 } }, + // matchesInSentence: [ { sourceCodeRange: { startOffset: 2, endOffset: 5 } } ] ] }, + // matchWordCustomHelper: false, + // locale: "en_US", + // expectedResult: [], + // }, ]; /* eslint-enable max-len */ diff --git 
a/packages/yoastseo/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js b/packages/yoastseo/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js index 68440bcee9c..c7f1a7abe00 100644 --- a/packages/yoastseo/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js +++ b/packages/yoastseo/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js @@ -56,7 +56,6 @@ const createMarksForSentence = ( sentence, matches, locale ) => { // Merge consecutive and overlapping matches. // TODO adapt last argument once japanese tokenization is implemented. const mergedMatches = mergeConsecutiveAndOverlappingMatches( matches, getLanguage( locale ) !== "ja" ); - console.log( mergedMatches ); const sentenceStartOffset = sentence.sourceCodeRange.startOffset; // Loop through the matches backwards, so that the reference is not affected by the changes. @@ -127,9 +126,13 @@ const mergeConsecutiveAndOverlappingMarkings = ( markings, useSpace = true ) => * @returns {Mark[]} The array of Mark objects of the keyphrase matches. */ function getMarkingsInSentence( sentence, matchesInSentence, matchWordCustomHelper, locale ) { + if ( matchesInSentence.length === 0 ) { + return []; + } + // Create the marked sentence that is used for search based highlighting. const markedSentence = createMarksForSentence( sentence, matchesInSentence, locale ); - console.log( markedSentence ); + // Create the markings for the primary matches. // Note that there is a paradigm shift: // With search based highlighting there would be one marking for the entire sentence. @@ -147,7 +150,6 @@ function getMarkingsInSentence( sentence, matchesInSentence, matchWordCustomHelp original: sentence.text, } ); } ); - console.log( markings ); return mergeConsecutiveAndOverlappingMarkings( markings, getLanguage( locale ) !== "ja" ); } From 9710a673dc8e085b505572e36bf3a0744071f62d Mon Sep 17 00:00:00 2001 From: hdvos Date: Tue, 6 Jun 2023 08:49:24 +0200 Subject: [PATCH 112/616] Adapt tokenizeSpec to new tokenization algorithm --- .../spec/parse/build/private/tokenizeSpec.js | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/packages/yoastseo/spec/parse/build/private/tokenizeSpec.js b/packages/yoastseo/spec/parse/build/private/tokenizeSpec.js index ea4eda7081a..61a71cb7302 100644 --- a/packages/yoastseo/spec/parse/build/private/tokenizeSpec.js +++ b/packages/yoastseo/spec/parse/build/private/tokenizeSpec.js @@ -345,23 +345,9 @@ describe( "A test for the tokenize function", }, }, { - text: "9", + text: "9.3", sourceCodeRange: { startOffset: 35, - endOffset: 36, - }, - }, - { - text: ".", - sourceCodeRange: { - startOffset: 36, - endOffset: 37, - }, - }, - { - text: "3", - sourceCodeRange: { - startOffset: 37, endOffset: 38, }, }, From 21c35862ea81c1be27141fe6f3b6b8d8e8e1fe7e Mon Sep 17 00:00:00 2001 From: hdvos Date: Tue, 6 Jun 2023 14:57:45 +0200 Subject: [PATCH 113/616] use regular expression for matching keywordforms in matchKeyphraseWithSentence.js --- .../helpers/match/matchKeyphraseWithSentence.js | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/packages/yoastseo/src/languageProcessing/helpers/match/matchKeyphraseWithSentence.js b/packages/yoastseo/src/languageProcessing/helpers/match/matchKeyphraseWithSentence.js index 957c0128eb8..f95e6ee3e44 100644 --- a/packages/yoastseo/src/languageProcessing/helpers/match/matchKeyphraseWithSentence.js +++ b/packages/yoastseo/src/languageProcessing/helpers/match/matchKeyphraseWithSentence.js @@ -63,7 +63,8 @@ 
const matchKeyphraseWithSentence = ( keywordForms, sentence ) => { // Compare the matchtoken with the keyword forms. keywordForms.forEach( ( keywordForm ) => { keywordForm.forEach( ( keywordFormPart ) => { - if ( tokenForMatching.text.toLowerCase() === keywordFormPart.toLowerCase() ) { + const keywordFormPartRegex = new RegExp( `^${keywordFormPart}$`, "ig" ); + if ( tokenForMatching.text.match( keywordFormPartRegex ) ) { matches.push( ...tokensForMatching ); } } ); From ac59320c860f31b2b5f41f99d35a5fc8a6673d63 Mon Sep 17 00:00:00 2001 From: hdvos Date: Tue, 6 Jun 2023 15:00:10 +0200 Subject: [PATCH 114/616] adapt keywordCountSpec.js to to new situation --- .../researches/keywordCountSpec.js | 28 +++++++++++++------ 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js index 50999da06ab..5ac44df6435 100644 --- a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js +++ b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js @@ -147,14 +147,17 @@ describe( "Test for counting the keyword in a text", function() { const mockPaper = new Paper( "

A string with a key-word.

" ); buildTree( mockPaper, mockResearcher ); expect( keyphraseCount( mockPaper, mockResearcherMinus ).count ).toBe( 1 ); - expect( keyphraseCount( mockPaper, mockResearcherMinus ).markings ).toEqual( [ ] + expect( keyphraseCount( mockPaper, mockResearcherMinus ).markings ).toEqual( [ new Mark( { + marked: "A string with a key-word.", + original: "A string with a key-word.", + position: { startOffset: 20, endOffset: 28 } } ) ] ); } ); - it( "counts 'key word' in 'key-word'.", function() { + it( "Does not count 'key word' in 'key-word'.", function() { const mockPaper = new Paper( "A string with a key-word." ); buildTree( mockPaper, mockResearcher ); - expect( keyphraseCount( mockPaper, mockResearcherKeyWord ).count ).toBe( 1 ); + expect( keyphraseCount( mockPaper, mockResearcherKeyWord ).count ).toBe( 0 ); } ); it( "counts a string with a keyword with a '_' in it", function() { @@ -220,18 +223,18 @@ describe( "Test for counting the keyword in a text", function() { ); } ); - it( "counts can count dollar sign as in '$keyword'.", function() { + it( "counts dollar sign as in '$keyword'.", function() { const mockPaper = new Paper( "A string with a $keyword." ); buildTree( mockPaper, mockResearcher ); expect( keyphraseCount( mockPaper, mockResearcherDollarSign ).count ).toBe( 1 ); // Markings do not currently work in this condition. } ); - it( "counts 'key word' also in 'key-word'.)", function() { + it( "does not count 'key word' in 'key-word' but counts key word.)", function() { const mockPaper = new Paper( "Lorem ipsum dolor sit amet, key word consectetur key-word adipiscing elit." ); buildTree( mockPaper, mockResearcher ); - expect( keyphraseCount( mockPaper, mockResearcherKeyWord ).count ).toBe( 2 ); - // Note: this behavior might change in in the future. + expect( keyphraseCount( mockPaper, mockResearcherKeyWord ).count ).toBe( 1 ); + // Note: this behavior might change in the future. } ); it( "doesn't count 'key-word' in 'key word'.", function() { @@ -241,6 +244,12 @@ describe( "Test for counting the keyword in a text", function() { // Note: this behavior might change in in the future. } ); + it( "Does not count a third consecutive keyword in a sentence.", function() { + const mockPaper = new Paper( "

A string with keyword keyword keyword.

" ); + buildTree( mockPaper, mockResearcher ); + expect( keyphraseCount( mockPaper, mockResearcher ).count ).toBe( 2 ); + } ); + it( "only counts full key phrases (when all keywords are in the sentence once, twice etc.) as matches.", function() { const mockPaper = new Paper( "

A string with three keys (key and another key) and one word.

" ); buildTree( mockPaper, mockResearcher ); @@ -281,8 +290,9 @@ describe( "Test for counting the keyword in a text", function() { // Note: this behavior might change in the future. } ); - it.skip( "doesn't match singular forms in reduplicated plurals in Indonesian", function() { - const mockPaper = new Paper( "

Lorem ipsum dolor sit amet, consectetur keyword-keyword, keyword adipiscing elit.

", { locale: "id_ID" } ); + it( "doesn't match singular forms in reduplicated plurals in Indonesian", function() { + const mockPaper = new Paper( "

Lorem ipsum dolor sit amet, consectetur keyword-keyword, keyword adipiscing elit.

", + { locale: "id_ID" } ); buildTree( mockPaper, mockResearcher ); expect( keyphraseCount( mockPaper, mockResearcher ).count ).toBe( 1 ); } ); From a64acbbf6eb6d5cfeb6e78a6f66fdfbb6472f197 Mon Sep 17 00:00:00 2001 From: hdvos Date: Tue, 6 Jun 2023 15:00:50 +0200 Subject: [PATCH 115/616] skip japanese specs in keywordCountSpec.js until japanese tokenizer is done. --- .../languageProcessing/researches/keywordCountSpec.js | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js index 5ac44df6435..f3eba938805 100644 --- a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js +++ b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js @@ -325,7 +325,7 @@ const buildJapaneseMockResearcher = function( keyphraseForms, helper1, helper2 ) // Decided not to remove test below as it tests the added logic of the Japanese helpers. describe( "Test for counting the keyword in a text for Japanese", () => { - it( "counts/marks a string of text with a keyword in it.", function() { + it.skip( "counts/marks a string of text with a keyword in it.", function() { const mockPaper = new Paper( "

私の猫はかわいいです。 { original: "私の猫はかわいいです。", position: { endOffset: 6, startOffset: 5 } } ) ] ); } ); - it( "counts/marks a string of text with multiple occurences of the same keyword in it.", function() { + it.skip( "counts/marks a string of text with multiple occurences of the same keyword in it.", function() { const mockPaper = new Paper( "

私の猫はかわいい猫です。 { position: { endOffset: 12, startOffset: 11 } } ) ] ); } ); - it( "counts a string of text with no keyword in it.", function() { + it.skip( "counts a string of text with no keyword in it.", function() { const mockPaper = new Paper( "私の猫はかわいいです。", { locale: "ja" } ); const researcher = buildJapaneseMockResearcher( [ [ "猫" ], [ "会い" ] ], wordsCountHelper, matchWordsHelper ); buildTree( mockPaper, researcher ); @@ -359,7 +359,7 @@ describe( "Test for counting the keyword in a text for Japanese", () => { expect( keyphraseCount( mockPaper, researcher ).markings ).toEqual( [] ); } ); - it( "counts multiple occurrences of a keyphrase consisting of multiple words.", function() { + it.skip( "counts multiple occurrences of a keyphrase consisting of multiple words.", function() { const mockPaper = new Paper( "

私の猫はかわいいですかわいい。

", { locale: "ja" } ); const researcher = buildJapaneseMockResearcher( [ [ "猫" ], [ "かわいい" ] ], wordsCountHelper, matchWordsHelper ); buildTree( mockPaper, researcher ); From a2371f11c5434e4a2e97213bc21453d059f9d6b6 Mon Sep 17 00:00:00 2001 From: hdvos Date: Tue, 6 Jun 2023 15:13:04 +0200 Subject: [PATCH 116/616] add removeConsecutiveMatches to keywordCount.js --- .../researches/keywordCount.js | 59 +++++++++++++++---- 1 file changed, 46 insertions(+), 13 deletions(-) diff --git a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js index 21977f2bfbd..a3bf915eee5 100644 --- a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js +++ b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js @@ -2,13 +2,16 @@ import { flatten } from "lodash-es"; import getSentencesFromTree from "../helpers/sentence/getSentencesFromTree"; import { normalizeSingle } from "../helpers/sanitize/quotes"; import getMarkingsInSentence from "../helpers/highlighting/getMarkingsInSentence"; -import findKeyWordFormsInSentence from "../helpers/match/findKeyWordFormsInSentence"; -// import matchKeywordWithSentence from "../helpers/match/matchKeywordWithSentence"; import matchKeyphraseWithSentence from "../helpers/match/matchKeyphraseWithSentence"; +/** + * Counts the number of matches for a keyphrase in a sentence. + * @param {Token[]} matches The matches to count. + * @param {(string[])[]} keyphraseForms Keyphraseforms that were used for matching. + * @returns {number} The number of matches. + */ const countMatches = ( matches, keyphraseForms ) => { // the count is the number of complete matches. - const matchesCopy = [ ...matches ]; let nrMatches = 0; @@ -25,7 +28,8 @@ const countMatches = ( matches, keyphraseForms ) => { // check if any of the keyphrase forms is in the matches. const foundMatch = matchesCopy.find( match =>{ return keyphraseForm.some( keyphraseFormWord => { - return match.text.toLowerCase() === keyphraseFormWord.toLowerCase(); + const theRegex = new RegExp( `^${keyphraseFormWord}$`, "ig" ); + return match.text.match( theRegex ); } ); } ); // @@ -36,13 +40,6 @@ const countMatches = ( matches, keyphraseForms ) => { } if ( nrKeyphraseFormsWithMatch === keyphraseForms.length ) { nrMatches += 1; - - // const match = matches.find(match => match === keyphraseForm); - // if (match) { - // matches.splice(matches.indexOf(match), 1); - // } else { - // return matches.length; - // } } else { return nrMatches; } @@ -50,6 +47,41 @@ const countMatches = ( matches, keyphraseForms ) => { return nrMatches; }; +/** + * Creates a new array in which consecutive matches are removed. A consecutive match occures if the same keyphrase occurs more than twice in a row. + * The first and second match are kept, the rest is removed. + * @param {Token[]} matches An array of all matches. (Including consecutive matches). + * @returns {Token[]} An array of matches without consecutive matches. + */ +const removeConsecutiveMatches = ( matches ) => { + // If there are three or more matches in a row, remove all but the first and the second. 
+ + const matchesCopy = [ ...matches ]; + // const matchesCopy = cloneDeep( matches ); + let nrConsecutiveMatches = 0; + let previousMatch = null; + const result = []; + + for ( let i = 0; i < matchesCopy.length; i++ ) { + const match = matchesCopy[ i ]; + + if ( previousMatch && match.sourceCodeRange.startOffset === previousMatch.sourceCodeRange.endOffset + 1 && + match.text.toLowerCase() === previousMatch.text.toLowerCase() ) { + nrConsecutiveMatches += 1; + } else { + nrConsecutiveMatches = 0; + } + + if ( nrConsecutiveMatches < 2 ) { + result.push( match ); + } + + previousMatch = match; + } + + return result; +}; + /** * Counts the occurrences of the keyphrase in the text and creates the Mark objects for the matches. * @@ -66,9 +98,10 @@ export function countKeyphraseInText( sentences, topicForms, locale, matchWordCu sentences.forEach( sentence => { // const matchesInSentence = findKeyWordFormsInSentence( sentence, topicForms.keyphraseForms, locale, matchWordCustomHelper ); const matchesInSentence = matchKeyphraseWithSentence( topicForms.keyphraseForms, sentence ); - console.log( matchesInSentence); - const matchesCount = countMatches( matchesInSentence, topicForms.keyphraseForms ); + const matchesInSentenceWithoutConsecutiveMatches = removeConsecutiveMatches( matchesInSentence ); + const matchesCount = countMatches( matchesInSentenceWithoutConsecutiveMatches, topicForms.keyphraseForms ); const markings = getMarkingsInSentence( sentence, matchesInSentence, matchWordCustomHelper, locale ); + // const markings = getMarkingsInSentence( sentence, matchesInSentence, matchWordCustomHelper, locale ); result.markings.push( markings ); result.count += matchesCount; From 25a05922f066d70022a37c017a78dfe86e37d776 Mon Sep 17 00:00:00 2001 From: hdvos Date: Tue, 6 Jun 2023 15:13:41 +0200 Subject: [PATCH 117/616] fix eslint issues in getMarkingsInSentence.js --- .../helpers/highlighting/getMarkingsInSentence.js | 2 -- 1 file changed, 2 deletions(-) diff --git a/packages/yoastseo/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js b/packages/yoastseo/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js index c7f1a7abe00..98f80447499 100644 --- a/packages/yoastseo/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js +++ b/packages/yoastseo/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js @@ -1,4 +1,3 @@ -import { flatten } from "lodash-es"; import Mark from "../../../../src/values/Mark"; import { getLanguage } from "../../../../src/languageProcessing"; @@ -54,7 +53,6 @@ const createMarksForSentence = ( sentence, matches, locale ) => { // Create one array with both primary and secondary matches, sorted by start offset. // Merge consecutive and overlapping matches. - // TODO adapt last argument once japanese tokenization is implemented. 
const mergedMatches = mergeConsecutiveAndOverlappingMatches( matches, getLanguage( locale ) !== "ja" ); const sentenceStartOffset = sentence.sourceCodeRange.startOffset; From abd8faf8c52b53ca9a876c1f79c69b5bbeede777 Mon Sep 17 00:00:00 2001 From: hdvos Date: Tue, 6 Jun 2023 15:14:04 +0200 Subject: [PATCH 118/616] adapt specs in getKeywordDensitySpec.js --- .../languageProcessing/researches/getKeywordDensitySpec.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/yoastseo/spec/languageProcessing/researches/getKeywordDensitySpec.js b/packages/yoastseo/spec/languageProcessing/researches/getKeywordDensitySpec.js index 7d8e912a2d1..c1dbc1f51b3 100644 --- a/packages/yoastseo/spec/languageProcessing/researches/getKeywordDensitySpec.js +++ b/packages/yoastseo/spec/languageProcessing/researches/getKeywordDensitySpec.js @@ -67,13 +67,13 @@ describe( "Test for counting the keyword density in a text with an English resea expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 0 ); } ); - it( "should recognize a multiword keyphrase when it is occurs hyphenated", function() { + it( "should not recognize a multiword keyphrase when it is occurs hyphenated", function() { const mockPaper = new Paper( "a string of text with the key-word in it, density should be 7.7%", { keyword: "key word" } ); const mockResearcher = new EnglishResearcher( mockPaper ); buildTree( mockPaper, mockResearcher ); // This behavior might change in the future. - expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 7.6923076923076925 ); + expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 0 ); } ); it( "should recognize a keyphrase with an ampersand in it", function() { From 8323b7d2b88cf56b3aed4f68e6205b2ee0e1470c Mon Sep 17 00:00:00 2001 From: hdvos Date: Tue, 6 Jun 2023 15:34:39 +0200 Subject: [PATCH 119/616] skip japanese specs --- .../languageProcessing/researches/getKeywordDensitySpec.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/yoastseo/spec/languageProcessing/researches/getKeywordDensitySpec.js b/packages/yoastseo/spec/languageProcessing/researches/getKeywordDensitySpec.js index c1dbc1f51b3..0a3085d7bf9 100644 --- a/packages/yoastseo/spec/languageProcessing/researches/getKeywordDensitySpec.js +++ b/packages/yoastseo/spec/languageProcessing/researches/getKeywordDensitySpec.js @@ -179,7 +179,7 @@ describe( "test for counting the keyword density in a text in a language that us expect( getKeywordDensity( mockPaper, mockResearcher ) ).toBe( 0 ); } );*/ - it( "returns the keyword density when the keyword is found once", function() { + it.skip( "returns the keyword density when the keyword is found once", function() { const mockPaper = new Paper( "私の猫はかわいいです。", { keyword: "猫" } ); const mockResearcher = new JapaneseResearcher( mockPaper ); mockResearcher.addResearchData( "morphology", morphologyDataJA ); @@ -187,7 +187,7 @@ describe( "test for counting the keyword density in a text in a language that us expect( getKeywordDensity( mockPaper, mockResearcher ) ).toBe( 16.666666666666664 ); } ); - it( "returns the keyword density when the keyword contains multiple words and the sentence contains an inflected form", function() { + it.skip( "returns the keyword density when the keyword contains multiple words and the sentence contains an inflected form", function() { // 小さく is the inflected form of 小さい. 
const mockPaper = new Paper( "小さくて可愛い花の刺繍に関する一般一般の記事です。", { keyword: "小さい花の刺繍" } ); const mockResearcher = new JapaneseResearcher( mockPaper ); From c67c6d422920d8a53921c8139a0ef1997b59a842 Mon Sep 17 00:00:00 2001 From: hdvos Date: Wed, 7 Jun 2023 13:05:10 +0200 Subject: [PATCH 120/616] add specs for double quoted keyphrases. --- .../match/matchKeyphraseWithSentenceSpec.js | 99 +++++++++++++++++++ .../researches/keywordCountSpec.js | 13 +++ 2 files changed, 112 insertions(+) diff --git a/packages/yoastseo/spec/languageProcessing/helpers/match/matchKeyphraseWithSentenceSpec.js b/packages/yoastseo/spec/languageProcessing/helpers/match/matchKeyphraseWithSentenceSpec.js index b4f761033bb..0712dcdb8fa 100644 --- a/packages/yoastseo/spec/languageProcessing/helpers/match/matchKeyphraseWithSentenceSpec.js +++ b/packages/yoastseo/spec/languageProcessing/helpers/match/matchKeyphraseWithSentenceSpec.js @@ -597,6 +597,105 @@ const testCases = [ { text: "s", sourceCodeRange: { startOffset: 18, endOffset: 19 } }, ], }, + { + testDescription: "Matches a single word keyphrase if keyphrase is doubleQuoted.", + sentence: { + text: "A sentence with a keyphrase.", + tokens: [ + { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, + { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, + { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, + { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, + { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, + { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, + { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, + { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, + { text: "keyphrase", sourceCodeRange: { startOffset: 18, endOffset: 27 } }, + { text: ".", sourceCodeRange: { startOffset: 27, endOffset: 28 } }, + ], + sourceCodeRange: { startOffset: 0, endOffset: 28 } }, + keyphraseForms: [ [ "keyphrase" ] ], + expectedResult: [ + { text: "keyphrase", sourceCodeRange: { startOffset: 18, endOffset: 27 } }, + ], + }, + { + testDescription: "Matches a multi word keyphrase with a space as result of the keyphrase being double quoted.", + sentence: { + text: "A sentence with a key phrase.", + tokens: [ + { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, + { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, + { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, + { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, + { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, + { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, + { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, + { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, + { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, + { text: " ", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, + { text: "phrase", sourceCodeRange: { startOffset: 22, endOffset: 28 } }, + { text: ".", sourceCodeRange: { startOffset: 28, endOffset: 29 } }, + ], + sourceCodeRange: { startOffset: 0, endOffset: 29 } }, + keyphraseForms: [ [ "key phrase" ] ], + expectedResult: [ + { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, + { text: " ", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, + { text: "phrase", sourceCodeRange: { startOffset: 22, endOffset: 28 } }, + ], + }, + { + testDescription: "Does not match multiword keyphrase in opposite 
order if keyphrase is doubleQuoted.", + sentence: { + text: "A sentence with a phrase key.", + tokens: [ + { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, + { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, + { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, + { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, + { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, + { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, + { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, + { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, + { text: "phrase", sourceCodeRange: { startOffset: 18, endOffset: 24 } }, + { text: " ", sourceCodeRange: { startOffset: 24, endOffset: 25 } }, + { text: "key", sourceCodeRange: { startOffset: 25, endOffset: 28 } }, + { text: ".", sourceCodeRange: { startOffset: 28, endOffset: 29 } }, + ], + sourceCodeRange: { startOffset: 0, endOffset: 29 } }, + keyphraseForms: [ [ "key phrase" ] ], + expectedResult: [], + }, + { + testDescription: "Does not match plural if singular keyphrase is doubleQuoted.", + sentence: { + text: "A sentence with keyphrases.", + }, + keyphraseForms: [ [ "keyphrase" ] ], + expectedResult: [], + }, + { + testDescription: "Does not match singular if plural keyphrase is doubleQuoted.", + sentence: { + text: "A sentence with a keyphrase.", + tokens: [ + { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, + { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, + { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, + { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, + { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, + { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, + { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, + { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, + { text: "keyphrase", sourceCodeRange: { startOffset: 18, endOffset: 27 } }, + { text: ".", sourceCodeRange: { startOffset: 27, endOffset: 28 } }, + ], + sourceCodeRange: { startOffset: 0, endOffset: 28 } }, + keyphraseForms: [ [ "keyphrases" ] ], + expectedResult: [], + }, + ]; // eslint-enable max-len diff --git a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js index f3eba938805..3d2e8fd4ac3 100644 --- a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js +++ b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js @@ -32,6 +32,7 @@ const mockResearcherKeyWord = buildMorphologyMockResearcher( [ [ "key", "keys" ] const mockResearcherKaplaki = buildMorphologyMockResearcher( [ [ "kapaklı" ] ] ); const mockResearcherAmpersand = buildMorphologyMockResearcher( [ [ "key&word" ] ] ); const mockResearcherApostrophe = buildMorphologyMockResearcher( [ [ "key`word" ] ] ); +const mockResearcherDubbelQuotedKeyphrase = buildMorphologyMockResearcher( [ [ "key word" ] ] ); // Escape, since the morphology researcher escapes regex as well. const mockResearcherDollarSign = buildMorphologyMockResearcher( [ [ "\\$keyword" ] ] ); @@ -230,6 +231,18 @@ describe( "Test for counting the keyword in a text", function() { // Markings do not currently work in this condition. 
} ); + it( "can recognize a focus keyphrase that is double quoted", function() { + const mockPaper = new Paper( "A string with a key word." ); + buildTree( mockPaper, mockResearcher ); + expect( keyphraseCount( mockPaper, mockResearcherDubbelQuotedKeyphrase ).count ).toBe( 1 ); + } ); + + it( "can recognize multiple focus keyphrases that are double quoted", function() { + const mockPaper = new Paper( "A string with a key word and a key word." ); + buildTree( mockPaper, mockResearcher ); + expect( keyphraseCount( mockPaper, mockResearcherDubbelQuotedKeyphrase ).count ).toBe( 2 ); + } ); + it( "does not count 'key word' in 'key-word' but counts key word.)", function() { const mockPaper = new Paper( "Lorem ipsum dolor sit amet, key word consectetur key-word adipiscing elit." ); buildTree( mockPaper, mockResearcher ); From 841fa265b02355e5a90c9e708f3412d6dc36d581 Mon Sep 17 00:00:00 2001 From: hdvos Date: Wed, 7 Jun 2023 14:23:31 +0200 Subject: [PATCH 121/616] fix bug where splitIntoTokens could not access researcher --- packages/yoastseo/src/parse/build/private/tokenize.js | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/packages/yoastseo/src/parse/build/private/tokenize.js b/packages/yoastseo/src/parse/build/private/tokenize.js index 6f8933fd0ba..ede3adef6f8 100644 --- a/packages/yoastseo/src/parse/build/private/tokenize.js +++ b/packages/yoastseo/src/parse/build/private/tokenize.js @@ -6,12 +6,12 @@ import getTextElementPositions from "./getTextElementPositions"; * * @param {Paragraph|Heading} node The paragraph or heading node to split into sentences. * @param {Sentence} sentence The sentence. - * @param {function} splitIntoTokens The function to use to split the sentence into tokens. + * @param {function} LanguageProcessor The languageprocessor for the current language. * * @returns {Sentence} The sentence, with tokens. */ -function getTokens( node, sentence, splitIntoTokens ) { - sentence.tokens = splitIntoTokens( sentence ); +function getTokens( node, sentence, LanguageProcessor ) { + sentence.tokens = LanguageProcessor.splitIntoTokens( sentence ); sentence.tokens = getTextElementPositions( node, sentence.tokens, sentence.sourceCodeRange.startOffset ); return sentence; } @@ -31,7 +31,7 @@ function getSentences( node, languageProcessor ) { // Add position information to the sentences. sentences = getTextElementPositions( node, sentences ); // Tokenize sentences into tokens. - return sentences.map( sentence => getTokens( node, sentence, languageProcessor.splitIntoTokens ) ); + return sentences.map( sentence => getTokens( node, sentence, languageProcessor ) ); } /** From 6a6b30531ecb1f608831235cefd6e0a055e6da0f Mon Sep 17 00:00:00 2001 From: hdvos Date: Thu, 8 Jun 2023 16:31:22 +0200 Subject: [PATCH 122/616] change japanese example in buildSpec.js --- .../yoastseo/spec/parse/build/buildSpec.js | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/packages/yoastseo/spec/parse/build/buildSpec.js b/packages/yoastseo/spec/parse/build/buildSpec.js index 811a347d2f9..82774586362 100644 --- a/packages/yoastseo/spec/parse/build/buildSpec.js +++ b/packages/yoastseo/spec/parse/build/buildSpec.js @@ -69,10 +69,10 @@ describe( "The parse function", () => { } ); it( "parses a basic Japanese HTML text", () => { - const html = "

こんにちは世界!

"; + const html = "

犬が大好き

"; const researcher = Factory.buildMockResearcher( {}, true, false, false, - { splitIntoTokensCustom: splitIntoTokensCustom } ); + { splitIntoTokensCustom: splitIntoTokensCustom, memoizedTokenizer: memoizedSentenceTokenizer } ); const languageProcessor = new LanguageProcessor( researcher ); expect( build( html, languageProcessor ) ).toEqual( { name: "#document-fragment", @@ -81,14 +81,14 @@ describe( "The parse function", () => { name: "div", sourceCodeLocation: { startOffset: 0, - endOffset: 45, + endOffset: 37, startTag: { startOffset: 0, endOffset: 5, }, endTag: { - startOffset: 39, - endOffset: 45, + startOffset: 31, + endOffset: 37, }, }, attributes: {}, @@ -99,28 +99,28 @@ describe( "The parse function", () => { "class": new Set( [ "yoast" ] ), }, sentences: [ { - text: "こんにちは世界!", - sourceCodeRange: { startOffset: 22, endOffset: 35 }, + text: "犬が大好き", + sourceCodeRange: { startOffset: 22, endOffset: 27 }, tokens: [ - { text: "こんにちは", sourceCodeRange: { startOffset: 22, endOffset: 27 } }, - { text: "世界", sourceCodeRange: { startOffset: 29, endOffset: 34 } }, - { text: "!", sourceCodeRange: { startOffset: 34, endOffset: 35 } }, + { text: "犬", sourceCodeRange: { startOffset: 22, endOffset: 23 } }, + { text: "が", sourceCodeRange: { startOffset: 23, endOffset: 24 } }, + { text: "大好き", sourceCodeRange: { startOffset: 24, endOffset: 27 } }, ], } ], childNodes: [ { name: "#text", - value: "こんにちは世界!", + value: "犬が大好き", } ], sourceCodeLocation: { startOffset: 5, - endOffset: 39, + endOffset: 31, startTag: { startOffset: 5, endOffset: 22, }, endTag: { - startOffset: 35, - endOffset: 39, + startOffset: 27, + endOffset: 31, }, }, } ], From bc4c908b9dbdf601514fe1111cb8b64cdecf25d6 Mon Sep 17 00:00:00 2001 From: hdvos Date: Fri, 9 Jun 2023 10:04:14 +0200 Subject: [PATCH 123/616] add specs for splitIntoTokensJapanese --- .../ja/helpers/splitIntoTokensJapaneseSpec.js | 57 +++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 packages/yoastseo/spec/languageProcessing/languages/ja/helpers/splitIntoTokensJapaneseSpec.js diff --git a/packages/yoastseo/spec/languageProcessing/languages/ja/helpers/splitIntoTokensJapaneseSpec.js b/packages/yoastseo/spec/languageProcessing/languages/ja/helpers/splitIntoTokensJapaneseSpec.js new file mode 100644 index 00000000000..62720611b51 --- /dev/null +++ b/packages/yoastseo/spec/languageProcessing/languages/ja/helpers/splitIntoTokensJapaneseSpec.js @@ -0,0 +1,57 @@ +import splitIntoTokensJapanese from "../../../../../src/languageProcessing/languages/ja/helpers/splitIntoTokensJapanese"; + +const testcases = [ + { + description: "should return an empty result sentence is empty", + sentence: { text: "", sourceCodeRange: { startOffset: 0, endOffset: 0 } }, + expected: [], + }, + { + description: "should correctly tokenize a simple Japanese sentence wouthout punctuations", + sentence: { + text: "犬が大好き", + sourceCodeRange: { startOffset: 0, endOffset: 5 }, + }, + expected: [ "犬", "が", "大好き" ], + }, + { + description: "should correctly tokenize a Japanese sentence with japanese punctuations", + sentence: { + text: "犬が大好き\u3002", + sourceCodeRange: { startOffset: 0, endOffset: 6 }, + }, + expected: [ "犬", "が", "大好き", "。" ], + + }, + { + description: "should correctly tokenize a Japanese sentence with english punctuations", + sentence: { + text: "犬が大好き.", + sourceCodeRange: { startOffset: 0, endOffset: 6 }, + }, + expected: [ "犬", "が", "大好き", "." 
], + }, + { + description: "should correctly tokenize a Japanese sentence with quotation marks inside", + sentence: { + text: "犬「が」大好き\u3002", + sourceCodeRange: { startOffset: 0, endOffset: 8 }, + }, + expected: [ "犬", "「", "が", "」", "大好き", "。" ], + }, + { + description: "should correctly tokenize a Japanese sentence with quotation marks around", + sentence: { + text: "『犬が大好き\u3002』", + sourceCodeRange: { startOffset: 0, endOffset: 8 }, + }, + expected: [ "『", "犬", "が", "大好き", "。", "』" ], + }, +]; + +describe.each( testcases )( "splitIntoTokensJapanese: %p", ( { description, sentence, expected } ) => { + it( description, () => { + const tokens = splitIntoTokensJapanese( sentence ); + expect( tokens ).toEqual( expected ); + } ); +} ); From d66192517edafd171a84e04122e7412af2603dc9 Mon Sep 17 00:00:00 2001 From: hdvos Date: Fri, 9 Jun 2023 10:04:39 +0200 Subject: [PATCH 124/616] add spec for japanese in tokenizeSpec.js --- .../spec/parse/build/private/tokenizeSpec.js | 79 +++++++++++++++++++ 1 file changed, 79 insertions(+) diff --git a/packages/yoastseo/spec/parse/build/private/tokenizeSpec.js b/packages/yoastseo/spec/parse/build/private/tokenizeSpec.js index 12651e4c272..a08cab6160c 100644 --- a/packages/yoastseo/spec/parse/build/private/tokenizeSpec.js +++ b/packages/yoastseo/spec/parse/build/private/tokenizeSpec.js @@ -1,6 +1,7 @@ import tokenize from "../../../../src/parse/build/private/tokenize"; import Paper from "../../../../src/values/Paper"; import EnglishResearcher from "../../../../src/languageProcessing/languages/en/Researcher"; +import JapaneseResearcher from "../../../../src/languageProcessing/languages/ja/Researcher"; import { buildTreeNoTokenize } from "../../../specHelpers/parse/buildTree"; import LanguageProcessor from "../../../../src/parse/language/LanguageProcessor"; @@ -192,3 +193,81 @@ describe( "A test for the tokenize function", function() { } ); } ); } ); + +describe( "A test for tokenizing a japanese sentence", function() { + it( "should correctly tokenize a simple Japanse sentence.", function() { + const mockPaper = new Paper( "

犬が大好き\u3002

", { locale: "ja_JP" } ); + const mockResearcher = new JapaneseResearcher( mockPaper ); + const languageProcessor = new LanguageProcessor( mockResearcher ); + buildTreeNoTokenize( mockPaper ); + // eslint-disable-next-line max-len + expect( tokenize( mockPaper.getTree(), languageProcessor ) ).toEqual( { + attributes: {}, + childNodes: [ + { + attributes: {}, + childNodes: [ + { + name: "#text", + value: "犬が大好き。", + }, + ], + isImplicit: false, + name: "p", + sentences: [ + { + sourceCodeRange: { + startOffset: 3, + endOffset: 9, + }, + text: "犬が大好き。", + tokens: [ + { + sourceCodeRange: { + startOffset: 3, + endOffset: 4, + }, + text: "犬", + }, + { + sourceCodeRange: { + startOffset: 4, + endOffset: 5, + }, + text: "が", + }, + { + sourceCodeRange: { + startOffset: 5, + endOffset: 8, + }, + text: "大好き", + }, + { + sourceCodeRange: { + startOffset: 8, + endOffset: 9, + }, + text: "。", + }, + ], + }, + ], + sourceCodeLocation: { + startOffset: 0, + endOffset: 13, + startTag: { + startOffset: 0, + endOffset: 3, + }, + endTag: { + startOffset: 9, + endOffset: 13, + }, + }, + }, + ], + name: "#document-fragment", + } ); + } ); +} ); From 254d268abb4a5b2b266b464d366b3a60ddb0bc54 Mon Sep 17 00:00:00 2001 From: hdvos Date: Fri, 9 Jun 2023 10:10:39 +0200 Subject: [PATCH 125/616] fix double declaration of researcher --- .../yoastseo/spec/parse/language/LanguageProcessorSpec.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/yoastseo/spec/parse/language/LanguageProcessorSpec.js b/packages/yoastseo/spec/parse/language/LanguageProcessorSpec.js index 2bf034058c9..88e94c842b3 100644 --- a/packages/yoastseo/spec/parse/language/LanguageProcessorSpec.js +++ b/packages/yoastseo/spec/parse/language/LanguageProcessorSpec.js @@ -47,9 +47,9 @@ describe( "A test for the splitIntoTokens method", () => { describe( "A test for the splitIntoTokens method in Japanese", () => { it( "should return an array of tokens", function() { - const researcher = Factory.buildMockResearcher( {}, true, false, false, + const japaneseResearcher = Factory.buildMockResearcher( {}, true, false, false, { splitIntoTokensCustom: splitIntoTokensCustom } ); - const languageProcessor = new LanguageProcessor( researcher ); + const languageProcessor = new LanguageProcessor( japaneseResearcher ); const tokens = languageProcessor.splitIntoTokens( new Sentence( "こんにちは世界!" ) ); expect( tokens ).toEqual( [ { text: "こん", sourceCodeRange: {} }, From dbc907fb9d8aeea35203d4c7a2ff383771e53835 Mon Sep 17 00:00:00 2001 From: Marina Koleva Date: Mon, 12 Jun 2023 10:10:20 +0200 Subject: [PATCH 126/616] adapting one unit test in LanguageProcessorSpec.js --- .../spec/parse/language/LanguageProcessorSpec.js | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/packages/yoastseo/spec/parse/language/LanguageProcessorSpec.js b/packages/yoastseo/spec/parse/language/LanguageProcessorSpec.js index 98097aa375a..4f023b01c39 100644 --- a/packages/yoastseo/spec/parse/language/LanguageProcessorSpec.js +++ b/packages/yoastseo/spec/parse/language/LanguageProcessorSpec.js @@ -439,13 +439,17 @@ describe( "A test for the splitIntoTokens method in Japanese", () => { const japaneseResearcher = Factory.buildMockResearcher( {}, true, false, false, { splitIntoTokensCustom: splitIntoTokensCustom } ); const languageProcessor = new LanguageProcessor( japaneseResearcher ); - const tokens = languageProcessor.splitIntoTokens( new Sentence( "こんにちは世界!" 
) ); + const tokens = languageProcessor.splitIntoTokens( new Sentence( "ウクライナは、東ヨーロッパに位置する国家。" ) ); expect( tokens ).toEqual( [ - { text: "こん", sourceCodeRange: {} }, - { text: "にち", sourceCodeRange: {} }, + { text: "ウクライナ", sourceCodeRange: {} }, { text: "は", sourceCodeRange: {} }, - { text: "世界", sourceCodeRange: {} }, - { text: "!", sourceCodeRange: {} }, + { text: "、", sourceCodeRange: {} }, + { text: "東ヨーロッパ", sourceCodeRange: {} }, + { text: "に", sourceCodeRange: {} }, + { text: "位置", sourceCodeRange: {} }, + { text: "する", sourceCodeRange: {} }, + { text: "国家", sourceCodeRange: {} }, + { text: "。", sourceCodeRange: {} }, ] ); } ); } ); From ee659c1707d314498afe952b9b97f97f5f8642d0 Mon Sep 17 00:00:00 2001 From: hdvos Date: Tue, 13 Jun 2023 10:11:23 +0200 Subject: [PATCH 127/616] create helper for tokenizing words for html parser --- .../helpers/word/getWordsForHTMLParser.js | 58 +++++++++++++++++++ 1 file changed, 58 insertions(+) create mode 100644 packages/yoastseo/src/languageProcessing/helpers/word/getWordsForHTMLParser.js diff --git a/packages/yoastseo/src/languageProcessing/helpers/word/getWordsForHTMLParser.js b/packages/yoastseo/src/languageProcessing/helpers/word/getWordsForHTMLParser.js new file mode 100644 index 00000000000..f6721ac39eb --- /dev/null +++ b/packages/yoastseo/src/languageProcessing/helpers/word/getWordsForHTMLParser.js @@ -0,0 +1,58 @@ +import { punctuationRegexEnd, punctuationRegexStart } from "../sanitize/removePunctuation"; + +/* + * The following regex matches a word separator. A word separator is either a whitespace, a slash, a backslash, a + * tab or a non-breaking space. + * The regex is used to split a text into tokens. + * Do not add punctuation marks to this regex, as they are handled separately inside splitIntoTokens(). + * The word separator explicitly only contains characters that split two words and not a word and a space. + * A space is a word separator because it separates two words if it occurs between two words. For example: "foo bar" + * A slash is a word separator because it separates two words if it directly borders those words. For example: "foo/bar" + * A backslash is a word separator because it separates two words if it occurs between two words. For example: "foo\bar" + * A tab is a word separator because it separates two words if it occurs between two words. For example: "foo bar" + * A non-breaking space is a word separator because it separates two words if it occurs between two words. For example: "foo\u00A0bar" + */ +const wordSeparatorsRegex = /([\s\t\u00A0])/; + +/** + * Returns an array with tokens used in the text. + * @param text + * @return {*[]} + */ +const getWordsForHTMLParser = ( text ) => { + // Split the sentence string into tokens. Those tokens are unrefined as they may contain punctuation. + const rawTokens = text.split( wordSeparatorsRegex ).filter( x => x !== "" ); + + const tokenTexts = []; + rawTokens.forEach( token => { + if ( token === "" ) { + return; + } + // Pretokens contains all that occurs before the first letter of the token. + const preTokens = []; + // Posttokens contains all that occurs after the last letter of the token. + const postTokens = []; + + // Add all punctuation marks that occur before the first letter of the token to the pretokens array. + while ( punctuationRegexStart.test( token ) ) { + preTokens.push( token[ 0 ] ); + token = token.slice( 1 ); + } + // Add all punctuation marks that occur after the last letter of the token to the posttokens array. 
+ while ( punctuationRegexEnd.test( token ) ) { + // Using unshift here because we are iterating from the end of the string to the beginning, + // and we want to keep the order of the punctuation marks. + // Therefore, we add them to the start of the array. + postTokens.unshift( token[ token.length - 1 ] ); + token = token.slice( 0, -1 ); + } + + let currentTokens = [ ...preTokens, token, ...postTokens ]; + currentTokens = currentTokens.filter( x => x !== "" ); + tokenTexts.push( ...currentTokens ); + } ); + + return tokenTexts; +}; + +export default getWordsForHTMLParser; From c208870dd3296bf8ab0142714c502b4f5dd88981 Mon Sep 17 00:00:00 2001 From: hdvos Date: Tue, 13 Jun 2023 10:11:50 +0200 Subject: [PATCH 128/616] call helper for tokenizing words for html parser --- .../src/parse/language/LanguageProcessor.js | 47 ++----------------- 1 file changed, 3 insertions(+), 44 deletions(-) diff --git a/packages/yoastseo/src/parse/language/LanguageProcessor.js b/packages/yoastseo/src/parse/language/LanguageProcessor.js index 68644efefd7..d9043e086b7 100644 --- a/packages/yoastseo/src/parse/language/LanguageProcessor.js +++ b/packages/yoastseo/src/parse/language/LanguageProcessor.js @@ -1,22 +1,11 @@ import Sentence from "../structure/Sentence"; import Token from "../structure/Token"; import { punctuationRegexStart, punctuationRegexEnd } from "../../languageProcessing/helpers/sanitize/removePunctuation"; +import getWordsForHTMLParser from "../../languageProcessing/helpers/word/getWordsForHTMLParser"; const whitespaceRegex = /^\s+$/; -/* - * The following regex matches a word separator. A word separator is either a whitespace, a slash, a backslash, a - * tab or a non-breaking space. - * The regex is used to split a text into tokens. - * Do not add punctuation marks to this regex, as they are handled separately inside splitIntoTokens(). - * The word separator explicitly only contains characters that split two words and not a word and a space. - * A space is a word separator because it separates two words if it occurs between two words. For example: "foo bar" - * A slash is a word separator because it separates two words if it directly borders those words. For example: "foo/bar" - * A backslash is a word separator because it separates two words if it occurs between two words. For example: "foo\bar" - * A tab is a word separator because it separates two words if it occurs between two words. For example: "foo bar" - * A non-breaking space is a word separator because it separates two words if it occurs between two words. For example: "foo\u00A0bar" - */ -const wordSeparatorsRegex = /([\s\t\u00A0])/; + /** * Contains language-specific logic for splitting a text into sentences and tokens. @@ -72,37 +61,7 @@ class LanguageProcessor { // Retrieve sentence from sentence class const sentenceText = sentence.text; - // Split the sentence string into tokens. Those tokens are unrefined as they may contain punctuation. - const rawTokens = sentenceText.split( wordSeparatorsRegex ).filter( x => x !== "" ); - - const tokenTexts = []; - rawTokens.forEach( token => { - if ( token === "" ) { - return; - } - // Pretokens contains all that occurs before the first letter of the token. - const preTokens = []; - // Posttokens contains all that occurs after the last letter of the token. - const postTokens = []; - - // Add all punctuation marks that occur before the first letter of the token to the pretokens array. 
- while ( punctuationRegexStart.test( token ) ) { - preTokens.push( token[ 0 ] ); - token = token.slice( 1 ); - } - // Add all punctuation marks that occur after the last letter of the token to the posttokens array. - while ( punctuationRegexEnd.test( token ) ) { - // Using unshift here because we are iterating from the end of the string to the beginning, - // and we want to keep the order of the punctuation marks. - // Therefore, we add them to the start of the array. - postTokens.unshift( token[ token.length - 1 ] ); - token = token.slice( 0, -1 ); - } - - let currentTokens = [ ...preTokens, token, ...postTokens ]; - currentTokens = currentTokens.filter( x => x !== "" ); - tokenTexts.push( ...currentTokens ); - } ); + const tokenTexts = getWordsForHTMLParser( sentenceText ); return tokenTexts.map( tokenText => new Token( tokenText ) ); } From 30c10972810135937999216c4d2824261714f157 Mon Sep 17 00:00:00 2001 From: hdvos Date: Tue, 13 Jun 2023 10:12:01 +0200 Subject: [PATCH 129/616] call helper for tokenizing words for html parser --- .../match/matchKeyphraseWithSentenceSpec.js | 1144 ++++++++++------- 1 file changed, 677 insertions(+), 467 deletions(-) diff --git a/packages/yoastseo/spec/languageProcessing/helpers/match/matchKeyphraseWithSentenceSpec.js b/packages/yoastseo/spec/languageProcessing/helpers/match/matchKeyphraseWithSentenceSpec.js index b4f761033bb..d81b0b76d1a 100644 --- a/packages/yoastseo/spec/languageProcessing/helpers/match/matchKeyphraseWithSentenceSpec.js +++ b/packages/yoastseo/spec/languageProcessing/helpers/match/matchKeyphraseWithSentenceSpec.js @@ -3,10 +3,604 @@ import matchKeyphraseWithSentence from "../../../../src/languageProcessing/helpe /* eslint-disable max-len */ const testCases = [ + // { + // testDescription: "No matches in sentence", + // sentence: { + // text: "A sentence with notthekeyphrase.", + // tokens: [ + // { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, + // { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, + // { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, + // { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, + // { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, + // { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, + // { text: "notthekeyphrase", sourceCodeRange: { startOffset: 16, endOffset: 31 } }, + // { text: ".", sourceCodeRange: { startOffset: 31, endOffset: 32 } }, + // ], + // sourceCodeRange: { startOffset: 0, endOffset: 32 } }, + // keyphraseForms: [ [ "keyword", "keywords" ] ], + // expectedResult: [], + // }, + // { + // testDescription: "should return empty result if keyphraseForms is empty", + // sentence: { + // text: "A sentence with notthekeyphrase.", + // tokens: [ + // { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, + // { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, + // { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, + // { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, + // { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, + // { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, + // { text: "notthekeyphrase", sourceCodeRange: { startOffset: 16, endOffset: 31 } }, + // { text: ".", sourceCodeRange: { startOffset: 31, endOffset: 32 } }, + // ], + // sourceCodeRange: { startOffset: 0, endOffset: 32 } }, + // keyphraseForms: [ [] ], + // expectedResult: [], + // }, + // { + // 
testDescription: "One match in sentence of a single-word keyphrase", + // sentence: { + // text: "A sentence with the keyword.", + // tokens: [ + // { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, + // { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, + // { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, + // { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, + // { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, + // { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, + // { text: "the", sourceCodeRange: { startOffset: 16, endOffset: 19 } }, + // { text: " ", sourceCodeRange: { startOffset: 19, endOffset: 20 } }, + // { text: "keyword", sourceCodeRange: { startOffset: 20, endOffset: 27 } }, + // { text: ".", sourceCodeRange: { startOffset: 27, endOffset: 28 } }, + // ], + // sourceCodeRange: { startOffset: 0, endOffset: 28 } }, + // keyphraseForms: [ [ "keyword", "keywords" ] ], + // expectedResult: [ { sourceCodeRange: { startOffset: 20, endOffset: 27 }, text: "keyword" } ], + // }, + // { + // testDescription: "One match in sentence of a multi word keyphrase", + // sentence: { + // text: "A sentence with the key words.", + // tokens: [ + // { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, + // { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, + // { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, + // { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, + // { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, + // { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, + // { text: "the", sourceCodeRange: { startOffset: 16, endOffset: 19 } }, + // { text: " ", sourceCodeRange: { startOffset: 19, endOffset: 20 } }, + // { text: "key", sourceCodeRange: { startOffset: 20, endOffset: 23 } }, + // { text: " ", sourceCodeRange: { startOffset: 23, endOffset: 24 } }, + // { text: "words", sourceCodeRange: { startOffset: 24, endOffset: 29 } }, + // { text: ".", sourceCodeRange: { startOffset: 29, endOffset: 30 } }, + // ], + // sourceCodeRange: { startOffset: 0, endOffset: 30 } }, + // keyphraseForms: [ [ "key" ], [ "word", "words" ] ], + // expectedResult: [ { sourceCodeRange: { endOffset: 23, startOffset: 20 }, text: "key" }, { sourceCodeRange: { endOffset: 29, startOffset: 24 }, text: "words" } ], + // }, + // { + // testDescription: "Disregards word order of multi word keyphrases", + // sentence: { + // text: "A word that is key.", + // tokens: [ + // { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, + // { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, + // { text: "word", sourceCodeRange: { startOffset: 2, endOffset: 6 } }, + // { text: " ", sourceCodeRange: { startOffset: 6, endOffset: 7 } }, + // { text: "that", sourceCodeRange: { startOffset: 7, endOffset: 11 } }, + // { text: " ", sourceCodeRange: { startOffset: 11, endOffset: 12 } }, + // { text: "is", sourceCodeRange: { startOffset: 12, endOffset: 14 } }, + // { text: " ", sourceCodeRange: { startOffset: 14, endOffset: 15 } }, + // { text: "key", sourceCodeRange: { startOffset: 15, endOffset: 18 } }, + // { text: ".", sourceCodeRange: { startOffset: 18, endOffset: 19 } }, + // ], + // sourceCodeRange: { startOffset: 0, endOffset: 19 } }, + // keyphraseForms: [ [ "key" ], [ "word", "words" ] ], + // expectedResult: [ { sourceCodeRange: { endOffset: 6, startOffset: 2 }, text: "word" }, { 
sourceCodeRange: { endOffset: 18, startOffset: 15 }, text: "key" } ], + // }, + // { + // testDescription: "Two matches of multi word keyphrase in sentence", + // sentence: { + // text: "A sentence with the key words and the key word.", + // tokens: [ + // { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, + // { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, + // { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, + // { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, + // { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, + // { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, + // { text: "the", sourceCodeRange: { startOffset: 16, endOffset: 19 } }, + // { text: " ", sourceCodeRange: { startOffset: 19, endOffset: 20 } }, + // { text: "key", sourceCodeRange: { startOffset: 20, endOffset: 23 } }, + // { text: " ", sourceCodeRange: { startOffset: 23, endOffset: 24 } }, + // { text: "words", sourceCodeRange: { startOffset: 24, endOffset: 29 } }, + // { text: " ", sourceCodeRange: { startOffset: 29, endOffset: 30 } }, + // { text: "and", sourceCodeRange: { startOffset: 30, endOffset: 33 } }, + // { text: " ", sourceCodeRange: { startOffset: 33, endOffset: 34 } }, + // { text: "the", sourceCodeRange: { startOffset: 34, endOffset: 37 } }, + // { text: " ", sourceCodeRange: { startOffset: 37, endOffset: 38 } }, + // { text: "key", sourceCodeRange: { startOffset: 38, endOffset: 41 } }, + // { text: " ", sourceCodeRange: { startOffset: 41, endOffset: 42 } }, + // { text: "word", sourceCodeRange: { startOffset: 42, endOffset: 46 } }, + // { text: ".", sourceCodeRange: { startOffset: 46, endOffset: 47 } }, + // ], + // sourceCodeRange: { startOffset: 0, endOffset: 47 } }, + // keyphraseForms: [ [ "key" ], [ "word", "words" ] ], + // expectedResult: [ + // { sourceCodeRange: { endOffset: 23, startOffset: 20 }, text: "key" }, + // { sourceCodeRange: { endOffset: 29, startOffset: 24 }, text: "words" }, + // { sourceCodeRange: { endOffset: 41, startOffset: 38 }, text: "key" }, + // { sourceCodeRange: { endOffset: 46, startOffset: 42 }, text: "word" }, + // ], + // }, + // { + // testDescription: "One primary and one secondary match of multi word keyphrase in sentence", + // sentence: { + // text: "A key sentence with a key word.", + // tokens: [ + // { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, + // { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, + // { text: "key", sourceCodeRange: { startOffset: 2, endOffset: 5 } }, + // { text: " ", sourceCodeRange: { startOffset: 5, endOffset: 6 } }, + // { text: "sentence", sourceCodeRange: { startOffset: 6, endOffset: 14 } }, + // { text: " ", sourceCodeRange: { startOffset: 14, endOffset: 15 } }, + // { text: "with", sourceCodeRange: { startOffset: 15, endOffset: 19 } }, + // { text: " ", sourceCodeRange: { startOffset: 19, endOffset: 20 } }, + // { text: "a", sourceCodeRange: { startOffset: 20, endOffset: 21 } }, + // { text: " ", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, + // { text: "key", sourceCodeRange: { startOffset: 22, endOffset: 25 } }, + // { text: " ", sourceCodeRange: { startOffset: 25, endOffset: 26 } }, + // { text: "word", sourceCodeRange: { startOffset: 26, endOffset: 30 } }, + // { text: ".", sourceCodeRange: { startOffset: 30, endOffset: 31 } }, + // ], + // sourceCodeRange: { startOffset: 0, endOffset: 31 } }, + // keyphraseForms: [ [ "key" ], [ "word", "words" ] ], + // 
expectedResult: [ + // { sourceCodeRange: { startOffset: 2, endOffset: 5 }, text: "key" }, + // { sourceCodeRange: { endOffset: 25, startOffset: 22 }, text: "key" }, + // { sourceCodeRange: { endOffset: 30, startOffset: 26 }, text: "word" } ], + // }, + // { + // testDescription: "No match if a multi word keyphrase is separated with an underscore in the sentence.", + // sentence: { + // text: "A sentence with a key_word.", + // tokens: [ + // { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, + // { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, + // { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, + // { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, + // { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, + // { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, + // { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, + // { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, + // { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, + // { text: "_", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, + // { text: "word", sourceCodeRange: { startOffset: 22, endOffset: 26 } }, + // { text: ".", sourceCodeRange: { startOffset: 26, endOffset: 27 } }, + // ], + // sourceCodeRange: { startOffset: 0, endOffset: 27 } }, + // keyphraseForms: [ [ "key", "keys" ], [ "word", "words" ] ], + // expectedResult: [], + // }, + // { + // testDescription: "A match if the key phrase is separated by an underscore in the sentence and in the keyphrase.", + // sentence: { + // text: "A sentence with a key_word.", + // tokens: [ + // { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, + // { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, + // { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, + // { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, + // { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, + // { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, + // { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, + // { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, + // { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, + // { text: "_", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, + // { text: "word", sourceCodeRange: { startOffset: 22, endOffset: 26 } }, + // { text: ".", sourceCodeRange: { startOffset: 26, endOffset: 27 } }, + // ], + // sourceCodeRange: { startOffset: 0, endOffset: 27 } }, + // keyphraseForms: [ [ "key_word", "key_words" ] ], + // expectedResult: [ + // { sourceCodeRange: { endOffset: 21, startOffset: 18 }, text: "key" }, + // { sourceCodeRange: { endOffset: 22, startOffset: 21 }, text: "_" }, + // { sourceCodeRange: { endOffset: 26, startOffset: 22 }, text: "word" }, + // ], + // }, + // { + // testDescription: "If there is only a partial match, the partial match is returned.", + // sentence: { + // text: "A sentence with a key.", + // tokens: [ + // { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, + // { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, + // { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, + // { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, + // { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, + // { text: " ", sourceCodeRange: { startOffset: 15, 
endOffset: 16 } }, + // { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, + // { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, + // { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, + // { text: ".", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, + // ], + // sourceCodeRange: { startOffset: 0, endOffset: 22 } }, + // keyphraseForms: [ [ "key" ], [ "word", "words" ] ], + // expectedResult: [ { sourceCodeRange: { endOffset: 21, startOffset: 18 }, text: "key" } ], + // + // }, + // { + // testDescription: "It is also a match if the keyphrase is lowercase but the occurrence in the sentence is uppercase.", + // sentence: { + // text: "A sentence with a KEY WORD.", + // tokens: [ + // { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, + // { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, + // { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, + // { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, + // { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, + // { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, + // { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, + // { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, + // { text: "KEY", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, + // { text: " ", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, + // { text: "WORD", sourceCodeRange: { startOffset: 22, endOffset: 26 } }, + // { text: ".", sourceCodeRange: { startOffset: 26, endOffset: 27 } }, + // ], + // sourceCodeRange: { startOffset: 0, endOffset: 27 } }, + // keyphraseForms: [ [ "key" ], [ "word", "words" ] ], + // expectedResult: [ + // { sourceCodeRange: { endOffset: 21, startOffset: 18 }, text: "KEY" }, + // { sourceCodeRange: { endOffset: 26, startOffset: 22 }, text: "WORD" }, + // ], + // }, + // { + // testDescription: "Correctly matches if the keyphrase is the last word in the sentence.", + // sentence: { + // text: "A sentence with a key", + // tokens: [ + // { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, + // { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, + // { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, + // { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, + // { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, + // { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, + // { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, + // { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, + // { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, + // ], + // sourceCodeRange: { startOffset: 0, endOffset: 21 } }, + // keyphraseForms: [ [ "key" ], [ "word", "words" ] ], + // expectedResult: [ { sourceCodeRange: { endOffset: 21, startOffset: 18 }, text: "key" } ], + // }, + // { + // testDescription: "Doesn't return a match if the keyphrase in the text ends with an underscore.", + // sentence: { + // text: "A sentence with a key_", + // tokens: [ + // { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, + // { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, + // { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, + // { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, + // { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } 
}, + // { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, + // { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, + // { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, + // { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, + // { text: "_", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, + // ], + // sourceCodeRange: { startOffset: 0, endOffset: 22 } }, + // keyphraseForms: [ [ "key" ], [ "word", "words" ] ], + // expectedResult: [], + // }, + // { + // testDescription: "Doesn't return a match if the keyphrase in the text ends with a dash.", + // sentence: { + // text: "A sentence with a key-", + // tokens: [ + // { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, + // { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, + // { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, + // { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, + // { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, + // { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, + // { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, + // { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, + // { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, + // { text: "-", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, + // ], + // sourceCodeRange: { startOffset: 0, endOffset: 22 } }, + // keyphraseForms: [ [ "key" ], [ "word", "words" ] ], + // expectedResult: [], + // }, + // { + // testDescription: "Correctly matches if the sentence ends with an exclamation mark.", + // sentence: { + // text: "A sentence with a key!", + // tokens: [ + // { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, + // { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, + // { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, + // { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, + // { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, + // { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, + // { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, + // { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, + // { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, + // { text: "!", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, + // ], + // sourceCodeRange: { startOffset: 0, endOffset: 22 } }, + // keyphraseForms: [ [ "key" ], [ "word", "words" ] ], + // expectedResult: [ { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } } ], + // }, + // { + // testDescription: "A keyphrase with a dash matches when the keyphrase occurs with a dash in the text.", + // sentence: { + // text: "A sentence with a key-word.", + // tokens: [ + // { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, + // { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, + // { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, + // { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, + // { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, + // { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, + // { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, + // { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, + // { text: "key", sourceCodeRange: { startOffset: 
18, endOffset: 21 } }, + // { text: "-", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, + // { text: "word", sourceCodeRange: { startOffset: 22, endOffset: 26 } }, + // { text: ".", sourceCodeRange: { startOffset: 26, endOffset: 27 } }, + // ], + // sourceCodeRange: { startOffset: 0, endOffset: 27 } }, + // keyphraseForms: [ [ "key-word", "key-words" ] ], + // expectedResult: [ + // { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, + // { text: "-", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, + // { text: "word", sourceCodeRange: { startOffset: 22, endOffset: 26 } }, + // ], + // }, + // { + // testDescription: "A keyphrase with a dash does not match when the keyphrase occurs without a dash in the text.", + // sentence: { + // text: "A sentence with a key word.", + // tokens: [ + // { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, + // { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, + // { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, + // { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, + // { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, + // { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, + // { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, + // { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, + // { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, + // { text: " ", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, + // { text: "word", sourceCodeRange: { startOffset: 22, endOffset: 26 } }, + // { text: ".", sourceCodeRange: { startOffset: 26, endOffset: 27 } }, + // ], + // sourceCodeRange: { startOffset: 0, endOffset: 27 } }, + // keyphraseForms: [ [ "key-word", "key-words" ] ], + // expectedResult: [], + // }, + // { + // testDescription: "A keyphrase without a dash does not match when the keyphrase occurs with a dash in the text.", + // sentence: { + // text: "A sentence with a key-word.", + // tokens: [ + // { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, + // { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, + // { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, + // { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, + // { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, + // { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, + // { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, + // { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, + // { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, + // { text: "-", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, + // { text: "word", sourceCodeRange: { startOffset: 22, endOffset: 26 } }, + // { text: ".", sourceCodeRange: { startOffset: 26, endOffset: 27 } }, + // ], + // sourceCodeRange: { startOffset: 0, endOffset: 27 } }, + // keyphraseForms: [ [ "key", "keys" ], [ "word", "words" ] ], + // expectedResult: [ ], + // }, + // { + // testDescription: "A keyphrase without a dash matches only with the keyphrase without a dash in the text.", + // sentence: { + // text: "A sentence with a key-word and a key word.", + // tokens: [ + // { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, + // { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, + // { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, + // { 
text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, + // { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, + // { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, + // { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, + // { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, + // { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, + // { text: "-", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, + // { text: "word", sourceCodeRange: { startOffset: 22, endOffset: 26 } }, + // { text: " ", sourceCodeRange: { startOffset: 26, endOffset: 27 } }, + // { text: "and", sourceCodeRange: { startOffset: 27, endOffset: 30 } }, + // { text: " ", sourceCodeRange: { startOffset: 30, endOffset: 31 } }, + // { text: "a", sourceCodeRange: { startOffset: 31, endOffset: 32 } }, + // { text: " ", sourceCodeRange: { startOffset: 32, endOffset: 33 } }, + // { text: "key", sourceCodeRange: { startOffset: 33, endOffset: 36 } }, + // { text: " ", sourceCodeRange: { startOffset: 36, endOffset: 37 } }, + // { text: "word", sourceCodeRange: { startOffset: 37, endOffset: 41 } }, + // { text: ".", sourceCodeRange: { startOffset: 41, endOffset: 42 } }, + // ], + // sourceCodeRange: { startOffset: 0, endOffset: 42 } }, + // keyphraseForms: [ [ "key", "keys" ], [ "word", "words" ] ], + // expectedResult: [ + // { text: "key", sourceCodeRange: { startOffset: 33, endOffset: 36 } }, + // { text: "word", sourceCodeRange: { startOffset: 37, endOffset: 41 } }, + // ], + // }, + // { + // testDescription: "A match with a dash and a match without a dash, but the keyphrase contains a dash.", + // sentence: { + // text: "A sentence with a key-word and a key word.", + // tokens: [ + // { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, + // { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, + // { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, + // { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, + // { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, + // { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, + // { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, + // { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, + // { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, + // { text: "-", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, + // { text: "word", sourceCodeRange: { startOffset: 22, endOffset: 26 } }, + // { text: " ", sourceCodeRange: { startOffset: 26, endOffset: 27 } }, + // { text: "and", sourceCodeRange: { startOffset: 27, endOffset: 30 } }, + // { text: " ", sourceCodeRange: { startOffset: 30, endOffset: 31 } }, + // { text: "a", sourceCodeRange: { startOffset: 31, endOffset: 32 } }, + // { text: " ", sourceCodeRange: { startOffset: 32, endOffset: 33 } }, + // { text: "key", sourceCodeRange: { startOffset: 33, endOffset: 36 } }, + // { text: " ", sourceCodeRange: { startOffset: 36, endOffset: 37 } }, + // { text: "word", sourceCodeRange: { startOffset: 37, endOffset: 41 } }, + // { text: ".", sourceCodeRange: { startOffset: 41, endOffset: 42 } }, + // ], + // sourceCodeRange: { startOffset: 0, endOffset: 42 } }, + // keyphraseForms: [ [ "key-word", "key-words" ] ], + // expectedResult: [ + // { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, + // { text: "-", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, + // { 
text: "word", sourceCodeRange: { startOffset: 22, endOffset: 26 } }, + // ], + // }, + // { + // testDescription: "When the sentence contains the keyphrase with a dash but in the wrong order, there should be no match.", + // sentence: { + // text: "A sentence with a word-key.", + // tokens: [ + // { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, + // { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, + // { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, + // { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, + // { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, + // { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, + // { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, + // { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, + // { text: "word", sourceCodeRange: { startOffset: 18, endOffset: 22 } }, + // { text: "-", sourceCodeRange: { startOffset: 22, endOffset: 23 } }, + // { text: "key", sourceCodeRange: { startOffset: 23, endOffset: 26 } }, + // { text: ".", sourceCodeRange: { startOffset: 26, endOffset: 27 } }, + // ], + // sourceCodeRange: { startOffset: 0, endOffset: 27 } }, + // keyphraseForms: [ [ "key-word", "key-words" ] ], + // expectedResult: [], + // }, + // { + // testDescription: "When the sentence contains the keyphrase with a dash but in the wrong order and the keyphrase does not contain a dash," + + // " there should not be a match.", + // sentence: { + // text: "A sentence with a word-key.", + // tokens: [ + // { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, + // { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, + // { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, + // { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, + // { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, + // { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, + // { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, + // { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, + // { text: "word", sourceCodeRange: { startOffset: 18, endOffset: 22 } }, + // { text: "-", sourceCodeRange: { startOffset: 22, endOffset: 23 } }, + // { text: "key", sourceCodeRange: { startOffset: 23, endOffset: 26 } }, + // { text: ".", sourceCodeRange: { startOffset: 26, endOffset: 27 } }, + // ], + // sourceCodeRange: { startOffset: 0, endOffset: 27 } }, + // keyphraseForms: [ [ "key", "keys" ], [ "word", "words" ] ], + // expectedResult: [ ], + // }, + // { + // testDescription: "When the sentence contains a different word with a dash that contains part of the keyphrase, there should be no match.", + // sentence: { + // text: "A sentence with a key-phrase.", + // tokens: [ + // { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, + // { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, + // { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, + // { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, + // { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, + // { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, + // { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, + // { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, + // { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, + // { text: 
"-", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, + // { text: "phrase", sourceCodeRange: { startOffset: 22, endOffset: 28 } }, + // { text: ".", sourceCodeRange: { startOffset: 28, endOffset: 29 } }, + // ], + // sourceCodeRange: { startOffset: 0, endOffset: 29 } }, + // keyphraseForms: [ [ "key", "keys" ], [ "word", "words" ] ], + // expectedResult: [], + // }, + // { + // testDescription: "Returns a match when the words from the keyphrase are separated by a dash and spaces.", + // sentence: { + // text: "A sentence with a key - phrase.", + // tokens: [ + // { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, + // { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, + // { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, + // { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, + // { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, + // { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, + // { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, + // { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, + // { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, + // { text: " ", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, + // { text: "-", sourceCodeRange: { startOffset: 22, endOffset: 23 } }, + // { text: " ", sourceCodeRange: { startOffset: 23, endOffset: 24 } }, + // { text: "phrase", sourceCodeRange: { startOffset: 24, endOffset: 30 } }, + // { text: ".", sourceCodeRange: { startOffset: 30, endOffset: 31 } }, + // ], + // sourceCodeRange: { startOffset: 0, endOffset: 29 } }, + // keyphraseForms: [ [ "key", "keys" ], [ "phrase", "phrases" ] ], + // expectedResult: [ + // { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, + // { text: "phrase", sourceCodeRange: { startOffset: 24, endOffset: 30 } }, + // ], + // }, + // { + // testDescription: "Matches a keyphrase with an apostrophe.", + // sentence: { + // text: "All the keyphrase's forms should be matched.", + // tokens: [ + // { text: "All", sourceCodeRange: { startOffset: 0, endOffset: 3 } }, + // { text: " ", sourceCodeRange: { startOffset: 3, endOffset: 4 } }, + // { text: "the", sourceCodeRange: { startOffset: 4, endOffset: 7 } }, + // { text: " ", sourceCodeRange: { startOffset: 7, endOffset: 8 } }, + // { text: "keyphrase", sourceCodeRange: { startOffset: 8, endOffset: 17 } }, + // { text: "'", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, + // { text: "s", sourceCodeRange: { startOffset: 18, endOffset: 19 } }, + // { text: " ", sourceCodeRange: { startOffset: 19, endOffset: 20 } }, + // { text: "forms", sourceCodeRange: { startOffset: 20, endOffset: 25 } }, + // { text: " ", sourceCodeRange: { startOffset: 25, endOffset: 26 } }, + // { text: "should", sourceCodeRange: { startOffset: 26, endOffset: 32 } }, + // { text: " ", sourceCodeRange: { startOffset: 32, endOffset: 33 } }, + // { text: "be", sourceCodeRange: { startOffset: 34, endOffset: 36 } }, + // { text: " ", sourceCodeRange: { startOffset: 36, endOffset: 37 } }, + // { text: "matched", sourceCodeRange: { startOffset: 37, endOffset: 44 } }, + // { text: ".", sourceCodeRange: { startOffset: 44, endOffset: 45 } }, + // ], + // sourceCodeRange: { startOffset: 0, endOffset: 45 } }, + // keyphraseForms: [ [ "keyphrase", "keyphrases", "keyphrase's" ] ], + // expectedResult: [ + // { text: "keyphrase", sourceCodeRange: { startOffset: 8, endOffset: 17 } }, + // { text: "'", 
sourceCodeRange: { startOffset: 17, endOffset: 18 } }, + // { text: "s", sourceCodeRange: { startOffset: 18, endOffset: 19 } }, + // ], + // }, { - testDescription: "No matches in sentence", + testDescription: "with exact matching, a single word keyphrase should match with a single word.", sentence: { - text: "A sentence with notthekeyphrase.", + text: "A sentence with a keyphrase.", tokens: [ { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, @@ -14,159 +608,22 @@ const testCases = [ { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, - { text: "notthekeyphrase", sourceCodeRange: { startOffset: 16, endOffset: 31 } }, - { text: ".", sourceCodeRange: { startOffset: 31, endOffset: 32 } }, - ], - sourceCodeRange: { startOffset: 0, endOffset: 32 } }, - keyphraseForms: [ [ "keyword", "keywords" ] ], - expectedResult: [], - }, - { - testDescription: "should return empty result if keyphraseForms is empty", - sentence: { - text: "A sentence with notthekeyphrase.", - tokens: [ - { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, - { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, - { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, - { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, - { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, - { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, - { text: "notthekeyphrase", sourceCodeRange: { startOffset: 16, endOffset: 31 } }, - { text: ".", sourceCodeRange: { startOffset: 31, endOffset: 32 } }, - ], - sourceCodeRange: { startOffset: 0, endOffset: 32 } }, - keyphraseForms: [ [] ], - expectedResult: [], - }, - { - testDescription: "One match in sentence of a single-word keyphrase", - sentence: { - text: "A sentence with the keyword.", - tokens: [ - { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, - { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, - { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, - { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, - { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, - { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, - { text: "the", sourceCodeRange: { startOffset: 16, endOffset: 19 } }, - { text: " ", sourceCodeRange: { startOffset: 19, endOffset: 20 } }, - { text: "keyword", sourceCodeRange: { startOffset: 20, endOffset: 27 } }, + { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, + { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, + { text: "keyphrase", sourceCodeRange: { startOffset: 18, endOffset: 27 } }, { text: ".", sourceCodeRange: { startOffset: 27, endOffset: 28 } }, ], sourceCodeRange: { startOffset: 0, endOffset: 28 } }, - keyphraseForms: [ [ "keyword", "keywords" ] ], - expectedResult: [ { sourceCodeRange: { startOffset: 20, endOffset: 27 }, text: "keyword" } ], - }, - { - testDescription: "One match in sentence of a multi word keyphrase", - sentence: { - text: "A sentence with the key words.", - tokens: [ - { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, - { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, - { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, - { text: " 
", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, - { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, - { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, - { text: "the", sourceCodeRange: { startOffset: 16, endOffset: 19 } }, - { text: " ", sourceCodeRange: { startOffset: 19, endOffset: 20 } }, - { text: "key", sourceCodeRange: { startOffset: 20, endOffset: 23 } }, - { text: " ", sourceCodeRange: { startOffset: 23, endOffset: 24 } }, - { text: "words", sourceCodeRange: { startOffset: 24, endOffset: 29 } }, - { text: ".", sourceCodeRange: { startOffset: 29, endOffset: 30 } }, - ], - sourceCodeRange: { startOffset: 0, endOffset: 30 } }, - keyphraseForms: [ [ "key" ], [ "word", "words" ] ], - expectedResult: [ { sourceCodeRange: { endOffset: 23, startOffset: 20 }, text: "key" }, { sourceCodeRange: { endOffset: 29, startOffset: 24 }, text: "words" } ], - }, - { - testDescription: "Disregards word order of multi word keyphrases", - sentence: { - text: "A word that is key.", - tokens: [ - { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, - { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, - { text: "word", sourceCodeRange: { startOffset: 2, endOffset: 6 } }, - { text: " ", sourceCodeRange: { startOffset: 6, endOffset: 7 } }, - { text: "that", sourceCodeRange: { startOffset: 7, endOffset: 11 } }, - { text: " ", sourceCodeRange: { startOffset: 11, endOffset: 12 } }, - { text: "is", sourceCodeRange: { startOffset: 12, endOffset: 14 } }, - { text: " ", sourceCodeRange: { startOffset: 14, endOffset: 15 } }, - { text: "key", sourceCodeRange: { startOffset: 15, endOffset: 18 } }, - { text: ".", sourceCodeRange: { startOffset: 18, endOffset: 19 } }, - ], - sourceCodeRange: { startOffset: 0, endOffset: 19 } }, - keyphraseForms: [ [ "key" ], [ "word", "words" ] ], - expectedResult: [ { sourceCodeRange: { endOffset: 6, startOffset: 2 }, text: "word" }, { sourceCodeRange: { endOffset: 18, startOffset: 15 }, text: "key" } ], - }, - { - testDescription: "Two matches of multi word keyphrase in sentence", - sentence: { - text: "A sentence with the key words and the key word.", - tokens: [ - { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, - { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, - { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, - { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, - { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, - { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, - { text: "the", sourceCodeRange: { startOffset: 16, endOffset: 19 } }, - { text: " ", sourceCodeRange: { startOffset: 19, endOffset: 20 } }, - { text: "key", sourceCodeRange: { startOffset: 20, endOffset: 23 } }, - { text: " ", sourceCodeRange: { startOffset: 23, endOffset: 24 } }, - { text: "words", sourceCodeRange: { startOffset: 24, endOffset: 29 } }, - { text: " ", sourceCodeRange: { startOffset: 29, endOffset: 30 } }, - { text: "and", sourceCodeRange: { startOffset: 30, endOffset: 33 } }, - { text: " ", sourceCodeRange: { startOffset: 33, endOffset: 34 } }, - { text: "the", sourceCodeRange: { startOffset: 34, endOffset: 37 } }, - { text: " ", sourceCodeRange: { startOffset: 37, endOffset: 38 } }, - { text: "key", sourceCodeRange: { startOffset: 38, endOffset: 41 } }, - { text: " ", sourceCodeRange: { startOffset: 41, endOffset: 42 } }, - { text: "word", sourceCodeRange: { startOffset: 42, endOffset: 46 } }, - { text: ".", 
sourceCodeRange: { startOffset: 46, endOffset: 47 } }, - ], - sourceCodeRange: { startOffset: 0, endOffset: 47 } }, - keyphraseForms: [ [ "key" ], [ "word", "words" ] ], + keyphraseForms: [ [ "keyphrase" ] ], + exactMatching: true, expectedResult: [ - { sourceCodeRange: { endOffset: 23, startOffset: 20 }, text: "key" }, - { sourceCodeRange: { endOffset: 29, startOffset: 24 }, text: "words" }, - { sourceCodeRange: { endOffset: 41, startOffset: 38 }, text: "key" }, - { sourceCodeRange: { endOffset: 46, startOffset: 42 }, text: "word" }, + { text: "keyphrase", sourceCodeRange: { startOffset: 18, endOffset: 27 } }, ], }, { - testDescription: "One primary and one secondary match of multi word keyphrase in sentence", - sentence: { - text: "A key sentence with a key word.", - tokens: [ - { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, - { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, - { text: "key", sourceCodeRange: { startOffset: 2, endOffset: 5 } }, - { text: " ", sourceCodeRange: { startOffset: 5, endOffset: 6 } }, - { text: "sentence", sourceCodeRange: { startOffset: 6, endOffset: 14 } }, - { text: " ", sourceCodeRange: { startOffset: 14, endOffset: 15 } }, - { text: "with", sourceCodeRange: { startOffset: 15, endOffset: 19 } }, - { text: " ", sourceCodeRange: { startOffset: 19, endOffset: 20 } }, - { text: "a", sourceCodeRange: { startOffset: 20, endOffset: 21 } }, - { text: " ", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, - { text: "key", sourceCodeRange: { startOffset: 22, endOffset: 25 } }, - { text: " ", sourceCodeRange: { startOffset: 25, endOffset: 26 } }, - { text: "word", sourceCodeRange: { startOffset: 26, endOffset: 30 } }, - { text: ".", sourceCodeRange: { startOffset: 30, endOffset: 31 } }, - ], - sourceCodeRange: { startOffset: 0, endOffset: 31 } }, - keyphraseForms: [ [ "key" ], [ "word", "words" ] ], - expectedResult: [ - { sourceCodeRange: { startOffset: 2, endOffset: 5 }, text: "key" }, - { sourceCodeRange: { endOffset: 25, startOffset: 22 }, text: "key" }, - { sourceCodeRange: { endOffset: 30, startOffset: 26 }, text: "word" } ], - }, - { - testDescription: "No match if a multi word keyphrase is separated with an underscore in the sentence.", + testDescription: "with exact matching, a singular single word keyphrase should not match with the plural form in the text.", sentence: { - text: "A sentence with a key_word.", + text: "A sentence with keyphrases.", tokens: [ { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, @@ -174,21 +631,18 @@ const testCases = [ { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, - { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, - { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, - { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, - { text: "_", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, - { text: "word", sourceCodeRange: { startOffset: 22, endOffset: 26 } }, + { text: "keyphrases", sourceCodeRange: { startOffset: 16, endOffset: 26 } }, { text: ".", sourceCodeRange: { startOffset: 26, endOffset: 27 } }, ], sourceCodeRange: { startOffset: 0, endOffset: 27 } }, - keyphraseForms: [ [ "key", "keys" ], [ "word", "words" ] ], + keyphraseForms: [ [ "keyphrase" ] ], + exactMatching: true, expectedResult: 
[], }, { - testDescription: "A match if the key phrase is separated by an underscore in the sentence and in the keyphrase.", + testDescription: "with exact matching, a plural single word keyphrase should not match wih a singular form in the text.", sentence: { - text: "A sentence with a key_word.", + text: "A sentence with a keyphrase.", tokens: [ { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, @@ -198,23 +652,18 @@ const testCases = [ { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, - { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, - { text: "_", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, - { text: "word", sourceCodeRange: { startOffset: 22, endOffset: 26 } }, - { text: ".", sourceCodeRange: { startOffset: 26, endOffset: 27 } }, + { text: "keyphrase", sourceCodeRange: { startOffset: 18, endOffset: 27 } }, + { text: ".", sourceCodeRange: { startOffset: 27, endOffset: 28 } }, ], - sourceCodeRange: { startOffset: 0, endOffset: 27 } }, - keyphraseForms: [ [ "key_word", "key_words" ] ], - expectedResult: [ - { sourceCodeRange: { endOffset: 21, startOffset: 18 }, text: "key" }, - { sourceCodeRange: { endOffset: 22, startOffset: 21 }, text: "_" }, - { sourceCodeRange: { endOffset: 26, startOffset: 22 }, text: "word" }, - ], + sourceCodeRange: { startOffset: 0, endOffset: 28 } }, + keyphraseForms: [ [ "keyphrases" ] ], + exactMatching: true, + expectedResult: [], }, { - testDescription: "If there is only a partial match, the partial match is returned.", + testDescription: "with exact matching, a multi word keyphrase should match the same multi word phrase in the text.", sentence: { - text: "A sentence with a key.", + text: "A sentence with a key phrase.", tokens: [ { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, @@ -225,42 +674,23 @@ const testCases = [ { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, - { text: ".", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, - ], - sourceCodeRange: { startOffset: 0, endOffset: 22 } }, - keyphraseForms: [ [ "key" ], [ "word", "words" ] ], - expectedResult: [ { sourceCodeRange: { endOffset: 21, startOffset: 18 }, text: "key" } ], - - }, - { - testDescription: "It is also a match if the keyphrase is lowercase but the occurrence in the sentence is uppercase.", - sentence: { - text: "A sentence with a KEY WORD.", - tokens: [ - { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, - { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, - { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, - { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, - { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, - { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, - { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, - { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, - { text: "KEY", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, { text: " ", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, - { text: "WORD", sourceCodeRange: { 
startOffset: 22, endOffset: 26 } }, - { text: ".", sourceCodeRange: { startOffset: 26, endOffset: 27 } }, + { text: "phrase", sourceCodeRange: { startOffset: 22, endOffset: 28 } }, + { text: ".", sourceCodeRange: { startOffset: 28, endOffset: 29 } }, ], - sourceCodeRange: { startOffset: 0, endOffset: 27 } }, - keyphraseForms: [ [ "key" ], [ "word", "words" ] ], + sourceCodeRange: { startOffset: 0, endOffset: 29 } }, + keyphraseForms: [ [ "key phrase" ] ], + exactMatching: true, expectedResult: [ - { sourceCodeRange: { endOffset: 21, startOffset: 18 }, text: "KEY" }, - { sourceCodeRange: { endOffset: 26, startOffset: 22 }, text: "WORD" }, + { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, + { text: " ", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, + { text: "phrase", sourceCodeRange: { startOffset: 22, endOffset: 28 } }, ], }, { - testDescription: "Correctly matches if the keyphrase is the last word in the sentence.", + testDescription: "with exact matching, a multi word keyphrase should not match a multi word phrase if it is in a different order.", sentence: { - text: "A sentence with a key", + text: "A sentence with a phrase key.", tokens: [ { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, @@ -270,56 +700,20 @@ const testCases = [ { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, - { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, - ], - sourceCodeRange: { startOffset: 0, endOffset: 21 } }, - keyphraseForms: [ [ "key" ], [ "word", "words" ] ], - expectedResult: [ { sourceCodeRange: { endOffset: 21, startOffset: 18 }, text: "key" } ], - }, - { - testDescription: "Doesn't return a match if the keyphrase in the text ends with an underscore.", - sentence: { - text: "A sentence with a key_", - tokens: [ - { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, - { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, - { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, - { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, - { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, - { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, - { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, - { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, - { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, - { text: "_", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, - ], - sourceCodeRange: { startOffset: 0, endOffset: 22 } }, - keyphraseForms: [ [ "key" ], [ "word", "words" ] ], - expectedResult: [], - }, - { - testDescription: "Doesn't return a match if the keyphrase in the text ends with a dash.", - sentence: { - text: "A sentence with a key-", - tokens: [ - { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, - { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, - { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, - { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, - { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, - { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, - { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, - { text: " ", sourceCodeRange: { 
startOffset: 17, endOffset: 18 } }, - { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, - { text: "-", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, + { text: "phrase", sourceCodeRange: { startOffset: 18, endOffset: 24 } }, + { text: " ", sourceCodeRange: { startOffset: 24, endOffset: 25 } }, + { text: "key", sourceCodeRange: { startOffset: 25, endOffset: 28 } }, + { text: ".", sourceCodeRange: { startOffset: 28, endOffset: 29 } }, ], - sourceCodeRange: { startOffset: 0, endOffset: 22 } }, - keyphraseForms: [ [ "key" ], [ "word", "words" ] ], + sourceCodeRange: { startOffset: 0, endOffset: 29 } }, + keyphraseForms: [ [ "key phrase" ] ], + exactMatching: true, expectedResult: [], }, { - testDescription: "Correctly matches if the sentence ends with an exclamation mark.", + testDescription: "with exact matching, a the matching should be insensitive to case.", sentence: { - text: "A sentence with a key!", + text: "A sentence with a KeYphRaSe.", tokens: [ { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, @@ -329,120 +723,45 @@ const testCases = [ { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, - { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, - { text: "!", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, - ], - sourceCodeRange: { startOffset: 0, endOffset: 22 } }, - keyphraseForms: [ [ "key" ], [ "word", "words" ] ], - expectedResult: [ { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } } ], - }, - { - testDescription: "A keyphrase with a dash matches when the keyphrase occurs with a dash in the text.", - sentence: { - text: "A sentence with a key-word.", - tokens: [ - { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, - { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, - { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, - { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, - { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, - { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, - { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, - { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, - { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, - { text: "-", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, - { text: "word", sourceCodeRange: { startOffset: 22, endOffset: 26 } }, - { text: ".", sourceCodeRange: { startOffset: 26, endOffset: 27 } }, + { text: "KeYphRaSe", sourceCodeRange: { startOffset: 18, endOffset: 28 } }, + { text: ".", sourceCodeRange: { startOffset: 28, endOffset: 29 } }, ], - sourceCodeRange: { startOffset: 0, endOffset: 27 } }, - keyphraseForms: [ [ "key-word", "key-words" ] ], + sourceCodeRange: { startOffset: 0, endOffset: 29 } }, + keyphraseForms: [ [ "keyphrase" ] ], + exactMatching: true, expectedResult: [ - { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, - { text: "-", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, - { text: "word", sourceCodeRange: { startOffset: 22, endOffset: 26 } }, + { text: "KeYphRaSe", sourceCodeRange: { startOffset: 18, endOffset: 28 } }, ], }, { - testDescription: "A keyphrase with a dash does not match when the keyphrase occurs without a dash in the 
text.", - sentence: { - text: "A sentence with a key word.", - tokens: [ - { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, - { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, - { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, - { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, - { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, - { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, - { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, - { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, - { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, - { text: " ", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, - { text: "word", sourceCodeRange: { startOffset: 22, endOffset: 26 } }, - { text: ".", sourceCodeRange: { startOffset: 26, endOffset: 27 } }, - ], - sourceCodeRange: { startOffset: 0, endOffset: 27 } }, - keyphraseForms: [ [ "key-word", "key-words" ] ], - expectedResult: [], - }, - { - testDescription: "A keyphrase without a dash does not match when the keyphrase occurs with a dash in the text.", - sentence: { - text: "A sentence with a key-word.", - tokens: [ - { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, - { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, - { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, - { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, - { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, - { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, - { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, - { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, - { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, - { text: "-", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, - { text: "word", sourceCodeRange: { startOffset: 22, endOffset: 26 } }, - { text: ".", sourceCodeRange: { startOffset: 26, endOffset: 27 } }, - ], - sourceCodeRange: { startOffset: 0, endOffset: 27 } }, - keyphraseForms: [ [ "key", "keys" ], [ "word", "words" ] ], - expectedResult: [ ], - }, - { - testDescription: "A keyphrase without a dash matches only with the keyphrase without a dash in the text.", + testDescription: "with exact matching, it should match a full stop if it is part of the keyphrase and directly precedes the keyphrase.", sentence: { - text: "A sentence with a key-word and a key word.", + text: "A .sentence with a keyphrase.", tokens: [ { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, - { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, - { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, - { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, - { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, - { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, - { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, - { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, - { text: "-", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, - { text: "word", sourceCodeRange: { startOffset: 22, endOffset: 26 } }, - { text: " ", sourceCodeRange: { startOffset: 26, endOffset: 27 } }, - { text: "and", sourceCodeRange: { startOffset: 27, endOffset: 30 } }, - { text: " ", 
sourceCodeRange: { startOffset: 30, endOffset: 31 } }, - { text: "a", sourceCodeRange: { startOffset: 31, endOffset: 32 } }, - { text: " ", sourceCodeRange: { startOffset: 32, endOffset: 33 } }, - { text: "key", sourceCodeRange: { startOffset: 33, endOffset: 36 } }, - { text: " ", sourceCodeRange: { startOffset: 36, endOffset: 37 } }, - { text: "word", sourceCodeRange: { startOffset: 37, endOffset: 41 } }, - { text: ".", sourceCodeRange: { startOffset: 41, endOffset: 42 } }, + { text: ".", sourceCodeRange: { startOffset: 2, endOffset: 3 } }, + { text: "sentence", sourceCodeRange: { startOffset: 3, endOffset: 11 } }, + { text: " ", sourceCodeRange: { startOffset: 11, endOffset: 12 } }, + { text: "with", sourceCodeRange: { startOffset: 12, endOffset: 16 } }, + { text: " ", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, + { text: "a", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, + { text: " ", sourceCodeRange: { startOffset: 18, endOffset: 19 } }, + { text: "keyphrase", sourceCodeRange: { startOffset: 19, endOffset: 28 } }, + { text: ".", sourceCodeRange: { startOffset: 28, endOffset: 29 } }, ], - sourceCodeRange: { startOffset: 0, endOffset: 42 } }, - keyphraseForms: [ [ "key", "keys" ], [ "word", "words" ] ], + sourceCodeRange: { startOffset: 0, endOffset: 29 } }, + keyphraseForms: [ [ ".sentence" ] ], + exactMatching: true, expectedResult: [ - { text: "key", sourceCodeRange: { startOffset: 33, endOffset: 36 } }, - { text: "word", sourceCodeRange: { startOffset: 37, endOffset: 41 } }, + { text: ".", sourceCodeRange: { startOffset: 2, endOffset: 3 } }, + { text: "sentence", sourceCodeRange: { startOffset: 3, endOffset: 11 } }, ], }, { - testDescription: "A match with a dash and a match without a dash, but the keyphrase contains a dash.", + testDescription: "with exact matching, it should match a full stop if it is part of the keyphrase and directly follows the keyphrase.", sentence: { - text: "A sentence with a key-word and a key word.", + text: "A sentence with a keyphrase.", tokens: [ { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, @@ -452,76 +771,21 @@ const testCases = [ { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, - { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, - { text: "-", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, - { text: "word", sourceCodeRange: { startOffset: 22, endOffset: 26 } }, - { text: " ", sourceCodeRange: { startOffset: 26, endOffset: 27 } }, - { text: "and", sourceCodeRange: { startOffset: 27, endOffset: 30 } }, - { text: " ", sourceCodeRange: { startOffset: 30, endOffset: 31 } }, - { text: "a", sourceCodeRange: { startOffset: 31, endOffset: 32 } }, - { text: " ", sourceCodeRange: { startOffset: 32, endOffset: 33 } }, - { text: "key", sourceCodeRange: { startOffset: 33, endOffset: 36 } }, - { text: " ", sourceCodeRange: { startOffset: 36, endOffset: 37 } }, - { text: "word", sourceCodeRange: { startOffset: 37, endOffset: 41 } }, - { text: ".", sourceCodeRange: { startOffset: 41, endOffset: 42 } }, + { text: "keyphrase", sourceCodeRange: { startOffset: 18, endOffset: 27 } }, + { text: ".", sourceCodeRange: { startOffset: 27, endOffset: 28 } }, ], - sourceCodeRange: { startOffset: 0, endOffset: 42 } }, - keyphraseForms: [ [ "key-word", "key-words" ] ], + sourceCodeRange: { 
startOffset: 0, endOffset: 28 } }, + keyphraseForms: [ [ "keyphrase." ] ], + exactMatching: true, expectedResult: [ - { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, - { text: "-", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, - { text: "word", sourceCodeRange: { startOffset: 22, endOffset: 26 } }, + { text: "keyphrase", sourceCodeRange: { startOffset: 18, endOffset: 27 } }, + { text: ".", sourceCodeRange: { startOffset: 27, endOffset: 28 } }, ], }, { - testDescription: "When the sentence contains the keyphrase with a dash but in the wrong order, there should be no match.", - sentence: { - text: "A sentence with a word-key.", - tokens: [ - { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, - { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, - { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, - { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, - { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, - { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, - { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, - { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, - { text: "word", sourceCodeRange: { startOffset: 18, endOffset: 22 } }, - { text: "-", sourceCodeRange: { startOffset: 22, endOffset: 23 } }, - { text: "key", sourceCodeRange: { startOffset: 23, endOffset: 26 } }, - { text: ".", sourceCodeRange: { startOffset: 26, endOffset: 27 } }, - ], - sourceCodeRange: { startOffset: 0, endOffset: 27 } }, - keyphraseForms: [ [ "key-word", "key-words" ] ], - expectedResult: [], - }, - { - testDescription: "When the sentence contains the keyphrase with a dash but in the wrong order and the keyphrase does not contain a dash," + - " there should not be a match.", - sentence: { - text: "A sentence with a word-key.", - tokens: [ - { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, - { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, - { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, - { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, - { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, - { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, - { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, - { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, - { text: "word", sourceCodeRange: { startOffset: 18, endOffset: 22 } }, - { text: "-", sourceCodeRange: { startOffset: 22, endOffset: 23 } }, - { text: "key", sourceCodeRange: { startOffset: 23, endOffset: 26 } }, - { text: ".", sourceCodeRange: { startOffset: 26, endOffset: 27 } }, - ], - sourceCodeRange: { startOffset: 0, endOffset: 27 } }, - keyphraseForms: [ [ "key", "keys" ], [ "word", "words" ] ], - expectedResult: [ ], - }, - { - testDescription: "When the sentence contains a different word with a dash that contains part of the keyphrase, there should be no match.", + testDescription: "with exact matching, it should match a full stop if it is part of the keyphrase and is not surrounded by spaces.", sentence: { - text: "A sentence with a key-phrase.", + text: "A sentence with a key.phrase.", tokens: [ { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, @@ -531,72 +795,18 @@ const testCases = [ { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, { text: "a", 
sourceCodeRange: { startOffset: 16, endOffset: 17 } }, { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, - { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, - { text: "-", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, - { text: "phrase", sourceCodeRange: { startOffset: 22, endOffset: 28 } }, + { text: "key.phrase", sourceCodeRange: { startOffset: 18, endOffset: 28 } }, { text: ".", sourceCodeRange: { startOffset: 28, endOffset: 29 } }, ], sourceCodeRange: { startOffset: 0, endOffset: 29 } }, - keyphraseForms: [ [ "key", "keys" ], [ "word", "words" ] ], - expectedResult: [], - }, - { - testDescription: "Returns a match when the words from the keyphrase are separated by a dash and spaces.", - sentence: { - text: "A sentence with a key - phrase.", - tokens: [ - { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, - { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, - { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, - { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, - { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, - { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, - { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, - { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, - { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, - { text: " ", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, - { text: "-", sourceCodeRange: { startOffset: 22, endOffset: 23 } }, - { text: " ", sourceCodeRange: { startOffset: 23, endOffset: 24 } }, - { text: "phrase", sourceCodeRange: { startOffset: 24, endOffset: 30 } }, - { text: ".", sourceCodeRange: { startOffset: 30, endOffset: 31 } }, - ], - sourceCodeRange: { startOffset: 0, endOffset: 29 } }, - keyphraseForms: [ [ "key", "keys" ], [ "phrase", "phrases" ] ], - expectedResult: [ - { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, - { text: "phrase", sourceCodeRange: { startOffset: 24, endOffset: 30 } }, - ], - }, - { - testDescription: "Matches a keyphrase with an apostrophe.", - sentence: { - text: "All the keyphrase's forms should be matched.", - tokens: [ - { text: "All", sourceCodeRange: { startOffset: 0, endOffset: 3 } }, - { text: " ", sourceCodeRange: { startOffset: 3, endOffset: 4 } }, - { text: "the", sourceCodeRange: { startOffset: 4, endOffset: 7 } }, - { text: " ", sourceCodeRange: { startOffset: 7, endOffset: 8 } }, - { text: "keyphrase", sourceCodeRange: { startOffset: 8, endOffset: 17 } }, - { text: "'", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, - { text: "s", sourceCodeRange: { startOffset: 18, endOffset: 19 } }, - { text: " ", sourceCodeRange: { startOffset: 19, endOffset: 20 } }, - { text: "forms", sourceCodeRange: { startOffset: 20, endOffset: 25 } }, - { text: " ", sourceCodeRange: { startOffset: 25, endOffset: 26 } }, - { text: "should", sourceCodeRange: { startOffset: 26, endOffset: 32 } }, - { text: " ", sourceCodeRange: { startOffset: 32, endOffset: 33 } }, - { text: "be", sourceCodeRange: { startOffset: 34, endOffset: 36 } }, - { text: " ", sourceCodeRange: { startOffset: 36, endOffset: 37 } }, - { text: "matched", sourceCodeRange: { startOffset: 37, endOffset: 44 } }, - { text: ".", sourceCodeRange: { startOffset: 44, endOffset: 45 } }, - ], - sourceCodeRange: { startOffset: 0, endOffset: 45 } }, - keyphraseForms: [ [ "keyphrase", "keyphrases", "keyphrase's" ] ], + keyphraseForms: [ [ 
"key.phrase" ] ], + exactMatching: true, expectedResult: [ - { text: "keyphrase", sourceCodeRange: { startOffset: 8, endOffset: 17 } }, - { text: "'", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, - { text: "s", sourceCodeRange: { startOffset: 18, endOffset: 19 } }, + { text: "key.phrase", sourceCodeRange: { startOffset: 18, endOffset: 28 } }, ], }, + + ]; // eslint-enable max-len @@ -665,9 +875,9 @@ const japaneseTestCases = [ // eslint-disable-next-line max-len -describe.each( testCases )( "findKeyWordFormsInSentence", ( { testDescription, sentence, keyphraseForms, expectedResult } ) => { +describe.each( testCases )( "findKeyWordFormsInSentence", ( { testDescription, sentence, keyphraseForms, exactMatching, expectedResult } ) => { it( testDescription, () => { - expect( matchKeyphraseWithSentence( keyphraseForms, sentence ) ).toEqual( expectedResult ); + expect( matchKeyphraseWithSentence( keyphraseForms, sentence, exactMatching ) ).toEqual( expectedResult ); } ); } ); From 5e5bca30558e1c1baa0720952d00aaf03b5716b3 Mon Sep 17 00:00:00 2001 From: hdvos Date: Tue, 13 Jun 2023 10:12:27 +0200 Subject: [PATCH 130/616] implement exact matching --- .../match/matchKeyphraseWithSentence.js | 54 ++++++++++++++++++- 1 file changed, 53 insertions(+), 1 deletion(-) diff --git a/packages/yoastseo/src/languageProcessing/helpers/match/matchKeyphraseWithSentence.js b/packages/yoastseo/src/languageProcessing/helpers/match/matchKeyphraseWithSentence.js index 957c0128eb8..22e66571432 100644 --- a/packages/yoastseo/src/languageProcessing/helpers/match/matchKeyphraseWithSentence.js +++ b/packages/yoastseo/src/languageProcessing/helpers/match/matchKeyphraseWithSentence.js @@ -1,7 +1,53 @@ import { cloneDeep } from "lodash-es"; +import getWordsForHTMLParser from "../word/getWordsForHTMLParser"; const wordCouplers = [ "_", "-", "'" ]; +const tokenizeKeywordFormsForExactMatching = ( keywordForms ) => { + // Tokenize keyword forms. + const keywordFormsText = keywordForms[ 0 ][ 0 ]; + console.log( keywordFormsText ); + return getWordsForHTMLParser( keywordFormsText ); +}; + +const exactMatch = ( keywordForms, sentence ) => { + // Tokenize keyword forms. + const keywordTokens = tokenizeKeywordFormsForExactMatching( keywordForms ); + + const sentenceTokens = sentence.tokens; + + // Check if tokenized keyword forms occur in the same order in the sentence tokens. + let keywordIndex = 0; + let sentenceIndex = 0; + const matches = []; + let currentMatch = []; + + while ( sentenceIndex < sentenceTokens.length ) { + // If the current sentence token matches the current keyword token, add it to the current match. + const sentenceTokenText = sentenceTokens[ sentenceIndex ].text; + const keywordTokenText = keywordTokens[ keywordIndex ]; + + if ( sentenceTokenText.toLowerCase() === keywordTokenText.toLowerCase() ) { + currentMatch.push( sentenceTokens[ sentenceIndex ] ); + keywordIndex++; + } else { + keywordIndex = 0; + currentMatch = []; + } + + // If the current match has the same length as the keyword tokens, the keyword forms have been matched. + // Add the current match to the matches array and reset the keyword index and the current match. + if ( currentMatch.length === keywordTokens.length ) { + matches.push( ...currentMatch ); + keywordIndex = 0; + currentMatch = []; + } + + sentenceIndex++; + } + return matches; +}; + /** * Matches a keyword with a sentence object from the html parser. * @@ -9,6 +55,8 @@ const wordCouplers = [ "_", "-", "'" ]; * E.g. 
If the keyphrase is "key word", then (if premium is activated) this will be [ [ "key", "keys" ], [ "word", "words" ] ] * The forms are retrieved higher up (among others in keywordCount.js) with researcher.getResearch( "morphology" ). * @param {Sentence} sentence The sentence to match against the keywordForms. + * @param {boolean} exactMatching Whether to match the keyword forms exactly or not. + * Depends on whether the user has put the keyphrase in double quotes. * * @returns {Token[]} The tokens that match the keywordForms. * @@ -24,9 +72,13 @@ const wordCouplers = [ "_", "-", "'" ]; * This function corrects for these differences by combining tokens that are separated by a word coupler (e.g. "-") into one token: the matchToken. * This matchToken is then compared with the keyword forms. */ -const matchKeyphraseWithSentence = ( keywordForms, sentence ) => { +const matchKeyphraseWithSentence = ( keywordForms, sentence, exactMatching = false ) => { const tokens = sentence.tokens.slice(); + if ( exactMatching ) { + return exactMatch( keywordForms, sentence ); + } + // Filter out all tokens that do not match the keyphrase forms. const matches = []; From d66285f857b6726d9fbe4bfbc742d1f64cebc179 Mon Sep 17 00:00:00 2001 From: hdvos Date: Tue, 13 Jun 2023 11:05:01 +0200 Subject: [PATCH 131/616] reformatting of matchKeyphraseWithSentence.js --- .../match/matchKeyphraseWithSentence.js | 141 +++++++++++------- 1 file changed, 88 insertions(+), 53 deletions(-) diff --git a/packages/yoastseo/src/languageProcessing/helpers/match/matchKeyphraseWithSentence.js b/packages/yoastseo/src/languageProcessing/helpers/match/matchKeyphraseWithSentence.js index 22e66571432..b1aff35d367 100644 --- a/packages/yoastseo/src/languageProcessing/helpers/match/matchKeyphraseWithSentence.js +++ b/packages/yoastseo/src/languageProcessing/helpers/match/matchKeyphraseWithSentence.js @@ -3,14 +3,26 @@ import getWordsForHTMLParser from "../word/getWordsForHTMLParser"; const wordCouplers = [ "_", "-", "'" ]; +/** + * Tokenize keyword forms for exact matching. This function gets the keyword form and tokenizes it. + * This function assumes that if a keyphrase needs to be matched exactly, there will be only one keyword form. + * This is the result of how the focus keyword is processed in buildTopicStems.js in the buildStems function. + * @param {(string[])[]} keywordForms The keyword forms to tokenize. + * @returns {string[]} The tokenized keyword forms. + */ const tokenizeKeywordFormsForExactMatching = ( keywordForms ) => { // Tokenize keyword forms. const keywordFormsText = keywordForms[ 0 ][ 0 ]; - console.log( keywordFormsText ); return getWordsForHTMLParser( keywordFormsText ); }; -const exactMatch = ( keywordForms, sentence ) => { +/** + * Exact matching of keyword forms in a sentence. Exact matching happens when the user puts the keyword in double quotes. + * @param {(string[])[]} keywordForms The keyword forms to match. + * @param {Sentence} sentence The sentence to match the keyword forms with. + * @returns {Token[]} The tokens that exactly match the keyword forms. + */ +const exactMatching = ( keywordForms, sentence ) => { // Tokenize keyword forms. const keywordTokens = tokenizeKeywordFormsForExactMatching( keywordForms ); @@ -48,6 +60,75 @@ const exactMatch = ( keywordForms, sentence ) => { return matches; }; +/** + * Prepares the tokens for matching by combining words separated by a wordcoupler (hyphen/underscore) into one token. + * @param {Token[]} tokens The tokens to prepare for matching. 
+ * @param {number} i The index of the current token. + * @returns {{tokenForMatching: string, tokensForMatching: Token[]}} The token used for matching and + * the tokens that are combined into the token used for matching. + */ +const getTokensForMatching = ( tokens, i ) => { + const tokenForMatching = cloneDeep( tokens[ i ] ); + + // The token used for matching (tokenForMatching) may consist of multiple tokens combined, + // since we want to combine words separated by a hyphen/underscore into one token. + // This array keeps track of all tokens that are combined into the token used for matching + // and is later used to add all individual tokens to the array of matches. + const tokensForMatching = [ ]; + // Add the current token to the tokens for matching. + tokensForMatching.push( cloneDeep( tokens[ i ] ) ); + + // While the next token is a word coupler, add it to the current token. + while ( tokens[ i + 1 ] && wordCouplers.includes( tokens[ i + 1 ].text ) ) { + // Add the word coupler to the token for matching. + i++; + tokenForMatching.text += tokens[ i ].text; + tokenForMatching.sourceCodeRange.endOffset = tokens[ i ].sourceCodeRange.endOffset; + tokensForMatching.push( tokens[ i ] ); + + // If there is a token after the word coupler, add it to the token for matching. as well. + i++; + if ( ! tokens[ i ] ) { + break; + } + tokenForMatching.text += tokens[ i ].text; + tokenForMatching.sourceCodeRange.endOffset = tokens[ i ].sourceCodeRange.endOffset; + tokensForMatching.push( tokens[ i ] ); + } + + return { tokenForMatching, tokensForMatching }; +}; + + +/** + * Free matching of keyword forms in a sentence. Free matching happens when the user does not put the keyword in double quotes. + * @param {(string[])[]} keywordForms The keyword forms to match. + * @param {Sentence} sentence The sentence to match the keyword forms with. + * @returns {Token[]} The tokens that match the keyword forms. + */ +const freeMatching = ( keywordForms, sentence ) => { + const tokens = sentence.tokens.slice(); + + // Filter out all tokens that do not match the keyphrase forms. + const matches = []; + + // Iterate over all tokens in the sentence. + for ( let i = 0; i < tokens.length; i++ ) { + const { tokenForMatching, tokensForMatching } = getTokensForMatching( tokens, i ); + + // Compare the matchtoken with the keyword forms. + keywordForms.forEach( ( keywordForm ) => { + keywordForm.forEach( ( keywordFormPart ) => { + if ( tokenForMatching.text.toLowerCase() === keywordFormPart.toLowerCase() ) { + matches.push( ...tokensForMatching ); + } + } ); + } ); + } + + return matches; +}; + /** * Matches a keyword with a sentence object from the html parser. * @@ -55,7 +136,7 @@ const exactMatch = ( keywordForms, sentence ) => { * E.g. If the keyphrase is "key word", then (if premium is activated) this will be [ [ "key", "keys" ], [ "word", "words" ] ] * The forms are retrieved higher up (among others in keywordCount.js) with researcher.getResearch( "morphology" ). * @param {Sentence} sentence The sentence to match against the keywordForms. - * @param {boolean} exactMatching Whether to match the keyword forms exactly or not. + * @param {boolean} useExactMatching Whether to match the keyword forms exactly or not. * Depends on whether the user has put the keyphrase in double quotes. * * @returns {Token[]} The tokens that match the keywordForms. 
@@ -72,57 +153,11 @@ const exactMatch = ( keywordForms, sentence ) => { * This function corrects for these differences by combining tokens that are separated by a word coupler (e.g. "-") into one token: the matchToken. * This matchToken is then compared with the keyword forms. */ -const matchKeyphraseWithSentence = ( keywordForms, sentence, exactMatching = false ) => { - const tokens = sentence.tokens.slice(); - - if ( exactMatching ) { - return exactMatch( keywordForms, sentence ); - } - - // Filter out all tokens that do not match the keyphrase forms. - const matches = []; - - // Iterate over all tokens in the sentence. - for ( let i = 0; i < tokens.length; i++ ) { - const tokenForMatching = cloneDeep( tokens[ i ] ); - - // The token used for matching (tokenForMatching) may consist of multiple tokens combined, - // since we want to combine words separated by a hyphen/underscore into one token. - // This array keeps track of all tokens that are combined into the token used for matching - // and is later used to add all individual tokens to the array of matches. - const tokensForMatching = [ ]; - // Add the current token to the tokens for matching. - tokensForMatching.push( cloneDeep( tokens[ i ] ) ); - - // While the next token is a word coupler, add it to the current token. - while ( tokens[ i + 1 ] && wordCouplers.includes( tokens[ i + 1 ].text ) ) { - // Add the word coupler to the token for matching. - i++; - tokenForMatching.text += tokens[ i ].text; - tokenForMatching.sourceCodeRange.endOffset = tokens[ i ].sourceCodeRange.endOffset; - tokensForMatching.push( tokens[ i ] ); - - // If there is a token after the word coupler, add it to the token for matching. as well. - i++; - if ( ! tokens[ i ] ) { - break; - } - tokenForMatching.text += tokens[ i ].text; - tokenForMatching.sourceCodeRange.endOffset = tokens[ i ].sourceCodeRange.endOffset; - tokensForMatching.push( tokens[ i ] ); - } - - // Compare the matchtoken with the keyword forms. - keywordForms.forEach( ( keywordForm ) => { - keywordForm.forEach( ( keywordFormPart ) => { - if ( tokenForMatching.text.toLowerCase() === keywordFormPart.toLowerCase() ) { - matches.push( ...tokensForMatching ); - } - } ); - } ); +const matchKeyphraseWithSentence = ( keywordForms, sentence, useExactMatching = false ) => { + if ( useExactMatching ) { + return exactMatching( keywordForms, sentence ); } - - return matches; + return freeMatching( keywordForms, sentence ); }; export default matchKeyphraseWithSentence; From 2cfea784b7a3b789d7a495db6f4195346508673c Mon Sep 17 00:00:00 2001 From: hdvos Date: Tue, 13 Jun 2023 11:35:19 +0200 Subject: [PATCH 132/616] remove unused import --- packages/yoastseo/src/parse/language/LanguageProcessor.js | 2 -- 1 file changed, 2 deletions(-) diff --git a/packages/yoastseo/src/parse/language/LanguageProcessor.js b/packages/yoastseo/src/parse/language/LanguageProcessor.js index d9043e086b7..733ad61fff1 100644 --- a/packages/yoastseo/src/parse/language/LanguageProcessor.js +++ b/packages/yoastseo/src/parse/language/LanguageProcessor.js @@ -1,12 +1,10 @@ import Sentence from "../structure/Sentence"; import Token from "../structure/Token"; -import { punctuationRegexStart, punctuationRegexEnd } from "../../languageProcessing/helpers/sanitize/removePunctuation"; import getWordsForHTMLParser from "../../languageProcessing/helpers/word/getWordsForHTMLParser"; const whitespaceRegex = /^\s+$/; - /** * Contains language-specific logic for splitting a text into sentences and tokens. 
*/ From c851eb7b1e8a1c4455fada6178b05cf88c11118a Mon Sep 17 00:00:00 2001 From: hdvos Date: Tue, 13 Jun 2023 11:35:28 +0200 Subject: [PATCH 133/616] add jsdocs --- .../helpers/word/getWordsForHTMLParser.js | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/packages/yoastseo/src/languageProcessing/helpers/word/getWordsForHTMLParser.js b/packages/yoastseo/src/languageProcessing/helpers/word/getWordsForHTMLParser.js index f6721ac39eb..d03c81c9bb1 100644 --- a/packages/yoastseo/src/languageProcessing/helpers/word/getWordsForHTMLParser.js +++ b/packages/yoastseo/src/languageProcessing/helpers/word/getWordsForHTMLParser.js @@ -15,9 +15,17 @@ import { punctuationRegexEnd, punctuationRegexStart } from "../sanitize/removePu const wordSeparatorsRegex = /([\s\t\u00A0])/; /** - * Returns an array with tokens used in the text. - * @param text - * @return {*[]} + * Tokenizes a text similar to getWords, but in a suitable way for the HTML parser. + * 1. It does not normalize whitespace. + * This operation is too risky for the HTML parser because it may throw away characters and as a result, the token positions are corrupted. + * 2. It does not remove punctuation marks but keeps them. + * + * This algorithm splits the text by word separators: tokens that are the border between two words. + * This algorithm separates punctuation marks from words and keeps them as separate tokens. + * It only splits them off if they appear at the start or end of a word. + * + * @param {string} text The text to tokenize. + * @returns {string[]} An array of tokens. */ const getWordsForHTMLParser = ( text ) => { // Split the sentence string into tokens. Those tokens are unrefined as they may contain punctuation. From 8a22e4c33552398b32484883b55f0419e69372ae Mon Sep 17 00:00:00 2001 From: Mykola Shlyakhtun Date: Tue, 13 Jun 2023 15:06:15 +0300 Subject: [PATCH 134/616] Rename file to use unified name for function and file. 
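
The helper itself is unchanged by this commit; the file name is only brought in line
with the exported function name, which the Japanese Researcher already imports as
splitIntoTokensCustom. A rough usage sketch (the call shape is taken from
splitIntoTokensCustomSpec.js; `sentence` is a placeholder for a Japanese sentence string):

    const tokens = splitIntoTokensCustom( sentence );
    // `tokens` is the array of token strings that the spec compares against `expected`.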
--- ...IntoTokensJapaneseSpec.js => splitIntoTokensCustomSpec.js} | 4 ++-- packages/yoastseo/spec/parse/build/buildSpec.js | 2 +- .../yoastseo/spec/parse/language/LanguageProcessorSpec.js | 2 +- .../src/languageProcessing/languages/ja/Researcher.js | 2 +- .../{splitIntoTokensJapanese.js => splitIntoTokensCustom.js} | 0 5 files changed, 5 insertions(+), 5 deletions(-) rename packages/yoastseo/spec/languageProcessing/languages/ja/helpers/{splitIntoTokensJapaneseSpec.js => splitIntoTokensCustomSpec.js} (90%) rename packages/yoastseo/src/languageProcessing/languages/ja/helpers/{splitIntoTokensJapanese.js => splitIntoTokensCustom.js} (100%) diff --git a/packages/yoastseo/spec/languageProcessing/languages/ja/helpers/splitIntoTokensJapaneseSpec.js b/packages/yoastseo/spec/languageProcessing/languages/ja/helpers/splitIntoTokensCustomSpec.js similarity index 90% rename from packages/yoastseo/spec/languageProcessing/languages/ja/helpers/splitIntoTokensJapaneseSpec.js rename to packages/yoastseo/spec/languageProcessing/languages/ja/helpers/splitIntoTokensCustomSpec.js index 62720611b51..376f6c89dc1 100644 --- a/packages/yoastseo/spec/languageProcessing/languages/ja/helpers/splitIntoTokensJapaneseSpec.js +++ b/packages/yoastseo/spec/languageProcessing/languages/ja/helpers/splitIntoTokensCustomSpec.js @@ -1,4 +1,4 @@ -import splitIntoTokensJapanese from "../../../../../src/languageProcessing/languages/ja/helpers/splitIntoTokensJapanese"; +import splitIntoTokensCustom from "../../../../../src/languageProcessing/languages/ja/helpers/splitIntoTokensCustom"; const testcases = [ { @@ -51,7 +51,7 @@ const testcases = [ describe.each( testcases )( "splitIntoTokensJapanese: %p", ( { description, sentence, expected } ) => { it( description, () => { - const tokens = splitIntoTokensJapanese( sentence ); + const tokens = splitIntoTokensCustom( sentence ); expect( tokens ).toEqual( expected ); } ); } ); diff --git a/packages/yoastseo/spec/parse/build/buildSpec.js b/packages/yoastseo/spec/parse/build/buildSpec.js index 82774586362..1c08d2a5dce 100644 --- a/packages/yoastseo/spec/parse/build/buildSpec.js +++ b/packages/yoastseo/spec/parse/build/buildSpec.js @@ -2,7 +2,7 @@ import build from "../../../src/parse/build/build"; import LanguageProcessor from "../../../src/parse/language/LanguageProcessor"; import Factory from "../../specHelpers/factory"; import memoizedSentenceTokenizer from "../../../src/languageProcessing/helpers/sentence/memoizedSentenceTokenizer"; -import splitIntoTokensCustom from "../../../src/languageProcessing/languages/ja/helpers/splitIntoTokensJapanese"; +import splitIntoTokensCustom from "../../../src/languageProcessing/languages/ja/helpers/splitIntoTokensCustom"; describe( "The parse function", () => { it( "parses a basic HTML text", () => { diff --git a/packages/yoastseo/spec/parse/language/LanguageProcessorSpec.js b/packages/yoastseo/spec/parse/language/LanguageProcessorSpec.js index 4f023b01c39..fadd25f9ddf 100644 --- a/packages/yoastseo/spec/parse/language/LanguageProcessorSpec.js +++ b/packages/yoastseo/spec/parse/language/LanguageProcessorSpec.js @@ -2,7 +2,7 @@ import LanguageProcessor from "../../../src/parse/language/LanguageProcessor"; import Factory from "../../specHelpers/factory"; import memoizedSentenceTokenizer from "../../../src/languageProcessing/helpers/sentence/memoizedSentenceTokenizer"; import Sentence from "../../../src/parse/structure/Sentence"; -import splitIntoTokensCustom from "../../../src/languageProcessing/languages/ja/helpers/splitIntoTokensJapanese"; +import 
splitIntoTokensCustom from "../../../src/languageProcessing/languages/ja/helpers/splitIntoTokensCustom"; const researcher = Factory.buildMockResearcher( {}, true, false, false, { memoizedTokenizer: memoizedSentenceTokenizer } ); diff --git a/packages/yoastseo/src/languageProcessing/languages/ja/Researcher.js b/packages/yoastseo/src/languageProcessing/languages/ja/Researcher.js index fbd0e0cf813..10fc57d734c 100644 --- a/packages/yoastseo/src/languageProcessing/languages/ja/Researcher.js +++ b/packages/yoastseo/src/languageProcessing/languages/ja/Researcher.js @@ -10,7 +10,7 @@ import customCountLength from "./helpers/countCharacters"; import matchTransitionWordsHelper from "./helpers/matchTransitionWords"; import getContentWords from "./helpers/getContentWords"; import memoizedTokenizer from "./helpers/memoizedSentenceTokenizer"; -import splitIntoTokensCustom from "./helpers/splitIntoTokensJapanese"; +import splitIntoTokensCustom from "./helpers/splitIntoTokensCustom"; // All config import firstWordExceptions from "./config/firstWordExceptions"; diff --git a/packages/yoastseo/src/languageProcessing/languages/ja/helpers/splitIntoTokensJapanese.js b/packages/yoastseo/src/languageProcessing/languages/ja/helpers/splitIntoTokensCustom.js similarity index 100% rename from packages/yoastseo/src/languageProcessing/languages/ja/helpers/splitIntoTokensJapanese.js rename to packages/yoastseo/src/languageProcessing/languages/ja/helpers/splitIntoTokensCustom.js From 61c4e98f611f667b3182a035e8125704cf9b3919 Mon Sep 17 00:00:00 2001 From: Mykola Shlyakhtun Date: Tue, 13 Jun 2023 15:12:53 +0300 Subject: [PATCH 135/616] Fix test name. --- .../languages/ja/helpers/splitIntoTokensCustomSpec.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/yoastseo/spec/languageProcessing/languages/ja/helpers/splitIntoTokensCustomSpec.js b/packages/yoastseo/spec/languageProcessing/languages/ja/helpers/splitIntoTokensCustomSpec.js index 376f6c89dc1..dd6f0f7cef7 100644 --- a/packages/yoastseo/spec/languageProcessing/languages/ja/helpers/splitIntoTokensCustomSpec.js +++ b/packages/yoastseo/spec/languageProcessing/languages/ja/helpers/splitIntoTokensCustomSpec.js @@ -49,7 +49,7 @@ const testcases = [ }, ]; -describe.each( testcases )( "splitIntoTokensJapanese: %p", ( { description, sentence, expected } ) => { +describe.each( testcases )( "splitIntoTokensCustom for Japanese: %p", ( { description, sentence, expected } ) => { it( description, () => { const tokens = splitIntoTokensCustom( sentence ); expect( tokens ).toEqual( expected ); From 948d9810a1b3cfa4d93fe17fae9b263fda945b87 Mon Sep 17 00:00:00 2001 From: hdvos Date: Wed, 14 Jun 2023 11:45:45 +0200 Subject: [PATCH 136/616] refactor keywordCountSpec.js --- .../researches/keywordCountSpec.js | 327 ++++++++++-------- 1 file changed, 190 insertions(+), 137 deletions(-) diff --git a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js index ce5d6324f77..d08457dad85 100644 --- a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js +++ b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js @@ -33,155 +33,208 @@ const mockResearcherApostrophe = buildMorphologyMockResearcher( [ [ "key`word" ] // Escape, since the morphology researcher escapes regex as well. 
const mockResearcherDollarSign = buildMorphologyMockResearcher( [ [ "\\$keyword" ] ] ); -describe( "Test for counting the keyword in a text", function() { - it( "counts/marks a string of text with a keyword in it.", function() { - const mockPaper = new Paper( "a string of text with the keyword in it" ); - expect( keywordCount( mockPaper, mockResearcher ).count ).toBe( 1 ); - expect( keywordCount( mockPaper, mockResearcher ).markings ).toEqual( [ +const testCases = [ + { + description: "counts/marks a string of text with a keyword in it.", + paper: new Paper( "a string of text with the keyword in it", { keyword: "keyword" } ), + keyphraseForms: [ [ "keyword", "keywords" ] ], + expectedCount: 1, + expectedMarkings: [ new Mark( { marked: "a string of text with the keyword in it", - original: "a string of text with the keyword in it" } ) ] - ); - } ); - - it( "counts a string of text with no keyword in it.", function() { - const mockPaper = new Paper( "a string of text" ); - expect( keywordCount( mockPaper, mockResearcher ).count ).toBe( 0 ); - expect( keywordCount( mockPaper, mockResearcher ).markings ).toEqual( [] ); - } ); - - it( "counts multiple occurrences of a keyphrase consisting of multiple words.", function() { - const mockPaper = new Paper( "a string of text with the key word in it, with more key words." ); - expect( keywordCount( mockPaper, mockResearcherKeyWord ).count ).toBe( 2 ); - expect( keywordCount( mockPaper, mockResearcherKeyWord ).markings ).toEqual( [ + original: "a string of text with the keyword in it" } ), + ], + }, + { + description: "counts a string of text with no keyword in it.", + paper: new Paper( "a string of text", { keyword: "" } ), + keyphraseForms: [ [ "" ] ], + expectedCount: 0, + expectedMarkings: [], + }, + { + description: "counts multiple occurrences of a keyphrase consisting of multiple words.", + paper: new Paper( "a string of text with the key word in it, with more key words.", { keyword: "key word" } ), + keyphraseForms: [ [ "key", "keys" ], [ "word", "words" ] ], + expectedCount: 2, + expectedMarkings: [ new Mark( { marked: "a string of text with the key word in it, " + - "with more key words.", - original: "a string of text with the key word in it, with more key words." } ) ] - ); - } ); + "with more key words.", + original: "a string of text with the key word in it, with more key words." } ) ], - it( "counts a string of text with German diacritics and eszett as the keyword", function() { - const mockPaper = new Paper( "Waltz keepin auf mitz auf keepin äöüß weiner blitz deutsch spitzen." ); - expect( keywordCount( mockPaper, mockResearcherGermanDiacritics ).count ).toBe( 1 ); - expect( keywordCount( mockPaper, mockResearcherGermanDiacritics ).markings ).toEqual( [ + }, + { + description: "counts a string of text with German diacritics and eszett as the keyword", + paper: new Paper( "Waltz keepin auf mitz auf keepin äöüß weiner blitz deutsch spitzen.", { keyword: "äöüß" } ), + keyphraseForms: [ [ "äöüß" ] ], + expectedCount: 1, + expectedMarkings: [ new Mark( { marked: "Waltz keepin auf mitz auf keepin äöüß weiner blitz deutsch spitzen.", - original: "Waltz keepin auf mitz auf keepin äöüß weiner blitz deutsch spitzen." } ) ] - ); - } ); - - it( "counts a string with multiple keyword morphological forms", function() { - const mockPaper = new Paper( "A string of text with a keyword and multiple keywords in it." 
); - expect( keywordCount( mockPaper, mockResearcher ).count ).toBe( 2 ); - expect( keywordCount( mockPaper, mockResearcher ).markings ).toEqual( [ - new Mark( { marked: "A string of text with a keyword " + - "and multiple keywords in it.", - original: "A string of text with a keyword and multiple keywords in it." } ) ] - ); - } ); + original: "Waltz keepin auf mitz auf keepin äöüß weiner blitz deutsch spitzen." } ) ], + }, + { + description: "counts a string with multiple keyword morphological forms", + paper: new Paper( "A string of text with a keyword and multiple keywords in it.", { keyword: "keyword" } ), + keyphraseForms: [ [ "keyword", "keywords" ] ], + expectedCount: 2, + expectedMarkings: [ new Mark( { marked: "A string of text with a keyword " + + "and multiple keywords in it.", + original: "A string of text with a keyword and multiple keywords in it." } ) ], - it( "counts a string with a keyword with a '-' in it", function() { - const mockPaper = new Paper( "A string with a key-word." ); - expect( keywordCount( mockPaper, mockResearcherMinus ).count ).toBe( 1 ); - expect( keywordCount( mockPaper, mockResearcherMinus ).markings ).toEqual( [ - new Mark( { marked: "A string with a key-word.", - original: "A string with a key-word." } ) ] - ); - } ); + }, + { + description: "counts a string with a keyword with a '-' in it", + paper: new Paper( "A string with a key-word.", { keyword: "key-word" } ), + keyphraseForms: [ [ "key-word", "key-words" ] ], + expectedCount: 1, + expectedMarkings: [ new Mark( { marked: "A string with a key-word.", + original: "A string with a key-word." } ) ], - it( "counts 'key word' in 'key-word'.", function() { - const mockPaper = new Paper( "A string with a key-word." ); - expect( keywordCount( mockPaper, mockResearcherKeyWord ).count ).toBe( 1 ); + }, + { + description: "counts 'key word' in 'key-word'.", + paper: new Paper( "A string with a key-word.", { keyword: "key word" } ), + keyphraseForms: [ [ "key", "keys" ], [ "word", "words" ] ], + expectedCount: 1, + expectedMarkings: [ new Mark( { + marked: "A string with a key-word.", + original: "A string with a key-word." } ) ], // Note: this behavior might change in the future. - } ); - - it( "counts a string with a keyword with a '_' in it", function() { - const mockPaper = new Paper( "A string with a key_word." ); - expect( keywordCount( mockPaper, mockResearcherUnderscore ).count ).toBe( 1 ); - expect( keywordCount( mockPaper, mockResearcherUnderscore ).markings ).toEqual( [ - new Mark( { marked: "A string with a key_word.", - original: "A string with a key_word." } ) ] - ); - } ); - - it( "counts a string with with 'kapaklı' as a keyword in it", function() { - const mockPaper = new Paper( "A string with kapaklı." ); - expect( keywordCount( mockPaper, mockResearcherKaplaki ).count ).toBe( 1 ); - expect( keywordCount( mockPaper, mockResearcherKaplaki ).markings ).toEqual( [ - new Mark( { marked: "A string with kapaklı.", - original: "A string with kapaklı." } ) ] - ); - } ); - - it( "counts a string with with '&' in the string and the keyword", function() { - const mockPaper = new Paper( "A string with key&word." ); - expect( keywordCount( mockPaper, mockResearcherAmpersand ).count ).toBe( 1 ); - expect( keywordCount( mockPaper, mockResearcherAmpersand ).markings ).toEqual( [ - new Mark( { marked: "A string with key&word.", - original: "A string with key&word." 
} ) ] - ); - } ); - - it( "does not count images as keywords.", function() { - const mockPaper = new Paper( "" ); - expect( keywordCount( mockPaper, mockResearcherAmpersand ).count ).toBe( 0 ); - expect( keywordCount( mockPaper, mockResearcherAmpersand ).markings ).toEqual( [] ); - } ); - - it( "keyword counting is blind to CApiTal LeTteRs.", function() { - const mockPaper = new Paper( "A string with KeY worD." ); - expect( keywordCount( mockPaper, mockResearcherKeyWord ).count ).toBe( 1 ); - expect( keywordCount( mockPaper, mockResearcherKeyWord ).markings ).toEqual( [ + }, + { + description: "counts a string with a keyword with a '_' in it", + paper: new Paper( "A string with a key_word.", { keyword: "key_word" } ), + keyphraseForms: [ [ "key_word", "key_words" ] ], + expectedCount: 1, + expectedMarkings: [ new Mark( { marked: "A string with a key_word.", + original: "A string with a key_word." } ) ], + }, + { + description: "counts a string with with a 'ı' in the keyphrase", + paper: new Paper( "A string with with 'kapaklı' as a keyword in it", { keyword: "kapaklı" } ), + keyphraseForms: [ [ "kapaklı" ] ], + expectedCount: 1, + expectedMarkings: [ new Mark( { marked: "A string with with 'kapaklı' as a keyword in it", + original: "A string with with 'kapaklı' as a keyword in it" } ) ], + }, + { + description: "counts a string with with '&' in the string and the keyword", + paper: new Paper( "A string with key&word in it", { keyword: "key&word" } ), + keyphraseForms: [ [ "key&word" ] ], + expectedCount: 1, + expectedMarkings: [ new Mark( { marked: "A string with key&word in it", + original: "A string with key&word in it" } ) ], + }, + { + description: "does not count images as keywords.", + paper: new Paper( "image", { keyword: "image" } ), + keyphraseForms: [ [ "image", "images" ] ], + expectedCount: 0, + expectedMarkings: [], + }, + { + description: "keyword counting is blind to CApiTal LeTteRs.", + paper: new Paper( "A string with KeY worD.", { keyword: "key word" } ), + keyphraseForms: [ [ "key", "keys" ], [ "word", "words" ] ], + expectedCount: 1, + expectedMarkings: [ new Mark( { marked: "A string with KeY worD.", - original: "A string with KeY worD." } ) ] - ); - } ); - - it( "keyword counting is blind to types of apostrophe.", function() { - const mockPaper = new Paper( "A string with quotes to match the key'word, even if the quotes differ." ); - expect( keywordCount( mockPaper, mockResearcherApostrophe ).count ).toBe( 1 ); - expect( keywordCount( mockPaper, mockResearcherApostrophe ).markings ).toEqual( [ - new Mark( { marked: "A string with quotes to match the key'word, " + - "even if the quotes differ.", - original: "A string with quotes to match the key'word, even if the quotes differ." } ) ] - ); - } ); - - it( "counts can count dollar sign as in '$keyword'.", function() { - const mockPaper = new Paper( "A string with a $keyword." ); - expect( keywordCount( mockPaper, mockResearcherDollarSign ).count ).toBe( 1 ); - // Markings do not currently work in this condition. - } ); - - it( "counts 'key word' also in 'key-word'.)", function() { - const mockPaper = new Paper( "Lorem ipsum dolor sit amet, key word consectetur key-word adipiscing elit." ); - expect( keywordCount( mockPaper, mockResearcherKeyWord ).count ).toBe( 2 ); - // Note: this behavior might change in the future. - } ); - - it( "doesn't count 'key-word' in 'key word'.", function() { - const mockPaper = new Paper( "Lorem ipsum dolor sit amet, key word consectetur key-word adipiscing elit." 
); - expect( keywordCount( mockPaper, mockResearcherMinus ).count ).toBe( 1 ); - // Note: this behavior might change in the future. - } ); - - it( "doesn't count keyphrase instances inside elements we want to exclude from the analysis", function() { - const mockPaper = new Paper( "There is no keyword in this sentence." ); - expect( keywordCount( mockPaper, mockResearcher ).count ).toBe( 0 ); - } ); - - it( "only counts full key phrases (when all keywords are in the sentence once, twice etc.) as matches.", function() { - const mockPaper = new Paper( "A string with three keys (key and another key) and one word." ); - expect( keywordCount( mockPaper, mockResearcherKeyWord ).count ).toBe( 1 ); - expect( keywordCount( mockPaper, mockResearcherKeyWord ).markings ).toEqual( [ + original: "A string with KeY worD." } ) ], + }, + { + description: "keyword counting is blind to types of apostrophe.", + paper: new Paper( "A string with quotes to match the key'word, even if the quotes differ.", { keyword: "key'word" } ), + keyphraseForms: [ [ "key'word", "key'words" ] ], + expectedCount: 1, + expectedMarkings: [ new Mark( { + marked: "A string with quotes to match the key'word, even if the quotes differ.", + original: "A string with quotes to match the key'word, even if the quotes differ." } ) ], + }, + { + description: "can match dollar sign as in '$keyword'.", + paper: new Paper( "A string with a $keyword." ), + keyphraseForms: [ [ "\\$keyword" ] ], + expectedCount: 1, + expectedMarkings: [ new Mark( { marked: "A string with a $keyword.", + original: "A string with a $keyword." } ) ], + }, + { + description: "doesn't count 'key-word' in 'key word'.", + paper: new Paper( "A string with a key word.", { keyword: "key-word" } ), + keyphraseForms: [ [ "key-word", "key-words" ] ], + expectedCount: 0, + expectedMarkings: [], + }, + { + description: "doesn't count keyphrase instances inside elements we want to exclude from the analysis", + paper: new Paper( "There is no keyword in this sentence." ), + keyphraseForms: [ [ "keyword", "keywords" ] ], + expectedCount: 0, + expectedMarkings: [], + }, + { + description: "only counts full key phrases (when all keywords are in the sentence once, twice etc.) as matches.", + paper: new Paper( "A string with three keys (key and another key) and one word." ), + keyphraseForms: [ [ "key", "keys" ], [ "word", "words" ] ], + expectedCount: 1, + expectedMarkings: [ new Mark( { marked: "A string with three keys (" + "key and another key) and one word.", - original: "A string with three keys (key and another key) and one word." } ) ] - ); - } ); + original: "A string with three keys (key and another key) and one word." } ) ], + }, + { + description: "doesn't match singular forms in reduplicated plurals in Indonesian", + paper: new Paper( "Lorem ipsum dolor sit amet, consectetur keyword-keyword, keyword adipiscing elit.", + { locale: "id_ID", keyword: "keyword" } ), + keyphraseForms: [ [ "keyword", "keywords" ] ], + expectedCount: 1, + expectedMarkings: [ + new Mark( { + // eslint-disable-next-line max-len + marked: "Lorem ipsum dolor sit amet, consectetur keyword-keyword, keyword adipiscing elit.", + original: "Lorem ipsum dolor sit amet, consectetur keyword-keyword, keyword adipiscing elit." 
} ) ], + }, + { + description: "counts a single word keyphrase with exact matching", + paper: new Paper( "A string with a keyword.", { keyword: "\"keyword\"" } ), + keyphraseForms: [ [ "keyword" ] ], + expectedCount: 1, + expectedMarkings: [ new Mark( { marked: "A string with a keyword.", + original: "A string with a keyword." } ) ], + }, + { + description: "with exact matching, a singular single word keyphrase should not be counted if the focus keyphrase is plural", + paper: new Paper( "A string with a keyword.", { keyword: "\"keywords\"" } ), + keyphraseForms: [ [ "keywords" ] ], + expectedCount: 0, + expectedMarkings: [], + }, + { + description: "with exact matching, a multi word keyphrase should be counted if the focus keyphrase is the same", + paper: new Paper( "A string with a key phrase.", { keyword: "\"key phrase\"" } ), + keyphraseForms: [ [ "key phrase" ] ], + expectedCount: 1, + expectedMarkings: [ new Mark( { marked: "A string with a key phrase.", + original: "A string with a key phrase." } ) ], + }, + { + // eslint-disable-next-line max-len + description: "with exact matching, a multi word keyphrase should not me counted if the focus keyphrase has the same words in a different order", + paper: new Paper( "A string with a phrase key.", { keyword: "\"key phrase\"" } ), + keyphraseForms: [ [ "key phrase" ] ], + expectedCount: 0, + expectedMarkings: [], + }, + +]; - it( "doesn't match singular forms in reduplicated plurals in Indonesian", function() { - const mockPaper = new Paper( "Lorem ipsum dolor sit amet, consectetur keyword-keyword, keyword adipiscing elit.", { locale: "id_ID" } ); - expect( keywordCount( mockPaper, mockResearcher ).count ).toBe( 1 ); +// eslint-disable-next-line max-len +describe.each( testCases )( "Test for counting the keyword in a text in english", function( { description, paper, keyphraseForms, expectedCount, expectedMarkings } ) { + it( description, function() { + const mockResearcher = buildMorphologyMockResearcher( keyphraseForms ); + const keyWordCountResult = keywordCount( paper, mockResearcher ); + expect( keyWordCountResult.count ).toBe( expectedCount ); + expect( keyWordCountResult.markings ).toEqual( expectedMarkings ); } ); } ); From 7a9f1c8bb012c7cf3e4e51d5c5c7c7c13258963a Mon Sep 17 00:00:00 2001 From: hdvos Date: Wed, 14 Jun 2023 13:32:40 +0200 Subject: [PATCH 137/616] add helper that checks if a (key)word is doublequoted --- .../helpers/match/isDoubleQuoted.js | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 packages/yoastseo/src/languageProcessing/helpers/match/isDoubleQuoted.js diff --git a/packages/yoastseo/src/languageProcessing/helpers/match/isDoubleQuoted.js b/packages/yoastseo/src/languageProcessing/helpers/match/isDoubleQuoted.js new file mode 100644 index 00000000000..9ede8f2448d --- /dev/null +++ b/packages/yoastseo/src/languageProcessing/helpers/match/isDoubleQuoted.js @@ -0,0 +1,14 @@ + +import doubleQuotes from "../sanitize/doubleQuotes"; +import { includes } from "lodash-es"; + +/** + * Checks if the keyphrase is double quoted. + * @param {string} keyphrase The keyphrase to check. + * @returns {boolean} Whether the keyphrase is double quoted. 
+ */ +const isDoubleQuoted = ( keyphrase ) => { + return ( includes( doubleQuotes, keyphrase[ 0 ] ) && includes( doubleQuotes, keyphrase[ keyphrase.length - 1 ] ) ); +}; + +export default isDoubleQuoted; From 8b7c2ae9f86819c821a3cbf269acaceee570deae Mon Sep 17 00:00:00 2001 From: hdvos Date: Wed, 14 Jun 2023 13:33:14 +0200 Subject: [PATCH 138/616] apply the isDoubleQuoted helper --- .../helpers/match/processExactMatchRequest.js | 4 ++-- .../helpers/morphology/buildTopicStems.js | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/packages/yoastseo/src/languageProcessing/helpers/match/processExactMatchRequest.js b/packages/yoastseo/src/languageProcessing/helpers/match/processExactMatchRequest.js index a0fc4b26104..6afa1c422db 100644 --- a/packages/yoastseo/src/languageProcessing/helpers/match/processExactMatchRequest.js +++ b/packages/yoastseo/src/languageProcessing/helpers/match/processExactMatchRequest.js @@ -1,4 +1,4 @@ -import doubleQuotes from "../sanitize/doubleQuotes"; +import isDoubleQuoted from "./isDoubleQuoted"; /** * Checks if exact match functionality is requested by enclosing the keyphrase in double quotation marks. @@ -11,7 +11,7 @@ export default function processExactMatchRequest( keyphrase ) { const exactMatchRequest = { exactMatchRequested: false, keyphrase: keyphrase }; // Check if only exact match processing is requested by the user. If so, strip the quotation marks from the keyphrase. - if ( doubleQuotes.includes( keyphrase[ 0 ] ) && doubleQuotes.includes( keyphrase[ keyphrase.length - 1 ] ) ) { + if ( isDoubleQuoted( keyphrase ) ) { exactMatchRequest.keyphrase = keyphrase.substring( 1, keyphrase.length - 1 ); exactMatchRequest.exactMatchRequested = true; } diff --git a/packages/yoastseo/src/languageProcessing/helpers/morphology/buildTopicStems.js b/packages/yoastseo/src/languageProcessing/helpers/morphology/buildTopicStems.js index 2ac596f4e77..b3657013a49 100644 --- a/packages/yoastseo/src/languageProcessing/helpers/morphology/buildTopicStems.js +++ b/packages/yoastseo/src/languageProcessing/helpers/morphology/buildTopicStems.js @@ -1,7 +1,8 @@ import getWords from "../word/getWords.js"; import { normalizeSingle } from "../sanitize/quotes"; -import { includes, isUndefined, escapeRegExp, memoize } from "lodash-es"; +import { isUndefined, escapeRegExp, memoize } from "lodash-es"; +import isDoubleQuoted from "../match/isDoubleQuoted"; /** * A topic phrase (i.e., a keyphrase or synonym) with stem-original pairs for the words in the topic phrase. @@ -62,8 +63,7 @@ const buildStems = function( keyphrase, stemmer, functionWords ) { } // If the keyphrase is embedded in double quotation marks, return keyword itself, without outer-most quotation marks. - const doubleQuotes = [ "“", "”", "〝", "〞", "〟", "‟", "„", "\"" ]; - if ( includes( doubleQuotes, keyphrase[ 0 ] ) && includes( doubleQuotes, keyphrase[ keyphrase.length - 1 ] ) ) { + if ( isDoubleQuoted( keyphrase ) ) { keyphrase = keyphrase.substring( 1, keyphrase.length - 1 ); return new TopicPhrase( [ new StemOriginalPair( escapeRegExp( keyphrase ), keyphrase ) ], From b258a956f3f3799a3905151cd303ee60f85ee058 Mon Sep 17 00:00:00 2001 From: hdvos Date: Wed, 14 Jun 2023 13:34:04 +0200 Subject: [PATCH 139/616] add skipped specs for exact matching. 
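
These cases are added with `skip: true` for now; they document the behaviour expected from
exact matching once it is fully wired into the keyword count research. Roughly, mirroring the
cases in the diff below (`researcher` stands in for the mock researcher that the spec builds
from the keyphrase forms):

    keywordCount( new Paper( "A string with a key phrase.", { keyword: "\"key phrase\"" } ), researcher );
    // expected count: 1, the double-quoted keyphrase occurs in the same form and order.

    keywordCount( new Paper( "A string with a phrase key.", { keyword: "\"key phrase\"" } ), researcher );
    // expected count: 0, the same words in a different order should not be counted.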
--- .../researches/keywordCountSpec.js | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js index d08457dad85..1d9d32c8a8d 100644 --- a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js +++ b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js @@ -208,6 +208,7 @@ const testCases = [ keyphraseForms: [ [ "keywords" ] ], expectedCount: 0, expectedMarkings: [], + skip: true, }, { description: "with exact matching, a multi word keyphrase should be counted if the focus keyphrase is the same", @@ -216,6 +217,7 @@ const testCases = [ expectedCount: 1, expectedMarkings: [ new Mark( { marked: "A string with a key phrase.", original: "A string with a key phrase." } ) ], + skip: true, }, { // eslint-disable-next-line max-len @@ -224,13 +226,25 @@ const testCases = [ keyphraseForms: [ [ "key phrase" ] ], expectedCount: 0, expectedMarkings: [], + skip: true, + }, + { + description: "with exact matching, it should match a full stop if it is part of the keyphrase and directly precedes the keyphrase.", + paper: new Paper( "A .sentence with a keyphrase.", { keyword: "\".sentence\"" } ), + keyphraseForms: [ [ ".sentence" ] ], + expectedCount: 1, + expectedMarkings: [ new Mark( { marked: "A .sentence with a keyphrase.", + original: "A .sentence with a keyphrase." } ) ], + skip: true, }, ]; // eslint-disable-next-line max-len -describe.each( testCases )( "Test for counting the keyword in a text in english", function( { description, paper, keyphraseForms, expectedCount, expectedMarkings } ) { - it( description, function() { +describe.each( testCases )( "Test for counting the keyword in a text in english", function( { description, paper, keyphraseForms, expectedCount, expectedMarkings, skip } ) { + const test = skip ? 
it.skip : it; + + test( description, function() { const mockResearcher = buildMorphologyMockResearcher( keyphraseForms ); const keyWordCountResult = keywordCount( paper, mockResearcher ); expect( keyWordCountResult.count ).toBe( expectedCount ); From 81956cbdcba5a7c31f00dd10f2da3257a3a19dca Mon Sep 17 00:00:00 2001 From: hdvos Date: Wed, 14 Jun 2023 13:35:24 +0200 Subject: [PATCH 140/616] remove unused mockResearchers --- .../languageProcessing/researches/keywordCountSpec.js | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js index 1d9d32c8a8d..9cf437643bb 100644 --- a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js +++ b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js @@ -22,17 +22,6 @@ const buildMorphologyMockResearcher = function( keyphraseForms ) { }, true, false, false, { memoizedTokenizer: memoizedSentenceTokenizer } ); }; -const mockResearcher = buildMorphologyMockResearcher( [ [ "keyword", "keywords" ] ] ); -const mockResearcherGermanDiacritics = buildMorphologyMockResearcher( [ [ "äöüß" ] ] ); -const mockResearcherMinus = buildMorphologyMockResearcher( [ [ "key-word", "key-words" ] ] ); -const mockResearcherUnderscore = buildMorphologyMockResearcher( [ [ "key_word", "key_words" ] ] ); -const mockResearcherKeyWord = buildMorphologyMockResearcher( [ [ "key", "keys" ], [ "word", "words" ] ] ); -const mockResearcherKaplaki = buildMorphologyMockResearcher( [ [ "kapaklı" ] ] ); -const mockResearcherAmpersand = buildMorphologyMockResearcher( [ [ "key&word" ] ] ); -const mockResearcherApostrophe = buildMorphologyMockResearcher( [ [ "key`word" ] ] ); -// Escape, since the morphology researcher escapes regex as well. 
-const mockResearcherDollarSign = buildMorphologyMockResearcher( [ [ "\\$keyword" ] ] ); - const testCases = [ { description: "counts/marks a string of text with a keyword in it.", From c02891eb89b1be592e0f53d74a17c3b52ff9c858 Mon Sep 17 00:00:00 2001 From: hdvos Date: Wed, 14 Jun 2023 14:41:03 +0200 Subject: [PATCH 141/616] add spec for isDoubleQuoted --- .../helpers/match/isDoubleQuotedSpec.js | 48 +++++++++++++++++++ 1 file changed, 48 insertions(+) create mode 100644 packages/yoastseo/spec/languageProcessing/helpers/match/isDoubleQuotedSpec.js diff --git a/packages/yoastseo/spec/languageProcessing/helpers/match/isDoubleQuotedSpec.js b/packages/yoastseo/spec/languageProcessing/helpers/match/isDoubleQuotedSpec.js new file mode 100644 index 00000000000..0ce43edde94 --- /dev/null +++ b/packages/yoastseo/spec/languageProcessing/helpers/match/isDoubleQuotedSpec.js @@ -0,0 +1,48 @@ +import isDoubleQuoted from "../../../../src/languageProcessing/helpers/match/isDoubleQuoted"; +import doubleQuotes from "../../../../src/languageProcessing/helpers/sanitize/doubleQuotes"; +import sample from "lodash-es"; + +const testCases = [ + { + description: "returns true for double quoted keyphrase", + keyphrase: "\"keyphrase\"", + expectedResult: true, + }, + { + description: "returns false for non-double quoted keyphrase", + keyphrase: "keyphrase", + expectedResult: false, + }, + { + description: "returns true for empty double quoted keyphrase", + keyphrase: "\"\"", + expectedResult: true, + }, + { + description: "returns false for single quoted keyphrase", + keyphrase: "'keyphrase'", + expectedResult: false, + }, + { + description: "returns true for any combination of double quotes", + keyphrase: `${ sample( doubleQuotes ) }keyphrase${ sample( doubleQuotes ) }`, + expectedResult: true, + }, + { + description: "returns false if only starts with a double quote", + keyphrase: "\"keyphrase", + expectedResult: false, + }, + { + description: "returns false if only ends with a double quote", + keyphrase: "keyphrase\"", + expectedResult: false, + }, +]; + +describe.each( testCases )( "isDoubleQuoted( %s )", ( + { description, keyphrase, expectedResult } ) => { + it( description, () => { + expect( isDoubleQuoted( keyphrase ) ).toBe( expectedResult ); + } ); +} ); From f8c42c18c69242e67018648abeb59204cc62730d Mon Sep 17 00:00:00 2001 From: hdvos Date: Wed, 14 Jun 2023 15:43:58 +0200 Subject: [PATCH 142/616] rename spec in isDoubleQuotedSpec.js --- .../spec/languageProcessing/helpers/match/isDoubleQuotedSpec.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/yoastseo/spec/languageProcessing/helpers/match/isDoubleQuotedSpec.js b/packages/yoastseo/spec/languageProcessing/helpers/match/isDoubleQuotedSpec.js index 0ce43edde94..9f953c99252 100644 --- a/packages/yoastseo/spec/languageProcessing/helpers/match/isDoubleQuotedSpec.js +++ b/packages/yoastseo/spec/languageProcessing/helpers/match/isDoubleQuotedSpec.js @@ -40,7 +40,7 @@ const testCases = [ }, ]; -describe.each( testCases )( "isDoubleQuoted( %s )", ( +describe.each( testCases )( "isDoubleQuoted", ( { description, keyphrase, expectedResult } ) => { it( description, () => { expect( isDoubleQuoted( keyphrase ) ).toBe( expectedResult ); From 8c82b2a2590e61c382671163e9009574f0848f5a Mon Sep 17 00:00:00 2001 From: hdvos Date: Wed, 14 Jun 2023 15:44:21 +0200 Subject: [PATCH 143/616] Add spec for getWordsForHTMLParser.js --- .../helpers/word/getWordsForHTMLParserSpec.js | 72 +++++++++++++++++++ 1 file changed, 72 insertions(+) create mode 100644 
packages/yoastseo/spec/languageProcessing/helpers/word/getWordsForHTMLParserSpec.js diff --git a/packages/yoastseo/spec/languageProcessing/helpers/word/getWordsForHTMLParserSpec.js b/packages/yoastseo/spec/languageProcessing/helpers/word/getWordsForHTMLParserSpec.js new file mode 100644 index 00000000000..abbcbec9f27 --- /dev/null +++ b/packages/yoastseo/spec/languageProcessing/helpers/word/getWordsForHTMLParserSpec.js @@ -0,0 +1,72 @@ +import getWordsForHTMLParser from "../../../../src/languageProcessing/helpers/word/getWordsForHTMLParser"; + + +const testCases = [ + { + description: "returns an empty array for an empty string", + text: "", + expectedResult: [], + }, + { + description: "correctly tokenizes a phrase with no punctuation", + text: "a simple phrase", + expectedResult: [ "a", " ", "simple", " ", "phrase" ], + }, + { + description: "correctly tokenizes a simple sentence", + text: "This is a simple sentence.", + expectedResult: [ "This", " ", "is", " ", "a", " ", "simple", " ", "sentence", "." ], + }, + { + description: "correctly tokenizes a sentence with a comma", + text: "This is a simple sentence, with a comma.", + expectedResult: [ "This", " ", "is", " ", "a", " ", "simple", " ", "sentence", ",", " ", "with", " ", "a", " ", "comma", "." ], + }, + { + description: "correctly tokenizes a phrase with a hyphen", + text: "a-hyphenated-phrase", + expectedResult: [ "a-hyphenated-phrase" ], + }, + { + description: "correctly tokenizes a phrase with an apostrophe", + text: "a phrase with an apostrophe's", + expectedResult: [ "a", " ", "phrase", " ", "with", " ", "an", " ", "apostrophe's" ], + }, + { + description: "correctly tokenizes a phrase between quotes", + text: "\"a phrase between quotes\"", + expectedResult: [ "\"", "a", " ", "phrase", " ", "between", " ", "quotes", "\"" ], + }, + { + description: "correctly tokenizes a phrase between parentheses", + text: "(a phrase between parentheses)", + expectedResult: [ "(", "a", " ", "phrase", " ", "between", " ", "parentheses", ")" ], + }, + { + description: "correctly tokenizes a phrase that starts with a space", + text: " a phrase that starts with a space", + expectedResult: [ " ", "a", " ", "phrase", " ", "that", " ", "starts", " ", "with", " ", "a", " ", "space" ], + }, + { + description: "correctly tokenizes a phrase that ends with a space", + text: "a phrase that ends with a space ", + expectedResult: [ "a", " ", "phrase", " ", "that", " ", "ends", " ", "with", " ", "a", " ", "space", " " ], + }, + { + description: "correctly tokenizes a phrase that is separated by non-breaking spaces", + text: "a\u00a0phrase\u00a0that\u00a0is\u00a0separated\u00a0by\u00a0non-breaking\u00a0spaces", + expectedResult: [ "a", "\u00a0", "phrase", "\u00a0", "that", "\u00a0", "is", "\u00a0", "separated", "\u00a0", "by", "\u00a0", + "non-breaking", "\u00a0", "spaces" ], + }, + { + description: "correctly tokenizes a phrase that is separated by tabs", + text: "a\tphrase\tthat\tis\tseparated\tby\ttabs", + expectedResult: [ "a", "\t", "phrase", "\t", "that", "\t", "is", "\t", "separated", "\t", "by", "\t", "tabs" ], + }, +]; + +describe.each( testCases )( "getWordsForHTMLParser", ( { description, text, expectedResult } ) => { + it( description, () => { + expect( getWordsForHTMLParser( text ) ).toEqual( expectedResult ); + } ); +} ); From 87171d5d31df2383756994b048057f59d7a296d3 Mon Sep 17 00:00:00 2001 From: hdvos Date: Wed, 14 Jun 2023 15:45:25 +0200 Subject: [PATCH 144/616] Add early exit for getWordsForHTMLParser.js --- 
.../languageProcessing/helpers/word/getWordsForHTMLParser.js | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/packages/yoastseo/src/languageProcessing/helpers/word/getWordsForHTMLParser.js b/packages/yoastseo/src/languageProcessing/helpers/word/getWordsForHTMLParser.js index d03c81c9bb1..d735b5bf3c0 100644 --- a/packages/yoastseo/src/languageProcessing/helpers/word/getWordsForHTMLParser.js +++ b/packages/yoastseo/src/languageProcessing/helpers/word/getWordsForHTMLParser.js @@ -28,6 +28,10 @@ const wordSeparatorsRegex = /([\s\t\u00A0])/; * @returns {string[]} An array of tokens. */ const getWordsForHTMLParser = ( text ) => { + if ( ! text ) { + return []; + } + // Split the sentence string into tokens. Those tokens are unrefined as they may contain punctuation. const rawTokens = text.split( wordSeparatorsRegex ).filter( x => x !== "" ); From 3f9a24e5612d5afb9e96ce082e7ee832d2937600 Mon Sep 17 00:00:00 2001 From: hdvos Date: Wed, 14 Jun 2023 15:45:53 +0200 Subject: [PATCH 145/616] Remove unused code in getWordsForHTMLParser.js --- .../languageProcessing/helpers/word/getWordsForHTMLParser.js | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/packages/yoastseo/src/languageProcessing/helpers/word/getWordsForHTMLParser.js b/packages/yoastseo/src/languageProcessing/helpers/word/getWordsForHTMLParser.js index d735b5bf3c0..40367e48a6c 100644 --- a/packages/yoastseo/src/languageProcessing/helpers/word/getWordsForHTMLParser.js +++ b/packages/yoastseo/src/languageProcessing/helpers/word/getWordsForHTMLParser.js @@ -37,9 +37,7 @@ const getWordsForHTMLParser = ( text ) => { const tokenTexts = []; rawTokens.forEach( token => { - if ( token === "" ) { - return; - } + // Pretokens contains all that occurs before the first letter of the token. const preTokens = []; // Posttokens contains all that occurs after the last letter of the token. From 8ee58d5e005428da69fe973f119937c5eefc68b1 Mon Sep 17 00:00:00 2001 From: hdvos Date: Wed, 14 Jun 2023 15:48:39 +0200 Subject: [PATCH 146/616] fix es-lint errors. --- .../src/languageProcessing/helpers/word/getWordsForHTMLParser.js | 1 - 1 file changed, 1 deletion(-) diff --git a/packages/yoastseo/src/languageProcessing/helpers/word/getWordsForHTMLParser.js b/packages/yoastseo/src/languageProcessing/helpers/word/getWordsForHTMLParser.js index 40367e48a6c..fb8c2080501 100644 --- a/packages/yoastseo/src/languageProcessing/helpers/word/getWordsForHTMLParser.js +++ b/packages/yoastseo/src/languageProcessing/helpers/word/getWordsForHTMLParser.js @@ -37,7 +37,6 @@ const getWordsForHTMLParser = ( text ) => { const tokenTexts = []; rawTokens.forEach( token => { - // Pretokens contains all that occurs before the first letter of the token. const preTokens = []; // Posttokens contains all that occurs after the last letter of the token. From f9b34482b9b4c981cb8151f97aeb4bfcfc5f41a8 Mon Sep 17 00:00:00 2001 From: hdvos Date: Wed, 14 Jun 2023 15:59:18 +0200 Subject: [PATCH 147/616] uncomment temporarily commented out specs. 
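
The re-enabled cases all follow the same shape: a hand-built Sentence object (its text plus the
tokens with their sourceCodeRange offsets) is matched against the keyphrase forms, and the
expected result is the list of matching Token objects. A condensed sketch of the first
single-word case from the spec (`sentence` is the Sentence built for "A sentence with the
keyword."; offsets as in the test data):

    matchKeyphraseWithSentence( [ [ "keyword", "keywords" ] ], sentence );
    // => [ { sourceCodeRange: { startOffset: 20, endOffset: 27 }, text: "keyword" } ]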
--- .../match/matchKeyphraseWithSentenceSpec.js | 1188 ++++++++--------- 1 file changed, 594 insertions(+), 594 deletions(-) diff --git a/packages/yoastseo/spec/languageProcessing/helpers/match/matchKeyphraseWithSentenceSpec.js b/packages/yoastseo/spec/languageProcessing/helpers/match/matchKeyphraseWithSentenceSpec.js index d81b0b76d1a..f6ef5dd6749 100644 --- a/packages/yoastseo/spec/languageProcessing/helpers/match/matchKeyphraseWithSentenceSpec.js +++ b/packages/yoastseo/spec/languageProcessing/helpers/match/matchKeyphraseWithSentenceSpec.js @@ -3,600 +3,600 @@ import matchKeyphraseWithSentence from "../../../../src/languageProcessing/helpe /* eslint-disable max-len */ const testCases = [ - // { - // testDescription: "No matches in sentence", - // sentence: { - // text: "A sentence with notthekeyphrase.", - // tokens: [ - // { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, - // { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, - // { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, - // { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, - // { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, - // { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, - // { text: "notthekeyphrase", sourceCodeRange: { startOffset: 16, endOffset: 31 } }, - // { text: ".", sourceCodeRange: { startOffset: 31, endOffset: 32 } }, - // ], - // sourceCodeRange: { startOffset: 0, endOffset: 32 } }, - // keyphraseForms: [ [ "keyword", "keywords" ] ], - // expectedResult: [], - // }, - // { - // testDescription: "should return empty result if keyphraseForms is empty", - // sentence: { - // text: "A sentence with notthekeyphrase.", - // tokens: [ - // { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, - // { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, - // { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, - // { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, - // { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, - // { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, - // { text: "notthekeyphrase", sourceCodeRange: { startOffset: 16, endOffset: 31 } }, - // { text: ".", sourceCodeRange: { startOffset: 31, endOffset: 32 } }, - // ], - // sourceCodeRange: { startOffset: 0, endOffset: 32 } }, - // keyphraseForms: [ [] ], - // expectedResult: [], - // }, - // { - // testDescription: "One match in sentence of a single-word keyphrase", - // sentence: { - // text: "A sentence with the keyword.", - // tokens: [ - // { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, - // { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, - // { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, - // { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, - // { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, - // { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, - // { text: "the", sourceCodeRange: { startOffset: 16, endOffset: 19 } }, - // { text: " ", sourceCodeRange: { startOffset: 19, endOffset: 20 } }, - // { text: "keyword", sourceCodeRange: { startOffset: 20, endOffset: 27 } }, - // { text: ".", sourceCodeRange: { startOffset: 27, endOffset: 28 } }, - // ], - // sourceCodeRange: { startOffset: 0, endOffset: 28 } }, - // keyphraseForms: [ [ "keyword", "keywords" ] ], - // expectedResult: [ { sourceCodeRange: { 
startOffset: 20, endOffset: 27 }, text: "keyword" } ], - // }, - // { - // testDescription: "One match in sentence of a multi word keyphrase", - // sentence: { - // text: "A sentence with the key words.", - // tokens: [ - // { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, - // { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, - // { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, - // { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, - // { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, - // { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, - // { text: "the", sourceCodeRange: { startOffset: 16, endOffset: 19 } }, - // { text: " ", sourceCodeRange: { startOffset: 19, endOffset: 20 } }, - // { text: "key", sourceCodeRange: { startOffset: 20, endOffset: 23 } }, - // { text: " ", sourceCodeRange: { startOffset: 23, endOffset: 24 } }, - // { text: "words", sourceCodeRange: { startOffset: 24, endOffset: 29 } }, - // { text: ".", sourceCodeRange: { startOffset: 29, endOffset: 30 } }, - // ], - // sourceCodeRange: { startOffset: 0, endOffset: 30 } }, - // keyphraseForms: [ [ "key" ], [ "word", "words" ] ], - // expectedResult: [ { sourceCodeRange: { endOffset: 23, startOffset: 20 }, text: "key" }, { sourceCodeRange: { endOffset: 29, startOffset: 24 }, text: "words" } ], - // }, - // { - // testDescription: "Disregards word order of multi word keyphrases", - // sentence: { - // text: "A word that is key.", - // tokens: [ - // { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, - // { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, - // { text: "word", sourceCodeRange: { startOffset: 2, endOffset: 6 } }, - // { text: " ", sourceCodeRange: { startOffset: 6, endOffset: 7 } }, - // { text: "that", sourceCodeRange: { startOffset: 7, endOffset: 11 } }, - // { text: " ", sourceCodeRange: { startOffset: 11, endOffset: 12 } }, - // { text: "is", sourceCodeRange: { startOffset: 12, endOffset: 14 } }, - // { text: " ", sourceCodeRange: { startOffset: 14, endOffset: 15 } }, - // { text: "key", sourceCodeRange: { startOffset: 15, endOffset: 18 } }, - // { text: ".", sourceCodeRange: { startOffset: 18, endOffset: 19 } }, - // ], - // sourceCodeRange: { startOffset: 0, endOffset: 19 } }, - // keyphraseForms: [ [ "key" ], [ "word", "words" ] ], - // expectedResult: [ { sourceCodeRange: { endOffset: 6, startOffset: 2 }, text: "word" }, { sourceCodeRange: { endOffset: 18, startOffset: 15 }, text: "key" } ], - // }, - // { - // testDescription: "Two matches of multi word keyphrase in sentence", - // sentence: { - // text: "A sentence with the key words and the key word.", - // tokens: [ - // { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, - // { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, - // { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, - // { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, - // { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, - // { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, - // { text: "the", sourceCodeRange: { startOffset: 16, endOffset: 19 } }, - // { text: " ", sourceCodeRange: { startOffset: 19, endOffset: 20 } }, - // { text: "key", sourceCodeRange: { startOffset: 20, endOffset: 23 } }, - // { text: " ", sourceCodeRange: { startOffset: 23, endOffset: 24 } }, - // { text: "words", sourceCodeRange: { startOffset: 24, 
endOffset: 29 } }, - // { text: " ", sourceCodeRange: { startOffset: 29, endOffset: 30 } }, - // { text: "and", sourceCodeRange: { startOffset: 30, endOffset: 33 } }, - // { text: " ", sourceCodeRange: { startOffset: 33, endOffset: 34 } }, - // { text: "the", sourceCodeRange: { startOffset: 34, endOffset: 37 } }, - // { text: " ", sourceCodeRange: { startOffset: 37, endOffset: 38 } }, - // { text: "key", sourceCodeRange: { startOffset: 38, endOffset: 41 } }, - // { text: " ", sourceCodeRange: { startOffset: 41, endOffset: 42 } }, - // { text: "word", sourceCodeRange: { startOffset: 42, endOffset: 46 } }, - // { text: ".", sourceCodeRange: { startOffset: 46, endOffset: 47 } }, - // ], - // sourceCodeRange: { startOffset: 0, endOffset: 47 } }, - // keyphraseForms: [ [ "key" ], [ "word", "words" ] ], - // expectedResult: [ - // { sourceCodeRange: { endOffset: 23, startOffset: 20 }, text: "key" }, - // { sourceCodeRange: { endOffset: 29, startOffset: 24 }, text: "words" }, - // { sourceCodeRange: { endOffset: 41, startOffset: 38 }, text: "key" }, - // { sourceCodeRange: { endOffset: 46, startOffset: 42 }, text: "word" }, - // ], - // }, - // { - // testDescription: "One primary and one secondary match of multi word keyphrase in sentence", - // sentence: { - // text: "A key sentence with a key word.", - // tokens: [ - // { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, - // { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, - // { text: "key", sourceCodeRange: { startOffset: 2, endOffset: 5 } }, - // { text: " ", sourceCodeRange: { startOffset: 5, endOffset: 6 } }, - // { text: "sentence", sourceCodeRange: { startOffset: 6, endOffset: 14 } }, - // { text: " ", sourceCodeRange: { startOffset: 14, endOffset: 15 } }, - // { text: "with", sourceCodeRange: { startOffset: 15, endOffset: 19 } }, - // { text: " ", sourceCodeRange: { startOffset: 19, endOffset: 20 } }, - // { text: "a", sourceCodeRange: { startOffset: 20, endOffset: 21 } }, - // { text: " ", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, - // { text: "key", sourceCodeRange: { startOffset: 22, endOffset: 25 } }, - // { text: " ", sourceCodeRange: { startOffset: 25, endOffset: 26 } }, - // { text: "word", sourceCodeRange: { startOffset: 26, endOffset: 30 } }, - // { text: ".", sourceCodeRange: { startOffset: 30, endOffset: 31 } }, - // ], - // sourceCodeRange: { startOffset: 0, endOffset: 31 } }, - // keyphraseForms: [ [ "key" ], [ "word", "words" ] ], - // expectedResult: [ - // { sourceCodeRange: { startOffset: 2, endOffset: 5 }, text: "key" }, - // { sourceCodeRange: { endOffset: 25, startOffset: 22 }, text: "key" }, - // { sourceCodeRange: { endOffset: 30, startOffset: 26 }, text: "word" } ], - // }, - // { - // testDescription: "No match if a multi word keyphrase is separated with an underscore in the sentence.", - // sentence: { - // text: "A sentence with a key_word.", - // tokens: [ - // { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, - // { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, - // { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, - // { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, - // { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, - // { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, - // { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, - // { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, - // { text: "key", 
sourceCodeRange: { startOffset: 18, endOffset: 21 } }, - // { text: "_", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, - // { text: "word", sourceCodeRange: { startOffset: 22, endOffset: 26 } }, - // { text: ".", sourceCodeRange: { startOffset: 26, endOffset: 27 } }, - // ], - // sourceCodeRange: { startOffset: 0, endOffset: 27 } }, - // keyphraseForms: [ [ "key", "keys" ], [ "word", "words" ] ], - // expectedResult: [], - // }, - // { - // testDescription: "A match if the key phrase is separated by an underscore in the sentence and in the keyphrase.", - // sentence: { - // text: "A sentence with a key_word.", - // tokens: [ - // { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, - // { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, - // { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, - // { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, - // { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, - // { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, - // { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, - // { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, - // { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, - // { text: "_", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, - // { text: "word", sourceCodeRange: { startOffset: 22, endOffset: 26 } }, - // { text: ".", sourceCodeRange: { startOffset: 26, endOffset: 27 } }, - // ], - // sourceCodeRange: { startOffset: 0, endOffset: 27 } }, - // keyphraseForms: [ [ "key_word", "key_words" ] ], - // expectedResult: [ - // { sourceCodeRange: { endOffset: 21, startOffset: 18 }, text: "key" }, - // { sourceCodeRange: { endOffset: 22, startOffset: 21 }, text: "_" }, - // { sourceCodeRange: { endOffset: 26, startOffset: 22 }, text: "word" }, - // ], - // }, - // { - // testDescription: "If there is only a partial match, the partial match is returned.", - // sentence: { - // text: "A sentence with a key.", - // tokens: [ - // { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, - // { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, - // { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, - // { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, - // { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, - // { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, - // { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, - // { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, - // { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, - // { text: ".", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, - // ], - // sourceCodeRange: { startOffset: 0, endOffset: 22 } }, - // keyphraseForms: [ [ "key" ], [ "word", "words" ] ], - // expectedResult: [ { sourceCodeRange: { endOffset: 21, startOffset: 18 }, text: "key" } ], - // - // }, - // { - // testDescription: "It is also a match if the keyphrase is lowercase but the occurrence in the sentence is uppercase.", - // sentence: { - // text: "A sentence with a KEY WORD.", - // tokens: [ - // { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, - // { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, - // { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, - // { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, - // { 
text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, - // { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, - // { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, - // { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, - // { text: "KEY", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, - // { text: " ", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, - // { text: "WORD", sourceCodeRange: { startOffset: 22, endOffset: 26 } }, - // { text: ".", sourceCodeRange: { startOffset: 26, endOffset: 27 } }, - // ], - // sourceCodeRange: { startOffset: 0, endOffset: 27 } }, - // keyphraseForms: [ [ "key" ], [ "word", "words" ] ], - // expectedResult: [ - // { sourceCodeRange: { endOffset: 21, startOffset: 18 }, text: "KEY" }, - // { sourceCodeRange: { endOffset: 26, startOffset: 22 }, text: "WORD" }, - // ], - // }, - // { - // testDescription: "Correctly matches if the keyphrase is the last word in the sentence.", - // sentence: { - // text: "A sentence with a key", - // tokens: [ - // { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, - // { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, - // { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, - // { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, - // { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, - // { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, - // { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, - // { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, - // { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, - // ], - // sourceCodeRange: { startOffset: 0, endOffset: 21 } }, - // keyphraseForms: [ [ "key" ], [ "word", "words" ] ], - // expectedResult: [ { sourceCodeRange: { endOffset: 21, startOffset: 18 }, text: "key" } ], - // }, - // { - // testDescription: "Doesn't return a match if the keyphrase in the text ends with an underscore.", - // sentence: { - // text: "A sentence with a key_", - // tokens: [ - // { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, - // { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, - // { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, - // { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, - // { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, - // { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, - // { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, - // { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, - // { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, - // { text: "_", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, - // ], - // sourceCodeRange: { startOffset: 0, endOffset: 22 } }, - // keyphraseForms: [ [ "key" ], [ "word", "words" ] ], - // expectedResult: [], - // }, - // { - // testDescription: "Doesn't return a match if the keyphrase in the text ends with a dash.", - // sentence: { - // text: "A sentence with a key-", - // tokens: [ - // { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, - // { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, - // { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, - // { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, - // { text: "with", sourceCodeRange: { startOffset: 11, 
endOffset: 15 } }, - // { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, - // { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, - // { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, - // { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, - // { text: "-", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, - // ], - // sourceCodeRange: { startOffset: 0, endOffset: 22 } }, - // keyphraseForms: [ [ "key" ], [ "word", "words" ] ], - // expectedResult: [], - // }, - // { - // testDescription: "Correctly matches if the sentence ends with an exclamation mark.", - // sentence: { - // text: "A sentence with a key!", - // tokens: [ - // { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, - // { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, - // { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, - // { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, - // { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, - // { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, - // { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, - // { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, - // { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, - // { text: "!", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, - // ], - // sourceCodeRange: { startOffset: 0, endOffset: 22 } }, - // keyphraseForms: [ [ "key" ], [ "word", "words" ] ], - // expectedResult: [ { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } } ], - // }, - // { - // testDescription: "A keyphrase with a dash matches when the keyphrase occurs with a dash in the text.", - // sentence: { - // text: "A sentence with a key-word.", - // tokens: [ - // { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, - // { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, - // { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, - // { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, - // { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, - // { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, - // { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, - // { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, - // { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, - // { text: "-", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, - // { text: "word", sourceCodeRange: { startOffset: 22, endOffset: 26 } }, - // { text: ".", sourceCodeRange: { startOffset: 26, endOffset: 27 } }, - // ], - // sourceCodeRange: { startOffset: 0, endOffset: 27 } }, - // keyphraseForms: [ [ "key-word", "key-words" ] ], - // expectedResult: [ - // { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, - // { text: "-", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, - // { text: "word", sourceCodeRange: { startOffset: 22, endOffset: 26 } }, - // ], - // }, - // { - // testDescription: "A keyphrase with a dash does not match when the keyphrase occurs without a dash in the text.", - // sentence: { - // text: "A sentence with a key word.", - // tokens: [ - // { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, - // { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, - // { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, - 
// { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, - // { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, - // { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, - // { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, - // { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, - // { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, - // { text: " ", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, - // { text: "word", sourceCodeRange: { startOffset: 22, endOffset: 26 } }, - // { text: ".", sourceCodeRange: { startOffset: 26, endOffset: 27 } }, - // ], - // sourceCodeRange: { startOffset: 0, endOffset: 27 } }, - // keyphraseForms: [ [ "key-word", "key-words" ] ], - // expectedResult: [], - // }, - // { - // testDescription: "A keyphrase without a dash does not match when the keyphrase occurs with a dash in the text.", - // sentence: { - // text: "A sentence with a key-word.", - // tokens: [ - // { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, - // { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, - // { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, - // { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, - // { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, - // { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, - // { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, - // { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, - // { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, - // { text: "-", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, - // { text: "word", sourceCodeRange: { startOffset: 22, endOffset: 26 } }, - // { text: ".", sourceCodeRange: { startOffset: 26, endOffset: 27 } }, - // ], - // sourceCodeRange: { startOffset: 0, endOffset: 27 } }, - // keyphraseForms: [ [ "key", "keys" ], [ "word", "words" ] ], - // expectedResult: [ ], - // }, - // { - // testDescription: "A keyphrase without a dash matches only with the keyphrase without a dash in the text.", - // sentence: { - // text: "A sentence with a key-word and a key word.", - // tokens: [ - // { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, - // { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, - // { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, - // { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, - // { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, - // { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, - // { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, - // { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, - // { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, - // { text: "-", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, - // { text: "word", sourceCodeRange: { startOffset: 22, endOffset: 26 } }, - // { text: " ", sourceCodeRange: { startOffset: 26, endOffset: 27 } }, - // { text: "and", sourceCodeRange: { startOffset: 27, endOffset: 30 } }, - // { text: " ", sourceCodeRange: { startOffset: 30, endOffset: 31 } }, - // { text: "a", sourceCodeRange: { startOffset: 31, endOffset: 32 } }, - // { text: " ", sourceCodeRange: { startOffset: 32, endOffset: 33 } }, - // { text: "key", sourceCodeRange: { startOffset: 33, endOffset: 36 } }, - // { text: " ", 
sourceCodeRange: { startOffset: 36, endOffset: 37 } }, - // { text: "word", sourceCodeRange: { startOffset: 37, endOffset: 41 } }, - // { text: ".", sourceCodeRange: { startOffset: 41, endOffset: 42 } }, - // ], - // sourceCodeRange: { startOffset: 0, endOffset: 42 } }, - // keyphraseForms: [ [ "key", "keys" ], [ "word", "words" ] ], - // expectedResult: [ - // { text: "key", sourceCodeRange: { startOffset: 33, endOffset: 36 } }, - // { text: "word", sourceCodeRange: { startOffset: 37, endOffset: 41 } }, - // ], - // }, - // { - // testDescription: "A match with a dash and a match without a dash, but the keyphrase contains a dash.", - // sentence: { - // text: "A sentence with a key-word and a key word.", - // tokens: [ - // { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, - // { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, - // { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, - // { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, - // { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, - // { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, - // { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, - // { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, - // { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, - // { text: "-", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, - // { text: "word", sourceCodeRange: { startOffset: 22, endOffset: 26 } }, - // { text: " ", sourceCodeRange: { startOffset: 26, endOffset: 27 } }, - // { text: "and", sourceCodeRange: { startOffset: 27, endOffset: 30 } }, - // { text: " ", sourceCodeRange: { startOffset: 30, endOffset: 31 } }, - // { text: "a", sourceCodeRange: { startOffset: 31, endOffset: 32 } }, - // { text: " ", sourceCodeRange: { startOffset: 32, endOffset: 33 } }, - // { text: "key", sourceCodeRange: { startOffset: 33, endOffset: 36 } }, - // { text: " ", sourceCodeRange: { startOffset: 36, endOffset: 37 } }, - // { text: "word", sourceCodeRange: { startOffset: 37, endOffset: 41 } }, - // { text: ".", sourceCodeRange: { startOffset: 41, endOffset: 42 } }, - // ], - // sourceCodeRange: { startOffset: 0, endOffset: 42 } }, - // keyphraseForms: [ [ "key-word", "key-words" ] ], - // expectedResult: [ - // { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, - // { text: "-", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, - // { text: "word", sourceCodeRange: { startOffset: 22, endOffset: 26 } }, - // ], - // }, - // { - // testDescription: "When the sentence contains the keyphrase with a dash but in the wrong order, there should be no match.", - // sentence: { - // text: "A sentence with a word-key.", - // tokens: [ - // { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, - // { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, - // { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, - // { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, - // { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, - // { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, - // { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, - // { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, - // { text: "word", sourceCodeRange: { startOffset: 18, endOffset: 22 } }, - // { text: "-", sourceCodeRange: { startOffset: 22, endOffset: 23 } }, - // { text: 
"key", sourceCodeRange: { startOffset: 23, endOffset: 26 } }, - // { text: ".", sourceCodeRange: { startOffset: 26, endOffset: 27 } }, - // ], - // sourceCodeRange: { startOffset: 0, endOffset: 27 } }, - // keyphraseForms: [ [ "key-word", "key-words" ] ], - // expectedResult: [], - // }, - // { - // testDescription: "When the sentence contains the keyphrase with a dash but in the wrong order and the keyphrase does not contain a dash," + - // " there should not be a match.", - // sentence: { - // text: "A sentence with a word-key.", - // tokens: [ - // { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, - // { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, - // { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, - // { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, - // { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, - // { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, - // { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, - // { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, - // { text: "word", sourceCodeRange: { startOffset: 18, endOffset: 22 } }, - // { text: "-", sourceCodeRange: { startOffset: 22, endOffset: 23 } }, - // { text: "key", sourceCodeRange: { startOffset: 23, endOffset: 26 } }, - // { text: ".", sourceCodeRange: { startOffset: 26, endOffset: 27 } }, - // ], - // sourceCodeRange: { startOffset: 0, endOffset: 27 } }, - // keyphraseForms: [ [ "key", "keys" ], [ "word", "words" ] ], - // expectedResult: [ ], - // }, - // { - // testDescription: "When the sentence contains a different word with a dash that contains part of the keyphrase, there should be no match.", - // sentence: { - // text: "A sentence with a key-phrase.", - // tokens: [ - // { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, - // { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, - // { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, - // { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, - // { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, - // { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, - // { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, - // { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, - // { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, - // { text: "-", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, - // { text: "phrase", sourceCodeRange: { startOffset: 22, endOffset: 28 } }, - // { text: ".", sourceCodeRange: { startOffset: 28, endOffset: 29 } }, - // ], - // sourceCodeRange: { startOffset: 0, endOffset: 29 } }, - // keyphraseForms: [ [ "key", "keys" ], [ "word", "words" ] ], - // expectedResult: [], - // }, - // { - // testDescription: "Returns a match when the words from the keyphrase are separated by a dash and spaces.", - // sentence: { - // text: "A sentence with a key - phrase.", - // tokens: [ - // { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, - // { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, - // { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, - // { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, - // { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, - // { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, - // { text: "a", 
sourceCodeRange: { startOffset: 16, endOffset: 17 } }, - // { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, - // { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, - // { text: " ", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, - // { text: "-", sourceCodeRange: { startOffset: 22, endOffset: 23 } }, - // { text: " ", sourceCodeRange: { startOffset: 23, endOffset: 24 } }, - // { text: "phrase", sourceCodeRange: { startOffset: 24, endOffset: 30 } }, - // { text: ".", sourceCodeRange: { startOffset: 30, endOffset: 31 } }, - // ], - // sourceCodeRange: { startOffset: 0, endOffset: 29 } }, - // keyphraseForms: [ [ "key", "keys" ], [ "phrase", "phrases" ] ], - // expectedResult: [ - // { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, - // { text: "phrase", sourceCodeRange: { startOffset: 24, endOffset: 30 } }, - // ], - // }, - // { - // testDescription: "Matches a keyphrase with an apostrophe.", - // sentence: { - // text: "All the keyphrase's forms should be matched.", - // tokens: [ - // { text: "All", sourceCodeRange: { startOffset: 0, endOffset: 3 } }, - // { text: " ", sourceCodeRange: { startOffset: 3, endOffset: 4 } }, - // { text: "the", sourceCodeRange: { startOffset: 4, endOffset: 7 } }, - // { text: " ", sourceCodeRange: { startOffset: 7, endOffset: 8 } }, - // { text: "keyphrase", sourceCodeRange: { startOffset: 8, endOffset: 17 } }, - // { text: "'", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, - // { text: "s", sourceCodeRange: { startOffset: 18, endOffset: 19 } }, - // { text: " ", sourceCodeRange: { startOffset: 19, endOffset: 20 } }, - // { text: "forms", sourceCodeRange: { startOffset: 20, endOffset: 25 } }, - // { text: " ", sourceCodeRange: { startOffset: 25, endOffset: 26 } }, - // { text: "should", sourceCodeRange: { startOffset: 26, endOffset: 32 } }, - // { text: " ", sourceCodeRange: { startOffset: 32, endOffset: 33 } }, - // { text: "be", sourceCodeRange: { startOffset: 34, endOffset: 36 } }, - // { text: " ", sourceCodeRange: { startOffset: 36, endOffset: 37 } }, - // { text: "matched", sourceCodeRange: { startOffset: 37, endOffset: 44 } }, - // { text: ".", sourceCodeRange: { startOffset: 44, endOffset: 45 } }, - // ], - // sourceCodeRange: { startOffset: 0, endOffset: 45 } }, - // keyphraseForms: [ [ "keyphrase", "keyphrases", "keyphrase's" ] ], - // expectedResult: [ - // { text: "keyphrase", sourceCodeRange: { startOffset: 8, endOffset: 17 } }, - // { text: "'", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, - // { text: "s", sourceCodeRange: { startOffset: 18, endOffset: 19 } }, - // ], - // }, + { + testDescription: "No matches in sentence", + sentence: { + text: "A sentence with notthekeyphrase.", + tokens: [ + { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, + { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, + { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, + { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, + { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, + { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, + { text: "notthekeyphrase", sourceCodeRange: { startOffset: 16, endOffset: 31 } }, + { text: ".", sourceCodeRange: { startOffset: 31, endOffset: 32 } }, + ], + sourceCodeRange: { startOffset: 0, endOffset: 32 } }, + keyphraseForms: [ [ "keyword", "keywords" ] ], + expectedResult: [], + }, + { + testDescription: "should return empty result if 
keyphraseForms is empty", + sentence: { + text: "A sentence with notthekeyphrase.", + tokens: [ + { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, + { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, + { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, + { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, + { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, + { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, + { text: "notthekeyphrase", sourceCodeRange: { startOffset: 16, endOffset: 31 } }, + { text: ".", sourceCodeRange: { startOffset: 31, endOffset: 32 } }, + ], + sourceCodeRange: { startOffset: 0, endOffset: 32 } }, + keyphraseForms: [ [] ], + expectedResult: [], + }, + { + testDescription: "One match in sentence of a single-word keyphrase", + sentence: { + text: "A sentence with the keyword.", + tokens: [ + { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, + { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, + { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, + { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, + { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, + { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, + { text: "the", sourceCodeRange: { startOffset: 16, endOffset: 19 } }, + { text: " ", sourceCodeRange: { startOffset: 19, endOffset: 20 } }, + { text: "keyword", sourceCodeRange: { startOffset: 20, endOffset: 27 } }, + { text: ".", sourceCodeRange: { startOffset: 27, endOffset: 28 } }, + ], + sourceCodeRange: { startOffset: 0, endOffset: 28 } }, + keyphraseForms: [ [ "keyword", "keywords" ] ], + expectedResult: [ { sourceCodeRange: { startOffset: 20, endOffset: 27 }, text: "keyword" } ], + }, + { + testDescription: "One match in sentence of a multi word keyphrase", + sentence: { + text: "A sentence with the key words.", + tokens: [ + { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, + { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, + { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, + { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, + { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, + { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, + { text: "the", sourceCodeRange: { startOffset: 16, endOffset: 19 } }, + { text: " ", sourceCodeRange: { startOffset: 19, endOffset: 20 } }, + { text: "key", sourceCodeRange: { startOffset: 20, endOffset: 23 } }, + { text: " ", sourceCodeRange: { startOffset: 23, endOffset: 24 } }, + { text: "words", sourceCodeRange: { startOffset: 24, endOffset: 29 } }, + { text: ".", sourceCodeRange: { startOffset: 29, endOffset: 30 } }, + ], + sourceCodeRange: { startOffset: 0, endOffset: 30 } }, + keyphraseForms: [ [ "key" ], [ "word", "words" ] ], + expectedResult: [ { sourceCodeRange: { endOffset: 23, startOffset: 20 }, text: "key" }, { sourceCodeRange: { endOffset: 29, startOffset: 24 }, text: "words" } ], + }, + { + testDescription: "Disregards word order of multi word keyphrases", + sentence: { + text: "A word that is key.", + tokens: [ + { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, + { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, + { text: "word", sourceCodeRange: { startOffset: 2, endOffset: 6 } }, + { text: " ", sourceCodeRange: { startOffset: 6, endOffset: 7 } }, + { text: 
"that", sourceCodeRange: { startOffset: 7, endOffset: 11 } }, + { text: " ", sourceCodeRange: { startOffset: 11, endOffset: 12 } }, + { text: "is", sourceCodeRange: { startOffset: 12, endOffset: 14 } }, + { text: " ", sourceCodeRange: { startOffset: 14, endOffset: 15 } }, + { text: "key", sourceCodeRange: { startOffset: 15, endOffset: 18 } }, + { text: ".", sourceCodeRange: { startOffset: 18, endOffset: 19 } }, + ], + sourceCodeRange: { startOffset: 0, endOffset: 19 } }, + keyphraseForms: [ [ "key" ], [ "word", "words" ] ], + expectedResult: [ { sourceCodeRange: { endOffset: 6, startOffset: 2 }, text: "word" }, { sourceCodeRange: { endOffset: 18, startOffset: 15 }, text: "key" } ], + }, + { + testDescription: "Two matches of multi word keyphrase in sentence", + sentence: { + text: "A sentence with the key words and the key word.", + tokens: [ + { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, + { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, + { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, + { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, + { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, + { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, + { text: "the", sourceCodeRange: { startOffset: 16, endOffset: 19 } }, + { text: " ", sourceCodeRange: { startOffset: 19, endOffset: 20 } }, + { text: "key", sourceCodeRange: { startOffset: 20, endOffset: 23 } }, + { text: " ", sourceCodeRange: { startOffset: 23, endOffset: 24 } }, + { text: "words", sourceCodeRange: { startOffset: 24, endOffset: 29 } }, + { text: " ", sourceCodeRange: { startOffset: 29, endOffset: 30 } }, + { text: "and", sourceCodeRange: { startOffset: 30, endOffset: 33 } }, + { text: " ", sourceCodeRange: { startOffset: 33, endOffset: 34 } }, + { text: "the", sourceCodeRange: { startOffset: 34, endOffset: 37 } }, + { text: " ", sourceCodeRange: { startOffset: 37, endOffset: 38 } }, + { text: "key", sourceCodeRange: { startOffset: 38, endOffset: 41 } }, + { text: " ", sourceCodeRange: { startOffset: 41, endOffset: 42 } }, + { text: "word", sourceCodeRange: { startOffset: 42, endOffset: 46 } }, + { text: ".", sourceCodeRange: { startOffset: 46, endOffset: 47 } }, + ], + sourceCodeRange: { startOffset: 0, endOffset: 47 } }, + keyphraseForms: [ [ "key" ], [ "word", "words" ] ], + expectedResult: [ + { sourceCodeRange: { endOffset: 23, startOffset: 20 }, text: "key" }, + { sourceCodeRange: { endOffset: 29, startOffset: 24 }, text: "words" }, + { sourceCodeRange: { endOffset: 41, startOffset: 38 }, text: "key" }, + { sourceCodeRange: { endOffset: 46, startOffset: 42 }, text: "word" }, + ], + }, + { + testDescription: "One primary and one secondary match of multi word keyphrase in sentence", + sentence: { + text: "A key sentence with a key word.", + tokens: [ + { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, + { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, + { text: "key", sourceCodeRange: { startOffset: 2, endOffset: 5 } }, + { text: " ", sourceCodeRange: { startOffset: 5, endOffset: 6 } }, + { text: "sentence", sourceCodeRange: { startOffset: 6, endOffset: 14 } }, + { text: " ", sourceCodeRange: { startOffset: 14, endOffset: 15 } }, + { text: "with", sourceCodeRange: { startOffset: 15, endOffset: 19 } }, + { text: " ", sourceCodeRange: { startOffset: 19, endOffset: 20 } }, + { text: "a", sourceCodeRange: { startOffset: 20, endOffset: 21 } }, + { text: " ", sourceCodeRange: { 
startOffset: 21, endOffset: 22 } }, + { text: "key", sourceCodeRange: { startOffset: 22, endOffset: 25 } }, + { text: " ", sourceCodeRange: { startOffset: 25, endOffset: 26 } }, + { text: "word", sourceCodeRange: { startOffset: 26, endOffset: 30 } }, + { text: ".", sourceCodeRange: { startOffset: 30, endOffset: 31 } }, + ], + sourceCodeRange: { startOffset: 0, endOffset: 31 } }, + keyphraseForms: [ [ "key" ], [ "word", "words" ] ], + expectedResult: [ + { sourceCodeRange: { startOffset: 2, endOffset: 5 }, text: "key" }, + { sourceCodeRange: { endOffset: 25, startOffset: 22 }, text: "key" }, + { sourceCodeRange: { endOffset: 30, startOffset: 26 }, text: "word" } ], + }, + { + testDescription: "No match if a multi word keyphrase is separated with an underscore in the sentence.", + sentence: { + text: "A sentence with a key_word.", + tokens: [ + { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, + { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, + { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, + { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, + { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, + { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, + { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, + { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, + { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, + { text: "_", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, + { text: "word", sourceCodeRange: { startOffset: 22, endOffset: 26 } }, + { text: ".", sourceCodeRange: { startOffset: 26, endOffset: 27 } }, + ], + sourceCodeRange: { startOffset: 0, endOffset: 27 } }, + keyphraseForms: [ [ "key", "keys" ], [ "word", "words" ] ], + expectedResult: [], + }, + { + testDescription: "A match if the key phrase is separated by an underscore in the sentence and in the keyphrase.", + sentence: { + text: "A sentence with a key_word.", + tokens: [ + { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, + { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, + { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, + { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, + { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, + { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, + { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, + { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, + { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, + { text: "_", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, + { text: "word", sourceCodeRange: { startOffset: 22, endOffset: 26 } }, + { text: ".", sourceCodeRange: { startOffset: 26, endOffset: 27 } }, + ], + sourceCodeRange: { startOffset: 0, endOffset: 27 } }, + keyphraseForms: [ [ "key_word", "key_words" ] ], + expectedResult: [ + { sourceCodeRange: { endOffset: 21, startOffset: 18 }, text: "key" }, + { sourceCodeRange: { endOffset: 22, startOffset: 21 }, text: "_" }, + { sourceCodeRange: { endOffset: 26, startOffset: 22 }, text: "word" }, + ], + }, + { + testDescription: "If there is only a partial match, the partial match is returned.", + sentence: { + text: "A sentence with a key.", + tokens: [ + { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, + { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, + { text: "sentence", 
sourceCodeRange: { startOffset: 2, endOffset: 10 } }, + { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, + { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, + { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, + { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, + { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, + { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, + { text: ".", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, + ], + sourceCodeRange: { startOffset: 0, endOffset: 22 } }, + keyphraseForms: [ [ "key" ], [ "word", "words" ] ], + expectedResult: [ { sourceCodeRange: { endOffset: 21, startOffset: 18 }, text: "key" } ], + + }, + { + testDescription: "It is also a match if the keyphrase is lowercase but the occurrence in the sentence is uppercase.", + sentence: { + text: "A sentence with a KEY WORD.", + tokens: [ + { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, + { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, + { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, + { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, + { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, + { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, + { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, + { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, + { text: "KEY", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, + { text: " ", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, + { text: "WORD", sourceCodeRange: { startOffset: 22, endOffset: 26 } }, + { text: ".", sourceCodeRange: { startOffset: 26, endOffset: 27 } }, + ], + sourceCodeRange: { startOffset: 0, endOffset: 27 } }, + keyphraseForms: [ [ "key" ], [ "word", "words" ] ], + expectedResult: [ + { sourceCodeRange: { endOffset: 21, startOffset: 18 }, text: "KEY" }, + { sourceCodeRange: { endOffset: 26, startOffset: 22 }, text: "WORD" }, + ], + }, + { + testDescription: "Correctly matches if the keyphrase is the last word in the sentence.", + sentence: { + text: "A sentence with a key", + tokens: [ + { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, + { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, + { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, + { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, + { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, + { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, + { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, + { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, + { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, + ], + sourceCodeRange: { startOffset: 0, endOffset: 21 } }, + keyphraseForms: [ [ "key" ], [ "word", "words" ] ], + expectedResult: [ { sourceCodeRange: { endOffset: 21, startOffset: 18 }, text: "key" } ], + }, + { + testDescription: "Doesn't return a match if the keyphrase in the text ends with an underscore.", + sentence: { + text: "A sentence with a key_", + tokens: [ + { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, + { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, + { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, + { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, + { text: 
"with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, + { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, + { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, + { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, + { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, + { text: "_", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, + ], + sourceCodeRange: { startOffset: 0, endOffset: 22 } }, + keyphraseForms: [ [ "key" ], [ "word", "words" ] ], + expectedResult: [], + }, + { + testDescription: "Doesn't return a match if the keyphrase in the text ends with a dash.", + sentence: { + text: "A sentence with a key-", + tokens: [ + { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, + { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, + { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, + { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, + { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, + { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, + { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, + { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, + { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, + { text: "-", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, + ], + sourceCodeRange: { startOffset: 0, endOffset: 22 } }, + keyphraseForms: [ [ "key" ], [ "word", "words" ] ], + expectedResult: [], + }, + { + testDescription: "Correctly matches if the sentence ends with an exclamation mark.", + sentence: { + text: "A sentence with a key!", + tokens: [ + { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, + { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, + { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, + { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, + { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, + { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, + { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, + { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, + { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, + { text: "!", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, + ], + sourceCodeRange: { startOffset: 0, endOffset: 22 } }, + keyphraseForms: [ [ "key" ], [ "word", "words" ] ], + expectedResult: [ { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } } ], + }, + { + testDescription: "A keyphrase with a dash matches when the keyphrase occurs with a dash in the text.", + sentence: { + text: "A sentence with a key-word.", + tokens: [ + { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, + { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, + { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, + { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, + { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, + { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, + { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, + { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, + { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, + { text: "-", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, + { text: "word", sourceCodeRange: { 
startOffset: 22, endOffset: 26 } }, + { text: ".", sourceCodeRange: { startOffset: 26, endOffset: 27 } }, + ], + sourceCodeRange: { startOffset: 0, endOffset: 27 } }, + keyphraseForms: [ [ "key-word", "key-words" ] ], + expectedResult: [ + { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, + { text: "-", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, + { text: "word", sourceCodeRange: { startOffset: 22, endOffset: 26 } }, + ], + }, + { + testDescription: "A keyphrase with a dash does not match when the keyphrase occurs without a dash in the text.", + sentence: { + text: "A sentence with a key word.", + tokens: [ + { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, + { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, + { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, + { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, + { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, + { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, + { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, + { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, + { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, + { text: " ", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, + { text: "word", sourceCodeRange: { startOffset: 22, endOffset: 26 } }, + { text: ".", sourceCodeRange: { startOffset: 26, endOffset: 27 } }, + ], + sourceCodeRange: { startOffset: 0, endOffset: 27 } }, + keyphraseForms: [ [ "key-word", "key-words" ] ], + expectedResult: [], + }, + { + testDescription: "A keyphrase without a dash does not match when the keyphrase occurs with a dash in the text.", + sentence: { + text: "A sentence with a key-word.", + tokens: [ + { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, + { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, + { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, + { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, + { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, + { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, + { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, + { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, + { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, + { text: "-", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, + { text: "word", sourceCodeRange: { startOffset: 22, endOffset: 26 } }, + { text: ".", sourceCodeRange: { startOffset: 26, endOffset: 27 } }, + ], + sourceCodeRange: { startOffset: 0, endOffset: 27 } }, + keyphraseForms: [ [ "key", "keys" ], [ "word", "words" ] ], + expectedResult: [ ], + }, + { + testDescription: "A keyphrase without a dash matches only with the keyphrase without a dash in the text.", + sentence: { + text: "A sentence with a key-word and a key word.", + tokens: [ + { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, + { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, + { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, + { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, + { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, + { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, + { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, + { text: " ", sourceCodeRange: { 
startOffset: 17, endOffset: 18 } }, + { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, + { text: "-", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, + { text: "word", sourceCodeRange: { startOffset: 22, endOffset: 26 } }, + { text: " ", sourceCodeRange: { startOffset: 26, endOffset: 27 } }, + { text: "and", sourceCodeRange: { startOffset: 27, endOffset: 30 } }, + { text: " ", sourceCodeRange: { startOffset: 30, endOffset: 31 } }, + { text: "a", sourceCodeRange: { startOffset: 31, endOffset: 32 } }, + { text: " ", sourceCodeRange: { startOffset: 32, endOffset: 33 } }, + { text: "key", sourceCodeRange: { startOffset: 33, endOffset: 36 } }, + { text: " ", sourceCodeRange: { startOffset: 36, endOffset: 37 } }, + { text: "word", sourceCodeRange: { startOffset: 37, endOffset: 41 } }, + { text: ".", sourceCodeRange: { startOffset: 41, endOffset: 42 } }, + ], + sourceCodeRange: { startOffset: 0, endOffset: 42 } }, + keyphraseForms: [ [ "key", "keys" ], [ "word", "words" ] ], + expectedResult: [ + { text: "key", sourceCodeRange: { startOffset: 33, endOffset: 36 } }, + { text: "word", sourceCodeRange: { startOffset: 37, endOffset: 41 } }, + ], + }, + { + testDescription: "A match with a dash and a match without a dash, but the keyphrase contains a dash.", + sentence: { + text: "A sentence with a key-word and a key word.", + tokens: [ + { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, + { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, + { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, + { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, + { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, + { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, + { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, + { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, + { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, + { text: "-", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, + { text: "word", sourceCodeRange: { startOffset: 22, endOffset: 26 } }, + { text: " ", sourceCodeRange: { startOffset: 26, endOffset: 27 } }, + { text: "and", sourceCodeRange: { startOffset: 27, endOffset: 30 } }, + { text: " ", sourceCodeRange: { startOffset: 30, endOffset: 31 } }, + { text: "a", sourceCodeRange: { startOffset: 31, endOffset: 32 } }, + { text: " ", sourceCodeRange: { startOffset: 32, endOffset: 33 } }, + { text: "key", sourceCodeRange: { startOffset: 33, endOffset: 36 } }, + { text: " ", sourceCodeRange: { startOffset: 36, endOffset: 37 } }, + { text: "word", sourceCodeRange: { startOffset: 37, endOffset: 41 } }, + { text: ".", sourceCodeRange: { startOffset: 41, endOffset: 42 } }, + ], + sourceCodeRange: { startOffset: 0, endOffset: 42 } }, + keyphraseForms: [ [ "key-word", "key-words" ] ], + expectedResult: [ + { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, + { text: "-", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, + { text: "word", sourceCodeRange: { startOffset: 22, endOffset: 26 } }, + ], + }, + { + testDescription: "When the sentence contains the keyphrase with a dash but in the wrong order, there should be no match.", + sentence: { + text: "A sentence with a word-key.", + tokens: [ + { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, + { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, + { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, 
+ { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, + { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, + { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, + { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, + { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, + { text: "word", sourceCodeRange: { startOffset: 18, endOffset: 22 } }, + { text: "-", sourceCodeRange: { startOffset: 22, endOffset: 23 } }, + { text: "key", sourceCodeRange: { startOffset: 23, endOffset: 26 } }, + { text: ".", sourceCodeRange: { startOffset: 26, endOffset: 27 } }, + ], + sourceCodeRange: { startOffset: 0, endOffset: 27 } }, + keyphraseForms: [ [ "key-word", "key-words" ] ], + expectedResult: [], + }, + { + testDescription: "When the sentence contains the keyphrase with a dash but in the wrong order and the keyphrase does not contain a dash," + + " there should not be a match.", + sentence: { + text: "A sentence with a word-key.", + tokens: [ + { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, + { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, + { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, + { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, + { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, + { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, + { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, + { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, + { text: "word", sourceCodeRange: { startOffset: 18, endOffset: 22 } }, + { text: "-", sourceCodeRange: { startOffset: 22, endOffset: 23 } }, + { text: "key", sourceCodeRange: { startOffset: 23, endOffset: 26 } }, + { text: ".", sourceCodeRange: { startOffset: 26, endOffset: 27 } }, + ], + sourceCodeRange: { startOffset: 0, endOffset: 27 } }, + keyphraseForms: [ [ "key", "keys" ], [ "word", "words" ] ], + expectedResult: [ ], + }, + { + testDescription: "When the sentence contains a different word with a dash that contains part of the keyphrase, there should be no match.", + sentence: { + text: "A sentence with a key-phrase.", + tokens: [ + { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, + { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, + { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, + { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, + { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, + { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, + { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, + { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, + { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, + { text: "-", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, + { text: "phrase", sourceCodeRange: { startOffset: 22, endOffset: 28 } }, + { text: ".", sourceCodeRange: { startOffset: 28, endOffset: 29 } }, + ], + sourceCodeRange: { startOffset: 0, endOffset: 29 } }, + keyphraseForms: [ [ "key", "keys" ], [ "word", "words" ] ], + expectedResult: [], + }, + { + testDescription: "Returns a match when the words from the keyphrase are separated by a dash and spaces.", + sentence: { + text: "A sentence with a key - phrase.", + tokens: [ + { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, + { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, 
+ { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, + { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, + { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, + { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, + { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, + { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, + { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, + { text: " ", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, + { text: "-", sourceCodeRange: { startOffset: 22, endOffset: 23 } }, + { text: " ", sourceCodeRange: { startOffset: 23, endOffset: 24 } }, + { text: "phrase", sourceCodeRange: { startOffset: 24, endOffset: 30 } }, + { text: ".", sourceCodeRange: { startOffset: 30, endOffset: 31 } }, + ], + sourceCodeRange: { startOffset: 0, endOffset: 29 } }, + keyphraseForms: [ [ "key", "keys" ], [ "phrase", "phrases" ] ], + expectedResult: [ + { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, + { text: "phrase", sourceCodeRange: { startOffset: 24, endOffset: 30 } }, + ], + }, + { + testDescription: "Matches a keyphrase with an apostrophe.", + sentence: { + text: "All the keyphrase's forms should be matched.", + tokens: [ + { text: "All", sourceCodeRange: { startOffset: 0, endOffset: 3 } }, + { text: " ", sourceCodeRange: { startOffset: 3, endOffset: 4 } }, + { text: "the", sourceCodeRange: { startOffset: 4, endOffset: 7 } }, + { text: " ", sourceCodeRange: { startOffset: 7, endOffset: 8 } }, + { text: "keyphrase", sourceCodeRange: { startOffset: 8, endOffset: 17 } }, + { text: "'", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, + { text: "s", sourceCodeRange: { startOffset: 18, endOffset: 19 } }, + { text: " ", sourceCodeRange: { startOffset: 19, endOffset: 20 } }, + { text: "forms", sourceCodeRange: { startOffset: 20, endOffset: 25 } }, + { text: " ", sourceCodeRange: { startOffset: 25, endOffset: 26 } }, + { text: "should", sourceCodeRange: { startOffset: 26, endOffset: 32 } }, + { text: " ", sourceCodeRange: { startOffset: 32, endOffset: 33 } }, + { text: "be", sourceCodeRange: { startOffset: 34, endOffset: 36 } }, + { text: " ", sourceCodeRange: { startOffset: 36, endOffset: 37 } }, + { text: "matched", sourceCodeRange: { startOffset: 37, endOffset: 44 } }, + { text: ".", sourceCodeRange: { startOffset: 44, endOffset: 45 } }, + ], + sourceCodeRange: { startOffset: 0, endOffset: 45 } }, + keyphraseForms: [ [ "keyphrase", "keyphrases", "keyphrase's" ] ], + expectedResult: [ + { text: "keyphrase", sourceCodeRange: { startOffset: 8, endOffset: 17 } }, + { text: "'", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, + { text: "s", sourceCodeRange: { startOffset: 18, endOffset: 19 } }, + ], + }, { testDescription: "with exact matching, a single word keyphrase should match with a single word.", sentence: { From d9265cef97198a3d621dfbbc91c0efab151ed063 Mon Sep 17 00:00:00 2001 From: hdvos Date: Wed, 14 Jun 2023 16:04:58 +0200 Subject: [PATCH 148/616] add exact matching parameters to matchKeyphraseWithSentenceSpec --- .../match/matchKeyphraseWithSentenceSpec.js | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/packages/yoastseo/spec/languageProcessing/helpers/match/matchKeyphraseWithSentenceSpec.js b/packages/yoastseo/spec/languageProcessing/helpers/match/matchKeyphraseWithSentenceSpec.js index f6ef5dd6749..e6a93a43e4b 100644 --- 
a/packages/yoastseo/spec/languageProcessing/helpers/match/matchKeyphraseWithSentenceSpec.js +++ b/packages/yoastseo/spec/languageProcessing/helpers/match/matchKeyphraseWithSentenceSpec.js @@ -19,6 +19,7 @@ const testCases = [ ], sourceCodeRange: { startOffset: 0, endOffset: 32 } }, keyphraseForms: [ [ "keyword", "keywords" ] ], + exactMatching: false, expectedResult: [], }, { @@ -37,6 +38,7 @@ const testCases = [ ], sourceCodeRange: { startOffset: 0, endOffset: 32 } }, keyphraseForms: [ [] ], + exactMatching: false, expectedResult: [], }, { @@ -57,6 +59,7 @@ const testCases = [ ], sourceCodeRange: { startOffset: 0, endOffset: 28 } }, keyphraseForms: [ [ "keyword", "keywords" ] ], + exactMatching: false, expectedResult: [ { sourceCodeRange: { startOffset: 20, endOffset: 27 }, text: "keyword" } ], }, { @@ -79,6 +82,7 @@ const testCases = [ ], sourceCodeRange: { startOffset: 0, endOffset: 30 } }, keyphraseForms: [ [ "key" ], [ "word", "words" ] ], + exactMatching: false, expectedResult: [ { sourceCodeRange: { endOffset: 23, startOffset: 20 }, text: "key" }, { sourceCodeRange: { endOffset: 29, startOffset: 24 }, text: "words" } ], }, { @@ -99,6 +103,7 @@ const testCases = [ ], sourceCodeRange: { startOffset: 0, endOffset: 19 } }, keyphraseForms: [ [ "key" ], [ "word", "words" ] ], + exactMatching: false, expectedResult: [ { sourceCodeRange: { endOffset: 6, startOffset: 2 }, text: "word" }, { sourceCodeRange: { endOffset: 18, startOffset: 15 }, text: "key" } ], }, { @@ -129,6 +134,7 @@ const testCases = [ ], sourceCodeRange: { startOffset: 0, endOffset: 47 } }, keyphraseForms: [ [ "key" ], [ "word", "words" ] ], + exactMatching: false, expectedResult: [ { sourceCodeRange: { endOffset: 23, startOffset: 20 }, text: "key" }, { sourceCodeRange: { endOffset: 29, startOffset: 24 }, text: "words" }, @@ -158,6 +164,7 @@ const testCases = [ ], sourceCodeRange: { startOffset: 0, endOffset: 31 } }, keyphraseForms: [ [ "key" ], [ "word", "words" ] ], + exactMatching: false, expectedResult: [ { sourceCodeRange: { startOffset: 2, endOffset: 5 }, text: "key" }, { sourceCodeRange: { endOffset: 25, startOffset: 22 }, text: "key" }, @@ -183,6 +190,7 @@ const testCases = [ ], sourceCodeRange: { startOffset: 0, endOffset: 27 } }, keyphraseForms: [ [ "key", "keys" ], [ "word", "words" ] ], + exactMatching: false, expectedResult: [], }, { @@ -205,6 +213,7 @@ const testCases = [ ], sourceCodeRange: { startOffset: 0, endOffset: 27 } }, keyphraseForms: [ [ "key_word", "key_words" ] ], + exactMatching: false, expectedResult: [ { sourceCodeRange: { endOffset: 21, startOffset: 18 }, text: "key" }, { sourceCodeRange: { endOffset: 22, startOffset: 21 }, text: "_" }, @@ -229,6 +238,7 @@ const testCases = [ ], sourceCodeRange: { startOffset: 0, endOffset: 22 } }, keyphraseForms: [ [ "key" ], [ "word", "words" ] ], + exactMatching: false, expectedResult: [ { sourceCodeRange: { endOffset: 21, startOffset: 18 }, text: "key" } ], }, @@ -252,6 +262,7 @@ const testCases = [ ], sourceCodeRange: { startOffset: 0, endOffset: 27 } }, keyphraseForms: [ [ "key" ], [ "word", "words" ] ], + exactMatching: false, expectedResult: [ { sourceCodeRange: { endOffset: 21, startOffset: 18 }, text: "KEY" }, { sourceCodeRange: { endOffset: 26, startOffset: 22 }, text: "WORD" }, @@ -274,6 +285,7 @@ const testCases = [ ], sourceCodeRange: { startOffset: 0, endOffset: 21 } }, keyphraseForms: [ [ "key" ], [ "word", "words" ] ], + exactMatching: false, expectedResult: [ { sourceCodeRange: { endOffset: 21, startOffset: 18 }, text: "key" } ], }, { @@ 
-294,6 +306,7 @@ const testCases = [ ], sourceCodeRange: { startOffset: 0, endOffset: 22 } }, keyphraseForms: [ [ "key" ], [ "word", "words" ] ], + exactMatching: false, expectedResult: [], }, { @@ -314,6 +327,7 @@ const testCases = [ ], sourceCodeRange: { startOffset: 0, endOffset: 22 } }, keyphraseForms: [ [ "key" ], [ "word", "words" ] ], + exactMatching: false, expectedResult: [], }, { @@ -334,6 +348,7 @@ const testCases = [ ], sourceCodeRange: { startOffset: 0, endOffset: 22 } }, keyphraseForms: [ [ "key" ], [ "word", "words" ] ], + exactMatching: false, expectedResult: [ { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } } ], }, { @@ -356,6 +371,7 @@ const testCases = [ ], sourceCodeRange: { startOffset: 0, endOffset: 27 } }, keyphraseForms: [ [ "key-word", "key-words" ] ], + exactMatching: false, expectedResult: [ { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, { text: "-", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, @@ -382,6 +398,7 @@ const testCases = [ ], sourceCodeRange: { startOffset: 0, endOffset: 27 } }, keyphraseForms: [ [ "key-word", "key-words" ] ], + exactMatching: false, expectedResult: [], }, { @@ -404,6 +421,7 @@ const testCases = [ ], sourceCodeRange: { startOffset: 0, endOffset: 27 } }, keyphraseForms: [ [ "key", "keys" ], [ "word", "words" ] ], + exactMatching: false, expectedResult: [ ], }, { @@ -434,6 +452,7 @@ const testCases = [ ], sourceCodeRange: { startOffset: 0, endOffset: 42 } }, keyphraseForms: [ [ "key", "keys" ], [ "word", "words" ] ], + exactMatching: false, expectedResult: [ { text: "key", sourceCodeRange: { startOffset: 33, endOffset: 36 } }, { text: "word", sourceCodeRange: { startOffset: 37, endOffset: 41 } }, @@ -467,6 +486,7 @@ const testCases = [ ], sourceCodeRange: { startOffset: 0, endOffset: 42 } }, keyphraseForms: [ [ "key-word", "key-words" ] ], + exactMatching: false, expectedResult: [ { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, { text: "-", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, @@ -493,6 +513,7 @@ const testCases = [ ], sourceCodeRange: { startOffset: 0, endOffset: 27 } }, keyphraseForms: [ [ "key-word", "key-words" ] ], + exactMatching: false, expectedResult: [], }, { @@ -516,6 +537,7 @@ const testCases = [ ], sourceCodeRange: { startOffset: 0, endOffset: 27 } }, keyphraseForms: [ [ "key", "keys" ], [ "word", "words" ] ], + exactMatching: false, expectedResult: [ ], }, { @@ -538,6 +560,7 @@ const testCases = [ ], sourceCodeRange: { startOffset: 0, endOffset: 29 } }, keyphraseForms: [ [ "key", "keys" ], [ "word", "words" ] ], + exactMatching: false, expectedResult: [], }, { @@ -562,6 +585,7 @@ const testCases = [ ], sourceCodeRange: { startOffset: 0, endOffset: 29 } }, keyphraseForms: [ [ "key", "keys" ], [ "phrase", "phrases" ] ], + exactMatching: false, expectedResult: [ { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, { text: "phrase", sourceCodeRange: { startOffset: 24, endOffset: 30 } }, @@ -591,6 +615,7 @@ const testCases = [ ], sourceCodeRange: { startOffset: 0, endOffset: 45 } }, keyphraseForms: [ [ "keyphrase", "keyphrases", "keyphrase's" ] ], + exactMatching: false, expectedResult: [ { text: "keyphrase", sourceCodeRange: { startOffset: 8, endOffset: 17 } }, { text: "'", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, From 0ddf87adda9c1cee44a6ba518fa8ceb6cb1c52ad Mon Sep 17 00:00:00 2001 From: hdvos Date: Thu, 15 Jun 2023 11:31:01 +0200 Subject: [PATCH 149/616] simplify matchKeyphraseWithSentence.js --- 
.../match/matchKeyphraseWithSentence.js | 63 ++----------------- 1 file changed, 6 insertions(+), 57 deletions(-) diff --git a/packages/yoastseo/src/languageProcessing/helpers/match/matchKeyphraseWithSentence.js b/packages/yoastseo/src/languageProcessing/helpers/match/matchKeyphraseWithSentence.js index b1aff35d367..b95e603536a 100644 --- a/packages/yoastseo/src/languageProcessing/helpers/match/matchKeyphraseWithSentence.js +++ b/packages/yoastseo/src/languageProcessing/helpers/match/matchKeyphraseWithSentence.js @@ -1,8 +1,5 @@ -import { cloneDeep } from "lodash-es"; import getWordsForHTMLParser from "../word/getWordsForHTMLParser"; -const wordCouplers = [ "_", "-", "'" ]; - /** * Tokenize keyword forms for exact matching. This function gets the keyword form and tokenizes it. * This function assumes that if a keyphrase needs to be matched exactly, there will be only one keyword form. @@ -60,46 +57,6 @@ const exactMatching = ( keywordForms, sentence ) => { return matches; }; -/** - * Prepares the tokens for matching by combining words separated by a wordcoupler (hyphen/underscore) into one token. - * @param {Token[]} tokens The tokens to prepare for matching. - * @param {number} i The index of the current token. - * @returns {{tokenForMatching: string, tokensForMatching: Token[]}} The token used for matching and - * the tokens that are combined into the token used for matching. - */ -const getTokensForMatching = ( tokens, i ) => { - const tokenForMatching = cloneDeep( tokens[ i ] ); - - // The token used for matching (tokenForMatching) may consist of multiple tokens combined, - // since we want to combine words separated by a hyphen/underscore into one token. - // This array keeps track of all tokens that are combined into the token used for matching - // and is later used to add all individual tokens to the array of matches. - const tokensForMatching = [ ]; - // Add the current token to the tokens for matching. - tokensForMatching.push( cloneDeep( tokens[ i ] ) ); - - // While the next token is a word coupler, add it to the current token. - while ( tokens[ i + 1 ] && wordCouplers.includes( tokens[ i + 1 ].text ) ) { - // Add the word coupler to the token for matching. - i++; - tokenForMatching.text += tokens[ i ].text; - tokenForMatching.sourceCodeRange.endOffset = tokens[ i ].sourceCodeRange.endOffset; - tokensForMatching.push( tokens[ i ] ); - - // If there is a token after the word coupler, add it to the token for matching. as well. - i++; - if ( ! tokens[ i ] ) { - break; - } - tokenForMatching.text += tokens[ i ].text; - tokenForMatching.sourceCodeRange.endOffset = tokens[ i ].sourceCodeRange.endOffset; - tokensForMatching.push( tokens[ i ] ); - } - - return { tokenForMatching, tokensForMatching }; -}; - - /** * Free matching of keyword forms in a sentence. Free matching happens when the user does not put the keyword in double quotes. * @param {(string[])[]} keywordForms The keyword forms to match. @@ -110,23 +67,15 @@ const freeMatching = ( keywordForms, sentence ) => { const tokens = sentence.tokens.slice(); // Filter out all tokens that do not match the keyphrase forms. - const matches = []; + // const matches = []; - // Iterate over all tokens in the sentence. - for ( let i = 0; i < tokens.length; i++ ) { - const { tokenForMatching, tokensForMatching } = getTokensForMatching( tokens, i ); - - // Compare the matchtoken with the keyword forms. 
- keywordForms.forEach( ( keywordForm ) => { - keywordForm.forEach( ( keywordFormPart ) => { - if ( tokenForMatching.text.toLowerCase() === keywordFormPart.toLowerCase() ) { - matches.push( ...tokensForMatching ); - } + return tokens.filter( ( token ) => { + return keywordForms.some( ( keywordForm ) => { + return keywordForm.some( ( keywordFormPart ) => { + return token.text.toLowerCase() === keywordFormPart.toLowerCase(); } ); } ); - } - - return matches; + } ); }; /** From 2d061047c078ef4f50781fcb19c20fb579c74010 Mon Sep 17 00:00:00 2001 From: hdvos Date: Thu, 15 Jun 2023 11:31:26 +0200 Subject: [PATCH 150/616] adapt specs to new tokenizer. --- .../match/matchKeyphraseWithSentenceSpec.js | 68 ++++++------------- 1 file changed, 21 insertions(+), 47 deletions(-) diff --git a/packages/yoastseo/spec/languageProcessing/helpers/match/matchKeyphraseWithSentenceSpec.js b/packages/yoastseo/spec/languageProcessing/helpers/match/matchKeyphraseWithSentenceSpec.js index e6a93a43e4b..d1fd379451d 100644 --- a/packages/yoastseo/spec/languageProcessing/helpers/match/matchKeyphraseWithSentenceSpec.js +++ b/packages/yoastseo/spec/languageProcessing/helpers/match/matchKeyphraseWithSentenceSpec.js @@ -183,9 +183,7 @@ const testCases = [ { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, - { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, - { text: "_", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, - { text: "word", sourceCodeRange: { startOffset: 22, endOffset: 26 } }, + { text: "key_word", sourceCodeRange: { startOffset: 18, endOffset: 26 } }, { text: ".", sourceCodeRange: { startOffset: 26, endOffset: 27 } }, ], sourceCodeRange: { startOffset: 0, endOffset: 27 } }, @@ -206,18 +204,14 @@ const testCases = [ { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, - { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, - { text: "_", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, - { text: "word", sourceCodeRange: { startOffset: 22, endOffset: 26 } }, + { text: "key_word", sourceCodeRange: { startOffset: 18, endOffset: 26 } }, { text: ".", sourceCodeRange: { startOffset: 26, endOffset: 27 } }, ], sourceCodeRange: { startOffset: 0, endOffset: 27 } }, keyphraseForms: [ [ "key_word", "key_words" ] ], exactMatching: false, expectedResult: [ - { sourceCodeRange: { endOffset: 21, startOffset: 18 }, text: "key" }, - { sourceCodeRange: { endOffset: 22, startOffset: 21 }, text: "_" }, - { sourceCodeRange: { endOffset: 26, startOffset: 22 }, text: "word" }, + { sourceCodeRange: { endOffset: 26, startOffset: 18 }, text: "key_word" }, ], }, { @@ -289,7 +283,7 @@ const testCases = [ expectedResult: [ { sourceCodeRange: { endOffset: 21, startOffset: 18 }, text: "key" } ], }, { - testDescription: "Doesn't return a match if the keyphrase in the text ends with an underscore.", + testDescription: "Does return a match if the keyphrase in the text ends with an underscore.", sentence: { text: "A sentence with a key_", tokens: [ @@ -305,12 +299,12 @@ const testCases = [ { text: "_", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, ], sourceCodeRange: { startOffset: 0, endOffset: 22 } }, - keyphraseForms: [ [ "key" ], [ "word", "words" ] ], + keyphraseForms: [ [ "key", "keys" 
], [ "word", "words" ] ], exactMatching: false, - expectedResult: [], + expectedResult: [ { sourceCodeRange: { startOffset: 18, endOffset: 21 }, text: "key" } ], }, { - testDescription: "Doesn't return a match if the keyphrase in the text ends with a dash.", + testDescription: "Does return a match if the keyphrase in the text ends with a dash.", sentence: { text: "A sentence with a key-", tokens: [ @@ -328,7 +322,7 @@ const testCases = [ sourceCodeRange: { startOffset: 0, endOffset: 22 } }, keyphraseForms: [ [ "key" ], [ "word", "words" ] ], exactMatching: false, - expectedResult: [], + expectedResult: [ { sourceCodeRange: { startOffset: 18, endOffset: 21 }, text: "key" } ], }, { testDescription: "Correctly matches if the sentence ends with an exclamation mark.", @@ -364,18 +358,14 @@ const testCases = [ { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, - { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, - { text: "-", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, - { text: "word", sourceCodeRange: { startOffset: 22, endOffset: 26 } }, + { text: "key-word", sourceCodeRange: { startOffset: 18, endOffset: 26 } }, { text: ".", sourceCodeRange: { startOffset: 26, endOffset: 27 } }, ], sourceCodeRange: { startOffset: 0, endOffset: 27 } }, keyphraseForms: [ [ "key-word", "key-words" ] ], exactMatching: false, expectedResult: [ - { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, - { text: "-", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, - { text: "word", sourceCodeRange: { startOffset: 22, endOffset: 26 } }, + { text: "key-word", sourceCodeRange: { startOffset: 18, endOffset: 26 } }, ], }, { @@ -414,9 +404,7 @@ const testCases = [ { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, - { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, - { text: "-", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, - { text: "word", sourceCodeRange: { startOffset: 22, endOffset: 26 } }, + { text: "key-word", sourceCodeRange: { startOffset: 18, endOffset: 26 } }, { text: ".", sourceCodeRange: { startOffset: 26, endOffset: 27 } }, ], sourceCodeRange: { startOffset: 0, endOffset: 27 } }, @@ -437,9 +425,7 @@ const testCases = [ { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, - { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, - { text: "-", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, - { text: "word", sourceCodeRange: { startOffset: 22, endOffset: 26 } }, + { text: "key-word", sourceCodeRange: { startOffset: 18, endOffset: 26 } }, { text: " ", sourceCodeRange: { startOffset: 26, endOffset: 27 } }, { text: "and", sourceCodeRange: { startOffset: 27, endOffset: 30 } }, { text: " ", sourceCodeRange: { startOffset: 30, endOffset: 31 } }, @@ -471,9 +457,7 @@ const testCases = [ { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, - { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, - { text: 
"-", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, - { text: "word", sourceCodeRange: { startOffset: 22, endOffset: 26 } }, + { text: "key-word", sourceCodeRange: { startOffset: 18, endOffset: 26 } }, { text: " ", sourceCodeRange: { startOffset: 26, endOffset: 27 } }, { text: "and", sourceCodeRange: { startOffset: 27, endOffset: 30 } }, { text: " ", sourceCodeRange: { startOffset: 30, endOffset: 31 } }, @@ -488,9 +472,7 @@ const testCases = [ keyphraseForms: [ [ "key-word", "key-words" ] ], exactMatching: false, expectedResult: [ - { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, - { text: "-", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, - { text: "word", sourceCodeRange: { startOffset: 22, endOffset: 26 } }, + { text: "key-word", sourceCodeRange: { startOffset: 18, endOffset: 26 } }, ], }, { @@ -506,9 +488,7 @@ const testCases = [ { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, - { text: "word", sourceCodeRange: { startOffset: 18, endOffset: 22 } }, - { text: "-", sourceCodeRange: { startOffset: 22, endOffset: 23 } }, - { text: "key", sourceCodeRange: { startOffset: 23, endOffset: 26 } }, + { text: "word-key", sourceCodeRange: { startOffset: 18, endOffset: 26 } }, { text: ".", sourceCodeRange: { startOffset: 26, endOffset: 27 } }, ], sourceCodeRange: { startOffset: 0, endOffset: 27 } }, @@ -530,9 +510,7 @@ const testCases = [ { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, - { text: "word", sourceCodeRange: { startOffset: 18, endOffset: 22 } }, - { text: "-", sourceCodeRange: { startOffset: 22, endOffset: 23 } }, - { text: "key", sourceCodeRange: { startOffset: 23, endOffset: 26 } }, + { text: "word-key", sourceCodeRange: { startOffset: 18, endOffset: 26 } }, { text: ".", sourceCodeRange: { startOffset: 26, endOffset: 27 } }, ], sourceCodeRange: { startOffset: 0, endOffset: 27 } }, @@ -553,9 +531,7 @@ const testCases = [ { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, - { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, - { text: "-", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, - { text: "phrase", sourceCodeRange: { startOffset: 22, endOffset: 28 } }, + { text: "key-phrase", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, { text: ".", sourceCodeRange: { startOffset: 28, endOffset: 29 } }, ], sourceCodeRange: { startOffset: 0, endOffset: 29 } }, @@ -600,9 +576,7 @@ const testCases = [ { text: " ", sourceCodeRange: { startOffset: 3, endOffset: 4 } }, { text: "the", sourceCodeRange: { startOffset: 4, endOffset: 7 } }, { text: " ", sourceCodeRange: { startOffset: 7, endOffset: 8 } }, - { text: "keyphrase", sourceCodeRange: { startOffset: 8, endOffset: 17 } }, - { text: "'", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, - { text: "s", sourceCodeRange: { startOffset: 18, endOffset: 19 } }, + { text: "keyphrase's", sourceCodeRange: { startOffset: 8, endOffset: 19 } }, { text: " ", sourceCodeRange: { startOffset: 19, endOffset: 20 } }, { text: "forms", sourceCodeRange: { startOffset: 20, endOffset: 25 } }, { text: " ", sourceCodeRange: { startOffset: 
25, endOffset: 26 } }, @@ -617,9 +591,7 @@ const testCases = [ keyphraseForms: [ [ "keyphrase", "keyphrases", "keyphrase's" ] ], exactMatching: false, expectedResult: [ - { text: "keyphrase", sourceCodeRange: { startOffset: 8, endOffset: 17 } }, - { text: "'", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, - { text: "s", sourceCodeRange: { startOffset: 18, endOffset: 19 } }, + { text: "keyphrase's", sourceCodeRange: { startOffset: 8, endOffset: 19 } }, ], }, { @@ -912,3 +884,5 @@ describe.each( japaneseTestCases )( "findKeyWordFormsInSentence for japanese", ( expect( matchKeyphraseWithSentence( sentence, keyphraseForms, locale, matchWordCustomHelper ) ).toEqual( expectedResult ); } ); } ); + + From 6f59e5c2814034fec202a3d743117d0a6bbd49ce Mon Sep 17 00:00:00 2001 From: hdvos Date: Thu, 15 Jun 2023 15:32:53 +0200 Subject: [PATCH 151/616] delete unused files --- .../match/convertMatchToPositionResultSpec.js | 282 -------------- .../match/findKeyWordFormsInSentenceSpec.js | 361 ------------------ .../match/convertMatchToPositionResult.js | 102 ----- .../match/findKeyWordFormsInSentence.js | 160 -------- 4 files changed, 905 deletions(-) delete mode 100644 packages/yoastseo/spec/languageProcessing/helpers/match/convertMatchToPositionResultSpec.js delete mode 100644 packages/yoastseo/spec/languageProcessing/helpers/match/findKeyWordFormsInSentenceSpec.js delete mode 100644 packages/yoastseo/src/languageProcessing/helpers/match/convertMatchToPositionResult.js delete mode 100644 packages/yoastseo/src/languageProcessing/helpers/match/findKeyWordFormsInSentence.js diff --git a/packages/yoastseo/spec/languageProcessing/helpers/match/convertMatchToPositionResultSpec.js b/packages/yoastseo/spec/languageProcessing/helpers/match/convertMatchToPositionResultSpec.js deleted file mode 100644 index 7d200a83e7c..00000000000 --- a/packages/yoastseo/spec/languageProcessing/helpers/match/convertMatchToPositionResultSpec.js +++ /dev/null @@ -1,282 +0,0 @@ -import convertMatchToPositionResult from "../../../../src/languageProcessing/helpers/match/convertMatchToPositionResult"; - -/* eslint-disable max-len */ -const testCases = [ - { - testDescription: "No matches", - matches: [], - sentence: { - text: "私の猫はかわいいです。", - tokens: [ - { - text: "私の猫はかわいいです。", - sourceCodeRange: { startOffset: 0, endOffset: 12 }, - }, - ], - sourceCodeRange: { startOffset: 0, endOffset: 12 }, - }, - keyphraseForms: [ [ "猫" ], [ "かわいい" ] ], - locale: "ja", - expected: { - primaryMatches: [], - secondaryMatches: [], - position: 0, - }, - }, - { - testDescription: "Only a primary match consisting of a single token", - matches: - [ - { matches: [ "猫" ] }, - ], - sentence: { - text: "私の猫はかわいいです。", - tokens: [ - { - text: "私の猫はかわいいです。", - sourceCodeRange: { startOffset: 0, endOffset: 12 }, - }, - ], - sourceCodeRange: { startOffset: 0, endOffset: 12 }, - }, - keyphraseForms: [ [ "猫" ] ], - locale: "ja", - expected: { - primaryMatches: [ [ - { - text: "猫", - sourceCodeRange: { startOffset: 2, endOffset: 3 }, - }, - ] ], - secondaryMatches: [], - position: 0, - }, - }, - { - testDescription: "A primary match and a single secondary match", - matches: - [ - { matches: [ "猫" ] }, - { matches: [ "かわいい" ] }, - ], - sentence: { - text: "私の猫はかわいいですかわいい。", - tokens: [ - { - text: "私の猫はかわいいですかわいい。", - sourceCodeRange: { startOffset: 0, endOffset: 15 }, - }, - ], - sourceCodeRange: { startOffset: 0, endOffset: 15 }, - }, - keyphraseForms: [ [ "猫" ], [ "かわいい" ] ], - locale: "ja", - expected: { - primaryMatches: [ [ - { - text: "猫", - 
sourceCodeRange: { startOffset: 2, endOffset: 3 }, - }, - { - text: "かわいい", - sourceCodeRange: { startOffset: 4, endOffset: 8 }, - }, - ] ], - secondaryMatches: [ [ - { - text: "かわいい", - sourceCodeRange: { - startOffset: 10, endOffset: 14 }, - }, - ] ], - position: 0, - }, - }, - { - testDescription: "Only a primary match consisting of multiple tokens", - matches: - [ - { matches: [ "猫" ] }, - { matches: [ "かわいい" ] }, - ], - sentence: { - text: "私の猫はかわいいです。", - tokens: [ - { - text: "私の猫はかわいいです。", - sourceCodeRange: { startOffset: 0, endOffset: 12 }, - }, - ], - sourceCodeRange: { startOffset: 0, endOffset: 12 }, - }, - keyphraseForms: [ [ "猫" ], [ "かわいい" ] ], - locale: "ja", - expected: { - primaryMatches: [ [ - { - text: "猫", - sourceCodeRange: { startOffset: 2, endOffset: 3 }, - }, - { - text: "かわいい", - sourceCodeRange: { startOffset: 4, endOffset: 8 }, - }, - ] ], - secondaryMatches: [], - position: 0, - }, - }, - { - testDescription: "A primary match and and a secondary match that is in front of the primary match", - matches: - [ - { - matches: [ - "猫", - ], - }, - { - matches: [ - "かわいい", - ], - }, - ], - sentence: { - text: "私猫の猫はかわいいです。", - tokens: [ - { - text: "私猫の猫はかわいいです。", - sourceCodeRange: { startOffset: 0, endOffset: 12 }, - }, - ], - sourceCodeRange: { startOffset: 0, endOffset: 12 }, - }, - keyphraseForms: [ [ "猫" ], [ "かわいい" ] ], - locale: "ja", - expected: { - primaryMatches: [ [ - { - text: "猫", - sourceCodeRange: { startOffset: 3, endOffset: 4 }, - }, - { - text: "かわいい", - sourceCodeRange: { startOffset: 5, endOffset: 9 }, - - }, - ] ], - secondaryMatches: [ [ - { - text: "猫", - sourceCodeRange: { startOffset: 1, endOffset: 2 }, - }, - ] ], - position: 0, - }, - }, - { - testDescription: "Multiple primary matches", - matches: - [ - { - matches: [ - "猫", - ], - }, - { - matches: [ - "かわいい", - ], - }, - ], - sentence: { - text: "私猫のはかわいいで猫かわいいす。", - tokens: [ - { - text: "私猫のはかわいいで猫かわいいす。", - sourceCodeRange: { startOffset: 0, endOffset: 16 }, - }, - ], - sourceCodeRange: { startOffset: 0, endOffset: 16 }, - }, - keyphraseForms: [ [ "猫" ], [ "かわいい" ] ], - locale: "ja", - expected: { - primaryMatches: [ [ - { - text: "猫", - sourceCodeRange: { startOffset: 1, endOffset: 2 }, - }, - { - text: "かわいい", - sourceCodeRange: { startOffset: 4, endOffset: 8 }, - - }, - ], [ - { - text: "猫", - sourceCodeRange: { startOffset: 9, endOffset: 10 }, - }, - { - text: "かわいい", - sourceCodeRange: { startOffset: 10, endOffset: 14 }, - }, - ] ], - secondaryMatches: [ ], - position: 0, - }, - }, - { - testDescription: "A single primary match and a seconary match that is in front of the primary match and a secondary match that is after the primary match", - matches: - [ - { matches: [ "猫" ] }, - { matches: [ "かわいい" ] }, - ], - sentence: { - text: "私猫の猫はかわいいでかわいいす。", - tokens: [ - { - text: "私猫の猫はかわいいでかわいいす。", - sourceCodeRange: { startOffset: 0, endOffset: 16 }, - }, - ], - sourceCodeRange: { startOffset: 0, endOffset: 16 }, - }, - keyphraseForms: [ [ "猫" ], [ "かわいい" ] ], - locale: "ja", - expected: { - primaryMatches: [ [ - { - text: "猫", - sourceCodeRange: { startOffset: 3, endOffset: 4 }, - }, - { - text: "かわいい", - sourceCodeRange: { startOffset: 5, endOffset: 9 }, - - }, - ] ], - secondaryMatches: [ [ - { - text: "猫", - sourceCodeRange: { startOffset: 1, endOffset: 2 }, - } ], - [ { - text: "かわいい", - sourceCodeRange: { startOffset: 10, endOffset: 14 }, - - }, - ] ], - position: 0, - }, - }, -]; -/* eslint-enable max-len */ - -describe.each( testCases )( "convertMatchToPositionResult", ( - { 
testDescription, matches, sentence, keyphraseForms, locale, expected } ) => { - it( testDescription, () => { - expect( convertMatchToPositionResult( matches, sentence, keyphraseForms, locale ) ).toEqual( expected ); - } ); -} ); diff --git a/packages/yoastseo/spec/languageProcessing/helpers/match/findKeyWordFormsInSentenceSpec.js b/packages/yoastseo/spec/languageProcessing/helpers/match/findKeyWordFormsInSentenceSpec.js deleted file mode 100644 index 9b3562b05de..00000000000 --- a/packages/yoastseo/spec/languageProcessing/helpers/match/findKeyWordFormsInSentenceSpec.js +++ /dev/null @@ -1,361 +0,0 @@ -import findKeyWordFormsInSentence from "../../../../src/languageProcessing/helpers/match/findKeyWordFormsInSentence"; -import JapaneseCustomHelper from "../../../../src/languageProcessing/languages/ja/helpers/matchTextWithWord"; - -/* eslint-disable max-len */ -const testCases = [ - { - testDescription: "No matches in sentence", - sentence: { - text: "A sentence with notthekeyphrase.", - tokens: [ - { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, - { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, - { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, - { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, - { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, - { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, - { text: "notthekeyphrase", sourceCodeRange: { startOffset: 16, endOffset: 31 } }, - { text: ".", sourceCodeRange: { startOffset: 31, endOffset: 32 } }, - ], - sourceCodeRange: { startOffset: 0, endOffset: 18 } }, - keyphraseForms: [ [ "keyword", "keywords" ] ], - locale: "en_US", - matchWordCustomHelper: false, - expectedResult: { primaryMatches: [], secondaryMatches: [] }, - }, - { - testDescription: "should return empty result if KeyphraseForms is empty", - sentence: { - text: "A sentence with notthekeyphrase.", - tokens: [ - { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, - { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, - { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, - { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, - { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, - { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, - { text: "notthekeyphrase", sourceCodeRange: { startOffset: 16, endOffset: 31 } }, - { text: ".", sourceCodeRange: { startOffset: 31, endOffset: 32 } }, - ], - sourceCodeRange: { startOffset: 0, endOffset: 32 } }, - keyphraseForms: [ [] ], - locale: "en_US", - matchWordCustomHelper: false, - expectedResult: { primaryMatches: [], secondaryMatches: [] }, - - }, - { - testDescription: "One match in sentence of a single-word keyphrase", - sentence: { - text: "A sentence with the keyword.", - tokens: [ - { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, - { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, - { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, - { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, - { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, - { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, - { text: "the", sourceCodeRange: { startOffset: 16, endOffset: 19 } }, - { text: " ", sourceCodeRange: { startOffset: 19, endOffset: 20 } }, - { text: "keyword", sourceCodeRange: { startOffset: 20, endOffset: 27 } }, - { text: 
".", sourceCodeRange: { startOffset: 27, endOffset: 28 } }, - ], - sourceCodeRange: { startOffset: 0, endOffset: 18 } }, - keyphraseForms: [ [ "keyword", "keywords" ] ], - locale: "en_US", - matchWordCustomHelper: false, - expectedResult: { - primaryMatches: [ [ { sourceCodeRange: { startOffset: 20, endOffset: 27 }, text: "keyword" } ] ], - secondaryMatches: [] }, - }, - { - testDescription: "One match in sentence of a multi word keyphrase", - sentence: { - text: "A sentence with the key words.", - tokens: [ - { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, - { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, - { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, - { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, - { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, - { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, - { text: "the", sourceCodeRange: { startOffset: 16, endOffset: 19 } }, - { text: " ", sourceCodeRange: { startOffset: 19, endOffset: 20 } }, - { text: "key", sourceCodeRange: { startOffset: 20, endOffset: 23 } }, - { text: " ", sourceCodeRange: { startOffset: 23, endOffset: 24 } }, - { text: "words", sourceCodeRange: { startOffset: 24, endOffset: 29 } }, - { text: ".", sourceCodeRange: { startOffset: 29, endOffset: 30 } }, - ], - sourceCodeRange: { startOffset: 0, endOffset: 30 } }, - keyphraseForms: [ [ "key" ], [ "word", "words" ] ], - locale: "en_US", - matchWordCustomHelper: false, - expectedResult: { - primaryMatches: [ - [ { sourceCodeRange: { endOffset: 23, startOffset: 20 }, text: "key" }, { sourceCodeRange: { endOffset: 29, startOffset: 24 }, text: "words" } ], - ], - secondaryMatches: [] }, - }, - { - testDescription: "Two matches of multi word keyphrase in sentence", - sentence: { - text: "A sentence with the key words and the key word.", - tokens: [ - { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, - { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, - { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, - { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, - { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, - { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, - { text: "the", sourceCodeRange: { startOffset: 16, endOffset: 19 } }, - { text: " ", sourceCodeRange: { startOffset: 19, endOffset: 20 } }, - { text: "key", sourceCodeRange: { startOffset: 20, endOffset: 23 } }, - { text: " ", sourceCodeRange: { startOffset: 23, endOffset: 24 } }, - { text: "words", sourceCodeRange: { startOffset: 24, endOffset: 29 } }, - { text: " ", sourceCodeRange: { startOffset: 29, endOffset: 30 } }, - { text: "and", sourceCodeRange: { startOffset: 30, endOffset: 33 } }, - { text: " ", sourceCodeRange: { startOffset: 33, endOffset: 34 } }, - { text: "the", sourceCodeRange: { startOffset: 34, endOffset: 37 } }, - { text: " ", sourceCodeRange: { startOffset: 37, endOffset: 38 } }, - { text: "key", sourceCodeRange: { startOffset: 38, endOffset: 41 } }, - { text: " ", sourceCodeRange: { startOffset: 41, endOffset: 42 } }, - { text: "word", sourceCodeRange: { startOffset: 42, endOffset: 46 } }, - { text: ".", sourceCodeRange: { startOffset: 46, endOffset: 47 } }, - ], - sourceCodeRange: { startOffset: 0, endOffset: 47 } }, - keyphraseForms: [ [ "key" ], [ "word", "words" ] ], - locale: "en_US", - matchWordCustomHelper: false, - expectedResult: { - primaryMatches: [ - [ { 
sourceCodeRange: { endOffset: 23, startOffset: 20 }, text: "key" }, { sourceCodeRange: { endOffset: 29, startOffset: 24 }, text: "words" } ], - [ { sourceCodeRange: { endOffset: 41, startOffset: 38 }, text: "key" }, { sourceCodeRange: { endOffset: 46, startOffset: 42 }, text: "word" } ], - ], - secondaryMatches: [], - }, - }, - { - testDescription: "One primary and one secondary match of multi word keyphrase in sentence", - sentence: { - text: "A key sentence with a key word.", - tokens: [ - { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, - { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, - { text: "key", sourceCodeRange: { startOffset: 2, endOffset: 5 } }, - { text: " ", sourceCodeRange: { startOffset: 5, endOffset: 6 } }, - { text: "sentence", sourceCodeRange: { startOffset: 6, endOffset: 14 } }, - { text: " ", sourceCodeRange: { startOffset: 14, endOffset: 15 } }, - { text: "with", sourceCodeRange: { startOffset: 15, endOffset: 19 } }, - { text: " ", sourceCodeRange: { startOffset: 19, endOffset: 20 } }, - { text: "a", sourceCodeRange: { startOffset: 20, endOffset: 21 } }, - { text: " ", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, - { text: "key", sourceCodeRange: { startOffset: 22, endOffset: 25 } }, - { text: " ", sourceCodeRange: { startOffset: 25, endOffset: 26 } }, - { text: "word", sourceCodeRange: { startOffset: 26, endOffset: 30 } }, - { text: ".", sourceCodeRange: { startOffset: 30, endOffset: 31 } }, - ], - sourceCodeRange: { startOffset: 0, endOffset: 31 } }, - keyphraseForms: [ [ "key" ], [ "word", "words" ] ], - locale: "en_US", - matchWordCustomHelper: false, - expectedResult: { - primaryMatches: [ - [ { sourceCodeRange: { startOffset: 22, endOffset: 25 }, text: "key" }, { sourceCodeRange: { startOffset: 26, endOffset: 30 }, text: "word" } ], - ], - secondaryMatches: - [ { sourceCodeRange: { startOffset: 2, endOffset: 5 }, text: "key" } ], - }, - }, - { - testDescription: "No match if a multi word keyphrase is separated with an underscore in the sentence and a secondary match.", - sentence: { - text: "A sentence with a key_word.", - tokens: [ - { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, - { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, - { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, - { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, - { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, - { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, - { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, - { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, - { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, - { text: "_", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, - { text: "word", sourceCodeRange: { startOffset: 22, endOffset: 26 } }, - { text: ".", sourceCodeRange: { startOffset: 26, endOffset: 27 } }, - ], - sourceCodeRange: { startOffset: 0, endOffset: 27 } }, - keyphraseForms: [ [ "key" ], [ "word", "words" ] ], - locale: "en_US", - matchWordCustomHelper: false, - expectedResult: { - primaryMatches: [], - secondaryMatches: [], - }, - }, - { - testDescription: "A secondary match if a multi word keyphrase is separated with an underscore in the sentence.", - sentence: { - text: "A key sentence with a key_word.", - tokens: [ - { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, - { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } 
}, - { text: "key", sourceCodeRange: { startOffset: 2, endOffset: 5 } }, - { text: " ", sourceCodeRange: { startOffset: 5, endOffset: 6 } }, - { text: "sentence", sourceCodeRange: { startOffset: 6, endOffset: 14 } }, - { text: " ", sourceCodeRange: { startOffset: 14, endOffset: 15 } }, - { text: "with", sourceCodeRange: { startOffset: 15, endOffset: 19 } }, - { text: " ", sourceCodeRange: { startOffset: 19, endOffset: 20 } }, - { text: "a", sourceCodeRange: { startOffset: 20, endOffset: 21 } }, - { text: " ", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, - { text: "key", sourceCodeRange: { startOffset: 22, endOffset: 25 } }, - { text: "_", sourceCodeRange: { startOffset: 25, endOffset: 26 } }, - { text: "word", sourceCodeRange: { startOffset: 26, endOffset: 30 } }, - { text: ".", sourceCodeRange: { startOffset: 30, endOffset: 31 } }, - ], - sourceCodeRange: { startOffset: 0, endOffset: 31 } }, - keyphraseForms: [ [ "key" ], [ "word", "words" ] ], - locale: "en_US", - matchWordCustomHelper: false, - expectedResult: { - primaryMatches: [], - secondaryMatches: [ { sourceCodeRange: { endOffset: 5, startOffset: 2 }, text: "key" } ], - }, - }, - { - testDescription: "If there is a secondary match, an empty result is returned.", - sentence: { - text: "A sentence with a key.", - tokens: [ - { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, - { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, - { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, - { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, - { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, - { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, - { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, - { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, - { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, - { text: ".", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, - ], - sourceCodeRange: { startOffset: 0, endOffset: 22 } }, - keyphraseForms: [ [ "key" ], [ "word", "words" ] ], - locale: "en_US", - matchWordCustomHelper: false, - expectedResult: { - primaryMatches: [], - secondaryMatches: [], - }, - }, - { - testDescription: "Only a primary match in a language that uses a custom helper to match words.", - sentence: { - text: "私の猫はかわいいです。", - tokens: [ - { text: "私の猫はかわいいです。", sourceCodeRange: { startOffset: 0, endOffset: 12 } }, - ], - sourceCodeRange: { startOffset: 0, endOffset: 12 } }, - keyphraseForms: [ [ "猫" ], [ "かわいい" ] ], - locale: "ja", - matchWordCustomHelper: JapaneseCustomHelper, - expectedResult: { - position: 0, - primaryMatches: [ [ - { sourceCodeRange: { startOffset: 2, endOffset: 3 }, text: "猫" }, - { sourceCodeRange: { startOffset: 4, endOffset: 8 }, text: "かわいい" }, - ] ], - secondaryMatches: [], - }, - }, - { - testDescription: "A primary and secondary match in a language that uses a custom helper to match words.", - sentence: { - text: "私の猫はかわいいですかわいい。", - tokens: [ - { text: "私の猫はかわいいですかわいい。", sourceCodeRange: { startOffset: 0, endOffset: 15 } }, - ], - sourceCodeRange: { startOffset: 0, endOffset: 15 } }, - keyphraseForms: [ [ "猫" ], [ "かわいい" ] ], - locale: "ja", - matchWordCustomHelper: JapaneseCustomHelper, - expectedResult: { - position: 0, - primaryMatches: [ [ - { sourceCodeRange: { startOffset: 2, endOffset: 3 }, text: "猫" }, - { sourceCodeRange: { startOffset: 4, endOffset: 8 }, text: "かわいい" } ] ], - secondaryMatches: [ [ - { sourceCodeRange: 
{ startOffset: 10, endOffset: 14 }, text: "かわいい" } ] ], - }, - }, - { - testDescription: "Only secondary matches in a language that uses a custom helper to match words.", - sentence: { - text: "私のはかわいいですかわいい。", - tokens: [ - { text: "私のはかわいいですかわいい。", sourceCodeRange: { startOffset: 0, endOffset: 14 } }, - ], - sourceCodeRange: { startOffset: 0, endOffset: 14 } }, - keyphraseForms: [ [ "猫" ], [ "かわいい" ] ], - locale: "ja", - matchWordCustomHelper: JapaneseCustomHelper, - expectedResult: { - position: -1, - primaryMatches: [], - secondaryMatches: [], - }, - }, - { - testDescription: "A primary match and a secondary match that is in front of the primary match in a language that uses a custom helper to match words.", - sentence: { - text: "私猫の猫はかわいいです。iiii", - tokens: [ - { text: "私猫の猫はかわいいです。", sourceCodeRange: { startOffset: 0, endOffset: 12 } }, - ], - sourceCodeRange: { startOffset: 0, endOffset: 12 } }, - keyphraseForms: [ [ "猫" ], [ "かわいい" ] ], - locale: "ja", - matchWordCustomHelper: JapaneseCustomHelper, - expectedResult: { - position: 0, - primaryMatches: [ [ - { sourceCodeRange: { startOffset: 3, endOffset: 4 }, text: "猫" }, - { sourceCodeRange: { startOffset: 5, endOffset: 9 }, text: "かわいい" } ] ], - secondaryMatches: [ [ - { sourceCodeRange: { startOffset: 1, endOffset: 2 }, text: "猫" } ] ], - }, - }, - - - // { - // testDescription: "One marking in sentence with two consecutive matches", - // }, - // { - // testDescription: "Two markings that are not consecutive in sentence", - // }, - // { - // testDescription: "One marking in a sentence that has a non-zero startOffset", - // }, - // { - // testDescription: "One primary and one secondary marking in a sentence", - // }, - // { - // testDescription: "One primary and two secondary markings in a sentence", - // }, - // { - // testDescription: "One marking in a sentence in a language that does not have spaces between words", - // }, - // { - // testDescription: "One marking in a sentence in a language that does not have spaces between words with two consecutive matches", - // }, - // { - // testDescription: "Two markings that are not consecutive in a sentence in a language that does not have spaces between words", - // }, -]; -// eslint-enable max-len - -// eslint-disable-next-line max-len -describe.each( testCases )( "findKeyWordFormsInSentence", ( { testDescription, sentence, keyphraseForms, locale, matchWordCustomHelper, expectedResult } ) => { - it( testDescription, () => { - expect( findKeyWordFormsInSentence( sentence, keyphraseForms, locale, matchWordCustomHelper ) ).toEqual( expectedResult ); - } ); -} ); diff --git a/packages/yoastseo/src/languageProcessing/helpers/match/convertMatchToPositionResult.js b/packages/yoastseo/src/languageProcessing/helpers/match/convertMatchToPositionResult.js deleted file mode 100644 index da2e0767b3b..00000000000 --- a/packages/yoastseo/src/languageProcessing/helpers/match/convertMatchToPositionResult.js +++ /dev/null @@ -1,102 +0,0 @@ -import { uniq } from "lodash-es"; -import Token from "../../../parse/structure/Token"; -import matchTokenWithWordForms from "../keywordCount/matchTokenWithWordForms"; - -/** - * Gets all indices of a word in a sentence. Is only used for languages with a matchWordCustomHelper. - * Will probably be obsolete once the tokenizer for Japanese is implemented. - * @param {string} word The word. - * @param {Sentence} sentence The sentence. - * @returns {array} The indices. 
- */ -const getAllIndicesOfWord = ( word, sentence ) => { - const text = sentence.text; - - // get all the indices of the word in the sentence. - const indices = []; - let index = text.indexOf( word ); - while ( index > -1 ) { - indices.push( index ); - index = text.indexOf( word, index + 1 ); - } - - return indices; -}; - -/** - * Converts the matches to the format that is used in the assessment. - * This function is for compatibility. If matches were found with the old method with a custom match helper, - * this function converts them to the new format that is used for the position based highlighting. - * @param {array} matches The matches. - * @param {Sentence} sentence The sentence. - * @param {string[][]} keyPhraseForms The keyphrase forms. - * @param {string} locale The locale. - * @returns {{primaryMatches: *[], secondaryMatches: *[], position: number}} The matches in the format that is used in the assessment. - */ -const convertToPositionResult = ( matches, sentence, keyPhraseForms, locale ) => { - const matchTokens = []; - matches.forEach( matchObject => { - const matchWords = matchObject.matches; - - uniq( matchWords ).forEach( matchWord => { - const indices = getAllIndicesOfWord( matchWord, sentence ); - indices.forEach( index => { - const startOffset = sentence.sourceCodeRange.startOffset + index; - const endOffset = sentence.sourceCodeRange.startOffset + index + matchWord.length; - - const matchToken = new Token( matchWord, { startOffset: startOffset, endOffset: endOffset } ); - - matchTokens.push( matchToken ); - } ); - } ); - } ); - - // Sort tokens on startOffset. - matchTokens.sort( ( a, b ) => a.sourceCodeRange.startOffset - b.sourceCodeRange.startOffset ); - - const primaryMatches = []; - const secondaryMatches = []; - - let currentMatch = []; - let keyPhraseFormsIndex = 0; - - // A primary match is a match that contains all the keyphrase forms in the same order as they occur in the keyphrase. - // A secondary match is any other match. - matchTokens.forEach( ( token ) => { - const head = keyPhraseForms[ keyPhraseFormsIndex ]; - if ( head && matchTokenWithWordForms( head, token, locale ) ) { - // If the token matches the head of the keyphrase form. - currentMatch.push( token ); - keyPhraseFormsIndex += 1; - if ( currentMatch.length === keyPhraseForms.length ) { - primaryMatches.push( currentMatch ); - currentMatch = []; - keyPhraseFormsIndex = 0; - } - } else { - // If the token does not match the head of the keyphrase form. - if ( currentMatch.length > 0 ) { - // If there is a current match, add the current token to the secondary matches. 
- secondaryMatches.push( currentMatch ); - currentMatch = [ token ]; - keyPhraseFormsIndex = 1; - // keyPhraseFormsIndex = 0; - } else { - secondaryMatches.push( [ token ] ); - currentMatch = [ ]; - keyPhraseFormsIndex = 0; - } - // currentMatch = []; - // keyPhraseFormsIndex = 0; - } - } ); - - - return { - primaryMatches: primaryMatches, - secondaryMatches: secondaryMatches, - position: 0, - }; -}; - -export default convertToPositionResult; diff --git a/packages/yoastseo/src/languageProcessing/helpers/match/findKeyWordFormsInSentence.js b/packages/yoastseo/src/languageProcessing/helpers/match/findKeyWordFormsInSentence.js deleted file mode 100644 index 755a97c7dfd..00000000000 --- a/packages/yoastseo/src/languageProcessing/helpers/match/findKeyWordFormsInSentence.js +++ /dev/null @@ -1,160 +0,0 @@ -import matchTextWithArray from "./matchTextWithArray"; -import { uniq } from "lodash-es"; -import matchTokenWithWordForms from "../keywordCount/matchTokenWithWordForms"; -import { tokenizerSplitter } from "../../../parse/language/LanguageProcessor"; -import removeConsecutiveKeyphraseFromResult from "../keywordCount/removeConsecutiveKeyphraseMatches"; -import convertToPositionResult from "./convertMatchToPositionResult"; - -/** - * (re-)Tokenizes the keyphrase forms in the same way that the tokens in the text are splitted. - * This solves the problem that "key-word" would not match with "key-word" in the text, - * because it would occur like ["key-word"] in the keyphraseForms and in ["key", "-", "word"] in the tokenized text. - * @param {string[][]} keyphraseForms The keyphrase forms to tokenize. - * @returns {array[]} The tokenized keyphrase forms. - */ -const tokenizeKeyphraseForms = ( keyphraseForms ) => { - const newKeyphraseForms = []; - keyphraseForms.forEach( word => { - const tokenizedWord = word.map( form => { - return form.split( tokenizerSplitter ); - } ); - - const tokenizedWordTransposed = tokenizedWord[ 0 ].map( ( _, index ) => uniq( tokenizedWord.map( row => row[ index ] ) ) ); - - tokenizedWordTransposed.forEach( newWord => newKeyphraseForms.push( newWord ) ); - } ); - - // if a token ends with a single \ then prepend the \ to the next token and remove the \ it from the current token. - // This is a fix because the tokenization does not work well for a situation where a fullstop is escaped. - // For example: yoastseo 9.3 would be tokenized as ["yoastseo", "9\", ".", "3"]. This fix corrects this to ["yoastseo", "9", "\.", "3"]. - for ( let i = 0; i < newKeyphraseForms.length; i++ ) { - if ( newKeyphraseForms[ i ][ 0 ].endsWith( "\\" ) && ! newKeyphraseForms[ i ][ 0 ].endsWith( "\\\\" ) ) { - // Remove the \ from the current token. - newKeyphraseForms[ i ][ 0 ] = newKeyphraseForms[ i ][ 0 ].slice( 0, -1 ); - // Prepend the \ to the next token. - newKeyphraseForms[ i + 1 ][ 0 ] = "\\" + newKeyphraseForms[ i + 1 ][ 0 ]; - } - } - - - return newKeyphraseForms; -}; - - -/** - * Matches the keyphrase forms in an array of tokens. - * @param {array[]} keyphraseForms The keyphrase forms to match. - * @param {Token[]} tokens The tokens to match the keyphrase forms in. - * @param {string} locale The locale used in the analysis. - * @returns {{primaryMatches: *[], secondaryMatches: *[]}} The matches. - */ -const getMatchesInTokens = ( keyphraseForms, tokens, locale ) => { - // TODO: write documentation for primary and secondary matches. - const result = { - primaryMatches: [], - secondaryMatches: [], - }; - - // Index is the ith word from the wordforms. 
It indicates which word we are currently analyzing. - let keyPhraseFormsIndex = 0; - // positionInSentence keeps track of what word in the sentence we are currently at. - let positionInSentence = 0; - - let foundWords = []; - // eslint-disable-next-line no-constant-condition - while ( true ) { - // The head of the keyphrase form we are currently analyzing. - // [["key"], ["word", "words"]] - const head = keyphraseForms[ keyPhraseFormsIndex ]; - - const foundPosition = tokens.slice( positionInSentence ).findIndex( token => matchTokenWithWordForms( head, token, locale ) ); - // If an occurence of a word is found, see if the subsequent word also is find. - if ( foundPosition >= 0 ) { - if ( keyPhraseFormsIndex > 0 ) { - // if there are non keywords between two found keywords reset. - - const previousHead = keyphraseForms[ Math.max( 0, keyPhraseFormsIndex - 1 ) ]; // TODO: better way to extract previoushead. - if ( tokens.slice( positionInSentence, foundPosition + positionInSentence ).some( - t => { - return matchTokenWithWordForms( previousHead, t, locale ); - } ) ) { - result.secondaryMatches.push( tokens[ positionInSentence - 1 ] ); - keyPhraseFormsIndex = 0; - foundWords = []; - continue; - } - - const previousToken = tokens[ Math.max( positionInSentence + foundPosition - 1, 0 ) ]; // TODO: better way to extract previous token. - // if the previous token is an underscore and the previous head is not an underscore, reset. - if ( previousToken.text === "_" && ! previousHead.includes( "_" ) ) { - // result.secondaryMatches.push( tokens[ positionInSentence - 1 ] ); - keyPhraseFormsIndex = 0; - foundWords = []; - continue; - } - } - - keyPhraseFormsIndex += 1; - positionInSentence += foundPosition; - foundWords.push( tokens[ positionInSentence ] ); - positionInSentence += 1; - } else { - if ( keyPhraseFormsIndex === 0 ) { - break; - } - keyPhraseFormsIndex = 0; - } - - if ( foundWords.length >= keyphraseForms.length ) { - result.primaryMatches.push( foundWords ); - keyPhraseFormsIndex = 0; - foundWords = []; - } - } - - return removeConsecutiveKeyphraseFromResult( result ); -}; - - -/** - * Gets the matched keyphrase form(s). - * - * @param {Sentence} sentence The sentence to check. - * @param {string[][]} keyphraseForms The keyphrase forms. - * @param {string} locale The locale used in the analysis. - * @param {function} matchWordCustomHelper A custom helper to match words with a text. - * - * @returns {{primaryMatches: *[], secondaryMatches: *[], position: number}} The array of matched keyphrase form(s). - */ -function findKeyWordFormsInSentence( sentence, keyphraseForms, locale, matchWordCustomHelper ) { - let matches, result; - - if ( keyphraseForms[ 0 ].length === 0 ) { - return { primaryMatches: [], secondaryMatches: [] }; - } - - if ( matchWordCustomHelper ) { - // In practice this branch of the code is only touched for japanese (the only language with a matchWordCustomHelper). - // Since tokenization for Japanese will be implemented in the future, this branch will get obsolete. - - matches = keyphraseForms.map( forms => matchTextWithArray( sentence.text, forms, locale, matchWordCustomHelper ) ); - // If one of the forms is not found, return an empty result. 
- if ( matches.some( match => match.position === -1 ) ) { - return { primaryMatches: [], secondaryMatches: [], position: -1 }; - } - - result = convertToPositionResult( matches, sentence, keyphraseForms, locale ); - - return result; - } - - const tokens = sentence.tokens; - - const newKeyphraseForms = tokenizeKeyphraseForms( keyphraseForms ); - - matches = getMatchesInTokens( newKeyphraseForms, tokens, locale ); - - return matches; -} - -export default findKeyWordFormsInSentence; From abe99a9a1fbe0b9efd6b1d99d6c699f9eae88c40 Mon Sep 17 00:00:00 2001 From: hdvos Date: Thu, 15 Jun 2023 15:33:09 +0200 Subject: [PATCH 152/616] unskip specs --- .../languageProcessing/researches/keywordCountSpec.js | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js index 40080063baa..65af23fa49f 100644 --- a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js +++ b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js @@ -198,7 +198,7 @@ const testCases = [ keyphraseForms: [ [ "keywords" ] ], expectedCount: 0, expectedMarkings: [], - skip: true, + skip: false, }, { description: "with exact matching, a multi word keyphrase should be counted if the focus keyphrase is the same", @@ -207,7 +207,7 @@ const testCases = [ expectedCount: 1, expectedMarkings: [ new Mark( { marked: "A string with a key phrase.", original: "A string with a key phrase." } ) ], - skip: true, + skip: false, }, { // eslint-disable-next-line max-len @@ -216,7 +216,7 @@ const testCases = [ keyphraseForms: [ [ "key phrase" ] ], expectedCount: 0, expectedMarkings: [], - skip: true, + skip: false, }, { description: "with exact matching, it should match a full stop if it is part of the keyphrase and directly precedes the keyphrase.", @@ -225,7 +225,7 @@ const testCases = [ expectedCount: 1, expectedMarkings: [ new Mark( { marked: "A .sentence with a keyphrase.", original: "A .sentence with a keyphrase." } ) ], - skip: true, + skip: false, }, ]; From b7448f7323b65f5de212b9b781e456442a301d33 Mon Sep 17 00:00:00 2001 From: hdvos Date: Thu, 15 Jun 2023 15:33:23 +0200 Subject: [PATCH 153/616] Add todos --- .../src/languageProcessing/researches/keywordCount.js | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js index a3bf915eee5..3dfd2607328 100644 --- a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js +++ b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js @@ -96,7 +96,7 @@ export function countKeyphraseInText( sentences, topicForms, locale, matchWordCu const result = { count: 0, markings: [] }; sentences.forEach( sentence => { - // const matchesInSentence = findKeyWordFormsInSentence( sentence, topicForms.keyphraseForms, locale, matchWordCustomHelper ); + // TODO call matchKeyphraseWithSentence with additional parameter: useExactMatching. 
const matchesInSentence = matchKeyphraseWithSentence( topicForms.keyphraseForms, sentence ); const matchesInSentenceWithoutConsecutiveMatches = removeConsecutiveMatches( matchesInSentence ); const matchesCount = countMatches( matchesInSentenceWithoutConsecutiveMatches, topicForms.keyphraseForms ); @@ -121,6 +121,9 @@ export function countKeyphraseInText( sentences, topicForms, locale, matchWordCu export default function keyphraseCount( paper, researcher ) { const topicForms = researcher.getResearch( "morphology" ); topicForms.keyphraseForms = topicForms.keyphraseForms.map( word => word.map( form => normalizeSingle( form ) ) ); + + // TODO: Use isDoubleQuoted helper to check if you need to use exact matching and pass it to countKeyphraseInText. + const matchWordCustomHelper = researcher.getHelper( "matchWordCustomHelper" ); const locale = paper.getLocale(); const sentences = getSentencesFromTree( paper ); From f8d496c0ec92d7533cbcda48e898ad52c805a94b Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Fri, 16 Jun 2023 12:34:55 +0200 Subject: [PATCH 154/616] Adapt unit tests --- .../researches/keywordCountSpec.js | 180 ++++++++++++------ 1 file changed, 120 insertions(+), 60 deletions(-) diff --git a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js index 65af23fa49f..cd3c4135888 100644 --- a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js +++ b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js @@ -26,193 +26,251 @@ const buildMorphologyMockResearcher = function( keyphraseForms ) { const testCases = [ { description: "counts/marks a string of text with a keyword in it.", - paper: new Paper( "a string of text with the keyword in it", { keyword: "keyword" } ), + paper: new Paper( "
<p>a string of text with the keyword in it</p>
", { keyword: "keyword" } ), keyphraseForms: [ [ "keyword", "keywords" ] ], expectedCount: 1, expectedMarkings: [ new Mark( { marked: "a string of text with the keyword in it", - original: "a string of text with the keyword in it" } ), - ], + original: "a string of text with the keyword in it", + position: { endOffset: 36, startOffset: 29 } } ) ], + skip: false, }, { description: "counts a string of text with no keyword in it.", - paper: new Paper( "a string of text", { keyword: "" } ), + paper: new Paper( "

<p>a string of text</p>

", { keyword: "" } ), keyphraseForms: [ [ "" ] ], expectedCount: 0, expectedMarkings: [], + skip: false, }, { description: "counts multiple occurrences of a keyphrase consisting of multiple words.", - paper: new Paper( "a string of text with the key word in it, with more key words.", { keyword: "key word" } ), + paper: new Paper( "

<p>a string of text with the key word in it, with more key words.</p>

", { keyword: "key word" } ), keyphraseForms: [ [ "key", "keys" ], [ "word", "words" ] ], expectedCount: 2, expectedMarkings: [ new Mark( { marked: "a string of text with the key word in it, " + "with more key words.", - original: "a string of text with the key word in it, with more key words." } ) ], - + original: "a string of text with the key word in it, with more key words.", + position: { endOffset: 37, startOffset: 29 } } ), + new Mark( { marked: "a string of text with the key word in it, " + + "with more key words.", + original: "a string of text with the key word in it, with more key words.", + position: { endOffset: 64, startOffset: 55 } } ) ], + skip: false, }, { description: "counts a string of text with German diacritics and eszett as the keyword", - paper: new Paper( "Waltz keepin auf mitz auf keepin äöüß weiner blitz deutsch spitzen.", { keyword: "äöüß" } ), + paper: new Paper( "

<p>Waltz keepin auf mitz auf keepin äöüß weiner blitz deutsch spitzen.</p>

", { keyword: "äöüß" } ), keyphraseForms: [ [ "äöüß" ] ], expectedCount: 1, expectedMarkings: [ new Mark( { marked: "Waltz keepin auf mitz auf keepin äöüß weiner blitz deutsch spitzen.", - original: "Waltz keepin auf mitz auf keepin äöüß weiner blitz deutsch spitzen." } ) ], + original: "Waltz keepin auf mitz auf keepin äöüß weiner blitz deutsch spitzen.", + position: { endOffset: 40, startOffset: 36 } } ) ], + skip: false, }, { description: "counts a string with multiple keyword morphological forms", - paper: new Paper( "A string of text with a keyword and multiple keywords in it.", { keyword: "keyword" } ), + paper: new Paper( "

<p>A string of text with a keyword and multiple keywords in it.</p>

", { keyword: "keyword" } ), keyphraseForms: [ [ "keyword", "keywords" ] ], expectedCount: 2, - expectedMarkings: [ new Mark( { marked: "A string of text with a keyword " + + expectedMarkings: [ + new Mark( { marked: "A string of text with a keyword " + "and multiple keywords in it.", - original: "A string of text with a keyword and multiple keywords in it." } ) ], - + original: "A string of text with a keyword and multiple keywords in it.", + position: { endOffset: 34, startOffset: 27 } } ), + new Mark( { marked: "A string of text with a keyword " + + "and multiple keywords in it.", + original: "A string of text with a keyword and multiple keywords in it.", + position: { endOffset: 56, startOffset: 48 } } ) ], + skip: false, }, { description: "counts a string with a keyword with a '-' in it", - paper: new Paper( "A string with a key-word.", { keyword: "key-word" } ), + paper: new Paper( "

<p>A string with a key-word.</p>

", { keyword: "key-word" } ), keyphraseForms: [ [ "key-word", "key-words" ] ], expectedCount: 1, expectedMarkings: [ new Mark( { marked: "A string with a key-word.", - original: "A string with a key-word." } ) ], - + original: "A string with a key-word.", + position: { endOffset: 27, startOffset: 19 } } ) ], + skip: false, }, { description: "counts 'key word' in 'key-word'.", - paper: new Paper( "A string with a key-word.", { keyword: "key word" } ), + paper: new Paper( "

<p>A string with a key-word.</p>

", { keyword: "key word" } ), keyphraseForms: [ [ "key", "keys" ], [ "word", "words" ] ], - expectedCount: 1, - expectedMarkings: [ new Mark( { - marked: "A string with a key-word.", - original: "A string with a key-word." } ) ], + expectedCount: 0, + expectedMarkings: [], + skip: false, // Note: this behavior might change in the future. }, { description: "counts a string with a keyword with a '_' in it", - paper: new Paper( "A string with a key_word.", { keyword: "key_word" } ), + paper: new Paper( "

<p>A string with a key_word.</p>

", { keyword: "key_word" } ), keyphraseForms: [ [ "key_word", "key_words" ] ], expectedCount: 1, expectedMarkings: [ new Mark( { marked: "A string with a key_word.", - original: "A string with a key_word." } ) ], + original: "A string with a key_word.", + position: { endOffset: 27, startOffset: 19 } } ) ], + skip: false, }, { description: "counts a string with with a 'ı' in the keyphrase", - paper: new Paper( "A string with with 'kapaklı' as a keyword in it", { keyword: "kapaklı" } ), + paper: new Paper( "

<p>A string with 'kapaklı' as a keyword in it</p>

", { keyword: "kapaklı" } ), keyphraseForms: [ [ "kapaklı" ] ], expectedCount: 1, - expectedMarkings: [ new Mark( { marked: "A string with with 'kapaklı' as a keyword in it", - original: "A string with with 'kapaklı' as a keyword in it" } ) ], + expectedMarkings: [ new Mark( { marked: "A string with 'kapaklı' as a keyword in it", + original: "A string with 'kapaklı' as a keyword in it", + position: { endOffset: 25, startOffset: 18 } } ) ], + skip: false, }, { description: "counts a string with with '&' in the string and the keyword", - paper: new Paper( "A string with key&word in it", { keyword: "key&word" } ), + paper: new Paper( "

<p>A string with key&word in it</p>

", { keyword: "key&word" } ), keyphraseForms: [ [ "key&word" ] ], expectedCount: 1, expectedMarkings: [ new Mark( { marked: "A string with key&word in it", - original: "A string with key&word in it" } ) ], + original: "A string with key&word in it", + position: { endOffset: 25, startOffset: 17 } } ) ], + skip: false, }, { description: "does not count images as keywords.", - paper: new Paper( "image", { keyword: "image" } ), + paper: new Paper( "

A text with image

", { keyword: "image" } ), keyphraseForms: [ [ "image", "images" ] ], expectedCount: 0, expectedMarkings: [], + skip: false, }, { - description: "keyword counting is blind to CApiTal LeTteRs.", - paper: new Paper( "A string with KeY worD.", { keyword: "key word" } ), + description: "also matches same phrase with different capitalization.", + paper: new Paper( "

<p>A string with KeY worD.</p>

", { keyword: "key word" } ), keyphraseForms: [ [ "key", "keys" ], [ "word", "words" ] ], expectedCount: 1, expectedMarkings: [ new Mark( { marked: "A string with KeY worD.", - original: "A string with KeY worD." } ) ], + original: "A string with KeY worD.", + position: { endOffset: 25, startOffset: 17 } } ) ], + skip: false, }, { - description: "keyword counting is blind to types of apostrophe.", - paper: new Paper( "A string with quotes to match the key'word, even if the quotes differ.", { keyword: "key'word" } ), + description: "should still match keyphrase occurrence with different types of apostrophe.", + paper: new Paper( "

<p>A string with quotes to match the key'word, even if the quotes differ.</p>

", { keyword: "key'word" } ), keyphraseForms: [ [ "key'word", "key'words" ] ], expectedCount: 1, expectedMarkings: [ new Mark( { marked: "A string with quotes to match the key'word, even if the quotes differ.", - original: "A string with quotes to match the key'word, even if the quotes differ." } ) ], + original: "A string with quotes to match the key'word, even if the quotes differ.", + position: { endOffset: 45, startOffset: 37 } } ) ], + skip: false, }, { description: "can match dollar sign as in '$keyword'.", - paper: new Paper( "A string with a $keyword." ), + paper: new Paper( "

<p>A string with a $keyword.</p>

" ), keyphraseForms: [ [ "\\$keyword" ] ], expectedCount: 1, expectedMarkings: [ new Mark( { marked: "A string with a $keyword.", - original: "A string with a $keyword." } ) ], + original: "A string with a $keyword.", + position: { endOffset: 27, startOffset: 19 } } ) ], + skip: false, }, { description: "doesn't count 'key-word' in 'key word'.", - paper: new Paper( "A string with a key word.", { keyword: "key-word" } ), + paper: new Paper( "

<p>A string with a key word.</p>

", { keyword: "key-word" } ), keyphraseForms: [ [ "key-word", "key-words" ] ], expectedCount: 0, expectedMarkings: [], + skip: false, }, { description: "doesn't count keyphrase instances inside elements we want to exclude from the analysis", - paper: new Paper( "There is no keyword in this sentence." ), + paper: new Paper( "

There is no keyword in this sentence.

" ), keyphraseForms: [ [ "keyword", "keywords" ] ], expectedCount: 0, expectedMarkings: [], + skip: false, }, { description: "only counts full key phrases (when all keywords are in the sentence once, twice etc.) as matches.", - paper: new Paper( "A string with three keys (key and another key) and one word." ), + paper: new Paper( "

<p>A string with three keys (key and another key) and one word.</p>

" ), keyphraseForms: [ [ "key", "keys" ], [ "word", "words" ] ], expectedCount: 1, expectedMarkings: [ new Mark( { marked: "A string with three keys (" + "key and another key) and one word.", - original: "A string with three keys (key and another key) and one word." } ) ], + original: "A string with three keys (key and another key) and one word.", + position: { endOffset: 27, startOffset: 23 } } ), + new Mark( { marked: "A string with three keys (" + + "key and another key) and one word.", + original: "A string with three keys (key and another key) and one word.", + position: { endOffset: 32, startOffset: 29 } } ), + new Mark( { marked: "A string with three keys (" + + "key and another key) and one word.", + original: "A string with three keys (key and another key) and one word.", + position: { endOffset: 48, startOffset: 45 } } ), + new Mark( { marked: "A string with three keys (" + + "key and another key) and one word.", + original: "A string with three keys (key and another key) and one word.", + position: { endOffset: 62, startOffset: 58 } } ) ], + skip: false, }, { - description: "doesn't match singular forms in reduplicated plurals in Indonesian", - paper: new Paper( "Lorem ipsum dolor sit amet, consectetur keyword-keyword, keyword adipiscing elit.", + description: "matches both singular and reduplicated plural form of the keyword in Indonesian", + paper: new Paper( "

<p>Lorem ipsum dolor sit amet, consectetur keyword-keyword, keyword adipiscing elit.</p>

", { locale: "id_ID", keyword: "keyword" } ), - keyphraseForms: [ [ "keyword", "keywords" ] ], - expectedCount: 1, + keyphraseForms: [ [ "keyword", "keyword-keyword" ] ], + expectedCount: 2, expectedMarkings: [ new Mark( { // eslint-disable-next-line max-len - marked: "Lorem ipsum dolor sit amet, consectetur keyword-keyword, keyword adipiscing elit.", - original: "Lorem ipsum dolor sit amet, consectetur keyword-keyword, keyword adipiscing elit." } ) ], + marked: "Lorem ipsum dolor sit amet, consectetur keyword-keyword, keyword adipiscing elit.", + original: "Lorem ipsum dolor sit amet, consectetur keyword-keyword, keyword adipiscing elit.", + position: { endOffset: 58, startOffset: 43 } } ), + new Mark( { + // eslint-disable-next-line max-len + marked: "Lorem ipsum dolor sit amet, consectetur keyword-keyword, keyword adipiscing elit.", + original: "Lorem ipsum dolor sit amet, consectetur keyword-keyword, keyword adipiscing elit.", + position: { endOffset: 67, startOffset: 60 } } ) ], + skip: false, }, { description: "counts a single word keyphrase with exact matching", - paper: new Paper( "A string with a keyword.", { keyword: "\"keyword\"" } ), + paper: new Paper( "

<p>A string with a keyword.</p>

", { keyword: "\"keyword\"" } ), keyphraseForms: [ [ "keyword" ] ], expectedCount: 1, expectedMarkings: [ new Mark( { marked: "A string with a keyword.", - original: "A string with a keyword." } ) ], + original: "A string with a keyword.", + position: { endOffset: 26, startOffset: 19 } } ) ], + skip: false, }, { description: "with exact matching, a singular single word keyphrase should not be counted if the focus keyphrase is plural", - paper: new Paper( "A string with a keyword.", { keyword: "\"keywords\"" } ), + paper: new Paper( "

<p>A string with a keyword.</p>

", { keyword: "\"keywords\"" } ), keyphraseForms: [ [ "keywords" ] ], expectedCount: 0, expectedMarkings: [], - skip: false, + skip: true, }, { description: "with exact matching, a multi word keyphrase should be counted if the focus keyphrase is the same", - paper: new Paper( "A string with a key phrase.", { keyword: "\"key phrase\"" } ), + paper: new Paper( "

<p>A string with a key phrase.</p>

", { keyword: "\"key phrase\"" } ), keyphraseForms: [ [ "key phrase" ] ], expectedCount: 1, expectedMarkings: [ new Mark( { marked: "A string with a key phrase.", - original: "A string with a key phrase." } ) ], - skip: false, + original: "A string with a key phrase.", + position: { endOffset: 36, startOffset: 29 } } ) ], + // Skipped for now, coz the PR for exact matching is not yet merged. + skip: true, }, { // eslint-disable-next-line max-len - description: "with exact matching, a multi word keyphrase should not me counted if the focus keyphrase has the same words in a different order", - paper: new Paper( "A string with a phrase key.", { keyword: "\"key phrase\"" } ), + description: "with exact matching, a multi word keyphrase should not be counted if the focus keyphrase has the same words in a different order", + paper: new Paper( "

<p>A string with a phrase key.</p>

", { keyword: "\"key phrase\"" } ), keyphraseForms: [ [ "key phrase" ] ], expectedCount: 0, expectedMarkings: [], @@ -220,14 +278,15 @@ const testCases = [ }, { description: "with exact matching, it should match a full stop if it is part of the keyphrase and directly precedes the keyphrase.", - paper: new Paper( "A .sentence with a keyphrase.", { keyword: "\".sentence\"" } ), + paper: new Paper( "

<p>A .sentence with a keyphrase.</p>

", { keyword: "\".sentence\"" } ), keyphraseForms: [ [ ".sentence" ] ], expectedCount: 1, expectedMarkings: [ new Mark( { marked: "A .sentence with a keyphrase.", - original: "A .sentence with a keyphrase." } ) ], - skip: false, + original: "A .sentence with a keyphrase.", + position: { endOffset: 36, startOffset: 29 } } ) ], + // Skipped for now, coz the PR for exact matching is not yet merged. + skip: true, }, - ]; // eslint-disable-next-line max-len @@ -236,6 +295,7 @@ describe.each( testCases )( "Test for counting the keyword in a text in english" test( description, function() { const mockResearcher = buildMorphologyMockResearcher( keyphraseForms ); + buildTree( paper, mockResearcher ); const keyWordCountResult = keyphraseCount( paper, mockResearcher ); expect( keyWordCountResult.count ).toBe( expectedCount ); expect( keyWordCountResult.markings ).toEqual( expectedMarkings ); From 1018d5c6f9dd13b5e663d4476b3672e9fcef5476 Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Fri, 16 Jun 2023 12:36:40 +0200 Subject: [PATCH 155/616] Adapt JSDoc --- .../src/languageProcessing/helpers/match/isDoubleQuoted.js | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/packages/yoastseo/src/languageProcessing/helpers/match/isDoubleQuoted.js b/packages/yoastseo/src/languageProcessing/helpers/match/isDoubleQuoted.js index 9ede8f2448d..ea1008addd2 100644 --- a/packages/yoastseo/src/languageProcessing/helpers/match/isDoubleQuoted.js +++ b/packages/yoastseo/src/languageProcessing/helpers/match/isDoubleQuoted.js @@ -1,11 +1,10 @@ - import doubleQuotes from "../sanitize/doubleQuotes"; import { includes } from "lodash-es"; /** - * Checks if the keyphrase is double quoted. + * Checks if the keyphrase is double-quoted. * @param {string} keyphrase The keyphrase to check. - * @returns {boolean} Whether the keyphrase is double quoted. + * @returns {boolean} Whether the keyphrase is double-quoted. */ const isDoubleQuoted = ( keyphrase ) => { return ( includes( doubleQuotes, keyphrase[ 0 ] ) && includes( doubleQuotes, keyphrase[ keyphrase.length - 1 ] ) ); From 2dfd2f76a35e098a703722a1dfb3740622d5f2dc Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Fri, 16 Jun 2023 12:36:59 +0200 Subject: [PATCH 156/616] Pass the argument for exact matching --- .../researches/keywordCount.js | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js index 3dfd2607328..d2236046438 100644 --- a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js +++ b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js @@ -3,6 +3,7 @@ import getSentencesFromTree from "../helpers/sentence/getSentencesFromTree"; import { normalizeSingle } from "../helpers/sanitize/quotes"; import getMarkingsInSentence from "../helpers/highlighting/getMarkingsInSentence"; import matchKeyphraseWithSentence from "../helpers/match/matchKeyphraseWithSentence"; +import isDoubleQuoted from "../helpers/match/isDoubleQuoted"; /** * Counts the number of matches for a keyphrase in a sentence. @@ -15,7 +16,7 @@ const countMatches = ( matches, keyphraseForms ) => { const matchesCopy = [ ...matches ]; let nrMatches = 0; - // While the number of matches is longer than the keyphrase forms. + // While the number of matches is longer than or the same as the keyphrase forms. 
while ( matchesCopy.length >= keyphraseForms.length ) { let nrKeyphraseFormsWithMatch = 0; @@ -89,15 +90,15 @@ const removeConsecutiveMatches = ( matches ) => { * @param {Array} topicForms The keyphrase forms. * @param {string} locale The locale used in the analysis. * @param {function} matchWordCustomHelper A custom helper to match words with a text. + * @param {boolean} isExactMatchRequested Whether the exact matching is requested. * * @returns {{markings: Mark[], count: number}} The number of keyphrase occurrences in the text and the Mark objects of the matches. */ -export function countKeyphraseInText( sentences, topicForms, locale, matchWordCustomHelper ) { +export function countKeyphraseInText( sentences, topicForms, locale, matchWordCustomHelper, isExactMatchRequested ) { const result = { count: 0, markings: [] }; sentences.forEach( sentence => { - // TODO call matchKeyphraseWithSentence with additional parameter: useExactMatching. - const matchesInSentence = matchKeyphraseWithSentence( topicForms.keyphraseForms, sentence ); + const matchesInSentence = matchKeyphraseWithSentence( topicForms.keyphraseForms, sentence, isExactMatchRequested ); const matchesInSentenceWithoutConsecutiveMatches = removeConsecutiveMatches( matchesInSentence ); const matchesCount = countMatches( matchesInSentenceWithoutConsecutiveMatches, topicForms.keyphraseForms ); const markings = getMarkingsInSentence( sentence, matchesInSentence, matchWordCustomHelper, locale ); @@ -122,13 +123,14 @@ export default function keyphraseCount( paper, researcher ) { const topicForms = researcher.getResearch( "morphology" ); topicForms.keyphraseForms = topicForms.keyphraseForms.map( word => word.map( form => normalizeSingle( form ) ) ); - // TODO: Use isDoubleQuoted helper to check if you need to use exact matching and pass it to countKeyphraseInText. - const matchWordCustomHelper = researcher.getHelper( "matchWordCustomHelper" ); const locale = paper.getLocale(); const sentences = getSentencesFromTree( paper ); + // Exact matching is requested when the keyphrase is enclosed in double quotes. + const isExactMatchRequested = isDoubleQuoted( paper.getKeyword() ); + + const keyphraseFound = countKeyphraseInText( sentences, topicForms, locale, matchWordCustomHelper, isExactMatchRequested ); - const keyphraseFound = countKeyphraseInText( sentences, topicForms, locale, matchWordCustomHelper ); return { count: keyphraseFound.count, markings: flatten( keyphraseFound.markings ), From a4edc60980a2a60d2337d2a7ba5594c36f4c7cb8 Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Fri, 16 Jun 2023 12:38:22 +0200 Subject: [PATCH 157/616] Escape the tokens in case they contain special characters --- .../helpers/match/matchKeyphraseWithSentence.js | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/packages/yoastseo/src/languageProcessing/helpers/match/matchKeyphraseWithSentence.js b/packages/yoastseo/src/languageProcessing/helpers/match/matchKeyphraseWithSentence.js index b95e603536a..cdb355d0f2a 100644 --- a/packages/yoastseo/src/languageProcessing/helpers/match/matchKeyphraseWithSentence.js +++ b/packages/yoastseo/src/languageProcessing/helpers/match/matchKeyphraseWithSentence.js @@ -1,4 +1,5 @@ import getWordsForHTMLParser from "../word/getWordsForHTMLParser"; +import { escapeRegExp } from "lodash-es"; /** * Tokenize keyword forms for exact matching. This function gets the keyword form and tokenizes it. 
@@ -70,9 +71,10 @@ const freeMatching = ( keywordForms, sentence ) => { // const matches = []; return tokens.filter( ( token ) => { + const tokenText = escapeRegExp( token.text ); return keywordForms.some( ( keywordForm ) => { return keywordForm.some( ( keywordFormPart ) => { - return token.text.toLowerCase() === keywordFormPart.toLowerCase(); + return tokenText.toLowerCase() === keywordFormPart.toLowerCase(); } ); } ); } ); From 1991f47d5b5422fa4e67d20529a44d1ae36622bc Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Fri, 16 Jun 2023 14:54:58 +0200 Subject: [PATCH 158/616] Create a helper to get all words from tree --- .../helpers/word/getAllWordsFromTreeSpec.js | 37 +++++++++++++++++++ .../helpers/word/getAllWordsFromTree.js | 22 +++++++++++ 2 files changed, 59 insertions(+) create mode 100644 packages/yoastseo/spec/languageProcessing/helpers/word/getAllWordsFromTreeSpec.js create mode 100644 packages/yoastseo/src/languageProcessing/helpers/word/getAllWordsFromTree.js diff --git a/packages/yoastseo/spec/languageProcessing/helpers/word/getAllWordsFromTreeSpec.js b/packages/yoastseo/spec/languageProcessing/helpers/word/getAllWordsFromTreeSpec.js new file mode 100644 index 00000000000..41be7b9366a --- /dev/null +++ b/packages/yoastseo/spec/languageProcessing/helpers/word/getAllWordsFromTreeSpec.js @@ -0,0 +1,37 @@ +import getAllWordsFromTree from "../../../../src/languageProcessing/helpers/word/getAllWordsFromTree"; +import buildTree from "../../../specHelpers/parse/buildTree"; +import Paper from "../../../../src/values/Paper"; +import EnglishResearcher from "../../../../src/languageProcessing/languages/en/Researcher"; + +describe( "a test for getting words from the tree", () => { + let researcher; + beforeEach( () => { + researcher = new EnglishResearcher(); + } ); + it( "should retrieve the words from the paper, excluding the html tags and punctuations", () => { + const paper = new Paper( "

A very intelligent cat loves their human. A dog is very cute.

A subheading 3" + + "

text text text

A subheading 4

more text." ); + researcher.setPaper( paper ); + buildTree( paper, researcher ); + expect( getAllWordsFromTree( paper ).length ).toEqual( 23 ); + expect( getAllWordsFromTree( paper ) ).toEqual( [ "A", "very", "intelligent", "cat", "loves", "their", "human", + "A", "dog", "is", "very", "cute", "A", "subheading", "3", "text", "text", "text", "A", "subheading", "4", "more", "text" ] ); + } ); + it( "should get the correct words from text containing   and word enclosed in double quotes", () => { + const paper = new Paper( "

What's black, orange, sassy all over, and a crowd favorite? Yep, you guessed it - \"Torties\"!

" ); + researcher.setPaper( paper ); + buildTree( paper, researcher ); + expect( getAllWordsFromTree( paper ).length ).toEqual( 15 ); + expect( getAllWordsFromTree( paper ) ).toEqual( [ "What's", "black", "orange", "sassy", "all", "over", "and", "a", "crowd", + "favorite", "Yep", "you", "guessed", "it", "Torties" ] ); + } ); + it( "should not return words from the excluded elements from the tree", () => { + const paper = new Paper( "
" + + "

From their cute little paws to their mysterious ways of sneaking up on us, cats are just the best!

" + + "

The sentence above is a very compelling quote!

" ); + researcher.setPaper( paper ); + buildTree( paper, researcher ); + expect( getAllWordsFromTree( paper ).length ).toEqual( 8 ); + expect( getAllWordsFromTree( paper ) ).toEqual( [ "The", "sentence", "above", "is", "a", "very", "compelling", "quote" ] ); + } ); +} ); diff --git a/packages/yoastseo/src/languageProcessing/helpers/word/getAllWordsFromTree.js b/packages/yoastseo/src/languageProcessing/helpers/word/getAllWordsFromTree.js new file mode 100644 index 00000000000..a651b7a83b8 --- /dev/null +++ b/packages/yoastseo/src/languageProcessing/helpers/word/getAllWordsFromTree.js @@ -0,0 +1,22 @@ +import getSentencesFromTree from "../sentence/getSentencesFromTree"; +import { flatMap } from "lodash-es"; +import removePunctuation from "../sanitize/removePunctuation"; + +/** + * Gets the words from the tree. + * + * @param {Paper} paper The paper to get the tree and words from. + * + * @returns {String[]} Array of words retrieved from the tree. + */ +export default function( paper ) { + const sentences = getSentencesFromTree( paper ); + // Get all the tokens from each sentence. + const tokens = sentences.map( sentence => sentence.tokens ); + let words = flatMap( tokens ).map( token => token.text ); + // Remove punctuation and spaces. + words = words.map( token => removePunctuation( token ) ); + + // Filter out empty tokens + return words.filter( word => word.trim() !== "" ); +} From 4c691c4064991188723bfd7ab3c946ae3b15dba1 Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Fri, 16 Jun 2023 14:56:05 +0200 Subject: [PATCH 159/616] Adjust getKeywordDensity.js --- .../researches/getKeywordDensitySpec.js | 27 ++++--------------- .../researches/getKeywordDensity.js | 11 +++----- 2 files changed, 9 insertions(+), 29 deletions(-) diff --git a/packages/yoastseo/spec/languageProcessing/researches/getKeywordDensitySpec.js b/packages/yoastseo/spec/languageProcessing/researches/getKeywordDensitySpec.js index 0a3085d7bf9..2f3cbd20603 100644 --- a/packages/yoastseo/spec/languageProcessing/researches/getKeywordDensitySpec.js +++ b/packages/yoastseo/spec/languageProcessing/researches/getKeywordDensitySpec.js @@ -83,14 +83,6 @@ describe( "Test for counting the keyword density in a text with an English resea expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 7.6923076923076925 ); } ); - it( "should i dont know why this spec was created", function() { - const mockPaper = new Paper( "", { keyword: "key&word" } ); - const mockResearcher = new EnglishResearcher( mockPaper ); - buildTree( mockPaper, mockResearcher ); - - expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 0 ); - } ); - it( "should skip a keyphrase in case of three consecutive keyphrases", function() { // Consecutive keywords are skipped, so this will match 2 times. const mockPaper = new Paper( "

This is a nice string with a keyword keyword keyword.

", { keyword: "keyword" } ); @@ -145,20 +137,11 @@ describe( "Test for counting the keyword density in a text with an English resea buildTree( mockPaper, mockResearcher ); expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 7.6923076923076925 ); } ); - - // it( "should ", function() { - // const mockPaper = new Paper( "a string of text with the key-word in it, density should be 7.7%", { keyword: "key-word" } ); - // const mockResearcher = new EnglishResearcher( mockPaper ); - // mockResearcher.addResearchData( "morphology", morphologyDataEN ); - // buildTree( mockPaper, mockResearcher ); - // } ); - - // eslint-disable-next-line max-statements - - - it( "returns keyword density", function() { - - + it( "should recognize keyphrase separated by  ", function() { + const mockPaper = new Paper( "a string with quotes to match the key word, even if the quotes differ", { keyword: "key word" } ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 7.142857142857142 ); } ); } ); diff --git a/packages/yoastseo/src/languageProcessing/researches/getKeywordDensity.js b/packages/yoastseo/src/languageProcessing/researches/getKeywordDensity.js index 5e5f146b148..5c5df0e8e42 100644 --- a/packages/yoastseo/src/languageProcessing/researches/getKeywordDensity.js +++ b/packages/yoastseo/src/languageProcessing/researches/getKeywordDensity.js @@ -1,19 +1,16 @@ -import countWords from "../helpers/word/countWords.js"; -import removeHtmlBlocks from "../helpers/html/htmlParser"; +import getAllWordsFromTree from "../helpers/word/getAllWordsFromTree"; /** * Calculates the keyphrase density. * - * @param {Object} paper The paper containing keyphrase and text. - * @param {Object} researcher The researcher. + * @param {Paper} paper The paper containing keyphrase and text. + * @param {Researcher} researcher The researcher. * * @returns {Object} The keyphrase density. */ export default function getKeyphraseDensity( paper, researcher ) { const getWordsCustomHelper = researcher.getHelper( "getWordsCustomHelper" ); - let text = paper.getText(); - text = removeHtmlBlocks( text ); - let wordCount = countWords( text ); + let wordCount = getAllWordsFromTree( paper ).length; // If there is a custom getWords helper use its output for countWords. 
if ( getWordsCustomHelper ) { From 6740993dfae15b6d31db59aa1b49a882b62ecbf9 Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Fri, 16 Jun 2023 16:52:56 +0200 Subject: [PATCH 160/616] Adapt/add unit tests --- .../match/matchKeyphraseWithSentenceSpec.js | 52 ++- .../helpers/word/getAllWordsFromTreeSpec.js | 7 + .../helpers/word/getWordsSpec.js | 8 + .../researches/keywordCountSpec.js | 23 ++ .../seo/KeywordDensityAssessmentSpec.js | 338 ++++++++++-------- 5 files changed, 253 insertions(+), 175 deletions(-) diff --git a/packages/yoastseo/spec/languageProcessing/helpers/match/matchKeyphraseWithSentenceSpec.js b/packages/yoastseo/spec/languageProcessing/helpers/match/matchKeyphraseWithSentenceSpec.js index 2e0f8a2a9b2..ff426463701 100644 --- a/packages/yoastseo/spec/languageProcessing/helpers/match/matchKeyphraseWithSentenceSpec.js +++ b/packages/yoastseo/spec/languageProcessing/helpers/match/matchKeyphraseWithSentenceSpec.js @@ -594,6 +594,9 @@ const testCases = [ { text: "keyphrase's", sourceCodeRange: { startOffset: 8, endOffset: 19 } }, ], }, +]; + +const exactMatchingTestCases = [ { testDescription: "with exact matching, a single word keyphrase should match with a single word.", sentence: { @@ -802,8 +805,6 @@ const testCases = [ { text: "key.phrase", sourceCodeRange: { startOffset: 18, endOffset: 28 } }, ], }, - - { testDescription: "Matches a single word keyphrase if keyphrase is doubleQuoted.", sentence: { @@ -822,7 +823,7 @@ const testCases = [ ], sourceCodeRange: { startOffset: 0, endOffset: 28 } }, keyphraseForms: [ [ "keyphrase" ] ], - exactMatching: false, + exactMatching: true, expectedResult: [ { text: "keyphrase", sourceCodeRange: { startOffset: 18, endOffset: 27 } }, ], @@ -847,7 +848,7 @@ const testCases = [ ], sourceCodeRange: { startOffset: 0, endOffset: 29 } }, keyphraseForms: [ [ "key phrase" ] ], - exactMatching: false, + exactMatching: true, expectedResult: [ { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, { text: " ", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, @@ -874,7 +875,7 @@ const testCases = [ ], sourceCodeRange: { startOffset: 0, endOffset: 29 } }, keyphraseForms: [ [ "key phrase" ] ], - exactMatching: false, + exactMatching: true, expectedResult: [], }, { @@ -883,7 +884,7 @@ const testCases = [ text: "A sentence with keyphrases.", }, keyphraseForms: [ [ "keyphrase" ] ], - exactMatching: false, + exactMatching: true, expectedResult: [], }, { @@ -904,12 +905,10 @@ const testCases = [ ], sourceCodeRange: { startOffset: 0, endOffset: 28 } }, keyphraseForms: [ [ "keyphrases" ] ], - exactMatching: false, + exactMatching: true, expectedResult: [], }, - ]; -// eslint-enable max-len // The japanese test cases need to be adapted once japanese tokenization is implemented. 
// eslint-disable-next-line max-len @@ -972,20 +971,41 @@ const japaneseTestCases = [ }, }, ]; -// eslint-enable max-len - -// eslint-disable-next-line max-len -describe.each( testCases )( "findKeyWordFormsInSentence", ( { testDescription, sentence, keyphraseForms, exactMatching, expectedResult } ) => { +describe.each( testCases )( "find keyphrase forms in sentence", ( { + testDescription, + sentence, + keyphraseForms, + exactMatching, + expectedResult, +} ) => { it( testDescription, () => { expect( matchKeyphraseWithSentence( keyphraseForms, sentence, exactMatching ) ).toEqual( expectedResult ); } ); } ); -// eslint-disable-next-line max-len -describe.each( japaneseTestCases )( "findKeyWordFormsInSentence for japanese", ( { testDescription, sentence, keyphraseForms, locale, matchWordCustomHelper, expectedResult } ) => { +// The test below is skipped for now because the PR for exact matching is not yet merged. +describe.each( exactMatchingTestCases )( "find keyphrase forms in sentence when exact matching is requested", ( { + testDescription, + sentence, + keyphraseForms, + exactMatching, + expectedResult, +} ) => { + it.skip( testDescription, () => { + expect( matchKeyphraseWithSentence( keyphraseForms, sentence, exactMatching ) ).toEqual( expectedResult ); + } ); +} ); + +describe.each( japaneseTestCases )( "findKeyWordFormsInSentence for japanese", ( { + testDescription, + sentence, + keyphraseForms, + locale, + expectedResult, +} ) => { it.skip( testDescription, () => { - expect( matchKeyphraseWithSentence( sentence, keyphraseForms, locale, matchWordCustomHelper ) ).toEqual( expectedResult ); + expect( matchKeyphraseWithSentence( sentence, keyphraseForms, locale ) ).toEqual( expectedResult ); } ); } ); diff --git a/packages/yoastseo/spec/languageProcessing/helpers/word/getAllWordsFromTreeSpec.js b/packages/yoastseo/spec/languageProcessing/helpers/word/getAllWordsFromTreeSpec.js index 41be7b9366a..faa58cdf7e0 100644 --- a/packages/yoastseo/spec/languageProcessing/helpers/word/getAllWordsFromTreeSpec.js +++ b/packages/yoastseo/spec/languageProcessing/helpers/word/getAllWordsFromTreeSpec.js @@ -34,4 +34,11 @@ describe( "a test for getting words from the tree", () => { expect( getAllWordsFromTree( paper ).length ).toEqual( 8 ); expect( getAllWordsFromTree( paper ) ).toEqual( [ "The", "sentence", "above", "is", "a", "very", "compelling", "quote" ] ); } ); + it( "should return empty array if text is empty", () => { + const paper = new Paper( "" ); + researcher.setPaper( paper ); + buildTree( paper, researcher ); + expect( getAllWordsFromTree( paper ).length ).toEqual( 0 ); + expect( getAllWordsFromTree( paper ) ).toEqual( [] ); + } ); } ); diff --git a/packages/yoastseo/spec/languageProcessing/helpers/word/getWordsSpec.js b/packages/yoastseo/spec/languageProcessing/helpers/word/getWordsSpec.js index 8c96732f312..99e8d913704 100644 --- a/packages/yoastseo/spec/languageProcessing/helpers/word/getWordsSpec.js +++ b/packages/yoastseo/spec/languageProcessing/helpers/word/getWordsSpec.js @@ -72,6 +72,14 @@ describe( "a test for getting words from a sentence", function() { expect( getWords( text ) ).toEqual( [ "Sri", "Tandjung", "noted", "that", "Javanese", "had", "been", "eating", "cooked", "native", "black", "soybeans", "since", "the", "12th", "century" ] ); } ); + + it( "gets words from text containing html tags", function() { + const text = "

A very intelligent cat loves their human. A dog is very cute.

A subheading 3" + + "

text text text

A subheading 4

more text."; + expect( getWords( text ).length ).toBe( 23 ); + expect( getWords( text ) ).toEqual( [ "A", "very", "intelligent", "cat", "loves", "their", "human", "A", "dog", + "is", "very", "cute", "A", "subheading", "3", "text", "text", "text", "A", "subheading", "4", "more", "text" ] ); + } ); } ); describe( "language-specific tests for getting words", function() { diff --git a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js index cd3c4135888..c6be1c8c9f5 100644 --- a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js +++ b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js @@ -191,6 +191,18 @@ const testCases = [ expectedMarkings: [], skip: false, }, + { + description: "counts the keyphrase occurrence in the text with  ", + paper: new Paper( "

a string with nbsp to match the key word.

", { keyword: "key word" } ), + keyphraseForms: [ [ "key" ], [ "word" ] ], + expectedCount: 1, + expectedMarkings: [ + new Mark( { marked: "a string with nbsp to match the key word.", + original: "a string with nbsp to match the key word.", + position: { endOffset: 43, startOffset: 35 } } ), + ], + skip: false, + }, { description: "only counts full key phrases (when all keywords are in the sentence once, twice etc.) as matches.", paper: new Paper( "

<p>A string with three keys (key and another key) and one word.</p>

" ), @@ -287,6 +299,17 @@ const testCases = [ // Skipped for now, coz the PR for exact matching is not yet merged. skip: true, }, + { + description: "can match dollar sign as in '$keyword' with exact matching.", + paper: new Paper( "

<p>A string with a $keyword.</p>

", { keyword: "\"\\$keyword\"" } ), + keyphraseForms: [ [ "\\$keyword" ] ], + expectedCount: 1, + expectedMarkings: [ new Mark( { marked: "A string with a $keyword.", + original: "A string with a $keyword.", + position: { endOffset: 27, startOffset: 19 } } ) ], + // Skipped for now, coz the PR for exact matching is not yet merged. + skip: true, + }, ]; // eslint-disable-next-line max-len diff --git a/packages/yoastseo/spec/scoring/assessments/seo/KeywordDensityAssessmentSpec.js b/packages/yoastseo/spec/scoring/assessments/seo/KeywordDensityAssessmentSpec.js index a111dc849e7..86c98b6664c 100644 --- a/packages/yoastseo/spec/scoring/assessments/seo/KeywordDensityAssessmentSpec.js +++ b/packages/yoastseo/spec/scoring/assessments/seo/KeywordDensityAssessmentSpec.js @@ -20,169 +20,189 @@ const longTextJapanese = "熱".repeat( 200 ); // const japaneseSentenceWithKeyphrase = "一日一冊の面白い本を買って読んでるのはできるかどうかやってみます。"; // Variable is used in language specific test (and thus removed) // const japaneseSentenceWithKeyphraseExactMatch = "一日一冊の本を読むのはできるかどうかやってみます。"; // Variable is used in language specific test (and thus removed) -describe( "Tests for the keywordDensity assessment for languages without morphology", function() { - it( "runs the keywordDensity on the paper without keyword in the text", function() { - const paper = new Paper( nonkeyword.repeat( 1000 ), { keyword: "keyword" } ); - const researcher = new DefaultResearcher( paper ); - buildTree( paper, researcher ); - const result = new KeywordDensityAssessment().getResult( paper, researcher ); - expect( result.getScore() ).toBe( 4 ); - expect( result.getText() ).toBe( "Keyphrase density: " + - "The keyphrase was found 0 times. That's less than the recommended minimum of 5 times for a text of this length." + - " Focus on your keyphrase!" ); - } ); - - it( "runs the keywordDensity on the paper with a low keyphrase density (0.1%)", function() { - const paper = new Paper( nonkeyword.repeat( 999 ) + keyword, { keyword: "keyword" } ); - const researcher = new DefaultResearcher( paper ); - buildTree( paper, researcher ); - const result = new KeywordDensityAssessment().getResult( paper, researcher ); - expect( result.getScore() ).toBe( 4 ); - expect( result.getText() ).toBe( "Keyphrase density: " + - "The keyphrase was found 1 time. That's less than the recommended minimum of 5 times for a text of this length." + - " Focus on your keyphrase!" ); - } ); - - it( "runs the keywordDensity on the paper with a good keyphrase density (0.5%)", function() { - const paper = new Paper( nonkeyword.repeat( 995 ) + keyword.repeat( 5 ), { keyword: "keyword" } ); - const researcher = new DefaultResearcher( paper ); - buildTree( paper, researcher ); - const result = new KeywordDensityAssessment().getResult( paper, researcher ); - expect( result.getScore() ).toBe( 9 ); - expect( result.getText() ).toBe( "Keyphrase density: " + - "The keyphrase was found 5 times. This is great!" ); - } ); - - it( "runs the keywordDensity on the paper with a good keyphrase density (2%)", function() { - const paper = new Paper( nonkeyword.repeat( 980 ) + keyword.repeat( 20 ), { keyword: "keyword" } ); - const researcher = new DefaultResearcher( paper ); - buildTree( paper, researcher ); - - const result = new KeywordDensityAssessment().getResult( paper, researcher ); - expect( result.getScore() ).toBe( 9 ); - expect( result.getText() ).toBe( "Keyphrase density: " + - "The keyphrase was found 20 times. This is great!" 
); - } ); - - it( "runs the keywordDensity on the paper with a slightly too high keyphrase density (3.5%)", function() { - const paper = new Paper( nonkeyword.repeat( 965 ) + keyword.repeat( 35 ), { keyword: "keyword" } ); - const researcher = new DefaultResearcher( paper ); - buildTree( paper, researcher ); - - const result = new KeywordDensityAssessment().getResult( paper, researcher ); - expect( result.getScore() ).toBe( -10 ); - expect( result.getText() ).toBe( "Keyphrase density: " + - "The keyphrase was found 35 times. That's more than the recommended maximum of 29 times " + - "for a text of this length. Don't overoptimize!" ); - } ); - - it( "runs the keywordDensity on the paper with a very high keyphrase density (10%)", function() { - const paper = new Paper( nonkeyword.repeat( 900 ) + keyword.repeat( 100 ), { keyword: "keyword" } ); - const researcher = new DefaultResearcher( paper ); - buildTree( paper, researcher ); - - const result = new KeywordDensityAssessment().getResult( paper, researcher ); - expect( result.getScore() ).toBe( -50 ); - expect( result.getText() ).toBe( "Keyphrase density: " + - "The keyphrase was found 100 times. That's way more than the recommended maximum of 29 times " + - "for a text of this length. Don't overoptimize!" ); - } ); - - - it( "adjusts the keyphrase density based on the length of the keyword with the actual density remaining at 2% - short keyphrase", function() { - const paper = new Paper( nonkeyword.repeat( 960 ) + "b c, ".repeat( 20 ), { keyword: "b c" } ); - const researcher = new DefaultResearcher( paper ); - buildTree( paper, researcher ); - - const result = new KeywordDensityAssessment().getResult( paper, researcher ); - expect( result.getScore() ).toBe( 9 ); - expect( result.getText() ).toBe( "Keyphrase density: " + - "The keyphrase was found 20 times. This is great!" ); - } ); - - it( "does not count text inside elements we want to exclude from the analysis when calculating the recommended" + - "number of keyphrase usages", function() { - const paper = new Paper( nonkeyword.repeat( 101 ) + "
" + nonkeyword.repeat( 859 ) + - "
" + "b c, ".repeat( 20 ), { keyword: "b c" } ); - const researcher = new DefaultResearcher( paper ); - const result = new KeywordDensityAssessment().getResult( paper, researcher ); - expect( result.getScore() ).toBe( -50 ); - expect( result.getText() ).toBe( "Keyphrase density: " + - "The keyphrase was found 20 times. That's way more than the recommended maximum of 3 times for a text of this length." + - " Don't overoptimize!" ); - } ); - - it( "adjusts the keyphrase density based on the length of the keyword with the actual density remaining at 2% - long keyphrase", function() { - const paper = new Paper( nonkeyword.repeat( 900 ) + "b c d e f, ".repeat( 20 ), { keyword: "b c d e f" } ); - const researcher = new DefaultResearcher( paper ); - buildTree( paper, researcher ); - - - const result = new KeywordDensityAssessment().getResult( paper, researcher ); - expect( result.getScore() ).toBe( -50 ); - expect( result.getText() ).toBe( "Keyphrase density: " + - "The keyphrase was found 20 times. That's way more than the recommended maximum of 12 times " + - "for a text of this length. Don't overoptimize!" ); - } ); - - it( "returns a bad result if the keyword is only used once, regardless of the density", function() { - const paper = new Paper( nonkeyword.repeat( 100 ) + keyword, { keyword: "keyword" } ); - const researcher = new DefaultResearcher( paper ); - buildTree( paper, researcher ); - +// Test data for language without morphology. +const testDataWithDefaultResearcher = [ + { + description: "runs the keywordDensity on the paper without keyword in the text", + paper: new Paper( "

" + nonkeyword.repeat( 1000 ) + "

", { keyword: "keyword" } ), + expectedResult: { + score: 4, + text: "Keyphrase density: " + + "The keyphrase was found 0 times. That's less than the recommended minimum of 5 times for a text of this length." + + " Focus on your keyphrase!", + }, + }, + { + description: "runs the keywordDensity on the paper with a low keyphrase density (0.1%)", + paper: new Paper( "

" + nonkeyword.repeat( 999 ) + keyword + "

", { keyword: "keyword" } ), + expectedResult: { + score: 4, + text: "Keyphrase density: " + + "The keyphrase was found 1 time. That's less than the recommended minimum of 5 times for a text of this length." + + " Focus on your keyphrase!", + }, + }, + { + description: "runs the keywordDensity on the paper with a good keyphrase density (0.5%)", + paper: new Paper( "

" + nonkeyword.repeat( 995 ) + keyword.repeat( 5 ) + "

", { keyword: "keyword" } ), + expectedResult: { + score: 9, + text: "Keyphrase density: " + + "The keyphrase was found 5 times. This is great!", + }, + }, + { + description: "runs the keywordDensity on the paper with a good keyphrase density (2%)", + paper: new Paper( "

" + nonkeyword.repeat( 980 ) + keyword.repeat( 20 ) + "

", { keyword: "keyword" } ), + expectedResult: { + score: 9, + text: "Keyphrase density: " + + "The keyphrase was found 20 times. This is great!", + }, + }, + { + description: "runs the keywordDensity on the paper with a slightly too high keyphrase density (3.5%)", + paper: new Paper( "

" + nonkeyword.repeat( 965 ) + keyword.repeat( 35 ) + "

", { keyword: "keyword" } ), + expectedResult: { + score: -10, + text: "Keyphrase density: " + + "The keyphrase was found 35 times. That's more than the recommended maximum of 29 times " + + "for a text of this length. Don't overoptimize!", + }, + }, + { + description: "runs the keywordDensity on the paper with a very high keyphrase density (10%)", + paper: new Paper( "

" + nonkeyword.repeat( 900 ) + keyword.repeat( 100 ) + "

", { keyword: "keyword" } ), + expectedResult: { + score: -50, + text: "Keyphrase density: " + + "The keyphrase was found 100 times. That's way more than the recommended maximum of 29 times " + + "for a text of this length. Don't overoptimize!", + }, + }, + { + description: "adjusts the keyphrase density based on the length of the keyword with the actual density remaining at 2% - short keyphrase", + paper: new Paper( "

" + nonkeyword.repeat( 960 ) + "b c, ".repeat( 20 ) + "

", { keyword: "b c" } ), + expectedResult: { + score: 9, + text: "Keyphrase density: " + + "The keyphrase was found 20 times. This is great!", + }, + }, + { + description: "does not count text inside elements we want to exclude from the analysis when calculating the recommended" + + "number of keyphrase usages", + paper: new Paper( "

" + nonkeyword.repeat( 101 ) + "

" + nonkeyword.repeat( 859 ) + + "
" + "b c, ".repeat( 20 ) + "

", { keyword: "b c" } ), + expectedResult: { + score: -50, + text: "Keyphrase density: " + + "The keyphrase was found 20 times. That's way more than the recommended maximum of 3 times for a text of this length." + + " Don't overoptimize!", + }, + }, + { + description: "adjusts the keyphrase density based on the length of the keyword with the actual density remaining at 2% - long keyphrase", + paper: new Paper( "

" + nonkeyword.repeat( 900 ) + "b c d e f, ".repeat( 20 ) + "

", { keyword: "b c d e f" } ), + expectedResult: { + score: -50, + text: "Keyphrase density: " + + "The keyphrase was found 20 times. That's way more than the recommended maximum of 12 times for a text of this length." + + " Don't overoptimize!", + }, + }, + { + description: "returns a bad result if the keyword is only used once, regardless of the density", + paper: new Paper( "

" + nonkeyword.repeat( 100 ) + keyword + "

", { keyword: "keyword" } ), + expectedResult: { + score: 4, + text: "Keyphrase density: " + + "The keyphrase was found 1 time. That's less than the recommended minimum of 2 times " + + "for a text of this length. Focus on your keyphrase!", + }, + }, + { + description: "returns a good result if the keyword is used twice and " + + "the recommended count is smaller than or equal to 2, regardless of the density", + paper: new Paper( "

" + nonkeyword.repeat( 100 ) + "a b c, a b c

", { keyword: "a b c", locale: "xx_XX" } ), + expectedResult: { + score: 9, + text: "Keyphrase density: " + + "The keyphrase was found 2 times. This is great!", + }, + }, +]; + +describe.each( testDataWithDefaultResearcher )( "a kephrase density test for languages without morphology", ( { + description, + paper, + expectedResult, +} ) => { + const researcher = new DefaultResearcher( paper ); + buildTree( paper, researcher ); + it( description, () => { const result = new KeywordDensityAssessment().getResult( paper, researcher ); - expect( result.getScore() ).toBe( 4 ); - expect( result.getText() ).toBe( "Keyphrase density: " + - "The keyphrase was found 1 time. That's less than the recommended minimum of 2 times " + - "for a text of this length. Focus on your keyphrase!" ); - } ); - - it( "returns a good result if the keyword is used twice and " + - "the recommended count is smaller than or equal to 2, regardless of the density", function() { - const paper = new Paper( nonkeyword.repeat( 100 ) + "a b c, a b c", { keyword: "a b c", locale: "xx_XX" } ); - const researcher = new DefaultResearcher( paper ); - buildTree( paper, researcher ); - - const result = new KeywordDensityAssessment().getResult( paper, researcher ); - expect( result.getScore() ).toBe( 9 ); - expect( result.getText() ).toBe( "Keyphrase density: " + - "The keyphrase was found 2 times. This is great!" ); - } ); - it( "applies to a paper with a keyword and a text of at least 100 words", function() { - const paper = new Paper( nonkeyword.repeat( 100 ), { keyword: "keyword" } ); - expect( new KeywordDensityAssessment().isApplicable( paper, new DefaultResearcher( paper ) ) ).toBe( true ); - } ); - - it( "does not apply to a paper with a keyword and a text of at least 100 words when the text is inside an element" + - "we want to exclude from the analysis", function() { - const paper = new Paper( "
" + nonkeyword.repeat( 100 ) + "
", { keyword: "keyword" } ); - expect( new KeywordDensityAssessment().isApplicable( paper, new DefaultResearcher( paper ) ) ).toBe( false ); - } ); - - it( "does not apply to a paper with text of 100 words but without a keyword", function() { - const paper = new Paper( nonkeyword.repeat( 100 ), { keyword: "" } ); - expect( new KeywordDensityAssessment().isApplicable( paper, new DefaultResearcher( paper ) ) ).toBe( false ); - } ); - - it( "does not apply to a paper with a text containing less than 100 words and with a keyword", function() { - const paper = new Paper( nonkeyword.repeat( 99 ), { keyword: "keyword" } ); - expect( new KeywordDensityAssessment().isApplicable( paper, new DefaultResearcher( paper ) ) ).toBe( false ); - } ); - - it( "does not apply to a paper with a text containing less than 100 words and without a keyword", function() { - const paper = new Paper( nonkeyword.repeat( 99 ), { keyword: "" } ); - expect( new KeywordDensityAssessment().isApplicable( paper, new DefaultResearcher( paper ) ) ).toBe( false ); - } ); - - it( "applies to a Japanese paper with a keyword and a text of at least 200 characters", function() { - const paper = new Paper( longTextJapanese, { keyword: "keyword" } ); - expect( new KeywordDensityAssessment().isApplicable( paper, new JapaneseResearcher( paper ) ) ).toBe( true ); + expect( result.getScore() ).toBe( expectedResult.score ); + expect( result.getText() ).toBe( expectedResult.text ); } ); +} ); - it( "does not apply to a Japanese paper with text of less than 200 characters", function() { - const paper = new Paper( shortTextJapanese, { keyword: "keyword" } ); - expect( new KeywordDensityAssessment().isApplicable( paper, new JapaneseResearcher( paper ) ) ).toBe( false ); +const testDataForApplicability = [ + { + description: "applies to a paper with a keyword and a text of at least 100 words", + paper: new Paper( "

" + nonkeyword.repeat( 100 ) + "

", { keyword: "keyword" } ), + researcher: new DefaultResearcher(), + expectedResult: true, + }, + { + description: "does not apply to a paper with a keyword and a text of at least 100 words when the text is inside an element" + + "we want to exclude from the analysis", + paper: new Paper( "
" + nonkeyword.repeat( 100 ) + "
", { keyword: "keyword" } ), + researcher: new DefaultResearcher(), + expectedResult: false, + }, + { + description: "does not apply to a paper with text of 100 words but without a keyword", + paper: new Paper( "

" + nonkeyword.repeat( 100 ) + "

", { keyword: "" } ), + researcher: new DefaultResearcher(), + expectedResult: false, + }, + { + description: "does not apply to a paper with a text containing less than 100 words and with a keyword", + paper: new Paper( "

" + nonkeyword.repeat( 99 ) + "

", { keyword: "keyword" } ), + researcher: new DefaultResearcher(), + expectedResult: false, + }, + { + description: "does not apply to a paper with a text containing less than 100 words and without a keyword", + paper: new Paper( "

" + nonkeyword.repeat( 99 ) + "

", { keyword: "" } ), + researcher: new DefaultResearcher(), + expectedResult: false, + }, + { + description: "applies to a Japanese paper with a keyword and a text of at least 200 characters", + paper: new Paper( "

" + longTextJapanese + "

", { keyword: "keyword" } ), + researcher: new JapaneseResearcher(), + expectedResult: true, + }, + { + description: "does not apply to a Japanese paper with text of less than 200 characters", + paper: new Paper( "

" + shortTextJapanese + "

", { keyword: "keyword" } ), + researcher: new JapaneseResearcher(), + expectedResult: false, + }, +]; +describe.each( testDataForApplicability )( "assessment applicability test", ( { + description, + paper, + researcher, + expectedResult, +} ) => { + researcher.setPaper( paper ); + buildTree( paper, researcher ); + it( description, () => { + expect( new KeywordDensityAssessment().isApplicable( paper, researcher ) ).toBe( expectedResult ); } ); } ); From f593f52420e39ec556a2f1d355ead936da797b64 Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Fri, 16 Jun 2023 16:59:20 +0200 Subject: [PATCH 161/616] Adjust JSDoc --- .../match/matchKeyphraseWithSentence.js | 23 +++++++++++-------- .../researches/keywordCount.js | 3 +-- 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/packages/yoastseo/src/languageProcessing/helpers/match/matchKeyphraseWithSentence.js b/packages/yoastseo/src/languageProcessing/helpers/match/matchKeyphraseWithSentence.js index cdb355d0f2a..c7f14bda715 100644 --- a/packages/yoastseo/src/languageProcessing/helpers/match/matchKeyphraseWithSentence.js +++ b/packages/yoastseo/src/languageProcessing/helpers/match/matchKeyphraseWithSentence.js @@ -2,10 +2,12 @@ import getWordsForHTMLParser from "../word/getWordsForHTMLParser"; import { escapeRegExp } from "lodash-es"; /** - * Tokenize keyword forms for exact matching. This function gets the keyword form and tokenizes it. + * Tokenizes keyword forms for exact matching. This function gets the keyword form and tokenizes it. * This function assumes that if a keyphrase needs to be matched exactly, there will be only one keyword form. * This is the result of how the focus keyword is processed in buildTopicStems.js in the buildStems function. + * * @param {(string[])[]} keywordForms The keyword forms to tokenize. + * * @returns {string[]} The tokenized keyword forms. */ const tokenizeKeywordFormsForExactMatching = ( keywordForms ) => { @@ -15,12 +17,15 @@ const tokenizeKeywordFormsForExactMatching = ( keywordForms ) => { }; /** - * Exact matching of keyword forms in a sentence. Exact matching happens when the user puts the keyword in double quotes. + * Gets the exact matches of the keyphrase. + * Exact matching happens when the user puts the keyword in double quotes. + * * @param {(string[])[]} keywordForms The keyword forms to match. * @param {Sentence} sentence The sentence to match the keyword forms with. + * * @returns {Token[]} The tokens that exactly match the keyword forms. */ -const exactMatching = ( keywordForms, sentence ) => { +const getExactMatches = ( keywordForms, sentence ) => { // Tokenize keyword forms. const keywordTokens = tokenizeKeywordFormsForExactMatching( keywordForms ); @@ -34,7 +39,7 @@ const exactMatching = ( keywordForms, sentence ) => { while ( sentenceIndex < sentenceTokens.length ) { // If the current sentence token matches the current keyword token, add it to the current match. - const sentenceTokenText = sentenceTokens[ sentenceIndex ].text; + const sentenceTokenText = escapeRegExp( sentenceTokens[ sentenceIndex ].text ); const keywordTokenText = keywordTokens[ keywordIndex ]; if ( sentenceTokenText.toLowerCase() === keywordTokenText.toLowerCase() ) { @@ -59,17 +64,15 @@ const exactMatching = ( keywordForms, sentence ) => { }; /** - * Free matching of keyword forms in a sentence. Free matching happens when the user does not put the keyword in double quotes. + * Free matching of keyword forms in a sentence. Free matching happens when the keyphrase is enclosed in double quotes. 
* @param {(string[])[]} keywordForms The keyword forms to match. * @param {Sentence} sentence The sentence to match the keyword forms with. * @returns {Token[]} The tokens that match the keyword forms. */ -const freeMatching = ( keywordForms, sentence ) => { +const getNonExactMatches = ( keywordForms, sentence ) => { const tokens = sentence.tokens.slice(); // Filter out all tokens that do not match the keyphrase forms. - // const matches = []; - return tokens.filter( ( token ) => { const tokenText = escapeRegExp( token.text ); return keywordForms.some( ( keywordForm ) => { @@ -106,9 +109,9 @@ const freeMatching = ( keywordForms, sentence ) => { */ const matchKeyphraseWithSentence = ( keywordForms, sentence, useExactMatching = false ) => { if ( useExactMatching ) { - return exactMatching( keywordForms, sentence ); + return getExactMatches( keywordForms, sentence ); } - return freeMatching( keywordForms, sentence ); + return getNonExactMatches( keywordForms, sentence ); }; export default matchKeyphraseWithSentence; diff --git a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js index d2236046438..acac65dc2d2 100644 --- a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js +++ b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js @@ -86,7 +86,7 @@ const removeConsecutiveMatches = ( matches ) => { /** * Counts the occurrences of the keyphrase in the text and creates the Mark objects for the matches. * - * @param {Array} sentences The sentences to check. + * @param {Sentence[]} sentences The sentences to check. * @param {Array} topicForms The keyphrase forms. * @param {string} locale The locale used in the analysis. * @param {function} matchWordCustomHelper A custom helper to match words with a text. 
@@ -102,7 +102,6 @@ export function countKeyphraseInText( sentences, topicForms, locale, matchWordCu const matchesInSentenceWithoutConsecutiveMatches = removeConsecutiveMatches( matchesInSentence ); const matchesCount = countMatches( matchesInSentenceWithoutConsecutiveMatches, topicForms.keyphraseForms ); const markings = getMarkingsInSentence( sentence, matchesInSentence, matchWordCustomHelper, locale ); - // const markings = getMarkingsInSentence( sentence, matchesInSentence, matchWordCustomHelper, locale ); result.markings.push( markings ); result.count += matchesCount; From bbc5e3adaca306883bac04e5bfac5a7dbd9d734f Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Fri, 16 Jun 2023 17:16:33 +0200 Subject: [PATCH 162/616] Adjust test for Japanese --- .../seo/KeywordDensityAssessmentSpec.js | 341 +++++++++--------- 1 file changed, 180 insertions(+), 161 deletions(-) diff --git a/packages/yoastseo/spec/scoring/assessments/seo/KeywordDensityAssessmentSpec.js b/packages/yoastseo/spec/scoring/assessments/seo/KeywordDensityAssessmentSpec.js index 86c98b6664c..9b6b329dd98 100644 --- a/packages/yoastseo/spec/scoring/assessments/seo/KeywordDensityAssessmentSpec.js +++ b/packages/yoastseo/spec/scoring/assessments/seo/KeywordDensityAssessmentSpec.js @@ -11,14 +11,11 @@ import buildTree from "../../../specHelpers/parse/buildTree"; const morphologyData = getMorphologyData( "en" ); const morphologyDataDe = getMorphologyData( "de" ); -// const morphologyDataJA = getMorphologyData( "ja" ); // Variable is used in language specific test (and thus removed) +const morphologyDataJA = getMorphologyData( "ja" ); const nonkeyword = "nonkeyword, "; const keyword = "keyword, "; const shortTextJapanese = "熱".repeat( 199 ); const longTextJapanese = "熱".repeat( 200 ); -// const japaneseSentence = "私の猫はかわいいです。小さくて可愛い花の刺繍に関する一般一般の記事です。".repeat( 20 ); // Variable is used in language specific test (and thus removed) -// const japaneseSentenceWithKeyphrase = "一日一冊の面白い本を買って読んでるのはできるかどうかやってみます。"; // Variable is used in language specific test (and thus removed) -// const japaneseSentenceWithKeyphraseExactMatch = "一日一冊の本を読むのはできるかどうかやってみます。"; // Variable is used in language specific test (and thus removed) // Test data for language without morphology. const testDataWithDefaultResearcher = [ @@ -342,160 +339,182 @@ describe( "A test for marking the keyword", function() { // expect( assessment.getMarks() ).toEqual( marks ); // } ); } ); -// -// describe( "A test for keyword density in Japanese", function() { -// it( "shouldn't return NaN/infinity times of keyphrase occurrence when the keyphrase contains only function words " + -// "and there is no match in the text", function() { -// const paper = new Paper( japaneseSentence + japaneseSentenceWithKeyphrase.repeat( 32 ), { -// keyword: "ばっかり", -// locale: "ja", -// } ); -// const researcher = new JapaneseResearcher( paper ); -// researcher.addResearchData( "morphology", morphologyDataJA ); -// const result = new KeywordDensityAssessment().getResult( paper, researcher ); -// expect( result.getScore() ).toBe( 4 ); -// expect( result.getText() ).toBe( "Keyphrase density: " + -// "The keyphrase was found 0 times. That's less than the recommended minimum of 6 times for a text of this length. " + -// "Focus on your keyphrase!" 
); -// } ); -// -// it( "shouldn't return NaN/infinity times of synonym occurrence when the synonym contains only function words " + -// "and there is no match in the text", function() { -// const paper = new Paper( japaneseSentence + japaneseSentenceWithKeyphrase.repeat( 32 ), { -// keyword: "", -// synonyms: "ばっかり", -// locale: "ja", -// } ); -// const researcher = new JapaneseResearcher( paper ); -// researcher.addResearchData( "morphology", morphologyDataJA ); -// const result = new KeywordDensityAssessment().getResult( paper, researcher ); -// expect( result.getScore() ).toBe( 4 ); -// expect( result.getText() ).toBe( "Keyphrase density: " + -// "The keyphrase was found 0 times. That's less than the recommended minimum of 6 times for a text of this length. " + -// "Focus on your keyphrase!" ); -// } ); -// -// it( "shouldn't return NaN/infinity times of keyphrase occurrence when the keyphrase contains spaces " + -// "and there is no match in the text", function() { -// const paper = new Paper( japaneseSentence + japaneseSentenceWithKeyphrase.repeat( 32 ), { -// keyword: "かしら かい を ばっかり", -// locale: "ja", -// } ); -// const researcher = new JapaneseResearcher( paper ); -// researcher.addResearchData( "morphology", morphologyDataJA ); -// const result = new KeywordDensityAssessment().getResult( paper, researcher ); -// expect( result.getScore() ).toBe( 4 ); -// expect( result.getText() ).toBe( "Keyphrase density: " + -// "The keyphrase was found 0 times. That's less than the recommended minimum of 6 times for a text of this length. " + -// "Focus on your keyphrase!" ); -// } ); -// -// it( "gives a very BAD result when keyword density is above 4% when the text contains way too many instances" + -// " of the keyphrase forms", function() { -// const paper = new Paper( japaneseSentence + japaneseSentenceWithKeyphrase.repeat( 32 ), { -// keyword: "一冊の本を読む", -// locale: "ja", -// } ); -// const researcher = new JapaneseResearcher( paper ); -// researcher.addResearchData( "morphology", morphologyDataJA ); -// const result = new KeywordDensityAssessment().getResult( paper, researcher ); -// expect( result.getScore() ).toBe( -50 ); -// expect( result.getText() ).toBe( "Keyphrase density: " + -// "The keyphrase was found 32 times. That's way more than the recommended maximum of 23 times for a text of " + -// "this length. Don't overoptimize!" ); -// } ); -// -// it( "gives a BAD result when keyword density is between 3% and 4% when the text contains too many instances" + -// " of the keyphrase forms", function() { -// const paper = new Paper( japaneseSentence + japaneseSentenceWithKeyphrase.repeat( 16 ), { -// keyword: "一冊の本を読む", -// locale: "ja", -// } ); -// const researcher = new JapaneseResearcher( paper ); -// researcher.addResearchData( "morphology", morphologyDataJA ); -// const result = new KeywordDensityAssessment().getResult( paper, researcher ); -// expect( result.getScore() ).toBe( -10 ); -// expect( result.getText() ).toBe( "Keyphrase density:" + -// " The keyphrase was found 16 times. That's more than the recommended maximum of 15 times for a text of this length." + -// " Don't overoptimize!" 
); -// } ); -// -// it( "gives a BAD result when keyword density is 0", function() { -// const paper = new Paper( japaneseSentence, { keyword: "一冊の本を読む", locale: "ja" } ); -// const researcher = new JapaneseResearcher( paper ); -// researcher.addResearchData( "morphology", morphologyDataJA ); -// const result = new KeywordDensityAssessment().getResult( paper, researcher ); -// expect( result.getScore() ).toBe( 4 ); -// expect( result.getText() ).toBe( "Keyphrase density: " + -// "The keyphrase was found 0 times. That's less than the recommended minimum of 2 times for a text of this length." + -// " Focus on your keyphrase!" ); -// } ); -// -// it( "gives a BAD result when keyword density is between 0 and 0.5%", function() { -// const paper = new Paper( japaneseSentence + japaneseSentenceWithKeyphrase.repeat( 1 ), { -// keyword: "一冊の本を読む", -// locale: "ja", -// } ); -// const researcher = new JapaneseResearcher( paper ); -// researcher.addResearchData( "morphology", morphologyDataJA ); -// const result = new KeywordDensityAssessment().getResult( paper, researcher ); -// expect( result.getScore() ).toBe( 4 ); -// expect( result.getText() ).toBe( "Keyphrase density:" + -// " The keyphrase was found 1 time. That's less than the recommended minimum of 2 times for a text of this length." + -// " Focus on your keyphrase!" ); -// } ); -// -// it( "gives a GOOD result when keyword density is between 0.5% and 3.5% when the text contains keyphrase forms", function() { -// const paper = new Paper( japaneseSentence + japaneseSentenceWithKeyphrase.repeat( 8 ), { -// keyword: "一冊の本を読む", -// locale: "ja", -// } ); -// const researcher = new JapaneseResearcher( paper ); -// researcher.addResearchData( "morphology", morphologyDataJA ); -// const result = new KeywordDensityAssessment().getResult( paper, researcher ); -// expect( result.getScore() ).toBe( 9 ); -// expect( result.getText() ).toBe( "Keyphrase density: " + -// "The keyphrase was found 8 times. This is great!" ); -// } ); -// -// it( "gives a GOOD result when keyword density is between 0.5% and 3%, when the exact match of the keyphrase is in the text", function() { -// const paper = new Paper( japaneseSentence + japaneseSentenceWithKeyphraseExactMatch.repeat( 8 ), { -// keyword: "一冊の本を読む", -// locale: "ja", -// } ); -// const researcher = new JapaneseResearcher( paper ); -// researcher.addResearchData( "morphology", morphologyDataJA ); -// const result = new KeywordDensityAssessment().getResult( paper, researcher ); -// expect( result.getScore() ).toBe( 9 ); -// expect( result.getText() ).toBe( "Keyphrase density: " + -// "The keyphrase was found 8 times. This is great!" ); -// } ); -// -// it( "should still gives a GOOD result when keyword density is between 0.5% and 3%, when the exact match of the keyphrase is in the text " + -// "and the keyphrase is enclosed in double quotes", function() { -// const paper = new Paper( japaneseSentence + japaneseSentenceWithKeyphraseExactMatch.repeat( 8 ), { -// keyword: "「一冊の本を読む」", -// locale: "ja", -// } ); -// const researcher = new JapaneseResearcher( paper ); -// researcher.addResearchData( "morphology", morphologyDataJA ); -// const result = new KeywordDensityAssessment().getResult( paper, researcher ); -// expect( result.getScore() ).toBe( 9 ); -// expect( result.getText() ).toBe( "Keyphrase density: " + -// "The keyphrase was found 8 times. This is great!" 
); -// } ); -// -// it( "gives a BAD result when keyword density is between 0.5% and 3.5%, if morphology is added, but there is no morphology data", function() { -// const paper = new Paper( japaneseSentence + japaneseSentenceWithKeyphrase.repeat( 8 ), { -// keyword: "一冊の本を読む", -// locale: "ja", -// } ); -// const researcher = new JapaneseResearcher( paper ); -// const result = new KeywordDensityAssessment().getResult( paper, researcher ); -// expect( result.getScore() ).toBe( 4 ); -// expect( result.getText() ).toBe( "Keyphrase density: " + -// "The keyphrase was found 0 times. That's less than the recommended minimum of 2 times for a text of this length." + -// " Focus on your keyphrase!" ); -// } ); -// } ); -// + +const japaneseSentence = "私の猫はかわいいです。小さくて可愛い花の刺繍に関する一般一般の記事です。".repeat( 20 ); +const japaneseSentenceWithKeyphrase = "一日一冊の面白い本を買って読んでるのはできるかどうかやってみます。"; +const japaneseSentenceWithKeyphraseExactMatch = "一日一冊の本を読むのはできるかどうかやってみます。"; + +const testDataJapanese = [ + { + description: "shouldn't return NaN/infinity times of keyphrase occurrence when the keyphrase contains only function words " + + "and there is no match in the text", + paper: new Paper( "
<p>" + japaneseSentence + japaneseSentenceWithKeyphrase.repeat( 32 ) + "</p>
", { + keyword: "ばっかり", + locale: "ja", + } ), + expectedResult: { + score: 4, + text: "Keyphrase density: " + + "The keyphrase was found 0 times. That's less than the recommended minimum of 6 times for a text of this length. " + + "Focus on your keyphrase!", + }, + }, + { + description: "shouldn't return NaN/infinity times of synonym occurrence when the synonym contains only function words " + + "and there is no match in the text", + paper: new Paper( "
<p>" + japaneseSentence + japaneseSentenceWithKeyphrase.repeat( 32 ) + "</p>
", { + keyword: "", + synonyms: "ばっかり", + locale: "ja", + } ), + expectedResult: { + score: 4, + text: "Keyphrase density: " + + "The keyphrase was found 0 times. That's less than the recommended minimum of 6 times for a text of this length. " + + "Focus on your keyphrase!", + }, + }, + { + description: "shouldn't return NaN/infinity times of keyphrase occurrence when the keyphrase contains spaces " + + "and there is no match in the text", + paper: new Paper( "
<p>" + japaneseSentence + japaneseSentenceWithKeyphrase.repeat( 32 ) + "</p>
", { + keyword: "かしら かい を ばっかり", + locale: "ja", + } ), + expectedResult: { + score: 4, + text: "Keyphrase density: " + + "The keyphrase was found 0 times. That's less than the recommended minimum of 6 times for a text of this length. " + + "Focus on your keyphrase!", + }, + }, +]; + +describe.each( testDataJapanese )( "test for keyphrase density in Japanese", ( { + description, + paper, + expectedResult, +} ) => { + const researcher = new JapaneseResearcher( paper ); + researcher.addResearchData( "morphology", morphologyDataJA ); + buildTree( paper, researcher ); + const result = new KeywordDensityAssessment().getResult( paper, researcher ); + + it( description, () => { + expect( result.getScore() ).toBe( expectedResult.score ); + expect( result.getText() ).toBe( expectedResult.text ); + } ); +} ); + +// eslint-disable-next-line no-warning-comments +// TODO: ADAPT the tests below. +xdescribe( "A test for keyword density in Japanese", function() { + it( "gives a very BAD result when keyword density is above 4% when the text contains way too many instances" + + " of the keyphrase forms", function() { + const paper = new Paper( japaneseSentence + japaneseSentenceWithKeyphrase.repeat( 32 ), { + keyword: "一冊の本を読む", + locale: "ja", + } ); + const researcher = new JapaneseResearcher( paper ); + researcher.addResearchData( "morphology", morphologyDataJA ); + const result = new KeywordDensityAssessment().getResult( paper, researcher ); + expect( result.getScore() ).toBe( -50 ); + expect( result.getText() ).toBe( "Keyphrase density: " + + "The keyphrase was found 32 times. That's way more than the recommended maximum of 23 times for a text of " + + "this length. Don't overoptimize!" ); + } ); + + it( "gives a BAD result when keyword density is between 3% and 4% when the text contains too many instances" + + " of the keyphrase forms", function() { + const paper = new Paper( japaneseSentence + japaneseSentenceWithKeyphrase.repeat( 16 ), { + keyword: "一冊の本を読む", + locale: "ja", + } ); + const researcher = new JapaneseResearcher( paper ); + researcher.addResearchData( "morphology", morphologyDataJA ); + const result = new KeywordDensityAssessment().getResult( paper, researcher ); + expect( result.getScore() ).toBe( -10 ); + expect( result.getText() ).toBe( "Keyphrase density:" + + " The keyphrase was found 16 times. That's more than the recommended maximum of 15 times for a text of this length." + + " Don't overoptimize!" ); + } ); + + it( "gives a BAD result when keyword density is 0", function() { + const paper = new Paper( japaneseSentence, { keyword: "一冊の本を読む", locale: "ja" } ); + const researcher = new JapaneseResearcher( paper ); + researcher.addResearchData( "morphology", morphologyDataJA ); + const result = new KeywordDensityAssessment().getResult( paper, researcher ); + expect( result.getScore() ).toBe( 4 ); + expect( result.getText() ).toBe( "Keyphrase density: " + + "The keyphrase was found 0 times. That's less than the recommended minimum of 2 times for a text of this length." + + " Focus on your keyphrase!" 
); + } ); + + it( "gives a BAD result when keyword density is between 0 and 0.5%", function() { + const paper = new Paper( japaneseSentence + japaneseSentenceWithKeyphrase.repeat( 1 ), { + keyword: "一冊の本を読む", + locale: "ja", + } ); + const researcher = new JapaneseResearcher( paper ); + researcher.addResearchData( "morphology", morphologyDataJA ); + const result = new KeywordDensityAssessment().getResult( paper, researcher ); + expect( result.getScore() ).toBe( 4 ); + expect( result.getText() ).toBe( "Keyphrase density:" + + " The keyphrase was found 1 time. That's less than the recommended minimum of 2 times for a text of this length." + + " Focus on your keyphrase!" ); + } ); + + it( "gives a GOOD result when keyword density is between 0.5% and 3.5% when the text contains keyphrase forms", function() { + const paper = new Paper( japaneseSentence + japaneseSentenceWithKeyphrase.repeat( 8 ), { + keyword: "一冊の本を読む", + locale: "ja", + } ); + const researcher = new JapaneseResearcher( paper ); + researcher.addResearchData( "morphology", morphologyDataJA ); + const result = new KeywordDensityAssessment().getResult( paper, researcher ); + expect( result.getScore() ).toBe( 9 ); + expect( result.getText() ).toBe( "Keyphrase density: " + + "The keyphrase was found 8 times. This is great!" ); + } ); + + it( "gives a GOOD result when keyword density is between 0.5% and 3%, when the exact match of the keyphrase is in the text", function() { + const paper = new Paper( japaneseSentence + japaneseSentenceWithKeyphraseExactMatch.repeat( 8 ), { + keyword: "一冊の本を読む", + locale: "ja", + } ); + const researcher = new JapaneseResearcher( paper ); + researcher.addResearchData( "morphology", morphologyDataJA ); + const result = new KeywordDensityAssessment().getResult( paper, researcher ); + expect( result.getScore() ).toBe( 9 ); + expect( result.getText() ).toBe( "Keyphrase density: " + + "The keyphrase was found 8 times. This is great!" ); + } ); + + it( "should still gives a GOOD result when keyword density is between 0.5% and 3%, when the exact match of the keyphrase is in the text " + + "and the keyphrase is enclosed in double quotes", function() { + const paper = new Paper( japaneseSentence + japaneseSentenceWithKeyphraseExactMatch.repeat( 8 ), { + keyword: "「一冊の本を読む」", + locale: "ja", + } ); + const researcher = new JapaneseResearcher( paper ); + researcher.addResearchData( "morphology", morphologyDataJA ); + const result = new KeywordDensityAssessment().getResult( paper, researcher ); + expect( result.getScore() ).toBe( 9 ); + expect( result.getText() ).toBe( "Keyphrase density: " + + "The keyphrase was found 8 times. This is great!" ); + } ); + + it( "gives a BAD result when keyword density is between 0.5% and 3.5%, if morphology is added, but there is no morphology data", function() { + const paper = new Paper( japaneseSentence + japaneseSentenceWithKeyphrase.repeat( 8 ), { + keyword: "一冊の本を読む", + locale: "ja", + } ); + const researcher = new JapaneseResearcher( paper ); + const result = new KeywordDensityAssessment().getResult( paper, researcher ); + expect( result.getScore() ).toBe( 4 ); + expect( result.getText() ).toBe( "Keyphrase density: " + + "The keyphrase was found 0 times. That's less than the recommended minimum of 2 times for a text of this length." + + " Focus on your keyphrase!" 
); + } ); +} ); + From 483b65b47e08b0faae19040631613069870aeaa2 Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Fri, 16 Jun 2023 17:23:05 +0200 Subject: [PATCH 163/616] Add TODOs --- .../assessments/seo/KeywordDensityAssessment.js | 10 ++++++---- .../helpers/assessments/recommendedKeywordCount.js | 2 ++ 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/packages/yoastseo/src/scoring/assessments/seo/KeywordDensityAssessment.js b/packages/yoastseo/src/scoring/assessments/seo/KeywordDensityAssessment.js index 5f052f08dcb..58b83e2c8b2 100644 --- a/packages/yoastseo/src/scoring/assessments/seo/KeywordDensityAssessment.js +++ b/packages/yoastseo/src/scoring/assessments/seo/KeywordDensityAssessment.js @@ -111,8 +111,9 @@ class KeyphraseDensityAssessment extends Assessment { this._hasMorphologicalForms = researcher.getData( "morphology" ) !== false; - let text = paper.getText(); - text = removeHtmlBlocks( text ); + const text = paper.getText(); + // eslint-disable-next-line no-warning-comments + // TODO: do we need to use the paper text, or can we adjust this to use the html parser? this.setBoundaries( text, keyphraseLength, customGetWords ); this._keyphraseDensity = this._keyphraseDensity * keyphraseLengthFactor( keyphraseLength ); @@ -331,8 +332,9 @@ class KeyphraseDensityAssessment extends Assessment { if ( customApplicabilityConfig ) { this._config.applicableIfTextLongerThan = customApplicabilityConfig; } - let text = paper.getText(); - text = removeHtmlBlocks( text ); + const text = paper.getText(); + // eslint-disable-next-line no-warning-comments + // TODO: Adapt this to use HTML Parser. const textLength = customCountLength ? customCountLength( text ) : getWords( text ).length; return paper.hasText() && paper.hasKeyword() && textLength >= this._config.applicableIfTextLongerThan; diff --git a/packages/yoastseo/src/scoring/helpers/assessments/recommendedKeywordCount.js b/packages/yoastseo/src/scoring/helpers/assessments/recommendedKeywordCount.js index 100ffdd3cbd..ac577dde9a0 100644 --- a/packages/yoastseo/src/scoring/helpers/assessments/recommendedKeywordCount.js +++ b/packages/yoastseo/src/scoring/helpers/assessments/recommendedKeywordCount.js @@ -14,6 +14,8 @@ import keyphraseLengthFactor from "./keyphraseLengthFactor.js"; * @returns {number} The recommended keyword count. */ export default function( text, keyphraseLength, recommendedKeywordDensity, maxOrMin, customGetWords ) { + // eslint-disable-next-line no-warning-comments + // TODO: adapt to use the HTML parser. Also for Japanese. const wordCount = customGetWords ? customGetWords( text ).length : countWords( text ); if ( wordCount === 0 ) { From 3aa96c5f405a65f01fd6a4ef2512e2c3054d2419 Mon Sep 17 00:00:00 2001 From: Mykola Shlyakhtun Date: Mon, 19 Jun 2023 04:40:29 +0300 Subject: [PATCH 164/616] HTML Parser - Implement position based highlighting for block editor#138 Implement block offset calculation Tests for block added. 
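
As context for reviewers, a minimal sketch of the block offset calculation this
patch introduces (the helper name toBlockOffsets is made up for this note and is
not part of the change): a token's block-relative range is its absolute source
range shifted back by the offset at which the parent block's content starts.

function toBlockOffsets( tokenRange, parentStartOffset = 0 ) {
	// Block-relative positions: absolute source offsets minus the offset
	// at which the parent block's content begins.
	return {
		startOffset: tokenRange.startOffset,
		endOffset: tokenRange.endOffset,
		startOffsetBlock: tokenRange.startOffset - parentStartOffset,
		endOffsetBlock: tokenRange.endOffset - parentStartOffset,
	};
}

// Example: a token at absolute offsets 55-62 whose parent block's content starts
// at offset 29 maps to block-relative offsets 26-33, as in the paragraph block test.
console.log( toBlockOffsets( { startOffset: 55, endOffset: 62 }, 29 ) );
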
--- .../researches/keywordCountSpec.js | 621 ++++++++++++++++-- .../highlighting/getMarkingsInSentence.js | 5 +- .../helpers/sentence/getSentencesFromTree.js | 11 +- 3 files changed, 570 insertions(+), 67 deletions(-) diff --git a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js index c6be1c8c9f5..e8613991c9d 100644 --- a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js +++ b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js @@ -32,7 +32,14 @@ const testCases = [ expectedMarkings: [ new Mark( { marked: "a string of text with the keyword in it", original: "a string of text with the keyword in it", - position: { endOffset: 36, startOffset: 29 } } ) ], + position: { + startOffset: 29, + endOffset: 36, + startOffsetBlock: 26, + endOffsetBlock: 33, + }, + } ), + ], skip: false, }, { @@ -49,14 +56,29 @@ const testCases = [ keyphraseForms: [ [ "key", "keys" ], [ "word", "words" ] ], expectedCount: 2, expectedMarkings: [ - new Mark( { marked: "a string of text with the key word in it, " + + new Mark( { + marked: "a string of text with the key word in it, " + "with more key words.", - original: "a string of text with the key word in it, with more key words.", - position: { endOffset: 37, startOffset: 29 } } ), - new Mark( { marked: "a string of text with the key word in it, " + + original: "a string of text with the key word in it, with more key words.", + position: { + startOffset: 29, + endOffset: 37, + startOffsetBlock: 26, + endOffsetBlock: 34, + }, + } ), + new Mark( { + marked: "a string of text with the key word in it, " + "with more key words.", - original: "a string of text with the key word in it, with more key words.", - position: { endOffset: 64, startOffset: 55 } } ) ], + original: "a string of text with the key word in it, with more key words.", + position: { + startOffset: 55, + endOffset: 64, + startOffsetBlock: 52, + endOffsetBlock: 61, + }, + } ), + ], skip: false, }, { @@ -65,9 +87,17 @@ const testCases = [ keyphraseForms: [ [ "äöüß" ] ], expectedCount: 1, expectedMarkings: [ - new Mark( { marked: "Waltz keepin auf mitz auf keepin äöüß weiner blitz deutsch spitzen.", + new Mark( { + marked: "Waltz keepin auf mitz auf keepin äöüß weiner blitz deutsch spitzen.", original: "Waltz keepin auf mitz auf keepin äöüß weiner blitz deutsch spitzen.", - position: { endOffset: 40, startOffset: 36 } } ) ], + position: { + startOffset: 36, + endOffset: 40, + startOffsetBlock: 33, + endOffsetBlock: 37, + }, + } ), + ], skip: false, }, { @@ -76,14 +106,29 @@ const testCases = [ keyphraseForms: [ [ "keyword", "keywords" ] ], expectedCount: 2, expectedMarkings: [ - new Mark( { marked: "A string of text with a keyword " + + new Mark( { + marked: "A string of text with a keyword " + "and multiple keywords in it.", - original: "A string of text with a keyword and multiple keywords in it.", - position: { endOffset: 34, startOffset: 27 } } ), - new Mark( { marked: "A string of text with a keyword " + + original: "A string of text with a keyword and multiple keywords in it.", + position: { + startOffset: 27, + endOffset: 34, + startOffsetBlock: 24, + endOffsetBlock: 31, + }, + } ), + new Mark( { + marked: "A string of text with a keyword " + "and multiple keywords in it.", - original: "A string of text with a keyword and multiple keywords in it.", - position: { endOffset: 56, startOffset: 48 } } ) ], + original: "A string of text with a keyword and multiple 
keywords in it.", + position: { + startOffset: 48, + endOffset: 56, + startOffsetBlock: 45, + endOffsetBlock: 53, + }, + } ), + ], skip: false, }, { @@ -91,9 +136,18 @@ const testCases = [ paper: new Paper( "

<p>A string with a key-word.</p>
", { keyword: "key-word" } ), keyphraseForms: [ [ "key-word", "key-words" ] ], expectedCount: 1, - expectedMarkings: [ new Mark( { marked: "A string with a key-word.", - original: "A string with a key-word.", - position: { endOffset: 27, startOffset: 19 } } ) ], + expectedMarkings: [ + new Mark( { + marked: "A string with a key-word.", + original: "A string with a key-word.", + position: { + startOffset: 19, + endOffset: 27, + startOffsetBlock: 16, + endOffsetBlock: 24, + }, + } ), + ], skip: false, }, { @@ -110,9 +164,18 @@ const testCases = [ paper: new Paper( "

<p>A string with a key_word.</p>
", { keyword: "key_word" } ), keyphraseForms: [ [ "key_word", "key_words" ] ], expectedCount: 1, - expectedMarkings: [ new Mark( { marked: "A string with a key_word.", - original: "A string with a key_word.", - position: { endOffset: 27, startOffset: 19 } } ) ], + expectedMarkings: [ + new Mark( { + marked: "A string with a key_word.", + original: "A string with a key_word.", + position: { + startOffset: 19, + endOffset: 27, + startOffsetBlock: 16, + endOffsetBlock: 24, + }, + } ), + ], skip: false, }, { @@ -120,9 +183,18 @@ const testCases = [ paper: new Paper( "

<p>A string with 'kapaklı' as a keyword in it</p>
", { keyword: "kapaklı" } ), keyphraseForms: [ [ "kapaklı" ] ], expectedCount: 1, - expectedMarkings: [ new Mark( { marked: "A string with 'kapaklı' as a keyword in it", - original: "A string with 'kapaklı' as a keyword in it", - position: { endOffset: 25, startOffset: 18 } } ) ], + expectedMarkings: [ + new Mark( { + marked: "A string with 'kapaklı' as a keyword in it", + original: "A string with 'kapaklı' as a keyword in it", + position: { + startOffset: 18, + endOffset: 25, + startOffsetBlock: 15, + endOffsetBlock: 22, + }, + } ), + ], skip: false, }, { @@ -130,9 +202,18 @@ const testCases = [ paper: new Paper( "

<p>A string with key&word in it</p>
", { keyword: "key&word" } ), keyphraseForms: [ [ "key&word" ] ], expectedCount: 1, - expectedMarkings: [ new Mark( { marked: "A string with key&word in it", - original: "A string with key&word in it", - position: { endOffset: 25, startOffset: 17 } } ) ], + expectedMarkings: [ + new Mark( { + marked: "A string with key&word in it", + original: "A string with key&word in it", + position: { + startOffset: 17, + endOffset: 25, + startOffsetBlock: 14, + endOffsetBlock: 22, + }, + } ), + ], skip: false, }, { @@ -149,9 +230,17 @@ const testCases = [ keyphraseForms: [ [ "key", "keys" ], [ "word", "words" ] ], expectedCount: 1, expectedMarkings: [ - new Mark( { marked: "A string with KeY worD.", + new Mark( { + marked: "A string with KeY worD.", original: "A string with KeY worD.", - position: { endOffset: 25, startOffset: 17 } } ) ], + position: { + startOffset: 17, + endOffset: 25, + startOffsetBlock: 14, + endOffsetBlock: 22, + }, + } ), + ], skip: false, }, { @@ -159,10 +248,18 @@ const testCases = [ paper: new Paper( "

<p>A string with quotes to match the key'word, even if the quotes differ.</p>
", { keyword: "key'word" } ), keyphraseForms: [ [ "key'word", "key'words" ] ], expectedCount: 1, - expectedMarkings: [ new Mark( { - marked: "A string with quotes to match the key'word, even if the quotes differ.", - original: "A string with quotes to match the key'word, even if the quotes differ.", - position: { endOffset: 45, startOffset: 37 } } ) ], + expectedMarkings: [ + new Mark( { + marked: "A string with quotes to match the key'word, even if the quotes differ.", + original: "A string with quotes to match the key'word, even if the quotes differ.", + position: { + startOffset: 37, + endOffset: 45, + startOffsetBlock: 34, + endOffsetBlock: 42, + }, + } ), + ], skip: false, }, { @@ -170,9 +267,18 @@ const testCases = [ paper: new Paper( "

<p>A string with a $keyword.</p>
" ), keyphraseForms: [ [ "\\$keyword" ] ], expectedCount: 1, - expectedMarkings: [ new Mark( { marked: "A string with a $keyword.", - original: "A string with a $keyword.", - position: { endOffset: 27, startOffset: 19 } } ) ], + expectedMarkings: [ + new Mark( { + marked: "A string with a $keyword.", + original: "A string with a $keyword.", + position: { + endOffset: 27, + startOffset: 19, + startOffsetBlock: 16, + endOffsetBlock: 24, + }, + } ), + ], skip: false, }, { @@ -197,9 +303,16 @@ const testCases = [ keyphraseForms: [ [ "key" ], [ "word" ] ], expectedCount: 1, expectedMarkings: [ - new Mark( { marked: "a string with nbsp to match the key word.", + new Mark( { + marked: "a string with nbsp to match the key word.", original: "a string with nbsp to match the key word.", - position: { endOffset: 43, startOffset: 35 } } ), + position: { + startOffset: 35, + endOffset: 43, + startOffsetBlock: 32, + endOffsetBlock: 40, + }, + } ), ], skip: false, }, @@ -209,26 +322,55 @@ const testCases = [ keyphraseForms: [ [ "key", "keys" ], [ "word", "words" ] ], expectedCount: 1, expectedMarkings: [ - new Mark( { marked: "A string with three keys (" + + new Mark( { + marked: "A string with three keys (" + "key and another key) and one word.", - original: "A string with three keys (key and another key) and one word.", - position: { endOffset: 27, startOffset: 23 } } ), - new Mark( { marked: "A string with three keys (" + + original: "A string with three keys (key and another key) and one word.", + position: { + startOffset: 23, + endOffset: 27, + startOffsetBlock: 20, + endOffsetBlock: 24, + }, + } ), + new Mark( { + marked: "A string with three keys (" + "key and another key) and one word.", - original: "A string with three keys (key and another key) and one word.", - position: { endOffset: 32, startOffset: 29 } } ), - new Mark( { marked: "A string with three keys (" + + original: "A string with three keys (key and another key) and one word.", + position: { + startOffset: 29, + endOffset: 32, + startOffsetBlock: 26, + endOffsetBlock: 29, + }, + } ), + new Mark( { + marked: "A string with three keys (" + "key and another key) and one word.", - original: "A string with three keys (key and another key) and one word.", - position: { endOffset: 48, startOffset: 45 } } ), - new Mark( { marked: "A string with three keys (" + + original: "A string with three keys (key and another key) and one word.", + position: { + startOffset: 45, + endOffset: 48, + startOffsetBlock: 42, + endOffsetBlock: 45, + }, + } ), + new Mark( { + marked: "A string with three keys (" + "key and another key) and one word.", - original: "A string with three keys (key and another key) and one word.", - position: { endOffset: 62, startOffset: 58 } } ) ], + original: "A string with three keys (key and another key) and one word.", + position: { + startOffset: 58, + endOffset: 62, + startOffsetBlock: 55, + endOffsetBlock: 59, + }, + } ), + ], skip: false, }, { @@ -242,12 +384,25 @@ const testCases = [ // eslint-disable-next-line max-len marked: "Lorem ipsum dolor sit amet, consectetur keyword-keyword, keyword adipiscing elit.", original: "Lorem ipsum dolor sit amet, consectetur keyword-keyword, keyword adipiscing elit.", - position: { endOffset: 58, startOffset: 43 } } ), + position: { + endOffset: 58, + startOffset: 43, + startOffsetBlock: 40, + endOffsetBlock: 55, + }, + } ), new Mark( { // eslint-disable-next-line max-len marked: "Lorem ipsum dolor sit amet, consectetur keyword-keyword, keyword adipiscing elit.", original: "Lorem ipsum 
dolor sit amet, consectetur keyword-keyword, keyword adipiscing elit.", - position: { endOffset: 67, startOffset: 60 } } ) ], + position: { + startOffset: 60, + endOffset: 67, + startOffsetBlock: 57, + endOffsetBlock: 64, + }, + } ), + ], skip: false, }, { @@ -255,9 +410,17 @@ const testCases = [ paper: new Paper( "

<p>A string with a keyword.</p>
", { keyword: "\"keyword\"" } ), keyphraseForms: [ [ "keyword" ] ], expectedCount: 1, - expectedMarkings: [ new Mark( { marked: "A string with a keyword.", - original: "A string with a keyword.", - position: { endOffset: 26, startOffset: 19 } } ) ], + expectedMarkings: [ + new Mark( { marked: "A string with a keyword.", + original: "A string with a keyword.", + position: { + startOffset: 19, + endOffset: 26, + startOffsetBlock: 16, + endOffsetBlock: 23, + }, + } ), + ], skip: false, }, { @@ -273,9 +436,17 @@ const testCases = [ paper: new Paper( "

<p>A string with a key phrase.</p>
", { keyword: "\"key phrase\"" } ), keyphraseForms: [ [ "key phrase" ] ], expectedCount: 1, - expectedMarkings: [ new Mark( { marked: "A string with a key phrase.", - original: "A string with a key phrase.", - position: { endOffset: 36, startOffset: 29 } } ) ], + expectedMarkings: [ + new Mark( { marked: "A string with a key phrase.", + original: "A string with a key phrase.", + position: { + startOffset: 29, + endOffset: 36, + startOffsetBlock: 26, + endOffsetBlock: 33, + }, + } ), + ], // Skipped for now, coz the PR for exact matching is not yet merged. skip: true, }, @@ -293,9 +464,18 @@ const testCases = [ paper: new Paper( "

<p>A .sentence with a keyphrase.</p>
", { keyword: "\".sentence\"" } ), keyphraseForms: [ [ ".sentence" ] ], expectedCount: 1, - expectedMarkings: [ new Mark( { marked: "A .sentence with a keyphrase.", - original: "A .sentence with a keyphrase.", - position: { endOffset: 36, startOffset: 29 } } ) ], + expectedMarkings: [ + new Mark( { + marked: "A .sentence with a keyphrase.", + original: "A .sentence with a keyphrase.", + position: { + startOffset: 29, + endOffset: 36, + startOffsetBlock: 26, + endOffsetBlock: 33, + }, + } ), + ], // Skipped for now, coz the PR for exact matching is not yet merged. skip: true, }, @@ -304,12 +484,271 @@ const testCases = [ paper: new Paper( "

<p>A string with a $keyword.</p>
", { keyword: "\"\\$keyword\"" } ), keyphraseForms: [ [ "\\$keyword" ] ], expectedCount: 1, - expectedMarkings: [ new Mark( { marked: "A string with a $keyword.", - original: "A string with a $keyword.", - position: { endOffset: 27, startOffset: 19 } } ) ], + expectedMarkings: [ + new Mark( { + marked: "A string with a $keyword.", + original: "A string with a $keyword.", + position: { + startOffset: 19, + endOffset: 27, + startOffsetBlock: 16, + endOffsetBlock: 24, + }, + } ), + ], // Skipped for now, coz the PR for exact matching is not yet merged. skip: true, }, + { + description: "can match paragraph gutenberg block", + paper: new Paper( + ` +

a string of text with the keyword in it

+ `, + { + keyword: "keyword", + wpBlocks: [ + { + attributes: { + content: "a string of text with the keyword in it", + }, + name: "core/paragraph", + clientId: "5615ca1f-4052-41a9-97be-be19cfd2085b", + innerBlocks: [], + }, + ], + } + ), + keyphraseForms: [ [ "keyword", "keywords" ] ], + expectedCount: 1, + expectedMarkings: [ + new Mark( { + marked: "a string of text with the keyword in it", + original: "a string of text with the keyword in it", + position: { + startOffset: 55, + endOffset: 62, + startOffsetBlock: 26, + endOffsetBlock: 33, + }, + } ), + ], + skip: false, + }, + { + description: "can match complex paragraph gutenberg block", + paper: new Paper( + ` +

Over the years, we’ve written quite a few articles about  + branding. + Branding is about getting people to relate to your company and + products. It’s also about trying to make your brand synonymous with a certain product or service. + This can be a lengthy and hard project. It can potentially cost you all of your revenue. + It’s no wonder that branding is often associated with investing lots of money in marketing and promotion. + However, for a lot of small business owners, the investment in branding will have + to be made with a relatively small budget. 

+ `, + { + keyword: "keyword", + wpBlocks: [ + { + attributes: { + // eslint-disable-next-line max-len + content: "Over the years, we’ve written quite a few articles about branding. Branding is about getting people to relate to your company and products. It’s also about trying to make your brand synonymous with a certain product or service. This can be a lengthy and hard project. It can potentially cost you all of your revenue. It’s no wonder that branding is often associated with investing lots of money in marketing and promotion. However, for a lot of small business owners, the investment in branding will have to be made with a relatively small budget. ", + }, + name: "core/paragraph", + clientId: "6860403c-0b36-43b2-96fa-2d30c10cb44c", + innerBlocks: [], + }, + ], + } + ), + keyphraseForms: [ [ "synonymous" ] ], + expectedCount: 1, + expectedMarkings: [ + + new Mark( { + // eslint-disable-next-line max-len + marked: " It’s also about trying to make your brand synonymous with a certain product or service.", + original: " It’s also about trying to make your brand synonymous with a certain product or service.", + position: { + startOffset: 295, + endOffset: 305, + startOffsetBlock: 266, + endOffsetBlock: 276, + }, + } ), + ], + skip: false, + }, + { + description: "can match complex paragraph gutenberg block", + paper: new Paper( + ` +

You might be a local bakery with 10 employees, or a local industrial company employing up to 500 people. + These all can be qualified as + ‘small business’. All have the same main goal when they start: the need to establish a name in their field of expertise. There + are multiple ways to do this, without a huge budget. + In this post, I’ll share my thoughts on how to go about your own low-budget branding.

+ `, + { + keyword: "expertise", + wpBlocks: [ + { + attributes: { + // eslint-disable-next-line max-len + content: "You might be a local bakery with 10 employees, or a local industrial company employing up to 500 people. These all can be qualified as ‘small business’. All have the same main goal when they start: the need to establish a name in their field of expertise. There are multiple ways to do this, without a huge budget. In this post, I’ll share my thoughts on how to go about your own low-budget branding.", + }, + name: "core/paragraph", + clientId: "65be5146-3395-4845-8c7c-4a79fd6e3611", + innerBlocks: [], + }, + ], + } + ), + keyphraseForms: [ [ "expertise" ] ], + expectedCount: 1, + expectedMarkings: [ + + new Mark( { + // eslint-disable-next-line max-len + marked: " All have the same main goal when they start: the need to establish a name in their field of expertise.", + original: " All have the same main goal when they start: the need to establish a name in their field of expertise.", + position: { + startOffset: 282, + endOffset: 291, + startOffsetBlock: 253, + endOffsetBlock: 262, + }, + } ), + ], + skip: false, + }, + { + description: "can match heading gutenberg block", + paper: new Paper( + ` +

Define and communicate brand values

+ `, + { + keyword: "communicate", + wpBlocks: [ + { + attributes: { + level: 2, + content: "Define and communicate brand values", + }, + name: "core/heading", + clientId: "b8e62d35-b3af-45ec-9889-139ef0a9baaa", + innerBlocks: [], + }, + ], + } + ), + keyphraseForms: [ [ "communicate" ] ], + expectedCount: 1, + expectedMarkings: [ + // eslint-disable-next-line max-len + new Mark( { + marked: "Define and communicate brand values", + original: "Define and communicate brand values", + position: { + startOffset: 107, + endOffset: 118, + startOffsetBlock: 11, + endOffsetBlock: 22, + }, + } ), + ], + skip: false, + }, + { + description: "can match complex paragraph gutenberg block", + paper: new Paper( + ` +
+
+

Lorem Ipsum is simply dummy text of the printing and typesetting industry.

+
+ + + +
+

There are many variations of passages of Lorem Ipsum available

+
+
+ `, + { + keyword: "Ipsum", + wpBlocks: [ + { + attributes: {}, + name: "core/columns", + clientId: "1b9f1d49-813e-4578-a19a-bf236447cc41", + innerBlocks: [ + { + attributes: {}, + name: "core/column", + clientId: "09b93261-25ef-4391-98cc-630e8fa1eac1", + innerBlocks: [ + { + attributes: { + // eslint-disable-next-line max-len + content: "Lorem Ipsum is simply dummy text of the printing and typesetting industry.", + }, + name: "core/paragraph", + clientId: "3f0e68c1-287e-40ef-90c8-54b3ab61702a", + innerBlocks: [], + }, + ], + }, + { + attributes: {}, + name: "core/column", + clientId: "0f247feb-5ada-433d-bc97-1faa0265f7c4", + innerBlocks: [ + { + attributes: { + content: "There are many variations of passages of Lorem Ipsum available", + }, + name: "core/paragraph", + clientId: "5093342c-ec93-48a2-b2d5-883ae514b12d", + innerBlocks: [], + }, + ], + }, + ], + }, + ], + } + ), + keyphraseForms: [ [ "Ipsum" ] ], + expectedCount: 2, + expectedMarkings: [ + new Mark( { + marked: "Lorem Ipsum is simply dummy text of the printing and typesetting industry.", + original: "Lorem Ipsum is simply dummy text of the printing and typesetting industry.", + position: { + startOffset: 149, + endOffset: 154, + startOffsetBlock: 14, + endOffsetBlock: 19, + }, + } ), + new Mark( { + marked: "There are many variations of passages of Lorem Ipsum available", + original: "There are many variations of passages of Lorem Ipsum available", + position: { + startOffset: 426, + endOffset: 431, + startOffsetBlock: 47, + endOffsetBlock: 52, + }, + } ), + ], + // Skipped for now, coz the PR for exact matching is not yet merged. + skip: false, + }, ]; // eslint-disable-next-line max-len @@ -325,6 +764,60 @@ describe.each( testCases )( "Test for counting the keyword in a text in english" } ); } ); +// eslint-disable-next-line max-len +describe( "Test position based highlighting", () => { + it( "test", function() { + const expectedMarkings = [ + // eslint-disable-next-line max-len + new Mark( { marked: " It’s also about trying to make your brand synonymous with a certain product or service.", + original: " It’s also about trying to make your brand synonymous with a certain product or service.", + position: { + startOffset: 295, + endOffset: 305, + startOffsetBlock: 266, + endOffsetBlock: 276, + }, + } ), + ]; + + const keyphraseForms = [ [ "synonymous" ] ]; + + const mockPaper = new Paper( + ` +

Over the years, we’ve written quite a few articles about  + branding. + Branding is about getting people to relate to your company and + products. It’s also about trying to make your brand synonymous with a certain product or service. + This can be a lengthy and hard project. It can potentially cost you all of your revenue. + It’s no wonder that branding is often associated with investing lots of money in marketing and promotion. + However, for a lot of small business owners, the investment in branding will have + to be made with a relatively small budget. 

+ `, + { + keyword: "keyword", + wpBlocks: [ + { + attributes: { + // eslint-disable-next-line max-len + content: "Over the years, we’ve written quite a few articles about branding. Branding is about getting people to relate to your company and products. It’s also about trying to make your brand synonymous with a certain product or service. This can be a lengthy and hard project. It can potentially cost you all of your revenue. It’s no wonder that branding is often associated with investing lots of money in marketing and promotion. However, for a lot of small business owners, the investment in branding will have to be made with a relatively small budget. ", + }, + name: "core/paragraph", + clientId: "6860403c-0b36-43b2-96fa-2d30c10cb44c", + innerBlocks: [], + }, + ], + } + ); + + const mockResearcher = buildMorphologyMockResearcher( keyphraseForms ); + + buildTree( mockPaper, mockResearcher ); + const keyWordCountResult = keyphraseCount( mockPaper, mockResearcher ); + expect( keyWordCountResult.count ).toBe( 1 ); + expect( keyWordCountResult.markings ).toEqual( expectedMarkings ); + } ); +} ); + /** * Mocks Japanese Researcher. * @param {Array} keyphraseForms The morphological forms to be added to the researcher. diff --git a/packages/yoastseo/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js b/packages/yoastseo/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js index 98f80447499..a772265b9d7 100644 --- a/packages/yoastseo/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js +++ b/packages/yoastseo/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js @@ -9,7 +9,7 @@ const markEnd = "
"; * This is a helper for position based highlighting. * @param {Object[]} matches An array of markings to merge. * @param {Boolean} useSpace Whether words are separated by a space. In Japanese, for example, words are not separated by a space. - * @returns {Mark[]} An array of markings where consecutive and overlapping markings are merged. + * @returns {Token[]} An array of markings where consecutive and overlapping markings are merged. */ const mergeConsecutiveAndOverlappingMatches = ( matches, useSpace = true ) => { const newMatches = []; @@ -143,6 +143,9 @@ function getMarkingsInSentence( sentence, matchesInSentence, matchWordCustomHelp position: { startOffset: token.sourceCodeRange.startOffset, endOffset: token.sourceCodeRange.endOffset, + // relative to start of block positions. + startOffsetBlock: token.sourceCodeRange.startOffset - sentence.parentCodeRange.startOffset, + endOffsetBlock: token.sourceCodeRange.endOffset - sentence.parentCodeRange.startOffset, }, marked: markedSentence, original: sentence.text, diff --git a/packages/yoastseo/src/languageProcessing/helpers/sentence/getSentencesFromTree.js b/packages/yoastseo/src/languageProcessing/helpers/sentence/getSentencesFromTree.js index 23a09d4acbe..88f17982368 100644 --- a/packages/yoastseo/src/languageProcessing/helpers/sentence/getSentencesFromTree.js +++ b/packages/yoastseo/src/languageProcessing/helpers/sentence/getSentencesFromTree.js @@ -4,10 +4,17 @@ * * @param {Paper} paper The paper to get the sentences from. * - * @returns {Object[]} The array of sentences retrieved from paragraph and heading nodes. + * @returns {Object[]} The array of sentences retrieved from paragraph and heading nodes plus sourceCodeLocation of the parent node. */ export default function( paper ) { const tree = paper.getTree().findAll( treeNode => !! treeNode.sentences ); - return tree.flatMap( node => node.sentences ); + return tree.flatMap( node => node.sentences.map( s => ( { + ...s, + parentCodeRange: { + startOffset: node.sourceCodeLocation.startTag.endOffset, + endOffset: node.sourceCodeLocation.endTag.startOffset, + }, + } ) ) + ); } From a337871655c7e31fb1db3797474be6b78242c474 Mon Sep 17 00:00:00 2001 From: Mykola Shlyakhtun Date: Mon, 19 Jun 2023 05:39:51 +0300 Subject: [PATCH 165/616] HTML Parser - Implement position based highlighting for block editor#138 Fix tests. 
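
For reviewers, a rough restatement of the relationship the adjusted specs assert
(the helper and variable names here are made up for this note, not part of the
change): getSentencesFromTree now exposes the offset where the parent node's
content starts, and each Mark is expected to report both absolute and
block-relative ranges derived from it.

// The parent start offset, as derived in getSentencesFromTree: the end of the
// parent node's opening tag, or 0 for nodes without a source tag.
const getParentStartOffset = ( node ) =>
	node.sourceCodeLocation && node.sourceCodeLocation.startTag
		? node.sourceCodeLocation.startTag.endOffset
		: 0;

// E.g. an opening paragraph tag spanning offsets 0-3 gives a parent start offset of 3.
const parentStartOffset = getParentStartOffset(
	{ sourceCodeLocation: { startTag: { startOffset: 0, endOffset: 3 } } }
);

// A token at absolute offsets 19-27 inside that node is then expected to be
// reported at block-relative offsets 16-24 in the adjusted specs.
console.log( {
	startOffset: 19,
	endOffset: 27,
	startOffsetBlock: 19 - parentStartOffset,
	endOffsetBlock: 27 - parentStartOffset,
} );
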
--- .../highlighting/getMarkingsInSentenceSpec.js | 79 ++++++++++++++----- .../sentence/getSentencesFromTreeSpec.js | 6 ++ .../seo/KeywordDensityAssessmentSpec.js | 43 ++++++++-- .../highlighting/getMarkingsInSentence.js | 4 +- .../helpers/sentence/getSentencesFromTree.js | 5 +- 5 files changed, 106 insertions(+), 31 deletions(-) diff --git a/packages/yoastseo/spec/languageProcessing/helpers/highlighting/getMarkingsInSentenceSpec.js b/packages/yoastseo/spec/languageProcessing/helpers/highlighting/getMarkingsInSentenceSpec.js index a46b943f6de..bb0c16a0bd6 100644 --- a/packages/yoastseo/spec/languageProcessing/helpers/highlighting/getMarkingsInSentenceSpec.js +++ b/packages/yoastseo/spec/languageProcessing/helpers/highlighting/getMarkingsInSentenceSpec.js @@ -21,7 +21,12 @@ const testCases = [ expectedResult: [ new Mark( { marked: "This is a sentence.", original: "This is a sentence.", - position: { endOffset: 4, startOffset: 0 }, + position: { + startOffset: 0, + endOffset: 4, + startOffsetBlock: 0, + endOffsetBlock: 4, + }, } ) ], }, { @@ -30,11 +35,18 @@ const testCases = [ matchesInSentence: [ { sourceCodeRange: { startOffset: 0, endOffset: 4 } }, { sourceCodeRange: { startOffset: 5, endOffset: 7 } } ], matchWordCustomHelper: false, locale: "en_US", - expectedResult: [ new Mark( { - marked: "This is a sentence.", - original: "This is a sentence.", - position: { endOffset: 7, startOffset: 0 }, - } ) ], + expectedResult: [ + new Mark( { + marked: "This is a sentence.", + original: "This is a sentence.", + position: { + startOffset: 0, + endOffset: 7, + startOffsetBlock: 0, + endOffsetBlock: 7, + }, + } ), + ], }, { testDescription: "Two markings that are not consecutive in sentence", @@ -46,12 +58,22 @@ const testCases = [ new Mark( { marked: "This is a sentence.", original: "This is a sentence.", - position: { endOffset: 4, startOffset: 0 }, + position: { + startOffset: 0, + endOffset: 4, + startOffsetBlock: 0, + endOffsetBlock: 4, + }, } ), new Mark( { marked: "This is a sentence.", original: "This is a sentence.", - position: { endOffset: 18, startOffset: 10 }, + position: { + startOffset: 10, + endOffset: 18, + startOffsetBlock: 10, + endOffsetBlock: 18, + }, } ), ], }, @@ -61,11 +83,18 @@ const testCases = [ matchesInSentence: [ { sourceCodeRange: { startOffset: 10, endOffset: 14 } } ], matchWordCustomHelper: false, locale: "en_US", - expectedResult: [ new Mark( { - marked: "This is a sentence.", - original: "This is a sentence.", - position: { endOffset: 14, startOffset: 10 }, - } ) ], + expectedResult: [ + new Mark( { + marked: "This is a sentence.", + original: "This is a sentence.", + position: { + startOffset: 10, + endOffset: 14, + startOffsetBlock: 10, + endOffsetBlock: 14, + }, + } ), + ], }, { testDescription: "One marking in a sentence of a language that does not use spaces", @@ -73,11 +102,18 @@ const testCases = [ matchesInSentence: [ { sourceCodeRange: { startOffset: 3, endOffset: 4 } } ], matchWordCustomHelper: JapaneseCustomHelper, locale: "ja", - expectedResult: [ new Mark( { - marked: "これはです.", - original: "これは文です.", - position: { endOffset: 4, startOffset: 3 }, - } ) ], + expectedResult: [ + new Mark( { + marked: "これはです.", + original: "これは文です.", + position: { + startOffset: 3, + endOffset: 4, + startOffsetBlock: 3, + endOffsetBlock: 4, + }, + } ), + ], }, { testDescription: "Two markings that overlap", @@ -89,7 +125,12 @@ const testCases = [ new Mark( { marked: "This is a sentence.", original: "This is a sentence.", - position: { endOffset: 9, startOffset: 0 }, + 
position: { + startOffset: 0, + endOffset: 9, + startOffsetBlock: 0, + endOffsetBlock: 9, + }, } ), ], }, diff --git a/packages/yoastseo/spec/languageProcessing/helpers/sentence/getSentencesFromTreeSpec.js b/packages/yoastseo/spec/languageProcessing/helpers/sentence/getSentencesFromTreeSpec.js index d9339833844..d4ba2c37aa5 100644 --- a/packages/yoastseo/spec/languageProcessing/helpers/sentence/getSentencesFromTreeSpec.js +++ b/packages/yoastseo/spec/languageProcessing/helpers/sentence/getSentencesFromTreeSpec.js @@ -15,6 +15,7 @@ describe( "test", () => { expect( getSentencesFromTree( paper ) ).toEqual( [ { sourceCodeRange: { endOffset: 44, startOffset: 3 }, + parentStartOffset: 3, text: "A very intelligent cat loves their human.", tokens: [ { sourceCodeRange: { endOffset: 4, startOffset: 3 }, text: "A" }, @@ -35,6 +36,7 @@ describe( "test", () => { }, { sourceCodeRange: { endOffset: 64, startOffset: 44 }, + parentStartOffset: 3, text: " A dog is very cute.", tokens: [ { sourceCodeRange: { endOffset: 45, startOffset: 44 }, text: " " }, @@ -51,6 +53,7 @@ describe( "test", () => { ] }, { sourceCodeRange: { endOffset: 86, startOffset: 72 }, + parentStartOffset: 72, text: "A subheading 3", tokens: [ { sourceCodeRange: { endOffset: 73, startOffset: 72 }, text: "A" }, @@ -61,6 +64,7 @@ describe( "test", () => { ] }, { sourceCodeRange: {}, + parentStartOffset: 0, text: "text text text", tokens: [ { sourceCodeRange: {}, text: "text" }, @@ -71,6 +75,7 @@ describe( "test", () => { ] }, { sourceCodeRange: { endOffset: 123, startOffset: 109 }, + parentStartOffset: 109, text: "A subheading 4", tokens: [ { sourceCodeRange: { endOffset: 110, startOffset: 109 }, text: "A" }, @@ -81,6 +86,7 @@ describe( "test", () => { ] }, { sourceCodeRange: { endOffset: 138, startOffset: 128 }, + parentStartOffset: 0, text: "more text.", tokens: [ { sourceCodeRange: { endOffset: 132, startOffset: 128 }, text: "more" }, diff --git a/packages/yoastseo/spec/scoring/assessments/seo/KeywordDensityAssessmentSpec.js b/packages/yoastseo/spec/scoring/assessments/seo/KeywordDensityAssessmentSpec.js index 9b6b329dd98..9c60179673a 100644 --- a/packages/yoastseo/spec/scoring/assessments/seo/KeywordDensityAssessmentSpec.js +++ b/packages/yoastseo/spec/scoring/assessments/seo/KeywordDensityAssessmentSpec.js @@ -254,15 +254,26 @@ describe( "A test for marking the keyword", function() { "keyword and another " + "keyword.", original: "This is a very interesting paper with a keyword and another keyword.", - position: { endOffset: 50, startOffset: 43 }, + position: { + startOffset: 43, + endOffset: 50, + startOffsetBlock: 40, + endOffsetBlock: 47, + }, } ), new Mark( { marked: "This is a very interesting paper with a " + "keyword and another " + "keyword.", original: "This is a very interesting paper with a keyword and another keyword.", - position: { endOffset: 70, startOffset: 63 }, - } ) ]; + position: { + startOffset: 63, + endOffset: 70, + startOffsetBlock: 60, + endOffsetBlock: 67, + }, + } ), + ]; expect( keywordDensityAssessment.getMarks() ).toEqual( expected ); } ); @@ -277,7 +288,14 @@ describe( "A test for marking the keyword", function() { const expected = [ new Mark( { marked: "This is the release of YoastSEO 9.3.", original: "This is the release of YoastSEO 9.3.", - position: { startOffset: 26, endOffset: 38 } } ) ]; + position: { + startOffset: 26, + endOffset: 38, + startOffsetBlock: 23, + endOffsetBlock: 35, + }, + } ), + ]; expect( keywordDensityAssessment.getMarks() ).toEqual( expected ); } ); @@ -297,11 +315,24 @@ 
describe( "A test for marking the keyword", function() { marked: " A flamboyant cat with a " + "toy\n", original: " A flamboyant cat with a toy\n", - position: { endOffset: 204, startOffset: 201 } } ), + position: { + startOffset: 201, + endOffset: 204, + startOffsetBlock: 198, + endOffsetBlock: 201, + }, + } ), new Mark( { marked: " A flamboyant cat with a " + "toy\n", - original: " A flamboyant cat with a toy\n", position: { endOffset: 215, startOffset: 212 } } ), + original: " A flamboyant cat with a toy\n", + position: { + startOffset: 212, + endOffset: 215, + startOffsetBlock: 209, + endOffsetBlock: 212, + }, + } ), ]; expect( keywordDensityAssessment.getMarks() ).toEqual( expected ); } ); diff --git a/packages/yoastseo/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js b/packages/yoastseo/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js index a772265b9d7..d6b7f94d41d 100644 --- a/packages/yoastseo/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js +++ b/packages/yoastseo/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js @@ -144,8 +144,8 @@ function getMarkingsInSentence( sentence, matchesInSentence, matchWordCustomHelp startOffset: token.sourceCodeRange.startOffset, endOffset: token.sourceCodeRange.endOffset, // relative to start of block positions. - startOffsetBlock: token.sourceCodeRange.startOffset - sentence.parentCodeRange.startOffset, - endOffsetBlock: token.sourceCodeRange.endOffset - sentence.parentCodeRange.startOffset, + startOffsetBlock: token.sourceCodeRange.startOffset - ( sentence.parentStartOffset || 0 ), + endOffsetBlock: token.sourceCodeRange.endOffset - ( sentence.parentStartOffset || 0 ), }, marked: markedSentence, original: sentence.text, diff --git a/packages/yoastseo/src/languageProcessing/helpers/sentence/getSentencesFromTree.js b/packages/yoastseo/src/languageProcessing/helpers/sentence/getSentencesFromTree.js index 88f17982368..3fb82de546f 100644 --- a/packages/yoastseo/src/languageProcessing/helpers/sentence/getSentencesFromTree.js +++ b/packages/yoastseo/src/languageProcessing/helpers/sentence/getSentencesFromTree.js @@ -11,10 +11,7 @@ export default function( paper ) { return tree.flatMap( node => node.sentences.map( s => ( { ...s, - parentCodeRange: { - startOffset: node.sourceCodeLocation.startTag.endOffset, - endOffset: node.sourceCodeLocation.endTag.startOffset, - }, + parentStartOffset: ( node.sourceCodeLocation && node.sourceCodeLocation.startTag ) ? node.sourceCodeLocation.startTag.endOffset : 0, } ) ) ); } From 691b587c5dc4a1f378ae2c54d6c9946423909376 Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Mon, 19 Jun 2023 15:36:16 +0200 Subject: [PATCH 166/616] Adjust JSDoc --- .../languageProcessing/helpers/word/getAllWordsFromTree.js | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/packages/yoastseo/src/languageProcessing/helpers/word/getAllWordsFromTree.js b/packages/yoastseo/src/languageProcessing/helpers/word/getAllWordsFromTree.js index a651b7a83b8..d353db510be 100644 --- a/packages/yoastseo/src/languageProcessing/helpers/word/getAllWordsFromTree.js +++ b/packages/yoastseo/src/languageProcessing/helpers/word/getAllWordsFromTree.js @@ -3,7 +3,8 @@ import { flatMap } from "lodash-es"; import removePunctuation from "../sanitize/removePunctuation"; /** - * Gets the words from the tree. + * Gets the words from the tree, i.e. the paragraph and heading nodes. + * These two node types are the nodes that should contain words for the analysis. 
* * @param {Paper} paper The paper to get the tree and words from. * @@ -17,6 +18,6 @@ export default function( paper ) { // Remove punctuation and spaces. words = words.map( token => removePunctuation( token ) ); - // Filter out empty tokens + // Filter out empty tokens. return words.filter( word => word.trim() !== "" ); } From 68ad3ac4de682a536ca7659409a26ba961196184 Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Mon, 19 Jun 2023 15:38:45 +0200 Subject: [PATCH 167/616] Use helper for getting words from tree --- .../match/matchKeyphraseWithSentence.js | 12 ++++++----- .../seo/KeywordDensityAssessment.js | 20 ++++++++----------- .../assessments/recommendedKeywordCount.js | 12 +++++------ 3 files changed, 20 insertions(+), 24 deletions(-) diff --git a/packages/yoastseo/src/languageProcessing/helpers/match/matchKeyphraseWithSentence.js b/packages/yoastseo/src/languageProcessing/helpers/match/matchKeyphraseWithSentence.js index c7f14bda715..7170e82e9c7 100644 --- a/packages/yoastseo/src/languageProcessing/helpers/match/matchKeyphraseWithSentence.js +++ b/packages/yoastseo/src/languageProcessing/helpers/match/matchKeyphraseWithSentence.js @@ -1,5 +1,6 @@ import getWordsForHTMLParser from "../word/getWordsForHTMLParser"; import { escapeRegExp } from "lodash-es"; +import matchTextWithTransliteration from "./matchTextWithTransliteration"; /** * Tokenizes keyword forms for exact matching. This function gets the keyword form and tokenizes it. @@ -67,9 +68,10 @@ const getExactMatches = ( keywordForms, sentence ) => { * Free matching of keyword forms in a sentence. Free matching happens when the keyphrase is enclosed in double quotes. * @param {(string[])[]} keywordForms The keyword forms to match. * @param {Sentence} sentence The sentence to match the keyword forms with. + * @param {string} locale The locale used for transliteration. * @returns {Token[]} The tokens that match the keyword forms. */ -const getNonExactMatches = ( keywordForms, sentence ) => { +const getNonExactMatches = ( keywordForms, sentence, locale ) => { const tokens = sentence.tokens.slice(); // Filter out all tokens that do not match the keyphrase forms. @@ -77,7 +79,7 @@ const getNonExactMatches = ( keywordForms, sentence ) => { const tokenText = escapeRegExp( token.text ); return keywordForms.some( ( keywordForm ) => { return keywordForm.some( ( keywordFormPart ) => { - return tokenText.toLowerCase() === keywordFormPart.toLowerCase(); + return matchTextWithTransliteration( tokenText, keywordFormPart, locale ).length > 0; } ); } ); } ); @@ -92,7 +94,7 @@ const getNonExactMatches = ( keywordForms, sentence ) => { * @param {Sentence} sentence The sentence to match against the keywordForms. * @param {boolean} useExactMatching Whether to match the keyword forms exactly or not. * Depends on whether the user has put the keyphrase in double quotes. - * + * @param {string} locale The locale used for transliteration. * @returns {Token[]} The tokens that match the keywordForms. * * The algorithm is as follows: @@ -107,11 +109,11 @@ const getNonExactMatches = ( keywordForms, sentence ) => { * This function corrects for these differences by combining tokens that are separated by a word coupler (e.g. "-") into one token: the matchToken. * This matchToken is then compared with the keyword forms. 
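 * A minimal illustration of the two branches (example sentence assumed): with free matching, the forms
 * [ [ "key", "keys" ], [ "word", "words" ] ] match the separate tokens "key" and "word" anywhere in
 * "A string with a key word.", whereas exact matching (used when the keyphrase is enclosed in double quotes)
 * only matches the consecutive token sequence "key word".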
*/ -const matchKeyphraseWithSentence = ( keywordForms, sentence, useExactMatching = false ) => { +const matchKeyphraseWithSentence = ( keywordForms, sentence, useExactMatching, locale = false ) => { if ( useExactMatching ) { return getExactMatches( keywordForms, sentence ); } - return getNonExactMatches( keywordForms, sentence ); + return getNonExactMatches( keywordForms, sentence, locale ); }; export default matchKeyphraseWithSentence; diff --git a/packages/yoastseo/src/scoring/assessments/seo/KeywordDensityAssessment.js b/packages/yoastseo/src/scoring/assessments/seo/KeywordDensityAssessment.js index 58b83e2c8b2..25d132ff2b9 100644 --- a/packages/yoastseo/src/scoring/assessments/seo/KeywordDensityAssessment.js +++ b/packages/yoastseo/src/scoring/assessments/seo/KeywordDensityAssessment.js @@ -9,6 +9,7 @@ import { createAnchorOpeningTag } from "../../../helpers/shortlinker"; import keyphraseLengthFactor from "../../helpers/assessments/keyphraseLengthFactor.js"; import removeHtmlBlocks from "../../../languageProcessing/helpers/html/htmlParser"; import getWords from "../../../languageProcessing/helpers/word/getWords"; +import getAllWordsFromTree from "../../../languageProcessing/helpers/word/getAllWordsFromTree"; /** * Represents the assessment that will look if the keyphrase density is within the recommended range. @@ -75,20 +76,20 @@ class KeyphraseDensityAssessment extends Assessment { /** * Determines correct boundaries depending on the availability of morphological forms. * - * @param {string} text The paper text. + * @param {Paper} paper The paper to analyze. * @param {number} keyphraseLength The length of the keyphrase in words. * @param {function} customGetWords A helper to get words from the text for languages that don't use the default approach. * * @returns {void} */ - setBoundaries( text, keyphraseLength, customGetWords ) { + setBoundaries( paper, keyphraseLength, customGetWords ) { if ( this._hasMorphologicalForms ) { this._boundaries = this._config.parameters.multipleWordForms; } else { this._boundaries = this._config.parameters.noWordForms; } - this._minRecommendedKeyphraseCount = recommendedKeyphraseCount( text, keyphraseLength, this._boundaries.minimum, "min", customGetWords ); - this._maxRecommendedKeyphraseCount = recommendedKeyphraseCount( text, keyphraseLength, this._boundaries.maximum, "max", customGetWords ); + this._minRecommendedKeyphraseCount = recommendedKeyphraseCount( paper, keyphraseLength, this._boundaries.minimum, "min", customGetWords ); + this._maxRecommendedKeyphraseCount = recommendedKeyphraseCount( paper, keyphraseLength, this._boundaries.maximum, "max", customGetWords ); } /** @@ -111,10 +112,7 @@ class KeyphraseDensityAssessment extends Assessment { this._hasMorphologicalForms = researcher.getData( "morphology" ) !== false; - const text = paper.getText(); - // eslint-disable-next-line no-warning-comments - // TODO: do we need to use the paper text, or can we adjust this to use the html parser? 
- this.setBoundaries( text, keyphraseLength, customGetWords ); + this.setBoundaries( paper, keyphraseLength, customGetWords ); this._keyphraseDensity = this._keyphraseDensity * keyphraseLengthFactor( keyphraseLength ); const calculatedScore = this.calculateResult(); @@ -332,10 +330,8 @@ class KeyphraseDensityAssessment extends Assessment { if ( customApplicabilityConfig ) { this._config.applicableIfTextLongerThan = customApplicabilityConfig; } - const text = paper.getText(); - // eslint-disable-next-line no-warning-comments - // TODO: Adapt this to use HTML Parser. - const textLength = customCountLength ? customCountLength( text ) : getWords( text ).length; + + const textLength = customCountLength ? customCountLength( paper.getText() ) : getAllWordsFromTree( paper ).length; return paper.hasText() && paper.hasKeyword() && textLength >= this._config.applicableIfTextLongerThan; } diff --git a/packages/yoastseo/src/scoring/helpers/assessments/recommendedKeywordCount.js b/packages/yoastseo/src/scoring/helpers/assessments/recommendedKeywordCount.js index ac577dde9a0..d668353cfa2 100644 --- a/packages/yoastseo/src/scoring/helpers/assessments/recommendedKeywordCount.js +++ b/packages/yoastseo/src/scoring/helpers/assessments/recommendedKeywordCount.js @@ -1,11 +1,11 @@ -import countWords from "../../../languageProcessing/helpers/word/countWords"; +import getAllWordsFromTree from "../../../languageProcessing/helpers/word/getAllWordsFromTree"; import keyphraseLengthFactor from "./keyphraseLengthFactor.js"; /** - * Calculates a recommended keyword count for a text. The formula to calculate this number is based on the + * Calculates a recommended keyword count for a paper's text. The formula to calculate this number is based on the * keyword density formula. * - * @param {string} text The paper text. + * @param {Paper} paper The paper to analyze. * @param {number} keyphraseLength The length of the focus keyphrase in words. * @param {number} recommendedKeywordDensity The recommended keyword density (either maximum or minimum). * @param {string} maxOrMin Whether it's a maximum or minimum recommended keyword density. @@ -13,10 +13,8 @@ import keyphraseLengthFactor from "./keyphraseLengthFactor.js"; * * @returns {number} The recommended keyword count. */ -export default function( text, keyphraseLength, recommendedKeywordDensity, maxOrMin, customGetWords ) { - // eslint-disable-next-line no-warning-comments - // TODO: adapt to use the HTML parser. Also for Japanese. - const wordCount = customGetWords ? customGetWords( text ).length : countWords( text ); +export default function( paper, keyphraseLength, recommendedKeywordDensity, maxOrMin, customGetWords ) { + const wordCount = customGetWords ? 
customGetWords( paper.getText() ).length : getAllWordsFromTree( paper ).length; if ( wordCount === 0 ) { return 0; From 9e16080459567a99f12b77d89d2c6b6e3fed8951 Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Mon, 19 Jun 2023 15:39:06 +0200 Subject: [PATCH 168/616] Remove unused file --- .../helpers/match/matchWordFormsWithTokens.js | 25 ------------------- 1 file changed, 25 deletions(-) delete mode 100644 packages/yoastseo/src/languageProcessing/helpers/match/matchWordFormsWithTokens.js diff --git a/packages/yoastseo/src/languageProcessing/helpers/match/matchWordFormsWithTokens.js b/packages/yoastseo/src/languageProcessing/helpers/match/matchWordFormsWithTokens.js deleted file mode 100644 index 280716fcfac..00000000000 --- a/packages/yoastseo/src/languageProcessing/helpers/match/matchWordFormsWithTokens.js +++ /dev/null @@ -1,25 +0,0 @@ -import { uniq as unique } from "lodash-es"; - -/** - * Matches an array of words with an array of tokens. - * - * @param {array} wordForms The array of words to check. - * @param {array} tokens The array of tokens to check. - * @returns {{count: number, matches: *[]}} The matched words. - */ -export default function matchWordFormsWithTokens( wordForms, tokens ) { - const result = { - count: 0, - matches: [], - }; - - unique( wordForms ).forEach( function( form ) { - const foundWords = tokens.filter( token => token.text === form ); - if ( foundWords.length > 0 ) { - result.matches = result.matches.concat( foundWords ); - result.count += foundWords.length; - } - } ); - - return result; -} From 1797f4c77b6d3ff38dd42d8a771d85e4ebebd014 Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Mon, 19 Jun 2023 15:40:02 +0200 Subject: [PATCH 169/616] split unit tests --- .../researches/keywordCountSpec.js | 396 +++++++++++++----- 1 file changed, 293 insertions(+), 103 deletions(-) diff --git a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js index c6be1c8c9f5..62b3b6bd6a6 100644 --- a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js +++ b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js @@ -38,7 +38,7 @@ const testCases = [ { description: "counts a string of text with no keyword in it.", paper: new Paper( "

<p>a string of text</p>

", { keyword: "" } ), - keyphraseForms: [ [ "" ] ], + keyphraseForms: [], expectedCount: 0, expectedMarkings: [], skip: false, @@ -59,17 +59,6 @@ const testCases = [ position: { endOffset: 64, startOffset: 55 } } ) ], skip: false, }, - { - description: "counts a string of text with German diacritics and eszett as the keyword", - paper: new Paper( "

<p>Waltz keepin auf mitz auf keepin äöüß weiner blitz deutsch spitzen.</p>

", { keyword: "äöüß" } ), - keyphraseForms: [ [ "äöüß" ] ], - expectedCount: 1, - expectedMarkings: [ - new Mark( { marked: "Waltz keepin auf mitz auf keepin äöüß weiner blitz deutsch spitzen.", - original: "Waltz keepin auf mitz auf keepin äöüß weiner blitz deutsch spitzen.", - position: { endOffset: 40, startOffset: 36 } } ) ], - skip: false, - }, { description: "counts a string with multiple keyword morphological forms", paper: new Paper( "

<p>A string of text with a keyword and multiple keywords in it.</p>

", { keyword: "keyword" } ), @@ -86,55 +75,6 @@ const testCases = [ position: { endOffset: 56, startOffset: 48 } } ) ], skip: false, }, - { - description: "counts a string with a keyword with a '-' in it", - paper: new Paper( "

<p>A string with a key-word.</p>

", { keyword: "key-word" } ), - keyphraseForms: [ [ "key-word", "key-words" ] ], - expectedCount: 1, - expectedMarkings: [ new Mark( { marked: "A string with a key-word.", - original: "A string with a key-word.", - position: { endOffset: 27, startOffset: 19 } } ) ], - skip: false, - }, - { - description: "counts 'key word' in 'key-word'.", - paper: new Paper( "

<p>A string with a key-word.</p>

", { keyword: "key word" } ), - keyphraseForms: [ [ "key", "keys" ], [ "word", "words" ] ], - expectedCount: 0, - expectedMarkings: [], - skip: false, - // Note: this behavior might change in the future. - }, - { - description: "counts a string with a keyword with a '_' in it", - paper: new Paper( "

<p>A string with a key_word.</p>

", { keyword: "key_word" } ), - keyphraseForms: [ [ "key_word", "key_words" ] ], - expectedCount: 1, - expectedMarkings: [ new Mark( { marked: "A string with a key_word.", - original: "A string with a key_word.", - position: { endOffset: 27, startOffset: 19 } } ) ], - skip: false, - }, - { - description: "counts a string with with a 'ı' in the keyphrase", - paper: new Paper( "

<p>A string with 'kapaklı' as a keyword in it</p>

", { keyword: "kapaklı" } ), - keyphraseForms: [ [ "kapaklı" ] ], - expectedCount: 1, - expectedMarkings: [ new Mark( { marked: "A string with 'kapaklı' as a keyword in it", - original: "A string with 'kapaklı' as a keyword in it", - position: { endOffset: 25, startOffset: 18 } } ) ], - skip: false, - }, - { - description: "counts a string with with '&' in the string and the keyword", - paper: new Paper( "

<p>A string with key&word in it</p>

", { keyword: "key&word" } ), - keyphraseForms: [ [ "key&word" ] ], - expectedCount: 1, - expectedMarkings: [ new Mark( { marked: "A string with key&word in it", - original: "A string with key&word in it", - position: { endOffset: 25, startOffset: 17 } } ) ], - skip: false, - }, { description: "does not count images as keywords.", paper: new Paper( "

A text with image

", { keyword: "image" } ), @@ -154,35 +94,6 @@ const testCases = [ position: { endOffset: 25, startOffset: 17 } } ) ], skip: false, }, - { - description: "should still match keyphrase occurrence with different types of apostrophe.", - paper: new Paper( "

<p>A string with quotes to match the key'word, even if the quotes differ.</p>

", { keyword: "key'word" } ), - keyphraseForms: [ [ "key'word", "key'words" ] ], - expectedCount: 1, - expectedMarkings: [ new Mark( { - marked: "A string with quotes to match the key'word, even if the quotes differ.", - original: "A string with quotes to match the key'word, even if the quotes differ.", - position: { endOffset: 45, startOffset: 37 } } ) ], - skip: false, - }, - { - description: "can match dollar sign as in '$keyword'.", - paper: new Paper( "

<p>A string with a $keyword.</p>

" ), - keyphraseForms: [ [ "\\$keyword" ] ], - expectedCount: 1, - expectedMarkings: [ new Mark( { marked: "A string with a $keyword.", - original: "A string with a $keyword.", - position: { endOffset: 27, startOffset: 19 } } ) ], - skip: false, - }, - { - description: "doesn't count 'key-word' in 'key word'.", - paper: new Paper( "

<p>A string with a key word.</p>

", { keyword: "key-word" } ), - keyphraseForms: [ [ "key-word", "key-words" ] ], - expectedCount: 0, - expectedMarkings: [], - skip: false, - }, { description: "doesn't count keyphrase instances inside elements we want to exclude from the analysis", paper: new Paper( "

There is no keyword in this sentence.

" ), @@ -191,18 +102,6 @@ const testCases = [ expectedMarkings: [], skip: false, }, - { - description: "counts the keyphrase occurrence in the text with  ", - paper: new Paper( "

<p>a string with nbsp to match the key word.</p>

", { keyword: "key word" } ), - keyphraseForms: [ [ "key" ], [ "word" ] ], - expectedCount: 1, - expectedMarkings: [ - new Mark( { marked: "a string with nbsp to match the key word.", - original: "a string with nbsp to match the key word.", - position: { endOffset: 43, startOffset: 35 } } ), - ], - skip: false, - }, { description: "only counts full key phrases (when all keywords are in the sentence once, twice etc.) as matches.", paper: new Paper( "

<p>A string with three keys (key and another key) and one word.</p>

" ), @@ -325,6 +224,297 @@ describe.each( testCases )( "Test for counting the keyword in a text in english" } ); } ); +const testCasesWithSpecialCharacters = [ + { + description: "counts the keyphrase occurrence in the text with  ", + paper: new Paper( "

<p>a string with nbsp to match the key word.</p>

", { keyword: "key word" } ), + keyphraseForms: [ [ "key" ], [ "word" ] ], + expectedCount: 1, + expectedMarkings: [ + new Mark( { marked: "a string with nbsp to match the key word.", + original: "a string with nbsp to match the key word.", + position: { endOffset: 43, startOffset: 35 } } ), + ], + skip: false, + }, + { + description: "counts a string with a keyword with a '-' in it", + paper: new Paper( "

<p>A string with a key-word.</p>

", { keyword: "key-word" } ), + keyphraseForms: [ [ "key-word", "key-words" ] ], + expectedCount: 1, + expectedMarkings: [ new Mark( { marked: "A string with a key-word.", + original: "A string with a key-word.", + position: { endOffset: 27, startOffset: 19 } } ) ], + skip: false, + }, + { + description: "counts 'key word' in 'key-word'.", + paper: new Paper( "

<p>A string with a key-word.</p>

", { keyword: "key word" } ), + keyphraseForms: [ [ "key", "keys" ], [ "word", "words" ] ], + expectedCount: 0, + expectedMarkings: [], + skip: false, + // Note: this behavior might change in the future. + }, + { + description: "counts a string with a keyword with a '_' in it", + paper: new Paper( "

<p>A string with a key_word.</p>

", { keyword: "key_word" } ), + keyphraseForms: [ [ "key_word", "key_words" ] ], + expectedCount: 1, + expectedMarkings: [ new Mark( { marked: "A string with a key_word.", + original: "A string with a key_word.", + position: { endOffset: 27, startOffset: 19 } } ) ], + skip: false, + }, + { + description: "counts a string with with '&' in the string and the keyword", + paper: new Paper( "

<p>A string with key&word in it</p>

", { keyword: "key&word" } ), + keyphraseForms: [ [ "key&word" ] ], + expectedCount: 1, + expectedMarkings: [ new Mark( { marked: "A string with key&word in it", + original: "A string with key&word in it", + position: { endOffset: 25, startOffset: 17 } } ) ], + skip: false, + }, + { + description: "should still match keyphrase occurrence with different types of apostrophe.", + paper: new Paper( "

<p>A string with quotes to match the key'word, even if the quotes differ.</p>

", { keyword: "key'word" } ), + keyphraseForms: [ [ "key'word", "key'words" ] ], + expectedCount: 1, + expectedMarkings: [ new Mark( { + marked: "A string with quotes to match the key'word, even if the quotes differ.", + original: "A string with quotes to match the key'word, even if the quotes differ.", + position: { endOffset: 45, startOffset: 37 } } ) ], + skip: false, + }, + { + description: "can match dollar sign as in '$keyword'.", + paper: new Paper( "

<p>A string with a $keyword.</p>

" ), + keyphraseForms: [ [ "\\$keyword" ] ], + expectedCount: 1, + expectedMarkings: [ new Mark( { marked: "A string with a $keyword.", + original: "A string with a $keyword.", + position: { endOffset: 27, startOffset: 19 } } ) ], + skip: false, + }, + { + description: "doesn't count 'key-word' in 'key word'.", + paper: new Paper( "

<p>A string with a key word.</p>

", { keyword: "key-word" } ), + keyphraseForms: [ [ "key-word", "key-words" ] ], + expectedCount: 0, + expectedMarkings: [], + skip: false, + }, + { + description: "can match keyphrase enclosed in «» '«keyword»' in the text", + paper: new Paper( "

<p>A string with a «keyword».</p>

", { keyword: "keyword" } ), + keyphraseForms: [ [ "keyword" ] ], + expectedCount: 1, + expectedMarkings: [ + new Mark( { + original: "A string with a «keyword».", + marked: "A string with a «keyword».", + position: { endOffset: 27, startOffset: 20 } } + ), + ], + skip: false, + }, + { + description: "can match keyphrase enclosed in «» '«keyword»'", + paper: new Paper( "

<p>A string with a keyword.</p>

", { keyword: "«keyword»" } ), + keyphraseForms: [ [ "keyword" ] ], + expectedCount: 1, + expectedMarkings: [ + new Mark( { + original: "A string with a keyword.", + marked: "A string with a keyword.", + position: { endOffset: 26, startOffset: 19 } } + ), + ], + skip: false, + }, + { + description: "can match keyphrase enclosed in ‹› '‹keyword›' in the text", + paper: new Paper( "

<p>A string with a ‹keyword›.</p>

", { keyword: "keyword" } ), + keyphraseForms: [ [ "keyword" ] ], + expectedCount: 1, + expectedMarkings: [ + new Mark( { + original: "A string with a ‹keyword›.", + marked: "A string with a ‹keyword›.", + position: { endOffset: 27, startOffset: 20 } } + ), + ], + skip: false, + }, + { + description: "can match keyphrase enclosed in ‹› '‹keyword›'", + paper: new Paper( "

<p>A string with a keyword.</p>

", { keyword: "‹keyword›" } ), + keyphraseForms: [ [ "keyword" ] ], + expectedCount: 1, + expectedMarkings: [ + new Mark( { + original: "A string with a keyword.", + marked: "A string with a keyword.", + position: { endOffset: 26, startOffset: 19 } } + ), + ], + skip: false, + }, + { + description: "can match keyphrase preceded by ¿", + paper: new Paper( "

<p>¿Keyword is it</p>

", { keyword: "keyword" } ), + keyphraseForms: [ [ "keyword" ] ], + expectedCount: 1, + expectedMarkings: [ + new Mark( { + original: "¿Keyword is it", + marked: "¿Keyword is it", + position: { endOffset: 11, startOffset: 4 } } + ), + ], + skip: false, + }, + { + description: "can match keyphrase followed by RTL-specific punctuation marks", + paper: new Paper( "

<p>الجيدة؟</p>

", { keyword: "الجيدة" } ), + keyphraseForms: [ [ "الجيدة" ] ], + expectedCount: 1, + expectedMarkings: [ + new Mark( { + original: "الجيدة؟", + marked: "الجيدة؟", + position: { endOffset: 9, startOffset: 3 } } + ), + ], + skip: false, + }, +]; + +describe.each( testCasesWithSpecialCharacters )( "Test for counting the keyword in a text containing special characters", + function( { + description, + paper, + keyphraseForms, + expectedCount, + expectedMarkings, + skip } ) { + const test = skip ? it.skip : it; + + test( description, function() { + const mockResearcher = buildMorphologyMockResearcher( keyphraseForms ); + buildTree( paper, mockResearcher ); + const keyWordCountResult = keyphraseCount( paper, mockResearcher ); + expect( keyWordCountResult.count ).toBe( expectedCount ); + expect( keyWordCountResult.markings ).toEqual( expectedMarkings ); + } ); + } ); + +const testCasesWithLocaleMapping = [ + { + description: "counts a string of text with German diacritics and eszett as the keyword", + paper: new Paper( "

<p>Waltz keepin auf mitz auf keepin äöüß weiner blitz deutsch spitzen.</p>

", { keyword: "äöüß", locale: "de_DE" } ), + keyphraseForms: [ [ "äöüß" ] ], + expectedCount: 1, + expectedMarkings: [ + new Mark( { marked: "Waltz keepin auf mitz auf keepin äöüß weiner blitz deutsch spitzen.", + original: "Waltz keepin auf mitz auf keepin äöüß weiner blitz deutsch spitzen.", + position: { endOffset: 40, startOffset: 36 } } ) ], + skip: true, + }, + { + description: "counts a string of text with Spanish accented vowel", + paper: new Paper( "

<p>Accion Española fue una revista.</p>

", { keyword: "acción", locale: "es_ES" } ), + keyphraseForms: [ [ "acción" ] ], + expectedCount: 1, + expectedMarkings: [ + new Mark( { marked: "Acción Española fue una revista.", + original: "Acción Española fue una revista.", + position: { endOffset: 40, startOffset: 36 } } ) ], + skip: false, + }, + { + description: "counts a string of text with Swedish diacritics", + paper: new Paper( "

<p>oeverlaatelsebesiktning and overlatelsebesiktning</p>

", { keyword: "överlåtelsebesiktning", locale: "sv_SV" } ), + keyphraseForms: [ [ "överlåtelsebesiktning" ] ], + expectedCount: 2, + expectedMarkings: [ + new Mark( { marked: "Acción Española fue una revista.", + original: "Acción Española fue una revista.", + position: { endOffset: 40, startOffset: 36 } } ) ], + skip: false, + }, + { + description: "counts a string of text with Norwegian diacritics", + paper: new Paper( "

<p>København and Koebenhavn and Kobenhavn</p>

", { keyword: "København", locale: "nb_NO" } ), + keyphraseForms: [ [ "København" ] ], + expectedCount: 3, + expectedMarkings: [ + new Mark( { marked: "Acción Española fue una revista.", + original: "Acción Española fue una revista.", + position: { endOffset: 40, startOffset: 36 } } ) ], + skip: false, + }, + { + description: "counts a string with with a different forms of Turkish i, kephrase: İstanbul", + paper: new Paper( "

<p>İstanbul and Istanbul and istanbul and ıstanbul</p>

", { keyword: "İstanbul", locale: "tr_TR" } ), + keyphraseForms: [ [ "İstanbul" ] ], + expectedCount: 4, + expectedMarkings: [ new Mark( { marked: "A string with 'kapaklı' as a keyword in it", + original: "A string with 'kapaklı' as a keyword in it", + position: { endOffset: 25, startOffset: 18 } } ) ], + skip: false, + }, + { + description: "counts a string with with a different forms of Turkish i, kephrase: Istanbul", + paper: new Paper( "

<p>İstanbul and Istanbul and istanbul and ıstanbul</p>

", { keyword: "Istanbul", locale: "tr_TR" } ), + keyphraseForms: [ [ "Istanbul" ] ], + expectedCount: 4, + expectedMarkings: [ new Mark( { marked: "A string with 'kapaklı' as a keyword in it", + original: "A string with 'kapaklı' as a keyword in it", + position: { endOffset: 25, startOffset: 18 } } ) ], + skip: false, + }, + { + description: "counts a string with with a different forms of Turkish i, kephrase: istanbul", + paper: new Paper( "

<p>İstanbul and Istanbul and istanbul and ıstanbul</p>

", { keyword: "istanbul", locale: "tr_TR" } ), + keyphraseForms: [ [ "istanbul" ] ], + expectedCount: 4, + expectedMarkings: [ new Mark( { marked: "A string with 'kapaklı' as a keyword in it", + original: "A string with 'kapaklı' as a keyword in it", + position: { endOffset: 25, startOffset: 18 } } ) ], + skip: false, + }, + { + description: "counts a string with with a different forms of Turkish i, kephrase: ıstanbul", + paper: new Paper( "

<p>İstanbul and Istanbul and istanbul and ıstanbul</p>

", { keyword: "ıstanbul", locale: "tr_TR" } ), + keyphraseForms: [ [ "ıstanbul" ] ], + expectedCount: 4, + expectedMarkings: [ new Mark( { marked: "A string with 'kapaklı' as a keyword in it", + original: "A string with 'kapaklı' as a keyword in it", + position: { endOffset: 25, startOffset: 18 } } ) ], + skip: false, + }, +]; +describe.each( testCasesWithLocaleMapping )( "Test for counting the keyword in a text with different locale mapping, e.g. Turkish", + function( { + description, + paper, + keyphraseForms, + expectedCount, + expectedMarkings, + skip } ) { + const test = skip ? it.skip : it; + + test( description, function() { + const mockResearcher = buildMorphologyMockResearcher( keyphraseForms ); + buildTree( paper, mockResearcher ); + const keyWordCountResult = keyphraseCount( paper, mockResearcher ); + expect( keyWordCountResult.count ).toBe( expectedCount ); + expect( keyWordCountResult.markings ).toEqual( expectedMarkings ); + } ); + } ); + /** * Mocks Japanese Researcher. * @param {Array} keyphraseForms The morphological forms to be added to the researcher. @@ -378,7 +568,7 @@ describe( "Test for counting the keyword in a text for Japanese", () => { position: { endOffset: 12, startOffset: 11 } } ) ] ); } ); - it.skip( "counts a string of text with no keyword in it.", function() { + it.skip( "counts a string if text with no keyword in it.", function() { const mockPaper = new Paper( "私の猫はかわいいです。", { locale: "ja" } ); const researcher = buildJapaneseMockResearcher( [ [ "猫" ], [ "会い" ] ], wordsCountHelper, matchWordsHelper ); buildTree( mockPaper, researcher ); From 31c47ff5e3d7a5909745c3b512b6cca1a04edd01 Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Mon, 19 Jun 2023 17:07:30 +0200 Subject: [PATCH 170/616] pass the locale argument --- .../src/languageProcessing/researches/keywordCount.js | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js index acac65dc2d2..be2e4076db8 100644 --- a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js +++ b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js @@ -98,7 +98,10 @@ export function countKeyphraseInText( sentences, topicForms, locale, matchWordCu const result = { count: 0, markings: [] }; sentences.forEach( sentence => { - const matchesInSentence = matchKeyphraseWithSentence( topicForms.keyphraseForms, sentence, isExactMatchRequested ); + // eslint-disable-next-line no-warning-comments + // TODO: test in Japanese to see if we use this helper as well + const matchesInSentence = matchKeyphraseWithSentence( topicForms.keyphraseForms, sentence, isExactMatchRequested, locale ); + console.log( matchesInSentence, "matches" ); const matchesInSentenceWithoutConsecutiveMatches = removeConsecutiveMatches( matchesInSentence ); const matchesCount = countMatches( matchesInSentenceWithoutConsecutiveMatches, topicForms.keyphraseForms ); const markings = getMarkingsInSentence( sentence, matchesInSentence, matchWordCustomHelper, locale ); From ea870d25ca21d9cfb45caec7e09fb8a46748f4df Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Mon, 19 Jun 2023 17:14:31 +0200 Subject: [PATCH 171/616] adjust unit tests --- .../sentence/getSentencesFromTreeSpec.js | 3 ++- .../researches/keywordCountSpec.js | 23 +++++++++++++------ 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/packages/yoastseo/spec/languageProcessing/helpers/sentence/getSentencesFromTreeSpec.js 
b/packages/yoastseo/spec/languageProcessing/helpers/sentence/getSentencesFromTreeSpec.js index d9339833844..16ded227abb 100644 --- a/packages/yoastseo/spec/languageProcessing/helpers/sentence/getSentencesFromTreeSpec.js +++ b/packages/yoastseo/spec/languageProcessing/helpers/sentence/getSentencesFromTreeSpec.js @@ -3,7 +3,7 @@ import getSentencesFromTree from "../../../../src/languageProcessing/helpers/sen import buildTree from "../../../specHelpers/parse/buildTree"; import EnglishResearcher from "../../../../src/languageProcessing/languages/en/Researcher"; -describe( "test", () => { +describe( "test to get sentences from the tree", () => { let researcher; beforeEach( () => { researcher = new EnglishResearcher(); @@ -11,6 +11,7 @@ describe( "test", () => { it( "test", () => { const paper = new Paper( "

<p>A very intelligent cat loves their human. A dog is very cute.</p><h3>A subheading 3</h3>" + "text text text<h4>A subheading 4</h4>

more text." ); + researcher.setPaper( paper ); buildTree( paper, researcher ); expect( getSentencesFromTree( paper ) ).toEqual( [ { diff --git a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js index 62b3b6bd6a6..f0386fa10ee 100644 --- a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js +++ b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js @@ -165,7 +165,7 @@ const testCases = [ keyphraseForms: [ [ "keywords" ] ], expectedCount: 0, expectedMarkings: [], - skip: true, + skip: false, }, { description: "with exact matching, a multi word keyphrase should be counted if the focus keyphrase is the same", @@ -176,7 +176,7 @@ const testCases = [ original: "A string with a key phrase.", position: { endOffset: 36, startOffset: 29 } } ) ], // Skipped for now, coz the PR for exact matching is not yet merged. - skip: true, + skip: false, }, { // eslint-disable-next-line max-len @@ -195,8 +195,7 @@ const testCases = [ expectedMarkings: [ new Mark( { marked: "A .sentence with a keyphrase.", original: "A .sentence with a keyphrase.", position: { endOffset: 36, startOffset: 29 } } ) ], - // Skipped for now, coz the PR for exact matching is not yet merged. - skip: true, + skip: false, }, { description: "can match dollar sign as in '$keyword' with exact matching.", @@ -206,8 +205,18 @@ const testCases = [ expectedMarkings: [ new Mark( { marked: "A string with a $keyword.", original: "A string with a $keyword.", position: { endOffset: 27, startOffset: 19 } } ) ], - // Skipped for now, coz the PR for exact matching is not yet merged. - skip: true, + skip: false, + }, + { + description: "can match multiple occurrences of keyphrase in the text with exact matching.", + paper: new Paper( "

<p>A string with cats and dogs, and other cats and dogs.</p>

", { keyword: "\"cats and dogs\"" } ), + keyphraseForms: [ [ "cats and dogs" ] ], + expectedCount: 1, + expectedMarkings: [ new Mark( { marked: "A string with a cats and dogs, " + + "and other cats and dogs.", + original: "A string with cats and dogs, and other cats and dogs.", + position: { endOffset: 27, startOffset: 19 } } ) ], + skip: false, }, ]; @@ -420,7 +429,7 @@ const testCasesWithLocaleMapping = [ new Mark( { marked: "Waltz keepin auf mitz auf keepin äöüß weiner blitz deutsch spitzen.", original: "Waltz keepin auf mitz auf keepin äöüß weiner blitz deutsch spitzen.", position: { endOffset: 40, startOffset: 36 } } ) ], - skip: true, + skip: false, }, { description: "counts a string of text with Spanish accented vowel", From d9a9aaf01e0031325394a9cb9c0b1e1a4caa9aa8 Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Tue, 20 Jun 2023 11:56:30 +0200 Subject: [PATCH 172/616] Adapt code --- .../match/matchKeyphraseWithSentence.js | 4 +-- .../researches/keywordCount.js | 26 ++++++++++++++----- 2 files changed, 21 insertions(+), 9 deletions(-) diff --git a/packages/yoastseo/src/languageProcessing/helpers/match/matchKeyphraseWithSentence.js b/packages/yoastseo/src/languageProcessing/helpers/match/matchKeyphraseWithSentence.js index 7170e82e9c7..8ce852866df 100644 --- a/packages/yoastseo/src/languageProcessing/helpers/match/matchKeyphraseWithSentence.js +++ b/packages/yoastseo/src/languageProcessing/helpers/match/matchKeyphraseWithSentence.js @@ -92,9 +92,9 @@ const getNonExactMatches = ( keywordForms, sentence, locale ) => { * E.g. If the keyphrase is "key word", then (if premium is activated) this will be [ [ "key", "keys" ], [ "word", "words" ] ] * The forms are retrieved higher up (among others in keywordCount.js) with researcher.getResearch( "morphology" ). * @param {Sentence} sentence The sentence to match against the keywordForms. + * @param {string} locale The locale used for transliteration. * @param {boolean} useExactMatching Whether to match the keyword forms exactly or not. * Depends on whether the user has put the keyphrase in double quotes. - * @param {string} locale The locale used for transliteration. * @returns {Token[]} The tokens that match the keywordForms. * * The algorithm is as follows: @@ -109,7 +109,7 @@ const getNonExactMatches = ( keywordForms, sentence, locale ) => { * This function corrects for these differences by combining tokens that are separated by a word coupler (e.g. "-") into one token: the matchToken. * This matchToken is then compared with the keyword forms. 
*/ -const matchKeyphraseWithSentence = ( keywordForms, sentence, useExactMatching, locale = false ) => { +const matchKeyphraseWithSentence = ( keywordForms, sentence, locale, useExactMatching = false ) => { if ( useExactMatching ) { return getExactMatches( keywordForms, sentence ); } diff --git a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js index be2e4076db8..87ca7bafb74 100644 --- a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js +++ b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js @@ -4,14 +4,16 @@ import { normalizeSingle } from "../helpers/sanitize/quotes"; import getMarkingsInSentence from "../helpers/highlighting/getMarkingsInSentence"; import matchKeyphraseWithSentence from "../helpers/match/matchKeyphraseWithSentence"; import isDoubleQuoted from "../helpers/match/isDoubleQuoted"; +import matchTextWithTransliteration from "../helpers/match/matchTextWithTransliteration"; /** * Counts the number of matches for a keyphrase in a sentence. * @param {Token[]} matches The matches to count. - * @param {(string[])[]} keyphraseForms Keyphraseforms that were used for matching. + * @param {(string[])[]} keyphraseForms The keyphrase forms that were used for matching. + * @param {string} locale The locale used for transliteration. * @returns {number} The number of matches. */ -const countMatches = ( matches, keyphraseForms ) => { +const countMatches = ( matches, keyphraseForms, locale ) => { // the count is the number of complete matches. const matchesCopy = [ ...matches ]; @@ -29,8 +31,9 @@ const countMatches = ( matches, keyphraseForms ) => { // check if any of the keyphrase forms is in the matches. const foundMatch = matchesCopy.find( match =>{ return keyphraseForm.some( keyphraseFormWord => { - const theRegex = new RegExp( `^${keyphraseFormWord}$`, "ig" ); - return match.text.match( theRegex ); + // const theRegex = new RegExp( `^${keyphraseFormWord}$`, "ig" ); + // return match.text.match( theRegex ); + return matchTextWithTransliteration( match.text, keyphraseFormWord, locale ); } ); } ); // @@ -100,10 +103,9 @@ export function countKeyphraseInText( sentences, topicForms, locale, matchWordCu sentences.forEach( sentence => { // eslint-disable-next-line no-warning-comments // TODO: test in Japanese to see if we use this helper as well - const matchesInSentence = matchKeyphraseWithSentence( topicForms.keyphraseForms, sentence, isExactMatchRequested, locale ); - console.log( matchesInSentence, "matches" ); + const matchesInSentence = matchKeyphraseWithSentence( topicForms.keyphraseForms, sentence, locale, isExactMatchRequested ); const matchesInSentenceWithoutConsecutiveMatches = removeConsecutiveMatches( matchesInSentence ); - const matchesCount = countMatches( matchesInSentenceWithoutConsecutiveMatches, topicForms.keyphraseForms ); + const matchesCount = countMatches( matchesInSentenceWithoutConsecutiveMatches, topicForms.keyphraseForms, locale ); const markings = getMarkingsInSentence( sentence, matchesInSentence, matchWordCustomHelper, locale ); result.markings.push( markings ); @@ -124,6 +126,16 @@ export function countKeyphraseInText( sentences, topicForms, locale, matchWordCu export default function keyphraseCount( paper, researcher ) { const topicForms = researcher.getResearch( "morphology" ); topicForms.keyphraseForms = topicForms.keyphraseForms.map( word => word.map( form => normalizeSingle( form ) ) ); + // [ [tortie], [cat, cats] ] + // KW: tortie cat 
+ // A pretty tortie cat, among many cats. Cats are beautiful. A pretty tortie cat, among many cats. Cats are beautiful. + if ( topicForms.keyphraseForms.length === 0 ) { + return { + count: 0, + markings: [], + length: 0, + }; + } const matchWordCustomHelper = researcher.getHelper( "matchWordCustomHelper" ); const locale = paper.getLocale(); From 053be78812026ab199b857deb104f179425fb5c8 Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Tue, 20 Jun 2023 11:57:57 +0200 Subject: [PATCH 173/616] Adapt unit tests --- .../researches/keywordCountSpec.js | 62 ++++++++++++++++++- 1 file changed, 59 insertions(+), 3 deletions(-) diff --git a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js index f0386fa10ee..50fd575aae4 100644 --- a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js +++ b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js @@ -431,15 +431,71 @@ const testCasesWithLocaleMapping = [ position: { endOffset: 40, startOffset: 36 } } ) ], skip: false, }, + // Test cases for consecutive matching. + { + description: "consecutive 1", + paper: new Paper( "

<p>this is a keywords keywords keywords.</p>

", { keyword: "keyword", locale: "en_US" } ), + keyphraseForms: [ [ "keyword", "keywords" ] ], + expectedCount: 2, + expectedMarkings: [ + new Mark( { marked: "Accion Española fue una revista.", + original: "Accion Española fue una revista.", + position: { endOffset: 9, startOffset: 3 } } ) ], + skip: false, + }, + { + description: "consecutive 2", + paper: new Paper( "

<p>this is a keyword keyword keyword.</p>

", { keyword: "keyword", locale: "en_US" } ), + keyphraseForms: [ [ "keyword", "keywords" ] ], + expectedCount: 2, + expectedMarkings: [ + new Mark( { marked: "Accion Española fue una revista.", + original: "Accion Española fue una revista.", + position: { endOffset: 9, startOffset: 3 } } ) ], + skip: false, + }, + { + description: "consecutive 3", + paper: new Paper( "

<p>this is a keyword keyword keyword.</p>

", { keyword: "keywords", locale: "en_US" } ), + keyphraseForms: [ [ "keyword", "keywords" ] ], + expectedCount: 2, + expectedMarkings: [ + new Mark( { marked: "Accion Española fue una revista.", + original: "Accion Española fue una revista.", + position: { endOffset: 9, startOffset: 3 } } ) ], + skip: false, + }, + { + description: "consecutive 4", + paper: new Paper( "

<p>this is a key key key word word word.</p>

", { keyword: "key words", locale: "en_US" } ), + keyphraseForms: [ [ "key" ], [ "word", "words" ] ], + expectedCount: 2, + expectedMarkings: [ + new Mark( { marked: "Accion Española fue una revista.", + original: "Accion Española fue una revista.", + position: { endOffset: 9, startOffset: 3 } } ) ], + skip: false, + }, + { + description: "counts a string of text with Spanish accented vowel", + paper: new Paper( "

<p>acción Española fue una revista.</p>

", { keyword: "accion", locale: "es_ES" } ), + keyphraseForms: [ [ "accion" ] ], + expectedCount: 0, + expectedMarkings: [ + new Mark( { marked: "Accion Española fue una revista.", + original: "Accion Española fue una revista.", + position: { endOffset: 9, startOffset: 3 } } ) ], + skip: false, + }, { description: "counts a string of text with Spanish accented vowel", paper: new Paper( "

<p>Accion Española fue una revista.</p>

", { keyword: "acción", locale: "es_ES" } ), keyphraseForms: [ [ "acción" ] ], expectedCount: 1, expectedMarkings: [ - new Mark( { marked: "Acción Española fue una revista.", - original: "Acción Española fue una revista.", - position: { endOffset: 40, startOffset: 36 } } ) ], + new Mark( { marked: "Accion Española fue una revista.", + original: "Accion Española fue una revista.", + position: { endOffset: 9, startOffset: 3 } } ) ], skip: false, }, { From 07b2c9a60510819515c968a754d8b02d96dffaf0 Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Tue, 20 Jun 2023 12:12:18 +0200 Subject: [PATCH 174/616] adjust the check --- .../yoastseo/src/languageProcessing/researches/keywordCount.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js index 87ca7bafb74..f1b3ee32ec6 100644 --- a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js +++ b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js @@ -33,7 +33,7 @@ const countMatches = ( matches, keyphraseForms, locale ) => { return keyphraseForm.some( keyphraseFormWord => { // const theRegex = new RegExp( `^${keyphraseFormWord}$`, "ig" ); // return match.text.match( theRegex ); - return matchTextWithTransliteration( match.text, keyphraseFormWord, locale ); + return matchTextWithTransliteration( match.text, keyphraseFormWord, locale ).length > 0; } ); } ); // From 4b2ed65abf368968485db6157361b13250aeedae Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Tue, 20 Jun 2023 12:47:38 +0200 Subject: [PATCH 175/616] adjust the check --- .../match/matchKeyphraseWithSentence.js | 33 +++++++++++++++---- .../researches/keywordCount.js | 19 +++++++---- 2 files changed, 39 insertions(+), 13 deletions(-) diff --git a/packages/yoastseo/src/languageProcessing/helpers/match/matchKeyphraseWithSentence.js b/packages/yoastseo/src/languageProcessing/helpers/match/matchKeyphraseWithSentence.js index 8ce852866df..ead3f40fd56 100644 --- a/packages/yoastseo/src/languageProcessing/helpers/match/matchKeyphraseWithSentence.js +++ b/packages/yoastseo/src/languageProcessing/helpers/match/matchKeyphraseWithSentence.js @@ -24,9 +24,9 @@ const tokenizeKeywordFormsForExactMatching = ( keywordForms ) => { * @param {(string[])[]} keywordForms The keyword forms to match. * @param {Sentence} sentence The sentence to match the keyword forms with. * - * @returns {Token[]} The tokens that exactly match the keyword forms. + * @returns {Object} The tokens that exactly match the keyword forms. */ -const getExactMatches = ( keywordForms, sentence ) => { +export const getExactMatches = ( keywordForms, sentence ) => { // Tokenize keyword forms. const keywordTokens = tokenizeKeywordFormsForExactMatching( keywordForms ); @@ -36,6 +36,7 @@ const getExactMatches = ( keywordForms, sentence ) => { let keywordIndex = 0; let sentenceIndex = 0; const matches = []; + let count = 0; let currentMatch = []; while ( sentenceIndex < sentenceTokens.length ) { @@ -55,13 +56,14 @@ const getExactMatches = ( keywordForms, sentence ) => { // Add the current match to the matches array and reset the keyword index and the current match. 
if ( currentMatch.length === keywordTokens.length ) { matches.push( ...currentMatch ); + count++; keywordIndex = 0; currentMatch = []; } sentenceIndex++; } - return matches; + return { count: count, matches: matches }; }; /** @@ -71,7 +73,7 @@ const getExactMatches = ( keywordForms, sentence ) => { * @param {string} locale The locale used for transliteration. * @returns {Token[]} The tokens that match the keyword forms. */ -const getNonExactMatches = ( keywordForms, sentence, locale ) => { +export const getNonExactMatches = ( keywordForms, sentence, locale ) => { const tokens = sentence.tokens.slice(); // Filter out all tokens that do not match the keyphrase forms. @@ -83,6 +85,25 @@ const getNonExactMatches = ( keywordForms, sentence, locale ) => { } ); } ); } ); + + // + // return keywordForms.map( formArray => formArray.map( form => { + // let count = 0; + // const matches = []; + // tokens.forEach( token => { + // const tokenText = escapeRegExp( token.text ); + // const match = matchTextWithTransliteration( tokenText, form, locale ); + // if ( match.length > 0 ) { + // count++; + // matches.push( match ); + // } + // } ); + // return { + // count: count, + // matches: matches, + // }; + // } ) + // ); }; /** @@ -109,11 +130,9 @@ const getNonExactMatches = ( keywordForms, sentence, locale ) => { * This function corrects for these differences by combining tokens that are separated by a word coupler (e.g. "-") into one token: the matchToken. * This matchToken is then compared with the keyword forms. */ -const matchKeyphraseWithSentence = ( keywordForms, sentence, locale, useExactMatching = false ) => { +export const matchKeyphraseWithSentence = ( keywordForms, sentence, locale, useExactMatching = false ) => { if ( useExactMatching ) { return getExactMatches( keywordForms, sentence ); } return getNonExactMatches( keywordForms, sentence, locale ); }; - -export default matchKeyphraseWithSentence; diff --git a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js index f1b3ee32ec6..7da0bc232ae 100644 --- a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js +++ b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js @@ -2,7 +2,7 @@ import { flatten } from "lodash-es"; import getSentencesFromTree from "../helpers/sentence/getSentencesFromTree"; import { normalizeSingle } from "../helpers/sanitize/quotes"; import getMarkingsInSentence from "../helpers/highlighting/getMarkingsInSentence"; -import matchKeyphraseWithSentence from "../helpers/match/matchKeyphraseWithSentence"; +import { getExactMatches, getNonExactMatches } from "../helpers/match/matchKeyphraseWithSentence"; import isDoubleQuoted from "../helpers/match/isDoubleQuoted"; import matchTextWithTransliteration from "../helpers/match/matchTextWithTransliteration"; @@ -36,7 +36,6 @@ const countMatches = ( matches, keyphraseForms, locale ) => { return matchTextWithTransliteration( match.text, keyphraseFormWord, locale ).length > 0; } ); } ); - // if ( foundMatch ) { matchesCopy.splice( matchesCopy.indexOf( foundMatch ), 1 ); nrKeyphraseFormsWithMatch += 1; @@ -103,10 +102,18 @@ export function countKeyphraseInText( sentences, topicForms, locale, matchWordCu sentences.forEach( sentence => { // eslint-disable-next-line no-warning-comments // TODO: test in Japanese to see if we use this helper as well - const matchesInSentence = matchKeyphraseWithSentence( topicForms.keyphraseForms, sentence, locale, isExactMatchRequested ); - 
const matchesInSentenceWithoutConsecutiveMatches = removeConsecutiveMatches( matchesInSentence ); - const matchesCount = countMatches( matchesInSentenceWithoutConsecutiveMatches, topicForms.keyphraseForms, locale ); - const markings = getMarkingsInSentence( sentence, matchesInSentence, matchWordCustomHelper, locale ); + let matchesCount = 0; + let markings = []; + if ( isExactMatchRequested ) { + const matchesInSentence = getExactMatches( topicForms.keyphraseForms, sentence ); + matchesCount = matchesInSentence.count > 2 ? 2 : matchesInSentence.count; + markings = getMarkingsInSentence( sentence, matchesInSentence.matches, matchWordCustomHelper, locale ); + } else { + const matchesInSentence = getNonExactMatches( topicForms.keyphraseForms, sentence, locale ); + const matchesInSentenceWithoutConsecutiveMatches = removeConsecutiveMatches( matchesInSentence ); + matchesCount = countMatches( matchesInSentenceWithoutConsecutiveMatches, topicForms.keyphraseForms, locale ); + markings = getMarkingsInSentence( sentence, matchesInSentence, matchWordCustomHelper, locale ); + } result.markings.push( markings ); result.count += matchesCount; From fb32e685db77a7803ebe30d4015f224d1ddb64ca Mon Sep 17 00:00:00 2001 From: Agnieszka Szuba Date: Tue, 20 Jun 2023 12:48:53 +0200 Subject: [PATCH 176/616] Fix documentation --- .../helpers/match/matchKeyphraseWithSentence.js | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/packages/yoastseo/src/languageProcessing/helpers/match/matchKeyphraseWithSentence.js b/packages/yoastseo/src/languageProcessing/helpers/match/matchKeyphraseWithSentence.js index 8ce852866df..46bd7d888dd 100644 --- a/packages/yoastseo/src/languageProcessing/helpers/match/matchKeyphraseWithSentence.js +++ b/packages/yoastseo/src/languageProcessing/helpers/match/matchKeyphraseWithSentence.js @@ -65,7 +65,8 @@ const getExactMatches = ( keywordForms, sentence ) => { }; /** - * Free matching of keyword forms in a sentence. Free matching happens when the keyphrase is enclosed in double quotes. + * Free matching of keyword forms in a sentence. + * * @param {(string[])[]} keywordForms The keyword forms to match. * @param {Sentence} sentence The sentence to match the keyword forms with. * @param {string} locale The locale used for transliteration. From 280d17ae453db29ce894cbb60a758bdf81e1b605 Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Tue, 20 Jun 2023 12:57:35 +0200 Subject: [PATCH 177/616] Adapt unit tests --- .../researches/keywordCountSpec.js | 18 +++++++++++------- .../match/matchKeyphraseWithSentence.js | 2 +- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js index 50fd575aae4..c54300e6f63 100644 --- a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js +++ b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js @@ -174,8 +174,7 @@ const testCases = [ expectedCount: 1, expectedMarkings: [ new Mark( { marked: "A string with a key phrase.", original: "A string with a key phrase.", - position: { endOffset: 36, startOffset: 29 } } ) ], - // Skipped for now, coz the PR for exact matching is not yet merged. 
+ position: { endOffset: 29, startOffset: 19 } } ) ], skip: false, }, { @@ -194,7 +193,7 @@ const testCases = [ expectedCount: 1, expectedMarkings: [ new Mark( { marked: "A .sentence with a keyphrase.", original: "A .sentence with a keyphrase.", - position: { endOffset: 36, startOffset: 29 } } ) ], + position: { endOffset: 14, startOffset: 5 } } ) ], skip: false, }, { @@ -211,11 +210,16 @@ const testCases = [ description: "can match multiple occurrences of keyphrase in the text with exact matching.", paper: new Paper( "

<p>A string with cats and dogs, and other cats and dogs.</p>

", { keyword: "\"cats and dogs\"" } ), keyphraseForms: [ [ "cats and dogs" ] ], - expectedCount: 1, - expectedMarkings: [ new Mark( { marked: "A string with a cats and dogs, " + + expectedCount: 2, + expectedMarkings: [ + new Mark( { marked: "A string with cats and dogs, " + "and other cats and dogs.", - original: "A string with cats and dogs, and other cats and dogs.", - position: { endOffset: 27, startOffset: 19 } } ) ], + original: "A string with cats and dogs, and other cats and dogs.", + position: { endOffset: 30, startOffset: 17 } } ), + new Mark( { marked: "A string with cats and dogs, " + + "and other cats and dogs.", + original: "A string with cats and dogs, and other cats and dogs.", + position: { endOffset: 55, startOffset: 42 } } ) ], skip: false, }, ]; diff --git a/packages/yoastseo/src/languageProcessing/helpers/match/matchKeyphraseWithSentence.js b/packages/yoastseo/src/languageProcessing/helpers/match/matchKeyphraseWithSentence.js index ead3f40fd56..6a93386f5b3 100644 --- a/packages/yoastseo/src/languageProcessing/helpers/match/matchKeyphraseWithSentence.js +++ b/packages/yoastseo/src/languageProcessing/helpers/match/matchKeyphraseWithSentence.js @@ -42,7 +42,7 @@ export const getExactMatches = ( keywordForms, sentence ) => { while ( sentenceIndex < sentenceTokens.length ) { // If the current sentence token matches the current keyword token, add it to the current match. const sentenceTokenText = escapeRegExp( sentenceTokens[ sentenceIndex ].text ); - const keywordTokenText = keywordTokens[ keywordIndex ]; + const keywordTokenText = escapeRegExp( keywordTokens[ keywordIndex ] ); if ( sentenceTokenText.toLowerCase() === keywordTokenText.toLowerCase() ) { currentMatch.push( sentenceTokens[ sentenceIndex ] ); From 86ccfe672ed41e582c3d601b7f19174efd2efad6 Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Tue, 20 Jun 2023 15:26:40 +0200 Subject: [PATCH 178/616] Adapt code --- .../researches/keywordCountSpec.js | 31 +++++++++++++--- .../match/matchKeyphraseWithSentence.js | 8 ++--- .../researches/keywordCount.js | 36 ++++++++++--------- 3 files changed, 48 insertions(+), 27 deletions(-) diff --git a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js index c54300e6f63..af27a1dbf95 100644 --- a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js +++ b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js @@ -198,8 +198,8 @@ const testCases = [ }, { description: "can match dollar sign as in '$keyword' with exact matching.", - paper: new Paper( "

<p>A string with a $keyword.</p>

", { keyword: "\"\\$keyword\"" } ), - keyphraseForms: [ [ "\\$keyword" ] ], + paper: new Paper( "

<p>A string with a $keyword.</p>

", { keyword: "\"$keyword\"" } ), + keyphraseForms: [ [ "$keyword" ] ], expectedCount: 1, expectedMarkings: [ new Mark( { marked: "A string with a $keyword.", original: "A string with a $keyword.", @@ -302,8 +302,8 @@ const testCasesWithSpecialCharacters = [ }, { description: "can match dollar sign as in '$keyword'.", - paper: new Paper( "

<p>A string with a $keyword.</p>

" ), - keyphraseForms: [ [ "\\$keyword" ] ], + paper: new Paper( "

<p>A string with a $keyword.</p>

", { keyword: "$keyword" } ), + keyphraseForms: [ [ "$keyword" ] ], expectedCount: 1, expectedMarkings: [ new Mark( { marked: "A string with a $keyword.", original: "A string with a $keyword.", @@ -480,9 +480,20 @@ const testCasesWithLocaleMapping = [ position: { endOffset: 9, startOffset: 3 } } ) ], skip: false, }, + { + description: "consecutive 5", + paper: new Paper( "

<p>this is a key word key word key word.</p>

", { keyword: "key words", locale: "en_US" } ), + keyphraseForms: [ [ "key" ], [ "word", "words" ] ], + expectedCount: 3, + expectedMarkings: [ + new Mark( { marked: "Accion Española fue una revista.", + original: "Accion Española fue una revista.", + position: { endOffset: 9, startOffset: 3 } } ) ], + skip: false, + }, { description: "counts a string of text with Spanish accented vowel", - paper: new Paper( "

<p>acción Española fue una revista.</p>
", { keyword: "accion", locale: "es_ES" } ), + paper: new Paper( "

<p>Acción Española fue una revista.</p>
", { keyword: "accion", locale: "es_ES" } ), keyphraseForms: [ [ "accion" ] ], expectedCount: 0, expectedMarkings: [ @@ -524,6 +535,16 @@ const testCasesWithLocaleMapping = [ position: { endOffset: 40, startOffset: 36 } } ) ], skip: false, }, + { + description: "counts a string with with a different forms of Turkish i, kephrase: İstanbul", + paper: new Paper( "

<p>Türkçe and Turkce</p>
", { keyword: "Türkçe", locale: "tr_TR" } ), + keyphraseForms: [ [ "Türkçe" ] ], + expectedCount: 4, + expectedMarkings: [ new Mark( { marked: "A string with 'kapaklı' as a keyword in it", + original: "A string with 'kapaklı' as a keyword in it", + position: { endOffset: 25, startOffset: 18 } } ) ], + skip: false, + }, { description: "counts a string with with a different forms of Turkish i, kephrase: İstanbul", paper: new Paper( "

<p>İstanbul and Istanbul and istanbul and ıstanbul</p>
", { keyword: "İstanbul", locale: "tr_TR" } ), diff --git a/packages/yoastseo/src/languageProcessing/helpers/match/matchKeyphraseWithSentence.js b/packages/yoastseo/src/languageProcessing/helpers/match/matchKeyphraseWithSentence.js index 6a93386f5b3..40ae8854e74 100644 --- a/packages/yoastseo/src/languageProcessing/helpers/match/matchKeyphraseWithSentence.js +++ b/packages/yoastseo/src/languageProcessing/helpers/match/matchKeyphraseWithSentence.js @@ -11,7 +11,7 @@ import matchTextWithTransliteration from "./matchTextWithTransliteration"; * * @returns {string[]} The tokenized keyword forms. */ -const tokenizeKeywordFormsForExactMatching = ( keywordForms ) => { +export const tokenizeKeywordFormsForExactMatching = ( keywordForms ) => { // Tokenize keyword forms. const keywordFormsText = keywordForms[ 0 ][ 0 ]; return getWordsForHTMLParser( keywordFormsText ); @@ -24,7 +24,7 @@ const tokenizeKeywordFormsForExactMatching = ( keywordForms ) => { * @param {(string[])[]} keywordForms The keyword forms to match. * @param {Sentence} sentence The sentence to match the keyword forms with. * - * @returns {Object} The tokens that exactly match the keyword forms. + * @returns {Token[]} The tokens that exactly match the keyword forms. */ export const getExactMatches = ( keywordForms, sentence ) => { // Tokenize keyword forms. @@ -36,7 +36,6 @@ export const getExactMatches = ( keywordForms, sentence ) => { let keywordIndex = 0; let sentenceIndex = 0; const matches = []; - let count = 0; let currentMatch = []; while ( sentenceIndex < sentenceTokens.length ) { @@ -56,14 +55,13 @@ export const getExactMatches = ( keywordForms, sentence ) => { // Add the current match to the matches array and reset the keyword index and the current match. if ( currentMatch.length === keywordTokens.length ) { matches.push( ...currentMatch ); - count++; keywordIndex = 0; currentMatch = []; } sentenceIndex++; } - return { count: count, matches: matches }; + return matches; }; /** diff --git a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js index 7da0bc232ae..4d04b03df89 100644 --- a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js +++ b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js @@ -2,7 +2,10 @@ import { flatten } from "lodash-es"; import getSentencesFromTree from "../helpers/sentence/getSentencesFromTree"; import { normalizeSingle } from "../helpers/sanitize/quotes"; import getMarkingsInSentence from "../helpers/highlighting/getMarkingsInSentence"; -import { getExactMatches, getNonExactMatches } from "../helpers/match/matchKeyphraseWithSentence"; +import { + matchKeyphraseWithSentence, + tokenizeKeywordFormsForExactMatching, +} from "../helpers/match/matchKeyphraseWithSentence"; import isDoubleQuoted from "../helpers/match/isDoubleQuoted"; import matchTextWithTransliteration from "../helpers/match/matchTextWithTransliteration"; @@ -13,7 +16,7 @@ import matchTextWithTransliteration from "../helpers/match/matchTextWithTranslit * @param {string} locale The locale used for transliteration. * @returns {number} The number of matches. */ -const countMatches = ( matches, keyphraseForms, locale ) => { +const countMatches = ( matches, keyphraseForms, locale, isExactMatchRequested ) => { // the count is the number of complete matches. const matchesCopy = [ ...matches ]; @@ -31,8 +34,15 @@ const countMatches = ( matches, keyphraseForms, locale ) => { // check if any of the keyphrase forms is in the matches. 
const foundMatch = matchesCopy.find( match =>{ return keyphraseForm.some( keyphraseFormWord => { - // const theRegex = new RegExp( `^${keyphraseFormWord}$`, "ig" ); - // return match.text.match( theRegex ); + if ( isExactMatchRequested ) { + // keyphraseForm = [ "key phrase" ] + // keyphraseWord = "key phrase" + // match = { text: "", sourceCodeInfo: {} } + keyphraseFormWord = tokenizeKeywordFormsForExactMatching( keyphraseFormWord ); + // "key phrase" => [ "key", " ", "phrase" ] + return keyphraseFormWord.every( word => matchTextWithTransliteration( match.text, word, locale ).length > 0 ); + + } return matchTextWithTransliteration( match.text, keyphraseFormWord, locale ).length > 0; } ); } ); @@ -102,18 +112,10 @@ export function countKeyphraseInText( sentences, topicForms, locale, matchWordCu sentences.forEach( sentence => { // eslint-disable-next-line no-warning-comments // TODO: test in Japanese to see if we use this helper as well - let matchesCount = 0; - let markings = []; - if ( isExactMatchRequested ) { - const matchesInSentence = getExactMatches( topicForms.keyphraseForms, sentence ); - matchesCount = matchesInSentence.count > 2 ? 2 : matchesInSentence.count; - markings = getMarkingsInSentence( sentence, matchesInSentence.matches, matchWordCustomHelper, locale ); - } else { - const matchesInSentence = getNonExactMatches( topicForms.keyphraseForms, sentence, locale ); - const matchesInSentenceWithoutConsecutiveMatches = removeConsecutiveMatches( matchesInSentence ); - matchesCount = countMatches( matchesInSentenceWithoutConsecutiveMatches, topicForms.keyphraseForms, locale ); - markings = getMarkingsInSentence( sentence, matchesInSentence, matchWordCustomHelper, locale ); - } + const matchesInSentence = matchKeyphraseWithSentence( topicForms.keyphraseForms, sentence, locale, isExactMatchRequested ); + const matchesInSentenceWithoutConsecutiveMatches = removeConsecutiveMatches( matchesInSentence ); + const matchesCount = countMatches( matchesInSentenceWithoutConsecutiveMatches, topicForms.keyphraseForms, locale, isExactMatchRequested ); + const markings = getMarkingsInSentence( sentence, matchesInSentence, matchWordCustomHelper, locale ); result.markings.push( markings ); result.count += matchesCount; @@ -154,7 +156,7 @@ export default function keyphraseCount( paper, researcher ) { return { count: keyphraseFound.count, - markings: flatten( keyphraseFound.markings ), + markings: keyphraseFound.count === 0 ? [] : flatten( keyphraseFound.markings ), length: topicForms.keyphraseForms.length, }; } From 629f47dc0791994c4097999c0fb9e735080e78af Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Wed, 21 Jun 2023 16:06:25 +0200 Subject: [PATCH 179/616] Adapt unit tests --- .../researches/keywordCountSpec.js | 321 +++++++++++++----- 1 file changed, 235 insertions(+), 86 deletions(-) diff --git a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js index af27a1dbf95..0616ac9b657 100644 --- a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js +++ b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js @@ -167,6 +167,16 @@ const testCases = [ expectedMarkings: [], skip: false, }, + { + description: "with exact matching, a multi word keyphrase should be counted if the focus keyphrase is the same", + paper: new Paper( "

<p>A string with a key phrase key phrase.</p>
", { keyword: "\"key phrase\"" } ), + keyphraseForms: [ [ "key phrase" ] ], + expectedCount: 2, + expectedMarkings: [ new Mark( { marked: "A string with a key phrase key phrase.", + original: "A string with a key phrase key phrase.", + position: { endOffset: 40, startOffset: 19 } } ) ], + skip: false, + }, { description: "with exact matching, a multi word keyphrase should be counted if the focus keyphrase is the same", paper: new Paper( "

<p>A string with a key phrase.</p>
", { keyword: "\"key phrase\"" } ), @@ -264,10 +274,15 @@ const testCasesWithSpecialCharacters = [ description: "counts 'key word' in 'key-word'.", paper: new Paper( "

<p>A string with a key-word.</p>
", { keyword: "key word" } ), keyphraseForms: [ [ "key", "keys" ], [ "word", "words" ] ], - expectedCount: 0, - expectedMarkings: [], + expectedCount: 1, + expectedMarkings: [ + new Mark( { + original: "A string with a key-word.", + marked: "A string with a key-word.", + position: { endOffset: 27, startOffset: 19 }, + } ), + ], skip: false, - // Note: this behavior might change in the future. }, { description: "counts a string with a keyword with a '_' in it", @@ -435,71 +450,12 @@ const testCasesWithLocaleMapping = [ position: { endOffset: 40, startOffset: 36 } } ) ], skip: false, }, - // Test cases for consecutive matching. - { - description: "consecutive 1", - paper: new Paper( "

<p>this is a keywords keywords keywords.</p>
", { keyword: "keyword", locale: "en_US" } ), - keyphraseForms: [ [ "keyword", "keywords" ] ], - expectedCount: 2, - expectedMarkings: [ - new Mark( { marked: "Accion Española fue una revista.", - original: "Accion Española fue una revista.", - position: { endOffset: 9, startOffset: 3 } } ) ], - skip: false, - }, - { - description: "consecutive 2", - paper: new Paper( "

<p>this is a keyword keyword keyword.</p>
", { keyword: "keyword", locale: "en_US" } ), - keyphraseForms: [ [ "keyword", "keywords" ] ], - expectedCount: 2, - expectedMarkings: [ - new Mark( { marked: "Accion Española fue una revista.", - original: "Accion Española fue una revista.", - position: { endOffset: 9, startOffset: 3 } } ) ], - skip: false, - }, - { - description: "consecutive 3", - paper: new Paper( "

<p>this is a keyword keyword keyword.</p>
", { keyword: "keywords", locale: "en_US" } ), - keyphraseForms: [ [ "keyword", "keywords" ] ], - expectedCount: 2, - expectedMarkings: [ - new Mark( { marked: "Accion Española fue una revista.", - original: "Accion Española fue una revista.", - position: { endOffset: 9, startOffset: 3 } } ) ], - skip: false, - }, - { - description: "consecutive 4", - paper: new Paper( "

<p>this is a key key key word word word.</p>
", { keyword: "key words", locale: "en_US" } ), - keyphraseForms: [ [ "key" ], [ "word", "words" ] ], - expectedCount: 2, - expectedMarkings: [ - new Mark( { marked: "Accion Española fue una revista.", - original: "Accion Española fue una revista.", - position: { endOffset: 9, startOffset: 3 } } ) ], - skip: false, - }, - { - description: "consecutive 5", - paper: new Paper( "

<p>this is a key word key word key word.</p>
", { keyword: "key words", locale: "en_US" } ), - keyphraseForms: [ [ "key" ], [ "word", "words" ] ], - expectedCount: 3, - expectedMarkings: [ - new Mark( { marked: "Accion Española fue una revista.", - original: "Accion Española fue una revista.", - position: { endOffset: 9, startOffset: 3 } } ) ], - skip: false, - }, { description: "counts a string of text with Spanish accented vowel", paper: new Paper( "

<p>Acción Española fue una revista.</p>
", { keyword: "accion", locale: "es_ES" } ), keyphraseForms: [ [ "accion" ] ], expectedCount: 0, - expectedMarkings: [ - new Mark( { marked: "Accion Española fue una revista.", - original: "Accion Española fue una revista.", - position: { endOffset: 9, startOffset: 3 } } ) ], + expectedMarkings: [], skip: false, }, { @@ -519,9 +475,16 @@ const testCasesWithLocaleMapping = [ keyphraseForms: [ [ "överlåtelsebesiktning" ] ], expectedCount: 2, expectedMarkings: [ - new Mark( { marked: "Acción Española fue una revista.", - original: "Acción Española fue una revista.", - position: { endOffset: 40, startOffset: 36 } } ) ], + new Mark( { + marked: "oeverlaatelsebesiktning " + + "and overlatelsebesiktning", + original: "oeverlaatelsebesiktning and overlatelsebesiktning", + position: { endOffset: 26, startOffset: 3 } } ), + new Mark( { + marked: "oeverlaatelsebesiktning " + + "and overlatelsebesiktning", + original: "oeverlaatelsebesiktning and overlatelsebesiktning", + position: { endOffset: 52, startOffset: 31 } } ) ], skip: false, }, { @@ -530,19 +493,38 @@ const testCasesWithLocaleMapping = [ keyphraseForms: [ [ "København" ] ], expectedCount: 3, expectedMarkings: [ - new Mark( { marked: "Acción Española fue una revista.", - original: "Acción Española fue una revista.", - position: { endOffset: 40, startOffset: 36 } } ) ], + new Mark( { + marked: "København and " + + "Koebenhavn and Kobenhavn", + original: "København and Koebenhavn and Kobenhavn", + position: { endOffset: 12, startOffset: 3 } } ), + new Mark( { + marked: "København and " + + "Koebenhavn and Kobenhavn", + original: "København and Koebenhavn and Kobenhavn", + position: { endOffset: 27, startOffset: 17 } } ), + new Mark( { + marked: "København and " + + "Koebenhavn and Kobenhavn", + original: "København and Koebenhavn and Kobenhavn", + position: { endOffset: 41, startOffset: 32 } } ) ], skip: false, }, { - description: "counts a string with with a different forms of Turkish i, kephrase: İstanbul", + description: "counts a string with a Turkish diacritics", paper: new Paper( "

<p>Türkçe and Turkce</p>
", { keyword: "Türkçe", locale: "tr_TR" } ), keyphraseForms: [ [ "Türkçe" ] ], - expectedCount: 4, - expectedMarkings: [ new Mark( { marked: "A string with 'kapaklı' as a keyword in it", - original: "A string with 'kapaklı' as a keyword in it", - position: { endOffset: 25, startOffset: 18 } } ) ], + expectedCount: 2, + expectedMarkings: [ + new Mark( { + marked: "Türkçe and Turkce", + original: "Türkçe and Turkce", + position: { endOffset: 9, startOffset: 3 } } ), + new Mark( { + marked: "Türkçe and Turkce", + original: "Türkçe and Turkce", + position: { endOffset: 20, startOffset: 14 } } ), + ], skip: false, }, { @@ -550,9 +532,27 @@ const testCasesWithLocaleMapping = [ paper: new Paper( "

<p>İstanbul and Istanbul and istanbul and ıstanbul</p>
", { keyword: "İstanbul", locale: "tr_TR" } ), keyphraseForms: [ [ "İstanbul" ] ], expectedCount: 4, - expectedMarkings: [ new Mark( { marked: "A string with 'kapaklı' as a keyword in it", - original: "A string with 'kapaklı' as a keyword in it", - position: { endOffset: 25, startOffset: 18 } } ) ], + expectedMarkings: [ + new Mark( { + marked: "İstanbul and Istanbul and " + + "istanbul and ıstanbul", + original: "İstanbul and Istanbul and istanbul and ıstanbul", + position: { endOffset: 11, startOffset: 3 } } ), + new Mark( { + marked: "İstanbul and Istanbul and " + + "istanbul and ıstanbul", + original: "İstanbul and Istanbul and istanbul and ıstanbul", + position: { endOffset: 24, startOffset: 16 } } ), + new Mark( { + marked: "İstanbul and Istanbul and " + + "istanbul and ıstanbul", + original: "İstanbul and Istanbul and istanbul and ıstanbul", + position: { endOffset: 37, startOffset: 29 } } ), + new Mark( { + marked: "İstanbul and Istanbul and " + + "istanbul and ıstanbul", + original: "İstanbul and Istanbul and istanbul and ıstanbul", + position: { endOffset: 50, startOffset: 42 } } ) ], skip: false, }, { @@ -560,9 +560,27 @@ const testCasesWithLocaleMapping = [ paper: new Paper( "

<p>İstanbul and Istanbul and istanbul and ıstanbul</p>
", { keyword: "Istanbul", locale: "tr_TR" } ), keyphraseForms: [ [ "Istanbul" ] ], expectedCount: 4, - expectedMarkings: [ new Mark( { marked: "A string with 'kapaklı' as a keyword in it", - original: "A string with 'kapaklı' as a keyword in it", - position: { endOffset: 25, startOffset: 18 } } ) ], + expectedMarkings: [ + new Mark( { + marked: "İstanbul and Istanbul and " + + "istanbul and ıstanbul", + original: "İstanbul and Istanbul and istanbul and ıstanbul", + position: { endOffset: 11, startOffset: 3 } } ), + new Mark( { + marked: "İstanbul and Istanbul and " + + "istanbul and ıstanbul", + original: "İstanbul and Istanbul and istanbul and ıstanbul", + position: { endOffset: 24, startOffset: 16 } } ), + new Mark( { + marked: "İstanbul and Istanbul and " + + "istanbul and ıstanbul", + original: "İstanbul and Istanbul and istanbul and ıstanbul", + position: { endOffset: 37, startOffset: 29 } } ), + new Mark( { + marked: "İstanbul and Istanbul and " + + "istanbul and ıstanbul", + original: "İstanbul and Istanbul and istanbul and ıstanbul", + position: { endOffset: 50, startOffset: 42 } } ) ], skip: false, }, { @@ -570,9 +588,27 @@ const testCasesWithLocaleMapping = [ paper: new Paper( "

<p>İstanbul and Istanbul and istanbul and ıstanbul</p>
", { keyword: "istanbul", locale: "tr_TR" } ), keyphraseForms: [ [ "istanbul" ] ], expectedCount: 4, - expectedMarkings: [ new Mark( { marked: "A string with 'kapaklı' as a keyword in it", - original: "A string with 'kapaklı' as a keyword in it", - position: { endOffset: 25, startOffset: 18 } } ) ], + expectedMarkings: [ + new Mark( { + marked: "İstanbul and Istanbul and " + + "istanbul and ıstanbul", + original: "İstanbul and Istanbul and istanbul and ıstanbul", + position: { endOffset: 11, startOffset: 3 } } ), + new Mark( { + marked: "İstanbul and Istanbul and " + + "istanbul and ıstanbul", + original: "İstanbul and Istanbul and istanbul and ıstanbul", + position: { endOffset: 24, startOffset: 16 } } ), + new Mark( { + marked: "İstanbul and Istanbul and " + + "istanbul and ıstanbul", + original: "İstanbul and Istanbul and istanbul and ıstanbul", + position: { endOffset: 37, startOffset: 29 } } ), + new Mark( { + marked: "İstanbul and Istanbul and " + + "istanbul and ıstanbul", + original: "İstanbul and Istanbul and istanbul and ıstanbul", + position: { endOffset: 50, startOffset: 42 } } ) ], skip: false, }, { @@ -580,9 +616,27 @@ const testCasesWithLocaleMapping = [ paper: new Paper( "

<p>İstanbul and Istanbul and istanbul and ıstanbul</p>
", { keyword: "ıstanbul", locale: "tr_TR" } ), keyphraseForms: [ [ "ıstanbul" ] ], expectedCount: 4, - expectedMarkings: [ new Mark( { marked: "A string with 'kapaklı' as a keyword in it", - original: "A string with 'kapaklı' as a keyword in it", - position: { endOffset: 25, startOffset: 18 } } ) ], + expectedMarkings: [ + new Mark( { + marked: "İstanbul and Istanbul and " + + "istanbul and ıstanbul", + original: "İstanbul and Istanbul and istanbul and ıstanbul", + position: { endOffset: 11, startOffset: 3 } } ), + new Mark( { + marked: "İstanbul and Istanbul and " + + "istanbul and ıstanbul", + original: "İstanbul and Istanbul and istanbul and ıstanbul", + position: { endOffset: 24, startOffset: 16 } } ), + new Mark( { + marked: "İstanbul and Istanbul and " + + "istanbul and ıstanbul", + original: "İstanbul and Istanbul and istanbul and ıstanbul", + position: { endOffset: 37, startOffset: 29 } } ), + new Mark( { + marked: "İstanbul and Istanbul and " + + "istanbul and ıstanbul", + original: "İstanbul and Istanbul and istanbul and ıstanbul", + position: { endOffset: 50, startOffset: 42 } } ) ], skip: false, }, ]; @@ -605,6 +659,101 @@ describe.each( testCasesWithLocaleMapping )( "Test for counting the keyword in a } ); } ); +const testDataForConsecutiveMatching = [ + { + description: "consecutive 1", + paper: new Paper( "

<p>this is a keywords keywords keywords.</p>
", { keyword: "keyword", locale: "en_US" } ), + keyphraseForms: [ [ "keyword", "keywords" ] ], + expectedCount: 2, + expectedMarkings: [ + new Mark( { marked: "this is a keywords keywords keywords.", + original: "this is a keywords keywords keywords.", + position: { endOffset: 30, startOffset: 13 } } ) ], + skip: false, + }, + { + description: "consecutive 2", + paper: new Paper( "

<p>this is a keyword keyword keyword.</p>
", { keyword: "keyword", locale: "en_US" } ), + keyphraseForms: [ [ "keyword", "keywords" ] ], + expectedCount: 2, + expectedMarkings: [ + new Mark( { marked: "this is a keyword keyword keyword.", + original: "this is a keyword keyword keyword.", + position: { endOffset: 28, startOffset: 13 } } ) ], + skip: false, + }, + { + description: "consecutive 3", + paper: new Paper( "

<p>this is a keyword keyword keyword.</p>
", { keyword: "keywords", locale: "en_US" } ), + keyphraseForms: [ [ "keyword", "keywords" ] ], + expectedCount: 2, + expectedMarkings: [ + new Mark( { marked: "this is a keyword keyword keyword.", + original: "this is a keyword keyword keyword.", + position: { endOffset: 28, startOffset: 13 } } ) ], + skip: false, + }, + { + description: "consecutive 4", + paper: new Paper( "

<p>this is a key key key word word word.</p>
", { keyword: "key words", locale: "en_US" } ), + keyphraseForms: [ [ "key" ], [ "word", "words" ] ], + expectedCount: 2, + expectedMarkings: [ + new Mark( { + marked: "this is a key key key " + + "word word word.", + original: "this is a key key key word word word.", + position: { endOffset: 20, startOffset: 13 } } ), + new Mark( { + marked: "this is a key key key " + + "word word word.", + original: "this is a key key key word word word.", + position: { endOffset: 34, startOffset: 25 } } ) ], + skip: false, + }, + { + description: "doesn't recognize consecutive occurrences when the keyphrase contains multiple words, with non-exact matching", + paper: new Paper( "

<p>this is a key word key word key word.</p>
", { keyword: "key words", locale: "en_US" } ), + keyphraseForms: [ [ "key" ], [ "word", "words" ] ], + expectedCount: 3, + expectedMarkings: [ + new Mark( { marked: "this is a key word key word key word.", + original: "this is a key word key word key word.", + position: { endOffset: 39, startOffset: 13 } } ) ], + skip: false, + }, + { + description: "doesn't recognize consecutive occurrences when the keyphrase contains multiple words: with exact matching", + paper: new Paper( "

<p>A sentence about a red panda red panda red panda.</p>
", { keyword: "\"red panda\"", locale: "en_US" } ), + keyphraseForms: [ [ "red panda" ] ], + expectedCount: 3, + expectedMarkings: [ + new Mark( { marked: "A sentence about a red panda red panda red panda.", + original: "A sentence about a red panda red panda red panda.", + position: { endOffset: 51, startOffset: 22 } } ) ], + skip: false, + }, +]; + +describe.each( testDataForConsecutiveMatching )( "Test for counting the consecutive occurrences of keyword in the text", + function( { + description, + paper, + keyphraseForms, + expectedCount, + expectedMarkings, + skip } ) { + const test = skip ? it.skip : it; + + test( description, function() { + const mockResearcher = buildMorphologyMockResearcher( keyphraseForms ); + buildTree( paper, mockResearcher ); + const keyWordCountResult = keyphraseCount( paper, mockResearcher ); + expect( keyWordCountResult.count ).toBe( expectedCount ); + expect( keyWordCountResult.markings ).toEqual( expectedMarkings ); + } ); + } ); + /** * Mocks Japanese Researcher. * @param {Array} keyphraseForms The morphological forms to be added to the researcher. From ce87af2d30dde0475694090f0d85d1f0f31c4241 Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Wed, 21 Jun 2023 16:35:51 +0200 Subject: [PATCH 180/616] Simplify code --- .../highlighting/getMarkingsInSentence.js | 2 +- .../match/matchKeyphraseWithSentence.js | 104 +++++++++--------- .../researches/keywordCount.js | 99 +++++------------ 3 files changed, 83 insertions(+), 122 deletions(-) diff --git a/packages/yoastseo/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js b/packages/yoastseo/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js index 98f80447499..56014947073 100644 --- a/packages/yoastseo/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js +++ b/packages/yoastseo/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js @@ -117,7 +117,7 @@ const mergeConsecutiveAndOverlappingMarkings = ( markings, useSpace = true ) => * Gets the Mark objects of all keyphrase matches. * * @param {Sentence} sentence The sentence to check. - * @param {Object} matchesInSentence An object containing the matches in the sentence. + * @param {Array} matchesInSentence An object containing the matches in the sentence. * @param {function} matchWordCustomHelper A custom helper to match words with a text. * @param {string} locale The locale used in the analysis. * diff --git a/packages/yoastseo/src/languageProcessing/helpers/match/matchKeyphraseWithSentence.js b/packages/yoastseo/src/languageProcessing/helpers/match/matchKeyphraseWithSentence.js index 0af4cef8a1e..a5bdeb35146 100644 --- a/packages/yoastseo/src/languageProcessing/helpers/match/matchKeyphraseWithSentence.js +++ b/packages/yoastseo/src/languageProcessing/helpers/match/matchKeyphraseWithSentence.js @@ -1,41 +1,43 @@ -import getWordsForHTMLParser from "../word/getWordsForHTMLParser"; import { escapeRegExp } from "lodash-es"; import matchTextWithTransliteration from "./matchTextWithTransliteration"; +import getWordsForHTMLParser from "../word/getWordsForHTMLParser"; /** * Tokenizes keyword forms for exact matching. This function gets the keyword form and tokenizes it. * This function assumes that if a keyphrase needs to be matched exactly, there will be only one keyword form. * This is the result of how the focus keyword is processed in buildTopicStems.js in the buildStems function. * - * @param {(string[])[]} keywordForms The keyword forms to tokenize. 
+ * @param {(string[])} keywordForm The keyword forms to tokenize. * * @returns {string[]} The tokenized keyword forms. */ -export const tokenizeKeywordFormsForExactMatching = ( keywordForms ) => { +export const tokenizeKeywordFormsForExactMatching = ( keywordForm ) => { // Tokenize keyword forms. - const keywordFormsText = keywordForms[ 0 ][ 0 ]; + const keywordFormsText = keywordForm[ 0 ]; return getWordsForHTMLParser( keywordFormsText ); }; /** * Gets the exact matches of the keyphrase. * Exact matching happens when the user puts the keyword in double quotes. - * - * @param {(string[])[]} keywordForms The keyword forms to match. * @param {Sentence} sentence The sentence to match the keyword forms with. + * @param {string[]} keywordForm The keyword forms to match. * - * @returns {Token[]} The tokens that exactly match the keyword forms. + * @returns {Object} The tokens that exactly match the keyword forms. */ -export const getExactMatches = ( keywordForms, sentence ) => { - // Tokenize keyword forms. - const keywordTokens = tokenizeKeywordFormsForExactMatching( keywordForms ); +const exactMatchKeyphraseWithSentence = ( sentence, keywordForm ) => { + const result = { + count: 0, + matches: [], + }; + // Tokenize keyword form. + const keywordTokens = tokenizeKeywordFormsForExactMatching( keywordForm ); const sentenceTokens = sentence.tokens; // Check if tokenized keyword forms occur in the same order in the sentence tokens. let keywordIndex = 0; let sentenceIndex = 0; - const matches = []; let currentMatch = []; while ( sentenceIndex < sentenceTokens.length ) { @@ -54,68 +56,68 @@ export const getExactMatches = ( keywordForms, sentence ) => { // If the current match has the same length as the keyword tokens, the keyword forms have been matched. // Add the current match to the matches array and reset the keyword index and the current match. if ( currentMatch.length === keywordTokens.length ) { - matches.push( ...currentMatch ); + result.matches.push( ...currentMatch ); + result.count++; keywordIndex = 0; currentMatch = []; } sentenceIndex++; } + return result; +}; + + +const matchTokensWithKeywordForm = ( tokens, wordForm, locale ) => { + let matches = []; + + tokens.forEach( token => { + // Escaping for the regex is only necessary when we want to create a regex out of a string. + // In this case, we create a regex out of the keywordForm but not out of the token text. + const occurrence = matchTextWithTransliteration( token.text, escapeRegExp( wordForm ), locale ); + if ( occurrence.length > 0 ) { + matches = matches.concat( token ); + } + } ); return matches; }; /** - * Free matching of keyword forms in a sentence. + * Finds keyphrase forms in a sentence. * - * @param {(string[])[]} keywordForms The keyword forms to match. - * @param {Sentence} sentence The sentence to match the keyword forms with. - * @param {string} locale The locale used for transliteration. - * @returns {Token[]} The tokens that match the keyword forms. + * @param {Sentence} sentence The sentence to check. + * @param {string[]} wordForms The word forms of the keyphrase to check. + * @param {string} locale The locale used in the analysis. + * @returns {{count: number, matches: *[]}} Object containing the number of the matches and the matched tokens. */ -export const getNonExactMatches = ( keywordForms, sentence, locale ) => { +const matchSentenceWithKeywordForms = ( sentence, wordForms, locale ) => { const tokens = sentence.tokens.slice(); - // Filter out all tokens that do not match the keyphrase forms. 
- return tokens.filter( ( token ) => { - const tokenText = escapeRegExp( token.text ); - return keywordForms.some( ( keywordForm ) => { - return keywordForm.some( ( keywordFormPart ) => { - return matchTextWithTransliteration( tokenText, keywordFormPart, locale ).length > 0; - } ); - } ); - } ); + let count = 0; + let matches = []; - // - // return keywordForms.map( formArray => formArray.map( form => { - // let count = 0; - // const matches = []; - // tokens.forEach( token => { - // const tokenText = escapeRegExp( token.text ); - // const match = matchTextWithTransliteration( tokenText, form, locale ); - // if ( match.length > 0 ) { - // count++; - // matches.push( match ); - // } - // } ); - // return { - // count: count, - // matches: matches, - // }; - // } ) - // ); + wordForms.forEach( wordToMatch => { + const occurrence = matchTokensWithKeywordForm( tokens, wordToMatch, locale ); + count += occurrence.length; + matches = matches.concat( occurrence ); + } ); + return { + count: count, + matches: matches, + }; }; /** * Matches a keyword with a sentence object from the html parser. * - * @param {(string[])[]} keywordForms The keyword forms. + * @param {string[]} wordForms The keyword forms. * E.g. If the keyphrase is "key word", then (if premium is activated) this will be [ [ "key", "keys" ], [ "word", "words" ] ] * The forms are retrieved higher up (among others in keywordCount.js) with researcher.getResearch( "morphology" ). * @param {Sentence} sentence The sentence to match against the keywordForms. * @param {string} locale The locale used for transliteration. * @param {boolean} useExactMatching Whether to match the keyword forms exactly or not. * Depends on whether the user has put the keyphrase in double quotes. - * @returns {Token[]} The tokens that match the keywordForms. + * @returns {Object} Object containing the number of the matches and the matched tokens. * * The algorithm is as follows: * @@ -129,9 +131,11 @@ export const getNonExactMatches = ( keywordForms, sentence, locale ) => { * This function corrects for these differences by combining tokens that are separated by a word coupler (e.g. "-") into one token: the matchToken. * This matchToken is then compared with the keyword forms. 
*/ -export const matchKeyphraseWithSentence = ( keywordForms, sentence, locale, useExactMatching = false ) => { +const matchKeyphraseWithSentence = ( wordForms, sentence, locale, useExactMatching = false ) => { if ( useExactMatching ) { - return getExactMatches( keywordForms, sentence ); + return exactMatchKeyphraseWithSentence( sentence, wordForms ); } - return getNonExactMatches( keywordForms, sentence, locale ); + return matchSentenceWithKeywordForms( sentence, wordForms, locale ); }; + +export default matchKeyphraseWithSentence; diff --git a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js index 4d04b03df89..985f4d01b3b 100644 --- a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js +++ b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js @@ -1,76 +1,22 @@ -import { flatten } from "lodash-es"; +import { flatten, flattenDeep } from "lodash-es"; import getSentencesFromTree from "../helpers/sentence/getSentencesFromTree"; import { normalizeSingle } from "../helpers/sanitize/quotes"; import getMarkingsInSentence from "../helpers/highlighting/getMarkingsInSentence"; -import { - matchKeyphraseWithSentence, - tokenizeKeywordFormsForExactMatching, -} from "../helpers/match/matchKeyphraseWithSentence"; +import matchKeyphraseWithSentence from "../helpers/match/matchKeyphraseWithSentence"; import isDoubleQuoted from "../helpers/match/isDoubleQuoted"; -import matchTextWithTransliteration from "../helpers/match/matchTextWithTransliteration"; /** - * Counts the number of matches for a keyphrase in a sentence. - * @param {Token[]} matches The matches to count. - * @param {(string[])[]} keyphraseForms The keyphrase forms that were used for matching. - * @param {string} locale The locale used for transliteration. - * @returns {number} The number of matches. - */ -const countMatches = ( matches, keyphraseForms, locale, isExactMatchRequested ) => { - // the count is the number of complete matches. - const matchesCopy = [ ...matches ]; - - let nrMatches = 0; - // While the number of matches is longer than or the same as the keyphrase forms. - while ( matchesCopy.length >= keyphraseForms.length ) { - let nrKeyphraseFormsWithMatch = 0; - - // for each keyphrase form, if there is a match that is equal to the keyphrase form, remove it from the matches. - // If there is no match, return the current count. - // If all keyphrase forms have a match, increase the count by 1. - for ( let i = 0; i < keyphraseForms.length; i++ ) { - const keyphraseForm = keyphraseForms[ i ]; - - // check if any of the keyphrase forms is in the matches. 
- const foundMatch = matchesCopy.find( match =>{ - return keyphraseForm.some( keyphraseFormWord => { - if ( isExactMatchRequested ) { - // keyphraseForm = [ "key phrase" ] - // keyphraseWord = "key phrase" - // match = { text: "", sourceCodeInfo: {} } - keyphraseFormWord = tokenizeKeywordFormsForExactMatching( keyphraseFormWord ); - // "key phrase" => [ "key", " ", "phrase" ] - return keyphraseFormWord.every( word => matchTextWithTransliteration( match.text, word, locale ).length > 0 ); - - } - return matchTextWithTransliteration( match.text, keyphraseFormWord, locale ).length > 0; - } ); - } ); - if ( foundMatch ) { - matchesCopy.splice( matchesCopy.indexOf( foundMatch ), 1 ); - nrKeyphraseFormsWithMatch += 1; - } - } - if ( nrKeyphraseFormsWithMatch === keyphraseForms.length ) { - nrMatches += 1; - } else { - return nrMatches; - } - } - return nrMatches; -}; - -/** - * Creates a new array in which consecutive matches are removed. A consecutive match occures if the same keyphrase occurs more than twice in a row. + * Creates a new array in which consecutive matches are removed. A consecutive match occurs if the same keyphrase occurs more than twice in a row. * The first and second match are kept, the rest is removed. + * * @param {Token[]} matches An array of all matches. (Including consecutive matches). + * * @returns {Token[]} An array of matches without consecutive matches. */ const removeConsecutiveMatches = ( matches ) => { // If there are three or more matches in a row, remove all but the first and the second. - const matchesCopy = [ ...matches ]; - // const matchesCopy = cloneDeep( matches ); + let nrConsecutiveMatches = 0; let previousMatch = null; const result = []; @@ -112,13 +58,26 @@ export function countKeyphraseInText( sentences, topicForms, locale, matchWordCu sentences.forEach( sentence => { // eslint-disable-next-line no-warning-comments // TODO: test in Japanese to see if we use this helper as well - const matchesInSentence = matchKeyphraseWithSentence( topicForms.keyphraseForms, sentence, locale, isExactMatchRequested ); - const matchesInSentenceWithoutConsecutiveMatches = removeConsecutiveMatches( matchesInSentence ); - const matchesCount = countMatches( matchesInSentenceWithoutConsecutiveMatches, topicForms.keyphraseForms, locale, isExactMatchRequested ); - const markings = getMarkingsInSentence( sentence, matchesInSentence, matchWordCustomHelper, locale ); - - result.markings.push( markings ); - result.count += matchesCount; + const matchesInSentence = topicForms.keyphraseForms.map( wordForms => matchKeyphraseWithSentence( sentence, + wordForms, locale, isExactMatchRequested ) ); + const hasAllKeywords = matchesInSentence.every( wordForms => wordForms.count > 0 ); + if ( hasAllKeywords ) { + const counts = matchesInSentence.map( match => match.count ); + const totalMatchCount = Math.min( ...counts ); + let foundWords = flattenDeep( matchesInSentence.map( match => match.matches ) ); + const nonConsecutiveMatches = removeConsecutiveMatches( foundWords ); + + if ( totalMatchCount > 2 && nonConsecutiveMatches.length < foundWords.length ) { + // Only count 2 occurrences if a keyphrase is found more than 2 times consecutively. + // Q: Do we want to highlight the removed occurrences as well?. 
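// Illustrative example, assuming a paragraph "<p>key key key key</p>" and the keyphrase "key":
// totalMatchCount would be 4, but only the first two tokens of the consecutive run are kept,
// so the count added below is capped at 2 for this sentence.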
+ foundWords = nonConsecutiveMatches; + result.count += 2; + } else { + result.count += totalMatchCount; + } + const markings = getMarkingsInSentence( sentence, foundWords, matchWordCustomHelper, locale ); + result.markings.push( markings ); + } } ); return result; @@ -135,9 +94,7 @@ export function countKeyphraseInText( sentences, topicForms, locale, matchWordCu export default function keyphraseCount( paper, researcher ) { const topicForms = researcher.getResearch( "morphology" ); topicForms.keyphraseForms = topicForms.keyphraseForms.map( word => word.map( form => normalizeSingle( form ) ) ); - // [ [tortie], [cat, cats] ] - // KW: tortie cat - // A pretty tortie cat, among many cats. Cats are beautiful. A pretty tortie cat, among many cats. Cats are beautiful. + if ( topicForms.keyphraseForms.length === 0 ) { return { count: 0, @@ -156,7 +113,7 @@ export default function keyphraseCount( paper, researcher ) { return { count: keyphraseFound.count, - markings: keyphraseFound.count === 0 ? [] : flatten( keyphraseFound.markings ), + markings: flatten( keyphraseFound.markings ), length: topicForms.keyphraseForms.length, }; } From 01a3d12406d88ee040d4a13d4e2a5bc2ddaf876d Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Wed, 21 Jun 2023 16:38:55 +0200 Subject: [PATCH 181/616] remove unused helpers --- .../matchTokenWithWordFormsSpec.js | 20 ----- .../removeConsecutiveKeyphraseMatchesSpec.js | 88 ------------------- .../keywordCount/matchTokenWithWordForms.js | 55 ------------ .../removeConsecutiveKeyphraseMatches.js | 59 ------------- 4 files changed, 222 deletions(-) delete mode 100644 packages/yoastseo/spec/languageProcessing/helpers/keywordCount/matchTokenWithWordFormsSpec.js delete mode 100644 packages/yoastseo/spec/languageProcessing/helpers/keywordCount/removeConsecutiveKeyphraseMatchesSpec.js delete mode 100644 packages/yoastseo/src/languageProcessing/helpers/keywordCount/matchTokenWithWordForms.js delete mode 100644 packages/yoastseo/src/languageProcessing/helpers/keywordCount/removeConsecutiveKeyphraseMatches.js diff --git a/packages/yoastseo/spec/languageProcessing/helpers/keywordCount/matchTokenWithWordFormsSpec.js b/packages/yoastseo/spec/languageProcessing/helpers/keywordCount/matchTokenWithWordFormsSpec.js deleted file mode 100644 index b21c9f16ac7..00000000000 --- a/packages/yoastseo/spec/languageProcessing/helpers/keywordCount/matchTokenWithWordFormsSpec.js +++ /dev/null @@ -1,20 +0,0 @@ -import matchTokenWithWordForms from "../../../../src/languageProcessing/helpers/keywordCount/matchTokenWithWordForms"; -import Token from "../../../../src/parse/structure/Token"; - -/* eslint-disable max-len */ -const testCases = [ [ "The wordforms contain the token", [ "keyword" ] , new Token( "keyword" ), "en_US", true ], - [ "The wordforms do not contain the token", [ "keyword" ], new Token( "notkeyword" ), "en_US", false ], - [ "The wordforms contain the token in a different form: singular matches plural", [ "keyword", "keywords" ], new Token( "keyword" ), "en_US", true ], - [ "The wordforms contain the token in a different form: plural matches singular", [ "keyword", "keywords" ], new Token( "keywords" ), "en_US", true ], - [ "The wordforms contain the token but with a capital letter", [ "Keyword" ], new Token( "keyword" ), "en_US", true ], - [ "The wordforms contain the token in a different form: singular matches plural (different locale)", [ "anahtar", "anahtarlar" ], new Token( "Anahtar" ), "tr_TR", true ], - [ "The wordforms contain a token that starts with a dollar sign.", [ 
"$keyword" ], new Token( "$keyword" ), "en_US", true ], -]; -/* eslint-enable max-len */ - -describe.each( testCases )( "matchTokenWithWordForms", ( testDescription, wordForms, token, locale, expectedResult ) => { - it( testDescription, () => { - expect( matchTokenWithWordForms( wordForms, token, locale ) ).toBe( expectedResult ); - } ); -} ); - diff --git a/packages/yoastseo/spec/languageProcessing/helpers/keywordCount/removeConsecutiveKeyphraseMatchesSpec.js b/packages/yoastseo/spec/languageProcessing/helpers/keywordCount/removeConsecutiveKeyphraseMatchesSpec.js deleted file mode 100644 index 833ef6a6cd8..00000000000 --- a/packages/yoastseo/spec/languageProcessing/helpers/keywordCount/removeConsecutiveKeyphraseMatchesSpec.js +++ /dev/null @@ -1,88 +0,0 @@ -import removeConsecutiveKeyphraseMatches from "../../../../src/languageProcessing/helpers/keywordCount/removeConsecutiveKeyphraseMatches"; - -/* -The following cases are tested: - -This case is consecutive. -keyword -This is a nice string with a keyword keyword keyword. - -This case is not consecutive. -key word -This is a nice string with a key word key word key word. - -This case is not consecutive. -keyword -This is a nice string with a keyword keywords keyword. - -This case is not consecutive. -keyword -This is a nice string with a keyword keyword nonkeyword keyword. - */ - -/* eslint-disable max-len */ -const testCases = [ - [ - "keyphrases are consecutive when they have length one and occur after each other 3 or more times", - "keyword", - { primaryMatches: [ - [ { text: "keyword", sourceCodeRange: { startOffset: 0, endOffset: 7 } } ], - [ { text: "keyword", sourceCodeRange: { startOffset: 8, endOffset: 15 } } ], - [ { text: "keyword", sourceCodeRange: { startOffset: 16, endOffset: 23 } } ], - ] }, - { primaryMatches: [ - [ { text: "keyword", sourceCodeRange: { startOffset: 0, endOffset: 7 } } ], - [ { text: "keyword", sourceCodeRange: { startOffset: 8, endOffset: 15 } } ], - ] }, - ], - [ - "keyphrase matches are not consecutive when they consist of multiple tokens", - "key word", - { primaryMatches: [ - [ { text: "key", sourceCodeRange: { startOffset: 0, endOffset: 3 } }, { text: "word", sourceCodeRange: { startOffset: 3, endOffset: 7 } } ], - [ { text: "key", sourceCodeRange: { startOffset: 8, endOffset: 11 } }, { text: "word", sourceCodeRange: { startOffset: 11, endOffset: 15 } } ], - [ { text: "key", sourceCodeRange: { startOffset: 16, endOffset: 19 } }, { text: "word", sourceCodeRange: { startOffset: 19, endOffset: 23 } } ], - ] }, - { primaryMatches: [ - [ { text: "key", sourceCodeRange: { startOffset: 0, endOffset: 3 } }, { text: "word", sourceCodeRange: { startOffset: 3, endOffset: 7 } } ], - [ { text: "key", sourceCodeRange: { startOffset: 8, endOffset: 11 } }, { text: "word", sourceCodeRange: { startOffset: 11, endOffset: 15 } } ], - [ { text: "key", sourceCodeRange: { startOffset: 16, endOffset: 19 } }, { text: "word", sourceCodeRange: { startOffset: 19, endOffset: 23 } } ], - ] }, - ], - [ - "keyphrase matches are not consecutive when they are the same word but in a different form (morphology doesn't count)", - "keyword", - { primaryMatches: [ - [ { text: "keyword", sourceCodeRange: { startOffset: 0, endOffset: 7 } } ], - [ { text: "keywords", sourceCodeRange: { startOffset: 8, endOffset: 16 } } ], - [ { text: "keyword", sourceCodeRange: { startOffset: 17, endOffset: 24 } } ], - ] }, - { primaryMatches: [ - [ { text: "keyword", sourceCodeRange: { startOffset: 0, endOffset: 7 } } ], - [ { text: "keywords", 
sourceCodeRange: { startOffset: 8, endOffset: 16 } } ], - [ { text: "keyword", sourceCodeRange: { startOffset: 17, endOffset: 24 } } ], - ] }, - ], - [ - "keyphrase matches are not consecutive when they do not occur right after each other", - "keyword", - { primaryMatches: [ - [ { text: "keyword", sourceCodeRange: { startOffset: 0, endOffset: 7 } } ], - [ { text: "keyword", sourceCodeRange: { startOffset: 8, endOffset: 15 } } ], - [ { text: "keyword", sourceCodeRange: { startOffset: 27, endOffset: 34 } } ], - ] }, - { primaryMatches: [ - [ { text: "keyword", sourceCodeRange: { startOffset: 0, endOffset: 7 } } ], - [ { text: "keyword", sourceCodeRange: { startOffset: 8, endOffset: 15 } } ], - [ { text: "keyword", sourceCodeRange: { startOffset: 27, endOffset: 34 } } ], - ] }, - ], -]; -/* eslint-enable max-len */ - -describe.each( testCases )( "removeConsecutiveKeyPhraseMatches", - ( descriptor, keyphrase, result, expected ) => { - it( descriptor, () => { - expect( removeConsecutiveKeyphraseMatches( result ) ).toStrictEqual( expected ); - } ); - } ); diff --git a/packages/yoastseo/src/languageProcessing/helpers/keywordCount/matchTokenWithWordForms.js b/packages/yoastseo/src/languageProcessing/helpers/keywordCount/matchTokenWithWordForms.js deleted file mode 100644 index 2c656f06ac0..00000000000 --- a/packages/yoastseo/src/languageProcessing/helpers/keywordCount/matchTokenWithWordForms.js +++ /dev/null @@ -1,55 +0,0 @@ -import addWordBoundary from "../word/addWordboundary"; -import { replaceTurkishIsMemoized } from "../transliterate/specialCharacterMappings"; -// import matchTextWithTransliteration from "../match/matchTextWithTransliteration"; - -/** - * Creates a regex from the keyword with included wordboundaries. - * - * @param {string} keyword The keyword to create a regex from. - * @param {string} locale The locale. - * - * @returns {RegExp} Regular expression of the keyword with word boundaries. - */ -const toRegex = function( keyword, locale ) { - keyword = addWordBoundary( keyword, false, "", locale ); - return new RegExp( keyword, "ig" ); -}; - -/** - * Matches a string with and without transliteration. - * Adapted from yoastseo/src/languageProcessing/helpers/match/matchTextWithTransliteration.js - * @param {string} text The text to match. - * @param {string} keyword The keyword to match in the text. - * @param {string} locale The locale used for transliteration. - * @returns {Boolean} All matches from the original as the transliterated text and keyword. - */ -const matchTextWithTransliteration = function( text, keyword, locale ) { - let keywordRegex = toRegex( keyword, locale ); - - if ( locale === "tr_TR" ) { - const turkishMappings = replaceTurkishIsMemoized( keyword ); - keywordRegex = new RegExp( turkishMappings.map( x => addWordBoundary( x ) ).join( "|" ), "ig" ); - } - const matches = text.match( keywordRegex ); - - return !! matches; -}; - -/** - * Checks if a token matches with different forms of a word. - * @param {string[]} wordForms Different forms of a word to match the token against. - * @param {Token} token The token to match. - * @param {string} locale The locale used for transliteration. - * @returns {boolean|*} True if the token matches with one of the word forms. - */ -const matchTokenWithWordForms = ( wordForms, token, locale ) => { - // First try a literal match to avoid unnecessary regexes. - if ( wordForms.includes( token.text ) ) { - return true; - } - return wordForms.some( keyword => { - return !! 
matchTextWithTransliteration( token.text, keyword, locale ); - } ); -}; - -export default matchTokenWithWordForms; diff --git a/packages/yoastseo/src/languageProcessing/helpers/keywordCount/removeConsecutiveKeyphraseMatches.js b/packages/yoastseo/src/languageProcessing/helpers/keywordCount/removeConsecutiveKeyphraseMatches.js deleted file mode 100644 index 762a40d439b..00000000000 --- a/packages/yoastseo/src/languageProcessing/helpers/keywordCount/removeConsecutiveKeyphraseMatches.js +++ /dev/null @@ -1,59 +0,0 @@ -/** - * Checks if the first token is directly followed by the second token. - * @param {Token} firstToken The first token. - * @param {Token} secondToken The second token. - * @returns {boolean} Whether the tokens are consecutive. - */ -const tokensAreConsecutive = ( firstToken, secondToken ) => { - return firstToken.sourceCodeRange.endOffset + 1 === secondToken.sourceCodeRange.startOffset; -}; - -/** - * Matches the keyphrase forms in an array of tokens. - * @param {Token[]} currentMatch The current match. - * @param {Token[]} firstPreviousMatch The first previous match. - * @param {Token[]} secondPreviousMatch The second previous match. - * @returns {boolean} Whether the keyphrase forms are consecutive. - */ -const isConsecutiveKeyphrase = ( currentMatch, firstPreviousMatch, secondPreviousMatch ) => { - return currentMatch[ 0 ].text === firstPreviousMatch[ 0 ].text && - tokensAreConsecutive( firstPreviousMatch[ 0 ], currentMatch[ 0 ] ) && - firstPreviousMatch[ 0 ].text === secondPreviousMatch[ 0 ].text && - tokensAreConsecutive( secondPreviousMatch[ 0 ], firstPreviousMatch[ 0 ] ); -}; - -/** - * Matches the keyphrase forms in an array of tokens. - * @param {Object} result The result object. - * @returns {{primaryMatches: *[], secondaryMatches: *[], position: number}} The result object with the consecutive keyphrase removed. - */ -const removeConsecutiveKeyphraseFromResult = ( result ) => { - const newResult = { ...result, primaryMatches: [] }; - // If three or more matches are consecutive, only keep the first two. - // Iterate backwards over the primary matches - for ( let i = result.primaryMatches.length - 1; i >= 0; i-- ) { - const currentMatch = result.primaryMatches[ i ]; - // If the current match consists of two or more tokens, add it to the front of the new primary matches. - if ( currentMatch.length > 1 ) { - newResult.primaryMatches.unshift( currentMatch ); - continue; - } - - // If the current match is the same as the previous two matches, and it is consecutive, do not add it to the primary matches. 
- const firstPreviousMatch = result.primaryMatches[ i - 1 ]; - const secondPreviousMatch = result.primaryMatches[ i - 2 ]; - - if ( firstPreviousMatch && secondPreviousMatch ) { - if ( isConsecutiveKeyphrase( currentMatch, firstPreviousMatch, secondPreviousMatch ) ) { - continue; - } else { - newResult.primaryMatches.unshift( currentMatch ); - } - } else { - newResult.primaryMatches.unshift( currentMatch ); - } - } - return newResult; -}; - -export default removeConsecutiveKeyphraseFromResult; From 2541c0b0f5ce2edeb1717883495ff1e72c0f812e Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Thu, 22 Jun 2023 16:55:01 +0200 Subject: [PATCH 182/616] Remove check for consecutive matching --- .../researches/keywordCountSpec.js | 135 ++++++------------ .../researches/keywordCount.js | 55 ++----- 2 files changed, 48 insertions(+), 142 deletions(-) diff --git a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js index 0616ac9b657..de8b1719a00 100644 --- a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js +++ b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js @@ -232,6 +232,28 @@ const testCases = [ position: { endOffset: 55, startOffset: 42 } } ) ], skip: false, }, + { + description: "counts all occurrence in the sentence, even when they occur more than 2 time consecutively", + paper: new Paper( "

<p>this is a keyword keyword keyword.</p>
", { keyword: "keyword", locale: "en_US" } ), + keyphraseForms: [ [ "keyword", "keywords" ] ], + expectedCount: 3, + expectedMarkings: [ + new Mark( { marked: "this is a keyword keyword keyword.", + original: "this is a keyword keyword keyword.", + position: { endOffset: 36, startOffset: 13 } } ) ], + skip: false, + }, + { + description: "counts all occurrence in the sentence, even when they occur more than 2 time consecutively: with exact matching", + paper: new Paper( "

<p>A sentence about a red panda red panda red panda.</p>
", { keyword: "\"red panda\"", locale: "en_US" } ), + keyphraseForms: [ [ "red panda" ] ], + expectedCount: 3, + expectedMarkings: [ + new Mark( { marked: "A sentence about a red panda red panda red panda.", + original: "A sentence about a red panda red panda red panda.", + position: { endOffset: 51, startOffset: 22 } } ) ], + skip: false, + }, ]; // eslint-disable-next-line max-len @@ -451,7 +473,7 @@ const testCasesWithLocaleMapping = [ skip: false, }, { - description: "counts a string of text with Spanish accented vowel", + description: "doesn't count a match if the keyphrase is with diacritic while the occurrence in the text is with diacritic", paper: new Paper( "

<p>Acción Española fue una revista.</p>
", { keyword: "accion", locale: "es_ES" } ), keyphraseForms: [ [ "accion" ] ], expectedCount: 0, @@ -527,6 +549,23 @@ const testCasesWithLocaleMapping = [ ], skip: false, }, + { + description: "still counts a string with a Turkish diacritics when exact matching is used", + paper: new Paper( "

<p>Türkçe and Turkce</p>
", { keyword: "\"Türkçe\"", locale: "tr_TR" } ), + keyphraseForms: [ [ "Türkçe" ] ], + expectedCount: 2, + expectedMarkings: [ + new Mark( { + marked: "Türkçe and Turkce", + original: "Türkçe and Turkce", + position: { endOffset: 9, startOffset: 3 } } ), + new Mark( { + marked: "Türkçe and Turkce", + original: "Türkçe and Turkce", + position: { endOffset: 20, startOffset: 14 } } ), + ], + skip: false, + }, { description: "counts a string with with a different forms of Turkish i, kephrase: İstanbul", paper: new Paper( "

<p>İstanbul and Istanbul and istanbul and ıstanbul</p>
", { keyword: "İstanbul", locale: "tr_TR" } ), @@ -659,100 +698,6 @@ describe.each( testCasesWithLocaleMapping )( "Test for counting the keyword in a } ); } ); -const testDataForConsecutiveMatching = [ - { - description: "consecutive 1", - paper: new Paper( "

<p>this is a keywords keywords keywords.</p>
", { keyword: "keyword", locale: "en_US" } ), - keyphraseForms: [ [ "keyword", "keywords" ] ], - expectedCount: 2, - expectedMarkings: [ - new Mark( { marked: "this is a keywords keywords keywords.", - original: "this is a keywords keywords keywords.", - position: { endOffset: 30, startOffset: 13 } } ) ], - skip: false, - }, - { - description: "consecutive 2", - paper: new Paper( "

<p>this is a keyword keyword keyword.</p>
", { keyword: "keyword", locale: "en_US" } ), - keyphraseForms: [ [ "keyword", "keywords" ] ], - expectedCount: 2, - expectedMarkings: [ - new Mark( { marked: "this is a keyword keyword keyword.", - original: "this is a keyword keyword keyword.", - position: { endOffset: 28, startOffset: 13 } } ) ], - skip: false, - }, - { - description: "consecutive 3", - paper: new Paper( "

<p>this is a keyword keyword keyword.</p>
", { keyword: "keywords", locale: "en_US" } ), - keyphraseForms: [ [ "keyword", "keywords" ] ], - expectedCount: 2, - expectedMarkings: [ - new Mark( { marked: "this is a keyword keyword keyword.", - original: "this is a keyword keyword keyword.", - position: { endOffset: 28, startOffset: 13 } } ) ], - skip: false, - }, - { - description: "consecutive 4", - paper: new Paper( "

<p>this is a key key key word word word.</p>
", { keyword: "key words", locale: "en_US" } ), - keyphraseForms: [ [ "key" ], [ "word", "words" ] ], - expectedCount: 2, - expectedMarkings: [ - new Mark( { - marked: "this is a key key key " + - "word word word.", - original: "this is a key key key word word word.", - position: { endOffset: 20, startOffset: 13 } } ), - new Mark( { - marked: "this is a key key key " + - "word word word.", - original: "this is a key key key word word word.", - position: { endOffset: 34, startOffset: 25 } } ) ], - skip: false, - }, - { - description: "doesn't recognize consecutive occurrences when the keyphrase contains multiple words, with non-exact matching", - paper: new Paper( "

<p>this is a key word key word key word.</p>
", { keyword: "key words", locale: "en_US" } ), - keyphraseForms: [ [ "key" ], [ "word", "words" ] ], - expectedCount: 3, - expectedMarkings: [ - new Mark( { marked: "this is a key word key word key word.", - original: "this is a key word key word key word.", - position: { endOffset: 39, startOffset: 13 } } ) ], - skip: false, - }, - { - description: "doesn't recognize consecutive occurrences when the keyphrase contains multiple words: with exact matching", - paper: new Paper( "

<p>A sentence about a red panda red panda red panda.</p>
", { keyword: "\"red panda\"", locale: "en_US" } ), - keyphraseForms: [ [ "red panda" ] ], - expectedCount: 3, - expectedMarkings: [ - new Mark( { marked: "A sentence about a red panda red panda red panda.", - original: "A sentence about a red panda red panda red panda.", - position: { endOffset: 51, startOffset: 22 } } ) ], - skip: false, - }, -]; - -describe.each( testDataForConsecutiveMatching )( "Test for counting the consecutive occurrences of keyword in the text", - function( { - description, - paper, - keyphraseForms, - expectedCount, - expectedMarkings, - skip } ) { - const test = skip ? it.skip : it; - - test( description, function() { - const mockResearcher = buildMorphologyMockResearcher( keyphraseForms ); - buildTree( paper, mockResearcher ); - const keyWordCountResult = keyphraseCount( paper, mockResearcher ); - expect( keyWordCountResult.count ).toBe( expectedCount ); - expect( keyWordCountResult.markings ).toEqual( expectedMarkings ); - } ); - } ); /** * Mocks Japanese Researcher. diff --git a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js index 985f4d01b3b..0e98ab7ebc3 100644 --- a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js +++ b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js @@ -5,42 +5,6 @@ import getMarkingsInSentence from "../helpers/highlighting/getMarkingsInSentence import matchKeyphraseWithSentence from "../helpers/match/matchKeyphraseWithSentence"; import isDoubleQuoted from "../helpers/match/isDoubleQuoted"; -/** - * Creates a new array in which consecutive matches are removed. A consecutive match occurs if the same keyphrase occurs more than twice in a row. - * The first and second match are kept, the rest is removed. - * - * @param {Token[]} matches An array of all matches. (Including consecutive matches). - * - * @returns {Token[]} An array of matches without consecutive matches. - */ -const removeConsecutiveMatches = ( matches ) => { - // If there are three or more matches in a row, remove all but the first and the second. - const matchesCopy = [ ...matches ]; - - let nrConsecutiveMatches = 0; - let previousMatch = null; - const result = []; - - for ( let i = 0; i < matchesCopy.length; i++ ) { - const match = matchesCopy[ i ]; - - if ( previousMatch && match.sourceCodeRange.startOffset === previousMatch.sourceCodeRange.endOffset + 1 && - match.text.toLowerCase() === previousMatch.text.toLowerCase() ) { - nrConsecutiveMatches += 1; - } else { - nrConsecutiveMatches = 0; - } - - if ( nrConsecutiveMatches < 2 ) { - result.push( match ); - } - - previousMatch = match; - } - - return result; -}; - /** * Counts the occurrences of the keyphrase in the text and creates the Mark objects for the matches. * @@ -64,18 +28,10 @@ export function countKeyphraseInText( sentences, topicForms, locale, matchWordCu if ( hasAllKeywords ) { const counts = matchesInSentence.map( match => match.count ); const totalMatchCount = Math.min( ...counts ); - let foundWords = flattenDeep( matchesInSentence.map( match => match.matches ) ); - const nonConsecutiveMatches = removeConsecutiveMatches( foundWords ); - - if ( totalMatchCount > 2 && nonConsecutiveMatches.length < foundWords.length ) { - // Only count 2 occurrences if a keyphrase is found more than 2 times consecutively. - // Q: Do we want to highlight the removed occurrences as well?. 
- foundWords = nonConsecutiveMatches; - result.count += 2; - } else { - result.count += totalMatchCount; - } + const foundWords = flattenDeep( matchesInSentence.map( match => match.matches ) ); const markings = getMarkingsInSentence( sentence, foundWords, matchWordCustomHelper, locale ); + + result.count += totalMatchCount; result.markings.push( markings ); } } ); @@ -109,6 +65,11 @@ export default function keyphraseCount( paper, researcher ) { // Exact matching is requested when the keyphrase is enclosed in double quotes. const isExactMatchRequested = isDoubleQuoted( paper.getKeyword() ); + /* + * Count the amount of keyphrase occurrences in the sentences. + * An occurrence is counted when all words of the keyphrase are contained within the sentence. Each sentence can contain multiple keyphrases. + * (e.g. "The apple potato is an apple and a potato." has two occurrences of the keyphrase "apple potato"). + * */ const keyphraseFound = countKeyphraseInText( sentences, topicForms, locale, matchWordCustomHelper, isExactMatchRequested ); return { From 289fc7a8b0f7736eacd47dc407584d0be59c9875 Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Thu, 22 Jun 2023 16:57:23 +0200 Subject: [PATCH 183/616] Adjust check for exact matching --- .../match/matchKeyphraseWithSentence.js | 55 ++++++++++++------- .../seo/KeywordDensityAssessment.js | 2 - 2 files changed, 34 insertions(+), 23 deletions(-) diff --git a/packages/yoastseo/src/languageProcessing/helpers/match/matchKeyphraseWithSentence.js b/packages/yoastseo/src/languageProcessing/helpers/match/matchKeyphraseWithSentence.js index a5bdeb35146..bb51efe0e42 100644 --- a/packages/yoastseo/src/languageProcessing/helpers/match/matchKeyphraseWithSentence.js +++ b/packages/yoastseo/src/languageProcessing/helpers/match/matchKeyphraseWithSentence.js @@ -7,31 +7,33 @@ import getWordsForHTMLParser from "../word/getWordsForHTMLParser"; * This function assumes that if a keyphrase needs to be matched exactly, there will be only one keyword form. * This is the result of how the focus keyword is processed in buildTopicStems.js in the buildStems function. * - * @param {(string[])} keywordForm The keyword forms to tokenize. + * @param {(string[])} wordForms The keyword forms to tokenize. * * @returns {string[]} The tokenized keyword forms. */ -export const tokenizeKeywordFormsForExactMatching = ( keywordForm ) => { +export const tokenizeKeywordFormsForExactMatching = ( wordForms ) => { // Tokenize keyword forms. - const keywordFormsText = keywordForm[ 0 ]; + const keywordFormsText = wordForms[ 0 ]; return getWordsForHTMLParser( keywordFormsText ); }; /** * Gets the exact matches of the keyphrase. * Exact matching happens when the user puts the keyword in double quotes. + * * @param {Sentence} sentence The sentence to match the keyword forms with. - * @param {string[]} keywordForm The keyword forms to match. + * @param {string[]} wordForms The keyword forms to match. + * @param {string} locale The locale used in the analysis. * - * @returns {Object} The tokens that exactly match the keyword forms. + * @returns {{count: number, matches: Token[]}} Object containing the number of the exact matches and the matched tokens. */ -const exactMatchKeyphraseWithSentence = ( sentence, keywordForm ) => { +const findExactMatchKeyphraseInSentence = ( sentence, wordForms, locale ) => { const result = { count: 0, matches: [], }; // Tokenize keyword form. 
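	// Illustrative aside, not part of the original patch: the single word form of an exact-match
	// keyphrase is split with the same tokenizer used for the parsed sentence tokens, so the loop
	// below can compare the sentence with the keyphrase token by token (including space tokens).
	// Roughly: tokenizeKeywordFormsForExactMatching( [ "key word" ] ) => [ "key", " ", "word" ].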
- const keywordTokens = tokenizeKeywordFormsForExactMatching( keywordForm ); + const keywordTokens = tokenizeKeywordFormsForExactMatching( wordForms ); const sentenceTokens = sentence.tokens; @@ -42,10 +44,12 @@ const exactMatchKeyphraseWithSentence = ( sentence, keywordForm ) => { while ( sentenceIndex < sentenceTokens.length ) { // If the current sentence token matches the current keyword token, add it to the current match. - const sentenceTokenText = escapeRegExp( sentenceTokens[ sentenceIndex ].text ); + const sentenceTokenText = sentenceTokens[ sentenceIndex ].text; const keywordTokenText = escapeRegExp( keywordTokens[ keywordIndex ] ); - if ( sentenceTokenText.toLowerCase() === keywordTokenText.toLowerCase() ) { + const foundMatches = matchTextWithTransliteration( sentenceTokenText.toLowerCase(), keywordTokenText.toLowerCase(), locale ); + + if ( foundMatches.length > 0 ) { currentMatch.push( sentenceTokens[ sentenceIndex ] ); keywordIndex++; } else { @@ -67,7 +71,15 @@ const exactMatchKeyphraseWithSentence = ( sentence, keywordForm ) => { return result; }; - +/** + * Matches a word form of the keyphrase with the tokens from the sentence. + * + * @param {Token[]} tokens The array of tokens to check. + * @param {string} wordForm The word form of the keyphrase. + * @param {string} locale The locale used in the analysis. + * + * @returns {Token[]} The array of the matched tokens. + */ const matchTokensWithKeywordForm = ( tokens, wordForm, locale ) => { let matches = []; @@ -88,16 +100,17 @@ const matchTokensWithKeywordForm = ( tokens, wordForm, locale ) => { * @param {Sentence} sentence The sentence to check. * @param {string[]} wordForms The word forms of the keyphrase to check. * @param {string} locale The locale used in the analysis. - * @returns {{count: number, matches: *[]}} Object containing the number of the matches and the matched tokens. + * + * @returns {{count: number, matches: Token[]}} Object containing the number of the matches and the matched tokens. */ -const matchSentenceWithKeywordForms = ( sentence, wordForms, locale ) => { +const matchWordFormsInSentence = ( sentence, wordForms, locale ) => { const tokens = sentence.tokens.slice(); let count = 0; let matches = []; - wordForms.forEach( wordToMatch => { - const occurrence = matchTokensWithKeywordForm( tokens, wordToMatch, locale ); + wordForms.forEach( wordForm => { + const occurrence = matchTokensWithKeywordForm( tokens, wordForm, locale ); count += occurrence.length; matches = matches.concat( occurrence ); } ); @@ -110,14 +123,14 @@ const matchSentenceWithKeywordForms = ( sentence, wordForms, locale ) => { /** * Matches a keyword with a sentence object from the html parser. * + * @param {Sentence} sentence The sentence to match against the keywordForms. * @param {string[]} wordForms The keyword forms. - * E.g. If the keyphrase is "key word", then (if premium is activated) this will be [ [ "key", "keys" ], [ "word", "words" ] ] + * E.g. If the keyphrase is "key word", then (if premium is activated) this will be [ "key", "keys" ] OR [ "word", "words" ] * The forms are retrieved higher up (among others in keywordCount.js) with researcher.getResearch( "morphology" ). - * @param {Sentence} sentence The sentence to match against the keywordForms. * @param {string} locale The locale used for transliteration. * @param {boolean} useExactMatching Whether to match the keyword forms exactly or not. - * Depends on whether the user has put the keyphrase in double quotes. 
- * @returns {Object} Object containing the number of the matches and the matched tokens. + * Depends on whether the keyphrase enclosed in double quotes. + * @returns {{count: number, matches: Token[]}} Object containing the number of the matches and the matched tokens. * * The algorithm is as follows: * @@ -131,11 +144,11 @@ const matchSentenceWithKeywordForms = ( sentence, wordForms, locale ) => { * This function corrects for these differences by combining tokens that are separated by a word coupler (e.g. "-") into one token: the matchToken. * This matchToken is then compared with the keyword forms. */ -const matchKeyphraseWithSentence = ( wordForms, sentence, locale, useExactMatching = false ) => { +const matchKeyphraseWithSentence = ( sentence, wordForms, locale, useExactMatching = false ) => { if ( useExactMatching ) { - return exactMatchKeyphraseWithSentence( sentence, wordForms ); + return findExactMatchKeyphraseInSentence( sentence, wordForms, locale ); } - return matchSentenceWithKeywordForms( sentence, wordForms, locale ); + return matchWordFormsInSentence( sentence, wordForms, locale ); }; export default matchKeyphraseWithSentence; diff --git a/packages/yoastseo/src/scoring/assessments/seo/KeywordDensityAssessment.js b/packages/yoastseo/src/scoring/assessments/seo/KeywordDensityAssessment.js index 25d132ff2b9..a1157e098f7 100644 --- a/packages/yoastseo/src/scoring/assessments/seo/KeywordDensityAssessment.js +++ b/packages/yoastseo/src/scoring/assessments/seo/KeywordDensityAssessment.js @@ -7,8 +7,6 @@ import AssessmentResult from "../../../values/AssessmentResult"; import { inRangeEndInclusive, inRangeStartEndInclusive, inRangeStartInclusive } from "../../helpers/assessments/inRange"; import { createAnchorOpeningTag } from "../../../helpers/shortlinker"; import keyphraseLengthFactor from "../../helpers/assessments/keyphraseLengthFactor.js"; -import removeHtmlBlocks from "../../../languageProcessing/helpers/html/htmlParser"; -import getWords from "../../../languageProcessing/helpers/word/getWords"; import getAllWordsFromTree from "../../../languageProcessing/helpers/word/getAllWordsFromTree"; /** From e259bed58e5fc7bd4e11e86b7dac2fa9023d04aa Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Thu, 22 Jun 2023 17:02:56 +0200 Subject: [PATCH 184/616] Add a unit test --- .../researches/keywordCountSpec.js | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js index de8b1719a00..41fc7e8b536 100644 --- a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js +++ b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js @@ -131,7 +131,8 @@ const testCases = [ skip: false, }, { - description: "matches both singular and reduplicated plural form of the keyword in Indonesian", + description: "matches both singular and reduplicated plural form of the keyword in Indonesian, " + + "the plural should be counted as one occurrence", paper: new Paper( "
<p>
Lorem ipsum dolor sit amet, consectetur keyword-keyword, keyword adipiscing elit.
</p>
", { locale: "id_ID", keyword: "keyword" } ), keyphraseForms: [ [ "keyword", "keyword-keyword" ] ], @@ -149,6 +150,21 @@ const testCases = [ position: { endOffset: 67, startOffset: 60 } } ) ], skip: false, }, + { + description: "matches both reduplicated keyphrase separated by '-' as two occurrence in English", + paper: new Paper( "
<p>
Lorem ipsum dolor sit amet, consectetur keyword-keyword, adipiscing elit.
</p>
", + { locale: "en_US", keyword: "keyword" } ), + keyphraseForms: [ [ "keyword", "keyword-keyword" ] ], + expectedCount: 2, + expectedMarkings: [ + new Mark( { + // eslint-disable-next-line max-len + marked: "Lorem ipsum dolor sit amet, consectetur keyword-keyword, adipiscing elit.", + original: "Lorem ipsum dolor sit amet, consectetur keyword-keyword, adipiscing elit.", + position: { endOffset: 58, startOffset: 43 } } ), + ], + skip: false, + }, { description: "counts a single word keyphrase with exact matching", paper: new Paper( "
<p>
A string with a keyword.
</p>
", { keyword: "\"keyword\"" } ), From adfc93eb181e0ea095378052193076aeb40352c3 Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Fri, 23 Jun 2023 11:55:40 +0200 Subject: [PATCH 185/616] Adjust the code for Japanese --- .../researches/keywordCountSpec.js | 54 ++++++++++--------- .../highlighting/getMarkingsInSentence.js | 3 +- ...tence.js => matchWordFormsWithSentence.js} | 33 ++++++++---- .../researches/keywordCount.js | 29 +++++----- 4 files changed, 70 insertions(+), 49 deletions(-) rename packages/yoastseo/src/languageProcessing/helpers/match/{matchKeyphraseWithSentence.js => matchWordFormsWithSentence.js} (81%) diff --git a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js index 41fc7e8b536..df9aad42b42 100644 --- a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js +++ b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js @@ -322,6 +322,29 @@ const testCasesWithSpecialCharacters = [ ], skip: false, }, + { + description: "counts 'key' in 'key-word'.", + paper: new Paper( "
<p>
A string with a word of key-phrase.
</p>
", { keyword: "key word" } ), + keyphraseForms: [ [ "key", "keys" ], [ "word", "words" ] ], + expectedCount: 1, + expectedMarkings: [ + new Mark( { + original: "A string with a word of key-phrase.", + marked: "A string with a word of" + + " key-phrase.", + position: { endOffset: 23, startOffset: 19 }, + } ), + new Mark( { + original: "A string with a word of key-phrase.", + marked: "A string with a word of" + + " key-phrase.", + position: { endOffset: 30, startOffset: 27 }, + } ), + ], + // eslint-disable-next-line no-warning-comments + // TODO: fix the highlighting. Should we only highlight "key", and not "key-phrase" in this case? + skip: false, + }, { description: "counts a string with a keyword with a '_' in it", paper: new Paper( "
<p>
A string with a key_word.
</p>
", { keyword: "key_word" } ), @@ -739,10 +762,9 @@ const buildJapaneseMockResearcher = function( keyphraseForms, helper1, helper2 ) } ); }; - -// Decided not to remove test below as it tests the added logic of the Japanese helpers. describe( "Test for counting the keyword in a text for Japanese", () => { - it.skip( "counts/marks a string of text with a keyword in it.", function() { + // NOTE: Japanese is not yet adapted to use HTML parser, hence, the marking out doesn't include the position information. + it( "counts/marks a string of text with a keyword in it.", function() { const mockPaper = new Paper( "
<p>
私の猫はかわいいです。 { expect( keyphraseCount( mockPaper, researcher ).count ).toBe( 1 ); expect( keyphraseCount( mockPaper, researcher ).markings ).toEqual( [ new Mark( { marked: "私のはかわいいです。", - original: "私の猫はかわいいです。", position: { endOffset: 6, startOffset: 5 } } ) ] ); + original: "私の猫はかわいいです。" } ) ] ); } ); - it.skip( "counts/marks a string of text with multiple occurences of the same keyword in it.", function() { + it( "counts/marks a string of text with multiple occurences of the same keyword in it.", function() { const mockPaper = new Paper( "
<p>
私の猫はかわいい猫です。 { expect( keyphraseCount( mockPaper, researcher ).markings ).toEqual( [ new Mark( { marked: "私のはかわいいです。", original: "私の猫はかわいい猫です。", - position: { endOffset: 6, startOffset: 5 } } ), new Mark( { marked: "私のはかわいい" + - "です。", - original: "私の猫はかわいい猫です。", - position: { endOffset: 12, startOffset: 11 } } ) ] ); + } ) ] ); } ); - it.skip( "counts a string if text with no keyword in it.", function() { + it( "counts a string if text with no keyword in it.", function() { const mockPaper = new Paper( "私の猫はかわいいです。", { locale: "ja" } ); const researcher = buildJapaneseMockResearcher( [ [ "猫" ], [ "会い" ] ], wordsCountHelper, matchWordsHelper ); buildTree( mockPaper, researcher ); @@ -776,7 +795,7 @@ describe( "Test for counting the keyword in a text for Japanese", () => { expect( keyphraseCount( mockPaper, researcher ).markings ).toEqual( [] ); } ); - it.skip( "counts multiple occurrences of a keyphrase consisting of multiple words.", function() { + it( "counts multiple occurrences of a keyphrase consisting of multiple words.", function() { const mockPaper = new Paper( "
<p>
私の猫はかわいいですかわいい。
</p>
", { locale: "ja" } ); const researcher = buildJapaneseMockResearcher( [ [ "猫" ], [ "かわいい" ] ], wordsCountHelper, matchWordsHelper ); buildTree( mockPaper, researcher ); @@ -786,19 +805,6 @@ describe( "Test for counting the keyword in a text for Japanese", () => { marked: "私のかわいい" + "ですかわいい。", original: "私の猫はかわいいですかわいい。", - position: { endOffset: 6, startOffset: 5 }, - } ), - new Mark( { - marked: "私のかわいい" + - "ですかわいい。", - original: "私の猫はかわいいですかわいい。", - position: { endOffset: 11, startOffset: 7 }, - } ), - new Mark( { - marked: "私のかわいい" + - "ですかわいい。", - original: "私の猫はかわいいですかわいい。", - position: { endOffset: 17, startOffset: 13 }, } ), ] ); } ); diff --git a/packages/yoastseo/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js b/packages/yoastseo/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js index 56014947073..bbb6e65c3b6 100644 --- a/packages/yoastseo/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js +++ b/packages/yoastseo/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js @@ -118,12 +118,11 @@ const mergeConsecutiveAndOverlappingMarkings = ( markings, useSpace = true ) => * * @param {Sentence} sentence The sentence to check. * @param {Array} matchesInSentence An object containing the matches in the sentence. - * @param {function} matchWordCustomHelper A custom helper to match words with a text. * @param {string} locale The locale used in the analysis. * * @returns {Mark[]} The array of Mark objects of the keyphrase matches. */ -function getMarkingsInSentence( sentence, matchesInSentence, matchWordCustomHelper, locale ) { +function getMarkingsInSentence( sentence, matchesInSentence, locale ) { if ( matchesInSentence.length === 0 ) { return []; } diff --git a/packages/yoastseo/src/languageProcessing/helpers/match/matchKeyphraseWithSentence.js b/packages/yoastseo/src/languageProcessing/helpers/match/matchWordFormsWithSentence.js similarity index 81% rename from packages/yoastseo/src/languageProcessing/helpers/match/matchKeyphraseWithSentence.js rename to packages/yoastseo/src/languageProcessing/helpers/match/matchWordFormsWithSentence.js index bb51efe0e42..e57aa7fa55e 100644 --- a/packages/yoastseo/src/languageProcessing/helpers/match/matchKeyphraseWithSentence.js +++ b/packages/yoastseo/src/languageProcessing/helpers/match/matchWordFormsWithSentence.js @@ -91,6 +91,7 @@ const matchTokensWithKeywordForm = ( tokens, wordForm, locale ) => { matches = matches.concat( token ); } } ); + return matches; }; @@ -100,20 +101,23 @@ const matchTokensWithKeywordForm = ( tokens, wordForm, locale ) => { * @param {Sentence} sentence The sentence to check. * @param {string[]} wordForms The word forms of the keyphrase to check. * @param {string} locale The locale used in the analysis. + * @param {function} matchWordCustomHelper Custom function to match a word form with sentence. * - * @returns {{count: number, matches: Token[]}} Object containing the number of the matches and the matched tokens. + * @returns {{count: number, matches: (Token|string)[]}} Object containing the number of the matches and the matched tokens. */ -const matchWordFormsInSentence = ( sentence, wordForms, locale ) => { +const matchWordFormsInSentence = ( sentence, wordForms, locale, matchWordCustomHelper ) => { const tokens = sentence.tokens.slice(); - let count = 0; let matches = []; wordForms.forEach( wordForm => { - const occurrence = matchTokensWithKeywordForm( tokens, wordForm, locale ); + const occurrence = matchWordCustomHelper + ? 
matchWordCustomHelper( sentence.text, wordForm ) + : matchTokensWithKeywordForm( tokens, wordForm, locale ); count += occurrence.length; matches = matches.concat( occurrence ); } ); + return { count: count, matches: matches, @@ -124,13 +128,16 @@ const matchWordFormsInSentence = ( sentence, wordForms, locale ) => { * Matches a keyword with a sentence object from the html parser. * * @param {Sentence} sentence The sentence to match against the keywordForms. - * @param {string[]} wordForms The keyword forms. + * @param {string[]} wordForms The array of keyword forms. * E.g. If the keyphrase is "key word", then (if premium is activated) this will be [ "key", "keys" ] OR [ "word", "words" ] * The forms are retrieved higher up (among others in keywordCount.js) with researcher.getResearch( "morphology" ). + * * @param {string} locale The locale used for transliteration. - * @param {boolean} useExactMatching Whether to match the keyword forms exactly or not. * Depends on whether the keyphrase enclosed in double quotes. - * @returns {{count: number, matches: Token[]}} Object containing the number of the matches and the matched tokens. + * @param {function} matchWordCustomHelper Custom function to match a word form with sentence. + * @param {boolean} useExactMatching Whether to match the keyword forms exactly or not. + * + * @returns {{count: number, matches: (Token|string)[]}} Object containing the number of the matches and the matched tokens. * * The algorithm is as follows: * @@ -144,11 +151,15 @@ const matchWordFormsInSentence = ( sentence, wordForms, locale ) => { * This function corrects for these differences by combining tokens that are separated by a word coupler (e.g. "-") into one token: the matchToken. * This matchToken is then compared with the keyword forms. */ -const matchKeyphraseWithSentence = ( sentence, wordForms, locale, useExactMatching = false ) => { - if ( useExactMatching ) { +const matchWordFormsWithSentence = ( sentence, wordForms, locale, matchWordCustomHelper, useExactMatching = false ) => { + /* + * Only use `findExactMatchKeyphraseInSentence` when the custom helper is not available. + * When the custom helper is available, we don't differenciate the approach for exact matching or non-exact matching. + */ + if ( useExactMatching && ! 
matchWordCustomHelper ) { return findExactMatchKeyphraseInSentence( sentence, wordForms, locale ); } - return matchWordFormsInSentence( sentence, wordForms, locale ); + return matchWordFormsInSentence( sentence, wordForms, locale, matchWordCustomHelper ); }; -export default matchKeyphraseWithSentence; +export default matchWordFormsWithSentence; diff --git a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js index 0e98ab7ebc3..c1d0cd01bd5 100644 --- a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js +++ b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js @@ -2,8 +2,9 @@ import { flatten, flattenDeep } from "lodash-es"; import getSentencesFromTree from "../helpers/sentence/getSentencesFromTree"; import { normalizeSingle } from "../helpers/sanitize/quotes"; import getMarkingsInSentence from "../helpers/highlighting/getMarkingsInSentence"; -import matchKeyphraseWithSentence from "../helpers/match/matchKeyphraseWithSentence"; +import matchWordFormsWithSentence from "../helpers/match/matchWordFormsWithSentence"; import isDoubleQuoted from "../helpers/match/isDoubleQuoted"; +import { markWordsInASentence } from "../helpers/word/markWordsInSentences"; /** * Counts the occurrences of the keyphrase in the text and creates the Mark objects for the matches. @@ -20,16 +21,24 @@ export function countKeyphraseInText( sentences, topicForms, locale, matchWordCu const result = { count: 0, markings: [] }; sentences.forEach( sentence => { - // eslint-disable-next-line no-warning-comments - // TODO: test in Japanese to see if we use this helper as well - const matchesInSentence = topicForms.keyphraseForms.map( wordForms => matchKeyphraseWithSentence( sentence, - wordForms, locale, isExactMatchRequested ) ); + const matchesInSentence = topicForms.keyphraseForms.map( wordForms => matchWordFormsWithSentence( sentence, + wordForms, locale, matchWordCustomHelper, isExactMatchRequested ) ); + const hasAllKeywords = matchesInSentence.every( wordForms => wordForms.count > 0 ); + if ( hasAllKeywords ) { const counts = matchesInSentence.map( match => match.count ); const totalMatchCount = Math.min( ...counts ); const foundWords = flattenDeep( matchesInSentence.map( match => match.matches ) ); - const markings = getMarkingsInSentence( sentence, foundWords, matchWordCustomHelper, locale ); + + let markings = []; + + if ( matchWordCustomHelper ) { + // Currently, this check is only applicable for Japanese. + markings = markWordsInASentence( sentence.text, foundWords, matchWordCustomHelper ); + } else { + markings = getMarkingsInSentence( sentence, foundWords, locale ); + } result.count += totalMatchCount; result.markings.push( markings ); @@ -52,11 +61,7 @@ export default function keyphraseCount( paper, researcher ) { topicForms.keyphraseForms = topicForms.keyphraseForms.map( word => word.map( form => normalizeSingle( form ) ) ); if ( topicForms.keyphraseForms.length === 0 ) { - return { - count: 0, - markings: [], - length: 0, - }; + return { count: 0, markings: [], length: 0 }; } const matchWordCustomHelper = researcher.getHelper( "matchWordCustomHelper" ); @@ -82,7 +87,7 @@ export default function keyphraseCount( paper, researcher ) { /** * Calculates the keyphrase count, takes morphology into account. * - * @deprecated Since version 20.8. Use keywordCountInSlug instead. + * @deprecated Since version 20.12. Use keyphraseCount instead. 
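 * @example
 * // Illustrative sketch, not part of the original patch: a rough picture of what the
 * // replacement, keyphraseCount, returns for a Paper with text "<p>A nice key phrase.</p>"
 * // and keyphrase "key phrase" (assuming an English researcher and a built tree):
 * // keyphraseCount( paper, researcher ) => { count: 1, markings: [ Mark ], length: 2 }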
* * @param {Paper} paper The paper containing keyphrase and text. * @param {Researcher} researcher The researcher. From ec45a7f7015fad0d2a0210416b73cb0792ccc34f Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Fri, 23 Jun 2023 15:51:40 +0200 Subject: [PATCH 186/616] Add unit tests --- .../match/matchKeyphraseWithSentenceSpec.js | 1012 ----------------- .../match/matchWordFormsWithSentenceSpec.js | 628 ++++++++++ .../sentence/getSentencesFromTreeSpec.js | 22 +- .../researches/keywordCountSpec.js | 78 +- .../match/matchWordFormsWithSentence.js | 2 +- 5 files changed, 715 insertions(+), 1027 deletions(-) delete mode 100644 packages/yoastseo/spec/languageProcessing/helpers/match/matchKeyphraseWithSentenceSpec.js create mode 100644 packages/yoastseo/spec/languageProcessing/helpers/match/matchWordFormsWithSentenceSpec.js diff --git a/packages/yoastseo/spec/languageProcessing/helpers/match/matchKeyphraseWithSentenceSpec.js b/packages/yoastseo/spec/languageProcessing/helpers/match/matchKeyphraseWithSentenceSpec.js deleted file mode 100644 index ff426463701..00000000000 --- a/packages/yoastseo/spec/languageProcessing/helpers/match/matchKeyphraseWithSentenceSpec.js +++ /dev/null @@ -1,1012 +0,0 @@ -import JapaneseCustomHelper from "../../../../src/languageProcessing/languages/ja/helpers/matchTextWithWord"; -import matchKeyphraseWithSentence from "../../../../src/languageProcessing/helpers/match/matchKeyphraseWithSentence"; - -/* eslint-disable max-len */ -const testCases = [ - { - testDescription: "No matches in sentence", - sentence: { - text: "A sentence with notthekeyphrase.", - tokens: [ - { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, - { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, - { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, - { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, - { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, - { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, - { text: "notthekeyphrase", sourceCodeRange: { startOffset: 16, endOffset: 31 } }, - { text: ".", sourceCodeRange: { startOffset: 31, endOffset: 32 } }, - ], - sourceCodeRange: { startOffset: 0, endOffset: 32 } }, - keyphraseForms: [ [ "keyword", "keywords" ] ], - exactMatching: false, - expectedResult: [], - }, - { - testDescription: "should return empty result if keyphraseForms is empty", - sentence: { - text: "A sentence with notthekeyphrase.", - tokens: [ - { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, - { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, - { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, - { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, - { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, - { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, - { text: "notthekeyphrase", sourceCodeRange: { startOffset: 16, endOffset: 31 } }, - { text: ".", sourceCodeRange: { startOffset: 31, endOffset: 32 } }, - ], - sourceCodeRange: { startOffset: 0, endOffset: 32 } }, - keyphraseForms: [ [] ], - exactMatching: false, - expectedResult: [], - }, - { - testDescription: "One match in sentence of a single-word keyphrase", - sentence: { - text: "A sentence with the keyword.", - tokens: [ - { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, - { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, - { text: "sentence", sourceCodeRange: { startOffset: 2, 
endOffset: 10 } }, - { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, - { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, - { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, - { text: "the", sourceCodeRange: { startOffset: 16, endOffset: 19 } }, - { text: " ", sourceCodeRange: { startOffset: 19, endOffset: 20 } }, - { text: "keyword", sourceCodeRange: { startOffset: 20, endOffset: 27 } }, - { text: ".", sourceCodeRange: { startOffset: 27, endOffset: 28 } }, - ], - sourceCodeRange: { startOffset: 0, endOffset: 28 } }, - keyphraseForms: [ [ "keyword", "keywords" ] ], - exactMatching: false, - expectedResult: [ { sourceCodeRange: { startOffset: 20, endOffset: 27 }, text: "keyword" } ], - }, - { - testDescription: "One match in sentence of a multi word keyphrase", - sentence: { - text: "A sentence with the key words.", - tokens: [ - { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, - { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, - { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, - { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, - { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, - { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, - { text: "the", sourceCodeRange: { startOffset: 16, endOffset: 19 } }, - { text: " ", sourceCodeRange: { startOffset: 19, endOffset: 20 } }, - { text: "key", sourceCodeRange: { startOffset: 20, endOffset: 23 } }, - { text: " ", sourceCodeRange: { startOffset: 23, endOffset: 24 } }, - { text: "words", sourceCodeRange: { startOffset: 24, endOffset: 29 } }, - { text: ".", sourceCodeRange: { startOffset: 29, endOffset: 30 } }, - ], - sourceCodeRange: { startOffset: 0, endOffset: 30 } }, - keyphraseForms: [ [ "key" ], [ "word", "words" ] ], - exactMatching: false, - expectedResult: [ { sourceCodeRange: { endOffset: 23, startOffset: 20 }, text: "key" }, { sourceCodeRange: { endOffset: 29, startOffset: 24 }, text: "words" } ], - }, - { - testDescription: "Disregards word order of multi word keyphrases", - sentence: { - text: "A word that is key.", - tokens: [ - { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, - { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, - { text: "word", sourceCodeRange: { startOffset: 2, endOffset: 6 } }, - { text: " ", sourceCodeRange: { startOffset: 6, endOffset: 7 } }, - { text: "that", sourceCodeRange: { startOffset: 7, endOffset: 11 } }, - { text: " ", sourceCodeRange: { startOffset: 11, endOffset: 12 } }, - { text: "is", sourceCodeRange: { startOffset: 12, endOffset: 14 } }, - { text: " ", sourceCodeRange: { startOffset: 14, endOffset: 15 } }, - { text: "key", sourceCodeRange: { startOffset: 15, endOffset: 18 } }, - { text: ".", sourceCodeRange: { startOffset: 18, endOffset: 19 } }, - ], - sourceCodeRange: { startOffset: 0, endOffset: 19 } }, - keyphraseForms: [ [ "key" ], [ "word", "words" ] ], - exactMatching: false, - expectedResult: [ { sourceCodeRange: { endOffset: 6, startOffset: 2 }, text: "word" }, { sourceCodeRange: { endOffset: 18, startOffset: 15 }, text: "key" } ], - }, - { - testDescription: "Two matches of multi word keyphrase in sentence", - sentence: { - text: "A sentence with the key words and the key word.", - tokens: [ - { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, - { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, - { text: "sentence", sourceCodeRange: { startOffset: 2, 
endOffset: 10 } }, - { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, - { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, - { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, - { text: "the", sourceCodeRange: { startOffset: 16, endOffset: 19 } }, - { text: " ", sourceCodeRange: { startOffset: 19, endOffset: 20 } }, - { text: "key", sourceCodeRange: { startOffset: 20, endOffset: 23 } }, - { text: " ", sourceCodeRange: { startOffset: 23, endOffset: 24 } }, - { text: "words", sourceCodeRange: { startOffset: 24, endOffset: 29 } }, - { text: " ", sourceCodeRange: { startOffset: 29, endOffset: 30 } }, - { text: "and", sourceCodeRange: { startOffset: 30, endOffset: 33 } }, - { text: " ", sourceCodeRange: { startOffset: 33, endOffset: 34 } }, - { text: "the", sourceCodeRange: { startOffset: 34, endOffset: 37 } }, - { text: " ", sourceCodeRange: { startOffset: 37, endOffset: 38 } }, - { text: "key", sourceCodeRange: { startOffset: 38, endOffset: 41 } }, - { text: " ", sourceCodeRange: { startOffset: 41, endOffset: 42 } }, - { text: "word", sourceCodeRange: { startOffset: 42, endOffset: 46 } }, - { text: ".", sourceCodeRange: { startOffset: 46, endOffset: 47 } }, - ], - sourceCodeRange: { startOffset: 0, endOffset: 47 } }, - keyphraseForms: [ [ "key" ], [ "word", "words" ] ], - exactMatching: false, - expectedResult: [ - { sourceCodeRange: { endOffset: 23, startOffset: 20 }, text: "key" }, - { sourceCodeRange: { endOffset: 29, startOffset: 24 }, text: "words" }, - { sourceCodeRange: { endOffset: 41, startOffset: 38 }, text: "key" }, - { sourceCodeRange: { endOffset: 46, startOffset: 42 }, text: "word" }, - ], - }, - { - testDescription: "One primary and one secondary match of multi word keyphrase in sentence", - sentence: { - text: "A key sentence with a key word.", - tokens: [ - { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, - { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, - { text: "key", sourceCodeRange: { startOffset: 2, endOffset: 5 } }, - { text: " ", sourceCodeRange: { startOffset: 5, endOffset: 6 } }, - { text: "sentence", sourceCodeRange: { startOffset: 6, endOffset: 14 } }, - { text: " ", sourceCodeRange: { startOffset: 14, endOffset: 15 } }, - { text: "with", sourceCodeRange: { startOffset: 15, endOffset: 19 } }, - { text: " ", sourceCodeRange: { startOffset: 19, endOffset: 20 } }, - { text: "a", sourceCodeRange: { startOffset: 20, endOffset: 21 } }, - { text: " ", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, - { text: "key", sourceCodeRange: { startOffset: 22, endOffset: 25 } }, - { text: " ", sourceCodeRange: { startOffset: 25, endOffset: 26 } }, - { text: "word", sourceCodeRange: { startOffset: 26, endOffset: 30 } }, - { text: ".", sourceCodeRange: { startOffset: 30, endOffset: 31 } }, - ], - sourceCodeRange: { startOffset: 0, endOffset: 31 } }, - keyphraseForms: [ [ "key" ], [ "word", "words" ] ], - exactMatching: false, - expectedResult: [ - { sourceCodeRange: { startOffset: 2, endOffset: 5 }, text: "key" }, - { sourceCodeRange: { endOffset: 25, startOffset: 22 }, text: "key" }, - { sourceCodeRange: { endOffset: 30, startOffset: 26 }, text: "word" } ], - }, - { - testDescription: "No match if a multi word keyphrase is separated with an underscore in the sentence.", - sentence: { - text: "A sentence with a key_word.", - tokens: [ - { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, - { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, - { text: 
"sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, - { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, - { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, - { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, - { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, - { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, - { text: "key_word", sourceCodeRange: { startOffset: 18, endOffset: 26 } }, - { text: ".", sourceCodeRange: { startOffset: 26, endOffset: 27 } }, - ], - sourceCodeRange: { startOffset: 0, endOffset: 27 } }, - keyphraseForms: [ [ "key", "keys" ], [ "word", "words" ] ], - exactMatching: false, - expectedResult: [], - }, - { - testDescription: "A match if the key phrase is separated by an underscore in the sentence and in the keyphrase.", - sentence: { - text: "A sentence with a key_word.", - tokens: [ - { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, - { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, - { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, - { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, - { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, - { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, - { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, - { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, - { text: "key_word", sourceCodeRange: { startOffset: 18, endOffset: 26 } }, - { text: ".", sourceCodeRange: { startOffset: 26, endOffset: 27 } }, - ], - sourceCodeRange: { startOffset: 0, endOffset: 27 } }, - keyphraseForms: [ [ "key_word", "key_words" ] ], - exactMatching: false, - expectedResult: [ - { sourceCodeRange: { endOffset: 26, startOffset: 18 }, text: "key_word" }, - ], - }, - { - testDescription: "If there is only a partial match, the partial match is returned.", - sentence: { - text: "A sentence with a key.", - tokens: [ - { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, - { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, - { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, - { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, - { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, - { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, - { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, - { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, - { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, - { text: ".", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, - ], - sourceCodeRange: { startOffset: 0, endOffset: 22 } }, - keyphraseForms: [ [ "key" ], [ "word", "words" ] ], - exactMatching: false, - expectedResult: [ { sourceCodeRange: { endOffset: 21, startOffset: 18 }, text: "key" } ], - - }, - { - testDescription: "It is also a match if the keyphrase is lowercase but the occurrence in the sentence is uppercase.", - sentence: { - text: "A sentence with a KEY WORD.", - tokens: [ - { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, - { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, - { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, - { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, - { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, - { text: " ", sourceCodeRange: { 
startOffset: 15, endOffset: 16 } }, - { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, - { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, - { text: "KEY", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, - { text: " ", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, - { text: "WORD", sourceCodeRange: { startOffset: 22, endOffset: 26 } }, - { text: ".", sourceCodeRange: { startOffset: 26, endOffset: 27 } }, - ], - sourceCodeRange: { startOffset: 0, endOffset: 27 } }, - keyphraseForms: [ [ "key" ], [ "word", "words" ] ], - exactMatching: false, - expectedResult: [ - { sourceCodeRange: { endOffset: 21, startOffset: 18 }, text: "KEY" }, - { sourceCodeRange: { endOffset: 26, startOffset: 22 }, text: "WORD" }, - ], - }, - { - testDescription: "Correctly matches if the keyphrase is the last word in the sentence.", - sentence: { - text: "A sentence with a key", - tokens: [ - { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, - { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, - { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, - { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, - { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, - { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, - { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, - { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, - { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, - ], - sourceCodeRange: { startOffset: 0, endOffset: 21 } }, - keyphraseForms: [ [ "key" ], [ "word", "words" ] ], - exactMatching: false, - expectedResult: [ { sourceCodeRange: { endOffset: 21, startOffset: 18 }, text: "key" } ], - }, - { - testDescription: "Does return a match if the keyphrase in the text ends with an underscore.", - sentence: { - text: "A sentence with a key_", - tokens: [ - { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, - { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, - { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, - { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, - { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, - { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, - { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, - { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, - { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, - { text: "_", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, - ], - sourceCodeRange: { startOffset: 0, endOffset: 22 } }, - keyphraseForms: [ [ "key", "keys" ], [ "word", "words" ] ], - exactMatching: false, - expectedResult: [ { sourceCodeRange: { startOffset: 18, endOffset: 21 }, text: "key" } ], - }, - { - testDescription: "Does return a match if the keyphrase in the text ends with a dash.", - sentence: { - text: "A sentence with a key-", - tokens: [ - { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, - { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, - { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, - { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, - { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, - { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, - { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 
17 } }, - { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, - { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, - { text: "-", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, - ], - sourceCodeRange: { startOffset: 0, endOffset: 22 } }, - keyphraseForms: [ [ "key" ], [ "word", "words" ] ], - exactMatching: false, - expectedResult: [ { sourceCodeRange: { startOffset: 18, endOffset: 21 }, text: "key" } ], - }, - { - testDescription: "Correctly matches if the sentence ends with an exclamation mark.", - sentence: { - text: "A sentence with a key!", - tokens: [ - { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, - { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, - { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, - { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, - { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, - { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, - { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, - { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, - { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, - { text: "!", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, - ], - sourceCodeRange: { startOffset: 0, endOffset: 22 } }, - keyphraseForms: [ [ "key" ], [ "word", "words" ] ], - exactMatching: false, - expectedResult: [ { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } } ], - }, - { - testDescription: "A keyphrase with a dash matches when the keyphrase occurs with a dash in the text.", - sentence: { - text: "A sentence with a key-word.", - tokens: [ - { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, - { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, - { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, - { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, - { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, - { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, - { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, - { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, - { text: "key-word", sourceCodeRange: { startOffset: 18, endOffset: 26 } }, - { text: ".", sourceCodeRange: { startOffset: 26, endOffset: 27 } }, - ], - sourceCodeRange: { startOffset: 0, endOffset: 27 } }, - keyphraseForms: [ [ "key-word", "key-words" ] ], - exactMatching: false, - expectedResult: [ - { text: "key-word", sourceCodeRange: { startOffset: 18, endOffset: 26 } }, - ], - }, - { - testDescription: "A keyphrase with a dash does not match when the keyphrase occurs without a dash in the text.", - sentence: { - text: "A sentence with a key word.", - tokens: [ - { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, - { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, - { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, - { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, - { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, - { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, - { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, - { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, - { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, - { text: " ", sourceCodeRange: { startOffset: 21, 
endOffset: 22 } }, - { text: "word", sourceCodeRange: { startOffset: 22, endOffset: 26 } }, - { text: ".", sourceCodeRange: { startOffset: 26, endOffset: 27 } }, - ], - sourceCodeRange: { startOffset: 0, endOffset: 27 } }, - keyphraseForms: [ [ "key-word", "key-words" ] ], - exactMatching: false, - expectedResult: [], - }, - { - testDescription: "A keyphrase without a dash does not match when the keyphrase occurs with a dash in the text.", - sentence: { - text: "A sentence with a key-word.", - tokens: [ - { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, - { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, - { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, - { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, - { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, - { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, - { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, - { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, - { text: "key-word", sourceCodeRange: { startOffset: 18, endOffset: 26 } }, - { text: ".", sourceCodeRange: { startOffset: 26, endOffset: 27 } }, - ], - sourceCodeRange: { startOffset: 0, endOffset: 27 } }, - keyphraseForms: [ [ "key", "keys" ], [ "word", "words" ] ], - exactMatching: false, - expectedResult: [ ], - }, - { - testDescription: "A keyphrase without a dash matches only with the keyphrase without a dash in the text.", - sentence: { - text: "A sentence with a key-word and a key word.", - tokens: [ - { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, - { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, - { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, - { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, - { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, - { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, - { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, - { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, - { text: "key-word", sourceCodeRange: { startOffset: 18, endOffset: 26 } }, - { text: " ", sourceCodeRange: { startOffset: 26, endOffset: 27 } }, - { text: "and", sourceCodeRange: { startOffset: 27, endOffset: 30 } }, - { text: " ", sourceCodeRange: { startOffset: 30, endOffset: 31 } }, - { text: "a", sourceCodeRange: { startOffset: 31, endOffset: 32 } }, - { text: " ", sourceCodeRange: { startOffset: 32, endOffset: 33 } }, - { text: "key", sourceCodeRange: { startOffset: 33, endOffset: 36 } }, - { text: " ", sourceCodeRange: { startOffset: 36, endOffset: 37 } }, - { text: "word", sourceCodeRange: { startOffset: 37, endOffset: 41 } }, - { text: ".", sourceCodeRange: { startOffset: 41, endOffset: 42 } }, - ], - sourceCodeRange: { startOffset: 0, endOffset: 42 } }, - keyphraseForms: [ [ "key", "keys" ], [ "word", "words" ] ], - exactMatching: false, - expectedResult: [ - { text: "key", sourceCodeRange: { startOffset: 33, endOffset: 36 } }, - { text: "word", sourceCodeRange: { startOffset: 37, endOffset: 41 } }, - ], - }, - { - testDescription: "A match with a dash and a match without a dash, but the keyphrase contains a dash.", - sentence: { - text: "A sentence with a key-word and a key word.", - tokens: [ - { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, - { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, - { text: "sentence", sourceCodeRange: 
{ startOffset: 2, endOffset: 10 } }, - { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, - { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, - { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, - { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, - { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, - { text: "key-word", sourceCodeRange: { startOffset: 18, endOffset: 26 } }, - { text: " ", sourceCodeRange: { startOffset: 26, endOffset: 27 } }, - { text: "and", sourceCodeRange: { startOffset: 27, endOffset: 30 } }, - { text: " ", sourceCodeRange: { startOffset: 30, endOffset: 31 } }, - { text: "a", sourceCodeRange: { startOffset: 31, endOffset: 32 } }, - { text: " ", sourceCodeRange: { startOffset: 32, endOffset: 33 } }, - { text: "key", sourceCodeRange: { startOffset: 33, endOffset: 36 } }, - { text: " ", sourceCodeRange: { startOffset: 36, endOffset: 37 } }, - { text: "word", sourceCodeRange: { startOffset: 37, endOffset: 41 } }, - { text: ".", sourceCodeRange: { startOffset: 41, endOffset: 42 } }, - ], - sourceCodeRange: { startOffset: 0, endOffset: 42 } }, - keyphraseForms: [ [ "key-word", "key-words" ] ], - exactMatching: false, - expectedResult: [ - { text: "key-word", sourceCodeRange: { startOffset: 18, endOffset: 26 } }, - ], - }, - { - testDescription: "When the sentence contains the keyphrase with a dash but in the wrong order, there should be no match.", - sentence: { - text: "A sentence with a word-key.", - tokens: [ - { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, - { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, - { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, - { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, - { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, - { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, - { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, - { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, - { text: "word-key", sourceCodeRange: { startOffset: 18, endOffset: 26 } }, - { text: ".", sourceCodeRange: { startOffset: 26, endOffset: 27 } }, - ], - sourceCodeRange: { startOffset: 0, endOffset: 27 } }, - keyphraseForms: [ [ "key-word", "key-words" ] ], - exactMatching: false, - expectedResult: [], - }, - { - testDescription: "When the sentence contains the keyphrase with a dash but in the wrong order and the keyphrase does not contain a dash," + - " there should not be a match.", - sentence: { - text: "A sentence with a word-key.", - tokens: [ - { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, - { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, - { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, - { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, - { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, - { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, - { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, - { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, - { text: "word-key", sourceCodeRange: { startOffset: 18, endOffset: 26 } }, - { text: ".", sourceCodeRange: { startOffset: 26, endOffset: 27 } }, - ], - sourceCodeRange: { startOffset: 0, endOffset: 27 } }, - keyphraseForms: [ [ "key", "keys" ], [ "word", "words" ] ], - exactMatching: false, - expectedResult: [ ], - }, - { - 
testDescription: "When the sentence contains a different word with a dash that contains part of the keyphrase, there should be no match.", - sentence: { - text: "A sentence with a key-phrase.", - tokens: [ - { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, - { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, - { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, - { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, - { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, - { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, - { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, - { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, - { text: "key-phrase", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, - { text: ".", sourceCodeRange: { startOffset: 28, endOffset: 29 } }, - ], - sourceCodeRange: { startOffset: 0, endOffset: 29 } }, - keyphraseForms: [ [ "key", "keys" ], [ "word", "words" ] ], - exactMatching: false, - expectedResult: [], - }, - { - testDescription: "Returns a match when the words from the keyphrase are separated by a dash and spaces.", - sentence: { - text: "A sentence with a key - phrase.", - tokens: [ - { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, - { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, - { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, - { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, - { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, - { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, - { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, - { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, - { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, - { text: " ", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, - { text: "-", sourceCodeRange: { startOffset: 22, endOffset: 23 } }, - { text: " ", sourceCodeRange: { startOffset: 23, endOffset: 24 } }, - { text: "phrase", sourceCodeRange: { startOffset: 24, endOffset: 30 } }, - { text: ".", sourceCodeRange: { startOffset: 30, endOffset: 31 } }, - ], - sourceCodeRange: { startOffset: 0, endOffset: 29 } }, - keyphraseForms: [ [ "key", "keys" ], [ "phrase", "phrases" ] ], - exactMatching: false, - expectedResult: [ - { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, - { text: "phrase", sourceCodeRange: { startOffset: 24, endOffset: 30 } }, - ], - }, - { - testDescription: "Matches a keyphrase with an apostrophe.", - sentence: { - text: "All the keyphrase's forms should be matched.", - tokens: [ - { text: "All", sourceCodeRange: { startOffset: 0, endOffset: 3 } }, - { text: " ", sourceCodeRange: { startOffset: 3, endOffset: 4 } }, - { text: "the", sourceCodeRange: { startOffset: 4, endOffset: 7 } }, - { text: " ", sourceCodeRange: { startOffset: 7, endOffset: 8 } }, - { text: "keyphrase's", sourceCodeRange: { startOffset: 8, endOffset: 19 } }, - { text: " ", sourceCodeRange: { startOffset: 19, endOffset: 20 } }, - { text: "forms", sourceCodeRange: { startOffset: 20, endOffset: 25 } }, - { text: " ", sourceCodeRange: { startOffset: 25, endOffset: 26 } }, - { text: "should", sourceCodeRange: { startOffset: 26, endOffset: 32 } }, - { text: " ", sourceCodeRange: { startOffset: 32, endOffset: 33 } }, - { text: "be", sourceCodeRange: { startOffset: 34, endOffset: 36 } }, - { text: " ", 
sourceCodeRange: { startOffset: 36, endOffset: 37 } }, - { text: "matched", sourceCodeRange: { startOffset: 37, endOffset: 44 } }, - { text: ".", sourceCodeRange: { startOffset: 44, endOffset: 45 } }, - ], - sourceCodeRange: { startOffset: 0, endOffset: 45 } }, - keyphraseForms: [ [ "keyphrase", "keyphrases", "keyphrase's" ] ], - exactMatching: false, - expectedResult: [ - { text: "keyphrase's", sourceCodeRange: { startOffset: 8, endOffset: 19 } }, - ], - }, -]; - -const exactMatchingTestCases = [ - { - testDescription: "with exact matching, a single word keyphrase should match with a single word.", - sentence: { - text: "A sentence with a keyphrase.", - tokens: [ - { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, - { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, - { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, - { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, - { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, - { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, - { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, - { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, - { text: "keyphrase", sourceCodeRange: { startOffset: 18, endOffset: 27 } }, - { text: ".", sourceCodeRange: { startOffset: 27, endOffset: 28 } }, - ], - sourceCodeRange: { startOffset: 0, endOffset: 28 } }, - keyphraseForms: [ [ "keyphrase" ] ], - exactMatching: true, - expectedResult: [ - { text: "keyphrase", sourceCodeRange: { startOffset: 18, endOffset: 27 } }, - ], - }, - { - testDescription: "with exact matching, a singular single word keyphrase should not match with the plural form in the text.", - sentence: { - text: "A sentence with keyphrases.", - tokens: [ - { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, - { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, - { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, - { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, - { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, - { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, - { text: "keyphrases", sourceCodeRange: { startOffset: 16, endOffset: 26 } }, - { text: ".", sourceCodeRange: { startOffset: 26, endOffset: 27 } }, - ], - sourceCodeRange: { startOffset: 0, endOffset: 27 } }, - keyphraseForms: [ [ "keyphrase" ] ], - exactMatching: true, - expectedResult: [], - }, - { - testDescription: "with exact matching, a plural single word keyphrase should not match wih a singular form in the text.", - sentence: { - text: "A sentence with a keyphrase.", - tokens: [ - { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, - { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, - { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, - { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, - { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, - { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, - { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, - { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, - { text: "keyphrase", sourceCodeRange: { startOffset: 18, endOffset: 27 } }, - { text: ".", sourceCodeRange: { startOffset: 27, endOffset: 28 } }, - ], - sourceCodeRange: { startOffset: 0, endOffset: 28 } }, - keyphraseForms: [ [ "keyphrases" ] ], - 
exactMatching: true, - expectedResult: [], - }, - { - testDescription: "with exact matching, a multi word keyphrase should match the same multi word phrase in the text.", - sentence: { - text: "A sentence with a key phrase.", - tokens: [ - { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, - { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, - { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, - { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, - { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, - { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, - { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, - { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, - { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, - { text: " ", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, - { text: "phrase", sourceCodeRange: { startOffset: 22, endOffset: 28 } }, - { text: ".", sourceCodeRange: { startOffset: 28, endOffset: 29 } }, - ], - sourceCodeRange: { startOffset: 0, endOffset: 29 } }, - keyphraseForms: [ [ "key phrase" ] ], - exactMatching: true, - expectedResult: [ - { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, - { text: " ", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, - { text: "phrase", sourceCodeRange: { startOffset: 22, endOffset: 28 } }, - ], - }, - { - testDescription: "with exact matching, a multi word keyphrase should not match a multi word phrase if it is in a different order.", - sentence: { - text: "A sentence with a phrase key.", - tokens: [ - { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, - { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, - { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, - { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, - { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, - { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, - { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, - { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, - { text: "phrase", sourceCodeRange: { startOffset: 18, endOffset: 24 } }, - { text: " ", sourceCodeRange: { startOffset: 24, endOffset: 25 } }, - { text: "key", sourceCodeRange: { startOffset: 25, endOffset: 28 } }, - { text: ".", sourceCodeRange: { startOffset: 28, endOffset: 29 } }, - ], - sourceCodeRange: { startOffset: 0, endOffset: 29 } }, - keyphraseForms: [ [ "key phrase" ] ], - exactMatching: true, - expectedResult: [], - }, - { - testDescription: "with exact matching, a the matching should be insensitive to case.", - sentence: { - text: "A sentence with a KeYphRaSe.", - tokens: [ - { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, - { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, - { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, - { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, - { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, - { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, - { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, - { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, - { text: "KeYphRaSe", sourceCodeRange: { startOffset: 18, endOffset: 28 } }, - { text: ".", sourceCodeRange: { startOffset: 28, endOffset: 29 } }, - ], - 
sourceCodeRange: { startOffset: 0, endOffset: 29 } }, - keyphraseForms: [ [ "keyphrase" ] ], - exactMatching: true, - expectedResult: [ - { text: "KeYphRaSe", sourceCodeRange: { startOffset: 18, endOffset: 28 } }, - ], - }, - { - testDescription: "with exact matching, it should match a full stop if it is part of the keyphrase and directly precedes the keyphrase.", - sentence: { - text: "A .sentence with a keyphrase.", - tokens: [ - { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, - { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, - { text: ".", sourceCodeRange: { startOffset: 2, endOffset: 3 } }, - { text: "sentence", sourceCodeRange: { startOffset: 3, endOffset: 11 } }, - { text: " ", sourceCodeRange: { startOffset: 11, endOffset: 12 } }, - { text: "with", sourceCodeRange: { startOffset: 12, endOffset: 16 } }, - { text: " ", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, - { text: "a", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, - { text: " ", sourceCodeRange: { startOffset: 18, endOffset: 19 } }, - { text: "keyphrase", sourceCodeRange: { startOffset: 19, endOffset: 28 } }, - { text: ".", sourceCodeRange: { startOffset: 28, endOffset: 29 } }, - ], - sourceCodeRange: { startOffset: 0, endOffset: 29 } }, - keyphraseForms: [ [ ".sentence" ] ], - exactMatching: true, - expectedResult: [ - { text: ".", sourceCodeRange: { startOffset: 2, endOffset: 3 } }, - { text: "sentence", sourceCodeRange: { startOffset: 3, endOffset: 11 } }, - ], - }, - { - testDescription: "with exact matching, it should match a full stop if it is part of the keyphrase and directly follows the keyphrase.", - sentence: { - text: "A sentence with a keyphrase.", - tokens: [ - { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, - { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, - { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, - { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, - { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, - { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, - { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, - { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, - { text: "keyphrase", sourceCodeRange: { startOffset: 18, endOffset: 27 } }, - { text: ".", sourceCodeRange: { startOffset: 27, endOffset: 28 } }, - ], - sourceCodeRange: { startOffset: 0, endOffset: 28 } }, - keyphraseForms: [ [ "keyphrase." 
] ], - exactMatching: true, - expectedResult: [ - { text: "keyphrase", sourceCodeRange: { startOffset: 18, endOffset: 27 } }, - { text: ".", sourceCodeRange: { startOffset: 27, endOffset: 28 } }, - ], - }, - { - testDescription: "with exact matching, it should match a full stop if it is part of the keyphrase and is not surrounded by spaces.", - sentence: { - text: "A sentence with a key.phrase.", - tokens: [ - { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, - { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, - { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, - { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, - { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, - { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, - { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, - { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, - { text: "key.phrase", sourceCodeRange: { startOffset: 18, endOffset: 28 } }, - { text: ".", sourceCodeRange: { startOffset: 28, endOffset: 29 } }, - ], - sourceCodeRange: { startOffset: 0, endOffset: 29 } }, - keyphraseForms: [ [ "key.phrase" ] ], - exactMatching: true, - expectedResult: [ - { text: "key.phrase", sourceCodeRange: { startOffset: 18, endOffset: 28 } }, - ], - }, - { - testDescription: "Matches a single word keyphrase if keyphrase is doubleQuoted.", - sentence: { - text: "A sentence with a keyphrase.", - tokens: [ - { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, - { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, - { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, - { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, - { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, - { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, - { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, - { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, - { text: "keyphrase", sourceCodeRange: { startOffset: 18, endOffset: 27 } }, - { text: ".", sourceCodeRange: { startOffset: 27, endOffset: 28 } }, - ], - sourceCodeRange: { startOffset: 0, endOffset: 28 } }, - keyphraseForms: [ [ "keyphrase" ] ], - exactMatching: true, - expectedResult: [ - { text: "keyphrase", sourceCodeRange: { startOffset: 18, endOffset: 27 } }, - ], - }, - { - testDescription: "Matches a multi word keyphrase with a space as result of the keyphrase being double quoted.", - sentence: { - text: "A sentence with a key phrase.", - tokens: [ - { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, - { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, - { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, - { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, - { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, - { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, - { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, - { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, - { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, - { text: " ", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, - { text: "phrase", sourceCodeRange: { startOffset: 22, endOffset: 28 } }, - { text: ".", sourceCodeRange: { startOffset: 28, endOffset: 29 } }, - ], - sourceCodeRange: { startOffset: 0, endOffset: 29 } }, 
- keyphraseForms: [ [ "key phrase" ] ], - exactMatching: true, - expectedResult: [ - { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, - { text: " ", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, - { text: "phrase", sourceCodeRange: { startOffset: 22, endOffset: 28 } }, - ], - }, - { - testDescription: "Does not match multiword keyphrase in opposite order if keyphrase is doubleQuoted.", - sentence: { - text: "A sentence with a phrase key.", - tokens: [ - { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, - { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, - { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, - { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, - { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, - { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, - { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, - { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, - { text: "phrase", sourceCodeRange: { startOffset: 18, endOffset: 24 } }, - { text: " ", sourceCodeRange: { startOffset: 24, endOffset: 25 } }, - { text: "key", sourceCodeRange: { startOffset: 25, endOffset: 28 } }, - { text: ".", sourceCodeRange: { startOffset: 28, endOffset: 29 } }, - ], - sourceCodeRange: { startOffset: 0, endOffset: 29 } }, - keyphraseForms: [ [ "key phrase" ] ], - exactMatching: true, - expectedResult: [], - }, - { - testDescription: "Does not match plural if singular keyphrase is doubleQuoted.", - sentence: { - text: "A sentence with keyphrases.", - }, - keyphraseForms: [ [ "keyphrase" ] ], - exactMatching: true, - expectedResult: [], - }, - { - testDescription: "Does not match singular if plural keyphrase is doubleQuoted.", - sentence: { - text: "A sentence with a keyphrase.", - tokens: [ - { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, - { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, - { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, - { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, - { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, - { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, - { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, - { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, - { text: "keyphrase", sourceCodeRange: { startOffset: 18, endOffset: 27 } }, - { text: ".", sourceCodeRange: { startOffset: 27, endOffset: 28 } }, - ], - sourceCodeRange: { startOffset: 0, endOffset: 28 } }, - keyphraseForms: [ [ "keyphrases" ] ], - exactMatching: true, - expectedResult: [], - }, -]; - -// The japanese test cases need to be adapted once japanese tokenization is implemented. 
-// eslint-disable-next-line max-len -const japaneseTestCases = [ - { - testDescription: "Only a primary match in a language that uses a custom helper to match words.", - sentence: { - text: "私の猫はかわいいです。", - tokens: [ - { text: "私の猫はかわいいです。", sourceCodeRange: { startOffset: 0, endOffset: 12 } }, - ], - sourceCodeRange: { startOffset: 0, endOffset: 12 } }, - keyphraseForms: [ [ "猫" ], [ "かわいい" ] ], - locale: "ja", - matchWordCustomHelper: JapaneseCustomHelper, - expectedResult: { - position: 0, - primaryMatches: [ [ - { sourceCodeRange: { startOffset: 2, endOffset: 3 }, text: "猫" }, - { sourceCodeRange: { startOffset: 4, endOffset: 8 }, text: "かわいい" }, - ] ], - secondaryMatches: [], - }, - }, - { - testDescription: "A primary and secondary match in a language that uses a custom helper to match words.", - sentence: { - text: "私の猫はかわいいですかわいい。", - tokens: [ - { text: "私の猫はかわいいですかわいい。", sourceCodeRange: { startOffset: 0, endOffset: 15 } }, - ], - sourceCodeRange: { startOffset: 0, endOffset: 15 } }, - keyphraseForms: [ [ "猫" ], [ "かわいい" ] ], - locale: "ja", - matchWordCustomHelper: JapaneseCustomHelper, - expectedResult: { - position: 0, - primaryMatches: [ [ - { sourceCodeRange: { startOffset: 2, endOffset: 3 }, text: "猫" }, - { sourceCodeRange: { startOffset: 4, endOffset: 8 }, text: "かわいい" } ] ], - secondaryMatches: [ [ - { sourceCodeRange: { startOffset: 10, endOffset: 14 }, text: "かわいい" } ] ], - }, - }, - { - testDescription: "Only secondary matches in a language that uses a custom helper to match words.", - sentence: { - text: "私のはかわいいですかわいい。", - tokens: [ - { text: "私のはかわいいですかわいい。", sourceCodeRange: { startOffset: 0, endOffset: 14 } }, - ], - sourceCodeRange: { startOffset: 0, endOffset: 14 } }, - keyphraseForms: [ [ "猫" ], [ "かわいい" ] ], - locale: "ja", - matchWordCustomHelper: JapaneseCustomHelper, - expectedResult: { - position: -1, - primaryMatches: [], - secondaryMatches: [], - }, - }, -]; - -describe.each( testCases )( "find keyphrase forms in sentence", ( { - testDescription, - sentence, - keyphraseForms, - exactMatching, - expectedResult, -} ) => { - it( testDescription, () => { - expect( matchKeyphraseWithSentence( keyphraseForms, sentence, exactMatching ) ).toEqual( expectedResult ); - } ); -} ); - -// The test below is skipped for now because the PR for exact matching is not yet merged. 
-describe.each( exactMatchingTestCases )( "find keyphrase forms in sentence when exact matching is requested", ( { - testDescription, - sentence, - keyphraseForms, - exactMatching, - expectedResult, -} ) => { - it.skip( testDescription, () => { - expect( matchKeyphraseWithSentence( keyphraseForms, sentence, exactMatching ) ).toEqual( expectedResult ); - } ); -} ); - -describe.each( japaneseTestCases )( "findKeyWordFormsInSentence for japanese", ( { - testDescription, - sentence, - keyphraseForms, - locale, - expectedResult, -} ) => { - it.skip( testDescription, () => { - expect( matchKeyphraseWithSentence( sentence, keyphraseForms, locale ) ).toEqual( expectedResult ); - } ); -} ); - - diff --git a/packages/yoastseo/spec/languageProcessing/helpers/match/matchWordFormsWithSentenceSpec.js b/packages/yoastseo/spec/languageProcessing/helpers/match/matchWordFormsWithSentenceSpec.js new file mode 100644 index 00000000000..05f53cbf572 --- /dev/null +++ b/packages/yoastseo/spec/languageProcessing/helpers/match/matchWordFormsWithSentenceSpec.js @@ -0,0 +1,628 @@ +import customHelperJapanese from "../../../../src/languageProcessing/languages/ja/helpers/matchTextWithWord"; +import matchWordFormsWithSentence from "../../../../src/languageProcessing/helpers/match/matchWordFormsWithSentence"; + +// eslint-disable-next-line no-warning-comments +// TODO: Adapt all the unit tests for the expected results. Now the function returns an object. +const testCases = [ + { + testDescription: "returns no match when no word form is found the sentence", + sentence: { + text: "A sentence with notthekeyphrase.", + tokens: [ + { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, + { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, + { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, + { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, + { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, + { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, + { text: "notthekeyphrase", sourceCodeRange: { startOffset: 16, endOffset: 31 } }, + { text: ".", sourceCodeRange: { startOffset: 31, endOffset: 32 } }, + ], + sourceCodeRange: { startOffset: 0, endOffset: 32 } }, + wordForms: [ "keyword", "keywords" ], + expectedResult: { count: 0, matches: [] }, + }, + { + testDescription: "returns no match when no word forms is available for matching", + sentence: { + text: "A sentence with notthekeyphrase.", + tokens: [ + { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, + { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, + { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, + { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, + { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, + { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, + { text: "notthekeyphrase", sourceCodeRange: { startOffset: 16, endOffset: 31 } }, + { text: ".", sourceCodeRange: { startOffset: 31, endOffset: 32 } }, + ], + sourceCodeRange: { startOffset: 0, endOffset: 32 } }, + wordForms: [], + expectedResult: { count: 0, matches: [] }, + }, + { + testDescription: "returns one match whe the same word form is found in the sentence", + sentence: { + text: "A sentence with the keyword.", + tokens: [ + { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, + { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, + { text: "sentence", sourceCodeRange: { 
startOffset: 2, endOffset: 10 } }, + { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, + { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, + { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, + { text: "the", sourceCodeRange: { startOffset: 16, endOffset: 19 } }, + { text: " ", sourceCodeRange: { startOffset: 19, endOffset: 20 } }, + { text: "keyword", sourceCodeRange: { startOffset: 20, endOffset: 27 } }, + { text: ".", sourceCodeRange: { startOffset: 27, endOffset: 28 } }, + ], + sourceCodeRange: { startOffset: 0, endOffset: 28 } }, + wordForms: [ "keyword", "keywords" ], + expectedResult: { count: 1, matches: [ { sourceCodeRange: { startOffset: 20, endOffset: 27 }, text: "keyword" } ] }, + }, + { + testDescription: "returns matches for all word forms if all forms are found in the sentence", + sentence: { + text: "A sentence with the key words and the key word.", + tokens: [ + { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, + { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, + { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, + { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, + { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, + { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, + { text: "the", sourceCodeRange: { startOffset: 16, endOffset: 19 } }, + { text: " ", sourceCodeRange: { startOffset: 19, endOffset: 20 } }, + { text: "key", sourceCodeRange: { startOffset: 20, endOffset: 23 } }, + { text: " ", sourceCodeRange: { startOffset: 23, endOffset: 24 } }, + { text: "words", sourceCodeRange: { startOffset: 24, endOffset: 29 } }, + { text: " ", sourceCodeRange: { startOffset: 29, endOffset: 30 } }, + { text: "and", sourceCodeRange: { startOffset: 30, endOffset: 33 } }, + { text: " ", sourceCodeRange: { startOffset: 33, endOffset: 34 } }, + { text: "the", sourceCodeRange: { startOffset: 34, endOffset: 37 } }, + { text: " ", sourceCodeRange: { startOffset: 37, endOffset: 38 } }, + { text: "key", sourceCodeRange: { startOffset: 38, endOffset: 41 } }, + { text: " ", sourceCodeRange: { startOffset: 41, endOffset: 42 } }, + { text: "word", sourceCodeRange: { startOffset: 42, endOffset: 46 } }, + { text: ".", sourceCodeRange: { startOffset: 46, endOffset: 47 } }, + ], + sourceCodeRange: { startOffset: 0, endOffset: 47 } }, + wordForms: [ "word", "words" ], + expectedResult: { + count: 2, + matches: [ + { sourceCodeRange: { endOffset: 46, startOffset: 42 }, text: "word" }, + { sourceCodeRange: { endOffset: 29, startOffset: 24 }, text: "words" }, + ], + }, + }, + { + testDescription: "returns a match of 'key' in 'key-word'", + sentence: { + text: "A sentence with a key-word.", + tokens: [ + { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, + { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, + { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, + { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, + { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, + { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, + { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, + { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, + { text: "key-word", sourceCodeRange: { startOffset: 18, endOffset: 26 } }, + { text: ".", sourceCodeRange: { startOffset: 26, endOffset: 27 } }, + ], + sourceCodeRange: { startOffset: 0, 
endOffset: 27 } }, + wordForms: [ "key", "keys" ], + expectedResult: { count: 1, matches: [ { sourceCodeRange: { endOffset: 26, startOffset: 18 }, text: "key-word" } ] }, + }, + { + testDescription: "returns a match for word form separated by an underscore", + sentence: { + text: "A sentence with a key_word.", + tokens: [ + { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, + { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, + { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, + { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, + { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, + { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, + { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, + { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, + { text: "key_word", sourceCodeRange: { startOffset: 18, endOffset: 26 } }, + { text: ".", sourceCodeRange: { startOffset: 26, endOffset: 27 } }, + ], + sourceCodeRange: { startOffset: 0, endOffset: 27 } }, + wordForms: [ "key_word", "key_words" ], + expectedResult: { count: 1, + matches: [ + { sourceCodeRange: { endOffset: 26, startOffset: 18 }, text: "key_word" }, + ] }, + }, + { + testDescription: "returns a match for word form occurrence regardless of capitalization", + sentence: { + text: "A sentence with a KEY WORD.", + tokens: [ + { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, + { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, + { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, + { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, + { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, + { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, + { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, + { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, + { text: "KEY", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, + { text: " ", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, + { text: "WORD", sourceCodeRange: { startOffset: 22, endOffset: 26 } }, + { text: ".", sourceCodeRange: { startOffset: 26, endOffset: 27 } }, + ], + sourceCodeRange: { startOffset: 0, endOffset: 27 } }, + wordForms: [ "word", "words" ], + expectedResult: { count: 1, matches: [ + { sourceCodeRange: { endOffset: 26, startOffset: 22 }, text: "WORD" }, + ] }, + }, + { + testDescription: "correctly matches if the form is the last word in the sentence.", + sentence: { + text: "A sentence with a key.", + tokens: [ + { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, + { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, + { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, + { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, + { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, + { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, + { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, + { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, + { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, + { text: ".", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, + ], + sourceCodeRange: { startOffset: 0, endOffset: 22 } }, + wordForms: [ "key" ], + expectedResult: { count: 1, matches: [ { sourceCodeRange: { endOffset: 21, startOffset: 18 }, text: "key" } 
] }, + }, + { + testDescription: "returns a match if the keyphrase in the text ends with an underscore.", + sentence: { + text: "A sentence with a key_", + tokens: [ + { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, + { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, + { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, + { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, + { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, + { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, + { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, + { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, + { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, + { text: "_", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, + ], + sourceCodeRange: { startOffset: 0, endOffset: 22 } }, + wordForms: [ "key", "keys" ], + expectedResult: { count: 1, matches: [ { sourceCodeRange: { startOffset: 18, endOffset: 21 }, text: "key" } ] }, + }, + { + testDescription: "returns a match if the keyphrase in the text ends with a dash.", + sentence: { + text: "A sentence with a key-", + tokens: [ + { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, + { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, + { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, + { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, + { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, + { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, + { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, + { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, + { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, + { text: "-", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, + ], + sourceCodeRange: { startOffset: 0, endOffset: 22 } }, + wordForms: [ "key" ], + expectedResult: { count: 1, matches: [ { sourceCodeRange: { startOffset: 18, endOffset: 21 }, text: "key" } ] }, + }, + { + testDescription: "matches the occurrence in the sentence which is immediately followed by an exclamation mark", + sentence: { + text: "A sentence with a key!", + tokens: [ + { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, + { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, + { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, + { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, + { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, + { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, + { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, + { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, + { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, + { text: "!", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, + ], + sourceCodeRange: { startOffset: 0, endOffset: 22 } }, + wordForms: [ "key" ], + expectedResult: { count: 1, matches: [ { sourceCodeRange: { startOffset: 18, endOffset: 21 }, text: "key" } ] }, + }, + { + testDescription: "matches a keyphrase with a dash when the keyphrase occurs with a dash in the text", + sentence: { + text: "A sentence with a key-word.", + tokens: [ + { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, + { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, + { 
text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, + { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, + { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, + { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, + { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, + { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, + { text: "key-word", sourceCodeRange: { startOffset: 18, endOffset: 26 } }, + { text: ".", sourceCodeRange: { startOffset: 26, endOffset: 27 } }, + ], + sourceCodeRange: { startOffset: 0, endOffset: 27 } }, + wordForms: [ "key-word", "key-words" ], + expectedResult: { count: 1, matches: [ + { text: "key-word", sourceCodeRange: { startOffset: 18, endOffset: 26 } }, + ] }, + }, + { + testDescription: "does not match keyphrase with a dash when the keyphrase occurs without a dash in the text", + sentence: { + text: "A sentence with a key word.", + tokens: [ + { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, + { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, + { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, + { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, + { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, + { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, + { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, + { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, + { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, + { text: " ", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, + { text: "word", sourceCodeRange: { startOffset: 22, endOffset: 26 } }, + { text: ".", sourceCodeRange: { startOffset: 26, endOffset: 27 } }, + ], + sourceCodeRange: { startOffset: 0, endOffset: 27 } }, + wordForms: [ "key-word", "key-words" ], + expectedResult: { count: 0, matches: [] }, + }, + { + testDescription: "Matches a keyphrase with an apostrophe.", + sentence: { + text: "All the keyphrase's forms should be matched.", + tokens: [ + { text: "All", sourceCodeRange: { startOffset: 0, endOffset: 3 } }, + { text: " ", sourceCodeRange: { startOffset: 3, endOffset: 4 } }, + { text: "the", sourceCodeRange: { startOffset: 4, endOffset: 7 } }, + { text: " ", sourceCodeRange: { startOffset: 7, endOffset: 8 } }, + { text: "keyphrase's", sourceCodeRange: { startOffset: 8, endOffset: 19 } }, + { text: " ", sourceCodeRange: { startOffset: 19, endOffset: 20 } }, + { text: "forms", sourceCodeRange: { startOffset: 20, endOffset: 25 } }, + { text: " ", sourceCodeRange: { startOffset: 25, endOffset: 26 } }, + { text: "should", sourceCodeRange: { startOffset: 26, endOffset: 32 } }, + { text: " ", sourceCodeRange: { startOffset: 32, endOffset: 33 } }, + { text: "be", sourceCodeRange: { startOffset: 34, endOffset: 36 } }, + { text: " ", sourceCodeRange: { startOffset: 36, endOffset: 37 } }, + { text: "matched", sourceCodeRange: { startOffset: 37, endOffset: 44 } }, + { text: ".", sourceCodeRange: { startOffset: 44, endOffset: 45 } }, + ], + sourceCodeRange: { startOffset: 0, endOffset: 45 } }, + wordForms: [ "keyphrase", "keyphrases", "keyphrase's" ], + expectedResult: { count: 1, matches: [ + { text: "keyphrase's", sourceCodeRange: { startOffset: 8, endOffset: 19 } }, + ] }, + }, +]; + +describe.each( testCases )( "find word forms in sentence in English: non-exact matching", ( { + testDescription, + sentence, + wordForms, + expectedResult, 
+} ) => { + const locale = "en_US"; + it( testDescription, () => { + expect( matchWordFormsWithSentence( sentence, wordForms, locale, false, false ) ).toEqual( expectedResult ); + } ); +} ); + +const exactMatchingTestCases = [ + { + testDescription: "with exact matching, a single word keyphrase should match with a single word.", + sentence: { + text: "A sentence with a keyphrase.", + tokens: [ + { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, + { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, + { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, + { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, + { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, + { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, + { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, + { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, + { text: "keyphrase", sourceCodeRange: { startOffset: 18, endOffset: 27 } }, + { text: ".", sourceCodeRange: { startOffset: 27, endOffset: 28 } }, + ], + sourceCodeRange: { startOffset: 0, endOffset: 28 } }, + wordForms: [ "keyphrase" ], + expectedResult: { count: 1, matches: [ + { text: "keyphrase", sourceCodeRange: { startOffset: 18, endOffset: 27 } }, + ] }, + }, + { + testDescription: "with exact matching, a singular single word keyphrase should not match with the plural form in the text.", + sentence: { + text: "A sentence with keyphrases.", + tokens: [ + { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, + { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, + { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, + { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, + { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, + { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, + { text: "keyphrases", sourceCodeRange: { startOffset: 16, endOffset: 26 } }, + { text: ".", sourceCodeRange: { startOffset: 26, endOffset: 27 } }, + ], + sourceCodeRange: { startOffset: 0, endOffset: 27 } }, + wordForms: [ "keyphrase" ], + expectedResult: { count: 0, matches: [] }, + }, + { + testDescription: "with exact matching, a plural word keyphrase should not match wih a singular form in the text.", + sentence: { + text: "A sentence with a keyphrase.", + tokens: [ + { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, + { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, + { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, + { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, + { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, + { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, + { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, + { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, + { text: "keyphrase", sourceCodeRange: { startOffset: 18, endOffset: 27 } }, + { text: ".", sourceCodeRange: { startOffset: 27, endOffset: 28 } }, + ], + sourceCodeRange: { startOffset: 0, endOffset: 28 } }, + wordForms: [ "keyphrases" ], + expectedResult: { count: 0, matches: [] }, + }, + { + testDescription: "with exact matching, a multi word keyphrase should match the same multi word phrase in the text.", + sentence: { + text: "A sentence with a key phrase.", + tokens: [ + { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, + { text: " 
", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, + { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, + { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, + { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, + { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, + { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, + { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, + { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, + { text: " ", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, + { text: "phrase", sourceCodeRange: { startOffset: 22, endOffset: 28 } }, + { text: ".", sourceCodeRange: { startOffset: 28, endOffset: 29 } }, + ], + sourceCodeRange: { startOffset: 0, endOffset: 29 } }, + wordForms: [ "key phrase" ], + expectedResult: { count: 1, matches: [ + { text: "key", sourceCodeRange: { startOffset: 18, endOffset: 21 } }, + { text: " ", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, + { text: "phrase", sourceCodeRange: { startOffset: 22, endOffset: 28 } }, + ] }, + }, + { + testDescription: "with exact matching, a multi word keyphrase should not match a multi word phrase if it is in a different order.", + sentence: { + text: "A sentence with a phrase key.", + tokens: [ + { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, + { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, + { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, + { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, + { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, + { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, + { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, + { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, + { text: "phrase", sourceCodeRange: { startOffset: 18, endOffset: 24 } }, + { text: " ", sourceCodeRange: { startOffset: 24, endOffset: 25 } }, + { text: "key", sourceCodeRange: { startOffset: 25, endOffset: 28 } }, + { text: ".", sourceCodeRange: { startOffset: 28, endOffset: 29 } }, + ], + sourceCodeRange: { startOffset: 0, endOffset: 29 } }, + wordForms: [ "key phrase" ], + expectedResult: { count: 0, matches: [] }, + }, + { + testDescription: "with exact matching, a the matching should be insensitive to case.", + sentence: { + text: "A sentence with a KeYphRaSe.", + tokens: [ + { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, + { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, + { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, + { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, + { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, + { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, + { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, + { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, + { text: "KeYphRaSe", sourceCodeRange: { startOffset: 18, endOffset: 28 } }, + { text: ".", sourceCodeRange: { startOffset: 28, endOffset: 29 } }, + ], + sourceCodeRange: { startOffset: 0, endOffset: 29 } }, + wordForms: [ "keyphrase" ], + expectedResult: { count: 1, matches: [ + { text: "KeYphRaSe", sourceCodeRange: { startOffset: 18, endOffset: 28 } }, + ] }, + }, + { + testDescription: "with exact matching, it should match a full stop if it is part of the keyphrase and directly precedes 
the keyphrase.", + sentence: { + text: "A .sentence with a keyphrase.", + tokens: [ + { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, + { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, + { text: ".", sourceCodeRange: { startOffset: 2, endOffset: 3 } }, + { text: "sentence", sourceCodeRange: { startOffset: 3, endOffset: 11 } }, + { text: " ", sourceCodeRange: { startOffset: 11, endOffset: 12 } }, + { text: "with", sourceCodeRange: { startOffset: 12, endOffset: 16 } }, + { text: " ", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, + { text: "a", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, + { text: " ", sourceCodeRange: { startOffset: 18, endOffset: 19 } }, + { text: "keyphrase", sourceCodeRange: { startOffset: 19, endOffset: 28 } }, + { text: ".", sourceCodeRange: { startOffset: 28, endOffset: 29 } }, + ], + sourceCodeRange: { startOffset: 0, endOffset: 29 } }, + wordForms: [ ".sentence" ], + expectedResult: { count: 1, matches: [ + { text: ".", sourceCodeRange: { startOffset: 2, endOffset: 3 } }, + { text: "sentence", sourceCodeRange: { startOffset: 3, endOffset: 11 } }, + ] }, + }, + { + testDescription: "with exact matching, it should match a full stop if it is part of the keyphrase and directly follows the keyphrase.", + sentence: { + text: "A sentence with a keyphrase.", + tokens: [ + { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, + { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, + { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, + { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, + { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, + { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, + { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, + { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, + { text: "keyphrase", sourceCodeRange: { startOffset: 18, endOffset: 27 } }, + { text: ".", sourceCodeRange: { startOffset: 27, endOffset: 28 } }, + ], + sourceCodeRange: { startOffset: 0, endOffset: 28 } }, + wordForms: [ "keyphrase." 
], + expectedResult: { count: 1, matches: [ + { text: "keyphrase", sourceCodeRange: { startOffset: 18, endOffset: 27 } }, + { text: ".", sourceCodeRange: { startOffset: 27, endOffset: 28 } }, + ] }, + }, + { + testDescription: "with exact matching, it should match a full stop if it is part of the keyphrase and is not surrounded by spaces.", + sentence: { + text: "A sentence with a key.phrase.", + tokens: [ + { text: "A", sourceCodeRange: { startOffset: 0, endOffset: 1 } }, + { text: " ", sourceCodeRange: { startOffset: 1, endOffset: 2 } }, + { text: "sentence", sourceCodeRange: { startOffset: 2, endOffset: 10 } }, + { text: " ", sourceCodeRange: { startOffset: 10, endOffset: 11 } }, + { text: "with", sourceCodeRange: { startOffset: 11, endOffset: 15 } }, + { text: " ", sourceCodeRange: { startOffset: 15, endOffset: 16 } }, + { text: "a", sourceCodeRange: { startOffset: 16, endOffset: 17 } }, + { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, + { text: "key.phrase", sourceCodeRange: { startOffset: 18, endOffset: 28 } }, + { text: ".", sourceCodeRange: { startOffset: 28, endOffset: 29 } }, + ], + sourceCodeRange: { startOffset: 0, endOffset: 29 } }, + wordForms: [ "key.phrase" ], + expectedResult: { count: 1, matches: [ + { text: "key.phrase", sourceCodeRange: { startOffset: 18, endOffset: 28 } }, + ] }, + }, +]; + +describe.each( exactMatchingTestCases )( "find keyphrase forms in sentence when exact matching is requested", ( { + testDescription, + sentence, + wordForms, + expectedResult, +} ) => { + const locale = "en_US"; + it( testDescription, () => { + expect( matchWordFormsWithSentence( sentence, wordForms, locale, false, true ) ).toEqual( expectedResult ); + } ); +} ); + + +const japaneseTestCases = [ + { + testDescription: "matches one occurrence of word form in the sentence", + sentence: { + text: "私の猫はかわいいです。", + tokens: [ + { text: "私の猫はかわいいです。", sourceCodeRange: { startOffset: 0, endOffset: 12 } }, + ], + sourceCodeRange: { startOffset: 0, endOffset: 12 } }, + wordForms: [ "猫" ], + expectedResult: { count: 1, matches: [ "猫" ] }, + }, + { + testDescription: "matches all occurrences of two word forms in the sentence", + sentence: { + text: "会う私の猫はかわいいですかわいい会い。", + tokens: [ + { text: "私の猫はかわいいですかわいい。", sourceCodeRange: { startOffset: 0, endOffset: 15 } }, + ], + sourceCodeRange: { startOffset: 0, endOffset: 15 } }, + wordForms: [ "会う", "会い" ], + expectedResult: { count: 2, matches: [ "会う", "会い" ] }, + }, + { + testDescription: "matches all occurrences of one word form in the sentence", + sentence: { + text: "これによって少しでも夏休み明けの感染者数を抑えたいという事だけど、どうなるかな者数。", + tokens: [ + { text: "私の猫はかわいいですかわいい。", sourceCodeRange: { startOffset: 0, endOffset: 15 } }, + ], + sourceCodeRange: { startOffset: 0, endOffset: 15 } }, + wordForms: [ "者数" ], + expectedResult: { count: 2, matches: [ "者数", "者数" ] }, + }, +]; +describe.each( japaneseTestCases )( "test for matching word forms for Japanese: non-exact matching", ( { + testDescription, + sentence, + wordForms, + expectedResult, +} ) => { + it( testDescription, () => { + const locale = "ja"; + expect( matchWordFormsWithSentence( sentence, wordForms, locale, customHelperJapanese, false ) ).toEqual( expectedResult ); + } ); +} ); + +const exactMatchJapaneseData = [ + { + testDescription: "matches only the exact form of the word forms in the sentence with the same word order", + sentence: { + text: "一日一冊の本を読むのはできるかどうかやってみます。", + tokens: [ + { text: "私の猫はかわいいですかわいい。", sourceCodeRange: { startOffset: 0, endOffset: 15 } }, + ], + 
sourceCodeRange: { startOffset: 0, endOffset: 15 } }, + wordForms: [ "『一冊の本を読む』" ], + expectedResult: { count: 1, matches: [ "一冊の本を読む" ] }, + }, + { + testDescription: "doesn't match the occurrence of the word forms in the sentence with different word order", + sentence: { + text: "一日一冊の面白い本を買って読んでるのはできるかどうかやってみます。", + tokens: [ + { text: "私の猫はかわいいですかわいい。", sourceCodeRange: { startOffset: 0, endOffset: 15 } }, + ], + sourceCodeRange: { startOffset: 0, endOffset: 15 } }, + wordForms: [ "『一冊の本を読む』" ], + expectedResult: { count: 0, matches: [] }, + }, +]; +describe.each( exactMatchJapaneseData )( "test for matching word forms for Japanese: exact matching", ( { + testDescription, + sentence, + wordForms, + expectedResult, +} ) => { + it( testDescription, () => { + const locale = "ja"; + expect( matchWordFormsWithSentence( sentence, wordForms, locale, customHelperJapanese, true ) ).toEqual( expectedResult ); + } ); +} ); + + diff --git a/packages/yoastseo/spec/languageProcessing/helpers/sentence/getSentencesFromTreeSpec.js b/packages/yoastseo/spec/languageProcessing/helpers/sentence/getSentencesFromTreeSpec.js index 16ded227abb..25cbaa6037b 100644 --- a/packages/yoastseo/spec/languageProcessing/helpers/sentence/getSentencesFromTreeSpec.js +++ b/packages/yoastseo/spec/languageProcessing/helpers/sentence/getSentencesFromTreeSpec.js @@ -8,7 +8,7 @@ describe( "test to get sentences from the tree", () => { beforeEach( () => { researcher = new EnglishResearcher(); } ); - it( "test", () => { + it( "returns the sentences from paragraph and heading nodes", () => { const paper = new Paper( "
A very intelligent cat loves their human. A dog is very cute.
A subheading 3" + "
text text text
A subheading 4
more text." ); researcher.setPaper( paper ); @@ -92,4 +92,24 @@ describe( "test to get sentences from the tree", () => { }, ] ); } ); + it( "should not include sentences from non-paragraph and non-heading nodes", () => { + const paper = new Paper( "
A cute red panda
The red panda (Ailurus fulgens), also known as the lesser panda," + + " is a small mammal native to the eastern Himalayas and southwestern China
" ); + researcher.setPaper( paper ); + buildTree( paper, researcher ); + expect( getSentencesFromTree( paper ) ).toEqual( [ + { sourceCodeRange: { endOffset: 19, startOffset: 3 }, + text: "A cute red panda", + tokens: [ + { sourceCodeRange: { endOffset: 4, startOffset: 3 }, text: "A" }, + { sourceCodeRange: { endOffset: 5, startOffset: 4 }, text: " " }, + { sourceCodeRange: { endOffset: 9, startOffset: 5 }, text: "cute" }, + { sourceCodeRange: { endOffset: 10, startOffset: 9 }, text: " " }, + { sourceCodeRange: { endOffset: 13, startOffset: 10 }, text: "red" }, + { sourceCodeRange: { endOffset: 14, startOffset: 13 }, text: " " }, + { sourceCodeRange: { endOffset: 19, startOffset: 14 }, text: "panda" }, + ], + }, + ] ); + } ); } ); diff --git a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js index df9aad42b42..01388e22248 100644 --- a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js +++ b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js @@ -139,13 +139,13 @@ const testCases = [ expectedCount: 2, expectedMarkings: [ new Mark( { - // eslint-disable-next-line max-len - marked: "Lorem ipsum dolor sit amet, consectetur keyword-keyword, keyword adipiscing elit.", + marked: "Lorem ipsum dolor sit amet, consectetur keyword-keyword," + + " keyword adipiscing elit.", original: "Lorem ipsum dolor sit amet, consectetur keyword-keyword, keyword adipiscing elit.", position: { endOffset: 58, startOffset: 43 } } ), new Mark( { - // eslint-disable-next-line max-len - marked: "Lorem ipsum dolor sit amet, consectetur keyword-keyword, keyword adipiscing elit.", + marked: "Lorem ipsum dolor sit amet, consectetur keyword-keyword," + + " keyword adipiscing elit.", original: "Lorem ipsum dolor sit amet, consectetur keyword-keyword, keyword adipiscing elit.", position: { endOffset: 67, startOffset: 60 } } ) ], skip: false, @@ -158,13 +158,27 @@ const testCases = [ expectedCount: 2, expectedMarkings: [ new Mark( { - // eslint-disable-next-line max-len marked: "Lorem ipsum dolor sit amet, consectetur keyword-keyword, adipiscing elit.", original: "Lorem ipsum dolor sit amet, consectetur keyword-keyword, adipiscing elit.", position: { endOffset: 58, startOffset: 43 } } ), ], skip: false, }, + { + description: "counts one occurrence of reduplicated keyphrase separated by '-' as two occurrence in English " + + "when the keyphrase is enclosed in double quotes", + paper: new Paper( "
Lorem ipsum dolor sit amet, consectetur kupu-kupu, adipiscing elit.
", + { locale: "en_US", keyword: "\"kupu-kupu\"" } ), + keyphraseForms: [ [ "kupu-kupu" ] ], + expectedCount: 1, + expectedMarkings: [ + new Mark( { + marked: "Lorem ipsum dolor sit amet, consectetur kupu-kupu, adipiscing elit.", + original: "Lorem ipsum dolor sit amet, consectetur kupu-kupu, adipiscing elit.", + position: { endOffset: 52, startOffset: 43 } } ), + ], + skip: false, + }, { description: "counts a single word keyphrase with exact matching", paper: new Paper( "
A string with a keyword.
", { keyword: "\"keyword\"" } ), @@ -204,8 +218,8 @@ const testCases = [ skip: false, }, { - // eslint-disable-next-line max-len - description: "with exact matching, a multi word keyphrase should not be counted if the focus keyphrase has the same words in a different order", + description: "with exact matching, a multi word keyphrase should not be counted if " + + "the focus keyphrase has the same words in a different order", paper: new Paper( "
A string with a phrase key.
", { keyword: "\"key phrase\"" } ), keyphraseForms: [ [ "key phrase" ] ], expectedCount: 0, @@ -272,8 +286,13 @@ const testCases = [ }, ]; -// eslint-disable-next-line max-len -describe.each( testCases )( "Test for counting the keyword in a text in english", function( { description, paper, keyphraseForms, expectedCount, expectedMarkings, skip } ) { +describe.each( testCases )( "Test for counting the keyword in a text in english", function( { + description, + paper, + keyphraseForms, + expectedCount, + expectedMarkings, + skip } ) { const test = skip ? it.skip : it; test( description, function() { @@ -308,6 +327,39 @@ const testCasesWithSpecialCharacters = [ position: { endOffset: 27, startOffset: 19 } } ) ], skip: false, }, + { + description: "counts occurrence with dash only when the keyphrase contains dash", + paper: new Paper( "
A string with a key-phrase and key phrase.
", { keyword: "key-phrase" } ), + keyphraseForms: [ [ "key-phrase", "key-phrases" ] ], + expectedCount: 1, + expectedMarkings: [ new Mark( { marked: "A string with a key-phrase and key phrase.", + original: "A string with a key-phrase and key phrase.", + position: { endOffset: 29, startOffset: 19 } } ) ], + skip: false, + }, + { + description: "should be no match when the sentence contains the keyphrase with a dash but in the wrong order", + paper: new Paper( "
A string with a panda-red.
", { keyword: "red-panda" } ), + keyphraseForms: [ [ "red-panda" ] ], + expectedCount: 0, + expectedMarkings: [], + skip: false, + }, + { + description: "should find a match when the sentence contains the keyphrase with a dash but in the wrong order " + + "and the keyphrase doesn't contain dash", + paper: new Paper( "
A string with a panda-red.
", { keyword: "red panda" } ), + keyphraseForms: [ [ "red-panda" ] ], + expectedCount: 1, + expectedMarkings: [ + new Mark( { + original: "A string with a panda-red.", + marked: "A string with a panda-red.", + position: { startOffset: 16, endOffset: 25 }, + } ), + ], + skip: false, + }, { description: "counts 'key word' in 'key-word'.", paper: new Paper( "
A string with a key-word.
", { keyword: "key word" } ), @@ -323,7 +375,7 @@ const testCasesWithSpecialCharacters = [ skip: false, }, { - description: "counts 'key' in 'key-word'.", + description: "counts 'key' in 'key-phrase'", paper: new Paper( "
A string with a word of key-phrase.
", { keyword: "key word" } ), keyphraseForms: [ [ "key", "keys" ], [ "word", "words" ] ], expectedCount: 1, @@ -338,11 +390,11 @@ const testCasesWithSpecialCharacters = [ original: "A string with a word of key-phrase.", marked: "A string with a word of" + " key-phrase.", - position: { endOffset: 30, startOffset: 27 }, + position: { endOffset: 37, startOffset: 27 }, } ), + // Note that in this case, we'll highlight 'key-phrase' even though technically we only match "key" in "key-phrase". + // This is the consequence of tokenizing "key-phrase" into one token instead of three. ], - // eslint-disable-next-line no-warning-comments - // TODO: fix the highlighting. Should we only highlight "key", and not "key-phrase" in this case? skip: false, }, { diff --git a/packages/yoastseo/src/languageProcessing/helpers/match/matchWordFormsWithSentence.js b/packages/yoastseo/src/languageProcessing/helpers/match/matchWordFormsWithSentence.js index e57aa7fa55e..b3a75c4c2b9 100644 --- a/packages/yoastseo/src/languageProcessing/helpers/match/matchWordFormsWithSentence.js +++ b/packages/yoastseo/src/languageProcessing/helpers/match/matchWordFormsWithSentence.js @@ -154,7 +154,7 @@ const matchWordFormsInSentence = ( sentence, wordForms, locale, matchWordCustomH const matchWordFormsWithSentence = ( sentence, wordForms, locale, matchWordCustomHelper, useExactMatching = false ) => { /* * Only use `findExactMatchKeyphraseInSentence` when the custom helper is not available. - * When the custom helper is available, we don't differenciate the approach for exact matching or non-exact matching. + * When the custom helper is available, the step for the exact matching happens in the helper. */ if ( useExactMatching && ! matchWordCustomHelper ) { return findExactMatchKeyphraseInSentence( sentence, wordForms, locale ); From a3af81ce58f79cbba2f87e1f9104db24d8046939 Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Fri, 23 Jun 2023 15:52:13 +0200 Subject: [PATCH 187/616] remove TODO --- .../helpers/match/matchWordFormsWithSentenceSpec.js | 2 -- 1 file changed, 2 deletions(-) diff --git a/packages/yoastseo/spec/languageProcessing/helpers/match/matchWordFormsWithSentenceSpec.js b/packages/yoastseo/spec/languageProcessing/helpers/match/matchWordFormsWithSentenceSpec.js index 05f53cbf572..bb8f39da562 100644 --- a/packages/yoastseo/spec/languageProcessing/helpers/match/matchWordFormsWithSentenceSpec.js +++ b/packages/yoastseo/spec/languageProcessing/helpers/match/matchWordFormsWithSentenceSpec.js @@ -1,8 +1,6 @@ import customHelperJapanese from "../../../../src/languageProcessing/languages/ja/helpers/matchTextWithWord"; import matchWordFormsWithSentence from "../../../../src/languageProcessing/helpers/match/matchWordFormsWithSentence"; -// eslint-disable-next-line no-warning-comments -// TODO: Adapt all the unit tests for the expected results. Now the function returns an object. 
const testCases = [ { testDescription: "returns no match when no word form is found the sentence", From 8fc51a646e92d4f6af0f5dd893847b2290269e21 Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Mon, 26 Jun 2023 10:13:35 +0200 Subject: [PATCH 188/616] Adjust unit test --- .../researches/getKeywordDensitySpec.js | 118 ++++++------------ .../researches/keywordCountSpec.js | 4 +- .../researches/getKeywordDensity.js | 2 +- .../seo/KeywordDensityAssessment.js | 2 +- 4 files changed, 44 insertions(+), 82 deletions(-) diff --git a/packages/yoastseo/spec/languageProcessing/researches/getKeywordDensitySpec.js b/packages/yoastseo/spec/languageProcessing/researches/getKeywordDensitySpec.js index 2f3cbd20603..4a088b02a2a 100644 --- a/packages/yoastseo/spec/languageProcessing/researches/getKeywordDensitySpec.js +++ b/packages/yoastseo/spec/languageProcessing/researches/getKeywordDensitySpec.js @@ -3,6 +3,7 @@ import getKeywordDensity from "../../../src/languageProcessing/researches/getKey import Paper from "../../../src/values/Paper.js"; import EnglishResearcher from "../../../src/languageProcessing/languages/en/Researcher"; import JapaneseResearcher from "../../../src/languageProcessing/languages/ja/Researcher"; +import TurkishResearcher from "../../../src/languageProcessing/languages/tr/Researcher"; import DefaultResearcher from "../../../src/languageProcessing/languages/_default/Researcher"; import getMorphologyData from "../../specHelpers/getMorphologyData"; import buildTree from "../../specHelpers/parse/buildTree"; @@ -12,7 +13,7 @@ const morphologyDataEN = getMorphologyData( "en" ); describe( "Test for counting the keyword density in a text with an English researcher", function() { it( "should return a correct non-zero value if the text contains the keyword", function() { - const mockPaper = new Paper( "a string of text with the keyword in it, density should be 7.7%", { keyword: "keyword" } ); + const mockPaper = new Paper( "

<p>a string of text with the keyword in it, density should be 7.7%</p>

", { keyword: "keyword" } ); const mockResearcher = new EnglishResearcher( mockPaper ); buildTree( mockPaper, mockResearcher ); mockResearcher.addResearchData( "morphology", morphologyDataEN ); @@ -20,7 +21,7 @@ describe( "Test for counting the keyword density in a text with an English resea } ); it( "should return zero if the keyphrase does not occur in the text.", function() { - const mockPaper = new Paper( "a string of text without the keyword in it, density should be 0%", { keyword: "empty" } ); + const mockPaper = new Paper( "

<p>a string of text without the keyword in it, density should be 0%</p>

", { keyword: "empty" } ); const mockResearcher = new EnglishResearcher( mockPaper ); mockResearcher.addResearchData( "morphology", morphologyDataEN ); buildTree( mockPaper, mockResearcher ); @@ -29,7 +30,7 @@ describe( "Test for counting the keyword density in a text with an English resea it( "should recognize a keyphrase when it consists umlauted characters", function() { - const mockPaper = new Paper( "Waltz keepin auf mitz auf keepin äöüß weiner blitz deutsch spitzen. ", { keyword: "äöüß" } ); + const mockPaper = new Paper( "

<p>Waltz keepin auf mitz auf keepin äöüß weiner blitz deutsch spitzen.</p>

", { keyword: "äöüß" } ); const mockResearcher = new EnglishResearcher( mockPaper ); mockResearcher.addResearchData( "morphology", morphologyDataEN ); buildTree( mockPaper, mockResearcher ); @@ -38,174 +39,135 @@ describe( "Test for counting the keyword density in a text with an English resea } ); it( "should recognize a hyphenated keyphrase.", function() { - const mockPaper = new Paper( "Lorem ipsum dolor sit amet, key word consectetur key-word adipiscing elit ", { keyword: "key-word" } ); + const mockPaper = new Paper( "

<p>Lorem ipsum dolor sit amet, key word consectetur key-word adipiscing elit</p>

", { keyword: "key-word" } ); const mockResearcher = new EnglishResearcher( mockPaper ); mockResearcher.addResearchData( "morphology", morphologyDataEN ); buildTree( mockPaper, mockResearcher ); expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 9.090909090909092 ); } ); - it( "should correctly recognize a keyphrase with a non-latin 'ı' in it", function() { - const mockPaper = new Paper( "a string of text with the kapaklı in it, density should be 7.7%", { keyword: "kapaklı" } ); - const mockResearcher = new EnglishResearcher( mockPaper ); - mockResearcher.addResearchData( "morphology", morphologyDataEN ); - buildTree( mockPaper, mockResearcher ); - expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 7.6923076923076925 ); - } ); - it( "should recognize a keyphrase with an underscore in it", function() { - const mockPaper = new Paper( "a string of text with the key_word in it, density should be 7.7%", { keyword: "key_word" } ); + const mockPaper = new Paper( "

<p>a string of text with the key_word in it, density should be 7.7%</p>

", { keyword: "key_word" } ); const mockResearcher = new EnglishResearcher( mockPaper ); buildTree( mockPaper, mockResearcher ); expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 7.6923076923076925 ); } ); it( "should return zero if a multiword keyphrase is not present but the multiword keyphrase occurs with an underscore", function() { - const mockPaper = new Paper( "a string of text with the key_word in it, density should be 0.0%", { keyword: "key word" } ); + const mockPaper = new Paper( "

<p>a string of text with the key_word in it, density should be 0.0%</p>

", { keyword: "key word" } ); const mockResearcher = new EnglishResearcher( mockPaper ); buildTree( mockPaper, mockResearcher ); expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 0 ); } ); - it( "should not recognize a multiword keyphrase when it is occurs hyphenated", function() { - const mockPaper = new Paper( "a string of text with the key-word in it, density should be 7.7%", { keyword: "key word" } ); + it( "should recognize a multiword keyphrase when it is occurs hyphenated", function() { + const mockPaper = new Paper( "

<p>a string of text with the key-word in it, density should be 7.7%</p>

", { keyword: "key word" } ); const mockResearcher = new EnglishResearcher( mockPaper ); buildTree( mockPaper, mockResearcher ); - // This behavior might change in the future. - expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 0 ); + expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 7.6923076923076925 ); } ); it( "should recognize a keyphrase with an ampersand in it", function() { - const mockPaper = new Paper( "a string of text with the key&word in it, density should be 7.7%", { keyword: "key&word" } ); + const mockPaper = new Paper( "

<p>a string of text with the key&word in it, density should be 7.7%</p>

", { keyword: "key&word" } ); const mockResearcher = new EnglishResearcher( mockPaper ); buildTree( mockPaper, mockResearcher ); expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 7.6923076923076925 ); } ); - it( "should skip a keyphrase in case of three consecutive keyphrases", function() { - // Consecutive keywords are skipped, so this will match 2 times. - const mockPaper = new Paper( "

<p>This is a nice string with a keyword keyword keyword.</p>

", { keyword: "keyword" } ); - const mockResearcher = new EnglishResearcher( mockPaper ); - buildTree( mockPaper, mockResearcher ); - expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 20 ); - } ); - - it( "should only keep the first two keyphrases if there more than three consecutive", function() { - // Consecutive keywords are skipped, so this will match 2 times. + it( "should include all keyphrase occurrence if there are more than two consecutive", function() { const mockPaper = new Paper( "

<p>This is a nice string with a keyword keyword keyword keyword keyword keyword keyword keyword keyword.</p>

", { keyword: "keyword" } ); const mockResearcher = new EnglishResearcher( mockPaper ); buildTree( mockPaper, mockResearcher ); - expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 12.5 ); - } ); - - it( "should not skip a keyphrase in case of consecutive keyphrases with morphology sss", function() { - // Consecutive keywords are skipped, so this will match 2 times. - const mockPaper = new Paper( "

<p>This is a nice string with a keyword keyword keywords.</p>

", { keyword: "keyword" } ); - const mockResearcher = new EnglishResearcher( mockPaper ); - mockResearcher.addResearchData( "morphology", morphologyDataEN ); - buildTree( mockPaper, mockResearcher ); - expect( getKeywordDensity( mockPaper, mockResearcher ) ).toBe( 30 ); + expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 56.25 ); } ); - it( "should recognize a keyphrase with a '$' in it", function() { - const mockPaper = new Paper( "a string of text with the $keyword in it, density should be 7.7%", { keyword: "$keyword" } ); + const mockPaper = new Paper( "

<p>a string of text with the $keyword in it, density should be 7.7%</p>

", { keyword: "$keyword" } ); const mockResearcher = new EnglishResearcher( mockPaper ); buildTree( mockPaper, mockResearcher ); expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 7.6923076923076925 ); } ); it( "should recognize a keyphrase regardless of capitalization", function() { - const mockPaper = new Paper( "a string of text with the Keyword in it, density should be 7.7%", { keyword: "keyword" } ); + const mockPaper = new Paper( "

<p>a string of text with the Keyword in it, density should be 7.7%</p>

", { keyword: "keyword" } ); const mockResearcher = new EnglishResearcher( mockPaper ); buildTree( mockPaper, mockResearcher ); expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 7.6923076923076925 ); } ); it( "should recognize a multiword keyphrase regardless of capitalization", function() { - const mockPaper = new Paper( "a string of text with the Key word in it, density should be 14.29%", { keyword: "key word" } ); + const mockPaper = new Paper( "

<p>a string of text with the Key word in it, density should be 14.29%</p>

", { keyword: "key word" } ); const mockResearcher = new EnglishResearcher( mockPaper ); buildTree( mockPaper, mockResearcher ); expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 7.142857142857142 ); } ); it( "should recognize apostrophes regardless of the type of quote that was used", function() { - const mockPaper = new Paper( "a string with quotes to match the key'word, even if the quotes differ", { keyword: "key’word" } ); + const mockPaper = new Paper( "

<p>a string with quotes to match the key'word, even if the quotes differ</p>

", { keyword: "key’word" } ); const mockResearcher = new EnglishResearcher( mockPaper ); buildTree( mockPaper, mockResearcher ); expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 7.6923076923076925 ); } ); it( "should recognize keyphrase separated by  ", function() { - const mockPaper = new Paper( "a string with quotes to match the key word, even if the quotes differ", { keyword: "key word" } ); + const mockPaper = new Paper( "

<p>a string with non-breaking space to match the key&nbsp;word</p>

", { keyword: "key word" } ); const mockResearcher = new EnglishResearcher( mockPaper ); buildTree( mockPaper, mockResearcher ); - expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 7.142857142857142 ); + expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 10 ); } ); } ); -describe( "test for counting the keyword density in a text in a language that we haven't supported yet", function() { - it( "returns keyword density", function() { - const mockPaper = new Paper( "Ukara sing isine cemeng.", { keyword: "cemeng" } ); +describe( "test for counting the keyword density in a non-English and non-Japanese language", function() { + it( "returns keyword density for a language that we haven't had the support yet", function() { + const mockPaper = new Paper( "Ukara sing isine cemeng.", { keyword: "cemeng", locale: "jv_ID" } ); const mockResearcher = new DefaultResearcher( mockPaper ); buildTree( mockPaper, mockResearcher ); expect( getKeywordDensity( mockPaper, mockResearcher ) ).toBe( 25 ); } ); + it( "should correctly recognize a Turkish transliterated keyphrase with a 'ı' in it", function() { + const mockPaper = new Paper( "a string of text with the kapakli in it, density should be 7.7%

", { keyword: "kapaklı", locale: "tr_TR" } ); + const mockResearcher = new TurkishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 7.6923076923076925 ); + } ); } ); describe( "test for counting the keyword density in a text in a language that uses a custom getWords helper (Japanese)", function() { - /*it( "returns keyword density when the keyword is not found in the sentence", function() { - const mockPaper = new Paper( "小さくて可愛い花の刺繍に関する一般一般の記事です。", { keyword: "猫" } ); + it( "returns keyword density when the keyword is not found in the sentence", function() { + const mockPaper = new Paper( "

<p>小さくて可愛い花の刺繍に関する一般一般の記事です。</p>

", { keyword: "猫" } ); const mockResearcher = new JapaneseResearcher( mockPaper ); mockResearcher.addResearchData( "morphology", morphologyDataJA ); + buildTree( mockPaper, mockResearcher ); + expect( getKeywordDensity( mockPaper, mockResearcher ) ).toBe( 0 ); - } );*/ + } ); - it.skip( "returns the keyword density when the keyword is found once", function() { - const mockPaper = new Paper( "私の猫はかわいいです。", { keyword: "猫" } ); + it( "returns the keyword density when the keyword is found once", function() { + const mockPaper = new Paper( "

<p>私の猫はかわいいです。</p>

", { keyword: "猫" } ); const mockResearcher = new JapaneseResearcher( mockPaper ); mockResearcher.addResearchData( "morphology", morphologyDataJA ); buildTree( mockPaper, mockResearcher ); expect( getKeywordDensity( mockPaper, mockResearcher ) ).toBe( 16.666666666666664 ); } ); - it.skip( "returns the keyword density when the keyword contains multiple words and the sentence contains an inflected form", function() { + it( "returns the keyword density when the keyword contains multiple words and the sentence contains an inflected form", function() { // 小さく is the inflected form of 小さい. - const mockPaper = new Paper( "小さくて可愛い花の刺繍に関する一般一般の記事です。", { keyword: "小さい花の刺繍" } ); + const mockPaper = new Paper( "

<p>小さくて可愛い花の刺繍に関する一般一般の記事です。</p>

", { keyword: "小さい花の刺繍" } ); const mockResearcher = new JapaneseResearcher( mockPaper ); buildTree( mockPaper, mockResearcher ); mockResearcher.addResearchData( "morphology", morphologyDataJA ); expect( getKeywordDensity( mockPaper, mockResearcher ) ).toBe( 6.666666666666667 ); } ); - /*it( "returns the keyword density when the morphologyData is not available to detect inflected forms", function() { - // 小さく is the inflected form of 小さい. - const mockPaper = new Paper( "小さくて可愛い花の刺繍に関する一般一般の記事です。", { keyword: "小さい花の刺繍" } ); - const mockResearcher = new JapaneseResearcher( mockPaper ); - expect( getKeywordDensity( mockPaper, mockResearcher ) ).toBe( 0 ); - } ); - - it( "returns the keyword density when the keyword contains multiple words with an exact match separated in the sentence", function() { - const mockPaper = new Paper( "一日一冊の面白い本を買って読んでるのはできるかどうかやってみます。", { keyword: "一冊の本を読む" } ); - const mockResearcher = new JapaneseResearcher( mockPaper ); - mockResearcher.addResearchData( "morphology", morphologyDataJA ); - expect( getKeywordDensity( mockPaper, mockResearcher ) ).toBe( 5 ); - } ); - it( "returns the keyword density when the keyword contains multiple words with an exact match in the sentence", function() { - const mockPaper = new Paper( "一日一冊の本を読むのはできるかどうかやってみます。", { keyword: "一冊の本を読む" } ); + const mockPaper = new Paper( "

<p>一日一冊の本を読むのはできるかどうかやってみます。</p>

", { keyword: "『一冊の本を読む』" } ); const mockResearcher = new JapaneseResearcher( mockPaper ); mockResearcher.addResearchData( "morphology", morphologyDataJA ); + buildTree( mockPaper, mockResearcher ); + expect( getKeywordDensity( mockPaper, mockResearcher ) ).toBe( 6.666666666666667 ); } ); - - it( "returns 0 when the text is empty", function() { - const mockPaper = new Paper( "", { keyword: "猫" } ); - const mockResearcher = new JapaneseResearcher( mockPaper ); - mockResearcher.addResearchData( "morphology", morphologyDataJA ); - expect( getKeywordDensity( mockPaper, mockResearcher ) ).toBe( 0 ); - } );*/ } ); diff --git a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js index 01388e22248..f4dd51bc672 100644 --- a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js +++ b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js @@ -239,7 +239,7 @@ const testCases = [ { description: "can match dollar sign as in '$keyword' with exact matching.", paper: new Paper( "

<p>A string with a $keyword.</p>

", { keyword: "\"$keyword\"" } ), - keyphraseForms: [ [ "$keyword" ] ], + keyphraseForms: [ [ "\\$keyword" ] ], expectedCount: 1, expectedMarkings: [ new Mark( { marked: "A string with a $keyword.", original: "A string with a $keyword.", @@ -431,7 +431,7 @@ const testCasesWithSpecialCharacters = [ { description: "can match dollar sign as in '$keyword'.", paper: new Paper( "

<p>A string with a $keyword.</p>

", { keyword: "$keyword" } ), - keyphraseForms: [ [ "$keyword" ] ], + keyphraseForms: [ [ "\\$keyword" ] ], expectedCount: 1, expectedMarkings: [ new Mark( { marked: "A string with a $keyword.", original: "A string with a $keyword.", diff --git a/packages/yoastseo/src/languageProcessing/researches/getKeywordDensity.js b/packages/yoastseo/src/languageProcessing/researches/getKeywordDensity.js index 5c5df0e8e42..2d497a2e954 100644 --- a/packages/yoastseo/src/languageProcessing/researches/getKeywordDensity.js +++ b/packages/yoastseo/src/languageProcessing/researches/getKeywordDensity.js @@ -29,7 +29,7 @@ export default function getKeyphraseDensity( paper, researcher ) { /** * Calculates the keyphrase density. * - * @deprecated Since version 20.8. Use getKeyphraseDensity instead. + * @deprecated Since version 20.12. Use getKeyphraseDensity instead. * * @param {Object} paper The paper containing keyphrase and text. * @param {Object} researcher The researcher. diff --git a/packages/yoastseo/src/scoring/assessments/seo/KeywordDensityAssessment.js b/packages/yoastseo/src/scoring/assessments/seo/KeywordDensityAssessment.js index a1157e098f7..900f0bb5bf3 100644 --- a/packages/yoastseo/src/scoring/assessments/seo/KeywordDensityAssessment.js +++ b/packages/yoastseo/src/scoring/assessments/seo/KeywordDensityAssessment.js @@ -340,7 +340,7 @@ class KeyphraseDensityAssessment extends Assessment { * KeywordDensityAssessment was the previous name for KeyphraseDensityAssessment (hence the name of this file). * We keep (and expose) this assessment for backwards compatibility. * - * @deprecated Since version 18.8 Use KeyphraseDensityAssessment instead. + * @deprecated Since version 20.12 Use KeyphraseDensityAssessment instead. */ class KeywordDensityAssessment extends KeyphraseDensityAssessment { /** From 8de3b1743b4e7f8bdcf04abf8f2807673b1dec3c Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Mon, 26 Jun 2023 10:14:36 +0200 Subject: [PATCH 189/616] remove regex escaping step --- .../match/matchWordFormsWithSentence.js | 53 +++++++------------ 1 file changed, 19 insertions(+), 34 deletions(-) diff --git a/packages/yoastseo/src/languageProcessing/helpers/match/matchWordFormsWithSentence.js b/packages/yoastseo/src/languageProcessing/helpers/match/matchWordFormsWithSentence.js index b3a75c4c2b9..1f1b5a38246 100644 --- a/packages/yoastseo/src/languageProcessing/helpers/match/matchWordFormsWithSentence.js +++ b/packages/yoastseo/src/languageProcessing/helpers/match/matchWordFormsWithSentence.js @@ -1,28 +1,27 @@ -import { escapeRegExp } from "lodash-es"; import matchTextWithTransliteration from "./matchTextWithTransliteration"; import getWordsForHTMLParser from "../word/getWordsForHTMLParser"; /** - * Tokenizes keyword forms for exact matching. This function gets the keyword form and tokenizes it. - * This function assumes that if a keyphrase needs to be matched exactly, there will be only one keyword form. - * This is the result of how the focus keyword is processed in buildTopicStems.js in the buildStems function. + * Tokenizes the word form of the keyphrase for exact matching. This function gets the word form and tokenizes it. + * This function assumes that if a keyphrase needs to be matched exactly, there will be only one word form. + * This is the result of how the focus keyphrase is processed in buildTopicStems.js in the buildStems function. * - * @param {(string[])} wordForms The keyword forms to tokenize. + * @param {(string[])} wordForms The word forms to tokenize. 
* - * @returns {string[]} The tokenized keyword forms. + * @returns {string[]} The tokenized word forms. */ -export const tokenizeKeywordFormsForExactMatching = ( wordForms ) => { +export const tokenizeKeyphraseFormsForExactMatching = ( wordForms ) => { // Tokenize keyword forms. - const keywordFormsText = wordForms[ 0 ]; - return getWordsForHTMLParser( keywordFormsText ); + const wordFormText = wordForms[ 0 ]; + return getWordsForHTMLParser( wordFormText ); }; /** * Gets the exact matches of the keyphrase. - * Exact matching happens when the user puts the keyword in double quotes. + * Exact matching happens when the user puts the keyphrase in double quotes. * - * @param {Sentence} sentence The sentence to match the keyword forms with. - * @param {string[]} wordForms The keyword forms to match. + * @param {Sentence} sentence The sentence to match the word forms with. + * @param {string[]} wordForms The word forms to match. * @param {string} locale The locale used in the analysis. * * @returns {{count: number, matches: Token[]}} Object containing the number of the exact matches and the matched tokens. @@ -32,20 +31,20 @@ const findExactMatchKeyphraseInSentence = ( sentence, wordForms, locale ) => { count: 0, matches: [], }; - // Tokenize keyword form. - const keywordTokens = tokenizeKeywordFormsForExactMatching( wordForms ); + // Tokenize word forms of the keyphrase. + const keywordTokens = tokenizeKeyphraseFormsForExactMatching( wordForms ); const sentenceTokens = sentence.tokens; - // Check if tokenized keyword forms occur in the same order in the sentence tokens. + // Check if tokenized word forms occur in the same order in the sentence tokens. let keywordIndex = 0; let sentenceIndex = 0; let currentMatch = []; while ( sentenceIndex < sentenceTokens.length ) { - // If the current sentence token matches the current keyword token, add it to the current match. + // If the current sentence token matches the current word token, add it to the current match. const sentenceTokenText = sentenceTokens[ sentenceIndex ].text; - const keywordTokenText = escapeRegExp( keywordTokens[ keywordIndex ] ); + const keywordTokenText = keywordTokens[ keywordIndex ]; const foundMatches = matchTextWithTransliteration( sentenceTokenText.toLowerCase(), keywordTokenText.toLowerCase(), locale ); @@ -84,9 +83,7 @@ const matchTokensWithKeywordForm = ( tokens, wordForm, locale ) => { let matches = []; tokens.forEach( token => { - // Escaping for the regex is only necessary when we want to create a regex out of a string. - // In this case, we create a regex out of the keywordForm but not out of the token text. - const occurrence = matchTextWithTransliteration( token.text, escapeRegExp( wordForm ), locale ); + const occurrence = matchTextWithTransliteration( token.text, wordForm, locale ); if ( occurrence.length > 0 ) { matches = matches.concat( token ); } @@ -125,10 +122,10 @@ const matchWordFormsInSentence = ( sentence, wordForms, locale, matchWordCustomH }; /** - * Matches a keyword with a sentence object from the html parser. + * Matches the word forms of a keyphrase with a sentence object from the html parser. * * @param {Sentence} sentence The sentence to match against the keywordForms. - * @param {string[]} wordForms The array of keyword forms. + * @param {string[]} wordForms The array of word forms of the keyphrase. * E.g. 
If the keyphrase is "key word", then (if premium is activated) this will be [ "key", "keys" ] OR [ "word", "words" ] * The forms are retrieved higher up (among others in keywordCount.js) with researcher.getResearch( "morphology" ). * @@ -138,18 +135,6 @@ const matchWordFormsInSentence = ( sentence, wordForms, locale, matchWordCustomH * @param {boolean} useExactMatching Whether to match the keyword forms exactly or not. * * @returns {{count: number, matches: (Token|string)[]}} Object containing the number of the matches and the matched tokens. - * - * The algorithm is as follows: - * - * It iterates over all tokens in the sentence. It compares the current token with the keyword forms. - * If it matches, it adds the token to the matches array. - * - * The keyword forms are tokenized differently than the sentence. - * The keyword forms are tokenized with researcher.getResearch( "morphology" ) and the sentence is tokenized with the html parser. - * This leads to differences in tokenization. For example, the html parser tokenizes "key-word" as [ "key", "-", "word" ]. The morphology - * tokenizes it as [ "key-word" ]. - * This function corrects for these differences by combining tokens that are separated by a word coupler (e.g. "-") into one token: the matchToken. - * This matchToken is then compared with the keyword forms. */ const matchWordFormsWithSentence = ( sentence, wordForms, locale, matchWordCustomHelper, useExactMatching = false ) => { /* From 86430b541eb789937158dddbaadbe0d323850025 Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Mon, 26 Jun 2023 10:29:00 +0200 Subject: [PATCH 190/616] Adjust unit tests --- .../seo/KeywordDensityAssessmentSpec.js | 268 ++++++------------ 1 file changed, 86 insertions(+), 182 deletions(-) diff --git a/packages/yoastseo/spec/scoring/assessments/seo/KeywordDensityAssessmentSpec.js b/packages/yoastseo/spec/scoring/assessments/seo/KeywordDensityAssessmentSpec.js index 9b6b329dd98..62829430dbc 100644 --- a/packages/yoastseo/spec/scoring/assessments/seo/KeywordDensityAssessmentSpec.js +++ b/packages/yoastseo/spec/scoring/assessments/seo/KeywordDensityAssessmentSpec.js @@ -1,5 +1,4 @@ -/* eslint-disable capitalized-comments, spaced-comment */ -import KeywordDensityAssessment from "../../../../src/scoring/assessments/seo/KeywordDensityAssessment"; +import KeyphraseDensityAssessment from "../../../../src/scoring/assessments/seo/KeywordDensityAssessment"; import EnglishResearcher from "../../../../src/languageProcessing/languages/en/Researcher"; import GermanResearcher from "../../../../src/languageProcessing/languages/de/Researcher"; import DefaultResearcher from "../../../../src/languageProcessing/languages/_default/Researcher"; @@ -20,7 +19,7 @@ const longTextJapanese = "熱".repeat( 200 ); // Test data for language without morphology. const testDataWithDefaultResearcher = [ { - description: "runs the keywordDensity on the paper without keyword in the text", + description: "runs the keyphrase density on the paper without keyphrase occurrence found in the text", paper: new Paper( "
<p>" + nonkeyword.repeat( 1000 ) + "</p>
", { keyword: "keyword" } ), expectedResult: { score: 4, @@ -30,7 +29,7 @@ const testDataWithDefaultResearcher = [ }, }, { - description: "runs the keywordDensity on the paper with a low keyphrase density (0.1%)", + description: "runs the keyphrase density on the paper with a low keyphrase density (0.1%)", paper: new Paper( "
<p>" + nonkeyword.repeat( 999 ) + keyword + "</p>
", { keyword: "keyword" } ), expectedResult: { score: 4, @@ -40,7 +39,7 @@ const testDataWithDefaultResearcher = [ }, }, { - description: "runs the keywordDensity on the paper with a good keyphrase density (0.5%)", + description: "runs the keyphrase density on the paper with a good keyphrase density (0.5%)", paper: new Paper( "
<p>" + nonkeyword.repeat( 995 ) + keyword.repeat( 5 ) + "</p>
", { keyword: "keyword" } ), expectedResult: { score: 9, @@ -49,7 +48,7 @@ const testDataWithDefaultResearcher = [ }, }, { - description: "runs the keywordDensity on the paper with a good keyphrase density (2%)", + description: "runs the keyphrase density on the paper with a good keyphrase density (2%)", paper: new Paper( "
<p>" + nonkeyword.repeat( 980 ) + keyword.repeat( 20 ) + "</p>
", { keyword: "keyword" } ), expectedResult: { score: 9, @@ -58,7 +57,7 @@ const testDataWithDefaultResearcher = [ }, }, { - description: "runs the keywordDensity on the paper with a slightly too high keyphrase density (3.5%)", + description: "runs the keyphrase density on the paper with a slightly too high keyphrase density (3.5%)", paper: new Paper( "
<p>" + nonkeyword.repeat( 965 ) + keyword.repeat( 35 ) + "</p>
", { keyword: "keyword" } ), expectedResult: { score: -10, @@ -68,7 +67,7 @@ const testDataWithDefaultResearcher = [ }, }, { - description: "runs the keywordDensity on the paper with a very high keyphrase density (10%)", + description: "runs the keyphrase density on the paper with a very high keyphrase density (10%)", paper: new Paper( "
<p>" + nonkeyword.repeat( 900 ) + keyword.repeat( 100 ) + "</p>
", { keyword: "keyword" } ), expectedResult: { score: -50, @@ -78,7 +77,7 @@ const testDataWithDefaultResearcher = [ }, }, { - description: "adjusts the keyphrase density based on the length of the keyword with the actual density remaining at 2% - short keyphrase", + description: "adjusts the keyphrase density based on the length of the keyphrase with the actual density remaining at 2% - short keyphrase", paper: new Paper( "
<p>" + nonkeyword.repeat( 960 ) + "b c, ".repeat( 20 ) + "</p>
", { keyword: "b c" } ), expectedResult: { score: 9, @@ -99,7 +98,7 @@ const testDataWithDefaultResearcher = [ }, }, { - description: "adjusts the keyphrase density based on the length of the keyword with the actual density remaining at 2% - long keyphrase", + description: "adjusts the keyphrase density based on the length of the keyphrase with the actual density remaining at 2% - long keyphrase", paper: new Paper( "
<p>" + nonkeyword.repeat( 900 ) + "b c d e f, ".repeat( 20 ) + "</p>
", { keyword: "b c d e f" } ), expectedResult: { score: -50, @@ -109,7 +108,7 @@ const testDataWithDefaultResearcher = [ }, }, { - description: "returns a bad result if the keyword is only used once, regardless of the density", + description: "returns a bad result if the keyphrase is only used once, regardless of the density", paper: new Paper( "
<p>" + nonkeyword.repeat( 100 ) + keyword + "</p>
", { keyword: "keyword" } ), expectedResult: { score: 4, @@ -119,7 +118,7 @@ const testDataWithDefaultResearcher = [ }, }, { - description: "returns a good result if the keyword is used twice and " + + description: "returns a good result if the keyphrase is used twice and " + "the recommended count is smaller than or equal to 2, regardless of the density", paper: new Paper( "
<p>" + nonkeyword.repeat( 100 ) + "a b c, a b c</p>
", { keyword: "a b c", locale: "xx_XX" } ), expectedResult: { @@ -138,7 +137,7 @@ describe.each( testDataWithDefaultResearcher )( "a kephrase density test for lan const researcher = new DefaultResearcher( paper ); buildTree( paper, researcher ); it( description, () => { - const result = new KeywordDensityAssessment().getResult( paper, researcher ); + const result = new KeyphraseDensityAssessment().getResult( paper, researcher ); expect( result.getScore() ).toBe( expectedResult.score ); expect( result.getText() ).toBe( expectedResult.text ); @@ -147,38 +146,38 @@ describe.each( testDataWithDefaultResearcher )( "a kephrase density test for lan const testDataForApplicability = [ { - description: "applies to a paper with a keyword and a text of at least 100 words", + description: "applies to a paper with a keyphrase and a text of at least 100 words", paper: new Paper( "
<p>" + nonkeyword.repeat( 100 ) + "</p>
", { keyword: "keyword" } ),
		researcher: new DefaultResearcher(),
		expectedResult: true,
	},
	{
		description: "does not apply to a paper with a keyphrase and a text of at least 100 words when the text is inside an element " +
			"we want to exclude from the analysis",
		paper: new Paper( "<blockquote>" + nonkeyword.repeat( 100 ) + "</blockquote>
", { keyword: "keyword" } ), researcher: new DefaultResearcher(), expectedResult: false, }, { - description: "does not apply to a paper with text of 100 words but without a keyword", + description: "does not apply to a paper with text of 100 words but without a keyphrase", paper: new Paper( "
<p>" + nonkeyword.repeat( 100 ) + "</p>
", { keyword: "" } ), researcher: new DefaultResearcher(), expectedResult: false, }, { - description: "does not apply to a paper with a text containing less than 100 words and with a keyword", + description: "does not apply to a paper with a text containing less than 100 words and with a keyphrase", paper: new Paper( "
<p>" + nonkeyword.repeat( 99 ) + "</p>
", { keyword: "keyword" } ), researcher: new DefaultResearcher(), expectedResult: false, }, { - description: "does not apply to a paper with a text containing less than 100 words and without a keyword", + description: "does not apply to a paper with a text containing less than 100 words and without a keyphrase", paper: new Paper( "
<p>" + nonkeyword.repeat( 99 ) + "</p>
", { keyword: "" } ), researcher: new DefaultResearcher(), expectedResult: false, }, { - description: "applies to a Japanese paper with a keyword and a text of at least 200 characters", + description: "applies to a Japanese paper with a keyphrase and a text of at least 200 characters", paper: new Paper( "
<p>" + longTextJapanese + "</p>
", { keyword: "keyword" } ), researcher: new JapaneseResearcher(), expectedResult: true, @@ -199,40 +198,41 @@ describe.each( testDataForApplicability )( "assessment applicability test", ( { researcher.setPaper( paper ); buildTree( paper, researcher ); it( description, () => { - expect( new KeywordDensityAssessment().isApplicable( paper, researcher ) ).toBe( expectedResult ); + expect( new KeyphraseDensityAssessment().isApplicable( paper, researcher ) ).toBe( expectedResult ); } ); } ); -describe( "Tests for the keywordDensity assessment for languages with morphology", function() { - it( "gives a GOOD result when keyword density is between 3 and 3.5%", function() { +describe( "Tests for the keyphrase density assessment for languages with morphology", function() { + it( "gives a GOOD result when keyphrase density is between 3 and 3.5%", function() { const paper = new Paper( nonkeyword.repeat( 968 ) + keyword.repeat( 32 ), { keyword: "keyword", locale: "en_EN" } ); const researcher = new EnglishResearcher( paper ); researcher.addResearchData( "morphology", morphologyData ); buildTree( paper, researcher ); - const result = new KeywordDensityAssessment().getResult( paper, researcher ); + const result = new KeyphraseDensityAssessment().getResult( paper, researcher ); expect( result.getScore() ).toBe( 9 ); expect( result.getText() ).toBe( "Keyphrase density: " + "The keyphrase was found 32 times. This is great!" ); } ); - it( "gives a GOOD result when keyword density is between 3 and 3.5%, also for other languages with morphology support", function() { + it( "gives a GOOD result when keyphrase density is between 3 and 3.5%, also for other languages with morphology support", function() { const paper = new Paper( nonkeyword.repeat( 968 ) + keyword.repeat( 32 ), { keyword: "keyword", locale: "de_DE" } ); const researcher = new GermanResearcher( paper ); buildTree( paper, researcher ); researcher.addResearchData( "morphology", morphologyDataDe ); - const result = new KeywordDensityAssessment().getResult( paper, researcher ); + const result = new KeyphraseDensityAssessment().getResult( paper, researcher ); expect( result.getScore() ).toBe( 9 ); expect( result.getText() ).toBe( "Keyphrase density: " + "The keyphrase was found 32 times. This is great!" ); } ); - it( "gives a BAD result when keyword density is between 3 and 3.5%, if morphology support is added, but there is no morphology data", function() { + it( "gives a BAD result when keyphrase density is between 3 and 3.5%, if morphology support is added," + + " but there is no morphology data", function() { const paper = new Paper( nonkeyword.repeat( 968 ) + keyword.repeat( 32 ), { keyword: "keyword", locale: "de_DE" } ); const researcher = new GermanResearcher( paper ); buildTree( paper, researcher ); - const result = new KeywordDensityAssessment().getResult( paper, researcher ); + const result = new KeyphraseDensityAssessment().getResult( paper, researcher ); expect( result.getScore() ).toBe( -10 ); expect( result.getText() ).toBe( "Keyphrase density: " + "The keyphrase was found 32 times. That's more than the recommended maximum of 29 times for a text of this length. 
" + @@ -240,14 +240,14 @@ describe( "Tests for the keywordDensity assessment for languages with morphology } ); } ); -describe( "A test for marking the keyword", function() { +describe( "A test for marking the keyphrase", function() { it( "returns markers", function() { - const keywordDensityAssessment = new KeywordDensityAssessment(); + const keyphraseDensityAssessment = new KeyphraseDensityAssessment(); const paper = new Paper( "

<p>This is a very interesting paper with a keyword and another keyword.</p>

", { keyword: "keyword" } ); const researcher = new DefaultResearcher( paper ); buildTree( paper, researcher ); - keywordDensityAssessment.getResult( paper, researcher ); + keyphraseDensityAssessment.getResult( paper, researcher ); const expected = [ new Mark( { marked: "This is a very interesting paper with a " + @@ -263,26 +263,26 @@ describe( "A test for marking the keyword", function() { original: "This is a very interesting paper with a keyword and another keyword.", position: { endOffset: 70, startOffset: 63 }, } ) ]; - expect( keywordDensityAssessment.getMarks() ).toEqual( expected ); + expect( keyphraseDensityAssessment.getMarks() ).toEqual( expected ); } ); it( "returns markers for a keyphrase containing numbers", function() { - const keywordDensityAssessment = new KeywordDensityAssessment(); + const keyphraseDensityAssessment = new KeyphraseDensityAssessment(); const paper = new Paper( "

<p>This is the release of YoastSEO 9.3.</p>

", { keyword: "YoastSEO 9.3" } ); const researcher = new DefaultResearcher( paper ); buildTree( paper, researcher ); - keywordDensityAssessment.getResult( paper, researcher ); + keyphraseDensityAssessment.getResult( paper, researcher ); const expected = [ new Mark( { marked: "This is the release of YoastSEO 9.3.", original: "This is the release of YoastSEO 9.3.", position: { startOffset: 26, endOffset: 38 } } ) ]; - expect( keywordDensityAssessment.getMarks() ).toEqual( expected ); + expect( keyphraseDensityAssessment.getMarks() ).toEqual( expected ); } ); it( "returns markers for a keyphrase found in image caption", function() { - const keywordDensityAssessment = new KeywordDensityAssessment(); + const keyphraseDensityAssessment = new KeyphraseDensityAssessment(); const paper = new Paper( "

a different cat with toy " + "A flamboyant cat with a toy

\n" + @@ -291,7 +291,7 @@ describe( "A test for marking the keyword", function() { const researcher = new EnglishResearcher( paper ); buildTree( paper, researcher ); - keywordDensityAssessment.getResult( paper, researcher ); + keyphraseDensityAssessment.getResult( paper, researcher ); const expected = [ new Mark( { marked: " A flamboyant cat with a " + @@ -303,41 +303,8 @@ describe( "A test for marking the keyword", function() { "toy\n", original: " A flamboyant cat with a toy\n", position: { endOffset: 215, startOffset: 212 } } ), ]; - expect( keywordDensityAssessment.getMarks() ).toEqual( expected ); + expect( keyphraseDensityAssessment.getMarks() ).toEqual( expected ); } ); - - // it( "returns markers for a Japanese keyphrase enclosed in double quotes", function() { - // const paper = new Paper( japaneseSentenceWithKeyphraseExactMatch.repeat( 3 ), { - // keyword: "『一冊の本を読む』", - // locale: "ja", - // } ); - // const researcher = new JapaneseResearcher( paper ); - // const assessment = new KeywordDensityAssessment(); - // researcher.addResearchData( "morphology", morphologyDataJA ); - // - // const result = assessment.getResult( paper, researcher ); - // const marks = [ - // new Mark( { - // marked: "一日一冊の本を読むのはできるかどうかやってみます。", - // original: "一日一冊の本を読むのはできるかどうかやってみます。", - // } ), - // new Mark( { - // marked: "一日一冊の本を読むのはできるかどうかやってみます。", - // original: "一日一冊の本を読むのはできるかどうかやってみます。", - // } ), - // new Mark( { - // marked: "一日一冊の本を読むのはできるかどうかやってみます。", - // original: "一日一冊の本を読むのはできるかどうかやってみます。", - // } ), - // ]; - // - // expect( result.getScore() ).toBe( -50 ); - // expect( result.getText() ).toBe( "Keyphrase density: " + - // "The keyphrase was found 3 times." + - // " That's way more than the recommended maximum of 2 times for a text of this length. Don't" + - // " overoptimize!" ); - // expect( assessment.getMarks() ).toEqual( marks ); - // } ); } ); const japaneseSentence = "私の猫はかわいいです。小さくて可愛い花の刺繍に関する一般一般の記事です。".repeat( 20 ); @@ -388,6 +355,54 @@ const testDataJapanese = [ "Focus on your keyphrase!", }, }, + { + description: "gives a very BAD result when keyphrase density is above 4% when the text contains way too many instances" + + " of the keyphrase forms", + paper: new Paper( japaneseSentence + japaneseSentenceWithKeyphrase.repeat( 32 ), { + keyword: "一冊の本を読む", + locale: "ja", + } ), + expectedResult: { + score: -50, + text: "Keyphrase density: " + + "The keyphrase was found 32 times. That's way more than the recommended maximum of 23 times for a text of " + + "this length. Don't overoptimize!", + }, + }, + { + description: "gives a BAD result when keyphrase density is 0", + paper: new Paper( japaneseSentence, { keyword: "一冊の本を読む", locale: "ja" } ), + expectedResult: { + score: 4, + text: "Keyphrase density: " + + "The keyphrase was found 0 times. That's less than the recommended minimum of 2 times for a text of this length." + + " Focus on your keyphrase!", + }, + }, + { + description: "gives a GOOD result when keyphrase density is between 0.5% and 3.5% when the text contains keyphrase forms", + paper: new Paper( japaneseSentence + japaneseSentenceWithKeyphrase.repeat( 8 ), { + keyword: "一冊の本を読む", + locale: "ja", + } ), + expectedResult: { + score: 9, + text: "Keyphrase density: " + + "The keyphrase was found 8 times. 
This is great!", + }, + }, + { + description: "gives a GOOD result when keyphrase density is between 0.5% and 3.5% when the text contains the exact match of keyphrase forms", + paper: new Paper( japaneseSentence + japaneseSentenceWithKeyphraseExactMatch.repeat( 8 ), { + keyword: "『一冊の本を読む』", + locale: "ja", + } ), + expectedResult: { + score: 9, + text: "Keyphrase density: " + + "The keyphrase was found 8 times. This is great!", + }, + }, ]; describe.each( testDataJapanese )( "test for keyphrase density in Japanese", ( { @@ -398,7 +413,7 @@ describe.each( testDataJapanese )( "test for keyphrase density in Japanese", ( { const researcher = new JapaneseResearcher( paper ); researcher.addResearchData( "morphology", morphologyDataJA ); buildTree( paper, researcher ); - const result = new KeywordDensityAssessment().getResult( paper, researcher ); + const result = new KeyphraseDensityAssessment().getResult( paper, researcher ); it( description, () => { expect( result.getScore() ).toBe( expectedResult.score ); @@ -406,115 +421,4 @@ describe.each( testDataJapanese )( "test for keyphrase density in Japanese", ( { } ); } ); -// eslint-disable-next-line no-warning-comments -// TODO: ADAPT the tests below. -xdescribe( "A test for keyword density in Japanese", function() { - it( "gives a very BAD result when keyword density is above 4% when the text contains way too many instances" + - " of the keyphrase forms", function() { - const paper = new Paper( japaneseSentence + japaneseSentenceWithKeyphrase.repeat( 32 ), { - keyword: "一冊の本を読む", - locale: "ja", - } ); - const researcher = new JapaneseResearcher( paper ); - researcher.addResearchData( "morphology", morphologyDataJA ); - const result = new KeywordDensityAssessment().getResult( paper, researcher ); - expect( result.getScore() ).toBe( -50 ); - expect( result.getText() ).toBe( "Keyphrase density: " + - "The keyphrase was found 32 times. That's way more than the recommended maximum of 23 times for a text of " + - "this length. Don't overoptimize!" ); - } ); - - it( "gives a BAD result when keyword density is between 3% and 4% when the text contains too many instances" + - " of the keyphrase forms", function() { - const paper = new Paper( japaneseSentence + japaneseSentenceWithKeyphrase.repeat( 16 ), { - keyword: "一冊の本を読む", - locale: "ja", - } ); - const researcher = new JapaneseResearcher( paper ); - researcher.addResearchData( "morphology", morphologyDataJA ); - const result = new KeywordDensityAssessment().getResult( paper, researcher ); - expect( result.getScore() ).toBe( -10 ); - expect( result.getText() ).toBe( "Keyphrase density:" + - " The keyphrase was found 16 times. That's more than the recommended maximum of 15 times for a text of this length." + - " Don't overoptimize!" ); - } ); - - it( "gives a BAD result when keyword density is 0", function() { - const paper = new Paper( japaneseSentence, { keyword: "一冊の本を読む", locale: "ja" } ); - const researcher = new JapaneseResearcher( paper ); - researcher.addResearchData( "morphology", morphologyDataJA ); - const result = new KeywordDensityAssessment().getResult( paper, researcher ); - expect( result.getScore() ).toBe( 4 ); - expect( result.getText() ).toBe( "Keyphrase density: " + - "The keyphrase was found 0 times. That's less than the recommended minimum of 2 times for a text of this length." + - " Focus on your keyphrase!" 
); - } ); - - it( "gives a BAD result when keyword density is between 0 and 0.5%", function() { - const paper = new Paper( japaneseSentence + japaneseSentenceWithKeyphrase.repeat( 1 ), { - keyword: "一冊の本を読む", - locale: "ja", - } ); - const researcher = new JapaneseResearcher( paper ); - researcher.addResearchData( "morphology", morphologyDataJA ); - const result = new KeywordDensityAssessment().getResult( paper, researcher ); - expect( result.getScore() ).toBe( 4 ); - expect( result.getText() ).toBe( "Keyphrase density:" + - " The keyphrase was found 1 time. That's less than the recommended minimum of 2 times for a text of this length." + - " Focus on your keyphrase!" ); - } ); - - it( "gives a GOOD result when keyword density is between 0.5% and 3.5% when the text contains keyphrase forms", function() { - const paper = new Paper( japaneseSentence + japaneseSentenceWithKeyphrase.repeat( 8 ), { - keyword: "一冊の本を読む", - locale: "ja", - } ); - const researcher = new JapaneseResearcher( paper ); - researcher.addResearchData( "morphology", morphologyDataJA ); - const result = new KeywordDensityAssessment().getResult( paper, researcher ); - expect( result.getScore() ).toBe( 9 ); - expect( result.getText() ).toBe( "Keyphrase density: " + - "The keyphrase was found 8 times. This is great!" ); - } ); - - it( "gives a GOOD result when keyword density is between 0.5% and 3%, when the exact match of the keyphrase is in the text", function() { - const paper = new Paper( japaneseSentence + japaneseSentenceWithKeyphraseExactMatch.repeat( 8 ), { - keyword: "一冊の本を読む", - locale: "ja", - } ); - const researcher = new JapaneseResearcher( paper ); - researcher.addResearchData( "morphology", morphologyDataJA ); - const result = new KeywordDensityAssessment().getResult( paper, researcher ); - expect( result.getScore() ).toBe( 9 ); - expect( result.getText() ).toBe( "Keyphrase density: " + - "The keyphrase was found 8 times. This is great!" ); - } ); - - it( "should still gives a GOOD result when keyword density is between 0.5% and 3%, when the exact match of the keyphrase is in the text " + - "and the keyphrase is enclosed in double quotes", function() { - const paper = new Paper( japaneseSentence + japaneseSentenceWithKeyphraseExactMatch.repeat( 8 ), { - keyword: "「一冊の本を読む」", - locale: "ja", - } ); - const researcher = new JapaneseResearcher( paper ); - researcher.addResearchData( "morphology", morphologyDataJA ); - const result = new KeywordDensityAssessment().getResult( paper, researcher ); - expect( result.getScore() ).toBe( 9 ); - expect( result.getText() ).toBe( "Keyphrase density: " + - "The keyphrase was found 8 times. This is great!" ); - } ); - - it( "gives a BAD result when keyword density is between 0.5% and 3.5%, if morphology is added, but there is no morphology data", function() { - const paper = new Paper( japaneseSentence + japaneseSentenceWithKeyphrase.repeat( 8 ), { - keyword: "一冊の本を読む", - locale: "ja", - } ); - const researcher = new JapaneseResearcher( paper ); - const result = new KeywordDensityAssessment().getResult( paper, researcher ); - expect( result.getScore() ).toBe( 4 ); - expect( result.getText() ).toBe( "Keyphrase density: " + - "The keyphrase was found 0 times. That's less than the recommended minimum of 2 times for a text of this length." + - " Focus on your keyphrase!" 
); - } ); -} ); From 3ff4c2d8f7b2703e9c4f4a4ff4d827cf77f51acc Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Mon, 26 Jun 2023 11:05:07 +0200 Subject: [PATCH 191/616] Adapt unit tests --- .../highlighting/getMarkingsInSentenceSpec.js | 40 +++---------- .../recommendedKeywordCountSpec.js | 57 +++++++++++++++++-- 2 files changed, 58 insertions(+), 39 deletions(-) diff --git a/packages/yoastseo/spec/languageProcessing/helpers/highlighting/getMarkingsInSentenceSpec.js b/packages/yoastseo/spec/languageProcessing/helpers/highlighting/getMarkingsInSentenceSpec.js index a46b943f6de..fecd6d5ac66 100644 --- a/packages/yoastseo/spec/languageProcessing/helpers/highlighting/getMarkingsInSentenceSpec.js +++ b/packages/yoastseo/spec/languageProcessing/helpers/highlighting/getMarkingsInSentenceSpec.js @@ -1,14 +1,11 @@ import getMarkingsInSentence from "../../../../src/languageProcessing/helpers/highlighting/getMarkingsInSentence"; import Mark from "../../../../src/values/Mark"; -import JapaneseCustomHelper from "../../../../src/languageProcessing/languages/ja/helpers/matchTextWithWord"; -/* eslint-disable max-len */ const testCases = [ { testDescription: "No markings in sentence", sentence: { text: "This is a sentence.", sourceCodeRange: { startOffset: 0, endOffset: 18 } }, matchesInSentence: [], - matchWordCustomHelper: false, locale: "en_US", expectedResult: [], }, @@ -16,7 +13,6 @@ const testCases = [ testDescription: "One marking in sentence", sentence: { text: "This is a sentence.", sourceCodeRange: { startOffset: 0, endOffset: 18 } }, matchesInSentence: [ { sourceCodeRange: { startOffset: 0, endOffset: 4 } } ], - matchWordCustomHelper: false, locale: "en_US", expectedResult: [ new Mark( { marked: "This is a sentence.", @@ -28,7 +24,6 @@ const testCases = [ testDescription: "One marking in sentence with two consecutive matches", sentence: { text: "This is a sentence.", sourceCodeRange: { startOffset: 0, endOffset: 18 } }, matchesInSentence: [ { sourceCodeRange: { startOffset: 0, endOffset: 4 } }, { sourceCodeRange: { startOffset: 5, endOffset: 7 } } ], - matchWordCustomHelper: false, locale: "en_US", expectedResult: [ new Mark( { marked: "This is a sentence.", @@ -40,7 +35,6 @@ const testCases = [ testDescription: "Two markings that are not consecutive in sentence", sentence: { text: "This is a sentence.", sourceCodeRange: { startOffset: 0, endOffset: 18 } }, matchesInSentence: [ { sourceCodeRange: { startOffset: 0, endOffset: 4 } }, { sourceCodeRange: { startOffset: 10, endOffset: 18 } } ], - matchWordCustomHelper: false, locale: "en_US", expectedResult: [ new Mark( { @@ -59,7 +53,6 @@ const testCases = [ testDescription: "One marking in a sentence that has a non-zero startOffset", sentence: { text: "This is a sentence.", sourceCodeRange: { startOffset: 10, endOffset: 38 } }, matchesInSentence: [ { sourceCodeRange: { startOffset: 10, endOffset: 14 } } ], - matchWordCustomHelper: false, locale: "en_US", expectedResult: [ new Mark( { marked: "This is a sentence.", @@ -67,23 +60,10 @@ const testCases = [ position: { endOffset: 14, startOffset: 10 }, } ) ], }, - { - testDescription: "One marking in a sentence of a language that does not use spaces", - sentence: { text: "これは文です.", sourceCodeRange: { startOffset: 0, endOffset: 7 } }, - matchesInSentence: [ { sourceCodeRange: { startOffset: 3, endOffset: 4 } } ], - matchWordCustomHelper: JapaneseCustomHelper, - locale: "ja", - expectedResult: [ new Mark( { - marked: "これはです.", - original: "これは文です.", - position: { endOffset: 4, startOffset: 3 }, - } ) ], 
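// Side note, as an illustrative sketch only (buildPositionMarks is a hypothetical name,
// not the production getMarkingsInSentence helper): the helper essentially turns matched
// token ranges into Mark-like objects that keep the sentence text as "original" together
// with start/end offsets, which the editor later uses to place the highlight.
//
// const buildPositionMarks = ( sentence, matchedRanges ) =>
// 	matchedRanges.map( ( { startOffset, endOffset } ) => ( {
// 		original: sentence.text,
// 		position: { startOffset, endOffset },
// 	} ) );
//
// Example:
// buildPositionMarks(
// 	{ text: "This is a sentence.", sourceCodeRange: { startOffset: 0, endOffset: 18 } },
// 	[ { startOffset: 0, endOffset: 4 } ]
// );
// → [ { original: "This is a sentence.", position: { startOffset: 0, endOffset: 4 } } ]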
- }, { testDescription: "Two markings that overlap", sentence: { text: "This is a sentence.", sourceCodeRange: { startOffset: 0, endOffset: 18 } }, matchesInSentence: [ { sourceCodeRange: { startOffset: 0, endOffset: 7 } }, { sourceCodeRange: { startOffset: 5, endOffset: 9 } } ], - matchWordCustomHelper: false, locale: "en_US", expectedResult: [ new Mark( { @@ -93,21 +73,15 @@ const testCases = [ } ), ], }, - // { - // testDescription: "No secondary match if a multi word keyphrase is separated with an underscore in the sentence.", - // sentence: { text: "A key sentence with a key_word.", sourceCodeRange: { startOffset: 0, endOffset: 31 } }, - // matchesInSentence: [ { sourceCodeRange: { startOffset: 2, endOffset: 5 } } ] ] }, - // matchWordCustomHelper: false, - // locale: "en_US", - // expectedResult: [], - // }, - ]; -/* eslint-enable max-len */ -// eslint-disable-next-line max-len -describe.each( testCases )( "getMarkingsInSentence", ( { testDescription, sentence, matchesInSentence, matchWordCustomHelper, locale, expectedResult } ) => { +describe.each( testCases )( "a test for getting the marks from a sentence", ( { + testDescription, + sentence, + matchesInSentence, + locale, + expectedResult } ) => { it( testDescription, () => { - expect( getMarkingsInSentence( sentence, matchesInSentence, matchWordCustomHelper, locale ) ).toEqual( expectedResult ); + expect( getMarkingsInSentence( sentence, matchesInSentence, locale ) ).toEqual( expectedResult ); } ); } ); diff --git a/packages/yoastseo/spec/scoring/helpers/assessments/recommendedKeywordCountSpec.js b/packages/yoastseo/spec/scoring/helpers/assessments/recommendedKeywordCountSpec.js index 8ca7d4dfd55..9c23557c25b 100644 --- a/packages/yoastseo/spec/scoring/helpers/assessments/recommendedKeywordCountSpec.js +++ b/packages/yoastseo/spec/scoring/helpers/assessments/recommendedKeywordCountSpec.js @@ -1,37 +1,82 @@ import recommendedKeywordCount from "../../../../src/scoring/helpers/assessments/recommendedKeywordCount.js"; +import Paper from "../../../../src/values/Paper"; +import DefaultResearcher from "../../../../src/languageProcessing/languages/_default/Researcher"; +import japaneseWordHelper from "../../../../src/languageProcessing/languages/ja/helpers/getWords"; +import buildTree from "../../../specHelpers/parse/buildTree"; const maxRecommendedDensity = 3; const minRecommendedDensity = 0.5; const a = "a "; +const defaultResearcher = new DefaultResearcher(); describe( "Test for getting the recommended keyword count for a text", function() { it( "returns the maximum recommended keyword count for a text with 300 words and a 1-word keyphrase", function() { const text = a.repeat( 300 ); - expect( recommendedKeywordCount( text, 1, maxRecommendedDensity, "max" ) ).toBe( 8 ); + const paper = new Paper( "
<p>" + text + "</p>
" ); + buildTree( paper, defaultResearcher ); + expect( recommendedKeywordCount( paper, 1, maxRecommendedDensity, "max" ) ).toBe( 8 ); + } ); + + it( "returns the maximum recommended keyword count for a text with 300 words and a 1-word keyphrase, " + + "using the custom helper for retrieving the words", function() { + // The sentence contains 10 words. + const text = "計画段階では「東海道新線」と呼ばれ開業て.".repeat( 30 ); + const paper = new Paper( "
<p>" + text + "</p>
" ); + buildTree( paper, defaultResearcher ); + expect( recommendedKeywordCount( paper, 1, maxRecommendedDensity, "max", japaneseWordHelper ) ).toBe( 8 ); } ); it( "returns the maximum recommended keyword count for a text with 300 words and a multi-word keyphrase", function() { const text = a.repeat( 300 ); - expect( recommendedKeywordCount( text, 2, maxRecommendedDensity, "max" ) ).toBe( 6 ); + const paper = new Paper( "
<p>" + text + "</p>
" ); + buildTree( paper, defaultResearcher ); + + expect( recommendedKeywordCount( paper, 2, maxRecommendedDensity, "max" ) ).toBe( 6 ); } ); it( "returns the minimum recommended keyword count for a text with 1200 words and a 1-word keyphrase", function() { const text = a.repeat( 1200 ); - expect( recommendedKeywordCount( text, 1, minRecommendedDensity, "min" ) ).toBe( 6 ); + const paper = new Paper( "
<p>" + text + "</p>
" ); + buildTree( paper, defaultResearcher ); + + expect( recommendedKeywordCount( paper, 1, minRecommendedDensity, "min" ) ).toBe( 6 ); } ); it( "returns 2 as the default recommended minimum keyword count if the text is very short", function() { const text = a.repeat( 3 ); - expect( recommendedKeywordCount( text, 1, minRecommendedDensity, "min" ) ).toBe( 2 ); + const paper = new Paper( "
<p>" + text + "</p>
" ); + buildTree( paper, defaultResearcher ); + + expect( recommendedKeywordCount( paper, 1, minRecommendedDensity, "min" ) ).toBe( 2 ); } ); it( "returns 0 when the word count of the text is 0", function() { const text = ""; - expect( recommendedKeywordCount( text, 1, maxRecommendedDensity, "max" ) ).toBe( 0 ); + const paper = new Paper( "
<p>" + text + "</p>
" ); + buildTree( paper, defaultResearcher ); + + expect( recommendedKeywordCount( paper, 1, maxRecommendedDensity, "max" ) ).toBe( 0 ); } ); it( "returns the maximum recommended keyword count when the maxOrMin variable is not explicitly defined", function() { const text = a.repeat( 300 ); - expect( recommendedKeywordCount( text, 1, maxRecommendedDensity ) ).toBe( 8 ); + const paper = new Paper( "
<p>" + text + "</p>
" ); + buildTree( paper, defaultResearcher ); + + expect( recommendedKeywordCount( paper, 1, maxRecommendedDensity ) ).toBe( 8 ); + } ); + + it( "returns 0 when the paper is empty", function() { + const paper = new Paper( "" ); + buildTree( paper, defaultResearcher ); + + expect( recommendedKeywordCount( paper, 1, maxRecommendedDensity ) ).toBe( 0 ); + } ); + + it( "returns 0 when the paper only contains excluded element", function() { + const paper = new Paper( "
<blockquote>In ancient Egyptian mythology, cats were highly revered and considered sacred animals.</blockquote>
" ); + buildTree( paper, defaultResearcher ); + + expect( recommendedKeywordCount( paper, 1, maxRecommendedDensity ) ).toBe( 0 ); } ); } ); From bf76515cd7010a1a0fadcd6cf46725c7cdd7e008 Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Mon, 26 Jun 2023 12:16:26 +0200 Subject: [PATCH 192/616] Add a new method to retrieve the position info --- packages/yoastseo/src/values/Mark.js | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/packages/yoastseo/src/values/Mark.js b/packages/yoastseo/src/values/Mark.js index 1e36be49cbe..8b56e788fed 100644 --- a/packages/yoastseo/src/values/Mark.js +++ b/packages/yoastseo/src/values/Mark.js @@ -50,6 +50,15 @@ Mark.prototype.getFieldsToMark = function() { return this._properties.fieldsToMark; }; +/** + * Returns the position information. + * + * @returns {number} The position information. + */ +Mark.prototype.getPosition = function() { + return this._properties.position; +}; + /** * Returns the start position. * From 933344eb8212019571a92f4051b53391eaad42b3 Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Mon, 26 Jun 2023 12:17:22 +0200 Subject: [PATCH 193/616] remove duplicates with the same position info --- packages/yoastseo/src/markers/removeDuplicateMarks.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/yoastseo/src/markers/removeDuplicateMarks.js b/packages/yoastseo/src/markers/removeDuplicateMarks.js index 41e4c2d84b0..5b7755e97f3 100644 --- a/packages/yoastseo/src/markers/removeDuplicateMarks.js +++ b/packages/yoastseo/src/markers/removeDuplicateMarks.js @@ -8,7 +8,7 @@ import { uniqBy } from "lodash-es"; */ function removeDuplicateMarks( marks ) { return uniqBy( marks, function( mark ) { - return mark.getOriginal(); + return mark.hasPosition() ? mark.getPosition() : mark.getOriginal(); } ); } From fc99491be941cbeeff37193a3e46e4e81353e7c9 Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Mon, 26 Jun 2023 12:17:48 +0200 Subject: [PATCH 194/616] Add unit tests --- .../researches/keywordCountSpec.js | 162 +++++++++--------- 1 file changed, 81 insertions(+), 81 deletions(-) diff --git a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js index f4dd51bc672..6e9c029869e 100644 --- a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js +++ b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js @@ -24,84 +24,84 @@ const buildMorphologyMockResearcher = function( keyphraseForms ) { }; const testCases = [ - { - description: "counts/marks a string of text with a keyword in it.", - paper: new Paper( "

<p>a string of text with the keyword in it</p>

", { keyword: "keyword" } ), - keyphraseForms: [ [ "keyword", "keywords" ] ], - expectedCount: 1, - expectedMarkings: [ - new Mark( { marked: "a string of text with the keyword in it", - original: "a string of text with the keyword in it", - position: { endOffset: 36, startOffset: 29 } } ) ], - skip: false, - }, - { - description: "counts a string of text with no keyword in it.", - paper: new Paper( "

<p>a string of text</p>

", { keyword: "" } ), - keyphraseForms: [], - expectedCount: 0, - expectedMarkings: [], - skip: false, - }, - { - description: "counts multiple occurrences of a keyphrase consisting of multiple words.", - paper: new Paper( "

<p>a string of text with the key word in it, with more key words.</p>

", { keyword: "key word" } ), - keyphraseForms: [ [ "key", "keys" ], [ "word", "words" ] ], - expectedCount: 2, - expectedMarkings: [ - new Mark( { marked: "a string of text with the key word in it, " + - "with more key words.", - original: "a string of text with the key word in it, with more key words.", - position: { endOffset: 37, startOffset: 29 } } ), - new Mark( { marked: "a string of text with the key word in it, " + - "with more key words.", - original: "a string of text with the key word in it, with more key words.", - position: { endOffset: 64, startOffset: 55 } } ) ], - skip: false, - }, - { - description: "counts a string with multiple keyword morphological forms", - paper: new Paper( "

<p>A string of text with a keyword and multiple keywords in it.</p>

", { keyword: "keyword" } ), - keyphraseForms: [ [ "keyword", "keywords" ] ], - expectedCount: 2, - expectedMarkings: [ - new Mark( { marked: "A string of text with a keyword " + - "and multiple keywords in it.", - original: "A string of text with a keyword and multiple keywords in it.", - position: { endOffset: 34, startOffset: 27 } } ), - new Mark( { marked: "A string of text with a keyword " + - "and multiple keywords in it.", - original: "A string of text with a keyword and multiple keywords in it.", - position: { endOffset: 56, startOffset: 48 } } ) ], - skip: false, - }, - { - description: "does not count images as keywords.", - paper: new Paper( "

A text with image

", { keyword: "image" } ), - keyphraseForms: [ [ "image", "images" ] ], - expectedCount: 0, - expectedMarkings: [], - skip: false, - }, - { - description: "also matches same phrase with different capitalization.", - paper: new Paper( "

<p>A string with KeY worD.</p>

", { keyword: "key word" } ), - keyphraseForms: [ [ "key", "keys" ], [ "word", "words" ] ], - expectedCount: 1, - expectedMarkings: [ - new Mark( { marked: "A string with KeY worD.", - original: "A string with KeY worD.", - position: { endOffset: 25, startOffset: 17 } } ) ], - skip: false, - }, - { - description: "doesn't count keyphrase instances inside elements we want to exclude from the analysis", - paper: new Paper( "

There is no keyword in this sentence.

" ), - keyphraseForms: [ [ "keyword", "keywords" ] ], - expectedCount: 0, - expectedMarkings: [], - skip: false, - }, + // { + // description: "counts/marks a string of text with a keyword in it.", + // paper: new Paper( "

<p>a string of text with the keyword in it</p>

", { keyword: "keyword" } ), + // keyphraseForms: [ [ "keyword", "keywords" ] ], + // expectedCount: 1, + // expectedMarkings: [ + // new Mark( { marked: "a string of text with the keyword in it", + // original: "a string of text with the keyword in it", + // position: { endOffset: 36, startOffset: 29 } } ) ], + // skip: false, + // }, + // { + // description: "counts a string of text with no keyword in it.", + // paper: new Paper( "

<p>a string of text</p>

", { keyword: "" } ), + // keyphraseForms: [], + // expectedCount: 0, + // expectedMarkings: [], + // skip: false, + // }, + // { + // description: "counts multiple occurrences of a keyphrase consisting of multiple words.", + // paper: new Paper( "

<p>a string of text with the key word in it, with more key words.</p>

", { keyword: "key word" } ), + // keyphraseForms: [ [ "key", "keys" ], [ "word", "words" ] ], + // expectedCount: 2, + // expectedMarkings: [ + // new Mark( { marked: "a string of text with the key word in it, " + + // "with more key words.", + // original: "a string of text with the key word in it, with more key words.", + // position: { endOffset: 37, startOffset: 29 } } ), + // new Mark( { marked: "a string of text with the key word in it, " + + // "with more key words.", + // original: "a string of text with the key word in it, with more key words.", + // position: { endOffset: 64, startOffset: 55 } } ) ], + // skip: false, + // }, + // { + // description: "counts a string with multiple keyword morphological forms", + // paper: new Paper( "

<p>A string of text with a keyword and multiple keywords in it.</p>

", { keyword: "keyword" } ), + // keyphraseForms: [ [ "keyword", "keywords" ] ], + // expectedCount: 2, + // expectedMarkings: [ + // new Mark( { marked: "A string of text with a keyword " + + // "and multiple keywords in it.", + // original: "A string of text with a keyword and multiple keywords in it.", + // position: { endOffset: 34, startOffset: 27 } } ), + // new Mark( { marked: "A string of text with a keyword " + + // "and multiple keywords in it.", + // original: "A string of text with a keyword and multiple keywords in it.", + // position: { endOffset: 56, startOffset: 48 } } ) ], + // skip: false, + // }, + // { + // description: "does not count images as keywords.", + // paper: new Paper( "

A text with image

", { keyword: "image" } ), + // keyphraseForms: [ [ "image", "images" ] ], + // expectedCount: 0, + // expectedMarkings: [], + // skip: false, + // }, + // { + // description: "also matches same phrase with different capitalization.", + // paper: new Paper( "

<p>A string with KeY worD.</p>

", { keyword: "key word" } ), + // keyphraseForms: [ [ "key", "keys" ], [ "word", "words" ] ], + // expectedCount: 1, + // expectedMarkings: [ + // new Mark( { marked: "A string with KeY worD.", + // original: "A string with KeY worD.", + // position: { endOffset: 25, startOffset: 17 } } ) ], + // skip: false, + // }, + // { + // description: "doesn't count keyphrase instances inside elements we want to exclude from the analysis", + // paper: new Paper( "

There is no keyword in this sentence.

" ), + // keyphraseForms: [ [ "keyword", "keywords" ] ], + // expectedCount: 0, + // expectedMarkings: [], + // skip: false, + // }, { description: "only counts full key phrases (when all keywords are in the sentence once, twice etc.) as matches.", paper: new Paper( "

A string with three keys (key and another key) and one word.

" ), @@ -349,13 +349,13 @@ const testCasesWithSpecialCharacters = [ description: "should find a match when the sentence contains the keyphrase with a dash but in the wrong order " + "and the keyphrase doesn't contain dash", paper: new Paper( "

<p>A string with a panda-red.</p>

", { keyword: "red panda" } ), - keyphraseForms: [ [ "red-panda" ] ], + keyphraseForms: [ [ "red" ], [ "panda" ] ], expectedCount: 1, expectedMarkings: [ new Mark( { original: "A string with a panda-red.", marked: "A string with a panda-red.", - position: { startOffset: 16, endOffset: 25 }, + position: { startOffset: 19, endOffset: 28 }, } ), ], skip: false, @@ -827,7 +827,7 @@ describe( "Test for counting the keyword in a text for Japanese", () => { original: "私の猫はかわいいです。" } ) ] ); } ); - it( "counts/marks a string of text with multiple occurences of the same keyword in it.", function() { + it( "counts/marks a string of text with multiple occurrences of the same keyword in it.", function() { const mockPaper = new Paper( "

私の猫はかわいい猫です。 Date: Mon, 26 Jun 2023 12:18:26 +0200 Subject: [PATCH 195/616] uncomment tests --- .../researches/keywordCountSpec.js | 156 +++++++++--------- 1 file changed, 78 insertions(+), 78 deletions(-) diff --git a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js index 6e9c029869e..e14b37b5de3 100644 --- a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js +++ b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js @@ -24,84 +24,84 @@ const buildMorphologyMockResearcher = function( keyphraseForms ) { }; const testCases = [ - // { - // description: "counts/marks a string of text with a keyword in it.", - // paper: new Paper( "

<p>a string of text with the keyword in it</p>

", { keyword: "keyword" } ), - // keyphraseForms: [ [ "keyword", "keywords" ] ], - // expectedCount: 1, - // expectedMarkings: [ - // new Mark( { marked: "a string of text with the keyword in it", - // original: "a string of text with the keyword in it", - // position: { endOffset: 36, startOffset: 29 } } ) ], - // skip: false, - // }, - // { - // description: "counts a string of text with no keyword in it.", - // paper: new Paper( "

<p>a string of text</p>

", { keyword: "" } ), - // keyphraseForms: [], - // expectedCount: 0, - // expectedMarkings: [], - // skip: false, - // }, - // { - // description: "counts multiple occurrences of a keyphrase consisting of multiple words.", - // paper: new Paper( "

<p>a string of text with the key word in it, with more key words.</p>

", { keyword: "key word" } ), - // keyphraseForms: [ [ "key", "keys" ], [ "word", "words" ] ], - // expectedCount: 2, - // expectedMarkings: [ - // new Mark( { marked: "a string of text with the key word in it, " + - // "with more key words.", - // original: "a string of text with the key word in it, with more key words.", - // position: { endOffset: 37, startOffset: 29 } } ), - // new Mark( { marked: "a string of text with the key word in it, " + - // "with more key words.", - // original: "a string of text with the key word in it, with more key words.", - // position: { endOffset: 64, startOffset: 55 } } ) ], - // skip: false, - // }, - // { - // description: "counts a string with multiple keyword morphological forms", - // paper: new Paper( "

<p>A string of text with a keyword and multiple keywords in it.</p>

", { keyword: "keyword" } ), - // keyphraseForms: [ [ "keyword", "keywords" ] ], - // expectedCount: 2, - // expectedMarkings: [ - // new Mark( { marked: "A string of text with a keyword " + - // "and multiple keywords in it.", - // original: "A string of text with a keyword and multiple keywords in it.", - // position: { endOffset: 34, startOffset: 27 } } ), - // new Mark( { marked: "A string of text with a keyword " + - // "and multiple keywords in it.", - // original: "A string of text with a keyword and multiple keywords in it.", - // position: { endOffset: 56, startOffset: 48 } } ) ], - // skip: false, - // }, - // { - // description: "does not count images as keywords.", - // paper: new Paper( "

A text with image

", { keyword: "image" } ), - // keyphraseForms: [ [ "image", "images" ] ], - // expectedCount: 0, - // expectedMarkings: [], - // skip: false, - // }, - // { - // description: "also matches same phrase with different capitalization.", - // paper: new Paper( "

<p>A string with KeY worD.</p>

", { keyword: "key word" } ), - // keyphraseForms: [ [ "key", "keys" ], [ "word", "words" ] ], - // expectedCount: 1, - // expectedMarkings: [ - // new Mark( { marked: "A string with KeY worD.", - // original: "A string with KeY worD.", - // position: { endOffset: 25, startOffset: 17 } } ) ], - // skip: false, - // }, - // { - // description: "doesn't count keyphrase instances inside elements we want to exclude from the analysis", - // paper: new Paper( "

There is no keyword in this sentence.

" ), - // keyphraseForms: [ [ "keyword", "keywords" ] ], - // expectedCount: 0, - // expectedMarkings: [], - // skip: false, - // }, + { + description: "counts/marks a string of text with a keyword in it.", + paper: new Paper( "

<p>a string of text with the keyword in it</p>

", { keyword: "keyword" } ), + keyphraseForms: [ [ "keyword", "keywords" ] ], + expectedCount: 1, + expectedMarkings: [ + new Mark( { marked: "a string of text with the keyword in it", + original: "a string of text with the keyword in it", + position: { endOffset: 36, startOffset: 29 } } ) ], + skip: false, + }, + { + description: "counts a string of text with no keyword in it.", + paper: new Paper( "

<p>a string of text</p>

", { keyword: "" } ), + keyphraseForms: [], + expectedCount: 0, + expectedMarkings: [], + skip: false, + }, + { + description: "counts multiple occurrences of a keyphrase consisting of multiple words.", + paper: new Paper( "

<p>a string of text with the key word in it, with more key words.</p>

", { keyword: "key word" } ), + keyphraseForms: [ [ "key", "keys" ], [ "word", "words" ] ], + expectedCount: 2, + expectedMarkings: [ + new Mark( { marked: "a string of text with the key word in it, " + + "with more key words.", + original: "a string of text with the key word in it, with more key words.", + position: { endOffset: 37, startOffset: 29 } } ), + new Mark( { marked: "a string of text with the key word in it, " + + "with more key words.", + original: "a string of text with the key word in it, with more key words.", + position: { endOffset: 64, startOffset: 55 } } ) ], + skip: false, + }, + { + description: "counts a string with multiple keyword morphological forms", + paper: new Paper( "

<p>A string of text with a keyword and multiple keywords in it.</p>

", { keyword: "keyword" } ), + keyphraseForms: [ [ "keyword", "keywords" ] ], + expectedCount: 2, + expectedMarkings: [ + new Mark( { marked: "A string of text with a keyword " + + "and multiple keywords in it.", + original: "A string of text with a keyword and multiple keywords in it.", + position: { endOffset: 34, startOffset: 27 } } ), + new Mark( { marked: "A string of text with a keyword " + + "and multiple keywords in it.", + original: "A string of text with a keyword and multiple keywords in it.", + position: { endOffset: 56, startOffset: 48 } } ) ], + skip: false, + }, + { + description: "does not count images as keywords.", + paper: new Paper( "

A text with image

", { keyword: "image" } ), + keyphraseForms: [ [ "image", "images" ] ], + expectedCount: 0, + expectedMarkings: [], + skip: false, + }, + { + description: "also matches same phrase with different capitalization.", + paper: new Paper( "

<p>A string with KeY worD.</p>

", { keyword: "key word" } ), + keyphraseForms: [ [ "key", "keys" ], [ "word", "words" ] ], + expectedCount: 1, + expectedMarkings: [ + new Mark( { marked: "A string with KeY worD.", + original: "A string with KeY worD.", + position: { endOffset: 25, startOffset: 17 } } ) ], + skip: false, + }, + { + description: "doesn't count keyphrase instances inside elements we want to exclude from the analysis", + paper: new Paper( "

There is no keyword in this sentence.

" ), + keyphraseForms: [ [ "keyword", "keywords" ] ], + expectedCount: 0, + expectedMarkings: [], + skip: false, + }, { description: "only counts full key phrases (when all keywords are in the sentence once, twice etc.) as matches.", paper: new Paper( "

A string with three keys (key and another key) and one word.

" ), From 1237e61a417d45fb8dfb39b0e04e5bba01e2314b Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Tue, 27 Jun 2023 09:44:13 +0200 Subject: [PATCH 196/616] uncomment tests --- .../yoastseo/spec/fullTextTests/testTexts/cs/czechPaper.js | 2 +- .../yoastseo/spec/fullTextTests/testTexts/el/greekPaper.js | 2 +- .../testTexts/en/englishPaperForPerformanceTest.js | 5 ++--- .../testTexts/es/spanishPaperForPerformanceTest.js | 5 ++--- .../spec/fullTextTests/testTexts/hu/hungarianPaper.js | 2 +- .../spec/fullTextTests/testTexts/ja/japanesePaper.js | 2 +- .../testTexts/pl/polishPaperForPerformanceTest.js | 4 ++-- .../fullTextTests/testTexts/en/englishPaper2.js | 6 ++++-- .../fullTextTests/testTexts/en/englishPaper3.js | 2 +- 9 files changed, 15 insertions(+), 15 deletions(-) diff --git a/packages/yoastseo/spec/fullTextTests/testTexts/cs/czechPaper.js b/packages/yoastseo/spec/fullTextTests/testTexts/cs/czechPaper.js index 8b45f2183b2..5b9c76c8d90 100644 --- a/packages/yoastseo/spec/fullTextTests/testTexts/cs/czechPaper.js +++ b/packages/yoastseo/spec/fullTextTests/testTexts/cs/czechPaper.js @@ -33,7 +33,7 @@ const expectedResults = { isApplicable: true, score: 4, resultText: "Keyphrase density: The keyphrase was found 9 times. " + - "That's less than the recommended minimum of 13 times for a text of this length. Focus on your keyphrase!", }, metaDescriptionKeyword: { diff --git a/packages/yoastseo/spec/fullTextTests/testTexts/el/greekPaper.js b/packages/yoastseo/spec/fullTextTests/testTexts/el/greekPaper.js index 486cf4eb010..d90902ad6e0 100644 --- a/packages/yoastseo/spec/fullTextTests/testTexts/el/greekPaper.js +++ b/packages/yoastseo/spec/fullTextTests/testTexts/el/greekPaper.js @@ -32,7 +32,7 @@ const expectedResults = { keywordDensity: { isApplicable: true, score: 9, - resultText: "Keyphrase density: The keyphrase was found 38 times. This is great!", + resultText: "Keyphrase density: The keyphrase was found 37 times. This is great!", }, metaDescriptionKeyword: { isApplicable: true, diff --git a/packages/yoastseo/spec/fullTextTests/testTexts/en/englishPaperForPerformanceTest.js b/packages/yoastseo/spec/fullTextTests/testTexts/en/englishPaperForPerformanceTest.js index 71aa576e166..41c102db199 100644 --- a/packages/yoastseo/spec/fullTextTests/testTexts/en/englishPaperForPerformanceTest.js +++ b/packages/yoastseo/spec/fullTextTests/testTexts/en/englishPaperForPerformanceTest.js @@ -35,10 +35,9 @@ const expectedResults = { }, keywordDensity: { isApplicable: true, - score: 4, + score: 9, resultText: "Keyphrase density: The keyphrase was found 5 times. " + - "That's less than the recommended minimum of 6 times for a text of this length. Focus on your keyphrase!", + "This is great!", }, metaDescriptionKeyword: { isApplicable: true, diff --git a/packages/yoastseo/spec/fullTextTests/testTexts/es/spanishPaperForPerformanceTest.js b/packages/yoastseo/spec/fullTextTests/testTexts/es/spanishPaperForPerformanceTest.js index 13ce057f1dd..ebb05f4cd5b 100644 --- a/packages/yoastseo/spec/fullTextTests/testTexts/es/spanishPaperForPerformanceTest.js +++ b/packages/yoastseo/spec/fullTextTests/testTexts/es/spanishPaperForPerformanceTest.js @@ -35,10 +35,9 @@ const expectedResults = { }, keywordDensity: { isApplicable: true, - score: 4, + score: 9, resultText: "Keyphrase density: The keyphrase was found 5 times. " + - "That's less than the recommended minimum of 6 times for a text of this length. 
" + - "Focus on your keyphrase!", + "This is great!", }, metaDescriptionKeyword: { isApplicable: true, diff --git a/packages/yoastseo/spec/fullTextTests/testTexts/hu/hungarianPaper.js b/packages/yoastseo/spec/fullTextTests/testTexts/hu/hungarianPaper.js index ca875f73711..b14e8ddee49 100644 --- a/packages/yoastseo/spec/fullTextTests/testTexts/hu/hungarianPaper.js +++ b/packages/yoastseo/spec/fullTextTests/testTexts/hu/hungarianPaper.js @@ -31,7 +31,7 @@ const expectedResults = { keywordDensity: { isApplicable: true, score: 9, - resultText: "Keyphrase density: The keyphrase was found 12 times. This is great!", + resultText: "Keyphrase density: The keyphrase was found 11 times. This is great!", }, metaDescriptionKeyword: { isApplicable: true, diff --git a/packages/yoastseo/spec/fullTextTests/testTexts/ja/japanesePaper.js b/packages/yoastseo/spec/fullTextTests/testTexts/ja/japanesePaper.js index 956e0d39262..6248ea48508 100644 --- a/packages/yoastseo/spec/fullTextTests/testTexts/ja/japanesePaper.js +++ b/packages/yoastseo/spec/fullTextTests/testTexts/ja/japanesePaper.js @@ -30,7 +30,7 @@ const expectedResults = { keywordDensity: { isApplicable: true, score: 4, - resultText: "Keyphrase density: The keyphrase was found 3 times. " + + resultText: "Keyphrase density: The keyphrase was found 2 times. " + "That's less than the recommended minimum of 8 times for a text of this length. " + "Focus on your keyphrase!", }, diff --git a/packages/yoastseo/spec/fullTextTests/testTexts/pl/polishPaperForPerformanceTest.js b/packages/yoastseo/spec/fullTextTests/testTexts/pl/polishPaperForPerformanceTest.js index 13fbd3d30b0..f14d5cdd32d 100644 --- a/packages/yoastseo/spec/fullTextTests/testTexts/pl/polishPaperForPerformanceTest.js +++ b/packages/yoastseo/spec/fullTextTests/testTexts/pl/polishPaperForPerformanceTest.js @@ -34,8 +34,8 @@ const expectedResults = { }, keywordDensity: { isApplicable: true, - score: 4, - resultText: "Keyphrase density: The keyphrase was found 5 times. That's less than the recommended minimum of 6 times for a text of this length. Focus on your keyphrase!", + score: 9, + resultText: "Keyphrase density: The keyphrase was found 5 times. This is great!", }, metaDescriptionKeyword: { isApplicable: true, diff --git a/packages/yoastseo/spec/scoring/collectionPages/fullTextTests/testTexts/en/englishPaper2.js b/packages/yoastseo/spec/scoring/collectionPages/fullTextTests/testTexts/en/englishPaper2.js index 12bc5a676ef..291e40b1df2 100644 --- a/packages/yoastseo/spec/scoring/collectionPages/fullTextTests/testTexts/en/englishPaper2.js +++ b/packages/yoastseo/spec/scoring/collectionPages/fullTextTests/testTexts/en/englishPaper2.js @@ -29,8 +29,10 @@ const expectedResults = { }, keywordDensity: { isApplicable: true, - score: 9, - resultText: "Keyphrase density: The keyphrase was found 4 times. This is great!", + score: -50, + resultText: "Keyphrase density: The keyphrase was found 5 times. " + + "That's way more than the recommended maximum of 4 times for a text of this length." 
+ + " Don't overoptimize!", }, metaDescriptionKeyword: { isApplicable: true, diff --git a/packages/yoastseo/spec/scoring/collectionPages/fullTextTests/testTexts/en/englishPaper3.js b/packages/yoastseo/spec/scoring/collectionPages/fullTextTests/testTexts/en/englishPaper3.js index 954f949524b..6ea14ea2155 100644 --- a/packages/yoastseo/spec/scoring/collectionPages/fullTextTests/testTexts/en/englishPaper3.js +++ b/packages/yoastseo/spec/scoring/collectionPages/fullTextTests/testTexts/en/englishPaper3.js @@ -29,7 +29,7 @@ const expectedResults = { keywordDensity: { isApplicable: true, score: 9, - resultText: "Keyphrase density: The keyphrase was found 8 times. This is great!", + resultText: "Keyphrase density: The keyphrase was found 9 times. This is great!", }, metaDescriptionKeyword: { isApplicable: true, From f81f928a5f8100a59f01613827cc1905397de4c1 Mon Sep 17 00:00:00 2001 From: hdvos Date: Tue, 27 Jun 2023 09:51:28 +0200 Subject: [PATCH 197/616] Remove unused import --- .../src/scoring/assessments/seo/KeywordDensityAssessment.js | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/packages/yoastseo/src/scoring/assessments/seo/KeywordDensityAssessment.js b/packages/yoastseo/src/scoring/assessments/seo/KeywordDensityAssessment.js index 58b83e2c8b2..54b1259cf6e 100644 --- a/packages/yoastseo/src/scoring/assessments/seo/KeywordDensityAssessment.js +++ b/packages/yoastseo/src/scoring/assessments/seo/KeywordDensityAssessment.js @@ -5,9 +5,8 @@ import recommendedKeyphraseCount from "../../helpers/assessments/recommendedKeyw import Assessment from "../assessment"; import AssessmentResult from "../../../values/AssessmentResult"; import { inRangeEndInclusive, inRangeStartEndInclusive, inRangeStartInclusive } from "../../helpers/assessments/inRange"; -import { createAnchorOpeningTag } from "../../../helpers/shortlinker"; +import { createAnchorOpeningTag } from "../../../helpers"; import keyphraseLengthFactor from "../../helpers/assessments/keyphraseLengthFactor.js"; -import removeHtmlBlocks from "../../../languageProcessing/helpers/html/htmlParser"; import getWords from "../../../languageProcessing/helpers/word/getWords"; /** From 4f794bf64dc1b3a113092dd4e9abcfae4dcfb6f2 Mon Sep 17 00:00:00 2001 From: hdvos Date: Tue, 27 Jun 2023 09:51:58 +0200 Subject: [PATCH 198/616] fix eslint error --- .../helpers/keywordCount/matchTokenWithWordFormsSpec.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/yoastseo/spec/languageProcessing/helpers/keywordCount/matchTokenWithWordFormsSpec.js b/packages/yoastseo/spec/languageProcessing/helpers/keywordCount/matchTokenWithWordFormsSpec.js index b21c9f16ac7..6a8686db6d3 100644 --- a/packages/yoastseo/spec/languageProcessing/helpers/keywordCount/matchTokenWithWordFormsSpec.js +++ b/packages/yoastseo/spec/languageProcessing/helpers/keywordCount/matchTokenWithWordFormsSpec.js @@ -2,7 +2,7 @@ import matchTokenWithWordForms from "../../../../src/languageProcessing/helpers/ import Token from "../../../../src/parse/structure/Token"; /* eslint-disable max-len */ -const testCases = [ [ "The wordforms contain the token", [ "keyword" ] , new Token( "keyword" ), "en_US", true ], +const testCases = [ [ "The wordforms contain the token", [ "keyword" ], new Token( "keyword" ), "en_US", true ], [ "The wordforms do not contain the token", [ "keyword" ], new Token( "notkeyword" ), "en_US", false ], [ "The wordforms contain the token in a different form: singular matches plural", [ "keyword", "keywords" ], new Token( "keyword" ), "en_US", true ], 
[ "The wordforms contain the token in a different form: plural matches singular", [ "keyword", "keywords" ], new Token( "keywords" ), "en_US", true ], From 1092db0e0f52fd9de047a3ee3e9d1267fa74187d Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Tue, 27 Jun 2023 10:23:36 +0200 Subject: [PATCH 199/616] add unit tests --- .../helpers/match/matchWordFormsWithSentenceSpec.js | 2 +- .../researches/getKeywordDensitySpec.js | 13 +++++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/packages/yoastseo/spec/languageProcessing/helpers/match/matchWordFormsWithSentenceSpec.js b/packages/yoastseo/spec/languageProcessing/helpers/match/matchWordFormsWithSentenceSpec.js index bb8f39da562..6872a25d09c 100644 --- a/packages/yoastseo/spec/languageProcessing/helpers/match/matchWordFormsWithSentenceSpec.js +++ b/packages/yoastseo/spec/languageProcessing/helpers/match/matchWordFormsWithSentenceSpec.js @@ -321,7 +321,7 @@ describe.each( testCases )( "find word forms in sentence in English: non-exact m } ) => { const locale = "en_US"; it( testDescription, () => { - expect( matchWordFormsWithSentence( sentence, wordForms, locale, false, false ) ).toEqual( expectedResult ); + expect( matchWordFormsWithSentence( sentence, wordForms, locale, false ) ).toEqual( expectedResult ); } ); } ); diff --git a/packages/yoastseo/spec/languageProcessing/researches/getKeywordDensitySpec.js b/packages/yoastseo/spec/languageProcessing/researches/getKeywordDensitySpec.js index 4a088b02a2a..ed4bafd89e9 100644 --- a/packages/yoastseo/spec/languageProcessing/researches/getKeywordDensitySpec.js +++ b/packages/yoastseo/spec/languageProcessing/researches/getKeywordDensitySpec.js @@ -116,6 +116,19 @@ describe( "Test for counting the keyword density in a text with an English resea buildTree( mockPaper, mockResearcher ); expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 10 ); } ); + it( "should return 0 when the paper contains only excluded element", function() { + const mockPaper = new Paper( "
In the United States and parts of Europe, " + + "tortoiseshell cats are often considered lucky charms.
" ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 0 ); + } ); + it( "should return 0 when the paper is empty", function() { + const mockPaper = new Paper( "" ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 0 ); + } ); } ); describe( "test for counting the keyword density in a non-English and non-Japanese language", function() { From 7766e2b203b6bae9e609be5ef74641d4df2493f1 Mon Sep 17 00:00:00 2001 From: hdvos Date: Tue, 27 Jun 2023 16:01:39 +0200 Subject: [PATCH 200/616] Fix typo --- packages/js/src/decorator/gutenberg.js | 4 ++-- packages/js/tests/decorator/gutenberg.test.js | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/packages/js/src/decorator/gutenberg.js b/packages/js/src/decorator/gutenberg.js index b4593f7c6e8..9410748b9c7 100644 --- a/packages/js/src/decorator/gutenberg.js +++ b/packages/js/src/decorator/gutenberg.js @@ -318,7 +318,7 @@ export function calculateAnnotationsForTextFormat( text, mark ) { * * @returns {Array} The annotations to apply. */ -export function crateAnnotationsFromPositionBasedMarks( mark ) { +export function createAnnotationsFromPositionBasedMarks( mark ) { return [ { startOffset: mark.getBlockPositionStart(), @@ -368,7 +368,7 @@ function createAnnotations( html, richTextIdentifier, attribute, block, marks ) return flatMap( marks, ( ( mark ) => { let annotations; if ( marks[ 0 ].hasBlockPosition && marks[ 0 ].hasBlockPosition() ) { - annotations = crateAnnotationsFromPositionBasedMarks( mark ); + annotations = createAnnotationsFromPositionBasedMarks( mark ); } else { annotations = calculateAnnotationsForTextFormat( text, diff --git a/packages/js/tests/decorator/gutenberg.test.js b/packages/js/tests/decorator/gutenberg.test.js index 48637cc1c72..844d7650680 100644 --- a/packages/js/tests/decorator/gutenberg.test.js +++ b/packages/js/tests/decorator/gutenberg.test.js @@ -7,7 +7,7 @@ import { getAnnotationsFromBlock, hasInnerBlocks, getAnnotationsForYoastBlocks, - getAnnotationsForBlockAttribute, crateAnnotationsFromPositionBasedMarks, + getAnnotationsForBlockAttribute, createAnnotationsFromPositionBasedMarks, } from "../../src/decorator/gutenberg"; jest.mock( "@wordpress/rich-text", () => ( { @@ -186,7 +186,7 @@ describe( "calculateAnnotationsForTextFormat", () => { } ); } ); -describe( "crateAnnotationsFromPositionBasedMarks", () => { +describe( "createAnnotationsFromPositionBasedMarks", () => { it( "create annotation from block position based mark", () => { const mark = new Mark( { position: { @@ -206,7 +206,7 @@ describe( "crateAnnotationsFromPositionBasedMarks", () => { }, ]; - const actual = crateAnnotationsFromPositionBasedMarks( mark ); + const actual = createAnnotationsFromPositionBasedMarks( mark ); expect( actual ).toEqual( expected ); } ); From 035d72aa51524c44959333d42dfa8e2d967f18e6 Mon Sep 17 00:00:00 2001 From: hdvos Date: Tue, 27 Jun 2023 16:02:18 +0200 Subject: [PATCH 201/616] Clarify JS-doc --- packages/yoastseo/src/values/Mark.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/yoastseo/src/values/Mark.js b/packages/yoastseo/src/values/Mark.js index 30db397c6b2..613259d9066 100644 --- a/packages/yoastseo/src/values/Mark.js +++ b/packages/yoastseo/src/values/Mark.js @@ -89,14 +89,14 @@ 
Mark.prototype.setPositionEnd = function( positionEnd ) {
 /**
  * Returns the start position inside block.
  *
- * @returns {number} The start position inside block.
+ * @returns {number} The start position inside block if the mark has position information, undefined otherwise.
  */
 Mark.prototype.getBlockPositionStart = function() {
 	return this._properties.position && this._properties.position.startOffsetBlock;
 };
 
 /**
- * Returns the end position inside block.
+ * Returns the end position inside block if the mark has position information, undefined otherwise.
  *
  * @returns {number} The end position inside block.
  */
 Mark.prototype.getBlockPositionEnd = function() {

From 5290d0bec0e82fd8dd98b3e3fb02a1aa4c2965b6 Mon Sep 17 00:00:00 2001
From: aidamarfuaty
Date: Tue, 4 Jul 2023 16:16:25 +0200
Subject: [PATCH 202/616] add a unit test

---
 .../languageProcessing/researches/keywordCountSpec.js | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js
index e14b37b5de3..1f1df8bbf3f 100644
--- a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js
+++ b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js
@@ -438,6 +438,16 @@ const testCasesWithSpecialCharacters = [
+	{
+		description: "can match multiple occurrences of keyphrase ending in & as in 'keyphrase&', and output correct Marks objects",
+		paper: new Paper( "

<p>A string with a keyphrase&.</p>

", { keyword: "keyphrase&" } ), + keyphraseForms: [ [ "keyphrase" ] ], + expectedCount: 1, + expectedMarkings: [ new Mark( { marked: "A string with a keyphrase&.", + original: "A string with a keyphrase&.", + position: { endOffset: 28, startOffset: 19 } } ) ], + skip: false, + }, { description: "doesn't count 'key-word' in 'key word'.", paper: new Paper( "

<p>A string with a key word.</p>

", { keyword: "key-word" } ), From 931d7052fa5ea755c4b317fe652c6040ac5d4d39 Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Wed, 28 Jun 2023 09:56:36 +0200 Subject: [PATCH 203/616] Adapt the helper for removing duplicates of Mark objects --- ...ateMarks.js => removeDuplicateMarksSpec.js} | 18 ++++++++++++++---- .../src/markers/removeDuplicateMarks.js | 9 ++++++--- 2 files changed, 20 insertions(+), 7 deletions(-) rename packages/yoastseo/spec/markers/{removeDuplicateMarks.js => removeDuplicateMarksSpec.js} (64%) diff --git a/packages/yoastseo/spec/markers/removeDuplicateMarks.js b/packages/yoastseo/spec/markers/removeDuplicateMarksSpec.js similarity index 64% rename from packages/yoastseo/spec/markers/removeDuplicateMarks.js rename to packages/yoastseo/spec/markers/removeDuplicateMarksSpec.js index 73ae017032f..335a3fe1fd5 100644 --- a/packages/yoastseo/spec/markers/removeDuplicateMarks.js +++ b/packages/yoastseo/spec/markers/removeDuplicateMarksSpec.js @@ -7,11 +7,11 @@ describe( "removeDuplicateMarks", function() { } ); it( "should remove duplicated marks from the array", function() { - var marks = [ + const marks = [ new Mark( { original: "original", marked: "marked" } ), new Mark( { original: "original", marked: "marked" } ), ]; - var expected = [ + const expected = [ new Mark( { original: "original", marked: "marked" } ), ]; @@ -19,16 +19,26 @@ describe( "removeDuplicateMarks", function() { } ); it( "should remove duplicated marks from the array", function() { - var marks = [ + const marks = [ new Mark( { original: "original", marked: "marked" } ), new Mark( { original: "original2", marked: "marked" } ), new Mark( { original: "original", marked: "marked" } ), ]; - var expected = [ + const expected = [ new Mark( { original: "original", marked: "marked" } ), new Mark( { original: "original2", marked: "marked" } ), ]; expect( removeDuplicateMarks( marks ) ).toEqual( expected ); } ); + + it( "should not remove duplicated marks if the mark objects have position information", function() { + const marks = [ + new Mark( { original: "original", marked: "marked", position: { startOffset: 0, endOffset: 10 } } ), + new Mark( { original: "original2", marked: "marked", position: { startOffset: 0, endOffset: 10 } } ), + new Mark( { original: "original1", marked: "marked", position: { startOffset: 15, endOffset: 20 } } ), + ]; + + expect( removeDuplicateMarks( marks ) ).toEqual( marks ); + } ); } ); diff --git a/packages/yoastseo/src/markers/removeDuplicateMarks.js b/packages/yoastseo/src/markers/removeDuplicateMarks.js index 5b7755e97f3..a30a51f4c61 100644 --- a/packages/yoastseo/src/markers/removeDuplicateMarks.js +++ b/packages/yoastseo/src/markers/removeDuplicateMarks.js @@ -1,14 +1,17 @@ import { uniqBy } from "lodash-es"; /** - * Removes duplicate marks from an array + * Removes duplicate marks from an array. + * If the marks object have position information, however, + * we don't want to remove the duplicated objects with the same original strings. + * + * @param {Array} marks The marks to remove duplications from. * - * @param {Array} marks The marks to remove duplications from * @returns {Array} A list of de-duplicated marks. */ function removeDuplicateMarks( marks ) { return uniqBy( marks, function( mark ) { - return mark.hasPosition() ? mark.getPosition() : mark.getOriginal(); + return ! 
mark.hasPosition() && mark.getOriginal(); } ); } From 867500697e21899f1cec30cb85890bdb28efe277 Mon Sep 17 00:00:00 2001 From: hdvos Date: Wed, 28 Jun 2023 10:01:46 +0200 Subject: [PATCH 204/616] check mark instead of marks[ 0 ] --- packages/js/src/decorator/gutenberg.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/js/src/decorator/gutenberg.js b/packages/js/src/decorator/gutenberg.js index 9410748b9c7..8331fa767aa 100644 --- a/packages/js/src/decorator/gutenberg.js +++ b/packages/js/src/decorator/gutenberg.js @@ -367,7 +367,7 @@ function createAnnotations( html, richTextIdentifier, attribute, block, marks ) return flatMap( marks, ( ( mark ) => { let annotations; - if ( marks[ 0 ].hasBlockPosition && marks[ 0 ].hasBlockPosition() ) { + if ( mark.hasBlockPosition && mark.hasBlockPosition() ) { annotations = createAnnotationsFromPositionBasedMarks( mark ); } else { annotations = calculateAnnotationsForTextFormat( From a2a2c55e9892ca916faeff3a3db1001ad36e9c16 Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Wed, 28 Jun 2023 17:07:16 +0200 Subject: [PATCH 205/616] Adjust the check for removing dupplicates --- packages/yoastseo/src/markers/removeDuplicateMarks.js | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/packages/yoastseo/src/markers/removeDuplicateMarks.js b/packages/yoastseo/src/markers/removeDuplicateMarks.js index a30a51f4c61..b387af032f6 100644 --- a/packages/yoastseo/src/markers/removeDuplicateMarks.js +++ b/packages/yoastseo/src/markers/removeDuplicateMarks.js @@ -1,4 +1,4 @@ -import { uniqBy } from "lodash-es"; +import { uniqBy, isUndefined } from "lodash-es"; /** * Removes duplicate marks from an array. @@ -10,8 +10,12 @@ import { uniqBy } from "lodash-es"; * @returns {Array} A list of de-duplicated marks. */ function removeDuplicateMarks( marks ) { + if ( marks.length === 0 || ! isUndefined( marks[ 0 ].hasPosition() ) ) { + return marks; + } + return uniqBy( marks, function( mark ) { - return ! mark.hasPosition() && mark.getOriginal(); + return mark.getOriginal(); } ); } From 69ed7dcb71b746cd361dfcc211654a633b504ddc Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Thu, 29 Jun 2023 11:22:13 +0200 Subject: [PATCH 206/616] Adjust the check for removing duplicates --- packages/yoastseo/src/markers/removeDuplicateMarks.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/yoastseo/src/markers/removeDuplicateMarks.js b/packages/yoastseo/src/markers/removeDuplicateMarks.js index b387af032f6..a659566285b 100644 --- a/packages/yoastseo/src/markers/removeDuplicateMarks.js +++ b/packages/yoastseo/src/markers/removeDuplicateMarks.js @@ -10,7 +10,7 @@ import { uniqBy, isUndefined } from "lodash-es"; * @returns {Array} A list of de-duplicated marks. */ function removeDuplicateMarks( marks ) { - if ( marks.length === 0 || ! isUndefined( marks[ 0 ].hasPosition() ) ) { + if ( !! marks && ( marks.length === 0 || ! 
isUndefined( marks[ 0 ].hasPosition() ) ) ) { return marks; } From 45f534757549762136a3c09afdfef3aaf5033359 Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Thu, 29 Jun 2023 11:22:37 +0200 Subject: [PATCH 207/616] update unit tests --- .../yoastseo/spec/scoring/assessorSpec.js | 52 +++++++++---------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/packages/yoastseo/spec/scoring/assessorSpec.js b/packages/yoastseo/spec/scoring/assessorSpec.js index 07e778191b3..3df1f4539c2 100644 --- a/packages/yoastseo/spec/scoring/assessorSpec.js +++ b/packages/yoastseo/spec/scoring/assessorSpec.js @@ -26,21 +26,21 @@ describe( "an assessor object", function() { } ); } ); - var result5 = new AssessmentResult(); + const result5 = new AssessmentResult(); result5.setScore( 5 ); - var result4 = new AssessmentResult(); + const result4 = new AssessmentResult(); result4.setScore( 4 ); - var result8 = new AssessmentResult(); + const result8 = new AssessmentResult(); result8.setScore( 8 ); - var validResult = new AssessmentResult(); + const validResult = new AssessmentResult(); validResult.setScore( 9 ); validResult.setText( "all good" ); validResult.setHasMarks( true ); describe( "returning the overall score", function() { it( "returns the overall score", function() { - var assessor = new Assessor( new DefaultResearcher() ); + const assessor = new Assessor( new DefaultResearcher() ); assessor.getValidResults = function() { return [ result5, result4, result8 ]; }; @@ -48,7 +48,7 @@ describe( "an assessor object", function() { } ); } ); - var mockAssessment = { + const mockAssessment = { /** * A mock assessment which always returns true. * @@ -61,15 +61,15 @@ describe( "an assessor object", function() { describe( "adding an assessment", function() { it( "adds an assessment", function() { - var assessor = new Assessor( new DefaultResearcher() ); + const assessor = new Assessor( new DefaultResearcher() ); assessor.addAssessment( "testname", mockAssessment ); - var result = assessor.getAssessment( "testname" ); + const result = assessor.getAssessment( "testname" ); expect( result ).toEqual( mockAssessment ); } ); it( "overides the existing assessment if the newly added assessment has the same identifier", function() { - var assessor = new Assessor( new DefaultResearcher() ); + const assessor = new Assessor( new DefaultResearcher() ); assessor.addAssessment( "testname", mockAssessment ); assessor.addAssessment( "testname", mockAssessment ); @@ -80,10 +80,10 @@ describe( "an assessor object", function() { describe( "removing an assessment", function() { it( "removes an assessment", function() { - var assessor = new Assessor( new DefaultResearcher() ); + const assessor = new Assessor( new DefaultResearcher() ); assessor.removeAssessment( "testname" ); - var result = assessor.getAssessment( "testname" ); + const result = assessor.getAssessment( "testname" ); expect( result ).toEqual( undefined ); // eslint-disable-line no-undefined } ); @@ -91,8 +91,8 @@ describe( "an assessor object", function() { describe( "assess", function() { it( "should add the marker to the assessment result", function() { - var paper = new Paper(); - var assessor = new Assessor( new DefaultResearcher(), { + const paper = new Paper(); + const assessor = new Assessor( new DefaultResearcher(), { /** * A mock marker function. * @@ -100,7 +100,7 @@ describe( "an assessor object", function() { */ marker: function() {}, } ); - var assessment = { + const assessment = { /** * A mock getResult function. 
* @@ -128,21 +128,21 @@ describe( "an assessor object", function() { assessor.addAssessment( "test1", assessment ); assessor.assess( paper ); - var results = assessor.getValidResults(); + const results = assessor.getValidResults(); expect( results[ 0 ].hasMarker() ).toBe( true ); } ); } ); describe( "hasMarker", function() { - var assessor; + let assessor; beforeEach( function() { assessor = new Assessor( new DefaultResearcher(), {} ); } ); it( "should return true when we have a global marker and a getMarks function", function() { - var assessment = { + const assessment = { /** * A mock getMarks function. * @@ -156,7 +156,7 @@ describe( "an assessor object", function() { } ); it( "should return false when we don't have a global marker", function() { - var assessment = { + const assessment = { /** * A mock getMarks function. * @@ -169,28 +169,28 @@ describe( "an assessor object", function() { } ); it( "should return false when we don't have a getMarks function", function() { - var assessment = {}; + const assessment = {}; assessor._options.marker = function() {}; expect( assessor.hasMarker( assessment ) ).toBe( false ); } ); it( "should return false when we don't have a global marker and don't have a getMarks function", function() { - var assessment = {}; + const assessment = {}; expect( assessor.hasMarker( assessment ) ).toBe( false ); } ); } ); describe( "getMarker", function() { - var assessor; + let assessor; beforeEach( function() { assessor = new Assessor( {} ); } ); it( "should compose the global marker and the getMarks function", function() { - var functions = { + const functions = { /** * A mock getMarks function. * @@ -204,11 +204,11 @@ describe( "an assessor object", function() { */ globalMarker: function() {}, }; - spyOn( functions, "getMarks" ); - spyOn( functions, "globalMarker" ); - var assessment = { getMarks: functions.getMarks }; + jest.spyOn( functions, "getMarks" ); + jest.spyOn( functions, "globalMarker" ); + const assessment = { getMarks: functions.getMarks }; assessor._options.marker = functions.globalMarker; - var marker = assessor.getMarker( assessment ); + const marker = assessor.getMarker( assessment ); marker(); From 3de5bff45bc898de565e39b6f8f4fc32aaa0bbc6 Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Thu, 29 Jun 2023 11:25:39 +0200 Subject: [PATCH 208/616] Adapt check --- packages/yoastseo/src/markers/removeDuplicateMarks.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/yoastseo/src/markers/removeDuplicateMarks.js b/packages/yoastseo/src/markers/removeDuplicateMarks.js index a659566285b..022a47a9408 100644 --- a/packages/yoastseo/src/markers/removeDuplicateMarks.js +++ b/packages/yoastseo/src/markers/removeDuplicateMarks.js @@ -1,4 +1,4 @@ -import { uniqBy, isUndefined } from "lodash-es"; +import { uniqBy } from "lodash-es"; /** * Removes duplicate marks from an array. @@ -10,7 +10,7 @@ import { uniqBy, isUndefined } from "lodash-es"; * @returns {Array} A list of de-duplicated marks. */ function removeDuplicateMarks( marks ) { - if ( !! marks && ( marks.length === 0 || ! isUndefined( marks[ 0 ].hasPosition() ) ) ) { + if ( !! marks && ( marks.length === 0 || !! 
marks[ 0 ].hasPosition() ) ) { return marks; } From 4557c46843e4de878f619c66e617beef6ab72e2d Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Thu, 29 Jun 2023 11:52:15 +0200 Subject: [PATCH 209/616] remove unnecessary unit test --- .../private/getTextElementPositionsSpec.js | 72 ------------------- 1 file changed, 72 deletions(-) diff --git a/packages/yoastseo/spec/parse/build/private/getTextElementPositionsSpec.js b/packages/yoastseo/spec/parse/build/private/getTextElementPositionsSpec.js index 41e5ff7c793..588bb90443e 100644 --- a/packages/yoastseo/spec/parse/build/private/getTextElementPositionsSpec.js +++ b/packages/yoastseo/spec/parse/build/private/getTextElementPositionsSpec.js @@ -482,78 +482,6 @@ describe( "A test for getting positions of sentences", () => { const sentences = [ { text: "Hello, World!" } ]; const [ helloSentence ] = getTextElementPositions( paragraph, sentences ); expect( helloSentence.sourceCodeRange ).toEqual( { startOffset: 11, endOffset: 42 } ); - - // - // const node = new Paragraph( {}, [ - // { name: "#text", value: "Hello, world!" }, - // { - // name: "strong", - // attributes: {}, - // childNodes: [ - // { - // name: "#text", - // value: "Hello", - // }, - // ], - // sourceCodeLocation: { - // startTag: { - // startOffset: 3, - // endOffset: 11, - // }, - // endTag: { - // startOffset: 16, - // endOffset: 25, - // }, - // startOffset: 3, - // endOffset: 25, - // }, - // }, - // { - // name: "em", - // attributes: {}, - // childNodes: [ - // { - // name: "#text", - // value: "world", - // }, - // ], - // sourceCodeLocation: { - // startTag: { - // startOffset: 27, - // endOffset: 31, - // }, - // endTag: { - // startOffset: 36, - // endOffset: 41, - // }, - // startOffset: 27, - // endOffset: 41, - // }, - // }, - // ], - // { - // startTag: { - // startOffset: 0, - // endOffset: 3, - // }, - // endTag: { - // startOffset: 42, - // endOffset: 46, - // }, - // startOffset: 0, - // endOffset: 46, - // } ); - // - // const tokens = [ { text: "Hello" }, { text: "," }, { text: " " }, { text: "world" }, { text: "!" 
} ]; - // const tokensWithPositions = [ - // { text: "Hello", sourceCodeRange: { startOffset: 3, endOffset: 25 } }, - // { text: ",", sourceCodeRange: { startOffset: 25, endOffset: 26 } }, - // { text: " ", sourceCodeRange: { startOffset: 26, endOffset: 27 } }, - // { text: "world", sourceCodeRange: { startOffset: 27, endOffset: 41 } }, - // { text: "!", sourceCodeRange: { startOffset: 41, endOffset: 42 } }, - // ]; - // - // expect( getTextElementPositions( node, tokens ) ).toEqual( tokensWithPositions ); } ); it( "don't calculate sentence position if the source code location of the node is unknown", function() { From f6d1103a04a9b59e37abff9297ab8f9a71b73091 Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Thu, 29 Jun 2023 11:56:12 +0200 Subject: [PATCH 210/616] add unit test --- .../helpers/sentence/getSentencesFromTreeSpec.js | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/packages/yoastseo/spec/languageProcessing/helpers/sentence/getSentencesFromTreeSpec.js b/packages/yoastseo/spec/languageProcessing/helpers/sentence/getSentencesFromTreeSpec.js index 25cbaa6037b..ed0f23f6a7f 100644 --- a/packages/yoastseo/spec/languageProcessing/helpers/sentence/getSentencesFromTreeSpec.js +++ b/packages/yoastseo/spec/languageProcessing/helpers/sentence/getSentencesFromTreeSpec.js @@ -112,4 +112,11 @@ describe( "test to get sentences from the tree", () => { }, ] ); } ); + it( "should return empty array if no sentences are found in paragraph/heading node", () => { + const paper = new Paper( "

The red panda (Ailurus fulgens), also known as the lesser panda," + + " is a small mammal native to the eastern Himalayas and southwestern China
" ); + researcher.setPaper( paper ); + buildTree( paper, researcher ); + expect( getSentencesFromTree( paper ) ).toEqual( [] ); + } ); } ); From a324c23bc71f4fe57f783e8bd44b9b1e2382e056 Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Tue, 4 Jul 2023 16:16:25 +0200 Subject: [PATCH 211/616] Adjust comment and rename research --- .../researches/keywordCountSpec.js | 24 +++++------ .../languageProcessing/AbstractResearcher.js | 4 +- .../researches/keywordCount.js | 41 +++++++++++++------ .../src/markers/removeDuplicateMarks.js | 5 +++ 4 files changed, 48 insertions(+), 26 deletions(-) diff --git a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js index 1f1df8bbf3f..20ef8171746 100644 --- a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js +++ b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js @@ -1,4 +1,4 @@ -import keyphraseCount from "../../../src/languageProcessing/researches/keywordCount"; +import getKeyphraseCount from "../../../src/languageProcessing/researches/keywordCount"; import Paper from "../../../src/values/Paper.js"; import factory from "../../specHelpers/factory"; import Mark from "../../../src/values/Mark"; @@ -298,7 +298,7 @@ describe.each( testCases )( "Test for counting the keyword in a text in english" test( description, function() { const mockResearcher = buildMorphologyMockResearcher( keyphraseForms ); buildTree( paper, mockResearcher ); - const keyWordCountResult = keyphraseCount( paper, mockResearcher ); + const keyWordCountResult = getKeyphraseCount( paper, mockResearcher ); expect( keyWordCountResult.count ).toBe( expectedCount ); expect( keyWordCountResult.markings ).toEqual( expectedMarkings ); } ); @@ -555,7 +555,7 @@ describe.each( testCasesWithSpecialCharacters )( "Test for counting the keyword test( description, function() { const mockResearcher = buildMorphologyMockResearcher( keyphraseForms ); buildTree( paper, mockResearcher ); - const keyWordCountResult = keyphraseCount( paper, mockResearcher ); + const keyWordCountResult = getKeyphraseCount( paper, mockResearcher ); expect( keyWordCountResult.count ).toBe( expectedCount ); expect( keyWordCountResult.markings ).toEqual( expectedMarkings ); } ); @@ -793,7 +793,7 @@ describe.each( testCasesWithLocaleMapping )( "Test for counting the keyword in a test( description, function() { const mockResearcher = buildMorphologyMockResearcher( keyphraseForms ); buildTree( paper, mockResearcher ); - const keyWordCountResult = keyphraseCount( paper, mockResearcher ); + const keyWordCountResult = getKeyphraseCount( paper, mockResearcher ); expect( keyWordCountResult.count ).toBe( expectedCount ); expect( keyWordCountResult.markings ).toEqual( expectedMarkings ); } ); @@ -831,8 +831,8 @@ describe( "Test for counting the keyword in a text for Japanese", () => { const researcher = buildJapaneseMockResearcher( [ [ "猫" ] ], wordsCountHelper, matchWordsHelper ); buildTree( mockPaper, researcher ); - expect( keyphraseCount( mockPaper, researcher ).count ).toBe( 1 ); - expect( keyphraseCount( mockPaper, researcher ).markings ).toEqual( [ + expect( getKeyphraseCount( mockPaper, researcher ).count ).toBe( 1 ); + expect( getKeyphraseCount( mockPaper, researcher ).markings ).toEqual( [ new Mark( { marked: "私のはかわいいです。", original: "私の猫はかわいいです。" } ) ] ); } ); @@ -842,8 +842,8 @@ describe( "Test for counting the keyword in a text for Japanese", () => { const researcher = 
buildJapaneseMockResearcher( [ [ "猫" ] ], wordsCountHelper, matchWordsHelper ); buildTree( mockPaper, researcher ); - expect( keyphraseCount( mockPaper, researcher ).count ).toBe( 2 ); - expect( keyphraseCount( mockPaper, researcher ).markings ).toEqual( [ + expect( getKeyphraseCount( mockPaper, researcher ).count ).toBe( 2 ); + expect( getKeyphraseCount( mockPaper, researcher ).markings ).toEqual( [ new Mark( { marked: "私のはかわいいです。", original: "私の猫はかわいい猫です。", } ) ] ); @@ -853,16 +853,16 @@ describe( "Test for counting the keyword in a text for Japanese", () => { const mockPaper = new Paper( "私の猫はかわいいです。", { locale: "ja" } ); const researcher = buildJapaneseMockResearcher( [ [ "猫" ], [ "会い" ] ], wordsCountHelper, matchWordsHelper ); buildTree( mockPaper, researcher ); - expect( keyphraseCount( mockPaper, researcher ).count ).toBe( 0 ); - expect( keyphraseCount( mockPaper, researcher ).markings ).toEqual( [] ); + expect( getKeyphraseCount( mockPaper, researcher ).count ).toBe( 0 ); + expect( getKeyphraseCount( mockPaper, researcher ).markings ).toEqual( [] ); } ); it( "counts multiple occurrences of a keyphrase consisting of multiple words.", function() { const mockPaper = new Paper( "

<p>私の猫はかわいいですかわいい。</p>

", { locale: "ja" } ); const researcher = buildJapaneseMockResearcher( [ [ "猫" ], [ "かわいい" ] ], wordsCountHelper, matchWordsHelper ); buildTree( mockPaper, researcher ); - expect( keyphraseCount( mockPaper, researcher ).count ).toBe( 1 ); - expect( keyphraseCount( mockPaper, researcher ).markings ).toEqual( [ + expect( getKeyphraseCount( mockPaper, researcher ).count ).toBe( 1 ); + expect( getKeyphraseCount( mockPaper, researcher ).markings ).toEqual( [ new Mark( { marked: "私のかわいい" + "ですかわいい。", diff --git a/packages/yoastseo/src/languageProcessing/AbstractResearcher.js b/packages/yoastseo/src/languageProcessing/AbstractResearcher.js index 44785dcde94..2d17a93149e 100644 --- a/packages/yoastseo/src/languageProcessing/AbstractResearcher.js +++ b/packages/yoastseo/src/languageProcessing/AbstractResearcher.js @@ -24,7 +24,7 @@ import getSubheadingTextLengths from "./researches/getSubheadingTextLengths.js"; import h1s from "./researches/h1s"; import imageCount from "./researches/imageCount.js"; import keyphraseLength from "./researches/keyphraseLength"; -import keyphraseCount, { keywordCount } from "./researches/keywordCount"; +import getKeyphraseCount, { keywordCount } from "./researches/keywordCount"; import { keywordCountInSlug, keywordCountInUrl } from "./researches/keywordCountInUrl"; import matchKeywordInSubheadings from "./researches/matchKeywordInSubheadings"; import metaDescriptionKeyword from "./researches/metaDescriptionKeyword"; @@ -62,6 +62,7 @@ export default class AbstractResearcher { functionWordsInKeyphrase, getAnchorsWithKeyphrase, getFleschReadingScore, + getKeyphraseCount, getKeyphraseDensity, getKeywordDensity, getLinks, @@ -74,7 +75,6 @@ export default class AbstractResearcher { h1s, imageCount, keyphraseLength, - keyphraseCount, keywordCount, keywordCountInSlug, keywordCountInUrl, diff --git a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js index c1d0cd01bd5..435ba768819 100644 --- a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js +++ b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js @@ -10,24 +10,38 @@ import { markWordsInASentence } from "../helpers/word/markWordsInSentences"; * Counts the occurrences of the keyphrase in the text and creates the Mark objects for the matches. * * @param {Sentence[]} sentences The sentences to check. - * @param {Array} topicForms The keyphrase forms. + * @param {Array} keyphraseForms The keyphrase forms. * @param {string} locale The locale used in the analysis. * @param {function} matchWordCustomHelper A custom helper to match words with a text. * @param {boolean} isExactMatchRequested Whether the exact matching is requested. * * @returns {{markings: Mark[], count: number}} The number of keyphrase occurrences in the text and the Mark objects of the matches. 
*/ -export function countKeyphraseInText( sentences, topicForms, locale, matchWordCustomHelper, isExactMatchRequested ) { +export function countKeyphraseInText( sentences, keyphraseForms, locale, matchWordCustomHelper, isExactMatchRequested ) { const result = { count: 0, markings: [] }; sentences.forEach( sentence => { - const matchesInSentence = topicForms.keyphraseForms.map( wordForms => matchWordFormsWithSentence( sentence, + const matchesInSentence = keyphraseForms.map( wordForms => matchWordFormsWithSentence( sentence, wordForms, locale, matchWordCustomHelper, isExactMatchRequested ) ); - const hasAllKeywords = matchesInSentence.every( wordForms => wordForms.count > 0 ); + // A sentence has at least one full-match of the keyphrase if each word occurs at least once. + const isEachWordFound = matchesInSentence.every( wordForms => wordForms.count > 0 ); - if ( hasAllKeywords ) { + if ( isEachWordFound ) { + /* + * Retrieve all the occurrences' count of each word of the keyphrase and save it in an array. + * matches: [ [ { matches: ["red"], count: 1 } ], [ { matches: ["pandas"], count: 2 } ] ] + * counts: [ 1, 2 ] + */ const counts = matchesInSentence.map( match => match.count ); + /* + * The number of the full-match count is the lowest count of the occurrences. + * counts: [ 1, 2 ] + * totalMatchCount: 1 + * + * From the example above, the full-match is 1, because one of the "pandas" occurrences is not accompanied by "red" + * to be counted as a full-match. + */ const totalMatchCount = Math.min( ...counts ); const foundWords = flattenDeep( matchesInSentence.map( match => match.matches ) ); @@ -56,11 +70,14 @@ export function countKeyphraseInText( sentences, topicForms, locale, matchWordCu * * @returns {Object} An object containing an array of all the matches, markings and the keyphrase count. */ -export default function keyphraseCount( paper, researcher ) { +export default function getKeyphraseCount( paper, researcher ) { const topicForms = researcher.getResearch( "morphology" ); - topicForms.keyphraseForms = topicForms.keyphraseForms.map( word => word.map( form => normalizeSingle( form ) ) ); + const keyphraseForms = topicForms.keyphraseForms; + const keyphraseLength = keyphraseForms.length; - if ( topicForms.keyphraseForms.length === 0 ) { + keyphraseForms = keyphraseForms.map( word => word.map( form => normalizeSingle( form ) ) ); + + if ( keyphraseLength === 0 ) { return { count: 0, markings: [], length: 0 }; } @@ -75,19 +92,19 @@ export default function keyphraseCount( paper, researcher ) { * An occurrence is counted when all words of the keyphrase are contained within the sentence. Each sentence can contain multiple keyphrases. * (e.g. "The apple potato is an apple and a potato." has two occurrences of the keyphrase "apple potato"). * */ - const keyphraseFound = countKeyphraseInText( sentences, topicForms, locale, matchWordCustomHelper, isExactMatchRequested ); + const keyphraseFound = countKeyphraseInText( sentences, keyphraseForms, locale, matchWordCustomHelper, isExactMatchRequested ); return { count: keyphraseFound.count, markings: flatten( keyphraseFound.markings ), - length: topicForms.keyphraseForms.length, + length: keyphraseLength, }; } /** * Calculates the keyphrase count, takes morphology into account. * - * @deprecated Since version 20.12. Use keyphraseCount instead. + * @deprecated Since version 20.12. Use getKeyphraseCount instead. * * @param {Paper} paper The paper containing keyphrase and text. * @param {Researcher} researcher The researcher. 
@@ -96,5 +113,5 @@ export default function keyphraseCount( paper, researcher ) { */ export function keywordCount( paper, researcher ) { console.warn( "This function is deprecated, use keyphraseCount instead." ); - return keyphraseCount( paper, researcher ); + return getKeyphraseCount( paper, researcher ); } diff --git a/packages/yoastseo/src/markers/removeDuplicateMarks.js b/packages/yoastseo/src/markers/removeDuplicateMarks.js index 022a47a9408..e2862e57f33 100644 --- a/packages/yoastseo/src/markers/removeDuplicateMarks.js +++ b/packages/yoastseo/src/markers/removeDuplicateMarks.js @@ -10,6 +10,11 @@ import { uniqBy } from "lodash-es"; * @returns {Array} A list of de-duplicated marks. */ function removeDuplicateMarks( marks ) { + /* + * We don't remove duplicates when mark has position information, for the reasons below: + * 1. Not removing duplicates is simpler than removing the duplicates by looking at the value of mark.getPosition(). + * 2. Our urrent approach of creating a mark object with position information eliminates the chance of having duplicates. + */ if ( !! marks && ( marks.length === 0 || !! marks[ 0 ].hasPosition() ) ) { return marks; } From 5f7bcbf33544ddb91e5f656c0c7ca2a900caa7a9 Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Tue, 4 Jul 2023 16:58:26 +0200 Subject: [PATCH 212/616] improve naming, comment and consistently call keyword keyphrase --- .../recommendedKeywordCountSpec.js | 34 +++---- .../match/matchWordFormsWithSentence.js | 91 ++++++++++--------- .../researches/getKeywordDensity.js | 2 +- .../researches/keywordCount.js | 19 ++-- .../seo/KeywordDensityAssessment.js | 8 +- .../assessments/recommendedKeywordCount.js | 30 +++--- 6 files changed, 94 insertions(+), 90 deletions(-) diff --git a/packages/yoastseo/spec/scoring/helpers/assessments/recommendedKeywordCountSpec.js b/packages/yoastseo/spec/scoring/helpers/assessments/recommendedKeywordCountSpec.js index 9c23557c25b..9620e90fe88 100644 --- a/packages/yoastseo/spec/scoring/helpers/assessments/recommendedKeywordCountSpec.js +++ b/packages/yoastseo/spec/scoring/helpers/assessments/recommendedKeywordCountSpec.js @@ -1,4 +1,4 @@ -import recommendedKeywordCount from "../../../../src/scoring/helpers/assessments/recommendedKeywordCount.js"; +import recommendedKeyphraseCount from "../../../../src/scoring/helpers/assessments/recommendedKeywordCount.js"; import Paper from "../../../../src/values/Paper"; import DefaultResearcher from "../../../../src/languageProcessing/languages/_default/Researcher"; import japaneseWordHelper from "../../../../src/languageProcessing/languages/ja/helpers/getWords"; @@ -9,45 +9,45 @@ const minRecommendedDensity = 0.5; const a = "a "; const defaultResearcher = new DefaultResearcher(); -describe( "Test for getting the recommended keyword count for a text", function() { - it( "returns the maximum recommended keyword count for a text with 300 words and a 1-word keyphrase", function() { +describe( "Test for getting the recommended keyphrase count for a text", function() { + it( "returns the maximum recommended keyphrase count for a text with 300 words and a 1-word keyphrase", function() { const text = a.repeat( 300 ); const paper = new Paper( "

" + text + "

" ); buildTree( paper, defaultResearcher ); - expect( recommendedKeywordCount( paper, 1, maxRecommendedDensity, "max" ) ).toBe( 8 ); + expect( recommendedKeyphraseCount( paper, 1, maxRecommendedDensity, "max" ) ).toBe( 8 ); } ); - it( "returns the maximum recommended keyword count for a text with 300 words and a 1-word keyphrase, " + + it( "returns the maximum recommended keyphrase count for a text with 300 words and a 1-word keyphrase, " + "using the custom helper for retrieving the words", function() { // The sentence contains 10 words. const text = "計画段階では「東海道新線」と呼ばれ開業て.".repeat( 30 ); const paper = new Paper( "

" + text + "

" ); buildTree( paper, defaultResearcher ); - expect( recommendedKeywordCount( paper, 1, maxRecommendedDensity, "max", japaneseWordHelper ) ).toBe( 8 ); + expect( recommendedKeyphraseCount( paper, 1, maxRecommendedDensity, "max", japaneseWordHelper ) ).toBe( 8 ); } ); - it( "returns the maximum recommended keyword count for a text with 300 words and a multi-word keyphrase", function() { + it( "returns the maximum recommended keyphrase count for a text with 300 words and a multi-word keyphrase", function() { const text = a.repeat( 300 ); const paper = new Paper( "

" + text + "

" ); buildTree( paper, defaultResearcher ); - expect( recommendedKeywordCount( paper, 2, maxRecommendedDensity, "max" ) ).toBe( 6 ); + expect( recommendedKeyphraseCount( paper, 2, maxRecommendedDensity, "max" ) ).toBe( 6 ); } ); - it( "returns the minimum recommended keyword count for a text with 1200 words and a 1-word keyphrase", function() { + it( "returns the minimum recommended keyphrase count for a text with 1200 words and a 1-word keyphrase", function() { const text = a.repeat( 1200 ); const paper = new Paper( "

" + text + "

" ); buildTree( paper, defaultResearcher ); - expect( recommendedKeywordCount( paper, 1, minRecommendedDensity, "min" ) ).toBe( 6 ); + expect( recommendedKeyphraseCount( paper, 1, minRecommendedDensity, "min" ) ).toBe( 6 ); } ); - it( "returns 2 as the default recommended minimum keyword count if the text is very short", function() { + it( "returns 2 as the default recommended minimum keyphrase count if the text is very short", function() { const text = a.repeat( 3 ); const paper = new Paper( "

" + text + "

" ); buildTree( paper, defaultResearcher ); - expect( recommendedKeywordCount( paper, 1, minRecommendedDensity, "min" ) ).toBe( 2 ); + expect( recommendedKeyphraseCount( paper, 1, minRecommendedDensity, "min" ) ).toBe( 2 ); } ); it( "returns 0 when the word count of the text is 0", function() { @@ -55,28 +55,28 @@ describe( "Test for getting the recommended keyword count for a text", function( const paper = new Paper( "

" + text + "

" ); buildTree( paper, defaultResearcher ); - expect( recommendedKeywordCount( paper, 1, maxRecommendedDensity, "max" ) ).toBe( 0 ); + expect( recommendedKeyphraseCount( paper, 1, maxRecommendedDensity, "max" ) ).toBe( 0 ); } ); - it( "returns the maximum recommended keyword count when the maxOrMin variable is not explicitly defined", function() { + it( "returns the maximum recommended keyphrase count when the maxOrMin variable is not explicitly defined", function() { const text = a.repeat( 300 ); const paper = new Paper( "

" + text + "

" ); buildTree( paper, defaultResearcher ); - expect( recommendedKeywordCount( paper, 1, maxRecommendedDensity ) ).toBe( 8 ); + expect( recommendedKeyphraseCount( paper, 1, maxRecommendedDensity ) ).toBe( 8 ); } ); it( "returns 0 when the paper is empty", function() { const paper = new Paper( "" ); buildTree( paper, defaultResearcher ); - expect( recommendedKeywordCount( paper, 1, maxRecommendedDensity ) ).toBe( 0 ); + expect( recommendedKeyphraseCount( paper, 1, maxRecommendedDensity ) ).toBe( 0 ); } ); it( "returns 0 when the paper only contains excluded element", function() { const paper = new Paper( "
In ancient Egyptian mythology, cats were highly revered and considered sacred animals.
" ); buildTree( paper, defaultResearcher ); - expect( recommendedKeywordCount( paper, 1, maxRecommendedDensity ) ).toBe( 0 ); + expect( recommendedKeyphraseCount( paper, 1, maxRecommendedDensity ) ).toBe( 0 ); } ); } ); diff --git a/packages/yoastseo/src/languageProcessing/helpers/match/matchWordFormsWithSentence.js b/packages/yoastseo/src/languageProcessing/helpers/match/matchWordFormsWithSentence.js index 1f1b5a38246..5483aa0bce8 100644 --- a/packages/yoastseo/src/languageProcessing/helpers/match/matchWordFormsWithSentence.js +++ b/packages/yoastseo/src/languageProcessing/helpers/match/matchWordFormsWithSentence.js @@ -11,7 +11,7 @@ import getWordsForHTMLParser from "../word/getWordsForHTMLParser"; * @returns {string[]} The tokenized word forms. */ export const tokenizeKeyphraseFormsForExactMatching = ( wordForms ) => { - // Tokenize keyword forms. + // Tokenize word form of the keyphrase. const wordFormText = wordForms[ 0 ]; return getWordsForHTMLParser( wordFormText ); }; @@ -20,9 +20,9 @@ export const tokenizeKeyphraseFormsForExactMatching = ( wordForms ) => { * Gets the exact matches of the keyphrase. * Exact matching happens when the user puts the keyphrase in double quotes. * - * @param {Sentence} sentence The sentence to match the word forms with. - * @param {string[]} wordForms The word forms to match. - * @param {string} locale The locale used in the analysis. + * @param {Sentence} sentence The sentence to match the word forms with. + * @param {string[]} wordForms The word forms to match. + * @param {string} locale The locale used in the analysis. * * @returns {{count: number, matches: Token[]}} Object containing the number of the exact matches and the matched tokens. */ @@ -32,40 +32,44 @@ const findExactMatchKeyphraseInSentence = ( sentence, wordForms, locale ) => { matches: [], }; // Tokenize word forms of the keyphrase. - const keywordTokens = tokenizeKeyphraseFormsForExactMatching( wordForms ); + const keyphraseTokens = tokenizeKeyphraseFormsForExactMatching( wordForms ); const sentenceTokens = sentence.tokens; - // Check if tokenized word forms occur in the same order in the sentence tokens. - let keywordIndex = 0; - let sentenceIndex = 0; + // Initialize the index of the word token of the keyphrase. + let indexOfWordInKeyphrase = 0; + // Initialize the index of the word token of the sentence. + let indexOfWordInSentence = 0; let currentMatch = []; - while ( sentenceIndex < sentenceTokens.length ) { - // If the current sentence token matches the current word token, add it to the current match. - const sentenceTokenText = sentenceTokens[ sentenceIndex ].text; - const keywordTokenText = keywordTokens[ keywordIndex ]; + // Check if the tokenized word forms occur in the same order in the sentence tokens. + while ( indexOfWordInSentence < sentenceTokens.length ) { + // If the current sentence token matches the current word token of the keyphrase, add it to the current match. 
+ const sentenceTokenText = sentenceTokens[ indexOfWordInSentence ].text; + const keyphraseTokenText = keyphraseTokens[ indexOfWordInKeyphrase ]; - const foundMatches = matchTextWithTransliteration( sentenceTokenText.toLowerCase(), keywordTokenText.toLowerCase(), locale ); + const foundMatches = matchTextWithTransliteration( sentenceTokenText.toLowerCase(), keyphraseTokenText.toLowerCase(), locale ); if ( foundMatches.length > 0 ) { - currentMatch.push( sentenceTokens[ sentenceIndex ] ); - keywordIndex++; + currentMatch.push( sentenceTokens[ indexOfWordInSentence ] ); + indexOfWordInKeyphrase++; } else { - keywordIndex = 0; + indexOfWordInKeyphrase = 0; currentMatch = []; } - // If the current match has the same length as the keyword tokens, the keyword forms have been matched. - // Add the current match to the matches array and reset the keyword index and the current match. - if ( currentMatch.length === keywordTokens.length ) { + /* + * If the current match has the same length as the keyphrase tokens, the keyphrase forms have been matched. + * Add the current match to the matches array and reset the index of the word in keyphrase and the current match. + */ + if ( currentMatch.length === keyphraseTokens.length ) { result.matches.push( ...currentMatch ); result.count++; - keywordIndex = 0; + indexOfWordInKeyphrase = 0; currentMatch = []; } - sentenceIndex++; + indexOfWordInSentence++; } return result; }; @@ -73,13 +77,13 @@ const findExactMatchKeyphraseInSentence = ( sentence, wordForms, locale ) => { /** * Matches a word form of the keyphrase with the tokens from the sentence. * - * @param {Token[]} tokens The array of tokens to check. - * @param {string} wordForm The word form of the keyphrase. - * @param {string} locale The locale used in the analysis. + * @param {Token[]} tokens The array of tokens to check. + * @param {string} wordForm The word form of the keyphrase. + * @param {string} locale The locale used in the analysis. * * @returns {Token[]} The array of the matched tokens. */ -const matchTokensWithKeywordForm = ( tokens, wordForm, locale ) => { +const matchWordFormInTokens = ( tokens, wordForm, locale ) => { let matches = []; tokens.forEach( token => { @@ -95,44 +99,43 @@ const matchTokensWithKeywordForm = ( tokens, wordForm, locale ) => { /** * Finds keyphrase forms in a sentence. * - * @param {Sentence} sentence The sentence to check. - * @param {string[]} wordForms The word forms of the keyphrase to check. - * @param {string} locale The locale used in the analysis. - * @param {function} matchWordCustomHelper Custom function to match a word form with sentence. + * @param {Sentence} sentence The sentence to check. + * @param {string[]} wordForms The word forms of the keyphrase to check. + * @param {string} locale The locale used in the analysis. + * @param {function} matchWordCustomHelper Custom function to match a word form with sentence. * * @returns {{count: number, matches: (Token|string)[]}} Object containing the number of the matches and the matched tokens. */ const matchWordFormsInSentence = ( sentence, wordForms, locale, matchWordCustomHelper ) => { const tokens = sentence.tokens.slice(); - let count = 0; - let matches = []; + const result = { + count: 0, + matches: [], + }; wordForms.forEach( wordForm => { - const occurrence = matchWordCustomHelper + const occurrences = matchWordCustomHelper ? 
matchWordCustomHelper( sentence.text, wordForm ) - : matchTokensWithKeywordForm( tokens, wordForm, locale ); - count += occurrence.length; - matches = matches.concat( occurrence ); + : matchWordFormInTokens( tokens, wordForm, locale ); + result.count += occurrences.length; + result.matches = matches.concat( occurrences ); } ); - return { - count: count, - matches: matches, - }; + return result; }; /** * Matches the word forms of a keyphrase with a sentence object from the html parser. * - * @param {Sentence} sentence The sentence to match against the keywordForms. - * @param {string[]} wordForms The array of word forms of the keyphrase. + * @param {Sentence} sentence The sentence to match against the word forms of a keyphrase. + * @param {string[]} wordForms The array of word forms of the keyphrase. * E.g. If the keyphrase is "key word", then (if premium is activated) this will be [ "key", "keys" ] OR [ "word", "words" ] * The forms are retrieved higher up (among others in keywordCount.js) with researcher.getResearch( "morphology" ). * - * @param {string} locale The locale used for transliteration. + * @param {string} locale The locale used for transliteration. + * @param {function} matchWordCustomHelper Custom function to match a word form with sentence. + * @param {boolean} useExactMatching Whether to match the keyphrase forms exactly or not. * Depends on whether the keyphrase enclosed in double quotes. - * @param {function} matchWordCustomHelper Custom function to match a word form with sentence. - * @param {boolean} useExactMatching Whether to match the keyword forms exactly or not. * * @returns {{count: number, matches: (Token|string)[]}} Object containing the number of the matches and the matched tokens. */ diff --git a/packages/yoastseo/src/languageProcessing/researches/getKeywordDensity.js b/packages/yoastseo/src/languageProcessing/researches/getKeywordDensity.js index 2d497a2e954..885d1d7892a 100644 --- a/packages/yoastseo/src/languageProcessing/researches/getKeywordDensity.js +++ b/packages/yoastseo/src/languageProcessing/researches/getKeywordDensity.js @@ -21,7 +21,7 @@ export default function getKeyphraseDensity( paper, researcher ) { return 0; } - const keyphraseCount = researcher.getResearch( "keyphraseCount" ); + const keyphraseCount = researcher.getResearch( "getKeyphraseCount" ); return ( keyphraseCount.count / wordCount ) * 100; } diff --git a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js index 435ba768819..ac73221079b 100644 --- a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js +++ b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js @@ -68,17 +68,18 @@ export function countKeyphraseInText( sentences, keyphraseForms, locale, matchWo * @param {Paper} paper The paper containing keyphrase and text. * @param {Researcher} researcher The researcher. * - * @returns {Object} An object containing an array of all the matches, markings and the keyphrase count. + * @returns {{count: number, markings: Mark[], keyphraseLength: number}} An object containing the keyphrase count, markings and the kephrase length. 
*/ export default function getKeyphraseCount( paper, researcher ) { + const result = { count: 0, markings: [], keyphraseLength: 0 }; const topicForms = researcher.getResearch( "morphology" ); - const keyphraseForms = topicForms.keyphraseForms; + let keyphraseForms = topicForms.keyphraseForms; const keyphraseLength = keyphraseForms.length; keyphraseForms = keyphraseForms.map( word => word.map( form => normalizeSingle( form ) ) ); if ( keyphraseLength === 0 ) { - return { count: 0, markings: [], length: 0 }; + return result; } const matchWordCustomHelper = researcher.getHelper( "matchWordCustomHelper" ); @@ -94,11 +95,11 @@ export default function getKeyphraseCount( paper, researcher ) { * */ const keyphraseFound = countKeyphraseInText( sentences, keyphraseForms, locale, matchWordCustomHelper, isExactMatchRequested ); - return { - count: keyphraseFound.count, - markings: flatten( keyphraseFound.markings ), - length: keyphraseLength, - }; + result.count = keyphraseFound.count; + result.markings = flatten( keyphraseFound.markings ); + result.keyphraseLength = keyphraseLength; + + return result; } /** @@ -112,6 +113,6 @@ export default function getKeyphraseCount( paper, researcher ) { * @returns {Object} An array of all the matches, markings and the keyphrase count. */ export function keywordCount( paper, researcher ) { - console.warn( "This function is deprecated, use keyphraseCount instead." ); + console.warn( "This function is deprecated, use getKeyphraseCount instead." ); return getKeyphraseCount( paper, researcher ); } diff --git a/packages/yoastseo/src/scoring/assessments/seo/KeywordDensityAssessment.js b/packages/yoastseo/src/scoring/assessments/seo/KeywordDensityAssessment.js index 900f0bb5bf3..086f67b8572 100644 --- a/packages/yoastseo/src/scoring/assessments/seo/KeywordDensityAssessment.js +++ b/packages/yoastseo/src/scoring/assessments/seo/KeywordDensityAssessment.js @@ -101,8 +101,8 @@ class KeyphraseDensityAssessment extends Assessment { */ getResult( paper, researcher ) { const customGetWords = researcher.getHelper( "getWordsCustomHelper" ); - this._keyphraseCount = researcher.getResearch( "keyphraseCount" ); - const keyphraseLength = this._keyphraseCount.length; + this._keyphraseCount = researcher.getResearch( "getKeyphraseCount" ); + const keyphraseLength = this._keyphraseCount.keyphraseLength; const assessmentResult = new AssessmentResult(); @@ -303,7 +303,7 @@ class KeyphraseDensityAssessment extends Assessment { /** - * Marks keywords in the text for the keyword density assessment. + * Marks the occurrences of keyphrase in the text for the keyphrase density assessment. * * @returns {Array} Marks that should be applied. */ @@ -313,7 +313,7 @@ class KeyphraseDensityAssessment extends Assessment { /** - * Checks whether the paper has a text of the minimum required length and a keyword is set. Language-specific length requirements and methods + * Checks whether the paper has a text of the minimum required length and a keyphrase is set. Language-specific length requirements and methods * of counting text length may apply (e.g. for Japanese, the text should be counted in characters instead of words, which also makes the minimum * required length higher). 
* diff --git a/packages/yoastseo/src/scoring/helpers/assessments/recommendedKeywordCount.js b/packages/yoastseo/src/scoring/helpers/assessments/recommendedKeywordCount.js index d668353cfa2..830f223bd48 100644 --- a/packages/yoastseo/src/scoring/helpers/assessments/recommendedKeywordCount.js +++ b/packages/yoastseo/src/scoring/helpers/assessments/recommendedKeywordCount.js @@ -2,18 +2,18 @@ import getAllWordsFromTree from "../../../languageProcessing/helpers/word/getAll import keyphraseLengthFactor from "./keyphraseLengthFactor.js"; /** - * Calculates a recommended keyword count for a paper's text. The formula to calculate this number is based on the - * keyword density formula. + * Calculates a recommended keyphrase count for a paper's text. The formula to calculate this number is based on the + * keyphrase density formula. * - * @param {Paper} paper The paper to analyze. - * @param {number} keyphraseLength The length of the focus keyphrase in words. - * @param {number} recommendedKeywordDensity The recommended keyword density (either maximum or minimum). - * @param {string} maxOrMin Whether it's a maximum or minimum recommended keyword density. - * @param {function} customGetWords A helper to get words from the text for languages that don't use the default approach. + * @param {Paper} paper The paper to analyze. + * @param {number} keyphraseLength The length of the focus keyphrase in words. + * @param {number} recommendedKeyphraseDensity The recommended keyphrase density (either maximum or minimum). + * @param {string} maxOrMin Whether it's a maximum or minimum recommended keyphrase density. + * @param {function} customGetWords A helper to get words from the text for languages that don't use the default approach. * - * @returns {number} The recommended keyword count. + * @returns {number} The recommended keyphrase count. */ -export default function( paper, keyphraseLength, recommendedKeywordDensity, maxOrMin, customGetWords ) { +export default function( paper, keyphraseLength, recommendedKeyphraseDensity, maxOrMin, customGetWords ) { const wordCount = customGetWords ? customGetWords( paper.getText() ).length : getAllWordsFromTree( paper ).length; if ( wordCount === 0 ) { @@ -21,23 +21,23 @@ export default function( paper, keyphraseLength, recommendedKeywordDensity, maxO } const lengthKeyphraseFactor = keyphraseLengthFactor( keyphraseLength ); - const recommendedKeywordCount = ( recommendedKeywordDensity * wordCount ) / ( 100 * lengthKeyphraseFactor ); + const recommendedKeyphraseCount = ( recommendedKeyphraseDensity * wordCount ) / ( 100 * lengthKeyphraseFactor ); /* - * The recommended keyword count should always be at least 2, - * regardless of the keyword density, the word count, or the keyphrase length. + * The recommended keyphrase count should always be at least 2, + * regardless of the keyphrase density, the word count, or the keyphrase length. */ - if ( recommendedKeywordCount < 2 ) { + if ( recommendedKeyphraseCount < 2 ) { return 2; } switch ( maxOrMin ) { case "min": // Round up for the recommended minimum count. - return Math.ceil( recommendedKeywordCount ); + return Math.ceil( recommendedKeyphraseCount ); default: case "max": // Round down for the recommended maximum count. 
- return Math.floor( recommendedKeywordCount ); + return Math.floor( recommendedKeyphraseCount ); } } From c47d09cc069ff8a64da9eae4fabf64a62256286b Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Tue, 4 Jul 2023 17:28:40 +0200 Subject: [PATCH 213/616] Add comment --- .../helpers/match/matchWordFormsWithSentence.js | 2 +- .../yoastseo/src/languageProcessing/researches/keywordCount.js | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/packages/yoastseo/src/languageProcessing/helpers/match/matchWordFormsWithSentence.js b/packages/yoastseo/src/languageProcessing/helpers/match/matchWordFormsWithSentence.js index 5483aa0bce8..e7ff6837129 100644 --- a/packages/yoastseo/src/languageProcessing/helpers/match/matchWordFormsWithSentence.js +++ b/packages/yoastseo/src/languageProcessing/helpers/match/matchWordFormsWithSentence.js @@ -118,7 +118,7 @@ const matchWordFormsInSentence = ( sentence, wordForms, locale, matchWordCustomH ? matchWordCustomHelper( sentence.text, wordForm ) : matchWordFormInTokens( tokens, wordForm, locale ); result.count += occurrences.length; - result.matches = matches.concat( occurrences ); + result.matches = result.matches.concat( occurrences ); } ); return result; diff --git a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js index ac73221079b..dc64552aeda 100644 --- a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js +++ b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js @@ -76,6 +76,8 @@ export default function getKeyphraseCount( paper, researcher ) { let keyphraseForms = topicForms.keyphraseForms; const keyphraseLength = keyphraseForms.length; + // Normalize single quotes so that word form with different type of single quotes can still be matched. + // For exmple, "key‛word" should match "key'word". keyphraseForms = keyphraseForms.map( word => word.map( form => normalizeSingle( form ) ) ); if ( keyphraseLength === 0 ) { From 4fb312bdd68cda9ab4c9285b1eb745e51e985590 Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Wed, 5 Jul 2023 13:06:04 +0200 Subject: [PATCH 214/616] remove deprecation version --- .../src/languageProcessing/researches/getKeywordDensity.js | 2 +- .../yoastseo/src/languageProcessing/researches/keywordCount.js | 2 +- .../src/scoring/assessments/seo/KeywordDensityAssessment.js | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/yoastseo/src/languageProcessing/researches/getKeywordDensity.js b/packages/yoastseo/src/languageProcessing/researches/getKeywordDensity.js index 885d1d7892a..bdaf1fe61c9 100644 --- a/packages/yoastseo/src/languageProcessing/researches/getKeywordDensity.js +++ b/packages/yoastseo/src/languageProcessing/researches/getKeywordDensity.js @@ -29,7 +29,7 @@ export default function getKeyphraseDensity( paper, researcher ) { /** * Calculates the keyphrase density. * - * @deprecated Since version 20.12. Use getKeyphraseDensity instead. + * @deprecated Use getKeyphraseDensity instead. * * @param {Object} paper The paper containing keyphrase and text. * @param {Object} researcher The researcher. 
diff --git a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js index dc64552aeda..07be00cc64b 100644 --- a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js +++ b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js @@ -107,7 +107,7 @@ export default function getKeyphraseCount( paper, researcher ) { /** * Calculates the keyphrase count, takes morphology into account. * - * @deprecated Since version 20.12. Use getKeyphraseCount instead. + * @deprecated Use getKeyphraseCount instead. * * @param {Paper} paper The paper containing keyphrase and text. * @param {Researcher} researcher The researcher. diff --git a/packages/yoastseo/src/scoring/assessments/seo/KeywordDensityAssessment.js b/packages/yoastseo/src/scoring/assessments/seo/KeywordDensityAssessment.js index 086f67b8572..bb030a9aecc 100644 --- a/packages/yoastseo/src/scoring/assessments/seo/KeywordDensityAssessment.js +++ b/packages/yoastseo/src/scoring/assessments/seo/KeywordDensityAssessment.js @@ -340,7 +340,7 @@ class KeyphraseDensityAssessment extends Assessment { * KeywordDensityAssessment was the previous name for KeyphraseDensityAssessment (hence the name of this file). * We keep (and expose) this assessment for backwards compatibility. * - * @deprecated Since version 20.12 Use KeyphraseDensityAssessment instead. + * @deprecated Use KeyphraseDensityAssessment instead. */ class KeywordDensityAssessment extends KeyphraseDensityAssessment { /** From cedb55d4ad37085799c8b9ab5413a227d82c536e Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Wed, 5 Jul 2023 13:13:05 +0200 Subject: [PATCH 215/616] Adjust unit tests --- .../researches/keywordCountSpec.js | 92 +++++++++++-------- 1 file changed, 55 insertions(+), 37 deletions(-) diff --git a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js index 20ef8171746..1f573245e79 100644 --- a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js +++ b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js @@ -25,7 +25,7 @@ const buildMorphologyMockResearcher = function( keyphraseForms ) { const testCases = [ { - description: "counts/marks a string of text with a keyword in it.", + description: "counts/marks a string of text with an occurrence of the keyphrase in it.", paper: new Paper( "

a string of text with the keyword in it

", { keyword: "keyword" } ), keyphraseForms: [ [ "keyword", "keywords" ] ], expectedCount: 1, @@ -36,7 +36,7 @@ const testCases = [ skip: false, }, { - description: "counts a string of text with no keyword in it.", + description: "counts a string of text with no occurrence of the keyphrase in it.", paper: new Paper( "

a string of text

", { keyword: "" } ), keyphraseForms: [], expectedCount: 0, @@ -60,7 +60,7 @@ const testCases = [ skip: false, }, { - description: "counts a string with multiple keyword morphological forms", + description: "counts a string with multiple occurrences of keyphrase with different morphological forms", paper: new Paper( "

A string of text with a keyword and multiple keywords in it.

", { keyword: "keyword" } ), keyphraseForms: [ [ "keyword", "keywords" ] ], expectedCount: 2, @@ -76,7 +76,7 @@ const testCases = [ skip: false, }, { - description: "does not count images as keywords.", + description: "does not match keyphrase occurrence in image alt attribute.", paper: new Paper( "

A text with image

", { keyword: "image" } ), keyphraseForms: [ [ "image", "images" ] ], expectedCount: 0, @@ -103,7 +103,7 @@ const testCases = [ skip: false, }, { - description: "only counts full key phrases (when all keywords are in the sentence once, twice etc.) as matches.", + description: "only counts full-matches of the keyphrase: full-match means when all word of the keyphrase are found in the sentence", paper: new Paper( "

A string with three keys (key and another key) and one word.

" ), keyphraseForms: [ [ "key", "keys" ], [ "word", "words" ] ], expectedCount: 1, @@ -131,7 +131,7 @@ const testCases = [ skip: false, }, { - description: "matches both singular and reduplicated plural form of the keyword in Indonesian, " + + description: "matches both singular and reduplicated plural form of the keyphrase in Indonesian, " + "the plural should be counted as one occurrence", paper: new Paper( "

Lorem ipsum dolor sit amet, consectetur keyword-keyword, keyword adipiscing elit.

", { locale: "id_ID", keyword: "keyword" } ), @@ -236,6 +236,14 @@ const testCases = [ position: { endOffset: 14, startOffset: 5 } } ) ], skip: false, }, + { + description: "with exact matching, the keyphrase directly preceded by a full stop should not match a keyphrase occurrence without the full stop in the text.", + paper: new Paper( "

A sentence with a keyphrase.

", { keyword: "\".sentence\"" } ), + keyphraseForms: [ [ ".sentence" ] ], + expectedCount: 0, + expectedMarkings: [], + skip: false, + }, { description: "can match dollar sign as in '$keyword' with exact matching.", paper: new Paper( "

A string with a $keyword.

", { keyword: "\"$keyword\"" } ), @@ -286,7 +294,7 @@ const testCases = [ }, ]; -describe.each( testCases )( "Test for counting the keyword in a text in english", function( { +describe.each( testCases )( "Test for counting the keyphrase in a text in english", function( { description, paper, keyphraseForms, @@ -298,9 +306,9 @@ describe.each( testCases )( "Test for counting the keyword in a text in english" test( description, function() { const mockResearcher = buildMorphologyMockResearcher( keyphraseForms ); buildTree( paper, mockResearcher ); - const keyWordCountResult = getKeyphraseCount( paper, mockResearcher ); - expect( keyWordCountResult.count ).toBe( expectedCount ); - expect( keyWordCountResult.markings ).toEqual( expectedMarkings ); + const keyphraseCountResult = getKeyphraseCount( paper, mockResearcher ); + expect( keyphraseCountResult.count ).toBe( expectedCount ); + expect( keyphraseCountResult.markings ).toEqual( expectedMarkings ); } ); } ); @@ -318,7 +326,7 @@ const testCasesWithSpecialCharacters = [ skip: false, }, { - description: "counts a string with a keyword with a '-' in it", + description: "counts a string with a keyphrase occurrence with a '-' in it", paper: new Paper( "

A string with a key-word.

", { keyword: "key-word" } ), keyphraseForms: [ [ "key-word", "key-words" ] ], expectedCount: 1, @@ -347,7 +355,7 @@ const testCasesWithSpecialCharacters = [ }, { description: "should find a match when the sentence contains the keyphrase with a dash but in the wrong order " + - "and the keyphrase doesn't contain dash", + "and the keyphrase doesn't contain a dash", paper: new Paper( "

A string with a panda-red.

", { keyword: "red panda" } ), keyphraseForms: [ [ "red" ], [ "panda" ] ], expectedCount: 1, @@ -361,7 +369,8 @@ const testCasesWithSpecialCharacters = [ skip: false, }, { - description: "counts 'key word' in 'key-word'.", + description: "should find a match when the sentence contains the keyphrase with a dash with the same order " + + "and the keyphrase doesn't contain a dash", paper: new Paper( "

A string with a key-word.

", { keyword: "key word" } ), keyphraseForms: [ [ "key", "keys" ], [ "word", "words" ] ], expectedCount: 1, @@ -375,7 +384,8 @@ const testCasesWithSpecialCharacters = [ skip: false, }, { - description: "counts 'key' in 'key-phrase'", + description: "should find a match when the word of the keyphrase occurs in the sentence hyphenated with a non-keyphrase word." + + "For example 'key' in 'key-phrase'.", paper: new Paper( "

A string with a word of key-phrase.

", { keyword: "key word" } ), keyphraseForms: [ [ "key", "keys" ], [ "word", "words" ] ], expectedCount: 1, @@ -398,7 +408,7 @@ const testCasesWithSpecialCharacters = [ skip: false, }, { - description: "counts a string with a keyword with a '_' in it", + description: "should find a match between a keyphrase with an underscore and its occurrence in the text with the same form", paper: new Paper( "

A string with a key_word.

", { keyword: "key_word" } ), keyphraseForms: [ [ "key_word", "key_words" ] ], expectedCount: 1, @@ -408,7 +418,7 @@ const testCasesWithSpecialCharacters = [ skip: false, }, { - description: "counts a string with with '&' in the string and the keyword", + description: "should find a match between a keyphrase with an ampersand (&) and its occurrence in the text with the same form", paper: new Paper( "

A string with key&word in it

", { keyword: "key&word" } ), keyphraseForms: [ [ "key&word" ] ], expectedCount: 1, @@ -419,8 +429,8 @@ const testCasesWithSpecialCharacters = [ }, { description: "should still match keyphrase occurrence with different types of apostrophe.", - paper: new Paper( "

A string with quotes to match the key'word, even if the quotes differ.

", { keyword: "key'word" } ), - keyphraseForms: [ [ "key'word", "key'words" ] ], + paper: new Paper( "

A string with quotes to match the key'word, even if the quotes differ.

", { keyword: "key‛word" } ), + keyphraseForms: [ [ "key‛word", "key‛words" ] ], expectedCount: 1, expectedMarkings: [ new Mark( { marked: "A string with quotes to match the key'word, even if the quotes differ.", @@ -439,7 +449,7 @@ const testCasesWithSpecialCharacters = [ skip: false, }, { - description: "can match multiple occurrences of keyphrase ending in & as in 'keyphrase$', and output correct Marks objects", + description: "can match multiple occurrences of keyphrase ending in & as in 'keyphrase&', and output correct Marks objects", paper: new Paper( "

A string with a keyphrase&.

", { keyword: "keyphrase&" } ), keyphraseForms: [ [ "keyphrase" ] ], expectedCount: 1, @@ -449,7 +459,7 @@ const testCasesWithSpecialCharacters = [ skip: false, }, { - description: "doesn't count 'key-word' in 'key word'.", + description: "doesn't match unhyphenated occurrences in the text if the keyphrase is hyphenated.", paper: new Paper( "

A string with a key word.

", { keyword: "key-word" } ), keyphraseForms: [ [ "key-word", "key-words" ] ], expectedCount: 0, @@ -457,7 +467,7 @@ const testCasesWithSpecialCharacters = [ skip: false, }, { - description: "can match keyphrase enclosed in «» '«keyword»' in the text", + description: "can match keyphrase enclosed in «» in the text, also if the paper's keyphrase is not enclosed in «»", paper: new Paper( "

A string with a «keyword».

", { keyword: "keyword" } ), keyphraseForms: [ [ "keyword" ] ], expectedCount: 1, @@ -471,7 +481,7 @@ const testCasesWithSpecialCharacters = [ skip: false, }, { - description: "can match keyphrase enclosed in «» '«keyword»'", + description: "can match an occurrence of the keyphrase not enclosed in «» when the paper's keyphrase is enclosed in «»", paper: new Paper( "

A string with a keyword.

", { keyword: "«keyword»" } ), keyphraseForms: [ [ "keyword" ] ], expectedCount: 1, @@ -485,7 +495,7 @@ const testCasesWithSpecialCharacters = [ skip: false, }, { - description: "can match keyphrase enclosed in ‹› '‹keyword›' in the text", + description: "can match keyphrase enclosed in ‹› in the text, also if the paper's keyphrase is not enclosed in ‹›", paper: new Paper( "

A string with a ‹keyword›.

", { keyword: "keyword" } ), keyphraseForms: [ [ "keyword" ] ], expectedCount: 1, @@ -499,7 +509,7 @@ const testCasesWithSpecialCharacters = [ skip: false, }, { - description: "can match keyphrase enclosed in ‹› '‹keyword›'", + description: "can match an occurrence of the keyphrase not enclosed in ‹› when the paper's keyphrase is enclosed in ‹›", paper: new Paper( "

A string with a keyword.

", { keyword: "‹keyword›" } ), keyphraseForms: [ [ "keyword" ] ], expectedCount: 1, @@ -542,7 +552,7 @@ const testCasesWithSpecialCharacters = [ }, ]; -describe.each( testCasesWithSpecialCharacters )( "Test for counting the keyword in a text containing special characters", +describe.each( testCasesWithSpecialCharacters )( "Test for counting the keyphrase in a text containing special characters", function( { description, paper, @@ -555,15 +565,22 @@ describe.each( testCasesWithSpecialCharacters )( "Test for counting the keyword test( description, function() { const mockResearcher = buildMorphologyMockResearcher( keyphraseForms ); buildTree( paper, mockResearcher ); - const keyWordCountResult = getKeyphraseCount( paper, mockResearcher ); - expect( keyWordCountResult.count ).toBe( expectedCount ); - expect( keyWordCountResult.markings ).toEqual( expectedMarkings ); + const keyphraseCountResult = getKeyphraseCount( paper, mockResearcher ); + expect( keyphraseCountResult.count ).toBe( expectedCount ); + expect( keyphraseCountResult.markings ).toEqual( expectedMarkings ); } ); } ); +/** + * The following test cases illustrates the current approach of matching transliterated keyphrase with non-transliterated version. + * For example, word form "acción" from the keyphrase will match word "accion" in the sentence. + * But, word form "accion" from the keyphrase will NOT match word "acción" in the sentence. + * + * This behaviour might change in the future. + */ const testCasesWithLocaleMapping = [ { - description: "counts a string of text with German diacritics and eszett as the keyword", + description: "counts a string of text with German diacritics and eszett as the keyphrase", paper: new Paper( "

Waltz keepin auf mitz auf keepin äöüß weiner blitz deutsch spitzen.

", { keyword: "äöüß", locale: "de_DE" } ), keyphraseForms: [ [ "äöüß" ] ], expectedCount: 1, @@ -780,7 +797,8 @@ const testCasesWithLocaleMapping = [ skip: false, }, ]; -describe.each( testCasesWithLocaleMapping )( "Test for counting the keyword in a text with different locale mapping, e.g. Turkish", + +describe.each( testCasesWithLocaleMapping )( "Test for counting the keyphrase in a text with different locale mapping, e.g. Turkish", function( { description, paper, @@ -793,9 +811,9 @@ describe.each( testCasesWithLocaleMapping )( "Test for counting the keyword in a test( description, function() { const mockResearcher = buildMorphologyMockResearcher( keyphraseForms ); buildTree( paper, mockResearcher ); - const keyWordCountResult = getKeyphraseCount( paper, mockResearcher ); - expect( keyWordCountResult.count ).toBe( expectedCount ); - expect( keyWordCountResult.markings ).toEqual( expectedMarkings ); + const keyphraseCountResult = getKeyphraseCount( paper, mockResearcher ); + expect( keyphraseCountResult.count ).toBe( expectedCount ); + expect( keyphraseCountResult.markings ).toEqual( expectedMarkings ); } ); } ); @@ -824,9 +842,9 @@ const buildJapaneseMockResearcher = function( keyphraseForms, helper1, helper2 ) } ); }; -describe( "Test for counting the keyword in a text for Japanese", () => { +describe( "Test for counting the keyphrase in a text for Japanese", () => { // NOTE: Japanese is not yet adapted to use HTML parser, hence, the marking out doesn't include the position information. - it( "counts/marks a string of text with a keyword in it.", function() { + it( "counts/marks a string of text with a keyphrase in it.", function() { const mockPaper = new Paper( "

私の猫はかわいいです。 { original: "私の猫はかわいいです。" } ) ] ); } ); - it( "counts/marks a string of text with multiple occurrences of the same keyword in it.", function() { + it( "counts/marks a string of text with multiple occurrences of the same keyphrase in it.", function() { const mockPaper = new Paper( "

私の猫はかわいい猫です。 { } ) ] ); } ); - it( "counts a string if text with no keyword in it.", function() { + it( "counts a string if text with no keyphrase in it.", function() { const mockPaper = new Paper( "私の猫はかわいいです。", { locale: "ja" } ); const researcher = buildJapaneseMockResearcher( [ [ "猫" ], [ "会い" ] ], wordsCountHelper, matchWordsHelper ); buildTree( mockPaper, researcher ); From 65b6b28e944de25f1aaa8d1082489024e8706657 Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Wed, 5 Jul 2023 13:13:23 +0200 Subject: [PATCH 216/616] Improve JSDoc --- .../helpers/match/matchWordFormsWithSentence.js | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/packages/yoastseo/src/languageProcessing/helpers/match/matchWordFormsWithSentence.js b/packages/yoastseo/src/languageProcessing/helpers/match/matchWordFormsWithSentence.js index e7ff6837129..3a0ef39d91f 100644 --- a/packages/yoastseo/src/languageProcessing/helpers/match/matchWordFormsWithSentence.js +++ b/packages/yoastseo/src/languageProcessing/helpers/match/matchWordFormsWithSentence.js @@ -77,6 +77,11 @@ const findExactMatchKeyphraseInSentence = ( sentence, wordForms, locale ) => { /** * Matches a word form of the keyphrase with the tokens from the sentence. * + * With this approach, we transliterate the word form of the keyphrase before matching it with the sentence tokens. + * However, we don't do the transliteration step for the sentence tokens. + * As a result, for example, word form "acción" from the keyphrase will match word "accion" in the sentence. + * But, word form "accion" from the keyphrase will NOT match word "acción" in the sentence. + * * @param {Token[]} tokens The array of tokens to check. * @param {string} wordForm The word form of the keyphrase. * @param {string} locale The locale used in the analysis. 
From 88bcc3a79868a98c0866a2fb4b88e607c6ed69b2 Mon Sep 17 00:00:00 2001 From: Agnieszka Szuba Date: Wed, 5 Jul 2023 13:30:52 +0200 Subject: [PATCH 217/616] Fix typos and formatting --- .../helpers/match/matchWordFormsWithSentenceSpec.js | 2 +- .../languageProcessing/researches/keywordCountSpec.js | 10 +++++----- .../helpers/highlighting/getMarkingsInSentence.js | 7 +++++++ .../helpers/match/matchWordFormsWithSentence.js | 2 +- .../helpers/sentence/getSentencesFromTree.js | 1 + .../helpers/word/getAllWordsFromTree.js | 2 +- 6 files changed, 16 insertions(+), 8 deletions(-) diff --git a/packages/yoastseo/spec/languageProcessing/helpers/match/matchWordFormsWithSentenceSpec.js b/packages/yoastseo/spec/languageProcessing/helpers/match/matchWordFormsWithSentenceSpec.js index 6872a25d09c..19b57337917 100644 --- a/packages/yoastseo/spec/languageProcessing/helpers/match/matchWordFormsWithSentenceSpec.js +++ b/packages/yoastseo/spec/languageProcessing/helpers/match/matchWordFormsWithSentenceSpec.js @@ -21,7 +21,7 @@ const testCases = [ expectedResult: { count: 0, matches: [] }, }, { - testDescription: "returns no match when no word forms is available for matching", + testDescription: "returns no match when no word forms are available for matching", sentence: { text: "A sentence with notthekeyphrase.", tokens: [ diff --git a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js index 1f1df8bbf3f..b1d8bebdaad 100644 --- a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js +++ b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js @@ -408,7 +408,7 @@ const testCasesWithSpecialCharacters = [ skip: false, }, { - description: "counts a string with with '&' in the string and the keyword", + description: "counts a string with '&' in the string and the keyword", paper: new Paper( "

A string with key&word in it

", { keyword: "key&word" } ), keyphraseForms: [ [ "key&word" ] ], expectedCount: 1, @@ -668,7 +668,7 @@ const testCasesWithLocaleMapping = [ skip: false, }, { - description: "counts a string with with a different forms of Turkish i, kephrase: İstanbul", + description: "counts a string with a different forms of Turkish i, kephrase: İstanbul", paper: new Paper( "

İstanbul and Istanbul and istanbul and ıstanbul

", { keyword: "İstanbul", locale: "tr_TR" } ), keyphraseForms: [ [ "İstanbul" ] ], expectedCount: 4, @@ -696,7 +696,7 @@ const testCasesWithLocaleMapping = [ skip: false, }, { - description: "counts a string with with a different forms of Turkish i, kephrase: Istanbul", + description: "counts a string with a different forms of Turkish i, kephrase: Istanbul", paper: new Paper( "

İstanbul and Istanbul and istanbul and ıstanbul

", { keyword: "Istanbul", locale: "tr_TR" } ), keyphraseForms: [ [ "Istanbul" ] ], expectedCount: 4, @@ -724,7 +724,7 @@ const testCasesWithLocaleMapping = [ skip: false, }, { - description: "counts a string with with a different forms of Turkish i, kephrase: istanbul", + description: "counts a string with a different forms of Turkish i, kephrase: istanbul", paper: new Paper( "

İstanbul and Istanbul and istanbul and ıstanbul

", { keyword: "istanbul", locale: "tr_TR" } ), keyphraseForms: [ [ "istanbul" ] ], expectedCount: 4, @@ -752,7 +752,7 @@ const testCasesWithLocaleMapping = [ skip: false, }, { - description: "counts a string with with a different forms of Turkish i, kephrase: ıstanbul", + description: "counts a string with a different forms of Turkish i, kephrase: ıstanbul", paper: new Paper( "

İstanbul and Istanbul and istanbul and ıstanbul

", { keyword: "ıstanbul", locale: "tr_TR" } ), keyphraseForms: [ [ "ıstanbul" ] ], expectedCount: 4, diff --git a/packages/yoastseo/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js b/packages/yoastseo/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js index bbb6e65c3b6..ae74812f426 100644 --- a/packages/yoastseo/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js +++ b/packages/yoastseo/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js @@ -7,8 +7,10 @@ const markEnd = "
"; /** * Merges consecutive and overlapping markings into one marking. * This is a helper for position based highlighting. + * * @param {Object[]} matches An array of markings to merge. * @param {Boolean} useSpace Whether words are separated by a space. In Japanese, for example, words are not separated by a space. + * * @returns {Mark[]} An array of markings where consecutive and overlapping markings are merged. */ const mergeConsecutiveAndOverlappingMatches = ( matches, useSpace = true ) => { @@ -41,10 +43,12 @@ const mergeConsecutiveAndOverlappingMatches = ( matches, useSpace = true ) => { /** * This function creates the old style marked sentence for search based highlighting. + * * Ideally this function becomes obsolete when position based highlighting is implemented everywhere. * @param {Sentence} sentence The sentence to which to apply the marks. * @param {Object} matches The matches to apply. * @param {string} locale The locale of the text. + * * @returns {string} The sentence with marks applied. */ const createMarksForSentence = ( sentence, matches, locale ) => { @@ -78,8 +82,10 @@ const createMarksForSentence = ( sentence, matches, locale ) => { /** * Merges consecutive and overlapping markings into one marking. * This is a helper for position based highlighting. + * * @param {Mark[]} markings An array of markings to merge. * @param {Boolean} useSpace Whether words are separated by a space. In Japanese, for example, words are not separated by a space. + * * @returns {Mark[]} An array of markings where consecutive and overlapping markings are merged. */ const mergeConsecutiveAndOverlappingMarkings = ( markings, useSpace = true ) => { @@ -97,6 +103,7 @@ const mergeConsecutiveAndOverlappingMarkings = ( markings, useSpace = true ) => } const lastMarking = newMarkings[ newMarkings.length - 1 ]; + if ( lastMarking.getPositionEnd() + ( useSpace ? 1 : 0 ) === marking.getPositionStart() ) { // The marking is consecutive to the last marking, so we extend the last marking to include the new marking. lastMarking.setPositionEnd( marking.getPositionEnd() ); diff --git a/packages/yoastseo/src/languageProcessing/helpers/match/matchWordFormsWithSentence.js b/packages/yoastseo/src/languageProcessing/helpers/match/matchWordFormsWithSentence.js index 1f1b5a38246..fbb195f0286 100644 --- a/packages/yoastseo/src/languageProcessing/helpers/match/matchWordFormsWithSentence.js +++ b/packages/yoastseo/src/languageProcessing/helpers/match/matchWordFormsWithSentence.js @@ -130,7 +130,7 @@ const matchWordFormsInSentence = ( sentence, wordForms, locale, matchWordCustomH * The forms are retrieved higher up (among others in keywordCount.js) with researcher.getResearch( "morphology" ). * * @param {string} locale The locale used for transliteration. - * Depends on whether the keyphrase enclosed in double quotes. + * Depends on whether the keyphrase is enclosed in double quotes. * @param {function} matchWordCustomHelper Custom function to match a word form with sentence. * @param {boolean} useExactMatching Whether to match the keyword forms exactly or not. 
* diff --git a/packages/yoastseo/src/languageProcessing/helpers/sentence/getSentencesFromTree.js b/packages/yoastseo/src/languageProcessing/helpers/sentence/getSentencesFromTree.js index 23a09d4acbe..6edce9cf369 100644 --- a/packages/yoastseo/src/languageProcessing/helpers/sentence/getSentencesFromTree.js +++ b/packages/yoastseo/src/languageProcessing/helpers/sentence/getSentencesFromTree.js @@ -7,6 +7,7 @@ * @returns {Object[]} The array of sentences retrieved from paragraph and heading nodes. */ export default function( paper ) { + // Get all nodes that have a sentence property which is not an empty array. const tree = paper.getTree().findAll( treeNode => !! treeNode.sentences ); return tree.flatMap( node => node.sentences ); diff --git a/packages/yoastseo/src/languageProcessing/helpers/word/getAllWordsFromTree.js b/packages/yoastseo/src/languageProcessing/helpers/word/getAllWordsFromTree.js index d353db510be..945821cdf82 100644 --- a/packages/yoastseo/src/languageProcessing/helpers/word/getAllWordsFromTree.js +++ b/packages/yoastseo/src/languageProcessing/helpers/word/getAllWordsFromTree.js @@ -3,7 +3,7 @@ import { flatMap } from "lodash-es"; import removePunctuation from "../sanitize/removePunctuation"; /** - * Gets the words from the tree, i.e. the paragraph and heading nodes. + * Gets the words from the tree, i.e. from the paragraph and heading nodes. * These two node types are the nodes that should contain words for the analysis. * * @param {Paper} paper The paper to get the tree and words from. From 63decd23d89124928131f744dd9f34394b071d24 Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Wed, 5 Jul 2023 16:10:38 +0200 Subject: [PATCH 218/616] Improve code comments --- .../highlighting/getMarkingsInSentence.js | 84 ++++++++++--------- .../match/matchWordFormsWithSentence.js | 7 +- .../researches/getKeywordDensity.js | 6 +- .../researches/keywordCount.js | 20 +++-- .../src/markers/removeDuplicateMarks.js | 2 +- 5 files changed, 65 insertions(+), 54 deletions(-) diff --git a/packages/yoastseo/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js b/packages/yoastseo/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js index bbb6e65c3b6..3328bd42208 100644 --- a/packages/yoastseo/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js +++ b/packages/yoastseo/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js @@ -1,17 +1,19 @@ import Mark from "../../../../src/values/Mark"; -import { getLanguage } from "../../../../src/languageProcessing"; const markStart = ""; const markEnd = ""; /** - * Merges consecutive and overlapping markings into one marking. - * This is a helper for position based highlighting. - * @param {Object[]} matches An array of markings to merge. - * @param {Boolean} useSpace Whether words are separated by a space. In Japanese, for example, words are not separated by a space. - * @returns {Mark[]} An array of markings where consecutive and overlapping markings are merged. + * Merges consecutive and overlapping matches into one match. + * + * This is a helper for search-based highlighting. + * + * @param {Token[]} matches An array of the matched tokens to merge. + * @param {Boolean} useSpace Whether words are separated by a space. In Japanese, for example, words are not separated by a space. + * + * @returns {Token[]} An array of markings where consecutive and overlapping markings are merged. 
*/ -const mergeConsecutiveAndOverlappingMatches = ( matches, useSpace = true ) => { +const mergeConsecutiveAndOverlappingMatches = ( matches, useSpace ) => { const newMatches = []; // Sort matches by start offset. This is probably redundant, but for extra safety. @@ -40,27 +42,28 @@ const mergeConsecutiveAndOverlappingMatches = ( matches, useSpace = true ) => { }; /** - * This function creates the old style marked sentence for search based highlighting. - * Ideally this function becomes obsolete when position based highlighting is implemented everywhere. - * @param {Sentence} sentence The sentence to which to apply the marks. - * @param {Object} matches The matches to apply. - * @param {string} locale The locale of the text. - * @returns {string} The sentence with marks applied. + * Adds `yoastmark` tags to the keyphrase matches in the sentence. + * + * This is a helper for search-based highlighting. + * + * @param {Sentence} sentence The sentence to add the `yoastmark` tags to. + * @param {Token[]} matches The array of the keyphrase matches. + * @param {Boolean} useSpace Whether words are separated by a space. + * + * @returns {string} The sentence with the added `yoastmark` tags. */ -const createMarksForSentence = ( sentence, matches, locale ) => { +const createMarksForSentence = ( sentence, matches, useSpace ) => { let sentenceText = sentence.text; - // Create one array with both primary and secondary matches, sorted by start offset. - // Merge consecutive and overlapping matches. - const mergedMatches = mergeConsecutiveAndOverlappingMatches( matches, getLanguage( locale ) !== "ja" ); + const mergedMatches = mergeConsecutiveAndOverlappingMatches( matches, useSpace ); const sentenceStartOffset = sentence.sourceCodeRange.startOffset; // Loop through the matches backwards, so that the reference is not affected by the changes. for ( let i = mergedMatches.length - 1; i >= 0; i-- ) { const match = mergedMatches[ i ]; - // Apply the mark to the sentence. + // Adds `yoastmark` tags to the keyphrase matches in the sentence. // sentenceStartOffset is subtracted because the start and end offsets are relative to the start of the source code. // Subtracting the sentenceStartOffset makes them relative to the start of the sentence. sentenceText = sentenceText.substring( 0, match.sourceCodeRange.endOffset - sentenceStartOffset ) + markEnd + @@ -77,12 +80,15 @@ const createMarksForSentence = ( sentence, matches, locale ) => { /** * Merges consecutive and overlapping markings into one marking. - * This is a helper for position based highlighting. - * @param {Mark[]} markings An array of markings to merge. - * @param {Boolean} useSpace Whether words are separated by a space. In Japanese, for example, words are not separated by a space. + * + * This is a helper for position-based highlighting. + * + * @param {Mark[]} markings An array of markings to merge. + * @param {Boolean} useSpace Whether words are separated by a space. + * * @returns {Mark[]} An array of markings where consecutive and overlapping markings are merged. */ -const mergeConsecutiveAndOverlappingMarkings = ( markings, useSpace = true ) => { +const mergeConsecutiveAndOverlappingMarkings = ( markings, useSpace) => { const newMarkings = []; // Sort markings by start offset. This is probably redundant, but for extra safety. @@ -114,29 +120,31 @@ const mergeConsecutiveAndOverlappingMarkings = ( markings, useSpace = true ) => /** - * Gets the Mark objects of all keyphrase matches. 
+ * Gets the Mark objects of all keyphrase matches in the sentence. + * Currently, this function creates Mark objects compatible for both search-based and position-based highlighting. + * In a pure position-based highlighting, we don't need to provide 'marked' and 'original' when creating the Mark object. * - * @param {Sentence} sentence The sentence to check. - * @param {Array} matchesInSentence An object containing the matches in the sentence. - * @param {string} locale The locale used in the analysis. + * @param {Sentence} sentence The sentence to check. + * @param {Token[]} matchesInSentence An array containing the keyphrase matches in the sentence. + * @param {Boolean} useSpace Whether words are separated by a space. Default to true. * - * @returns {Mark[]} The array of Mark objects of the keyphrase matches. + * @returns {Mark[]} The array of Mark objects of the keyphrase matches in the sentence. */ -function getMarkingsInSentence( sentence, matchesInSentence, locale ) { +function getMarkingsInSentence( sentence, matchesInSentence, useSpace = true ) { if ( matchesInSentence.length === 0 ) { return []; } - // Create the marked sentence that is used for search based highlighting. - const markedSentence = createMarksForSentence( sentence, matchesInSentence, locale ); + // Create the marked sentence that is used for search-based highlighting. + const markedSentence = createMarksForSentence( sentence, matchesInSentence, useSpace ); - // Create the markings for the primary matches. - // Note that there is a paradigm shift: - // With search based highlighting there would be one marking for the entire sentence. - // With Position based highlighting there is a marking for each match. - // In order to be backwards compatible with search based highlighting, - // all markings for a sentence have the same markedSentence. - // ... + /* + * Note that there is a paradigm shift: + * With search-based highlighting there would be one Mark object for the entire sentence. + * With position-based highlighting there is a Mark object for each match. + * Hence, in order to be backwards compatible with search based highlighting, + * all Mark objects for a sentence have the same markedSentence. + */ const markings = matchesInSentence.map( token => { return new Mark( { position: { @@ -148,7 +156,7 @@ function getMarkingsInSentence( sentence, matchesInSentence, locale ) { } ); } ); - return mergeConsecutiveAndOverlappingMarkings( markings, getLanguage( locale ) !== "ja" ); + return mergeConsecutiveAndOverlappingMarkings( markings, useSpace ); } export default getMarkingsInSentence; diff --git a/packages/yoastseo/src/languageProcessing/helpers/match/matchWordFormsWithSentence.js b/packages/yoastseo/src/languageProcessing/helpers/match/matchWordFormsWithSentence.js index 3a0ef39d91f..5d9b8272cc9 100644 --- a/packages/yoastseo/src/languageProcessing/helpers/match/matchWordFormsWithSentence.js +++ b/packages/yoastseo/src/languageProcessing/helpers/match/matchWordFormsWithSentence.js @@ -79,8 +79,8 @@ const findExactMatchKeyphraseInSentence = ( sentence, wordForms, locale ) => { * * With this approach, we transliterate the word form of the keyphrase before matching it with the sentence tokens. * However, we don't do the transliteration step for the sentence tokens. - * As a result, for example, word form "acción" from the keyphrase will match word "accion" in the sentence. - * But, word form "accion" from the keyphrase will NOT match word "acción" in the sentence. 
+ * As a result, for example, the word form "acción" from the keyphrase will match the word "accion" in the sentence. + * But, the word form "accion" from the keyphrase will NOT match the word "acción" in the sentence. * * @param {Token[]} tokens The array of tokens to check. * @param {string} wordForm The word form of the keyphrase. @@ -139,8 +139,7 @@ const matchWordFormsInSentence = ( sentence, wordForms, locale, matchWordCustomH * * @param {string} locale The locale used for transliteration. * @param {function} matchWordCustomHelper Custom function to match a word form with sentence. - * @param {boolean} useExactMatching Whether to match the keyphrase forms exactly or not. - * Depends on whether the keyphrase enclosed in double quotes. + * @param {boolean} useExactMatching Whether to match the keyphrase forms exactly or not. Exact match is used when the keyphrase is enclosed in double quotes. * * @returns {{count: number, matches: (Token|string)[]}} Object containing the number of the matches and the matched tokens. */ diff --git a/packages/yoastseo/src/languageProcessing/researches/getKeywordDensity.js b/packages/yoastseo/src/languageProcessing/researches/getKeywordDensity.js index bdaf1fe61c9..b05d2f246b9 100644 --- a/packages/yoastseo/src/languageProcessing/researches/getKeywordDensity.js +++ b/packages/yoastseo/src/languageProcessing/researches/getKeywordDensity.js @@ -10,11 +10,13 @@ import getAllWordsFromTree from "../helpers/word/getAllWordsFromTree"; */ export default function getKeyphraseDensity( paper, researcher ) { const getWordsCustomHelper = researcher.getHelper( "getWordsCustomHelper" ); - let wordCount = getAllWordsFromTree( paper ).length; + let wordCount = 0; - // If there is a custom getWords helper use its output for countWords. + // If there is a custom getWords helper, use its output for countWords. if ( getWordsCustomHelper ) { wordCount = getWordsCustomHelper( paper.getText() ).length; + } else { + wordCount = getAllWordsFromTree( paper ).length } if ( wordCount === 0 ) { diff --git a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js index 07be00cc64b..31394474c4c 100644 --- a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js +++ b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js @@ -9,11 +9,11 @@ import { markWordsInASentence } from "../helpers/word/markWordsInSentences"; /** * Counts the occurrences of the keyphrase in the text and creates the Mark objects for the matches. * - * @param {Sentence[]} sentences The sentences to check. - * @param {Array} keyphraseForms The keyphrase forms. - * @param {string} locale The locale used in the analysis. - * @param {function} matchWordCustomHelper A custom helper to match words with a text. - * @param {boolean} isExactMatchRequested Whether the exact matching is requested. + * @param {Sentence[]} sentences The sentences to check. + * @param {Array} keyphraseForms The keyphrase forms. + * @param {string} locale The locale used in the analysis. + * @param {function} matchWordCustomHelper A custom helper to match words with a text. + * @param {boolean} isExactMatchRequested Whether the exact matching is requested. * * @returns {{markings: Mark[], count: number}} The number of keyphrase occurrences in the text and the Mark objects of the matches. 
 */
@@ -51,7 +51,7 @@ export function countKeyphraseInText( sentences, keyphraseForms, locale, matchWo
 			// Currently, this check is only applicable for Japanese.
 			markings = markWordsInASentence( sentence.text, foundWords, matchWordCustomHelper );
 		} else {
-			markings = getMarkingsInSentence( sentence, foundWords, locale );
+			markings = getMarkingsInSentence( sentence, foundWords );
 		}
 
 		result.count += totalMatchCount;
@@ -76,8 +76,10 @@ export default function getKeyphraseCount( paper, researcher ) {
 	let keyphraseForms = topicForms.keyphraseForms;
 	const keyphraseLength = keyphraseForms.length;
 
-	// Normalize single quotes so that word form with different type of single quotes can still be matched.
-	// For exmple, "key‛word" should match "key'word".
+	/*
+	 * Normalize single quotes so that word form with different type of single quotes can still be matched.
+	 * For example, "key‛word" should match "key'word".
+	 */
 	keyphraseForms = keyphraseForms.map( word => word.map( form => normalizeSingle( form ) ) );
 
 	if ( keyphraseLength === 0 ) {
@@ -94,7 +96,7 @@ export default function getKeyphraseCount( paper, researcher ) {
 	 * Count the amount of keyphrase occurrences in the sentences.
 	 * An occurrence is counted when all words of the keyphrase are contained within the sentence. Each sentence can contain multiple keyphrases.
 	 * (e.g. "The apple potato is an apple and a potato." has two occurrences of the keyphrase "apple potato").
-	 * */
+	 */
 	const keyphraseFound = countKeyphraseInText( sentences, keyphraseForms, locale, matchWordCustomHelper, isExactMatchRequested );
 
 	result.count = keyphraseFound.count;
diff --git a/packages/yoastseo/src/markers/removeDuplicateMarks.js b/packages/yoastseo/src/markers/removeDuplicateMarks.js
index e2862e57f33..04df1cd5745 100644
--- a/packages/yoastseo/src/markers/removeDuplicateMarks.js
+++ b/packages/yoastseo/src/markers/removeDuplicateMarks.js
@@ -13,7 +13,7 @@ function removeDuplicateMarks( marks ) {
 	/*
 	 * We don't remove duplicates when mark has position information, for the reasons below:
 	 * 1. Not removing duplicates is simpler than removing the duplicates by looking at the value of mark.getPosition().
-	 * 2. Our urrent approach of creating a mark object with position information eliminates the chance of having duplicates.
+	 * 2. Our current approach of creating a mark object with position information eliminates the chance of having duplicates.
 	 */
 	if ( !! marks && ( marks.length === 0 || !! 
marks[ 0 ].hasPosition() ) ) { return marks; From b67bc0d988cae63bfe82f9e93caf135a7b6fef2c Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Wed, 5 Jul 2023 17:09:42 +0200 Subject: [PATCH 219/616] Add unit test --- .../highlighting/getMarkingsInSentenceSpec.js | 29 ++++++++++++++----- .../highlighting/getMarkingsInSentence.js | 8 ++--- 2 files changed, 25 insertions(+), 12 deletions(-) diff --git a/packages/yoastseo/spec/languageProcessing/helpers/highlighting/getMarkingsInSentenceSpec.js b/packages/yoastseo/spec/languageProcessing/helpers/highlighting/getMarkingsInSentenceSpec.js index fecd6d5ac66..26ceb70b587 100644 --- a/packages/yoastseo/spec/languageProcessing/helpers/highlighting/getMarkingsInSentenceSpec.js +++ b/packages/yoastseo/spec/languageProcessing/helpers/highlighting/getMarkingsInSentenceSpec.js @@ -6,14 +6,14 @@ const testCases = [ testDescription: "No markings in sentence", sentence: { text: "This is a sentence.", sourceCodeRange: { startOffset: 0, endOffset: 18 } }, matchesInSentence: [], - locale: "en_US", + useSpace: true, expectedResult: [], }, { testDescription: "One marking in sentence", sentence: { text: "This is a sentence.", sourceCodeRange: { startOffset: 0, endOffset: 18 } }, matchesInSentence: [ { sourceCodeRange: { startOffset: 0, endOffset: 4 } } ], - locale: "en_US", + useSpace: true, expectedResult: [ new Mark( { marked: "This is a sentence.", original: "This is a sentence.", @@ -24,7 +24,7 @@ const testCases = [ testDescription: "One marking in sentence with two consecutive matches", sentence: { text: "This is a sentence.", sourceCodeRange: { startOffset: 0, endOffset: 18 } }, matchesInSentence: [ { sourceCodeRange: { startOffset: 0, endOffset: 4 } }, { sourceCodeRange: { startOffset: 5, endOffset: 7 } } ], - locale: "en_US", + useSpace: true, expectedResult: [ new Mark( { marked: "This is a sentence.", original: "This is a sentence.", @@ -35,7 +35,7 @@ const testCases = [ testDescription: "Two markings that are not consecutive in sentence", sentence: { text: "This is a sentence.", sourceCodeRange: { startOffset: 0, endOffset: 18 } }, matchesInSentence: [ { sourceCodeRange: { startOffset: 0, endOffset: 4 } }, { sourceCodeRange: { startOffset: 10, endOffset: 18 } } ], - locale: "en_US", + useSpace: true, expectedResult: [ new Mark( { marked: "This is a sentence.", @@ -53,7 +53,7 @@ const testCases = [ testDescription: "One marking in a sentence that has a non-zero startOffset", sentence: { text: "This is a sentence.", sourceCodeRange: { startOffset: 10, endOffset: 38 } }, matchesInSentence: [ { sourceCodeRange: { startOffset: 10, endOffset: 14 } } ], - locale: "en_US", + useSpace: true, expectedResult: [ new Mark( { marked: "This is a sentence.", original: "This is a sentence.", @@ -64,7 +64,7 @@ const testCases = [ testDescription: "Two markings that overlap", sentence: { text: "This is a sentence.", sourceCodeRange: { startOffset: 0, endOffset: 18 } }, matchesInSentence: [ { sourceCodeRange: { startOffset: 0, endOffset: 7 } }, { sourceCodeRange: { startOffset: 5, endOffset: 9 } } ], - locale: "en_US", + useSpace: true, expectedResult: [ new Mark( { marked: "This is a sentence.", @@ -73,15 +73,28 @@ const testCases = [ } ), ], }, + { + testDescription: "Two markings that overlap in a sentence that doesn't use space", + sentence: { text: "彼女はオンラインストアで黒の長袖マキシドレスを購入したかった。", sourceCodeRange: { startOffset: 0, endOffset: 31 } }, + matchesInSentence: [ { sourceCodeRange: { startOffset: 12, endOffset: 13 } }, { sourceCodeRange: { startOffset: 13, endOffset: 16 } 
} ], + useSpace: false, + expectedResult: [ + new Mark( { + marked: "彼女はオンラインストアで黒の長袖マキシドレスを購入したかった。", + original: "彼女はオンラインストアで黒の長袖マキシドレスを購入したかった。", + position: { endOffset: 16, startOffset: 12 }, + } ), + ], + }, ]; describe.each( testCases )( "a test for getting the marks from a sentence", ( { testDescription, sentence, matchesInSentence, - locale, + useSpace, expectedResult } ) => { it( testDescription, () => { - expect( getMarkingsInSentence( sentence, matchesInSentence, locale ) ).toEqual( expectedResult ); + expect( getMarkingsInSentence( sentence, matchesInSentence, useSpace ) ).toEqual( expectedResult ); } ); } ); diff --git a/packages/yoastseo/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js b/packages/yoastseo/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js index 9f730e1883c..5784691f8e8 100644 --- a/packages/yoastseo/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js +++ b/packages/yoastseo/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js @@ -88,7 +88,7 @@ const createMarksForSentence = ( sentence, matches, useSpace ) => { * * @returns {Mark[]} An array of markings where consecutive and overlapping markings are merged. */ -const mergeConsecutiveAndOverlappingMarkings = ( markings, useSpace) => { +const mergeConsecutiveAndOverlappingMarkings = ( markings, useSpace ) => { const newMarkings = []; // Sort markings by start offset. This is probably redundant, but for extra safety. @@ -141,9 +141,9 @@ function getMarkingsInSentence( sentence, matchesInSentence, useSpace = true ) { /* * Note that there is a paradigm shift: - * With search-based highlighting there would be one Mark object for the entire sentence. - * With position-based highlighting there is a Mark object for each match. - * Hence, in order to be backwards compatible with search based highlighting, + * With search-based highlighting, there would be one Mark object for the entire sentence. + * With position-based highlighting, there is a Mark object for each match. + * Hence, in order to be backwards compatible with search-based highlighting, * all Mark objects for a sentence have the same markedSentence. */ const markings = matchesInSentence.map( token => { From f232be6d98cba79e68ec1adec23ca1500b4c2fdb Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Wed, 5 Jul 2023 17:16:18 +0200 Subject: [PATCH 220/616] fix syntax error --- .../helpers/match/matchWordFormsWithSentence.js | 3 ++- .../src/languageProcessing/researches/getKeywordDensity.js | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/packages/yoastseo/src/languageProcessing/helpers/match/matchWordFormsWithSentence.js b/packages/yoastseo/src/languageProcessing/helpers/match/matchWordFormsWithSentence.js index 5d9b8272cc9..8aaa3da9d70 100644 --- a/packages/yoastseo/src/languageProcessing/helpers/match/matchWordFormsWithSentence.js +++ b/packages/yoastseo/src/languageProcessing/helpers/match/matchWordFormsWithSentence.js @@ -139,7 +139,8 @@ const matchWordFormsInSentence = ( sentence, wordForms, locale, matchWordCustomH * * @param {string} locale The locale used for transliteration. * @param {function} matchWordCustomHelper Custom function to match a word form with sentence. - * @param {boolean} useExactMatching Whether to match the keyphrase forms exactly or not. Exact match is used when the keyphrase is enclosed in double quotes. + * @param {boolean} useExactMatching Whether to match the keyphrase forms exactly or not. 
+ * Exact match is used when the keyphrase is enclosed in double quotes. * * @returns {{count: number, matches: (Token|string)[]}} Object containing the number of the matches and the matched tokens. */ diff --git a/packages/yoastseo/src/languageProcessing/researches/getKeywordDensity.js b/packages/yoastseo/src/languageProcessing/researches/getKeywordDensity.js index b05d2f246b9..824453ad6df 100644 --- a/packages/yoastseo/src/languageProcessing/researches/getKeywordDensity.js +++ b/packages/yoastseo/src/languageProcessing/researches/getKeywordDensity.js @@ -16,7 +16,7 @@ export default function getKeyphraseDensity( paper, researcher ) { if ( getWordsCustomHelper ) { wordCount = getWordsCustomHelper( paper.getText() ).length; } else { - wordCount = getAllWordsFromTree( paper ).length + wordCount = getAllWordsFromTree( paper ).length; } if ( wordCount === 0 ) { From f177a5b1004a6f9e19a414535fd100426de64835 Mon Sep 17 00:00:00 2001 From: Agnieszka Szuba Date: Mon, 10 Jul 2023 15:23:07 +0200 Subject: [PATCH 221/616] Fix unit test description --- .../spec/languageProcessing/researches/keywordCountSpec.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js index 8d6f17ed606..1e6be145d48 100644 --- a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js +++ b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js @@ -449,7 +449,7 @@ const testCasesWithSpecialCharacters = [ skip: false, }, { - description: "can match multiple occurrences of keyphrase ending in & as in 'keyphrase&', and output correct Marks objects", + description: "can match an occurrence of keyphrase ending in & as in 'keyphrase&', and output correct Marks objects", paper: new Paper( "

A string with a keyphrase&.

", { keyword: "keyphrase&" } ), keyphraseForms: [ [ "keyphrase" ] ], expectedCount: 1, From d27734a4646378ce78863d7b9dc5472032a2a6b2 Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Wed, 12 Jul 2023 15:06:23 +0200 Subject: [PATCH 222/616] Add block index to the tree --- packages/yoastseo/src/parse/build/build.js | 39 +++++++++++++++++++ .../yoastseo/src/parse/build/private/adapt.js | 4 +- packages/yoastseo/src/parse/structure/Node.js | 6 ++- 3 files changed, 47 insertions(+), 2 deletions(-) diff --git a/packages/yoastseo/src/parse/build/build.js b/packages/yoastseo/src/parse/build/build.js index fcb045075ae..a82adb46577 100644 --- a/packages/yoastseo/src/parse/build/build.js +++ b/packages/yoastseo/src/parse/build/build.js @@ -7,6 +7,40 @@ import filterTree from "./private/filterTree"; import permanentFilters from "./private/alwaysFilterElements"; import { filterBeforeTokenizing } from "./private/filterBeforeTokenizing"; + +/** + * Sets the block index to the node. + * + * @param {number} index The block index. + * @param {Node} node The node to check. + * @returns {void} + */ +const setBlockIndexOnNode = ( index, node ) => { + node.blockIndex = index; + if ( node.childNodes && node.childNodes.length > 0 ) { + node.childNodes.map( childNode => setBlockIndexOnNode( index, childNode ) ); + } +}; + +/** + * Retrieves the block index and sets them to Node. + * + * @param {Node} node The node to check. + * @returns {void} + */ +const getAnSetBlockIndex = ( node ) => { + let index = -1; + for ( const topNode of node.childNodes ) { + if ( topNode.name === "#comment" && topNode.data.trim().startsWith( "wp:" )) { // + index++; + } else if (topNode.name === "#comment" && topNode.data.trim().startsWith( "/wp:" )) { + continue; + } else { + setBlockIndexOnNode( index, topNode ); + } + } +}; + /** * Parses the HTML string to a tree representation of * the HTML document. @@ -18,6 +52,11 @@ import { filterBeforeTokenizing } from "./private/filterBeforeTokenizing"; */ export default function build( htmlString, languageProcessor ) { let tree = adapt( parseFragment( htmlString, { sourceCodeLocationInfo: true } ) ); + + if ( tree.childNodes && tree.childNodes.length > 0 ) { + getAnSetBlockIndex( tree ); + } + /* * Filter out some content from the tree so that it can be correctly tokenized. We don't want to tokenize text in * between tags such as 'code' and 'script', but we do want to take into account the length of those elements when diff --git a/packages/yoastseo/src/parse/build/private/adapt.js b/packages/yoastseo/src/parse/build/private/adapt.js index b01f1dcc6b0..9d255e457d6 100644 --- a/packages/yoastseo/src/parse/build/private/adapt.js +++ b/packages/yoastseo/src/parse/build/private/adapt.js @@ -87,5 +87,7 @@ export default function adapt( tree ) { return new Heading( headingLevel, attributes, children, tree.sourceCodeLocation ); } - return new Node( tree.nodeName, attributes, children, tree.sourceCodeLocation ); + const commentData = tree.nodeName === "#comment" && tree.data; + + return new Node( tree.nodeName, attributes, children, tree.sourceCodeLocation, commentData ); } diff --git a/packages/yoastseo/src/parse/structure/Node.js b/packages/yoastseo/src/parse/structure/Node.js index aa5b82a2602..09010047ca4 100644 --- a/packages/yoastseo/src/parse/structure/Node.js +++ b/packages/yoastseo/src/parse/structure/Node.js @@ -14,13 +14,17 @@ class Node { * @param {(Node|Text)[]} childNodes This node's child nodes. 
* @param {Object} sourceCodeLocationInfo This node's location in the source code, from parse5. */ - constructor( name, attributes = {}, childNodes = [], sourceCodeLocationInfo = {} ) { + constructor( name, attributes = {}, childNodes = [], sourceCodeLocationInfo = {}, commentData ) { /** * This node's name or tag. * @type {string} */ this.name = name; + if ( commentData ){ + this.data = commentData; + } + /** * This node's attributes. * @type {Object} From 35a0b395a27efa7d6ad6f0739dcbc8388fdb03a7 Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Wed, 12 Jul 2023 15:07:14 +0200 Subject: [PATCH 223/616] Add block index to the Mark object --- .../helpers/highlighting/getMarkingsInSentence.js | 11 +++++++---- .../helpers/sentence/getSentencesFromTree.js | 11 ++++++----- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/packages/yoastseo/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js b/packages/yoastseo/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js index 919b7db0545..593d5fba0d8 100644 --- a/packages/yoastseo/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js +++ b/packages/yoastseo/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js @@ -147,13 +147,16 @@ function getMarkingsInSentence( sentence, matchesInSentence, useSpace = true ) { * all Mark objects for a sentence have the same markedSentence. */ const markings = matchesInSentence.map( token => { + const startOffset = token.sourceCodeRange.startOffset; + const endOffset = token.sourceCodeRange.endOffset; return new Mark( { position: { - startOffset: token.sourceCodeRange.startOffset, - endOffset: token.sourceCodeRange.endOffset, + startOffset: startOffset, + endOffset: endOffset, // relative to start of block positions. - startOffsetBlock: token.sourceCodeRange.startOffset - ( sentence.parentStartOffset || 0 ), - endOffsetBlock: token.sourceCodeRange.endOffset - ( sentence.parentStartOffset || 0 ), + startOffsetBlock: startOffset - ( sentence.parentStartOffset || 0 ), + endOffsetBlock: endOffset - ( sentence.parentStartOffset || 0 ), + blockIndex: sentence.blockIndex, }, marked: markedSentence, original: sentence.text, diff --git a/packages/yoastseo/src/languageProcessing/helpers/sentence/getSentencesFromTree.js b/packages/yoastseo/src/languageProcessing/helpers/sentence/getSentencesFromTree.js index cc7ef5d4d34..bd102392204 100644 --- a/packages/yoastseo/src/languageProcessing/helpers/sentence/getSentencesFromTree.js +++ b/packages/yoastseo/src/languageProcessing/helpers/sentence/getSentencesFromTree.js @@ -10,9 +10,10 @@ export default function( paper ) { // Get all nodes that have a sentence property which is not an empty array. const tree = paper.getTree().findAll( treeNode => !! treeNode.sentences ); - return tree.flatMap( node => node.sentences.map( s => ( { - ...s, - parentStartOffset: ( node.sourceCodeLocation && node.sourceCodeLocation.startTag ) ? node.sourceCodeLocation.startTag.endOffset : 0, - } ) ) - ); + return tree.flatMap( node => node.sentences.map( s => { + return { + ...s, + parentStartOffset: ( node.sourceCodeLocation && node.sourceCodeLocation.startTag ) ? 
node.sourceCodeLocation.startTag.endOffset : 0, + blockIndex: node.blockIndex }; + } ) ); } From 2fe7b32d5db40cdbcd1f92ce8a7074be566a167c Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Wed, 12 Jul 2023 15:07:25 +0200 Subject: [PATCH 224/616] Add methods for block index --- packages/yoastseo/src/values/Mark.js | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/packages/yoastseo/src/values/Mark.js b/packages/yoastseo/src/values/Mark.js index a5f6d82be01..f96b2e33418 100644 --- a/packages/yoastseo/src/values/Mark.js +++ b/packages/yoastseo/src/values/Mark.js @@ -95,6 +95,27 @@ Mark.prototype.setPositionEnd = function( positionEnd ) { this._properties.position.endOffset = positionEnd; }; +/** + * Sets the block index. + * + * @param {number} blockIndex The block index to set. + * @returns {void} + */ +Mark.prototype.setBlockIndex = function( blockIndex ) { + if ( this._properties.position ) { + this._properties.position.blockIndex = blockIndex; + } +} + +/** + * Gets the block index. + * + * @returns {number} The block index. + */ +Mark.prototype.getBlockIndex = function() { + return this._properties.position && this._properties.position.blockIndex; +} + /** * Returns the start position inside block. * From 2afe70d9de56103ec2adee340fa1073144583dc1 Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Wed, 12 Jul 2023 16:49:50 +0200 Subject: [PATCH 225/616] Refactor position-based highlighting in block editor --- packages/js/src/decorator/gutenberg.js | 203 ++++++++++++++++--------- 1 file changed, 132 insertions(+), 71 deletions(-) diff --git a/packages/js/src/decorator/gutenberg.js b/packages/js/src/decorator/gutenberg.js index 8331fa767aa..f55b9a29c8e 100644 --- a/packages/js/src/decorator/gutenberg.js +++ b/packages/js/src/decorator/gutenberg.js @@ -80,6 +80,9 @@ const ANNOTATION_ATTRIBUTES = { { key: "steps", }, + { + key: "jsonDescription", + }, ], }; @@ -137,7 +140,7 @@ function scheduleAnnotationQueueApplication() { } /** - * Returns whether or not annotations are available in Gutenberg. + * Returns whether annotations are available in Gutenberg. * * @returns {boolean} Whether or not annotations are available in Gutenberg. */ @@ -162,7 +165,7 @@ export function getYoastmarkOffsets( marked ) { // In that case, startMarkIndex will be -1 during later tests. const doesNotContainDoubleQuotedMark = startMarkIndex >= 0; - // If the start mark is not found, try the double quoted version. + // If the start mark is not found, try the double-quoted version. if ( ! doesNotContainDoubleQuotedMark ) { startMarkIndex = marked.indexOf( START_MARK_DOUBLE_QUOTED ); } @@ -204,7 +207,7 @@ export function getYoastmarkOffsets( marked ) { * * @param {string} text Text to search through. * @param {string} stringToFind Text to search for. - * @param {boolean} caseSensitive True if the search is case sensitive. + * @param {boolean} caseSensitive True if the search is case-sensitive. * * @returns {Array} All indices of the found occurrences. */ @@ -345,6 +348,17 @@ function getAnnotatableAttributes( blockTypeName ) { return assessmentAttributes[ blockTypeName ]; } +/** + * Checks if a block has innerblocks. + * + * @param {Object} block The block with potential inner blocks + * + * @returns {boolean} True if the block has innerblocks, False otherwise. + */ +export function hasInnerBlocks( block ) { + return block.innerBlocks.length > 0; +} + /** * Creates the annotations for a block. 
* @@ -353,10 +367,11 @@ function getAnnotatableAttributes( blockTypeName ) { * @param {Object} attribute The attribute to apply annotations to. * @param {Object} block The block information in the state. * @param {Array} marks The marks to turn into annotations. + * @param {number} index The index of the block. * * @returns {Array} An array of annotations for a specific block. */ -function createAnnotations( html, richTextIdentifier, attribute, block, marks ) { +function createAnnotations( html, richTextIdentifier, attribute, block, marks, index ) { const record = create( { html: html, multilineTag: attribute.multilineTag, @@ -368,6 +383,9 @@ function createAnnotations( html, richTextIdentifier, attribute, block, marks ) return flatMap( marks, ( ( mark ) => { let annotations; if ( mark.hasBlockPosition && mark.hasBlockPosition() ) { + if ( index !== mark.getBlockIndex() ) { + return []; + } annotations = createAnnotationsFromPositionBasedMarks( mark ); } else { annotations = calculateAnnotationsForTextFormat( @@ -391,78 +409,133 @@ function createAnnotations( html, richTextIdentifier, attribute, block, marks ) } /** - * Gets the annotations for Yoast FAQ block and Yoast How-To block. + * Gets the annotations for Yoast How-To block. * * @param {Object} attribute The attribute to apply annotations to. * @param {Object} block The block information in the state. * @param {Array} marks The marks to turn into annotations. - * - * @returns {Array} An array of annotations for Yoast blocks. - */ -export function getAnnotationsForYoastBlocks( attribute, block, marks ) { - // For Yoast FAQ and How-To blocks, we create separate annotation objects for each individual Rich Text found in the attribute. + * @param {number} index The index of the block. + * @returns {Array} The created annotations. */ +const getAnnotationsForHowTo = ( attribute, block, marks, index ) => { const annotatableTexts = block.attributes[ attribute.key ]; + if ( annotatableTexts.length === 0 ) { + return []; + } - if ( block.name === "yoast/faq-block" && annotatableTexts.length !== 0 ) { - // For each rich text of the attribute value, create annotations. - const annotations = annotatableTexts.map( item => { - const identifierQuestion = `${ item.id }-question`; - const identifierAnswer = `${ item.id }-answer`; + // For each rich text of the attribute value, create annotations. + const annotations = annotatableTexts.map( item => { + const identifierQuestion = `${ item.id }-question`; + const identifierAnswer = `${ item.id }-answer`; - const annotationsFromQuestion = createAnnotations( item.jsonQuestion, identifierQuestion, attribute, block, marks ); - const annotationsFromAnswer = createAnnotations( item.jsonAnswer, identifierAnswer, attribute, block, marks ); + const annotationsFromQuestion = createAnnotations( item.jsonQuestion, identifierQuestion, attribute, block, marks, index ); + const annotationsFromAnswer = createAnnotations( item.jsonAnswer, identifierAnswer, attribute, block, marks, index ); - return annotationsFromQuestion.concat( annotationsFromAnswer ); - } ); + return annotationsFromQuestion.concat( annotationsFromAnswer ); + } ); - return flatMap( annotations ); + return flatMap( annotations ); +}; + +/** + * Gets the annotations for Yoast FAQ block. + * + * @param {Object} attribute The attribute to apply annotations to. + * @param {Object} block The block information in the state. + * @param {Array} marks The marks to turn into annotations. + * @param {number} index The index of the block. 
+ * @returns {Array} The created annotations. + */ +const getAnnotationsForFAQ = ( attribute, block, marks, index ) => { + const annotatableTexts = block.attributes[ attribute.key ]; + if ( annotatableTexts.length === 0 ) { + return []; } - // The check for getting the annotations for Yoast How-To block. - // Note: for Yoast How-To block, there are actually two attribute keys that are annotatable: steps and jsonDescription. - if ( block.name === "yoast/how-to-block" && annotatableTexts.length !== 0 ) { + + const annotations = []; + if ( attribute.key === "steps" ) { // For each rich text of the attribute value, create annotations. - const annotations = annotatableTexts.map( item => { + annotations.concat( annotatableTexts.map( item => { const identifierStepName = `${ item.id }-name`; const identifierStepText = `${ item.id }-text`; - const annotationsFromStepName = createAnnotations( item.jsonName, identifierStepName, attribute, block, marks ); - const annotationsFromStepText = createAnnotations( item.jsonText, identifierStepText, attribute, block, marks ); + const annotationsFromStepName = createAnnotations( item.jsonName, identifierStepName, attribute, block, marks, index ); + const annotationsFromStepText = createAnnotations( item.jsonText, identifierStepText, attribute, block, marks, index ); // For each step return an array of the name-text pair objects. - return annotationsFromStepName.concat( annotationsFromStepText ); - } ); - // Create annotations for the How-To topmost description by accessing the jsonDescription directly form the block attributes. - const annotationsFromStepDescription = createAnnotations( block.attributes.jsonDescription, "description", attribute, block, marks ); + return annotationsFromStepName.concat( annotationsFromStepText ); + } ) ); + console.log( annotatableTexts.map( item => { + const identifierStepName = `${ item.id }-name`; + const identifierStepText = `${ item.id }-text`; + + const annotationsFromStepName = createAnnotations( item.jsonName, identifierStepName, attribute, block, marks, index ); + const annotationsFromStepText = createAnnotations( item.jsonText, identifierStepText, attribute, block, marks, index ); - return flatMap( annotations.concat( annotationsFromStepDescription ) ); + // For each step return an array of the name-text pair objects. + return annotationsFromStepName.concat( annotationsFromStepText ); + } ), "FAQ ANN" ); + } + if ( attribute.key === "jsonDescription" ) { + annotations.concat( createAnnotations( annotatableTexts, "description", attribute, block, marks, index ) ); } + + return flatMap( annotations ); +}; + +/** + * Gets the annotations for Yoast FAQ block and Yoast How-To block. + * + * @param {Object} attributes The attributes to apply annotations to. + * @param {Object} block The block information in the state. + * @param {Array} marks The marks to turn into annotations. + * @param {number} index The index of the block. + * + * @returns {Array} An array of annotations for Yoast blocks. + */ +export function getAnnotationsForYoastBlock( attributes, block, marks, index ) { + // For Yoast FAQ and How-To blocks, we create separate annotation objects for each individual Rich Text found in the attribute. + return flatMap( attributes, attribute => { + if ( block.name === "yoast/faq-block" ) { + console.log( getAnnotationsForHowTo( attribute, block, marks, index ), "HOWTO" ); + return getAnnotationsForHowTo( attribute, block, marks, index ); + } + // The check for getting the annotations for Yoast How-To block. 
+ // Note: for Yoast How-To block, there are actually two attribute keys that are annotatable: steps and jsonDescription. + if ( block.name === "yoast/how-to-block" ) { + console.log( getAnnotationsForFAQ( attribute, block, marks, index ), "FAQ" ); + return getAnnotationsForFAQ( attribute, block, marks, index ); + } + } ); } /** * Returns annotations that should be applied to the given attribute. * - * @param {Object} attribute The attribute to apply annotations to. + * @param {Object} attributes The attributes to apply annotations to. * @param {Object} block The block information in the state. * @param {Array} marks The marks to turn into annotations. + * @param {number} index The index of the block. * * @returns {Array} The annotations to apply. */ -export function getAnnotationsForBlockAttribute( attribute, block, marks ) { - const attributeKey = attribute.key; - const { attributes: blockAttributes } = block; +export function getAnnotationsForWPBlock( attributes, block, marks, index ) { + const annotations = flatMap( attributes, attribute => { + if ( attribute.length === 0 ) { + return []; + } - if ( attribute.filter && ! attribute.filter( blockAttributes ) ) { - return []; - } + const richTextIdentifier = attribute.key; + const html = block.attributes[ richTextIdentifier ]; - // Get the annotations for Yoast FAQ and How-To blocks. - if ( block.name === "yoast/faq-block" || block.name === "yoast/how-to-block" ) { - return getAnnotationsForYoastBlocks( attribute, block, marks ); - } + return createAnnotations( html, richTextIdentifier, attribute, block, marks, index ); + } ); - const attributeValue = blockAttributes[ attributeKey ]; - // Get the annotation for non-Yoast blocks. - return createAnnotations( attributeValue, attributeKey, attribute, block, marks ); + if ( hasInnerBlocks( block ) ) { + block.innerBlocks.forEach( innerBlock => { + annotations.concat( getAnnotationsForWPBlock( attributes, innerBlock, marks, index ) ); + } ); + } + return annotations; } /** @@ -500,26 +573,18 @@ function fillAnnotationQueue( annotations ) { * * @param { Object } block The block for which the annotations need to be determined. * @param { Mark[] } marks A list of marks that could apply to the block. - * + * @param {number} index The index of the block. * @returns { Object[] } All annotations that need to be placed on the block. */ -export function getAnnotationsFromBlock( block, marks ) { - return flatMap( - getAnnotatableAttributes( block.name ), - ( ( attribute ) => getAnnotationsForBlockAttribute( attribute, block, marks ) ) - ); -} +const getAnnotationsForABlock = ( block, marks, index ) => { + const attributes = getAnnotatableAttributes( block.name ); + console.log( attributes, "ATTRIBUTES" ); + if ( block.name === "yoast/faq-block" || block.name === "yoast/how-to-block" ) { + return getAnnotationsForYoastBlock( attributes, block, marks, index ); + } + return getAnnotationsForWPBlock( attributes, block, marks, index ); +}; -/** - * Checks if a block has innerblocks. - * - * @param {Object} block The block with potential inner blocks - * - * @returns {boolean} True if the block has innerblocks, False otherwise. - */ -export function hasInnerBlocks( block ) { - return block.innerBlocks.length > 0; -} /** * Takes a list of blocks and matches those with a list of marks, in order to create an array of annotations. @@ -532,11 +597,9 @@ export function hasInnerBlocks( block ) { * @returns {Object[]} An array of annotation objects. 
*/ export function getAnnotationsForBlocks( blocks, marks ) { - return flatMap( blocks, ( ( block ) => { - // If a block has innerblocks, get annotations for those blocks as well. - const innerBlockAnnotations = hasInnerBlocks( block ) ? getAnnotationsForBlocks( block.innerBlocks, marks ) : []; - return getAnnotationsFromBlock( block, marks ).concat( innerBlockAnnotations ); - } ) ); + return flatMap( blocks, ( block, i ) => { + return getAnnotationsForABlock( block, marks, i ); + } ); } /** @@ -593,6 +656,7 @@ function removeAllAnnotationsFromBlock( blockClientId ) { export function reapplyAnnotationsForSelectedBlock() { const block = select( "core/editor" ).getSelectedBlock(); const activeMarkerId = select( "yoast-seo/editor" ).getActiveMarker(); + const blockIndex = select( "core/block-editor" ).getBlockIndex( block.clientId ); if ( ! block || ! activeMarkerId ) { return; @@ -608,10 +672,7 @@ export function reapplyAnnotationsForSelectedBlock() { const marksForActiveMarker = activeMarker.marks; - const annotations = flatMap( - getAnnotatableAttributes( block.name ), - attribute => getAnnotationsForBlockAttribute( attribute, block, marksForActiveMarker ) - ); + const annotations = getAnnotationsForABlock( block, marksForActiveMarker, blockIndex ); fillAnnotationQueue( annotations ); From 67d674389c71f16798031dc7400f3a85518fe30e Mon Sep 17 00:00:00 2001 From: hdvos Date: Wed, 12 Jul 2023 17:43:17 +0200 Subject: [PATCH 226/616] change h1s.js to use parsed html tree as input. --- .../src/languageProcessing/researches/h1s.js | 22 +++++++++---------- 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/packages/yoastseo/src/languageProcessing/researches/h1s.js b/packages/yoastseo/src/languageProcessing/researches/h1s.js index d51df64722c..d76272301f2 100644 --- a/packages/yoastseo/src/languageProcessing/researches/h1s.js +++ b/packages/yoastseo/src/languageProcessing/researches/h1s.js @@ -24,21 +24,19 @@ const emptyBlock = function( block ) { * @returns {Array} An array with all H1s, their content and position. 
*/ export default function( paper ) { - const text = paper.getText(); + const tree = paper.getTree(); + + const h1Matches = tree.findAll( node => node.name === "h1" ); - let blocks = getBlocks( text ); - blocks = reject( blocks, emptyBlock ); const h1s = []; - blocks.forEach( ( block, index ) => { - const match = h1Regex.exec( block ); - if ( match ) { - h1s.push( { - tag: "h1", - content: match[ 1 ], - position: index, - } ); - } + + h1Matches.forEach( h1Match => { + h1s.push( { + tag: "h1", + content: h1Match.findAll( node => node.name === "#text" ).map( textNode => textNode.value ).join( "" ), + position: { startOffset: h1Match.sourceCodeLocation.startTag.endOffset, endOffset: h1Match.sourceCodeLocation.endTag.startOffset }, + } ); } ); return h1s; From ca2761559eb9d43e17105baf8e33a245b2f0636e Mon Sep 17 00:00:00 2001 From: hdvos Date: Wed, 12 Jul 2023 17:43:37 +0200 Subject: [PATCH 227/616] remove unused imports in h1s.js --- .../src/languageProcessing/researches/h1s.js | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/packages/yoastseo/src/languageProcessing/researches/h1s.js b/packages/yoastseo/src/languageProcessing/researches/h1s.js index d76272301f2..b767fa0ca21 100644 --- a/packages/yoastseo/src/languageProcessing/researches/h1s.js +++ b/packages/yoastseo/src/languageProcessing/researches/h1s.js @@ -1,19 +1,3 @@ -import { getBlocks } from "../helpers/html/html"; -import { reject } from "lodash-es"; - -const h1Regex = /(.*?)<\/h1>/; - -/** - * Gets a block from a text and checks if it is totally empty or if it is an empty paragraph. - * - * @param {string} block A HTML block extracted from the paper. - * - * @returns {boolean} Whether the block is empty or not. - */ -const emptyBlock = function( block ) { - block = block.trim(); - return block === "

" || block === ""; -}; /** From 24383ba8c27a67d716e969af70258937ceae89e6 Mon Sep 17 00:00:00 2001 From: hdvos Date: Wed, 12 Jul 2023 17:43:57 +0200 Subject: [PATCH 228/616] update h1sSpec.js to changed h1s --- .../languageProcessing/researches/h1sSpec.js | 43 +++++++++++++------ 1 file changed, 31 insertions(+), 12 deletions(-) diff --git a/packages/yoastseo/spec/languageProcessing/researches/h1sSpec.js b/packages/yoastseo/spec/languageProcessing/researches/h1sSpec.js index 97eb26a0f81..6fda6ad2679 100644 --- a/packages/yoastseo/spec/languageProcessing/researches/h1sSpec.js +++ b/packages/yoastseo/spec/languageProcessing/researches/h1sSpec.js @@ -1,49 +1,65 @@ import h1s from "../../../src/languageProcessing/researches/h1s.js"; import Paper from "../../../src/values/Paper.js"; +import buildTree from "../../specHelpers/parse/buildTree"; +import EnglishResearcher from "../../../src/languageProcessing/languages/en/Researcher.js"; describe( "Gets all H1s in the text", function() { it( "should return empty when there is no H1", function() { - const mockPaper = new Paper( "some content

content h2

" ); + const mockPaper = new Paper( "

some content

content h2

" ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); expect( h1s( mockPaper ) ).toEqual( [] ); } ); it( "should return one object when there is one H1", function() { const mockPaper = new Paper( "

first h1

some content

content h2

" ); - expect( h1s( mockPaper ) ).toEqual( [ { tag: "h1", content: "first h1", position: 0 } ] ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + // console.log(mockPaper.getTree()); + expect( h1s( mockPaper ) ).toEqual( [ { tag: "h1", content: "first h1", position: { startOffset: 4, endOffset: 12 } } ] ); } ); it( "should return all H1s in the text", function() { const mockPaper = new Paper( "

first h1

not an h1

second h1

not an h1

" ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); expect( h1s( mockPaper ) ).toEqual( [ - { tag: "h1", content: "first h1", position: 0 }, - { tag: "h1", content: "second h1", position: 2 }, + { tag: "h1", content: "first h1", position: { startOffset: 4, endOffset: 12 } }, + { tag: "h1", content: "second h1", position: { startOffset: 37, endOffset: 46 } }, ] ); } ); it( "should return two h1s next to each other", function() { const mockPaper = new Paper( "

first h1

second h1

not an h1

" ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); expect( h1s( mockPaper ) ).toEqual( [ - { tag: "h1", content: "first h1", position: 0 }, - { tag: "h1", content: "second h1", position: 1 }, + { tag: "h1", content: "first h1", position: { startOffset: 4, endOffset: 12 } }, + { tag: "h1", content: "second h1", position: { startOffset: 21, endOffset: 30 } }, ] ); } ); it( "should rightly ignore empty paragraphs or empty blocks", function() { const mockPaper = new Paper( "

\n

first h1

second h1

not an h1

" ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); expect( h1s( mockPaper ) ).toEqual( [ - { tag: "h1", content: "first h1", position: 0 }, - { tag: "h1", content: "second h1", position: 1 }, + { tag: "h1", content: "first h1", position: { startOffset: 12, endOffset: 20 } }, + { tag: "h1", content: "second h1", position: { startOffset: 29, endOffset: 38 } }, ] ); } ); it( "should find H1 within division tags", function() { const mockPaper = new Paper( "

first h1

blah blah

second h1

" ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + expect( h1s( mockPaper ) ).toEqual( [ - { tag: "h1", content: "first h1", position: 0 }, - { tag: "h1", content: "second h1", position: 2 }, + { tag: "h1", content: "first h1", position: { startOffset: 9, endOffset: 17 } }, + { tag: "h1", content: "second h1", position: { startOffset: 64, endOffset: 73 } }, ] ); } ); @@ -111,9 +127,12 @@ describe( "Gets all H1s in the text", function() { "I think voice will become the dominant search query, but I think screens will continue to be " + "important in presenting search results.’

" ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + expect( h1s( mockPaper ) ).toEqual( [ - { tag: "h1", content: "Voice Search", position: 0 }, - { tag: "h1", content: "So what will the future bring?", position: 11 }, + { tag: "h1", content: "Voice Search", position: { startOffset: 4, endOffset: 16 } }, + { tag: "h1", content: "So what will the future bring?", position: { startOffset: 3903, endOffset: 3933 } }, ] ); } ); } ); From 9a8dbb8ec504597ec9ee6fe0fe3968f8079639b8 Mon Sep 17 00:00:00 2001 From: hdvos Date: Wed, 12 Jul 2023 17:45:05 +0200 Subject: [PATCH 229/616] change how marks are made in SingleH1Assessment.js --- .../src/scoring/assessments/seo/SingleH1Assessment.js | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/packages/yoastseo/src/scoring/assessments/seo/SingleH1Assessment.js b/packages/yoastseo/src/scoring/assessments/seo/SingleH1Assessment.js index 1951e78b5f4..d6c5023b15a 100644 --- a/packages/yoastseo/src/scoring/assessments/seo/SingleH1Assessment.js +++ b/packages/yoastseo/src/scoring/assessments/seo/SingleH1Assessment.js @@ -91,12 +91,11 @@ class SingleH1Assessment extends Assessment { * @returns {Array} Array with all the marked H1s. */ getMarks() { - const h1s = this._h1s; - - return map( h1s, function( h1 ) { + return map( this._h1s, function( h1 ) { return new Mark( { original: "

" + h1.content + "

", marked: "

" + marker( h1.content ) + "

", + position: { startOffset: h1.position.startOffset, endOffset: h1.position.endOffset }, } ); } ); } From c7177f00ecd2a0c7971cedbfeba8e18125c27e3e Mon Sep 17 00:00:00 2001 From: hdvos Date: Wed, 12 Jul 2023 17:45:37 +0200 Subject: [PATCH 230/616] update SingleH1AssessmentSpec.js to new marks --- .../assessments/seo/SingleH1AssessmentSpec.js | 51 +++++++++++++++---- 1 file changed, 40 insertions(+), 11 deletions(-) diff --git a/packages/yoastseo/spec/scoring/assessments/seo/SingleH1AssessmentSpec.js b/packages/yoastseo/spec/scoring/assessments/seo/SingleH1AssessmentSpec.js index 88ab2fd5836..d3fbbb92670 100644 --- a/packages/yoastseo/spec/scoring/assessments/seo/SingleH1AssessmentSpec.js +++ b/packages/yoastseo/spec/scoring/assessments/seo/SingleH1AssessmentSpec.js @@ -2,13 +2,16 @@ import SingleH1Assessment from "../../../../src/scoring/assessments/seo/SingleH1 import Paper from "../../../../src/values/Paper.js"; import Mark from "../../../../src/values/Mark.js"; import EnglishResearcher from "../../../../src/languageProcessing/languages/en/Researcher"; +import buildTree from "../../../specHelpers/parse/buildTree"; const h1Assessment = new SingleH1Assessment(); describe( "An assessment to check whether there is more than one H1 in the text", function() { it( "returns the default result when the paper doesn't contain an H1", function() { const mockPaper = new Paper( "

a paragraph

" ); - const assessment = h1Assessment.getResult( mockPaper, new EnglishResearcher( mockPaper ) ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + const assessment = h1Assessment.getResult( mockPaper, mockResearcher ); expect( assessment.getScore() ).toEqual( 0 ); expect( assessment.getText() ).toEqual( "" ); @@ -17,7 +20,10 @@ describe( "An assessment to check whether there is more than one H1 in the text" it( "returns the default result when there's an H1 at the beginning of the body", function() { const mockPaper = new Paper( "

heading

a paragraph

" ); - const assessment = h1Assessment.getResult( mockPaper, new EnglishResearcher( mockPaper ) ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + const assessment = h1Assessment.getResult( mockPaper, mockResearcher ); + expect( assessment.getScore() ).toEqual( 0 ); expect( assessment.getText() ).toEqual( "" ); @@ -26,7 +32,9 @@ describe( "An assessment to check whether there is more than one H1 in the text" it( "also returns the default result when there's an H1 and it's not at the beginning of the body", function() { const mockPaper = new Paper( "

Cool intro

heading

a paragraph

" ); - const assessment = h1Assessment.getResult( mockPaper, new EnglishResearcher( mockPaper ) ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + const assessment = h1Assessment.getResult( mockPaper, mockResearcher ); expect( assessment.getScore() ).toEqual( 0 ); expect( assessment.getText() ).toEqual( "" ); @@ -36,7 +44,9 @@ describe( "An assessment to check whether there is more than one H1 in the text" it( "returns a bad score and appropriate feedback when there are multiple one superfluous (i.e., non-title) " + "H1s in the body of the text", function() { const mockPaper = new Paper( "

a paragraph

heading 1

a paragraph

heading 2

" ); - const assessment = h1Assessment.getResult( mockPaper, new EnglishResearcher( mockPaper ) ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + const assessment = h1Assessment.getResult( mockPaper, mockResearcher ); expect( assessment.getScore() ).toEqual( 1 ); expect( assessment.getText() ).toEqual( "Single title: " + @@ -48,11 +58,16 @@ describe( "An assessment to check whether there is more than one H1 in the text" describe( "A test for marking incorrect H1s in the body", function() { it( "returns markers for incorrect H1s in the body", function() { const mockPaper = new Paper( "

a paragraph

heading

" ); - h1Assessment.getResult( mockPaper, new EnglishResearcher( mockPaper ) ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + h1Assessment.getResult( mockPaper, mockResearcher ); const expected = [ - new Mark( { original: "

heading

", - marked: "

heading

" } ), + new Mark( { + original: "

heading

", + marked: "

heading

", + position: { startOffset: 22, endOffset: 29 } } + ), ]; expect( h1Assessment.getMarks() ).toEqual( expected ); @@ -60,10 +75,15 @@ describe( "A test for marking incorrect H1s in the body", function() { it( "also returns markers for H1s in the first position of the body", function() { const mockPaper = new Paper( "

heading

a paragraph

" ); - h1Assessment.getResult( mockPaper, new EnglishResearcher( mockPaper ) ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + h1Assessment.getResult( mockPaper, mockResearcher ); const expected = [ - new Mark( { original: "

<h1>heading</h1>
", - marked: "

<h1><yoastmark class='yoast-text-mark'>heading</yoastmark></h1>
" } ), + new Mark( { + original: "

<h1>heading</h1>
", + marked: "

<h1><yoastmark class='yoast-text-mark'>heading</yoastmark></h1>
", + position: { startOffset: 4, endOffset: 11 }, + } ), ]; expect( h1Assessment.getMarks() ).toEqual( expected ); @@ -71,7 +91,9 @@ describe( "A test for marking incorrect H1s in the body", function() { it( "doesn't return markers when there are no H1s in the body", function() { const mockPaper = new Paper( "

<p>a paragraph</p>
" ); - const results = h1Assessment.getResult( mockPaper, new EnglishResearcher( mockPaper ) ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + const results = h1Assessment.getResult( mockPaper, mockResearcher ); expect( results._hasMarks ).toEqual( false ); } ); @@ -91,4 +113,11 @@ describe( "Checks if the assessment is applicable", function() { expect( assessment ).toBe( false ); } ); + + it( "is not applicable when there there is no text but there is an image", function() { + const mockPaper = new Paper( "\"Girl" ); + const assessment = h1Assessment.isApplicable( mockPaper ); + + expect( assessment ).toBe( false ); + } ); } ); From cfd8f24383fafc4918b2bdf220d263680aba008e Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Thu, 13 Jul 2023 10:53:45 +0200 Subject: [PATCH 231/616] remove logs --- packages/js/src/decorator/gutenberg.js | 57 ++++++++-------------- packages/yoastseo/src/parse/build/build.js | 4 +- 2 files changed, 23 insertions(+), 38 deletions(-) diff --git a/packages/js/src/decorator/gutenberg.js b/packages/js/src/decorator/gutenberg.js index f55b9a29c8e..c67ebe57ac3 100644 --- a/packages/js/src/decorator/gutenberg.js +++ b/packages/js/src/decorator/gutenberg.js @@ -1,7 +1,7 @@ /* External dependencies */ import { isFunction, - flatMap, + flatMap, flattenDeep, } from "lodash"; // The WP annotations package isn't loaded by default so force loading it. import "@wordpress/annotations"; @@ -372,13 +372,13 @@ export function hasInnerBlocks( block ) { * @returns {Array} An array of annotations for a specific block. */ function createAnnotations( html, richTextIdentifier, attribute, block, marks, index ) { - const record = create( { - html: html, - multilineTag: attribute.multilineTag, - multilineWrapperTag: attribute.multilineWrapperTag, - } ); - - const text = record.text; + // const record = create( { + // html: html, + // multilineTag: attribute.multilineTag, + // multilineWrapperTag: attribute.multilineWrapperTag, + // } ); + // + // const text = record.text; return flatMap( marks, ( ( mark ) => { let annotations; @@ -389,7 +389,7 @@ function createAnnotations( html, richTextIdentifier, attribute, block, marks, i annotations = createAnnotationsFromPositionBasedMarks( mark ); } else { annotations = calculateAnnotationsForTextFormat( - text, + html, mark ); } @@ -409,14 +409,15 @@ function createAnnotations( html, richTextIdentifier, attribute, block, marks, i } /** - * Gets the annotations for Yoast How-To block. + * Gets the annotations for Yoast FAQ block. * * @param {Object} attribute The attribute to apply annotations to. * @param {Object} block The block information in the state. * @param {Array} marks The marks to turn into annotations. * @param {number} index The index of the block. - * @returns {Array} The created annotations. */ -const getAnnotationsForHowTo = ( attribute, block, marks, index ) => { + * @returns {Array} The created annotations. + */ +const getAnnotationsForFAQ = ( attribute, block, marks, index ) => { const annotatableTexts = block.attributes[ attribute.key ]; if ( annotatableTexts.length === 0 ) { return []; @@ -433,11 +434,11 @@ const getAnnotationsForHowTo = ( attribute, block, marks, index ) => { return annotationsFromQuestion.concat( annotationsFromAnswer ); } ); - return flatMap( annotations ); + return flattenDeep( annotations ); }; /** - * Gets the annotations for Yoast FAQ block. + * Gets the annotations for Yoast How-To block. 
* * @param {Object} attribute The attribute to apply annotations to. * @param {Object} block The block information in the state. @@ -445,16 +446,15 @@ const getAnnotationsForHowTo = ( attribute, block, marks, index ) => { * @param {number} index The index of the block. * @returns {Array} The created annotations. */ -const getAnnotationsForFAQ = ( attribute, block, marks, index ) => { +const getAnnotationsForHowTo = ( attribute, block, marks, index ) => { const annotatableTexts = block.attributes[ attribute.key ]; if ( annotatableTexts.length === 0 ) { return []; } - const annotations = []; if ( attribute.key === "steps" ) { // For each rich text of the attribute value, create annotations. - annotations.concat( annotatableTexts.map( item => { + annotations.push( annotatableTexts.map( item => { const identifierStepName = `${ item.id }-name`; const identifierStepText = `${ item.id }-text`; @@ -464,22 +464,11 @@ const getAnnotationsForFAQ = ( attribute, block, marks, index ) => { // For each step return an array of the name-text pair objects. return annotationsFromStepName.concat( annotationsFromStepText ); } ) ); - console.log( annotatableTexts.map( item => { - const identifierStepName = `${ item.id }-name`; - const identifierStepText = `${ item.id }-text`; - - const annotationsFromStepName = createAnnotations( item.jsonName, identifierStepName, attribute, block, marks, index ); - const annotationsFromStepText = createAnnotations( item.jsonText, identifierStepText, attribute, block, marks, index ); - - // For each step return an array of the name-text pair objects. - return annotationsFromStepName.concat( annotationsFromStepText ); - } ), "FAQ ANN" ); } if ( attribute.key === "jsonDescription" ) { - annotations.concat( createAnnotations( annotatableTexts, "description", attribute, block, marks, index ) ); + annotations.push( createAnnotations( annotatableTexts, "description", attribute, block, marks, index ) ); } - - return flatMap( annotations ); + return flattenDeep( annotations ); }; /** @@ -496,14 +485,11 @@ export function getAnnotationsForYoastBlock( attributes, block, marks, index ) { // For Yoast FAQ and How-To blocks, we create separate annotation objects for each individual Rich Text found in the attribute. return flatMap( attributes, attribute => { if ( block.name === "yoast/faq-block" ) { - console.log( getAnnotationsForHowTo( attribute, block, marks, index ), "HOWTO" ); - return getAnnotationsForHowTo( attribute, block, marks, index ); + return getAnnotationsForFAQ( attribute, block, marks, index ); } // The check for getting the annotations for Yoast How-To block. - // Note: for Yoast How-To block, there are actually two attribute keys that are annotatable: steps and jsonDescription. 
if ( block.name === "yoast/how-to-block" ) { - console.log( getAnnotationsForFAQ( attribute, block, marks, index ), "FAQ" ); - return getAnnotationsForFAQ( attribute, block, marks, index ); + return getAnnotationsForHowTo( attribute, block, marks, index ); } } ); } @@ -578,7 +564,6 @@ function fillAnnotationQueue( annotations ) { */ const getAnnotationsForABlock = ( block, marks, index ) => { const attributes = getAnnotatableAttributes( block.name ); - console.log( attributes, "ATTRIBUTES" ); if ( block.name === "yoast/faq-block" || block.name === "yoast/how-to-block" ) { return getAnnotationsForYoastBlock( attributes, block, marks, index ); } diff --git a/packages/yoastseo/src/parse/build/build.js b/packages/yoastseo/src/parse/build/build.js index a82adb46577..2591f0d3f36 100644 --- a/packages/yoastseo/src/parse/build/build.js +++ b/packages/yoastseo/src/parse/build/build.js @@ -31,9 +31,9 @@ const setBlockIndexOnNode = ( index, node ) => { const getAnSetBlockIndex = ( node ) => { let index = -1; for ( const topNode of node.childNodes ) { - if ( topNode.name === "#comment" && topNode.data.trim().startsWith( "wp:" )) { // + if ( topNode.name === "#comment" && topNode.data.trim().startsWith( "wp:" ) ) { index++; - } else if (topNode.name === "#comment" && topNode.data.trim().startsWith( "/wp:" )) { + } else if ( topNode.name === "#comment" && topNode.data.trim().startsWith( "/wp:" ) ) { continue; } else { setBlockIndexOnNode( index, topNode ); From a9e615818be7830dbdfa1a678f2d097323958aa8 Mon Sep 17 00:00:00 2001 From: hdvos Date: Thu, 13 Jul 2023 14:53:09 +0200 Subject: [PATCH 232/616] remove spec --- .../spec/scoring/assessments/seo/SingleH1AssessmentSpec.js | 7 ------- 1 file changed, 7 deletions(-) diff --git a/packages/yoastseo/spec/scoring/assessments/seo/SingleH1AssessmentSpec.js b/packages/yoastseo/spec/scoring/assessments/seo/SingleH1AssessmentSpec.js index d3fbbb92670..4590e5258be 100644 --- a/packages/yoastseo/spec/scoring/assessments/seo/SingleH1AssessmentSpec.js +++ b/packages/yoastseo/spec/scoring/assessments/seo/SingleH1AssessmentSpec.js @@ -113,11 +113,4 @@ describe( "Checks if the assessment is applicable", function() { expect( assessment ).toBe( false ); } ); - - it( "is not applicable when there there is no text but there is an image", function() { - const mockPaper = new Paper( "\"Girl" ); - const assessment = h1Assessment.isApplicable( mockPaper ); - - expect( assessment ).toBe( false ); - } ); } ); From 0affbc6eaa1b7b01a0d3e7d7c6b8216e20fb870a Mon Sep 17 00:00:00 2001 From: hdvos Date: Thu, 13 Jul 2023 14:57:24 +0200 Subject: [PATCH 233/616] update jsdoc --- packages/yoastseo/src/languageProcessing/researches/h1s.js | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/packages/yoastseo/src/languageProcessing/researches/h1s.js b/packages/yoastseo/src/languageProcessing/researches/h1s.js index b767fa0ca21..c4ef7d1d7e8 100644 --- a/packages/yoastseo/src/languageProcessing/researches/h1s.js +++ b/packages/yoastseo/src/languageProcessing/researches/h1s.js @@ -1,7 +1,5 @@ - - /** - * Gets all H1s in a text, including their content and their position with regards to other HTML blocks. + * Gets all H1s in a text, including their content and their position information. * * @param {Paper} paper The paper for which to get the H1s. 
* From 12455a4737b787c4be28b66fcf0736cf3a6df519 Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Fri, 14 Jul 2023 11:01:48 +0200 Subject: [PATCH 234/616] Uncomment code --- packages/js/src/analysis/collectAnalysisData.js | 2 +- packages/js/src/decorator/gutenberg.js | 16 ++++++++-------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/packages/js/src/analysis/collectAnalysisData.js b/packages/js/src/analysis/collectAnalysisData.js index 70fe78a3b33..9a65af56920 100644 --- a/packages/js/src/analysis/collectAnalysisData.js +++ b/packages/js/src/analysis/collectAnalysisData.js @@ -67,7 +67,7 @@ export default function collectAnalysisData( editorData, store, customAnalysisDa let blocks = null; if ( blockEditorDataModule ) { blocks = blockEditorDataModule.getBlocks(); - blocks = blocks.map( block => filterBlockData( block ) ); + // blocks = blocks.map( block => filterBlockData( block ) ); } // Make a data structure for the paper data. diff --git a/packages/js/src/decorator/gutenberg.js b/packages/js/src/decorator/gutenberg.js index c67ebe57ac3..abcb6545e36 100644 --- a/packages/js/src/decorator/gutenberg.js +++ b/packages/js/src/decorator/gutenberg.js @@ -372,13 +372,13 @@ export function hasInnerBlocks( block ) { * @returns {Array} An array of annotations for a specific block. */ function createAnnotations( html, richTextIdentifier, attribute, block, marks, index ) { - // const record = create( { - // html: html, - // multilineTag: attribute.multilineTag, - // multilineWrapperTag: attribute.multilineWrapperTag, - // } ); - // - // const text = record.text; + const record = create( { + html: html, + multilineTag: attribute.multilineTag, + multilineWrapperTag: attribute.multilineWrapperTag, + } ); + + const text = record.text; return flatMap( marks, ( ( mark ) => { let annotations; @@ -389,7 +389,7 @@ function createAnnotations( html, richTextIdentifier, attribute, block, marks, i annotations = createAnnotationsFromPositionBasedMarks( mark ); } else { annotations = calculateAnnotationsForTextFormat( - html, + text, mark ); } From e79bdb656f28756ee5d76e56122c44b01477da82 Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Fri, 14 Jul 2023 15:23:20 +0200 Subject: [PATCH 235/616] Adjust the method for creating marked sentence --- .../researches/keywordCountSpec.js | 136 +++++++++++++++++- .../highlighting/getMarkingsInSentence.js | 76 ++-------- 2 files changed, 150 insertions(+), 62 deletions(-) diff --git a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js index 1e6be145d48..e2dbc6d7322 100644 --- a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js +++ b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js @@ -237,7 +237,8 @@ const testCases = [ skip: false, }, { - description: "with exact matching, the keyphrase directly preceded by a full stop should not match a keyphrase occurrence without the full stop in the text.", + description: "with exact matching, the keyphrase directly preceded by a full stop should not match a " + + "keyphrase occurrence without the full stop in the text.", paper: new Paper( "

<p>A sentence with a keyphrase.</p>
", { keyword: "\".sentence\"" } ), keyphraseForms: [ [ ".sentence" ] ], expectedCount: 0, @@ -816,7 +817,140 @@ describe.each( testCasesWithLocaleMapping )( "Test for counting the keyphrase in expect( keyphraseCountResult.markings ).toEqual( expectedMarkings ); } ); } ); +const testDataForHTMLTags = [ + { + description: "counts keyphrase occurrence correctly in a text containing `` tag, and outputs correct mark objects", + paper: new Paper( "

The forepaws possess a \"false thumb\", which is an extension of a wrist bone, " + + "the radial sesamoid found in many carnivorans. This thumb allows the animal to grip onto bamboo stalks " + + "and both the digits and wrist bones are highly flexible. The red panda shares this feature " + + "with the giant panda, which has a larger sesamoid that is more compressed at the sides." + + " In addition, the red panda's sesamoid has a more sunken tip while the giant panda's curves in the middle. 

", + { keyword: "giant panda", locale: "en_US" } ), + keyphraseForms: [ [ "giant" ], [ "panda", "pandas" ] ], + expectedCount: 1, + expectedMarkings: [ + new Mark( { + marked: " The red panda shares this feature with the " + + "giant panda, which has a larger sesamoid that is more compressed at the sides.", + original: " The red panda shares this feature with the giant panda, which has a larger sesamoid " + + "that is more compressed at the sides.", + position: { + endOffset: 253, + startOffset: 248, + }, + } ), + new Mark( { + marked: " The red panda shares this feature with the " + + "giant panda, which has a larger sesamoid that is more compressed at the sides.", + original: " The red panda shares this feature with the giant panda, which has a larger " + + "sesamoid that is more compressed at the sides.", + position: { + endOffset: 288, + startOffset: 283, + }, + } ), + new Mark( { + marked: " The red panda shares this feature with the " + + "giant panda, which has a larger sesamoid that is more compressed at the sides.", + original: " The red panda shares this feature with the giant panda, which has a larger " + + "sesamoid that is more compressed at the sides.", + position: { + endOffset: 302, + startOffset: 297, + }, + } ), + ], + skip: false, + }, + { + description: "counts keyphrase occurrence correctly in a text containing `` tag and outputs correct mark objects", + paper: new Paper( "

The forepaws possess a \"false thumb\", which is an extension of a wrist bone, " + + "the radial sesamoid found in many carnivorans. This thumb allows the animal to grip onto bamboo stalks " + + "and both the digits and wrist bones are highly flexible. The red panda shares this feature " + + "with the giant panda, which has a larger sesamoid that is more compressed at the sides." + + " In addition, the red panda's sesamoid has a more sunken tip while the giant panda's curves in the middle. 

", + { keyword: "giant panda", locale: "en_US" } ), + keyphraseForms: [ [ "giant" ], [ "panda", "pandas" ] ], + expectedCount: 1, + expectedMarkings: [ + new Mark( { + marked: " The red panda shares this feature with the " + + "giant panda, which has a larger sesamoid that is more compressed at the sides.", + original: " The red panda shares this feature with the giant panda, which has a larger sesamoid " + + "that is more compressed at the sides.", + position: { + endOffset: 253, + startOffset: 248, + }, + } ), + new Mark( { + marked: " The red panda shares this feature with the " + + "giant panda, which has a larger sesamoid that is more compressed at the sides.", + original: " The red panda shares this feature with the giant panda, which has a larger " + + "sesamoid that is more compressed at the sides.", + position: { + endOffset: 298, + startOffset: 287, + }, + } ), + ], + skip: false, + }, + { + description: "counts keyphrase occurrence correctly when it's found inside an anchor text and outputs correct mark objects", + paper: new Paper( "

The forepaws possess a \"false thumb\", which is an extension of a wrist bone, " + + "the radial sesamoid found in many carnivorans. This thumb allows the animal to grip onto bamboo stalks " + + "and both the digits and wrist bones are highly flexible. The red panda shares this feature " + + "with the giant panda, which has a larger sesamoid " + + "that is more compressed at the sides." + + " In addition, the red panda's sesamoid has a more sunken tip while the giant panda's curves in the middle. 

", + { keyword: "giant panda", locale: "en_US" } ), + keyphraseForms: [ [ "giant" ], [ "panda", "pandas" ] ], + expectedCount: 1, + expectedMarkings: [ + new Mark( { + marked: " The red panda shares this feature with the " + + "giant panda, which has a larger sesamoid that is more compressed at the sides.", + original: " The red panda shares this feature with the giant panda, which has a larger sesamoid " + + "that is more compressed at the sides.", + position: { + endOffset: 253, + startOffset: 248, + }, + } ), + new Mark( { + marked: " The red panda shares this feature with the " + + "giant panda, which has a larger sesamoid that is more compressed at the sides.", + original: " The red panda shares this feature with the giant panda, which has a larger " + + "sesamoid that is more compressed at the sides.", + position: { + endOffset: 346, + startOffset: 335, + }, + } ), + ], + skip: false, + }, +]; +describe.each( testDataForHTMLTags )( "Test for counting the keyphrase in a text containing multiple html tags", + function( { + description, + paper, + keyphraseForms, + expectedCount, + expectedMarkings, + skip } ) { + const test = skip ? it.skip : it; + + test( description, function() { + const mockResearcher = buildMorphologyMockResearcher( keyphraseForms ); + buildTree( paper, mockResearcher ); + const keyphraseCountResult = getKeyphraseCount( paper, mockResearcher ); + expect( keyphraseCountResult.count ).toBe( expectedCount ); + expect( keyphraseCountResult.markings ).toEqual( expectedMarkings ); + } ); + } ); /** * Mocks Japanese Researcher. diff --git a/packages/yoastseo/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js b/packages/yoastseo/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js index 5784691f8e8..e555b5d5a56 100644 --- a/packages/yoastseo/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js +++ b/packages/yoastseo/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js @@ -3,44 +3,6 @@ import Mark from "../../../../src/values/Mark"; const markStart = ""; const markEnd = ""; -/** - * Merges consecutive and overlapping matches into one match. - * - * This is a helper for search-based highlighting. - * - * @param {Token[]} matches An array of the matched tokens to merge. - * @param {Boolean} useSpace Whether words are separated by a space. In Japanese, for example, words are not separated by a space. - * - * @returns {Token[]} An array of markings where consecutive and overlapping markings are merged. - */ -const mergeConsecutiveAndOverlappingMatches = ( matches, useSpace ) => { - const newMatches = []; - - // Sort matches by start offset. This is probably redundant, but for extra safety. - matches.sort( function( a, b ) { - return a.sourceCodeRange.startOffset - b.sourceCodeRange.startOffset; - } ); - - matches.forEach( ( match ) => { - if ( newMatches.length === 0 ) { - newMatches.push( match ); - return; - } - - const lastMatch = newMatches[ newMatches.length - 1 ]; - if ( lastMatch.sourceCodeRange.endOffset + ( useSpace ? 1 : 0 ) === match.sourceCodeRange.startOffset ) { - lastMatch.sourceCodeRange.endOffset = match.sourceCodeRange.endOffset; - } else if ( match.sourceCodeRange.startOffset <= lastMatch.sourceCodeRange.endOffset ) { - // The match overlaps with the last match, so we extend the last match to include the new match. 
- lastMatch.sourceCodeRange.endOffset = match.sourceCodeRange.endOffset; - } else { - newMatches.push( match ); - } - } ); - - return newMatches; -}; - /** * Adds `yoastmark` tags to the keyphrase matches in the sentence. * @@ -48,34 +10,26 @@ const mergeConsecutiveAndOverlappingMatches = ( matches, useSpace ) => { * * @param {Sentence} sentence The sentence to add the `yoastmark` tags to. * @param {Token[]} matches The array of the keyphrase matches. - * @param {Boolean} useSpace Whether words are separated by a space. * * @returns {string} The sentence with the added `yoastmark` tags. */ -const createMarksForSentence = ( sentence, matches, useSpace ) => { - let sentenceText = sentence.text; - - // Merge consecutive and overlapping matches. - const mergedMatches = mergeConsecutiveAndOverlappingMatches( matches, useSpace ); - - const sentenceStartOffset = sentence.sourceCodeRange.startOffset; - // Loop through the matches backwards, so that the reference is not affected by the changes. - for ( let i = mergedMatches.length - 1; i >= 0; i-- ) { - const match = mergedMatches[ i ]; - - // Adds `yoastmark` tags to the keyphrase matches in the sentence. - // sentenceStartOffset is subtracted because the start and end offsets are relative to the start of the source code. - // Subtracting the sentenceStartOffset makes them relative to the start of the sentence. - sentenceText = sentenceText.substring( 0, match.sourceCodeRange.endOffset - sentenceStartOffset ) + markEnd + - sentenceText.substring( match.sourceCodeRange.endOffset - sentenceStartOffset ); - sentenceText = sentenceText.substring( 0, match.sourceCodeRange.startOffset - sentenceStartOffset ) + markStart + - sentenceText.substring( match.sourceCodeRange.startOffset - sentenceStartOffset ); +const createMarksForSentence = ( sentence, matches ) => { + // let sentenceText = sentence.text; + const tokens = sentence.tokens; + + const newTokens = []; + for ( let i = tokens.length - 1; i >= 0; i-- ) { + const token = tokens[ i ]; + if ( matches.some( match => match.sourceCodeRange.startOffset === token.sourceCodeRange.startOffset || + match.sourceCodeRange.endOffset === token.sourceCodeRange.endOffset ) ) { + newTokens.unshift( markStart, token.text, markEnd ); + } else { + newTokens.unshift( token.text ); + } } - + const markedSentence = newTokens.join( "" ); // Merge consecutive markings into one marking. - sentenceText = sentenceText.replace( new RegExp( "
( ?)", "ig" ), "$1" ); - - return sentenceText; + return markedSentence.replace( new RegExp( "(( |\u00A0)?)", "ig" ), "$1" ); }; /** From 25f8b54599205910a4d35faa8b8d2c6336a21fba Mon Sep 17 00:00:00 2001 From: hdvos Date: Fri, 14 Jul 2023 15:47:12 +0200 Subject: [PATCH 236/616] remove unused variabe createMarksForSentence --- .../helpers/highlighting/getMarkingsInSentence.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/yoastseo/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js b/packages/yoastseo/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js index e555b5d5a56..f0efd11cf2d 100644 --- a/packages/yoastseo/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js +++ b/packages/yoastseo/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js @@ -91,7 +91,7 @@ function getMarkingsInSentence( sentence, matchesInSentence, useSpace = true ) { } // Create the marked sentence that is used for search-based highlighting. - const markedSentence = createMarksForSentence( sentence, matchesInSentence, useSpace ); + const markedSentence = createMarksForSentence( sentence, matchesInSentence ); /* * Note that there is a paradigm shift: From 8ee297af80f85a19c330d045eff591f5003bfade Mon Sep 17 00:00:00 2001 From: hdvos Date: Fri, 14 Jul 2023 15:47:58 +0200 Subject: [PATCH 237/616] adapt specs for getMarkingsInSentenceSpec.js now the tokens attribute is mandatory for createMarksForSentence --- .../highlighting/getMarkingsInSentenceSpec.js | 90 ++++++++++++------- 1 file changed, 59 insertions(+), 31 deletions(-) diff --git a/packages/yoastseo/spec/languageProcessing/helpers/highlighting/getMarkingsInSentenceSpec.js b/packages/yoastseo/spec/languageProcessing/helpers/highlighting/getMarkingsInSentenceSpec.js index 26ceb70b587..56058c93d8f 100644 --- a/packages/yoastseo/spec/languageProcessing/helpers/highlighting/getMarkingsInSentenceSpec.js +++ b/packages/yoastseo/spec/languageProcessing/helpers/highlighting/getMarkingsInSentenceSpec.js @@ -4,14 +4,35 @@ import Mark from "../../../../src/values/Mark"; const testCases = [ { testDescription: "No markings in sentence", - sentence: { text: "This is a sentence.", sourceCodeRange: { startOffset: 0, endOffset: 18 } }, + sentence: { text: "This is a sentence.", sourceCodeRange: { startOffset: 0, endOffset: 18 }, + tokens: [ + { text: "This", sourceCodeRange: { startOffset: 0, endOffset: 4 } }, + { text: " ", sourceCodeRange: { startOffset: 4, endOffset: 5 } }, + { text: "is", sourceCodeRange: { startOffset: 5, endOffset: 7 } }, + { text: " ", sourceCodeRange: { startOffset: 7, endOffset: 8 } }, + { text: "a", sourceCodeRange: { startOffset: 8, endOffset: 9 } }, + { text: " ", sourceCodeRange: { startOffset: 9, endOffset: 10 } }, + { text: "sentence", sourceCodeRange: { startOffset: 10, endOffset: 18 } }, + { text: ".", sourceCodeRange: { startOffset: 18, endOffset: 19 } }, + ] }, matchesInSentence: [], useSpace: true, expectedResult: [], }, { testDescription: "One marking in sentence", - sentence: { text: "This is a sentence.", sourceCodeRange: { startOffset: 0, endOffset: 18 } }, + sentence: { text: "This is a sentence.", sourceCodeRange: { startOffset: 0, endOffset: 18 }, + tokens: [ + { text: "This", sourceCodeRange: { startOffset: 0, endOffset: 4 } }, + { text: " ", sourceCodeRange: { startOffset: 4, endOffset: 5 } }, + { text: "is", sourceCodeRange: { startOffset: 5, endOffset: 7 } }, + { text: " ", sourceCodeRange: { startOffset: 7, endOffset: 8 } }, + { text: "a", 
sourceCodeRange: { startOffset: 8, endOffset: 9 } }, + { text: " ", sourceCodeRange: { startOffset: 9, endOffset: 10 } }, + { text: "sentence", sourceCodeRange: { startOffset: 10, endOffset: 18 } }, + { text: ".", sourceCodeRange: { startOffset: 18, endOffset: 19 } }, + ], + }, matchesInSentence: [ { sourceCodeRange: { startOffset: 0, endOffset: 4 } } ], useSpace: true, expectedResult: [ new Mark( { @@ -22,7 +43,18 @@ const testCases = [ }, { testDescription: "One marking in sentence with two consecutive matches", - sentence: { text: "This is a sentence.", sourceCodeRange: { startOffset: 0, endOffset: 18 } }, + sentence: { text: "This is a sentence.", sourceCodeRange: { startOffset: 0, endOffset: 18 }, + tokens: [ + { text: "This", sourceCodeRange: { startOffset: 0, endOffset: 4 } }, + { text: " ", sourceCodeRange: { startOffset: 4, endOffset: 5 } }, + { text: "is", sourceCodeRange: { startOffset: 5, endOffset: 7 } }, + { text: " ", sourceCodeRange: { startOffset: 7, endOffset: 8 } }, + { text: "a", sourceCodeRange: { startOffset: 8, endOffset: 9 } }, + { text: " ", sourceCodeRange: { startOffset: 9, endOffset: 10 } }, + { text: "sentence", sourceCodeRange: { startOffset: 10, endOffset: 18 } }, + { text: ".", sourceCodeRange: { startOffset: 18, endOffset: 19 } }, + ], + }, matchesInSentence: [ { sourceCodeRange: { startOffset: 0, endOffset: 4 } }, { sourceCodeRange: { startOffset: 5, endOffset: 7 } } ], useSpace: true, expectedResult: [ new Mark( { @@ -33,7 +65,18 @@ const testCases = [ }, { testDescription: "Two markings that are not consecutive in sentence", - sentence: { text: "This is a sentence.", sourceCodeRange: { startOffset: 0, endOffset: 18 } }, + sentence: { text: "This is a sentence.", sourceCodeRange: { startOffset: 0, endOffset: 18 }, + tokens: [ + { text: "This", sourceCodeRange: { startOffset: 0, endOffset: 4 } }, + { text: " ", sourceCodeRange: { startOffset: 4, endOffset: 5 } }, + { text: "is", sourceCodeRange: { startOffset: 5, endOffset: 7 } }, + { text: " ", sourceCodeRange: { startOffset: 7, endOffset: 8 } }, + { text: "a", sourceCodeRange: { startOffset: 8, endOffset: 9 } }, + { text: " ", sourceCodeRange: { startOffset: 9, endOffset: 10 } }, + { text: "sentence", sourceCodeRange: { startOffset: 10, endOffset: 18 } }, + { text: ".", sourceCodeRange: { startOffset: 18, endOffset: 19 } }, + ], + }, matchesInSentence: [ { sourceCodeRange: { startOffset: 0, endOffset: 4 } }, { sourceCodeRange: { startOffset: 10, endOffset: 18 } } ], useSpace: true, expectedResult: [ @@ -51,7 +94,18 @@ const testCases = [ }, { testDescription: "One marking in a sentence that has a non-zero startOffset", - sentence: { text: "This is a sentence.", sourceCodeRange: { startOffset: 10, endOffset: 38 } }, + sentence: { text: "This is a sentence.", sourceCodeRange: { startOffset: 10, endOffset: 38 }, + tokens: [ + { text: "This", sourceCodeRange: { startOffset: 10, endOffset: 14 } }, + { text: " ", sourceCodeRange: { startOffset: 14, endOffset: 15 } }, + { text: "is", sourceCodeRange: { startOffset: 15, endOffset: 17 } }, + { text: " ", sourceCodeRange: { startOffset: 17, endOffset: 18 } }, + { text: "a", sourceCodeRange: { startOffset: 18, endOffset: 19 } }, + { text: " ", sourceCodeRange: { startOffset: 19, endOffset: 20 } }, + { text: "sentence", sourceCodeRange: { startOffset: 20, endOffset: 28 } }, + { text: ".", sourceCodeRange: { startOffset: 28, endOffset: 29 } }, + ], + }, matchesInSentence: [ { sourceCodeRange: { startOffset: 10, endOffset: 14 } } ], useSpace: true, expectedResult: [ 
new Mark( { @@ -60,32 +114,6 @@ const testCases = [ position: { endOffset: 14, startOffset: 10 }, } ) ], }, - { - testDescription: "Two markings that overlap", - sentence: { text: "This is a sentence.", sourceCodeRange: { startOffset: 0, endOffset: 18 } }, - matchesInSentence: [ { sourceCodeRange: { startOffset: 0, endOffset: 7 } }, { sourceCodeRange: { startOffset: 5, endOffset: 9 } } ], - useSpace: true, - expectedResult: [ - new Mark( { - marked: "This is a sentence.", - original: "This is a sentence.", - position: { endOffset: 9, startOffset: 0 }, - } ), - ], - }, - { - testDescription: "Two markings that overlap in a sentence that doesn't use space", - sentence: { text: "彼女はオンラインストアで黒の長袖マキシドレスを購入したかった。", sourceCodeRange: { startOffset: 0, endOffset: 31 } }, - matchesInSentence: [ { sourceCodeRange: { startOffset: 12, endOffset: 13 } }, { sourceCodeRange: { startOffset: 13, endOffset: 16 } } ], - useSpace: false, - expectedResult: [ - new Mark( { - marked: "彼女はオンラインストアで黒の長袖マキシドレスを購入したかった。", - original: "彼女はオンラインストアで黒の長袖マキシドレスを購入したかった。", - position: { endOffset: 16, startOffset: 12 }, - } ), - ], - }, ]; describe.each( testCases )( "a test for getting the marks from a sentence", ( { From c0bfa8e5b983fc69051e35162e77bff48deb19e6 Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Fri, 14 Jul 2023 17:07:27 +0200 Subject: [PATCH 238/616] Remove unnecessary comment --- .../helpers/highlighting/getMarkingsInSentence.js | 1 - 1 file changed, 1 deletion(-) diff --git a/packages/yoastseo/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js b/packages/yoastseo/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js index f0efd11cf2d..a727facf39a 100644 --- a/packages/yoastseo/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js +++ b/packages/yoastseo/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js @@ -14,7 +14,6 @@ const markEnd = ""; * @returns {string} The sentence with the added `yoastmark` tags. */ const createMarksForSentence = ( sentence, matches ) => { - // let sentenceText = sentence.text; const tokens = sentence.tokens; const newTokens = []; From 3e2ce904c5ea2333d3cf675866a40767d1e5e557 Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Mon, 17 Jul 2023 14:51:14 +0200 Subject: [PATCH 239/616] Add methods in Mark.js --- packages/yoastseo/src/values/Mark.js | 31 ++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/packages/yoastseo/src/values/Mark.js b/packages/yoastseo/src/values/Mark.js index f96b2e33418..2299ccf0ede 100644 --- a/packages/yoastseo/src/values/Mark.js +++ b/packages/yoastseo/src/values/Mark.js @@ -95,6 +95,24 @@ Mark.prototype.setPositionEnd = function( positionEnd ) { this._properties.position.endOffset = positionEnd; }; +/** + * Returns the start position of a block. + * @param {number} startOffsetBlock The block start offset. + * @returns {number} The start position of a block. + */ +Mark.prototype.setBlockPositionStart = function( startOffsetBlock ) { + this._properties.position.startOffsetBlock = startOffsetBlock; +}; + +/** + * Returns the end position of a block. + * @param {number} endOffsetBlock The block end offset. + * @returns {number} The end position of a block. + */ +Mark.prototype.setBlockPositionEnd = function( endOffsetBlock ) { + this._properties.position.endOffsetBlock = endOffsetBlock; +}; + /** * Sets the block index. 
* @@ -105,7 +123,7 @@ Mark.prototype.setBlockIndex = function( blockIndex ) { if ( this._properties.position ) { this._properties.position.blockIndex = blockIndex; } -} +}; /** * Gets the block index. @@ -114,7 +132,16 @@ Mark.prototype.setBlockIndex = function( blockIndex ) { */ Mark.prototype.getBlockIndex = function() { return this._properties.position && this._properties.position.blockIndex; -} +}; + +/** + * Gets the block client id. + * + * @returns {string} The block index. + */ +Mark.prototype.getBlockClientId = function() { + return this._properties.position && this._properties.position.clientId; +}; /** * Returns the start position inside block. From 08f2ad2d8083c2c1da192d04144b75fc313c5739 Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Mon, 17 Jul 2023 14:53:45 +0200 Subject: [PATCH 240/616] Add clientId to the node --- packages/js/src/decorator/gutenberg.js | 50 ++++++++----------- .../spec/specHelpers/parse/buildTree.js | 2 +- .../highlighting/getMarkingsInSentence.js | 14 +++--- .../helpers/sentence/getSentencesFromTree.js | 5 +- packages/yoastseo/src/parse/build/build.js | 44 +++------------- packages/yoastseo/src/scoring/assessor.js | 2 +- .../yoastseo/src/worker/AnalysisWebWorker.js | 4 +- 7 files changed, 41 insertions(+), 80 deletions(-) diff --git a/packages/js/src/decorator/gutenberg.js b/packages/js/src/decorator/gutenberg.js index abcb6545e36..1ea0f6388af 100644 --- a/packages/js/src/decorator/gutenberg.js +++ b/packages/js/src/decorator/gutenberg.js @@ -367,11 +367,11 @@ export function hasInnerBlocks( block ) { * @param {Object} attribute The attribute to apply annotations to. * @param {Object} block The block information in the state. * @param {Array} marks The marks to turn into annotations. - * @param {number} index The index of the block. * * @returns {Array} An array of annotations for a specific block. */ -function createAnnotations( html, richTextIdentifier, attribute, block, marks, index ) { +function createAnnotations( html, richTextIdentifier, attribute, block, marks ) { + const blockClientId = block.clientId; const record = create( { html: html, multilineTag: attribute.multilineTag, @@ -383,7 +383,7 @@ function createAnnotations( html, richTextIdentifier, attribute, block, marks, i return flatMap( marks, ( ( mark ) => { let annotations; if ( mark.hasBlockPosition && mark.hasBlockPosition() ) { - if ( index !== mark.getBlockIndex() ) { + if ( blockClientId !== mark.getBlockClientId() ) { return []; } annotations = createAnnotationsFromPositionBasedMarks( mark ); @@ -414,10 +414,9 @@ function createAnnotations( html, richTextIdentifier, attribute, block, marks, i * @param {Object} attribute The attribute to apply annotations to. * @param {Object} block The block information in the state. * @param {Array} marks The marks to turn into annotations. - * @param {number} index The index of the block. * @returns {Array} The created annotations. 
*/ -const getAnnotationsForFAQ = ( attribute, block, marks, index ) => { +const getAnnotationsForFAQ = ( attribute, block, marks ) => { const annotatableTexts = block.attributes[ attribute.key ]; if ( annotatableTexts.length === 0 ) { return []; @@ -428,8 +427,8 @@ const getAnnotationsForFAQ = ( attribute, block, marks, index ) => { const identifierQuestion = `${ item.id }-question`; const identifierAnswer = `${ item.id }-answer`; - const annotationsFromQuestion = createAnnotations( item.jsonQuestion, identifierQuestion, attribute, block, marks, index ); - const annotationsFromAnswer = createAnnotations( item.jsonAnswer, identifierAnswer, attribute, block, marks, index ); + const annotationsFromQuestion = createAnnotations( item.jsonQuestion, identifierQuestion, attribute, block, marks ); + const annotationsFromAnswer = createAnnotations( item.jsonAnswer, identifierAnswer, attribute, block, marks ); return annotationsFromQuestion.concat( annotationsFromAnswer ); } ); @@ -443,10 +442,9 @@ const getAnnotationsForFAQ = ( attribute, block, marks, index ) => { * @param {Object} attribute The attribute to apply annotations to. * @param {Object} block The block information in the state. * @param {Array} marks The marks to turn into annotations. - * @param {number} index The index of the block. * @returns {Array} The created annotations. */ -const getAnnotationsForHowTo = ( attribute, block, marks, index ) => { +const getAnnotationsForHowTo = ( attribute, block, marks ) => { const annotatableTexts = block.attributes[ attribute.key ]; if ( annotatableTexts.length === 0 ) { return []; @@ -458,15 +456,15 @@ const getAnnotationsForHowTo = ( attribute, block, marks, index ) => { const identifierStepName = `${ item.id }-name`; const identifierStepText = `${ item.id }-text`; - const annotationsFromStepName = createAnnotations( item.jsonName, identifierStepName, attribute, block, marks, index ); - const annotationsFromStepText = createAnnotations( item.jsonText, identifierStepText, attribute, block, marks, index ); + const annotationsFromStepName = createAnnotations( item.jsonName, identifierStepName, attribute, block, marks ); + const annotationsFromStepText = createAnnotations( item.jsonText, identifierStepText, attribute, block, marks ); // For each step return an array of the name-text pair objects. return annotationsFromStepName.concat( annotationsFromStepText ); } ) ); } if ( attribute.key === "jsonDescription" ) { - annotations.push( createAnnotations( annotatableTexts, "description", attribute, block, marks, index ) ); + annotations.push( createAnnotations( annotatableTexts, "description", attribute, block, marks ) ); } return flattenDeep( annotations ); }; @@ -477,19 +475,18 @@ const getAnnotationsForHowTo = ( attribute, block, marks, index ) => { * @param {Object} attributes The attributes to apply annotations to. * @param {Object} block The block information in the state. * @param {Array} marks The marks to turn into annotations. - * @param {number} index The index of the block. * * @returns {Array} An array of annotations for Yoast blocks. */ -export function getAnnotationsForYoastBlock( attributes, block, marks, index ) { +export function getAnnotationsForYoastBlock( attributes, block, marks ) { // For Yoast FAQ and How-To blocks, we create separate annotation objects for each individual Rich Text found in the attribute. 
return flatMap( attributes, attribute => { if ( block.name === "yoast/faq-block" ) { - return getAnnotationsForFAQ( attribute, block, marks, index ); + return getAnnotationsForFAQ( attribute, block, marks ); } // The check for getting the annotations for Yoast How-To block. if ( block.name === "yoast/how-to-block" ) { - return getAnnotationsForHowTo( attribute, block, marks, index ); + return getAnnotationsForHowTo( attribute, block, marks ); } } ); } @@ -500,11 +497,10 @@ export function getAnnotationsForYoastBlock( attributes, block, marks, index ) { * @param {Object} attributes The attributes to apply annotations to. * @param {Object} block The block information in the state. * @param {Array} marks The marks to turn into annotations. - * @param {number} index The index of the block. * * @returns {Array} The annotations to apply. */ -export function getAnnotationsForWPBlock( attributes, block, marks, index ) { +export function getAnnotationsForWPBlock( attributes, block, marks ) { const annotations = flatMap( attributes, attribute => { if ( attribute.length === 0 ) { return []; @@ -513,12 +509,12 @@ export function getAnnotationsForWPBlock( attributes, block, marks, index ) { const richTextIdentifier = attribute.key; const html = block.attributes[ richTextIdentifier ]; - return createAnnotations( html, richTextIdentifier, attribute, block, marks, index ); + return createAnnotations( html, richTextIdentifier, attribute, block, marks ); } ); if ( hasInnerBlocks( block ) ) { block.innerBlocks.forEach( innerBlock => { - annotations.concat( getAnnotationsForWPBlock( attributes, innerBlock, marks, index ) ); + annotations.concat( getAnnotationsForWPBlock( attributes, innerBlock, marks ) ); } ); } return annotations; @@ -559,15 +555,14 @@ function fillAnnotationQueue( annotations ) { * * @param { Object } block The block for which the annotations need to be determined. * @param { Mark[] } marks A list of marks that could apply to the block. - * @param {number} index The index of the block. * @returns { Object[] } All annotations that need to be placed on the block. */ -const getAnnotationsForABlock = ( block, marks, index ) => { +const getAnnotationsForABlock = ( block, marks ) => { const attributes = getAnnotatableAttributes( block.name ); if ( block.name === "yoast/faq-block" || block.name === "yoast/how-to-block" ) { - return getAnnotationsForYoastBlock( attributes, block, marks, index ); + return getAnnotationsForYoastBlock( attributes, block, marks ); } - return getAnnotationsForWPBlock( attributes, block, marks, index ); + return getAnnotationsForWPBlock( attributes, block, marks ); }; @@ -582,8 +577,8 @@ const getAnnotationsForABlock = ( block, marks, index ) => { * @returns {Object[]} An array of annotation objects. */ export function getAnnotationsForBlocks( blocks, marks ) { - return flatMap( blocks, ( block, i ) => { - return getAnnotationsForABlock( block, marks, i ); + return flatMap( blocks, ( block ) => { + return getAnnotationsForABlock( block, marks ); } ); } @@ -641,7 +636,6 @@ function removeAllAnnotationsFromBlock( blockClientId ) { export function reapplyAnnotationsForSelectedBlock() { const block = select( "core/editor" ).getSelectedBlock(); const activeMarkerId = select( "yoast-seo/editor" ).getActiveMarker(); - const blockIndex = select( "core/block-editor" ).getBlockIndex( block.clientId ); if ( ! block || ! 
activeMarkerId ) { return; @@ -657,7 +651,7 @@ export function reapplyAnnotationsForSelectedBlock() { const marksForActiveMarker = activeMarker.marks; - const annotations = getAnnotationsForABlock( block, marksForActiveMarker, blockIndex ); + const annotations = getAnnotationsForABlock( block, marksForActiveMarker ); fillAnnotationQueue( annotations ); diff --git a/packages/yoastseo/spec/specHelpers/parse/buildTree.js b/packages/yoastseo/spec/specHelpers/parse/buildTree.js index 80caa20c1ae..5f1920e757c 100644 --- a/packages/yoastseo/spec/specHelpers/parse/buildTree.js +++ b/packages/yoastseo/spec/specHelpers/parse/buildTree.js @@ -14,7 +14,7 @@ import permanentFilters from "../../../src/parse/build/private/alwaysFilterEleme */ export default function buildTree( paper, researcher ) { const languageProcessor = new LanguageProcessor( researcher ); - paper.setTree( build( paper.getText(), languageProcessor ) ); + paper.setTree( build( paper, languageProcessor ) ); } /** diff --git a/packages/yoastseo/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js b/packages/yoastseo/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js index 961bdf2cfe4..4cc6d80a402 100644 --- a/packages/yoastseo/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js +++ b/packages/yoastseo/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js @@ -14,7 +14,6 @@ const markEnd = "
"; * @returns {string} The sentence with the added `yoastmark` tags. */ const createMarksForSentence = ( sentence, matches ) => { - // let sentenceText = sentence.text; const tokens = sentence.tokens; const newTokens = []; @@ -58,12 +57,11 @@ const mergeConsecutiveAndOverlappingMarkings = ( markings, useSpace ) => { const lastMarking = newMarkings[ newMarkings.length - 1 ]; - if ( lastMarking.getPositionEnd() + ( useSpace ? 1 : 0 ) === marking.getPositionStart() ) { + if ( ( lastMarking.getPositionEnd() + ( useSpace ? 1 : 0 ) === marking.getPositionStart() ) || + ( marking.getPositionStart() <= lastMarking.getPositionEnd() ) ) { // The marking is consecutive to the last marking, so we extend the last marking to include the new marking. lastMarking.setPositionEnd( marking.getPositionEnd() ); - } else if ( marking.getPositionStart() <= lastMarking.getPositionEnd() ) { - // The marking overlaps with the last marking, so we extend the last marking to include the new marking. - lastMarking.setPositionEnd( marking.getPositionEnd() ); + lastMarking.setBlockPositionEnd( marking.getBlockPositionEnd() ); } else { // The marking is not consecutive to the last marking, so we add it to the array by itself. newMarkings.push( marking ); @@ -108,9 +106,9 @@ function getMarkingsInSentence( sentence, matchesInSentence, useSpace = true ) { startOffset: startOffset, endOffset: endOffset, // relative to start of block positions. - startOffsetBlock: startOffset - ( sentence.parentStartOffset || 0 ), - endOffsetBlock: endOffset - ( sentence.parentStartOffset || 0 ), - blockIndex: sentence.blockIndex, + startOffsetBlock: startOffset - sentence.parentStartOffset, + endOffsetBlock: endOffset - sentence.parentStartOffset, + clientId: sentence.clientId, }, marked: markedSentence, original: sentence.text, diff --git a/packages/yoastseo/src/languageProcessing/helpers/sentence/getSentencesFromTree.js b/packages/yoastseo/src/languageProcessing/helpers/sentence/getSentencesFromTree.js index bd102392204..e3d1f69df69 100644 --- a/packages/yoastseo/src/languageProcessing/helpers/sentence/getSentencesFromTree.js +++ b/packages/yoastseo/src/languageProcessing/helpers/sentence/getSentencesFromTree.js @@ -13,7 +13,8 @@ export default function( paper ) { return tree.flatMap( node => node.sentences.map( s => { return { ...s, - parentStartOffset: ( node.sourceCodeLocation && node.sourceCodeLocation.startTag ) ? node.sourceCodeLocation.startTag.endOffset : 0, - blockIndex: node.blockIndex }; + parentStartOffset: ( node.sourceCodeLocation.startTag && node.sourceCodeLocation.startTag.endOffset ) || + node.sourceCodeLocation.startOffset, + clientId: node.clientId }; } ) ); } diff --git a/packages/yoastseo/src/parse/build/build.js b/packages/yoastseo/src/parse/build/build.js index 2591f0d3f36..eefcff13224 100644 --- a/packages/yoastseo/src/parse/build/build.js +++ b/packages/yoastseo/src/parse/build/build.js @@ -6,55 +6,23 @@ import tokenize from "./private/tokenize"; import filterTree from "./private/filterTree"; import permanentFilters from "./private/alwaysFilterElements"; import { filterBeforeTokenizing } from "./private/filterBeforeTokenizing"; - - -/** - * Sets the block index to the node. - * - * @param {number} index The block index. - * @param {Node} node The node to check. 
- * @returns {void} - */ -const setBlockIndexOnNode = ( index, node ) => { - node.blockIndex = index; - if ( node.childNodes && node.childNodes.length > 0 ) { - node.childNodes.map( childNode => setBlockIndexOnNode( index, childNode ) ); - } -}; - -/** - * Retrieves the block index and sets them to Node. - * - * @param {Node} node The node to check. - * @returns {void} - */ -const getAnSetBlockIndex = ( node ) => { - let index = -1; - for ( const topNode of node.childNodes ) { - if ( topNode.name === "#comment" && topNode.data.trim().startsWith( "wp:" ) ) { - index++; - } else if ( topNode.name === "#comment" && topNode.data.trim().startsWith( "/wp:" ) ) { - continue; - } else { - setBlockIndexOnNode( index, topNode ); - } - } -}; +import parseBlocks from "./private/parseBlocks"; /** * Parses the HTML string to a tree representation of * the HTML document. * - * @param {string} htmlString The HTML string. + * @param {Paper} paper The HTML string. * @param {LanguageProcessor} languageProcessor The language processor to use. * * @returns {Node} The tree representation of the HTML string. */ -export default function build( htmlString, languageProcessor ) { - let tree = adapt( parseFragment( htmlString, { sourceCodeLocationInfo: true } ) ); +export default function build( paper, languageProcessor ) { + const html = paper.getText(); + let tree = adapt( parseFragment( html, { sourceCodeLocationInfo: true } ) ); if ( tree.childNodes && tree.childNodes.length > 0 ) { - getAnSetBlockIndex( tree ); + parseBlocks( paper, tree ); } /* diff --git a/packages/yoastseo/src/scoring/assessor.js b/packages/yoastseo/src/scoring/assessor.js index aeae6742200..b0dce17a734 100644 --- a/packages/yoastseo/src/scoring/assessor.js +++ b/packages/yoastseo/src/scoring/assessor.js @@ -124,7 +124,7 @@ Assessor.prototype.assess = function( paper ) { this._researcher.setPaper( paper ); const languageProcessor = new LanguageProcessor( this._researcher ); - paper.setTree( build( paper.getText(), languageProcessor ) ); + paper.setTree( build( paper, languageProcessor ) ); let assessments = this.getAvailableAssessments(); this.results = []; diff --git a/packages/yoastseo/src/worker/AnalysisWebWorker.js b/packages/yoastseo/src/worker/AnalysisWebWorker.js index f480b24e80e..4f8eed22d91 100644 --- a/packages/yoastseo/src/worker/AnalysisWebWorker.js +++ b/packages/yoastseo/src/worker/AnalysisWebWorker.js @@ -1082,7 +1082,7 @@ export default class AnalysisWebWorker { const languageProcessor = new LanguageProcessor( this._researcher ); - this._paper.setTree( build( this._paper.getText(), languageProcessor ) ); + this._paper.setTree( build( this._paper, languageProcessor ) ); // Update the configuration locale to the paper locale. this.setLocale( this._paper.getLocale() ); @@ -1405,7 +1405,7 @@ export default class AnalysisWebWorker { // Build and set the tree if it's not been set before. 
if ( paper.getTree() === null ) { const languageProcessor = new LanguageProcessor( researcher ); - paper.setTree( build( paper.getText(), languageProcessor ) ); + paper.setTree( build( paper, languageProcessor ) ); } } From 4d5d6a80e84a54f73e3757d4fd03708d4b3eed69 Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Mon, 17 Jul 2023 14:56:32 +0200 Subject: [PATCH 241/616] Parses blocks --- .../src/parse/build/private/parseBlocks.js | 133 ++++++++++++++++++ 1 file changed, 133 insertions(+) create mode 100644 packages/yoastseo/src/parse/build/private/parseBlocks.js diff --git a/packages/yoastseo/src/parse/build/private/parseBlocks.js b/packages/yoastseo/src/parse/build/private/parseBlocks.js new file mode 100644 index 00000000000..22b123c5d5b --- /dev/null +++ b/packages/yoastseo/src/parse/build/private/parseBlocks.js @@ -0,0 +1,133 @@ + +// /** +// * Sets the block index to the node. +// * +// * @param {number} index The block index. +// * @param {Node} node The node to check. +// * @returns {void} +// */ +// const setBlockIndexOnNode = ( index, node ) => { +// node.blockIndex = index; +// if ( node.childNodes && node.childNodes.length > 0 ) { +// node.childNodes.map( childNode => setBlockIndexOnNode( index, childNode ) ); +// } +// }; +// +// /** +// * Retrieves the block index and sets them to Node. +// * +// * @param {Node} node The node to check. +// * @returns {void} +// */ +// export const getAnSetBlockIndex = ( node ) => { +// let index = -1; +// for ( const topNode of node.childNodes ) { +// if ( topNode.name === "#comment" && topNode.data.trim().startsWith( "wp:" ) ) { +// index++; +// } else if ( topNode.name === "#comment" && topNode.data.trim().startsWith( "/wp:" ) ) { +// continue; +// } else { +// setBlockIndexOnNode( index, topNode ); +// } +// } +// }; + + +const getBlockName = ( node ) => { + const supportedBlocks = [ "paragraph", "list", "list-item", "heading", "audio", "embed", "gallery", "image", "table", "video" ]; + const blockNames = supportedBlocks.map( block => [ "wp:".concat( block ), "/wp:".concat( block ), "core/".concat( block ) ] ); + const foundBlockName = { blockName: "", closingName: "" }; + blockNames.forEach( name => { + if ( foundBlockName.blockName === "" && node.data && node.data.trim().startsWith( name[ 0 ] ) ) { + foundBlockName.blockName = name[ 2 ]; + } else if ( foundBlockName.closingName === "" && node.data && node.data.trim().startsWith( name[ 1 ] ) ) { + foundBlockName.closingName = name[ 1 ]; + } + } ); + return foundBlockName; +}; + + +const getYoastBlockName = ( node ) => { + const yoastBlocks = [ "yoast/faq-block", "yoast/how-to-block" ]; + const foundBlockName = { blockName: "", closingName: "" }; + yoastBlocks.forEach( block => { + if ( foundBlockName.blockName === "" && node.data && node.data.trim().startsWith( "wp:".concat( block ) ) ) { + foundBlockName.blockName = block; + } else if ( foundBlockName.closingName === "" && node.data && node.data.trim().startsWith( "/wp:".concat( block ) ) ) { + foundBlockName.closingName = block; + } + } ); + return foundBlockName; +}; + +const setClientIdOnNode = ( index, blockName, node, blocks ) => { + if ( blocks && blocks.length > 0 && blocks[ index ].name === blockName ) { + node.clientId = blocks[ index ].clientId; + if ( node.childNodes && node.childNodes.length > 0 ) { + node.childNodes.map( childNode => setClientIdOnNode( index, blockName, childNode, blocks ) ); + } + } +}; + +const getAllBlocks = ( paper ) => { + const blocks = paper._attributes.wpBlocks; + const flattenBlocks = []; + if ( ! 
( blocks && blocks.length > 0 ) ) { + return []; + } + blocks.forEach( ( block ) => { + if ( block.innerBlocks.length > 0 ) { + const innerBlocks = block.innerBlocks; + flattenBlocks.push( + block, ...innerBlocks + ); + } else { + flattenBlocks.push( block ); + } + } ); + return flattenBlocks; +}; + +export default function( paper, node ) { + const blocks = paper._attributes.wpBlocks; + // const blocks = getAllBlocks( paper ); + let index = -1; + let blockName = ""; + + // const getAndSetClientID = ( tree ) => { + // let blockName = ""; + // + // for ( const topNode of tree.childNodes ) { + // const foundBlockName = getBlockName( topNode ); + // const foundYoastBlockName = getYoastBlockName( topNode ); + // if ( foundBlockName.blockName.length > 0 || foundYoastBlockName.blockName.length > 0 ) { + // blockName = foundBlockName.blockName; + // index++; + // } else if ( foundBlockName.closingName.length > 0 || foundYoastBlockName.closingName.length > 0 ) { + // continue; + // } else { + // if ( blocks.length > 0 && blocks[ index ].name === blockName ) { + // topNode.clientId = blocks[ index ].clientId; + // if ( topNode.childNodes && topNode.childNodes.length > 0 ) { + // getAndSetClientID( topNode ); + // } + // } + // // setClientIdOnNode( index, blockName, topNode, blocks ); + // } + // } + // }; + // getAndSetClientID( node ); + for ( const topNode of node.childNodes ) { + const foundBlockName = getBlockName( topNode ); + const foundYoastBlockName = getYoastBlockName( topNode ); + if ( foundBlockName.blockName.length > 0 || foundYoastBlockName.blockName.length > 0 ) { + blockName = foundBlockName.blockName; + index++; + } else if ( foundBlockName.closingName.length > 0 || foundYoastBlockName.closingName.length > 0 ) { + continue; + } else { + setClientIdOnNode( index, blockName, topNode, blocks ); + } + } +} From 7c72b544dffd065d59b6499aaa2ef55802ece024 Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Tue, 18 Jul 2023 15:39:22 +0200 Subject: [PATCH 242/616] Use the sentences from the text for Japanese analysis --- .../match/matchWordFormsWithSentenceSpec.js | 35 +++---------------- .../researches/keywordCountSpec.js | 6 ++-- .../match/matchWordFormsWithSentence.js | 21 ++++++----- .../researches/keywordCount.js | 11 +++--- 4 files changed, 27 insertions(+), 46 deletions(-) diff --git a/packages/yoastseo/spec/languageProcessing/helpers/match/matchWordFormsWithSentenceSpec.js b/packages/yoastseo/spec/languageProcessing/helpers/match/matchWordFormsWithSentenceSpec.js index 19b57337917..ea5c264d7be 100644 --- a/packages/yoastseo/spec/languageProcessing/helpers/match/matchWordFormsWithSentenceSpec.js +++ b/packages/yoastseo/spec/languageProcessing/helpers/match/matchWordFormsWithSentenceSpec.js @@ -543,34 +543,19 @@ describe.each( exactMatchingTestCases )( "find keyphrase forms in sentence when const japaneseTestCases = [ { testDescription: "matches one occurrence of word form in the sentence", - sentence: { - text: "私の猫はかわいいです。", - tokens: [ - { text: "私の猫はかわいいです。", sourceCodeRange: { startOffset: 0, endOffset: 12 } }, - ], - sourceCodeRange: { startOffset: 0, endOffset: 12 } }, + sentence: "私の猫はかわいいです。", wordForms: [ "猫" ], expectedResult: { count: 1, matches: [ "猫" ] }, }, { testDescription: "matches all occurrences of two word forms in the sentence", - sentence: { - text: "会う私の猫はかわいいですかわいい会い。", - tokens: [ - { text: "私の猫はかわいいですかわいい。", sourceCodeRange: { startOffset: 0, endOffset: 15 } }, - ], - sourceCodeRange: { startOffset: 0, endOffset: 15 } }, + sentence: "会う私の猫はかわいいですかわいい会い。", 
wordForms: [ "会う", "会い" ], expectedResult: { count: 2, matches: [ "会う", "会い" ] }, }, { testDescription: "matches all occurrences of one word form in the sentence", - sentence: { - text: "これによって少しでも夏休み明けの感染者数を抑えたいという事だけど、どうなるかな者数。", - tokens: [ - { text: "私の猫はかわいいですかわいい。", sourceCodeRange: { startOffset: 0, endOffset: 15 } }, - ], - sourceCodeRange: { startOffset: 0, endOffset: 15 } }, + sentence: "これによって少しでも夏休み明けの感染者数を抑えたいという事だけど、どうなるかな者数。", wordForms: [ "者数" ], expectedResult: { count: 2, matches: [ "者数", "者数" ] }, }, @@ -590,23 +575,13 @@ describe.each( japaneseTestCases )( "test for matching word forms for Japanese: const exactMatchJapaneseData = [ { testDescription: "matches only the exact form of the word forms in the sentence with the same word order", - sentence: { - text: "一日一冊の本を読むのはできるかどうかやってみます。", - tokens: [ - { text: "私の猫はかわいいですかわいい。", sourceCodeRange: { startOffset: 0, endOffset: 15 } }, - ], - sourceCodeRange: { startOffset: 0, endOffset: 15 } }, + sentence: "一日一冊の本を読むのはできるかどうかやってみます。", wordForms: [ "『一冊の本を読む』" ], expectedResult: { count: 1, matches: [ "一冊の本を読む" ] }, }, { testDescription: "doesn't match the occurrence of the word forms in the sentence with different word order", - sentence: { - text: "一日一冊の面白い本を買って読んでるのはできるかどうかやってみます。", - tokens: [ - { text: "私の猫はかわいいですかわいい。", sourceCodeRange: { startOffset: 0, endOffset: 15 } }, - ], - sourceCodeRange: { startOffset: 0, endOffset: 15 } }, + sentence: "一日一冊の面白い本を買って読んでるのはできるかどうかやってみます。", wordForms: [ "『一冊の本を読む』" ], expectedResult: { count: 0, matches: [] }, }, diff --git a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js index e2dbc6d7322..1dcc3029854 100644 --- a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js +++ b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js @@ -979,7 +979,7 @@ const buildJapaneseMockResearcher = function( keyphraseForms, helper1, helper2 ) describe( "Test for counting the keyphrase in a text for Japanese", () => { // NOTE: Japanese is not yet adapted to use HTML parser, hence, the marking out doesn't include the position information. it( "counts/marks a string of text with a keyphrase in it.", function() { - const mockPaper = new Paper( "

<p>私の猫はかわいいです。私の猫はかわいいです。</p>
", { locale: "ja", keyphrase: "猫" } ); const researcher = buildJapaneseMockResearcher( [ [ "猫" ] ], wordsCountHelper, matchWordsHelper ); buildTree( mockPaper, researcher ); @@ -990,7 +990,7 @@ describe( "Test for counting the keyphrase in a text for Japanese", () => { } ); it( "counts/marks a string of text with multiple occurrences of the same keyphrase in it.", function() { - const mockPaper = new Paper( "

<p>私の猫はかわいい猫です。私の猫はかわいい猫です。</p>
", { locale: "ja", keyphrase: "猫" } ); const researcher = buildJapaneseMockResearcher( [ [ "猫" ] ], wordsCountHelper, matchWordsHelper ); buildTree( mockPaper, researcher ); @@ -1002,7 +1002,7 @@ describe( "Test for counting the keyphrase in a text for Japanese", () => { } ); it( "counts a string if text with no keyphrase in it.", function() { - const mockPaper = new Paper( "私の猫はかわいいです。", { locale: "ja" } ); + const mockPaper = new Paper( "

<p>私の猫はかわいいです。</p>
", { locale: "ja" } ); const researcher = buildJapaneseMockResearcher( [ [ "猫" ], [ "会い" ] ], wordsCountHelper, matchWordsHelper ); buildTree( mockPaper, researcher ); expect( getKeyphraseCount( mockPaper, researcher ).count ).toBe( 0 ); diff --git a/packages/yoastseo/src/languageProcessing/helpers/match/matchWordFormsWithSentence.js b/packages/yoastseo/src/languageProcessing/helpers/match/matchWordFormsWithSentence.js index 8aaa3da9d70..4a82c1bdabe 100644 --- a/packages/yoastseo/src/languageProcessing/helpers/match/matchWordFormsWithSentence.js +++ b/packages/yoastseo/src/languageProcessing/helpers/match/matchWordFormsWithSentence.js @@ -104,24 +104,27 @@ const matchWordFormInTokens = ( tokens, wordForm, locale ) => { /** * Finds keyphrase forms in a sentence. * - * @param {Sentence} sentence The sentence to check. - * @param {string[]} wordForms The word forms of the keyphrase to check. - * @param {string} locale The locale used in the analysis. - * @param {function} matchWordCustomHelper Custom function to match a word form with sentence. + * @param {Sentence|string} sentence The sentence to check. + * @param {string[]} wordForms The word forms of the keyphrase to check. + * @param {string} locale The locale used in the analysis. + * @param {function} matchWordCustomHelper Custom function to match a word form with sentence. * * @returns {{count: number, matches: (Token|string)[]}} Object containing the number of the matches and the matched tokens. */ const matchWordFormsInSentence = ( sentence, wordForms, locale, matchWordCustomHelper ) => { - const tokens = sentence.tokens.slice(); const result = { count: 0, matches: [], }; wordForms.forEach( wordForm => { - const occurrences = matchWordCustomHelper - ? matchWordCustomHelper( sentence.text, wordForm ) - : matchWordFormInTokens( tokens, wordForm, locale ); + let occurrences = []; + if ( matchWordCustomHelper ) { + occurrences = matchWordCustomHelper( sentence, wordForm ); + } else { + const tokens = sentence.tokens.slice(); + occurrences = matchWordFormInTokens( tokens, wordForm, locale ); + } result.count += occurrences.length; result.matches = result.matches.concat( occurrences ); } ); @@ -132,7 +135,7 @@ const matchWordFormsInSentence = ( sentence, wordForms, locale, matchWordCustomH /** * Matches the word forms of a keyphrase with a sentence object from the html parser. * - * @param {Sentence} sentence The sentence to match against the word forms of a keyphrase. + * @param {Sentence|string} sentence The sentence to match against the word forms of a keyphrase. * @param {string[]} wordForms The array of word forms of the keyphrase. * E.g. If the keyphrase is "key word", then (if premium is activated) this will be [ "key", "keys" ] OR [ "word", "words" ] * The forms are retrieved higher up (among others in keywordCount.js) with researcher.getResearch( "morphology" ). 
diff --git a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js index 31394474c4c..e26b819b8ad 100644 --- a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js +++ b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js @@ -5,11 +5,12 @@ import getMarkingsInSentence from "../helpers/highlighting/getMarkingsInSentence import matchWordFormsWithSentence from "../helpers/match/matchWordFormsWithSentence"; import isDoubleQuoted from "../helpers/match/isDoubleQuoted"; import { markWordsInASentence } from "../helpers/word/markWordsInSentences"; +import getSentences from "../helpers/sentence/getSentences"; /** * Counts the occurrences of the keyphrase in the text and creates the Mark objects for the matches. * - * @param {Sentence[]} sentences The sentences to check. + * @param {(Sentence|string)[]} sentences The sentences to check. * @param {Array} keyphraseForms The keyphrase forms. * @param {string} locale The locale used in the analysis. * @param {function} matchWordCustomHelper A custom helper to match words with a text. @@ -49,7 +50,7 @@ export function countKeyphraseInText( sentences, keyphraseForms, locale, matchWo if ( matchWordCustomHelper ) { // Currently, this check is only applicable for Japanese. - markings = markWordsInASentence( sentence.text, foundWords, matchWordCustomHelper ); + markings = markWordsInASentence( sentence, foundWords, matchWordCustomHelper ); } else { markings = getMarkingsInSentence( sentence, foundWords ); } @@ -78,7 +79,7 @@ export default function getKeyphraseCount( paper, researcher ) { /* * Normalize single quotes so that word form with different type of single quotes can still be matched. - * For exmple, "key‛word" should match "key'word". + * For example, "key‛word" should match "key'word". */ keyphraseForms = keyphraseForms.map( word => word.map( form => normalizeSingle( form ) ) ); @@ -87,8 +88,10 @@ export default function getKeyphraseCount( paper, researcher ) { } const matchWordCustomHelper = researcher.getHelper( "matchWordCustomHelper" ); + const customSentenceTokenizer = researcher.getHelper( "memoizedTokenizer" ); const locale = paper.getLocale(); - const sentences = getSentencesFromTree( paper ); + // When the custom helper is available, we're using the sentences retrieved from the text for the analysis. + const sentences = matchWordCustomHelper ? getSentences( paper.getText(), customSentenceTokenizer ) : getSentencesFromTree( paper ); // Exact matching is requested when the keyphrase is enclosed in double quotes. const isExactMatchRequested = isDoubleQuoted( paper.getKeyword() ); From 15b2c21d8d83d039c7522b4ece11eee28cf1eef6 Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Tue, 18 Jul 2023 15:43:42 +0200 Subject: [PATCH 243/616] Adapt japanese paper --- .../yoastseo/spec/fullTextTests/testTexts/ja/japanesePaper.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/yoastseo/spec/fullTextTests/testTexts/ja/japanesePaper.js b/packages/yoastseo/spec/fullTextTests/testTexts/ja/japanesePaper.js index 6248ea48508..956e0d39262 100644 --- a/packages/yoastseo/spec/fullTextTests/testTexts/ja/japanesePaper.js +++ b/packages/yoastseo/spec/fullTextTests/testTexts/ja/japanesePaper.js @@ -30,7 +30,7 @@ const expectedResults = { keywordDensity: { isApplicable: true, score: 4, - resultText: "Keyphrase density: The keyphrase was found 2 times. " + + resultText: "Keyphrase density: The keyphrase was found 3 times. 
" + "That's less than the recommended minimum of 8 times for a text of this length. " + "Focus on your keyphrase!", }, From cf1d124fb5cccb4f5233471b5712b32b3b6867dd Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Tue, 18 Jul 2023 16:03:04 +0200 Subject: [PATCH 244/616] remove check for useSpace in getMarkingsInSentence.js --- .../highlighting/getMarkingsInSentenceSpec.js | 8 +------- .../helpers/highlighting/getMarkingsInSentence.js | 13 ++++++------- 2 files changed, 7 insertions(+), 14 deletions(-) diff --git a/packages/yoastseo/spec/languageProcessing/helpers/highlighting/getMarkingsInSentenceSpec.js b/packages/yoastseo/spec/languageProcessing/helpers/highlighting/getMarkingsInSentenceSpec.js index 56058c93d8f..caaea8d1a52 100644 --- a/packages/yoastseo/spec/languageProcessing/helpers/highlighting/getMarkingsInSentenceSpec.js +++ b/packages/yoastseo/spec/languageProcessing/helpers/highlighting/getMarkingsInSentenceSpec.js @@ -16,7 +16,6 @@ const testCases = [ { text: ".", sourceCodeRange: { startOffset: 18, endOffset: 19 } }, ] }, matchesInSentence: [], - useSpace: true, expectedResult: [], }, { @@ -34,7 +33,6 @@ const testCases = [ ], }, matchesInSentence: [ { sourceCodeRange: { startOffset: 0, endOffset: 4 } } ], - useSpace: true, expectedResult: [ new Mark( { marked: "This is a sentence.", original: "This is a sentence.", @@ -56,7 +54,6 @@ const testCases = [ ], }, matchesInSentence: [ { sourceCodeRange: { startOffset: 0, endOffset: 4 } }, { sourceCodeRange: { startOffset: 5, endOffset: 7 } } ], - useSpace: true, expectedResult: [ new Mark( { marked: "This is a sentence.", original: "This is a sentence.", @@ -78,7 +75,6 @@ const testCases = [ ], }, matchesInSentence: [ { sourceCodeRange: { startOffset: 0, endOffset: 4 } }, { sourceCodeRange: { startOffset: 10, endOffset: 18 } } ], - useSpace: true, expectedResult: [ new Mark( { marked: "This is a sentence.", @@ -107,7 +103,6 @@ const testCases = [ ], }, matchesInSentence: [ { sourceCodeRange: { startOffset: 10, endOffset: 14 } } ], - useSpace: true, expectedResult: [ new Mark( { marked: "This is a sentence.", original: "This is a sentence.", @@ -120,9 +115,8 @@ describe.each( testCases )( "a test for getting the marks from a sentence", ( { testDescription, sentence, matchesInSentence, - useSpace, expectedResult } ) => { it( testDescription, () => { - expect( getMarkingsInSentence( sentence, matchesInSentence, useSpace ) ).toEqual( expectedResult ); + expect( getMarkingsInSentence( sentence, matchesInSentence ) ).toEqual( expectedResult ); } ); } ); diff --git a/packages/yoastseo/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js b/packages/yoastseo/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js index a727facf39a..e2286d28e27 100644 --- a/packages/yoastseo/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js +++ b/packages/yoastseo/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js @@ -37,11 +37,10 @@ const createMarksForSentence = ( sentence, matches ) => { * This is a helper for position-based highlighting. * * @param {Mark[]} markings An array of markings to merge. - * @param {Boolean} useSpace Whether words are separated by a space. * * @returns {Mark[]} An array of markings where consecutive and overlapping markings are merged. */ -const mergeConsecutiveAndOverlappingMarkings = ( markings, useSpace ) => { +const mergeConsecutiveAndOverlappingMarkings = ( markings ) => { const newMarkings = []; // Sort markings by start offset. 
This is probably redundant, but for extra safety. @@ -56,8 +55,9 @@ const mergeConsecutiveAndOverlappingMarkings = ( markings, useSpace ) => { } const lastMarking = newMarkings[ newMarkings.length - 1 ]; - - if ( lastMarking.getPositionEnd() + ( useSpace ? 1 : 0 ) === marking.getPositionStart() ) { + // Adding 1 to the position end, with the assumption that the language uses spaces. + // When we adapt Japanese to use this helper, this check should also be adapted. + if ( lastMarking.getPositionEnd() + 1 === marking.getPositionStart() ) { // The marking is consecutive to the last marking, so we extend the last marking to include the new marking. lastMarking.setPositionEnd( marking.getPositionEnd() ); } else if ( marking.getPositionStart() <= lastMarking.getPositionEnd() ) { @@ -80,11 +80,10 @@ const mergeConsecutiveAndOverlappingMarkings = ( markings, useSpace ) => { * * @param {Sentence} sentence The sentence to check. * @param {Token[]} matchesInSentence An array containing the keyphrase matches in the sentence. - * @param {Boolean} useSpace Whether words are separated by a space. Default to true. * * @returns {Mark[]} The array of Mark objects of the keyphrase matches in the sentence. */ -function getMarkingsInSentence( sentence, matchesInSentence, useSpace = true ) { +function getMarkingsInSentence( sentence, matchesInSentence ) { if ( matchesInSentence.length === 0 ) { return []; } @@ -110,7 +109,7 @@ function getMarkingsInSentence( sentence, matchesInSentence, useSpace = true ) { } ); } ); - return mergeConsecutiveAndOverlappingMarkings( markings, useSpace ); + return mergeConsecutiveAndOverlappingMarkings( markings ); } export default getMarkingsInSentence; From c125b5111f2409a55791e5923649ae90f834fbed Mon Sep 17 00:00:00 2001 From: Mykola Shlyakhtun Date: Tue, 18 Jul 2023 17:07:48 +0300 Subject: [PATCH 245/616] Implement set block client id for tree nodes. --- packages/yoastseo/src/parse/build/build.js | 1 + .../src/parse/build/private/parseBlocks.js | 93 ++++++++++++++++--- packages/yoastseo/src/values/Paper.js | 1 + 3 files changed, 81 insertions(+), 14 deletions(-) diff --git a/packages/yoastseo/src/parse/build/build.js b/packages/yoastseo/src/parse/build/build.js index eefcff13224..fac419f78fb 100644 --- a/packages/yoastseo/src/parse/build/build.js +++ b/packages/yoastseo/src/parse/build/build.js @@ -18,6 +18,7 @@ import parseBlocks from "./private/parseBlocks"; * @returns {Node} The tree representation of the HTML string. */ export default function build( paper, languageProcessor ) { + // console.log( "build paper", paper ); const html = paper.getText(); let tree = adapt( parseFragment( html, { sourceCodeLocationInfo: true } ) ); diff --git a/packages/yoastseo/src/parse/build/private/parseBlocks.js b/packages/yoastseo/src/parse/build/private/parseBlocks.js index 22b123c5d5b..75c64be5c07 100644 --- a/packages/yoastseo/src/parse/build/private/parseBlocks.js +++ b/packages/yoastseo/src/parse/build/private/parseBlocks.js @@ -1,4 +1,6 @@ +const blockTokenizer = /)[^])*)\5|[^]*?)}\s+)?(\/)?-->/g; + // /** // * Sets the block index to the node. 
// * @@ -89,11 +91,56 @@ const getAllBlocks = ( paper ) => { return flattenBlocks; }; +function updateBlocksOffset( blocks, text ) { + // eslint-disable-next-line no-constant-condition + blocks.forEach( currentBlock => { + const matches = blockTokenizer.exec( text ); + + if ( null === matches ) { + return; + } + + const [ match ] = matches; + + const startedAt = matches.index; + const length = match.length; + + currentBlock.startOffset = startedAt; + currentBlock.contentOffset = startedAt + length + 1; + + if ( currentBlock.innerBlocks && currentBlock.innerBlocks.length > 0 ) { + updateBlocksOffset( currentBlock.innerBlocks, text ); + } + } ); +} + +function updateClintIdForSubtree( rootNode, blocks ) { + if ( ! rootNode ) { + return; + } + + if ( rootNode.sourceCodeLocation && rootNode.sourceCodeLocation.startOffset ) { + const block = blocks.find( ( b ) => b.contentOffset === rootNode.sourceCodeLocation.startOffset ); + if ( block ) { + console.log( "found block: ", block ); + rootNode.clientId = block.clientId; + } + } + + ( rootNode.childNodes || [] ).forEach( ( node ) => updateClintIdForSubtree( node, blocks ) ); +} export default function( paper, node ) { + console.log( "parseBlocks", paper, node ); + const blocks = paper._attributes.wpBlocks; + blockTokenizer.lastIndex = 0; + updateBlocksOffset( blocks, paper.getText() ); + + // console.log( "Paper attributes: ", attributes ); + // const blocks = getAllBlocks( paper ); - let index = -1; - let blockName = ""; + // const index = -1; + // const blockName = ""; // const getAndSetClientID = ( tree ) => { // let blockName = ""; @@ -118,16 +165,34 @@ export default function( paper, node ) { // } // }; // getAndSetClientID( node ); - for ( const topNode of node.childNodes ) { - const foundBlockName = getBlockName( topNode ); - const foundYoastBlockName = getYoastBlockName( topNode ); - if ( foundBlockName.blockName.length > 0 || foundYoastBlockName.blockName.length > 0 ) { - blockName = foundBlockName.blockName; - index++; - } else if ( foundBlockName.closingName.length > 0 || foundYoastBlockName.closingName.length > 0 ) { - continue; - } else { - setClientIdOnNode( index, blockName, topNode, blocks ); - } - } + const rawBlocks = getAllBlocks( paper ); + + updateClintIdForSubtree( node, rawBlocks ); + + // for ( const topNode of node.childNodes ) { + // if ( ! topNode.sourceCodeLocation ) { + // continue; + // } + // if ( ! 
topNode.sourceCodeLocation.startOffset ) { + // continue; + // } + // + // const block = rawBlocks.find( ( b ) => b.contentOffset === topNode.sourceCodeLocation.startOffset ); + // if ( block ) { + // console.log( "found block: ", block ); + // topNode.clientId = block.clientId; + // // setClientIdOnNode( index, blockName, topNode, blocks ); + // } + // + // // const foundBlockName = getBlockName( topNode ); + // // const foundYoastBlockName = getYoastBlockName( topNode ); + // // if ( foundBlockName.blockName.length > 0 || foundYoastBlockName.blockName.length > 0 ) { + // // blockName = foundBlockName.blockName; + // // index++; + // // } else if ( foundBlockName.closingName.length > 0 || foundYoastBlockName.closingName.length > 0 ) { + // // continue; + // // } else { + // // setClientIdOnNode( index, blockName, topNode, blocks ); + // // } + // } } diff --git a/packages/yoastseo/src/values/Paper.js b/packages/yoastseo/src/values/Paper.js index 9c191bef2f8..46756c7da71 100644 --- a/packages/yoastseo/src/values/Paper.js +++ b/packages/yoastseo/src/values/Paper.js @@ -68,6 +68,7 @@ function Paper( text, attributes ) { this._attributes = attributes; } + /** * Checks whether a keyword is available. * @returns {boolean} Returns true if the Paper has a keyword. From 4a226dd3d1096b49daeb2ca28dd207012f41026a Mon Sep 17 00:00:00 2001 From: Mykola Shlyakhtun Date: Tue, 18 Jul 2023 17:36:52 +0300 Subject: [PATCH 246/616] Fix set nested nodes block client id. --- .../src/parse/build/private/parseBlocks.js | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/packages/yoastseo/src/parse/build/private/parseBlocks.js b/packages/yoastseo/src/parse/build/private/parseBlocks.js index 75c64be5c07..8f2b2b4f102 100644 --- a/packages/yoastseo/src/parse/build/private/parseBlocks.js +++ b/packages/yoastseo/src/parse/build/private/parseBlocks.js @@ -114,20 +114,26 @@ function updateBlocksOffset( blocks, text ) { } ); } -function updateClintIdForSubtree( rootNode, blocks ) { +function updateClintIdForSubtree( rootNode, blocks, clientId ) { if ( ! rootNode ) { return; } + let currentClientId = clientId; if ( rootNode.sourceCodeLocation && rootNode.sourceCodeLocation.startOffset ) { const block = blocks.find( ( b ) => b.contentOffset === rootNode.sourceCodeLocation.startOffset ); if ( block ) { console.log( "found block: ", block ); - rootNode.clientId = block.clientId; + + currentClientId = block.clientId; } } - ( rootNode.childNodes || [] ).forEach( ( node ) => updateClintIdForSubtree( node, blocks ) ); + if ( currentClientId ) { + rootNode.clientId = currentClientId; + } + + ( rootNode.childNodes || [] ).forEach( ( node ) => updateClintIdForSubtree( node, blocks, currentClientId ) ); } export default function( paper, node ) { console.log( "parseBlocks", paper, node ); @@ -167,8 +173,10 @@ export default function( paper, node ) { // getAndSetClientID( node ); const rawBlocks = getAllBlocks( paper ); - updateClintIdForSubtree( node, rawBlocks ); + // eslint-disable-next-line no-undefined + updateClintIdForSubtree( node, rawBlocks, undefined ); + console.log( "after updateClintIdForSubtree: ", node ); // for ( const topNode of node.childNodes ) { // if ( ! topNode.sourceCodeLocation ) { // continue; From ff32182132a9093c7369a38d78d1e5e3433c6132 Mon Sep 17 00:00:00 2001 From: Mykola Shlyakhtun Date: Wed, 19 Jul 2023 22:01:20 +0300 Subject: [PATCH 247/616] Use javascript native map function instead of foreach and push and lodash map. 
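The change is a straightforward swap from lodash's map and forEach-plus-push loops to the native Array.prototype.map. A minimal sketch of the pattern with illustrative data, not taken from the diff below:

const words = [ "cat", "dog" ];

// Before: build the result by pushing inside a forEach loop (or by calling lodash's map).
const upper = [];
words.forEach( word => upper.push( word.toUpperCase() ) );

// After: the native map() returns the transformed array directly, with no intermediate array to push into.
const upperMapped = words.map( word => word.toUpperCase() );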
--- .../yoastseo/src/languageProcessing/researches/h1s.js | 10 ++++------ .../src/scoring/assessments/seo/SingleH1Assessment.js | 4 ++-- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/packages/yoastseo/src/languageProcessing/researches/h1s.js b/packages/yoastseo/src/languageProcessing/researches/h1s.js index c4ef7d1d7e8..aceff10b0b2 100644 --- a/packages/yoastseo/src/languageProcessing/researches/h1s.js +++ b/packages/yoastseo/src/languageProcessing/researches/h1s.js @@ -11,15 +11,13 @@ export default function( paper ) { const h1Matches = tree.findAll( node => node.name === "h1" ); - const h1s = []; - - h1Matches.forEach( h1Match => { - h1s.push( { + const h1s = h1Matches.map( h1Match => ( + { tag: "h1", content: h1Match.findAll( node => node.name === "#text" ).map( textNode => textNode.value ).join( "" ), position: { startOffset: h1Match.sourceCodeLocation.startTag.endOffset, endOffset: h1Match.sourceCodeLocation.endTag.startOffset }, - } ); - } ); + } + ) ); return h1s; } diff --git a/packages/yoastseo/src/scoring/assessments/seo/SingleH1Assessment.js b/packages/yoastseo/src/scoring/assessments/seo/SingleH1Assessment.js index d6c5023b15a..9ff0476f891 100644 --- a/packages/yoastseo/src/scoring/assessments/seo/SingleH1Assessment.js +++ b/packages/yoastseo/src/scoring/assessments/seo/SingleH1Assessment.js @@ -1,5 +1,5 @@ import { __, sprintf } from "@wordpress/i18n"; -import { isUndefined, map, merge } from "lodash-es"; +import { isUndefined, merge } from "lodash-es"; import Assessment from "../assessment.js"; import { createAnchorOpeningTag } from "../../../helpers/shortlinker"; @@ -91,7 +91,7 @@ class SingleH1Assessment extends Assessment { * @returns {Array} Array with all the marked H1s. */ getMarks() { - return map( this._h1s, function( h1 ) { + return this._h1s.map( function( h1 ) { return new Mark( { original: "
<h1>" + h1.content + "</h1>",
marked: "<h1>" + marker( h1.content ) + "</h1>
", From 80d225039d14f662bd61836f2b37ddcb5a6eca7d Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Fri, 21 Jul 2023 09:48:46 +0200 Subject: [PATCH 248/616] fix merge conflict --- .../helpers/highlighting/getMarkingsInSentence.js | 3 +++ 1 file changed, 3 insertions(+) diff --git a/packages/yoastseo/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js b/packages/yoastseo/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js index 06d31f52d50..7b011eb10ce 100644 --- a/packages/yoastseo/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js +++ b/packages/yoastseo/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js @@ -67,6 +67,7 @@ const mergeConsecutiveAndOverlappingMarkings = ( markings ) => { newMarkings.push( marking ); } } ); + console.log( newMarkings, "NEW MARKINGS" ); return newMarkings; }; @@ -113,6 +114,8 @@ function getMarkingsInSentence( sentence, matchesInSentence ) { original: sentence.text, } ); } ); + // console.log( markings, "MARKINSG YST" ); + // console.log( sentence.parentStartOffset, "offset parent" ); return mergeConsecutiveAndOverlappingMarkings( markings ); } From 9bb70db7375fe2aceabdc87a7cfac058560e017f Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Fri, 21 Jul 2023 17:04:44 +0200 Subject: [PATCH 249/616] Add more methods in Mark.js and adjust hasBlockPosition method --- packages/yoastseo/src/values/Mark.js | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/packages/yoastseo/src/values/Mark.js b/packages/yoastseo/src/values/Mark.js index 2299ccf0ede..81d69c8597f 100644 --- a/packages/yoastseo/src/values/Mark.js +++ b/packages/yoastseo/src/values/Mark.js @@ -137,12 +137,33 @@ Mark.prototype.getBlockIndex = function() { /** * Gets the block client id. * - * @returns {string} The block index. + * @returns {string} The block client id. */ Mark.prototype.getBlockClientId = function() { return this._properties.position && this._properties.position.clientId; }; +/** + * Gets the block attribute id. + * + * @returns {string} The block attribute id. + */ +Mark.prototype.getBlockAttributeId = function() { + return this._properties.position && this._properties.position.attributeId; +}; + + +/** + * Checks if the mark object is intended for the first pair of the annotatble fields. + * This method will be used only for Yoast blocks where each block consists of sub-blocks + * with a pair of annotatable fields. + * + * @returns {boolean} Whether the mark object is intended for the first pair of the annotatble fields. + */ +Mark.prototype.isMarkForFirstBlockPair = function() { + return this._properties.position && this._properties.position.isFirstPair; +}; + /** * Returns the start position inside block. * @@ -238,8 +259,9 @@ Mark.prototype.hasPosition = function() { * @returns {boolean} Returns true if the Mark object has block position information, false otherwise. */ Mark.prototype.hasBlockPosition = function() { - return !! this.getBlockPositionEnd() && this.getBlockPositionStart(); + return !! this.getBlockPositionStart && this.getBlockPositionStart() >= 0; }; + /** * Parses the object to a Mark. 
* From e375f0a5e47ff230acc21886cd542ad825e3ce52 Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Fri, 21 Jul 2023 17:05:56 +0200 Subject: [PATCH 250/616] Refactor gutenberg.js --- packages/js/src/decorator/gutenberg.js | 164 ++++++++++++++++--------- 1 file changed, 109 insertions(+), 55 deletions(-) diff --git a/packages/js/src/decorator/gutenberg.js b/packages/js/src/decorator/gutenberg.js index 1ea0f6388af..2d36f318a16 100644 --- a/packages/js/src/decorator/gutenberg.js +++ b/packages/js/src/decorator/gutenberg.js @@ -314,20 +314,55 @@ export function calculateAnnotationsForTextFormat( text, mark ) { return blockOffsets; } +const htmlTagsRegex = /(<([^>]+)>)/ig; + /** * Create an annotation if the given mark is position based. * + * @param {string} clientId The client id of the block. * @param {Mark} mark The mark to apply to the content. + * @param {string} html The HTML of the block. + * @param {string} text The text of the block. * * @returns {Array} The annotations to apply. */ -export function createAnnotationsFromPositionBasedMarks( mark ) { - return [ - { - startOffset: mark.getBlockPositionStart(), - endOffset: mark.getBlockPositionEnd(), - }, - ]; +export function createAnnotationsFromPositionBasedMarks( clientId, mark, html, text ) { + if ( clientId === mark.getBlockClientId() ) { + const slicedHtml = html.slice( mark.getBlockPositionStart(), mark.getBlockPositionEnd() ); + const slicedText = text.slice( mark.getBlockPositionStart(), mark.getBlockPositionEnd() ); + // Check if the html and the rich text contain the same text in the specified index. + if ( slicedHtml === slicedText ) { + return [ + { + startOffset: mark.getBlockPositionStart(), + endOffset: mark.getBlockPositionEnd(), + }, + ]; + } + // Adjust block position start and end. + let startOffset = mark.getBlockPositionStart(); + let endOffset = mark.getBlockPositionEnd(); + // Retrieve the html from the start until the startOffset of the mark. + html = html.slice( 0, mark.getBlockPositionStart() ); + // Find all html tags. + const foundHtmlTags = [ ...html.matchAll( htmlTagsRegex ) ]; + /* + * Loop through the found html tags backwards, and adjust the start and end offsets of the mark + * by subtracting them with the length of the found html tags. 
+ */ + for ( let i = foundHtmlTags.length - 1; i >= 0; i-- ) { + const currentHtmlTag = foundHtmlTags[ i ][ 0 ]; + startOffset -= currentHtmlTag.length; + endOffset -= currentHtmlTag.length; + } + return [ + { + startOffset: startOffset, + endOffset: endOffset, + }, + ]; + } + return []; } /** @@ -372,6 +407,7 @@ export function hasInnerBlocks( block ) { */ function createAnnotations( html, richTextIdentifier, attribute, block, marks ) { const blockClientId = block.clientId; + const record = create( { html: html, multilineTag: attribute.multilineTag, @@ -380,13 +416,10 @@ function createAnnotations( html, richTextIdentifier, attribute, block, marks ) const text = record.text; - return flatMap( marks, ( ( mark ) => { + return flatMap( marks, mark => { let annotations; if ( mark.hasBlockPosition && mark.hasBlockPosition() ) { - if ( blockClientId !== mark.getBlockClientId() ) { - return []; - } - annotations = createAnnotationsFromPositionBasedMarks( mark ); + annotations = createAnnotationsFromPositionBasedMarks( blockClientId, mark, html, text ); } else { annotations = calculateAnnotationsForTextFormat( text, @@ -401,11 +434,11 @@ function createAnnotations( html, richTextIdentifier, attribute, block, marks ) return annotations.map( annotation => { return { ...annotation, - block: block.clientId, + block: blockClientId, richTextIdentifier: richTextIdentifier, }; } ); - } ) ); + } ); } /** @@ -426,9 +459,21 @@ const getAnnotationsForFAQ = ( attribute, block, marks ) => { const annotations = annotatableTexts.map( item => { const identifierQuestion = `${ item.id }-question`; const identifierAnswer = `${ item.id }-answer`; + const marksForQuestion = marks.filter( mark => { + if ( mark.hasBlockPosition() ) { + return mark.getBlockAttributeId() === item.id && mark.isMarkForFirstBlockPair(); + } + return mark; + } ); + const marksForAnswer = marks.filter( mark => { + if ( mark.hasBlockPosition() ) { + return mark.getBlockAttributeId() === item.id && ! mark.isMarkForFirstBlockPair(); + } + return mark; + } ); - const annotationsFromQuestion = createAnnotations( item.jsonQuestion, identifierQuestion, attribute, block, marks ); - const annotationsFromAnswer = createAnnotations( item.jsonAnswer, identifierAnswer, attribute, block, marks ); + const annotationsFromQuestion = createAnnotations( item.jsonQuestion, identifierQuestion, attribute, block, marksForQuestion ); + const annotationsFromAnswer = createAnnotations( item.jsonAnswer, identifierAnswer, attribute, block, marksForAnswer ); return annotationsFromQuestion.concat( annotationsFromAnswer ); } ); @@ -456,14 +501,30 @@ const getAnnotationsForHowTo = ( attribute, block, marks ) => { const identifierStepName = `${ item.id }-name`; const identifierStepText = `${ item.id }-text`; - const annotationsFromStepName = createAnnotations( item.jsonName, identifierStepName, attribute, block, marks ); - const annotationsFromStepText = createAnnotations( item.jsonText, identifierStepText, attribute, block, marks ); + const marksForStepName = marks.filter( mark => { + if ( mark.hasBlockPosition() ) { + return mark.getBlockAttributeId() === item.id && mark.isMarkForFirstBlockPair(); + } + return mark; + } ); + const marksForStepText = marks.filter( mark => { + if ( mark.hasBlockPosition() ) { + return mark.getBlockAttributeId() === item.id && ! 
mark.isMarkForFirstBlockPair(); + } + return mark; + } ); + + const annotationsFromStepName = createAnnotations( item.jsonName, identifierStepName, attribute, block, marksForStepName ); + const annotationsFromStepText = createAnnotations( item.jsonText, identifierStepText, attribute, block, marksForStepText ); // For each step return an array of the name-text pair objects. return annotationsFromStepName.concat( annotationsFromStepText ); } ) ); } if ( attribute.key === "jsonDescription" ) { + marks = marks.filter( mark => { + return mark.hasBlockPosition() && ! mark.getBlockAttributeId(); + } ); annotations.push( createAnnotations( annotatableTexts, "description", attribute, block, marks ) ); } return flattenDeep( annotations ); @@ -472,52 +533,37 @@ const getAnnotationsForHowTo = ( attribute, block, marks ) => { /** * Gets the annotations for Yoast FAQ block and Yoast How-To block. * - * @param {Object} attributes The attributes to apply annotations to. + * @param {Object} attribute The attribute to apply annotations to. * @param {Object} block The block information in the state. * @param {Array} marks The marks to turn into annotations. * * @returns {Array} An array of annotations for Yoast blocks. */ -export function getAnnotationsForYoastBlock( attributes, block, marks ) { +export function getAnnotationsForYoastBlock( attribute, block, marks ) { // For Yoast FAQ and How-To blocks, we create separate annotation objects for each individual Rich Text found in the attribute. - return flatMap( attributes, attribute => { - if ( block.name === "yoast/faq-block" ) { - return getAnnotationsForFAQ( attribute, block, marks ); - } - // The check for getting the annotations for Yoast How-To block. - if ( block.name === "yoast/how-to-block" ) { - return getAnnotationsForHowTo( attribute, block, marks ); - } - } ); + if ( block.name === "yoast/faq-block" ) { + return getAnnotationsForFAQ( attribute, block, marks ); + } + // The check for getting the annotations for Yoast How-To block. + if ( block.name === "yoast/how-to-block" ) { + return getAnnotationsForHowTo( attribute, block, marks ); + } } /** * Returns annotations that should be applied to the given attribute. * - * @param {Object} attributes The attributes to apply annotations to. + * @param {Object} attribute The attribute to apply annotations to. * @param {Object} block The block information in the state. * @param {Array} marks The marks to turn into annotations. * * @returns {Array} The annotations to apply. */ -export function getAnnotationsForWPBlock( attributes, block, marks ) { - const annotations = flatMap( attributes, attribute => { - if ( attribute.length === 0 ) { - return []; - } - - const richTextIdentifier = attribute.key; - const html = block.attributes[ richTextIdentifier ]; - - return createAnnotations( html, richTextIdentifier, attribute, block, marks ); - } ); +export function getAnnotationsForWPBlock( attribute, block, marks ) { + const richTextIdentifier = attribute.key; + const html = block.attributes[ richTextIdentifier ]; - if ( hasInnerBlocks( block ) ) { - block.innerBlocks.forEach( innerBlock => { - annotations.concat( getAnnotationsForWPBlock( attributes, innerBlock, marks ) ); - } ); - } - return annotations; + return createAnnotations( html, richTextIdentifier, attribute, block, marks ); } /** @@ -557,12 +603,18 @@ function fillAnnotationQueue( annotations ) { * @param { Mark[] } marks A list of marks that could apply to the block. * @returns { Object[] } All annotations that need to be placed on the block. 
*/ -const getAnnotationsForABlock = ( block, marks ) => { - const attributes = getAnnotatableAttributes( block.name ); - if ( block.name === "yoast/faq-block" || block.name === "yoast/how-to-block" ) { - return getAnnotationsForYoastBlock( attributes, block, marks ); - } - return getAnnotationsForWPBlock( attributes, block, marks ); +const getAnnotationsForBlock = ( block, marks ) => { + return flatMap( + getAnnotatableAttributes( block.name ), + ( ( attribute ) => { + // Get the annotations for Yoast FAQ and How-To blocks. + if ( block.name === "yoast/faq-block" || block.name === "yoast/how-to-block" ) { + return getAnnotationsForYoastBlock( attribute, block, marks ); + } + // Get the annotation for non-Yoast blocks. + return getAnnotationsForWPBlock( attribute, block, marks ); + } ) + ); }; @@ -578,7 +630,9 @@ const getAnnotationsForABlock = ( block, marks ) => { */ export function getAnnotationsForBlocks( blocks, marks ) { return flatMap( blocks, ( block ) => { - return getAnnotationsForABlock( block, marks ); + // If a block has innerblocks, get annotations for those blocks as well. + const innerBlockAnnotations = hasInnerBlocks( block ) ? getAnnotationsForBlocks( block.innerBlocks, marks ) : []; + return getAnnotationsForBlock( block, marks ).concat( innerBlockAnnotations ); } ); } @@ -651,7 +705,7 @@ export function reapplyAnnotationsForSelectedBlock() { const marksForActiveMarker = activeMarker.marks; - const annotations = getAnnotationsForABlock( block, marksForActiveMarker ); + const annotations = getAnnotationsForBlock( block, marksForActiveMarker ); fillAnnotationQueue( annotations ); From 5a156206c42c7f20725c87bdabd49060afa44e65 Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Fri, 21 Jul 2023 17:08:02 +0200 Subject: [PATCH 251/616] Remove unused code --- .../js/src/analysis/collectAnalysisData.js | 31 ------------------- 1 file changed, 31 deletions(-) diff --git a/packages/js/src/analysis/collectAnalysisData.js b/packages/js/src/analysis/collectAnalysisData.js index 9a65af56920..3cfa1a68d59 100644 --- a/packages/js/src/analysis/collectAnalysisData.js +++ b/packages/js/src/analysis/collectAnalysisData.js @@ -10,36 +10,6 @@ import getWritingDirection from "./getWritingDirection"; import { Paper } from "yoastseo"; -/** - * Filters the WordPress block editor block data to use for the analysis. - * - * @param {Object} block The block from which we need to get the relevant data. - * - * @returns {Object} The block, with irrelevant data removed. - */ -function filterBlockData( block ) { - const filteredBlock = {}; - - // Main data of the block (content, but also heading level etc.) - filteredBlock.attributes = {}; - - // Heading level, HTML-content and image alt text. - const attributeNames = [ "level", "content", "alt" ]; - attributeNames.forEach( name => { - if ( block.attributes[ name ] ) { - filteredBlock.attributes[ name ] = block.attributes[ name ]; - } - } ); - - // Type of block, e.g. "core/paragraph" - filteredBlock.name = block.name; - filteredBlock.clientId = block.clientId; - - // Recurse on inner blocks. - filteredBlock.innerBlocks = block.innerBlocks.map( innerBlock => filterBlockData( innerBlock ) ); - - return filteredBlock; -} /** * Retrieves the data needed for the analyses. 
@@ -67,7 +37,6 @@ export default function collectAnalysisData( editorData, store, customAnalysisDa let blocks = null; if ( blockEditorDataModule ) { blocks = blockEditorDataModule.getBlocks(); - // blocks = blocks.map( block => filterBlockData( block ) ); } // Make a data structure for the paper data. From f48a7e5608640e2aba92ec88079a888c41f37d09 Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Fri, 21 Jul 2023 17:08:39 +0200 Subject: [PATCH 252/616] Pass more information from the parent node to the sentence --- .../helpers/sentence/getSentencesFromTree.js | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/packages/yoastseo/src/languageProcessing/helpers/sentence/getSentencesFromTree.js b/packages/yoastseo/src/languageProcessing/helpers/sentence/getSentencesFromTree.js index e3d1f69df69..94e5e5d7ba4 100644 --- a/packages/yoastseo/src/languageProcessing/helpers/sentence/getSentencesFromTree.js +++ b/packages/yoastseo/src/languageProcessing/helpers/sentence/getSentencesFromTree.js @@ -15,6 +15,9 @@ export default function( paper ) { ...s, parentStartOffset: ( node.sourceCodeLocation.startTag && node.sourceCodeLocation.startTag.endOffset ) || node.sourceCodeLocation.startOffset, - clientId: node.clientId }; + parentClientId: node.clientId, + parentAttributeId: node.attributeId || "", + isParentFirstBlockPair: node.isFirstPair || false, + }; } ) ); } From 248904062a0900ee3c59077db5d3e9d10bbaa116 Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Fri, 21 Jul 2023 17:09:50 +0200 Subject: [PATCH 253/616] Add block clientId, attributeId when creating mark object --- .../helpers/highlighting/getMarkingsInSentence.js | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/packages/yoastseo/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js b/packages/yoastseo/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js index 7b011eb10ce..c840d5d38df 100644 --- a/packages/yoastseo/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js +++ b/packages/yoastseo/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js @@ -67,7 +67,6 @@ const mergeConsecutiveAndOverlappingMarkings = ( markings ) => { newMarkings.push( marking ); } } ); - console.log( newMarkings, "NEW MARKINGS" ); return newMarkings; }; @@ -108,14 +107,14 @@ function getMarkingsInSentence( sentence, matchesInSentence ) { // relative to start of block positions. 
startOffsetBlock: startOffset - sentence.parentStartOffset, endOffsetBlock: endOffset - sentence.parentStartOffset, - clientId: sentence.clientId, + clientId: sentence.parentClientId, + attributeId: sentence.parentAttributeId, + isFirstPair: sentence.isParentFirstBlockPair, }, marked: markedSentence, original: sentence.text, } ); } ); - // console.log( markings, "MARKINSG YST" ); - // console.log( sentence.parentStartOffset, "offset parent" ); return mergeConsecutiveAndOverlappingMarkings( markings ); } From 6b5586c36e8a9c9fc071b5a0f67523a04a292028 Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Fri, 21 Jul 2023 17:26:16 +0200 Subject: [PATCH 254/616] Add steps to parse blocks and set the client id to the tree --- .../src/parse/build/private/parseBlocks.js | 191 +++++------------- 1 file changed, 54 insertions(+), 137 deletions(-) diff --git a/packages/yoastseo/src/parse/build/private/parseBlocks.js b/packages/yoastseo/src/parse/build/private/parseBlocks.js index 8f2b2b4f102..cd57e90f999 100644 --- a/packages/yoastseo/src/parse/build/private/parseBlocks.js +++ b/packages/yoastseo/src/parse/build/private/parseBlocks.js @@ -1,77 +1,11 @@ - const blockTokenizer = /)[^])*)\5|[^]*?)}\s+)?(\/)?-->/g; -// /** -// * Sets the block index to the node. -// * -// * @param {number} index The block index. -// * @param {Node} node The node to check. -// * @returns {void} -// */ -// const setBlockIndexOnNode = ( index, node ) => { -// node.blockIndex = index; -// if ( node.childNodes && node.childNodes.length > 0 ) { -// node.childNodes.map( childNode => setBlockIndexOnNode( index, childNode ) ); -// } -// }; -// -// /** -// * Retrieves the block index and sets them to Node. -// * -// * @param {Node} node The node to check. -// * @returns {void} -// */ -// export const getAnSetBlockIndex = ( node ) => { -// let index = -1; -// for ( const topNode of node.childNodes ) { -// if ( topNode.name === "#comment" && topNode.data.trim().startsWith( "wp:" ) ) { -// index++; -// } else if ( topNode.name === "#comment" && topNode.data.trim().startsWith( "/wp:" ) ) { -// continue; -// } else { -// setBlockIndexOnNode( index, topNode ); -// } -// } -// }; - - -const getBlockName = ( node ) => { - const supportedBlocks = [ "paragraph", "list", "list-item", "heading", "audio", "embed", "gallery", "image", "table", "video" ]; - const blockNames = supportedBlocks.map( block => [ "wp:".concat( block ), "/wp:".concat( block ), "core/".concat( block ) ] ); - const foundBlockName = { blockName: "", closingName: "" }; - blockNames.forEach( name => { - if ( foundBlockName.blockName === "" && node.data && node.data.trim().startsWith( name[ 0 ] ) ) { - foundBlockName.blockName = name[ 2 ]; - } else if ( foundBlockName.closingName === "" && node.data && node.data.trim().startsWith( name[ 1 ] ) ) { - foundBlockName.closingName = name[ 1 ]; - } - } ); - return foundBlockName; -}; - - -const getYoastBlockName = ( node ) => { - const yoastBlocks = [ "yoast/faq-block", "yoast/how-to-block" ]; - const foundBlockName = { blockName: "", closingName: "" }; - yoastBlocks.forEach( block => { - if ( foundBlockName.blockName === "" && node.data && node.data.trim().startsWith( "wp:".concat( block ) ) ) { - foundBlockName.blockName = block; - } else if ( foundBlockName.closingName === "" && node.data && node.data.trim().startsWith( "/wp:".concat( block ) ) ) { - foundBlockName.closingName = block; - } - } ); - return foundBlockName; -}; - -const setClientIdOnNode = ( index, blockName, node, blocks ) => { - if ( blocks && blocks.length > 0 && 
blocks[ index ].name === blockName ) { - node.clientId = blocks[ index ].clientId; - if ( node.childNodes && node.childNodes.length > 0 ) { - node.childNodes.map( childNode => setClientIdOnNode( index, blockName, childNode, blocks ) ); - } - } -}; - +/** + * Get all the blocks including the inner blocks. + * + * @param {Paper} paper The paper to get all the blocks. + * @returns {Object[]} An array of blocks. + */ const getAllBlocks = ( paper ) => { const blocks = paper._attributes.wpBlocks; const flattenBlocks = []; @@ -91,7 +25,18 @@ const getAllBlocks = ( paper ) => { return flattenBlocks; }; + +/** + * Gets the offset of each block and set it to the tree. + * + * @param {Object[]} blocks An array of blocks. + * @param {string} text The text. + * @returns {void} + */ function updateBlocksOffset( blocks, text ) { + if ( blocks.length === 0 ) { + return; + } // eslint-disable-next-line no-constant-condition blocks.forEach( currentBlock => { const matches = blockTokenizer.exec( text ); @@ -114,7 +59,17 @@ function updateBlocksOffset( blocks, text ) { } ); } -function updateClintIdForSubtree( rootNode, blocks, clientId ) { + +/** + * Gets the client id for each block and set it to the child nodes. + * Additionally, when a node has an attribute with 'id' key, also set this id to the first. + * + * @param {Node} rootNode The root node. + * @param {Object[]} blocks An array of blocks. + * @param {string} clientId The client id to set. + * @returns {void} + */ +function updateClientIdAndAttrIdForSubtree( rootNode, blocks, clientId ) { if ( ! rootNode ) { return; } @@ -123,84 +78,46 @@ function updateClintIdForSubtree( rootNode, blocks, clientId ) { if ( rootNode.sourceCodeLocation && rootNode.sourceCodeLocation.startOffset ) { const block = blocks.find( ( b ) => b.contentOffset === rootNode.sourceCodeLocation.startOffset ); if ( block ) { - console.log( "found block: ", block ); - currentClientId = block.clientId; } } + // If the clientId is not undefined, also set this to the root node. if ( currentClientId ) { rootNode.clientId = currentClientId; } - ( rootNode.childNodes || [] ).forEach( ( node ) => updateClintIdForSubtree( node, blocks, currentClientId ) ); + // If the node has children, update the clientId for them. + ( rootNode.childNodes || [] ).forEach( ( node ) => { + if ( node.attributes && node.attributes.id ) { + // If the node's child has an attribute with 'id' key, also set this id to the first and third child node. + if ( node.childNodes && node.childNodes.length > 3 ) { + node.childNodes[ 0 ].attributeId = node.attributes.id; + node.childNodes[ 0 ].isFirstPair = true; + + node.childNodes[ 2 ].attributeId = node.attributes.id; + node.childNodes[ 2 ].isFirstPair = false; + } + } + updateClientIdAndAttrIdForSubtree( node, blocks, currentClientId ); + } ); } -export default function( paper, node ) { - console.log( "parseBlocks", paper, node ); - const blocks = paper._attributes.wpBlocks; +/** + * Parses blocks and updates the clientId for the subtree. + * Additionally, when a node has an attribute with 'id' key, also set this id to the first and third child nod of that node. + * + * @param {Paper} paper The paper to parse. + * @param {Node} node The node to parse. 
+ * @returns {void} + */ +export default function( paper, node ) { + const blocks = paper._attributes.wpBlocks || []; blockTokenizer.lastIndex = 0; updateBlocksOffset( blocks, paper.getText() ); - // console.log( "Paper attributes: ", attributes ); - - // const blocks = getAllBlocks( paper ); - // const index = -1; - // const blockName = ""; - - // const getAndSetClientID = ( tree ) => { - // let blockName = ""; - // - // for ( const topNode of tree.childNodes ) { - // const foundBlockName = getBlockName( topNode ); - // const foundYoastBlockName = getYoastBlockName( topNode ); - // if ( foundBlockName.blockName.length > 0 || foundYoastBlockName.blockName.length > 0 ) { - // blockName = foundBlockName.blockName; - // index++; - // } else if ( foundBlockName.closingName.length > 0 || foundYoastBlockName.closingName.length > 0 ) { - // continue; - // } else { - // if ( blocks.length > 0 && blocks[ index ].name === blockName ) { - // topNode.clientId = blocks[ index ].clientId; - // if ( topNode.childNodes && topNode.childNodes.length > 0 ) { - // getAndSetClientID( topNode ); - // } - // } - // // setClientIdOnNode( index, blockName, topNode, blocks ); - // } - // } - // }; - // getAndSetClientID( node ); const rawBlocks = getAllBlocks( paper ); // eslint-disable-next-line no-undefined - updateClintIdForSubtree( node, rawBlocks, undefined ); - - console.log( "after updateClintIdForSubtree: ", node ); - // for ( const topNode of node.childNodes ) { - // if ( ! topNode.sourceCodeLocation ) { - // continue; - // } - // if ( ! topNode.sourceCodeLocation.startOffset ) { - // continue; - // } - // - // const block = rawBlocks.find( ( b ) => b.contentOffset === topNode.sourceCodeLocation.startOffset ); - // if ( block ) { - // console.log( "found block: ", block ); - // topNode.clientId = block.clientId; - // // setClientIdOnNode( index, blockName, topNode, blocks ); - // } - // - // // const foundBlockName = getBlockName( topNode ); - // // const foundYoastBlockName = getYoastBlockName( topNode ); - // // if ( foundBlockName.blockName.length > 0 || foundYoastBlockName.blockName.length > 0 ) { - // // blockName = foundBlockName.blockName; - // // index++; - // // } else if ( foundBlockName.closingName.length > 0 || foundYoastBlockName.closingName.length > 0 ) { - // // continue; - // // } else { - // // setClientIdOnNode( index, blockName, topNode, blocks ); - // // } - // } + updateClientIdAndAttrIdForSubtree( node, rawBlocks, undefined ); } From b3eb0345aac4735e58292f4d06129fecde9bb71e Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Mon, 24 Jul 2023 13:59:12 +0200 Subject: [PATCH 255/616] Move the helper to retrieve annotations to a different file --- packages/js/src/decorator/gutenberg.js | 402 +--------------- .../helpers/getAnnotationsHelpers.js | 442 ++++++++++++++++++ 2 files changed, 447 insertions(+), 397 deletions(-) create mode 100644 packages/js/src/decorator/helpers/getAnnotationsHelpers.js diff --git a/packages/js/src/decorator/gutenberg.js b/packages/js/src/decorator/gutenberg.js index 2d36f318a16..a768f06e7d7 100644 --- a/packages/js/src/decorator/gutenberg.js +++ b/packages/js/src/decorator/gutenberg.js @@ -1,18 +1,14 @@ /* External dependencies */ -import { - isFunction, - flatMap, flattenDeep, -} from "lodash"; +import { isFunction, flatMap } from "lodash"; // The WP annotations package isn't loaded by default so force loading it. 
import "@wordpress/annotations"; -import { create } from "@wordpress/rich-text"; import { select, dispatch } from "@wordpress/data"; import getFieldsToMarkHelper from "./helpers/getFieldsToMarkHelper"; +import { getAnnotationsForYoastBlock, getAnnotationsForWPBlock } from "./helpers/getAnnotationsHelpers"; const ANNOTATION_SOURCE = "yoast"; export const START_MARK = ""; -const START_MARK_DOUBLE_QUOTED = ""; export const END_MARK = ""; let annotationQueue = []; @@ -149,222 +145,6 @@ export function isAnnotationAvailable() { select( "core/annotations" ) && isFunction( dispatch( "core/annotations" ).__experimentalAddAnnotation ); } -/** - * Returns the offsets of the occurrences in the given mark. - * - * @param {string} marked The mark object to calculate offset for. - * - * @returns {Array<{startOffset: number, endOffset: number}>} The start and end indices for this mark. - */ -export function getYoastmarkOffsets( marked ) { - let startMarkIndex = marked.indexOf( START_MARK ); - - // Checks if the start mark is single quoted. - // Note: if doesNotContainDoubleQuotedMark is true, this does necessary mean that the start mark is single quoted. - // It could also be that the start mark doesn't occur at all in startMarkIndex. - // In that case, startMarkIndex will be -1 during later tests. - const doesNotContainDoubleQuotedMark = startMarkIndex >= 0; - - // If the start mark is not found, try the double-quoted version. - if ( ! doesNotContainDoubleQuotedMark ) { - startMarkIndex = marked.indexOf( START_MARK_DOUBLE_QUOTED ); - } - - let endMarkIndex = null; - - const offsets = []; - - /** - * Step by step search for a yoastmark-tag and its corresponding end tag. Each time a tag is found - * it is removed from the string because the function should return the indexes based on the string - * without the tags. - */ - while ( startMarkIndex >= 0 ) { - marked = doesNotContainDoubleQuotedMark ? marked.replace( START_MARK, "" ) : marked.replace( START_MARK_DOUBLE_QUOTED, "" ); - - endMarkIndex = marked.indexOf( END_MARK ); - - if ( endMarkIndex < startMarkIndex ) { - return []; - } - marked = marked.replace( END_MARK, "" ); - - offsets.push( { - startOffset: startMarkIndex, - endOffset: endMarkIndex, - } ); - - startMarkIndex = doesNotContainDoubleQuotedMark ? marked.indexOf( START_MARK ) : marked.indexOf( START_MARK_DOUBLE_QUOTED ); - - endMarkIndex = null; - } - - return offsets; -} - -/** - * Finds all indices for a given string in a text. - * - * @param {string} text Text to search through. - * @param {string} stringToFind Text to search for. - * @param {boolean} caseSensitive True if the search is case-sensitive. - * - * @returns {Array} All indices of the found occurrences. - */ -export function getIndicesOf( text, stringToFind, caseSensitive = true ) { - const indices = []; - if ( text.length === 0 ) { - return indices; - } - - let searchStartIndex = 0; - let index; - - if ( ! caseSensitive ) { - stringToFind = stringToFind.toLowerCase(); - text = text.toLowerCase(); - } - - while ( ( index = text.indexOf( stringToFind, searchStartIndex ) ) > -1 ) { - indices.push( index ); - searchStartIndex = index + stringToFind.length; - } - - return indices; -} - -/** - * Calculates an annotation if the given mark is applicable to the content of a block. - * - * @param {string} text The content of the block. - * @param {Mark} mark The mark to apply to the content. - * - * @returns {Array} The annotations to apply. 
- */ -export function calculateAnnotationsForTextFormat( text, mark ) { - /* - * Remove all tags from the original sentence. - * - * A cool keyword. => A cool keyword. - */ - const originalSentence = mark.getOriginal().replace( /(<([^>]+)>)/ig, "" ); - /* - * Remove all tags except yoastmark tags from the marked sentence. - * - * A cool keyword. => A cool keyword - */ - const markedSentence = mark.getMarked().replace( /(<(?!\/?yoastmark)[^>]+>)/ig, "" ); - /* - * A sentence can occur multiple times in a text, therefore we calculate all indices where - * the sentence occurs. We then calculate the marker offsets for a single sentence and offset - * them with each sentence index. - * - * ( "A cool text. A cool keyword.", "A cool keyword." ) => [ 13 ] - */ - const sentenceIndices = getIndicesOf( text, originalSentence ); - - if ( sentenceIndices.length === 0 ) { - return []; - } - - /* - * Calculate the mark offsets within the sentence that the current mark targets. - * - * "A cool keyword." => [ { startOffset: 7, endOffset: 14 } ] - */ - const yoastmarkOffsets = getYoastmarkOffsets( markedSentence ); - - const blockOffsets = []; - - /* - * The offsets array holds all start- and endtag offsets for a single sentence. We now need - * to apply all sentence offsets to each offset to properly map them to the blocks content. - */ - yoastmarkOffsets.forEach( ( yoastmarkOffset ) => { - sentenceIndices.forEach( sentenceIndex => { - /* - * The yoastmarkOffset.startOffset and yoastmarkOffset.endOffset are offsets of the - * relative to the start of the Mark object. The sentenceIndex is the index form the start of the - * RichText until the matched Mark, so to calculate the offset from the RichText to the - * we need to add those offsets. - * - * startOffset = ( sentenceIndex ) 13 + ( yoastmarkOffset.startOffset ) 7 = 20 - * endOffset = ( sentenceIndex ) 13 + ( yoastmarkOffset.endOffset ) 14 = 27 - * - * "A cool text. A cool keyword." - * ( startOffset ) ^20 ^27 ( endOffset ) - */ - const startOffset = sentenceIndex + yoastmarkOffset.startOffset; - let endOffset = sentenceIndex + yoastmarkOffset.endOffset; - - /* - * If the marks are at the beginning and the end we can use the length, which gives more - * consistent results given we strip HTML tags. - */ - if ( yoastmarkOffset.startOffset === 0 && yoastmarkOffset.endOffset === mark.getOriginal().length ) { - endOffset = sentenceIndex + originalSentence.length; - } - - blockOffsets.push( { - startOffset, - endOffset, - } ); - } ); - } ); - return blockOffsets; -} - -const htmlTagsRegex = /(<([^>]+)>)/ig; - -/** - * Create an annotation if the given mark is position based. - * - * @param {string} clientId The client id of the block. - * @param {Mark} mark The mark to apply to the content. - * @param {string} html The HTML of the block. - * @param {string} text The text of the block. - * - * @returns {Array} The annotations to apply. - */ -export function createAnnotationsFromPositionBasedMarks( clientId, mark, html, text ) { - if ( clientId === mark.getBlockClientId() ) { - const slicedHtml = html.slice( mark.getBlockPositionStart(), mark.getBlockPositionEnd() ); - const slicedText = text.slice( mark.getBlockPositionStart(), mark.getBlockPositionEnd() ); - // Check if the html and the rich text contain the same text in the specified index. - if ( slicedHtml === slicedText ) { - return [ - { - startOffset: mark.getBlockPositionStart(), - endOffset: mark.getBlockPositionEnd(), - }, - ]; - } - // Adjust block position start and end. 
- let startOffset = mark.getBlockPositionStart(); - let endOffset = mark.getBlockPositionEnd(); - // Retrieve the html from the start until the startOffset of the mark. - html = html.slice( 0, mark.getBlockPositionStart() ); - // Find all html tags. - const foundHtmlTags = [ ...html.matchAll( htmlTagsRegex ) ]; - /* - * Loop through the found html tags backwards, and adjust the start and end offsets of the mark - * by subtracting them with the length of the found html tags. - */ - for ( let i = foundHtmlTags.length - 1; i >= 0; i-- ) { - const currentHtmlTag = foundHtmlTags[ i ][ 0 ]; - startOffset -= currentHtmlTag.length; - endOffset -= currentHtmlTag.length; - } - return [ - { - startOffset: startOffset, - endOffset: endOffset, - }, - ]; - } - return []; -} - /** * Returns an array of all the attributes of which we can annotate text for, for a specific block type name. * @@ -394,178 +174,6 @@ export function hasInnerBlocks( block ) { return block.innerBlocks.length > 0; } -/** - * Creates the annotations for a block. - * - * @param {string} html The string where we want to apply the annotations. - * @param {string} richTextIdentifier The identifier for the annotatable richt text. - * @param {Object} attribute The attribute to apply annotations to. - * @param {Object} block The block information in the state. - * @param {Array} marks The marks to turn into annotations. - * - * @returns {Array} An array of annotations for a specific block. - */ -function createAnnotations( html, richTextIdentifier, attribute, block, marks ) { - const blockClientId = block.clientId; - - const record = create( { - html: html, - multilineTag: attribute.multilineTag, - multilineWrapperTag: attribute.multilineWrapperTag, - } ); - - const text = record.text; - - return flatMap( marks, mark => { - let annotations; - if ( mark.hasBlockPosition && mark.hasBlockPosition() ) { - annotations = createAnnotationsFromPositionBasedMarks( blockClientId, mark, html, text ); - } else { - annotations = calculateAnnotationsForTextFormat( - text, - mark - ); - } - - if ( ! annotations ) { - return []; - } - - return annotations.map( annotation => { - return { - ...annotation, - block: blockClientId, - richTextIdentifier: richTextIdentifier, - }; - } ); - } ); -} - -/** - * Gets the annotations for Yoast FAQ block. - * - * @param {Object} attribute The attribute to apply annotations to. - * @param {Object} block The block information in the state. - * @param {Array} marks The marks to turn into annotations. - * @returns {Array} The created annotations. - */ -const getAnnotationsForFAQ = ( attribute, block, marks ) => { - const annotatableTexts = block.attributes[ attribute.key ]; - if ( annotatableTexts.length === 0 ) { - return []; - } - - // For each rich text of the attribute value, create annotations. - const annotations = annotatableTexts.map( item => { - const identifierQuestion = `${ item.id }-question`; - const identifierAnswer = `${ item.id }-answer`; - const marksForQuestion = marks.filter( mark => { - if ( mark.hasBlockPosition() ) { - return mark.getBlockAttributeId() === item.id && mark.isMarkForFirstBlockPair(); - } - return mark; - } ); - const marksForAnswer = marks.filter( mark => { - if ( mark.hasBlockPosition() ) { - return mark.getBlockAttributeId() === item.id && ! 
mark.isMarkForFirstBlockPair(); - } - return mark; - } ); - - const annotationsFromQuestion = createAnnotations( item.jsonQuestion, identifierQuestion, attribute, block, marksForQuestion ); - const annotationsFromAnswer = createAnnotations( item.jsonAnswer, identifierAnswer, attribute, block, marksForAnswer ); - - return annotationsFromQuestion.concat( annotationsFromAnswer ); - } ); - - return flattenDeep( annotations ); -}; - -/** - * Gets the annotations for Yoast How-To block. - * - * @param {Object} attribute The attribute to apply annotations to. - * @param {Object} block The block information in the state. - * @param {Array} marks The marks to turn into annotations. - * @returns {Array} The created annotations. - */ -const getAnnotationsForHowTo = ( attribute, block, marks ) => { - const annotatableTexts = block.attributes[ attribute.key ]; - if ( annotatableTexts.length === 0 ) { - return []; - } - const annotations = []; - if ( attribute.key === "steps" ) { - // For each rich text of the attribute value, create annotations. - annotations.push( annotatableTexts.map( item => { - const identifierStepName = `${ item.id }-name`; - const identifierStepText = `${ item.id }-text`; - - const marksForStepName = marks.filter( mark => { - if ( mark.hasBlockPosition() ) { - return mark.getBlockAttributeId() === item.id && mark.isMarkForFirstBlockPair(); - } - return mark; - } ); - const marksForStepText = marks.filter( mark => { - if ( mark.hasBlockPosition() ) { - return mark.getBlockAttributeId() === item.id && ! mark.isMarkForFirstBlockPair(); - } - return mark; - } ); - - const annotationsFromStepName = createAnnotations( item.jsonName, identifierStepName, attribute, block, marksForStepName ); - const annotationsFromStepText = createAnnotations( item.jsonText, identifierStepText, attribute, block, marksForStepText ); - - // For each step return an array of the name-text pair objects. - return annotationsFromStepName.concat( annotationsFromStepText ); - } ) ); - } - if ( attribute.key === "jsonDescription" ) { - marks = marks.filter( mark => { - return mark.hasBlockPosition() && ! mark.getBlockAttributeId(); - } ); - annotations.push( createAnnotations( annotatableTexts, "description", attribute, block, marks ) ); - } - return flattenDeep( annotations ); -}; - -/** - * Gets the annotations for Yoast FAQ block and Yoast How-To block. - * - * @param {Object} attribute The attribute to apply annotations to. - * @param {Object} block The block information in the state. - * @param {Array} marks The marks to turn into annotations. - * - * @returns {Array} An array of annotations for Yoast blocks. - */ -export function getAnnotationsForYoastBlock( attribute, block, marks ) { - // For Yoast FAQ and How-To blocks, we create separate annotation objects for each individual Rich Text found in the attribute. - if ( block.name === "yoast/faq-block" ) { - return getAnnotationsForFAQ( attribute, block, marks ); - } - // The check for getting the annotations for Yoast How-To block. - if ( block.name === "yoast/how-to-block" ) { - return getAnnotationsForHowTo( attribute, block, marks ); - } -} - -/** - * Returns annotations that should be applied to the given attribute. - * - * @param {Object} attribute The attribute to apply annotations to. - * @param {Object} block The block information in the state. - * @param {Array} marks The marks to turn into annotations. - * - * @returns {Array} The annotations to apply. 
- */ -export function getAnnotationsForWPBlock( attribute, block, marks ) { - const richTextIdentifier = attribute.key; - const html = block.attributes[ richTextIdentifier ]; - - return createAnnotations( html, richTextIdentifier, attribute, block, marks ); -} - /** * Removes all annotations from the editor. * @@ -603,7 +211,7 @@ function fillAnnotationQueue( annotations ) { * @param { Mark[] } marks A list of marks that could apply to the block. * @returns { Object[] } All annotations that need to be placed on the block. */ -const getAnnotationsForBlock = ( block, marks ) => { +const getAnnotationsForABlock = ( block, marks ) => { return flatMap( getAnnotatableAttributes( block.name ), ( ( attribute ) => { @@ -632,7 +240,7 @@ export function getAnnotationsForBlocks( blocks, marks ) { return flatMap( blocks, ( block ) => { // If a block has innerblocks, get annotations for those blocks as well. const innerBlockAnnotations = hasInnerBlocks( block ) ? getAnnotationsForBlocks( block.innerBlocks, marks ) : []; - return getAnnotationsForBlock( block, marks ).concat( innerBlockAnnotations ); + return getAnnotationsForABlock( block, marks ).concat( innerBlockAnnotations ); } ); } @@ -705,7 +313,7 @@ export function reapplyAnnotationsForSelectedBlock() { const marksForActiveMarker = activeMarker.marks; - const annotations = getAnnotationsForBlock( block, marksForActiveMarker ); + const annotations = getAnnotationsForABlock( block, marksForActiveMarker ); fillAnnotationQueue( annotations ); diff --git a/packages/js/src/decorator/helpers/getAnnotationsHelpers.js b/packages/js/src/decorator/helpers/getAnnotationsHelpers.js new file mode 100644 index 00000000000..5211b8c2888 --- /dev/null +++ b/packages/js/src/decorator/helpers/getAnnotationsHelpers.js @@ -0,0 +1,442 @@ +import { flatMap, flattenDeep } from "lodash"; +import { create } from "@wordpress/rich-text"; + +const htmlTagsRegex = /(<([^>]+)>)/ig; +export const START_MARK = ""; +const START_MARK_DOUBLE_QUOTED = ""; +export const END_MARK = ""; + +/** + * Returns the offsets of the occurrences in the given mark. + * A helper for search-based highlighting. + * + * @param {string} marked The mark object to calculate offset for. + * + * @returns {Array<{startOffset: number, endOffset: number}>} The start and end indices for this mark. + */ +export function getYoastmarkOffsets( marked ) { + let startMarkIndex = marked.indexOf( START_MARK ); + + // Checks if the start mark is single quoted. + // Note: if doesNotContainDoubleQuotedMark is true, this does not necessarily mean that the start mark is single quoted. + // It could also be that the start mark doesn't occur at all in startMarkIndex. + // In that case, startMarkIndex will be -1 during later tests. + const doesNotContainDoubleQuotedMark = startMarkIndex >= 0; + + // If the start mark is not found, try the double-quoted version. + if ( ! doesNotContainDoubleQuotedMark ) { + startMarkIndex = marked.indexOf( START_MARK_DOUBLE_QUOTED ); + } + + let endMarkIndex = null; + + const offsets = []; + + /** + * Step by step search for a yoastmark-tag and its corresponding end tag. Each time a tag is found + * it is removed from the string because the function should return the indexes based on the string + * without the tags. + */ + while ( startMarkIndex >= 0 ) { + marked = doesNotContainDoubleQuotedMark ? 
marked.replace( START_MARK, "" ) : marked.replace( START_MARK_DOUBLE_QUOTED, "" ); + + endMarkIndex = marked.indexOf( END_MARK ); + + if ( endMarkIndex < startMarkIndex ) { + return []; + } + marked = marked.replace( END_MARK, "" ); + + offsets.push( { + startOffset: startMarkIndex, + endOffset: endMarkIndex, + } ); + + startMarkIndex = doesNotContainDoubleQuotedMark ? marked.indexOf( START_MARK ) : marked.indexOf( START_MARK_DOUBLE_QUOTED ); + + endMarkIndex = null; + } + + return offsets; +} + +/** + * Finds all indices for a given string in a text. + * A helper for search-based highlighting. + * + * @param {string} text Text to search through. + * @param {string} stringToFind Text to search for. + * @param {boolean} caseSensitive True if the search is case-sensitive. + * + * @returns {Array} All indices of the found occurrences. + */ +export function getIndicesOf( text, stringToFind, caseSensitive = true ) { + const indices = []; + if ( text.length === 0 ) { + return indices; + } + + let searchStartIndex = 0; + let index; + + if ( ! caseSensitive ) { + stringToFind = stringToFind.toLowerCase(); + text = text.toLowerCase(); + } + + while ( ( index = text.indexOf( stringToFind, searchStartIndex ) ) > -1 ) { + indices.push( index ); + searchStartIndex = index + stringToFind.length; + } + + return indices; +} + +/** + * Calculates an annotation if the given mark is applicable to the content of a block. + * A helper for search-based highlighting. + * + * @param {string} text The content of the block. + * @param {Mark} mark The mark to apply to the content. + * + * @returns {Array} The annotations to apply. + */ +export function calculateAnnotationsForTextFormat( text, mark ) { + /* + * Remove all tags from the original sentence. + * + * A cool keyword. => A cool keyword. + */ + const originalSentence = mark.getOriginal().replace( /(<([^>]+)>)/ig, "" ); + /* + * Remove all tags except yoastmark tags from the marked sentence. + * + * A cool keyword. => A cool keyword + */ + const markedSentence = mark.getMarked().replace( /(<(?!\/?yoastmark)[^>]+>)/ig, "" ); + /* + * A sentence can occur multiple times in a text, therefore we calculate all indices where + * the sentence occurs. We then calculate the marker offsets for a single sentence and offset + * them with each sentence index. + * + * ( "A cool text. A cool keyword.", "A cool keyword." ) => [ 13 ] + */ + const sentenceIndices = getIndicesOf( text, originalSentence ); + + if ( sentenceIndices.length === 0 ) { + return []; + } + + /* + * Calculate the mark offsets within the sentence that the current mark targets. + * + * "A cool keyword." => [ { startOffset: 7, endOffset: 14 } ] + */ + const yoastmarkOffsets = getYoastmarkOffsets( markedSentence ); + + const blockOffsets = []; + + /* + * The offsets array holds all start- and endtag offsets for a single sentence. We now need + * to apply all sentence offsets to each offset to properly map them to the blocks content. + */ + yoastmarkOffsets.forEach( ( yoastmarkOffset ) => { + sentenceIndices.forEach( sentenceIndex => { + /* + * The yoastmarkOffset.startOffset and yoastmarkOffset.endOffset are offsets of the + * relative to the start of the Mark object. The sentenceIndex is the index form the start of the + * RichText until the matched Mark, so to calculate the offset from the RichText to the + * we need to add those offsets. 
+ * + * startOffset = ( sentenceIndex ) 13 + ( yoastmarkOffset.startOffset ) 7 = 20 + * endOffset = ( sentenceIndex ) 13 + ( yoastmarkOffset.endOffset ) 14 = 27 + * + * "A cool text. A cool keyword." + * ( startOffset ) ^20 ^27 ( endOffset ) + */ + const startOffset = sentenceIndex + yoastmarkOffset.startOffset; + let endOffset = sentenceIndex + yoastmarkOffset.endOffset; + + /* + * If the marks are at the beginning and the end we can use the length, which gives more + * consistent results given we strip HTML tags. + */ + if ( yoastmarkOffset.startOffset === 0 && yoastmarkOffset.endOffset === mark.getOriginal().length ) { + endOffset = sentenceIndex + originalSentence.length; + } + + blockOffsets.push( { + startOffset, + endOffset, + } ); + } ); + } ); + return blockOffsets; +} + +/** + * Creates an annotation if the given mark is position based. + * A helper for position-based highlighting. + * + * @param {string} clientId The client id of the block. + * @param {Mark} mark The mark to apply to the content. + * @param {string} html The HTML of the block. + * @param {string} text The text of the block. + * + * @returns {Array} The annotations to apply. + */ +export function createAnnotationsFromPositionBasedMarks( clientId, mark, html, text ) { + if ( clientId === mark.getBlockClientId() ) { + const slicedHtml = html.slice( mark.getBlockPositionStart(), mark.getBlockPositionEnd() ); + const slicedText = text.slice( mark.getBlockPositionStart(), mark.getBlockPositionEnd() ); + // Check if the html and the rich text contain the same text in the specified index. + if ( slicedHtml === slicedText ) { + return [ + { + startOffset: mark.getBlockPositionStart(), + endOffset: mark.getBlockPositionEnd(), + }, + ]; + } + // Adjust block position start and end. + let startOffset = mark.getBlockPositionStart(); + let endOffset = mark.getBlockPositionEnd(); + // Retrieve the html from the start until the startOffset of the mark. + html = html.slice( 0, mark.getBlockPositionStart() ); + // Find all html tags. + const foundHtmlTags = [ ...html.matchAll( htmlTagsRegex ) ]; + /* + * Loop through the found html tags backwards, and adjust the start and end offsets of the mark + * by subtracting them with the length of the found html tags. + */ + for ( let i = foundHtmlTags.length - 1; i >= 0; i-- ) { + const currentHtmlTag = foundHtmlTags[ i ][ 0 ]; + startOffset -= currentHtmlTag.length; + endOffset -= currentHtmlTag.length; + } + return [ + { + startOffset: startOffset, + endOffset: endOffset, + }, + ]; + } + return []; +} + +/** + * Creates the annotations for a block. + * + * @param {string} html The string where we want to apply the annotations. + * @param {string} richTextIdentifier The identifier for the annotatable richt text. + * @param {Object} attribute The attribute to apply annotations to. + * @param {Object} block The block information in the state. + * @param {Mark[]} marks The marks to turn into annotations. + * + * @returns {Array} An array of annotations for a specific block. 
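+ *
+ * @example
+ * // Illustrative sketch, not part of the original change. Assuming a core paragraph block whose
+ * // attribute key is "content", a call such as
+ * // createAnnotations( "A cool text. A cool keyword.", "content", attribute, block, [ mark ] ),
+ * // where mark.getOriginal() is "A cool keyword." and mark.getMarked() is
+ * // "A cool <yoastmark class='yoast-text-mark'>keyword</yoastmark>.", would return roughly
+ * // [ { startOffset: 20, endOffset: 27, block: block.clientId, richTextIdentifier: "content" } ].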
+ */ +function createAnnotations( html, richTextIdentifier, attribute, block, marks ) { + const blockClientId = block.clientId; + + const record = create( { + html: html, + multilineTag: attribute.multilineTag, + multilineWrapperTag: attribute.multilineWrapperTag, + } ); + + const text = record.text; + + return flatMap( marks, mark => { + let annotations; + if ( mark.hasBlockPosition && mark.hasBlockPosition() ) { + annotations = createAnnotationsFromPositionBasedMarks( blockClientId, mark, html, text ); + } else { + annotations = calculateAnnotationsForTextFormat( + text, + mark + ); + } + + if ( ! annotations ) { + return []; + } + + return annotations.map( annotation => { + return { + ...annotation, + block: blockClientId, + richTextIdentifier: richTextIdentifier, + }; + } ); + } ); +} +const firstIdentifierRegex = /()(.*?)(<\/strong>)/gis; + +/** + * Adjusts the block start and end offset for a given mark. + * + * @param {Mark} mark The Mark object to adjust. + * @param {Object} block The block to get the original content from. + * @param {String} firstIdentifierHtml The html of the first identifier. + * + * @returns {Mark} The adjusted mark object. + */ +const adjustBlockOffset = ( mark, block, firstIdentifierHtml ) => { + const originalContent = block.originalContent; + const firstPairElements = [ ...originalContent.matchAll( firstIdentifierRegex ) ]; + if ( firstPairElements.length === 0 ) { + return mark; + } + const startOffset = mark.getBlockPositionStart(); + const endOffset = mark.getBlockPositionEnd(); + firstPairElements.forEach( firstPairElement => { + if ( firstPairElement[ 3 ] === firstIdentifierHtml ) { + mark.setBlockPositionStart( startOffset - firstPairElement[ 1 ].length ); + mark.setBlockPositionEnd( endOffset - firstPairElement[ 1 ].length ); + } + } ); + return mark; +}; + +/** + * Gets annotations from an array of annotatable block attribute items. + * This is a helper specifically for Yoast blocks. + * Yoast blocks' annotatable attribute is an array of objects where each object contains a pair of annotatable texts. + * For example Yoast FAQ block's annotatable attribute is `questions`: + * { + * attributes: { + * questions: [ + * { jsonQuestion: "", jsonAnswer: "" }, + * { jsonQuestion: "", jsonAnswer: "" }, + * ] + * } + * } + * + * @param {Object[]} array An array of annotatable block attribute items. + * @param {Mark[]} marks An array of Mark objects to create annotations for. + * @param {Object} attribute The attribute to apply annotations to. + * @param {Object} block The block information in the state. + * @param {Array} identifierPair An array containing the identifier pair for the block. + * For example, the identifier pair for the FAQ block is `[ "question", "answer" ]`, in this order. + * + * @returns {Array} The annotations created for the block attribute. + */ +const getAnnotationsFromArray = ( array, marks, attribute, block, identifierPair ) => { + array = array.map( item => { + const firstIdentifier = `${ item.id }-${ identifierPair[ 0 ] }`; + const secondIdentifier = `${ item.id }-${ identifierPair[ 1 ] }`; + + // Get the first item of the identifier pair and make the first letter uppercase. + identifierPair[ 0 ] = identifierPair[ 0 ][ 0 ].toUpperCase() + identifierPair[ 0 ].slice( 1 ); + // Get the second item of the identifier pair and make the first letter uppercase. 
+ identifierPair[ 1 ] = identifierPair[ 1 ][ 0 ].toUpperCase() + identifierPair[ 1 ].slice( 1 ); + + const firstIdentifierHtml = item[ `json${ identifierPair[ 0 ] }` ]; + const secondIdentifierHtml = item[ `json${ identifierPair[ 1 ] }` ]; + + let marksForFirstIdentifier = marks.filter( mark => { + if ( mark.hasBlockPosition() ) { + return mark.getBlockAttributeId() === item.id && mark.isMarkForFirstBlockPair(); + } + return mark; + } ); + marksForFirstIdentifier = marksForFirstIdentifier.map( mark => { + if ( mark.hasBlockPosition() ) { + return adjustBlockOffset( mark, block, firstIdentifierHtml ); + } + return mark; + } ); + const marksForSecondIdentifier = marks.filter( mark => { + if ( mark.hasBlockPosition() ) { + return mark.getBlockAttributeId() === item.id && ! mark.isMarkForFirstBlockPair(); + } + return mark; + } ); + + + const firstIdentifierAnnotations = createAnnotations( firstIdentifierHtml, firstIdentifier, attribute, block, marksForFirstIdentifier ); + const secondIdentifierAnnotations = createAnnotations( secondIdentifierHtml, secondIdentifier, attribute, block, marksForSecondIdentifier ); + + return firstIdentifierAnnotations.concat( secondIdentifierAnnotations ); + } ); + return flattenDeep( array ); +}; + +/** + * Gets the annotations for Yoast FAQ block. + * + * @param {Object} attribute The attribute to apply annotations to. + * @param {Object} block The block information in the state. + * @param {Array} marks The marks to turn into annotations. + * @returns {Array} The created annotations. + */ +const getAnnotationsForFAQ = ( attribute, block, marks ) => { + const annotatableTexts = block.attributes[ attribute.key ]; + if ( annotatableTexts.length === 0 ) { + return []; + } + + return getAnnotationsFromArray( annotatableTexts, marks, attribute, block, [ "question", "answer" ] ); +}; + +/** + * Gets the annotations for Yoast How-To block. + * + * @param {Object} attribute The attribute to apply annotations to. + * @param {Object} block The block information in the state. + * @param {Array} marks The marks to turn into annotations. + * @returns {Array} The created annotations. + */ +const getAnnotationsForHowTo = ( attribute, block, marks ) => { + const annotatableTexts = block.attributes[ attribute.key ]; + if ( annotatableTexts.length === 0 ) { + return []; + } + const annotations = []; + if ( attribute.key === "steps" ) { + annotations.push( getAnnotationsFromArray( annotatableTexts, marks, attribute, block, [ "name", "text" ] ) ); + } + if ( attribute.key === "jsonDescription" ) { + marks = marks.filter( mark => { + return mark.hasBlockPosition() && ! mark.getBlockAttributeId(); + } ); + annotations.push( createAnnotations( annotatableTexts, "description", attribute, block, marks ) ); + } + return flattenDeep( annotations ); +}; + +/** + * Gets the annotations for Yoast FAQ block and Yoast How-To block. + * + * @param {Object} attribute The attribute to apply annotations to. + * @param {Object} block The block information in the state. + * @param {Array} marks The marks to turn into annotations. + * + * @returns {Array} An array of annotations for Yoast blocks. + */ +export function getAnnotationsForYoastBlock( attribute, block, marks ) { + // For Yoast FAQ and How-To blocks, we create separate annotation objects for each individual Rich Text found in the attribute. + if ( block.name === "yoast/faq-block" ) { + return getAnnotationsForFAQ( attribute, block, marks ); + } + // The check for getting the annotations for Yoast How-To block. 
+ if ( block.name === "yoast/how-to-block" ) { + return getAnnotationsForHowTo( attribute, block, marks ); + } +} + +/** + * Gets the annotations for non-Yoast blocks. + * + * @param {Object} attribute The attribute to apply annotations to. + * @param {Object} block The block information in the state. + * @param {Array} marks The marks to turn into annotations. + * + * @returns {Array} The annotations to apply. + */ +export function getAnnotationsForWPBlock( attribute, block, marks ) { + const richTextIdentifier = attribute.key; + const html = block.attributes[ richTextIdentifier ]; + + return createAnnotations( html, richTextIdentifier, attribute, block, marks ); +} From d106120971cbd92adb92cf885e5f3f4ea4a00594 Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Mon, 24 Jul 2023 14:00:16 +0200 Subject: [PATCH 256/616] Remove the step to add comment data --- packages/yoastseo/src/parse/build/private/adapt.js | 4 +--- packages/yoastseo/src/parse/structure/Node.js | 6 +----- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/packages/yoastseo/src/parse/build/private/adapt.js b/packages/yoastseo/src/parse/build/private/adapt.js index 9d255e457d6..b01f1dcc6b0 100644 --- a/packages/yoastseo/src/parse/build/private/adapt.js +++ b/packages/yoastseo/src/parse/build/private/adapt.js @@ -87,7 +87,5 @@ export default function adapt( tree ) { return new Heading( headingLevel, attributes, children, tree.sourceCodeLocation ); } - const commentData = tree.nodeName === "#comment" && tree.data; - - return new Node( tree.nodeName, attributes, children, tree.sourceCodeLocation, commentData ); + return new Node( tree.nodeName, attributes, children, tree.sourceCodeLocation ); } diff --git a/packages/yoastseo/src/parse/structure/Node.js b/packages/yoastseo/src/parse/structure/Node.js index 09010047ca4..aa5b82a2602 100644 --- a/packages/yoastseo/src/parse/structure/Node.js +++ b/packages/yoastseo/src/parse/structure/Node.js @@ -14,17 +14,13 @@ class Node { * @param {(Node|Text)[]} childNodes This node's child nodes. * @param {Object} sourceCodeLocationInfo This node's location in the source code, from parse5. */ - constructor( name, attributes = {}, childNodes = [], sourceCodeLocationInfo = {}, commentData ) { + constructor( name, attributes = {}, childNodes = [], sourceCodeLocationInfo = {} ) { /** * This node's name or tag. * @type {string} */ this.name = name; - if ( commentData ){ - this.data = commentData; - } - /** * This node's attributes. 
* @type {Object} From da89cc630b63dba6fe7c240ff85d4d019d887425 Mon Sep 17 00:00:00 2001 From: hdvos Date: Tue, 25 Jul 2023 10:45:33 +0200 Subject: [PATCH 257/616] unregister the shortcodes plugin --- packages/js/src/analysis/plugins/shortcode-plugin.js | 1 - 1 file changed, 1 deletion(-) diff --git a/packages/js/src/analysis/plugins/shortcode-plugin.js b/packages/js/src/analysis/plugins/shortcode-plugin.js index f20cd9d1e51..d465bef7939 100644 --- a/packages/js/src/analysis/plugins/shortcode-plugin.js +++ b/packages/js/src/analysis/plugins/shortcode-plugin.js @@ -55,7 +55,6 @@ var YoastShortcodePlugin = function( { registerPlugin, registerModification, plu */ YoastShortcodePlugin.prototype.declareReady = function() { this._pluginReady( "YoastShortcodePlugin" ); - this.registerModifications(); }; /** From 88f9b20a9ae5ad21b72106f75bc616db9f5251a3 Mon Sep 17 00:00:00 2001 From: hdvos Date: Tue, 25 Jul 2023 10:46:51 +0200 Subject: [PATCH 258/616] add shortcode filtering to build.js --- packages/yoastseo/src/parse/build/build.js | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/packages/yoastseo/src/parse/build/build.js b/packages/yoastseo/src/parse/build/build.js index fcb045075ae..14a4ece8242 100644 --- a/packages/yoastseo/src/parse/build/build.js +++ b/packages/yoastseo/src/parse/build/build.js @@ -6,6 +6,7 @@ import tokenize from "./private/tokenize"; import filterTree from "./private/filterTree"; import permanentFilters from "./private/alwaysFilterElements"; import { filterBeforeTokenizing } from "./private/filterBeforeTokenizing"; +import filterShortcodesFromTree from "../../languageProcessing/helpers/sanitize/filterShortcodesFromTree"; /** * Parses the HTML string to a tree representation of @@ -13,11 +14,13 @@ import { filterBeforeTokenizing } from "./private/filterBeforeTokenizing"; * * @param {string} htmlString The HTML string. * @param {LanguageProcessor} languageProcessor The language processor to use. + * @param {string[]} shortcodes An array of all active shortcodes. * * @returns {Node} The tree representation of the HTML string. */ -export default function build( htmlString, languageProcessor ) { +export default function build( htmlString, languageProcessor, shortcodes ) { let tree = adapt( parseFragment( htmlString, { sourceCodeLocationInfo: true } ) ); + /* * Filter out some content from the tree so that it can be correctly tokenized. We don't want to tokenize text in * between tags such as 'code' and 'script', but we do want to take into account the length of those elements when @@ -28,6 +31,10 @@ export default function build( htmlString, languageProcessor ) { // Add sentences and tokens to the tree's paragraph and heading nodes. tree = tokenize( tree, languageProcessor ); + if ( shortcodes ) { + filterShortcodesFromTree( tree, shortcodes ); + } + /* * Filter out elements we don't want to include in the analysis. Only do this after tokenization as we need to * have all inline elements in the tree during tokenization to correctly calculate sentence and token positions. 
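A minimal usage sketch of the extended build() call above (illustrative only; the researcher instance, the shortcode list, and the sample text are assumed here and are not part of these patches):

    const languageProcessor = new LanguageProcessor( researcher );
    // Tags of the shortcodes registered on the site, as collected from the editor data later in this series.
    const shortcodes = [ "gallery", "caption" ];
    // The third argument is optional; when it is present, registered shortcodes such as [gallery]
    // are filtered out of the sentences and tokens of the returned tree.
    const tree = build( "<p>Some text with a [gallery] shortcode.</p>", languageProcessor, shortcodes );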
From 16a60f02864dabec4f18499242adf0a56d471985 Mon Sep 17 00:00:00 2001 From: hdvos Date: Tue, 25 Jul 2023 10:48:20 +0200 Subject: [PATCH 259/616] pass shortcodes when calling build --- packages/yoastseo/src/worker/AnalysisWebWorker.js | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/packages/yoastseo/src/worker/AnalysisWebWorker.js b/packages/yoastseo/src/worker/AnalysisWebWorker.js index f480b24e80e..1ba4522827b 100644 --- a/packages/yoastseo/src/worker/AnalysisWebWorker.js +++ b/packages/yoastseo/src/worker/AnalysisWebWorker.js @@ -1081,8 +1081,8 @@ export default class AnalysisWebWorker { this._researcher.setPaper( this._paper ); const languageProcessor = new LanguageProcessor( this._researcher ); - - this._paper.setTree( build( this._paper.getText(), languageProcessor ) ); + const shortcodes = this._paper._attributes && this._paper._attributes.shortcodes; + this._paper.setTree( build( this._paper.getText(), languageProcessor, shortcodes ) ); // Update the configuration locale to the paper locale. this.setLocale( this._paper.getLocale() ); @@ -1405,7 +1405,8 @@ export default class AnalysisWebWorker { // Build and set the tree if it's not been set before. if ( paper.getTree() === null ) { const languageProcessor = new LanguageProcessor( researcher ); - paper.setTree( build( paper.getText(), languageProcessor ) ); + const shortcodes = paper._attributes && paper._attributes.shortcodes; + paper.setTree( build( paper.getText(), languageProcessor, shortcodes ) ); } } From 50177569bc1d359a19f99473ff7e6ae05efebd57 Mon Sep 17 00:00:00 2001 From: hdvos Date: Tue, 25 Jul 2023 10:48:30 +0200 Subject: [PATCH 260/616] pass shortcodes when calling build --- packages/yoastseo/src/scoring/assessor.js | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/packages/yoastseo/src/scoring/assessor.js b/packages/yoastseo/src/scoring/assessor.js index aeae6742200..dd9e49c92d9 100644 --- a/packages/yoastseo/src/scoring/assessor.js +++ b/packages/yoastseo/src/scoring/assessor.js @@ -124,7 +124,8 @@ Assessor.prototype.assess = function( paper ) { this._researcher.setPaper( paper ); const languageProcessor = new LanguageProcessor( this._researcher ); - paper.setTree( build( paper.getText(), languageProcessor ) ); + const shortcodes = paper._attributes && paper._attributes.shortcodes; + paper.setTree( build( paper.getText(), languageProcessor, shortcodes ) ); let assessments = this.getAvailableAssessments(); this.results = []; From 1cfceb336461057c78d6b779cd9b5d7c95c9edcc Mon Sep 17 00:00:00 2001 From: hdvos Date: Tue, 25 Jul 2023 10:48:42 +0200 Subject: [PATCH 261/616] pass shortcodes when calling build --- packages/yoastseo/spec/specHelpers/parse/buildTree.js | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/packages/yoastseo/spec/specHelpers/parse/buildTree.js b/packages/yoastseo/spec/specHelpers/parse/buildTree.js index 80caa20c1ae..adeb2358faa 100644 --- a/packages/yoastseo/spec/specHelpers/parse/buildTree.js +++ b/packages/yoastseo/spec/specHelpers/parse/buildTree.js @@ -14,7 +14,9 @@ import permanentFilters from "../../../src/parse/build/private/alwaysFilterEleme */ export default function buildTree( paper, researcher ) { const languageProcessor = new LanguageProcessor( researcher ); - paper.setTree( build( paper.getText(), languageProcessor ) ); + const shortcodes = paper._attributes && paper._attributes.shortcodes; + + paper.setTree( build( paper.getText(), languageProcessor, shortcodes ) ); } /** From 390a3e19564984f0826d4e311fd5009650c6af2d Mon Sep 
17 00:00:00 2001 From: hdvos Date: Tue, 25 Jul 2023 10:48:58 +0200 Subject: [PATCH 262/616] add shortcodes data to paper --- packages/js/src/analysis/collectAnalysisData.js | 1 + 1 file changed, 1 insertion(+) diff --git a/packages/js/src/analysis/collectAnalysisData.js b/packages/js/src/analysis/collectAnalysisData.js index 70fe78a3b33..16ff597991e 100644 --- a/packages/js/src/analysis/collectAnalysisData.js +++ b/packages/js/src/analysis/collectAnalysisData.js @@ -103,6 +103,7 @@ export default function collectAnalysisData( editorData, store, customAnalysisDa data.titleWidth = measureTextWidth( filteredSEOTitle || storeData.snippetEditor.data.title ); data.locale = getContentLocale(); data.writingDirection = getWritingDirection(); + data.shortcodes = window.wpseoScriptData.analysis.plugins.shortcodes.wpseo_shortcode_tags; return Paper.parse( applyFilters( "yoast.analysis.data", data ) ); } From 726c4a524f2d9ee8d0c5e5b3c8eaeb469b091af4 Mon Sep 17 00:00:00 2001 From: hdvos Date: Tue, 25 Jul 2023 10:50:17 +0200 Subject: [PATCH 263/616] create function to filter shortcodes --- .../sanitize/filterShortcodesFromTree.js | 37 +++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 packages/yoastseo/src/languageProcessing/helpers/sanitize/filterShortcodesFromTree.js diff --git a/packages/yoastseo/src/languageProcessing/helpers/sanitize/filterShortcodesFromTree.js b/packages/yoastseo/src/languageProcessing/helpers/sanitize/filterShortcodesFromTree.js new file mode 100644 index 00000000000..faa5636b1ad --- /dev/null +++ b/packages/yoastseo/src/languageProcessing/helpers/sanitize/filterShortcodesFromTree.js @@ -0,0 +1,37 @@ +const filterShortcodesFromSentence = ( sentence, shortcodeTags ) => { + const tokens = sentence.tokens; + // traverse through tokens backwards + for ( let i = tokens.length - 1; i >= 0; i-- ) { + if ( shortcodeTags.includes( tokens[ i ].text ) && tokens[ i - 1 ] && tokens[ i - 1 ].text === "[" ) { + while ( tokens[ i ].text !== "]" ) { + tokens.splice( i, 1 ); + } + tokens.splice( i, 1 ); + tokens.splice( i - 1, 1 ); + } + } + sentence.tokens = tokens; +}; + +const filterFromTokens = ( tree, shortcodeTags ) => { + if ( tree.sentences ) { + tree.sentences.forEach( sentence => { + filterShortcodesFromSentence( sentence, shortcodeTags ); + } ); + } + + if ( tree.childNodes ) { + tree.childNodes.forEach( childNode => { + filterFromTokens( childNode, shortcodeTags ); + } ); + } + return tree; +}; + +const filterShortcodesFromTree = ( tree, shortcodeTags ) => { + if ( shortcodeTags ) { + filterFromTokens( tree, shortcodeTags ); + } +}; + +export default filterShortcodesFromTree; From 072847fac2e2fdc81ba832c6ebe316332b0ba6b8 Mon Sep 17 00:00:00 2001 From: hdvos Date: Tue, 25 Jul 2023 10:51:10 +0200 Subject: [PATCH 264/616] add specs for shortcode filtering function --- .../sanitize/filterShortcodesFromTreeSpec.js | 327 ++++++++++++++++++ 1 file changed, 327 insertions(+) create mode 100644 packages/yoastseo/spec/languageProcessing/helpers/sanitize/filterShortcodesFromTreeSpec.js diff --git a/packages/yoastseo/spec/languageProcessing/helpers/sanitize/filterShortcodesFromTreeSpec.js b/packages/yoastseo/spec/languageProcessing/helpers/sanitize/filterShortcodesFromTreeSpec.js new file mode 100644 index 00000000000..613c66664d7 --- /dev/null +++ b/packages/yoastseo/spec/languageProcessing/helpers/sanitize/filterShortcodesFromTreeSpec.js @@ -0,0 +1,327 @@ +import filterShortcodesFromTree from "../../../../src/languageProcessing/helpers/sanitize/filterShortcodesFromTree"; + + 
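+// Note: these specs build minimal plain objects instead of fully parsed trees; filterShortcodesFromTree
+// only relies on nodes having optional `childNodes` and `sentences`, where each sentence carries a
+// `text` string and a `tokens` array.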
+describe( "filterShortcodesFromTree", function() { + it( "should filter a shortcode with no parameters from a tree", function() { + const tree = { + sentences: [ + { + text: "This is a [shortcode] test.", + tokens: [ + { text: "This" }, + { text: " " }, + { text: "is" }, + { text: " " }, + { text: "a" }, + { text: " " }, + { text: "[" }, + { text: "shortcode" }, + { text: "]" }, + { text: " " }, + { text: "test" }, + { text: "." }, + ], + } ] }; + const shortcodes = [ "shortcode" ]; + + filterShortcodesFromTree( tree, shortcodes ); + + expect( tree.sentences[ 0 ].tokens ).toEqual( [ + { text: "This" }, + { text: " " }, + { text: "is" }, + { text: " " }, + { text: "a" }, + { text: " " }, + { text: " " }, + { text: "test" }, + { text: "." }, + ] ); + } ); + + it( "should filter a shortcode with parameters from a tree", function() { + const tree = { + sentences: [ + { + text: "This is a [shortcode param1='value1' param2='value2'] test.", + tokens: [ + { text: "This" }, + { text: " " }, + { text: "is" }, + { text: " " }, + { text: "a" }, + { text: " " }, + { text: "[" }, + { text: "shortcode" }, + { text: " " }, + { text: "param1='value1" }, + { text: "'" }, + { text: " " }, + { text: "param2='value1" }, + { text: "'" }, + { text: "]" }, + { text: " " }, + { text: "test" }, + { text: "." }, + ], + } ] }; + const shortcodes = [ "shortcode" ]; + + filterShortcodesFromTree( tree, shortcodes ); + + expect( tree.sentences[ 0 ].tokens ).toEqual( [ + { text: "This" }, + { text: " " }, + { text: "is" }, + { text: " " }, + { text: "a" }, + { text: " " }, + { text: " " }, + { text: "test" }, + { text: "." }, + ] ); + } ); + + it( "should filter multiple shortcodes from the same sentence", function() { + const tree = { + sentences: [ + { + text: "This is a [shortcode] test with [another_shortcode] in it.", + tokens: [ + { text: "This" }, + { text: " " }, + { text: "is" }, + { text: " " }, + { text: "a" }, + { text: " " }, + { text: "[" }, + { text: "shortcode" }, + { text: "]" }, + { text: " " }, + { text: "test" }, + { text: " " }, + { text: "with" }, + { text: " " }, + { text: "[" }, + { text: "another_shortcode" }, + { text: "]" }, + { text: " " }, + { text: "in" }, + { text: " " }, + { text: "it" }, + { text: "." }, + ], + }, + ] }; + const shortcodes = [ "shortcode", "another_shortcode" ]; + + filterShortcodesFromTree( tree, shortcodes ); + + expect( tree.sentences[ 0 ].tokens ).toEqual( [ + { text: "This" }, + { text: " " }, + { text: "is" }, + { text: " " }, + { text: "a" }, + { text: " " }, + { text: " " }, + { text: "test" }, + { text: " " }, + { text: "with" }, + { text: " " }, + { text: " " }, + { text: "in" }, + { text: " " }, + { text: "it" }, + { text: "." }, + ] ); + } ); + + it( "should filter multiple shortcodes from different sentences", function() { + const tree = { + sentences: [ + { + text: "This is a [shortcode] test.", + tokens: [ + { text: "This" }, + { text: " " }, + { text: "is" }, + { text: " " }, + { text: "a" }, + { text: " " }, + { text: "[" }, + { text: "shortcode" }, + { text: "]" }, + { text: " " }, + { text: "test" }, + { text: "." }, + ], + }, + { + text: "This is a [another_shortcode] test.", + tokens: [ + { text: "This" }, + { text: " " }, + { text: "is" }, + { text: " " }, + { text: "a" }, + { text: " " }, + { text: "[" }, + { text: "another_shortcode" }, + { text: "]" }, + { text: " " }, + { text: "test" }, + { text: "." 
}, + ], + }, + ], + }; + + const shortcodes = [ "shortcode", "another_shortcode" ]; + + filterShortcodesFromTree( tree, shortcodes ); + + expect( tree.sentences[ 0 ].tokens ).toEqual( [ + { text: "This" }, + { text: " " }, + { text: "is" }, + { text: " " }, + { text: "a" }, + { text: " " }, + { text: " " }, + { text: "test" }, + { text: "." }, + ] ); + expect( tree.sentences[ 1 ].tokens ).toEqual( [ + { text: "This" }, + { text: " " }, + { text: "is" }, + { text: " " }, + { text: "a" }, + { text: " " }, + { text: " " }, + { text: "test" }, + { text: "." }, + ] ); + } ); + it( "should filter a shortcode from a childnode.", function() { + const tree = { + childNodes: [ + { + sentences: [ + { + text: "This is a [shortcode] test.", + tokens: [ + { text: "This" }, + { text: " " }, + { text: "is" }, + { text: " " }, + { text: "a" }, + { text: " " }, + { text: "[" }, + { text: "shortcode" }, + { text: "]" }, + { text: " " }, + { text: "test" }, + { text: "." }, + ], + + }, + ], + }, + ], + }; + + const shortcodes = [ "shortcode" ]; + + filterShortcodesFromTree( tree, shortcodes ); + + expect( tree.childNodes[ 0 ].sentences[ 0 ].tokens ).toEqual( [ + { text: "This" }, + { text: " " }, + { text: "is" }, + { text: " " }, + { text: "a" }, + { text: " " }, + { text: " " }, + { text: "test" }, + { text: "." }, + ] ); + } ); + + it( "should not filter a word between square brackets if it is not a shortcode", function() { + const tree = { + sentences: [ + { + text: "This is a test.", + tokens: [ + { text: "This" }, + { text: " " }, + { text: "is" }, + { text: " " }, + { text: "a" }, + { text: " " }, + { text: "test" }, + { text: "." }, + ], + }, + ], + }; + + const shortcodes = [ "shortcode" ]; + + filterShortcodesFromTree( tree, shortcodes ); + + expect( tree.sentences[ 0 ].tokens ).toEqual( [ + { text: "This" }, + { text: " " }, + { text: "is" }, + { text: " " }, + { text: "a" }, + { text: " " }, + { text: "test" }, + { text: "." }, + ] ); + } ); + it( "should not filter if no shortcodes are available", function() { + const tree = { + sentences: [ + { + text: "This is a [shortcode] test.", + tokens: [ + { text: "This" }, + { text: " " }, + { text: "is" }, + { text: " " }, + { text: "a" }, + { text: " " }, + { text: "[" }, + { text: "shortcode" }, + { text: "]" }, + { text: " " }, + { text: "test" }, + { text: "." }, + ], + + }, + ], + }; + + filterShortcodesFromTree( tree ); + + expect( tree.sentences[ 0 ].tokens ).toEqual( [ + { text: "This" }, + { text: " " }, + { text: "is" }, + { text: " " }, + { text: "a" }, + { text: " " }, + { text: "[" }, + { text: "shortcode" }, + { text: "]" }, + { text: " " }, + { text: "test" }, + { text: "." }, + ] ); + } ); +} ); From 80397ee3536c53c2d4450d2d9dbcc8cfd7606036 Mon Sep 17 00:00:00 2001 From: hdvos Date: Tue, 25 Jul 2023 11:33:56 +0200 Subject: [PATCH 265/616] Filter shortcodes from sentence text --- .../sanitize/filterShortcodesFromTree.js | 38 ++++++++++++++++--- 1 file changed, 32 insertions(+), 6 deletions(-) diff --git a/packages/yoastseo/src/languageProcessing/helpers/sanitize/filterShortcodesFromTree.js b/packages/yoastseo/src/languageProcessing/helpers/sanitize/filterShortcodesFromTree.js index faa5636b1ad..d636b08b7dc 100644 --- a/packages/yoastseo/src/languageProcessing/helpers/sanitize/filterShortcodesFromTree.js +++ b/packages/yoastseo/src/languageProcessing/helpers/sanitize/filterShortcodesFromTree.js @@ -1,4 +1,13 @@ -const filterShortcodesFromSentence = ( sentence, shortcodeTags ) => { + +/** + * Filters shortcodes from a sentence. 
+ * @param {Sentence} sentence The sentence to filter. + * @param {string[]} shortcodeTags The tags of the shortcodes to filter. + * @param {RegExp} shortcodeTagsRegex The regex to filter the shortcodes. + * @returns {void} + * + */ +const filterShortcodesFromSentence = ( sentence, shortcodeTags, shortcodeTagsRegex ) => { const tokens = sentence.tokens; // traverse through tokens backwards for ( let i = tokens.length - 1; i >= 0; i-- ) { @@ -11,26 +20,43 @@ const filterShortcodesFromSentence = ( sentence, shortcodeTags ) => { } } sentence.tokens = tokens; + + sentence.text = sentence.text.replace( shortcodeTagsRegex, "" ); }; -const filterFromTokens = ( tree, shortcodeTags ) => { +/** + * A recursive function that filters shortcodes from sentences. + * @param {Node} tree The tree to filter. + * @param {string[]} shortcodeTags The tags of the shortcodes to filter. + * @param {RegExp} shortcodeTagsRegex The regex to filter the shortcodes. + * @returns {void} + */ +const filterShortcodesFromSentences = ( tree, shortcodeTags, shortcodeTagsRegex ) => { if ( tree.sentences ) { tree.sentences.forEach( sentence => { - filterShortcodesFromSentence( sentence, shortcodeTags ); + filterShortcodesFromSentence( sentence, shortcodeTags, shortcodeTagsRegex ); } ); } if ( tree.childNodes ) { tree.childNodes.forEach( childNode => { - filterFromTokens( childNode, shortcodeTags ); + filterShortcodesFromSentences( childNode, shortcodeTags, shortcodeTagsRegex ); } ); } - return tree; }; +/** + * Filters shortcodes from the tree. + * @param {Node} tree The tree to filter. + * @param {string[]} shortcodeTags The tags of the shortcodes to filter. + * @returns {void} + */ const filterShortcodesFromTree = ( tree, shortcodeTags ) => { if ( shortcodeTags ) { - filterFromTokens( tree, shortcodeTags ); + const shortcodeTagsRegexString = `\\[(${ shortcodeTags.join( "|" ) })[^\\]]*\\]`; + const shortcodeTagsRegex = new RegExp( shortcodeTagsRegexString, "g" ); + + filterShortcodesFromSentences( tree, shortcodeTags, shortcodeTagsRegex ); } }; From 3129928f6b195520c7017863b520d243decd1e0a Mon Sep 17 00:00:00 2001 From: hdvos Date: Tue, 25 Jul 2023 11:34:46 +0200 Subject: [PATCH 266/616] adapt specs to removal of shortcodes from sentence text --- .../sanitize/filterShortcodesFromTreeSpec.js | 18 ++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/packages/yoastseo/spec/languageProcessing/helpers/sanitize/filterShortcodesFromTreeSpec.js b/packages/yoastseo/spec/languageProcessing/helpers/sanitize/filterShortcodesFromTreeSpec.js index 613c66664d7..ca31f87556e 100644 --- a/packages/yoastseo/spec/languageProcessing/helpers/sanitize/filterShortcodesFromTreeSpec.js +++ b/packages/yoastseo/spec/languageProcessing/helpers/sanitize/filterShortcodesFromTreeSpec.js @@ -37,6 +37,8 @@ describe( "filterShortcodesFromTree", function() { { text: "test" }, { text: "." }, ] ); + + expect( tree.sentences[ 0 ].text ).toEqual( "This is a test." ); } ); it( "should filter a shortcode with parameters from a tree", function() { @@ -80,6 +82,8 @@ describe( "filterShortcodesFromTree", function() { { text: "test" }, { text: "." }, ] ); + + expect( tree.sentences[ 0 ].text ).toEqual( "This is a test." ); } ); it( "should filter multiple shortcodes from the same sentence", function() { @@ -135,6 +139,7 @@ describe( "filterShortcodesFromTree", function() { { text: "it" }, { text: "." }, ] ); + expect( tree.sentences[ 0 ].text ).toEqual( "This is a test with in it." 
); } ); it( "should filter multiple shortcodes from different sentences", function() { @@ -192,6 +197,9 @@ describe( "filterShortcodesFromTree", function() { { text: "test" }, { text: "." }, ] ); + + expect( tree.sentences[ 0 ].text ).toEqual( "This is a test." ); + expect( tree.sentences[ 1 ].tokens ).toEqual( [ { text: "This" }, { text: " " }, @@ -203,6 +211,8 @@ describe( "filterShortcodesFromTree", function() { { text: "test" }, { text: "." }, ] ); + + expect( tree.sentences[ 1 ].text ).toEqual( "This is a test." ); } ); it( "should filter a shortcode from a childnode.", function() { const tree = { @@ -247,6 +257,9 @@ describe( "filterShortcodesFromTree", function() { { text: "test" }, { text: "." }, ] ); + + expect( tree.childNodes[ 0 ].sentences[ 0 ].text ).toEqual( "This is a test." ); + } ); it( "should not filter a word between square brackets if it is not a shortcode", function() { @@ -282,6 +295,9 @@ describe( "filterShortcodesFromTree", function() { { text: "test" }, { text: "." }, ] ); + + expect( tree.sentences[ 0 ].text ).toEqual( "This is a test." ); + } ); it( "should not filter if no shortcodes are available", function() { const tree = { @@ -323,5 +339,7 @@ describe( "filterShortcodesFromTree", function() { { text: "test" }, { text: "." }, ] ); + + expect( tree.sentences[ 0 ].text ).toEqual( "This is a [shortcode] test." ); } ); } ); From 2fc24d0c28c82eedfd629664c855ea8278be0400 Mon Sep 17 00:00:00 2001 From: hdvos Date: Tue, 25 Jul 2023 14:18:55 +0200 Subject: [PATCH 267/616] remove shortcode-plugin --- .../src/analysis/plugins/shortcode-plugin.js | 329 ------------------ packages/js/tests/shortcode-plugin.test.js | 34 -- 2 files changed, 363 deletions(-) delete mode 100644 packages/js/src/analysis/plugins/shortcode-plugin.js delete mode 100644 packages/js/tests/shortcode-plugin.test.js diff --git a/packages/js/src/analysis/plugins/shortcode-plugin.js b/packages/js/src/analysis/plugins/shortcode-plugin.js deleted file mode 100644 index d465bef7939..00000000000 --- a/packages/js/src/analysis/plugins/shortcode-plugin.js +++ /dev/null @@ -1,329 +0,0 @@ -/* global tinyMCE */ -/* global wpseoScriptData */ -/* global ajaxurl */ -/* global _ */ - -const shortcodeNameMatcher = "[^<>&/\\[\\]\x00-\x20=]+?"; -const shortcodeAttributesMatcher = "( [^\\]]+?)?"; - -const shortcodeStartRegex = new RegExp( "\\[" + shortcodeNameMatcher + shortcodeAttributesMatcher + "\\]", "g" ); -const shortcodeEndRegex = new RegExp( "\\[/" + shortcodeNameMatcher + "\\]", "g" ); - -/** - * The Yoast Shortcode plugin parses the shortcodes in a given piece of text. It analyzes multiple input fields for - * shortcodes which it will preload using AJAX. - * - * @constructor - * @property {RegExp} keywordRegex Used to match a given string for valid shortcode keywords. - * @property {RegExp} closingTagRegex Used to match a given string for shortcode closing tags. - * @property {RegExp} nonCaptureRegex Used to match a given string for non capturing shortcodes. - * @property {Array} parsedShortcodes Used to store parsed shortcodes. - * - * @param {Object} interface Object Formerly Known as App, but for backwards compatibility - * still passed here as one argument. - * @param {function} interface.registerPlugin Register a plugin with Yoast SEO. - * @param {function} interface.registerModification Register a modification with Yoast SEO. - * @param {function} interface.pluginReady Notify Yoast SEO that the plugin is ready. 
- * @param {function} interface.pluginReloaded Notify Yoast SEO that the plugin has been reloaded. - */ -var YoastShortcodePlugin = function( { registerPlugin, registerModification, pluginReady, pluginReloaded } ) { - this._registerModification = registerModification; - this._pluginReady = pluginReady; - this._pluginReloaded = pluginReloaded; - - registerPlugin( "YoastShortcodePlugin", { status: "loading" } ); - this.bindElementEvents(); - - var keywordRegexString = "(" + wpseoScriptData.analysis.plugins.shortcodes.wpseo_shortcode_tags.join( "|" ) + ")"; - - // The regex for matching shortcodes based on the available shortcode keywords. - this.keywordRegex = new RegExp( keywordRegexString, "g" ); - this.closingTagRegex = new RegExp( "\\[\\/" + keywordRegexString + "\\]", "g" ); - this.nonCaptureRegex = new RegExp( "\\[" + keywordRegexString + "[^\\]]*?\\]", "g" ); - - this.parsedShortcodes = []; - - this.loadShortcodes( this.declareReady.bind( this ) ); -}; - -/* YOAST SEO CLIENT */ - -/** - * Declares ready with YoastSEO. - * - * @returns {void} - */ -YoastShortcodePlugin.prototype.declareReady = function() { - this._pluginReady( "YoastShortcodePlugin" ); -}; - -/** - * Declares reloaded with YoastSEO. - * - * @returns {void} - */ -YoastShortcodePlugin.prototype.declareReloaded = function() { - this._pluginReloaded( "YoastShortcodePlugin" ); -}; - -/** - * Registers the modifications for the content in which we want to replace shortcodes. - * - * @returns {void} - */ -YoastShortcodePlugin.prototype.registerModifications = function() { - this._registerModification( "content", this.replaceShortcodes.bind( this ), "YoastShortcodePlugin" ); -}; - - -/** - * Removes all unknown shortcodes. Not all plugins properly registerd their shortcodes in the WordPress backend. - * Since we cannot use the data from these shortcodes they must be removed. - * - * @param {string} data The text to remove unknown shortcodes. - * @returns {string} The text with removed unknown shortcodes. - */ -YoastShortcodePlugin.prototype.removeUnknownShortCodes = function( data ) { - data = data.replace( shortcodeStartRegex, "" ); - data = data.replace( shortcodeEndRegex, "" ); - - return data; -}; - -/** - * The callback used to replace the shortcodes. - * - * @param {string} data The text to replace the shortcodes in. - * - * @returns {string} The text with replaced shortcodes. - */ -YoastShortcodePlugin.prototype.replaceShortcodes = function( data ) { - var parsedShortcodes = this.parsedShortcodes; - - if ( typeof data === "string" && parsedShortcodes.length > 0 ) { - for ( var i = 0; i < parsedShortcodes.length; i++ ) { - data = data.replace( parsedShortcodes[ i ].shortcode, parsedShortcodes[ i ].output ); - } - } - - data = this.removeUnknownShortCodes( data ); - - return data; -}; - -/* DATA SOURCING */ - -/** - * Get data from inputfields and store them in an analyzerData object. This object will be used to fill - * the analyzer and the snippetpreview - * - * @param {function} callback To declare either ready or reloaded after parsing. - * - * @returns {void} - */ -YoastShortcodePlugin.prototype.loadShortcodes = function( callback ) { - var unparsedShortcodes = this.getUnparsedShortcodes( this.getShortcodes( this.getContentTinyMCE() ) ); - if ( unparsedShortcodes.length > 0 ) { - this.parseShortcodes( unparsedShortcodes, callback ); - } else { - return callback(); - } -}; - -/** - * Bind elements to be able to reload the dataset if shortcodes get added. 
- * - * @returns {void} - */ -YoastShortcodePlugin.prototype.bindElementEvents = function() { - var contentElement = document.getElementById( "content" ) || false; - var callback = _.debounce( this.loadShortcodes.bind( this, this.declareReloaded.bind( this ) ), 500 ); - - if ( contentElement ) { - contentElement.addEventListener( "keyup", callback ); - contentElement.addEventListener( "change", callback ); - } - - if ( typeof tinyMCE !== "undefined" && typeof tinyMCE.on === "function" ) { - tinyMCE.on( "addEditor", function( e ) { - e.editor.on( "change", callback ); - e.editor.on( "keyup", callback ); - } ); - } -}; - -/** - * Gets content from the content field, if tinyMCE is initialized, use the getContent function to - * get the data from tinyMCE. - * - * @returns {String} Content from tinyMCE. - */ -YoastShortcodePlugin.prototype.getContentTinyMCE = function() { - var val = document.getElementById( "content" ) && document.getElementById( "content" ).value || ""; - if ( typeof tinyMCE !== "undefined" && typeof tinyMCE.editors !== "undefined" && tinyMCE.editors.length !== 0 ) { - val = tinyMCE.get( "content" ) && tinyMCE.get( "content" ).getContent() || ""; - } - - return val; -}; - -/* SHORTCODE PARSING */ - -/** - * Returns the unparsed shortcodes out of a collection of shortcodes. - * - * @param {Array} shortcodes The shortcodes to check. - * - * @returns {Array} Array with unparsed shortcodes. - */ -YoastShortcodePlugin.prototype.getUnparsedShortcodes = function( shortcodes ) { - if ( typeof shortcodes !== "object" ) { - console.error( "Failed to get unparsed shortcodes. Expected parameter to be an array, instead received " + typeof shortcodes ); - return false; - } - - var unparsedShortcodes = []; - - for ( var i = 0; i < shortcodes.length; i++ ) { - var shortcode = shortcodes[ i ]; - if ( unparsedShortcodes.indexOf( shortcode ) === -1 && this.isUnparsedShortcode( shortcode ) ) { - unparsedShortcodes.push( shortcode ); - } - } - - return unparsedShortcodes; -}; - -/** - * Checks if a given shortcode was already parsed. - * - * @param {string} shortcode The shortcode to check. - * - * @returns {boolean} True when shortcode is not parsed yet. - */ -YoastShortcodePlugin.prototype.isUnparsedShortcode = function( shortcode ) { - var alreadyExists = false; - - for ( var i = 0; i < this.parsedShortcodes.length; i++ ) { - if ( this.parsedShortcodes[ i ].shortcode === shortcode ) { - alreadyExists = true; - } - } - - return alreadyExists === false; -}; - -/** - * Gets the shortcodes from a given piece of text. - * - * @param {string} text Text to extract shortcodes from. - * - * @returns {array} The matched shortcodes. - */ -YoastShortcodePlugin.prototype.getShortcodes = function( text ) { - if ( typeof text !== "string" ) { - /* jshint ignore:start */ - console.error( "Failed to get shortcodes. Expected parameter to be a string, instead received" + typeof text ); - /* jshint ignore:end*/ - return false; - } - - var captures = this.matchCapturingShortcodes( text ); - - // Remove the capturing shortcodes from the text before trying to match the capturing shortcodes. - for ( var i = 0; i < captures.length; i++ ) { - text = text.replace( captures[ i ], "" ); - } - - var nonCaptures = this.matchNonCapturingShortcodes( text ); - - return captures.concat( nonCaptures ); -}; - -/** - * Matches the capturing shortcodes from a given piece of text. - * - * @param {string} text Text to get the capturing shortcodes from. - * - * @returns {Array} The capturing shortcodes. 
- */ -YoastShortcodePlugin.prototype.matchCapturingShortcodes = function( text ) { - var captures = []; - - // First identify which tags are being used in a capturing shortcode by looking for closing tags. - var captureKeywords = ( text.match( this.closingTagRegex ) || [] ).join( " " ).match( this.keywordRegex ) || []; - - // Fetch the capturing shortcodes and strip them from the text so we can easily match the non capturing shortcodes. - for ( var i = 0; i < captureKeywords.length; i++ ) { - var captureKeyword = captureKeywords[ i ]; - var captureRegex = "\\[" + captureKeyword + "[^\\]]*?\\].*?\\[\\/" + captureKeyword + "\\]"; - var matches = text.match( new RegExp( captureRegex, "g" ) ) || []; - - captures = captures.concat( matches ); - } - - return captures; -}; - -/** - * Matches the non capturing shortcodes from a given piece of text. - * - * @param {string} text Text to get the non capturing shortcodes from. - * - * @returns {Array} The non capturing shortcodes. - */ -YoastShortcodePlugin.prototype.matchNonCapturingShortcodes = function( text ) { - return text.match( this.nonCaptureRegex ) || []; -}; - -/** - * Parses the unparsed shortcodes through AJAX and clears them. - * - * @param {Array} shortcodes shortcodes to be parsed. - * @param {function} callback function to be called in the context of the AJAX callback. - * - * @returns {void} - */ -YoastShortcodePlugin.prototype.parseShortcodes = function( shortcodes, callback ) { - if ( typeof callback !== "function" ) { - /* jshint ignore:start */ - console.error( "Failed to parse shortcodes. Expected parameter to be a function, instead received " + typeof callback ); - /* jshint ignore:end */ - return false; - } - - if ( typeof shortcodes === "object" && shortcodes.length > 0 ) { - jQuery.post( - ajaxurl, - { - action: "wpseo_filter_shortcodes", - _wpnonce: wpseoScriptData.analysis.plugins.shortcodes.wpseo_filter_shortcodes_nonce, - data: shortcodes, - }, - function( shortcodeResults ) { - this.saveParsedShortcodes( shortcodeResults, callback ); - }.bind( this ) - ); - } else { - return callback(); - } -}; - -/** - * Saves the shortcodes that were parsed with AJAX to `this.parsedShortcodes` - * - * @param {Array} shortcodeResults Shortcodes that must be saved. - * @param {function} callback Callback to execute of saving shortcodes. - * - * @returns {void} - */ -YoastShortcodePlugin.prototype.saveParsedShortcodes = function( shortcodeResults, callback ) { - shortcodeResults = JSON.parse( shortcodeResults ); - for ( var i = 0; i < shortcodeResults.length; i++ ) { - this.parsedShortcodes.push( shortcodeResults[ i ] ); - } - - callback(); -}; - -export default YoastShortcodePlugin; diff --git a/packages/js/tests/shortcode-plugin.test.js b/packages/js/tests/shortcode-plugin.test.js deleted file mode 100644 index 95d4fa1a949..00000000000 --- a/packages/js/tests/shortcode-plugin.test.js +++ /dev/null @@ -1,34 +0,0 @@ -import YoastShortcodePlugin from "../src/analysis/plugins/shortcode-plugin"; - -const { removeUnknownShortCodes } = YoastShortcodePlugin.prototype; - -describe( "removeUnknownShortcodes", () => { - it( "filters undefined shortcodes", () => { - const input = "[shortcode]Hello[/shortcode]"; - const expected = "Hello"; - - const actual = removeUnknownShortCodes( input ); - - expect( actual ).toBe( expected ); - } ); - - it( "filters shortcodes with special characters", () => { - const input = "

" + - "[vc_row]" + - "[vc_column]" + - "[vc_column_text" + - ' css_animation="bounceInLeft"' + - ' el_class="tøüst123"' + - ' css=".vc_custom_1482153765626{margin-right: 4px !important;}"]' + - "This is the text that needs to be preserved." + - "[/vc_column_text]" + - "[/vc_column]" + - "[/vc_row]" + - "

"; - const expected = "

This is the text that needs to be preserved.

"; - - const actual = removeUnknownShortCodes( input ); - - expect( actual ).toBe( expected ); - } ); -} ); From f5228e391f8b04afe2c3924e7d7041969685ad49 Mon Sep 17 00:00:00 2001 From: hdvos Date: Tue, 25 Jul 2023 14:19:12 +0200 Subject: [PATCH 268/616] remove shortcode plugin from post-scraper --- packages/js/src/initializers/post-scraper.js | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/packages/js/src/initializers/post-scraper.js b/packages/js/src/initializers/post-scraper.js index e72c6dd58dd..80eb4102857 100644 --- a/packages/js/src/initializers/post-scraper.js +++ b/packages/js/src/initializers/post-scraper.js @@ -12,7 +12,6 @@ import { select, subscribe } from "@wordpress/data"; // Internal dependencies. import YoastReplaceVarPlugin from "../analysis/plugins/replacevar-plugin"; import YoastReusableBlocksPlugin from "../analysis/plugins/reusable-blocks-plugin"; -import YoastShortcodePlugin from "../analysis/plugins/shortcode-plugin"; import YoastMarkdownPlugin from "../analysis/plugins/markdown-plugin"; import * as tinyMCEHelper from "../lib/tinymce"; import CompatibilityHelper from "../compatibility/compatibilityHelper"; @@ -66,7 +65,6 @@ const { // Plugin class prototypes (not the instances) are being used by other plugins from the window. window.YoastReplaceVarPlugin = YoastReplaceVarPlugin; -window.YoastShortcodePlugin = YoastShortcodePlugin; /** * @summary Initializes the post scraper script. @@ -496,12 +494,7 @@ export default function initPostScraper( $, store, editorData ) { // Analysis plugins window.YoastSEO.wp = {}; window.YoastSEO.wp.replaceVarsPlugin = new YoastReplaceVarPlugin( app, store ); - window.YoastSEO.wp.shortcodePlugin = new YoastShortcodePlugin( { - registerPlugin: app.registerPlugin, - registerModification: app.registerModification, - pluginReady: app.pluginReady, - pluginReloaded: app.pluginReloaded, - } ); + if ( isBlockEditor() ) { const reusableBlocksPlugin = new YoastReusableBlocksPlugin( app.registerPlugin, app.registerModification, window.YoastSEO.app.refresh ); reusableBlocksPlugin.register(); From 9b6700d79ae4204a50d2f7351c794e0ba53b43ac Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Tue, 25 Jul 2023 15:00:42 +0200 Subject: [PATCH 269/616] Remove unused method and rename a method --- packages/yoastseo/src/values/Mark.js | 39 ++++++++++------------------ 1 file changed, 13 insertions(+), 26 deletions(-) diff --git a/packages/yoastseo/src/values/Mark.js b/packages/yoastseo/src/values/Mark.js index 81d69c8597f..d209f3bbffc 100644 --- a/packages/yoastseo/src/values/Mark.js +++ b/packages/yoastseo/src/values/Mark.js @@ -79,7 +79,9 @@ Mark.prototype.getPositionEnd = function() { /** * Sets the start position. + * * @param {number} positionStart The new start position. + * * @returns {void} */ Mark.prototype.setPositionStart = function( positionStart ) { @@ -88,7 +90,9 @@ Mark.prototype.setPositionStart = function( positionStart ) { /** * Sets the end position. + * * @param {number} positionEnd The new end position. + * * @returns {void} */ Mark.prototype.setPositionEnd = function( positionEnd ) { @@ -97,7 +101,9 @@ Mark.prototype.setPositionEnd = function( positionEnd ) { /** * Returns the start position of a block. + * * @param {number} startOffsetBlock The block start offset. + * * @returns {number} The start position of a block. 
*/ Mark.prototype.setBlockPositionStart = function( startOffsetBlock ) { @@ -106,34 +112,15 @@ Mark.prototype.setBlockPositionStart = function( startOffsetBlock ) { /** * Returns the end position of a block. + * * @param {number} endOffsetBlock The block end offset. + * * @returns {number} The end position of a block. */ Mark.prototype.setBlockPositionEnd = function( endOffsetBlock ) { this._properties.position.endOffsetBlock = endOffsetBlock; }; -/** - * Sets the block index. - * - * @param {number} blockIndex The block index to set. - * @returns {void} - */ -Mark.prototype.setBlockIndex = function( blockIndex ) { - if ( this._properties.position ) { - this._properties.position.blockIndex = blockIndex; - } -}; - -/** - * Gets the block index. - * - * @returns {number} The block index. - */ -Mark.prototype.getBlockIndex = function() { - return this._properties.position && this._properties.position.blockIndex; -}; - /** * Gets the block client id. * @@ -154,14 +141,14 @@ Mark.prototype.getBlockAttributeId = function() { /** - * Checks if the mark object is intended for the first pair of the annotatble fields. + * Checks if the mark object is intended for the first section of a Yoast sub-block. * This method will be used only for Yoast blocks where each block consists of sub-blocks - * with a pair of annotatable fields. + * with two sections. * - * @returns {boolean} Whether the mark object is intended for the first pair of the annotatble fields. + * @returns {boolean} Whether the mark object is intended for the first section of a Yoast sub-block. */ -Mark.prototype.isMarkForFirstBlockPair = function() { - return this._properties.position && this._properties.position.isFirstPair; +Mark.prototype.isMarkForFirstBlockSection = function() { + return this._properties.position && this._properties.position.isFirstSection; }; /** From d1b754489447c5ec83f9fbac004f56aead82e6c9 Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Tue, 25 Jul 2023 15:02:15 +0200 Subject: [PATCH 270/616] Add docs --- .../helpers/getAnnotationsHelpers.js | 63 +++++++++++++------ .../highlighting/getMarkingsInSentence.js | 2 +- .../helpers/sentence/getSentencesFromTree.js | 12 ++-- .../src/parse/build/private/parseBlocks.js | 27 +++++--- 4 files changed, 73 insertions(+), 31 deletions(-) diff --git a/packages/js/src/decorator/helpers/getAnnotationsHelpers.js b/packages/js/src/decorator/helpers/getAnnotationsHelpers.js index 5211b8c2888..5ac23553b74 100644 --- a/packages/js/src/decorator/helpers/getAnnotationsHelpers.js +++ b/packages/js/src/decorator/helpers/getAnnotationsHelpers.js @@ -270,6 +270,11 @@ function createAnnotations( html, richTextIdentifier, attribute, block, marks ) } ); } ); } + +/** + * The regex to capture the first section of a Yoast sub-block. + * @type {RegExp} + */ const firstIdentifierRegex = /()(.*?)(<\/strong>)/gis; /** @@ -290,9 +295,11 @@ const adjustBlockOffset = ( mark, block, firstIdentifierHtml ) => { const startOffset = mark.getBlockPositionStart(); const endOffset = mark.getBlockPositionEnd(); firstPairElements.forEach( firstPairElement => { - if ( firstPairElement[ 3 ] === firstIdentifierHtml ) { - mark.setBlockPositionStart( startOffset - firstPairElement[ 1 ].length ); - mark.setBlockPositionEnd( endOffset - firstPairElement[ 1 ].length ); + // Destructure the matched element based on the capturing groups. 
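+		// (Illustrative) for this regex, each match produced by matchAll() has the shape
+		// [ fullMatch, '<strong class="schema-faq-question">', 'What is giant panda', '</strong>' ].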
+ const [ , openTag, , innerText ] = firstPairElement; + if ( innerText === firstIdentifierHtml ) { + mark.setBlockPositionStart( startOffset - openTag.length ); + mark.setBlockPositionEnd( endOffset - openTag.length ); } } ); return mark; @@ -323,41 +330,61 @@ const adjustBlockOffset = ( mark, block, firstIdentifierHtml ) => { */ const getAnnotationsFromArray = ( array, marks, attribute, block, identifierPair ) => { array = array.map( item => { - const firstIdentifier = `${ item.id }-${ identifierPair[ 0 ] }`; - const secondIdentifier = `${ item.id }-${ identifierPair[ 1 ] }`; + /* + * Get the rich text identifier of the first section of the sub-block. + * + * The rich text identifier of the first section is always the sub-block id + "-question" for Yoast FAQ block + * (in line with what we set in `packages/js/src/structured-data-blocks/faq/components/Question.js`), and + * the sub-block id + "-name" for Yoast How-To block + * (in line with what we set in `packages/js/src/structured-data-blocks/how-to/components/HowToStep.js`). + */ + const firstSectionIdentifier = `${ item.id }-${ identifierPair[ 0 ] }`; + /* + * Get the rich text identifier of the second section of the sub-block. + * + * The rich text identifier of the second section is always the sub-block id + "-answer" for Yoast FAQ block + * (in line with what we set in `packages/js/src/structured-data-blocks/faq/components/Question.js`), and + * the sub-block id + "-text" for Yoast How-To block + * (in line with what we set in `packages/js/src/structured-data-blocks/how-to/components/HowToStep.js`). + */ + const secondSectionIdentifier = `${ item.id }-${ identifierPair[ 1 ] }`; - // Get the first item of the identifier pair and make the first letter uppercase. + // Get the first item of the identifier pair and make the first letter uppercase, e.g. "question" -> "Question". identifierPair[ 0 ] = identifierPair[ 0 ][ 0 ].toUpperCase() + identifierPair[ 0 ].slice( 1 ); - // Get the second item of the identifier pair and make the first letter uppercase. + // Get the second item of the identifier pair and make the first letter uppercase, e.g. "answer" -> "Answer". identifierPair[ 1 ] = identifierPair[ 1 ][ 0 ].toUpperCase() + identifierPair[ 1 ].slice( 1 ); - const firstIdentifierHtml = item[ `json${ identifierPair[ 0 ] }` ]; - const secondIdentifierHtml = item[ `json${ identifierPair[ 1 ] }` ]; + const firstSectionHtml = item[ `json${ identifierPair[ 0 ] }` ]; + const secondSectionHtml = item[ `json${ identifierPair[ 1 ] }` ]; - let marksForFirstIdentifier = marks.filter( mark => { + let marksForFirstSection = marks.filter( mark => { if ( mark.hasBlockPosition() ) { - return mark.getBlockAttributeId() === item.id && mark.isMarkForFirstBlockPair(); + // Filter the marks array and only include the marks that are intended for the first sub-block section. 
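+				// (Illustrative) e.g. a mark positioned in the question of sub-block "faq-question-1689322642789"
+				// carries that id as its block attribute id, and isMarkForFirstBlockSection() returns true for it.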
+ return mark.getBlockAttributeId() === item.id && mark.isMarkForFirstBlockSection(); } return mark; } ); - marksForFirstIdentifier = marksForFirstIdentifier.map( mark => { + + // For the second section marks, we need to adjust the block start and end offset for the first sub-block section + marksForFirstSection = marksForFirstSection.map( mark => { if ( mark.hasBlockPosition() ) { - return adjustBlockOffset( mark, block, firstIdentifierHtml ); + return adjustBlockOffset( mark, block, firstSectionHtml ); } return mark; } ); - const marksForSecondIdentifier = marks.filter( mark => { + const marksForSecondSection = marks.filter( mark => { if ( mark.hasBlockPosition() ) { - return mark.getBlockAttributeId() === item.id && ! mark.isMarkForFirstBlockPair(); + // Filter the marks array and only include the marks that are intended for the second sub-block section. + return mark.getBlockAttributeId() === item.id && ! mark.isMarkForFirstBlockSection(); } return mark; } ); - const firstIdentifierAnnotations = createAnnotations( firstIdentifierHtml, firstIdentifier, attribute, block, marksForFirstIdentifier ); - const secondIdentifierAnnotations = createAnnotations( secondIdentifierHtml, secondIdentifier, attribute, block, marksForSecondIdentifier ); + const firstSectionAnnotations = createAnnotations( firstSectionHtml, firstSectionIdentifier, attribute, block, marksForFirstSection ); + const secondSectionAnnotations = createAnnotations( secondSectionHtml, secondSectionIdentifier, attribute, block, marksForSecondSection ); - return firstIdentifierAnnotations.concat( secondIdentifierAnnotations ); + return firstSectionAnnotations.concat( secondSectionAnnotations ); } ); return flattenDeep( array ); }; diff --git a/packages/yoastseo/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js b/packages/yoastseo/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js index c840d5d38df..fde78d5c2ba 100644 --- a/packages/yoastseo/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js +++ b/packages/yoastseo/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js @@ -109,7 +109,7 @@ function getMarkingsInSentence( sentence, matchesInSentence ) { endOffsetBlock: endOffset - sentence.parentStartOffset, clientId: sentence.parentClientId, attributeId: sentence.parentAttributeId, - isFirstPair: sentence.isParentFirstBlockPair, + isFirstSection: sentence.isParentFirstSectionOfBlock, }, marked: markedSentence, original: sentence.text, diff --git a/packages/yoastseo/src/languageProcessing/helpers/sentence/getSentencesFromTree.js b/packages/yoastseo/src/languageProcessing/helpers/sentence/getSentencesFromTree.js index 94e5e5d7ba4..71da23be3f4 100644 --- a/packages/yoastseo/src/languageProcessing/helpers/sentence/getSentencesFromTree.js +++ b/packages/yoastseo/src/languageProcessing/helpers/sentence/getSentencesFromTree.js @@ -4,7 +4,7 @@ * * @param {Paper} paper The paper to get the sentences from. * - * @returns {Object[]} The array of sentences retrieved from paragraph and heading nodes plus sourceCodeLocation of the parent node. + * @returns {Sentence[]} The array of sentences retrieved from paragraph and heading nodes plus sourceCodeLocation of the parent node. */ export default function( paper ) { // Get all nodes that have a sentence property which is not an empty array. 
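+	// (Illustrative) for a tree built from "<p>Hello, world!</p>" that is just the paragraph node, whose
+	// `sentences` array holds entries like { text: "Hello, world!", tokens: [ ... ], sourceCodeRange: { ... } }.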
@@ -13,11 +13,15 @@ export default function( paper ) { return tree.flatMap( node => node.sentences.map( s => { return { ...s, - parentStartOffset: ( node.sourceCodeLocation.startTag && node.sourceCodeLocation.startTag.endOffset ) || - node.sourceCodeLocation.startOffset, + // The parent node's start offset is the end offset of its start tag, or the node's own start offset if it has no `startTag` property. + parentStartOffset: node.sourceCodeLocation && ( ( node.sourceCodeLocation.startTag && node.sourceCodeLocation.startTag.endOffset ) || + node.sourceCodeLocation.startOffset ), + // The block client id of the parent node. parentClientId: node.clientId, + // The attribute id of the parent node, if available, otherwise an empty string. parentAttributeId: node.attributeId || "", - isParentFirstBlockPair: node.isFirstPair || false, + // Whether the parent node is the first section of a Yoast sub-block. + isParentFirstSectionOfBlock: node.isFirstSection || false, }; } ) ); } diff --git a/packages/yoastseo/src/parse/build/private/parseBlocks.js b/packages/yoastseo/src/parse/build/private/parseBlocks.js index cd57e90f999..4e956754ea0 100644 --- a/packages/yoastseo/src/parse/build/private/parseBlocks.js +++ b/packages/yoastseo/src/parse/build/private/parseBlocks.js @@ -37,7 +37,6 @@ function updateBlocksOffset( blocks, text ) { if ( blocks.length === 0 ) { return; } - // eslint-disable-next-line no-constant-condition blocks.forEach( currentBlock => { const matches = blockTokenizer.exec( text ); @@ -76,9 +75,9 @@ function updateClientIdAndAttrIdForSubtree( rootNode, blocks, clientId ) { let currentClientId = clientId; if ( rootNode.sourceCodeLocation && rootNode.sourceCodeLocation.startOffset ) { - const block = blocks.find( ( b ) => b.contentOffset === rootNode.sourceCodeLocation.startOffset ); - if ( block ) { - currentClientId = block.clientId; + const foundBlock = blocks.find( block => block.contentOffset === rootNode.sourceCodeLocation.startOffset ); + if ( foundBlock ) { + currentClientId = foundBlock.clientId; } } @@ -90,13 +89,25 @@ function updateClientIdAndAttrIdForSubtree( rootNode, blocks, clientId ) { // If the node has children, update the clientId for them. ( rootNode.childNodes || [] ).forEach( ( node ) => { if ( node.attributes && node.attributes.id ) { - // If the node's child has an attribute with 'id' key, also set this id to the first and third child node. + /* + * If the node's child has an attribute with an 'id' key, also set this id to the first and third child node. + * This step is specifically for parsing Yoast blocks. + * + * For Yoast blocks, if a node has an attribute with an 'id' key, this means that the node represents a sub-block. + * A sub-block has four child nodes: + * 1. The first child node of the sub-block represents the first section of that sub-block. + * - For example, in a Yoast FAQ block, the first child node would represent the "question" section. + * 2. The second child node of the sub-block only contains a new line. + * 3. The third child node of the sub-block represents the second section of that sub-block. + * - For example, in a Yoast FAQ block, the third child node would represent the "answer" section. + * 4. The fourth child node of the sub-block only contains a new line.
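+			 *
+			 * (Illustrative) For a FAQ sub-block this means childNodes[ 0 ] ends up carrying the "question" section
+			 * and childNodes[ 2 ] the "answer" section; both get tagged with the sub-block's attribute id below.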
+ */ if ( node.childNodes && node.childNodes.length > 3 ) { node.childNodes[ 0 ].attributeId = node.attributes.id; - node.childNodes[ 0 ].isFirstPair = true; + node.childNodes[ 0 ].isFirstSection = true; node.childNodes[ 2 ].attributeId = node.attributes.id; - node.childNodes[ 2 ].isFirstPair = false; + node.childNodes[ 2 ].isFirstSection = false; } } updateClientIdAndAttrIdForSubtree( node, blocks, currentClientId ); @@ -105,7 +116,7 @@ function updateClientIdAndAttrIdForSubtree( rootNode, blocks, clientId ) { /** * Parses blocks and updates the clientId for the subtree. - * Additionally, when a node has an attribute with 'id' key, also set this id to the first and third child nod of that node. + * Additionally, when a node has an attribute with 'id' key, also set this id to the first and third child node of that node. * * @param {Paper} paper The paper to parse. * @param {Node} node The node to parse. From 7bdd0cb08520617fcd8ee17e071f42e2eec6e5fc Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Tue, 25 Jul 2023 15:02:35 +0200 Subject: [PATCH 271/616] Add and adjust unit tests --- .../yoastseo/spec/parse/build/buildSpec.js | 690 +++++++++++++++++- 1 file changed, 673 insertions(+), 17 deletions(-) diff --git a/packages/yoastseo/spec/parse/build/buildSpec.js b/packages/yoastseo/spec/parse/build/buildSpec.js index 4669044bbe6..7ce35467cf7 100644 --- a/packages/yoastseo/spec/parse/build/buildSpec.js +++ b/packages/yoastseo/spec/parse/build/buildSpec.js @@ -1,5 +1,6 @@ import build from "../../../src/parse/build/build"; import LanguageProcessor from "../../../src/parse/language/LanguageProcessor"; +import Paper from "../../../src/values/Paper"; import Factory from "../../specHelpers/factory"; import memoizedSentenceTokenizer from "../../../src/languageProcessing/helpers/sentence/memoizedSentenceTokenizer"; import splitIntoTokensCustom from "../../../src/languageProcessing/languages/ja/helpers/splitIntoTokensCustom"; @@ -7,12 +8,13 @@ import splitIntoTokensCustom from "../../../src/languageProcessing/languages/ja/ describe( "The parse function", () => { it( "parses a basic HTML text", () => { const html = "

Hello, world!

"; + const paper = new Paper( html ); const researcher = Factory.buildMockResearcher( {}, true, false, false, { memoizedTokenizer: memoizedSentenceTokenizer } ); const languageProcessor = new LanguageProcessor( researcher ); - expect( build( html, languageProcessor ) ).toEqual( { + expect( build( paper, languageProcessor ) ).toEqual( { name: "#document-fragment", attributes: {}, childNodes: [ { @@ -70,11 +72,12 @@ describe( "The parse function", () => { it( "parses a basic Japanese HTML text", () => { const html = "

犬が大好き

"; + const paper = new Paper( html ); const researcher = Factory.buildMockResearcher( {}, true, false, false, { splitIntoTokensCustom: splitIntoTokensCustom, memoizedTokenizer: memoizedSentenceTokenizer } ); const languageProcessor = new LanguageProcessor( researcher ); - expect( build( html, languageProcessor ) ).toEqual( { + expect( build( paper, languageProcessor ) ).toEqual( { name: "#document-fragment", attributes: {}, childNodes: [ { @@ -130,6 +133,7 @@ describe( "The parse function", () => { it( "adds implicit paragraphs around phrasing content outside of paragraphs and headings", () => { const html = "
Hello World!
"; + const paper = new Paper( html ); const researcher = Factory.buildMockResearcher( {}, true, false, false, { memoizedTokenizer: memoizedSentenceTokenizer } ); @@ -147,7 +151,7 @@ describe( "The parse function", () => { * [/#document-fragment] * ``` */ - expect( build( html, languageProcessor ) ).toEqual( { + expect( build( paper, languageProcessor ) ).toEqual( { name: "#document-fragment", attributes: {}, childNodes: [ { @@ -216,6 +220,7 @@ describe( "The parse function", () => { it( "parses another HTML text and adds implicit paragraphs where needed", () => { const html = "
Hello

World!

"; + const paper = new Paper( html ); const researcher = Factory.buildMockResearcher( {}, true, false, false, { memoizedTokenizer: memoizedSentenceTokenizer } ); @@ -236,7 +241,7 @@ describe( "The parse function", () => { * [/#document-fragment] * ``` */ - expect( build( html, languageProcessor ) ).toEqual( { + expect( build( paper, languageProcessor ) ).toEqual( { name: "#document-fragment", attributes: {}, childNodes: [ @@ -319,6 +324,7 @@ describe( "The parse function", () => { it( "parses an HTML text with implicit paragraphs before, between, and after p tags", () => { const html = "
So long, and

thanks

for

all

the fish!
"; + const paper = new Paper( html ); const researcher = Factory.buildMockResearcher( {}, true, false, false, { memoizedTokenizer: memoizedSentenceTokenizer } ); @@ -348,7 +354,7 @@ describe( "The parse function", () => { * [/#document-fragment] * ``` */ - expect( build( html, languageProcessor ) ).toEqual( { + expect( build( paper, languageProcessor ) ).toEqual( { name: "#document-fragment", attributes: {}, childNodes: [ @@ -572,12 +578,13 @@ describe( "The parse function", () => { const html = "\n
" + "

Table of contents

\n

This is the first sentence.

"; + const paper = new Paper( html ); const researcher = Factory.buildMockResearcher( {}, true, false, false, { memoizedTokenizer: memoizedSentenceTokenizer } ); const languageProcessor = new LanguageProcessor( researcher ); - expect( build( html, languageProcessor ) ).toEqual( { + expect( build( paper, languageProcessor ) ).toEqual( { name: "#document-fragment", attributes: {}, childNodes: [ @@ -718,12 +725,13 @@ describe( "The parse function", () => { it( "parses an HTML text with a Yoast breadcrumbs widget in Elementor, which should be filtered out", () => { // HTML:

The first sentence

const html = "

Home

The first sentence

"; + const paper = new Paper( html ); const researcher = Factory.buildMockResearcher( {}, true, false, false, { memoizedTokenizer: memoizedSentenceTokenizer } ); const languageProcessor = new LanguageProcessor( researcher ); - expect( build( html, languageProcessor ) ).toEqual( + expect( build( paper, languageProcessor ) ).toEqual( { name: "#document-fragment", attributes: {}, @@ -803,12 +811,13 @@ describe( "The parse function", () => { } ); it( "parses an HTML text with a script element inside a paragraph", () => { const html = "

Hello, world!

"; + const paper = new Paper( html ); const researcher = Factory.buildMockResearcher( {}, true, false, false, { memoizedTokenizer: memoizedSentenceTokenizer } ); const languageProcessor = new LanguageProcessor( researcher ); - expect( build( html, languageProcessor ) ).toEqual( { + expect( build( paper, languageProcessor ) ).toEqual( { name: "#document-fragment", attributes: {}, childNodes: [ @@ -911,12 +920,13 @@ describe( "The parse function", () => { } ); it( "parses an HTML text with a script element outside of a paragraph", () => { const html = "

Hello, world!

"; + const paper = new Paper( html ); const researcher = Factory.buildMockResearcher( {}, true, false, false, { memoizedTokenizer: memoizedSentenceTokenizer } ); const languageProcessor = new LanguageProcessor( researcher ); - expect( build( html, languageProcessor ) ).toEqual( { + expect( build( paper, languageProcessor ) ).toEqual( { name: "#document-fragment", attributes: {}, childNodes: [ { @@ -962,12 +972,13 @@ describe( "The parse function", () => { } ); it( "parses an HTML text with a comment inside a paragraph", () => { const html = "

Hello, world!

"; + const paper = new Paper( html ); const researcher = Factory.buildMockResearcher( {}, true, false, false, { memoizedTokenizer: memoizedSentenceTokenizer } ); const languageProcessor = new LanguageProcessor( researcher ); - expect( build( html, languageProcessor ) ).toEqual( { + expect( build( paper, languageProcessor ) ).toEqual( { name: "#document-fragment", attributes: {}, childNodes: [ { @@ -1029,12 +1040,13 @@ describe( "The parse function", () => { } ); it( "parses an HTML text with a comment within a sentence", () => { const html = "

Hello, world!

"; + const paper = new Paper( html ); const researcher = Factory.buildMockResearcher( {}, true, false, false, { memoizedTokenizer: memoizedSentenceTokenizer } ); const languageProcessor = new LanguageProcessor( researcher ); - expect( build( html, languageProcessor ) ).toEqual( { + expect( build( paper, languageProcessor ) ).toEqual( { name: "#document-fragment", attributes: {}, childNodes: [ { @@ -1102,12 +1114,13 @@ describe( "The parse function", () => { it( "parses an HTML text with a code element within a paragraph", () => { const html = "

Hello code! array.push( something ) Hello world!

"; + const paper = new Paper( html ); const researcher = Factory.buildMockResearcher( {}, true, false, false, { memoizedTokenizer: memoizedSentenceTokenizer } ); const languageProcessor = new LanguageProcessor( researcher ); - expect( build( html, languageProcessor ) ).toEqual( { + expect( build( paper, languageProcessor ) ).toEqual( { name: "#document-fragment", attributes: {}, childNodes: [ @@ -1251,12 +1264,13 @@ describe( "The parse function", () => { } ); it( "parses an HTML text with a code element within a sentence", () => { const html = "

Hello array.push( something ) code!

"; + const paper = new Paper( html ); const researcher = Factory.buildMockResearcher( {}, true, false, false, { memoizedTokenizer: memoizedSentenceTokenizer } ); const languageProcessor = new LanguageProcessor( researcher ); - expect( build( html, languageProcessor ) ).toEqual( { + expect( build( paper, languageProcessor ) ).toEqual( { name: "#document-fragment", attributes: {}, childNodes: [ { @@ -1317,12 +1331,13 @@ describe( "The parse function", () => { } ); it( "parses an HTML text with a code element with a child node within a sentence", () => { const html = "

Some text and code console.log( code )

"; + const paper = new Paper( html ); const researcher = Factory.buildMockResearcher( {}, true, false, false, { memoizedTokenizer: memoizedSentenceTokenizer } ); const languageProcessor = new LanguageProcessor( researcher ); - expect( build( html, languageProcessor ) ).toEqual( { + expect( build( paper, languageProcessor ) ).toEqual( { attributes: {}, childNodes: [ { @@ -1419,6 +1434,282 @@ describe( "The parse function", () => { name: "#document-fragment", } ); } ); + it( "also parses blocks when the blocks array is available inside Paper, " + + "the available block client id should also be added to the childNodes", () => { + const html = "\n" + + "

The red panda's coat is mainly red or orange-brown with a black belly and legs.

\n" + + "\n" + + "\n" + + "\n" + + "
    \n" + + "
  • giant panda
  • \n" + + "\n" + + "\n" + + "\n" + + "
  • red panda
  • \n" + + "
\n" + + ""; + const paper = new Paper( html, { wpBlocks: [ + { + clientId: "da950985-3903-479c-92f5-9a98bcec51e9", + name: "core/paragraph", + isValid: true, + originalContent: "

The red panda's coat is mainly red or orange-brown with a black belly and legs.

", + validationIssues: [], + attributes: { + content: "The red panda's coat is mainly red or orange-brown with a black belly and legs.", + dropCap: false, + }, + innerBlocks: [], + }, + { + clientId: "ce5c002f-eea2-4a92-be4a-6fd25e947f45", + name: "core/list", + isValid: true, + originalContent: "
    \n\n
", + validationIssues: [], + attributes: { + ordered: false, + values: "", + }, + innerBlocks: [ + { + clientId: "b45eed06-594b-468e-8723-1f597689c300", + name: "core/list-item", + isValid: true, + originalContent: "
  • giant panda
  • ", + validationIssues: [], + attributes: { + content: "giant panda", + }, + innerBlocks: [], + }, + { + clientId: "f8a22538-9e9d-4862-968d-524580f5d8ce", + name: "core/list-item", + isValid: true, + originalContent: "
  • red panda
  • ", + validationIssues: [], + attributes: { + content: "red panda", + }, + innerBlocks: [], + }, + ], + }, + ] } ); + + const researcher = Factory.buildMockResearcher( {}, true, false, false, + { memoizedTokenizer: memoizedSentenceTokenizer } ); + const languageProcessor = new LanguageProcessor( researcher ); + expect( build( paper, languageProcessor ) ).toEqual( + { + name: "#document-fragment", + attributes: {}, + childNodes: [ + { + name: "#comment", + attributes: {}, + childNodes: [], + sourceCodeLocation: { startOffset: 0, endOffset: 21 }, + }, + { name: "#text", value: "\n" }, + { + name: "p", + attributes: {}, + childNodes: [ + { + name: "#text", + value: "The red panda's coat is mainly red or orange-brown with a black belly and legs.", + clientId: "da950985-3903-479c-92f5-9a98bcec51e9", + }, + ], + sourceCodeLocation: { + startTag: { startOffset: 22, endOffset: 25 }, + endTag: { startOffset: 104, endOffset: 108 }, + startOffset: 22, + endOffset: 108, + }, + isImplicit: false, + clientId: "da950985-3903-479c-92f5-9a98bcec51e9", + sentences: [ + { + text: "The red panda's coat is mainly red or orange-brown with a black belly and legs.", + tokens: [ + { text: "The", sourceCodeRange: { startOffset: 25, endOffset: 28 } }, + { text: " ", sourceCodeRange: { startOffset: 28, endOffset: 29 } }, + { text: "red", sourceCodeRange: { startOffset: 29, endOffset: 32 } }, + { text: " ", sourceCodeRange: { startOffset: 32, endOffset: 33 } }, + { text: "panda's", sourceCodeRange: { startOffset: 33, endOffset: 40 } }, + { text: " ", sourceCodeRange: { startOffset: 40, endOffset: 41 } }, + { text: "coat", sourceCodeRange: { startOffset: 41, endOffset: 45 } }, + { text: " ", sourceCodeRange: { startOffset: 45, endOffset: 46 } }, + { text: "is", sourceCodeRange: { startOffset: 46, endOffset: 48 } }, + { text: " ", sourceCodeRange: { startOffset: 48, endOffset: 49 } }, + { text: "mainly", sourceCodeRange: { startOffset: 49, endOffset: 55 } }, + { text: " ", sourceCodeRange: { startOffset: 55, endOffset: 56 } }, + { text: "red", sourceCodeRange: { startOffset: 56, endOffset: 59 } }, + { text: " ", sourceCodeRange: { startOffset: 59, endOffset: 60 } }, + { text: "or", sourceCodeRange: { startOffset: 60, endOffset: 62 } }, + { text: " ", sourceCodeRange: { startOffset: 62, endOffset: 63 } }, + { text: "orange-brown", sourceCodeRange: { startOffset: 63, endOffset: 75 } }, + { text: " ", sourceCodeRange: { startOffset: 75, endOffset: 76 } }, + { text: "with", sourceCodeRange: { startOffset: 76, endOffset: 80 } }, + { text: " ", sourceCodeRange: { startOffset: 80, endOffset: 81 } }, + { text: "a", sourceCodeRange: { startOffset: 81, endOffset: 82 } }, + { text: " ", sourceCodeRange: { startOffset: 82, endOffset: 83 } }, + { text: "black", sourceCodeRange: { startOffset: 83, endOffset: 88 } }, + { text: " ", sourceCodeRange: { startOffset: 88, endOffset: 89 } }, + { text: "belly", sourceCodeRange: { startOffset: 89, endOffset: 94 } }, + { text: " ", sourceCodeRange: { startOffset: 94, endOffset: 95 } }, + { text: "and", sourceCodeRange: { startOffset: 95, endOffset: 98 } }, + { text: " ", sourceCodeRange: { startOffset: 98, endOffset: 99 } }, + { text: "legs", sourceCodeRange: { startOffset: 99, endOffset: 103 } }, + { text: ".", sourceCodeRange: { startOffset: 103, endOffset: 104 } }, + ], + sourceCodeRange: { startOffset: 25, endOffset: 104 }, + }, + ], + }, + { name: "#text", value: "\n" }, + { + name: "#comment", + attributes: {}, + childNodes: [], + sourceCodeLocation: { startOffset: 109, endOffset: 131 
}, + }, + { name: "#text", value: "\n\n" }, + { + name: "#comment", + attributes: {}, + childNodes: [], + sourceCodeLocation: { startOffset: 133, endOffset: 149 }, + }, + { name: "#text", value: "\n" }, + { + name: "ul", + attributes: {}, + childNodes: [ + { + name: "#comment", + attributes: {}, + childNodes: [], + sourceCodeLocation: { startOffset: 154, endOffset: 175 }, + clientId: "ce5c002f-eea2-4a92-be4a-6fd25e947f45", + }, + { name: "#text", value: "\n", clientId: "ce5c002f-eea2-4a92-be4a-6fd25e947f45" }, + { + name: "li", + attributes: {}, + childNodes: [ + { + name: "p", + attributes: {}, + childNodes: [ + { name: "#text", value: "giant panda", clientId: "b45eed06-594b-468e-8723-1f597689c300" }, + ], + sourceCodeLocation: { startOffset: 180, endOffset: 191 }, + isImplicit: true, + clientId: "b45eed06-594b-468e-8723-1f597689c300", + sentences: [ + { + text: "giant panda", + tokens: [ + { text: "giant", sourceCodeRange: { startOffset: 180, endOffset: 185 } }, + { text: " ", sourceCodeRange: { startOffset: 185, endOffset: 186 } }, + { text: "panda", sourceCodeRange: { startOffset: 186, endOffset: 191 } }, + ], + sourceCodeRange: { startOffset: 180, endOffset: 191 }, + }, + ], + }, + ], + sourceCodeLocation: { + startTag: { startOffset: 176, endOffset: 180 }, + endTag: { startOffset: 191, endOffset: 196 }, + startOffset: 176, + endOffset: 196, + }, + clientId: "b45eed06-594b-468e-8723-1f597689c300", + }, + { name: "#text", value: "\n", clientId: "ce5c002f-eea2-4a92-be4a-6fd25e947f45" }, + { + name: "#comment", + attributes: {}, + childNodes: [], + sourceCodeLocation: { startOffset: 197, endOffset: 219 }, + clientId: "ce5c002f-eea2-4a92-be4a-6fd25e947f45", + }, + { name: "#text", value: "\n\n", clientId: "ce5c002f-eea2-4a92-be4a-6fd25e947f45" }, + { + name: "#comment", + attributes: {}, + childNodes: [], + sourceCodeLocation: { startOffset: 221, endOffset: 242 }, + clientId: "ce5c002f-eea2-4a92-be4a-6fd25e947f45", + }, + { name: "#text", value: "\n", clientId: "ce5c002f-eea2-4a92-be4a-6fd25e947f45" }, + { + name: "li", + attributes: {}, + childNodes: [ + { + name: "p", + attributes: {}, + childNodes: [ + { name: "#text", value: "red panda", clientId: "f8a22538-9e9d-4862-968d-524580f5d8ce" }, + ], + sourceCodeLocation: { startOffset: 247, endOffset: 256 }, + isImplicit: true, + clientId: "f8a22538-9e9d-4862-968d-524580f5d8ce", + sentences: [ + { + text: "red panda", + tokens: [ + { text: "red", sourceCodeRange: { startOffset: 247, endOffset: 250 } }, + { text: " ", sourceCodeRange: { startOffset: 250, endOffset: 251 } }, + { text: "panda", sourceCodeRange: { startOffset: 251, endOffset: 256 } }, + ], + sourceCodeRange: { startOffset: 247, endOffset: 256 }, + }, + ], + }, + ], + sourceCodeLocation: { + startTag: { startOffset: 243, endOffset: 247 }, + endTag: { startOffset: 256, endOffset: 261 }, + startOffset: 243, endOffset: 261, + }, + clientId: "f8a22538-9e9d-4862-968d-524580f5d8ce", + }, + { name: "#text", value: "\n", clientId: "ce5c002f-eea2-4a92-be4a-6fd25e947f45" }, + { + name: "#comment", + attributes: {}, + childNodes: [], + sourceCodeLocation: { startOffset: 262, endOffset: 284 }, + clientId: "ce5c002f-eea2-4a92-be4a-6fd25e947f45", + }, + ], + sourceCodeLocation: { + startTag: { startOffset: 150, endOffset: 154 }, + endTag: { startOffset: 284, endOffset: 289 }, + startOffset: 150, + endOffset: 289, + }, + clientId: "ce5c002f-eea2-4a92-be4a-6fd25e947f45", + }, + { name: "#text", value: "\n" }, + { + name: "#comment", + attributes: {}, + childNodes: [], + sourceCodeLocation: { 
startOffset: 290, endOffset: 307 }, + }, + ], + } + ); + } ); } ); describe( "parsing html with Yoast blocks that enter the Paper as html comments", () => { @@ -1427,11 +1718,13 @@ describe( "parsing html with Yoast blocks that enter the Paper as html comments" "\n" + "

    The Norwegian Forest cat is adapted to survive Norway's cold weather.

    \n" + ""; + const paper = new Paper( html ); + const researcher = Factory.buildMockResearcher( {}, true, false, false, { memoizedTokenizer: memoizedSentenceTokenizer } ); const languageProcessor = new LanguageProcessor( researcher ); - expect( build( html, languageProcessor ) ).toEqual( { + expect( build( paper, languageProcessor ) ).toEqual( { name: "#document-fragment", attributes: {}, childNodes: [ @@ -1664,12 +1957,13 @@ describe( "parsing html with Yoast blocks that enter the Paper as html comments" it( "parses an HTML text with a Yoast siblings block", () => { const html = "

    Hello, world!

    "; + const paper = new Paper( html ); const researcher = Factory.buildMockResearcher( {}, true, false, false, { memoizedTokenizer: memoizedSentenceTokenizer } ); const languageProcessor = new LanguageProcessor( researcher ); - expect( build( html, languageProcessor ) ).toEqual( { + expect( build( paper, languageProcessor ) ).toEqual( { name: "#document-fragment", attributes: {}, childNodes: [ @@ -1757,12 +2051,13 @@ describe( "parsing html with Yoast blocks that enter the Paper as html comments" it( "parses an HTML text with a Yoast subpages block", () => { const html = "
    The Norwegian Forest cat is strongly built and larger than an average cat.
    "; + const paper = new Paper( html ); const researcher = Factory.buildMockResearcher( {}, true, false, false, { memoizedTokenizer: memoizedSentenceTokenizer } ); const languageProcessor = new LanguageProcessor( researcher ); - expect( build( html, languageProcessor ) ).toEqual( { + expect( build( paper, languageProcessor ) ).toEqual( { name: "#document-fragment", attributes: {}, childNodes: [ @@ -2004,4 +2299,365 @@ describe( "parsing html with Yoast blocks that enter the Paper as html comments" ], } ); } ); + + it( "parses an HTML text with a Yoast FAQ block: " + + "The block client id, attribute id, and the information whether a child node is " + + "the first section in the sub-block should be added to the tree", () => { + const html = "\n" + + "
    " + + "What is giant panda

    " + + "Giant panda is is relative to red panda

    " + + "Test

    Test

    " + + "
    " + + "giant panda is silly

    \n" + + ""; + const paper = new Paper( html, { + wpBlocks: [ + { + clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1", + name: "yoast/faq-block", + isValid: true, + originalContent: "
    " + + "What is giant panda

    Giant panda" + + " is relative to red panda

    " + + "Test

    Tets

    giant panda is silly " + + "

    ", + validationIssues: [], + attributes: { + questions: [ + { + id: "faq-question-1689322642789", + question: [ "What is giant panda" ], + answer: [ "Giant ", { type: "strong", props: { children: [ "panda" ] } }, " is relative to red panda" ], + jsonQuestion: "What is giant panda", + jsonAnswer: "Giant panda is is relative to red panda", + }, + { + id: "faq-question-1689322667728", + question: [ "Test" ], + answer: [ "Tests" ], + jsonQuestion: "Test", + jsonAnswer: "Tests", + }, + { + id: "faq-question-1689936392675", + question: [ "giant panda is silly" ], + answer: [], + jsonQuestion: "giant panda is silly", + jsonAnswer: "", + }, + ], + }, + innerBlocks: [], + startOffset: 2510, + contentOffset: 2963, + }, + ], + } ); + + const researcher = Factory.buildMockResearcher( {}, true, false, false, + { memoizedTokenizer: memoizedSentenceTokenizer } ); + const languageProcessor = new LanguageProcessor( researcher ); + expect( build( paper, languageProcessor ) ).toEqual( + { + name: "#document-fragment", + attributes: {}, + childNodes: [ + { + name: "#comment", + attributes: {}, + childNodes: [], + sourceCodeLocation: { startOffset: 0, endOffset: 458 }, + }, + { name: "#text", value: "\n" }, + { + name: "div", + attributes: { "class": {} }, + childNodes: [ + { + name: "div", + attributes: { "class": {}, id: "faq-question-1689322642789" }, + childNodes: [ + { + name: "p", + attributes: {}, + childNodes: [ + { + name: "strong", + attributes: { "class": {} }, + childNodes: [ + { name: "#text", value: "What is giant panda", + clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1" }, + ], + sourceCodeLocation: { + startTag: { startOffset: 572, endOffset: 608 }, + endTag: { startOffset: 627, endOffset: 636 }, + startOffset: 572, + endOffset: 636, + }, + clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1", + }, + ], + sourceCodeLocation: { startOffset: 572, endOffset: 727 }, + isImplicit: true, + attributeId: "faq-question-1689322642789", + isFirstPair: true, + clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1", + sentences: [ + { + text: "What is giant panda", + tokens: [ + { text: "What", sourceCodeRange: { startOffset: 608, endOffset: 612 } }, + { text: " ", sourceCodeRange: { startOffset: 612, endOffset: 613 } }, + { text: "is", sourceCodeRange: { startOffset: 613, endOffset: 615 } }, + { text: " ", sourceCodeRange: { startOffset: 615, endOffset: 616 } }, + { text: "giant", sourceCodeRange: { startOffset: 616, endOffset: 621 } }, + { text: " ", sourceCodeRange: { startOffset: 621, endOffset: 622 } }, + { text: "panda", sourceCodeRange: { startOffset: 622, endOffset: 627 } }, + ], + sourceCodeRange: { startOffset: 608, endOffset: 627 }, + }, + ], + }, + { name: "#text", value: " ", clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1" }, + { + name: "p", + attributes: { "class": {} }, + childNodes: [ + { name: "#text", value: "Giant ", clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1" }, + { + name: "strong", + attributes: {}, + childNodes: [ + { name: "#text", value: "panda", clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1" }, + ], + sourceCodeLocation: { + startTag: { startOffset: 672, endOffset: 680 }, + endTag: { startOffset: 685, endOffset: 694 }, + startOffset: 672, + endOffset: 694, + }, + clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1", + }, + { name: "#text", value: " is is relative to red panda", + clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1" }, + ], + sourceCodeLocation: { + startTag: { startOffset: 637, endOffset: 666 }, + endTag: { startOffset: 722, endOffset: 726 }, + startOffset: 637, + 
endOffset: 726, + }, + isImplicit: false, + attributeId: "faq-question-1689322642789", + isFirstPair: false, + clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1", + sentences: [ + { + text: "Giant panda is is relative to red panda", + tokens: [ + { text: "Giant", sourceCodeRange: { startOffset: 666, endOffset: 671 } }, + { text: " ", sourceCodeRange: { startOffset: 671, endOffset: 672 } }, + { text: "panda", sourceCodeRange: { startOffset: 680, endOffset: 685 } }, + { text: " ", sourceCodeRange: { startOffset: 694, endOffset: 695 } }, + { text: "is", sourceCodeRange: { startOffset: 695, endOffset: 697 } }, + { text: " ", sourceCodeRange: { startOffset: 697, endOffset: 698 } }, + { text: "is", sourceCodeRange: { startOffset: 698, endOffset: 700 } }, + { text: " ", sourceCodeRange: { startOffset: 700, endOffset: 701 } }, + { text: "relative", sourceCodeRange: { startOffset: 701, endOffset: 709 } }, + { text: " ", sourceCodeRange: { startOffset: 709, endOffset: 710 } }, + { text: "to", sourceCodeRange: { startOffset: 710, endOffset: 712 } }, + { text: " ", sourceCodeRange: { startOffset: 712, endOffset: 713 } }, + { text: "red", sourceCodeRange: { startOffset: 713, endOffset: 716 } }, + { text: " ", sourceCodeRange: { startOffset: 716, endOffset: 717 } }, + { text: "panda", sourceCodeRange: { startOffset: 717, endOffset: 722 } }, + ], + sourceCodeRange: { startOffset: 666, endOffset: 722 }, + }, + ], + }, + { name: "#text", value: " ", clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1" }, + ], + sourceCodeLocation: { + startTag: { startOffset: 508, endOffset: 572 }, + endTag: { startOffset: 727, endOffset: 733 }, + startOffset: 508, + endOffset: 733, + }, + clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1", + }, + { name: "#text", value: " ", clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1" }, + { + name: "div", + attributes: { "class": {}, id: "faq-question-1689322667728" }, + childNodes: [ + { + name: "p", + attributes: {}, + childNodes: [ + { + name: "strong", + attributes: { "class": {} }, + childNodes: [ + { name: "#text", value: "Test", clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1" }, + ], + sourceCodeLocation: { + startTag: { startOffset: 798, endOffset: 834 }, + endTag: { startOffset: 838, endOffset: 847 }, + startOffset: 798, + endOffset: 847, + }, + clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1", + }, + ], + sourceCodeLocation: { startOffset: 798, endOffset: 886 }, + isImplicit: true, + attributeId: "faq-question-1689322667728", + isFirstPair: true, + clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1", + sentences: [ + { + text: "Test", + tokens: [ + { text: "Test", sourceCodeRange: { startOffset: 834, endOffset: 838 } }, + ], + sourceCodeRange: { startOffset: 834, endOffset: 838 }, + }, + ], + }, + { name: "#text", value: " ", clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1" }, + { + name: "p", + attributes: { "class": {} }, + childNodes: [ + { name: "#text", value: "Test", clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1" }, + ], + sourceCodeLocation: { + startTag: { startOffset: 848, endOffset: 877 }, + endTag: { startOffset: 881, endOffset: 885 }, + startOffset: 848, + endOffset: 885, + }, + isImplicit: false, + attributeId: "faq-question-1689322667728", + isFirstPair: false, + clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1", + sentences: [ + { + text: "Test", + tokens: [ + { text: "Test", sourceCodeRange: { startOffset: 877, endOffset: 881 } }, + ], + sourceCodeRange: { startOffset: 877, endOffset: 881 }, + }, + ], + }, + { name: "#text", value: " ", clientId: 
"a062b3dd-26d5-4d33-b59f-9746d13d1ee1" }, + ], + sourceCodeLocation: { + startTag: { startOffset: 734, endOffset: 798 }, + endTag: { startOffset: 886, endOffset: 892 }, + startOffset: 734, + endOffset: 892, + }, + clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1", + }, + { name: "#text", value: " ", clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1" }, + { + name: "div", + attributes: { "class": {}, id: "faq-question-1689936392675" }, + childNodes: [ + { + name: "p", + attributes: {}, + childNodes: [ + { + name: "strong", + attributes: { "class": {} }, + childNodes: [ + { name: "#text", value: "giant panda is silly", + clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1" }, + ], + sourceCodeLocation: { + startTag: { startOffset: 957, endOffset: 993 }, + endTag: { startOffset: 1013, endOffset: 1022 }, + startOffset: 957, + endOffset: 1022, + }, + clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1", + }, + ], + sourceCodeLocation: { startOffset: 957, endOffset: 1057 }, + isImplicit: true, + attributeId: "faq-question-1689936392675", + isFirstPair: true, + clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1", + sentences: [ + { + text: "giant panda is silly", + tokens: [ + { text: "giant", sourceCodeRange: { startOffset: 993, endOffset: 998 } }, + { text: " ", sourceCodeRange: { startOffset: 998, endOffset: 999 } }, + { text: "panda", sourceCodeRange: { startOffset: 999, endOffset: 1004 } }, + { text: " ", sourceCodeRange: { startOffset: 1004, endOffset: 1005 } }, + { text: "is", sourceCodeRange: { startOffset: 1005, endOffset: 1007 } }, + { text: " ", sourceCodeRange: { startOffset: 1007, endOffset: 1008 } }, + { text: "silly", sourceCodeRange: { startOffset: 1008, endOffset: 1013 } }, + ], + sourceCodeRange: { startOffset: 993, endOffset: 1013 }, + }, + ], + }, + { name: "#text", value: " ", clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1" }, + { + name: "p", + attributes: { "class": {} }, + childNodes: [], + sourceCodeLocation: { + startTag: { startOffset: 1023, endOffset: 1052 }, + endTag: { startOffset: 1052, endOffset: 1056 }, + startOffset: 1023, + endOffset: 1056, + }, + isImplicit: false, + attributeId: "faq-question-1689936392675", + isFirstPair: false, + clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1", + sentences: [], + }, + { name: "#text", value: " ", clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1" }, + ], + sourceCodeLocation: { + startTag: { startOffset: 893, endOffset: 957 }, + endTag: { startOffset: 1057, endOffset: 1063 }, + startOffset: 893, + endOffset: 1063, + }, + clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1", + }, + { name: "#text", value: " ", clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1" }, + ], + sourceCodeLocation: { + startTag: { startOffset: 459, endOffset: 508 }, + endTag: { startOffset: 1064, endOffset: 1070 }, + startOffset: 459, + endOffset: 1070, + }, + clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1", + }, + { name: "#text", value: "\n" }, + { name: "#comment", attributes: {}, childNodes: [], sourceCodeLocation: { startOffset: 1071, endOffset: 1099 } }, + ], + } + ); + } ); } ); From e06e662e2610b6d1442a7b6660c5d72961c62317 Mon Sep 17 00:00:00 2001 From: hdvos Date: Tue, 25 Jul 2023 16:05:18 +0200 Subject: [PATCH 272/616] Fix padded blocks eslint error --- .../helpers/sanitize/filterShortcodesFromTreeSpec.js | 2 -- 1 file changed, 2 deletions(-) diff --git a/packages/yoastseo/spec/languageProcessing/helpers/sanitize/filterShortcodesFromTreeSpec.js b/packages/yoastseo/spec/languageProcessing/helpers/sanitize/filterShortcodesFromTreeSpec.js index 
ca31f87556e..fe4f701d8ad 100644 --- a/packages/yoastseo/spec/languageProcessing/helpers/sanitize/filterShortcodesFromTreeSpec.js +++ b/packages/yoastseo/spec/languageProcessing/helpers/sanitize/filterShortcodesFromTreeSpec.js @@ -259,7 +259,6 @@ describe( "filterShortcodesFromTree", function() { ] ); expect( tree.childNodes[ 0 ].sentences[ 0 ].text ).toEqual( "This is a test." ); - } ); it( "should not filter a word between square brackets if it is not a shortcode", function() { @@ -297,7 +296,6 @@ describe( "filterShortcodesFromTree", function() { ] ); expect( tree.sentences[ 0 ].text ).toEqual( "This is a test." ); - } ); it( "should not filter if no shortcodes are available", function() { const tree = { From ea827261fd8cce922ec89f41ad7b017b64bda9b2 Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Tue, 25 Jul 2023 16:26:21 +0200 Subject: [PATCH 273/616] Adjust docs --- .../helpers/getAnnotationsHelpers.js | 66 +++++++++++++++---- 1 file changed, 53 insertions(+), 13 deletions(-) diff --git a/packages/js/src/decorator/helpers/getAnnotationsHelpers.js b/packages/js/src/decorator/helpers/getAnnotationsHelpers.js index 5ac23553b74..f29b79df01b 100644 --- a/packages/js/src/decorator/helpers/getAnnotationsHelpers.js +++ b/packages/js/src/decorator/helpers/getAnnotationsHelpers.js @@ -198,7 +198,7 @@ export function createAnnotationsFromPositionBasedMarks( clientId, mark, html, t }, ]; } - // Adjust block position start and end. + // Adjust block start and end offset. let startOffset = mark.getBlockPositionStart(); let endOffset = mark.getBlockPositionEnd(); // Retrieve the html from the start until the startOffset of the mark. @@ -208,6 +208,12 @@ export function createAnnotationsFromPositionBasedMarks( clientId, mark, html, t /* * Loop through the found html tags backwards, and adjust the start and end offsets of the mark * by subtracting them with the length of the found html tags. + * + * This step is necessary to account for the difference in the way we "parse" the block and calculate the token position + * between `yoastseo` package and block annotation API. + * Inside `yoastseo`, the token's position information also takes into account all the HTML tags surrounding it in a block. + * However, the block annotation API applies annotations to "clean" text/html without any HTML tags. + * As a result, the token position information we retrieve from `yoastseo` wouldn't match that of block annotation API. */ for ( let i = foundHtmlTags.length - 1; i >= 0; i-- ) { const currentHtmlTag = foundHtmlTags[ i ][ 0 ]; @@ -228,7 +234,7 @@ export function createAnnotationsFromPositionBasedMarks( clientId, mark, html, t * Creates the annotations for a block. * * @param {string} html The string where we want to apply the annotations. - * @param {string} richTextIdentifier The identifier for the annotatable richt text. + * @param {string} richTextIdentifier The identifier for the annotatable rich text. * @param {Object} attribute The attribute to apply annotations to. * @param {Object} block The block information in the state. * @param {Mark[]} marks The marks to turn into annotations. @@ -275,33 +281,57 @@ function createAnnotations( html, richTextIdentifier, attribute, block, marks ) * The regex to capture the first section of a Yoast sub-block. * @type {RegExp} */ -const firstIdentifierRegex = /()(.*?)(<\/strong>)/gis; +const firstSectionRegex = /()(.*?)(<\/strong>)/gis; /** * Adjusts the block start and end offset for a given mark. * - * @param {Mark} mark The Mark object to adjust. 
- * @param {Object} block The block to get the original content from. - * @param {String} firstIdentifierHtml The html of the first identifier. + * @param {Mark} mark The Mark object to adjust. + * @param {Object} block The block to get the original content from. + * @param {String} firstSectionHtml The html of the first section of the sub-block. * * @returns {Mark} The adjusted mark object. */ -const adjustBlockOffset = ( mark, block, firstIdentifierHtml ) => { +const adjustBlockOffset = ( mark, block, firstSectionHtml ) => { + // Get the original content of the block, which still contains the html tags. + // The original content example for a Yoast FAQ block: + /* +
    + What is giant panda +

    Giant panda is test tests

    +
    Test +

    Tests

    + giant panda is silly

    + */ const originalContent = block.originalContent; - const firstPairElements = [ ...originalContent.matchAll( firstIdentifierRegex ) ]; - if ( firstPairElements.length === 0 ) { + + /* + * From the original content, get all the first sections of the sub-block. + * + * Example of the first section of a Yoast FAQ sub-block from the original content above: + * 1. What is giant panda + * 2. Test + * 3. giant panda is silly + */ + const firstSectionElements = [ ...originalContent.matchAll( firstSectionRegex ) ]; + + if ( firstSectionElements.length === 0 ) { return mark; } + const startOffset = mark.getBlockPositionStart(); const endOffset = mark.getBlockPositionEnd(); - firstPairElements.forEach( firstPairElement => { + + firstSectionElements.forEach( firstSectionElement => { // Destructure the matched element based on the capturing groups. - const [ , openTag, , innerText ] = firstPairElement; - if ( innerText === firstIdentifierHtml ) { + const [ , openTag, , innerText ] = firstSectionElement; + + if ( innerText === firstSectionHtml ) { mark.setBlockPositionStart( startOffset - openTag.length ); mark.setBlockPositionEnd( endOffset - openTag.length ); } } ); + return mark; }; @@ -365,13 +395,22 @@ const getAnnotationsFromArray = ( array, marks, attribute, block, identifierPair return mark; } ); - // For the second section marks, we need to adjust the block start and end offset for the first sub-block section + /* + * For the first section marks, we need to adjust the block start and end offset. + * + * This is because the first section of a Yoast block is always wrapped in `` tags. + * In `yoastseo`, when calculating the position information of the matched token, we also take + * into account the length of `` tags. + * However, here, the html for the first section doesn't include the `` tags. + * As a result, the position information of the matched token will be incorrect. + */ marksForFirstSection = marksForFirstSection.map( mark => { if ( mark.hasBlockPosition() ) { return adjustBlockOffset( mark, block, firstSectionHtml ); } return mark; } ); + const marksForSecondSection = marks.filter( mark => { if ( mark.hasBlockPosition() ) { // Filter the marks array and only include the marks that are intended for the second sub-block section. 
@@ -386,6 +425,7 @@ const getAnnotationsFromArray = ( array, marks, attribute, block, identifierPair return firstSectionAnnotations.concat( secondSectionAnnotations ); } ); + return flattenDeep( array ); }; From 03294fa4b3c2981db3f126efbc590d4d29eaa220 Mon Sep 17 00:00:00 2001 From: hdvos Date: Tue, 25 Jul 2023 16:38:00 +0200 Subject: [PATCH 274/616] mock window to make collectAnalysisData.test.js work --- packages/js/tests/collectAnalysisData.test.js | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/packages/js/tests/collectAnalysisData.test.js b/packages/js/tests/collectAnalysisData.test.js index 1f733b07ae0..30a5f80a773 100644 --- a/packages/js/tests/collectAnalysisData.test.js +++ b/packages/js/tests/collectAnalysisData.test.js @@ -2,6 +2,14 @@ import collectAnalysisData from "../src/analysis/collectAnalysisData"; import gutenbergBlocks from "./__test-data__/gutenbergBlocksTestData"; import expectedBlocks from "./__test-data__/blocksForAnalysisTestData"; +const originalWindow = { ...window }; +const windowSpy = jest.spyOn( global, "window", "get" ); +windowSpy.mockImplementation( () => ( { + ...originalWindow, + // eslint-disable-next-line camelcase + wpseoScriptData: { analysis: { plugins: { shortcodes: { wpseo_shortcode_tags: [] } } } }, +} ) ); + describe( "collectAnalysisData", () => { const storeData = { focusKeyword: "focus keyword", From bb8faff9ae0b6e24af2b761d86c4f945c296dc52 Mon Sep 17 00:00:00 2001 From: hdvos Date: Wed, 26 Jul 2023 09:57:06 +0200 Subject: [PATCH 275/616] filter shortcodes from paper._text. --- .../sanitize/filterShortcodesFromTreeSpec.js | 42 ++++++++++++++++++- .../spec/specHelpers/parse/buildTree.js | 4 ++ .../sanitize/filterShortcodesFromTree.js | 22 +++++++++- packages/yoastseo/src/scoring/assessor.js | 4 ++ .../yoastseo/src/worker/AnalysisWebWorker.js | 9 ++++ 5 files changed, 79 insertions(+), 2 deletions(-) diff --git a/packages/yoastseo/spec/languageProcessing/helpers/sanitize/filterShortcodesFromTreeSpec.js b/packages/yoastseo/spec/languageProcessing/helpers/sanitize/filterShortcodesFromTreeSpec.js index fe4f701d8ad..e73adaed6e7 100644 --- a/packages/yoastseo/spec/languageProcessing/helpers/sanitize/filterShortcodesFromTreeSpec.js +++ b/packages/yoastseo/spec/languageProcessing/helpers/sanitize/filterShortcodesFromTreeSpec.js @@ -1,4 +1,4 @@ -import filterShortcodesFromTree from "../../../../src/languageProcessing/helpers/sanitize/filterShortcodesFromTree"; +import filterShortcodesFromTree, { filterShortcodesFromHTML } from "../../../../src/languageProcessing/helpers/sanitize/filterShortcodesFromTree"; describe( "filterShortcodesFromTree", function() { @@ -341,3 +341,43 @@ describe( "filterShortcodesFromTree", function() { expect( tree.sentences[ 0 ].text ).toEqual( "This is a [shortcode] test." ); } ); } ); + + +describe( "filterShortcodesFromHTML", function() { + it( "should filter a shortcode from an html string", function() { + const html = "

    This is a [shortcode] test.

    "; + + const shortcodes = [ "shortcode" ]; + + const filtered = filterShortcodesFromHTML( html, shortcodes ); + + expect( filtered ).toEqual( "

    This is a test.

    " ); + } ); + it( "should filter a shortcode with parameters from an html string", function() { + const html = "

    This is a [shortcode param=\"value\"] test.

    "; + + const shortcodes = [ "shortcode" ]; + + const filtered = filterShortcodesFromHTML( html, shortcodes ); + + expect( filtered ).toEqual( "

    This is a test.

    " ); + } ); + it( "should filter multiple shortcodes from an html string", function() { + const html = "

    This is a [shortcode] test.

    This is a [another_shortcode] test.

    "; + + const shortcodes = [ "shortcode", "another_shortcode" ]; + + const filtered = filterShortcodesFromHTML( html, shortcodes ); + + expect( filtered ).toEqual( "

    This is a test.

    This is a test.

    " ); + } ); + it( "should not filter something between square brackets if it is not a shortcode.", function() { + const html = "

    This is a [not_a_shortcode] test.

    "; + + const shortcodes = [ "shortcode", "another_shortcode" ]; + + const filtered = filterShortcodesFromHTML( html, shortcodes ); + + expect( filtered ).toEqual( "

    This is a [not_a_shortcode] test.

    " ); + } ); +} ); diff --git a/packages/yoastseo/spec/specHelpers/parse/buildTree.js b/packages/yoastseo/spec/specHelpers/parse/buildTree.js index adeb2358faa..b3b1d65c0e4 100644 --- a/packages/yoastseo/spec/specHelpers/parse/buildTree.js +++ b/packages/yoastseo/spec/specHelpers/parse/buildTree.js @@ -4,6 +4,7 @@ import adapt from "../../../src/parse/build/private/adapt"; import { parseFragment } from "parse5"; import filterTree from "../../../src/parse/build/private/filterTree"; import permanentFilters from "../../../src/parse/build/private/alwaysFilterElements"; +import { filterShortcodesFromHTML } from "../../../src/languageProcessing/helpers/sanitize/filterShortcodesFromTree"; /** * Builds an HTML tree for a given paper and researcher, and adds it to the paper. @@ -17,6 +18,9 @@ export default function buildTree( paper, researcher ) { const shortcodes = paper._attributes && paper._attributes.shortcodes; paper.setTree( build( paper.getText(), languageProcessor, shortcodes ) ); + if ( shortcodes ) { + paper._text = filterShortcodesFromHTML( paper.getText(), shortcodes ); + } } /** diff --git a/packages/yoastseo/src/languageProcessing/helpers/sanitize/filterShortcodesFromTree.js b/packages/yoastseo/src/languageProcessing/helpers/sanitize/filterShortcodesFromTree.js index d636b08b7dc..e3ade5140ba 100644 --- a/packages/yoastseo/src/languageProcessing/helpers/sanitize/filterShortcodesFromTree.js +++ b/packages/yoastseo/src/languageProcessing/helpers/sanitize/filterShortcodesFromTree.js @@ -1,3 +1,23 @@ +/** + * Creates a regex to filter shortcodes from HTML. + * @param {string[]} shortcodeTags The tags of the shortcodes to filter. + * @returns {RegExp} The regex to recognize the shortcodes. + */ +const createShortcodeTagsRegex = shortcodeTags => { + const sortcodeTagsRegexString = `\\[(${ shortcodeTags.join( "|" ) })[^\\]]*\\]`; + return new RegExp( sortcodeTagsRegexString, "g" ); +}; + +/** + * Filters shortcodes from HTML string. + * @param {string} html The HTML to filter. + * @param {string[]} shortcodeTags The tags of the shortcodes to filter. + * @returns {string} The filtered HTML. + */ +export const filterShortcodesFromHTML = ( html, shortcodeTags ) => { + const shortcodeTagsRegex = createShortcodeTagsRegex( shortcodeTags ); + return html.replace( shortcodeTagsRegex, "" ); +}; /** * Filters shortcodes from a sentence. @@ -53,7 +73,7 @@ const filterShortcodesFromSentences = ( tree, shortcodeTags, shortcodeTagsRegex */ const filterShortcodesFromTree = ( tree, shortcodeTags ) => { if ( shortcodeTags ) { - const sortcodeTagsRegexString = `\\[(${ shortcodeTags.join( "|" ) })[^\\]]*\\]`; + const sortcodeTagsRegexString = createShortcodeTagsRegex( shortcodeTags ); const shortcodeTagsRegex = new RegExp( sortcodeTagsRegexString, "g" ); filterShortcodesFromSentences( tree, shortcodeTags, shortcodeTagsRegex ); diff --git a/packages/yoastseo/src/scoring/assessor.js b/packages/yoastseo/src/scoring/assessor.js index dd9e49c92d9..1d6ace55c28 100644 --- a/packages/yoastseo/src/scoring/assessor.js +++ b/packages/yoastseo/src/scoring/assessor.js @@ -7,6 +7,7 @@ import { __, sprintf } from "@wordpress/i18n"; import { filter, find, findIndex, isFunction, isUndefined, map } from "lodash-es"; import LanguageProcessor from "../parse/language/LanguageProcessor"; import { build } from "../parse/build"; +import { filterShortcodesFromHTML } from "../languageProcessing/helpers/sanitize/filterShortcodesFromTree"; // The maximum score of individual assessment is 9. 
This is why we set the "score rating" here to 9. const ScoreRating = 9; @@ -126,6 +127,9 @@ Assessor.prototype.assess = function( paper ) { const languageProcessor = new LanguageProcessor( this._researcher ); const shortcodes = paper._attributes && paper._attributes.shortcodes; paper.setTree( build( paper.getText(), languageProcessor, shortcodes ) ); + if ( shortcodes ) { + paper._text = filterShortcodesFromHTML( paper.getText(), shortcodes ); + } let assessments = this.getAvailableAssessments(); this.results = []; diff --git a/packages/yoastseo/src/worker/AnalysisWebWorker.js b/packages/yoastseo/src/worker/AnalysisWebWorker.js index 1ba4522827b..70e0b11f0d7 100644 --- a/packages/yoastseo/src/worker/AnalysisWebWorker.js +++ b/packages/yoastseo/src/worker/AnalysisWebWorker.js @@ -33,6 +33,7 @@ import wrapTryCatchAroundAction from "./wrapTryCatchAroundAction"; // Tree assessor functionality. import { ReadabilityScoreAggregator, SEOScoreAggregator } from "../parsedPaper/assess/scoreAggregators"; +import { filterShortcodesFromHTML } from "../languageProcessing/helpers/sanitize/filterShortcodesFromTree"; const logger = getLogger( "yoast-analysis-worker" ); logger.setDefaultLevel( "error" ); @@ -1084,6 +1085,11 @@ export default class AnalysisWebWorker { const shortcodes = this._paper._attributes && this._paper._attributes.shortcodes; this._paper.setTree( build( this._paper.getText(), languageProcessor, shortcodes ) ); + if ( shortcodes ) { + this._paper._text = filterShortcodesFromHTML( this._paper.getText(), shortcodes ); + } + + // Update the configuration locale to the paper locale. this.setLocale( this._paper.getLocale() ); } @@ -1407,6 +1413,9 @@ export default class AnalysisWebWorker { const languageProcessor = new LanguageProcessor( researcher ); const shortcodes = paper._attributes && paper._attributes.shortcodes; paper.setTree( build( paper.getText(), languageProcessor, shortcodes ) ); + if ( shortcodes ) { + paper._text = filterShortcodesFromHTML( paper.getText(), shortcodes ); + } } } From 04a0bbb8c2e9db41125125a14a33960f71e53adf Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Wed, 26 Jul 2023 10:13:48 +0200 Subject: [PATCH 276/616] Adjust unit tests --- .../helpers/getAnnotationsHelpers.js | 1 + .../yoastseo/spec/parse/build/buildSpec.js | 478 +++--------------- 2 files changed, 79 insertions(+), 400 deletions(-) diff --git a/packages/js/src/decorator/helpers/getAnnotationsHelpers.js b/packages/js/src/decorator/helpers/getAnnotationsHelpers.js index f29b79df01b..ba17b68d418 100644 --- a/packages/js/src/decorator/helpers/getAnnotationsHelpers.js +++ b/packages/js/src/decorator/helpers/getAnnotationsHelpers.js @@ -464,6 +464,7 @@ const getAnnotationsForHowTo = ( attribute, block, marks ) => { annotations.push( getAnnotationsFromArray( annotatableTexts, marks, attribute, block, [ "name", "text" ] ) ); } if ( attribute.key === "jsonDescription" ) { + // Filter out marks that are intended for the "steps" attribute above. marks = marks.filter( mark => { return mark.hasBlockPosition() && ! 
mark.getBlockAttributeId(); } ); diff --git a/packages/yoastseo/spec/parse/build/buildSpec.js b/packages/yoastseo/spec/parse/build/buildSpec.js index 7ce35467cf7..6c7c087bbb1 100644 --- a/packages/yoastseo/spec/parse/build/buildSpec.js +++ b/packages/yoastseo/spec/parse/build/buildSpec.js @@ -1732,31 +1732,19 @@ describe( "parsing html with Yoast blocks that enter the Paper as html comments" attributes: {}, childNodes: [], name: "#comment", - sourceCodeLocation: { - endOffset: 34, - startOffset: 0, - }, + sourceCodeLocation: { endOffset: 34, startOffset: 0 }, }, { attributes: {}, childNodes: [], name: "#comment", - sourceCodeLocation: { - endOffset: 55, - startOffset: 34, - }, - }, - { - name: "#text", - value: "\n", + sourceCodeLocation: { endOffset: 55, startOffset: 34 }, }, + { name: "#text", value: "\n" }, { attributes: {}, childNodes: [ - { - name: "#text", - value: "The Norwegian Forest cat is adapted to survive Norway's cold weather.", - }, + { name: "#text", value: "The Norwegian Forest cat is adapted to survive Norway's cold weather." }, ], isImplicit: false, name: "p", @@ -1768,188 +1756,44 @@ describe( "parsing html with Yoast blocks that enter the Paper as html comments" }, text: "The Norwegian Forest cat is adapted to survive Norway's cold weather.", tokens: [ - { - sourceCodeRange: { - endOffset: 62, - startOffset: 59, - }, - text: "The", - }, - { - sourceCodeRange: { - endOffset: 63, - startOffset: 62, - }, - text: " ", - }, - { - sourceCodeRange: { - endOffset: 72, - startOffset: 63, - }, - text: "Norwegian", - }, - { - sourceCodeRange: { - endOffset: 73, - startOffset: 72, - }, - text: " ", - }, - { - sourceCodeRange: { - endOffset: 79, - startOffset: 73, - }, - text: "Forest", - }, - { - sourceCodeRange: { - endOffset: 80, - startOffset: 79, - }, - text: " ", - }, - { - sourceCodeRange: { - endOffset: 83, - startOffset: 80, - }, - text: "cat", - }, - { - sourceCodeRange: { - endOffset: 84, - startOffset: 83, - }, - text: " ", - }, - { - sourceCodeRange: { - endOffset: 86, - startOffset: 84, - }, - text: "is", - }, - { - sourceCodeRange: { - endOffset: 87, - startOffset: 86, - }, - text: " ", - }, - { - sourceCodeRange: { - endOffset: 94, - startOffset: 87, - }, - text: "adapted", - }, - { - sourceCodeRange: { - endOffset: 95, - startOffset: 94, - }, - text: " ", - }, - { - sourceCodeRange: { - endOffset: 97, - startOffset: 95, - }, - text: "to", - }, - { - sourceCodeRange: { - endOffset: 98, - startOffset: 97, - }, - text: " ", - }, - { - sourceCodeRange: { - endOffset: 105, - startOffset: 98, - }, - text: "survive", - }, - { - sourceCodeRange: { - endOffset: 106, - startOffset: 105, - }, - text: " ", - }, - { - sourceCodeRange: { - endOffset: 114, - startOffset: 106, - }, - text: "Norway's", - }, - { - sourceCodeRange: { - endOffset: 115, - startOffset: 114, - }, - text: " ", - }, - { - sourceCodeRange: { - endOffset: 119, - startOffset: 115, - }, - text: "cold", - }, - { - sourceCodeRange: { - endOffset: 120, - startOffset: 119, - }, - text: " ", - }, - { - sourceCodeRange: { - endOffset: 127, - startOffset: 120, - }, - text: "weather", - }, - { - sourceCodeRange: { - endOffset: 128, - startOffset: 127, - }, - text: ".", - }, + { sourceCodeRange: { endOffset: 62, startOffset: 59 }, text: "The" }, + { sourceCodeRange: { endOffset: 63, startOffset: 62 }, text: " " }, + { sourceCodeRange: { endOffset: 72, startOffset: 63 }, text: "Norwegian" }, + { sourceCodeRange: { endOffset: 73, startOffset: 72 }, text: " " }, + { sourceCodeRange: { endOffset: 79, startOffset: 73 }, 
text: "Forest" }, + { sourceCodeRange: { endOffset: 80, startOffset: 79 }, text: " " }, + { sourceCodeRange: { endOffset: 83, startOffset: 80 }, text: "cat" }, + { sourceCodeRange: { endOffset: 84, startOffset: 83 }, text: " " }, + { sourceCodeRange: { endOffset: 86, startOffset: 84 }, text: "is" }, + { sourceCodeRange: { endOffset: 87, startOffset: 86 }, text: " " }, + { sourceCodeRange: { endOffset: 94, startOffset: 87 }, text: "adapted" }, + { sourceCodeRange: { endOffset: 95, startOffset: 94 }, text: " " }, + { sourceCodeRange: { endOffset: 97, startOffset: 95 }, text: "to" }, + { sourceCodeRange: { endOffset: 98, startOffset: 97 }, text: " " }, + { sourceCodeRange: { endOffset: 105, startOffset: 98 }, text: "survive" }, + { sourceCodeRange: { endOffset: 106, startOffset: 105 }, text: " " }, + { sourceCodeRange: { endOffset: 114, startOffset: 106 }, text: "Norway's" }, + { sourceCodeRange: { endOffset: 115, startOffset: 114 }, text: " " }, + { sourceCodeRange: { endOffset: 119, startOffset: 115 }, text: "cold" }, + { sourceCodeRange: { endOffset: 120, startOffset: 119 }, text: " " }, + { sourceCodeRange: { endOffset: 127, startOffset: 120 }, text: "weather" }, + { sourceCodeRange: { endOffset: 128, startOffset: 127 }, text: "." }, ], }, ], sourceCodeLocation: { endOffset: 132, - endTag: { - endOffset: 132, - startOffset: 128, - }, + endTag: { endOffset: 132, startOffset: 128 }, startOffset: 56, - startTag: { - endOffset: 59, - startOffset: 56, - }, + startTag: { endOffset: 59, startOffset: 56 }, }, }, - { - name: "#text", - value: "\n", - }, + { name: "#text", value: "\n" }, { attributes: {}, childNodes: [], name: "#comment", - sourceCodeLocation: { - endOffset: 155, - startOffset: 133, - }, + sourceCodeLocation: { endOffset: 155, startOffset: 133 }, }, ], } ); @@ -2082,219 +1926,51 @@ describe( "parsing html with Yoast blocks that enter the Paper as html comments" }, text: "The Norwegian Forest cat is strongly built and larger than an average cat.", tokens: [ - { - sourceCodeRange: { - endOffset: 8, - startOffset: 5, - }, - text: "The", - }, - { - sourceCodeRange: { - endOffset: 9, - startOffset: 8, - }, - text: " ", - }, - { - sourceCodeRange: { - endOffset: 18, - startOffset: 9, - }, - text: "Norwegian", - }, - { - sourceCodeRange: { - endOffset: 19, - startOffset: 18, - }, - text: " ", - }, - { - sourceCodeRange: { - endOffset: 25, - startOffset: 19, - }, - text: "Forest", - }, - { - sourceCodeRange: { - endOffset: 26, - startOffset: 25, - }, - text: " ", - }, - { - sourceCodeRange: { - endOffset: 29, - startOffset: 26, - }, - text: "cat", - }, - { - sourceCodeRange: { - endOffset: 30, - startOffset: 29, - }, - text: " ", - }, - { - sourceCodeRange: { - endOffset: 32, - startOffset: 30, - }, - text: "is", - }, - { - sourceCodeRange: { - endOffset: 33, - startOffset: 32, - }, - text: " ", - }, - { - sourceCodeRange: { - endOffset: 41, - startOffset: 33, - }, - text: "strongly", - }, - { - sourceCodeRange: { - endOffset: 42, - startOffset: 41, - }, - text: " ", - }, - { - sourceCodeRange: { - endOffset: 47, - startOffset: 42, - }, - text: "built", - }, - { - sourceCodeRange: { - endOffset: 48, - startOffset: 47, - }, - text: " ", - }, - { - sourceCodeRange: { - endOffset: 51, - startOffset: 48, - }, - text: "and", - }, - { - sourceCodeRange: { - endOffset: 52, - startOffset: 51, - }, - text: " ", - }, - { - sourceCodeRange: { - endOffset: 58, - startOffset: 52, - }, - text: "larger", - }, - { - sourceCodeRange: { - endOffset: 59, - startOffset: 58, - }, - text: " ", - }, - { - 
sourceCodeRange: { - endOffset: 63, - startOffset: 59, - }, - text: "than", - }, - { - sourceCodeRange: { - endOffset: 64, - startOffset: 63, - }, - text: " ", - }, - { - sourceCodeRange: { - endOffset: 66, - startOffset: 64, - }, - text: "an", - }, - { - sourceCodeRange: { - endOffset: 67, - startOffset: 66, - }, - text: " ", - }, - { - sourceCodeRange: { - endOffset: 74, - startOffset: 67, - }, - text: "average", - }, - { - sourceCodeRange: { - endOffset: 75, - startOffset: 74, - }, - text: " ", - }, - { - sourceCodeRange: { - endOffset: 78, - startOffset: 75, - }, - text: "cat", - }, - { - sourceCodeRange: { - endOffset: 79, - startOffset: 78, - }, - text: ".", - }, + { sourceCodeRange: { endOffset: 8, startOffset: 5 }, text: "The" }, + { sourceCodeRange: { endOffset: 9, startOffset: 8 }, text: " " }, + { sourceCodeRange: { endOffset: 18, startOffset: 9 }, text: "Norwegian" }, + { sourceCodeRange: { endOffset: 19, startOffset: 18 }, text: " " }, + { sourceCodeRange: { endOffset: 25, startOffset: 19 }, text: "Forest" }, + { sourceCodeRange: { endOffset: 26, startOffset: 25 }, text: " " }, + { sourceCodeRange: { endOffset: 29, startOffset: 26 }, text: "cat" }, + { sourceCodeRange: { endOffset: 30, startOffset: 29 }, text: " " }, + { sourceCodeRange: { endOffset: 32, startOffset: 30 }, text: "is" }, + { sourceCodeRange: { endOffset: 33, startOffset: 32 }, text: " " }, + { sourceCodeRange: { endOffset: 41, startOffset: 33 }, text: "strongly" }, + { sourceCodeRange: { endOffset: 42, startOffset: 41 }, text: " " }, + { sourceCodeRange: { endOffset: 47, startOffset: 42 }, text: "built" }, + { sourceCodeRange: { endOffset: 48, startOffset: 47 }, text: " " }, + { sourceCodeRange: { endOffset: 51, startOffset: 48 }, text: "and" }, + { sourceCodeRange: { endOffset: 52, startOffset: 51 }, text: " " }, + { sourceCodeRange: { endOffset: 58, startOffset: 52 }, text: "larger" }, + { sourceCodeRange: { endOffset: 59, startOffset: 58 }, text: " " }, + { sourceCodeRange: { endOffset: 63, startOffset: 59 }, text: "than" }, + { sourceCodeRange: { endOffset: 64, startOffset: 63 }, text: " " }, + { sourceCodeRange: { endOffset: 66, startOffset: 64 }, text: "an" }, + { sourceCodeRange: { endOffset: 67, startOffset: 66 }, text: " " }, + { sourceCodeRange: { endOffset: 74, startOffset: 67 }, text: "average" }, + { sourceCodeRange: { endOffset: 75, startOffset: 74 }, text: " " }, + { sourceCodeRange: { endOffset: 78, startOffset: 75 }, text: "cat" }, + { sourceCodeRange: { endOffset: 79, startOffset: 78 }, text: "." 
}, ], }, ], - sourceCodeLocation: { - endOffset: 79, - startOffset: 5, - }, + sourceCodeLocation: { endOffset: 79, startOffset: 5 }, }, ], name: "div", sourceCodeLocation: { endOffset: 85, - endTag: { - endOffset: 85, - startOffset: 79, - }, + endTag: { endOffset: 85, startOffset: 79 }, startOffset: 0, - startTag: { - endOffset: 5, - startOffset: 0, - }, + startTag: { endOffset: 5, startOffset: 0 }, }, }, { attributes: {}, childNodes: [], name: "#comment", - sourceCodeLocation: { - endOffset: 116, - startOffset: 85, - }, + sourceCodeLocation: { endOffset: 116, startOffset: 85 }, }, ], } ); @@ -2381,11 +2057,13 @@ describe( "parsing html with Yoast blocks that enter the Paper as html comments" { name: "#text", value: "\n" }, { name: "div", - attributes: { "class": {} }, + attributes: { + "class": new Set( [ "schema-faq", "wp-block-yoast-faq-block" ] ), + }, childNodes: [ { name: "div", - attributes: { "class": {}, id: "faq-question-1689322642789" }, + attributes: { "class": new Set( [ "schema-faq-section" ] ), id: "faq-question-1689322642789" }, childNodes: [ { name: "p", @@ -2393,7 +2071,7 @@ describe( "parsing html with Yoast blocks that enter the Paper as html comments" childNodes: [ { name: "strong", - attributes: { "class": {} }, + attributes: { "class": new Set( [ "schema-faq-question" ] ) }, childNodes: [ { name: "#text", value: "What is giant panda", clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1" }, @@ -2410,7 +2088,7 @@ describe( "parsing html with Yoast blocks that enter the Paper as html comments" sourceCodeLocation: { startOffset: 572, endOffset: 727 }, isImplicit: true, attributeId: "faq-question-1689322642789", - isFirstPair: true, + isFirstSection: true, clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1", sentences: [ { @@ -2431,7 +2109,7 @@ describe( "parsing html with Yoast blocks that enter the Paper as html comments" { name: "#text", value: " ", clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1" }, { name: "p", - attributes: { "class": {} }, + attributes: { "class": new Set( [ "schema-faq-answer" ] ) }, childNodes: [ { name: "#text", value: "Giant ", clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1" }, { @@ -2459,7 +2137,7 @@ describe( "parsing html with Yoast blocks that enter the Paper as html comments" }, isImplicit: false, attributeId: "faq-question-1689322642789", - isFirstPair: false, + isFirstSection: false, clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1", sentences: [ { @@ -2498,7 +2176,7 @@ describe( "parsing html with Yoast blocks that enter the Paper as html comments" { name: "#text", value: " ", clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1" }, { name: "div", - attributes: { "class": {}, id: "faq-question-1689322667728" }, + attributes: { "class": new Set( [ "schema-faq-section" ] ), id: "faq-question-1689322667728" }, childNodes: [ { name: "p", @@ -2506,7 +2184,7 @@ describe( "parsing html with Yoast blocks that enter the Paper as html comments" childNodes: [ { name: "strong", - attributes: { "class": {} }, + attributes: { "class": new Set( [ "schema-faq-question" ] ) }, childNodes: [ { name: "#text", value: "Test", clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1" }, ], @@ -2522,7 +2200,7 @@ describe( "parsing html with Yoast blocks that enter the Paper as html comments" sourceCodeLocation: { startOffset: 798, endOffset: 886 }, isImplicit: true, attributeId: "faq-question-1689322667728", - isFirstPair: true, + isFirstSection: true, clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1", sentences: [ { @@ -2537,7 +2215,7 @@ describe( "parsing html with Yoast blocks 
that enter the Paper as html comments" { name: "#text", value: " ", clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1" }, { name: "p", - attributes: { "class": {} }, + attributes: { "class": new Set( [ "schema-faq-answer" ] ) }, childNodes: [ { name: "#text", value: "Test", clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1" }, ], @@ -2549,7 +2227,7 @@ describe( "parsing html with Yoast blocks that enter the Paper as html comments" }, isImplicit: false, attributeId: "faq-question-1689322667728", - isFirstPair: false, + isFirstSection: false, clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1", sentences: [ { @@ -2574,7 +2252,7 @@ describe( "parsing html with Yoast blocks that enter the Paper as html comments" { name: "#text", value: " ", clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1" }, { name: "div", - attributes: { "class": {}, id: "faq-question-1689936392675" }, + attributes: { "class": new Set( [ "schema-faq-section" ] ), id: "faq-question-1689936392675" }, childNodes: [ { name: "p", @@ -2582,7 +2260,7 @@ describe( "parsing html with Yoast blocks that enter the Paper as html comments" childNodes: [ { name: "strong", - attributes: { "class": {} }, + attributes: { "class": new Set( [ "schema-faq-question" ] ) }, childNodes: [ { name: "#text", value: "giant panda is silly", clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1" }, @@ -2599,7 +2277,7 @@ describe( "parsing html with Yoast blocks that enter the Paper as html comments" sourceCodeLocation: { startOffset: 957, endOffset: 1057 }, isImplicit: true, attributeId: "faq-question-1689936392675", - isFirstPair: true, + isFirstSection: true, clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1", sentences: [ { @@ -2620,7 +2298,7 @@ describe( "parsing html with Yoast blocks that enter the Paper as html comments" { name: "#text", value: " ", clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1" }, { name: "p", - attributes: { "class": {} }, + attributes: { "class": new Set( [ "schema-faq-answer" ] ) }, childNodes: [], sourceCodeLocation: { startTag: { startOffset: 1023, endOffset: 1052 }, @@ -2630,7 +2308,7 @@ describe( "parsing html with Yoast blocks that enter the Paper as html comments" }, isImplicit: false, attributeId: "faq-question-1689936392675", - isFirstPair: false, + isFirstSection: false, clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1", sentences: [], }, From 47489c57ab84f3782a3010b197f52e188c703748 Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Wed, 26 Jul 2023 15:12:18 +0200 Subject: [PATCH 277/616] remove unnecessary variable declaration --- packages/js/src/decorator/gutenberg.js | 3 --- 1 file changed, 3 deletions(-) diff --git a/packages/js/src/decorator/gutenberg.js b/packages/js/src/decorator/gutenberg.js index a768f06e7d7..d959870e4e5 100644 --- a/packages/js/src/decorator/gutenberg.js +++ b/packages/js/src/decorator/gutenberg.js @@ -8,9 +8,6 @@ import { getAnnotationsForYoastBlock, getAnnotationsForWPBlock } from "./helpers const ANNOTATION_SOURCE = "yoast"; -export const START_MARK = ""; -export const END_MARK = ""; - let annotationQueue = []; const ANNOTATION_ATTRIBUTES = { From 7e662d2661fb43334b16c9b45d40e7e44090a2e0 Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Wed, 26 Jul 2023 15:13:27 +0200 Subject: [PATCH 278/616] improve code --- .../helpers/getAnnotationsHelpers.js | 107 +++++++++++------- 1 file changed, 65 insertions(+), 42 deletions(-) diff --git a/packages/js/src/decorator/helpers/getAnnotationsHelpers.js b/packages/js/src/decorator/helpers/getAnnotationsHelpers.js index ba17b68d418..5ee40685555 100644 --- 
a/packages/js/src/decorator/helpers/getAnnotationsHelpers.js +++ b/packages/js/src/decorator/helpers/getAnnotationsHelpers.js @@ -1,10 +1,25 @@ import { flatMap, flattenDeep } from "lodash"; import { create } from "@wordpress/rich-text"; -const htmlTagsRegex = /(<([^>]+)>)/ig; -export const START_MARK = ""; +const START_MARK = ""; +const END_MARK = ""; + const START_MARK_DOUBLE_QUOTED = ""; -export const END_MARK = ""; + +/* + * The regex to detect html tags. + * Please note that this regex will also detect non-html tags that are also wrapped in `<>`. + * For example, in the following sentence, cats rabbit , + * we will match , and . This is an edge case though. + * @type {RegExp} + */ +const htmlTagsRegex = /(<([a-z]|\/)[^<>]+>)/ig; + +/** + * The regex to capture the first section of a Yoast sub-block. + * @type {RegExp} + */ +const firstSectionRegex = /()(.*?)(<\/strong>)/gis; /** * Returns the offsets of the occurrences in the given mark. @@ -178,33 +193,44 @@ export function calculateAnnotationsForTextFormat( text, mark ) { * Creates an annotation if the given mark is position based. * A helper for position-based highlighting. * - * @param {string} clientId The client id of the block. - * @param {Mark} mark The mark to apply to the content. - * @param {string} html The HTML of the block. - * @param {string} text The text of the block. + * @param {string} blockClientId The client id of the block. + * @param {Mark} mark The mark to apply to the content. + * @param {string} blockHtml The HTML of the block: possibly contains html tags. + * @param {string} richText The rich text of the block: the text without html tags. * * @returns {Array} The annotations to apply. */ -export function createAnnotationsFromPositionBasedMarks( clientId, mark, html, text ) { - if ( clientId === mark.getBlockClientId() ) { - const slicedHtml = html.slice( mark.getBlockPositionStart(), mark.getBlockPositionEnd() ); - const slicedText = text.slice( mark.getBlockPositionStart(), mark.getBlockPositionEnd() ); - // Check if the html and the rich text contain the same text in the specified index. - if ( slicedHtml === slicedText ) { +export function createAnnotationsFromPositionBasedMarks( blockClientId, mark, blockHtml, richText ) { + // If the block client id is the same as the mark's block client id, it means that this mark object is intended for this block. + if ( blockClientId === mark.getBlockClientId() ) { + let blockStartOffset = mark.getBlockPositionStart(); + let blockEndOffset = mark.getBlockPositionEnd(); + + // Get the html part from the block start offset of the mark until the block end offset of the mark. + const slicedHtml = blockHtml.slice( blockStartOffset, blockEndOffset ); + // Get the rich text part from the block start offset of the mark until the block end offset of the mark. + const slicedRichText = richText.slice( blockStartOffset, blockEndOffset ); + + /* + * If the html and the rich text contain the same text in the specified index, + * don't adjust the block start and end offsets of the mark. + * If the html and the rich text do not contain the same text in the specified index, + * adjust the block start and end offsets of the mark. + */ + if ( slicedHtml === slicedRichText ) { return [ { - startOffset: mark.getBlockPositionStart(), - endOffset: mark.getBlockPositionEnd(), + startOffset: blockStartOffset, + endOffset: blockEndOffset, }, ]; } - // Adjust block start and end offset. 
- let startOffset = mark.getBlockPositionStart(); - let endOffset = mark.getBlockPositionEnd(); - // Retrieve the html from the start until the startOffset of the mark. - html = html.slice( 0, mark.getBlockPositionStart() ); + + // Retrieve the html from the start until the block startOffset of the mark. + blockHtml = blockHtml.slice( 0, blockStartOffset ); + // Find all html tags. - const foundHtmlTags = [ ...html.matchAll( htmlTagsRegex ) ]; + const foundHtmlTags = [ ...blockHtml.matchAll( htmlTagsRegex ) ]; /* * Loop through the found html tags backwards, and adjust the start and end offsets of the mark * by subtracting them with the length of the found html tags. @@ -216,14 +242,15 @@ export function createAnnotationsFromPositionBasedMarks( clientId, mark, html, t * As a result, the token position information we retrieve from `yoastseo` wouldn't match that of block annotation API. */ for ( let i = foundHtmlTags.length - 1; i >= 0; i-- ) { - const currentHtmlTag = foundHtmlTags[ i ][ 0 ]; - startOffset -= currentHtmlTag.length; - endOffset -= currentHtmlTag.length; + const [ foundTag ] = foundHtmlTags[ i ]; + + blockStartOffset -= foundTag.length; + blockEndOffset -= foundTag.length; } return [ { - startOffset: startOffset, - endOffset: endOffset, + startOffset: blockStartOffset, + endOffset: blockEndOffset, }, ]; } @@ -250,15 +277,15 @@ function createAnnotations( html, richTextIdentifier, attribute, block, marks ) multilineWrapperTag: attribute.multilineWrapperTag, } ); - const text = record.text; + const richText = record.text; return flatMap( marks, mark => { let annotations; if ( mark.hasBlockPosition && mark.hasBlockPosition() ) { - annotations = createAnnotationsFromPositionBasedMarks( blockClientId, mark, html, text ); + annotations = createAnnotationsFromPositionBasedMarks( blockClientId, mark, html, richText ); } else { annotations = calculateAnnotationsForTextFormat( - text, + richText, mark ); } @@ -277,12 +304,6 @@ function createAnnotations( html, richTextIdentifier, attribute, block, marks ) } ); } -/** - * The regex to capture the first section of a Yoast sub-block. - * @type {RegExp} - */ -const firstSectionRegex = /()(.*?)(<\/strong>)/gis; - /** * Adjusts the block start and end offset for a given mark. * @@ -313,7 +334,7 @@ const adjustBlockOffset = ( mark, block, firstSectionHtml ) => { * 2. Test * 3. giant panda is silly */ - const firstSectionElements = [ ...originalContent.matchAll( firstSectionRegex ) ]; + const firstSectionElements = originalContent ? [ ...originalContent.matchAll( firstSectionRegex ) ] : []; if ( firstSectionElements.length === 0 ) { return mark; @@ -380,12 +401,12 @@ const getAnnotationsFromArray = ( array, marks, attribute, block, identifierPair const secondSectionIdentifier = `${ item.id }-${ identifierPair[ 1 ] }`; // Get the first item of the identifier pair and make the first letter uppercase, e.g. "question" -> "Question". - identifierPair[ 0 ] = identifierPair[ 0 ][ 0 ].toUpperCase() + identifierPair[ 0 ].slice( 1 ); + const firstSectionKeyName = identifierPair[ 0 ][ 0 ].toUpperCase() + identifierPair[ 0 ].slice( 1 ); // Get the second item of the identifier pair and make the first letter uppercase, e.g. "answer" -> "Answer". 
- identifierPair[ 1 ] = identifierPair[ 1 ][ 0 ].toUpperCase() + identifierPair[ 1 ].slice( 1 ); + const secondSectionKeyName = identifierPair[ 1 ][ 0 ].toUpperCase() + identifierPair[ 1 ].slice( 1 ); - const firstSectionHtml = item[ `json${ identifierPair[ 0 ] }` ]; - const secondSectionHtml = item[ `json${ identifierPair[ 1 ] }` ]; + const firstSectionHtml = item[ `json${ firstSectionKeyName }` ]; + const secondSectionHtml = item[ `json${ secondSectionKeyName }` ]; let marksForFirstSection = marks.filter( mark => { if ( mark.hasBlockPosition() ) { @@ -435,7 +456,8 @@ const getAnnotationsFromArray = ( array, marks, attribute, block, identifierPair * @param {Object} attribute The attribute to apply annotations to. * @param {Object} block The block information in the state. * @param {Array} marks The marks to turn into annotations. - * @returns {Array} The created annotations. + * + * @returns {Array} The created annotations for Yoast FAQ block. */ const getAnnotationsForFAQ = ( attribute, block, marks ) => { const annotatableTexts = block.attributes[ attribute.key ]; @@ -452,7 +474,8 @@ const getAnnotationsForFAQ = ( attribute, block, marks ) => { * @param {Object} attribute The attribute to apply annotations to. * @param {Object} block The block information in the state. * @param {Array} marks The marks to turn into annotations. - * @returns {Array} The created annotations. + * + * @returns {Array} The created annotations for Yoast How-To block. */ const getAnnotationsForHowTo = ( attribute, block, marks ) => { const annotatableTexts = block.attributes[ attribute.key ]; From cc62d13f0b5e056c78d1094a76757291903a0e63 Mon Sep 17 00:00:00 2001 From: hdvos Date: Wed, 26 Jul 2023 16:39:20 +0200 Subject: [PATCH 279/616] filter shortcodes from text for backwards compatability --- .../spec/specHelpers/parse/buildTree.js | 4 ---- .../src/languageProcessing/helpers/index.js | 2 ++ .../sanitize/filterShortcodesFromTree.js | 17 ++++++++++++----- .../researches/countSentencesFromText.js | 2 ++ .../researches/findTransitionWords.js | 2 ++ .../researches/getKeywordDensity.js | 2 ++ .../researches/getLongCenterAlignedTexts.js | 1 + .../researches/getParagraphLength.js | 2 ++ .../researches/getPassiveVoiceResult.js | 3 +++ .../researches/getSentenceBeginnings.js | 2 ++ .../researches/getSubheadingTextLengths.js | 2 ++ .../researches/keyphraseDistribution.js | 1 + .../researches/keywordCount.js | 2 ++ .../researches/matchKeywordInSubheadings.js | 2 ++ .../languageProcessing/researches/sentences.js | 2 ++ .../researches/wordComplexity.js | 1 + .../researches/wordCountInText.js | 2 ++ .../src/scoring/assessments/assessment.js | 2 ++ .../assessments/readability/ListAssessment.js | 1 + .../SubheadingDistributionTooLongAssessment.js | 2 ++ .../readability/TransitionWordsAssessment.js | 2 ++ .../seo/KeyphraseDistributionAssessment.js | 2 ++ .../assessments/seo/KeywordDensityAssessment.js | 2 ++ .../assessments/seo/SingleH1Assessment.js | 2 +- packages/yoastseo/src/scoring/assessor.js | 4 ---- .../yoastseo/src/worker/AnalysisWebWorker.js | 9 --------- 26 files changed, 52 insertions(+), 23 deletions(-) diff --git a/packages/yoastseo/spec/specHelpers/parse/buildTree.js b/packages/yoastseo/spec/specHelpers/parse/buildTree.js index b3b1d65c0e4..adeb2358faa 100644 --- a/packages/yoastseo/spec/specHelpers/parse/buildTree.js +++ b/packages/yoastseo/spec/specHelpers/parse/buildTree.js @@ -4,7 +4,6 @@ import adapt from "../../../src/parse/build/private/adapt"; import { parseFragment } from "parse5"; import filterTree from 
"../../../src/parse/build/private/filterTree"; import permanentFilters from "../../../src/parse/build/private/alwaysFilterElements"; -import { filterShortcodesFromHTML } from "../../../src/languageProcessing/helpers/sanitize/filterShortcodesFromTree"; /** * Builds an HTML tree for a given paper and researcher, and adds it to the paper. @@ -18,9 +17,6 @@ export default function buildTree( paper, researcher ) { const shortcodes = paper._attributes && paper._attributes.shortcodes; paper.setTree( build( paper.getText(), languageProcessor, shortcodes ) ); - if ( shortcodes ) { - paper._text = filterShortcodesFromHTML( paper.getText(), shortcodes ); - } } /** diff --git a/packages/yoastseo/src/languageProcessing/helpers/index.js b/packages/yoastseo/src/languageProcessing/helpers/index.js index f24c546c114..4ecacde422f 100644 --- a/packages/yoastseo/src/languageProcessing/helpers/index.js +++ b/packages/yoastseo/src/languageProcessing/helpers/index.js @@ -1,9 +1,11 @@ import matchStringWithRegex from "./regex/matchStringWithRegex"; import { normalize } from "./sanitize/quotes"; +import { filterShortcodesFromHTML } from "./sanitize/filterShortcodesFromTree"; import removeHtmlBlocks from "./html/htmlParser"; export { matchStringWithRegex, normalize, removeHtmlBlocks, + filterShortcodesFromHTML, }; diff --git a/packages/yoastseo/src/languageProcessing/helpers/sanitize/filterShortcodesFromTree.js b/packages/yoastseo/src/languageProcessing/helpers/sanitize/filterShortcodesFromTree.js index e3ade5140ba..fc0d1b3c112 100644 --- a/packages/yoastseo/src/languageProcessing/helpers/sanitize/filterShortcodesFromTree.js +++ b/packages/yoastseo/src/languageProcessing/helpers/sanitize/filterShortcodesFromTree.js @@ -1,3 +1,5 @@ +import { cloneDeep } from "lodash-es"; + /** * Creates a regex to filter shortcodes from HTML. * @param {string[]} shortcodeTags The tags of the shortcodes to filter. @@ -15,6 +17,9 @@ const createShortcodeTagsRegex = shortcodeTags => { * @returns {string} The filtered HTML. */ export const filterShortcodesFromHTML = ( html, shortcodeTags ) => { + if ( ! shortcodeTags || shortcodeTags.length === 0 ) { + return html; + } const shortcodeTagsRegex = createShortcodeTagsRegex( shortcodeTags ); return html.replace( shortcodeTagsRegex, "" ); }; @@ -37,6 +42,7 @@ const filterShortcodesFromSentence = ( sentence, shortcodeTags, shortcodeTagsReg } tokens.splice( i, 1 ); tokens.splice( i - 1, 1 ); + i--; } } sentence.tokens = tokens; @@ -72,12 +78,13 @@ const filterShortcodesFromSentences = ( tree, shortcodeTags, shortcodeTagsRegex * @returns {void} */ const filterShortcodesFromTree = ( tree, shortcodeTags ) => { - if ( shortcodeTags ) { - const sortcodeTagsRegexString = createShortcodeTagsRegex( shortcodeTags ); - const shortcodeTagsRegex = new RegExp( sortcodeTagsRegexString, "g" ); - - filterShortcodesFromSentences( tree, shortcodeTags, shortcodeTagsRegex ); + if ( ! 
shortcodeTags || shortcodeTags.length === 0 ) { + return; } + const sortcodeTagsRegexString = createShortcodeTagsRegex( shortcodeTags ); + const shortcodeTagsRegex = new RegExp( sortcodeTagsRegexString, "g" ); + + filterShortcodesFromSentences( tree, shortcodeTags, shortcodeTagsRegex ); }; export default filterShortcodesFromTree; diff --git a/packages/yoastseo/src/languageProcessing/researches/countSentencesFromText.js b/packages/yoastseo/src/languageProcessing/researches/countSentencesFromText.js index 6465709e478..25932359050 100644 --- a/packages/yoastseo/src/languageProcessing/researches/countSentencesFromText.js +++ b/packages/yoastseo/src/languageProcessing/researches/countSentencesFromText.js @@ -1,6 +1,7 @@ import getSentences from "../helpers/sentence/getSentences"; import sentencesLength from "../helpers/sentence/sentencesLength.js"; import removeHtmlBlocks from "../helpers/html/htmlParser"; +import { filterShortcodesFromHTML } from "../helpers"; /** * Count sentences in the text. @@ -12,6 +13,7 @@ export default function( paper, researcher ) { const memoizedTokenizer = researcher.getHelper( "memoizedTokenizer" ); let text = paper.getText(); text = removeHtmlBlocks( text ); + text = filterShortcodesFromHTML( text, paper._attributes && paper._attributes.shortcodes ); const sentences = getSentences( text, memoizedTokenizer ); return sentencesLength( sentences, researcher ); } diff --git a/packages/yoastseo/src/languageProcessing/researches/findTransitionWords.js b/packages/yoastseo/src/languageProcessing/researches/findTransitionWords.js index dba7cd9b4a1..89588e096dc 100644 --- a/packages/yoastseo/src/languageProcessing/researches/findTransitionWords.js +++ b/packages/yoastseo/src/languageProcessing/researches/findTransitionWords.js @@ -5,6 +5,7 @@ import { isWordInSentence as matchWordInSentence } from "../helpers/word/matchWo import removeHtmlBlocks from "../helpers/html/htmlParser"; import { flattenDeep } from "lodash-es"; +import { filterShortcodesFromHTML } from "../helpers"; let regexFromDoubleArray = null; let regexFromDoubleArrayCacheKey = ""; @@ -110,6 +111,7 @@ export default function( paper, researcher ) { let text = paper.getText(); text = removeHtmlBlocks( text ); + text = filterShortcodesFromHTML( text, paper._attributes && paper._attributes.shortcodes ); const sentences = getSentences( text, memoizedTokenizer ); const sentenceResults = checkSentencesForTransitionWords( sentences, transitionWords, twoPartTransitionWords, matchTransitionWordsHelper ); diff --git a/packages/yoastseo/src/languageProcessing/researches/getKeywordDensity.js b/packages/yoastseo/src/languageProcessing/researches/getKeywordDensity.js index 25cc513707a..98f77850b15 100644 --- a/packages/yoastseo/src/languageProcessing/researches/getKeywordDensity.js +++ b/packages/yoastseo/src/languageProcessing/researches/getKeywordDensity.js @@ -2,6 +2,7 @@ import countWords from "../helpers/word/countWords.js"; import removeHtmlBlocks from "../helpers/html/htmlParser"; +import { filterShortcodesFromHTML } from "../helpers"; /** * Calculates the keyword density. @@ -15,6 +16,7 @@ export default function( paper, researcher ) { const getWordsCustomHelper = researcher.getHelper( "getWordsCustomHelper" ); let text = paper.getText(); text = removeHtmlBlocks( text ); + text = filterShortcodesFromHTML( text, paper._attributes && paper._attributes.shortcodes ); let wordCount = countWords( text ); // If there is a custom getWords helper use its output for countWords. 
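// Illustrative aside, not a file in this patch: what the filterShortcodesFromHTML( text, ... ) calls
// added to the researches above do. The helper is the one introduced in filterShortcodesFromTree.js
// earlier in this series; the import path, sample text and tag list below are assumptions made only
// for the sake of the example.
import { filterShortcodesFromHTML } from "../helpers";

const sampleText = "<p>This is a [shortcode param=\"value\"] test.</p>";
// For the tag list [ "shortcode" ] the helper builds the regex /\[(shortcode)[^\]]*\]/g and removes
// every match, so shortcode markup never reaches the word and sentence counts computed by a research.
const cleaned = filterShortcodesFromHTML( sampleText, [ "shortcode" ] );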
diff --git a/packages/yoastseo/src/languageProcessing/researches/getLongCenterAlignedTexts.js b/packages/yoastseo/src/languageProcessing/researches/getLongCenterAlignedTexts.js index 4ca827ff5b1..97f2bb3274c 100644 --- a/packages/yoastseo/src/languageProcessing/researches/getLongCenterAlignedTexts.js +++ b/packages/yoastseo/src/languageProcessing/researches/getLongCenterAlignedTexts.js @@ -44,6 +44,7 @@ function getLongCenterAlignedElements( elements, elementType ) { export default function( paper ) { let text = paper.getText(); text = helpers.removeHtmlBlocks( text ); + text = helpers.filterShortcodesFromHTML( text, paper._attributes && paper._attributes.shortcodes ); // Get all paragraphs from the text. We only retrieve the paragraphs with

    tags. const allParagraphs = helpers.matchStringWithRegex( text, paragraphsRegex ); diff --git a/packages/yoastseo/src/languageProcessing/researches/getParagraphLength.js b/packages/yoastseo/src/languageProcessing/researches/getParagraphLength.js index acda161f183..a85cb29d00f 100644 --- a/packages/yoastseo/src/languageProcessing/researches/getParagraphLength.js +++ b/packages/yoastseo/src/languageProcessing/researches/getParagraphLength.js @@ -6,6 +6,7 @@ import countWords from "../helpers/word/countWords.js"; import matchParagraphs from "../helpers/html/matchParagraphs.js"; import { filter } from "lodash-es"; import removeHtmlBlocks from "../helpers/html/htmlParser"; +import { filterShortcodesFromHTML } from "../helpers"; /** * Gets all paragraphs and their word counts or character counts from the text. @@ -18,6 +19,7 @@ import removeHtmlBlocks from "../helpers/html/htmlParser"; export default function( paper, researcher ) { let text = paper.getText(); text = removeHtmlBlocks( text ); + text = filterShortcodesFromHTML( text, paper._attributes && paper._attributes.shortcodes ); text = excludeTableOfContentsTag( text ); // Exclude the Estimated Reading time text from the research diff --git a/packages/yoastseo/src/languageProcessing/researches/getPassiveVoiceResult.js b/packages/yoastseo/src/languageProcessing/researches/getPassiveVoiceResult.js index c1bd628f69b..d099d005e63 100644 --- a/packages/yoastseo/src/languageProcessing/researches/getPassiveVoiceResult.js +++ b/packages/yoastseo/src/languageProcessing/researches/getPassiveVoiceResult.js @@ -4,6 +4,7 @@ import Sentence from "../../languageProcessing/values/Sentence.js"; import { forEach } from "lodash-es"; import removeHtmlBlocks from "../helpers/html/htmlParser"; +import { filterShortcodesFromHTML } from "../helpers"; /** * Looks for morphological passive voice. @@ -18,6 +19,7 @@ export const getMorphologicalPassives = function( paper, researcher ) { const isPassiveSentence = researcher.getHelper( "isPassiveSentence" ); let text = paper.getText(); text = removeHtmlBlocks( text ); + text = filterShortcodesFromHTML( text, paper._attributes && paper._attributes.shortcodes ); const memoizedTokenizer = researcher.getHelper( "memoizedTokenizer" ); const sentences = getSentences( text, memoizedTokenizer ) .map( function( sentence ) { @@ -55,6 +57,7 @@ export const getPeriphrasticPassives = function( paper, researcher ) { const getClauses = researcher.getHelper( "getClauses" ); let text = paper.getText(); text = removeHtmlBlocks( text ); + text = filterShortcodesFromHTML( text, paper._attributes && paper._attributes.shortcodes ); const memoizedTokenizer = researcher.getHelper( "memoizedTokenizer" ); const sentences = getSentences( text, memoizedTokenizer ) .map( function( sentence ) { diff --git a/packages/yoastseo/src/languageProcessing/researches/getSentenceBeginnings.js b/packages/yoastseo/src/languageProcessing/researches/getSentenceBeginnings.js index a8d0eb66151..22f9c3ae56a 100644 --- a/packages/yoastseo/src/languageProcessing/researches/getSentenceBeginnings.js +++ b/packages/yoastseo/src/languageProcessing/researches/getSentenceBeginnings.js @@ -5,6 +5,7 @@ import { stripFullTags as stripTags } from "../helpers/sanitize/stripHTMLTags.js import { filter, forEach, isEmpty } from "lodash-es"; import removeHtmlBlocks from "../helpers/html/htmlParser"; +import { filterShortcodesFromHTML } from "../helpers"; /** * Compares the first word of each sentence with the first word of the following sentence. 
@@ -95,6 +96,7 @@ export default function( paper, researcher ) { let text = paper.getText(); text = removeHtmlBlocks( text ); + text = filterShortcodesFromHTML( text, paper._attributes && paper._attributes.shortcodes ); // Remove any HTML whitespace padding and replace it with a single whitespace. text = text.replace( /[\s\n]+/g, " " ); diff --git a/packages/yoastseo/src/languageProcessing/researches/getSubheadingTextLengths.js b/packages/yoastseo/src/languageProcessing/researches/getSubheadingTextLengths.js index 07adb8693c5..618413675e1 100644 --- a/packages/yoastseo/src/languageProcessing/researches/getSubheadingTextLengths.js +++ b/packages/yoastseo/src/languageProcessing/researches/getSubheadingTextLengths.js @@ -3,6 +3,7 @@ import excludeTableOfContentsTag from "../helpers/sanitize/excludeTableOfContent import countWords from "../helpers/word/countWords"; import { forEach } from "lodash-es"; import removeHtmlBlocks from "../helpers/html/htmlParser"; +import { filterShortcodesFromHTML } from "../helpers"; /** * Gets the subheadings from the text and returns the length of these subheading in an array. @@ -15,6 +16,7 @@ import removeHtmlBlocks from "../helpers/html/htmlParser"; export default function( paper, researcher ) { let text = paper.getText(); text = removeHtmlBlocks( text ); + text = filterShortcodesFromHTML( text, paper._attributes && paper._attributes.shortcodes ); text = excludeTableOfContentsTag( text ); const matches = getSubheadingTexts( text ); diff --git a/packages/yoastseo/src/languageProcessing/researches/keyphraseDistribution.js b/packages/yoastseo/src/languageProcessing/researches/keyphraseDistribution.js index 43920b9c699..3dbd010134f 100644 --- a/packages/yoastseo/src/languageProcessing/researches/keyphraseDistribution.js +++ b/packages/yoastseo/src/languageProcessing/researches/keyphraseDistribution.js @@ -204,6 +204,7 @@ const keyphraseDistributionResearcher = function( paper, researcher ) { let text = paper.getText(); text = helpers.removeHtmlBlocks( text ); + text = helpers.filterShortcodesFromHTML( text, paper._attributes && paper._attributes.shortcodes ); text = mergeListItems( text ); const sentences = getSentences( text, memoizedTokenizer ); const topicForms = researcher.getResearch( "morphology" ); diff --git a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js index dabd39ed1ec..c08a50b2cff 100644 --- a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js +++ b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js @@ -5,6 +5,7 @@ import { flattenDeep, uniq as unique } from "lodash-es"; import getSentences from "../helpers/sentence/getSentences"; import { markWordsInSentences } from "../helpers/word/markWordsInSentences"; import removeHtmlBlocks from "../helpers/html/htmlParser"; +import { filterShortcodesFromHTML } from "../helpers"; /** * Calculates the keyword count, takes morphology into account. 
@@ -23,6 +24,7 @@ export default function( paper, researcher ) { let text = paper.getText(); text = removeHtmlBlocks( text ); + text = filterShortcodesFromHTML( text, paper._attributes && paper._attributes.shortcodes ); const locale = paper.getLocale(); const sentences = getSentences( text, memoizedTokenizer ); diff --git a/packages/yoastseo/src/languageProcessing/researches/matchKeywordInSubheadings.js b/packages/yoastseo/src/languageProcessing/researches/matchKeywordInSubheadings.js index fa036350f29..82e4a332912 100644 --- a/packages/yoastseo/src/languageProcessing/researches/matchKeywordInSubheadings.js +++ b/packages/yoastseo/src/languageProcessing/researches/matchKeywordInSubheadings.js @@ -3,6 +3,7 @@ import excludeTableOfContentsTag from "../helpers/sanitize/excludeTableOfContent import stripSomeTags from "../helpers/sanitize/stripNonTextTags"; import { findTopicFormsInString } from "../helpers/match/findKeywordFormsInString"; import removeHtmlBlocks from "../helpers/html/htmlParser"; +import { filterShortcodesFromHTML } from "../helpers"; /** * Computes the amount of subheadings reflecting the topic. @@ -44,6 +45,7 @@ export default function matchKeywordInSubheadings( paper, researcher ) { let text = paper.getText(); text = removeHtmlBlocks( text ); + text = filterShortcodesFromHTML( text, paper._attributes && paper._attributes.shortcodes ); text = stripSomeTags( excludeTableOfContentsTag( text ) ); const topicForms = researcher.getResearch( "morphology" ); const locale = paper.getLocale(); diff --git a/packages/yoastseo/src/languageProcessing/researches/sentences.js b/packages/yoastseo/src/languageProcessing/researches/sentences.js index 18a93b9960d..bce3cce5735 100644 --- a/packages/yoastseo/src/languageProcessing/researches/sentences.js +++ b/packages/yoastseo/src/languageProcessing/researches/sentences.js @@ -1,5 +1,6 @@ import getSentences from "../helpers/sentence/getSentences"; import removeHtmlBlocks from "../helpers/html/htmlParser"; +import { filterShortcodesFromHTML } from "../helpers"; /** * Returns the sentences from a paper. @@ -14,5 +15,6 @@ export default function( paper, researcher ) { let text = paper.getText(); text = removeHtmlBlocks( text ); + text = filterShortcodesFromHTML( text, paper._attributes && paper._attributes.shortcodes ); return getSentences( text, memoizedTokenizer ); } diff --git a/packages/yoastseo/src/languageProcessing/researches/wordComplexity.js b/packages/yoastseo/src/languageProcessing/researches/wordComplexity.js index 3feb922619a..14b14feafb4 100644 --- a/packages/yoastseo/src/languageProcessing/researches/wordComplexity.js +++ b/packages/yoastseo/src/languageProcessing/researches/wordComplexity.js @@ -81,6 +81,7 @@ export default function wordComplexity( paper, researcher ) { const memoizedTokenizer = researcher.getHelper( "memoizedTokenizer" ); let text = paper.getText(); text = helpers.removeHtmlBlocks( text ); + text = helpers.filterShortcodesFromHTML( text, paper._attributes && paper._attributes.shortcodes ); const sentences = getSentences( text, memoizedTokenizer ); // Find the complex words in each sentence. 
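// Recap, not an extra file in this patch: the two-step sanitation this commit repeats at the top of
// each research. The imports are the same ones added in the hunks above; the wrapper name
// getSanitizedText is only illustrative.
import removeHtmlBlocks from "../helpers/html/htmlParser";
import { filterShortcodesFromHTML } from "../helpers";

const getSanitizedText = ( paper ) => {
	let text = paper.getText();
	// First strip the HTML blocks that should not be analyzed.
	text = removeHtmlBlocks( text );
	// Then strip registered [shortcode ...] occurrences; the tag list is an optional paper attribute,
	// hence the guard before the lookup.
	text = filterShortcodesFromHTML( text, paper._attributes && paper._attributes.shortcodes );
	return text;
};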
diff --git a/packages/yoastseo/src/languageProcessing/researches/wordCountInText.js b/packages/yoastseo/src/languageProcessing/researches/wordCountInText.js index 67ea613c7f7..03e61387ec2 100644 --- a/packages/yoastseo/src/languageProcessing/researches/wordCountInText.js +++ b/packages/yoastseo/src/languageProcessing/researches/wordCountInText.js @@ -1,5 +1,6 @@ import wordCount from "../helpers/word/countWords.js"; import removeHtmlBlocks from "../helpers/html/htmlParser"; +import { filterShortcodesFromHTML } from "../helpers"; /** * A result of the word count calculation. @@ -19,6 +20,7 @@ import removeHtmlBlocks from "../helpers/html/htmlParser"; export default function( paper ) { let text = paper.getText(); text = removeHtmlBlocks( text ); + text = filterShortcodesFromHTML( text, paper._attributes && paper._attributes.shortcodes ); return { text: text, count: wordCount( text ), diff --git a/packages/yoastseo/src/scoring/assessments/assessment.js b/packages/yoastseo/src/scoring/assessments/assessment.js index b349ddd8270..a9d6edcb950 100644 --- a/packages/yoastseo/src/scoring/assessments/assessment.js +++ b/packages/yoastseo/src/scoring/assessments/assessment.js @@ -3,6 +3,7 @@ import { sanitizeString } from "../../languageProcessing"; import { isUndefined } from "lodash-es"; import removeHtmlBlocks from "../../languageProcessing/helpers/html/htmlParser"; +import { filterShortcodesFromHTML } from "../../languageProcessing/helpers"; /** * Represents the defaults of an assessment. @@ -43,6 +44,7 @@ class Assessment { hasEnoughContentForAssessment( paper, contentNeededForAssessment = 50 ) { let text = isUndefined( paper ) ? "" : paper.getText(); text = removeHtmlBlocks( text ); + text = filterShortcodesFromHTML( text, paper._attributes && paper._attributes.shortcodes ); // The isUndefined check is necessary, because if paper is undefined .getText will throw a typeError. 
return sanitizeString( text ).length >= contentNeededForAssessment; diff --git a/packages/yoastseo/src/scoring/assessments/readability/ListAssessment.js b/packages/yoastseo/src/scoring/assessments/readability/ListAssessment.js index bf07fd7d2ba..9cf10885632 100644 --- a/packages/yoastseo/src/scoring/assessments/readability/ListAssessment.js +++ b/packages/yoastseo/src/scoring/assessments/readability/ListAssessment.js @@ -45,6 +45,7 @@ export default class ListAssessment extends Assessment { let text = paper.getText(); text = languageProcessingHelpers.removeHtmlBlocks( text ); + text = languageProcessingHelpers.filterShortcodesFromHTML( text, paper._attributes && paper._attributes.shortcodes ); return regex.test( text ); } diff --git a/packages/yoastseo/src/scoring/assessments/readability/SubheadingDistributionTooLongAssessment.js b/packages/yoastseo/src/scoring/assessments/readability/SubheadingDistributionTooLongAssessment.js index 940cc7adc42..9fcc75c9bef 100644 --- a/packages/yoastseo/src/scoring/assessments/readability/SubheadingDistributionTooLongAssessment.js +++ b/packages/yoastseo/src/scoring/assessments/readability/SubheadingDistributionTooLongAssessment.js @@ -10,6 +10,7 @@ import getWords from "../../../languageProcessing/helpers/word/getWords"; import AssessmentResult from "../../../values/AssessmentResult"; import { stripFullTags as stripTags } from "../../../languageProcessing/helpers/sanitize/stripHTMLTags"; import removeHtmlBlocks from "../../../languageProcessing/helpers/html/htmlParser"; +import { filterShortcodesFromHTML } from "../../../languageProcessing/helpers"; /** * Represents the assessment for calculating the text after each subheading. @@ -85,6 +86,7 @@ class SubheadingsDistributionTooLong extends Assessment { const customCountLength = researcher.getHelper( "customCountLength" ); let text = paper.getText(); text = removeHtmlBlocks( text ); + text = filterShortcodesFromHTML( text, paper._attributes && paper._attributes.shortcodes ); return customCountLength ? customCountLength( text ) : getWords( text ).length; } diff --git a/packages/yoastseo/src/scoring/assessments/readability/TransitionWordsAssessment.js b/packages/yoastseo/src/scoring/assessments/readability/TransitionWordsAssessment.js index fbcf0177402..c8bf641ddc8 100644 --- a/packages/yoastseo/src/scoring/assessments/readability/TransitionWordsAssessment.js +++ b/packages/yoastseo/src/scoring/assessments/readability/TransitionWordsAssessment.js @@ -11,6 +11,7 @@ import marker from "../../../markers/addMark.js"; import Assessment from "../assessment"; import removeHtmlBlocks from "../../../languageProcessing/helpers/html/htmlParser"; import getWords from "../../../languageProcessing/helpers/word/getWords"; +import { filterShortcodesFromHTML } from "../../../languageProcessing/helpers"; /** @@ -195,6 +196,7 @@ export default class TransitionWordsAssessment extends Assessment { } let text = paper.getText(); text = removeHtmlBlocks( text ); + text = filterShortcodesFromHTML( text, paper._attributes && paper._attributes.shortcodes ); const textLength = customCountLength ? 
customCountLength( text ) : getWords( text ).length; // Do not use hasEnoughContent in this assessment as it is mostly redundant with `textLength >= this._config.applicableIfTextLongerThan` diff --git a/packages/yoastseo/src/scoring/assessments/seo/KeyphraseDistributionAssessment.js b/packages/yoastseo/src/scoring/assessments/seo/KeyphraseDistributionAssessment.js index c9fc68e3431..03060c2ad00 100644 --- a/packages/yoastseo/src/scoring/assessments/seo/KeyphraseDistributionAssessment.js +++ b/packages/yoastseo/src/scoring/assessments/seo/KeyphraseDistributionAssessment.js @@ -2,6 +2,7 @@ import { __, sprintf } from "@wordpress/i18n"; import { merge } from "lodash-es"; import { languageProcessing, AssessmentResult, Assessment, helpers } from "yoastseo"; +import { filterShortcodesFromHTML } from "../../../languageProcessing/helpers"; const { getSentences, helpers: languageProcessingHelpers } = languageProcessing; const { createAnchorOpeningTag } = helpers; @@ -181,6 +182,7 @@ class KeyphraseDistributionAssessment extends Assessment { const memoizedTokenizer = researcher.getHelper( "memoizedTokenizer" ); let text = paper.getText(); text = languageProcessingHelpers.removeHtmlBlocks( text ); + text = filterShortcodesFromHTML( text, paper._attributes && paper._attributes.shortcodes ); const sentences = getSentences( text, memoizedTokenizer ); return paper.hasText() && paper.hasKeyword() && sentences.length >= 15 && researcher.hasResearch( "keyphraseDistribution" ); diff --git a/packages/yoastseo/src/scoring/assessments/seo/KeywordDensityAssessment.js b/packages/yoastseo/src/scoring/assessments/seo/KeywordDensityAssessment.js index e7bdac1736a..e819ac3d289 100644 --- a/packages/yoastseo/src/scoring/assessments/seo/KeywordDensityAssessment.js +++ b/packages/yoastseo/src/scoring/assessments/seo/KeywordDensityAssessment.js @@ -9,6 +9,7 @@ import { createAnchorOpeningTag } from "../../../helpers/shortlinker"; import keyphraseLengthFactor from "../../helpers/assessments/keyphraseLengthFactor.js"; import removeHtmlBlocks from "../../../languageProcessing/helpers/html/htmlParser"; import getWords from "../../../languageProcessing/helpers/word/getWords"; +import { filterShortcodesFromHTML } from "../../../languageProcessing/helpers"; /** * Represents the assessment that will look if the keyphrase density is within the recommended range. 
@@ -113,6 +114,7 @@ class KeywordDensityAssessment extends Assessment { let text = paper.getText(); text = removeHtmlBlocks( text ); + text = filterShortcodesFromHTML( text, paper._attributes && paper._attributes.shortcodes ); this.setBoundaries( text, keyphraseLength, customGetWords ); this._keywordDensity = this._keywordDensity * keyphraseLengthFactor( keyphraseLength ); diff --git a/packages/yoastseo/src/scoring/assessments/seo/SingleH1Assessment.js b/packages/yoastseo/src/scoring/assessments/seo/SingleH1Assessment.js index 9ff0476f891..ab84ab0581b 100644 --- a/packages/yoastseo/src/scoring/assessments/seo/SingleH1Assessment.js +++ b/packages/yoastseo/src/scoring/assessments/seo/SingleH1Assessment.js @@ -2,7 +2,7 @@ import { __, sprintf } from "@wordpress/i18n"; import { isUndefined, merge } from "lodash-es"; import Assessment from "../assessment.js"; -import { createAnchorOpeningTag } from "../../../helpers/shortlinker"; +import { createAnchorOpeningTag } from "../../../helpers"; import marker from "../../../markers/addMark.js"; import AssessmentResult from "../../../values/AssessmentResult.js"; import Mark from "../../../values/Mark.js"; diff --git a/packages/yoastseo/src/scoring/assessor.js b/packages/yoastseo/src/scoring/assessor.js index 1d6ace55c28..dd9e49c92d9 100644 --- a/packages/yoastseo/src/scoring/assessor.js +++ b/packages/yoastseo/src/scoring/assessor.js @@ -7,7 +7,6 @@ import { __, sprintf } from "@wordpress/i18n"; import { filter, find, findIndex, isFunction, isUndefined, map } from "lodash-es"; import LanguageProcessor from "../parse/language/LanguageProcessor"; import { build } from "../parse/build"; -import { filterShortcodesFromHTML } from "../languageProcessing/helpers/sanitize/filterShortcodesFromTree"; // The maximum score of individual assessment is 9. This is why we set the "score rating" here to 9. const ScoreRating = 9; @@ -127,9 +126,6 @@ Assessor.prototype.assess = function( paper ) { const languageProcessor = new LanguageProcessor( this._researcher ); const shortcodes = paper._attributes && paper._attributes.shortcodes; paper.setTree( build( paper.getText(), languageProcessor, shortcodes ) ); - if ( shortcodes ) { - paper._text = filterShortcodesFromHTML( paper.getText(), shortcodes ); - } let assessments = this.getAvailableAssessments(); this.results = []; diff --git a/packages/yoastseo/src/worker/AnalysisWebWorker.js b/packages/yoastseo/src/worker/AnalysisWebWorker.js index 70e0b11f0d7..1ba4522827b 100644 --- a/packages/yoastseo/src/worker/AnalysisWebWorker.js +++ b/packages/yoastseo/src/worker/AnalysisWebWorker.js @@ -33,7 +33,6 @@ import wrapTryCatchAroundAction from "./wrapTryCatchAroundAction"; // Tree assessor functionality. import { ReadabilityScoreAggregator, SEOScoreAggregator } from "../parsedPaper/assess/scoreAggregators"; -import { filterShortcodesFromHTML } from "../languageProcessing/helpers/sanitize/filterShortcodesFromTree"; const logger = getLogger( "yoast-analysis-worker" ); logger.setDefaultLevel( "error" ); @@ -1085,11 +1084,6 @@ export default class AnalysisWebWorker { const shortcodes = this._paper._attributes && this._paper._attributes.shortcodes; this._paper.setTree( build( this._paper.getText(), languageProcessor, shortcodes ) ); - if ( shortcodes ) { - this._paper._text = filterShortcodesFromHTML( this._paper.getText(), shortcodes ); - } - - // Update the configuration locale to the paper locale. 
this.setLocale( this._paper.getLocale() ); } @@ -1413,9 +1407,6 @@ export default class AnalysisWebWorker { const languageProcessor = new LanguageProcessor( researcher ); const shortcodes = paper._attributes && paper._attributes.shortcodes; paper.setTree( build( paper.getText(), languageProcessor, shortcodes ) ); - if ( shortcodes ) { - paper._text = filterShortcodesFromHTML( paper.getText(), shortcodes ); - } } } From afcb257616d052e67e373f8536a12c7f97aed16f Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Thu, 27 Jul 2023 11:54:12 +0200 Subject: [PATCH 280/616] Adjust unit tests --- .../highlighting/getMarkingsInSentenceSpec.js | 49 ++ .../matchTokenWithWordFormsSpec.js | 0 .../sentence/getSentencesFromTreeSpec.js | 143 ++++- .../researches/keywordCountSpec.js | 557 ++++++++++++++++-- .../yoastseo/spec/parse/structure/NodeSpec.js | 26 +- .../spec/parse/traverse/findAllInTreeSpec.js | 16 +- .../spec/parse/traverse/innerTextSpec.js | 16 +- .../seo/KeywordDensityAssessmentSpec.js | 15 + 8 files changed, 734 insertions(+), 88 deletions(-) delete mode 100644 packages/yoastseo/spec/languageProcessing/helpers/keywordCount/matchTokenWithWordFormsSpec.js diff --git a/packages/yoastseo/spec/languageProcessing/helpers/highlighting/getMarkingsInSentenceSpec.js b/packages/yoastseo/spec/languageProcessing/helpers/highlighting/getMarkingsInSentenceSpec.js index 2d5c7d1d434..2d467711fad 100644 --- a/packages/yoastseo/spec/languageProcessing/helpers/highlighting/getMarkingsInSentenceSpec.js +++ b/packages/yoastseo/spec/languageProcessing/helpers/highlighting/getMarkingsInSentenceSpec.js @@ -41,6 +41,9 @@ const testCases = [ endOffset: 4, startOffsetBlock: 0, endOffsetBlock: 4, + attributeId: "", + clientId: "", + isFirstSection: false, }, } ) ], }, @@ -67,6 +70,9 @@ const testCases = [ endOffset: 7, startOffsetBlock: 0, endOffsetBlock: 7, + attributeId: "", + clientId: "", + isFirstSection: false, }, } ) ], }, @@ -94,6 +100,9 @@ const testCases = [ endOffset: 4, startOffsetBlock: 0, endOffsetBlock: 4, + attributeId: "", + clientId: "", + isFirstSection: false, }, } ), new Mark( { @@ -104,6 +113,9 @@ const testCases = [ endOffset: 18, startOffsetBlock: 10, endOffsetBlock: 18, + attributeId: "", + clientId: "", + isFirstSection: false, }, } ), ], @@ -131,6 +143,43 @@ const testCases = [ endOffset: 14, startOffsetBlock: 10, endOffsetBlock: 14, + attributeId: "", + clientId: "", + isFirstSection: false, + }, + } ) ], + }, + { + testDescription: "One marking in a sentence that is found in a WordPress block", + sentence: { text: "This is a sentence.", + sourceCodeRange: { startOffset: 300, endOffset: 329 }, + tokens: [ + { text: "This", sourceCodeRange: { startOffset: 300, endOffset: 314 } }, + { text: " ", sourceCodeRange: { startOffset: 314, endOffset: 315 } }, + { text: "is", sourceCodeRange: { startOffset: 315, endOffset: 317 } }, + { text: " ", sourceCodeRange: { startOffset: 317, endOffset: 318 } }, + { text: "a", sourceCodeRange: { startOffset: 318, endOffset: 319 } }, + { text: " ", sourceCodeRange: { startOffset: 319, endOffset: 320 } }, + { text: "sentence", sourceCodeRange: { startOffset: 320, endOffset: 328 } }, + { text: ".", sourceCodeRange: { startOffset: 328, endOffset: 329 } }, + ], + parentStartOffset: 100, + parentClientId: "aaaparentid12345678901234567890", + parentAttributeId: "apparentattributeid12345678901234567", + isParentFirstSectionOfBlock: true, + }, + matchesInSentence: [ { sourceCodeRange: { startOffset: 300, endOffset: 314 } } ], + expectedResult: [ new Mark( { + marked: "This 
is a sentence.", + original: "This is a sentence.", + position: { + startOffset: 300, + endOffset: 314, + startOffsetBlock: 200, + endOffsetBlock: 214, + attributeId: "apparentattributeid12345678901234567", + clientId: "aaaparentid12345678901234567890", + isFirstSection: true, }, } ) ], }, diff --git a/packages/yoastseo/spec/languageProcessing/helpers/keywordCount/matchTokenWithWordFormsSpec.js b/packages/yoastseo/spec/languageProcessing/helpers/keywordCount/matchTokenWithWordFormsSpec.js deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/packages/yoastseo/spec/languageProcessing/helpers/sentence/getSentencesFromTreeSpec.js b/packages/yoastseo/spec/languageProcessing/helpers/sentence/getSentencesFromTreeSpec.js index 623714a8860..aa2b3c43bd4 100644 --- a/packages/yoastseo/spec/languageProcessing/helpers/sentence/getSentencesFromTreeSpec.js +++ b/packages/yoastseo/spec/languageProcessing/helpers/sentence/getSentencesFromTreeSpec.js @@ -34,6 +34,9 @@ describe( "test to get sentences from the tree", () => { { sourceCodeRange: { endOffset: 43, startOffset: 38 }, text: "human" }, { sourceCodeRange: { endOffset: 44, startOffset: 43 }, text: "." }, ], + isParentFirstSectionOfBlock: false, + parentAttributeId: "", + parentClientId: "", }, { sourceCodeRange: { endOffset: 64, startOffset: 44 }, @@ -51,7 +54,11 @@ describe( "test to get sentences from the tree", () => { { sourceCodeRange: { endOffset: 59, startOffset: 58 }, text: " " }, { sourceCodeRange: { endOffset: 63, startOffset: 59 }, text: "cute" }, { sourceCodeRange: { endOffset: 64, startOffset: 63 }, text: "." }, - ] }, + ], + isParentFirstSectionOfBlock: false, + parentAttributeId: "", + parentClientId: "", + }, { sourceCodeRange: { endOffset: 86, startOffset: 72 }, parentStartOffset: 72, @@ -62,7 +69,11 @@ describe( "test to get sentences from the tree", () => { { sourceCodeRange: { endOffset: 84, startOffset: 74 }, text: "subheading" }, { sourceCodeRange: { endOffset: 85, startOffset: 84 }, text: " " }, { sourceCodeRange: { endOffset: 86, startOffset: 85 }, text: "3" }, - ] }, + ], + isParentFirstSectionOfBlock: false, + parentAttributeId: "", + parentClientId: "", + }, { sourceCodeRange: {}, parentStartOffset: 0, @@ -73,7 +84,11 @@ describe( "test to get sentences from the tree", () => { { sourceCodeRange: {}, text: "text" }, { sourceCodeRange: {}, text: " " }, { sourceCodeRange: {}, text: "text" }, - ] }, + ], + isParentFirstSectionOfBlock: false, + parentAttributeId: "", + parentClientId: "", + }, { sourceCodeRange: { endOffset: 123, startOffset: 109 }, parentStartOffset: 109, @@ -84,10 +99,14 @@ describe( "test to get sentences from the tree", () => { { sourceCodeRange: { endOffset: 121, startOffset: 111 }, text: "subheading" }, { sourceCodeRange: { endOffset: 122, startOffset: 121 }, text: " " }, { sourceCodeRange: { endOffset: 123, startOffset: 122 }, text: "4" }, - ] }, + ], + isParentFirstSectionOfBlock: false, + parentAttributeId: "", + parentClientId: "", + }, { sourceCodeRange: { endOffset: 138, startOffset: 128 }, - parentStartOffset: 0, + parentStartOffset: 128, text: "more text.", tokens: [ { sourceCodeRange: { endOffset: 132, startOffset: 128 }, text: "more" }, @@ -95,6 +114,9 @@ describe( "test to get sentences from the tree", () => { { sourceCodeRange: { endOffset: 137, startOffset: 133 }, text: "text" }, { sourceCodeRange: { endOffset: 138, startOffset: 137 }, text: "." 
}, ], + isParentFirstSectionOfBlock: false, + parentAttributeId: "", + parentClientId: "", }, ] ); } ); @@ -115,9 +137,120 @@ describe( "test to get sentences from the tree", () => { { sourceCodeRange: { endOffset: 14, startOffset: 13 }, text: " " }, { sourceCodeRange: { endOffset: 19, startOffset: 14 }, text: "panda" }, ], + parentStartOffset: 3, + isParentFirstSectionOfBlock: false, + parentAttributeId: "", + parentClientId: "", }, ] ); } ); + it( "returns the sentences retrieved from Yoast block", () => { + const paper = new Paper( "

    What is giant panda " + + "

    Giant panda is test testts

    Test " + + "

    Tets

    " + + "giant panda is silly

    " + + "is this giant panda

    " ); + researcher.setPaper( paper ); + buildTree( paper, researcher ); + expect( getSentencesFromTree( paper ) ).toEqual( + [ + { + text: "What is giant panda", + tokens: [ + { text: "What", sourceCodeRange: { startOffset: 149, endOffset: 153 } }, + { text: " ", sourceCodeRange: { startOffset: 153, endOffset: 154 } }, + { text: "is", sourceCodeRange: { startOffset: 154, endOffset: 156 } }, + { text: " ", sourceCodeRange: { startOffset: 156, endOffset: 157 } }, + { text: "giant", sourceCodeRange: { startOffset: 157, endOffset: 162 } }, + { text: " ", sourceCodeRange: { startOffset: 162, endOffset: 163 } }, + { text: "panda", sourceCodeRange: { startOffset: 163, endOffset: 168 } }, + ], + sourceCodeRange: { startOffset: 149, endOffset: 168 }, + parentStartOffset: 113, + parentClientId: "", + parentAttributeId: "faq-question-1689322642789", + isParentFirstSectionOfBlock: true, + }, + { + text: "Giant panda is test testts", + tokens: [ + { text: "Giant", sourceCodeRange: { startOffset: 207, endOffset: 212 } }, + { text: " ", sourceCodeRange: { startOffset: 212, endOffset: 213 } }, + { text: "panda", sourceCodeRange: { startOffset: 221, endOffset: 226 } }, + { text: " ", sourceCodeRange: { startOffset: 235, endOffset: 236 } }, + { text: "is", sourceCodeRange: { startOffset: 236, endOffset: 238 } }, + { text: " ", sourceCodeRange: { startOffset: 238, endOffset: 239 } }, + { text: "test", sourceCodeRange: { startOffset: 239, endOffset: 243 } }, + { text: " ", sourceCodeRange: { startOffset: 243, endOffset: 244 } }, + { text: "testts", sourceCodeRange: { startOffset: 244, endOffset: 250 } }, + ], + sourceCodeRange: { startOffset: 207, endOffset: 250 }, + parentStartOffset: 207, + parentClientId: "", + parentAttributeId: "faq-question-1689322642789", + isParentFirstSectionOfBlock: false, + }, + { + text: "Test", + tokens: [ + { text: "Test", sourceCodeRange: { startOffset: 362, endOffset: 366 } }, + ], + sourceCodeRange: { startOffset: 362, endOffset: 366 }, + parentStartOffset: 326, + parentClientId: "", + parentAttributeId: "faq-question-1689322667728", + isParentFirstSectionOfBlock: true, + }, + { + text: "Tets", + tokens: [ + { text: "Tets", sourceCodeRange: { startOffset: 405, endOffset: 409 } }, + ], + sourceCodeRange: { startOffset: 405, endOffset: 409 }, + parentStartOffset: 405, + parentClientId: "", + parentAttributeId: "faq-question-1689322667728", + isParentFirstSectionOfBlock: false, + }, + { + text: "giant panda is silly", + tokens: [ + { text: "giant", sourceCodeRange: { startOffset: 521, endOffset: 526 } }, + { text: " ", sourceCodeRange: { startOffset: 526, endOffset: 527 } }, + { text: "panda", sourceCodeRange: { startOffset: 527, endOffset: 532 } }, + { text: " ", sourceCodeRange: { startOffset: 532, endOffset: 533 } }, + { text: "is", sourceCodeRange: { startOffset: 533, endOffset: 535 } }, + { text: " ", sourceCodeRange: { startOffset: 535, endOffset: 536 } }, + { text: "silly", sourceCodeRange: { startOffset: 536, endOffset: 541 } }, + ], + sourceCodeRange: { startOffset: 521, endOffset: 541 }, + parentStartOffset: 485, + parentClientId: "", + parentAttributeId: "faq-question-1689936392675", + isParentFirstSectionOfBlock: true, + }, + { + text: "is this giant panda", + tokens: [ + { text: "is", sourceCodeRange: { startOffset: 580, endOffset: 582 } }, + { text: " ", sourceCodeRange: { startOffset: 582, endOffset: 583 } }, + { text: "this", sourceCodeRange: { startOffset: 583, endOffset: 587 } }, + { text: " ", sourceCodeRange: { startOffset: 587, endOffset: 588 } }, + { 
text: "giant", sourceCodeRange: { startOffset: 588, endOffset: 593 } }, + { text: " ", sourceCodeRange: { startOffset: 593, endOffset: 594 } }, + { text: "panda", sourceCodeRange: { startOffset: 594, endOffset: 599 } }, + ], + sourceCodeRange: { startOffset: 580, endOffset: 599 }, + parentStartOffset: 580, + parentClientId: "", + parentAttributeId: "faq-question-1689936392675", + isParentFirstSectionOfBlock: false, + }, + ] + ); + } ); it( "should return empty array if no sentences are found in paragraph/heading node", () => { const paper = new Paper( "

    The red panda (Ailurus fulgens), also known as the lesser panda," + " is a small mammal native to the eastern Himalayas and southwestern China
    " ); diff --git a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js index 866840d60d9..d32ffecfa2c 100644 --- a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js +++ b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js @@ -37,6 +37,9 @@ const testCases = [ endOffset: 36, startOffsetBlock: 26, endOffsetBlock: 33, + attributeId: "", + clientId: "", + isFirstSection: false, }, } ), ], @@ -65,6 +68,9 @@ const testCases = [ endOffset: 37, startOffsetBlock: 26, endOffsetBlock: 34, + attributeId: "", + clientId: "", + isFirstSection: false, }, } ), new Mark( { @@ -76,6 +82,9 @@ const testCases = [ endOffset: 64, startOffsetBlock: 52, endOffsetBlock: 61, + attributeId: "", + clientId: "", + isFirstSection: false, }, } ), ], @@ -96,6 +105,9 @@ const testCases = [ endOffset: 34, startOffsetBlock: 24, endOffsetBlock: 31, + attributeId: "", + clientId: "", + isFirstSection: false, }, } ), new Mark( { @@ -107,6 +119,9 @@ const testCases = [ endOffset: 56, startOffsetBlock: 45, endOffsetBlock: 53, + attributeId: "", + clientId: "", + isFirstSection: false, }, } ), ], @@ -134,6 +149,9 @@ const testCases = [ endOffset: 25, startOffsetBlock: 14, endOffsetBlock: 22, + attributeId: "", + clientId: "", + isFirstSection: false, }, } ), ], @@ -163,6 +181,9 @@ const testCases = [ endOffset: 27, startOffsetBlock: 20, endOffsetBlock: 24, + attributeId: "", + clientId: "", + isFirstSection: false, }, } ), new Mark( { @@ -175,6 +196,9 @@ const testCases = [ endOffset: 32, startOffsetBlock: 26, endOffsetBlock: 29, + attributeId: "", + clientId: "", + isFirstSection: false, }, } ), new Mark( { @@ -187,6 +211,9 @@ const testCases = [ endOffset: 48, startOffsetBlock: 42, endOffsetBlock: 45, + attributeId: "", + clientId: "", + isFirstSection: false, }, } ), new Mark( { @@ -199,6 +226,9 @@ const testCases = [ endOffset: 62, startOffsetBlock: 55, endOffsetBlock: 59, + attributeId: "", + clientId: "", + isFirstSection: false, }, } ), ], @@ -221,6 +251,9 @@ const testCases = [ startOffset: 43, startOffsetBlock: 40, endOffsetBlock: 55, + attributeId: "", + clientId: "", + isFirstSection: false, }, } ), new Mark( { @@ -232,6 +265,9 @@ const testCases = [ endOffset: 67, startOffsetBlock: 57, endOffsetBlock: 64, + attributeId: "", + clientId: "", + isFirstSection: false, }, } ), ], @@ -247,7 +283,14 @@ const testCases = [ new Mark( { marked: "Lorem ipsum dolor sit amet, consectetur keyword-keyword, adipiscing elit.", original: "Lorem ipsum dolor sit amet, consectetur keyword-keyword, adipiscing elit.", - position: { endOffset: 58, startOffset: 43 } } ), + position: { + endOffset: 58, + startOffset: 43, + startOffsetBlock: 40, + endOffsetBlock: 55, + attributeId: "", + clientId: "", + isFirstSection: false } } ), ], skip: false, }, @@ -262,7 +305,15 @@ const testCases = [ new Mark( { marked: "Lorem ipsum dolor sit amet, consectetur kupu-kupu, adipiscing elit.", original: "Lorem ipsum dolor sit amet, consectetur kupu-kupu, adipiscing elit.", - position: { endOffset: 52, startOffset: 43 } } ), + position: { + endOffset: 52, + startOffset: 43, + startOffsetBlock: 40, + endOffsetBlock: 49, + attributeId: "", + clientId: "", + isFirstSection: false, + } } ), ], skip: false, }, @@ -279,6 +330,9 @@ const testCases = [ endOffset: 26, startOffsetBlock: 16, endOffsetBlock: 23, + attributeId: "", + clientId: "", + isFirstSection: false, }, } ), ], @@ -299,7 +353,15 @@ const testCases = [ 
expectedCount: 2, expectedMarkings: [ new Mark( { marked: "A string with a key phrase key phrase.", original: "A string with a key phrase key phrase.", - position: { endOffset: 40, startOffset: 19 } } ) ], + position: { + endOffset: 40, + startOffset: 19, + startOffsetBlock: 16, + endOffsetBlock: 37, + attributeId: "", + clientId: "", + isFirstSection: false, + } } ) ], skip: false, }, { @@ -310,10 +372,13 @@ const testCases = [ expectedMarkings: [ new Mark( { marked: "A string with a key phrase.", original: "A string with a key phrase.", position: { - startOffset: 29, - endOffset: 36, - startOffsetBlock: 26, - endOffsetBlock: 33, + startOffset: 19, + endOffset: 29, + startOffsetBlock: 16, + endOffsetBlock: 26, + attributeId: "", + clientId: "", + isFirstSection: false, }, } ) ], skip: false, @@ -335,10 +400,13 @@ const testCases = [ expectedMarkings: [ new Mark( { marked: "A .sentence with a keyphrase.", original: "A .sentence with a keyphrase.", position: { - startOffset: 29, - endOffset: 36, - startOffsetBlock: 26, - endOffsetBlock: 33, + startOffset: 5, + endOffset: 14, + startOffsetBlock: 2, + endOffsetBlock: 11, + attributeId: "", + clientId: "", + isFirstSection: false, }, } ) ], skip: false, @@ -359,7 +427,15 @@ const testCases = [ expectedCount: 1, expectedMarkings: [ new Mark( { marked: "A string with a $keyword.", original: "A string with a $keyword.", - position: { endOffset: 27, startOffset: 19 } } ) ], + position: { + endOffset: 27, + startOffset: 19, + startOffsetBlock: 16, + endOffsetBlock: 24, + attributeId: "", + clientId: "", + isFirstSection: false, + } } ) ], skip: false, }, { @@ -371,11 +447,27 @@ const testCases = [ new Mark( { marked: "A string with cats and dogs, " + "and other cats and dogs.", original: "A string with cats and dogs, and other cats and dogs.", - position: { endOffset: 30, startOffset: 17 } } ), + position: { + endOffset: 30, + startOffset: 17, + startOffsetBlock: 14, + endOffsetBlock: 27, + attributeId: "", + clientId: "", + isFirstSection: false, + } } ), new Mark( { marked: "A string with cats and dogs, " + "and other cats and dogs.", original: "A string with cats and dogs, and other cats and dogs.", - position: { endOffset: 55, startOffset: 42 } } ) ], + position: { + endOffset: 55, + startOffset: 42, + startOffsetBlock: 39, + endOffsetBlock: 52, + attributeId: "", + clientId: "", + isFirstSection: false, + } } ) ], skip: false, }, { @@ -386,7 +478,15 @@ const testCases = [ expectedMarkings: [ new Mark( { marked: "this is a keyword keyword keyword.", original: "this is a keyword keyword keyword.", - position: { endOffset: 36, startOffset: 13 } } ) ], + position: { + endOffset: 36, + startOffset: 13, + startOffsetBlock: 10, + endOffsetBlock: 33, + attributeId: "", + clientId: "", + isFirstSection: false, + } } ) ], skip: false, }, { @@ -397,7 +497,15 @@ const testCases = [ expectedMarkings: [ new Mark( { marked: "A sentence about a red panda red panda red panda.", original: "A sentence about a red panda red panda red panda.", - position: { endOffset: 51, startOffset: 22 } } ) ], + position: { + endOffset: 51, + startOffset: 22, + startOffsetBlock: 19, + endOffsetBlock: 48, + attributeId: "", + clientId: "", + isFirstSection: false, + } } ) ], skip: false, }, ]; @@ -434,6 +542,9 @@ const testCasesWithSpecialCharacters = [ endOffset: 43, startOffsetBlock: 32, endOffsetBlock: 40, + attributeId: "", + clientId: "", + isFirstSection: false, }, } ), ], @@ -446,7 +557,15 @@ const testCasesWithSpecialCharacters = [ expectedCount: 1, expectedMarkings: [ 
new Mark( { marked: "A string with a key-word.", original: "A string with a key-word.", - position: { endOffset: 27, startOffset: 19 } } ) ], + position: { + endOffset: 27, + startOffset: 19, + startOffsetBlock: 16, + endOffsetBlock: 24, + attributeId: "", + clientId: "", + isFirstSection: false, + } } ) ], skip: false, }, { @@ -456,7 +575,15 @@ const testCasesWithSpecialCharacters = [ expectedCount: 1, expectedMarkings: [ new Mark( { marked: "A string with a key-phrase and key phrase.", original: "A string with a key-phrase and key phrase.", - position: { endOffset: 29, startOffset: 19 } } ) ], + position: { + endOffset: 29, + startOffset: 19, + startOffsetBlock: 16, + endOffsetBlock: 26, + attributeId: "", + clientId: "", + isFirstSection: false, + } } ) ], skip: false, }, { @@ -477,7 +604,15 @@ const testCasesWithSpecialCharacters = [ new Mark( { original: "A string with a panda-red.", marked: "A string with a panda-red.", - position: { startOffset: 19, endOffset: 28 }, + position: { + startOffset: 19, + endOffset: 28, + startOffsetBlock: 16, + endOffsetBlock: 25, + attributeId: "", + clientId: "", + isFirstSection: false, + }, } ), ], skip: false, @@ -492,7 +627,15 @@ const testCasesWithSpecialCharacters = [ new Mark( { original: "A string with a key-word.", marked: "A string with a key-word.", - position: { endOffset: 27, startOffset: 19 }, + position: { + endOffset: 27, + startOffset: 19, + startOffsetBlock: 16, + endOffsetBlock: 24, + attributeId: "", + clientId: "", + isFirstSection: false, + }, } ), ], skip: false, @@ -508,13 +651,29 @@ const testCasesWithSpecialCharacters = [ original: "A string with a word of key-phrase.", marked: "A string with a word of" + " key-phrase.", - position: { endOffset: 23, startOffset: 19 }, + position: { + endOffset: 23, + startOffset: 19, + startOffsetBlock: 16, + endOffsetBlock: 20, + attributeId: "", + clientId: "", + isFirstSection: false, + }, } ), new Mark( { original: "A string with a word of key-phrase.", marked: "A string with a word of" + " key-phrase.", - position: { endOffset: 37, startOffset: 27 }, + position: { + endOffset: 37, + startOffset: 27, + startOffsetBlock: 24, + endOffsetBlock: 34, + attributeId: "", + clientId: "", + isFirstSection: false, + }, } ), // Note that in this case, we'll highlight 'key-phrase' even though technically we only match "key" in "key-phrase". // This is the consequence of tokenizing "key-phrase" into one token instead of three. 
@@ -528,7 +687,13 @@ const testCasesWithSpecialCharacters = [ expectedCount: 1, expectedMarkings: [ new Mark( { marked: "A string with a key_word.", original: "A string with a key_word.", - position: { endOffset: 27, startOffset: 19 } } ) ], + position: { endOffset: 27, startOffset: 19, + startOffsetBlock: 16, + endOffsetBlock: 24, + attributeId: "", + clientId: "", + isFirstSection: false, + } } ) ], skip: false, }, { @@ -538,7 +703,13 @@ const testCasesWithSpecialCharacters = [ expectedCount: 1, expectedMarkings: [ new Mark( { marked: "A string with key&word in it", original: "A string with key&word in it", - position: { endOffset: 25, startOffset: 17 } } ) ], + position: { endOffset: 25, startOffset: 17, + startOffsetBlock: 14, + endOffsetBlock: 22, + attributeId: "", + clientId: "", + isFirstSection: false, + } } ) ], skip: false, }, { @@ -549,7 +720,13 @@ const testCasesWithSpecialCharacters = [ expectedMarkings: [ new Mark( { marked: "A string with quotes to match the key'word, even if the quotes differ.", original: "A string with quotes to match the key'word, even if the quotes differ.", - position: { endOffset: 45, startOffset: 37 } } ) ], + position: { endOffset: 45, startOffset: 37, + startOffsetBlock: 34, + endOffsetBlock: 42, + attributeId: "", + clientId: "", + isFirstSection: false, + } } ) ], skip: false, }, { @@ -564,6 +741,9 @@ const testCasesWithSpecialCharacters = [ endOffset: 27, startOffsetBlock: 16, endOffsetBlock: 24, + attributeId: "", + clientId: "", + isFirstSection: false, }, } ) ], skip: false, @@ -575,7 +755,13 @@ const testCasesWithSpecialCharacters = [ expectedCount: 1, expectedMarkings: [ new Mark( { marked: "A string with a keyphrase&.", original: "A string with a keyphrase&.", - position: { endOffset: 28, startOffset: 19 } } ) ], + position: { endOffset: 28, startOffset: 19, + startOffsetBlock: 16, + endOffsetBlock: 25, + attributeId: "", + clientId: "", + isFirstSection: false, + } } ) ], skip: false, }, { @@ -595,7 +781,13 @@ const testCasesWithSpecialCharacters = [ new Mark( { original: "A string with a «keyword».", marked: "A string with a «keyword».", - position: { endOffset: 27, startOffset: 20 } } + position: { endOffset: 27, startOffset: 20, + startOffsetBlock: 17, + endOffsetBlock: 24, + attributeId: "", + clientId: "", + isFirstSection: false, + } } ), ], skip: false, @@ -609,7 +801,13 @@ const testCasesWithSpecialCharacters = [ new Mark( { original: "A string with a keyword.", marked: "A string with a keyword.", - position: { endOffset: 26, startOffset: 19 } } + position: { endOffset: 26, startOffset: 19, + startOffsetBlock: 16, + endOffsetBlock: 23, + attributeId: "", + clientId: "", + isFirstSection: false, + } } ), ], skip: false, @@ -623,7 +821,13 @@ const testCasesWithSpecialCharacters = [ new Mark( { original: "A string with a ‹keyword›.", marked: "A string with a ‹keyword›.", - position: { endOffset: 27, startOffset: 20 } } + position: { endOffset: 27, startOffset: 20, + startOffsetBlock: 17, + endOffsetBlock: 24, + attributeId: "", + clientId: "", + isFirstSection: false, + } } ), ], skip: false, @@ -637,7 +841,13 @@ const testCasesWithSpecialCharacters = [ new Mark( { original: "A string with a keyword.", marked: "A string with a keyword.", - position: { endOffset: 26, startOffset: 19 } } + position: { endOffset: 26, startOffset: 19, + startOffsetBlock: 16, + endOffsetBlock: 23, + attributeId: "", + clientId: "", + isFirstSection: false, + } } ), ], skip: false, @@ -651,7 +861,13 @@ const testCasesWithSpecialCharacters = [ new Mark( { 
original: "¿Keyword is it", marked: "¿Keyword is it", - position: { endOffset: 11, startOffset: 4 } } + position: { endOffset: 11, startOffset: 4, + startOffsetBlock: 1, + endOffsetBlock: 8, + attributeId: "", + clientId: "", + isFirstSection: false, + } } ), ], skip: false, @@ -665,7 +881,13 @@ const testCasesWithSpecialCharacters = [ new Mark( { original: "الجيدة؟", marked: "الجيدة؟", - position: { endOffset: 9, startOffset: 3 } } + position: { endOffset: 9, startOffset: 3, + startOffsetBlock: 0, + endOffsetBlock: 6, + attributeId: "", + clientId: "", + isFirstSection: false, + } } ), ], skip: false, @@ -701,6 +923,9 @@ const testCasesWithSpecialCharacters = [ endOffset: 62, startOffsetBlock: 26, endOffsetBlock: 33, + attributeId: "", + clientId: "", + isFirstSection: false, }, } ), ], @@ -747,6 +972,9 @@ const testCasesWithSpecialCharacters = [ endOffset: 305, startOffsetBlock: 266, endOffsetBlock: 276, + attributeId: "", + clientId: "", + isFirstSection: false, }, } ), ], @@ -790,6 +1018,9 @@ const testCasesWithSpecialCharacters = [ endOffset: 291, startOffsetBlock: 253, endOffsetBlock: 262, + attributeId: "", + clientId: "", + isFirstSection: false, }, } ), ], @@ -828,6 +1059,9 @@ const testCasesWithSpecialCharacters = [ endOffset: 118, startOffsetBlock: 11, endOffsetBlock: 22, + attributeId: "", + clientId: "", + isFirstSection: false, }, } ), ], @@ -904,6 +1138,9 @@ const testCasesWithSpecialCharacters = [ endOffset: 154, startOffsetBlock: 14, endOffsetBlock: 19, + attributeId: "", + clientId: "", + isFirstSection: false, }, } ), new Mark( { @@ -914,10 +1151,12 @@ const testCasesWithSpecialCharacters = [ endOffset: 431, startOffsetBlock: 47, endOffsetBlock: 52, + attributeId: "", + clientId: "", + isFirstSection: false, }, } ), ], - // Skipped for now, coz the PR for exact matching is not yet merged. 
skip: false, }, ]; @@ -957,7 +1196,13 @@ const testCasesWithLocaleMapping = [ expectedMarkings: [ new Mark( { marked: "Waltz keepin auf mitz auf keepin äöüß weiner blitz deutsch spitzen.", original: "Waltz keepin auf mitz auf keepin äöüß weiner blitz deutsch spitzen.", - position: { endOffset: 40, startOffset: 36 } } ) ], + position: { endOffset: 40, startOffset: 36, + startOffsetBlock: 33, + endOffsetBlock: 37, + attributeId: "", + clientId: "", + isFirstSection: false, + } } ) ], skip: false, }, { @@ -976,7 +1221,13 @@ const testCasesWithLocaleMapping = [ expectedMarkings: [ new Mark( { marked: "Accion Española fue una revista.", original: "Accion Española fue una revista.", - position: { endOffset: 9, startOffset: 3 } } ) ], + position: { endOffset: 9, startOffset: 3, + startOffsetBlock: 0, + endOffsetBlock: 6, + attributeId: "", + clientId: "", + isFirstSection: false, + } } ) ], skip: false, }, { @@ -989,12 +1240,24 @@ const testCasesWithLocaleMapping = [ marked: "oeverlaatelsebesiktning " + "and overlatelsebesiktning", original: "oeverlaatelsebesiktning and overlatelsebesiktning", - position: { endOffset: 26, startOffset: 3 } } ), + position: { endOffset: 26, startOffset: 3, + startOffsetBlock: 0, + endOffsetBlock: 23, + attributeId: "", + clientId: "", + isFirstSection: false, + } } ), new Mark( { marked: "oeverlaatelsebesiktning " + "and overlatelsebesiktning", original: "oeverlaatelsebesiktning and overlatelsebesiktning", - position: { endOffset: 52, startOffset: 31 } } ) ], + position: { endOffset: 52, startOffset: 31, + startOffsetBlock: 28, + endOffsetBlock: 49, + attributeId: "", + clientId: "", + isFirstSection: false, + } } ) ], skip: false, }, { @@ -1007,17 +1270,35 @@ const testCasesWithLocaleMapping = [ marked: "København and " + "Koebenhavn and Kobenhavn", original: "København and Koebenhavn and Kobenhavn", - position: { endOffset: 12, startOffset: 3 } } ), + position: { endOffset: 12, startOffset: 3, + startOffsetBlock: 0, + endOffsetBlock: 9, + attributeId: "", + clientId: "", + isFirstSection: false, + } } ), new Mark( { marked: "København and " + "Koebenhavn and Kobenhavn", original: "København and Koebenhavn and Kobenhavn", - position: { endOffset: 27, startOffset: 17 } } ), + position: { endOffset: 27, startOffset: 17, + startOffsetBlock: 14, + endOffsetBlock: 24, + attributeId: "", + clientId: "", + isFirstSection: false, + } } ), new Mark( { marked: "København and " + "Koebenhavn and Kobenhavn", original: "København and Koebenhavn and Kobenhavn", - position: { endOffset: 41, startOffset: 32 } } ) ], + position: { endOffset: 41, startOffset: 32, + startOffsetBlock: 29, + endOffsetBlock: 38, + attributeId: "", + clientId: "", + isFirstSection: false, + } } ) ], skip: false, }, { @@ -1029,11 +1310,23 @@ const testCasesWithLocaleMapping = [ new Mark( { marked: "Türkçe and Turkce", original: "Türkçe and Turkce", - position: { endOffset: 9, startOffset: 3 } } ), + position: { endOffset: 9, startOffset: 3, + startOffsetBlock: 0, + endOffsetBlock: 6, + attributeId: "", + clientId: "", + isFirstSection: false, + } } ), new Mark( { marked: "Türkçe and Turkce", original: "Türkçe and Turkce", - position: { endOffset: 20, startOffset: 14 } } ), + position: { endOffset: 20, startOffset: 14, + startOffsetBlock: 11, + endOffsetBlock: 17, + attributeId: "", + clientId: "", + isFirstSection: false, + } } ), ], skip: false, }, @@ -1046,11 +1339,23 @@ const testCasesWithLocaleMapping = [ new Mark( { marked: "Türkçe and Turkce", original: "Türkçe and Turkce", - position: { endOffset: 
9, startOffset: 3 } } ), + position: { endOffset: 9, startOffset: 3, + startOffsetBlock: 0, + endOffsetBlock: 6, + attributeId: "", + clientId: "", + isFirstSection: false, + } } ), new Mark( { marked: "Türkçe and Turkce", original: "Türkçe and Turkce", - position: { endOffset: 20, startOffset: 14 } } ), + position: { endOffset: 20, startOffset: 14, + startOffsetBlock: 11, + endOffsetBlock: 17, + attributeId: "", + clientId: "", + isFirstSection: false, + } } ), ], skip: false, }, @@ -1064,22 +1369,46 @@ const testCasesWithLocaleMapping = [ marked: "İstanbul and Istanbul and " + "istanbul and ıstanbul", original: "İstanbul and Istanbul and istanbul and ıstanbul", - position: { endOffset: 11, startOffset: 3 } } ), + position: { endOffset: 11, startOffset: 3, + startOffsetBlock: 0, + endOffsetBlock: 8, + attributeId: "", + clientId: "", + isFirstSection: false, + } } ), new Mark( { marked: "İstanbul and Istanbul and " + "istanbul and ıstanbul", original: "İstanbul and Istanbul and istanbul and ıstanbul", - position: { endOffset: 24, startOffset: 16 } } ), + position: { endOffset: 24, startOffset: 16, + startOffsetBlock: 13, + endOffsetBlock: 21, + attributeId: "", + clientId: "", + isFirstSection: false, + } } ), new Mark( { marked: "İstanbul and Istanbul and " + "istanbul and ıstanbul", original: "İstanbul and Istanbul and istanbul and ıstanbul", - position: { endOffset: 37, startOffset: 29 } } ), + position: { endOffset: 37, startOffset: 29, + startOffsetBlock: 26, + endOffsetBlock: 34, + attributeId: "", + clientId: "", + isFirstSection: false, + } } ), new Mark( { marked: "İstanbul and Istanbul and " + "istanbul and ıstanbul", original: "İstanbul and Istanbul and istanbul and ıstanbul", - position: { endOffset: 50, startOffset: 42 } } ) ], + position: { endOffset: 50, startOffset: 42, + startOffsetBlock: 39, + endOffsetBlock: 47, + attributeId: "", + clientId: "", + isFirstSection: false, + } } ) ], skip: false, }, { @@ -1092,22 +1421,46 @@ const testCasesWithLocaleMapping = [ marked: "İstanbul and Istanbul and " + "istanbul and ıstanbul", original: "İstanbul and Istanbul and istanbul and ıstanbul", - position: { endOffset: 11, startOffset: 3 } } ), + position: { endOffset: 11, startOffset: 3, + startOffsetBlock: 0, + endOffsetBlock: 8, + attributeId: "", + clientId: "", + isFirstSection: false, + } } ), new Mark( { marked: "İstanbul and Istanbul and " + "istanbul and ıstanbul", original: "İstanbul and Istanbul and istanbul and ıstanbul", - position: { endOffset: 24, startOffset: 16 } } ), + position: { endOffset: 24, startOffset: 16, + startOffsetBlock: 13, + endOffsetBlock: 21, + attributeId: "", + clientId: "", + isFirstSection: false, + } } ), new Mark( { marked: "İstanbul and Istanbul and " + "istanbul and ıstanbul", original: "İstanbul and Istanbul and istanbul and ıstanbul", - position: { endOffset: 37, startOffset: 29 } } ), + position: { endOffset: 37, startOffset: 29, + startOffsetBlock: 26, + endOffsetBlock: 34, + attributeId: "", + clientId: "", + isFirstSection: false, + } } ), new Mark( { marked: "İstanbul and Istanbul and " + "istanbul and ıstanbul", original: "İstanbul and Istanbul and istanbul and ıstanbul", - position: { endOffset: 50, startOffset: 42 } } ) ], + position: { endOffset: 50, startOffset: 42, + startOffsetBlock: 39, + endOffsetBlock: 47, + attributeId: "", + clientId: "", + isFirstSection: false, + } } ) ], skip: false, }, { @@ -1120,22 +1473,46 @@ const testCasesWithLocaleMapping = [ marked: "İstanbul and Istanbul and " + "istanbul and ıstanbul", original: 
"İstanbul and Istanbul and istanbul and ıstanbul", - position: { endOffset: 11, startOffset: 3 } } ), + position: { endOffset: 11, startOffset: 3, + startOffsetBlock: 0, + endOffsetBlock: 8, + attributeId: "", + clientId: "", + isFirstSection: false, + } } ), new Mark( { marked: "İstanbul and Istanbul and " + "istanbul and ıstanbul", original: "İstanbul and Istanbul and istanbul and ıstanbul", - position: { endOffset: 24, startOffset: 16 } } ), + position: { endOffset: 24, startOffset: 16, + startOffsetBlock: 13, + endOffsetBlock: 21, + attributeId: "", + clientId: "", + isFirstSection: false, + } } ), new Mark( { marked: "İstanbul and Istanbul and " + "istanbul and ıstanbul", original: "İstanbul and Istanbul and istanbul and ıstanbul", - position: { endOffset: 37, startOffset: 29 } } ), + position: { endOffset: 37, startOffset: 29, + startOffsetBlock: 26, + endOffsetBlock: 34, + attributeId: "", + clientId: "", + isFirstSection: false, + } } ), new Mark( { marked: "İstanbul and Istanbul and " + "istanbul and ıstanbul", original: "İstanbul and Istanbul and istanbul and ıstanbul", - position: { endOffset: 50, startOffset: 42 } } ) ], + position: { endOffset: 50, startOffset: 42, + startOffsetBlock: 39, + endOffsetBlock: 47, + attributeId: "", + clientId: "", + isFirstSection: false, + } } ) ], skip: false, }, { @@ -1148,22 +1525,46 @@ const testCasesWithLocaleMapping = [ marked: "İstanbul and Istanbul and " + "istanbul and ıstanbul", original: "İstanbul and Istanbul and istanbul and ıstanbul", - position: { endOffset: 11, startOffset: 3 } } ), + position: { endOffset: 11, startOffset: 3, + startOffsetBlock: 0, + endOffsetBlock: 8, + attributeId: "", + clientId: "", + isFirstSection: false, + } } ), new Mark( { marked: "İstanbul and Istanbul and " + "istanbul and ıstanbul", original: "İstanbul and Istanbul and istanbul and ıstanbul", - position: { endOffset: 24, startOffset: 16 } } ), + position: { endOffset: 24, startOffset: 16, + startOffsetBlock: 13, + endOffsetBlock: 21, + attributeId: "", + clientId: "", + isFirstSection: false, + } } ), new Mark( { marked: "İstanbul and Istanbul and " + "istanbul and ıstanbul", original: "İstanbul and Istanbul and istanbul and ıstanbul", - position: { endOffset: 37, startOffset: 29 } } ), + position: { endOffset: 37, startOffset: 29, + startOffsetBlock: 26, + endOffsetBlock: 34, + attributeId: "", + clientId: "", + isFirstSection: false, + } } ), new Mark( { marked: "İstanbul and Istanbul and " + "istanbul and ıstanbul", original: "İstanbul and Istanbul and istanbul and ıstanbul", - position: { endOffset: 50, startOffset: 42 } } ) ], + position: { endOffset: 50, startOffset: 42, + startOffsetBlock: 39, + endOffsetBlock: 47, + attributeId: "", + clientId: "", + isFirstSection: false, + } } ) ], skip: false, }, ]; @@ -1186,6 +1587,7 @@ describe.each( testCasesWithLocaleMapping )( "Test for counting the keyphrase in expect( keyphraseCountResult.markings ).toEqual( expectedMarkings ); } ); } ); + const testDataForHTMLTags = [ { description: "counts keyphrase occurrence correctly in a text containing `` tag, and outputs correct mark objects", @@ -1206,6 +1608,11 @@ const testDataForHTMLTags = [ position: { endOffset: 253, startOffset: 248, + startOffsetBlock: 245, + endOffsetBlock: 250, + attributeId: "", + clientId: "", + isFirstSection: false, }, } ), new Mark( { @@ -1216,6 +1623,11 @@ const testDataForHTMLTags = [ position: { endOffset: 288, startOffset: 283, + startOffsetBlock: 280, + endOffsetBlock: 285, + attributeId: "", + clientId: "", + 
isFirstSection: false, }, } ), new Mark( { @@ -1226,6 +1638,11 @@ const testDataForHTMLTags = [ position: { endOffset: 302, startOffset: 297, + startOffsetBlock: 294, + endOffsetBlock: 299, + attributeId: "", + clientId: "", + isFirstSection: false, }, } ), ], @@ -1250,6 +1667,11 @@ const testDataForHTMLTags = [ position: { endOffset: 253, startOffset: 248, + startOffsetBlock: 245, + endOffsetBlock: 250, + attributeId: "", + clientId: "", + isFirstSection: false, }, } ), new Mark( { @@ -1260,6 +1682,11 @@ const testDataForHTMLTags = [ position: { endOffset: 298, startOffset: 287, + startOffsetBlock: 284, + endOffsetBlock: 295, + attributeId: "", + clientId: "", + isFirstSection: false, }, } ), ], @@ -1285,6 +1712,11 @@ const testDataForHTMLTags = [ position: { endOffset: 253, startOffset: 248, + startOffsetBlock: 245, + endOffsetBlock: 250, + attributeId: "", + clientId: "", + isFirstSection: false, }, } ), new Mark( { @@ -1295,6 +1727,11 @@ const testDataForHTMLTags = [ position: { endOffset: 346, startOffset: 335, + startOffsetBlock: 332, + endOffsetBlock: 343, + attributeId: "", + clientId: "", + isFirstSection: false, }, } ), ], diff --git a/packages/yoastseo/spec/parse/structure/NodeSpec.js b/packages/yoastseo/spec/parse/structure/NodeSpec.js index 9f8f0a750bf..21d93ca20b8 100644 --- a/packages/yoastseo/spec/parse/structure/NodeSpec.js +++ b/packages/yoastseo/spec/parse/structure/NodeSpec.js @@ -3,6 +3,7 @@ import LanguageProcessor from "../../../src/parse/language/LanguageProcessor"; import Factory from "../../specHelpers/factory"; import build from "../../../src/parse/build/build"; import memoizedSentenceTokenizer from "../../../src/languageProcessing/helpers/sentence/memoizedSentenceTokenizer"; +import Paper from "../../../src/values/Paper"; describe( "A test for the Node object", () => { it( "should correctly create a simple Node object", function() { @@ -11,14 +12,19 @@ describe( "A test for the Node object", () => { } ); describe( "A test for the findAll method", () => { + let paper; + + beforeEach( () => { + paper = new Paper(); + } ); it( "should find all occurrences of a p tag", function() { - const html = "

    Hello, world!

    Hello, yoast!

    "; + paper._text = "

    Hello, world!

    Hello, yoast!

    "; const researcher = Factory.buildMockResearcher( {}, true, false, false, { memoizedTokenizer: memoizedSentenceTokenizer } ); const languageProcessor = new LanguageProcessor( researcher ); - const tree = build( html, languageProcessor ); + const tree = build( paper, languageProcessor ); const searchResult = tree.findAll( ( node ) => node.name === "p" ); @@ -86,15 +92,17 @@ describe( "A test for the findAll method", () => { } ); describe( "A test for the innerText method", () => { - const html = "

    Hello, world!

    Hello, yoast!

    "; + it( "should return the inner text of a node", function() { + const paper = new Paper( "

    Hello, world!

    Hello, yoast!

    " ); - const researcher = Factory.buildMockResearcher( {}, true, false, false, - { memoizedTokenizer: memoizedSentenceTokenizer } ); - const languageProcessor = new LanguageProcessor( researcher ); + const researcher = Factory.buildMockResearcher( {}, true, false, false, + { memoizedTokenizer: memoizedSentenceTokenizer } ); + const languageProcessor = new LanguageProcessor( researcher ); - const tree = build( html, languageProcessor ); + const tree = build( paper, languageProcessor ); - const innerText = tree.innerText(); + const innerText = tree.innerText(); - expect( innerText ).toEqual( "Hello, world! Hello, yoast!" ); + expect( innerText ).toEqual( "Hello, world! Hello, yoast!" ); + } ); } ); diff --git a/packages/yoastseo/spec/parse/traverse/findAllInTreeSpec.js b/packages/yoastseo/spec/parse/traverse/findAllInTreeSpec.js index 0ef1fbf9aa7..06537557d55 100644 --- a/packages/yoastseo/spec/parse/traverse/findAllInTreeSpec.js +++ b/packages/yoastseo/spec/parse/traverse/findAllInTreeSpec.js @@ -3,19 +3,21 @@ import build from "../../../src/parse/build/build"; import LanguageProcessor from "../../../src/parse/language/LanguageProcessor"; import Factory from "../../specHelpers/factory"; import memoizedSentenceTokenizer from "../../../src/languageProcessing/helpers/sentence/memoizedSentenceTokenizer"; +import Paper from "../../../src/values/Paper"; -let languageProcessor; +let languageProcessor, paper; beforeEach( () => { const researcher = Factory.buildMockResearcher( {}, true, false, false, { memoizedTokenizer: memoizedSentenceTokenizer } ); languageProcessor = new LanguageProcessor( researcher ); + paper = new Paper(); } ); describe( "A test for findAllInTree", () => { it( "should correctly extract all p-tags from a tree", function() { - const html = "

    Hello, world!

    Hello, yoast!

    "; - const tree = build( html, languageProcessor ); + paper._text = "

    Hello, world!

    Hello, yoast!

    "; + const tree = build( paper, languageProcessor ); const searchResult = findAllInTree( tree, treeNode => treeNode.name === "p" ); @@ -90,8 +92,8 @@ describe( "A test for findAllInTree", () => { } ); it( "should return an empty result if the tag doesnt exist within the tree", function() { - const html = "

    Hello, world!

    Hello, yoast!

    "; - const tree = build( html, languageProcessor ); + paper._text = "

    Hello, world!

    Hello, yoast!

    "; + const tree = build( paper, languageProcessor ); const searchResult = findAllInTree( tree, treeNode => treeNode.name === "h1" ); @@ -99,8 +101,8 @@ describe( "A test for findAllInTree", () => { } ); it( "should return all sub-nodes if recurseFoundNodes is true", function() { - const html = "
    foo
    "; - const tree = build( html, languageProcessor ); + paper._text = "
    foo
    "; + const tree = build( paper, languageProcessor ); const searchResult = findAllInTree( tree, treeNode => treeNode.name === "div", true ); diff --git a/packages/yoastseo/spec/parse/traverse/innerTextSpec.js b/packages/yoastseo/spec/parse/traverse/innerTextSpec.js index 4ffa2ed71fc..39e1504f105 100644 --- a/packages/yoastseo/spec/parse/traverse/innerTextSpec.js +++ b/packages/yoastseo/spec/parse/traverse/innerTextSpec.js @@ -4,19 +4,21 @@ import LanguageProcessor from "../../../src/parse/language/LanguageProcessor"; import Factory from "../../specHelpers/factory"; import Node from "../../../src/parse/structure/Node"; import memoizedSentenceTokenizer from "../../../src/languageProcessing/helpers/sentence/memoizedSentenceTokenizer"; +import Paper from "../../../src/values/Paper"; -let languageProcessor; +let languageProcessor, paper; beforeEach( () => { const researcher = Factory.buildMockResearcher( {}, true, false, false, { memoizedTokenizer: memoizedSentenceTokenizer } ); languageProcessor = new LanguageProcessor( researcher ); + paper = new Paper(); } ); describe( "A test for innerText", () => { it( "should correctly extract the inner text from a tree", function() { - const html = "

    Hello, world!

    Hello, yoast!

    "; - const tree = build( html, languageProcessor ); + paper._text = "

    Hello, world!

    Hello, yoast!

    "; + const tree = build( paper, languageProcessor ); const searchResult = innerText( tree ); const expected = "Hello, world! Hello, yoast! "; @@ -25,8 +27,8 @@ describe( "A test for innerText", () => { } ); it( "should correctly extract the inner text from a tree where the text contains markup", function() { - const html = "

    Hello, world!

    Hello, yoast!

    "; - const tree = build( html, languageProcessor ); + paper._text = "

    Hello, world!

    Hello, yoast!

    "; + const tree = build( paper, languageProcessor ); const searchResult = innerText( tree ); const expected = "Hello, world! Hello, yoast! "; @@ -35,8 +37,8 @@ describe( "A test for innerText", () => { } ); it( "should correctly extract the inner text from a tree if there is a subtree without text.", function() { - const html = "

    Hello, world!

    Hello,

    yoast
    !

    "; - const tree = build( html, languageProcessor ); + paper._text = "

    Hello, world!

    Hello,

    yoast
    !

    "; + const tree = build( paper, languageProcessor ); const searchResult = innerText( tree ); const expected = "Hello, world! Hello, yoast! "; diff --git a/packages/yoastseo/spec/scoring/assessments/seo/KeywordDensityAssessmentSpec.js b/packages/yoastseo/spec/scoring/assessments/seo/KeywordDensityAssessmentSpec.js index 211657b9a69..391cc0246ca 100644 --- a/packages/yoastseo/spec/scoring/assessments/seo/KeywordDensityAssessmentSpec.js +++ b/packages/yoastseo/spec/scoring/assessments/seo/KeywordDensityAssessmentSpec.js @@ -259,6 +259,9 @@ describe( "A test for marking the keyphrase", function() { endOffset: 50, startOffsetBlock: 40, endOffsetBlock: 47, + attributeId: "", + clientId: "", + isFirstSection: false, }, } ), new Mark( { @@ -271,6 +274,9 @@ describe( "A test for marking the keyphrase", function() { endOffset: 70, startOffsetBlock: 60, endOffsetBlock: 67, + attributeId: "", + clientId: "", + isFirstSection: false, }, } ) ]; expect( keyphraseDensityAssessment.getMarks() ).toEqual( expected ); @@ -292,6 +298,9 @@ describe( "A test for marking the keyphrase", function() { endOffset: 38, startOffsetBlock: 23, endOffsetBlock: 35, + attributeId: "", + clientId: "", + isFirstSection: false, }, } ) ]; expect( keyphraseDensityAssessment.getMarks() ).toEqual( expected ); @@ -318,6 +327,9 @@ describe( "A test for marking the keyphrase", function() { endOffset: 204, startOffsetBlock: 198, endOffsetBlock: 201, + attributeId: "", + clientId: "", + isFirstSection: false, }, } ), new Mark( { @@ -329,6 +341,9 @@ describe( "A test for marking the keyphrase", function() { endOffset: 215, startOffsetBlock: 209, endOffsetBlock: 212, + attributeId: "", + clientId: "", + isFirstSection: false, }, } ), ]; From 88cc34b92ed5be0c448f6fac19e3bbe86103603d Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Thu, 27 Jul 2023 11:54:35 +0200 Subject: [PATCH 281/616] Add fallback values --- .../highlighting/getMarkingsInSentence.js | 17 ++++++++++------- .../helpers/sentence/getSentencesFromTree.js | 4 ++-- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/packages/yoastseo/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js b/packages/yoastseo/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js index fde78d5c2ba..f4017ccc547 100644 --- a/packages/yoastseo/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js +++ b/packages/yoastseo/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js @@ -28,7 +28,7 @@ const createMarksForSentence = ( sentence, matches ) => { } const markedSentence = newTokens.join( "" ); // Merge consecutive markings into one marking. - return markedSentence.replace( new RegExp( "
    (( |\u00A0)?)", "ig" ), "$1" ); + return markedSentence.replace( new RegExp( "([ \u00A0]?)", "ig" ), "$1" ); }; /** @@ -104,12 +104,15 @@ function getMarkingsInSentence( sentence, matchesInSentence ) { position: { startOffset: startOffset, endOffset: endOffset, - // relative to start of block positions. - startOffsetBlock: startOffset - sentence.parentStartOffset, - endOffsetBlock: endOffset - sentence.parentStartOffset, - clientId: sentence.parentClientId, - attributeId: sentence.parentAttributeId, - isFirstSection: sentence.isParentFirstSectionOfBlock, + // Relative to start of block positions. + startOffsetBlock: startOffset - ( sentence.parentStartOffset || 0 ), + endOffsetBlock: endOffset - ( sentence.parentStartOffset || 0 ), + // The client id of the block the match was found in. + clientId: sentence.parentClientId || "", + // The attribute id of the Yoast sub-block the match was found in. + attributeId: sentence.parentAttributeId || "", + // Whether the match was found in the first section of the Yoast sub-block. + isFirstSection: sentence.isParentFirstSectionOfBlock || false, }, marked: markedSentence, original: sentence.text, diff --git a/packages/yoastseo/src/languageProcessing/helpers/sentence/getSentencesFromTree.js b/packages/yoastseo/src/languageProcessing/helpers/sentence/getSentencesFromTree.js index 71da23be3f4..a225c8e23f6 100644 --- a/packages/yoastseo/src/languageProcessing/helpers/sentence/getSentencesFromTree.js +++ b/packages/yoastseo/src/languageProcessing/helpers/sentence/getSentencesFromTree.js @@ -15,9 +15,9 @@ export default function( paper ) { ...s, // The parent node's start offset is the start offset of the parent node if it doesn't have a `startTag` property. parentStartOffset: node.sourceCodeLocation && ( ( node.sourceCodeLocation.startTag && node.sourceCodeLocation.startTag.endOffset ) || - node.sourceCodeLocation.startOffset ), + node.sourceCodeLocation.startOffset ) || 0, // The block client id of the parent node. - parentClientId: node.clientId, + parentClientId: node.clientId || "", // The attribute id of the parent node, if available, otherwise an empty string. parentAttributeId: node.attributeId || "", // Whether the parent node is the first section of Yoast sub-blocks. From a5e1696725cb525b9324b8e33697f91a3e6335e8 Mon Sep 17 00:00:00 2001 From: hdvos Date: Thu, 27 Jul 2023 13:03:31 +0200 Subject: [PATCH 282/616] add specs for filtered shortcodes in assessments. 
--- .../researches/findTransitionWordsSpec.js | 7 +++++++ .../researches/getParagraphLengthSpec.js | 5 +++++ .../researches/getSentenceBeginningsSpec.js | 15 ++++++++++++++ .../researches/getSubheadingTextLengthSpec.js | 12 +++++++++++ .../researches/keyphraseDistributionSpec.js | 20 +++++++++++++++++++ .../researches/keywordCountSpec.js | 7 +++++++ .../matchKeywordInSubheadingsSpec.js | 9 +++++++++ .../researches/sentencesSpec.js | 6 ++++++ .../researches/wordComplexitySpec.js | 8 ++++++++ .../researches/wordCountInTextSpec.js | 4 ++++ .../scoring/assessments/assessmentSpec.js | 7 +++++++ ...eadingDistributionTooLongAssessmentSpec.js | 11 ++++++++++ .../TransitionWordsAssessmentSpec.js | 6 ++++++ .../KeyphraseDistributionAssessmentSpec.js | 10 ++++++++++ .../seo/KeywordDensityAssessmentSpec.js | 12 +++++++++++ packages/yoastseo/src/parse/build/build.js | 1 + .../assessments/readability/ListAssessment.js | 1 - 17 files changed, 140 insertions(+), 1 deletion(-) diff --git a/packages/yoastseo/spec/languageProcessing/researches/findTransitionWordsSpec.js b/packages/yoastseo/spec/languageProcessing/researches/findTransitionWordsSpec.js index 2f7e328b82d..02a74743d29 100644 --- a/packages/yoastseo/spec/languageProcessing/researches/findTransitionWordsSpec.js +++ b/packages/yoastseo/spec/languageProcessing/researches/findTransitionWordsSpec.js @@ -152,6 +152,13 @@ describe( "a test for finding transition words from a string", function() { expect( result.transitionWordSentences ).toBe( 0 ); } ); + it( "should ignore transition words if they are shortcodes", function() { + mockPaper = new Paper( "There is a hidden transition word [however] in this sentence.", { shortcodes: [ "shortcode" ] } ); + result = transitionWordsResearch( mockPaper, new EnglishResearcher( mockPaper ) ); + expect( result.totalSentences ).toBe( 1 ); + expect( result.transitionWordSentences ).toBe( 0 ); + } ); + /*it( "returns 1 when a transition word is found in a sentence (German)", function() { // Transition word: zuerst. mockPaper = new Paper( "Zuerst werde ich versuchen zu verstehen, warum er so denkt.", { locale: "de_DE" } ); diff --git a/packages/yoastseo/spec/languageProcessing/researches/getParagraphLengthSpec.js b/packages/yoastseo/spec/languageProcessing/researches/getParagraphLengthSpec.js index e274380fc23..858b85c4259 100644 --- a/packages/yoastseo/spec/languageProcessing/researches/getParagraphLengthSpec.js +++ b/packages/yoastseo/spec/languageProcessing/researches/getParagraphLengthSpec.js @@ -82,6 +82,11 @@ describe( "a test for getting paragraph length", function() { expect( getParagraphLength( mockPaper, new EnglishResearcher() ).length ).toBe( 1 ); } ); + it( "returns the paragraph length, ignoring shortcodes", function() { + const mockPaper = new Paper( "

<p>test [shortcode]</p>
    ", { shortcodes: [ "shortcode" ] } ); + expect( getParagraphLength( mockPaper, new EnglishResearcher() ).length ).toBe( 1 ); + } ); + it( "returns the paragraph length of paragraph without p tags or double linebreaks, but with h2 tags", function() { const mockPaper = new Paper( "

<h2>Lorem ipsum dolor sit amet</h2>
    " ); expect( getParagraphLength( mockPaper, new EnglishResearcher() )[ 0 ].countLength ).toBe( 5 ); diff --git a/packages/yoastseo/spec/languageProcessing/researches/getSentenceBeginningsSpec.js b/packages/yoastseo/spec/languageProcessing/researches/getSentenceBeginningsSpec.js index 8c7f22a806d..4635d21cac8 100644 --- a/packages/yoastseo/spec/languageProcessing/researches/getSentenceBeginningsSpec.js +++ b/packages/yoastseo/spec/languageProcessing/researches/getSentenceBeginningsSpec.js @@ -118,6 +118,21 @@ describe( "gets the sentence beginnings and the count of consecutive duplicates. expect( getSentenceBeginnings( mockPaper, researcher )[ 1 ].count ).toBe( 1 ); } ); + it( "ignores shortcodes", function() { + mockPaper = new Paper( "Cats are fluffy. Dogs are happy. [shortcode]Unicorns cant code.", { shortcodes: [ "shortcode" ] } ); + researcher = new EnglishResearcher( mockPaper ); + + const sentenceBeginnings = getSentenceBeginnings( mockPaper, researcher ); + + expect( sentenceBeginnings ).toHaveLength( 3 ); + expect( getSentenceBeginnings( mockPaper, researcher )[ 0 ].word ).toBe( "cats" ); + expect( getSentenceBeginnings( mockPaper, researcher )[ 0 ].count ).toBe( 1 ); + expect( getSentenceBeginnings( mockPaper, researcher )[ 1 ].word ).toBe( "dogs" ); + expect( getSentenceBeginnings( mockPaper, researcher )[ 1 ].count ).toBe( 1 ); + expect( getSentenceBeginnings( mockPaper, researcher )[ 2 ].word ).toBe( "unicorns" ); + expect( getSentenceBeginnings( mockPaper, researcher )[ 2 ].count ).toBe( 1 ); + } ); + it( "returns an object with English sentence beginnings with paragraph tags - it should match over paragraphs", function() { mockPaper = new Paper( "

<p>Sentence 1. Sentence 2.</p><p>Sentence 3.</p>
    " ); researcher = new EnglishResearcher( mockPaper ); diff --git a/packages/yoastseo/spec/languageProcessing/researches/getSubheadingTextLengthSpec.js b/packages/yoastseo/spec/languageProcessing/researches/getSubheadingTextLengthSpec.js index bdf9cc4a06e..c59b61abb76 100644 --- a/packages/yoastseo/spec/languageProcessing/researches/getSubheadingTextLengthSpec.js +++ b/packages/yoastseo/spec/languageProcessing/researches/getSubheadingTextLengthSpec.js @@ -57,6 +57,18 @@ describe( "gets the length of text segments", function() { expect( foundSubheadingsTextLength( mockPaper, englishResearcher )[ 2 ].countLength ).toBe( 10 ); } ); + it( "should not include a shortcode when calculating the text length", function() { + const mockPaper = new Paper( "

<h2>heading with [shortcode]</h2>
    this is a text string with a [shortcode]", + { shortcodes: [ "shortcode" ] } ); + const englishResearcher = new EnglishResearcher( mockPaper ); + // Check the total number of subheading blocks. + expect( foundSubheadingsTextLength( mockPaper, englishResearcher ).length ).toBe( 1 ); + // Check the length of each individual text segment. + expect( foundSubheadingsTextLength( mockPaper, englishResearcher )[ 0 ].subheading ).toBe( "

    heading with

    " ); + expect( foundSubheadingsTextLength( mockPaper, englishResearcher )[ 0 ].text ).toBe( "this is a text string with a " ); + expect( foundSubheadingsTextLength( mockPaper, englishResearcher )[ 0 ].countLength ).toBe( 7 ); + } ); + it( "does not count text inside elements we want to exclude from the analysis ", function() { const mockPaper = new Paper( "

    test

    one two threeone two three" ); const englishResearcher = new EnglishResearcher( mockPaper ); diff --git a/packages/yoastseo/spec/languageProcessing/researches/keyphraseDistributionSpec.js b/packages/yoastseo/spec/languageProcessing/researches/keyphraseDistributionSpec.js index 3b4a8e1b6bd..a85708b7cda 100644 --- a/packages/yoastseo/spec/languageProcessing/researches/keyphraseDistributionSpec.js +++ b/packages/yoastseo/spec/languageProcessing/researches/keyphraseDistributionSpec.js @@ -477,6 +477,26 @@ describe( "Test for the research", function() { } ); } ); + it( "returns the score 100 when the keyphrase is a shortcode", function() { + const paper = new Paper( + "This is a text with a [keyphrase]that doesn't count", + { + locale: "en_EN", + keyword: "keyphrase", + synonyms: "synonym", + shortcodes: [ "keyphrase" ], + } + ); + + const researcher = new Researcher( paper ); + researcher.addResearchData( "morphology", morphologyData ); + + expect( keyphraseDistributionResearcher( paper, researcher ) ).toEqual( { + keyphraseDistributionScore: 100, + sentencesToHighlight: [], + } ); + } ); + const paragraphWithKeyphrase1 = "

    Lorem ipsum keyphrase dolor sit amet, consectetur adipiscing elit." + "In sit amet semper sem, id faucibus massa.

    \n"; diff --git a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js index 9cf437643bb..34888acf1d5 100644 --- a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js +++ b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js @@ -160,6 +160,13 @@ const testCases = [ expectedCount: 0, expectedMarkings: [], }, + { + description: "doesn't count keyphrase instances if they are a shortcode", + paper: new Paper( "There is no [keyword] in this sentence.", { shortcodes: [ "keyword" ] } ), + keyphraseForms: [ [ "keyword", "keywords" ] ], + expectedCount: 0, + expectedMarkings: [], + }, { description: "only counts full key phrases (when all keywords are in the sentence once, twice etc.) as matches.", paper: new Paper( "A string with three keys (key and another key) and one word." ), diff --git a/packages/yoastseo/spec/languageProcessing/researches/matchKeywordInSubheadingsSpec.js b/packages/yoastseo/spec/languageProcessing/researches/matchKeywordInSubheadingsSpec.js index d6d803710fa..609d63776fd 100644 --- a/packages/yoastseo/spec/languageProcessing/researches/matchKeywordInSubheadingsSpec.js +++ b/packages/yoastseo/spec/languageProcessing/researches/matchKeywordInSubheadingsSpec.js @@ -48,6 +48,15 @@ describe( "Matching keyphrase in subheadings", () => { expect( result.matches ).toBe( 0 ); } ); + it( "should not match text that is also a shortcode", function() { + const paper = new Paper( "

    " + + "What is a [shortcode]

    A beautiful dog.", { keyword: "shortcode", shortcodes: [ "shortcode" ] } ); + + const result = matchKeywordInSubheadings( paper, new Researcher( paper ) ); + expect( result.count ).toBe( 1 ); + expect( result.matches ).toBe( 0 ); + } ); + // This unit test is skipped for now because it returns an incorrect result. Result.matches should return 0, instead it returns 1. // When this research is adapted to use HTML Parser 5, please unskip this test and make sure that it passes. xit( "should not match text within heading attributes", () => { diff --git a/packages/yoastseo/spec/languageProcessing/researches/sentencesSpec.js b/packages/yoastseo/spec/languageProcessing/researches/sentencesSpec.js index 3260e313d12..1c5ca3cec55 100644 --- a/packages/yoastseo/spec/languageProcessing/researches/sentencesSpec.js +++ b/packages/yoastseo/spec/languageProcessing/researches/sentencesSpec.js @@ -12,4 +12,10 @@ describe( "Get sentences from text", function() { const researcher = new EnglishResearcher(); expect( sentences( paper, researcher ) ).toEqual( [ "Hello.", "Goodbye." ] ); } ); + + it( "should get sentences without the shortcodes", function() { + const paper = new Paper( "Hello. Goodbye. To be [shortcode]or not to be.", { shortcodes: [ "shortcode" ] } ); + const researcher = new EnglishResearcher(); + expect( sentences( paper, researcher ) ).toEqual( [ "Hello.", "Goodbye.", "To be or not to be." ] ); + } ); } ); diff --git a/packages/yoastseo/spec/languageProcessing/researches/wordComplexitySpec.js b/packages/yoastseo/spec/languageProcessing/researches/wordComplexitySpec.js index 589d5cbec70..db7af9b21fa 100644 --- a/packages/yoastseo/spec/languageProcessing/researches/wordComplexitySpec.js +++ b/packages/yoastseo/spec/languageProcessing/researches/wordComplexitySpec.js @@ -83,6 +83,14 @@ describe( "a test for getting the complex words in the sentence and calculating expect( wordComplexity( paper, researcher ).percentage ).toEqual( 0 ); } ); + it( "should return an empty array and 0% if the only complex word are is a shortcode ", function() { + const paper = new Paper( "This is short text. [tortoiseshell] A text about Calico.", { shortcodes: [ "tortoiseshell" ] } ); + researcher.setPaper( paper ); + + expect( wordComplexity( paper, researcher ).complexWords ).toEqual( [] ); + expect( wordComplexity( paper, researcher ).percentage ).toEqual( 0 ); + } ); + it( "should return an empty array and 0% if there is no complex word found in the text: " + "Also test with a word starting with capital letter enclosed in different types of quotation mark.", () => { let paper = new Paper( "This is short text. This is another short text. A text about \"Calico\"." 
); diff --git a/packages/yoastseo/spec/languageProcessing/researches/wordCountInTextSpec.js b/packages/yoastseo/spec/languageProcessing/researches/wordCountInTextSpec.js index 4da5ffd745f..6a5936c3890 100644 --- a/packages/yoastseo/spec/languageProcessing/researches/wordCountInTextSpec.js +++ b/packages/yoastseo/spec/languageProcessing/researches/wordCountInTextSpec.js @@ -10,4 +10,8 @@ describe( "a test for counting the words in the text", function() { const paper = new Paper( "Tell me a story on how to love your cats", { keyword: "how to love your cats" } ); expect( wordCountInText( paper ).count ).toBe( 7 ); } ); + it( "should not count shortcodes", function() { + const paper = new Paper( "Tell me a story on [shortcode]how to love[/shortcode] your cats", { shortcodes: [ "shortcode" ] } ); + expect( wordCountInText( paper ).count ).toBe( 10 ); + } ); } ); diff --git a/packages/yoastseo/spec/scoring/assessments/assessmentSpec.js b/packages/yoastseo/spec/scoring/assessments/assessmentSpec.js index 0266e13572d..894db1cac54 100644 --- a/packages/yoastseo/spec/scoring/assessments/assessmentSpec.js +++ b/packages/yoastseo/spec/scoring/assessments/assessmentSpec.js @@ -24,6 +24,13 @@ describe( "test hasEnoughContentForAssessment", () => { expect( mockAssessment.hasEnoughContentForAssessment( mockPaper ) ).toBe( false ); } ); + it( "should return false if text has less than 50 chars after shortcodes are removed.", () => { + const mockPaper = new Paper( "Some text" + "[shortcode] ".repeat( 50 ), { shortcodes: [ "shortcode" ] } ); + const mockAssessment = new Assessment(); + + expect( mockAssessment.hasEnoughContentForAssessment( mockPaper ) ).toBe( false ); + } ); + it( "should return false if text is 49 chars and an image and no specification for contentNeededForAssessment", () => { const mockPaper = new Paper( "This text contains forty nine characterssssssssss" + // eslint-disable-next-line max-len diff --git a/packages/yoastseo/spec/scoring/assessments/readability/SubheadingDistributionTooLongAssessmentSpec.js b/packages/yoastseo/spec/scoring/assessments/readability/SubheadingDistributionTooLongAssessmentSpec.js index 6745f5495d7..04c5483404d 100644 --- a/packages/yoastseo/spec/scoring/assessments/readability/SubheadingDistributionTooLongAssessmentSpec.js +++ b/packages/yoastseo/spec/scoring/assessments/readability/SubheadingDistributionTooLongAssessmentSpec.js @@ -43,6 +43,17 @@ describe( "An assessment for scoring too long text fragments without a subheadin "You are not using any subheadings, but your text is short enough and probably doesn't need them." ); } ); + it( "Scores a text that's short (<300 words) after excluding shortodes," + + " and which does not have subheadings.", function() { + const assessment = subheadingDistributionTooLong.getResult( + new Paper( shortText + "[shortcode] ".repeat( 150 ) + "[/shortcode] ".repeat( 150 ), { shortcodes: [ "shortcode" ] } ), + Factory.buildMockResearcher( [] ) + ); + expect( assessment.getScore() ).toBe( 9 ); + expect( assessment.getText() ).toBe( "Subheading distribution: " + + "You are not using any subheadings, but your text is short enough and probably doesn't need them." 
); + } ); + it( "Scores a short text (<300 words), which has subheadings.", function() { const assessment = subheadingDistributionTooLong.getResult( new Paper( "a " + subheading + shortText ), diff --git a/packages/yoastseo/spec/scoring/assessments/readability/TransitionWordsAssessmentSpec.js b/packages/yoastseo/spec/scoring/assessments/readability/TransitionWordsAssessmentSpec.js index 8cf18699849..1b145d0b147 100644 --- a/packages/yoastseo/spec/scoring/assessments/readability/TransitionWordsAssessmentSpec.js +++ b/packages/yoastseo/spec/scoring/assessments/readability/TransitionWordsAssessmentSpec.js @@ -148,6 +148,12 @@ describe( "An assessment for transition word percentage", function() { expect( assessment ).toBe( false ); } ); + it( "should not be applicable if the text has more than 200 words, but part of the words are shortcodes", function() { + const mockPaper = new Paper( "Text " + "text ".repeat( 198 ) + "[shortcode]".repeat( 2 ), { shortcodes: [ "shortcode" ] } ); + const assessment = new TransitionWordsAssessment().isApplicable( mockPaper, new EnglishResearcher( mockPaper ) ); + expect( assessment ).toBe( false ); + } ); + it( "is applicable when used with a supported researcher, e.g. the English researcher", function() { const mockPaper = new Paper( "Lorem ipsum dolor sit amet, ne sed agam oblique alterum. Eos percipit singulis no. No scripta graecis cum. " + "Ut vim eius porro labore. Id quem civibus sit. Sed no primis urbanitas, aperiri laboramus voluptatibus ei per. Esse consul possim " + diff --git a/packages/yoastseo/spec/scoring/assessments/seo/KeyphraseDistributionAssessmentSpec.js b/packages/yoastseo/spec/scoring/assessments/seo/KeyphraseDistributionAssessmentSpec.js index 7b74f7951c7..6acddfc8610 100644 --- a/packages/yoastseo/spec/scoring/assessments/seo/KeyphraseDistributionAssessmentSpec.js +++ b/packages/yoastseo/spec/scoring/assessments/seo/KeyphraseDistributionAssessmentSpec.js @@ -162,6 +162,16 @@ describe( "Checks if the assessment is applicable", function() { expect( assessmentIsApplicable ).toBe( false ); } ); + + it( "should not be applicable to a text consisting only of shortcodes", function() { + const shortcodeSentence = "[shortcode]".repeat( 15 ) + ". "; + const mockPaper = new Paper( shortcodeSentence.repeat( 15 ), { shortcodes: [ "shortcode" ] } ); + researcher.setPaper( mockPaper ); + + const assessmentIsApplicable = keyphraseDistributionAssessment.isApplicable( mockPaper, researcher ); + + expect( assessmentIsApplicable ).toBe( false ); + } ); } ); describe( "A test for marking keywords in the text", function() { diff --git a/packages/yoastseo/spec/scoring/assessments/seo/KeywordDensityAssessmentSpec.js b/packages/yoastseo/spec/scoring/assessments/seo/KeywordDensityAssessmentSpec.js index dfd177c8fdb..9eb907ac76f 100644 --- a/packages/yoastseo/spec/scoring/assessments/seo/KeywordDensityAssessmentSpec.js +++ b/packages/yoastseo/spec/scoring/assessments/seo/KeywordDensityAssessmentSpec.js @@ -100,6 +100,18 @@ describe( "Tests for the keywordDensity assessment for languages without morphol " Don't overoptimize!" 
); } ); + it( "should not count shortcodes when calculating keyphrase density", function() { + const paper = new Paper( nonkeyword.repeat( 99 ) + `[${keyword}]`, { keyword: keyword, shortcodes: [ keyword ] } ); + + const researcher = new DefaultResearcher( paper ); + const result = new KeywordDensityAssessment().getResult( paper, researcher ); + + expect( result.getScore() ).toBe( 4 ); + expect( result.getText() ).toBe( "Keyphrase density: The keyphrase was found 0 times." + + " That's less than the recommended minimum of 2 times for a text of this length." + + " Focus on your keyphrase!" ); + } ); + it( "adjusts the keyphrase density based on the length of the keyword with the actual density remaining at 2% - long keyphrase", function() { const paper = new Paper( nonkeyword.repeat( 900 ) + "b c d e f, ".repeat( 20 ), { keyword: "b c d e f" } ); const researcher = new DefaultResearcher( paper ); diff --git a/packages/yoastseo/src/parse/build/build.js b/packages/yoastseo/src/parse/build/build.js index 14a4ece8242..5e15c565913 100644 --- a/packages/yoastseo/src/parse/build/build.js +++ b/packages/yoastseo/src/parse/build/build.js @@ -31,6 +31,7 @@ export default function build( htmlString, languageProcessor, shortcodes ) { // Add sentences and tokens to the tree's paragraph and heading nodes. tree = tokenize( tree, languageProcessor ); + // Filter out shortcodes from the tree. if ( shortcodes ) { filterShortcodesFromTree( tree, shortcodes ); } diff --git a/packages/yoastseo/src/scoring/assessments/readability/ListAssessment.js b/packages/yoastseo/src/scoring/assessments/readability/ListAssessment.js index 9cf10885632..bf07fd7d2ba 100644 --- a/packages/yoastseo/src/scoring/assessments/readability/ListAssessment.js +++ b/packages/yoastseo/src/scoring/assessments/readability/ListAssessment.js @@ -45,7 +45,6 @@ export default class ListAssessment extends Assessment { let text = paper.getText(); text = languageProcessingHelpers.removeHtmlBlocks( text ); - text = languageProcessingHelpers.filterShortcodesFromHTML( text, paper._attributes && paper._attributes.shortcodes ); return regex.test( text ); } From efd1798d5c9406530bfeee70268181d214c6b3d5 Mon Sep 17 00:00:00 2001 From: hdvos Date: Thu, 27 Jul 2023 13:04:02 +0200 Subject: [PATCH 283/616] also remove closing shortcodes --- .../helpers/sanitize/filterShortcodesFromTree.js | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/packages/yoastseo/src/languageProcessing/helpers/sanitize/filterShortcodesFromTree.js b/packages/yoastseo/src/languageProcessing/helpers/sanitize/filterShortcodesFromTree.js index fc0d1b3c112..17b5b23a5bc 100644 --- a/packages/yoastseo/src/languageProcessing/helpers/sanitize/filterShortcodesFromTree.js +++ b/packages/yoastseo/src/languageProcessing/helpers/sanitize/filterShortcodesFromTree.js @@ -1,4 +1,3 @@ -import { cloneDeep } from "lodash-es"; /** * Creates a regex to filter shortcodes from HTML. @@ -6,7 +5,7 @@ import { cloneDeep } from "lodash-es"; * @returns {RegExp} The regex to recognize the shortcodes. 
*/ const createShortcodeTagsRegex = shortcodeTags => { - const sortcodeTagsRegexString = `\\[(${ shortcodeTags.join( "|" ) })[^\\]]*\\]`; + const sortcodeTagsRegexString = `\\[\\/?(${ shortcodeTags.join( "|" ) })[^\\]]*\\]`; return new RegExp( sortcodeTagsRegexString, "g" ); }; @@ -36,7 +35,9 @@ const filterShortcodesFromSentence = ( sentence, shortcodeTags, shortcodeTagsReg const tokens = sentence.tokens; // traverse through tokens backwards for ( let i = tokens.length - 1; i >= 0; i-- ) { - if ( shortcodeTags.includes( tokens[ i ].text ) && tokens[ i - 1 ] && tokens[ i - 1 ].text === "[" ) { + if ( shortcodeTags.includes( tokens[ i ].text ) && + ( ( tokens[ i - 1 ] && tokens[ i - 1 ].text === "[" ) || + ( tokens[ i - 1 ] && tokens[ i - 1 ].text === "/" && tokens[ i - 2 ] && tokens[ i - 2 ] === "]" ) ) ) { while ( tokens[ i ].text !== "]" ) { tokens.splice( i, 1 ); } @@ -81,8 +82,7 @@ const filterShortcodesFromTree = ( tree, shortcodeTags ) => { if ( ! shortcodeTags || shortcodeTags.length === 0 ) { return; } - const sortcodeTagsRegexString = createShortcodeTagsRegex( shortcodeTags ); - const shortcodeTagsRegex = new RegExp( sortcodeTagsRegexString, "g" ); + const shortcodeTagsRegex = createShortcodeTagsRegex( shortcodeTags ); filterShortcodesFromSentences( tree, shortcodeTags, shortcodeTagsRegex ); }; From 621a5881a353f764d015c71de7984ab8facebd45 Mon Sep 17 00:00:00 2001 From: hdvos Date: Thu, 27 Jul 2023 14:47:21 +0200 Subject: [PATCH 284/616] add specs for removing closing shortcodes --- .../sanitize/filterShortcodesFromTreeSpec.js | 59 +++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/packages/yoastseo/spec/languageProcessing/helpers/sanitize/filterShortcodesFromTreeSpec.js b/packages/yoastseo/spec/languageProcessing/helpers/sanitize/filterShortcodesFromTreeSpec.js index e73adaed6e7..65431577c25 100644 --- a/packages/yoastseo/spec/languageProcessing/helpers/sanitize/filterShortcodesFromTreeSpec.js +++ b/packages/yoastseo/spec/languageProcessing/helpers/sanitize/filterShortcodesFromTreeSpec.js @@ -340,6 +340,48 @@ describe( "filterShortcodesFromTree", function() { expect( tree.sentences[ 0 ].text ).toEqual( "This is a [shortcode] test." ); } ); + + it( "should filter an open-close pair of shortcodes.", function() { + const tree = { + sentences: [ + { + text: "This is a [shortcode]test[/shortcode].", + tokens: [ + { text: "This" }, + { text: " " }, + { text: "is" }, + { text: " " }, + { text: "a" }, + { text: " " }, + { text: "[" }, + { text: "shortcode" }, + { text: "]" }, + { text: "test" }, + { text: "[" }, + { text: "/" }, + { text: "shortcode" }, + { text: "]" }, + { text: "." }, + ], + }, + ], + }; + + const shortcodes = [ "shortcode" ]; + + filterShortcodesFromTree( tree, shortcodes ); + + expect( tree.sentences[ 0 ].tokens ).toEqual( [ + { text: "This" }, + { text: " " }, + { text: "is" }, + { text: " " }, + { text: "a" }, + { text: " " }, + { text: "test" }, + { text: "." }, + ] ); + } ); } ); @@ -380,4 +422,21 @@ describe( "filterShortcodesFromHTML", function() { expect( filtered ).toEqual( "

<p>This is a [not_a_shortcode] test.</p>
    " ); } ); + + it( "should not filter if shortcodes parameter is not given", function() { + const html = "

<p>This is a [shortcode] test.</p>
    "; + + const filtered = filterShortcodesFromHTML( html ); + + expect( filtered ).toEqual( "

<p>This is a [shortcode] test.</p>
    " ); + } ); + it( "should not filter if shortcodes is an empty list", function() { + const html = "

<p>This is a [shortcode] test.</p>
    "; + + const shortcodes = []; + + const filtered = filterShortcodesFromHTML( html, shortcodes ); + + expect( filtered ).toEqual( "

<p>This is a [shortcode] test.</p>
    " ); + } ); } ); From ad0bb76507f750977cc4dd76bdcb92724cf35fee Mon Sep 17 00:00:00 2001 From: hdvos Date: Thu, 27 Jul 2023 14:47:34 +0200 Subject: [PATCH 285/616] refactor to reduce complexity --- .../sanitize/filterShortcodesFromTree.js | 45 ++++++++++++++++--- 1 file changed, 40 insertions(+), 5 deletions(-) diff --git a/packages/yoastseo/src/languageProcessing/helpers/sanitize/filterShortcodesFromTree.js b/packages/yoastseo/src/languageProcessing/helpers/sanitize/filterShortcodesFromTree.js index 17b5b23a5bc..aac457bfb98 100644 --- a/packages/yoastseo/src/languageProcessing/helpers/sanitize/filterShortcodesFromTree.js +++ b/packages/yoastseo/src/languageProcessing/helpers/sanitize/filterShortcodesFromTree.js @@ -23,6 +23,38 @@ export const filterShortcodesFromHTML = ( html, shortcodeTags ) => { return html.replace( shortcodeTagsRegex, "" ); }; +/** + * Checks if a token is part of an opening shortcode. + * @param {Token[]} tokens The tokens to check. + * @param {number} index The index of the current token. + * @returns {boolean} Whether the token is part of an opening shortcode. + */ +const tokenIsPartOfOpeningShortcode = ( tokens, index ) => { + return tokens[ index - 1 ] && tokens[ index - 1 ].text === "["; +}; + +/** + * Checks if a token is part of a closing shortcode. + * @param {Token[]} tokens The tokens to check. + * @param {number} index The index of the current token. + * @returns {boolean} Whether the token is part of a closing shortcode. + */ +const tokenIsPartOfClosingShortcode = ( tokens, index ) => { + return tokens[ index - 1 ] && tokens[ index - 1 ].text === "/" && tokens[ index - 2 ] && tokens[ index - 2 ].text === "["; +}; + +/** + * Checks if a token is part of a shortcode. + * @param {Token[]} tokens The tokens to check. + * @param {number} index The index of the current token. + * @param {string[]} shortcodeTags An array of active shortcode tags. + * @returns {boolean} Whether the token is part of a shortcode. + */ +const tokenIsShortcode = ( tokens, index, shortcodeTags ) => { + return shortcodeTags.includes( tokens[ index ].text ) && + ( tokenIsPartOfOpeningShortcode( tokens, index ) || tokenIsPartOfClosingShortcode( tokens, index ) ); +}; + /** * Filters shortcodes from a sentence. * @param {Sentence} sentence The sentence to filter. 
@@ -35,15 +67,18 @@ const filterShortcodesFromSentence = ( sentence, shortcodeTags, shortcodeTagsReg const tokens = sentence.tokens; // traverse through tokens backwards for ( let i = tokens.length - 1; i >= 0; i-- ) { - if ( shortcodeTags.includes( tokens[ i ].text ) && - ( ( tokens[ i - 1 ] && tokens[ i - 1 ].text === "[" ) || - ( tokens[ i - 1 ] && tokens[ i - 1 ].text === "/" && tokens[ i - 2 ] && tokens[ i - 2 ] === "]" ) ) ) { + if ( tokenIsShortcode( tokens, i, shortcodeTags ) ) { while ( tokens[ i ].text !== "]" ) { tokens.splice( i, 1 ); } tokens.splice( i, 1 ); - tokens.splice( i - 1, 1 ); - i--; + + // console.log( "after splice 1: ", tokens ); + + while ( "[/".includes( tokens[ i - 1 ].text ) ) { + tokens.splice( i - 1, 1 ); + i--; + } } } sentence.tokens = tokens; From 20dcc48cd7decbb7e6849eebb039d2fa9bccb4dc Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Thu, 27 Jul 2023 15:53:08 +0200 Subject: [PATCH 286/616] Add unit tests --- .../helpers/getAnnotationsHelpers.js | 4 +- .../helpers/getAnnotationsHelpers.test.js | 687 ++++++++++++++++++ packages/yoastseo/src/parse/build/build.js | 6 +- .../src/parse/build/private/parseBlocks.js | 16 +- 4 files changed, 701 insertions(+), 12 deletions(-) create mode 100644 packages/js/tests/decorator/helpers/getAnnotationsHelpers.test.js diff --git a/packages/js/src/decorator/helpers/getAnnotationsHelpers.js b/packages/js/src/decorator/helpers/getAnnotationsHelpers.js index 5ee40685555..128614cd004 100644 --- a/packages/js/src/decorator/helpers/getAnnotationsHelpers.js +++ b/packages/js/src/decorator/helpers/getAnnotationsHelpers.js @@ -1,8 +1,8 @@ import { flatMap, flattenDeep } from "lodash"; import { create } from "@wordpress/rich-text"; -const START_MARK = ""; -const END_MARK = ""; +export const START_MARK = ""; +export const END_MARK = ""; const START_MARK_DOUBLE_QUOTED = ""; diff --git a/packages/js/tests/decorator/helpers/getAnnotationsHelpers.test.js b/packages/js/tests/decorator/helpers/getAnnotationsHelpers.test.js new file mode 100644 index 00000000000..197dcaa5dd6 --- /dev/null +++ b/packages/js/tests/decorator/helpers/getAnnotationsHelpers.test.js @@ -0,0 +1,687 @@ +import { + START_MARK, + END_MARK, + getYoastmarkOffsets, + getIndicesOf, + calculateAnnotationsForTextFormat, + getAnnotationsForWPBlock, + getAnnotationsForYoastBlock, + createAnnotationsFromPositionBasedMarks, +} from "../../../src/decorator/helpers/getAnnotationsHelpers"; + +jest.mock( "@wordpress/rich-text", () => ( { + create: jest.fn(), +} ) ); + +import { create } from "@wordpress/rich-text"; + +import { select } from "@wordpress/data"; +import { Mark } from "yoastseo/src/values"; + +jest.mock( "@wordpress/data" ); + +/** + * Mocks a YoastSEO.js Mark object. + * + * @param {string} original The sentence without yoastmark tags. + * @param {string} marked The sentence with yoastmark tags. + * + * @returns {Object} Mark mock. 
+ */ +function mockMark( original, marked ) { + return { + getOriginal: () => original, + getMarked: () => marked, + }; +} + +describe( "getOffsets", () => { + it( "successfully finds offsets for a single mark at the start of the text", () => { + const markedText = `${ START_MARK }A marked${ END_MARK } text.`; + + const expected = [ + { + startOffset: 0, + endOffset: 8, + }, + ]; + + expect( getYoastmarkOffsets( markedText ) ).toEqual( expected ); + } ); + + it( "successfully finds offsets for a single mark at the end of the text", () => { + const markedText = `A ${ START_MARK }marked text.${ END_MARK }`; + + const expected = [ + { + startOffset: 2, + endOffset: 14, + }, + ]; + + expect( getYoastmarkOffsets( markedText ) ).toEqual( expected ); + } ); + + it( "successfully finds multiple offsets for multiple marks", () => { + const markedText = `${ START_MARK }A${ END_MARK } marked ${ START_MARK }text.${ END_MARK }`; + + const expected = [ + { + startOffset: 0, + endOffset: 1, + }, + { + startOffset: 9, + endOffset: 14, + }, + ]; + + expect( getYoastmarkOffsets( markedText ) ).toEqual( expected ); + } ); + + /** + * If we receive an unexpected string we don't do anything to avoid code complexity. We + * expect YoastSEO.js to provide us with proper marked sentences after all. + */ + it( "returns an empty array if the start and end tags are in a incorrect order", () => { + const markedText = `${ START_MARK }A${ END_MARK } marked ${ END_MARK }text.${ START_MARK }`; + + const expected = []; + + expect( getYoastmarkOffsets( markedText ) ).toEqual( expected ); + } ); +} ); + +describe( "getIndicesOf", () => { + it( "finds a single case sensitive occurence", () => { + const expected = [ 3 ]; + + expect( getIndicesOf( "la LA", "LA" ) ).toEqual( expected ); + } ); + + it( "finds multiple case insensitive occurences", () => { + const expected = [ 0, 3 ]; + + expect( getIndicesOf( "la LA", "LA", false ) ).toEqual( expected ); + } ); +} ); + +describe( "calculateAnnotationsForTextFormat", () => { + it( "correctly calculates multiple offsets for a single sentence in a text", () => { + const text = "A long text. A marked text."; + + const mark = mockMark( + "A marked text.", + `A marked ${ START_MARK }text${ END_MARK }.` + ); + + /* + * "A long text. A marked text." + * 22 ^ ^ 26 + */ + const expected = [ + { + startOffset: 22, + endOffset: 26, + }, + ]; + + const actual = calculateAnnotationsForTextFormat( + text, + mark + ); + + expect( actual ).toEqual( expected ); + } ); + + it( "correctly calculates offsets in a text with HTML tags", () => { + const text = "A long text. A marked text."; + + const mark = mockMark( + "A marked text.", + `A ${ START_MARK }marked text${ END_MARK }.` + ); + + /* + * "A long text. A marked text." + * ^ 15 ^ 26 + */ + const expected = [ { + startOffset: 15, + endOffset: 26, + } ]; + + const actual = calculateAnnotationsForTextFormat( + text, + mark + ); + + expect( actual ).toEqual( expected ); + } ); + + it( "correctly calculates offsets in a text with invalid HTML markup", () => { + const text = "A long text. A marked text."; + + const mark = mockMark( + "A marked text.", + `A ${ START_MARK }marked${ END_MARK } text.` + ); + + /* + * "A long text. A marked text." 
+ * ^ 15 ^ 21 + */ + const expected = [ { + startOffset: 15, + endOffset: 21, + } ]; + + const actual = calculateAnnotationsForTextFormat( + text, + mark + ); + + expect( actual ).toEqual( expected ); + } ); +} ); + +describe( "createAnnotationsFromPositionBasedMarks", () => { + it( "create annotation from block position based mark:" + + " when the block client id matches the mark client id and the block html is the same as the rich text", () => { + const mark = new Mark( { + position: { + startOffsetBlock: 22, + endOffsetBlock: 26, + clientId: "261e3892-f28c-4273-86b4-a00801c38d22", + }, + } ); + const html = "The Maine Coon is a large domesticated cat breed."; + + /* + * "A long text. A marked text." + * 22 ^ ^ 26 + */ + const expected = [ + { + startOffset: 22, + endOffset: 26, + }, + ]; + + const actual = createAnnotationsFromPositionBasedMarks( + "261e3892-f28c-4273-86b4-a00801c38d22", + mark, + html, + html + ); + + expect( actual ).toEqual( expected ); + } ); + it( "should return an empty array if the block client id doesn't match the mark client id", () => { + const mark = new Mark( { + position: { + startOffsetBlock: 22, + endOffsetBlock: 26, + clientId: "261aaa892-f28c-4273-86b4-a008574858", + }, + } ); + const html = "The Maine Coon is a large domesticated cat breed."; + + const actual = createAnnotationsFromPositionBasedMarks( + "261e3892-f28c-4273-86b4-a00801c38d22", + mark, + html, + html + ); + + expect( actual ).toEqual( [] ); + } ); + it( "should return annotations with adjusted block start and end position " + + "when the block html is not the same as the rich text", () => { + const mark = new Mark( { + position: { + startOffsetBlock: 115, + endOffsetBlock: 120, + clientId: "261e3892-f28c-4273-86b4-a00801c38d22", + }, + } ); + const html = "Red panda has smaller chewing muscles than " + + "the giant panda. "; + const richText = "Red panda has a smaller chewing muscles than the giant panda."; + + const actual = createAnnotationsFromPositionBasedMarks( + "261e3892-f28c-4273-86b4-a00801c38d22", + mark, + html, + richText + ); + + expect( actual ).toEqual( [ { + startOffset: 47, + endOffset: 52, + } ] ); + } ); +} ); + +describe( "getAnnotationsForYoastBlock", () => { + describe( "a test for retrieving annotations from a Yoast How-to block", () => { + it( "returns the annotations from a Yoast How-To block, steps section", () => { + create.mockReturnValue( { + // This value will be returned from the last call. + // This value matches `jsonDescription` in which inside `getAnnotationsForYoastBlocks` is the last value to be processed. + text: "Step by step guide on how to make your cat love you (with or without food bribe).", + } ) + .mockReturnValueOnce( { + // This value will be returned from the first call. + // This value matches `jsonName` in which inside `getAnnotationsForYoastBlocks` is the first value to be processed. + text: "Establish a close contact with your cat", + } ) + .mockReturnValueOnce( { + // This value will be returned from the second call. + // This value matches `jsonText` in which inside `getAnnotationsForYoastBlocks` is the second value to be processed. + text: "According to a research, cats are social animal. 
Your cat will prefer the hooman than a very tempting snack in a stressful situation.", + } ); + const mockAttribute = { + key: "steps", + }; + + const mockBlock = { + clientId: "2821282d-aead-4191-a844-c43568cd112d", + name: "yoast/how-to-block", + isValid: true, + attributes: { + jsonDescription: "Step-by-step guide on how to make your cat love you (with or without food bribe).", + steps: [ { + id: "how-to-step-1674124378605", + jsonName: "Establish a close contact with your cat", + jsonText: "According to a research, cats are social animal. Your cat will prefer the hooman than a very tempting snack in a stressful situation.", + } ], + }, + innerBlocks: [], + originalContent: "

    " + + "Step by step guide on how to make your cat love you (with or without food bribe).

      " + + "
    1. " + + "Establish a close contact with your cat

      According to a research, " + + "cats are social animal. Your cat will prefer the hooman than a very tempting snack in a stressful situation.

      " + + "
    ", + }; + + const mockMarks = [ + new Mark( { + original: "Establish a close contact with your cat", + marked: "Establish a close contact with your cat", + position: { + startOffsetBlock: 76, + endOffsetBlock: 79, + isFirstSection: true, + clientId: "2821282d-aead-4191-a844-c43568cd112d", + attributeId: "how-to-step-1674124378605", + }, + } ), + new Mark( { + original: "According to a research, cats are social animal. Your cat will prefer the hooman than a very tempting snack in a stressful situation.", + marked: "According to a research, cats are social animal. Your cat" + + " will prefer the hooman than a very tempting snack in a stressful situation.", + position: { + startOffsetBlock: 54, + endOffsetBlock: 57, + isFirstSection: false, + clientId: "2821282d-aead-4191-a844-c43568cd112d", + attributeId: "how-to-step-1674124378605", + }, + } ), + new Mark( { + original: "Step-by-step guide on how to make your cat love you (with or without food bribe).", + marked: "Step-by-step guide on how to make your cat love you (with or without food bribe).", + position: { + startOffsetBlock: 39, + endOffsetBlock: 42, + isFirstSection: false, + clientId: "2821282d-aead-4191-a844-c43568cd112d", + attributeId: "", + }, + } ), + ]; + select.mockReturnValue( { + getActiveMarker: jest.fn( () => "keyphraseDensity" ), + } ); + + const annotations = getAnnotationsForYoastBlock( mockAttribute, mockBlock, mockMarks ); + const resultWithAnnotation = [ + { + startOffset: 36, + endOffset: 39, + block: "2821282d-aead-4191-a844-c43568cd112d", + richTextIdentifier: "how-to-step-1674124378605-name", + }, + { + startOffset: 54, + endOffset: 57, + block: "2821282d-aead-4191-a844-c43568cd112d", + richTextIdentifier: "how-to-step-1674124378605-text", + }, + ]; + + expect( annotations ).toEqual( resultWithAnnotation ); + } ); + it( "returns the annotations from a Yoast How-To block, jsonDescription section", () => { + create.mockReturnValue( { + // This value will be returned from the last call. + // This value matches `jsonDescription` in which inside `getAnnotationsForYoastBlocks` is the last value to be processed. + text: "Step by step guide on how to make your cat love you (with or without food bribe).", + } ) + .mockReturnValueOnce( { + // This value will be returned from the first call. + // This value matches `jsonName` in which inside `getAnnotationsForYoastBlocks` is the first value to be processed. + text: "Establish a close contact with your cat", + } ) + .mockReturnValueOnce( { + // This value will be returned from the second call. + // This value matches `jsonText` in which inside `getAnnotationsForYoastBlocks` is the second value to be processed. + text: "According to a research, cats are social animal. Your cat will prefer the hooman than a very tempting snack in a stressful situation.", + } ); + const mockAttribute = { + key: "jsonDescription", + }; + + const mockBlock = { + clientId: "2821282d-aead-4191-a844-c43568cd112d", + name: "yoast/how-to-block", + isValid: true, + attributes: { + jsonDescription: "Step by step guide on how to make your cat love you (with or without food bribe).", + steps: [ { + id: "how-to-step-1674124378605", + jsonName: "Establish a close contact with your cat", + jsonText: "According to a research, cats are social animal. 
Your cat will prefer the hooman than a very tempting snack in a stressful situation.", + } ], + }, + innerBlocks: [], + }; + + const mockMarks = [ + new Mark( { + original: "Establish a close contact with your cat", + marked: "Establish a close contact with your cat", + position: { + startOffsetBlock: 36, + endOffsetBlock: 39, + isFirstSection: true, + clientId: "2821282d-aead-4191-a844-c43568cd112d", + attributeId: "how-to-step-1674124378605", + }, + } ), + new Mark( { + original: "According to a research, cats are social animal. Your cat will prefer the hooman than a very tempting snack in a stressful situation.", + marked: "According to a research, cats are social animal. Your cat" + + " will prefer the hooman than a very tempting snack in a stressful situation.", + position: { + startOffsetBlock: 54, + endOffsetBlock: 57, + isFirstSection: false, + clientId: "2821282d-aead-4191-a844-c43568cd112d", + attributeId: "how-to-step-1674124378605", + }, + } ), + new Mark( { + original: "Step-by-step guide on how to make your cat love you (with or without food bribe).", + marked: "Step-by-step guide on how to make your cat love you (with or without food bribe).", + position: { + startOffsetBlock: 39, + endOffsetBlock: 42, + isFirstSection: false, + clientId: "2821282d-aead-4191-a844-c43568cd112d", + attributeId: "", + }, + } ), + ]; + select.mockReturnValue( { + getActiveMarker: jest.fn( () => "keyphraseDensity" ), + } ); + + const annotations = getAnnotationsForYoastBlock( mockAttribute, mockBlock, mockMarks ); + const resultWithAnnotation = [ + { + startOffset: 39, + endOffset: 42, + block: "2821282d-aead-4191-a844-c43568cd112d", + richTextIdentifier: "description", + }, + ]; + + expect( annotations ).toEqual( resultWithAnnotation ); + } ); + } ); + describe( "a test for retrieving annotations from a Yoast FAQ block", () => { + it( "returns an annotation if there is an applicable marker for the text in a Yoast FAQ block", () => { + create.mockReturnValue( { + /* + * This value will be returned from the last call. + * This value matches `jsonAnswer` of the second question in `mockBlock` in which + * inside `getAnnotationsForYoastBlocks` is the last value to be processed. + */ + text: "Studies reveal that commercially prepared raw food suffers from increased levels of contamination with potential pathogens compared to “regular” cat foods.", + } ) + .mockReturnValueOnce( { + /* + * This value will be returned from the first call. + * This value matches `jsonQuestion` of the first question in `mockBlock` in which + * inside `getAnnotationsForYoastBlocks` is the first value to be processed. + */ + text: "What are the benefits of raw food for your cat?", + } ) + .mockReturnValueOnce( { + /* + * This value will be returned from the second call. + * This value matches `jsonAnswer` of the first question in `mockBlock` in which + * inside `getAnnotationsForYoastBlocks` is the second value to be processed. + */ + text: "A raw food diet for cats is more digestible than a diet of plant-based foods.", + } ) + .mockReturnValueOnce( { + /* + * This value will be returned from the third call. + * This value matches `jsonQuestion` of the second question in `mockBlock` in which + * inside `getAnnotationsForYoastBlocks` is the third value to be processed. 
+ */ + text: "Are there disadvantages to giving raw food to cats?", + } ); + const mockAttribute = { + key: "questions", + }; + + const mockBlock = { + clientId: "2821282d-aead-4191-a844-c43568cd112d", + name: "yoast/faq-block", + isValid: true, + attributes: { + questions: [ + { + id: "faq-question-1674124378605", + jsonAnswer: "A raw food diet for cats is more digestible than a diet of plant-based foods.", + jsonQuestion: "What are the benefits of raw food for your cat?", + }, + { + + id: "faq-question-16746w9898469", + jsonAnswer: "Studies reveal that commercially prepared raw food suffers from increased levels of contamination with potential pathogens compared to “regular” cat foods.", + jsonQuestion: "Are there disadvantages to giving raw food to cats?", + }, + ], + }, + innerBlocks: [], + originalContent: "
    " + + "What are the benefits of raw food for your cat? " + + "

    A raw food diet for cats is more digestible than a diet of plant-based foods.

    " + + "
    " + + "Are there disadvantages to giving raw food to cats?

    Studies reveal that commercially " + + "prepared raw food suffers from increased levels of contamination with potential pathogens compared to “regular” cat foods.

    " + + "
    ", + }; + + const mockMarks = [ + new Mark( { + original: "A raw food diet for cats is more digestible than a diet of plant-based foods.", + marked: "A raw food diet for cats is more digestible than a diet of plant-based foods.", + position: { + startOffsetBlock: 2, + endOffsetBlock: 10, + isFirstSection: false, + clientId: "2821282d-aead-4191-a844-c43568cd112d", + attributeId: "faq-question-1674124378605", + }, + } ), + new Mark( { + original: "What are the benefits of raw food for your cat?", + marked: "What are the benefits of raw food for your cat?", + position: { + startOffsetBlock: 61, + endOffsetBlock: 69, + isFirstSection: true, + clientId: "2821282d-aead-4191-a844-c43568cd112d", + attributeId: "faq-question-1674124378605", + }, + } ), + new Mark( { + original: "Studies reveal that commercially prepared raw food suffers from increased levels of contamination with potential pathogens compared to “regular” cat foods.", + marked: "Studies reveal that commercially prepared raw food suffers from increased levels of contamination with potential pathogens compared to “regular” cat foods.", + position: { + startOffsetBlock: 42, + endOffsetBlock: 50, + isFirstSection: false, + clientId: "2821282d-aead-4191-a844-c43568cd112d", + attributeId: "faq-question-16746w9898469", + }, + } ), + new Mark( { + original: "Are there disadvantages to giving raw food to cats?", + marked: "Are there disadvantages to giving raw food to cats?", + position: { + startOffsetBlock: 70, + endOffsetBlock: 78, + isFirstSection: true, + clientId: "2821282d-aead-4191-a844-c43568cd112d", + attributeId: "faq-question-16746w9898469", + }, + } ), + ]; + select.mockReturnValue( { + getActiveMarker: jest.fn( () => "keyphraseDensity" ), + } ); + + const annotations = getAnnotationsForYoastBlock( mockAttribute, mockBlock, mockMarks ); + const resultWithAnnotation = [ + { + startOffset: 25, + endOffset: 33, + block: "2821282d-aead-4191-a844-c43568cd112d", + richTextIdentifier: "faq-question-1674124378605-question", + }, + { + startOffset: 2, + endOffset: 10, + block: "2821282d-aead-4191-a844-c43568cd112d", + richTextIdentifier: "faq-question-1674124378605-answer", + }, + { + startOffset: 34, + endOffset: 42, + block: "2821282d-aead-4191-a844-c43568cd112d", + richTextIdentifier: "faq-question-16746w9898469-question", + }, + { + startOffset: 42, + endOffset: 50, + block: "2821282d-aead-4191-a844-c43568cd112d", + richTextIdentifier: "faq-question-16746w9898469-answer", + }, + ]; + + expect( annotations ).toEqual( resultWithAnnotation ); + } ); + } ); +} ); + +describe( "a test for retrieving annotations from non-Yoast blocks", () => { + it( "returns an annotation if there is an applicable marker for a paragraph block: " + + "the paragraph text contains some styling such as strong and em tags", () => { + // The string we want to highlight: "red panda's". + create.mockImplementation( () => { + return { text: "The red panda's skull is wide, and its lower jaw is robust." }; + } ); + + const mockBlock = { + clientId: "34f61542-0902-44f7-ab48-d9f88a022b43", + name: "core/paragraph", + isValid: true, + attributes: { + content: "The red panda's skull is wide, and its lower jaw is robust.", + }, + innerBlocks: [], + originalContent: "

    The red panda's skull is wide, and its lower jaw is robust.

    ", + }; + + const mockAttribute = { + key: "content", + }; + + const mockMarks = [ + new Mark( { + original: "The red panda's skull is wide, and its lower jaw is robust.", + marked: "The red panda's skull is wide, and its lower jaw is robust.", + position: { + startOffsetBlock: 4, + endOffsetBlock: 7, + isFirstSection: false, + clientId: "34f61542-0902-44f7-ab48-d9f88a022b43", + attributeId: "", + }, + } ), + new Mark( { + original: "The red panda's skull is wide, and its lower jaw is robust.", + marked: "The red panda's skull is wide, and its lower jaw is robust.", + position: { + startOffsetBlock: 16, + endOffsetBlock: 23, + isFirstSection: false, + clientId: "34f61542-0902-44f7-ab48-d9f88a022b43", + attributeId: "", + }, + } ), + new Mark( { + original: "A raw food diet for cats is more digestible than a diet of plant-based foods.", + marked: "A raw food diet for cats is more digestible than a diet of plant-based foods.", + position: { + startOffsetBlock: 2, + endOffsetBlock: 10, + isFirstSection: false, + clientId: "2821282d-aead-4191-a844-c43568cd112d", + attributeId: "faq-question-1674124378605", + }, + } ), + ]; + + select.mockReturnValue( { + getActiveMarker: jest.fn( () => "keyphraseDensity" ), + } ); + + const annotations = getAnnotationsForWPBlock( mockAttribute, mockBlock, mockMarks ); + + const resultWithAnnotation = [ + { + startOffset: 4, + endOffset: 7, + block: "34f61542-0902-44f7-ab48-d9f88a022b43", + richTextIdentifier: "content", + }, + { + startOffset: 8, + endOffset: 15, + block: "34f61542-0902-44f7-ab48-d9f88a022b43", + richTextIdentifier: "content", + }, + ]; + + expect( annotations ).toEqual( resultWithAnnotation ); + } ); +} ); diff --git a/packages/yoastseo/src/parse/build/build.js b/packages/yoastseo/src/parse/build/build.js index fac419f78fb..9e5f55d05b6 100644 --- a/packages/yoastseo/src/parse/build/build.js +++ b/packages/yoastseo/src/parse/build/build.js @@ -9,16 +9,14 @@ import { filterBeforeTokenizing } from "./private/filterBeforeTokenizing"; import parseBlocks from "./private/parseBlocks"; /** - * Parses the HTML string to a tree representation of - * the HTML document. + * Parses the HTML string to a tree representation of the HTML document. * - * @param {Paper} paper The HTML string. + * @param {Paper} paper The paper to build the tree from. * @param {LanguageProcessor} languageProcessor The language processor to use. * * @returns {Node} The tree representation of the HTML string. */ export default function build( paper, languageProcessor ) { - // console.log( "build paper", paper ); const html = paper.getText(); let tree = adapt( parseFragment( html, { sourceCodeLocationInfo: true } ) ); diff --git a/packages/yoastseo/src/parse/build/private/parseBlocks.js b/packages/yoastseo/src/parse/build/private/parseBlocks.js index 4e956754ea0..67eff4563c3 100644 --- a/packages/yoastseo/src/parse/build/private/parseBlocks.js +++ b/packages/yoastseo/src/parse/build/private/parseBlocks.js @@ -3,7 +3,8 @@ const blockTokenizer = /" + - "

    A HeadingA Heading

    "; const tree = adapt( parseFragment( html, { sourceCodeLocationInfo: true } ) ); const filteredTree = filterTree( tree, permanentFilters ); @@ -439,10 +445,12 @@ describe( "Tests filtered trees of a few Yoast blocks and of a made-up Yoast blo { name: "#text", value: "\n", + sourceCodeRange: { startOffset: 71, endOffset: 72 }, }, { name: "#text", value: "\n", + sourceCodeRange: { startOffset: 358, endOffset: 359 }, }, { attributes: {}, @@ -459,17 +467,22 @@ describe( "Tests filtered trees of a few Yoast blocks and of a made-up Yoast blo { name: "#text", value: "A Heading", + sourceCodeRange: { startOffset: 408, endOffset: 417 }, }, ], level: 2, name: "h2", sourceCodeLocation: { - endOffset: 421, + endOffset: 422, startOffset: 404, startTag: { endOffset: 408, startOffset: 404, }, + endTag: { + startOffset: 417, + endOffset: 422, + }, }, }, ], @@ -499,6 +512,7 @@ describe( "Tests filtered trees of a few Yoast blocks and of a made-up Yoast blo { name: "#text", value: "Hello world!", + sourceCodeRange: { startOffset: 33, endOffset: 45 }, }, ], sourceCodeLocation: { From e434dd5cec364dbdc07713f9d58664a0c713f682 Mon Sep 17 00:00:00 2001 From: hdvos Date: Fri, 25 Aug 2023 13:19:23 +0200 Subject: [PATCH 323/616] adapt findAllInTreeSpec.js to new Text.js --- packages/yoastseo/spec/parse/traverse/findAllInTreeSpec.js | 2 ++ 1 file changed, 2 insertions(+) diff --git a/packages/yoastseo/spec/parse/traverse/findAllInTreeSpec.js b/packages/yoastseo/spec/parse/traverse/findAllInTreeSpec.js index 06537557d55..d9a3733406c 100644 --- a/packages/yoastseo/spec/parse/traverse/findAllInTreeSpec.js +++ b/packages/yoastseo/spec/parse/traverse/findAllInTreeSpec.js @@ -29,6 +29,7 @@ describe( "A test for findAllInTree", () => { childNodes: [ { name: "#text", value: "Hello, world! ", + sourceCodeRange: { startOffset: 22, endOffset: 36 }, } ], sentences: [ { text: "Hello, world!", @@ -61,6 +62,7 @@ describe( "A test for findAllInTree", () => { childNodes: [ { name: "#text", value: "Hello, yoast! ", + sourceCodeRange: { startOffset: 57, endOffset: 71 }, } ], sentences: [ { text: "Hello, yoast!", From 3e4e3c86dcd9e050270c86e6ead1db701689f1d8 Mon Sep 17 00:00:00 2001 From: hdvos Date: Fri, 25 Aug 2023 13:19:50 +0200 Subject: [PATCH 324/616] adapt getAnchorsWithKeyphraseSpec.js to new Text.js --- .../researches/getAnchorsWithKeyphraseSpec.js | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/packages/yoastseo/spec/languageProcessing/researches/getAnchorsWithKeyphraseSpec.js b/packages/yoastseo/spec/languageProcessing/researches/getAnchorsWithKeyphraseSpec.js index b2e54d278df..d25be3ae409 100644 --- a/packages/yoastseo/spec/languageProcessing/researches/getAnchorsWithKeyphraseSpec.js +++ b/packages/yoastseo/spec/languageProcessing/researches/getAnchorsWithKeyphraseSpec.js @@ -145,7 +145,7 @@ describe( "A test for getting the anchors that contain the keyphrase or synonym" expect( result.anchorsWithKeyphraseCount ).toEqual( 1 ); expect( result.anchorsWithKeyphrase ).toEqual( [ { attributes: { href: "example.com" }, - childNodes: [ { name: "#text", value: "Mrs. always tries to be awesome", sourceCodeLocation: { startOffset: 24, endOffset: 55 } } ], + childNodes: [ { name: "#text", value: "Mrs. 
always tries to be awesome", sourceCodeRange: { startOffset: 24, endOffset: 55 } } ], name: "a", sourceCodeLocation: { endOffset: 59, @@ -172,7 +172,7 @@ describe( "A test for getting the anchors that contain the keyphrase or synonym" expect( result.anchorsWithKeyphraseCount ).toEqual( 1 ); expect( result.anchorsWithKeyphrase ).toEqual( [ { attributes: { href: "http://yoast.com/some-other-page/" }, - childNodes: [ { name: "#text", value: "many fluffy and furry cats", sourceCodeLocation: { startOffset: 51, endOffset: 77 } } ], + childNodes: [ { name: "#text", value: "many fluffy and furry cats", sourceCodeRange: { startOffset: 51, endOffset: 77 } } ], name: "a", sourceCodeLocation: { endOffset: 81, @@ -198,7 +198,7 @@ describe( "A test for getting the anchors that contain the keyphrase or synonym" expect( result.anchorsWithKeyphraseCount ).toEqual( 1 ); expect( result.anchorsWithKeyphrase ).toEqual( [ { attributes: { href: "http://test.com/keyword#top" }, - childNodes: [ { name: "#text", value: "food for cats", sourceCodeLocation: { startOffset: 53, endOffset: 66 } } ], + childNodes: [ { name: "#text", value: "food for cats", sourceCodeRange: { startOffset: 53, endOffset: 66 } } ], name: "a", sourceCodeLocation: { endOffset: 70, @@ -225,7 +225,7 @@ describe( "A test for getting the anchors that contain the keyphrase or synonym" expect( result.anchorsWithKeyphraseCount ).toEqual( 1 ); expect( result.anchorsWithKeyphrase ).toEqual( [ { attributes: { href: "http://example.com/some-other-page/" }, - childNodes: [ { name: "#text", value: "many fluffy and furry cats", sourceCodeLocation: { startOffset: 53, endOffset: 79 } } ], + childNodes: [ { name: "#text", value: "many fluffy and furry cats", sourceCodeRange: { startOffset: 53, endOffset: 79 } } ], name: "a", sourceCodeLocation: { endOffset: 83, @@ -252,10 +252,10 @@ describe( "A test for getting the anchors that contain the keyphrase or synonym" expect( result.anchorsWithKeyphrase ).toEqual( [ { attributes: { href: "http://test.com/that-page" }, childNodes: [ - { name: "#text", value: "food for ", sourceCodeLocation: { startOffset: 51, endOffset: 60 } }, + { name: "#text", value: "food for ", sourceCodeRange: { startOffset: 51, endOffset: 60 } }, { attributes: {}, - childNodes: [ { name: "#text", value: "cats", sourceCodeLocation: { startOffset: 68, endOffset: 72 } } ], + childNodes: [ { name: "#text", value: "cats", sourceCodeRange: { startOffset: 68, endOffset: 72 } } ], name: "strong", sourceCodeLocation: { endOffset: 81, From 5ee9ec879efaa6e85c96f5a7e098e10b7718620d Mon Sep 17 00:00:00 2001 From: hdvos Date: Fri, 25 Aug 2023 13:20:05 +0200 Subject: [PATCH 325/616] adapt getSentencesFromTreeSpec.js to new Text.js --- .../sentence/getSentencesFromTreeSpec.js | 112 +++++++++--------- 1 file changed, 56 insertions(+), 56 deletions(-) diff --git a/packages/yoastseo/spec/languageProcessing/helpers/sentence/getSentencesFromTreeSpec.js b/packages/yoastseo/spec/languageProcessing/helpers/sentence/getSentencesFromTreeSpec.js index 5fefda6038f..c9cfd506726 100644 --- a/packages/yoastseo/spec/languageProcessing/helpers/sentence/getSentencesFromTreeSpec.js +++ b/packages/yoastseo/spec/languageProcessing/helpers/sentence/getSentencesFromTreeSpec.js @@ -15,104 +15,104 @@ describe( "test to get sentences from the tree", () => { buildTree( paper, researcher ); expect( getSentencesFromTree( paper ) ).toEqual( [ { - sourceCodeRange: { endOffset: 44, startOffset: 3 }, - parentStartOffset: 3, + sourceCodeRange: { endOffset: 49, startOffset: 8 }, + 
parentStartOffset: 8, text: "A very intelligent cat loves their human.", tokens: [ - { sourceCodeRange: { endOffset: 4, startOffset: 3 }, text: "A" }, - { sourceCodeRange: { endOffset: 5, startOffset: 4 }, text: " " }, - { sourceCodeRange: { endOffset: 9, startOffset: 5 }, text: "very" }, - { sourceCodeRange: { endOffset: 10, startOffset: 9 }, text: " " }, - { sourceCodeRange: { endOffset: 21, startOffset: 10 }, text: "intelligent" }, - { sourceCodeRange: { endOffset: 22, startOffset: 21 }, text: " " }, - { sourceCodeRange: { endOffset: 25, startOffset: 22 }, text: "cat" }, - { sourceCodeRange: { endOffset: 26, startOffset: 25 }, text: " " }, - { sourceCodeRange: { endOffset: 31, startOffset: 26 }, text: "loves" }, - { sourceCodeRange: { endOffset: 32, startOffset: 31 }, text: " " }, - { sourceCodeRange: { endOffset: 37, startOffset: 32 }, text: "their" }, - { sourceCodeRange: { endOffset: 38, startOffset: 37 }, text: " " }, - { sourceCodeRange: { endOffset: 43, startOffset: 38 }, text: "human" }, - { sourceCodeRange: { endOffset: 44, startOffset: 43 }, text: "." }, + { sourceCodeRange: { startOffset: 8, endOffset: 9 }, text: "A" }, + { sourceCodeRange: { startOffset: 9, endOffset: 10 }, text: " " }, + { sourceCodeRange: { startOffset: 10, endOffset: 14 }, text: "very" }, + { sourceCodeRange: { startOffset: 14, endOffset: 15 }, text: " " }, + { sourceCodeRange: { startOffset: 15, endOffset: 26 }, text: "intelligent" }, + { sourceCodeRange: { startOffset: 26, endOffset: 27 }, text: " " }, + { sourceCodeRange: { startOffset: 27, endOffset: 30 }, text: "cat" }, + { sourceCodeRange: { startOffset: 30, endOffset: 31 }, text: " " }, + { sourceCodeRange: { startOffset: 31, endOffset: 36 }, text: "loves" }, + { sourceCodeRange: { startOffset: 36, endOffset: 37 }, text: " " }, + { sourceCodeRange: { startOffset: 37, endOffset: 42 }, text: "their" }, + { sourceCodeRange: { startOffset: 42, endOffset: 43 }, text: " " }, + { sourceCodeRange: { startOffset: 43, endOffset: 48 }, text: "human" }, + { sourceCodeRange: { startOffset: 48, endOffset: 49 }, text: "." }, ], isParentFirstSectionOfBlock: false, parentAttributeId: "", parentClientId: "", }, { - sourceCodeRange: { endOffset: 64, startOffset: 44 }, - parentStartOffset: 3, + sourceCodeRange: { endOffset: 69, startOffset: 49 }, + parentStartOffset: 8, text: " A dog is very cute.", tokens: [ - { sourceCodeRange: { endOffset: 45, startOffset: 44 }, text: " " }, - { sourceCodeRange: { endOffset: 46, startOffset: 45 }, text: "A" }, - { sourceCodeRange: { endOffset: 47, startOffset: 46 }, text: " " }, - { sourceCodeRange: { endOffset: 50, startOffset: 47 }, text: "dog" }, - { sourceCodeRange: { endOffset: 51, startOffset: 50 }, text: " " }, - { sourceCodeRange: { endOffset: 53, startOffset: 51 }, text: "is" }, - { sourceCodeRange: { endOffset: 54, startOffset: 53 }, text: " " }, - { sourceCodeRange: { endOffset: 58, startOffset: 54 }, text: "very" }, - { sourceCodeRange: { endOffset: 59, startOffset: 58 }, text: " " }, - { sourceCodeRange: { endOffset: 63, startOffset: 59 }, text: "cute" }, - { sourceCodeRange: { endOffset: 64, startOffset: 63 }, text: "." 
}, + { sourceCodeRange: { startOffset: 49, endOffset: 50 }, text: " " }, + { sourceCodeRange: { startOffset: 50, endOffset: 51 }, text: "A" }, + { sourceCodeRange: { startOffset: 51, endOffset: 52 }, text: " " }, + { sourceCodeRange: { startOffset: 52, endOffset: 55 }, text: "dog" }, + { sourceCodeRange: { startOffset: 55, endOffset: 56 }, text: " " }, + { sourceCodeRange: { startOffset: 56, endOffset: 58 }, text: "is" }, + { sourceCodeRange: { startOffset: 58, endOffset: 59 }, text: " " }, + { sourceCodeRange: { startOffset: 59, endOffset: 63 }, text: "very" }, + { sourceCodeRange: { startOffset: 63, endOffset: 64 }, text: " " }, + { sourceCodeRange: { startOffset: 64, endOffset: 68 }, text: "cute" }, + { sourceCodeRange: { startOffset: 68, endOffset: 69 }, text: "." }, ], isParentFirstSectionOfBlock: false, parentAttributeId: "", parentClientId: "", }, { - sourceCodeRange: { endOffset: 86, startOffset: 72 }, - parentStartOffset: 72, + sourceCodeRange: { endOffset: 91, startOffset: 77 }, + parentStartOffset: 77, text: "A subheading 3", tokens: [ - { sourceCodeRange: { endOffset: 73, startOffset: 72 }, text: "A" }, - { sourceCodeRange: { endOffset: 74, startOffset: 73 }, text: " " }, - { sourceCodeRange: { endOffset: 84, startOffset: 74 }, text: "subheading" }, - { sourceCodeRange: { endOffset: 85, startOffset: 84 }, text: " " }, - { sourceCodeRange: { endOffset: 86, startOffset: 85 }, text: "3" }, + { sourceCodeRange: { startOffset: 77, endOffset: 78 }, text: "A" }, + { sourceCodeRange: { startOffset: 78, endOffset: 79 }, text: " " }, + { sourceCodeRange: { startOffset: 79, endOffset: 89 }, text: "subheading" }, + { sourceCodeRange: { startOffset: 89, endOffset: 90 }, text: " " }, + { sourceCodeRange: { startOffset: 90, endOffset: 91 }, text: "3" }, ], isParentFirstSectionOfBlock: false, parentAttributeId: "", parentClientId: "", }, { - sourceCodeRange: {}, - parentStartOffset: 0, + sourceCodeRange: { startOffset: 96, endOffset: 110 }, + parentStartOffset: 96, text: "text text text", tokens: [ - { sourceCodeRange: {}, text: "text" }, - { sourceCodeRange: {}, text: " " }, - { sourceCodeRange: {}, text: "text" }, - { sourceCodeRange: {}, text: " " }, - { sourceCodeRange: {}, text: "text" }, + { sourceCodeRange: { startOffset: 96, endOffset: 100 }, text: "text" }, + { sourceCodeRange: { startOffset: 100, endOffset: 101 }, text: " " }, + { sourceCodeRange: { startOffset: 101, endOffset: 105 }, text: "text" }, + { sourceCodeRange: { startOffset: 105, endOffset: 106 }, text: " " }, + { sourceCodeRange: { startOffset: 106, endOffset: 110 }, text: "text" }, ], isParentFirstSectionOfBlock: false, parentAttributeId: "", parentClientId: "", }, { - sourceCodeRange: { endOffset: 123, startOffset: 109 }, - parentStartOffset: 109, + sourceCodeRange: { endOffset: 128, startOffset: 114 }, + parentStartOffset: 114, text: "A subheading 4", tokens: [ - { sourceCodeRange: { endOffset: 110, startOffset: 109 }, text: "A" }, - { sourceCodeRange: { endOffset: 111, startOffset: 110 }, text: " " }, - { sourceCodeRange: { endOffset: 121, startOffset: 111 }, text: "subheading" }, - { sourceCodeRange: { endOffset: 122, startOffset: 121 }, text: " " }, - { sourceCodeRange: { endOffset: 123, startOffset: 122 }, text: "4" }, + { sourceCodeRange: { startOffset: 114, endOffset: 115 }, text: "A" }, + { sourceCodeRange: { startOffset: 115, endOffset: 116 }, text: " " }, + { sourceCodeRange: { startOffset: 116, endOffset: 126 }, text: "subheading" }, + { sourceCodeRange: { startOffset: 126, endOffset: 127 }, text: " " }, + { 
sourceCodeRange: { startOffset: 127, endOffset: 128 }, text: "4" }, ], isParentFirstSectionOfBlock: false, parentAttributeId: "", parentClientId: "", }, { - sourceCodeRange: { endOffset: 138, startOffset: 128 }, - parentStartOffset: 128, + sourceCodeRange: { endOffset: 143, startOffset: 133 }, + parentStartOffset: 133, text: "more text.", tokens: [ - { sourceCodeRange: { endOffset: 132, startOffset: 128 }, text: "more" }, - { sourceCodeRange: { endOffset: 133, startOffset: 132 }, text: " " }, - { sourceCodeRange: { endOffset: 137, startOffset: 133 }, text: "text" }, - { sourceCodeRange: { endOffset: 138, startOffset: 137 }, text: "." }, + { sourceCodeRange: { startOffset: 133, endOffset: 137 }, text: "more" }, + { sourceCodeRange: { startOffset: 137, endOffset: 138 }, text: " " }, + { sourceCodeRange: { startOffset: 138, endOffset: 142 }, text: "text" }, + { sourceCodeRange: { startOffset: 142, endOffset: 143 }, text: "." }, ], isParentFirstSectionOfBlock: false, parentAttributeId: "", From 40d0719dbd3efb7343053ee357b58519d0e76667 Mon Sep 17 00:00:00 2001 From: hdvos Date: Fri, 25 Aug 2023 13:20:18 +0200 Subject: [PATCH 326/616] adapt NodeSpec.js to new Text.js --- packages/yoastseo/spec/parse/structure/NodeSpec.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/yoastseo/spec/parse/structure/NodeSpec.js b/packages/yoastseo/spec/parse/structure/NodeSpec.js index 21d93ca20b8..3ce0d2c7399 100644 --- a/packages/yoastseo/spec/parse/structure/NodeSpec.js +++ b/packages/yoastseo/spec/parse/structure/NodeSpec.js @@ -31,7 +31,7 @@ describe( "A test for the findAll method", () => { const expected = [ { name: "p", attributes: { "class": new Set( [ "yoast" ] ) }, - childNodes: [ { name: "#text", value: "Hello, world! " } ], + childNodes: [ { name: "#text", value: "Hello, world! ", sourceCodeRange: { startOffset: 22, endOffset: 36 } } ], isImplicit: false, sentences: [ { text: "Hello, world!", @@ -60,7 +60,7 @@ describe( "A test for the findAll method", () => { { name: "p", attributes: { "class": new Set( [ "yoast" ] ) }, - childNodes: [ { name: "#text", value: "Hello, yoast!" } ], + childNodes: [ { name: "#text", value: "Hello, yoast!", sourceCodeRange: { startOffset: 57, endOffset: 70 } } ], isImplicit: false, sentences: [ { text: "Hello, yoast!", From f96f8b7998e852d31f866afce5fdfc7d5ad0c19b Mon Sep 17 00:00:00 2001 From: hdvos Date: Fri, 25 Aug 2023 13:20:27 +0200 Subject: [PATCH 327/616] adapt tokenizeSpec.js to new Text.js --- .../yoastseo/spec/parse/build/private/tokenizeSpec.js | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/packages/yoastseo/spec/parse/build/private/tokenizeSpec.js b/packages/yoastseo/spec/parse/build/private/tokenizeSpec.js index 9b00c2edf47..2fe5869fe28 100644 --- a/packages/yoastseo/spec/parse/build/private/tokenizeSpec.js +++ b/packages/yoastseo/spec/parse/build/private/tokenizeSpec.js @@ -20,7 +20,7 @@ describe( "A test for the tokenize function", childNodes: [ { name: "#text", value: "This is a paragraph", - sourceCodeLocation: new SourceCodeLocation( { startOffset: 3, endOffset: 22 } ) } ], + sourceCodeRange: new SourceCodeLocation( { startOffset: 3, endOffset: 22 } ) } ], isImplicit: false, name: "p", sentences: [ { @@ -66,7 +66,7 @@ describe( "A test for the tokenize function", { name: "#text", value: "This is a sentence. 
This is another sentence.", - sourceCodeLocation: new SourceCodeLocation( { startOffset: 3, endOffset: 48 } ), + sourceCodeRange: new SourceCodeLocation( { startOffset: 3, endOffset: 48 } ), }, ], isImplicit: false, @@ -246,7 +246,7 @@ describe( "A test for the tokenize function", { name: "#text", value: "This is the release of YoastSEO 9.3.", - sourceCodeLocation: new SourceCodeLocation( {startOffset: 3, endOffset: 39} ) + sourceCodeRange: new SourceCodeLocation( { startOffset: 3, endOffset: 39 } ), }, ], sourceCodeLocation: { @@ -393,7 +393,7 @@ describe( "A test for tokenizing a japanese sentence", function() { { name: "#text", value: "犬が大好き。", - sourceCodeLocation: new SourceCodeLocation( {startOffset: 3, endOffset: 9} ) + sourceCodeRange: new SourceCodeLocation( { startOffset: 3, endOffset: 9 } ), }, ], isImplicit: false, From 51ae806cf8a7819cc56d9c81aba937b3b1d440cd Mon Sep 17 00:00:00 2001 From: hdvos Date: Fri, 25 Aug 2023 13:22:24 +0200 Subject: [PATCH 328/616] switch conditions in getTextElementPositions.js --- .../src/parse/build/private/getTextElementPositions.js | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/packages/yoastseo/src/parse/build/private/getTextElementPositions.js b/packages/yoastseo/src/parse/build/private/getTextElementPositions.js index dafe1cad5b1..de0720694ee 100644 --- a/packages/yoastseo/src/parse/build/private/getTextElementPositions.js +++ b/packages/yoastseo/src/parse/build/private/getTextElementPositions.js @@ -105,7 +105,7 @@ function adjustTextElementStart( descendantTagPositions, textElementStart ) { */ export default function getTextElementPositions( node, textElements, startOffset = -1 ) { // We cannot calculate positions if there are no text elements, or if we don't know the node's source code location. - if ( textElements.length === 0 || ! node.sourceCodeLocation ) { + if ( textElements.length === 0 || ( ! node.sourceCodeLocation && ! node.sourceCodeRange ) ) { return textElements; } @@ -115,9 +115,9 @@ export default function getTextElementPositions( node, textElements, startOffset * node's start tag. */ let textElementStart; - if ( node instanceof Paragraph && node.isImplicit ) { - textElementStart = node.sourceCodeLocation.startOffset; - } else if ( node.name === "#text" ) { + if ( node.name === "#text" && node.sourceCodeRange && node.sourceCodeRange.startOffset ) { + textElementStart = node.sourceCodeRange.startOffset; + } else if ( node instanceof Paragraph && node.isImplicit ) { textElementStart = node.sourceCodeLocation.startOffset; } else { textElementStart = startOffset >= 0 ? 
startOffset : node.sourceCodeLocation.startTag.endOffset; From 41812b580882b424ea1ee236d5a51ff5d9979002 Mon Sep 17 00:00:00 2001 From: hdvos Date: Fri, 25 Aug 2023 13:24:35 +0200 Subject: [PATCH 329/616] adapt sourcecodelocationsetting in combineIntoImplicitParagraphs.js --- .../private/combineIntoImplicitParagraphs.js | 52 ++++++++++++++++--- 1 file changed, 45 insertions(+), 7 deletions(-) diff --git a/packages/yoastseo/src/parse/build/private/combineIntoImplicitParagraphs.js b/packages/yoastseo/src/parse/build/private/combineIntoImplicitParagraphs.js index c50ff7de2ea..4ec356111f8 100644 --- a/packages/yoastseo/src/parse/build/private/combineIntoImplicitParagraphs.js +++ b/packages/yoastseo/src/parse/build/private/combineIntoImplicitParagraphs.js @@ -38,8 +38,11 @@ function hasChildren( node ) { */ function combineIntoImplicitParagraphs( nodes, parentSourceCodeLocation = {} ) { const newNodes = []; - let currentSourceCodeLocation = {}; + // console.log( nodes ); + + + //
	// <div>
	//     foo
	//     <p>bar</p>
	//     baz
	// </div>
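	// Loose text nodes such as "foo" and "baz" above are phrasing content without a
	// block-level wrapper, so runs like these get combined into implicit paragraphs,
	// while content that already sits in a block element such as <p> keeps that
	// element as its parent.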
    // For implicit paragraphs, strip off the start and end tag information from the parent's source code location. if ( ! isEmpty( parentSourceCodeLocation ) ) { @@ -60,15 +63,10 @@ function combineIntoImplicitParagraphs( nodes, parentSourceCodeLocation = {} ) { startOffset: firstNode.sourceCodeLocation.startOffset, endOffset: lastNode.sourceCodeLocation.endOffset, } ); - } else { - const a = 0 - // console.log( firstNode ); - // console.log( lastNode ); } } let implicitParagraph = Paragraph.createImplicit( {}, [], currentSourceCodeLocation ); - nodes.forEach( node => { if ( isPhrasingContent( node.name ) && ! isInterElementWhitespace( node ) ) { implicitParagraph.childNodes.push( node ); @@ -79,8 +77,29 @@ function combineIntoImplicitParagraphs( nodes, parentSourceCodeLocation = {} ) { if ( ! isEmpty( node.sourceCodeLocation ) ) { // Update the endOffset of the current implicit paragraph to be the start of the current node. if ( ! isEmpty( implicitParagraph.sourceCodeLocation ) ) { + // implicitParagraph.childNodes[ 0 ]; implicitParagraph.sourceCodeLocation.endOffset = node.sourceCodeLocation.startOffset; } + if ( implicitParagraph.childNodes[ 0 ].sourceCodeRange && ! isEmpty( implicitParagraph.childNodes[ 0 ].sourceCodeRange ) ) { + if ( implicitParagraph.sourceCodeLocation ) { + implicitParagraph.sourceCodeLocation.startOffset = implicitParagraph.childNodes[ 0 ].sourceCodeRange.startOffset; + } else { + implicitParagraph.sourceCodeLocation = new SourceCodeLocation( { + startOffset: implicitParagraph.childNodes[ 0 ].sourceCodeRange.startOffset, + endOffset: implicitParagraph.childNodes[ 0 ].sourceCodeRange.endOffset, + } ); + } + } else if ( implicitParagraph.childNodes[ 0 ].sourceCodeLocation && + ! isEmpty( implicitParagraph.childNodes[ 0 ].sourceCodeLocation ) ) { + if ( implicitParagraph.sourceCodeLocation ) { + implicitParagraph.sourceCodeLocation.startOffset = implicitParagraph.childNodes[ 0 ].sourceCodeLocation.startOffset; + } else { + implicitParagraph.sourceCodeLocation = new SourceCodeLocation( { + startOffset: implicitParagraph.childNodes[ 0 ].sourceCodeLocation.startOffset, + endOffset: implicitParagraph.childNodes[ 0 ].sourceCodeLocation.endOffset, + } ); + } + } // Update the startOffset of the next implicit paragraph to be the end of the current node. currentSourceCodeLocation.startOffset = node.sourceCodeLocation.endOffset; } @@ -93,9 +112,28 @@ function combineIntoImplicitParagraphs( nodes, parentSourceCodeLocation = {} ) { } ); if ( hasChildren( implicitParagraph ) ) { + if ( implicitParagraph.childNodes[ 0 ].sourceCodeRange && ! isEmpty( implicitParagraph.childNodes[ 0 ].sourceCodeRange ) ) { + if ( implicitParagraph.sourceCodeLocation ) { + implicitParagraph.sourceCodeLocation.startOffset = implicitParagraph.childNodes[ 0 ].sourceCodeRange.startOffset; + } else { + implicitParagraph.sourceCodeLocation = new SourceCodeLocation( { + startOffset: implicitParagraph.childNodes[ 0 ].sourceCodeRange.startOffset, + endOffset: implicitParagraph.childNodes[ 0 ].sourceCodeRange.endOffset, + } ); + } + } else if ( implicitParagraph.childNodes[ 0 ].sourceCodeLocation && + ! 
isEmpty( implicitParagraph.childNodes[ 0 ].sourceCodeLocation ) ) { + if ( implicitParagraph.sourceCodeLocation ) { + implicitParagraph.sourceCodeLocation.startOffset = implicitParagraph.childNodes[ 0 ].sourceCodeLocation.startOffset; + } else { + implicitParagraph.sourceCodeLocation = new SourceCodeLocation( { + startOffset: implicitParagraph.childNodes[ 0 ].sourceCodeLocation.startOffset, + endOffset: implicitParagraph.childNodes[ 0 ].sourceCodeLocation.endOffset, + } ); + } + } newNodes.push( implicitParagraph ); } - return newNodes; } From 4acfd1483dc5222fa383b14c429c2d38e1e640a5 Mon Sep 17 00:00:00 2001 From: Hanna Worku Date: Fri, 25 Aug 2023 12:39:32 -0700 Subject: [PATCH 330/616] fix typos --- .../parse/build/private/filterTreeSpec.js | 30 ++++--------------- 1 file changed, 6 insertions(+), 24 deletions(-) diff --git a/packages/yoastseo/spec/parse/build/private/filterTreeSpec.js b/packages/yoastseo/spec/parse/build/private/filterTreeSpec.js index 615f8e6cda8..3e2e33fedf4 100644 --- a/packages/yoastseo/spec/parse/build/private/filterTreeSpec.js +++ b/packages/yoastseo/spec/parse/build/private/filterTreeSpec.js @@ -169,8 +169,8 @@ const samplesWithOneOccurrence = [ }, { element: "cite", - html: "\n\n

    If a triangle has one 90 degrees angle and one 30" + - "degrees angle, how big is the remaining angle?Fun for the whole family

    ", + html: "\n\n

    There are many craft ideas that are interesting to both children and adults." + + " Fun for the whole family!

    ", }, { element: "em", @@ -235,7 +235,7 @@ const samplesWithTwoOccurrences = [ }, { element: "bdi", - html: "\n\n

    Their name spells as أندرو or alternatively أندريه" + + html: "\n\n

    Their name spells as أندرو or alternatively أندريه" + "and both are correct

    ", }, { @@ -255,15 +255,11 @@ const samplesWithTwoOccurrences = [ }, { element: "ruby", - html: "くんしてどうぜず。", + html: "くんぜず。", }, { element: "rt", - html: "くんしてどうぜず。", - }, - { - element: "rt", - html: "くんしてどうぜず。", + html: "くんぜず。", }, { element: "sup", @@ -320,7 +316,7 @@ describe( "Miscellaneous tests", () => { // The head element seems to be removed by the parser we employ. const html = "\n
  • \nExamples can be found here\n
  • \n"; const tree = adapt( parseFragment( html, { sourceCodeLocationInfo: true } ) ); - expect( tree.findAll( child => child.name === "a" ) ).toHaveLength( 0 ); + expect( tree.findAll( child => child.name === "a" ) ).toHaveLength( 1 ); } ); it( "should filter out abbr elements", () => { @@ -337,20 +333,6 @@ describe( "Miscellaneous tests", () => { expect( tree.findAll( child => child.name === "b" ) ).toHaveLength( 0 ); } ); - it( "should filter out b elements", () => { - // The head element seems to be removed by the parser we employ. - const html = "\n

    Six abandoned kittens were adopted by a capybara.

    \n"; - const tree = adapt( parseFragment( html, { sourceCodeLocationInfo: true } ) ); - expect( tree.findAll( child => child.name === "b" ) ).toHaveLength( 0 ); - } ); - - it( "should filter out b elements", () => { - // The head element seems to be removed by the parser we employ. - const html = "\n

    Six abandoned kittens were adopted by a capybara.

    \n"; - const tree = adapt( parseFragment( html, { sourceCodeLocationInfo: true } ) ); - expect( tree.findAll( child => child.name === "b" ) ).toHaveLength( 0 ); - } ); - it( "should filter out the Elementor Yoast Breadcrumbs widget ", () => { // When the HTML enters the paper, the Breadcrumbs widget doesn't include the div tag. let html = "

    Home

    Date: Fri, 25 Aug 2023 15:41:29 -0700 Subject: [PATCH 331/616] fix failing specs --- .../parse/build/private/filterTreeSpec.js | 48 +++++++++++-------- .../build/private/alwaysFilterElements.js | 24 +++++++++- 2 files changed, 49 insertions(+), 23 deletions(-) diff --git a/packages/yoastseo/spec/parse/build/private/filterTreeSpec.js b/packages/yoastseo/spec/parse/build/private/filterTreeSpec.js index 3e2e33fedf4..6f953b1f9e2 100644 --- a/packages/yoastseo/spec/parse/build/private/filterTreeSpec.js +++ b/packages/yoastseo/spec/parse/build/private/filterTreeSpec.js @@ -200,7 +200,8 @@ const samplesWithOneOccurrence = [ }, { element: "wbr", - html: "

    To acquire AJAX, it us crucial to know the XMLHttpRequest Object.

    ", + html: "\n\n

    To acquire AJAX, it is crucial to know the XMLHttpRequest " + + "Object.

    ", }, ]; @@ -240,8 +241,8 @@ const samplesWithTwoOccurrences = [ }, { element: "br", - html: "\n\n

    The triange's angle is
    90 degrees
    and the second angle is" + - "30 degrees How big is the remaining angle?

    ", + html: "\n\n

    The triange's angle is
    90 degrees
    and the second angle" + + "is
    30 degrees
    How big is the remaining angle?

    ", }, { element: "i", @@ -255,16 +256,12 @@ const samplesWithTwoOccurrences = [ }, { element: "ruby", - html: "くんぜず。", - }, - { - element: "rt", - html: "くんぜず。", + html: "\n\n

    くんぜず。

    ", }, { element: "sup", - html: "

    Their names areMlle Bujeau " + - "andMme Gregoire.

    ", + html: "\n\n

    Their names areMlle Bujeau " + + "andMme Gregoire.

    ", }, ]; @@ -278,31 +275,40 @@ describe.each( samplesWithTwoOccurrences )( "Tests HTML elements, part 2", ( htm } ); describe( "Miscellaneous tests", () => { - it( "shouldn't filter out elements like , which are not included in the list of excluded elements", () => { + it( "should filter out elements like , which are included in the list of excluded elements", () => { const html = "

    Welcome to the blue screen of death.

    "; const tree = adapt( parseFragment( html, { sourceCodeLocationInfo: true } ) ); expect( tree.findAll( child => child.name === "strong" ) ).toHaveLength( 1 ); const filteredTree = filterTree( tree, permanentFilters ); - expect( filteredTree.findAll( child => child.name === "strong" ) ).toHaveLength( 1 ); + expect( filteredTree.findAll( child => child.name === "strong" ) ).toHaveLength( 0 ); } ); - it( "should filter out elements like when nested within excluded elements, e.g. ", () => { - const html = "

    Welcome to the blue screen of death.
    Kiss your computer goodbye.

    "; + it( "shouldn't filter out elements like , which are included in the list of excluded elements", () => { + const html = "

    Welcome to the blue screen of death.

    "; const tree = adapt( parseFragment( html, { sourceCodeLocationInfo: true } ) ); - expect( tree.findAll( child => child.name === "strong" ) ).toHaveLength( 1 ); + expect( tree.findAll( child => child.name === "mark" ) ).toHaveLength( 1 ); const filteredTree = filterTree( tree, permanentFilters ); - expect( filteredTree.findAll( child => child.name === "strong" ) ).toHaveLength( 0 ); + expect( filteredTree.findAll( child => child.name === "mark" ) ).toHaveLength( 1 ); } ); - it( "should filter out elements like when nested within a regular element, which is nested in an excluded element", () => { - const html = "

    Welcome to the blue screen ofArgh!death.
    Kiss your computer goodbye.

    "; + it( "should filter out elements like when nested within excluded elements, e.g. ", () => { + const html = "

    Welcome to the blue screen of death.
    Kiss your computer goodbye.

    "; const tree = adapt( parseFragment( html, { sourceCodeLocationInfo: true } ) ); - expect( tree.findAll( child => child.name === "strong" ) ).toHaveLength( 1 ); + expect( tree.findAll( child => child.name === "mark" ) ).toHaveLength( 1 ); const filteredTree = filterTree( tree, permanentFilters ); - expect( filteredTree.findAll( child => child.name === "strong" ) ).toHaveLength( 0 ); + expect( filteredTree.findAll( child => child.name === "mark" ) ).toHaveLength( 0 ); + } ); + + it( "should filter out elements like when nested within a regular element, which is nested in an excluded element", () => { + const html = "

    Welcome to the blue screen ofArgh!death.
    Kiss your computer goodbye.

    "; + + const tree = adapt( parseFragment( html, { sourceCodeLocationInfo: true } ) ); + expect( tree.findAll( child => child.name === "mark" ) ).toHaveLength( 1 ); + const filteredTree = filterTree( tree, permanentFilters ); + expect( filteredTree.findAll( child => child.name === "mark" ) ).toHaveLength( 0 ); } ); it( "should filter out head elements", () => { @@ -328,7 +334,7 @@ describe( "Miscellaneous tests", () => { it( "should filter out b elements", () => { // The head element seems to be removed by the parser we employ. - const html = "\n

    Six abandoned kittens were adopted by a capybara.

    \n"; + const html = "\n<b>Six abandoned kittens were adopted by a capybara.</b></title\n</html>"; const tree = adapt( parseFragment( html, { sourceCodeLocationInfo: true } ) ); expect( tree.findAll( child => child.name === "b" ) ).toHaveLength( 0 ); } ); diff --git a/packages/yoastseo/src/parse/build/private/alwaysFilterElements.js b/packages/yoastseo/src/parse/build/private/alwaysFilterElements.js index a76f4a6c5d8..e762a506d8b 100644 --- a/packages/yoastseo/src/parse/build/private/alwaysFilterElements.js +++ b/packages/yoastseo/src/parse/build/private/alwaysFilterElements.js @@ -6,8 +6,7 @@ import { elementHasName, elementHasClass, elementHasID } from "./filterHelpers"; // These are elements that we don't want to include in the analysis and that can be child nodes of paragraphs or headings. export const canBeChildOfParagraph = [ "code", "kbd", "math", "q", "samp", "script", "var", "#comment", "a", "abbr", "b", "bdi", "br", - "cite", "em", "i", "ins", "map", "q", "ruby", "rt", "s", "samp", "small", "strong", "sup", "u", "var", - "wbr" ]; + "cite", "em", "i", "ins", "map", "q", "ruby", "s", "samp", "small", "strong", "sup", "u", "wbr" ]; const permanentFilters = [ // Filters out Yoast blocks that don't need to be part of the analysis. @@ -50,6 +49,27 @@ const permanentFilters = [ elementHasName( "textarea" ), elementHasName( "title" ), elementHasName( "var" ), + elementHasName( "#comment" ), + elementHasName( "a" ), + elementHasName( "abbr" ), + elementHasName( "b" ), + elementHasName( "bdi" ), + elementHasName( "br" ), + elementHasName( "cite" ), + elementHasName( "em" ), + elementHasName( "i" ), + elementHasName( "ins" ), + elementHasName( "map" ), + elementHasName( "q" ), + elementHasName( "ruby" ), + elementHasName( "rt" ), + elementHasName( "s" ), + elementHasName( "samp" ), + elementHasName( "small" ), + elementHasName( "strong" ), + elementHasName( "sup" ), + elementHasName( "u" ), + elementHasName( "wbr" ), ]; export default permanentFilters; From 6ac8352179d833d098d74e8a4b27fb88091bea3a Mon Sep 17 00:00:00 2001 From: Hanna Worku <hanna@yoast.com> Date: Fri, 25 Aug 2023 15:49:45 -0700 Subject: [PATCH 332/616] remove added invalid br element --- packages/yoastseo/spec/parse/build/private/filterTreeSpec.js | 5 ----- .../yoastseo/src/parse/build/private/alwaysFilterElements.js | 2 +- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/packages/yoastseo/spec/parse/build/private/filterTreeSpec.js b/packages/yoastseo/spec/parse/build/private/filterTreeSpec.js index 6f953b1f9e2..050fd8e9e6b 100644 --- a/packages/yoastseo/spec/parse/build/private/filterTreeSpec.js +++ b/packages/yoastseo/spec/parse/build/private/filterTreeSpec.js @@ -239,11 +239,6 @@ const samplesWithTwoOccurrences = [ html: "<!DOCTYPE html>\n<html lang=\"en-US\">\n<body><p>Their name spells as <bdi>أندرو</bdi> or alternatively <bdi>أندريه</bdi>" + "and both are correct</p></body></html>", }, - { - element: "br", - html: "<!DOCTYPE html>\n<html lang=\"en-US\">\n<body><p>The triange's angle is <br>90 degrees</br> and the second angle" + - "is <br>30 degrees</br> How big is the remaining angle?</p></body></html>", - }, { element: "i", html: "<!DOCTYPE html>\n<html lang=\"en-US\">\n<body><p>These concepts include <i>terms</i> and <i>taxonomies</i>," + diff --git a/packages/yoastseo/src/parse/build/private/alwaysFilterElements.js b/packages/yoastseo/src/parse/build/private/alwaysFilterElements.js index e762a506d8b..7d703df0dd2 100644 --- a/packages/yoastseo/src/parse/build/private/alwaysFilterElements.js +++ 
b/packages/yoastseo/src/parse/build/private/alwaysFilterElements.js @@ -5,7 +5,7 @@ import { elementHasName, elementHasClass, elementHasID } from "./filterHelpers"; // These are elements that we don't want to include in the analysis and that can be child nodes of paragraphs or headings. -export const canBeChildOfParagraph = [ "code", "kbd", "math", "q", "samp", "script", "var", "#comment", "a", "abbr", "b", "bdi", "br", +export const canBeChildOfParagraph = [ "code", "kbd", "math", "q", "samp", "script", "var", "#comment", "a", "abbr", "b", "bdi", "cite", "em", "i", "ins", "map", "q", "ruby", "s", "samp", "small", "strong", "sup", "u", "wbr" ]; const permanentFilters = [ From 460c1cdf09967a4bd98cd91dba936cda42cb675e Mon Sep 17 00:00:00 2001 From: Hanna Worku <hanna@yoast.com> Date: Fri, 25 Aug 2023 16:58:13 -0700 Subject: [PATCH 333/616] add correct elements --- .../parse/build/private/filterTreeSpec.js | 90 +++++-------------- .../build/private/alwaysFilterElements.js | 25 ++---- 2 files changed, 29 insertions(+), 86 deletions(-) diff --git a/packages/yoastseo/spec/parse/build/private/filterTreeSpec.js b/packages/yoastseo/spec/parse/build/private/filterTreeSpec.js index 050fd8e9e6b..8a7cdf8e28b 100644 --- a/packages/yoastseo/spec/parse/build/private/filterTreeSpec.js +++ b/packages/yoastseo/spec/parse/build/private/filterTreeSpec.js @@ -173,36 +173,14 @@ const samplesWithOneOccurrence = [ " <cite>Fun for the whole family!</cite></p></body></html>", }, { - element: "em", - html: "<!DOCTYPE html>\n<html lang=\"en-US\">\n<body><p>Armadillos <em>are</em> cute animals.</p></body></html>", - }, - { - element: "q", - html: "<!DOCTYPE html>\n<html lang=\"en-US\">\n<body><p>They started singing <q>what goes around comes around</q> on top " + - "of their lungs.</p></body></html>", - }, - { - element: "s", - html: "<!DOCTYPE html>\n<html lang=\"en-US\">\n<body><p>This book's <s>price was 25</s> and is now only 15</p></body></html>", - }, - { - element: "strong", - html: "<!DOCTYPE html>\n<html lang=\"en-US\">\n<body><p><strong>The new price is only 15</strong></p></body></html>", + element: "output", + html: "<!DOCTYPE html>\n<html lang=\"en-US\">\n<body><p>Armadillos <output id='x'>x</output> are cute animals.</p></body></html>", }, { element: "samp", html: "<!DOCTYPE html>\n<html lang=\"en-US\">\n<body><p>The automated chat said<samp>The answer is incorrect</samp>but I wasn't " + "sure why.</p></body></html>", }, - { - element: "u", - html: "<!DOCTYPE html>\n<html lang=\"en-US\">\n<body><p>The <u>ocen</u> has plenty of animals.</p></body></html>", - }, - { - element: "wbr", - html: "<!DOCTYPE html>\n<html lang=\"en-US\">\n<body></body><p>To acquire AJAX, it is crucial to know the XML<wbr>Http</wbr>Request " + - "Object.</p>", - }, ]; describe.each( samplesWithOneOccurrence )( "Tests HTML elements, part 1 ", ( htmlElement ) => { @@ -235,28 +213,14 @@ const samplesWithTwoOccurrences = [ "degrees angle, how big is the remaining angle?</p><p>Fun for the whole family</p></body></html>", }, { - element: "bdi", - html: "<!DOCTYPE html>\n<html lang=\"en-US\">\n<body><p>Their name spells as <bdi>أندرو</bdi> or alternatively <bdi>أندريه</bdi>" + - "and both are correct</p></body></html>", - }, - { - element: "i", - html: "<!DOCTYPE html>\n<html lang=\"en-US\">\n<body><p>These concepts include <i>terms</i> and <i>taxonomies</i>," + + element: "del", + html: "<!DOCTYPE html>\n<html lang=\"en-US\">\n<body><p>These concepts include <del>terms</del> and <del>taxonomies</del>," + " among 
others</p></body></html>", }, { - element: "ins", - html: "<!DOCTYPE html>\n<html lang=\"en-US\">\n<body><aside><ins>Pies are amazing.</ins><ins>So are " + - "cakes.</ins></aside></body></html>", - }, - { - element: "ruby", - html: "<!DOCTYPE html>\n<html lang=\"ja-JA\">\n<body><p><ruby>君<rt>くん</rt>子<rt>し</ruby>は<ruby>和<rt>わ</ruby>ぜず。</p></body></html>", - }, - { - element: "sup", - html: "<!DOCTYPE html>\n<html lang=\"en-US\">\n<body><p>Their names are<span lang=\"fr\"><abbr>M<sup>lle</sup></abbr> Bujeau</span> " + - "and<span lang=\"fr\"><abbr>M<sup>me</sup></abbr> Gregoire</span>.</p></body></html>", + element: "label", + html: "<!DOCTYPE html>\n<html lang=\"en-US\">\n<body><aside><label>Pies are amazing.</label>There are many reciples.<label>So are " + + "cakes.</label></aside></body></html>", }, ]; @@ -270,40 +234,40 @@ describe.each( samplesWithTwoOccurrences )( "Tests HTML elements, part 2", ( htm } ); describe( "Miscellaneous tests", () => { - it( "should filter out elements like <strong>, which are included in the list of excluded elements", () => { + it( "shouldn't filter out elements like <strong>, which are not included in the list of excluded elements", () => { const html = "<p>Welcome to the blue screen of <strong>death</strong>.</p>"; const tree = adapt( parseFragment( html, { sourceCodeLocationInfo: true } ) ); expect( tree.findAll( child => child.name === "strong" ) ).toHaveLength( 1 ); const filteredTree = filterTree( tree, permanentFilters ); - expect( filteredTree.findAll( child => child.name === "strong" ) ).toHaveLength( 0 ); + expect( filteredTree.findAll( child => child.name === "strong" ) ).toHaveLength( 1 ); } ); - it( "shouldn't filter out elements like <mark>, which are included in the list of excluded elements", () => { - const html = "<p>Welcome to the blue screen of <mark>death</mark>.</p>"; + it( "shouldn't filter out elements like <strong>, which are included in the list of excluded elements", () => { + const html = "<p>Welcome to the blue screen of <strong>death</strong>.</p>"; const tree = adapt( parseFragment( html, { sourceCodeLocationInfo: true } ) ); - expect( tree.findAll( child => child.name === "mark" ) ).toHaveLength( 1 ); + expect( tree.findAll( child => child.name === "strong" ) ).toHaveLength( 1 ); const filteredTree = filterTree( tree, permanentFilters ); - expect( filteredTree.findAll( child => child.name === "mark" ) ).toHaveLength( 1 ); + expect( filteredTree.findAll( child => child.name === "strong" ) ).toHaveLength( 1 ); } ); - it( "should filter out elements like <mark> when nested within excluded elements, e.g. <samp>", () => { - const html = "<p><samp>Welcome to the blue screen of <mark>death</mark>.<br>Kiss your computer goodbye.</samp></p>"; + it( "should filter out elements like <strong> when nested within excluded elements, e.g. 
<samp>", () => { + const html = "<p><samp>Welcome to the blue screen of <strong>death</strong>.<br>Kiss your computer goodbye.</samp></p>"; const tree = adapt( parseFragment( html, { sourceCodeLocationInfo: true } ) ); - expect( tree.findAll( child => child.name === "mark" ) ).toHaveLength( 1 ); + expect( tree.findAll( child => child.name === "strong" ) ).toHaveLength( 1 ); const filteredTree = filterTree( tree, permanentFilters ); - expect( filteredTree.findAll( child => child.name === "mark" ) ).toHaveLength( 0 ); + expect( filteredTree.findAll( child => child.name === "strong" ) ).toHaveLength( 0 ); } ); - it( "should filter out elements like <mark> when nested within a regular element, which is nested in an excluded element", () => { - const html = "<p><samp>Welcome to the blue screen of<span>Argh!<mark>death</mark></span>.<br>Kiss your computer goodbye.</samp></p>"; + it( "should filter out elements like <strong> when nested within a regular element, which is nested in an excluded element", () => { + const html = "<p><samp>Welcome to the blue screen of<span>Argh!<strong>death</strong></span>.<br>Kiss your computer goodbye.</samp></p>"; const tree = adapt( parseFragment( html, { sourceCodeLocationInfo: true } ) ); - expect( tree.findAll( child => child.name === "mark" ) ).toHaveLength( 1 ); + expect( tree.findAll( child => child.name === "strong" ) ).toHaveLength( 1 ); const filteredTree = filterTree( tree, permanentFilters ); - expect( filteredTree.findAll( child => child.name === "mark" ) ).toHaveLength( 0 ); + expect( filteredTree.findAll( child => child.name === "strong" ) ).toHaveLength( 0 ); } ); it( "should filter out head elements", () => { @@ -320,20 +284,12 @@ describe( "Miscellaneous tests", () => { expect( tree.findAll( child => child.name === "a" ) ).toHaveLength( 1 ); } ); - it( "should filter out abbr elements", () => { + it( "should filter out map elements", () => { // The head element seems to be removed by the parser we employ. - const html = "<!DOCTYPE html>\n<title>About artificial intelligence<abbr>AI</abbr>\n\n"; + const html = "\nAbout artificial intelligence<map name=>AI</map>\n\n"; const tree = adapt( parseFragment( html, { sourceCodeLocationInfo: true } ) ); expect( tree.findAll( child => child.name === "abbr" ) ).toHaveLength( 0 ); } ); - - it( "should filter out b elements", () => { - // The head element seems to be removed by the parser we employ. - const html = "\n<b>Six abandoned kittens were adopted by a capybara.</b></title\n</html>"; - const tree = adapt( parseFragment( html, { sourceCodeLocationInfo: true } ) ); - expect( tree.findAll( child => child.name === "b" ) ).toHaveLength( 0 ); - } ); - it( "should filter out the Elementor Yoast Breadcrumbs widget ", () => { // When the HTML enters the paper, the Breadcrumbs widget doesn't include the div tag. let html = "<p id=\"breadcrumbs\"><span><span><a href=\"https://basic.wordpress.test/\">Home</a></span></span></p><div " + diff --git a/packages/yoastseo/src/parse/build/private/alwaysFilterElements.js b/packages/yoastseo/src/parse/build/private/alwaysFilterElements.js index 7d703df0dd2..eb5a165d4a0 100644 --- a/packages/yoastseo/src/parse/build/private/alwaysFilterElements.js +++ b/packages/yoastseo/src/parse/build/private/alwaysFilterElements.js @@ -5,8 +5,8 @@ import { elementHasName, elementHasClass, elementHasID } from "./filterHelpers"; // These are elements that we don't want to include in the analysis and that can be child nodes of paragraphs or headings. 
-export const canBeChildOfParagraph = [ "code", "kbd", "math", "q", "samp", "script", "var", "#comment", "a", "abbr", "b", "bdi", - "cite", "em", "i", "ins", "map", "q", "ruby", "s", "samp", "small", "strong", "sup", "u", "wbr" ]; +export const canBeChildOfParagraph = [ "code", "kbd", "math", "q", "samp", "script", "var", "#comment", "a", "del", "cite", "label", + "map", "noscript", "output" ]; const permanentFilters = [ // Filters out Yoast blocks that don't need to be part of the analysis. @@ -51,25 +51,12 @@ const permanentFilters = [ elementHasName( "var" ), elementHasName( "#comment" ), elementHasName( "a" ), - elementHasName( "abbr" ), - elementHasName( "b" ), - elementHasName( "bdi" ), - elementHasName( "br" ), + elementHasName( "del" ), elementHasName( "cite" ), - elementHasName( "em" ), - elementHasName( "i" ), - elementHasName( "ins" ), + elementHasName( "label" ), elementHasName( "map" ), - elementHasName( "q" ), - elementHasName( "ruby" ), - elementHasName( "rt" ), - elementHasName( "s" ), - elementHasName( "samp" ), - elementHasName( "small" ), - elementHasName( "strong" ), - elementHasName( "sup" ), - elementHasName( "u" ), - elementHasName( "wbr" ), + elementHasName( "noscript" ), + elementHasName( "output" ), ]; export default permanentFilters; From 42a6a1d9121ab4c86e87d1899be1f3bbed01f52e Mon Sep 17 00:00:00 2001 From: Hanna Worku <hanna@yoast.com> Date: Fri, 25 Aug 2023 17:09:58 -0700 Subject: [PATCH 334/616] Remove element from being filtered --- .../yoastseo/spec/parse/build/private/filterTreeSpec.js | 7 ------- .../src/parse/build/private/alwaysFilterElements.js | 3 +-- 2 files changed, 1 insertion(+), 9 deletions(-) diff --git a/packages/yoastseo/spec/parse/build/private/filterTreeSpec.js b/packages/yoastseo/spec/parse/build/private/filterTreeSpec.js index 8a7cdf8e28b..70eb5809335 100644 --- a/packages/yoastseo/spec/parse/build/private/filterTreeSpec.js +++ b/packages/yoastseo/spec/parse/build/private/filterTreeSpec.js @@ -277,13 +277,6 @@ describe( "Miscellaneous tests", () => { expect( tree.findAll( child => child.name === "head" ) ).toHaveLength( 0 ); } ); - it( "should filter out a elements", () => { - // The head element seems to be removed by the parser we employ. - const html = "<!DOCTYPE html>\n<li>\n<a>Examples can be found here</a>\n</li>\n</html>"; - const tree = adapt( parseFragment( html, { sourceCodeLocationInfo: true } ) ); - expect( tree.findAll( child => child.name === "a" ) ).toHaveLength( 1 ); - } ); - it( "should filter out map elements", () => { // The head element seems to be removed by the parser we employ. const html = "<!DOCTYPE html>\n<title>About artificial intelligence<map name=>AI</map>\n\n"; diff --git a/packages/yoastseo/src/parse/build/private/alwaysFilterElements.js b/packages/yoastseo/src/parse/build/private/alwaysFilterElements.js index eb5a165d4a0..a6b3a5e81f8 100644 --- a/packages/yoastseo/src/parse/build/private/alwaysFilterElements.js +++ b/packages/yoastseo/src/parse/build/private/alwaysFilterElements.js @@ -5,7 +5,7 @@ import { elementHasName, elementHasClass, elementHasID } from "./filterHelpers"; // These are elements that we don't want to include in the analysis and that can be child nodes of paragraphs or headings. 
-export const canBeChildOfParagraph = [ "code", "kbd", "math", "q", "samp", "script", "var", "#comment", "a", "del", "cite", "label", +export const canBeChildOfParagraph = [ "code", "kbd", "math", "q", "samp", "script", "var", "#comment", "del", "cite", "label", "map", "noscript", "output" ]; const permanentFilters = [ @@ -50,7 +50,6 @@ const permanentFilters = [ elementHasName( "title" ), elementHasName( "var" ), elementHasName( "#comment" ), - elementHasName( "a" ), elementHasName( "del" ), elementHasName( "cite" ), elementHasName( "label" ), From 45e5f90af1637fb1e71841f77b4e24f3d5fb950f Mon Sep 17 00:00:00 2001 From: hdvos Date: Mon, 28 Aug 2023 11:10:35 +0200 Subject: [PATCH 335/616] adapt TextSpec.js to changes in combineIntoImplicitParagraphs.js --- packages/yoastseo/spec/parse/structure/TextSpec.js | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/packages/yoastseo/spec/parse/structure/TextSpec.js b/packages/yoastseo/spec/parse/structure/TextSpec.js index b1e63e2a57c..3ac9ba20c25 100644 --- a/packages/yoastseo/spec/parse/structure/TextSpec.js +++ b/packages/yoastseo/spec/parse/structure/TextSpec.js @@ -2,8 +2,9 @@ import Text from "../../../src/parse/structure/Text"; describe( "A test for the Text object", function() { it( "should correctly construct a Text object", function() { - expect( new Text( "This is a text. It consists of 2 sentences" ) ).toEqual( { - name: "#text", value: "This is a text. It consists of 2 sentences", + expect( new Text( { value: "This is a text. It consists of 2 sentences", + sourceCodeLocation: { startOffset: 0, endOffset: 40 } } ) ).toEqual( { + name: "#text", value: "This is a text. It consists of 2 sentences", sourceCodeRange: { startOffset: 0, endOffset: 40 }, } ); } ); } ); From 55de5e544db904f5aa4bf2d13e0f89a6055a126f Mon Sep 17 00:00:00 2001 From: hdvos Date: Mon, 28 Aug 2023 11:10:58 +0200 Subject: [PATCH 336/616] adapt filterBeforeTokenizingSpec.js to changes in combineIntoImplicitParagraphs.js --- .../build/private/filterBeforeTokenizingSpec.js | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/packages/yoastseo/spec/parse/build/private/filterBeforeTokenizingSpec.js b/packages/yoastseo/spec/parse/build/private/filterBeforeTokenizingSpec.js index eb57224f5af..61b48099b52 100644 --- a/packages/yoastseo/spec/parse/build/private/filterBeforeTokenizingSpec.js +++ b/packages/yoastseo/spec/parse/build/private/filterBeforeTokenizingSpec.js @@ -21,6 +21,10 @@ describe( "A test for filterBeforeTokenizing", () => { { name: "#text", value: "Some text and code ", + sourceCodeRange: { + startOffset: 3, + endOffset: 22, + }, }, { name: "code", @@ -107,6 +111,10 @@ describe( "A test for filterBeforeTokenizing", () => { { name: "#text", value: "Hello, world!", + sourceCodeRange: { + startOffset: 53, + endOffset: 66, + }, }, ], sourceCodeLocation: { @@ -160,6 +168,10 @@ describe( "A test for filterBeforeTokenizing", () => { { name: "#text", value: "Some text and code ", + sourceCodeRange: { + startOffset: 3, + endOffset: 22, + }, }, { name: "code", @@ -215,6 +227,10 @@ describe( "A test for filterBeforeTokenizing", () => { { name: "#text", value: "Some text and code ", + sourceCodeRange: { + startOffset: 3, + endOffset: 22, + }, }, { name: "code", From 53a758b0810a37559af0a1707449a2a39aa54a01 Mon Sep 17 00:00:00 2001 From: hdvos Date: Mon, 28 Aug 2023 11:11:09 +0200 Subject: [PATCH 337/616] adapt adaptSpec.js to changes in combineIntoImplicitParagraphs.js --- .../spec/parse/build/private/adaptSpec.js | 301 +++++++++++++++++- 1 
file changed, 288 insertions(+), 13 deletions(-) diff --git a/packages/yoastseo/spec/parse/build/private/adaptSpec.js b/packages/yoastseo/spec/parse/build/private/adaptSpec.js index 05395403764..24d67ae3426 100644 --- a/packages/yoastseo/spec/parse/build/private/adaptSpec.js +++ b/packages/yoastseo/spec/parse/build/private/adaptSpec.js @@ -8,7 +8,7 @@ describe( "The adapt function", () => { it( "adapts a basic div element", () => { const html = "

<div><p class=\"yoast\">Hello, world!</p></div>

    "; - const tree = parseFragment( html ); + const tree = parseFragment( html, { sourceCodeLocationInfo: true } ); const adaptedTree = adapt( tree ); @@ -17,17 +17,45 @@ describe( "The adapt function", childNodes: [ { attributes: {}, childNodes: [ { + name: "p", + isImplicit: false, + sourceCodeLocation: { + startOffset: 5, + endOffset: 39, + startTag: { + startOffset: 5, + endOffset: 22, + }, + endTag: { + startOffset: 35, + endOffset: 39, + }, + }, attributes: { "class": new Set( [ "yoast" ] ), }, childNodes: [ { name: "#text", value: "Hello, world!", + sourceCodeRange: { + startOffset: 22, + endOffset: 35, + }, } ], - isImplicit: false, - name: "p", } ], name: "div", + sourceCodeLocation: { + startOffset: 0, + endOffset: 45, + startTag: { + startOffset: 0, + endOffset: 5, + }, + endTag: { + startOffset: 39, + endOffset: 45, + }, + }, } ], name: "#document-fragment", }; @@ -37,21 +65,20 @@ describe( "The adapt function", it( "adapts a heading element", () => { const html = "

<h1>Hello World!</h1>

    "; - const tree = parseFragment( html ); - + const tree = parseFragment( html, { sourceCodeLocationInfo: true } ); const adaptedTree = adapt( tree ); const expected = new Node( "#document-fragment", {}, [ new Heading( 1, {}, [ - new Text( "Hello World!" ), - ] ), + new Text( { value: "Hello World!", sourceCodeLocation: { startOffset: 4, endOffset: 16 } } ), + ], { startOffset: 0, endOffset: 21, startTag: { startOffset: 0, endOffset: 4 }, endTag: { startOffset: 16, endOffset: 21 } } ), ] ); expect( adaptedTree ).toEqual( expected ); } ); it( "adapts a tree with a code element inside a paragraph", () => { const html = "

<p>Hello World! <code>function()</code> Hello Yoast!</p>

    "; - const tree = parseFragment( html ); + const tree = parseFragment( html, { sourceCodeLocationInfo: true } ); const adaptedTree = adapt( tree ); @@ -63,24 +90,60 @@ describe( "The adapt function", name: "p", isImplicit: false, attributes: {}, + sourceCodeLocation: { + startOffset: 0, + endOffset: 56, + startTag: { + startOffset: 0, + endOffset: 3, + }, + endTag: { + startOffset: 52, + endOffset: 56, + }, + }, childNodes: [ { name: "#text", value: "Hello World! ", + sourceCodeRange: { + startOffset: 3, + endOffset: 16, + }, }, { name: "code", attributes: {}, + sourceCodeLocation: { + startOffset: 16, + endOffset: 39, + startTag: { + startOffset: 16, + endOffset: 22, + }, + endTag: { + startOffset: 32, + endOffset: 39, + }, + }, childNodes: [ { name: "#text", value: "function()", + sourceCodeRange: { + startOffset: 22, + endOffset: 32, + }, }, ], }, { name: "#text", value: " Hello Yoast!", + sourceCodeRange: { + startOffset: 39, + endOffset: 52, + }, }, ], }, @@ -91,7 +154,7 @@ describe( "The adapt function", it( "adapts a tree with a code element within a sentence", () => { const html = "

<p>Hello <code>push()</code> World!</p>

    "; - const tree = parseFragment( html ); + const tree = parseFragment( html, { sourceCodeLocationInfo: true } ); const adaptedTree = adapt( tree ); @@ -103,24 +166,60 @@ describe( "The adapt function", name: "p", isImplicit: false, attributes: {}, + sourceCodeLocation: { + startOffset: 0, + endOffset: 39, + startTag: { + startOffset: 0, + endOffset: 3, + }, + endTag: { + startOffset: 35, + endOffset: 39, + }, + }, childNodes: [ { name: "#text", value: "Hello ", + sourceCodeRange: { + startOffset: 3, + endOffset: 9, + }, }, { name: "code", attributes: {}, + sourceCodeLocation: { + startOffset: 9, + endOffset: 28, + startTag: { + startOffset: 9, + endOffset: 15, + }, + endTag: { + startOffset: 21, + endOffset: 28, + }, + }, childNodes: [ { name: "#text", value: "push()", + sourceCodeRange: { + startOffset: 15, + endOffset: 21, + }, }, ], }, { name: "#text", value: " World!", + sourceCodeRange: { + startOffset: 28, + endOffset: 35, + }, }, ], }, @@ -131,7 +230,7 @@ describe( "The adapt function", it( "adapts a tree with a script element", () => { const html = "

<div><script>alert(\"Hello World!\");</script><p>Hello World!</p></div>

    "; - const tree = parseFragment( html ); + const tree = parseFragment( html, { sourceCodeLocationInfo: true } ); const adaptedTree = adapt( tree ); @@ -142,19 +241,51 @@ describe( "The adapt function", { name: "div", attributes: {}, + sourceCodeLocation: { + startOffset: 0, + endOffset: 69, + startTag: { + startOffset: 0, + endOffset: 5, + }, + endTag: { + startOffset: 63, + endOffset: 69, + }, + }, childNodes: [ { name: "p", isImplicit: true, attributes: {}, + sourceCodeLocation: { + startOffset: 5, + endOffset: 44, + }, childNodes: [ { name: "script", attributes: {}, + sourceCodeLocation: { + startOffset: 5, + endOffset: 44, + startTag: { + startOffset: 5, + endOffset: 13, + }, + endTag: { + startOffset: 35, + endOffset: 44, + }, + }, childNodes: [ { name: "#text", value: "alert(\"Hello World!\");", + sourceCodeRange: { + startOffset: 13, + endOffset: 35, + }, }, ], }, @@ -164,10 +295,26 @@ describe( "The adapt function", name: "p", isImplicit: false, attributes: {}, + sourceCodeLocation: { + startOffset: 44, + endOffset: 63, + startTag: { + startOffset: 44, + endOffset: 47, + }, + endTag: { + startOffset: 59, + endOffset: 63, + }, + }, childNodes: [ { name: "#text", value: "Hello World!", + sourceCodeRange: { + startOffset: 47, + endOffset: 59, + }, }, ], }, @@ -180,7 +327,7 @@ describe( "The adapt function", it( "adapts a tree with a script element within a paragraph", () => { const html = "

<div><p><script>alert(\"Hello World!\");</script> Hello World!</p></div>

    "; - const tree = parseFragment( html ); + const tree = parseFragment( html, { sourceCodeLocationInfo: true } ); const adaptedTree = adapt( tree ); @@ -191,25 +338,69 @@ describe( "The adapt function", { name: "div", attributes: {}, + sourceCodeLocation: { + startOffset: 0, + endOffset: 70, + startTag: { + startOffset: 0, + endOffset: 5, + }, + endTag: { + startOffset: 64, + endOffset: 70, + }, + }, childNodes: [ { name: "p", isImplicit: false, attributes: {}, + sourceCodeLocation: { + startOffset: 5, + endOffset: 64, + startTag: { + startOffset: 5, + endOffset: 8, + }, + endTag: { + startOffset: 60, + endOffset: 64, + }, + }, childNodes: [ { name: "script", attributes: {}, + sourceCodeLocation: { + startOffset: 8, + endOffset: 47, + startTag: { + startOffset: 8, + endOffset: 16, + }, + endTag: { + startOffset: 38, + endOffset: 47, + }, + }, childNodes: [ { name: "#text", value: "alert(\"Hello World!\");", + sourceCodeRange: { + startOffset: 16, + endOffset: 38, + }, }, ], }, { name: "#text", value: " Hello World!", + sourceCodeRange: { + startOffset: 47, + endOffset: 60, + }, }, ], }, @@ -222,7 +413,7 @@ describe( "The adapt function", it( "adapts a tree with a style element", () => { const html = "

<style>div { color: #FF00FF}</style><p>Hello World!</p>

    "; - const tree = parseFragment( html ); + const tree = parseFragment( html, { sourceCodeLocationInfo: true } ); const adaptedTree = adapt( tree ); @@ -233,15 +424,35 @@ describe( "The adapt function", { name: "style", attributes: {}, + sourceCodeLocation: { + startOffset: 0, + endOffset: 36, + startTag: { + startOffset: 0, + endOffset: 7, + }, + endTag: { + startOffset: 28, + endOffset: 36, + }, + }, childNodes: [ { name: "p", isImplicit: true, attributes: {}, + sourceCodeLocation: { + startOffset: 7, + endOffset: 28, + }, childNodes: [ { name: "#text", value: "div { color: #FF00FF}", + sourceCodeRange: { + startOffset: 7, + endOffset: 28, + }, }, ], }, @@ -251,10 +462,26 @@ describe( "The adapt function", name: "p", isImplicit: false, attributes: {}, + sourceCodeLocation: { + startOffset: 36, + endOffset: 55, + startTag: { + startOffset: 36, + endOffset: 39, + }, + endTag: { + startOffset: 51, + endOffset: 55, + }, + }, childNodes: [ { name: "#text", value: "Hello World!", + sourceCodeRange: { + startOffset: 39, + endOffset: 51, + }, }, ], }, @@ -266,7 +493,7 @@ describe( "The adapt function", it( "adapts a tree with a blockquote element", () => { const html = "

<div><p>Hello World!</p><blockquote>Hello Yoast!</blockquote></div>
    "; - const tree = parseFragment( html ); + const tree = parseFragment( html, { sourceCodeLocationInfo: true } ); const adaptedTree = adapt( tree ); @@ -277,30 +504,78 @@ describe( "The adapt function", { name: "div", attributes: {}, + sourceCodeLocation: { + startOffset: 0, + endOffset: 67, + startTag: { + startOffset: 0, + endOffset: 5, + }, + endTag: { + startOffset: 61, + endOffset: 67, + }, + }, childNodes: [ { name: "p", isImplicit: false, attributes: {}, + sourceCodeLocation: { + startOffset: 5, + endOffset: 24, + startTag: { + startOffset: 5, + endOffset: 8, + }, + endTag: { + startOffset: 20, + endOffset: 24, + }, + }, childNodes: [ { name: "#text", value: "Hello World!", + sourceCodeRange: { + startOffset: 8, + endOffset: 20, + }, }, ], }, { name: "blockquote", attributes: {}, + sourceCodeLocation: { + startOffset: 24, + endOffset: 61, + startTag: { + startOffset: 24, + endOffset: 36, + }, + endTag: { + startOffset: 48, + endOffset: 61, + }, + }, childNodes: [ { name: "p", isImplicit: true, attributes: {}, + sourceCodeLocation: { + startOffset: 36, + endOffset: 48, + }, childNodes: [ { name: "#text", value: "Hello Yoast!", + sourceCodeRange: { + startOffset: 36, + endOffset: 48, + }, }, ], }, From 2ce70642a42298d4dc68106f2f889c83efab504b Mon Sep 17 00:00:00 2001 From: hdvos Date: Mon, 28 Aug 2023 13:45:57 +0200 Subject: [PATCH 338/616] remove console logs --- .../private/getTextElementPositionsSpec.js | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/packages/yoastseo/spec/parse/build/private/getTextElementPositionsSpec.js b/packages/yoastseo/spec/parse/build/private/getTextElementPositionsSpec.js index 82184e82d1f..fa6bd1e847f 100644 --- a/packages/yoastseo/spec/parse/build/private/getTextElementPositionsSpec.js +++ b/packages/yoastseo/spec/parse/build/private/getTextElementPositionsSpec.js @@ -510,13 +510,12 @@ describe( "A test for getting positions of sentences", () => { it( "should correctly add positions to an implicit paragraph", function() { const html = "Hello world!"; const tree = adapt( parseFragment( html, { sourceCodeLocationInfo: true } ) ); - // console.log( tree ); const paragraph = tree.childNodes[ 0 ]; const tokens = [ "Hello", " ", "world", "!" ].map( string => new Token( string ) ); - // console.log( tokens ); + const [ hello, space, world, bang ] = getTextElementPositions( paragraph, tokens ); - console.log( hello, space, world, bang ); + expect( hello.sourceCodeRange ).toEqual( { startOffset: 0, endOffset: 5 } ); expect( space.sourceCodeRange ).toEqual( { startOffset: 5, endOffset: 6 } ); expect( world.sourceCodeRange ).toEqual( { startOffset: 6, endOffset: 11 } ); @@ -524,17 +523,19 @@ describe( "A test for getting positions of sentences", () => { } ); it( "correctly calculates the position of an image caption", () => { - const html = "
    [caption id=\"attachment_3341501\" align=\"alignnone\" width=\"300\"]\"alt\" An image with the keyword in the caption.[/caption]
    "; + const html = "
    [caption id=\"attachment_3341501\" align=\"alignnone\" width=\"300\"]" + + "\"alt\"" + + " An image with the keyword in the caption.[/caption]
    "; const tree = adapt( parseFragment( html, { sourceCodeLocationInfo: true } ) ); const div = tree.childNodes[ 0 ]; const caption = div.childNodes[ 0 ]; - console.log( caption ); + const captionText = caption.childNodes[ 2 ]; - const tokens = [ " ", "An", " ", "image", " ", "with", " ", "the", " ", "keyword", " ", "in", " ", "the", " ", "caption", "." ].map( string => new Token( string ) ); - console.log( tokens ); - const [ space0, an, space1, image, space2, withToken, space3, the, space4, keyword, space5, inToken, space6, the2, space7, captionToken, dot ] = getTextElementPositions( captionText, tokens ); + const tokens = [ " ", "An", " ", "image", " ", "with", " ", "the", " ", "keyword", " ", "in", " ", "the", " ", "caption", "." ].map( + string => new Token( string ) ); - console.log( an ); + const [ space0, an, space1, image, space2, withToken, space3, the, + space4, keyword, space5, inToken, space6, the2, space7, captionToken, dot ] = getTextElementPositions( captionText, tokens ); expect( space0.sourceCodeRange ).toEqual( { startOffset: 148, endOffset: 149 } ); expect( an.sourceCodeRange ).toEqual( { startOffset: 149, endOffset: 151 } ); From d43b34053537adbd74d48a718934bd4eca6c60b4 Mon Sep 17 00:00:00 2001 From: hdvos Date: Mon, 28 Aug 2023 13:47:13 +0200 Subject: [PATCH 339/616] create separate function to get text element start --- .../build/private/getTextElementPositions.js | 26 +++++++++++++------ 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/packages/yoastseo/src/parse/build/private/getTextElementPositions.js b/packages/yoastseo/src/parse/build/private/getTextElementPositions.js index de0720694ee..9ba24dac97d 100644 --- a/packages/yoastseo/src/parse/build/private/getTextElementPositions.js +++ b/packages/yoastseo/src/parse/build/private/getTextElementPositions.js @@ -93,6 +93,22 @@ function adjustTextElementStart( descendantTagPositions, textElementStart ) { return textElementStart; } +/** + * Gets the initial start position of a text element. + * @param {Node} node The node to get the start position from. + * @param {Number} startOffset The start position of the node in the source code. + * @returns {number} The start position of the text element. + */ +const getTextElementStart = ( node, startOffset ) => { + if ( node.name === "#text" && node.sourceCodeRange && node.sourceCodeRange.startOffset ) { + return node.sourceCodeRange.startOffset; + } else if ( node instanceof Paragraph && node.isImplicit ) { + return node.sourceCodeLocation.startOffset; + } + + return startOffset >= 0 ? startOffset : node.sourceCodeLocation.startTag.endOffset; +}; + /** * Gets the start and end positions of text elements (sentences or tokens) in the source code. * @@ -114,14 +130,8 @@ export default function getTextElementPositions( node, textElements, startOffset * end tags, set the start position to the start of the node. Otherwise, set the start position to the end of the * node's start tag. */ - let textElementStart; - if ( node.name === "#text" && node.sourceCodeRange && node.sourceCodeRange.startOffset ) { - textElementStart = node.sourceCodeRange.startOffset; - } else if ( node instanceof Paragraph && node.isImplicit ) { - textElementStart = node.sourceCodeLocation.startOffset; - } else { - textElementStart = startOffset >= 0 ? 
startOffset : node.sourceCodeLocation.startTag.endOffset; - } + let textElementStart = getTextElementStart( node, startOffset ); + let textElementEnd; let descendantTagPositions = []; From c72c99546b56d6fa21d7a4def287f1f179def355 Mon Sep 17 00:00:00 2001 From: hdvos Date: Mon, 28 Aug 2023 13:48:27 +0200 Subject: [PATCH 340/616] put logic in separate functions --- .../private/combineIntoImplicitParagraphs.js | 130 +++++++++--------- 1 file changed, 65 insertions(+), 65 deletions(-) diff --git a/packages/yoastseo/src/parse/build/private/combineIntoImplicitParagraphs.js b/packages/yoastseo/src/parse/build/private/combineIntoImplicitParagraphs.js index 4ec356111f8..9bbea422617 100644 --- a/packages/yoastseo/src/parse/build/private/combineIntoImplicitParagraphs.js +++ b/packages/yoastseo/src/parse/build/private/combineIntoImplicitParagraphs.js @@ -26,6 +26,58 @@ function hasChildren( node ) { return node && node.childNodes.length > 0; } +/** + * Updates the source code location of an implicit paragraph based on its first child. + * @param {Paragraph} implicitParagraph The implicit paragraph to update. + * + * @returns {void} + */ +const updateImplicitParagraphLocation = ( implicitParagraph ) => { + if ( implicitParagraph.childNodes[ 0 ].sourceCodeRange && ! isEmpty( implicitParagraph.childNodes[ 0 ].sourceCodeRange ) ) { + if ( implicitParagraph.sourceCodeLocation ) { + implicitParagraph.sourceCodeLocation.startOffset = implicitParagraph.childNodes[ 0 ].sourceCodeRange.startOffset; + } else { + implicitParagraph.sourceCodeLocation = new SourceCodeLocation( { + startOffset: implicitParagraph.childNodes[ 0 ].sourceCodeRange.startOffset, + endOffset: implicitParagraph.childNodes[ 0 ].sourceCodeRange.endOffset, + } ); + } + } else if ( implicitParagraph.childNodes[ 0 ].sourceCodeLocation && + ! isEmpty( implicitParagraph.childNodes[ 0 ].sourceCodeLocation ) ) { + if ( implicitParagraph.sourceCodeLocation ) { + implicitParagraph.sourceCodeLocation.startOffset = implicitParagraph.childNodes[ 0 ].sourceCodeLocation.startOffset; + } else { + implicitParagraph.sourceCodeLocation = new SourceCodeLocation( { + startOffset: implicitParagraph.childNodes[ 0 ].sourceCodeLocation.startOffset, + endOffset: implicitParagraph.childNodes[ 0 ].sourceCodeLocation.endOffset, + } ); + } + } +}; + +/** + * Updates the source code location of the implicit paragraph and the current source code location based on it's child nodes. + * @param {Node} node The current node. + * @param {Paragraph} implicitParagraph The implicit paragraph. + * @param {SourceCodeLocation} currentSourceCodeLocation The current source code location. + * + * @returns {void} + */ +const updateSourceCodeLocation = ( node, implicitParagraph, currentSourceCodeLocation ) => { + if ( ! isEmpty( node.sourceCodeLocation ) ) { + // Update the endOffset of the current implicit paragraph to be the start of the current node. + if ( ! isEmpty( implicitParagraph.sourceCodeLocation ) ) { + // implicitParagraph.childNodes[ 0 ]; + implicitParagraph.sourceCodeLocation.endOffset = node.sourceCodeLocation.startOffset; + } + + updateImplicitParagraphLocation( implicitParagraph ); + + // Update the startOffset of the next implicit paragraph to be the end of the current node. + currentSourceCodeLocation.startOffset = node.sourceCodeLocation.endOffset; + } +}; + /** * Combines series of consecutive phrasing content ("inline" tags like `a` and `span`, and text) into implicit paragraphs. 
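As a rough illustration of the extracted helper above (an editor's sketch, not part of the patch: plain objects stand in for the real Paragraph and Text classes), the decision order of getTextElementStart can be exercised on its own like this:

	// Sketch only: mirrors the decision order of getTextElementStart with plain objects.
	const getStartSketch = ( node, startOffset ) => {
		if ( node.name === "#text" && node.sourceCodeRange && node.sourceCodeRange.startOffset ) {
			// Text nodes carry a sourceCodeRange instead of a sourceCodeLocation.
			return node.sourceCodeRange.startOffset;
		}
		if ( node.isImplicit ) {
			// Implicit paragraphs have no start tag, so their own start offset is used.
			return node.sourceCodeLocation.startOffset;
		}
		// Otherwise fall back to the given offset, or to the end of the node's start tag.
		return startOffset >= 0 ? startOffset : node.sourceCodeLocation.startTag.endOffset;
	};

	console.log( getStartSketch( { name: "#text", sourceCodeRange: { startOffset: 5 } }, -1 ) ); // 5
	console.log( getStartSketch( { name: "p", sourceCodeLocation: { startTag: { endOffset: 3 } } }, -1 ) ); // 3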
* @@ -39,22 +91,8 @@ function hasChildren( node ) { function combineIntoImplicitParagraphs( nodes, parentSourceCodeLocation = {} ) { const newNodes = []; let currentSourceCodeLocation = {}; - // console.log( nodes ); - - - //
    foo

    bar

    baz
    - // For implicit paragraphs, strip off the start and end tag information from the parent's source code location. - if ( ! isEmpty( parentSourceCodeLocation ) ) { - currentSourceCodeLocation = { - startOffset: parentSourceCodeLocation.startTag - ? parentSourceCodeLocation.startTag.endOffset - : parentSourceCodeLocation.startOffset, - endOffset: parentSourceCodeLocation.endTag - ? parentSourceCodeLocation.endTag.startOffset - : parentSourceCodeLocation.endOffset, - }; - } else { + if ( isEmpty( parentSourceCodeLocation ) ) { const firstNode = nodes[ 0 ]; const lastNode = nodes[ nodes.length - 1 ]; @@ -64,6 +102,15 @@ function combineIntoImplicitParagraphs( nodes, parentSourceCodeLocation = {} ) { endOffset: lastNode.sourceCodeLocation.endOffset, } ); } + } else { + currentSourceCodeLocation = { + startOffset: parentSourceCodeLocation.startTag + ? parentSourceCodeLocation.startTag.endOffset + : parentSourceCodeLocation.startOffset, + endOffset: parentSourceCodeLocation.endTag + ? parentSourceCodeLocation.endTag.startOffset + : parentSourceCodeLocation.endOffset, + }; } let implicitParagraph = Paragraph.createImplicit( {}, [], currentSourceCodeLocation ); @@ -73,36 +120,7 @@ function combineIntoImplicitParagraphs( nodes, parentSourceCodeLocation = {} ) { } else { if ( hasChildren( implicitParagraph ) ) { // The implicit paragraph has children: end the current implicit paragraph and start a new one. - - if ( ! isEmpty( node.sourceCodeLocation ) ) { - // Update the endOffset of the current implicit paragraph to be the start of the current node. - if ( ! isEmpty( implicitParagraph.sourceCodeLocation ) ) { - // implicitParagraph.childNodes[ 0 ]; - implicitParagraph.sourceCodeLocation.endOffset = node.sourceCodeLocation.startOffset; - } - if ( implicitParagraph.childNodes[ 0 ].sourceCodeRange && ! isEmpty( implicitParagraph.childNodes[ 0 ].sourceCodeRange ) ) { - if ( implicitParagraph.sourceCodeLocation ) { - implicitParagraph.sourceCodeLocation.startOffset = implicitParagraph.childNodes[ 0 ].sourceCodeRange.startOffset; - } else { - implicitParagraph.sourceCodeLocation = new SourceCodeLocation( { - startOffset: implicitParagraph.childNodes[ 0 ].sourceCodeRange.startOffset, - endOffset: implicitParagraph.childNodes[ 0 ].sourceCodeRange.endOffset, - } ); - } - } else if ( implicitParagraph.childNodes[ 0 ].sourceCodeLocation && - ! isEmpty( implicitParagraph.childNodes[ 0 ].sourceCodeLocation ) ) { - if ( implicitParagraph.sourceCodeLocation ) { - implicitParagraph.sourceCodeLocation.startOffset = implicitParagraph.childNodes[ 0 ].sourceCodeLocation.startOffset; - } else { - implicitParagraph.sourceCodeLocation = new SourceCodeLocation( { - startOffset: implicitParagraph.childNodes[ 0 ].sourceCodeLocation.startOffset, - endOffset: implicitParagraph.childNodes[ 0 ].sourceCodeLocation.endOffset, - } ); - } - } - // Update the startOffset of the next implicit paragraph to be the end of the current node. - currentSourceCodeLocation.startOffset = node.sourceCodeLocation.endOffset; - } + updateSourceCodeLocation( node, implicitParagraph, currentSourceCodeLocation ); newNodes.push( implicitParagraph ); implicitParagraph = Paragraph.createImplicit( {}, [], currentSourceCodeLocation ); @@ -112,26 +130,8 @@ function combineIntoImplicitParagraphs( nodes, parentSourceCodeLocation = {} ) { } ); if ( hasChildren( implicitParagraph ) ) { - if ( implicitParagraph.childNodes[ 0 ].sourceCodeRange && ! 
isEmpty( implicitParagraph.childNodes[ 0 ].sourceCodeRange ) ) { - if ( implicitParagraph.sourceCodeLocation ) { - implicitParagraph.sourceCodeLocation.startOffset = implicitParagraph.childNodes[ 0 ].sourceCodeRange.startOffset; - } else { - implicitParagraph.sourceCodeLocation = new SourceCodeLocation( { - startOffset: implicitParagraph.childNodes[ 0 ].sourceCodeRange.startOffset, - endOffset: implicitParagraph.childNodes[ 0 ].sourceCodeRange.endOffset, - } ); - } - } else if ( implicitParagraph.childNodes[ 0 ].sourceCodeLocation && - ! isEmpty( implicitParagraph.childNodes[ 0 ].sourceCodeLocation ) ) { - if ( implicitParagraph.sourceCodeLocation ) { - implicitParagraph.sourceCodeLocation.startOffset = implicitParagraph.childNodes[ 0 ].sourceCodeLocation.startOffset; - } else { - implicitParagraph.sourceCodeLocation = new SourceCodeLocation( { - startOffset: implicitParagraph.childNodes[ 0 ].sourceCodeLocation.startOffset, - endOffset: implicitParagraph.childNodes[ 0 ].sourceCodeLocation.endOffset, - } ); - } - } + updateImplicitParagraphLocation( implicitParagraph ); + newNodes.push( implicitParagraph ); } return newNodes; From feecdd8b9647cf6d365c156b01405cd420125767 Mon Sep 17 00:00:00 2001 From: hdvos Date: Mon, 28 Aug 2023 17:11:19 +0200 Subject: [PATCH 341/616] add documentation to getTextElementStart --- .../build/private/getTextElementPositions.js | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/packages/yoastseo/src/parse/build/private/getTextElementPositions.js b/packages/yoastseo/src/parse/build/private/getTextElementPositions.js index 9ba24dac97d..bcf4e13ce18 100644 --- a/packages/yoastseo/src/parse/build/private/getTextElementPositions.js +++ b/packages/yoastseo/src/parse/build/private/getTextElementPositions.js @@ -95,18 +95,20 @@ function adjustTextElementStart( descendantTagPositions, textElementStart ) { /** * Gets the initial start position of a text element. - * @param {Node} node The node to get the start position from. - * @param {Number} startOffset The start position of the node in the source code. + * @param {Node} childNode The childNode to get the start position from. + * @param {Number} parentNodeStartOffset The start position of the childNode in the source code. * @returns {number} The start position of the text element. */ -const getTextElementStart = ( node, startOffset ) => { - if ( node.name === "#text" && node.sourceCodeRange && node.sourceCodeRange.startOffset ) { - return node.sourceCodeRange.startOffset; - } else if ( node instanceof Paragraph && node.isImplicit ) { - return node.sourceCodeLocation.startOffset; +const getTextElementStart = ( childNode, parentNodeStartOffset ) => { + // A check if the childNode is a text. This is needed since text nodes have a sorceCodeRange, but no sourceCodeLocation. + if ( childNode.name === "#text" && childNode.sourceCodeRange && childNode.sourceCodeRange.startOffset ) { + return childNode.sourceCodeRange.startOffset; + } else if ( childNode instanceof Paragraph && childNode.isImplicit ) { + return childNode.sourceCodeLocation.startOffset; } - return startOffset >= 0 ? startOffset : node.sourceCodeLocation.startTag.endOffset; + // if the childnode does not have sourcecodeLocation, we use the parentNodeStartOffset of the parent childNode. + return parentNodeStartOffset >= 0 ? 
parentNodeStartOffset : childNode.sourceCodeLocation.startTag.endOffset; }; /** From 138b2465dc0a8116475e4b42ecd5dd31a54f7f1c Mon Sep 17 00:00:00 2001 From: Hanna Worku Date: Mon, 28 Aug 2023 12:39:54 -0700 Subject: [PATCH 342/616] update list of excluded elements --- .../spec/parse/build/private/filterTreeSpec.js | 11 +++-------- .../src/parse/build/private/alwaysFilterElements.js | 5 ++--- 2 files changed, 5 insertions(+), 11 deletions(-) diff --git a/packages/yoastseo/spec/parse/build/private/filterTreeSpec.js b/packages/yoastseo/spec/parse/build/private/filterTreeSpec.js index 70eb5809335..97e592660c7 100644 --- a/packages/yoastseo/spec/parse/build/private/filterTreeSpec.js +++ b/packages/yoastseo/spec/parse/build/private/filterTreeSpec.js @@ -213,14 +213,9 @@ const samplesWithTwoOccurrences = [ "degrees angle, how big is the remaining angle?

    Fun for the whole family

    ", }, { - element: "del", - html: "\n\n

    These concepts include terms and taxonomies," + - " among others

    ", - }, - { - element: "label", - html: "\n\n", + element: "form", + html: "\n\n", }, ]; diff --git a/packages/yoastseo/src/parse/build/private/alwaysFilterElements.js b/packages/yoastseo/src/parse/build/private/alwaysFilterElements.js index a6b3a5e81f8..7a8f99c3363 100644 --- a/packages/yoastseo/src/parse/build/private/alwaysFilterElements.js +++ b/packages/yoastseo/src/parse/build/private/alwaysFilterElements.js @@ -5,7 +5,7 @@ import { elementHasName, elementHasClass, elementHasID } from "./filterHelpers"; // These are elements that we don't want to include in the analysis and that can be child nodes of paragraphs or headings. -export const canBeChildOfParagraph = [ "code", "kbd", "math", "q", "samp", "script", "var", "#comment", "del", "cite", "label", +export const canBeChildOfParagraph = [ "code", "kbd", "math", "q", "samp", "script", "var", "#comment", "cite", "form", "map", "noscript", "output" ]; const permanentFilters = [ @@ -50,9 +50,8 @@ const permanentFilters = [ elementHasName( "title" ), elementHasName( "var" ), elementHasName( "#comment" ), - elementHasName( "del" ), elementHasName( "cite" ), - elementHasName( "label" ), + elementHasName( "form" ), elementHasName( "map" ), elementHasName( "noscript" ), elementHasName( "output" ), From 3abcabb44031c225a9110bb78919d9f996dadeef Mon Sep 17 00:00:00 2001 From: hdvos Date: Tue, 29 Aug 2023 08:36:30 +0200 Subject: [PATCH 343/616] add inlinde documentation --- .../build/private/combineIntoImplicitParagraphs.js | 11 ++++++++++- .../parse/build/private/getTextElementPositions.js | 6 +++++- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/packages/yoastseo/src/parse/build/private/combineIntoImplicitParagraphs.js b/packages/yoastseo/src/parse/build/private/combineIntoImplicitParagraphs.js index 9bbea422617..4d726ef0561 100644 --- a/packages/yoastseo/src/parse/build/private/combineIntoImplicitParagraphs.js +++ b/packages/yoastseo/src/parse/build/private/combineIntoImplicitParagraphs.js @@ -33,7 +33,11 @@ function hasChildren( node ) { * @returns {void} */ const updateImplicitParagraphLocation = ( implicitParagraph ) => { + // Check if there is a source code range or location on the first child node. The reason for this is that tree-nodes have a source code location + // and text-nodes have a source code range. Another approach would be is to check whether we are dealing with a text-node or a tree-node, + // but this way we also do a bit of defensive programming. if ( implicitParagraph.childNodes[ 0 ].sourceCodeRange && ! isEmpty( implicitParagraph.childNodes[ 0 ].sourceCodeRange ) ) { + // Check if the implicit paragraph already has a source code location. If so, update the start offset. else, set a new source code location. if ( implicitParagraph.sourceCodeLocation ) { implicitParagraph.sourceCodeLocation.startOffset = implicitParagraph.childNodes[ 0 ].sourceCodeRange.startOffset; } else { @@ -44,6 +48,7 @@ const updateImplicitParagraphLocation = ( implicitParagraph ) => { } } else if ( implicitParagraph.childNodes[ 0 ].sourceCodeLocation && ! isEmpty( implicitParagraph.childNodes[ 0 ].sourceCodeLocation ) ) { + // Check if the implicit paragraph already has a source code location. If so, update the start offset. else, set a new source code location. 
if ( implicitParagraph.sourceCodeLocation ) { implicitParagraph.sourceCodeLocation.startOffset = implicitParagraph.childNodes[ 0 ].sourceCodeLocation.startOffset; } else { @@ -116,10 +121,14 @@ function combineIntoImplicitParagraphs( nodes, parentSourceCodeLocation = {} ) { let implicitParagraph = Paragraph.createImplicit( {}, [], currentSourceCodeLocation ); nodes.forEach( node => { if ( isPhrasingContent( node.name ) && ! isInterElementWhitespace( node ) ) { + // If the node is phrasing content, add it to the implicit paragraph. implicitParagraph.childNodes.push( node ); } else { + // If the node is not phrasing content, this means that the implicit paragraph has ended. if ( hasChildren( implicitParagraph ) ) { - // The implicit paragraph has children: end the current implicit paragraph and start a new one. + // If the implicit paragraph has children this means an implicit paragraph was created: + // end the current implicit paragraph and start a new one. + // In updateSourceCodeLocation try we update the source code location of the implicit paragraph based on its child nodes. updateSourceCodeLocation( node, implicitParagraph, currentSourceCodeLocation ); newNodes.push( implicitParagraph ); diff --git a/packages/yoastseo/src/parse/build/private/getTextElementPositions.js b/packages/yoastseo/src/parse/build/private/getTextElementPositions.js index bcf4e13ce18..7224ae81477 100644 --- a/packages/yoastseo/src/parse/build/private/getTextElementPositions.js +++ b/packages/yoastseo/src/parse/build/private/getTextElementPositions.js @@ -95,12 +95,13 @@ function adjustTextElementStart( descendantTagPositions, textElementStart ) { /** * Gets the initial start position of a text element. + * * @param {Node} childNode The childNode to get the start position from. * @param {Number} parentNodeStartOffset The start position of the childNode in the source code. * @returns {number} The start position of the text element. */ const getTextElementStart = ( childNode, parentNodeStartOffset ) => { - // A check if the childNode is a text. This is needed since text nodes have a sorceCodeRange, but no sourceCodeLocation. + // A check if the childNode is a text. This is needed since text nodes have a sourceCodeRange, but no sourceCodeLocation. if ( childNode.name === "#text" && childNode.sourceCodeRange && childNode.sourceCodeRange.startOffset ) { return childNode.sourceCodeRange.startOffset; } else if ( childNode instanceof Paragraph && childNode.isImplicit ) { @@ -141,6 +142,9 @@ export default function getTextElementPositions( node, textElements, startOffset * Check if the node has any descendant nodes that have a sourceCodeLocation property (all nodes other than Text nodes * should have this property). If such nodes exist, store the positions of each node's opening and closing tags in * an array. These positions will have to be taken into account when calculating the position of the text elements. + * + * A node can be a tree-node or a text-node. Tree-nodes have a findAll method, while text-nodes do not. + * That's why we check whether the node has a findAll method. 
*/ if ( node.findAll ) { const descendantNodes = node.findAll( descendantNode => descendantNode.sourceCodeLocation, true ); From 9e0d1aff0481e8360f53fa4721d982be09f5f268 Mon Sep 17 00:00:00 2001 From: marinakoleva Date: Wed, 30 Aug 2023 11:15:47 +0300 Subject: [PATCH 344/616] add step to change & to #amp; to the buildTreeNoTokenize function --- packages/yoastseo/spec/specHelpers/parse/buildTree.js | 7 ++++++- packages/yoastseo/src/parse/build/private/tokenize.js | 4 ++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/packages/yoastseo/spec/specHelpers/parse/buildTree.js b/packages/yoastseo/spec/specHelpers/parse/buildTree.js index 2648f4c28d9..f446404a16a 100644 --- a/packages/yoastseo/spec/specHelpers/parse/buildTree.js +++ b/packages/yoastseo/spec/specHelpers/parse/buildTree.js @@ -4,6 +4,7 @@ import adapt from "../../../src/parse/build/private/adapt"; import { parseFragment } from "parse5"; import filterTree from "../../../src/parse/build/private/filterTree"; import permanentFilters from "../../../src/parse/build/private/alwaysFilterElements"; +import { htmlEntitiesWithAmpersandRegex } from "../../../src/parse/build/private/htmlEntities"; /** * Builds an HTML tree for a given paper and researcher, and adds it to the paper. @@ -24,7 +25,11 @@ export default function buildTree( paper, researcher ) { * @returns {void} */ export function buildTreeNoTokenize( paper ) { - let tree = adapt( parseFragment( paper.getText(), { sourceCodeLocationInfo: true } ) ); + let html = paper.getText(); + // change & to #amp; + html = html.replace( htmlEntitiesWithAmpersandRegex, "#$1" ); + + let tree = adapt( parseFragment( html, { sourceCodeLocationInfo: true } ) ); tree = filterTree( tree, permanentFilters ); paper.setTree( tree ); } diff --git a/packages/yoastseo/src/parse/build/private/tokenize.js b/packages/yoastseo/src/parse/build/private/tokenize.js index c953d3d0b35..4f45a12b8c4 100644 --- a/packages/yoastseo/src/parse/build/private/tokenize.js +++ b/packages/yoastseo/src/parse/build/private/tokenize.js @@ -38,8 +38,8 @@ function getSentences( node, languageProcessor ) { return sentences.map( sentence => { // After the position info is determined, change & and other entities to their encoded versions, // without changing the position information. - for ( let i; i < htmlEntitiesArray.length; i++ ) { - sentence = sentence.replace( htmlEntitiesArray[ i ], encodedHtmlEntitiesArray[ i ] ); + for ( let i = 0; i < htmlEntitiesArray.length; i++ ) { + sentence.text = sentence.text.replace( htmlEntitiesArray[ i ], encodedHtmlEntitiesArray[ i ] ); } return getTokens( node, sentence, languageProcessor ); } ); From a590758328e82d47f629e31d5d976ee75ac6be94 Mon Sep 17 00:00:00 2001 From: hdvos Date: Wed, 30 Aug 2023 10:44:09 +0200 Subject: [PATCH 345/616] delete commented out code --- .../src/parse/build/private/combineIntoImplicitParagraphs.js | 1 - 1 file changed, 1 deletion(-) diff --git a/packages/yoastseo/src/parse/build/private/combineIntoImplicitParagraphs.js b/packages/yoastseo/src/parse/build/private/combineIntoImplicitParagraphs.js index 4d726ef0561..dee304ff18e 100644 --- a/packages/yoastseo/src/parse/build/private/combineIntoImplicitParagraphs.js +++ b/packages/yoastseo/src/parse/build/private/combineIntoImplicitParagraphs.js @@ -72,7 +72,6 @@ const updateSourceCodeLocation = ( node, implicitParagraph, currentSourceCodeLoc if ( ! isEmpty( node.sourceCodeLocation ) ) { // Update the endOffset of the current implicit paragraph to be the start of the current node. if ( ! 
isEmpty( implicitParagraph.sourceCodeLocation ) ) { - // implicitParagraph.childNodes[ 0 ]; implicitParagraph.sourceCodeLocation.endOffset = node.sourceCodeLocation.startOffset; } From 519586200b8c5b26d5409b9699bd244bfaf0c2d1 Mon Sep 17 00:00:00 2001 From: Martijn van der Klis Date: Fri, 1 Sep 2023 15:03:15 +0200 Subject: [PATCH 346/616] Boy scouting in the spec files --- .../sentence/getSentencesFromTreeSpec.js | 27 +-- .../researches/getAnchorsWithKeyphraseSpec.js | 203 +++++++++--------- .../private/filterBeforeTokenizingSpec.js | 24 +-- .../parse/build/private/filterTreeSpec.js | 42 ++-- .../spec/parse/build/private/tokenizeSpec.js | 3 +- .../yoastseo/spec/parse/structure/NodeSpec.js | 2 +- .../spec/parse/traverse/findAllInTreeSpec.js | 2 +- packages/yoastseo/src/parse/build/build.js | 2 +- packages/yoastseo/src/parse/structure/Text.js | 14 +- 9 files changed, 162 insertions(+), 157 deletions(-) diff --git a/packages/yoastseo/spec/languageProcessing/helpers/sentence/getSentencesFromTreeSpec.js b/packages/yoastseo/spec/languageProcessing/helpers/sentence/getSentencesFromTreeSpec.js index c9cfd506726..b1c5f2a4999 100644 --- a/packages/yoastseo/spec/languageProcessing/helpers/sentence/getSentencesFromTreeSpec.js +++ b/packages/yoastseo/spec/languageProcessing/helpers/sentence/getSentencesFromTreeSpec.js @@ -6,7 +6,8 @@ import EnglishResearcher from "../../../../src/languageProcessing/languages/en/R describe( "test to get sentences from the tree", () => { let researcher; beforeEach( () => { - researcher = new EnglishResearcher(); + const mockPaper = new Paper( "" ); + researcher = new EnglishResearcher( mockPaper ); } ); it( "returns the sentences from paragraph and heading nodes", () => { const paper = new Paper( "

    A very intelligent cat loves their human. A dog is very cute.

    A subheading 3" + @@ -15,7 +16,7 @@ describe( "test to get sentences from the tree", () => { buildTree( paper, researcher ); expect( getSentencesFromTree( paper ) ).toEqual( [ { - sourceCodeRange: { endOffset: 49, startOffset: 8 }, + sourceCodeRange: { startOffset: 8, endOffset: 49 }, parentStartOffset: 8, text: "A very intelligent cat loves their human.", tokens: [ @@ -60,7 +61,7 @@ describe( "test to get sentences from the tree", () => { parentClientId: "", }, { - sourceCodeRange: { endOffset: 91, startOffset: 77 }, + sourceCodeRange: { startOffset: 77, endOffset: 91 }, parentStartOffset: 77, text: "A subheading 3", tokens: [ @@ -90,7 +91,7 @@ describe( "test to get sentences from the tree", () => { parentClientId: "", }, { - sourceCodeRange: { endOffset: 128, startOffset: 114 }, + sourceCodeRange: { startOffset: 114, endOffset: 128 }, parentStartOffset: 114, text: "A subheading 4", tokens: [ @@ -105,7 +106,7 @@ describe( "test to get sentences from the tree", () => { parentClientId: "", }, { - sourceCodeRange: { endOffset: 143, startOffset: 133 }, + sourceCodeRange: { startOffset: 133, endOffset: 143 }, parentStartOffset: 133, text: "more text.", tokens: [ @@ -126,16 +127,16 @@ describe( "test to get sentences from the tree", () => { researcher.setPaper( paper ); buildTree( paper, researcher ); expect( getSentencesFromTree( paper ) ).toEqual( [ - { sourceCodeRange: { endOffset: 19, startOffset: 3 }, + { sourceCodeRange: { startOffset: 3, endOffset: 19 }, text: "A cute red panda", tokens: [ - { sourceCodeRange: { endOffset: 4, startOffset: 3 }, text: "A" }, - { sourceCodeRange: { endOffset: 5, startOffset: 4 }, text: " " }, - { sourceCodeRange: { endOffset: 9, startOffset: 5 }, text: "cute" }, - { sourceCodeRange: { endOffset: 10, startOffset: 9 }, text: " " }, - { sourceCodeRange: { endOffset: 13, startOffset: 10 }, text: "red" }, - { sourceCodeRange: { endOffset: 14, startOffset: 13 }, text: " " }, - { sourceCodeRange: { endOffset: 19, startOffset: 14 }, text: "panda" }, + { sourceCodeRange: { startOffset: 3, endOffset: 4 }, text: "A" }, + { sourceCodeRange: { startOffset: 4, endOffset: 5 }, text: " " }, + { sourceCodeRange: { startOffset: 5, endOffset: 9 }, text: "cute" }, + { sourceCodeRange: { startOffset: 9, endOffset: 10 }, text: " " }, + { sourceCodeRange: { startOffset: 10, endOffset: 13 }, text: "red" }, + { sourceCodeRange: { startOffset: 13, endOffset: 14 }, text: " " }, + { sourceCodeRange: { startOffset: 14, endOffset: 19 }, text: "panda" }, ], parentStartOffset: 3, isParentFirstSectionOfBlock: false, diff --git a/packages/yoastseo/spec/languageProcessing/researches/getAnchorsWithKeyphraseSpec.js b/packages/yoastseo/spec/languageProcessing/researches/getAnchorsWithKeyphraseSpec.js index d25be3ae409..a130e4e2a4a 100644 --- a/packages/yoastseo/spec/languageProcessing/researches/getAnchorsWithKeyphraseSpec.js +++ b/packages/yoastseo/spec/languageProcessing/researches/getAnchorsWithKeyphraseSpec.js @@ -11,7 +11,7 @@ const morphologyDataJA = getMorphologyData( "ja" ); describe( "A test for getting the anchors that contain the keyphrase or synonym", () => { it( "should return 0 if the paper text is empty", () => { const attributes = { - permalink: "http://yoast.com", + permalink: "https://yoast.com", keyword: "cats", }; const mockPaper = new Paper( "", attributes ); @@ -24,9 +24,9 @@ describe( "A test for getting the anchors that contain the keyphrase or synonym" it( "should return 0 if neither the keyphrase nor the synonym is set", () => { const attributes = { - permalink: 
"http://yoast.com", + permalink: "https://yoast.com", }; - const mockPaper = new Paper( "string fluffy bunny", attributes ); + const mockPaper = new Paper( "string fluffy bunny", attributes ); const researcher = new EnglishResearcher( mockPaper ); buildTree( mockPaper, researcher ); @@ -38,10 +38,10 @@ describe( "A test for getting the anchors that contain the keyphrase or synonym" "that refers to the current url", () => { const attributes = { keyword: "good companion for an orange cat", - permalink: "http://yoast.com/this-page/", + permalink: "https://yoast.com/this-page/", }; - const mockPaper = new Paper( "string good companion for an orange cat", attributes ); + const mockPaper = new Paper( "string good companion for an orange cat", attributes ); const researcher = new EnglishResearcher( mockPaper ); buildTree( mockPaper, researcher ); @@ -51,7 +51,7 @@ describe( "A test for getting the anchors that contain the keyphrase or synonym" it( "should return 0 if the anchor tag doesn't have href attribute", () => { const attributes = { - permalink: "http://yoast.com", + permalink: "https://yoast.com", keyword: "cats", }; const mockPaper = new Paper( "string good companion for an orange cat", attributes ); @@ -65,9 +65,9 @@ describe( "A test for getting the anchors that contain the keyphrase or synonym" it( "should ignore the keyphrase in an url when referencing to the current url with a hash", () => { let attributes = { keyword: "cat", - permalink: "http://example.org/cat#top", + permalink: "https://example.org/cat#top", }; - let mockPaper = new Paper( "string good companion for an orange cat", attributes ); + let mockPaper = new Paper( "string good companion for an orange cat", attributes ); let researcher = new EnglishResearcher( mockPaper ); buildTree( mockPaper, researcher ); @@ -75,9 +75,9 @@ describe( "A test for getting the anchors that contain the keyphrase or synonym" attributes = { keyword: "cat", - permalink: "http://example.org/cat", + permalink: "https://example.org/cat", }; - mockPaper = new Paper( "There was seen a turkish van together with a tortie", attributes ); + mockPaper = new Paper( "There was seen a turkish van together with a tortie", attributes ); researcher = new EnglishResearcher( mockPaper ); researcher.addResearchData( "morphology", morphologyData ); buildTree( mockPaper, researcher ); @@ -88,7 +88,7 @@ describe( "A test for getting the anchors that contain the keyphrase or synonym" it( "should return 0 if the keyphrase is found in an anchor text of a fragment on the same page", () => { const attributes = { keyword: "good companion for an orange cat", - permalink: "http://yoast.com/this-page/", + permalink: "https://yoast.com/this-page/", }; const mockPaper = new Paper( "string good companion for an orange cat", attributes ); @@ -102,10 +102,10 @@ describe( "A test for getting the anchors that contain the keyphrase or synonym" it( "should return 0 if neither the keyphrase nor the synonym is found in the anchors", () => { const attributes = { keyword: "cats", - permalink: "http://yoast.com", + permalink: "https://yoast.com", synonyms: "Felis catus", }; - const mockPaper = new Paper( "string fluffy bunny", attributes ); + const mockPaper = new Paper( "string fluffy bunny", attributes ); const researcher = new EnglishResearcher( mockPaper ); buildTree( mockPaper, researcher ); @@ -116,7 +116,7 @@ describe( "A test for getting the anchors that contain the keyphrase or synonym" it( "should return 0 when the anchor text contains more content words than just the 
keyphrase or synonym", () => { const attributes = { keyword: "cats", - permalink: "http://yoast.com", + permalink: "https://yoast.com", synonyms: "Felis catus", }; const mockPaper = new Paper( "A fluffy cat and a smart felis catus were seen pouncing together. ", attributes ); @@ -131,7 +131,7 @@ describe( "A test for getting the anchors that contain the keyphrase or synonym" it( "should find a match when the keyphrase is found in the anchor text, even when it only contains function words", () => { const attributes = { keyword: "Mrs. always tries to be awesome", - permalink: "http://yoast.com", + permalink: "https://yoast.com", synonyms: "Felis catus", }; const mockPaper = new Paper( "A Mrs. always tries to be awesome were seen pouncing together. ", attributes ); @@ -148,10 +148,10 @@ describe( "A test for getting the anchors that contain the keyphrase or synonym" childNodes: [ { name: "#text", value: "Mrs. always tries to be awesome", sourceCodeRange: { startOffset: 24, endOffset: 55 } } ], name: "a", sourceCodeLocation: { - endOffset: 59, - endTag: { endOffset: 59, startOffset: 55 }, startOffset: 2, - startTag: { endOffset: 24, startOffset: 2 }, + endOffset: 59, + startTag: { startOffset: 2, endOffset: 24 }, + endTag: { startOffset: 55, endOffset: 59 }, }, } ] ); } ); @@ -159,10 +159,10 @@ describe( "A test for getting the anchors that contain the keyphrase or synonym" it( "should find a match of a keyphrase in the anchor text of an internal link", () => { const attributes = { keyword: "fluffy and furry cat", - permalink: "http://yoast.com", + permalink: "https://yoast.com", }; - const mockPaper = new Paper( "string many fluffy and furry cats", attributes ); + const mockPaper = new Paper( "string many fluffy and furry cats", attributes ); const researcher = new EnglishResearcher( mockPaper ); researcher.addResearchData( "morphology", morphologyData ); @@ -171,24 +171,25 @@ describe( "A test for getting the anchors that contain the keyphrase or synonym" expect( result.anchorsWithKeyphraseCount ).toEqual( 1 ); expect( result.anchorsWithKeyphrase ).toEqual( [ { - attributes: { href: "http://yoast.com/some-other-page/" }, - childNodes: [ { name: "#text", value: "many fluffy and furry cats", sourceCodeRange: { startOffset: 51, endOffset: 77 } } ], + attributes: { href: "https://yoast.com/some-other-page/" }, + childNodes: [ { name: "#text", value: "many fluffy and furry cats", sourceCodeRange: { startOffset: 52, endOffset: 78 } } ], name: "a", sourceCodeLocation: { - endOffset: 81, - endTag: { endOffset: 81, startOffset: 77 }, startOffset: 7, - startTag: { endOffset: 51, startOffset: 7 } } }, + endOffset: 82, + startTag: { startOffset: 7, endOffset: 52 }, + endTag: { startOffset: 78, endOffset: 82 }, + } }, ] ); } ); it( "should find a match of a keyphrase in the anchor text of a link with a hash", () => { const attributes = { keyword: "food for cats", - permalink: "http://example.org/keyword#top", + permalink: "https://example.org/keyword#top", }; - const mockPaper = new Paper( "How to prepare food for cats.", attributes ); + const mockPaper = new Paper( "How to prepare food for cats.", attributes ); const researcher = new EnglishResearcher( mockPaper ); researcher.addResearchData( "morphology", morphologyData ); @@ -197,14 +198,15 @@ describe( "A test for getting the anchors that contain the keyphrase or synonym" expect( result.anchorsWithKeyphraseCount ).toEqual( 1 ); expect( result.anchorsWithKeyphrase ).toEqual( [ { - attributes: { href: "http://test.com/keyword#top" }, - childNodes: [ { 
name: "#text", value: "food for cats", sourceCodeRange: { startOffset: 53, endOffset: 66 } } ], + attributes: { href: "https://test.com/keyword#top" }, + childNodes: [ { name: "#text", value: "food for cats", sourceCodeRange: { startOffset: 54, endOffset: 67 } } ], name: "a", sourceCodeLocation: { - endOffset: 70, - endTag: { endOffset: 70, startOffset: 66 }, startOffset: 15, - startTag: { endOffset: 53, startOffset: 15 } } }, + endOffset: 71, + startTag: { startOffset: 15, endOffset: 54 }, + endTag: { startOffset: 67, endOffset: 71 }, + } }, ] ); } ); @@ -212,10 +214,10 @@ describe( "A test for getting the anchors that contain the keyphrase or synonym" const attributes = { keyword: "felis catus", synonyms: "fluffy and furry cat", - permalink: "http://yoast.com", + permalink: "https://yoast.com", }; - const mockPaper = new Paper( "string many fluffy and furry cats", attributes ); + const mockPaper = new Paper( "string many fluffy and furry cats", attributes ); const researcher = new EnglishResearcher( mockPaper ); researcher.addResearchData( "morphology", morphologyData ); @@ -224,24 +226,25 @@ describe( "A test for getting the anchors that contain the keyphrase or synonym" expect( result.anchorsWithKeyphraseCount ).toEqual( 1 ); expect( result.anchorsWithKeyphrase ).toEqual( [ { - attributes: { href: "http://example.com/some-other-page/" }, - childNodes: [ { name: "#text", value: "many fluffy and furry cats", sourceCodeRange: { startOffset: 53, endOffset: 79 } } ], + attributes: { href: "https://example.com/some-other-page/" }, + childNodes: [ { name: "#text", value: "many fluffy and furry cats", sourceCodeRange: { startOffset: 54, endOffset: 80 } } ], name: "a", sourceCodeLocation: { - endOffset: 83, - endTag: { endOffset: 83, startOffset: 79 }, startOffset: 7, - startTag: { endOffset: 53, startOffset: 7 } } }, + endOffset: 84, + startTag: { startOffset: 7, endOffset: 54 }, + endTag: { startOffset: 80, endOffset: 84 }, + } }, ] ); } ); it( "should still find a match of a keyphrase in the anchor text containing html tags", () => { const attributes = { keyword: "food for cats", - permalink: "http://example.org/this-page", + permalink: "https://example.org/this-page", }; - const mockPaper = new Paper( "How to prepare food for cats.", attributes ); + const mockPaper = new Paper( "How to prepare food for cats.", attributes ); const researcher = new EnglishResearcher( mockPaper ); researcher.addResearchData( "morphology", morphologyData ); @@ -250,26 +253,28 @@ describe( "A test for getting the anchors that contain the keyphrase or synonym" expect( result.anchorsWithKeyphraseCount ).toEqual( 1 ); expect( result.anchorsWithKeyphrase ).toEqual( [ { - attributes: { href: "http://test.com/that-page" }, + attributes: { href: "https://test.com/that-page" }, childNodes: [ - { name: "#text", value: "food for ", sourceCodeRange: { startOffset: 51, endOffset: 60 } }, + { name: "#text", value: "food for ", sourceCodeRange: { startOffset: 52, endOffset: 61 } }, { attributes: {}, - childNodes: [ { name: "#text", value: "cats", sourceCodeRange: { startOffset: 68, endOffset: 72 } } ], + childNodes: [ { name: "#text", value: "cats", sourceCodeRange: { startOffset: 69, endOffset: 73 } } ], name: "strong", sourceCodeLocation: { - endOffset: 81, - endTag: { endOffset: 81, startOffset: 72 }, - startOffset: 60, - startTag: { endOffset: 68, startOffset: 60 } }, + startOffset: 61, + endOffset: 82, + startTag: { startOffset: 61, endOffset: 69 }, + endTag: { startOffset: 73, endOffset: 82 }, + }, }, ], name: "a", 
sourceCodeLocation: { - endOffset: 85, - endTag: { endOffset: 85, startOffset: 81 }, startOffset: 15, - startTag: { endOffset: 51, startOffset: 15 } } }, + endOffset: 86, + startTag: { startOffset: 15, endOffset: 52 }, + endTag: { startOffset: 82, endOffset: 86 }, + } }, ] ); } ); @@ -277,12 +282,12 @@ describe( "A test for getting the anchors that contain the keyphrase or synonym" const attributes = { keyword: "key word and key phrase", synonyms: "interesting article, and another exciting paper", - permalink: "http://example.org/keyword", + permalink: "https://example.org/keyword", }; - const mockPaper = new Paper( "hello, here is a link with my keys wording phrased" + - " as well as the lovely articles which are interesting, " + - "and excited papers", attributes ); + const mockPaper = new Paper( "hello, here is a link with my keys wording phrased" + + " as well as the lovely articles which are interesting, " + + "and excited papers", attributes ); const researcher = new EnglishResearcher( mockPaper ); researcher.addResearchData( "morphology", morphologyData ); @@ -294,12 +299,12 @@ describe( "A test for getting the anchors that contain the keyphrase or synonym" const attributes = { keyword: "key word and key phrase", synonyms: "interesting article", - permalink: "http://example.org/keyword", + permalink: "https://example.org/keyword", }; - const mockPaper = new Paper( "hello, here is a link with my keys wording " + - " as well as the lovely articles which are , " + - "and excited papers", attributes ); + const mockPaper = new Paper( "hello, here is a link with my keys wording " + + " as well as the lovely articles which are , " + + "and excited papers", attributes ); const researcher = new EnglishResearcher( mockPaper ); researcher.addResearchData( "morphology", morphologyData ); @@ -312,10 +317,10 @@ describe( "a test for anchors and its attributes when the exact match of a keyph it( "should match all words from keyphrase in the link text and vice versa when the keyphrase is enclosed in double quotes", function() { const attributes = { keyword: "\"walking in nature\"", - permalink: "http://example.org/keyword", + permalink: "https://example.org/keyword", }; - const mockPaper = new Paper( "hello, here is a link with my walking in nature", attributes ); + const mockPaper = new Paper( "hello, here is a link with my walking in nature", attributes ); const researcher = new EnglishResearcher( mockPaper ); buildTree( mockPaper, researcher ); @@ -326,10 +331,10 @@ describe( "a test for anchors and its attributes when the exact match of a keyph "and the keyphrase is preceded by a function word in the anchor text", function() { const attributes = { keyword: "\"walking in nature\"", - permalink: "http://example.org/keyword", + permalink: "https://example.org/keyword", }; - const mockPaper = new Paper( "hello, here is a link with my " + + const mockPaper = new Paper( "hello, here is a link with my " + "immediately walking in nature", attributes ); const researcher = new EnglishResearcher( mockPaper ); buildTree( mockPaper, researcher ); @@ -341,10 +346,10 @@ describe( "a test for anchors and its attributes when the exact match of a keyph "and the keyphrase is followed by a content word in the anchor text", function() { const attributes = { keyword: "\"walking in nature\"", - permalink: "http://example.org/keyword", + permalink: "https://example.org/keyword", }; - const mockPaper = new Paper( "hello, here is a link with my " + + const mockPaper = new Paper( "hello, here is a link with my " + 
"walking in nature activity", attributes ); const researcher = new EnglishResearcher( mockPaper ); buildTree( mockPaper, researcher ); @@ -356,10 +361,10 @@ describe( "a test for anchors and its attributes when the exact match of a keyph "and the keyphrase appears in a different form in the anchor text", function() { const attributes = { keyword: "\"walking in nature\"", - permalink: "http://example.org/keyword", + permalink: "https://example.org/keyword", }; - const mockPaper = new Paper( "hello, here is a link with my " + + const mockPaper = new Paper( "hello, here is a link with my " + "walks in nature", attributes ); const researcher = new EnglishResearcher( mockPaper ); researcher.addResearchData( "morphology", morphologyData ); @@ -373,9 +378,9 @@ describe( "a test for anchors and its attributes when the exact match of a keyph const paperAttributes = { keyword: "something and tortie", synonyms: "\"cats and dogs\", tortoiseshell", - permalink: "http://yoast.com", + permalink: "https://yoast.com", }; - const mockPaper = new Paper( "A text with a link, cats and dogs", + const mockPaper = new Paper( "A text with a link, cats and dogs", paperAttributes ); const researcher = new EnglishResearcher( mockPaper ); buildTree( mockPaper, researcher ); @@ -388,9 +393,9 @@ describe( "a test for anchors and its attributes when the exact match of a keyph const paperAttributes = { keyword: "\"tabby and tortie\"", synonyms: "\"cats and dogs\", tortoiseshell", - permalink: "http://yoast.com", + permalink: "https://yoast.com", }; - const mockPaper = new Paper( "A text with a tabby and tortie, cats and dogs", + const mockPaper = new Paper( "A text with a tabby and tortie, cats and dogs", paperAttributes ); const researcher = new EnglishResearcher( mockPaper ); buildTree( mockPaper, researcher ); @@ -401,9 +406,9 @@ describe( "a test for anchors and its attributes when the exact match of a keyph it( "should match the keyphrase in the anchor text when the keyphrase is enclosed in double quotes in Japanese", function() { const paperAttributes = { keyword: "『読ん一冊の本』", - permalink: "http://yoast.com", + permalink: "https://yoast.com", }; - const mockPaper = new Paper( "言葉 リンク, 読ん一冊の本", paperAttributes ); + const mockPaper = new Paper( "言葉 リンク, 読ん一冊の本", paperAttributes ); const researcher = new JapaneseResearcher( mockPaper ); buildTree( mockPaper, researcher ); @@ -414,9 +419,9 @@ describe( "a test for anchors and its attributes when the exact match of a keyph "and the keyphrase is preceded by a function word in the anchor text", function() { const paperAttributes = { keyword: "『読ん一冊の本』", - permalink: "http://yoast.com", + permalink: "https://yoast.com", }; - const mockPaper = new Paper( "言葉 リンク, から読ん一冊の本", paperAttributes ); + const mockPaper = new Paper( "言葉 リンク, から読ん一冊の本", paperAttributes ); const researcher = new JapaneseResearcher( mockPaper ); buildTree( mockPaper, researcher ); @@ -428,9 +433,9 @@ describe( "a test for anchors and its attributes when the exact match of a keyph const paperAttributes = { keyword: "言葉", synonyms: "『小さく花の刺繍』", - permalink: "http://yoast.com", + permalink: "https://yoast.com", }; - const mockPaper = new Paper( "言葉 リンク, 小さく花の刺繍", paperAttributes ); + const mockPaper = new Paper( "言葉 リンク, 小さく花の刺繍", paperAttributes ); const researcher = new JapaneseResearcher( mockPaper ); buildTree( mockPaper, researcher ); @@ -444,15 +449,15 @@ describe( "a test for anchors and its attributes in languages that have a custom describe( "a test for when the morphology data is not available", () 
=> { let paperAttributes = { keyword: "リンク", - permalink: "http://yoast.com", + permalink: "https://yoast.com", }; it( "assesses the anchor text where not all content words in the text are present in the keyphrase", function() { paperAttributes = { keyword: "読ん一冊の本", - permalink: "http://yoast.com", + permalink: "https://yoast.com", }; - const mockPaper = new Paper( "言葉 リンク, 猫と読ん一冊の本", paperAttributes ); + const mockPaper = new Paper( "言葉 リンク, 猫と読ん一冊の本", paperAttributes ); const researcher = new JapaneseResearcher( mockPaper ); buildTree( mockPaper, researcher ); @@ -462,9 +467,9 @@ describe( "a test for anchors and its attributes in languages that have a custom it( "assesses the anchor text where all content words in the text present in the keyphrase: should find a match", function() { paperAttributes = { keyword: "から小さい花の刺繍", - permalink: "http://yoast.com", + permalink: "https://yoast.com", }; - const mockPaper = new Paper( "言葉 リンク, 小さい花の刺繍", paperAttributes ); + const mockPaper = new Paper( "言葉 リンク, 小さい花の刺繍", paperAttributes ); const researcher = new JapaneseResearcher( mockPaper ); buildTree( mockPaper, researcher ); @@ -475,9 +480,9 @@ describe( "a test for anchors and its attributes in languages that have a custom "should not find a match", function() { paperAttributes = { keyword: "から小さく花の刺繍", - permalink: "http://yoast.com", + permalink: "https://yoast.com", }; - const mockPaper = new Paper( "言葉 リンク, 小さい花の刺繍", paperAttributes ); + const mockPaper = new Paper( "言葉 リンク, 小さい花の刺繍", paperAttributes ); const researcher = new JapaneseResearcher( mockPaper ); buildTree( mockPaper, researcher ); @@ -488,10 +493,10 @@ describe( "a test for anchors and its attributes in languages that have a custom paperAttributes = { keyword: "から小さく花の刺繍", synonyms: "猫用のフード, 猫用食品", - permalink: "http://yoast.com", + permalink: "https://yoast.com", }; - const mockPaper = new Paper( "言葉 リンク, 小さく花の刺繍" + - " から猫用のフード", paperAttributes ); + const mockPaper = new Paper( "言葉 リンク, 小さく花の刺繍" + + " から猫用のフード", paperAttributes ); const researcher = new JapaneseResearcher( mockPaper ); buildTree( mockPaper, researcher ); @@ -503,9 +508,9 @@ describe( "a test for anchors and its attributes in languages that have a custom it( "assesses the anchor text where not all content words in the text present in the keyphrase: should not find a match", function() { const paperAttributes = { keyword: "読ん一冊の本", - permalink: "http://yoast.com", + permalink: "https://yoast.com", }; - const mockPaper = new Paper( "言葉 リンク, 猫と読ん一冊の本", paperAttributes ); + const mockPaper = new Paper( "言葉 リンク, 猫と読ん一冊の本", paperAttributes ); const researcher = new JapaneseResearcher( mockPaper ); researcher.addResearchData( "morphology", morphologyDataJA ); buildTree( mockPaper, researcher ); @@ -516,9 +521,9 @@ describe( "a test for anchors and its attributes in languages that have a custom it( "assesses the anchor text where all content words in the text present in the keyphrase: should find a match", function() { const paperAttributes = { keyword: "から小さい花の刺繍", - permalink: "http://yoast.com", + permalink: "https://yoast.com", }; - const mockPaper = new Paper( "言葉 リンク, 小さい花の刺繍", paperAttributes ); + const mockPaper = new Paper( "言葉 リンク, 小さい花の刺繍", paperAttributes ); const researcher = new JapaneseResearcher( mockPaper ); researcher.addResearchData( "morphology", morphologyDataJA ); buildTree( mockPaper, researcher ); @@ -530,10 +535,10 @@ describe( "a test for anchors and its attributes in languages that have a custom " should find matches", function() { const 
paperAttributes = { keyword: "から小さく花の刺繍", - permalink: "http://yoast.com", + permalink: "https://yoast.com", }; - const mockPaper = new Paper( "言葉 リンク, 小さい花の刺繍" + - " 小さける花の刺繍", paperAttributes ); + const mockPaper = new Paper( "言葉 リンク, 小さい花の刺繍" + + " 小さける花の刺繍", paperAttributes ); const researcher = new JapaneseResearcher( mockPaper ); researcher.addResearchData( "morphology", morphologyDataJA ); buildTree( mockPaper, researcher ); @@ -545,10 +550,10 @@ describe( "a test for anchors and its attributes in languages that have a custom const paperAttributes = { keyword: "猫用食品", synonyms: "小さく花の刺繍", - permalink: "http://yoast.com", + permalink: "https://yoast.com", }; - const mockPaper = new Paper( "言葉 リンク," + - " から小さい花の刺繍", paperAttributes ); + const mockPaper = new Paper( "言葉 リンク," + + " から小さい花の刺繍", paperAttributes ); const researcher = new JapaneseResearcher( mockPaper ); researcher.addResearchData( "morphology", morphologyDataJA ); buildTree( mockPaper, researcher ); @@ -561,9 +566,9 @@ describe( "a test for anchors and its attributes in languages that have a custom const paperAttributes = { keyword: "「小さく花の刺繍」", synonyms: "something, something else", - permalink: "http://yoast.com", + permalink: "https://yoast.com", }; - const mockPaper = new Paper( "言葉 リンク, 小さい花の刺繍", paperAttributes ); + const mockPaper = new Paper( "言葉 リンク, 小さい花の刺繍", paperAttributes ); const researcher = new JapaneseResearcher( mockPaper ); buildTree( mockPaper, researcher ); diff --git a/packages/yoastseo/spec/parse/build/private/filterBeforeTokenizingSpec.js b/packages/yoastseo/spec/parse/build/private/filterBeforeTokenizingSpec.js index 61b48099b52..7f39f532c54 100644 --- a/packages/yoastseo/spec/parse/build/private/filterBeforeTokenizingSpec.js +++ b/packages/yoastseo/spec/parse/build/private/filterBeforeTokenizingSpec.js @@ -45,16 +45,16 @@ describe( "A test for filterBeforeTokenizing", () => { }, ], sourceCodeLocation: { - endOffset: 58, - endTag: { - endOffset: 58, - startOffset: 54, - }, startOffset: 0, + endOffset: 58, startTag: { endOffset: 3, startOffset: 0, }, + endTag: { + startOffset: 54, + endOffset: 58, + }, }, }, ], @@ -178,8 +178,8 @@ describe( "A test for filterBeforeTokenizing", () => { attributes: {}, childNodes: [], sourceCodeLocation: { - endOffset: 71, startOffset: 22, + endOffset: 71, startTag: { startOffset: 22, endOffset: 28, @@ -192,15 +192,15 @@ describe( "A test for filterBeforeTokenizing", () => { }, ], sourceCodeLocation: { - endOffset: 75, - endTag: { - endOffset: 75, - startOffset: 71, - }, startOffset: 0, + endOffset: 75, startTag: { - endOffset: 3, startOffset: 0, + endOffset: 3, + }, + endTag: { + startOffset: 71, + endOffset: 75, }, }, }, diff --git a/packages/yoastseo/spec/parse/build/private/filterTreeSpec.js b/packages/yoastseo/spec/parse/build/private/filterTreeSpec.js index 84fefa8d018..5859238a209 100644 --- a/packages/yoastseo/spec/parse/build/private/filterTreeSpec.js +++ b/packages/yoastseo/spec/parse/build/private/filterTreeSpec.js @@ -290,8 +290,8 @@ describe( "Tests filtered trees of a few Yoast blocks and of a made-up Yoast blo childNodes: [], name: "#comment", sourceCodeLocation: { - endOffset: 34, startOffset: 0, + endOffset: 34, }, }, { @@ -299,8 +299,8 @@ describe( "Tests filtered trees of a few Yoast blocks and of a made-up Yoast blo childNodes: [], name: "#comment", sourceCodeLocation: { - endOffset: 55, startOffset: 34, + endOffset: 55, }, }, { @@ -320,15 +320,15 @@ describe( "Tests filtered trees of a few Yoast blocks and of a made-up Yoast blo isImplicit: 
false, name: "p", sourceCodeLocation: { - endOffset: 88, - endTag: { - endOffset: 88, - startOffset: 84, - }, startOffset: 56, + endOffset: 88, startTag: { - endOffset: 59, startOffset: 56, + endOffset: 59, + }, + endTag: { + startOffset: 84, + endOffset: 88, }, }, }, @@ -342,8 +342,8 @@ describe( "Tests filtered trees of a few Yoast blocks and of a made-up Yoast blo childNodes: [], name: "#comment", sourceCodeLocation: { - endOffset: 111, startOffset: 89, + endOffset: 111, }, }, ], @@ -367,8 +367,8 @@ describe( "Tests filtered trees of a few Yoast blocks and of a made-up Yoast blo childNodes: [], name: "#comment", sourceCodeLocation: { - endOffset: 39, startOffset: 0, + endOffset: 39, }, }, { @@ -386,8 +386,8 @@ describe( "Tests filtered trees of a few Yoast blocks and of a made-up Yoast blo childNodes: [], name: "#comment", sourceCodeLocation: { - endOffset: 324, startOffset: 284, + endOffset: 324, }, }, { @@ -402,15 +402,15 @@ describe( "Tests filtered trees of a few Yoast blocks and of a made-up Yoast blo isImplicit: false, name: "p", sourceCodeLocation: { - endOffset: 358, - endTag: { - endOffset: 358, - startOffset: 354, - }, startOffset: 324, + endOffset: 358, startTag: { - endOffset: 327, startOffset: 324, + endOffset: 327, + }, + endTag: { + startOffset: 354, + endOffset: 358, }, }, }, @@ -438,8 +438,8 @@ describe( "Tests filtered trees of a few Yoast blocks and of a made-up Yoast blo childNodes: [], name: "#comment", sourceCodeLocation: { - endOffset: 71, startOffset: 0, + endOffset: 71, }, }, { @@ -457,8 +457,8 @@ describe( "Tests filtered trees of a few Yoast blocks and of a made-up Yoast blo childNodes: [], name: "#comment", sourceCodeLocation: { - endOffset: 404, startOffset: 359, + endOffset: 404, }, }, { @@ -473,11 +473,11 @@ describe( "Tests filtered trees of a few Yoast blocks and of a made-up Yoast blo level: 2, name: "h2", sourceCodeLocation: { - endOffset: 422, startOffset: 404, + endOffset: 422, startTag: { - endOffset: 408, startOffset: 404, + endOffset: 408, }, endTag: { startOffset: 417, diff --git a/packages/yoastseo/spec/parse/build/private/tokenizeSpec.js b/packages/yoastseo/spec/parse/build/private/tokenizeSpec.js index 2fe5869fe28..06491656e8e 100644 --- a/packages/yoastseo/spec/parse/build/private/tokenizeSpec.js +++ b/packages/yoastseo/spec/parse/build/private/tokenizeSpec.js @@ -5,6 +5,7 @@ import JapaneseResearcher from "../../../../src/languageProcessing/languages/ja/ import { buildTreeNoTokenize } from "../../../specHelpers/parse/buildTree"; import LanguageProcessor from "../../../../src/parse/language/LanguageProcessor"; import SourceCodeLocation from "../../../../src/parse/structure/SourceCodeLocation"; + describe( "A test for the tokenize function", function() { it( "should correctly tokenize a paragraph of one sentence", function() { @@ -378,7 +379,7 @@ describe( "A test for the tokenize function", } ); describe( "A test for tokenizing a japanese sentence", function() { - it( "should correctly tokenize a simple Japanse sentence.", function() { + it( "should correctly tokenize a simple Japanese sentence.", function() { const mockPaper = new Paper( "

    犬が大好き\u3002

    ", { locale: "ja_JP" } ); const mockResearcher = new JapaneseResearcher( mockPaper ); const languageProcessor = new LanguageProcessor( mockResearcher ); diff --git a/packages/yoastseo/spec/parse/structure/NodeSpec.js b/packages/yoastseo/spec/parse/structure/NodeSpec.js index 3ce0d2c7399..ed670e0382f 100644 --- a/packages/yoastseo/spec/parse/structure/NodeSpec.js +++ b/packages/yoastseo/spec/parse/structure/NodeSpec.js @@ -15,7 +15,7 @@ describe( "A test for the findAll method", () => { let paper; beforeEach( () => { - paper = new Paper(); + paper = new Paper( "" ); } ); it( "should find all occurrences of a p tag", function() { paper._text = "

    Hello, world!

    Hello, yoast!

    "; diff --git a/packages/yoastseo/spec/parse/traverse/findAllInTreeSpec.js b/packages/yoastseo/spec/parse/traverse/findAllInTreeSpec.js index d9a3733406c..7f3a61378a2 100644 --- a/packages/yoastseo/spec/parse/traverse/findAllInTreeSpec.js +++ b/packages/yoastseo/spec/parse/traverse/findAllInTreeSpec.js @@ -11,7 +11,7 @@ beforeEach( () => { const researcher = Factory.buildMockResearcher( {}, true, false, false, { memoizedTokenizer: memoizedSentenceTokenizer } ); languageProcessor = new LanguageProcessor( researcher ); - paper = new Paper(); + paper = new Paper( "" ); } ); describe( "A test for findAllInTree", () => { diff --git a/packages/yoastseo/src/parse/build/build.js b/packages/yoastseo/src/parse/build/build.js index 9a723e5604d..abb9fdd3ce6 100644 --- a/packages/yoastseo/src/parse/build/build.js +++ b/packages/yoastseo/src/parse/build/build.js @@ -14,7 +14,7 @@ import filterShortcodesFromTree from "../../languageProcessing/helpers/sanitize/ * * @param {Paper} paper The paper to build the tree from. * @param {LanguageProcessor} languageProcessor The language processor to use. - * @param {string[]} shortcodes An array of all active shortcodes. + * @param {string[]} [shortcodes] An optional array of all active shortcodes. * * @returns {Node} The tree representation of the HTML string. */ diff --git a/packages/yoastseo/src/parse/structure/Text.js b/packages/yoastseo/src/parse/structure/Text.js index ecf8e257601..f9c703424fd 100644 --- a/packages/yoastseo/src/parse/structure/Text.js +++ b/packages/yoastseo/src/parse/structure/Text.js @@ -4,24 +4,22 @@ import SourceCodeLocation from "./SourceCodeLocation"; */ class Text { /** - * Creates a new text. + * Creates a new Text object, that consist of some text and a source code range. * - * @param {string} tree The tree that potentially contains text. + * @param {object} textNode The current #text node in the parse5 tree. */ - constructor( tree ) { + constructor( textNode ) { this.name = "#text"; /** * This text's content. * * @type {string} */ - this.value = tree.value; - - const { startOffset, endOffset } = tree.sourceCodeLocation; + this.value = textNode.value; this.sourceCodeRange = new SourceCodeLocation( { - startOffset: startOffset, - endOffset: endOffset, + startOffset: textNode.sourceCodeLocation.startOffset, + endOffset: textNode.sourceCodeLocation.endOffset, } ); } } From a9f2dffe9b9702f788efc216c29b9b182a46d4b2 Mon Sep 17 00:00:00 2001 From: Martijn van der Klis Date: Fri, 1 Sep 2023 15:37:48 +0200 Subject: [PATCH 347/616] Improves the readability of the code a bit --- .../private/combineIntoImplicitParagraphs.js | 59 +++++++++++-------- .../build/private/getTextElementPositions.js | 13 ++-- 2 files changed, 38 insertions(+), 34 deletions(-) diff --git a/packages/yoastseo/src/parse/build/private/combineIntoImplicitParagraphs.js b/packages/yoastseo/src/parse/build/private/combineIntoImplicitParagraphs.js index dee304ff18e..94c55068e48 100644 --- a/packages/yoastseo/src/parse/build/private/combineIntoImplicitParagraphs.js +++ b/packages/yoastseo/src/parse/build/private/combineIntoImplicitParagraphs.js @@ -2,6 +2,7 @@ import isPhrasingContent from "./isPhrasingContent"; import { Paragraph } from "../../structure"; import { isEmpty } from "lodash-es"; import SourceCodeLocation from "../../structure/SourceCodeLocation"; + /** * Checks whether a node is inter-element whitespace. 
* @@ -26,37 +27,44 @@ function hasChildren( node ) { return node && node.childNodes.length > 0; } +/** + * Sets the new source code location for an implicit paragraph. + * If the implicit paragraph already has a source code location, only update the start offset, if not, set a new source code location. + * + * @param {Paragraph} implicitParagraph The implicit paragraph to update. + * @param {number} newStartOffset The new start offset. + * @param {number} newEndOffset The new end offset. + * @returns {void} + */ +const setNewLocation = ( implicitParagraph, newStartOffset, newEndOffset ) => { + if ( implicitParagraph.sourceCodeLocation ) { + implicitParagraph.sourceCodeLocation.startOffset = newStartOffset; + } else { + implicitParagraph.sourceCodeLocation = new SourceCodeLocation( { + startOffset: newStartOffset, + endOffset: newEndOffset, + } ); + } +}; + /** * Updates the source code location of an implicit paragraph based on its first child. * @param {Paragraph} implicitParagraph The implicit paragraph to update. - * * @returns {void} */ const updateImplicitParagraphLocation = ( implicitParagraph ) => { // Check if there is a source code range or location on the first child node. The reason for this is that tree-nodes have a source code location // and text-nodes have a source code range. Another approach would be is to check whether we are dealing with a text-node or a tree-node, // but this way we also do a bit of defensive programming. - if ( implicitParagraph.childNodes[ 0 ].sourceCodeRange && ! isEmpty( implicitParagraph.childNodes[ 0 ].sourceCodeRange ) ) { - // Check if the implicit paragraph already has a source code location. If so, update the start offset. else, set a new source code location. - if ( implicitParagraph.sourceCodeLocation ) { - implicitParagraph.sourceCodeLocation.startOffset = implicitParagraph.childNodes[ 0 ].sourceCodeRange.startOffset; - } else { - implicitParagraph.sourceCodeLocation = new SourceCodeLocation( { - startOffset: implicitParagraph.childNodes[ 0 ].sourceCodeRange.startOffset, - endOffset: implicitParagraph.childNodes[ 0 ].sourceCodeRange.endOffset, - } ); - } - } else if ( implicitParagraph.childNodes[ 0 ].sourceCodeLocation && - ! isEmpty( implicitParagraph.childNodes[ 0 ].sourceCodeLocation ) ) { - // Check if the implicit paragraph already has a source code location. If so, update the start offset. else, set a new source code location. - if ( implicitParagraph.sourceCodeLocation ) { - implicitParagraph.sourceCodeLocation.startOffset = implicitParagraph.childNodes[ 0 ].sourceCodeLocation.startOffset; - } else { - implicitParagraph.sourceCodeLocation = new SourceCodeLocation( { - startOffset: implicitParagraph.childNodes[ 0 ].sourceCodeLocation.startOffset, - endOffset: implicitParagraph.childNodes[ 0 ].sourceCodeLocation.endOffset, - } ); - } + const firstChild = implicitParagraph.childNodes[ 0 ]; + if ( firstChild.sourceCodeRange && ! isEmpty( firstChild.sourceCodeRange ) ) { + const newStartOffset = firstChild.sourceCodeRange.startOffset; + const newEndOffset = firstChild.sourceCodeRange.endOffset; + setNewLocation( implicitParagraph, newStartOffset, newEndOffset ); + } else if ( firstChild.sourceCodeLocation && ! 
isEmpty( firstChild.sourceCodeLocation ) ) { + const newStartOffset = firstChild.sourceCodeLocation.startOffset; + const newEndOffset = firstChild.sourceCodeLocation.endOffset; + setNewLocation( implicitParagraph, newStartOffset, newEndOffset ); } }; @@ -107,14 +115,14 @@ function combineIntoImplicitParagraphs( nodes, parentSourceCodeLocation = {} ) { } ); } } else { - currentSourceCodeLocation = { + currentSourceCodeLocation = new SourceCodeLocation( { startOffset: parentSourceCodeLocation.startTag ? parentSourceCodeLocation.startTag.endOffset : parentSourceCodeLocation.startOffset, endOffset: parentSourceCodeLocation.endTag ? parentSourceCodeLocation.endTag.startOffset : parentSourceCodeLocation.endOffset, - }; + } ); } let implicitParagraph = Paragraph.createImplicit( {}, [], currentSourceCodeLocation ); @@ -127,7 +135,8 @@ function combineIntoImplicitParagraphs( nodes, parentSourceCodeLocation = {} ) { if ( hasChildren( implicitParagraph ) ) { // If the implicit paragraph has children this means an implicit paragraph was created: // end the current implicit paragraph and start a new one. - // In updateSourceCodeLocation try we update the source code location of the implicit paragraph based on its child nodes. + + // But before pushing, we need to update the source code location of the implicit paragraph based on its child nodes. updateSourceCodeLocation( node, implicitParagraph, currentSourceCodeLocation ); newNodes.push( implicitParagraph ); diff --git a/packages/yoastseo/src/parse/build/private/getTextElementPositions.js b/packages/yoastseo/src/parse/build/private/getTextElementPositions.js index 7224ae81477..795d4e0f27b 100644 --- a/packages/yoastseo/src/parse/build/private/getTextElementPositions.js +++ b/packages/yoastseo/src/parse/build/private/getTextElementPositions.js @@ -94,21 +94,21 @@ function adjustTextElementStart( descendantTagPositions, textElementStart ) { } /** - * Gets the initial start position of a text element. + * Retrieves the initial start position of a text element. * - * @param {Node} childNode The childNode to get the start position from. + * @param {Paragraph|Text} childNode The childNode to retrieve the start position from. * @param {Number} parentNodeStartOffset The start position of the childNode in the source code. * @returns {number} The start position of the text element. */ const getTextElementStart = ( childNode, parentNodeStartOffset ) => { - // A check if the childNode is a text. This is needed since text nodes have a sourceCodeRange, but no sourceCodeLocation. + // A check if the childNode is a text. Text nodes have a sourceCodeRange, but no sourceCodeLocation. if ( childNode.name === "#text" && childNode.sourceCodeRange && childNode.sourceCodeRange.startOffset ) { return childNode.sourceCodeRange.startOffset; } else if ( childNode instanceof Paragraph && childNode.isImplicit ) { return childNode.sourceCodeLocation.startOffset; } - // if the childnode does not have sourcecodeLocation, we use the parentNodeStartOffset of the parent childNode. + // If the childNode does not have a source code location, we use the parentNodeStartOffset of the parent childNode. return parentNodeStartOffset >= 0 ? parentNodeStartOffset : childNode.sourceCodeLocation.startTag.endOffset; }; @@ -142,17 +142,12 @@ export default function getTextElementPositions( node, textElements, startOffset * Check if the node has any descendant nodes that have a sourceCodeLocation property (all nodes other than Text nodes * should have this property). 
If such nodes exist, store the positions of each node's opening and closing tags in * an array. These positions will have to be taken into account when calculating the position of the text elements. - * - * A node can be a tree-node or a text-node. Tree-nodes have a findAll method, while text-nodes do not. - * That's why we check whether the node has a findAll method. */ if ( node.findAll ) { const descendantNodes = node.findAll( descendantNode => descendantNode.sourceCodeLocation, true ); if ( descendantNodes.length > 0 ) { descendantTagPositions = getDescendantPositions( descendantNodes ); } - } else { - descendantTagPositions = []; } textElements.forEach( ( textElement ) => { From 8d5fdfa71e308a02f4e0f703ce0a4ae5466c24fd Mon Sep 17 00:00:00 2001 From: Martijn van der Klis Date: Mon, 4 Sep 2023 08:59:01 +0200 Subject: [PATCH 348/616] Revamps the implementation to be more in line with WordPress as well as some clean-up --- .../spec/parse/build/private/tokenizeSpec.js | 100 +++++++++++++++++- .../spec/specHelpers/parse/buildTree.js | 6 +- .../helpers/word/getWordsForHTMLParser.js | 4 +- packages/yoastseo/src/parse/build/build.js | 6 +- .../src/parse/build/private/htmlEntities.js | 22 ++-- .../src/parse/build/private/tokenize.js | 31 +++--- 6 files changed, 138 insertions(+), 31 deletions(-) diff --git a/packages/yoastseo/spec/parse/build/private/tokenizeSpec.js b/packages/yoastseo/spec/parse/build/private/tokenizeSpec.js index 52c34217dfb..06ec54ee4d9 100644 --- a/packages/yoastseo/spec/parse/build/private/tokenizeSpec.js +++ b/packages/yoastseo/spec/parse/build/private/tokenizeSpec.js @@ -12,7 +12,6 @@ describe( "A test for the tokenize function", const mockResearcher = new EnglishResearcher( mockPaper ); const languageProcessor = new LanguageProcessor( mockResearcher ); buildTreeNoTokenize( mockPaper ); - // eslint-disable-next-line max-len expect( tokenize( mockPaper.getTree(), languageProcessor ) ).toEqual( { attributes: {}, childNodes: [ { @@ -53,7 +52,6 @@ describe( "A test for the tokenize function", const mockResearcher = new EnglishResearcher( mockPaper ); const languageProcessor = new LanguageProcessor( mockResearcher ); buildTreeNoTokenize( mockPaper ); - // eslint-disable-next-line max-len expect( tokenize( mockPaper.getTree(), languageProcessor ) ).toEqual( { attributes: {}, childNodes: [ @@ -229,7 +227,6 @@ describe( "A test for the tokenize function", const mockResearcher = new EnglishResearcher( mockPaper ); const languageProcessor = new LanguageProcessor( mockResearcher ); buildTreeNoTokenize( mockPaper ); - // eslint-disable-next-line max-len const result = tokenize( mockPaper.getTree(), languageProcessor ); expect( result ).toEqual( { name: "#document-fragment", @@ -370,6 +367,103 @@ describe( "A test for the tokenize function", ], } ); } ); + + it( "should correctly tokenize a sentence containing an HTML entity", function() { + const mockPaper = new Paper( "

<p>This is a paragraph&amp;</p>

    " ); + const mockResearcher = new EnglishResearcher( mockPaper ); + const languageProcessor = new LanguageProcessor( mockResearcher ); + buildTreeNoTokenize( mockPaper ); + const result = tokenize( mockPaper.getTree(), languageProcessor ); + expect( result ).toEqual( { + attributes: {}, + childNodes: [ + { + attributes: {}, + childNodes: [ + { + name: "#text", + value: "This is a paragraph#amp;", + }, + ], + isImplicit: false, + name: "p", + sentences: [ + { + sourceCodeRange: { + startOffset: 3, + endOffset: 27, + }, + text: "This is a paragraph&", + tokens: [ + { + sourceCodeRange: { + startOffset: 3, + endOffset: 7, + }, + text: "This", + }, + { + sourceCodeRange: { + startOffset: 7, + endOffset: 8, + }, + text: " ", + }, + { + sourceCodeRange: { + startOffset: 8, + endOffset: 10, + }, + text: "is", + }, + { + sourceCodeRange: { + startOffset: 10, + endOffset: 11, + }, + text: " ", + }, + { + sourceCodeRange: { + startOffset: 11, + endOffset: 12, + }, + text: "a", + }, + { + sourceCodeRange: { + startOffset: 12, + endOffset: 13, + }, + text: " ", + }, + { + sourceCodeRange: { + startOffset: 13, + endOffset: 27, + }, + text: "paragraph&", + }, + ], + }, + ], + sourceCodeLocation: { + startOffset: 0, + endOffset: 31, + startTag: { + startOffset: 0, + endOffset: 3, + }, + endTag: { + startOffset: 27, + endOffset: 31, + }, + }, + }, + ], + name: "#document-fragment", + } ); + } ); } ); describe( "A test for tokenizing a japanese sentence", function() { diff --git a/packages/yoastseo/spec/specHelpers/parse/buildTree.js b/packages/yoastseo/spec/specHelpers/parse/buildTree.js index f446404a16a..19e7c4c969f 100644 --- a/packages/yoastseo/spec/specHelpers/parse/buildTree.js +++ b/packages/yoastseo/spec/specHelpers/parse/buildTree.js @@ -4,7 +4,7 @@ import adapt from "../../../src/parse/build/private/adapt"; import { parseFragment } from "parse5"; import filterTree from "../../../src/parse/build/private/filterTree"; import permanentFilters from "../../../src/parse/build/private/alwaysFilterElements"; -import { htmlEntitiesWithAmpersandRegex } from "../../../src/parse/build/private/htmlEntities"; +import { htmlEntitiesRegex } from "../../../src/parse/build/private/htmlEntities"; /** * Builds an HTML tree for a given paper and researcher, and adds it to the paper. @@ -26,8 +26,8 @@ export default function buildTree( paper, researcher ) { */ export function buildTreeNoTokenize( paper ) { let html = paper.getText(); - // change & to #amp; - html = html.replace( htmlEntitiesWithAmpersandRegex, "#$1" ); + // Change HTML entities like "&" to "#amp;" to prevent early conversion to "&" -- which would invalidate token positions. + html = html.replace( htmlEntitiesRegex, "#$1" ); let tree = adapt( parseFragment( html, { sourceCodeLocationInfo: true } ) ); tree = filterTree( tree, permanentFilters ); diff --git a/packages/yoastseo/src/languageProcessing/helpers/word/getWordsForHTMLParser.js b/packages/yoastseo/src/languageProcessing/helpers/word/getWordsForHTMLParser.js index fb8c2080501..8de1d745e68 100644 --- a/packages/yoastseo/src/languageProcessing/helpers/word/getWordsForHTMLParser.js +++ b/packages/yoastseo/src/languageProcessing/helpers/word/getWordsForHTMLParser.js @@ -1,4 +1,5 @@ import { punctuationRegexEnd, punctuationRegexStart } from "../sanitize/removePunctuation"; +import { hashedHtmlEntities, hashedHtmlEntitiesEndRegex } from "../../../parse/build/private/htmlEntities"; /* * The following regex matches a word separator. 
A word separator is either a whitespace, a slash, a backslash, a @@ -48,7 +49,8 @@ const getWordsForHTMLParser = ( text ) => { token = token.slice( 1 ); } // Add all punctuation marks that occur after the last letter of the token to the posttokens array. - while ( punctuationRegexEnd.test( token ) ) { + // Here, prevent matching with a hashed HTML entity. + while ( punctuationRegexEnd.test( token ) && ! hashedHtmlEntitiesEndRegex.test( token ) ) { // Using unshift here because we are iterating from the end of the string to the beginning, // and we want to keep the order of the punctuation marks. // Therefore, we add them to the start of the array. diff --git a/packages/yoastseo/src/parse/build/build.js b/packages/yoastseo/src/parse/build/build.js index 0cf78b0c5c9..255caa37c24 100644 --- a/packages/yoastseo/src/parse/build/build.js +++ b/packages/yoastseo/src/parse/build/build.js @@ -8,7 +8,7 @@ import permanentFilters from "./private/alwaysFilterElements"; import { filterBeforeTokenizing } from "./private/filterBeforeTokenizing"; import parseBlocks from "./private/parseBlocks"; import filterShortcodesFromTree from "../../languageProcessing/helpers/sanitize/filterShortcodesFromTree"; -import { htmlEntitiesWithAmpersandRegex } from "./private/htmlEntities"; +import { htmlEntitiesRegex } from "./private/htmlEntities"; /** * Parses the HTML string to a tree representation of the HTML document. @@ -21,8 +21,8 @@ import { htmlEntitiesWithAmpersandRegex } from "./private/htmlEntities"; */ export default function build( paper, languageProcessor, shortcodes ) { let html = paper.getText(); - // change & to #amp; - html = html.replace( htmlEntitiesWithAmpersandRegex, "#$1" ); + // Change HTML entities like "&" to "#amp;" to prevent early conversion to "&" -- which would invalidate token positions. + html = html.replace( htmlEntitiesRegex, "#$1" ); let tree = adapt( parseFragment( html, { sourceCodeLocationInfo: true } ) ); if ( tree.childNodes && tree.childNodes.length > 0 ) { diff --git a/packages/yoastseo/src/parse/build/private/htmlEntities.js b/packages/yoastseo/src/parse/build/private/htmlEntities.js index 941685d0179..270ab20027d 100644 --- a/packages/yoastseo/src/parse/build/private/htmlEntities.js +++ b/packages/yoastseo/src/parse/build/private/htmlEntities.js @@ -1,10 +1,18 @@ -export const htmlEntities = [ "amp;", "lt;", "gt;", "quot;", "ndash", "mdash;", "copy;", - "reg;", "trade;", "asymp;", "ne;", "pound;", "euro;", "deg" ]; +// Contains characters usually converted HTML entities (cf. _wp_specialchars). +export const htmlEntities = new Map( [ + [ "amp;", "&" ], + [ "lt;", "<" ], + [ "gt;", ">" ], + [ "quot;", '"' ], + [ "#x27;", "'" ], +] ); -export const htmlEntitiesWithAmpersandRegex = new RegExp( "&(" + htmlEntities.join( "|" ) + ")", "ig" ); +// Contains characters along with their hashed HTML entities. +export const hashedHtmlEntities = new Map(); +htmlEntities.forEach( ( v, k ) => hashedHtmlEntities.set( "#" + k, v ) ); -export const htmlEntitiesWithHashRegex = new RegExp( "#(" + htmlEntities.join( "|" ) + ")", "ig" ); +// Regex to find all HTML entities. +export const htmlEntitiesRegex = new RegExp( "&(" + [ ...htmlEntities.keys() ].join( "|" ) + ")", "ig" ); -export const htmlEntitiesArray = htmlEntities.map( entity => "&" + entity ); -// Note: the order between these two arrays should align between the decoded and encoded version of the entities. 
-export const encodedHtmlEntitiesArray = [ "&", "<", ">", "\"", "–", "—", "©", "®", "™", "≈", "≠", "£", "€", "°" ]; +// Regex to find hashed HTML entities at the end of a string. +export const hashedHtmlEntitiesEndRegex = new RegExp( "(" + [ ...hashedHtmlEntities.keys() ].join( "|" ) + ")$" ); diff --git a/packages/yoastseo/src/parse/build/private/tokenize.js b/packages/yoastseo/src/parse/build/private/tokenize.js index 4f45a12b8c4..089c4716f8f 100644 --- a/packages/yoastseo/src/parse/build/private/tokenize.js +++ b/packages/yoastseo/src/parse/build/private/tokenize.js @@ -1,18 +1,18 @@ import { Heading, Paragraph } from "../../structure"; import getTextElementPositions from "./getTextElementPositions"; -import { htmlEntitiesArray, encodedHtmlEntitiesArray, htmlEntitiesWithHashRegex } from "./htmlEntities"; +import { hashedHtmlEntities } from "./htmlEntities"; /** * Splits the sentence into tokens, determines their positions in the source code, and puts them on the sentence. * * @param {Paragraph|Heading} node The paragraph or heading node to split into sentences. * @param {Sentence} sentence The sentence. - * @param {function} LanguageProcessor The languageprocessor for the current language. + * @param {LanguageProcessor} languageProcessor The language processor for the current language. * * @returns {Sentence} The sentence, with tokens. */ -function getTokens( node, sentence, LanguageProcessor ) { - sentence.tokens = LanguageProcessor.splitIntoTokens( sentence ); +function getTokens( node, sentence, languageProcessor ) { + sentence.tokens = languageProcessor.splitIntoTokens( sentence ); sentence.tokens = getTextElementPositions( node, sentence.tokens, sentence.sourceCodeRange.startOffset ); return sentence; } @@ -27,21 +27,24 @@ function getTokens( node, sentence, LanguageProcessor ) { * @returns {Sentence[]} The node's sentences. */ function getSentences( node, languageProcessor ) { - // Change "#amp;" back to "&" - let innerText = node.innerText(); - innerText = innerText.replace( htmlEntitiesWithHashRegex, "&$1" ); // Split text into sentences. - let sentences = languageProcessor.splitIntoSentences( innerText ); + let sentences = languageProcessor.splitIntoSentences( node.innerText() ); // Add position information to the sentences. sentences = getTextElementPositions( node, sentences ); // Tokenize sentences into tokens. return sentences.map( sentence => { - // After the position info is determined, change & and other entities to their encoded versions, - // without changing the position information. - for ( let i = 0; i < htmlEntitiesArray.length; i++ ) { - sentence.text = sentence.text.replace( htmlEntitiesArray[ i ], encodedHtmlEntitiesArray[ i ] ); - } - return getTokens( node, sentence, languageProcessor ); + sentence = getTokens( node, sentence, languageProcessor ); + // Now positions have been determined, change HTML entities that had earlier been converted to hashed versions back to their short version. + // For example, "&" was earlier converted into "#amp;" and is now converted into "&". + // We make this change in both the Sentence and the accompanying Tokens. 
+ hashedHtmlEntities.forEach( ( character, hashedHtmlEntity ) => { + sentence.text = sentence.text.replaceAll( hashedHtmlEntity, character ); + sentence.tokens.map( token => { + token.text = token.text.replaceAll( hashedHtmlEntity, character ); + return token; + } ); + } ); + return sentence; } ); } From 62cc8b11ab9ae1e50da95303c09f88a23e33e470 Mon Sep 17 00:00:00 2001 From: Martijn van der Klis Date: Mon, 4 Sep 2023 09:00:11 +0200 Subject: [PATCH 349/616] Fixes a spelling error and removes an eslint override --- packages/yoastseo/spec/parse/build/private/tokenizeSpec.js | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/packages/yoastseo/spec/parse/build/private/tokenizeSpec.js b/packages/yoastseo/spec/parse/build/private/tokenizeSpec.js index 06ec54ee4d9..56dfde9318b 100644 --- a/packages/yoastseo/spec/parse/build/private/tokenizeSpec.js +++ b/packages/yoastseo/spec/parse/build/private/tokenizeSpec.js @@ -466,13 +466,12 @@ describe( "A test for the tokenize function", } ); } ); -describe( "A test for tokenizing a japanese sentence", function() { - it( "should correctly tokenize a simple Japanse sentence.", function() { +describe( "A test for tokenizing a Japanese sentence", function() { + it( "should correctly tokenize a simple Japanese sentence.", function() { const mockPaper = new Paper( "

<p>犬が大好き\u3002</p>

    ", { locale: "ja_JP" } ); const mockResearcher = new JapaneseResearcher( mockPaper ); const languageProcessor = new LanguageProcessor( mockResearcher ); buildTreeNoTokenize( mockPaper ); - // eslint-disable-next-line max-len expect( tokenize( mockPaper.getTree(), languageProcessor ) ).toEqual( { attributes: {}, childNodes: [ From 3496cd07f1800cb62197c83b46ac209d4d74c4d0 Mon Sep 17 00:00:00 2001 From: Thijs van der Heijden Date: Mon, 4 Sep 2023 16:00:07 +0200 Subject: [PATCH 350/616] Add upsell when promotion is on. --- admin/class-premium-upsell-admin-block.php | 10 +++++++++- css/src/yst_plugin_tools.css | 12 ++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/admin/class-premium-upsell-admin-block.php b/admin/class-premium-upsell-admin-block.php index d14fbf6be2d..416175628fa 100644 --- a/admin/class-premium-upsell-admin-block.php +++ b/admin/class-premium-upsell-admin-block.php @@ -5,6 +5,8 @@ * @package WPSEO\Admin */ +use Yoast\WP\SEO\Promotions\Application\Promotion_Manager; + /** * Class WPSEO_Premium_Upsell_Admin_Block */ @@ -77,8 +79,14 @@ public function render() { $button_text ); - echo '
    '; + if ( YoastSEO()->classes->get( Promotion_Manager::class )->is( 'black_friday_2023' ) ) { + $bf_label = \esc_html__( 'BLACK FRIDAY', 'wordpress-seo' ); + $sale_label = \esc_html__( '30% OFF', 'wordpress-seo' ); + // phpcs:ignore WordPress.Security.EscapeOutput.OutputNotEscaped -- Already escaped above. + echo "
    $bf_label $sale_label
    "; + } + echo '
    '; echo '
    '; echo '

    ' . sprintf( diff --git a/css/src/yst_plugin_tools.css b/css/src/yst_plugin_tools.css index 45a79076e11..98c46dea7b9 100644 --- a/css/src/yst_plugin_tools.css +++ b/css/src/yst_plugin_tools.css @@ -702,6 +702,18 @@ body.toplevel_page_wpseo_dashboard .wp-badge { margin-top: 2em; } +.black-friday-container{ + margin-bottom: -2em; + max-width: 640px; + display: flex; + background-color: #1F2937; + padding: 8px 16px; + border-bottom: 2px solid #FCD34D; +} +.black-friday-container span{ + color:#FCD34D; + font-size: 1.2rem; +} .yoast_premium_upsell--header { color: #A4286A; margin-top: 0.3em; From f703844ab036149866d7f581bb0619953fbb1984 Mon Sep 17 00:00:00 2001 From: Thijs van der Heijden Date: Wed, 6 Sep 2023 13:29:20 +0200 Subject: [PATCH 351/616] Add conditional sale badge. --- admin/views/licenses.php | 20 ++++++++++++---- css/src/yoast-extensions.css | 45 +++++++++++++++++++++++++++++++++--- 2 files changed, 58 insertions(+), 7 deletions(-) diff --git a/admin/views/licenses.php b/admin/views/licenses.php index 86c78c74efd..5bb34981841 100644 --- a/admin/views/licenses.php +++ b/admin/views/licenses.php @@ -6,6 +6,8 @@ * @since 5.1 */ +use Yoast\WP\SEO\Promotions\Application\Promotion_Manager; + if ( ! defined( 'WPSEO_VERSION' ) ) { header( 'Status: 403 Forbidden' ); header( 'HTTP/1.1 403 Forbidden' ); @@ -125,6 +127,14 @@ esc_html__( '(Opens in a new browser tab)', 'wordpress-seo' ) ); +$sale_badge = ''; + +if ( YoastSEO()->classes->get( Promotion_Manager::class )->is( 'black_friday_2023' ) ) { + /* translators: %1$s expands to opening span, %2$s expands to closing span */ + $sale_badge_span = sprintf( __( '%1$sSALE 30%% OFF!%2$s', 'wordpress-seo' ), '', '' ); + + $sale_badge = '
    ' . $sale_badge_span . '
    '; +} ?>
    @@ -133,13 +143,12 @@
    +

    ', - '', + /* translators: 1: expands to Yoast SEO Premium */ + esc_html__( 'Drive more traffic to your site with %1$s', 'wordpress-seo' ), // phpcs:ignore WordPress.Security.EscapeOutput.OutputNotEscaped -- Reason: The `get_title` value is hardcoded; only passed through the WPSEO_Extensions class. $premium_extension['title'] ); @@ -283,6 +292,9 @@ } ?>
    + has_valid_subscription( $slug ) || ! $addon_manager->is_installed( $slug )) : ?> + +

    diff --git a/css/src/yoast-extensions.css b/css/src/yoast-extensions.css index 15f23897246..82d67faa8a6 100644 --- a/css/src/yoast-extensions.css +++ b/css/src/yoast-extensions.css @@ -15,7 +15,7 @@ @import url(../src/icons.css); .yoast-list--usp { padding: 0; - margin-bottom: 1em; + margin-bottom: 2rem; font-family: Arial, sans-serif; } @@ -498,7 +498,7 @@ a.promoblock:hover { background-size: contain; width: 1em; height: 100%; - background-position: left 0.4em; + background-position: left 0.3em; } .yoast-button--purple { @@ -645,8 +645,32 @@ a.promoblock:hover { } } +.yoast-seo-premium-extension-sale-badge { + margin-top: -30px; +} + +.yoast-seo-premium-extension-sale-badge span { + color: rgb(252, 211, 77); + font-size: 14px; + font-weight: 600; + background: rgb(31, 41, 55); + padding: 6px 12px; + border-radius: 14px; +} + .yoast-seo-premium-extension { margin: 2em 0.5em 1.5em; + box-shadow: 0 1px 6px 0 rgba(0, 0, 0, 0.3); + border: 1px solid #dcdcdc; + padding: 16px; + background: #fff; + max-width: 712px; +} + +.yoast-seo-premium-extension h2 { + margin-top: 16px; + font-size: 1.5rem; + color: rgb(166 30 105); } .yoast-seo-premium-extension:before, .yoast-seo-premium-extension:after { @@ -657,9 +681,24 @@ a.promoblock:hover { .yoast-seo-premium-extension:after { clear: both; } +.yoast-seo-premium-benefits__item { + margin-bottom: 8px; + line-height: 1; +} +.yoast-seo-premium-benefits__item span { + color: rgb(64,64,64); +} .yoast-seo-premium-benefits__title { - font-weight: 600; + font-weight: 700; + line-height: 24px; + font-size: .9rem; +} + +.yoast-seo-premium-benefits__description { + font-size: .9rem; + line-height: 24px; + font-weight: 400; } .yoast-seo-premium-benefits__description:before { From 4e93f2a13f7f5d742ab445c3af60d2e8f9185a14 Mon Sep 17 00:00:00 2001 From: Thijs van der Heijden Date: Wed, 6 Sep 2023 13:54:47 +0200 Subject: [PATCH 352/616] CS --- admin/views/licenses.php | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/admin/views/licenses.php b/admin/views/licenses.php index 5bb34981841..04bd18c859e 100644 --- a/admin/views/licenses.php +++ b/admin/views/licenses.php @@ -131,7 +131,7 @@ if ( YoastSEO()->classes->get( Promotion_Manager::class )->is( 'black_friday_2023' ) ) { /* translators: %1$s expands to opening span, %2$s expands to closing span */ - $sale_badge_span = sprintf( __( '%1$sSALE 30%% OFF!%2$s', 'wordpress-seo' ), '', '' ); + $sale_badge_span = sprintf( esc_html__( '%1$sSALE 30%% OFF!%2$s', 'wordpress-seo' ), '', '' ); $sale_badge = '
    ' . $sale_badge_span . '
    '; } @@ -143,7 +143,7 @@
    - +

    - has_valid_subscription( $slug ) || ! $addon_manager->is_installed( $slug )) : ?> - + has_valid_subscription( $slug ) || ! $addon_manager->is_installed( $slug ) ) : ?> +

    From b2a8e906019266c0d763c567677ecf90d9312206 Mon Sep 17 00:00:00 2001 From: Thijs van der Heijden Date: Wed, 6 Sep 2023 21:47:24 +0200 Subject: [PATCH 353/616] Add bf add. --- inc/class-addon-manager.php | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/inc/class-addon-manager.php b/inc/class-addon-manager.php index cf3f1f08dd4..3511824aac8 100644 --- a/inc/class-addon-manager.php +++ b/inc/class-addon-manager.php @@ -5,6 +5,8 @@ * @package WPSEO\Inc */ +use Yoast\WP\SEO\Promotions\Application\Promotion_Manager; + /** * Represents the addon manager. */ @@ -399,6 +401,15 @@ public function expired_subscription_warning( $plugin_data ) { $subscription = $this->get_subscription( $plugin_data['slug'] ); if ( $subscription && $this->has_subscription_expired( $subscription ) ) { $addon_link = ( isset( $this->addon_details[ $plugin_data['slug'] ] ) ) ? $this->addon_details[ $plugin_data['slug'] ]['short_link_renewal'] : $this->addon_details[ self::PREMIUM_SLUG ]['short_link_renewal']; + + $sale_copy = ''; + if ( YoastSEO()->classes->get( Promotion_Manager::class )->is( 'black_friday_2023' ) ) { + $sale_copy = sprintf( + /* translators: %1$s is a
    tag. */ + esc_html__( '%1$s Now with 30%% Black Friday Discount!', 'wordpress-seo' ), + '
    ' + ); + } echo '

    '; echo ' ' . sprintf( @@ -407,7 +418,10 @@ public function expired_subscription_warning( $plugin_data ) { esc_html( $plugin_data['name'] ), '', '' - ) . ''; + ) . + // phpcs:ignore WordPress.Security.EscapeOutput.OutputNotEscaped -- Output is escaped above. + $sale_copy + . ''; } } From 5f0b1a33b62f57d559ddff23aa2114c4a9c45054 Mon Sep 17 00:00:00 2001 From: marinakoleva Date: Thu, 7 Sep 2023 15:42:43 +0200 Subject: [PATCH 354/616] aadding a few unit tests --- .../helpers/word/getWordsForHTMLParserSpec.js | 10 +++++++ .../researches/keywordCountSpec.js | 30 ++++++++++++++----- .../spec/parse/build/private/tokenizeSpec.js | 4 +++ .../helpers/word/getWordsForHTMLParser.js | 2 +- 4 files changed, 37 insertions(+), 9 deletions(-) diff --git a/packages/yoastseo/spec/languageProcessing/helpers/word/getWordsForHTMLParserSpec.js b/packages/yoastseo/spec/languageProcessing/helpers/word/getWordsForHTMLParserSpec.js index 5b88d7c74d7..03ccad80ec9 100644 --- a/packages/yoastseo/spec/languageProcessing/helpers/word/getWordsForHTMLParserSpec.js +++ b/packages/yoastseo/spec/languageProcessing/helpers/word/getWordsForHTMLParserSpec.js @@ -32,6 +32,11 @@ const testCases = [ text: "a phrase with an apostrophe's", expectedResult: [ "a", " ", "phrase", " ", "with", " ", "an", " ", "apostrophe's" ], }, + { + description: "correctly tokenizes a phrase with an ampersand", + text: "a phrase with an ampersand&", + expectedResult: [ "a", " ", "phrase", " ", "with", " ", "an", " ", "ampersand", "&" ], + }, { description: "correctly tokenizes a phrase between quotes", text: "\"a phrase between quotes\"", @@ -68,6 +73,11 @@ const testCases = [ text: "[caption id=\"attachment_3341501\" align=\"alignnone\" width=\"300\"]", expectedResult: [ "[", "caption", " ", "id=\"attachment_3341501", "\"", " ", "align=\"alignnone", "\"", " ", "width=\"300", "\"", "]" ], }, + { + description: "doesn't match with a hashed HTML entity (in this case, #amp;)", + text: "a phrase with an ampersand#amp;", + expectedResult: [ "a", " ", "phrase", " ", "with", " ", "an", " ", "ampersand#amp;" ], + }, ]; describe.each( testCases )( "getWordsForHTMLParser", ( { description, text, expectedResult } ) => { diff --git a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js index 0a6b9172334..dae22a380e8 100644 --- a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js +++ b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js @@ -757,15 +757,29 @@ const testCasesWithSpecialCharacters = [ skip: false, }, { - description: "can match an occurrence of keyphrase ending in & as in 'keyphrase&', and output correct Marks objects", - paper: new Paper( "

<p>A string with a keyphrase&amp;.</p>

    ", { keyword: "keyphrase&" } ), + description: "can match 2 occurrences of a keyphrase ending in & as in 'keyphrase&', and output correct Marks objects", + paper: new Paper( "

<p>A string keyphrase&amp; with a keyphrase&amp;.</p>

    ", { keyword: "keyphrase&" } ), keyphraseForms: [ [ "keyphrase" ] ], - expectedCount: 1, - expectedMarkings: [ new Mark( { marked: "A string with a keyphrase&.", - original: "A string with a keyphrase&.", - position: { endOffset: 28, startOffset: 19, - startOffsetBlock: 16, - endOffsetBlock: 25, + expectedCount: 2, + expectedMarkings: [ new Mark( { + marked: "A string keyphrase& with a " + + "keyphrase&.", + original: "A string keyphrase& with a keyphrase&.", + position: { endOffset: 21, startOffset: 12, + startOffsetBlock: 9, + endOffsetBlock: 18, + attributeId: "", + clientId: "", + isFirstSection: false, + } } ), + new Mark( { + marked: "A string keyphrase& with a " + + "keyphrase&.", + original: "A string keyphrase& with a keyphrase&.", + // Position information is expected to be longer than the actual string by 4 characters, because "&" should be processed as "#amp;". + position: { endOffset: 43, startOffset: 34, + startOffsetBlock: 31, + endOffsetBlock: 40, attributeId: "", clientId: "", isFirstSection: false, diff --git a/packages/yoastseo/spec/parse/build/private/tokenizeSpec.js b/packages/yoastseo/spec/parse/build/private/tokenizeSpec.js index f1ce6dc4312..74824b918fc 100644 --- a/packages/yoastseo/spec/parse/build/private/tokenizeSpec.js +++ b/packages/yoastseo/spec/parse/build/private/tokenizeSpec.js @@ -388,6 +388,10 @@ describe( "A test for the tokenize function", childNodes: [ { name: "#text", + sourceCodeRange: { + startOffset: 3, + endOffset: 27, + }, value: "This is a paragraph#amp;", }, ], diff --git a/packages/yoastseo/src/languageProcessing/helpers/word/getWordsForHTMLParser.js b/packages/yoastseo/src/languageProcessing/helpers/word/getWordsForHTMLParser.js index 8de1d745e68..24c57e42d38 100644 --- a/packages/yoastseo/src/languageProcessing/helpers/word/getWordsForHTMLParser.js +++ b/packages/yoastseo/src/languageProcessing/helpers/word/getWordsForHTMLParser.js @@ -1,5 +1,5 @@ import { punctuationRegexEnd, punctuationRegexStart } from "../sanitize/removePunctuation"; -import { hashedHtmlEntities, hashedHtmlEntitiesEndRegex } from "../../../parse/build/private/htmlEntities"; +import { hashedHtmlEntitiesEndRegex } from "../../../parse/build/private/htmlEntities"; /* * The following regex matches a word separator. A word separator is either a whitespace, a slash, a backslash, a From c99a42be9287733830aea930fd8d39cf3b3096b9 Mon Sep 17 00:00:00 2001 From: marinakoleva Date: Thu, 7 Sep 2023 17:58:29 +0200 Subject: [PATCH 355/616] adding more unit tests --- .../helpers/word/getAllWordsFromTreeSpec.js | 4 +- .../seo/KeywordDensityAssessmentSpec.js | 42 +++++++++++++++++++ 2 files changed, 44 insertions(+), 2 deletions(-) diff --git a/packages/yoastseo/spec/languageProcessing/helpers/word/getAllWordsFromTreeSpec.js b/packages/yoastseo/spec/languageProcessing/helpers/word/getAllWordsFromTreeSpec.js index faa58cdf7e0..51101ad58b9 100644 --- a/packages/yoastseo/spec/languageProcessing/helpers/word/getAllWordsFromTreeSpec.js +++ b/packages/yoastseo/spec/languageProcessing/helpers/word/getAllWordsFromTreeSpec.js @@ -17,8 +17,8 @@ describe( "a test for getting words from the tree", () => { expect( getAllWordsFromTree( paper ) ).toEqual( [ "A", "very", "intelligent", "cat", "loves", "their", "human", "A", "dog", "is", "very", "cute", "A", "subheading", "3", "text", "text", "text", "A", "subheading", "4", "more", "text" ] ); } ); - it( "should get the correct words from text containing   and word enclosed in double quotes", () => { - const paper = new Paper( "

    What's black, orange, sassy all over, and a crowd favorite? Yep, you guessed it - \"Torties\"!

    " ); + it( "should get the correct words from text containing  , an ampersand and word enclosed in double quotes", () => { + const paper = new Paper( "

    What's black&, orange, sassy all over, and a crowd favorite? Yep, you guessed it - \"Torties\"!

    " ); researcher.setPaper( paper ); buildTree( paper, researcher ); expect( getAllWordsFromTree( paper ).length ).toEqual( 15 ); diff --git a/packages/yoastseo/spec/scoring/assessments/seo/KeywordDensityAssessmentSpec.js b/packages/yoastseo/spec/scoring/assessments/seo/KeywordDensityAssessmentSpec.js index 9ac938465cb..e91ce53d8d7 100644 --- a/packages/yoastseo/spec/scoring/assessments/seo/KeywordDensityAssessmentSpec.js +++ b/packages/yoastseo/spec/scoring/assessments/seo/KeywordDensityAssessmentSpec.js @@ -292,6 +292,48 @@ describe( "A test for marking the keyphrase", function() { expect( keyphraseDensityAssessment.getMarks() ).toEqual( expected ); } ); + it( "returns markers for a keyphrase containing an ampersand, occurring twice in the sentence", function() { + const keyphraseDensityAssessment = new KeyphraseDensityAssessment(); + const paper = new Paper( "

<p>This is a very interesting paper with a keyword&amp; and another keyword&amp;.</p>

    ", { keyword: "keyword" } ); + const researcher = new DefaultResearcher( paper ); + buildTree( paper, researcher ); + + keyphraseDensityAssessment.getResult( paper, researcher ); + const expected = [ + new Mark( { + marked: "This is a very interesting paper with a " + + "keyword& and another " + + "keyword&.", + original: "This is a very interesting paper with a keyword& and another keyword&.", + position: { + startOffset: 43, + endOffset: 50, + startOffsetBlock: 40, + endOffsetBlock: 47, + attributeId: "", + clientId: "", + isFirstSection: false, + }, + } ), + new Mark( { + marked: "This is a very interesting paper with a " + + "keyword& and another " + + "keyword&.", + original: "This is a very interesting paper with a keyword& and another keyword&.", + position: { + // Position should take into account the preceding & character -- it should be counted as 5 characters (&) instead of 1 (&). + startOffset: 68, + endOffset: 75, + startOffsetBlock: 65, + endOffsetBlock: 72, + attributeId: "", + clientId: "", + isFirstSection: false, + }, + } ) ]; + expect( keyphraseDensityAssessment.getMarks() ).toEqual( expected ); + } ); + it( "returns markers for a keyphrase containing numbers", function() { const keyphraseDensityAssessment = new KeyphraseDensityAssessment(); const paper = new Paper( "

<p>This is the release of YoastSEO 9.3.</p>

    ", { keyword: "YoastSEO 9.3" } ); From 13592e326ee5c469fa122f7be494c0739e67168a Mon Sep 17 00:00:00 2001 From: Paolo Luigi Scala Date: Fri, 8 Sep 2023 13:24:21 +0200 Subject: [PATCH 356/616] Add css for Black Friday upsell --- css/src/yst_plugin_tools.css | 61 ++++++++++++++++++++++++++++++++---- 1 file changed, 55 insertions(+), 6 deletions(-) diff --git a/css/src/yst_plugin_tools.css b/css/src/yst_plugin_tools.css index 98c46dea7b9..883e5fd6cd1 100644 --- a/css/src/yst_plugin_tools.css +++ b/css/src/yst_plugin_tools.css @@ -552,7 +552,7 @@ body.toplevel_page_wpseo_dashboard .wp-badge { } .yoast-sidebar__product { - background: #A4286A; + background: #a61e69; padding: 24px; margin-top: 34px; border-radius: 8px; @@ -567,6 +567,8 @@ body.toplevel_page_wpseo_dashboard .wp-badge { .yoast-get-premium-title { line-height: 27px; + margin-top: 0; + margin-bottom: 12px; } .yoast-get-premium-title span { @@ -588,7 +590,7 @@ body.toplevel_page_wpseo_dashboard .wp-badge { } .yoast-sidebar__product p { - margin-bottom: 16px; + margin-bottom: 12px; margin-top: 0; font-size: 1rem; } @@ -598,7 +600,7 @@ body.toplevel_page_wpseo_dashboard .wp-badge { } .yoast-sidebar__product .review-container { - margin-top: 24px; + margin-top: 16px; } .yoast-sidebar__product .review-container a { @@ -606,9 +608,14 @@ body.toplevel_page_wpseo_dashboard .wp-badge { color: #FFFFFF; } +.yoast-sidebar__product .review-container a .claim { + display: block; + color: #FFFFFF; + margin-bottom: 12px; +} + .yoast-sidebar__product .review-container .title { - font-weight: bold; - font-size: 14px; + font-weight: 500; margin-bottom: 8px; color: #FFFFFF; } @@ -628,8 +635,50 @@ body.toplevel_page_wpseo_dashboard .wp-badge { } .yoast-sidebar__product .review-container .rating .rating-text { + font-size: 16px; + font-weight: 600; +} + +.yoast-sidebar__product .sidebar__sale_banner_container { + width: calc(100% + 48px); + margin-left: -24px; + overflow-x: hidden; + overflow-y: initial; + margin-top: -40px; +} + +.yoast-sidebar__product .sidebar__sale_banner_container .sidebar__sale_banner { + background: #000; + color: #fcd34d; + width: calc(100% + 60px); + line-height: 30px; + margin-left: -30px; + transform: rotate(-5deg); font-size: 20px; - line-height: 24px; + font-weight: 500; + letter-spacing: 0.5px; + text-align: center; + z-index: 1; + margin-top: 20px; + margin-bottom: 20px; + box-shadow: 0 -1px 4px 0 #fcd34d, 0 1px 4px 0 #fcd34d, 0 -1px 0 0 #fcd34d, 0 1px 0 0 #fcd34d; + padding: 7px 0; +} + +.yoast-sidebar__product .sidebar__sale_banner_container .sidebar__sale_banner .banner_text { + display: inline-block; + margin: 0 40px; +} + +.yoast-sidebar__product .sidebar__sale_text { + text-align: center; + border-top: solid 1px #ffffff; + font-style: italic; + +} +.yoast-sidebar__product .sidebar__sale_text p { + font-size: 12.5px; + margin: 12.5px 0; } .yoast-sidebar__section { From ef73d36a1243e5ff023ef0b8bb63f103f4c715c3 Mon Sep 17 00:00:00 2001 From: Paolo Luigi Scala Date: Fri, 8 Sep 2023 13:24:29 +0200 Subject: [PATCH 357/616] Add new g2 logo --- packages/js/images/g2_logo_white_optm.svg | 1 + 1 file changed, 1 insertion(+) create mode 100644 packages/js/images/g2_logo_white_optm.svg diff --git a/packages/js/images/g2_logo_white_optm.svg b/packages/js/images/g2_logo_white_optm.svg new file mode 100644 index 00000000000..ff4e2e19899 --- /dev/null +++ b/packages/js/images/g2_logo_white_optm.svg @@ -0,0 +1 @@ + \ No newline at end of file From 06af682c3e90ce93e96fdccd43a13952774a317b Mon Sep 17 00:00:00 2001 From: Paolo Luigi 
Scala Date: Fri, 8 Sep 2023 13:24:49 +0200 Subject: [PATCH 358/616] Show the Black Friday ad when it's time --- src/presenters/admin/sidebar-presenter.php | 38 +++++++++++++++++----- 1 file changed, 29 insertions(+), 9 deletions(-) diff --git a/src/presenters/admin/sidebar-presenter.php b/src/presenters/admin/sidebar-presenter.php index 2b88d682736..028e5fb0bad 100644 --- a/src/presenters/admin/sidebar-presenter.php +++ b/src/presenters/admin/sidebar-presenter.php @@ -4,6 +4,7 @@ use WPSEO_Shortlinker; use Yoast\WP\SEO\Presenters\Abstract_Presenter; +use Yoast\WP\SEO\Promotions\Application\Promotion_Manager; /** * Presenter class for the Yoast SEO sidebar. @@ -16,6 +17,8 @@ class Sidebar_Presenter extends Abstract_Presenter { * @return string The sidebar HTML. */ public function present() { + $title = \__( 'BLACK FRIDAY - 30% OFF', 'wordpress-seo' ); + $assets_uri = \trailingslashit( \plugin_dir_url( \WPSEO_FILE ) ); $buy_yoast_seo_shortlink = WPSEO_Shortlinker::get( 'https://yoa.st/jj' ); \ob_start(); @@ -42,6 +45,13 @@ class="attachment-full size-full content-visible" sizes="(min-width: 1321px) 75px"> + classes->get( Promotion_Manager::class )->is( 'black_friday_2023' ) ) : ?> + +

    ', '' ); + \printf( \esc_html__( 'Enhance your website’s visibility with Yoast SEO Premium. Unlock %1$shigher traffic%2$s and %1$smaximize conversions%2$s!', 'wordpress-seo' ), '', '' ); ?>

    + classes->get( Promotion_Manager::class )->is( 'black_friday_2023' ) ) : ?> +
    +

    -

    + -

    + - - - - - - + + + + + + 4.6 / 5 From 78fd578d22cc60d9c3adf48acd9da74aeb15910c Mon Sep 17 00:00:00 2001 From: Paolo Luigi Scala Date: Thu, 7 Sep 2023 16:44:50 +0200 Subject: [PATCH 359/616] First implementation of sidebar promo --- css/src/admin-global.css | 31 +++++++++++++ .../components/BlackFridaySaleNotification.js | 45 +++++++++++++++++++ .../js/src/components/fills/SidebarFill.js | 2 + 3 files changed, 78 insertions(+) create mode 100644 packages/js/src/components/BlackFridaySaleNotification.js diff --git a/css/src/admin-global.css b/css/src/admin-global.css index df1b6363738..0a57ff8e828 100644 --- a/css/src/admin-global.css +++ b/css/src/admin-global.css @@ -855,4 +855,35 @@ body.folded .wpseo-admin-submit-fixed { .yoast-dashicons-notice { color: #dba617; } + +#black-friday-sale-notification.notice-yoast { + border-color: #fcd34d; + border-width: 2px; + border-radius: 8px; + background: #fff; + + margin: 5px 0 15px; + padding: 1px 12px; + + margin-left: 0; + margin-top: 20px; +} + +#black-friday-sale-notification .notice-yoast__header { + margin-bottom: 2px; +} + +.yoast-bf-sale-badge { + display: block; + position:absolute; + border-radius: 8px; + top: -10px; + left: 12px; + background-color: #1F2937; + color: #FCD34D; + padding: 2px 8px; + font-size: 10px; + font-weight: 600; + line-height: normal; +} /*# sourceMappingURL=admin-global.css.map */ diff --git a/packages/js/src/components/BlackFridaySaleNotification.js b/packages/js/src/components/BlackFridaySaleNotification.js new file mode 100644 index 00000000000..cee1cc7a3f4 --- /dev/null +++ b/packages/js/src/components/BlackFridaySaleNotification.js @@ -0,0 +1,45 @@ +import { __ } from "@wordpress/i18n"; +import { useSelect } from "@wordpress/data"; +import PropTypes from "prop-types"; + +import PersistentDismissableNotification from "../containers/PersistentDismissableNotification"; + +/** + * @param {string} store The Redux store identifier from which to determine dismissed state. + * @param {JSX.Element} image The image or null if no image. + * @param {string} url The URL for the register now link. + * + * @returns {JSX.Element} The BlackFridaySaleNotification component. + */ +const BlackFridaySaleNotification = ( { + store = "yoast-seo/editor", + url, + ...props +} ) => { + const isPremium = useSelect( select => select( store ).getIsPremium() ); + + return isPremium ? null : ( + + 30% OFF! +
    + { __( "Buy now!", "wordpress-seo" ) } + + + ); +}; + +BlackFridaySaleNotification.propTypes = { + store: PropTypes.string, + image: PropTypes.elementType, + url: PropTypes.string.isRequired, +}; + +export default BlackFridaySaleNotification; diff --git a/packages/js/src/components/fills/SidebarFill.js b/packages/js/src/components/fills/SidebarFill.js index f90a53e82e2..89ae7626b5b 100644 --- a/packages/js/src/components/fills/SidebarFill.js +++ b/packages/js/src/components/fills/SidebarFill.js @@ -19,6 +19,7 @@ import SidebarCollapsible from "../SidebarCollapsible"; import AdvancedSettings from "../../containers/AdvancedSettings"; import WincherSEOPerformanceModal from "../../containers/WincherSEOPerformanceModal"; import WebinarPromoNotification from "../WebinarPromoNotification"; +import BlackFridaySaleNotification from "../BlackFridaySaleNotification"; import BlackFridayPromoNotification from "../BlackFridayPromoNotification"; import KeywordUpsell from "../KeywordUpsell"; @@ -45,6 +46,7 @@ export default function SidebarFill( { settings } ) {
    + { isWooCommerce && blackFridayBlockEditorUrl ? : From 0ca006ef4d8782f8717f9afc510e3efc7c337ea2 Mon Sep 17 00:00:00 2001 From: Paolo Luigi Scala Date: Fri, 8 Sep 2023 15:17:13 +0200 Subject: [PATCH 360/616] Add integration to dismiss notification --- .../alerts/black-friday-sale-notification.php | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 src/integrations/alerts/black-friday-sale-notification.php diff --git a/src/integrations/alerts/black-friday-sale-notification.php b/src/integrations/alerts/black-friday-sale-notification.php new file mode 100644 index 00000000000..1903768c660 --- /dev/null +++ b/src/integrations/alerts/black-friday-sale-notification.php @@ -0,0 +1,16 @@ + Date: Fri, 8 Sep 2023 15:17:57 +0200 Subject: [PATCH 361/616] Fix the relation between the Black Friday notificastion and the fre webinar one --- packages/js/src/components/fills/SidebarFill.js | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/packages/js/src/components/fills/SidebarFill.js b/packages/js/src/components/fills/SidebarFill.js index 89ae7626b5b..fec6146da18 100644 --- a/packages/js/src/components/fills/SidebarFill.js +++ b/packages/js/src/components/fills/SidebarFill.js @@ -1,5 +1,6 @@ /* External dependencies */ import { Fill } from "@wordpress/components"; +import { select } from "@wordpress/data"; import { Fragment } from "@wordpress/element"; import PropTypes from "prop-types"; import { __ } from "@wordpress/i18n"; @@ -20,7 +21,7 @@ import AdvancedSettings from "../../containers/AdvancedSettings"; import WincherSEOPerformanceModal from "../../containers/WincherSEOPerformanceModal"; import WebinarPromoNotification from "../WebinarPromoNotification"; import BlackFridaySaleNotification from "../BlackFridaySaleNotification"; -import BlackFridayPromoNotification from "../BlackFridayPromoNotification"; + import KeywordUpsell from "../KeywordUpsell"; /* eslint-disable complexity */ @@ -39,6 +40,7 @@ export default function SidebarFill( { settings } ) { const webinarIntroBlockEditorUrl = get( window, "wpseoScriptData.webinarIntroBlockEditorUrl", "https://yoa.st/webinar-intro-block-editor" ); const blackFridayBlockEditorUrl = get( window, "wpseoScriptData.blackFridayBlockEditorUrl", "https://yoa.st/black-friday-checklist" ); const isWooCommerce = get( window, "wpseoScriptData.isWooCommerceActive", "" ); + const isBlackFridayAlertDismissed = select( "yoast-seo/editor" ).isAlertDismissed( "black-friday-sale-notification" ); return ( @@ -46,11 +48,9 @@ export default function SidebarFill( { settings } ) {
    - - { isWooCommerce && blackFridayBlockEditorUrl - ? - : - } + + { /* eslint-disable-next-line max-len */ } + { isBlackFridayAlertDismissed && }
    { settings.isKeywordAnalysisActive && From 10fede202f0d366f763da19198582e54390111de Mon Sep 17 00:00:00 2001 From: Martijn van der Klis Date: Fri, 8 Sep 2023 16:25:55 +0200 Subject: [PATCH 362/616] Adds some additional tests for checking HTML entities --- .../researches/keywordCountSpec.js | 44 ++++++++++++++++++- 1 file changed, 42 insertions(+), 2 deletions(-) diff --git a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js index dae22a380e8..ef8241bddf4 100644 --- a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js +++ b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js @@ -758,8 +758,8 @@ const testCasesWithSpecialCharacters = [ }, { description: "can match 2 occurrences of a keyphrase ending in & as in 'keyphrase&', and output correct Marks objects", - paper: new Paper( "

<p>A string keyphrase&amp; with a keyphrase&amp;.</p>

    ", { keyword: "keyphrase&" } ), - keyphraseForms: [ [ "keyphrase" ] ], + paper: new Paper( "

<p>A string keyphrase&amp; with a keyphrase&amp;.</p>

    ", { keyword: "keyphrase&" } ), + keyphraseForms: [ [ "keyphrase&" ] ], expectedCount: 2, expectedMarkings: [ new Mark( { marked: "A string keyphrase& with a " + @@ -786,6 +786,46 @@ const testCasesWithSpecialCharacters = [ } } ) ], skip: false, }, + { + description: "can match an occurrence of a keyphrase after HTML entities, and output correct Marks objects", + paper: new Paper( "

<p>I find a&amp;b to be &gt; c&amp;d for dog food.</p>

    ", { keyword: "dog" } ), + keyphraseForms: [ [ "dog" ] ], + expectedCount: 1, + expectedMarkings: [ + new Mark( { + marked: "I find a&b to be > c&d for dog food.", + original: "I find a&b to be > c&d for dog food.", + position: { + startOffsetBlock: 38, + endOffsetBlock: 41, + startOffset: 41, + endOffset: 44, + attributeId: "", + clientId: "", + isFirstSection: false, + } } ) ], + skip: false, + }, + { + description: "can match an occurrence of a keyphrase containing an & as in 'a&b', and output correct Marks objects", + paper: new Paper( "

<p>At a&amp;b they have the best stuff.</p>

    ", { keyword: "a&b" } ), + keyphraseForms: [ [ "a&b" ] ], + expectedCount: 1, + expectedMarkings: [ + new Mark( { + marked: "At a&b they have the best stuff.", + original: "At a&b they have the best stuff.", + position: { + startOffsetBlock: 3, + endOffsetBlock: 10, + startOffset: 6, + endOffset: 13, + attributeId: "", + clientId: "", + isFirstSection: false, + } } ) ], + skip: false, + }, { description: "doesn't match unhyphenated occurrences in the text if the keyphrase is hyphenated.", paper: new Paper( "

<p>A string with a key word.</p>

    ", { keyword: "key-word" } ), From f1f1126f3ce07593c0972c7c2ce4556dddbd6849 Mon Sep 17 00:00:00 2001 From: Paolo Luigi Scala Date: Fri, 8 Sep 2023 17:01:18 +0200 Subject: [PATCH 363/616] Add parameter to not check for warnings --- .../src/components/TimeConstrainedNotification.js | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/packages/js/src/components/TimeConstrainedNotification.js b/packages/js/src/components/TimeConstrainedNotification.js index e66a25f3af5..ba31dfb37cc 100644 --- a/packages/js/src/components/TimeConstrainedNotification.js +++ b/packages/js/src/components/TimeConstrainedNotification.js @@ -5,9 +5,15 @@ import PersistentDismissableNotification from "../containers/PersistentDismissab /** * Checks if there are any warnings. * + * @param {boolean} shouldCheckForWarnings Whether to check for warnings. + * * @returns {boolean} Whether there are any warnings. */ -const areThereAnyWarnings = () => { +const areThereAnyWarnings = ( shouldCheckForWarnings ) => { + if ( ! shouldCheckForWarnings ) { + return false; + } + const warningsFree = select( "yoast-seo/editor" ).getWarningMessage(); const warningsPremium = select( "yoast-seo-premium/editor" )?.getMetaboxWarning() ?? []; @@ -17,6 +23,7 @@ const areThereAnyWarnings = () => { /** * @param {string} store The Redux store identifier from which to determine dismissed state. * @param {JSX.Element} image The image or null if no image. + * @param {boolean} shouldCheckForWarnings Whether to check for warnings. * @param {string} title The title of the notification. * @param {string} promoId The promotion id. * @param {string} alertKey The unique id for the alert. @@ -27,6 +34,7 @@ const areThereAnyWarnings = () => { export const TimeConstrainedNotification = ( { store = "yoast-seo/editor", image: Image = null, + shouldCheckForWarnings = false, title, promoId, alertKey, @@ -37,7 +45,7 @@ export const TimeConstrainedNotification = ( { return ( - ! 
areThereAnyWarnings() && promotionActive && Date: Fri, 8 Sep 2023 17:01:33 +0200 Subject: [PATCH 364/616] Align with new parameter --- .../js/src/components/BlackFridayProductEditorChecklistPromo.js | 1 + 1 file changed, 1 insertion(+) diff --git a/packages/js/src/components/BlackFridayProductEditorChecklistPromo.js b/packages/js/src/components/BlackFridayProductEditorChecklistPromo.js index 04b35e50996..9c96414c4e0 100644 --- a/packages/js/src/components/BlackFridayProductEditorChecklistPromo.js +++ b/packages/js/src/components/BlackFridayProductEditorChecklistPromo.js @@ -17,6 +17,7 @@ export const BlackFridayProductEditorChecklistPromo = () => { return ( Date: Fri, 8 Sep 2023 17:02:05 +0200 Subject: [PATCH 365/616] Use TimeConstrainedNotification instead of vanilla PersistentDismissableNotification component --- .../BlackFridayPromoNotification.js | 48 ++++++++----------- 1 file changed, 21 insertions(+), 27 deletions(-) diff --git a/packages/js/src/components/BlackFridayPromoNotification.js b/packages/js/src/components/BlackFridayPromoNotification.js index cd1865415c3..6f7005a624e 100644 --- a/packages/js/src/components/BlackFridayPromoNotification.js +++ b/packages/js/src/components/BlackFridayPromoNotification.js @@ -1,8 +1,9 @@ +import { select } from "@wordpress/data"; +import { createInterpolateElement } from "@wordpress/element"; import { __, sprintf } from "@wordpress/i18n"; -import PropTypes from "prop-types"; +import { addQueryArgs } from "@wordpress/url"; -import PersistentDismissableNotification from "../containers/PersistentDismissableNotification"; -import { ReactComponent as DefaultImage } from "../../../../images/succes_marieke_bubble_optm.svg"; +import { TimeConstrainedNotification } from "./TimeConstrainedNotification"; /** * @param {string} store The Redux store identifier from which to determine dismissed state. @@ -12,43 +13,36 @@ import { ReactComponent as DefaultImage } from "../../../../images/succes_mariek * @returns {JSX.Element} The BlackFridayPromoNotification component. */ const BlackFridayPromoNotification = ( { - store = "yoast-seo/editor", - image: Image = DefaultImage, - url, ...props } ) => { - const rawAdContent = sprintf( + const linkParams = select( "yoast-seo/editor" ).selectLinkParams(); + const body = createInterpolateElement( + sprintf( /* translators: %1$s expands to Yoast, %2$s expands to a 'strong' start tag, %2$s to a 'strong' end tag. 
*/ - __( "The %1$s %2$sultimate Black Friday checklist%3$s helps you prepare in time, so you can boost your results during this sale.", "wordpress-seo" ), - "Yoast", - "", - "" + __( "The %1$s %2$sultimate Black Friday checklist%3$s helps you prepare in time, so you can boost your results during this sale.", "wordpress-seo" ), + "Yoast", + "", + "" ), + { + strong: , + } ); - const re = /<\/?strong>/; - const parts = rawAdContent.split( re ); - return ( - - { parts[ 0 ] }{ parts[ 1 ] }{ parts[ 2 ] } -   + { body } +   { __( "Get the checklist and start optimizing now!", "wordpress-seo" ) } - + ); }; -BlackFridayPromoNotification.propTypes = { - store: PropTypes.string, - image: PropTypes.elementType, - url: PropTypes.string.isRequired, -}; - export default BlackFridayPromoNotification; From c76d45c1d1be53eb92d2f4d5bf9cabb347bde454 Mon Sep 17 00:00:00 2001 From: Paolo Luigi Scala Date: Fri, 8 Sep 2023 17:02:16 +0200 Subject: [PATCH 366/616] Arrange various notifications --- packages/js/src/components/fills/SidebarFill.js | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/packages/js/src/components/fills/SidebarFill.js b/packages/js/src/components/fills/SidebarFill.js index fec6146da18..26db08f45bc 100644 --- a/packages/js/src/components/fills/SidebarFill.js +++ b/packages/js/src/components/fills/SidebarFill.js @@ -21,6 +21,7 @@ import AdvancedSettings from "../../containers/AdvancedSettings"; import WincherSEOPerformanceModal from "../../containers/WincherSEOPerformanceModal"; import WebinarPromoNotification from "../WebinarPromoNotification"; import BlackFridaySaleNotification from "../BlackFridaySaleNotification"; +import BlackFridayPromoNotification from "../BlackFridayPromoNotification"; import KeywordUpsell from "../KeywordUpsell"; @@ -41,16 +42,20 @@ export default function SidebarFill( { settings } ) { const blackFridayBlockEditorUrl = get( window, "wpseoScriptData.blackFridayBlockEditorUrl", "https://yoa.st/black-friday-checklist" ); const isWooCommerce = get( window, "wpseoScriptData.isWooCommerceActive", "" ); const isBlackFridayAlertDismissed = select( "yoast-seo/editor" ).isAlertDismissed( "black-friday-sale-notification" ); - + const isBlackFridayPromoAlertDismissed = select( "yoast-seo/editor" ).isAlertDismissed( "black-friday-promo-notification" ); return (
    + { /* eslint-disable max-len */ } + { isBlackFridayAlertDismissed && + isBlackFridayPromoAlertDismissed && + + } + { isWooCommerce && blackFridayBlockEditorUrl && } - { /* eslint-disable-next-line max-len */ } - { isBlackFridayAlertDismissed && }
    { settings.isKeywordAnalysisActive && From 6a55b4bdd3c4629ab607d25b4f98438d692959b8 Mon Sep 17 00:00:00 2001 From: Paolo Luigi Scala Date: Sun, 10 Sep 2023 01:13:53 +0200 Subject: [PATCH 367/616] Move the check to a helper --- packages/js/src/helpers/checkForMetaboxWarnings.js | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 packages/js/src/helpers/checkForMetaboxWarnings.js diff --git a/packages/js/src/helpers/checkForMetaboxWarnings.js b/packages/js/src/helpers/checkForMetaboxWarnings.js new file mode 100644 index 00000000000..811fdad8cdd --- /dev/null +++ b/packages/js/src/helpers/checkForMetaboxWarnings.js @@ -0,0 +1,13 @@ +import { select } from "@wordpress/data"; +/** + * Checks if there are any warnings in the metabox. + * + * @returns {boolean} Whether there are any warnings. + */ +export const checkForMetaboxWarnings = () => { + const isPremium = select( "yoast-seo/editor" ).getIsPremium(); + const warningsFree = select( "yoast-seo/editor" ).getWarningMessage(); + const warningsPremium = isPremium ? select( "yoast-seo-premium/editor" )?.getMetaboxWarning() ?? [] : false; + + return warningsPremium.length > 0 || warningsFree.length > 0; +}; From d43619354b637dd4c948c5582b8f02d8db609dea Mon Sep 17 00:00:00 2001 From: Paolo Luigi Scala Date: Sun, 10 Sep 2023 01:14:21 +0200 Subject: [PATCH 368/616] Create an HOC to add warnings check to a component --- .../higherorder/withMetaboxWarningsCheck.js | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 packages/js/src/components/higherorder/withMetaboxWarningsCheck.js diff --git a/packages/js/src/components/higherorder/withMetaboxWarningsCheck.js b/packages/js/src/components/higherorder/withMetaboxWarningsCheck.js new file mode 100644 index 00000000000..ffebef3f448 --- /dev/null +++ b/packages/js/src/components/higherorder/withMetaboxWarningsCheck.js @@ -0,0 +1,22 @@ +import { checkForMetaboxWarnings } from "../../helpers/checkForMetaboxWarnings"; +/** + * Adds a check for any warnings in the metabox before rendering the component. + * @param {JSX.ElementClass} Component The component. + * @returns {JSX.ElementClass} The wrapped component. + */ +export const withMetaboxWarningsCheck = ( Component ) => { + /** + * @param {Object} props The props. + * @returns {JSX.Element} The element. + */ + const MetaboxWarningsCheck = ( props ) => { + return ( + ! checkForMetaboxWarnings() && + + ); + }; + + return MetaboxWarningsCheck; +}; From 4f752cc808b1790960a2633ed22f7b2db1c45393 Mon Sep 17 00:00:00 2001 From: Paolo Luigi Scala Date: Sun, 10 Sep 2023 01:14:36 +0200 Subject: [PATCH 369/616] Remove check logic --- .../components/TimeConstrainedNotification.js | 24 +------------------ 1 file changed, 1 insertion(+), 23 deletions(-) diff --git a/packages/js/src/components/TimeConstrainedNotification.js b/packages/js/src/components/TimeConstrainedNotification.js index ba31dfb37cc..2053844b240 100644 --- a/packages/js/src/components/TimeConstrainedNotification.js +++ b/packages/js/src/components/TimeConstrainedNotification.js @@ -2,28 +2,9 @@ import PropTypes from "prop-types"; import { select } from "@wordpress/data"; import PersistentDismissableNotification from "../containers/PersistentDismissableNotification"; -/** - * Checks if there are any warnings. - * - * @param {boolean} shouldCheckForWarnings Whether to check for warnings. - * - * @returns {boolean} Whether there are any warnings. - */ -const areThereAnyWarnings = ( shouldCheckForWarnings ) => { - if ( ! 
shouldCheckForWarnings ) { - return false; - } - - const warningsFree = select( "yoast-seo/editor" ).getWarningMessage(); - const warningsPremium = select( "yoast-seo-premium/editor" )?.getMetaboxWarning() ?? []; - - return warningsPremium.length > 0 || warningsFree.length > 0; -}; - /** * @param {string} store The Redux store identifier from which to determine dismissed state. * @param {JSX.Element} image The image or null if no image. - * @param {boolean} shouldCheckForWarnings Whether to check for warnings. * @param {string} title The title of the notification. * @param {string} promoId The promotion id. * @param {string} alertKey The unique id for the alert. @@ -34,7 +15,6 @@ const areThereAnyWarnings = ( shouldCheckForWarnings ) => { export const TimeConstrainedNotification = ( { store = "yoast-seo/editor", image: Image = null, - shouldCheckForWarnings = false, title, promoId, alertKey, @@ -43,9 +23,8 @@ export const TimeConstrainedNotification = ( { } ) => { const promotionActive = select( store ).isPromotionActive( promoId ); - return ( - ! areThereAnyWarnings( shouldCheckForWarnings ) && promotionActive && Date: Sun, 10 Sep 2023 01:14:47 +0200 Subject: [PATCH 370/616] Remove unused prop --- .../js/src/components/BlackFridayProductEditorChecklistPromo.js | 1 - 1 file changed, 1 deletion(-) diff --git a/packages/js/src/components/BlackFridayProductEditorChecklistPromo.js b/packages/js/src/components/BlackFridayProductEditorChecklistPromo.js index 9c96414c4e0..04b35e50996 100644 --- a/packages/js/src/components/BlackFridayProductEditorChecklistPromo.js +++ b/packages/js/src/components/BlackFridayProductEditorChecklistPromo.js @@ -17,7 +17,6 @@ export const BlackFridayProductEditorChecklistPromo = () => { return ( Date: Sun, 10 Sep 2023 01:14:55 +0200 Subject: [PATCH 371/616] Use the HOC --- packages/js/src/components/fills/MetaboxFill.js | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/packages/js/src/components/fills/MetaboxFill.js b/packages/js/src/components/fills/MetaboxFill.js index f33a922031b..fc27440f31a 100644 --- a/packages/js/src/components/fills/MetaboxFill.js +++ b/packages/js/src/components/fills/MetaboxFill.js @@ -25,6 +25,7 @@ import PremiumSEOAnalysisModal from "../modals/PremiumSEOAnalysisModal"; import KeywordUpsell from "../KeywordUpsell"; import { BlackFridayProductEditorChecklistPromo } from "../BlackFridayProductEditorChecklistPromo"; import { isWooCommerceActive } from "../../helpers/isWooCommerceActive"; +import { withMetaboxWarningsCheck } from "../higherorder/withMetaboxWarningsCheck"; /* eslint-disable complexity */ /** @@ -58,6 +59,7 @@ export default function MetaboxFill( { settings, wincherKeyphrases, setWincherNo return isProduct && isWooCommerceActive(); }; + const BlackFridayProductEditorChecklistPromoWithMetaboxWarningsCheck = withMetaboxWarningsCheck( BlackFridayProductEditorChecklistPromo ); return ( <> { isWordProofIntegrationActive() && } @@ -72,7 +74,7 @@ export default function MetaboxFill( { settings, wincherKeyphrases, setWincherNo key="time-constrained-notification" renderPriority={ 2 } > - { shouldShowWooCommercePromo() && } + { shouldShowWooCommercePromo() && } { settings.isKeywordAnalysisActive && Date: Mon, 11 Sep 2023 10:18:33 +0200 Subject: [PATCH 372/616] Revert copy --- src/presenters/admin/sidebar-presenter.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/presenters/admin/sidebar-presenter.php b/src/presenters/admin/sidebar-presenter.php index 028e5fb0bad..d0694cf27ab 100644 --- 
a/src/presenters/admin/sidebar-presenter.php
+++ b/src/presenters/admin/sidebar-presenter.php
@@ -61,7 +61,7 @@ class="attachment-full size-full content-visible"

', '' );
+			\printf( \esc_html__( 'Be more efficient in creating titles and meta descriptions with the help of AI. %1$sGet 24/7 support%2$s and boost your website’s visibility.', 'wordpress-seo' ), '', '' );
 		?>

    classes->get( Promotion_Manager::class )->is( 'black_friday_2023' ) ) : ?> From 97eaac2f8b0103a392b7f9cc845715b77b50ca24 Mon Sep 17 00:00:00 2001 From: Paolo Luigi Scala Date: Mon, 11 Sep 2023 12:25:13 +0200 Subject: [PATCH 373/616] Renamed for continuity with other names --- ...js => BlackFridaySidebarChecklistPromo.js} | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) rename packages/js/src/components/{BlackFridayPromoNotification.js => BlackFridaySidebarChecklistPromo.js} (73%) diff --git a/packages/js/src/components/BlackFridayPromoNotification.js b/packages/js/src/components/BlackFridaySidebarChecklistPromo.js similarity index 73% rename from packages/js/src/components/BlackFridayPromoNotification.js rename to packages/js/src/components/BlackFridaySidebarChecklistPromo.js index 6f7005a624e..6f08810a54d 100644 --- a/packages/js/src/components/BlackFridayPromoNotification.js +++ b/packages/js/src/components/BlackFridaySidebarChecklistPromo.js @@ -6,13 +6,13 @@ import { addQueryArgs } from "@wordpress/url"; import { TimeConstrainedNotification } from "./TimeConstrainedNotification"; /** - * @param {string} store The Redux store identifier from which to determine dismissed state. - * @param {JSX.Element} image The image or null if no image. - * @param {string} url The URL for the register now link. + * The BlackFridaySidebarChecklistPromo component. * - * @returns {JSX.Element} The BlackFridayPromoNotification component. + * @param {Object} props The props. + * + * @returns {JSX.Element} The BlackFridaySidebarChecklistPromo component. */ -const BlackFridayPromoNotification = ( { +export const BlackFridaySidebarChecklistPromo = ( { ...props } ) => { const linkParams = select( "yoast-seo/editor" ).selectLinkParams(); @@ -29,11 +29,10 @@ const BlackFridayPromoNotification = ( { ); return ( @@ -44,5 +43,3 @@ const BlackFridayPromoNotification = ( { ); }; - -export default BlackFridayPromoNotification; From 0ebe9b75476d2cf8705717331548a51bd88be9e9 Mon Sep 17 00:00:00 2001 From: Paolo Luigi Scala Date: Mon, 11 Sep 2023 12:25:41 +0200 Subject: [PATCH 374/616] Re-arranged props --- .../src/components/BlackFridayProductEditorChecklistPromo.js | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/packages/js/src/components/BlackFridayProductEditorChecklistPromo.js b/packages/js/src/components/BlackFridayProductEditorChecklistPromo.js index 04b35e50996..0276b7f18cd 100644 --- a/packages/js/src/components/BlackFridayProductEditorChecklistPromo.js +++ b/packages/js/src/components/BlackFridayProductEditorChecklistPromo.js @@ -16,10 +16,11 @@ export const BlackFridayProductEditorChecklistPromo = () => { return ( { createInterpolateElement( From 1f18e553a429585cefd4fbc2a93db97bd5384348 Mon Sep 17 00:00:00 2001 From: Paolo Luigi Scala Date: Mon, 11 Sep 2023 12:26:01 +0200 Subject: [PATCH 375/616] Use TimeConstrainedNotification --- .../components/BlackFridaySaleNotification.js | 41 ++++++++----------- 1 file changed, 16 insertions(+), 25 deletions(-) diff --git a/packages/js/src/components/BlackFridaySaleNotification.js b/packages/js/src/components/BlackFridaySaleNotification.js index cee1cc7a3f4..f380b9e9a25 100644 --- a/packages/js/src/components/BlackFridaySaleNotification.js +++ b/packages/js/src/components/BlackFridaySaleNotification.js @@ -1,45 +1,36 @@ import { __ } from "@wordpress/i18n"; -import { useSelect } from "@wordpress/data"; -import PropTypes from "prop-types"; +import { select } from "@wordpress/data"; +import { addQueryArgs } from 
"@wordpress/url"; -import PersistentDismissableNotification from "../containers/PersistentDismissableNotification"; +import { TimeConstrainedNotification } from "./TimeConstrainedNotification"; /** - * @param {string} store The Redux store identifier from which to determine dismissed state. - * @param {JSX.Element} image The image or null if no image. - * @param {string} url The URL for the register now link. + * The BlackFridaySaleNotification component. + * + * @param {Object} props The props. * * @returns {JSX.Element} The BlackFridaySaleNotification component. */ -const BlackFridaySaleNotification = ( { - store = "yoast-seo/editor", - url, +export const BlackFridaySaleNotification = ( { ...props } ) => { - const isPremium = useSelect( select => select( store ).getIsPremium() ); + const isPremium = select( "yoast-seo/editor" ).getIsPremium(); + const linkParams = select( "yoast-seo/editor" ).selectLinkParams(); return isPremium ? null : ( - 30% OFF! - + { __( "Buy now!", "wordpress-seo" ) } - + ); }; - -BlackFridaySaleNotification.propTypes = { - store: PropTypes.string, - image: PropTypes.elementType, - url: PropTypes.string.isRequired, -}; - -export default BlackFridaySaleNotification; From 5276525ad5bdac76cca530f3d3fdb7e348c5f190 Mon Sep 17 00:00:00 2001 From: Paolo Luigi Scala Date: Mon, 11 Sep 2023 12:26:25 +0200 Subject: [PATCH 376/616] Renamed for clarity --- .../alerts/black-friday-promo-notification.php | 16 ---------------- .../alerts/black-friday-sale-notification.php | 2 +- 2 files changed, 1 insertion(+), 17 deletions(-) delete mode 100644 src/integrations/alerts/black-friday-promo-notification.php diff --git a/src/integrations/alerts/black-friday-promo-notification.php b/src/integrations/alerts/black-friday-promo-notification.php deleted file mode 100644 index be993d6b999..00000000000 --- a/src/integrations/alerts/black-friday-promo-notification.php +++ /dev/null @@ -1,16 +0,0 @@ - Date: Mon, 11 Sep 2023 12:28:04 +0200 Subject: [PATCH 377/616] Renamed promotion id --- src/promotions/domain/black-friday-promotion.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/promotions/domain/black-friday-promotion.php b/src/promotions/domain/black-friday-promotion.php index f809b11542d..c553fc3d18f 100644 --- a/src/promotions/domain/black-friday-promotion.php +++ b/src/promotions/domain/black-friday-promotion.php @@ -14,7 +14,7 @@ class Black_Friday_Promotion extends Abstract_Promotion implements Promotion_Int */ public function __construct() { parent::__construct( - 'black_friday_2023', + 'black_friday_2023_sale', new Time_Interval( \gmmktime( 11, 00, 00, 11, 23, 2023 ), \gmmktime( 11, 00, 00, 11, 28, 2023 ) ) ); } From 84a1dd6f604245661d53ea2d66b6b5df921f3e80 Mon Sep 17 00:00:00 2001 From: Paolo Luigi Scala Date: Mon, 11 Sep 2023 12:28:25 +0200 Subject: [PATCH 378/616] Add notification for sidebar checklist promo --- ...ack-friday-sidebar-checklist-notification.php | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 src/integrations/alerts/black-friday-sidebar-checklist-notification.php diff --git a/src/integrations/alerts/black-friday-sidebar-checklist-notification.php b/src/integrations/alerts/black-friday-sidebar-checklist-notification.php new file mode 100644 index 00000000000..71ec00ed0b7 --- /dev/null +++ b/src/integrations/alerts/black-friday-sidebar-checklist-notification.php @@ -0,0 +1,16 @@ + Date: Mon, 11 Sep 2023 12:29:21 +0200 Subject: [PATCH 379/616] Renamed function for clarity --- 
 packages/js/src/components/fills/MetaboxFill.js | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/packages/js/src/components/fills/MetaboxFill.js b/packages/js/src/components/fills/MetaboxFill.js
index fc27440f31a..4c1d795e1c8 100644
--- a/packages/js/src/components/fills/MetaboxFill.js
+++ b/packages/js/src/components/fills/MetaboxFill.js
@@ -54,7 +54,7 @@ export default function MetaboxFill( { settings, wincherKeyphrases, setWincherNo
 	 *
 	 * @returns {boolean} Whether the WooCommerce promo should be shown.
 	 */
-	const shouldShowWooCommercePromo = () => {
+	const shouldShowWooCommerceChecklistPromo = () => {
 		const isProduct = select( "yoast-seo/editor" ).getIsProduct();
 		return isProduct && isWooCommerceActive();
 	};
@@ -74,7 +74,7 @@ export default function MetaboxFill( { settings, wincherKeyphrases, setWincherNo
-				{ shouldShowWooCommerceChecklistPromo() && }
+				{ shouldShowWooCommerceChecklistPromo() && }
    { settings.isKeywordAnalysisActive && Date: Mon, 11 Sep 2023 13:09:38 +0200 Subject: [PATCH 380/616] Use service instead of injecting the Promotion_Manager --- src/integrations/third-party/elementor.php | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/src/integrations/third-party/elementor.php b/src/integrations/third-party/elementor.php index 3c3d743db6c..12713f9c850 100644 --- a/src/integrations/third-party/elementor.php +++ b/src/integrations/third-party/elementor.php @@ -26,7 +26,7 @@ use Yoast\WP\SEO\Integrations\Integration_Interface; use Yoast\WP\SEO\Introductions\Infrastructure\Wistia_Embed_Permission_Repository; use Yoast\WP\SEO\Presenters\Admin\Meta_Fields_Presenter; -use Yoast\WP\SEO\Promotions\Application\Promotion_Manager_Interface; +use Yoast\WP\SEO\Promotions\Application\Promotion_Manager; /** * Integrates the Yoast SEO metabox in the Elementor editor. @@ -101,13 +101,6 @@ class Elementor implements Integration_Interface { */ protected $inclusive_language_analysis; - /** - * The promotions manager. - * - * @var Promotion_Manager_Interface - */ - private $promotion_manager; - /** * Returns the conditionals based in which this loadable should be active. * @@ -123,18 +116,15 @@ public static function get_conditionals() { * @param WPSEO_Admin_Asset_Manager $asset_manager The asset manager. * @param Options_Helper $options The options helper. * @param Capability_Helper $capability The capability helper. - * @param Promotion_Manager_Interface $promotion_manager The promotions manager. */ public function __construct( WPSEO_Admin_Asset_Manager $asset_manager, Options_Helper $options, - Capability_Helper $capability, - Promotion_Manager_Interface $promotion_manager + Capability_Helper $capability ) { $this->asset_manager = $asset_manager; $this->options = $options; $this->capability = $capability; - $this->promotion_manager = $promotion_manager; $this->seo_analysis = new WPSEO_Metabox_Analysis_SEO(); $this->readability_analysis = new WPSEO_Metabox_Analysis_Readability(); @@ -468,7 +458,7 @@ public function enqueue() { ], 'dismissedAlerts' => $dismissed_alerts, 'webinarIntroElementorUrl' => WPSEO_Shortlinker::get( 'https://yoa.st/webinar-intro-elementor' ), - 'blackFridayBlockEditorUrl' => ( $this->promotion_manager->is( 'black_friday_2023_checklist' ) ) ? 
WPSEO_Shortlinker::get( 'https://yoa.st/black-friday-checklist' ) : '', + 'currentPromotions' => YoastSEO()->classes->get( Promotion_Manager::class )->get_current_promotions(), 'usedKeywordsNonce' => \wp_create_nonce( 'wpseo-keyword-usage-and-post-types' ), 'linkParams' => WPSEO_Shortlinker::get_query_params(), 'pluginUrl' => \plugins_url( '', \WPSEO_FILE ), From a7f4342d32f9f8422ad70093262f635c3f934d57 Mon Sep 17 00:00:00 2001 From: Paolo Luigi Scala Date: Mon, 11 Sep 2023 13:10:56 +0200 Subject: [PATCH 381/616] Pass current promotion to Elementor --- packages/js/src/elementor/initializers/editor-store.js | 2 ++ 1 file changed, 2 insertions(+) diff --git a/packages/js/src/elementor/initializers/editor-store.js b/packages/js/src/elementor/initializers/editor-store.js index df7b6e17263..db30a1325c1 100644 --- a/packages/js/src/elementor/initializers/editor-store.js +++ b/packages/js/src/elementor/initializers/editor-store.js @@ -53,6 +53,8 @@ const populateStore = store => { store.dispatch( actions.setWincherAutomaticKeyphaseTracking( window.wpseoScriptData.metabox.wincherAutoAddKeyphrases ) ); store.dispatch( actions.setDismissedAlerts( get( window, "wpseoScriptData.dismissedAlerts", {} ) ) ); + store.dispatch( actions.setCurrentPromotions( get( window, "wpseoScriptData.currentPromotions", {} ) ) ); + store.dispatch( actions.setIsPremium( Boolean( get( window, "wpseoScriptData.metabox.isPremium", false ) ) ) ); store.dispatch( actions.setLinkParams( get( window, "wpseoScriptData.linkParams", {} ) ) ); From 80642502ff9482027af1826153a44b62ec397017 Mon Sep 17 00:00:00 2001 From: Paolo Luigi Scala Date: Mon, 11 Sep 2023 13:11:23 +0200 Subject: [PATCH 382/616] Restructure rendering logic for the promotions --- .../js/src/components/fills/SidebarFill.js | 33 ++++++++++++++----- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/packages/js/src/components/fills/SidebarFill.js b/packages/js/src/components/fills/SidebarFill.js index 26db08f45bc..dcb6146764d 100644 --- a/packages/js/src/components/fills/SidebarFill.js +++ b/packages/js/src/components/fills/SidebarFill.js @@ -20,8 +20,8 @@ import SidebarCollapsible from "../SidebarCollapsible"; import AdvancedSettings from "../../containers/AdvancedSettings"; import WincherSEOPerformanceModal from "../../containers/WincherSEOPerformanceModal"; import WebinarPromoNotification from "../WebinarPromoNotification"; -import BlackFridaySaleNotification from "../BlackFridaySaleNotification"; -import BlackFridayPromoNotification from "../BlackFridayPromoNotification"; +import { BlackFridaySaleNotification } from "../BlackFridaySaleNotification"; +import { BlackFridaySidebarChecklistPromo } from "../BlackFridaySidebarChecklistPromo"; import KeywordUpsell from "../KeywordUpsell"; @@ -38,11 +38,27 @@ import KeywordUpsell from "../KeywordUpsell"; * @constructor */ export default function SidebarFill( { settings } ) { + /** + * Checks if the Webinar promotion should be shown. + * @returns {boolean} Whether the Webinar promotion should be shown. 
+ */ + const shouldShowWebinarPromoNotification = () => { + const isBlackFridaySalePromoActive = select( "yoast-seo/editor" ).isPromotionActive( "black_friday_2023_sale" ); + const isBlackFridaySaleAlertDismissed = select( "yoast-seo/editor" ).isAlertDismissed( "black-friday-2023-sale" ); + const isBlackFridayChecklistPromoActive = select( "yoast-seo/editor" ).isPromotionActive( "black_friday_2023_checklist" ); + const isBlackFridayChecklistAlertDismissed = select( "yoast-seo/editor" ).isAlertDismissed( "black-friday-2023-sidebar-checklist" ); + + if ( ( isBlackFridaySalePromoActive && ! isBlackFridaySaleAlertDismissed ) || + ( isBlackFridayChecklistPromoActive && ! isBlackFridayChecklistAlertDismissed ) ) { + return false; + } + + return true; + }; + const webinarIntroBlockEditorUrl = get( window, "wpseoScriptData.webinarIntroBlockEditorUrl", "https://yoa.st/webinar-intro-block-editor" ); - const blackFridayBlockEditorUrl = get( window, "wpseoScriptData.blackFridayBlockEditorUrl", "https://yoa.st/black-friday-checklist" ); const isWooCommerce = get( window, "wpseoScriptData.isWooCommerceActive", "" ); - const isBlackFridayAlertDismissed = select( "yoast-seo/editor" ).isAlertDismissed( "black-friday-sale-notification" ); - const isBlackFridayPromoAlertDismissed = select( "yoast-seo/editor" ).isAlertDismissed( "black-friday-promo-notification" ); + return ( @@ -50,12 +66,11 @@ export default function SidebarFill( { settings } ) {
    { /* eslint-disable max-len */ } - { isBlackFridayAlertDismissed && - isBlackFridayPromoAlertDismissed && + { shouldShowWebinarPromoNotification() && } - { isWooCommerce && blackFridayBlockEditorUrl && } - + { isWooCommerce && } +
    { settings.isKeywordAnalysisActive && From 753b273cf1743fc344d8387037519769808ec032 Mon Sep 17 00:00:00 2001 From: Paolo Luigi Scala Date: Mon, 11 Sep 2023 13:11:41 +0200 Subject: [PATCH 383/616] Allow for choosing which store to use --- .../src/components/BlackFridaySaleNotification.js | 13 ++++++++++--- .../components/BlackFridaySidebarChecklistPromo.js | 11 +++++++++-- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/packages/js/src/components/BlackFridaySaleNotification.js b/packages/js/src/components/BlackFridaySaleNotification.js index f380b9e9a25..765b9473bac 100644 --- a/packages/js/src/components/BlackFridaySaleNotification.js +++ b/packages/js/src/components/BlackFridaySaleNotification.js @@ -1,28 +1,31 @@ import { __ } from "@wordpress/i18n"; import { select } from "@wordpress/data"; import { addQueryArgs } from "@wordpress/url"; +import PropTypes from "prop-types"; import { TimeConstrainedNotification } from "./TimeConstrainedNotification"; /** * The BlackFridaySaleNotification component. * + * @param {string} store The store to use. Defaults to {@code yoast-seo/editor} * @param {Object} props The props. * * @returns {JSX.Element} The BlackFridaySaleNotification component. */ export const BlackFridaySaleNotification = ( { + store = "yoast-seo/editor", ...props } ) => { - const isPremium = select( "yoast-seo/editor" ).getIsPremium(); - const linkParams = select( "yoast-seo/editor" ).selectLinkParams(); + const isPremium = select( store ).getIsPremium(); + const linkParams = select( store ).selectLinkParams(); return isPremium ? null : ( ); }; + +BlackFridaySaleNotification.propTypes = { + store: PropTypes.string, +}; diff --git a/packages/js/src/components/BlackFridaySidebarChecklistPromo.js b/packages/js/src/components/BlackFridaySidebarChecklistPromo.js index 6f08810a54d..eecb50d0dbe 100644 --- a/packages/js/src/components/BlackFridaySidebarChecklistPromo.js +++ b/packages/js/src/components/BlackFridaySidebarChecklistPromo.js @@ -2,20 +2,23 @@ import { select } from "@wordpress/data"; import { createInterpolateElement } from "@wordpress/element"; import { __, sprintf } from "@wordpress/i18n"; import { addQueryArgs } from "@wordpress/url"; +import PropTypes from "prop-types"; import { TimeConstrainedNotification } from "./TimeConstrainedNotification"; /** * The BlackFridaySidebarChecklistPromo component. * + * @param {string} store The store to use. Defaults to {@code yoast-seo/editor * @param {Object} props The props. * * @returns {JSX.Element} The BlackFridaySidebarChecklistPromo component. */ export const BlackFridaySidebarChecklistPromo = ( { + store = "yoast-seo/editor", ...props } ) => { - const linkParams = select( "yoast-seo/editor" ).selectLinkParams(); + const linkParams = select( store ).selectLinkParams(); const body = createInterpolateElement( sprintf( /* translators: %1$s expands to Yoast, %2$s expands to a 'strong' start tag, %2$s to a 'strong' end tag. 
*/ @@ -32,7 +35,7 @@ export const BlackFridaySidebarChecklistPromo = ( { id="black-friday-2023-sidebar-checklist" promoId="black_friday_2023_checklist" alertKey="black-friday-2023-sidebar-checklist" - store="yoast-seo/editor" + store={ store } title={ __( "Is your WooCommerce store ready for Black Friday?", "wordpress-seo" ) } { ...props } > @@ -43,3 +46,7 @@ export const BlackFridaySidebarChecklistPromo = ( { ); }; + +BlackFridaySidebarChecklistPromo.propTypes = { + store: PropTypes.string, +}; From f77fc31518eebaf858b4dda74a195043b12b1956 Mon Sep 17 00:00:00 2001 From: Paolo Luigi Scala Date: Mon, 11 Sep 2023 13:12:01 +0200 Subject: [PATCH 384/616] Add BF Promo notification and restructure checklist notification --- .../components/fills/ElementorFill.js | 31 ++++++++++++++++--- 1 file changed, 26 insertions(+), 5 deletions(-) diff --git a/packages/js/src/elementor/components/fills/ElementorFill.js b/packages/js/src/elementor/components/fills/ElementorFill.js index 8eb3ed8db2d..67e4d3c9d19 100644 --- a/packages/js/src/elementor/components/fills/ElementorFill.js +++ b/packages/js/src/elementor/components/fills/ElementorFill.js @@ -1,5 +1,6 @@ // External dependencies. import { Fill } from "@wordpress/components"; +import { select } from "@wordpress/data"; import { Fragment, useEffect } from "@wordpress/element"; import { __ } from "@wordpress/i18n"; import { get } from "lodash"; @@ -22,7 +23,8 @@ import WincherSEOPerformanceModal from "../../../containers/WincherSEOPerformanc import { isWordProofIntegrationActive } from "../../../helpers/wordproof"; import WordProofAuthenticationModals from "../../../components/modals/WordProofAuthenticationModals"; import WebinarPromoNotification from "../../../components/WebinarPromoNotification"; -import BlackFridayPromoNotification from "../../../components/BlackFridayPromoNotification"; +import { BlackFridaySidebarChecklistPromo } from "../../../components/BlackFridaySidebarChecklistPromo"; +import { BlackFridaySaleNotification } from "../../../components/BlackFridaySaleNotification"; import KeywordUpsell from "../../../components/KeywordUpsell"; /* eslint-disable complexity */ @@ -48,8 +50,25 @@ export default function ElementorFill( { isLoading, onLoad, settings } ) { return null; } + /** + * Checks if the Webinar promotion should be shown. + * @returns {boolean} Whether the Webinar promotion should be shown. + */ + const shouldShowWebinarPromoNotification = () => { + const isBlackFridaySalePromoActive = select( "yoast-seo/editor" ).isPromotionActive( "black_friday_2023_sale" ); + const isBlackFridaySaleAlertDismissed = select( "yoast-seo/editor" ).isAlertDismissed( "black-friday-2023-sale" ); + const isBlackFridayChecklistPromoActive = select( "yoast-seo/editor" ).isPromotionActive( "black_friday_2023_checklist" ); + const isBlackFridayChecklistAlertDismissed = select( "yoast-seo/editor" ).isAlertDismissed( "black-friday-2023-sidebar-checklist" ); + + if ( ( isBlackFridaySalePromoActive && ! isBlackFridaySaleAlertDismissed ) || + ( isBlackFridayChecklistPromoActive && ! 
isBlackFridayChecklistAlertDismissed ) ) { + return false; + } + + return true; + }; + const webinarIntroElementorUrl = get( window, "wpseoScriptData.webinarIntroElementorUrl", "https://yoa.st/webinar-intro-elementor" ); - const blackFridayBlockEditorUrl = get( window, "wpseoScriptData.blackFridayBlockEditorUrl", "https://yoa.st/black-friday-checklist" ); const isWooCommerce = get( window, "wpseoScriptData.isWooCommerceActive", "" ); return ( @@ -59,9 +78,11 @@ export default function ElementorFill( { isLoading, onLoad, settings } ) { - { isWooCommerce && blackFridayBlockEditorUrl - ? - : } + { shouldShowWebinarPromoNotification() && + + } + { isWooCommerce && } + { settings.isKeywordAnalysisActive && From 8c7fc80684c60401498b5842d5a40d881c96933b Mon Sep 17 00:00:00 2001 From: Paolo Luigi Scala Date: Mon, 11 Sep 2023 13:12:23 +0200 Subject: [PATCH 385/616] Pass to the settings store what is needed to render the promotions --- admin/class-config.php | 3 ++- packages/js/src/initializers/settings-store.js | 14 +++++++++++--- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/admin/class-config.php b/admin/class-config.php index 47dcb5048e8..62b431461f9 100644 --- a/admin/class-config.php +++ b/admin/class-config.php @@ -91,9 +91,10 @@ public function config_page_scripts() { 'isRtl' => is_rtl(), 'isPremium' => YoastSEO()->helpers->product->is_premium(), 'isWooCommerceActive' => $woocommerce_conditional->is_met(), + 'currentPromotions' => YoastSEO()->classes->get( Yoast\WP\SEO\Promotions\Application\Promotion_Manager::class )->get_current_promotions(), 'webinarIntroSettingsUrl' => WPSEO_Shortlinker::get( 'https://yoa.st/webinar-intro-settings' ), - 'blackFridayBlockEditorUrl' => ( YoastSEO()->classes->get( Yoast\WP\SEO\Promotions\Application\Promotion_Manager::class )->is( 'black_friday_2023_checklist' ) ) ? WPSEO_Shortlinker::get( 'https://yoa.st/black-friday-checklist' ) : '', 'webinarIntroFirstTimeConfigUrl' => $this->get_webinar_shortlink(), + 'linkParams' => WPSEO_Shortlinker::get_query_params(), 'pluginUrl' => \plugins_url( '', \WPSEO_FILE ), ]; diff --git a/packages/js/src/initializers/settings-store.js b/packages/js/src/initializers/settings-store.js index 63b40d03400..94f522b78ac 100644 --- a/packages/js/src/initializers/settings-store.js +++ b/packages/js/src/initializers/settings-store.js @@ -3,9 +3,9 @@ import { reducers, selectors, actions } from "@yoast/externals/redux"; import { get } from "lodash"; import * as controls from "../redux/controls/dismissedAlerts"; -const { dismissedAlerts, isPremium } = reducers; -const { isAlertDismissed, getIsPremium } = selectors; -const { dismissAlert, setDismissedAlerts, setIsPremium } = actions; +const { currentPromotions, dismissedAlerts, isPremium, linkParams } = reducers; +const { isAlertDismissed, getIsPremium, isPromotionActive, selectLinkParams } = selectors; +const { dismissAlert, setCurrentPromotions, setDismissedAlerts, setLinkParams, setIsPremium } = actions; /** * Populates the store. 
@@ -17,6 +17,8 @@ const { dismissAlert, setDismissedAlerts, setIsPremium } = actions; function populateStore( store ) { store.dispatch( setDismissedAlerts( get( window, "wpseoScriptData.dismissedAlerts", {} ) ) ); store.dispatch( setIsPremium( Boolean( get( window, "wpseoScriptData.isPremium", false ) ) ) ); + store.dispatch( setCurrentPromotions( get( window, "wpseoScriptData.currentPromotions", {} ) ) ); + store.dispatch( setLinkParams( get( window, "wpseoScriptData.linkParams", {} ) ) ); } /** @@ -27,16 +29,22 @@ function populateStore( store ) { export default function initSettingsStore() { const store = registerStore( "yoast-seo/settings", { reducer: combineReducers( { + currentPromotions, dismissedAlerts, isPremium, + linkParams, } ), selectors: { isAlertDismissed, getIsPremium, + isPromotionActive, + selectLinkParams, }, actions: { dismissAlert, + setCurrentPromotions, setDismissedAlerts, + setLinkParams, setIsPremium, }, controls, From 5422b18c2f828111ca4c1bf6cc13d9fa40ade732 Mon Sep 17 00:00:00 2001 From: Paolo Luigi Scala Date: Mon, 11 Sep 2023 13:12:38 +0200 Subject: [PATCH 386/616] WIP --- packages/js/src/initializers/settings-header.js | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/packages/js/src/initializers/settings-header.js b/packages/js/src/initializers/settings-header.js index 5b356de179f..5a68af7eab9 100644 --- a/packages/js/src/initializers/settings-header.js +++ b/packages/js/src/initializers/settings-header.js @@ -2,7 +2,7 @@ import { get } from "lodash"; import { render } from "@wordpress/element"; import { ThemeProvider } from "styled-components"; import WebinarPromoNotification from "../components/WebinarPromoNotification"; -import BlackFridayPromoNotification from "../components/BlackFridayPromoNotification"; +import { BlackFridaySidebarChecklistPromo } from "../components/BlackFridaySidebarChecklistPromo"; /** @@ -14,14 +14,13 @@ const initSettingsHeader = () => { const reactRoot = document.getElementById( "yst-settings-header-root" ); const isRtl = Boolean( get( window, "wpseoScriptData.isRtl", false ) ); const webinarIntroSettingsUrl = get( window, "wpseoScriptData.webinarIntroSettingsUrl", "https://yoa.st/webinar-intro-settings" ); - const blackFridayBlockEditorUrl = get( window, "wpseoScriptData.blackFridayBlockEditorUrl", "https://yoa.st/black-friday-checklist" ); const isWooCommerce = get( window, "wpseoScriptData.isWooCommerceActive", "" ); if ( reactRoot ) { render( - { isWooCommerce && blackFridayBlockEditorUrl - ? + { isWooCommerce + ? 
: } , reactRoot From 4d8a4292196716c8d2a368811ce8ca191418e051 Mon Sep 17 00:00:00 2001 From: Paolo Luigi Scala Date: Mon, 11 Sep 2023 13:31:37 +0200 Subject: [PATCH 387/616] Change name for continuity --- ...kFridaySaleNotification.js => BlackFridaySalePromo.js} | 8 ++++---- packages/js/src/components/fills/SidebarFill.js | 4 ++-- .../js/src/elementor/components/fills/ElementorFill.js | 4 ++-- 3 files changed, 8 insertions(+), 8 deletions(-) rename packages/js/src/components/{BlackFridaySaleNotification.js => BlackFridaySalePromo.js} (84%) diff --git a/packages/js/src/components/BlackFridaySaleNotification.js b/packages/js/src/components/BlackFridaySalePromo.js similarity index 84% rename from packages/js/src/components/BlackFridaySaleNotification.js rename to packages/js/src/components/BlackFridaySalePromo.js index 765b9473bac..a8c18fe0f7e 100644 --- a/packages/js/src/components/BlackFridaySaleNotification.js +++ b/packages/js/src/components/BlackFridaySalePromo.js @@ -6,14 +6,14 @@ import PropTypes from "prop-types"; import { TimeConstrainedNotification } from "./TimeConstrainedNotification"; /** - * The BlackFridaySaleNotification component. + * The BlackFridaySalePromo component. * * @param {string} store The store to use. Defaults to {@code yoast-seo/editor} * @param {Object} props The props. * - * @returns {JSX.Element} The BlackFridaySaleNotification component. + * @returns {JSX.Element} The BlackFridaySalePromo component. */ -export const BlackFridaySaleNotification = ( { +export const BlackFridaySalePromo = ( { store = "yoast-seo/editor", ...props } ) => { @@ -38,6 +38,6 @@ export const BlackFridaySaleNotification = ( { ); }; -BlackFridaySaleNotification.propTypes = { +BlackFridaySalePromo.propTypes = { store: PropTypes.string, }; diff --git a/packages/js/src/components/fills/SidebarFill.js b/packages/js/src/components/fills/SidebarFill.js index dcb6146764d..fdff3e69e5b 100644 --- a/packages/js/src/components/fills/SidebarFill.js +++ b/packages/js/src/components/fills/SidebarFill.js @@ -20,7 +20,7 @@ import SidebarCollapsible from "../SidebarCollapsible"; import AdvancedSettings from "../../containers/AdvancedSettings"; import WincherSEOPerformanceModal from "../../containers/WincherSEOPerformanceModal"; import WebinarPromoNotification from "../WebinarPromoNotification"; -import { BlackFridaySaleNotification } from "../BlackFridaySaleNotification"; +import { BlackFridaySalePromo } from "../BlackFridaySalePromo"; import { BlackFridaySidebarChecklistPromo } from "../BlackFridaySidebarChecklistPromo"; import KeywordUpsell from "../KeywordUpsell"; @@ -70,7 +70,7 @@ export default function SidebarFill( { settings } ) { } { isWooCommerce && } - +
    { settings.isKeywordAnalysisActive && diff --git a/packages/js/src/elementor/components/fills/ElementorFill.js b/packages/js/src/elementor/components/fills/ElementorFill.js index 67e4d3c9d19..853e193115b 100644 --- a/packages/js/src/elementor/components/fills/ElementorFill.js +++ b/packages/js/src/elementor/components/fills/ElementorFill.js @@ -24,7 +24,7 @@ import { isWordProofIntegrationActive } from "../../../helpers/wordproof"; import WordProofAuthenticationModals from "../../../components/modals/WordProofAuthenticationModals"; import WebinarPromoNotification from "../../../components/WebinarPromoNotification"; import { BlackFridaySidebarChecklistPromo } from "../../../components/BlackFridaySidebarChecklistPromo"; -import { BlackFridaySaleNotification } from "../../../components/BlackFridaySaleNotification"; +import { BlackFridaySalePromo } from "../../../components/BlackFridaySalePromo"; import KeywordUpsell from "../../../components/KeywordUpsell"; /* eslint-disable complexity */ @@ -82,7 +82,7 @@ export default function ElementorFill( { isLoading, onLoad, settings } ) { } { isWooCommerce && } - + { settings.isKeywordAnalysisActive && From d5a3f935d01b862ead86dc242d05e85a89d293e9 Mon Sep 17 00:00:00 2001 From: Paolo Luigi Scala Date: Mon, 11 Sep 2023 13:36:54 +0200 Subject: [PATCH 388/616] Moved shouldShowWebinarPromoNotification to an helper --- .../js/src/components/fills/SidebarFill.js | 20 +------------------ .../components/fills/ElementorFill.js | 19 +----------------- .../shuldShowWebinarPromoNotification.js | 19 ++++++++++++++++++ 3 files changed, 21 insertions(+), 37 deletions(-) create mode 100644 packages/js/src/helpers/shuldShowWebinarPromoNotification.js diff --git a/packages/js/src/components/fills/SidebarFill.js b/packages/js/src/components/fills/SidebarFill.js index fdff3e69e5b..0df1cc60d1c 100644 --- a/packages/js/src/components/fills/SidebarFill.js +++ b/packages/js/src/components/fills/SidebarFill.js @@ -22,7 +22,7 @@ import WincherSEOPerformanceModal from "../../containers/WincherSEOPerformanceMo import WebinarPromoNotification from "../WebinarPromoNotification"; import { BlackFridaySalePromo } from "../BlackFridaySalePromo"; import { BlackFridaySidebarChecklistPromo } from "../BlackFridaySidebarChecklistPromo"; - +import { shouldShowWebinarPromoNotification } from "../../helpers/shuldShowWebinarPromoNotification"; import KeywordUpsell from "../KeywordUpsell"; /* eslint-disable complexity */ @@ -38,24 +38,6 @@ import KeywordUpsell from "../KeywordUpsell"; * @constructor */ export default function SidebarFill( { settings } ) { - /** - * Checks if the Webinar promotion should be shown. - * @returns {boolean} Whether the Webinar promotion should be shown. - */ - const shouldShowWebinarPromoNotification = () => { - const isBlackFridaySalePromoActive = select( "yoast-seo/editor" ).isPromotionActive( "black_friday_2023_sale" ); - const isBlackFridaySaleAlertDismissed = select( "yoast-seo/editor" ).isAlertDismissed( "black-friday-2023-sale" ); - const isBlackFridayChecklistPromoActive = select( "yoast-seo/editor" ).isPromotionActive( "black_friday_2023_checklist" ); - const isBlackFridayChecklistAlertDismissed = select( "yoast-seo/editor" ).isAlertDismissed( "black-friday-2023-sidebar-checklist" ); - - if ( ( isBlackFridaySalePromoActive && ! isBlackFridaySaleAlertDismissed ) || - ( isBlackFridayChecklistPromoActive && ! 
isBlackFridayChecklistAlertDismissed ) ) { - return false; - } - - return true; - }; - const webinarIntroBlockEditorUrl = get( window, "wpseoScriptData.webinarIntroBlockEditorUrl", "https://yoa.st/webinar-intro-block-editor" ); const isWooCommerce = get( window, "wpseoScriptData.isWooCommerceActive", "" ); diff --git a/packages/js/src/elementor/components/fills/ElementorFill.js b/packages/js/src/elementor/components/fills/ElementorFill.js index 853e193115b..4d6f967b8a2 100644 --- a/packages/js/src/elementor/components/fills/ElementorFill.js +++ b/packages/js/src/elementor/components/fills/ElementorFill.js @@ -25,6 +25,7 @@ import WordProofAuthenticationModals from "../../../components/modals/WordProofA import WebinarPromoNotification from "../../../components/WebinarPromoNotification"; import { BlackFridaySidebarChecklistPromo } from "../../../components/BlackFridaySidebarChecklistPromo"; import { BlackFridaySalePromo } from "../../../components/BlackFridaySalePromo"; +import { shouldShowWebinarPromoNotification } from "../../../helpers/shuldShowWebinarPromoNotification"; import KeywordUpsell from "../../../components/KeywordUpsell"; /* eslint-disable complexity */ @@ -50,24 +51,6 @@ export default function ElementorFill( { isLoading, onLoad, settings } ) { return null; } - /** - * Checks if the Webinar promotion should be shown. - * @returns {boolean} Whether the Webinar promotion should be shown. - */ - const shouldShowWebinarPromoNotification = () => { - const isBlackFridaySalePromoActive = select( "yoast-seo/editor" ).isPromotionActive( "black_friday_2023_sale" ); - const isBlackFridaySaleAlertDismissed = select( "yoast-seo/editor" ).isAlertDismissed( "black-friday-2023-sale" ); - const isBlackFridayChecklistPromoActive = select( "yoast-seo/editor" ).isPromotionActive( "black_friday_2023_checklist" ); - const isBlackFridayChecklistAlertDismissed = select( "yoast-seo/editor" ).isAlertDismissed( "black-friday-2023-sidebar-checklist" ); - - if ( ( isBlackFridaySalePromoActive && ! isBlackFridaySaleAlertDismissed ) || - ( isBlackFridayChecklistPromoActive && ! isBlackFridayChecklistAlertDismissed ) ) { - return false; - } - - return true; - }; - const webinarIntroElementorUrl = get( window, "wpseoScriptData.webinarIntroElementorUrl", "https://yoa.st/webinar-intro-elementor" ); const isWooCommerce = get( window, "wpseoScriptData.isWooCommerceActive", "" ); diff --git a/packages/js/src/helpers/shuldShowWebinarPromoNotification.js b/packages/js/src/helpers/shuldShowWebinarPromoNotification.js new file mode 100644 index 00000000000..0f951bbf4ef --- /dev/null +++ b/packages/js/src/helpers/shuldShowWebinarPromoNotification.js @@ -0,0 +1,19 @@ +import { select } from "@wordpress/data"; + +/** + * Checks if the Webinar promotion should be shown. + * @returns {boolean} Whether the Webinar promotion should be shown. + */ +export const shouldShowWebinarPromoNotification = () => { + const isBlackFridaySalePromoActive = select( "yoast-seo/editor" ).isPromotionActive( "black_friday_2023_sale" ); + const isBlackFridaySaleAlertDismissed = select( "yoast-seo/editor" ).isAlertDismissed( "black-friday-2023-sale" ); + const isBlackFridayChecklistPromoActive = select( "yoast-seo/editor" ).isPromotionActive( "black_friday_2023_checklist" ); + const isBlackFridayChecklistAlertDismissed = select( "yoast-seo/editor" ).isAlertDismissed( "black-friday-2023-sidebar-checklist" ); + + if ( ( isBlackFridaySalePromoActive && ! isBlackFridaySaleAlertDismissed ) || + ( isBlackFridayChecklistPromoActive && ! 
isBlackFridayChecklistAlertDismissed ) ) { + return false; + } + + return true; +}; From 52e3a6dd36569e15ed642173f542376e5e10a7f3 Mon Sep 17 00:00:00 2001 From: Paolo Luigi Scala Date: Mon, 11 Sep 2023 13:37:20 +0200 Subject: [PATCH 389/616] Remove unused import --- packages/js/src/components/fills/SidebarFill.js | 1 - packages/js/src/elementor/components/fills/ElementorFill.js | 1 - 2 files changed, 2 deletions(-) diff --git a/packages/js/src/components/fills/SidebarFill.js b/packages/js/src/components/fills/SidebarFill.js index 0df1cc60d1c..85ec4c56dbf 100644 --- a/packages/js/src/components/fills/SidebarFill.js +++ b/packages/js/src/components/fills/SidebarFill.js @@ -1,6 +1,5 @@ /* External dependencies */ import { Fill } from "@wordpress/components"; -import { select } from "@wordpress/data"; import { Fragment } from "@wordpress/element"; import PropTypes from "prop-types"; import { __ } from "@wordpress/i18n"; diff --git a/packages/js/src/elementor/components/fills/ElementorFill.js b/packages/js/src/elementor/components/fills/ElementorFill.js index 4d6f967b8a2..aba00f6faf5 100644 --- a/packages/js/src/elementor/components/fills/ElementorFill.js +++ b/packages/js/src/elementor/components/fills/ElementorFill.js @@ -1,6 +1,5 @@ // External dependencies. import { Fill } from "@wordpress/components"; -import { select } from "@wordpress/data"; import { Fragment, useEffect } from "@wordpress/element"; import { __ } from "@wordpress/i18n"; import { get } from "lodash"; From c77ac46207b6f1260981f496f24510800918efd9 Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Mon, 11 Sep 2023 13:47:25 +0200 Subject: [PATCH 390/616] Add clientId information and the start and endoffset of the Mark relative to the block --- .../yoastseo/src/languageProcessing/researches/h1s.js | 6 +++++- .../src/scoring/assessments/seo/SingleH1Assessment.js | 8 +++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/packages/yoastseo/src/languageProcessing/researches/h1s.js b/packages/yoastseo/src/languageProcessing/researches/h1s.js index aceff10b0b2..8bb4544a2a1 100644 --- a/packages/yoastseo/src/languageProcessing/researches/h1s.js +++ b/packages/yoastseo/src/languageProcessing/researches/h1s.js @@ -15,7 +15,11 @@ export default function( paper ) { { tag: "h1", content: h1Match.findAll( node => node.name === "#text" ).map( textNode => textNode.value ).join( "" ), - position: { startOffset: h1Match.sourceCodeLocation.startTag.endOffset, endOffset: h1Match.sourceCodeLocation.endTag.startOffset }, + position: { + startOffset: h1Match.sourceCodeLocation.startTag.endOffset, + endOffset: h1Match.sourceCodeLocation.endTag.startOffset, + clientId: h1Match.clientId, + }, } ) ); diff --git a/packages/yoastseo/src/scoring/assessments/seo/SingleH1Assessment.js b/packages/yoastseo/src/scoring/assessments/seo/SingleH1Assessment.js index 456cea2f4a4..e845fc57405 100644 --- a/packages/yoastseo/src/scoring/assessments/seo/SingleH1Assessment.js +++ b/packages/yoastseo/src/scoring/assessments/seo/SingleH1Assessment.js @@ -95,7 +95,13 @@ class SingleH1Assessment extends Assessment { return new Mark( { original: "
<h1>" + h1.content + "</h1>",
 			marked: "<h1>" + marker( h1.content ) + "</h1>
    ", - position: { startOffset: h1.position.startOffset, endOffset: h1.position.endOffset }, + position: { + startOffset: h1.position.startOffset, + endOffset: h1.position.endOffset, + startOffsetBlock: 0, + endOffsetBlock: h1.content.length, + clientId: h1.position.clientId, + }, } ); } ); } From 47db67b9769d3234cc49d7d6b49aa40605903b2e Mon Sep 17 00:00:00 2001 From: Paolo Luigi Scala Date: Mon, 11 Sep 2023 14:04:32 +0200 Subject: [PATCH 391/616] Split check into two checks to be used also in the dashboard --- .../js/src/components/fills/SidebarFill.js | 4 +-- .../components/fills/ElementorFill.js | 4 +-- .../shuldShowWebinarPromoNotification.js | 35 +++++++++++++++---- 3 files changed, 32 insertions(+), 11 deletions(-) diff --git a/packages/js/src/components/fills/SidebarFill.js b/packages/js/src/components/fills/SidebarFill.js index 85ec4c56dbf..cdfd6058adc 100644 --- a/packages/js/src/components/fills/SidebarFill.js +++ b/packages/js/src/components/fills/SidebarFill.js @@ -21,7 +21,7 @@ import WincherSEOPerformanceModal from "../../containers/WincherSEOPerformanceMo import WebinarPromoNotification from "../WebinarPromoNotification"; import { BlackFridaySalePromo } from "../BlackFridaySalePromo"; import { BlackFridaySidebarChecklistPromo } from "../BlackFridaySidebarChecklistPromo"; -import { shouldShowWebinarPromoNotification } from "../../helpers/shuldShowWebinarPromoNotification"; +import { shouldShowWebinarPromoNotificationInSidebar } from "../../helpers/shuldShowWebinarPromoNotification"; import KeywordUpsell from "../KeywordUpsell"; /* eslint-disable complexity */ @@ -47,7 +47,7 @@ export default function SidebarFill( { settings } ) {
    { /* eslint-disable max-len */ } - { shouldShowWebinarPromoNotification() && + { shouldShowWebinarPromoNotificationInSidebar() && } { isWooCommerce && } diff --git a/packages/js/src/elementor/components/fills/ElementorFill.js b/packages/js/src/elementor/components/fills/ElementorFill.js index aba00f6faf5..382738489bb 100644 --- a/packages/js/src/elementor/components/fills/ElementorFill.js +++ b/packages/js/src/elementor/components/fills/ElementorFill.js @@ -24,7 +24,7 @@ import WordProofAuthenticationModals from "../../../components/modals/WordProofA import WebinarPromoNotification from "../../../components/WebinarPromoNotification"; import { BlackFridaySidebarChecklistPromo } from "../../../components/BlackFridaySidebarChecklistPromo"; import { BlackFridaySalePromo } from "../../../components/BlackFridaySalePromo"; -import { shouldShowWebinarPromoNotification } from "../../../helpers/shuldShowWebinarPromoNotification"; +import { shouldShowWebinarPromoNotificationInSidebar } from "../../../helpers/shuldShowWebinarPromoNotification"; import KeywordUpsell from "../../../components/KeywordUpsell"; /* eslint-disable complexity */ @@ -60,7 +60,7 @@ export default function ElementorFill( { isLoading, onLoad, settings } ) { - { shouldShowWebinarPromoNotification() && + { shouldShowWebinarPromoNotificationInSidebar() && } { isWooCommerce && } diff --git a/packages/js/src/helpers/shuldShowWebinarPromoNotification.js b/packages/js/src/helpers/shuldShowWebinarPromoNotification.js index 0f951bbf4ef..18fd8520e36 100644 --- a/packages/js/src/helpers/shuldShowWebinarPromoNotification.js +++ b/packages/js/src/helpers/shuldShowWebinarPromoNotification.js @@ -1,19 +1,40 @@ import { select } from "@wordpress/data"; /** - * Checks if the Webinar promotion should be shown. + * Checks if the Webinar promotion should be shown in the dashboard. + * + * @param {string} store The store to use. + * * @returns {boolean} Whether the Webinar promotion should be shown. */ -export const shouldShowWebinarPromoNotification = () => { - const isBlackFridaySalePromoActive = select( "yoast-seo/editor" ).isPromotionActive( "black_friday_2023_sale" ); - const isBlackFridaySaleAlertDismissed = select( "yoast-seo/editor" ).isAlertDismissed( "black-friday-2023-sale" ); - const isBlackFridayChecklistPromoActive = select( "yoast-seo/editor" ).isPromotionActive( "black_friday_2023_checklist" ); - const isBlackFridayChecklistAlertDismissed = select( "yoast-seo/editor" ).isAlertDismissed( "black-friday-2023-sidebar-checklist" ); +const shouldShowWebinarPromoNotificationInDashboard = ( store = "yoast-seo/editor" ) => { + const isBlackFridayChecklistPromoActive = select( store ).isPromotionActive( "black_friday_2023_checklist" ); + const isBlackFridayChecklistAlertDismissed = select( store ).isAlertDismissed( "black-friday-2023-sidebar-checklist" ); + + if ( isBlackFridayChecklistPromoActive && ! isBlackFridayChecklistAlertDismissed ) { + return false; + } + + return true; +}; + +/** + * Checks if the Webinar promotion should be shown in the sidebar. + * + * @param {string} store The store to use. + * + * @returns {boolean} Whether the Webinar promotion should be shown. + */ +const shouldShowWebinarPromoNotificationInSidebar = ( store = "yoast-seo/editor" ) => { + const isBlackFridaySalePromoActive = select( store ).isPromotionActive( "black_friday_2023_sale" ); + const isBlackFridaySaleAlertDismissed = select( store ).isAlertDismissed( "black-friday-2023-sale" ); if ( ( isBlackFridaySalePromoActive && ! 
isBlackFridaySaleAlertDismissed ) || - ( isBlackFridayChecklistPromoActive && ! isBlackFridayChecklistAlertDismissed ) ) { + ! shouldShowWebinarPromoNotificationInDashboard( store ) ) { return false; } return true; }; + +export { shouldShowWebinarPromoNotificationInDashboard, shouldShowWebinarPromoNotificationInSidebar }; From ac5e5953d4c4e25190bb34d6e1e7a0cc10549181 Mon Sep 17 00:00:00 2001 From: Paolo Luigi Scala Date: Mon, 11 Sep 2023 14:04:45 +0200 Subject: [PATCH 392/616] Webinar promo management --- packages/js/src/initializers/settings-header.js | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/packages/js/src/initializers/settings-header.js b/packages/js/src/initializers/settings-header.js index 5a68af7eab9..6f55d616a55 100644 --- a/packages/js/src/initializers/settings-header.js +++ b/packages/js/src/initializers/settings-header.js @@ -3,7 +3,7 @@ import { render } from "@wordpress/element"; import { ThemeProvider } from "styled-components"; import WebinarPromoNotification from "../components/WebinarPromoNotification"; import { BlackFridaySidebarChecklistPromo } from "../components/BlackFridaySidebarChecklistPromo"; - +import { shouldShowWebinarPromoNotificationInDashboard } from "../helpers/shuldShowWebinarPromoNotification"; /** * Initializes the React settings header, just below the title. @@ -19,9 +19,8 @@ const initSettingsHeader = () => { if ( reactRoot ) { render( - { isWooCommerce - ? - : } + { isWooCommerce && } + { shouldShowWebinarPromoNotificationInDashboard( "yoast-seo/settings" ) && } , reactRoot ); From b9a12361160338325d3a54d15cc4933c789acfc1 Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Tue, 12 Sep 2023 06:34:42 +0200 Subject: [PATCH 393/616] Remove duplicated logic for applying annotation for headings --- packages/js/src/decorator/gutenberg.js | 21 ++------------------- 1 file changed, 2 insertions(+), 19 deletions(-) diff --git a/packages/js/src/decorator/gutenberg.js b/packages/js/src/decorator/gutenberg.js index f3c86e09972..c648f142274 100644 --- a/packages/js/src/decorator/gutenberg.js +++ b/packages/js/src/decorator/gutenberg.js @@ -82,19 +82,6 @@ const ANNOTATION_ATTRIBUTES = { ], }; -const ASSESSMENT_SPECIFIC_ANNOTATION_ATTRIBUTES = { - singleH1: { - "core/heading": [ - { - key: "content", - filter: ( blockAttributes ) => { - return blockAttributes.level === 1; - }, - }, - ], - }, -}; - /** * Retrieves the next annotation from the annotation queue. * @@ -153,15 +140,11 @@ export function isAnnotationAvailable() { * @returns {string[]} The attributes that we have annotation support for. */ function getAttributesWithAnnotationSupport( blockTypeName ) { - const activeMarker = select( "yoast-seo/editor" ).getActiveMarker(); - - const assessmentAttributes = ASSESSMENT_SPECIFIC_ANNOTATION_ATTRIBUTES[ activeMarker ] || ANNOTATION_ATTRIBUTES; - - if ( ! assessmentAttributes.hasOwnProperty( blockTypeName ) ) { + if ( ! 
ANNOTATION_ATTRIBUTES.hasOwnProperty( blockTypeName ) ) { return []; } - return assessmentAttributes[ blockTypeName ]; + return ANNOTATION_ATTRIBUTES[ blockTypeName ]; } /** From e22bc51c43a4c398ddd620c8ee4b19ad6b9e6951 Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Tue, 12 Sep 2023 06:34:52 +0200 Subject: [PATCH 394/616] Add unit tests --- .../helpers/getAnnotationsHelpers.test.js | 62 +++++++++++++++++++ 1 file changed, 62 insertions(+) diff --git a/packages/js/tests/decorator/helpers/getAnnotationsHelpers.test.js b/packages/js/tests/decorator/helpers/getAnnotationsHelpers.test.js index 85eb9597ec4..9888dfed2dc 100644 --- a/packages/js/tests/decorator/helpers/getAnnotationsHelpers.test.js +++ b/packages/js/tests/decorator/helpers/getAnnotationsHelpers.test.js @@ -433,6 +433,68 @@ describe( "a test for retrieving annotations from non-Yoast blocks", () => { }, ]; + expect( annotations ).toEqual( resultWithAnnotation ); + } ); + it( "returns an annotation if there is an applicable marker for a heading block: " + + "the heading text contains some styling such as strong tags", () => { + // The string we want to highlight: "red panda's". + create.mockImplementation( () => { + return { text: "The origin of red pandas" }; + } ); + + const mockBlock = { + clientId: "34f61542-0902-44f7-ab48-gyildhgfgh6463286", + name: "core/heading", + isValid: true, + attributes: { + content: "The origin of red pandas", + }, + innerBlocks: [], + originalContent: "

    The origin of red pandas

    ", + }; + + const mockAttribute = { + key: "content", + }; + + const mockMarks = [ + new Mark( { + original: "

    The origin of red pandas

    ", + marked: "

    The origin of red pandas

    ", + position: { + startOffsetBlock: 0, + endOffsetBlock: 24, + clientId: "34f61542-0902-44f7-ab48-gyildhgfgh6463286", + }, + } ), + new Mark( { + original: "A raw food diet for cats is more digestible than a diet of plant-based foods.", + marked: "A raw food diet for cats is more digestible than a diet of plant-based foods.", + position: { + startOffsetBlock: 2, + endOffsetBlock: 10, + isFirstSection: false, + clientId: "2821282d-aead-4191-a844-c43568cd112d", + attributeId: "faq-question-1674124378605", + }, + } ), + ]; + + select.mockReturnValue( { + getActiveMarker: jest.fn( () => "singleH1" ), + } ); + + const annotations = getAnnotationsForWPBlock( mockAttribute, mockBlock, mockMarks ); + + const resultWithAnnotation = [ + { + startOffset: 0, + endOffset: 24, + block: "34f61542-0902-44f7-ab48-gyildhgfgh6463286", + richTextIdentifier: "content", + }, + ]; + expect( annotations ).toEqual( resultWithAnnotation ); } ); } ); From 381d6d72e5e6cd0fb8d0aa96b99be9ab12af5185 Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Tue, 12 Sep 2023 07:00:52 +0200 Subject: [PATCH 395/616] Add empty string as the default client id when it's not available from the tree --- packages/yoastseo/src/languageProcessing/researches/h1s.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/yoastseo/src/languageProcessing/researches/h1s.js b/packages/yoastseo/src/languageProcessing/researches/h1s.js index 8bb4544a2a1..ac22e07b4c2 100644 --- a/packages/yoastseo/src/languageProcessing/researches/h1s.js +++ b/packages/yoastseo/src/languageProcessing/researches/h1s.js @@ -18,7 +18,7 @@ export default function( paper ) { position: { startOffset: h1Match.sourceCodeLocation.startTag.endOffset, endOffset: h1Match.sourceCodeLocation.endTag.startOffset, - clientId: h1Match.clientId, + clientId: h1Match.clientId || "", }, } ) ); From 5d73de548b3baca6d6047e6c015d28ce41028e89 Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Tue, 12 Sep 2023 07:01:02 +0200 Subject: [PATCH 396/616] Adjust unit tests --- .../languageProcessing/researches/h1sSpec.js | 23 +++++++++---------- .../assessments/seo/SingleH1AssessmentSpec.js | 17 +++++++++++--- 2 files changed, 25 insertions(+), 15 deletions(-) diff --git a/packages/yoastseo/spec/languageProcessing/researches/h1sSpec.js b/packages/yoastseo/spec/languageProcessing/researches/h1sSpec.js index 6fda6ad2679..f47661c2644 100644 --- a/packages/yoastseo/spec/languageProcessing/researches/h1sSpec.js +++ b/packages/yoastseo/spec/languageProcessing/researches/h1sSpec.js @@ -15,8 +15,7 @@ describe( "Gets all H1s in the text", function() { const mockPaper = new Paper( "

<h1>first h1</h1><p>some content</p><h2>content h2</h2>
    " ); const mockResearcher = new EnglishResearcher( mockPaper ); buildTree( mockPaper, mockResearcher ); - // console.log(mockPaper.getTree()); - expect( h1s( mockPaper ) ).toEqual( [ { tag: "h1", content: "first h1", position: { startOffset: 4, endOffset: 12 } } ] ); + expect( h1s( mockPaper ) ).toEqual( [ { tag: "h1", content: "first h1", position: { startOffset: 4, endOffset: 12, clientId: "" } } ] ); } ); it( "should return all H1s in the text", function() { @@ -25,8 +24,8 @@ describe( "Gets all H1s in the text", function() { buildTree( mockPaper, mockResearcher ); expect( h1s( mockPaper ) ).toEqual( [ - { tag: "h1", content: "first h1", position: { startOffset: 4, endOffset: 12 } }, - { tag: "h1", content: "second h1", position: { startOffset: 37, endOffset: 46 } }, + { tag: "h1", content: "first h1", position: { startOffset: 4, endOffset: 12, clientId: "" } }, + { tag: "h1", content: "second h1", position: { startOffset: 37, endOffset: 46, clientId: "" } }, ] ); } ); @@ -36,8 +35,8 @@ describe( "Gets all H1s in the text", function() { buildTree( mockPaper, mockResearcher ); expect( h1s( mockPaper ) ).toEqual( [ - { tag: "h1", content: "first h1", position: { startOffset: 4, endOffset: 12 } }, - { tag: "h1", content: "second h1", position: { startOffset: 21, endOffset: 30 } }, + { tag: "h1", content: "first h1", position: { startOffset: 4, endOffset: 12, clientId: "" } }, + { tag: "h1", content: "second h1", position: { startOffset: 21, endOffset: 30, clientId: "" } }, ] ); } ); @@ -47,8 +46,8 @@ describe( "Gets all H1s in the text", function() { buildTree( mockPaper, mockResearcher ); expect( h1s( mockPaper ) ).toEqual( [ - { tag: "h1", content: "first h1", position: { startOffset: 12, endOffset: 20 } }, - { tag: "h1", content: "second h1", position: { startOffset: 29, endOffset: 38 } }, + { tag: "h1", content: "first h1", position: { startOffset: 12, endOffset: 20, clientId: "" } }, + { tag: "h1", content: "second h1", position: { startOffset: 29, endOffset: 38, clientId: "" } }, ] ); } ); @@ -58,8 +57,8 @@ describe( "Gets all H1s in the text", function() { buildTree( mockPaper, mockResearcher ); expect( h1s( mockPaper ) ).toEqual( [ - { tag: "h1", content: "first h1", position: { startOffset: 9, endOffset: 17 } }, - { tag: "h1", content: "second h1", position: { startOffset: 64, endOffset: 73 } }, + { tag: "h1", content: "first h1", position: { startOffset: 9, endOffset: 17, clientId: "" } }, + { tag: "h1", content: "second h1", position: { startOffset: 64, endOffset: 73, clientId: "" } }, ] ); } ); @@ -131,8 +130,8 @@ describe( "Gets all H1s in the text", function() { buildTree( mockPaper, mockResearcher ); expect( h1s( mockPaper ) ).toEqual( [ - { tag: "h1", content: "Voice Search", position: { startOffset: 4, endOffset: 16 } }, - { tag: "h1", content: "So what will the future bring?", position: { startOffset: 3903, endOffset: 3933 } }, + { tag: "h1", content: "Voice Search", position: { startOffset: 4, endOffset: 16, clientId: "" } }, + { tag: "h1", content: "So what will the future bring?", position: { startOffset: 3903, endOffset: 3933, clientId: "" } }, ] ); } ); } ); diff --git a/packages/yoastseo/spec/scoring/assessments/seo/SingleH1AssessmentSpec.js b/packages/yoastseo/spec/scoring/assessments/seo/SingleH1AssessmentSpec.js index 4590e5258be..bd7bf54bb27 100644 --- a/packages/yoastseo/spec/scoring/assessments/seo/SingleH1AssessmentSpec.js +++ b/packages/yoastseo/spec/scoring/assessments/seo/SingleH1AssessmentSpec.js @@ -66,8 +66,13 @@ describe( "A test for marking 
incorrect H1s in the body", function() { new Mark( { original: "

<h1>heading</h1>
    ", marked: "

<h1><yoastmark class='yoast-text-mark'>heading</yoastmark></h1>
    ", - position: { startOffset: 22, endOffset: 29 } } - ), + position: { + startOffset: 22, + endOffset: 29, + clientId: "", + endOffsetBlock: 7, + startOffsetBlock: 0, + } } ), ]; expect( h1Assessment.getMarks() ).toEqual( expected ); @@ -82,7 +87,13 @@ describe( "A test for marking incorrect H1s in the body", function() { new Mark( { original: "

<h1>heading</h1>
    ", marked: "

<h1><yoastmark class='yoast-text-mark'>heading</yoastmark></h1>
    ", - position: { startOffset: 4, endOffset: 11 }, + position: { + startOffset: 4, + endOffset: 11, + clientId: "", + endOffsetBlock: 7, + startOffsetBlock: 0, + }, } ), ]; From 2b298391e956d0bbf18de8642fe8b746581a33a9 Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Tue, 12 Sep 2023 07:18:23 +0200 Subject: [PATCH 397/616] simplify code --- packages/yoastseo/src/languageProcessing/researches/h1s.js | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/packages/yoastseo/src/languageProcessing/researches/h1s.js b/packages/yoastseo/src/languageProcessing/researches/h1s.js index ac22e07b4c2..dc972285597 100644 --- a/packages/yoastseo/src/languageProcessing/researches/h1s.js +++ b/packages/yoastseo/src/languageProcessing/researches/h1s.js @@ -10,8 +10,7 @@ export default function( paper ) { const h1Matches = tree.findAll( node => node.name === "h1" ); - - const h1s = h1Matches.map( h1Match => ( + return h1Matches.map( h1Match => ( { tag: "h1", content: h1Match.findAll( node => node.name === "#text" ).map( textNode => textNode.value ).join( "" ), @@ -22,6 +21,4 @@ export default function( paper ) { }, } ) ); - - return h1s; } From 9ba6806e2f2ef31a4c34e616735f96d75a552555 Mon Sep 17 00:00:00 2001 From: Paolo Luigi Scala Date: Tue, 12 Sep 2023 09:10:16 +0200 Subject: [PATCH 398/616] Fix typo in filename --- packages/js/src/components/fills/SidebarFill.js | 2 +- packages/js/src/elementor/components/fills/ElementorFill.js | 2 +- ...omoNotification.js => shouldShowWebinarPromoNotification.js} | 0 packages/js/src/initializers/settings-header.js | 2 +- 4 files changed, 3 insertions(+), 3 deletions(-) rename packages/js/src/helpers/{shuldShowWebinarPromoNotification.js => shouldShowWebinarPromoNotification.js} (100%) diff --git a/packages/js/src/components/fills/SidebarFill.js b/packages/js/src/components/fills/SidebarFill.js index cdfd6058adc..7b28f98b182 100644 --- a/packages/js/src/components/fills/SidebarFill.js +++ b/packages/js/src/components/fills/SidebarFill.js @@ -21,7 +21,7 @@ import WincherSEOPerformanceModal from "../../containers/WincherSEOPerformanceMo import WebinarPromoNotification from "../WebinarPromoNotification"; import { BlackFridaySalePromo } from "../BlackFridaySalePromo"; import { BlackFridaySidebarChecklistPromo } from "../BlackFridaySidebarChecklistPromo"; -import { shouldShowWebinarPromoNotificationInSidebar } from "../../helpers/shuldShowWebinarPromoNotification"; +import { shouldShowWebinarPromoNotificationInSidebar } from "../../helpers/shouldShowWebinarPromoNotification"; import KeywordUpsell from "../KeywordUpsell"; /* eslint-disable complexity */ diff --git a/packages/js/src/elementor/components/fills/ElementorFill.js b/packages/js/src/elementor/components/fills/ElementorFill.js index 382738489bb..e39f0ffe0e0 100644 --- a/packages/js/src/elementor/components/fills/ElementorFill.js +++ b/packages/js/src/elementor/components/fills/ElementorFill.js @@ -24,7 +24,7 @@ import WordProofAuthenticationModals from "../../../components/modals/WordProofA import WebinarPromoNotification from "../../../components/WebinarPromoNotification"; import { BlackFridaySidebarChecklistPromo } from "../../../components/BlackFridaySidebarChecklistPromo"; import { BlackFridaySalePromo } from "../../../components/BlackFridaySalePromo"; -import { shouldShowWebinarPromoNotificationInSidebar } from "../../../helpers/shuldShowWebinarPromoNotification"; +import { shouldShowWebinarPromoNotificationInSidebar } from "../../../helpers/shouldShowWebinarPromoNotification"; import 
KeywordUpsell from "../../../components/KeywordUpsell"; /* eslint-disable complexity */ diff --git a/packages/js/src/helpers/shuldShowWebinarPromoNotification.js b/packages/js/src/helpers/shouldShowWebinarPromoNotification.js similarity index 100% rename from packages/js/src/helpers/shuldShowWebinarPromoNotification.js rename to packages/js/src/helpers/shouldShowWebinarPromoNotification.js diff --git a/packages/js/src/initializers/settings-header.js b/packages/js/src/initializers/settings-header.js index 6f55d616a55..b5f2f925d9d 100644 --- a/packages/js/src/initializers/settings-header.js +++ b/packages/js/src/initializers/settings-header.js @@ -3,7 +3,7 @@ import { render } from "@wordpress/element"; import { ThemeProvider } from "styled-components"; import WebinarPromoNotification from "../components/WebinarPromoNotification"; import { BlackFridaySidebarChecklistPromo } from "../components/BlackFridaySidebarChecklistPromo"; -import { shouldShowWebinarPromoNotificationInDashboard } from "../helpers/shuldShowWebinarPromoNotification"; +import { shouldShowWebinarPromoNotificationInDashboard } from "../helpers/shouldShowWebinarPromoNotification"; /** * Initializes the React settings header, just below the title. From 460e0a1163e9a01d355ca2245ae231c9670d3475 Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Tue, 12 Sep 2023 11:44:42 +0200 Subject: [PATCH 399/616] Adjust comment --- .../js/tests/decorator/helpers/getAnnotationsHelpers.test.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/js/tests/decorator/helpers/getAnnotationsHelpers.test.js b/packages/js/tests/decorator/helpers/getAnnotationsHelpers.test.js index 9888dfed2dc..a4206df6a90 100644 --- a/packages/js/tests/decorator/helpers/getAnnotationsHelpers.test.js +++ b/packages/js/tests/decorator/helpers/getAnnotationsHelpers.test.js @@ -437,7 +437,7 @@ describe( "a test for retrieving annotations from non-Yoast blocks", () => { } ); it( "returns an annotation if there is an applicable marker for a heading block: " + "the heading text contains some styling such as strong tags", () => { - // The string we want to highlight: "red panda's". + // The string we want to highlight: "The origin of red pandas". create.mockImplementation( () => { return { text: "The origin of red pandas" }; } ); From 1d53ec22282ffebdab2d4b3da53d85b070b5a061 Mon Sep 17 00:00:00 2001 From: Paolo Luigi Scala Date: Tue, 12 Sep 2023 12:12:57 +0200 Subject: [PATCH 400/616] Add uspport for notice in metabox --- .../js/src/components/BlackFridaySalePromo.js | 32 ++++++++++++++----- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/packages/js/src/components/BlackFridaySalePromo.js b/packages/js/src/components/BlackFridaySalePromo.js index a8c18fe0f7e..e0c40b1de3f 100644 --- a/packages/js/src/components/BlackFridaySalePromo.js +++ b/packages/js/src/components/BlackFridaySalePromo.js @@ -1,43 +1,59 @@ -import { __ } from "@wordpress/i18n"; +import { __, sprintf } from "@wordpress/i18n"; import { select } from "@wordpress/data"; import { addQueryArgs } from "@wordpress/url"; +import { createInterpolateElement } from "@wordpress/element"; import PropTypes from "prop-types"; - import { TimeConstrainedNotification } from "./TimeConstrainedNotification"; /** * The BlackFridaySalePromo component. * * @param {string} store The store to use. Defaults to {@code yoast-seo/editor} + * @param {string} location Where the notice will be shown. Defaults to {@code sidebar} * @param {Object} props The props. 
* * @returns {JSX.Element} The BlackFridaySalePromo component. */ export const BlackFridaySalePromo = ( { store = "yoast-seo/editor", + location = "sidebar", ...props } ) => { const isPremium = select( store ).getIsPremium(); const linkParams = select( store ).selectLinkParams(); - + const title = location === "sidebar" + ? __( "BLACK FRIDAY SALE: YOAST SEO PREMIUM", "wordpress-seo" ) + : createInterpolateElement( + sprintf( + /* Translators: %1$s expands to a link on yoast.com, %2$s expands to the anchor end tag. */ + __( "BLACK FRIDAY SALE: YOAST SEO PREMIUM %1$sBuy now!%2$s", "wordpress-seo" ), + "", + "" + ), + { + // eslint-disable-next-line jsx-a11y/anchor-has-content + a: , + } + ); return isPremium ? null : ( 30% OFF! - - { __( "Buy now!", "wordpress-seo" ) } - + { location === "sidebar" && + { __( "Buy now!", "wordpress-seo" ) } + } ); }; BlackFridaySalePromo.propTypes = { store: PropTypes.string, + location: PropTypes.oneOf( [ "sidebar", "metabox" ] ), }; From 0ecbad97b167cc38545cf35d8818ee4ec0755f72 Mon Sep 17 00:00:00 2001 From: Paolo Luigi Scala Date: Tue, 12 Sep 2023 12:13:07 +0200 Subject: [PATCH 401/616] Add metabox classes and rename old ones --- css/src/admin-global.css | 34 ++++++++++++++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) diff --git a/css/src/admin-global.css b/css/src/admin-global.css index 0a57ff8e828..5df7e529ff3 100644 --- a/css/src/admin-global.css +++ b/css/src/admin-global.css @@ -856,7 +856,7 @@ body.folded .wpseo-admin-submit-fixed { color: #dba617; } -#black-friday-sale-notification.notice-yoast { +#black-friday-2023-sale-sidebar.notice-yoast { border-color: #fcd34d; border-width: 2px; border-radius: 8px; @@ -869,10 +869,40 @@ body.folded .wpseo-admin-submit-fixed { margin-top: 20px; } -#black-friday-sale-notification .notice-yoast__header { +#black-friday-2023-sale-sidebar .notice-yoast__header { margin-bottom: 2px; } +#black-friday-2023-sale-metabox.notice-yoast { + border-color: #fcd34d; + border-width: 2px; + border-radius: 8px; + background: #fff; + + margin: 20px; +} + +#black-friday-2023-sale-metabox h2.notice-yoast__header-heading { + padding: 0px; +} + +#black-friday-2023-sale-metabox .notice-yoast__container { + padding-bottom: 0px; +} + +#black-friday-2023-sale-metabox .notice-yoast__container p { + display: inline; +} + +#black-friday-2023-sale-metabox .notice-yoast__header { + margin-bottom: 8px; +} + +#black-friday-2023-sale-metabox .notice-yoast__header a { + font-weight: 400; + margin-left: 13px; +} + .yoast-bf-sale-badge { display: block; position:absolute; From 7c42af3e8ead57338c1009b723eb5171e3c990d6 Mon Sep 17 00:00:00 2001 From: Paolo Luigi Scala Date: Tue, 12 Sep 2023 12:13:24 +0200 Subject: [PATCH 402/616] Show notice in metabox --- packages/js/src/components/fills/MetaboxFill.js | 3 +++ 1 file changed, 3 insertions(+) diff --git a/packages/js/src/components/fills/MetaboxFill.js b/packages/js/src/components/fills/MetaboxFill.js index 4c1d795e1c8..b8e7fae93d6 100644 --- a/packages/js/src/components/fills/MetaboxFill.js +++ b/packages/js/src/components/fills/MetaboxFill.js @@ -24,6 +24,7 @@ import WordProofAuthenticationModals from "../../components/modals/WordProofAuth import PremiumSEOAnalysisModal from "../modals/PremiumSEOAnalysisModal"; import KeywordUpsell from "../KeywordUpsell"; import { BlackFridayProductEditorChecklistPromo } from "../BlackFridayProductEditorChecklistPromo"; +import { BlackFridaySalePromo } from "../BlackFridaySalePromo"; import { isWooCommerceActive } from 
"../../helpers/isWooCommerceActive"; import { withMetaboxWarningsCheck } from "../higherorder/withMetaboxWarningsCheck"; @@ -60,6 +61,7 @@ export default function MetaboxFill( { settings, wincherKeyphrases, setWincherNo }; const BlackFridayProductEditorChecklistPromoWithMetaboxWarningsCheck = withMetaboxWarningsCheck( BlackFridayProductEditorChecklistPromo ); + const BlackFridaySalePromoWithMetaboxWarningsCheck = withMetaboxWarningsCheck( BlackFridaySalePromo ); return ( <> { isWordProofIntegrationActive() && } @@ -75,6 +77,7 @@ export default function MetaboxFill( { settings, wincherKeyphrases, setWincherNo renderPriority={ 2 } > { shouldShowWooCommerceChecklistPromo() && } +
    { settings.isKeywordAnalysisActive && Date: Tue, 12 Sep 2023 12:22:09 +0200 Subject: [PATCH 403/616] Fix cs --- packages/js/src/components/BlackFridaySalePromo.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/js/src/components/BlackFridaySalePromo.js b/packages/js/src/components/BlackFridaySalePromo.js index e0c40b1de3f..d6893bb8d02 100644 --- a/packages/js/src/components/BlackFridaySalePromo.js +++ b/packages/js/src/components/BlackFridaySalePromo.js @@ -37,7 +37,7 @@ export const BlackFridaySalePromo = ( { ); return isPremium ? null : ( Date: Tue, 12 Sep 2023 12:51:55 +0200 Subject: [PATCH 404/616] Adapt unit tests in buildSpec.js --- .../yoastseo/spec/parse/build/buildSpec.js | 205 ++---------------- 1 file changed, 20 insertions(+), 185 deletions(-) diff --git a/packages/yoastseo/spec/parse/build/buildSpec.js b/packages/yoastseo/spec/parse/build/buildSpec.js index ca6437e3319..66c0203b7db 100644 --- a/packages/yoastseo/spec/parse/build/buildSpec.js +++ b/packages/yoastseo/spec/parse/build/buildSpec.js @@ -600,144 +600,43 @@ describe( "The parse function", () => { const languageProcessor = new LanguageProcessor( researcher ); expect( build( paper, languageProcessor ) ).toEqual( { - name: "#document-fragment", attributes: {}, childNodes: [ - { - attributes: {}, - childNodes: [], - name: "#comment", - sourceCodeLocation: { - endOffset: 39, - startOffset: 0, - }, - }, - { - name: "#text", - value: "\n", - sourceCodeRange: { startOffset: 39, endOffset: 40 }, - }, - { - name: "#text", - value: "\n", - sourceCodeRange: { startOffset: 287, endOffset: 288 }, - }, - { - attributes: {}, - childNodes: [], - name: "#comment", - sourceCodeLocation: { - endOffset: 328, - startOffset: 288, - }, - }, + { name: "#text", sourceCodeRange: { endOffset: 40, startOffset: 39 }, value: "\n" }, + { name: "#text", sourceCodeRange: { endOffset: 288, startOffset: 287 }, value: "\n" }, { attributes: {}, childNodes: [ { name: "#text", - value: "This is the first sentence.", - sourceCodeRange: { startOffset: 331, endOffset: 358 }, - }, + sourceCodeRange: { endOffset: 358, startOffset: 331 }, + value: "This is the first sentence." 
}, ], isImplicit: false, name: "p", sentences: [ { - sourceCodeRange: { - endOffset: 358, - startOffset: 331, - }, + sourceCodeRange: { endOffset: 358, startOffset: 331 }, text: "This is the first sentence.", tokens: [ - { - sourceCodeRange: { - endOffset: 335, - startOffset: 331, - }, - text: "This", - }, - { - sourceCodeRange: { - endOffset: 336, - startOffset: 335, - }, - text: " ", - }, - { - sourceCodeRange: { - endOffset: 338, - startOffset: 336, - }, - text: "is", - }, - { - sourceCodeRange: { - endOffset: 339, - startOffset: 338, - }, - text: " ", - }, - { - sourceCodeRange: { - endOffset: 342, - startOffset: 339, - }, - text: "the", - }, - { - sourceCodeRange: { - endOffset: 343, - startOffset: 342, - }, - text: " ", - }, - { - sourceCodeRange: { - endOffset: 348, - startOffset: 343, - }, - text: "first", - }, - { - sourceCodeRange: { - endOffset: 349, - startOffset: 348, - }, - text: " ", - }, - { - sourceCodeRange: { - endOffset: 357, - startOffset: 349, - }, - text: "sentence", - }, - { - sourceCodeRange: { - endOffset: 358, - startOffset: 357, - }, - text: ".", - }, - ], - }, - ], + { sourceCodeRange: { endOffset: 335, startOffset: 331 }, text: "This" }, + { sourceCodeRange: { endOffset: 336, startOffset: 335 }, text: " " }, + { sourceCodeRange: { endOffset: 338, startOffset: 336 }, text: "is" }, + { sourceCodeRange: { endOffset: 339, startOffset: 338 }, text: " " }, + { sourceCodeRange: { endOffset: 342, startOffset: 339 }, text: "the" }, + { sourceCodeRange: { endOffset: 343, startOffset: 342 }, text: " " }, + { sourceCodeRange: { endOffset: 348, startOffset: 343 }, text: "first" }, + { sourceCodeRange: { endOffset: 349, startOffset: 348 }, text: " " }, + { sourceCodeRange: { endOffset: 357, startOffset: 349 }, text: "sentence" }, + { sourceCodeRange: { endOffset: 358, startOffset: 357 }, text: "." 
}, + ] } ], sourceCodeLocation: { endOffset: 362, - endTag: { - endOffset: 362, - startOffset: 358, - }, + endTag: { endOffset: 362, startOffset: 358 }, startOffset: 328, - startTag: { - endOffset: 331, - startOffset: 328, - }, - }, - }, - ], - } + startTag: { endOffset: 331, startOffset: 328 }, + } } ], + name: "#document-fragment" } ); } ); it( "parses an HTML text with a Yoast breadcrumbs widget in Elementor, which should be filtered out", () => { @@ -1037,12 +936,6 @@ describe( "The parse function", () => { sourceCodeRange: { startOffset: 26, endOffset: 39 }, } ], childNodes: [ - { - name: "#comment", - attributes: {}, - childNodes: [], - sourceCodeLocation: { startOffset: 8, endOffset: 26 }, - }, { name: "#text", value: "Hello, world!", @@ -1112,12 +1005,6 @@ describe( "The parse function", () => { value: "Hello, ", sourceCodeRange: { startOffset: 8, endOffset: 15 }, }, - { - name: "#comment", - attributes: {}, - childNodes: [], - sourceCodeLocation: { startOffset: 15, endOffset: 33 }, - }, { name: "#text", value: " world!", @@ -1540,12 +1427,6 @@ describe( "The parse function", () => { name: "#document-fragment", attributes: {}, childNodes: [ - { - name: "#comment", - attributes: {}, - childNodes: [], - sourceCodeLocation: { startOffset: 0, endOffset: 21 }, - }, { name: "#text", value: "\n", sourceCodeRange: { startOffset: 21, endOffset: 22 } }, { @@ -1608,33 +1489,14 @@ describe( "The parse function", () => { }, { name: "#text", value: "\n", sourceCodeRange: { startOffset: 108, endOffset: 109 } }, - { - name: "#comment", - attributes: {}, - childNodes: [], - sourceCodeLocation: { startOffset: 109, endOffset: 131 }, - }, { name: "#text", value: "\n\n", sourceCodeRange: { startOffset: 131, endOffset: 133 } }, - { - name: "#comment", - attributes: {}, - childNodes: [], - sourceCodeLocation: { startOffset: 133, endOffset: 149 }, - }, { name: "#text", value: "\n", sourceCodeRange: { startOffset: 149, endOffset: 150 } }, { name: "ul", attributes: {}, childNodes: [ - { - name: "#comment", - attributes: {}, - childNodes: [], - sourceCodeLocation: { startOffset: 154, endOffset: 175 }, - clientId: "ce5c002f-eea2-4a92-be4a-6fd25e947f45", - }, { name: "#text", value: "\n", clientId: "ce5c002f-eea2-4a92-be4a-6fd25e947f45", sourceCodeRange: { startOffset: 175, endOffset: 176 } }, { @@ -1674,22 +1536,8 @@ describe( "The parse function", () => { }, { name: "#text", value: "\n", clientId: "ce5c002f-eea2-4a92-be4a-6fd25e947f45", sourceCodeRange: { startOffset: 196, endOffset: 197 } }, - { - name: "#comment", - attributes: {}, - childNodes: [], - sourceCodeLocation: { startOffset: 197, endOffset: 219 }, - clientId: "ce5c002f-eea2-4a92-be4a-6fd25e947f45", - }, { name: "#text", value: "\n\n", clientId: "ce5c002f-eea2-4a92-be4a-6fd25e947f45", sourceCodeRange: { startOffset: 219, endOffset: 221 } }, - { - name: "#comment", - attributes: {}, - childNodes: [], - sourceCodeLocation: { startOffset: 221, endOffset: 242 }, - clientId: "ce5c002f-eea2-4a92-be4a-6fd25e947f45", - }, { name: "#text", value: "\n", clientId: "ce5c002f-eea2-4a92-be4a-6fd25e947f45", sourceCodeRange: { startOffset: 242, endOffset: 243 } }, { @@ -1728,13 +1576,6 @@ describe( "The parse function", () => { }, { name: "#text", value: "\n", clientId: "ce5c002f-eea2-4a92-be4a-6fd25e947f45", sourceCodeRange: { startOffset: 261, endOffset: 262 } }, - { - name: "#comment", - attributes: {}, - childNodes: [], - sourceCodeLocation: { startOffset: 262, endOffset: 284 }, - clientId: "ce5c002f-eea2-4a92-be4a-6fd25e947f45", - }, ], sourceCodeLocation: 
{ startTag: { startOffset: 150, endOffset: 154 }, @@ -1746,12 +1587,6 @@ describe( "The parse function", () => { }, { name: "#text", value: "\n", sourceCodeRange: { startOffset: 289, endOffset: 290 } }, - { - name: "#comment", - attributes: {}, - childNodes: [], - sourceCodeLocation: { startOffset: 290, endOffset: 307 }, - }, ], } ); From c9c55b81ca342e8492f419a6026bcd347e6d63df Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Tue, 12 Sep 2023 12:57:09 +0200 Subject: [PATCH 405/616] Adapt unit tests in filterTreeSpec.js --- .../parse/build/private/filterTreeSpec.js | 63 ------------------- 1 file changed, 63 deletions(-) diff --git a/packages/yoastseo/spec/parse/build/private/filterTreeSpec.js b/packages/yoastseo/spec/parse/build/private/filterTreeSpec.js index 4a9f9980a76..f54d00d605f 100644 --- a/packages/yoastseo/spec/parse/build/private/filterTreeSpec.js +++ b/packages/yoastseo/spec/parse/build/private/filterTreeSpec.js @@ -319,24 +319,6 @@ describe( "Tests filtered trees of a few Yoast blocks and of a made-up Yoast blo name: "#document-fragment", attributes: {}, childNodes: [ - { - attributes: {}, - childNodes: [], - name: "#comment", - sourceCodeLocation: { - startOffset: 0, - endOffset: 34, - }, - }, - { - attributes: {}, - childNodes: [], - name: "#comment", - sourceCodeLocation: { - startOffset: 34, - endOffset: 55, - }, - }, { name: "#text", value: "\n", @@ -371,15 +353,6 @@ describe( "Tests filtered trees of a few Yoast blocks and of a made-up Yoast blo value: "\n", sourceCodeRange: { startOffset: 88, endOffset: 89 }, }, - { - attributes: {}, - childNodes: [], - name: "#comment", - sourceCodeLocation: { - startOffset: 89, - endOffset: 111, - }, - }, ], } ); } ); @@ -396,15 +369,6 @@ describe( "Tests filtered trees of a few Yoast blocks and of a made-up Yoast blo name: "#document-fragment", attributes: {}, childNodes: [ - { - attributes: {}, - childNodes: [], - name: "#comment", - sourceCodeLocation: { - startOffset: 0, - endOffset: 39, - }, - }, { name: "#text", value: "\n", @@ -415,15 +379,6 @@ describe( "Tests filtered trees of a few Yoast blocks and of a made-up Yoast blo value: "\n", sourceCodeRange: { startOffset: 283, endOffset: 284 }, }, - { - attributes: {}, - childNodes: [], - name: "#comment", - sourceCodeLocation: { - startOffset: 284, - endOffset: 324, - }, - }, { attributes: {}, childNodes: [ @@ -467,15 +422,6 @@ describe( "Tests filtered trees of a few Yoast blocks and of a made-up Yoast blo name: "#document-fragment", attributes: {}, childNodes: [ - { - attributes: {}, - childNodes: [], - name: "#comment", - sourceCodeLocation: { - startOffset: 0, - endOffset: 71, - }, - }, { name: "#text", value: "\n", @@ -486,15 +432,6 @@ describe( "Tests filtered trees of a few Yoast blocks and of a made-up Yoast blo value: "\n", sourceCodeRange: { startOffset: 358, endOffset: 359 }, }, - { - attributes: {}, - childNodes: [], - name: "#comment", - sourceCodeLocation: { - startOffset: 359, - endOffset: 404, - }, - }, { attributes: {}, childNodes: [ From 99f90d7500f5973ed96061c5306c97950f9d9ea3 Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Tue, 12 Sep 2023 13:00:04 +0200 Subject: [PATCH 406/616] Adapt unit tests in buildSpec.js --- .../yoastseo/spec/parse/build/buildSpec.js | 40 ------------------- 1 file changed, 40 deletions(-) diff --git a/packages/yoastseo/spec/parse/build/buildSpec.js b/packages/yoastseo/spec/parse/build/buildSpec.js index 66c0203b7db..547092cfdd5 100644 --- a/packages/yoastseo/spec/parse/build/buildSpec.js +++ 
b/packages/yoastseo/spec/parse/build/buildSpec.js @@ -1609,18 +1609,6 @@ describe( "parsing html with Yoast blocks that enter the Paper as html comments" name: "#document-fragment", attributes: {}, childNodes: [ - { - attributes: {}, - childNodes: [], - name: "#comment", - sourceCodeLocation: { endOffset: 34, startOffset: 0 }, - }, - { - attributes: {}, - childNodes: [], - name: "#comment", - sourceCodeLocation: { endOffset: 55, startOffset: 34 }, - }, { name: "#text", value: "\n", sourceCodeRange: { startOffset: 55, endOffset: 56 } }, { attributes: {}, @@ -1673,12 +1661,6 @@ describe( "parsing html with Yoast blocks that enter the Paper as html comments" }, }, { name: "#text", value: "\n", sourceCodeRange: { startOffset: 132, endOffset: 133 } }, - { - attributes: {}, - childNodes: [], - name: "#comment", - sourceCodeLocation: { endOffset: 155, startOffset: 133 }, - }, ], } ); } ); @@ -1765,15 +1747,6 @@ describe( "parsing html with Yoast blocks that enter the Paper as html comments" }, }, }, - { - attributes: {}, - childNodes: [], - name: "#comment", - sourceCodeLocation: { - endOffset: 51, - startOffset: 20, - }, - }, ], } ); } ); @@ -1852,12 +1825,6 @@ describe( "parsing html with Yoast blocks that enter the Paper as html comments" startTag: { endOffset: 5, startOffset: 0 }, }, }, - { - attributes: {}, - childNodes: [], - name: "#comment", - sourceCodeLocation: { endOffset: 116, startOffset: 85 }, - }, ], } ); } ); @@ -1934,12 +1901,6 @@ describe( "parsing html with Yoast blocks that enter the Paper as html comments" name: "#document-fragment", attributes: {}, childNodes: [ - { - name: "#comment", - attributes: {}, - childNodes: [], - sourceCodeLocation: { startOffset: 0, endOffset: 458 }, - }, { name: "#text", value: "\n", sourceCodeRange: { startOffset: 458, endOffset: 459 } }, { name: "div", @@ -2235,7 +2196,6 @@ describe( "parsing html with Yoast blocks that enter the Paper as html comments" clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1", }, { name: "#text", value: "\n", sourceCodeRange: { startOffset: 1070, endOffset: 1071 } }, - { name: "#comment", attributes: {}, childNodes: [], sourceCodeLocation: { startOffset: 1071, endOffset: 1099 } }, ], } ); From 10031602c0d601903897212dab2b34f5feae0e78 Mon Sep 17 00:00:00 2001 From: Paolo Luigi Scala Date: Tue, 12 Sep 2023 13:03:40 +0200 Subject: [PATCH 407/616] Allow for any type in the title prop --- packages/js/src/components/TimeConstrainedNotification.js | 4 ++-- .../js/src/containers/PersistentDismissableNotification.js | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/packages/js/src/components/TimeConstrainedNotification.js b/packages/js/src/components/TimeConstrainedNotification.js index 2053844b240..6d7f1a8e6d3 100644 --- a/packages/js/src/components/TimeConstrainedNotification.js +++ b/packages/js/src/components/TimeConstrainedNotification.js @@ -5,7 +5,7 @@ import PersistentDismissableNotification from "../containers/PersistentDismissab /** * @param {string} store The Redux store identifier from which to determine dismissed state. * @param {JSX.Element} image The image or null if no image. - * @param {string} title The title of the notification. + * @param {object | string} title The title of the notification. * @param {string} promoId The promotion id. * @param {string} alertKey The unique id for the alert. * @param {JSX.node} children The content of the notification. 
@@ -40,7 +40,7 @@ export const TimeConstrainedNotification = ( { TimeConstrainedNotification.propTypes = { store: PropTypes.string, image: PropTypes.elementType, - title: PropTypes.string.isRequired, + title: PropTypes.any.isRequired, promoId: PropTypes.string.isRequired, alertKey: PropTypes.string.isRequired, children: PropTypes.node, diff --git a/packages/js/src/containers/PersistentDismissableNotification.js b/packages/js/src/containers/PersistentDismissableNotification.js index ce80ff26787..e00fb7fbb6a 100644 --- a/packages/js/src/containers/PersistentDismissableNotification.js +++ b/packages/js/src/containers/PersistentDismissableNotification.js @@ -5,7 +5,7 @@ import withPersistentDismiss from "./withPersistentDismiss"; /** * @param {string} id The id. * @param {boolean} hasIcon Wether or not to show icon before title. - * @param {string} title The title. + * @param {object | string} title The title. * @param {JSX.Element|null} image The image or null if no image. * @param {isAlertDismissed} boolean Wether or not the notification is dismissed. * @param {Function} onDismissed The dismissal prop to be renamed for Notification component. @@ -44,7 +44,7 @@ PersistentDismissableNotification.propTypes = { children: PropTypes.node.isRequired, id: PropTypes.string.isRequired, hasIcon: PropTypes.bool, - title: PropTypes.string.isRequired, + title: PropTypes.any.isRequired, image: PropTypes.elementType, isAlertDismissed: PropTypes.bool.isRequired, onDismissed: PropTypes.func.isRequired, From cc1377a0271a80edc534503e4d27f729b6203c7c Mon Sep 17 00:00:00 2001 From: Paolo Luigi Scala Date: Tue, 12 Sep 2023 13:08:20 +0200 Subject: [PATCH 408/616] Fix cs --- admin/class-config.php | 2 +- ...black-friday-sidebar-checklist-notification.php | 2 +- src/integrations/third-party/elementor.php | 14 +++++++------- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/admin/class-config.php b/admin/class-config.php index 62b431461f9..69d99529f1d 100644 --- a/admin/class-config.php +++ b/admin/class-config.php @@ -94,7 +94,7 @@ public function config_page_scripts() { 'currentPromotions' => YoastSEO()->classes->get( Yoast\WP\SEO\Promotions\Application\Promotion_Manager::class )->get_current_promotions(), 'webinarIntroSettingsUrl' => WPSEO_Shortlinker::get( 'https://yoa.st/webinar-intro-settings' ), 'webinarIntroFirstTimeConfigUrl' => $this->get_webinar_shortlink(), - 'linkParams' => WPSEO_Shortlinker::get_query_params(), + 'linkParams' => WPSEO_Shortlinker::get_query_params(), 'pluginUrl' => \plugins_url( '', \WPSEO_FILE ), ]; diff --git a/src/integrations/alerts/black-friday-sidebar-checklist-notification.php b/src/integrations/alerts/black-friday-sidebar-checklist-notification.php index 71ec00ed0b7..4e4c7cd488d 100644 --- a/src/integrations/alerts/black-friday-sidebar-checklist-notification.php +++ b/src/integrations/alerts/black-friday-sidebar-checklist-notification.php @@ -5,7 +5,7 @@ /** * Black_Friday_Promo_Notification class. */ -class Black_Friday_Promo_Notification extends Abstract_Dismissable_Alert { +class Black_Friday_Sidebar_Checklist_Notification extends Abstract_Dismissable_Alert { /** * Holds the alert identifier. diff --git a/src/integrations/third-party/elementor.php b/src/integrations/third-party/elementor.php index 12713f9c850..dc215f1cd63 100644 --- a/src/integrations/third-party/elementor.php +++ b/src/integrations/third-party/elementor.php @@ -113,18 +113,18 @@ public static function get_conditionals() { /** * Constructor. 
* - * @param WPSEO_Admin_Asset_Manager $asset_manager The asset manager. - * @param Options_Helper $options The options helper. - * @param Capability_Helper $capability The capability helper. + * @param WPSEO_Admin_Asset_Manager $asset_manager The asset manager. + * @param Options_Helper $options The options helper. + * @param Capability_Helper $capability The capability helper. */ public function __construct( WPSEO_Admin_Asset_Manager $asset_manager, Options_Helper $options, Capability_Helper $capability ) { - $this->asset_manager = $asset_manager; - $this->options = $options; - $this->capability = $capability; + $this->asset_manager = $asset_manager; + $this->options = $options; + $this->capability = $capability; $this->seo_analysis = new WPSEO_Metabox_Analysis_SEO(); $this->readability_analysis = new WPSEO_Metabox_Analysis_Readability(); @@ -458,7 +458,7 @@ public function enqueue() { ], 'dismissedAlerts' => $dismissed_alerts, 'webinarIntroElementorUrl' => WPSEO_Shortlinker::get( 'https://yoa.st/webinar-intro-elementor' ), - 'currentPromotions' => YoastSEO()->classes->get( Promotion_Manager::class )->get_current_promotions(), + 'currentPromotions' => YoastSEO()->classes->get( Promotion_Manager::class )->get_current_promotions(), 'usedKeywordsNonce' => \wp_create_nonce( 'wpseo-keyword-usage-and-post-types' ), 'linkParams' => WPSEO_Shortlinker::get_query_params(), 'pluginUrl' => \plugins_url( '', \WPSEO_FILE ), From e8337207428c76308e3ad917a4987ef9081fad8d Mon Sep 17 00:00:00 2001 From: marinakoleva Date: Tue, 12 Sep 2023 16:06:28 +0200 Subject: [PATCH 409/616] makes hashedHtmlRegex work for all positions in the word and adds unit tests --- .../helpers/word/getWordsForHTMLParserSpec.js | 11 ++----- .../spec/parse/build/private/tokenizeSpec.js | 1 + .../helpers/word/getWordsForHTMLParser.js | 9 +++--- .../src/parse/build/private/htmlEntities.js | 29 ++++++++++++++----- 4 files changed, 30 insertions(+), 20 deletions(-) diff --git a/packages/yoastseo/spec/languageProcessing/helpers/word/getWordsForHTMLParserSpec.js b/packages/yoastseo/spec/languageProcessing/helpers/word/getWordsForHTMLParserSpec.js index 03ccad80ec9..bda5aff226b 100644 --- a/packages/yoastseo/spec/languageProcessing/helpers/word/getWordsForHTMLParserSpec.js +++ b/packages/yoastseo/spec/languageProcessing/helpers/word/getWordsForHTMLParserSpec.js @@ -32,11 +32,6 @@ const testCases = [ text: "a phrase with an apostrophe's", expectedResult: [ "a", " ", "phrase", " ", "with", " ", "an", " ", "apostrophe's" ], }, - { - description: "correctly tokenizes a phrase with an ampersand", - text: "a phrase with an ampersand&", - expectedResult: [ "a", " ", "phrase", " ", "with", " ", "an", " ", "ampersand", "&" ], - }, { description: "correctly tokenizes a phrase between quotes", text: "\"a phrase between quotes\"", @@ -74,9 +69,9 @@ const testCases = [ expectedResult: [ "[", "caption", " ", "id=\"attachment_3341501", "\"", " ", "align=\"alignnone", "\"", " ", "width=\"300", "\"", "]" ], }, { - description: "doesn't match with a hashed HTML entity (in this case, #amp;)", - text: "a phrase with an ampersand#amp;", - expectedResult: [ "a", " ", "phrase", " ", "with", " ", "an", " ", "ampersand#amp;" ], + description: "doesn't match with a hashed HTML entity (in this case, '#trade;' for '™') in the beginning or the end of the word", + text: "one trademark#trade;, and another '#trade;trademark'", + expectedResult: [ "one", " ", "trademark#trade;", ",", " ", "and", " ", "(another)", " ", "'", "#trade;trademark", "'" ], }, ]; diff --git 
a/packages/yoastseo/spec/parse/build/private/tokenizeSpec.js b/packages/yoastseo/spec/parse/build/private/tokenizeSpec.js index 74824b918fc..0402389341d 100644 --- a/packages/yoastseo/spec/parse/build/private/tokenizeSpec.js +++ b/packages/yoastseo/spec/parse/build/private/tokenizeSpec.js @@ -375,6 +375,7 @@ describe( "A test for the tokenize function", } ); it( "should correctly tokenize a sentence containing an HTML entity", function() { + // This tests the scenario where an ampersand character "&" enters the Paper as "&" in Classic editor. const mockPaper = new Paper( "

<p>This is a paragraph&amp;</p>
    " ); const mockResearcher = new EnglishResearcher( mockPaper ); const languageProcessor = new LanguageProcessor( mockResearcher ); diff --git a/packages/yoastseo/src/languageProcessing/helpers/word/getWordsForHTMLParser.js b/packages/yoastseo/src/languageProcessing/helpers/word/getWordsForHTMLParser.js index 24c57e42d38..5ee3569a867 100644 --- a/packages/yoastseo/src/languageProcessing/helpers/word/getWordsForHTMLParser.js +++ b/packages/yoastseo/src/languageProcessing/helpers/word/getWordsForHTMLParser.js @@ -1,5 +1,5 @@ import { punctuationRegexEnd, punctuationRegexStart } from "../sanitize/removePunctuation"; -import { hashedHtmlEntitiesEndRegex } from "../../../parse/build/private/htmlEntities"; +import { hashedHtmlEntitiesRegex } from "../../../parse/build/private/htmlEntities"; /* * The following regex matches a word separator. A word separator is either a whitespace, a slash, a backslash, a @@ -44,13 +44,14 @@ const getWordsForHTMLParser = ( text ) => { const postTokens = []; // Add all punctuation marks that occur before the first letter of the token to the pretokens array. - while ( punctuationRegexStart.test( token ) ) { + // Also, prevent matching with a hashed HTML entity. + while ( punctuationRegexStart.test( token ) && ! hashedHtmlEntitiesRegex.test( token ) ) { preTokens.push( token[ 0 ] ); token = token.slice( 1 ); } // Add all punctuation marks that occur after the last letter of the token to the posttokens array. - // Here, prevent matching with a hashed HTML entity. - while ( punctuationRegexEnd.test( token ) && ! hashedHtmlEntitiesEndRegex.test( token ) ) { + // Also, prevent matching with a hashed HTML entity. + while ( punctuationRegexEnd.test( token ) && ! hashedHtmlEntitiesRegex.test( token ) ) { // Using unshift here because we are iterating from the end of the string to the beginning, // and we want to keep the order of the punctuation marks. // Therefore, we add them to the start of the array. diff --git a/packages/yoastseo/src/parse/build/private/htmlEntities.js b/packages/yoastseo/src/parse/build/private/htmlEntities.js index 270ab20027d..2c429c9c898 100644 --- a/packages/yoastseo/src/parse/build/private/htmlEntities.js +++ b/packages/yoastseo/src/parse/build/private/htmlEntities.js @@ -1,18 +1,31 @@ -// Contains characters usually converted HTML entities (cf. _wp_specialchars). +// Contains 1) special characters usually converted into HTML entities (cf. _wp_specialchars). +// And 2) their corresponding HTML entities, stripped from the initial ampersand (e.g. 'lt;' instead of '<'). export const htmlEntities = new Map( [ [ "amp;", "&" ], [ "lt;", "<" ], [ "gt;", ">" ], [ "quot;", '"' ], - [ "#x27;", "'" ], + [ "apos;", "'" ], + [ "ndash;", "–" ], + [ "mdash;", "—" ], + [ "copy;", "©" ], + [ "reg;", "®" ], + [ "trade;", "™" ], + [ "pound;", "£" ], + [ "yen;", "¥" ], + [ "euro;", "€" ], + [ "dollar;", "$" ], + [ "deg;", "°" ], + [ "asymp;", "≈" ], + [ "ne;", "≠" ], ] ); -// Contains characters along with their hashed HTML entities. -export const hashedHtmlEntities = new Map(); -htmlEntities.forEach( ( v, k ) => hashedHtmlEntities.set( "#" + k, v ) ); - // Regex to find all HTML entities. export const htmlEntitiesRegex = new RegExp( "&(" + [ ...htmlEntities.keys() ].join( "|" ) + ")", "ig" ); -// Regex to find hashed HTML entities at the end of a string. -export const hashedHtmlEntitiesEndRegex = new RegExp( "(" + [ ...hashedHtmlEntities.keys() ].join( "|" ) + ")$" ); +// Contains special characters along with their hashed HTML entities (e.g. 
'#amp;' instead of '&' for the ampersand character '&'). +export const hashedHtmlEntities = new Map(); +htmlEntities.forEach( ( v, k ) => hashedHtmlEntities.set( "#" + k, v ) ); + +// Regex to find hashed HTML entities. +export const hashedHtmlEntitiesRegex = new RegExp( "(" + [ ...hashedHtmlEntities.keys() ].join( "|" ) + ")", "g" ); From d7fb8ead5dfe3e3508b37f65dc9525babb0a755a Mon Sep 17 00:00:00 2001 From: Paolo Luigi Scala Date: Tue, 12 Sep 2023 17:13:05 +0200 Subject: [PATCH 410/616] Fix translations --- packages/js/src/components/BlackFridaySalePromo.js | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/packages/js/src/components/BlackFridaySalePromo.js b/packages/js/src/components/BlackFridaySalePromo.js index d6893bb8d02..9fb8101df5e 100644 --- a/packages/js/src/components/BlackFridaySalePromo.js +++ b/packages/js/src/components/BlackFridaySalePromo.js @@ -22,11 +22,15 @@ export const BlackFridaySalePromo = ( { const isPremium = select( store ).getIsPremium(); const linkParams = select( store ).selectLinkParams(); const title = location === "sidebar" - ? __( "BLACK FRIDAY SALE: YOAST SEO PREMIUM", "wordpress-seo" ) + ? sprintf( + __( "BLACK FRIDAY SALE: %1$s", "wordpress-seo" ), + "YOAST SEO PREMIUM" + ) : createInterpolateElement( sprintf( - /* Translators: %1$s expands to a link on yoast.com, %2$s expands to the anchor end tag. */ - __( "BLACK FRIDAY SALE: YOAST SEO PREMIUM %1$sBuy now!%2$s", "wordpress-seo" ), + /* Translators: %1$s expands to YOAST SEO PREMIUM, %2$s expands to a link on yoast.com, %3$s expands to the anchor end tag. */ + __( "BLACK FRIDAY SALE: %1$s, %2$sBuy now!%3$s", "wordpress-seo" ), + "YOAST SEO PREMIUM", "", "" ), @@ -45,7 +49,7 @@ export const BlackFridaySalePromo = ( { image={ Image } { ...props } > - 30% OFF! + { __( "30% OFF!", "wordpress-seo" ) } { location === "sidebar" && { __( "Buy now!", "wordpress-seo" ) } } From ed141f45e5dd64e402dab2aad2be37889d218dd8 Mon Sep 17 00:00:00 2001 From: Paolo Luigi Scala Date: Tue, 12 Sep 2023 17:13:24 +0200 Subject: [PATCH 411/616] Fix evaluating length on null --- packages/js/src/helpers/checkForMetaboxWarnings.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/js/src/helpers/checkForMetaboxWarnings.js b/packages/js/src/helpers/checkForMetaboxWarnings.js index 811fdad8cdd..3d76d563144 100644 --- a/packages/js/src/helpers/checkForMetaboxWarnings.js +++ b/packages/js/src/helpers/checkForMetaboxWarnings.js @@ -7,7 +7,7 @@ import { select } from "@wordpress/data"; export const checkForMetaboxWarnings = () => { const isPremium = select( "yoast-seo/editor" ).getIsPremium(); const warningsFree = select( "yoast-seo/editor" ).getWarningMessage(); - const warningsPremium = isPremium ? select( "yoast-seo-premium/editor" )?.getMetaboxWarning() ?? [] : false; + const warningsPremium = isPremium ? select( "yoast-seo-premium/editor" )?.getMetaboxWarning() ?? 
[] : []; return warningsPremium.length > 0 || warningsFree.length > 0; }; From 4eb3afc8663a0b9c493eb8586f2f88853105aebe Mon Sep 17 00:00:00 2001 From: Paolo Luigi Scala Date: Tue, 12 Sep 2023 17:14:22 +0200 Subject: [PATCH 412/616] Use dependency injection instead of service interface --- src/integrations/third-party/elementor.php | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/src/integrations/third-party/elementor.php b/src/integrations/third-party/elementor.php index dc215f1cd63..cc47b5f7e40 100644 --- a/src/integrations/third-party/elementor.php +++ b/src/integrations/third-party/elementor.php @@ -101,6 +101,13 @@ class Elementor implements Integration_Interface { */ protected $inclusive_language_analysis; + /** + * Holds the promotion manager. + * + * @var Promotion_Manager + */ + protected $promotion_manager; + /** * Returns the conditionals based in which this loadable should be active. * @@ -116,15 +123,18 @@ public static function get_conditionals() { * @param WPSEO_Admin_Asset_Manager $asset_manager The asset manager. * @param Options_Helper $options The options helper. * @param Capability_Helper $capability The capability helper. + * @param Promotion_Manager $promotion_manager The promotion manager. */ public function __construct( WPSEO_Admin_Asset_Manager $asset_manager, Options_Helper $options, - Capability_Helper $capability + Capability_Helper $capability, + Promotion_Manager $promotion_manager ) { - $this->asset_manager = $asset_manager; - $this->options = $options; - $this->capability = $capability; + $this->asset_manager = $asset_manager; + $this->options = $options; + $this->capability = $capability; + $this->promotion_manager = $promotion_manager; $this->seo_analysis = new WPSEO_Metabox_Analysis_SEO(); $this->readability_analysis = new WPSEO_Metabox_Analysis_Readability(); @@ -458,7 +468,7 @@ public function enqueue() { ], 'dismissedAlerts' => $dismissed_alerts, 'webinarIntroElementorUrl' => WPSEO_Shortlinker::get( 'https://yoa.st/webinar-intro-elementor' ), - 'currentPromotions' => YoastSEO()->classes->get( Promotion_Manager::class )->get_current_promotions(), + 'currentPromotions' => $this->promotion_manager->get_current_promotions(), 'usedKeywordsNonce' => \wp_create_nonce( 'wpseo-keyword-usage-and-post-types' ), 'linkParams' => WPSEO_Shortlinker::get_query_params(), 'pluginUrl' => \plugins_url( '', \WPSEO_FILE ), From a968f5e8069a763a79b34f576251cfb98e429f0c Mon Sep 17 00:00:00 2001 From: marinakoleva Date: Tue, 12 Sep 2023 17:36:49 +0200 Subject: [PATCH 413/616] adds a unit test to buildSpec and removes one from KeywordDensityAssessmentSpec --- .../helpers/word/getAllWordsFromTreeSpec.js | 4 +- .../yoastseo/spec/parse/build/buildSpec.js | 30 +++++++------ .../spec/parse/build/private/tokenizeSpec.js | 2 + .../seo/KeywordDensityAssessmentSpec.js | 42 ------------------- 4 files changed, 21 insertions(+), 57 deletions(-) diff --git a/packages/yoastseo/spec/languageProcessing/helpers/word/getAllWordsFromTreeSpec.js b/packages/yoastseo/spec/languageProcessing/helpers/word/getAllWordsFromTreeSpec.js index 51101ad58b9..faa58cdf7e0 100644 --- a/packages/yoastseo/spec/languageProcessing/helpers/word/getAllWordsFromTreeSpec.js +++ b/packages/yoastseo/spec/languageProcessing/helpers/word/getAllWordsFromTreeSpec.js @@ -17,8 +17,8 @@ describe( "a test for getting words from the tree", () => { expect( getAllWordsFromTree( paper ) ).toEqual( [ "A", "very", "intelligent", "cat", "loves", "their", "human", "A", "dog", "is", "very", "cute", 
"A", "subheading", "3", "text", "text", "text", "A", "subheading", "4", "more", "text" ] ); } ); - it( "should get the correct words from text containing  , an ampersand and word enclosed in double quotes", () => { - const paper = new Paper( "

    What's black&, orange, sassy all over, and a crowd favorite? Yep, you guessed it - \"Torties\"!

    " ); + it( "should get the correct words from text containing &nbsp; and word enclosed in double quotes", () => { + const paper = new Paper( "

    What's black, orange, sassy all over, and a crowd favorite? Yep, you guessed it - \"Torties\"!

    " ); researcher.setPaper( paper ); buildTree( paper, researcher ); expect( getAllWordsFromTree( paper ).length ).toEqual( 15 ); diff --git a/packages/yoastseo/spec/parse/build/buildSpec.js b/packages/yoastseo/spec/parse/build/buildSpec.js index ca6437e3319..a4423525c41 100644 --- a/packages/yoastseo/spec/parse/build/buildSpec.js +++ b/packages/yoastseo/spec/parse/build/buildSpec.js @@ -6,8 +6,8 @@ import memoizedSentenceTokenizer from "../../../src/languageProcessing/helpers/s import splitIntoTokensCustom from "../../../src/languageProcessing/languages/ja/helpers/splitIntoTokensCustom"; describe( "The parse function", () => { - it( "parses a basic HTML text", () => { - const html = "

<div><p class='yoast'>Hello, world!</p></div>
    "; + it( "parses a basic HTML text with an HTML entity (&)", () => { + const html = "

<div><p class='yoast'>Hello, world &amp; beyond!</p></div>
    "; const paper = new Paper( html ); const researcher = Factory.buildMockResearcher( {}, true, false, false, @@ -21,14 +21,14 @@ describe( "The parse function", () => { name: "div", sourceCodeLocation: { startOffset: 0, - endOffset: 45, + endOffset: 58, startTag: { startOffset: 0, endOffset: 5, }, endTag: { - startOffset: 39, - endOffset: 45, + startOffset: 52, + endOffset: 58, }, }, attributes: {}, @@ -39,31 +39,35 @@ describe( "The parse function", () => { "class": new Set( [ "yoast" ] ), }, sentences: [ { - text: "Hello, world!", - sourceCodeRange: { startOffset: 22, endOffset: 35 }, + text: "Hello, world & beyond!", + sourceCodeRange: { startOffset: 22, endOffset: 48 }, tokens: [ { text: "Hello", sourceCodeRange: { startOffset: 22, endOffset: 27 } }, { text: ",", sourceCodeRange: { startOffset: 27, endOffset: 28 } }, { text: " ", sourceCodeRange: { startOffset: 28, endOffset: 29 } }, { text: "world", sourceCodeRange: { startOffset: 29, endOffset: 34 } }, - { text: "!", sourceCodeRange: { startOffset: 34, endOffset: 35 } }, + { text: " ", sourceCodeRange: { startOffset: 34, endOffset: 35 } }, + { text: "&", sourceCodeRange: { startOffset: 35, endOffset: 40 } }, + { text: " ", sourceCodeRange: { startOffset: 40, endOffset: 41 } }, + { text: "beyond", sourceCodeRange: { startOffset: 41, endOffset: 47 } }, + { text: "!", sourceCodeRange: { startOffset: 47, endOffset: 48 } }, ], } ], childNodes: [ { name: "#text", - value: "Hello, world!", - sourceCodeRange: { startOffset: 22, endOffset: 35 }, + value: "Hello, world #amp; beyond!", + sourceCodeRange: { startOffset: 22, endOffset: 48 }, } ], sourceCodeLocation: { startOffset: 5, - endOffset: 39, + endOffset: 52, startTag: { startOffset: 5, endOffset: 22, }, endTag: { - startOffset: 35, - endOffset: 39, + startOffset: 48, + endOffset: 52, }, }, } ], diff --git a/packages/yoastseo/spec/parse/build/private/tokenizeSpec.js b/packages/yoastseo/spec/parse/build/private/tokenizeSpec.js index 0402389341d..979f1355f0c 100644 --- a/packages/yoastseo/spec/parse/build/private/tokenizeSpec.js +++ b/packages/yoastseo/spec/parse/build/private/tokenizeSpec.js @@ -376,6 +376,8 @@ describe( "A test for the tokenize function", it( "should correctly tokenize a sentence containing an HTML entity", function() { // This tests the scenario where an ampersand character "&" enters the Paper as "&" in Classic editor. + // "&" is first transformed to "#amp;" and then turned back to "&" during tokenization. + // The length of "&" should be 5 characters (like "&"), instead of 1. const mockPaper = new Paper( "

<p>This is a paragraph&amp;</p>
    " ); const mockResearcher = new EnglishResearcher( mockPaper ); const languageProcessor = new LanguageProcessor( mockResearcher ); diff --git a/packages/yoastseo/spec/scoring/assessments/seo/KeywordDensityAssessmentSpec.js b/packages/yoastseo/spec/scoring/assessments/seo/KeywordDensityAssessmentSpec.js index e91ce53d8d7..9ac938465cb 100644 --- a/packages/yoastseo/spec/scoring/assessments/seo/KeywordDensityAssessmentSpec.js +++ b/packages/yoastseo/spec/scoring/assessments/seo/KeywordDensityAssessmentSpec.js @@ -292,48 +292,6 @@ describe( "A test for marking the keyphrase", function() { expect( keyphraseDensityAssessment.getMarks() ).toEqual( expected ); } ); - it( "returns markers for a keyphrase containing an ampersand, occurring twice in the sentence", function() { - const keyphraseDensityAssessment = new KeyphraseDensityAssessment(); - const paper = new Paper( "

<p>This is a very interesting paper with a keyword&amp; and another keyword&amp;.</p>
    ", { keyword: "keyword" } ); - const researcher = new DefaultResearcher( paper ); - buildTree( paper, researcher ); - - keyphraseDensityAssessment.getResult( paper, researcher ); - const expected = [ - new Mark( { - marked: "This is a very interesting paper with a " + - "keyword& and another " + - "keyword&.", - original: "This is a very interesting paper with a keyword& and another keyword&.", - position: { - startOffset: 43, - endOffset: 50, - startOffsetBlock: 40, - endOffsetBlock: 47, - attributeId: "", - clientId: "", - isFirstSection: false, - }, - } ), - new Mark( { - marked: "This is a very interesting paper with a " + - "keyword& and another " + - "keyword&.", - original: "This is a very interesting paper with a keyword& and another keyword&.", - position: { - // Position should take into account the preceding & character -- it should be counted as 5 characters (&) instead of 1 (&). - startOffset: 68, - endOffset: 75, - startOffsetBlock: 65, - endOffsetBlock: 72, - attributeId: "", - clientId: "", - isFirstSection: false, - }, - } ) ]; - expect( keyphraseDensityAssessment.getMarks() ).toEqual( expected ); - } ); - it( "returns markers for a keyphrase containing numbers", function() { const keyphraseDensityAssessment = new KeyphraseDensityAssessment(); const paper = new Paper( "

<p>This is the release of YoastSEO 9.3.</p>
    ", { keyword: "YoastSEO 9.3" } ); From 79f7af014ab0b78d83e2a701978b7e79526d5c6d Mon Sep 17 00:00:00 2001 From: Paolo Luigi Scala Date: Wed, 13 Sep 2023 10:35:02 +0200 Subject: [PATCH 414/616] Conforming naming --- admin/class-premium-upsell-admin-block.php | 2 +- admin/metabox/class-metabox.php | 2 +- css/src/admin-global.css | 16 ++++++++-------- ...lackFridayProductEditorChecklistPromotion.js} | 16 ++++++++++------ ...ridaySalePromo.js => BlackFridayPromotion.js} | 14 +++++++------- ...s => BlackFridaySidebarChecklistPromotion.js} | 10 +++++----- packages/js/src/components/fills/MetaboxFill.js | 12 ++++++------ packages/js/src/components/fills/SidebarFill.js | 8 ++++---- .../elementor/components/fills/ElementorFill.js | 8 ++++---- .../shouldShowWebinarPromoNotification.js | 8 ++++---- packages/js/src/initializers/settings-header.js | 4 ++-- .../black-friday-promotion-notification.php | 16 ++++++++++++++++ .../alerts/black-friday-sale-notification.php | 16 ---------------- .../domain/black-friday-checklist-promotion.php | 4 ++-- src/promotions/domain/black-friday-promotion.php | 2 +- 15 files changed, 71 insertions(+), 67 deletions(-) rename packages/js/src/components/{BlackFridayProductEditorChecklistPromo.js => BlackFridayProductEditorChecklistPromotion.js} (78%) rename packages/js/src/components/{BlackFridaySalePromo.js => BlackFridayPromotion.js} (85%) rename packages/js/src/components/{BlackFridaySidebarChecklistPromo.js => BlackFridaySidebarChecklistPromotion.js} (84%) create mode 100644 src/integrations/alerts/black-friday-promotion-notification.php delete mode 100644 src/integrations/alerts/black-friday-sale-notification.php diff --git a/admin/class-premium-upsell-admin-block.php b/admin/class-premium-upsell-admin-block.php index 416175628fa..749afac0c97 100644 --- a/admin/class-premium-upsell-admin-block.php +++ b/admin/class-premium-upsell-admin-block.php @@ -79,7 +79,7 @@ public function render() { $button_text ); - if ( YoastSEO()->classes->get( Promotion_Manager::class )->is( 'black_friday_2023' ) ) { + if ( YoastSEO()->classes->get( Promotion_Manager::class )->is( 'black-friday-2023-promotion' ) ) { $bf_label = \esc_html__( 'BLACK FRIDAY', 'wordpress-seo' ); $sale_label = \esc_html__( '30% OFF', 'wordpress-seo' ); // phpcs:ignore WordPress.Security.EscapeOutput.OutputNotEscaped -- Already escaped above. diff --git a/admin/metabox/class-metabox.php b/admin/metabox/class-metabox.php index 1a3a4b70711..1bdd19fabee 100644 --- a/admin/metabox/class-metabox.php +++ b/admin/metabox/class-metabox.php @@ -932,7 +932,7 @@ public function enqueue() { 'dismissedAlerts' => $dismissed_alerts, 'currentPromotions' => YoastSEO()->classes->get( Yoast\WP\SEO\Promotions\Application\Promotion_Manager::class )->get_current_promotions(), 'webinarIntroBlockEditorUrl' => WPSEO_Shortlinker::get( 'https://yoa.st/webinar-intro-block-editor' ), - 'blackFridayBlockEditorUrl' => ( YoastSEO()->classes->get( Yoast\WP\SEO\Promotions\Application\Promotion_Manager::class )->is( 'black_friday_2023_checklist' ) ) ? WPSEO_Shortlinker::get( 'https://yoa.st/black-friday-checklist' ) : '', + 'blackFridayBlockEditorUrl' => ( YoastSEO()->classes->get( Yoast\WP\SEO\Promotions\Application\Promotion_Manager::class )->is( 'black-friday-2023-checklist' ) ) ? WPSEO_Shortlinker::get( 'https://yoa.st/black-friday-checklist' ) : '', 'isJetpackBoostActive' => ( $is_block_editor ) ? YoastSEO()->classes->get( Jetpack_Boost_Active_Conditional::class )->is_met() : false, 'isJetpackBoostNotPremium' => ( $is_block_editor ) ? 
YoastSEO()->classes->get( Jetpack_Boost_Not_Premium_Conditional::class )->is_met() : false, 'isWooCommerceActive' => $woocommerce_active, diff --git a/css/src/admin-global.css b/css/src/admin-global.css index 5df7e529ff3..5f3cb5aa4a5 100644 --- a/css/src/admin-global.css +++ b/css/src/admin-global.css @@ -856,7 +856,7 @@ body.folded .wpseo-admin-submit-fixed { color: #dba617; } -#black-friday-2023-sale-sidebar.notice-yoast { +#black-friday-2023-promotion-sidebar.notice-yoast { border-color: #fcd34d; border-width: 2px; border-radius: 8px; @@ -869,11 +869,11 @@ body.folded .wpseo-admin-submit-fixed { margin-top: 20px; } -#black-friday-2023-sale-sidebar .notice-yoast__header { +#black-friday-2023-promotion-sidebar .notice-yoast__header { margin-bottom: 2px; } -#black-friday-2023-sale-metabox.notice-yoast { +#black-friday-2023-promotion-metabox.notice-yoast { border-color: #fcd34d; border-width: 2px; border-radius: 8px; @@ -882,23 +882,23 @@ body.folded .wpseo-admin-submit-fixed { margin: 20px; } -#black-friday-2023-sale-metabox h2.notice-yoast__header-heading { +#black-friday-2023-promotion-metabox h2.notice-yoast__header-heading { padding: 0px; } -#black-friday-2023-sale-metabox .notice-yoast__container { +#black-friday-2023-promotion-metabox .notice-yoast__container { padding-bottom: 0px; } -#black-friday-2023-sale-metabox .notice-yoast__container p { +#black-friday-2023-promotion-metabox .notice-yoast__container p { display: inline; } -#black-friday-2023-sale-metabox .notice-yoast__header { +#black-friday-2023-promotion-metabox .notice-yoast__header { margin-bottom: 8px; } -#black-friday-2023-sale-metabox .notice-yoast__header a { +#black-friday-2023-promotion-metabox .notice-yoast__header a { font-weight: 400; margin-left: 13px; } diff --git a/packages/js/src/components/BlackFridayProductEditorChecklistPromo.js b/packages/js/src/components/BlackFridayProductEditorChecklistPromotion.js similarity index 78% rename from packages/js/src/components/BlackFridayProductEditorChecklistPromo.js rename to packages/js/src/components/BlackFridayProductEditorChecklistPromotion.js index 0276b7f18cd..2223e86cbbb 100644 --- a/packages/js/src/components/BlackFridayProductEditorChecklistPromo.js +++ b/packages/js/src/components/BlackFridayProductEditorChecklistPromotion.js @@ -7,20 +7,24 @@ import { ReactComponent as DefaultImage } from "../../../../images/succes_mariek import { TimeConstrainedNotification } from "./TimeConstrainedNotification"; /** - * The BlackFridayProductEditorChecklistPromo component. + * The BlackFridayProductEditorChecklistPromotion component. * - * @returns {JSX.Element} The BlackFridayProductEditorChecklistPromo component. + * @returns {JSX.Element} The BlackFridayProductEditorChecklistPromotion component. */ -export const BlackFridayProductEditorChecklistPromo = () => { +export const BlackFridayProductEditorChecklistPromotion = () => { const linkParams = select( "yoast-seo/editor" ).selectLinkParams(); - + const title = sprintf( + /* translators: %1$s expands to 'WooCommerce'. 
*/ + __( "Is your %1$s store ready for Black Friday?", "wordpress-seo" ), + "WooCommerce" + ); return ( { createInterpolateElement( diff --git a/packages/js/src/components/BlackFridaySalePromo.js b/packages/js/src/components/BlackFridayPromotion.js similarity index 85% rename from packages/js/src/components/BlackFridaySalePromo.js rename to packages/js/src/components/BlackFridayPromotion.js index 9fb8101df5e..f7114c3047d 100644 --- a/packages/js/src/components/BlackFridaySalePromo.js +++ b/packages/js/src/components/BlackFridayPromotion.js @@ -6,15 +6,15 @@ import PropTypes from "prop-types"; import { TimeConstrainedNotification } from "./TimeConstrainedNotification"; /** - * The BlackFridaySalePromo component. + * The BlackFridayPromotion component. * * @param {string} store The store to use. Defaults to {@code yoast-seo/editor} * @param {string} location Where the notice will be shown. Defaults to {@code sidebar} * @param {Object} props The props. * - * @returns {JSX.Element} The BlackFridaySalePromo component. + * @returns {JSX.Element} The BlackFridayPromotion component. */ -export const BlackFridaySalePromo = ( { +export const BlackFridayPromotion = ( { store = "yoast-seo/editor", location = "sidebar", ...props @@ -41,9 +41,9 @@ export const BlackFridaySalePromo = ( { ); return isPremium ? null : ( { @@ -33,7 +33,7 @@ export const BlackFridaySidebarChecklistPromo = ( { return ( { isWordProofIntegrationActive() && } @@ -76,8 +76,8 @@ export default function MetaboxFill( { settings, wincherKeyphrases, setWincherNo key="time-constrained-notification" renderPriority={ 2 } > - { shouldShowWooCommerceChecklistPromo() && } - + { shouldShowWooCommerceChecklistPromo() && } +
    { settings.isKeywordAnalysisActive && } - { isWooCommerce && } - + { isWooCommerce && } +
    { settings.isKeywordAnalysisActive && diff --git a/packages/js/src/elementor/components/fills/ElementorFill.js b/packages/js/src/elementor/components/fills/ElementorFill.js index e39f0ffe0e0..41337e323a6 100644 --- a/packages/js/src/elementor/components/fills/ElementorFill.js +++ b/packages/js/src/elementor/components/fills/ElementorFill.js @@ -22,8 +22,8 @@ import WincherSEOPerformanceModal from "../../../containers/WincherSEOPerformanc import { isWordProofIntegrationActive } from "../../../helpers/wordproof"; import WordProofAuthenticationModals from "../../../components/modals/WordProofAuthenticationModals"; import WebinarPromoNotification from "../../../components/WebinarPromoNotification"; -import { BlackFridaySidebarChecklistPromo } from "../../../components/BlackFridaySidebarChecklistPromo"; -import { BlackFridaySalePromo } from "../../../components/BlackFridaySalePromo"; +import { BlackFridaysidebarChecklistPromotion } from "../../../components/BlackFridaysidebarChecklistPromotion"; +import { BlackFridayPromotion } from "../../../components/BlackFridayPromotion"; import { shouldShowWebinarPromoNotificationInSidebar } from "../../../helpers/shouldShowWebinarPromoNotification"; import KeywordUpsell from "../../../components/KeywordUpsell"; @@ -63,8 +63,8 @@ export default function ElementorFill( { isLoading, onLoad, settings } ) { { shouldShowWebinarPromoNotificationInSidebar() && } - { isWooCommerce && } - + { isWooCommerce && } + { settings.isKeywordAnalysisActive && diff --git a/packages/js/src/helpers/shouldShowWebinarPromoNotification.js b/packages/js/src/helpers/shouldShowWebinarPromoNotification.js index 18fd8520e36..3e4dd2fca25 100644 --- a/packages/js/src/helpers/shouldShowWebinarPromoNotification.js +++ b/packages/js/src/helpers/shouldShowWebinarPromoNotification.js @@ -8,7 +8,7 @@ import { select } from "@wordpress/data"; * @returns {boolean} Whether the Webinar promotion should be shown. */ const shouldShowWebinarPromoNotificationInDashboard = ( store = "yoast-seo/editor" ) => { - const isBlackFridayChecklistPromoActive = select( store ).isPromotionActive( "black_friday_2023_checklist" ); + const isBlackFridayChecklistPromoActive = select( store ).isPromotionActive( "black-friday-2023-checklist" ); const isBlackFridayChecklistAlertDismissed = select( store ).isAlertDismissed( "black-friday-2023-sidebar-checklist" ); if ( isBlackFridayChecklistPromoActive && ! isBlackFridayChecklistAlertDismissed ) { @@ -26,10 +26,10 @@ const shouldShowWebinarPromoNotificationInDashboard = ( store = "yoast-seo/edito * @returns {boolean} Whether the Webinar promotion should be shown. */ const shouldShowWebinarPromoNotificationInSidebar = ( store = "yoast-seo/editor" ) => { - const isBlackFridaySalePromoActive = select( store ).isPromotionActive( "black_friday_2023_sale" ); - const isBlackFridaySaleAlertDismissed = select( store ).isAlertDismissed( "black-friday-2023-sale" ); + const isBlackFridayPromotionActive = select( store ).isPromotionActive( "black-friday-2023-promotion" ); + const isBlackFridaySaleAlertDismissed = select( store ).isAlertDismissed( "black-friday-2023-promotion" ); - if ( ( isBlackFridaySalePromoActive && ! isBlackFridaySaleAlertDismissed ) || + if ( ( isBlackFridayPromotionActive && ! isBlackFridaySaleAlertDismissed ) || ! 
shouldShowWebinarPromoNotificationInDashboard( store ) ) { return false; } diff --git a/packages/js/src/initializers/settings-header.js b/packages/js/src/initializers/settings-header.js index b5f2f925d9d..427b4834fc6 100644 --- a/packages/js/src/initializers/settings-header.js +++ b/packages/js/src/initializers/settings-header.js @@ -2,7 +2,7 @@ import { get } from "lodash"; import { render } from "@wordpress/element"; import { ThemeProvider } from "styled-components"; import WebinarPromoNotification from "../components/WebinarPromoNotification"; -import { BlackFridaySidebarChecklistPromo } from "../components/BlackFridaySidebarChecklistPromo"; +import { BlackFridaysidebarChecklistPromotion } from "../components/BlackFridaysidebarChecklistPromotion"; import { shouldShowWebinarPromoNotificationInDashboard } from "../helpers/shouldShowWebinarPromoNotification"; /** @@ -19,7 +19,7 @@ const initSettingsHeader = () => { if ( reactRoot ) { render( - { isWooCommerce && } + { isWooCommerce && } { shouldShowWebinarPromoNotificationInDashboard( "yoast-seo/settings" ) && } , reactRoot diff --git a/src/integrations/alerts/black-friday-promotion-notification.php b/src/integrations/alerts/black-friday-promotion-notification.php new file mode 100644 index 00000000000..92e69566766 --- /dev/null +++ b/src/integrations/alerts/black-friday-promotion-notification.php @@ -0,0 +1,16 @@ + Date: Wed, 13 Sep 2023 12:00:16 +0200 Subject: [PATCH 415/616] Fix casing --- .../components/BlackFridaySidebarChecklistPromotion.js | 8 ++++---- packages/js/src/components/fills/SidebarFill.js | 4 ++-- .../js/src/elementor/components/fills/ElementorFill.js | 4 ++-- packages/js/src/initializers/settings-header.js | 4 ++-- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/packages/js/src/components/BlackFridaySidebarChecklistPromotion.js b/packages/js/src/components/BlackFridaySidebarChecklistPromotion.js index 5223a98db9f..3e382118409 100644 --- a/packages/js/src/components/BlackFridaySidebarChecklistPromotion.js +++ b/packages/js/src/components/BlackFridaySidebarChecklistPromotion.js @@ -7,14 +7,14 @@ import PropTypes from "prop-types"; import { TimeConstrainedNotification } from "./TimeConstrainedNotification"; /** - * The BlackFridaysidebarChecklistPromotion component. + * The BlackFridaySidebarChecklistPromotion component. * * @param {string} store The store to use. Defaults to {@code yoast-seo/editor * @param {Object} props The props. * - * @returns {JSX.Element} The BlackFridaysidebarChecklistPromotion component. + * @returns {JSX.Element} The BlackFridaySidebarChecklistPromotion component. 
*/ -export const BlackFridaysidebarChecklistPromotion = ( { +export const BlackFridaySidebarChecklistPromotion = ( { store = "yoast-seo/editor", ...props } ) => { @@ -47,6 +47,6 @@ export const BlackFridaysidebarChecklistPromotion = ( { ); }; -BlackFridaysidebarChecklistPromotion.propTypes = { +BlackFridaySidebarChecklistPromotion.propTypes = { store: PropTypes.string, }; diff --git a/packages/js/src/components/fills/SidebarFill.js b/packages/js/src/components/fills/SidebarFill.js index 452143390af..9a8e65f91bd 100644 --- a/packages/js/src/components/fills/SidebarFill.js +++ b/packages/js/src/components/fills/SidebarFill.js @@ -20,7 +20,7 @@ import AdvancedSettings from "../../containers/AdvancedSettings"; import WincherSEOPerformanceModal from "../../containers/WincherSEOPerformanceModal"; import WebinarPromoNotification from "../WebinarPromoNotification"; import { BlackFridayPromotion } from "../BlackFridayPromotion"; -import { BlackFridaysidebarChecklistPromotion } from "../BlackFridaysidebarChecklistPromotion"; +import { BlackFridaySidebarChecklistPromotion } from "../BlackFridaySidebarChecklistPromotion"; import { shouldShowWebinarPromoNotificationInSidebar } from "../../helpers/shouldShowWebinarPromoNotification"; import KeywordUpsell from "../KeywordUpsell"; @@ -50,7 +50,7 @@ export default function SidebarFill( { settings } ) { { shouldShowWebinarPromoNotificationInSidebar() && } - { isWooCommerce && } + { isWooCommerce && }
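Both fills above gate the promotion components on the same store checks. As a minimal sketch of that pattern — the "yoast-seo/editor" store and its isPromotionActive / isAlertDismissed selectors are the ones referenced in these diffs, while the wrapper component below is purely illustrative and not part of the patch:

import { useSelect } from "@wordpress/data";
import { BlackFridayPromotion } from "../BlackFridayPromotion";

/**
 * Renders the Black Friday promotion only while the promotion is active
 * and its alert has not been dismissed by the user.
 *
 * @returns {JSX.Element|null} The promotion notification, or null.
 */
const MaybeBlackFridayPromotion = () => {
	const { isActive, isDismissed } = useSelect( ( select ) => ( {
		isActive: select( "yoast-seo/editor" ).isPromotionActive( "black-friday-2023-promotion" ),
		isDismissed: select( "yoast-seo/editor" ).isAlertDismissed( "black-friday-2023-promotion" ),
	} ), [] );

	return isActive && ! isDismissed ? <BlackFridayPromotion location="sidebar" /> : null;
};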
    diff --git a/packages/js/src/elementor/components/fills/ElementorFill.js b/packages/js/src/elementor/components/fills/ElementorFill.js index 41337e323a6..14251002bc4 100644 --- a/packages/js/src/elementor/components/fills/ElementorFill.js +++ b/packages/js/src/elementor/components/fills/ElementorFill.js @@ -22,7 +22,7 @@ import WincherSEOPerformanceModal from "../../../containers/WincherSEOPerformanc import { isWordProofIntegrationActive } from "../../../helpers/wordproof"; import WordProofAuthenticationModals from "../../../components/modals/WordProofAuthenticationModals"; import WebinarPromoNotification from "../../../components/WebinarPromoNotification"; -import { BlackFridaysidebarChecklistPromotion } from "../../../components/BlackFridaysidebarChecklistPromotion"; +import { BlackFridaySidebarChecklistPromotion } from "../../../components/BlackFridaySidebarChecklistPromotion"; import { BlackFridayPromotion } from "../../../components/BlackFridayPromotion"; import { shouldShowWebinarPromoNotificationInSidebar } from "../../../helpers/shouldShowWebinarPromoNotification"; import KeywordUpsell from "../../../components/KeywordUpsell"; @@ -63,7 +63,7 @@ export default function ElementorFill( { isLoading, onLoad, settings } ) { { shouldShowWebinarPromoNotificationInSidebar() && } - { isWooCommerce && } + { isWooCommerce && } diff --git a/packages/js/src/initializers/settings-header.js b/packages/js/src/initializers/settings-header.js index 427b4834fc6..029f35a58ff 100644 --- a/packages/js/src/initializers/settings-header.js +++ b/packages/js/src/initializers/settings-header.js @@ -2,7 +2,7 @@ import { get } from "lodash"; import { render } from "@wordpress/element"; import { ThemeProvider } from "styled-components"; import WebinarPromoNotification from "../components/WebinarPromoNotification"; -import { BlackFridaysidebarChecklistPromotion } from "../components/BlackFridaysidebarChecklistPromotion"; +import { BlackFridaySidebarChecklistPromotion } from "../components/BlackFridaySidebarChecklistPromotion"; import { shouldShowWebinarPromoNotificationInDashboard } from "../helpers/shouldShowWebinarPromoNotification"; /** @@ -19,7 +19,7 @@ const initSettingsHeader = () => { if ( reactRoot ) { render( - { isWooCommerce && } + { isWooCommerce && } { shouldShowWebinarPromoNotificationInDashboard( "yoast-seo/settings" ) && } , reactRoot From e12bd2c2e296a3420ab43e41743d794a1a0124a6 Mon Sep 17 00:00:00 2001 From: Paolo Luigi Scala Date: Wed, 13 Sep 2023 12:30:23 +0200 Subject: [PATCH 416/616] Renamed and refactored --- .../shouldShowWebinarPromoNotification.js | 40 ------------------- .../shouldShowWebinarPromotionNotification.js | 39 ++++++++++++++++++ 2 files changed, 39 insertions(+), 40 deletions(-) delete mode 100644 packages/js/src/helpers/shouldShowWebinarPromoNotification.js create mode 100644 packages/js/src/helpers/shouldShowWebinarPromotionNotification.js diff --git a/packages/js/src/helpers/shouldShowWebinarPromoNotification.js b/packages/js/src/helpers/shouldShowWebinarPromoNotification.js deleted file mode 100644 index 3e4dd2fca25..00000000000 --- a/packages/js/src/helpers/shouldShowWebinarPromoNotification.js +++ /dev/null @@ -1,40 +0,0 @@ -import { select } from "@wordpress/data"; - -/** - * Checks if the Webinar promotion should be shown in the dashboard. - * - * @param {string} store The store to use. - * - * @returns {boolean} Whether the Webinar promotion should be shown. 
- */ -const shouldShowWebinarPromoNotificationInDashboard = ( store = "yoast-seo/editor" ) => { - const isBlackFridayChecklistPromoActive = select( store ).isPromotionActive( "black-friday-2023-checklist" ); - const isBlackFridayChecklistAlertDismissed = select( store ).isAlertDismissed( "black-friday-2023-sidebar-checklist" ); - - if ( isBlackFridayChecklistPromoActive && ! isBlackFridayChecklistAlertDismissed ) { - return false; - } - - return true; -}; - -/** - * Checks if the Webinar promotion should be shown in the sidebar. - * - * @param {string} store The store to use. - * - * @returns {boolean} Whether the Webinar promotion should be shown. - */ -const shouldShowWebinarPromoNotificationInSidebar = ( store = "yoast-seo/editor" ) => { - const isBlackFridayPromotionActive = select( store ).isPromotionActive( "black-friday-2023-promotion" ); - const isBlackFridaySaleAlertDismissed = select( store ).isAlertDismissed( "black-friday-2023-promotion" ); - - if ( ( isBlackFridayPromotionActive && ! isBlackFridaySaleAlertDismissed ) || - ! shouldShowWebinarPromoNotificationInDashboard( store ) ) { - return false; - } - - return true; -}; - -export { shouldShowWebinarPromoNotificationInDashboard, shouldShowWebinarPromoNotificationInSidebar }; diff --git a/packages/js/src/helpers/shouldShowWebinarPromotionNotification.js b/packages/js/src/helpers/shouldShowWebinarPromotionNotification.js new file mode 100644 index 00000000000..5175aeb457d --- /dev/null +++ b/packages/js/src/helpers/shouldShowWebinarPromotionNotification.js @@ -0,0 +1,39 @@ +import { select } from "@wordpress/data"; + +/** + * Checks if the Webinar promotion should be shown in the dashboard. + * + * @param {string} store The store to use. + * + * @returns {boolean} Whether the Webinar promotion should be shown. + */ +const shouldShowWebinarPromotionNotificationInDashboard = ( store = "yoast-seo/editor" ) => { + const isBlackFridayChecklistPromotionActive = select( store ).isPromotionActive( "black-friday-2023-checklist" ); + const isBlackFridayChecklistAlertDismissed = select( store ).isAlertDismissed( "black-friday-2023-sidebar-checklist" ); + + if ( isBlackFridayChecklistPromotionActive ) { + return isBlackFridayChecklistAlertDismissed; + } + + return true; +}; + +/** + * Checks if the Webinar promotion should be shown in the sidebar. + * + * @param {string} store The store to use. + * + * @returns {boolean} Whether the Webinar promotion should be shown. 
+ */ +const shouldShowWebinarPromotionNotificationInSidebar = ( store = "yoast-seo/editor" ) => { + const isBlackFridayPromotionActive = select( store ).isPromotionActive( "black-friday-2023-promotion" ); + const isBlackFridayPromotionAlertDismissed = select( store ).isAlertDismissed( "black-friday-2023-promotion" ); + + if ( isBlackFridayPromotionActive ) { + return isBlackFridayPromotionAlertDismissed; + } + + return shouldShowWebinarPromotionNotificationInDashboard( store ); +}; + +export { shouldShowWebinarPromotionNotificationInDashboard, shouldShowWebinarPromotionNotificationInSidebar }; From 82e8a62a5c3f4be4b73a0451e114310123be5304 Mon Sep 17 00:00:00 2001 From: Paolo Luigi Scala Date: Wed, 13 Sep 2023 12:30:43 +0200 Subject: [PATCH 417/616] Renamed class --- packages/js/src/components/fills/SidebarFill.js | 4 ++-- packages/js/src/elementor/components/fills/ElementorFill.js | 4 ++-- packages/js/src/initializers/settings-header.js | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/packages/js/src/components/fills/SidebarFill.js b/packages/js/src/components/fills/SidebarFill.js index 9a8e65f91bd..e453e157beb 100644 --- a/packages/js/src/components/fills/SidebarFill.js +++ b/packages/js/src/components/fills/SidebarFill.js @@ -21,7 +21,7 @@ import WincherSEOPerformanceModal from "../../containers/WincherSEOPerformanceMo import WebinarPromoNotification from "../WebinarPromoNotification"; import { BlackFridayPromotion } from "../BlackFridayPromotion"; import { BlackFridaySidebarChecklistPromotion } from "../BlackFridaySidebarChecklistPromotion"; -import { shouldShowWebinarPromoNotificationInSidebar } from "../../helpers/shouldShowWebinarPromoNotification"; +import { shouldShowWebinarPromotionNotificationInSidebar } from "../../helpers/shouldShowWebinarPromotionNotification"; import KeywordUpsell from "../KeywordUpsell"; /* eslint-disable complexity */ @@ -47,7 +47,7 @@ export default function SidebarFill( { settings } ) {
    { /* eslint-disable max-len */ } - { shouldShowWebinarPromoNotificationInSidebar() && + { shouldShowWebinarPromotionNotificationInSidebar() && } { isWooCommerce && } diff --git a/packages/js/src/elementor/components/fills/ElementorFill.js b/packages/js/src/elementor/components/fills/ElementorFill.js index 14251002bc4..27a91a65220 100644 --- a/packages/js/src/elementor/components/fills/ElementorFill.js +++ b/packages/js/src/elementor/components/fills/ElementorFill.js @@ -24,7 +24,7 @@ import WordProofAuthenticationModals from "../../../components/modals/WordProofA import WebinarPromoNotification from "../../../components/WebinarPromoNotification"; import { BlackFridaySidebarChecklistPromotion } from "../../../components/BlackFridaySidebarChecklistPromotion"; import { BlackFridayPromotion } from "../../../components/BlackFridayPromotion"; -import { shouldShowWebinarPromoNotificationInSidebar } from "../../../helpers/shouldShowWebinarPromoNotification"; +import { shouldShowWebinarPromotionNotificationInSidebar } from "../../../helpers/shouldShowWebinarPromotionNotification"; import KeywordUpsell from "../../../components/KeywordUpsell"; /* eslint-disable complexity */ @@ -60,7 +60,7 @@ export default function ElementorFill( { isLoading, onLoad, settings } ) { - { shouldShowWebinarPromoNotificationInSidebar() && + { shouldShowWebinarPromotionNotificationInSidebar() && } { isWooCommerce && } diff --git a/packages/js/src/initializers/settings-header.js b/packages/js/src/initializers/settings-header.js index 029f35a58ff..5df2552a529 100644 --- a/packages/js/src/initializers/settings-header.js +++ b/packages/js/src/initializers/settings-header.js @@ -3,7 +3,7 @@ import { render } from "@wordpress/element"; import { ThemeProvider } from "styled-components"; import WebinarPromoNotification from "../components/WebinarPromoNotification"; import { BlackFridaySidebarChecklistPromotion } from "../components/BlackFridaySidebarChecklistPromotion"; -import { shouldShowWebinarPromoNotificationInDashboard } from "../helpers/shouldShowWebinarPromoNotification"; +import { shouldShowWebinarPromotionNotificationInDashboard } from "../helpers/shouldShowWebinarPromotionNotification"; /** * Initializes the React settings header, just below the title. 
@@ -20,7 +20,7 @@ const initSettingsHeader = () => { render( { isWooCommerce && } - { shouldShowWebinarPromoNotificationInDashboard( "yoast-seo/settings" ) && } + { shouldShowWebinarPromotionNotificationInDashboard( "yoast-seo/settings" ) && } , reactRoot ); From f8afc3886f1788acc87fe22f5da31883081cf165 Mon Sep 17 00:00:00 2001 From: Thijs van der Heijden Date: Wed, 13 Sep 2023 14:37:28 +0200 Subject: [PATCH 418/616] Add extra sale banner --- css/src/adminbar.css | 10 ++++++++++ inc/class-wpseo-admin-bar-menu.php | 18 ++++++++++++++---- 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/css/src/adminbar.css b/css/src/adminbar.css index 1077dc7b0d1..23631f03aea 100644 --- a/css/src/adminbar.css +++ b/css/src/adminbar.css @@ -21,6 +21,16 @@ color: #fff !important; } +#wpadminbar .quicklinks #wp-admin-bar-wpseo-menu #wp-admin-bar-wpseo-menu-default li#wp-admin-bar-wpseo-get-premium span { + color: rgb(252, 211, 77); + border: solid rgb(252, 211, 77) 1px; + font-size: 13px; + font-weight: 600; + background: rgb(31, 41, 55); + padding: 1px 4px; + border-radius: 14px; +} + #wpadminbar .quicklinks #wp-admin-bar-wpseo-menu .wpseo-focus-keyword { max-width: 100px !important; display: inline-block !important; diff --git a/inc/class-wpseo-admin-bar-menu.php b/inc/class-wpseo-admin-bar-menu.php index 60a42e0247c..5b0ce4ef9c9 100644 --- a/inc/class-wpseo-admin-bar-menu.php +++ b/inc/class-wpseo-admin-bar-menu.php @@ -156,6 +156,7 @@ protected function get_is_seo_enabled() { if ( is_null( $this->is_seo_enabled ) ) { $this->is_seo_enabled = ( new WPSEO_Metabox_Analysis_SEO() )->is_enabled(); } + return $this->is_seo_enabled; } @@ -168,6 +169,7 @@ protected function get_is_readability_enabled() { if ( is_null( $this->is_readability_enabled ) ) { $this->is_readability_enabled = ( new WPSEO_Metabox_Analysis_Readability() )->is_enabled(); } + return $this->is_readability_enabled; } @@ -180,6 +182,7 @@ protected function get_current_indexable() { if ( is_null( $this->current_indexable ) ) { $this->current_indexable = $this->indexable_repository->for_current_page(); } + return $this->current_indexable; } @@ -237,7 +240,8 @@ public function add_menu( WP_Admin_Bar $wp_admin_bar ) { [ 'parent' => self::MENU_IDENTIFIER, 'id' => 'wpseo-seo-score', - 'title' => __( 'SEO score', 'wordpress-seo' ) . ': ' . $this->score_icon_helper->for_seo( $indexable, 'adminbar-sub-menu-score' )->present(), + 'title' => __( 'SEO score', 'wordpress-seo' ) . ': ' . $this->score_icon_helper->for_seo( $indexable, 'adminbar-sub-menu-score' ) + ->present(), 'meta' => [ 'tabindex' => '0' ], ] ); @@ -248,7 +252,8 @@ public function add_menu( WP_Admin_Bar $wp_admin_bar ) { [ 'parent' => self::MENU_IDENTIFIER, 'id' => 'wpseo-readability-score', - 'title' => __( 'Readability', 'wordpress-seo' ) . ': ' . $this->score_icon_helper->for_readability( $indexable->readability_score, 'adminbar-sub-menu-score' )->present(), + 'title' => __( 'Readability', 'wordpress-seo' ) . ': ' . 
$this->score_icon_helper->for_readability( $indexable->readability_score, 'adminbar-sub-menu-score' ) + ->present(), 'meta' => [ 'tabindex' => '0' ], ] ); @@ -585,15 +590,20 @@ protected function add_get_help_submenu( WP_Admin_Bar $wp_admin_bar ) { * @return void */ protected function add_premium_link( WP_Admin_Bar $wp_admin_bar ) { + $sale_percentage = sprintf( + '%1$s', + __( '-30%', 'wordpress-seo' ) + ); $wp_admin_bar->add_menu( [ 'parent' => self::MENU_IDENTIFIER, 'id' => 'wpseo-get-premium', // Circumvent an issue in the WP admin bar API in order to pass `data` attributes. See https://core.trac.wordpress.org/ticket/38636. 'title' => sprintf( - '%2$s »', + '%2$s » %3$s', $this->shortlinker->build_shortlink( 'https://yoa.st/admin-bar-get-premium' ), - __( 'Get Yoast SEO Premium', 'wordpress-seo' ) + __( 'Get Yoast SEO Premium', 'wordpress-seo' ), + $sale_percentage ), 'meta' => [ 'tabindex' => '0', From 349fbd62ce9505c909fc423cd94a74944fd490bb Mon Sep 17 00:00:00 2001 From: Thijs van der Heijden Date: Wed, 13 Sep 2023 14:39:11 +0200 Subject: [PATCH 419/616] Add bf check --- inc/class-wpseo-admin-bar-menu.php | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/inc/class-wpseo-admin-bar-menu.php b/inc/class-wpseo-admin-bar-menu.php index 5b0ce4ef9c9..e7ae091c9d2 100644 --- a/inc/class-wpseo-admin-bar-menu.php +++ b/inc/class-wpseo-admin-bar-menu.php @@ -10,6 +10,7 @@ use Yoast\WP\SEO\Integrations\Support_Integration; use Yoast\WP\SEO\Models\Indexable; use Yoast\WP\SEO\Presenters\Admin\Premium_Badge_Presenter; +use Yoast\WP\SEO\Promotions\Application\Promotion_Manager; use Yoast\WP\SEO\Repositories\Indexable_Repository; /** @@ -590,10 +591,13 @@ protected function add_get_help_submenu( WP_Admin_Bar $wp_admin_bar ) { * @return void */ protected function add_premium_link( WP_Admin_Bar $wp_admin_bar ) { - $sale_percentage = sprintf( - '%1$s', - __( '-30%', 'wordpress-seo' ) - ); + $sale_percentage = ''; + if ( YoastSEO()->classes->get( Promotion_Manager::class )->is( 'black_friday_2023' ) ) { + $sale_percentage = sprintf( + '%1$s', + __( '-30%', 'wordpress-seo' ) + ); + } $wp_admin_bar->add_menu( [ 'parent' => self::MENU_IDENTIFIER, From 3a71db5cfa7d333f1c1f80d7b7824eeec4f09e59 Mon Sep 17 00:00:00 2001 From: Paolo Luigi Scala Date: Wed, 13 Sep 2023 16:12:33 +0200 Subject: [PATCH 420/616] Add badge class for menu --- css/src/admin-global.css | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/css/src/admin-global.css b/css/src/admin-global.css index df1b6363738..013c2e50451 100644 --- a/css/src/admin-global.css +++ b/css/src/admin-global.css @@ -855,4 +855,16 @@ body.folded .wpseo-admin-submit-fixed { .yoast-dashicons-notice { color: #dba617; } + +.yoast-menu-bf-sale-badge { + margin-left: 5px; + border-radius: 8px; + border: 1px solid #FCD34D; + background-color: #1F2937; + color: #FCD34D; + padding: 2px 8px; + font-size: 10px; + font-weight: 600; + line-height: normal; +} /*# sourceMappingURL=admin-global.css.map */ From b32d235e01ecf9cb36a001027d4101022ef54a20 Mon Sep 17 00:00:00 2001 From: Paolo Luigi Scala Date: Wed, 13 Sep 2023 16:24:56 +0200 Subject: [PATCH 421/616] Add the badge to the menu --- admin/menu/class-base-menu.php | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/admin/menu/class-base-menu.php b/admin/menu/class-base-menu.php index 59cda24f245..6d0f3c10136 100644 --- a/admin/menu/class-base-menu.php +++ b/admin/menu/class-base-menu.php @@ -4,6 +4,7 @@ * * @package WPSEO\Admin\Menu */ +use 
Yoast\WP\SEO\Promotions\Application\Promotion_Manager; /** * Admin menu base class. @@ -255,7 +256,9 @@ protected function get_license_page_title() { static $title = null; if ( $title === null ) { - $title = __( 'Premium', 'wordpress-seo' ); + $title = YoastSEO()->classes->get( Promotion_Manager::class )->is( 'black_friday_2023' ) ? + __( 'Premium', 'wordpress-seo' ) . '' . __( '30% OFF', 'wordpress-seo' ) . '' : + __( 'Premium', 'wordpress-seo' ); } return $title; From a1f608891164987a3e8aa03ad3d25110a9ec2e1d Mon Sep 17 00:00:00 2001 From: Paolo Luigi Scala Date: Wed, 13 Sep 2023 16:37:35 +0200 Subject: [PATCH 422/616] Fix cs --- admin/menu/class-base-menu.php | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/admin/menu/class-base-menu.php b/admin/menu/class-base-menu.php index 6d0f3c10136..68e3c1f178f 100644 --- a/admin/menu/class-base-menu.php +++ b/admin/menu/class-base-menu.php @@ -4,6 +4,7 @@ * * @package WPSEO\Admin\Menu */ + use Yoast\WP\SEO\Promotions\Application\Promotion_Manager; /** @@ -256,9 +257,7 @@ protected function get_license_page_title() { static $title = null; if ( $title === null ) { - $title = YoastSEO()->classes->get( Promotion_Manager::class )->is( 'black_friday_2023' ) ? - __( 'Premium', 'wordpress-seo' ) . '' . __( '30% OFF', 'wordpress-seo' ) . '' : - __( 'Premium', 'wordpress-seo' ); + $title = YoastSEO()->classes->get( Promotion_Manager::class )->is( 'black_friday_2023' ) ? __( 'Premium', 'wordpress-seo' ) . '' . __( '30% OFF', 'wordpress-seo' ) . '' : __( 'Premium', 'wordpress-seo' ); } return $title; From eb78924a42f57d097ee1c3e25da33c4588dd7b58 Mon Sep 17 00:00:00 2001 From: Thijs van der Heijden Date: Thu, 14 Sep 2023 12:49:01 +0200 Subject: [PATCH 423/616] Update strings. --- admin/views/licenses.php | 2 +- inc/class-addon-manager.php | 2 +- src/presenters/admin/sidebar-presenter.php | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/admin/views/licenses.php b/admin/views/licenses.php index 04bd18c859e..e5630180a77 100644 --- a/admin/views/licenses.php +++ b/admin/views/licenses.php @@ -129,7 +129,7 @@ $sale_badge = ''; -if ( YoastSEO()->classes->get( Promotion_Manager::class )->is( 'black_friday_2023' ) ) { +if ( YoastSEO()->classes->get( Promotion_Manager::class )->is( 'black-friday-2023-promotion' ) ) { /* translators: %1$s expands to opening span, %2$s expands to closing span */ $sale_badge_span = sprintf( esc_html__( '%1$sSALE 30%% OFF!%2$s', 'wordpress-seo' ), '', '' ); diff --git a/inc/class-addon-manager.php b/inc/class-addon-manager.php index 3511824aac8..775d316d0e2 100644 --- a/inc/class-addon-manager.php +++ b/inc/class-addon-manager.php @@ -403,7 +403,7 @@ public function expired_subscription_warning( $plugin_data ) { $addon_link = ( isset( $this->addon_details[ $plugin_data['slug'] ] ) ) ? $this->addon_details[ $plugin_data['slug'] ]['short_link_renewal'] : $this->addon_details[ self::PREMIUM_SLUG ]['short_link_renewal']; $sale_copy = ''; - if ( YoastSEO()->classes->get( Promotion_Manager::class )->is( 'black_friday_2023' ) ) { + if ( YoastSEO()->classes->get( Promotion_Manager::class )->is( 'black-friday-2023-promotion' ) ) { $sale_copy = sprintf( /* translators: %1$s is a
    tag. */ esc_html__( '%1$s Now with 30%% Black Friday Discount!', 'wordpress-seo' ), diff --git a/src/presenters/admin/sidebar-presenter.php b/src/presenters/admin/sidebar-presenter.php index d0694cf27ab..f05bdfc04b7 100644 --- a/src/presenters/admin/sidebar-presenter.php +++ b/src/presenters/admin/sidebar-presenter.php @@ -45,7 +45,7 @@ class="attachment-full size-full content-visible" sizes="(min-width: 1321px) 75px"> - classes->get( Promotion_Manager::class )->is( 'black_friday_2023' ) ) : ?> + classes->get( Promotion_Manager::class )->is( 'black-friday-2023-promotion' ) ) : ?>
    ', '' ); + echo \esc_html__( 'Use AI to generate titles and meta descriptions, automatically redirect deleted pages, get 24/7 support and much, much more!', 'wordpress-seo' ); ?>
    classes->get( Promotion_Manager::class )->is( 'black-friday-2023-promotion' ) ) : ?> From 301471f49562572695018a2934f5670b17bd758a Mon Sep 17 00:00:00 2001 From: Vraja Das Date: Mon, 25 Sep 2023 16:34:24 +0200 Subject: [PATCH 459/616] BF banner for js upsell sidbar --- .../components/premium-upsell-card.js | 21 ++++++++----------- 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/packages/js/src/shared-admin/components/premium-upsell-card.js b/packages/js/src/shared-admin/components/premium-upsell-card.js index 3a50a666f2f..6880c206f35 100644 --- a/packages/js/src/shared-admin/components/premium-upsell-card.js +++ b/packages/js/src/shared-admin/components/premium-upsell-card.js @@ -2,6 +2,7 @@ import { ArrowNarrowRightIcon } from "@heroicons/react/outline"; import { createInterpolateElement, useMemo } from "@wordpress/element"; import { __, sprintf } from "@wordpress/i18n"; import { Button, Title } from "@yoast/ui-library"; +import { get } from "lodash"; import PropTypes from "prop-types"; import { ReactComponent as StarHalf } from "../../../images/star-rating-half.svg"; import { ReactComponent as Star } from "../../../images/star-rating-star.svg"; @@ -15,17 +16,8 @@ import { ReactComponent as G2Logo } from "./g2-logo-white.svg"; * @returns {JSX.Element} The premium upsell card. */ export const PremiumUpsellCard = ( { link, linkProps } ) => { - const info = useMemo( () => createInterpolateElement( - sprintf( - /* translators: %1$s and %2$s expand to opening and closing tags. */ - __( "Be more efficient in creating titles and meta descriptions with the help of AI. %1$sGet 24/7 support%2$s and boost your website’s visibility.", "wordpress-seo" ), - "", - "" - ), - { - strong: , - } - ), [] ); + const info = useMemo( () => __( "Use AI to generate titles and meta descriptions, automatically redirect deleted pages, get 24/7 support and much, much more!", "wordpress-seo" ), [] ); + const isBlackFriday = get( window, "wpseoScriptData.isBlackFriday", "" ); const getPremium = createInterpolateElement( sprintf( /* translators: %1$s and %2$s expand to a span wrap to avoid linebreaks. %3$s expands to "Yoast SEO Premium". */ @@ -46,6 +38,11 @@ export const PremiumUpsellCard = ( { link, linkProps } ) => { > + { isBlackFriday &&
    +
    + { __( "BLACK FRIDAY - 30% OFF", "wordpress-seo" ) } +
    +
    } { getPremium } @@ -59,7 +56,7 @@ export const PremiumUpsellCard = ( { link, linkProps } ) => { className="yst-flex yst-justify-center yst-gap-2 yst-mt-4 focus:yst-ring-offset-primary-500" { ...linkProps } > - { getPremium } + { isBlackFriday ? __( "Claim your 30% off now!", "wordpress-seo" ) : getPremium } Date: Mon, 25 Sep 2023 16:35:06 +0200 Subject: [PATCH 460/616] css from php admin upsell sidebar with modifications --- css/src/new-settings.css | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/css/src/new-settings.css b/css/src/new-settings.css index e7f07987a31..036ac3c996e 100644 --- a/css/src/new-settings.css +++ b/css/src/new-settings.css @@ -241,6 +241,37 @@ body.seo_page_wpseo_page_settings { yst-bg-slate-200 yst-text-slate-900; } + + .sidebar__sale_banner_container .sidebar__sale_banner { + background: #000; + color: #fcd34d; + width: calc(100% + 60px); + line-height: 30px; + margin-left: -30px; + transform: rotate(-5deg); + font-size: 18px; + font-weight: 500; + letter-spacing: 0.5px; + text-align: center; + margin-top: 20px; + margin-bottom: 10px; + box-shadow: 0 -1px 4px 0 #fcd34d, 0 1px 4px 0 #fcd34d, 0 -1px 0 0 #fcd34d, 0 1px 0 0 #fcd34d; + padding: 7px 0; + } + + .sidebar__sale_banner_container .sidebar__sale_banner .banner_text { + display: inline-block; + margin: 0 35px; + } + + .sidebar__sale_banner_container { + width: calc(100% + 48px); + margin-left: -24px; + overflow: hidden; + padding-bottom: 10px; + margin-bottom: -25px; + margin-top: -5px; + } } /* RTL */ From bb4a63362a746be7d2b8bd0bc913a468ff8946b3 Mon Sep 17 00:00:00 2001 From: Vraja Das Date: Mon, 25 Sep 2023 16:51:53 +0200 Subject: [PATCH 461/616] convert css to tailwind classes --- css/src/new-settings.css | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/css/src/new-settings.css b/css/src/new-settings.css index 036ac3c996e..d47e927f475 100644 --- a/css/src/new-settings.css +++ b/css/src/new-settings.css @@ -243,25 +243,18 @@ body.seo_page_wpseo_page_settings { } .sidebar__sale_banner_container .sidebar__sale_banner { - background: #000; color: #fcd34d; width: calc(100% + 60px); line-height: 30px; margin-left: -30px; transform: rotate(-5deg); - font-size: 18px; - font-weight: 500; letter-spacing: 0.5px; - text-align: center; - margin-top: 20px; - margin-bottom: 10px; box-shadow: 0 -1px 4px 0 #fcd34d, 0 1px 4px 0 #fcd34d, 0 -1px 0 0 #fcd34d, 0 1px 0 0 #fcd34d; - padding: 7px 0; + @apply yst-text-lg yst-font-bold yst-bg-black yst-mt-5 yst-text-center yst-mb-[10px] yst-px-0 yst-py-1; } .sidebar__sale_banner_container .sidebar__sale_banner .banner_text { - display: inline-block; - margin: 0 35px; + @apply yst-inline-block yst-my-0 yst-mx-[35px]; } .sidebar__sale_banner_container { From be115f0b5bfb231970133054d4da97ed7c86ed96 Mon Sep 17 00:00:00 2001 From: Vraja Das Date: Tue, 26 Sep 2023 10:14:37 +0200 Subject: [PATCH 462/616] located black friday banner styles to seperate css file --- admin/class-admin-asset-manager.php | 5 +++++ css/src/black-friday-banner.css | 22 +++++++++++++++++++++ css/src/new-settings.css | 24 ----------------------- src/integrations/settings-integration.php | 3 +++ src/integrations/support-integration.php | 9 +++++++-- 5 files changed, 37 insertions(+), 26 deletions(-) create mode 100644 css/src/black-friday-banner.css diff --git a/admin/class-admin-asset-manager.php b/admin/class-admin-asset-manager.php index 8fb569b1d85..05ef3368e34 100644 --- a/admin/class-admin-asset-manager.php +++ 
b/admin/class-admin-asset-manager.php @@ -621,6 +621,11 @@ protected function styles_to_be_registered() { 'src' => 'new-settings-' . $flat_version, 'deps' => [ self::PREFIX . 'tailwind' ], ], + [ + 'name' => 'black-friday-banner', + 'src' => 'black-friday-banner-' . $flat_version, + 'deps' => [ self::PREFIX . 'tailwind' ], + ], [ 'name' => 'academy', 'src' => 'academy-' . $flat_version, diff --git a/css/src/black-friday-banner.css b/css/src/black-friday-banner.css new file mode 100644 index 00000000000..90d7958cb73 --- /dev/null +++ b/css/src/black-friday-banner.css @@ -0,0 +1,22 @@ +.sidebar__sale_banner_container .sidebar__sale_banner { + color: #fcd34d; + width: calc(100% + 60px); + line-height: 30px; + margin-left: -30px; + transform: rotate(-5deg); + letter-spacing: 0.5px; + box-shadow: 0 -1px 4px 0 #fcd34d, 0 1px 4px 0 #fcd34d, 0 -1px 0 0 #fcd34d, 0 1px 0 0 #fcd34d; + @apply yst-text-lg yst-font-bold yst-bg-black yst-mt-5 yst-text-center yst-mb-[10px] yst-px-0 yst-py-1; +} + +.sidebar__sale_banner_container .sidebar__sale_banner .banner_text { + @apply yst-inline-block yst-my-0 yst-mx-[35px]; +} + +.sidebar__sale_banner_container { + width: calc(100% + 48px); + margin-left: -24px; + margin-bottom: -25px; + margin-top: -5px; + @apply yst-overflow-hidden yst-pb-[10px]; +} \ No newline at end of file diff --git a/css/src/new-settings.css b/css/src/new-settings.css index d47e927f475..e7f07987a31 100644 --- a/css/src/new-settings.css +++ b/css/src/new-settings.css @@ -241,30 +241,6 @@ body.seo_page_wpseo_page_settings { yst-bg-slate-200 yst-text-slate-900; } - - .sidebar__sale_banner_container .sidebar__sale_banner { - color: #fcd34d; - width: calc(100% + 60px); - line-height: 30px; - margin-left: -30px; - transform: rotate(-5deg); - letter-spacing: 0.5px; - box-shadow: 0 -1px 4px 0 #fcd34d, 0 1px 4px 0 #fcd34d, 0 -1px 0 0 #fcd34d, 0 1px 0 0 #fcd34d; - @apply yst-text-lg yst-font-bold yst-bg-black yst-mt-5 yst-text-center yst-mb-[10px] yst-px-0 yst-py-1; - } - - .sidebar__sale_banner_container .sidebar__sale_banner .banner_text { - @apply yst-inline-block yst-my-0 yst-mx-[35px]; - } - - .sidebar__sale_banner_container { - width: calc(100% + 48px); - margin-left: -24px; - overflow: hidden; - padding-bottom: 10px; - margin-bottom: -25px; - margin-top: -5px; - } } /* RTL */ diff --git a/src/integrations/settings-integration.php b/src/integrations/settings-integration.php index e7741c58e21..063d7e2ed9d 100644 --- a/src/integrations/settings-integration.php +++ b/src/integrations/settings-integration.php @@ -366,6 +366,9 @@ public function enqueue_assets() { \wp_enqueue_media(); $this->asset_manager->enqueue_script( 'new-settings' ); $this->asset_manager->enqueue_style( 'new-settings' ); + if ( YoastSEO()->classes->get( Promotion_Manager::class )->is( 'black-friday-2023-promotion' ) ) { + $this->asset_manager->enqueue_style( 'black-friday-banner' ); + } $this->asset_manager->localize_script( 'new-settings', 'wpseoScriptData', $this->get_script_data() ); } diff --git a/src/integrations/support-integration.php b/src/integrations/support-integration.php index fc8fe0c2a4f..582e11b8330 100644 --- a/src/integrations/support-integration.php +++ b/src/integrations/support-integration.php @@ -8,6 +8,7 @@ use Yoast\WP\SEO\Helpers\Current_Page_Helper; use Yoast\WP\SEO\Helpers\Product_Helper; use Yoast\WP\SEO\Helpers\Short_Link_Helper; +use Yoast\WP\SEO\Promotions\Application\Promotion_Manager; /** * Class Support_Integration. 
@@ -130,6 +131,9 @@ public function enqueue_assets() { \remove_action( 'admin_print_scripts', 'print_emoji_detection_script' ); $this->asset_manager->enqueue_script( 'support' ); $this->asset_manager->enqueue_style( 'support' ); + if ( YoastSEO()->classes->get( Promotion_Manager::class )->is( 'black-friday-2023-promotion' ) ) { + $this->asset_manager->enqueue_style( 'black-friday-banner' ); + } $this->asset_manager->localize_script( 'support', 'wpseoScriptData', $this->get_script_data() ); } @@ -152,7 +156,7 @@ public function remove_notices() { */ public function get_script_data() { return [ - 'preferences' => [ + 'preferences' => [ 'isPremium' => $this->product_helper->is_premium(), 'isRtl' => \is_rtl(), 'pluginUrl' => \plugins_url( '', \WPSEO_FILE ), @@ -161,7 +165,8 @@ public function get_script_data() { 'premiumCtbId' => 'f6a84663-465f-4cb5-8ba5-f7a6d72224b2', ], ], - 'linkParams' => $this->shortlink_helper->get_query_params(), + 'linkParams' => $this->shortlink_helper->get_query_params(), + 'isBlackFriday' => YoastSEO()->classes->get( Promotion_Manager::class )->is( 'black-friday-2023-promotion' ), ]; } } From 2722ef7426880221b4822c7a9f6e7a903cee2d61 Mon Sep 17 00:00:00 2001 From: Vraja Das Date: Tue, 26 Sep 2023 11:08:33 +0200 Subject: [PATCH 463/616] Update unit tests for support integration --- .../integrations/support-integration-test.php | 85 ++++++++++++++++--- 1 file changed, 75 insertions(+), 10 deletions(-) diff --git a/tests/unit/integrations/support-integration-test.php b/tests/unit/integrations/support-integration-test.php index cf995c02180..e6f157c56c9 100644 --- a/tests/unit/integrations/support-integration-test.php +++ b/tests/unit/integrations/support-integration-test.php @@ -11,6 +11,7 @@ use Yoast\WP\SEO\Helpers\Product_Helper; use Yoast\WP\SEO\Helpers\Short_Link_Helper; use Yoast\WP\SEO\Integrations\Support_Integration; +use Yoast\WP\SEO\Promotions\Application\Promotion_Manager; use Yoast\WP\SEO\Tests\Unit\TestCase; /** @@ -23,33 +24,40 @@ class Support_Integration_Test extends TestCase { const PAGE = 'wpseo_page_support'; /** - * Holds the WPSEO_Admin_Asset_Manager. + * Holds the WPSEO_Admin_Asset_Manager mock. * * @var Mockery\MockInterface|WPSEO_Admin_Asset_Manager */ private $asset_manager; /** - * Holds the Current_Page_Helper. + * Holds the Current_Page_Helper mock. * - * @var Current_Page_Helper + * @var Mockery\MockInterface|Current_Page_Helper */ private $current_page_helper; /** - * Holds the Product_Helper. + * Holds the Product_Helper mock. * - * @var Product_Helper + * @var Mockery\MockInterface|Product_Helper */ private $product_helper; /** - * Holds the Short_Link_Helper. + * Holds the Short_Link_Helper mock. * - * @var Short_Link_Helper + * @var Mockery\MockInterface|Short_Link_Helper */ private $shortlink_helper; + /** + * Holds the Promotion_Manager mock. + * + * @var Mockery\MockInterface|Promotion_Manager + */ + private $promotion_manager; + /** * The class under test. 
* @@ -67,6 +75,7 @@ public function set_up() { $this->current_page_helper = Mockery::mock( Current_Page_Helper::class ); $this->product_helper = Mockery::mock( Product_Helper::class ); $this->shortlink_helper = Mockery::mock( Short_Link_Helper::class ); + $this->promotion_manager = Mockery::mock( Promotion_Manager::class ); $this->instance = new Support_Integration( $this->asset_manager, @@ -216,17 +225,47 @@ public function test_remove_notices() { $this->instance->remove_notices(); } + /** + * Data provider for test_enqueue_assets + * + * @return array + */ + public function data_provider_enqueu_black_friday_style() { + return [ + 'is black friday' => [ + 'is_black_friday' => true, + 'expected' => 1, + ], + 'is not black friday' => [ + 'is_black_friday' => false, + 'expected' => 0, + ], + ]; + } + /** * Test enqueue_assets * * @covers ::enqueue_assets * @covers ::get_script_data + * + * @dataProvider data_provider_enqueu_black_friday_style + * + * @param bool $is_black_friday Whether it is black friday or not. + * @param int $expected The number of times the action should be called. + * @return void */ - public function test_enqueue_assets() { + public function test_enqueue_assets( $is_black_friday, $expected ) { Monkey\Functions\expect( 'remove_action' ) ->with( 'admin_print_scripts', 'print_emoji_detection_script' ) ->once(); + // In enqueue_assets. + $this->assert_black_friday_promotion( $is_black_friday ); + + // In get_script_data method. + $this->assert_black_friday_promotion( $is_black_friday ); + $this->asset_manager ->expects( 'enqueue_script' ) ->with( 'support' ) @@ -237,6 +276,11 @@ public function test_enqueue_assets() { ->with( 'support' ) ->once(); + $this->asset_manager + ->expects( 'enqueue_style' ) + ->with( 'black-friday-banner' ) + ->times( $expected ); + $this->asset_manager ->expects( 'localize_script' ) ->once(); @@ -289,8 +333,10 @@ public function expect_get_script_data() { public function test_get_script_data() { $link_params = $this->expect_get_script_data(); + $this->assert_black_friday_promotion(); + $expected = [ - 'preferences' => [ + 'preferences' => [ 'isPremium' => false, 'isRtl' => false, 'pluginUrl' => 'http://basic.wordpress.test/wp-content/worspress-seo', @@ -299,9 +345,28 @@ public function test_get_script_data() { 'premiumCtbId' => 'f6a84663-465f-4cb5-8ba5-f7a6d72224b2', ], ], - 'linkParams' => $link_params, + 'linkParams' => $link_params, + 'isBlackFriday' => false, ]; $this->assertSame( $expected, $this->instance->get_script_data() ); } + + /** + * Asserts the check for the black friday promotion. + * + * @param bool $is_black_friday Whether it is black friday or not. 
+ * @return void + */ + protected function assert_black_friday_promotion( $is_black_friday = false ) { + $this->promotion_manager->expects( 'is' ) + ->with( 'black-friday-2023-promotion' ) + ->once() + ->andReturn( $is_black_friday ); + + $container = $this->create_container_with( [ Promotion_Manager::class => $this->promotion_manager ] ); + + Monkey\Functions\expect( 'YoastSEO' ) + ->andReturn( (object) [ 'classes' => $this->create_classes_surface( $container ) ] ); + } } From 44e098f6386aba665f3042bbb24f01b72e02f8f0 Mon Sep 17 00:00:00 2001 From: Vraja Das Date: Tue, 26 Sep 2023 12:58:51 +0200 Subject: [PATCH 464/616] use isBlackFriday prop from store --- .../js/src/settings/components/sidebar-recommendations.js | 3 ++- .../js/src/shared-admin/components/premium-upsell-card.js | 6 +++--- packages/js/src/support/app.js | 3 ++- src/integrations/settings-integration.php | 2 +- src/integrations/support-integration.php | 2 +- tests/unit/integrations/support-integration-test.php | 2 +- 6 files changed, 10 insertions(+), 8 deletions(-) diff --git a/packages/js/src/settings/components/sidebar-recommendations.js b/packages/js/src/settings/components/sidebar-recommendations.js index 4f22251e513..dd4fdacc564 100644 --- a/packages/js/src/settings/components/sidebar-recommendations.js +++ b/packages/js/src/settings/components/sidebar-recommendations.js @@ -6,6 +6,7 @@ import { useSelectSettings } from "../hooks"; */ const SidebarRecommendations = () => { const isPremium = useSelectSettings( "selectPreference", [], "isPremium" ); + const isBlackFriday = useSelectSettings( "selectPreference", [], "isBlackFriday" ); const premiumLink = useSelectSettings( "selectLink", [], "https://yoa.st/jj" ); const premiumUpsellConfig = useSelectSettings( "selectUpsellSettingsAsProps" ); const academyLink = useSelectSettings( "selectLink", [], "https://yoa.st/3t6" ); @@ -16,7 +17,7 @@ const SidebarRecommendations = () => { return ( - + ); diff --git a/packages/js/src/shared-admin/components/premium-upsell-card.js b/packages/js/src/shared-admin/components/premium-upsell-card.js index 6880c206f35..bac8b5b2c65 100644 --- a/packages/js/src/shared-admin/components/premium-upsell-card.js +++ b/packages/js/src/shared-admin/components/premium-upsell-card.js @@ -2,7 +2,6 @@ import { ArrowNarrowRightIcon } from "@heroicons/react/outline"; import { createInterpolateElement, useMemo } from "@wordpress/element"; import { __, sprintf } from "@wordpress/i18n"; import { Button, Title } from "@yoast/ui-library"; -import { get } from "lodash"; import PropTypes from "prop-types"; import { ReactComponent as StarHalf } from "../../../images/star-rating-half.svg"; import { ReactComponent as Star } from "../../../images/star-rating-star.svg"; @@ -13,11 +12,11 @@ import { ReactComponent as G2Logo } from "./g2-logo-white.svg"; /** * @param {string} link The link. * @param {Object} [linkProps] Extra link props. + * @param {boolean} [isBlackFriday] Whether it is Black Friday. * @returns {JSX.Element} The premium upsell card. 
*/ -export const PremiumUpsellCard = ( { link, linkProps } ) => { +export const PremiumUpsellCard = ( { link, linkProps, isBlackFriday } ) => { const info = useMemo( () => __( "Use AI to generate titles and meta descriptions, automatically redirect deleted pages, get 24/7 support and much, much more!", "wordpress-seo" ), [] ); - const isBlackFriday = get( window, "wpseoScriptData.isBlackFriday", "" ); const getPremium = createInterpolateElement( sprintf( /* translators: %1$s and %2$s expand to a span wrap to avoid linebreaks. %3$s expands to "Yoast SEO Premium". */ @@ -87,6 +86,7 @@ export const PremiumUpsellCard = ( { link, linkProps } ) => { PremiumUpsellCard.propTypes = { link: PropTypes.string.isRequired, linkProps: PropTypes.object, + isBlackFriday: PropTypes.bool, }; PremiumUpsellCard.defaultProps = { diff --git a/packages/js/src/support/app.js b/packages/js/src/support/app.js index c35b9fdd1e0..b8ca29789c3 100644 --- a/packages/js/src/support/app.js +++ b/packages/js/src/support/app.js @@ -33,6 +33,7 @@ const openHelpScoutBeacon = () => { */ export const App = () => { const isPremium = useSelectSupport( "selectPreference", [], "isPremium", false ); + const isBlackFriday = useSelectSupport( "selectPreference", [], "isBlackFriday", false ); const premiumUpsellConfig = useSelectSupport( "selectUpsellSettingsAsProps" ); const pluginUrl = useSelectSupport( "selectPreference", [], "pluginUrl", "" ); const linkParams = useSelectSupport( "selectLinkParams" ); @@ -231,7 +232,7 @@ export const App = () => { { ! isPremium && ( - + ) } diff --git a/src/integrations/settings-integration.php b/src/integrations/settings-integration.php index 063d7e2ed9d..164c599e36d 100644 --- a/src/integrations/settings-integration.php +++ b/src/integrations/settings-integration.php @@ -433,7 +433,6 @@ protected function get_script_data() { 'taxonomies' => $transformed_taxonomies, 'fallbacks' => $this->get_fallbacks(), 'showNewContentTypeNotification' => $show_new_content_type_notification, - 'isBlackFriday' => YoastSEO()->classes->get( Promotion_Manager::class )->is( 'black-friday-2023-promotion' ), ]; } @@ -488,6 +487,7 @@ protected function get_preferences( $settings ) { 'upsellSettings' => $this->get_upsell_settings(), 'siteRepresentsPerson' => $this->get_site_represents_person( $settings ), 'siteBasicsPolicies' => $this->get_site_basics_policies( $settings ), + 'isBlackFriday' => YoastSEO()->classes->get( Promotion_Manager::class )->is( 'black-friday-2023-promotion' ), ]; } diff --git a/src/integrations/support-integration.php b/src/integrations/support-integration.php index 582e11b8330..684ef12fde9 100644 --- a/src/integrations/support-integration.php +++ b/src/integrations/support-integration.php @@ -164,9 +164,9 @@ public function get_script_data() { 'actionId' => 'load-nfd-ctb', 'premiumCtbId' => 'f6a84663-465f-4cb5-8ba5-f7a6d72224b2', ], + 'isBlackFriday' => YoastSEO()->classes->get( Promotion_Manager::class )->is( 'black-friday-2023-promotion' ), ], 'linkParams' => $this->shortlink_helper->get_query_params(), - 'isBlackFriday' => YoastSEO()->classes->get( Promotion_Manager::class )->is( 'black-friday-2023-promotion' ), ]; } } diff --git a/tests/unit/integrations/support-integration-test.php b/tests/unit/integrations/support-integration-test.php index e6f157c56c9..9941dd3f812 100644 --- a/tests/unit/integrations/support-integration-test.php +++ b/tests/unit/integrations/support-integration-test.php @@ -344,9 +344,9 @@ public function test_get_script_data() { 'actionId' => 'load-nfd-ctb', 'premiumCtbId' 
=> 'f6a84663-465f-4cb5-8ba5-f7a6d72224b2', ], + 'isBlackFriday' => false, ], 'linkParams' => $link_params, - 'isBlackFriday' => false, ]; $this->assertSame( $expected, $this->instance->get_script_data() ); From c79865a3aec3e064702c173da9fc57b6d9f0bfb6 Mon Sep 17 00:00:00 2001 From: Vraja Das Date: Tue, 26 Sep 2023 13:04:02 +0200 Subject: [PATCH 465/616] php fix cs --- src/integrations/settings-integration.php | 2 +- src/integrations/support-integration.php | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/integrations/settings-integration.php b/src/integrations/settings-integration.php index 164c599e36d..a40e85ed447 100644 --- a/src/integrations/settings-integration.php +++ b/src/integrations/settings-integration.php @@ -487,7 +487,7 @@ protected function get_preferences( $settings ) { 'upsellSettings' => $this->get_upsell_settings(), 'siteRepresentsPerson' => $this->get_site_represents_person( $settings ), 'siteBasicsPolicies' => $this->get_site_basics_policies( $settings ), - 'isBlackFriday' => YoastSEO()->classes->get( Promotion_Manager::class )->is( 'black-friday-2023-promotion' ), + 'isBlackFriday' => YoastSEO()->classes->get( Promotion_Manager::class )->is( 'black-friday-2023-promotion' ), ]; } diff --git a/src/integrations/support-integration.php b/src/integrations/support-integration.php index 684ef12fde9..f96ee535ac2 100644 --- a/src/integrations/support-integration.php +++ b/src/integrations/support-integration.php @@ -164,7 +164,7 @@ public function get_script_data() { 'actionId' => 'load-nfd-ctb', 'premiumCtbId' => 'f6a84663-465f-4cb5-8ba5-f7a6d72224b2', ], - 'isBlackFriday' => YoastSEO()->classes->get( Promotion_Manager::class )->is( 'black-friday-2023-promotion' ), + 'isBlackFriday' => YoastSEO()->classes->get( Promotion_Manager::class )->is( 'black-friday-2023-promotion' ), ], 'linkParams' => $this->shortlink_helper->get_query_params(), ]; From 97a357760f682438409bad6bd229b16439ef6529 Mon Sep 17 00:00:00 2001 From: Vraja Das Date: Tue, 26 Sep 2023 13:18:28 +0200 Subject: [PATCH 466/616] refactor position in window object --- src/integrations/settings-integration.php | 2 +- src/integrations/support-integration.php | 2 +- tests/unit/integrations/support-integration-test.php | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/integrations/settings-integration.php b/src/integrations/settings-integration.php index a40e85ed447..3a56aa1daa7 100644 --- a/src/integrations/settings-integration.php +++ b/src/integrations/settings-integration.php @@ -461,6 +461,7 @@ protected function get_preferences( $settings ) { 'isWooCommerceActive' => $this->woocommerce_helper->is_active(), 'isLocalSeoActive' => \defined( 'WPSEO_LOCAL_FILE' ), 'isNewsSeoActive' => \defined( 'WPSEO_NEWS_FILE' ), + 'isBlackFriday' => YoastSEO()->classes->get( Promotion_Manager::class )->is( 'black-friday-2023-promotion' ), 'siteUrl' => \get_bloginfo( 'url' ), 'siteTitle' => \get_bloginfo( 'name' ), 'sitemapUrl' => WPSEO_Sitemaps_Router::get_base_url( 'sitemap_index.xml' ), @@ -487,7 +488,6 @@ protected function get_preferences( $settings ) { 'upsellSettings' => $this->get_upsell_settings(), 'siteRepresentsPerson' => $this->get_site_represents_person( $settings ), 'siteBasicsPolicies' => $this->get_site_basics_policies( $settings ), - 'isBlackFriday' => YoastSEO()->classes->get( Promotion_Manager::class )->is( 'black-friday-2023-promotion' ), ]; } diff --git a/src/integrations/support-integration.php b/src/integrations/support-integration.php index f96ee535ac2..2dac952ebc9 
100644 --- a/src/integrations/support-integration.php +++ b/src/integrations/support-integration.php @@ -159,12 +159,12 @@ public function get_script_data() { 'preferences' => [ 'isPremium' => $this->product_helper->is_premium(), 'isRtl' => \is_rtl(), + 'isBlackFriday' => YoastSEO()->classes->get( Promotion_Manager::class )->is( 'black-friday-2023-promotion' ), 'pluginUrl' => \plugins_url( '', \WPSEO_FILE ), 'upsellSettings' => [ 'actionId' => 'load-nfd-ctb', 'premiumCtbId' => 'f6a84663-465f-4cb5-8ba5-f7a6d72224b2', ], - 'isBlackFriday' => YoastSEO()->classes->get( Promotion_Manager::class )->is( 'black-friday-2023-promotion' ), ], 'linkParams' => $this->shortlink_helper->get_query_params(), ]; diff --git a/tests/unit/integrations/support-integration-test.php b/tests/unit/integrations/support-integration-test.php index 9941dd3f812..47f92627f10 100644 --- a/tests/unit/integrations/support-integration-test.php +++ b/tests/unit/integrations/support-integration-test.php @@ -339,12 +339,12 @@ public function test_get_script_data() { 'preferences' => [ 'isPremium' => false, 'isRtl' => false, + 'isBlackFriday' => false, 'pluginUrl' => 'http://basic.wordpress.test/wp-content/worspress-seo', 'upsellSettings' => [ 'actionId' => 'load-nfd-ctb', 'premiumCtbId' => 'f6a84663-465f-4cb5-8ba5-f7a6d72224b2', ], - 'isBlackFriday' => false, ], 'linkParams' => $link_params, ]; From 4be713a58f9fd3083b3178de3ee06ab7373210bb Mon Sep 17 00:00:00 2001 From: Vraja Das Date: Tue, 26 Sep 2023 13:48:13 +0200 Subject: [PATCH 467/616] Changed Black Friday Prop to promotions --- .../js/src/settings/components/sidebar-recommendations.js | 4 ++-- .../js/src/shared-admin/components/premium-upsell-card.js | 8 +++++--- packages/js/src/support/app.js | 4 ++-- src/integrations/settings-integration.php | 2 +- src/integrations/support-integration.php | 2 +- 5 files changed, 11 insertions(+), 9 deletions(-) diff --git a/packages/js/src/settings/components/sidebar-recommendations.js b/packages/js/src/settings/components/sidebar-recommendations.js index dd4fdacc564..e303235784a 100644 --- a/packages/js/src/settings/components/sidebar-recommendations.js +++ b/packages/js/src/settings/components/sidebar-recommendations.js @@ -6,7 +6,7 @@ import { useSelectSettings } from "../hooks"; */ const SidebarRecommendations = () => { const isPremium = useSelectSettings( "selectPreference", [], "isPremium" ); - const isBlackFriday = useSelectSettings( "selectPreference", [], "isBlackFriday" ); + const promotions = useSelectSettings( "selectPreference", [], "promotions" ); const premiumLink = useSelectSettings( "selectLink", [], "https://yoa.st/jj" ); const premiumUpsellConfig = useSelectSettings( "selectUpsellSettingsAsProps" ); const academyLink = useSelectSettings( "selectLink", [], "https://yoa.st/3t6" ); @@ -17,7 +17,7 @@ const SidebarRecommendations = () => { return ( - + ); diff --git a/packages/js/src/shared-admin/components/premium-upsell-card.js b/packages/js/src/shared-admin/components/premium-upsell-card.js index bac8b5b2c65..215a237d08e 100644 --- a/packages/js/src/shared-admin/components/premium-upsell-card.js +++ b/packages/js/src/shared-admin/components/premium-upsell-card.js @@ -12,10 +12,10 @@ import { ReactComponent as G2Logo } from "./g2-logo-white.svg"; /** * @param {string} link The link. * @param {Object} [linkProps] Extra link props. - * @param {boolean} [isBlackFriday] Whether it is Black Friday. + * @param {array} [promotions] Promotions. * @returns {JSX.Element} The premium upsell card. 
*/ -export const PremiumUpsellCard = ( { link, linkProps, isBlackFriday } ) => { +export const PremiumUpsellCard = ( { link, linkProps, promotions } ) => { const info = useMemo( () => __( "Use AI to generate titles and meta descriptions, automatically redirect deleted pages, get 24/7 support and much, much more!", "wordpress-seo" ), [] ); const getPremium = createInterpolateElement( sprintf( @@ -29,6 +29,7 @@ export const PremiumUpsellCard = ( { link, linkProps, isBlackFriday } ) => { nowrap: , } ); + const isBlackFriday = promotions.includes( "black-friday-2023-promotion" ); return (
    @@ -86,9 +87,10 @@ export const PremiumUpsellCard = ( { link, linkProps, isBlackFriday } ) => { PremiumUpsellCard.propTypes = { link: PropTypes.string.isRequired, linkProps: PropTypes.object, - isBlackFriday: PropTypes.bool, + promotions: PropTypes.array, }; PremiumUpsellCard.defaultProps = { linkProps: {}, + promotions: [], }; diff --git a/packages/js/src/support/app.js b/packages/js/src/support/app.js index b8ca29789c3..d8ddab3f274 100644 --- a/packages/js/src/support/app.js +++ b/packages/js/src/support/app.js @@ -33,7 +33,7 @@ const openHelpScoutBeacon = () => { */ export const App = () => { const isPremium = useSelectSupport( "selectPreference", [], "isPremium", false ); - const isBlackFriday = useSelectSupport( "selectPreference", [], "isBlackFriday", false ); + const promotions = useSelectSupport( "selectPreference", [], "promotions", false ); const premiumUpsellConfig = useSelectSupport( "selectUpsellSettingsAsProps" ); const pluginUrl = useSelectSupport( "selectPreference", [], "pluginUrl", "" ); const linkParams = useSelectSupport( "selectLinkParams" ); @@ -232,7 +232,7 @@ export const App = () => { { ! isPremium && ( - + ) } diff --git a/src/integrations/settings-integration.php b/src/integrations/settings-integration.php index 3a56aa1daa7..e5d851299cc 100644 --- a/src/integrations/settings-integration.php +++ b/src/integrations/settings-integration.php @@ -461,7 +461,7 @@ protected function get_preferences( $settings ) { 'isWooCommerceActive' => $this->woocommerce_helper->is_active(), 'isLocalSeoActive' => \defined( 'WPSEO_LOCAL_FILE' ), 'isNewsSeoActive' => \defined( 'WPSEO_NEWS_FILE' ), - 'isBlackFriday' => YoastSEO()->classes->get( Promotion_Manager::class )->is( 'black-friday-2023-promotion' ), + 'promotions' => YoastSEO()->classes->get( Promotion_Manager::class )->get_current_promotions(), 'siteUrl' => \get_bloginfo( 'url' ), 'siteTitle' => \get_bloginfo( 'name' ), 'sitemapUrl' => WPSEO_Sitemaps_Router::get_base_url( 'sitemap_index.xml' ), diff --git a/src/integrations/support-integration.php b/src/integrations/support-integration.php index 2dac952ebc9..edfa99e6258 100644 --- a/src/integrations/support-integration.php +++ b/src/integrations/support-integration.php @@ -159,7 +159,7 @@ public function get_script_data() { 'preferences' => [ 'isPremium' => $this->product_helper->is_premium(), 'isRtl' => \is_rtl(), - 'isBlackFriday' => YoastSEO()->classes->get( Promotion_Manager::class )->is( 'black-friday-2023-promotion' ), + 'promotions' => YoastSEO()->classes->get( Promotion_Manager::class )->get_current_promotions(), 'pluginUrl' => \plugins_url( '', \WPSEO_FILE ), 'upsellSettings' => [ 'actionId' => 'load-nfd-ctb', From cae77deddd8d02ad0fe89c1de0c1f250ad0049c1 Mon Sep 17 00:00:00 2001 From: Vraja Das Date: Tue, 26 Sep 2023 14:01:35 +0200 Subject: [PATCH 468/616] fix support integration tests --- .../integrations/support-integration-test.php | 39 ++++++++++++------- 1 file changed, 25 insertions(+), 14 deletions(-) diff --git a/tests/unit/integrations/support-integration-test.php b/tests/unit/integrations/support-integration-test.php index 47f92627f10..6c642a3a67c 100644 --- a/tests/unit/integrations/support-integration-test.php +++ b/tests/unit/integrations/support-integration-test.php @@ -58,6 +58,13 @@ class Support_Integration_Test extends TestCase { */ private $promotion_manager; + /** + * Holds the container. + * + * @var Mockery\MockInterface|Container + */ + private $container; + /** * The class under test. 
* @@ -76,6 +83,7 @@ public function set_up() { $this->product_helper = Mockery::mock( Product_Helper::class ); $this->shortlink_helper = Mockery::mock( Short_Link_Helper::class ); $this->promotion_manager = Mockery::mock( Promotion_Manager::class ); + $this->container = $this->create_container_with( [ Promotion_Manager::class => $this->promotion_manager ] ); $this->instance = new Support_Integration( $this->asset_manager, @@ -260,11 +268,18 @@ public function test_enqueue_assets( $is_black_friday, $expected ) { ->with( 'admin_print_scripts', 'print_emoji_detection_script' ) ->once(); - // In enqueue_assets. - $this->assert_black_friday_promotion( $is_black_friday ); // In get_script_data method. - $this->assert_black_friday_promotion( $is_black_friday ); + $this->assert_promotions(); + + // In enqueue_assets method. + $this->promotion_manager->expects( 'is' ) + ->with( 'black-friday-2023-promotion' ) + ->once() + ->andReturn( $is_black_friday ); + + Monkey\Functions\expect( 'YoastSEO' ) + ->andReturn( (object) [ 'classes' => $this->create_classes_surface( $this->container ) ] ); $this->asset_manager ->expects( 'enqueue_script' ) @@ -333,13 +348,13 @@ public function expect_get_script_data() { public function test_get_script_data() { $link_params = $this->expect_get_script_data(); - $this->assert_black_friday_promotion(); + $this->assert_promotions(); $expected = [ 'preferences' => [ 'isPremium' => false, 'isRtl' => false, - 'isBlackFriday' => false, + 'promotions' => [ 'black-friday-2023-promotion' ], 'pluginUrl' => 'http://basic.wordpress.test/wp-content/worspress-seo', 'upsellSettings' => [ 'actionId' => 'load-nfd-ctb', @@ -353,20 +368,16 @@ public function test_get_script_data() { } /** - * Asserts the check for the black friday promotion. + * Asserts the check for promotions. * - * @param bool $is_black_friday Whether it is black friday or not. 
* @return void */ - protected function assert_black_friday_promotion( $is_black_friday = false ) { - $this->promotion_manager->expects( 'is' ) - ->with( 'black-friday-2023-promotion' ) + protected function assert_promotions() { + $this->promotion_manager->expects( 'get_current_promotions' ) ->once() - ->andReturn( $is_black_friday ); - - $container = $this->create_container_with( [ Promotion_Manager::class => $this->promotion_manager ] ); + ->andReturn( [ 'black-friday-2023-promotion' ] ); Monkey\Functions\expect( 'YoastSEO' ) - ->andReturn( (object) [ 'classes' => $this->create_classes_surface( $container ) ] ); + ->andReturn( (object) [ 'classes' => $this->create_classes_surface( $this->container ) ] ); } } From 3075ffecd1dd53ad9307020f7c9bffb9a1f66b05 Mon Sep 17 00:00:00 2001 From: Vraja Das Date: Tue, 26 Sep 2023 14:11:54 +0200 Subject: [PATCH 469/616] fix default value for promotions --- packages/js/src/support/app.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/js/src/support/app.js b/packages/js/src/support/app.js index d8ddab3f274..872468e4cf7 100644 --- a/packages/js/src/support/app.js +++ b/packages/js/src/support/app.js @@ -33,7 +33,7 @@ const openHelpScoutBeacon = () => { */ export const App = () => { const isPremium = useSelectSupport( "selectPreference", [], "isPremium", false ); - const promotions = useSelectSupport( "selectPreference", [], "promotions", false ); + const promotions = useSelectSupport( "selectPreference", [], "promotions", [] ); const premiumUpsellConfig = useSelectSupport( "selectUpsellSettingsAsProps" ); const pluginUrl = useSelectSupport( "selectPreference", [], "pluginUrl", "" ); const linkParams = useSelectSupport( "selectLinkParams" ); From 1f7a01c746290fed6a41d823c64b30627484c4c9 Mon Sep 17 00:00:00 2001 From: Vraja Das Date: Tue, 26 Sep 2023 14:12:44 +0200 Subject: [PATCH 470/616] add default value to promotions --- packages/js/src/settings/components/sidebar-recommendations.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/js/src/settings/components/sidebar-recommendations.js b/packages/js/src/settings/components/sidebar-recommendations.js index e303235784a..c710b8de73c 100644 --- a/packages/js/src/settings/components/sidebar-recommendations.js +++ b/packages/js/src/settings/components/sidebar-recommendations.js @@ -6,7 +6,7 @@ import { useSelectSettings } from "../hooks"; */ const SidebarRecommendations = () => { const isPremium = useSelectSettings( "selectPreference", [], "isPremium" ); - const promotions = useSelectSettings( "selectPreference", [], "promotions" ); + const promotions = useSelectSettings( "selectPreference", [], "promotions", [] ); const premiumLink = useSelectSettings( "selectLink", [], "https://yoa.st/jj" ); const premiumUpsellConfig = useSelectSettings( "selectUpsellSettingsAsProps" ); const academyLink = useSelectSettings( "selectLink", [], "https://yoa.st/3t6" ); From 52964b017b618a718b7fa4b8030a941992c5931c Mon Sep 17 00:00:00 2001 From: Vraja Das Date: Tue, 26 Sep 2023 16:04:53 +0200 Subject: [PATCH 471/616] Add same 30% text --- .../components/premium-upsell-card.js | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/packages/js/src/shared-admin/components/premium-upsell-card.js b/packages/js/src/shared-admin/components/premium-upsell-card.js index 215a237d08e..535b8819398 100644 --- a/packages/js/src/shared-admin/components/premium-upsell-card.js +++ b/packages/js/src/shared-admin/components/premium-upsell-card.js @@ -30,6 +30,17 @@ export const 
PremiumUpsellCard = ( { link, linkProps, promotions } ) => { } ); const isBlackFriday = promotions.includes( "black-friday-2023-promotion" ); + const saveMoneyText = createInterpolateElement( + sprintf( + /* translators: %1$s and %2$s expand to strong tags. */ + __( "%1$sSAVE 30%%%2$s on your 12 month subscription", "wordpress-seo" ), + "", + "" + ), + { + strong: , + } + ); return (
    @@ -47,6 +58,11 @@ export const PremiumUpsellCard = ( { link, linkProps, promotions } ) => {
 		{ getPremium }

 		{ info }

+		{ isBlackFriday &&
+			[opening wrapper markup lost in this rendering]
+				{ saveMoneyText }
+			[closing wrapper markup lost in this rendering]
+		}
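Note on the hunk above: the banner's wrapper markup did not survive in this rendering. As a rough, self-contained sketch of what the change wires together, only promotions, isBlackFriday, saveMoneyText and the translated string are taken from the patch; the imports, the plain <strong /> mapping and the unstyled <div> wrapper are assumptions, not the exact markup from premium-upsell-card.js:

    // Sketch only: the wrapper element and the strong mapping are assumed, not copied from the patch.
    import { createInterpolateElement } from "@wordpress/element";
    import { __, sprintf } from "@wordpress/i18n";

    const BlackFridayBanner = ( { promotions = [] } ) => {
    	// The PHP side exposes the active promotion slugs; the card only checks for the Black Friday one.
    	const isBlackFriday = promotions.includes( "black-friday-2023-promotion" );

    	const saveMoneyText = createInterpolateElement(
    		sprintf(
    			/* translators: %1$s and %2$s expand to strong tags. */
    			__( "%1$sSAVE 30%%%2$s on your 12 month subscription", "wordpress-seo" ),
    			"<strong>",
    			"</strong>"
    		),
    		{ strong: <strong /> }
    	);

    	// Rendered only while the Black Friday promotion is active.
    	return isBlackFriday ? <div>{ saveMoneyText }</div> : null;
    };

This mirrors the promotions.includes( "black-friday-2023-promotion" ) check introduced when the isBlackFriday prop was replaced by the promotions array.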
    ' . PHP_EOL . PHP_EOL; diff --git a/admin/menu/class-admin-menu.php b/admin/menu/class-admin-menu.php index df97b3185c1..c0cf22e4e0a 100644 --- a/admin/menu/class-admin-menu.php +++ b/admin/menu/class-admin-menu.php @@ -114,12 +114,10 @@ protected function get_notification_counter() { $notification_count = $notification_center->get_notification_count(); // Add main page. - /* translators: %s: number of notifications */ + /* translators: %s: number of notifications; Hidden accessibility text. */ $notifications = sprintf( _n( '%s notification', '%s notifications', $notification_count, 'wordpress-seo' ), number_format_i18n( $notification_count ) ); - $counter = sprintf( '%2$s', $notification_count, $notifications ); - - return $counter; + return sprintf( '%2$s', $notification_count, $notifications ); } /** diff --git a/admin/taxonomy/class-taxonomy-columns.php b/admin/taxonomy/class-taxonomy-columns.php index 91905940e7b..b5eb8aab1db 100644 --- a/admin/taxonomy/class-taxonomy-columns.php +++ b/admin/taxonomy/class-taxonomy-columns.php @@ -84,11 +84,15 @@ public function add_columns( array $columns ) { $new_columns[ $column_name ] = $column_value; if ( $column_name === 'description' && $this->analysis_seo->is_enabled() ) { - $new_columns['wpseo-score'] = '' . __( 'SEO score', 'wordpress-seo' ) . ''; + $new_columns['wpseo-score'] = '' . + /* translators: Hidden accessibility text. */ + __( 'SEO score', 'wordpress-seo' ) . ''; } if ( $column_name === 'description' && $this->analysis_readability->is_enabled() ) { - $new_columns['wpseo-score-readability'] = '' . __( 'Readability score', 'wordpress-seo' ) . ''; + $new_columns['wpseo-score-readability'] = '' . + /* translators: Hidden accessibility text. */ + __( 'Readability score', 'wordpress-seo' ) . ''; } } diff --git a/admin/views/licenses.php b/admin/views/licenses.php index 86c78c74efd..235b13becb2 100644 --- a/admin/views/licenses.php +++ b/admin/views/licenses.php @@ -122,6 +122,7 @@ $wpseo_extensions_header = sprintf( __( '%1$s Extensions', 'wordpress-seo' ), 'Yoast SEO' ); $new_tab_message = sprintf( '%1$s', + /* translators: Hidden accessibility text. */ esc_html__( '(Opens in a new browser tab)', 'wordpress-seo' ) ); diff --git a/admin/views/partial-notifications-template.php b/admin/views/partial-notifications-template.php index 43835d4791c..c09dca5b291 100644 --- a/admin/views/partial-notifications-template.php +++ b/admin/views/partial-notifications-template.php @@ -35,6 +35,7 @@ function _yoast_display_notifications( $notifications_list, $status ) { case 'active': $button = sprintf( '', + /* translators: Hidden accessibility text. */ esc_html__( 'Hide this item.', 'wordpress-seo' ) ); break; @@ -42,6 +43,7 @@ function _yoast_display_notifications( $notifications_list, $status ) { case 'dismissed': $button = sprintf( '', + /* translators: Hidden accessibility text. */ esc_html__( 'Show this item.', 'wordpress-seo' ) ); break; diff --git a/admin/views/tabs/network/features.php b/admin/views/tabs/network/features.php index 811c190b5a8..e25df24a825 100644 --- a/admin/views/tabs/network/features.php +++ b/admin/views/tabs/network/features.php @@ -58,7 +58,7 @@ $feature_help = new WPSEO_Admin_Help_Panel( WPSEO_Option::ALLOW_KEY_PREFIX . $feature->setting, - /* translators: %s expands to a feature's name */ + /* translators: %s expands to a feature's name; Hidden accessibility text. 
*/ sprintf( esc_html__( 'Help on: %s', 'wordpress-seo' ), esc_html( $feature->name ) ), $help_text ); diff --git a/admin/views/tabs/network/integrations.php b/admin/views/tabs/network/integrations.php index 61b66364511..a85ede2e664 100644 --- a/admin/views/tabs/network/integrations.php +++ b/admin/views/tabs/network/integrations.php @@ -46,7 +46,7 @@ $feature_help = new WPSEO_Admin_Help_Panel( WPSEO_Option::ALLOW_KEY_PREFIX . $integration->setting, - /* translators: %s expands to an integration's name */ + /* translators: %s expands to an integration's name; Hidden accessibility text. */ sprintf( esc_html__( 'Help on: %s', 'wordpress-seo' ), esc_html( $integration->name ) ), $help_text ); diff --git a/admin/watchers/class-slug-change-watcher.php b/admin/watchers/class-slug-change-watcher.php index 7b5e3db5366..0d96066a7a9 100644 --- a/admin/watchers/class-slug-change-watcher.php +++ b/admin/watchers/class-slug-change-watcher.php @@ -227,6 +227,7 @@ protected function get_message( $first_sentence, $action, $object_label ) { . '

    ' /* translators: %s expands to Yoast SEO Premium */ . sprintf( __( 'Get %s', 'wordpress-seo' ), 'Yoast SEO Premium' ) + /* translators: Hidden accessibility text. */ . '<span class="screen-reader-text">' . __( '(Opens in a new browser tab)', 'wordpress-seo' ) . '</span>' . '</a>' . '

    '; diff --git a/inc/class-wpseo-admin-bar-menu.php b/inc/class-wpseo-admin-bar-menu.php index 60a42e0247c..8df520d7e57 100644 --- a/inc/class-wpseo-admin-bar-menu.php +++ b/inc/class-wpseo-admin-bar-menu.php @@ -699,6 +699,7 @@ protected function add_network_settings_submenu( WP_Admin_Bar $wp_admin_bar ) { * @return string Admin bar title markup. */ protected function get_title() { + /* translators: Hidden accessibility text. */ return ''; } @@ -850,7 +851,7 @@ protected function get_notification_counter() { return ''; } - /* translators: %s: number of notifications */ + /* translators: %s: number of notifications; Hidden accessibility text. */ $counter_screen_reader_text = sprintf( _n( '%s notification', '%s notifications', $notification_count, 'wordpress-seo' ), number_format_i18n( $notification_count ) ); return sprintf( '
    %s
    ', $notification_count, $counter_screen_reader_text ); diff --git a/packages/components/src/data-model/DataModel.js b/packages/components/src/data-model/DataModel.js index da6f1f55c2c..bbd9eb89753 100644 --- a/packages/components/src/data-model/DataModel.js +++ b/packages/components/src/data-model/DataModel.js @@ -21,7 +21,7 @@ const dataItemProps = { * @returns {HTMLElement} A list item. */ const DataItem = ( props ) => { - /* Translators: %d expands to number of occurrences. */ + /* translators: %d expands to number of occurrences; Hidden accessibility text. */ const screenReaderText = sprintf( __( "%d occurrences", "wordpress-seo" ), props.number ); return (
  • Date: Wed, 27 Sep 2023 16:25:37 +0200 Subject: [PATCH 481/616] Fix impossible translation AFAIK, the retrieving of translations won't follow variables Coincidentally, the implementers of this variable seem to all already call `__` on the string itself. --- packages/components/src/help-icon/HelpIcon.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/components/src/help-icon/HelpIcon.js b/packages/components/src/help-icon/HelpIcon.js index cbce015ff54..480ab6a75f3 100644 --- a/packages/components/src/help-icon/HelpIcon.js +++ b/packages/components/src/help-icon/HelpIcon.js @@ -46,8 +46,8 @@ const HelpIcon = ( { linkTo, linkText } ) => ( /> - { __( linkText, "wordpress-seo" ) } { __( "(Opens in a new browser tab)", "wordpress-seo" ) } + { linkText } ); From f34eda9bf5ea7d1a1ed01c341dd1513d9817231e Mon Sep 17 00:00:00 2001 From: Igor <35524806+igorschoester@users.noreply.github.com> Date: Wed, 27 Sep 2023 16:28:35 +0200 Subject: [PATCH 482/616] Following HelpIcon' linkText --- packages/components/src/help-icon/HelpIcon.js | 7 ++++++- packages/js/src/components/AdvancedSettings.js | 5 +++++ packages/js/src/components/SchemaTab.js | 2 ++ packages/js/src/components/WincherPostPublish.js | 1 + packages/js/src/components/WincherSEOPerformance.js | 1 + packages/js/src/components/WordProofTimestampToggle.js | 4 ++-- .../js/src/insights/components/estimated-reading-time.js | 1 + packages/js/src/insights/components/flesch-reading-ease.js | 1 + packages/js/src/insights/components/text-formality.js | 1 + packages/js/src/insights/components/text-length.js | 2 ++ 10 files changed, 22 insertions(+), 3 deletions(-) diff --git a/packages/components/src/help-icon/HelpIcon.js b/packages/components/src/help-icon/HelpIcon.js index 480ab6a75f3..4f3131aee7c 100644 --- a/packages/components/src/help-icon/HelpIcon.js +++ b/packages/components/src/help-icon/HelpIcon.js @@ -46,8 +46,13 @@ const HelpIcon = ( { linkTo, linkText } ) => ( /> - { __( "(Opens in a new browser tab)", "wordpress-seo" ) } { linkText } + + { + /* translators: Hidden accessibility text. */ + __( "(Opens in a new browser tab)", "wordpress-seo" ) + } + ); diff --git a/packages/js/src/components/AdvancedSettings.js b/packages/js/src/components/AdvancedSettings.js index f6315ca5480..07d37428a9f 100644 --- a/packages/js/src/components/AdvancedSettings.js +++ b/packages/js/src/components/AdvancedSettings.js @@ -92,6 +92,7 @@ const MetaRobotsNoIndex = ( { noIndex, onNoIndexChange, editorContext, isPrivate options={ metaRobotsNoIndexOptions } selected={ noIndex } linkTo={ wpseoAdminL10n[ "shortlinks.advanced.allow_search_engines" ] } + /* translators: Hidden accessibility text. */ linkText={ __( "Learn more about the no-index setting on our help page.", "wordpress-seo" ) } /> ; @@ -132,6 +133,7 @@ const MetaRobotsNoFollow = ( { noFollow, onNoFollowChange, postTypeName } ) => { onChange={ onNoFollowChange } selected={ noFollow } linkTo={ wpseoAdminL10n[ "shortlinks.advanced.follow_links" ] } + /* translators: Hidden accessibility text. */ linkText={ __( "Learn more about the no-follow setting on our help page.", "wordpress-seo" ) } />; } } @@ -169,6 +171,7 @@ const MetaRobotsAdvanced = ( { advanced, onAdvancedChange } ) => { ] } selected={ advanced } linkTo={ wpseoAdminL10n[ "shortlinks.advanced.meta_robots" ] } + /* translators: Hidden accessibility text. 
*/ linkText={ __( "Learn more about advanced meta robots settings on our help page.", "wordpress-seo" ) } />; } } @@ -197,6 +200,7 @@ const BreadcrumbsTitle = ( { breadcrumbsTitle, onBreadcrumbsTitleChange } ) => { onChange={ onBreadcrumbsTitleChange } value={ breadcrumbsTitle } linkTo={ wpseoAdminL10n[ "shortlinks.advanced.breadcrumbs_title" ] } + /* translators: Hidden accessibility text. */ linkText={ __( "Learn more about the breadcrumbs title setting on our help page.", "wordpress-seo" ) } />; } @@ -226,6 +230,7 @@ const CanonicalURL = ( { canonical, onCanonicalChange } ) => { onChange={ onCanonicalChange } value={ canonical } linkTo={ "https://yoa.st/canonical-url" } + /* translators: Hidden accessibility text. */ linkText={ __( "Learn more about canonical URLs on our help page.", "wordpress-seo" ) } />; } diff --git a/packages/js/src/components/SchemaTab.js b/packages/js/src/components/SchemaTab.js index 724983a7538..0c1e6abba5f 100644 --- a/packages/js/src/components/SchemaTab.js +++ b/packages/js/src/components/SchemaTab.js @@ -122,6 +122,7 @@ const Header = ( props ) => { return
  • { __( "Twitter", "wordpress-seo" ) } - { __( "(Opens in a new browser tab)", "wordpress-seo" ) } + + { + /* translators: Hidden accessibility text. */ + __( "(Opens in a new browser tab)", "wordpress-seo" ) + } +
  • diff --git a/packages/js/src/components/SEMrushRelatedKeyphrasesModal.js b/packages/js/src/components/SEMrushRelatedKeyphrasesModal.js index 78859225a11..dd0b22741ba 100644 --- a/packages/js/src/components/SEMrushRelatedKeyphrasesModal.js +++ b/packages/js/src/components/SEMrushRelatedKeyphrasesModal.js @@ -209,8 +209,11 @@ class SEMrushRelatedKeyphrasesModal extends Component { onClick={ this.onLinkClick } > { __( "Get related keyphrases", "wordpress-seo" ) } - - { __( "(Opens in a new browser window)", "wordpress-seo" ) } + + { + /* translators: Hidden accessibility text. */ + __( "(Opens in a new browser tab)", "wordpress-seo" ) + }
    } diff --git a/packages/js/src/components/contentAnalysis/InclusiveLanguageAnalysis.js b/packages/js/src/components/contentAnalysis/InclusiveLanguageAnalysis.js index b3d1f3afa61..6b065bfc123 100644 --- a/packages/js/src/components/contentAnalysis/InclusiveLanguageAnalysis.js +++ b/packages/js/src/components/contentAnalysis/InclusiveLanguageAnalysis.js @@ -74,7 +74,10 @@ const InclusiveLanguageAnalysis = ( props ) => { className="dashicons" > - { __( "Learn more about the inclusive language analysis", "wordpress-seo" ) } + { + /* translators: Hidden accessibility text. */ + __( "Learn more about the inclusive language analysis", "wordpress-seo" ) + } @@ -137,7 +140,10 @@ const InclusiveLanguageAnalysis = ( props ) => { className="dashicons" > - { __( "Learn more about the inclusive language analysis", "wordpress-seo" ) } + { + /* translators: Hidden accessibility text. */ + __( "Learn more about the inclusive language analysis", "wordpress-seo" ) + } diff --git a/packages/js/src/components/contentAnalysis/KeywordInput.js b/packages/js/src/components/contentAnalysis/KeywordInput.js index 28604c634eb..727dac09962 100644 --- a/packages/js/src/components/contentAnalysis/KeywordInput.js +++ b/packages/js/src/components/contentAnalysis/KeywordInput.js @@ -44,7 +44,10 @@ class KeywordInput extends Component { className="dashicons" > - { __( "Help on choosing the perfect focus keyphrase", "wordpress-seo" ) } + { + /* translators: Hidden accessibility text. */ + __( "Help on choosing the perfect focus keyphrase", "wordpress-seo" ) + } ); diff --git a/packages/js/src/components/contentAnalysis/ReadabilityAnalysis.js b/packages/js/src/components/contentAnalysis/ReadabilityAnalysis.js index e7f96071077..0b44e9aa110 100644 --- a/packages/js/src/components/contentAnalysis/ReadabilityAnalysis.js +++ b/packages/js/src/components/contentAnalysis/ReadabilityAnalysis.js @@ -55,7 +55,10 @@ class ReadabilityAnalysis extends Component { className="dashicons" > - { __( "Learn more about the readability analysis", "wordpress-seo" ) } + { + /* translators: Hidden accessibility text. */ + __( "Learn more about the readability analysis", "wordpress-seo" ) + } diff --git a/packages/js/src/components/modals/SEMrushKeyphrasesTable.js b/packages/js/src/components/modals/SEMrushKeyphrasesTable.js index da3e1e055f5..cd1ac50edeb 100644 --- a/packages/js/src/components/modals/SEMrushKeyphrasesTable.js +++ b/packages/js/src/components/modals/SEMrushKeyphrasesTable.js @@ -109,7 +109,10 @@ class SEMrushKeyphrasesTable extends Component { className="dashicons" > - { __( "Learn more about the related keyphrases volume", "wordpress-seo" ) } + { + /* translators: Hidden accessibility text. */ + __( "Learn more about the related keyphrases volume", "wordpress-seo" ) + } @@ -123,7 +126,10 @@ class SEMrushKeyphrasesTable extends Component { className="dashicons" > - { __( "Learn more about the related keyphrases trend", "wordpress-seo" ) } + { + /* translators: Hidden accessibility text. */ + __( "Learn more about the related keyphrases trend", "wordpress-seo" ) + } diff --git a/packages/js/src/containers/PersistentDismissableNotification.js b/packages/js/src/containers/PersistentDismissableNotification.js index ce80ff26787..c4d87a2ae8f 100644 --- a/packages/js/src/containers/PersistentDismissableNotification.js +++ b/packages/js/src/containers/PersistentDismissableNotification.js @@ -34,7 +34,12 @@ const PersistentDismissableNotification = ( { { Image && }

    ); diff --git a/packages/js/src/inline-links/inline.js b/packages/js/src/inline-links/inline.js index 31f68f3a424..9683af4dfbe 100644 --- a/packages/js/src/inline-links/inline.js +++ b/packages/js/src/inline-links/inline.js @@ -277,7 +277,10 @@ function InlineLinkUI( { className="dashicons" > - { __( "Learn more about marking a link as nofollow or sponsored.", "wordpress-seo" ) } + { + /* translators: Hidden accessibility text. */ + __( "Learn more about marking a link as nofollow or sponsored.", "wordpress-seo" ) + } ; diff --git a/packages/js/src/insights/components/prominent-words.js b/packages/js/src/insights/components/prominent-words.js index 2ca3d5592e4..a21b8191739 100644 --- a/packages/js/src/insights/components/prominent-words.js +++ b/packages/js/src/insights/components/prominent-words.js @@ -98,7 +98,7 @@ const ProminentWords = ( { location } ) => { // eslint-disable-line complexity
    ' . $upsell_button . '

    ' . PHP_EOL . PHP_EOL; } diff --git a/composer.json b/composer.json index 4d03071993c..15b9649c27e 100644 --- a/composer.json +++ b/composer.json @@ -81,7 +81,7 @@ "Yoast\\WP\\SEO\\Composer\\Actions::check_coding_standards" ], "check-cs-thresholds": [ - "@putenv YOASTCS_THRESHOLD_ERRORS=130", + "@putenv YOASTCS_THRESHOLD_ERRORS=129", "@putenv YOASTCS_THRESHOLD_WARNINGS=202", "Yoast\\WP\\SEO\\Composer\\Actions::check_cs_thresholds" ], From 7337b988d80916ddd8b9df4210d3cb037a38362f Mon Sep 17 00:00:00 2001 From: Igor <35524806+igorschoester@users.noreply.github.com> Date: Thu, 28 Sep 2023 08:24:37 +0200 Subject: [PATCH 485/616] Following `yst-sr-only` class --- packages/js/src/academy/app.js | 1 + packages/js/src/components/JetpackBoost.js | 14 ++++++-- .../social-profiles/social-field-array.js | 7 +++- .../tailwind-components/tailwind-modal.js | 7 +++- .../integrations-page/algolia-integration.js | 3 ++ .../jetpack-boost-integration.js | 6 +++- .../integrations-page/simple-integration.js | 3 ++ .../toggleable-integration.js | 3 ++ .../woocommerce-integration.js | 1 + packages/js/src/settings/app.js | 2 ++ .../js/src/settings/components/form-layout.js | 5 ++- .../components/formik-page-select-field.js | 1 + .../src/settings/components/notifications.js | 1 + packages/js/src/settings/components/search.js | 7 +++- .../js/src/settings/routes/site-features.js | 35 ++++++++++++++++--- ...generate-titles-and-descriptions-upsell.js | 5 ++- .../shared-admin/components/outbound-link.js | 7 +++- .../src/support/components/resource-card.js | 5 ++- 18 files changed, 98 insertions(+), 15 deletions(-) diff --git a/packages/js/src/academy/app.js b/packages/js/src/academy/app.js index b585dab838f..22b73de904a 100644 --- a/packages/js/src/academy/app.js +++ b/packages/js/src/academy/app.js @@ -277,6 +277,7 @@ const App = () => { { __( "Start free trial lesson", "wordpress-seo" ) } { + /* translators: Hidden accessibility text. */ __( "(Opens in a new browser tab)", "wordpress-seo" ) } diff --git a/packages/js/src/components/JetpackBoost.js b/packages/js/src/components/JetpackBoost.js index d6f4860e7a9..5e150e405e1 100644 --- a/packages/js/src/components/JetpackBoost.js +++ b/packages/js/src/components/JetpackBoost.js @@ -61,7 +61,12 @@ const JetpackBoost = ( { store, isAlertDismissed, onDismissed } ) => {
    @@ -88,7 +93,12 @@ const JetpackBoost = ( { store, isAlertDismissed, onDismissed } ) => { ) } </span> <ExternalLinkIcon className="yst-inline yst-ml-1 yst-h-4 yst-w-4 yst-text-indigo-600" /> - <span className="yst-sr-only">{ __( "(Opens in a new browser tab)", "wordpress-seo" ) }</span> + <span className="yst-sr-only"> + { + /* translators: Hidden accessibility text. */ + __( "(Opens in a new browser tab)", "wordpress-seo" ) + } + </span> </Link> </Root> </PluginPrePublishPanel> diff --git a/packages/js/src/first-time-configuration/tailwind-components/steps/social-profiles/social-field-array.js b/packages/js/src/first-time-configuration/tailwind-components/steps/social-profiles/social-field-array.js index ce9719814e7..8cacdc3e913 100644 --- a/packages/js/src/first-time-configuration/tailwind-components/steps/social-profiles/social-field-array.js +++ b/packages/js/src/first-time-configuration/tailwind-components/steps/social-profiles/social-field-array.js @@ -50,7 +50,12 @@ const SocialFieldArray = ( { items, onAddProfile, onRemoveProfile, onChangeProfi data-index={ index } onClick={ handleRemove } > - <span className="yst-sr-only">{ __( "Delete item", "wordpress-seo" ) }</span> + <span className="yst-sr-only"> + { + /* translators: Hidden accessibility text. */ + __( "Delete item", "wordpress-seo" ) + } + </span> <TrashIcon className="yst-relative yst--top-0.5 yst-w-5 yst-h-5" /> </button> </div> diff --git a/packages/js/src/first-time-configuration/tailwind-components/tailwind-modal.js b/packages/js/src/first-time-configuration/tailwind-components/tailwind-modal.js index cbd04f8fa77..f6b4f5bfb27 100644 --- a/packages/js/src/first-time-configuration/tailwind-components/tailwind-modal.js +++ b/packages/js/src/first-time-configuration/tailwind-components/tailwind-modal.js @@ -55,7 +55,12 @@ export default function TailwindModal( { isOpen, handleClose, hasCloseButton, ch onClick={ handleClose } className="yst-bg-white yst-rounded-md yst-text-slate-400 hover:yst-text-slate-600 focus:yst-outline-none focus:yst-ring-2 focus:yst-ring-offset-2 focus:yst-ring-primary-500" > - <span className="yst-sr-only">{ __( "Close", "wordpress-seo" ) }</span> + <span className="yst-sr-only"> + { + /* translators: Hidden accessibility text. */ + __( "Close", "wordpress-seo" ) + } + </span> <XIcon className="yst-h-6 yst-w-6" /> </button> </div> } diff --git a/packages/js/src/integrations-page/algolia-integration.js b/packages/js/src/integrations-page/algolia-integration.js index 7f91b8f4037..9888806569f 100644 --- a/packages/js/src/integrations-page/algolia-integration.js +++ b/packages/js/src/integrations-page/algolia-integration.js @@ -77,6 +77,7 @@ export const AlgoliaIntegration = ( { /> } <span className="yst-sr-only"> { + /* translators: Hidden accessibility text. */ __( "(Opens in a new browser tab)", "wordpress-seo" ) } </span> @@ -98,6 +99,7 @@ export const AlgoliaIntegration = ( { Learn more <span className="yst-sr-only"> { + /* translators: Hidden accessibility text. */ __( "(Opens in a new browser tab)", "wordpress-seo" ) } </span> @@ -136,6 +138,7 @@ export const AlgoliaIntegration = ( { { __( "Unlock with Premium", "wordpress-seo" ) } <span className="yst-sr-only"> { + /* translators: Hidden accessibility text. 
*/ __( "(Opens in a new browser tab)", "wordpress-seo" ) } </span> diff --git a/packages/js/src/integrations-page/jetpack-boost-integration.js b/packages/js/src/integrations-page/jetpack-boost-integration.js index a3fc744acfb..53b05deb678 100644 --- a/packages/js/src/integrations-page/jetpack-boost-integration.js +++ b/packages/js/src/integrations-page/jetpack-boost-integration.js @@ -116,6 +116,7 @@ export const JetpackBoostIntegration = () => { /> } <span className="yst-sr-only"> { + /* translators: Hidden accessibility text. */ __( "(Opens in a new browser tab)", "wordpress-seo" ) } </span> @@ -135,7 +136,10 @@ export const JetpackBoostIntegration = () => { > { learnMoreLinkText } <span className="yst-sr-only"> - { __( "(Opens in a new browser tab)", "wordpress-seo" ) } + { + /* translators: Hidden accessibility text. */ + __( "(Opens in a new browser tab)", "wordpress-seo" ) + } </span> <ArrowSmRightIcon className="yst-h-4 yst-w-4 yst-ml-1 yst-icon-rtl" /> </Link> } diff --git a/packages/js/src/integrations-page/simple-integration.js b/packages/js/src/integrations-page/simple-integration.js index 741b9a3706f..5d07626f8d3 100644 --- a/packages/js/src/integrations-page/simple-integration.js +++ b/packages/js/src/integrations-page/simple-integration.js @@ -29,6 +29,7 @@ export const SimpleIntegration = ( { integration, isActive, children } ) => { { integration.logo && <IntegrationLogo alt={ `${integration.name} logo` } className={ `${ isActive ? "" : "yst-opacity-50 yst-filter yst-grayscale" }` } /> } <span className="yst-sr-only"> { + /* translators: Hidden accessibility text. */ __( "(Opens in a new browser tab)", "wordpress-seo" ) } </span> @@ -61,6 +62,7 @@ export const SimpleIntegration = ( { integration, isActive, children } ) => { Learn more <span className="yst-sr-only"> { + /* translators: Hidden accessibility text. */ __( "(Opens in a new browser tab)", "wordpress-seo" ) } </span> @@ -86,6 +88,7 @@ export const SimpleIntegration = ( { integration, isActive, children } ) => { { __( "Unlock with Premium", "wordpress-seo" ) } <span className="yst-sr-only"> { + /* translators: Hidden accessibility text. */ __( "(Opens in a new browser tab)", "wordpress-seo" ) } </span> diff --git a/packages/js/src/integrations-page/toggleable-integration.js b/packages/js/src/integrations-page/toggleable-integration.js index cab395e3793..a8d066bf8c6 100644 --- a/packages/js/src/integrations-page/toggleable-integration.js +++ b/packages/js/src/integrations-page/toggleable-integration.js @@ -74,6 +74,7 @@ export const ToggleableIntegration = ( { /> } <span className="yst-sr-only"> { + /* translators: Hidden accessibility text. */ __( "(Opens in a new browser tab)", "wordpress-seo" ) } </span> @@ -95,6 +96,7 @@ export const ToggleableIntegration = ( { Learn more <span className="yst-sr-only"> { + /* translators: Hidden accessibility text. */ __( "(Opens in a new browser tab)", "wordpress-seo" ) } </span> @@ -125,6 +127,7 @@ export const ToggleableIntegration = ( { { __( "Unlock with Premium", "wordpress-seo" ) } <span className="yst-sr-only"> { + /* translators: Hidden accessibility text. 
*/ __( "(Opens in a new browser tab)", "wordpress-seo" ) } </span> diff --git a/packages/js/src/integrations-page/woocommerce-integration.js b/packages/js/src/integrations-page/woocommerce-integration.js index 229c3f14bb0..4c1608ad434 100644 --- a/packages/js/src/integrations-page/woocommerce-integration.js +++ b/packages/js/src/integrations-page/woocommerce-integration.js @@ -86,6 +86,7 @@ export const WoocommerceIntegration = ( { } <span className="yst-sr-only"> { + /* translators: Hidden accessibility text. */ __( "(Opens in a new browser tab)", "wordpress-seo" ) } </span> diff --git a/packages/js/src/settings/app.js b/packages/js/src/settings/app.js index c1de4cc0564..f74c5b39c31 100644 --- a/packages/js/src/settings/app.js +++ b/packages/js/src/settings/app.js @@ -247,7 +247,9 @@ const App = () => { <SidebarNavigation.Mobile openButtonId="button-open-settings-navigation-mobile" closeButtonId="button-close-settings-navigation-mobile" + /* translators: Hidden accessibility text. */ openButtonScreenReaderText={ __( "Open settings navigation", "wordpress-seo" ) } + /* translators: Hidden accessibility text. */ closeButtonScreenReaderText={ __( "Close settings navigation", "wordpress-seo" ) } aria-label={ __( "Settings navigation", "wordpress-seo" ) } > diff --git a/packages/js/src/settings/components/form-layout.js b/packages/js/src/settings/components/form-layout.js index 54f86017245..15b3bc3c492 100644 --- a/packages/js/src/settings/components/form-layout.js +++ b/packages/js/src/settings/components/form-layout.js @@ -56,7 +56,10 @@ const FormLayout = ( { children } ) => { { __( "Discard changes", "wordpress-seo" ) } </Button> <Modal onClose={ unsetRequestUndo } isOpen={ isRequestUndo }> - <Modal.Panel closeButtonScreenReaderText={ __( "Close", "wordpress-seo" ) }> + <Modal.Panel + /* translators: Hidden accessibility text. */ + closeButtonScreenReaderText={ __( "Close", "wordpress-seo" ) } + > <div className="sm:yst-flex sm:yst-items-start"> <div className="yst-mx-auto yst-flex-shrink-0 yst-flex yst-items-center yst-justify-center yst-h-12 yst-w-12 yst-rounded-full yst-bg-red-100 sm:yst-mx-0 sm:yst-h-10 sm:yst-w-10" diff --git a/packages/js/src/settings/components/formik-page-select-field.js b/packages/js/src/settings/components/formik-page-select-field.js index d1dfff10dde..a22eea7c3cf 100644 --- a/packages/js/src/settings/components/formik-page-select-field.js +++ b/packages/js/src/settings/components/formik-page-select-field.js @@ -87,6 +87,7 @@ const FormikPageSelectField = ( { name, id, className = "", ...props } ) => { onQueryChange={ handleQueryChange } className={ classNames( className, props.disabled && "yst-autocomplete--disabled" ) } nullable={ true } + /* translators: Hidden accessibility text. */ clearButtonScreenReaderText={ __( "Clear selection", "wordpress-seo" ) } { ...props } > diff --git a/packages/js/src/settings/components/notifications.js b/packages/js/src/settings/components/notifications.js index 28af3200f1a..02adb10b394 100644 --- a/packages/js/src/settings/components/notifications.js +++ b/packages/js/src/settings/components/notifications.js @@ -81,6 +81,7 @@ const Notifications = () => { ...notification, onDismiss: removeNotification, autoDismiss: notification.variant === "success" ? 5000 : null, + /* translators: Hidden accessibility text. 
*/ dismissScreenReaderLabel: __( "Dismiss", "wordpress-seo" ), } ) ), [ notifications ] ); diff --git a/packages/js/src/settings/components/search.js b/packages/js/src/settings/components/search.js index e3282f99575..23b446e36e1 100644 --- a/packages/js/src/settings/components/search.js +++ b/packages/js/src/settings/components/search.js @@ -261,7 +261,12 @@ const Search = ( { buttonId = "button-search", modalId = "modal-search" } ) => { onClick={ setClose } className="yst-modal__close-button" > - <span className="yst-sr-only">{ __( "Close", "wordpress-seo" ) }</span> + <span className="yst-sr-only"> + { + /* translators: Hidden accessibility text. */ + __( "Close", "wordpress-seo" ) + } + </span> <XIcon className="yst-h-6 yst-w-6" { ...ariaSvgProps } /> </button> </div> diff --git a/packages/js/src/settings/routes/site-features.js b/packages/js/src/settings/routes/site-features.js index b1dd1df46bc..211f93c537d 100644 --- a/packages/js/src/settings/routes/site-features.js +++ b/packages/js/src/settings/routes/site-features.js @@ -187,7 +187,12 @@ const SiteFeatures = () => { <FormLayout> <div className="yst-max-w-6xl"> <fieldset className="yst-min-w-0"> - <legend className="yst-sr-only">{ __( "Writing", "wordpress-seo" ) }</legend> + <legend className="yst-sr-only"> + { + /* translators: Hidden accessibility text. */ + __( "Writing", "wordpress-seo" ) + } + </legend> <div className="yst-max-w-screen-sm yst-mb-8"> <Title as="h2" size="2"> { __( "Writing", "wordpress-seo" ) } @@ -252,7 +257,12 @@ const SiteFeatures = () => { </fieldset> <hr className="yst-my-8" /> <fieldset className="yst-min-w-0"> - <legend className="yst-sr-only">{ __( "Site structure", "wordpress-seo" ) }</legend> + <legend className="yst-sr-only"> + { + /* translators: Hidden accessibility text. */ + __( "Site structure", "wordpress-seo" ) + } + </legend> <div className="yst-max-w-screen-sm yst-mb-8"> <Title as="h2" size="2"> { __( "Site structure", "wordpress-seo" ) } @@ -296,7 +306,12 @@ const SiteFeatures = () => { </fieldset> <hr className="yst-my-8" /> <fieldset id="section-social-sharing" className="yst-min-w-0"> - <legend className="yst-sr-only">{ __( "Social sharing", "wordpress-seo" ) }</legend> + <legend className="yst-sr-only"> + { + /* translators: Hidden accessibility text. */ + __( "Social sharing", "wordpress-seo" ) + } + </legend> <div className="yst-max-w-screen-sm yst-mb-8"> <Title as="h2" size="2" className="yst-mb-2"> { __( "Social sharing", "wordpress-seo" ) } @@ -339,7 +354,12 @@ const SiteFeatures = () => { </fieldset> <hr className="yst-my-8" /> <fieldset className="yst-min-w-0"> - <legend className="yst-sr-only">{ __( "Tools", "wordpress-seo" ) }</legend> + <legend className="yst-sr-only"> + { + /* translators: Hidden accessibility text. */ + __( "Tools", "wordpress-seo" ) + } + </legend> <div className="yst-max-w-screen-sm yst-mb-8"> <Title as="h2" size="2"> { __( "Tools", "wordpress-seo" ) } @@ -366,7 +386,12 @@ const SiteFeatures = () => { </fieldset> <hr className="yst-my-8" /> <fieldset className="yst-min-w-0"> - <legend className="yst-sr-only">{ __( "APIs", "wordpress-seo" ) }</legend> + <legend className="yst-sr-only"> + { + /* translators: Hidden accessibility text. 
*/ + __( "APIs", "wordpress-seo" ) + } + </legend> <div className="yst-max-w-screen-sm yst-mb-8"> <Title as="h2" size="2"> { __( "APIs", "wordpress-seo" ) } diff --git a/packages/js/src/shared-admin/components/ai-generate-titles-and-descriptions-upsell.js b/packages/js/src/shared-admin/components/ai-generate-titles-and-descriptions-upsell.js index b5ca0398e94..f6a7c74b349 100644 --- a/packages/js/src/shared-admin/components/ai-generate-titles-and-descriptions-upsell.js +++ b/packages/js/src/shared-admin/components/ai-generate-titles-and-descriptions-upsell.js @@ -78,7 +78,10 @@ export const AiGenerateTitlesAndDescriptionsUpsell = ( { learnMoreLink, upsellLi <LockOpenIcon className="yst--ml-1 yst-mr-2 yst-h-5 yst-w-5" /> { __( "Unlock with Premium", "wordpress-seo" ) } <span className="yst-sr-only"> - { __( "(Opens in a new browser tab)", "wordpress-seo" ) } + { + /* translators: Hidden accessibility text. */ + __( "(Opens in a new browser tab)", "wordpress-seo" ) + } </span> </Button> </div> diff --git a/packages/js/src/shared-admin/components/outbound-link.js b/packages/js/src/shared-admin/components/outbound-link.js index c72eaa7728f..93abfaab90d 100644 --- a/packages/js/src/shared-admin/components/outbound-link.js +++ b/packages/js/src/shared-admin/components/outbound-link.js @@ -11,7 +11,12 @@ import PropTypes from "prop-types"; export const OutboundLink = ( { href, children, ...props } ) => ( <Link target="_blank" rel="noopener noreferrer" { ...props } href={ href }> { children } - <span className="yst-sr-only">{ __( "(Opens in a new browser tab)", "wordpress-seo" ) }</span> + <span className="yst-sr-only"> + { + /* translators: Hidden accessibility text. */ + __( "(Opens in a new browser tab)", "wordpress-seo" ) + } + </span> </Link> ); OutboundLink.propTypes = { diff --git a/packages/js/src/support/components/resource-card.js b/packages/js/src/support/components/resource-card.js index 99ec9c5b538..66ffbd6b06e 100644 --- a/packages/js/src/support/components/resource-card.js +++ b/packages/js/src/support/components/resource-card.js @@ -36,7 +36,10 @@ export const ResourceCard = ( { imageSrc, title, description, linkHref, linkText > { linkText } <span className="yst-sr-only"> - { __( "(Opens in a new browser tab)", "wordpress-seo" ) } + { + /* translators: Hidden accessibility text. */ + __( "(Opens in a new browser tab)", "wordpress-seo" ) + } </span> <ArrowSmRightIcon className="yst-h-4 yst-w-4 yst-ml-1 yst-icon-rtl" /> </Link> From 46de64bc21226277743836d902538c92e66ef63e Mon Sep 17 00:00:00 2001 From: Mykola Shlyakhtun <mykola.shlyakhtun@gmail.com> Date: Thu, 28 Sep 2023 10:14:46 +0300 Subject: [PATCH 486/616] Remove automatic rel=nofollow on links in RSS feeds#131 By default not add nofollow for links. --- src/integrations/front-end/rss-footer-embed.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/integrations/front-end/rss-footer-embed.php b/src/integrations/front-end/rss-footer-embed.php index 631e0bfd102..cce46ce8d87 100644 --- a/src/integrations/front-end/rss-footer-embed.php +++ b/src/integrations/front-end/rss-footer-embed.php @@ -182,13 +182,13 @@ protected function get_replace_vars( $link_template, $post ) { protected function get_link_template() { /** * Filter: 'nofollow_rss_links' - Allow the developer to determine whether or not to follow the links in - * the bits Yoast SEO adds to the RSS feed, defaults to true. + * the bits Yoast SEO adds to the RSS feed, defaults to false. 
* * @api bool $unsigned Whether or not to follow the links in RSS feed, defaults to true. * * @since 1.4.20 */ - if ( \apply_filters( 'nofollow_rss_links', true ) ) { + if ( \apply_filters( 'nofollow_rss_links', false ) ) { return '<a rel="nofollow" href="%1$s">%2$s</a>'; } From ccd18095ff3f586c9e221678f54d46acdbccb69a Mon Sep 17 00:00:00 2001 From: Nicolas Lemoine <nico.lemoine@gmail.com> Date: Thu, 28 Sep 2023 10:18:07 +0200 Subject: [PATCH 487/616] Improve primary term helper performance No need to get post terms if primary term id is falsy. --- inc/class-wpseo-primary-term.php | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/inc/class-wpseo-primary-term.php b/inc/class-wpseo-primary-term.php index ed7cd2633ef..29d9ed47a0b 100644 --- a/inc/class-wpseo-primary-term.php +++ b/inc/class-wpseo-primary-term.php @@ -43,6 +43,10 @@ public function __construct( $taxonomy_name, $post_id ) { public function get_primary_term() { $primary_term = get_post_meta( $this->post_ID, WPSEO_Meta::$meta_prefix . 'primary_' . $this->taxonomy_name, true ); + if ( ! $primary_term ) { + return false; + } + $terms = $this->get_terms(); if ( ! in_array( (int) $primary_term, wp_list_pluck( $terms, 'term_id' ), true ) ) { From b1b40ceaa888dd5fa5d4c55a2dd51714f9afd52e Mon Sep 17 00:00:00 2001 From: Igor <35524806+igorschoester@users.noreply.github.com> Date: Thu, 28 Sep 2023 11:19:35 +0200 Subject: [PATCH 488/616] Remove instances where the same translation is used in a non-hidden way As there is apparently only one comment available for all references in the code. This prevents a misleading comment when not all references are hidden. --- admin/class-meta-columns.php | 2 - admin/formatter/class-metabox-formatter.php | 1 + admin/taxonomy/class-taxonomy-columns.php | 2 - inc/class-wpseo-admin-bar-menu.php | 1 - .../src/image-select/ImageSelect.js | 2 - packages/helpers/src/makeOutboundLink.js | 1 + .../js/src/analysis/getIndicatorForScore.js | 10 ---- .../tailwind-components/tailwind-modal.js | 7 +-- .../js/src/settings/components/form-layout.js | 5 +- packages/js/src/settings/components/search.js | 7 +-- .../js/src/settings/routes/site-features.js | 51 ++++--------------- .../how-to/components/HowTo.js | 10 +--- .../src/snippet-preview/SnippetPreview.js | 10 +--- src/presenters/admin/help-link-presenter.php | 3 +- 14 files changed, 20 insertions(+), 92 deletions(-) diff --git a/admin/class-meta-columns.php b/admin/class-meta-columns.php index ed791d674bc..1f348cee81e 100644 --- a/admin/class-meta-columns.php +++ b/admin/class-meta-columns.php @@ -100,7 +100,6 @@ public function column_heading( $columns ) { $added_columns['wpseo-score'] = '<span class="yoast-column-seo-score yoast-column-header-has-tooltip" data-tooltip-text="' . esc_attr__( 'SEO score', 'wordpress-seo' ) . '"><span class="screen-reader-text">' . - /* translators: Hidden accessibility text. */ __( 'SEO score', 'wordpress-seo' ) . '</span></span></span>'; } @@ -109,7 +108,6 @@ public function column_heading( $columns ) { $added_columns['wpseo-score-readability'] = '<span class="yoast-column-readability yoast-column-header-has-tooltip" data-tooltip-text="' . esc_attr__( 'Readability score', 'wordpress-seo' ) . '"><span class="screen-reader-text">' . - /* translators: Hidden accessibility text. */ __( 'Readability score', 'wordpress-seo' ) . 
 				'</span></span></span>';
 		}
diff --git a/admin/formatter/class-metabox-formatter.php b/admin/formatter/class-metabox-formatter.php
index 9100491e1c0..d7689f45437 100644
--- a/admin/formatter/class-metabox-formatter.php
+++ b/admin/formatter/class-metabox-formatter.php
@@ -240,6 +240,7 @@ private function get_content_analysis_component_translations() {
 			'content-analysis.highlight' => __( 'Highlight this result in the text', 'wordpress-seo' ),
 			'content-analysis.nohighlight' => __( 'Remove highlight from the text', 'wordpress-seo' ),
 			'content-analysis.disabledButton' => __( 'Marks are disabled in current view', 'wordpress-seo' ),
+			/* translators: Hidden accessibility text. */
 			'a11yNotice.opensInNewTab' => __( '(Opens in a new browser tab)', 'wordpress-seo' ),
 		];
 	}
diff --git a/admin/taxonomy/class-taxonomy-columns.php b/admin/taxonomy/class-taxonomy-columns.php
index b5eb8aab1db..6e3304d4dc4 100644
--- a/admin/taxonomy/class-taxonomy-columns.php
+++ b/admin/taxonomy/class-taxonomy-columns.php
@@ -85,13 +85,11 @@ public function add_columns( array $columns ) {
 		if ( $column_name === 'description' && $this->analysis_seo->is_enabled() ) {
 			$new_columns['wpseo-score'] = '<span class="yoast-tooltip yoast-tooltip-n yoast-tooltip-alt" data-label="' . esc_attr__( 'SEO score', 'wordpress-seo' ) . '"><span class="yoast-column-seo-score yoast-column-header-has-tooltip"><span class="screen-reader-text">' .
-				/* translators: Hidden accessibility text. */
 				__( 'SEO score', 'wordpress-seo' ) .
 				'</span></span></span>';
 		}
 		if ( $column_name === 'description' && $this->analysis_readability->is_enabled() ) {
 			$new_columns['wpseo-score-readability'] = '<span class="yoast-tooltip yoast-tooltip-n yoast-tooltip-alt" data-label="' . esc_attr__( 'Readability score', 'wordpress-seo' ) . '"><span class="yoast-column-readability yoast-column-header-has-tooltip"><span class="screen-reader-text">' .
-				/* translators: Hidden accessibility text. */
 				__( 'Readability score', 'wordpress-seo' ) .
 				'</span></span></span>';
 		}
 	}
diff --git a/inc/class-wpseo-admin-bar-menu.php b/inc/class-wpseo-admin-bar-menu.php
index 8df520d7e57..eca85789a3e 100644
--- a/inc/class-wpseo-admin-bar-menu.php
+++ b/inc/class-wpseo-admin-bar-menu.php
@@ -699,7 +699,6 @@ protected function add_network_settings_submenu( WP_Admin_Bar $wp_admin_bar ) {
 	 * @return string Admin bar title markup.
 	 */
 	protected function get_title() {
-		/* translators: Hidden accessibility text. */
 		return '<div id="yoast-ab-icon" class="ab-item yoast-logo svg"><span class="screen-reader-text">' . __( 'SEO', 'wordpress-seo' ) . '</span></div>';
 	}
diff --git a/packages/components/src/image-select/ImageSelect.js b/packages/components/src/image-select/ImageSelect.js
index dc4d46a6f58..308cc9fcc6c 100644
--- a/packages/components/src/image-select/ImageSelect.js
+++ b/packages/components/src/image-select/ImageSelect.js
@@ -40,9 +40,7 @@ function ImageSelect( props ) {
 			<span className="screen-reader-text">
 				{
 					imageSelected
-						/* translators: Hidden accessibility text. */
 						? __( "Replace image", "wordpress-seo" )
-						/* translators: Hidden accessibility text. */
 						: __( "Select image", "wordpress-seo" )
 				}
 			</span>
diff --git a/packages/helpers/src/makeOutboundLink.js b/packages/helpers/src/makeOutboundLink.js
index 364fcd3830d..7415eba0195 100644
--- a/packages/helpers/src/makeOutboundLink.js
+++ b/packages/helpers/src/makeOutboundLink.js
@@ -72,6 +72,7 @@ export const makeOutboundLink = ( Component = "a" ) => {
 			React.createElement(
 				A11yNotice,
 				null,
+				/* translators: Hidden accessibility text. */
 				__( "(Opens in a new browser tab)", "wordpress-seo" )
 			)
 		);
diff --git a/packages/js/src/analysis/getIndicatorForScore.js b/packages/js/src/analysis/getIndicatorForScore.js
index 797e798cfaf..3f6852c6dfc 100644
--- a/packages/js/src/analysis/getIndicatorForScore.js
+++ b/packages/js/src/analysis/getIndicatorForScore.js
@@ -14,7 +14,6 @@ function getIndicatorForRating( rating ) {
 		case "feedback":
 			return {
 				className: "na",
-				/* translators: Hidden accessibility text. */
 				screenReaderText: __( "Feedback", "wordpress-seo" ),
 				screenReaderReadabilityText: "",
 				screenReaderInclusiveLanguageText: "",
@@ -22,31 +21,22 @@ function getIndicatorForRating( rating ) {
 		case "bad":
 			return {
 				className: "bad",
-				/* translators: Hidden accessibility text. */
 				screenReaderText: __( "Needs improvement", "wordpress-seo" ),
-				/* translators: Hidden accessibility text. */
 				screenReaderReadabilityText: __( "Needs improvement", "wordpress-seo" ),
-				/* translators: Hidden accessibility text. */
 				screenReaderInclusiveLanguageText: __( "Needs improvement", "wordpress-seo" ),
 			};
 		case "ok":
 			return {
 				className: "ok",
-				/* translators: Hidden accessibility text. */
 				screenReaderText: __( "OK SEO score", "wordpress-seo" ),
-				/* translators: Hidden accessibility text. */
 				screenReaderReadabilityText: __( "OK", "wordpress-seo" ),
-				/* translators: Hidden accessibility text. */
 				screenReaderInclusiveLanguageText: __( "Potentially non-inclusive", "wordpress-seo" ),
 			};
 		case "good":
 			return {
 				className: "good",
-				/* translators: Hidden accessibility text. */
 				screenReaderText: __( "Good SEO score", "wordpress-seo" ),
-				/* translators: Hidden accessibility text. */
 				screenReaderReadabilityText: __( "Good", "wordpress-seo" ),
-				/* translators: Hidden accessibility text. */
 				screenReaderInclusiveLanguageText: __( "Good", "wordpress-seo" ),
 			};
 		default:
diff --git a/packages/js/src/first-time-configuration/tailwind-components/tailwind-modal.js b/packages/js/src/first-time-configuration/tailwind-components/tailwind-modal.js
index f6b4f5bfb27..cbd04f8fa77 100644
--- a/packages/js/src/first-time-configuration/tailwind-components/tailwind-modal.js
+++ b/packages/js/src/first-time-configuration/tailwind-components/tailwind-modal.js
@@ -55,12 +55,7 @@ export default function TailwindModal( { isOpen, handleClose, hasCloseButton, ch
 					onClick={ handleClose }
 					className="yst-bg-white yst-rounded-md yst-text-slate-400 hover:yst-text-slate-600 focus:yst-outline-none focus:yst-ring-2 focus:yst-ring-offset-2 focus:yst-ring-primary-500"
 				>
-					<span className="yst-sr-only">
-						{
-							/* translators: Hidden accessibility text. */
-							__( "Close", "wordpress-seo" )
-						}
-					</span>
+					<span className="yst-sr-only">{ __( "Close", "wordpress-seo" ) }</span>
 					<XIcon className="yst-h-6 yst-w-6" />
 				</button>
 			</div> }
diff --git a/packages/js/src/settings/components/form-layout.js b/packages/js/src/settings/components/form-layout.js
index 15b3bc3c492..54f86017245 100644
--- a/packages/js/src/settings/components/form-layout.js
+++ b/packages/js/src/settings/components/form-layout.js
@@ -56,10 +56,7 @@ const FormLayout = ( { children } ) => {
 					{ __( "Discard changes", "wordpress-seo" ) }
 				</Button>
 				<Modal onClose={ unsetRequestUndo } isOpen={ isRequestUndo }>
-					<Modal.Panel
-						/* translators: Hidden accessibility text. */
-						closeButtonScreenReaderText={ __( "Close", "wordpress-seo" ) }
-					>
+					<Modal.Panel closeButtonScreenReaderText={ __( "Close", "wordpress-seo" ) }>
 						<div className="sm:yst-flex sm:yst-items-start">
 							<div
 								className="yst-mx-auto yst-flex-shrink-0 yst-flex yst-items-center yst-justify-center yst-h-12 yst-w-12 yst-rounded-full yst-bg-red-100 sm:yst-mx-0 sm:yst-h-10 sm:yst-w-10"
diff --git a/packages/js/src/settings/components/search.js b/packages/js/src/settings/components/search.js
index 23b446e36e1..e3282f99575 100644
--- a/packages/js/src/settings/components/search.js
+++ b/packages/js/src/settings/components/search.js
@@ -261,12 +261,7 @@ const Search = ( { buttonId = "button-search", modalId = "modal-search" } ) => {
 					onClick={ setClose }
 					className="yst-modal__close-button"
 				>
-					<span className="yst-sr-only">
-						{
-							/* translators: Hidden accessibility text. */
-							__( "Close", "wordpress-seo" )
-						}
-					</span>
+					<span className="yst-sr-only">{ __( "Close", "wordpress-seo" ) }</span>
 					<XIcon className="yst-h-6 yst-w-6" { ...ariaSvgProps } />
 				</button>
 			</div>
diff --git a/packages/js/src/settings/routes/site-features.js b/packages/js/src/settings/routes/site-features.js
index 211f93c537d..4d607bd4f16 100644
--- a/packages/js/src/settings/routes/site-features.js
+++ b/packages/js/src/settings/routes/site-features.js
@@ -187,12 +187,7 @@ const SiteFeatures = () => {
 		<FormLayout>
 			<div className="yst-max-w-6xl">
 				<fieldset className="yst-min-w-0">
-					<legend className="yst-sr-only">
-						{
-							/* translators: Hidden accessibility text. */
-							__( "Writing", "wordpress-seo" )
-						}
-					</legend>
+					<legend className="yst-sr-only">{ __( "Writing", "wordpress-seo" ) }</legend>
 					<div className="yst-max-w-screen-sm yst-mb-8">
 						<Title as="h2" size="2">
 							{ __( "Writing", "wordpress-seo" ) }
@@ -257,16 +252,9 @@ const SiteFeatures = () => {
 				</fieldset>
 				<hr className="yst-my-8" />
 				<fieldset className="yst-min-w-0">
-					<legend className="yst-sr-only">
-						{
-							/* translators: Hidden accessibility text. */
-							__( "Site structure", "wordpress-seo" )
-						}
-					</legend>
+					<legend className="yst-sr-only">{ __( "Site structure", "wordpress-seo" ) }</legend>
 					<div className="yst-max-w-screen-sm yst-mb-8">
-						<Title as="h2" size="2">
-							{ __( "Site structure", "wordpress-seo" ) }
-						</Title>
+						<Title as="h2" size="2">{ __( "Site structure", "wordpress-seo" ) }</Title>
-					<legend className="yst-sr-only">
-						{
-							/* translators: Hidden accessibility text. */
-							__( "Social sharing", "wordpress-seo" )
-						}
-					</legend>
+					<legend className="yst-sr-only">{ __( "Social sharing", "wordpress-seo" ) }</legend>
 					<div className="yst-max-w-screen-sm yst-mb-8">
-						<Title as="h2" size="2">
-							{ __( "Social sharing", "wordpress-seo" ) }
-						</Title>
+						<Title as="h2" size="2">{ __( "Social sharing", "wordpress-seo" ) }</Title>

-					<legend className="yst-sr-only">
-						{
-							/* translators: Hidden accessibility text. */
-							__( "Tools", "wordpress-seo" )
-						}
-					</legend>
+					<legend className="yst-sr-only">{ __( "Tools", "wordpress-seo" ) }</legend>
 					<div className="yst-max-w-screen-sm yst-mb-8">
-						<Title as="h2" size="2">
-							{ __( "Tools", "wordpress-seo" ) }
-						</Title>
+						<Title as="h2" size="2">{ __( "Tools", "wordpress-seo" ) }</Title>

-					<legend className="yst-sr-only">
-						{
-							/* translators: Hidden accessibility text. */
-							__( "APIs", "wordpress-seo" )
-						}
-					</legend>
+					<legend className="yst-sr-only">{ __( "APIs", "wordpress-seo" ) }</legend>
 					<div className="yst-max-w-screen-sm yst-mb-8">
-						<Title as="h2" size="2">
-							{ __( "APIs", "wordpress-seo" ) }
-						</Title>
+						<Title as="h2" size="2">{ __( "APIs", "wordpress-seo" ) }</Title>

-				{
-					/* translators: Hidden accessibility text. */
-					__( "hours", "wordpress-seo" )
-				}
+				{ __( "hours", "wordpress-seo" ) }
-				{
-					/* translators: Hidden accessibility text. */
-					__( "minutes", "wordpress-seo" )
-				}
+				{ __( "minutes", "wordpress-seo" ) }
 				{ this.renderUrl() }
-				{
-					/* translators: Hidden accessibility text. */
-					__( "SEO title preview", "wordpress-seo" ) + ":"
-				}
+				{ __( "SEO title preview", "wordpress-seo" ) + ":" }
-				{
-					/* translators: Hidden accessibility text. */
-					__( "Meta description preview:", "wordpress-seo" )
-				}
+				{ __( "Meta description preview:", "wordpress-seo" ) }
 				{ this.renderDescription() }
diff --git a/src/presenters/admin/help-link-presenter.php b/src/presenters/admin/help-link-presenter.php
index 72c5a32f37f..45db58bfa64 100644
--- a/src/presenters/admin/help-link-presenter.php
+++ b/src/presenters/admin/help-link-presenter.php
@@ -72,7 +72,8 @@ public function present() {
 		if ( $this->opens_in_new_browser_tab ) {
 			$target_blank_attribute = ' target="_blank"';
-			$new_tab_message = ' ' . \__( '(Opens in a new browser tab)', 'wordpress-seo' );
+			/* translators: Hidden accessibility text. */
+			$new_tab_message = ' ' . \__( '(Opens in a new browser tab)', 'wordpress-seo' );
 		}

 		return \sprintf(

From 7a0c0fc8c668e31cdb32dde0245f8f917253a3ec Mon Sep 17 00:00:00 2001
From: Igor <35524806+igorschoester@users.noreply.github.com>
Date: Thu, 28 Sep 2023 11:35:01 +0200
Subject: [PATCH 489/616] Switch to note the hidden part first

---
 admin/menu/class-admin-menu.php                         | 2 +-
 admin/views/tabs/network/features.php                   | 2 +-
 admin/views/tabs/network/integrations.php               | 2 +-
 inc/class-wpseo-admin-bar-menu.php                      | 2 +-
 packages/components/src/data-model/DataModel.js         | 2 +-
 packages/js/src/insights/components/prominent-words.js  | 2 +-
 6 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/admin/menu/class-admin-menu.php b/admin/menu/class-admin-menu.php
index c0cf22e4e0a..53d037e3893 100644
--- a/admin/menu/class-admin-menu.php
+++ b/admin/menu/class-admin-menu.php
@@ -114,7 +114,7 @@ protected function get_notification_counter() {
 		$notification_count = $notification_center->get_notification_count();

 		// Add main page.
-		/* translators: %s: number of notifications; Hidden accessibility text. */
+		/* translators: Hidden accessibility text; %s: number of notifications. */
 		$notifications = sprintf( _n( '%s notification', '%s notifications', $notification_count, 'wordpress-seo' ), number_format_i18n( $notification_count ) );

 		return sprintf( '%2$s', $notification_count, $notifications );
diff --git a/admin/views/tabs/network/features.php b/admin/views/tabs/network/features.php
index e25df24a825..baf438f8a65 100644
--- a/admin/views/tabs/network/features.php
+++ b/admin/views/tabs/network/features.php
@@ -58,7 +58,7 @@
 	$feature_help = new WPSEO_Admin_Help_Panel(
 		WPSEO_Option::ALLOW_KEY_PREFIX . $feature->setting,
-		/* translators: %s expands to a feature's name; Hidden accessibility text. */
+		/* translators: Hidden accessibility text; %s expands to a feature's name. */
 		sprintf( esc_html__( 'Help on: %s', 'wordpress-seo' ), esc_html( $feature->name ) ),
 		$help_text
 	);
diff --git a/admin/views/tabs/network/integrations.php b/admin/views/tabs/network/integrations.php
index a85ede2e664..c931cf95039 100644
--- a/admin/views/tabs/network/integrations.php
+++ b/admin/views/tabs/network/integrations.php
@@ -46,7 +46,7 @@
 	$feature_help = new WPSEO_Admin_Help_Panel(
 		WPSEO_Option::ALLOW_KEY_PREFIX . $integration->setting,
-		/* translators: %s expands to an integration's name; Hidden accessibility text. */
+		/* translators: Hidden accessibility text; %s expands to an integration's name. */
 		sprintf( esc_html__( 'Help on: %s', 'wordpress-seo' ), esc_html( $integration->name ) ),
 		$help_text
 	);
diff --git a/inc/class-wpseo-admin-bar-menu.php b/inc/class-wpseo-admin-bar-menu.php
index eca85789a3e..28307b71416 100644
--- a/inc/class-wpseo-admin-bar-menu.php
+++ b/inc/class-wpseo-admin-bar-menu.php
@@ -850,7 +850,7 @@ protected function get_notification_counter() {
 			return '';
 		}

-		/* translators: %s: number of notifications; Hidden accessibility text. */
+		/* translators: Hidden accessibility text; %s: number of notifications. */
 		$counter_screen_reader_text = sprintf( _n( '%s notification', '%s notifications', $notification_count, 'wordpress-seo' ), number_format_i18n( $notification_count ) );

 		return sprintf( '%s', $notification_count, $counter_screen_reader_text );
diff --git a/packages/components/src/data-model/DataModel.js b/packages/components/src/data-model/DataModel.js
index bbd9eb89753..f1ebb6d1732 100644
--- a/packages/components/src/data-model/DataModel.js
+++ b/packages/components/src/data-model/DataModel.js
@@ -21,7 +21,7 @@ const dataItemProps = {
  * @returns {HTMLElement} A list item.
  */
 const DataItem = ( props ) => {
-	/* translators: %d expands to number of occurrences; Hidden accessibility text. */
+	/* translators: Hidden accessibility text; %d expands to number of occurrences. */
 	const screenReaderText = sprintf( __( "%d occurrences", "wordpress-seo" ), props.number );

 	return (
 { // eslint-disable-line complexity