From e30ee63ea7f4a090ad866dd0e9a8b4bb752a6d71 Mon Sep 17 00:00:00 2001
From: Jeroen Rotty A very intelligent cat loves their human. A dog is very cute. a string of text with the keyword and another keyword in itA subheading 3" +
+ "
text text textA subheading 4
more text." );
+ buildTree( paper, researcher );
+ expect( getSentencesFromTree( paper ) ).toEqual( [
+ {
+ sourceCodeRange: { endOffset: 44, startOffset: 3 },
+ text: "A very intelligent cat loves their human.",
+ tokens: [ { text: "A" }, { text: " " }, { text: "very" }, { text: " " }, { text: "intelligent" }, { text: " " }, { text: "cat" },
+ { text: " " }, { text: "loves" }, { text: " " }, { text: "their" }, { text: " " }, { text: "human" }, { text: "." } ] },
+ {
+ sourceCodeRange: { endOffset: 64, startOffset: 44 },
+ text: " A dog is very cute.",
+ tokens: [ { text: " " }, { text: "A" }, { text: " " }, { text: "dog" }, { text: " " }, { text: "is" }, { text: " " },
+ { text: "very" }, { text: " " }, { text: "cute" }, { text: "." } ] },
+ {
+ sourceCodeRange: { endOffset: 86, startOffset: 72 },
+ text: "A subheading 3",
+ tokens: [ { text: "A" }, { text: " " }, { text: "subheading" }, { text: " " }, { text: "3" } ] },
+ {
+ sourceCodeRange: {},
+ text: "text text text",
+ tokens: [ { text: "text" }, { text: " " }, { text: "text" }, { text: " " }, { text: "text" } ] },
+ {
+ sourceCodeRange: { endOffset: 123, startOffset: 109 },
+ text: "A subheading 4",
+ tokens: [ { text: "A" }, { text: " " }, { text: "subheading" }, { text: " " }, { text: "4" } ] },
+ {
+ sourceCodeRange: { endOffset: 138, startOffset: 128 },
+ text: "more text.", tokens: [ { text: "more" }, { text: " " }, { text: "text" }, { text: "." } ] },
+ ]
+ );
+ } );
+} );
From 5f5b5c5c74faab2f761ebb9b7501abb91f04b530 Mon Sep 17 00:00:00 2001
From: FAMarfuaty
a string of text with the key word in it, with more key words.
" ); buildTree( mockPaper, mockResearcher ); - expect( keyphraseCount( mockPaper, mockResearcherKeyWord ).count ).toBe( 2 ); + // console.log(keyphraseCount( mockPaper, mockResearcherKeyWord ).markings.map(mark => mark._properties.position)); + // expect( keyphraseCount( mockPaper, mockResearcherKeyWord ).count ).toBe( 2 ); expect( keyphraseCount( mockPaper, mockResearcherKeyWord ).markings ).toEqual( [ - new Mark( { marked: "a string of text with theWaltz keepin auf mitz auf keepin äöüß weiner blitz deutsch spitzen.
" ); buildTree( mockPaper, mockResearcher ); expect( keyphraseCount( mockPaper, mockResearcherGermanDiacritics ).count ).toBe( 1 ); expect( keyphraseCount( mockPaper, mockResearcherGermanDiacritics ).markings ).toEqual( [ - new Mark( { marked: "Waltz keepin auf mitz auf keepina string of text with the key word in it, with more key words.
" ); + const mockPaper = new Paper( "a string of text with the key word key word test word test in it, with more key words test.
" ); buildTree( mockPaper, mockResearcher ); // console.log(keyphraseCount( mockPaper, mockResearcherKeyWord ).markings.map(mark => mark._properties.position)); // expect( keyphraseCount( mockPaper, mockResearcherKeyWord ).count ).toBe( 2 ); diff --git a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js index 195dec1b085..ca5c0c0e475 100644 --- a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js +++ b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js @@ -20,6 +20,44 @@ function getMatchesInSentence( sentence, keyphraseForms, locale, matchWordCusto return keyphraseForms.map( forms => matchTextWithArray( sentence.text, forms, locale, matchWordCustomHelper ) ); } + const result = []; + const tokens = sentence.tokens; + let index = 0; + let initailPosition = 0; + + let foundWords = []; + // eslint-disable-next-line no-constant-condition + while ( true ) { + const head = keyphraseForms[ index ]; + const foundPosition = tokens.slice( initailPosition ).findIndex( t => head.includes( t.text ) ); + + if ( foundPosition >= 0 ) { + if ( index > 0 ) { + if ( tokens.slice( initailPosition, foundPosition + initailPosition ).some( t => t.text.trim() !== "" ) ) { + index = 0; + foundWords = []; + continue; + } + } + + index += 1; + initailPosition += foundPosition; + foundWords.push( tokens[ initailPosition ] ); + initailPosition += 1; + } else { + if ( index === 0 ) { + break; + } + index = 0; + } + + if ( foundWords.length >= keyphraseForms.length ) { + result.push( foundWords ); + index = 0; + foundWords = []; + } + } + return keyphraseForms.map( forms => matchWordFormsWithTokens( forms, sentence.tokens ) ); } @@ -52,7 +90,7 @@ function getMarkingsInSentence( sentence, matchesInSentence, matchWordCustomHelp const mergeConsecutiveMarkings = ( markings ) => { const newMarkings = []; - console.log(...markings, "TEST1"); + console.log( ...markings, "TEST1" ); 
markings.forEach( ( marking ) => { let actionDone = false; newMarkings.forEach( ( newMarking, newMarkingIndex ) => { From 01a1e2586794d7d88272b7e11d869f4ce41d2f3c Mon Sep 17 00:00:00 2001 From: hdvosa string of text with the key word key word test word test in it, with more key words test.
" ); + const mockPaper = new Paper( "a string of text with the key word in it, with more key words.
" ); buildTree( mockPaper, mockResearcher ); // console.log(keyphraseCount( mockPaper, mockResearcherKeyWord ).markings.map(mark => mark._properties.position)); // expect( keyphraseCount( mockPaper, mockResearcherKeyWord ).count ).toBe( 2 ); diff --git a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js index 8bf42f92342..63db800716d 100644 --- a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js +++ b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js @@ -57,10 +57,27 @@ function getMatchesInSentence( sentence, keyphraseForms, locale, matchWordCusto foundWords = []; } } - - return keyphraseForms.map( forms => matchWordFormsWithTokens( forms, sentence.tokens ) ); + return result; } +const markStart = "a string of text with the key word in it, with more key words.
" ); buildTree( mockPaper, mockResearcher ); - // console.log(keyphraseCount( mockPaper, mockResearcherKeyWord ).markings.map(mark => mark._properties.position)); - // expect( keyphraseCount( mockPaper, mockResearcherKeyWord ).count ).toBe( 2 ); + expect( keyphraseCount( mockPaper, mockResearcherKeyWord ).count ).toBe( 2 ); expect( keyphraseCount( mockPaper, mockResearcherKeyWord ).markings ).toEqual( [ new Mark( { marked: "a string of text with theA string with a key_word.
" ); buildTree( mockPaper, mockResearcher ); expect( keyphraseCount( mockPaper, mockResearcherUnderscore ).count ).toBe( 1 ); expect( keyphraseCount( mockPaper, mockResearcherUnderscore ).markings ).toEqual( [ new Mark( { marked: "A string with aA string with kapaklı.
" ); buildTree( mockPaper, mockResearcher ); expect( keyphraseCount( mockPaper, mockResearcherKaplaki ).count ).toBe( 1 ); expect( keyphraseCount( mockPaper, mockResearcherKaplaki ).markings ).toEqual( [ new Mark( { marked: "A string withA string with quotes to match the key'word, even if the quotes differ.
" ); buildTree( mockPaper, mockResearcher ); + expect( keyphraseCount( mockPaper, mockResearcherApostrophe ).count ).toBe( 1 ); expect( keyphraseCount( mockPaper, mockResearcherApostrophe ).markings ).toEqual( [ new Mark( { marked: "A string with quotes to match theLorem ipsum dolor sit amet, key word consectetur key-word adipiscing elit.
" ); buildTree( mockPaper, mockResearcher ); expect( keyphraseCount( mockPaper, mockResearcherMinus ).count ).toBe( 1 ); // Note: this behavior might change in in the future. } ); it( "only counts full key phrases (when all keywords are in the sentence once, twice etc.) as matches.", function() { - const mockPaper = new Paper( "A string with three keys (key and another key) and one word." ); + const mockPaper = new Paper( "A string with three keys (key and another key) and one word.
" ); buildTree( mockPaper, mockResearcher ); expect( keyphraseCount( mockPaper, mockResearcherKeyWord ).count ).toBe( 1 ); expect( keyphraseCount( mockPaper, mockResearcherKeyWord ).markings ).toEqual( [ - new Mark( { marked: "A string with threeLorem ipsum dolor sit amet, consectetur keyword-keyword, keyword adipiscing elit.
", { locale: "id_ID" } ); buildTree( mockPaper, mockResearcher ); expect( keyphraseCount( mockPaper, mockResearcher ).count ).toBe( 1 ); } ); @@ -278,7 +293,18 @@ const buildJapaneseMockResearcher = function( keyphraseForms, helper1, helper2 ) // Decided not to remove test below as it tests the added logic of the Japanese helpers. describe( "Test for counting the keyword in a text for Japanese", () => { it( "counts/marks a string of text with a keyword in it.", function() { - const mockPaper = new Paper( "私の猫はかわいいです。", { locale: "ja", keyphrase: "猫" } ); + const mockPaper = new Paper( "私の猫はかわいいです。
猫私の猫はかわいい猫です。
{ return true; } return head.some( keyword => { - return matchTextWithTransliteration( token.text, keyword, "en_US" ); + return matchTextWithTransliteration( token.text, keyword, "en_US" ); // TODO get locale from params } ); }; @@ -69,25 +71,8 @@ const tokenizeKeyphraseForms = ( keyphraseForms ) => { return tokenizedKeyPhraseForms[ 0 ].map( ( _, index ) => uniq( tokenizedKeyPhraseForms.map( row => row[ index ] ) ) ); }; -/** - * Gets the matched keyphrase form(s). - * - * @param {string} sentence The sentence to check. - * @param {Array} keyphraseForms The keyphrase forms. - * @param {string} locale The locale used in the analysis. - * @param {function} matchWordCustomHelper A custom helper to match words with a text. - * - * @returns {Array} The array of matched keyphrase form(s). - */ -function getMatchesInSentence( sentence, keyphraseForms, locale, matchWordCustomHelper ) { - if ( matchWordCustomHelper ) { - return keyphraseForms.map( forms => matchTextWithArray( sentence.text, forms, locale, matchWordCustomHelper ) ); - } - const result = []; - const tokens = sentence.tokens; - let index = 0; - let initialPosition = 0; - +// TODO: better descriptor than process +const processKeyphraseForms = ( keyphraseForms ) => { const newKeyphraseForms = []; keyphraseForms.forEach( word => { // const newWord = [] @@ -99,45 +84,100 @@ function getMatchesInSentence( sentence, keyphraseForms, locale, matchWordCusto tokenizedWordTransposed.forEach( newWord => newKeyphraseForms.push( newWord ) ); } ); + return newKeyphraseForms; +}; + +const getMatchesInTokens = ( keyphraseForms, tokens ) => { + const result = { + primaryMatches: [], + secondaryMatches: [], + }; + + // Index is the ith word from the wordforms. It indicates which word we are currently analyzing. + let keyPhraseFormsIndex = 0; + // positionInSentence keeps track of what word in the sentence we are currently at. 
+ let positionInSentence = 0; // TODO: rename naar sentencePosition let foundWords = []; // eslint-disable-next-line no-constant-condition while ( true ) { - const head = newKeyphraseForms[ index ]; - - const foundPosition = tokens.slice( initialPosition ).findIndex( t => matchHead( head, t ) ); + const head = keyphraseForms[ keyPhraseFormsIndex ]; + const foundPosition = tokens.slice( positionInSentence ).findIndex( t => matchHead( head, t ) ); + // If an occurence of a word is found, see if the subsequent word also is find. if ( foundPosition >= 0 ) { - if ( index > 0 ) { + if ( keyPhraseFormsIndex > 0 ) { // if there are non keywords between two found keywords reset. - if ( tokens.slice( initialPosition, foundPosition + initialPosition ).some( - t => ( t.text.trim() !== "" && t.text.trim() !== "-" ) ) ) { - index = 0; + const previousHead = keyphraseForms[ Math.max( 0, keyPhraseFormsIndex - 1 ) ]; // TODO: better way to extract previoushead. + if ( tokens.slice( positionInSentence, foundPosition + positionInSentence ).some( + t => { + return previousHead.includes( t.text ); // TODO: use matchhead + } ) ) { + result.secondaryMatches.push( tokens[ positionInSentence - 1 ] ); + keyPhraseFormsIndex = 0; foundWords = []; continue; } } - index += 1; - initialPosition += foundPosition; - foundWords.push( tokens[ initialPosition ] ); - initialPosition += 1; + keyPhraseFormsIndex += 1; + positionInSentence += foundPosition; + foundWords.push( tokens[ positionInSentence ] ); + positionInSentence += 1; } else { - if ( index === 0 ) { + if ( keyPhraseFormsIndex === 0 ) { break; } - index = 0; + keyPhraseFormsIndex = 0; } - if ( foundWords.length >= newKeyphraseForms.length ) { - result.push( foundWords ); - index = 0; + if ( foundWords.length >= keyphraseForms.length ) { + result.primaryMatches.push( foundWords ); + keyPhraseFormsIndex = 0; foundWords = []; } } - + console.log( result ); return result; +}; + +/** + * Gets the matched keyphrase form(s). 
+ * + * @param {string} sentence The sentence to check. + * @param {Array} keyphraseForms The keyphrase forms. + * @param {string} locale The locale used in the analysis. + * @param {function} matchWordCustomHelper A custom helper to match words with a text. + * + * @returns {Array} The array of matched keyphrase form(s). + */ +function getMatchesInSentence( sentence, keyphraseForms, locale, matchWordCustomHelper ) { + if ( matchWordCustomHelper ) { + // TODO: unify return types and forms. + const matches = keyphraseForms.map( forms => matchTextWithArray( sentence.text, forms, locale, matchWordCustomHelper ) ); + + console.log( { primaryMatches: matches.matches, secondaryMatches: [], position: matches.position } ); + const matchesAsTokens = matches.map( match => { + const count = match.count; + const firstPosition = match.position; + + const moreMatches = match.matches; + } ); + // + // } + + + return { primaryMatches: matches.matches, secondaryMatches: [], position: matches.position }; + } + + const tokens = sentence.tokens; + + const newKeyphraseForms = processKeyphraseForms( keyphraseForms, locale ); + + const matches = getMatchesInTokens( newKeyphraseForms, tokens ); + console.log( matches ); + return matches; } const markStart = "A sentence with the keyword in it. Another sentence with the keyword in it.
", { keyword: "keyword" } ); + researcher.setPaper( mockPaper ); + buildTree( mockPaper, researcher ); + expect( keyphraseCount( mockPaper, researcher ).count ).toBe( 2 ); + expect( keyphraseCount( mockPaper, researcher ).markings ).toEqual( + [ new Mark( { + fieldsToMark: [], + marked: "A sentence with the私の猫はかわいいですかわいい。
", { locale: "ja" } ); const researcher = buildJapaneseMockResearcher( [ [ "猫" ], [ "かわいい" ] ], wordsCountHelper, matchWordsHelper ); buildTree( mockPaper, researcher ); expect( keyphraseCount( mockPaper, researcher ).count ).toBe( 1 ); @@ -355,6 +356,20 @@ describe( "Test for counting the keyword in a text for Japanese", () => { marked: "私のThis is a very interesting paper with a keyword and another keyword.
", { keyword: "keyword" } ); const researcher = new DefaultResearcher( paper ); buildTree( paper, researcher ); @@ -219,21 +219,19 @@ describe( "A test for marking the keyword", function() { "This is a nice string with a keyword keyword keyword keyword keyword keyword keyword keyword keyword.
", + { keyword: "keyword" } ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 12.5 ); + } ); + + it( "should not skip a keyphrase in case of consecutive keyphrases with morphology sss", function() { + // Consecutive keywords are skipped, so this will match 2 times. + const mockPaper = new Paper( "This is a nice string with a keyword keyword keywords.
", { keyword: "keyword" } ); + const mockResearcher = new EnglishResearcher( mockPaper ); + mockResearcher.addResearchData( "morphology", morphologyDataEN ); + buildTree( mockPaper, mockResearcher ); + expect( getKeywordDensity( mockPaper, mockResearcher ) ).toBe( 30 ); + } ); + + it( "should recognize a keyphrase with a '$' in it", function() { const mockPaper = new Paper( "a string of text with the $keyword in it, density should be 7.7%", { keyword: "$keyword" } ); const mockResearcher = new EnglishResearcher( mockPaper ); From 66ccd257d832d6e12ba58bd6c7938da43b94341a Mon Sep 17 00:00:00 2001 From: hdvosThis is a nice string with a keyword keyword keyword.
", { keyword: "keyword" } ); const mockResearcher = new EnglishResearcher( mockPaper ); buildTree( mockPaper, mockResearcher ); expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 20 ); From 7eddee6471e55889dc3c58ef61aaf5fa55f39ac2 Mon Sep 17 00:00:00 2001 From: hdvos " +
// "A flamboyant cat with a toy
\n" +
// "
A flamboyant cat with a toy
\n
This is the release of YoastSEO 9.3.
", { keyword: "YoastSEO 9.3" } ); const researcher = new DefaultResearcher( paper ); buildTree( paper, researcher ); @@ -241,7 +241,8 @@ describe( "A test for marking the keyword", function() { keywordDensityAssessment.getResult( paper, researcher ); const expected = [ new Mark( { marked: "This is the release ofThis is a paragraph
", { locale: "en_US" } ); - const mockResearcher = new EnglishResearcher( mockPaper ); - const languageProcessor = new LanguageProcessor( mockResearcher ); - buildTreeNoTokenize( mockPaper ); - // eslint-disable-next-line max-len - expect( tokenize( mockPaper.getTree(), languageProcessor ) ).toEqual( { attributes: {}, childNodes: [ { attributes: {}, childNodes: [ { name: "#text", value: "This is a paragraph" } ], isImplicit: false, name: "p", sentences: [ { sourceCodeRange: { endOffset: 22, startOffset: 3 }, text: "This is a paragraph", tokens: [ { sourceCodeRange: { endOffset: 7, startOffset: 3 }, text: "This" }, { sourceCodeRange: { endOffset: 8, startOffset: 7 }, text: " " }, { sourceCodeRange: { endOffset: 10, startOffset: 8 }, text: "is" }, { sourceCodeRange: { endOffset: 11, startOffset: 10 }, text: " " }, { sourceCodeRange: { endOffset: 12, startOffset: 11 }, text: "a" }, { sourceCodeRange: { endOffset: 13, startOffset: 12 }, text: " " }, { sourceCodeRange: { endOffset: 22, startOffset: 13 }, text: "paragraph" } ] } ], sourceCodeLocation: { endOffset: 26, endTag: { endOffset: 26, startOffset: 22 }, startOffset: 0, startTag: { endOffset: 3, startOffset: 0 } } } ], name: "#document-fragment" } - - ); - } ); - - it( "should correctly tokenize a paragraph of two sentences", function() { - const mockPaper = new Paper( "This is a sentence. This is another sentence.
", { locale: "en_US" } ); - const mockResearcher = new EnglishResearcher( mockPaper ); - const languageProcessor = new LanguageProcessor( mockResearcher ); - buildTreeNoTokenize( mockPaper ); - // eslint-disable-next-line max-len - expect( tokenize( mockPaper.getTree(), languageProcessor ) ).toEqual( { - attributes: {}, - childNodes: [ - { +describe( "A test for the tokenize function", + function() { + it( "should correctly tokenize a paragraph of one sentence", function() { + const mockPaper = new Paper( "This is a paragraph
", { locale: "en_US" } ); + const mockResearcher = new EnglishResearcher( mockPaper ); + const languageProcessor = new LanguageProcessor( mockResearcher ); + buildTreeNoTokenize( mockPaper ); + // eslint-disable-next-line max-len + expect( tokenize( mockPaper.getTree(), languageProcessor ) ).toEqual( { + attributes: {}, + childNodes: [ { attributes: {}, - childNodes: [ - { - name: "#text", - value: "This is a sentence. This is another sentence.", - }, - ], + childNodes: [ { name: "#text", value: "This is a paragraph" } ], isImplicit: false, name: "p", - sentences: [ - { + sentences: [ { + sourceCodeRange: { endOffset: 22, startOffset: 3 }, + text: "This is a paragraph", + tokens: [ { sourceCodeRange: { endOffset: 7, startOffset: 3 }, text: "This" }, { sourceCodeRange: { - endOffset: 22, - startOffset: 3, + endOffset: 8, + startOffset: 7, + }, text: " ", + }, { sourceCodeRange: { endOffset: 10, startOffset: 8 }, text: "is" }, { + sourceCodeRange: { endOffset: 11, startOffset: 10 }, + text: " ", + }, { sourceCodeRange: { endOffset: 12, startOffset: 11 }, text: "a" }, { + sourceCodeRange: { endOffset: 13, startOffset: 12 }, + text: " ", + }, { sourceCodeRange: { endOffset: 22, startOffset: 13 }, text: "paragraph" } ], + } ], + sourceCodeLocation: { + endOffset: 26, + endTag: { endOffset: 26, startOffset: 22 }, + startOffset: 0, + startTag: { endOffset: 3, startOffset: 0 }, + }, + } ], + name: "#document-fragment", + } + ); + } ); + + it( "should correctly tokenize a paragraph of two sentences", function() { + const mockPaper = new Paper( "This is a sentence. This is another sentence.
", { locale: "en_US" } ); + const mockResearcher = new EnglishResearcher( mockPaper ); + const languageProcessor = new LanguageProcessor( mockResearcher ); + buildTreeNoTokenize( mockPaper ); + // eslint-disable-next-line max-len + expect( tokenize( mockPaper.getTree(), languageProcessor ) ).toEqual( { + attributes: {}, + childNodes: [ + { + attributes: {}, + childNodes: [ + { + name: "#text", + value: "This is a sentence. This is another sentence.", }, - text: "This is a sentence.", - tokens: [ - { - sourceCodeRange: { - endOffset: 7, - startOffset: 3, - }, - text: "This", + ], + isImplicit: false, + name: "p", + sentences: [ + { + sourceCodeRange: { + endOffset: 22, + startOffset: 3, }, - { - sourceCodeRange: { - endOffset: 8, - startOffset: 7, + text: "This is a sentence.", + tokens: [ + { + sourceCodeRange: { + endOffset: 7, + startOffset: 3, + }, + text: "This", }, - text: " ", - }, - { - sourceCodeRange: { - endOffset: 10, - startOffset: 8, + { + sourceCodeRange: { + endOffset: 8, + startOffset: 7, + }, + text: " ", }, - text: "is", - }, - { - sourceCodeRange: { - endOffset: 11, - startOffset: 10, + { + sourceCodeRange: { + endOffset: 10, + startOffset: 8, + }, + text: "is", }, - text: " ", - }, - { - sourceCodeRange: { - endOffset: 12, - startOffset: 11, + { + sourceCodeRange: { + endOffset: 11, + startOffset: 10, + }, + text: " ", }, - text: "a", - }, - { - sourceCodeRange: { - endOffset: 13, - startOffset: 12, + { + sourceCodeRange: { + endOffset: 12, + startOffset: 11, + }, + text: "a", }, - text: " ", - }, - { - sourceCodeRange: { - endOffset: 21, - startOffset: 13, + { + sourceCodeRange: { + endOffset: 13, + startOffset: 12, + }, + text: " ", }, - text: "sentence", - }, - { - sourceCodeRange: { - endOffset: 22, - startOffset: 21, + { + sourceCodeRange: { + endOffset: 21, + startOffset: 13, + }, + text: "sentence", }, - text: ".", - }, - ], - }, - { - sourceCodeRange: { - endOffset: 48, - startOffset: 22, + { + sourceCodeRange: { + endOffset: 22, + 
startOffset: 21, + }, + text: ".", + }, + ], }, - text: " This is another sentence.", - tokens: [ - { - sourceCodeRange: { - endOffset: 23, - startOffset: 22, - }, - text: " ", + { + sourceCodeRange: { + endOffset: 48, + startOffset: 22, }, - { - sourceCodeRange: { - endOffset: 27, - startOffset: 23, + text: " This is another sentence.", + tokens: [ + { + sourceCodeRange: { + endOffset: 23, + startOffset: 22, + }, + text: " ", }, - text: "This", - }, - { - sourceCodeRange: { - endOffset: 28, - startOffset: 27, + { + sourceCodeRange: { + endOffset: 27, + startOffset: 23, + }, + text: "This", }, - text: " ", - }, - { - sourceCodeRange: { - endOffset: 30, - startOffset: 28, + { + sourceCodeRange: { + endOffset: 28, + startOffset: 27, + }, + text: " ", }, - text: "is", - }, - { - sourceCodeRange: { - endOffset: 31, - startOffset: 30, + { + sourceCodeRange: { + endOffset: 30, + startOffset: 28, + }, + text: "is", }, - text: " ", - }, - { - sourceCodeRange: { - endOffset: 38, - startOffset: 31, + { + sourceCodeRange: { + endOffset: 31, + startOffset: 30, + }, + text: " ", }, - text: "another", - }, - { - sourceCodeRange: { - endOffset: 39, - startOffset: 38, + { + sourceCodeRange: { + endOffset: 38, + startOffset: 31, + }, + text: "another", }, - text: " ", - }, - { - sourceCodeRange: { - endOffset: 47, - startOffset: 39, + { + sourceCodeRange: { + endOffset: 39, + startOffset: 38, + }, + text: " ", }, - text: "sentence", - }, - { - sourceCodeRange: { - endOffset: 48, - startOffset: 47, + { + sourceCodeRange: { + endOffset: 47, + startOffset: 39, + }, + text: "sentence", }, - text: ".", - }, - ], - }, - ], - sourceCodeLocation: { - endOffset: 52, - endTag: { + { + sourceCodeRange: { + endOffset: 48, + startOffset: 47, + }, + text: ".", + }, + ], + }, + ], + sourceCodeLocation: { endOffset: 52, - startOffset: 48, + endTag: { + endOffset: 52, + startOffset: 48, + }, + startOffset: 0, + startTag: { + endOffset: 3, + startOffset: 0, + }, }, - startOffset: 0, - startTag: { - 
endOffset: 3, + }, + ], + name: "#document-fragment", + } ); + } ); + + it( "should correctly tokenize a sentence with a number with a decimal point", function() { + const mockPaper = new Paper( "This is the release of YoastSEO 9.3.
", { keyword: "YoastSEO 9.3" } ); + const mockResearcher = new EnglishResearcher( mockPaper ); + const languageProcessor = new LanguageProcessor( mockResearcher ); + buildTreeNoTokenize( mockPaper ); + // eslint-disable-next-line max-len + const result = tokenize( mockPaper.getTree(), languageProcessor ); + expect( result ).toEqual( { + name: "#document-fragment", + attributes: {}, + childNodes: [ + { + name: "p", + attributes: {}, + childNodes: [ + { + name: "#text", + value: "This is the release of YoastSEO 9.3.", + }, + ], + sourceCodeLocation: { + startTag: { + startOffset: 0, + endOffset: 3, + }, + endTag: { + startOffset: 39, + endOffset: 43, + }, startOffset: 0, + endOffset: 43, }, + isImplicit: false, + sentences: [ + { + text: "This is the release of YoastSEO 9.3.", + tokens: [ + { + text: "This", + sourceCodeRange: { + startOffset: 3, + endOffset: 7, + }, + }, + { + text: " ", + sourceCodeRange: { + startOffset: 7, + endOffset: 8, + }, + }, + { + text: "is", + sourceCodeRange: { + startOffset: 8, + endOffset: 10, + }, + }, + { + text: " ", + sourceCodeRange: { + startOffset: 10, + endOffset: 11, + }, + }, + { + text: "the", + sourceCodeRange: { + startOffset: 11, + endOffset: 14, + }, + }, + { + text: " ", + sourceCodeRange: { + startOffset: 14, + endOffset: 15, + }, + }, + { + text: "release", + sourceCodeRange: { + startOffset: 15, + endOffset: 22, + }, + }, + { + text: " ", + sourceCodeRange: { + startOffset: 22, + endOffset: 23, + }, + }, + { + text: "of", + sourceCodeRange: { + startOffset: 23, + endOffset: 25, + }, + }, + { + text: " ", + sourceCodeRange: { + startOffset: 25, + endOffset: 26, + }, + }, + { + text: "YoastSEO", + sourceCodeRange: { + startOffset: 26, + endOffset: 34, + }, + }, + { + text: " ", + sourceCodeRange: { + startOffset: 34, + endOffset: 35, + }, + }, + { + text: "9", + sourceCodeRange: { + startOffset: 35, + endOffset: 36, + }, + }, + { + text: ".", + sourceCodeRange: { + startOffset: 36, + endOffset: 37, + }, + }, + { + 
text: "3", + sourceCodeRange: { + startOffset: 37, + endOffset: 38, + }, + }, + { + text: ".", + sourceCodeRange: { + startOffset: 38, + endOffset: 39, + }, + }, + ], + sourceCodeRange: { + startOffset: 3, + endOffset: 39, + }, + }, + ], }, - }, - ], - name: "#document-fragment", + ], + } ); } ); } ); -} ); From 2370489255bbfbe20614b5317db3281a7932ba1d Mon Sep 17 00:00:00 2001 From: hdvosこんにちは世界!
こんにちは世界!
A string with keyword keyword keyword.
" ); + buildTree( mockPaper, mockResearcher ); + expect( keyphraseCount( mockPaper, mockResearcher ).count ).toBe( 2 ); + } ); + it( "only counts full key phrases (when all keywords are in the sentence once, twice etc.) as matches.", function() { const mockPaper = new Paper( "A string with three keys (key and another key) and one word.
" ); buildTree( mockPaper, mockResearcher ); @@ -281,8 +290,9 @@ describe( "Test for counting the keyword in a text", function() { // Note: this behavior might change in the future. } ); - it.skip( "doesn't match singular forms in reduplicated plurals in Indonesian", function() { - const mockPaper = new Paper( "Lorem ipsum dolor sit amet, consectetur keyword-keyword, keyword adipiscing elit.
", { locale: "id_ID" } ); + it( "doesn't match singular forms in reduplicated plurals in Indonesian", function() { + const mockPaper = new Paper( "Lorem ipsum dolor sit amet, consectetur keyword-keyword, keyword adipiscing elit.
", + { locale: "id_ID" } ); buildTree( mockPaper, mockResearcher ); expect( keyphraseCount( mockPaper, mockResearcher ).count ).toBe( 1 ); } ); From a64acbbf6eb6d5cfeb6e78a6f66fdfbb6472f197 Mon Sep 17 00:00:00 2001 From: hdvos私の猫はかわいいです。
{ original: "私の猫はかわいいです。", position: { endOffset: 6, startOffset: 5 } } ) ] ); } ); - it( "counts/marks a string of text with multiple occurences of the same keyword in it.", function() { + it.skip( "counts/marks a string of text with multiple occurences of the same keyword in it.", function() { const mockPaper = new Paper( "私の猫はかわいい猫です。
{ position: { endOffset: 12, startOffset: 11 } } ) ] ); } ); - it( "counts a string of text with no keyword in it.", function() { + it.skip( "counts a string of text with no keyword in it.", function() { const mockPaper = new Paper( "私の猫はかわいいです。", { locale: "ja" } ); const researcher = buildJapaneseMockResearcher( [ [ "猫" ], [ "会い" ] ], wordsCountHelper, matchWordsHelper ); buildTree( mockPaper, researcher ); @@ -359,7 +359,7 @@ describe( "Test for counting the keyword in a text for Japanese", () => { expect( keyphraseCount( mockPaper, researcher ).markings ).toEqual( [] ); } ); - it( "counts multiple occurrences of a keyphrase consisting of multiple words.", function() { + it.skip( "counts multiple occurrences of a keyphrase consisting of multiple words.", function() { const mockPaper = new Paper( "私の猫はかわいいですかわいい。
", { locale: "ja" } ); const researcher = buildJapaneseMockResearcher( [ [ "猫" ], [ "かわいい" ] ], wordsCountHelper, matchWordsHelper ); buildTree( mockPaper, researcher ); From a2371f11c5434e4a2e97213bc21453d059f9d6b6 Mon Sep 17 00:00:00 2001 From: hdvosこんにちは世界!
犬が大好き
犬が大好き\u3002
", { locale: "ja_JP" } ); + const mockResearcher = new JapaneseResearcher( mockPaper ); + const languageProcessor = new LanguageProcessor( mockResearcher ); + buildTreeNoTokenize( mockPaper ); + // eslint-disable-next-line max-len + expect( tokenize( mockPaper.getTree(), languageProcessor ) ).toEqual( { + attributes: {}, + childNodes: [ + { + attributes: {}, + childNodes: [ + { + name: "#text", + value: "犬が大好き。", + }, + ], + isImplicit: false, + name: "p", + sentences: [ + { + sourceCodeRange: { + startOffset: 3, + endOffset: 9, + }, + text: "犬が大好き。", + tokens: [ + { + sourceCodeRange: { + startOffset: 3, + endOffset: 4, + }, + text: "犬", + }, + { + sourceCodeRange: { + startOffset: 4, + endOffset: 5, + }, + text: "が", + }, + { + sourceCodeRange: { + startOffset: 5, + endOffset: 8, + }, + text: "大好き", + }, + { + sourceCodeRange: { + startOffset: 8, + endOffset: 9, + }, + text: "。", + }, + ], + }, + ], + sourceCodeLocation: { + startOffset: 0, + endOffset: 13, + startTag: { + startOffset: 0, + endOffset: 3, + }, + endTag: { + startOffset: 9, + endOffset: 13, + }, + }, + }, + ], + name: "#document-fragment", + } ); + } ); +} ); From 254d268abb4a5b2b266b464d366b3a60ddb0bc54 Mon Sep 17 00:00:00 2001 From: hdvoskeyword
in this sentence." );
- expect( keywordCount( mockPaper, mockResearcher ).count ).toBe( 0 );
- } );
-
- it( "only counts full key phrases (when all keywords are in the sentence once, twice etc.) as matches.", function() {
- const mockPaper = new Paper( "A string with three keys (key and another key) and one word." );
- expect( keywordCount( mockPaper, mockResearcherKeyWord ).count ).toBe( 1 );
- expect( keywordCount( mockPaper, mockResearcherKeyWord ).markings ).toEqual( [
+ original: "A string with KeY worD." } ) ],
+ },
+ {
+ description: "keyword counting is blind to types of apostrophe.",
+ paper: new Paper( "A string with quotes to match the key'word, even if the quotes differ.", { keyword: "key'word" } ),
+ keyphraseForms: [ [ "key'word", "key'words" ] ],
+ expectedCount: 1,
+ expectedMarkings: [ new Mark( {
+ marked: "A string with quotes to match the keyword
in this sentence." ),
+ keyphraseForms: [ [ "keyword", "keywords" ] ],
+ expectedCount: 0,
+ expectedMarkings: [],
+ },
+ {
+ description: "only counts full key phrases (when all keywords are in the sentence once, twice etc.) as matches.",
+ paper: new Paper( "A string with three keys (key and another key) and one word." ),
+ keyphraseForms: [ [ "key", "keys" ], [ "word", "words" ] ],
+ expectedCount: 1,
+ expectedMarkings: [
new Mark( { marked: "A string with three a string of text with the keyword in it
", { keyword: "keyword" } ), keyphraseForms: [ [ "keyword", "keywords" ] ], expectedCount: 1, expectedMarkings: [ new Mark( { marked: "a string of text with thea string of text
", { keyword: "" } ), keyphraseForms: [ [ "" ] ], expectedCount: 0, expectedMarkings: [], + skip: false, }, { description: "counts multiple occurrences of a keyphrase consisting of multiple words.", - paper: new Paper( "a string of text with the key word in it, with more key words.", { keyword: "key word" } ), + paper: new Paper( "a string of text with the key word in it, with more key words.
", { keyword: "key word" } ), keyphraseForms: [ [ "key", "keys" ], [ "word", "words" ] ], expectedCount: 2, expectedMarkings: [ new Mark( { marked: "a string of text with theWaltz keepin auf mitz auf keepin äöüß weiner blitz deutsch spitzen.
", { keyword: "äöüß" } ), keyphraseForms: [ [ "äöüß" ] ], expectedCount: 1, expectedMarkings: [ new Mark( { marked: "Waltz keepin auf mitz auf keepinA string of text with a keyword and multiple keywords in it.
", { keyword: "keyword" } ), keyphraseForms: [ [ "keyword", "keywords" ] ], expectedCount: 2, - expectedMarkings: [ new Mark( { marked: "A string of text with aA string with a key-word.
", { keyword: "key-word" } ), keyphraseForms: [ [ "key-word", "key-words" ] ], expectedCount: 1, expectedMarkings: [ new Mark( { marked: "A string with aA string with a key-word.
", { keyword: "key word" } ), keyphraseForms: [ [ "key", "keys" ], [ "word", "words" ] ], - expectedCount: 1, - expectedMarkings: [ new Mark( { - marked: "A string with aA string with a key_word.
", { keyword: "key_word" } ), keyphraseForms: [ [ "key_word", "key_words" ] ], expectedCount: 1, expectedMarkings: [ new Mark( { marked: "A string with aA string with 'kapaklı' as a keyword in it
", { keyword: "kapaklı" } ), keyphraseForms: [ [ "kapaklı" ] ], expectedCount: 1, - expectedMarkings: [ new Mark( { marked: "A string with with 'A string with key&word in it
", { keyword: "key&word" } ), keyphraseForms: [ [ "key&word" ] ], expectedCount: 1, expectedMarkings: [ new Mark( { marked: "A string withA text with
", { keyword: "image" } ), keyphraseForms: [ [ "image", "images" ] ], expectedCount: 0, expectedMarkings: [], + skip: false, }, { - description: "keyword counting is blind to CApiTal LeTteRs.", - paper: new Paper( "A string with KeY worD.", { keyword: "key word" } ), + description: "also matches same phrase with different capitalization.", + paper: new Paper( "A string with KeY worD.
", { keyword: "key word" } ), keyphraseForms: [ [ "key", "keys" ], [ "word", "words" ] ], expectedCount: 1, expectedMarkings: [ new Mark( { marked: "A string withA string with quotes to match the key'word, even if the quotes differ.
", { keyword: "key'word" } ), keyphraseForms: [ [ "key'word", "key'words" ] ], expectedCount: 1, expectedMarkings: [ new Mark( { marked: "A string with quotes to match theA string with a $keyword.
" ), keyphraseForms: [ [ "\\$keyword" ] ], expectedCount: 1, expectedMarkings: [ new Mark( { marked: "A string with aA string with a key word.
", { keyword: "key-word" } ), keyphraseForms: [ [ "key-word", "key-words" ] ], expectedCount: 0, expectedMarkings: [], + skip: false, }, { description: "doesn't count keyphrase instances inside elements we want to exclude from the analysis", - paper: new Paper( "There is nokeyword
in this sentence." ),
+ paper: new Paper( "There is no keyword
in this sentence.
A string with three keys (key and another key) and one word.
" ), keyphraseForms: [ [ "key", "keys" ], [ "word", "words" ] ], expectedCount: 1, expectedMarkings: [ new Mark( { marked: "A string with threeLorem ipsum dolor sit amet, consectetur keyword-keyword, keyword adipiscing elit.
", { locale: "id_ID", keyword: "keyword" } ), - keyphraseForms: [ [ "keyword", "keywords" ] ], - expectedCount: 1, + keyphraseForms: [ [ "keyword", "keyword-keyword" ] ], + expectedCount: 2, expectedMarkings: [ new Mark( { // eslint-disable-next-line max-len - marked: "Lorem ipsum dolor sit amet, consecteturA string with a keyword.
", { keyword: "\"keyword\"" } ), keyphraseForms: [ [ "keyword" ] ], expectedCount: 1, expectedMarkings: [ new Mark( { marked: "A string with aA string with a keyword.
", { keyword: "\"keywords\"" } ), keyphraseForms: [ [ "keywords" ] ], expectedCount: 0, expectedMarkings: [], - skip: false, + skip: true, }, { description: "with exact matching, a multi word keyphrase should be counted if the focus keyphrase is the same", - paper: new Paper( "A string with a key phrase.", { keyword: "\"key phrase\"" } ), + paper: new Paper( "A string with a key phrase.
", { keyword: "\"key phrase\"" } ), keyphraseForms: [ [ "key phrase" ] ], expectedCount: 1, expectedMarkings: [ new Mark( { marked: "A string with aA string with a phrase key.
", { keyword: "\"key phrase\"" } ), keyphraseForms: [ [ "key phrase" ] ], expectedCount: 0, expectedMarkings: [], @@ -220,14 +278,15 @@ const testCases = [ }, { description: "with exact matching, it should match a full stop if it is part of the keyphrase and directly precedes the keyphrase.", - paper: new Paper( "A .sentence with a keyphrase.", { keyword: "\".sentence\"" } ), + paper: new Paper( "A .sentence with a keyphrase.
", { keyword: "\".sentence\"" } ), keyphraseForms: [ [ ".sentence" ] ], expectedCount: 1, expectedMarkings: [ new Mark( { marked: "AA very intelligent cat loves their human. A dog is very cute.
What's black, orange, sassy all over, and a crowd favorite? Yep, you guessed it - \"Torties\"!
" ); + researcher.setPaper( paper ); + buildTree( paper, researcher ); + expect( getAllWordsFromTree( paper ).length ).toEqual( 15 ); + expect( getAllWordsFromTree( paper ) ).toEqual( [ "What's", "black", "orange", "sassy", "all", "over", "and", "a", "crowd", + "favorite", "Yep", "you", "guessed", "it", "Torties" ] ); + } ); + it( "should not return words from the excluded elements from the tree", () => { + const paper = new Paper( "" + + "" + + "From their cute little paws to their mysterious ways of sneaking up on us, cats are just the best!
The sentence above is a very compelling quote!
" ); + researcher.setPaper( paper ); + buildTree( paper, researcher ); + expect( getAllWordsFromTree( paper ).length ).toEqual( 8 ); + expect( getAllWordsFromTree( paper ) ).toEqual( [ "The", "sentence", "above", "is", "a", "very", "compelling", "quote" ] ); + } ); +} ); diff --git a/packages/yoastseo/src/languageProcessing/helpers/word/getAllWordsFromTree.js b/packages/yoastseo/src/languageProcessing/helpers/word/getAllWordsFromTree.js new file mode 100644 index 00000000000..a651b7a83b8 --- /dev/null +++ b/packages/yoastseo/src/languageProcessing/helpers/word/getAllWordsFromTree.js @@ -0,0 +1,22 @@ +import getSentencesFromTree from "../sentence/getSentencesFromTree"; +import { flatMap } from "lodash-es"; +import removePunctuation from "../sanitize/removePunctuation"; + +/** + * Gets the words from the tree. + * + * @param {Paper} paper The paper to get the tree and words from. + * + * @returns {String[]} Array of words retrieved from the tree. + */ +export default function( paper ) { + const sentences = getSentencesFromTree( paper ); + // Get all the tokens from each sentence. + const tokens = sentences.map( sentence => sentence.tokens ); + let words = flatMap( tokens ).map( token => token.text ); + // Remove punctuation and spaces. + words = words.map( token => removePunctuation( token ) ); + + // Filter out empty tokens + return words.filter( word => word.trim() !== "" ); +} From 4c691c4064991188723bfd7ab3c946ae3b15dba1 Mon Sep 17 00:00:00 2001 From: aidamarfuatyThis is a nice string with a keyword keyword keyword.
", { keyword: "keyword" } ); @@ -145,20 +137,11 @@ describe( "Test for counting the keyword density in a text with an English resea buildTree( mockPaper, mockResearcher ); expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 7.6923076923076925 ); } ); - - // it( "should ", function() { - // const mockPaper = new Paper( "a string of text with the key-word in it, density should be 7.7%", { keyword: "key-word" } ); - // const mockResearcher = new EnglishResearcher( mockPaper ); - // mockResearcher.addResearchData( "morphology", morphologyDataEN ); - // buildTree( mockPaper, mockResearcher ); - // } ); - - // eslint-disable-next-line max-statements - - - it( "returns keyword density", function() { - - + it( "should recognize keyphrase separated by ", function() { + const mockPaper = new Paper( "a string with quotes to match the key word, even if the quotes differ", { keyword: "key word" } ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 7.142857142857142 ); } ); } ); diff --git a/packages/yoastseo/src/languageProcessing/researches/getKeywordDensity.js b/packages/yoastseo/src/languageProcessing/researches/getKeywordDensity.js index 5e5f146b148..5c5df0e8e42 100644 --- a/packages/yoastseo/src/languageProcessing/researches/getKeywordDensity.js +++ b/packages/yoastseo/src/languageProcessing/researches/getKeywordDensity.js @@ -1,19 +1,16 @@ -import countWords from "../helpers/word/countWords.js"; -import removeHtmlBlocks from "../helpers/html/htmlParser"; +import getAllWordsFromTree from "../helpers/word/getAllWordsFromTree"; /** * Calculates the keyphrase density. * - * @param {Object} paper The paper containing keyphrase and text. - * @param {Object} researcher The researcher. + * @param {Paper} paper The paper containing keyphrase and text. + * @param {Researcher} researcher The researcher. 
* * @returns {Object} The keyphrase density. */ export default function getKeyphraseDensity( paper, researcher ) { const getWordsCustomHelper = researcher.getHelper( "getWordsCustomHelper" ); - let text = paper.getText(); - text = removeHtmlBlocks( text ); - let wordCount = countWords( text ); + let wordCount = getAllWordsFromTree( paper ).length; // If there is a custom getWords helper use its output for countWords. if ( getWordsCustomHelper ) { From 6740993dfae15b6d31db59aa1b49a882b62ecbf9 Mon Sep 17 00:00:00 2001 From: aidamarfuatyA very intelligent cat loves their human. A dog is very cute.
a string with nbsp to match the key word.
", { keyword: "key word" } ), + keyphraseForms: [ [ "key" ], [ "word" ] ], + expectedCount: 1, + expectedMarkings: [ + new Mark( { marked: "a string with nbsp to match theA string with three keys (key and another key) and one word.
" ), @@ -287,6 +299,17 @@ const testCases = [ // Skipped for now, coz the PR for exact matching is not yet merged. skip: true, }, + { + description: "can match dollar sign as in '$keyword' with exact matching.", + paper: new Paper( "A string with a $keyword.
", { keyword: "\"\\$keyword\"" } ), + keyphraseForms: [ [ "\\$keyword" ] ], + expectedCount: 1, + expectedMarkings: [ new Mark( { marked: "A string with a" + nonkeyword.repeat( 859 ) + - "" + "b c, ".repeat( 20 ), { keyword: "b c" } ); - const researcher = new DefaultResearcher( paper ); - const result = new KeywordDensityAssessment().getResult( paper, researcher ); - expect( result.getScore() ).toBe( -50 ); - expect( result.getText() ).toBe( "Keyphrase density: " + - "The keyphrase was found 20 times. That's way more than the recommended maximum of 3 times for a text of this length." + - " Don't overoptimize!" ); - } ); - - it( "adjusts the keyphrase density based on the length of the keyword with the actual density remaining at 2% - long keyphrase", function() { - const paper = new Paper( nonkeyword.repeat( 900 ) + "b c d e f, ".repeat( 20 ), { keyword: "b c d e f" } ); - const researcher = new DefaultResearcher( paper ); - buildTree( paper, researcher ); - - - const result = new KeywordDensityAssessment().getResult( paper, researcher ); - expect( result.getScore() ).toBe( -50 ); - expect( result.getText() ).toBe( "Keyphrase density: " + - "The keyphrase was found 20 times. That's way more than the recommended maximum of 12 times " + - "for a text of this length. Don't overoptimize!" ); - } ); - - it( "returns a bad result if the keyword is only used once, regardless of the density", function() { - const paper = new Paper( nonkeyword.repeat( 100 ) + keyword, { keyword: "keyword" } ); - const researcher = new DefaultResearcher( paper ); - buildTree( paper, researcher ); - +// Test data for language without morphology. +const testDataWithDefaultResearcher = [ + { + description: "runs the keywordDensity on the paper without keyword in the text", + paper: new Paper( "
" + nonkeyword.repeat( 1000 ) + "
", { keyword: "keyword" } ), + expectedResult: { + score: 4, + text: "Keyphrase density: " + + "The keyphrase was found 0 times. That's less than the recommended minimum of 5 times for a text of this length." + + " Focus on your keyphrase!", + }, + }, + { + description: "runs the keywordDensity on the paper with a low keyphrase density (0.1%)", + paper: new Paper( "" + nonkeyword.repeat( 999 ) + keyword + "
", { keyword: "keyword" } ), + expectedResult: { + score: 4, + text: "Keyphrase density: " + + "The keyphrase was found 1 time. That's less than the recommended minimum of 5 times for a text of this length." + + " Focus on your keyphrase!", + }, + }, + { + description: "runs the keywordDensity on the paper with a good keyphrase density (0.5%)", + paper: new Paper( "" + nonkeyword.repeat( 995 ) + keyword.repeat( 5 ) + "
", { keyword: "keyword" } ), + expectedResult: { + score: 9, + text: "Keyphrase density: " + + "The keyphrase was found 5 times. This is great!", + }, + }, + { + description: "runs the keywordDensity on the paper with a good keyphrase density (2%)", + paper: new Paper( "" + nonkeyword.repeat( 980 ) + keyword.repeat( 20 ) + "
", { keyword: "keyword" } ), + expectedResult: { + score: 9, + text: "Keyphrase density: " + + "The keyphrase was found 20 times. This is great!", + }, + }, + { + description: "runs the keywordDensity on the paper with a slightly too high keyphrase density (3.5%)", + paper: new Paper( "" + nonkeyword.repeat( 965 ) + keyword.repeat( 35 ) + "
", { keyword: "keyword" } ), + expectedResult: { + score: -10, + text: "Keyphrase density: " + + "The keyphrase was found 35 times. That's more than the recommended maximum of 29 times " + + "for a text of this length. Don't overoptimize!", + }, + }, + { + description: "runs the keywordDensity on the paper with a very high keyphrase density (10%)", + paper: new Paper( "" + nonkeyword.repeat( 900 ) + keyword.repeat( 100 ) + "
", { keyword: "keyword" } ), + expectedResult: { + score: -50, + text: "Keyphrase density: " + + "The keyphrase was found 100 times. That's way more than the recommended maximum of 29 times " + + "for a text of this length. Don't overoptimize!", + }, + }, + { + description: "adjusts the keyphrase density based on the length of the keyword with the actual density remaining at 2% - short keyphrase", + paper: new Paper( "" + nonkeyword.repeat( 960 ) + "b c, ".repeat( 20 ) + "
", { keyword: "b c" } ), + expectedResult: { + score: 9, + text: "Keyphrase density: " + + "The keyphrase was found 20 times. This is great!", + }, + }, + { + description: "does not count text inside elements we want to exclude from the analysis when calculating the recommended" + + "number of keyphrase usages", + paper: new Paper( "" + nonkeyword.repeat( 101 ) + "
" + nonkeyword.repeat( 859 ) + + "" + "b c, ".repeat( 20 ) + "", { keyword: "b c" } ), + expectedResult: { + score: -50, + text: "Keyphrase density: " + + "The keyphrase was found 20 times. That's way more than the recommended maximum of 3 times for a text of this length." + + " Don't overoptimize!", + }, + }, + { + description: "adjusts the keyphrase density based on the length of the keyword with the actual density remaining at 2% - long keyphrase", + paper: new Paper( "
" + nonkeyword.repeat( 900 ) + "b c d e f, ".repeat( 20 ) + "
", { keyword: "b c d e f" } ), + expectedResult: { + score: -50, + text: "Keyphrase density: " + + "The keyphrase was found 20 times. That's way more than the recommended maximum of 12 times for a text of this length." + + " Don't overoptimize!", + }, + }, + { + description: "returns a bad result if the keyword is only used once, regardless of the density", + paper: new Paper( "" + nonkeyword.repeat( 100 ) + keyword + "
", { keyword: "keyword" } ), + expectedResult: { + score: 4, + text: "Keyphrase density: " + + "The keyphrase was found 1 time. That's less than the recommended minimum of 2 times " + + "for a text of this length. Focus on your keyphrase!", + }, + }, + { + description: "returns a good result if the keyword is used twice and " + + "the recommended count is smaller than or equal to 2, regardless of the density", + paper: new Paper( "" + nonkeyword.repeat( 100 ) + "a b c, a b c
", { keyword: "a b c", locale: "xx_XX" } ), + expectedResult: { + score: 9, + text: "Keyphrase density: " + + "The keyphrase was found 2 times. This is great!", + }, + }, +]; + +describe.each( testDataWithDefaultResearcher )( "a kephrase density test for languages without morphology", ( { + description, + paper, + expectedResult, +} ) => { + const researcher = new DefaultResearcher( paper ); + buildTree( paper, researcher ); + it( description, () => { const result = new KeywordDensityAssessment().getResult( paper, researcher ); - expect( result.getScore() ).toBe( 4 ); - expect( result.getText() ).toBe( "Keyphrase density: " + - "The keyphrase was found 1 time. That's less than the recommended minimum of 2 times " + - "for a text of this length. Focus on your keyphrase!" ); - } ); - - it( "returns a good result if the keyword is used twice and " + - "the recommended count is smaller than or equal to 2, regardless of the density", function() { - const paper = new Paper( nonkeyword.repeat( 100 ) + "a b c, a b c", { keyword: "a b c", locale: "xx_XX" } ); - const researcher = new DefaultResearcher( paper ); - buildTree( paper, researcher ); - - const result = new KeywordDensityAssessment().getResult( paper, researcher ); - expect( result.getScore() ).toBe( 9 ); - expect( result.getText() ).toBe( "Keyphrase density: " + - "The keyphrase was found 2 times. This is great!" 
); - } ); - it( "applies to a paper with a keyword and a text of at least 100 words", function() { - const paper = new Paper( nonkeyword.repeat( 100 ), { keyword: "keyword" } ); - expect( new KeywordDensityAssessment().isApplicable( paper, new DefaultResearcher( paper ) ) ).toBe( true ); - } ); - - it( "does not apply to a paper with a keyword and a text of at least 100 words when the text is inside an element" + - "we want to exclude from the analysis", function() { - const paper = new Paper( "" + nonkeyword.repeat( 100 ) + "", { keyword: "keyword" } ); - expect( new KeywordDensityAssessment().isApplicable( paper, new DefaultResearcher( paper ) ) ).toBe( false ); - } ); - - it( "does not apply to a paper with text of 100 words but without a keyword", function() { - const paper = new Paper( nonkeyword.repeat( 100 ), { keyword: "" } ); - expect( new KeywordDensityAssessment().isApplicable( paper, new DefaultResearcher( paper ) ) ).toBe( false ); - } ); - - it( "does not apply to a paper with a text containing less than 100 words and with a keyword", function() { - const paper = new Paper( nonkeyword.repeat( 99 ), { keyword: "keyword" } ); - expect( new KeywordDensityAssessment().isApplicable( paper, new DefaultResearcher( paper ) ) ).toBe( false ); - } ); - - it( "does not apply to a paper with a text containing less than 100 words and without a keyword", function() { - const paper = new Paper( nonkeyword.repeat( 99 ), { keyword: "" } ); - expect( new KeywordDensityAssessment().isApplicable( paper, new DefaultResearcher( paper ) ) ).toBe( false ); - } ); - - it( "applies to a Japanese paper with a keyword and a text of at least 200 characters", function() { - const paper = new Paper( longTextJapanese, { keyword: "keyword" } ); - expect( new KeywordDensityAssessment().isApplicable( paper, new JapaneseResearcher( paper ) ) ).toBe( true ); + expect( result.getScore() ).toBe( expectedResult.score ); + expect( result.getText() ).toBe( expectedResult.text ); } ); +} ); - 
it( "does not apply to a Japanese paper with text of less than 200 characters", function() { - const paper = new Paper( shortTextJapanese, { keyword: "keyword" } ); - expect( new KeywordDensityAssessment().isApplicable( paper, new JapaneseResearcher( paper ) ) ).toBe( false ); +const testDataForApplicability = [ + { + description: "applies to a paper with a keyword and a text of at least 100 words", + paper: new Paper( "
" + nonkeyword.repeat( 100 ) + "
", { keyword: "keyword" } ), + researcher: new DefaultResearcher(), + expectedResult: true, + }, + { + description: "does not apply to a paper with a keyword and a text of at least 100 words when the text is inside an element" + + "we want to exclude from the analysis", + paper: new Paper( "" + nonkeyword.repeat( 100 ) + "", { keyword: "keyword" } ), + researcher: new DefaultResearcher(), + expectedResult: false, + }, + { + description: "does not apply to a paper with text of 100 words but without a keyword", + paper: new Paper( "
" + nonkeyword.repeat( 100 ) + "
", { keyword: "" } ), + researcher: new DefaultResearcher(), + expectedResult: false, + }, + { + description: "does not apply to a paper with a text containing less than 100 words and with a keyword", + paper: new Paper( "" + nonkeyword.repeat( 99 ) + "
", { keyword: "keyword" } ), + researcher: new DefaultResearcher(), + expectedResult: false, + }, + { + description: "does not apply to a paper with a text containing less than 100 words and without a keyword", + paper: new Paper( "" + nonkeyword.repeat( 99 ) + "
", { keyword: "" } ), + researcher: new DefaultResearcher(), + expectedResult: false, + }, + { + description: "applies to a Japanese paper with a keyword and a text of at least 200 characters", + paper: new Paper( "" + longTextJapanese + "
", { keyword: "keyword" } ), + researcher: new JapaneseResearcher(), + expectedResult: true, + }, + { + description: "does not apply to a Japanese paper with text of less than 200 characters", + paper: new Paper( "" + shortTextJapanese + "
", { keyword: "keyword" } ), + researcher: new JapaneseResearcher(), + expectedResult: false, + }, +]; +describe.each( testDataForApplicability )( "assessment applicability test", ( { + description, + paper, + researcher, + expectedResult, +} ) => { + researcher.setPaper( paper ); + buildTree( paper, researcher ); + it( description, () => { + expect( new KeywordDensityAssessment().isApplicable( paper, researcher ) ).toBe( expectedResult ); } ); } ); From f593f52420e39ec556a2f1d355ead936da797b64 Mon Sep 17 00:00:00 2001 From: aidamarfuaty" + japaneseSentence + japaneseSentenceWithKeyphrase.repeat( 32 ) + "
", { + keyword: "ばっかり", + locale: "ja", + } ), + expectedResult: { + score: 4, + text: "Keyphrase density: " + + "The keyphrase was found 0 times. That's less than the recommended minimum of 6 times for a text of this length. " + + "Focus on your keyphrase!", + }, + }, + { + description: "shouldn't return NaN/infinity times of synonym occurrence when the synonym contains only function words " + + "and there is no match in the text", + paper: new Paper( "" + japaneseSentence + japaneseSentenceWithKeyphrase.repeat( 32 ) + "
", { + keyword: "", + synonyms: "ばっかり", + locale: "ja", + } ), + expectedResult: { + score: 4, + text: "Keyphrase density: " + + "The keyphrase was found 0 times. That's less than the recommended minimum of 6 times for a text of this length. " + + "Focus on your keyphrase!", + }, + }, + { + description: "shouldn't return NaN/infinity times of keyphrase occurrence when the keyphrase contains spaces " + + "and there is no match in the text", + paper: new Paper( "" + japaneseSentence + japaneseSentenceWithKeyphrase.repeat( 32 ) + "
", { + keyword: "かしら かい を ばっかり", + locale: "ja", + } ), + expectedResult: { + score: 4, + text: "Keyphrase density: " + + "The keyphrase was found 0 times. That's less than the recommended minimum of 6 times for a text of this length. " + + "Focus on your keyphrase!", + }, + }, +]; + +describe.each( testDataJapanese )( "test for keyphrase density in Japanese", ( { + description, + paper, + expectedResult, +} ) => { + const researcher = new JapaneseResearcher( paper ); + researcher.addResearchData( "morphology", morphologyDataJA ); + buildTree( paper, researcher ); + const result = new KeywordDensityAssessment().getResult( paper, researcher ); + + it( description, () => { + expect( result.getScore() ).toBe( expectedResult.score ); + expect( result.getText() ).toBe( expectedResult.text ); + } ); +} ); + +// eslint-disable-next-line no-warning-comments +// TODO: ADAPT the tests below. +xdescribe( "A test for keyword density in Japanese", function() { + it( "gives a very BAD result when keyword density is above 4% when the text contains way too many instances" + + " of the keyphrase forms", function() { + const paper = new Paper( japaneseSentence + japaneseSentenceWithKeyphrase.repeat( 32 ), { + keyword: "一冊の本を読む", + locale: "ja", + } ); + const researcher = new JapaneseResearcher( paper ); + researcher.addResearchData( "morphology", morphologyDataJA ); + const result = new KeywordDensityAssessment().getResult( paper, researcher ); + expect( result.getScore() ).toBe( -50 ); + expect( result.getText() ).toBe( "Keyphrase density: " + + "The keyphrase was found 32 times. That's way more than the recommended maximum of 23 times for a text of " + + "this length. Don't overoptimize!" 
); + } ); + + it( "gives a BAD result when keyword density is between 3% and 4% when the text contains too many instances" + + " of the keyphrase forms", function() { + const paper = new Paper( japaneseSentence + japaneseSentenceWithKeyphrase.repeat( 16 ), { + keyword: "一冊の本を読む", + locale: "ja", + } ); + const researcher = new JapaneseResearcher( paper ); + researcher.addResearchData( "morphology", morphologyDataJA ); + const result = new KeywordDensityAssessment().getResult( paper, researcher ); + expect( result.getScore() ).toBe( -10 ); + expect( result.getText() ).toBe( "Keyphrase density:" + + " The keyphrase was found 16 times. That's more than the recommended maximum of 15 times for a text of this length." + + " Don't overoptimize!" ); + } ); + + it( "gives a BAD result when keyword density is 0", function() { + const paper = new Paper( japaneseSentence, { keyword: "一冊の本を読む", locale: "ja" } ); + const researcher = new JapaneseResearcher( paper ); + researcher.addResearchData( "morphology", morphologyDataJA ); + const result = new KeywordDensityAssessment().getResult( paper, researcher ); + expect( result.getScore() ).toBe( 4 ); + expect( result.getText() ).toBe( "Keyphrase density: " + + "The keyphrase was found 0 times. That's less than the recommended minimum of 2 times for a text of this length." + + " Focus on your keyphrase!" ); + } ); + + it( "gives a BAD result when keyword density is between 0 and 0.5%", function() { + const paper = new Paper( japaneseSentence + japaneseSentenceWithKeyphrase.repeat( 1 ), { + keyword: "一冊の本を読む", + locale: "ja", + } ); + const researcher = new JapaneseResearcher( paper ); + researcher.addResearchData( "morphology", morphologyDataJA ); + const result = new KeywordDensityAssessment().getResult( paper, researcher ); + expect( result.getScore() ).toBe( 4 ); + expect( result.getText() ).toBe( "Keyphrase density:" + + " The keyphrase was found 1 time. 
That's less than the recommended minimum of 2 times for a text of this length." + + " Focus on your keyphrase!" ); + } ); + + it( "gives a GOOD result when keyword density is between 0.5% and 3.5% when the text contains keyphrase forms", function() { + const paper = new Paper( japaneseSentence + japaneseSentenceWithKeyphrase.repeat( 8 ), { + keyword: "一冊の本を読む", + locale: "ja", + } ); + const researcher = new JapaneseResearcher( paper ); + researcher.addResearchData( "morphology", morphologyDataJA ); + const result = new KeywordDensityAssessment().getResult( paper, researcher ); + expect( result.getScore() ).toBe( 9 ); + expect( result.getText() ).toBe( "Keyphrase density: " + + "The keyphrase was found 8 times. This is great!" ); + } ); + + it( "gives a GOOD result when keyword density is between 0.5% and 3%, when the exact match of the keyphrase is in the text", function() { + const paper = new Paper( japaneseSentence + japaneseSentenceWithKeyphraseExactMatch.repeat( 8 ), { + keyword: "一冊の本を読む", + locale: "ja", + } ); + const researcher = new JapaneseResearcher( paper ); + researcher.addResearchData( "morphology", morphologyDataJA ); + const result = new KeywordDensityAssessment().getResult( paper, researcher ); + expect( result.getScore() ).toBe( 9 ); + expect( result.getText() ).toBe( "Keyphrase density: " + + "The keyphrase was found 8 times. This is great!" 
); + } ); + + it( "should still gives a GOOD result when keyword density is between 0.5% and 3%, when the exact match of the keyphrase is in the text " + + "and the keyphrase is enclosed in double quotes", function() { + const paper = new Paper( japaneseSentence + japaneseSentenceWithKeyphraseExactMatch.repeat( 8 ), { + keyword: "「一冊の本を読む」", + locale: "ja", + } ); + const researcher = new JapaneseResearcher( paper ); + researcher.addResearchData( "morphology", morphologyDataJA ); + const result = new KeywordDensityAssessment().getResult( paper, researcher ); + expect( result.getScore() ).toBe( 9 ); + expect( result.getText() ).toBe( "Keyphrase density: " + + "The keyphrase was found 8 times. This is great!" ); + } ); + + it( "gives a BAD result when keyword density is between 0.5% and 3.5%, if morphology is added, but there is no morphology data", function() { + const paper = new Paper( japaneseSentence + japaneseSentenceWithKeyphrase.repeat( 8 ), { + keyword: "一冊の本を読む", + locale: "ja", + } ); + const researcher = new JapaneseResearcher( paper ); + const result = new KeywordDensityAssessment().getResult( paper, researcher ); + expect( result.getScore() ).toBe( 4 ); + expect( result.getText() ).toBe( "Keyphrase density: " + + "The keyphrase was found 0 times. That's less than the recommended minimum of 2 times for a text of this length." + + " Focus on your keyphrase!" ); + } ); +} ); + From 483b65b47e08b0faae19040631613069870aeaa2 Mon Sep 17 00:00:00 2001 From: aidamarfuatyA string with a key-word.
", { keyword: "key-word" } ), keyphraseForms: [ [ "key-word", "key-words" ] ], expectedCount: 1, - expectedMarkings: [ new Mark( { marked: "A string with aA string with a key_word.
", { keyword: "key_word" } ), keyphraseForms: [ [ "key_word", "key_words" ] ], expectedCount: 1, - expectedMarkings: [ new Mark( { marked: "A string with aA string with 'kapaklı' as a keyword in it
", { keyword: "kapaklı" } ), keyphraseForms: [ [ "kapaklı" ] ], expectedCount: 1, - expectedMarkings: [ new Mark( { marked: "A string with 'A string with key&word in it
", { keyword: "key&word" } ), keyphraseForms: [ [ "key&word" ] ], expectedCount: 1, - expectedMarkings: [ new Mark( { marked: "A string withA string with quotes to match the key'word, even if the quotes differ.
", { keyword: "key'word" } ), keyphraseForms: [ [ "key'word", "key'words" ] ], expectedCount: 1, - expectedMarkings: [ new Mark( { - marked: "A string with quotes to match theA string with a $keyword.
" ), keyphraseForms: [ [ "\\$keyword" ] ], expectedCount: 1, - expectedMarkings: [ new Mark( { marked: "A string with aA string with a keyword.
", { keyword: "\"keyword\"" } ), keyphraseForms: [ [ "keyword" ] ], expectedCount: 1, - expectedMarkings: [ new Mark( { marked: "A string with aA string with a key phrase.
", { keyword: "\"key phrase\"" } ), keyphraseForms: [ [ "key phrase" ] ], expectedCount: 1, - expectedMarkings: [ new Mark( { marked: "A string with aA .sentence with a keyphrase.
", { keyword: "\".sentence\"" } ), keyphraseForms: [ [ ".sentence" ] ], expectedCount: 1, - expectedMarkings: [ new Mark( { marked: "AA string with a $keyword.
", { keyword: "\"\\$keyword\"" } ), keyphraseForms: [ [ "\\$keyword" ] ], expectedCount: 1, - expectedMarkings: [ new Mark( { marked: "A string with aa string of text with the keyword in it
+ `, + { + keyword: "keyword", + wpBlocks: [ + { + attributes: { + content: "a string of text with the keyword in it", + }, + name: "core/paragraph", + clientId: "5615ca1f-4052-41a9-97be-be19cfd2085b", + innerBlocks: [], + }, + ], + } + ), + keyphraseForms: [ [ "keyword", "keywords" ] ], + expectedCount: 1, + expectedMarkings: [ + new Mark( { + marked: "a string of text with theOver the years, we’ve written quite a few articles about + branding. + Branding is about getting people to relate to your company and + products. It’s also about trying to make your brand synonymous with a certain product or service. + This can be a lengthy and hard project. It can potentially cost you all of your revenue. + It’s no wonder that branding is often associated with investing lots of money in marketing and promotion. + However, for a lot of small business owners, the investment in branding will have + to be made with a relatively small budget.
+ `, + { + keyword: "keyword", + wpBlocks: [ + { + attributes: { + // eslint-disable-next-line max-len + content: "Over the years, we’ve written quite a few articles about branding. Branding is about getting people to relate to your company and products. It’s also about trying to make your brand synonymous with a certain product or service. This can be a lengthy and hard project. It can potentially cost you all of your revenue. It’s no wonder that branding is often associated with investing lots of money in marketing and promotion. However, for a lot of small business owners, the investment in branding will have to be made with a relatively small budget. ", + }, + name: "core/paragraph", + clientId: "6860403c-0b36-43b2-96fa-2d30c10cb44c", + innerBlocks: [], + }, + ], + } + ), + keyphraseForms: [ [ "synonymous" ] ], + expectedCount: 1, + expectedMarkings: [ + + new Mark( { + // eslint-disable-next-line max-len + marked: " It’s also about trying to make your brandYou might be a local bakery with 10 employees, or a local industrial company employing up to 500 people. + These all can be qualified as + ‘small business’. All have the same main goal when they start: the need to establish a name in their field of expertise. There + are multiple ways to do this, without a huge budget. + In this post, I’ll share my thoughts on how to go about your own low-budget branding.
+ `, + { + keyword: "expertise", + wpBlocks: [ + { + attributes: { + // eslint-disable-next-line max-len + content: "You might be a local bakery with 10 employees, or a local industrial company employing up to 500 people. These all can be qualified as ‘small business’. All have the same main goal when they start: the need to establish a name in their field of expertise. There are multiple ways to do this, without a huge budget. In this post, I’ll share my thoughts on how to go about your own low-budget branding.", + }, + name: "core/paragraph", + clientId: "65be5146-3395-4845-8c7c-4a79fd6e3611", + innerBlocks: [], + }, + ], + } + ), + keyphraseForms: [ [ "expertise" ] ], + expectedCount: 1, + expectedMarkings: [ + + new Mark( { + // eslint-disable-next-line max-len + marked: " All have the same main goal when they start: the need to establish a name in their field ofLorem Ipsum is simply dummy text of the printing and typesetting industry.
+There are many variations of passages of Lorem Ipsum available
+Over the years, we’ve written quite a few articles about + branding. + Branding is about getting people to relate to your company and + products. It’s also about trying to make your brand synonymous with a certain product or service. + This can be a lengthy and hard project. It can potentially cost you all of your revenue. + It’s no wonder that branding is often associated with investing lots of money in marketing and promotion. + However, for a lot of small business owners, the investment in branding will have + to be made with a relatively small budget.
+ `, + { + keyword: "keyword", + wpBlocks: [ + { + attributes: { + // eslint-disable-next-line max-len + content: "Over the years, we’ve written quite a few articles about branding. Branding is about getting people to relate to your company and products. It’s also about trying to make your brand synonymous with a certain product or service. This can be a lengthy and hard project. It can potentially cost you all of your revenue. It’s no wonder that branding is often associated with investing lots of money in marketing and promotion. However, for a lot of small business owners, the investment in branding will have to be made with a relatively small budget. ", + }, + name: "core/paragraph", + clientId: "6860403c-0b36-43b2-96fa-2d30c10cb44c", + innerBlocks: [], + }, + ], + } + ); + + const mockResearcher = buildMorphologyMockResearcher( keyphraseForms ); + + buildTree( mockPaper, mockResearcher ); + const keyWordCountResult = keyphraseCount( mockPaper, mockResearcher ); + expect( keyWordCountResult.count ).toBe( 1 ); + expect( keyWordCountResult.markings ).toEqual( expectedMarkings ); + } ); +} ); + /** * Mocks Japanese Researcher. * @param {Array} keyphraseForms The morphological forms to be added to the researcher. diff --git a/packages/yoastseo/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js b/packages/yoastseo/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js index 98f80447499..a772265b9d7 100644 --- a/packages/yoastseo/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js +++ b/packages/yoastseo/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js @@ -9,7 +9,7 @@ const markEnd = "a string of text
", { keyword: "" } ), - keyphraseForms: [ [ "" ] ], + keyphraseForms: [], expectedCount: 0, expectedMarkings: [], skip: false, @@ -59,17 +59,6 @@ const testCases = [ position: { endOffset: 64, startOffset: 55 } } ) ], skip: false, }, - { - description: "counts a string of text with German diacritics and eszett as the keyword", - paper: new Paper( "Waltz keepin auf mitz auf keepin äöüß weiner blitz deutsch spitzen.
", { keyword: "äöüß" } ), - keyphraseForms: [ [ "äöüß" ] ], - expectedCount: 1, - expectedMarkings: [ - new Mark( { marked: "Waltz keepin auf mitz auf keepinA string of text with a keyword and multiple keywords in it.
", { keyword: "keyword" } ), @@ -86,55 +75,6 @@ const testCases = [ position: { endOffset: 56, startOffset: 48 } } ) ], skip: false, }, - { - description: "counts a string with a keyword with a '-' in it", - paper: new Paper( "A string with a key-word.
", { keyword: "key-word" } ), - keyphraseForms: [ [ "key-word", "key-words" ] ], - expectedCount: 1, - expectedMarkings: [ new Mark( { marked: "A string with aA string with a key-word.
", { keyword: "key word" } ), - keyphraseForms: [ [ "key", "keys" ], [ "word", "words" ] ], - expectedCount: 0, - expectedMarkings: [], - skip: false, - // Note: this behavior might change in the future. - }, - { - description: "counts a string with a keyword with a '_' in it", - paper: new Paper( "A string with a key_word.
", { keyword: "key_word" } ), - keyphraseForms: [ [ "key_word", "key_words" ] ], - expectedCount: 1, - expectedMarkings: [ new Mark( { marked: "A string with aA string with 'kapaklı' as a keyword in it
", { keyword: "kapaklı" } ), - keyphraseForms: [ [ "kapaklı" ] ], - expectedCount: 1, - expectedMarkings: [ new Mark( { marked: "A string with 'A string with key&word in it
", { keyword: "key&word" } ), - keyphraseForms: [ [ "key&word" ] ], - expectedCount: 1, - expectedMarkings: [ new Mark( { marked: "A string withA text with
", { keyword: "image" } ), @@ -154,35 +94,6 @@ const testCases = [ position: { endOffset: 25, startOffset: 17 } } ) ], skip: false, }, - { - description: "should still match keyphrase occurrence with different types of apostrophe.", - paper: new Paper( "A string with quotes to match the key'word, even if the quotes differ.
", { keyword: "key'word" } ), - keyphraseForms: [ [ "key'word", "key'words" ] ], - expectedCount: 1, - expectedMarkings: [ new Mark( { - marked: "A string with quotes to match theA string with a $keyword.
" ), - keyphraseForms: [ [ "\\$keyword" ] ], - expectedCount: 1, - expectedMarkings: [ new Mark( { marked: "A string with aA string with a key word.
", { keyword: "key-word" } ), - keyphraseForms: [ [ "key-word", "key-words" ] ], - expectedCount: 0, - expectedMarkings: [], - skip: false, - }, { description: "doesn't count keyphrase instances inside elements we want to exclude from the analysis", paper: new Paper( "There is no keyword
in this sentence.
a string with nbsp to match the key word.
", { keyword: "key word" } ), - keyphraseForms: [ [ "key" ], [ "word" ] ], - expectedCount: 1, - expectedMarkings: [ - new Mark( { marked: "a string with nbsp to match theA string with three keys (key and another key) and one word.
" ), @@ -325,6 +224,297 @@ describe.each( testCases )( "Test for counting the keyword in a text in english" } ); } ); +const testCasesWithSpecialCharacters = [ + { + description: "counts the keyphrase occurrence in the text with ", + paper: new Paper( "a string with nbsp to match the key word.
", { keyword: "key word" } ), + keyphraseForms: [ [ "key" ], [ "word" ] ], + expectedCount: 1, + expectedMarkings: [ + new Mark( { marked: "a string with nbsp to match theA string with a key-word.
", { keyword: "key-word" } ), + keyphraseForms: [ [ "key-word", "key-words" ] ], + expectedCount: 1, + expectedMarkings: [ new Mark( { marked: "A string with aA string with a key-word.
", { keyword: "key word" } ), + keyphraseForms: [ [ "key", "keys" ], [ "word", "words" ] ], + expectedCount: 0, + expectedMarkings: [], + skip: false, + // Note: this behavior might change in the future. + }, + { + description: "counts a string with a keyword with a '_' in it", + paper: new Paper( "A string with a key_word.
", { keyword: "key_word" } ), + keyphraseForms: [ [ "key_word", "key_words" ] ], + expectedCount: 1, + expectedMarkings: [ new Mark( { marked: "A string with aA string with key&word in it
", { keyword: "key&word" } ), + keyphraseForms: [ [ "key&word" ] ], + expectedCount: 1, + expectedMarkings: [ new Mark( { marked: "A string withA string with quotes to match the key'word, even if the quotes differ.
", { keyword: "key'word" } ), + keyphraseForms: [ [ "key'word", "key'words" ] ], + expectedCount: 1, + expectedMarkings: [ new Mark( { + marked: "A string with quotes to match theA string with a $keyword.
" ), + keyphraseForms: [ [ "\\$keyword" ] ], + expectedCount: 1, + expectedMarkings: [ new Mark( { marked: "A string with aA string with a key word.
", { keyword: "key-word" } ), + keyphraseForms: [ [ "key-word", "key-words" ] ], + expectedCount: 0, + expectedMarkings: [], + skip: false, + }, + { + description: "can match keyphrase enclosed in «» '«keyword»' in the text", + paper: new Paper( "A string with a «keyword».
", { keyword: "keyword" } ), + keyphraseForms: [ [ "keyword" ] ], + expectedCount: 1, + expectedMarkings: [ + new Mark( { + original: "A string with a «keyword».", + marked: "A string with a «A string with a keyword.
", { keyword: "«keyword»" } ), + keyphraseForms: [ [ "keyword" ] ], + expectedCount: 1, + expectedMarkings: [ + new Mark( { + original: "A string with a keyword.", + marked: "A string with aA string with a ‹keyword›.
", { keyword: "keyword" } ), + keyphraseForms: [ [ "keyword" ] ], + expectedCount: 1, + expectedMarkings: [ + new Mark( { + original: "A string with a ‹keyword›.", + marked: "A string with a ‹A string with a keyword.
", { keyword: "‹keyword›" } ), + keyphraseForms: [ [ "keyword" ] ], + expectedCount: 1, + expectedMarkings: [ + new Mark( { + original: "A string with a keyword.", + marked: "A string with a¿Keyword is it
", { keyword: "keyword" } ), + keyphraseForms: [ [ "keyword" ] ], + expectedCount: 1, + expectedMarkings: [ + new Mark( { + original: "¿Keyword is it", + marked: "¿الجيدة؟
", { keyword: "الجيدة" } ), + keyphraseForms: [ [ "الجيدة" ] ], + expectedCount: 1, + expectedMarkings: [ + new Mark( { + original: "الجيدة؟", + marked: "Waltz keepin auf mitz auf keepin äöüß weiner blitz deutsch spitzen.
", { keyword: "äöüß", locale: "de_DE" } ), + keyphraseForms: [ [ "äöüß" ] ], + expectedCount: 1, + expectedMarkings: [ + new Mark( { marked: "Waltz keepin auf mitz auf keepinAccion Española fue una revista.
", { keyword: "acción", locale: "es_ES" } ), + keyphraseForms: [ [ "acción" ] ], + expectedCount: 1, + expectedMarkings: [ + new Mark( { marked: "oeverlaatelsebesiktning and overlatelsebesiktning
", { keyword: "överlåtelsebesiktning", locale: "sv_SV" } ), + keyphraseForms: [ [ "överlåtelsebesiktning" ] ], + expectedCount: 2, + expectedMarkings: [ + new Mark( { marked: "København and Koebenhavn and Kobenhavn
", { keyword: "København", locale: "nb_NO" } ), + keyphraseForms: [ [ "København" ] ], + expectedCount: 3, + expectedMarkings: [ + new Mark( { marked: "İstanbul and Istanbul and istanbul and ıstanbul
", { keyword: "İstanbul", locale: "tr_TR" } ), + keyphraseForms: [ [ "İstanbul" ] ], + expectedCount: 4, + expectedMarkings: [ new Mark( { marked: "A string with 'İstanbul and Istanbul and istanbul and ıstanbul
", { keyword: "Istanbul", locale: "tr_TR" } ), + keyphraseForms: [ [ "Istanbul" ] ], + expectedCount: 4, + expectedMarkings: [ new Mark( { marked: "A string with 'İstanbul and Istanbul and istanbul and ıstanbul
", { keyword: "istanbul", locale: "tr_TR" } ), + keyphraseForms: [ [ "istanbul" ] ], + expectedCount: 4, + expectedMarkings: [ new Mark( { marked: "A string with 'İstanbul and Istanbul and istanbul and ıstanbul
", { keyword: "ıstanbul", locale: "tr_TR" } ), + keyphraseForms: [ [ "ıstanbul" ] ], + expectedCount: 4, + expectedMarkings: [ new Mark( { marked: "A string with 'A very intelligent cat loves their human. A dog is very cute.
A string with cats and dogs, and other cats and dogs.
", { keyword: "\"cats and dogs\"" } ), + keyphraseForms: [ [ "cats and dogs" ] ], + expectedCount: 1, + expectedMarkings: [ new Mark( { marked: "A string with athis is a keywords keywords keywords.
", { keyword: "keyword", locale: "en_US" } ), + keyphraseForms: [ [ "keyword", "keywords" ] ], + expectedCount: 2, + expectedMarkings: [ + new Mark( { marked: "this is a keyword keyword keyword.
", { keyword: "keyword", locale: "en_US" } ), + keyphraseForms: [ [ "keyword", "keywords" ] ], + expectedCount: 2, + expectedMarkings: [ + new Mark( { marked: "this is a keyword keyword keyword.
", { keyword: "keywords", locale: "en_US" } ), + keyphraseForms: [ [ "keyword", "keywords" ] ], + expectedCount: 2, + expectedMarkings: [ + new Mark( { marked: "this is a key key key word word word.
", { keyword: "key words", locale: "en_US" } ), + keyphraseForms: [ [ "key" ], [ "word", "words" ] ], + expectedCount: 2, + expectedMarkings: [ + new Mark( { marked: "acción Española fue una revista.
", { keyword: "accion", locale: "es_ES" } ), + keyphraseForms: [ [ "accion" ] ], + expectedCount: 0, + expectedMarkings: [ + new Mark( { marked: "Accion Española fue una revista.
", { keyword: "acción", locale: "es_ES" } ), keyphraseForms: [ [ "acción" ] ], expectedCount: 1, expectedMarkings: [ - new Mark( { marked: "A string with cats and dogs, and other cats and dogs.
", { keyword: "\"cats and dogs\"" } ), keyphraseForms: [ [ "cats and dogs" ] ], - expectedCount: 1, - expectedMarkings: [ new Mark( { marked: "A string with aA string with a $keyword.
", { keyword: "\"\\$keyword\"" } ), - keyphraseForms: [ [ "\\$keyword" ] ], + paper: new Paper( "A string with a $keyword.
", { keyword: "\"$keyword\"" } ), + keyphraseForms: [ [ "$keyword" ] ], expectedCount: 1, expectedMarkings: [ new Mark( { marked: "A string with aA string with a $keyword.
" ), - keyphraseForms: [ [ "\\$keyword" ] ], + paper: new Paper( "A string with a $keyword.
", { keyword: "$keyword" } ), + keyphraseForms: [ [ "$keyword" ] ], expectedCount: 1, expectedMarkings: [ new Mark( { marked: "A string with athis is a key word key word key word.
", { keyword: "key words", locale: "en_US" } ), + keyphraseForms: [ [ "key" ], [ "word", "words" ] ], + expectedCount: 3, + expectedMarkings: [ + new Mark( { marked: "acción Española fue una revista.
", { keyword: "accion", locale: "es_ES" } ), + paper: new Paper( "Acción Española fue una revista.
", { keyword: "accion", locale: "es_ES" } ), keyphraseForms: [ [ "accion" ] ], expectedCount: 0, expectedMarkings: [ @@ -524,6 +535,16 @@ const testCasesWithLocaleMapping = [ position: { endOffset: 40, startOffset: 36 } } ) ], skip: false, }, + { + description: "counts a string with with a different forms of Turkish i, kephrase: İstanbul", + paper: new Paper( "Türkçe and Turkce
", { keyword: "Türkçe", locale: "tr_TR" } ), + keyphraseForms: [ [ "Türkçe" ] ], + expectedCount: 4, + expectedMarkings: [ new Mark( { marked: "A string with 'İstanbul and Istanbul and istanbul and ıstanbul
", { keyword: "İstanbul", locale: "tr_TR" } ), diff --git a/packages/yoastseo/src/languageProcessing/helpers/match/matchKeyphraseWithSentence.js b/packages/yoastseo/src/languageProcessing/helpers/match/matchKeyphraseWithSentence.js index 6a93386f5b3..40ae8854e74 100644 --- a/packages/yoastseo/src/languageProcessing/helpers/match/matchKeyphraseWithSentence.js +++ b/packages/yoastseo/src/languageProcessing/helpers/match/matchKeyphraseWithSentence.js @@ -11,7 +11,7 @@ import matchTextWithTransliteration from "./matchTextWithTransliteration"; * * @returns {string[]} The tokenized keyword forms. */ -const tokenizeKeywordFormsForExactMatching = ( keywordForms ) => { +export const tokenizeKeywordFormsForExactMatching = ( keywordForms ) => { // Tokenize keyword forms. const keywordFormsText = keywordForms[ 0 ][ 0 ]; return getWordsForHTMLParser( keywordFormsText ); @@ -24,7 +24,7 @@ const tokenizeKeywordFormsForExactMatching = ( keywordForms ) => { * @param {(string[])[]} keywordForms The keyword forms to match. * @param {Sentence} sentence The sentence to match the keyword forms with. * - * @returns {Object} The tokens that exactly match the keyword forms. + * @returns {Token[]} The tokens that exactly match the keyword forms. */ export const getExactMatches = ( keywordForms, sentence ) => { // Tokenize keyword forms. @@ -36,7 +36,6 @@ export const getExactMatches = ( keywordForms, sentence ) => { let keywordIndex = 0; let sentenceIndex = 0; const matches = []; - let count = 0; let currentMatch = []; while ( sentenceIndex < sentenceTokens.length ) { @@ -56,14 +55,13 @@ export const getExactMatches = ( keywordForms, sentence ) => { // Add the current match to the matches array and reset the keyword index and the current match. 
if ( currentMatch.length === keywordTokens.length ) { matches.push( ...currentMatch ); - count++; keywordIndex = 0; currentMatch = []; } sentenceIndex++; } - return { count: count, matches: matches }; + return matches; }; /** diff --git a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js index 7da0bc232ae..4d04b03df89 100644 --- a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js +++ b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js @@ -2,7 +2,10 @@ import { flatten } from "lodash-es"; import getSentencesFromTree from "../helpers/sentence/getSentencesFromTree"; import { normalizeSingle } from "../helpers/sanitize/quotes"; import getMarkingsInSentence from "../helpers/highlighting/getMarkingsInSentence"; -import { getExactMatches, getNonExactMatches } from "../helpers/match/matchKeyphraseWithSentence"; +import { + matchKeyphraseWithSentence, + tokenizeKeywordFormsForExactMatching, +} from "../helpers/match/matchKeyphraseWithSentence"; import isDoubleQuoted from "../helpers/match/isDoubleQuoted"; import matchTextWithTransliteration from "../helpers/match/matchTextWithTransliteration"; @@ -13,7 +16,7 @@ import matchTextWithTransliteration from "../helpers/match/matchTextWithTranslit * @param {string} locale The locale used for transliteration. * @returns {number} The number of matches. */ -const countMatches = ( matches, keyphraseForms, locale ) => { +const countMatches = ( matches, keyphraseForms, locale, isExactMatchRequested ) => { // the count is the number of complete matches. const matchesCopy = [ ...matches ]; @@ -31,8 +34,15 @@ const countMatches = ( matches, keyphraseForms, locale ) => { // check if any of the keyphrase forms is in the matches. 
const foundMatch = matchesCopy.find( match =>{ return keyphraseForm.some( keyphraseFormWord => { - // const theRegex = new RegExp( `^${keyphraseFormWord}$`, "ig" ); - // return match.text.match( theRegex ); + if ( isExactMatchRequested ) { + // keyphraseForm = [ "key phrase" ] + // keyphraseWord = "key phrase" + // match = { text: "", sourceCodeInfo: {} } + keyphraseFormWord = tokenizeKeywordFormsForExactMatching( keyphraseFormWord ); + // "key phrase" => [ "key", " ", "phrase" ] + return keyphraseFormWord.every( word => matchTextWithTransliteration( match.text, word, locale ).length > 0 ); + + } return matchTextWithTransliteration( match.text, keyphraseFormWord, locale ).length > 0; } ); } ); @@ -102,18 +112,10 @@ export function countKeyphraseInText( sentences, topicForms, locale, matchWordCu sentences.forEach( sentence => { // eslint-disable-next-line no-warning-comments // TODO: test in Japanese to see if we use this helper as well - let matchesCount = 0; - let markings = []; - if ( isExactMatchRequested ) { - const matchesInSentence = getExactMatches( topicForms.keyphraseForms, sentence ); - matchesCount = matchesInSentence.count > 2 ? 
2 : matchesInSentence.count; - markings = getMarkingsInSentence( sentence, matchesInSentence.matches, matchWordCustomHelper, locale ); - } else { - const matchesInSentence = getNonExactMatches( topicForms.keyphraseForms, sentence, locale ); - const matchesInSentenceWithoutConsecutiveMatches = removeConsecutiveMatches( matchesInSentence ); - matchesCount = countMatches( matchesInSentenceWithoutConsecutiveMatches, topicForms.keyphraseForms, locale ); - markings = getMarkingsInSentence( sentence, matchesInSentence, matchWordCustomHelper, locale ); - } + const matchesInSentence = matchKeyphraseWithSentence( topicForms.keyphraseForms, sentence, locale, isExactMatchRequested ); + const matchesInSentenceWithoutConsecutiveMatches = removeConsecutiveMatches( matchesInSentence ); + const matchesCount = countMatches( matchesInSentenceWithoutConsecutiveMatches, topicForms.keyphraseForms, locale, isExactMatchRequested ); + const markings = getMarkingsInSentence( sentence, matchesInSentence, matchWordCustomHelper, locale ); result.markings.push( markings ); result.count += matchesCount; @@ -154,7 +156,7 @@ export default function keyphraseCount( paper, researcher ) { return { count: keyphraseFound.count, - markings: flatten( keyphraseFound.markings ), + markings: keyphraseFound.count === 0 ? [] : flatten( keyphraseFound.markings ), length: topicForms.keyphraseForms.length, }; } From 629f47dc0791994c4097999c0fb9e735080e78af Mon Sep 17 00:00:00 2001 From: aidamarfuatyA string with a key phrase key phrase.
", { keyword: "\"key phrase\"" } ), + keyphraseForms: [ [ "key phrase" ] ], + expectedCount: 2, + expectedMarkings: [ new Mark( { marked: "A string with aA string with a key phrase.
", { keyword: "\"key phrase\"" } ), @@ -264,10 +274,15 @@ const testCasesWithSpecialCharacters = [ description: "counts 'key word' in 'key-word'.", paper: new Paper( "A string with a key-word.
", { keyword: "key word" } ), keyphraseForms: [ [ "key", "keys" ], [ "word", "words" ] ], - expectedCount: 0, - expectedMarkings: [], + expectedCount: 1, + expectedMarkings: [ + new Mark( { + original: "A string with a key-word.", + marked: "A string with athis is a keywords keywords keywords.
", { keyword: "keyword", locale: "en_US" } ), - keyphraseForms: [ [ "keyword", "keywords" ] ], - expectedCount: 2, - expectedMarkings: [ - new Mark( { marked: "this is a keyword keyword keyword.
", { keyword: "keyword", locale: "en_US" } ), - keyphraseForms: [ [ "keyword", "keywords" ] ], - expectedCount: 2, - expectedMarkings: [ - new Mark( { marked: "this is a keyword keyword keyword.
", { keyword: "keywords", locale: "en_US" } ), - keyphraseForms: [ [ "keyword", "keywords" ] ], - expectedCount: 2, - expectedMarkings: [ - new Mark( { marked: "this is a key key key word word word.
", { keyword: "key words", locale: "en_US" } ), - keyphraseForms: [ [ "key" ], [ "word", "words" ] ], - expectedCount: 2, - expectedMarkings: [ - new Mark( { marked: "this is a key word key word key word.
", { keyword: "key words", locale: "en_US" } ), - keyphraseForms: [ [ "key" ], [ "word", "words" ] ], - expectedCount: 3, - expectedMarkings: [ - new Mark( { marked: "Acción Española fue una revista.
", { keyword: "accion", locale: "es_ES" } ), keyphraseForms: [ [ "accion" ] ], expectedCount: 0, - expectedMarkings: [ - new Mark( { marked: "Türkçe and Turkce
", { keyword: "Türkçe", locale: "tr_TR" } ), keyphraseForms: [ [ "Türkçe" ] ], - expectedCount: 4, - expectedMarkings: [ new Mark( { marked: "A string with 'İstanbul and Istanbul and istanbul and ıstanbul
", { keyword: "İstanbul", locale: "tr_TR" } ), keyphraseForms: [ [ "İstanbul" ] ], expectedCount: 4, - expectedMarkings: [ new Mark( { marked: "A string with 'İstanbul and Istanbul and istanbul and ıstanbul
", { keyword: "Istanbul", locale: "tr_TR" } ), keyphraseForms: [ [ "Istanbul" ] ], expectedCount: 4, - expectedMarkings: [ new Mark( { marked: "A string with 'İstanbul and Istanbul and istanbul and ıstanbul
", { keyword: "istanbul", locale: "tr_TR" } ), keyphraseForms: [ [ "istanbul" ] ], expectedCount: 4, - expectedMarkings: [ new Mark( { marked: "A string with 'İstanbul and Istanbul and istanbul and ıstanbul
", { keyword: "ıstanbul", locale: "tr_TR" } ), keyphraseForms: [ [ "ıstanbul" ] ], expectedCount: 4, - expectedMarkings: [ new Mark( { marked: "A string with 'this is a keywords keywords keywords.
", { keyword: "keyword", locale: "en_US" } ), + keyphraseForms: [ [ "keyword", "keywords" ] ], + expectedCount: 2, + expectedMarkings: [ + new Mark( { marked: "this is athis is a keyword keyword keyword.
", { keyword: "keyword", locale: "en_US" } ), + keyphraseForms: [ [ "keyword", "keywords" ] ], + expectedCount: 2, + expectedMarkings: [ + new Mark( { marked: "this is athis is a keyword keyword keyword.
", { keyword: "keywords", locale: "en_US" } ), + keyphraseForms: [ [ "keyword", "keywords" ] ], + expectedCount: 2, + expectedMarkings: [ + new Mark( { marked: "this is athis is a key key key word word word.
", { keyword: "key words", locale: "en_US" } ), + keyphraseForms: [ [ "key" ], [ "word", "words" ] ], + expectedCount: 2, + expectedMarkings: [ + new Mark( { + marked: "this is athis is a key word key word key word.
", { keyword: "key words", locale: "en_US" } ), + keyphraseForms: [ [ "key" ], [ "word", "words" ] ], + expectedCount: 3, + expectedMarkings: [ + new Mark( { marked: "this is aA sentence about a red panda red panda red panda.
", { keyword: "\"red panda\"", locale: "en_US" } ), + keyphraseForms: [ [ "red panda" ] ], + expectedCount: 3, + expectedMarkings: [ + new Mark( { marked: "A sentence about athis is a keyword keyword keyword.
", { keyword: "keyword", locale: "en_US" } ), + keyphraseForms: [ [ "keyword", "keywords" ] ], + expectedCount: 3, + expectedMarkings: [ + new Mark( { marked: "this is aA sentence about a red panda red panda red panda.
", { keyword: "\"red panda\"", locale: "en_US" } ), + keyphraseForms: [ [ "red panda" ] ], + expectedCount: 3, + expectedMarkings: [ + new Mark( { marked: "A sentence about aAcción Española fue una revista.
", { keyword: "accion", locale: "es_ES" } ), keyphraseForms: [ [ "accion" ] ], expectedCount: 0, @@ -527,6 +549,23 @@ const testCasesWithLocaleMapping = [ ], skip: false, }, + { + description: "still counts a string with a Turkish diacritics when exact matching is used", + paper: new Paper( "Türkçe and Turkce
", { keyword: "\"Türkçe\"", locale: "tr_TR" } ), + keyphraseForms: [ [ "Türkçe" ] ], + expectedCount: 2, + expectedMarkings: [ + new Mark( { + marked: "İstanbul and Istanbul and istanbul and ıstanbul
", { keyword: "İstanbul", locale: "tr_TR" } ), @@ -659,100 +698,6 @@ describe.each( testCasesWithLocaleMapping )( "Test for counting the keyword in a } ); } ); -const testDataForConsecutiveMatching = [ - { - description: "consecutive 1", - paper: new Paper( "this is a keywords keywords keywords.
", { keyword: "keyword", locale: "en_US" } ), - keyphraseForms: [ [ "keyword", "keywords" ] ], - expectedCount: 2, - expectedMarkings: [ - new Mark( { marked: "this is athis is a keyword keyword keyword.
", { keyword: "keyword", locale: "en_US" } ), - keyphraseForms: [ [ "keyword", "keywords" ] ], - expectedCount: 2, - expectedMarkings: [ - new Mark( { marked: "this is athis is a keyword keyword keyword.
", { keyword: "keywords", locale: "en_US" } ), - keyphraseForms: [ [ "keyword", "keywords" ] ], - expectedCount: 2, - expectedMarkings: [ - new Mark( { marked: "this is athis is a key key key word word word.
", { keyword: "key words", locale: "en_US" } ), - keyphraseForms: [ [ "key" ], [ "word", "words" ] ], - expectedCount: 2, - expectedMarkings: [ - new Mark( { - marked: "this is athis is a key word key word key word.
", { keyword: "key words", locale: "en_US" } ), - keyphraseForms: [ [ "key" ], [ "word", "words" ] ], - expectedCount: 3, - expectedMarkings: [ - new Mark( { marked: "this is aA sentence about a red panda red panda red panda.
", { keyword: "\"red panda\"", locale: "en_US" } ), - keyphraseForms: [ [ "red panda" ] ], - expectedCount: 3, - expectedMarkings: [ - new Mark( { marked: "A sentence about aLorem ipsum dolor sit amet, consectetur keyword-keyword, keyword adipiscing elit.
", { locale: "id_ID", keyword: "keyword" } ), keyphraseForms: [ [ "keyword", "keyword-keyword" ] ], @@ -149,6 +150,21 @@ const testCases = [ position: { endOffset: 67, startOffset: 60 } } ) ], skip: false, }, + { + description: "matches both reduplicated keyphrase separated by '-' as two occurrence in English", + paper: new Paper( "Lorem ipsum dolor sit amet, consectetur keyword-keyword, adipiscing elit.
", + { locale: "en_US", keyword: "keyword" } ), + keyphraseForms: [ [ "keyword", "keyword-keyword" ] ], + expectedCount: 2, + expectedMarkings: [ + new Mark( { + // eslint-disable-next-line max-len + marked: "Lorem ipsum dolor sit amet, consecteturA string with a keyword.
", { keyword: "\"keyword\"" } ), From adfc93eb181e0ea095378052193076aeb40352c3 Mon Sep 17 00:00:00 2001 From: aidamarfuatyA string with a word of key-phrase.
", { keyword: "key word" } ), + keyphraseForms: [ [ "key", "keys" ], [ "word", "words" ] ], + expectedCount: 1, + expectedMarkings: [ + new Mark( { + original: "A string with a word of key-phrase.", + marked: "A string with aA string with a key_word.
", { keyword: "key_word" } ), @@ -739,10 +762,9 @@ const buildJapaneseMockResearcher = function( keyphraseForms, helper1, helper2 ) } ); }; - -// Decided not to remove test below as it tests the added logic of the Japanese helpers. describe( "Test for counting the keyword in a text for Japanese", () => { - it.skip( "counts/marks a string of text with a keyword in it.", function() { + // NOTE: Japanese is not yet adapted to use HTML parser, hence, the marking out doesn't include the position information. + it( "counts/marks a string of text with a keyword in it.", function() { const mockPaper = new Paper( "私の猫はかわいいです。
{ expect( keyphraseCount( mockPaper, researcher ).count ).toBe( 1 ); expect( keyphraseCount( mockPaper, researcher ).markings ).toEqual( [ new Mark( { marked: "私の私の猫はかわいい猫です。
{ expect( keyphraseCount( mockPaper, researcher ).markings ).toEqual( [ new Mark( { marked: "私の私の猫はかわいいですかわいい。
", { locale: "ja" } ); const researcher = buildJapaneseMockResearcher( [ [ "猫" ], [ "かわいい" ] ], wordsCountHelper, matchWordsHelper ); buildTree( mockPaper, researcher ); @@ -786,19 +805,6 @@ describe( "Test for counting the keyword in a text for Japanese", () => { marked: "私のA very intelligent cat loves their human. A dog is very cute.
A cute red panda
The red panda (Ailurus fulgens), also known as the lesser panda," + + " is a small mammal native to the eastern Himalayas and southwestern China" ); + researcher.setPaper( paper ); + buildTree( paper, researcher ); + expect( getSentencesFromTree( paper ) ).toEqual( [ + { sourceCodeRange: { endOffset: 19, startOffset: 3 }, + text: "A cute red panda", + tokens: [ + { sourceCodeRange: { endOffset: 4, startOffset: 3 }, text: "A" }, + { sourceCodeRange: { endOffset: 5, startOffset: 4 }, text: " " }, + { sourceCodeRange: { endOffset: 9, startOffset: 5 }, text: "cute" }, + { sourceCodeRange: { endOffset: 10, startOffset: 9 }, text: " " }, + { sourceCodeRange: { endOffset: 13, startOffset: 10 }, text: "red" }, + { sourceCodeRange: { endOffset: 14, startOffset: 13 }, text: " " }, + { sourceCodeRange: { endOffset: 19, startOffset: 14 }, text: "panda" }, + ], + }, + ] ); + } ); } ); diff --git a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js index df9aad42b42..01388e22248 100644 --- a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js +++ b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js @@ -139,13 +139,13 @@ const testCases = [ expectedCount: 2, expectedMarkings: [ new Mark( { - // eslint-disable-next-line max-len - marked: "Lorem ipsum dolor sit amet, consectetur
Lorem ipsum dolor sit amet, consectetur kupu-kupu, adipiscing elit.
", + { locale: "en_US", keyword: "\"kupu-kupu\"" } ), + keyphraseForms: [ [ "kupu-kupu" ] ], + expectedCount: 1, + expectedMarkings: [ + new Mark( { + marked: "Lorem ipsum dolor sit amet, consecteturA string with a keyword.
", { keyword: "\"keyword\"" } ), @@ -204,8 +218,8 @@ const testCases = [ skip: false, }, { - // eslint-disable-next-line max-len - description: "with exact matching, a multi word keyphrase should not be counted if the focus keyphrase has the same words in a different order", + description: "with exact matching, a multi word keyphrase should not be counted if " + + "the focus keyphrase has the same words in a different order", paper: new Paper( "A string with a phrase key.
", { keyword: "\"key phrase\"" } ), keyphraseForms: [ [ "key phrase" ] ], expectedCount: 0, @@ -272,8 +286,13 @@ const testCases = [ }, ]; -// eslint-disable-next-line max-len -describe.each( testCases )( "Test for counting the keyword in a text in english", function( { description, paper, keyphraseForms, expectedCount, expectedMarkings, skip } ) { +describe.each( testCases )( "Test for counting the keyword in a text in english", function( { + description, + paper, + keyphraseForms, + expectedCount, + expectedMarkings, + skip } ) { const test = skip ? it.skip : it; test( description, function() { @@ -308,6 +327,39 @@ const testCasesWithSpecialCharacters = [ position: { endOffset: 27, startOffset: 19 } } ) ], skip: false, }, + { + description: "counts occurrence with dash only when the keyphrase contains dash", + paper: new Paper( "A string with a key-phrase and key phrase.
", { keyword: "key-phrase" } ), + keyphraseForms: [ [ "key-phrase", "key-phrases" ] ], + expectedCount: 1, + expectedMarkings: [ new Mark( { marked: "A string with aA string with a panda-red.
", { keyword: "red-panda" } ), + keyphraseForms: [ [ "red-panda" ] ], + expectedCount: 0, + expectedMarkings: [], + skip: false, + }, + { + description: "should find a match when the sentence contains the keyphrase with a dash but in the wrong order " + + "and the keyphrase doesn't contain dash", + paper: new Paper( "A string with a panda-red.
", { keyword: "red panda" } ), + keyphraseForms: [ [ "red-panda" ] ], + expectedCount: 1, + expectedMarkings: [ + new Mark( { + original: "A string with a panda-red.", + marked: "A string with aA string with a key-word.
", { keyword: "key word" } ), @@ -323,7 +375,7 @@ const testCasesWithSpecialCharacters = [ skip: false, }, { - description: "counts 'key' in 'key-word'.", + description: "counts 'key' in 'key-phrase'", paper: new Paper( "A string with a word of key-phrase.
", { keyword: "key word" } ), keyphraseForms: [ [ "key", "keys" ], [ "word", "words" ] ], expectedCount: 1, @@ -338,11 +390,11 @@ const testCasesWithSpecialCharacters = [ original: "A string with a word of key-phrase.", marked: "A string with aa string of text with the keyword in it, density should be 7.7%
", { keyword: "keyword" } ); const mockResearcher = new EnglishResearcher( mockPaper ); buildTree( mockPaper, mockResearcher ); mockResearcher.addResearchData( "morphology", morphologyDataEN ); @@ -20,7 +21,7 @@ describe( "Test for counting the keyword density in a text with an English resea } ); it( "should return zero if the keyphrase does not occur in the text.", function() { - const mockPaper = new Paper( "a string of text without the keyword in it, density should be 0%", { keyword: "empty" } ); + const mockPaper = new Paper( "a string of text without the keyword in it, density should be 0%
", { keyword: "empty" } ); const mockResearcher = new EnglishResearcher( mockPaper ); mockResearcher.addResearchData( "morphology", morphologyDataEN ); buildTree( mockPaper, mockResearcher ); @@ -29,7 +30,7 @@ describe( "Test for counting the keyword density in a text with an English resea it( "should recognize a keyphrase when it consists umlauted characters", function() { - const mockPaper = new Paper( "Waltz keepin auf mitz auf keepin äöüß weiner blitz deutsch spitzen. ", { keyword: "äöüß" } ); + const mockPaper = new Paper( "Waltz keepin auf mitz auf keepin äöüß weiner blitz deutsch spitzen.
", { keyword: "äöüß" } ); const mockResearcher = new EnglishResearcher( mockPaper ); mockResearcher.addResearchData( "morphology", morphologyDataEN ); buildTree( mockPaper, mockResearcher ); @@ -38,174 +39,135 @@ describe( "Test for counting the keyword density in a text with an English resea } ); it( "should recognize a hyphenated keyphrase.", function() { - const mockPaper = new Paper( "Lorem ipsum dolor sit amet, key word consectetur key-word adipiscing elit ", { keyword: "key-word" } ); + const mockPaper = new Paper( "Lorem ipsum dolor sit amet, key word consectetur key-word adipiscing elit
", { keyword: "key-word" } ); const mockResearcher = new EnglishResearcher( mockPaper ); mockResearcher.addResearchData( "morphology", morphologyDataEN ); buildTree( mockPaper, mockResearcher ); expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 9.090909090909092 ); } ); - it( "should correctly recognize a keyphrase with a non-latin 'ı' in it", function() { - const mockPaper = new Paper( "a string of text with the kapaklı in it, density should be 7.7%", { keyword: "kapaklı" } ); - const mockResearcher = new EnglishResearcher( mockPaper ); - mockResearcher.addResearchData( "morphology", morphologyDataEN ); - buildTree( mockPaper, mockResearcher ); - expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 7.6923076923076925 ); - } ); - it( "should recognize a keyphrase with an underscore in it", function() { - const mockPaper = new Paper( "a string of text with the key_word in it, density should be 7.7%", { keyword: "key_word" } ); + const mockPaper = new Paper( "a string of text with the key_word in it, density should be 7.7%
", { keyword: "key_word" } ); const mockResearcher = new EnglishResearcher( mockPaper ); buildTree( mockPaper, mockResearcher ); expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 7.6923076923076925 ); } ); it( "should return zero if a multiword keyphrase is not present but the multiword keyphrase occurs with an underscore", function() { - const mockPaper = new Paper( "a string of text with the key_word in it, density should be 0.0%", { keyword: "key word" } ); + const mockPaper = new Paper( "a string of text with the key_word in it, density should be 0.0%
", { keyword: "key word" } ); const mockResearcher = new EnglishResearcher( mockPaper ); buildTree( mockPaper, mockResearcher ); expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 0 ); } ); - it( "should not recognize a multiword keyphrase when it is occurs hyphenated", function() { - const mockPaper = new Paper( "a string of text with the key-word in it, density should be 7.7%", { keyword: "key word" } ); + it( "should recognize a multiword keyphrase when it is occurs hyphenated", function() { + const mockPaper = new Paper( "a string of text with the key-word in it, density should be 7.7%
", { keyword: "key word" } ); const mockResearcher = new EnglishResearcher( mockPaper ); buildTree( mockPaper, mockResearcher ); - // This behavior might change in the future. - expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 0 ); + expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 7.6923076923076925 ); } ); it( "should recognize a keyphrase with an ampersand in it", function() { - const mockPaper = new Paper( "a string of text with the key&word in it, density should be 7.7%", { keyword: "key&word" } ); + const mockPaper = new Paper( "a string of text with the key&word in it, density should be 7.7%
", { keyword: "key&word" } ); const mockResearcher = new EnglishResearcher( mockPaper ); buildTree( mockPaper, mockResearcher ); expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 7.6923076923076925 ); } ); - it( "should skip a keyphrase in case of three consecutive keyphrases", function() { - // Consecutive keywords are skipped, so this will match 2 times. - const mockPaper = new Paper( "This is a nice string with a keyword keyword keyword.
", { keyword: "keyword" } ); - const mockResearcher = new EnglishResearcher( mockPaper ); - buildTree( mockPaper, mockResearcher ); - expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 20 ); - } ); - - it( "should only keep the first two keyphrases if there more than three consecutive", function() { - // Consecutive keywords are skipped, so this will match 2 times. + it( "should include all keyphrase occurrence if there are more than two consecutive", function() { const mockPaper = new Paper( "This is a nice string with a keyword keyword keyword keyword keyword keyword keyword keyword keyword.
", { keyword: "keyword" } ); const mockResearcher = new EnglishResearcher( mockPaper ); buildTree( mockPaper, mockResearcher ); - expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 12.5 ); - } ); - - it( "should not skip a keyphrase in case of consecutive keyphrases with morphology sss", function() { - // Consecutive keywords are skipped, so this will match 2 times. - const mockPaper = new Paper( "This is a nice string with a keyword keyword keywords.
", { keyword: "keyword" } ); - const mockResearcher = new EnglishResearcher( mockPaper ); - mockResearcher.addResearchData( "morphology", morphologyDataEN ); - buildTree( mockPaper, mockResearcher ); - expect( getKeywordDensity( mockPaper, mockResearcher ) ).toBe( 30 ); + expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 56.25 ); } ); - it( "should recognize a keyphrase with a '$' in it", function() { - const mockPaper = new Paper( "a string of text with the $keyword in it, density should be 7.7%", { keyword: "$keyword" } ); + const mockPaper = new Paper( "a string of text with the $keyword in it, density should be 7.7%
", { keyword: "$keyword" } ); const mockResearcher = new EnglishResearcher( mockPaper ); buildTree( mockPaper, mockResearcher ); expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 7.6923076923076925 ); } ); it( "should recognize a keyphrase regardless of capitalization", function() { - const mockPaper = new Paper( "a string of text with the Keyword in it, density should be 7.7%", { keyword: "keyword" } ); + const mockPaper = new Paper( "a string of text with the Keyword in it, density should be 7.7%
", { keyword: "keyword" } ); const mockResearcher = new EnglishResearcher( mockPaper ); buildTree( mockPaper, mockResearcher ); expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 7.6923076923076925 ); } ); it( "should recognize a multiword keyphrase regardless of capitalization", function() { - const mockPaper = new Paper( "a string of text with the Key word in it, density should be 14.29%", { keyword: "key word" } ); + const mockPaper = new Paper( "a string of text with the Key word in it, density should be 14.29%
", { keyword: "key word" } ); const mockResearcher = new EnglishResearcher( mockPaper ); buildTree( mockPaper, mockResearcher ); expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 7.142857142857142 ); } ); it( "should recognize apostrophes regardless of the type of quote that was used", function() { - const mockPaper = new Paper( "a string with quotes to match the key'word, even if the quotes differ", { keyword: "key’word" } ); + const mockPaper = new Paper( "a string with quotes to match the key'word, even if the quotes differ
", { keyword: "key’word" } ); const mockResearcher = new EnglishResearcher( mockPaper ); buildTree( mockPaper, mockResearcher ); expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 7.6923076923076925 ); } ); it( "should recognize keyphrase separated by ", function() { - const mockPaper = new Paper( "a string with quotes to match the key word, even if the quotes differ", { keyword: "key word" } ); + const mockPaper = new Paper( "a string with non-breaking space to match the key word
", { keyword: "key word" } ); const mockResearcher = new EnglishResearcher( mockPaper ); buildTree( mockPaper, mockResearcher ); - expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 7.142857142857142 ); + expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 10 ); } ); } ); -describe( "test for counting the keyword density in a text in a language that we haven't supported yet", function() { - it( "returns keyword density", function() { - const mockPaper = new Paper( "Ukara sing isine cemeng.", { keyword: "cemeng" } ); +describe( "test for counting the keyword density in a non-English and non-Japanese language", function() { + it( "returns keyword density for a language that we haven't had the support yet", function() { + const mockPaper = new Paper( "Ukara sing isine cemeng.", { keyword: "cemeng", locale: "jv_ID" } ); const mockResearcher = new DefaultResearcher( mockPaper ); buildTree( mockPaper, mockResearcher ); expect( getKeywordDensity( mockPaper, mockResearcher ) ).toBe( 25 ); } ); + it( "should correctly recognize a Turkish transliterated keyphrase with a 'ı' in it", function() { + const mockPaper = new Paper( "a string of text with the kapakli in it, density should be 7.7%", { keyword: "kapaklı", locale: "tr_TR" } ); + const mockResearcher = new TurkishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 7.6923076923076925 ); + } ); } ); describe( "test for counting the keyword density in a text in a language that uses a custom getWords helper (Japanese)", function() { - /*it( "returns keyword density when the keyword is not found in the sentence", function() { - const mockPaper = new Paper( "小さくて可愛い花の刺繍に関する一般一般の記事です。", { keyword: "猫" } ); + it( "returns keyword density when the keyword is not found in the sentence", function() { + const mockPaper = new Paper( "小さくて可愛い花の刺繍に関する一般一般の記事です。
", { keyword: "猫" } ); const mockResearcher = new JapaneseResearcher( mockPaper ); mockResearcher.addResearchData( "morphology", morphologyDataJA ); + buildTree( mockPaper, mockResearcher ); + expect( getKeywordDensity( mockPaper, mockResearcher ) ).toBe( 0 ); - } );*/ + } ); - it.skip( "returns the keyword density when the keyword is found once", function() { - const mockPaper = new Paper( "私の猫はかわいいです。", { keyword: "猫" } ); + it( "returns the keyword density when the keyword is found once", function() { + const mockPaper = new Paper( "私の猫はかわいいです。
", { keyword: "猫" } ); const mockResearcher = new JapaneseResearcher( mockPaper ); mockResearcher.addResearchData( "morphology", morphologyDataJA ); buildTree( mockPaper, mockResearcher ); expect( getKeywordDensity( mockPaper, mockResearcher ) ).toBe( 16.666666666666664 ); } ); - it.skip( "returns the keyword density when the keyword contains multiple words and the sentence contains an inflected form", function() { + it( "returns the keyword density when the keyword contains multiple words and the sentence contains an inflected form", function() { // 小さく is the inflected form of 小さい. - const mockPaper = new Paper( "小さくて可愛い花の刺繍に関する一般一般の記事です。", { keyword: "小さい花の刺繍" } ); + const mockPaper = new Paper( "小さくて可愛い花の刺繍に関する一般一般の記事です。
", { keyword: "小さい花の刺繍" } ); const mockResearcher = new JapaneseResearcher( mockPaper ); buildTree( mockPaper, mockResearcher ); mockResearcher.addResearchData( "morphology", morphologyDataJA ); expect( getKeywordDensity( mockPaper, mockResearcher ) ).toBe( 6.666666666666667 ); } ); - /*it( "returns the keyword density when the morphologyData is not available to detect inflected forms", function() { - // 小さく is the inflected form of 小さい. - const mockPaper = new Paper( "小さくて可愛い花の刺繍に関する一般一般の記事です。", { keyword: "小さい花の刺繍" } ); - const mockResearcher = new JapaneseResearcher( mockPaper ); - expect( getKeywordDensity( mockPaper, mockResearcher ) ).toBe( 0 ); - } ); - - it( "returns the keyword density when the keyword contains multiple words with an exact match separated in the sentence", function() { - const mockPaper = new Paper( "一日一冊の面白い本を買って読んでるのはできるかどうかやってみます。", { keyword: "一冊の本を読む" } ); - const mockResearcher = new JapaneseResearcher( mockPaper ); - mockResearcher.addResearchData( "morphology", morphologyDataJA ); - expect( getKeywordDensity( mockPaper, mockResearcher ) ).toBe( 5 ); - } ); - it( "returns the keyword density when the keyword contains multiple words with an exact match in the sentence", function() { - const mockPaper = new Paper( "一日一冊の本を読むのはできるかどうかやってみます。", { keyword: "一冊の本を読む" } ); + const mockPaper = new Paper( "一日一冊の本を読むのはできるかどうかやってみます。
", { keyword: "『一冊の本を読む』" } ); const mockResearcher = new JapaneseResearcher( mockPaper ); mockResearcher.addResearchData( "morphology", morphologyDataJA ); + buildTree( mockPaper, mockResearcher ); + expect( getKeywordDensity( mockPaper, mockResearcher ) ).toBe( 6.666666666666667 ); } ); - - it( "returns 0 when the text is empty", function() { - const mockPaper = new Paper( "", { keyword: "猫" } ); - const mockResearcher = new JapaneseResearcher( mockPaper ); - mockResearcher.addResearchData( "morphology", morphologyDataJA ); - expect( getKeywordDensity( mockPaper, mockResearcher ) ).toBe( 0 ); - } );*/ } ); diff --git a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js index 01388e22248..f4dd51bc672 100644 --- a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js +++ b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js @@ -239,7 +239,7 @@ const testCases = [ { description: "can match dollar sign as in '$keyword' with exact matching.", paper: new Paper( "A string with a $keyword.
", { keyword: "\"$keyword\"" } ), - keyphraseForms: [ [ "$keyword" ] ], + keyphraseForms: [ [ "\\$keyword" ] ], expectedCount: 1, expectedMarkings: [ new Mark( { marked: "A string with aA string with a $keyword.
", { keyword: "$keyword" } ), - keyphraseForms: [ [ "$keyword" ] ], + keyphraseForms: [ [ "\\$keyword" ] ], expectedCount: 1, expectedMarkings: [ new Mark( { marked: "A string with a" + nonkeyword.repeat( 1000 ) + "
", { keyword: "keyword" } ), expectedResult: { score: 4, @@ -30,7 +29,7 @@ const testDataWithDefaultResearcher = [ }, }, { - description: "runs the keywordDensity on the paper with a low keyphrase density (0.1%)", + description: "runs the keyphrase density on the paper with a low keyphrase density (0.1%)", paper: new Paper( "" + nonkeyword.repeat( 999 ) + keyword + "
", { keyword: "keyword" } ), expectedResult: { score: 4, @@ -40,7 +39,7 @@ const testDataWithDefaultResearcher = [ }, }, { - description: "runs the keywordDensity on the paper with a good keyphrase density (0.5%)", + description: "runs the keyphrase density on the paper with a good keyphrase density (0.5%)", paper: new Paper( "" + nonkeyword.repeat( 995 ) + keyword.repeat( 5 ) + "
", { keyword: "keyword" } ), expectedResult: { score: 9, @@ -49,7 +48,7 @@ const testDataWithDefaultResearcher = [ }, }, { - description: "runs the keywordDensity on the paper with a good keyphrase density (2%)", + description: "runs the keyphrase density on the paper with a good keyphrase density (2%)", paper: new Paper( "" + nonkeyword.repeat( 980 ) + keyword.repeat( 20 ) + "
", { keyword: "keyword" } ), expectedResult: { score: 9, @@ -58,7 +57,7 @@ const testDataWithDefaultResearcher = [ }, }, { - description: "runs the keywordDensity on the paper with a slightly too high keyphrase density (3.5%)", + description: "runs the keyphrase density on the paper with a slightly too high keyphrase density (3.5%)", paper: new Paper( "" + nonkeyword.repeat( 965 ) + keyword.repeat( 35 ) + "
", { keyword: "keyword" } ), expectedResult: { score: -10, @@ -68,7 +67,7 @@ const testDataWithDefaultResearcher = [ }, }, { - description: "runs the keywordDensity on the paper with a very high keyphrase density (10%)", + description: "runs the keyphrase density on the paper with a very high keyphrase density (10%)", paper: new Paper( "" + nonkeyword.repeat( 900 ) + keyword.repeat( 100 ) + "
", { keyword: "keyword" } ), expectedResult: { score: -50, @@ -78,7 +77,7 @@ const testDataWithDefaultResearcher = [ }, }, { - description: "adjusts the keyphrase density based on the length of the keyword with the actual density remaining at 2% - short keyphrase", + description: "adjusts the keyphrase density based on the length of the keyphrase with the actual density remaining at 2% - short keyphrase", paper: new Paper( "" + nonkeyword.repeat( 960 ) + "b c, ".repeat( 20 ) + "
", { keyword: "b c" } ), expectedResult: { score: 9, @@ -99,7 +98,7 @@ const testDataWithDefaultResearcher = [ }, }, { - description: "adjusts the keyphrase density based on the length of the keyword with the actual density remaining at 2% - long keyphrase", + description: "adjusts the keyphrase density based on the length of the keyphrase with the actual density remaining at 2% - long keyphrase", paper: new Paper( "" + nonkeyword.repeat( 900 ) + "b c d e f, ".repeat( 20 ) + "
", { keyword: "b c d e f" } ), expectedResult: { score: -50, @@ -109,7 +108,7 @@ const testDataWithDefaultResearcher = [ }, }, { - description: "returns a bad result if the keyword is only used once, regardless of the density", + description: "returns a bad result if the keyphrase is only used once, regardless of the density", paper: new Paper( "" + nonkeyword.repeat( 100 ) + keyword + "
", { keyword: "keyword" } ), expectedResult: { score: 4, @@ -119,7 +118,7 @@ const testDataWithDefaultResearcher = [ }, }, { - description: "returns a good result if the keyword is used twice and " + + description: "returns a good result if the keyphrase is used twice and " + "the recommended count is smaller than or equal to 2, regardless of the density", paper: new Paper( "" + nonkeyword.repeat( 100 ) + "a b c, a b c
", { keyword: "a b c", locale: "xx_XX" } ), expectedResult: { @@ -138,7 +137,7 @@ describe.each( testDataWithDefaultResearcher )( "a kephrase density test for lan const researcher = new DefaultResearcher( paper ); buildTree( paper, researcher ); it( description, () => { - const result = new KeywordDensityAssessment().getResult( paper, researcher ); + const result = new KeyphraseDensityAssessment().getResult( paper, researcher ); expect( result.getScore() ).toBe( expectedResult.score ); expect( result.getText() ).toBe( expectedResult.text ); @@ -147,38 +146,38 @@ describe.each( testDataWithDefaultResearcher )( "a kephrase density test for lan const testDataForApplicability = [ { - description: "applies to a paper with a keyword and a text of at least 100 words", + description: "applies to a paper with a keyphrase and a text of at least 100 words", paper: new Paper( "" + nonkeyword.repeat( 100 ) + "
", { keyword: "keyword" } ), researcher: new DefaultResearcher(), expectedResult: true, }, { - description: "does not apply to a paper with a keyword and a text of at least 100 words when the text is inside an element" + + description: "does not apply to a paper with a keyphrase and a text of at least 100 words when the text is inside an element" + "we want to exclude from the analysis", paper: new Paper( "" + nonkeyword.repeat( 100 ) + "", { keyword: "keyword" } ), researcher: new DefaultResearcher(), expectedResult: false, }, { - description: "does not apply to a paper with text of 100 words but without a keyword", + description: "does not apply to a paper with text of 100 words but without a keyphrase", paper: new Paper( "
" + nonkeyword.repeat( 100 ) + "
", { keyword: "" } ), researcher: new DefaultResearcher(), expectedResult: false, }, { - description: "does not apply to a paper with a text containing less than 100 words and with a keyword", + description: "does not apply to a paper with a text containing less than 100 words and with a keyphrase", paper: new Paper( "" + nonkeyword.repeat( 99 ) + "
", { keyword: "keyword" } ), researcher: new DefaultResearcher(), expectedResult: false, }, { - description: "does not apply to a paper with a text containing less than 100 words and without a keyword", + description: "does not apply to a paper with a text containing less than 100 words and without a keyphrase", paper: new Paper( "" + nonkeyword.repeat( 99 ) + "
", { keyword: "" } ), researcher: new DefaultResearcher(), expectedResult: false, }, { - description: "applies to a Japanese paper with a keyword and a text of at least 200 characters", + description: "applies to a Japanese paper with a keyphrase and a text of at least 200 characters", paper: new Paper( "" + longTextJapanese + "
", { keyword: "keyword" } ), researcher: new JapaneseResearcher(), expectedResult: true, @@ -199,40 +198,41 @@ describe.each( testDataForApplicability )( "assessment applicability test", ( { researcher.setPaper( paper ); buildTree( paper, researcher ); it( description, () => { - expect( new KeywordDensityAssessment().isApplicable( paper, researcher ) ).toBe( expectedResult ); + expect( new KeyphraseDensityAssessment().isApplicable( paper, researcher ) ).toBe( expectedResult ); } ); } ); -describe( "Tests for the keywordDensity assessment for languages with morphology", function() { - it( "gives a GOOD result when keyword density is between 3 and 3.5%", function() { +describe( "Tests for the keyphrase density assessment for languages with morphology", function() { + it( "gives a GOOD result when keyphrase density is between 3 and 3.5%", function() { const paper = new Paper( nonkeyword.repeat( 968 ) + keyword.repeat( 32 ), { keyword: "keyword", locale: "en_EN" } ); const researcher = new EnglishResearcher( paper ); researcher.addResearchData( "morphology", morphologyData ); buildTree( paper, researcher ); - const result = new KeywordDensityAssessment().getResult( paper, researcher ); + const result = new KeyphraseDensityAssessment().getResult( paper, researcher ); expect( result.getScore() ).toBe( 9 ); expect( result.getText() ).toBe( "Keyphrase density: " + "The keyphrase was found 32 times. This is great!" 
); } ); - it( "gives a GOOD result when keyword density is between 3 and 3.5%, also for other languages with morphology support", function() { + it( "gives a GOOD result when keyphrase density is between 3 and 3.5%, also for other languages with morphology support", function() { const paper = new Paper( nonkeyword.repeat( 968 ) + keyword.repeat( 32 ), { keyword: "keyword", locale: "de_DE" } ); const researcher = new GermanResearcher( paper ); buildTree( paper, researcher ); researcher.addResearchData( "morphology", morphologyDataDe ); - const result = new KeywordDensityAssessment().getResult( paper, researcher ); + const result = new KeyphraseDensityAssessment().getResult( paper, researcher ); expect( result.getScore() ).toBe( 9 ); expect( result.getText() ).toBe( "Keyphrase density: " + "The keyphrase was found 32 times. This is great!" ); } ); - it( "gives a BAD result when keyword density is between 3 and 3.5%, if morphology support is added, but there is no morphology data", function() { + it( "gives a BAD result when keyphrase density is between 3 and 3.5%, if morphology support is added," + + " but there is no morphology data", function() { const paper = new Paper( nonkeyword.repeat( 968 ) + keyword.repeat( 32 ), { keyword: "keyword", locale: "de_DE" } ); const researcher = new GermanResearcher( paper ); buildTree( paper, researcher ); - const result = new KeywordDensityAssessment().getResult( paper, researcher ); + const result = new KeyphraseDensityAssessment().getResult( paper, researcher ); expect( result.getScore() ).toBe( -10 ); expect( result.getText() ).toBe( "Keyphrase density: " + "The keyphrase was found 32 times. That's more than the recommended maximum of 29 times for a text of this length. 
" + @@ -240,14 +240,14 @@ describe( "Tests for the keywordDensity assessment for languages with morphology } ); } ); -describe( "A test for marking the keyword", function() { +describe( "A test for marking the keyphrase", function() { it( "returns markers", function() { - const keywordDensityAssessment = new KeywordDensityAssessment(); + const keyphraseDensityAssessment = new KeyphraseDensityAssessment(); const paper = new Paper( "This is a very interesting paper with a keyword and another keyword.
", { keyword: "keyword" } ); const researcher = new DefaultResearcher( paper ); buildTree( paper, researcher ); - keywordDensityAssessment.getResult( paper, researcher ); + keyphraseDensityAssessment.getResult( paper, researcher ); const expected = [ new Mark( { marked: "This is a very interesting paper with a " + @@ -263,26 +263,26 @@ describe( "A test for marking the keyword", function() { original: "This is a very interesting paper with a keyword and another keyword.", position: { endOffset: 70, startOffset: 63 }, } ) ]; - expect( keywordDensityAssessment.getMarks() ).toEqual( expected ); + expect( keyphraseDensityAssessment.getMarks() ).toEqual( expected ); } ); it( "returns markers for a keyphrase containing numbers", function() { - const keywordDensityAssessment = new KeywordDensityAssessment(); + const keyphraseDensityAssessment = new KeyphraseDensityAssessment(); const paper = new Paper( "This is the release of YoastSEO 9.3.
", { keyword: "YoastSEO 9.3" } ); const researcher = new DefaultResearcher( paper ); buildTree( paper, researcher ); - keywordDensityAssessment.getResult( paper, researcher ); + keyphraseDensityAssessment.getResult( paper, researcher ); const expected = [ new Mark( { marked: "This is the release of " +
"A flamboyant cat with a toy " + text + " " + text + " " + text + " " + text + " " + text + " " + text + " " + text + " a string of text with the keyword in it a string of text a string of text with the key word in it, with more key words. A string of text with a keyword and multiple keywords in it. A text with A string with KeY worD. There is no a string of text with the keyword in it a string of text a string of text with the key word in it, with more key words. A string of text with a keyword and multiple keywords in it. A text with A string with KeY worD. There is no A string with three keys (key and another key) and one word. A string with a panda-red. 私の猫はかわいい猫です。 a string of text with the keyword in it a string of text a string of text with the key word in it, with more key words. A string of text with a keyword and multiple keywords in it. A text with A string with KeY worD. There is no a string of text with the keyword in it a string of text a string of text with the key word in it, with more key words. A string of text with a keyword and multiple keywords in it. A text with A string with KeY worD. There is no A string with three keys (key and another key) and one word. A string with a keyphrase&. A string with a key word. 私の猫はかわいいですかわいい。 " + text + " " + text + " " + text + " " + text + " " + text + " " + text + " " + text + " a string of text with the keyword in it a string of text A string of text with a keyword and multiple keywords in it. A text with A string with three keys (key and another key) and one word. Lorem ipsum dolor sit amet, consectetur keyword-keyword, keyword adipiscing elit. A sentence with a keyphrase. A string with a $keyword. A string with a key-word. A string with a panda-red. A string with a key-word. A string with a word of key-phrase. A string with a key_word. A string with key&word in it A string with quotes to match the key'word, even if the quotes differ. A string with quotes to match the key'word, even if the quotes differ. 
A string with a keyphrase&. A string with a key word. A string with a «keyword». A string with a keyword. A string with a ‹keyword›. A string with a keyword. Waltz keepin auf mitz auf keepin äöüß weiner blitz deutsch spitzen. 私の猫はかわいいです。 私の猫はかわいい猫です。 A string with key&word in it İstanbul and Istanbul and istanbul and ıstanbul İstanbul and Istanbul and istanbul and ıstanbul İstanbul and Istanbul and istanbul and ıstanbul İstanbul and Istanbul and istanbul and ıstanbul
\n" +
@@ -291,7 +291,7 @@ describe( "A test for marking the keyword", function() {
const researcher = new EnglishResearcher( paper );
buildTree( paper, researcher );
- keywordDensityAssessment.getResult( paper, researcher );
+ keyphraseDensityAssessment.getResult( paper, researcher );
const expected = [
new Mark( {
marked: " A flamboyant In ancient Egyptian mythology, cats were highly revered and considered sacred animals.
" );
+ buildTree( paper, defaultResearcher );
+
+ expect( recommendedKeywordCount( paper, 1, maxRecommendedDensity ) ).toBe( 0 );
} );
} );
From bf76515cd7010a1a0fadcd6cf46725c7cdd7e008 Mon Sep 17 00:00:00 2001
From: aidamarfuaty keyword
in this sentence.keyword
in this sentence.keyword
in this sentence.keyword
in this sentence.In the United States and parts of Europe, " +
+ "tortoiseshell cats are often considered lucky charms.
" );
+ const mockResearcher = new EnglishResearcher( mockPaper );
+ buildTree( mockPaper, mockResearcher );
+ expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 0 );
+ } );
+ it( "should return 0 when the paper is empty", function() {
+ const mockPaper = new Paper( "" );
+ const mockResearcher = new EnglishResearcher( mockPaper );
+ buildTree( mockPaper, mockResearcher );
+ expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 0 );
+ } );
} );
describe( "test for counting the keyword density in a non-English and non-Japanese language", function() {
From 7766e2b203b6bae9e609be5ef74641d4df2493f1 Mon Sep 17 00:00:00 2001
From: hdvos The red panda (Ailurus fulgens), also known as the lesser panda," +
+ " is a small mammal native to the eastern Himalayas and southwestern China
" );
+ researcher.setPaper( paper );
+ buildTree( paper, researcher );
+ expect( getSentencesFromTree( paper ) ).toEqual( [] );
+ } );
} );
From a324c23bc71f4fe57f783e8bd44b9b1e2382e056 Mon Sep 17 00:00:00 2001
From: aidamarfuaty In ancient Egyptian mythology, cats were highly revered and considered sacred animals.
" );
buildTree( paper, defaultResearcher );
- expect( recommendedKeywordCount( paper, 1, maxRecommendedDensity ) ).toBe( 0 );
+ expect( recommendedKeyphraseCount( paper, 1, maxRecommendedDensity ) ).toBe( 0 );
} );
} );
diff --git a/packages/yoastseo/src/languageProcessing/helpers/match/matchWordFormsWithSentence.js b/packages/yoastseo/src/languageProcessing/helpers/match/matchWordFormsWithSentence.js
index 1f1b5a38246..5483aa0bce8 100644
--- a/packages/yoastseo/src/languageProcessing/helpers/match/matchWordFormsWithSentence.js
+++ b/packages/yoastseo/src/languageProcessing/helpers/match/matchWordFormsWithSentence.js
@@ -11,7 +11,7 @@ import getWordsForHTMLParser from "../word/getWordsForHTMLParser";
* @returns {string[]} The tokenized word forms.
*/
export const tokenizeKeyphraseFormsForExactMatching = ( wordForms ) => {
- // Tokenize keyword forms.
+ // Tokenize word form of the keyphrase.
const wordFormText = wordForms[ 0 ];
return getWordsForHTMLParser( wordFormText );
};
@@ -20,9 +20,9 @@ export const tokenizeKeyphraseFormsForExactMatching = ( wordForms ) => {
* Gets the exact matches of the keyphrase.
* Exact matching happens when the user puts the keyphrase in double quotes.
*
- * @param {Sentence} sentence The sentence to match the word forms with.
- * @param {string[]} wordForms The word forms to match.
- * @param {string} locale The locale used in the analysis.
+ * @param {Sentence} sentence The sentence to match the word forms with.
+ * @param {string[]} wordForms The word forms to match.
+ * @param {string} locale The locale used in the analysis.
*
* @returns {{count: number, matches: Token[]}} Object containing the number of the exact matches and the matched tokens.
*/
@@ -32,40 +32,44 @@ const findExactMatchKeyphraseInSentence = ( sentence, wordForms, locale ) => {
matches: [],
};
// Tokenize word forms of the keyphrase.
- const keywordTokens = tokenizeKeyphraseFormsForExactMatching( wordForms );
+ const keyphraseTokens = tokenizeKeyphraseFormsForExactMatching( wordForms );
const sentenceTokens = sentence.tokens;
- // Check if tokenized word forms occur in the same order in the sentence tokens.
- let keywordIndex = 0;
- let sentenceIndex = 0;
+ // Initialize the index of the word token of the keyphrase.
+ let indexOfWordInKeyphrase = 0;
+ // Initialize the index of the word token of the sentence.
+ let indexOfWordInSentence = 0;
let currentMatch = [];
- while ( sentenceIndex < sentenceTokens.length ) {
- // If the current sentence token matches the current word token, add it to the current match.
- const sentenceTokenText = sentenceTokens[ sentenceIndex ].text;
- const keywordTokenText = keywordTokens[ keywordIndex ];
+ // Check if the tokenized word forms occur in the same order in the sentence tokens.
+ while ( indexOfWordInSentence < sentenceTokens.length ) {
+ // If the current sentence token matches the current word token of the keyphrase, add it to the current match.
+ const sentenceTokenText = sentenceTokens[ indexOfWordInSentence ].text;
+ const keyphraseTokenText = keyphraseTokens[ indexOfWordInKeyphrase ];
- const foundMatches = matchTextWithTransliteration( sentenceTokenText.toLowerCase(), keywordTokenText.toLowerCase(), locale );
+ const foundMatches = matchTextWithTransliteration( sentenceTokenText.toLowerCase(), keyphraseTokenText.toLowerCase(), locale );
if ( foundMatches.length > 0 ) {
- currentMatch.push( sentenceTokens[ sentenceIndex ] );
- keywordIndex++;
+ currentMatch.push( sentenceTokens[ indexOfWordInSentence ] );
+ indexOfWordInKeyphrase++;
} else {
- keywordIndex = 0;
+ indexOfWordInKeyphrase = 0;
currentMatch = [];
}
- // If the current match has the same length as the keyword tokens, the keyword forms have been matched.
- // Add the current match to the matches array and reset the keyword index and the current match.
- if ( currentMatch.length === keywordTokens.length ) {
+ /*
+ * If the current match has the same length as the keyphrase tokens, the keyphrase forms have been matched.
+ * Add the current match to the matches array and reset the index of the word in keyphrase and the current match.
+ */
+ if ( currentMatch.length === keyphraseTokens.length ) {
result.matches.push( ...currentMatch );
result.count++;
- keywordIndex = 0;
+ indexOfWordInKeyphrase = 0;
currentMatch = [];
}
- sentenceIndex++;
+ indexOfWordInSentence++;
}
return result;
};
@@ -73,13 +77,13 @@ const findExactMatchKeyphraseInSentence = ( sentence, wordForms, locale ) => {
/**
* Matches a word form of the keyphrase with the tokens from the sentence.
*
- * @param {Token[]} tokens The array of tokens to check.
- * @param {string} wordForm The word form of the keyphrase.
- * @param {string} locale The locale used in the analysis.
+ * @param {Token[]} tokens The array of tokens to check.
+ * @param {string} wordForm The word form of the keyphrase.
+ * @param {string} locale The locale used in the analysis.
*
* @returns {Token[]} The array of the matched tokens.
*/
-const matchTokensWithKeywordForm = ( tokens, wordForm, locale ) => {
+const matchWordFormInTokens = ( tokens, wordForm, locale ) => {
let matches = [];
tokens.forEach( token => {
@@ -95,44 +99,43 @@ const matchTokensWithKeywordForm = ( tokens, wordForm, locale ) => {
/**
* Finds keyphrase forms in a sentence.
*
- * @param {Sentence} sentence The sentence to check.
- * @param {string[]} wordForms The word forms of the keyphrase to check.
- * @param {string} locale The locale used in the analysis.
- * @param {function} matchWordCustomHelper Custom function to match a word form with sentence.
+ * @param {Sentence} sentence The sentence to check.
+ * @param {string[]} wordForms The word forms of the keyphrase to check.
+ * @param {string} locale The locale used in the analysis.
+ * @param {function} matchWordCustomHelper Custom function to match a word form with sentence.
*
* @returns {{count: number, matches: (Token|string)[]}} Object containing the number of the matches and the matched tokens.
*/
const matchWordFormsInSentence = ( sentence, wordForms, locale, matchWordCustomHelper ) => {
const tokens = sentence.tokens.slice();
- let count = 0;
- let matches = [];
+ const result = {
+ count: 0,
+ matches: [],
+ };
wordForms.forEach( wordForm => {
- const occurrence = matchWordCustomHelper
+ const occurrences = matchWordCustomHelper
? matchWordCustomHelper( sentence.text, wordForm )
- : matchTokensWithKeywordForm( tokens, wordForm, locale );
- count += occurrence.length;
- matches = matches.concat( occurrence );
+ : matchWordFormInTokens( tokens, wordForm, locale );
+ result.count += occurrences.length;
+ result.matches = result.matches.concat( occurrences );
} );
- return {
- count: count,
- matches: matches,
- };
+ return result;
};
/**
* Matches the word forms of a keyphrase with a sentence object from the html parser.
*
- * @param {Sentence} sentence The sentence to match against the keywordForms.
- * @param {string[]} wordForms The array of word forms of the keyphrase.
+ * @param {Sentence} sentence The sentence to match against the word forms of a keyphrase.
+ * @param {string[]} wordForms The array of word forms of the keyphrase.
* E.g. If the keyphrase is "key word", then (if premium is activated) this will be [ "key", "keys" ] OR [ "word", "words" ]
* The forms are retrieved higher up (among others in keywordCount.js) with researcher.getResearch( "morphology" ).
*
- * @param {string} locale The locale used for transliteration.
+ * @param {string} locale The locale used for transliteration.
+ * @param {function} matchWordCustomHelper Custom function to match a word form with sentence.
+ * @param {boolean} useExactMatching Whether to match the keyphrase forms exactly or not.
* Depends on whether the keyphrase enclosed in double quotes.
- * @param {function} matchWordCustomHelper Custom function to match a word form with sentence.
- * @param {boolean} useExactMatching Whether to match the keyword forms exactly or not.
*
* @returns {{count: number, matches: (Token|string)[]}} Object containing the number of the matches and the matched tokens.
*/
diff --git a/packages/yoastseo/src/languageProcessing/researches/getKeywordDensity.js b/packages/yoastseo/src/languageProcessing/researches/getKeywordDensity.js
index 2d497a2e954..885d1d7892a 100644
--- a/packages/yoastseo/src/languageProcessing/researches/getKeywordDensity.js
+++ b/packages/yoastseo/src/languageProcessing/researches/getKeywordDensity.js
@@ -21,7 +21,7 @@ export default function getKeyphraseDensity( paper, researcher ) {
return 0;
}
- const keyphraseCount = researcher.getResearch( "keyphraseCount" );
+ const keyphraseCount = researcher.getResearch( "getKeyphraseCount" );
return ( keyphraseCount.count / wordCount ) * 100;
}
diff --git a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js
index 435ba768819..ac73221079b 100644
--- a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js
+++ b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js
@@ -68,17 +68,18 @@ export function countKeyphraseInText( sentences, keyphraseForms, locale, matchWo
* @param {Paper} paper The paper containing keyphrase and text.
* @param {Researcher} researcher The researcher.
*
- * @returns {Object} An object containing an array of all the matches, markings and the keyphrase count.
+ * @returns {{count: number, markings: Mark[], keyphraseLength: number}} An object containing the keyphrase count, markings and the keyphrase length.
*/
export default function getKeyphraseCount( paper, researcher ) {
+ const result = { count: 0, markings: [], keyphraseLength: 0 };
const topicForms = researcher.getResearch( "morphology" );
- const keyphraseForms = topicForms.keyphraseForms;
+ let keyphraseForms = topicForms.keyphraseForms;
const keyphraseLength = keyphraseForms.length;
keyphraseForms = keyphraseForms.map( word => word.map( form => normalizeSingle( form ) ) );
if ( keyphraseLength === 0 ) {
- return { count: 0, markings: [], length: 0 };
+ return result;
}
const matchWordCustomHelper = researcher.getHelper( "matchWordCustomHelper" );
@@ -94,11 +95,11 @@ export default function getKeyphraseCount( paper, researcher ) {
* */
const keyphraseFound = countKeyphraseInText( sentences, keyphraseForms, locale, matchWordCustomHelper, isExactMatchRequested );
- return {
- count: keyphraseFound.count,
- markings: flatten( keyphraseFound.markings ),
- length: keyphraseLength,
- };
+ result.count = keyphraseFound.count;
+ result.markings = flatten( keyphraseFound.markings );
+ result.keyphraseLength = keyphraseLength;
+
+ return result;
}
/**
@@ -112,6 +113,6 @@ export default function getKeyphraseCount( paper, researcher ) {
* @returns {Object} An array of all the matches, markings and the keyphrase count.
*/
export function keywordCount( paper, researcher ) {
- console.warn( "This function is deprecated, use keyphraseCount instead." );
+ console.warn( "This function is deprecated, use getKeyphraseCount instead." );
return getKeyphraseCount( paper, researcher );
}
diff --git a/packages/yoastseo/src/scoring/assessments/seo/KeywordDensityAssessment.js b/packages/yoastseo/src/scoring/assessments/seo/KeywordDensityAssessment.js
index 900f0bb5bf3..086f67b8572 100644
--- a/packages/yoastseo/src/scoring/assessments/seo/KeywordDensityAssessment.js
+++ b/packages/yoastseo/src/scoring/assessments/seo/KeywordDensityAssessment.js
@@ -101,8 +101,8 @@ class KeyphraseDensityAssessment extends Assessment {
*/
getResult( paper, researcher ) {
const customGetWords = researcher.getHelper( "getWordsCustomHelper" );
- this._keyphraseCount = researcher.getResearch( "keyphraseCount" );
- const keyphraseLength = this._keyphraseCount.length;
+ this._keyphraseCount = researcher.getResearch( "getKeyphraseCount" );
+ const keyphraseLength = this._keyphraseCount.keyphraseLength;
const assessmentResult = new AssessmentResult();
@@ -303,7 +303,7 @@ class KeyphraseDensityAssessment extends Assessment {
/**
- * Marks keywords in the text for the keyword density assessment.
+ * Marks the occurrences of keyphrase in the text for the keyphrase density assessment.
*
* @returns {Array} Marks that should be applied.
*/
@@ -313,7 +313,7 @@ class KeyphraseDensityAssessment extends Assessment {
/**
- * Checks whether the paper has a text of the minimum required length and a keyword is set. Language-specific length requirements and methods
+ * Checks whether the paper has a text of the minimum required length and a keyphrase is set. Language-specific length requirements and methods
* of counting text length may apply (e.g. for Japanese, the text should be counted in characters instead of words, which also makes the minimum
* required length higher).
*
diff --git a/packages/yoastseo/src/scoring/helpers/assessments/recommendedKeywordCount.js b/packages/yoastseo/src/scoring/helpers/assessments/recommendedKeywordCount.js
index d668353cfa2..830f223bd48 100644
--- a/packages/yoastseo/src/scoring/helpers/assessments/recommendedKeywordCount.js
+++ b/packages/yoastseo/src/scoring/helpers/assessments/recommendedKeywordCount.js
@@ -2,18 +2,18 @@ import getAllWordsFromTree from "../../../languageProcessing/helpers/word/getAll
import keyphraseLengthFactor from "./keyphraseLengthFactor.js";
/**
- * Calculates a recommended keyword count for a paper's text. The formula to calculate this number is based on the
- * keyword density formula.
+ * Calculates a recommended keyphrase count for a paper's text. The formula to calculate this number is based on the
+ * keyphrase density formula.
*
- * @param {Paper} paper The paper to analyze.
- * @param {number} keyphraseLength The length of the focus keyphrase in words.
- * @param {number} recommendedKeywordDensity The recommended keyword density (either maximum or minimum).
- * @param {string} maxOrMin Whether it's a maximum or minimum recommended keyword density.
- * @param {function} customGetWords A helper to get words from the text for languages that don't use the default approach.
+ * @param {Paper} paper The paper to analyze.
+ * @param {number} keyphraseLength The length of the focus keyphrase in words.
+ * @param {number} recommendedKeyphraseDensity The recommended keyphrase density (either maximum or minimum).
+ * @param {string} maxOrMin Whether it's a maximum or minimum recommended keyphrase density.
+ * @param {function} customGetWords A helper to get words from the text for languages that don't use the default approach.
*
- * @returns {number} The recommended keyword count.
+ * @returns {number} The recommended keyphrase count.
*/
-export default function( paper, keyphraseLength, recommendedKeywordDensity, maxOrMin, customGetWords ) {
+export default function( paper, keyphraseLength, recommendedKeyphraseDensity, maxOrMin, customGetWords ) {
const wordCount = customGetWords ? customGetWords( paper.getText() ).length : getAllWordsFromTree( paper ).length;
if ( wordCount === 0 ) {
@@ -21,23 +21,23 @@ export default function( paper, keyphraseLength, recommendedKeywordDensity, maxO
}
const lengthKeyphraseFactor = keyphraseLengthFactor( keyphraseLength );
- const recommendedKeywordCount = ( recommendedKeywordDensity * wordCount ) / ( 100 * lengthKeyphraseFactor );
+ const recommendedKeyphraseCount = ( recommendedKeyphraseDensity * wordCount ) / ( 100 * lengthKeyphraseFactor );
/*
- * The recommended keyword count should always be at least 2,
- * regardless of the keyword density, the word count, or the keyphrase length.
+ * The recommended keyphrase count should always be at least 2,
+ * regardless of the keyphrase density, the word count, or the keyphrase length.
*/
- if ( recommendedKeywordCount < 2 ) {
+ if ( recommendedKeyphraseCount < 2 ) {
return 2;
}
switch ( maxOrMin ) {
case "min":
// Round up for the recommended minimum count.
- return Math.ceil( recommendedKeywordCount );
+ return Math.ceil( recommendedKeyphraseCount );
default:
case "max":
// Round down for the recommended maximum count.
- return Math.floor( recommendedKeywordCount );
+ return Math.floor( recommendedKeyphraseCount );
}
}
From c47d09cc069ff8a64da9eae4fabf64a62256286b Mon Sep 17 00:00:00 2001
From: aidamarfuaty
A string with a keyphrase&.
", { keyword: "keyphrase&" } ), keyphraseForms: [ [ "keyphrase" ] ], expectedCount: 1, From d27734a4646378ce78863d7b9dc5472032a2a6b2 Mon Sep 17 00:00:00 2001 From: aidamarfuatysome content
not an h1
blah blah
a paragraph
" ); - const assessment = h1Assessment.getResult( mockPaper, new EnglishResearcher( mockPaper ) ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + const assessment = h1Assessment.getResult( mockPaper, mockResearcher ); expect( assessment.getScore() ).toEqual( 0 ); expect( assessment.getText() ).toEqual( "" ); @@ -17,7 +20,10 @@ describe( "An assessment to check whether there is more than one H1 in the text" it( "returns the default result when there's an H1 at the beginning of the body", function() { const mockPaper = new Paper( "a paragraph
" ); - const assessment = h1Assessment.getResult( mockPaper, new EnglishResearcher( mockPaper ) ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + const assessment = h1Assessment.getResult( mockPaper, mockResearcher ); + expect( assessment.getScore() ).toEqual( 0 ); expect( assessment.getText() ).toEqual( "" ); @@ -26,7 +32,9 @@ describe( "An assessment to check whether there is more than one H1 in the text" it( "also returns the default result when there's an H1 and it's not at the beginning of the body", function() { const mockPaper = new Paper( "Cool intro
a paragraph
" ); - const assessment = h1Assessment.getResult( mockPaper, new EnglishResearcher( mockPaper ) ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + const assessment = h1Assessment.getResult( mockPaper, mockResearcher ); expect( assessment.getScore() ).toEqual( 0 ); expect( assessment.getText() ).toEqual( "" ); @@ -36,7 +44,9 @@ describe( "An assessment to check whether there is more than one H1 in the text" it( "returns a bad score and appropriate feedback when there are multiple one superfluous (i.e., non-title) " + "H1s in the body of the text", function() { const mockPaper = new Paper( "a paragraph
a paragraph
a paragraph
a paragraph
" ); - h1Assessment.getResult( mockPaper, new EnglishResearcher( mockPaper ) ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + h1Assessment.getResult( mockPaper, mockResearcher ); const expected = [ - new Mark( { original: "a paragraph
" ); - const results = h1Assessment.getResult( mockPaper, new EnglishResearcher( mockPaper ) ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + const results = h1Assessment.getResult( mockPaper, mockResearcher ); expect( results._hasMarks ).toEqual( false ); } ); @@ -91,4 +113,11 @@ describe( "Checks if the assessment is applicable", function() { expect( assessment ).toBe( false ); } ); + + it( "is not applicable when there there is no text but there is an image", function() { + const mockPaper = new Paper( "" ); + const assessment = h1Assessment.isApplicable( mockPaper ); + + expect( assessment ).toBe( false ); + } ); } ); From cfd8f24383fafc4918b2bdf220d263680aba008e Mon Sep 17 00:00:00 2001 From: aidamarfuatyA sentence with a keyphrase.
", { keyword: "\".sentence\"" } ), keyphraseForms: [ [ ".sentence" ] ], expectedCount: 0, @@ -816,7 +817,140 @@ describe.each( testCasesWithLocaleMapping )( "Test for counting the keyphrase in expect( keyphraseCountResult.markings ).toEqual( expectedMarkings ); } ); } ); +const testDataForHTMLTags = [ + { + description: "counts keyphrase occurrence correctly in a text containing `` tag, and outputs correct mark objects", + paper: new Paper( "The forepaws possess a \"false thumb\", which is an extension of a wrist bone, " + + "the radial sesamoid found in many carnivorans. This thumb allows the animal to grip onto bamboo stalks " + + "and both the digits and wrist bones are highly flexible. The red panda shares this feature " + + "with the giant panda, which has a larger sesamoid that is more compressed at the sides." + + " In addition, the red panda's sesamoid has a more sunken tip while the giant panda's curves in the middle.
", + { keyword: "giant panda", locale: "en_US" } ), + keyphraseForms: [ [ "giant" ], [ "panda", "pandas" ] ], + expectedCount: 1, + expectedMarkings: [ + new Mark( { + marked: " The redThe forepaws possess a \"false thumb\", which is an extension of a wrist bone, " + + "the radial sesamoid found in many carnivorans. This thumb allows the animal to grip onto bamboo stalks " + + "and both the digits and wrist bones are highly flexible. The red panda shares this feature " + + "with the giant panda, which has a larger sesamoid that is more compressed at the sides." + + " In addition, the red panda's sesamoid has a more sunken tip while the giant panda's curves in the middle.
", + { keyword: "giant panda", locale: "en_US" } ), + keyphraseForms: [ [ "giant" ], [ "panda", "pandas" ] ], + expectedCount: 1, + expectedMarkings: [ + new Mark( { + marked: " The redThe forepaws possess a \"false thumb\", which is an extension of a wrist bone, " + + "the radial sesamoid found in many carnivorans. This thumb allows the animal to grip onto bamboo stalks " + + "and both the digits and wrist bones are highly flexible. The red panda shares this feature " + + "with the giant panda, which has a larger sesamoid " + + "that is more compressed at the sides." + + " In addition, the red panda's sesamoid has a more sunken tip while the giant panda's curves in the middle.
", + { keyword: "giant panda", locale: "en_US" } ), + keyphraseForms: [ [ "giant" ], [ "panda", "pandas" ] ], + expectedCount: 1, + expectedMarkings: [ + new Mark( { + marked: " The red私の猫はかわいいです。
私の猫はかわいいです。", { locale: "ja", keyphrase: "猫" } ); const researcher = buildJapaneseMockResearcher( [ [ "猫" ] ], wordsCountHelper, matchWordsHelper ); buildTree( mockPaper, researcher ); @@ -990,7 +990,7 @@ describe( "Test for counting the keyphrase in a text for Japanese", () => { } ); it( "counts/marks a string of text with multiple occurrences of the same keyphrase in it.", function() { - const mockPaper = new Paper( "私の猫はかわいい猫です。
私の猫はかわいい猫です。", { locale: "ja", keyphrase: "猫" } ); const researcher = buildJapaneseMockResearcher( [ [ "猫" ] ], wordsCountHelper, matchWordsHelper ); buildTree( mockPaper, researcher ); @@ -1002,7 +1002,7 @@ describe( "Test for counting the keyphrase in a text for Japanese", () => { } ); it( "counts a string if text with no keyphrase in it.", function() { - const mockPaper = new Paper( "私の猫はかわいいです。", { locale: "ja" } ); + const mockPaper = new Paper( "私の猫はかわいいです。
", { locale: "ja" } ); const researcher = buildJapaneseMockResearcher( [ [ "猫" ], [ "会い" ] ], wordsCountHelper, matchWordsHelper ); buildTree( mockPaper, researcher ); expect( getKeyphraseCount( mockPaper, researcher ).count ).toBe( 0 ); diff --git a/packages/yoastseo/src/languageProcessing/helpers/match/matchWordFormsWithSentence.js b/packages/yoastseo/src/languageProcessing/helpers/match/matchWordFormsWithSentence.js index 8aaa3da9d70..4a82c1bdabe 100644 --- a/packages/yoastseo/src/languageProcessing/helpers/match/matchWordFormsWithSentence.js +++ b/packages/yoastseo/src/languageProcessing/helpers/match/matchWordFormsWithSentence.js @@ -104,24 +104,27 @@ const matchWordFormInTokens = ( tokens, wordForm, locale ) => { /** * Finds keyphrase forms in a sentence. * - * @param {Sentence} sentence The sentence to check. - * @param {string[]} wordForms The word forms of the keyphrase to check. - * @param {string} locale The locale used in the analysis. - * @param {function} matchWordCustomHelper Custom function to match a word form with sentence. + * @param {Sentence|string} sentence The sentence to check. + * @param {string[]} wordForms The word forms of the keyphrase to check. + * @param {string} locale The locale used in the analysis. + * @param {function} matchWordCustomHelper Custom function to match a word form with sentence. * * @returns {{count: number, matches: (Token|string)[]}} Object containing the number of the matches and the matched tokens. */ const matchWordFormsInSentence = ( sentence, wordForms, locale, matchWordCustomHelper ) => { - const tokens = sentence.tokens.slice(); const result = { count: 0, matches: [], }; wordForms.forEach( wordForm => { - const occurrences = matchWordCustomHelper - ? 
matchWordCustomHelper( sentence.text, wordForm ) - : matchWordFormInTokens( tokens, wordForm, locale ); + let occurrences = []; + if ( matchWordCustomHelper ) { + occurrences = matchWordCustomHelper( sentence, wordForm ); + } else { + const tokens = sentence.tokens.slice(); + occurrences = matchWordFormInTokens( tokens, wordForm, locale ); + } result.count += occurrences.length; result.matches = result.matches.concat( occurrences ); } ); @@ -132,7 +135,7 @@ const matchWordFormsInSentence = ( sentence, wordForms, locale, matchWordCustomH /** * Matches the word forms of a keyphrase with a sentence object from the html parser. * - * @param {Sentence} sentence The sentence to match against the word forms of a keyphrase. + * @param {Sentence|string} sentence The sentence to match against the word forms of a keyphrase. * @param {string[]} wordForms The array of word forms of the keyphrase. * E.g. If the keyphrase is "key word", then (if premium is activated) this will be [ "key", "keys" ] OR [ "word", "words" ] * The forms are retrieved higher up (among others in keywordCount.js) with researcher.getResearch( "morphology" ). diff --git a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js index 31394474c4c..e26b819b8ad 100644 --- a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js +++ b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js @@ -5,11 +5,12 @@ import getMarkingsInSentence from "../helpers/highlighting/getMarkingsInSentence import matchWordFormsWithSentence from "../helpers/match/matchWordFormsWithSentence"; import isDoubleQuoted from "../helpers/match/isDoubleQuoted"; import { markWordsInASentence } from "../helpers/word/markWordsInSentences"; +import getSentences from "../helpers/sentence/getSentences"; /** * Counts the occurrences of the keyphrase in the text and creates the Mark objects for the matches. 
* - * @param {Sentence[]} sentences The sentences to check. + * @param {(Sentence|string)[]} sentences The sentences to check. * @param {Array} keyphraseForms The keyphrase forms. * @param {string} locale The locale used in the analysis. * @param {function} matchWordCustomHelper A custom helper to match words with a text. @@ -49,7 +50,7 @@ export function countKeyphraseInText( sentences, keyphraseForms, locale, matchWo if ( matchWordCustomHelper ) { // Currently, this check is only applicable for Japanese. - markings = markWordsInASentence( sentence.text, foundWords, matchWordCustomHelper ); + markings = markWordsInASentence( sentence, foundWords, matchWordCustomHelper ); } else { markings = getMarkingsInSentence( sentence, foundWords ); } @@ -78,7 +79,7 @@ export default function getKeyphraseCount( paper, researcher ) { /* * Normalize single quotes so that word form with different type of single quotes can still be matched. - * For exmple, "key‛word" should match "key'word". + * For example, "key‛word" should match "key'word". */ keyphraseForms = keyphraseForms.map( word => word.map( form => normalizeSingle( form ) ) ); @@ -87,8 +88,10 @@ export default function getKeyphraseCount( paper, researcher ) { } const matchWordCustomHelper = researcher.getHelper( "matchWordCustomHelper" ); + const customSentenceTokenizer = researcher.getHelper( "memoizedTokenizer" ); const locale = paper.getLocale(); - const sentences = getSentencesFromTree( paper ); + // When the custom helper is available, we're using the sentences retrieved from the text for the analysis. + const sentences = matchWordCustomHelper ? getSentences( paper.getText(), customSentenceTokenizer ) : getSentencesFromTree( paper ); // Exact matching is requested when the keyphrase is enclosed in double quotes. 
const isExactMatchRequested = isDoubleQuoted( paper.getKeyword() ); From 15b2c21d8d83d039c7522b4ece11eee28cf1eef6 Mon Sep 17 00:00:00 2001 From: aidamarfuaty" + - "[vc_row]" + - "[vc_column]" + - "[vc_column_text" + - ' css_animation="bounceInLeft"' + - ' el_class="tøüst123"' + - ' css=".vc_custom_1482153765626{margin-right: 4px !important;}"]' + - "This is the text that needs to be preserved." + - "[/vc_column_text]" + - "[/vc_column]" + - "[/vc_row]" + - "
"; - const expected = "This is the text that needs to be preserved.
"; - - const actual = removeUnknownShortCodes( input ); - - expect( actual ).toBe( expected ); - } ); -} ); From f5228e391f8b04afe2c3924e7d7041969685ad49 Mon Sep 17 00:00:00 2001 From: hdvosHello, world!
犬が大好き
World!
thanks
forall
the fish!This is the first sentence.
"; + const paper = new Paper( html ); const researcher = Factory.buildMockResearcher( {}, true, false, false, { memoizedTokenizer: memoizedSentenceTokenizer } ); const languageProcessor = new LanguageProcessor( researcher ); - expect( build( html, languageProcessor ) ).toEqual( { + expect( build( paper, languageProcessor ) ).toEqual( { name: "#document-fragment", attributes: {}, childNodes: [ @@ -718,12 +725,13 @@ describe( "The parse function", () => { it( "parses an HTML text with a Yoast breadcrumbs widget in Elementor, which should be filtered out", () => { // HTML:The first sentence
const html = "The first sentence
"; + const paper = new Paper( html ); const researcher = Factory.buildMockResearcher( {}, true, false, false, { memoizedTokenizer: memoizedSentenceTokenizer } ); const languageProcessor = new LanguageProcessor( researcher ); - expect( build( html, languageProcessor ) ).toEqual( + expect( build( paper, languageProcessor ) ).toEqual( { name: "#document-fragment", attributes: {}, @@ -803,12 +811,13 @@ describe( "The parse function", () => { } ); it( "parses an HTML text with a script element inside a paragraph", () => { const html = "Hello, world!
Hello, world!
"; + const paper = new Paper( html ); const researcher = Factory.buildMockResearcher( {}, true, false, false, { memoizedTokenizer: memoizedSentenceTokenizer } ); const languageProcessor = new LanguageProcessor( researcher ); - expect( build( html, languageProcessor ) ).toEqual( { + expect( build( paper, languageProcessor ) ).toEqual( { name: "#document-fragment", attributes: {}, childNodes: [ { @@ -962,12 +972,13 @@ describe( "The parse function", () => { } ); it( "parses an HTML text with a comment inside a paragraph", () => { const html = "Hello, world!
Hello, world!
Hello code! array.push( something )
Hello world!
Hello array.push( something )
code!
Some text and code console.log( code )
The red panda's coat is mainly red or orange-brown with a black belly and legs.
\n" + + "\n" + + "\n" + + "\n" + + "The red panda's coat is mainly red or orange-brown with a black belly and legs.
", + validationIssues: [], + attributes: { + content: "The red panda's coat is mainly red or orange-brown with a black belly and legs.", + dropCap: false, + }, + innerBlocks: [], + }, + { + clientId: "ce5c002f-eea2-4a92-be4a-6fd25e947f45", + name: "core/list", + isValid: true, + originalContent: "The Norwegian Forest cat is adapted to survive Norway's cold weather.
\n" + ""; + const paper = new Paper( html ); + const researcher = Factory.buildMockResearcher( {}, true, false, false, { memoizedTokenizer: memoizedSentenceTokenizer } ); const languageProcessor = new LanguageProcessor( researcher ); - expect( build( html, languageProcessor ) ).toEqual( { + expect( build( paper, languageProcessor ) ).toEqual( { name: "#document-fragment", attributes: {}, childNodes: [ @@ -1664,12 +1957,13 @@ describe( "parsing html with Yoast blocks that enter the Paper as html comments" it( "parses an HTML text with a Yoast siblings block", () => { const html = "Hello, world!
"; + const paper = new Paper( html ); const researcher = Factory.buildMockResearcher( {}, true, false, false, { memoizedTokenizer: memoizedSentenceTokenizer } ); const languageProcessor = new LanguageProcessor( researcher ); - expect( build( html, languageProcessor ) ).toEqual( { + expect( build( paper, languageProcessor ) ).toEqual( { name: "#document-fragment", attributes: {}, childNodes: [ @@ -1757,12 +2051,13 @@ describe( "parsing html with Yoast blocks that enter the Paper as html comments" it( "parses an HTML text with a Yoast subpages block", () => { const html = "" + + "Giant panda is is relative to red panda
Test
Giant panda" + + " is relative to red panda
Tets
Giant panda is test tests
Tests
This is a [shortcode] test.
"; + + const shortcodes = [ "shortcode" ]; + + const filtered = filterShortcodesFromHTML( html, shortcodes ); + + expect( filtered ).toEqual( "This is a test.
" ); + } ); + it( "should filter a shortcode with parameters from an html string", function() { + const html = "This is a [shortcode param=\"value\"] test.
"; + + const shortcodes = [ "shortcode" ]; + + const filtered = filterShortcodesFromHTML( html, shortcodes ); + + expect( filtered ).toEqual( "This is a test.
" ); + } ); + it( "should filter multiple shortcodes from an html string", function() { + const html = "This is a [shortcode] test.
This is a [another_shortcode] test.
"; + + const shortcodes = [ "shortcode", "another_shortcode" ]; + + const filtered = filterShortcodesFromHTML( html, shortcodes ); + + expect( filtered ).toEqual( "This is a test.
This is a test.
" ); + } ); + it( "should not filter something between square brackets if it is not a shortcode.", function() { + const html = "This is a [not_a_shortcode] test.
"; + + const shortcodes = [ "shortcode", "another_shortcode" ]; + + const filtered = filterShortcodesFromHTML( html, shortcodes ); + + expect( filtered ).toEqual( "This is a [not_a_shortcode] test.
" ); + } ); +} ); diff --git a/packages/yoastseo/spec/specHelpers/parse/buildTree.js b/packages/yoastseo/spec/specHelpers/parse/buildTree.js index adeb2358faa..b3b1d65c0e4 100644 --- a/packages/yoastseo/spec/specHelpers/parse/buildTree.js +++ b/packages/yoastseo/spec/specHelpers/parse/buildTree.js @@ -4,6 +4,7 @@ import adapt from "../../../src/parse/build/private/adapt"; import { parseFragment } from "parse5"; import filterTree from "../../../src/parse/build/private/filterTree"; import permanentFilters from "../../../src/parse/build/private/alwaysFilterElements"; +import { filterShortcodesFromHTML } from "../../../src/languageProcessing/helpers/sanitize/filterShortcodesFromTree"; /** * Builds an HTML tree for a given paper and researcher, and adds it to the paper. @@ -17,6 +18,9 @@ export default function buildTree( paper, researcher ) { const shortcodes = paper._attributes && paper._attributes.shortcodes; paper.setTree( build( paper.getText(), languageProcessor, shortcodes ) ); + if ( shortcodes ) { + paper._text = filterShortcodesFromHTML( paper.getText(), shortcodes ); + } } /** diff --git a/packages/yoastseo/src/languageProcessing/helpers/sanitize/filterShortcodesFromTree.js b/packages/yoastseo/src/languageProcessing/helpers/sanitize/filterShortcodesFromTree.js index d636b08b7dc..e3ade5140ba 100644 --- a/packages/yoastseo/src/languageProcessing/helpers/sanitize/filterShortcodesFromTree.js +++ b/packages/yoastseo/src/languageProcessing/helpers/sanitize/filterShortcodesFromTree.js @@ -1,3 +1,23 @@ +/** + * Creates a regex to filter shortcodes from HTML. + * @param {string[]} shortcodeTags The tags of the shortcodes to filter. + * @returns {RegExp} The regex to recognize the shortcodes. + */ +const createShortcodeTagsRegex = shortcodeTags => { + const sortcodeTagsRegexString = `\\[(${ shortcodeTags.join( "|" ) })[^\\]]*\\]`; + return new RegExp( sortcodeTagsRegexString, "g" ); +}; + +/** + * Filters shortcodes from HTML string. 
+ * @param {string} html The HTML to filter. + * @param {string[]} shortcodeTags The tags of the shortcodes to filter. + * @returns {string} The filtered HTML. + */ +export const filterShortcodesFromHTML = ( html, shortcodeTags ) => { + const shortcodeTagsRegex = createShortcodeTagsRegex( shortcodeTags ); + return html.replace( shortcodeTagsRegex, "" ); +}; /** * Filters shortcodes from a sentence. @@ -53,7 +73,7 @@ const filterShortcodesFromSentences = ( tree, shortcodeTags, shortcodeTagsRegex */ const filterShortcodesFromTree = ( tree, shortcodeTags ) => { if ( shortcodeTags ) { - const sortcodeTagsRegexString = `\\[(${ shortcodeTags.join( "|" ) })[^\\]]*\\]`; + const sortcodeTagsRegexString = createShortcodeTagsRegex( shortcodeTags ); const shortcodeTagsRegex = new RegExp( sortcodeTagsRegexString, "g" ); filterShortcodesFromSentences( tree, shortcodeTags, shortcodeTagsRegex ); diff --git a/packages/yoastseo/src/scoring/assessor.js b/packages/yoastseo/src/scoring/assessor.js index dd9e49c92d9..1d6ace55c28 100644 --- a/packages/yoastseo/src/scoring/assessor.js +++ b/packages/yoastseo/src/scoring/assessor.js @@ -7,6 +7,7 @@ import { __, sprintf } from "@wordpress/i18n"; import { filter, find, findIndex, isFunction, isUndefined, map } from "lodash-es"; import LanguageProcessor from "../parse/language/LanguageProcessor"; import { build } from "../parse/build"; +import { filterShortcodesFromHTML } from "../languageProcessing/helpers/sanitize/filterShortcodesFromTree"; // The maximum score of individual assessment is 9. This is why we set the "score rating" here to 9. 
const ScoreRating = 9; @@ -126,6 +127,9 @@ Assessor.prototype.assess = function( paper ) { const languageProcessor = new LanguageProcessor( this._researcher ); const shortcodes = paper._attributes && paper._attributes.shortcodes; paper.setTree( build( paper.getText(), languageProcessor, shortcodes ) ); + if ( shortcodes ) { + paper._text = filterShortcodesFromHTML( paper.getText(), shortcodes ); + } let assessments = this.getAvailableAssessments(); this.results = []; diff --git a/packages/yoastseo/src/worker/AnalysisWebWorker.js b/packages/yoastseo/src/worker/AnalysisWebWorker.js index 1ba4522827b..70e0b11f0d7 100644 --- a/packages/yoastseo/src/worker/AnalysisWebWorker.js +++ b/packages/yoastseo/src/worker/AnalysisWebWorker.js @@ -33,6 +33,7 @@ import wrapTryCatchAroundAction from "./wrapTryCatchAroundAction"; // Tree assessor functionality. import { ReadabilityScoreAggregator, SEOScoreAggregator } from "../parsedPaper/assess/scoreAggregators"; +import { filterShortcodesFromHTML } from "../languageProcessing/helpers/sanitize/filterShortcodesFromTree"; const logger = getLogger( "yoast-analysis-worker" ); logger.setDefaultLevel( "error" ); @@ -1084,6 +1085,11 @@ export default class AnalysisWebWorker { const shortcodes = this._paper._attributes && this._paper._attributes.shortcodes; this._paper.setTree( build( this._paper.getText(), languageProcessor, shortcodes ) ); + if ( shortcodes ) { + this._paper._text = filterShortcodesFromHTML( this._paper.getText(), shortcodes ); + } + + // Update the configuration locale to the paper locale. 
this.setLocale( this._paper.getLocale() ); } @@ -1407,6 +1413,9 @@ export default class AnalysisWebWorker { const languageProcessor = new LanguageProcessor( researcher ); const shortcodes = paper._attributes && paper._attributes.shortcodes; paper.setTree( build( paper.getText(), languageProcessor, shortcodes ) ); + if ( shortcodes ) { + paper._text = filterShortcodesFromHTML( paper.getText(), shortcodes ); + } } } From 04a0bbb8c2e9db41125125a14a33960f71e53adf Mon Sep 17 00:00:00 2001 From: aidamarfuaty tags.
const allParagraphs = helpers.matchStringWithRegex( text, paragraphsRegex );
diff --git a/packages/yoastseo/src/languageProcessing/researches/getParagraphLength.js b/packages/yoastseo/src/languageProcessing/researches/getParagraphLength.js
index acda161f183..a85cb29d00f 100644
--- a/packages/yoastseo/src/languageProcessing/researches/getParagraphLength.js
+++ b/packages/yoastseo/src/languageProcessing/researches/getParagraphLength.js
@@ -6,6 +6,7 @@ import countWords from "../helpers/word/countWords.js";
import matchParagraphs from "../helpers/html/matchParagraphs.js";
import { filter } from "lodash-es";
import removeHtmlBlocks from "../helpers/html/htmlParser";
+import { filterShortcodesFromHTML } from "../helpers";
/**
* Gets all paragraphs and their word counts or character counts from the text.
@@ -18,6 +19,7 @@ import removeHtmlBlocks from "../helpers/html/htmlParser";
export default function( paper, researcher ) {
let text = paper.getText();
text = removeHtmlBlocks( text );
+ text = filterShortcodesFromHTML( text, paper._attributes && paper._attributes.shortcodes );
text = excludeTableOfContentsTag( text );
// Exclude the Estimated Reading time text from the research
diff --git a/packages/yoastseo/src/languageProcessing/researches/getPassiveVoiceResult.js b/packages/yoastseo/src/languageProcessing/researches/getPassiveVoiceResult.js
index c1bd628f69b..d099d005e63 100644
--- a/packages/yoastseo/src/languageProcessing/researches/getPassiveVoiceResult.js
+++ b/packages/yoastseo/src/languageProcessing/researches/getPassiveVoiceResult.js
@@ -4,6 +4,7 @@ import Sentence from "../../languageProcessing/values/Sentence.js";
import { forEach } from "lodash-es";
import removeHtmlBlocks from "../helpers/html/htmlParser";
+import { filterShortcodesFromHTML } from "../helpers";
/**
* Looks for morphological passive voice.
@@ -18,6 +19,7 @@ export const getMorphologicalPassives = function( paper, researcher ) {
const isPassiveSentence = researcher.getHelper( "isPassiveSentence" );
let text = paper.getText();
text = removeHtmlBlocks( text );
+ text = filterShortcodesFromHTML( text, paper._attributes && paper._attributes.shortcodes );
const memoizedTokenizer = researcher.getHelper( "memoizedTokenizer" );
const sentences = getSentences( text, memoizedTokenizer )
.map( function( sentence ) {
@@ -55,6 +57,7 @@ export const getPeriphrasticPassives = function( paper, researcher ) {
const getClauses = researcher.getHelper( "getClauses" );
let text = paper.getText();
text = removeHtmlBlocks( text );
+ text = filterShortcodesFromHTML( text, paper._attributes && paper._attributes.shortcodes );
const memoizedTokenizer = researcher.getHelper( "memoizedTokenizer" );
const sentences = getSentences( text, memoizedTokenizer )
.map( function( sentence ) {
diff --git a/packages/yoastseo/src/languageProcessing/researches/getSentenceBeginnings.js b/packages/yoastseo/src/languageProcessing/researches/getSentenceBeginnings.js
index a8d0eb66151..22f9c3ae56a 100644
--- a/packages/yoastseo/src/languageProcessing/researches/getSentenceBeginnings.js
+++ b/packages/yoastseo/src/languageProcessing/researches/getSentenceBeginnings.js
@@ -5,6 +5,7 @@ import { stripFullTags as stripTags } from "../helpers/sanitize/stripHTMLTags.js
import { filter, forEach, isEmpty } from "lodash-es";
import removeHtmlBlocks from "../helpers/html/htmlParser";
+import { filterShortcodesFromHTML } from "../helpers";
/**
* Compares the first word of each sentence with the first word of the following sentence.
@@ -95,6 +96,7 @@ export default function( paper, researcher ) {
let text = paper.getText();
text = removeHtmlBlocks( text );
+ text = filterShortcodesFromHTML( text, paper._attributes && paper._attributes.shortcodes );
// Remove any HTML whitespace padding and replace it with a single whitespace.
text = text.replace( /[\s\n]+/g, " " );
diff --git a/packages/yoastseo/src/languageProcessing/researches/getSubheadingTextLengths.js b/packages/yoastseo/src/languageProcessing/researches/getSubheadingTextLengths.js
index 07adb8693c5..618413675e1 100644
--- a/packages/yoastseo/src/languageProcessing/researches/getSubheadingTextLengths.js
+++ b/packages/yoastseo/src/languageProcessing/researches/getSubheadingTextLengths.js
@@ -3,6 +3,7 @@ import excludeTableOfContentsTag from "../helpers/sanitize/excludeTableOfContent
import countWords from "../helpers/word/countWords";
import { forEach } from "lodash-es";
import removeHtmlBlocks from "../helpers/html/htmlParser";
+import { filterShortcodesFromHTML } from "../helpers";
/**
* Gets the subheadings from the text and returns the length of these subheading in an array.
@@ -15,6 +16,7 @@ import removeHtmlBlocks from "../helpers/html/htmlParser";
export default function( paper, researcher ) {
let text = paper.getText();
text = removeHtmlBlocks( text );
+ text = filterShortcodesFromHTML( text, paper._attributes && paper._attributes.shortcodes );
text = excludeTableOfContentsTag( text );
const matches = getSubheadingTexts( text );
diff --git a/packages/yoastseo/src/languageProcessing/researches/keyphraseDistribution.js b/packages/yoastseo/src/languageProcessing/researches/keyphraseDistribution.js
index 43920b9c699..3dbd010134f 100644
--- a/packages/yoastseo/src/languageProcessing/researches/keyphraseDistribution.js
+++ b/packages/yoastseo/src/languageProcessing/researches/keyphraseDistribution.js
@@ -204,6 +204,7 @@ const keyphraseDistributionResearcher = function( paper, researcher ) {
let text = paper.getText();
text = helpers.removeHtmlBlocks( text );
+ text = helpers.filterShortcodesFromHTML( text, paper._attributes && paper._attributes.shortcodes );
text = mergeListItems( text );
const sentences = getSentences( text, memoizedTokenizer );
const topicForms = researcher.getResearch( "morphology" );
diff --git a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js
index dabd39ed1ec..c08a50b2cff 100644
--- a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js
+++ b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js
@@ -5,6 +5,7 @@ import { flattenDeep, uniq as unique } from "lodash-es";
import getSentences from "../helpers/sentence/getSentences";
import { markWordsInSentences } from "../helpers/word/markWordsInSentences";
import removeHtmlBlocks from "../helpers/html/htmlParser";
+import { filterShortcodesFromHTML } from "../helpers";
/**
* Calculates the keyword count, takes morphology into account.
@@ -23,6 +24,7 @@ export default function( paper, researcher ) {
let text = paper.getText();
text = removeHtmlBlocks( text );
+ text = filterShortcodesFromHTML( text, paper._attributes && paper._attributes.shortcodes );
const locale = paper.getLocale();
const sentences = getSentences( text, memoizedTokenizer );
diff --git a/packages/yoastseo/src/languageProcessing/researches/matchKeywordInSubheadings.js b/packages/yoastseo/src/languageProcessing/researches/matchKeywordInSubheadings.js
index fa036350f29..82e4a332912 100644
--- a/packages/yoastseo/src/languageProcessing/researches/matchKeywordInSubheadings.js
+++ b/packages/yoastseo/src/languageProcessing/researches/matchKeywordInSubheadings.js
@@ -3,6 +3,7 @@ import excludeTableOfContentsTag from "../helpers/sanitize/excludeTableOfContent
import stripSomeTags from "../helpers/sanitize/stripNonTextTags";
import { findTopicFormsInString } from "../helpers/match/findKeywordFormsInString";
import removeHtmlBlocks from "../helpers/html/htmlParser";
+import { filterShortcodesFromHTML } from "../helpers";
/**
* Computes the amount of subheadings reflecting the topic.
@@ -44,6 +45,7 @@ export default function matchKeywordInSubheadings( paper, researcher ) {
let text = paper.getText();
text = removeHtmlBlocks( text );
+ text = filterShortcodesFromHTML( text, paper._attributes && paper._attributes.shortcodes );
text = stripSomeTags( excludeTableOfContentsTag( text ) );
const topicForms = researcher.getResearch( "morphology" );
const locale = paper.getLocale();
diff --git a/packages/yoastseo/src/languageProcessing/researches/sentences.js b/packages/yoastseo/src/languageProcessing/researches/sentences.js
index 18a93b9960d..bce3cce5735 100644
--- a/packages/yoastseo/src/languageProcessing/researches/sentences.js
+++ b/packages/yoastseo/src/languageProcessing/researches/sentences.js
@@ -1,5 +1,6 @@
import getSentences from "../helpers/sentence/getSentences";
import removeHtmlBlocks from "../helpers/html/htmlParser";
+import { filterShortcodesFromHTML } from "../helpers";
/**
* Returns the sentences from a paper.
@@ -14,5 +15,6 @@ export default function( paper, researcher ) {
let text = paper.getText();
text = removeHtmlBlocks( text );
+ text = filterShortcodesFromHTML( text, paper._attributes && paper._attributes.shortcodes );
return getSentences( text, memoizedTokenizer );
}
diff --git a/packages/yoastseo/src/languageProcessing/researches/wordComplexity.js b/packages/yoastseo/src/languageProcessing/researches/wordComplexity.js
index 3feb922619a..14b14feafb4 100644
--- a/packages/yoastseo/src/languageProcessing/researches/wordComplexity.js
+++ b/packages/yoastseo/src/languageProcessing/researches/wordComplexity.js
@@ -81,6 +81,7 @@ export default function wordComplexity( paper, researcher ) {
const memoizedTokenizer = researcher.getHelper( "memoizedTokenizer" );
let text = paper.getText();
text = helpers.removeHtmlBlocks( text );
+ text = helpers.filterShortcodesFromHTML( text, paper._attributes && paper._attributes.shortcodes );
const sentences = getSentences( text, memoizedTokenizer );
// Find the complex words in each sentence.
diff --git a/packages/yoastseo/src/languageProcessing/researches/wordCountInText.js b/packages/yoastseo/src/languageProcessing/researches/wordCountInText.js
index 67ea613c7f7..03e61387ec2 100644
--- a/packages/yoastseo/src/languageProcessing/researches/wordCountInText.js
+++ b/packages/yoastseo/src/languageProcessing/researches/wordCountInText.js
@@ -1,5 +1,6 @@
import wordCount from "../helpers/word/countWords.js";
import removeHtmlBlocks from "../helpers/html/htmlParser";
+import { filterShortcodesFromHTML } from "../helpers";
/**
* A result of the word count calculation.
@@ -19,6 +20,7 @@ import removeHtmlBlocks from "../helpers/html/htmlParser";
export default function( paper ) {
let text = paper.getText();
text = removeHtmlBlocks( text );
+ text = filterShortcodesFromHTML( text, paper._attributes && paper._attributes.shortcodes );
return {
text: text,
count: wordCount( text ),
diff --git a/packages/yoastseo/src/scoring/assessments/assessment.js b/packages/yoastseo/src/scoring/assessments/assessment.js
index b349ddd8270..a9d6edcb950 100644
--- a/packages/yoastseo/src/scoring/assessments/assessment.js
+++ b/packages/yoastseo/src/scoring/assessments/assessment.js
@@ -3,6 +3,7 @@
import { sanitizeString } from "../../languageProcessing";
import { isUndefined } from "lodash-es";
import removeHtmlBlocks from "../../languageProcessing/helpers/html/htmlParser";
+import { filterShortcodesFromHTML } from "../../languageProcessing/helpers";
/**
* Represents the defaults of an assessment.
@@ -43,6 +44,7 @@ class Assessment {
hasEnoughContentForAssessment( paper, contentNeededForAssessment = 50 ) {
let text = isUndefined( paper ) ? "" : paper.getText();
text = removeHtmlBlocks( text );
+ text = filterShortcodesFromHTML( text, paper._attributes && paper._attributes.shortcodes );
// The isUndefined check is necessary, because if paper is undefined .getText will throw a typeError.
return sanitizeString( text ).length >= contentNeededForAssessment;
diff --git a/packages/yoastseo/src/scoring/assessments/readability/ListAssessment.js b/packages/yoastseo/src/scoring/assessments/readability/ListAssessment.js
index bf07fd7d2ba..9cf10885632 100644
--- a/packages/yoastseo/src/scoring/assessments/readability/ListAssessment.js
+++ b/packages/yoastseo/src/scoring/assessments/readability/ListAssessment.js
@@ -45,6 +45,7 @@ export default class ListAssessment extends Assessment {
let text = paper.getText();
text = languageProcessingHelpers.removeHtmlBlocks( text );
+ text = languageProcessingHelpers.filterShortcodesFromHTML( text, paper._attributes && paper._attributes.shortcodes );
return regex.test( text );
}
diff --git a/packages/yoastseo/src/scoring/assessments/readability/SubheadingDistributionTooLongAssessment.js b/packages/yoastseo/src/scoring/assessments/readability/SubheadingDistributionTooLongAssessment.js
index 940cc7adc42..9fcc75c9bef 100644
--- a/packages/yoastseo/src/scoring/assessments/readability/SubheadingDistributionTooLongAssessment.js
+++ b/packages/yoastseo/src/scoring/assessments/readability/SubheadingDistributionTooLongAssessment.js
@@ -10,6 +10,7 @@ import getWords from "../../../languageProcessing/helpers/word/getWords";
import AssessmentResult from "../../../values/AssessmentResult";
import { stripFullTags as stripTags } from "../../../languageProcessing/helpers/sanitize/stripHTMLTags";
import removeHtmlBlocks from "../../../languageProcessing/helpers/html/htmlParser";
+import { filterShortcodesFromHTML } from "../../../languageProcessing/helpers";
/**
* Represents the assessment for calculating the text after each subheading.
@@ -85,6 +86,7 @@ class SubheadingsDistributionTooLong extends Assessment {
const customCountLength = researcher.getHelper( "customCountLength" );
let text = paper.getText();
text = removeHtmlBlocks( text );
+ text = filterShortcodesFromHTML( text, paper._attributes && paper._attributes.shortcodes );
return customCountLength ? customCountLength( text ) : getWords( text ).length;
}
diff --git a/packages/yoastseo/src/scoring/assessments/readability/TransitionWordsAssessment.js b/packages/yoastseo/src/scoring/assessments/readability/TransitionWordsAssessment.js
index fbcf0177402..c8bf641ddc8 100644
--- a/packages/yoastseo/src/scoring/assessments/readability/TransitionWordsAssessment.js
+++ b/packages/yoastseo/src/scoring/assessments/readability/TransitionWordsAssessment.js
@@ -11,6 +11,7 @@ import marker from "../../../markers/addMark.js";
import Assessment from "../assessment";
import removeHtmlBlocks from "../../../languageProcessing/helpers/html/htmlParser";
import getWords from "../../../languageProcessing/helpers/word/getWords";
+import { filterShortcodesFromHTML } from "../../../languageProcessing/helpers";
/**
@@ -195,6 +196,7 @@ export default class TransitionWordsAssessment extends Assessment {
}
let text = paper.getText();
text = removeHtmlBlocks( text );
+ text = filterShortcodesFromHTML( text, paper._attributes && paper._attributes.shortcodes );
const textLength = customCountLength ? customCountLength( text ) : getWords( text ).length;
// Do not use hasEnoughContent in this assessment as it is mostly redundant with `textLength >= this._config.applicableIfTextLongerThan`
diff --git a/packages/yoastseo/src/scoring/assessments/seo/KeyphraseDistributionAssessment.js b/packages/yoastseo/src/scoring/assessments/seo/KeyphraseDistributionAssessment.js
index c9fc68e3431..03060c2ad00 100644
--- a/packages/yoastseo/src/scoring/assessments/seo/KeyphraseDistributionAssessment.js
+++ b/packages/yoastseo/src/scoring/assessments/seo/KeyphraseDistributionAssessment.js
@@ -2,6 +2,7 @@ import { __, sprintf } from "@wordpress/i18n";
import { merge } from "lodash-es";
import { languageProcessing, AssessmentResult, Assessment, helpers } from "yoastseo";
+import { filterShortcodesFromHTML } from "../../../languageProcessing/helpers";
const { getSentences, helpers: languageProcessingHelpers } = languageProcessing;
const { createAnchorOpeningTag } = helpers;
@@ -181,6 +182,7 @@ class KeyphraseDistributionAssessment extends Assessment {
const memoizedTokenizer = researcher.getHelper( "memoizedTokenizer" );
let text = paper.getText();
text = languageProcessingHelpers.removeHtmlBlocks( text );
+ text = filterShortcodesFromHTML( text, paper._attributes && paper._attributes.shortcodes );
const sentences = getSentences( text, memoizedTokenizer );
return paper.hasText() && paper.hasKeyword() && sentences.length >= 15 && researcher.hasResearch( "keyphraseDistribution" );
diff --git a/packages/yoastseo/src/scoring/assessments/seo/KeywordDensityAssessment.js b/packages/yoastseo/src/scoring/assessments/seo/KeywordDensityAssessment.js
index e7bdac1736a..e819ac3d289 100644
--- a/packages/yoastseo/src/scoring/assessments/seo/KeywordDensityAssessment.js
+++ b/packages/yoastseo/src/scoring/assessments/seo/KeywordDensityAssessment.js
@@ -9,6 +9,7 @@ import { createAnchorOpeningTag } from "../../../helpers/shortlinker";
import keyphraseLengthFactor from "../../helpers/assessments/keyphraseLengthFactor.js";
import removeHtmlBlocks from "../../../languageProcessing/helpers/html/htmlParser";
import getWords from "../../../languageProcessing/helpers/word/getWords";
+import { filterShortcodesFromHTML } from "../../../languageProcessing/helpers";
/**
* Represents the assessment that will look if the keyphrase density is within the recommended range.
@@ -113,6 +114,7 @@ class KeywordDensityAssessment extends Assessment {
let text = paper.getText();
text = removeHtmlBlocks( text );
+ text = filterShortcodesFromHTML( text, paper._attributes && paper._attributes.shortcodes );
this.setBoundaries( text, keyphraseLength, customGetWords );
this._keywordDensity = this._keywordDensity * keyphraseLengthFactor( keyphraseLength );
diff --git a/packages/yoastseo/src/scoring/assessments/seo/SingleH1Assessment.js b/packages/yoastseo/src/scoring/assessments/seo/SingleH1Assessment.js
index 9ff0476f891..ab84ab0581b 100644
--- a/packages/yoastseo/src/scoring/assessments/seo/SingleH1Assessment.js
+++ b/packages/yoastseo/src/scoring/assessments/seo/SingleH1Assessment.js
@@ -2,7 +2,7 @@ import { __, sprintf } from "@wordpress/i18n";
import { isUndefined, merge } from "lodash-es";
import Assessment from "../assessment.js";
-import { createAnchorOpeningTag } from "../../../helpers/shortlinker";
+import { createAnchorOpeningTag } from "../../../helpers";
import marker from "../../../markers/addMark.js";
import AssessmentResult from "../../../values/AssessmentResult.js";
import Mark from "../../../values/Mark.js";
diff --git a/packages/yoastseo/src/scoring/assessor.js b/packages/yoastseo/src/scoring/assessor.js
index 1d6ace55c28..dd9e49c92d9 100644
--- a/packages/yoastseo/src/scoring/assessor.js
+++ b/packages/yoastseo/src/scoring/assessor.js
@@ -7,7 +7,6 @@ import { __, sprintf } from "@wordpress/i18n";
import { filter, find, findIndex, isFunction, isUndefined, map } from "lodash-es";
import LanguageProcessor from "../parse/language/LanguageProcessor";
import { build } from "../parse/build";
-import { filterShortcodesFromHTML } from "../languageProcessing/helpers/sanitize/filterShortcodesFromTree";
// The maximum score of individual assessment is 9. This is why we set the "score rating" here to 9.
const ScoreRating = 9;
@@ -127,9 +126,6 @@ Assessor.prototype.assess = function( paper ) {
const languageProcessor = new LanguageProcessor( this._researcher );
const shortcodes = paper._attributes && paper._attributes.shortcodes;
paper.setTree( build( paper.getText(), languageProcessor, shortcodes ) );
- if ( shortcodes ) {
- paper._text = filterShortcodesFromHTML( paper.getText(), shortcodes );
- }
let assessments = this.getAvailableAssessments();
this.results = [];
diff --git a/packages/yoastseo/src/worker/AnalysisWebWorker.js b/packages/yoastseo/src/worker/AnalysisWebWorker.js
index 70e0b11f0d7..1ba4522827b 100644
--- a/packages/yoastseo/src/worker/AnalysisWebWorker.js
+++ b/packages/yoastseo/src/worker/AnalysisWebWorker.js
@@ -33,7 +33,6 @@ import wrapTryCatchAroundAction from "./wrapTryCatchAroundAction";
// Tree assessor functionality.
import { ReadabilityScoreAggregator, SEOScoreAggregator } from "../parsedPaper/assess/scoreAggregators";
-import { filterShortcodesFromHTML } from "../languageProcessing/helpers/sanitize/filterShortcodesFromTree";
const logger = getLogger( "yoast-analysis-worker" );
logger.setDefaultLevel( "error" );
@@ -1085,11 +1084,6 @@ export default class AnalysisWebWorker {
const shortcodes = this._paper._attributes && this._paper._attributes.shortcodes;
this._paper.setTree( build( this._paper.getText(), languageProcessor, shortcodes ) );
- if ( shortcodes ) {
- this._paper._text = filterShortcodesFromHTML( this._paper.getText(), shortcodes );
- }
-
-
// Update the configuration locale to the paper locale.
this.setLocale( this._paper.getLocale() );
}
@@ -1413,9 +1407,6 @@ export default class AnalysisWebWorker {
const languageProcessor = new LanguageProcessor( researcher );
const shortcodes = paper._attributes && paper._attributes.shortcodes;
paper.setTree( build( paper.getText(), languageProcessor, shortcodes ) );
- if ( shortcodes ) {
- paper._text = filterShortcodesFromHTML( paper.getText(), shortcodes );
- }
}
}
From afcb257616d052e67e373f8536a12c7f97aed16f Mon Sep 17 00:00:00 2001
From: aidamarfuaty Giant panda is test testts Tets " +
+ "is this giant panda Hello, world! Hello, yoast! Hello, world! Hello, yoast! Hello, world! Hello, yoast! Hello, world! Hello, yoast! Hello, world! Hello, yoast! Hello, world! Hello, yoast! Hello, world! Hello, yoast! Hello, world! Hello, yoast! Hello, world! Hello, yoast! Hello, world! Hello, yoast! Hello, world! Hello, yoast! Hello, world! Hello, yoast! Hello, world! Hello, Hello, world! Hello, The red panda (Ailurus fulgens), also known as the lesser panda," +
" is a small mammal native to the eastern Himalayas and southwestern China
" );
diff --git a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js
index 866840d60d9..d32ffecfa2c 100644
--- a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js
+++ b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js
@@ -37,6 +37,9 @@ const testCases = [
endOffset: 36,
startOffsetBlock: 26,
endOffsetBlock: 33,
+ attributeId: "",
+ clientId: "",
+ isFirstSection: false,
},
} ),
],
@@ -65,6 +68,9 @@ const testCases = [
endOffset: 37,
startOffsetBlock: 26,
endOffsetBlock: 34,
+ attributeId: "",
+ clientId: "",
+ isFirstSection: false,
},
} ),
new Mark( {
@@ -76,6 +82,9 @@ const testCases = [
endOffset: 64,
startOffsetBlock: 52,
endOffsetBlock: 61,
+ attributeId: "",
+ clientId: "",
+ isFirstSection: false,
},
} ),
],
@@ -96,6 +105,9 @@ const testCases = [
endOffset: 34,
startOffsetBlock: 24,
endOffsetBlock: 31,
+ attributeId: "",
+ clientId: "",
+ isFirstSection: false,
},
} ),
new Mark( {
@@ -107,6 +119,9 @@ const testCases = [
endOffset: 56,
startOffsetBlock: 45,
endOffsetBlock: 53,
+ attributeId: "",
+ clientId: "",
+ isFirstSection: false,
},
} ),
],
@@ -134,6 +149,9 @@ const testCases = [
endOffset: 25,
startOffsetBlock: 14,
endOffsetBlock: 22,
+ attributeId: "",
+ clientId: "",
+ isFirstSection: false,
},
} ),
],
@@ -163,6 +181,9 @@ const testCases = [
endOffset: 27,
startOffsetBlock: 20,
endOffsetBlock: 24,
+ attributeId: "",
+ clientId: "",
+ isFirstSection: false,
},
} ),
new Mark( {
@@ -175,6 +196,9 @@ const testCases = [
endOffset: 32,
startOffsetBlock: 26,
endOffsetBlock: 29,
+ attributeId: "",
+ clientId: "",
+ isFirstSection: false,
},
} ),
new Mark( {
@@ -187,6 +211,9 @@ const testCases = [
endOffset: 48,
startOffsetBlock: 42,
endOffsetBlock: 45,
+ attributeId: "",
+ clientId: "",
+ isFirstSection: false,
},
} ),
new Mark( {
@@ -199,6 +226,9 @@ const testCases = [
endOffset: 62,
startOffsetBlock: 55,
endOffsetBlock: 59,
+ attributeId: "",
+ clientId: "",
+ isFirstSection: false,
},
} ),
],
@@ -221,6 +251,9 @@ const testCases = [
startOffset: 43,
startOffsetBlock: 40,
endOffsetBlock: 55,
+ attributeId: "",
+ clientId: "",
+ isFirstSection: false,
},
} ),
new Mark( {
@@ -232,6 +265,9 @@ const testCases = [
endOffset: 67,
startOffsetBlock: 57,
endOffsetBlock: 64,
+ attributeId: "",
+ clientId: "",
+ isFirstSection: false,
},
} ),
],
@@ -247,7 +283,14 @@ const testCases = [
new Mark( {
marked: "Lorem ipsum dolor sit amet, consectetur
test [shortcode]
", { shortcodes: [ "shortcode" ] } ); + expect( getParagraphLength( mockPaper, new EnglishResearcher() ).length ).toBe( 1 ); + } ); + it( "returns the paragraph length of paragraph without p tags or double linebreaks, but with h2 tags", function() { const mockPaper = new Paper( "Sentence 1. Sentence 2.
Sentence 3.
" ); researcher = new EnglishResearcher( mockPaper ); diff --git a/packages/yoastseo/spec/languageProcessing/researches/getSubheadingTextLengthSpec.js b/packages/yoastseo/spec/languageProcessing/researches/getSubheadingTextLengthSpec.js index bdf9cc4a06e..c59b61abb76 100644 --- a/packages/yoastseo/spec/languageProcessing/researches/getSubheadingTextLengthSpec.js +++ b/packages/yoastseo/spec/languageProcessing/researches/getSubheadingTextLengthSpec.js @@ -57,6 +57,18 @@ describe( "gets the length of text segments", function() { expect( foundSubheadingsTextLength( mockPaper, englishResearcher )[ 2 ].countLength ).toBe( 10 ); } ); + it( "should not include a shortcode when calculating the text length", function() { + const mockPaper = new Paper( "one two three
" );
const englishResearcher = new EnglishResearcher( mockPaper );
diff --git a/packages/yoastseo/spec/languageProcessing/researches/keyphraseDistributionSpec.js b/packages/yoastseo/spec/languageProcessing/researches/keyphraseDistributionSpec.js
index 3b4a8e1b6bd..a85708b7cda 100644
--- a/packages/yoastseo/spec/languageProcessing/researches/keyphraseDistributionSpec.js
+++ b/packages/yoastseo/spec/languageProcessing/researches/keyphraseDistributionSpec.js
@@ -477,6 +477,26 @@ describe( "Test for the research", function() {
} );
} );
+ it( "returns the score 100 when the keyphrase is a shortcode", function() {
+ const paper = new Paper(
+ "This is a text with a [keyphrase]that doesn't count",
+ {
+ locale: "en_EN",
+ keyword: "keyphrase",
+ synonyms: "synonym",
+ shortcodes: [ "keyphrase" ],
+ }
+ );
+
+ const researcher = new Researcher( paper );
+ researcher.addResearchData( "morphology", morphologyData );
+
+ expect( keyphraseDistributionResearcher( paper, researcher ) ).toEqual( {
+ keyphraseDistributionScore: 100,
+ sentencesToHighlight: [],
+ } );
+ } );
+
const paragraphWithKeyphrase1 = "Lorem ipsum keyphrase dolor sit amet, consectetur adipiscing elit." + "In sit amet semper sem, id faucibus massa.
\n"; diff --git a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js index 9cf437643bb..34888acf1d5 100644 --- a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js +++ b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js @@ -160,6 +160,13 @@ const testCases = [ expectedCount: 0, expectedMarkings: [], }, + { + description: "doesn't count keyphrase instances if they are a shortcode", + paper: new Paper( "There is no [keyword] in this sentence.", { shortcodes: [ "keyword" ] } ), + keyphraseForms: [ [ "keyword", "keywords" ] ], + expectedCount: 0, + expectedMarkings: [], + }, { description: "only counts full key phrases (when all keywords are in the sentence once, twice etc.) as matches.", paper: new Paper( "A string with three keys (key and another key) and one word." ), diff --git a/packages/yoastseo/spec/languageProcessing/researches/matchKeywordInSubheadingsSpec.js b/packages/yoastseo/spec/languageProcessing/researches/matchKeywordInSubheadingsSpec.js index d6d803710fa..609d63776fd 100644 --- a/packages/yoastseo/spec/languageProcessing/researches/matchKeywordInSubheadingsSpec.js +++ b/packages/yoastseo/spec/languageProcessing/researches/matchKeywordInSubheadingsSpec.js @@ -48,6 +48,15 @@ describe( "Matching keyphrase in subheadings", () => { expect( result.matches ).toBe( 0 ); } ); + it( "should not match text that is also a shortcode", function() { + const paper = new Paper( "how to love
your cats", { keyword: "how to love your cats" } );
expect( wordCountInText( paper ).count ).toBe( 7 );
} );
+ it( "should not count shortcodes", function() {
+ const paper = new Paper( "Tell me a story on [shortcode]how to love[/shortcode] your cats", { shortcodes: [ "shortcode" ] } );
+ expect( wordCountInText( paper ).count ).toBe( 10 );
+ } );
} );
diff --git a/packages/yoastseo/spec/scoring/assessments/assessmentSpec.js b/packages/yoastseo/spec/scoring/assessments/assessmentSpec.js
index 0266e13572d..894db1cac54 100644
--- a/packages/yoastseo/spec/scoring/assessments/assessmentSpec.js
+++ b/packages/yoastseo/spec/scoring/assessments/assessmentSpec.js
@@ -24,6 +24,13 @@ describe( "test hasEnoughContentForAssessment", () => {
expect( mockAssessment.hasEnoughContentForAssessment( mockPaper ) ).toBe( false );
} );
+ it( "should return false if text has less than 50 chars after shortcodes are removed.", () => {
+ const mockPaper = new Paper( "Some text" + "[shortcode] ".repeat( 50 ), { shortcodes: [ "shortcode" ] } );
+ const mockAssessment = new Assessment();
+
+ expect( mockAssessment.hasEnoughContentForAssessment( mockPaper ) ).toBe( false );
+ } );
+
it( "should return false if text is 49 chars and an image and no specification for contentNeededForAssessment", () => {
const mockPaper = new Paper( "This text contains forty nine characterssssssssss" +
// eslint-disable-next-line max-len
diff --git a/packages/yoastseo/spec/scoring/assessments/readability/SubheadingDistributionTooLongAssessmentSpec.js b/packages/yoastseo/spec/scoring/assessments/readability/SubheadingDistributionTooLongAssessmentSpec.js
index 6745f5495d7..04c5483404d 100644
--- a/packages/yoastseo/spec/scoring/assessments/readability/SubheadingDistributionTooLongAssessmentSpec.js
+++ b/packages/yoastseo/spec/scoring/assessments/readability/SubheadingDistributionTooLongAssessmentSpec.js
@@ -43,6 +43,17 @@ describe( "An assessment for scoring too long text fragments without a subheadin
"You are not using any subheadings, but your text is short enough and probably doesn't need them." );
} );
+ it( "Scores a text that's short (<300 words) after excluding shortodes," +
+ " and which does not have subheadings.", function() {
+ const assessment = subheadingDistributionTooLong.getResult(
+ new Paper( shortText + "[shortcode] ".repeat( 150 ) + "[/shortcode] ".repeat( 150 ), { shortcodes: [ "shortcode" ] } ),
+ Factory.buildMockResearcher( [] )
+ );
+ expect( assessment.getScore() ).toBe( 9 );
+ expect( assessment.getText() ).toBe( "Subheading distribution: " +
+ "You are not using any subheadings, but your text is short enough and probably doesn't need them." );
+ } );
+
it( "Scores a short text (<300 words), which has subheadings.", function() {
const assessment = subheadingDistributionTooLong.getResult(
new Paper( "a " + subheading + shortText ),
diff --git a/packages/yoastseo/spec/scoring/assessments/readability/TransitionWordsAssessmentSpec.js b/packages/yoastseo/spec/scoring/assessments/readability/TransitionWordsAssessmentSpec.js
index 8cf18699849..1b145d0b147 100644
--- a/packages/yoastseo/spec/scoring/assessments/readability/TransitionWordsAssessmentSpec.js
+++ b/packages/yoastseo/spec/scoring/assessments/readability/TransitionWordsAssessmentSpec.js
@@ -148,6 +148,12 @@ describe( "An assessment for transition word percentage", function() {
expect( assessment ).toBe( false );
} );
+ it( "should not be applicable if the text has more than 200 words, but part of the words are shortcodes", function() {
+ const mockPaper = new Paper( "Text " + "text ".repeat( 198 ) + "[shortcode]".repeat( 2 ), { shortcodes: [ "shortcode" ] } );
+ const assessment = new TransitionWordsAssessment().isApplicable( mockPaper, new EnglishResearcher( mockPaper ) );
+ expect( assessment ).toBe( false );
+ } );
+
it( "is applicable when used with a supported researcher, e.g. the English researcher", function() {
const mockPaper = new Paper( "Lorem ipsum dolor sit amet, ne sed agam oblique alterum. Eos percipit singulis no. No scripta graecis cum. " +
"Ut vim eius porro labore. Id quem civibus sit. Sed no primis urbanitas, aperiri laboramus voluptatibus ei per. Esse consul possim " +
diff --git a/packages/yoastseo/spec/scoring/assessments/seo/KeyphraseDistributionAssessmentSpec.js b/packages/yoastseo/spec/scoring/assessments/seo/KeyphraseDistributionAssessmentSpec.js
index 7b74f7951c7..6acddfc8610 100644
--- a/packages/yoastseo/spec/scoring/assessments/seo/KeyphraseDistributionAssessmentSpec.js
+++ b/packages/yoastseo/spec/scoring/assessments/seo/KeyphraseDistributionAssessmentSpec.js
@@ -162,6 +162,16 @@ describe( "Checks if the assessment is applicable", function() {
expect( assessmentIsApplicable ).toBe( false );
} );
+
+ it( "should not be applicable to a text consisting only of shortcodes", function() {
+ const shortcodeSentence = "[shortcode]".repeat( 15 ) + ". ";
+ const mockPaper = new Paper( shortcodeSentence.repeat( 15 ), { shortcodes: [ "shortcode" ] } );
+ researcher.setPaper( mockPaper );
+
+ const assessmentIsApplicable = keyphraseDistributionAssessment.isApplicable( mockPaper, researcher );
+
+ expect( assessmentIsApplicable ).toBe( false );
+ } );
} );
describe( "A test for marking keywords in the text", function() {
diff --git a/packages/yoastseo/spec/scoring/assessments/seo/KeywordDensityAssessmentSpec.js b/packages/yoastseo/spec/scoring/assessments/seo/KeywordDensityAssessmentSpec.js
index dfd177c8fdb..9eb907ac76f 100644
--- a/packages/yoastseo/spec/scoring/assessments/seo/KeywordDensityAssessmentSpec.js
+++ b/packages/yoastseo/spec/scoring/assessments/seo/KeywordDensityAssessmentSpec.js
@@ -100,6 +100,18 @@ describe( "Tests for the keywordDensity assessment for languages without morphol
" Don't overoptimize!" );
} );
+ it( "should not count shortcodes when calculating keyphrase density", function() {
+ const paper = new Paper( nonkeyword.repeat( 99 ) + `[${keyword}]`, { keyword: keyword, shortcodes: [ keyword ] } );
+
+ const researcher = new DefaultResearcher( paper );
+ const result = new KeywordDensityAssessment().getResult( paper, researcher );
+
+ expect( result.getScore() ).toBe( 4 );
+ expect( result.getText() ).toBe( "Keyphrase density: The keyphrase was found 0 times." +
+ " That's less than the recommended minimum of 2 times for a text of this length." +
+ " Focus on your keyphrase!" );
+ } );
+
it( "adjusts the keyphrase density based on the length of the keyword with the actual density remaining at 2% - long keyphrase", function() {
const paper = new Paper( nonkeyword.repeat( 900 ) + "b c d e f, ".repeat( 20 ), { keyword: "b c d e f" } );
const researcher = new DefaultResearcher( paper );
diff --git a/packages/yoastseo/src/parse/build/build.js b/packages/yoastseo/src/parse/build/build.js
index 14a4ece8242..5e15c565913 100644
--- a/packages/yoastseo/src/parse/build/build.js
+++ b/packages/yoastseo/src/parse/build/build.js
@@ -31,6 +31,7 @@ export default function build( htmlString, languageProcessor, shortcodes ) {
// Add sentences and tokens to the tree's paragraph and heading nodes.
tree = tokenize( tree, languageProcessor );
+ // Filter out shortcodes from the tree.
if ( shortcodes ) {
filterShortcodesFromTree( tree, shortcodes );
}
diff --git a/packages/yoastseo/src/scoring/assessments/readability/ListAssessment.js b/packages/yoastseo/src/scoring/assessments/readability/ListAssessment.js
index 9cf10885632..bf07fd7d2ba 100644
--- a/packages/yoastseo/src/scoring/assessments/readability/ListAssessment.js
+++ b/packages/yoastseo/src/scoring/assessments/readability/ListAssessment.js
@@ -45,7 +45,6 @@ export default class ListAssessment extends Assessment {
let text = paper.getText();
text = languageProcessingHelpers.removeHtmlBlocks( text );
- text = languageProcessingHelpers.filterShortcodesFromHTML( text, paper._attributes && paper._attributes.shortcodes );
return regex.test( text );
}
From efd1798d5c9406530bfeee70268181d214c6b3d5 Mon Sep 17 00:00:00 2001
From: hdvos This is a [not_a_shortcode] test.
" ); } ); + + it( "should not filter if shortcodes parameter is not given", function() { + const html = "This is a [shortcode] test.
"; + + const filtered = filterShortcodesFromHTML( html ); + + expect( filtered ).toEqual( "This is a [shortcode] test.
" ); + } ); + it( "should not filter if shortcodes is an empty list", function() { + const html = "This is a [shortcode] test.
"; + + const shortcodes = []; + + const filtered = filterShortcodesFromHTML( html, shortcodes ); + + expect( filtered ).toEqual( "This is a [shortcode] test.
" ); + } ); } ); From ad0bb76507f750977cc4dd76bdcb92724cf35fee Mon Sep 17 00:00:00 2001 From: hdvos" + + "Step by step guide on how to make your cat love you (with or without food bribe).
According to a research, " + + "cats are social animal. Your cat will prefer the hooman than a very tempting snack in a stressful situation.
" + + "A raw food diet for cats is more digestible than a diet of plant-based foods.
Studies reveal that commercially " + + "prepared raw food suffers from increased levels of contamination with potential pathogens compared to “regular” cat foods.
" + + "The red panda's skull is wide, and its lower jaw is robust.
", + }; + + const mockAttribute = { + key: "content", + }; + + const mockMarks = [ + new Mark( { + original: "The red panda's skull is wide, and its lower jaw is robust.", + marked: "Thebar
bazIf a triangle has one 90 degrees angle and one 30" + - "degrees angle, how big is the remaining angle?Fun for the whole family
", + html: "\n\nThere are many craft ideas that are interesting to both children and adults." + + " Fun for the whole family!
", }, { element: "em", @@ -235,7 +235,7 @@ const samplesWithTwoOccurrences = [ }, { element: "bdi", - html: "\n\nTheir name spells as أندرو or alternatively أندريه" + + html: "\n\n
Their name spells as أندرو or alternatively أندريه" + "and both are correct
", }, { @@ -255,15 +255,11 @@ const samplesWithTwoOccurrences = [ }, { element: "ruby", - html: "君子は和して同ぜず。", + html: "君子は和ぜず。", }, { element: "rt", - html: "君子は和して同ぜず。", - }, - { - element: "rt", - html: "君子は和して同ぜず。", + html: "君子は和ぜず。", }, { element: "sup", @@ -320,7 +316,7 @@ describe( "Miscellaneous tests", () => { // The head element seems to be removed by the parser we employ. const html = "\nSix abandoned kittens were adopted by a capybara.
\n"; - const tree = adapt( parseFragment( html, { sourceCodeLocationInfo: true } ) ); - expect( tree.findAll( child => child.name === "b" ) ).toHaveLength( 0 ); - } ); - - it( "should filter out b elements", () => { - // The head element seems to be removed by the parser we employ. - const html = "\nSix abandoned kittens were adopted by a capybara.
\n