Merge branch 'release/21.4' of github.com:Yoast/wordpress-seo into trunk

Yoast · Oct 11, 2023 · a5bc21e · a5bc21e
2 parents a6e3c98 + fca39ac
commit a5bc21e
Show file tree

Hide file tree

Showing 14 changed files with 199 additions and 26 deletions.
diff --git a/package.json b/package.json
@@ -83,7 +83,7 @@
     "typescript": "^4.2.4"
   },
   "yoast": {
-    "pluginVersion": "21.4-RC5"
+    "pluginVersion": "21.4-RC6"
   },
   "version": "0.0.0"
 }
diff --git a/packages/js/src/decorator/helpers/getAnnotationsHelpers.js b/packages/js/src/decorator/helpers/getAnnotationsHelpers.js
@@ -183,7 +183,7 @@ export const getAnnotationsForFAQ = ( attributeWithAnnotationSupport, block, mar
  */
 export const getAnnotationsForHowTo = ( attributeWithAnnotationSupport, block, marks ) => {
 	const annotatableTextsFromBlock = block.attributes[ attributeWithAnnotationSupport.key ];
-	if ( annotatableTextsFromBlock.length === 0 ) {
+	if ( annotatableTextsFromBlock && annotatableTextsFromBlock.length === 0 ) {
 		return [];
 	}
 	const annotations = [];

diff --git a/packages/js/src/decorator/helpers/positionBasedAnnotationHelper.js b/packages/js/src/decorator/helpers/positionBasedAnnotationHelper.js
@@ -4,8 +4,8 @@ import { helpers } from "yoastseo";
 /**
  * Regex to detect HTML tags.
  * Please note that this regex will also detect non-HTML tags that are also wrapped in `<>`.
- * For example, in the following sentence, <strong class="">cats <dogs> rabbit </strong>,
- * we will match <strong class="">, <dogs> and </strong>. This is an edge case though.
+ * For example, in the following sentence, `<strong class="">cats <dogs> rabbit </strong>`,
+ * we will match `<strong class="">`, `<dogs>` and `</strong>`. This is an edge case though.
  * @type {RegExp}
  */
 const htmlTagsRegex = /(<([a-z]|\/)[^<>]+>)/ig;
@@ -53,6 +53,27 @@ const adjustFirstSectionOffsets = ( blockStartOffset, blockEndOffset, blockName
 	return { blockStartOffset, blockEndOffset };
 };
 
+/**
+ * Sums the lengths of the given HTML tags, counting every tag that starts with `<br` or `</br` as one character shorter.
+ * @param {Array[]} htmlTags Array of regex matches (the callers in this file pass `matchAll` results); the first element of each match is the tag string.
+ * @returns {number} The combined (adjusted) length of the given HTML tags.
+ */
+const getTagsLength = ( htmlTags ) => {
+	let tagsLength = 0;
+	forEachRight( htmlTags, ( htmlTag ) => {
+		const [ tag ] = htmlTag;
+		let tagLength = tag.length;
+		// <br> tags are treated as sentence delimiters, so subtract 1 from the tagLength. NOTE(review): this test is case-sensitive and has no word boundary, unlike the case-insensitive htmlTagsRegex - confirm `<BR>` or e.g. `<brand>` cannot occur here.
+		if ( /^<\/?br/.test( tag ) ) {
+			tagLength -= 1;
+		}
+
+		tagsLength += tagLength;
+	} );
+
+	return tagsLength;
+};
+
 /**
  * Adjusts the block start and end offsets of a given Mark when the block HTML contains HTML tags.
  *
@@ -80,17 +101,11 @@ const adjustOffsetsForHtmlTags = ( slicedBlockHtmlToStartOffset, slicedBlockHtml
 	 * - Text: This is a giant panda.
 	 * - Range of "panda": 16 -21
 	 */
-	let foundHtmlTags = [ ...slicedBlockHtmlToStartOffset.matchAll( htmlTagsRegex ) ];
-	forEachRight( foundHtmlTags, ( foundHtmlTag ) => {
-		const [ tag ] = foundHtmlTag;
-		blockStartOffset -= tag.length;
-	} );
+	const foundHtmlTagsToStartOffset = [ ...slicedBlockHtmlToStartOffset.matchAll( htmlTagsRegex ) ];
+	blockStartOffset -= getTagsLength( foundHtmlTagsToStartOffset );
 
-	foundHtmlTags = [ ...slicedBlockHtmlToEndOffset.matchAll( htmlTagsRegex ) ];
-	forEachRight( foundHtmlTags, ( foundHtmlTag ) => {
-		const [ tag ] = foundHtmlTag;
-		blockEndOffset -= tag.length;
-	} );
+	const foundHtmlTagsToEndOffset = [ ...slicedBlockHtmlToEndOffset.matchAll( htmlTagsRegex ) ];
+	blockEndOffset -= getTagsLength( foundHtmlTagsToEndOffset );
 
 	return { blockStartOffset, blockEndOffset };
 };

diff --git a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js
@@ -299,7 +299,7 @@ const testCases = [
 					attributeId: "",
 					clientId: "",
 					isFirstSection: false } } ),
-		 ],
+		],
 		skip: false,
 	},
 	{
@@ -953,6 +953,41 @@ const testCasesWithSpecialCharacters = [
 		],
 		skip: false,
 	},
+	{
+		description: "can match keyphrases in texts that contain <br> tags",
+		paper: new Paper( "<p>This is a test.<br>This is<br />another<br>test.</p>", { keyword: "test" } ),
+		keyphraseForms: [ [ "test" ] ],
+		expectedCount: 2,
+		expectedMarkings: [
+			new Mark( {
+				original: "This is a test.",
+				marked: "This is a <yoastmark class='yoast-text-mark'>test</yoastmark>.",
+				position: {
+					startOffset: 13,
+					endOffset: 17,
+					startOffsetBlock: 10,
+					endOffsetBlock: 14,
+					attributeId: "",
+					clientId: "",
+					isFirstSection: false,
+				} }
+			),
+			new Mark( {
+				original: "\nThis is\nanother\ntest.",
+				marked: "\nThis is\nanother\n<yoastmark class='yoast-text-mark'>test</yoastmark>.",
+				position: {
+					startOffset: 46,
+					endOffset: 50,
+					startOffsetBlock: 43,
+					endOffsetBlock: 47,
+					attributeId: "",
+					clientId: "",
+					isFirstSection: false,
+				} }
+			),
+		],
+		skip: false,
+	},
 	{
 		description: "can match paragraph gutenberg block",
 		paper: new Paper(
@@ -1856,6 +1891,35 @@ describe( "Test for counting the keyphrase in a text for Japanese", () => {
 				original: "私の猫はかわいいです。" } ) ] );
 	} );
 
+	it( "counts the keyphrase occurrence inside an image caption.", function() {
+		const mockPaper = new Paper( "<p>[caption id=\"attachment_157\" align=\"alignnone\" width=\"225\"]<img " +
+			"src=\"http://one.wordpress.test/wp-content/uploads/2023/07/IMG_0967_2-1-225x300.jpg\" alt=\"\" " +
+			"width=\"225\" height=\"300\" /> 一日一冊の本を読むのはできるかどうかやってみます。[/caption]</p>", {
+			locale: "ja",
+			keyphrase: "一冊の本を読む",
+			shortcodes: [
+				"wp_caption",
+				"caption",
+				"gallery",
+				"playlist" ],
+		} );
+		const keyphraseForms = [
+			[ "一冊" ],
+			[ "本" ],
+			[ "読む", "読み", "読ま", "読め", "読も", "読ん", "読める", "読ませ", "読ませる", "読まれ", "読まれる", "読もう" ],
+		];
+		const researcher = buildJapaneseMockResearcher( keyphraseForms, wordsCountHelper, matchWordsHelper );
+		buildTree( mockPaper, researcher );
+
+
+		expect( getKeyphraseCount( mockPaper, researcher ).count ).toBe( 1 );
+		expect( getKeyphraseCount( mockPaper, researcher ).markings ).toEqual( [
+			new Mark( {
+				marked: "一日<yoastmark class='yoast-text-mark'>一冊</yoastmark>の<yoastmark " +
+					"class='yoast-text-mark'>本</yoastmark>を<yoastmark class='yoast-text-mark'>読む</yoastmark>のはできるかどうかやってみます。",
+				original: "一日一冊の本を読むのはできるかどうかやってみます。" } ) ] );
+	} );
+
 	it( "counts/marks a string of text with multiple occurrences of the same keyphrase in it.", function() {
 		const mockPaper = new Paper( "<p>私の猫はかわいい猫です。</p>", { locale: "ja", keyphrase: "猫" } );
 		const researcher = buildJapaneseMockResearcher( [ [ "猫" ] ], wordsCountHelper, matchWordsHelper );

diff --git a/packages/yoastseo/spec/parse/build/private/getTextElementPositionsSpec.js b/packages/yoastseo/spec/parse/build/private/getTextElementPositionsSpec.js
@@ -141,6 +141,21 @@ describe( "A test for getting positions of sentences", () => {
 		expect( getTextElementPositions( node, sentences ) ).toEqual( sentencesWithPositions );
 	} );
 
+	it( "should determine the correct token positions when the sentence contains a br tag", function() {
+		// HTML: <p>Hello<br />world!</p>.
+		const html = "<p>Hello<br />world!</p>";
+		const tree = adapt( parseFragment( html, { sourceCodeLocationInfo: true } ) );
+		const paragraph = tree.childNodes[ 0 ];
+		const tokens = [ "Hello", "\n", "world", "!" ].map( string => new Token( string ) );
+
+		const [ hello, br, world, bang ] = getTextElementPositions( paragraph, tokens, 3 );
+
+		expect( hello.sourceCodeRange ).toEqual( { startOffset: 3, endOffset: 8 } );
+		expect( br.sourceCodeRange ).toEqual( { startOffset: 13, endOffset: 14 } );
+		expect( world.sourceCodeRange ).toEqual( { startOffset: 14, endOffset: 19 } );
+		expect( bang.sourceCodeRange ).toEqual( { startOffset: 19, endOffset: 20 } );
+	} );
+
 	it( "gets the sentence positions from a node that has a code child node", function() {
 		// HTML: <p>Hello <code>array.push( something )</code> code!</p>
 		const node = new Paragraph( {}, [

diff --git a/packages/yoastseo/spec/parse/build/private/tokenizeSpec.js b/packages/yoastseo/spec/parse/build/private/tokenizeSpec.js
@@ -477,6 +477,30 @@ describe( "A test for the tokenize function",
 				name: "#document-fragment",
 			} );
 		} );
+
+		it( "should correctly tokenize a paragraph with single br tags", function() {
+			const mockPaper = new Paper( "<p>This is a sentence.<br />This is<br>another sentence.</p>" );
+			const mockResearcher = new EnglishResearcher( mockPaper );
+			const languageProcessor = new LanguageProcessor( mockResearcher );
+			buildTreeNoTokenize( mockPaper );
+			const result = tokenize( mockPaper.getTree(), languageProcessor );
+			const sentences = result.childNodes[ 0 ].sentences;
+			expect( sentences.length ).toEqual( 2 );
+			const firstSentence = sentences[ 0 ];
+			expect( firstSentence.text ).toEqual( "This is a sentence." );
+			expect( firstSentence.sourceCodeRange ).toEqual( { startOffset: 3, endOffset: 22 } );
+			expect( firstSentence.tokens.length ).toEqual( 8 );
+			const secondSentence = sentences[ 1 ];
+			expect( secondSentence.text ).toEqual( "\nThis is\nanother sentence." );
+			expect( secondSentence.sourceCodeRange ).toEqual( { startOffset: 27, endOffset: 56 } );
+			expect( secondSentence.tokens.length ).toEqual( 9 );
+			const [ br1, this1, , is1, br2, another1, , , ] = secondSentence.tokens;
+			expect( br1.sourceCodeRange ).toEqual( { startOffset: 27, endOffset: 28 } );
+			expect( this1.sourceCodeRange ).toEqual( { startOffset: 28, endOffset: 32 } );
+			expect( is1.sourceCodeRange ).toEqual( { startOffset: 33, endOffset: 35 } );
+			expect( br2.sourceCodeRange ).toEqual( { startOffset: 38, endOffset: 39 } );
+			expect( another1.sourceCodeRange ).toEqual( { startOffset: 39, endOffset: 46 } );
+		} );
 	} );
 
 describe( "A test for tokenizing a Japanese sentence", function() {

diff --git a/packages/yoastseo/spec/parse/traverse/innerTextSpec.js b/packages/yoastseo/spec/parse/traverse/innerTextSpec.js
@@ -46,6 +46,17 @@ describe( "A test for innerText", () => {
 		expect( searchResult ).toEqual( expected );
 	} );
 
+	it( "should consider break tags to be line breaks", function() {
+		// Matsuo Bashō's "old pond".
+		paper._text = "<p>old pond<br />frog leaps in<br>water's sound</p>";
+		const tree = build( paper, languageProcessor );
+
+		const searchResult = innerText( tree );
+		const expected = "old pond\nfrog leaps in\nwater's sound";
+
+		expect( searchResult ).toEqual( expected );
+	} );
+
 	it( "should return an empty string when presented an empty node", function() {
 		const tree = new Node( "" );
 

diff --git a/packages/yoastseo/spec/scoring/assessments/seo/KeywordDensityAssessmentSpec.js b/packages/yoastseo/spec/scoring/assessments/seo/KeywordDensityAssessmentSpec.js
@@ -320,7 +320,7 @@ describe( "A test for marking the keyphrase", function() {
 		const keyphraseDensityAssessment = new KeyphraseDensityAssessment();
 		const paper = new Paper( "<p><img class='size-medium wp-image-33' src='http://basic.wordpress.test/wp-content/uploads/2021/08/" +
 			"cat-3957861_1280-211x300.jpeg' alt='a different cat with toy' width='211' height='300'></img> " +
-			"A flamboyant cat with a toy<br/>\n" +
+			"A flamboyant cat with a toy<br/>" +
 			"</p>",
 		{ keyword: "cat toy" } );
 		const researcher = new EnglishResearcher( paper );

diff --git a/packages/yoastseo/src/languageProcessing/helpers/sentence/getSentencesFromTree.js b/packages/yoastseo/src/languageProcessing/helpers/sentence/getSentencesFromTree.js
@@ -1,3 +1,23 @@
+/**
+ * Retrieves the start offset for a given node: the end offset of its start tag when available, otherwise the node's own start offset.
+ * @param {Node} node The current node.
+ * @returns {number} The start offset, or 0 when the node has no `sourceCodeLocation` or when the resolved offset is falsy.
+ */
+function getStartOffset( node ) {
+	return node.sourceCodeLocation &&
+		( ( node.sourceCodeLocation.startTag && node.sourceCodeLocation.startTag.endOffset ) || node.sourceCodeLocation.startOffset ) || 0;
+}
+
+/**
+ * Retrieves the parent node for a given node by searching the paper's tree for a node whose `childNodes` include it.
+ * @param {Paper} paper The current paper, whose tree is searched.
+ * @param {Node} node The current node.
+ * @returns {Node} The first matching parent node (presumably `undefined` when the search yields no match - confirm callers handle that).
+ */
+function getParentNode( paper, node ) {
+	return paper.getTree().findAll( treeNode => treeNode.childNodes && treeNode.childNodes.includes( node ) )[ 0 ];
+}
+
 /**
  * Gets all the sentences from paragraph and heading nodes.
  * These two node types are the nodes that should contain sentences for the analysis.
@@ -11,17 +31,23 @@ export default function( paper ) {
 	const tree = paper.getTree().findAll( treeNode => !! treeNode.sentences );
 
 	return tree.flatMap( node => node.sentences.map( sentence => {
+		let parentNode = node;
+
+		// For implicit paragraphs, base the details on the parent of this node.
+		if ( node.isImplicit ) {
+			parentNode = getParentNode( paper, node );
+		}
+
 		return {
 			...sentence,
 			// The parent node's start offset is the start offset of the parent node if it doesn't have a `startTag` property.
-			parentStartOffset: node.sourceCodeLocation && ( ( node.sourceCodeLocation.startTag && node.sourceCodeLocation.startTag.endOffset ) ||
-				node.sourceCodeLocation.startOffset ) || 0,
+			parentStartOffset: getStartOffset( parentNode ),
 			// The block client id of the parent node.
-			parentClientId: node.clientId || "",
+			parentClientId: parentNode.clientId || "",
 			// The attribute id of the parent node, if available, otherwise an empty string.
-			parentAttributeId: node.attributeId || "",
+			parentAttributeId: parentNode.attributeId || "",
 			// Whether the parent node is the first section of Yoast sub-blocks.
-			isParentFirstSectionOfBlock: node.isFirstSection || false,
+			isParentFirstSectionOfBlock: parentNode.isFirstSection || false,
 		};
 	} ) );
 }
diff --git a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js
@@ -6,6 +6,7 @@ import matchWordFormsWithSentence from "../helpers/match/matchWordFormsWithSente
 import isDoubleQuoted from "../helpers/match/isDoubleQuoted";
 import { markWordsInASentence } from "../helpers/word/markWordsInSentences";
 import getSentences from "../helpers/sentence/getSentences";
+import { filterShortcodesFromHTML } from "../helpers";
 
 /**
  * Counts the occurrences of the keyphrase in the text and creates the Mark objects for the matches.
@@ -90,8 +91,12 @@ export default function getKeyphraseCount( paper, researcher ) {
 	const matchWordCustomHelper = researcher.getHelper( "matchWordCustomHelper" );
 	const customSentenceTokenizer = researcher.getHelper( "memoizedTokenizer" );
 	const locale = paper.getLocale();
+	const text = matchWordCustomHelper
+		? filterShortcodesFromHTML( paper.getText(), paper._attributes && paper._attributes.shortcodes )
+		: paper.getText();
+
 	// When the custom helper is available, we're using the sentences retrieved from the text for the analysis.
-	const sentences = matchWordCustomHelper ? getSentences( paper.getText(), customSentenceTokenizer ) : getSentencesFromTree( paper );
+	const sentences = matchWordCustomHelper ? getSentences( text, customSentenceTokenizer ) : getSentencesFromTree( paper );
 	// Exact matching is requested when the keyphrase is enclosed in double quotes.
 	const isExactMatchRequested = isDoubleQuoted( paper.getKeyword() );
 

diff --git a/packages/yoastseo/src/parse/build/private/getTextElementPositions.js b/packages/yoastseo/src/parse/build/private/getTextElementPositions.js
@@ -26,7 +26,18 @@ function getDescendantPositions( descendantNodes ) {
 			descendantTagPositions.push( node.sourceCodeLocation );
 		} else {
 			if ( node.sourceCodeLocation.startTag ) {
-				descendantTagPositions.push( node.sourceCodeLocation.startTag );
+				const startRange = {
+					startOffset: node.sourceCodeLocation.startTag.startOffset,
+					endOffset: node.sourceCodeLocation.startTag.endOffset,
+				};
+				/*
+				 * Here, we need to account for the fact that earlier (in innerText.js), we treated a <br> as a newline character.
+				 * Therefore, we need to subtract 1 from the endOffset to not count it twice.
+				 */
+				if ( node.name === "br" ) {
+					startRange.endOffset = startRange.endOffset - 1;
+				}
+				descendantTagPositions.push( startRange );
 			}
 			/*
 			 * Check whether node has an end tag before adding it to the array.

diff --git a/packages/yoastseo/src/parse/traverse/innerText.js b/packages/yoastseo/src/parse/traverse/innerText.js
@@ -14,6 +14,8 @@ export default function innerText( node ) {
 		node.childNodes.forEach( child => {
 			if ( child.name === "#text" ) {
 				text += child.value;
+			} else if ( child.name === "br" ) {
+				text += "\n";
 			} else {
 				text += innerText( child );
 			}

diff --git a/wp-seo-main.php b/wp-seo-main.php
@@ -15,7 +15,7 @@
  * {@internal Nobody should be able to overrule the real version number as this can cause
  *            serious issues with the options, so no if ( ! defined() ).}}
  */
-define( 'WPSEO_VERSION', '21.4-RC5' );
+define( 'WPSEO_VERSION', '21.4-RC6' );
 
 
 if ( ! defined( 'WPSEO_PATH' ) ) {

diff --git a/wp-seo.php b/wp-seo.php
@@ -8,7 +8,7 @@
  *
  * @wordpress-plugin
  * Plugin Name: Yoast SEO
- * Version:     21.4-RC5
+ * Version:     21.4-RC6
  * Plugin URI:  https://yoa.st/1uj
  * Description: The first true all-in-one SEO solution for WordPress, including on-page content analysis, XML sitemaps and much more.
  * Author:      Team Yoast
@@ -20,7 +20,7 @@
  * Requires PHP: 7.2.5
  *
  * WC requires at least: 7.1
- * WC tested up to: 8.1
+ * WC tested up to: 8.2
  *
  * This program is free software: you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by