Skip to content

Commit

Permalink
Merge branch 'release/21.4' of github.com:Yoast/wordpress-seo into trunk
Browse files Browse the repository at this point in the history
  • Loading branch information
FAMarfuaty committed Oct 11, 2023
2 parents a6e3c98 + fca39ac commit a5bc21e
Show file tree
Hide file tree
Showing 14 changed files with 199 additions and 26 deletions.
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@
"typescript": "^4.2.4"
},
"yoast": {
"pluginVersion": "21.4-RC5"
"pluginVersion": "21.4-RC6"
},
"version": "0.0.0"
}
2 changes: 1 addition & 1 deletion packages/js/src/decorator/helpers/getAnnotationsHelpers.js
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ export const getAnnotationsForFAQ = ( attributeWithAnnotationSupport, block, mar
*/
export const getAnnotationsForHowTo = ( attributeWithAnnotationSupport, block, marks ) => {
const annotatableTextsFromBlock = block.attributes[ attributeWithAnnotationSupport.key ];
if ( annotatableTextsFromBlock.length === 0 ) {
if ( annotatableTextsFromBlock && annotatableTextsFromBlock.length === 0 ) {
return [];
}
const annotations = [];
Expand Down
39 changes: 27 additions & 12 deletions packages/js/src/decorator/helpers/positionBasedAnnotationHelper.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ import { helpers } from "yoastseo";
/**
* Regex to detect HTML tags.
* Please note that this regex will also detect non-HTML tags that are also wrapped in `<>`.
* For example, in the following sentence, <strong class="">cats <dogs> rabbit </strong>,
* we will match <strong class="">, <dogs> and </strong>. This is an edge case though.
* For example, in the following sentence, `<strong class="">cats <dogs> rabbit </strong>`,
* we will match `<strong class="">`, `<dogs>` and `</strong>`. This is an edge case though.
* @type {RegExp}
*/
const htmlTagsRegex = /(<([a-z]|\/)[^<>]+>)/ig;
Expand Down Expand Up @@ -53,6 +53,27 @@ const adjustFirstSectionOffsets = ( blockStartOffset, blockEndOffset, blockName
return { blockStartOffset, blockEndOffset };
};

/**
 * Retrieves the combined length of the given HTML tags, counting every `<br>` tag as one character shorter.
 *
 * @param {RegExpMatchArray[]} htmlTags The HTML tag matches, where the first item of each match is the full tag string.
 * @returns {number} The combined length of the given HTML tags, adjusted for `<br>` tags.
 */
const getTagsLength = ( htmlTags ) => {
	/*
	 * Matches `<br>`, `<br/>`, `<br />`, `<br class="">` and `</br>` in any letter case.
	 * The character class after `br` prevents other tags that merely start with "br" (e.g. `<brand-name>`) from matching.
	 */
	const brTagRegex = /^<\/?br[\s/>]/i;

	// Tolerates a missing list; the order of the tags is irrelevant because the lengths are only summed.
	return ( htmlTags || [] ).reduce( ( tagsLength, [ tag ] ) => {
		// Here, we need to account for treating <br> tags as sentence delimiters, and subtract 1 from the tag length.
		const tagLength = brTagRegex.test( tag ) ? tag.length - 1 : tag.length;

		return tagsLength + tagLength;
	}, 0 );
};

/**
* Adjusts the block start and end offsets of a given Mark when the block HTML contains HTML tags.
*
Expand Down Expand Up @@ -80,17 +101,11 @@ const adjustOffsetsForHtmlTags = ( slicedBlockHtmlToStartOffset, slicedBlockHtml
* - Text: This is a giant panda.
* - Range of "panda": 16 -21
*/
let foundHtmlTags = [ ...slicedBlockHtmlToStartOffset.matchAll( htmlTagsRegex ) ];
forEachRight( foundHtmlTags, ( foundHtmlTag ) => {
const [ tag ] = foundHtmlTag;
blockStartOffset -= tag.length;
} );
const foundHtmlTagsToStartOffset = [ ...slicedBlockHtmlToStartOffset.matchAll( htmlTagsRegex ) ];
blockStartOffset -= getTagsLength( foundHtmlTagsToStartOffset );

foundHtmlTags = [ ...slicedBlockHtmlToEndOffset.matchAll( htmlTagsRegex ) ];
forEachRight( foundHtmlTags, ( foundHtmlTag ) => {
const [ tag ] = foundHtmlTag;
blockEndOffset -= tag.length;
} );
const foundHtmlTagsToEndOffset = [ ...slicedBlockHtmlToEndOffset.matchAll( htmlTagsRegex ) ];
blockEndOffset -= getTagsLength( foundHtmlTagsToEndOffset );

return { blockStartOffset, blockEndOffset };
};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -299,7 +299,7 @@ const testCases = [
attributeId: "",
clientId: "",
isFirstSection: false } } ),
],
],
skip: false,
},
{
Expand Down Expand Up @@ -953,6 +953,41 @@ const testCasesWithSpecialCharacters = [
],
skip: false,
},
{
description: "can match keyphrases in texts that contain <br> tags",
paper: new Paper( "<p>This is a test.<br>This is<br />another<br>test.</p>", { keyword: "test" } ),
keyphraseForms: [ [ "test" ] ],
expectedCount: 2,
expectedMarkings: [
new Mark( {
original: "This is a test.",
marked: "This is a <yoastmark class='yoast-text-mark'>test</yoastmark>.",
position: {
startOffset: 13,
endOffset: 17,
startOffsetBlock: 10,
endOffsetBlock: 14,
attributeId: "",
clientId: "",
isFirstSection: false,
} }
),
new Mark( {
original: "\nThis is\nanother\ntest.",
marked: "\nThis is\nanother\n<yoastmark class='yoast-text-mark'>test</yoastmark>.",
position: {
startOffset: 46,
endOffset: 50,
startOffsetBlock: 43,
endOffsetBlock: 47,
attributeId: "",
clientId: "",
isFirstSection: false,
} }
),
],
skip: false,
},
{
description: "can match paragraph gutenberg block",
paper: new Paper(
Expand Down Expand Up @@ -1856,6 +1891,35 @@ describe( "Test for counting the keyphrase in a text for Japanese", () => {
original: "私の猫はかわいいです。" } ) ] );
} );

it( "counts the keyphrase occurrence inside an image caption.", function() {
const mockPaper = new Paper( "<p>[caption id=\"attachment_157\" align=\"alignnone\" width=\"225\"]<img " +
"src=\"http://one.wordpress.test/wp-content/uploads/2023/07/IMG_0967_2-1-225x300.jpg\" alt=\"\" " +
"width=\"225\" height=\"300\" /> 一日一冊の本を読むのはできるかどうかやってみます。[/caption]</p>", {
locale: "ja",
keyphrase: "一冊の本を読む",
shortcodes: [
"wp_caption",
"caption",
"gallery",
"playlist" ],
} );
const keyphraseForms = [
[ "一冊" ],
[ "本" ],
[ "読む", "読み", "読ま", "読め", "読も", "読ん", "読める", "読ませ", "読ませる", "読まれ", "読まれる", "読もう" ],
];
const researcher = buildJapaneseMockResearcher( keyphraseForms, wordsCountHelper, matchWordsHelper );
buildTree( mockPaper, researcher );


expect( getKeyphraseCount( mockPaper, researcher ).count ).toBe( 1 );
expect( getKeyphraseCount( mockPaper, researcher ).markings ).toEqual( [
new Mark( {
marked: "一日<yoastmark class='yoast-text-mark'>一冊</yoastmark>の<yoastmark " +
"class='yoast-text-mark'>本</yoastmark>を<yoastmark class='yoast-text-mark'>読む</yoastmark>のはできるかどうかやってみます。",
original: "一日一冊の本を読むのはできるかどうかやってみます。" } ) ] );
} );

it( "counts/marks a string of text with multiple occurrences of the same keyphrase in it.", function() {
const mockPaper = new Paper( "<p>私の猫はかわいい猫です。</p>", { locale: "ja", keyphrase: "猫" } );
const researcher = buildJapaneseMockResearcher( [ [ "猫" ] ], wordsCountHelper, matchWordsHelper );
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,21 @@ describe( "A test for getting positions of sentences", () => {
expect( getTextElementPositions( node, sentences ) ).toEqual( sentencesWithPositions );
} );

it( "should determine the correct token positions when the sentence contains a br tag", function() {
// HTML: <p>Hello<br />world!</p>.
// Source offsets: `<p>` spans 0-3, "Hello" 3-8, `<br />` 8-14, "world" 14-19 and "!" 19-20.
const html = "<p>Hello<br />world!</p>";
const tree = adapt( parseFragment( html, { sourceCodeLocationInfo: true } ) );
const paragraph = tree.childNodes[ 0 ];
// The "\n" token stands in for the `<br />` tag.
const tokens = [ "Hello", "\n", "world", "!" ].map( string => new Token( string ) );

// NOTE(review): the third argument (3) looks like the offset where the paragraph content starts, i.e. right after `<p>` — confirm against getTextElementPositions.
const [ hello, br, world, bang ] = getTextElementPositions( paragraph, tokens, 3 );

expect( hello.sourceCodeRange ).toEqual( { startOffset: 3, endOffset: 8 } );
// The newline token is expected to cover only the last character of the 6-character `<br />` tag (13-14), not the whole tag (8-14).
expect( br.sourceCodeRange ).toEqual( { startOffset: 13, endOffset: 14 } );
expect( world.sourceCodeRange ).toEqual( { startOffset: 14, endOffset: 19 } );
expect( bang.sourceCodeRange ).toEqual( { startOffset: 19, endOffset: 20 } );
} );

it( "gets the sentence positions from a node that has a code child node", function() {
// HTML: <p>Hello <code>array.push( something )</code> code!</p>
const node = new Paragraph( {}, [
Expand Down
24 changes: 24 additions & 0 deletions packages/yoastseo/spec/parse/build/private/tokenizeSpec.js
Original file line number Diff line number Diff line change
Expand Up @@ -477,6 +477,30 @@ describe( "A test for the tokenize function",
name: "#document-fragment",
} );
} );

it( "should correctly tokenize a paragraph with single br tags", function() {
// Source offsets: `<p>` 0-3, "This is a sentence." 3-22, `<br />` 22-28, "This is" 28-35, `<br>` 35-39, "another sentence." 39-56.
const mockPaper = new Paper( "<p>This is a sentence.<br />This is<br>another sentence.</p>" );
const mockResearcher = new EnglishResearcher( mockPaper );
const languageProcessor = new LanguageProcessor( mockResearcher );
buildTreeNoTokenize( mockPaper );
const result = tokenize( mockPaper.getTree(), languageProcessor );
const sentences = result.childNodes[ 0 ].sentences;
// The first break tag ends the first sentence; the second break tag does not split the second sentence.
expect( sentences.length ).toEqual( 2 );
const firstSentence = sentences[ 0 ];
expect( firstSentence.text ).toEqual( "This is a sentence." );
expect( firstSentence.sourceCodeRange ).toEqual( { startOffset: 3, endOffset: 22 } );
expect( firstSentence.tokens.length ).toEqual( 8 );
const secondSentence = sentences[ 1 ];
// Both break tags appear as "\n" in the text of the second sentence.
expect( secondSentence.text ).toEqual( "\nThis is\nanother sentence." );
// The second sentence is expected to start at the last character of the first `<br />` tag (27), not at the start of the tag (22).
expect( secondSentence.sourceCodeRange ).toEqual( { startOffset: 27, endOffset: 56 } );
// Expected tokens: "\n", "This", " ", "is", "\n", "another", " ", "sentence", ".".
expect( secondSentence.tokens.length ).toEqual( 9 );
// The holes in the destructuring pattern skip the tokens that are not asserted on.
const [ br1, this1, , is1, br2, another1, , , ] = secondSentence.tokens;
// Each newline token is expected to cover only the last character of its break tag: 27-28 for `<br />` and 38-39 for `<br>`.
expect( br1.sourceCodeRange ).toEqual( { startOffset: 27, endOffset: 28 } );
expect( this1.sourceCodeRange ).toEqual( { startOffset: 28, endOffset: 32 } );
expect( is1.sourceCodeRange ).toEqual( { startOffset: 33, endOffset: 35 } );
expect( br2.sourceCodeRange ).toEqual( { startOffset: 38, endOffset: 39 } );
expect( another1.sourceCodeRange ).toEqual( { startOffset: 39, endOffset: 46 } );
} );
} );

describe( "A test for tokenizing a Japanese sentence", function() {
Expand Down
11 changes: 11 additions & 0 deletions packages/yoastseo/spec/parse/traverse/innerTextSpec.js
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,17 @@ describe( "A test for innerText", () => {
expect( searchResult ).toEqual( expected );
} );

it( "should consider break tags to be line breaks", function() {
// Matsuo Bashō's "old pond".
paper._text = "<p>old pond<br />frog leaps in<br>water's sound</p>";
const tree = build( paper, languageProcessor );

const searchResult = innerText( tree );
const expected = "old pond\nfrog leaps in\nwater's sound";

expect( searchResult ).toEqual( expected );
} );

it( "should return an empty string when presented an empty node", function() {
const tree = new Node( "" );

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -320,7 +320,7 @@ describe( "A test for marking the keyphrase", function() {
const keyphraseDensityAssessment = new KeyphraseDensityAssessment();
const paper = new Paper( "<p><img class='size-medium wp-image-33' src='http://basic.wordpress.test/wp-content/uploads/2021/08/" +
"cat-3957861_1280-211x300.jpeg' alt='a different cat with toy' width='211' height='300'></img> " +
"A flamboyant cat with a toy<br/>\n" +
"A flamboyant cat with a toy<br/>" +
"</p>",
{ keyword: "cat toy" } );
const researcher = new EnglishResearcher( paper );
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,23 @@
/**
 * Determines the offset at which the content of a node starts.
 *
 * The end offset of the node's start tag is preferred. When that is unavailable, the node's own start offset is used.
 * When no usable location information is present at all, 0 is returned.
 *
 * @param {Node} node The node to retrieve the start offset for.
 * @returns {number} The start offset of the node's content.
 */
function getStartOffset( node ) {
	const location = node.sourceCodeLocation;
	if ( ! location ) {
		return 0;
	}

	const startTagEndOffset = location.startTag && location.startTag.endOffset;

	return startTagEndOffset || location.startOffset || 0;
}

/**
 * Looks up the parent of a given node in the tree of the paper.
 *
 * The parent is the first node returned by the tree's `findAll` whose `childNodes` include the given node.
 *
 * @param {Paper} paper The paper whose tree is searched.
 * @param {Node} node The node to find the parent of.
 * @returns {Node} The parent node, or `undefined` when no node in the tree lists the given node as a child.
 */
function getParentNode( paper, node ) {
	const hasNodeAsChild = ( candidate ) => candidate.childNodes && candidate.childNodes.includes( node );
	const parentNodes = paper.getTree().findAll( hasNodeAsChild );

	return parentNodes[ 0 ];
}

/**
* Gets all the sentences from paragraph and heading nodes.
* These two node types are the nodes that should contain sentences for the analysis.
Expand All @@ -11,17 +31,23 @@ export default function( paper ) {
const tree = paper.getTree().findAll( treeNode => !! treeNode.sentences );

return tree.flatMap( node => node.sentences.map( sentence => {
let parentNode = node;

// For implicit paragraphs, base the details on the parent of this node.
if ( node.isImplicit ) {
parentNode = getParentNode( paper, node );
}

return {
...sentence,
// The parent node's start offset is the start offset of the parent node if it doesn't have a `startTag` property.
parentStartOffset: node.sourceCodeLocation && ( ( node.sourceCodeLocation.startTag && node.sourceCodeLocation.startTag.endOffset ) ||
node.sourceCodeLocation.startOffset ) || 0,
parentStartOffset: getStartOffset( parentNode ),
// The block client id of the parent node.
parentClientId: node.clientId || "",
parentClientId: parentNode.clientId || "",
// The attribute id of the parent node, if available, otherwise an empty string.
parentAttributeId: node.attributeId || "",
parentAttributeId: parentNode.attributeId || "",
// Whether the parent node is the first section of Yoast sub-blocks.
isParentFirstSectionOfBlock: node.isFirstSection || false,
isParentFirstSectionOfBlock: parentNode.isFirstSection || false,
};
} ) );
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import matchWordFormsWithSentence from "../helpers/match/matchWordFormsWithSente
import isDoubleQuoted from "../helpers/match/isDoubleQuoted";
import { markWordsInASentence } from "../helpers/word/markWordsInSentences";
import getSentences from "../helpers/sentence/getSentences";
import { filterShortcodesFromHTML } from "../helpers";

/**
* Counts the occurrences of the keyphrase in the text and creates the Mark objects for the matches.
Expand Down Expand Up @@ -90,8 +91,12 @@ export default function getKeyphraseCount( paper, researcher ) {
const matchWordCustomHelper = researcher.getHelper( "matchWordCustomHelper" );
const customSentenceTokenizer = researcher.getHelper( "memoizedTokenizer" );
const locale = paper.getLocale();
const text = matchWordCustomHelper
? filterShortcodesFromHTML( paper.getText(), paper._attributes && paper._attributes.shortcodes )
: paper.getText();

// When the custom helper is available, we're using the sentences retrieved from the text for the analysis.
const sentences = matchWordCustomHelper ? getSentences( paper.getText(), customSentenceTokenizer ) : getSentencesFromTree( paper );
const sentences = matchWordCustomHelper ? getSentences( text, customSentenceTokenizer ) : getSentencesFromTree( paper );
// Exact matching is requested when the keyphrase is enclosed in double quotes.
const isExactMatchRequested = isDoubleQuoted( paper.getKeyword() );

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,18 @@ function getDescendantPositions( descendantNodes ) {
descendantTagPositions.push( node.sourceCodeLocation );
} else {
if ( node.sourceCodeLocation.startTag ) {
descendantTagPositions.push( node.sourceCodeLocation.startTag );
const startRange = {
startOffset: node.sourceCodeLocation.startTag.startOffset,
endOffset: node.sourceCodeLocation.startTag.endOffset,
};
/*
* Here, we need to account for the fact that earlier (in innerText.js), we treated a <br> as a newline character.
* Therefore, we need to subtract 1 from the endOffset to not count it twice.
*/
if ( node.name === "br" ) {
startRange.endOffset = startRange.endOffset - 1;
}
descendantTagPositions.push( startRange );
}
/*
* Check whether node has an end tag before adding it to the array.
Expand Down
2 changes: 2 additions & 0 deletions packages/yoastseo/src/parse/traverse/innerText.js
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ export default function innerText( node ) {
node.childNodes.forEach( child => {
if ( child.name === "#text" ) {
text += child.value;
} else if ( child.name === "br" ) {
text += "\n";
} else {
text += innerText( child );
}
Expand Down
2 changes: 1 addition & 1 deletion wp-seo-main.php
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
* {@internal Nobody should be able to overrule the real version number as this can cause
* serious issues with the options, so no if ( ! defined() ).}}
*/
define( 'WPSEO_VERSION', '21.4-RC5' );
define( 'WPSEO_VERSION', '21.4-RC6' );


if ( ! defined( 'WPSEO_PATH' ) ) {
Expand Down
4 changes: 2 additions & 2 deletions wp-seo.php
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
*
* @wordpress-plugin
* Plugin Name: Yoast SEO
* Version: 21.4-RC5
* Version: 21.4-RC6
* Plugin URI: https://yoa.st/1uj
* Description: The first true all-in-one SEO solution for WordPress, including on-page content analysis, XML sitemaps and much more.
* Author: Team Yoast
Expand All @@ -20,7 +20,7 @@
* Requires PHP: 7.2.5
*
* WC requires at least: 7.1
* WC tested up to: 8.1
* WC tested up to: 8.2
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
Expand Down

0 comments on commit a5bc21e

Please sign in to comment.