Filters shortcode before getting the sentences for Japanese

Yoast · Oct 11, 2023 · 1c7e644 · 1c7e644
1 parent d65a8c3
commit 1c7e644
Show file tree

Hide file tree

Showing 2 changed files with 35 additions and 1 deletion.
diff --git a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js
@@ -1856,6 +1856,35 @@ describe( "Test for counting the keyphrase in a text for Japanese", () => {
 				original: "私の猫はかわいいです。" } ) ] );
 	} );
 
+	it( "counts the keyphrase occurrence inside an image caption.", function() {
+		const mockPaper = new Paper( "<p>[caption id=\"attachment_157\" align=\"alignnone\" width=\"225\"]<img " +
+			"src=\"http://one.wordpress.test/wp-content/uploads/2023/07/IMG_0967_2-1-225x300.jpg\" alt=\"\" " +
+			"width=\"225\" height=\"300\" /> 一日一冊の本を読むのはできるかどうかやってみます。[/caption]</p>", {
+			locale: "ja",
+			keyphrase: "一冊の本を読む",
+			shortcodes: [
+				"wp_caption",
+				"caption",
+				"gallery",
+				"playlist" ],
+		} );
+		const keyphraseForms = [
+			[ "一冊" ],
+			[ "本" ],
+			[ "読む", "読み", "読ま", "読め", "読も", "読ん", "読める", "読ませ", "読ませる", "読まれ", "読まれる", "読もう" ],
+		];
+		const researcher = buildJapaneseMockResearcher( keyphraseForms, wordsCountHelper, matchWordsHelper );
+		buildTree( mockPaper, researcher );
+
+
+		expect( getKeyphraseCount( mockPaper, researcher ).count ).toBe( 1 );
+		expect( getKeyphraseCount( mockPaper, researcher ).markings ).toEqual( [
+			new Mark( {
+				marked: "一日<yoastmark class='yoast-text-mark'>一冊</yoastmark>の<yoastmark " +
+					"class='yoast-text-mark'>本</yoastmark>を<yoastmark class='yoast-text-mark'>読む</yoastmark>のはできるかどうかやってみます。",
+				original: "一日一冊の本を読むのはできるかどうかやってみます。" } ) ] );
+	} );
+
 	it( "counts/marks a string of text with multiple occurrences of the same keyphrase in it.", function() {
 		const mockPaper = new Paper( "<p>私の猫はかわいい猫です。</p>", { locale: "ja", keyphrase: "猫" } );
 		const researcher = buildJapaneseMockResearcher( [ [ "猫" ] ], wordsCountHelper, matchWordsHelper );

diff --git a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js
@@ -6,6 +6,7 @@ import matchWordFormsWithSentence from "../helpers/match/matchWordFormsWithSente
 import isDoubleQuoted from "../helpers/match/isDoubleQuoted";
 import { markWordsInASentence } from "../helpers/word/markWordsInSentences";
 import getSentences from "../helpers/sentence/getSentences";
+import { filterShortcodesFromHTML } from "../helpers";
 
 /**
  * Counts the occurrences of the keyphrase in the text and creates the Mark objects for the matches.
@@ -90,8 +91,12 @@ export default function getKeyphraseCount( paper, researcher ) {
 	const matchWordCustomHelper = researcher.getHelper( "matchWordCustomHelper" );
 	const customSentenceTokenizer = researcher.getHelper( "memoizedTokenizer" );
 	const locale = paper.getLocale();
+	const text = matchWordCustomHelper
+		? filterShortcodesFromHTML( paper.getText(), paper._attributes && paper._attributes.shortcodes )
+		: paper.getText();
+
 	// When the custom helper is available, we're using the sentences retrieved from the text for the analysis.
-	const sentences = matchWordCustomHelper ? getSentences( paper.getText(), customSentenceTokenizer ) : getSentencesFromTree( paper );
+	const sentences = matchWordCustomHelper ? getSentences( text, customSentenceTokenizer ) : getSentencesFromTree( paper );
 	// Exact matching is requested when the keyphrase is enclosed in double quotes.
 	const isExactMatchRequested = isDoubleQuoted( paper.getKeyword() );