From 1c7e644a73445a7889f0c7f2e1eabe85db1ad115 Mon Sep 17 00:00:00 2001 From: aidamarfuaty Date: Wed, 11 Oct 2023 11:13:09 +0200 Subject: [PATCH] Filters shortcode before getting the sentences for Japanese --- .../researches/keywordCountSpec.js | 29 +++++++++++++++++++ .../researches/keywordCount.js | 7 ++++- 2 files changed, 35 insertions(+), 1 deletion(-) diff --git a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js index c67344bc7bb..e81d66b9909 100644 --- a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js +++ b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js @@ -1856,6 +1856,35 @@ describe( "Test for counting the keyphrase in a text for Japanese", () => { original: "私の猫はかわいいです。" } ) ] ); } ); + it( "counts the keyphrase occurrence inside an image caption.", function() { + const mockPaper = new Paper( "

[caption id=\"attachment_157\" align=\"alignnone\" width=\"225\"]\"\" 一日一冊の本を読むのはできるかどうかやってみます。[/caption]

", { + locale: "ja", + keyphrase: "一冊の本を読む", + shortcodes: [ + "wp_caption", + "caption", + "gallery", + "playlist" ], + } ); + const keyphraseForms = [ + [ "一冊" ], + [ "本" ], + [ "読む", "読み", "読ま", "読め", "読も", "読ん", "読める", "読ませ", "読ませる", "読まれ", "読まれる", "読もう" ], + ]; + const researcher = buildJapaneseMockResearcher( keyphraseForms, wordsCountHelper, matchWordsHelper ); + buildTree( mockPaper, researcher ); + + + expect( getKeyphraseCount( mockPaper, researcher ).count ).toBe( 1 ); + expect( getKeyphraseCount( mockPaper, researcher ).markings ).toEqual( [ + new Mark( { + marked: "一日一冊読むのはできるかどうかやってみます。", + original: "一日一冊の本を読むのはできるかどうかやってみます。" } ) ] ); + } ); + it( "counts/marks a string of text with multiple occurrences of the same keyphrase in it.", function() { const mockPaper = new Paper( "

私の猫はかわいい猫です。

", { locale: "ja", keyphrase: "猫" } ); const researcher = buildJapaneseMockResearcher( [ [ "猫" ] ], wordsCountHelper, matchWordsHelper ); diff --git a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js index e26b819b8ad..315f91fb7c5 100644 --- a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js +++ b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js @@ -6,6 +6,7 @@ import matchWordFormsWithSentence from "../helpers/match/matchWordFormsWithSente import isDoubleQuoted from "../helpers/match/isDoubleQuoted"; import { markWordsInASentence } from "../helpers/word/markWordsInSentences"; import getSentences from "../helpers/sentence/getSentences"; +import { filterShortcodesFromHTML } from "../helpers"; /** * Counts the occurrences of the keyphrase in the text and creates the Mark objects for the matches. @@ -90,8 +91,12 @@ export default function getKeyphraseCount( paper, researcher ) { const matchWordCustomHelper = researcher.getHelper( "matchWordCustomHelper" ); const customSentenceTokenizer = researcher.getHelper( "memoizedTokenizer" ); const locale = paper.getLocale(); + const text = matchWordCustomHelper + ? filterShortcodesFromHTML( paper.getText(), paper._attributes && paper._attributes.shortcodes ) + : paper.getText(); + // When the custom helper is available, we're using the sentences retrieved from the text for the analysis. - const sentences = matchWordCustomHelper ? getSentences( paper.getText(), customSentenceTokenizer ) : getSentencesFromTree( paper ); + const sentences = matchWordCustomHelper ? getSentences( text, customSentenceTokenizer ) : getSentencesFromTree( paper ); // Exact matching is requested when the keyphrase is enclosed in double quotes. const isExactMatchRequested = isDoubleQuoted( paper.getKeyword() );