Skip to content

Commit

Permalink
Filters shortcode before getting the sentences for Japanese
Browse files (browse the repository at this point in the history)
  • Loading branch information
FAMarfuaty committed Oct 11, 2023
1 parent d65a8c3 commit 1c7e644
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -1856,6 +1856,35 @@ describe( "Test for counting the keyphrase in a text for Japanese", () => {
original: "私の猫はかわいいです。" } ) ] );
} );

it( "counts the keyphrase occurrence inside an image caption.", function() {
	// The only sentence of the text sits inside a [caption] shortcode, right after an <img> tag.
	const captionedText = "<p>[caption id=\"attachment_157\" align=\"alignnone\" width=\"225\"]<img " +
		"src=\"http://one.wordpress.test/wp-content/uploads/2023/07/IMG_0967_2-1-225x300.jpg\" alt=\"\" " +
		"width=\"225\" height=\"300\" /> 一日一冊の本を読むのはできるかどうかやってみます。[/caption]</p>";

	// Shortcode names handed to the paper through its `shortcodes` attribute.
	const paperShortcodes = [ "wp_caption", "caption", "gallery", "playlist" ];

	const mockPaper = new Paper( captionedText, {
		locale: "ja",
		keyphrase: "一冊の本を読む",
		shortcodes: paperShortcodes,
	} );

	// One list of accepted forms per keyphrase word.
	const formsOfIssatsu = [ "一冊" ];
	const formsOfHon = [ "本" ];
	const formsOfYomu = [ "読む", "読み", "読ま", "読め", "読も", "読ん", "読める", "読ませ", "読ませる", "読まれ", "読まれる", "読もう" ];
	const keyphraseForms = [ formsOfIssatsu, formsOfHon, formsOfYomu ];

	const researcher = buildJapaneseMockResearcher( keyphraseForms, wordsCountHelper, matchWordsHelper );
	buildTree( mockPaper, researcher );

	// The expected marking refers to the bare sentence only: no shortcode and no image markup.
	const captionSentence = "一日一冊の本を読むのはできるかどうかやってみます。";
	const expectedMarkings = [
		new Mark( {
			marked: "一日<yoastmark class='yoast-text-mark'>一冊</yoastmark>の<yoastmark " +
				"class='yoast-text-mark'>本</yoastmark>を<yoastmark class='yoast-text-mark'>読む</yoastmark>のはできるかどうかやってみます。",
			original: captionSentence,
		} ),
	];

	expect( getKeyphraseCount( mockPaper, researcher ).count ).toBe( 1 );
	expect( getKeyphraseCount( mockPaper, researcher ).markings ).toEqual( expectedMarkings );
} );

it( "counts/marks a string of text with multiple occurrences of the same keyphrase in it.", function() {
const mockPaper = new Paper( "<p>私の猫はかわいい猫です。</p>", { locale: "ja", keyphrase: "猫" } );
const researcher = buildJapaneseMockResearcher( [ [ "猫" ] ], wordsCountHelper, matchWordsHelper );
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import matchWordFormsWithSentence from "../helpers/match/matchWordFormsWithSente
import isDoubleQuoted from "../helpers/match/isDoubleQuoted";
import { markWordsInASentence } from "../helpers/word/markWordsInSentences";
import getSentences from "../helpers/sentence/getSentences";
import { filterShortcodesFromHTML } from "../helpers";

/**
* Counts the occurrences of the keyphrase in the text and creates the Mark objects for the matches.
Expand Down Expand Up @@ -90,8 +91,12 @@ export default function getKeyphraseCount( paper, researcher ) {
const matchWordCustomHelper = researcher.getHelper( "matchWordCustomHelper" );
const customSentenceTokenizer = researcher.getHelper( "memoizedTokenizer" );
const locale = paper.getLocale();
const text = matchWordCustomHelper
? filterShortcodesFromHTML( paper.getText(), paper._attributes && paper._attributes.shortcodes )
: paper.getText();

// When the custom helper is available, we're using the sentences retrieved from the text for the analysis.
const sentences = matchWordCustomHelper ? getSentences( paper.getText(), customSentenceTokenizer ) : getSentencesFromTree( paper );
const sentences = matchWordCustomHelper ? getSentences( text, customSentenceTokenizer ) : getSentencesFromTree( paper );
// Exact matching is requested when the keyphrase is enclosed in double quotes.
const isExactMatchRequested = isDoubleQuoted( paper.getKeyword() );

Expand Down

0 comments on commit 1c7e644

Please sign in to comment.