';
- echo '
';
+ if ( YoastSEO()->classes->get( Promotion_Manager::class )->is( 'black-friday-2023-promotion' ) ) {
+ $bf_label = \esc_html__( 'BLACK FRIDAY', 'wordpress-seo' );
+ $sale_label = \esc_html__( '30% OFF', 'wordpress-seo' );
+ // phpcs:ignore WordPress.Security.EscapeOutput.OutputNotEscaped -- Already escaped above.
+ echo "
$bf_label $sale_label
";
+ }
+
+ echo '
';
echo '
' . $upsell_button . '
' . PHP_EOL . PHP_EOL;
}
diff --git a/admin/formatter/class-metabox-formatter.php b/admin/formatter/class-metabox-formatter.php
index 9100491e1c0..d7689f45437 100644
--- a/admin/formatter/class-metabox-formatter.php
+++ b/admin/formatter/class-metabox-formatter.php
@@ -240,6 +240,7 @@ private function get_content_analysis_component_translations() {
'content-analysis.highlight' => __( 'Highlight this result in the text', 'wordpress-seo' ),
'content-analysis.nohighlight' => __( 'Remove highlight from the text', 'wordpress-seo' ),
'content-analysis.disabledButton' => __( 'Marks are disabled in current view', 'wordpress-seo' ),
+ /* translators: Hidden accessibility text. */
'a11yNotice.opensInNewTab' => __( '(Opens in a new browser tab)', 'wordpress-seo' ),
];
}
diff --git a/admin/menu/class-admin-menu.php b/admin/menu/class-admin-menu.php
index df97b3185c1..53d037e3893 100644
--- a/admin/menu/class-admin-menu.php
+++ b/admin/menu/class-admin-menu.php
@@ -114,12 +114,10 @@ protected function get_notification_counter() {
$notification_count = $notification_center->get_notification_count();
// Add main page.
- /* translators: %s: number of notifications */
+ /* translators: Hidden accessibility text; %s: number of notifications. */
$notifications = sprintf( _n( '%s notification', '%s notifications', $notification_count, 'wordpress-seo' ), number_format_i18n( $notification_count ) );
- $counter = sprintf( '
%1$d%2$s', $notification_count, $notifications );
-
- return $counter;
+ return sprintf( '
%1$d%2$s', $notification_count, $notifications );
}
/**
diff --git a/admin/menu/class-base-menu.php b/admin/menu/class-base-menu.php
index 59cda24f245..a840987dc4b 100644
--- a/admin/menu/class-base-menu.php
+++ b/admin/menu/class-base-menu.php
@@ -5,6 +5,8 @@
* @package WPSEO\Admin\Menu
*/
+use Yoast\WP\SEO\Promotions\Application\Promotion_Manager;
+
/**
* Admin menu base class.
*/
@@ -258,6 +260,10 @@ protected function get_license_page_title() {
$title = __( 'Premium', 'wordpress-seo' );
}
+ if ( YoastSEO()->classes->get( Promotion_Manager::class )->is( 'black-friday-2023-promotion' ) && ! YoastSEO()->helpers->product->is_premium() ) {
+ $title = __( 'Premium', 'wordpress-seo' ) . '';
+ }
+
return $title;
}
diff --git a/admin/metabox/class-metabox.php b/admin/metabox/class-metabox.php
index 527801cb7eb..31fda910e33 100644
--- a/admin/metabox/class-metabox.php
+++ b/admin/metabox/class-metabox.php
@@ -893,8 +893,7 @@ public function enqueue() {
'has_taxonomies' => $this->current_post_type_has_taxonomies(),
],
'shortcodes' => [
- 'wpseo_filter_shortcodes_nonce' => wp_create_nonce( 'wpseo-filter-shortcodes' ),
- 'wpseo_shortcode_tags' => $this->get_valid_shortcode_tags(),
+ 'wpseo_shortcode_tags' => $this->get_valid_shortcode_tags(),
],
];
@@ -931,7 +930,7 @@ public function enqueue() {
'dismissedAlerts' => $dismissed_alerts,
'currentPromotions' => YoastSEO()->classes->get( Yoast\WP\SEO\Promotions\Application\Promotion_Manager::class )->get_current_promotions(),
'webinarIntroBlockEditorUrl' => WPSEO_Shortlinker::get( 'https://yoa.st/webinar-intro-block-editor' ),
- 'blackFridayBlockEditorUrl' => ( YoastSEO()->classes->get( Yoast\WP\SEO\Promotions\Application\Promotion_Manager::class )->is( 'black_friday_2023_checklist' ) ) ? WPSEO_Shortlinker::get( 'https://yoa.st/black-friday-checklist' ) : '',
+ 'blackFridayBlockEditorUrl' => ( YoastSEO()->classes->get( Yoast\WP\SEO\Promotions\Application\Promotion_Manager::class )->is( 'black-friday-2023-checklist' ) ) ? WPSEO_Shortlinker::get( 'https://yoa.st/black-friday-checklist' ) : '',
'isJetpackBoostActive' => ( $is_block_editor ) ? YoastSEO()->classes->get( Jetpack_Boost_Active_Conditional::class )->is_met() : false,
'isJetpackBoostNotPremium' => ( $is_block_editor ) ? YoastSEO()->classes->get( Jetpack_Boost_Not_Premium_Conditional::class )->is_met() : false,
'isWooCommerceActive' => $woocommerce_active,
diff --git a/admin/taxonomy/class-taxonomy-columns.php b/admin/taxonomy/class-taxonomy-columns.php
index 91905940e7b..6e3304d4dc4 100644
--- a/admin/taxonomy/class-taxonomy-columns.php
+++ b/admin/taxonomy/class-taxonomy-columns.php
@@ -84,11 +84,13 @@ public function add_columns( array $columns ) {
$new_columns[ $column_name ] = $column_value;
if ( $column_name === 'description' && $this->analysis_seo->is_enabled() ) {
- $new_columns['wpseo-score'] = '
' . __( 'SEO score', 'wordpress-seo' ) . '';
+ $new_columns['wpseo-score'] = '
' .
+ __( 'SEO score', 'wordpress-seo' ) . '';
}
if ( $column_name === 'description' && $this->analysis_readability->is_enabled() ) {
- $new_columns['wpseo-score-readability'] = '
' . __( 'Readability score', 'wordpress-seo' ) . '';
+ $new_columns['wpseo-score-readability'] = '
' .
+ __( 'Readability score', 'wordpress-seo' ) . '';
}
}
diff --git a/admin/taxonomy/class-taxonomy.php b/admin/taxonomy/class-taxonomy.php
index 963adff2fdb..7cb90e8e32b 100644
--- a/admin/taxonomy/class-taxonomy.php
+++ b/admin/taxonomy/class-taxonomy.php
@@ -163,6 +163,9 @@ public function admin_enqueue_scripts() {
'recommended_replace_vars' => $this->get_recommended_replace_vars(),
'scope' => $this->determine_scope(),
],
+ 'shortcodes' => [
+ 'wpseo_shortcode_tags' => $this->get_valid_shortcode_tags(),
+ ],
],
'worker' => [
'url' => YoastSEO()->helpers->asset->get_asset_url( 'yoast-seo-analysis-worker' ),
@@ -446,4 +449,19 @@ private function get_recommended_replace_vars() {
return $recommended_replace_vars->get_recommended_replacevars_for( $page_type );
}
+
+ /**
+ * Returns an array with shortcode tags for all registered shortcodes.
+ *
+ * @return array
+ */
+ private function get_valid_shortcode_tags() {
+ $shortcode_tags = [];
+
+ foreach ( $GLOBALS['shortcode_tags'] as $tag => $description ) {
+ $shortcode_tags[] = $tag;
+ }
+
+ return $shortcode_tags;
+ }
}
diff --git a/admin/views/js-templates-primary-term.php b/admin/views/js-templates-primary-term.php
index a1387529a76..684ef87d52b 100644
--- a/admin/views/js-templates-primary-term.php
+++ b/admin/views/js-templates-primary-term.php
@@ -14,7 +14,7 @@
inside an element we want to exclude" +
- " from the analysis, density should be 0%", { keyword: "keyword" } );
- mockResearcher = new EnglishResearcher( mockPaper );
- mockResearcher.addResearchData( "morphology", morphologyDataEN );
- expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 0 );
- mockPaper = new Paper( "Waltz keepin auf mitz auf keepin äöüß weiner blitz deutsch spitzen. ", { keyword: "äöüß" } );
- mockResearcher = new EnglishResearcher( mockPaper );
+ it( "should recognize a keyphrase when it consists umlauted characters", function() {
+ const mockPaper = new Paper( "
Waltz keepin auf mitz auf keepin äöüß weiner blitz deutsch spitzen.
", { keyword: "äöüß" } );
+ const mockResearcher = new EnglishResearcher( mockPaper );
mockResearcher.addResearchData( "morphology", morphologyDataEN );
+ buildTree( mockPaper, mockResearcher );
+
expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 9.090909090909092 );
+ } );
- mockPaper = new Paper( "Lorem ipsum dolor sit amet, key word consectetur key-word adipiscing elit ", { keyword: "key-word" } );
- mockResearcher = new EnglishResearcher( mockPaper );
+ it( "should recognize a hyphenated keyphrase.", function() {
+ const mockPaper = new Paper( "
Lorem ipsum dolor sit amet, key word consectetur key-word adipiscing elit
", { keyword: "key-word" } );
+ const mockResearcher = new EnglishResearcher( mockPaper );
mockResearcher.addResearchData( "morphology", morphologyDataEN );
+ buildTree( mockPaper, mockResearcher );
expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 9.090909090909092 );
+ } );
- mockPaper = new Paper( "a string of text with the kapaklı in it, density should be 7.7%", { keyword: "kapaklı" } );
- mockResearcher = new EnglishResearcher( mockPaper );
- mockResearcher.addResearchData( "morphology", morphologyDataEN );
+ it( "should recognize a keyphrase with an underscore in it", function() {
+ const mockPaper = new Paper( "
a string of text with the key_word in it, density should be 7.7%
", { keyword: "key_word" } );
+ const mockResearcher = new EnglishResearcher( mockPaper );
+ buildTree( mockPaper, mockResearcher );
expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 7.6923076923076925 );
+ } );
- mockPaper = new Paper( "a string of text with the key-word in it, density should be 7.7%", { keyword: "key-word" } );
- mockResearcher = new EnglishResearcher( mockPaper );
- mockResearcher.addResearchData( "morphology", morphologyDataEN );
- expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 7.6923076923076925 );
- mockPaper = new Paper( "a string of text with the key_word in it, density should be 7.7%", { keyword: "key_word" } );
- expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 7.6923076923076925 );
- mockPaper = new Paper( "a string of text with the key_word in it, density should be 0.0%", { keyword: "key word" } );
+ it( "should return zero if a multiword keyphrase is not present but the multiword keyphrase occurs with an underscore", function() {
+ const mockPaper = new Paper( "
a string of text with the key_word in it, density should be 0.0%
", { keyword: "key word" } );
+ const mockResearcher = new EnglishResearcher( mockPaper );
+ buildTree( mockPaper, mockResearcher );
expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 0 );
- mockPaper = new Paper( "a string of text with the key-word in it, density should be 7.7%", { keyword: "key word" } );
- // This behavior might change in the future.
+ } );
+
+ it( "should recognize a multiword keyphrase when it is occurs hyphenated", function() {
+ const mockPaper = new Paper( "
a string of text with the key-word in it, density should be 7.7%
", { keyword: "key word" } );
+ const mockResearcher = new EnglishResearcher( mockPaper );
+
+ buildTree( mockPaper, mockResearcher );
expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 7.6923076923076925 );
- mockPaper = new Paper( "a string of text with the key&word in it, density should be 7.7%", { keyword: "key&word" } );
+ } );
+
+ it( "should recognize a keyphrase with an ampersand in it", function() {
+ const mockPaper = new Paper( "
a string of text with the key&word in it, density should be 7.7%
", { keyword: "key&word" } );
+ const mockResearcher = new EnglishResearcher( mockPaper );
+ buildTree( mockPaper, mockResearcher );
expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 7.6923076923076925 );
- mockPaper = new Paper( "
", { keyword: "key&word" } );
- expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 0 );
- // Consecutive keywords are skipped, so this will match 2 times.
- mockPaper = new Paper( "This is a nice string with a keyword keyword keyword.", { keyword: "keyword" } );
- expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 20 );
- mockPaper = new Paper( "a string of text with the $keyword in it, density should be 7.7%", { keyword: "$keyword" } );
+ } );
+
+ it( "should include all keyphrase occurrence if there are more than two consecutive", function() {
+ const mockPaper = new Paper( "
This is a nice string with a keyword keyword keyword keyword keyword keyword keyword keyword keyword.
",
+ { keyword: "keyword" } );
+ const mockResearcher = new EnglishResearcher( mockPaper );
+ buildTree( mockPaper, mockResearcher );
+ expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 56.25 );
+ } );
+
+ it( "should recognize a keyphrase with a '$' in it", function() {
+ const mockPaper = new Paper( "
a string of text with the $keyword in it, density should be 7.7%
", { keyword: "$keyword" } );
+ const mockResearcher = new EnglishResearcher( mockPaper );
+ buildTree( mockPaper, mockResearcher );
expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 7.6923076923076925 );
- mockPaper = new Paper( "a string of text with the Keyword in it, density should be 7.7%", { keyword: "keyword" } );
+ } );
+
+ it( "should recognize a keyphrase regardless of capitalization", function() {
+ const mockPaper = new Paper( "
a string of text with the Keyword in it, density should be 7.7%
", { keyword: "keyword" } );
+ const mockResearcher = new EnglishResearcher( mockPaper );
+ buildTree( mockPaper, mockResearcher );
expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 7.6923076923076925 );
- mockPaper = new Paper( "a string of text with the Key word in it, density should be 14.29%", { keyword: "key word" } );
+ } );
+
+ it( "should recognize a multiword keyphrase regardless of capitalization", function() {
+ const mockPaper = new Paper( "
a string of text with the Key word in it, density should be 14.29%
", { keyword: "key word" } );
+ const mockResearcher = new EnglishResearcher( mockPaper );
+ buildTree( mockPaper, mockResearcher );
expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 7.142857142857142 );
- mockPaper = new Paper( "a string with quotes to match the key'word, even if the quotes differ", { keyword: "key’word" } );
+ } );
+
+ it( "should recognize apostrophes regardless of the type of quote that was used", function() {
+ const mockPaper = new Paper( "
a string with quotes to match the key'word, even if the quotes differ
", { keyword: "key’word" } );
+ const mockResearcher = new EnglishResearcher( mockPaper );
+ buildTree( mockPaper, mockResearcher );
expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 7.6923076923076925 );
} );
+ it( "should recognize keyphrase separated by ", function() {
+ const mockPaper = new Paper( "
a string with non-breaking space to match the key word
", { keyword: "key word" } );
+ const mockResearcher = new EnglishResearcher( mockPaper );
+ buildTree( mockPaper, mockResearcher );
+ expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 10 );
+ } );
+ it( "should return 0 when the paper contains only excluded element", function() {
+ const mockPaper = new Paper( "
In the United States and parts of Europe, " +
+ "tortoiseshell cats are often considered lucky charms.
" );
+ const mockResearcher = new EnglishResearcher( mockPaper );
+ buildTree( mockPaper, mockResearcher );
+ expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 0 );
+ } );
+ it( "should return 0 when the paper is empty", function() {
+ const mockPaper = new Paper( "" );
+ const mockResearcher = new EnglishResearcher( mockPaper );
+ buildTree( mockPaper, mockResearcher );
+ expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 0 );
+ } );
} );
-describe( "test for counting the keyword density in a text in a language that we haven't supported yet", function() {
- it( "returns keyword density", function() {
- const mockPaper = new Paper( "Ukara sing isine cemeng.", { keyword: "cemeng" } );
- expect( getKeywordDensity( mockPaper, new DefaultResearcher( mockPaper ) ) ).toBe( 25 );
+describe( "test for counting the keyword density in a non-English and non-Japanese language", function() {
+ it( "returns keyword density for a language that we haven't had the support yet", function() {
+ const mockPaper = new Paper( "Ukara sing isine cemeng.", { keyword: "cemeng", locale: "jv_ID" } );
+ const mockResearcher = new DefaultResearcher( mockPaper );
+ buildTree( mockPaper, mockResearcher );
+ expect( getKeywordDensity( mockPaper, mockResearcher ) ).toBe( 25 );
+ } );
+ it( "should correctly recognize a Turkish transliterated keyphrase with a 'ı' in it", function() {
+ const mockPaper = new Paper( "a string of text with the kapakli in it, density should be 7.7%", { keyword: "kapaklı", locale: "tr_TR" } );
+ const mockResearcher = new TurkishResearcher( mockPaper );
+ buildTree( mockPaper, mockResearcher );
+ expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 7.6923076923076925 );
} );
} );
describe( "test for counting the keyword density in a text in a language that uses a custom getWords helper (Japanese)", function() {
- /*it( "returns keyword density when the keyword is not found in the sentence", function() {
- const mockPaper = new Paper( "小さくて可愛い花の刺繍に関する一般一般の記事です。", { keyword: "猫" } );
+ it( "returns keyword density when the keyword is not found in the sentence", function() {
+ const mockPaper = new Paper( "
小さくて可愛い花の刺繍に関する一般一般の記事です。
", { keyword: "猫" } );
const mockResearcher = new JapaneseResearcher( mockPaper );
mockResearcher.addResearchData( "morphology", morphologyDataJA );
+ buildTree( mockPaper, mockResearcher );
+
expect( getKeywordDensity( mockPaper, mockResearcher ) ).toBe( 0 );
- } );*/
+ } );
it( "returns the keyword density when the keyword is found once", function() {
- const mockPaper = new Paper( "私の猫はかわいいです。", { keyword: "猫" } );
+ const mockPaper = new Paper( "
私の猫はかわいいです。
", { keyword: "猫" } );
const mockResearcher = new JapaneseResearcher( mockPaper );
mockResearcher.addResearchData( "morphology", morphologyDataJA );
+ buildTree( mockPaper, mockResearcher );
expect( getKeywordDensity( mockPaper, mockResearcher ) ).toBe( 16.666666666666664 );
} );
it( "returns the keyword density when the keyword contains multiple words and the sentence contains an inflected form", function() {
// 小さく is the inflected form of 小さい.
- const mockPaper = new Paper( "小さくて可愛い花の刺繍に関する一般一般の記事です。", { keyword: "小さい花の刺繍" } );
+ const mockPaper = new Paper( "
小さくて可愛い花の刺繍に関する一般一般の記事です。
", { keyword: "小さい花の刺繍" } );
const mockResearcher = new JapaneseResearcher( mockPaper );
+ buildTree( mockPaper, mockResearcher );
mockResearcher.addResearchData( "morphology", morphologyDataJA );
expect( getKeywordDensity( mockPaper, mockResearcher ) ).toBe( 6.666666666666667 );
} );
- /*it( "returns the keyword density when the morphologyData is not available to detect inflected forms", function() {
- // 小さく is the inflected form of 小さい.
- const mockPaper = new Paper( "小さくて可愛い花の刺繍に関する一般一般の記事です。", { keyword: "小さい花の刺繍" } );
- const mockResearcher = new JapaneseResearcher( mockPaper );
- expect( getKeywordDensity( mockPaper, mockResearcher ) ).toBe( 0 );
- } );
-
- it( "returns the keyword density when the keyword contains multiple words with an exact match separated in the sentence", function() {
- const mockPaper = new Paper( "一日一冊の面白い本を買って読んでるのはできるかどうかやってみます。", { keyword: "一冊の本を読む" } );
- const mockResearcher = new JapaneseResearcher( mockPaper );
- mockResearcher.addResearchData( "morphology", morphologyDataJA );
- expect( getKeywordDensity( mockPaper, mockResearcher ) ).toBe( 5 );
- } );
-
it( "returns the keyword density when the keyword contains multiple words with an exact match in the sentence", function() {
- const mockPaper = new Paper( "一日一冊の本を読むのはできるかどうかやってみます。", { keyword: "一冊の本を読む" } );
+ const mockPaper = new Paper( "
一日一冊の本を読むのはできるかどうかやってみます。
", { keyword: "『一冊の本を読む』" } );
const mockResearcher = new JapaneseResearcher( mockPaper );
mockResearcher.addResearchData( "morphology", morphologyDataJA );
+ buildTree( mockPaper, mockResearcher );
+
expect( getKeywordDensity( mockPaper, mockResearcher ) ).toBe( 6.666666666666667 );
} );
-
- it( "returns 0 when the text is empty", function() {
- const mockPaper = new Paper( "", { keyword: "猫" } );
- const mockResearcher = new JapaneseResearcher( mockPaper );
- mockResearcher.addResearchData( "morphology", morphologyDataJA );
- expect( getKeywordDensity( mockPaper, mockResearcher ) ).toBe( 0 );
- } );*/
} );
diff --git a/packages/yoastseo/spec/languageProcessing/researches/getParagraphLengthSpec.js b/packages/yoastseo/spec/languageProcessing/researches/getParagraphLengthSpec.js
index e274380fc23..858b85c4259 100644
--- a/packages/yoastseo/spec/languageProcessing/researches/getParagraphLengthSpec.js
+++ b/packages/yoastseo/spec/languageProcessing/researches/getParagraphLengthSpec.js
@@ -82,6 +82,11 @@ describe( "a test for getting paragraph length", function() {
expect( getParagraphLength( mockPaper, new EnglishResearcher() ).length ).toBe( 1 );
} );
+ it( "returns the paragraph length, ignoring shortcodes", function() {
+ const mockPaper = new Paper( "
test [shortcode]
", { shortcodes: [ "shortcode" ] } );
+ expect( getParagraphLength( mockPaper, new EnglishResearcher() ).length ).toBe( 1 );
+ } );
+
it( "returns the paragraph length of paragraph without p tags or double linebreaks, but with h2 tags", function() {
const mockPaper = new Paper( "
Lorem ipsum dolor sit amet
" );
expect( getParagraphLength( mockPaper, new EnglishResearcher() )[ 0 ].countLength ).toBe( 5 );
diff --git a/packages/yoastseo/spec/languageProcessing/researches/getSentenceBeginningsSpec.js b/packages/yoastseo/spec/languageProcessing/researches/getSentenceBeginningsSpec.js
index 8c7f22a806d..4635d21cac8 100644
--- a/packages/yoastseo/spec/languageProcessing/researches/getSentenceBeginningsSpec.js
+++ b/packages/yoastseo/spec/languageProcessing/researches/getSentenceBeginningsSpec.js
@@ -118,6 +118,21 @@ describe( "gets the sentence beginnings and the count of consecutive duplicates.
expect( getSentenceBeginnings( mockPaper, researcher )[ 1 ].count ).toBe( 1 );
} );
+ it( "ignores shortcodes", function() {
+ mockPaper = new Paper( "Cats are fluffy. Dogs are happy. [shortcode]Unicorns cant code.", { shortcodes: [ "shortcode" ] } );
+ researcher = new EnglishResearcher( mockPaper );
+
+ const sentenceBeginnings = getSentenceBeginnings( mockPaper, researcher );
+
+ expect( sentenceBeginnings ).toHaveLength( 3 );
+ expect( getSentenceBeginnings( mockPaper, researcher )[ 0 ].word ).toBe( "cats" );
+ expect( getSentenceBeginnings( mockPaper, researcher )[ 0 ].count ).toBe( 1 );
+ expect( getSentenceBeginnings( mockPaper, researcher )[ 1 ].word ).toBe( "dogs" );
+ expect( getSentenceBeginnings( mockPaper, researcher )[ 1 ].count ).toBe( 1 );
+ expect( getSentenceBeginnings( mockPaper, researcher )[ 2 ].word ).toBe( "unicorns" );
+ expect( getSentenceBeginnings( mockPaper, researcher )[ 2 ].count ).toBe( 1 );
+ } );
+
it( "returns an object with English sentence beginnings with paragraph tags - it should match over paragraphs", function() {
mockPaper = new Paper( "
Sentence 1. Sentence 2.
Sentence 3.
" );
researcher = new EnglishResearcher( mockPaper );
diff --git a/packages/yoastseo/spec/languageProcessing/researches/getSubheadingTextLengthSpec.js b/packages/yoastseo/spec/languageProcessing/researches/getSubheadingTextLengthSpec.js
index bdf9cc4a06e..c59b61abb76 100644
--- a/packages/yoastseo/spec/languageProcessing/researches/getSubheadingTextLengthSpec.js
+++ b/packages/yoastseo/spec/languageProcessing/researches/getSubheadingTextLengthSpec.js
@@ -57,6 +57,18 @@ describe( "gets the length of text segments", function() {
expect( foundSubheadingsTextLength( mockPaper, englishResearcher )[ 2 ].countLength ).toBe( 10 );
} );
+ it( "should not include a shortcode when calculating the text length", function() {
+ const mockPaper = new Paper( "
heading with [shortcode]
this is a text string with a [shortcode]",
+ { shortcodes: [ "shortcode" ] } );
+ const englishResearcher = new EnglishResearcher( mockPaper );
+ // Check the total number of subheading blocks.
+ expect( foundSubheadingsTextLength( mockPaper, englishResearcher ).length ).toBe( 1 );
+ // Check the length of each individual text segment.
+ expect( foundSubheadingsTextLength( mockPaper, englishResearcher )[ 0 ].subheading ).toBe( "
heading with
" );
+ expect( foundSubheadingsTextLength( mockPaper, englishResearcher )[ 0 ].text ).toBe( "this is a text string with a " );
+ expect( foundSubheadingsTextLength( mockPaper, englishResearcher )[ 0 ].countLength ).toBe( 7 );
+ } );
+
it( "does not count text inside elements we want to exclude from the analysis ", function() {
const mockPaper = new Paper( "
test
one two three
one two three
" );
const englishResearcher = new EnglishResearcher( mockPaper );
diff --git a/packages/yoastseo/spec/languageProcessing/researches/h1sSpec.js b/packages/yoastseo/spec/languageProcessing/researches/h1sSpec.js
index c1c11800792..3755197aa3e 100644
--- a/packages/yoastseo/spec/languageProcessing/researches/h1sSpec.js
+++ b/packages/yoastseo/spec/languageProcessing/researches/h1sSpec.js
@@ -1,54 +1,72 @@
import h1s from "../../../src/languageProcessing/researches/h1s.js";
import Paper from "../../../src/values/Paper.js";
+import buildTree from "../../specHelpers/parse/buildTree";
+import EnglishResearcher from "../../../src/languageProcessing/languages/en/Researcher.js";
describe( "Gets all H1s in the text", function() {
it( "should return empty when there is no H1", function() {
- const mockPaper = new Paper( "some content
content h2
" );
+ const mockPaper = new Paper( "
some content
content h2
" );
+ const mockResearcher = new EnglishResearcher( mockPaper );
+ buildTree( mockPaper, mockResearcher );
expect( h1s( mockPaper ) ).toEqual( [] );
} );
it( "should return empty when there is only empty H1s", function() {
const mockPaper = new Paper( "some content
other content
" );
+ const mockResearcher = new EnglishResearcher( mockPaper );
+ buildTree( mockPaper, mockResearcher );
+
expect( h1s( mockPaper ) ).toEqual( [] );
} );
it( "should return one object when there is one H1", function() {
const mockPaper = new Paper( "
first h1
some content
content h2
" );
- expect( h1s( mockPaper ) ).toEqual( [ { tag: "h1", content: "first h1", position: 0 } ] );
+ const mockResearcher = new EnglishResearcher( mockPaper );
+ buildTree( mockPaper, mockResearcher );
+ expect( h1s( mockPaper ) ).toEqual( [ { tag: "h1", content: "first h1", position: { startOffset: 4, endOffset: 12, clientId: "" } } ] );
} );
it( "should return all H1s in the text", function() {
const mockPaper = new Paper( "
first h1
not an h1
second h1
not an h1
" );
+ const mockResearcher = new EnglishResearcher( mockPaper );
+ buildTree( mockPaper, mockResearcher );
expect( h1s( mockPaper ) ).toEqual( [
- { tag: "h1", content: "first h1", position: 0 },
- { tag: "h1", content: "second h1", position: 2 },
+ { tag: "h1", content: "first h1", position: { startOffset: 4, endOffset: 12, clientId: "" } },
+ { tag: "h1", content: "second h1", position: { startOffset: 37, endOffset: 46, clientId: "" } },
] );
} );
it( "should return two h1s next to each other", function() {
const mockPaper = new Paper( "
first h1
second h1
not an h1
" );
+ const mockResearcher = new EnglishResearcher( mockPaper );
+ buildTree( mockPaper, mockResearcher );
expect( h1s( mockPaper ) ).toEqual( [
- { tag: "h1", content: "first h1", position: 0 },
- { tag: "h1", content: "second h1", position: 1 },
+ { tag: "h1", content: "first h1", position: { startOffset: 4, endOffset: 12, clientId: "" } },
+ { tag: "h1", content: "second h1", position: { startOffset: 21, endOffset: 30, clientId: "" } },
] );
} );
it( "should rightly ignore empty paragraphs or empty blocks", function() {
const mockPaper = new Paper( "
\n
first h1
second h1
not an h1
" );
+ const mockResearcher = new EnglishResearcher( mockPaper );
+ buildTree( mockPaper, mockResearcher );
expect( h1s( mockPaper ) ).toEqual( [
- { tag: "h1", content: "first h1", position: 0 },
- { tag: "h1", content: "second h1", position: 1 },
+ { tag: "h1", content: "first h1", position: { startOffset: 12, endOffset: 20, clientId: "" } },
+ { tag: "h1", content: "second h1", position: { startOffset: 29, endOffset: 38, clientId: "" } },
] );
} );
it( "should find H1 within division tags", function() {
const mockPaper = new Paper( "
first h1
second h1
" );
+ const mockResearcher = new EnglishResearcher( mockPaper );
+ buildTree( mockPaper, mockResearcher );
+
expect( h1s( mockPaper ) ).toEqual( [
- { tag: "h1", content: "first h1", position: 0 },
- { tag: "h1", content: "second h1", position: 2 },
+ { tag: "h1", content: "first h1", position: { startOffset: 9, endOffset: 17, clientId: "" } },
+ { tag: "h1", content: "second h1", position: { startOffset: 64, endOffset: 73, clientId: "" } },
] );
} );
@@ -116,9 +134,12 @@ describe( "Gets all H1s in the text", function() {
"I think voice will become the dominant search query, but I think screens will continue to be " +
"important in presenting search results.’" );
+ const mockResearcher = new EnglishResearcher( mockPaper );
+ buildTree( mockPaper, mockResearcher );
+
expect( h1s( mockPaper ) ).toEqual( [
- { tag: "h1", content: "Voice Search", position: 0 },
- { tag: "h1", content: "So what will the future bring?", position: 11 },
+ { tag: "h1", content: "Voice Search", position: { startOffset: 4, endOffset: 16, clientId: "" } },
+ { tag: "h1", content: "So what will the future bring?", position: { startOffset: 3903, endOffset: 3933, clientId: "" } },
] );
} );
} );
diff --git a/packages/yoastseo/spec/languageProcessing/researches/keyphraseDistributionSpec.js b/packages/yoastseo/spec/languageProcessing/researches/keyphraseDistributionSpec.js
index 3b4a8e1b6bd..a85708b7cda 100644
--- a/packages/yoastseo/spec/languageProcessing/researches/keyphraseDistributionSpec.js
+++ b/packages/yoastseo/spec/languageProcessing/researches/keyphraseDistributionSpec.js
@@ -477,6 +477,26 @@ describe( "Test for the research", function() {
} );
} );
+ it( "returns the score 100 when the keyphrase is a shortcode", function() {
+ const paper = new Paper(
+ "This is a text with a [keyphrase]that doesn't count",
+ {
+ locale: "en_EN",
+ keyword: "keyphrase",
+ synonyms: "synonym",
+ shortcodes: [ "keyphrase" ],
+ }
+ );
+
+ const researcher = new Researcher( paper );
+ researcher.addResearchData( "morphology", morphologyData );
+
+ expect( keyphraseDistributionResearcher( paper, researcher ) ).toEqual( {
+ keyphraseDistributionScore: 100,
+ sentencesToHighlight: [],
+ } );
+ } );
+
const paragraphWithKeyphrase1 = "
Lorem ipsum keyphrase dolor sit amet, consectetur adipiscing elit." +
"In sit amet semper sem, id faucibus massa.
\n";
diff --git a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js
index ce5d6324f77..31614969e6e 100644
--- a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js
+++ b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js
@@ -1,4 +1,4 @@
-import keywordCount from "../../../src/languageProcessing/researches/keywordCount.js";
+import getKeyphraseCount from "../../../src/languageProcessing/researches/keywordCount";
import Paper from "../../../src/values/Paper.js";
import factory from "../../specHelpers/factory";
import Mark from "../../../src/values/Mark";
@@ -6,6 +6,7 @@ import wordsCountHelper from "../../../src/languageProcessing/languages/ja/helpe
import matchWordsHelper from "../../../src/languageProcessing/languages/ja/helpers/matchTextWithWord";
import memoizedSentenceTokenizer from "../../../src/languageProcessing/helpers/sentence/memoizedSentenceTokenizer";
import japaneseMemoizedSentenceTokenizer from "../../../src/languageProcessing/languages/ja/helpers/memoizedSentenceTokenizer";
+import buildTree from "../../specHelpers/parse/buildTree";
/**
* Adds morphological forms to the mock researcher.
@@ -22,168 +23,1836 @@ const buildMorphologyMockResearcher = function( keyphraseForms ) {
}, true, false, false, { memoizedTokenizer: memoizedSentenceTokenizer } );
};
-const mockResearcher = buildMorphologyMockResearcher( [ [ "keyword", "keywords" ] ] );
-const mockResearcherGermanDiacritics = buildMorphologyMockResearcher( [ [ "äöüß" ] ] );
-const mockResearcherMinus = buildMorphologyMockResearcher( [ [ "key-word", "key-words" ] ] );
-const mockResearcherUnderscore = buildMorphologyMockResearcher( [ [ "key_word", "key_words" ] ] );
-const mockResearcherKeyWord = buildMorphologyMockResearcher( [ [ "key", "keys" ], [ "word", "words" ] ] );
-const mockResearcherKaplaki = buildMorphologyMockResearcher( [ [ "kapaklı" ] ] );
-const mockResearcherAmpersand = buildMorphologyMockResearcher( [ [ "key&word" ] ] );
-const mockResearcherApostrophe = buildMorphologyMockResearcher( [ [ "key`word" ] ] );
-// Escape, since the morphology researcher escapes regex as well.
-const mockResearcherDollarSign = buildMorphologyMockResearcher( [ [ "\\$keyword" ] ] );
-
-describe( "Test for counting the keyword in a text", function() {
- it( "counts/marks a string of text with a keyword in it.", function() {
- const mockPaper = new Paper( "a string of text with the keyword in it" );
- expect( keywordCount( mockPaper, mockResearcher ).count ).toBe( 1 );
- expect( keywordCount( mockPaper, mockResearcher ).markings ).toEqual( [
+const testCases = [
+ {
+ description: "counts/marks a string of text with an occurrence of the keyphrase in it.",
+ paper: new Paper( "
a string of text with the keyword in it
", { keyword: "keyword" } ),
+ keyphraseForms: [ [ "keyword", "keywords" ] ],
+ expectedCount: 1,
+ expectedMarkings: [
new Mark( { marked: "a string of text with the
keyword in it",
- original: "a string of text with the keyword in it" } ) ]
- );
- } );
-
- it( "counts a string of text with no keyword in it.", function() {
- const mockPaper = new Paper( "a string of text" );
- expect( keywordCount( mockPaper, mockResearcher ).count ).toBe( 0 );
- expect( keywordCount( mockPaper, mockResearcher ).markings ).toEqual( [] );
- } );
-
- it( "counts multiple occurrences of a keyphrase consisting of multiple words.", function() {
- const mockPaper = new Paper( "a string of text with the key word in it, with more key words." );
- expect( keywordCount( mockPaper, mockResearcherKeyWord ).count ).toBe( 2 );
- expect( keywordCount( mockPaper, mockResearcherKeyWord ).markings ).toEqual( [
- new Mark( { marked: "a string of text with the
key word in it, " +
- "with more
key words.",
- original: "a string of text with the key word in it, with more key words." } ) ]
- );
- } );
-
- it( "counts a string of text with German diacritics and eszett as the keyword", function() {
- const mockPaper = new Paper( "Waltz keepin auf mitz auf keepin äöüß weiner blitz deutsch spitzen." );
- expect( keywordCount( mockPaper, mockResearcherGermanDiacritics ).count ).toBe( 1 );
- expect( keywordCount( mockPaper, mockResearcherGermanDiacritics ).markings ).toEqual( [
- new Mark( { marked: "Waltz keepin auf mitz auf keepin
äöüß weiner blitz deutsch spitzen.",
- original: "Waltz keepin auf mitz auf keepin äöüß weiner blitz deutsch spitzen." } ) ]
- );
- } );
-
- it( "counts a string with multiple keyword morphological forms", function() {
- const mockPaper = new Paper( "A string of text with a keyword and multiple keywords in it." );
- expect( keywordCount( mockPaper, mockResearcher ).count ).toBe( 2 );
- expect( keywordCount( mockPaper, mockResearcher ).markings ).toEqual( [
- new Mark( { marked: "A string of text with a
keyword " +
+ original: "a string of text with the keyword in it",
+ position: {
+ startOffset: 29,
+ endOffset: 36,
+ startOffsetBlock: 26,
+ endOffsetBlock: 33,
+ attributeId: "",
+ clientId: "",
+ isFirstSection: false,
+ },
+ } ),
+ ],
+ skip: false,
+ },
+ {
+ description: "counts a string of text with no occurrence of the keyphrase in it.",
+ paper: new Paper( "
a string of text
", { keyword: "" } ),
+ keyphraseForms: [],
+ expectedCount: 0,
+ expectedMarkings: [],
+ skip: false,
+ },
+ {
+ description: "counts multiple occurrences of a keyphrase consisting of multiple words.",
+ paper: new Paper( "
a string of text with the key word in it, with more key words.
", { keyword: "key word" } ),
+ keyphraseForms: [ [ "key", "keys" ], [ "word", "words" ] ],
+ expectedCount: 2,
+ expectedMarkings: [
+ new Mark( {
+ marked: "a string of text with the
key word in it, " +
+ "with more
key words.",
+ original: "a string of text with the key word in it, with more key words.",
+ position: {
+ startOffset: 29,
+ endOffset: 37,
+ startOffsetBlock: 26,
+ endOffsetBlock: 34,
+ attributeId: "",
+ clientId: "",
+ isFirstSection: false,
+ },
+ } ),
+ new Mark( {
+ marked: "a string of text with the
key word in it, " +
+ "with more
key words.",
+ original: "a string of text with the key word in it, with more key words.",
+ position: {
+ startOffset: 55,
+ endOffset: 64,
+ startOffsetBlock: 52,
+ endOffsetBlock: 61,
+ attributeId: "",
+ clientId: "",
+ isFirstSection: false,
+ },
+ } ),
+ ],
+ skip: false,
+ },
+ {
+ description: "counts a string with multiple occurrences of keyphrase with different morphological forms",
+ paper: new Paper( "
A string of text with a keyword and multiple keywords in it.
", { keyword: "keyword" } ),
+ keyphraseForms: [ [ "keyword", "keywords" ] ],
+ expectedCount: 2,
+ expectedMarkings: [
+ new Mark( {
+ marked: "A string of text with a
keyword " +
+ "and multiple
keywords in it.",
+ original: "A string of text with a keyword and multiple keywords in it.",
+ position: {
+ startOffset: 27,
+ endOffset: 34,
+ startOffsetBlock: 24,
+ endOffsetBlock: 31,
+ attributeId: "",
+ clientId: "",
+ isFirstSection: false,
+ },
+ } ),
+ new Mark( {
+ marked: "A string of text with a
keyword " +
"and multiple
keywords in it.",
- original: "A string of text with a keyword and multiple keywords in it." } ) ]
- );
- } );
+ original: "A string of text with a keyword and multiple keywords in it.",
+ position: {
+ startOffset: 48,
+ endOffset: 56,
+ startOffsetBlock: 45,
+ endOffsetBlock: 53,
+ attributeId: "",
+ clientId: "",
+ isFirstSection: false,
+ },
+ } ),
+ ],
+ skip: false,
+ },
+ {
+ description: "does not match keyphrase occurrence in image alt attribute.",
+ paper: new Paper( "
A text with
", { keyword: "image" } ),
+ keyphraseForms: [ [ "image", "images" ] ],
+ expectedCount: 0,
+ expectedMarkings: [],
+ skip: false,
+ },
+ {
+ description: "also matches same phrase with different capitalization.",
+ paper: new Paper( "
A string with KeY worD.
", { keyword: "key word" } ),
+ keyphraseForms: [ [ "key", "keys" ], [ "word", "words" ] ],
+ expectedCount: 1,
+ expectedMarkings: [
+ new Mark( {
+ marked: "A string with
KeY worD.",
+ original: "A string with KeY worD.",
+ position: {
+ startOffset: 17,
+ endOffset: 25,
+ startOffsetBlock: 14,
+ endOffsetBlock: 22,
+ attributeId: "",
+ clientId: "",
+ isFirstSection: false,
+ },
+ } ),
+ ],
+ skip: false,
+ },
+ {
+ description: "doesn't count keyphrase instances inside elements we want to exclude from the analysis",
+ paper: new Paper( "
There is no keyword
in this sentence.
" ),
+ keyphraseForms: [ [ "keyword", "keywords" ] ],
+ expectedCount: 0,
+ expectedMarkings: [],
+ skip: false,
+ },
+ {
+ description: "doesn't count keyphrase instances if they are a shortcode",
+ paper: new Paper( "
There is no [keyword] in this sentence.
", { shortcodes: [ "keyword" ] } ),
+ keyphraseForms: [ [ "keyword", "keywords" ] ],
+ expectedCount: 0,
+ expectedMarkings: [],
+ skip: false,
+ },
+ {
+ description: "only counts full-matches of the keyphrase: full-match means when all word of the keyphrase are found in the sentence",
+ paper: new Paper( "
A string with three keys (key and another key) and one word.
" ),
+ keyphraseForms: [ [ "key", "keys" ], [ "word", "words" ] ],
+ expectedCount: 1,
+ expectedMarkings: [
+ new Mark( {
+ marked: "A string with three
keys (
" +
+ "key and another
key) and one
word.",
+ original: "A string with three keys (key and another key) and one word.",
+ position: {
+ startOffset: 23,
+ endOffset: 27,
+ startOffsetBlock: 20,
+ endOffsetBlock: 24,
+ attributeId: "",
+ clientId: "",
+ isFirstSection: false,
+ },
+ } ),
+ new Mark( {
+ marked: "A string with three
keys (
" +
+ "key and another
key) and one
word.",
+ original: "A string with three keys (key and another key) and one word.",
+ position: {
+ startOffset: 29,
+ endOffset: 32,
+ startOffsetBlock: 26,
+ endOffsetBlock: 29,
+ attributeId: "",
+ clientId: "",
+ isFirstSection: false,
+ },
+ } ),
+ new Mark( {
+ marked: "A string with three
keys (
" +
+ "key and another
key) and one
word.",
+ original: "A string with three keys (key and another key) and one word.",
+ position: {
+ startOffset: 45,
+ endOffset: 48,
+ startOffsetBlock: 42,
+ endOffsetBlock: 45,
+ attributeId: "",
+ clientId: "",
+ isFirstSection: false,
+ },
+ } ),
+ new Mark( {
+ marked: "A string with three
keys (
" +
+ "key and another
key) and one
word.",
+ original: "A string with three keys (key and another key) and one word.",
+ position: {
+ startOffset: 58,
+ endOffset: 62,
+ startOffsetBlock: 55,
+ endOffsetBlock: 59,
+ attributeId: "",
+ clientId: "",
+ isFirstSection: false,
+ },
+ } ),
+ ],
+ skip: false,
+ },
+ {
+ description: "matches both singular and reduplicated plural form of the keyphrase in Indonesian, " +
+ "the plural should be counted as one occurrence",
+ paper: new Paper( "
Lorem ipsum dolor sit amet, consectetur keyword-keyword, keyword adipiscing elit.
",
+ { locale: "id_ID", keyword: "keyword" } ),
+ keyphraseForms: [ [ "keyword", "keyword-keyword" ] ],
+ expectedCount: 2,
+ expectedMarkings: [
+ new Mark( {
+ marked: "Lorem ipsum dolor sit amet, consectetur
keyword-keyword," +
+ "
keyword adipiscing elit.",
+ original: "Lorem ipsum dolor sit amet, consectetur keyword-keyword, keyword adipiscing elit.",
+ position: {
+ endOffset: 58,
+ startOffset: 43,
+ startOffsetBlock: 40,
+ endOffsetBlock: 55,
+ attributeId: "",
+ clientId: "",
+ isFirstSection: false,
+ },
+ } ),
+ new Mark( {
+ marked: "Lorem ipsum dolor sit amet, consectetur
keyword-keyword," +
+ "
keyword adipiscing elit.",
+ original: "Lorem ipsum dolor sit amet, consectetur keyword-keyword, keyword adipiscing elit.",
+ position: {
+ startOffset: 60,
+ endOffset: 67,
+ startOffsetBlock: 57,
+ endOffsetBlock: 64,
+ attributeId: "",
+ clientId: "",
+ isFirstSection: false,
+ },
+ } ),
+ ],
+ skip: false,
+ },
+ {
+ description: "matches both reduplicated keyphrase separated by '-' as two occurrence in English",
+ paper: new Paper( "
Lorem ipsum dolor sit amet, consectetur keyword-keyword, adipiscing elit.
",
+ { locale: "en_US", keyword: "keyword" } ),
+ keyphraseForms: [ [ "keyword", "keyword-keyword" ] ],
+ expectedCount: 2,
+ expectedMarkings: [
+ new Mark( {
+ marked: "Lorem ipsum dolor sit amet, consectetur
keyword-keyword, adipiscing elit.",
+ original: "Lorem ipsum dolor sit amet, consectetur keyword-keyword, adipiscing elit.",
+ position: {
+ endOffset: 58,
+ startOffset: 43,
+ startOffsetBlock: 40,
+ endOffsetBlock: 55,
+ attributeId: "",
+ clientId: "",
+ isFirstSection: false } } ),
+ ],
+ skip: false,
+ },
+ {
+ description: "counts one occurrence of reduplicated keyphrase separated by '-' as two occurrence in English " +
+ "when the keyphrase is enclosed in double quotes",
+ paper: new Paper( "
Lorem ipsum dolor sit amet, consectetur kupu-kupu, adipiscing elit.
",
+ { locale: "en_US", keyword: "\"kupu-kupu\"" } ),
+ keyphraseForms: [ [ "kupu-kupu" ] ],
+ expectedCount: 1,
+ expectedMarkings: [
+ new Mark( {
+ marked: "Lorem ipsum dolor sit amet, consectetur
kupu-kupu, adipiscing elit.",
+ original: "Lorem ipsum dolor sit amet, consectetur kupu-kupu, adipiscing elit.",
+ position: {
+ endOffset: 52,
+ startOffset: 43,
+ startOffsetBlock: 40,
+ endOffsetBlock: 49,
+ attributeId: "",
+ clientId: "",
+ isFirstSection: false,
+ } } ),
+ ],
+ skip: false,
+ },
+ {
+ description: "counts a single word keyphrase with exact matching",
+ paper: new Paper( "
A string with a keyword.
", { keyword: "\"keyword\"" } ),
+ keyphraseForms: [ [ "keyword" ] ],
+ expectedCount: 1,
+ expectedMarkings: [
+ new Mark( { marked: "A string with a
keyword.",
+ original: "A string with a keyword.",
+ position: {
+ startOffset: 19,
+ endOffset: 26,
+ startOffsetBlock: 16,
+ endOffsetBlock: 23,
+ attributeId: "",
+ clientId: "",
+ isFirstSection: false,
+ },
+ } ),
+ ],
+ skip: false,
+ },
+ {
+ description: "with exact matching, a singular single word keyphrase should not be counted if the focus keyphrase is plural",
+ paper: new Paper( "
A string with a keyword.
", { keyword: "\"keywords\"" } ),
+ keyphraseForms: [ [ "keywords" ] ],
+ expectedCount: 0,
+ expectedMarkings: [],
+ skip: false,
+ },
+ {
+ description: "with exact matching, a multi word keyphrase should be counted if the focus keyphrase is the same",
+ paper: new Paper( "
A string with a key phrase key phrase.
", { keyword: "\"key phrase\"" } ),
+ keyphraseForms: [ [ "key phrase" ] ],
+ expectedCount: 2,
+ expectedMarkings: [ new Mark( { marked: "A string with a
key phrase key phrase.",
+ original: "A string with a key phrase key phrase.",
+ position: {
+ endOffset: 40,
+ startOffset: 19,
+ startOffsetBlock: 16,
+ endOffsetBlock: 37,
+ attributeId: "",
+ clientId: "",
+ isFirstSection: false,
+ } } ) ],
+ skip: false,
+ },
+ {
+ description: "with exact matching, a multi word keyphrase should be counted if the focus keyphrase is the same",
+ paper: new Paper( "
A string with a key phrase.
", { keyword: "\"key phrase\"" } ),
+ keyphraseForms: [ [ "key phrase" ] ],
+ expectedCount: 1,
+ expectedMarkings: [ new Mark( { marked: "A string with a
key phrase.",
+ original: "A string with a key phrase.",
+ position: {
+ startOffset: 19,
+ endOffset: 29,
+ startOffsetBlock: 16,
+ endOffsetBlock: 26,
+ attributeId: "",
+ clientId: "",
+ isFirstSection: false,
+ },
+ } ) ],
+ skip: false,
+ },
+ {
+ description: "with exact matching, a multi word keyphrase should not be counted if " +
+ "the focus keyphrase has the same words in a different order",
+ paper: new Paper( "
A string with a phrase key.
", { keyword: "\"key phrase\"" } ),
+ keyphraseForms: [ [ "key phrase" ] ],
+ expectedCount: 0,
+ expectedMarkings: [],
+ skip: false,
+ },
+ {
+ description: "with exact matching, it should match a full stop if it is part of the keyphrase and directly precedes the keyphrase.",
+ paper: new Paper( "
A .sentence with a keyphrase.
", { keyword: "\".sentence\"" } ),
+ keyphraseForms: [ [ ".sentence" ] ],
+ expectedCount: 1,
+ expectedMarkings: [ new Mark( { marked: "A
.sentence with a keyphrase.",
+ original: "A .sentence with a keyphrase.",
+ position: {
+ startOffset: 5,
+ endOffset: 14,
+ startOffsetBlock: 2,
+ endOffsetBlock: 11,
+ attributeId: "",
+ clientId: "",
+ isFirstSection: false,
+ },
+ } ) ],
+ skip: false,
+ },
+ {
+ description: "with exact matching, the keyphrase directly preceded by a full stop should not match a " +
+ "keyphrase occurrence without the full stop in the text.",
+ paper: new Paper( "
A sentence with a keyphrase.
", { keyword: "\".sentence\"" } ),
+ keyphraseForms: [ [ ".sentence" ] ],
+ expectedCount: 0,
+ expectedMarkings: [],
+ skip: false,
+ },
+ {
+ description: "can match dollar sign as in '$keyword' with exact matching.",
+ paper: new Paper( "
A string with a $keyword.
", { keyword: "\"$keyword\"" } ),
+ keyphraseForms: [ [ "\\$keyword" ] ],
+ expectedCount: 1,
+ expectedMarkings: [ new Mark( { marked: "A string with a
$keyword.",
+ original: "A string with a $keyword.",
+ position: {
+ endOffset: 27,
+ startOffset: 19,
+ startOffsetBlock: 16,
+ endOffsetBlock: 24,
+ attributeId: "",
+ clientId: "",
+ isFirstSection: false,
+ } } ) ],
+ skip: false,
+ },
+ {
+ description: "can match multiple occurrences of keyphrase in the text with exact matching.",
+ paper: new Paper( "
A string with cats and dogs, and other cats and dogs.
", { keyword: "\"cats and dogs\"" } ),
+ keyphraseForms: [ [ "cats and dogs" ] ],
+ expectedCount: 2,
+ expectedMarkings: [
+ new Mark( { marked: "A string with
cats and dogs, " +
+ "and other
cats and dogs.",
+ original: "A string with cats and dogs, and other cats and dogs.",
+ position: {
+ endOffset: 30,
+ startOffset: 17,
+ startOffsetBlock: 14,
+ endOffsetBlock: 27,
+ attributeId: "",
+ clientId: "",
+ isFirstSection: false,
+ } } ),
+ new Mark( { marked: "A string with
cats and dogs, " +
+ "and other
cats and dogs.",
+ original: "A string with cats and dogs, and other cats and dogs.",
+ position: {
+ endOffset: 55,
+ startOffset: 42,
+ startOffsetBlock: 39,
+ endOffsetBlock: 52,
+ attributeId: "",
+ clientId: "",
+ isFirstSection: false,
+ } } ) ],
+ skip: false,
+ },
+ {
+ description: "counts all occurrence in the sentence, even when they occur more than 2 time consecutively",
+ paper: new Paper( "
this is a keyword keyword keyword.
", { keyword: "keyword", locale: "en_US" } ),
+ keyphraseForms: [ [ "keyword", "keywords" ] ],
+ expectedCount: 3,
+ expectedMarkings: [
+ new Mark( { marked: "this is a
keyword keyword keyword.",
+ original: "this is a keyword keyword keyword.",
+ position: {
+ endOffset: 36,
+ startOffset: 13,
+ startOffsetBlock: 10,
+ endOffsetBlock: 33,
+ attributeId: "",
+ clientId: "",
+ isFirstSection: false,
+ } } ) ],
+ skip: false,
+ },
+ {
+ description: "counts all occurrence in the sentence, even when they occur more than 2 time consecutively: with exact matching",
+ paper: new Paper( "
A sentence about a red panda red panda red panda.
", { keyword: "\"red panda\"", locale: "en_US" } ),
+ keyphraseForms: [ [ "red panda" ] ],
+ expectedCount: 3,
+ expectedMarkings: [
+ new Mark( { marked: "A sentence about a
red panda red panda red panda.",
+ original: "A sentence about a red panda red panda red panda.",
+ position: {
+ endOffset: 51,
+ startOffset: 22,
+ startOffsetBlock: 19,
+ endOffsetBlock: 48,
+ attributeId: "",
+ clientId: "",
+ isFirstSection: false,
+ } } ) ],
+ skip: false,
+ },
+];
- it( "counts a string with a keyword with a '-' in it", function() {
- const mockPaper = new Paper( "A string with a key-word." );
- expect( keywordCount( mockPaper, mockResearcherMinus ).count ).toBe( 1 );
- expect( keywordCount( mockPaper, mockResearcherMinus ).markings ).toEqual( [
- new Mark( { marked: "A string with a
key-word.",
- original: "A string with a key-word." } ) ]
- );
- } );
+describe.each( testCases )( "Test for counting the keyphrase in a text in english", function( {
+ description,
+ paper,
+ keyphraseForms,
+ expectedCount,
+ expectedMarkings,
+ skip } ) {
+ const test = skip ? it.skip : it;
- it( "counts 'key word' in 'key-word'.", function() {
- const mockPaper = new Paper( "A string with a key-word." );
- expect( keywordCount( mockPaper, mockResearcherKeyWord ).count ).toBe( 1 );
- // Note: this behavior might change in the future.
+ test( description, function() {
+ const mockResearcher = buildMorphologyMockResearcher( keyphraseForms );
+ buildTree( paper, mockResearcher );
+ const keyphraseCountResult = getKeyphraseCount( paper, mockResearcher );
+ expect( keyphraseCountResult.count ).toBe( expectedCount );
+ expect( keyphraseCountResult.markings ).toEqual( expectedMarkings );
} );
+} );
- it( "counts a string with a keyword with a '_' in it", function() {
- const mockPaper = new Paper( "A string with a key_word." );
- expect( keywordCount( mockPaper, mockResearcherUnderscore ).count ).toBe( 1 );
- expect( keywordCount( mockPaper, mockResearcherUnderscore ).markings ).toEqual( [
- new Mark( { marked: "A string with a
key_word.",
- original: "A string with a key_word." } ) ]
- );
- } );
+const testCasesWithSpecialCharacters = [
+ {
+ description: "counts the keyphrase occurrence in the text with ",
+ paper: new Paper( "
a string with nbsp to match the key word.
", { keyword: "key word" } ),
+ keyphraseForms: [ [ "key" ], [ "word" ] ],
+ expectedCount: 1,
+ expectedMarkings: [
+ new Mark( { marked: "a string with nbsp to match the
key word.",
+ original: "a string with nbsp to match the key word.",
+ position: {
+ startOffset: 35,
+ endOffset: 43,
+ startOffsetBlock: 32,
+ endOffsetBlock: 40,
+ attributeId: "",
+ clientId: "",
+ isFirstSection: false,
+ },
+ } ),
+ ],
+ skip: false,
+ },
+ {
+ description: "counts a string with a keyphrase occurrence with a '-' in it",
+ paper: new Paper( "
A string with a key-word.
", { keyword: "key-word" } ),
+ keyphraseForms: [ [ "key-word", "key-words" ] ],
+ expectedCount: 1,
+ expectedMarkings: [ new Mark( { marked: "A string with a
key-word.",
+ original: "A string with a key-word.",
+ position: {
+ endOffset: 27,
+ startOffset: 19,
+ startOffsetBlock: 16,
+ endOffsetBlock: 24,
+ attributeId: "",
+ clientId: "",
+ isFirstSection: false,
+ } } ) ],
+ skip: false,
+ },
+ {
+ description: "counts occurrence with dash only when the keyphrase contains dash",
+ paper: new Paper( "
A string with a key-phrase and key phrase.
", { keyword: "key-phrase" } ),
+ keyphraseForms: [ [ "key-phrase", "key-phrases" ] ],
+ expectedCount: 1,
+ expectedMarkings: [ new Mark( { marked: "A string with a
key-phrase and key phrase.",
+ original: "A string with a key-phrase and key phrase.",
+ position: {
+ endOffset: 29,
+ startOffset: 19,
+ startOffsetBlock: 16,
+ endOffsetBlock: 26,
+ attributeId: "",
+ clientId: "",
+ isFirstSection: false,
+ } } ) ],
+ skip: false,
+ },
+ {
+ description: "should be no match when the sentence contains the keyphrase with a dash but in the wrong order",
+ paper: new Paper( "
A string with a panda-red.
", { keyword: "red-panda" } ),
+ keyphraseForms: [ [ "red-panda" ] ],
+ expectedCount: 0,
+ expectedMarkings: [],
+ skip: false,
+ },
+ {
+ description: "should find a match when the sentence contains the keyphrase with a dash but in the wrong order " +
+ "and the keyphrase doesn't contain a dash",
+ paper: new Paper( "
A string with a panda-red.
", { keyword: "red panda" } ),
+ keyphraseForms: [ [ "red" ], [ "panda" ] ],
+ expectedCount: 1,
+ expectedMarkings: [
+ new Mark( {
+ original: "A string with a panda-red.",
+ marked: "A string with a
panda-red.",
+ position: {
+ startOffset: 19,
+ endOffset: 28,
+ startOffsetBlock: 16,
+ endOffsetBlock: 25,
+ attributeId: "",
+ clientId: "",
+ isFirstSection: false,
+ },
+ } ),
+ ],
+ skip: false,
+ },
+ {
+ description: "should find a match when the sentence contains the keyphrase with a dash with the same order " +
+ "and the keyphrase doesn't contain a dash",
+ paper: new Paper( "
A string with a key-word.
", { keyword: "key word" } ),
+ keyphraseForms: [ [ "key", "keys" ], [ "word", "words" ] ],
+ expectedCount: 1,
+ expectedMarkings: [
+ new Mark( {
+ original: "A string with a key-word.",
+ marked: "A string with a
key-word.",
+ position: {
+ endOffset: 27,
+ startOffset: 19,
+ startOffsetBlock: 16,
+ endOffsetBlock: 24,
+ attributeId: "",
+ clientId: "",
+ isFirstSection: false,
+ },
+ } ),
+ ],
+ skip: false,
+ },
+ {
+ description: "should find a match when the word of the keyphrase occurs in the sentence hyphenated with a non-keyphrase word." +
+ "For example 'key' in 'key-phrase'.",
+ paper: new Paper( "
A string with a word of key-phrase.
", { keyword: "key word" } ),
+ keyphraseForms: [ [ "key", "keys" ], [ "word", "words" ] ],
+ expectedCount: 1,
+ expectedMarkings: [
+ new Mark( {
+ original: "A string with a word of key-phrase.",
+ marked: "A string with a
word of" +
+ "
key-phrase.",
+ position: {
+ endOffset: 23,
+ startOffset: 19,
+ startOffsetBlock: 16,
+ endOffsetBlock: 20,
+ attributeId: "",
+ clientId: "",
+ isFirstSection: false,
+ },
+ } ),
+ new Mark( {
+ original: "A string with a word of key-phrase.",
+ marked: "A string with a
word of" +
+ "
key-phrase.",
+ position: {
+ endOffset: 37,
+ startOffset: 27,
+ startOffsetBlock: 24,
+ endOffsetBlock: 34,
+ attributeId: "",
+ clientId: "",
+ isFirstSection: false,
+ },
+ } ),
+ // Note that in this case, we'll highlight 'key-phrase' even though technically we only match "key" in "key-phrase".
+ // This is the consequence of tokenizing "key-phrase" into one token instead of three.
+ ],
+ skip: false,
+ },
+ {
+ description: "should find a match between a keyphrase with an underscore and its occurrence in the text with the same form",
+ paper: new Paper( "
A string with a key_word.
", { keyword: "key_word" } ),
+ keyphraseForms: [ [ "key_word", "key_words" ] ],
+ expectedCount: 1,
+ expectedMarkings: [ new Mark( { marked: "A string with a
key_word.",
+ original: "A string with a key_word.",
+ position: { endOffset: 27, startOffset: 19,
+ startOffsetBlock: 16,
+ endOffsetBlock: 24,
+ attributeId: "",
+ clientId: "",
+ isFirstSection: false,
+ } } ) ],
+ skip: false,
+ },
+ {
+ description: "should find a match between a keyphrase with an ampersand (&) and its occurrence in the text with the same form",
+ paper: new Paper( "
A string with key&word in it
", { keyword: "key&word" } ),
+ keyphraseForms: [ [ "key&word" ] ],
+ expectedCount: 1,
+ expectedMarkings: [ new Mark( { marked: "A string with
key&word in it",
+ original: "A string with key&word in it",
+ position: { endOffset: 25, startOffset: 17,
+ startOffsetBlock: 14,
+ endOffsetBlock: 22,
+ attributeId: "",
+ clientId: "",
+ isFirstSection: false,
+ } } ) ],
+ skip: false,
+ },
+ {
+ description: "should still match keyphrase occurrence with different types of apostrophe.",
+ paper: new Paper( "
A string with quotes to match the key'word, even if the quotes differ.
", { keyword: "key‛word" } ),
+ keyphraseForms: [ [ "key‛word", "key‛words" ] ],
+ expectedCount: 1,
+ expectedMarkings: [ new Mark( {
+ marked: "A string with quotes to match the
key'word, even if the quotes differ.",
+ original: "A string with quotes to match the key'word, even if the quotes differ.",
+ position: { endOffset: 45, startOffset: 37,
+ startOffsetBlock: 34,
+ endOffsetBlock: 42,
+ attributeId: "",
+ clientId: "",
+ isFirstSection: false,
+ } } ) ],
+ skip: false,
+ },
+ {
+ description: "can match dollar sign as in '$keyword'.",
+ paper: new Paper( "
A string with a $keyword.
", { keyword: "$keyword" } ),
+ keyphraseForms: [ [ "\\$keyword" ] ],
+ expectedCount: 1,
+ expectedMarkings: [ new Mark( { marked: "A string with a
$keyword.",
+ original: "A string with a $keyword.",
+ position: {
+ startOffset: 19,
+ endOffset: 27,
+ startOffsetBlock: 16,
+ endOffsetBlock: 24,
+ attributeId: "",
+ clientId: "",
+ isFirstSection: false,
+ },
+ } ) ],
+ skip: false,
+ },
+ {
+ description: "can match 2 occurrences of a keyphrase ending in an HTML entity (© standing for ©) and output correct Marks",
+ paper: new Paper( "
A string keyphrase© with a keyphrase©.
", { keyword: "keyphrase©" } ),
+ keyphraseForms: [ [ "keyphrase©" ] ],
+ expectedCount: 2,
+ expectedMarkings: [ new Mark( {
+ marked: "A string
keyphrase© with a " +
+ "
keyphrase©.",
+ original: "A string keyphrase© with a keyphrase©.",
+ position: { endOffset: 27, startOffset: 12,
+ startOffsetBlock: 9,
+ endOffsetBlock: 24,
+ attributeId: "",
+ clientId: "",
+ isFirstSection: false,
+ } } ),
+ new Mark( {
+ marked: "A string
keyphrase© with a " +
+ "
keyphrase©.",
+ original: "A string keyphrase© with a keyphrase©.",
+ position: { endOffset: 50, startOffset: 35,
+ startOffsetBlock: 32,
+ endOffsetBlock: 47,
+ attributeId: "",
+ clientId: "",
+ isFirstSection: false,
+ } } ) ],
+ skip: false,
+ },
+ {
+ description: "can match an occurrence of a keyphrase after HTML entities, and output correct Marks",
+ paper: new Paper( "
I find a&b to be > c&d for dog food.
", { keyword: "dog" } ),
+ keyphraseForms: [ [ "dog" ] ],
+ expectedCount: 1,
+ expectedMarkings: [
+ new Mark( {
+ marked: "I find a&b to be > c&d for
dog food.",
+ original: "I find a&b to be > c&d for dog food.",
+ position: {
+ startOffsetBlock: 38,
+ endOffsetBlock: 41,
+ startOffset: 41,
+ endOffset: 44,
+ attributeId: "",
+ clientId: "",
+ isFirstSection: false,
+ } } ) ],
+ skip: false,
+ },
+ {
+ description: "can match an occurrence of a keyphrase containing an & in the middle, as in 'a&b', and output correct Marks",
+ paper: new Paper( "
At a&b they have the best stuff.
", { keyword: "a&b" } ),
+ keyphraseForms: [ [ "a&b" ] ],
+ expectedCount: 1,
+ expectedMarkings: [
+ new Mark( {
+ marked: "At
a&b they have the best stuff.",
+ original: "At a&b they have the best stuff.",
+ position: {
+ startOffsetBlock: 3,
+ endOffsetBlock: 10,
+ startOffset: 6,
+ endOffset: 13,
+ attributeId: "",
+ clientId: "",
+ isFirstSection: false,
+ } } ) ],
+ skip: false,
+ },
+ {
+ description: "doesn't match unhyphenated occurrences in the text if the keyphrase is hyphenated.",
+ paper: new Paper( "
A string with a key word.
", { keyword: "key-word" } ),
+ keyphraseForms: [ [ "key-word", "key-words" ] ],
+ expectedCount: 0,
+ expectedMarkings: [],
+ skip: false,
+ },
+ {
+ description: "can match keyphrase enclosed in «» in the text, also if the paper's keyphrase is not enclosed in «»",
+ paper: new Paper( "
A string with a «keyword».
", { keyword: "keyword" } ),
+ keyphraseForms: [ [ "keyword" ] ],
+ expectedCount: 1,
+ expectedMarkings: [
+ new Mark( {
+ original: "A string with a «keyword».",
+ marked: "A string with a «
keyword».",
+ position: { endOffset: 27, startOffset: 20,
+ startOffsetBlock: 17,
+ endOffsetBlock: 24,
+ attributeId: "",
+ clientId: "",
+ isFirstSection: false,
+ } }
+ ),
+ ],
+ skip: false,
+ },
+ {
+ description: "can match an occurrence of the keyphrase not enclosed in «» when the paper's keyphrase is enclosed in «»",
+ paper: new Paper( "
A string with a keyword.
", { keyword: "«keyword»" } ),
+ keyphraseForms: [ [ "keyword" ] ],
+ expectedCount: 1,
+ expectedMarkings: [
+ new Mark( {
+ original: "A string with a keyword.",
+ marked: "A string with a
keyword.",
+ position: { endOffset: 26, startOffset: 19,
+ startOffsetBlock: 16,
+ endOffsetBlock: 23,
+ attributeId: "",
+ clientId: "",
+ isFirstSection: false,
+ } }
+ ),
+ ],
+ skip: false,
+ },
+ {
+ description: "can match keyphrase enclosed in ‹› in the text, also if the paper's keyphrase is not enclosed in ‹›",
+ paper: new Paper( "
A string with a ‹keyword›.
", { keyword: "keyword" } ),
+ keyphraseForms: [ [ "keyword" ] ],
+ expectedCount: 1,
+ expectedMarkings: [
+ new Mark( {
+ original: "A string with a ‹keyword›.",
+ marked: "A string with a '
keyword'.",
+ position: { endOffset: 27, startOffset: 20,
+ startOffsetBlock: 17,
+ endOffsetBlock: 24,
+ attributeId: "",
+ clientId: "",
+ isFirstSection: false,
+ } }
+ ),
+ ],
+ skip: false,
+ },
+ {
+ description: "can match an occurrence of the keyphrase not enclosed in ‹› when the paper's keyphrase is enclosed in ‹›",
+ paper: new Paper( "
A string with a keyword.
", { keyword: "‹keyword›" } ),
+ keyphraseForms: [ [ "keyword" ] ],
+ expectedCount: 1,
+ expectedMarkings: [
+ new Mark( {
+ original: "A string with a keyword.",
+ marked: "A string with a
keyword.",
+ position: { endOffset: 26, startOffset: 19,
+ startOffsetBlock: 16,
+ endOffsetBlock: 23,
+ attributeId: "",
+ clientId: "",
+ isFirstSection: false,
+ } }
+ ),
+ ],
+ skip: false,
+ },
+ {
+ description: "can match keyphrase preceded by ¿",
+ paper: new Paper( "
¿Keyword is it
", { keyword: "keyword" } ),
+ keyphraseForms: [ [ "keyword" ] ],
+ expectedCount: 1,
+ expectedMarkings: [
+ new Mark( {
+ original: "¿Keyword is it",
+ marked: "¿
Keyword is it",
+ position: { endOffset: 11, startOffset: 4,
+ startOffsetBlock: 1,
+ endOffsetBlock: 8,
+ attributeId: "",
+ clientId: "",
+ isFirstSection: false,
+ } }
+ ),
+ ],
+ skip: false,
+ },
+ {
+ description: "can match keyphrase followed by RTL-specific punctuation marks",
+ paper: new Paper( "
الجيدة؟
", { keyword: "الجيدة" } ),
+ keyphraseForms: [ [ "الجيدة" ] ],
+ expectedCount: 1,
+ expectedMarkings: [
+ new Mark( {
+ original: "الجيدة؟",
+ marked: "
الجيدة؟",
+ position: { endOffset: 9, startOffset: 3,
+ startOffsetBlock: 0,
+ endOffsetBlock: 6,
+ attributeId: "",
+ clientId: "",
+ isFirstSection: false,
+ } }
+ ),
+ ],
+ skip: false,
+ },
+ {
+ description: "can match keyphrases in texts that contain
tags",
+ paper: new Paper( "
This is a test.
This is
another
test.
", { keyword: "test" } ),
+ keyphraseForms: [ [ "test" ] ],
+ expectedCount: 2,
+ expectedMarkings: [
+ new Mark( {
+ original: "This is a test.",
+ marked: "This is a
test.",
+ position: {
+ startOffset: 13,
+ endOffset: 17,
+ startOffsetBlock: 10,
+ endOffsetBlock: 14,
+ attributeId: "",
+ clientId: "",
+ isFirstSection: false,
+ } }
+ ),
+ new Mark( {
+ original: "\nThis is\nanother\ntest.",
+ marked: "\nThis is\nanother\n
test.",
+ position: {
+ startOffset: 46,
+ endOffset: 50,
+ startOffsetBlock: 43,
+ endOffsetBlock: 47,
+ attributeId: "",
+ clientId: "",
+ isFirstSection: false,
+ } }
+ ),
+ ],
+ skip: false,
+ },
+ {
+ description: "can match paragraph gutenberg block",
+ paper: new Paper(
+ `
+
a string of text with the keyword in it
+ `,
+ {
+ keyword: "keyword",
+ wpBlocks: [
+ {
+ attributes: {
+ content: "a string of text with the keyword in it",
+ },
+ name: "core/paragraph",
+ clientId: "5615ca1f-4052-41a9-97be-be19cfd2085b",
+ innerBlocks: [],
+ },
+ ],
+ }
+ ),
+ keyphraseForms: [ [ "keyword", "keywords" ] ],
+ expectedCount: 1,
+ expectedMarkings: [
+ new Mark( {
+ marked: "a string of text with the
keyword in it",
+ original: "a string of text with the keyword in it",
+ position: {
+ startOffset: 55,
+ endOffset: 62,
+ startOffsetBlock: 26,
+ endOffsetBlock: 33,
+ attributeId: "",
+ clientId: "",
+ isFirstSection: false,
+ },
+ } ),
+ ],
+ skip: false,
+ },
+ {
+ description: "can match complex paragraph gutenberg block",
+ paper: new Paper(
+ `
+
Over the years, we’ve written quite a few articles about
+ branding.
+ Branding is about getting people to relate to your company and
+ products. It’s also about trying to make your brand synonymous with a certain product or service.
+ This can be a lengthy and hard project. It can potentially cost you all of your revenue.
+ It’s no wonder that branding is often associated with investing lots of money in marketing and promotion.
+ However, for a lot of small business owners, the investment in branding will have
+ to be made with a relatively small budget.
+ `,
+ {
+ keyword: "keyword",
+ wpBlocks: [
+ {
+ attributes: {
+ // eslint-disable-next-line max-len
+ content: "
Over the years, we’ve written quite a few articles about branding. Branding is about getting people to relate to your company and products. It’s also about trying to make your brand synonymous with a certain product or service. This can be a lengthy and hard project. It can potentially cost you all of your revenue. It’s no wonder that branding is often associated with investing lots of money in marketing and promotion. However, for a lot of small business owners, the investment in branding will have to be made with a relatively small budget. ",
+ },
+ name: "core/paragraph",
+ clientId: "6860403c-0b36-43b2-96fa-2d30c10cb44c",
+ innerBlocks: [],
+ },
+ ],
+ }
+ ),
+ keyphraseForms: [ [ "synonymous" ] ],
+ expectedCount: 1,
+ expectedMarkings: [
- it( "counts a string with with 'kapaklı' as a keyword in it", function() {
- const mockPaper = new Paper( "A string with kapaklı." );
- expect( keywordCount( mockPaper, mockResearcherKaplaki ).count ).toBe( 1 );
- expect( keywordCount( mockPaper, mockResearcherKaplaki ).markings ).toEqual( [
- new Mark( { marked: "A string with
kapaklı.",
- original: "A string with kapaklı." } ) ]
- );
- } );
+ new Mark( {
+ // eslint-disable-next-line max-len
+ marked: " It's also about trying to make your brand
synonymous with a certain product or service.",
+ original: " It’s also about trying to make your brand synonymous with a certain product or service.",
+ position: {
+ startOffset: 295,
+ endOffset: 305,
+ startOffsetBlock: 266,
+ endOffsetBlock: 276,
+ attributeId: "",
+ clientId: "",
+ isFirstSection: false,
+ },
+ } ),
+ ],
+ skip: false,
+ },
+ {
+ description: "can match complex paragraph gutenberg block",
+ paper: new Paper(
+ `
+
You might be a local bakery with 10 employees, or a local industrial company employing up to 500 people.
+ These all can be qualified as
+ ‘small business’. All have the same main goal when they start: the need to establish a name in their field of expertise. There
+ are multiple ways to do this, without a huge budget.
+ In this post, I’ll share my thoughts on how to go about your own low-budget branding.
+ `,
+ {
+ keyword: "expertise",
+ wpBlocks: [
+ {
+ attributes: {
+ // eslint-disable-next-line max-len
+ content: "You might be a local bakery with 10 employees, or a local industrial company employing up to 500 people. These all can be qualified as ‘small business’. All have the same main goal when they start: the need to establish a name in their field of expertise. There are multiple ways to do this, without a huge budget. In this post, I’ll share my thoughts on how to go about your own low-budget branding.",
+ },
+ name: "core/paragraph",
+ clientId: "65be5146-3395-4845-8c7c-4a79fd6e3611",
+ innerBlocks: [],
+ },
+ ],
+ }
+ ),
+ keyphraseForms: [ [ "expertise" ] ],
+ expectedCount: 1,
+ expectedMarkings: [
- it( "counts a string with with '&' in the string and the keyword", function() {
- const mockPaper = new Paper( "A string with key&word." );
- expect( keywordCount( mockPaper, mockResearcherAmpersand ).count ).toBe( 1 );
- expect( keywordCount( mockPaper, mockResearcherAmpersand ).markings ).toEqual( [
- new Mark( { marked: "A string with
key&word.",
- original: "A string with key&word." } ) ]
- );
- } );
+ new Mark( {
+ // eslint-disable-next-line max-len
+ marked: " All have the same main goal when they start: the need to establish a name in their field of
expertise.",
+ original: " All have the same main goal when they start: the need to establish a name in their field of expertise.",
+ position: {
+ startOffset: 282,
+ endOffset: 291,
+ startOffsetBlock: 253,
+ endOffsetBlock: 262,
+ attributeId: "",
+ clientId: "",
+ isFirstSection: false,
+ },
+ } ),
+ ],
+ skip: false,
+ },
+ {
+ description: "can match heading gutenberg block",
+ paper: new Paper(
+ `
+
Define and communicate brand values
+ `,
+ {
+ keyword: "communicate",
+ wpBlocks: [
+ {
+ attributes: {
+ level: 2,
+ content: "Define and communicate brand values",
+ },
+ name: "core/heading",
+ clientId: "b8e62d35-b3af-45ec-9889-139ef0a9baaa",
+ innerBlocks: [],
+ },
+ ],
+ }
+ ),
+ keyphraseForms: [ [ "communicate" ] ],
+ expectedCount: 1,
+ expectedMarkings: [
+ // eslint-disable-next-line max-len
+ new Mark( {
+ marked: "Define and
communicate brand values",
+ original: "Define and communicate brand values",
+ position: {
+ startOffset: 107,
+ endOffset: 118,
+ startOffsetBlock: 11,
+ endOffsetBlock: 22,
+ attributeId: "",
+ clientId: "",
+ isFirstSection: false,
+ },
+ } ),
+ ],
+ skip: false,
+ },
+ {
+ description: "can match complex paragraph gutenberg block",
+ paper: new Paper(
+ `
+
+
+
Lorem Ipsum is simply dummy text of the printing and typesetting industry.
+
+
- it( "does not count images as keywords.", function() {
- const mockPaper = new Paper( "
" );
- expect( keywordCount( mockPaper, mockResearcherAmpersand ).count ).toBe( 0 );
- expect( keywordCount( mockPaper, mockResearcherAmpersand ).markings ).toEqual( [] );
- } );
+
+
+
There are many variations of passages of Lorem Ipsum available
+
+
+ `,
+ {
+ keyword: "Ipsum",
+ wpBlocks: [
+ {
+ attributes: {},
+ name: "core/columns",
+ clientId: "1b9f1d49-813e-4578-a19a-bf236447cc41",
+ innerBlocks: [
+ {
+ attributes: {},
+ name: "core/column",
+ clientId: "09b93261-25ef-4391-98cc-630e8fa1eac1",
+ innerBlocks: [
+ {
+ attributes: {
+ // eslint-disable-next-line max-len
+ content: "
Lorem Ipsum is simply dummy text of the printing and typesetting industry.",
+ },
+ name: "core/paragraph",
+ clientId: "3f0e68c1-287e-40ef-90c8-54b3ab61702a",
+ innerBlocks: [],
+ },
+ ],
+ },
+ {
+ attributes: {},
+ name: "core/column",
+ clientId: "0f247feb-5ada-433d-bc97-1faa0265f7c4",
+ innerBlocks: [
+ {
+ attributes: {
+ content: "There are many variations of passages of Lorem Ipsum available",
+ },
+ name: "core/paragraph",
+ clientId: "5093342c-ec93-48a2-b2d5-883ae514b12d",
+ innerBlocks: [],
+ },
+ ],
+ },
+ ],
+ },
+ ],
+ }
+ ),
+ keyphraseForms: [ [ "Ipsum" ] ],
+ expectedCount: 2,
+ expectedMarkings: [
+ new Mark( {
+ marked: "Lorem
Ipsum is simply dummy text of the printing and typesetting industry.",
+ original: "Lorem Ipsum is simply dummy text of the printing and typesetting industry.",
+ position: {
+ startOffset: 149,
+ endOffset: 154,
+ startOffsetBlock: 14,
+ endOffsetBlock: 19,
+ attributeId: "",
+ clientId: "",
+ isFirstSection: false,
+ },
+ } ),
+ new Mark( {
+ marked: "There are many variations of passages of Lorem
Ipsum available",
+ original: "There are many variations of passages of Lorem Ipsum available",
+ position: {
+ startOffset: 426,
+ endOffset: 431,
+ startOffsetBlock: 47,
+ endOffsetBlock: 52,
+ attributeId: "",
+ clientId: "",
+ isFirstSection: false,
+ },
+ } ),
+ ],
+ skip: false,
+ },
+];
- it( "keyword counting is blind to CApiTal LeTteRs.", function() {
- const mockPaper = new Paper( "A string with KeY worD." );
- expect( keywordCount( mockPaper, mockResearcherKeyWord ).count ).toBe( 1 );
- expect( keywordCount( mockPaper, mockResearcherKeyWord ).markings ).toEqual( [
- new Mark( { marked: "A string with
KeY worD.",
- original: "A string with KeY worD." } ) ]
- );
- } );
+describe.each( testCasesWithSpecialCharacters )( "Test for counting the keyphrase in a text containing special characters",
+ function( {
+ description,
+ paper,
+ keyphraseForms,
+ expectedCount,
+ expectedMarkings,
+ skip } ) {
+ const test = skip ? it.skip : it;
- it( "keyword counting is blind to types of apostrophe.", function() {
- const mockPaper = new Paper( "A string with quotes to match the key'word, even if the quotes differ." );
- expect( keywordCount( mockPaper, mockResearcherApostrophe ).count ).toBe( 1 );
- expect( keywordCount( mockPaper, mockResearcherApostrophe ).markings ).toEqual( [
- new Mark( { marked: "A string with quotes to match the
key'word, " +
- "even if the quotes differ.",
- original: "A string with quotes to match the key'word, even if the quotes differ." } ) ]
- );
+ test( description, function() {
+ const mockResearcher = buildMorphologyMockResearcher( keyphraseForms );
+ buildTree( paper, mockResearcher );
+ const keyphraseCountResult = getKeyphraseCount( paper, mockResearcher );
+ expect( keyphraseCountResult.count ).toBe( expectedCount );
+ expect( keyphraseCountResult.markings ).toEqual( expectedMarkings );
+ } );
} );
- it( "counts can count dollar sign as in '$keyword'.", function() {
- const mockPaper = new Paper( "A string with a $keyword." );
- expect( keywordCount( mockPaper, mockResearcherDollarSign ).count ).toBe( 1 );
- // Markings do not currently work in this condition.
- } );
+/**
+ * The following test cases illustrates the current approach of matching transliterated keyphrase with non-transliterated version.
+ * For example, word form "acción" from the keyphrase will match word "accion" in the sentence.
+ * But, word form "accion" from the keyphrase will NOT match word "acción" in the sentence.
+ *
+ * This behaviour might change in the future.
+ */
+const testCasesWithLocaleMapping = [
+ {
+ description: "counts a string of text with German diacritics and eszett as the keyphrase",
+ paper: new Paper( "
Waltz keepin auf mitz auf keepin äöüß weiner blitz deutsch spitzen.
", { keyword: "äöüß", locale: "de_DE" } ),
+ keyphraseForms: [ [ "äöüß" ] ],
+ expectedCount: 1,
+ expectedMarkings: [
+ new Mark( { marked: "Waltz keepin auf mitz auf keepin
äöüß weiner blitz deutsch spitzen.",
+ original: "Waltz keepin auf mitz auf keepin äöüß weiner blitz deutsch spitzen.",
+ position: { endOffset: 40, startOffset: 36,
+ startOffsetBlock: 33,
+ endOffsetBlock: 37,
+ attributeId: "",
+ clientId: "",
+ isFirstSection: false,
+ } } ) ],
+ skip: false,
+ },
+ {
+ description: "doesn't count a match if the keyphrase is with diacritic while the occurrence in the text is with diacritic",
+ paper: new Paper( "
Acción Española fue una revista.
", { keyword: "accion", locale: "es_ES" } ),
+ keyphraseForms: [ [ "accion" ] ],
+ expectedCount: 0,
+ expectedMarkings: [],
+ skip: false,
+ },
+ {
+ description: "counts a string of text with Spanish accented vowel",
+ paper: new Paper( "
Accion Española fue una revista.
", { keyword: "acción", locale: "es_ES" } ),
+ keyphraseForms: [ [ "acción" ] ],
+ expectedCount: 1,
+ expectedMarkings: [
+ new Mark( { marked: "
Accion Española fue una revista.",
+ original: "Accion Española fue una revista.",
+ position: { endOffset: 9, startOffset: 3,
+ startOffsetBlock: 0,
+ endOffsetBlock: 6,
+ attributeId: "",
+ clientId: "",
+ isFirstSection: false,
+ } } ) ],
+ skip: false,
+ },
+ {
+ description: "counts a string of text with Swedish diacritics",
+ paper: new Paper( "
oeverlaatelsebesiktning and overlatelsebesiktning
", { keyword: "överlåtelsebesiktning", locale: "sv_SV" } ),
+ keyphraseForms: [ [ "överlåtelsebesiktning" ] ],
+ expectedCount: 2,
+ expectedMarkings: [
+ new Mark( {
+ marked: "
oeverlaatelsebesiktning " +
+ "and
overlatelsebesiktning",
+ original: "oeverlaatelsebesiktning and overlatelsebesiktning",
+ position: { endOffset: 26, startOffset: 3,
+ startOffsetBlock: 0,
+ endOffsetBlock: 23,
+ attributeId: "",
+ clientId: "",
+ isFirstSection: false,
+ } } ),
+ new Mark( {
+ marked: "
oeverlaatelsebesiktning " +
+ "and
overlatelsebesiktning",
+ original: "oeverlaatelsebesiktning and overlatelsebesiktning",
+ position: { endOffset: 52, startOffset: 31,
+ startOffsetBlock: 28,
+ endOffsetBlock: 49,
+ attributeId: "",
+ clientId: "",
+ isFirstSection: false,
+ } } ) ],
+ skip: false,
+ },
+ {
+ description: "counts a string of text with Norwegian diacritics",
+ paper: new Paper( "
København and Koebenhavn and Kobenhavn
", { keyword: "København", locale: "nb_NO" } ),
+ keyphraseForms: [ [ "København" ] ],
+ expectedCount: 3,
+ expectedMarkings: [
+ new Mark( {
+ marked: "
København and
" +
+ "Koebenhavn and
Kobenhavn",
+ original: "København and Koebenhavn and Kobenhavn",
+ position: { endOffset: 12, startOffset: 3,
+ startOffsetBlock: 0,
+ endOffsetBlock: 9,
+ attributeId: "",
+ clientId: "",
+ isFirstSection: false,
+ } } ),
+ new Mark( {
+ marked: "
København and
" +
+ "Koebenhavn and
Kobenhavn",
+ original: "København and Koebenhavn and Kobenhavn",
+ position: { endOffset: 27, startOffset: 17,
+ startOffsetBlock: 14,
+ endOffsetBlock: 24,
+ attributeId: "",
+ clientId: "",
+ isFirstSection: false,
+ } } ),
+ new Mark( {
+ marked: "
København and
" +
+ "Koebenhavn and
Kobenhavn",
+ original: "København and Koebenhavn and Kobenhavn",
+ position: { endOffset: 41, startOffset: 32,
+ startOffsetBlock: 29,
+ endOffsetBlock: 38,
+ attributeId: "",
+ clientId: "",
+ isFirstSection: false,
+ } } ) ],
+ skip: false,
+ },
+ {
+ description: "counts a string with a Turkish diacritics",
+ paper: new Paper( "
Türkçe and Turkce
", { keyword: "Türkçe", locale: "tr_TR" } ),
+ keyphraseForms: [ [ "Türkçe" ] ],
+ expectedCount: 2,
+ expectedMarkings: [
+ new Mark( {
+ marked: "
Türkçe and
Turkce",
+ original: "Türkçe and Turkce",
+ position: { endOffset: 9, startOffset: 3,
+ startOffsetBlock: 0,
+ endOffsetBlock: 6,
+ attributeId: "",
+ clientId: "",
+ isFirstSection: false,
+ } } ),
+ new Mark( {
+ marked: "
Türkçe and
Turkce",
+ original: "Türkçe and Turkce",
+ position: { endOffset: 20, startOffset: 14,
+ startOffsetBlock: 11,
+ endOffsetBlock: 17,
+ attributeId: "",
+ clientId: "",
+ isFirstSection: false,
+ } } ),
+ ],
+ skip: false,
+ },
+ {
+ description: "still counts a string with a Turkish diacritics when exact matching is used",
+ paper: new Paper( "
Türkçe and Turkce
", { keyword: "\"Türkçe\"", locale: "tr_TR" } ),
+ keyphraseForms: [ [ "Türkçe" ] ],
+ expectedCount: 2,
+ expectedMarkings: [
+ new Mark( {
+ marked: "
Türkçe and
Turkce",
+ original: "Türkçe and Turkce",
+ position: { endOffset: 9, startOffset: 3,
+ startOffsetBlock: 0,
+ endOffsetBlock: 6,
+ attributeId: "",
+ clientId: "",
+ isFirstSection: false,
+ } } ),
+ new Mark( {
+ marked: "
Türkçe and
Turkce",
+ original: "Türkçe and Turkce",
+ position: { endOffset: 20, startOffset: 14,
+ startOffsetBlock: 11,
+ endOffsetBlock: 17,
+ attributeId: "",
+ clientId: "",
+ isFirstSection: false,
+ } } ),
+ ],
+ skip: false,
+ },
+ {
+ description: "counts a string with a different forms of Turkish i, kephrase: İstanbul",
+ paper: new Paper( "
İstanbul and Istanbul and istanbul and ıstanbul
", { keyword: "İstanbul", locale: "tr_TR" } ),
+ keyphraseForms: [ [ "İstanbul" ] ],
+ expectedCount: 4,
+ expectedMarkings: [
+ new Mark( {
+ marked: "
İstanbul and
Istanbul and " +
+ "
istanbul and
ıstanbul",
+ original: "İstanbul and Istanbul and istanbul and ıstanbul",
+ position: { endOffset: 11, startOffset: 3,
+ startOffsetBlock: 0,
+ endOffsetBlock: 8,
+ attributeId: "",
+ clientId: "",
+ isFirstSection: false,
+ } } ),
+ new Mark( {
+ marked: "
İstanbul and
Istanbul and " +
+ "
istanbul and
ıstanbul",
+ original: "İstanbul and Istanbul and istanbul and ıstanbul",
+ position: { endOffset: 24, startOffset: 16,
+ startOffsetBlock: 13,
+ endOffsetBlock: 21,
+ attributeId: "",
+ clientId: "",
+ isFirstSection: false,
+ } } ),
+ new Mark( {
+ marked: "
İstanbul and
Istanbul and " +
+ "
istanbul and
ıstanbul",
+ original: "İstanbul and Istanbul and istanbul and ıstanbul",
+ position: { endOffset: 37, startOffset: 29,
+ startOffsetBlock: 26,
+ endOffsetBlock: 34,
+ attributeId: "",
+ clientId: "",
+ isFirstSection: false,
+ } } ),
+ new Mark( {
+ marked: "
İstanbul and
Istanbul and " +
+ "
istanbul and
ıstanbul",
+ original: "İstanbul and Istanbul and istanbul and ıstanbul",
+ position: { endOffset: 50, startOffset: 42,
+ startOffsetBlock: 39,
+ endOffsetBlock: 47,
+ attributeId: "",
+ clientId: "",
+ isFirstSection: false,
+ } } ) ],
+ skip: false,
+ },
+ {
+ description: "counts a string with a different forms of Turkish i, kephrase: Istanbul",
+ paper: new Paper( "
İstanbul and Istanbul and istanbul and ıstanbul
", { keyword: "Istanbul", locale: "tr_TR" } ),
+ keyphraseForms: [ [ "Istanbul" ] ],
+ expectedCount: 4,
+ expectedMarkings: [
+ new Mark( {
+ marked: "
İstanbul and
Istanbul and " +
+ "
istanbul and
ıstanbul",
+ original: "İstanbul and Istanbul and istanbul and ıstanbul",
+ position: { endOffset: 11, startOffset: 3,
+ startOffsetBlock: 0,
+ endOffsetBlock: 8,
+ attributeId: "",
+ clientId: "",
+ isFirstSection: false,
+ } } ),
+ new Mark( {
+ marked: "
İstanbul and
Istanbul and " +
+ "
istanbul and
ıstanbul",
+ original: "İstanbul and Istanbul and istanbul and ıstanbul",
+ position: { endOffset: 24, startOffset: 16,
+ startOffsetBlock: 13,
+ endOffsetBlock: 21,
+ attributeId: "",
+ clientId: "",
+ isFirstSection: false,
+ } } ),
+ new Mark( {
+ marked: "
İstanbul and
Istanbul and " +
+ "
istanbul and
ıstanbul",
+ original: "İstanbul and Istanbul and istanbul and ıstanbul",
+ position: { endOffset: 37, startOffset: 29,
+ startOffsetBlock: 26,
+ endOffsetBlock: 34,
+ attributeId: "",
+ clientId: "",
+ isFirstSection: false,
+ } } ),
+ new Mark( {
+ marked: "
İstanbul and
Istanbul and " +
+ "
istanbul and
ıstanbul",
+ original: "İstanbul and Istanbul and istanbul and ıstanbul",
+ position: { endOffset: 50, startOffset: 42,
+ startOffsetBlock: 39,
+ endOffsetBlock: 47,
+ attributeId: "",
+ clientId: "",
+ isFirstSection: false,
+ } } ) ],
+ skip: false,
+ },
+ {
+ description: "counts a string with a different forms of Turkish i, kephrase: istanbul",
+ paper: new Paper( "
İstanbul and Istanbul and istanbul and ıstanbul
", { keyword: "istanbul", locale: "tr_TR" } ),
+ keyphraseForms: [ [ "istanbul" ] ],
+ expectedCount: 4,
+ expectedMarkings: [
+ new Mark( {
+ marked: "
İstanbul and
Istanbul and " +
+ "
istanbul and
ıstanbul",
+ original: "İstanbul and Istanbul and istanbul and ıstanbul",
+ position: { endOffset: 11, startOffset: 3,
+ startOffsetBlock: 0,
+ endOffsetBlock: 8,
+ attributeId: "",
+ clientId: "",
+ isFirstSection: false,
+ } } ),
+ new Mark( {
+ marked: "
İstanbul and
Istanbul and " +
+ "
istanbul and
ıstanbul",
+ original: "İstanbul and Istanbul and istanbul and ıstanbul",
+ position: { endOffset: 24, startOffset: 16,
+ startOffsetBlock: 13,
+ endOffsetBlock: 21,
+ attributeId: "",
+ clientId: "",
+ isFirstSection: false,
+ } } ),
+ new Mark( {
+ marked: "
İstanbul and
Istanbul and " +
+ "
istanbul and
ıstanbul",
+ original: "İstanbul and Istanbul and istanbul and ıstanbul",
+ position: { endOffset: 37, startOffset: 29,
+ startOffsetBlock: 26,
+ endOffsetBlock: 34,
+ attributeId: "",
+ clientId: "",
+ isFirstSection: false,
+ } } ),
+ new Mark( {
+ marked: "
İstanbul and
Istanbul and " +
+ "
istanbul and
ıstanbul",
+ original: "İstanbul and Istanbul and istanbul and ıstanbul",
+ position: { endOffset: 50, startOffset: 42,
+ startOffsetBlock: 39,
+ endOffsetBlock: 47,
+ attributeId: "",
+ clientId: "",
+ isFirstSection: false,
+ } } ) ],
+ skip: false,
+ },
+ {
+ description: "counts a string with a different forms of Turkish i, kephrase: ıstanbul",
+ paper: new Paper( "
İstanbul and Istanbul and istanbul and ıstanbul
", { keyword: "ıstanbul", locale: "tr_TR" } ),
+ keyphraseForms: [ [ "ıstanbul" ] ],
+ expectedCount: 4,
+ expectedMarkings: [
+ new Mark( {
+ marked: "
İstanbul and
Istanbul and " +
+ "
istanbul and
ıstanbul",
+ original: "İstanbul and Istanbul and istanbul and ıstanbul",
+ position: { endOffset: 11, startOffset: 3,
+ startOffsetBlock: 0,
+ endOffsetBlock: 8,
+ attributeId: "",
+ clientId: "",
+ isFirstSection: false,
+ } } ),
+ new Mark( {
+ marked: "
İstanbul and
Istanbul and " +
+ "
istanbul and
ıstanbul",
+ original: "İstanbul and Istanbul and istanbul and ıstanbul",
+ position: { endOffset: 24, startOffset: 16,
+ startOffsetBlock: 13,
+ endOffsetBlock: 21,
+ attributeId: "",
+ clientId: "",
+ isFirstSection: false,
+ } } ),
+ new Mark( {
+ marked: "
İstanbul and
Istanbul and " +
+ "
istanbul and
ıstanbul",
+ original: "İstanbul and Istanbul and istanbul and ıstanbul",
+ position: { endOffset: 37, startOffset: 29,
+ startOffsetBlock: 26,
+ endOffsetBlock: 34,
+ attributeId: "",
+ clientId: "",
+ isFirstSection: false,
+ } } ),
+ new Mark( {
+ marked: "
İstanbul and
Istanbul and " +
+ "
istanbul and
ıstanbul",
+ original: "İstanbul and Istanbul and istanbul and ıstanbul",
+ position: { endOffset: 50, startOffset: 42,
+ startOffsetBlock: 39,
+ endOffsetBlock: 47,
+ attributeId: "",
+ clientId: "",
+ isFirstSection: false,
+ } } ) ],
+ skip: false,
+ },
+];
- it( "counts 'key word' also in 'key-word'.)", function() {
- const mockPaper = new Paper( "Lorem ipsum dolor sit amet, key word consectetur key-word adipiscing elit." );
- expect( keywordCount( mockPaper, mockResearcherKeyWord ).count ).toBe( 2 );
- // Note: this behavior might change in the future.
- } );
+describe.each( testCasesWithLocaleMapping )( "Test for counting the keyphrase in a text with different locale mapping, e.g. Turkish",
+ function( {
+ description,
+ paper,
+ keyphraseForms,
+ expectedCount,
+ expectedMarkings,
+ skip } ) {
+ const test = skip ? it.skip : it;
- it( "doesn't count 'key-word' in 'key word'.", function() {
- const mockPaper = new Paper( "Lorem ipsum dolor sit amet, key word consectetur key-word adipiscing elit." );
- expect( keywordCount( mockPaper, mockResearcherMinus ).count ).toBe( 1 );
- // Note: this behavior might change in the future.
+ test( description, function() {
+ const mockResearcher = buildMorphologyMockResearcher( keyphraseForms );
+ buildTree( paper, mockResearcher );
+ const keyphraseCountResult = getKeyphraseCount( paper, mockResearcher );
+ expect( keyphraseCountResult.count ).toBe( expectedCount );
+ expect( keyphraseCountResult.markings ).toEqual( expectedMarkings );
+ } );
} );
- it( "doesn't count keyphrase instances inside elements we want to exclude from the analysis", function() {
- const mockPaper = new Paper( "There is no
keyword
in this sentence." );
- expect( keywordCount( mockPaper, mockResearcher ).count ).toBe( 0 );
- } );
+const testDataForHTMLTags = [
+ {
+ description: "counts keyphrase occurrence correctly in a text containing `
` tag, and outputs correct Marks",
+ paper: new Paper( "The forepaws possess a \"false thumb\", which is an extension of a wrist bone, " +
+ "the radial sesamoid found in many carnivorans. This thumb allows the animal to grip onto bamboo stalks " +
+ "and both the digits and wrist bones are highly flexible. The red panda shares this feature " +
+ "with the giant panda, which has a larger sesamoid that is more compressed at the sides." +
+ " In addition, the red panda's sesamoid has a more sunken tip while the giant panda's curves in the middle.
",
+ { keyword: "giant panda", locale: "en_US" } ),
+ keyphraseForms: [ [ "giant" ], [ "panda", "pandas" ] ],
+ expectedCount: 1,
+ expectedMarkings: [
+ new Mark( {
+ marked: " The red panda shares this feature with the " +
+ "giant panda, which has a larger sesamoid that is more compressed at the sides.",
+ original: " The red panda shares this feature with the giant panda, which has a larger sesamoid " +
+ "that is more compressed at the sides.",
+ position: {
+ endOffset: 253,
+ startOffset: 248,
+ startOffsetBlock: 245,
+ endOffsetBlock: 250,
+ attributeId: "",
+ clientId: "",
+ isFirstSection: false,
+ },
+ } ),
+ new Mark( {
+ marked: " The red panda shares this feature with the " +
+ "giant panda, which has a larger sesamoid that is more compressed at the sides.",
+ original: " The red panda shares this feature with the giant panda, which has a larger " +
+ "sesamoid that is more compressed at the sides.",
+ position: {
+ endOffset: 288,
+ startOffset: 283,
+ startOffsetBlock: 280,
+ endOffsetBlock: 285,
+ attributeId: "",
+ clientId: "",
+ isFirstSection: false,
+ },
+ } ),
+ new Mark( {
+ marked: " The red panda shares this feature with the " +
+ "giant panda, which has a larger sesamoid that is more compressed at the sides.",
+ original: " The red panda shares this feature with the giant panda, which has a larger " +
+ "sesamoid that is more compressed at the sides.",
+ position: {
+ endOffset: 302,
+ startOffset: 297,
+ startOffsetBlock: 294,
+ endOffsetBlock: 299,
+ attributeId: "",
+ clientId: "",
+ isFirstSection: false,
+ },
+ } ),
+ ],
+ skip: false,
+ },
+ {
+ description: "counts keyphrase occurrence correctly in a text containing `` tag and outputs correct Marks",
+ paper: new Paper( "The forepaws possess a \"false thumb\", which is an extension of a wrist bone, " +
+ "the radial sesamoid found in many carnivorans. This thumb allows the animal to grip onto bamboo stalks " +
+ "and both the digits and wrist bones are highly flexible. The red panda shares this feature " +
+ "with the giant panda, which has a larger sesamoid that is more compressed at the sides." +
+ " In addition, the red panda's sesamoid has a more sunken tip while the giant panda's curves in the middle.
",
+ { keyword: "giant panda", locale: "en_US" } ),
+ keyphraseForms: [ [ "giant" ], [ "panda", "pandas" ] ],
+ expectedCount: 1,
+ expectedMarkings: [
+ new Mark( {
+ marked: " The red panda shares this feature with the " +
+ "giant panda, which has a larger sesamoid that is more compressed at the sides.",
+ original: " The red panda shares this feature with the giant panda, which has a larger sesamoid " +
+ "that is more compressed at the sides.",
+ position: {
+ endOffset: 253,
+ startOffset: 248,
+ startOffsetBlock: 245,
+ endOffsetBlock: 250,
+ attributeId: "",
+ clientId: "",
+ isFirstSection: false,
+ },
+ } ),
+ new Mark( {
+ marked: " The red panda shares this feature with the " +
+ "giant panda, which has a larger sesamoid that is more compressed at the sides.",
+ original: " The red panda shares this feature with the giant panda, which has a larger " +
+ "sesamoid that is more compressed at the sides.",
+ position: {
+ endOffset: 298,
+ startOffset: 287,
+ startOffsetBlock: 284,
+ endOffsetBlock: 295,
+ attributeId: "",
+ clientId: "",
+ isFirstSection: false,
+ },
+ } ),
+ ],
+ skip: false,
+ },
+ {
+ description: "counts keyphrase occurrence correctly when it's found inside an anchor text and outputs correct Marks",
+ paper: new Paper( "The forepaws possess a \"false thumb\", which is an extension of a wrist bone, " +
+ "the radial sesamoid found in many carnivorans. This thumb allows the animal to grip onto bamboo stalks " +
+ "and both the digits and wrist bones are highly flexible. The red panda shares this feature " +
+ "with the giant panda, which has a larger sesamoid " +
+ "that is more compressed at the sides." +
+ " In addition, the red panda's sesamoid has a more sunken tip while the giant panda's curves in the middle.
",
+ { keyword: "giant panda", locale: "en_US" } ),
+ keyphraseForms: [ [ "giant" ], [ "panda", "pandas" ] ],
+ expectedCount: 1,
+ expectedMarkings: [
+ new Mark( {
+ marked: " The red panda shares this feature with the " +
+ "giant panda, which has a larger sesamoid that is more compressed at the sides.",
+ original: " The red panda shares this feature with the giant panda, which has a larger sesamoid " +
+ "that is more compressed at the sides.",
+ position: {
+ endOffset: 253,
+ startOffset: 248,
+ startOffsetBlock: 245,
+ endOffsetBlock: 250,
+ attributeId: "",
+ clientId: "",
+ isFirstSection: false,
+ },
+ } ),
+ new Mark( {
+ marked: " The red panda shares this feature with the " +
+ "giant panda, which has a larger sesamoid that is more compressed at the sides.",
+ original: " The red panda shares this feature with the giant panda, which has a larger " +
+ "sesamoid that is more compressed at the sides.",
+ position: {
+ endOffset: 346,
+ startOffset: 335,
+ startOffsetBlock: 332,
+ endOffsetBlock: 343,
+ attributeId: "",
+ clientId: "",
+ isFirstSection: false,
+ },
+ } ),
+ ],
+ skip: false,
+ },
+];
- it( "only counts full key phrases (when all keywords are in the sentence once, twice etc.) as matches.", function() {
- const mockPaper = new Paper( "A string with three keys (key and another key) and one word." );
- expect( keywordCount( mockPaper, mockResearcherKeyWord ).count ).toBe( 1 );
- expect( keywordCount( mockPaper, mockResearcherKeyWord ).markings ).toEqual( [
- new Mark( { marked: "A string with three keys (" +
- "key and another key) and one word.",
- original: "A string with three keys (key and another key) and one word." } ) ]
- );
- } );
+describe.each( testDataForHTMLTags )( "Test for counting the keyphrase in a text containing multiple html tags",
+ function( {
+ description,
+ paper,
+ keyphraseForms,
+ expectedCount,
+ expectedMarkings,
+ skip } ) {
+ const test = skip ? it.skip : it;
- it( "doesn't match singular forms in reduplicated plurals in Indonesian", function() {
- const mockPaper = new Paper( "Lorem ipsum dolor sit amet, consectetur keyword-keyword, keyword adipiscing elit.", { locale: "id_ID" } );
- expect( keywordCount( mockPaper, mockResearcher ).count ).toBe( 1 );
+ test( description, function() {
+ const mockResearcher = buildMorphologyMockResearcher( keyphraseForms );
+ buildTree( paper, mockResearcher );
+ const keyphraseCountResult = getKeyphraseCount( paper, mockResearcher );
+ expect( keyphraseCountResult.count ).toBe( expectedCount );
+ expect( keyphraseCountResult.markings ).toEqual( expectedMarkings );
+ } );
} );
-} );
/**
* Mocks Japanese Researcher.
@@ -209,34 +1878,79 @@ const buildJapaneseMockResearcher = function( keyphraseForms, helper1, helper2 )
} );
};
-
-// Decided not to remove test below as it tests the added logic of the Japanese helpers.
-describe( "Test for counting the keyword in a text for Japanese", () => {
- it( "counts/marks a string of text with a keyword in it.", function() {
- const mockPaper = new Paper( "私の猫はかわいいです。", { locale: "ja", keyphrase: "猫" } );
+describe( "Test for counting the keyphrase in a text for Japanese", () => {
+ // NOTE: Japanese is not yet adapted to use HTML parser, hence, the marking out doesn't include the position information.
+ it( "counts/marks a string of text with a keyphrase in it.", function() {
+ const mockPaper = new Paper( "私の猫はかわいいです。
", { locale: "ja", keyphrase: "猫" } );
const researcher = buildJapaneseMockResearcher( [ [ "猫" ] ], wordsCountHelper, matchWordsHelper );
- expect( keywordCount( mockPaper, researcher ).count ).toBe( 1 );
- expect( keywordCount( mockPaper, researcher ).markings ).toEqual( [
+ buildTree( mockPaper, researcher );
+
+ expect( getKeyphraseCount( mockPaper, researcher ).count ).toBe( 1 );
+ expect( getKeyphraseCount( mockPaper, researcher ).markings ).toEqual( [
new Mark( { marked: "私の猫はかわいいです。",
original: "私の猫はかわいいです。" } ) ] );
} );
- it( "counts a string of text with no keyword in it.", function() {
- const mockPaper = new Paper( "私の猫はかわいいです。", { locale: "ja" } );
+ it( "counts the keyphrase occurrence inside an image caption.", function() {
+ const mockPaper = new Paper( "[caption id=\"attachment_157\" align=\"alignnone\" width=\"225\"] 一日一冊の本を読むのはできるかどうかやってみます。[/caption]
", {
+ locale: "ja",
+ keyphrase: "一冊の本を読む",
+ shortcodes: [
+ "wp_caption",
+ "caption",
+ "gallery",
+ "playlist" ],
+ } );
+ const keyphraseForms = [
+ [ "一冊" ],
+ [ "本" ],
+ [ "読む", "読み", "読ま", "読め", "読も", "読ん", "読める", "読ませ", "読ませる", "読まれ", "読まれる", "読もう" ],
+ ];
+ const researcher = buildJapaneseMockResearcher( keyphraseForms, wordsCountHelper, matchWordsHelper );
+ buildTree( mockPaper, researcher );
+
+
+ expect( getKeyphraseCount( mockPaper, researcher ).count ).toBe( 1 );
+ expect( getKeyphraseCount( mockPaper, researcher ).markings ).toEqual( [
+ new Mark( {
+ marked: "一日一冊の本を読むのはできるかどうかやってみます。",
+ original: "一日一冊の本を読むのはできるかどうかやってみます。" } ) ] );
+ } );
+
+ it( "counts/marks a string of text with multiple occurrences of the same keyphrase in it.", function() {
+ const mockPaper = new Paper( "私の猫はかわいい猫です。
", { locale: "ja", keyphrase: "猫" } );
+ const researcher = buildJapaneseMockResearcher( [ [ "猫" ] ], wordsCountHelper, matchWordsHelper );
+ buildTree( mockPaper, researcher );
+
+ expect( getKeyphraseCount( mockPaper, researcher ).count ).toBe( 2 );
+ expect( getKeyphraseCount( mockPaper, researcher ).markings ).toEqual( [
+ new Mark( { marked: "私の猫はかわいい猫です。",
+ original: "私の猫はかわいい猫です。",
+ } ) ] );
+ } );
+
+ it( "counts a string of text with no keyphrase in it.", function() {
+ const mockPaper = new Paper( "私の猫はかわいいです。
", { locale: "ja" } );
const researcher = buildJapaneseMockResearcher( [ [ "猫" ], [ "会い" ] ], wordsCountHelper, matchWordsHelper );
- expect( keywordCount( mockPaper, researcher ).count ).toBe( 0 );
- expect( keywordCount( mockPaper, researcher ).markings ).toEqual( [] );
+ buildTree( mockPaper, researcher );
+ expect( getKeyphraseCount( mockPaper, researcher ).count ).toBe( 0 );
+ expect( getKeyphraseCount( mockPaper, researcher ).markings ).toEqual( [] );
} );
it( "counts multiple occurrences of a keyphrase consisting of multiple words.", function() {
- const mockPaper = new Paper( "私の猫はかわいいですかわいい。", { locale: "ja" } );
+ const mockPaper = new Paper( "私の猫はかわいいですかわいい。
", { locale: "ja" } );
const researcher = buildJapaneseMockResearcher( [ [ "猫" ], [ "かわいい" ] ], wordsCountHelper, matchWordsHelper );
- expect( keywordCount( mockPaper, researcher ).count ).toBe( 1 );
- expect( keywordCount( mockPaper, researcher ).markings ).toEqual( [
+ buildTree( mockPaper, researcher );
+ expect( getKeyphraseCount( mockPaper, researcher ).count ).toBe( 1 );
+ expect( getKeyphraseCount( mockPaper, researcher ).markings ).toEqual( [
new Mark( {
marked: "私の猫はかわいい" +
"ですかわいい。",
original: "私の猫はかわいいですかわいい。",
- } ) ] );
+ } ),
+ ] );
} );
} );
diff --git a/packages/yoastseo/spec/languageProcessing/researches/matchKeywordInSubheadingsSpec.js b/packages/yoastseo/spec/languageProcessing/researches/matchKeywordInSubheadingsSpec.js
index d6d803710fa..609d63776fd 100644
--- a/packages/yoastseo/spec/languageProcessing/researches/matchKeywordInSubheadingsSpec.js
+++ b/packages/yoastseo/spec/languageProcessing/researches/matchKeywordInSubheadingsSpec.js
@@ -48,6 +48,15 @@ describe( "Matching keyphrase in subheadings", () => {
expect( result.matches ).toBe( 0 );
} );
+ it( "should not match text that is also a shortcode", function() {
+ const paper = new Paper( "" +
+ "What is a [shortcode]
A beautiful dog.", { keyword: "shortcode", shortcodes: [ "shortcode" ] } );
+
+ const result = matchKeywordInSubheadings( paper, new Researcher( paper ) );
+ expect( result.count ).toBe( 1 );
+ expect( result.matches ).toBe( 0 );
+ } );
+
// This unit test is skipped for now because it returns an incorrect result. Result.matches should return 0, instead it returns 1.
// When this research is adapted to use HTML Parser 5, please unskip this test and make sure that it passes.
xit( "should not match text within heading attributes", () => {
diff --git a/packages/yoastseo/spec/languageProcessing/researches/sentencesSpec.js b/packages/yoastseo/spec/languageProcessing/researches/sentencesSpec.js
index 3260e313d12..1c5ca3cec55 100644
--- a/packages/yoastseo/spec/languageProcessing/researches/sentencesSpec.js
+++ b/packages/yoastseo/spec/languageProcessing/researches/sentencesSpec.js
@@ -12,4 +12,10 @@ describe( "Get sentences from text", function() {
const researcher = new EnglishResearcher();
expect( sentences( paper, researcher ) ).toEqual( [ "Hello.", "Goodbye." ] );
} );
+
+ it( "should get sentences without the shortcodes", function() {
+ const paper = new Paper( "Hello. Goodbye. To be [shortcode]or not to be.", { shortcodes: [ "shortcode" ] } );
+ const researcher = new EnglishResearcher();
+ expect( sentences( paper, researcher ) ).toEqual( [ "Hello.", "Goodbye.", "To be or not to be." ] );
+ } );
} );
diff --git a/packages/yoastseo/spec/languageProcessing/researches/wordComplexitySpec.js b/packages/yoastseo/spec/languageProcessing/researches/wordComplexitySpec.js
index d2a12244c87..3711517bab3 100644
--- a/packages/yoastseo/spec/languageProcessing/researches/wordComplexitySpec.js
+++ b/packages/yoastseo/spec/languageProcessing/researches/wordComplexitySpec.js
@@ -92,6 +92,14 @@ describe( "a test for getting the complex words in the sentence and calculating
expect( wordComplexity( paper, researcher ).percentage ).toEqual( 0 );
} );
+ it( "should return an empty array and 0% if the only complex word is a shortcode", function() {
+ const paper = new Paper( "This is short text. [tortoiseshell] A text about Calico.", { shortcodes: [ "tortoiseshell" ] } );
+ researcher.setPaper( paper );
+
+ expect( wordComplexity( paper, researcher ).complexWords ).toEqual( [] );
+ expect( wordComplexity( paper, researcher ).percentage ).toEqual( 0 );
+ } );
+
it( "should return an empty array and 0% if there is no complex word found in the text: " +
"Also test with a word starting with capital letter enclosed in different types of quotation mark.", () => {
let paper = new Paper( "This is short text. This is another short text. A text about \"Calico\"." );
diff --git a/packages/yoastseo/spec/languageProcessing/researches/wordCountInTextSpec.js b/packages/yoastseo/spec/languageProcessing/researches/wordCountInTextSpec.js
index 4da5ffd745f..6a5936c3890 100644
--- a/packages/yoastseo/spec/languageProcessing/researches/wordCountInTextSpec.js
+++ b/packages/yoastseo/spec/languageProcessing/researches/wordCountInTextSpec.js
@@ -10,4 +10,8 @@ describe( "a test for counting the words in the text", function() {
const paper = new Paper( "Tell me a story on how to love
your cats", { keyword: "how to love your cats" } );
expect( wordCountInText( paper ).count ).toBe( 7 );
} );
+ it( "should not count shortcodes", function() {
+ const paper = new Paper( "Tell me a story on [shortcode]how to love[/shortcode] your cats", { shortcodes: [ "shortcode" ] } );
+ expect( wordCountInText( paper ).count ).toBe( 10 );
+ } );
} );
diff --git a/packages/yoastseo/spec/markers/removeDuplicateMarks.js b/packages/yoastseo/spec/markers/removeDuplicateMarksSpec.js
similarity index 64%
rename from packages/yoastseo/spec/markers/removeDuplicateMarks.js
rename to packages/yoastseo/spec/markers/removeDuplicateMarksSpec.js
index 73ae017032f..335a3fe1fd5 100644
--- a/packages/yoastseo/spec/markers/removeDuplicateMarks.js
+++ b/packages/yoastseo/spec/markers/removeDuplicateMarksSpec.js
@@ -7,11 +7,11 @@ describe( "removeDuplicateMarks", function() {
} );
it( "should remove duplicated marks from the array", function() {
- var marks = [
+ const marks = [
new Mark( { original: "original", marked: "marked" } ),
new Mark( { original: "original", marked: "marked" } ),
];
- var expected = [
+ const expected = [
new Mark( { original: "original", marked: "marked" } ),
];
@@ -19,16 +19,26 @@ describe( "removeDuplicateMarks", function() {
} );
it( "should remove duplicated marks from the array", function() {
- var marks = [
+ const marks = [
new Mark( { original: "original", marked: "marked" } ),
new Mark( { original: "original2", marked: "marked" } ),
new Mark( { original: "original", marked: "marked" } ),
];
- var expected = [
+ const expected = [
new Mark( { original: "original", marked: "marked" } ),
new Mark( { original: "original2", marked: "marked" } ),
];
expect( removeDuplicateMarks( marks ) ).toEqual( expected );
} );
+
+ it( "should not remove duplicated marks if the mark objects have position information", function() {
+ const marks = [
+ new Mark( { original: "original", marked: "marked", position: { startOffset: 0, endOffset: 10 } } ),
+ new Mark( { original: "original2", marked: "marked", position: { startOffset: 0, endOffset: 10 } } ),
+ new Mark( { original: "original1", marked: "marked", position: { startOffset: 15, endOffset: 20 } } ),
+ ];
+
+ expect( removeDuplicateMarks( marks ) ).toEqual( marks );
+ } );
} );
diff --git a/packages/yoastseo/spec/parse/build/buildSpec.js b/packages/yoastseo/spec/parse/build/buildSpec.js
index f748f8ef880..e85cf7fe33d 100644
--- a/packages/yoastseo/spec/parse/build/buildSpec.js
+++ b/packages/yoastseo/spec/parse/build/buildSpec.js
@@ -1,31 +1,34 @@
import build from "../../../src/parse/build/build";
import LanguageProcessor from "../../../src/parse/language/LanguageProcessor";
+import Paper from "../../../src/values/Paper";
import Factory from "../../specHelpers/factory";
import memoizedSentenceTokenizer from "../../../src/languageProcessing/helpers/sentence/memoizedSentenceTokenizer";
+import splitIntoTokensCustom from "../../../src/languageProcessing/languages/ja/helpers/splitIntoTokensCustom";
describe( "The parse function", () => {
- it( "parses a basic HTML text", () => {
- const html = "";
+ it( "parses a basic HTML text with an HTML entity (&)", () => {
+ const html = "";
+ const paper = new Paper( html );
const researcher = Factory.buildMockResearcher( {}, true, false, false,
{ memoizedTokenizer: memoizedSentenceTokenizer } );
const languageProcessor = new LanguageProcessor( researcher );
- expect( build( html, languageProcessor ) ).toEqual( {
+ expect( build( paper, languageProcessor ) ).toEqual( {
name: "#document-fragment",
attributes: {},
childNodes: [ {
name: "div",
sourceCodeLocation: {
startOffset: 0,
- endOffset: 45,
+ endOffset: 58,
startTag: {
startOffset: 0,
endOffset: 5,
},
endTag: {
- startOffset: 39,
- endOffset: 45,
+ startOffset: 52,
+ endOffset: 58,
},
},
attributes: {},
@@ -36,30 +39,97 @@ describe( "The parse function", () => {
"class": new Set( [ "yoast" ] ),
},
sentences: [ {
- text: "Hello, world!",
- sourceCodeRange: { startOffset: 22, endOffset: 35 },
+ text: "Hello, world & beyond!",
+ sourceCodeRange: { startOffset: 22, endOffset: 48 },
tokens: [
{ text: "Hello", sourceCodeRange: { startOffset: 22, endOffset: 27 } },
{ text: ",", sourceCodeRange: { startOffset: 27, endOffset: 28 } },
{ text: " ", sourceCodeRange: { startOffset: 28, endOffset: 29 } },
{ text: "world", sourceCodeRange: { startOffset: 29, endOffset: 34 } },
- { text: "!", sourceCodeRange: { startOffset: 34, endOffset: 35 } },
+ { text: " ", sourceCodeRange: { startOffset: 34, endOffset: 35 } },
+ { text: "&", sourceCodeRange: { startOffset: 35, endOffset: 40 } },
+ { text: " ", sourceCodeRange: { startOffset: 40, endOffset: 41 } },
+ { text: "beyond", sourceCodeRange: { startOffset: 41, endOffset: 47 } },
+ { text: "!", sourceCodeRange: { startOffset: 47, endOffset: 48 } },
+ ],
+ } ],
+ childNodes: [ {
+ name: "#text",
+ value: "Hello, world #amp; beyond!",
+ sourceCodeRange: { startOffset: 22, endOffset: 48 },
+ } ],
+ sourceCodeLocation: {
+ startOffset: 5,
+ endOffset: 52,
+ startTag: {
+ startOffset: 5,
+ endOffset: 22,
+ },
+ endTag: {
+ startOffset: 48,
+ endOffset: 52,
+ },
+ },
+ } ],
+ } ],
+ } );
+ } );
+
+ it( "parses a basic Japanese HTML text", () => {
+ const html = "";
+ const paper = new Paper( html );
+
+ const researcher = Factory.buildMockResearcher( {}, true, false, false,
+ { splitIntoTokensCustom: splitIntoTokensCustom, memoizedTokenizer: memoizedSentenceTokenizer } );
+ const languageProcessor = new LanguageProcessor( researcher );
+ expect( build( paper, languageProcessor ) ).toEqual( {
+ name: "#document-fragment",
+ attributes: {},
+ childNodes: [ {
+ name: "div",
+ sourceCodeLocation: {
+ startOffset: 0,
+ endOffset: 37,
+ startTag: {
+ startOffset: 0,
+ endOffset: 5,
+ },
+ endTag: {
+ startOffset: 31,
+ endOffset: 37,
+ },
+ },
+ attributes: {},
+ childNodes: [ {
+ name: "p",
+ isImplicit: false,
+ attributes: {
+ "class": new Set( [ "yoast" ] ),
+ },
+ sentences: [ {
+ text: "犬が大好き",
+ sourceCodeRange: { startOffset: 22, endOffset: 27 },
+ tokens: [
+ { text: "犬", sourceCodeRange: { startOffset: 22, endOffset: 23 } },
+ { text: "が", sourceCodeRange: { startOffset: 23, endOffset: 24 } },
+ { text: "大好き", sourceCodeRange: { startOffset: 24, endOffset: 27 } },
],
} ],
childNodes: [ {
name: "#text",
- value: "Hello, world!",
+ value: "犬が大好き",
+ sourceCodeRange: { startOffset: 22, endOffset: 27 },
} ],
sourceCodeLocation: {
startOffset: 5,
- endOffset: 39,
+ endOffset: 31,
startTag: {
startOffset: 5,
endOffset: 22,
},
endTag: {
- startOffset: 35,
- endOffset: 39,
+ startOffset: 27,
+ endOffset: 31,
},
},
} ],
@@ -69,6 +139,7 @@ describe( "The parse function", () => {
it( "adds implicit paragraphs around phrasing content outside of paragraphs and headings", () => {
const html = "Hello World!
";
+ const paper = new Paper( html );
const researcher = Factory.buildMockResearcher( {}, true, false, false,
{ memoizedTokenizer: memoizedSentenceTokenizer } );
@@ -86,7 +157,7 @@ describe( "The parse function", () => {
* [/#document-fragment]
* ```
*/
- expect( build( html, languageProcessor ) ).toEqual( {
+ expect( build( paper, languageProcessor ) ).toEqual( {
name: "#document-fragment",
attributes: {},
childNodes: [ {
@@ -110,6 +181,7 @@ describe( "The parse function", () => {
{
name: "#text",
value: "Hello ",
+ sourceCodeRange: { startOffset: 5, endOffset: 11 },
},
{
name: "span",
@@ -117,6 +189,7 @@ describe( "The parse function", () => {
childNodes: [ {
name: "#text",
value: "World!",
+ sourceCodeRange: { startOffset: 17, endOffset: 23 },
} ],
sourceCodeLocation: {
startOffset: 11,
@@ -155,6 +228,7 @@ describe( "The parse function", () => {
it( "parses another HTML text and adds implicit paragraphs where needed", () => {
const html = "";
+ const paper = new Paper( html );
const researcher = Factory.buildMockResearcher( {}, true, false, false,
{ memoizedTokenizer: memoizedSentenceTokenizer } );
@@ -175,7 +249,7 @@ describe( "The parse function", () => {
* [/#document-fragment]
* ```
*/
- expect( build( html, languageProcessor ) ).toEqual( {
+ expect( build( paper, languageProcessor ) ).toEqual( {
name: "#document-fragment",
attributes: {},
childNodes: [
@@ -199,6 +273,7 @@ describe( "The parse function", () => {
{
name: "#text",
value: "Hello ",
+ sourceCodeRange: { startOffset: 5, endOffset: 11 },
},
],
sourceCodeLocation: {
@@ -223,6 +298,7 @@ describe( "The parse function", () => {
{
name: "#text",
value: "World!",
+ sourceCodeRange: { startOffset: 14, endOffset: 20 },
},
],
sourceCodeLocation: {
@@ -258,6 +334,7 @@ describe( "The parse function", () => {
it( "parses an HTML text with implicit paragraphs before, between, and after p tags", () => {
const html = "So
long, and
thanks
for
all
the
fish!
";
+ const paper = new Paper( html );
const researcher = Factory.buildMockResearcher( {}, true, false, false,
{ memoizedTokenizer: memoizedSentenceTokenizer } );
@@ -287,7 +364,7 @@ describe( "The parse function", () => {
* [/#document-fragment]
* ```
*/
- expect( build( html, languageProcessor ) ).toEqual( {
+ expect( build( paper, languageProcessor ) ).toEqual( {
name: "#document-fragment",
attributes: {},
childNodes: [
@@ -303,6 +380,7 @@ describe( "The parse function", () => {
{
name: "#text",
value: "So ",
+ sourceCodeRange: { startOffset: 5, endOffset: 8 },
},
{
name: "em",
@@ -311,6 +389,7 @@ describe( "The parse function", () => {
{
name: "#text",
value: "long",
+ sourceCodeRange: { startOffset: 12, endOffset: 16 },
},
],
sourceCodeLocation: {
@@ -329,6 +408,7 @@ describe( "The parse function", () => {
{
name: "#text",
value: ", and ",
+ sourceCodeRange: { startOffset: 21, endOffset: 27 },
},
],
sentences: [ {
@@ -357,6 +437,7 @@ describe( "The parse function", () => {
{
name: "#text",
value: "thanks",
+ sourceCodeRange: { startOffset: 30, endOffset: 36 },
},
],
sentences: [ {
@@ -387,6 +468,7 @@ describe( "The parse function", () => {
{
name: "#text",
value: " for ",
+ sourceCodeRange: { startOffset: 40, endOffset: 45 },
},
],
sentences: [ {
@@ -411,6 +493,7 @@ describe( "The parse function", () => {
{
name: "#text",
value: "all",
+ sourceCodeRange: { startOffset: 48, endOffset: 51 },
},
],
sentences: [ {
@@ -441,6 +524,7 @@ describe( "The parse function", () => {
{
name: "#text",
value: " the ",
+ sourceCodeRange: { startOffset: 55, endOffset: 60 },
},
{
name: "strong",
@@ -449,6 +533,7 @@ describe( "The parse function", () => {
{
name: "#text",
value: "fish",
+ sourceCodeRange: { startOffset: 68, endOffset: 72 },
},
],
sourceCodeLocation: {
@@ -467,6 +552,7 @@ describe( "The parse function", () => {
{
name: "#text",
value: "!",
+ sourceCodeRange: { startOffset: 81, endOffset: 82 },
},
],
sentences: [ {
@@ -511,158 +597,62 @@ describe( "The parse function", () => {
const html = "\n\nThis is the first sentence.
";
+ const paper = new Paper( html );
const researcher = Factory.buildMockResearcher( {}, true, false, false,
{ memoizedTokenizer: memoizedSentenceTokenizer } );
const languageProcessor = new LanguageProcessor( researcher );
- expect( build( html, languageProcessor ) ).toEqual( {
- name: "#document-fragment",
+ expect( build( paper, languageProcessor ) ).toEqual( {
attributes: {},
childNodes: [
- {
- attributes: {},
- childNodes: [],
- name: "#comment",
- sourceCodeLocation: {
- endOffset: 39,
- startOffset: 0,
- },
- },
- {
- name: "#text",
- value: "\n",
- },
- {
- name: "#text",
- value: "\n",
- },
- {
- attributes: {},
- childNodes: [],
- name: "#comment",
- sourceCodeLocation: {
- endOffset: 328,
- startOffset: 288,
- },
- },
+ { name: "#text", sourceCodeRange: { endOffset: 40, startOffset: 39 }, value: "\n" },
+ { name: "#text", sourceCodeRange: { endOffset: 288, startOffset: 287 }, value: "\n" },
{
attributes: {},
childNodes: [
{
name: "#text",
- value: "This is the first sentence.",
- },
+ sourceCodeRange: { endOffset: 358, startOffset: 331 },
+ value: "This is the first sentence." },
],
isImplicit: false,
name: "p",
sentences: [
{
- sourceCodeRange: {
- endOffset: 358,
- startOffset: 331,
- },
+ sourceCodeRange: { endOffset: 358, startOffset: 331 },
text: "This is the first sentence.",
tokens: [
- {
- sourceCodeRange: {
- endOffset: 335,
- startOffset: 331,
- },
- text: "This",
- },
- {
- sourceCodeRange: {
- endOffset: 336,
- startOffset: 335,
- },
- text: " ",
- },
- {
- sourceCodeRange: {
- endOffset: 338,
- startOffset: 336,
- },
- text: "is",
- },
- {
- sourceCodeRange: {
- endOffset: 339,
- startOffset: 338,
- },
- text: " ",
- },
- {
- sourceCodeRange: {
- endOffset: 342,
- startOffset: 339,
- },
- text: "the",
- },
- {
- sourceCodeRange: {
- endOffset: 343,
- startOffset: 342,
- },
- text: " ",
- },
- {
- sourceCodeRange: {
- endOffset: 348,
- startOffset: 343,
- },
- text: "first",
- },
- {
- sourceCodeRange: {
- endOffset: 349,
- startOffset: 348,
- },
- text: " ",
- },
- {
- sourceCodeRange: {
- endOffset: 357,
- startOffset: 349,
- },
- text: "sentence",
- },
- {
- sourceCodeRange: {
- endOffset: 358,
- startOffset: 357,
- },
- text: ".",
- },
- ],
- },
- ],
+ { sourceCodeRange: { endOffset: 335, startOffset: 331 }, text: "This" },
+ { sourceCodeRange: { endOffset: 336, startOffset: 335 }, text: " " },
+ { sourceCodeRange: { endOffset: 338, startOffset: 336 }, text: "is" },
+ { sourceCodeRange: { endOffset: 339, startOffset: 338 }, text: " " },
+ { sourceCodeRange: { endOffset: 342, startOffset: 339 }, text: "the" },
+ { sourceCodeRange: { endOffset: 343, startOffset: 342 }, text: " " },
+ { sourceCodeRange: { endOffset: 348, startOffset: 343 }, text: "first" },
+ { sourceCodeRange: { endOffset: 349, startOffset: 348 }, text: " " },
+ { sourceCodeRange: { endOffset: 357, startOffset: 349 }, text: "sentence" },
+ { sourceCodeRange: { endOffset: 358, startOffset: 357 }, text: "." },
+ ] } ],
sourceCodeLocation: {
endOffset: 362,
- endTag: {
- endOffset: 362,
- startOffset: 358,
- },
+ endTag: { endOffset: 362, startOffset: 358 },
startOffset: 328,
- startTag: {
- endOffset: 331,
- startOffset: 328,
- },
- },
- },
- ],
- }
+ startTag: { endOffset: 331, startOffset: 328 },
+ } } ],
+ name: "#document-fragment" }
);
} );
it( "parses an HTML text with a Yoast breadcrumbs widget in Elementor, which should be filtered out", () => {
// HTML: Home
The first sentence
const html = "Home
The first sentence
";
+ const paper = new Paper( html );
const researcher = Factory.buildMockResearcher( {}, true, false, false,
{ memoizedTokenizer: memoizedSentenceTokenizer } );
const languageProcessor = new LanguageProcessor( researcher );
- expect( build( html, languageProcessor ) ).toEqual(
+ expect( build( paper, languageProcessor ) ).toEqual(
{
name: "#document-fragment",
attributes: {},
@@ -673,6 +663,7 @@ describe( "The parse function", () => {
{
name: "#text",
value: "The first sentence",
+ sourceCodeRange: { startOffset: 99, endOffset: 117 },
},
],
isImplicit: false,
@@ -742,12 +733,13 @@ describe( "The parse function", () => {
} );
it( "parses an HTML text with a script element inside a paragraph", () => {
const html = "";
+ const paper = new Paper( html );
const researcher = Factory.buildMockResearcher( {}, true, false, false,
{ memoizedTokenizer: memoizedSentenceTokenizer } );
const languageProcessor = new LanguageProcessor( researcher );
- expect( build( html, languageProcessor ) ).toEqual( {
+ expect( build( paper, languageProcessor ) ).toEqual( {
name: "#document-fragment",
attributes: {},
childNodes: [
@@ -775,6 +767,7 @@ describe( "The parse function", () => {
{
name: "#text",
value: " Hello, world!",
+ sourceCodeRange: { startOffset: 53, endOffset: 67 },
},
],
sentences: [
@@ -850,12 +843,13 @@ describe( "The parse function", () => {
} );
it( "parses an HTML text with a script element outside of a paragraph", () => {
const html = "Hello, world!
";
+ const paper = new Paper( html );
const researcher = Factory.buildMockResearcher( {}, true, false, false,
{ memoizedTokenizer: memoizedSentenceTokenizer } );
const languageProcessor = new LanguageProcessor( researcher );
- expect( build( html, languageProcessor ) ).toEqual( {
+ expect( build( paper, languageProcessor ) ).toEqual( {
name: "#document-fragment",
attributes: {},
childNodes: [ {
@@ -864,6 +858,10 @@ describe( "The parse function", () => {
attributes: {},
sentences: [],
childNodes: [],
+ sourceCodeLocation: {
+ startOffset: 0,
+ endOffset: 45,
+ },
},
{
name: "p",
@@ -883,6 +881,7 @@ describe( "The parse function", () => {
childNodes: [ {
name: "#text",
value: "Hello, world!",
+ sourceCodeRange: { startOffset: 48, endOffset: 61 },
} ],
sourceCodeLocation: {
startOffset: 45,
@@ -901,12 +900,13 @@ describe( "The parse function", () => {
} );
it( "parses an HTML text with a comment inside a paragraph", () => {
const html = "";
+ const paper = new Paper( html );
const researcher = Factory.buildMockResearcher( {}, true, false, false,
{ memoizedTokenizer: memoizedSentenceTokenizer } );
const languageProcessor = new LanguageProcessor( researcher );
- expect( build( html, languageProcessor ) ).toEqual( {
+ expect( build( paper, languageProcessor ) ).toEqual( {
name: "#document-fragment",
attributes: {},
childNodes: [ {
@@ -940,15 +940,10 @@ describe( "The parse function", () => {
sourceCodeRange: { startOffset: 26, endOffset: 39 },
} ],
childNodes: [
- {
- name: "#comment",
- attributes: {},
- childNodes: [],
- sourceCodeLocation: { startOffset: 8, endOffset: 26 },
- },
{
name: "#text",
value: "Hello, world!",
+ sourceCodeRange: { startOffset: 26, endOffset: 39 },
} ],
sourceCodeLocation: {
startOffset: 5,
@@ -968,12 +963,13 @@ describe( "The parse function", () => {
} );
it( "parses an HTML text with a comment within a sentence", () => {
const html = "";
+ const paper = new Paper( html );
const researcher = Factory.buildMockResearcher( {}, true, false, false,
{ memoizedTokenizer: memoizedSentenceTokenizer } );
const languageProcessor = new LanguageProcessor( researcher );
- expect( build( html, languageProcessor ) ).toEqual( {
+ expect( build( paper, languageProcessor ) ).toEqual( {
name: "#document-fragment",
attributes: {},
childNodes: [ {
@@ -1011,16 +1007,12 @@ describe( "The parse function", () => {
{
name: "#text",
value: "Hello, ",
- },
- {
- name: "#comment",
- attributes: {},
- childNodes: [],
- sourceCodeLocation: { startOffset: 15, endOffset: 33 },
+ sourceCodeRange: { startOffset: 8, endOffset: 15 },
},
{
name: "#text",
value: " world!",
+ sourceCodeRange: { startOffset: 33, endOffset: 40 },
} ],
sourceCodeLocation: {
startOffset: 5,
@@ -1041,12 +1033,13 @@ describe( "The parse function", () => {
it( "parses an HTML text with a code element within a paragraph", () => {
const html = "Hello code! array.push( something )
Hello world!
";
+ const paper = new Paper( html );
const researcher = Factory.buildMockResearcher( {}, true, false, false,
{ memoizedTokenizer: memoizedSentenceTokenizer } );
const languageProcessor = new LanguageProcessor( researcher );
- expect( build( html, languageProcessor ) ).toEqual( {
+ expect( build( paper, languageProcessor ) ).toEqual( {
name: "#document-fragment",
attributes: {},
childNodes: [
@@ -1074,10 +1067,12 @@ describe( "The parse function", () => {
{
name: "#text",
value: "Hello code! ",
+ sourceCodeRange: { startOffset: 8, endOffset: 20 },
},
{
name: "#text",
value: " Hello world!",
+ sourceCodeRange: { startOffset: 56, endOffset: 69 },
},
],
sentences: [
@@ -1190,12 +1185,13 @@ describe( "The parse function", () => {
} );
it( "parses an HTML text with a code element within a sentence", () => {
const html = "Hello array.push( something )
code!
";
+ const paper = new Paper( html );
const researcher = Factory.buildMockResearcher( {}, true, false, false,
{ memoizedTokenizer: memoizedSentenceTokenizer } );
const languageProcessor = new LanguageProcessor( researcher );
- expect( build( html, languageProcessor ) ).toEqual( {
+ expect( build( paper, languageProcessor ) ).toEqual( {
name: "#document-fragment",
attributes: {},
childNodes: [ {
@@ -1232,10 +1228,12 @@ describe( "The parse function", () => {
{
name: "#text",
value: "Hello ",
+ sourceCodeRange: { startOffset: 8, endOffset: 14 },
},
{
name: "#text",
value: " code!",
+ sourceCodeRange: { startOffset: 50, endOffset: 56 },
},
],
sourceCodeLocation: {
@@ -1256,12 +1254,13 @@ describe( "The parse function", () => {
} );
it( "parses an HTML text with a code element with a child node within a sentence", () => {
const html = "Some text and code console.log( code )
";
+ const paper = new Paper( html );
const researcher = Factory.buildMockResearcher( {}, true, false, false,
{ memoizedTokenizer: memoizedSentenceTokenizer } );
const languageProcessor = new LanguageProcessor( researcher );
- expect( build( html, languageProcessor ) ).toEqual( {
+ expect( build( paper, languageProcessor ) ).toEqual( {
attributes: {},
childNodes: [
{
@@ -1272,6 +1271,7 @@ describe( "The parse function", () => {
{
name: "#text",
value: "Some text and code ",
+ sourceCodeRange: { startOffset: 3, endOffset: 22 },
},
],
sentences: [
@@ -1358,6 +1358,243 @@ describe( "The parse function", () => {
name: "#document-fragment",
} );
} );
+ it( "also parses blocks when the blocks array is available inside Paper, " +
+ "the available block client id should also be added to the childNodes", () => {
+ const html = "\n" +
+ "The red panda's coat is mainly red or orange-brown with a black belly and legs.
\n" +
+ "\n" +
+ "\n" +
+ "\n" +
+ "\n" +
+ "- giant panda
\n" +
+ "\n" +
+ "\n" +
+ "\n" +
+ "- red panda
\n" +
+ "
\n" +
+ "";
+ const paper = new Paper( html, { wpBlocks: [
+ {
+ clientId: "da950985-3903-479c-92f5-9a98bcec51e9",
+ name: "core/paragraph",
+ isValid: true,
+ originalContent: "The red panda's coat is mainly red or orange-brown with a black belly and legs.
",
+ validationIssues: [],
+ attributes: {
+ content: "The red panda's coat is mainly red or orange-brown with a black belly and legs.",
+ dropCap: false,
+ },
+ innerBlocks: [],
+ },
+ {
+ clientId: "ce5c002f-eea2-4a92-be4a-6fd25e947f45",
+ name: "core/list",
+ isValid: true,
+ originalContent: "",
+ validationIssues: [],
+ attributes: {
+ ordered: false,
+ values: "",
+ },
+ innerBlocks: [
+ {
+ clientId: "b45eed06-594b-468e-8723-1f597689c300",
+ name: "core/list-item",
+ isValid: true,
+ originalContent: "giant panda",
+ validationIssues: [],
+ attributes: {
+ content: "giant panda",
+ },
+ innerBlocks: [],
+ },
+ {
+ clientId: "f8a22538-9e9d-4862-968d-524580f5d8ce",
+ name: "core/list-item",
+ isValid: true,
+ originalContent: "red panda",
+ validationIssues: [],
+ attributes: {
+ content: "red panda",
+ },
+ innerBlocks: [],
+ },
+ ],
+ },
+ ] } );
+
+ const researcher = Factory.buildMockResearcher( {}, true, false, false,
+ { memoizedTokenizer: memoizedSentenceTokenizer } );
+ const languageProcessor = new LanguageProcessor( researcher );
+ expect( build( paper, languageProcessor ) ).toEqual(
+ {
+ name: "#document-fragment",
+ attributes: {},
+ childNodes: [
+ { name: "#text", value: "\n",
+ sourceCodeRange: { startOffset: 21, endOffset: 22 } },
+ {
+ name: "p",
+ attributes: {},
+ childNodes: [
+ {
+ name: "#text",
+ value: "The red panda's coat is mainly red or orange-brown with a black belly and legs.",
+ sourceCodeRange: { startOffset: 25, endOffset: 104 },
+ clientId: "da950985-3903-479c-92f5-9a98bcec51e9",
+ },
+ ],
+ sourceCodeLocation: {
+ startTag: { startOffset: 22, endOffset: 25 },
+ endTag: { startOffset: 104, endOffset: 108 },
+ startOffset: 22,
+ endOffset: 108,
+ },
+ isImplicit: false,
+ clientId: "da950985-3903-479c-92f5-9a98bcec51e9",
+ sentences: [
+ {
+ text: "The red panda's coat is mainly red or orange-brown with a black belly and legs.",
+ tokens: [
+ { text: "The", sourceCodeRange: { startOffset: 25, endOffset: 28 } },
+ { text: " ", sourceCodeRange: { startOffset: 28, endOffset: 29 } },
+ { text: "red", sourceCodeRange: { startOffset: 29, endOffset: 32 } },
+ { text: " ", sourceCodeRange: { startOffset: 32, endOffset: 33 } },
+ { text: "panda's", sourceCodeRange: { startOffset: 33, endOffset: 40 } },
+ { text: " ", sourceCodeRange: { startOffset: 40, endOffset: 41 } },
+ { text: "coat", sourceCodeRange: { startOffset: 41, endOffset: 45 } },
+ { text: " ", sourceCodeRange: { startOffset: 45, endOffset: 46 } },
+ { text: "is", sourceCodeRange: { startOffset: 46, endOffset: 48 } },
+ { text: " ", sourceCodeRange: { startOffset: 48, endOffset: 49 } },
+ { text: "mainly", sourceCodeRange: { startOffset: 49, endOffset: 55 } },
+ { text: " ", sourceCodeRange: { startOffset: 55, endOffset: 56 } },
+ { text: "red", sourceCodeRange: { startOffset: 56, endOffset: 59 } },
+ { text: " ", sourceCodeRange: { startOffset: 59, endOffset: 60 } },
+ { text: "or", sourceCodeRange: { startOffset: 60, endOffset: 62 } },
+ { text: " ", sourceCodeRange: { startOffset: 62, endOffset: 63 } },
+ { text: "orange-brown", sourceCodeRange: { startOffset: 63, endOffset: 75 } },
+ { text: " ", sourceCodeRange: { startOffset: 75, endOffset: 76 } },
+ { text: "with", sourceCodeRange: { startOffset: 76, endOffset: 80 } },
+ { text: " ", sourceCodeRange: { startOffset: 80, endOffset: 81 } },
+ { text: "a", sourceCodeRange: { startOffset: 81, endOffset: 82 } },
+ { text: " ", sourceCodeRange: { startOffset: 82, endOffset: 83 } },
+ { text: "black", sourceCodeRange: { startOffset: 83, endOffset: 88 } },
+ { text: " ", sourceCodeRange: { startOffset: 88, endOffset: 89 } },
+ { text: "belly", sourceCodeRange: { startOffset: 89, endOffset: 94 } },
+ { text: " ", sourceCodeRange: { startOffset: 94, endOffset: 95 } },
+ { text: "and", sourceCodeRange: { startOffset: 95, endOffset: 98 } },
+ { text: " ", sourceCodeRange: { startOffset: 98, endOffset: 99 } },
+ { text: "legs", sourceCodeRange: { startOffset: 99, endOffset: 103 } },
+ { text: ".", sourceCodeRange: { startOffset: 103, endOffset: 104 } },
+ ],
+ sourceCodeRange: { startOffset: 25, endOffset: 104 },
+ },
+ ],
+ },
+ { name: "#text", value: "\n",
+ sourceCodeRange: { startOffset: 108, endOffset: 109 } },
+ { name: "#text", value: "\n\n",
+ sourceCodeRange: { startOffset: 131, endOffset: 133 } },
+ { name: "#text", value: "\n",
+ sourceCodeRange: { startOffset: 149, endOffset: 150 } },
+ {
+ name: "ul",
+ attributes: {},
+ childNodes: [
+ { name: "#text", value: "\n", clientId: "ce5c002f-eea2-4a92-be4a-6fd25e947f45",
+ sourceCodeRange: { startOffset: 175, endOffset: 176 } },
+ {
+ name: "li",
+ attributes: {},
+ childNodes: [
+ {
+ name: "p",
+ attributes: {},
+ childNodes: [
+ { name: "#text", value: "giant panda", clientId: "b45eed06-594b-468e-8723-1f597689c300",
+ sourceCodeRange: { startOffset: 180, endOffset: 191 } },
+ ],
+ sourceCodeLocation: { startOffset: 180, endOffset: 191 },
+ isImplicit: true,
+ clientId: "b45eed06-594b-468e-8723-1f597689c300",
+ sentences: [
+ {
+ text: "giant panda",
+ tokens: [
+ { text: "giant", sourceCodeRange: { startOffset: 180, endOffset: 185 } },
+ { text: " ", sourceCodeRange: { startOffset: 185, endOffset: 186 } },
+ { text: "panda", sourceCodeRange: { startOffset: 186, endOffset: 191 } },
+ ],
+ sourceCodeRange: { startOffset: 180, endOffset: 191 },
+ },
+ ],
+ },
+ ],
+ sourceCodeLocation: {
+ startTag: { startOffset: 176, endOffset: 180 },
+ endTag: { startOffset: 191, endOffset: 196 },
+ startOffset: 176,
+ endOffset: 196,
+ },
+ clientId: "b45eed06-594b-468e-8723-1f597689c300",
+ },
+ { name: "#text", value: "\n", clientId: "ce5c002f-eea2-4a92-be4a-6fd25e947f45",
+ sourceCodeRange: { startOffset: 196, endOffset: 197 } },
+ { name: "#text", value: "\n\n", clientId: "ce5c002f-eea2-4a92-be4a-6fd25e947f45",
+ sourceCodeRange: { startOffset: 219, endOffset: 221 } },
+ { name: "#text", value: "\n", clientId: "ce5c002f-eea2-4a92-be4a-6fd25e947f45",
+ sourceCodeRange: { startOffset: 242, endOffset: 243 } },
+ {
+ name: "li",
+ attributes: {},
+ childNodes: [
+ {
+ name: "p",
+ attributes: {},
+ childNodes: [
+ { name: "#text", value: "red panda", clientId: "f8a22538-9e9d-4862-968d-524580f5d8ce",
+ sourceCodeRange: { startOffset: 247, endOffset: 256 } },
+ ],
+ sourceCodeLocation: { startOffset: 247, endOffset: 256 },
+ isImplicit: true,
+ clientId: "f8a22538-9e9d-4862-968d-524580f5d8ce",
+ sentences: [
+ {
+ text: "red panda",
+ tokens: [
+ { text: "red", sourceCodeRange: { startOffset: 247, endOffset: 250 } },
+ { text: " ", sourceCodeRange: { startOffset: 250, endOffset: 251 } },
+ { text: "panda", sourceCodeRange: { startOffset: 251, endOffset: 256 } },
+ ],
+ sourceCodeRange: { startOffset: 247, endOffset: 256 },
+ },
+ ],
+ },
+ ],
+ sourceCodeLocation: {
+ startTag: { startOffset: 243, endOffset: 247 },
+ endTag: { startOffset: 256, endOffset: 261 },
+ startOffset: 243, endOffset: 261,
+ },
+ clientId: "f8a22538-9e9d-4862-968d-524580f5d8ce",
+ },
+ { name: "#text", value: "\n", clientId: "ce5c002f-eea2-4a92-be4a-6fd25e947f45",
+ sourceCodeRange: { startOffset: 261, endOffset: 262 } },
+ ],
+ sourceCodeLocation: {
+ startTag: { startOffset: 150, endOffset: 154 },
+ endTag: { startOffset: 284, endOffset: 289 },
+ startOffset: 150,
+ endOffset: 289,
+ },
+ clientId: "ce5c002f-eea2-4a92-be4a-6fd25e947f45",
+ },
+ { name: "#text", value: "\n",
+ sourceCodeRange: { startOffset: 289, endOffset: 290 } },
+ ],
+ }
+ );
+ } );
} );
describe( "parsing html with Yoast blocks that enter the Paper as html comments", () => {
@@ -1366,42 +1603,91 @@ describe( "parsing html with Yoast blocks that enter the Paper as html comments"
"\n" +
"The Norwegian Forest cat is adapted to survive Norway's cold weather.
\n" +
"";
+ const paper = new Paper( html );
+
const researcher = Factory.buildMockResearcher( {}, true, false, false,
{ memoizedTokenizer: memoizedSentenceTokenizer } );
const languageProcessor = new LanguageProcessor( researcher );
- expect( build( html, languageProcessor ) ).toEqual( {
+ expect( build( paper, languageProcessor ) ).toEqual( {
name: "#document-fragment",
attributes: {},
childNodes: [
+ { name: "#text", value: "\n", sourceCodeRange: { startOffset: 55, endOffset: 56 } },
{
attributes: {},
- childNodes: [],
- name: "#comment",
- sourceCodeLocation: {
- endOffset: 34,
- startOffset: 0,
- },
- },
- {
- attributes: {},
- childNodes: [],
- name: "#comment",
+ childNodes: [
+ {
+ name: "#text",
+ value: "The Norwegian Forest cat is adapted to survive Norway's cold weather.",
+ sourceCodeRange: { startOffset: 59, endOffset: 128 } },
+ ],
+ isImplicit: false,
+ name: "p",
+ sentences: [
+ {
+ sourceCodeRange: {
+ endOffset: 128,
+ startOffset: 59,
+ },
+ text: "The Norwegian Forest cat is adapted to survive Norway's cold weather.",
+ tokens: [
+ { sourceCodeRange: { endOffset: 62, startOffset: 59 }, text: "The" },
+ { sourceCodeRange: { endOffset: 63, startOffset: 62 }, text: " " },
+ { sourceCodeRange: { endOffset: 72, startOffset: 63 }, text: "Norwegian" },
+ { sourceCodeRange: { endOffset: 73, startOffset: 72 }, text: " " },
+ { sourceCodeRange: { endOffset: 79, startOffset: 73 }, text: "Forest" },
+ { sourceCodeRange: { endOffset: 80, startOffset: 79 }, text: " " },
+ { sourceCodeRange: { endOffset: 83, startOffset: 80 }, text: "cat" },
+ { sourceCodeRange: { endOffset: 84, startOffset: 83 }, text: " " },
+ { sourceCodeRange: { endOffset: 86, startOffset: 84 }, text: "is" },
+ { sourceCodeRange: { endOffset: 87, startOffset: 86 }, text: " " },
+ { sourceCodeRange: { endOffset: 94, startOffset: 87 }, text: "adapted" },
+ { sourceCodeRange: { endOffset: 95, startOffset: 94 }, text: " " },
+ { sourceCodeRange: { endOffset: 97, startOffset: 95 }, text: "to" },
+ { sourceCodeRange: { endOffset: 98, startOffset: 97 }, text: " " },
+ { sourceCodeRange: { endOffset: 105, startOffset: 98 }, text: "survive" },
+ { sourceCodeRange: { endOffset: 106, startOffset: 105 }, text: " " },
+ { sourceCodeRange: { endOffset: 114, startOffset: 106 }, text: "Norway's" },
+ { sourceCodeRange: { endOffset: 115, startOffset: 114 }, text: " " },
+ { sourceCodeRange: { endOffset: 119, startOffset: 115 }, text: "cold" },
+ { sourceCodeRange: { endOffset: 120, startOffset: 119 }, text: " " },
+ { sourceCodeRange: { endOffset: 127, startOffset: 120 }, text: "weather" },
+ { sourceCodeRange: { endOffset: 128, startOffset: 127 }, text: "." },
+ ],
+ },
+ ],
sourceCodeLocation: {
- endOffset: 55,
- startOffset: 34,
+ endOffset: 132,
+ endTag: { endOffset: 132, startOffset: 128 },
+ startOffset: 56,
+ startTag: { endOffset: 59, startOffset: 56 },
},
},
- {
- name: "#text",
- value: "\n",
- },
+ { name: "#text", value: "\n", sourceCodeRange: { startOffset: 132, endOffset: 133 } },
+ ],
+ } );
+ } );
+
+ it( "parses an HTML text with a Yoast siblings block", () => {
+ const html = "Hello, world!
";
+ const paper = new Paper( html );
+
+ const researcher = Factory.buildMockResearcher( {}, true, false, false,
+ { memoizedTokenizer: memoizedSentenceTokenizer } );
+ const languageProcessor = new LanguageProcessor( researcher );
+
+ expect( build( paper, languageProcessor ) ).toEqual( {
+ name: "#document-fragment",
+ attributes: {},
+ childNodes: [
{
attributes: {},
childNodes: [
{
name: "#text",
- value: "The Norwegian Forest cat is adapted to survive Norway's cold weather.",
+ value: "Hello, world!",
+ sourceCodeRange: { startOffset: 3, endOffset: 16 },
},
],
isImplicit: false,
@@ -1409,238 +1695,22 @@ describe( "parsing html with Yoast blocks that enter the Paper as html comments"
sentences: [
{
sourceCodeRange: {
- endOffset: 128,
- startOffset: 59,
+ endOffset: 16,
+ startOffset: 3,
},
- text: "The Norwegian Forest cat is adapted to survive Norway's cold weather.",
+ text: "Hello, world!",
tokens: [
{
sourceCodeRange: {
- endOffset: 62,
- startOffset: 59,
+ endOffset: 8,
+ startOffset: 3,
},
- text: "The",
+ text: "Hello",
},
{
sourceCodeRange: {
- endOffset: 63,
- startOffset: 62,
- },
- text: " ",
- },
- {
- sourceCodeRange: {
- endOffset: 72,
- startOffset: 63,
- },
- text: "Norwegian",
- },
- {
- sourceCodeRange: {
- endOffset: 73,
- startOffset: 72,
- },
- text: " ",
- },
- {
- sourceCodeRange: {
- endOffset: 79,
- startOffset: 73,
- },
- text: "Forest",
- },
- {
- sourceCodeRange: {
- endOffset: 80,
- startOffset: 79,
- },
- text: " ",
- },
- {
- sourceCodeRange: {
- endOffset: 83,
- startOffset: 80,
- },
- text: "cat",
- },
- {
- sourceCodeRange: {
- endOffset: 84,
- startOffset: 83,
- },
- text: " ",
- },
- {
- sourceCodeRange: {
- endOffset: 86,
- startOffset: 84,
- },
- text: "is",
- },
- {
- sourceCodeRange: {
- endOffset: 87,
- startOffset: 86,
- },
- text: " ",
- },
- {
- sourceCodeRange: {
- endOffset: 94,
- startOffset: 87,
- },
- text: "adapted",
- },
- {
- sourceCodeRange: {
- endOffset: 95,
- startOffset: 94,
- },
- text: " ",
- },
- {
- sourceCodeRange: {
- endOffset: 97,
- startOffset: 95,
- },
- text: "to",
- },
- {
- sourceCodeRange: {
- endOffset: 98,
- startOffset: 97,
- },
- text: " ",
- },
- {
- sourceCodeRange: {
- endOffset: 105,
- startOffset: 98,
- },
- text: "survive",
- },
- {
- sourceCodeRange: {
- endOffset: 106,
- startOffset: 105,
- },
- text: " ",
- },
- {
- sourceCodeRange: {
- endOffset: 114,
- startOffset: 106,
- },
- text: "Norway's",
- },
- {
- sourceCodeRange: {
- endOffset: 115,
- startOffset: 114,
- },
- text: " ",
- },
- {
- sourceCodeRange: {
- endOffset: 119,
- startOffset: 115,
- },
- text: "cold",
- },
- {
- sourceCodeRange: {
- endOffset: 120,
- startOffset: 119,
- },
- text: " ",
- },
- {
- sourceCodeRange: {
- endOffset: 127,
- startOffset: 120,
- },
- text: "weather",
- },
- {
- sourceCodeRange: {
- endOffset: 128,
- startOffset: 127,
- },
- text: ".",
- },
- ],
- },
- ],
- sourceCodeLocation: {
- endOffset: 132,
- endTag: {
- endOffset: 132,
- startOffset: 128,
- },
- startOffset: 56,
- startTag: {
- endOffset: 59,
- startOffset: 56,
- },
- },
- },
- {
- name: "#text",
- value: "\n",
- },
- {
- attributes: {},
- childNodes: [],
- name: "#comment",
- sourceCodeLocation: {
- endOffset: 155,
- startOffset: 133,
- },
- },
- ],
- } );
- } );
-
- it( "parses an HTML text with a Yoast siblings block", () => {
- const html = "Hello, world!
";
-
- const researcher = Factory.buildMockResearcher( {}, true, false, false,
- { memoizedTokenizer: memoizedSentenceTokenizer } );
- const languageProcessor = new LanguageProcessor( researcher );
-
- expect( build( html, languageProcessor ) ).toEqual( {
- name: "#document-fragment",
- attributes: {},
- childNodes: [
- {
- attributes: {},
- childNodes: [
- {
- name: "#text",
- value: "Hello, world!",
- },
- ],
- isImplicit: false,
- name: "p",
- sentences: [
- {
- sourceCodeRange: {
- endOffset: 16,
- startOffset: 3,
- },
- text: "Hello, world!",
- tokens: [
- {
- sourceCodeRange: {
- endOffset: 8,
- startOffset: 3,
- },
- text: "Hello",
- },
- {
- sourceCodeRange: {
- endOffset: 9,
- startOffset: 8,
+ endOffset: 9,
+ startOffset: 8,
},
text: ",",
},
@@ -1681,27 +1751,19 @@ describe( "parsing html with Yoast blocks that enter the Paper as html comments"
},
},
},
- {
- attributes: {},
- childNodes: [],
- name: "#comment",
- sourceCodeLocation: {
- endOffset: 51,
- startOffset: 20,
- },
- },
],
} );
} );
it( "parses an HTML text with a Yoast subpages block", () => {
const html = "The Norwegian Forest cat is strongly built and larger than an average cat.
";
+ const paper = new Paper( html );
const researcher = Factory.buildMockResearcher( {}, true, false, false,
{ memoizedTokenizer: memoizedSentenceTokenizer } );
const languageProcessor = new LanguageProcessor( researcher );
- expect( build( html, languageProcessor ) ).toEqual( {
+ expect( build( paper, languageProcessor ) ).toEqual( {
name: "#document-fragment",
attributes: {},
childNodes: [
@@ -1714,6 +1776,7 @@ describe( "parsing html with Yoast blocks that enter the Paper as html comments"
{
name: "#text",
value: "The Norwegian Forest cat is strongly built and larger than an average cat.",
+ sourceCodeRange: { startOffset: 5, endOffset: 79 },
},
],
isImplicit: true,
@@ -1726,221 +1789,419 @@ describe( "parsing html with Yoast blocks that enter the Paper as html comments"
},
text: "The Norwegian Forest cat is strongly built and larger than an average cat.",
tokens: [
- {
- sourceCodeRange: {
- endOffset: 8,
- startOffset: 5,
- },
- text: "The",
- },
- {
- sourceCodeRange: {
- endOffset: 9,
- startOffset: 8,
- },
- text: " ",
- },
- {
- sourceCodeRange: {
- endOffset: 18,
- startOffset: 9,
- },
- text: "Norwegian",
- },
- {
- sourceCodeRange: {
- endOffset: 19,
- startOffset: 18,
- },
- text: " ",
- },
- {
- sourceCodeRange: {
- endOffset: 25,
- startOffset: 19,
- },
- text: "Forest",
- },
- {
- sourceCodeRange: {
- endOffset: 26,
- startOffset: 25,
- },
- text: " ",
- },
- {
- sourceCodeRange: {
- endOffset: 29,
- startOffset: 26,
- },
- text: "cat",
- },
- {
- sourceCodeRange: {
- endOffset: 30,
- startOffset: 29,
- },
- text: " ",
- },
- {
- sourceCodeRange: {
- endOffset: 32,
- startOffset: 30,
- },
- text: "is",
- },
- {
- sourceCodeRange: {
- endOffset: 33,
- startOffset: 32,
- },
- text: " ",
- },
- {
- sourceCodeRange: {
- endOffset: 41,
- startOffset: 33,
- },
- text: "strongly",
- },
- {
- sourceCodeRange: {
- endOffset: 42,
- startOffset: 41,
- },
- text: " ",
- },
- {
- sourceCodeRange: {
- endOffset: 47,
- startOffset: 42,
- },
- text: "built",
- },
- {
- sourceCodeRange: {
- endOffset: 48,
- startOffset: 47,
- },
- text: " ",
- },
- {
- sourceCodeRange: {
- endOffset: 51,
- startOffset: 48,
- },
- text: "and",
- },
- {
- sourceCodeRange: {
- endOffset: 52,
- startOffset: 51,
- },
- text: " ",
- },
- {
- sourceCodeRange: {
- endOffset: 58,
- startOffset: 52,
- },
- text: "larger",
- },
- {
- sourceCodeRange: {
- endOffset: 59,
- startOffset: 58,
- },
- text: " ",
- },
- {
- sourceCodeRange: {
- endOffset: 63,
- startOffset: 59,
- },
- text: "than",
- },
- {
- sourceCodeRange: {
- endOffset: 64,
- startOffset: 63,
- },
- text: " ",
- },
- {
- sourceCodeRange: {
- endOffset: 66,
- startOffset: 64,
- },
- text: "an",
- },
- {
- sourceCodeRange: {
- endOffset: 67,
- startOffset: 66,
- },
- text: " ",
- },
- {
- sourceCodeRange: {
- endOffset: 74,
- startOffset: 67,
- },
- text: "average",
- },
- {
- sourceCodeRange: {
- endOffset: 75,
- startOffset: 74,
- },
- text: " ",
- },
- {
- sourceCodeRange: {
- endOffset: 78,
- startOffset: 75,
- },
- text: "cat",
- },
- {
- sourceCodeRange: {
- endOffset: 79,
- startOffset: 78,
- },
- text: ".",
- },
+ { sourceCodeRange: { endOffset: 8, startOffset: 5 }, text: "The" },
+ { sourceCodeRange: { endOffset: 9, startOffset: 8 }, text: " " },
+ { sourceCodeRange: { endOffset: 18, startOffset: 9 }, text: "Norwegian" },
+ { sourceCodeRange: { endOffset: 19, startOffset: 18 }, text: " " },
+ { sourceCodeRange: { endOffset: 25, startOffset: 19 }, text: "Forest" },
+ { sourceCodeRange: { endOffset: 26, startOffset: 25 }, text: " " },
+ { sourceCodeRange: { endOffset: 29, startOffset: 26 }, text: "cat" },
+ { sourceCodeRange: { endOffset: 30, startOffset: 29 }, text: " " },
+ { sourceCodeRange: { endOffset: 32, startOffset: 30 }, text: "is" },
+ { sourceCodeRange: { endOffset: 33, startOffset: 32 }, text: " " },
+ { sourceCodeRange: { endOffset: 41, startOffset: 33 }, text: "strongly" },
+ { sourceCodeRange: { endOffset: 42, startOffset: 41 }, text: " " },
+ { sourceCodeRange: { endOffset: 47, startOffset: 42 }, text: "built" },
+ { sourceCodeRange: { endOffset: 48, startOffset: 47 }, text: " " },
+ { sourceCodeRange: { endOffset: 51, startOffset: 48 }, text: "and" },
+ { sourceCodeRange: { endOffset: 52, startOffset: 51 }, text: " " },
+ { sourceCodeRange: { endOffset: 58, startOffset: 52 }, text: "larger" },
+ { sourceCodeRange: { endOffset: 59, startOffset: 58 }, text: " " },
+ { sourceCodeRange: { endOffset: 63, startOffset: 59 }, text: "than" },
+ { sourceCodeRange: { endOffset: 64, startOffset: 63 }, text: " " },
+ { sourceCodeRange: { endOffset: 66, startOffset: 64 }, text: "an" },
+ { sourceCodeRange: { endOffset: 67, startOffset: 66 }, text: " " },
+ { sourceCodeRange: { endOffset: 74, startOffset: 67 }, text: "average" },
+ { sourceCodeRange: { endOffset: 75, startOffset: 74 }, text: " " },
+ { sourceCodeRange: { endOffset: 78, startOffset: 75 }, text: "cat" },
+ { sourceCodeRange: { endOffset: 79, startOffset: 78 }, text: "." },
],
},
],
- sourceCodeLocation: {
- endOffset: 79,
- startOffset: 5,
- },
+ sourceCodeLocation: { endOffset: 79, startOffset: 5 },
},
],
name: "div",
sourceCodeLocation: {
endOffset: 85,
- endTag: {
- endOffset: 85,
- startOffset: 79,
- },
+ endTag: { endOffset: 85, startOffset: 79 },
startOffset: 0,
- startTag: {
- endOffset: 5,
- startOffset: 0,
- },
+ startTag: { endOffset: 5, startOffset: 0 },
},
},
+ ],
+ } );
+ } );
+
+ it( "parses an HTML text with a Yoast FAQ block: " +
+ "The block client id, attribute id, and the information whether a child node is " +
+ "the first section in the sub-block should be added to the tree", () => {
+ const html = "\n" +
+ "" +
+ "
What is giant panda " +
+ "Giant panda is is relative to red panda
" +
+ "
" +
+ "giant panda is silly \n" +
+ "";
+ const paper = new Paper( html, {
+ wpBlocks: [
{
- attributes: {},
- childNodes: [],
- name: "#comment",
- sourceCodeLocation: {
- endOffset: 116,
- startOffset: 85,
+ clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1",
+ name: "yoast/faq-block",
+ isValid: true,
+ originalContent: "" +
+ "What is giant panda Giant panda" +
+ " is relative to red panda
giant panda is silly " +
+ "
",
+ validationIssues: [],
+ attributes: {
+ questions: [
+ {
+ id: "faq-question-1689322642789",
+ question: [ "What is giant panda" ],
+ answer: [ "Giant ", { type: "strong", props: { children: [ "panda" ] } }, " is relative to red panda" ],
+ jsonQuestion: "What is giant panda",
+ jsonAnswer: "Giant panda is is relative to red panda",
+ },
+ {
+ id: "faq-question-1689322667728",
+ question: [ "Test" ],
+ answer: [ "Tests" ],
+ jsonQuestion: "Test",
+ jsonAnswer: "Tests",
+ },
+ {
+ id: "faq-question-1689936392675",
+ question: [ "giant panda is silly" ],
+ answer: [],
+ jsonQuestion: "giant panda is silly",
+ jsonAnswer: "",
+ },
+ ],
},
+ innerBlocks: [],
+ startOffset: 2510,
+ contentOffset: 2963,
},
],
} );
+
+ const researcher = Factory.buildMockResearcher( {}, true, false, false,
+ { memoizedTokenizer: memoizedSentenceTokenizer } );
+ const languageProcessor = new LanguageProcessor( researcher );
+ expect( build( paper, languageProcessor ) ).toEqual(
+ {
+ name: "#document-fragment",
+ attributes: {},
+ childNodes: [
+ { name: "#text", value: "\n", sourceCodeRange: { startOffset: 458, endOffset: 459 } },
+ {
+ name: "div",
+ attributes: {
+ "class": new Set( [ "schema-faq", "wp-block-yoast-faq-block" ] ),
+ },
+ childNodes: [
+ {
+ name: "div",
+ attributes: { "class": new Set( [ "schema-faq-section" ] ), id: "faq-question-1689322642789" },
+ childNodes: [
+ {
+ name: "p",
+ attributes: {},
+ childNodes: [
+ {
+ name: "strong",
+ attributes: { "class": new Set( [ "schema-faq-question" ] ) },
+ childNodes: [
+ { name: "#text", value: "What is giant panda",
+ clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1",
+ sourceCodeRange: { startOffset: 608, endOffset: 627 } },
+ ],
+ sourceCodeLocation: {
+ startTag: { startOffset: 572, endOffset: 608 },
+ endTag: { startOffset: 627, endOffset: 636 },
+ startOffset: 572,
+ endOffset: 636,
+ },
+ clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1",
+ },
+ ],
+ sourceCodeLocation: { startOffset: 572, endOffset: 636 },
+ isImplicit: true,
+ attributeId: "faq-question-1689322642789",
+ isFirstSection: true,
+ clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1",
+ sentences: [
+ {
+ text: "What is giant panda",
+ tokens: [
+ { text: "What", sourceCodeRange: { startOffset: 608, endOffset: 612 } },
+ { text: " ", sourceCodeRange: { startOffset: 612, endOffset: 613 } },
+ { text: "is", sourceCodeRange: { startOffset: 613, endOffset: 615 } },
+ { text: " ", sourceCodeRange: { startOffset: 615, endOffset: 616 } },
+ { text: "giant", sourceCodeRange: { startOffset: 616, endOffset: 621 } },
+ { text: " ", sourceCodeRange: { startOffset: 621, endOffset: 622 } },
+ { text: "panda", sourceCodeRange: { startOffset: 622, endOffset: 627 } },
+ ],
+ sourceCodeRange: { startOffset: 608, endOffset: 627 },
+ },
+ ],
+ },
+ { name: "#text", value: " ", clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1",
+ sourceCodeRange: { startOffset: 636, endOffset: 637 } },
+ {
+ name: "p",
+ attributes: { "class": new Set( [ "schema-faq-answer" ] ) },
+ childNodes: [
+ { name: "#text", value: "Giant ", clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1",
+ sourceCodeRange: { startOffset: 666, endOffset: 672 } },
+ {
+ name: "strong",
+ attributes: {},
+ childNodes: [
+ { name: "#text", value: "panda", clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1",
+ sourceCodeRange: { startOffset: 680, endOffset: 685 } },
+ ],
+ sourceCodeLocation: {
+ startTag: { startOffset: 672, endOffset: 680 },
+ endTag: { startOffset: 685, endOffset: 694 },
+ startOffset: 672,
+ endOffset: 694,
+ },
+ clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1",
+ },
+ { name: "#text", value: " is is relative to red panda",
+ clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1",
+ sourceCodeRange: { startOffset: 694, endOffset: 722 } },
+ ],
+ sourceCodeLocation: {
+ startTag: { startOffset: 637, endOffset: 666 },
+ endTag: { startOffset: 722, endOffset: 726 },
+ startOffset: 637,
+ endOffset: 726,
+ },
+ isImplicit: false,
+ attributeId: "faq-question-1689322642789",
+ isFirstSection: false,
+ clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1",
+ sentences: [
+ {
+ text: "Giant panda is is relative to red panda",
+ tokens: [
+ { text: "Giant", sourceCodeRange: { startOffset: 666, endOffset: 671 } },
+ { text: " ", sourceCodeRange: { startOffset: 671, endOffset: 672 } },
+ { text: "panda", sourceCodeRange: { startOffset: 680, endOffset: 685 } },
+ { text: " ", sourceCodeRange: { startOffset: 694, endOffset: 695 } },
+ { text: "is", sourceCodeRange: { startOffset: 695, endOffset: 697 } },
+ { text: " ", sourceCodeRange: { startOffset: 697, endOffset: 698 } },
+ { text: "is", sourceCodeRange: { startOffset: 698, endOffset: 700 } },
+ { text: " ", sourceCodeRange: { startOffset: 700, endOffset: 701 } },
+ { text: "relative", sourceCodeRange: { startOffset: 701, endOffset: 709 } },
+ { text: " ", sourceCodeRange: { startOffset: 709, endOffset: 710 } },
+ { text: "to", sourceCodeRange: { startOffset: 710, endOffset: 712 } },
+ { text: " ", sourceCodeRange: { startOffset: 712, endOffset: 713 } },
+ { text: "red", sourceCodeRange: { startOffset: 713, endOffset: 716 } },
+ { text: " ", sourceCodeRange: { startOffset: 716, endOffset: 717 } },
+ { text: "panda", sourceCodeRange: { startOffset: 717, endOffset: 722 } },
+ ],
+ sourceCodeRange: { startOffset: 666, endOffset: 722 },
+ },
+ ],
+ },
+ { name: "#text", value: " ", clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1",
+ sourceCodeRange: { startOffset: 726, endOffset: 727 } },
+ ],
+ sourceCodeLocation: {
+ startTag: { startOffset: 508, endOffset: 572 },
+ endTag: { startOffset: 727, endOffset: 733 },
+ startOffset: 508,
+ endOffset: 733,
+ },
+ clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1",
+ },
+ { name: "#text", value: " ", clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1",
+ sourceCodeRange: { startOffset: 733, endOffset: 734 } },
+ {
+ name: "div",
+ attributes: { "class": new Set( [ "schema-faq-section" ] ), id: "faq-question-1689322667728" },
+ childNodes: [
+ {
+ name: "p",
+ attributes: {},
+ childNodes: [
+ {
+ name: "strong",
+ attributes: { "class": new Set( [ "schema-faq-question" ] ) },
+ childNodes: [
+ { name: "#text", value: "Test", clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1",
+ sourceCodeRange: { startOffset: 834, endOffset: 838 } },
+ ],
+ sourceCodeLocation: {
+ startTag: { startOffset: 798, endOffset: 834 },
+ endTag: { startOffset: 838, endOffset: 847 },
+ startOffset: 798,
+ endOffset: 847,
+ },
+ clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1",
+ },
+ ],
+ sourceCodeLocation: { startOffset: 798, endOffset: 847 },
+ isImplicit: true,
+ attributeId: "faq-question-1689322667728",
+ isFirstSection: true,
+ clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1",
+ sentences: [
+ {
+ text: "Test",
+ tokens: [
+ { text: "Test", sourceCodeRange: { startOffset: 834, endOffset: 838 } },
+ ],
+ sourceCodeRange: { startOffset: 834, endOffset: 838 },
+ },
+ ],
+ },
+ { name: "#text", value: " ", clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1",
+ sourceCodeRange: { startOffset: 847, endOffset: 848 } },
+ {
+ name: "p",
+ attributes: { "class": new Set( [ "schema-faq-answer" ] ) },
+ childNodes: [
+ { name: "#text", value: "Test", clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1",
+ sourceCodeRange: { startOffset: 877, endOffset: 881 } },
+ ],
+ sourceCodeLocation: {
+ startTag: { startOffset: 848, endOffset: 877 },
+ endTag: { startOffset: 881, endOffset: 885 },
+ startOffset: 848,
+ endOffset: 885,
+ },
+ isImplicit: false,
+ attributeId: "faq-question-1689322667728",
+ isFirstSection: false,
+ clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1",
+ sentences: [
+ {
+ text: "Test",
+ tokens: [
+ { text: "Test", sourceCodeRange: { startOffset: 877, endOffset: 881 } },
+ ],
+ sourceCodeRange: { startOffset: 877, endOffset: 881 },
+ },
+ ],
+ },
+ { name: "#text", value: " ", clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1",
+ sourceCodeRange: { startOffset: 885, endOffset: 886 } },
+ ],
+ sourceCodeLocation: {
+ startTag: { startOffset: 734, endOffset: 798 },
+ endTag: { startOffset: 886, endOffset: 892 },
+ startOffset: 734,
+ endOffset: 892,
+ },
+ clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1",
+ },
+ { name: "#text", value: " ", clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1",
+ sourceCodeRange: { startOffset: 892, endOffset: 893 } },
+ {
+ name: "div",
+ attributes: { "class": new Set( [ "schema-faq-section" ] ), id: "faq-question-1689936392675" },
+ childNodes: [
+ {
+ name: "p",
+ attributes: {},
+ childNodes: [
+ {
+ name: "strong",
+ attributes: { "class": new Set( [ "schema-faq-question" ] ) },
+ childNodes: [
+ { name: "#text", value: "giant panda is silly",
+ clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1",
+ sourceCodeRange: { startOffset: 993, endOffset: 1013 } },
+ ],
+ sourceCodeLocation: {
+ startTag: { startOffset: 957, endOffset: 993 },
+ endTag: { startOffset: 1013, endOffset: 1022 },
+ startOffset: 957,
+ endOffset: 1022,
+ },
+ clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1",
+ },
+ ],
+ sourceCodeLocation: { startOffset: 957, endOffset: 1022 },
+ isImplicit: true,
+ attributeId: "faq-question-1689936392675",
+ isFirstSection: true,
+ clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1",
+ sentences: [
+ {
+ text: "giant panda is silly",
+ tokens: [
+ { text: "giant", sourceCodeRange: { startOffset: 993, endOffset: 998 } },
+ { text: " ", sourceCodeRange: { startOffset: 998, endOffset: 999 } },
+ { text: "panda", sourceCodeRange: { startOffset: 999, endOffset: 1004 } },
+ { text: " ", sourceCodeRange: { startOffset: 1004, endOffset: 1005 } },
+ { text: "is", sourceCodeRange: { startOffset: 1005, endOffset: 1007 } },
+ { text: " ", sourceCodeRange: { startOffset: 1007, endOffset: 1008 } },
+ { text: "silly", sourceCodeRange: { startOffset: 1008, endOffset: 1013 } },
+ ],
+ sourceCodeRange: { startOffset: 993, endOffset: 1013 },
+ },
+ ],
+ },
+ { name: "#text", value: " ", clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1",
+ sourceCodeRange: { startOffset: 1022, endOffset: 1023 } },
+ {
+ name: "p",
+ attributes: { "class": new Set( [ "schema-faq-answer" ] ) },
+ childNodes: [],
+ sourceCodeLocation: {
+ startTag: { startOffset: 1023, endOffset: 1052 },
+ endTag: { startOffset: 1052, endOffset: 1056 },
+ startOffset: 1023,
+ endOffset: 1056,
+ },
+ isImplicit: false,
+ attributeId: "faq-question-1689936392675",
+ isFirstSection: false,
+ clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1",
+ sentences: [],
+ },
+ { name: "#text", value: " ", clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1",
+ sourceCodeRange: { startOffset: 1056, endOffset: 1057 } },
+ ],
+ sourceCodeLocation: {
+ startTag: { startOffset: 893, endOffset: 957 },
+ endTag: { startOffset: 1057, endOffset: 1063 },
+ startOffset: 893,
+ endOffset: 1063,
+ },
+ clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1",
+ },
+ { name: "#text", value: " ", clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1",
+ sourceCodeRange: { startOffset: 1063, endOffset: 1064 } },
+ ],
+ sourceCodeLocation: {
+ startTag: { startOffset: 459, endOffset: 508 },
+ endTag: { startOffset: 1064, endOffset: 1070 },
+ startOffset: 459,
+ endOffset: 1070,
+ },
+ clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1",
+ },
+ { name: "#text", value: "\n", sourceCodeRange: { startOffset: 1070, endOffset: 1071 } },
+ ],
+ }
+ );
} );
} );
diff --git a/packages/yoastseo/spec/parse/build/private/adaptSpec.js b/packages/yoastseo/spec/parse/build/private/adaptSpec.js
index 05395403764..16924f775c5 100644
--- a/packages/yoastseo/spec/parse/build/private/adaptSpec.js
+++ b/packages/yoastseo/spec/parse/build/private/adaptSpec.js
@@ -8,7 +8,7 @@ describe( "The adapt function",
() => {
it( "adapts a basic div element", () => {
const html = "";
- const tree = parseFragment( html );
+ const tree = parseFragment( html, { sourceCodeLocationInfo: true } );
const adaptedTree = adapt( tree );
@@ -17,17 +17,45 @@ describe( "The adapt function",
childNodes: [ {
attributes: {},
childNodes: [ {
+ name: "p",
+ isImplicit: false,
+ sourceCodeLocation: {
+ startOffset: 5,
+ endOffset: 39,
+ startTag: {
+ startOffset: 5,
+ endOffset: 22,
+ },
+ endTag: {
+ startOffset: 35,
+ endOffset: 39,
+ },
+ },
attributes: {
"class": new Set( [ "yoast" ] ),
},
childNodes: [ {
name: "#text",
value: "Hello, world!",
+ sourceCodeRange: {
+ startOffset: 22,
+ endOffset: 35,
+ },
} ],
- isImplicit: false,
- name: "p",
} ],
name: "div",
+ sourceCodeLocation: {
+ startOffset: 0,
+ endOffset: 45,
+ startTag: {
+ startOffset: 0,
+ endOffset: 5,
+ },
+ endTag: {
+ startOffset: 39,
+ endOffset: 45,
+ },
+ },
} ],
name: "#document-fragment",
};
@@ -37,21 +65,20 @@ describe( "The adapt function",
it( "adapts a heading element", () => {
const html = "Hello World!
";
- const tree = parseFragment( html );
-
+ const tree = parseFragment( html, { sourceCodeLocationInfo: true } );
const adaptedTree = adapt( tree );
const expected = new Node( "#document-fragment", {}, [
new Heading( 1, {}, [
- new Text( "Hello World!" ),
- ] ),
+ new Text( { value: "Hello World!", sourceCodeLocation: { startOffset: 4, endOffset: 16 } } ),
+ ], { startOffset: 0, endOffset: 21, startTag: { startOffset: 0, endOffset: 4 }, endTag: { startOffset: 16, endOffset: 21 } } ),
] );
expect( adaptedTree ).toEqual( expected );
} );
it( "adapts a tree with a code element inside a paragraph", () => {
const html = "Hello World! function()
Hello Yoast!
";
- const tree = parseFragment( html );
+ const tree = parseFragment( html, { sourceCodeLocationInfo: true } );
const adaptedTree = adapt( tree );
@@ -63,24 +90,60 @@ describe( "The adapt function",
name: "p",
isImplicit: false,
attributes: {},
+ sourceCodeLocation: {
+ startOffset: 0,
+ endOffset: 56,
+ startTag: {
+ startOffset: 0,
+ endOffset: 3,
+ },
+ endTag: {
+ startOffset: 52,
+ endOffset: 56,
+ },
+ },
childNodes: [
{
name: "#text",
value: "Hello World! ",
+ sourceCodeRange: {
+ startOffset: 3,
+ endOffset: 16,
+ },
},
{
name: "code",
attributes: {},
+ sourceCodeLocation: {
+ startOffset: 16,
+ endOffset: 39,
+ startTag: {
+ startOffset: 16,
+ endOffset: 22,
+ },
+ endTag: {
+ startOffset: 32,
+ endOffset: 39,
+ },
+ },
childNodes: [
{
name: "#text",
value: "function()",
+ sourceCodeRange: {
+ startOffset: 22,
+ endOffset: 32,
+ },
},
],
},
{
name: "#text",
value: " Hello Yoast!",
+ sourceCodeRange: {
+ startOffset: 39,
+ endOffset: 52,
+ },
},
],
},
@@ -91,7 +154,7 @@ describe( "The adapt function",
it( "adapts a tree with a code element within a sentence", () => {
const html = "Hello push()
World!
";
- const tree = parseFragment( html );
+ const tree = parseFragment( html, { sourceCodeLocationInfo: true } );
const adaptedTree = adapt( tree );
@@ -103,24 +166,60 @@ describe( "The adapt function",
name: "p",
isImplicit: false,
attributes: {},
+ sourceCodeLocation: {
+ startOffset: 0,
+ endOffset: 39,
+ startTag: {
+ startOffset: 0,
+ endOffset: 3,
+ },
+ endTag: {
+ startOffset: 35,
+ endOffset: 39,
+ },
+ },
childNodes: [
{
name: "#text",
value: "Hello ",
+ sourceCodeRange: {
+ startOffset: 3,
+ endOffset: 9,
+ },
},
{
name: "code",
attributes: {},
+ sourceCodeLocation: {
+ startOffset: 9,
+ endOffset: 28,
+ startTag: {
+ startOffset: 9,
+ endOffset: 15,
+ },
+ endTag: {
+ startOffset: 21,
+ endOffset: 28,
+ },
+ },
childNodes: [
{
name: "#text",
value: "push()",
+ sourceCodeRange: {
+ startOffset: 15,
+ endOffset: 21,
+ },
},
],
},
{
name: "#text",
value: " World!",
+ sourceCodeRange: {
+ startOffset: 28,
+ endOffset: 35,
+ },
},
],
},
@@ -131,7 +230,7 @@ describe( "The adapt function",
it( "adapts a tree with a script element", () => {
const html = "";
- const tree = parseFragment( html );
+ const tree = parseFragment( html, { sourceCodeLocationInfo: true } );
const adaptedTree = adapt( tree );
@@ -142,19 +241,51 @@ describe( "The adapt function",
{
name: "div",
attributes: {},
+ sourceCodeLocation: {
+ startOffset: 0,
+ endOffset: 69,
+ startTag: {
+ startOffset: 0,
+ endOffset: 5,
+ },
+ endTag: {
+ startOffset: 63,
+ endOffset: 69,
+ },
+ },
childNodes: [
{
name: "p",
isImplicit: true,
attributes: {},
+ sourceCodeLocation: {
+ startOffset: 5,
+ endOffset: 44,
+ },
childNodes: [
{
name: "script",
attributes: {},
+ sourceCodeLocation: {
+ startOffset: 5,
+ endOffset: 44,
+ startTag: {
+ startOffset: 5,
+ endOffset: 13,
+ },
+ endTag: {
+ startOffset: 35,
+ endOffset: 44,
+ },
+ },
childNodes: [
{
name: "#text",
value: "alert(\"Hello World!\");",
+ sourceCodeRange: {
+ startOffset: 13,
+ endOffset: 35,
+ },
},
],
},
@@ -164,10 +295,26 @@ describe( "The adapt function",
name: "p",
isImplicit: false,
attributes: {},
+ sourceCodeLocation: {
+ startOffset: 44,
+ endOffset: 63,
+ startTag: {
+ startOffset: 44,
+ endOffset: 47,
+ },
+ endTag: {
+ startOffset: 59,
+ endOffset: 63,
+ },
+ },
childNodes: [
{
name: "#text",
value: "Hello World!",
+ sourceCodeRange: {
+ startOffset: 47,
+ endOffset: 59,
+ },
},
],
},
@@ -180,7 +327,7 @@ describe( "The adapt function",
it( "adapts a tree with a script element within a paragraph", () => {
const html = "";
- const tree = parseFragment( html );
+ const tree = parseFragment( html, { sourceCodeLocationInfo: true } );
const adaptedTree = adapt( tree );
@@ -191,25 +338,69 @@ describe( "The adapt function",
{
name: "div",
attributes: {},
+ sourceCodeLocation: {
+ startOffset: 0,
+ endOffset: 70,
+ startTag: {
+ startOffset: 0,
+ endOffset: 5,
+ },
+ endTag: {
+ startOffset: 64,
+ endOffset: 70,
+ },
+ },
childNodes: [
{
name: "p",
isImplicit: false,
attributes: {},
+ sourceCodeLocation: {
+ startOffset: 5,
+ endOffset: 64,
+ startTag: {
+ startOffset: 5,
+ endOffset: 8,
+ },
+ endTag: {
+ startOffset: 60,
+ endOffset: 64,
+ },
+ },
childNodes: [
{
name: "script",
attributes: {},
+ sourceCodeLocation: {
+ startOffset: 8,
+ endOffset: 47,
+ startTag: {
+ startOffset: 8,
+ endOffset: 16,
+ },
+ endTag: {
+ startOffset: 38,
+ endOffset: 47,
+ },
+ },
childNodes: [
{
name: "#text",
value: "alert(\"Hello World!\");",
+ sourceCodeRange: {
+ startOffset: 16,
+ endOffset: 38,
+ },
},
],
},
{
name: "#text",
value: " Hello World!",
+ sourceCodeRange: {
+ startOffset: 47,
+ endOffset: 60,
+ },
},
],
},
@@ -222,7 +413,7 @@ describe( "The adapt function",
it( "adapts a tree with a style element", () => {
const html = "Hello World!
";
- const tree = parseFragment( html );
+ const tree = parseFragment( html, { sourceCodeLocationInfo: true } );
const adaptedTree = adapt( tree );
@@ -233,15 +424,35 @@ describe( "The adapt function",
{
name: "style",
attributes: {},
+ sourceCodeLocation: {
+ startOffset: 0,
+ endOffset: 36,
+ startTag: {
+ startOffset: 0,
+ endOffset: 7,
+ },
+ endTag: {
+ startOffset: 28,
+ endOffset: 36,
+ },
+ },
childNodes: [
{
name: "p",
isImplicit: true,
attributes: {},
+ sourceCodeLocation: {
+ startOffset: 7,
+ endOffset: 28,
+ },
childNodes: [
{
name: "#text",
value: "div { color: #FF00FF}",
+ sourceCodeRange: {
+ startOffset: 7,
+ endOffset: 28,
+ },
},
],
},
@@ -251,10 +462,26 @@ describe( "The adapt function",
name: "p",
isImplicit: false,
attributes: {},
+ sourceCodeLocation: {
+ startOffset: 36,
+ endOffset: 55,
+ startTag: {
+ startOffset: 36,
+ endOffset: 39,
+ },
+ endTag: {
+ startOffset: 51,
+ endOffset: 55,
+ },
+ },
childNodes: [
{
name: "#text",
value: "Hello World!",
+ sourceCodeRange: {
+ startOffset: 39,
+ endOffset: 51,
+ },
},
],
},
@@ -266,7 +493,7 @@ describe( "The adapt function",
it( "adapts a tree with a blockquote element", () => {
const html = "";
- const tree = parseFragment( html );
+ const tree = parseFragment( html, { sourceCodeLocationInfo: true } );
const adaptedTree = adapt( tree );
@@ -277,30 +504,78 @@ describe( "The adapt function",
{
name: "div",
attributes: {},
+ sourceCodeLocation: {
+ startOffset: 0,
+ endOffset: 67,
+ startTag: {
+ startOffset: 0,
+ endOffset: 5,
+ },
+ endTag: {
+ startOffset: 61,
+ endOffset: 67,
+ },
+ },
childNodes: [
{
name: "p",
isImplicit: false,
attributes: {},
+ sourceCodeLocation: {
+ startOffset: 5,
+ endOffset: 24,
+ startTag: {
+ startOffset: 5,
+ endOffset: 8,
+ },
+ endTag: {
+ startOffset: 20,
+ endOffset: 24,
+ },
+ },
childNodes: [
{
name: "#text",
value: "Hello World!",
+ sourceCodeRange: {
+ startOffset: 8,
+ endOffset: 20,
+ },
},
],
},
{
name: "blockquote",
attributes: {},
+ sourceCodeLocation: {
+ startOffset: 24,
+ endOffset: 61,
+ startTag: {
+ startOffset: 24,
+ endOffset: 36,
+ },
+ endTag: {
+ startOffset: 48,
+ endOffset: 61,
+ },
+ },
childNodes: [
{
name: "p",
isImplicit: true,
attributes: {},
+ sourceCodeLocation: {
+ startOffset: 36,
+ endOffset: 48,
+ },
childNodes: [
{
name: "#text",
value: "Hello Yoast!",
+ sourceCodeRange: {
+ startOffset: 36,
+ endOffset: 48,
+ },
},
],
},
@@ -313,8 +588,23 @@ describe( "The adapt function",
expect( adaptedTree ).toEqual( expected );
} );
+ it( "should correctly adapt a fragment with an (overarching) paragraph element that has double line breaks as child nodes", () => {
+ const html = "test
test
test
";
+ const tree = parseFragment( html, { sourceCodeLocationInfo: true } );
+
+ const adaptedTree = adapt( tree );
+
+ const overarchingParagraph = adaptedTree.childNodes[ 0 ];
+ expect( overarchingParagraph.name ).toEqual( "p-overarching" );
+ expect( overarchingParagraph.childNodes[ 0 ].name ).toEqual( "p" );
+ expect( overarchingParagraph.childNodes[ 0 ].isImplicit ).toBeTruthy();
+ expect( overarchingParagraph.childNodes[ 1 ].name ).toEqual( "br" );
+ expect( overarchingParagraph.childNodes[ 2 ].name ).toEqual( "br" );
+ expect( overarchingParagraph.childNodes[ 3 ].name ).toEqual( "p" );
+ expect( overarchingParagraph.childNodes[ 3 ].isImplicit ).toBeTruthy();
+ } );
- it( "should correctly adapt a node with no childnodes.", () => {
+ it( "should correctly adapt a node with no childNodes.", () => {
const tree = { nodeName: "div" };
const adaptedTree = adapt( tree );
diff --git a/packages/yoastseo/spec/parse/build/private/combineIntoImplicitParagraphsSpec.js b/packages/yoastseo/spec/parse/build/private/combineIntoImplicitParagraphsSpec.js
index ae4d35c4a35..a09a1fe9eea 100644
--- a/packages/yoastseo/spec/parse/build/private/combineIntoImplicitParagraphsSpec.js
+++ b/packages/yoastseo/spec/parse/build/private/combineIntoImplicitParagraphsSpec.js
@@ -2,6 +2,7 @@ import combineIntoImplicitParagraphs from "../../../../src/parse/build/private/c
describe( "The combineIntoImplicitParagraphs function", () => {
it( "combines phrasing content into paragraphs", () => {
+ // #text
const nodes = [
{ name: "p" },
{ name: "span" },
@@ -34,6 +35,7 @@ describe( "The combineIntoImplicitParagraphs function", () => {
} );
it( "correctly handles an InterElementWhitespace", () => {
+ // #text
const nodes = [
{ name: "p" },
{ name: "span" },
@@ -75,6 +77,7 @@ describe( "The combineIntoImplicitParagraphs function", () => {
} );
it( "correctly handles an element with children", () => {
+ // #text
const nodes = [
{ name: "p" },
{ name: "span" },
@@ -105,4 +108,37 @@ describe( "The combineIntoImplicitParagraphs function", () => {
expect( implicitParagraphs ).toEqual( expected );
} );
+
+ it( "correctly handles double and single br tags: the former should lead to ending implicit paragraphs", () => {
+ // #text<br><br>#text<br>#text
+ const nodes = [
+ { name: "#text" },
+ { name: "br" },
+ { name: "br" },
+ { name: "#text" },
+ { name: "br" },
+ { name: "#text" },
+ ];
+
+ const implicitParagraphs = combineIntoImplicitParagraphs( nodes );
+
+ const expected = [
+ {
+ name: "p",
+ attributes: {},
+ childNodes: [ { name: "#text" } ],
+ isImplicit: true,
+ },
+ { name: "br" },
+ { name: "br" },
+ {
+ name: "p",
+ attributes: {},
+ childNodes: [ { name: "#text" }, { name: "br" }, { name: "#text" } ],
+ isImplicit: true,
+ },
+ ];
+
+ expect( implicitParagraphs ).toEqual( expected );
+ } );
} );
diff --git a/packages/yoastseo/spec/parse/build/private/filterBeforeTokenizingSpec.js b/packages/yoastseo/spec/parse/build/private/filterBeforeTokenizingSpec.js
index eb57224f5af..7f39f532c54 100644
--- a/packages/yoastseo/spec/parse/build/private/filterBeforeTokenizingSpec.js
+++ b/packages/yoastseo/spec/parse/build/private/filterBeforeTokenizingSpec.js
@@ -21,6 +21,10 @@ describe( "A test for filterBeforeTokenizing", () => {
{
name: "#text",
value: "Some text and code ",
+ sourceCodeRange: {
+ startOffset: 3,
+ endOffset: 22,
+ },
},
{
name: "code",
@@ -41,16 +45,16 @@ describe( "A test for filterBeforeTokenizing", () => {
},
],
sourceCodeLocation: {
- endOffset: 58,
- endTag: {
- endOffset: 58,
- startOffset: 54,
- },
startOffset: 0,
+ endOffset: 58,
startTag: {
endOffset: 3,
startOffset: 0,
},
+ endTag: {
+ startOffset: 54,
+ endOffset: 58,
+ },
},
},
],
@@ -107,6 +111,10 @@ describe( "A test for filterBeforeTokenizing", () => {
{
name: "#text",
value: "Hello, world!",
+ sourceCodeRange: {
+ startOffset: 53,
+ endOffset: 66,
+ },
},
],
sourceCodeLocation: {
@@ -160,14 +168,18 @@ describe( "A test for filterBeforeTokenizing", () => {
{
name: "#text",
value: "Some text and code ",
+ sourceCodeRange: {
+ startOffset: 3,
+ endOffset: 22,
+ },
},
{
name: "code",
attributes: {},
childNodes: [],
sourceCodeLocation: {
- endOffset: 71,
startOffset: 22,
+ endOffset: 71,
startTag: {
startOffset: 22,
endOffset: 28,
@@ -180,15 +192,15 @@ describe( "A test for filterBeforeTokenizing", () => {
},
],
sourceCodeLocation: {
- endOffset: 75,
- endTag: {
- endOffset: 75,
- startOffset: 71,
- },
startOffset: 0,
+ endOffset: 75,
startTag: {
- endOffset: 3,
startOffset: 0,
+ endOffset: 3,
+ },
+ endTag: {
+ startOffset: 71,
+ endOffset: 75,
},
},
},
@@ -215,6 +227,10 @@ describe( "A test for filterBeforeTokenizing", () => {
{
name: "#text",
value: "Some text and code ",
+ sourceCodeRange: {
+ startOffset: 3,
+ endOffset: 22,
+ },
},
{
name: "code",
diff --git a/packages/yoastseo/spec/parse/build/private/filterTreeSpec.js b/packages/yoastseo/spec/parse/build/private/filterTreeSpec.js
index 3eb34bd1b3e..f54d00d605f 100644
--- a/packages/yoastseo/spec/parse/build/private/filterTreeSpec.js
+++ b/packages/yoastseo/spec/parse/build/private/filterTreeSpec.js
@@ -167,6 +167,20 @@ const samplesWithOneOccurrence = [
element: "title",
html: "\n \nNews and gossip\n A paragraph
",
},
+ {
+ element: "cite",
+ html: "\n\nThere are many craft ideas that are interesting to both children and adults." +
+ " Fun for the whole family!
",
+ },
+ {
+ element: "output",
+ html: "\n\nArmadillos are cute animals.
",
+ },
+ {
+ element: "samp",
+ html: "\n\nThe automated chat saidThe answer is incorrectbut I wasn't " +
+ "sure why.
",
+ },
];
describe.each( samplesWithOneOccurrence )( "Tests HTML elements, part 1 ", ( htmlElement ) => {
@@ -198,6 +212,11 @@ const samplesWithTwoOccurrences = [
html: "\n\nIf a triangle has one 90 degrees angle and one 30" +
"degrees angle, how big is the remaining angle?
Fun for the whole family
",
},
+ {
+ element: "form",
+ html: "\n\n",
+ },
];
describe.each( samplesWithTwoOccurrences )( "Tests HTML elements, part 2", ( htmlElement ) => {
@@ -219,6 +238,15 @@ describe( "Miscellaneous tests", () => {
expect( filteredTree.findAll( child => child.name === "strong" ) ).toHaveLength( 1 );
} );
+ it( "shouldn't filter out elements like , which are included in the list of excluded elements", () => {
+ const html = "Welcome to the blue screen of death.
";
+
+ const tree = adapt( parseFragment( html, { sourceCodeLocationInfo: true } ) );
+ expect( tree.findAll( child => child.name === "strong" ) ).toHaveLength( 1 );
+ const filteredTree = filterTree( tree, permanentFilters );
+ expect( filteredTree.findAll( child => child.name === "strong" ) ).toHaveLength( 1 );
+ } );
+
it( "should filter out elements like when nested within excluded elements, e.g. ", () => {
const html = "Welcome to the blue screen of death.
Kiss your computer goodbye.
";
@@ -244,6 +272,12 @@ describe( "Miscellaneous tests", () => {
expect( tree.findAll( child => child.name === "head" ) ).toHaveLength( 0 );
} );
+ it( "should filter out map elements", () => {
+ // The head element seems to be removed by the parser we employ.
+ const html = "\nAbout artificial intelligence\n\n