diff --git a/admin/class-admin-asset-manager.php b/admin/class-admin-asset-manager.php index e1559802f8f..d9c16f15333 100644 --- a/admin/class-admin-asset-manager.php +++ b/admin/class-admin-asset-manager.php @@ -622,6 +622,11 @@ protected function styles_to_be_registered() { 'src' => 'new-settings-' . $flat_version, 'deps' => [ self::PREFIX . 'tailwind' ], ], + [ + 'name' => 'black-friday-banner', + 'src' => 'black-friday-banner-' . $flat_version, + 'deps' => [ self::PREFIX . 'tailwind' ], + ], [ 'name' => 'academy', 'src' => 'academy-' . $flat_version, diff --git a/admin/class-admin-utils.php b/admin/class-admin-utils.php index b907214f95a..7b513777b4b 100644 --- a/admin/class-admin-utils.php +++ b/admin/class-admin-utils.php @@ -75,6 +75,7 @@ public static function get_install_link( $plugin ) { public static function get_new_tab_message() { return sprintf( '%s', + /* translators: Hidden accessibility text. */ esc_html__( '(Opens in a new browser tab)', 'wordpress-seo' ) ); } diff --git a/admin/class-bulk-editor-list-table.php b/admin/class-bulk-editor-list-table.php index 016fe6a1a5e..57891a63add 100644 --- a/admin/class-bulk-editor-list-table.php +++ b/admin/class-bulk-editor-list-table.php @@ -408,6 +408,7 @@ public function extra_tablenav( $which ) { printf( '', esc_attr( 'post-type-filter-' . $instance_type ), + /* translators: Hidden accessibility text. */ esc_html__( 'Filter by content type', 'wordpress-seo' ) ); printf( @@ -809,7 +810,7 @@ protected function parse_page_title_column( $rec ) { $actions['edit'] = sprintf( '%s', esc_url( get_edit_post_link( $rec->ID, true ) ), - /* translators: %s: post title */ + /* translators: Hidden accessibility text; %s: post title. 
*/ esc_attr( sprintf( __( 'Edit “%s”', 'wordpress-seo' ), $title ) ), __( 'Edit', 'wordpress-seo' ) ); @@ -821,7 +822,7 @@ protected function parse_page_title_column( $rec ) { $actions['view'] = sprintf( '%s', esc_url( add_query_arg( 'preview', 'true', get_permalink( $rec->ID ) ) ), - /* translators: %s: post title */ + /* translators: Hidden accessibility text; %s: post title. */ esc_attr( sprintf( __( 'Preview “%s”', 'wordpress-seo' ), $title ) ), __( 'Preview', 'wordpress-seo' ) ); @@ -831,7 +832,7 @@ protected function parse_page_title_column( $rec ) { $actions['view'] = sprintf( '%s', esc_url( get_permalink( $rec->ID ) ), - /* translators: %s: post title */ + /* translators: Hidden accessibility text; %s: post title. */ esc_attr( sprintf( __( 'View “%s”', 'wordpress-seo' ), $title ) ), __( 'View', 'wordpress-seo' ) ); diff --git a/admin/class-config.php b/admin/class-config.php index 47dcb5048e8..69d99529f1d 100644 --- a/admin/class-config.php +++ b/admin/class-config.php @@ -91,9 +91,10 @@ public function config_page_scripts() { 'isRtl' => is_rtl(), 'isPremium' => YoastSEO()->helpers->product->is_premium(), 'isWooCommerceActive' => $woocommerce_conditional->is_met(), + 'currentPromotions' => YoastSEO()->classes->get( Yoast\WP\SEO\Promotions\Application\Promotion_Manager::class )->get_current_promotions(), 'webinarIntroSettingsUrl' => WPSEO_Shortlinker::get( 'https://yoa.st/webinar-intro-settings' ), - 'blackFridayBlockEditorUrl' => ( YoastSEO()->classes->get( Yoast\WP\SEO\Promotions\Application\Promotion_Manager::class )->is( 'black_friday_2023_checklist' ) ) ? 
WPSEO_Shortlinker::get( 'https://yoa.st/black-friday-checklist' ) : '', 'webinarIntroFirstTimeConfigUrl' => $this->get_webinar_shortlink(), + 'linkParams' => WPSEO_Shortlinker::get_query_params(), 'pluginUrl' => \plugins_url( '', \WPSEO_FILE ), ]; diff --git a/admin/class-gutenberg-compatibility.php b/admin/class-gutenberg-compatibility.php index 80451943a24..568f73c8f1c 100644 --- a/admin/class-gutenberg-compatibility.php +++ b/admin/class-gutenberg-compatibility.php @@ -15,14 +15,14 @@ class WPSEO_Gutenberg_Compatibility { * * @var string */ - const CURRENT_RELEASE = '16.6.0'; + const CURRENT_RELEASE = '16.8.0'; /** * The minimally supported version of Gutenberg by the plugin. * * @var string */ - const MINIMUM_SUPPORTED = '16.6.0'; + const MINIMUM_SUPPORTED = '16.8.0'; /** * Holds the current version. diff --git a/admin/class-meta-columns.php b/admin/class-meta-columns.php index b25206aeb4d..1f348cee81e 100644 --- a/admin/class-meta-columns.php +++ b/admin/class-meta-columns.php @@ -97,11 +97,19 @@ public function column_heading( $columns ) { $added_columns = []; if ( $this->analysis_seo->is_enabled() ) { - $added_columns['wpseo-score'] = '' . __( 'SEO score', 'wordpress-seo' ) . ''; + $added_columns['wpseo-score'] = '' . + __( 'SEO score', 'wordpress-seo' ) . + ''; } if ( $this->analysis_readability->is_enabled() ) { - $added_columns['wpseo-score-readability'] = '' . __( 'Readability score', 'wordpress-seo' ) . ''; + $added_columns['wpseo-score-readability'] = '' . + __( 'Readability score', 'wordpress-seo' ) . + ''; } $added_columns['wpseo-title'] = __( 'SEO Title', 'wordpress-seo' ); @@ -154,6 +162,7 @@ public function column_content( $column_name, $post_id ) { } if ( $metadesc_val === '' ) { echo '', + /* translators: Hidden accessibility text. 
*/ esc_html__( 'Meta description not set.', 'wordpress-seo' ), ''; @@ -169,6 +178,7 @@ public function column_content( $column_name, $post_id ) { if ( $focuskw_val === '' ) { echo '', + /* translators: Hidden accessibility text. */ esc_html__( 'Focus keyphrase not set.', 'wordpress-seo' ), ''; @@ -238,6 +248,7 @@ public function posts_filter_dropdown() { $ranks = WPSEO_Rank::get_all_ranks(); + /* translators: Hidden accessibility text. */ echo ''; echo ''; diff --git a/admin/class-premium-popup.php b/admin/class-premium-popup.php index 372f1179dbc..008876946d6 100644 --- a/admin/class-premium-popup.php +++ b/admin/class-premium-popup.php @@ -78,7 +78,8 @@ public function get_premium_message( $popup = true ) { $assets_uri = trailingslashit( plugin_dir_url( WPSEO_FILE ) ); /* translators: %s expands to Yoast SEO Premium */ - $cta_text = esc_html( sprintf( __( 'Get %s', 'wordpress-seo' ), 'Yoast SEO Premium' ) ); + $cta_text = esc_html( sprintf( __( 'Get %s', 'wordpress-seo' ), 'Yoast SEO Premium' ) ); + /* translators: Hidden accessibility text. */ $new_tab_message = '' . esc_html__( '(Opens in a new browser tab)', 'wordpress-seo' ) . ''; $caret_icon = ''; $classes = ''; diff --git a/admin/class-premium-upsell-admin-block.php b/admin/class-premium-upsell-admin-block.php index d14fbf6be2d..572e20601c3 100644 --- a/admin/class-premium-upsell-admin-block.php +++ b/admin/class-premium-upsell-admin-block.php @@ -5,6 +5,8 @@ * @package WPSEO\Admin */ +use Yoast\WP\SEO\Promotions\Application\Promotion_Manager; + /** * Class WPSEO_Premium_Upsell_Admin_Block */ @@ -66,7 +68,8 @@ public function render() { $class = $this->get_html_class(); /* translators: %s expands to Yoast SEO Premium */ - $button_text = sprintf( esc_html__( 'Get %s', 'wordpress-seo' ), 'Yoast SEO Premium' ); + $button_text = YoastSEO()->classes->get( Promotion_Manager::class )->is( 'black-friday-2023-promotion' ) ? 
\esc_html__( 'Claim your 30% off now!', 'wordpress-seo' ) : sprintf( esc_html__( 'Get %s', 'wordpress-seo' ), 'Yoast SEO Premium' ); + /* translators: Hidden accessibility text. */ $button_text .= '' . esc_html__( '(Opens in a new browser tab)', 'wordpress-seo' ) . '' . ''; @@ -79,7 +82,14 @@ public function render() { echo '
'; - echo '
'; + if ( YoastSEO()->classes->get( Promotion_Manager::class )->is( 'black-friday-2023-promotion' ) ) { + $bf_label = \esc_html__( 'BLACK FRIDAY', 'wordpress-seo' ); + $sale_label = \esc_html__( '30% OFF', 'wordpress-seo' ); + // phpcs:ignore WordPress.Security.EscapeOutput.OutputNotEscaped -- Already escaped above. + echo "
$bf_label $sale_label
"; + } + + echo '
' . $upsell_button . '
' . PHP_EOL . PHP_EOL; } diff --git a/admin/formatter/class-metabox-formatter.php b/admin/formatter/class-metabox-formatter.php index 9100491e1c0..d7689f45437 100644 --- a/admin/formatter/class-metabox-formatter.php +++ b/admin/formatter/class-metabox-formatter.php @@ -240,6 +240,7 @@ private function get_content_analysis_component_translations() { 'content-analysis.highlight' => __( 'Highlight this result in the text', 'wordpress-seo' ), 'content-analysis.nohighlight' => __( 'Remove highlight from the text', 'wordpress-seo' ), 'content-analysis.disabledButton' => __( 'Marks are disabled in current view', 'wordpress-seo' ), + /* translators: Hidden accessibility text. */ 'a11yNotice.opensInNewTab' => __( '(Opens in a new browser tab)', 'wordpress-seo' ), ]; } diff --git a/admin/menu/class-admin-menu.php b/admin/menu/class-admin-menu.php index df97b3185c1..53d037e3893 100644 --- a/admin/menu/class-admin-menu.php +++ b/admin/menu/class-admin-menu.php @@ -114,12 +114,10 @@ protected function get_notification_counter() { $notification_count = $notification_center->get_notification_count(); // Add main page. - /* translators: %s: number of notifications */ + /* translators: Hidden accessibility text; %s: number of notifications. */ $notifications = sprintf( _n( '%s notification', '%s notifications', $notification_count, 'wordpress-seo' ), number_format_i18n( $notification_count ) ); - $counter = sprintf( '%2$s', $notification_count, $notifications ); - - return $counter; + return sprintf( '%2$s', $notification_count, $notifications ); } /** diff --git a/admin/menu/class-base-menu.php b/admin/menu/class-base-menu.php index 59cda24f245..a840987dc4b 100644 --- a/admin/menu/class-base-menu.php +++ b/admin/menu/class-base-menu.php @@ -5,6 +5,8 @@ * @package WPSEO\Admin\Menu */ +use Yoast\WP\SEO\Promotions\Application\Promotion_Manager; + /** * Admin menu base class. 
*/ @@ -258,6 +260,10 @@ protected function get_license_page_title() { $title = __( 'Premium', 'wordpress-seo' ); } + if ( YoastSEO()->classes->get( Promotion_Manager::class )->is( 'black-friday-2023-promotion' ) && ! YoastSEO()->helpers->product->is_premium() ) { + $title = __( 'Premium', 'wordpress-seo' ) . '' . __( '30% OFF', 'wordpress-seo' ) . ''; + } + return $title; } diff --git a/admin/metabox/class-metabox.php b/admin/metabox/class-metabox.php index 527801cb7eb..31fda910e33 100644 --- a/admin/metabox/class-metabox.php +++ b/admin/metabox/class-metabox.php @@ -893,8 +893,7 @@ public function enqueue() { 'has_taxonomies' => $this->current_post_type_has_taxonomies(), ], 'shortcodes' => [ - 'wpseo_filter_shortcodes_nonce' => wp_create_nonce( 'wpseo-filter-shortcodes' ), - 'wpseo_shortcode_tags' => $this->get_valid_shortcode_tags(), + 'wpseo_shortcode_tags' => $this->get_valid_shortcode_tags(), ], ]; @@ -931,7 +930,7 @@ public function enqueue() { 'dismissedAlerts' => $dismissed_alerts, 'currentPromotions' => YoastSEO()->classes->get( Yoast\WP\SEO\Promotions\Application\Promotion_Manager::class )->get_current_promotions(), 'webinarIntroBlockEditorUrl' => WPSEO_Shortlinker::get( 'https://yoa.st/webinar-intro-block-editor' ), - 'blackFridayBlockEditorUrl' => ( YoastSEO()->classes->get( Yoast\WP\SEO\Promotions\Application\Promotion_Manager::class )->is( 'black_friday_2023_checklist' ) ) ? WPSEO_Shortlinker::get( 'https://yoa.st/black-friday-checklist' ) : '', + 'blackFridayBlockEditorUrl' => ( YoastSEO()->classes->get( Yoast\WP\SEO\Promotions\Application\Promotion_Manager::class )->is( 'black-friday-2023-checklist' ) ) ? WPSEO_Shortlinker::get( 'https://yoa.st/black-friday-checklist' ) : '', 'isJetpackBoostActive' => ( $is_block_editor ) ? YoastSEO()->classes->get( Jetpack_Boost_Active_Conditional::class )->is_met() : false, 'isJetpackBoostNotPremium' => ( $is_block_editor ) ? 
YoastSEO()->classes->get( Jetpack_Boost_Not_Premium_Conditional::class )->is_met() : false, 'isWooCommerceActive' => $woocommerce_active, diff --git a/admin/taxonomy/class-taxonomy-columns.php b/admin/taxonomy/class-taxonomy-columns.php index 91905940e7b..6e3304d4dc4 100644 --- a/admin/taxonomy/class-taxonomy-columns.php +++ b/admin/taxonomy/class-taxonomy-columns.php @@ -84,11 +84,13 @@ public function add_columns( array $columns ) { $new_columns[ $column_name ] = $column_value; if ( $column_name === 'description' && $this->analysis_seo->is_enabled() ) { - $new_columns['wpseo-score'] = '' . __( 'SEO score', 'wordpress-seo' ) . ''; + $new_columns['wpseo-score'] = '' . + __( 'SEO score', 'wordpress-seo' ) . ''; } if ( $column_name === 'description' && $this->analysis_readability->is_enabled() ) { - $new_columns['wpseo-score-readability'] = '' . __( 'Readability score', 'wordpress-seo' ) . ''; + $new_columns['wpseo-score-readability'] = '' . + __( 'Readability score', 'wordpress-seo' ) . ''; } } diff --git a/admin/taxonomy/class-taxonomy.php b/admin/taxonomy/class-taxonomy.php index 963adff2fdb..7cb90e8e32b 100644 --- a/admin/taxonomy/class-taxonomy.php +++ b/admin/taxonomy/class-taxonomy.php @@ -163,6 +163,9 @@ public function admin_enqueue_scripts() { 'recommended_replace_vars' => $this->get_recommended_replace_vars(), 'scope' => $this->determine_scope(), ], + 'shortcodes' => [ + 'wpseo_shortcode_tags' => $this->get_valid_shortcode_tags(), + ], ], 'worker' => [ 'url' => YoastSEO()->helpers->asset->get_asset_url( 'yoast-seo-analysis-worker' ), @@ -446,4 +449,19 @@ private function get_recommended_replace_vars() { return $recommended_replace_vars->get_recommended_replacevars_for( $page_type ); } + + /** + * Returns an array with shortcode tags for all registered shortcodes. 
+ * + * @return array + */ + private function get_valid_shortcode_tags() { + $shortcode_tags = []; + + foreach ( $GLOBALS['shortcode_tags'] as $tag => $description ) { + $shortcode_tags[] = $tag; + } + + return $shortcode_tags; + } } diff --git a/admin/views/js-templates-primary-term.php b/admin/views/js-templates-primary-term.php index a1387529a76..684ef87d52b 100644 --- a/admin/views/js-templates-primary-term.php +++ b/admin/views/js-templates-primary-term.php @@ -14,7 +14,7 @@ inside an element we want to exclude" + - " from the analysis, density should be 0%", { keyword: "keyword" } ); - mockResearcher = new EnglishResearcher( mockPaper ); - mockResearcher.addResearchData( "morphology", morphologyDataEN ); - expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 0 ); - mockPaper = new Paper( "Waltz keepin auf mitz auf keepin äöüß weiner blitz deutsch spitzen. ", { keyword: "äöüß" } ); - mockResearcher = new EnglishResearcher( mockPaper ); + it( "should recognize a keyphrase when it consists umlauted characters", function() { + const mockPaper = new Paper( "

Waltz keepin auf mitz auf keepin äöüß weiner blitz deutsch spitzen.

", { keyword: "äöüß" } ); + const mockResearcher = new EnglishResearcher( mockPaper ); mockResearcher.addResearchData( "morphology", morphologyDataEN ); + buildTree( mockPaper, mockResearcher ); + expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 9.090909090909092 ); + } ); - mockPaper = new Paper( "Lorem ipsum dolor sit amet, key word consectetur key-word adipiscing elit ", { keyword: "key-word" } ); - mockResearcher = new EnglishResearcher( mockPaper ); + it( "should recognize a hyphenated keyphrase.", function() { + const mockPaper = new Paper( "

Lorem ipsum dolor sit amet, key word consectetur key-word adipiscing elit

", { keyword: "key-word" } ); + const mockResearcher = new EnglishResearcher( mockPaper ); mockResearcher.addResearchData( "morphology", morphologyDataEN ); + buildTree( mockPaper, mockResearcher ); expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 9.090909090909092 ); + } ); - mockPaper = new Paper( "a string of text with the kapaklı in it, density should be 7.7%", { keyword: "kapaklı" } ); - mockResearcher = new EnglishResearcher( mockPaper ); - mockResearcher.addResearchData( "morphology", morphologyDataEN ); + it( "should recognize a keyphrase with an underscore in it", function() { + const mockPaper = new Paper( "

a string of text with the key_word in it, density should be 7.7%

", { keyword: "key_word" } ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 7.6923076923076925 ); + } ); - mockPaper = new Paper( "a string of text with the key-word in it, density should be 7.7%", { keyword: "key-word" } ); - mockResearcher = new EnglishResearcher( mockPaper ); - mockResearcher.addResearchData( "morphology", morphologyDataEN ); - expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 7.6923076923076925 ); - mockPaper = new Paper( "a string of text with the key_word in it, density should be 7.7%", { keyword: "key_word" } ); - expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 7.6923076923076925 ); - mockPaper = new Paper( "a string of text with the key_word in it, density should be 0.0%", { keyword: "key word" } ); + it( "should return zero if a multiword keyphrase is not present but the multiword keyphrase occurs with an underscore", function() { + const mockPaper = new Paper( "

a string of text with the key_word in it, density should be 0.0%

", { keyword: "key word" } ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 0 ); - mockPaper = new Paper( "a string of text with the key-word in it, density should be 7.7%", { keyword: "key word" } ); - // This behavior might change in the future. + } ); + + it( "should recognize a multiword keyphrase when it is occurs hyphenated", function() { + const mockPaper = new Paper( "

a string of text with the key-word in it, density should be 7.7%

", { keyword: "key word" } ); + const mockResearcher = new EnglishResearcher( mockPaper ); + + buildTree( mockPaper, mockResearcher ); expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 7.6923076923076925 ); - mockPaper = new Paper( "a string of text with the key&word in it, density should be 7.7%", { keyword: "key&word" } ); + } ); + + it( "should recognize a keyphrase with an ampersand in it", function() { + const mockPaper = new Paper( "

a string of text with the key&word in it, density should be 7.7%

", { keyword: "key&word" } ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 7.6923076923076925 ); - mockPaper = new Paper( "", { keyword: "key&word" } ); - expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 0 ); - // Consecutive keywords are skipped, so this will match 2 times. - mockPaper = new Paper( "This is a nice string with a keyword keyword keyword.", { keyword: "keyword" } ); - expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 20 ); - mockPaper = new Paper( "a string of text with the $keyword in it, density should be 7.7%", { keyword: "$keyword" } ); + } ); + + it( "should include all keyphrase occurrence if there are more than two consecutive", function() { + const mockPaper = new Paper( "

This is a nice string with a keyword keyword keyword keyword keyword keyword keyword keyword keyword.

", + { keyword: "keyword" } ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 56.25 ); + } ); + + it( "should recognize a keyphrase with a '$' in it", function() { + const mockPaper = new Paper( "

a string of text with the $keyword in it, density should be 7.7%

", { keyword: "$keyword" } ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 7.6923076923076925 ); - mockPaper = new Paper( "a string of text with the Keyword in it, density should be 7.7%", { keyword: "keyword" } ); + } ); + + it( "should recognize a keyphrase regardless of capitalization", function() { + const mockPaper = new Paper( "

a string of text with the Keyword in it, density should be 7.7%

", { keyword: "keyword" } ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 7.6923076923076925 ); - mockPaper = new Paper( "a string of text with the Key word in it, density should be 14.29%", { keyword: "key word" } ); + } ); + + it( "should recognize a multiword keyphrase regardless of capitalization", function() { + const mockPaper = new Paper( "

a string of text with the Key word in it, density should be 14.29%

", { keyword: "key word" } ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 7.142857142857142 ); - mockPaper = new Paper( "a string with quotes to match the key'word, even if the quotes differ", { keyword: "key’word" } ); + } ); + + it( "should recognize apostrophes regardless of the type of quote that was used", function() { + const mockPaper = new Paper( "

a string with quotes to match the key'word, even if the quotes differ

", { keyword: "key’word" } ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 7.6923076923076925 ); } ); + it( "should recognize keyphrase separated by  ", function() { + const mockPaper = new Paper( "

a string with non-breaking space to match the key word

", { keyword: "key word" } ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 10 ); + } ); + it( "should return 0 when the paper contains only excluded element", function() { + const mockPaper = new Paper( "
In the United States and parts of Europe, " + + "tortoiseshell cats are often considered lucky charms.
" ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 0 ); + } ); + it( "should return 0 when the paper is empty", function() { + const mockPaper = new Paper( "" ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 0 ); + } ); } ); -describe( "test for counting the keyword density in a text in a language that we haven't supported yet", function() { - it( "returns keyword density", function() { - const mockPaper = new Paper( "Ukara sing isine cemeng.", { keyword: "cemeng" } ); - expect( getKeywordDensity( mockPaper, new DefaultResearcher( mockPaper ) ) ).toBe( 25 ); +describe( "test for counting the keyword density in a non-English and non-Japanese language", function() { + it( "returns keyword density for a language that we haven't had the support yet", function() { + const mockPaper = new Paper( "Ukara sing isine cemeng.", { keyword: "cemeng", locale: "jv_ID" } ); + const mockResearcher = new DefaultResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + expect( getKeywordDensity( mockPaper, mockResearcher ) ).toBe( 25 ); + } ); + it( "should correctly recognize a Turkish transliterated keyphrase with a 'ı' in it", function() { + const mockPaper = new Paper( "a string of text with the kapakli in it, density should be 7.7%

", { keyword: "kapaklı", locale: "tr_TR" } ); + const mockResearcher = new TurkishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + expect( getKeywordDensity( mockPaper, new EnglishResearcher( mockPaper ) ) ).toBe( 7.6923076923076925 ); } ); } ); describe( "test for counting the keyword density in a text in a language that uses a custom getWords helper (Japanese)", function() { - /*it( "returns keyword density when the keyword is not found in the sentence", function() { - const mockPaper = new Paper( "小さくて可愛い花の刺繍に関する一般一般の記事です。", { keyword: "猫" } ); + it( "returns keyword density when the keyword is not found in the sentence", function() { + const mockPaper = new Paper( "

小さくて可愛い花の刺繍に関する一般一般の記事です。

", { keyword: "猫" } ); const mockResearcher = new JapaneseResearcher( mockPaper ); mockResearcher.addResearchData( "morphology", morphologyDataJA ); + buildTree( mockPaper, mockResearcher ); + expect( getKeywordDensity( mockPaper, mockResearcher ) ).toBe( 0 ); - } );*/ + } ); it( "returns the keyword density when the keyword is found once", function() { - const mockPaper = new Paper( "私の猫はかわいいです。", { keyword: "猫" } ); + const mockPaper = new Paper( "

私の猫はかわいいです。

", { keyword: "猫" } ); const mockResearcher = new JapaneseResearcher( mockPaper ); mockResearcher.addResearchData( "morphology", morphologyDataJA ); + buildTree( mockPaper, mockResearcher ); expect( getKeywordDensity( mockPaper, mockResearcher ) ).toBe( 16.666666666666664 ); } ); it( "returns the keyword density when the keyword contains multiple words and the sentence contains an inflected form", function() { // 小さく is the inflected form of 小さい. - const mockPaper = new Paper( "小さくて可愛い花の刺繍に関する一般一般の記事です。", { keyword: "小さい花の刺繍" } ); + const mockPaper = new Paper( "

小さくて可愛い花の刺繍に関する一般一般の記事です。

", { keyword: "小さい花の刺繍" } ); const mockResearcher = new JapaneseResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); mockResearcher.addResearchData( "morphology", morphologyDataJA ); expect( getKeywordDensity( mockPaper, mockResearcher ) ).toBe( 6.666666666666667 ); } ); - /*it( "returns the keyword density when the morphologyData is not available to detect inflected forms", function() { - // 小さく is the inflected form of 小さい. - const mockPaper = new Paper( "小さくて可愛い花の刺繍に関する一般一般の記事です。", { keyword: "小さい花の刺繍" } ); - const mockResearcher = new JapaneseResearcher( mockPaper ); - expect( getKeywordDensity( mockPaper, mockResearcher ) ).toBe( 0 ); - } ); - - it( "returns the keyword density when the keyword contains multiple words with an exact match separated in the sentence", function() { - const mockPaper = new Paper( "一日一冊の面白い本を買って読んでるのはできるかどうかやってみます。", { keyword: "一冊の本を読む" } ); - const mockResearcher = new JapaneseResearcher( mockPaper ); - mockResearcher.addResearchData( "morphology", morphologyDataJA ); - expect( getKeywordDensity( mockPaper, mockResearcher ) ).toBe( 5 ); - } ); - it( "returns the keyword density when the keyword contains multiple words with an exact match in the sentence", function() { - const mockPaper = new Paper( "一日一冊の本を読むのはできるかどうかやってみます。", { keyword: "一冊の本を読む" } ); + const mockPaper = new Paper( "

一日一冊の本を読むのはできるかどうかやってみます。

", { keyword: "『一冊の本を読む』" } ); const mockResearcher = new JapaneseResearcher( mockPaper ); mockResearcher.addResearchData( "morphology", morphologyDataJA ); + buildTree( mockPaper, mockResearcher ); + expect( getKeywordDensity( mockPaper, mockResearcher ) ).toBe( 6.666666666666667 ); } ); - - it( "returns 0 when the text is empty", function() { - const mockPaper = new Paper( "", { keyword: "猫" } ); - const mockResearcher = new JapaneseResearcher( mockPaper ); - mockResearcher.addResearchData( "morphology", morphologyDataJA ); - expect( getKeywordDensity( mockPaper, mockResearcher ) ).toBe( 0 ); - } );*/ } ); diff --git a/packages/yoastseo/spec/languageProcessing/researches/getParagraphLengthSpec.js b/packages/yoastseo/spec/languageProcessing/researches/getParagraphLengthSpec.js index e274380fc23..858b85c4259 100644 --- a/packages/yoastseo/spec/languageProcessing/researches/getParagraphLengthSpec.js +++ b/packages/yoastseo/spec/languageProcessing/researches/getParagraphLengthSpec.js @@ -82,6 +82,11 @@ describe( "a test for getting paragraph length", function() { expect( getParagraphLength( mockPaper, new EnglishResearcher() ).length ).toBe( 1 ); } ); + it( "returns the paragraph length, ignoring shortcodes", function() { + const mockPaper = new Paper( "

test [shortcode]

", { shortcodes: [ "shortcode" ] } ); + expect( getParagraphLength( mockPaper, new EnglishResearcher() ).length ).toBe( 1 ); + } ); + it( "returns the paragraph length of paragraph without p tags or double linebreaks, but with h2 tags", function() { const mockPaper = new Paper( "

Lorem ipsum dolor sit amet

" ); expect( getParagraphLength( mockPaper, new EnglishResearcher() )[ 0 ].countLength ).toBe( 5 ); diff --git a/packages/yoastseo/spec/languageProcessing/researches/getSentenceBeginningsSpec.js b/packages/yoastseo/spec/languageProcessing/researches/getSentenceBeginningsSpec.js index 8c7f22a806d..4635d21cac8 100644 --- a/packages/yoastseo/spec/languageProcessing/researches/getSentenceBeginningsSpec.js +++ b/packages/yoastseo/spec/languageProcessing/researches/getSentenceBeginningsSpec.js @@ -118,6 +118,21 @@ describe( "gets the sentence beginnings and the count of consecutive duplicates. expect( getSentenceBeginnings( mockPaper, researcher )[ 1 ].count ).toBe( 1 ); } ); + it( "ignores shortcodes", function() { + mockPaper = new Paper( "Cats are fluffy. Dogs are happy. [shortcode]Unicorns cant code.", { shortcodes: [ "shortcode" ] } ); + researcher = new EnglishResearcher( mockPaper ); + + const sentenceBeginnings = getSentenceBeginnings( mockPaper, researcher ); + + expect( sentenceBeginnings ).toHaveLength( 3 ); + expect( getSentenceBeginnings( mockPaper, researcher )[ 0 ].word ).toBe( "cats" ); + expect( getSentenceBeginnings( mockPaper, researcher )[ 0 ].count ).toBe( 1 ); + expect( getSentenceBeginnings( mockPaper, researcher )[ 1 ].word ).toBe( "dogs" ); + expect( getSentenceBeginnings( mockPaper, researcher )[ 1 ].count ).toBe( 1 ); + expect( getSentenceBeginnings( mockPaper, researcher )[ 2 ].word ).toBe( "unicorns" ); + expect( getSentenceBeginnings( mockPaper, researcher )[ 2 ].count ).toBe( 1 ); + } ); + it( "returns an object with English sentence beginnings with paragraph tags - it should match over paragraphs", function() { mockPaper = new Paper( "

Sentence 1. Sentence 2.

Sentence 3.

" ); researcher = new EnglishResearcher( mockPaper ); diff --git a/packages/yoastseo/spec/languageProcessing/researches/getSubheadingTextLengthSpec.js b/packages/yoastseo/spec/languageProcessing/researches/getSubheadingTextLengthSpec.js index bdf9cc4a06e..c59b61abb76 100644 --- a/packages/yoastseo/spec/languageProcessing/researches/getSubheadingTextLengthSpec.js +++ b/packages/yoastseo/spec/languageProcessing/researches/getSubheadingTextLengthSpec.js @@ -57,6 +57,18 @@ describe( "gets the length of text segments", function() { expect( foundSubheadingsTextLength( mockPaper, englishResearcher )[ 2 ].countLength ).toBe( 10 ); } ); + it( "should not include a shortcode when calculating the text length", function() { + const mockPaper = new Paper( "

heading with [shortcode]

this is a text string with a [shortcode]", + { shortcodes: [ "shortcode" ] } ); + const englishResearcher = new EnglishResearcher( mockPaper ); + // Check the total number of subheading blocks. + expect( foundSubheadingsTextLength( mockPaper, englishResearcher ).length ).toBe( 1 ); + // Check the length of each individual text segment. + expect( foundSubheadingsTextLength( mockPaper, englishResearcher )[ 0 ].subheading ).toBe( "

heading with

" ); + expect( foundSubheadingsTextLength( mockPaper, englishResearcher )[ 0 ].text ).toBe( "this is a text string with a " ); + expect( foundSubheadingsTextLength( mockPaper, englishResearcher )[ 0 ].countLength ).toBe( 7 ); + } ); + it( "does not count text inside elements we want to exclude from the analysis ", function() { const mockPaper = new Paper( "

test

one two threeone two three" ); const englishResearcher = new EnglishResearcher( mockPaper ); diff --git a/packages/yoastseo/spec/languageProcessing/researches/h1sSpec.js b/packages/yoastseo/spec/languageProcessing/researches/h1sSpec.js index c1c11800792..3755197aa3e 100644 --- a/packages/yoastseo/spec/languageProcessing/researches/h1sSpec.js +++ b/packages/yoastseo/spec/languageProcessing/researches/h1sSpec.js @@ -1,54 +1,72 @@ import h1s from "../../../src/languageProcessing/researches/h1s.js"; import Paper from "../../../src/values/Paper.js"; +import buildTree from "../../specHelpers/parse/buildTree"; +import EnglishResearcher from "../../../src/languageProcessing/languages/en/Researcher.js"; describe( "Gets all H1s in the text", function() { it( "should return empty when there is no H1", function() { - const mockPaper = new Paper( "some content

content h2

" ); + const mockPaper = new Paper( "

some content

content h2

" ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); expect( h1s( mockPaper ) ).toEqual( [] ); } ); it( "should return empty when there is only empty H1s", function() { const mockPaper = new Paper( "some content

other content

" ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + expect( h1s( mockPaper ) ).toEqual( [] ); } ); it( "should return one object when there is one H1", function() { const mockPaper = new Paper( "

first h1

some content

content h2

" ); - expect( h1s( mockPaper ) ).toEqual( [ { tag: "h1", content: "first h1", position: 0 } ] ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + expect( h1s( mockPaper ) ).toEqual( [ { tag: "h1", content: "first h1", position: { startOffset: 4, endOffset: 12, clientId: "" } } ] ); } ); it( "should return all H1s in the text", function() { const mockPaper = new Paper( "

first h1

not an h1

second h1

not an h1

" ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); expect( h1s( mockPaper ) ).toEqual( [ - { tag: "h1", content: "first h1", position: 0 }, - { tag: "h1", content: "second h1", position: 2 }, + { tag: "h1", content: "first h1", position: { startOffset: 4, endOffset: 12, clientId: "" } }, + { tag: "h1", content: "second h1", position: { startOffset: 37, endOffset: 46, clientId: "" } }, ] ); } ); it( "should return two h1s next to each other", function() { const mockPaper = new Paper( "

first h1

second h1

not an h1

" ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); expect( h1s( mockPaper ) ).toEqual( [ - { tag: "h1", content: "first h1", position: 0 }, - { tag: "h1", content: "second h1", position: 1 }, + { tag: "h1", content: "first h1", position: { startOffset: 4, endOffset: 12, clientId: "" } }, + { tag: "h1", content: "second h1", position: { startOffset: 21, endOffset: 30, clientId: "" } }, ] ); } ); it( "should rightly ignore empty paragraphs or empty blocks", function() { const mockPaper = new Paper( "

\n

first h1

second h1

not an h1

" ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); expect( h1s( mockPaper ) ).toEqual( [ - { tag: "h1", content: "first h1", position: 0 }, - { tag: "h1", content: "second h1", position: 1 }, + { tag: "h1", content: "first h1", position: { startOffset: 12, endOffset: 20, clientId: "" } }, + { tag: "h1", content: "second h1", position: { startOffset: 29, endOffset: 38, clientId: "" } }, ] ); } ); it( "should find H1 within division tags", function() { const mockPaper = new Paper( "

first h1

blah blah

second h1

" ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + expect( h1s( mockPaper ) ).toEqual( [ - { tag: "h1", content: "first h1", position: 0 }, - { tag: "h1", content: "second h1", position: 2 }, + { tag: "h1", content: "first h1", position: { startOffset: 9, endOffset: 17, clientId: "" } }, + { tag: "h1", content: "second h1", position: { startOffset: 64, endOffset: 73, clientId: "" } }, ] ); } ); @@ -116,9 +134,12 @@ describe( "Gets all H1s in the text", function() { "I think voice will become the dominant search query, but I think screens will continue to be " + "important in presenting search results.’

" ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + expect( h1s( mockPaper ) ).toEqual( [ - { tag: "h1", content: "Voice Search", position: 0 }, - { tag: "h1", content: "So what will the future bring?", position: 11 }, + { tag: "h1", content: "Voice Search", position: { startOffset: 4, endOffset: 16, clientId: "" } }, + { tag: "h1", content: "So what will the future bring?", position: { startOffset: 3903, endOffset: 3933, clientId: "" } }, ] ); } ); } ); diff --git a/packages/yoastseo/spec/languageProcessing/researches/keyphraseDistributionSpec.js b/packages/yoastseo/spec/languageProcessing/researches/keyphraseDistributionSpec.js index 3b4a8e1b6bd..a85708b7cda 100644 --- a/packages/yoastseo/spec/languageProcessing/researches/keyphraseDistributionSpec.js +++ b/packages/yoastseo/spec/languageProcessing/researches/keyphraseDistributionSpec.js @@ -477,6 +477,26 @@ describe( "Test for the research", function() { } ); } ); + it( "returns the score 100 when the keyphrase is a shortcode", function() { + const paper = new Paper( + "This is a text with a [keyphrase]that doesn't count", + { + locale: "en_EN", + keyword: "keyphrase", + synonyms: "synonym", + shortcodes: [ "keyphrase" ], + } + ); + + const researcher = new Researcher( paper ); + researcher.addResearchData( "morphology", morphologyData ); + + expect( keyphraseDistributionResearcher( paper, researcher ) ).toEqual( { + keyphraseDistributionScore: 100, + sentencesToHighlight: [], + } ); + } ); + const paragraphWithKeyphrase1 = "

Lorem ipsum keyphrase dolor sit amet, consectetur adipiscing elit." + "In sit amet semper sem, id faucibus massa.

\n"; diff --git a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js index ce5d6324f77..31614969e6e 100644 --- a/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js +++ b/packages/yoastseo/spec/languageProcessing/researches/keywordCountSpec.js @@ -1,4 +1,4 @@ -import keywordCount from "../../../src/languageProcessing/researches/keywordCount.js"; +import getKeyphraseCount from "../../../src/languageProcessing/researches/keywordCount"; import Paper from "../../../src/values/Paper.js"; import factory from "../../specHelpers/factory"; import Mark from "../../../src/values/Mark"; @@ -6,6 +6,7 @@ import wordsCountHelper from "../../../src/languageProcessing/languages/ja/helpe import matchWordsHelper from "../../../src/languageProcessing/languages/ja/helpers/matchTextWithWord"; import memoizedSentenceTokenizer from "../../../src/languageProcessing/helpers/sentence/memoizedSentenceTokenizer"; import japaneseMemoizedSentenceTokenizer from "../../../src/languageProcessing/languages/ja/helpers/memoizedSentenceTokenizer"; +import buildTree from "../../specHelpers/parse/buildTree"; /** * Adds morphological forms to the mock researcher. 
@@ -22,168 +23,1836 @@ const buildMorphologyMockResearcher = function( keyphraseForms ) { }, true, false, false, { memoizedTokenizer: memoizedSentenceTokenizer } ); }; -const mockResearcher = buildMorphologyMockResearcher( [ [ "keyword", "keywords" ] ] ); -const mockResearcherGermanDiacritics = buildMorphologyMockResearcher( [ [ "äöüß" ] ] ); -const mockResearcherMinus = buildMorphologyMockResearcher( [ [ "key-word", "key-words" ] ] ); -const mockResearcherUnderscore = buildMorphologyMockResearcher( [ [ "key_word", "key_words" ] ] ); -const mockResearcherKeyWord = buildMorphologyMockResearcher( [ [ "key", "keys" ], [ "word", "words" ] ] ); -const mockResearcherKaplaki = buildMorphologyMockResearcher( [ [ "kapaklı" ] ] ); -const mockResearcherAmpersand = buildMorphologyMockResearcher( [ [ "key&word" ] ] ); -const mockResearcherApostrophe = buildMorphologyMockResearcher( [ [ "key`word" ] ] ); -// Escape, since the morphology researcher escapes regex as well. -const mockResearcherDollarSign = buildMorphologyMockResearcher( [ [ "\\$keyword" ] ] ); - -describe( "Test for counting the keyword in a text", function() { - it( "counts/marks a string of text with a keyword in it.", function() { - const mockPaper = new Paper( "a string of text with the keyword in it" ); - expect( keywordCount( mockPaper, mockResearcher ).count ).toBe( 1 ); - expect( keywordCount( mockPaper, mockResearcher ).markings ).toEqual( [ +const testCases = [ + { + description: "counts/marks a string of text with an occurrence of the keyphrase in it.", + paper: new Paper( "

a string of text with the keyword in it

", { keyword: "keyword" } ), + keyphraseForms: [ [ "keyword", "keywords" ] ], + expectedCount: 1, + expectedMarkings: [ new Mark( { marked: "a string of text with the keyword in it", - original: "a string of text with the keyword in it" } ) ] - ); - } ); - - it( "counts a string of text with no keyword in it.", function() { - const mockPaper = new Paper( "a string of text" ); - expect( keywordCount( mockPaper, mockResearcher ).count ).toBe( 0 ); - expect( keywordCount( mockPaper, mockResearcher ).markings ).toEqual( [] ); - } ); - - it( "counts multiple occurrences of a keyphrase consisting of multiple words.", function() { - const mockPaper = new Paper( "a string of text with the key word in it, with more key words." ); - expect( keywordCount( mockPaper, mockResearcherKeyWord ).count ).toBe( 2 ); - expect( keywordCount( mockPaper, mockResearcherKeyWord ).markings ).toEqual( [ - new Mark( { marked: "a string of text with the key word in it, " + - "with more key words.", - original: "a string of text with the key word in it, with more key words." } ) ] - ); - } ); - - it( "counts a string of text with German diacritics and eszett as the keyword", function() { - const mockPaper = new Paper( "Waltz keepin auf mitz auf keepin äöüß weiner blitz deutsch spitzen." ); - expect( keywordCount( mockPaper, mockResearcherGermanDiacritics ).count ).toBe( 1 ); - expect( keywordCount( mockPaper, mockResearcherGermanDiacritics ).markings ).toEqual( [ - new Mark( { marked: "Waltz keepin auf mitz auf keepin äöüß weiner blitz deutsch spitzen.", - original: "Waltz keepin auf mitz auf keepin äöüß weiner blitz deutsch spitzen." } ) ] - ); - } ); - - it( "counts a string with multiple keyword morphological forms", function() { - const mockPaper = new Paper( "A string of text with a keyword and multiple keywords in it." 
); - expect( keywordCount( mockPaper, mockResearcher ).count ).toBe( 2 ); - expect( keywordCount( mockPaper, mockResearcher ).markings ).toEqual( [ - new Mark( { marked: "A string of text with a keyword " + + original: "a string of text with the keyword in it", + position: { + startOffset: 29, + endOffset: 36, + startOffsetBlock: 26, + endOffsetBlock: 33, + attributeId: "", + clientId: "", + isFirstSection: false, + }, + } ), + ], + skip: false, + }, + { + description: "counts a string of text with no occurrence of the keyphrase in it.", + paper: new Paper( "

a string of text

", { keyword: "" } ), + keyphraseForms: [], + expectedCount: 0, + expectedMarkings: [], + skip: false, + }, + { + description: "counts multiple occurrences of a keyphrase consisting of multiple words.", + paper: new Paper( "

a string of text with the key word in it, with more key words.

", { keyword: "key word" } ), + keyphraseForms: [ [ "key", "keys" ], [ "word", "words" ] ], + expectedCount: 2, + expectedMarkings: [ + new Mark( { + marked: "a string of text with the key word in it, " + + "with more key words.", + original: "a string of text with the key word in it, with more key words.", + position: { + startOffset: 29, + endOffset: 37, + startOffsetBlock: 26, + endOffsetBlock: 34, + attributeId: "", + clientId: "", + isFirstSection: false, + }, + } ), + new Mark( { + marked: "a string of text with the key word in it, " + + "with more key words.", + original: "a string of text with the key word in it, with more key words.", + position: { + startOffset: 55, + endOffset: 64, + startOffsetBlock: 52, + endOffsetBlock: 61, + attributeId: "", + clientId: "", + isFirstSection: false, + }, + } ), + ], + skip: false, + }, + { + description: "counts a string with multiple occurrences of keyphrase with different morphological forms", + paper: new Paper( "

A string of text with a keyword and multiple keywords in it.

", { keyword: "keyword" } ), + keyphraseForms: [ [ "keyword", "keywords" ] ], + expectedCount: 2, + expectedMarkings: [ + new Mark( { + marked: "A string of text with a keyword " + + "and multiple keywords in it.", + original: "A string of text with a keyword and multiple keywords in it.", + position: { + startOffset: 27, + endOffset: 34, + startOffsetBlock: 24, + endOffsetBlock: 31, + attributeId: "", + clientId: "", + isFirstSection: false, + }, + } ), + new Mark( { + marked: "A string of text with a keyword " + "and multiple keywords in it.", - original: "A string of text with a keyword and multiple keywords in it." } ) ] - ); - } ); + original: "A string of text with a keyword and multiple keywords in it.", + position: { + startOffset: 48, + endOffset: 56, + startOffsetBlock: 45, + endOffsetBlock: 53, + attributeId: "", + clientId: "", + isFirstSection: false, + }, + } ), + ], + skip: false, + }, + { + description: "does not match keyphrase occurrence in image alt attribute.", + paper: new Paper( "

A text with image

", { keyword: "image" } ), + keyphraseForms: [ [ "image", "images" ] ], + expectedCount: 0, + expectedMarkings: [], + skip: false, + }, + { + description: "also matches same phrase with different capitalization.", + paper: new Paper( "

A string with KeY worD.

", { keyword: "key word" } ), + keyphraseForms: [ [ "key", "keys" ], [ "word", "words" ] ], + expectedCount: 1, + expectedMarkings: [ + new Mark( { + marked: "A string with KeY worD.", + original: "A string with KeY worD.", + position: { + startOffset: 17, + endOffset: 25, + startOffsetBlock: 14, + endOffsetBlock: 22, + attributeId: "", + clientId: "", + isFirstSection: false, + }, + } ), + ], + skip: false, + }, + { + description: "doesn't count keyphrase instances inside elements we want to exclude from the analysis", + paper: new Paper( "

There is no keyword in this sentence.

" ), + keyphraseForms: [ [ "keyword", "keywords" ] ], + expectedCount: 0, + expectedMarkings: [], + skip: false, + }, + { + description: "doesn't count keyphrase instances if they are a shortcode", + paper: new Paper( "

There is no [keyword] in this sentence.

", { shortcodes: [ "keyword" ] } ), + keyphraseForms: [ [ "keyword", "keywords" ] ], + expectedCount: 0, + expectedMarkings: [], + skip: false, + }, + { + description: "only counts full-matches of the keyphrase: full-match means when all word of the keyphrase are found in the sentence", + paper: new Paper( "

A string with three keys (key and another key) and one word.

" ), + keyphraseForms: [ [ "key", "keys" ], [ "word", "words" ] ], + expectedCount: 1, + expectedMarkings: [ + new Mark( { + marked: "A string with three keys (" + + "key and another key) and one word.", + original: "A string with three keys (key and another key) and one word.", + position: { + startOffset: 23, + endOffset: 27, + startOffsetBlock: 20, + endOffsetBlock: 24, + attributeId: "", + clientId: "", + isFirstSection: false, + }, + } ), + new Mark( { + marked: "A string with three keys (" + + "key and another key) and one word.", + original: "A string with three keys (key and another key) and one word.", + position: { + startOffset: 29, + endOffset: 32, + startOffsetBlock: 26, + endOffsetBlock: 29, + attributeId: "", + clientId: "", + isFirstSection: false, + }, + } ), + new Mark( { + marked: "A string with three keys (" + + "key and another key) and one word.", + original: "A string with three keys (key and another key) and one word.", + position: { + startOffset: 45, + endOffset: 48, + startOffsetBlock: 42, + endOffsetBlock: 45, + attributeId: "", + clientId: "", + isFirstSection: false, + }, + } ), + new Mark( { + marked: "A string with three keys (" + + "key and another key) and one word.", + original: "A string with three keys (key and another key) and one word.", + position: { + startOffset: 58, + endOffset: 62, + startOffsetBlock: 55, + endOffsetBlock: 59, + attributeId: "", + clientId: "", + isFirstSection: false, + }, + } ), + ], + skip: false, + }, + { + description: "matches both singular and reduplicated plural form of the keyphrase in Indonesian, " + + "the plural should be counted as one occurrence", + paper: new Paper( "

Lorem ipsum dolor sit amet, consectetur keyword-keyword, keyword adipiscing elit.

", + { locale: "id_ID", keyword: "keyword" } ), + keyphraseForms: [ [ "keyword", "keyword-keyword" ] ], + expectedCount: 2, + expectedMarkings: [ + new Mark( { + marked: "Lorem ipsum dolor sit amet, consectetur keyword-keyword," + + " keyword adipiscing elit.", + original: "Lorem ipsum dolor sit amet, consectetur keyword-keyword, keyword adipiscing elit.", + position: { + endOffset: 58, + startOffset: 43, + startOffsetBlock: 40, + endOffsetBlock: 55, + attributeId: "", + clientId: "", + isFirstSection: false, + }, + } ), + new Mark( { + marked: "Lorem ipsum dolor sit amet, consectetur keyword-keyword," + + " keyword adipiscing elit.", + original: "Lorem ipsum dolor sit amet, consectetur keyword-keyword, keyword adipiscing elit.", + position: { + startOffset: 60, + endOffset: 67, + startOffsetBlock: 57, + endOffsetBlock: 64, + attributeId: "", + clientId: "", + isFirstSection: false, + }, + } ), + ], + skip: false, + }, + { + description: "matches both reduplicated keyphrase separated by '-' as two occurrence in English", + paper: new Paper( "

Lorem ipsum dolor sit amet, consectetur keyword-keyword, adipiscing elit.

", + { locale: "en_US", keyword: "keyword" } ), + keyphraseForms: [ [ "keyword", "keyword-keyword" ] ], + expectedCount: 2, + expectedMarkings: [ + new Mark( { + marked: "Lorem ipsum dolor sit amet, consectetur keyword-keyword, adipiscing elit.", + original: "Lorem ipsum dolor sit amet, consectetur keyword-keyword, adipiscing elit.", + position: { + endOffset: 58, + startOffset: 43, + startOffsetBlock: 40, + endOffsetBlock: 55, + attributeId: "", + clientId: "", + isFirstSection: false } } ), + ], + skip: false, + }, + { + description: "counts one occurrence of reduplicated keyphrase separated by '-' as two occurrence in English " + + "when the keyphrase is enclosed in double quotes", + paper: new Paper( "

Lorem ipsum dolor sit amet, consectetur kupu-kupu, adipiscing elit.

", + { locale: "en_US", keyword: "\"kupu-kupu\"" } ), + keyphraseForms: [ [ "kupu-kupu" ] ], + expectedCount: 1, + expectedMarkings: [ + new Mark( { + marked: "Lorem ipsum dolor sit amet, consectetur kupu-kupu, adipiscing elit.", + original: "Lorem ipsum dolor sit amet, consectetur kupu-kupu, adipiscing elit.", + position: { + endOffset: 52, + startOffset: 43, + startOffsetBlock: 40, + endOffsetBlock: 49, + attributeId: "", + clientId: "", + isFirstSection: false, + } } ), + ], + skip: false, + }, + { + description: "counts a single word keyphrase with exact matching", + paper: new Paper( "

A string with a keyword.

", { keyword: "\"keyword\"" } ), + keyphraseForms: [ [ "keyword" ] ], + expectedCount: 1, + expectedMarkings: [ + new Mark( { marked: "A string with a keyword.", + original: "A string with a keyword.", + position: { + startOffset: 19, + endOffset: 26, + startOffsetBlock: 16, + endOffsetBlock: 23, + attributeId: "", + clientId: "", + isFirstSection: false, + }, + } ), + ], + skip: false, + }, + { + description: "with exact matching, a singular single word keyphrase should not be counted if the focus keyphrase is plural", + paper: new Paper( "

A string with a keyword.

", { keyword: "\"keywords\"" } ), + keyphraseForms: [ [ "keywords" ] ], + expectedCount: 0, + expectedMarkings: [], + skip: false, + }, + { + description: "with exact matching, a multi word keyphrase should be counted if the focus keyphrase is the same", + paper: new Paper( "

A string with a key phrase key phrase.

", { keyword: "\"key phrase\"" } ), + keyphraseForms: [ [ "key phrase" ] ], + expectedCount: 2, + expectedMarkings: [ new Mark( { marked: "A string with a key phrase key phrase.", + original: "A string with a key phrase key phrase.", + position: { + endOffset: 40, + startOffset: 19, + startOffsetBlock: 16, + endOffsetBlock: 37, + attributeId: "", + clientId: "", + isFirstSection: false, + } } ) ], + skip: false, + }, + { + description: "with exact matching, a multi word keyphrase should be counted if the focus keyphrase is the same", + paper: new Paper( "

A string with a key phrase.

", { keyword: "\"key phrase\"" } ), + keyphraseForms: [ [ "key phrase" ] ], + expectedCount: 1, + expectedMarkings: [ new Mark( { marked: "A string with a key phrase.", + original: "A string with a key phrase.", + position: { + startOffset: 19, + endOffset: 29, + startOffsetBlock: 16, + endOffsetBlock: 26, + attributeId: "", + clientId: "", + isFirstSection: false, + }, + } ) ], + skip: false, + }, + { + description: "with exact matching, a multi word keyphrase should not be counted if " + + "the focus keyphrase has the same words in a different order", + paper: new Paper( "

A string with a phrase key.

", { keyword: "\"key phrase\"" } ), + keyphraseForms: [ [ "key phrase" ] ], + expectedCount: 0, + expectedMarkings: [], + skip: false, + }, + { + description: "with exact matching, it should match a full stop if it is part of the keyphrase and directly precedes the keyphrase.", + paper: new Paper( "

A .sentence with a keyphrase.

", { keyword: "\".sentence\"" } ), + keyphraseForms: [ [ ".sentence" ] ], + expectedCount: 1, + expectedMarkings: [ new Mark( { marked: "A .sentence with a keyphrase.", + original: "A .sentence with a keyphrase.", + position: { + startOffset: 5, + endOffset: 14, + startOffsetBlock: 2, + endOffsetBlock: 11, + attributeId: "", + clientId: "", + isFirstSection: false, + }, + } ) ], + skip: false, + }, + { + description: "with exact matching, the keyphrase directly preceded by a full stop should not match a " + + "keyphrase occurrence without the full stop in the text.", + paper: new Paper( "

A sentence with a keyphrase.

", { keyword: "\".sentence\"" } ), + keyphraseForms: [ [ ".sentence" ] ], + expectedCount: 0, + expectedMarkings: [], + skip: false, + }, + { + description: "can match dollar sign as in '$keyword' with exact matching.", + paper: new Paper( "

A string with a $keyword.

", { keyword: "\"$keyword\"" } ), + keyphraseForms: [ [ "\\$keyword" ] ], + expectedCount: 1, + expectedMarkings: [ new Mark( { marked: "A string with a $keyword.", + original: "A string with a $keyword.", + position: { + endOffset: 27, + startOffset: 19, + startOffsetBlock: 16, + endOffsetBlock: 24, + attributeId: "", + clientId: "", + isFirstSection: false, + } } ) ], + skip: false, + }, + { + description: "can match multiple occurrences of keyphrase in the text with exact matching.", + paper: new Paper( "

A string with cats and dogs, and other cats and dogs.

", { keyword: "\"cats and dogs\"" } ), + keyphraseForms: [ [ "cats and dogs" ] ], + expectedCount: 2, + expectedMarkings: [ + new Mark( { marked: "A string with cats and dogs, " + + "and other cats and dogs.", + original: "A string with cats and dogs, and other cats and dogs.", + position: { + endOffset: 30, + startOffset: 17, + startOffsetBlock: 14, + endOffsetBlock: 27, + attributeId: "", + clientId: "", + isFirstSection: false, + } } ), + new Mark( { marked: "A string with cats and dogs, " + + "and other cats and dogs.", + original: "A string with cats and dogs, and other cats and dogs.", + position: { + endOffset: 55, + startOffset: 42, + startOffsetBlock: 39, + endOffsetBlock: 52, + attributeId: "", + clientId: "", + isFirstSection: false, + } } ) ], + skip: false, + }, + { + description: "counts all occurrence in the sentence, even when they occur more than 2 time consecutively", + paper: new Paper( "

this is a keyword keyword keyword.

", { keyword: "keyword", locale: "en_US" } ), + keyphraseForms: [ [ "keyword", "keywords" ] ], + expectedCount: 3, + expectedMarkings: [ + new Mark( { marked: "this is a keyword keyword keyword.", + original: "this is a keyword keyword keyword.", + position: { + endOffset: 36, + startOffset: 13, + startOffsetBlock: 10, + endOffsetBlock: 33, + attributeId: "", + clientId: "", + isFirstSection: false, + } } ) ], + skip: false, + }, + { + description: "counts all occurrence in the sentence, even when they occur more than 2 time consecutively: with exact matching", + paper: new Paper( "

A sentence about a red panda red panda red panda.

", { keyword: "\"red panda\"", locale: "en_US" } ), + keyphraseForms: [ [ "red panda" ] ], + expectedCount: 3, + expectedMarkings: [ + new Mark( { marked: "A sentence about a red panda red panda red panda.", + original: "A sentence about a red panda red panda red panda.", + position: { + endOffset: 51, + startOffset: 22, + startOffsetBlock: 19, + endOffsetBlock: 48, + attributeId: "", + clientId: "", + isFirstSection: false, + } } ) ], + skip: false, + }, +]; - it( "counts a string with a keyword with a '-' in it", function() { - const mockPaper = new Paper( "A string with a key-word." ); - expect( keywordCount( mockPaper, mockResearcherMinus ).count ).toBe( 1 ); - expect( keywordCount( mockPaper, mockResearcherMinus ).markings ).toEqual( [ - new Mark( { marked: "A string with a key-word.", - original: "A string with a key-word." } ) ] - ); - } ); +describe.each( testCases )( "Test for counting the keyphrase in a text in english", function( { + description, + paper, + keyphraseForms, + expectedCount, + expectedMarkings, + skip } ) { + const test = skip ? it.skip : it; - it( "counts 'key word' in 'key-word'.", function() { - const mockPaper = new Paper( "A string with a key-word." ); - expect( keywordCount( mockPaper, mockResearcherKeyWord ).count ).toBe( 1 ); - // Note: this behavior might change in the future. + test( description, function() { + const mockResearcher = buildMorphologyMockResearcher( keyphraseForms ); + buildTree( paper, mockResearcher ); + const keyphraseCountResult = getKeyphraseCount( paper, mockResearcher ); + expect( keyphraseCountResult.count ).toBe( expectedCount ); + expect( keyphraseCountResult.markings ).toEqual( expectedMarkings ); } ); +} ); - it( "counts a string with a keyword with a '_' in it", function() { - const mockPaper = new Paper( "A string with a key_word." 
); - expect( keywordCount( mockPaper, mockResearcherUnderscore ).count ).toBe( 1 ); - expect( keywordCount( mockPaper, mockResearcherUnderscore ).markings ).toEqual( [ - new Mark( { marked: "A string with a key_word.", - original: "A string with a key_word." } ) ] - ); - } ); +const testCasesWithSpecialCharacters = [ + { + description: "counts the keyphrase occurrence in the text with  ", + paper: new Paper( "

a string with nbsp to match the key word.

", { keyword: "key word" } ), + keyphraseForms: [ [ "key" ], [ "word" ] ], + expectedCount: 1, + expectedMarkings: [ + new Mark( { marked: "a string with nbsp to match the key word.", + original: "a string with nbsp to match the key word.", + position: { + startOffset: 35, + endOffset: 43, + startOffsetBlock: 32, + endOffsetBlock: 40, + attributeId: "", + clientId: "", + isFirstSection: false, + }, + } ), + ], + skip: false, + }, + { + description: "counts a string with a keyphrase occurrence with a '-' in it", + paper: new Paper( "

A string with a key-word.

", { keyword: "key-word" } ), + keyphraseForms: [ [ "key-word", "key-words" ] ], + expectedCount: 1, + expectedMarkings: [ new Mark( { marked: "A string with a key-word.", + original: "A string with a key-word.", + position: { + endOffset: 27, + startOffset: 19, + startOffsetBlock: 16, + endOffsetBlock: 24, + attributeId: "", + clientId: "", + isFirstSection: false, + } } ) ], + skip: false, + }, + { + description: "counts occurrence with dash only when the keyphrase contains dash", + paper: new Paper( "

A string with a key-phrase and key phrase.

", { keyword: "key-phrase" } ), + keyphraseForms: [ [ "key-phrase", "key-phrases" ] ], + expectedCount: 1, + expectedMarkings: [ new Mark( { marked: "A string with a key-phrase and key phrase.", + original: "A string with a key-phrase and key phrase.", + position: { + endOffset: 29, + startOffset: 19, + startOffsetBlock: 16, + endOffsetBlock: 26, + attributeId: "", + clientId: "", + isFirstSection: false, + } } ) ], + skip: false, + }, + { + description: "should be no match when the sentence contains the keyphrase with a dash but in the wrong order", + paper: new Paper( "

A string with a panda-red.

", { keyword: "red-panda" } ), + keyphraseForms: [ [ "red-panda" ] ], + expectedCount: 0, + expectedMarkings: [], + skip: false, + }, + { + description: "should find a match when the sentence contains the keyphrase with a dash but in the wrong order " + + "and the keyphrase doesn't contain a dash", + paper: new Paper( "

A string with a panda-red.

", { keyword: "red panda" } ), + keyphraseForms: [ [ "red" ], [ "panda" ] ], + expectedCount: 1, + expectedMarkings: [ + new Mark( { + original: "A string with a panda-red.", + marked: "A string with a panda-red.", + position: { + startOffset: 19, + endOffset: 28, + startOffsetBlock: 16, + endOffsetBlock: 25, + attributeId: "", + clientId: "", + isFirstSection: false, + }, + } ), + ], + skip: false, + }, + { + description: "should find a match when the sentence contains the keyphrase with a dash with the same order " + + "and the keyphrase doesn't contain a dash", + paper: new Paper( "

A string with a key-word.

", { keyword: "key word" } ), + keyphraseForms: [ [ "key", "keys" ], [ "word", "words" ] ], + expectedCount: 1, + expectedMarkings: [ + new Mark( { + original: "A string with a key-word.", + marked: "A string with a key-word.", + position: { + endOffset: 27, + startOffset: 19, + startOffsetBlock: 16, + endOffsetBlock: 24, + attributeId: "", + clientId: "", + isFirstSection: false, + }, + } ), + ], + skip: false, + }, + { + description: "should find a match when the word of the keyphrase occurs in the sentence hyphenated with a non-keyphrase word." + + "For example 'key' in 'key-phrase'.", + paper: new Paper( "

A string with a word of key-phrase.

", { keyword: "key word" } ), + keyphraseForms: [ [ "key", "keys" ], [ "word", "words" ] ], + expectedCount: 1, + expectedMarkings: [ + new Mark( { + original: "A string with a word of key-phrase.", + marked: "A string with a word of" + + " key-phrase.", + position: { + endOffset: 23, + startOffset: 19, + startOffsetBlock: 16, + endOffsetBlock: 20, + attributeId: "", + clientId: "", + isFirstSection: false, + }, + } ), + new Mark( { + original: "A string with a word of key-phrase.", + marked: "A string with a word of" + + " key-phrase.", + position: { + endOffset: 37, + startOffset: 27, + startOffsetBlock: 24, + endOffsetBlock: 34, + attributeId: "", + clientId: "", + isFirstSection: false, + }, + } ), + // Note that in this case, we'll highlight 'key-phrase' even though technically we only match "key" in "key-phrase". + // This is the consequence of tokenizing "key-phrase" into one token instead of three. + ], + skip: false, + }, + { + description: "should find a match between a keyphrase with an underscore and its occurrence in the text with the same form", + paper: new Paper( "

A string with a key_word.

", { keyword: "key_word" } ), + keyphraseForms: [ [ "key_word", "key_words" ] ], + expectedCount: 1, + expectedMarkings: [ new Mark( { marked: "A string with a key_word.", + original: "A string with a key_word.", + position: { endOffset: 27, startOffset: 19, + startOffsetBlock: 16, + endOffsetBlock: 24, + attributeId: "", + clientId: "", + isFirstSection: false, + } } ) ], + skip: false, + }, + { + description: "should find a match between a keyphrase with an ampersand (&) and its occurrence in the text with the same form", + paper: new Paper( "

A string with key&word in it

", { keyword: "key&word" } ), + keyphraseForms: [ [ "key&word" ] ], + expectedCount: 1, + expectedMarkings: [ new Mark( { marked: "A string with key&word in it", + original: "A string with key&word in it", + position: { endOffset: 25, startOffset: 17, + startOffsetBlock: 14, + endOffsetBlock: 22, + attributeId: "", + clientId: "", + isFirstSection: false, + } } ) ], + skip: false, + }, + { + description: "should still match keyphrase occurrence with different types of apostrophe.", + paper: new Paper( "

A string with quotes to match the key'word, even if the quotes differ.

", { keyword: "key‛word" } ), + keyphraseForms: [ [ "key‛word", "key‛words" ] ], + expectedCount: 1, + expectedMarkings: [ new Mark( { + marked: "A string with quotes to match the key'word, even if the quotes differ.", + original: "A string with quotes to match the key'word, even if the quotes differ.", + position: { endOffset: 45, startOffset: 37, + startOffsetBlock: 34, + endOffsetBlock: 42, + attributeId: "", + clientId: "", + isFirstSection: false, + } } ) ], + skip: false, + }, + { + description: "can match dollar sign as in '$keyword'.", + paper: new Paper( "

A string with a $keyword.

", { keyword: "$keyword" } ), + keyphraseForms: [ [ "\\$keyword" ] ], + expectedCount: 1, + expectedMarkings: [ new Mark( { marked: "A string with a $keyword.", + original: "A string with a $keyword.", + position: { + startOffset: 19, + endOffset: 27, + startOffsetBlock: 16, + endOffsetBlock: 24, + attributeId: "", + clientId: "", + isFirstSection: false, + }, + } ) ], + skip: false, + }, + { + description: "can match 2 occurrences of a keyphrase ending in an HTML entity (© standing for ©) and output correct Marks", + paper: new Paper( "

A string keyphrase© with a keyphrase©.

", { keyword: "keyphrase©" } ), + keyphraseForms: [ [ "keyphrase©" ] ], + expectedCount: 2, + expectedMarkings: [ new Mark( { + marked: "A string keyphrase© with a " + + "keyphrase©.", + original: "A string keyphrase© with a keyphrase©.", + position: { endOffset: 27, startOffset: 12, + startOffsetBlock: 9, + endOffsetBlock: 24, + attributeId: "", + clientId: "", + isFirstSection: false, + } } ), + new Mark( { + marked: "A string keyphrase© with a " + + "keyphrase©.", + original: "A string keyphrase© with a keyphrase©.", + position: { endOffset: 50, startOffset: 35, + startOffsetBlock: 32, + endOffsetBlock: 47, + attributeId: "", + clientId: "", + isFirstSection: false, + } } ) ], + skip: false, + }, + { + description: "can match an occurrence of a keyphrase after HTML entities, and output correct Marks", + paper: new Paper( "

I find a&b to be > c&d for dog food.

", { keyword: "dog" } ), + keyphraseForms: [ [ "dog" ] ], + expectedCount: 1, + expectedMarkings: [ + new Mark( { + marked: "I find a&b to be > c&d for dog food.", + original: "I find a&b to be > c&d for dog food.", + position: { + startOffsetBlock: 38, + endOffsetBlock: 41, + startOffset: 41, + endOffset: 44, + attributeId: "", + clientId: "", + isFirstSection: false, + } } ) ], + skip: false, + }, + { + description: "can match an occurrence of a keyphrase containing an & in the middle, as in 'a&b', and output correct Marks", + paper: new Paper( "

At a&b they have the best stuff.

", { keyword: "a&b" } ), + keyphraseForms: [ [ "a&b" ] ], + expectedCount: 1, + expectedMarkings: [ + new Mark( { + marked: "At a&b they have the best stuff.", + original: "At a&b they have the best stuff.", + position: { + startOffsetBlock: 3, + endOffsetBlock: 10, + startOffset: 6, + endOffset: 13, + attributeId: "", + clientId: "", + isFirstSection: false, + } } ) ], + skip: false, + }, + { + description: "doesn't match unhyphenated occurrences in the text if the keyphrase is hyphenated.", + paper: new Paper( "

A string with a key word.

", { keyword: "key-word" } ), + keyphraseForms: [ [ "key-word", "key-words" ] ], + expectedCount: 0, + expectedMarkings: [], + skip: false, + }, + { + description: "can match keyphrase enclosed in «» in the text, also if the paper's keyphrase is not enclosed in «»", + paper: new Paper( "

A string with a «keyword».

", { keyword: "keyword" } ), + keyphraseForms: [ [ "keyword" ] ], + expectedCount: 1, + expectedMarkings: [ + new Mark( { + original: "A string with a «keyword».", + marked: "A string with a «keyword».", + position: { endOffset: 27, startOffset: 20, + startOffsetBlock: 17, + endOffsetBlock: 24, + attributeId: "", + clientId: "", + isFirstSection: false, + } } + ), + ], + skip: false, + }, + { + description: "can match an occurrence of the keyphrase not enclosed in «» when the paper's keyphrase is enclosed in «»", + paper: new Paper( "

A string with a keyword.

", { keyword: "«keyword»" } ), + keyphraseForms: [ [ "keyword" ] ], + expectedCount: 1, + expectedMarkings: [ + new Mark( { + original: "A string with a keyword.", + marked: "A string with a keyword.", + position: { endOffset: 26, startOffset: 19, + startOffsetBlock: 16, + endOffsetBlock: 23, + attributeId: "", + clientId: "", + isFirstSection: false, + } } + ), + ], + skip: false, + }, + { + description: "can match keyphrase enclosed in ‹› in the text, also if the paper's keyphrase is not enclosed in ‹›", + paper: new Paper( "

A string with a ‹keyword›.

", { keyword: "keyword" } ), + keyphraseForms: [ [ "keyword" ] ], + expectedCount: 1, + expectedMarkings: [ + new Mark( { + original: "A string with a ‹keyword›.", + marked: "A string with a 'keyword'.", + position: { endOffset: 27, startOffset: 20, + startOffsetBlock: 17, + endOffsetBlock: 24, + attributeId: "", + clientId: "", + isFirstSection: false, + } } + ), + ], + skip: false, + }, + { + description: "can match an occurrence of the keyphrase not enclosed in ‹› when the paper's keyphrase is enclosed in ‹›", + paper: new Paper( "

A string with a keyword.

", { keyword: "‹keyword›" } ), + keyphraseForms: [ [ "keyword" ] ], + expectedCount: 1, + expectedMarkings: [ + new Mark( { + original: "A string with a keyword.", + marked: "A string with a keyword.", + position: { endOffset: 26, startOffset: 19, + startOffsetBlock: 16, + endOffsetBlock: 23, + attributeId: "", + clientId: "", + isFirstSection: false, + } } + ), + ], + skip: false, + }, + { + description: "can match keyphrase preceded by ¿", + paper: new Paper( "

¿Keyword is it

", { keyword: "keyword" } ), + keyphraseForms: [ [ "keyword" ] ], + expectedCount: 1, + expectedMarkings: [ + new Mark( { + original: "¿Keyword is it", + marked: "¿Keyword is it", + position: { endOffset: 11, startOffset: 4, + startOffsetBlock: 1, + endOffsetBlock: 8, + attributeId: "", + clientId: "", + isFirstSection: false, + } } + ), + ], + skip: false, + }, + { + description: "can match keyphrase followed by RTL-specific punctuation marks", + paper: new Paper( "

الجيدة؟

", { keyword: "الجيدة" } ), + keyphraseForms: [ [ "الجيدة" ] ], + expectedCount: 1, + expectedMarkings: [ + new Mark( { + original: "الجيدة؟", + marked: "الجيدة؟", + position: { endOffset: 9, startOffset: 3, + startOffsetBlock: 0, + endOffsetBlock: 6, + attributeId: "", + clientId: "", + isFirstSection: false, + } } + ), + ], + skip: false, + }, + { + description: "can match keyphrases in texts that contain
tags", + paper: new Paper( "

This is a test.
This is
another
test.

", { keyword: "test" } ), + keyphraseForms: [ [ "test" ] ], + expectedCount: 2, + expectedMarkings: [ + new Mark( { + original: "This is a test.", + marked: "This is a test.", + position: { + startOffset: 13, + endOffset: 17, + startOffsetBlock: 10, + endOffsetBlock: 14, + attributeId: "", + clientId: "", + isFirstSection: false, + } } + ), + new Mark( { + original: "\nThis is\nanother\ntest.", + marked: "\nThis is\nanother\ntest.", + position: { + startOffset: 46, + endOffset: 50, + startOffsetBlock: 43, + endOffsetBlock: 47, + attributeId: "", + clientId: "", + isFirstSection: false, + } } + ), + ], + skip: false, + }, + { + description: "can match paragraph gutenberg block", + paper: new Paper( + ` +

a string of text with the keyword in it

+ `, + { + keyword: "keyword", + wpBlocks: [ + { + attributes: { + content: "a string of text with the keyword in it", + }, + name: "core/paragraph", + clientId: "5615ca1f-4052-41a9-97be-be19cfd2085b", + innerBlocks: [], + }, + ], + } + ), + keyphraseForms: [ [ "keyword", "keywords" ] ], + expectedCount: 1, + expectedMarkings: [ + new Mark( { + marked: "a string of text with the keyword in it", + original: "a string of text with the keyword in it", + position: { + startOffset: 55, + endOffset: 62, + startOffsetBlock: 26, + endOffsetBlock: 33, + attributeId: "", + clientId: "", + isFirstSection: false, + }, + } ), + ], + skip: false, + }, + { + description: "can match complex paragraph gutenberg block", + paper: new Paper( + ` +

Over the years, we’ve written quite a few articles about  + branding. + Branding is about getting people to relate to your company and + products. It’s also about trying to make your brand synonymous with a certain product or service. + This can be a lengthy and hard project. It can potentially cost you all of your revenue. + It’s no wonder that branding is often associated with investing lots of money in marketing and promotion. + However, for a lot of small business owners, the investment in branding will have + to be made with a relatively small budget. 

+ `, + { + keyword: "keyword", + wpBlocks: [ + { + attributes: { + // eslint-disable-next-line max-len + content: "Over the years, we’ve written quite a few articles about branding. Branding is about getting people to relate to your company and products. It’s also about trying to make your brand synonymous with a certain product or service. This can be a lengthy and hard project. It can potentially cost you all of your revenue. It’s no wonder that branding is often associated with investing lots of money in marketing and promotion. However, for a lot of small business owners, the investment in branding will have to be made with a relatively small budget. ", + }, + name: "core/paragraph", + clientId: "6860403c-0b36-43b2-96fa-2d30c10cb44c", + innerBlocks: [], + }, + ], + } + ), + keyphraseForms: [ [ "synonymous" ] ], + expectedCount: 1, + expectedMarkings: [ - it( "counts a string with with 'kapaklı' as a keyword in it", function() { - const mockPaper = new Paper( "A string with kapaklı." ); - expect( keywordCount( mockPaper, mockResearcherKaplaki ).count ).toBe( 1 ); - expect( keywordCount( mockPaper, mockResearcherKaplaki ).markings ).toEqual( [ - new Mark( { marked: "A string with kapaklı.", - original: "A string with kapaklı." } ) ] - ); - } ); + new Mark( { + // eslint-disable-next-line max-len + marked: " It's also about trying to make your brand synonymous with a certain product or service.", + original: " It’s also about trying to make your brand synonymous with a certain product or service.", + position: { + startOffset: 295, + endOffset: 305, + startOffsetBlock: 266, + endOffsetBlock: 276, + attributeId: "", + clientId: "", + isFirstSection: false, + }, + } ), + ], + skip: false, + }, + { + description: "can match complex paragraph gutenberg block", + paper: new Paper( + ` +

You might be a local bakery with 10 employees, or a local industrial company employing up to 500 people. + These all can be qualified as + ‘small business’. All have the same main goal when they start: the need to establish a name in their field of expertise. There + are multiple ways to do this, without a huge budget. + In this post, I’ll share my thoughts on how to go about your own low-budget branding.

+ `, + { + keyword: "expertise", + wpBlocks: [ + { + attributes: { + // eslint-disable-next-line max-len + content: "You might be a local bakery with 10 employees, or a local industrial company employing up to 500 people. These all can be qualified as ‘small business’. All have the same main goal when they start: the need to establish a name in their field of expertise. There are multiple ways to do this, without a huge budget. In this post, I’ll share my thoughts on how to go about your own low-budget branding.", + }, + name: "core/paragraph", + clientId: "65be5146-3395-4845-8c7c-4a79fd6e3611", + innerBlocks: [], + }, + ], + } + ), + keyphraseForms: [ [ "expertise" ] ], + expectedCount: 1, + expectedMarkings: [ - it( "counts a string with with '&' in the string and the keyword", function() { - const mockPaper = new Paper( "A string with key&word." ); - expect( keywordCount( mockPaper, mockResearcherAmpersand ).count ).toBe( 1 ); - expect( keywordCount( mockPaper, mockResearcherAmpersand ).markings ).toEqual( [ - new Mark( { marked: "A string with key&word.", - original: "A string with key&word." } ) ] - ); - } ); + new Mark( { + // eslint-disable-next-line max-len + marked: " All have the same main goal when they start: the need to establish a name in their field of expertise.", + original: " All have the same main goal when they start: the need to establish a name in their field of expertise.", + position: { + startOffset: 282, + endOffset: 291, + startOffsetBlock: 253, + endOffsetBlock: 262, + attributeId: "", + clientId: "", + isFirstSection: false, + }, + } ), + ], + skip: false, + }, + { + description: "can match heading gutenberg block", + paper: new Paper( + ` +

Define and communicate brand values

+ `, + { + keyword: "communicate", + wpBlocks: [ + { + attributes: { + level: 2, + content: "Define and communicate brand values", + }, + name: "core/heading", + clientId: "b8e62d35-b3af-45ec-9889-139ef0a9baaa", + innerBlocks: [], + }, + ], + } + ), + keyphraseForms: [ [ "communicate" ] ], + expectedCount: 1, + expectedMarkings: [ + // eslint-disable-next-line max-len + new Mark( { + marked: "Define and communicate brand values", + original: "Define and communicate brand values", + position: { + startOffset: 107, + endOffset: 118, + startOffsetBlock: 11, + endOffsetBlock: 22, + attributeId: "", + clientId: "", + isFirstSection: false, + }, + } ), + ], + skip: false, + }, + { + description: "can match complex paragraph gutenberg block", + paper: new Paper( + ` +
+
+

Lorem Ipsum is simply dummy text of the printing and typesetting industry.

+
+ - it( "does not count images as keywords.", function() { - const mockPaper = new Paper( "" ); - expect( keywordCount( mockPaper, mockResearcherAmpersand ).count ).toBe( 0 ); - expect( keywordCount( mockPaper, mockResearcherAmpersand ).markings ).toEqual( [] ); - } ); + +
+

There are many variations of passages of Lorem Ipsum available

+
+
+ `, + { + keyword: "Ipsum", + wpBlocks: [ + { + attributes: {}, + name: "core/columns", + clientId: "1b9f1d49-813e-4578-a19a-bf236447cc41", + innerBlocks: [ + { + attributes: {}, + name: "core/column", + clientId: "09b93261-25ef-4391-98cc-630e8fa1eac1", + innerBlocks: [ + { + attributes: { + // eslint-disable-next-line max-len + content: "Lorem Ipsum is simply dummy text of the printing and typesetting industry.", + }, + name: "core/paragraph", + clientId: "3f0e68c1-287e-40ef-90c8-54b3ab61702a", + innerBlocks: [], + }, + ], + }, + { + attributes: {}, + name: "core/column", + clientId: "0f247feb-5ada-433d-bc97-1faa0265f7c4", + innerBlocks: [ + { + attributes: { + content: "There are many variations of passages of Lorem Ipsum available", + }, + name: "core/paragraph", + clientId: "5093342c-ec93-48a2-b2d5-883ae514b12d", + innerBlocks: [], + }, + ], + }, + ], + }, + ], + } + ), + keyphraseForms: [ [ "Ipsum" ] ], + expectedCount: 2, + expectedMarkings: [ + new Mark( { + marked: "Lorem Ipsum is simply dummy text of the printing and typesetting industry.", + original: "Lorem Ipsum is simply dummy text of the printing and typesetting industry.", + position: { + startOffset: 149, + endOffset: 154, + startOffsetBlock: 14, + endOffsetBlock: 19, + attributeId: "", + clientId: "", + isFirstSection: false, + }, + } ), + new Mark( { + marked: "There are many variations of passages of Lorem Ipsum available", + original: "There are many variations of passages of Lorem Ipsum available", + position: { + startOffset: 426, + endOffset: 431, + startOffsetBlock: 47, + endOffsetBlock: 52, + attributeId: "", + clientId: "", + isFirstSection: false, + }, + } ), + ], + skip: false, + }, +]; - it( "keyword counting is blind to CApiTal LeTteRs.", function() { - const mockPaper = new Paper( "A string with KeY worD." 
); - expect( keywordCount( mockPaper, mockResearcherKeyWord ).count ).toBe( 1 ); - expect( keywordCount( mockPaper, mockResearcherKeyWord ).markings ).toEqual( [ - new Mark( { marked: "A string with KeY worD.", - original: "A string with KeY worD." } ) ] - ); - } ); +describe.each( testCasesWithSpecialCharacters )( "Test for counting the keyphrase in a text containing special characters", + function( { + description, + paper, + keyphraseForms, + expectedCount, + expectedMarkings, + skip } ) { + const test = skip ? it.skip : it; - it( "keyword counting is blind to types of apostrophe.", function() { - const mockPaper = new Paper( "A string with quotes to match the key'word, even if the quotes differ." ); - expect( keywordCount( mockPaper, mockResearcherApostrophe ).count ).toBe( 1 ); - expect( keywordCount( mockPaper, mockResearcherApostrophe ).markings ).toEqual( [ - new Mark( { marked: "A string with quotes to match the key'word, " + - "even if the quotes differ.", - original: "A string with quotes to match the key'word, even if the quotes differ." } ) ] - ); + test( description, function() { + const mockResearcher = buildMorphologyMockResearcher( keyphraseForms ); + buildTree( paper, mockResearcher ); + const keyphraseCountResult = getKeyphraseCount( paper, mockResearcher ); + expect( keyphraseCountResult.count ).toBe( expectedCount ); + expect( keyphraseCountResult.markings ).toEqual( expectedMarkings ); + } ); } ); - it( "counts can count dollar sign as in '$keyword'.", function() { - const mockPaper = new Paper( "A string with a $keyword." ); - expect( keywordCount( mockPaper, mockResearcherDollarSign ).count ).toBe( 1 ); - // Markings do not currently work in this condition. - } ); +/** + * The following test cases illustrates the current approach of matching transliterated keyphrase with non-transliterated version. + * For example, word form "acción" from the keyphrase will match word "accion" in the sentence. 
+ * But, word form "accion" from the keyphrase will NOT match word "acción" in the sentence. + * + * This behaviour might change in the future. + */ +const testCasesWithLocaleMapping = [ + { + description: "counts a string of text with German diacritics and eszett as the keyphrase", + paper: new Paper( "

Waltz keepin auf mitz auf keepin äöüß weiner blitz deutsch spitzen.

", { keyword: "äöüß", locale: "de_DE" } ), + keyphraseForms: [ [ "äöüß" ] ], + expectedCount: 1, + expectedMarkings: [ + new Mark( { marked: "Waltz keepin auf mitz auf keepin äöüß weiner blitz deutsch spitzen.", + original: "Waltz keepin auf mitz auf keepin äöüß weiner blitz deutsch spitzen.", + position: { endOffset: 40, startOffset: 36, + startOffsetBlock: 33, + endOffsetBlock: 37, + attributeId: "", + clientId: "", + isFirstSection: false, + } } ) ], + skip: false, + }, + { + description: "doesn't count a match if the keyphrase is with diacritic while the occurrence in the text is with diacritic", + paper: new Paper( "

Acción Española fue una revista.

", { keyword: "accion", locale: "es_ES" } ), + keyphraseForms: [ [ "accion" ] ], + expectedCount: 0, + expectedMarkings: [], + skip: false, + }, + { + description: "counts a string of text with Spanish accented vowel", + paper: new Paper( "

Accion Española fue una revista.

", { keyword: "acción", locale: "es_ES" } ), + keyphraseForms: [ [ "acción" ] ], + expectedCount: 1, + expectedMarkings: [ + new Mark( { marked: "Accion Española fue una revista.", + original: "Accion Española fue una revista.", + position: { endOffset: 9, startOffset: 3, + startOffsetBlock: 0, + endOffsetBlock: 6, + attributeId: "", + clientId: "", + isFirstSection: false, + } } ) ], + skip: false, + }, + { + description: "counts a string of text with Swedish diacritics", + paper: new Paper( "

oeverlaatelsebesiktning and overlatelsebesiktning

", { keyword: "överlåtelsebesiktning", locale: "sv_SV" } ), + keyphraseForms: [ [ "överlåtelsebesiktning" ] ], + expectedCount: 2, + expectedMarkings: [ + new Mark( { + marked: "oeverlaatelsebesiktning " + + "and overlatelsebesiktning", + original: "oeverlaatelsebesiktning and overlatelsebesiktning", + position: { endOffset: 26, startOffset: 3, + startOffsetBlock: 0, + endOffsetBlock: 23, + attributeId: "", + clientId: "", + isFirstSection: false, + } } ), + new Mark( { + marked: "oeverlaatelsebesiktning " + + "and overlatelsebesiktning", + original: "oeverlaatelsebesiktning and overlatelsebesiktning", + position: { endOffset: 52, startOffset: 31, + startOffsetBlock: 28, + endOffsetBlock: 49, + attributeId: "", + clientId: "", + isFirstSection: false, + } } ) ], + skip: false, + }, + { + description: "counts a string of text with Norwegian diacritics", + paper: new Paper( "

København and Koebenhavn and Kobenhavn

", { keyword: "København", locale: "nb_NO" } ), + keyphraseForms: [ [ "København" ] ], + expectedCount: 3, + expectedMarkings: [ + new Mark( { + marked: "København and " + + "Koebenhavn and Kobenhavn", + original: "København and Koebenhavn and Kobenhavn", + position: { endOffset: 12, startOffset: 3, + startOffsetBlock: 0, + endOffsetBlock: 9, + attributeId: "", + clientId: "", + isFirstSection: false, + } } ), + new Mark( { + marked: "København and " + + "Koebenhavn and Kobenhavn", + original: "København and Koebenhavn and Kobenhavn", + position: { endOffset: 27, startOffset: 17, + startOffsetBlock: 14, + endOffsetBlock: 24, + attributeId: "", + clientId: "", + isFirstSection: false, + } } ), + new Mark( { + marked: "København and " + + "Koebenhavn and Kobenhavn", + original: "København and Koebenhavn and Kobenhavn", + position: { endOffset: 41, startOffset: 32, + startOffsetBlock: 29, + endOffsetBlock: 38, + attributeId: "", + clientId: "", + isFirstSection: false, + } } ) ], + skip: false, + }, + { + description: "counts a string with a Turkish diacritics", + paper: new Paper( "

Türkçe and Turkce

", { keyword: "Türkçe", locale: "tr_TR" } ), + keyphraseForms: [ [ "Türkçe" ] ], + expectedCount: 2, + expectedMarkings: [ + new Mark( { + marked: "Türkçe and Turkce", + original: "Türkçe and Turkce", + position: { endOffset: 9, startOffset: 3, + startOffsetBlock: 0, + endOffsetBlock: 6, + attributeId: "", + clientId: "", + isFirstSection: false, + } } ), + new Mark( { + marked: "Türkçe and Turkce", + original: "Türkçe and Turkce", + position: { endOffset: 20, startOffset: 14, + startOffsetBlock: 11, + endOffsetBlock: 17, + attributeId: "", + clientId: "", + isFirstSection: false, + } } ), + ], + skip: false, + }, + { + description: "still counts a string with a Turkish diacritics when exact matching is used", + paper: new Paper( "

Türkçe and Turkce

", { keyword: "\"Türkçe\"", locale: "tr_TR" } ), + keyphraseForms: [ [ "Türkçe" ] ], + expectedCount: 2, + expectedMarkings: [ + new Mark( { + marked: "Türkçe and Turkce", + original: "Türkçe and Turkce", + position: { endOffset: 9, startOffset: 3, + startOffsetBlock: 0, + endOffsetBlock: 6, + attributeId: "", + clientId: "", + isFirstSection: false, + } } ), + new Mark( { + marked: "Türkçe and Turkce", + original: "Türkçe and Turkce", + position: { endOffset: 20, startOffset: 14, + startOffsetBlock: 11, + endOffsetBlock: 17, + attributeId: "", + clientId: "", + isFirstSection: false, + } } ), + ], + skip: false, + }, + { + description: "counts a string with a different forms of Turkish i, kephrase: İstanbul", + paper: new Paper( "

İstanbul and Istanbul and istanbul and ıstanbul

", { keyword: "İstanbul", locale: "tr_TR" } ), + keyphraseForms: [ [ "İstanbul" ] ], + expectedCount: 4, + expectedMarkings: [ + new Mark( { + marked: "İstanbul and Istanbul and " + + "istanbul and ıstanbul", + original: "İstanbul and Istanbul and istanbul and ıstanbul", + position: { endOffset: 11, startOffset: 3, + startOffsetBlock: 0, + endOffsetBlock: 8, + attributeId: "", + clientId: "", + isFirstSection: false, + } } ), + new Mark( { + marked: "İstanbul and Istanbul and " + + "istanbul and ıstanbul", + original: "İstanbul and Istanbul and istanbul and ıstanbul", + position: { endOffset: 24, startOffset: 16, + startOffsetBlock: 13, + endOffsetBlock: 21, + attributeId: "", + clientId: "", + isFirstSection: false, + } } ), + new Mark( { + marked: "İstanbul and Istanbul and " + + "istanbul and ıstanbul", + original: "İstanbul and Istanbul and istanbul and ıstanbul", + position: { endOffset: 37, startOffset: 29, + startOffsetBlock: 26, + endOffsetBlock: 34, + attributeId: "", + clientId: "", + isFirstSection: false, + } } ), + new Mark( { + marked: "İstanbul and Istanbul and " + + "istanbul and ıstanbul", + original: "İstanbul and Istanbul and istanbul and ıstanbul", + position: { endOffset: 50, startOffset: 42, + startOffsetBlock: 39, + endOffsetBlock: 47, + attributeId: "", + clientId: "", + isFirstSection: false, + } } ) ], + skip: false, + }, + { + description: "counts a string with a different forms of Turkish i, kephrase: Istanbul", + paper: new Paper( "

İstanbul and Istanbul and istanbul and ıstanbul

", { keyword: "Istanbul", locale: "tr_TR" } ), + keyphraseForms: [ [ "Istanbul" ] ], + expectedCount: 4, + expectedMarkings: [ + new Mark( { + marked: "İstanbul and Istanbul and " + + "istanbul and ıstanbul", + original: "İstanbul and Istanbul and istanbul and ıstanbul", + position: { endOffset: 11, startOffset: 3, + startOffsetBlock: 0, + endOffsetBlock: 8, + attributeId: "", + clientId: "", + isFirstSection: false, + } } ), + new Mark( { + marked: "İstanbul and Istanbul and " + + "istanbul and ıstanbul", + original: "İstanbul and Istanbul and istanbul and ıstanbul", + position: { endOffset: 24, startOffset: 16, + startOffsetBlock: 13, + endOffsetBlock: 21, + attributeId: "", + clientId: "", + isFirstSection: false, + } } ), + new Mark( { + marked: "İstanbul and Istanbul and " + + "istanbul and ıstanbul", + original: "İstanbul and Istanbul and istanbul and ıstanbul", + position: { endOffset: 37, startOffset: 29, + startOffsetBlock: 26, + endOffsetBlock: 34, + attributeId: "", + clientId: "", + isFirstSection: false, + } } ), + new Mark( { + marked: "İstanbul and Istanbul and " + + "istanbul and ıstanbul", + original: "İstanbul and Istanbul and istanbul and ıstanbul", + position: { endOffset: 50, startOffset: 42, + startOffsetBlock: 39, + endOffsetBlock: 47, + attributeId: "", + clientId: "", + isFirstSection: false, + } } ) ], + skip: false, + }, + { + description: "counts a string with a different forms of Turkish i, kephrase: istanbul", + paper: new Paper( "

İstanbul and Istanbul and istanbul and ıstanbul

", { keyword: "istanbul", locale: "tr_TR" } ), + keyphraseForms: [ [ "istanbul" ] ], + expectedCount: 4, + expectedMarkings: [ + new Mark( { + marked: "İstanbul and Istanbul and " + + "istanbul and ıstanbul", + original: "İstanbul and Istanbul and istanbul and ıstanbul", + position: { endOffset: 11, startOffset: 3, + startOffsetBlock: 0, + endOffsetBlock: 8, + attributeId: "", + clientId: "", + isFirstSection: false, + } } ), + new Mark( { + marked: "İstanbul and Istanbul and " + + "istanbul and ıstanbul", + original: "İstanbul and Istanbul and istanbul and ıstanbul", + position: { endOffset: 24, startOffset: 16, + startOffsetBlock: 13, + endOffsetBlock: 21, + attributeId: "", + clientId: "", + isFirstSection: false, + } } ), + new Mark( { + marked: "İstanbul and Istanbul and " + + "istanbul and ıstanbul", + original: "İstanbul and Istanbul and istanbul and ıstanbul", + position: { endOffset: 37, startOffset: 29, + startOffsetBlock: 26, + endOffsetBlock: 34, + attributeId: "", + clientId: "", + isFirstSection: false, + } } ), + new Mark( { + marked: "İstanbul and Istanbul and " + + "istanbul and ıstanbul", + original: "İstanbul and Istanbul and istanbul and ıstanbul", + position: { endOffset: 50, startOffset: 42, + startOffsetBlock: 39, + endOffsetBlock: 47, + attributeId: "", + clientId: "", + isFirstSection: false, + } } ) ], + skip: false, + }, + { + description: "counts a string with a different forms of Turkish i, kephrase: ıstanbul", + paper: new Paper( "

İstanbul and Istanbul and istanbul and ıstanbul

", { keyword: "ıstanbul", locale: "tr_TR" } ), + keyphraseForms: [ [ "ıstanbul" ] ], + expectedCount: 4, + expectedMarkings: [ + new Mark( { + marked: "İstanbul and Istanbul and " + + "istanbul and ıstanbul", + original: "İstanbul and Istanbul and istanbul and ıstanbul", + position: { endOffset: 11, startOffset: 3, + startOffsetBlock: 0, + endOffsetBlock: 8, + attributeId: "", + clientId: "", + isFirstSection: false, + } } ), + new Mark( { + marked: "İstanbul and Istanbul and " + + "istanbul and ıstanbul", + original: "İstanbul and Istanbul and istanbul and ıstanbul", + position: { endOffset: 24, startOffset: 16, + startOffsetBlock: 13, + endOffsetBlock: 21, + attributeId: "", + clientId: "", + isFirstSection: false, + } } ), + new Mark( { + marked: "İstanbul and Istanbul and " + + "istanbul and ıstanbul", + original: "İstanbul and Istanbul and istanbul and ıstanbul", + position: { endOffset: 37, startOffset: 29, + startOffsetBlock: 26, + endOffsetBlock: 34, + attributeId: "", + clientId: "", + isFirstSection: false, + } } ), + new Mark( { + marked: "İstanbul and Istanbul and " + + "istanbul and ıstanbul", + original: "İstanbul and Istanbul and istanbul and ıstanbul", + position: { endOffset: 50, startOffset: 42, + startOffsetBlock: 39, + endOffsetBlock: 47, + attributeId: "", + clientId: "", + isFirstSection: false, + } } ) ], + skip: false, + }, +]; - it( "counts 'key word' also in 'key-word'.)", function() { - const mockPaper = new Paper( "Lorem ipsum dolor sit amet, key word consectetur key-word adipiscing elit." ); - expect( keywordCount( mockPaper, mockResearcherKeyWord ).count ).toBe( 2 ); - // Note: this behavior might change in the future. - } ); +describe.each( testCasesWithLocaleMapping )( "Test for counting the keyphrase in a text with different locale mapping, e.g. Turkish", + function( { + description, + paper, + keyphraseForms, + expectedCount, + expectedMarkings, + skip } ) { + const test = skip ? 
it.skip : it; - it( "doesn't count 'key-word' in 'key word'.", function() { - const mockPaper = new Paper( "Lorem ipsum dolor sit amet, key word consectetur key-word adipiscing elit." ); - expect( keywordCount( mockPaper, mockResearcherMinus ).count ).toBe( 1 ); - // Note: this behavior might change in the future. + test( description, function() { + const mockResearcher = buildMorphologyMockResearcher( keyphraseForms ); + buildTree( paper, mockResearcher ); + const keyphraseCountResult = getKeyphraseCount( paper, mockResearcher ); + expect( keyphraseCountResult.count ).toBe( expectedCount ); + expect( keyphraseCountResult.markings ).toEqual( expectedMarkings ); + } ); } ); - it( "doesn't count keyphrase instances inside elements we want to exclude from the analysis", function() { - const mockPaper = new Paper( "There is no keyword in this sentence." ); - expect( keywordCount( mockPaper, mockResearcher ).count ).toBe( 0 ); - } ); +const testDataForHTMLTags = [ + { + description: "counts keyphrase occurrence correctly in a text containing `` tag, and outputs correct Marks", + paper: new Paper( "

The forepaws possess a \"false thumb\", which is an extension of a wrist bone, " + + "the radial sesamoid found in many carnivorans. This thumb allows the animal to grip onto bamboo stalks " + + "and both the digits and wrist bones are highly flexible. The red panda shares this feature " + + "with the giant panda, which has a larger sesamoid that is more compressed at the sides." + + " In addition, the red panda's sesamoid has a more sunken tip while the giant panda's curves in the middle. 

", + { keyword: "giant panda", locale: "en_US" } ), + keyphraseForms: [ [ "giant" ], [ "panda", "pandas" ] ], + expectedCount: 1, + expectedMarkings: [ + new Mark( { + marked: " The red panda shares this feature with the " + + "giant panda, which has a larger sesamoid that is more compressed at the sides.", + original: " The red panda shares this feature with the giant panda, which has a larger sesamoid " + + "that is more compressed at the sides.", + position: { + endOffset: 253, + startOffset: 248, + startOffsetBlock: 245, + endOffsetBlock: 250, + attributeId: "", + clientId: "", + isFirstSection: false, + }, + } ), + new Mark( { + marked: " The red panda shares this feature with the " + + "giant panda, which has a larger sesamoid that is more compressed at the sides.", + original: " The red panda shares this feature with the giant panda, which has a larger " + + "sesamoid that is more compressed at the sides.", + position: { + endOffset: 288, + startOffset: 283, + startOffsetBlock: 280, + endOffsetBlock: 285, + attributeId: "", + clientId: "", + isFirstSection: false, + }, + } ), + new Mark( { + marked: " The red panda shares this feature with the " + + "giant panda, which has a larger sesamoid that is more compressed at the sides.", + original: " The red panda shares this feature with the giant panda, which has a larger " + + "sesamoid that is more compressed at the sides.", + position: { + endOffset: 302, + startOffset: 297, + startOffsetBlock: 294, + endOffsetBlock: 299, + attributeId: "", + clientId: "", + isFirstSection: false, + }, + } ), + ], + skip: false, + }, + { + description: "counts keyphrase occurrence correctly in a text containing `` tag and outputs correct Marks", + paper: new Paper( "

The forepaws possess a \"false thumb\", which is an extension of a wrist bone, " + + "the radial sesamoid found in many carnivorans. This thumb allows the animal to grip onto bamboo stalks " + + "and both the digits and wrist bones are highly flexible. The red panda shares this feature " + + "with the giant panda, which has a larger sesamoid that is more compressed at the sides." + + " In addition, the red panda's sesamoid has a more sunken tip while the giant panda's curves in the middle. 

", + { keyword: "giant panda", locale: "en_US" } ), + keyphraseForms: [ [ "giant" ], [ "panda", "pandas" ] ], + expectedCount: 1, + expectedMarkings: [ + new Mark( { + marked: " The red panda shares this feature with the " + + "giant panda, which has a larger sesamoid that is more compressed at the sides.", + original: " The red panda shares this feature with the giant panda, which has a larger sesamoid " + + "that is more compressed at the sides.", + position: { + endOffset: 253, + startOffset: 248, + startOffsetBlock: 245, + endOffsetBlock: 250, + attributeId: "", + clientId: "", + isFirstSection: false, + }, + } ), + new Mark( { + marked: " The red panda shares this feature with the " + + "giant panda, which has a larger sesamoid that is more compressed at the sides.", + original: " The red panda shares this feature with the giant panda, which has a larger " + + "sesamoid that is more compressed at the sides.", + position: { + endOffset: 298, + startOffset: 287, + startOffsetBlock: 284, + endOffsetBlock: 295, + attributeId: "", + clientId: "", + isFirstSection: false, + }, + } ), + ], + skip: false, + }, + { + description: "counts keyphrase occurrence correctly when it's found inside an anchor text and outputs correct Marks", + paper: new Paper( "

The forepaws possess a \"false thumb\", which is an extension of a wrist bone, " + + "the radial sesamoid found in many carnivorans. This thumb allows the animal to grip onto bamboo stalks " + + "and both the digits and wrist bones are highly flexible. The red panda shares this feature " + + "with the giant panda, which has a larger sesamoid " + + "that is more compressed at the sides." + + " In addition, the red panda's sesamoid has a more sunken tip while the giant panda's curves in the middle. 

", + { keyword: "giant panda", locale: "en_US" } ), + keyphraseForms: [ [ "giant" ], [ "panda", "pandas" ] ], + expectedCount: 1, + expectedMarkings: [ + new Mark( { + marked: " The red panda shares this feature with the " + + "giant panda, which has a larger sesamoid that is more compressed at the sides.", + original: " The red panda shares this feature with the giant panda, which has a larger sesamoid " + + "that is more compressed at the sides.", + position: { + endOffset: 253, + startOffset: 248, + startOffsetBlock: 245, + endOffsetBlock: 250, + attributeId: "", + clientId: "", + isFirstSection: false, + }, + } ), + new Mark( { + marked: " The red panda shares this feature with the " + + "giant panda, which has a larger sesamoid that is more compressed at the sides.", + original: " The red panda shares this feature with the giant panda, which has a larger " + + "sesamoid that is more compressed at the sides.", + position: { + endOffset: 346, + startOffset: 335, + startOffsetBlock: 332, + endOffsetBlock: 343, + attributeId: "", + clientId: "", + isFirstSection: false, + }, + } ), + ], + skip: false, + }, +]; - it( "only counts full key phrases (when all keywords are in the sentence once, twice etc.) as matches.", function() { - const mockPaper = new Paper( "A string with three keys (key and another key) and one word." ); - expect( keywordCount( mockPaper, mockResearcherKeyWord ).count ).toBe( 1 ); - expect( keywordCount( mockPaper, mockResearcherKeyWord ).markings ).toEqual( [ - new Mark( { marked: "A string with three keys (" + - "key and another key) and one word.", - original: "A string with three keys (key and another key) and one word." } ) ] - ); - } ); +describe.each( testDataForHTMLTags )( "Test for counting the keyphrase in a text containing multiple html tags", + function( { + description, + paper, + keyphraseForms, + expectedCount, + expectedMarkings, + skip } ) { + const test = skip ? 
it.skip : it; - it( "doesn't match singular forms in reduplicated plurals in Indonesian", function() { - const mockPaper = new Paper( "Lorem ipsum dolor sit amet, consectetur keyword-keyword, keyword adipiscing elit.", { locale: "id_ID" } ); - expect( keywordCount( mockPaper, mockResearcher ).count ).toBe( 1 ); + test( description, function() { + const mockResearcher = buildMorphologyMockResearcher( keyphraseForms ); + buildTree( paper, mockResearcher ); + const keyphraseCountResult = getKeyphraseCount( paper, mockResearcher ); + expect( keyphraseCountResult.count ).toBe( expectedCount ); + expect( keyphraseCountResult.markings ).toEqual( expectedMarkings ); + } ); } ); -} ); /** * Mocks Japanese Researcher. @@ -209,34 +1878,79 @@ const buildJapaneseMockResearcher = function( keyphraseForms, helper1, helper2 ) } ); }; - -// Decided not to remove test below as it tests the added logic of the Japanese helpers. -describe( "Test for counting the keyword in a text for Japanese", () => { - it( "counts/marks a string of text with a keyword in it.", function() { - const mockPaper = new Paper( "私の猫はかわいいです。", { locale: "ja", keyphrase: "猫" } ); +describe( "Test for counting the keyphrase in a text for Japanese", () => { + // NOTE: Japanese is not yet adapted to use HTML parser, hence, the marking out doesn't include the position information. + it( "counts/marks a string of text with a keyphrase in it.", function() { + const mockPaper = new Paper( "

私の猫はかわいいです。

", { locale: "ja", keyphrase: "猫" } ); const researcher = buildJapaneseMockResearcher( [ [ "猫" ] ], wordsCountHelper, matchWordsHelper ); - expect( keywordCount( mockPaper, researcher ).count ).toBe( 1 ); - expect( keywordCount( mockPaper, researcher ).markings ).toEqual( [ + buildTree( mockPaper, researcher ); + + expect( getKeyphraseCount( mockPaper, researcher ).count ).toBe( 1 ); + expect( getKeyphraseCount( mockPaper, researcher ).markings ).toEqual( [ new Mark( { marked: "私のはかわいいです。", original: "私の猫はかわいいです。" } ) ] ); } ); - it( "counts a string of text with no keyword in it.", function() { - const mockPaper = new Paper( "私の猫はかわいいです。", { locale: "ja" } ); + it( "counts the keyphrase occurrence inside an image caption.", function() { + const mockPaper = new Paper( "

[caption id=\"attachment_157\" align=\"alignnone\" width=\"225\"]\"\" 一日一冊の本を読むのはできるかどうかやってみます。[/caption]

", { + locale: "ja", + keyphrase: "一冊の本を読む", + shortcodes: [ + "wp_caption", + "caption", + "gallery", + "playlist" ], + } ); + const keyphraseForms = [ + [ "一冊" ], + [ "本" ], + [ "読む", "読み", "読ま", "読め", "読も", "読ん", "読める", "読ませ", "読ませる", "読まれ", "読まれる", "読もう" ], + ]; + const researcher = buildJapaneseMockResearcher( keyphraseForms, wordsCountHelper, matchWordsHelper ); + buildTree( mockPaper, researcher ); + + + expect( getKeyphraseCount( mockPaper, researcher ).count ).toBe( 1 ); + expect( getKeyphraseCount( mockPaper, researcher ).markings ).toEqual( [ + new Mark( { + marked: "一日一冊読むのはできるかどうかやってみます。", + original: "一日一冊の本を読むのはできるかどうかやってみます。" } ) ] ); + } ); + + it( "counts/marks a string of text with multiple occurrences of the same keyphrase in it.", function() { + const mockPaper = new Paper( "

私の猫はかわいい猫です。

", { locale: "ja", keyphrase: "猫" } ); + const researcher = buildJapaneseMockResearcher( [ [ "猫" ] ], wordsCountHelper, matchWordsHelper ); + buildTree( mockPaper, researcher ); + + expect( getKeyphraseCount( mockPaper, researcher ).count ).toBe( 2 ); + expect( getKeyphraseCount( mockPaper, researcher ).markings ).toEqual( [ + new Mark( { marked: "私のはかわいいです。", + original: "私の猫はかわいい猫です。", + } ) ] ); + } ); + + it( "counts a string if text with no keyphrase in it.", function() { + const mockPaper = new Paper( "

私の猫はかわいいです。

", { locale: "ja" } ); const researcher = buildJapaneseMockResearcher( [ [ "猫" ], [ "会い" ] ], wordsCountHelper, matchWordsHelper ); - expect( keywordCount( mockPaper, researcher ).count ).toBe( 0 ); - expect( keywordCount( mockPaper, researcher ).markings ).toEqual( [] ); + buildTree( mockPaper, researcher ); + expect( getKeyphraseCount( mockPaper, researcher ).count ).toBe( 0 ); + expect( getKeyphraseCount( mockPaper, researcher ).markings ).toEqual( [] ); } ); it( "counts multiple occurrences of a keyphrase consisting of multiple words.", function() { - const mockPaper = new Paper( "私の猫はかわいいですかわいい。", { locale: "ja" } ); + const mockPaper = new Paper( "

私の猫はかわいいですかわいい。

", { locale: "ja" } ); const researcher = buildJapaneseMockResearcher( [ [ "猫" ], [ "かわいい" ] ], wordsCountHelper, matchWordsHelper ); - expect( keywordCount( mockPaper, researcher ).count ).toBe( 1 ); - expect( keywordCount( mockPaper, researcher ).markings ).toEqual( [ + buildTree( mockPaper, researcher ); + expect( getKeyphraseCount( mockPaper, researcher ).count ).toBe( 1 ); + expect( getKeyphraseCount( mockPaper, researcher ).markings ).toEqual( [ new Mark( { marked: "私のかわいい" + "ですかわいい。", original: "私の猫はかわいいですかわいい。", - } ) ] ); + } ), + ] ); } ); } ); diff --git a/packages/yoastseo/spec/languageProcessing/researches/matchKeywordInSubheadingsSpec.js b/packages/yoastseo/spec/languageProcessing/researches/matchKeywordInSubheadingsSpec.js index d6d803710fa..609d63776fd 100644 --- a/packages/yoastseo/spec/languageProcessing/researches/matchKeywordInSubheadingsSpec.js +++ b/packages/yoastseo/spec/languageProcessing/researches/matchKeywordInSubheadingsSpec.js @@ -48,6 +48,15 @@ describe( "Matching keyphrase in subheadings", () => { expect( result.matches ).toBe( 0 ); } ); + it( "should not match text that is also a shortcode", function() { + const paper = new Paper( "

" + + "What is a [shortcode]

A beautiful dog.", { keyword: "shortcode", shortcodes: [ "shortcode" ] } ); + + const result = matchKeywordInSubheadings( paper, new Researcher( paper ) ); + expect( result.count ).toBe( 1 ); + expect( result.matches ).toBe( 0 ); + } ); + // This unit test is skipped for now because it returns an incorrect result. Result.matches should return 0, instead it returns 1. // When this research is adapted to use HTML Parser 5, please unskip this test and make sure that it passes. xit( "should not match text within heading attributes", () => { diff --git a/packages/yoastseo/spec/languageProcessing/researches/sentencesSpec.js b/packages/yoastseo/spec/languageProcessing/researches/sentencesSpec.js index 3260e313d12..1c5ca3cec55 100644 --- a/packages/yoastseo/spec/languageProcessing/researches/sentencesSpec.js +++ b/packages/yoastseo/spec/languageProcessing/researches/sentencesSpec.js @@ -12,4 +12,10 @@ describe( "Get sentences from text", function() { const researcher = new EnglishResearcher(); expect( sentences( paper, researcher ) ).toEqual( [ "Hello.", "Goodbye." ] ); } ); + + it( "should get sentences without the shortcodes", function() { + const paper = new Paper( "Hello. Goodbye. To be [shortcode]or not to be.", { shortcodes: [ "shortcode" ] } ); + const researcher = new EnglishResearcher(); + expect( sentences( paper, researcher ) ).toEqual( [ "Hello.", "Goodbye.", "To be or not to be." 
] ); + } ); } ); diff --git a/packages/yoastseo/spec/languageProcessing/researches/wordComplexitySpec.js b/packages/yoastseo/spec/languageProcessing/researches/wordComplexitySpec.js index d2a12244c87..3711517bab3 100644 --- a/packages/yoastseo/spec/languageProcessing/researches/wordComplexitySpec.js +++ b/packages/yoastseo/spec/languageProcessing/researches/wordComplexitySpec.js @@ -92,6 +92,14 @@ describe( "a test for getting the complex words in the sentence and calculating expect( wordComplexity( paper, researcher ).percentage ).toEqual( 0 ); } ); + it( "should return an empty array and 0% if the only complex word are is a shortcode ", function() { + const paper = new Paper( "This is short text. [tortoiseshell] A text about Calico.", { shortcodes: [ "tortoiseshell" ] } ); + researcher.setPaper( paper ); + + expect( wordComplexity( paper, researcher ).complexWords ).toEqual( [] ); + expect( wordComplexity( paper, researcher ).percentage ).toEqual( 0 ); + } ); + it( "should return an empty array and 0% if there is no complex word found in the text: " + "Also test with a word starting with capital letter enclosed in different types of quotation mark.", () => { let paper = new Paper( "This is short text. This is another short text. A text about \"Calico\"." 
); diff --git a/packages/yoastseo/spec/languageProcessing/researches/wordCountInTextSpec.js b/packages/yoastseo/spec/languageProcessing/researches/wordCountInTextSpec.js index 4da5ffd745f..6a5936c3890 100644 --- a/packages/yoastseo/spec/languageProcessing/researches/wordCountInTextSpec.js +++ b/packages/yoastseo/spec/languageProcessing/researches/wordCountInTextSpec.js @@ -10,4 +10,8 @@ describe( "a test for counting the words in the text", function() { const paper = new Paper( "Tell me a story on how to love your cats", { keyword: "how to love your cats" } ); expect( wordCountInText( paper ).count ).toBe( 7 ); } ); + it( "should not count shortcodes", function() { + const paper = new Paper( "Tell me a story on [shortcode]how to love[/shortcode] your cats", { shortcodes: [ "shortcode" ] } ); + expect( wordCountInText( paper ).count ).toBe( 10 ); + } ); } ); diff --git a/packages/yoastseo/spec/markers/removeDuplicateMarks.js b/packages/yoastseo/spec/markers/removeDuplicateMarksSpec.js similarity index 64% rename from packages/yoastseo/spec/markers/removeDuplicateMarks.js rename to packages/yoastseo/spec/markers/removeDuplicateMarksSpec.js index 73ae017032f..335a3fe1fd5 100644 --- a/packages/yoastseo/spec/markers/removeDuplicateMarks.js +++ b/packages/yoastseo/spec/markers/removeDuplicateMarksSpec.js @@ -7,11 +7,11 @@ describe( "removeDuplicateMarks", function() { } ); it( "should remove duplicated marks from the array", function() { - var marks = [ + const marks = [ new Mark( { original: "original", marked: "marked" } ), new Mark( { original: "original", marked: "marked" } ), ]; - var expected = [ + const expected = [ new Mark( { original: "original", marked: "marked" } ), ]; @@ -19,16 +19,26 @@ describe( "removeDuplicateMarks", function() { } ); it( "should remove duplicated marks from the array", function() { - var marks = [ + const marks = [ new Mark( { original: "original", marked: "marked" } ), new Mark( { original: "original2", marked: "marked" } ), new Mark( 
{ original: "original", marked: "marked" } ), ]; - var expected = [ + const expected = [ new Mark( { original: "original", marked: "marked" } ), new Mark( { original: "original2", marked: "marked" } ), ]; expect( removeDuplicateMarks( marks ) ).toEqual( expected ); } ); + + it( "should not remove duplicated marks if the mark objects have position information", function() { + const marks = [ + new Mark( { original: "original", marked: "marked", position: { startOffset: 0, endOffset: 10 } } ), + new Mark( { original: "original2", marked: "marked", position: { startOffset: 0, endOffset: 10 } } ), + new Mark( { original: "original1", marked: "marked", position: { startOffset: 15, endOffset: 20 } } ), + ]; + + expect( removeDuplicateMarks( marks ) ).toEqual( marks ); + } ); } ); diff --git a/packages/yoastseo/spec/parse/build/buildSpec.js b/packages/yoastseo/spec/parse/build/buildSpec.js index f748f8ef880..e85cf7fe33d 100644 --- a/packages/yoastseo/spec/parse/build/buildSpec.js +++ b/packages/yoastseo/spec/parse/build/buildSpec.js @@ -1,31 +1,34 @@ import build from "../../../src/parse/build/build"; import LanguageProcessor from "../../../src/parse/language/LanguageProcessor"; +import Paper from "../../../src/values/Paper"; import Factory from "../../specHelpers/factory"; import memoizedSentenceTokenizer from "../../../src/languageProcessing/helpers/sentence/memoizedSentenceTokenizer"; +import splitIntoTokensCustom from "../../../src/languageProcessing/languages/ja/helpers/splitIntoTokensCustom"; describe( "The parse function", () => { - it( "parses a basic HTML text", () => { - const html = "

Hello, world!

"; + it( "parses a basic HTML text with an HTML entity (&)", () => { + const html = "

Hello, world & beyond!

"; + const paper = new Paper( html ); const researcher = Factory.buildMockResearcher( {}, true, false, false, { memoizedTokenizer: memoizedSentenceTokenizer } ); const languageProcessor = new LanguageProcessor( researcher ); - expect( build( html, languageProcessor ) ).toEqual( { + expect( build( paper, languageProcessor ) ).toEqual( { name: "#document-fragment", attributes: {}, childNodes: [ { name: "div", sourceCodeLocation: { startOffset: 0, - endOffset: 45, + endOffset: 58, startTag: { startOffset: 0, endOffset: 5, }, endTag: { - startOffset: 39, - endOffset: 45, + startOffset: 52, + endOffset: 58, }, }, attributes: {}, @@ -36,30 +39,97 @@ describe( "The parse function", () => { "class": new Set( [ "yoast" ] ), }, sentences: [ { - text: "Hello, world!", - sourceCodeRange: { startOffset: 22, endOffset: 35 }, + text: "Hello, world & beyond!", + sourceCodeRange: { startOffset: 22, endOffset: 48 }, tokens: [ { text: "Hello", sourceCodeRange: { startOffset: 22, endOffset: 27 } }, { text: ",", sourceCodeRange: { startOffset: 27, endOffset: 28 } }, { text: " ", sourceCodeRange: { startOffset: 28, endOffset: 29 } }, { text: "world", sourceCodeRange: { startOffset: 29, endOffset: 34 } }, - { text: "!", sourceCodeRange: { startOffset: 34, endOffset: 35 } }, + { text: " ", sourceCodeRange: { startOffset: 34, endOffset: 35 } }, + { text: "&", sourceCodeRange: { startOffset: 35, endOffset: 40 } }, + { text: " ", sourceCodeRange: { startOffset: 40, endOffset: 41 } }, + { text: "beyond", sourceCodeRange: { startOffset: 41, endOffset: 47 } }, + { text: "!", sourceCodeRange: { startOffset: 47, endOffset: 48 } }, + ], + } ], + childNodes: [ { + name: "#text", + value: "Hello, world #amp; beyond!", + sourceCodeRange: { startOffset: 22, endOffset: 48 }, + } ], + sourceCodeLocation: { + startOffset: 5, + endOffset: 52, + startTag: { + startOffset: 5, + endOffset: 22, + }, + endTag: { + startOffset: 48, + endOffset: 52, + }, + }, + } ], + } ], + } ); + } ); + + it( "parses a basic 
Japanese HTML text", () => { + const html = "

犬が大好き

"; + const paper = new Paper( html ); + + const researcher = Factory.buildMockResearcher( {}, true, false, false, + { splitIntoTokensCustom: splitIntoTokensCustom, memoizedTokenizer: memoizedSentenceTokenizer } ); + const languageProcessor = new LanguageProcessor( researcher ); + expect( build( paper, languageProcessor ) ).toEqual( { + name: "#document-fragment", + attributes: {}, + childNodes: [ { + name: "div", + sourceCodeLocation: { + startOffset: 0, + endOffset: 37, + startTag: { + startOffset: 0, + endOffset: 5, + }, + endTag: { + startOffset: 31, + endOffset: 37, + }, + }, + attributes: {}, + childNodes: [ { + name: "p", + isImplicit: false, + attributes: { + "class": new Set( [ "yoast" ] ), + }, + sentences: [ { + text: "犬が大好き", + sourceCodeRange: { startOffset: 22, endOffset: 27 }, + tokens: [ + { text: "犬", sourceCodeRange: { startOffset: 22, endOffset: 23 } }, + { text: "が", sourceCodeRange: { startOffset: 23, endOffset: 24 } }, + { text: "大好き", sourceCodeRange: { startOffset: 24, endOffset: 27 } }, ], } ], childNodes: [ { name: "#text", - value: "Hello, world!", + value: "犬が大好き", + sourceCodeRange: { startOffset: 22, endOffset: 27 }, } ], sourceCodeLocation: { startOffset: 5, - endOffset: 39, + endOffset: 31, startTag: { startOffset: 5, endOffset: 22, }, endTag: { - startOffset: 35, - endOffset: 39, + startOffset: 27, + endOffset: 31, }, }, } ], @@ -69,6 +139,7 @@ describe( "The parse function", () => { it( "adds implicit paragraphs around phrasing content outside of paragraphs and headings", () => { const html = "
Hello World!
"; + const paper = new Paper( html ); const researcher = Factory.buildMockResearcher( {}, true, false, false, { memoizedTokenizer: memoizedSentenceTokenizer } ); @@ -86,7 +157,7 @@ describe( "The parse function", () => { * [/#document-fragment] * ``` */ - expect( build( html, languageProcessor ) ).toEqual( { + expect( build( paper, languageProcessor ) ).toEqual( { name: "#document-fragment", attributes: {}, childNodes: [ { @@ -110,6 +181,7 @@ describe( "The parse function", () => { { name: "#text", value: "Hello ", + sourceCodeRange: { startOffset: 5, endOffset: 11 }, }, { name: "span", @@ -117,6 +189,7 @@ describe( "The parse function", () => { childNodes: [ { name: "#text", value: "World!", + sourceCodeRange: { startOffset: 17, endOffset: 23 }, } ], sourceCodeLocation: { startOffset: 11, @@ -155,6 +228,7 @@ describe( "The parse function", () => { it( "parses another HTML text and adds implicit paragraphs where needed", () => { const html = "
Hello

World!

"; + const paper = new Paper( html ); const researcher = Factory.buildMockResearcher( {}, true, false, false, { memoizedTokenizer: memoizedSentenceTokenizer } ); @@ -175,7 +249,7 @@ describe( "The parse function", () => { * [/#document-fragment] * ``` */ - expect( build( html, languageProcessor ) ).toEqual( { + expect( build( paper, languageProcessor ) ).toEqual( { name: "#document-fragment", attributes: {}, childNodes: [ @@ -199,6 +273,7 @@ describe( "The parse function", () => { { name: "#text", value: "Hello ", + sourceCodeRange: { startOffset: 5, endOffset: 11 }, }, ], sourceCodeLocation: { @@ -223,6 +298,7 @@ describe( "The parse function", () => { { name: "#text", value: "World!", + sourceCodeRange: { startOffset: 14, endOffset: 20 }, }, ], sourceCodeLocation: { @@ -258,6 +334,7 @@ describe( "The parse function", () => { it( "parses an HTML text with implicit paragraphs before, between, and after p tags", () => { const html = "
So long, and

thanks

for

all

the fish!
"; + const paper = new Paper( html ); const researcher = Factory.buildMockResearcher( {}, true, false, false, { memoizedTokenizer: memoizedSentenceTokenizer } ); @@ -287,7 +364,7 @@ describe( "The parse function", () => { * [/#document-fragment] * ``` */ - expect( build( html, languageProcessor ) ).toEqual( { + expect( build( paper, languageProcessor ) ).toEqual( { name: "#document-fragment", attributes: {}, childNodes: [ @@ -303,6 +380,7 @@ describe( "The parse function", () => { { name: "#text", value: "So ", + sourceCodeRange: { startOffset: 5, endOffset: 8 }, }, { name: "em", @@ -311,6 +389,7 @@ describe( "The parse function", () => { { name: "#text", value: "long", + sourceCodeRange: { startOffset: 12, endOffset: 16 }, }, ], sourceCodeLocation: { @@ -329,6 +408,7 @@ describe( "The parse function", () => { { name: "#text", value: ", and ", + sourceCodeRange: { startOffset: 21, endOffset: 27 }, }, ], sentences: [ { @@ -357,6 +437,7 @@ describe( "The parse function", () => { { name: "#text", value: "thanks", + sourceCodeRange: { startOffset: 30, endOffset: 36 }, }, ], sentences: [ { @@ -387,6 +468,7 @@ describe( "The parse function", () => { { name: "#text", value: " for ", + sourceCodeRange: { startOffset: 40, endOffset: 45 }, }, ], sentences: [ { @@ -411,6 +493,7 @@ describe( "The parse function", () => { { name: "#text", value: "all", + sourceCodeRange: { startOffset: 48, endOffset: 51 }, }, ], sentences: [ { @@ -441,6 +524,7 @@ describe( "The parse function", () => { { name: "#text", value: " the ", + sourceCodeRange: { startOffset: 55, endOffset: 60 }, }, { name: "strong", @@ -449,6 +533,7 @@ describe( "The parse function", () => { { name: "#text", value: "fish", + sourceCodeRange: { startOffset: 68, endOffset: 72 }, }, ], sourceCodeLocation: { @@ -467,6 +552,7 @@ describe( "The parse function", () => { { name: "#text", value: "!", + sourceCodeRange: { startOffset: 81, endOffset: 82 }, }, ], sentences: [ { @@ -511,158 +597,62 @@ describe( "The parse 
function", () => { const html = "\n
" + "

Table of contents

\n

This is the first sentence.

"; + const paper = new Paper( html ); const researcher = Factory.buildMockResearcher( {}, true, false, false, { memoizedTokenizer: memoizedSentenceTokenizer } ); const languageProcessor = new LanguageProcessor( researcher ); - expect( build( html, languageProcessor ) ).toEqual( { - name: "#document-fragment", + expect( build( paper, languageProcessor ) ).toEqual( { attributes: {}, childNodes: [ - { - attributes: {}, - childNodes: [], - name: "#comment", - sourceCodeLocation: { - endOffset: 39, - startOffset: 0, - }, - }, - { - name: "#text", - value: "\n", - }, - { - name: "#text", - value: "\n", - }, - { - attributes: {}, - childNodes: [], - name: "#comment", - sourceCodeLocation: { - endOffset: 328, - startOffset: 288, - }, - }, + { name: "#text", sourceCodeRange: { endOffset: 40, startOffset: 39 }, value: "\n" }, + { name: "#text", sourceCodeRange: { endOffset: 288, startOffset: 287 }, value: "\n" }, { attributes: {}, childNodes: [ { name: "#text", - value: "This is the first sentence.", - }, + sourceCodeRange: { endOffset: 358, startOffset: 331 }, + value: "This is the first sentence." 
}, ], isImplicit: false, name: "p", sentences: [ { - sourceCodeRange: { - endOffset: 358, - startOffset: 331, - }, + sourceCodeRange: { endOffset: 358, startOffset: 331 }, text: "This is the first sentence.", tokens: [ - { - sourceCodeRange: { - endOffset: 335, - startOffset: 331, - }, - text: "This", - }, - { - sourceCodeRange: { - endOffset: 336, - startOffset: 335, - }, - text: " ", - }, - { - sourceCodeRange: { - endOffset: 338, - startOffset: 336, - }, - text: "is", - }, - { - sourceCodeRange: { - endOffset: 339, - startOffset: 338, - }, - text: " ", - }, - { - sourceCodeRange: { - endOffset: 342, - startOffset: 339, - }, - text: "the", - }, - { - sourceCodeRange: { - endOffset: 343, - startOffset: 342, - }, - text: " ", - }, - { - sourceCodeRange: { - endOffset: 348, - startOffset: 343, - }, - text: "first", - }, - { - sourceCodeRange: { - endOffset: 349, - startOffset: 348, - }, - text: " ", - }, - { - sourceCodeRange: { - endOffset: 357, - startOffset: 349, - }, - text: "sentence", - }, - { - sourceCodeRange: { - endOffset: 358, - startOffset: 357, - }, - text: ".", - }, - ], - }, - ], + { sourceCodeRange: { endOffset: 335, startOffset: 331 }, text: "This" }, + { sourceCodeRange: { endOffset: 336, startOffset: 335 }, text: " " }, + { sourceCodeRange: { endOffset: 338, startOffset: 336 }, text: "is" }, + { sourceCodeRange: { endOffset: 339, startOffset: 338 }, text: " " }, + { sourceCodeRange: { endOffset: 342, startOffset: 339 }, text: "the" }, + { sourceCodeRange: { endOffset: 343, startOffset: 342 }, text: " " }, + { sourceCodeRange: { endOffset: 348, startOffset: 343 }, text: "first" }, + { sourceCodeRange: { endOffset: 349, startOffset: 348 }, text: " " }, + { sourceCodeRange: { endOffset: 357, startOffset: 349 }, text: "sentence" }, + { sourceCodeRange: { endOffset: 358, startOffset: 357 }, text: "." 
}, + ] } ], sourceCodeLocation: { endOffset: 362, - endTag: { - endOffset: 362, - startOffset: 358, - }, + endTag: { endOffset: 362, startOffset: 358 }, startOffset: 328, - startTag: { - endOffset: 331, - startOffset: 328, - }, - }, - }, - ], - } + startTag: { endOffset: 331, startOffset: 328 }, + } } ], + name: "#document-fragment" } ); } ); it( "parses an HTML text with a Yoast breadcrumbs widget in Elementor, which should be filtered out", () => { // HTML:

The first sentence

const html = "

Home

The first sentence

"; + const paper = new Paper( html ); const researcher = Factory.buildMockResearcher( {}, true, false, false, { memoizedTokenizer: memoizedSentenceTokenizer } ); const languageProcessor = new LanguageProcessor( researcher ); - expect( build( html, languageProcessor ) ).toEqual( + expect( build( paper, languageProcessor ) ).toEqual( { name: "#document-fragment", attributes: {}, @@ -673,6 +663,7 @@ describe( "The parse function", () => { { name: "#text", value: "The first sentence", + sourceCodeRange: { startOffset: 99, endOffset: 117 }, }, ], isImplicit: false, @@ -742,12 +733,13 @@ describe( "The parse function", () => { } ); it( "parses an HTML text with a script element inside a paragraph", () => { const html = "

Hello, world!

"; + const paper = new Paper( html ); const researcher = Factory.buildMockResearcher( {}, true, false, false, { memoizedTokenizer: memoizedSentenceTokenizer } ); const languageProcessor = new LanguageProcessor( researcher ); - expect( build( html, languageProcessor ) ).toEqual( { + expect( build( paper, languageProcessor ) ).toEqual( { name: "#document-fragment", attributes: {}, childNodes: [ @@ -775,6 +767,7 @@ describe( "The parse function", () => { { name: "#text", value: " Hello, world!", + sourceCodeRange: { startOffset: 53, endOffset: 67 }, }, ], sentences: [ @@ -850,12 +843,13 @@ describe( "The parse function", () => { } ); it( "parses an HTML text with a script element outside of a paragraph", () => { const html = "

Hello, world!

"; + const paper = new Paper( html ); const researcher = Factory.buildMockResearcher( {}, true, false, false, { memoizedTokenizer: memoizedSentenceTokenizer } ); const languageProcessor = new LanguageProcessor( researcher ); - expect( build( html, languageProcessor ) ).toEqual( { + expect( build( paper, languageProcessor ) ).toEqual( { name: "#document-fragment", attributes: {}, childNodes: [ { @@ -864,6 +858,10 @@ describe( "The parse function", () => { attributes: {}, sentences: [], childNodes: [], + sourceCodeLocation: { + startOffset: 0, + endOffset: 45, + }, }, { name: "p", @@ -883,6 +881,7 @@ describe( "The parse function", () => { childNodes: [ { name: "#text", value: "Hello, world!", + sourceCodeRange: { startOffset: 48, endOffset: 61 }, } ], sourceCodeLocation: { startOffset: 45, @@ -901,12 +900,13 @@ describe( "The parse function", () => { } ); it( "parses an HTML text with a comment inside a paragraph", () => { const html = "

Hello, world!

"; + const paper = new Paper( html ); const researcher = Factory.buildMockResearcher( {}, true, false, false, { memoizedTokenizer: memoizedSentenceTokenizer } ); const languageProcessor = new LanguageProcessor( researcher ); - expect( build( html, languageProcessor ) ).toEqual( { + expect( build( paper, languageProcessor ) ).toEqual( { name: "#document-fragment", attributes: {}, childNodes: [ { @@ -940,15 +940,10 @@ describe( "The parse function", () => { sourceCodeRange: { startOffset: 26, endOffset: 39 }, } ], childNodes: [ - { - name: "#comment", - attributes: {}, - childNodes: [], - sourceCodeLocation: { startOffset: 8, endOffset: 26 }, - }, { name: "#text", value: "Hello, world!", + sourceCodeRange: { startOffset: 26, endOffset: 39 }, } ], sourceCodeLocation: { startOffset: 5, @@ -968,12 +963,13 @@ describe( "The parse function", () => { } ); it( "parses an HTML text with a comment within a sentence", () => { const html = "

Hello, world!

"; + const paper = new Paper( html ); const researcher = Factory.buildMockResearcher( {}, true, false, false, { memoizedTokenizer: memoizedSentenceTokenizer } ); const languageProcessor = new LanguageProcessor( researcher ); - expect( build( html, languageProcessor ) ).toEqual( { + expect( build( paper, languageProcessor ) ).toEqual( { name: "#document-fragment", attributes: {}, childNodes: [ { @@ -1011,16 +1007,12 @@ describe( "The parse function", () => { { name: "#text", value: "Hello, ", - }, - { - name: "#comment", - attributes: {}, - childNodes: [], - sourceCodeLocation: { startOffset: 15, endOffset: 33 }, + sourceCodeRange: { startOffset: 8, endOffset: 15 }, }, { name: "#text", value: " world!", + sourceCodeRange: { startOffset: 33, endOffset: 40 }, } ], sourceCodeLocation: { startOffset: 5, @@ -1041,12 +1033,13 @@ describe( "The parse function", () => { it( "parses an HTML text with a code element within a paragraph", () => { const html = "

Hello code! array.push( something ) Hello world!

"; + const paper = new Paper( html ); const researcher = Factory.buildMockResearcher( {}, true, false, false, { memoizedTokenizer: memoizedSentenceTokenizer } ); const languageProcessor = new LanguageProcessor( researcher ); - expect( build( html, languageProcessor ) ).toEqual( { + expect( build( paper, languageProcessor ) ).toEqual( { name: "#document-fragment", attributes: {}, childNodes: [ @@ -1074,10 +1067,12 @@ describe( "The parse function", () => { { name: "#text", value: "Hello code! ", + sourceCodeRange: { startOffset: 8, endOffset: 20 }, }, { name: "#text", value: " Hello world!", + sourceCodeRange: { startOffset: 56, endOffset: 69 }, }, ], sentences: [ @@ -1190,12 +1185,13 @@ describe( "The parse function", () => { } ); it( "parses an HTML text with a code element within a sentence", () => { const html = "

Hello array.push( something ) code!

"; + const paper = new Paper( html ); const researcher = Factory.buildMockResearcher( {}, true, false, false, { memoizedTokenizer: memoizedSentenceTokenizer } ); const languageProcessor = new LanguageProcessor( researcher ); - expect( build( html, languageProcessor ) ).toEqual( { + expect( build( paper, languageProcessor ) ).toEqual( { name: "#document-fragment", attributes: {}, childNodes: [ { @@ -1232,10 +1228,12 @@ describe( "The parse function", () => { { name: "#text", value: "Hello ", + sourceCodeRange: { startOffset: 8, endOffset: 14 }, }, { name: "#text", value: " code!", + sourceCodeRange: { startOffset: 50, endOffset: 56 }, }, ], sourceCodeLocation: { @@ -1256,12 +1254,13 @@ describe( "The parse function", () => { } ); it( "parses an HTML text with a code element with a child node within a sentence", () => { const html = "

Some text and code console.log( code )

"; + const paper = new Paper( html ); const researcher = Factory.buildMockResearcher( {}, true, false, false, { memoizedTokenizer: memoizedSentenceTokenizer } ); const languageProcessor = new LanguageProcessor( researcher ); - expect( build( html, languageProcessor ) ).toEqual( { + expect( build( paper, languageProcessor ) ).toEqual( { attributes: {}, childNodes: [ { @@ -1272,6 +1271,7 @@ describe( "The parse function", () => { { name: "#text", value: "Some text and code ", + sourceCodeRange: { startOffset: 3, endOffset: 22 }, }, ], sentences: [ @@ -1358,6 +1358,243 @@ describe( "The parse function", () => { name: "#document-fragment", } ); } ); + it( "also parses blocks when the blocks array is available inside Paper, " + + "the available block client id should also be added to the childNodes", () => { + const html = "\n" + + "

The red panda's coat is mainly red or orange-brown with a black belly and legs.

\n" + + "\n" + + "\n" + + "\n" + + "
    \n" + + "
  • giant panda
  • \n" + + "\n" + + "\n" + + "\n" + + "
  • red panda
  • \n" + + "
\n" + + ""; + const paper = new Paper( html, { wpBlocks: [ + { + clientId: "da950985-3903-479c-92f5-9a98bcec51e9", + name: "core/paragraph", + isValid: true, + originalContent: "

The red panda's coat is mainly red or orange-brown with a black belly and legs.

", + validationIssues: [], + attributes: { + content: "The red panda's coat is mainly red or orange-brown with a black belly and legs.", + dropCap: false, + }, + innerBlocks: [], + }, + { + clientId: "ce5c002f-eea2-4a92-be4a-6fd25e947f45", + name: "core/list", + isValid: true, + originalContent: "
    \n\n
", + validationIssues: [], + attributes: { + ordered: false, + values: "", + }, + innerBlocks: [ + { + clientId: "b45eed06-594b-468e-8723-1f597689c300", + name: "core/list-item", + isValid: true, + originalContent: "
  • giant panda
  • ", + validationIssues: [], + attributes: { + content: "giant panda", + }, + innerBlocks: [], + }, + { + clientId: "f8a22538-9e9d-4862-968d-524580f5d8ce", + name: "core/list-item", + isValid: true, + originalContent: "
  • red panda
  • ", + validationIssues: [], + attributes: { + content: "red panda", + }, + innerBlocks: [], + }, + ], + }, + ] } ); + + const researcher = Factory.buildMockResearcher( {}, true, false, false, + { memoizedTokenizer: memoizedSentenceTokenizer } ); + const languageProcessor = new LanguageProcessor( researcher ); + expect( build( paper, languageProcessor ) ).toEqual( + { + name: "#document-fragment", + attributes: {}, + childNodes: [ + { name: "#text", value: "\n", + sourceCodeRange: { startOffset: 21, endOffset: 22 } }, + { + name: "p", + attributes: {}, + childNodes: [ + { + name: "#text", + value: "The red panda's coat is mainly red or orange-brown with a black belly and legs.", + sourceCodeRange: { startOffset: 25, endOffset: 104 }, + clientId: "da950985-3903-479c-92f5-9a98bcec51e9", + }, + ], + sourceCodeLocation: { + startTag: { startOffset: 22, endOffset: 25 }, + endTag: { startOffset: 104, endOffset: 108 }, + startOffset: 22, + endOffset: 108, + }, + isImplicit: false, + clientId: "da950985-3903-479c-92f5-9a98bcec51e9", + sentences: [ + { + text: "The red panda's coat is mainly red or orange-brown with a black belly and legs.", + tokens: [ + { text: "The", sourceCodeRange: { startOffset: 25, endOffset: 28 } }, + { text: " ", sourceCodeRange: { startOffset: 28, endOffset: 29 } }, + { text: "red", sourceCodeRange: { startOffset: 29, endOffset: 32 } }, + { text: " ", sourceCodeRange: { startOffset: 32, endOffset: 33 } }, + { text: "panda's", sourceCodeRange: { startOffset: 33, endOffset: 40 } }, + { text: " ", sourceCodeRange: { startOffset: 40, endOffset: 41 } }, + { text: "coat", sourceCodeRange: { startOffset: 41, endOffset: 45 } }, + { text: " ", sourceCodeRange: { startOffset: 45, endOffset: 46 } }, + { text: "is", sourceCodeRange: { startOffset: 46, endOffset: 48 } }, + { text: " ", sourceCodeRange: { startOffset: 48, endOffset: 49 } }, + { text: "mainly", sourceCodeRange: { startOffset: 49, endOffset: 55 } }, + { text: " ", sourceCodeRange: { 
startOffset: 55, endOffset: 56 } }, + { text: "red", sourceCodeRange: { startOffset: 56, endOffset: 59 } }, + { text: " ", sourceCodeRange: { startOffset: 59, endOffset: 60 } }, + { text: "or", sourceCodeRange: { startOffset: 60, endOffset: 62 } }, + { text: " ", sourceCodeRange: { startOffset: 62, endOffset: 63 } }, + { text: "orange-brown", sourceCodeRange: { startOffset: 63, endOffset: 75 } }, + { text: " ", sourceCodeRange: { startOffset: 75, endOffset: 76 } }, + { text: "with", sourceCodeRange: { startOffset: 76, endOffset: 80 } }, + { text: " ", sourceCodeRange: { startOffset: 80, endOffset: 81 } }, + { text: "a", sourceCodeRange: { startOffset: 81, endOffset: 82 } }, + { text: " ", sourceCodeRange: { startOffset: 82, endOffset: 83 } }, + { text: "black", sourceCodeRange: { startOffset: 83, endOffset: 88 } }, + { text: " ", sourceCodeRange: { startOffset: 88, endOffset: 89 } }, + { text: "belly", sourceCodeRange: { startOffset: 89, endOffset: 94 } }, + { text: " ", sourceCodeRange: { startOffset: 94, endOffset: 95 } }, + { text: "and", sourceCodeRange: { startOffset: 95, endOffset: 98 } }, + { text: " ", sourceCodeRange: { startOffset: 98, endOffset: 99 } }, + { text: "legs", sourceCodeRange: { startOffset: 99, endOffset: 103 } }, + { text: ".", sourceCodeRange: { startOffset: 103, endOffset: 104 } }, + ], + sourceCodeRange: { startOffset: 25, endOffset: 104 }, + }, + ], + }, + { name: "#text", value: "\n", + sourceCodeRange: { startOffset: 108, endOffset: 109 } }, + { name: "#text", value: "\n\n", + sourceCodeRange: { startOffset: 131, endOffset: 133 } }, + { name: "#text", value: "\n", + sourceCodeRange: { startOffset: 149, endOffset: 150 } }, + { + name: "ul", + attributes: {}, + childNodes: [ + { name: "#text", value: "\n", clientId: "ce5c002f-eea2-4a92-be4a-6fd25e947f45", + sourceCodeRange: { startOffset: 175, endOffset: 176 } }, + { + name: "li", + attributes: {}, + childNodes: [ + { + name: "p", + attributes: {}, + childNodes: [ + { name: "#text", 
value: "giant panda", clientId: "b45eed06-594b-468e-8723-1f597689c300", + sourceCodeRange: { startOffset: 180, endOffset: 191 } }, + ], + sourceCodeLocation: { startOffset: 180, endOffset: 191 }, + isImplicit: true, + clientId: "b45eed06-594b-468e-8723-1f597689c300", + sentences: [ + { + text: "giant panda", + tokens: [ + { text: "giant", sourceCodeRange: { startOffset: 180, endOffset: 185 } }, + { text: " ", sourceCodeRange: { startOffset: 185, endOffset: 186 } }, + { text: "panda", sourceCodeRange: { startOffset: 186, endOffset: 191 } }, + ], + sourceCodeRange: { startOffset: 180, endOffset: 191 }, + }, + ], + }, + ], + sourceCodeLocation: { + startTag: { startOffset: 176, endOffset: 180 }, + endTag: { startOffset: 191, endOffset: 196 }, + startOffset: 176, + endOffset: 196, + }, + clientId: "b45eed06-594b-468e-8723-1f597689c300", + }, + { name: "#text", value: "\n", clientId: "ce5c002f-eea2-4a92-be4a-6fd25e947f45", + sourceCodeRange: { startOffset: 196, endOffset: 197 } }, + { name: "#text", value: "\n\n", clientId: "ce5c002f-eea2-4a92-be4a-6fd25e947f45", + sourceCodeRange: { startOffset: 219, endOffset: 221 } }, + { name: "#text", value: "\n", clientId: "ce5c002f-eea2-4a92-be4a-6fd25e947f45", + sourceCodeRange: { startOffset: 242, endOffset: 243 } }, + { + name: "li", + attributes: {}, + childNodes: [ + { + name: "p", + attributes: {}, + childNodes: [ + { name: "#text", value: "red panda", clientId: "f8a22538-9e9d-4862-968d-524580f5d8ce", + sourceCodeRange: { startOffset: 247, endOffset: 256 } }, + ], + sourceCodeLocation: { startOffset: 247, endOffset: 256 }, + isImplicit: true, + clientId: "f8a22538-9e9d-4862-968d-524580f5d8ce", + sentences: [ + { + text: "red panda", + tokens: [ + { text: "red", sourceCodeRange: { startOffset: 247, endOffset: 250 } }, + { text: " ", sourceCodeRange: { startOffset: 250, endOffset: 251 } }, + { text: "panda", sourceCodeRange: { startOffset: 251, endOffset: 256 } }, + ], + sourceCodeRange: { startOffset: 247, endOffset: 256 }, 
+ }, + ], + }, + ], + sourceCodeLocation: { + startTag: { startOffset: 243, endOffset: 247 }, + endTag: { startOffset: 256, endOffset: 261 }, + startOffset: 243, endOffset: 261, + }, + clientId: "f8a22538-9e9d-4862-968d-524580f5d8ce", + }, + { name: "#text", value: "\n", clientId: "ce5c002f-eea2-4a92-be4a-6fd25e947f45", + sourceCodeRange: { startOffset: 261, endOffset: 262 } }, + ], + sourceCodeLocation: { + startTag: { startOffset: 150, endOffset: 154 }, + endTag: { startOffset: 284, endOffset: 289 }, + startOffset: 150, + endOffset: 289, + }, + clientId: "ce5c002f-eea2-4a92-be4a-6fd25e947f45", + }, + { name: "#text", value: "\n", + sourceCodeRange: { startOffset: 289, endOffset: 290 } }, + ], + } + ); + } ); } ); describe( "parsing html with Yoast blocks that enter the Paper as html comments", () => { @@ -1366,42 +1603,91 @@ describe( "parsing html with Yoast blocks that enter the Paper as html comments" "\n" + "

    The Norwegian Forest cat is adapted to survive Norway's cold weather.

    \n" + ""; + const paper = new Paper( html ); + const researcher = Factory.buildMockResearcher( {}, true, false, false, { memoizedTokenizer: memoizedSentenceTokenizer } ); const languageProcessor = new LanguageProcessor( researcher ); - expect( build( html, languageProcessor ) ).toEqual( { + expect( build( paper, languageProcessor ) ).toEqual( { name: "#document-fragment", attributes: {}, childNodes: [ + { name: "#text", value: "\n", sourceCodeRange: { startOffset: 55, endOffset: 56 } }, { attributes: {}, - childNodes: [], - name: "#comment", - sourceCodeLocation: { - endOffset: 34, - startOffset: 0, - }, - }, - { - attributes: {}, - childNodes: [], - name: "#comment", + childNodes: [ + { + name: "#text", + value: "The Norwegian Forest cat is adapted to survive Norway's cold weather.", + sourceCodeRange: { startOffset: 59, endOffset: 128 } }, + ], + isImplicit: false, + name: "p", + sentences: [ + { + sourceCodeRange: { + endOffset: 128, + startOffset: 59, + }, + text: "The Norwegian Forest cat is adapted to survive Norway's cold weather.", + tokens: [ + { sourceCodeRange: { endOffset: 62, startOffset: 59 }, text: "The" }, + { sourceCodeRange: { endOffset: 63, startOffset: 62 }, text: " " }, + { sourceCodeRange: { endOffset: 72, startOffset: 63 }, text: "Norwegian" }, + { sourceCodeRange: { endOffset: 73, startOffset: 72 }, text: " " }, + { sourceCodeRange: { endOffset: 79, startOffset: 73 }, text: "Forest" }, + { sourceCodeRange: { endOffset: 80, startOffset: 79 }, text: " " }, + { sourceCodeRange: { endOffset: 83, startOffset: 80 }, text: "cat" }, + { sourceCodeRange: { endOffset: 84, startOffset: 83 }, text: " " }, + { sourceCodeRange: { endOffset: 86, startOffset: 84 }, text: "is" }, + { sourceCodeRange: { endOffset: 87, startOffset: 86 }, text: " " }, + { sourceCodeRange: { endOffset: 94, startOffset: 87 }, text: "adapted" }, + { sourceCodeRange: { endOffset: 95, startOffset: 94 }, text: " " }, + { sourceCodeRange: { endOffset: 97, startOffset: 95 }, text: 
"to" }, + { sourceCodeRange: { endOffset: 98, startOffset: 97 }, text: " " }, + { sourceCodeRange: { endOffset: 105, startOffset: 98 }, text: "survive" }, + { sourceCodeRange: { endOffset: 106, startOffset: 105 }, text: " " }, + { sourceCodeRange: { endOffset: 114, startOffset: 106 }, text: "Norway's" }, + { sourceCodeRange: { endOffset: 115, startOffset: 114 }, text: " " }, + { sourceCodeRange: { endOffset: 119, startOffset: 115 }, text: "cold" }, + { sourceCodeRange: { endOffset: 120, startOffset: 119 }, text: " " }, + { sourceCodeRange: { endOffset: 127, startOffset: 120 }, text: "weather" }, + { sourceCodeRange: { endOffset: 128, startOffset: 127 }, text: "." }, + ], + }, + ], sourceCodeLocation: { - endOffset: 55, - startOffset: 34, + endOffset: 132, + endTag: { endOffset: 132, startOffset: 128 }, + startOffset: 56, + startTag: { endOffset: 59, startOffset: 56 }, }, }, - { - name: "#text", - value: "\n", - }, + { name: "#text", value: "\n", sourceCodeRange: { startOffset: 132, endOffset: 133 } }, + ], + } ); + } ); + + it( "parses an HTML text with a Yoast siblings block", () => { + const html = "

    Hello, world!

    "; + const paper = new Paper( html ); + + const researcher = Factory.buildMockResearcher( {}, true, false, false, + { memoizedTokenizer: memoizedSentenceTokenizer } ); + const languageProcessor = new LanguageProcessor( researcher ); + + expect( build( paper, languageProcessor ) ).toEqual( { + name: "#document-fragment", + attributes: {}, + childNodes: [ { attributes: {}, childNodes: [ { name: "#text", - value: "The Norwegian Forest cat is adapted to survive Norway's cold weather.", + value: "Hello, world!", + sourceCodeRange: { startOffset: 3, endOffset: 16 }, }, ], isImplicit: false, @@ -1409,238 +1695,22 @@ describe( "parsing html with Yoast blocks that enter the Paper as html comments" sentences: [ { sourceCodeRange: { - endOffset: 128, - startOffset: 59, + endOffset: 16, + startOffset: 3, }, - text: "The Norwegian Forest cat is adapted to survive Norway's cold weather.", + text: "Hello, world!", tokens: [ { sourceCodeRange: { - endOffset: 62, - startOffset: 59, + endOffset: 8, + startOffset: 3, }, - text: "The", + text: "Hello", }, { sourceCodeRange: { - endOffset: 63, - startOffset: 62, - }, - text: " ", - }, - { - sourceCodeRange: { - endOffset: 72, - startOffset: 63, - }, - text: "Norwegian", - }, - { - sourceCodeRange: { - endOffset: 73, - startOffset: 72, - }, - text: " ", - }, - { - sourceCodeRange: { - endOffset: 79, - startOffset: 73, - }, - text: "Forest", - }, - { - sourceCodeRange: { - endOffset: 80, - startOffset: 79, - }, - text: " ", - }, - { - sourceCodeRange: { - endOffset: 83, - startOffset: 80, - }, - text: "cat", - }, - { - sourceCodeRange: { - endOffset: 84, - startOffset: 83, - }, - text: " ", - }, - { - sourceCodeRange: { - endOffset: 86, - startOffset: 84, - }, - text: "is", - }, - { - sourceCodeRange: { - endOffset: 87, - startOffset: 86, - }, - text: " ", - }, - { - sourceCodeRange: { - endOffset: 94, - startOffset: 87, - }, - text: "adapted", - }, - { - sourceCodeRange: { - endOffset: 95, - startOffset: 94, - }, - text: " ", - }, - 
{ - sourceCodeRange: { - endOffset: 97, - startOffset: 95, - }, - text: "to", - }, - { - sourceCodeRange: { - endOffset: 98, - startOffset: 97, - }, - text: " ", - }, - { - sourceCodeRange: { - endOffset: 105, - startOffset: 98, - }, - text: "survive", - }, - { - sourceCodeRange: { - endOffset: 106, - startOffset: 105, - }, - text: " ", - }, - { - sourceCodeRange: { - endOffset: 114, - startOffset: 106, - }, - text: "Norway's", - }, - { - sourceCodeRange: { - endOffset: 115, - startOffset: 114, - }, - text: " ", - }, - { - sourceCodeRange: { - endOffset: 119, - startOffset: 115, - }, - text: "cold", - }, - { - sourceCodeRange: { - endOffset: 120, - startOffset: 119, - }, - text: " ", - }, - { - sourceCodeRange: { - endOffset: 127, - startOffset: 120, - }, - text: "weather", - }, - { - sourceCodeRange: { - endOffset: 128, - startOffset: 127, - }, - text: ".", - }, - ], - }, - ], - sourceCodeLocation: { - endOffset: 132, - endTag: { - endOffset: 132, - startOffset: 128, - }, - startOffset: 56, - startTag: { - endOffset: 59, - startOffset: 56, - }, - }, - }, - { - name: "#text", - value: "\n", - }, - { - attributes: {}, - childNodes: [], - name: "#comment", - sourceCodeLocation: { - endOffset: 155, - startOffset: 133, - }, - }, - ], - } ); - } ); - - it( "parses an HTML text with a Yoast siblings block", () => { - const html = "

    Hello, world!

    "; - - const researcher = Factory.buildMockResearcher( {}, true, false, false, - { memoizedTokenizer: memoizedSentenceTokenizer } ); - const languageProcessor = new LanguageProcessor( researcher ); - - expect( build( html, languageProcessor ) ).toEqual( { - name: "#document-fragment", - attributes: {}, - childNodes: [ - { - attributes: {}, - childNodes: [ - { - name: "#text", - value: "Hello, world!", - }, - ], - isImplicit: false, - name: "p", - sentences: [ - { - sourceCodeRange: { - endOffset: 16, - startOffset: 3, - }, - text: "Hello, world!", - tokens: [ - { - sourceCodeRange: { - endOffset: 8, - startOffset: 3, - }, - text: "Hello", - }, - { - sourceCodeRange: { - endOffset: 9, - startOffset: 8, + endOffset: 9, + startOffset: 8, }, text: ",", }, @@ -1681,27 +1751,19 @@ describe( "parsing html with Yoast blocks that enter the Paper as html comments" }, }, }, - { - attributes: {}, - childNodes: [], - name: "#comment", - sourceCodeLocation: { - endOffset: 51, - startOffset: 20, - }, - }, ], } ); } ); it( "parses an HTML text with a Yoast subpages block", () => { const html = "
    The Norwegian Forest cat is strongly built and larger than an average cat.
    "; + const paper = new Paper( html ); const researcher = Factory.buildMockResearcher( {}, true, false, false, { memoizedTokenizer: memoizedSentenceTokenizer } ); const languageProcessor = new LanguageProcessor( researcher ); - expect( build( html, languageProcessor ) ).toEqual( { + expect( build( paper, languageProcessor ) ).toEqual( { name: "#document-fragment", attributes: {}, childNodes: [ @@ -1714,6 +1776,7 @@ describe( "parsing html with Yoast blocks that enter the Paper as html comments" { name: "#text", value: "The Norwegian Forest cat is strongly built and larger than an average cat.", + sourceCodeRange: { startOffset: 5, endOffset: 79 }, }, ], isImplicit: true, @@ -1726,221 +1789,419 @@ describe( "parsing html with Yoast blocks that enter the Paper as html comments" }, text: "The Norwegian Forest cat is strongly built and larger than an average cat.", tokens: [ - { - sourceCodeRange: { - endOffset: 8, - startOffset: 5, - }, - text: "The", - }, - { - sourceCodeRange: { - endOffset: 9, - startOffset: 8, - }, - text: " ", - }, - { - sourceCodeRange: { - endOffset: 18, - startOffset: 9, - }, - text: "Norwegian", - }, - { - sourceCodeRange: { - endOffset: 19, - startOffset: 18, - }, - text: " ", - }, - { - sourceCodeRange: { - endOffset: 25, - startOffset: 19, - }, - text: "Forest", - }, - { - sourceCodeRange: { - endOffset: 26, - startOffset: 25, - }, - text: " ", - }, - { - sourceCodeRange: { - endOffset: 29, - startOffset: 26, - }, - text: "cat", - }, - { - sourceCodeRange: { - endOffset: 30, - startOffset: 29, - }, - text: " ", - }, - { - sourceCodeRange: { - endOffset: 32, - startOffset: 30, - }, - text: "is", - }, - { - sourceCodeRange: { - endOffset: 33, - startOffset: 32, - }, - text: " ", - }, - { - sourceCodeRange: { - endOffset: 41, - startOffset: 33, - }, - text: "strongly", - }, - { - sourceCodeRange: { - endOffset: 42, - startOffset: 41, - }, - text: " ", - }, - { - sourceCodeRange: { - endOffset: 47, - startOffset: 42, - }, - text: "built", - 
}, - { - sourceCodeRange: { - endOffset: 48, - startOffset: 47, - }, - text: " ", - }, - { - sourceCodeRange: { - endOffset: 51, - startOffset: 48, - }, - text: "and", - }, - { - sourceCodeRange: { - endOffset: 52, - startOffset: 51, - }, - text: " ", - }, - { - sourceCodeRange: { - endOffset: 58, - startOffset: 52, - }, - text: "larger", - }, - { - sourceCodeRange: { - endOffset: 59, - startOffset: 58, - }, - text: " ", - }, - { - sourceCodeRange: { - endOffset: 63, - startOffset: 59, - }, - text: "than", - }, - { - sourceCodeRange: { - endOffset: 64, - startOffset: 63, - }, - text: " ", - }, - { - sourceCodeRange: { - endOffset: 66, - startOffset: 64, - }, - text: "an", - }, - { - sourceCodeRange: { - endOffset: 67, - startOffset: 66, - }, - text: " ", - }, - { - sourceCodeRange: { - endOffset: 74, - startOffset: 67, - }, - text: "average", - }, - { - sourceCodeRange: { - endOffset: 75, - startOffset: 74, - }, - text: " ", - }, - { - sourceCodeRange: { - endOffset: 78, - startOffset: 75, - }, - text: "cat", - }, - { - sourceCodeRange: { - endOffset: 79, - startOffset: 78, - }, - text: ".", - }, + { sourceCodeRange: { endOffset: 8, startOffset: 5 }, text: "The" }, + { sourceCodeRange: { endOffset: 9, startOffset: 8 }, text: " " }, + { sourceCodeRange: { endOffset: 18, startOffset: 9 }, text: "Norwegian" }, + { sourceCodeRange: { endOffset: 19, startOffset: 18 }, text: " " }, + { sourceCodeRange: { endOffset: 25, startOffset: 19 }, text: "Forest" }, + { sourceCodeRange: { endOffset: 26, startOffset: 25 }, text: " " }, + { sourceCodeRange: { endOffset: 29, startOffset: 26 }, text: "cat" }, + { sourceCodeRange: { endOffset: 30, startOffset: 29 }, text: " " }, + { sourceCodeRange: { endOffset: 32, startOffset: 30 }, text: "is" }, + { sourceCodeRange: { endOffset: 33, startOffset: 32 }, text: " " }, + { sourceCodeRange: { endOffset: 41, startOffset: 33 }, text: "strongly" }, + { sourceCodeRange: { endOffset: 42, startOffset: 41 }, text: " " }, + { sourceCodeRange: { 
endOffset: 47, startOffset: 42 }, text: "built" }, + { sourceCodeRange: { endOffset: 48, startOffset: 47 }, text: " " }, + { sourceCodeRange: { endOffset: 51, startOffset: 48 }, text: "and" }, + { sourceCodeRange: { endOffset: 52, startOffset: 51 }, text: " " }, + { sourceCodeRange: { endOffset: 58, startOffset: 52 }, text: "larger" }, + { sourceCodeRange: { endOffset: 59, startOffset: 58 }, text: " " }, + { sourceCodeRange: { endOffset: 63, startOffset: 59 }, text: "than" }, + { sourceCodeRange: { endOffset: 64, startOffset: 63 }, text: " " }, + { sourceCodeRange: { endOffset: 66, startOffset: 64 }, text: "an" }, + { sourceCodeRange: { endOffset: 67, startOffset: 66 }, text: " " }, + { sourceCodeRange: { endOffset: 74, startOffset: 67 }, text: "average" }, + { sourceCodeRange: { endOffset: 75, startOffset: 74 }, text: " " }, + { sourceCodeRange: { endOffset: 78, startOffset: 75 }, text: "cat" }, + { sourceCodeRange: { endOffset: 79, startOffset: 78 }, text: "." }, ], }, ], - sourceCodeLocation: { - endOffset: 79, - startOffset: 5, - }, + sourceCodeLocation: { endOffset: 79, startOffset: 5 }, }, ], name: "div", sourceCodeLocation: { endOffset: 85, - endTag: { - endOffset: 85, - startOffset: 79, - }, + endTag: { endOffset: 85, startOffset: 79 }, startOffset: 0, - startTag: { - endOffset: 5, - startOffset: 0, - }, + startTag: { endOffset: 5, startOffset: 0 }, }, }, + ], + } ); + } ); + + it( "parses an HTML text with a Yoast FAQ block: " + + "The block client id, attribute id, and the information whether a child node is " + + "the first section in the sub-block should be added to the tree", () => { + const html = "\n" + + "
    " + + "What is giant panda

    " + + "Giant panda is is relative to red panda

    " + + "Test

    Test

    " + + "
    " + + "giant panda is silly

    \n" + + ""; + const paper = new Paper( html, { + wpBlocks: [ { - attributes: {}, - childNodes: [], - name: "#comment", - sourceCodeLocation: { - endOffset: 116, - startOffset: 85, + clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1", + name: "yoast/faq-block", + isValid: true, + originalContent: "
    " + + "What is giant panda

    Giant panda" + + " is relative to red panda

    " + + "Test

    Tets

    giant panda is silly " + + "

    ", + validationIssues: [], + attributes: { + questions: [ + { + id: "faq-question-1689322642789", + question: [ "What is giant panda" ], + answer: [ "Giant ", { type: "strong", props: { children: [ "panda" ] } }, " is relative to red panda" ], + jsonQuestion: "What is giant panda", + jsonAnswer: "Giant panda is is relative to red panda", + }, + { + id: "faq-question-1689322667728", + question: [ "Test" ], + answer: [ "Tests" ], + jsonQuestion: "Test", + jsonAnswer: "Tests", + }, + { + id: "faq-question-1689936392675", + question: [ "giant panda is silly" ], + answer: [], + jsonQuestion: "giant panda is silly", + jsonAnswer: "", + }, + ], }, + innerBlocks: [], + startOffset: 2510, + contentOffset: 2963, }, ], } ); + + const researcher = Factory.buildMockResearcher( {}, true, false, false, + { memoizedTokenizer: memoizedSentenceTokenizer } ); + const languageProcessor = new LanguageProcessor( researcher ); + expect( build( paper, languageProcessor ) ).toEqual( + { + name: "#document-fragment", + attributes: {}, + childNodes: [ + { name: "#text", value: "\n", sourceCodeRange: { startOffset: 458, endOffset: 459 } }, + { + name: "div", + attributes: { + "class": new Set( [ "schema-faq", "wp-block-yoast-faq-block" ] ), + }, + childNodes: [ + { + name: "div", + attributes: { "class": new Set( [ "schema-faq-section" ] ), id: "faq-question-1689322642789" }, + childNodes: [ + { + name: "p", + attributes: {}, + childNodes: [ + { + name: "strong", + attributes: { "class": new Set( [ "schema-faq-question" ] ) }, + childNodes: [ + { name: "#text", value: "What is giant panda", + clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1", + sourceCodeRange: { startOffset: 608, endOffset: 627 } }, + ], + sourceCodeLocation: { + startTag: { startOffset: 572, endOffset: 608 }, + endTag: { startOffset: 627, endOffset: 636 }, + startOffset: 572, + endOffset: 636, + }, + clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1", + }, + ], + sourceCodeLocation: { startOffset: 572, endOffset: 636 }, + 
isImplicit: true, + attributeId: "faq-question-1689322642789", + isFirstSection: true, + clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1", + sentences: [ + { + text: "What is giant panda", + tokens: [ + { text: "What", sourceCodeRange: { startOffset: 608, endOffset: 612 } }, + { text: " ", sourceCodeRange: { startOffset: 612, endOffset: 613 } }, + { text: "is", sourceCodeRange: { startOffset: 613, endOffset: 615 } }, + { text: " ", sourceCodeRange: { startOffset: 615, endOffset: 616 } }, + { text: "giant", sourceCodeRange: { startOffset: 616, endOffset: 621 } }, + { text: " ", sourceCodeRange: { startOffset: 621, endOffset: 622 } }, + { text: "panda", sourceCodeRange: { startOffset: 622, endOffset: 627 } }, + ], + sourceCodeRange: { startOffset: 608, endOffset: 627 }, + }, + ], + }, + { name: "#text", value: " ", clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1", + sourceCodeRange: { startOffset: 636, endOffset: 637 } }, + { + name: "p", + attributes: { "class": new Set( [ "schema-faq-answer" ] ) }, + childNodes: [ + { name: "#text", value: "Giant ", clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1", + sourceCodeRange: { startOffset: 666, endOffset: 672 } }, + { + name: "strong", + attributes: {}, + childNodes: [ + { name: "#text", value: "panda", clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1", + sourceCodeRange: { startOffset: 680, endOffset: 685 } }, + ], + sourceCodeLocation: { + startTag: { startOffset: 672, endOffset: 680 }, + endTag: { startOffset: 685, endOffset: 694 }, + startOffset: 672, + endOffset: 694, + }, + clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1", + }, + { name: "#text", value: " is is relative to red panda", + clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1", + sourceCodeRange: { startOffset: 694, endOffset: 722 } }, + ], + sourceCodeLocation: { + startTag: { startOffset: 637, endOffset: 666 }, + endTag: { startOffset: 722, endOffset: 726 }, + startOffset: 637, + endOffset: 726, + }, + isImplicit: false, + attributeId: 
"faq-question-1689322642789", + isFirstSection: false, + clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1", + sentences: [ + { + text: "Giant panda is is relative to red panda", + tokens: [ + { text: "Giant", sourceCodeRange: { startOffset: 666, endOffset: 671 } }, + { text: " ", sourceCodeRange: { startOffset: 671, endOffset: 672 } }, + { text: "panda", sourceCodeRange: { startOffset: 680, endOffset: 685 } }, + { text: " ", sourceCodeRange: { startOffset: 694, endOffset: 695 } }, + { text: "is", sourceCodeRange: { startOffset: 695, endOffset: 697 } }, + { text: " ", sourceCodeRange: { startOffset: 697, endOffset: 698 } }, + { text: "is", sourceCodeRange: { startOffset: 698, endOffset: 700 } }, + { text: " ", sourceCodeRange: { startOffset: 700, endOffset: 701 } }, + { text: "relative", sourceCodeRange: { startOffset: 701, endOffset: 709 } }, + { text: " ", sourceCodeRange: { startOffset: 709, endOffset: 710 } }, + { text: "to", sourceCodeRange: { startOffset: 710, endOffset: 712 } }, + { text: " ", sourceCodeRange: { startOffset: 712, endOffset: 713 } }, + { text: "red", sourceCodeRange: { startOffset: 713, endOffset: 716 } }, + { text: " ", sourceCodeRange: { startOffset: 716, endOffset: 717 } }, + { text: "panda", sourceCodeRange: { startOffset: 717, endOffset: 722 } }, + ], + sourceCodeRange: { startOffset: 666, endOffset: 722 }, + }, + ], + }, + { name: "#text", value: " ", clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1", + sourceCodeRange: { startOffset: 726, endOffset: 727 } }, + ], + sourceCodeLocation: { + startTag: { startOffset: 508, endOffset: 572 }, + endTag: { startOffset: 727, endOffset: 733 }, + startOffset: 508, + endOffset: 733, + }, + clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1", + }, + { name: "#text", value: " ", clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1", + sourceCodeRange: { startOffset: 733, endOffset: 734 } }, + { + name: "div", + attributes: { "class": new Set( [ "schema-faq-section" ] ), id: "faq-question-1689322667728" }, + 
childNodes: [ + { + name: "p", + attributes: {}, + childNodes: [ + { + name: "strong", + attributes: { "class": new Set( [ "schema-faq-question" ] ) }, + childNodes: [ + { name: "#text", value: "Test", clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1", + sourceCodeRange: { startOffset: 834, endOffset: 838 } }, + ], + sourceCodeLocation: { + startTag: { startOffset: 798, endOffset: 834 }, + endTag: { startOffset: 838, endOffset: 847 }, + startOffset: 798, + endOffset: 847, + }, + clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1", + }, + ], + sourceCodeLocation: { startOffset: 798, endOffset: 847 }, + isImplicit: true, + attributeId: "faq-question-1689322667728", + isFirstSection: true, + clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1", + sentences: [ + { + text: "Test", + tokens: [ + { text: "Test", sourceCodeRange: { startOffset: 834, endOffset: 838 } }, + ], + sourceCodeRange: { startOffset: 834, endOffset: 838 }, + }, + ], + }, + { name: "#text", value: " ", clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1", + sourceCodeRange: { startOffset: 847, endOffset: 848 } }, + { + name: "p", + attributes: { "class": new Set( [ "schema-faq-answer" ] ) }, + childNodes: [ + { name: "#text", value: "Test", clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1", + sourceCodeRange: { startOffset: 877, endOffset: 881 } }, + ], + sourceCodeLocation: { + startTag: { startOffset: 848, endOffset: 877 }, + endTag: { startOffset: 881, endOffset: 885 }, + startOffset: 848, + endOffset: 885, + }, + isImplicit: false, + attributeId: "faq-question-1689322667728", + isFirstSection: false, + clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1", + sentences: [ + { + text: "Test", + tokens: [ + { text: "Test", sourceCodeRange: { startOffset: 877, endOffset: 881 } }, + ], + sourceCodeRange: { startOffset: 877, endOffset: 881 }, + }, + ], + }, + { name: "#text", value: " ", clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1", + sourceCodeRange: { startOffset: 885, endOffset: 886 } }, + ], + 
sourceCodeLocation: { + startTag: { startOffset: 734, endOffset: 798 }, + endTag: { startOffset: 886, endOffset: 892 }, + startOffset: 734, + endOffset: 892, + }, + clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1", + }, + { name: "#text", value: " ", clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1", + sourceCodeRange: { startOffset: 892, endOffset: 893 } }, + { + name: "div", + attributes: { "class": new Set( [ "schema-faq-section" ] ), id: "faq-question-1689936392675" }, + childNodes: [ + { + name: "p", + attributes: {}, + childNodes: [ + { + name: "strong", + attributes: { "class": new Set( [ "schema-faq-question" ] ) }, + childNodes: [ + { name: "#text", value: "giant panda is silly", + clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1", + sourceCodeRange: { startOffset: 993, endOffset: 1013 } }, + ], + sourceCodeLocation: { + startTag: { startOffset: 957, endOffset: 993 }, + endTag: { startOffset: 1013, endOffset: 1022 }, + startOffset: 957, + endOffset: 1022, + }, + clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1", + }, + ], + sourceCodeLocation: { startOffset: 957, endOffset: 1022 }, + isImplicit: true, + attributeId: "faq-question-1689936392675", + isFirstSection: true, + clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1", + sentences: [ + { + text: "giant panda is silly", + tokens: [ + { text: "giant", sourceCodeRange: { startOffset: 993, endOffset: 998 } }, + { text: " ", sourceCodeRange: { startOffset: 998, endOffset: 999 } }, + { text: "panda", sourceCodeRange: { startOffset: 999, endOffset: 1004 } }, + { text: " ", sourceCodeRange: { startOffset: 1004, endOffset: 1005 } }, + { text: "is", sourceCodeRange: { startOffset: 1005, endOffset: 1007 } }, + { text: " ", sourceCodeRange: { startOffset: 1007, endOffset: 1008 } }, + { text: "silly", sourceCodeRange: { startOffset: 1008, endOffset: 1013 } }, + ], + sourceCodeRange: { startOffset: 993, endOffset: 1013 }, + }, + ], + }, + { name: "#text", value: " ", clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1", + 
sourceCodeRange: { startOffset: 1022, endOffset: 1023 } }, + { + name: "p", + attributes: { "class": new Set( [ "schema-faq-answer" ] ) }, + childNodes: [], + sourceCodeLocation: { + startTag: { startOffset: 1023, endOffset: 1052 }, + endTag: { startOffset: 1052, endOffset: 1056 }, + startOffset: 1023, + endOffset: 1056, + }, + isImplicit: false, + attributeId: "faq-question-1689936392675", + isFirstSection: false, + clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1", + sentences: [], + }, + { name: "#text", value: " ", clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1", + sourceCodeRange: { startOffset: 1056, endOffset: 1057 } }, + ], + sourceCodeLocation: { + startTag: { startOffset: 893, endOffset: 957 }, + endTag: { startOffset: 1057, endOffset: 1063 }, + startOffset: 893, + endOffset: 1063, + }, + clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1", + }, + { name: "#text", value: " ", clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1", + sourceCodeRange: { startOffset: 1063, endOffset: 1064 } }, + ], + sourceCodeLocation: { + startTag: { startOffset: 459, endOffset: 508 }, + endTag: { startOffset: 1064, endOffset: 1070 }, + startOffset: 459, + endOffset: 1070, + }, + clientId: "a062b3dd-26d5-4d33-b59f-9746d13d1ee1", + }, + { name: "#text", value: "\n", sourceCodeRange: { startOffset: 1070, endOffset: 1071 } }, + ], + } + ); } ); } ); diff --git a/packages/yoastseo/spec/parse/build/private/adaptSpec.js b/packages/yoastseo/spec/parse/build/private/adaptSpec.js index 05395403764..16924f775c5 100644 --- a/packages/yoastseo/spec/parse/build/private/adaptSpec.js +++ b/packages/yoastseo/spec/parse/build/private/adaptSpec.js @@ -8,7 +8,7 @@ describe( "The adapt function", () => { it( "adapts a basic div element", () => { const html = "

    Hello, world!

    "; - const tree = parseFragment( html ); + const tree = parseFragment( html, { sourceCodeLocationInfo: true } ); const adaptedTree = adapt( tree ); @@ -17,17 +17,45 @@ describe( "The adapt function", childNodes: [ { attributes: {}, childNodes: [ { + name: "p", + isImplicit: false, + sourceCodeLocation: { + startOffset: 5, + endOffset: 39, + startTag: { + startOffset: 5, + endOffset: 22, + }, + endTag: { + startOffset: 35, + endOffset: 39, + }, + }, attributes: { "class": new Set( [ "yoast" ] ), }, childNodes: [ { name: "#text", value: "Hello, world!", + sourceCodeRange: { + startOffset: 22, + endOffset: 35, + }, } ], - isImplicit: false, - name: "p", } ], name: "div", + sourceCodeLocation: { + startOffset: 0, + endOffset: 45, + startTag: { + startOffset: 0, + endOffset: 5, + }, + endTag: { + startOffset: 39, + endOffset: 45, + }, + }, } ], name: "#document-fragment", }; @@ -37,21 +65,20 @@ describe( "The adapt function", it( "adapts a heading element", () => { const html = "

    Hello World!

    "; - const tree = parseFragment( html ); - + const tree = parseFragment( html, { sourceCodeLocationInfo: true } ); const adaptedTree = adapt( tree ); const expected = new Node( "#document-fragment", {}, [ new Heading( 1, {}, [ - new Text( "Hello World!" ), - ] ), + new Text( { value: "Hello World!", sourceCodeLocation: { startOffset: 4, endOffset: 16 } } ), + ], { startOffset: 0, endOffset: 21, startTag: { startOffset: 0, endOffset: 4 }, endTag: { startOffset: 16, endOffset: 21 } } ), ] ); expect( adaptedTree ).toEqual( expected ); } ); it( "adapts a tree with a code element inside a paragraph", () => { const html = "

    Hello World! function() Hello Yoast!

    "; - const tree = parseFragment( html ); + const tree = parseFragment( html, { sourceCodeLocationInfo: true } ); const adaptedTree = adapt( tree ); @@ -63,24 +90,60 @@ describe( "The adapt function", name: "p", isImplicit: false, attributes: {}, + sourceCodeLocation: { + startOffset: 0, + endOffset: 56, + startTag: { + startOffset: 0, + endOffset: 3, + }, + endTag: { + startOffset: 52, + endOffset: 56, + }, + }, childNodes: [ { name: "#text", value: "Hello World! ", + sourceCodeRange: { + startOffset: 3, + endOffset: 16, + }, }, { name: "code", attributes: {}, + sourceCodeLocation: { + startOffset: 16, + endOffset: 39, + startTag: { + startOffset: 16, + endOffset: 22, + }, + endTag: { + startOffset: 32, + endOffset: 39, + }, + }, childNodes: [ { name: "#text", value: "function()", + sourceCodeRange: { + startOffset: 22, + endOffset: 32, + }, }, ], }, { name: "#text", value: " Hello Yoast!", + sourceCodeRange: { + startOffset: 39, + endOffset: 52, + }, }, ], }, @@ -91,7 +154,7 @@ describe( "The adapt function", it( "adapts a tree with a code element within a sentence", () => { const html = "

    Hello push() World!

    "; - const tree = parseFragment( html ); + const tree = parseFragment( html, { sourceCodeLocationInfo: true } ); const adaptedTree = adapt( tree ); @@ -103,24 +166,60 @@ describe( "The adapt function", name: "p", isImplicit: false, attributes: {}, + sourceCodeLocation: { + startOffset: 0, + endOffset: 39, + startTag: { + startOffset: 0, + endOffset: 3, + }, + endTag: { + startOffset: 35, + endOffset: 39, + }, + }, childNodes: [ { name: "#text", value: "Hello ", + sourceCodeRange: { + startOffset: 3, + endOffset: 9, + }, }, { name: "code", attributes: {}, + sourceCodeLocation: { + startOffset: 9, + endOffset: 28, + startTag: { + startOffset: 9, + endOffset: 15, + }, + endTag: { + startOffset: 21, + endOffset: 28, + }, + }, childNodes: [ { name: "#text", value: "push()", + sourceCodeRange: { + startOffset: 15, + endOffset: 21, + }, }, ], }, { name: "#text", value: " World!", + sourceCodeRange: { + startOffset: 28, + endOffset: 35, + }, }, ], }, @@ -131,7 +230,7 @@ describe( "The adapt function", it( "adapts a tree with a script element", () => { const html = "

    Hello World!

    "; - const tree = parseFragment( html ); + const tree = parseFragment( html, { sourceCodeLocationInfo: true } ); const adaptedTree = adapt( tree ); @@ -142,19 +241,51 @@ describe( "The adapt function", { name: "div", attributes: {}, + sourceCodeLocation: { + startOffset: 0, + endOffset: 69, + startTag: { + startOffset: 0, + endOffset: 5, + }, + endTag: { + startOffset: 63, + endOffset: 69, + }, + }, childNodes: [ { name: "p", isImplicit: true, attributes: {}, + sourceCodeLocation: { + startOffset: 5, + endOffset: 44, + }, childNodes: [ { name: "script", attributes: {}, + sourceCodeLocation: { + startOffset: 5, + endOffset: 44, + startTag: { + startOffset: 5, + endOffset: 13, + }, + endTag: { + startOffset: 35, + endOffset: 44, + }, + }, childNodes: [ { name: "#text", value: "alert(\"Hello World!\");", + sourceCodeRange: { + startOffset: 13, + endOffset: 35, + }, }, ], }, @@ -164,10 +295,26 @@ describe( "The adapt function", name: "p", isImplicit: false, attributes: {}, + sourceCodeLocation: { + startOffset: 44, + endOffset: 63, + startTag: { + startOffset: 44, + endOffset: 47, + }, + endTag: { + startOffset: 59, + endOffset: 63, + }, + }, childNodes: [ { name: "#text", value: "Hello World!", + sourceCodeRange: { + startOffset: 47, + endOffset: 59, + }, }, ], }, @@ -180,7 +327,7 @@ describe( "The adapt function", it( "adapts a tree with a script element within a paragraph", () => { const html = "

    Hello World!

    "; - const tree = parseFragment( html ); + const tree = parseFragment( html, { sourceCodeLocationInfo: true } ); const adaptedTree = adapt( tree ); @@ -191,25 +338,69 @@ describe( "The adapt function", { name: "div", attributes: {}, + sourceCodeLocation: { + startOffset: 0, + endOffset: 70, + startTag: { + startOffset: 0, + endOffset: 5, + }, + endTag: { + startOffset: 64, + endOffset: 70, + }, + }, childNodes: [ { name: "p", isImplicit: false, attributes: {}, + sourceCodeLocation: { + startOffset: 5, + endOffset: 64, + startTag: { + startOffset: 5, + endOffset: 8, + }, + endTag: { + startOffset: 60, + endOffset: 64, + }, + }, childNodes: [ { name: "script", attributes: {}, + sourceCodeLocation: { + startOffset: 8, + endOffset: 47, + startTag: { + startOffset: 8, + endOffset: 16, + }, + endTag: { + startOffset: 38, + endOffset: 47, + }, + }, childNodes: [ { name: "#text", value: "alert(\"Hello World!\");", + sourceCodeRange: { + startOffset: 16, + endOffset: 38, + }, }, ], }, { name: "#text", value: " Hello World!", + sourceCodeRange: { + startOffset: 47, + endOffset: 60, + }, }, ], }, @@ -222,7 +413,7 @@ describe( "The adapt function", it( "adapts a tree with a style element", () => { const html = "

    Hello World!

    "; - const tree = parseFragment( html ); + const tree = parseFragment( html, { sourceCodeLocationInfo: true } ); const adaptedTree = adapt( tree ); @@ -233,15 +424,35 @@ describe( "The adapt function", { name: "style", attributes: {}, + sourceCodeLocation: { + startOffset: 0, + endOffset: 36, + startTag: { + startOffset: 0, + endOffset: 7, + }, + endTag: { + startOffset: 28, + endOffset: 36, + }, + }, childNodes: [ { name: "p", isImplicit: true, attributes: {}, + sourceCodeLocation: { + startOffset: 7, + endOffset: 28, + }, childNodes: [ { name: "#text", value: "div { color: #FF00FF}", + sourceCodeRange: { + startOffset: 7, + endOffset: 28, + }, }, ], }, @@ -251,10 +462,26 @@ describe( "The adapt function", name: "p", isImplicit: false, attributes: {}, + sourceCodeLocation: { + startOffset: 36, + endOffset: 55, + startTag: { + startOffset: 36, + endOffset: 39, + }, + endTag: { + startOffset: 51, + endOffset: 55, + }, + }, childNodes: [ { name: "#text", value: "Hello World!", + sourceCodeRange: { + startOffset: 39, + endOffset: 51, + }, }, ], }, @@ -266,7 +493,7 @@ describe( "The adapt function", it( "adapts a tree with a blockquote element", () => { const html = "

    Hello World!

    Hello Yoast!
    "; - const tree = parseFragment( html ); + const tree = parseFragment( html, { sourceCodeLocationInfo: true } ); const adaptedTree = adapt( tree ); @@ -277,30 +504,78 @@ describe( "The adapt function", { name: "div", attributes: {}, + sourceCodeLocation: { + startOffset: 0, + endOffset: 67, + startTag: { + startOffset: 0, + endOffset: 5, + }, + endTag: { + startOffset: 61, + endOffset: 67, + }, + }, childNodes: [ { name: "p", isImplicit: false, attributes: {}, + sourceCodeLocation: { + startOffset: 5, + endOffset: 24, + startTag: { + startOffset: 5, + endOffset: 8, + }, + endTag: { + startOffset: 20, + endOffset: 24, + }, + }, childNodes: [ { name: "#text", value: "Hello World!", + sourceCodeRange: { + startOffset: 8, + endOffset: 20, + }, }, ], }, { name: "blockquote", attributes: {}, + sourceCodeLocation: { + startOffset: 24, + endOffset: 61, + startTag: { + startOffset: 24, + endOffset: 36, + }, + endTag: { + startOffset: 48, + endOffset: 61, + }, + }, childNodes: [ { name: "p", isImplicit: true, attributes: {}, + sourceCodeLocation: { + startOffset: 36, + endOffset: 48, + }, childNodes: [ { name: "#text", value: "Hello Yoast!", + sourceCodeRange: { + startOffset: 36, + endOffset: 48, + }, }, ], }, @@ -313,8 +588,23 @@ describe( "The adapt function", expect( adaptedTree ).toEqual( expected ); } ); + it( "should correctly adapt a fragment with a (overarching) paragraph element that has double line breaks as child nodes", () => { + const html = "

    test

    test
    test

    "; + const tree = parseFragment( html, { sourceCodeLocationInfo: true } ); + + const adaptedTree = adapt( tree ); + + const overarchingParagraph = adaptedTree.childNodes[ 0 ]; + expect( overarchingParagraph.name ).toEqual( "p-overarching" ); + expect( overarchingParagraph.childNodes[ 0 ].name ).toEqual( "p" ); + expect( overarchingParagraph.childNodes[ 0 ].isImplicit ).toBeTruthy(); + expect( overarchingParagraph.childNodes[ 1 ].name ).toEqual( "br" ); + expect( overarchingParagraph.childNodes[ 2 ].name ).toEqual( "br" ); + expect( overarchingParagraph.childNodes[ 3 ].name ).toEqual( "p" ); + expect( overarchingParagraph.childNodes[ 3 ].isImplicit ).toBeTruthy(); + } ); - it( "should correctly adapt a node with no childnodes.", () => { + it( "should correctly adapt a node with no childNodes.", () => { const tree = { nodeName: "div" }; const adaptedTree = adapt( tree ); diff --git a/packages/yoastseo/spec/parse/build/private/combineIntoImplicitParagraphsSpec.js b/packages/yoastseo/spec/parse/build/private/combineIntoImplicitParagraphsSpec.js index ae4d35c4a35..a09a1fe9eea 100644 --- a/packages/yoastseo/spec/parse/build/private/combineIntoImplicitParagraphsSpec.js +++ b/packages/yoastseo/spec/parse/build/private/combineIntoImplicitParagraphsSpec.js @@ -2,6 +2,7 @@ import combineIntoImplicitParagraphs from "../../../../src/parse/build/private/c describe( "The combineIntoImplicitParagraphs function", () => { it( "combines phrasing content into paragraphs", () => { + //

    #text
    const nodes = [ { name: "p" }, { name: "span" }, @@ -34,6 +35,7 @@ describe( "The combineIntoImplicitParagraphs function", () => { } ); it( "correctly handles an InterElementWhitespace", () => { + //

    #text
    const nodes = [ { name: "p" }, { name: "span" }, @@ -75,6 +77,7 @@ describe( "The combineIntoImplicitParagraphs function", () => { } ); it( "correctly handles an element with children", () => { + //

    #text

    #text
    const nodes = [ { name: "p" }, { name: "span" }, @@ -105,4 +108,37 @@ describe( "The combineIntoImplicitParagraphs function", () => { expect( implicitParagraphs ).toEqual( expected ); } ); + + it( "correctly handles double and single br tags: the former should lead ending implicit paragraphs", () => { + // #text

    #text
    #text + const nodes = [ + { name: "#text" }, + { name: "br" }, + { name: "br" }, + { name: "#text" }, + { name: "br" }, + { name: "#text" }, + ]; + + const implicitParagraphs = combineIntoImplicitParagraphs( nodes ); + + const expected = [ + { + name: "p", + attributes: {}, + childNodes: [ { name: "#text" } ], + isImplicit: true, + }, + { name: "br" }, + { name: "br" }, + { + name: "p", + attributes: {}, + childNodes: [ { name: "#text" }, { name: "br" }, { name: "#text" } ], + isImplicit: true, + }, + ]; + + expect( implicitParagraphs ).toEqual( expected ); + } ); } ); diff --git a/packages/yoastseo/spec/parse/build/private/filterBeforeTokenizingSpec.js b/packages/yoastseo/spec/parse/build/private/filterBeforeTokenizingSpec.js index eb57224f5af..7f39f532c54 100644 --- a/packages/yoastseo/spec/parse/build/private/filterBeforeTokenizingSpec.js +++ b/packages/yoastseo/spec/parse/build/private/filterBeforeTokenizingSpec.js @@ -21,6 +21,10 @@ describe( "A test for filterBeforeTokenizing", () => { { name: "#text", value: "Some text and code ", + sourceCodeRange: { + startOffset: 3, + endOffset: 22, + }, }, { name: "code", @@ -41,16 +45,16 @@ describe( "A test for filterBeforeTokenizing", () => { }, ], sourceCodeLocation: { - endOffset: 58, - endTag: { - endOffset: 58, - startOffset: 54, - }, startOffset: 0, + endOffset: 58, startTag: { endOffset: 3, startOffset: 0, }, + endTag: { + startOffset: 54, + endOffset: 58, + }, }, }, ], @@ -107,6 +111,10 @@ describe( "A test for filterBeforeTokenizing", () => { { name: "#text", value: "Hello, world!", + sourceCodeRange: { + startOffset: 53, + endOffset: 66, + }, }, ], sourceCodeLocation: { @@ -160,14 +168,18 @@ describe( "A test for filterBeforeTokenizing", () => { { name: "#text", value: "Some text and code ", + sourceCodeRange: { + startOffset: 3, + endOffset: 22, + }, }, { name: "code", attributes: {}, childNodes: [], sourceCodeLocation: { - endOffset: 71, startOffset: 22, + endOffset: 71, startTag: { startOffset: 22, 
endOffset: 28, @@ -180,15 +192,15 @@ describe( "A test for filterBeforeTokenizing", () => { }, ], sourceCodeLocation: { - endOffset: 75, - endTag: { - endOffset: 75, - startOffset: 71, - }, startOffset: 0, + endOffset: 75, startTag: { - endOffset: 3, startOffset: 0, + endOffset: 3, + }, + endTag: { + startOffset: 71, + endOffset: 75, }, }, }, @@ -215,6 +227,10 @@ describe( "A test for filterBeforeTokenizing", () => { { name: "#text", value: "Some text and code ", + sourceCodeRange: { + startOffset: 3, + endOffset: 22, + }, }, { name: "code", diff --git a/packages/yoastseo/spec/parse/build/private/filterTreeSpec.js b/packages/yoastseo/spec/parse/build/private/filterTreeSpec.js index 3eb34bd1b3e..f54d00d605f 100644 --- a/packages/yoastseo/spec/parse/build/private/filterTreeSpec.js +++ b/packages/yoastseo/spec/parse/build/private/filterTreeSpec.js @@ -167,6 +167,20 @@ const samplesWithOneOccurrence = [ element: "title", html: "\n \nNews and gossip\n

    A paragraph

    ", }, + { + element: "cite", + html: "\n\n

    There are many craft ideas that are interesting to both children and adults." + + " Fun for the whole family!

    ", + }, + { + element: "output", + html: "\n\n

    Armadillos x are cute animals.

    ", + }, + { + element: "samp", + html: "\n\n

    The automated chat saidThe answer is incorrectbut I wasn't " + + "sure why.

    ", + }, ]; describe.each( samplesWithOneOccurrence )( "Tests HTML elements, part 1 ", ( htmlElement ) => { @@ -198,6 +212,11 @@ const samplesWithTwoOccurrences = [ html: "\n\n

    If a triangle has one 90 degrees angle and one 30" + "degrees angle, how big is the remaining angle?

    Fun for the whole family

    ", }, + { + element: "form", + html: "\n\n", + }, ]; describe.each( samplesWithTwoOccurrences )( "Tests HTML elements, part 2", ( htmlElement ) => { @@ -219,6 +238,15 @@ describe( "Miscellaneous tests", () => { expect( filteredTree.findAll( child => child.name === "strong" ) ).toHaveLength( 1 ); } ); + it( "shouldn't filter out elements like , which are included in the list of excluded elements", () => { + const html = "

    Welcome to the blue screen of death.

    "; + + const tree = adapt( parseFragment( html, { sourceCodeLocationInfo: true } ) ); + expect( tree.findAll( child => child.name === "strong" ) ).toHaveLength( 1 ); + const filteredTree = filterTree( tree, permanentFilters ); + expect( filteredTree.findAll( child => child.name === "strong" ) ).toHaveLength( 1 ); + } ); + it( "should filter out elements like when nested within excluded elements, e.g. ", () => { const html = "

    Welcome to the blue screen of death.
    Kiss your computer goodbye.

    "; @@ -244,6 +272,12 @@ describe( "Miscellaneous tests", () => { expect( tree.findAll( child => child.name === "head" ) ).toHaveLength( 0 ); } ); + it( "should filter out map elements", () => { + // The head element seems to be removed by the parser we employ. + const html = "\nAbout artificial intelligence<map name=>AI</map>\n\n"; + const tree = adapt( parseFragment( html, { sourceCodeLocationInfo: true } ) ); + expect( tree.findAll( child => child.name === "abbr" ) ).toHaveLength( 0 ); + } ); it( "should filter out the Elementor Yoast Breadcrumbs widget ", () => { // When the HTML enters the paper, the Breadcrumbs widget doesn't include the div tag. let html = "

    Home

    Estimated reading time: " + "3 minutes

    \n" + "" + - "

    A HeadingA Heading

    "; const tree = adapt( parseFragment( html, { sourceCodeLocationInfo: true } ) ); const filteredTree = filterTree( tree, permanentFilters ); @@ -427,31 +422,15 @@ describe( "Tests filtered trees of a few Yoast blocks and of a made-up Yoast blo name: "#document-fragment", attributes: {}, childNodes: [ - { - attributes: {}, - childNodes: [], - name: "#comment", - sourceCodeLocation: { - endOffset: 71, - startOffset: 0, - }, - }, { name: "#text", value: "\n", + sourceCodeRange: { startOffset: 71, endOffset: 72 }, }, { name: "#text", value: "\n", - }, - { - attributes: {}, - childNodes: [], - name: "#comment", - sourceCodeLocation: { - endOffset: 404, - startOffset: 359, - }, + sourceCodeRange: { startOffset: 358, endOffset: 359 }, }, { attributes: {}, @@ -459,16 +438,21 @@ describe( "Tests filtered trees of a few Yoast blocks and of a made-up Yoast blo { name: "#text", value: "A Heading", + sourceCodeRange: { startOffset: 408, endOffset: 417 }, }, ], level: 2, name: "h2", sourceCodeLocation: { - endOffset: 421, startOffset: 404, + endOffset: 422, startTag: { - endOffset: 408, startOffset: 404, + endOffset: 408, + }, + endTag: { + startOffset: 417, + endOffset: 422, }, }, }, @@ -499,6 +483,7 @@ describe( "Tests filtered trees of a few Yoast blocks and of a made-up Yoast blo { name: "#text", value: "Hello world!", + sourceCodeRange: { startOffset: 33, endOffset: 45 }, }, ], sourceCodeLocation: { diff --git a/packages/yoastseo/spec/parse/build/private/getTextElementPositionsSpec.js b/packages/yoastseo/spec/parse/build/private/getTextElementPositionsSpec.js index 588bb90443e..c33399e13d1 100644 --- a/packages/yoastseo/spec/parse/build/private/getTextElementPositionsSpec.js +++ b/packages/yoastseo/spec/parse/build/private/getTextElementPositionsSpec.js @@ -141,6 +141,21 @@ describe( "A test for getting positions of sentences", () => { expect( getTextElementPositions( node, sentences ) ).toEqual( sentencesWithPositions ); } ); + it( "should determine the correct token 
positions when the sentence contains a br tag", function() { + // HTML:

    Hello
    world!

    . + const html = "

    Hello
    world!

    "; + const tree = adapt( parseFragment( html, { sourceCodeLocationInfo: true } ) ); + const paragraph = tree.childNodes[ 0 ]; + const tokens = [ "Hello", "\n", "world", "!" ].map( string => new Token( string ) ); + + const [ hello, br, world, bang ] = getTextElementPositions( paragraph, tokens, 3 ); + + expect( hello.sourceCodeRange ).toEqual( { startOffset: 3, endOffset: 8 } ); + expect( br.sourceCodeRange ).toEqual( { startOffset: 13, endOffset: 14 } ); + expect( world.sourceCodeRange ).toEqual( { startOffset: 14, endOffset: 19 } ); + expect( bang.sourceCodeRange ).toEqual( { startOffset: 19, endOffset: 20 } ); + } ); + it( "gets the sentence positions from a node that has a code child node", function() { // HTML:

    Hello array.push( something ) code!

    const node = new Paragraph( {}, [ @@ -506,4 +521,78 @@ describe( "A test for getting positions of sentences", () => { expect( world.sourceCodeRange ).toEqual( { startOffset: 16, endOffset: 21 } ); expect( bang.sourceCodeRange ).toEqual( { startOffset: 21, endOffset: 22 } ); } ); + + it( "should correctly add positions to an implicit paragraph", function() { + const html = "Hello world!"; + const tree = adapt( parseFragment( html, { sourceCodeLocationInfo: true } ) ); + const paragraph = tree.childNodes[ 0 ]; + + const tokens = [ "Hello", " ", "world", "!" ].map( string => new Token( string ) ); + + const [ hello, space, world, bang ] = getTextElementPositions( paragraph, tokens ); + + expect( hello.sourceCodeRange ).toEqual( { startOffset: 0, endOffset: 5 } ); + expect( space.sourceCodeRange ).toEqual( { startOffset: 5, endOffset: 6 } ); + expect( world.sourceCodeRange ).toEqual( { startOffset: 6, endOffset: 11 } ); + expect( bang.sourceCodeRange ).toEqual( { startOffset: 11, endOffset: 12 } ); + } ); + + it( "should correctly add positions to two sentences in an implicit paragraph", function() { + const html = "Hello world! It is Yoast."; + const tree = adapt( parseFragment( html, { sourceCodeLocationInfo: true } ) ); + const paragraph = tree.childNodes[ 0 ]; + + expect( paragraph.sourceCodeLocation ).toEqual( { startOffset: 0, endOffset: 42 } ); + + const tokens = [ "Hello", " ", "world", "!" ].map( string => new Token( string ) ); + const tokens2 = [ "It", " ", "is", " ", "Yoast", "." 
].map( string => new Token( string ) ); + + const [ hello, space, world, bang ] = getTextElementPositions( paragraph, tokens ); + const [ it, space2, is, space3, yoast, dot ] = getTextElementPositions( paragraph, tokens2, 13 ); + + expect( hello.sourceCodeRange ).toEqual( { startOffset: 0, endOffset: 5 } ); + expect( space.sourceCodeRange ).toEqual( { startOffset: 5, endOffset: 6 } ); + expect( world.sourceCodeRange ).toEqual( { startOffset: 6, endOffset: 11 } ); + expect( bang.sourceCodeRange ).toEqual( { startOffset: 11, endOffset: 12 } ); + + expect( it.sourceCodeRange ).toEqual( { startOffset: 13, endOffset: 15 } ); + expect( space2.sourceCodeRange ).toEqual( { startOffset: 15, endOffset: 16 } ); + expect( is.sourceCodeRange ).toEqual( { startOffset: 16, endOffset: 18 } ); + expect( space3.sourceCodeRange ).toEqual( { startOffset: 18, endOffset: 19 } ); + expect( yoast.sourceCodeRange ).toEqual( { startOffset: 27, endOffset: 32 } ); + expect( dot.sourceCodeRange ).toEqual( { startOffset: 41, endOffset: 42 } ); + } ); + + it( "correctly calculates the position of an image caption", () => { + const html = "
    [caption id=\"attachment_3341501\" align=\"alignnone\" width=\"300\"]" + + "\"alt\"" + + " An image with the keyword in the caption.[/caption]
    "; + const tree = adapt( parseFragment( html, { sourceCodeLocationInfo: true } ) ); + const div = tree.childNodes[ 0 ]; + const caption = div.childNodes[ 0 ]; + + const tokens = [ " ", "An", " ", "image", " ", "with", " ", "the", " ", "keyword", " ", "in", " ", "the", " ", "caption", "." ].map( + string => new Token( string ) ); + + const [ space0, an, space1, image, space2, withToken, space3, the, + space4, keyword, space5, inToken, space6, the2, space7, captionToken, dot ] = getTextElementPositions( caption, tokens, 148 ); + + expect( space0.sourceCodeRange ).toEqual( { startOffset: 148, endOffset: 149 } ); + expect( an.sourceCodeRange ).toEqual( { startOffset: 149, endOffset: 151 } ); + expect( space1.sourceCodeRange ).toEqual( { startOffset: 151, endOffset: 152 } ); + expect( image.sourceCodeRange ).toEqual( { startOffset: 152, endOffset: 157 } ); + expect( space2.sourceCodeRange ).toEqual( { startOffset: 157, endOffset: 158 } ); + expect( withToken.sourceCodeRange ).toEqual( { startOffset: 158, endOffset: 162 } ); + expect( space3.sourceCodeRange ).toEqual( { startOffset: 162, endOffset: 163 } ); + expect( the.sourceCodeRange ).toEqual( { startOffset: 163, endOffset: 166 } ); + expect( space4.sourceCodeRange ).toEqual( { startOffset: 166, endOffset: 167 } ); + expect( keyword.sourceCodeRange ).toEqual( { startOffset: 167, endOffset: 174 } ); + expect( space5.sourceCodeRange ).toEqual( { startOffset: 174, endOffset: 175 } ); + expect( inToken.sourceCodeRange ).toEqual( { startOffset: 175, endOffset: 177 } ); + expect( space6.sourceCodeRange ).toEqual( { startOffset: 177, endOffset: 178 } ); + expect( the2.sourceCodeRange ).toEqual( { startOffset: 178, endOffset: 181 } ); + expect( space7.sourceCodeRange ).toEqual( { startOffset: 181, endOffset: 182 } ); + expect( captionToken.sourceCodeRange ).toEqual( { startOffset: 182, endOffset: 189 } ); + expect( dot.sourceCodeRange ).toEqual( { startOffset: 189, endOffset: 190 } ); + } ); } ); diff --git 
a/packages/yoastseo/spec/parse/build/private/tokenizeSpec.js b/packages/yoastseo/spec/parse/build/private/tokenizeSpec.js index 12651e4c272..a00d2825961 100644 --- a/packages/yoastseo/spec/parse/build/private/tokenizeSpec.js +++ b/packages/yoastseo/spec/parse/build/private/tokenizeSpec.js @@ -1,27 +1,514 @@ import tokenize from "../../../../src/parse/build/private/tokenize"; import Paper from "../../../../src/values/Paper"; import EnglishResearcher from "../../../../src/languageProcessing/languages/en/Researcher"; +import JapaneseResearcher from "../../../../src/languageProcessing/languages/ja/Researcher"; import { buildTreeNoTokenize } from "../../../specHelpers/parse/buildTree"; import LanguageProcessor from "../../../../src/parse/language/LanguageProcessor"; +import SourceCodeLocation from "../../../../src/parse/structure/SourceCodeLocation"; -describe( "A test for the tokenize function", function() { - it( "should correctly tokenize a paragraph of one sentence", function() { - const mockPaper = new Paper( "

    This is a paragraph

    ", { locale: "en_US" } ); - const mockResearcher = new EnglishResearcher( mockPaper ); - const languageProcessor = new LanguageProcessor( mockResearcher ); - buildTreeNoTokenize( mockPaper ); - // eslint-disable-next-line max-len - expect( tokenize( mockPaper.getTree(), languageProcessor ) ).toEqual( { attributes: {}, childNodes: [ { attributes: {}, childNodes: [ { name: "#text", value: "This is a paragraph" } ], isImplicit: false, name: "p", sentences: [ { sourceCodeRange: { endOffset: 22, startOffset: 3 }, text: "This is a paragraph", tokens: [ { sourceCodeRange: { endOffset: 7, startOffset: 3 }, text: "This" }, { sourceCodeRange: { endOffset: 8, startOffset: 7 }, text: " " }, { sourceCodeRange: { endOffset: 10, startOffset: 8 }, text: "is" }, { sourceCodeRange: { endOffset: 11, startOffset: 10 }, text: " " }, { sourceCodeRange: { endOffset: 12, startOffset: 11 }, text: "a" }, { sourceCodeRange: { endOffset: 13, startOffset: 12 }, text: " " }, { sourceCodeRange: { endOffset: 22, startOffset: 13 }, text: "paragraph" } ] } ], sourceCodeLocation: { endOffset: 26, endTag: { endOffset: 26, startOffset: 22 }, startOffset: 0, startTag: { endOffset: 3, startOffset: 0 } } } ], name: "#document-fragment" } +describe( "A test for the tokenize function", + function() { + it( "should correctly tokenize a paragraph of one sentence", function() { + const mockPaper = new Paper( "

    This is a paragraph

    ", { locale: "en_US" } ); + const mockResearcher = new EnglishResearcher( mockPaper ); + const languageProcessor = new LanguageProcessor( mockResearcher ); + buildTreeNoTokenize( mockPaper ); + expect( tokenize( mockPaper.getTree(), languageProcessor ) ).toEqual( { + attributes: {}, + childNodes: [ { + attributes: {}, + childNodes: [ { + name: "#text", + value: "This is a paragraph", + sourceCodeRange: new SourceCodeLocation( { startOffset: 3, endOffset: 22 } ) } ], + isImplicit: false, + name: "p", + sentences: [ { + sourceCodeRange: { endOffset: 22, startOffset: 3 }, + text: "This is a paragraph", + tokens: [ { sourceCodeRange: { endOffset: 7, startOffset: 3 }, text: "This" }, { + sourceCodeRange: { + endOffset: 8, + startOffset: 7, + }, text: " ", + }, { sourceCodeRange: { endOffset: 10, startOffset: 8 }, text: "is" }, { + sourceCodeRange: { endOffset: 11, startOffset: 10 }, + text: " ", + }, { sourceCodeRange: { endOffset: 12, startOffset: 11 }, text: "a" }, { + sourceCodeRange: { endOffset: 13, startOffset: 12 }, + text: " ", + }, { sourceCodeRange: { endOffset: 22, startOffset: 13 }, text: "paragraph" } ], + } ], + sourceCodeLocation: { + endOffset: 26, + endTag: { endOffset: 26, startOffset: 22 }, + startOffset: 0, + startTag: { endOffset: 3, startOffset: 0 }, + }, + } ], + name: "#document-fragment", + } + ); + } ); + + it( "should correctly tokenize a paragraph of two sentences", function() { + const mockPaper = new Paper( "

    This is a sentence. This is another sentence.

    ", { locale: "en_US" } ); + const mockResearcher = new EnglishResearcher( mockPaper ); + const languageProcessor = new LanguageProcessor( mockResearcher ); + buildTreeNoTokenize( mockPaper ); + expect( tokenize( mockPaper.getTree(), languageProcessor ) ).toEqual( { + attributes: {}, + childNodes: [ + { + attributes: {}, + childNodes: [ + { + name: "#text", + value: "This is a sentence. This is another sentence.", + sourceCodeRange: new SourceCodeLocation( { startOffset: 3, endOffset: 48 } ), + }, + ], + isImplicit: false, + name: "p", + sentences: [ + { + sourceCodeRange: { + endOffset: 22, + startOffset: 3, + }, + text: "This is a sentence.", + tokens: [ + { + sourceCodeRange: { + endOffset: 7, + startOffset: 3, + }, + text: "This", + }, + { + sourceCodeRange: { + endOffset: 8, + startOffset: 7, + }, + text: " ", + }, + { + sourceCodeRange: { + endOffset: 10, + startOffset: 8, + }, + text: "is", + }, + { + sourceCodeRange: { + endOffset: 11, + startOffset: 10, + }, + text: " ", + }, + { + sourceCodeRange: { + endOffset: 12, + startOffset: 11, + }, + text: "a", + }, + { + sourceCodeRange: { + endOffset: 13, + startOffset: 12, + }, + text: " ", + }, + { + sourceCodeRange: { + endOffset: 21, + startOffset: 13, + }, + text: "sentence", + }, + { + sourceCodeRange: { + endOffset: 22, + startOffset: 21, + }, + text: ".", + }, + ], + }, + { + sourceCodeRange: { + endOffset: 48, + startOffset: 22, + }, + text: " This is another sentence.", + tokens: [ + { + sourceCodeRange: { + endOffset: 23, + startOffset: 22, + }, + text: " ", + }, + { + sourceCodeRange: { + endOffset: 27, + startOffset: 23, + }, + text: "This", + }, + { + sourceCodeRange: { + endOffset: 28, + startOffset: 27, + }, + text: " ", + }, + { + sourceCodeRange: { + endOffset: 30, + startOffset: 28, + }, + text: "is", + }, + { + sourceCodeRange: { + endOffset: 31, + startOffset: 30, + }, + text: " ", + }, + { + sourceCodeRange: { + endOffset: 38, + startOffset: 31, + }, + text: "another", + }, + { + 
sourceCodeRange: { + endOffset: 39, + startOffset: 38, + }, + text: " ", + }, + { + sourceCodeRange: { + endOffset: 47, + startOffset: 39, + }, + text: "sentence", + }, + { + sourceCodeRange: { + endOffset: 48, + startOffset: 47, + }, + text: ".", + }, + ], + }, + ], + sourceCodeLocation: { + endOffset: 52, + endTag: { + endOffset: 52, + startOffset: 48, + }, + startOffset: 0, + startTag: { + endOffset: 3, + startOffset: 0, + }, + }, + }, + ], + name: "#document-fragment", + } ); + } ); + + it( "should correctly tokenize a sentence with a number with a decimal point", function() { + const mockPaper = new Paper( "

    This is the release of YoastSEO 9.3.

    ", { keyword: "YoastSEO 9.3" } ); + const mockResearcher = new EnglishResearcher( mockPaper ); + const languageProcessor = new LanguageProcessor( mockResearcher ); + buildTreeNoTokenize( mockPaper ); + const result = tokenize( mockPaper.getTree(), languageProcessor ); + expect( result ).toEqual( { + name: "#document-fragment", + attributes: {}, + childNodes: [ + { + name: "p", + attributes: {}, + childNodes: [ + { + name: "#text", + value: "This is the release of YoastSEO 9.3.", + sourceCodeRange: new SourceCodeLocation( { startOffset: 3, endOffset: 39 } ), + }, + ], + sourceCodeLocation: { + startTag: { + startOffset: 0, + endOffset: 3, + }, + endTag: { + startOffset: 39, + endOffset: 43, + }, + startOffset: 0, + endOffset: 43, + }, + isImplicit: false, + sentences: [ + { + text: "This is the release of YoastSEO 9.3.", + tokens: [ + { + text: "This", + sourceCodeRange: { + startOffset: 3, + endOffset: 7, + }, + }, + { + text: " ", + sourceCodeRange: { + startOffset: 7, + endOffset: 8, + }, + }, + { + text: "is", + sourceCodeRange: { + startOffset: 8, + endOffset: 10, + }, + }, + { + text: " ", + sourceCodeRange: { + startOffset: 10, + endOffset: 11, + }, + }, + { + text: "the", + sourceCodeRange: { + startOffset: 11, + endOffset: 14, + }, + }, + { + text: " ", + sourceCodeRange: { + startOffset: 14, + endOffset: 15, + }, + }, + { + text: "release", + sourceCodeRange: { + startOffset: 15, + endOffset: 22, + }, + }, + { + text: " ", + sourceCodeRange: { + startOffset: 22, + endOffset: 23, + }, + }, + { + text: "of", + sourceCodeRange: { + startOffset: 23, + endOffset: 25, + }, + }, + { + text: " ", + sourceCodeRange: { + startOffset: 25, + endOffset: 26, + }, + }, + { + text: "YoastSEO", + sourceCodeRange: { + startOffset: 26, + endOffset: 34, + }, + }, + { + text: " ", + sourceCodeRange: { + startOffset: 34, + endOffset: 35, + }, + }, + { + text: "9.3", + sourceCodeRange: { + startOffset: 35, + endOffset: 38, + }, + }, + { + text: ".", + sourceCodeRange: { + 
startOffset: 38, + endOffset: 39, + }, + }, + ], + sourceCodeRange: { + startOffset: 3, + endOffset: 39, + }, + }, + ], + }, + ], + } ); + } ); + + it( "should correctly tokenize a sentence containing an HTML entity", function() { + // This tests the scenario where an ampersand character "&" enters the Paper as "&" in Classic editor. + // "&" is first transformed to "#amp;" and then turned back to "&" during tokenization. + // The length of "&" should be 5 characters (like "&"), instead of 1. + const mockPaper = new Paper( "

    This is a paragraph&

    " ); + const mockResearcher = new EnglishResearcher( mockPaper ); + const languageProcessor = new LanguageProcessor( mockResearcher ); + buildTreeNoTokenize( mockPaper ); + const result = tokenize( mockPaper.getTree(), languageProcessor ); + expect( result ).toEqual( { + attributes: {}, + childNodes: [ + { + attributes: {}, + childNodes: [ + { + name: "#text", + sourceCodeRange: { + startOffset: 3, + endOffset: 27, + }, + value: "This is a paragraph#amp;", + }, + ], + isImplicit: false, + name: "p", + sentences: [ + { + sourceCodeRange: { + startOffset: 3, + endOffset: 27, + }, + text: "This is a paragraph&", + tokens: [ + { + sourceCodeRange: { + startOffset: 3, + endOffset: 7, + }, + text: "This", + }, + { + sourceCodeRange: { + startOffset: 7, + endOffset: 8, + }, + text: " ", + }, + { + sourceCodeRange: { + startOffset: 8, + endOffset: 10, + }, + text: "is", + }, + { + sourceCodeRange: { + startOffset: 10, + endOffset: 11, + }, + text: " ", + }, + { + sourceCodeRange: { + startOffset: 11, + endOffset: 12, + }, + text: "a", + }, + { + sourceCodeRange: { + startOffset: 12, + endOffset: 13, + }, + text: " ", + }, + { + sourceCodeRange: { + startOffset: 13, + endOffset: 27, + }, + text: "paragraph&", + }, + ], + }, + ], + sourceCodeLocation: { + startOffset: 0, + endOffset: 31, + startTag: { + startOffset: 0, + endOffset: 3, + }, + endTag: { + startOffset: 27, + endOffset: 31, + }, + }, + }, + ], + name: "#document-fragment", + } ); + } ); - ); + it( "should correctly tokenize a paragraph with single br tags", function() { + const mockPaper = new Paper( "

    This is a sentence.
    This is
    another sentence.

    " ); + const mockResearcher = new EnglishResearcher( mockPaper ); + const languageProcessor = new LanguageProcessor( mockResearcher ); + buildTreeNoTokenize( mockPaper ); + const result = tokenize( mockPaper.getTree(), languageProcessor ); + const sentences = result.childNodes[ 0 ].sentences; + expect( sentences.length ).toEqual( 2 ); + const firstSentence = sentences[ 0 ]; + expect( firstSentence.text ).toEqual( "This is a sentence." ); + expect( firstSentence.sourceCodeRange ).toEqual( { startOffset: 3, endOffset: 22 } ); + expect( firstSentence.tokens.length ).toEqual( 8 ); + const secondSentence = sentences[ 1 ]; + expect( secondSentence.text ).toEqual( "\nThis is\nanother sentence." ); + expect( secondSentence.sourceCodeRange ).toEqual( { startOffset: 27, endOffset: 56 } ); + expect( secondSentence.tokens.length ).toEqual( 9 ); + const [ br1, this1, , is1, br2, another1, , , ] = secondSentence.tokens; + expect( br1.sourceCodeRange ).toEqual( { startOffset: 27, endOffset: 28 } ); + expect( this1.sourceCodeRange ).toEqual( { startOffset: 28, endOffset: 32 } ); + expect( is1.sourceCodeRange ).toEqual( { startOffset: 33, endOffset: 35 } ); + expect( br2.sourceCodeRange ).toEqual( { startOffset: 38, endOffset: 39 } ); + expect( another1.sourceCodeRange ).toEqual( { startOffset: 39, endOffset: 46 } ); + } ); } ); - it( "should correctly tokenize a paragraph of two sentences", function() { - const mockPaper = new Paper( "

    This is a sentence. This is another sentence.

    ", { locale: "en_US" } ); - const mockResearcher = new EnglishResearcher( mockPaper ); +describe( "A test for tokenizing a Japanese sentence", function() { + it( "should correctly tokenize a simple Japanese sentence.", function() { + const mockPaper = new Paper( "

    犬が大好き\u3002

    ", { locale: "ja_JP" } ); + const mockResearcher = new JapaneseResearcher( mockPaper ); const languageProcessor = new LanguageProcessor( mockResearcher ); buildTreeNoTokenize( mockPaper ); - // eslint-disable-next-line max-len expect( tokenize( mockPaper.getTree(), languageProcessor ) ).toEqual( { attributes: {}, childNodes: [ @@ -30,7 +517,8 @@ describe( "A test for the tokenize function", function() { childNodes: [ { name: "#text", - value: "This is a sentence. This is another sentence.", + value: "犬が大好き。", + sourceCodeRange: new SourceCodeLocation( { startOffset: 3, endOffset: 9 } ), }, ], isImplicit: false, @@ -38,152 +526,52 @@ describe( "A test for the tokenize function", function() { sentences: [ { sourceCodeRange: { - endOffset: 22, startOffset: 3, + endOffset: 9, }, - text: "This is a sentence.", + text: "犬が大好き。", tokens: [ { sourceCodeRange: { - endOffset: 7, startOffset: 3, + endOffset: 4, }, - text: "This", - }, - { - sourceCodeRange: { - endOffset: 8, - startOffset: 7, - }, - text: " ", - }, - { - sourceCodeRange: { - endOffset: 10, - startOffset: 8, - }, - text: "is", - }, - { - sourceCodeRange: { - endOffset: 11, - startOffset: 10, - }, - text: " ", - }, - { - sourceCodeRange: { - endOffset: 12, - startOffset: 11, - }, - text: "a", - }, - { - sourceCodeRange: { - endOffset: 13, - startOffset: 12, - }, - text: " ", - }, - { - sourceCodeRange: { - endOffset: 21, - startOffset: 13, - }, - text: "sentence", - }, - { - sourceCodeRange: { - endOffset: 22, - startOffset: 21, - }, - text: ".", - }, - ], - }, - { - sourceCodeRange: { - endOffset: 48, - startOffset: 22, - }, - text: " This is another sentence.", - tokens: [ - { - sourceCodeRange: { - endOffset: 23, - startOffset: 22, - }, - text: " ", - }, - { - sourceCodeRange: { - endOffset: 27, - startOffset: 23, - }, - text: "This", - }, - { - sourceCodeRange: { - endOffset: 28, - startOffset: 27, - }, - text: " ", - }, - { - sourceCodeRange: { - endOffset: 30, - startOffset: 28, - }, - text: "is", - 
}, - { - sourceCodeRange: { - endOffset: 31, - startOffset: 30, - }, - text: " ", - }, - { - sourceCodeRange: { - endOffset: 38, - startOffset: 31, - }, - text: "another", + text: "犬", }, { sourceCodeRange: { - endOffset: 39, - startOffset: 38, + startOffset: 4, + endOffset: 5, }, - text: " ", + text: "が", }, { sourceCodeRange: { - endOffset: 47, - startOffset: 39, + startOffset: 5, + endOffset: 8, }, - text: "sentence", + text: "大好き", }, { sourceCodeRange: { - endOffset: 48, - startOffset: 47, + startOffset: 8, + endOffset: 9, }, - text: ".", + text: "。", }, ], }, ], sourceCodeLocation: { - endOffset: 52, - endTag: { - endOffset: 52, - startOffset: 48, - }, startOffset: 0, + endOffset: 13, startTag: { - endOffset: 3, startOffset: 0, + endOffset: 3, + }, + endTag: { + startOffset: 9, + endOffset: 13, }, }, }, diff --git a/packages/yoastseo/spec/parse/language/LanguageProcessorSpec.js b/packages/yoastseo/spec/parse/language/LanguageProcessorSpec.js index 460dbddec53..fadd25f9ddf 100644 --- a/packages/yoastseo/spec/parse/language/LanguageProcessorSpec.js +++ b/packages/yoastseo/spec/parse/language/LanguageProcessorSpec.js @@ -2,6 +2,7 @@ import LanguageProcessor from "../../../src/parse/language/LanguageProcessor"; import Factory from "../../specHelpers/factory"; import memoizedSentenceTokenizer from "../../../src/languageProcessing/helpers/sentence/memoizedSentenceTokenizer"; import Sentence from "../../../src/parse/structure/Sentence"; +import splitIntoTokensCustom from "../../../src/languageProcessing/languages/ja/helpers/splitIntoTokensCustom"; const researcher = Factory.buildMockResearcher( {}, true, false, false, { memoizedTokenizer: memoizedSentenceTokenizer } ); @@ -432,3 +433,23 @@ describe.each( splitIntoTokensTestCases )( "A test for the tokenize method", ( { expect( tokens ).toEqual( expectedTokens ); } ); } ); + +describe( "A test for the splitIntoTokens method in Japanese", () => { + it( "should return an array of tokens", function() { + const 
japaneseResearcher = Factory.buildMockResearcher( {}, true, false, false, + { splitIntoTokensCustom: splitIntoTokensCustom } ); + const languageProcessor = new LanguageProcessor( japaneseResearcher ); + const tokens = languageProcessor.splitIntoTokens( new Sentence( "ウクライナは、東ヨーロッパに位置する国家。" ) ); + expect( tokens ).toEqual( [ + { text: "ウクライナ", sourceCodeRange: {} }, + { text: "は", sourceCodeRange: {} }, + { text: "、", sourceCodeRange: {} }, + { text: "東ヨーロッパ", sourceCodeRange: {} }, + { text: "に", sourceCodeRange: {} }, + { text: "位置", sourceCodeRange: {} }, + { text: "する", sourceCodeRange: {} }, + { text: "国家", sourceCodeRange: {} }, + { text: "。", sourceCodeRange: {} }, + ] ); + } ); +} ); diff --git a/packages/yoastseo/spec/parse/structure/NodeSpec.js b/packages/yoastseo/spec/parse/structure/NodeSpec.js index 9f8f0a750bf..ed670e0382f 100644 --- a/packages/yoastseo/spec/parse/structure/NodeSpec.js +++ b/packages/yoastseo/spec/parse/structure/NodeSpec.js @@ -3,6 +3,7 @@ import LanguageProcessor from "../../../src/parse/language/LanguageProcessor"; import Factory from "../../specHelpers/factory"; import build from "../../../src/parse/build/build"; import memoizedSentenceTokenizer from "../../../src/languageProcessing/helpers/sentence/memoizedSentenceTokenizer"; +import Paper from "../../../src/values/Paper"; describe( "A test for the Node object", () => { it( "should correctly create a simple Node object", function() { @@ -11,21 +12,26 @@ describe( "A test for the Node object", () => { } ); describe( "A test for the findAll method", () => { + let paper; + + beforeEach( () => { + paper = new Paper( "" ); + } ); it( "should find all occurrences of a p tag", function() { - const html = "

    Hello, world!

    Hello, yoast!

    "; + paper._text = "

    Hello, world!

    Hello, yoast!

    "; const researcher = Factory.buildMockResearcher( {}, true, false, false, { memoizedTokenizer: memoizedSentenceTokenizer } ); const languageProcessor = new LanguageProcessor( researcher ); - const tree = build( html, languageProcessor ); + const tree = build( paper, languageProcessor ); const searchResult = tree.findAll( ( node ) => node.name === "p" ); const expected = [ { name: "p", attributes: { "class": new Set( [ "yoast" ] ) }, - childNodes: [ { name: "#text", value: "Hello, world! " } ], + childNodes: [ { name: "#text", value: "Hello, world! ", sourceCodeRange: { startOffset: 22, endOffset: 36 } } ], isImplicit: false, sentences: [ { text: "Hello, world!", @@ -54,7 +60,7 @@ describe( "A test for the findAll method", () => { { name: "p", attributes: { "class": new Set( [ "yoast" ] ) }, - childNodes: [ { name: "#text", value: "Hello, yoast!" } ], + childNodes: [ { name: "#text", value: "Hello, yoast!", sourceCodeRange: { startOffset: 57, endOffset: 70 } } ], isImplicit: false, sentences: [ { text: "Hello, yoast!", @@ -86,15 +92,17 @@ describe( "A test for the findAll method", () => { } ); describe( "A test for the innerText method", () => { - const html = "

    Hello, world!

    Hello, yoast!

    "; + it( "should return the inner text of a node", function() { + const paper = new Paper( "

    Hello, world!

    Hello, yoast!

    " ); - const researcher = Factory.buildMockResearcher( {}, true, false, false, - { memoizedTokenizer: memoizedSentenceTokenizer } ); - const languageProcessor = new LanguageProcessor( researcher ); + const researcher = Factory.buildMockResearcher( {}, true, false, false, + { memoizedTokenizer: memoizedSentenceTokenizer } ); + const languageProcessor = new LanguageProcessor( researcher ); - const tree = build( html, languageProcessor ); + const tree = build( paper, languageProcessor ); - const innerText = tree.innerText(); + const innerText = tree.innerText(); - expect( innerText ).toEqual( "Hello, world! Hello, yoast!" ); + expect( innerText ).toEqual( "Hello, world! Hello, yoast!" ); + } ); } ); diff --git a/packages/yoastseo/spec/parse/structure/ParagraphSpec.js b/packages/yoastseo/spec/parse/structure/ParagraphSpec.js index 4c5dc887e58..c2a6bf0f182 100644 --- a/packages/yoastseo/spec/parse/structure/ParagraphSpec.js +++ b/packages/yoastseo/spec/parse/structure/ParagraphSpec.js @@ -18,4 +18,13 @@ describe( "A test for the Paragraph object", function() { isImplicit: true, } ); } ); + + it( "should correctly construct an overarching Paragraph object", function() { + expect( new Paragraph( {}, [], {}, false, true ) ).toEqual( { + name: "p-overarching", + attributes: {}, + childNodes: [], + isImplicit: false, + } ); + } ); } ); diff --git a/packages/yoastseo/spec/parse/structure/TextSpec.js b/packages/yoastseo/spec/parse/structure/TextSpec.js index b1e63e2a57c..3ac9ba20c25 100644 --- a/packages/yoastseo/spec/parse/structure/TextSpec.js +++ b/packages/yoastseo/spec/parse/structure/TextSpec.js @@ -2,8 +2,9 @@ import Text from "../../../src/parse/structure/Text"; describe( "A test for the Text object", function() { it( "should correctly construct a Text object", function() { - expect( new Text( "This is a text. It consists of 2 sentences" ) ).toEqual( { - name: "#text", value: "This is a text. 
It consists of 2 sentences", + expect( new Text( { value: "This is a text. It consists of 2 sentences", + sourceCodeLocation: { startOffset: 0, endOffset: 40 } } ) ).toEqual( { + name: "#text", value: "This is a text. It consists of 2 sentences", sourceCodeRange: { startOffset: 0, endOffset: 40 }, } ); } ); } ); diff --git a/packages/yoastseo/spec/parse/structure/TokenSpec.js b/packages/yoastseo/spec/parse/structure/TokenSpec.js index ed25c08ed5e..37dd975fa74 100644 --- a/packages/yoastseo/spec/parse/structure/TokenSpec.js +++ b/packages/yoastseo/spec/parse/structure/TokenSpec.js @@ -4,4 +4,7 @@ describe( "A test for the Token object", function() { it( "should correctly construct a Token object", function() { expect( new Token( "This" ) ).toEqual( { text: "This", sourceCodeRange: {} } ); } ); + it( "should normalize single quotes", function() { + expect( new Token( "cat’s" ) ).toEqual( { text: "cat's", sourceCodeRange: {} } ); + } ); } ); diff --git a/packages/yoastseo/spec/parse/traverse/findAllInTreeSpec.js b/packages/yoastseo/spec/parse/traverse/findAllInTreeSpec.js index 0ef1fbf9aa7..7f3a61378a2 100644 --- a/packages/yoastseo/spec/parse/traverse/findAllInTreeSpec.js +++ b/packages/yoastseo/spec/parse/traverse/findAllInTreeSpec.js @@ -3,19 +3,21 @@ import build from "../../../src/parse/build/build"; import LanguageProcessor from "../../../src/parse/language/LanguageProcessor"; import Factory from "../../specHelpers/factory"; import memoizedSentenceTokenizer from "../../../src/languageProcessing/helpers/sentence/memoizedSentenceTokenizer"; +import Paper from "../../../src/values/Paper"; -let languageProcessor; +let languageProcessor, paper; beforeEach( () => { const researcher = Factory.buildMockResearcher( {}, true, false, false, { memoizedTokenizer: memoizedSentenceTokenizer } ); languageProcessor = new LanguageProcessor( researcher ); + paper = new Paper( "" ); } ); describe( "A test for findAllInTree", () => { it( "should correctly extract all p-tags from a 
tree", function() { - const html = "

    Hello, world!

    Hello, yoast!

    "; - const tree = build( html, languageProcessor ); + paper._text = "

    Hello, world!

    Hello, yoast!

    "; + const tree = build( paper, languageProcessor ); const searchResult = findAllInTree( tree, treeNode => treeNode.name === "p" ); @@ -27,6 +29,7 @@ describe( "A test for findAllInTree", () => { childNodes: [ { name: "#text", value: "Hello, world! ", + sourceCodeRange: { startOffset: 22, endOffset: 36 }, } ], sentences: [ { text: "Hello, world!", @@ -59,6 +62,7 @@ describe( "A test for findAllInTree", () => { childNodes: [ { name: "#text", value: "Hello, yoast! ", + sourceCodeRange: { startOffset: 57, endOffset: 71 }, } ], sentences: [ { text: "Hello, yoast!", @@ -90,8 +94,8 @@ describe( "A test for findAllInTree", () => { } ); it( "should return an empty result if the tag doesnt exist within the tree", function() { - const html = "

    Hello, world!

    Hello, yoast!

    "; - const tree = build( html, languageProcessor ); + paper._text = "

    Hello, world!

    Hello, yoast!

    "; + const tree = build( paper, languageProcessor ); const searchResult = findAllInTree( tree, treeNode => treeNode.name === "h1" ); @@ -99,8 +103,8 @@ describe( "A test for findAllInTree", () => { } ); it( "should return all sub-nodes if recurseFoundNodes is true", function() { - const html = "
    foo
    "; - const tree = build( html, languageProcessor ); + paper._text = "
    foo
    "; + const tree = build( paper, languageProcessor ); const searchResult = findAllInTree( tree, treeNode => treeNode.name === "div", true ); diff --git a/packages/yoastseo/spec/parse/traverse/innerTextSpec.js b/packages/yoastseo/spec/parse/traverse/innerTextSpec.js index 4ffa2ed71fc..4afc8e40d89 100644 --- a/packages/yoastseo/spec/parse/traverse/innerTextSpec.js +++ b/packages/yoastseo/spec/parse/traverse/innerTextSpec.js @@ -4,19 +4,21 @@ import LanguageProcessor from "../../../src/parse/language/LanguageProcessor"; import Factory from "../../specHelpers/factory"; import Node from "../../../src/parse/structure/Node"; import memoizedSentenceTokenizer from "../../../src/languageProcessing/helpers/sentence/memoizedSentenceTokenizer"; +import Paper from "../../../src/values/Paper"; -let languageProcessor; +let languageProcessor, paper; beforeEach( () => { const researcher = Factory.buildMockResearcher( {}, true, false, false, { memoizedTokenizer: memoizedSentenceTokenizer } ); languageProcessor = new LanguageProcessor( researcher ); + paper = new Paper(); } ); describe( "A test for innerText", () => { it( "should correctly extract the inner text from a tree", function() { - const html = "

    Hello, world!

    Hello, yoast!

    "; - const tree = build( html, languageProcessor ); + paper._text = "

    Hello, world!

    Hello, yoast!

    "; + const tree = build( paper, languageProcessor ); const searchResult = innerText( tree ); const expected = "Hello, world! Hello, yoast! "; @@ -25,8 +27,8 @@ describe( "A test for innerText", () => { } ); it( "should correctly extract the inner text from a tree where the text contains markup", function() { - const html = "

    Hello, world!

    Hello, yoast!

    "; - const tree = build( html, languageProcessor ); + paper._text = "

    Hello, world!

    Hello, yoast!

    "; + const tree = build( paper, languageProcessor ); const searchResult = innerText( tree ); const expected = "Hello, world! Hello, yoast! "; @@ -35,8 +37,8 @@ describe( "A test for innerText", () => { } ); it( "should correctly extract the inner text from a tree if there is a subtree without text.", function() { - const html = "

    Hello, world!

    Hello,

    yoast
    !

    "; - const tree = build( html, languageProcessor ); + paper._text = "

    Hello, world!

    Hello,

    yoast
    !

    "; + const tree = build( paper, languageProcessor ); const searchResult = innerText( tree ); const expected = "Hello, world! Hello, yoast! "; @@ -44,6 +46,17 @@ describe( "A test for innerText", () => { expect( searchResult ).toEqual( expected ); } ); + it( "should consider break tags to be line breaks", function() { + // Matsuo Bashō's "old pond". + paper._text = "

    old pond
    frog leaps in
    water's sound

    "; + const tree = build( paper, languageProcessor ); + + const searchResult = innerText( tree ); + const expected = "old pond\nfrog leaps in\nwater's sound"; + + expect( searchResult ).toEqual( expected ); + } ); + it( "should return an empty string when presented an empty node", function() { const tree = new Node( "" ); diff --git a/packages/yoastseo/spec/scoring/assessments/assessmentSpec.js b/packages/yoastseo/spec/scoring/assessments/assessmentSpec.js index 0266e13572d..894db1cac54 100644 --- a/packages/yoastseo/spec/scoring/assessments/assessmentSpec.js +++ b/packages/yoastseo/spec/scoring/assessments/assessmentSpec.js @@ -24,6 +24,13 @@ describe( "test hasEnoughContentForAssessment", () => { expect( mockAssessment.hasEnoughContentForAssessment( mockPaper ) ).toBe( false ); } ); + it( "should return false if text has less than 50 chars after shortcodes are removed.", () => { + const mockPaper = new Paper( "Some text" + "[shortcode] ".repeat( 50 ), { shortcodes: [ "shortcode" ] } ); + const mockAssessment = new Assessment(); + + expect( mockAssessment.hasEnoughContentForAssessment( mockPaper ) ).toBe( false ); + } ); + it( "should return false if text is 49 chars and an image and no specification for contentNeededForAssessment", () => { const mockPaper = new Paper( "This text contains forty nine characterssssssssss" + // eslint-disable-next-line max-len diff --git a/packages/yoastseo/spec/scoring/assessments/inclusiveLanguage/configuration/cultureAssessmentsSpec.js b/packages/yoastseo/spec/scoring/assessments/inclusiveLanguage/configuration/cultureAssessmentsSpec.js index 795392e6665..7c9a867ab2e 100644 --- a/packages/yoastseo/spec/scoring/assessments/inclusiveLanguage/configuration/cultureAssessmentsSpec.js +++ b/packages/yoastseo/spec/scoring/assessments/inclusiveLanguage/configuration/cultureAssessmentsSpec.js @@ -320,8 +320,9 @@ describe( "a test for targeting non-inclusive phrases in culture assessments", ( { identifier: "gypsy", text: "In North 
America, the word Gypsy is most commonly used as a reference to Romani ethnicity.", - expectedFeedback: "Be careful when using gypsy as it is potentially harmful. Consider using an alternative, such as " + - "Romani, Romani person, unless referring to someone who explicitly wants to be referred to with this term. " + + expectedFeedback: "Be careful when using gypsy as it is potentially harmful. " + + "Consider using an alternative, such as Rom, Roma person, Romani, Romani person, " + + "unless referring to someone who explicitly wants to be referred to with this term. " + "If you are referring to a lifestyle rather than the ethnic group or their music, consider using " + "an alternative such as traveler, wanderer, free-spirited." + " Learn more.", @@ -330,8 +331,9 @@ describe( "a test for targeting non-inclusive phrases in culture assessments", ( { identifier: "gypsy", text: "In North America, the word Gipsy is most commonly used as a reference to Romani ethnicity.", - expectedFeedback: "Be careful when using gipsy as it is potentially harmful. Consider using an alternative, such as " + - "Romani, Romani person, unless referring to someone who explicitly wants to be referred to with this term. " + + expectedFeedback: "Be careful when using gipsy as it is potentially harmful. " + + "Consider using an alternative, such as Rom, Roma person, Romani, Romani person, " + + "unless referring to someone who explicitly wants to be referred to with this term. " + "If you are referring to a lifestyle rather than the ethnic group or their music, consider using " + "an alternative such as traveler, wanderer, free-spirited." + " Learn more.", @@ -341,7 +343,7 @@ describe( "a test for targeting non-inclusive phrases in culture assessments", ( identifier: "gypsies", text: "In the English language, the Romani people are widely known by the exonym Gypsies.", expectedFeedback: "Be careful when using gypsies as it is potentially harmful. 
Consider using an alternative, " + - "such as Romani, Romani people, unless referring to someone who explicitly wants to be referred to " + + "such as Roma, Romani, Romani people, unless referring to someone who explicitly wants to be referred to " + "with this term. If you are referring to a lifestyle rather than the ethnic group or their music, " + "consider using an alternative such as travelers, wanderers, free-spirited. " + "Learn more.", @@ -351,7 +353,7 @@ describe( "a test for targeting non-inclusive phrases in culture assessments", ( identifier: "gypsies", text: "In the English language, the Romani people are widely known by the exonym Gipsies.", expectedFeedback: "Be careful when using gipsies as it is potentially harmful. Consider using an alternative, " + - "such as Romani, Romani people, unless referring to someone who explicitly wants to be referred to " + + "such as Roma, Romani, Romani people, unless referring to someone who explicitly wants to be referred to " + "with this term. If you are referring to a lifestyle rather than the ethnic group or their music, " + "consider using an alternative such as travelers, wanderers, free-spirited. " + "Learn more.", @@ -760,20 +762,6 @@ describe( "a test for targeting non-inclusive phrases in culture assessments", ( testInclusiveLanguageAssessments( testData ); } ); - it( "should return the appropriate score and feedback string for: 'ebonics'", () => { - const testData = [ - { - identifier: "ebonics", - text: "White North Americans do not always understand Ebonics.", - expectedFeedback: "Avoid using Ebonics as it is potentially harmful. " + - "Consider using an alternative, such as African American English, African American Language. 
" + - "Learn more.", - expectedScore: 3, - }, - ]; - - testInclusiveLanguageAssessments( testData ); - } ); it( "should return the appropriate score and feedback string for: 'low man on the totem pole'", () => { const testData = [ { diff --git a/packages/yoastseo/spec/scoring/assessments/inclusiveLanguage/configuration/otherAssessmentsSpec.js b/packages/yoastseo/spec/scoring/assessments/inclusiveLanguage/configuration/otherAssessmentsSpec.js index f4600fb824d..5807f163204 100644 --- a/packages/yoastseo/spec/scoring/assessments/inclusiveLanguage/configuration/otherAssessmentsSpec.js +++ b/packages/yoastseo/spec/scoring/assessments/inclusiveLanguage/configuration/otherAssessmentsSpec.js @@ -27,15 +27,7 @@ describe( "Checks various conditions for the 'normal' and 'abnormal' assessments }, { identifier: "normal", - text: "This isn't normal behaviour children.", - expectedFeedback: "Avoid using normal as it is potentially harmful. " + - "Consider using an alternative, such as typical or a specific characteristic or experience if it is known. " + - "Learn more.", - expectedScore: 3, - }, - { - identifier: "normal", - text: "This isn't normal behavior children.", + text: "He is a mentally normal person.", expectedFeedback: "Avoid using normal as it is potentially harmful. " + "Consider using an alternative, such as typical or a specific characteristic or experience if it is known. " + "Learn more.", @@ -43,35 +35,29 @@ describe( "Checks various conditions for the 'normal' and 'abnormal' assessments }, { identifier: "normal", - text: "He is a mentally normal person.", + text: "I'm afraid this isn't psychologically normal.", expectedFeedback: "Avoid using normal as it is potentially harmful. " + "Consider using an alternative, such as typical or a specific characteristic or experience if it is known. 
" + "Learn more.", expectedScore: 3, }, { - identifier: "normal", + identifier: "behaviorallyNormal", text: "I'm afraid this isn't behaviorally normal.", - expectedFeedback: "Avoid using normal as it is potentially harmful. " + - "Consider using an alternative, such as typical or a specific characteristic or experience if it is known. " + + expectedFeedback: "Be careful when using behaviorally normal as it is potentially harmful. " + + "Unless you are referring to objects or animals, consider using an alternative, such as showing typical behavior " + + "or a specific characteristic or experience if it is known. " + "Learn more.", - expectedScore: 3, + expectedScore: 6, }, { - identifier: "normal", + identifier: "behaviorallyNormal", text: "I'm afraid this isn't behaviourally normal.", - expectedFeedback: "Avoid using normal as it is potentially harmful. " + - "Consider using an alternative, such as typical or a specific characteristic or experience if it is known. " + + expectedFeedback: "Be careful when using behaviourally normal as it is potentially harmful. " + + "Unless you are referring to objects or animals, consider using an alternative, such as showing typical behavior " + + "or a specific characteristic or experience if it is known. " + "Learn more.", - expectedScore: 3, - }, - { - identifier: "normal", - text: "I'm afraid this isn't psychologically normal.", - expectedFeedback: "Avoid using normal as it is potentially harmful. " + - "Consider using an alternative, such as typical or a specific characteristic or experience if it is known. " + - "Learn more.", - expectedScore: 3, + expectedScore: 6, }, ]; testInclusiveLanguageAssessments( testData ); @@ -96,15 +82,7 @@ describe( "Checks various conditions for the 'normal' and 'abnormal' assessments }, { identifier: "abnormal", - text: "This isn't abnormal behaviour.", - expectedFeedback: "Avoid using abnormal as it is potentially harmful. 
" + - "Consider using an alternative, such as atypical or a specific characteristic or experience if it is known. " + - "Learn more.", - expectedScore: 3, - }, - { - identifier: "abnormal", - text: "This isn't abnormal behavior.", + text: "He is a mentally abnormal person.", expectedFeedback: "Avoid using abnormal as it is potentially harmful. " + "Consider using an alternative, such as atypical or a specific characteristic or experience if it is known. " + "Learn more.", @@ -112,35 +90,41 @@ describe( "Checks various conditions for the 'normal' and 'abnormal' assessments }, { identifier: "abnormal", - text: "He is a mentally abnormal person.", + text: "I'm afraid this isn't psychologically abnormal.", expectedFeedback: "Avoid using abnormal as it is potentially harmful. " + "Consider using an alternative, such as atypical or a specific characteristic or experience if it is known. " + "Learn more.", expectedScore: 3, }, { - identifier: "abnormal", + identifier: "behaviorallyAbnormal", text: "I'm afraid this isn't behaviorally abnormal.", - expectedFeedback: "Avoid using abnormal as it is potentially harmful. " + - "Consider using an alternative, such as atypical or a specific characteristic or experience if it is known. " + + expectedFeedback: "Be careful when using behaviorally abnormal as it is potentially harmful. " + + "Unless you are referring to objects or animals, consider using an alternative, " + + "such as showing atypical behavior, showing dysfunctional behavior " + + "or a specific characteristic or experience if it is known. " + "Learn more.", - expectedScore: 3, + expectedScore: 6, }, { - identifier: "abnormal", + identifier: "behaviorallyAbnormal", text: "I'm afraid this isn't behaviourally abnormal.", - expectedFeedback: "Avoid using abnormal as it is potentially harmful. " + - "Consider using an alternative, such as atypical or a specific characteristic or experience if it is known. 
" + + expectedFeedback: "Be careful when using behaviourally abnormal as it is potentially harmful. " + + "Unless you are referring to objects or animals, consider using an alternative, " + + "such as showing atypical behavior, showing dysfunctional behavior " + + "or a specific characteristic or experience if it is known. " + "Learn more.", - expectedScore: 3, + expectedScore: 6, }, { - identifier: "abnormal", - text: "I'm afraid this isn't psychologically abnormal.", - expectedFeedback: "Avoid using abnormal as it is potentially harmful. " + - "Consider using an alternative, such as atypical or a specific characteristic or experience if it is known. " + + identifier: "abnormalBehavior", + text: "This isn't abnormal behaviour.", + expectedFeedback: "Be careful when using abnormal behaviour as it is potentially harmful. " + + "Unless you are referring to objects or animals, consider using an alternative, " + + "such as atypical behavior, unusual behavior " + + "or a specific characteristic or experience if it is known. " + "Learn more.", - expectedScore: 3, + expectedScore: 6, }, ]; testInclusiveLanguageAssessments( testData ); diff --git a/packages/yoastseo/spec/scoring/assessments/readability/SubheadingDistributionTooLongAssessmentSpec.js b/packages/yoastseo/spec/scoring/assessments/readability/SubheadingDistributionTooLongAssessmentSpec.js index 6745f5495d7..04c5483404d 100644 --- a/packages/yoastseo/spec/scoring/assessments/readability/SubheadingDistributionTooLongAssessmentSpec.js +++ b/packages/yoastseo/spec/scoring/assessments/readability/SubheadingDistributionTooLongAssessmentSpec.js @@ -43,6 +43,17 @@ describe( "An assessment for scoring too long text fragments without a subheadin "You are not using any subheadings, but your text is short enough and probably doesn't need them." 
); } ); + it( "Scores a text that's short (<300 words) after excluding shortodes," + + " and which does not have subheadings.", function() { + const assessment = subheadingDistributionTooLong.getResult( + new Paper( shortText + "[shortcode] ".repeat( 150 ) + "[/shortcode] ".repeat( 150 ), { shortcodes: [ "shortcode" ] } ), + Factory.buildMockResearcher( [] ) + ); + expect( assessment.getScore() ).toBe( 9 ); + expect( assessment.getText() ).toBe( "Subheading distribution: " + + "You are not using any subheadings, but your text is short enough and probably doesn't need them." ); + } ); + it( "Scores a short text (<300 words), which has subheadings.", function() { const assessment = subheadingDistributionTooLong.getResult( new Paper( "a " + subheading + shortText ), diff --git a/packages/yoastseo/spec/scoring/assessments/readability/TransitionWordsAssessmentSpec.js b/packages/yoastseo/spec/scoring/assessments/readability/TransitionWordsAssessmentSpec.js index 8cf18699849..1b145d0b147 100644 --- a/packages/yoastseo/spec/scoring/assessments/readability/TransitionWordsAssessmentSpec.js +++ b/packages/yoastseo/spec/scoring/assessments/readability/TransitionWordsAssessmentSpec.js @@ -148,6 +148,12 @@ describe( "An assessment for transition word percentage", function() { expect( assessment ).toBe( false ); } ); + it( "should not be applicable if the text has more than 200 words, but part of the words are shortcodes", function() { + const mockPaper = new Paper( "Text " + "text ".repeat( 198 ) + "[shortcode]".repeat( 2 ), { shortcodes: [ "shortcode" ] } ); + const assessment = new TransitionWordsAssessment().isApplicable( mockPaper, new EnglishResearcher( mockPaper ) ); + expect( assessment ).toBe( false ); + } ); + it( "is applicable when used with a supported researcher, e.g. the English researcher", function() { const mockPaper = new Paper( "Lorem ipsum dolor sit amet, ne sed agam oblique alterum. Eos percipit singulis no. No scripta graecis cum. 
" + "Ut vim eius porro labore. Id quem civibus sit. Sed no primis urbanitas, aperiri laboramus voluptatibus ei per. Esse consul possim " + diff --git a/packages/yoastseo/spec/scoring/assessments/seo/KeyphraseDistributionAssessmentSpec.js b/packages/yoastseo/spec/scoring/assessments/seo/KeyphraseDistributionAssessmentSpec.js index 7b74f7951c7..6acddfc8610 100644 --- a/packages/yoastseo/spec/scoring/assessments/seo/KeyphraseDistributionAssessmentSpec.js +++ b/packages/yoastseo/spec/scoring/assessments/seo/KeyphraseDistributionAssessmentSpec.js @@ -162,6 +162,16 @@ describe( "Checks if the assessment is applicable", function() { expect( assessmentIsApplicable ).toBe( false ); } ); + + it( "should not be applicable to a text consisting only of shortcodes", function() { + const shortcodeSentence = "[shortcode]".repeat( 15 ) + ". "; + const mockPaper = new Paper( shortcodeSentence.repeat( 15 ), { shortcodes: [ "shortcode" ] } ); + researcher.setPaper( mockPaper ); + + const assessmentIsApplicable = keyphraseDistributionAssessment.isApplicable( mockPaper, researcher ); + + expect( assessmentIsApplicable ).toBe( false ); + } ); } ); describe( "A test for marking keywords in the text", function() { diff --git a/packages/yoastseo/spec/scoring/assessments/seo/KeywordDensityAssessmentSpec.js b/packages/yoastseo/spec/scoring/assessments/seo/KeywordDensityAssessmentSpec.js index dfd177c8fdb..27e0a372a45 100644 --- a/packages/yoastseo/spec/scoring/assessments/seo/KeywordDensityAssessmentSpec.js +++ b/packages/yoastseo/spec/scoring/assessments/seo/KeywordDensityAssessmentSpec.js @@ -1,5 +1,4 @@ -/* eslint-disable capitalized-comments, spaced-comment */ -import KeywordDensityAssessment from "../../../../src/scoring/assessments/seo/KeywordDensityAssessment"; +import KeyphraseDensityAssessment from "../../../../src/scoring/assessments/seo/KeywordDensityAssessment"; import EnglishResearcher from "../../../../src/languageProcessing/languages/en/Researcher"; import GermanResearcher from 
"../../../../src/languageProcessing/languages/de/Researcher"; import DefaultResearcher from "../../../../src/languageProcessing/languages/_default/Researcher"; @@ -7,191 +6,243 @@ import JapaneseResearcher from "../../../../src/languageProcessing/languages/ja/ import Paper from "../../../../src/values/Paper.js"; import Mark from "../../../../src/values/Mark.js"; import getMorphologyData from "../../../specHelpers/getMorphologyData"; +import buildTree from "../../../specHelpers/parse/buildTree"; const morphologyData = getMorphologyData( "en" ); const morphologyDataDe = getMorphologyData( "de" ); -// const morphologyDataJA = getMorphologyData( "ja" ); // Variable is used in language specific test (and thus removed) +const morphologyDataJA = getMorphologyData( "ja" ); const nonkeyword = "nonkeyword, "; const keyword = "keyword, "; const shortTextJapanese = "熱".repeat( 199 ); const longTextJapanese = "熱".repeat( 200 ); -// const japaneseSentence = "私の猫はかわいいです。小さくて可愛い花の刺繍に関する一般一般の記事です。".repeat( 20 ); // Variable is used in language specific test (and thus removed) -// const japaneseSentenceWithKeyphrase = "一日一冊の面白い本を買って読んでるのはできるかどうかやってみます。"; // Variable is used in language specific test (and thus removed) -// const japaneseSentenceWithKeyphraseExactMatch = "一日一冊の本を読むのはできるかどうかやってみます。"; // Variable is used in language specific test (and thus removed) -describe( "Tests for the keywordDensity assessment for languages without morphology", function() { - it( "runs the keywordDensity on the paper without keyword in the text", function() { - const paper = new Paper( nonkeyword.repeat( 1000 ), { keyword: "keyword" } ); - const researcher = new DefaultResearcher( paper ); - const result = new KeywordDensityAssessment().getResult( paper, researcher ); - expect( result.getScore() ).toBe( 4 ); - expect( result.getText() ).toBe( "Keyphrase density: " + - "The keyphrase was found 0 times. That's less than the recommended minimum of 5 times for a text of this length." 
+ - " Focus on your keyphrase!" ); - } ); - - it( "runs the keywordDensity on the paper with a low keyphrase density (0.1%)", function() { - const paper = new Paper( nonkeyword.repeat( 999 ) + keyword, { keyword: "keyword" } ); - const researcher = new DefaultResearcher( paper ); - const result = new KeywordDensityAssessment().getResult( paper, researcher ); - expect( result.getScore() ).toBe( 4 ); - expect( result.getText() ).toBe( "Keyphrase density: " + - "The keyphrase was found 1 time. That's less than the recommended minimum of 5 times for a text of this length." + - " Focus on your keyphrase!" ); - } ); - - it( "runs the keywordDensity on the paper with a good keyphrase density (0.5%)", function() { - const paper = new Paper( nonkeyword.repeat( 995 ) + keyword.repeat( 5 ), { keyword: "keyword" } ); - const researcher = new DefaultResearcher( paper ); - const result = new KeywordDensityAssessment().getResult( paper, researcher ); - expect( result.getScore() ).toBe( 9 ); - expect( result.getText() ).toBe( "Keyphrase density: " + - "The keyphrase was found 5 times. This is great!" ); - } ); - - it( "runs the keywordDensity on the paper with a good keyphrase density (2%)", function() { - const paper = new Paper( nonkeyword.repeat( 980 ) + keyword.repeat( 20 ), { keyword: "keyword" } ); - const researcher = new DefaultResearcher( paper ); - const result = new KeywordDensityAssessment().getResult( paper, researcher ); - expect( result.getScore() ).toBe( 9 ); - expect( result.getText() ).toBe( "Keyphrase density: " + - "The keyphrase was found 20 times. This is great!" 
); - } ); - - it( "runs the keywordDensity on the paper with a slightly too high keyphrase density (3.5%)", function() { - const paper = new Paper( nonkeyword.repeat( 965 ) + keyword.repeat( 35 ), { keyword: "keyword" } ); - const researcher = new DefaultResearcher( paper ); - const result = new KeywordDensityAssessment().getResult( paper, researcher ); - expect( result.getScore() ).toBe( -10 ); - expect( result.getText() ).toBe( "Keyphrase density: " + - "The keyphrase was found 35 times. That's more than the recommended maximum of 29 times " + - "for a text of this length. Don't overoptimize!" ); - } ); - - it( "runs the keywordDensity on the paper with a very high keyphrase density (10%)", function() { - const paper = new Paper( nonkeyword.repeat( 900 ) + keyword.repeat( 100 ), { keyword: "keyword" } ); - const researcher = new DefaultResearcher( paper ); - const result = new KeywordDensityAssessment().getResult( paper, researcher ); - expect( result.getScore() ).toBe( -50 ); - expect( result.getText() ).toBe( "Keyphrase density: " + - "The keyphrase was found 100 times. That's way more than the recommended maximum of 29 times " + - "for a text of this length. Don't overoptimize!" ); - } ); - - - it( "adjusts the keyphrase density based on the length of the keyword with the actual density remaining at 2% - short keyphrase", function() { - const paper = new Paper( nonkeyword.repeat( 960 ) + "b c, ".repeat( 20 ), { keyword: "b c" } ); - const researcher = new DefaultResearcher( paper ); - const result = new KeywordDensityAssessment().getResult( paper, researcher ); - expect( result.getScore() ).toBe( 9 ); - expect( result.getText() ).toBe( "Keyphrase density: " + - "The keyphrase was found 20 times. This is great!" ); - } ); +// Test data for language without morphology. +const testDataWithDefaultResearcher = [ + { + description: "runs the keyphrase density on the paper without keyphrase occurrence found in the text", + paper: new Paper( "

    " + nonkeyword.repeat( 1000 ) + "

    ", { keyword: "keyword" } ), + expectedResult: { + score: 4, + text: "Keyphrase density: " + + "The keyphrase was found 0 times. That's less than the recommended minimum of 5 times for a text of this length." + + " Focus on your keyphrase!", + }, + }, + { + description: "runs the keyphrase density on the paper with a low keyphrase density (0.1%)", + paper: new Paper( "

    " + nonkeyword.repeat( 999 ) + keyword + "

    ", { keyword: "keyword" } ), + expectedResult: { + score: 4, + text: "Keyphrase density: " + + "The keyphrase was found 1 time. That's less than the recommended minimum of 5 times for a text of this length." + + " Focus on your keyphrase!", + }, + }, + { + description: "runs the keyphrase density on the paper with a good keyphrase density (0.5%)", + paper: new Paper( "

    " + nonkeyword.repeat( 995 ) + keyword.repeat( 5 ) + "

    ", { keyword: "keyword" } ), + expectedResult: { + score: 9, + text: "Keyphrase density: " + + "The keyphrase was found 5 times. This is great!", + }, + }, + { + description: "runs the keyphrase density on the paper with a good keyphrase density (2%)", + paper: new Paper( "

    " + nonkeyword.repeat( 980 ) + keyword.repeat( 20 ) + "

    ", { keyword: "keyword" } ), + expectedResult: { + score: 9, + text: "Keyphrase density: " + + "The keyphrase was found 20 times. This is great!", + }, + }, + { + description: "runs the keyphrase density on the paper with a slightly too high keyphrase density (3.5%)", + paper: new Paper( "

    " + nonkeyword.repeat( 965 ) + keyword.repeat( 35 ) + "

    ", { keyword: "keyword" } ), + expectedResult: { + score: -10, + text: "Keyphrase density: " + + "The keyphrase was found 35 times. That's more than the recommended maximum of 29 times " + + "for a text of this length. Don't overoptimize!", + }, + }, + { + description: "runs the keyphrase density on the paper with a very high keyphrase density (10%)", + paper: new Paper( "

    " + nonkeyword.repeat( 900 ) + keyword.repeat( 100 ) + "

    ", { keyword: "keyword" } ), + expectedResult: { + score: -50, + text: "Keyphrase density: " + + "The keyphrase was found 100 times. That's way more than the recommended maximum of 29 times " + + "for a text of this length. Don't overoptimize!", + }, + }, + { + description: "adjusts the keyphrase density based on the length of the keyphrase with the actual density remaining at 2% - short keyphrase", + paper: new Paper( "

    " + nonkeyword.repeat( 960 ) + "b c, ".repeat( 20 ) + "

    ", { keyword: "b c" } ), + expectedResult: { + score: 9, + text: "Keyphrase density: " + + "The keyphrase was found 20 times. This is great!", + }, + }, + { + description: "does not count text inside elements we want to exclude from the analysis when calculating the recommended" + + "number of keyphrase usages", + paper: new Paper( "

    " + nonkeyword.repeat( 101 ) + "

    " + nonkeyword.repeat( 859 ) + + "
    " + "b c, ".repeat( 20 ) + "

    ", { keyword: "b c" } ), + expectedResult: { + score: -50, + text: "Keyphrase density: " + + "The keyphrase was found 20 times. That's way more than the recommended maximum of 3 times for a text of this length." + + " Don't overoptimize!", + }, + }, + { + description: "adjusts the keyphrase density based on the length of the keyphrase with the actual density remaining at 2% - long keyphrase", + paper: new Paper( "

    " + nonkeyword.repeat( 900 ) + "b c d e f, ".repeat( 20 ) + "

    ", { keyword: "b c d e f" } ), + expectedResult: { + score: -50, + text: "Keyphrase density: " + + "The keyphrase was found 20 times. That's way more than the recommended maximum of 12 times for a text of this length." + + " Don't overoptimize!", + }, + }, + { + description: "should not count shortcodes when calculating keyphrase density", + paper: new Paper( "

    " + nonkeyword.repeat( 99 ) + "[keyword]

    ", { keyword: "keyword", shortcodes: [ "keyword" ] } ), + expectedResult: { + score: 4, + text: "Keyphrase density: The keyphrase was found 0 times." + + " That's less than the recommended minimum of 2 times for a text of this length." + + " Focus on your keyphrase!", + }, + }, + { + description: "returns a bad result if the keyphrase is only used once, regardless of the density", + paper: new Paper( "

    " + nonkeyword.repeat( 100 ) + keyword + "

    ", { keyword: "keyword" } ), + expectedResult: { + score: 4, + text: "Keyphrase density: " + + "The keyphrase was found 1 time. That's less than the recommended minimum of 2 times " + + "for a text of this length. Focus on your keyphrase!", + }, + }, + { + description: "returns a good result if the keyphrase is used twice and " + + "the recommended count is smaller than or equal to 2, regardless of the density", + paper: new Paper( "

    " + nonkeyword.repeat( 100 ) + "a b c, a b c

    ", { keyword: "a b c", locale: "xx_XX" } ), + expectedResult: { + score: 9, + text: "Keyphrase density: " + + "The keyphrase was found 2 times. This is great!", + }, + }, +]; - it( "does not count text inside elements we want to exclude from the analysis when calculating the recommended" + - "number of keyphrase usages", function() { - const paper = new Paper( nonkeyword.repeat( 101 ) + "
    " + nonkeyword.repeat( 859 ) + - "
    " + "b c, ".repeat( 20 ), { keyword: "b c" } ); - const researcher = new DefaultResearcher( paper ); - const result = new KeywordDensityAssessment().getResult( paper, researcher ); - expect( result.getScore() ).toBe( -50 ); - expect( result.getText() ).toBe( "Keyphrase density: " + - "The keyphrase was found 20 times. That's way more than the recommended maximum of 3 times for a text of this length." + - " Don't overoptimize!" ); - } ); +describe.each( testDataWithDefaultResearcher )( "a keyphrase density test for languages without morphology", ( { + description, + paper, + expectedResult, +} ) => { + const researcher = new DefaultResearcher( paper ); + buildTree( paper, researcher ); + it( description, () => { + const result = new KeyphraseDensityAssessment().getResult( paper, researcher ); - it( "adjusts the keyphrase density based on the length of the keyword with the actual density remaining at 2% - long keyphrase", function() { - const paper = new Paper( nonkeyword.repeat( 900 ) + "b c d e f, ".repeat( 20 ), { keyword: "b c d e f" } ); - const researcher = new DefaultResearcher( paper ); - const result = new KeywordDensityAssessment().getResult( paper, researcher ); - expect( result.getScore() ).toBe( -50 ); - expect( result.getText() ).toBe( "Keyphrase density: " + - "The keyphrase was found 20 times. That's way more than the recommended maximum of 12 times " + - "for a text of this length. Don't overoptimize!" ); - } ); - - it( "returns a bad result if the keyword is only used once, regardless of the density", function() { - const paper = new Paper( nonkeyword.repeat( 100 ) + keyword, { keyword: "keyword" } ); - const researcher = new DefaultResearcher( paper ); - const result = new KeywordDensityAssessment().getResult( paper, researcher ); - expect( result.getScore() ).toBe( 4 ); - expect( result.getText() ).toBe( "Keyphrase density: " + - "The keyphrase was found 1 time. 
That's less than the recommended minimum of 2 times " + - "for a text of this length. Focus on your keyphrase!" ); - } ); - - it( "returns a good result if the keyword is used twice and " + - "the recommended count is smaller than or equal to 2, regardless of the density", function() { - const paper = new Paper( nonkeyword.repeat( 100 ) + "a b c, a b c", { keyword: "a b c", locale: "xx_XX" } ); - const researcher = new DefaultResearcher( paper ); - const result = new KeywordDensityAssessment().getResult( paper, researcher ); - expect( result.getScore() ).toBe( 9 ); - expect( result.getText() ).toBe( "Keyphrase density: " + - "The keyphrase was found 2 times. This is great!" ); - } ); - - it( "applies to a paper with a keyword and a text of at least 100 words", function() { - const paper = new Paper( nonkeyword.repeat( 100 ), { keyword: "keyword" } ); - expect( new KeywordDensityAssessment().isApplicable( paper, new DefaultResearcher( paper ) ) ).toBe( true ); - } ); - - it( "does not apply to a paper with a keyword and a text of at least 100 words when the text is inside an element" + - "we want to exclude from the analysis", function() { - const paper = new Paper( "
    " + nonkeyword.repeat( 100 ) + "
    ", { keyword: "keyword" } ); - expect( new KeywordDensityAssessment().isApplicable( paper, new DefaultResearcher( paper ) ) ).toBe( false ); - } ); - - it( "does not apply to a paper with text of 100 words but without a keyword", function() { - const paper = new Paper( nonkeyword.repeat( 100 ), { keyword: "" } ); - expect( new KeywordDensityAssessment().isApplicable( paper, new DefaultResearcher( paper ) ) ).toBe( false ); - } ); - - it( "does not apply to a paper with a text containing less than 100 words and with a keyword", function() { - const paper = new Paper( nonkeyword.repeat( 99 ), { keyword: "keyword" } ); - expect( new KeywordDensityAssessment().isApplicable( paper, new DefaultResearcher( paper ) ) ).toBe( false ); - } ); - - it( "does not apply to a paper with a text containing less than 100 words and without a keyword", function() { - const paper = new Paper( nonkeyword.repeat( 99 ), { keyword: "" } ); - expect( new KeywordDensityAssessment().isApplicable( paper, new DefaultResearcher( paper ) ) ).toBe( false ); - } ); - - it( "applies to a Japanese paper with a keyword and a text of at least 200 characters", function() { - const paper = new Paper( longTextJapanese, { keyword: "keyword" } ); - expect( new KeywordDensityAssessment().isApplicable( paper, new JapaneseResearcher( paper ) ) ).toBe( true ); + expect( result.getScore() ).toBe( expectedResult.score ); + expect( result.getText() ).toBe( expectedResult.text ); } ); +} ); - it( "does not apply to a Japanese paper with text of less than 200 characters", function() { - const paper = new Paper( shortTextJapanese, { keyword: "keyword" } ); - expect( new KeywordDensityAssessment().isApplicable( paper, new JapaneseResearcher( paper ) ) ).toBe( false ); +const testDataForApplicability = [ + { + description: "applies to a paper with a keyphrase and a text of at least 100 words", + paper: new Paper( "

    " + nonkeyword.repeat( 100 ) + "

    ", { keyword: "keyword" } ), + researcher: new DefaultResearcher(), + expectedResult: true, + }, + { + description: "does not apply to a paper with a keyphrase and a text of at least 100 words when the text is inside an element" + + "we want to exclude from the analysis", + paper: new Paper( "
    " + nonkeyword.repeat( 100 ) + "
    ", { keyword: "keyword" } ), + researcher: new DefaultResearcher(), + expectedResult: false, + }, + { + description: "does not apply to a paper with text of 100 words but without a keyphrase", + paper: new Paper( "

    " + nonkeyword.repeat( 100 ) + "

    ", { keyword: "" } ), + researcher: new DefaultResearcher(), + expectedResult: false, + }, + { + description: "does not apply to a paper with a text containing less than 100 words and with a keyphrase", + paper: new Paper( "

    " + nonkeyword.repeat( 99 ) + "

    ", { keyword: "keyword" } ), + researcher: new DefaultResearcher(), + expectedResult: false, + }, + { + description: "does not apply to a paper with a text containing less than 100 words and without a keyphrase", + paper: new Paper( "

    " + nonkeyword.repeat( 99 ) + "

    ", { keyword: "" } ), + researcher: new DefaultResearcher(), + expectedResult: false, + }, + { + description: "applies to a Japanese paper with a keyphrase and a text of at least 200 characters", + paper: new Paper( "

    " + longTextJapanese + "

    ", { keyword: "keyword" } ), + researcher: new JapaneseResearcher(), + expectedResult: true, + }, + { + description: "does not apply to a Japanese paper with text of less than 200 characters", + paper: new Paper( "

    " + shortTextJapanese + "

    ", { keyword: "keyword" } ), + researcher: new JapaneseResearcher(), + expectedResult: false, + }, +]; +describe.each( testDataForApplicability )( "assessment applicability test", ( { + description, + paper, + researcher, + expectedResult, +} ) => { + researcher.setPaper( paper ); + buildTree( paper, researcher ); + it( description, () => { + expect( new KeyphraseDensityAssessment().isApplicable( paper, researcher ) ).toBe( expectedResult ); } ); } ); -describe( "Tests for the keywordDensity assessment for languages with morphology", function() { - it( "gives a GOOD result when keyword density is between 3 and 3.5%", function() { +describe( "Tests for the keyphrase density assessment for languages with morphology", function() { + it( "gives a GOOD result when keyphrase density is between 3 and 3.5%", function() { const paper = new Paper( nonkeyword.repeat( 968 ) + keyword.repeat( 32 ), { keyword: "keyword", locale: "en_EN" } ); const researcher = new EnglishResearcher( paper ); researcher.addResearchData( "morphology", morphologyData ); - const result = new KeywordDensityAssessment().getResult( paper, researcher ); + buildTree( paper, researcher ); + const result = new KeyphraseDensityAssessment().getResult( paper, researcher ); expect( result.getScore() ).toBe( 9 ); expect( result.getText() ).toBe( "Keyphrase density: " + "The keyphrase was found 32 times. This is great!" 
); } ); - it( "gives a GOOD result when keyword density is between 3 and 3.5%, also for other languages with morphology support", function() { + it( "gives a GOOD result when keyphrase density is between 3 and 3.5%, also for other languages with morphology support", function() { const paper = new Paper( nonkeyword.repeat( 968 ) + keyword.repeat( 32 ), { keyword: "keyword", locale: "de_DE" } ); const researcher = new GermanResearcher( paper ); + buildTree( paper, researcher ); + researcher.addResearchData( "morphology", morphologyDataDe ); - const result = new KeywordDensityAssessment().getResult( paper, researcher ); + const result = new KeyphraseDensityAssessment().getResult( paper, researcher ); expect( result.getScore() ).toBe( 9 ); expect( result.getText() ).toBe( "Keyphrase density: " + "The keyphrase was found 32 times. This is great!" ); } ); - it( "gives a BAD result when keyword density is between 3 and 3.5%, if morphology support is added, but there is no morphology data", function() { + it( "gives a BAD result when keyphrase density is between 3 and 3.5%, if morphology support is added," + + " but there is no morphology data", function() { const paper = new Paper( nonkeyword.repeat( 968 ) + keyword.repeat( 32 ), { keyword: "keyword", locale: "de_DE" } ); const researcher = new GermanResearcher( paper ); - const result = new KeywordDensityAssessment().getResult( paper, researcher ); + buildTree( paper, researcher ); + + const result = new KeyphraseDensityAssessment().getResult( paper, researcher ); expect( result.getScore() ).toBe( -10 ); expect( result.getText() ).toBe( "Keyphrase density: " + "The keyphrase was found 32 times. That's more than the recommended maximum of 29 times for a text of this length. 
" + @@ -199,236 +250,229 @@ describe( "Tests for the keywordDensity assessment for languages with morphology } ); } ); -describe( "A test for marking the keyword", function() { +describe( "A test for marking the keyphrase", function() { it( "returns markers", function() { - const keywordDensityAssessment = new KeywordDensityAssessment(); - const paper = new Paper( "This is a very interesting paper with a keyword and another keyword.", { keyword: "keyword" } ); + const keyphraseDensityAssessment = new KeyphraseDensityAssessment(); + const paper = new Paper( "

    This is a very interesting paper with a keyword and another keyword.

    ", { keyword: "keyword" } ); const researcher = new DefaultResearcher( paper ); - keywordDensityAssessment.getResult( paper, researcher ); + buildTree( paper, researcher ); + + keyphraseDensityAssessment.getResult( paper, researcher ); const expected = [ new Mark( { marked: "This is a very interesting paper with a " + "keyword and another " + "keyword.", original: "This is a very interesting paper with a keyword and another keyword.", + position: { + startOffset: 43, + endOffset: 50, + startOffsetBlock: 40, + endOffsetBlock: 47, + attributeId: "", + clientId: "", + isFirstSection: false, + }, + } ), + new Mark( { + marked: "This is a very interesting paper with a " + + "keyword and another " + + "keyword.", + original: "This is a very interesting paper with a keyword and another keyword.", + position: { + startOffset: 63, + endOffset: 70, + startOffsetBlock: 60, + endOffsetBlock: 67, + attributeId: "", + clientId: "", + isFirstSection: false, + }, } ) ]; - expect( keywordDensityAssessment.getMarks() ).toEqual( expected ); + expect( keyphraseDensityAssessment.getMarks() ).toEqual( expected ); } ); it( "returns markers for a keyphrase containing numbers", function() { - const keywordDensityAssessment = new KeywordDensityAssessment(); - const paper = new Paper( "This is the release of YoastSEO 9.3.", { keyword: "YoastSEO 9.3" } ); + const keyphraseDensityAssessment = new KeyphraseDensityAssessment(); + const paper = new Paper( "

    This is the release of YoastSEO 9.3.

    ", { keyword: "YoastSEO 9.3" } ); + const researcher = new DefaultResearcher( paper ); - keywordDensityAssessment.getResult( paper, researcher ); + buildTree( paper, researcher ); + + keyphraseDensityAssessment.getResult( paper, researcher ); const expected = [ new Mark( { marked: "This is the release of YoastSEO 9.3.", - original: "This is the release of YoastSEO 9.3." } ) ]; - expect( keywordDensityAssessment.getMarks() ).toEqual( expected ); + original: "This is the release of YoastSEO 9.3.", + position: { + startOffset: 26, + endOffset: 38, + startOffsetBlock: 23, + endOffsetBlock: 35, + attributeId: "", + clientId: "", + isFirstSection: false, + }, + } ) ]; + expect( keyphraseDensityAssessment.getMarks() ).toEqual( expected ); } ); it( "returns markers for a keyphrase found in image caption", function() { - const keywordDensityAssessment = new KeywordDensityAssessment(); + const keyphraseDensityAssessment = new KeyphraseDensityAssessment(); const paper = new Paper( "

    a different cat with toy " + - "A flamboyant cat with a toy

    \n" + + "A flamboyant cat with a toy
    " + "

    ", { keyword: "cat toy" } ); const researcher = new EnglishResearcher( paper ); - keywordDensityAssessment.getResult( paper, researcher ); + buildTree( paper, researcher ); + + keyphraseDensityAssessment.getResult( paper, researcher ); const expected = [ new Mark( { - marked: "A flamboyant cat with a toy", - original: "A flamboyant cat with a toy" } ) ]; - expect( keywordDensityAssessment.getMarks() ).toEqual( expected ); + marked: " A flamboyant cat with a " + + "toy\n", + original: " A flamboyant cat with a toy\n", + position: { + startOffset: 201, + endOffset: 204, + startOffsetBlock: 198, + endOffsetBlock: 201, + attributeId: "", + clientId: "", + isFirstSection: false, + }, + } ), + new Mark( { + marked: " A flamboyant cat with a " + + "toy\n", + original: " A flamboyant cat with a toy\n", + position: { + startOffset: 212, + endOffset: 215, + startOffsetBlock: 209, + endOffsetBlock: 212, + attributeId: "", + clientId: "", + isFirstSection: false, + }, + } ), + ]; + expect( keyphraseDensityAssessment.getMarks() ).toEqual( expected ); } ); +} ); + +const japaneseSentence = "私の猫はかわいいです。小さくて可愛い花の刺繍に関する一般一般の記事です。".repeat( 20 ); +const japaneseSentenceWithKeyphrase = "一日一冊の面白い本を買って読んでるのはできるかどうかやってみます。"; +const japaneseSentenceWithKeyphraseExactMatch = "一日一冊の本を読むのはできるかどうかやってみます。"; + +const testDataJapanese = [ + { + description: "shouldn't return NaN/infinity times of keyphrase occurrence when the keyphrase contains only function words " + + "and there is no match in the text", + paper: new Paper( "

    " + japaneseSentence + japaneseSentenceWithKeyphrase.repeat( 32 ) + "

    ", { + keyword: "ばっかり", + locale: "ja", + } ), + expectedResult: { + score: 4, + text: "Keyphrase density: " + + "The keyphrase was found 0 times. That's less than the recommended minimum of 6 times for a text of this length. " + + "Focus on your keyphrase!", + }, + }, + { + description: "shouldn't return NaN/infinity times of synonym occurrence when the synonym contains only function words " + + "and there is no match in the text", + paper: new Paper( "

    " + japaneseSentence + japaneseSentenceWithKeyphrase.repeat( 32 ) + "

    ", { + keyword: "", + synonyms: "ばっかり", + locale: "ja", + } ), + expectedResult: { + score: 4, + text: "Keyphrase density: " + + "The keyphrase was found 0 times. That's less than the recommended minimum of 6 times for a text of this length. " + + "Focus on your keyphrase!", + }, + }, + { + description: "shouldn't return NaN/infinity times of keyphrase occurrence when the keyphrase contains spaces " + + "and there is no match in the text", + paper: new Paper( "

    " + japaneseSentence + japaneseSentenceWithKeyphrase.repeat( 32 ) + "

    ", { + keyword: "かしら かい を ばっかり", + locale: "ja", + } ), + expectedResult: { + score: 4, + text: "Keyphrase density: " + + "The keyphrase was found 0 times. That's less than the recommended minimum of 6 times for a text of this length. " + + "Focus on your keyphrase!", + }, + }, + { + description: "gives a very BAD result when keyphrase density is above 4% when the text contains way too many instances" + + " of the keyphrase forms", + paper: new Paper( japaneseSentence + japaneseSentenceWithKeyphrase.repeat( 32 ), { + keyword: "一冊の本を読む", + locale: "ja", + } ), + expectedResult: { + score: -50, + text: "Keyphrase density: " + + "The keyphrase was found 32 times. That's way more than the recommended maximum of 23 times for a text of " + + "this length. Don't overoptimize!", + }, + }, + { + description: "gives a BAD result when keyphrase density is 0", + paper: new Paper( japaneseSentence, { keyword: "一冊の本を読む", locale: "ja" } ), + expectedResult: { + score: 4, + text: "Keyphrase density: " + + "The keyphrase was found 0 times. That's less than the recommended minimum of 2 times for a text of this length." + + " Focus on your keyphrase!", + }, + }, + { + description: "gives a GOOD result when keyphrase density is between 0.5% and 3.5% when the text contains keyphrase forms", + paper: new Paper( japaneseSentence + japaneseSentenceWithKeyphrase.repeat( 8 ), { + keyword: "一冊の本を読む", + locale: "ja", + } ), + expectedResult: { + score: 9, + text: "Keyphrase density: " + + "The keyphrase was found 8 times. This is great!", + }, + }, + { + description: "gives a GOOD result when keyphrase density is between 0.5% and 3.5% when the text contains the exact match of keyphrase forms", + paper: new Paper( japaneseSentence + japaneseSentenceWithKeyphraseExactMatch.repeat( 8 ), { + keyword: "『一冊の本を読む』", + locale: "ja", + } ), + expectedResult: { + score: 9, + text: "Keyphrase density: " + + "The keyphrase was found 8 times. 
This is great!", + }, + }, +]; - // it( "returns markers for a Japanese keyphrase enclosed in double quotes", function() { - // const paper = new Paper( japaneseSentenceWithKeyphraseExactMatch.repeat( 3 ), { - // keyword: "『一冊の本を読む』", - // locale: "ja", - // } ); - // const researcher = new JapaneseResearcher( paper ); - // const assessment = new KeywordDensityAssessment(); - // researcher.addResearchData( "morphology", morphologyDataJA ); - // - // const result = assessment.getResult( paper, researcher ); - // const marks = [ - // new Mark( { - // marked: "一日一冊の本を読むのはできるかどうかやってみます。", - // original: "一日一冊の本を読むのはできるかどうかやってみます。", - // } ), - // new Mark( { - // marked: "一日一冊の本を読むのはできるかどうかやってみます。", - // original: "一日一冊の本を読むのはできるかどうかやってみます。", - // } ), - // new Mark( { - // marked: "一日一冊の本を読むのはできるかどうかやってみます。", - // original: "一日一冊の本を読むのはできるかどうかやってみます。", - // } ), - // ]; - // - // expect( result.getScore() ).toBe( -50 ); - // expect( result.getText() ).toBe( "Keyphrase density: " + - // "The keyphrase was found 3 times." + - // " That's way more than the recommended maximum of 2 times for a text of this length. Don't" + - // " overoptimize!" 
); - // expect( assessment.getMarks() ).toEqual( marks ); - // } ); +describe.each( testDataJapanese )( "test for keyphrase density in Japanese", ( { + description, + paper, + expectedResult, +} ) => { + const researcher = new JapaneseResearcher( paper ); + researcher.addResearchData( "morphology", morphologyDataJA ); + buildTree( paper, researcher ); + const result = new KeyphraseDensityAssessment().getResult( paper, researcher ); + + it( description, () => { + expect( result.getScore() ).toBe( expectedResult.score ); + expect( result.getText() ).toBe( expectedResult.text ); + } ); } ); -// -// describe( "A test for keyword density in Japanese", function() { -// it( "shouldn't return NaN/infinity times of keyphrase occurrence when the keyphrase contains only function words " + -// "and there is no match in the text", function() { -// const paper = new Paper( japaneseSentence + japaneseSentenceWithKeyphrase.repeat( 32 ), { -// keyword: "ばっかり", -// locale: "ja", -// } ); -// const researcher = new JapaneseResearcher( paper ); -// researcher.addResearchData( "morphology", morphologyDataJA ); -// const result = new KeywordDensityAssessment().getResult( paper, researcher ); -// expect( result.getScore() ).toBe( 4 ); -// expect( result.getText() ).toBe( "Keyphrase density: " + -// "The keyphrase was found 0 times. That's less than the recommended minimum of 6 times for a text of this length. " + -// "Focus on your keyphrase!" 
); -// } ); -// -// it( "shouldn't return NaN/infinity times of synonym occurrence when the synonym contains only function words " + -// "and there is no match in the text", function() { -// const paper = new Paper( japaneseSentence + japaneseSentenceWithKeyphrase.repeat( 32 ), { -// keyword: "", -// synonyms: "ばっかり", -// locale: "ja", -// } ); -// const researcher = new JapaneseResearcher( paper ); -// researcher.addResearchData( "morphology", morphologyDataJA ); -// const result = new KeywordDensityAssessment().getResult( paper, researcher ); -// expect( result.getScore() ).toBe( 4 ); -// expect( result.getText() ).toBe( "Keyphrase density: " + -// "The keyphrase was found 0 times. That's less than the recommended minimum of 6 times for a text of this length. " + -// "Focus on your keyphrase!" ); -// } ); -// -// it( "shouldn't return NaN/infinity times of keyphrase occurrence when the keyphrase contains spaces " + -// "and there is no match in the text", function() { -// const paper = new Paper( japaneseSentence + japaneseSentenceWithKeyphrase.repeat( 32 ), { -// keyword: "かしら かい を ばっかり", -// locale: "ja", -// } ); -// const researcher = new JapaneseResearcher( paper ); -// researcher.addResearchData( "morphology", morphologyDataJA ); -// const result = new KeywordDensityAssessment().getResult( paper, researcher ); -// expect( result.getScore() ).toBe( 4 ); -// expect( result.getText() ).toBe( "Keyphrase density: " + -// "The keyphrase was found 0 times. That's less than the recommended minimum of 6 times for a text of this length. " + -// "Focus on your keyphrase!" 
); -// } ); -// -// it( "gives a very BAD result when keyword density is above 4% when the text contains way too many instances" + -// " of the keyphrase forms", function() { -// const paper = new Paper( japaneseSentence + japaneseSentenceWithKeyphrase.repeat( 32 ), { -// keyword: "一冊の本を読む", -// locale: "ja", -// } ); -// const researcher = new JapaneseResearcher( paper ); -// researcher.addResearchData( "morphology", morphologyDataJA ); -// const result = new KeywordDensityAssessment().getResult( paper, researcher ); -// expect( result.getScore() ).toBe( -50 ); -// expect( result.getText() ).toBe( "Keyphrase density: " + -// "The keyphrase was found 32 times. That's way more than the recommended maximum of 23 times for a text of " + -// "this length. Don't overoptimize!" ); -// } ); -// -// it( "gives a BAD result when keyword density is between 3% and 4% when the text contains too many instances" + -// " of the keyphrase forms", function() { -// const paper = new Paper( japaneseSentence + japaneseSentenceWithKeyphrase.repeat( 16 ), { -// keyword: "一冊の本を読む", -// locale: "ja", -// } ); -// const researcher = new JapaneseResearcher( paper ); -// researcher.addResearchData( "morphology", morphologyDataJA ); -// const result = new KeywordDensityAssessment().getResult( paper, researcher ); -// expect( result.getScore() ).toBe( -10 ); -// expect( result.getText() ).toBe( "Keyphrase density:" + -// " The keyphrase was found 16 times. That's more than the recommended maximum of 15 times for a text of this length." + -// " Don't overoptimize!" 
); -// } ); -// -// it( "gives a BAD result when keyword density is 0", function() { -// const paper = new Paper( japaneseSentence, { keyword: "一冊の本を読む", locale: "ja" } ); -// const researcher = new JapaneseResearcher( paper ); -// researcher.addResearchData( "morphology", morphologyDataJA ); -// const result = new KeywordDensityAssessment().getResult( paper, researcher ); -// expect( result.getScore() ).toBe( 4 ); -// expect( result.getText() ).toBe( "Keyphrase density: " + -// "The keyphrase was found 0 times. That's less than the recommended minimum of 2 times for a text of this length." + -// " Focus on your keyphrase!" ); -// } ); -// -// it( "gives a BAD result when keyword density is between 0 and 0.5%", function() { -// const paper = new Paper( japaneseSentence + japaneseSentenceWithKeyphrase.repeat( 1 ), { -// keyword: "一冊の本を読む", -// locale: "ja", -// } ); -// const researcher = new JapaneseResearcher( paper ); -// researcher.addResearchData( "morphology", morphologyDataJA ); -// const result = new KeywordDensityAssessment().getResult( paper, researcher ); -// expect( result.getScore() ).toBe( 4 ); -// expect( result.getText() ).toBe( "Keyphrase density:" + -// " The keyphrase was found 1 time. That's less than the recommended minimum of 2 times for a text of this length." + -// " Focus on your keyphrase!" ); -// } ); -// -// it( "gives a GOOD result when keyword density is between 0.5% and 3.5% when the text contains keyphrase forms", function() { -// const paper = new Paper( japaneseSentence + japaneseSentenceWithKeyphrase.repeat( 8 ), { -// keyword: "一冊の本を読む", -// locale: "ja", -// } ); -// const researcher = new JapaneseResearcher( paper ); -// researcher.addResearchData( "morphology", morphologyDataJA ); -// const result = new KeywordDensityAssessment().getResult( paper, researcher ); -// expect( result.getScore() ).toBe( 9 ); -// expect( result.getText() ).toBe( "Keyphrase density: " + -// "The keyphrase was found 8 times. This is great!" 
); -// } ); -// -// it( "gives a GOOD result when keyword density is between 0.5% and 3%, when the exact match of the keyphrase is in the text", function() { -// const paper = new Paper( japaneseSentence + japaneseSentenceWithKeyphraseExactMatch.repeat( 8 ), { -// keyword: "一冊の本を読む", -// locale: "ja", -// } ); -// const researcher = new JapaneseResearcher( paper ); -// researcher.addResearchData( "morphology", morphologyDataJA ); -// const result = new KeywordDensityAssessment().getResult( paper, researcher ); -// expect( result.getScore() ).toBe( 9 ); -// expect( result.getText() ).toBe( "Keyphrase density: " + -// "The keyphrase was found 8 times. This is great!" ); -// } ); -// -// it( "should still gives a GOOD result when keyword density is between 0.5% and 3%, when the exact match of the keyphrase is in the text " + -// "and the keyphrase is enclosed in double quotes", function() { -// const paper = new Paper( japaneseSentence + japaneseSentenceWithKeyphraseExactMatch.repeat( 8 ), { -// keyword: "「一冊の本を読む」", -// locale: "ja", -// } ); -// const researcher = new JapaneseResearcher( paper ); -// researcher.addResearchData( "morphology", morphologyDataJA ); -// const result = new KeywordDensityAssessment().getResult( paper, researcher ); -// expect( result.getScore() ).toBe( 9 ); -// expect( result.getText() ).toBe( "Keyphrase density: " + -// "The keyphrase was found 8 times. This is great!" 
); -// } ); -// -// it( "gives a BAD result when keyword density is between 0.5% and 3.5%, if morphology is added, but there is no morphology data", function() { -// const paper = new Paper( japaneseSentence + japaneseSentenceWithKeyphrase.repeat( 8 ), { -// keyword: "一冊の本を読む", -// locale: "ja", -// } ); -// const researcher = new JapaneseResearcher( paper ); -// const result = new KeywordDensityAssessment().getResult( paper, researcher ); -// expect( result.getScore() ).toBe( 4 ); -// expect( result.getText() ).toBe( "Keyphrase density: " + -// "The keyphrase was found 0 times. That's less than the recommended minimum of 2 times for a text of this length." + -// " Focus on your keyphrase!" ); -// } ); -// } ); -// + + diff --git a/packages/yoastseo/spec/scoring/assessments/seo/SingleH1AssessmentSpec.js b/packages/yoastseo/spec/scoring/assessments/seo/SingleH1AssessmentSpec.js index 88ab2fd5836..0b8dd856975 100644 --- a/packages/yoastseo/spec/scoring/assessments/seo/SingleH1AssessmentSpec.js +++ b/packages/yoastseo/spec/scoring/assessments/seo/SingleH1AssessmentSpec.js @@ -2,13 +2,16 @@ import SingleH1Assessment from "../../../../src/scoring/assessments/seo/SingleH1 import Paper from "../../../../src/values/Paper.js"; import Mark from "../../../../src/values/Mark.js"; import EnglishResearcher from "../../../../src/languageProcessing/languages/en/Researcher"; +import buildTree from "../../../specHelpers/parse/buildTree"; const h1Assessment = new SingleH1Assessment(); describe( "An assessment to check whether there is more than one H1 in the text", function() { it( "returns the default result when the paper doesn't contain an H1", function() { const mockPaper = new Paper( "

    a paragraph

    " ); - const assessment = h1Assessment.getResult( mockPaper, new EnglishResearcher( mockPaper ) ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + const assessment = h1Assessment.getResult( mockPaper, mockResearcher ); expect( assessment.getScore() ).toEqual( 0 ); expect( assessment.getText() ).toEqual( "" ); @@ -17,7 +20,10 @@ describe( "An assessment to check whether there is more than one H1 in the text" it( "returns the default result when there's an H1 at the beginning of the body", function() { const mockPaper = new Paper( "

    heading

    a paragraph

    " ); - const assessment = h1Assessment.getResult( mockPaper, new EnglishResearcher( mockPaper ) ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + const assessment = h1Assessment.getResult( mockPaper, mockResearcher ); + expect( assessment.getScore() ).toEqual( 0 ); expect( assessment.getText() ).toEqual( "" ); @@ -26,7 +32,9 @@ describe( "An assessment to check whether there is more than one H1 in the text" it( "also returns the default result when there's an H1 and it's not at the beginning of the body", function() { const mockPaper = new Paper( "

    Cool intro

    heading

    a paragraph

    " ); - const assessment = h1Assessment.getResult( mockPaper, new EnglishResearcher( mockPaper ) ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + const assessment = h1Assessment.getResult( mockPaper, mockResearcher ); expect( assessment.getScore() ).toEqual( 0 ); expect( assessment.getText() ).toEqual( "" ); @@ -36,7 +44,9 @@ describe( "An assessment to check whether there is more than one H1 in the text" it( "returns a bad score and appropriate feedback when there are multiple one superfluous (i.e., non-title) " + "H1s in the body of the text", function() { const mockPaper = new Paper( "

    a paragraph

    heading 1

    a paragraph

    heading 2

    " ); - const assessment = h1Assessment.getResult( mockPaper, new EnglishResearcher( mockPaper ) ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + const assessment = h1Assessment.getResult( mockPaper, mockResearcher ); expect( assessment.getScore() ).toEqual( 1 ); expect( assessment.getText() ).toEqual( "Single title: " + @@ -47,12 +57,22 @@ describe( "An assessment to check whether there is more than one H1 in the text" describe( "A test for marking incorrect H1s in the body", function() { it( "returns markers for incorrect H1s in the body", function() { - const mockPaper = new Paper( "

    a paragraph

    heading

    " ); - h1Assessment.getResult( mockPaper, new EnglishResearcher( mockPaper ) ); + const mockPaper = new Paper( "

    a paragraph

    this is a heading

    " ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + h1Assessment.getResult( mockPaper, mockResearcher ); const expected = [ - new Mark( { original: "

    heading

    ", - marked: "

    heading

    " } ), + new Mark( { + original: "

    this is a heading

    ", + marked: "

    this is a heading

    ", + position: { + clientId: "", + startOffset: 22, + endOffset: 56, + startOffsetBlock: 0, + endOffsetBlock: 34, + } } ), ]; expect( h1Assessment.getMarks() ).toEqual( expected ); @@ -60,10 +80,21 @@ describe( "A test for marking incorrect H1s in the body", function() { it( "also returns markers for H1s in the first position of the body", function() { const mockPaper = new Paper( "

    heading

    a paragraph

    " ); - h1Assessment.getResult( mockPaper, new EnglishResearcher( mockPaper ) ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + h1Assessment.getResult( mockPaper, mockResearcher ); const expected = [ - new Mark( { original: "

    heading

    ", - marked: "

    heading

    " } ), + new Mark( { + original: "

    heading

    ", + marked: "

    heading

    ", + position: { + startOffset: 4, + endOffset: 11, + clientId: "", + endOffsetBlock: 7, + startOffsetBlock: 0, + }, + } ), ]; expect( h1Assessment.getMarks() ).toEqual( expected ); @@ -71,7 +102,9 @@ describe( "A test for marking incorrect H1s in the body", function() { it( "doesn't return markers when there are no H1s in the body", function() { const mockPaper = new Paper( "

    a paragraph

    " ); - const results = h1Assessment.getResult( mockPaper, new EnglishResearcher( mockPaper ) ); + const mockResearcher = new EnglishResearcher( mockPaper ); + buildTree( mockPaper, mockResearcher ); + const results = h1Assessment.getResult( mockPaper, mockResearcher ); expect( results._hasMarks ).toEqual( false ); } ); diff --git a/packages/yoastseo/spec/scoring/assessorSpec.js b/packages/yoastseo/spec/scoring/assessorSpec.js index 07e778191b3..3df1f4539c2 100644 --- a/packages/yoastseo/spec/scoring/assessorSpec.js +++ b/packages/yoastseo/spec/scoring/assessorSpec.js @@ -26,21 +26,21 @@ describe( "an assessor object", function() { } ); } ); - var result5 = new AssessmentResult(); + const result5 = new AssessmentResult(); result5.setScore( 5 ); - var result4 = new AssessmentResult(); + const result4 = new AssessmentResult(); result4.setScore( 4 ); - var result8 = new AssessmentResult(); + const result8 = new AssessmentResult(); result8.setScore( 8 ); - var validResult = new AssessmentResult(); + const validResult = new AssessmentResult(); validResult.setScore( 9 ); validResult.setText( "all good" ); validResult.setHasMarks( true ); describe( "returning the overall score", function() { it( "returns the overall score", function() { - var assessor = new Assessor( new DefaultResearcher() ); + const assessor = new Assessor( new DefaultResearcher() ); assessor.getValidResults = function() { return [ result5, result4, result8 ]; }; @@ -48,7 +48,7 @@ describe( "an assessor object", function() { } ); } ); - var mockAssessment = { + const mockAssessment = { /** * A mock assessment which always returns true. 
* @@ -61,15 +61,15 @@ describe( "an assessor object", function() { describe( "adding an assessment", function() { it( "adds an assessment", function() { - var assessor = new Assessor( new DefaultResearcher() ); + const assessor = new Assessor( new DefaultResearcher() ); assessor.addAssessment( "testname", mockAssessment ); - var result = assessor.getAssessment( "testname" ); + const result = assessor.getAssessment( "testname" ); expect( result ).toEqual( mockAssessment ); } ); it( "overides the existing assessment if the newly added assessment has the same identifier", function() { - var assessor = new Assessor( new DefaultResearcher() ); + const assessor = new Assessor( new DefaultResearcher() ); assessor.addAssessment( "testname", mockAssessment ); assessor.addAssessment( "testname", mockAssessment ); @@ -80,10 +80,10 @@ describe( "an assessor object", function() { describe( "removing an assessment", function() { it( "removes an assessment", function() { - var assessor = new Assessor( new DefaultResearcher() ); + const assessor = new Assessor( new DefaultResearcher() ); assessor.removeAssessment( "testname" ); - var result = assessor.getAssessment( "testname" ); + const result = assessor.getAssessment( "testname" ); expect( result ).toEqual( undefined ); // eslint-disable-line no-undefined } ); @@ -91,8 +91,8 @@ describe( "an assessor object", function() { describe( "assess", function() { it( "should add the marker to the assessment result", function() { - var paper = new Paper(); - var assessor = new Assessor( new DefaultResearcher(), { + const paper = new Paper(); + const assessor = new Assessor( new DefaultResearcher(), { /** * A mock marker function. * @@ -100,7 +100,7 @@ describe( "an assessor object", function() { */ marker: function() {}, } ); - var assessment = { + const assessment = { /** * A mock getResult function. 
* @@ -128,21 +128,21 @@ describe( "an assessor object", function() { assessor.addAssessment( "test1", assessment ); assessor.assess( paper ); - var results = assessor.getValidResults(); + const results = assessor.getValidResults(); expect( results[ 0 ].hasMarker() ).toBe( true ); } ); } ); describe( "hasMarker", function() { - var assessor; + let assessor; beforeEach( function() { assessor = new Assessor( new DefaultResearcher(), {} ); } ); it( "should return true when we have a global marker and a getMarks function", function() { - var assessment = { + const assessment = { /** * A mock getMarks function. * @@ -156,7 +156,7 @@ describe( "an assessor object", function() { } ); it( "should return false when we don't have a global marker", function() { - var assessment = { + const assessment = { /** * A mock getMarks function. * @@ -169,28 +169,28 @@ describe( "an assessor object", function() { } ); it( "should return false when we don't have a getMarks function", function() { - var assessment = {}; + const assessment = {}; assessor._options.marker = function() {}; expect( assessor.hasMarker( assessment ) ).toBe( false ); } ); it( "should return false when we don't have a global marker and don't have a getMarks function", function() { - var assessment = {}; + const assessment = {}; expect( assessor.hasMarker( assessment ) ).toBe( false ); } ); } ); describe( "getMarker", function() { - var assessor; + let assessor; beforeEach( function() { assessor = new Assessor( {} ); } ); it( "should compose the global marker and the getMarks function", function() { - var functions = { + const functions = { /** * A mock getMarks function. 
* @@ -204,11 +204,11 @@ describe( "an assessor object", function() { */ globalMarker: function() {}, }; - spyOn( functions, "getMarks" ); - spyOn( functions, "globalMarker" ); - var assessment = { getMarks: functions.getMarks }; + jest.spyOn( functions, "getMarks" ); + jest.spyOn( functions, "globalMarker" ); + const assessment = { getMarks: functions.getMarks }; assessor._options.marker = functions.globalMarker; - var marker = assessor.getMarker( assessment ); + const marker = assessor.getMarker( assessment ); marker(); diff --git a/packages/yoastseo/spec/scoring/collectionPages/fullTextTests/runFullTextTests.js b/packages/yoastseo/spec/scoring/collectionPages/fullTextTests/runFullTextTests.js index 4bae9d573d1..c95c29f76b1 100644 --- a/packages/yoastseo/spec/scoring/collectionPages/fullTextTests/runFullTextTests.js +++ b/packages/yoastseo/spec/scoring/collectionPages/fullTextTests/runFullTextTests.js @@ -11,7 +11,7 @@ import buildTree from "../../../../../yoastseo/spec/specHelpers/parse/buildTree" // Import SEO assessments. 
import IntroductionKeywordAssessment from "../../../../src/scoring/assessments/seo/IntroductionKeywordAssessment"; import KeyphraseLengthAssessment from "../../../../src/scoring/assessments/seo/KeyphraseLengthAssessment"; -import KeywordDensityAssessment from "../../../../src/scoring/assessments/seo/KeywordDensityAssessment"; +import KeyphraseDensityAssessment from "../../../../src/scoring/assessments/seo/KeywordDensityAssessment"; import MetaDescriptionKeywordAssessment from "../../../../src/scoring/assessments/seo/MetaDescriptionKeywordAssessment"; import MetaDescriptionLengthAssessment from "../../../../src/scoring/assessments/seo/MetaDescriptionLengthAssessment"; import TextLengthAssessment from "../../../../src/scoring/assessments/seo/TextLengthAssessment"; @@ -67,7 +67,7 @@ testPapers.forEach( function( testPaper ) { urlTitle: createAnchorOpeningTag( "https://yoa.st/shopify10" ), urlCallToAction: createAnchorOpeningTag( "https://yoa.st/shopify11" ), } ); - const keywordDensityAssessment = new KeywordDensityAssessment( { + const keyphraseDensityAssessment = new KeyphraseDensityAssessment( { urlTitle: createAnchorOpeningTag( "https://yoa.st/shopify12" ), urlCallToAction: createAnchorOpeningTag( "https://yoa.st/shopify13" ), } ); @@ -169,11 +169,11 @@ testPapers.forEach( function( testPaper ) { } ); it( "returns a score and the associated feedback text for the keywordDensity assessment", function() { - const isApplicable = keywordDensityAssessment.isApplicable( paper, researcher ); + const isApplicable = keyphraseDensityAssessment.isApplicable( paper, researcher ); expect( isApplicable ).toBe( expectedResults.keywordDensity.isApplicable ); if ( isApplicable ) { - result.keywordDensity = keywordDensityAssessment.getResult( + result.keywordDensity = keyphraseDensityAssessment.getResult( paper, researcher ); diff --git a/packages/yoastseo/spec/scoring/helpers/assessments/recommendedKeywordCountSpec.js 
b/packages/yoastseo/spec/scoring/helpers/assessments/recommendedKeywordCountSpec.js index 8ca7d4dfd55..9620e90fe88 100644 --- a/packages/yoastseo/spec/scoring/helpers/assessments/recommendedKeywordCountSpec.js +++ b/packages/yoastseo/spec/scoring/helpers/assessments/recommendedKeywordCountSpec.js @@ -1,37 +1,82 @@ -import recommendedKeywordCount from "../../../../src/scoring/helpers/assessments/recommendedKeywordCount.js"; +import recommendedKeyphraseCount from "../../../../src/scoring/helpers/assessments/recommendedKeywordCount.js"; +import Paper from "../../../../src/values/Paper"; +import DefaultResearcher from "../../../../src/languageProcessing/languages/_default/Researcher"; +import japaneseWordHelper from "../../../../src/languageProcessing/languages/ja/helpers/getWords"; +import buildTree from "../../../specHelpers/parse/buildTree"; const maxRecommendedDensity = 3; const minRecommendedDensity = 0.5; const a = "a "; +const defaultResearcher = new DefaultResearcher(); -describe( "Test for getting the recommended keyword count for a text", function() { - it( "returns the maximum recommended keyword count for a text with 300 words and a 1-word keyphrase", function() { +describe( "Test for getting the recommended keyphrase count for a text", function() { + it( "returns the maximum recommended keyphrase count for a text with 300 words and a 1-word keyphrase", function() { const text = a.repeat( 300 ); - expect( recommendedKeywordCount( text, 1, maxRecommendedDensity, "max" ) ).toBe( 8 ); + const paper = new Paper( "

    " + text + "

    " ); + buildTree( paper, defaultResearcher ); + expect( recommendedKeyphraseCount( paper, 1, maxRecommendedDensity, "max" ) ).toBe( 8 ); } ); - it( "returns the maximum recommended keyword count for a text with 300 words and a multi-word keyphrase", function() { + it( "returns the maximum recommended keyphrase count for a text with 300 words and a 1-word keyphrase, " + + "using the custom helper for retrieving the words", function() { + // The sentence contains 10 words. + const text = "計画段階では「東海道新線」と呼ばれ開業て.".repeat( 30 ); + const paper = new Paper( "

    " + text + "

    " ); + buildTree( paper, defaultResearcher ); + expect( recommendedKeyphraseCount( paper, 1, maxRecommendedDensity, "max", japaneseWordHelper ) ).toBe( 8 ); + } ); + + it( "returns the maximum recommended keyphrase count for a text with 300 words and a multi-word keyphrase", function() { const text = a.repeat( 300 ); - expect( recommendedKeywordCount( text, 2, maxRecommendedDensity, "max" ) ).toBe( 6 ); + const paper = new Paper( "

    " + text + "

    " ); + buildTree( paper, defaultResearcher ); + + expect( recommendedKeyphraseCount( paper, 2, maxRecommendedDensity, "max" ) ).toBe( 6 ); } ); - it( "returns the minimum recommended keyword count for a text with 1200 words and a 1-word keyphrase", function() { + it( "returns the minimum recommended keyphrase count for a text with 1200 words and a 1-word keyphrase", function() { const text = a.repeat( 1200 ); - expect( recommendedKeywordCount( text, 1, minRecommendedDensity, "min" ) ).toBe( 6 ); + const paper = new Paper( "

    " + text + "

    " ); + buildTree( paper, defaultResearcher ); + + expect( recommendedKeyphraseCount( paper, 1, minRecommendedDensity, "min" ) ).toBe( 6 ); } ); - it( "returns 2 as the default recommended minimum keyword count if the text is very short", function() { + it( "returns 2 as the default recommended minimum keyphrase count if the text is very short", function() { const text = a.repeat( 3 ); - expect( recommendedKeywordCount( text, 1, minRecommendedDensity, "min" ) ).toBe( 2 ); + const paper = new Paper( "

    " + text + "

    " ); + buildTree( paper, defaultResearcher ); + + expect( recommendedKeyphraseCount( paper, 1, minRecommendedDensity, "min" ) ).toBe( 2 ); } ); it( "returns 0 when the word count of the text is 0", function() { const text = ""; - expect( recommendedKeywordCount( text, 1, maxRecommendedDensity, "max" ) ).toBe( 0 ); + const paper = new Paper( "

    " + text + "

    " ); + buildTree( paper, defaultResearcher ); + + expect( recommendedKeyphraseCount( paper, 1, maxRecommendedDensity, "max" ) ).toBe( 0 ); } ); - it( "returns the maximum recommended keyword count when the maxOrMin variable is not explicitly defined", function() { + it( "returns the maximum recommended keyphrase count when the maxOrMin variable is not explicitly defined", function() { const text = a.repeat( 300 ); - expect( recommendedKeywordCount( text, 1, maxRecommendedDensity ) ).toBe( 8 ); + const paper = new Paper( "

    " + text + "

    " ); + buildTree( paper, defaultResearcher ); + + expect( recommendedKeyphraseCount( paper, 1, maxRecommendedDensity ) ).toBe( 8 ); + } ); + + it( "returns 0 when the paper is empty", function() { + const paper = new Paper( "" ); + buildTree( paper, defaultResearcher ); + + expect( recommendedKeyphraseCount( paper, 1, maxRecommendedDensity ) ).toBe( 0 ); + } ); + + it( "returns 0 when the paper only contains excluded element", function() { + const paper = new Paper( "
    In ancient Egyptian mythology, cats were highly revered and considered sacred animals.
    " ); + buildTree( paper, defaultResearcher ); + + expect( recommendedKeyphraseCount( paper, 1, maxRecommendedDensity ) ).toBe( 0 ); } ); } ); diff --git a/packages/yoastseo/spec/scoring/productPages/cornerstone/relatedKeywordAssessorSpec.js b/packages/yoastseo/spec/scoring/productPages/cornerstone/relatedKeywordAssessorSpec.js index f7fc9ff8ab8..071decde716 100644 --- a/packages/yoastseo/spec/scoring/productPages/cornerstone/relatedKeywordAssessorSpec.js +++ b/packages/yoastseo/spec/scoring/productPages/cornerstone/relatedKeywordAssessorSpec.js @@ -44,8 +44,8 @@ describe( "has configuration overrides", () => { expect( assessment._config.urlCallToAction ).toBe( "" ); } ); - test( "KeywordDensityAssessment", () => { - const assessment = assessor.getAssessment( "keywordDensity" ); + test( "KeyphraseDensityAssessment", () => { + const assessment = assessor.getAssessment( "keyphraseDensity" ); expect( assessment ).toBeDefined(); expect( assessment._config ).toBeDefined(); diff --git a/packages/yoastseo/spec/scoring/productPages/cornerstone/seoAssessorSpec.js b/packages/yoastseo/spec/scoring/productPages/cornerstone/seoAssessorSpec.js index 954c58611e6..b1e2946d04a 100644 --- a/packages/yoastseo/spec/scoring/productPages/cornerstone/seoAssessorSpec.js +++ b/packages/yoastseo/spec/scoring/productPages/cornerstone/seoAssessorSpec.js @@ -101,8 +101,8 @@ describe( "has configuration overrides", () => { expect( assessment._config.urlCallToAction ).toBe( "" ); } ); - test( "KeywordDensityAssessment", () => { - const assessment = assessor.getAssessment( "keywordDensity" ); + test( "KeyphraseDensityAssessment", () => { + const assessment = assessor.getAssessment( "keyphraseDensity" ); expect( assessment ).toBeDefined(); expect( assessment._config ).toBeDefined(); diff --git a/packages/yoastseo/spec/scoring/productPages/relatedKeywordAssessorSpec.js b/packages/yoastseo/spec/scoring/productPages/relatedKeywordAssessorSpec.js index 35a14fe7ebe..b5f838137d3 100644 --- 
a/packages/yoastseo/spec/scoring/productPages/relatedKeywordAssessorSpec.js +++ b/packages/yoastseo/spec/scoring/productPages/relatedKeywordAssessorSpec.js @@ -53,8 +53,8 @@ describe( "has configuration overrides", () => { expect( assessment._config.urlCallToAction ).toBe( "" ); } ); - test( "KeywordDensityAssessment", () => { - const assessment = assessor.getAssessment( "keywordDensity" ); + test( "KeyphraseDensityAssessment", () => { + const assessment = assessor.getAssessment( "keyphraseDensity" ); expect( assessment ).toBeDefined(); expect( assessment._config ).toBeDefined(); diff --git a/packages/yoastseo/spec/scoring/productPages/seoAssessorSpec.js b/packages/yoastseo/spec/scoring/productPages/seoAssessorSpec.js index fc3b14ea253..d978ad3f3f2 100644 --- a/packages/yoastseo/spec/scoring/productPages/seoAssessorSpec.js +++ b/packages/yoastseo/spec/scoring/productPages/seoAssessorSpec.js @@ -102,7 +102,7 @@ describe( "has configuration overrides", () => { } ); test( "KeywordDensityAssessment", () => { - const assessment = assessor.getAssessment( "keywordDensity" ); + const assessment = assessor.getAssessment( "keyphraseDensity" ); expect( assessment ).toBeDefined(); expect( assessment._config ).toBeDefined(); diff --git a/packages/yoastseo/spec/specHelpers/parse/buildTree.js b/packages/yoastseo/spec/specHelpers/parse/buildTree.js index 80caa20c1ae..e9eba6d0166 100644 --- a/packages/yoastseo/spec/specHelpers/parse/buildTree.js +++ b/packages/yoastseo/spec/specHelpers/parse/buildTree.js @@ -4,6 +4,7 @@ import adapt from "../../../src/parse/build/private/adapt"; import { parseFragment } from "parse5"; import filterTree from "../../../src/parse/build/private/filterTree"; import permanentFilters from "../../../src/parse/build/private/alwaysFilterElements"; +import { htmlEntitiesRegex } from "../../../src/helpers/htmlEntities"; /** * Builds an HTML tree for a given paper and researcher, and adds it to the paper. 
@@ -14,7 +15,8 @@ import permanentFilters from "../../../src/parse/build/private/alwaysFilterEleme */ export default function buildTree( paper, researcher ) { const languageProcessor = new LanguageProcessor( researcher ); - paper.setTree( build( paper.getText(), languageProcessor ) ); + const shortcodes = paper._attributes && paper._attributes.shortcodes; + paper.setTree( build( paper, languageProcessor, shortcodes ) ); } /** @@ -23,7 +25,11 @@ export default function buildTree( paper, researcher ) { * @returns {void} */ export function buildTreeNoTokenize( paper ) { - let tree = adapt( parseFragment( paper.getText(), { sourceCodeLocationInfo: true } ) ); + let html = paper.getText(); + // Change HTML entities like "&" to "#amp;" to prevent early conversion to "&" -- which would invalidate token positions. + html = html.replace( htmlEntitiesRegex, "#$1" ); + + let tree = adapt( parseFragment( html, { sourceCodeLocationInfo: true } ) ); tree = filterTree( tree, permanentFilters ); paper.setTree( tree ); } diff --git a/packages/yoastseo/spec/specHelpers/scoring/relatedKeyphraseAssessorTests.js b/packages/yoastseo/spec/specHelpers/scoring/relatedKeyphraseAssessorTests.js index 9509aefdca4..137b8e85145 100644 --- a/packages/yoastseo/spec/specHelpers/scoring/relatedKeyphraseAssessorTests.js +++ b/packages/yoastseo/spec/specHelpers/scoring/relatedKeyphraseAssessorTests.js @@ -71,7 +71,7 @@ export function checkAssessmentAvailability( assessor ) { expect( assessments ).toEqual( [ "introductionKeyword", "keyphraseLength", - "keywordDensity", + "keyphraseDensity", ] ); } ); } @@ -111,8 +111,8 @@ export function checkUrls( assessor ) { expect( assessment._config.urlCallToAction ).toBe( "" ); } ); - test( "KeywordDensity", () => { - const assessment = assessor.getAssessment( "keywordDensity" ); + test( "keyphraseDensity", () => { + const assessment = assessor.getAssessment( "keyphraseDensity" ); expect( assessment ).toBeDefined(); expect( assessment._config ).toBeDefined(); 
diff --git a/packages/yoastseo/spec/specHelpers/scoring/seoAssessorTests.js b/packages/yoastseo/spec/specHelpers/scoring/seoAssessorTests.js index ae0a39a8a13..1df057a3300 100644 --- a/packages/yoastseo/spec/specHelpers/scoring/seoAssessorTests.js +++ b/packages/yoastseo/spec/specHelpers/scoring/seoAssessorTests.js @@ -104,7 +104,7 @@ export function checkAssessmentAvailability( assessor, isProductAssessor = false const paper = new Paper( text, { keyword: "keyword", synonyms: "synonym" } ); const assessments = assess( paper ); - const expected = isStoreBlog ? mostAssessments : mostAssessments.concat( "keywordDensity" ); + const expected = isStoreBlog ? mostAssessments : mostAssessments.concat( "keyphraseDensity" ); expect( assessments.sort() ).toEqual( expected.sort() ); } ); @@ -140,7 +140,7 @@ export function checkAssessmentAvailability( assessor, isProductAssessor = false " Oratio vocibus offendit an mei, est esse pericula liberavisse.", { keyword: "keyword" } ); const assessments = assess( paper ); - const keyphraseAssessments = isProductAssessor ? [ "keyphraseDistribution", "keywordDensity" ] : [ "keywordDensity" ]; + const keyphraseAssessments = isProductAssessor ? [ "keyphraseDistribution", "keyphraseDensity" ] : [ "keyphraseDensity" ]; const expected = isStoreBlog ? mostAssessments : [ ...mostAssessments, ...keyphraseAssessments ]; expect( assessments.sort() ).toEqual( expected.sort() ); } ); @@ -161,7 +161,7 @@ export function checkAssessmentAvailability( assessor, isProductAssessor = false " Oratio vocibus offendit an mei, est esse pericula liberavisse.", { keyword: "keyword", synonyms: "synonym" } ); const assessments = assess( paper ); - const keyphraseAssessments = isProductAssessor ? [ "keyphraseDistribution", "keywordDensity" ] : [ "keywordDensity" ]; + const keyphraseAssessments = isProductAssessor ? [ "keyphraseDistribution", "keyphraseDensity" ] : [ "keyphraseDensity" ]; const expected = isStoreBlog ? 
mostAssessments : [ ...mostAssessments, ...keyphraseAssessments ]; expect( assessments.sort() ).toEqual( expected.sort() ); } ); @@ -279,8 +279,8 @@ export function checkUrls( assessor, isProductAssessor = false ) { checkAssessmentUrls( assessment, urlTitle, urlCallToAction ); } ); - test( "KeywordDensityAssessment", () => { - const assessment = assessor.getAssessment( "keywordDensity" ); + test( "keyphraseDensityAssessment", () => { + const assessment = assessor.getAssessment( "keyphraseDensity" ); const urlTitle = isProductAssessor ? "https://yoa.st/shopify12" : "https://yoa.st/33v"; const urlCallToAction = isProductAssessor ? "https://yoa.st/shopify13" : "https://yoa.st/33w"; diff --git a/packages/yoastseo/spec/values/MarkSpec.js b/packages/yoastseo/spec/values/MarkSpec.js index 21cf9ed478d..59a01849bb1 100644 --- a/packages/yoastseo/spec/values/MarkSpec.js +++ b/packages/yoastseo/spec/values/MarkSpec.js @@ -35,11 +35,25 @@ describe( "a mark value object", function() { } ); describe( "position-based application", function() { + it( "should return the position property of the Mark", () => { + const mark = new Mark( { position: { startOffset: 3, endOffset: 28 } } ); + expect( mark.getPosition() ).toEqual( { startOffset: 3, endOffset: 28 } ); + } ); it( "should return the start and end offset of the position set via the constructor", () => { const mark = new Mark( { position: { startOffset: 3, endOffset: 28 } } ); expect( mark.getPositionStart() ).toBe( 3 ); expect( mark.getPositionEnd() ).toBe( 28 ); } ); + it( "should override the current value of start offset when `setPositionStart` is called", () => { + const mark = new Mark( { position: { startOffset: 3, endOffset: 28 } } ); + mark.setPositionStart( 0 ); + expect( mark.getPositionStart() ).toBe( 0 ); + } ); + it( "should override the current value of end offset when `setPositionEnd` is called", () => { + const mark = new Mark( { position: { startOffset: 3, endOffset: 28 } } ); + mark.setPositionEnd( 25 ); + 
expect( mark.getPositionEnd() ).toBe( 25 ); + } ); it( "should return falsy for the start and end offset of the position when the position is not set via the constructor", () => { const mark = new Mark( { original: "original", marked: "marked" } ); expect( mark.getPositionStart() ).toBeFalsy(); @@ -86,13 +100,99 @@ describe( "a mark value object", function() { describe( "tests the hasPosition", function() { it( "should return true if there is a position start", function() { - expect( () => new Mark( { position: { startOffset: 42, endOffset: 52 } } ).hasPosition() ).toBeTruthy(); + const mark = new Mark( { position: { startOffset: 42, endOffset: 52 } } ); + expect( mark.hasPosition() ).toBeTruthy(); + } ); + it( "should return true if the position start is 0", function() { + const mark = new Mark( { position: { startOffset: 0, endOffset: 52 } } ); + expect( mark.hasPosition() ).toBeTruthy(); + } ); + it( "should return false if there is no position at all", function() { + const mark = new Mark( { marked: "test", original: "test" } ); + expect( mark.hasPosition() ).toBeFalsy(); + } ); + } ); + + describe( "tests the hasBlockPosition", function() { + it( "should return true if there is a block position start", function() { + const mark = new Mark( { position: { startOffsetBlock: 42, endOffsetBlock: 52 } } ); + expect( mark.hasBlockPosition() ).toBeTruthy(); + } ); + it( "should return true if the block position start return 0", function() { + const mark = new Mark( { position: { startOffsetBlock: 0, endOffsetBlock: 52 } } ); + expect( mark.hasBlockPosition() ).toBeTruthy(); + } ); + it( "should return false if there is no position at all", function() { + const mark = new Mark( { marked: "test", original: "test" } ); + expect( mark.hasBlockPosition() ).toBeFalsy(); + } ); + } ); + + describe( "tests the block position start and end offsets", function() { + it( "should return the block position start", function() { + const mark = new Mark( { position: { 
startOffsetBlock: 42, endOffsetBlock: 52 } } ); + expect( mark.getBlockPositionStart() ).toBe( 42 ); + } ); + it( "should return false for block start offset if there is no position at all", function() { + const mark = new Mark( { marked: "test", original: "test" } ); + expect( mark.getBlockPositionStart() ).toBeFalsy(); + } ); + it( "should set the block start offset, overriding the current value if available", function() { + const mark = new Mark( { marked: "test", original: "test", position: { + startOffsetBlock: 0, + endOffsetBlock: 15, + } } ); + + mark.setBlockPositionStart( 3 ); + expect( mark.getBlockPositionStart() ).toBe( 3 ); + } ); + it( "should return the block position end", function() { + const mark = new Mark( { position: { startOffsetBlock: 0, endOffsetBlock: 52 } } ); + expect( mark.getBlockPositionEnd() ).toBe( 52 ); + } ); + it( "should return false for block end offset if there is no position at all", function() { + const mark = new Mark( { marked: "test", original: "test" } ); + expect( mark.getBlockPositionEnd() ).toBeFalsy(); + } ); + it( "should set the block end offset, overriding the current value if available", function() { + const mark = new Mark( { marked: "test", original: "test", position: { + startOffsetBlock: 0, + endOffsetBlock: 15, + } } ); + + mark.setBlockPositionEnd( 20 ); + expect( mark.getBlockPositionEnd() ).toBe( 20 ); + } ); + } ); + + describe( "tests the block information", function() { + it( "should return the block client id", function() { + const mark = new Mark( { position: { startOffsetBlock: 42, endOffsetBlock: 52, clientId: "123ClientId" } } ); + expect( mark.getBlockClientId() ).toBe( "123ClientId" ); + } ); + it( "should return false for block client id if there is no position at all", function() { + const mark = new Mark( { marked: "test", original: "test" } ); + expect( mark.getBlockClientId() ).toBeFalsy(); + } ); + it( "should return the block attribute id ", function() { + const mark = new Mark( { 
position: { startOffsetBlock: 0, endOffsetBlock: 52, attributeId: "attr123" } } ); + expect( mark.getBlockAttributeId() ).toBe( "attr123" ); + } ); + it( "should return false for the block attribute id if there is no position at all", function() { + const mark = new Mark( { marked: "test", original: "test" } ); + expect( mark.getBlockAttributeId() ).toBeFalsy(); + } ); + it( "should return true if the Mark is created for the first section of Yoast sub-block", function() { + const mark = new Mark( { position: { startOffsetBlock: 0, endOffsetBlock: 52, isFirstSection: true } } ); + expect( mark.isMarkForFirstBlockSection() ).toBeTruthy(); } ); - it( "should return false if there is no position start", function() { - expect( () => new Mark( { position: { endOffset: 52 } } ).hasPosition() ).toBeTruthy(); + it( "should return false if the Mark is not created for the first section of Yoast sub-block", function() { + const mark = new Mark( { position: { startOffsetBlock: 0, endOffsetBlock: 52, isFirstSection: false } } ); + expect( mark.isMarkForFirstBlockSection() ).toBeFalsy(); } ); it( "should return false if there is no position at all", function() { - expect( () => new Mark( { } ).hasPosition() ).toBeTruthy(); + const mark = new Mark( { marked: "test", original: "test" } ); + expect( mark.isMarkForFirstBlockSection() ).toBeFalsy(); } ); } ); } ); diff --git a/packages/yoastseo/spec/worker/AnalysisWebWorkerSpec.js b/packages/yoastseo/spec/worker/AnalysisWebWorkerSpec.js index e16bfad39e6..66c60ce9997 100644 --- a/packages/yoastseo/spec/worker/AnalysisWebWorkerSpec.js +++ b/packages/yoastseo/spec/worker/AnalysisWebWorkerSpec.js @@ -496,7 +496,8 @@ describe( "AnalysisWebWorker", () => { scope.onmessage( createMessage( "analyze", { paper: paper.serialize() } ) ); } ); - it( "does not assess the tree when it could not be built", done => { + // Skipped because the input isn't valid HTML -- editors will generally take care of that. 
+ it.skip( "does not assess the tree when it could not be built", done => { const paper = new Paper( "

    This { diff --git a/packages/yoastseo/src/helpers/htmlEntities.js b/packages/yoastseo/src/helpers/htmlEntities.js new file mode 100644 index 00000000000..c8980c09ce4 --- /dev/null +++ b/packages/yoastseo/src/helpers/htmlEntities.js @@ -0,0 +1,40 @@ +// Contains 1) special characters usually converted into HTML entities (cf. _wp_specialchars). +// And 2) their corresponding HTML entities, stripped from the initial ampersand (e.g. 'lt;' instead of '<'). +const htmlEntities = new Map( [ + [ "amp;", "&" ], + [ "lt;", "<" ], + [ "gt;", ">" ], + [ "quot;", '"' ], + [ "apos;", "'" ], + [ "ndash;", "–" ], + [ "mdash;", "—" ], + [ "copy;", "©" ], + [ "reg;", "®" ], + [ "trade;", "™" ], + [ "pound;", "£" ], + [ "yen;", "¥" ], + [ "euro;", "€" ], + [ "dollar;", "$" ], + [ "deg;", "°" ], + [ "asymp;", "≈" ], + [ "ne;", "≠" ], +] ); + +// Regex to find all HTML entities. +const htmlEntitiesRegex = new RegExp( "&(" + [ ...htmlEntities.keys() ].join( "|" ) + ")", "ig" ); + +// Contains special characters along with their hashed HTML entities (e.g. '#amp;' instead of '&' for the ampersand character '&'). +const hashedHtmlEntities = new Map(); +htmlEntities.forEach( ( value, key ) => hashedHtmlEntities.set( "#" + key, value ) ); + +// Regex to find hashed HTML entities attached to the beginning (hashedHtmlEntitiesRegexStart) or to the end of a string (hashedHtmlEntitiesRegexEnd). 
+const hashedHtmlEntitiesRegexStart = new RegExp( "^(" + [ ...hashedHtmlEntities.keys() ].join( "|" ) + ")" ); +const hashedHtmlEntitiesRegexEnd = new RegExp( "(" + [ ...hashedHtmlEntities.keys() ].join( "|" ) + ")$" ); + +export { + htmlEntities, + htmlEntitiesRegex, + hashedHtmlEntities, + hashedHtmlEntitiesRegexStart, + hashedHtmlEntitiesRegexEnd, +}; diff --git a/packages/yoastseo/src/helpers/index.js b/packages/yoastseo/src/helpers/index.js index a99ecdca61f..580f65ae341 100644 --- a/packages/yoastseo/src/helpers/index.js +++ b/packages/yoastseo/src/helpers/index.js @@ -3,6 +3,7 @@ import { getLanguagesWithWordFormSupport } from "./getLanguagesWithWordFormSuppo import formatNumber from "./formatNumber"; import { getLanguagesWithWordComplexity } from "./getLanguagesWithWordComplexity"; import { createAnchorOpeningTag } from "./shortlinker"; +import * as htmlEntities from "./htmlEntities"; export { measureTextWidth, @@ -10,4 +11,5 @@ export { formatNumber, getLanguagesWithWordComplexity, createAnchorOpeningTag, + htmlEntities, }; diff --git a/packages/yoastseo/src/languageProcessing/AbstractResearcher.js b/packages/yoastseo/src/languageProcessing/AbstractResearcher.js index 5d44d3bb345..adf4ce5ca57 100644 --- a/packages/yoastseo/src/languageProcessing/AbstractResearcher.js +++ b/packages/yoastseo/src/languageProcessing/AbstractResearcher.js @@ -12,7 +12,7 @@ import findTransitionWords from "./researches/findTransitionWords"; import functionWordsInKeyphrase from "./researches/functionWordsInKeyphrase"; import getAnchorsWithKeyphrase from "./researches/getAnchorsWithKeyphrase"; import getFleschReadingScore from "./researches/getFleschReadingScore"; -import getKeywordDensity from "./researches/getKeywordDensity.js"; +import getKeyphraseDensity, { getKeywordDensity } from "./researches/getKeywordDensity.js"; import getLinks from "./researches/getLinks.js"; import getLinkStatistics from "./researches/getLinkStatistics"; import getParagraphs from 
"./researches/getParagraphs"; @@ -25,7 +25,7 @@ import getSubheadingTextLengths from "./researches/getSubheadingTextLengths.js"; import h1s from "./researches/h1s"; import imageCount from "./researches/imageCount.js"; import keyphraseLength from "./researches/keyphraseLength"; -import keywordCount from "./researches/keywordCount"; +import getKeyphraseCount, { keywordCount } from "./researches/keywordCount"; import { keywordCountInSlug, keywordCountInUrl } from "./researches/keywordCountInUrl"; import matchKeywordInSubheadings from "./researches/matchKeywordInSubheadings"; import metaDescriptionKeyword from "./researches/metaDescriptionKeyword"; @@ -63,6 +63,8 @@ export default class AbstractResearcher { functionWordsInKeyphrase, getAnchorsWithKeyphrase, getFleschReadingScore, + getKeyphraseCount, + getKeyphraseDensity, getKeywordDensity, getLinks, getLinkStatistics, diff --git a/packages/yoastseo/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js b/packages/yoastseo/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js new file mode 100644 index 00000000000..f4017ccc547 --- /dev/null +++ b/packages/yoastseo/src/languageProcessing/helpers/highlighting/getMarkingsInSentence.js @@ -0,0 +1,125 @@ +import Mark from "../../../../src/values/Mark"; + +const markStart = ""; +const markEnd = ""; + +/** + * Adds `yoastmark` tags to the keyphrase matches in the sentence. + * + * This is a helper for search-based highlighting. + * + * @param {Sentence} sentence The sentence to add the `yoastmark` tags to. + * @param {Token[]} matches The array of the keyphrase matches. + * + * @returns {string} The sentence with the added `yoastmark` tags. 
+ */ +const createMarksForSentence = ( sentence, matches ) => { + const tokens = sentence.tokens; + + const newTokens = []; + for ( let i = tokens.length - 1; i >= 0; i-- ) { + const token = tokens[ i ]; + if ( matches.some( match => match.sourceCodeRange.startOffset === token.sourceCodeRange.startOffset || + match.sourceCodeRange.endOffset === token.sourceCodeRange.endOffset ) ) { + newTokens.unshift( markStart, token.text, markEnd ); + } else { + newTokens.unshift( token.text ); + } + } + const markedSentence = newTokens.join( "" ); + // Merge consecutive markings into one marking. + return markedSentence.replace( new RegExp( "([ \u00A0]?)", "ig" ), "$1" ); +}; + +/** + * Merges consecutive and overlapping markings into one marking. + * + * This is a helper for position-based highlighting. + * + * @param {Mark[]} markings An array of markings to merge. + * + * @returns {Mark[]} An array of markings where consecutive and overlapping markings are merged. + */ +const mergeConsecutiveAndOverlappingMarkings = ( markings ) => { + const newMarkings = []; + + // Sort markings by start offset. This is probably redundant, but for extra safety. + markings.sort( function( a, b ) { + return a.getPositionStart() - b.getPositionStart(); + } ); + + markings.forEach( ( marking ) => { + if ( newMarkings.length === 0 ) { + newMarkings.push( marking ); + return; + } + + const lastMarking = newMarkings[ newMarkings.length - 1 ]; + // Adding 1 to the position end, with the assumption that the language uses spaces. + // When we adapt Japanese to use this helper, this check should also be adapted. + if ( ( lastMarking.getPositionEnd() + 1 === marking.getPositionStart() ) || + ( marking.getPositionStart() <= lastMarking.getPositionEnd() ) ) { + // The marking is consecutive to the last marking, so we extend the last marking to include the new marking. 
+ lastMarking.setPositionEnd( marking.getPositionEnd() ); + lastMarking.setBlockPositionEnd( marking.getBlockPositionEnd() ); + } else { + // The marking is not consecutive to the last marking, so we add it to the array by itself. + newMarkings.push( marking ); + } + } ); + + return newMarkings; +}; + + +/** + * Gets the Mark objects of all keyphrase matches in the sentence. + * Currently, this function creates Mark objects compatible for both search-based and position-based highlighting. + * In a pure position-based highlighting, we don't need to provide 'marked' and 'original' when creating the Mark object. + * + * @param {Sentence} sentence The sentence to check. + * @param {Token[]} matchesInSentence An array containing the keyphrase matches in the sentence. + * + * @returns {Mark[]} The array of Mark objects of the keyphrase matches in the sentence. + */ +function getMarkingsInSentence( sentence, matchesInSentence ) { + if ( matchesInSentence.length === 0 ) { + return []; + } + + // Create the marked sentence that is used for search-based highlighting. + const markedSentence = createMarksForSentence( sentence, matchesInSentence ); + + /* + * Note that there is a paradigm shift: + * With search-based highlighting, there would be one Mark object for the entire sentence. + * With position-based highlighting, there is a Mark object for each match. + * Hence, in order to be backwards compatible with search-based highlighting, + * all Mark objects for a sentence have the same markedSentence. + */ + const markings = matchesInSentence.map( token => { + const startOffset = token.sourceCodeRange.startOffset; + const endOffset = token.sourceCodeRange.endOffset; + return new Mark( { + position: { + startOffset: startOffset, + endOffset: endOffset, + // Relative to start of block positions. 
+ startOffsetBlock: startOffset - ( sentence.parentStartOffset || 0 ), + endOffsetBlock: endOffset - ( sentence.parentStartOffset || 0 ), + // The client id of the block the match was found in. + clientId: sentence.parentClientId || "", + // The attribute id of the Yoast sub-block the match was found in. + attributeId: sentence.parentAttributeId || "", + // Whether the match was found in the first section of the Yoast sub-block. + isFirstSection: sentence.isParentFirstSectionOfBlock || false, + }, + marked: markedSentence, + original: sentence.text, + } ); + } ); + + return mergeConsecutiveAndOverlappingMarkings( markings ); +} + +export default getMarkingsInSentence; diff --git a/packages/yoastseo/src/languageProcessing/helpers/index.js b/packages/yoastseo/src/languageProcessing/helpers/index.js index f24c546c114..4ecacde422f 100644 --- a/packages/yoastseo/src/languageProcessing/helpers/index.js +++ b/packages/yoastseo/src/languageProcessing/helpers/index.js @@ -1,9 +1,11 @@ import matchStringWithRegex from "./regex/matchStringWithRegex"; import { normalize } from "./sanitize/quotes"; +import { filterShortcodesFromHTML } from "./sanitize/filterShortcodesFromTree"; import removeHtmlBlocks from "./html/htmlParser"; export { matchStringWithRegex, normalize, removeHtmlBlocks, + filterShortcodesFromHTML, }; diff --git a/packages/yoastseo/src/languageProcessing/helpers/match/isDoubleQuoted.js b/packages/yoastseo/src/languageProcessing/helpers/match/isDoubleQuoted.js new file mode 100644 index 00000000000..ea1008addd2 --- /dev/null +++ b/packages/yoastseo/src/languageProcessing/helpers/match/isDoubleQuoted.js @@ -0,0 +1,13 @@ +import doubleQuotes from "../sanitize/doubleQuotes"; +import { includes } from "lodash-es"; + +/** + * Checks if the keyphrase is double-quoted. + * @param {string} keyphrase The keyphrase to check. + * @returns {boolean} Whether the keyphrase is double-quoted. 
+ */ +const isDoubleQuoted = ( keyphrase ) => { + return ( includes( doubleQuotes, keyphrase[ 0 ] ) && includes( doubleQuotes, keyphrase[ keyphrase.length - 1 ] ) ); +}; + +export default isDoubleQuoted; diff --git a/packages/yoastseo/src/languageProcessing/helpers/match/matchKeyphraseWithSentence.js b/packages/yoastseo/src/languageProcessing/helpers/match/matchKeyphraseWithSentence.js deleted file mode 100644 index 957c0128eb8..00000000000 --- a/packages/yoastseo/src/languageProcessing/helpers/match/matchKeyphraseWithSentence.js +++ /dev/null @@ -1,76 +0,0 @@ -import { cloneDeep } from "lodash-es"; - -const wordCouplers = [ "_", "-", "'" ]; - -/** - * Matches a keyword with a sentence object from the html parser. - * - * @param {(string[])[]} keywordForms The keyword forms. - * E.g. If the keyphrase is "key word", then (if premium is activated) this will be [ [ "key", "keys" ], [ "word", "words" ] ] - * The forms are retrieved higher up (among others in keywordCount.js) with researcher.getResearch( "morphology" ). - * @param {Sentence} sentence The sentence to match against the keywordForms. - * - * @returns {Token[]} The tokens that match the keywordForms. - * - * The algorithm is as follows: - * - * It iterates over all tokens in the sentence. It compares the current token with the keyword forms. - * If it matches, it adds the token to the matches array. - * - * The keyword forms are tokenized differently than the sentence. - * The keyword forms are tokenized with researcher.getResearch( "morphology" ) and the sentence is tokenized with the html parser. - * This leads to differences in tokenization. For example, the html parser tokenizes "key-word" as [ "key", "-", "word" ]. The morphology - * tokenizes it as [ "key-word" ]. - * This function corrects for these differences by combining tokens that are separated by a word coupler (e.g. "-") into one token: the matchToken. - * This matchToken is then compared with the keyword forms. 
- */ -const matchKeyphraseWithSentence = ( keywordForms, sentence ) => { - const tokens = sentence.tokens.slice(); - - // Filter out all tokens that do not match the keyphrase forms. - const matches = []; - - // Iterate over all tokens in the sentence. - for ( let i = 0; i < tokens.length; i++ ) { - const tokenForMatching = cloneDeep( tokens[ i ] ); - - // The token used for matching (tokenForMatching) may consist of multiple tokens combined, - // since we want to combine words separated by a hyphen/underscore into one token. - // This array keeps track of all tokens that are combined into the token used for matching - // and is later used to add all individual tokens to the array of matches. - const tokensForMatching = [ ]; - // Add the current token to the tokens for matching. - tokensForMatching.push( cloneDeep( tokens[ i ] ) ); - - // While the next token is a word coupler, add it to the current token. - while ( tokens[ i + 1 ] && wordCouplers.includes( tokens[ i + 1 ].text ) ) { - // Add the word coupler to the token for matching. - i++; - tokenForMatching.text += tokens[ i ].text; - tokenForMatching.sourceCodeRange.endOffset = tokens[ i ].sourceCodeRange.endOffset; - tokensForMatching.push( tokens[ i ] ); - - // If there is a token after the word coupler, add it to the token for matching. as well. - i++; - if ( ! tokens[ i ] ) { - break; - } - tokenForMatching.text += tokens[ i ].text; - tokenForMatching.sourceCodeRange.endOffset = tokens[ i ].sourceCodeRange.endOffset; - tokensForMatching.push( tokens[ i ] ); - } - - // Compare the matchtoken with the keyword forms. 
- keywordForms.forEach( ( keywordForm ) => { - keywordForm.forEach( ( keywordFormPart ) => { - if ( tokenForMatching.text.toLowerCase() === keywordFormPart.toLowerCase() ) { - matches.push( ...tokensForMatching ); - } - } ); - } ); - } - - return matches; -}; - -export default matchKeyphraseWithSentence; diff --git a/packages/yoastseo/src/languageProcessing/helpers/match/matchTextWithWord.js b/packages/yoastseo/src/languageProcessing/helpers/match/matchTextWithWord.js index 37c80b302e7..b1de86b9bc7 100644 --- a/packages/yoastseo/src/languageProcessing/helpers/match/matchTextWithWord.js +++ b/packages/yoastseo/src/languageProcessing/helpers/match/matchTextWithWord.js @@ -13,8 +13,8 @@ import { map } from "lodash-es"; * * @param {string} text The text to use for matching the wordToMatch. * @param {string} wordToMatch The word to match in the text - * @param {string} locale The locale used for transliteration. - * @param {function} matchWordCustomHelper The helper function to match word in text. + * @param {string} locale The locale used for transliteration. + * @param {function} matchWordCustomHelper The helper function to match word in text. * * @returns {Object} An array with all matches of the text, the number of the matches, and the lowest number of positions of the matches. */ diff --git a/packages/yoastseo/src/languageProcessing/helpers/match/matchWordFormsWithSentence.js b/packages/yoastseo/src/languageProcessing/helpers/match/matchWordFormsWithSentence.js new file mode 100644 index 00000000000..4a82c1bdabe --- /dev/null +++ b/packages/yoastseo/src/languageProcessing/helpers/match/matchWordFormsWithSentence.js @@ -0,0 +1,161 @@ +import matchTextWithTransliteration from "./matchTextWithTransliteration"; +import getWordsForHTMLParser from "../word/getWordsForHTMLParser"; + +/** + * Tokenizes the word form of the keyphrase for exact matching. This function gets the word form and tokenizes it. 
+ * This function assumes that if a keyphrase needs to be matched exactly, there will be only one word form. + * This is the result of how the focus keyphrase is processed in buildTopicStems.js in the buildStems function. + * + * @param {(string[])} wordForms The word forms to tokenize. + * + * @returns {string[]} The tokenized word forms. + */ +export const tokenizeKeyphraseFormsForExactMatching = ( wordForms ) => { + // Tokenize word form of the keyphrase. + const wordFormText = wordForms[ 0 ]; + return getWordsForHTMLParser( wordFormText ); +}; + +/** + * Gets the exact matches of the keyphrase. + * Exact matching happens when the user puts the keyphrase in double quotes. + * + * @param {Sentence} sentence The sentence to match the word forms with. + * @param {string[]} wordForms The word forms to match. + * @param {string} locale The locale used in the analysis. + * + * @returns {{count: number, matches: Token[]}} Object containing the number of the exact matches and the matched tokens. + */ +const findExactMatchKeyphraseInSentence = ( sentence, wordForms, locale ) => { + const result = { + count: 0, + matches: [], + }; + // Tokenize word forms of the keyphrase. + const keyphraseTokens = tokenizeKeyphraseFormsForExactMatching( wordForms ); + + const sentenceTokens = sentence.tokens; + + // Initialize the index of the word token of the keyphrase. + let indexOfWordInKeyphrase = 0; + // Initialize the index of the word token of the sentence. + let indexOfWordInSentence = 0; + let currentMatch = []; + + // Check if the tokenized word forms occur in the same order in the sentence tokens. + while ( indexOfWordInSentence < sentenceTokens.length ) { + // If the current sentence token matches the current word token of the keyphrase, add it to the current match. 
+ const sentenceTokenText = sentenceTokens[ indexOfWordInSentence ].text; + const keyphraseTokenText = keyphraseTokens[ indexOfWordInKeyphrase ]; + + const foundMatches = matchTextWithTransliteration( sentenceTokenText.toLowerCase(), keyphraseTokenText.toLowerCase(), locale ); + + if ( foundMatches.length > 0 ) { + currentMatch.push( sentenceTokens[ indexOfWordInSentence ] ); + indexOfWordInKeyphrase++; + } else { + indexOfWordInKeyphrase = 0; + currentMatch = []; + } + + /* + * If the current match has the same length as the keyphrase tokens, the keyphrase forms have been matched. + * Add the current match to the matches array and reset the index of the word in keyphrase and the current match. + */ + if ( currentMatch.length === keyphraseTokens.length ) { + result.matches.push( ...currentMatch ); + result.count++; + indexOfWordInKeyphrase = 0; + currentMatch = []; + } + + indexOfWordInSentence++; + } + return result; +}; + +/** + * Matches a word form of the keyphrase with the tokens from the sentence. + * + * With this approach, we transliterate the word form of the keyphrase before matching it with the sentence tokens. + * However, we don't do the transliteration step for the sentence tokens. + * As a result, for example, the word form "acción" from the keyphrase will match the word "accion" in the sentence. + * But, the word form "accion" from the keyphrase will NOT match the word "acción" in the sentence. + * + * @param {Token[]} tokens The array of tokens to check. + * @param {string} wordForm The word form of the keyphrase. + * @param {string} locale The locale used in the analysis. + * + * @returns {Token[]} The array of the matched tokens. 
+ */ +const matchWordFormInTokens = ( tokens, wordForm, locale ) => { + let matches = []; + + tokens.forEach( token => { + const occurrence = matchTextWithTransliteration( token.text, wordForm, locale ); + if ( occurrence.length > 0 ) { + matches = matches.concat( token ); + } + } ); + + return matches; +}; + +/** + * Finds keyphrase forms in a sentence. + * + * @param {Sentence|string} sentence The sentence to check. + * @param {string[]} wordForms The word forms of the keyphrase to check. + * @param {string} locale The locale used in the analysis. + * @param {function} matchWordCustomHelper Custom function to match a word form with sentence. + * + * @returns {{count: number, matches: (Token|string)[]}} Object containing the number of the matches and the matched tokens. + */ +const matchWordFormsInSentence = ( sentence, wordForms, locale, matchWordCustomHelper ) => { + const result = { + count: 0, + matches: [], + }; + + wordForms.forEach( wordForm => { + let occurrences = []; + if ( matchWordCustomHelper ) { + occurrences = matchWordCustomHelper( sentence, wordForm ); + } else { + const tokens = sentence.tokens.slice(); + occurrences = matchWordFormInTokens( tokens, wordForm, locale ); + } + result.count += occurrences.length; + result.matches = result.matches.concat( occurrences ); + } ); + + return result; +}; + +/** + * Matches the word forms of a keyphrase with a sentence object from the html parser. + * + * @param {Sentence|string} sentence The sentence to match against the word forms of a keyphrase. + * @param {string[]} wordForms The array of word forms of the keyphrase. + * E.g. If the keyphrase is "key word", then (if premium is activated) this will be [ "key", "keys" ] OR [ "word", "words" ] + * The forms are retrieved higher up (among others in keywordCount.js) with researcher.getResearch( "morphology" ). + * + * @param {string} locale The locale used for transliteration. 
+ * @param {function} matchWordCustomHelper Custom function to match a word form with sentence. + * @param {boolean} useExactMatching Whether to match the keyphrase forms exactly or not. + * Exact match is used when the keyphrase is enclosed in double quotes. + * + * @returns {{count: number, matches: (Token|string)[]}} Object containing the number of the matches and the matched tokens. + */ +const matchWordFormsWithSentence = ( sentence, wordForms, locale, matchWordCustomHelper, useExactMatching = false ) => { + /* + * Only use `findExactMatchKeyphraseInSentence` when the custom helper is not available. + * When the custom helper is available, the step for the exact matching happens in the helper. + */ + if ( useExactMatching && ! matchWordCustomHelper ) { + return findExactMatchKeyphraseInSentence( sentence, wordForms, locale ); + } + return matchWordFormsInSentence( sentence, wordForms, locale, matchWordCustomHelper ); +}; + +export default matchWordFormsWithSentence; diff --git a/packages/yoastseo/src/languageProcessing/helpers/match/processExactMatchRequest.js b/packages/yoastseo/src/languageProcessing/helpers/match/processExactMatchRequest.js index a0fc4b26104..6afa1c422db 100644 --- a/packages/yoastseo/src/languageProcessing/helpers/match/processExactMatchRequest.js +++ b/packages/yoastseo/src/languageProcessing/helpers/match/processExactMatchRequest.js @@ -1,4 +1,4 @@ -import doubleQuotes from "../sanitize/doubleQuotes"; +import isDoubleQuoted from "./isDoubleQuoted"; /** * Checks if exact match functionality is requested by enclosing the keyphrase in double quotation marks. @@ -11,7 +11,7 @@ export default function processExactMatchRequest( keyphrase ) { const exactMatchRequest = { exactMatchRequested: false, keyphrase: keyphrase }; // Check if only exact match processing is requested by the user. If so, strip the quotation marks from the keyphrase. 
- if ( doubleQuotes.includes( keyphrase[ 0 ] ) && doubleQuotes.includes( keyphrase[ keyphrase.length - 1 ] ) ) { + if ( isDoubleQuoted( keyphrase ) ) { exactMatchRequest.keyphrase = keyphrase.substring( 1, keyphrase.length - 1 ); exactMatchRequest.exactMatchRequested = true; } diff --git a/packages/yoastseo/src/languageProcessing/helpers/morphology/buildTopicStems.js b/packages/yoastseo/src/languageProcessing/helpers/morphology/buildTopicStems.js index 2ac596f4e77..b3657013a49 100644 --- a/packages/yoastseo/src/languageProcessing/helpers/morphology/buildTopicStems.js +++ b/packages/yoastseo/src/languageProcessing/helpers/morphology/buildTopicStems.js @@ -1,7 +1,8 @@ import getWords from "../word/getWords.js"; import { normalizeSingle } from "../sanitize/quotes"; -import { includes, isUndefined, escapeRegExp, memoize } from "lodash-es"; +import { isUndefined, escapeRegExp, memoize } from "lodash-es"; +import isDoubleQuoted from "../match/isDoubleQuoted"; /** * A topic phrase (i.e., a keyphrase or synonym) with stem-original pairs for the words in the topic phrase. @@ -62,8 +63,7 @@ const buildStems = function( keyphrase, stemmer, functionWords ) { } // If the keyphrase is embedded in double quotation marks, return keyword itself, without outer-most quotation marks. 
- const doubleQuotes = [ "“", "”", "〝", "〞", "〟", "‟", "„", "\"" ]; - if ( includes( doubleQuotes, keyphrase[ 0 ] ) && includes( doubleQuotes, keyphrase[ keyphrase.length - 1 ] ) ) { + if ( isDoubleQuoted( keyphrase ) ) { keyphrase = keyphrase.substring( 1, keyphrase.length - 1 ); return new TopicPhrase( [ new StemOriginalPair( escapeRegExp( keyphrase ), keyphrase ) ], diff --git a/packages/yoastseo/src/languageProcessing/helpers/sanitize/filterShortcodesFromTree.js b/packages/yoastseo/src/languageProcessing/helpers/sanitize/filterShortcodesFromTree.js new file mode 100644 index 00000000000..531e6af6eee --- /dev/null +++ b/packages/yoastseo/src/languageProcessing/helpers/sanitize/filterShortcodesFromTree.js @@ -0,0 +1,131 @@ + +/** + * Creates a regex to filter shortcodes from HTML. + * @param {string[]} shortcodeTags The tags of the shortcodes to filter. + * @returns {RegExp} The regex to recognize the shortcodes. + */ +const createShortcodeTagsRegex = shortcodeTags => { + const shortcodeTagsRegexString = `\\[\\/?(${ shortcodeTags.join( "|" ) })[^\\]]*\\]`; + return new RegExp( shortcodeTagsRegexString, "g" ); +}; + +/** + * Filters shortcodes from HTML string. + * @param {string} html The HTML to filter. + * @param {string[]} shortcodeTags The tags of the shortcodes to filter. + * @returns {string} The filtered HTML. + */ +export const filterShortcodesFromHTML = ( html, shortcodeTags ) => { + if ( ! shortcodeTags || shortcodeTags.length === 0 ) { + return html; + } + const shortcodeTagsRegex = createShortcodeTagsRegex( shortcodeTags ); + return html.replace( shortcodeTagsRegex, "" ); +}; + +/** + * Checks if a token is part of an opening shortcode. + * @param {Token[]} tokens The tokens to check. + * @param {number} index The index of the current token. + * @returns {boolean} Whether the token is part of an opening shortcode. 
+ */ +const tokenIsPartOfOpeningShortcode = ( tokens, index ) => { + return tokens[ index - 1 ] && tokens[ index - 1 ].text === "["; +}; + +/** + * Checks if a token is part of a closing shortcode. + * @param {Token[]} tokens The tokens to check. + * @param {number} index The index of the current token. + * @returns {boolean} Whether the token is part of a closing shortcode. + */ +const tokenIsPartOfClosingShortcode = ( tokens, index ) => { + return tokens[ index - 1 ] && tokens[ index - 1 ].text === "/" && tokens[ index - 2 ] && tokens[ index - 2 ].text === "["; +}; + +/** + * Checks if a token is part of a shortcode. + * @param {Token[]} tokens The tokens to check. + * @param {number} index The index of the current token. + * @param {string[]} shortcodeTags An array of active shortcode tags. + * @param {boolean} encounteredClosingBracket Whether a closing bracket has been encountered (while looping backwards through the tokens). + * @returns {boolean} Whether the token is part of a shortcode. + */ +const tokenIsShortcode = ( tokens, index, shortcodeTags, encounteredClosingBracket ) => { + return encounteredClosingBracket && shortcodeTags.includes( tokens[ index ].text ) && + ( tokenIsPartOfOpeningShortcode( tokens, index ) || tokenIsPartOfClosingShortcode( tokens, index ) ); +}; + +/** + * Filters shortcodes from a sentence. + * @param {Sentence} sentence The sentence to filter. + * @param {string[]} shortcodeTags The tags of the shortcodes to filter. + * @param {RegExp} shortcodeTagsRegex The regex to filter the shortcodes. 
+ * @returns {void} + * + */ +const filterShortcodesFromSentence = ( sentence, shortcodeTags, shortcodeTagsRegex ) => { + const tokens = sentence.tokens; + + let encounteredClosingBracket = false; + // traverse through tokens backwards + for ( let i = tokens.length - 1; i >= 0; i-- ) { + if ( tokens[ i ].text === "]" ) { + encounteredClosingBracket = true; + } + + if ( tokenIsShortcode( tokens, i, shortcodeTags, encounteredClosingBracket ) ) { + while ( tokens[ i ].text !== "]" ) { + tokens.splice( i, 1 ); + } + tokens.splice( i, 1 ); + encounteredClosingBracket = false; + + while ( tokens[ i - 1 ] && "[/".includes( tokens[ i - 1 ].text ) ) { + tokens.splice( i - 1, 1 ); + i--; + } + } + } + sentence.tokens = tokens; + + sentence.text = sentence.text.replace( shortcodeTagsRegex, "" ); +}; + +/** + * A recursive function that filters shortcodes from sentences. + * @param {Node} tree The tree to filter. + * @param {string[]} shortcodeTags The tags of the shortcodes to filter. + * @param {RegExp} shortcodeTagsRegex The regex to filter the shortcodes. + * @returns {void} + */ +const filterShortcodesFromSentences = ( tree, shortcodeTags, shortcodeTagsRegex ) => { + if ( tree.sentences ) { + tree.sentences.forEach( sentence => { + filterShortcodesFromSentence( sentence, shortcodeTags, shortcodeTagsRegex ); + } ); + } + + if ( tree.childNodes ) { + tree.childNodes.forEach( childNode => { + filterShortcodesFromSentences( childNode, shortcodeTags, shortcodeTagsRegex ); + } ); + } +}; + +/** + * Filters shortcodes from the tree. + * @param {Node} tree The tree to filter. + * @param {string[]} shortcodeTags The tags of the shortcodes to filter. + * @returns {void} + */ +const filterShortcodesFromTree = ( tree, shortcodeTags ) => { + if ( ! 
shortcodeTags || shortcodeTags.length === 0 ) { + return; + } + const shortcodeTagsRegex = createShortcodeTagsRegex( shortcodeTags ); + + filterShortcodesFromSentences( tree, shortcodeTags, shortcodeTagsRegex ); +}; + +export default filterShortcodesFromTree; diff --git a/packages/yoastseo/src/languageProcessing/helpers/sentence/getSentencesFromTree.js b/packages/yoastseo/src/languageProcessing/helpers/sentence/getSentencesFromTree.js new file mode 100644 index 00000000000..0fcbd57b06e --- /dev/null +++ b/packages/yoastseo/src/languageProcessing/helpers/sentence/getSentencesFromTree.js @@ -0,0 +1,55 @@ +/** + * Retrieves the start offset for a given node. + * @param {Node} node The current node. + * @returns {number} The start offset. + */ +function getStartOffset( node ) { + return node.sourceCodeLocation && + ( ( node.sourceCodeLocation.startTag && node.sourceCodeLocation.startTag.endOffset ) || node.sourceCodeLocation.startOffset ) || 0; +} + +/** + * Retrieves the parent node for a given node. + * @param {Paper} paper The current paper. + * @param {Node} node The current node. + * @returns {Node} The parent node. + */ +function getParentNode( paper, node ) { + // Includes a fallback so that if a parent node cannot be found for an implicit paragraph, we use the current node as the parent node. + return paper.getTree().findAll( treeNode => treeNode.childNodes && treeNode.childNodes.includes( node ) )[ 0 ] || node; +} + +/** + * Gets all the sentences from paragraph and heading nodes. + * These two node types are the nodes that should contain sentences for the analysis. + * + * @param {Paper} paper The paper to get the sentences from. + * + * @returns {Sentence[]} The array of sentences retrieved from paragraph and heading nodes plus sourceCodeLocation of the parent node. + */ +export default function( paper ) { + // Get all nodes that have a sentence property which is not an empty array. + const tree = paper.getTree().findAll( treeNode => !! 
treeNode.sentences ); + + return tree.flatMap( node => node.sentences.map( sentence => { + let parentNode = node; + + // For implicit paragraphs, base the details on the parent of this node. + if ( node.isImplicit ) { + parentNode = getParentNode( paper, node ); + } + + return { + ...sentence, + // The parent node's start offset is the start offset of the parent node if it doesn't have a `startTag` property. + parentStartOffset: getStartOffset( parentNode ), + // The block client id of the parent node. + parentClientId: parentNode.clientId || "", + // The attribute id of the parent node, if available, otherwise an empty string. + // Only used for position-based highlighting in sub-blocks of Yoast blocks. + parentAttributeId: node.attributeId || "", + // Whether the parent node is the first section of Yoast sub-blocks. Only used for position-based highlighting. + isParentFirstSectionOfBlock: node.isFirstSection || false, + }; + } ) ); +} diff --git a/packages/yoastseo/src/languageProcessing/helpers/word/getAllWordsFromTree.js b/packages/yoastseo/src/languageProcessing/helpers/word/getAllWordsFromTree.js new file mode 100644 index 00000000000..945821cdf82 --- /dev/null +++ b/packages/yoastseo/src/languageProcessing/helpers/word/getAllWordsFromTree.js @@ -0,0 +1,23 @@ +import getSentencesFromTree from "../sentence/getSentencesFromTree"; +import { flatMap } from "lodash-es"; +import removePunctuation from "../sanitize/removePunctuation"; + +/** + * Gets the words from the tree, i.e. from the paragraph and heading nodes. + * These two node types are the nodes that should contain words for the analysis. + * + * @param {Paper} paper The paper to get the tree and words from. + * + * @returns {String[]} Array of words retrieved from the tree. + */ +export default function( paper ) { + const sentences = getSentencesFromTree( paper ); + // Get all the tokens from each sentence. 
+ const tokens = sentences.map( sentence => sentence.tokens ); + let words = flatMap( tokens ).map( token => token.text ); + // Remove punctuation and spaces. + words = words.map( token => removePunctuation( token ) ); + + // Filter out empty tokens. + return words.filter( word => word.trim() !== "" ); +} diff --git a/packages/yoastseo/src/languageProcessing/helpers/word/getWordsForHTMLParser.js b/packages/yoastseo/src/languageProcessing/helpers/word/getWordsForHTMLParser.js new file mode 100644 index 00000000000..47473262443 --- /dev/null +++ b/packages/yoastseo/src/languageProcessing/helpers/word/getWordsForHTMLParser.js @@ -0,0 +1,69 @@ +import { punctuationRegexEnd, punctuationRegexStart } from "../sanitize/removePunctuation"; +import { hashedHtmlEntitiesRegexEnd, hashedHtmlEntitiesRegexStart } from "../../../helpers/htmlEntities"; + +/* + * The following regex matches a word separator. A word separator is either a whitespace, a slash, a + * tab or a non-breaking space. Brackets are added to deal correctly with shortcodes downstream. + * The regex is used to split a text into tokens. + * Do not add punctuation marks to this regex, as they are handled separately inside splitIntoTokens(). + * The word separator explicitly only contains characters that split two words and not a word and a space. + * - A space is a word separator because it separates two words if it occurs between two words. For example: "foo bar" + * - A tab is a word separator because it separates two words if it occurs between two words. For example: "foo bar" + * - A non-breaking space is a word separator because it separates two words if it occurs between two words. For example: "foo\u00A0bar" + * - Open and closing brackets are added to deal correctly with shortcodes downstream. + */ +const wordSeparatorsRegex = /([\s\t\u00A0[\]])/; + +/** + * Tokenizes a text similar to getWords, but in a suitable way for the HTML parser. + * 1. It does not normalize whitespace. 
+ * This operation is too risky for the HTML parser because it may throw away characters and as a result, the token positions are corrupted. + * 2. It does not remove punctuation marks but keeps them. + * + * This algorithm splits the text by word separators: tokens that are the border between two words. + * This algorithm separates punctuation marks from words and keeps them as separate tokens. + * It only splits them off if they appear at the start or end of a word. + * + * @param {string} text The text to tokenize. + * @returns {string[]} An array of tokens. + */ +const getWordsForHTMLParser = ( text ) => { + if ( ! text ) { + return []; + } + + // Split the sentence string into tokens. Those tokens are unrefined as they may contain punctuation. + const rawTokens = text.split( wordSeparatorsRegex ).filter( x => x !== "" ); + + const tokenTexts = []; + rawTokens.forEach( token => { + // Pretokens contains all that occurs before the first letter of the token. + const preTokens = []; + // Posttokens contains all that occurs after the last letter of the token. + const postTokens = []; + + // Add all punctuation marks that occur before the first letter of the token to the pretokens array. + // Also, prevent matching with a hashed HTML entity in the beginning of the token. + while ( punctuationRegexStart.test( token ) && ! hashedHtmlEntitiesRegexStart.test( token ) ) { + preTokens.push( token[ 0 ] ); + token = token.slice( 1 ); + } + // Add all punctuation marks that occur after the last letter of the token to the posttokens array. + // Also, prevent matching with a hashed HTML entity at the end of the token. + while ( punctuationRegexEnd.test( token ) && ! hashedHtmlEntitiesRegexEnd.test( token ) ) { + // Using unshift here because we are iterating from the end of the string to the beginning, + // and we want to keep the order of the punctuation marks. + // Therefore, we add them to the start of the array. 
+ postTokens.unshift( token[ token.length - 1 ] ); + token = token.slice( 0, -1 ); + } + + let currentTokens = [ ...preTokens, token, ...postTokens ]; + currentTokens = currentTokens.filter( x => x !== "" ); + tokenTexts.push( ...currentTokens ); + } ); + + return tokenTexts; +}; + +export default getWordsForHTMLParser; diff --git a/packages/yoastseo/src/languageProcessing/helpers/word/markWordsInSentences.js b/packages/yoastseo/src/languageProcessing/helpers/word/markWordsInSentences.js index d29f325656e..92d618c60a0 100644 --- a/packages/yoastseo/src/languageProcessing/helpers/word/markWordsInSentences.js +++ b/packages/yoastseo/src/languageProcessing/helpers/word/markWordsInSentences.js @@ -107,6 +107,21 @@ export const collectMarkingsInSentence = function( sentence, wordsFoundInSentenc return ( markup.replace( new RegExp( " ", "ig" ), " " ) ); }; +/** + * Adds marks to a sentence. + * + * @param {string} sentence The sentence in which we want to apply highlighting. + * @param {Array} wordsFoundInSentence The words to highlight in a sentence. + * @param {function} matchWordCustomHelper The language-specific helper function to match word in text. + * @returns {Mark[]} The array of Mark objects of each sentence. + */ +export function markWordsInASentence( sentence, wordsFoundInSentence, matchWordCustomHelper ) { + return [ new Mark( { + original: sentence, + marked: collectMarkingsInSentence( sentence, wordsFoundInSentence, matchWordCustomHelper ), + } ) ]; +} + /** * Adds marks to an array of sentences. 
* @@ -125,10 +140,7 @@ export function markWordsInSentences( wordsToMark, sentences, locale, matchWordC wordsFoundInSentence = matchWords( sentence, wordsToMark, locale, matchWordCustomHelper ).matches; if ( wordsFoundInSentence.length > 0 ) { - markings = markings.concat( new Mark( { - original: sentence, - marked: collectMarkingsInSentence( sentence, wordsFoundInSentence, matchWordCustomHelper ), - } ) ); + markings = markings.concat( markWordsInASentence( sentence, wordsFoundInSentence, matchWordCustomHelper ) ); } } ); diff --git a/packages/yoastseo/src/languageProcessing/languages/ja/Researcher.js b/packages/yoastseo/src/languageProcessing/languages/ja/Researcher.js index 4312b87d4bb..10fc57d734c 100644 --- a/packages/yoastseo/src/languageProcessing/languages/ja/Researcher.js +++ b/packages/yoastseo/src/languageProcessing/languages/ja/Researcher.js @@ -10,6 +10,7 @@ import customCountLength from "./helpers/countCharacters"; import matchTransitionWordsHelper from "./helpers/matchTransitionWords"; import getContentWords from "./helpers/getContentWords"; import memoizedTokenizer from "./helpers/memoizedSentenceTokenizer"; +import splitIntoTokensCustom from "./helpers/splitIntoTokensCustom"; // All config import firstWordExceptions from "./config/firstWordExceptions"; @@ -72,6 +73,7 @@ export default class Researcher extends AbstractResearcher { customCountLength, matchTransitionWordsHelper, memoizedTokenizer, + splitIntoTokensCustom, } ); Object.assign( this.defaultResearches, { diff --git a/packages/yoastseo/src/languageProcessing/languages/ja/helpers/matchTextWithWord.js b/packages/yoastseo/src/languageProcessing/languages/ja/helpers/matchTextWithWord.js index 343e2e0c6ae..43ee70b21fc 100644 --- a/packages/yoastseo/src/languageProcessing/languages/ja/helpers/matchTextWithWord.js +++ b/packages/yoastseo/src/languageProcessing/languages/ja/helpers/matchTextWithWord.js @@ -1,5 +1,6 @@ import getContentWords from "./getContentWords"; import processExactMatchRequest 
from "../../../helpers/match/processExactMatchRequest"; +import { normalizeSingle } from "../../../helpers/sanitize/quotes"; /** * Checks for word matches in a text and returns an array containing the matched word(s). @@ -10,9 +11,24 @@ import processExactMatchRequest from "../../../helpers/match/processExactMatchRe * @returns {Array} An array of the matched word(s). */ export default function( text, wordToMatch ) { + /* + * Lowercase the text so that it matches the wordToMatch which is already lowercased. + * Note that Japanese doesn't differentiate between upper and lower case, so this is only needed in case + * the text contains non-Japanese characters. + */ + text = text.toLowerCase(); + // Check if the exact match is requested. const isExactMatchRequested = processExactMatchRequest( wordToMatch ); if ( isExactMatchRequested.exactMatchRequested ) { + /* + * Normalize single quotes in case they differ between the text and the word to match. + * Normalizing is only needed for exact matching, because with non-exact matching single quotes are considered + * word boundaries. + * Quotes in wordToMatch are already normalized at an earlier point. + */ + text = normalizeSingle( text ); + const keyphrase = isExactMatchRequested.keyphrase; const matches = []; diff --git a/packages/yoastseo/src/languageProcessing/languages/ja/helpers/splitIntoTokensCustom.js b/packages/yoastseo/src/languageProcessing/languages/ja/helpers/splitIntoTokensCustom.js new file mode 100644 index 00000000000..959bd1fd99c --- /dev/null +++ b/packages/yoastseo/src/languageProcessing/languages/ja/helpers/splitIntoTokensCustom.js @@ -0,0 +1,22 @@ +import { map } from "lodash-es"; +import TinySegmenter from "tiny-segmenter"; + +/** + * Split sentence into tokens. + * + * @param {Sentence} sentence The sentence to split. + * + * @returns {Token[]} The tokens. 
+ */ +function splitIntoTokensCustom( sentence ) { + // Retrieve sentence from sentence class + const sentenceText = sentence.text; + // Return empty string if sentence is empty + if ( sentenceText === "" ) { + return []; + } + // Split sentences into words that are also tokens + const words = new TinySegmenter().segment( sentenceText ); + return map( words ); +} +export default splitIntoTokensCustom; diff --git a/packages/yoastseo/src/languageProcessing/researches/countSentencesFromText.js b/packages/yoastseo/src/languageProcessing/researches/countSentencesFromText.js index 6465709e478..25932359050 100644 --- a/packages/yoastseo/src/languageProcessing/researches/countSentencesFromText.js +++ b/packages/yoastseo/src/languageProcessing/researches/countSentencesFromText.js @@ -1,6 +1,7 @@ import getSentences from "../helpers/sentence/getSentences"; import sentencesLength from "../helpers/sentence/sentencesLength.js"; import removeHtmlBlocks from "../helpers/html/htmlParser"; +import { filterShortcodesFromHTML } from "../helpers"; /** * Count sentences in the text. 
@@ -12,6 +13,7 @@ export default function( paper, researcher ) { const memoizedTokenizer = researcher.getHelper( "memoizedTokenizer" ); let text = paper.getText(); text = removeHtmlBlocks( text ); + text = filterShortcodesFromHTML( text, paper._attributes && paper._attributes.shortcodes ); const sentences = getSentences( text, memoizedTokenizer ); return sentencesLength( sentences, researcher ); } diff --git a/packages/yoastseo/src/languageProcessing/researches/findTransitionWords.js b/packages/yoastseo/src/languageProcessing/researches/findTransitionWords.js index dba7cd9b4a1..89588e096dc 100644 --- a/packages/yoastseo/src/languageProcessing/researches/findTransitionWords.js +++ b/packages/yoastseo/src/languageProcessing/researches/findTransitionWords.js @@ -5,6 +5,7 @@ import { isWordInSentence as matchWordInSentence } from "../helpers/word/matchWo import removeHtmlBlocks from "../helpers/html/htmlParser"; import { flattenDeep } from "lodash-es"; +import { filterShortcodesFromHTML } from "../helpers"; let regexFromDoubleArray = null; let regexFromDoubleArrayCacheKey = ""; @@ -110,6 +111,7 @@ export default function( paper, researcher ) { let text = paper.getText(); text = removeHtmlBlocks( text ); + text = filterShortcodesFromHTML( text, paper._attributes && paper._attributes.shortcodes ); const sentences = getSentences( text, memoizedTokenizer ); const sentenceResults = checkSentencesForTransitionWords( sentences, transitionWords, twoPartTransitionWords, matchTransitionWordsHelper ); diff --git a/packages/yoastseo/src/languageProcessing/researches/getKeywordDensity.js b/packages/yoastseo/src/languageProcessing/researches/getKeywordDensity.js index 25cc513707a..824453ad6df 100644 --- a/packages/yoastseo/src/languageProcessing/researches/getKeywordDensity.js +++ b/packages/yoastseo/src/languageProcessing/researches/getKeywordDensity.js @@ -1,32 +1,44 @@ -/** @module analyses/getKeywordDensity */ - -import countWords from "../helpers/word/countWords.js"; -import 
removeHtmlBlocks from "../helpers/html/htmlParser"; +import getAllWordsFromTree from "../helpers/word/getAllWordsFromTree"; /** - * Calculates the keyword density. + * Calculates the keyphrase density. * - * @param {Object} paper The paper containing keyword and text. - * @param {Object} researcher The researcher. + * @param {Paper} paper The paper containing keyphrase and text. + * @param {Researcher} researcher The researcher. * - * @returns {Object} The keyword density. + * @returns {Object} The keyphrase density. */ -export default function( paper, researcher ) { +export default function getKeyphraseDensity( paper, researcher ) { const getWordsCustomHelper = researcher.getHelper( "getWordsCustomHelper" ); - let text = paper.getText(); - text = removeHtmlBlocks( text ); - let wordCount = countWords( text ); + let wordCount = 0; - // If there is a custom getWords helper use its output for countWords. + // If there is a custom getWords helper, use its output for countWords. if ( getWordsCustomHelper ) { wordCount = getWordsCustomHelper( paper.getText() ).length; + } else { + wordCount = getAllWordsFromTree( paper ).length; } if ( wordCount === 0 ) { return 0; } - const keywordCount = researcher.getResearch( "keywordCount" ); + const keyphraseCount = researcher.getResearch( "getKeyphraseCount" ); - return ( keywordCount.count / wordCount ) * 100; + return ( keyphraseCount.count / wordCount ) * 100; +} + +/** + * Calculates the keyphrase density. + * + * @deprecated Use getKeyphraseDensity instead. + * + * @param {Object} paper The paper containing keyphrase and text. + * @param {Object} researcher The researcher. + * + * @returns {Object} The keyphrase density. + */ +export function getKeywordDensity( paper, researcher ) { + console.warn( "This function is deprecated, use getKeyphraseDensity instead." 
); + return getKeyphraseDensity( paper, researcher ); } diff --git a/packages/yoastseo/src/languageProcessing/researches/getLongCenterAlignedTexts.js b/packages/yoastseo/src/languageProcessing/researches/getLongCenterAlignedTexts.js index 4ca827ff5b1..97f2bb3274c 100644 --- a/packages/yoastseo/src/languageProcessing/researches/getLongCenterAlignedTexts.js +++ b/packages/yoastseo/src/languageProcessing/researches/getLongCenterAlignedTexts.js @@ -44,6 +44,7 @@ function getLongCenterAlignedElements( elements, elementType ) { export default function( paper ) { let text = paper.getText(); text = helpers.removeHtmlBlocks( text ); + text = helpers.filterShortcodesFromHTML( text, paper._attributes && paper._attributes.shortcodes ); // Get all paragraphs from the text. We only retrieve the paragraphs with

    tags. const allParagraphs = helpers.matchStringWithRegex( text, paragraphsRegex ); diff --git a/packages/yoastseo/src/languageProcessing/researches/getParagraphLength.js b/packages/yoastseo/src/languageProcessing/researches/getParagraphLength.js index acda161f183..a85cb29d00f 100644 --- a/packages/yoastseo/src/languageProcessing/researches/getParagraphLength.js +++ b/packages/yoastseo/src/languageProcessing/researches/getParagraphLength.js @@ -6,6 +6,7 @@ import countWords from "../helpers/word/countWords.js"; import matchParagraphs from "../helpers/html/matchParagraphs.js"; import { filter } from "lodash-es"; import removeHtmlBlocks from "../helpers/html/htmlParser"; +import { filterShortcodesFromHTML } from "../helpers"; /** * Gets all paragraphs and their word counts or character counts from the text. @@ -18,6 +19,7 @@ import removeHtmlBlocks from "../helpers/html/htmlParser"; export default function( paper, researcher ) { let text = paper.getText(); text = removeHtmlBlocks( text ); + text = filterShortcodesFromHTML( text, paper._attributes && paper._attributes.shortcodes ); text = excludeTableOfContentsTag( text ); // Exclude the Estimated Reading time text from the research diff --git a/packages/yoastseo/src/languageProcessing/researches/getPassiveVoiceResult.js b/packages/yoastseo/src/languageProcessing/researches/getPassiveVoiceResult.js index c1bd628f69b..d099d005e63 100644 --- a/packages/yoastseo/src/languageProcessing/researches/getPassiveVoiceResult.js +++ b/packages/yoastseo/src/languageProcessing/researches/getPassiveVoiceResult.js @@ -4,6 +4,7 @@ import Sentence from "../../languageProcessing/values/Sentence.js"; import { forEach } from "lodash-es"; import removeHtmlBlocks from "../helpers/html/htmlParser"; +import { filterShortcodesFromHTML } from "../helpers"; /** * Looks for morphological passive voice. 
@@ -18,6 +19,7 @@ export const getMorphologicalPassives = function( paper, researcher ) { const isPassiveSentence = researcher.getHelper( "isPassiveSentence" ); let text = paper.getText(); text = removeHtmlBlocks( text ); + text = filterShortcodesFromHTML( text, paper._attributes && paper._attributes.shortcodes ); const memoizedTokenizer = researcher.getHelper( "memoizedTokenizer" ); const sentences = getSentences( text, memoizedTokenizer ) .map( function( sentence ) { @@ -55,6 +57,7 @@ export const getPeriphrasticPassives = function( paper, researcher ) { const getClauses = researcher.getHelper( "getClauses" ); let text = paper.getText(); text = removeHtmlBlocks( text ); + text = filterShortcodesFromHTML( text, paper._attributes && paper._attributes.shortcodes ); const memoizedTokenizer = researcher.getHelper( "memoizedTokenizer" ); const sentences = getSentences( text, memoizedTokenizer ) .map( function( sentence ) { diff --git a/packages/yoastseo/src/languageProcessing/researches/getSentenceBeginnings.js b/packages/yoastseo/src/languageProcessing/researches/getSentenceBeginnings.js index a8d0eb66151..22f9c3ae56a 100644 --- a/packages/yoastseo/src/languageProcessing/researches/getSentenceBeginnings.js +++ b/packages/yoastseo/src/languageProcessing/researches/getSentenceBeginnings.js @@ -5,6 +5,7 @@ import { stripFullTags as stripTags } from "../helpers/sanitize/stripHTMLTags.js import { filter, forEach, isEmpty } from "lodash-es"; import removeHtmlBlocks from "../helpers/html/htmlParser"; +import { filterShortcodesFromHTML } from "../helpers"; /** * Compares the first word of each sentence with the first word of the following sentence. @@ -95,6 +96,7 @@ export default function( paper, researcher ) { let text = paper.getText(); text = removeHtmlBlocks( text ); + text = filterShortcodesFromHTML( text, paper._attributes && paper._attributes.shortcodes ); // Remove any HTML whitespace padding and replace it with a single whitespace. 
text = text.replace( /[\s\n]+/g, " " ); diff --git a/packages/yoastseo/src/languageProcessing/researches/getSubheadingTextLengths.js b/packages/yoastseo/src/languageProcessing/researches/getSubheadingTextLengths.js index 07adb8693c5..618413675e1 100644 --- a/packages/yoastseo/src/languageProcessing/researches/getSubheadingTextLengths.js +++ b/packages/yoastseo/src/languageProcessing/researches/getSubheadingTextLengths.js @@ -3,6 +3,7 @@ import excludeTableOfContentsTag from "../helpers/sanitize/excludeTableOfContent import countWords from "../helpers/word/countWords"; import { forEach } from "lodash-es"; import removeHtmlBlocks from "../helpers/html/htmlParser"; +import { filterShortcodesFromHTML } from "../helpers"; /** * Gets the subheadings from the text and returns the length of these subheading in an array. @@ -15,6 +16,7 @@ import removeHtmlBlocks from "../helpers/html/htmlParser"; export default function( paper, researcher ) { let text = paper.getText(); text = removeHtmlBlocks( text ); + text = filterShortcodesFromHTML( text, paper._attributes && paper._attributes.shortcodes ); text = excludeTableOfContentsTag( text ); const matches = getSubheadingTexts( text ); diff --git a/packages/yoastseo/src/languageProcessing/researches/h1s.js b/packages/yoastseo/src/languageProcessing/researches/h1s.js index 359884dced0..c246a2deaf7 100644 --- a/packages/yoastseo/src/languageProcessing/researches/h1s.js +++ b/packages/yoastseo/src/languageProcessing/researches/h1s.js @@ -1,45 +1,26 @@ -import { getBlocks } from "../helpers/html/html"; -import { reject } from "lodash-es"; - -const h1Regex = /(.+?)<\/h1>/; - /** - * Gets a block from a text and checks if it is totally empty or if it is an empty paragraph. - * - * @param {string} block A HTML block extracted from the paper. - * - * @returns {boolean} Whether the block is empty or not. - */ -const emptyBlock = function( block ) { - block = block.trim(); - return block === "

    " || block === ""; -}; - - -/** - * Gets all H1s in a text, including their content and their position with regards to other HTML blocks. + * Gets all H1s in a text, including their content and their position information. * * @param {Paper} paper The paper for which to get the H1s. * * @returns {Array} An array with all H1s, their content and position. */ export default function( paper ) { - const text = paper.getText(); + const tree = paper.getTree(); - let blocks = getBlocks( text ); - blocks = reject( blocks, emptyBlock ); + const h1Matches = tree.findAll( node => node.name === "h1" ); - const h1s = []; - blocks.forEach( ( block, index ) => { - const match = h1Regex.exec( block ); - if ( match ) { - h1s.push( { + return h1Matches + .map( h1Match => ( + { tag: "h1", - content: match[ 1 ], - position: index, - } ); - } - } ); - - return h1s; + content: h1Match.findAll( node => node.name === "#text" ).map( textNode => textNode.value ).join( "" ), + position: { + startOffset: h1Match.sourceCodeLocation.startTag.endOffset, + endOffset: h1Match.sourceCodeLocation.endTag.startOffset, + clientId: h1Match.clientId || "", + }, + } + ) ) + .filter( h1 => !! 
h1.content ); } diff --git a/packages/yoastseo/src/languageProcessing/researches/keyphraseDistribution.js b/packages/yoastseo/src/languageProcessing/researches/keyphraseDistribution.js index 43920b9c699..3dbd010134f 100644 --- a/packages/yoastseo/src/languageProcessing/researches/keyphraseDistribution.js +++ b/packages/yoastseo/src/languageProcessing/researches/keyphraseDistribution.js @@ -204,6 +204,7 @@ const keyphraseDistributionResearcher = function( paper, researcher ) { let text = paper.getText(); text = helpers.removeHtmlBlocks( text ); + text = helpers.filterShortcodesFromHTML( text, paper._attributes && paper._attributes.shortcodes ); text = mergeListItems( text ); const sentences = getSentences( text, memoizedTokenizer ); const topicForms = researcher.getResearch( "morphology" ); diff --git a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js index dabd39ed1ec..315f91fb7c5 100644 --- a/packages/yoastseo/src/languageProcessing/researches/keywordCount.js +++ b/packages/yoastseo/src/languageProcessing/researches/keywordCount.js @@ -1,65 +1,130 @@ -/** @module analyses/getKeywordCount */ - -import matchWords from "../helpers/match/matchTextWithArray"; -import { flattenDeep, uniq as unique } from "lodash-es"; +import { flatten, flattenDeep } from "lodash-es"; +import getSentencesFromTree from "../helpers/sentence/getSentencesFromTree"; +import { normalizeSingle } from "../helpers/sanitize/quotes"; +import getMarkingsInSentence from "../helpers/highlighting/getMarkingsInSentence"; +import matchWordFormsWithSentence from "../helpers/match/matchWordFormsWithSentence"; +import isDoubleQuoted from "../helpers/match/isDoubleQuoted"; +import { markWordsInASentence } from "../helpers/word/markWordsInSentences"; import getSentences from "../helpers/sentence/getSentences"; -import { markWordsInSentences } from "../helpers/word/markWordsInSentences"; -import removeHtmlBlocks from 
"../helpers/html/htmlParser"; +import { filterShortcodesFromHTML } from "../helpers"; /** - * Calculates the keyword count, takes morphology into account. + * Counts the occurrences of the keyphrase in the text and creates the Mark objects for the matches. * - * @param {object} paper The paper containing keyword and text. - * @param {object} researcher The researcher. + * @param {(Sentence|string)[]} sentences The sentences to check. + * @param {Array} keyphraseForms The keyphrase forms. + * @param {string} locale The locale used in the analysis. + * @param {function} matchWordCustomHelper A custom helper to match words with a text. + * @param {boolean} isExactMatchRequested Whether the exact matching is requested. * - * @returns {object} An array of all the matches, markings and the keyword count. + * @returns {{markings: Mark[], count: number}} The number of keyphrase occurrences in the text and the Mark objects of the matches. */ -export default function( paper, researcher ) { - const topicForms = researcher.getResearch( "morphology" ); - const memoizedTokenizer = researcher.getHelper( "memoizedTokenizer" ); +export function countKeyphraseInText( sentences, keyphraseForms, locale, matchWordCustomHelper, isExactMatchRequested ) { + const result = { count: 0, markings: [] }; - // A helper to return all the matches for the keyphrase. - const matchWordCustomHelper = researcher.getHelper( "matchWordCustomHelper" ); + sentences.forEach( sentence => { + const matchesInSentence = keyphraseForms.map( wordForms => matchWordFormsWithSentence( sentence, + wordForms, locale, matchWordCustomHelper, isExactMatchRequested ) ); - let text = paper.getText(); - text = removeHtmlBlocks( text ); - const locale = paper.getLocale(); + // A sentence has at least one full-match of the keyphrase if each word occurs at least once. 
+ const isEachWordFound = matchesInSentence.every( wordForms => wordForms.count > 0 ); + + if ( isEachWordFound ) { + /* + * Retrieve all the occurrences' count of each word of the keyphrase and save it in an array. + * matches: [ [ { matches: ["red"], count: 1 } ], [ { matches: ["pandas"], count: 2 } ] ] + * counts: [ 1, 2 ] + */ + const counts = matchesInSentence.map( match => match.count ); + /* + * The number of the full-match count is the lowest count of the occurrences. + * counts: [ 1, 2 ] + * totalMatchCount: 1 + * + * From the example above, the full-match is 1, because one of the "pandas" occurrences is not accompanied by "red" + * to be counted as a full-match. + */ + const totalMatchCount = Math.min( ...counts ); + const foundWords = flattenDeep( matchesInSentence.map( match => match.matches ) ); + + let markings = []; - const sentences = getSentences( text, memoizedTokenizer ); + if ( matchWordCustomHelper ) { + // Currently, this check is only applicable for Japanese. + markings = markWordsInASentence( sentence, foundWords, matchWordCustomHelper ); + } else { + markings = getMarkingsInSentence( sentence, foundWords ); + } - const keywordsFound = { - count: 0, - matches: [], - sentencesWithKeywords: [], - }; + result.count += totalMatchCount; + result.markings.push( markings ); + } + } ); + + return result; +} + +/** + * Calculates the keyphrase count, takes morphology into account. + * + * @param {Paper} paper The paper containing keyphrase and text. + * @param {Researcher} researcher The researcher. + * + * @returns {{count: number, markings: Mark[], keyphraseLength: number}} An object containing the keyphrase count, markings and the kephrase length. 
+ */ +export default function getKeyphraseCount( paper, researcher ) { + const result = { count: 0, markings: [], keyphraseLength: 0 }; + const topicForms = researcher.getResearch( "morphology" ); + let keyphraseForms = topicForms.keyphraseForms; + const keyphraseLength = keyphraseForms.length; /* - * Count the amount of keyphrase occurrences in the sentences. - * An occurrence is counted when all keywords of the keyphrase are contained within the sentence. Each sentence can contain multiple keyphrases. - * (e.g. "The apple potato is an apple and a potato." has two occurrences of the keyphrase "apple potato"). + * Normalize single quotes so that word form with different type of single quotes can still be matched. + * For example, "key‛word" should match "key'word". */ - sentences.forEach( sentence => { - const matchesInSentence = topicForms.keyphraseForms.map( keywordForms => matchWords( sentence, - keywordForms, locale, matchWordCustomHelper ) ); + keyphraseForms = keyphraseForms.map( word => word.map( form => normalizeSingle( form ) ) ); - const hasAllKeywords = matchesInSentence.every( keywordForm => keywordForm.count > 0 ); + if ( keyphraseLength === 0 ) { + return result; + } - if ( hasAllKeywords ) { - const counts = matchesInSentence.map( match => match.count ); - const foundWords = flattenDeep( matchesInSentence.map( match => match.matches ) ); - keywordsFound.count += Math.min( ...counts ); - keywordsFound.matches.push( foundWords ); - keywordsFound.sentencesWithKeywords.push( sentence ); - } - } ); + const matchWordCustomHelper = researcher.getHelper( "matchWordCustomHelper" ); + const customSentenceTokenizer = researcher.getHelper( "memoizedTokenizer" ); + const locale = paper.getLocale(); + const text = matchWordCustomHelper + ? filterShortcodesFromHTML( paper.getText(), paper._attributes && paper._attributes.shortcodes ) + : paper.getText(); + + // When the custom helper is available, we're using the sentences retrieved from the text for the analysis. 
+ const sentences = matchWordCustomHelper ? getSentences( text, customSentenceTokenizer ) : getSentencesFromTree( paper ); + // Exact matching is requested when the keyphrase is enclosed in double quotes. + const isExactMatchRequested = isDoubleQuoted( paper.getKeyword() ); - const matches = unique( flattenDeep( keywordsFound.matches ) ).sort( ( a, b ) => b.length - a.length ); - const keyphraseForms = flattenDeep( topicForms.keyphraseForms ); + /* + * Count the amount of keyphrase occurrences in the sentences. + * An occurrence is counted when all words of the keyphrase are contained within the sentence. Each sentence can contain multiple keyphrases. + * (e.g. "The apple potato is an apple and a potato." has two occurrences of the keyphrase "apple potato"). + */ + const keyphraseFound = countKeyphraseInText( sentences, keyphraseForms, locale, matchWordCustomHelper, isExactMatchRequested ); + + result.count = keyphraseFound.count; + result.markings = flatten( keyphraseFound.markings ); + result.keyphraseLength = keyphraseLength; - return { - count: keywordsFound.count, - matches: matches, - markings: markWordsInSentences( keyphraseForms, keywordsFound.sentencesWithKeywords, locale, matchWordCustomHelper ), - length: topicForms.keyphraseForms.length, - }; + return result; +} + +/** + * Calculates the keyphrase count, takes morphology into account. + * + * @deprecated Use getKeyphraseCount instead. + * + * @param {Paper} paper The paper containing keyphrase and text. + * @param {Researcher} researcher The researcher. + * + * @returns {Object} An array of all the matches, markings and the keyphrase count. + */ +export function keywordCount( paper, researcher ) { + console.warn( "This function is deprecated, use getKeyphraseCount instead." 
); + return getKeyphraseCount( paper, researcher ); } diff --git a/packages/yoastseo/src/languageProcessing/researches/matchKeywordInSubheadings.js b/packages/yoastseo/src/languageProcessing/researches/matchKeywordInSubheadings.js index fa036350f29..82e4a332912 100644 --- a/packages/yoastseo/src/languageProcessing/researches/matchKeywordInSubheadings.js +++ b/packages/yoastseo/src/languageProcessing/researches/matchKeywordInSubheadings.js @@ -3,6 +3,7 @@ import excludeTableOfContentsTag from "../helpers/sanitize/excludeTableOfContent import stripSomeTags from "../helpers/sanitize/stripNonTextTags"; import { findTopicFormsInString } from "../helpers/match/findKeywordFormsInString"; import removeHtmlBlocks from "../helpers/html/htmlParser"; +import { filterShortcodesFromHTML } from "../helpers"; /** * Computes the amount of subheadings reflecting the topic. @@ -44,6 +45,7 @@ export default function matchKeywordInSubheadings( paper, researcher ) { let text = paper.getText(); text = removeHtmlBlocks( text ); + text = filterShortcodesFromHTML( text, paper._attributes && paper._attributes.shortcodes ); text = stripSomeTags( excludeTableOfContentsTag( text ) ); const topicForms = researcher.getResearch( "morphology" ); const locale = paper.getLocale(); diff --git a/packages/yoastseo/src/languageProcessing/researches/sentences.js b/packages/yoastseo/src/languageProcessing/researches/sentences.js index 18a93b9960d..bce3cce5735 100644 --- a/packages/yoastseo/src/languageProcessing/researches/sentences.js +++ b/packages/yoastseo/src/languageProcessing/researches/sentences.js @@ -1,5 +1,6 @@ import getSentences from "../helpers/sentence/getSentences"; import removeHtmlBlocks from "../helpers/html/htmlParser"; +import { filterShortcodesFromHTML } from "../helpers"; /** * Returns the sentences from a paper. 
@@ -14,5 +15,6 @@ export default function( paper, researcher ) { let text = paper.getText(); text = removeHtmlBlocks( text ); + text = filterShortcodesFromHTML( text, paper._attributes && paper._attributes.shortcodes ); return getSentences( text, memoizedTokenizer ); } diff --git a/packages/yoastseo/src/languageProcessing/researches/wordComplexity.js b/packages/yoastseo/src/languageProcessing/researches/wordComplexity.js index e21aabcd32c..c1d6792f21d 100644 --- a/packages/yoastseo/src/languageProcessing/researches/wordComplexity.js +++ b/packages/yoastseo/src/languageProcessing/researches/wordComplexity.js @@ -87,6 +87,7 @@ export default function wordComplexity( paper, researcher ) { const memoizedTokenizer = researcher.getHelper( "memoizedTokenizer" ); let text = paper.getText(); text = helpers.removeHtmlBlocks( text ); + text = helpers.filterShortcodesFromHTML( text, paper._attributes && paper._attributes.shortcodes ); const sentences = getSentences( text, memoizedTokenizer ); // Find the complex words in each sentence. diff --git a/packages/yoastseo/src/languageProcessing/researches/wordCountInText.js b/packages/yoastseo/src/languageProcessing/researches/wordCountInText.js index 67ea613c7f7..03e61387ec2 100644 --- a/packages/yoastseo/src/languageProcessing/researches/wordCountInText.js +++ b/packages/yoastseo/src/languageProcessing/researches/wordCountInText.js @@ -1,5 +1,6 @@ import wordCount from "../helpers/word/countWords.js"; import removeHtmlBlocks from "../helpers/html/htmlParser"; +import { filterShortcodesFromHTML } from "../helpers"; /** * A result of the word count calculation. 
@@ -19,6 +20,7 @@ import removeHtmlBlocks from "../helpers/html/htmlParser"; export default function( paper ) { let text = paper.getText(); text = removeHtmlBlocks( text ); + text = filterShortcodesFromHTML( text, paper._attributes && paper._attributes.shortcodes ); return { text: text, count: wordCount( text ), diff --git a/packages/yoastseo/src/markers/removeDuplicateMarks.js b/packages/yoastseo/src/markers/removeDuplicateMarks.js index 41e4c2d84b0..04df1cd5745 100644 --- a/packages/yoastseo/src/markers/removeDuplicateMarks.js +++ b/packages/yoastseo/src/markers/removeDuplicateMarks.js @@ -1,12 +1,24 @@ import { uniqBy } from "lodash-es"; /** - * Removes duplicate marks from an array + * Removes duplicate marks from an array. + * If the marks object have position information, however, + * we don't want to remove the duplicated objects with the same original strings. + * + * @param {Array} marks The marks to remove duplications from. * - * @param {Array} marks The marks to remove duplications from * @returns {Array} A list of de-duplicated marks. */ function removeDuplicateMarks( marks ) { + /* + * We don't remove duplicates when mark has position information, for the reasons below: + * 1. Not removing duplicates is simpler than removing the duplicates by looking at the value of mark.getPosition(). + * 2. Our current approach of creating a mark object with position information eliminates the chance of having duplicates. + */ + if ( !! marks && ( marks.length === 0 || !! 
marks[ 0 ].hasPosition() ) ) { + return marks; + } + return uniqBy( marks, function( mark ) { return mark.getOriginal(); } ); diff --git a/packages/yoastseo/src/parse/build/build.js b/packages/yoastseo/src/parse/build/build.js index fcb045075ae..b96bab84afd 100644 --- a/packages/yoastseo/src/parse/build/build.js +++ b/packages/yoastseo/src/parse/build/build.js @@ -6,18 +6,29 @@ import tokenize from "./private/tokenize"; import filterTree from "./private/filterTree"; import permanentFilters from "./private/alwaysFilterElements"; import { filterBeforeTokenizing } from "./private/filterBeforeTokenizing"; +import parseBlocks from "./private/parseBlocks"; +import filterShortcodesFromTree from "../../languageProcessing/helpers/sanitize/filterShortcodesFromTree"; +import { htmlEntitiesRegex } from "../../helpers/htmlEntities"; /** - * Parses the HTML string to a tree representation of - * the HTML document. + * Parses the HTML string to a tree representation of the HTML document. * - * @param {string} htmlString The HTML string. + * @param {Paper} paper The paper to build the tree from. * @param {LanguageProcessor} languageProcessor The language processor to use. + * @param {string[]} [shortcodes] An optional array of all active shortcodes. * * @returns {Node} The tree representation of the HTML string. */ -export default function build( htmlString, languageProcessor ) { - let tree = adapt( parseFragment( htmlString, { sourceCodeLocationInfo: true } ) ); +export default function build( paper, languageProcessor, shortcodes ) { + let html = paper.getText(); + // Change HTML entities like "&" to "#amp;" to prevent early conversion to "&" -- which would invalidate token positions. 
+ html = html.replace( htmlEntitiesRegex, "#$1" ); + + let tree = adapt( parseFragment( html, { sourceCodeLocationInfo: true } ) ); + if ( tree.childNodes && tree.childNodes.length > 0 ) { + parseBlocks( paper, tree ); + } + /* * Filter out some content from the tree so that it can be correctly tokenized. We don't want to tokenize text in * between tags such as 'code' and 'script', but we do want to take into account the length of those elements when @@ -28,6 +39,11 @@ export default function build( htmlString, languageProcessor ) { // Add sentences and tokens to the tree's paragraph and heading nodes. tree = tokenize( tree, languageProcessor ); + // Filter out shortcodes from the tree. + if ( shortcodes ) { + filterShortcodesFromTree( tree, shortcodes ); + } + /* * Filter out elements we don't want to include in the analysis. Only do this after tokenization as we need to * have all inline elements in the tree during tokenization to correctly calculate sentence and token positions. diff --git a/packages/yoastseo/src/parse/build/private/adapt.js b/packages/yoastseo/src/parse/build/private/adapt.js index b01f1dcc6b0..96ca6d2da4a 100644 --- a/packages/yoastseo/src/parse/build/private/adapt.js +++ b/packages/yoastseo/src/parse/build/private/adapt.js @@ -52,6 +52,23 @@ function isBlockElement( nodeName ) { ); } +/** + * Checks whether the current node is an overarching paragraph. + * Overarching paragraphs have double `
    ` nodes (line breaks) in their children. + * We consider those to be indicating the end and start of an implicit paragraph, similar to the `autop` function in WordPress. + * + * @param {string} nodeName The name of the current node. + * @param {Node[]} children The children of the current nodes. + * + * @returns {boolean} Whether the current node is an overarching paragraph. + */ +function isOverarchingParagraph( nodeName, children ) { + return isParagraph( nodeName ) && children.some( ( node, index, childNodes ) => { + const nextNode = childNodes.length - 1 !== index && childNodes[ index + 1 ]; + return node.name === "br" && nextNode && nextNode.name === "br"; + } ); +} + /** * Adapts the `parse5` tree to our own tree representation. * @@ -65,21 +82,26 @@ function isBlockElement( nodeName ) { */ export default function adapt( tree ) { if ( isText( tree.nodeName ) ) { - return new Text( tree.value ); + return new Text( tree ); } let children = []; + let isOverarching = false; if ( ! 
isEmpty( tree.childNodes ) ) { children = tree.childNodes.map( adapt ); if ( isBlockElement( tree.nodeName ) ) { children = combineIntoImplicitParagraphs( children, tree.sourceCodeLocation ); } + if ( isOverarchingParagraph( tree.nodeName, children ) ) { + isOverarching = true; + children = combineIntoImplicitParagraphs( children, tree.sourceCodeLocation ); + } } const attributes = adaptAttributes( tree.attrs ); if ( isParagraph( tree.nodeName ) ) { - return new Paragraph( attributes, children, tree.sourceCodeLocation ); + return new Paragraph( attributes, children, tree.sourceCodeLocation, false, isOverarching ); } if ( isHeading( tree.nodeName ) ) { diff --git a/packages/yoastseo/src/parse/build/private/alwaysFilterElements.js b/packages/yoastseo/src/parse/build/private/alwaysFilterElements.js index a833d9f1fe9..7a8f99c3363 100644 --- a/packages/yoastseo/src/parse/build/private/alwaysFilterElements.js +++ b/packages/yoastseo/src/parse/build/private/alwaysFilterElements.js @@ -5,7 +5,8 @@ import { elementHasName, elementHasClass, elementHasID } from "./filterHelpers"; // These are elements that we don't want to include in the analysis and that can be child nodes of paragraphs or headings. -export const canBeChildOfParagraph = [ "code", "kbd", "math", "q", "samp", "script", "var", "#comment" ]; +export const canBeChildOfParagraph = [ "code", "kbd", "math", "q", "samp", "script", "var", "#comment", "cite", "form", + "map", "noscript", "output" ]; const permanentFilters = [ // Filters out Yoast blocks that don't need to be part of the analysis. 
@@ -48,6 +49,12 @@ const permanentFilters = [ elementHasName( "textarea" ), elementHasName( "title" ), elementHasName( "var" ), + elementHasName( "#comment" ), + elementHasName( "cite" ), + elementHasName( "form" ), + elementHasName( "map" ), + elementHasName( "noscript" ), + elementHasName( "output" ), ]; export default permanentFilters; diff --git a/packages/yoastseo/src/parse/build/private/combineIntoImplicitParagraphs.js b/packages/yoastseo/src/parse/build/private/combineIntoImplicitParagraphs.js index 1c8fa799495..d35c87b67e8 100644 --- a/packages/yoastseo/src/parse/build/private/combineIntoImplicitParagraphs.js +++ b/packages/yoastseo/src/parse/build/private/combineIntoImplicitParagraphs.js @@ -1,6 +1,7 @@ import isPhrasingContent from "./isPhrasingContent"; import { Paragraph } from "../../structure"; import { isEmpty } from "lodash-es"; +import SourceCodeLocation from "../../structure/SourceCodeLocation"; /** * Checks whether a node is inter-element whitespace. @@ -27,7 +28,35 @@ function hasChildren( node ) { } /** - * Combines runs of phrasing content ("inline" tags like `a` and `span`, and text) into implicit paragraphs. + * Updates the source code location of an implicit paragraph based on its children. + * @param {Paragraph} implicitParagraph The implicit paragraph to update. + * @returns {void} + */ +const updateImplicitParagraphLocation = ( implicitParagraph ) => { + const [ firstChild ] = implicitParagraph.childNodes.slice( 0 ); + const [ lastChild ] = implicitParagraph.childNodes.slice( -1 ); + if ( ( firstChild.sourceCodeRange || firstChild.sourceCodeLocation ) && ( lastChild.sourceCodeRange || lastChild.sourceCodeLocation ) ) { + const startOffset = firstChild.sourceCodeRange ? firstChild.sourceCodeRange.startOffset : firstChild.sourceCodeLocation.startOffset; + const endOffset = lastChild.sourceCodeRange ? 
lastChild.sourceCodeRange.endOffset : lastChild.sourceCodeLocation.endOffset; + implicitParagraph.sourceCodeLocation = new SourceCodeLocation( { startOffset: startOffset, endOffset: endOffset } ); + } +}; + +/** + * Checks whether the current node is part of a double line break. + * @param {Node} currentNode The current node. + * @param {Node} prevNode The previous node (if available). + * @param {Node} nextNode The next node (if available). + * @returns {boolean} Whether the current node is part of a double line break + */ +const isPartOfDoubleBreak = ( currentNode, prevNode, nextNode ) => { + const prevNodeIsBreak = prevNode && prevNode.name === "br"; + const nextNodeIsBreak = nextNode && nextNode.name === "br"; + return currentNode.name === "br" && ( prevNodeIsBreak || nextNodeIsBreak ); +}; + +/** + * Combines series of consecutive phrasing content ("inline" tags like `a` and `span`, and text) into implicit paragraphs. * * @see https://html.spec.whatwg.org/#paragraphs * @@ -38,36 +67,48 @@ function hasChildren( node ) { */ function combineIntoImplicitParagraphs( nodes, parentSourceCodeLocation = {} ) { const newNodes = []; - let currentSourceCodeLocation = {}; - // For implicit paragraphs, strip off the start and end tag information from the parent's source code location. - if ( ! isEmpty( parentSourceCodeLocation ) ) { - currentSourceCodeLocation = { + if ( isEmpty( parentSourceCodeLocation ) ) { + const firstNode = nodes[ 0 ]; + const lastNode = nodes[ nodes.length - 1 ]; + + if ( firstNode && lastNode && firstNode.sourceCodeLocation && lastNode.sourceCodeLocation ) { + currentSourceCodeLocation = new SourceCodeLocation( { + startOffset: firstNode.sourceCodeLocation.startOffset, + endOffset: lastNode.sourceCodeLocation.endOffset, + } ); + } + } else { + currentSourceCodeLocation = new SourceCodeLocation( { startOffset: parentSourceCodeLocation.startTag ? 
parentSourceCodeLocation.startTag.endOffset : parentSourceCodeLocation.startOffset, endOffset: parentSourceCodeLocation.endTag ? parentSourceCodeLocation.endTag.startOffset : parentSourceCodeLocation.endOffset, - }; + } ); } let implicitParagraph = Paragraph.createImplicit( {}, [], currentSourceCodeLocation ); + nodes.forEach( ( node, index, allNodes ) => { + const prevNode = 0 !== index && allNodes[ index - 1 ]; + const nextNode = allNodes.length - 1 !== index && allNodes[ index + 1 ]; - nodes.forEach( node => { - if ( isPhrasingContent( node.name ) && ! isInterElementWhitespace( node ) ) { + if ( isPhrasingContent( node.name ) && ! isInterElementWhitespace( node ) && ! isPartOfDoubleBreak( node, prevNode, nextNode ) ) { + // If the node is phrasing content, add it to the implicit paragraph. implicitParagraph.childNodes.push( node ); } else { + // If the node is not phrasing content, this means that the implicit paragraph has ended. if ( hasChildren( implicitParagraph ) ) { - // The implicit paragraph has children: end the current implicit paragraph and start a new one. + // If the implicit paragraph has children this means an implicit paragraph was created: + // end the current implicit paragraph and start a new one. - if ( ! isEmpty( node.sourceCodeLocation ) ) { - // Update the endOffset of the current implicit paragraph to be the start of the current node. - if ( ! isEmpty( implicitParagraph.sourceCodeLocation ) ) { - implicitParagraph.sourceCodeLocation.endOffset = node.sourceCodeLocation.startOffset; - } - // Update the startOffset of the next implicit paragraph to be the end of the current node. + // But before pushing, we need to update the source code location of the implicit paragraph based on its child nodes. + updateImplicitParagraphLocation( implicitParagraph ); + + // Update the startOffset of the next implicit paragraph to be the end of the current node. 
+ if ( node.sourceCodeLocation ) { currentSourceCodeLocation.startOffset = node.sourceCodeLocation.endOffset; } @@ -79,9 +120,10 @@ function combineIntoImplicitParagraphs( nodes, parentSourceCodeLocation = {} ) { } ); if ( hasChildren( implicitParagraph ) ) { + updateImplicitParagraphLocation( implicitParagraph ); + newNodes.push( implicitParagraph ); } - return newNodes; } diff --git a/packages/yoastseo/src/parse/build/private/getTextElementPositions.js b/packages/yoastseo/src/parse/build/private/getTextElementPositions.js index 504d058c56c..250dfe24a08 100644 --- a/packages/yoastseo/src/parse/build/private/getTextElementPositions.js +++ b/packages/yoastseo/src/parse/build/private/getTextElementPositions.js @@ -26,7 +26,18 @@ function getDescendantPositions( descendantNodes ) { descendantTagPositions.push( node.sourceCodeLocation ); } else { if ( node.sourceCodeLocation.startTag ) { - descendantTagPositions.push( node.sourceCodeLocation.startTag ); + const startRange = { + startOffset: node.sourceCodeLocation.startTag.startOffset, + endOffset: node.sourceCodeLocation.startTag.endOffset, + }; + /* + * Here, we need to account for the fact that earlier (in innerText.js), we treated a
    as a newline character. + * Therefore, we need to subtract 1 from the endOffset to not count it twice. + */ + if ( node.name === "br" ) { + startRange.endOffset = startRange.endOffset - 1; + } + descendantTagPositions.push( startRange ); } /* * Check whether node has an end tag before adding it to the array. @@ -93,6 +104,20 @@ function adjustTextElementStart( descendantTagPositions, textElementStart ) { return textElementStart; } +/** + * Retrieves the initial start offset for the first text element. + * Normally, that is the end offset of the start tag of the parent node. + * In case of implicit paragraphs, we use the start offset of the node as a whole. + * + * @param {Paragraph|Heading} node The node to retrieve the start position from. + * @returns {number} The start offset for the first text element. + */ +const getTextElementStart = ( node ) => { + return node instanceof Paragraph && node.isImplicit + ? node.sourceCodeLocation.startOffset + : node.sourceCodeLocation.startTag.endOffset; +}; + /** * Gets the start and end positions of text elements (sentences or tokens) in the source code. * @@ -110,16 +135,12 @@ export default function getTextElementPositions( node, textElements, startOffset } /* - * Set the start position of the first element. If the node is an implicit paragraph, which don't have start and - * end tags, set the start position to the start of the node. Otherwise, set the start position to the end of the - * node's start tag. + * Set the (initial) start offset of the first text element. + * If the start offset has been provided (in the case of Tokens), use that. + * Otherwise, determine the start offset from the parent Paragraph or Heading node. */ - let textElementStart; - if ( node instanceof Paragraph && node.isImplicit ) { - textElementStart = node.sourceCodeLocation.startOffset; - } else { - textElementStart = startOffset >= 0 ? startOffset : node.sourceCodeLocation.startTag.endOffset; - } + let textElementStart = startOffset >= 0 ? 
startOffset : getTextElementStart( node ); + let textElementEnd; let descendantTagPositions = []; @@ -128,9 +149,11 @@ export default function getTextElementPositions( node, textElements, startOffset * should have this property). If such nodes exist, store the positions of each node's opening and closing tags in * an array. These positions will have to be taken into account when calculating the position of the text elements. */ - const descendantNodes = node.findAll( descendantNode => descendantNode.sourceCodeLocation, true ); - if ( descendantNodes.length > 0 ) { - descendantTagPositions = getDescendantPositions( descendantNodes ); + if ( node.findAll ) { + const descendantNodes = node.findAll( descendantNode => descendantNode.sourceCodeLocation, true ); + if ( descendantNodes.length > 0 ) { + descendantTagPositions = getDescendantPositions( descendantNodes ); + } } textElements.forEach( ( textElement ) => { diff --git a/packages/yoastseo/src/parse/build/private/parseBlocks.js b/packages/yoastseo/src/parse/build/private/parseBlocks.js new file mode 100644 index 00000000000..67eff4563c3 --- /dev/null +++ b/packages/yoastseo/src/parse/build/private/parseBlocks.js @@ -0,0 +1,138 @@ +const blockTokenizer = /)[^])*)\5|[^]*?)}\s+)?(\/)?-->/g; + +/** + * Get all the blocks including the inner blocks. + * + * @param {Paper} paper The paper to get all the blocks from. + * + * @returns {Object[]} An array of blocks. + */ +const getAllBlocks = ( paper ) => { + const blocks = paper._attributes.wpBlocks; + const flattenBlocks = []; + if ( ! ( blocks && blocks.length > 0 ) ) { + return []; + } + blocks.forEach( ( block ) => { + if ( block.innerBlocks.length > 0 ) { + const innerBlocks = block.innerBlocks; + flattenBlocks.push( + block, ...innerBlocks + ); + } else { + flattenBlocks.push( block ); + } + } ); + return flattenBlocks; +}; + + +/** + * Gets the offset of each block and set it to the tree. + * + * @param {Object[]} blocks An array of blocks. 
+ * @param {string} text The text. + * + * @returns {void} + */ +function updateBlocksOffset( blocks, text ) { + if ( blocks.length === 0 ) { + return; + } + blocks.forEach( currentBlock => { + const matches = blockTokenizer.exec( text ); + + if ( null === matches ) { + return; + } + + const [ match ] = matches; + + const startedAt = matches.index; + const length = match.length; + + currentBlock.startOffset = startedAt; + currentBlock.contentOffset = startedAt + length + 1; + + if ( currentBlock.innerBlocks && currentBlock.innerBlocks.length > 0 ) { + updateBlocksOffset( currentBlock.innerBlocks, text ); + } + } ); +} + + +/** + * Gets the client id for each block and set it to the child nodes. + * Additionally, when a node has an attribute with 'id' key, also set this id to the first. + * + * @param {Node} rootNode The root node. + * @param {Object[]} blocks An array of blocks. + * @param {string} clientId The client id to set. + * + * @returns {void} + */ +function updateClientIdAndAttrIdForSubtree( rootNode, blocks, clientId ) { + if ( ! rootNode ) { + return; + } + + let currentClientId = clientId; + if ( rootNode.sourceCodeLocation && rootNode.sourceCodeLocation.startOffset ) { + const foundBlock = blocks.find( block => block.contentOffset === rootNode.sourceCodeLocation.startOffset ); + if ( foundBlock ) { + currentClientId = foundBlock.clientId; + } + } + + // If the clientId is not undefined, also set this to the root node. + if ( currentClientId ) { + rootNode.clientId = currentClientId; + } + + // If the node has children, update the clientId for them. + ( rootNode.childNodes || [] ).forEach( ( node ) => { + if ( node.attributes && node.attributes.id ) { + /* + * If the node's child has an attribute with 'id' key, also set this id to the first and third child node. + * This step is specifically for parsing the Yoast blocks. + * + * For Yoast blocks, if a node has attribute with 'id' key, this means that this node represents a sub-block. 
+ * A sub-block has four child nodes: + * 1. The first child node of the sub-block, represents the first section of that sub-block. + * - For example, in Yoast FAQ block, the first child node would represent the "question" section. + * 2. The second child node of the sub-block, only contains a new line. + * 3. The third child node of the sub-block, represents the second section of that sub-block. + * - For example, in Yoast FAQ block, the second child node would represent the "answer" section. + * 4. The fourth child node of the sub-block, only contains a new line. + */ + if ( node.childNodes && node.childNodes.length > 3 ) { + node.childNodes[ 0 ].attributeId = node.attributes.id; + node.childNodes[ 0 ].isFirstSection = true; + + node.childNodes[ 2 ].attributeId = node.attributes.id; + node.childNodes[ 2 ].isFirstSection = false; + } + } + updateClientIdAndAttrIdForSubtree( node, blocks, currentClientId ); + } ); +} + +/** + * Parses blocks and updates the clientId for the subtree. + * Additionally, when a node has an attribute with 'id' key, also set this id to the first and third child node of that node. + * + * @param {Paper} paper The paper to parse. + * @param {Node} node The node to parse. 
+ * + * @returns {void} + */ +export default function( paper, node ) { + const blocks = paper._attributes.wpBlocks || []; + blockTokenizer.lastIndex = 0; + updateBlocksOffset( blocks, paper.getText() ); + + const rawBlocks = getAllBlocks( paper ); + + // eslint-disable-next-line no-undefined + updateClientIdAndAttrIdForSubtree( node, rawBlocks, undefined ); +} diff --git a/packages/yoastseo/src/parse/build/private/tokenize.js b/packages/yoastseo/src/parse/build/private/tokenize.js index 6f8933fd0ba..34e2423ed8e 100644 --- a/packages/yoastseo/src/parse/build/private/tokenize.js +++ b/packages/yoastseo/src/parse/build/private/tokenize.js @@ -1,17 +1,18 @@ import { Heading, Paragraph } from "../../structure"; import getTextElementPositions from "./getTextElementPositions"; +import { hashedHtmlEntities } from "../../../helpers/htmlEntities"; /** * Splits the sentence into tokens, determines their positions in the source code, and puts them on the sentence. * * @param {Paragraph|Heading} node The paragraph or heading node to split into sentences. * @param {Sentence} sentence The sentence. - * @param {function} splitIntoTokens The function to use to split the sentence into tokens. + * @param {LanguageProcessor} languageProcessor The language processor for the current language. * * @returns {Sentence} The sentence, with tokens. */ -function getTokens( node, sentence, splitIntoTokens ) { - sentence.tokens = splitIntoTokens( sentence ); +function getTokens( node, sentence, languageProcessor ) { + sentence.tokens = languageProcessor.splitIntoTokens( sentence ); sentence.tokens = getTextElementPositions( node, sentence.tokens, sentence.sourceCodeRange.startOffset ); return sentence; } @@ -31,11 +32,26 @@ function getSentences( node, languageProcessor ) { // Add position information to the sentences. sentences = getTextElementPositions( node, sentences ); // Tokenize sentences into tokens. 
- return sentences.map( sentence => getTokens( node, sentence, languageProcessor.splitIntoTokens ) ); + return sentences.map( sentence => { + sentence = getTokens( node, sentence, languageProcessor ); + // Now positions have been determined, change HTML entities that had earlier been converted to hashed versions back to their short version. + // For example, "&" was earlier converted into "#amp;" and is now converted into "&". + // We make this change in both the Sentence and the accompanying Tokens. + hashedHtmlEntities.forEach( ( character, hashedHtmlEntity ) => { + // We use split/join instead of replaceAll to support older browsers. + sentence.text = sentence.text.split( hashedHtmlEntity ).join( character ); + sentence.tokens.map( token => { + token.text = token.text.split( hashedHtmlEntity ).join( character ); + return token; + } ); + } ); + return sentence; + } ); } /** - * Splits any paragraph and heading nodes in the tree into sentences and tokens. + * Splits any Paragraph and Heading nodes in the tree into sentences and tokens. + * Excludes overarching Paragraphs, as those will have (implicit) paragraphs as their children. * * @param {Node} tree The tree to process. * @param {LanguageProcessor} languageProcessor The language processor to use. @@ -43,7 +59,7 @@ function getSentences( node, languageProcessor ) { * @returns {Node} The processed tree. 
*/ function tokenize( tree, languageProcessor ) { - if ( tree instanceof Paragraph || tree instanceof Heading ) { + if ( ( tree instanceof Paragraph && tree.name !== "p-overarching" ) || tree instanceof Heading ) { tree.sentences = getSentences( tree, languageProcessor ); } diff --git a/packages/yoastseo/src/parse/language/LanguageProcessor.js b/packages/yoastseo/src/parse/language/LanguageProcessor.js index 68644efefd7..35e912ff7be 100644 --- a/packages/yoastseo/src/parse/language/LanguageProcessor.js +++ b/packages/yoastseo/src/parse/language/LanguageProcessor.js @@ -1,22 +1,9 @@ import Sentence from "../structure/Sentence"; import Token from "../structure/Token"; -import { punctuationRegexStart, punctuationRegexEnd } from "../../languageProcessing/helpers/sanitize/removePunctuation"; +import getWordsForHTMLParser from "../../languageProcessing/helpers/word/getWordsForHTMLParser"; const whitespaceRegex = /^\s+$/; -/* - * The following regex matches a word separator. A word separator is either a whitespace, a slash, a backslash, a - * tab or a non-breaking space. - * The regex is used to split a text into tokens. - * Do not add punctuation marks to this regex, as they are handled separately inside splitIntoTokens(). - * The word separator explicitly only contains characters that split two words and not a word and a space. - * A space is a word separator because it separates two words if it occurs between two words. For example: "foo bar" - * A slash is a word separator because it separates two words if it directly borders those words. For example: "foo/bar" - * A backslash is a word separator because it separates two words if it occurs between two words. For example: "foo\bar" - * A tab is a word separator because it separates two words if it occurs between two words. For example: "foo bar" - * A non-breaking space is a word separator because it separates two words if it occurs between two words. 
For example: "foo\u00A0bar" - */ -const wordSeparatorsRegex = /([\s\t\u00A0])/; /** * Contains language-specific logic for splitting a text into sentences and tokens. @@ -72,37 +59,14 @@ class LanguageProcessor { // Retrieve sentence from sentence class const sentenceText = sentence.text; - // Split the sentence string into tokens. Those tokens are unrefined as they may contain punctuation. - const rawTokens = sentenceText.split( wordSeparatorsRegex ).filter( x => x !== "" ); - - const tokenTexts = []; - rawTokens.forEach( token => { - if ( token === "" ) { - return; - } - // Pretokens contains all that occurs before the first letter of the token. - const preTokens = []; - // Posttokens contains all that occurs after the last letter of the token. - const postTokens = []; - - // Add all punctuation marks that occur before the first letter of the token to the pretokens array. - while ( punctuationRegexStart.test( token ) ) { - preTokens.push( token[ 0 ] ); - token = token.slice( 1 ); - } - // Add all punctuation marks that occur after the last letter of the token to the posttokens array. - while ( punctuationRegexEnd.test( token ) ) { - // Using unshift here because we are iterating from the end of the string to the beginning, - // and we want to keep the order of the punctuation marks. - // Therefore, we add them to the start of the array. - postTokens.unshift( token[ token.length - 1 ] ); - token = token.slice( 0, -1 ); - } + // If there is a custom getWords helper use its output for retrieving words/tokens. 
+ const tokenTextsCustom = this.researcher.getHelper( "splitIntoTokensCustom" ); + if ( tokenTextsCustom ) { + const tokensCustom = tokenTextsCustom( sentence ); + return tokensCustom.map( tokenText => new Token( tokenText ) ); + } - let currentTokens = [ ...preTokens, token, ...postTokens ]; - currentTokens = currentTokens.filter( x => x !== "" ); - tokenTexts.push( ...currentTokens ); - } ); + const tokenTexts = getWordsForHTMLParser( sentenceText ); return tokenTexts.map( tokenText => new Token( tokenText ) ); } diff --git a/packages/yoastseo/src/parse/structure/Node.js b/packages/yoastseo/src/parse/structure/Node.js index aa5b82a2602..a0fb631904b 100644 --- a/packages/yoastseo/src/parse/structure/Node.js +++ b/packages/yoastseo/src/parse/structure/Node.js @@ -46,9 +46,9 @@ class Node { /** * Finds all nodes in the tree that satisfies the given condition. * - * @param {function} condition The condition that a node should satisfy to end up in the list. - * @param {boolean} recurseFoundNodes=false Whether to recurse into found nodes - * to see if the condition also applies to sub-nodes of the found node. + * @param {function} condition The condition that a node should satisfy to end up in the list. + * @param {boolean} recurseFoundNodes=false Whether to recurse into found nodes to see if the condition + * also applies to sub-nodes of the found node. * * @returns {(Node|Text|Paragraph|Heading)[]} The list of nodes that satisfy the condition. */ diff --git a/packages/yoastseo/src/parse/structure/Paragraph.js b/packages/yoastseo/src/parse/structure/Paragraph.js index 9dee5c59b75..611eb6a41a6 100644 --- a/packages/yoastseo/src/parse/structure/Paragraph.js +++ b/packages/yoastseo/src/parse/structure/Paragraph.js @@ -13,9 +13,13 @@ class Paragraph extends Node { * @param {(Node|Text)[]} childNodes This paragraph's child nodes. * @param {Object} sourceCodeLocationInfo This paragraph's location in the source code, from parse5. 
* @param {boolean} isImplicit Whether this paragraph is an implicit paragraph, or an explicit paragraph. + * @param {boolean} isOverarching Whether this paragraph is overarching text that is separated by double line breaks. */ - constructor( attributes = {}, childNodes = [], sourceCodeLocationInfo = {}, isImplicit = false ) { - super( "p", attributes, childNodes, sourceCodeLocationInfo ); + constructor( attributes = {}, childNodes = [], sourceCodeLocationInfo = {}, + isImplicit = false, isOverarching = false ) { + // We rename any overarching `

    ` element to not match it as a stand-alone paragraph downstream. + const name = isOverarching ? "p-overarching" : "p"; + super( name, attributes, childNodes, sourceCodeLocationInfo ); /** * Whether this paragraph is explicit (defined by an explicit `

    ` tag within the markup), * or implicit (defined by a run of phrasing content). diff --git a/packages/yoastseo/src/parse/structure/Text.js b/packages/yoastseo/src/parse/structure/Text.js index cb8d3590948..f9c703424fd 100644 --- a/packages/yoastseo/src/parse/structure/Text.js +++ b/packages/yoastseo/src/parse/structure/Text.js @@ -1,20 +1,26 @@ +import SourceCodeLocation from "./SourceCodeLocation"; /** * A text. */ class Text { /** - * Creates a new text. + * Creates a new Text object, that consist of some text and a source code range. * - * @param {string} value This text's value, e.g. its content. + * @param {object} textNode The current #text node in the parse5 tree. */ - constructor( value ) { + constructor( textNode ) { this.name = "#text"; /** * This text's content. * * @type {string} */ - this.value = value; + this.value = textNode.value; + + this.sourceCodeRange = new SourceCodeLocation( { + startOffset: textNode.sourceCodeLocation.startOffset, + endOffset: textNode.sourceCodeLocation.endOffset, + } ); } } diff --git a/packages/yoastseo/src/parse/structure/Token.js b/packages/yoastseo/src/parse/structure/Token.js index 8298d89cc23..fd8e2f8f8d3 100644 --- a/packages/yoastseo/src/parse/structure/Token.js +++ b/packages/yoastseo/src/parse/structure/Token.js @@ -1,3 +1,5 @@ +import { normalizeSingle } from "../../languageProcessing"; + /** * A token representing a word, whitespace or punctuation in the sentence. */ @@ -6,14 +8,16 @@ class Token { * Creates a new token. * * @param {string} text The token's text. + * @param {SourceCodeRange} sourceCodeRange The start and end positions of the token in the source code. */ - constructor( text ) { - this.text = text; + constructor( text, sourceCodeRange = {} ) { + // Normalize single quotes so that tokens can be matched with strings regardless of single quote type. + this.text = normalizeSingle( text ); /** * The start and end positions of the token in the source code. 
* @type {SourceCodeRange} */ - this.sourceCodeRange = {}; + this.sourceCodeRange = sourceCodeRange; } } diff --git a/packages/yoastseo/src/parse/traverse/findAllInTree.js b/packages/yoastseo/src/parse/traverse/findAllInTree.js index 7adbb3e68a2..82a1b388ac5 100644 --- a/packages/yoastseo/src/parse/traverse/findAllInTree.js +++ b/packages/yoastseo/src/parse/traverse/findAllInTree.js @@ -1,9 +1,10 @@ /** * Finds all nodes in the tree that satisfies the given condition. * - * @param {Object} tree The tree. - * @param {function} condition The condition that a node should satisfy to end up in the list. - * @param {boolean} recurseFoundNodes=false Whether to recurse into found nodes to see if the condition also applies to sub-nodes of the found node. + * @param {Object} tree The tree. + * @param {function} condition The condition that a node should satisfy to end up in the list. + * @param {boolean} recurseFoundNodes=false Whether to recurse into found nodes to see if the condition also applies to + * sub-nodes of the found node. * If false, as soon as a node is found that satisfies the condition, it is added to the list and no further recursion is done through its children. * If true, the node is added to the list and its children are also checked for the condition. * If they satisfy the condition, they are also added to the list. 
diff --git a/packages/yoastseo/src/parse/traverse/innerText.js b/packages/yoastseo/src/parse/traverse/innerText.js index ab454196eef..71df6b6dfce 100644 --- a/packages/yoastseo/src/parse/traverse/innerText.js +++ b/packages/yoastseo/src/parse/traverse/innerText.js @@ -14,6 +14,8 @@ export default function innerText( node ) { node.childNodes.forEach( child => { if ( child.name === "#text" ) { text += child.value; + } else if ( child.name === "br" ) { + text += "\n"; } else { text += innerText( child ); } diff --git a/packages/yoastseo/src/pluggable.js b/packages/yoastseo/src/pluggable.js index e5dd5717ad0..f953327980f 100644 --- a/packages/yoastseo/src/pluggable.js +++ b/packages/yoastseo/src/pluggable.js @@ -119,7 +119,7 @@ Pluggable.prototype._reloaded = function( pluginName ) { * @param {string} pluginName The plugin that is registering the modification. * @param {number} priority (optional) Used to specify the order in which the callables associated with a particular filter are called. * Lower numbers correspond with earlier execution. - * @returns {boolean} Whether or not applying the hook was successfull. + * @returns {boolean} Whether or not applying the hook was successful. */ Pluggable.prototype._registerModification = function( modification, callable, pluginName, priority ) { if ( typeof modification !== "string" ) { diff --git a/packages/yoastseo/src/scoring/assessments/assessment.js b/packages/yoastseo/src/scoring/assessments/assessment.js index b349ddd8270..a9d6edcb950 100644 --- a/packages/yoastseo/src/scoring/assessments/assessment.js +++ b/packages/yoastseo/src/scoring/assessments/assessment.js @@ -3,6 +3,7 @@ import { sanitizeString } from "../../languageProcessing"; import { isUndefined } from "lodash-es"; import removeHtmlBlocks from "../../languageProcessing/helpers/html/htmlParser"; +import { filterShortcodesFromHTML } from "../../languageProcessing/helpers"; /** * Represents the defaults of an assessment. 
@@ -43,6 +44,7 @@ class Assessment { hasEnoughContentForAssessment( paper, contentNeededForAssessment = 50 ) { let text = isUndefined( paper ) ? "" : paper.getText(); text = removeHtmlBlocks( text ); + text = filterShortcodesFromHTML( text, paper._attributes && paper._attributes.shortcodes ); // The isUndefined check is necessary, because if paper is undefined .getText will throw a typeError. return sanitizeString( text ).length >= contentNeededForAssessment; diff --git a/packages/yoastseo/src/scoring/assessments/inclusiveLanguage/configuration/cultureAssessments.js b/packages/yoastseo/src/scoring/assessments/inclusiveLanguage/configuration/cultureAssessments.js index 5eb8f23d816..71742913267 100644 --- a/packages/yoastseo/src/scoring/assessments/inclusiveLanguage/configuration/cultureAssessments.js +++ b/packages/yoastseo/src/scoring/assessments/inclusiveLanguage/configuration/cultureAssessments.js @@ -6,6 +6,7 @@ import { potentiallyHarmfulUnless, harmfulNonInclusive, harmfulPotentiallyNonInclusive, + potentiallyHarmfulUnlessAnimalsObjects, } from "./feedbackStrings"; /* @@ -112,8 +113,7 @@ const cultureAssessments = [ nonInclusivePhrases: [ "oriental" ], inclusiveAlternatives: "Asian. When possible, be more specific (e.g. 
East Asian)", score: SCORES.POTENTIALLY_NON_INCLUSIVE, - feedbackFormat: harmfulPotentiallyNonInclusive + " Unless you are referring to objects or animals, " + - "consider using an alternative, such as %2$s.", + feedbackFormat: potentiallyHarmfulUnlessAnimalsObjects, }, { identifier: "asianAmerican", @@ -242,15 +242,15 @@ const cultureAssessments = [ { identifier: "gypsy", nonInclusivePhrases: [ "gypsy", "gipsy" ], - inclusiveAlternatives: [ "Romani, Romani person", "traveler, wanderer, free-spirited" ], + inclusiveAlternatives: [ "Rom, Roma person, Romani, Romani person", "traveler, wanderer, free-spirited" ], score: SCORES.POTENTIALLY_NON_INCLUSIVE, feedbackFormat: [ potentiallyHarmfulUnless, "If you are referring to a lifestyle rather than the ethnic group or " + - "their music, consider using an alternative such as %3$s." ].join( " " ), + "their music, consider using an alternative such as %3$s." ].join( " " ), }, { identifier: "gypsies", nonInclusivePhrases: [ "gypsies", "gipsies" ], - inclusiveAlternatives: [ "Romani, Romani people", "travelers, wanderers, free-spirited" ], + inclusiveAlternatives: [ "Roma, Romani, Romani people", "travelers, wanderers, free-spirited" ], score: SCORES.POTENTIALLY_NON_INCLUSIVE, feedbackFormat: [ potentiallyHarmfulUnless, "If you are referring to a lifestyle rather than the ethnic group or " + "their music, consider using an alternative such as %3$s." 
].join( " " ), @@ -312,14 +312,6 @@ const cultureAssessments = [ score: SCORES.NON_INCLUSIVE, feedbackFormat: potentiallyHarmful, }, - { - identifier: "ebonics", - nonInclusivePhrases: [ "Ebonics" ], - inclusiveAlternatives: "African American English, African American Language", - score: SCORES.NON_INCLUSIVE, - feedbackFormat: potentiallyHarmful, - caseSensitive: true, - }, { identifier: "powWow", nonInclusivePhrases: [ "pow-wow" ], diff --git a/packages/yoastseo/src/scoring/assessments/inclusiveLanguage/configuration/feedbackStrings.js b/packages/yoastseo/src/scoring/assessments/inclusiveLanguage/configuration/feedbackStrings.js index 320ddcfd9d2..1f005c8814d 100644 --- a/packages/yoastseo/src/scoring/assessments/inclusiveLanguage/configuration/feedbackStrings.js +++ b/packages/yoastseo/src/scoring/assessments/inclusiveLanguage/configuration/feedbackStrings.js @@ -53,3 +53,7 @@ export const potentiallyHarmfulUnlessNonInclusive = [ harmfulNonInclusive, alter * "Alternatively, if talking about a specific person, use their preferred descriptor if known." */ export const preferredDescriptorIfKnown = "Alternatively, if talking about a specific person, use their preferred descriptor if known."; + +// An additional string to target phrases that are potentially non-inclusive unless referring to animals or objects. +export const potentiallyHarmfulUnlessAnimalsObjects = [ harmfulPotentiallyNonInclusive, + "Unless you are referring to objects or animals, consider using an alternative, such as %2$s." 
].join( " " ); diff --git a/packages/yoastseo/src/scoring/assessments/inclusiveLanguage/configuration/otherAssessments.js b/packages/yoastseo/src/scoring/assessments/inclusiveLanguage/configuration/otherAssessments.js index 88d3d5298ad..31be7686431 100644 --- a/packages/yoastseo/src/scoring/assessments/inclusiveLanguage/configuration/otherAssessments.js +++ b/packages/yoastseo/src/scoring/assessments/inclusiveLanguage/configuration/otherAssessments.js @@ -1,4 +1,5 @@ import { SCORES } from "./scores"; +import { potentiallyHarmfulUnlessAnimalsObjects } from "./feedbackStrings"; const otherAssessments = [ { @@ -7,26 +8,47 @@ const otherAssessments = [ inclusiveAlternatives: [ "marginalized groups", "underrepresented groups", "gender and sexuality minorities" ], score: SCORES.POTENTIALLY_NON_INCLUSIVE, feedbackFormat: "Be careful when using %1$s as it is potentially overgeneralizing. " + - "Consider using an alternative, such as %2$s, %3$s or specific minorities, such as %4$s.", + "Consider using an alternative, such as %2$s, %3$s or specific minorities, such as %4$s.", }, { identifier: "normal", - nonInclusivePhrases: [ "normal person", "normal people", "normal behaviour", "normal behavior", "mentally" + - " normal", "behaviorally normal", "behaviourally normal", "psychologically normal" ], + nonInclusivePhrases: [ "normal person", "normal people", "mentally normal", "psychologically normal" ], inclusiveAlternatives: [ "typical" ], score: SCORES.NON_INCLUSIVE, feedbackFormat: "Avoid using normal as it is potentially harmful. 
" + "Consider using an alternative, such as %2$s or a specific characteristic or experience if it is known.", }, + { + identifier: "behaviorallyNormal", + nonInclusivePhrases: [ "behaviorally normal", "behaviourally normal" ], + inclusiveAlternatives: [ "showing typical behavior or a specific characteristic or experience if it is known" ], + score: SCORES.POTENTIALLY_NON_INCLUSIVE, + feedbackFormat: potentiallyHarmfulUnlessAnimalsObjects, + }, { identifier: "abnormal", - nonInclusivePhrases: [ "abnormal person", "abnormal people", "abnormal behaviour", "abnormal behavior", "mentally" + - " abnormal", "behaviorally abnormal", "behaviourally abnormal", "psychologically abnormal" ], + nonInclusivePhrases: [ "abnormal person", "abnormal people", "mentally abnormal", "psychologically abnormal" ], inclusiveAlternatives: [ "atypical" ], score: SCORES.NON_INCLUSIVE, feedbackFormat: "Avoid using abnormal as it is potentially harmful. " + "Consider using an alternative, such as %2$s or a specific characteristic or experience if it is known.", }, + { + identifier: "behaviorallyAbnormal", + nonInclusivePhrases: [ "behaviorally abnormal", "behaviourally abnormal" ], + inclusiveAlternatives: [ "showing atypical behavior, showing dysfunctional behavior " + + "or a specific characteristic or experience if it is known" ], + score: SCORES.POTENTIALLY_NON_INCLUSIVE, + feedbackFormat: potentiallyHarmfulUnlessAnimalsObjects, + }, + { + identifier: "abnormalBehavior", + nonInclusivePhrases: [ "abnormal behavior", "abnormal behaviour" ], + inclusiveAlternatives: [ "atypical behavior, unusual behavior " + + "or a specific characteristic or experience if it is known" ], + score: SCORES.POTENTIALLY_NON_INCLUSIVE, + feedbackFormat: potentiallyHarmfulUnlessAnimalsObjects, + }, ]; otherAssessments.forEach( assessment => { diff --git a/packages/yoastseo/src/scoring/assessments/index.js b/packages/yoastseo/src/scoring/assessments/index.js index c3e0c6a3c44..b482613b67f 100644 --- 
a/packages/yoastseo/src/scoring/assessments/index.js +++ b/packages/yoastseo/src/scoring/assessments/index.js @@ -12,7 +12,7 @@ import FunctionWordsInKeyphraseAssessment from "./seo/FunctionWordsInKeyphraseAs import InternalLinksAssessment from "./seo/InternalLinksAssessment"; import IntroductionKeywordAssessment from "./seo/IntroductionKeywordAssessment"; import KeyphraseLengthAssessment from "./seo/KeyphraseLengthAssessment"; -import KeywordDensityAssessment from "./seo/KeywordDensityAssessment"; +import KeyphraseDensityAssessment, { KeywordDensityAssessment } from "./seo/KeywordDensityAssessment"; import MetaDescriptionKeywordAssessment from "./seo/MetaDescriptionKeywordAssessment"; import MetaDescriptionLengthAssessment from "./seo/MetaDescriptionLengthAssessment"; import OutboundLinksAssessment from "./seo/OutboundLinksAssessment"; @@ -44,6 +44,7 @@ const seo = { InternalLinksAssessment, IntroductionKeywordAssessment, KeyphraseLengthAssessment, + KeyphraseDensityAssessment, KeywordDensityAssessment, MetaDescriptionKeywordAssessment, MetaDescriptionLengthAssessment, diff --git a/packages/yoastseo/src/scoring/assessments/readability/SubheadingDistributionTooLongAssessment.js b/packages/yoastseo/src/scoring/assessments/readability/SubheadingDistributionTooLongAssessment.js index a030e83b7ee..b241572f3e4 100644 --- a/packages/yoastseo/src/scoring/assessments/readability/SubheadingDistributionTooLongAssessment.js +++ b/packages/yoastseo/src/scoring/assessments/readability/SubheadingDistributionTooLongAssessment.js @@ -10,6 +10,7 @@ import getWords from "../../../languageProcessing/helpers/word/getWords"; import AssessmentResult from "../../../values/AssessmentResult"; import { stripFullTags as stripTags } from "../../../languageProcessing/helpers/sanitize/stripHTMLTags"; import removeHtmlBlocks from "../../../languageProcessing/helpers/html/htmlParser"; +import { filterShortcodesFromHTML } from "../../../languageProcessing/helpers"; /** * Represents the assessment 
for calculating the text after each subheading. @@ -85,6 +86,7 @@ class SubheadingsDistributionTooLong extends Assessment { const customCountLength = researcher.getHelper( "customCountLength" ); let text = paper.getText(); text = removeHtmlBlocks( text ); + text = filterShortcodesFromHTML( text, paper._attributes && paper._attributes.shortcodes ); return customCountLength ? customCountLength( text ) : getWords( text ).length; } diff --git a/packages/yoastseo/src/scoring/assessments/readability/TransitionWordsAssessment.js b/packages/yoastseo/src/scoring/assessments/readability/TransitionWordsAssessment.js index 32a1f44e99b..c9650a2c3b5 100644 --- a/packages/yoastseo/src/scoring/assessments/readability/TransitionWordsAssessment.js +++ b/packages/yoastseo/src/scoring/assessments/readability/TransitionWordsAssessment.js @@ -11,6 +11,7 @@ import marker from "../../../markers/addMark.js"; import Assessment from "../assessment"; import removeHtmlBlocks from "../../../languageProcessing/helpers/html/htmlParser"; import getWords from "../../../languageProcessing/helpers/word/getWords"; +import { filterShortcodesFromHTML } from "../../../languageProcessing/helpers"; /** @@ -195,6 +196,7 @@ export default class TransitionWordsAssessment extends Assessment { } let text = paper.getText(); text = removeHtmlBlocks( text ); + text = filterShortcodesFromHTML( text, paper._attributes && paper._attributes.shortcodes ); const textLength = customCountLength ? 
customCountLength( text ) : getWords( text ).length; // Do not use hasEnoughContent in this assessment as it is mostly redundant with `textLength >= this._config.applicableIfTextLongerThan` diff --git a/packages/yoastseo/src/scoring/assessments/seo/KeyphraseDistributionAssessment.js b/packages/yoastseo/src/scoring/assessments/seo/KeyphraseDistributionAssessment.js index 9cbc1495cc9..6a14daf3c81 100644 --- a/packages/yoastseo/src/scoring/assessments/seo/KeyphraseDistributionAssessment.js +++ b/packages/yoastseo/src/scoring/assessments/seo/KeyphraseDistributionAssessment.js @@ -2,6 +2,7 @@ import { __, sprintf } from "@wordpress/i18n"; import { merge } from "lodash-es"; import { languageProcessing, AssessmentResult, Assessment, helpers } from "yoastseo"; +import { filterShortcodesFromHTML } from "../../../languageProcessing/helpers"; const { getSentences, helpers: languageProcessingHelpers } = languageProcessing; const { createAnchorOpeningTag } = helpers; @@ -181,6 +182,7 @@ class KeyphraseDistributionAssessment extends Assessment { const memoizedTokenizer = researcher.getHelper( "memoizedTokenizer" ); let text = paper.getText(); text = languageProcessingHelpers.removeHtmlBlocks( text ); + text = filterShortcodesFromHTML( text, paper._attributes && paper._attributes.shortcodes ); const sentences = getSentences( text, memoizedTokenizer ); return paper.hasText() && paper.hasKeyword() && sentences.length >= 15 && researcher.hasResearch( "keyphraseDistribution" ); diff --git a/packages/yoastseo/src/scoring/assessments/seo/KeywordDensityAssessment.js b/packages/yoastseo/src/scoring/assessments/seo/KeywordDensityAssessment.js index 02805580e8a..a11b618a3c6 100644 --- a/packages/yoastseo/src/scoring/assessments/seo/KeywordDensityAssessment.js +++ b/packages/yoastseo/src/scoring/assessments/seo/KeywordDensityAssessment.js @@ -1,19 +1,18 @@ import { __, _n, sprintf } from "@wordpress/i18n"; import { merge } from "lodash-es"; -import recommendedKeywordCount from 
"../../helpers/assessments/recommendedKeywordCount.js"; +import recommendedKeyphraseCount from "../../helpers/assessments/recommendedKeywordCount.js"; import Assessment from "../assessment"; import AssessmentResult from "../../../values/AssessmentResult"; import { inRangeEndInclusive, inRangeStartEndInclusive, inRangeStartInclusive } from "../../helpers/assessments/inRange"; -import { createAnchorOpeningTag } from "../../../helpers/shortlinker"; +import { createAnchorOpeningTag } from "../../../helpers"; import keyphraseLengthFactor from "../../helpers/assessments/keyphraseLengthFactor.js"; -import removeHtmlBlocks from "../../../languageProcessing/helpers/html/htmlParser"; -import getWords from "../../../languageProcessing/helpers/word/getWords"; +import getAllWordsFromTree from "../../../languageProcessing/helpers/word/getAllWordsFromTree"; /** * Represents the assessment that will look if the keyphrase density is within the recommended range. */ -class KeywordDensityAssessment extends Assessment { +class KeyphraseDensityAssessment extends Assessment { /** * Sets the identifier and the config. * @@ -67,7 +66,7 @@ class KeywordDensityAssessment extends Assessment { applicableIfTextLongerThan: 100, }; - this.identifier = "keywordDensity"; + this.identifier = "keyphraseDensity"; this._config = merge( defaultConfig, config ); } @@ -75,20 +74,20 @@ class KeywordDensityAssessment extends Assessment { /** * Determines correct boundaries depending on the availability of morphological forms. * - * @param {string} text The paper text. + * @param {Paper} paper The paper to analyze. * @param {number} keyphraseLength The length of the keyphrase in words. * @param {function} customGetWords A helper to get words from the text for languages that don't use the default approach. 
* * @returns {void} */ - setBoundaries( text, keyphraseLength, customGetWords ) { + setBoundaries( paper, keyphraseLength, customGetWords ) { if ( this._hasMorphologicalForms ) { this._boundaries = this._config.parameters.multipleWordForms; } else { this._boundaries = this._config.parameters.noWordForms; } - this._minRecommendedKeywordCount = recommendedKeywordCount( text, keyphraseLength, this._boundaries.minimum, "min", customGetWords ); - this._maxRecommendedKeywordCount = recommendedKeywordCount( text, keyphraseLength, this._boundaries.maximum, "max", customGetWords ); + this._minRecommendedKeyphraseCount = recommendedKeyphraseCount( paper, keyphraseLength, this._boundaries.minimum, "min", customGetWords ); + this._maxRecommendedKeyphraseCount = recommendedKeyphraseCount( paper, keyphraseLength, this._boundaries.maximum, "max", customGetWords ); } /** @@ -102,25 +101,23 @@ class KeywordDensityAssessment extends Assessment { */ getResult( paper, researcher ) { const customGetWords = researcher.getHelper( "getWordsCustomHelper" ); - this._keywordCount = researcher.getResearch( "keywordCount" ); - const keyphraseLength = this._keywordCount.length; + this._keyphraseCount = researcher.getResearch( "getKeyphraseCount" ); + const keyphraseLength = this._keyphraseCount.keyphraseLength; const assessmentResult = new AssessmentResult(); - this._keywordDensity = researcher.getResearch( "getKeywordDensity" ); + this._keyphraseDensity = researcher.getResearch( "getKeyphraseDensity" ); this._hasMorphologicalForms = researcher.getData( "morphology" ) !== false; - let text = paper.getText(); - text = removeHtmlBlocks( text ); - this.setBoundaries( text, keyphraseLength, customGetWords ); + this.setBoundaries( paper, keyphraseLength, customGetWords ); - this._keywordDensity = this._keywordDensity * keyphraseLengthFactor( keyphraseLength ); + this._keyphraseDensity = this._keyphraseDensity * keyphraseLengthFactor( keyphraseLength ); const calculatedScore = this.calculateResult(); 
assessmentResult.setScore( calculatedScore.score ); assessmentResult.setText( calculatedScore.resultText ); - assessmentResult.setHasMarks( this._keywordCount.count > 0 ); + assessmentResult.setHasMarks( this._keyphraseCount.count > 0 ); return assessmentResult; } @@ -131,35 +128,35 @@ class KeywordDensityAssessment extends Assessment { * @returns {boolean} Returns true if the keyphrase count is 0. */ hasNoMatches() { - return this._keywordCount.count === 0; + return this._keyphraseCount.count === 0; } /** * Checks whether there are too few keyphrase matches in the text. * - * @returns {boolean} Returns true if the rounded keyword density is between 0 and the recommended minimum - * or if there there is only 1 keyword match (regardless of the density). + * @returns {boolean} Returns true if the rounded keyphrase density is between 0 and the recommended minimum + * or if there is only 1 keyphrase match (regardless of the density). */ hasTooFewMatches() { return inRangeStartInclusive( - this._keywordDensity, + this._keyphraseDensity, 0, this._boundaries.minimum - ) || this._keywordCount.count === 1; + ) || this._keyphraseCount.count === 1; } /** * Checks whether there is a good number of keyphrase matches in the text. * - * @returns {boolean} Returns true if the rounded keyword density is between the recommended minimum - * and the recommended maximum or if the keyword count is 2 and the recommended minimum is lower than 2. + * @returns {boolean} Returns true if the rounded keyphrase density is between the recommended minimum + * and the recommended maximum or if the keyphrase count is 2 and the recommended minimum is lower than 2. 
*/ hasGoodNumberOfMatches() { return inRangeStartEndInclusive( - this._keywordDensity, + this._keyphraseDensity, this._boundaries.minimum, this._boundaries.maximum - ) || ( this._keywordCount.count === 2 && this._minRecommendedKeywordCount <= 2 ); + ) || ( this._keyphraseCount.count === 2 && this._minRecommendedKeyphraseCount <= 2 ); } /** @@ -172,7 +169,7 @@ class KeywordDensityAssessment extends Assessment { */ hasTooManyMatches() { return inRangeEndInclusive( - this._keywordDensity, + this._keyphraseDensity, this._boundaries.maximum, this._boundaries.overMaximum ); @@ -199,7 +196,7 @@ class KeywordDensityAssessment extends Assessment { ), this._config.urlTitle, "", - this._minRecommendedKeywordCount, + this._minRecommendedKeyphraseCount, this._config.urlCallToAction ), }; @@ -219,14 +216,14 @@ class KeywordDensityAssessment extends Assessment { "%1$sKeyphrase density%2$s: The keyphrase was found %5$d time. That's less than the recommended minimum of %3$d times for a text of this length. %4$sFocus on your keyphrase%2$s!", // eslint-disable-next-line max-len "%1$sKeyphrase density%2$s: The keyphrase was found %5$d times. That's less than the recommended minimum of %3$d times for a text of this length. %4$sFocus on your keyphrase%2$s!", - this._keywordCount.count, + this._keyphraseCount.count, "wordpress-seo" ), this._config.urlTitle, "", - this._minRecommendedKeywordCount, + this._minRecommendedKeyphraseCount, this._config.urlCallToAction, - this._keywordCount.count + this._keyphraseCount.count ), }; } @@ -242,12 +239,12 @@ class KeywordDensityAssessment extends Assessment { _n( "%1$sKeyphrase density%2$s: The keyphrase was found %3$d time. This is great!", "%1$sKeyphrase density%2$s: The keyphrase was found %3$d times. 
This is great!", - this._keywordCount.count, + this._keyphraseCount.count, "wordpress-seo" ), this._config.urlTitle, "", - this._keywordCount.count + this._keyphraseCount.count ), }; } @@ -266,14 +263,14 @@ class KeywordDensityAssessment extends Assessment { "%1$sKeyphrase density%2$s: The keyphrase was found %5$d time. That's more than the recommended maximum of %3$d times for a text of this length. %4$sDon't overoptimize%2$s!", // eslint-disable-next-line max-len "%1$sKeyphrase density%2$s: The keyphrase was found %5$d times. That's more than the recommended maximum of %3$d times for a text of this length. %4$sDon't overoptimize%2$s!", - this._keywordCount.count, + this._keyphraseCount.count, "wordpress-seo" ), this._config.urlTitle, "", - this._maxRecommendedKeywordCount, + this._maxRecommendedKeyphraseCount, this._config.urlCallToAction, - this._keywordCount.count + this._keyphraseCount.count ), }; } @@ -292,31 +289,31 @@ class KeywordDensityAssessment extends Assessment { "%1$sKeyphrase density%2$s: The keyphrase was found %5$d time. That's way more than the recommended maximum of %3$d times for a text of this length. %4$sDon't overoptimize%2$s!", // eslint-disable-next-line max-len "%1$sKeyphrase density%2$s: The keyphrase was found %5$d times. That's way more than the recommended maximum of %3$d times for a text of this length. %4$sDon't overoptimize%2$s!", - this._keywordCount.count, + this._keyphraseCount.count, "wordpress-seo" ), this._config.urlTitle, "", - this._maxRecommendedKeywordCount, + this._maxRecommendedKeyphraseCount, this._config.urlCallToAction, - this._keywordCount.count + this._keyphraseCount.count ), }; } /** - * Marks keywords in the text for the keyword density assessment. + * Marks the occurrences of keyphrase in the text for the keyphrase density assessment. * * @returns {Array} Marks that should be applied. 
*/ getMarks() { - return this._keywordCount.markings; + return this._keyphraseCount.markings; } /** - * Checks whether the paper has a text of the minimum required length and a keyword is set. Language-specific length requirements and methods + * Checks whether the paper has a text of the minimum required length and a keyphrase is set. Language-specific length requirements and methods * of counting text length may apply (e.g. for Japanese, the text should be counted in characters instead of words, which also makes the minimum * required length higher). * @@ -331,12 +328,37 @@ class KeywordDensityAssessment extends Assessment { if ( customApplicabilityConfig ) { this._config.applicableIfTextLongerThan = customApplicabilityConfig; } - let text = paper.getText(); - text = removeHtmlBlocks( text ); - const textLength = customCountLength ? customCountLength( text ) : getWords( text ).length; + + const textLength = customCountLength ? customCountLength( paper.getText() ) : getAllWordsFromTree( paper ).length; return paper.hasText() && paper.hasKeyword() && textLength >= this._config.applicableIfTextLongerThan; } } -export default KeywordDensityAssessment; +/** + * This assessment checks if the keyphrase density is within the recommended range. + * KeywordDensityAssessment was the previous name for KeyphraseDensityAssessment (hence the name of this file). + * We keep (and expose) this assessment for backwards compatibility. + * + * @deprecated Use KeyphraseDensityAssessment instead. + */ +class KeywordDensityAssessment extends KeyphraseDensityAssessment { + /** + * Sets the identifier and the config. + * + * @param {Object} config The configuration to use. + * @returns {void} + */ + constructor( config = {} ) { + super( config ); + this.identifier = "keywordDensity"; + console.warn( "This object is deprecated, use KeyphraseDensityAssessment instead." 
); + } +} + +export { + KeyphraseDensityAssessment, + KeywordDensityAssessment, +}; + +export default KeyphraseDensityAssessment; diff --git a/packages/yoastseo/src/scoring/assessments/seo/SingleH1Assessment.js b/packages/yoastseo/src/scoring/assessments/seo/SingleH1Assessment.js index cf7e21b2796..e644919ef67 100644 --- a/packages/yoastseo/src/scoring/assessments/seo/SingleH1Assessment.js +++ b/packages/yoastseo/src/scoring/assessments/seo/SingleH1Assessment.js @@ -1,8 +1,8 @@ import { __, sprintf } from "@wordpress/i18n"; -import { isUndefined, map, merge } from "lodash-es"; +import { isUndefined, merge } from "lodash-es"; import Assessment from "../assessment.js"; -import { createAnchorOpeningTag } from "../../../helpers/shortlinker"; +import { createAnchorOpeningTag } from "../../../helpers"; import marker from "../../../markers/addMark.js"; import AssessmentResult from "../../../values/AssessmentResult.js"; import Mark from "../../../values/Mark.js"; @@ -91,12 +91,17 @@ class SingleH1Assessment extends Assessment { * @returns {Array} Array with all the marked H1s. */ getMarks() { - const h1s = this._h1s; - - return map( h1s, function( h1 ) { + return this._h1s.map( function( h1 ) { return new Mark( { original: "

    " + h1.content + "

    ", marked: "

    " + marker( h1.content ) + "

    ", + position: { + startOffset: h1.position.startOffset, + endOffset: h1.position.endOffset, + startOffsetBlock: 0, + endOffsetBlock: h1.position.endOffset - h1.position.startOffset, + clientId: h1.position.clientId, + }, } ); } ); } diff --git a/packages/yoastseo/src/scoring/assessor.js b/packages/yoastseo/src/scoring/assessor.js index e1eee576099..fedab386dce 100644 --- a/packages/yoastseo/src/scoring/assessor.js +++ b/packages/yoastseo/src/scoring/assessor.js @@ -124,7 +124,8 @@ Assessor.prototype.assess = function( paper ) { this._researcher.setPaper( paper ); const languageProcessor = new LanguageProcessor( this._researcher ); - paper.setTree( build( paper.getText(), languageProcessor ) ); + const shortcodes = paper._attributes && paper._attributes.shortcodes; + paper.setTree( build( paper, languageProcessor, shortcodes ) ); let assessments = this.getAvailableAssessments(); this.results = []; diff --git a/packages/yoastseo/src/scoring/collectionPages/cornerstone/relatedKeywordAssessor.js b/packages/yoastseo/src/scoring/collectionPages/cornerstone/relatedKeywordAssessor.js index e01202bf435..70f2f6c2275 100644 --- a/packages/yoastseo/src/scoring/collectionPages/cornerstone/relatedKeywordAssessor.js +++ b/packages/yoastseo/src/scoring/collectionPages/cornerstone/relatedKeywordAssessor.js @@ -6,7 +6,7 @@ const { createAnchorOpeningTag } = helpers; const { IntroductionKeywordAssessment, KeyphraseLengthAssessment, - KeywordDensityAssessment, + KeyphraseDensityAssessment, MetaDescriptionKeywordAssessment, FunctionWordsInKeyphraseAssessment, } = assessments.seo; @@ -33,7 +33,7 @@ const CollectionCornerstoneRelatedKeywordAssessor = function( researcher, option urlTitle: createAnchorOpeningTag( "https://yoa.st/shopify10" ), urlCallToAction: createAnchorOpeningTag( "https://yoa.st/shopify11" ), } ), - new KeywordDensityAssessment( { + new KeyphraseDensityAssessment( { urlTitle: createAnchorOpeningTag( "https://yoa.st/shopify12" ), urlCallToAction: 
createAnchorOpeningTag( "https://yoa.st/shopify13" ), } ), diff --git a/packages/yoastseo/src/scoring/collectionPages/cornerstone/seoAssessor.js b/packages/yoastseo/src/scoring/collectionPages/cornerstone/seoAssessor.js index adc19bf1359..d92bb9f52b7 100644 --- a/packages/yoastseo/src/scoring/collectionPages/cornerstone/seoAssessor.js +++ b/packages/yoastseo/src/scoring/collectionPages/cornerstone/seoAssessor.js @@ -6,7 +6,7 @@ const { createAnchorOpeningTag } = helpers; const { IntroductionKeywordAssessment, KeyphraseLengthAssessment, - KeywordDensityAssessment, + KeyphraseDensityAssessment, MetaDescriptionKeywordAssessment, KeyphraseInSEOTitleAssessment, SlugKeywordAssessment, @@ -38,7 +38,7 @@ const CollectionCornerstoneSEOAssessor = function( researcher, options ) { urlTitle: createAnchorOpeningTag( "https://yoa.st/shopify10" ), urlCallToAction: createAnchorOpeningTag( "https://yoa.st/shopify11" ), } ), - new KeywordDensityAssessment( { + new KeyphraseDensityAssessment( { urlTitle: createAnchorOpeningTag( "https://yoa.st/shopify12" ), urlCallToAction: createAnchorOpeningTag( "https://yoa.st/shopify13" ), } ), diff --git a/packages/yoastseo/src/scoring/collectionPages/relatedKeywordAssessor.js b/packages/yoastseo/src/scoring/collectionPages/relatedKeywordAssessor.js index fd469e12255..fddad61f666 100644 --- a/packages/yoastseo/src/scoring/collectionPages/relatedKeywordAssessor.js +++ b/packages/yoastseo/src/scoring/collectionPages/relatedKeywordAssessor.js @@ -6,7 +6,7 @@ const { createAnchorOpeningTag } = helpers; const { IntroductionKeywordAssessment, KeyphraseLengthAssessment, - KeywordDensityAssessment, + KeyphraseDensityAssessment, MetaDescriptionKeywordAssessment, FunctionWordsInKeyphraseAssessment, } = assessments.seo; @@ -33,7 +33,7 @@ const CollectionRelatedKeywordAssessor = function( researcher, options ) { urlTitle: createAnchorOpeningTag( "https://yoa.st/shopify10" ), urlCallToAction: createAnchorOpeningTag( "https://yoa.st/shopify11" ), } ), - new 
KeywordDensityAssessment( { + new KeyphraseDensityAssessment( { urlTitle: createAnchorOpeningTag( "https://yoa.st/shopify12" ), urlCallToAction: createAnchorOpeningTag( "https://yoa.st/shopify13" ), } ), diff --git a/packages/yoastseo/src/scoring/collectionPages/seoAssessor.js b/packages/yoastseo/src/scoring/collectionPages/seoAssessor.js index 755ed6aa011..8adcad31a61 100644 --- a/packages/yoastseo/src/scoring/collectionPages/seoAssessor.js +++ b/packages/yoastseo/src/scoring/collectionPages/seoAssessor.js @@ -6,7 +6,7 @@ const { createAnchorOpeningTag } = helpers; const { IntroductionKeywordAssessment, KeyphraseLengthAssessment, - KeywordDensityAssessment, + KeyphraseDensityAssessment, MetaDescriptionKeywordAssessment, KeyphraseInSEOTitleAssessment, SlugKeywordAssessment, @@ -37,7 +37,7 @@ const CollectionSEOAssessor = function( researcher, options ) { urlTitle: createAnchorOpeningTag( "https://yoa.st/shopify10" ), urlCallToAction: createAnchorOpeningTag( "https://yoa.st/shopify11" ), } ), - new KeywordDensityAssessment( { + new KeyphraseDensityAssessment( { urlTitle: createAnchorOpeningTag( "https://yoa.st/shopify12" ), urlCallToAction: createAnchorOpeningTag( "https://yoa.st/shopify13" ), } ), diff --git a/packages/yoastseo/src/scoring/cornerstone/relatedKeywordAssessor.js b/packages/yoastseo/src/scoring/cornerstone/relatedKeywordAssessor.js index 47ff0d74a46..ca825e19bae 100644 --- a/packages/yoastseo/src/scoring/cornerstone/relatedKeywordAssessor.js +++ b/packages/yoastseo/src/scoring/cornerstone/relatedKeywordAssessor.js @@ -3,7 +3,7 @@ import Assessor from "../assessor.js"; import IntroductionKeyword from "../assessments/seo/IntroductionKeywordAssessment.js"; import KeyphraseLength from "../assessments/seo/KeyphraseLengthAssessment.js"; -import KeywordDensity from "../assessments/seo/KeywordDensityAssessment.js"; +import KeyphraseDensityAssessment from "../assessments/seo/KeywordDensityAssessment.js"; import MetaDescriptionKeyword from 
"../assessments/seo/MetaDescriptionKeywordAssessment.js"; import TextCompetingLinks from "../assessments/seo/TextCompetingLinksAssessment.js"; import FunctionWordsInKeyphrase from "../assessments/seo/FunctionWordsInKeyphraseAssessment"; @@ -25,7 +25,7 @@ const relatedKeywordAssessor = function( researcher, options ) { this._assessments = [ new IntroductionKeyword(), new KeyphraseLength( { isRelatedKeyphrase: true } ), - new KeywordDensity(), + new KeyphraseDensityAssessment(), new MetaDescriptionKeyword(), new TextCompetingLinks(), new FunctionWordsInKeyphrase(), diff --git a/packages/yoastseo/src/scoring/cornerstone/seoAssessor.js b/packages/yoastseo/src/scoring/cornerstone/seoAssessor.js index 2b8a4bde863..d816dcd6c51 100644 --- a/packages/yoastseo/src/scoring/cornerstone/seoAssessor.js +++ b/packages/yoastseo/src/scoring/cornerstone/seoAssessor.js @@ -2,7 +2,7 @@ import { inherits } from "util"; import IntroductionKeywordAssessment from "../assessments/seo/IntroductionKeywordAssessment"; import KeyphraseLengthAssessment from "../assessments/seo/KeyphraseLengthAssessment"; -import KeywordDensityAssessment from "../assessments/seo/KeywordDensityAssessment"; +import KeyphraseDensityAssessment from "../assessments/seo/KeywordDensityAssessment"; import MetaDescriptionKeywordAssessment from "../assessments/seo/MetaDescriptionKeywordAssessment"; import TextCompetingLinksAssessment from "../assessments/seo/TextCompetingLinksAssessment"; import InternalLinksAssessment from "../assessments/seo/InternalLinksAssessment"; @@ -36,7 +36,7 @@ const CornerstoneSEOAssessor = function( researcher, options ) { this._assessments = [ new IntroductionKeywordAssessment(), new KeyphraseLengthAssessment(), - new KeywordDensityAssessment(), + new KeyphraseDensityAssessment(), new MetaDescriptionKeywordAssessment(), new MetaDescriptionLength( { scores: { diff --git a/packages/yoastseo/src/scoring/helpers/assessments/recommendedKeywordCount.js 
b/packages/yoastseo/src/scoring/helpers/assessments/recommendedKeywordCount.js index 100ffdd3cbd..830f223bd48 100644 --- a/packages/yoastseo/src/scoring/helpers/assessments/recommendedKeywordCount.js +++ b/packages/yoastseo/src/scoring/helpers/assessments/recommendedKeywordCount.js @@ -1,43 +1,43 @@ -import countWords from "../../../languageProcessing/helpers/word/countWords"; +import getAllWordsFromTree from "../../../languageProcessing/helpers/word/getAllWordsFromTree"; import keyphraseLengthFactor from "./keyphraseLengthFactor.js"; /** - * Calculates a recommended keyword count for a text. The formula to calculate this number is based on the - * keyword density formula. + * Calculates a recommended keyphrase count for a paper's text. The formula to calculate this number is based on the + * keyphrase density formula. * - * @param {string} text The paper text. - * @param {number} keyphraseLength The length of the focus keyphrase in words. - * @param {number} recommendedKeywordDensity The recommended keyword density (either maximum or minimum). - * @param {string} maxOrMin Whether it's a maximum or minimum recommended keyword density. - * @param {function} customGetWords A helper to get words from the text for languages that don't use the default approach. + * @param {Paper} paper The paper to analyze. + * @param {number} keyphraseLength The length of the focus keyphrase in words. + * @param {number} recommendedKeyphraseDensity The recommended keyphrase density (either maximum or minimum). + * @param {string} maxOrMin Whether it's a maximum or minimum recommended keyphrase density. + * @param {function} customGetWords A helper to get words from the text for languages that don't use the default approach. * - * @returns {number} The recommended keyword count. + * @returns {number} The recommended keyphrase count. */ -export default function( text, keyphraseLength, recommendedKeywordDensity, maxOrMin, customGetWords ) { - const wordCount = customGetWords ? 
customGetWords( text ).length : countWords( text ); +export default function( paper, keyphraseLength, recommendedKeyphraseDensity, maxOrMin, customGetWords ) { + const wordCount = customGetWords ? customGetWords( paper.getText() ).length : getAllWordsFromTree( paper ).length; if ( wordCount === 0 ) { return 0; } const lengthKeyphraseFactor = keyphraseLengthFactor( keyphraseLength ); - const recommendedKeywordCount = ( recommendedKeywordDensity * wordCount ) / ( 100 * lengthKeyphraseFactor ); + const recommendedKeyphraseCount = ( recommendedKeyphraseDensity * wordCount ) / ( 100 * lengthKeyphraseFactor ); /* - * The recommended keyword count should always be at least 2, - * regardless of the keyword density, the word count, or the keyphrase length. + * The recommended keyphrase count should always be at least 2, + * regardless of the keyphrase density, the word count, or the keyphrase length. */ - if ( recommendedKeywordCount < 2 ) { + if ( recommendedKeyphraseCount < 2 ) { return 2; } switch ( maxOrMin ) { case "min": // Round up for the recommended minimum count. - return Math.ceil( recommendedKeywordCount ); + return Math.ceil( recommendedKeyphraseCount ); default: case "max": // Round down for the recommended maximum count. 
- return Math.floor( recommendedKeywordCount ); + return Math.floor( recommendedKeyphraseCount ); } } diff --git a/packages/yoastseo/src/scoring/productPages/cornerstone/relatedKeywordAssessor.js b/packages/yoastseo/src/scoring/productPages/cornerstone/relatedKeywordAssessor.js index 0a82c18f8eb..ddd75eadd4c 100644 --- a/packages/yoastseo/src/scoring/productPages/cornerstone/relatedKeywordAssessor.js +++ b/packages/yoastseo/src/scoring/productPages/cornerstone/relatedKeywordAssessor.js @@ -6,7 +6,7 @@ const { createAnchorOpeningTag } = helpers; const { IntroductionKeywordAssessment, KeyphraseLengthAssessment, - KeywordDensityAssessment, + KeyphraseDensityAssessment, MetaDescriptionKeywordAssessment, TextCompetingLinksAssessment, ImageKeyphraseAssessment, @@ -41,7 +41,7 @@ const ProductCornerStoneRelatedKeywordAssessor = function( researcher, options ) urlTitle: createAnchorOpeningTag( options.keyphraseLengthUrlTitle ), urlCallToAction: createAnchorOpeningTag( options.keyphraseLengthCTAUrl ), }, true ), - new KeywordDensityAssessment( { + new KeyphraseDensityAssessment( { urlTitle: createAnchorOpeningTag( options.keyphraseDensityUrlTitle ), urlCallToAction: createAnchorOpeningTag( options.keyphraseDensityCTAUrl ), } ), diff --git a/packages/yoastseo/src/scoring/productPages/cornerstone/seoAssessor.js b/packages/yoastseo/src/scoring/productPages/cornerstone/seoAssessor.js index 1bddf28a6e2..de436bf355d 100644 --- a/packages/yoastseo/src/scoring/productPages/cornerstone/seoAssessor.js +++ b/packages/yoastseo/src/scoring/productPages/cornerstone/seoAssessor.js @@ -12,7 +12,7 @@ const { createAnchorOpeningTag } = helpers; const { IntroductionKeywordAssessment, KeyphraseLengthAssessment, - KeywordDensityAssessment, + KeyphraseDensityAssessment, MetaDescriptionKeywordAssessment, TextCompetingLinksAssessment, KeyphraseInSEOTitleAssessment, @@ -54,7 +54,7 @@ const ProductCornerstoneSEOAssessor = function( researcher, options ) { urlTitle: createAnchorOpeningTag( 
options.keyphraseLengthUrlTitle ), urlCallToAction: createAnchorOpeningTag( options.keyphraseLengthCTAUrl ), }, true ), - new KeywordDensityAssessment( { + new KeyphraseDensityAssessment( { urlTitle: createAnchorOpeningTag( options.keyphraseDensityUrlTitle ), urlCallToAction: createAnchorOpeningTag( options.keyphraseDensityCTAUrl ), } ), diff --git a/packages/yoastseo/src/scoring/productPages/relatedKeywordAssessor.js b/packages/yoastseo/src/scoring/productPages/relatedKeywordAssessor.js index 1d1a112d453..941900517d9 100644 --- a/packages/yoastseo/src/scoring/productPages/relatedKeywordAssessor.js +++ b/packages/yoastseo/src/scoring/productPages/relatedKeywordAssessor.js @@ -6,7 +6,7 @@ const { createAnchorOpeningTag } = helpers; const { IntroductionKeywordAssessment, KeyphraseLengthAssessment, - KeywordDensityAssessment, + KeyphraseDensityAssessment, MetaDescriptionKeywordAssessment, TextCompetingLinksAssessment, ImageKeyphraseAssessment, @@ -41,7 +41,7 @@ const ProductRelatedKeywordAssessor = function( researcher, options ) { urlTitle: createAnchorOpeningTag( options.keyphraseLengthUrlTitle ), urlCallToAction: createAnchorOpeningTag( options.keyphraseLengthCTAUrl ), }, true ), - new KeywordDensityAssessment( { + new KeyphraseDensityAssessment( { urlTitle: createAnchorOpeningTag( options.keyphraseDensityUrlTitle ), urlCallToAction: createAnchorOpeningTag( options.keyphraseDensityCTAUrl ), } ), diff --git a/packages/yoastseo/src/scoring/productPages/seoAssessor.js b/packages/yoastseo/src/scoring/productPages/seoAssessor.js index 1e13264d867..2f4a6e0a0e6 100644 --- a/packages/yoastseo/src/scoring/productPages/seoAssessor.js +++ b/packages/yoastseo/src/scoring/productPages/seoAssessor.js @@ -12,7 +12,7 @@ const { createAnchorOpeningTag } = helpers; const { IntroductionKeywordAssessment, KeyphraseLengthAssessment, - KeywordDensityAssessment, + KeyphraseDensityAssessment, MetaDescriptionKeywordAssessment, TextCompetingLinksAssessment, KeyphraseInSEOTitleAssessment, @@ 
-54,7 +54,7 @@ const ProductSEOAssessor = function( researcher, options ) { urlTitle: createAnchorOpeningTag( options.keyphraseLengthUrlTitle ), urlCallToAction: createAnchorOpeningTag( options.keyphraseLengthCTAUrl ), }, true ), - new KeywordDensityAssessment( { + new KeyphraseDensityAssessment( { urlTitle: createAnchorOpeningTag( options.keyphraseDensityUrlTitle ), urlCallToAction: createAnchorOpeningTag( options.keyphraseDensityCTAUrl ), } ), diff --git a/packages/yoastseo/src/scoring/relatedKeywordAssessor.js b/packages/yoastseo/src/scoring/relatedKeywordAssessor.js index bfc0ae07fb3..ddf792d9abf 100644 --- a/packages/yoastseo/src/scoring/relatedKeywordAssessor.js +++ b/packages/yoastseo/src/scoring/relatedKeywordAssessor.js @@ -3,7 +3,7 @@ import { inherits } from "util"; import Assessor from "./assessor.js"; import IntroductionKeyword from "./assessments/seo/IntroductionKeywordAssessment.js"; import KeyphraseLength from "./assessments/seo/KeyphraseLengthAssessment.js"; -import KeywordDensity from "./assessments/seo/KeywordDensityAssessment.js"; +import KeyphraseDensityAssessment from "./assessments/seo/KeywordDensityAssessment.js"; import MetaDescriptionKeyword from "./assessments/seo/MetaDescriptionKeywordAssessment.js"; import ImageKeyphrase from "./assessments/seo/KeyphraseInImageTextAssessment"; import TextCompetingLinks from "./assessments/seo/TextCompetingLinksAssessment.js"; @@ -25,7 +25,7 @@ const relatedKeywordAssessor = function( researcher, options ) { this._assessments = [ new IntroductionKeyword(), new KeyphraseLength( { isRelatedKeyphrase: true } ), - new KeywordDensity(), + new KeyphraseDensityAssessment(), new MetaDescriptionKeyword(), new TextCompetingLinks(), new FunctionWordsInKeyphrase(), diff --git a/packages/yoastseo/src/scoring/relatedKeywordTaxonomyAssessor.js b/packages/yoastseo/src/scoring/relatedKeywordTaxonomyAssessor.js index c0c23f765e6..bb75942b814 100644 --- a/packages/yoastseo/src/scoring/relatedKeywordTaxonomyAssessor.js +++ 
b/packages/yoastseo/src/scoring/relatedKeywordTaxonomyAssessor.js @@ -2,7 +2,7 @@ import { inherits } from "util"; import IntroductionKeywordAssessment from "./assessments/seo/IntroductionKeywordAssessment"; import KeyphraseLengthAssessment from "./assessments/seo/KeyphraseLengthAssessment"; -import KeywordDensityAssessment from "./assessments/seo/KeywordDensityAssessment"; +import KeyphraseDensityAssessment from "./assessments/seo/KeywordDensityAssessment"; import MetaDescriptionKeywordAssessment from "./assessments/seo/MetaDescriptionKeywordAssessment"; import Assessor from "./assessor"; import FunctionWordsInKeyphrase from "./assessments/seo/FunctionWordsInKeyphraseAssessment"; @@ -22,7 +22,7 @@ const RelatedKeywordTaxonomyAssessor = function( researcher, options ) { this._assessments = [ new IntroductionKeywordAssessment(), new KeyphraseLengthAssessment( { isRelatedKeyphrase: true } ), - new KeywordDensityAssessment(), + new KeyphraseDensityAssessment(), new MetaDescriptionKeywordAssessment(), // Text Images assessment here. 
new FunctionWordsInKeyphrase(), diff --git a/packages/yoastseo/src/scoring/seoAssessor.js b/packages/yoastseo/src/scoring/seoAssessor.js index 3fec383fa64..ae228ea591d 100644 --- a/packages/yoastseo/src/scoring/seoAssessor.js +++ b/packages/yoastseo/src/scoring/seoAssessor.js @@ -2,7 +2,7 @@ import { inherits } from "util"; import IntroductionKeywordAssessment from "./assessments/seo/IntroductionKeywordAssessment"; import KeyphraseLengthAssessment from "./assessments/seo/KeyphraseLengthAssessment"; -import KeywordDensityAssessment from "./assessments/seo/KeywordDensityAssessment"; +import KeyphraseDensityAssessment from "./assessments/seo/KeywordDensityAssessment"; import MetaDescriptionKeywordAssessment from "./assessments/seo/MetaDescriptionKeywordAssessment"; import TextCompetingLinksAssessment from "./assessments/seo/TextCompetingLinksAssessment"; import InternalLinksAssessment from "./assessments/seo/InternalLinksAssessment"; @@ -35,7 +35,7 @@ const SEOAssessor = function( researcher, options ) { this._assessments = [ new IntroductionKeywordAssessment(), new KeyphraseLengthAssessment(), - new KeywordDensityAssessment(), + new KeyphraseDensityAssessment(), new MetaDescriptionKeywordAssessment(), new MetaDescriptionLength(), new SubheadingsKeyword(), diff --git a/packages/yoastseo/src/scoring/storePostsAndPages/cornerstone/relatedKeywordAssessor.js b/packages/yoastseo/src/scoring/storePostsAndPages/cornerstone/relatedKeywordAssessor.js index b75edd7bba7..0b1698b4f7e 100644 --- a/packages/yoastseo/src/scoring/storePostsAndPages/cornerstone/relatedKeywordAssessor.js +++ b/packages/yoastseo/src/scoring/storePostsAndPages/cornerstone/relatedKeywordAssessor.js @@ -6,7 +6,7 @@ const { createAnchorOpeningTag } = helpers; const { IntroductionKeywordAssessment, KeyphraseLengthAssessment, - KeywordDensityAssessment, + KeyphraseDensityAssessment, MetaDescriptionKeywordAssessment, TextCompetingLinksAssessment, ImageKeyphraseAssessment, @@ -36,7 +36,7 @@ const 
StorePostsAndPagesCornerstoneRelatedKeywordAssessor = function( researcher urlTitle: createAnchorOpeningTag( "https://yoa.st/shopify10" ), urlCallToAction: createAnchorOpeningTag( "https://yoa.st/shopify11" ), } ), - new KeywordDensityAssessment( { + new KeyphraseDensityAssessment( { urlTitle: createAnchorOpeningTag( "https://yoa.st/shopify12" ), urlCallToAction: createAnchorOpeningTag( "https://yoa.st/shopify13" ), } ), diff --git a/packages/yoastseo/src/scoring/storePostsAndPages/cornerstone/seoAssessor.js b/packages/yoastseo/src/scoring/storePostsAndPages/cornerstone/seoAssessor.js index 87507c710f7..7df60cacb2b 100644 --- a/packages/yoastseo/src/scoring/storePostsAndPages/cornerstone/seoAssessor.js +++ b/packages/yoastseo/src/scoring/storePostsAndPages/cornerstone/seoAssessor.js @@ -6,7 +6,7 @@ const { createAnchorOpeningTag } = helpers; const { IntroductionKeywordAssessment, KeyphraseLengthAssessment, - KeywordDensityAssessment, + KeyphraseDensityAssessment, MetaDescriptionKeywordAssessment, KeyphraseInSEOTitleAssessment, SlugKeywordAssessment, @@ -45,7 +45,7 @@ const StorePostsAndPagesCornerstoneSEOAssessor = function( researcher, options ) urlTitle: createAnchorOpeningTag( "https://yoa.st/shopify10" ), urlCallToAction: createAnchorOpeningTag( "https://yoa.st/shopify11" ), } ), - new KeywordDensityAssessment( { + new KeyphraseDensityAssessment( { urlTitle: createAnchorOpeningTag( "https://yoa.st/shopify12" ), urlCallToAction: createAnchorOpeningTag( "https://yoa.st/shopify13" ), } ), diff --git a/packages/yoastseo/src/scoring/storePostsAndPages/relatedKeywordAssessor.js b/packages/yoastseo/src/scoring/storePostsAndPages/relatedKeywordAssessor.js index 798d4904bed..767d5ca5a92 100644 --- a/packages/yoastseo/src/scoring/storePostsAndPages/relatedKeywordAssessor.js +++ b/packages/yoastseo/src/scoring/storePostsAndPages/relatedKeywordAssessor.js @@ -6,7 +6,7 @@ const { createAnchorOpeningTag } = helpers; const { IntroductionKeywordAssessment, 
KeyphraseLengthAssessment, - KeywordDensityAssessment, + KeyphraseDensityAssessment, MetaDescriptionKeywordAssessment, TextCompetingLinksAssessment, ImageKeyphraseAssessment, @@ -36,7 +36,7 @@ const StorePostsAndPagesRelatedKeywordAssessor = function( researcher, options ) urlTitle: createAnchorOpeningTag( "https://yoa.st/shopify10" ), urlCallToAction: createAnchorOpeningTag( "https://yoa.st/shopify11" ), } ), - new KeywordDensityAssessment( { + new KeyphraseDensityAssessment( { urlTitle: createAnchorOpeningTag( "https://yoa.st/shopify12" ), urlCallToAction: createAnchorOpeningTag( "https://yoa.st/shopify13" ), } ), diff --git a/packages/yoastseo/src/scoring/storePostsAndPages/seoAssessor.js b/packages/yoastseo/src/scoring/storePostsAndPages/seoAssessor.js index d4bca039d35..d22decde113 100644 --- a/packages/yoastseo/src/scoring/storePostsAndPages/seoAssessor.js +++ b/packages/yoastseo/src/scoring/storePostsAndPages/seoAssessor.js @@ -6,7 +6,7 @@ const { createAnchorOpeningTag } = helpers; const { IntroductionKeywordAssessment, KeyphraseLengthAssessment, - KeywordDensityAssessment, + KeyphraseDensityAssessment, MetaDescriptionKeywordAssessment, KeyphraseInSEOTitleAssessment, SlugKeywordAssessment, @@ -45,7 +45,7 @@ const StorePostsAndPagesSEOAssessor = function( researcher, options ) { urlTitle: createAnchorOpeningTag( "https://yoa.st/shopify10" ), urlCallToAction: createAnchorOpeningTag( "https://yoa.st/shopify11" ), } ), - new KeywordDensityAssessment( { + new KeyphraseDensityAssessment( { urlTitle: createAnchorOpeningTag( "https://yoa.st/shopify12" ), urlCallToAction: createAnchorOpeningTag( "https://yoa.st/shopify13" ), } ), diff --git a/packages/yoastseo/src/scoring/taxonomyAssessor.js b/packages/yoastseo/src/scoring/taxonomyAssessor.js index 98e2978ccf2..34bc50b6d36 100644 --- a/packages/yoastseo/src/scoring/taxonomyAssessor.js +++ b/packages/yoastseo/src/scoring/taxonomyAssessor.js @@ -2,7 +2,7 @@ import { inherits } from "util"; import 
IntroductionKeywordAssessment from "./assessments/seo/IntroductionKeywordAssessment"; import KeyphraseLengthAssessment from "./assessments/seo/KeyphraseLengthAssessment"; -import KeywordDensityAssessment from "./assessments/seo/KeywordDensityAssessment"; +import KeyphraseDensityAssessment from "./assessments/seo/KeywordDensityAssessment"; import MetaDescriptionKeywordAssessment from "./assessments/seo/MetaDescriptionKeywordAssessment"; import KeyphraseInSEOTitleAssessment from "./assessments/seo/KeyphraseInSEOTitleAssessment"; import SlugKeywordAssessment from "./assessments/seo/UrlKeywordAssessment"; @@ -46,7 +46,7 @@ const TaxonomyAssessor = function( researcher, options ) { this._assessments = [ new IntroductionKeywordAssessment(), new KeyphraseLengthAssessment(), - new KeywordDensityAssessment(), + new KeyphraseDensityAssessment(), new MetaDescriptionKeywordAssessment(), new MetaDescriptionLengthAssessment(), getTextLengthAssessment(), diff --git a/packages/yoastseo/src/values/Mark.js b/packages/yoastseo/src/values/Mark.js index d9e3f7d4e04..a3f6e8a01a7 100644 --- a/packages/yoastseo/src/values/Mark.js +++ b/packages/yoastseo/src/values/Mark.js @@ -50,6 +50,15 @@ Mark.prototype.getFieldsToMark = function() { return this._properties.fieldsToMark; }; +/** + * Returns the position information. + * + * @returns {number} The position information. + */ +Mark.prototype.getPosition = function() { + return this._properties.position; +}; + /** * Returns the start position. * @@ -68,6 +77,98 @@ Mark.prototype.getPositionEnd = function() { return this._properties.position && this._properties.position.endOffset; }; +/** + * Sets the start position. + * + * @param {number} positionStart The new start position. + * + * @returns {void} + */ +Mark.prototype.setPositionStart = function( positionStart ) { + this._properties.position.startOffset = positionStart; +}; + +/** + * Sets the end position. + * + * @param {number} positionEnd The new end position. 
+ * + * @returns {void} + */ +Mark.prototype.setPositionEnd = function( positionEnd ) { + this._properties.position.endOffset = positionEnd; +}; + +/** + * Sets the start position of a block. + * + * @param {number} startOffsetBlock The block start offset. + * + * @returns {void} + */ +Mark.prototype.setBlockPositionStart = function( startOffsetBlock ) { + this._properties.position.startOffsetBlock = startOffsetBlock; +}; + +/** + * Sets the end position of a block. + * + * @param {number} endOffsetBlock The block end offset. + * + * @returns {void} + */ +Mark.prototype.setBlockPositionEnd = function( endOffsetBlock ) { + this._properties.position.endOffsetBlock = endOffsetBlock; +}; + +/** + * Gets the block client id. + * + * @returns {string} The block client id. + */ +Mark.prototype.getBlockClientId = function() { + return this._properties.position && this._properties.position.clientId; +}; + +/** + * Gets the block attribute id. + * + * @returns {string} The block attribute id. + */ +Mark.prototype.getBlockAttributeId = function() { + return this._properties.position && this._properties.position.attributeId; +}; + + +/** + * Checks if the mark object is intended for the first section of a Yoast sub-block. + * This method will be used only for Yoast blocks where each block consists of sub-blocks + * with two sections. + * + * @returns {boolean} Whether the mark object is intended for the first section of a Yoast sub-block. + */ +Mark.prototype.isMarkForFirstBlockSection = function() { + return this._properties.position && this._properties.position.isFirstSection; +}; + +/** + * Returns the start position inside block. + * + * @returns {number} The start position inside block if the mark has position information, undefined otherwise. 
+ */ +Mark.prototype.getBlockPositionStart = function() { + return this._properties.position && this._properties.position.startOffsetBlock; +}; + +/** + * Returns the end position inside block if the mark has position information, undefined otherwise. + * + * @returns {number} The end position inside block. + */ +Mark.prototype.getBlockPositionEnd = function() { + return this._properties.position && this._properties.position.endOffsetBlock; +}; + /** * Applies this mark to the given text with replacement-based highlighting. * @@ -136,7 +237,18 @@ Mark.prototype.isValid = function() { * @returns {boolean} Returns true if the Mark object has position information, false otherwise. */ Mark.prototype.hasPosition = function() { - return !! this.getPositionStart && this.getPositionStart(); + return ! isUndefined( this.getPositionStart() ); +}; + + +/** + * Checks if a mark has block position information available. + * A block has position information if the block start offset is available. + * + * @returns {boolean} Returns true if the Mark object has block position information, false otherwise. + */ +Mark.prototype.hasBlockPosition = function() { + return ! isUndefined( this.getBlockPositionStart() ); }; /** diff --git a/packages/yoastseo/src/values/Paper.js b/packages/yoastseo/src/values/Paper.js index 9c191bef2f8..46756c7da71 100644 --- a/packages/yoastseo/src/values/Paper.js +++ b/packages/yoastseo/src/values/Paper.js @@ -68,6 +68,7 @@ function Paper( text, attributes ) { this._attributes = attributes; } + /** * Checks whether a keyword is available. * @returns {boolean} Returns true if the Paper has a keyword. 
diff --git a/packages/yoastseo/src/worker/AnalysisWebWorker.js b/packages/yoastseo/src/worker/AnalysisWebWorker.js index bd9441723db..333efe4af2a 100644 --- a/packages/yoastseo/src/worker/AnalysisWebWorker.js +++ b/packages/yoastseo/src/worker/AnalysisWebWorker.js @@ -1081,8 +1081,8 @@ export default class AnalysisWebWorker { this._researcher.setPaper( this._paper ); const languageProcessor = new LanguageProcessor( this._researcher ); - - this._paper.setTree( build( this._paper.getText(), languageProcessor ) ); + const shortcodes = this._paper._attributes && this._paper._attributes.shortcodes; + this._paper.setTree( build( this._paper, languageProcessor, shortcodes ) ); // Update the configuration locale to the paper locale. this.setLocale( this._paper.getLocale() ); @@ -1405,7 +1405,8 @@ export default class AnalysisWebWorker { // Build and set the tree if it's not been set before. if ( paper.getTree() === null ) { const languageProcessor = new LanguageProcessor( researcher ); - paper.setTree( build( paper.getText(), languageProcessor ) ); + const shortcodes = paper._attributes && paper._attributes.shortcodes; + paper.setTree( build( paper, languageProcessor, shortcodes ) ); } } diff --git a/readme.txt b/readme.txt index d2b1215d89c..f55a642575e 100644 --- a/readme.txt +++ b/readme.txt @@ -5,7 +5,7 @@ License: GPLv3 License URI: http://www.gnu.org/licenses/gpl.html Tags: SEO, XML sitemap, Content analysis, Readability, Schema Tested up to: 6.3 -Stable tag: 21.2 +Stable tag: 21.3 Requires PHP: 7.2.5 Improve your WordPress SEO: Write better content and have a fully optimized WordPress site using the Yoast SEO plugin. @@ -340,39 +340,42 @@ Your question has most likely been answered on our help center: [yoast.com/help/ == Changelog == -= 21.3 = += 21.4 = -Release date: 2023-10-03 +Release date: 2023-10-17 #### Enhancements -* Improves the performance of post saving. 
+* Improves keyphrase matching in Japanese by being able to match keyphrase occurrences that contain upper case characters. +* Improves performance in getting the primary term. Props to [nlemoine](https://github.com/nlemoine). +* Improves the keyphrase matching in the _keyphrase density_ assessment. +* Introduces more robust HTML processing and highlighting for the _keyphrase density_ and _single H1_ assessments. +* Prevents database update requests on page loads when the site representation settings contain conflicting data. Props to [jboonstra](https://github.com/jboonstra). +* Updates the list of HTML elements that should be excluded from the content analysis. #### Bugfixes -* Fixes a bug where the notifications counter of the admin bar menu would not show with the correct style on the frontend. +* Fixes a bug where highlighting was not applied to keyphrase occurrences that contained different types of apostrophes than `'`. +* Fixes a bug where a PHP notice would happen when the sitemap index is generated on MySQL 8+. +* Fixes a bug where resource cleanup regarding emojis would cause a fatal error when enabling the `Remove emoji scripts` option in the _crawl optimization_ settings. Props to [MishaBelikov](https://github.com/MishaBelikov). +* Fixes a bug where sentences would not be highlighted when square brackets were present in the same sentence. +* Fixes a bug where the first-time configuration's site representation logo button would not be translated. Props to [fxbenard](https://github.com/fxbenard). +* Fixes a bug where the _single title_ assessment would be triggered when adding an H1 without text. -= 21.2 = += 21.3 = -Release date: 2023-09-19 +Release date: 2023-10-03 -Yoast SEO 21.2 is out today! In this release, we've improved the naming of several features and enhanced the sidebar in the block editor, making it easier to use. Find out more about what's new in Yoast SEO 21.2 in [our release post](https://yoa.st/release-19-9-23)! +Yoast SEO 21.3 is out! 
In this release, we've focused on improving the plugin's performance, especially regarding handling huge posts on complex websites. Find out more about what's new in Yoast SEO 21.3 in [our release post](https://yoa.st/release-3-10-23)! #### Enhancements -* Renames Google preview to Search appearance in the metabox and sidebar. +* Enhances post-saving performance in certain conditions for a smoother and more efficient user experience. #### Bugfixes -* Fixes a bug where, even if `Show author archives without posts in search results` is enabled, the archive page would have a `noindex` in the `robots` metatag. -* Fixes a bug where notices about incorrect calls to `wpdb::prepare` would be thrown on Yoast SEO Premium activation. -* Fixes a bug where pagination meta tags would be wrong when using Query Loop Block with custom query variables. -* Fixes a bug where the redirect notification would mention "posts" when a tag was deleted or trashed. -* Fixing a bug where adding special characters like "»" as a title separator would break the RSS feed. - -#### Other - -* Sets the minimum supported WordPress version to 6.2. +* Fixes a bug where the notifications counter of the admin bar menu would not show with the correct style on the frontend. +* Fixes a bug where the slug in the search appearance editor would not be set when published posts were edited in the classic editor and the "core/editor" store was available. = Earlier versions = For the changelog of earlier versions, please refer to [the changelog on yoast.com](https://yoa.st/yoast-seo-changelog). diff --git a/src/builders/indexable-hierarchy-builder.php b/src/builders/indexable-hierarchy-builder.php index cdc663bd75c..e2e0f7392d8 100644 --- a/src/builders/indexable-hierarchy-builder.php +++ b/src/builders/indexable-hierarchy-builder.php @@ -132,21 +132,6 @@ public function build( Indexable $indexable ) { * @return bool True when indexable has a built hierarchy. 
*/ protected function hierarchy_is_built( Indexable $indexable ) { - /** - * Filters ignoring checking if the hierarchy is already built. - * - * Used when adding term with `wp_set_object_terms` together with `wp_insert_post`. - * - * @since 21.2 - * - * @param bool $ignore_already_saved If the hierarchy already saved check should be ignored. - * @return bool The filtered value of the `$ignore_already_saved` parameter. - */ - $ignore_already_saved = apply_filters( 'wpseo_hierarchy_ignore_already_saved', false ); - if ( $ignore_already_saved ) { - return false; - } - if ( \in_array( $indexable->id, $this->saved_ancestors, true ) ) { return true; } diff --git a/src/integrations/admin/indexables-exclude-taxonomy-integration.php b/src/integrations/admin/indexables-exclude-taxonomy-integration.php index 09e76662e87..4d1592b8d8f 100644 --- a/src/integrations/admin/indexables-exclude-taxonomy-integration.php +++ b/src/integrations/admin/indexables-exclude-taxonomy-integration.php @@ -41,13 +41,15 @@ public function register_hooks() { * * @param array $excluded_taxonomies The excluded taxonomies. * - * @return array The excluded post types, including the specific post type. + * @return array The excluded taxonomies, including specific taxonomies. 
*/ public function exclude_taxonomies_for_indexation( $excluded_taxonomies ) { + $taxonomies_to_exclude = \array_merge( $excluded_taxonomies, [ 'wp_pattern_category' ] ); + if ( $this->options_helper->get( 'disable-post_format', false ) ) { - return \array_merge( $excluded_taxonomies, [ 'post_format' ] ); + return \array_merge( $taxonomies_to_exclude, [ 'post_format' ] ); } - return $excluded_taxonomies; + return $taxonomies_to_exclude; } } diff --git a/src/integrations/admin/link-count-columns-integration.php b/src/integrations/admin/link-count-columns-integration.php index 5182a095539..be354c9d024 100644 --- a/src/integrations/admin/link-count-columns-integration.php +++ b/src/integrations/admin/link-count-columns-integration.php @@ -135,6 +135,7 @@ public function add_post_columns( $columns ) { $columns[ 'wpseo-' . self::COLUMN_LINKS ] = \sprintf( '%2$s', \esc_attr__( 'Number of outgoing internal links in this post. See "Yoast Columns" text in the help tab for more info.', 'wordpress-seo' ), + /* translators: Hidden accessibility text. */ \esc_html__( 'Outgoing internal links', 'wordpress-seo' ) ); @@ -142,6 +143,7 @@ public function add_post_columns( $columns ) { $columns[ 'wpseo-' . self::COLUMN_LINKED ] = \sprintf( '%2$s', \esc_attr__( 'Number of internal links linking to this post. See "Yoast Columns" text in the help tab for more info.', 'wordpress-seo' ), + /* translators: Hidden accessibility text. */ \esc_html__( 'Received internal links', 'wordpress-seo' ) ); } diff --git a/src/integrations/admin/workouts-integration.php b/src/integrations/admin/workouts-integration.php index 7d384ca5861..8f1159185ea 100644 --- a/src/integrations/admin/workouts-integration.php +++ b/src/integrations/admin/workouts-integration.php @@ -207,6 +207,7 @@ private function get_update_premium_notice() { ); $button = '' . \esc_html__( 'Renew your subscription', 'wordpress-seo' ) + /* translators: Hidden accessibility text. */ . '' . 
\__( '(Opens in a new browser tab)', 'wordpress-seo' ) . '' . '' . ''; @@ -236,6 +237,7 @@ private function get_update_premium_notice() { ); $button = '' . \esc_html__( 'Get help activating your subscription', 'wordpress-seo' ) + /* translators: Hidden accessibility text. */ . '' . \__( '(Opens in a new browser tab)', 'wordpress-seo' ) . '' . ''; } diff --git a/src/integrations/alerts/black-friday-promotion-notification.php b/src/integrations/alerts/black-friday-promotion-notification.php new file mode 100644 index 00000000000..92e69566766 --- /dev/null +++ b/src/integrations/alerts/black-friday-promotion-notification.php @@ -0,0 +1,16 @@ +%2$s'; } diff --git a/src/integrations/settings-integration.php b/src/integrations/settings-integration.php index 2fe69486b09..e5d851299cc 100644 --- a/src/integrations/settings-integration.php +++ b/src/integrations/settings-integration.php @@ -25,6 +25,8 @@ use Yoast\WP\SEO\Helpers\Woocommerce_Helper; use Yoast\WP\SEO\Helpers\Options_Helper; use Yoast\WP\SEO\Content_Type_Visibility\Application\Content_Type_Visibility_Dismiss_Notifications; +use Yoast\WP\SEO\Promotions\Application\Promotion_Manager; + /** * Class Settings_Integration. 
@@ -364,6 +366,9 @@ public function enqueue_assets() { \wp_enqueue_media(); $this->asset_manager->enqueue_script( 'new-settings' ); $this->asset_manager->enqueue_style( 'new-settings' ); + if ( YoastSEO()->classes->get( Promotion_Manager::class )->is( 'black-friday-2023-promotion' ) ) { + $this->asset_manager->enqueue_style( 'black-friday-banner' ); + } $this->asset_manager->localize_script( 'new-settings', 'wpseoScriptData', $this->get_script_data() ); } @@ -456,6 +461,7 @@ protected function get_preferences( $settings ) { 'isWooCommerceActive' => $this->woocommerce_helper->is_active(), 'isLocalSeoActive' => \defined( 'WPSEO_LOCAL_FILE' ), 'isNewsSeoActive' => \defined( 'WPSEO_NEWS_FILE' ), + 'promotions' => YoastSEO()->classes->get( Promotion_Manager::class )->get_current_promotions(), 'siteUrl' => \get_bloginfo( 'url' ), 'siteTitle' => \get_bloginfo( 'name' ), 'sitemapUrl' => WPSEO_Sitemaps_Router::get_base_url( 'sitemap_index.xml' ), diff --git a/src/integrations/support-integration.php b/src/integrations/support-integration.php index fc8fe0c2a4f..edfa99e6258 100644 --- a/src/integrations/support-integration.php +++ b/src/integrations/support-integration.php @@ -8,6 +8,7 @@ use Yoast\WP\SEO\Helpers\Current_Page_Helper; use Yoast\WP\SEO\Helpers\Product_Helper; use Yoast\WP\SEO\Helpers\Short_Link_Helper; +use Yoast\WP\SEO\Promotions\Application\Promotion_Manager; /** * Class Support_Integration. 
@@ -130,6 +131,9 @@ public function enqueue_assets() { \remove_action( 'admin_print_scripts', 'print_emoji_detection_script' ); $this->asset_manager->enqueue_script( 'support' ); $this->asset_manager->enqueue_style( 'support' ); + if ( YoastSEO()->classes->get( Promotion_Manager::class )->is( 'black-friday-2023-promotion' ) ) { + $this->asset_manager->enqueue_style( 'black-friday-banner' ); + } $this->asset_manager->localize_script( 'support', 'wpseoScriptData', $this->get_script_data() ); } @@ -152,16 +156,17 @@ public function remove_notices() { */ public function get_script_data() { return [ - 'preferences' => [ + 'preferences' => [ 'isPremium' => $this->product_helper->is_premium(), 'isRtl' => \is_rtl(), + 'promotions' => YoastSEO()->classes->get( Promotion_Manager::class )->get_current_promotions(), 'pluginUrl' => \plugins_url( '', \WPSEO_FILE ), 'upsellSettings' => [ 'actionId' => 'load-nfd-ctb', 'premiumCtbId' => 'f6a84663-465f-4cb5-8ba5-f7a6d72224b2', ], ], - 'linkParams' => $this->shortlink_helper->get_query_params(), + 'linkParams' => $this->shortlink_helper->get_query_params(), ]; } } diff --git a/src/integrations/third-party/elementor.php b/src/integrations/third-party/elementor.php index 3c3d743db6c..f5dd067a656 100644 --- a/src/integrations/third-party/elementor.php +++ b/src/integrations/third-party/elementor.php @@ -26,7 +26,7 @@ use Yoast\WP\SEO\Integrations\Integration_Interface; use Yoast\WP\SEO\Introductions\Infrastructure\Wistia_Embed_Permission_Repository; use Yoast\WP\SEO\Presenters\Admin\Meta_Fields_Presenter; -use Yoast\WP\SEO\Promotions\Application\Promotion_Manager_Interface; +use Yoast\WP\SEO\Promotions\Application\Promotion_Manager; /** * Integrates the Yoast SEO metabox in the Elementor editor. @@ -102,11 +102,11 @@ class Elementor implements Integration_Interface { protected $inclusive_language_analysis; /** - * The promotions manager. + * Holds the promotion manager. 
* - * @var Promotion_Manager_Interface + * @var Promotion_Manager */ - private $promotion_manager; + protected $promotion_manager; /** * Returns the conditionals based in which this loadable should be active. @@ -120,16 +120,16 @@ public static function get_conditionals() { /** * Constructor. * - * @param WPSEO_Admin_Asset_Manager $asset_manager The asset manager. - * @param Options_Helper $options The options helper. - * @param Capability_Helper $capability The capability helper. - * @param Promotion_Manager_Interface $promotion_manager The promotions manager. + * @param WPSEO_Admin_Asset_Manager $asset_manager The asset manager. + * @param Options_Helper $options The options helper. + * @param Capability_Helper $capability The capability helper. + * @param Promotion_Manager $promotion_manager The promotion manager. */ public function __construct( WPSEO_Admin_Asset_Manager $asset_manager, Options_Helper $options, Capability_Helper $capability, - Promotion_Manager_Interface $promotion_manager + Promotion_Manager $promotion_manager ) { $this->asset_manager = $asset_manager; $this->options = $options; @@ -434,8 +434,7 @@ public function enqueue() { 'has_taxonomies' => $this->current_post_type_has_taxonomies(), ], 'shortcodes' => [ - 'wpseo_filter_shortcodes_nonce' => \wp_create_nonce( 'wpseo-filter-shortcodes' ), - 'wpseo_shortcode_tags' => $this->get_valid_shortcode_tags(), + 'wpseo_shortcode_tags' => $this->get_valid_shortcode_tags(), ], ]; @@ -468,7 +467,7 @@ public function enqueue() { ], 'dismissedAlerts' => $dismissed_alerts, 'webinarIntroElementorUrl' => WPSEO_Shortlinker::get( 'https://yoa.st/webinar-intro-elementor' ), - 'blackFridayBlockEditorUrl' => ( $this->promotion_manager->is( 'black_friday_2023_checklist' ) ) ? 
WPSEO_Shortlinker::get( 'https://yoa.st/black-friday-checklist' ) : '', + 'currentPromotions' => $this->promotion_manager->get_current_promotions(), 'usedKeywordsNonce' => \wp_create_nonce( 'wpseo-keyword-usage-and-post-types' ), 'linkParams' => WPSEO_Shortlinker::get_query_params(), 'pluginUrl' => \plugins_url( '', \WPSEO_FILE ), diff --git a/src/integrations/third-party/wordproof.php b/src/integrations/third-party/wordproof.php index c9d63ba1d4b..2928bd1bdca 100644 --- a/src/integrations/third-party/wordproof.php +++ b/src/integrations/third-party/wordproof.php @@ -89,10 +89,19 @@ public function register_hooks() { */ \add_action( 'wp_enqueue_scripts', [ $this, 'enqueue_assets' ], 10, 0 ); - /** - * Add async to the wordproof scripts. - */ - \add_filter( 'script_loader_tag', [ $this, 'add_async_to_script' ], 10, 3 ); + if ( version_compare( strtok( get_bloginfo( 'version' ), '-' ), '6.3', '>=' ) ) { + \add_action( + 'wp_enqueue_scripts', + function() { + \wp_script_add_data( WPSEO_Admin_Asset_Manager::PREFIX . 'wordproof-uikit', 'strategy', 'async' ); + }, + 11, + 0 + ); + } + else { + \add_filter( 'script_loader_tag', [ $this, 'add_async_to_script' ], 10, 3 ); + } /** * Removes the post meta timestamp key for the old privacy page. diff --git a/src/integrations/watchers/indexable-ancestor-watcher.php b/src/integrations/watchers/indexable-ancestor-watcher.php index 9c76a4529b1..fddf0c92d18 100644 --- a/src/integrations/watchers/indexable-ancestor-watcher.php +++ b/src/integrations/watchers/indexable-ancestor-watcher.php @@ -92,9 +92,6 @@ public function __construct( */ public function register_hooks() { \add_action( 'wpseo_save_indexable', [ $this, 'reset_children' ], \PHP_INT_MAX, 2 ); - if ( ! 
\check_ajax_referer( 'inlineeditnonce', '_inline_edit', false ) ) { - \add_action( 'set_object_terms', [ $this, 'build_post_hierarchy' ], 10, 6 ); - } } /** @@ -106,29 +103,6 @@ public static function get_conditionals() { return [ Migrations_Conditional::class ]; } - /** - * Validates if the current primary category is still present. If not just remove the post meta for it. - * - * @param int $object_id Object ID. - * @param array $terms Unused. An array of object terms. - * @param array $tt_ids An array of term taxonomy IDs. - * @param string $taxonomy Taxonomy slug. - * @param bool $append Whether to append new terms to the old terms. - * @param array $old_tt_ids Old array of term taxonomy IDs. - */ - public function build_post_hierarchy( $object_id, $terms, $tt_ids, $taxonomy, $append, $old_tt_ids ) { - $post = \get_post( $object_id ); - if ( $this->post_type_helper->is_excluded( $post->post_type ) ) { - return; - } - - $indexable = $this->indexable_repository->find_by_id_and_type( $post->ID, $post->post_type ); - - if ( $indexable instanceof Indexable ) { - $this->indexable_hierarchy_builder->build( $indexable ); - } - } - /** * If an indexable's permalink has changed, updates its children in the hierarchy table and resets the children's permalink. * diff --git a/src/presenters/admin/help-link-presenter.php b/src/presenters/admin/help-link-presenter.php index 72c5a32f37f..45db58bfa64 100644 --- a/src/presenters/admin/help-link-presenter.php +++ b/src/presenters/admin/help-link-presenter.php @@ -72,7 +72,8 @@ public function present() { if ( $this->opens_in_new_browser_tab ) { $target_blank_attribute = ' target="_blank"'; - $new_tab_message = ' ' . \__( '(Opens in a new browser tab)', 'wordpress-seo' ); + /* translators: Hidden accessibility text. */ + $new_tab_message = ' ' . 
\__( '(Opens in a new browser tab)', 'wordpress-seo' ); } return \sprintf( diff --git a/src/presenters/admin/sidebar-presenter.php b/src/presenters/admin/sidebar-presenter.php index 2b88d682736..08bef10162d 100644 --- a/src/presenters/admin/sidebar-presenter.php +++ b/src/presenters/admin/sidebar-presenter.php @@ -4,6 +4,7 @@ use WPSEO_Shortlinker; use Yoast\WP\SEO\Presenters\Abstract_Presenter; +use Yoast\WP\SEO\Promotions\Application\Promotion_Manager; /** * Presenter class for the Yoast SEO sidebar. @@ -16,6 +17,8 @@ class Sidebar_Presenter extends Abstract_Presenter { * @return string The sidebar HTML. */ public function present() { + $title = \__( 'BLACK FRIDAY - 30% OFF', 'wordpress-seo' ); + $assets_uri = \trailingslashit( \plugin_dir_url( \WPSEO_FILE ) ); $buy_yoast_seo_shortlink = WPSEO_Shortlinker::get( 'https://yoa.st/jj' ); \ob_start(); @@ -42,6 +45,13 @@ class="attachment-full size-full content-visible" sizes="(min-width: 1321px) 75px"> + classes->get( Promotion_Manager::class )->is( 'black-friday-2023-promotion' ) ) : ?> + +

    ', '' ); + echo \esc_html__( 'Use AI to generate titles and meta descriptions, automatically redirect deleted pages, get 24/7 support and much, much more!', 'wordpress-seo' ); ?>

    + classes->get( Promotion_Manager::class )->is( 'black-friday-2023-promotion' ) ) : ?> +
    +

    classes->get( Promotion_Manager::class )->is( 'black-friday-2023-promotion' ) ) { + echo \esc_html__( 'Claim your 30% off now!', 'wordpress-seo' ); + } + else { + /* translators: %s expands to Yoast SEO Premium */ + \printf( \esc_html__( 'Get %s', 'wordpress-seo' ), 'Yoast SEO Premium' ); + } ?>

    -

    + -

    + - - - - - - + + + + + + 4.6 / 5 diff --git a/src/promotions/domain/black-friday-checklist-promotion.php b/src/promotions/domain/black-friday-checklist-promotion.php index 44868498eb1..8ef4e6dd661 100644 --- a/src/promotions/domain/black-friday-checklist-promotion.php +++ b/src/promotions/domain/black-friday-checklist-promotion.php @@ -14,7 +14,7 @@ class Black_Friday_Checklist_Promotion extends Abstract_Promotion implements Pro */ public function __construct() { parent::__construct( - 'black_friday_2023_checklist', + 'black-friday-2023-checklist', new Time_Interval( \gmmktime( 11, 00, 00, 9, 19, 2023 ), \gmmktime( 11, 00, 00, 10, 31, 2023 ) diff --git a/src/promotions/domain/black-friday-promotion.php b/src/promotions/domain/black-friday-promotion.php index f809b11542d..1693d315350 100644 --- a/src/promotions/domain/black-friday-promotion.php +++ b/src/promotions/domain/black-friday-promotion.php @@ -14,7 +14,7 @@ class Black_Friday_Promotion extends Abstract_Promotion implements Promotion_Int */ public function __construct() { parent::__construct( - 'black_friday_2023', + 'black-friday-2023-promotion', new Time_Interval( \gmmktime( 11, 00, 00, 11, 23, 2023 ), \gmmktime( 11, 00, 00, 11, 28, 2023 ) ) ); } diff --git a/tests/integration/sitemaps/test-class-wpseo-post-type-sitemap-provider.php b/tests/integration/sitemaps/test-class-wpseo-post-type-sitemap-provider.php index 1ed0d6750a6..6d4ad891938 100644 --- a/tests/integration/sitemaps/test-class-wpseo-post-type-sitemap-provider.php +++ b/tests/integration/sitemaps/test-class-wpseo-post-type-sitemap-provider.php @@ -9,6 +9,7 @@ * Class WPSEO_Post_Type_Sitemap_Provider_Test. * * @group sitemaps + * @coversDefaultClass \Yoast\WP\SEO\Models\SEO_Links\WPSEO_Post_Type_Sitemap_Provider */ class WPSEO_Post_Type_Sitemap_Provider_Test extends WPSEO_UnitTestCase { @@ -38,7 +39,7 @@ public function set_up() { /** * No entries in the post or page types should still generate an index entry. 
* - * @covers WPSEO_Post_Type_Sitemap_Provider::get_index_links + * @covers ::get_index_links */ public function test_get_index_links_no_entries() { $index_links = self::$class_instance->get_index_links( 1 ); @@ -48,7 +49,7 @@ public function test_get_index_links_no_entries() { /** * Multiple pages of a post-type should result in multiple index entries. * - * @covers WPSEO_Post_Type_Sitemap_Provider::get_index_links + * @covers ::get_index_links */ public function test_get_index_links_one_entry_paged() { @@ -68,7 +69,7 @@ public function test_get_index_links_one_entry_paged() { /** * Multiple entries should be on the same sitemap if not over page limit. * - * @covers WPSEO_Post_Type_Sitemap_Provider::get_index_links + * @covers ::get_index_links */ public function test_get_index_links_multiple_entries_non_paged() { $this->factory->post->create(); @@ -82,7 +83,7 @@ public function test_get_index_links_multiple_entries_non_paged() { /** * Makes sure the filtered out entries do not cause a sitemap index link to return a 404. * - * @covers WPSEO_Post_Type_Sitemap_Provider::get_index_links + * @covers ::get_index_links */ public function test_get_index_links_empty_bucket() { @@ -115,7 +116,7 @@ public function test_get_index_links_empty_bucket() { /** * Makes sure invalid sitemap pages return no contents (404). * - * @covers WPSEO_Post_Type_Sitemap_Provider::get_index_links + * @covers ::get_index_links */ public function test_get_index_links_empty_sitemap() { // Fetch the global sitemap. @@ -135,7 +136,7 @@ public function test_get_index_links_empty_sitemap() { /** * Tests the sitemap links for the different homepage possibilities. * - * @covers WPSEO_Post_Type_Sitemap_Provider::get_sitemap_links + * @covers ::get_sitemap_links */ public function test_get_sitemap_links() { $sitemap_provider = new WPSEO_Post_Type_Sitemap_Provider_Double(); @@ -190,7 +191,7 @@ public function test_get_sitemap_links() { /** * Tests the excluded posts with the usage of the filter. 
* - * @covers WPSEO_Post_Type_Sitemap_Provider::get_excluded_posts + * @covers ::get_excluded_posts */ public function test_get_excluded_posts_with_set_filter() { $sitemap_provider = new WPSEO_Post_Type_Sitemap_Provider_Double(); @@ -213,7 +214,7 @@ public function test_get_excluded_posts_with_set_filter() { /** * Tests the excluded posts with the usage of a filter that returns an invalid value. * - * @covers WPSEO_Post_Type_Sitemap_Provider::get_excluded_posts + * @covers ::get_excluded_posts */ public function test_get_excluded_posts_with_set_filter_that_has_invalid_return_value() { $sitemap_provider = new WPSEO_Post_Type_Sitemap_Provider_Double(); @@ -257,7 +258,7 @@ public function filter_with_invalid_output( $excluded_post_ids ) { /** * Tests if external URLs are not being included in the sitemap. * - * @covers WPSEO_Post_Type_Sitemap_Provider::get_url + * @covers ::get_url */ public function test_get_url() { $current_home = get_option( 'home' ); @@ -282,7 +283,7 @@ public function test_get_url() { /** * Tests a regular post is added to the sitemap. * - * @covers WPSEO_Post_Type_Sitemap_Provider::get_sitemap_links + * @covers ::get_sitemap_links */ public function test_regular_post() { $this->factory->post->create(); @@ -294,7 +295,7 @@ public function test_regular_post() { /** * Tests to make sure password protected posts are not in the sitemap. * - * @covers WPSEO_Post_Type_Sitemap_Provider::get_sitemap_links + * @covers ::get_sitemap_links */ public function test_password_protected_post() { // Create password protected post. @@ -315,7 +316,7 @@ public function test_password_protected_post() { /** * Tests to make sure a regular attachment is include in the sitemap. * - * @covers WPSEO_Post_Type_Sitemap_Provider::get_sitemap_links + * @covers ::get_sitemap_links */ public function test_regular_attachment() { // Enable attachments in the sitemap. 
@@ -343,7 +344,7 @@ public function test_regular_attachment() { /** * Tests to make sure attachment is not added when parent is a protected post. * - * @covers WPSEO_Post_Type_Sitemap_Provider::get_sitemap_links + * @covers ::get_sitemap_links * * @link https://github.com/Yoast/wordpress-seo/issues/9194 */ diff --git a/tests/integration/sitemaps/test-class-wpseo-taxonomy-sitemap-provider.php b/tests/integration/sitemaps/test-class-wpseo-taxonomy-sitemap-provider.php index 0f171869e82..8731d7ef87e 100644 --- a/tests/integration/sitemaps/test-class-wpseo-taxonomy-sitemap-provider.php +++ b/tests/integration/sitemaps/test-class-wpseo-taxonomy-sitemap-provider.php @@ -9,6 +9,7 @@ * Class WPSEO_Taxonomy_Sitemap_Provider_Test. * * @group sitemaps + * @coversDefaultClass WPSEO_Taxonomy_Sitemap_Provider */ class WPSEO_Taxonomy_Sitemap_Provider_Test extends WPSEO_UnitTestCase { @@ -31,7 +32,7 @@ public function set_up() { /** * Tests the retrieval of the index links. * - * @covers WPSEO_Taxonomy_Sitemap_Provider::get_index_links + * @covers ::get_index_links */ public function test_get_index_links() { @@ -57,7 +58,7 @@ public function test_get_index_links() { /** * Tests retrieval of the sitemap links. * - * @covers WPSEO_Taxonomy_Sitemap_Provider::get_sitemap_links + * @covers ::get_sitemap_links */ public function test_get_sitemap_links() { @@ -71,7 +72,7 @@ public function test_get_sitemap_links() { /** * Makes sure invalid sitemap pages return no contents (404). * - * @covers WPSEO_Taxonomy_Sitemap_Provider::get_index_links + * @covers ::get_index_links */ public function test_get_index_links_empty_sitemap() { // Fetch the global sitemap. @@ -87,4 +88,44 @@ public function test_get_index_links_empty_sitemap() { // Expect an empty page (404) to be returned. $this->expectOutputString( '' ); } + + /** + * Data provider for is_valid_taxonomy test. 
+ * + * @return array + */ + public function data_provider_is_valid_taxonomy() { + return [ + 'Pattern Categories' => [ + 'taxonomy' => 'wp_pattern_category', + 'expected' => false, + ], + 'nav_menu' => [ + 'taxonomy' => 'nav_menu', + 'expected' => false, + ], + 'link_category' => [ + 'taxonomy' => 'link_category', + 'expected' => false, + ], + 'post_format' => [ + 'taxonomy' => 'post_format', + 'expected' => false, + ], + ]; + } + + /** + * Test of is_valid_taxonomy. + * + * @covers ::is_valid_taxonomy + * + * @dataProvider data_provider_is_valid_taxonomy + * + * @param string $taxonomy Taxonomy name. + * @param bool $expected Expected result. + */ + public function test_is_valid_taxonomy( $taxonomy, $expected ) { + $this->assertSame( $expected, self::$class_instance->is_valid_taxonomy( $taxonomy ) ); + } } diff --git a/tests/unit/integrations/support-integration-test.php b/tests/unit/integrations/support-integration-test.php index cf995c02180..6c642a3a67c 100644 --- a/tests/unit/integrations/support-integration-test.php +++ b/tests/unit/integrations/support-integration-test.php @@ -11,6 +11,7 @@ use Yoast\WP\SEO\Helpers\Product_Helper; use Yoast\WP\SEO\Helpers\Short_Link_Helper; use Yoast\WP\SEO\Integrations\Support_Integration; +use Yoast\WP\SEO\Promotions\Application\Promotion_Manager; use Yoast\WP\SEO\Tests\Unit\TestCase; /** @@ -23,33 +24,47 @@ class Support_Integration_Test extends TestCase { const PAGE = 'wpseo_page_support'; /** - * Holds the WPSEO_Admin_Asset_Manager. + * Holds the WPSEO_Admin_Asset_Manager mock. * * @var Mockery\MockInterface|WPSEO_Admin_Asset_Manager */ private $asset_manager; /** - * Holds the Current_Page_Helper. + * Holds the Current_Page_Helper mock. * - * @var Current_Page_Helper + * @var Mockery\MockInterface|Current_Page_Helper */ private $current_page_helper; /** - * Holds the Product_Helper. + * Holds the Product_Helper mock.
 * - * @var Product_Helper + * @var Mockery\MockInterface|Product_Helper */ private $product_helper; /** - * Holds the Short_Link_Helper. + * Holds the Short_Link_Helper mock. * - * @var Short_Link_Helper + * @var Mockery\MockInterface|Short_Link_Helper */ private $shortlink_helper; + /** + * Holds the Promotion_Manager mock. + * + * @var Mockery\MockInterface|Promotion_Manager + */ + private $promotion_manager; + + /** + * Holds the container. + * + * @var Mockery\MockInterface|Container + */ + private $container; + /** * The class under test. * @@ -67,6 +82,8 @@ public function set_up() { $this->current_page_helper = Mockery::mock( Current_Page_Helper::class ); $this->product_helper = Mockery::mock( Product_Helper::class ); $this->shortlink_helper = Mockery::mock( Short_Link_Helper::class ); + $this->promotion_manager = Mockery::mock( Promotion_Manager::class ); + $this->container = $this->create_container_with( [ Promotion_Manager::class => $this->promotion_manager ] ); $this->instance = new Support_Integration( $this->asset_manager, @@ -216,17 +233,54 @@ public function test_remove_notices() { $this->instance->remove_notices(); } + /** + * Data provider for test_enqueue_assets + * + * @return array + */ + public function data_provider_enqueue_black_friday_style() { + return [ + 'is black friday' => [ + 'is_black_friday' => true, + 'expected' => 1, + ], + 'is not black friday' => [ + 'is_black_friday' => false, + 'expected' => 0, + ], + ]; + } + /** * Test enqueue_assets * * @covers ::enqueue_assets * @covers ::get_script_data + * + * @dataProvider data_provider_enqueue_black_friday_style + * + * @param bool $is_black_friday Whether it is black friday or not. + * @param int $expected The number of times the action should be called.
+ * @return void */ - public function test_enqueue_assets() { + public function test_enqueue_assets( $is_black_friday, $expected ) { Monkey\Functions\expect( 'remove_action' ) ->with( 'admin_print_scripts', 'print_emoji_detection_script' ) ->once(); + + // In get_script_data method. + $this->assert_promotions(); + + // In enqueue_assets method. + $this->promotion_manager->expects( 'is' ) + ->with( 'black-friday-2023-promotion' ) + ->once() + ->andReturn( $is_black_friday ); + + Monkey\Functions\expect( 'YoastSEO' ) + ->andReturn( (object) [ 'classes' => $this->create_classes_surface( $this->container ) ] ); + $this->asset_manager ->expects( 'enqueue_script' ) ->with( 'support' ) @@ -237,6 +291,11 @@ public function test_enqueue_assets() { ->with( 'support' ) ->once(); + $this->asset_manager + ->expects( 'enqueue_style' ) + ->with( 'black-friday-banner' ) + ->times( $expected ); + $this->asset_manager ->expects( 'localize_script' ) ->once(); @@ -289,19 +348,36 @@ public function expect_get_script_data() { public function test_get_script_data() { $link_params = $this->expect_get_script_data(); + $this->assert_promotions(); + $expected = [ - 'preferences' => [ + 'preferences' => [ 'isPremium' => false, 'isRtl' => false, + 'promotions' => [ 'black-friday-2023-promotion' ], 'pluginUrl' => 'http://basic.wordpress.test/wp-content/worspress-seo', 'upsellSettings' => [ 'actionId' => 'load-nfd-ctb', 'premiumCtbId' => 'f6a84663-465f-4cb5-8ba5-f7a6d72224b2', ], ], - 'linkParams' => $link_params, + 'linkParams' => $link_params, ]; $this->assertSame( $expected, $this->instance->get_script_data() ); } + + /** + * Asserts the check for promotions. 
+ * + * @return void + */ + protected function assert_promotions() { + $this->promotion_manager->expects( 'get_current_promotions' ) + ->once() + ->andReturn( [ 'black-friday-2023-promotion' ] ); + + Monkey\Functions\expect( 'YoastSEO' ) + ->andReturn( (object) [ 'classes' => $this->create_classes_surface( $this->container ) ] ); + } } diff --git a/tests/unit/integrations/watchers/indexable-ancestor-watcher-test.php b/tests/unit/integrations/watchers/indexable-ancestor-watcher-test.php index 1b55929e38d..04e2eed2e54 100644 --- a/tests/unit/integrations/watchers/indexable-ancestor-watcher-test.php +++ b/tests/unit/integrations/watchers/indexable-ancestor-watcher-test.php @@ -168,116 +168,15 @@ public function test_get_conditionals() { ); } - /** - * Data provider for the register_hooks test. - * - * @return array The data. - */ - public function data_provider_register_hooks() { - return [ - 'When ancestor is changes inline edit' => [ - 'is_inline_edit' => true, - 'set_object_terms_action' => false, - ], - 'When ancestor is changes not inline edit' => [ - 'is_inline_edit' => false, - 'set_object_terms_action' => 10, - ], - ]; - } - /** * Tests if the expected hooks are registered. * * @covers ::register_hooks - * - * @dataProvider data_provider_register_hooks - * - * @param bool $is_inline_edit Whether or not the request is an inline edit. - * @param bool|int $set_object_terms_action The set_object_terms action return value. 
*/ - public function test_register_hooks( $is_inline_edit, $set_object_terms_action ) { - - Functions\expect( 'check_ajax_referer' ) - ->once() - ->with( 'inlineeditnonce', '_inline_edit', false ) - ->andReturn( $is_inline_edit ); - + public function test_register_hooks() { $this->instance->register_hooks(); self::assertNotFalse( \has_action( 'wpseo_save_indexable', [ $this->instance, 'reset_children' ] ) ); - self::assertSame( $set_object_terms_action, \has_action( 'set_object_terms', [ $this->instance, 'build_post_hierarchy' ] ) ); - } - - /** - * Data provider for the build_post_hierarchy test. - * - * @return array The data. - */ - public static function data_provider_build_post_hierarchy() { - $indexable = Mockery::mock( Indexable_Mock::class ); - return [ - 'Building hierarchy' => [ - 'is_excluded' => false, - 'find_by_id_and_type_times' => 1, - 'indexable' => $indexable, - 'build_times' => 1, - ], - 'Not building hierarchy because no indexable' => [ - 'is_excluded' => false, - 'find_by_id_and_type_times' => 1, - 'indexable' => (object) [ 'ID' => 5 ], - 'build_times' => 0, - ], - 'Not building because excluded post type' => [ - 'is_excluded' => true, - 'find_by_id_and_type_times' => 0, - 'indexable' => (object) [ 'ID' => 5 ], - 'build_times' => 0, - ], - ]; - } - - /** - * Tests the build_post_hierarchy. - * - * @covers ::build_post_hierarchy - * - * @dataProvider data_provider_build_post_hierarchy - * - * @param bool $is_excluded Whether or not the post type is excluded. - * @param int $find_by_id_and_type_times The number of times the find_by_id_and_type method is called. - * @param Indexable_Mock $indexable The indexable. - * @param int $build_times The number of times the build method is called. 
- */ - public function test_build_post_hierarchy( $is_excluded, $find_by_id_and_type_times, $indexable, $build_times ) { - $post = Mockery::mock( \WP_Post::class ); - $post->post_type = 'post'; - $post->ID = 5; - - Functions\expect( 'get_post' ) - ->once() - ->with( 5 ) - ->andReturn( $post ); - - $this->post_type_helper - ->expects( 'is_excluded' ) - ->once() - ->with( $post->post_type ) - ->andReturn( $is_excluded ); - - $this->indexable_repository - ->expects( 'find_by_id_and_type' ) - ->times( $find_by_id_and_type_times ) - ->with( $post->ID, $post->post_type ) - ->andReturn( $indexable ); - - $this->indexable_hierarchy_builder - ->expects( 'build' ) - ->times( $build_times ) - ->with( $indexable ); - - $this->instance->build_post_hierarchy( 5, [ 'test_term' ], [ 7 ], 'category', false, [] ); } /** diff --git a/wp-seo-main.php b/wp-seo-main.php index d8c6d0b882a..71495bf2c57 100644 --- a/wp-seo-main.php +++ b/wp-seo-main.php @@ -15,7 +15,7 @@ * {@internal Nobody should be able to overrule the real version number as this can cause * serious issues with the options, so no if ( ! defined() ).}} */ -define( 'WPSEO_VERSION', '21.3-RC2' ); +define( 'WPSEO_VERSION', '21.4-RC7' ); if ( ! defined( 'WPSEO_PATH' ) ) { diff --git a/wp-seo.php b/wp-seo.php index e9ecaa38d4d..6e50c742a98 100644 --- a/wp-seo.php +++ b/wp-seo.php @@ -8,7 +8,7 @@ * * @wordpress-plugin * Plugin Name: Yoast SEO - * Version: 21.3-RC2 + * Version: 21.4-RC7 * Plugin URI: https://yoa.st/1uj * Description: The first true all-in-one SEO solution for WordPress, including on-page content analysis, XML sitemaps and much more. * Author: Team Yoast @@ -20,7 +20,7 @@ * Requires PHP: 7.2.5 * * WC requires at least: 7.1 - * WC tested up to: 8.1 + * WC tested up to: 8.2 * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by