Skip to content

Commit

Permalink
Merge pull request #20695 from Yoast/238-move-the-frequency-lists-of-…
Browse files Browse the repository at this point in the history
…word-complexity-assessment-to-premium-configuration-repo

Move the frequency lists of the word complexity assessment out of the `yoastseo` package
  • Loading branch information
mykola authored Oct 10, 2023
2 parents 2a43743 + e5772c1 commit e133573
Show file tree
Hide file tree
Showing 24 changed files with 190 additions and 8,538 deletions.
3 changes: 3 additions & 0 deletions apps/content-analysis/src/analysis.worker.js
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import CollectionCornerstoneSEOAssessor from "yoastseo/src/scoring/collectionPag
import CollectionRelatedKeywordAssessor from "yoastseo/src/scoring/collectionPages/relatedKeywordAssessor";
import CollectionCornerstoneRelatedKeywordAssessor from "yoastseo/src/scoring/collectionPages/cornerstone/relatedKeywordAssessor";

import registerPremiumAssessments from "./utils/registerPremiumAssessments";

self.onmessage = ( event ) => {
const language = event.data.language;
Expand All @@ -36,6 +37,8 @@ self.onmessage = ( event ) => {

const worker = new AnalysisWebWorker( self, new Researcher() );

registerPremiumAssessments( worker, language );

// Set custom assessors.
// Store product pages.
worker.setCustomSEOAssessorClass( productSEOAssessor, "productPage", { introductionKeyphraseUrlTitle: "https://yoa.st/shopify8",
Expand Down
71 changes: 71 additions & 0 deletions apps/content-analysis/src/utils/registerPremiumAssessments.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
import { getLanguagesWithWordComplexity } from "yoastseo/src/helpers/getLanguagesWithWordComplexity";

// Configs for the supported languages.
import wordComplexityConfigEnglish from "yoastseo/src/languageProcessing/languages/en/config/wordComplexity";
import wordComplexityConfigGerman from "yoastseo/src/languageProcessing/languages/de/config/wordComplexity";
import wordComplexityConfigSpanish from "yoastseo/src/languageProcessing/languages/es/config/wordComplexity";
import wordComplexityConfigFrench from "yoastseo/src/languageProcessing/languages/fr/config/wordComplexity";

// Helpers for the supported languages.
import wordComplexityHelperEnglish from "yoastseo/src/languageProcessing/languages/en/helpers/checkIfWordIsComplex";
import wordComplexityHelperGerman from "yoastseo/src/languageProcessing/languages/de/helpers/checkIfWordIsComplex";
import wordComplexityHelperSpanish from "yoastseo/src/languageProcessing/languages/es/helpers/checkIfWordIsComplex";
import wordComplexityHelperFrench from "yoastseo/src/languageProcessing/languages/fr/helpers/checkIfWordIsComplex";
// Research.
import wordComplexity from "yoastseo/src/languageProcessing/researches/wordComplexity";
import keyPhraseDistribution from "yoastseo/src/languageProcessing/researches/keyphraseDistribution";

// Assessment.
import WordComplexityAssessment from "yoastseo/src/scoring/assessments/readability/WordComplexityAssessment";
import KeyphraseDistributionAssessment from "yoastseo/src/scoring/assessments/seo/KeyphraseDistributionAssessment";

// Language-specific `checkIfWordIsComplex` helpers, keyed by language code.
// The entry matching the `language` argument is registered on the worker as the "checkIfWordIsComplex" helper.
const helpers = {
de: wordComplexityHelperGerman,
en: wordComplexityHelperEnglish,
es: wordComplexityHelperSpanish,
fr: wordComplexityHelperFrench,
};

// Language-specific word complexity configs, keyed by the same language codes as `helpers`.
// The entry matching the `language` argument is registered on the worker as the "wordComplexity" researcher config.
const configs = {
de: wordComplexityConfigGerman,
en: wordComplexityConfigEnglish,
es: wordComplexityConfigSpanish,
fr: wordComplexityConfigFrench,
};

// Plugin name passed along with every assessment registered from this module.
const pluginName = "YoastSEOPremium";

/**
 * Registers everything the word complexity assessment needs on the worker:
 * the language-specific config and helper, the research, and the assessment
 * for both regular and cornerstone content.
 *
 * @param {Object} worker   The analysis worker to register on.
 * @param {string} language The language code used to pick the config and helper.
 *
 * @returns {void}
 */
function registerWordComplexity( worker, language ) {
	// Both assessment instances are created up front, before anything is registered.
	const regularAssessment = new WordComplexityAssessment();
	// The cornerstone variant is constructed with its own `acceptableAmount` score setting.
	const cornerstoneAssessment = new WordComplexityAssessment( {
		scores: {
			acceptableAmount: 3,
		},
	} );

	// Language-specific data first, then the research that is registered under the same name.
	worker.registerResearcherConfig( "wordComplexity", configs[ language ] );
	worker.registerHelper( "checkIfWordIsComplex", helpers[ language ] );
	worker.registerResearch( "wordComplexity", wordComplexity );

	// The same assessment name is registered twice: once per readability assessor type.
	worker.registerAssessment( "wordComplexity", regularAssessment, pluginName, "readability" );
	worker.registerAssessment( "wordComplexity", cornerstoneAssessment, pluginName, "cornerstoneReadability" );
}

/**
 * Registers the premium assessments on the given analysis worker.
 *
 * The word complexity assessment is only registered when `language` is one of
 * the languages returned by `getLanguagesWithWordComplexity`. The keyphrase
 * distribution research and assessment are always registered.
 *
 * @param {Object} worker   The analysis worker to register on.
 * @param {string} language The language code of the content being analyzed.
 *
 * @returns {void}
 */
export default function( worker, language ) {
	const supportedLanguages = getLanguagesWithWordComplexity();
	if ( supportedLanguages.includes( language ) ) {
		registerWordComplexity( worker, language );
	}

	// Keyphrase distribution does not depend on the language check above.
	const distributionAssessment = new KeyphraseDistributionAssessment();
	worker.registerResearch( "keyphraseDistribution", keyPhraseDistribution );
	worker.registerAssessment( "keyphraseDistributionAssessment", distributionAssessment, pluginName, "seo" );
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ import stopWords from "../../../../src/languageProcessing/languages/de/config/st
import syllables from "../../../../src/languageProcessing/languages/de/config/syllables.json";
import checkIfWordIsComplex from "../../../../src/languageProcessing/languages/de/helpers/checkIfWordIsComplex";
import wordComplexityConfig from "../../../../src/languageProcessing/languages/de/config/wordComplexity";
const morphologyDataDE = getMorphologyData( "de" );
const premiumData = getMorphologyData( "de" );

describe( "a test for the German Researcher", function() {
const researcher = new Researcher( new Paper( "" ) );
Expand Down Expand Up @@ -55,7 +55,7 @@ describe( "a test for the German Researcher", function() {
} );

it( "stems a word using the German stemmer", function() {
researcher.addResearchData( "morphology", morphologyDataDE );
researcher.addResearchData( "morphology", premiumData );
expect( researcher.getHelper( "getStemmer" )( researcher )( "Katzen" ) ).toEqual( "Katz" );
} );

Expand All @@ -81,8 +81,8 @@ describe( "a test for the German Researcher", function() {
it( "checks if a word is complex in German", function() {
researcher.addHelper( "checkIfWordIsComplex", checkIfWordIsComplex );

expect( researcher.getHelper( "checkIfWordIsComplex" )( wordComplexityConfig, "optimierungen" ) ).toEqual( true );
expect( researcher.getHelper( "checkIfWordIsComplex" )( wordComplexityConfig, "boxen" ) ).toEqual( false );
expect( researcher.getHelper( "checkIfWordIsComplex" )( wordComplexityConfig, "optimierungen", premiumData.de ) ).toEqual( true );
expect( researcher.getHelper( "checkIfWordIsComplex" )( wordComplexityConfig, "boxen", premiumData.de ) ).toEqual( false );
} );

it( "checks if a word is a function word in German", function() {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,35 +1,37 @@
import checkIfWordIsComplex from "../../../../../src/languageProcessing/languages/de/helpers/checkIfWordIsComplex";
import wordComplexityConfig from "../../../../../src/languageProcessing/languages/de/config/wordComplexity";
import getMorphologyData from "../../../../specHelpers/getMorphologyData";
const premiumData = getMorphologyData( "de" ).de;

describe( "a test checking if the word is complex in German", function() {
it( "returns singular word form as non-complex if it is found in the list", function() {
expect( checkIfWordIsComplex( wordComplexityConfig, "präsident" ) ).toEqual( false );
expect( checkIfWordIsComplex( wordComplexityConfig, "präsident", premiumData ) ).toEqual( false );
} );

it( "returns plural word form as non-complex if its singular form is found in the list", function() {
expect( checkIfWordIsComplex( wordComplexityConfig, "Verstärkungen" ) ).toEqual( false );
expect( checkIfWordIsComplex( wordComplexityConfig, "Verstärkungen", premiumData ) ).toEqual( false );
} );

it( "returns plural word form as non-complex if its singular form is found in the list", function() {
expect( checkIfWordIsComplex( wordComplexityConfig, "Gouverneure" ) ).toEqual( false );
expect( checkIfWordIsComplex( wordComplexityConfig, "Gouverneure", premiumData ) ).toEqual( false );
} );
it( "returns word as non-complex if it is found in the function words list", function() {
expect( checkIfWordIsComplex( wordComplexityConfig, "verschiedenes" ) ).toEqual( false );
expect( checkIfWordIsComplex( wordComplexityConfig, "verschiedenes", premiumData ) ).toEqual( false );
} );

it( "returns plural word as complex if it (and its singular form) are not in the list", function() {
expect( checkIfWordIsComplex( wordComplexityConfig, "optimierungen" ) ).toEqual( true );
expect( checkIfWordIsComplex( wordComplexityConfig, "optimierungen", premiumData ) ).toEqual( true );
} );

it( "returns word longer than 10 characters as complex if it's not in the list", function() {
expect( checkIfWordIsComplex( wordComplexityConfig, "architektonisch" ) ).toEqual( true );
expect( checkIfWordIsComplex( wordComplexityConfig, "architektonisch", premiumData ) ).toEqual( true );
} );

it( "returns plural word as non complex if the word is less than 10 characters", function() {
expect( checkIfWordIsComplex( wordComplexityConfig, "boxen" ) ).toEqual( false );
expect( checkIfWordIsComplex( wordComplexityConfig, "boxen", premiumData ) ).toEqual( false );
} );

it( "recognized contractions when the contraction uses ’ (right single quotation mark) instead of ' (apostrophe)", function() {
expect( checkIfWordIsComplex( wordComplexityConfig, "l’histoire" ) ).toEqual( false );
expect( checkIfWordIsComplex( wordComplexityConfig, "l’histoire", premiumData ) ).toEqual( false );
} );
} );
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ import stopWords from "../../../../src/languageProcessing/languages/en/config/st
import syllables from "../../../../src/languageProcessing/languages/en/config/syllables.json";
import checkIfWordIsComplex from "../../../../src/languageProcessing/languages/en/helpers/checkIfWordIsComplex";
import wordComplexityConfig from "../../../../src/languageProcessing/languages/en/config/wordComplexity";
const morphologyDataEN = getMorphologyData( "en" );
const premiumData = getMorphologyData( "en" );

describe( "a test for the English Researcher", function() {
const researcher = new Researcher( new Paper( "This is another paper!" ) );
Expand Down Expand Up @@ -55,7 +55,7 @@ describe( "a test for the English Researcher", function() {
} );

it( "stems a word using the English stemmer", function() {
researcher.addResearchData( "morphology", morphologyDataEN );
researcher.addResearchData( "morphology", premiumData );
expect( researcher.getHelper( "getStemmer" )( researcher )( "cats" ) ).toEqual( "cat" );
} );

Expand All @@ -82,7 +82,7 @@ describe( "a test for the English Researcher", function() {
it( "checks if a word is complex in English", function() {
researcher.addHelper( "checkIfWordIsComplex", checkIfWordIsComplex );

expect( researcher.getHelper( "checkIfWordIsComplex" )( wordComplexityConfig, "polygonal", morphologyDataEN.en ) ).toEqual( true );
expect( researcher.getHelper( "checkIfWordIsComplex" )( wordComplexityConfig, "investigations", morphologyDataEN.en ) ).toEqual( false );
expect( researcher.getHelper( "checkIfWordIsComplex" )( wordComplexityConfig, "polygonal", premiumData.en ) ).toEqual( true );
expect( researcher.getHelper( "checkIfWordIsComplex" )( wordComplexityConfig, "investigations", premiumData.en ) ).toEqual( false );
} );
} );
Original file line number Diff line number Diff line change
Expand Up @@ -2,54 +2,53 @@ import checkIfWordIsComplex from "../../../../../src/languageProcessing/language
import wordComplexityConfig from "../../../../../src/languageProcessing/languages/en/config/wordComplexity";
import getMorphologyData from "../../../../specHelpers/getMorphologyData";

const morphologyData = getMorphologyData( "en" ).en;
const premiumData = getMorphologyData( "en" ).en;

describe( "a test checking if the word is complex in English", function() {
it( "returns singular word as non-complex if it is found in the list", function() {
expect( checkIfWordIsComplex( wordComplexityConfig, "example", morphologyData ) ).toEqual( false );
expect( checkIfWordIsComplex( wordComplexityConfig, "example", premiumData ) ).toEqual( false );
} );

it( "returns word as non-complex if its singular version is found in the list", function() {
expect( checkIfWordIsComplex( wordComplexityConfig, "examples", morphologyData ) ).toEqual( false );
expect( checkIfWordIsComplex( wordComplexityConfig, "examples", premiumData ) ).toEqual( false );
} );

it( "returns singular word as non-complex if it is found in the list", function() {
expect( checkIfWordIsComplex( wordComplexityConfig, "release", morphologyData ) ).toEqual( false );
expect( checkIfWordIsComplex( wordComplexityConfig, "release", premiumData ) ).toEqual( false );
} );

it( "returns plural word as non-complex if its singular version is found in the list", function() {
expect( checkIfWordIsComplex( wordComplexityConfig, "releases", morphologyData ) ).toEqual( false );
expect( checkIfWordIsComplex( wordComplexityConfig, "releases", premiumData ) ).toEqual( false );
} );

it( "returns plural (with phonetical) change word as non-complex if its singular version is found in the list", function() {
expect( checkIfWordIsComplex( wordComplexityConfig, "opportunities" ) ).toEqual( true );
expect( checkIfWordIsComplex( wordComplexityConfig, "opportunities", morphologyData ) ).toEqual( false );
expect( checkIfWordIsComplex( wordComplexityConfig, "opportunities", premiumData ) ).toEqual( false );
} );

it( "returns long irregular plural word as complex if its singular version is not found in the list", function() {
expect( checkIfWordIsComplex( wordComplexityConfig, "metamorphoses", morphologyData ) ).toEqual( true );
expect( checkIfWordIsComplex( wordComplexityConfig, "metamorphoses", premiumData ) ).toEqual( true );
} );

it( "returns long plural word as complex if its singular version is not found in the list", function() {
expect( checkIfWordIsComplex( wordComplexityConfig, "anesthesias", morphologyData ) ).toEqual( true );
expect( checkIfWordIsComplex( wordComplexityConfig, "anesthesias", premiumData ) ).toEqual( true );
} );

it( "returns plural word as complex if it is not in the list", function() {
expect( checkIfWordIsComplex( wordComplexityConfig, "refrigerators", morphologyData ) ).toEqual( true );
expect( checkIfWordIsComplex( wordComplexityConfig, "refrigerators", premiumData ) ).toEqual( true );
} );

it( "returns plural word as non complex if the word starts with capital letter", function() {
expect( checkIfWordIsComplex( wordComplexityConfig, "Tortoiseshell", morphologyData ) ).toEqual( false );
expect( checkIfWordIsComplex( wordComplexityConfig, "Outsiders", morphologyData ) ).toEqual( false );
expect( checkIfWordIsComplex( wordComplexityConfig, "Tortoiseshell", premiumData ) ).toEqual( false );
expect( checkIfWordIsComplex( wordComplexityConfig, "Outsiders", premiumData ) ).toEqual( false );
} );

it( "returns words as non complex if the word is less than 7 characters", function() {
expect( checkIfWordIsComplex( wordComplexityConfig, "cat", morphologyData ) ).toEqual( false );
expect( checkIfWordIsComplex( wordComplexityConfig, "rabbit", morphologyData ) ).toEqual( false );
expect( checkIfWordIsComplex( wordComplexityConfig, "cat", premiumData ) ).toEqual( false );
expect( checkIfWordIsComplex( wordComplexityConfig, "rabbit", premiumData ) ).toEqual( false );
} );

it( "returns words as non complex if the word is less than 7 characters", function() {
expect( checkIfWordIsComplex( wordComplexityConfig, "cat", morphologyData ) ).toEqual( false );
expect( checkIfWordIsComplex( wordComplexityConfig, "rabbit", morphologyData ) ).toEqual( false );
expect( checkIfWordIsComplex( wordComplexityConfig, "cat", premiumData ) ).toEqual( false );
expect( checkIfWordIsComplex( wordComplexityConfig, "rabbit", premiumData ) ).toEqual( false );
} );
} );
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ import checkIfWordIsComplex from "../../../../src/languageProcessing/languages/e
import wordComplexityConfig from "../../../../src/languageProcessing/languages/es/config/wordComplexity";
import sentenceLength from "../../../../src/languageProcessing/languages/es/config/sentenceLength";

const morphologyDataES = getMorphologyData( "es" );
const premiumData = getMorphologyData( "es" );

describe( "a test for the Spanish Researcher", function() {
const researcher = new Researcher( new Paper( "Este es un documento nuevo!" ) );
Expand All @@ -27,8 +27,8 @@ describe( "a test for the Spanish Researcher", function() {
it( "checks if a word is complex in Spanish", function() {
researcher.addHelper( "checkIfWordIsComplex", checkIfWordIsComplex );

expect( researcher.getHelper( "checkIfWordIsComplex" )( wordComplexityConfig, "situados" ) ).toEqual( true );
expect( researcher.getHelper( "checkIfWordIsComplex" )( wordComplexityConfig, "sobre" ) ).toEqual( false );
expect( researcher.getHelper( "checkIfWordIsComplex" )( wordComplexityConfig, "situados", premiumData.es ) ).toEqual( true );
expect( researcher.getHelper( "checkIfWordIsComplex" )( wordComplexityConfig, "sobre", premiumData.es ) ).toEqual( false );
} );

it( "returns the Spanish function words", function() {
Expand Down Expand Up @@ -68,7 +68,7 @@ describe( "a test for the Spanish Researcher", function() {
} );

it( "stems a word using the Spanish stemmer", function() {
researcher.addResearchData( "morphology", morphologyDataES );
researcher.addResearchData( "morphology", premiumData );
expect( researcher.getHelper( "getStemmer" )( researcher )( "gatos" ) ).toEqual( "gat" );
} );

Expand Down
Original file line number Diff line number Diff line change
@@ -1,35 +1,37 @@
import checkIfWordIsComplex from "../../../../../src/languageProcessing/languages/es/helpers/checkIfWordIsComplex";
import wordComplexityConfig from "../../../../../src/languageProcessing/languages/es/config/wordComplexity";
import getMorphologyData from "../../../../specHelpers/getMorphologyData";
const premiumData = getMorphologyData( "es" ).es;

describe( "a test checking if the word is complex in Spanish", function() {
it( "returns singular word form as non-complex if it is found in the list", function() {
expect( checkIfWordIsComplex( wordComplexityConfig, "original" ) ).toEqual( false );
expect( checkIfWordIsComplex( wordComplexityConfig, "original", premiumData ) ).toEqual( false );
} );

// eslint-disable-next-line max-len
it( "returns plural word form as non-complex if its singular form is found in the list when the singular word form ends with a consonant", function() {
expect( checkIfWordIsComplex( wordComplexityConfig, "originales" ) ).toEqual( false );
expect( checkIfWordIsComplex( wordComplexityConfig, "originales", premiumData ) ).toEqual( false );
} );

// eslint-disable-next-line max-len
it( "returns plural word form as non-complex if its singular form is found in the list when the singular word form ends with a vowel", function() {
expect( checkIfWordIsComplex( wordComplexityConfig, "parecidos" ) ).toEqual( false );
expect( checkIfWordIsComplex( wordComplexityConfig, "parecidos", premiumData ) ).toEqual( false );
} );

it( "returns word as non-complex if it starts with a capital (even if non capitalized form is complex)", function() {
expect( checkIfWordIsComplex( wordComplexityConfig, "Alhambra" ) ).toEqual( false );
expect( checkIfWordIsComplex( wordComplexityConfig, "alhambra" ) ).toEqual( true );
expect( checkIfWordIsComplex( wordComplexityConfig, "Alhambra", premiumData ) ).toEqual( false );
expect( checkIfWordIsComplex( wordComplexityConfig, "alhambra", premiumData ) ).toEqual( true );
} );

it( "returns plural word as complex if it (and its singular form) are not in the list", function() {
expect( checkIfWordIsComplex( wordComplexityConfig, "situados" ) ).toEqual( true );
expect( checkIfWordIsComplex( wordComplexityConfig, "situados", premiumData ) ).toEqual( true );
} );

it( "returns word longer than 7 characters as complex if it's not in the list", function() {
expect( checkIfWordIsComplex( wordComplexityConfig, "contemplada" ) ).toEqual( true );
expect( checkIfWordIsComplex( wordComplexityConfig, "contemplada", premiumData ) ).toEqual( true );
} );

it( "returns plural word as non complex if the word is less than 7 characters", function() {
expect( checkIfWordIsComplex( wordComplexityConfig, "cosas" ) ).toEqual( false );
expect( checkIfWordIsComplex( wordComplexityConfig, "cosas", premiumData ) ).toEqual( false );
} );
} );
Loading

0 comments on commit e133573

Please sign in to comment.