Skip to content

Commit

Permalink
Add language-specific reading time calculation
Browse files Browse the repository at this point in the history
  • Loading branch information
albinazs committed Jun 18, 2024
1 parent 1b09403 commit 481f0b4
Show file tree
Hide file tree
Showing 3 changed files with 70 additions and 70 deletions.
66 changes: 30 additions & 36 deletions client/src/includes/contentMetrics.test.ts
Original file line number Diff line number Diff line change
@@ -1,41 +1,35 @@
import { getContentMetrics } from './contentMetrics';
import { getWordCount, getReadingTime } from './contentMetrics';

describe('getContentMetrics', () => {
it('should return correct wordCount and readingTime using Intl.Segmenter', () => {
const result = getContentMetrics('en-US', 'This is a test sentence.');
expect(result.wordCount).toBe(5);
expect(result.readingTime).toBe(0);
});

it('should handle empty text', () => {
const result = getContentMetrics('en-US', '');
expect(result.wordCount).toBe(0);
expect(result.readingTime).toBe(0);
});

it('should handle text with punctuation correctly', () => {
const text = `This is a longer text to test the word count and reading time calculation!
Bread is a staple food prepared from a dough of flour and water; usually by baking.
Throughout recorded history it has been popular around the world and is one of the
oldest artificial foods: having been of importance since the dawn of agriculture.
Proportions of types of flour and other ingredients vary widely? as do modes of preparation.
As a result, types, shapes, sizes, and textures of breads differ around the world.
Bread may be leavened by processes such as reliance on naturally occurring sourdough
microbes, chemicals, industrially produced yeast, or high-pressure aeration...
Some breads are baked before they have a chance to rise, often for traditional
or religious reasons. Inclusions like fruits; nuts, and fats are sometimes added.
Commercial bread typically includes additives to enhance flavor, texture, color,
longevity, and production efficiency!
`;
const result = getContentMetrics('en-US', text);
expect(result.wordCount).toBe(148);
expect(result.readingTime).toBe(1);
// Word-count fixtures: one short sentence per language, paired with the
// number of word-like segments getWordCount is expected to report for it.
// The language tag must match the language of the sample text; the Japanese
// sentence is therefore tagged 'ja' (it was previously mislabelled 'zh').
describe.each`
  text | lang | wordCount
  ${'¿Donde esta la biblioteca?'} | ${'es'} | ${4}
  ${"It's lots. Of; Punctuation"} | ${'en'} | ${4}
  ${'האהבה היא אוקיינוס שאין לו התחלה ואין לו סוף.'} | ${'he'} | ${9}
  ${'元気です、ありがとう。あなたは?'} | ${'ja'} | ${5}
  ${'Dit is een testzin in het Nederlands.'} | ${'nl'} | ${7}
  ${'Je suis content de te voir!'} | ${'fr'} | ${6}
  ${'Ich liebe dich!'} | ${'de'} | ${3}
  ${'Mi piace molto questo libro.'} | ${'it'} | ${5}
  ${'저는 오늘 날씨가 좋아요.'} | ${'ko'} | ${4}
`('getWordCount', ({ text, lang, wordCount }) => {
  test(`correctly counts words in '${text}' for language '${lang}'`, () => {
    expect(getWordCount(lang, text)).toBe(wordCount);
  });
});

it('should return integers for wordCount and readingTime', () => {
const text = 'Yet another text';
const result = getContentMetrics('en-US', text);
expect(Number.isInteger(result.wordCount)).toBe(true);
expect(Number.isInteger(result.readingTime)).toBe(true);
// Reading-time fixtures: each row gives a language, a word count, and the
// whole-minute reading time getReadingTime is expected to return for it.
describe.each([
  { lang: 'es', wordCount: 1000, readingTime: 4 },
  { lang: 'fr', wordCount: 1000, readingTime: 5 },
  { lang: 'ar', wordCount: 360, readingTime: 2 },
  { lang: 'it', wordCount: 360, readingTime: 1 },
  { lang: 'en', wordCount: 238, readingTime: 1 },
  { lang: 'he', wordCount: 224, readingTime: 1 },
  { lang: 'zh', wordCount: 520, readingTime: 2 },
  { lang: 'nl', wordCount: 320, readingTime: 1 },
  { lang: 'ko', wordCount: 50, readingTime: 0 },
])('getReadingTime', (fixture) => {
  const { lang, wordCount, readingTime: expectedMinutes } = fixture;

  test(`calculates reading time for '${wordCount}' words in language '${lang}'`, () => {
    const actualMinutes = getReadingTime(lang, wordCount);
    expect(actualMinutes).toBe(expectedMinutes);
  });
});
72 changes: 39 additions & 33 deletions client/src/includes/contentMetrics.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,40 +3,44 @@ interface ContentMetrics {
readingTime: number;
}

interface SegmentData {
segment: string;
isWordLike?: boolean | undefined;
}
/**
 * Counts the words in `text` using locale-aware word segmentation.
 *
 * The text is split with `Intl.Segmenter` at `'word'` granularity for the
 * given language, and only segments flagged `isWordLike` are counted.
 *
 * NOTE(review): `new Intl.Segmenter` throws a RangeError for a malformed
 * language tag, and nothing here catches it — confirm callers only pass
 * well-formed tags.
 *
 * @param lang - Language tag handed to `Intl.Segmenter` (e.g. `'en'`).
 * @param text - The text to count words in.
 * @returns The number of word-like segments found in `text`.
 */
export const getWordCount = (lang: string, text: string): number => {
const segmenter = new Intl.Segmenter(lang, { granularity: 'word' });
const segments: Intl.SegmentData[] = Array.from(segmenter.segment(text));
// Tally only the segments the segmenter marks as word-like.
const wordCount = segments.reduce(
(count, segment) => (segment.isWordLike ? count + 1 : count),
0,
);

// NOTE(review): the next five lines (the `getContentMetrics` header and
// `let wordCount = 0;`) appear to be removed-side lines of the diff this
// text was extracted from, not part of getWordCount — confirm against the
// committed file.
export const getContentMetrics = (
lang: string,
text: string,
): ContentMetrics => {
let wordCount = 0;
return wordCount;
};

if (typeof Intl.Segmenter === 'function') {
const segmenter = new Intl.Segmenter(lang, { granularity: 'word' });
const segments: SegmentData[] = Array.from(segmenter.segment(text));
wordCount = segments.reduce(
(count, segment) => (segment.isWordLike ? count + 1 : count),
0,
);
} else {
// Fallback to regex if Intl.Segmenter is not supported
wordCount =
text
.trim()
.replace(/['";:,.?¿\-!¡]+/g, '')
.match(/\S+/g)?.length || 0;
}
/**
 * Average reading speed per language, in words per minute, keyed by the
 * primary language subtag (e.g. `en`, `fr`).
 *
 * Language-specific reading speeds according to a meta-analysis of 190
 * studies on reading rates.
 * Study preprint: https://osf.io/preprints/psyarxiv/xynwg/
 * DOI: https://doi.org/10.1016/j.jml.2019.104047
 *
 * The table is built on a prototype-less object so that a lookup with an
 * arbitrary string (e.g. `'constructor'` or `'toString'`) yields `undefined`
 * instead of an inherited `Object.prototype` member. An inherited member is
 * truthy, so it would bypass a `|| fallback` check and turn the reading-time
 * division into `NaN`.
 *
 * The type allows indexing by any string while keeping `en` (the fallback
 * entry) typed as a definite number.
 */
const readingSpeeds: Readonly<Record<string, number>> & {
  readonly en: number;
} = Object.assign(Object.create(null), {
  ar: 181, // Arabic
  zh: 260, // Chinese
  nl: 228, // Dutch
  en: 238, // English
  fi: 195, // Finnish
  fr: 214, // French
  de: 260, // German
  he: 224, // Hebrew
  it: 285, // Italian
  ko: 226, // Korean
  es: 278, // Spanish
  sv: 218, // Swedish
});

// Silent-reading adults average 238 words per minute
const readingTime = Math.round(wordCount / 238);
/**
 * Estimates how long a text of `wordCount` words takes to read in the given
 * language, rounded to the nearest whole number with `Math.round`.
 *
 * The speed is looked up in `readingSpeeds` by the part of `lang` before the
 * first `-`; when no entry is found, the English speed is used.
 * Counts that are small relative to the speed round down to 0.
 *
 * NOTE(review): the lookup is case-sensitive and only splits on `-`, so tags
 * such as `'EN'` or `'en_US'` fall back to the English speed — confirm that
 * is acceptable for the tags callers pass.
 *
 * @param lang - Language tag such as `'en'` or `'en-US'`.
 * @param wordCount - Number of words in the text.
 * @returns The rounded reading time (presumably minutes, given the speeds
 *   are reading rates — verify against `readingSpeeds`).
 */
export const getReadingTime = (lang: string, wordCount: number): number => {
// Keep only the part before the first hyphen, e.g. 'en-US' -> 'en'.
const locale = lang.split('-')[0];
// Fallback to English reading speed if the locale is not found
const readingSpeed = readingSpeeds[locale] || readingSpeeds.en;
const readingTime = Math.round(wordCount / readingSpeed);

// NOTE(review): the object-literal `return { wordCount, readingTime };`
// below appears to be removed-side lines of the diff this text was
// extracted from; the declared return type is `number`, matching the final
// `return readingTime;` — confirm against the committed file.
return {
wordCount,
readingTime,
};
return readingTime;
};

const renderContentMetrics = ({ wordCount, readingTime }: ContentMetrics) => {
Expand Down Expand Up @@ -79,8 +83,10 @@ export const runContentCheck = () => {
if (!iframe || !iframeDocument || !text) {
return;
}
const lang = iframeDocument.documentElement.lang || 'en-US';
const contentMetrics = getContentMetrics(lang, text);
const lang = iframeDocument.documentElement.lang || 'en';

const wordCount = getWordCount(lang, text);
const readingTime = getReadingTime(lang, wordCount);

renderContentMetrics(contentMetrics);
renderContentMetrics({ wordCount, readingTime });
};
2 changes: 1 addition & 1 deletion tsconfig.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
"esModuleInterop": true,
"forceConsistentCasingInFileNames": true,
"jsx": "react",
"lib": ["ES2022", "DOM", "DOM.iterable"],
"lib": ["ES2022", "ES2022.Intl", "DOM", "DOM.iterable"],
"moduleResolution": "node",
"noImplicitAny": false, // TODO: Enable once all existing code is typed
"noUnusedLocals": true,
Expand Down

0 comments on commit 481f0b4

Please sign in to comment.