Skip to content

Commit

Permalink
Join grapheme clusters on zero-width joiners
Browse files Browse the repository at this point in the history
  • Loading branch information
1ec5 committed Aug 21, 2024
1 parent 6b5c031 commit 5fd311f
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 4 deletions.
5 changes: 3 additions & 2 deletions build/generate-unicode-data.ts
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,9 @@ fs.writeFileSync('src/data/unicode_properties.ts',
* Returns whether two grapheme clusters detected by \`Intl.Segmenter\` can be combined to prevent an invisible combining mark from appearing unexpectedly.
*/
export function canCombineGraphemes(former: string, latter: string): boolean {
// Zero-width joiner
// Indic_Syllabic_Category=Invisible_Stacker as of Unicode ${indicSyllabicCategory.version}, published ${indicSyllabicCategory.date}.
const invisibleStackersRegExp = /[${indicSyllabicCategory.characterClass}]$/u;
return invisibleStackersRegExp.test(former) || /^\\p{gc=Mc}/u.test(latter);
const terminalJoinersRegExp = /[\u200D${indicSyllabicCategory.characterClass}]$/u;
return terminalJoinersRegExp.test(former) || /^\\p{gc=Mc}/u.test(latter);
}
`);
5 changes: 3 additions & 2 deletions src/data/unicode_properties.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
* Returns whether two grapheme clusters detected by `Intl.Segmenter` can be combined to prevent an invisible combining mark from appearing unexpectedly.
*/
export function canCombineGraphemes(former: string, latter: string): boolean {
    // Characters that, when they end the former cluster, glue it to the latter one:
    //  - U+200D ZERO WIDTH JOINER, written as an explicit escape so it stays visible in source;
    //  - Indic_Syllabic_Category=Invisible_Stacker as of Unicode 16.0.0, published 2024-04-30.
    const terminalJoinersRegExp = /[\u200D\u1039\u17D2\u1A60\u1BAB\uAAF6\u{10A3F}\u{11133}\u{113D0}\u{1193E}\u{11A47}\u{11A99}\u{11D45}\u{11D97}\u{11F42}]$/u;
    // A latter cluster that begins with a spacing combining mark (General_Category=Mc)
    // also belongs with the former cluster.
    const leadingSpacingMarkRegExp = /^\p{gc=Mc}/u;
    return terminalJoinersRegExp.test(former) || leadingSpacingMarkRegExp.test(latter);
}

0 comments on commit 5fd311f

Please sign in to comment.