Skip to content

Commit

Permalink
Fixed ligatures in Sinhala
Browse files: browse the repository at this point in the history
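Replace zero-width joiners (U+200D) with temporary placeholder characters from the Private Use Area (U+F8FF) before bidirectional text processing, then restore them afterward, to prevent ICU from stripping them.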
  • Loading branch information
1ec5 committed Aug 22, 2024
1 parent 5fd311f commit 544344d
Show file tree
Hide file tree
Showing 3 changed files with 13 additions and 7 deletions.
2 changes: 1 addition & 1 deletion build/generate-unicode-data.ts
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ fs.writeFileSync('src/data/unicode_properties.ts',
export function canCombineGraphemes(former: string, latter: string): boolean {
// Zero-width joiner
// Indic_Syllabic_Category=Invisible_Stacker as of Unicode ${indicSyllabicCategory.version}, published ${indicSyllabicCategory.date}.
const terminalJoinersRegExp = /[\u200D${indicSyllabicCategory.characterClass}]$/u;
const terminalJoinersRegExp = /[\\u200D${indicSyllabicCategory.characterClass}]$/u;
return terminalJoinersRegExp.test(former) || /^\\p{gc=Mc}/u.test(latter);
}
`);
2 changes: 1 addition & 1 deletion src/data/unicode_properties.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,6 @@
export function canCombineGraphemes(former: string, latter: string): boolean {
// Zero-width joiner
// Indic_Syllabic_Category=Invisible_Stacker as of Unicode 16.0.0, published 2024-04-30.
const terminalJoinersRegExp = /[\u1039\u17D2\u1A60\u1BAB\uAAF6\u{10A3F}\u{11133}\u{113D0}\u{1193E}\u{11A47}\u{11A99}\u{11D45}\u{11D97}\u{11F42}]$/u;
const terminalJoinersRegExp = /[\u200D\u1039\u17D2\u1A60\u1BAB\uAAF6\u{10A3F}\u{11133}\u{113D0}\u{1193E}\u{11A47}\u{11A99}\u{11D45}\u{11D97}\u{11F42}]$/u;
return terminalJoinersRegExp.test(former) || /^\p{gc=Mc}/u.test(latter);
}
16 changes: 11 additions & 5 deletions src/symbol/shaping.ts
Original file line number Diff line number Diff line change
Expand Up @@ -272,11 +272,14 @@ function shapeText(
lines = [];
// ICU operates on code units.
lineBreaks = lineBreaks.map(index => logicalInput.toCodeUnitIndex(index));
// Replace zero-width joiners with temporary strip markers (from the Private Use Area) to prevent ICU from stripping them out.
const markedInput = logicalInput.toString().replace(/\u200D/g, '\uF8FF');
const untaggedLines =
processBidirectionalText(logicalInput.toString(), lineBreaks);
processBidirectionalText(markedInput, lineBreaks);
for (const line of untaggedLines) {
const taggedLine = new TaggedString();
taggedLine.text = line;
// Restore zero-width joiners from temporary strip markers.
taggedLine.text = line.replace(/\uF8FF/g, '\u200D');
taggedLine.sections = logicalInput.sections;
// eslint-disable-next-line @typescript-eslint/no-unused-vars
for (const char of splitByGraphemeCluster(line)) {
Expand All @@ -290,20 +293,23 @@ function shapeText(
lines = [];
// ICU operates on code units.
lineBreaks = lineBreaks.map(index => logicalInput.toCodeUnitIndex(index));
// Replace zero-width joiners with temporary strip markers (from the Private Use Area) to prevent ICU from stripping them out.
const markedInput = logicalInput.toString().replace(/\u200D/g, '\uF8FF');

// Convert grapheme cluster–based section index to be based on code units.
let i = 0;
const sectionIndex = [];
for (const grapheme of splitByGraphemeCluster(logicalInput.text)) {
for (const grapheme of splitByGraphemeCluster(markedInput)) {
sectionIndex.push(...Array(grapheme.length).fill(logicalInput.sectionIndex[i]));
i++;
}

const processedLines =
processStyledBidirectionalText(logicalInput.text, sectionIndex, lineBreaks);
processStyledBidirectionalText(markedInput, sectionIndex, lineBreaks);
for (const line of processedLines) {
const taggedLine = new TaggedString();
taggedLine.text = line[0];
// Restore zero-width joiners from temporary strip markers.
taggedLine.text = line[0].replace(/\uF8FF/g, '\u200D');
taggedLine.sectionIndex = line[1];
taggedLine.sections = logicalInput.sections;
lines.push(taggedLine);
Expand Down

0 comments on commit 544344d

Please sign in to comment.