Skip to content

Commit

Permalink
Improve IPA translation logic
Browse files Browse the repository at this point in the history
  • Loading branch information
aryanpingle committed Sep 30, 2024
1 parent 76d0305 commit 91ddb14
Showing 1 changed file with 57 additions and 27 deletions.
84 changes: 57 additions & 27 deletions src/ipa.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,36 +19,66 @@ export function translateWord(word: string): string | null {
/**
 * Convert a sentence from English to Phonetic.
 *
 * The sentence is scanned one character at a time:
 * - Text between a pair of `@` characters is an English escape and is copied
 *   to the output unchanged, delimiters included. An escape that is never
 *   closed runs to the end of the sentence.
 * - A run of letters and apostrophes forms a word. A word is replaced by its
 *   `IPADict` entry (looked up in lower case); a word without an entry is
 *   dropped from the output.
 * - Every other character (whitespace, punctuation, digits, ...) is copied
 *   to the output unchanged.
 *
 * @param sentence - The English text to translate.
 * @returns The translated text; an empty string for empty input.
 */
export function translateSentence(sentence: string): string {
    // Matches a single character that can be part of a translatable word.
    const wordCharRegex = /[\p{Letter}']/u;

    let insideEscape = false;
    let result = "";
    let pendingWord = "";

    // Emit the translation of the word collected so far (if it has one) and
    // reset the collector. Untranslatable words are intentionally dropped.
    function flushPendingWord(): void {
        if (pendingWord === "") {
            return;
        }
        const key = pendingWord.toLowerCase();
        pendingWord = "";
        // Own-property check: the `in` operator would also match inherited
        // keys such as "constructor" or "toString".
        if (Object.prototype.hasOwnProperty.call(IPADict, key)) {
            result += IPADict[key];
        }
    }

    // Iterate by code point so the Unicode-aware regex always sees a whole
    // character rather than half of a surrogate pair.
    for (const char of sentence) {
        if (insideEscape) {
            // Escaped English text is passed through verbatim; the closing
            // "@" is copied too and ends the escape.
            result += char;
            if (char === "@") {
                insideEscape = false;
            }
            continue;
        }

        if (char === "@") {
            // Opening "@": finish the current word, then start copying.
            flushPendingWord();
            result += char;
            insideEscape = true;
            continue;
        }

        if (!wordCharRegex.test(char)) {
            // A non-word character ends the current word and is kept as-is.
            flushPendingWord();
            result += char;
            continue;
        }

        // Part of a word (not necessarily a translatable one).
        pendingWord += char;
    }

    // The sentence may end in the middle of a word.
    flushPendingWord();

    return result;
}

0 comments on commit 91ddb14

Please sign in to comment.