Skip to content
This repository has been archived by the owner on Jan 4, 2025. It is now read-only.

Commit

Permalink
feat: add utf8 fix
Browse files Browse the repository at this point in the history
  • Loading branch information
Vexcited committed Mar 1, 2024
1 parent 9f4df81 commit 3315251
Showing 1 changed file with 131 additions and 0 deletions.
131 changes: 131 additions & 0 deletions packages/library/src/utils/utf8.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
"use strict";

/**
 * Lookup table used to repair text that was decoded with the wrong charset.
 *
 * Each key is a character sequence as it appears in the damaged text and each
 * value is the character that should have been there. The keys become the
 * alternatives of `matchRegex`, and every match is replaced by the value
 * stored under it.
 *
 * NOTE(review): in this copy many entries have a key that is identical to its
 * value (a no-op replacement). Presumably the mis-encoded keys were normalised
 * when the file was rendered — verify against the upstream table.
 */
const replacements: Record<string, string> = {
"€": "€",
"Å’": "Œ",
"Ž": "Ž",
"“": "“",
"â€\u009D": "”",
"•": "•",
"–": "–",
"—": "—",
Ëœ: "˜",
"â„¢": "™",
"Å¡": "š",
"›": "›",
"Å“": "œ",
"ž": "ž",
"Ÿ": "Ÿ",
"Â ": " ",
"¡": "¡",
"¢": "¢",
"£": "£",
"¤": "¤",
"Â¥": "¥",
"¦": "¦",
"§": "§",
"¨": "¨",
"©": "©",
"­": "­",
"®": "®",
"¯": "¯",
"°": "°",
"±": "±",
"²": "²",
"³": "³",
"´": "´",
µ: "µ",
"¶": "¶",
"·": "·",
"¸": "¸",
"¹": "¹",
º: "º",
"»": "»",
"¼": "¼",
"½": "½",
"¾": "¾",
"¿": "¿",
"À": "À",
"Â": "Â",
Ã: "Ã",
"Ä": "Ä",
"Ã…": "Å",
"Æ": "Æ",
"Ç": "Ç",
È: "È",
"É": "É",
Ê: "Ê",
"Ë": "Ë",
ÃŒ: "Ì",
"Ã\u008D": "Í",
ÃŽ: "Î",
"Ã\u008F": "Ï",
"Ã\u0090": "Ð",
"Ñ": "Ñ",
"Ã’": "Ò",
"Ó": "Ó",
"Ô": "Ô",
"Õ": "Õ",
"Ö": "Ö",
"×": "×",
"Ø": "Ø",
"Ù": "Ù",
Ú: "Ú",
"Û": "Û",
Ãœ: "Ü",
"Ã\u009D": "Ý",
Þ: "Þ",
ß: "ß",
"Ã\u00A0": "à",
"á": "á",
"â": "â",
"ã": "ã",
"ä": "ä",
"Ã¥": "å",
"æ": "æ",
"ç": "ç",
"è": "è",
"é": "é",
ê: "ê",
"ë": "ë",
"ì": "ì",
"Ã\u00AD": "í",
"î": "î",
"ï": "ï",
"ð": "ð",
"ñ": "ñ",
"ò": "ò",
"ó": "ó",
"ô": "ô",
õ: "õ",
"ö": "ö",
"÷": "÷",
"ø": "ø",
"ù": "ù",
ú: "ú",
"û": "û",
"ü": "ü",
"ý": "ý",
"þ": "þ",
"ÿ": "ÿ",
// NOTE(review): as rendered here this key looks the same as the earlier "½"
// entry; if the bytes really are identical, this later entry is the one that
// takes effect. Confirm that mapping "½" to "œ" is intended.
"½": "œ"
};

// Cache the immutable regex: it is expensive to create and garbage collect.
//
// Two precautions are taken while building the alternation:
//  - Keys are ordered longest-first. Alternatives are tried left to right, so a
//    key that is a prefix of a longer key would otherwise always win and the
//    longer sequence could never be replaced as a whole.
//  - Every key is escaped so that it is matched literally even if it contains a
//    regex syntax character. Only syntax characters are escaped, because other
//    identity escapes are rejected when the "u" flag is set.
const matchRegex = new RegExp(
  Object.keys(replacements)
    .sort((a, b) => b.length - a.length)
    .map((key) => key.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"))
    .join("|"),
  "gu"
);

/**
 * Repairs common encoding errors that appear when Latin-1 (and Windows-1252)
 * text is converted to UTF-8.
 *
 * Every sequence matched by `matchRegex` is swapped for its entry in
 * `replacements`, and the result is then Unicode-normalized using the default
 * form of `String.prototype.normalize`.
 *
 * @author Frazer Smith
 * @see {@link http://www.i18nqa.com/debug/utf8-debug.html | UTF-8 Encoding Debugging Chart}
 * @see {@link https://github.com/Fdawgs/fix-latin1-to-utf8/blob/main/src/index.js | Original source}
 * @param str - Text that may contain mis-decoded character sequences.
 * @returns The text with known sequences replaced, in normalized form.
 */
function fixLatin1ToUtf8 (str: string): string {
  // Look up the replacement for one matched sequence.
  const lookup = (sequence: string): string => replacements[sequence];

  const repaired = str.replace(matchRegex, lookup);
  return repaired.normalize();
}

export default fixLatin1ToUtf8;

0 comments on commit 3315251

Please sign in to comment.