diff --git a/transformations/font_change/README.md b/transformations/font_change/README.md new file mode 100644 index 000000000..6d0a92001 --- /dev/null +++ b/transformations/font_change/README.md @@ -0,0 +1,30 @@ +# Font Change + +The Font Change transformation modifies words in the input to have a stylized appearance using suitable Unicode characters, as is often encountered in social media posts. + +Authors: [Shahab Raji](mailto:shahab.raji@rutgers.edu) (Rutgers University) and [Gerard de Melo](http://gerard.demelo.org/) +(Hasso Plattner Institute / University of Potsdam) + + +## How does the transformation work? + +Font Change adapts the appearance of randomly selected words in the input sentence. For each selected word, one of several possible appearance changes is chosen randomly. + +Examples: + +> The quick brown fox jumps over the lazy dog. + +to + +> The quick brown 🅵🅾🆇 ɾnɯds over the lazy ᴅᴏɢ. + +## Data and code provenance + +The changes in text are achieved using Unicode characters based on mapping tables from the [𝓾𝓷𝓲𝓬𝓸𝓭𝓮 𝙛𝙤𝙧𝙢𝙖𝙩𝙩𝙚𝙧](https://github.com/DenverCoder1/unicode-formatter) (MIT license) tool. + +The code is implemented by the authors. + +## Target tasks + +This transformation can be used for data augmentation in text classification tasks. 
+ diff --git a/transformations/font_change/__init__.py b/transformations/font_change/__init__.py new file mode 100644 index 000000000..930cdce0b --- /dev/null +++ b/transformations/font_change/__init__.py @@ -0,0 +1 @@ +from .transformation import * diff --git a/transformations/font_change/fonts.json b/transformations/font_change/fonts.json new file mode 100644 index 000000000..268c4de71 --- /dev/null +++ b/transformations/font_change/fonts.json @@ -0,0 +1,2427 @@ +{ + "normal": { + "\"": "\"", + "\\": "\\", + " ": " ", + "!": "!", + "#": "#", + "$": "$", + "%": "%", + "&": "&", + "'": "'", + "(": "(", + ")": ")", + "*": "*", + "+": "+", + ",": ",", + "-": "-", + ".": ".", + "/": "/", + "0": "0", + "1": "1", + "2": "2", + "3": "3", + "4": "4", + "5": "5", + "6": "6", + "7": "7", + "8": "8", + "9": "9", + ":": ":", + ";": ";", + "<": "<", + "=": "=", + ">": ">", + "?": "?", + "@": "@", + "A": "A", + "B": "B", + "C": "C", + "D": "D", + "E": "E", + "F": "F", + "G": "G", + "H": "H", + "I": "I", + "J": "J", + "K": "K", + "L": "L", + "M": "M", + "N": "N", + "O": "O", + "P": "P", + "Q": "Q", + "R": "R", + "S": "S", + "T": "T", + "U": "U", + "V": "V", + "W": "W", + "X": "X", + "Y": "Y", + "Z": "Z", + "[": "[", + "]": "]", + "^": "^", + "_": "_", + "`": "`", + "a": "a", + "b": "b", + "c": "c", + "d": "d", + "e": "e", + "f": "f", + "g": "g", + "h": "h", + "i": "i", + "j": "j", + "k": "k", + "l": "l", + "m": "m", + "n": "n", + "o": "o", + "p": "p", + "q": "q", + "r": "r", + "s": "s", + "t": "t", + "u": "u", + "v": "v", + "w": "w", + "x": "x", + "y": "y", + "z": "z", + "{": "{", + "|": "|", + "}": "}", + "~": "~" + }, + "sans": { + "\"": "\"", + "\\": "\\", + " ": " ", + "!": "!", + "#": "#", + "$": "$", + "%": "%", + "&": "&", + "'": "'", + "(": "(", + ")": ")", + "*": "*", + "+": "+", + ",": ",", + "-": "-", + ".": ".", + "/": "/", + "0": "\ud835\udfe2", + "1": "\ud835\udfe3", + "2": "\ud835\udfe4", + "3": "\ud835\udfe5", + "4": "\ud835\udfe6", + "5": "\ud835\udfe7", + "6": 
"\ud835\udfe8", + "7": "\ud835\udfe9", + "8": "\ud835\udfea", + "9": "\ud835\udfeb", + ":": ":", + ";": ";", + "<": "<", + "=": "=", + ">": ">", + "?": "?", + "@": "@", + "A": "\ud835\udda0", + "B": "\ud835\udda1", + "C": "\ud835\udda2", + "D": "\ud835\udda3", + "E": "\ud835\udda4", + "F": "\ud835\udda5", + "G": "\ud835\udda6", + "H": "\ud835\udda7", + "I": "\ud835\udda8", + "J": "\ud835\udda9", + "K": "\ud835\uddaa", + "L": "\ud835\uddab", + "M": "\ud835\uddac", + "N": "\ud835\uddad", + "O": "\ud835\uddae", + "P": "\ud835\uddaf", + "Q": "\ud835\uddb0", + "R": "\ud835\uddb1", + "S": "\ud835\uddb2", + "T": "\ud835\uddb3", + "U": "\ud835\uddb4", + "V": "\ud835\uddb5", + "W": "\ud835\uddb6", + "X": "\ud835\uddb7", + "Y": "\ud835\uddb8", + "Z": "\ud835\uddb9", + "[": "[", + "]": "]", + "^": "^", + "_": "_", + "`": "`", + "a": "\ud835\uddba", + "b": "\ud835\uddbb", + "c": "\ud835\uddbc", + "d": "\ud835\uddbd", + "e": "\ud835\uddbe", + "f": "\ud835\uddbf", + "g": "\ud835\uddc0", + "h": "\ud835\uddc1", + "i": "\ud835\uddc2", + "j": "\ud835\uddc3", + "k": "\ud835\uddc4", + "l": "\ud835\uddc5", + "m": "\ud835\uddc6", + "n": "\ud835\uddc7", + "o": "\ud835\uddc8", + "p": "\ud835\uddc9", + "q": "\ud835\uddca", + "r": "\ud835\uddcb", + "s": "\ud835\uddcc", + "t": "\ud835\uddcd", + "u": "\ud835\uddce", + "v": "\ud835\uddcf", + "w": "\ud835\uddd0", + "x": "\ud835\uddd1", + "y": "\ud835\uddd2", + "z": "\ud835\uddd3", + "{": "{", + "|": "|", + "}": "}", + "~": "~" + }, + "sansBold": { + "\"": "\"", + "\\": "\\", + " ": " ", + "!": "!", + "#": "#", + "$": "$", + "%": "%", + "&": "&", + "'": "'", + "(": "(", + ")": ")", + "*": "*", + "+": "+", + ",": ",", + "-": "-", + ".": ".", + "/": "/", + "0": "\ud835\udfec", + "1": "\ud835\udfed", + "2": "\ud835\udfee", + "3": "\ud835\udfef", + "4": "\ud835\udff0", + "5": "\ud835\udff1", + "6": "\ud835\udff2", + "7": "\ud835\udff3", + "8": "\ud835\udff4", + "9": "\ud835\udff5", + ":": ":", + ";": ";", + "<": "<", + "=": "=", + ">": ">", + "?": 
"?", + "@": "@", + "A": "\ud835\uddd4", + "B": "\ud835\uddd5", + "C": "\ud835\uddd6", + "D": "\ud835\uddd7", + "E": "\ud835\uddd8", + "F": "\ud835\uddd9", + "G": "\ud835\uddda", + "H": "\ud835\udddb", + "I": "\ud835\udddc", + "J": "\ud835\udddd", + "K": "\ud835\uddde", + "L": "\ud835\udddf", + "M": "\ud835\udde0", + "N": "\ud835\udde1", + "O": "\ud835\udde2", + "P": "\ud835\udde3", + "Q": "\ud835\udde4", + "R": "\ud835\udde5", + "S": "\ud835\udde6", + "T": "\ud835\udde7", + "U": "\ud835\udde8", + "V": "\ud835\udde9", + "W": "\ud835\uddea", + "X": "\ud835\uddeb", + "Y": "\ud835\uddec", + "Z": "\ud835\udded", + "[": "[", + "]": "]", + "^": "^", + "_": "_", + "`": "`", + "a": "\ud835\uddee", + "b": "\ud835\uddef", + "c": "\ud835\uddf0", + "d": "\ud835\uddf1", + "e": "\ud835\uddf2", + "f": "\ud835\uddf3", + "g": "\ud835\uddf4", + "h": "\ud835\uddf5", + "i": "\ud835\uddf6", + "j": "\ud835\uddf7", + "k": "\ud835\uddf8", + "l": "\ud835\uddf9", + "m": "\ud835\uddfa", + "n": "\ud835\uddfb", + "o": "\ud835\uddfc", + "p": "\ud835\uddfd", + "q": "\ud835\uddfe", + "r": "\ud835\uddff", + "s": "\ud835\ude00", + "t": "\ud835\ude01", + "u": "\ud835\ude02", + "v": "\ud835\ude03", + "w": "\ud835\ude04", + "x": "\ud835\ude05", + "y": "\ud835\ude06", + "z": "\ud835\ude07", + "{": "{", + "|": "|", + "}": "}", + "~": "~" + }, + "sansItalic": { + "\"": "\"", + "\\": "\\", + " ": " ", + "!": "!", + "#": "#", + "$": "$", + "%": "%", + "&": "&", + "'": "'", + "(": "(", + ")": ")", + "*": "*", + "+": "+", + ",": ",", + "-": "-", + ".": ".", + "/": "/", + "0": "0", + "1": "1", + "2": "2", + "3": "3", + "4": "4", + "5": "5", + "6": "6", + "7": "7", + "8": "8", + "9": "9", + ":": ":", + ";": ";", + "<": "<", + "=": "=", + ">": ">", + "?": "?", + "@": "@", + "A": "\ud835\ude08", + "B": "\ud835\ude09", + "C": "\ud835\ude0a", + "D": "\ud835\ude0b", + "E": "\ud835\ude0c", + "F": "\ud835\ude0d", + "G": "\ud835\ude0e", + "H": "\ud835\ude0f", + "I": "\ud835\ude10", + "J": "\ud835\ude11", + "K": 
"\ud835\ude12", + "L": "\ud835\ude13", + "M": "\ud835\ude14", + "N": "\ud835\ude15", + "O": "\ud835\ude16", + "P": "\ud835\ude17", + "Q": "\ud835\ude18", + "R": "\ud835\ude19", + "S": "\ud835\ude1a", + "T": "\ud835\ude1b", + "U": "\ud835\ude1c", + "V": "\ud835\ude1d", + "W": "\ud835\ude1e", + "X": "\ud835\ude1f", + "Y": "\ud835\ude20", + "Z": "\ud835\ude21", + "[": "[", + "]": "]", + "^": "^", + "_": "_", + "`": "`", + "a": "\ud835\ude22", + "b": "\ud835\ude23", + "c": "\ud835\ude24", + "d": "\ud835\ude25", + "e": "\ud835\ude26", + "f": "\ud835\ude27", + "g": "\ud835\ude28", + "h": "\ud835\ude29", + "i": "\ud835\ude2a", + "j": "\ud835\ude2b", + "k": "\ud835\ude2c", + "l": "\ud835\ude2d", + "m": "\ud835\ude2e", + "n": "\ud835\ude2f", + "o": "\ud835\ude30", + "p": "\ud835\ude31", + "q": "\ud835\ude32", + "r": "\ud835\ude33", + "s": "\ud835\ude34", + "t": "\ud835\ude35", + "u": "\ud835\ude36", + "v": "\ud835\ude37", + "w": "\ud835\ude38", + "x": "\ud835\ude39", + "y": "\ud835\ude3a", + "z": "\ud835\ude3b", + "{": "{", + "|": "|", + "}": "}", + "~": "~" + }, + "sansBoldItalic": { + "\"": "\"", + "\\": "\\", + " ": " ", + "!": "!", + "#": "#", + "$": "$", + "%": "%", + "&": "&", + "'": "'", + "(": "(", + ")": ")", + "*": "*", + "+": "+", + ",": ",", + "-": "-", + ".": ".", + "/": "/", + "0": "0", + "1": "1", + "2": "2", + "3": "3", + "4": "4", + "5": "5", + "6": "6", + "7": "7", + "8": "8", + "9": "9", + ":": ":", + ";": ";", + "<": "<", + "=": "=", + ">": ">", + "?": "?", + "@": "@", + "A": "\ud835\ude3c", + "B": "\ud835\ude3d", + "C": "\ud835\ude3e", + "D": "\ud835\ude3f", + "E": "\ud835\ude40", + "F": "\ud835\ude41", + "G": "\ud835\ude42", + "H": "\ud835\ude43", + "I": "\ud835\ude44", + "J": "\ud835\ude45", + "K": "\ud835\ude46", + "L": "\ud835\ude47", + "M": "\ud835\ude48", + "N": "\ud835\ude49", + "O": "\ud835\ude4a", + "P": "\ud835\ude4b", + "Q": "\ud835\ude4c", + "R": "\ud835\ude4d", + "S": "\ud835\ude4e", + "T": "\ud835\ude4f", + "U": "\ud835\ude50", + "V": 
"\ud835\ude51", + "W": "\ud835\ude52", + "X": "\ud835\ude53", + "Y": "\ud835\ude54", + "Z": "\ud835\ude55", + "[": "[", + "]": "]", + "^": "^", + "_": "_", + "`": "`", + "a": "\ud835\ude56", + "b": "\ud835\ude57", + "c": "\ud835\ude58", + "d": "\ud835\ude59", + "e": "\ud835\ude5a", + "f": "\ud835\ude5b", + "g": "\ud835\ude5c", + "h": "\ud835\ude5d", + "i": "\ud835\ude5e", + "j": "\ud835\ude5f", + "k": "\ud835\ude60", + "l": "\ud835\ude61", + "m": "\ud835\ude62", + "n": "\ud835\ude63", + "o": "\ud835\ude64", + "p": "\ud835\ude65", + "q": "\ud835\ude66", + "r": "\ud835\ude67", + "s": "\ud835\ude68", + "t": "\ud835\ude69", + "u": "\ud835\ude6a", + "v": "\ud835\ude6b", + "w": "\ud835\ude6c", + "x": "\ud835\ude6d", + "y": "\ud835\ude6e", + "z": "\ud835\ude6f", + "{": "{", + "|": "|", + "}": "}", + "~": "~" + }, + "monospace": { + "\"": "\"", + "\\": "\\", + " ": "\u2002", + "!": "!", + "#": "#", + "$": "$", + "%": "%", + "&": "&", + "'": "'", + "(": "(", + ")": ")", + "*": "*", + "+": "+", + ",": ",", + "-": "-", + ".": ".", + "/": "/", + "0": "\ud835\udff6", + "1": "\ud835\udff7", + "2": "\ud835\udff8", + "3": "\ud835\udff9", + "4": "\ud835\udffa", + "5": "\ud835\udffb", + "6": "\ud835\udffc", + "7": "\ud835\udffd", + "8": "\ud835\udffe", + "9": "\ud835\udfff", + ":": ":", + ";": ";", + "<": "<", + "=": "=", + ">": ">", + "?": "?", + "@": "@", + "A": "\ud835\ude70", + "B": "\ud835\ude71", + "C": "\ud835\ude72", + "D": "\ud835\ude73", + "E": "\ud835\ude74", + "F": "\ud835\ude75", + "G": "\ud835\ude76", + "H": "\ud835\ude77", + "I": "\ud835\ude78", + "J": "\ud835\ude79", + "K": "\ud835\ude7a", + "L": "\ud835\ude7b", + "M": "\ud835\ude7c", + "N": "\ud835\ude7d", + "O": "\ud835\ude7e", + "P": "\ud835\ude7f", + "Q": "\ud835\ude80", + "R": "\ud835\ude81", + "S": "\ud835\ude82", + "T": "\ud835\ude83", + "U": "\ud835\ude84", + "V": "\ud835\ude85", + "W": "\ud835\ude86", + "X": "\ud835\ude87", + "Y": "\ud835\ude88", + "Z": "\ud835\ude89", + "[": "[", + "]": "]", + "^": "^", + 
"_": "_", + "`": "`", + "a": "\ud835\ude8a", + "b": "\ud835\ude8b", + "c": "\ud835\ude8c", + "d": "\ud835\ude8d", + "e": "\ud835\ude8e", + "f": "\ud835\ude8f", + "g": "\ud835\ude90", + "h": "\ud835\ude91", + "i": "\ud835\ude92", + "j": "\ud835\ude93", + "k": "\ud835\ude94", + "l": "\ud835\ude95", + "m": "\ud835\ude96", + "n": "\ud835\ude97", + "o": "\ud835\ude98", + "p": "\ud835\ude99", + "q": "\ud835\ude9a", + "r": "\ud835\ude9b", + "s": "\ud835\ude9c", + "t": "\ud835\ude9d", + "u": "\ud835\ude9e", + "v": "\ud835\ude9f", + "w": "\ud835\udea0", + "x": "\ud835\udea1", + "y": "\ud835\udea2", + "z": "\ud835\udea3", + "{": "{", + "|": "|", + "}": "}", + "~": "~" + }, + "fullwidth": { + "\"": "\"", + "\\": "\uff3c", + " ": "\u3000", + "!": "\uff01", + "#": "\uff03", + "$": "\uff04", + "%": "\uff05", + "&": "\uff06", + "'": "\uff07", + "(": "\uff08", + ")": "\uff09", + "*": "\uff0a", + "+": "\uff0b", + ",": "\uff0c", + "-": "\uff0d", + ".": "\uff0e", + "/": "\uff0f", + "0": "\uff10", + "1": "\uff11", + "2": "\uff12", + "3": "\uff13", + "4": "\uff14", + "5": "\uff15", + "6": "\uff16", + "7": "\uff17", + "8": "\uff18", + "9": "\uff19", + ":": "\uff1a", + ";": "\uff1b", + "<": "<", + "=": "\uff1d", + ">": ">", + "?": "\uff1f", + "@": "\uff20", + "A": "\uff21", + "B": "\uff22", + "C": "\uff23", + "D": "\uff24", + "E": "\uff25", + "F": "\uff26", + "G": "\uff27", + "H": "\uff28", + "I": "\uff29", + "J": "\uff2a", + "K": "\uff2b", + "L": "\uff2c", + "M": "\uff2d", + "N": "\uff2e", + "O": "\uff2f", + "P": "\uff30", + "Q": "\uff31", + "R": "\uff32", + "S": "\uff33", + "T": "\uff34", + "U": "\uff35", + "V": "\uff36", + "W": "\uff37", + "X": "\uff38", + "Y": "\uff39", + "Z": "\uff3a", + "[": "\uff3b", + "]": "\uff3d", + "^": "\uff3e", + "_": "\uff3f", + "`": "\uff40", + "a": "\uff41", + "b": "\uff42", + "c": "\uff43", + "d": "\uff44", + "e": "\uff45", + "f": "\uff46", + "g": "\uff47", + "h": "\uff48", + "i": "\uff49", + "j": "\uff4a", + "k": "\uff4b", + "l": "\uff4c", + "m": 
"\uff4d", + "n": "\uff4e", + "o": "\uff4f", + "p": "\uff50", + "q": "\uff51", + "r": "\uff52", + "s": "\uff53", + "t": "\uff54", + "u": "\uff55", + "v": "\uff56", + "w": "\uff57", + "x": "\uff58", + "y": "\uff59", + "z": "\uff5a", + "{": "\uff5b", + "|": "\uff5c", + "}": "\uff5d", + "~": "\uff5e" + }, + "fraktur": { + "\"": "\"", + "\\": "\\", + " ": " ", + "!": "!", + "#": "#", + "$": "$", + "%": "%", + "&": "&", + "'": "'", + "(": "(", + ")": ")", + "*": "*", + "+": "+", + ",": ",", + "-": "-", + ".": ".", + "/": "/", + "0": "0", + "1": "1", + "2": "2", + "3": "3", + "4": "4", + "5": "5", + "6": "6", + "7": "7", + "8": "8", + "9": "9", + ":": ":", + ";": ";", + "<": "<", + "=": "=", + ">": ">", + "?": "?", + "@": "@", + "A": "\ud835\udd04", + "B": "\ud835\udd05", + "C": "\u212d", + "D": "\ud835\udd07", + "E": "\ud835\udd08", + "F": "\ud835\udd09", + "G": "\ud835\udd0a", + "H": "\u210c", + "I": "\u2111", + "J": "\ud835\udd0d", + "K": "\ud835\udd0e", + "L": "\ud835\udd0f", + "M": "\ud835\udd10", + "N": "\ud835\udd11", + "O": "\ud835\udd12", + "P": "\ud835\udd13", + "Q": "\ud835\udd14", + "R": "\u211c", + "S": "\ud835\udd16", + "T": "\ud835\udd17", + "U": "\ud835\udd18", + "V": "\ud835\udd19", + "W": "\ud835\udd1a", + "X": "\ud835\udd1b", + "Y": "\ud835\udd1c", + "Z": "\u2128", + "[": "[", + "]": "]", + "^": "^", + "_": "_", + "`": "`", + "a": "\ud835\udd1e", + "b": "\ud835\udd1f", + "c": "\ud835\udd20", + "d": "\ud835\udd21", + "e": "\ud835\udd22", + "f": "\ud835\udd23", + "g": "\ud835\udd24", + "h": "\ud835\udd25", + "i": "\ud835\udd26", + "j": "\ud835\udd27", + "k": "\ud835\udd28", + "l": "\ud835\udd29", + "m": "\ud835\udd2a", + "n": "\ud835\udd2b", + "o": "\ud835\udd2c", + "p": "\ud835\udd2d", + "q": "\ud835\udd2e", + "r": "\ud835\udd2f", + "s": "\ud835\udd30", + "t": "\ud835\udd31", + "u": "\ud835\udd32", + "v": "\ud835\udd33", + "w": "\ud835\udd34", + "x": "\ud835\udd35", + "y": "\ud835\udd36", + "z": "\ud835\udd37", + "{": "{", + "|": "|", + "}": "}", + "~": 
"~" + }, + "boldFraktur": { + "\"": "\"", + "\\": "\\", + " ": " ", + "!": "!", + "#": "#", + "$": "$", + "%": "%", + "&": "&", + "'": "'", + "(": "(", + ")": ")", + "*": "*", + "+": "+", + ",": ",", + "-": "-", + ".": ".", + "/": "/", + "0": "0", + "1": "1", + "2": "2", + "3": "3", + "4": "4", + "5": "5", + "6": "6", + "7": "7", + "8": "8", + "9": "9", + ":": ":", + ";": ";", + "<": "<", + "=": "=", + ">": ">", + "?": "?", + "@": "@", + "A": "\ud835\udd6c", + "B": "\ud835\udd6d", + "C": "\ud835\udd6e", + "D": "\ud835\udd6f", + "E": "\ud835\udd70", + "F": "\ud835\udd71", + "G": "\ud835\udd72", + "H": "\ud835\udd73", + "I": "\ud835\udd74", + "J": "\ud835\udd75", + "K": "\ud835\udd76", + "L": "\ud835\udd77", + "M": "\ud835\udd78", + "N": "\ud835\udd79", + "O": "\ud835\udd7a", + "P": "\ud835\udd7b", + "Q": "\ud835\udd7c", + "R": "\ud835\udd7d", + "S": "\ud835\udd7e", + "T": "\ud835\udd7f", + "U": "\ud835\udd80", + "V": "\ud835\udd81", + "W": "\ud835\udd82", + "X": "\ud835\udd83", + "Y": "\ud835\udd84", + "Z": "\ud835\udd85", + "[": "[", + "]": "]", + "^": "^", + "_": "_", + "`": "`", + "a": "\ud835\udd86", + "b": "\ud835\udd87", + "c": "\ud835\udd88", + "d": "\ud835\udd89", + "e": "\ud835\udd8a", + "f": "\ud835\udd8b", + "g": "\ud835\udd8c", + "h": "\ud835\udd8d", + "i": "\ud835\udd8e", + "j": "\ud835\udd8f", + "k": "\ud835\udd90", + "l": "\ud835\udd91", + "m": "\ud835\udd92", + "n": "\ud835\udd93", + "o": "\ud835\udd94", + "p": "\ud835\udd95", + "q": "\ud835\udd96", + "r": "\ud835\udd97", + "s": "\ud835\udd98", + "t": "\ud835\udd99", + "u": "\ud835\udd9a", + "v": "\ud835\udd9b", + "w": "\ud835\udd9c", + "x": "\ud835\udd9d", + "y": "\ud835\udd9e", + "z": "\ud835\udd9f", + "{": "{", + "|": "|", + "}": "}", + "~": "~" + }, + "serifBold": { + "\"": "\"", + "\\": "\\", + " ": " ", + "!": "!", + "#": "#", + "$": "$", + "%": "%", + "&": "&", + "'": "'", + "(": "(", + ")": ")", + "*": "*", + "+": "+", + ",": ",", + "-": "-", + ".": ".", + "/": "/", + "0": "\ud835\udfce", + 
"1": "\ud835\udfcf", + "2": "\ud835\udfd0", + "3": "\ud835\udfd1", + "4": "\ud835\udfd2", + "5": "\ud835\udfd3", + "6": "\ud835\udfd4", + "7": "\ud835\udfd5", + "8": "\ud835\udfd6", + "9": "\ud835\udfd7", + ":": ":", + ";": ";", + "<": "<", + "=": "=", + ">": ">", + "?": "?", + "@": "@", + "A": "\ud835\udc00", + "B": "\ud835\udc01", + "C": "\ud835\udc02", + "D": "\ud835\udc03", + "E": "\ud835\udc04", + "F": "\ud835\udc05", + "G": "\ud835\udc06", + "H": "\ud835\udc07", + "I": "\ud835\udc08", + "J": "\ud835\udc09", + "K": "\ud835\udc0a", + "L": "\ud835\udc0b", + "M": "\ud835\udc0c", + "N": "\ud835\udc0d", + "O": "\ud835\udc0e", + "P": "\ud835\udc0f", + "Q": "\ud835\udc10", + "R": "\ud835\udc11", + "S": "\ud835\udc12", + "T": "\ud835\udc13", + "U": "\ud835\udc14", + "V": "\ud835\udc15", + "W": "\ud835\udc16", + "X": "\ud835\udc17", + "Y": "\ud835\udc18", + "Z": "\ud835\udc19", + "[": "[", + "]": "]", + "^": "^", + "_": "_", + "`": "`", + "a": "\ud835\udc1a", + "b": "\ud835\udc1b", + "c": "\ud835\udc1c", + "d": "\ud835\udc1d", + "e": "\ud835\udc1e", + "f": "\ud835\udc1f", + "g": "\ud835\udc20", + "h": "\ud835\udc21", + "i": "\ud835\udc22", + "j": "\ud835\udc23", + "k": "\ud835\udc24", + "l": "\ud835\udc25", + "m": "\ud835\udc26", + "n": "\ud835\udc27", + "o": "\ud835\udc28", + "p": "\ud835\udc29", + "q": "\ud835\udc2a", + "r": "\ud835\udc2b", + "s": "\ud835\udc2c", + "t": "\ud835\udc2d", + "u": "\ud835\udc2e", + "v": "\ud835\udc2f", + "w": "\ud835\udc30", + "x": "\ud835\udc31", + "y": "\ud835\udc32", + "z": "\ud835\udc33", + "{": "{", + "|": "|", + "}": "}", + "~": "~" + }, + "serifItalic": { + "\"": "\"", + "\\": "\\", + " ": " ", + "!": "!", + "#": "#", + "$": "$", + "%": "%", + "&": "&", + "'": "'", + "(": "(", + ")": ")", + "*": "*", + "+": "+", + ",": ",", + "-": "-", + ".": ".", + "/": "/", + "0": "0", + "1": "1", + "2": "2", + "3": "3", + "4": "4", + "5": "5", + "6": "6", + "7": "7", + "8": "8", + "9": "9", + ":": ":", + ";": ";", + "<": "<", + "=": "=", + ">": 
">", + "?": "?", + "@": "@", + "A": "\ud835\udc34", + "B": "\ud835\udc35", + "C": "\ud835\udc36", + "D": "\ud835\udc37", + "E": "\ud835\udc38", + "F": "\ud835\udc39", + "G": "\ud835\udc3a", + "H": "\ud835\udc3b", + "I": "\ud835\udc3c", + "J": "\ud835\udc3d", + "K": "\ud835\udc3e", + "L": "\ud835\udc3f", + "M": "\ud835\udc40", + "N": "\ud835\udc41", + "O": "\ud835\udc42", + "P": "\ud835\udc43", + "Q": "\ud835\udc44", + "R": "\ud835\udc45", + "S": "\ud835\udc46", + "T": "\ud835\udc47", + "U": "\ud835\udc48", + "V": "\ud835\udc49", + "W": "\ud835\udc4a", + "X": "\ud835\udc4b", + "Y": "\ud835\udc4c", + "Z": "\ud835\udc4d", + "[": "[", + "]": "]", + "^": "^", + "_": "_", + "`": "`", + "a": "\ud835\udc4e", + "b": "\ud835\udc4f", + "c": "\ud835\udc50", + "d": "\ud835\udc51", + "e": "\ud835\udc52", + "f": "\ud835\udc53", + "g": "\ud835\udc54", + "h": "\u210e", + "i": "\ud835\udc56", + "j": "\ud835\udc57", + "k": "\ud835\udc58", + "l": "\ud835\udc59", + "m": "\ud835\udc5a", + "n": "\ud835\udc5b", + "o": "\ud835\udc5c", + "p": "\ud835\udc5d", + "q": "\ud835\udc5e", + "r": "\ud835\udc5f", + "s": "\ud835\udc60", + "t": "\ud835\udc61", + "u": "\ud835\udc62", + "v": "\ud835\udc63", + "w": "\ud835\udc64", + "x": "\ud835\udc65", + "y": "\ud835\udc66", + "z": "\ud835\udc67", + "{": "{", + "|": "|", + "}": "}", + "~": "~" + }, + "serifBoldItalic": { + "\"": "\"", + "\\": "\\", + " ": " ", + "!": "!", + "#": "#", + "$": "$", + "%": "%", + "&": "&", + "'": "'", + "(": "(", + ")": ")", + "*": "*", + "+": "+", + ",": ",", + "-": "-", + ".": ".", + "/": "/", + "0": "0", + "1": "1", + "2": "2", + "3": "3", + "4": "4", + "5": "5", + "6": "6", + "7": "7", + "8": "8", + "9": "9", + ":": ":", + ";": ";", + "<": "<", + "=": "=", + ">": ">", + "?": "?", + "@": "@", + "A": "\ud835\udc68", + "B": "\ud835\udc69", + "C": "\ud835\udc6a", + "D": "\ud835\udc6b", + "E": "\ud835\udc6c", + "F": "\ud835\udc6d", + "G": "\ud835\udc6e", + "H": "\ud835\udc6f", + "I": "\ud835\udc70", + "J": "\ud835\udc71", + 
"K": "\ud835\udc72", + "L": "\ud835\udc73", + "M": "\ud835\udc74", + "N": "\ud835\udc75", + "O": "\ud835\udc76", + "P": "\ud835\udc77", + "Q": "\ud835\udc78", + "R": "\ud835\udc79", + "S": "\ud835\udc7a", + "T": "\ud835\udc7b", + "U": "\ud835\udc7c", + "V": "\ud835\udc7d", + "W": "\ud835\udc7e", + "X": "\ud835\udc7f", + "Y": "\ud835\udc80", + "Z": "\ud835\udc81", + "[": "[", + "]": "]", + "^": "^", + "_": "_", + "`": "`", + "a": "\ud835\udc82", + "b": "\ud835\udc83", + "c": "\ud835\udc84", + "d": "\ud835\udc85", + "e": "\ud835\udc86", + "f": "\ud835\udc87", + "g": "\ud835\udc88", + "h": "\ud835\udc89", + "i": "\ud835\udc8a", + "j": "\ud835\udc8b", + "k": "\ud835\udc8c", + "l": "\ud835\udc8d", + "m": "\ud835\udc8e", + "n": "\ud835\udc8f", + "o": "\ud835\udc90", + "p": "\ud835\udc91", + "q": "\ud835\udc92", + "r": "\ud835\udc93", + "s": "\ud835\udc94", + "t": "\ud835\udc95", + "u": "\ud835\udc96", + "v": "\ud835\udc97", + "w": "\ud835\udc98", + "x": "\ud835\udc99", + "y": "\ud835\udc9a", + "z": "\ud835\udc9b", + "{": "{", + "|": "|", + "}": "}", + "~": "~" + }, + "doubleStruck": { + "\"": "\"", + "\\": "\\", + " ": " ", + "!": "!", + "#": "#", + "$": "$", + "%": "%", + "&": "&", + "'": "'", + "(": "(", + ")": ")", + "*": "*", + "+": "+", + ",": ",", + "-": "-", + ".": ".", + "/": "/", + "0": "\ud835\udfd8", + "1": "\ud835\udfd9", + "2": "\ud835\udfda", + "3": "\ud835\udfdb", + "4": "\ud835\udfdc", + "5": "\ud835\udfdd", + "6": "\ud835\udfde", + "7": "\ud835\udfdf", + "8": "\ud835\udfe0", + "9": "\ud835\udfe1", + ":": ":", + ";": ";", + "<": "<", + "=": "=", + ">": ">", + "?": "?", + "@": "@", + "A": "\ud835\udd38", + "B": "\ud835\udd39", + "C": "\u2102", + "D": "\ud835\udd3b", + "E": "\ud835\udd3c", + "F": "\ud835\udd3d", + "G": "\ud835\udd3e", + "H": "\u210d", + "I": "\ud835\udd40", + "J": "\ud835\udd41", + "K": "\ud835\udd42", + "L": "\ud835\udd43", + "M": "\ud835\udd44", + "N": "\u2115", + "O": "\ud835\udd46", + "P": "\u2119", + "Q": "\u211a", + "R": "\u211d", + 
"S": "\ud835\udd4a", + "T": "\ud835\udd4b", + "U": "\ud835\udd4c", + "V": "\ud835\udd4d", + "W": "\ud835\udd4e", + "X": "\ud835\udd4f", + "Y": "\ud835\udd50", + "Z": "\u2124", + "[": "[", + "]": "]", + "^": "^", + "_": "_", + "`": "`", + "a": "\ud835\udd52", + "b": "\ud835\udd53", + "c": "\ud835\udd54", + "d": "\ud835\udd55", + "e": "\ud835\udd56", + "f": "\ud835\udd57", + "g": "\ud835\udd58", + "h": "\ud835\udd59", + "i": "\ud835\udd5a", + "j": "\ud835\udd5b", + "k": "\ud835\udd5c", + "l": "\ud835\udd5d", + "m": "\ud835\udd5e", + "n": "\ud835\udd5f", + "o": "\ud835\udd60", + "p": "\ud835\udd61", + "q": "\ud835\udd62", + "r": "\ud835\udd63", + "s": "\ud835\udd64", + "t": "\ud835\udd65", + "u": "\ud835\udd66", + "v": "\ud835\udd67", + "w": "\ud835\udd68", + "x": "\ud835\udd69", + "y": "\ud835\udd6a", + "z": "\ud835\udd6b", + "{": "{", + "|": "|", + "}": "}", + "~": "~" + }, + "script": { + "\"": "\"", + "\\": "\\", + " ": " ", + "!": "!", + "#": "#", + "$": "$", + "%": "%", + "&": "&", + "'": "'", + "(": "(", + ")": ")", + "*": "*", + "+": "+", + ",": ",", + "-": "-", + ".": ".", + "/": "/", + "0": "0", + "1": "1", + "2": "2", + "3": "3", + "4": "4", + "5": "5", + "6": "6", + "7": "7", + "8": "8", + "9": "9", + ":": ":", + ";": ";", + "<": "<", + "=": "=", + ">": ">", + "?": "?", + "@": "@", + "A": "\ud835\udc9c", + "B": "\u212c", + "C": "\ud835\udc9e", + "D": "\ud835\udc9f", + "E": "\u2130", + "F": "\u2131", + "G": "\ud835\udca2", + "H": "\u210b", + "I": "\u2110", + "J": "\ud835\udca5", + "K": "\ud835\udca6", + "L": "\u2112", + "M": "\u2133", + "N": "\ud835\udca9", + "O": "\ud835\udcaa", + "P": "\ud835\udcab", + "Q": "\ud835\udcac", + "R": "\u211b", + "S": "\ud835\udcae", + "T": "\ud835\udcaf", + "U": "\ud835\udcb0", + "V": "\ud835\udcb1", + "W": "\ud835\udcb2", + "X": "\ud835\udcb3", + "Y": "\ud835\udcb4", + "Z": "\ud835\udcb5", + "[": "[", + "]": "]", + "^": "^", + "_": "_", + "`": "`", + "a": "\ud835\udcb6", + "b": "\ud835\udcb7", + "c": "\ud835\udcb8", + "d": 
"\ud835\udcb9", + "e": "\u212f", + "f": "\ud835\udcbb", + "g": "\u210a", + "h": "\ud835\udcbd", + "i": "\ud835\udcbe", + "j": "\ud835\udcbf", + "k": "\ud835\udcc0", + "l": "\ud835\udcc1", + "m": "\ud835\udcc2", + "n": "\ud835\udcc3", + "o": "\u2134", + "p": "\ud835\udcc5", + "q": "\ud835\udcc6", + "r": "\ud835\udcc7", + "s": "\ud835\udcc8", + "t": "\ud835\udcc9", + "u": "\ud835\udcca", + "v": "\ud835\udccb", + "w": "\ud835\udccc", + "x": "\ud835\udccd", + "y": "\ud835\udcce", + "z": "\ud835\udccf", + "{": "{", + "|": "|", + "}": "}", + "~": "~" + }, + "boldScript": { + "\"": "\"", + "\\": "\\", + " ": " ", + "!": "!", + "#": "#", + "$": "$", + "%": "%", + "&": "&", + "'": "'", + "(": "(", + ")": ")", + "*": "*", + "+": "+", + ",": ",", + "-": "-", + ".": ".", + "/": "/", + "0": "0", + "1": "1", + "2": "2", + "3": "3", + "4": "4", + "5": "5", + "6": "6", + "7": "7", + "8": "8", + "9": "9", + ":": ":", + ";": ";", + "<": "<", + "=": "=", + ">": ">", + "?": "?", + "@": "@", + "A": "\ud835\udcd0", + "B": "\ud835\udcd1", + "C": "\ud835\udcd2", + "D": "\ud835\udcd3", + "E": "\ud835\udcd4", + "F": "\ud835\udcd5", + "G": "\ud835\udcd6", + "H": "\ud835\udcd7", + "I": "\ud835\udcd8", + "J": "\ud835\udcd9", + "K": "\ud835\udcda", + "L": "\ud835\udcdb", + "M": "\ud835\udcdc", + "N": "\ud835\udcdd", + "O": "\ud835\udcde", + "P": "\ud835\udcdf", + "Q": "\ud835\udce0", + "R": "\ud835\udce1", + "S": "\ud835\udce2", + "T": "\ud835\udce3", + "U": "\ud835\udce4", + "V": "\ud835\udce5", + "W": "\ud835\udce6", + "X": "\ud835\udce7", + "Y": "\ud835\udce8", + "Z": "\ud835\udce9", + "[": "[", + "]": "]", + "^": "^", + "_": "_", + "`": "`", + "a": "\ud835\udcea", + "b": "\ud835\udceb", + "c": "\ud835\udcec", + "d": "\ud835\udced", + "e": "\ud835\udcee", + "f": "\ud835\udcef", + "g": "\ud835\udcf0", + "h": "\ud835\udcf1", + "i": "\ud835\udcf2", + "j": "\ud835\udcf3", + "k": "\ud835\udcf4", + "l": "\ud835\udcf5", + "m": "\ud835\udcf6", + "n": "\ud835\udcf7", + "o": "\ud835\udcf8", + "p": 
"\ud835\udcf9", + "q": "\ud835\udcfa", + "r": "\ud835\udcfb", + "s": "\ud835\udcfc", + "t": "\ud835\udcfd", + "u": "\ud835\udcfe", + "v": "\ud835\udcff", + "w": "\ud835\udd00", + "x": "\ud835\udd01", + "y": "\ud835\udd02", + "z": "\ud835\udd03", + "{": "{", + "|": "|", + "}": "}", + "~": "~" + }, + "circled": { + "\"": "\"", + "\\": "\u29b8", + " ": " ", + "!": "!", + "#": "#", + "$": "$", + "%": "%", + "&": "&", + "'": "'", + "(": "(", + ")": ")", + "*": "\u229b", + "+": "\u2295", + ",": ",", + "-": "\u2296", + ".": "\u2a00", + "/": "\u2298", + "0": "\u24ea", + "1": "\u2460", + "2": "\u2461", + "3": "\u2462", + "4": "\u2463", + "5": "\u2464", + "6": "\u2465", + "7": "\u2466", + "8": "\u2467", + "9": "\u2468", + ":": ":", + ";": ";", + "<": "\u29c0", + "=": "\u229c", + ">": "\u29c1", + "?": "?", + "@": "@", + "A": "\u24b6", + "B": "\u24b7", + "C": "\u24b8", + "D": "\u24b9", + "E": "\u24ba", + "F": "\u24bb", + "G": "\u24bc", + "H": "\u24bd", + "I": "\u24be", + "J": "\u24bf", + "K": "\u24c0", + "L": "\u24c1", + "M": "\u24c2", + "N": "\u24c3", + "O": "\u24c4", + "P": "\u24c5", + "Q": "\u24c6", + "R": "\u24c7", + "S": "\u24c8", + "T": "\u24c9", + "U": "\u24ca", + "V": "\u24cb", + "W": "\u24cc", + "X": "\u24cd", + "Y": "\u24ce", + "Z": "\u24cf", + "[": "[", + "]": "]", + "^": "^", + "_": "_", + "`": "`", + "a": "\u24d0", + "b": "\u24d1", + "c": "\u24d2", + "d": "\u24d3", + "e": "\u24d4", + "f": "\u24d5", + "g": "\u24d6", + "h": "\u24d7", + "i": "\u24d8", + "j": "\u24d9", + "k": "\u24da", + "l": "\u24db", + "m": "\u24dc", + "n": "\u24dd", + "o": "\u24de", + "p": "\u24df", + "q": "\u24e0", + "r": "\u24e1", + "s": "\u24e2", + "t": "\u24e3", + "u": "\u24e4", + "v": "\u24e5", + "w": "\u24e6", + "x": "\u24e7", + "y": "\u24e8", + "z": "\u24e9", + "{": "{", + "|": "\u29b6", + "}": "}", + "~": "~" + }, + "circledNegative": { + "\"": "\"", + "\\": "\\", + " ": " ", + "!": "!", + "#": "#", + "$": "$", + "%": "%", + "&": "&", + "'": "'", + "(": "(", + ")": ")", + "*": "*", + "+": 
"+", + ",": ",", + "-": "-", + ".": ".", + "/": "/", + "0": "\u24ff", + "1": "\u2776", + "2": "\u2777", + "3": "\u2778", + "4": "\u2779", + "5": "\u277a", + "6": "\u277b", + "7": "\u277c", + "8": "\u277d", + "9": "\u277e", + ":": ":", + ";": ";", + "<": "<", + "=": "=", + ">": ">", + "?": "?", + "@": "@", + "A": "\ud83c\udd50", + "B": "\ud83c\udd51", + "C": "\ud83c\udd52", + "D": "\ud83c\udd53", + "E": "\ud83c\udd54", + "F": "\ud83c\udd55", + "G": "\ud83c\udd56", + "H": "\ud83c\udd57", + "I": "\ud83c\udd58", + "J": "\ud83c\udd59", + "K": "\ud83c\udd5a", + "L": "\ud83c\udd5b", + "M": "\ud83c\udd5c", + "N": "\ud83c\udd5d", + "O": "\ud83c\udd5e", + "P": "\ud83c\udd5f", + "Q": "\ud83c\udd60", + "R": "\ud83c\udd61", + "S": "\ud83c\udd62", + "T": "\ud83c\udd63", + "U": "\ud83c\udd64", + "V": "\ud83c\udd65", + "W": "\ud83c\udd66", + "X": "\ud83c\udd67", + "Y": "\ud83c\udd68", + "Z": "\ud83c\udd69", + "[": "[", + "]": "]", + "^": "^", + "_": "_", + "`": "`", + "a": "\ud83c\udd50", + "b": "\ud83c\udd51", + "c": "\ud83c\udd52", + "d": "\ud83c\udd53", + "e": "\ud83c\udd54", + "f": "\ud83c\udd55", + "g": "\ud83c\udd56", + "h": "\ud83c\udd57", + "i": "\ud83c\udd58", + "j": "\ud83c\udd59", + "k": "\ud83c\udd5a", + "l": "\ud83c\udd5b", + "m": "\ud83c\udd5c", + "n": "\ud83c\udd5d", + "o": "\ud83c\udd5e", + "p": "\ud83c\udd5f", + "q": "\ud83c\udd60", + "r": "\ud83c\udd61", + "s": "\ud83c\udd62", + "t": "\ud83c\udd63", + "u": "\ud83c\udd64", + "v": "\ud83c\udd65", + "w": "\ud83c\udd66", + "x": "\ud83c\udd67", + "y": "\ud83c\udd68", + "z": "\ud83c\udd69", + "{": "{", + "|": "|", + "}": "}", + "~": "~" + }, + "squared": { + "\"": "\"", + "\\": "\\", + " ": " ", + "!": "!", + "#": "#", + "$": "$", + "%": "%", + "&": "&", + "'": "'", + "(": "(", + ")": ")", + "*": "*", + "+": "+", + ",": ",", + "-": "-", + ".": ".", + "/": "/", + "0": "0", + "1": "1", + "2": "2", + "3": "3", + "4": "4", + "5": "5", + "6": "6", + "7": "7", + "8": "8", + "9": "9", + ":": ":", + ";": ";", + "<": "<", + 
"=": "=", + ">": ">", + "?": "?", + "@": "@", + "A": "\ud83c\udd30", + "B": "\ud83c\udd31", + "C": "\ud83c\udd32", + "D": "\ud83c\udd33", + "E": "\ud83c\udd34", + "F": "\ud83c\udd35", + "G": "\ud83c\udd36", + "H": "\ud83c\udd37", + "I": "\ud83c\udd38", + "J": "\ud83c\udd39", + "K": "\ud83c\udd3a", + "L": "\ud83c\udd3b", + "M": "\ud83c\udd3c", + "N": "\ud83c\udd3d", + "O": "\ud83c\udd3e", + "P": "\ud83c\udd3f", + "Q": "\ud83c\udd40", + "R": "\ud83c\udd41", + "S": "\ud83c\udd42", + "T": "\ud83c\udd43", + "U": "\ud83c\udd44", + "V": "\ud83c\udd45", + "W": "\ud83c\udd46", + "X": "\ud83c\udd47", + "Y": "\ud83c\udd48", + "Z": "\ud83c\udd49", + "[": "[", + "]": "]", + "^": "^", + "_": "_", + "`": "`", + "a": "\ud83c\udd30", + "b": "\ud83c\udd31", + "c": "\ud83c\udd32", + "d": "\ud83c\udd33", + "e": "\ud83c\udd34", + "f": "\ud83c\udd35", + "g": "\ud83c\udd36", + "h": "\ud83c\udd37", + "i": "\ud83c\udd38", + "j": "\ud83c\udd39", + "k": "\ud83c\udd3a", + "l": "\ud83c\udd3b", + "m": "\ud83c\udd3c", + "n": "\ud83c\udd3d", + "o": "\ud83c\udd3e", + "p": "\ud83c\udd3f", + "q": "\ud83c\udd40", + "r": "\ud83c\udd41", + "s": "\ud83c\udd42", + "t": "\ud83c\udd43", + "u": "\ud83c\udd44", + "v": "\ud83c\udd45", + "w": "\ud83c\udd46", + "x": "\ud83c\udd47", + "y": "\ud83c\udd48", + "z": "\ud83c\udd49", + "{": "{", + "|": "|", + "}": "}", + "~": "~" + }, + "squaredNegative": { + "\"": "\"", + "\\": "\u29c5", + " ": " ", + "!": "!", + "#": "#", + "$": "$", + "%": "%", + "&": "&", + "'": "'", + "(": "(", + ")": ")", + "*": "\u29c6", + "+": "\u229e", + ",": ",", + "-": "\u229f", + ".": "\u22a1", + "/": "\u29c4", + "0": "0", + "1": "1", + "2": "2", + "3": "3", + "4": "4", + "5": "5", + "6": "6", + "7": "7", + "8": "8", + "9": "9", + ":": ":", + ";": ";", + "<": "<", + "=": "=", + ">": ">", + "?": "?", + "@": "@", + "A": "\ud83c\udd70", + "B": "\ud83c\udd71", + "C": "\ud83c\udd72", + "D": "\ud83c\udd73", + "E": "\ud83c\udd74", + "F": "\ud83c\udd75", + "G": "\ud83c\udd76", + "H": 
"\ud83c\udd77", + "I": "\ud83c\udd78", + "J": "\ud83c\udd79", + "K": "\ud83c\udd7a", + "L": "\ud83c\udd7b", + "M": "\ud83c\udd7c", + "N": "\ud83c\udd7d", + "O": "\ud83c\udd7e", + "P": "\ud83c\udd7f", + "Q": "\ud83c\udd80", + "R": "\ud83c\udd81", + "S": "\ud83c\udd82", + "T": "\ud83c\udd83", + "U": "\ud83c\udd84", + "V": "\ud83c\udd85", + "W": "\ud83c\udd86", + "X": "\ud83c\udd87", + "Y": "\ud83c\udd88", + "Z": "\ud83c\udd89", + "[": "[", + "]": "]", + "^": "^", + "_": "_", + "`": "`", + "a": "\ud83c\udd70", + "b": "\ud83c\udd71", + "c": "\ud83c\udd72", + "d": "\ud83c\udd73", + "e": "\ud83c\udd74", + "f": "\ud83c\udd75", + "g": "\ud83c\udd76", + "h": "\ud83c\udd77", + "i": "\ud83c\udd78", + "j": "\ud83c\udd79", + "k": "\ud83c\udd7a", + "l": "\ud83c\udd7b", + "m": "\ud83c\udd7c", + "n": "\ud83c\udd7d", + "o": "\ud83c\udd7e", + "p": "\ud83c\udd7f", + "q": "\ud83c\udd80", + "r": "\ud83c\udd81", + "s": "\ud83c\udd82", + "t": "\ud83c\udd83", + "u": "\ud83c\udd84", + "v": "\ud83c\udd85", + "w": "\ud83c\udd86", + "x": "\ud83c\udd87", + "y": "\ud83c\udd88", + "z": "\ud83c\udd89", + "{": "{", + "|": "|", + "}": "}", + "~": "~" + }, + "parenthesized": { + "\"": "\"", + "\\": "\\", + " ": " ", + "!": "!", + "#": "#", + "$": "$", + "%": "%", + "&": "&", + "'": "'", + "(": "(", + ")": ")", + "*": "*", + "+": "+", + ",": ",", + "-": "-", + ".": ".", + "/": "/", + "0": "0", + "1": "\u2474", + "2": "\u2475", + "3": "\u2476", + "4": "\u2477", + "5": "\u2478", + "6": "\u2479", + "7": "\u247a", + "8": "\u247b", + "9": "\u247c", + ":": ":", + ";": ";", + "<": "<", + "=": "=", + ">": ">", + "?": "?", + "@": "@", + "A": "\u249c", + "B": "\u249d", + "C": "\u249e", + "D": "\u249f", + "E": "\u24a0", + "F": "\u24a1", + "G": "\u24a2", + "H": "\u24a3", + "I": "\u24a4", + "J": "\u24a5", + "K": "\u24a6", + "L": "\u24a7", + "M": "\u24a8", + "N": "\u24a9", + "O": "\u24aa", + "P": "\u24ab", + "Q": "\u24ac", + "R": "\u24ad", + "S": "\u24ae", + "T": "\u24af", + "U": "\u24b0", + "V": "\u24b1", + "W": 
"\u24b2", + "X": "\u24b3", + "Y": "\u24b4", + "Z": "\u24b5", + "[": "[", + "]": "]", + "^": "^", + "_": "_", + "`": "`", + "a": "\u249c", + "b": "\u249d", + "c": "\u249e", + "d": "\u249f", + "e": "\u24a0", + "f": "\u24a1", + "g": "\u24a2", + "h": "\u24a3", + "i": "\u24a4", + "j": "\u24a5", + "k": "\u24a6", + "l": "\u24a7", + "m": "\u24a8", + "n": "\u24a9", + "o": "\u24aa", + "p": "\u24ab", + "q": "\u24ac", + "r": "\u24ad", + "s": "\u24ae", + "t": "\u24af", + "u": "\u24b0", + "v": "\u24b1", + "w": "\u24b2", + "x": "\u24b3", + "y": "\u24b4", + "z": "\u24b5", + "{": "{", + "|": "|", + "}": "}", + "~": "~" + }, + "smallCaps": { + "\"": "\"", + "\\": "\\", + " ": " ", + "!": "!", + "#": "#", + "$": "$", + "%": "%", + "&": "&", + "'": "'", + "(": "(", + ")": ")", + "*": "*", + "+": "+", + ",": ",", + "-": "-", + ".": ".", + "/": "/", + "0": "0", + "1": "1", + "2": "2", + "3": "3", + "4": "4", + "5": "5", + "6": "6", + "7": "7", + "8": "8", + "9": "9", + ":": ":", + ";": ";", + "<": "<", + "=": "=", + ">": ">", + "?": "?", + "@": "@", + "A": "A", + "B": "B", + "C": "C", + "D": "D", + "E": "E", + "F": "F", + "G": "G", + "H": "H", + "I": "I", + "J": "J", + "K": "K", + "L": "L", + "M": "M", + "N": "N", + "O": "O", + "P": "P", + "Q": "Q", + "R": "R", + "S": "S", + "T": "T", + "U": "U", + "V": "V", + "W": "W", + "X": "X", + "Y": "Y", + "Z": "Z", + "[": "[", + "]": "]", + "^": "^", + "_": "_", + "`": "`", + "a": "\u1d00", + "b": "\u0299", + "c": "\u1d04", + "d": "\u1d05", + "e": "\u1d07", + "f": "\ua730", + "g": "\u0262", + "h": "\u029c", + "i": "\u026a", + "j": "\u1d0a", + "k": "\u1d0b", + "l": "\u029f", + "m": "\u1d0d", + "n": "\u0274", + "o": "\u1d0f", + "p": "\u1d29", + "q": "\ua7af", + "r": "\u0280", + "s": "\ua731", + "t": "\u1d1b", + "u": "\u1d1c", + "v": "\u1d20", + "w": "\u1d21", + "x": "x", + "y": "\u028f", + "z": "\u1d22", + "{": "{", + "|": "|", + "}": "}", + "~": "~" + }, + "subscript": { + "\"": "\"", + "\\": "\\", + " ": " ", + "!": "!", + "#": "#", + "$": "$", + 
"%": "%", + "&": "&", + "'": "'", + "(": "\u208d", + ")": "\u208e", + "*": "*", + "+": "\u208a", + ",": ",", + "-": "\u208b", + ".": ".", + "/": "/", + "0": "\u2080", + "1": "\u2081", + "2": "\u2082", + "3": "\u2083", + "4": "\u2084", + "5": "\u2085", + "6": "\u2086", + "7": "\u2087", + "8": "\u2088", + "9": "\u2089", + ":": ":", + ";": ";", + "<": "<", + "=": "\u208c", + ">": ">", + "?": "?", + "@": "@", + "A": "\u1d00", + "B": "\u0299", + "C": "\u1d04", + "D": "\u1d05", + "E": "\u1d07", + "F": "\ua730", + "G": "\u0262", + "H": "\u029c", + "I": "\u026a", + "J": "\u1d0a", + "K": "\u1d0b", + "L": "\u029f", + "M": "\u1d0d", + "N": "\u0274", + "O": "\u1d0f", + "P": "\u1d18", + "Q": "\ud83c\uddf6", + "R": "\u0280", + "S": "\ua731", + "T": "\u1d1b", + "U": "\u1d1c", + "V": "\u1d20", + "W": "\u1d21", + "X": "x", + "Y": "\u028f", + "Z": "\u1d22", + "[": "[", + "]": "]", + "^": "^", + "_": "_", + "`": "`", + "a": "\u2090", + "b": "\u1d66", + "c": "\ud835\udcb8", + "d": "\ud835\udcb9", + "e": "\u2091", + "f": "\ud835\udcbb", + "g": "\ud835\udcf0", + "h": "\u2095", + "i": "\u1d62", + "j": "\u2c7c", + "k": "\u2096", + "l": "\u2097", + "m": "\u2098", + "n": "\u2099", + "o": "\u2092", + "p": "\u209a", + "q": "\u1d69", + "r": "\u1d63", + "s": "\u209b", + "t": "\u209c", + "u": "\u1d64", + "v": "\u1d65", + "w": "\ud835\udccc", + "x": "\u2093", + "y": "\u1d67", + "z": "\ud835\udccf", + "{": "{", + "|": "|", + "}": "}", + "~": "~" + }, + "superscript": { + "\"": "\"", + "\\": "\\", + " ": " ", + "!": "!", + "#": "#", + "$": "$", + "%": "%", + "&": "&", + "'": "'", + "(": "\u207d", + ")": "\u207e", + "*": "*", + "+": "\u207a", + ",": ",", + "-": "\u207b", + ".": ".", + "/": "/", + "0": "\u2070", + "1": "\u00b9", + "2": "\u00b2", + "3": "\u00b3", + "4": "\u2074", + "5": "\u2075", + "6": "\u2076", + "7": "\u2077", + "8": "\u2078", + "9": "\u2079", + ":": ":", + ";": ";", + "<": "<", + "=": "\u207c", + ">": ">", + "?": "?", + "@": "@", + "A": "\u1d2c", + "B": "\u1d2e", + "C": "\u1d9c", 
+ "D": "\u1d30", + "E": "\u1d31", + "F": "\u1da0", + "G": "\u1d33", + "H": "\u1d34", + "I": "\u1d35", + "J": "\u1d36", + "K": "\u1d37", + "L": "\u1d38", + "M": "\u1d39", + "N": "\u1d3a", + "O": "\u1d3c", + "P": "\u1d3e", + "Q": "\u1d60", + "R": "\u1d3f", + "S": "\u02e2", + "T": "\u1d40", + "U": "\u1d41", + "V": "\u2c7d", + "W": "\u1d42", + "X": "\u02e3", + "Y": "\u02b8", + "Z": "\u1dbb", + "[": "[", + "]": "]", + "^": "^", + "_": "_", + "`": "`", + "a": "\u1d43", + "b": "\u1d47", + "c": "\u1d9c", + "d": "\u1d48", + "e": "\u1d49", + "f": "\u1da0", + "g": "\u1d4d", + "h": "\u02b0", + "i": "\u2071", + "j": "\u02b2", + "k": "\u1d4f", + "l": "\u02e1", + "m": "\u1d50", + "n": "\u207f", + "o": "\u1d52", + "p": "\u1d56", + "q": "\u1d60", + "r": "\u02b3", + "s": "\u02e2", + "t": "\u1d57", + "u": "\u1d58", + "v": "\u1d5b", + "w": "\u02b7", + "x": "\u02e3", + "y": "\u02b8", + "z": "\u1dbb", + "{": "{", + "|": "|", + "}": "}", + "~": "~" + }, + "inverted": { + "\"": "\u201e", + "\\": "\\", + " ": " ", + "!": "\u00a1", + "#": "#", + "$": "$", + "%": "%", + "&": "\u214b", + "'": ",", + "(": ")", + ")": "(", + "*": "*", + "+": "+", + ",": "\u2018", + "-": "-", + ".": "\u02d9", + "/": "/", + "0": "0", + "1": "\u0196", + "2": "\u0547", + "3": "\u0190", + "4": "\u152d", + "5": "\u03db", + "6": "9", + "7": "\u2c62", + "8": "8", + "9": "6", + ":": ":", + ";": ";", + "<": "<", + "=": "=", + ">": ">", + "?": "\u00bf", + "@": "@", + "A": "\u2200", + "B": "\ua4ed", + "C": "\u2183", + "D": "\ua4f7", + "E": "\u018e", + "F": "\u2132", + "G": "\u2141", + "H": "H", + "I": "I", + "J": "\u017f", + "K": "\ua4d8", + "L": "\u2142", + "M": "W", + "N": "N", + "O": "O", + "P": "\u0500", + "Q": "\u1ff8", + "R": "\ua4e4", + "S": "S", + "T": "\u22a5", + "U": "\u2229", + "V": "\ua4e5", + "W": "M", + "X": "X", + "Y": "\u2144", + "Z": "Z", + "[": "]", + "]": "[", + "^": "^", + "_": "\u203e", + "`": "`", + "a": "\u0250", + "b": "q", + "c": "\u0254", + "d": "p", + "e": "\u01dd", + "f": "\u025f", + "g": 
"\u0183", + "h": "\u0265", + "i": "\u0131", + "j": "\u027e", + "k": "\u029e", + "l": "\u05df", + "m": "\u026f", + "n": "u", + "o": "o", + "p": "d", + "q": "b", + "r": "\u0279", + "s": "s", + "t": "\u0287", + "u": "n", + "v": "\u028c", + "w": "\u028d", + "x": "x", + "y": "\u028e", + "z": "z", + "{": "}", + "|": "|", + "}": "{", + "~": "~" + }, + "mirrored": { + "\"": "\"", + "\\": "/", + " ": " ", + "!": "!", + "#": "#", + "$": "$", + "%": "%", + "&": "&", + "'": "'", + "(": ")", + ")": "(", + "*": "*", + "+": "+", + ",": ",", + "-": "-", + ".": ".", + "/": "\\", + "0": "0", + "1": "\u07c1", + "2": "\u03c2", + "3": "\u0190", + "4": "\u07c2", + "5": "\u091f", + "6": "\u10db", + "7": "\u0662", + "8": "8", + "9": "\u0b67", + ":": ":", + ";": ";", + "<": "<", + "=": "=", + ">": ">", + "?": "\u2e2e", + "@": "@", + "A": "A", + "B": "\ua4ed", + "C": "\u2183", + "D": "\ua4f7", + "E": "\u018e", + "F": "\ua7fb", + "G": "\u04d8", + "H": "H", + "I": "I", + "J": "\u10b1", + "K": "\ua4d8", + "L": "\u2143", + "M": "M", + "N": "\u0418", + "O": "O", + "P": "\ua7fc", + "Q": "\u03d8", + "R": "\u042f", + "S": "\ua644", + "T": "T", + "U": "U", + "V": "V", + "W": "W", + "X": "X", + "Y": "Y", + "Z": "Z", + "[": "]", + "]": "[", + "^": "^", + "_": "_", + "`": "`", + "a": "\u0252", + "b": "d", + "c": "\u2184", + "d": "b", + "e": "\u0258", + "f": "\u0287", + "g": "\u03f1", + "h": "\u029c", + "i": "i", + "j": "\u012f", + "k": "\u029e", + "l": "l", + "m": "m", + "n": "\u1d0e", + "o": "o", + "p": "q", + "q": "p", + "r": "\u1d19", + "s": "\ua645", + "t": "\u0248", + "u": "\u03c5", + "v": "v", + "w": "w", + "x": "x", + "y": "\u03b3", + "z": "z", + "{": "}", + "|": "|", + "}": "{", + "~": "~" + } +} \ No newline at end of file diff --git a/transformations/font_change/list_of_languages.txt b/transformations/font_change/list_of_languages.txt new file mode 100644 index 000000000..771a3045f --- /dev/null +++ b/transformations/font_change/list_of_languages.txt @@ -0,0 +1,1022 @@ +aa +aai +aak +aau +abi 
+abr +abt +aby +acd +ace +ach +ada +ade +adj +adz +aey +af +agc +agd +agg +agm +ago +agq +aha +ahl +ajg +ak +ala +ali +aln +amm +amn +amo +amp +an +anc +ank +ann +any +aoj +aom +aoz +ape +apr +aps +apz +arh +arn +aro +asa +asg +aso +ast +ata +atg +atj +auy +avn +avt +avu +awb +awo +awx +ay +ayb +az +ban +bar +bas +bav +bba +bbb +bbc +bbd +bbj +bbp +bbr +bcf +bch +bci +bcm +bcn +bco +bcu +bdd +bef +beh +bem +bet +bew +bex +bez +bfd +bhg +bhl +bhy +bi +bib +big +bik +bim +bin +bio +biq +bjh +bjn +bjo +bjr +bjt +bjz +bkc +bkm +bkq +bku +bkv +bm +bmh +bmk +bmq +bmu +bng +bnm +bnp +boj +bom +bon +bqc +bqp +bqv +br +brz +bs +bsj +bss +bto +btt +buc +bud +bug +buk +bum +buo +bus +buu +bvb +bwd +bwr +bxh +bye +byr +bys +byv +byx +bza +bze +bzf +bzh +bzw +ca +cad +can +cbj +cch +ceb +cfa +cgg +ch +chk +cho +chp +cic +cjv +ckl +cko +cky +cla +cme +co +cps +crs +cs +csb +cy +da +dad +daf +dag +dah +dak +dav +dbd +dbq +ddn +de +ded +den +dga +dgh +dgi +dgr +dgz +dia +dje +dnj +dob +dop +dow +dri +dsb +dtm +dtp +dts +dua +duc +dud +dug +dva +dww +dyo +dyu +dzg +ebu +ee +efi +egl +eka +ema +emi +en +enn +enq +eo +eri +es +esu +et +etr +etu +etx +eu +ewo +ext +faa +fab +fag +fai +fan +ff +ffi +ffm +fi +fil +fit +fj +flr +fmp +fo +fod +fon +for +fpe +fqs +fr +frc +frp +frr +frs +fud +fue +fuf +fuh +fuq +fur +fuv +fuy +fvr +fy +ga +gaa +gaf +gag +gah +gaj +gam +gaw +gay +gba +gbf +gby +gcr +gd +gde +gdn +gdr +geb +gej +gel +gfk +ghs +gil +gim +gjn +gkn +gkp +gl +gmm +gn +gnd +gng +god +goi +gor +gos +grb +grw +gsw +gub +guc +gud +gur +guw +gux +guz +gv +gvf +gvs +gwi +gyi +ha +hag +ham +haw +hbb +hhy +hi-Latn +hia +hif +hig +hih +hil +hla +hmt +hnn +ho +hot +hr +hsb +ht +hu +hui +hz +ia +ian +iar +iba +ibb +iby +ica +ich +id +idd +idi +idu +ife +ig +igb +ige +ijj +ik +ikk +ikt +ikw +ikx +ilo +imo +in +io +iou +iri +is +it +iwm +iws +izh +izi +jab +jam +jbo +jbu +jen +jgk +jgo +jib +jmc +jra +jut +jv +jw +kab +kac +kad +kai +kaj +kam +kao +kbm +kbp +kbq +kbx +kcg +kck +kcl +kct +kde 
+kdl +kea +ken +kez +kfo +kg +kge +kgf +kgp +kha +khq +khs +khz +ki +kij +kiu +kiw +kj +kjd +kjs +kjy +kkc +kkj +kl +kln +klq +klt +klx +kmb +kmh +kmo +kms +kmu +kmw +knf +knp +kol +kos +koz +kpe +kpf +kpo +kpr +kpx +kqb +kqf +kqs +kr +kri +krj +krl +krs +ksb +ksd +ksf +ksh +ksj +ksr +ktm +kto +ktr +ku +kub +kud +kue +kuj +kun +kup +kus +kvg +kvr +kw +kwj +kwo +kwq +kxa +kxe +kxw +kxz +ky-Latn +ky-TR +kye +kyx +kzj +kzr +kzt +la +lag +laj +las +lb +lbu +lbw +lcm +ldb +led +lee +lem +leq +leu +lg +lgg +li +lia +lid +lig +lih +lij +ljp +lkt +lle +lln +lmo +lmp +ln +lns +lnu +loj +lok +lol +lor +los +loz +lt +ltg +lu +lua +luo +luy +lv +lzz +mad +maf +mak +man +mas +maw +maz +mbh +mbo +mbq +mbu +mbw +mci +mcp +mcq +mcr +mcu +mda +mdh +mdj +mdr +med +mee +mek +men +mer +met +meu +mfe +mfn +mfo +mfq +mg +mgh +mgl +mgo +mgy +mh +mhi +mhl +mi +mif +min +miw +mkl +mkp +mkw +mle +mlp +mls +mmo +mmu +mmx +mna +mnf +mo +moa +moe +moh +mos +mox +mpp +mps +mpt +mpx +mql +ms +ms-ID +mt +mtc +mtf +mti +mua +mur +mus +mva +mvn +mwk +mwv +mxc +mxm +myk +myw +myx +mzk +mzm +mzp +mzw +mzz +na +nac +naf +nak +nap +naq +nas +nb +nca +nce +ncf +nch +nco +ncu +nd +ndc +nds +neb +nex +nfr +ng +nga +ngb +ngl +nhb +nhe +nhw +nif +nii +nij +nin +niu +niy +niz +njo +nkg +nko +nl +nmg +nmz +nn +nnf +nnh +nnk +nnm +no +nop +nou +nr +nrb +nsn +nso +nss +ntm +ntr +nui +nup +nus +nuv +nux +nv +nwb +nxq +nxr +ny +nym +nyn +nzi +oc +ogc +okr +okv +om +ong +onn +ons +opm +oro +ozm +pag +pam +pap +pau +pbi +pcd +pcm +pdc +pdt +ped +pex +pfl +pil +pip +pko +pl +pla +pms +png +pnn +pon +ppo +prg +pss +pt +ptp +puu +pwa +qu +quc +qug +rai +rao +rcf +rej +rel +res +rgn +ria +rif-NL +rm +rmf +rmo +rmu +rn +rna +rng +ro +rob +rof +roo +rro +rtm +rug +rw +rwk +rwo +saf +saq +sas +sav +sba +sbe +sbp +sc +scn +sco +scs +sdc +se +sef +seh +sei +ses +sg +sgs +sgz +shk +sid +sig +sil +sim +sjr +sk +skc +sks +sl +sld +sli +sll +sly +sm +sma +smj +smn +smq +sms +sn +snc +snk +snp +snx +sny +so +sok +soq +soy +spd 
+spl +sps +sq +sr-ME +sr-RO +sr-RU +sr-TR +srn +srr +ss +ssd +ssg +ssy +st +stk +stq +su +sua +sue +suk +sur +sus +sv +sw +swc +swg +swp +sxn +sxw +szl +tal +tan +taq +tbc +tbd +tbf +tbg +tbo +tbw +tbz +tci +tdu +ted +tem +teo +tet +tfi +tgc +tgo +tgu +tif +tik +tim +tio +tiv +tk +tkl +tkr +tl +tlf +tlx +tly +tmh +tmy +tn +tnh +to +tof +tog +toq +tpi +tpm +tpz +tqo +tr +tru +trv +ts +tsg +tsw +ttd +tte +ttj +ttr +ttt +tuh +tul +tum +tuq +tvd +tvl +tvu +twh +twq +ty +tya +tzm +ubu +uli +umb +und +uok +uri +urt +urw +usa +utr +uvh +uvl +uz +vag +van +ve +vec +vep +vi +vic +viv +vls +vmf +vmw +vo +vot +vro +vun +vut +wa +wae +waj +wan +war +wbp +wci +wer +wgi +whg +wib +wiu +wiv +wja +wji +wls +wmo +wnc +wnu +wo +wob +wos +wrs +wsk +wuv +wwa +xav +xbi +xes +xh +xla +xog +xon +xrb +xsi +xsm +xwe +yam +yao +yap +yas +yat +yav +yay +yaz +yba +ybb +yby +yer +ygr +ygw +yko +yle +ylg +yll +yml +yo +yon +yrb +yre +yrl +yss +yua +yuj +yut +yuw +za +zag +zea +zia +zlm +zmi +zne +zu +zza \ No newline at end of file diff --git a/transformations/font_change/requirements.txt b/transformations/font_change/requirements.txt new file mode 100644 index 000000000..de3503dc8 --- /dev/null +++ b/transformations/font_change/requirements.txt @@ -0,0 +1 @@ +nltk==3.6.2 \ No newline at end of file diff --git a/transformations/font_change/test.json b/transformations/font_change/test.json new file mode 100644 index 000000000..957537834 --- /dev/null +++ b/transformations/font_change/test.json @@ -0,0 +1,71 @@ +{ + "type": "font_change", + "test_cases": [ + { + "class": "FontChange", + "inputs": { + "sentence": "Apple is looking at buying U.K. startup for $132 billion." + }, + "outputs": [ + { + "sentence": "Apple is looking at buying U.K. startup for $132 \ud83c\udd71\ud83c\udd78\ud83c\udd7b\ud83c\udd7b\ud83c\udd78\ud83c\udd7e\ud83c\udd7d." + } + ] + }, + { + "class": "FontChange", + "inputs": { + "sentence": "We had to box part of the pizza to take it home and we were out the door by 6:42." 
+ }, + "outputs": [ + { + "sentence": "We had to box part of the pizza to take it \ud835\ude5d\ud835\ude64\ud835\ude62\ud835\ude5a and we were out the \ud835\udcb9\u2134\u2134\ud835\udcc7 by 6:42." + } + ] + }, + { + "class": "FontChange", + "inputs": { + "sentence": "The quick brown fox jumps over the lazy dog." + }, + "outputs": [ + { + "sentence": "The quick brown \ud83c\udd75\ud83c\udd7e\ud83c\udd87 \u027en\u026fds over the lazy \u1d05\u1d0f\u0262." + } + ] + }, + { + "class": "FontChange", + "inputs": { + "sentence": "Mumbai, Bengaluru, New Delhi are among the many famous places in India." + }, + "outputs": [ + { + "sentence": "Mumbai, Bengaluru, New \ud83c\udd73\ud83c\udd74\ud83c\udd7b\ud83c\udd77\ud83c\udd78 are \u0250\u026fou\u0183 the many famous places in I\u0274\u1d05\u026a\u1d00." + } + ] + }, + { + "class": "FontChange", + "inputs": { + "sentence": "New Delhi is among the many famous places in India." + }, + "outputs": [ + { + "sentence": "New Delhi is among the \ud83c\udd7c\ud83c\udd70\ud83c\udd7d\ud83c\udd88 \u025f\u0250\u026fons places in I\u0274\u1d05\u026a\u1d00." + } + ] + }, + { + "class": "FontChange", + "inputs": { + "sentence": "Oh, and their spring rolls and the accompanying peanuts and hot sauces were also delicious." + }, + "outputs": [ + { + "sentence": "Oh, and their \ud835\ude68\ud835\ude65\ud835\ude67\ud835\ude5e\ud835\ude63\ud835\ude5c rolls and the accompanying peanuts and hot \ud835\udcc8\ud835\udcb6\ud835\udcca\ud835\udcb8\u212f\ud835\udcc8 were \ud835\udcb6\ud835\udcc1\ud835\udcc8\u2134 delicious." 
import itertools
import json
import os
import random
import re

from nltk import download as nltkdl
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

from interfaces.SentenceOperation import SentenceOperation
from tasks.TaskTypes import TaskType

# Directory holding this module; the data files (fonts.json,
# list_of_languages.txt) are resolved against it so that loading does not
# depend on the process's current working directory.
_MODULE_DIR = os.path.dirname(os.path.abspath(__file__))


def font_change(sentence, fonts, seed=666, max_outputs=1):
    """
    Randomly choose words and a font for each word and transform the characters one by one.

    Candidate spans are found by tokenizing the sentence, dropping English
    stop words and the "." token, and collecting every regex match of each
    remaining (escaped) token in the sentence. Matching is by substring, so a
    token can also match inside a longer word, and a token occurring several
    times contributes its matches several times. This is kept as-is so that
    seeded outputs stay stable.

    parameters:
        sentence (str): input sentence
        fonts (dict): dictionary containing character replacements for various fonts
        seed (int): seed passed to ``random.seed`` before any sampling
        max_outputs (int): number of outputs for each input

    returns:
        perturbed_texts (list): a list of sentences where random words are in different fonts.
            If the sentence yields no candidate span, or ``fonts`` has no entry
            other than "normal", the sentence is returned unchanged.
    """
    random.seed(seed)
    if max_outputs <= 0:
        return []

    # The candidate spans do not depend on the RNG, so compute them once.
    # A set makes the stop-word test O(1) instead of rebuilding and scanning
    # the stop-word list for every token.
    stop_words = set(stopwords.words("english"))
    # tokens_match_list: re.Match objects of the words in the sentence (no stop words).
    tokens_match_list = []
    for token in word_tokenize(sentence):
        if token not in stop_words and token != ".":
            tokens_match_list.extend(re.finditer(re.escape(token), sentence))

    font_names = list(fonts.keys())
    has_styled_font = any(name != "normal" for name in font_names)

    perturbed_texts = []
    for _ in itertools.repeat(None, max_outputs):
        # Nothing to restyle, or no font to restyle with (the rejection loop
        # below would never terminate): emit the sentence unchanged.
        if not tokens_match_list or not has_styled_font:
            perturbed_texts.append(sentence)
            continue

        transformed_sentence = list(sentence)

        # Up to three spans are changed. The "- 1" is kept so previously valid
        # inputs draw the same random numbers; the lower clamp fixes the
        # ValueError that randint(1, 0) raised for a single candidate span.
        upper = max(1, min(3, len(tokens_match_list) - 1))
        tokens_to_change = random.sample(
            tokens_match_list,
            random.randint(1, upper),
        )
        for ttc in tokens_to_change:
            # Rejection sampling (rather than filtering "normal" up front)
            # keeps the sequence of RNG calls identical to earlier versions.
            while True:
                font = random.sample(font_names, 1)[0]
                if font != "normal":
                    break

            mapping = fonts[font]
            for i in range(ttc.start(), ttc.end()):
                char = transformed_sentence[i]
                # Characters without an entry in the font table stay as they are.
                transformed_sentence[i] = mapping.get(char, char)

        perturbed_texts.append("".join(transformed_sentence))
    return perturbed_texts


class FontChange(SentenceOperation):
    """Sentence operation that restyles randomly chosen words using Unicode "fonts"."""

    tasks = [
        TaskType.TEXT_CLASSIFICATION,
        TaskType.TEXT_TAGGING,
    ]
    # Language codes, one per line, read from the file next to this module.
    with open(
        os.path.join(_MODULE_DIR, "list_of_languages.txt"),
        "r",
        encoding="utf-8",
    ) as f:
        languages = [x.rstrip() for x in f.readlines()]
    keywords = [
        "noise",
        "rule-based",
        "written",
        "visual",
        "highly-meaning-preserving",
        "high-precision",
        "high-coverage",
        "high-generations",
    ]

    def __init__(self, seed=664, max_outputs=1):
        """
        Fetch the NLTK resources used by ``font_change`` and load the font tables.

        parameters:
            seed (int): forwarded to the base class
            max_outputs (int): forwarded to the base class
        """
        nltkdl("stopwords")
        nltkdl("punkt")
        super().__init__(seed, max_outputs=max_outputs)

        # Mapping tables based on unicode-formatter (MIT license)
        # https://github.com/DenverCoder1/unicode-formatter

        dict_path = os.path.join(_MODULE_DIR, "fonts.json")
        with open(dict_path, encoding="utf-8") as f:
            self.fonts = json.load(f)

    def generate(self, sentence: str):
        """
        Return the restyled variants of ``sentence``.

        Delegates to ``font_change`` with this instance's font tables,
        ``self.seed`` and ``self.max_outputs`` (presumably set by the
        SentenceOperation base class -- confirm against that interface).
        """
        perturbed_texts = font_change(
            sentence,
            self.fonts,
            seed=self.seed,
            max_outputs=self.max_outputs,
        )
        return perturbed_texts