add fix width.

dojyorin · Jul 25, 2023 · 2c4577c · 2c4577c
1 parent 80e99c8
commit 2c4577c
Show file tree

Hide file tree

Showing 4 changed files with 79 additions and 9 deletions.
diff --git a/src/text.ts b/src/text.ts
@@ -62,6 +62,58 @@ export function trimExtend(data:string):string{
     return data.trim().replace(/\r/g, "").replace(/ +/g, " ").replace(/\t+/g, "\t").replace(/\n+/g, "\n").replace(/^ /mg, "").replace(/ $/mg, "");
 }
 
+/**
+* Convert half-width Japanese kana and half-width Japanese punctuation to full-width, and full-width alphanumerics, ASCII symbols and the full-width space to half-width; characters not listed in the table are left unchanged.
+* @example
+* ```ts
+* const text = "１＋１＝２";
+* const formatted = fixWidth(text); // "1+1=2"
+* ```
+*/
+export function fixWidth(data:string){ // NOTE(review): no explicit return annotation here; the reduce below yields a string
+    return Object.entries({ // replacement table; entries are applied in the order they are written
+        "ｳﾞ": "ヴ", // two-character voiced / semi-voiced pairs (kana + ﾞ or ﾟ) are listed first so the single-kana entries below cannot split them
+        "ｶﾞ": "ガ", "ｷﾞ": "ギ", "ｸﾞ": "グ", "ｹﾞ": "ゲ", "ｺﾞ": "ゴ",
+        "ｻﾞ": "ザ", "ｼﾞ": "ジ", "ｽﾞ": "ズ", "ｾﾞ": "ゼ", "ｿﾞ": "ゾ",
+        "ﾀﾞ": "ダ", "ﾁﾞ": "ヂ", "ﾂﾞ": "ヅ", "ﾃﾞ": "デ", "ﾄﾞ": "ド",
+        "ﾊﾞ": "バ", "ﾋﾞ": "ビ", "ﾌﾞ": "ブ", "ﾍﾞ": "ベ", "ﾎﾞ": "ボ",
+        "ﾊﾟ": "パ", "ﾋﾟ": "ピ", "ﾌﾟ": "プ", "ﾍﾟ": "ペ", "ﾎﾟ": "ポ",
+        "ｱ": "ア", "ｲ": "イ", "ｳ": "ウ", "ｴ": "エ", "ｵ": "オ", // single half-width kana -> full-width katakana
+        "ｶ": "カ", "ｷ": "キ", "ｸ": "ク", "ｹ": "ケ", "ｺ": "コ",
+        "ｻ": "サ", "ｼ": "シ", "ｽ": "ス", "ｾ": "セ", "ｿ": "ソ",
+        "ﾀ": "タ", "ﾁ": "チ", "ﾂ": "ツ", "ﾃ": "テ", "ﾄ": "ト",
+        "ﾅ": "ナ", "ﾆ": "ニ", "ﾇ": "ヌ", "ﾈ": "ネ", "ﾉ": "ノ",
+        "ﾊ": "ハ", "ﾋ": "ヒ", "ﾌ": "フ", "ﾍ": "ヘ", "ﾎ": "ホ",
+        "ﾏ": "マ", "ﾐ": "ミ", "ﾑ": "ム", "ﾒ": "メ", "ﾓ": "モ",
+        "ﾔ": "ヤ", "ﾕ": "ユ", "ﾖ": "ヨ",
+        "ﾗ": "ラ", "ﾘ": "リ", "ﾙ": "ル", "ﾚ": "レ", "ﾛ": "ロ",
+        "ﾜ": "ワ", "ｦ": "ヲ", "ﾝ": "ン",
+        "ｧ": "ァ", "ｨ": "ィ", "ｩ": "ゥ", "ｪ": "ェ", "ｫ": "ォ", // small kana
+        "ｯ": "ッ",
+        "ｬ": "ャ", "ｭ": "ュ", "ｮ": "ョ",
+        "､": "、", "｡": "。", "･": "・", "ｰ": "ー", "｢": "「", "｣": "」", // half-width Japanese punctuation and long-vowel mark -> full-width
+        "Ａ": "A", "Ｂ": "B", "Ｃ": "C", "Ｄ": "D", "Ｅ": "E", "Ｆ": "F", "Ｇ": "G", "Ｈ": "H", "Ｉ": "I", "Ｊ": "J", "Ｋ": "K", "Ｌ": "L", "Ｍ": "M", // full-width Latin letters -> ASCII
+        "Ｎ": "N", "Ｏ": "O", "Ｐ": "P", "Ｑ": "Q", "Ｒ": "R", "Ｓ": "S", "Ｔ": "T", "Ｕ": "U", "Ｖ": "V", "Ｗ": "W", "Ｘ": "X", "Ｙ": "Y", "Ｚ": "Z",
+        "ａ": "a", "ｂ": "b", "ｃ": "c", "ｄ": "d", "ｅ": "e", "ｆ": "f", "ｇ": "g", "ｈ": "h", "ｉ": "i", "ｊ": "j", "ｋ": "k", "ｌ": "l", "ｍ": "m",
+        "ｎ": "n", "ｏ": "o", "ｐ": "p", "ｑ": "q", "ｒ": "r", "ｓ": "s", "ｔ": "t", "ｕ": "u", "ｖ": "v", "ｗ": "w", "ｘ": "x", "ｙ": "y", "ｚ": "z",
+        "０": "0", "１": "1", "２": "2", "３": "3", "４": "4", "５": "5", "６": "6", "７": "7", "８": "8", "９": "9", // full-width digits -> ASCII
+        "！": "!", "＂": "\"", "＃": "#", "＄": "$", "％": "%", "＆": "&", "＇": "'", "（": "(", "）": ")", "＊": "*", "＋": "+", "，": ",", "－": "-", "．": ".", "／": "/", "：": ":", // full-width symbols -> ASCII
+        "；": ";", "＜": "<", "＝": "=", "＞": ">", "？": "?", "＠": "@", "［": "[", "＼": "\\", "］": "]", "＾": "^", "＿": "_", "｀": "`", "｛": "{", "｜": "|", "｝": "}", "～": "~", "　": " " // last entry maps the full-width space to a plain space
+    }).reduce((text, [k, v]) => text.replace(new RegExp(k, "g"), v), data); // one global replace per entry, starting from data; keys are used verbatim as regex source (all are half-/full-width forms, none an ASCII metacharacter); NOTE(review): the RegExp objects are rebuilt on every call
+}
+
+/**
+* Clean up text by applying `fixWidth()` first and then `trimExtend()`, so that spaces produced by the width conversion are collapsed and trimmed as well.
+* @example
+* ```ts
+* const text = "１  ＋  １  ＝  ２  ";
+* const formatted = cleanText(text); // "1 + 1 = 2"
+* ```
+*/
+export function cleanText(data:string){
+    return trimExtend(fixWidth(data)); // width conversion runs before the whitespace cleanup
+}
+
 /**
 * Accurately recognize string that contain character above `0x010000` and array them one by character.
 * Useful for calculate number of characters with string contains emoji.

diff --git a/test/platform.deno.test.ts b/test/platform.deno.test.ts
@@ -4,15 +4,15 @@ import {isWin} from "../src/platform.deno.ts";
 Deno.test({
     ignore: Deno.build.os !== "windows",
     name: "Platform: Windows",
-    async fn(){
+    fn(){
         assertEquals(isWin(), true);
     }
 });
 
 Deno.test({
     ignore: Deno.build.os === "windows",
     name: "Platform: Posix",
-    async fn(){
+    fn(){
         assertEquals(isWin(), false);
     }
 });
diff --git a/test/text.test.ts b/test/text.test.ts
@@ -1,5 +1,5 @@
 import {assertEquals} from "../deps.test.ts";
-import {utfEncode, utfDecode, hexEncode, hexDecode, trimExtend, accurateSegment} from "../src/text.ts";
+import {utfEncode, utfDecode, hexEncode, hexDecode, trimExtend, fixWidth, cleanText, accurateSegment} from "../src/text.ts";
 
 const sampleText = "  Lorem ipsum\r dolor   sit  \r\r amet. ";
 const sampleBin = new Uint8Array([
@@ -12,7 +12,7 @@ const sampleBin = new Uint8Array([
 
 Deno.test({
     name: "Text: UTF8 Encode and Decode",
-    async fn(){
+    fn(){
         const encode = utfEncode(sampleText);
         const decode = utfDecode(encode);
 
@@ -22,7 +22,7 @@ Deno.test({
 
 Deno.test({
     name: "Text: HEX Encode and Decode",
-    async fn(){
+    fn(){
         const encode = hexEncode(sampleBin);
         const decode = hexDecode(encode);
 
@@ -32,16 +32,34 @@ Deno.test({
 
 Deno.test({
     name: "Text: Trim",
-    async fn(){
+    fn(){
         const result = trimExtend(sampleText);
 
         assertEquals(result, "Lorem ipsum dolor sit amet.");
     }
 });
 
+Deno.test({
+    name: "Text: Fix Width",
+    fn(){
+        const result = fixWidth("１＋１＝２"); // input consists only of full-width digits and operators
+
+        assertEquals(result, "1+1=2"); // every full-width character is expected as its ASCII counterpart
+    }
+});
+
+Deno.test({
+    name: "Text: Clean Up",
+    fn(){
+        const result = cleanText("１  ＋  １  ＝  ２  "); // full-width characters separated by runs of spaces, plus trailing spaces
+
+        assertEquals(result, "1 + 1 = 2"); // widths converted, space runs collapsed to one, trailing spaces removed
+    }
+});
+
 Deno.test({
     name: "Text: Segment",
-    async fn(){
+    fn(){
         const {length} = accurateSegment("😄😁😆😅😂");
 
         assertEquals(length, 5);

diff --git a/test/time.test.ts b/test/time.test.ts
@@ -5,7 +5,7 @@ const sample = new Date(2000, 0, 1, 0, 0, 0, 0);
 
 Deno.test({
     name: "Date: Encode and Decode",
-    async fn(){
+    fn(){
         const encode = unixtimeEncode(sample);
         const decode = unixtimeDecode(encode);
 
@@ -15,7 +15,7 @@ Deno.test({
 
 Deno.test({
     name: "Date: Parse",
-    async fn(){
+    fn(){
         const result = unixtimeParse(sample.toISOString());
 
         assertEquals(result, 946684800);