From 9c7d75eca0b972d9c8832b20c5bcb6f427a0b615 Mon Sep 17 00:00:00 2001 From: SkyEye_FAST Date: Thu, 15 Aug 2024 23:12:33 +0800 Subject: [PATCH] Update --- base.py | 2 ++ converter.py | 78 ++++++++++++++++++++++++++++++--------------- data/rep_ja_kk.json | 3 +- fix_data.py | 56 ++++++++++---------------------- output/ja_kk.json | 4 +-- output/ja_my.json | 4 +-- pack.py | 48 +++++++--------------------- 7 files changed, 87 insertions(+), 108 deletions(-) diff --git a/base.py b/base.py index 125d831..f528193 100644 --- a/base.py +++ b/base.py @@ -53,3 +53,5 @@ def load_json(file: str, folder: str = "data") -> Ldata: rep_zh: Ldata = load_json("rep_zh") # 连写的中文转写方案替换修正 finals: Tuple[str, ...] = tuple("aāááàoōóǒòeēéěè") # 可能的零声母开头 + +rep_ja_kk: Ldata = load_json("rep_ja_kk") # 片假名替换修正 diff --git a/converter.py b/converter.py index 9e1bea6..4b30f75 100644 --- a/converter.py +++ b/converter.py @@ -5,7 +5,7 @@ import re import time import inspect -from typing import Dict, List, Set +from typing import List, Set, Tuple, Callable, Optional from romajitable import to_kana as tk from pypinyin import Style, lazy_pinyin, load_phrases_dict @@ -26,7 +26,6 @@ # 初始化其他自定义数据 fixed_zh_u = load_json("fixed_zh_universal") tone_to_ipa: Ldata = {"1": "˥", "2": "˧˥", "3": "˨˩˦", "4": "˥˩", "5": ""} # IPA声调 -rep_ja_kk: Ldata = load_json("rep_ja_kk") # 片假名替换修正 manyoganas_dict: Ldata = load_json("manyogana") # 万叶假名 @@ -121,32 +120,34 @@ def segment_str(text: str, auto_cut: bool = True) -> List[str]: return jieba.lcut(text) if auto_cut else text.split() -def to_katakana(text: str) -> str: +def to_katakana(text: str, rep: Ldata) -> str: """ 将字符串中的英文转写为片假名。 Args: text (str): 需要转换的字符串 + rep (Ldata): 需要替换格式的内容 Returns: str: 转换结果 """ - return replace_multiple(tk(text).katakana, rep_ja_kk) + return replace_multiple(tk(text).katakana, rep) -def to_manyogana(text: str) -> str: +def to_manyogana(text: str, rep: Ldata) -> str: """ 将字符串中的片假名转写为万叶假名。 Args: text (str): 需要转换的字符串 + rep (Ldata): 需要替换格式的内容 Returns: str: 转换结果 """ - return "".join(manyoganas_dict.get(char, char) for char in to_katakana(text)) + return "".join(manyoganas_dict.get(char, char) for char in to_katakana(text, rep)) def to_pinyin(text: str, rep: Ldata, auto_cut: bool = True) -> str: @@ -314,22 +315,30 @@ def to_xiaojing(text: str, rep: Ldata, auto_cut: bool = True) -> str: return replace_multiple(" ".join(output_list), rep) -def save_to_json(input_dict: Ldata, config: Dict) -> None: - """将生成的语言文件保存至JSON。 +def convert( + input_dict: Ldata, + func: Callable[[str], str], + fix_dict: Optional[Ldata] = None, + auto_cut: bool = True, + rep: Ldata = rep_zh, +) -> Tuple[Ldata, float]: + """ + 转换语言数据。 Args: input_dict (Ldata): 输入的数据 - config (Dict): 含有配置的字典 + func (Callable[[str], str]): 生成语言文件所用的函数 + fix_dict (Optional[Ldata], optional): 语言文件中需要修复的内容. 默认为None + auto_cut (bool, optional): 是否自动分词,默认为True + rep (Ldata, optional): 需要替换的内容,默认为rep_zh的内容 + + Returns: + (Ldata, float): 转换结果及耗时 """ start_time = time.time() - func = config["func"] - - auto_cut = config.get("auto_cut", True) - rep = config.get("rep", rep_zh) - - output_dict = {} + output_dict: Ldata = {} for k, v in input_dict.items(): func_signature = inspect.signature(func) kwargs = {} @@ -339,16 +348,33 @@ def save_to_json(input_dict: Ldata, config: Dict) -> None: kwargs["rep"] = rep output_dict[k] = func(v, **kwargs) - output_dict.update(fixed_zh_u) - if config.get("fixed_dict"): - output_dict.update(config["fixed_dict"]) - file_path = ( - P / config.get("output_folder", "output") / f"{config['output_file']}.json" - ) - with open(file_path, "w", encoding="utf-8") as j: - json.dump(output_dict, j, indent=2, ensure_ascii=False) + if rep is rep_zh: + output_dict.update(fixed_zh_u) + + if fix_dict: + output_dict.update(fix_dict) + elapsed_time = time.time() - start_time + + return output_dict, elapsed_time + + +def save_to_json( + input_data: Tuple[Ldata, float], + output_file: str, + output_folder: str = "output", +) -> None: + """将生成的语言文件保存至JSON。 + + Args: + input_data (Tuple[Ldata, float]): 输入的数据 + output_file (str): 保存的文件名,无格式后缀 + output_folder (str, optional): 保存的文件夹,默认为“output” + """ + + input_dict, elapsed_time = input_data + file_path = P / output_folder / f"{output_file}.json" + with open(file_path, "w", encoding="utf-8") as j: + json.dump(input_dict, j, indent=2, ensure_ascii=False) size = f"{round(file_path.stat().st_size / 1024, 2)} KB" - print( - f"已生成语言文件“{config['output_file']}.json”,大小{size},耗时{elapsed_time:.2f} s。" - ) + print(f"已生成语言文件“{output_file}.json”,大小{size},耗时{elapsed_time:.2f} s。") diff --git a/data/rep_ja_kk.json b/data/rep_ja_kk.json index 2f865fc..bb71af6 100644 --- a/data/rep_ja_kk.json +++ b/data/rep_ja_kk.json @@ -19,5 +19,6 @@ "サムエル・åベルグ": "サミュエル・オーバーグ", "レナ・ライネ": "レナ・レイン", "エン_ウス": "en_us", - "パラディストルäド": "パラダイスツリー" + "パラディストルäド": "パラダイスツリー", + "「・フ4・」":"[ F4 ]" } \ No newline at end of file diff --git a/fix_data.py b/fix_data.py index 0c51aec..36042a9 100644 --- a/fix_data.py +++ b/fix_data.py @@ -4,6 +4,7 @@ from base import load_json from converter import ( save_to_json, + convert, to_pinyin, to_wadegiles, to_romatzyh, @@ -15,52 +16,27 @@ fixed_zh_source = load_json("fixed_zh_source") save_to_json( - fixed_zh_source, - { - "output_file": "fixed_zh_py", - "func": to_pinyin, - "output_folder": "data", - "auto_cut": False, - "rep": rep, - }, + convert(fixed_zh_source, to_pinyin, auto_cut=False, rep=rep), + "fixed_zh_py", + "data", ) save_to_json( - fixed_zh_source, - { - "output_file": "fixed_zh_wg", - "func": to_wadegiles, - "output_folder": "data", - "auto_cut": False, - "rep": rep, - }, + convert(fixed_zh_source, to_wadegiles, auto_cut=False, rep=rep), + "fixed_zh_wg", + "data", ) save_to_json( - fixed_zh_source, - { - "output_file": "fixed_zh_gr", - "func": to_romatzyh, - "output_folder": "data", - "auto_cut": False, - "rep": rep, - }, + convert(fixed_zh_source, to_romatzyh, auto_cut=False, rep=rep), + "fixed_zh_gr", + "data", ) save_to_json( - fixed_zh_source, - { - "output_file": "fixed_zh_cy", - "func": to_cyrillic, - "output_folder": "data", - "auto_cut": False, - "rep": rep, - }, + convert(fixed_zh_source, to_cyrillic, auto_cut=False, rep=rep), + "fixed_zh_cy", + "data", ) save_to_json( - fixed_zh_source, - { - "output_file": "fixed_zh_xj", - "func": to_xiaojing, - "output_folder": "data", - "auto_cut": False, - "rep": rep, - }, + convert(fixed_zh_source, to_xiaojing, auto_cut=False, rep=rep), + "fixed_zh_xj", + "data", ) diff --git a/output/ja_kk.json b/output/ja_kk.json index 9ffeaf9..7bbe8f6 100644 --- a/output/ja_kk.json +++ b/output/ja_kk.json @@ -4684,7 +4684,7 @@ "known_server_link.status": "スタツス", "known_server_link.support": "スッポルト", "known_server_link.website": "ヱブシテ", - "language.code": "zho-Hans_CN", + "language.code": "en_us", "language.name": "エングリスホ", "language.region": "ウニテド・スタテス", "lanServer.otherPlayers": "セッチングス・フォル・オトヘル・プライェルス", @@ -5547,7 +5547,7 @@ "painting.minecraft.sunflowers.author": "クリストッフェル・ゼッテルストランド", "painting.minecraft.sunflowers.title": "スンフロヱルス", "painting.minecraft.sunset.author": "クリストッフェル・ゼッテルストランド", - "painting.minecraft.sunset.title": "sunset_dense", + "painting.minecraft.sunset.title": "スンセト_デンセ", "painting.minecraft.tides.author": "クリストッフェル・ゼッテルストランド", "painting.minecraft.tides.title": "チデス", "painting.minecraft.unpacked.author": "サラホ・ボエヴィング", diff --git a/output/ja_my.json b/output/ja_my.json index 5293728..e83a507 100644 --- a/output/ja_my.json +++ b/output/ja_my.json @@ -4684,7 +4684,7 @@ "known_server_link.status": "須多川須", "known_server_link.support": "須川保流止", "known_server_link.website": "恵夫之天", - "language.code": "zho-Hans_CN", + "language.code": "en_us", "language.name": "江尓具利須保", "language.region": "宇仁天特・須多天須", "lanServer.otherPlayers": "世川千尓具須・不於流・於止部流・不良伊江流須", @@ -5547,7 +5547,7 @@ "painting.minecraft.sunflowers.author": "久利須止川不江流・是川天流須止良尓特", "painting.minecraft.sunflowers.title": "須尓不呂恵流須", "painting.minecraft.sunset.author": "久利須止川不江流・是川天流須止良尓特", - "painting.minecraft.sunset.title": "sunset_dense", + "painting.minecraft.sunset.title": "須尓世止_代尓世", "painting.minecraft.tides.author": "久利須止川不江流・是川天流須止良尓特", "painting.minecraft.tides.title": "千代須", "painting.minecraft.unpacked.author": "散良保・番江无伊尓具", diff --git a/pack.py b/pack.py index eb8276d..1686f9d 100644 --- a/pack.py +++ b/pack.py @@ -4,9 +4,10 @@ import time import zipfile as zf -from base import P, data, fixed_zh +from base import P, data, fixed_zh, rep_ja_kk from converter import ( save_to_json, + convert, to_bopomofo, to_cyrillic, to_ipa, @@ -26,42 +27,15 @@ def main() -> None: # 生成语言文件 main_start_time = time.time() - save_to_json( - data["en_us"], - {"output_file": "ja_kk", "func": to_katakana}, - ) - save_to_json( - data["en_us"], - {"output_file": "ja_my", "func": to_manyogana}, - ) - save_to_json( - data["zh_cn"], - {"output_file": "zh_py", "func": to_pinyin, "fixed_dict": fixed_zh["zh_py"]}, - ) - save_to_json( - data["zh_cn"], - {"output_file": "zh_ipa", "func": to_ipa}, - ) - save_to_json( - data["zh_cn"], - {"output_file": "zh_bpmf", "func": to_bopomofo}, - ) - save_to_json( - data["zh_cn"], - {"output_file": "zh_wg", "func": to_wadegiles, "fixed_dict": fixed_zh["zh_wg"]}, - ) - save_to_json( - data["zh_cn"], - {"output_file": "zh_gr", "func": to_romatzyh, "fixed_dict": fixed_zh["zh_gr"]}, - ) - save_to_json( - data["zh_cn"], - {"output_file": "zh_cy", "func": to_cyrillic, "fixed_dict": fixed_zh["zh_cy"]}, - ) - save_to_json( - data["zh_cn"], - {"output_file": "zh_xj", "func": to_xiaojing, "fixed_dict": fixed_zh["zh_xj"]}, - ) + save_to_json(convert(data["en_us"], to_katakana, rep=rep_ja_kk), "ja_kk") + save_to_json(convert(data["en_us"], to_manyogana, rep=rep_ja_kk), "ja_my") + save_to_json(convert(data["zh_cn"], to_pinyin, fixed_zh["zh_py"]), "zh_py") + save_to_json(convert(data["zh_cn"], to_ipa), "zh_ipa") + save_to_json(convert(data["zh_cn"], to_bopomofo), "zh_bpmf") + save_to_json(convert(data["zh_cn"], to_wadegiles, fixed_zh["zh_wg"]), "zh_wg") + save_to_json(convert(data["zh_cn"], to_romatzyh, fixed_zh["zh_gr"]), "zh_gr") + save_to_json(convert(data["zh_cn"], to_cyrillic, fixed_zh["zh_cy"]), "zh_cy") + save_to_json(convert(data["zh_cn"], to_xiaojing, fixed_zh["zh_xj"]), "zh_xj") main_elapsed_time = time.time() - main_start_time print(f"\n语言文件生成完毕,共耗时{main_elapsed_time:.2f} s。")