Skip to content

Commit 0848875

Browse files
committed
ok
1 parent 2e2e278 commit 0848875

File tree

6 files changed

+166
-7
lines changed

6 files changed

+166
-7
lines changed

.gitignore

+4
Original file line numberDiff line numberDiff line change
@@ -1 +1,5 @@
11
*.zip
2+
/out/
3+
/.vscode/
4+
__pycache__/
5+

README.md

+8-1
Original file line numberDiff line numberDiff line change
@@ -1 +1,8 @@
1-
# wechat-emoji-dict
1+
# wechat-emoji-dict
2+
3+
将所有微信表情导出成如下字典文件:
4+
5+
- GBoard(全拼)
6+
- GBoard(小鹤双拼)
7+
8+
![preview](resources/preview.png)

flypy.py

+94
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
# Copied from https://github.com/bcaso/pinyin_to_double_pinyin/blob/main/dp.py
2+
3+
# Two-letter initials and the single key that replaces each of them.
first = {
    'ch': 'i',
    'sh': 'u',
    'zh': 'v',
}

# Finals and the single key each of them is typed with.
second = {
    'ua': 'x',
    'ei': 'w',
    'e': 'e',
    'ou': 'z',
    'iu': 'q',
    've': 't',
    'ue': 't',
    'u': 'u',
    'i': 'i',
    'o': 'o',
    'uo': 'o',
    'ie': 'p',
    'a': 'a',
    'ong': 's',
    'iong': 's',
    'ai': 'd',
    'ing': 'k',
    'uai': 'k',
    'ang': 'h',
    'uan': 'r',
    'an': 'j',
    'en': 'f',
    'ia': 'x',
    'iang': 'l',
    'uang': 'l',
    'eng': 'g',
    'in': 'b',
    'ao': 'c',
    'v': 'v',
    'ui': 'v',
    'un': 'y',
    'iao': 'n',
    'ian': 'm',
}

# Special case: zero-initial syllables (a final only, at most 3 letters).
# One-letter finals are doubled, two-letter finals are typed as-is, and
# three-letter finals are the first letter plus the key of the final.
special = {
    'a': 'aa',
    'ai': 'ai',
    'an': 'an',
    'ang': 'ah',
    'ao': 'ao',
    'e': 'ee',
    'ei': 'ei',
    'en': 'en',
    'eng': 'eg',
    'er': 'er',
    'o': 'oo',
    'ou': 'ou',
}


def todouble(s: str) -> str:
    """
    Convert the full-pinyin spelling of one syllable to its flypy code.

    :param s: full-pinyin spelling of a single character (no tone marks)
    :return: the double-pinyin code; an empty string for empty input.
        Parts that are not found in the tables are passed through unchanged.
    """
    if not s:
        # Nothing to convert; indexing s[0] below would raise IndexError.
        return ''

    # Zero-initial syllables have fixed codes.
    if s in special:
        return special[s]
    if len(s) <= 3 and s[0] in ('a', 'e', 'o'):
        # Looks like a zero-initial syllable but is not a known one
        # (typically a non-pinyin fragment). Report it and fall through to
        # the generic initial + final handling below.
        print('未知情况1', s)

    # General case: initial + final.
    # Longest form is a 2-letter initial plus a final of up to 4 letters,
    # e.g. chuang = ch + uang; otherwise a 1-letter initial plus a final,
    # e.g. h + uang, x + iang.
    if s[:2] in first:
        initial, final = first[s[:2]], s[2:]
    else:
        initial, final = s[0], s[1:]

    # An unknown final is kept verbatim in both branches rather than dropped.
    return initial + second.get(final, final)

main.py

+54
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
from pypinyin import pinyin, lazy_pinyin, Style
2+
from zipfile import ZipFile
3+
import flypy
4+
import re
5+
6+
# Matches text made up solely of ASCII word characters
# (letters, digits and underscore).
_RE_ENGLISH = re.compile(r'\w+', re.ASCII)


def kanji_to_pinyin(kanji: str, py_type: str = 'pinyin') -> str:
    """
    Build the typing shortcut for a word.

    :param kanji: the word to convert
    :param py_type: ``'flypy'`` converts every syllable with
        ``flypy.todouble``; any other value keeps the full pinyin
    :return: the lower-cased input when it consists only of ASCII word
        characters, otherwise the concatenated syllables
    """
    if _RE_ENGLISH.fullmatch(kanji):
        # Pure ASCII word: lower-casing is all that is needed.
        return kanji.lower()

    syllables = lazy_pinyin(kanji, style=Style.NORMAL)
    if py_type == 'flypy':
        # Pass the converter directly; no wrapper lambda or temporary list.
        syllables = map(flypy.todouble, syllables)
    return ''.join(syllables)
17+
18+
19+
def save_to_gboard(list, py_type='pinyin', lang='zh-CN'):
    """
    Write a Gboard personal-dictionary file and zip it for import.

    Produces ``out/wechat-emoji-gboard-<py_type>.txt`` and a ``.zip`` next to
    it that contains the text file under the name ``dictionary.txt``.

    :param list: iterable of dicts, each with a ``'word'`` key and a key
        named after ``py_type`` holding the shortcut
    :param py_type: which shortcut key to read from each entry; also part of
        the output file name
    :param lang: language tag written in the third column of every row
    """
    import os  # local import so the block does not depend on file-level imports

    # The parameter name shadows the builtin but is kept for existing callers.
    entries = list

    base_path = f'out/wechat-emoji-gboard-{py_type}'
    dict_file = base_path + '.txt'
    zip_file = base_path + '.zip'

    # The output directory may not exist yet (e.g. on a fresh checkout);
    # create it instead of failing with FileNotFoundError.
    os.makedirs(os.path.dirname(dict_file), exist_ok=True)

    with open(dict_file, 'w', encoding='utf-8') as f:
        f.write('# Gboard Dictionary version:1\n')
        f.writelines(
            '\t'.join((item[py_type], item['word'], lang)) + '\n'
            for item in entries
        )

    # Archive only after the text file has been closed (and thus flushed).
    with ZipFile(zip_file, 'w') as archive:
        archive.write(dict_file, 'dictionary.txt')
31+
32+
33+
def main():
    """
    Read ``source.txt`` and write the full-pinyin and flypy dictionaries.

    Each line of ``source.txt`` is ``word`` optionally followed by a tab and
    a comma-separated list of aliases. One dictionary entry is produced for
    the word itself (with surrounding square brackets removed before
    conversion) and one for every alias; all of them expand to ``word``.
    """
    entries = []
    with open('source.txt', 'r', encoding='utf-8') as f:
        for line in f:
            segments = line.rstrip('\n').split('\t')
            word = segments[0]
            if not word.strip():
                # Blank line: it would otherwise yield a row with an empty
                # shortcut and an empty word.
                continue
            names = [word.strip('[]')]
            if len(segments) > 1:
                names.extend(alias.strip() for alias in segments[1].split(','))
            for name in names:
                if not name:
                    # Empty alias (e.g. trailing comma): nothing to type.
                    continue
                entries.append({
                    'word': word,
                    'pinyin': kanji_to_pinyin(name),
                    'flypy': kanji_to_pinyin(name, py_type='flypy'),
                })
    save_to_gboard(entries)
    save_to_gboard(entries, py_type='flypy')
50+
51+
52+
# Script entry point: generate the dictionaries, then report completion.
if __name__ == '__main__':
    main()
    print('completed.')

resources/preview.png

194 KB
Loading

source.txt

+6-6
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,10 @@
2323
[傲慢]
2424
[困]
2525
[惊恐]
26-
[憨笑]
26+
[憨笑]
2727
[悠闲]
2828
[咒骂]
29-
[疑问]
29+
[疑问]
3030
[嘘]
3131
[晕]
3232
[衰]
@@ -47,12 +47,12 @@
4747
[笑脸]
4848
[生病]
4949
[脸红]
50-
[破涕为笑]
50+
[破涕为笑] 笑哭
5151
[恐惧]
5252
[失望]
5353
[无语]
5454
[嘿哈]
55-
[捂脸]
55+
[捂脸] 笑,无语
5656
[奸笑]
5757
[机智]
5858
[皱眉]
@@ -63,8 +63,8 @@
6363
[天啊]
6464
[Emm]
6565
[社会社会]
66-
[旺柴]
67-
[好的]
66+
[旺柴] 狗头
67+
[好的] ok
6868
[打脸]
6969
[哇]
7070
[翻白眼]

0 commit comments

Comments
 (0)