-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcomponents.py
223 lines (185 loc) · 7.88 KB
/
components.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
"""
The basic definition of classes in this pack. A "Sentence" is composed of many "Char"s.
About the syntax in Hackmd, we use the following syntax to mark the furigana:
痛める -> {痛|い<b>た</b>}{め|<i>* *</i>}る (This sentence is composed of three Chars)
We use {|} to represent a ruby annotation.
This syntax is supported by markdown-it and can be applied on HackMD.
For the accent, we use the CSS settings written by @OrangeSagoCream, based on @Koios's idea.
The following syntax sugar is supported:
<b>: apply the line
<i>: a higher line that matches the kanji; type one more character than the marked content to get the proper length
italic (**): apply the top accent (right border)
To use this CSS style and syntax, put `{%hackmd @OrangeSagoCream/Accent %}` on the first line of your HackMD note.
"""
from .utils import *
from .yahooAPI.furigana import getFurigana
from .suzukiKunAPI.accent import getAccent
# Type labels a Char can carry. Char.__init__ picks one by index:
# 0 = kanji, 1 = hiragana, 2 = katakana, 3 = number, 4 = anything else.
CharType = ["kanji", "hira", "kata", "number", "symbol"]
class Char:
    """
    The basic component of a sentence.

    Attributes set by the constructor:
    - word: The text exactly as passed in.
    - type: One of the CharType labels, chosen by the is_kanji / is_hira /
      is_kata / is_number checks (in that order); anything else is "symbol".
    - furigana: The reading. Hiragana words read as themselves, katakana
      words are converted with kata_to_hira, every other type keeps the
      reading supplied by the caller.
    - furiganas: The result of get_furiganas(furigana); it is sliced, joined
      and indexed by accent below (presumably one entry per mora -- confirm
      against utils).
    - accent: The accent number; -1 means no accent is drawn.
    """

    def __init__(self, word: str, furigana: str = "", accent: int = -1):
        self.word = word
        self.accent = accent

        # The first matching check wins; "symbol" is the fallback label.
        classifiers = (
            (is_kanji, CharType[0]),
            (is_hira, CharType[1]),
            (is_kata, CharType[2]),
            (is_number, CharType[3]),
        )
        self.type = next(
            (label for matches, label in classifiers if matches(word)),
            CharType[4],
        )

        # Kana carry their own reading; only the other types need the argument.
        if self.type == "hira":
            reading = word
        elif self.type == "kata":
            reading = kata_to_hira(word)
        else:
            reading = furigana
        self.furigana = reading
        self.furiganas = get_furiganas(reading)

    def print(self):
        """
        Print the word, its space-separated reading units and its accent.
        """
        units = ' '.join(self.furiganas)
        print("{%s|%s;%d}" % (self.word, units, self.accent))

    def getHackmdSyntax(self, addAccent: bool = True, addFuriOnHira: bool = False, addFuriOnKata: bool = False, addFuriOnKanji: bool = True) -> str:
        """
        Build the HackMD markup for this Char.

        addAccent switches the accent marks on or off; the three addFuriOn*
        flags decide, per Char type, whether the reading is written in the
        ruby part. Returns "{base|ruby}", "{head|ruby}tail", or the bare
        word when there is nothing to put in the ruby part.
        """
        # Furigana is only ever shown for hira / kata / kanji, each with its own flag.
        furiFlagByType = {
            "hira": addFuriOnHira,
            "kata": addFuriOnKata,
            "kanji": addFuriOnKanji,
        }
        showFuri = bool(furiFlagByType.get(self.type, False))

        units = self.furiganas
        flatAccent = addAccent and self.accent == 0
        innerAccent = addAccent and 0 < self.accent < len(units)

        if flatAccent:
            # At most the last two characters will be drawn
            split = max(1, len(units) - 2)
            if showFuri:
                upperData = "".join([*units[:split], "<b>", *units[split:], "</b>"])
            else:
                upperData = "<i> </i>"
        elif innerAccent:
            peak = self.accent - 1
            if showFuri:
                upperData = "".join([*units[:peak], "<b>*", units[peak], "*</b>", *units[peak + 1:]])
            else:
                upperData = "<i>* *</i>"
        else:
            upperData = self.furigana if showFuri else ""

        # Accent without furigana: only the part up to the accent goes under
        # the ruby mark, the rest of a kana word is written after it.
        if innerAccent and not showFuri:
            if self.type == "hira":
                headWord = ''.join(units[:self.accent])
                tailWord = ''.join(units[self.accent:])
            elif self.type == "kata":
                headWord = hira_to_kata(''.join(units[:self.accent]))
                tailWord = hira_to_kata(''.join(units[self.accent:]))
            else:
                headWord, tailWord = self.word, ""
            return "{%s|%s}%s" % (headWord, upperData, tailWord)

        if upperData:
            return "{%s|%s}" % (self.word, upperData)
        return self.word
class Sentence:
    """
    The unit that holds Chars.

    self.sentence is a flat list of Chars. When built from text, a
    Char("\\n") is appended after every input line, so the list always ends
    with a newline Char.

    Every accessor slices self.sentence[begin:end]. With the default
    end=-1 the slice stops before the last element, i.e. the trailing
    newline Char of a text-built sentence is left out.
    """

    def __init__(self, sentence: "str | list[Char]", accent: bool = True):
        """
        Build the sentence from raw text, or wrap an existing list of Chars.

        - sentence: Either the text to analyse (lines separated by "\\n") or
          an iterable of ready-made Chars, which is copied into a list as is.
        - accent: When building from text, whether to look up accents and
          fill them into every Char whose accent is still -1. Ignored when
          Chars are passed in.
        """
        # Python keeps only the last __init__ defined in a class body, so the
        # former list-based constructor was unreachable; both forms are
        # handled here instead.
        if not isinstance(sentence, str):
            self.sentence = list(sentence)
            return

        self.sentence = []
        # Index of the first Char of the line being processed
        threshold = 0
        # The input might be too long, we use "\n" to separate the sentence
        for line in sentence.split("\n"):
            adjustedLine = self._appendChars(line)
            if accent:
                self._fillAccent(adjustedLine, threshold)
            # Fill the "\n" back
            self.sentence.append(Char("\n"))
            threshold = len(self.sentence)

    def _appendChars(self, line: str) -> str:
        """
        Append the Chars of one line and return the line as rewritten by post_adjustment.
        """
        line, adjusted = post_adjustment(line)
        cnt, replace = 0, False
        for token in getFurigana(line):
            if token[0] == "※":
                # Marker token: the next token is swapped for the next
                # entry of `adjusted` instead of being used directly.
                replace = True
            elif replace:
                word, furigana, accent = adjusted[cnt][0], adjusted[cnt][1], adjusted[cnt][2]
                self.sentence.append(Char(word, furigana, accent))
                replace = False
                cnt += 1
            else:
                self.sentence.append(Char(token[0], token[1]))
        return line

    def _fillAccent(self, line: str, start: int) -> None:
        """
        Look up the accents of one line and store them on its Chars (from index start on).
        """
        # NOTE(review): getFurigana's default end=-1 stops one Char before
        # the end, and the "\n" Char is not appended yet at this point, so
        # the reading of the line's last Char is not part of this string.
        # Kept as is -- confirm this is what genearte_accent_map expects.
        yahooGeneratedFuri = self.getFurigana(start)

        # Keep only the hiragana entries of the accent lookup
        accentData = [(c, mark) for c, mark in getAccent(line) if char_is_hira(c)]
        suzukiGeneratedFuri = "".join(c for c, _ in accentData)

        # Generate accent map
        lineChars = self.sentence[start:]
        listOfFuri = [char.furiganas for char in lineChars]
        accentMap = genearte_accent_map(yahooGeneratedFuri, suzukiGeneratedFuri, accentData, listOfFuri)

        # Fill accent if the original accent is -1 (default)
        for offset, char in enumerate(lineChars):
            if char.accent == -1:
                char.accent = accentMap[offset]

    def getWord(self, begin: int = 0, end: int = -1) -> str:
        """
        Get the word (base part) of the Chars in [begin:end], concatenated.
        """
        # Char stores its text in the `word` attribute; it has no getWord().
        return "".join(char.word for char in self.sentence[begin:end])

    def getFurigana(self, begin: int = 0, end: int = -1) -> str:
        """
        Get the furigana of the Chars in [begin:end], concatenated.
        """
        return "".join(char.furigana for char in self.sentence[begin:end])

    def getSentence(self, begin: int = 0, end: int = -1) -> "list[Char]":
        """
        Get the Chars in [begin:end].
        """
        return self.sentence[begin:end]

    def getAccent(self, begin: int = 0, end: int = -1) -> "list[int]":
        """
        Get the accent of each Char in [begin:end].
        """
        return [char.accent for char in self.sentence[begin:end]]

    def getType(self, begin: int = 0, end: int = -1) -> "list[str]":
        """
        Get the type of each Char in [begin:end].
        """
        return [char.type for char in self.sentence[begin:end]]

    def print(self, begin: int = 0, end: int = -1) -> None:
        """
        Print each Char in [begin:end] through its own print().
        """
        for char in self.sentence[begin:end]:
            char.print()

    def getHackmdSyntax(self, begin: int = 0, end: int = -1, addAccent: bool = True, addFuriOnHira: bool = False, addFuriOnKata: bool = False, addFuriOnKanji: bool = True) -> str:
        """
        Get the HackMD syntax of the Chars in [begin:end], concatenated.

        The four flags are passed unchanged to each Char's getHackmdSyntax.
        """
        return "".join(
            char.getHackmdSyntax(addAccent, addFuriOnHira, addFuriOnKata, addFuriOnKanji)
            for char in self.sentence[begin:end]
        )