# kanjidic2.py
# Configuration
# Short project identifier; handed to every filter defined in this file.
projectShort = "kanjidic2"
# Human-readable project description; also handed to the filters.
projectDesc = "Kanjidic2 i18n"
# Presumably the language codes translations are managed for -- TODO confirm
# against the tooling that reads this module.
projectLangs = (
    "fr",
    "it",
    "ru",
    "th",
    "tr",
    "pl",
)
# Owner contact string; also handed to the filters.
ownerInfo = "Alexandre Courbot <gnurou@gmail.com>"
# NOTE(review): looks like a translation-platform project slug -- confirm.
txProject = "kanjidic2-i18n"
# Presumably the name of the XML source file to parse; not referenced by the
# code visible in this file -- verify against the caller.
srcFile = "kanjidic2.xml"
import xmlhandler, xml.sax, efilter
from gettextformat import *
# One entry per RMgroup of a kanjidic2 entry
class Kanjidic2Entry:
    """Translatable unit built from one rmgroup of a kanjidic2 character.

    Attributes set by the constructor:
        kanji: the value given as ``kanji`` (used as a string when
            formatting).
        rmgroup: the value given as ``rmgroup`` (formatted with ``%d``).
        translations: dict mapping a language code to its translation text;
            several meanings of one language are separated by newlines.
        fuzzies: languages whose gettext entry is flagged as fuzzy.
        readings: list of reading strings, initially empty.
        grade, freq: integers defaulting to 0.
    """

    def __init__(self, kanji, rmgroup):
        self.kanji = kanji
        self.rmgroup = rmgroup
        # One string per language.
        self.translations = {}
        # Languages that should be output as 'fuzzy'.
        self.fuzzies = []
        self.readings = []
        self.grade = 0
        self.freq = 0

    def contextString(self):
        """Return '<kanji> <rmgroup>', used as the gettext message context."""
        return '%s %d' % (self.kanji, self.rmgroup)

    def sourceString(self):
        """Return the gettext msgid for this entry.

        The result is the kanji, an optional line with the comma-separated
        readings, and the English translation text, joined by newlines.
        """
        pieces = [self.kanji]
        if self.readings:
            pieces.append(', '.join(self.readings))
        pieces.append(self.trString('en'))
        return '\n'.join(pieces)

    def trString(self, lang):
        """Return the translation stored for *lang*, or '' when there is none."""
        return self.translations.get(lang, '')

    def asGettext(self, lang):
        """Build a GetTextEntry describing this entry for language *lang*.

        ``fuzzy`` is only assigned when *lang* is listed in ``fuzzies`` and
        ``msgstr`` is only assigned for languages other than 'en'; otherwise
        the GetTextEntry keeps whatever values its constructor gave them.
        """
        entry = GetTextEntry()
        entry.lang = lang
        entry.msgctxt = self.contextString()
        entry.msgid = self.sourceString()
        if lang in self.fuzzies:
            entry.fuzzy = True
        if lang != 'en':
            entry.msgstr = self.trString(lang)
        return entry

    def toJMF(self, lang):
        """Return one '<kanji> <text>' line per translation line of *lang*.

        Every line ends with a newline. A language without translation still
        yields a single line made of the kanji followed by a space.
        """
        meanings = self.trString(lang).split('\n')
        return ''.join("%s %s\n" % (self.kanji, meaning) for meaning in meanings)
class Kanjidic2Parser(xmlhandler.BasicHandler):
    """Content handler that builds one Kanjidic2Entry per rmgroup element.

    The ``handle_start_<tag>``, ``handle_data_<tag>`` and ``handle_end_<tag>``
    methods are presumably dispatched by xmlhandler.BasicHandler according to
    the element name -- TODO confirm against xmlhandler.

    After parsing, ``entries`` maps each entry's context string
    ('<kanji> <rmgroup index>') to its Kanjidic2Entry.
    """

    def __init__(self):
        xmlhandler.BasicHandler.__init__(self)
        self.entries = {}
        # Language of the <meaning> element currently being read.
        self.lang = None
        # True while the text of a ja_on / ja_kun <reading> is expected.
        self.takeReading = False
        # Readings collected for the rmgroup being processed.
        self.readings = []
        # Per-character state. Also set up here (and not only when a
        # <character> starts) so that no callback can hit a missing attribute.
        self.currentEntry = None
        self.currentEid = None
        self.currentRM = 0
        self.currentGrade = 0
        self.currentFreq = 0

    def handle_start_character(self, attrs):
        """Reset the per-character state at the start of a <character>."""
        self.currentEntry = None
        self.currentEid = None
        self.currentRM = 0
        self.currentGrade = 0
        self.currentFreq = 0

    def handle_data_literal(self, data):
        """Remember the <literal> text; it becomes the entries' kanji."""
        self.currentEid = data

    def handle_data_grade(self, data):
        """Store the <grade> text as an int (raises ValueError if not numeric)."""
        self.currentGrade = int(data)

    def handle_data_freq(self, data):
        """Store the <freq> text as an int (raises ValueError if not numeric)."""
        self.currentFreq = int(data)

    def handle_start_reading(self, attrs):
        """Flag whether the text of this <reading> must be collected.

        Only readings whose ``r_type`` is 'ja_on' or 'ja_kun' are kept. The
        flag is recomputed for every element, so a kept reading that never
        delivers text cannot cause a following reading of another type to be
        collected by mistake.
        """
        self.takeReading = ('r_type' in attrs
                            and attrs['r_type'] in ('ja_on', 'ja_kun'))

    def handle_data_reading(self, data):
        """Collect the reading text when the current <reading> was flagged."""
        if self.takeReading:
            self.readings.append(data)
            self.takeReading = False

    def handle_start_rmgroup(self, attrs):
        """Open a new entry for the current character and rmgroup index."""
        self.currentEntry = Kanjidic2Entry(self.currentEid, self.currentRM)
        self.currentEntry.grade = self.currentGrade
        self.currentEntry.freq = self.currentFreq

    def handle_end_rmgroup(self):
        """Finalize the open entry and register it in ``entries``."""
        self.currentEntry.readings = self.readings
        # Start a fresh list: the previous one now belongs to the entry.
        self.readings = []
        self.entries[self.currentEntry.contextString()] = self.currentEntry
        self.currentRM += 1
        self.currentEntry = None

    def handle_start_meaning(self, attrs):
        """Record the language of a <meaning>; 'en' when ``m_lang`` is absent."""
        if 'm_lang' in attrs:
            self.lang = attrs['m_lang']
        else:
            self.lang = 'en'

    def handle_data_meaning(self, data):
        """Append the meaning text to the open entry's translation for its language.

        Several meanings of the same language are joined with newlines.
        """
        translations = self.currentEntry.translations
        if self.lang in translations:
            translations[self.lang] += "\n" + data
        else:
            translations[self.lang] = data
        self.lang = None
def parseSrcEntries(src):
    """Parse *src* with a Kanjidic2Parser and return the entries it collected.

    *src* is handed unchanged to the SAX parser's ``parse`` method. The
    returned dict maps context strings to Kanjidic2Entry objects.
    """
    handler = Kanjidic2Parser()
    parser = xml.sax.make_parser()
    parser.setContentHandler(handler)
    # Turn off inclusion of external general and parameter entities.
    for feature in (xml.sax.handler.feature_external_ges,
                    xml.sax.handler.feature_external_pes):
        parser.setFeature(feature, False)
    parser.parse(src)
    return handler.entries
class GradeFilter(efilter.Filter):
    """Filter matching the entries that have exactly the given grade.

    The base class receives 'grade' followed by the zero-padded two-digit
    grade as its first argument, then the module's project constants.
    """

    def __init__(self, grade):
        label = "grade%02d" % (grade,)
        efilter.Filter.__init__(self, label, projectShort, projectDesc, ownerInfo)
        self.grade = grade

    def isfiltered(self, entry):
        """Return True when *entry* has a positive grade equal to this filter's."""
        if entry.grade <= 0:
            return False
        return entry.grade == self.grade
class FreqFilter(efilter.Filter):
    """Filter matching the entries whose frequency is within a given bound.

    The base class receives 'freq' followed by the zero-padded four-digit
    bound as its first argument, then the module's project constants.
    """

    def __init__(self, freq):
        label = "freq%04d" % (freq,)
        efilter.Filter.__init__(self, label, projectShort, projectDesc, ownerInfo)
        self.freq = freq

    def isfiltered(self, entry):
        """Return True when *entry* has a positive freq not above this filter's."""
        return 0 < entry.freq <= self.freq
class AllFilter(efilter.Filter):
    """Filter matching every entry.

    The base class receives "others" as its first argument, then the module's
    project constants.
    """

    def __init__(self):
        label = "others"
        efilter.Filter.__init__(self, label, projectShort, projectDesc, ownerInfo)

    def isfiltered(self, entry):
        """Return True unconditionally; *entry* is not inspected."""
        return True
def filtersList():
    """Return the project's filters as a new list, in a fixed order.

    Order: grade filters 1-6 and 8, the frequency filter bounded at 3000,
    grade filters 9 and 10, and finally the match-everything filter.
    The order is presumably significant to the consumer (the catch-all comes
    last) -- verify against the caller before reordering.
    """
    filters = [GradeFilter(grade) for grade in (1, 2, 3, 4, 5, 6, 8)]
    filters.append(FreqFilter(3000))
    filters.extend(GradeFilter(grade) for grade in (9, 10))
    filters.append(AllFilter())
    return filters