1
+ from diki_translate import Diki
2
+ from babla_translate import Babla
3
+ from gtts import gTTS
4
+ import genanki
5
+ import pandas as pd
6
+ import eng_to_ipa as p
7
+ import contextlib
8
+ import random
9
+ import datetime
10
+ import argparse
11
+ import itertools
12
+ import os
13
+ import shutil
14
+
15
+
16
def noteAdder(note_word, phonetic, phonetic_media, meaning, example):
    """Build a note from one word's fields and append it to ``my_deck``.

    Args:
        note_word: The word placed in the first field of the note.
        phonetic: Phonetic transcription of the word.
        phonetic_media: Path of the audio file as returned by ``ttsAdder``,
            or ``None`` when no audio file was produced.
        meaning: Translation text.
        example: Example sentence.

    Returns:
        None. When ``phonetic_media`` is ``None`` no note is added.
    """
    # ttsAdder returns None when speech generation was suppressed. Skip the
    # note explicitly instead of relying on a swallowed TypeError, which would
    # also hide unrelated type errors raised while building the note.
    if phonetic_media is None:
        return

    # The sound tag carries only the file name; basename drops the folder
    # prefix without depending on that prefix being exactly six characters.
    sound_field = "[sound:" + os.path.basename(phonetic_media) + "]"

    my_note = genanki.Note(
        model=my_model,
        fields=[note_word, phonetic, sound_field, meaning, example],
    )
    my_deck.add_note(my_note)
23
+
24
+
25
def ttsAdder(word):
    """Generate an English speech file for *word* and register it as media.

    The file name is built from the module-level ``word_index`` and the first
    three characters of *word*. The saved path is appended to
    ``my_package.media_files``.

    Args:
        word: Text to be spoken.

    Returns:
        The path of the saved mp3 file, or ``None`` when an AssertionError was
        raised while creating, saving or registering the audio.
    """
    try:
        speech = gTTS(word, lang='en')
        media_path = f"media/{word_index} _" + word[:3] + ".mp3"
        speech.save(media_path)
        my_package.media_files.append(media_path)
    except AssertionError:
        # Same best-effort contract as before: no audio, no path.
        return None
    return media_path
33
+
34
+
35
def _interleave_unique(first, second):
    """Return the items of both lists in alternating order without duplicates."""
    missing = object()  # private filler so a shorter list adds nothing
    merged = []
    for pair in itertools.zip_longest(first, second, fillvalue=missing):
        for item in pair:
            if item is not missing and item not in merged:
                merged.append(item)
    return merged


def meaningAdder(word):
    """Fill in meaning, phonetic and example for *word* in ``dataset``.

    Takes up to five translations from each of ``diki`` and ``babla``,
    interleaves them (diki first), removes duplicates and joins the first five
    with ``', '``. The result, the phonetic transcription from ``p.convert``
    and the first example from ``babla.example`` are written to the row
    ``word_index`` of the module-level ``dataset``, then printed.

    A single space ``" "`` is stored when no translation or no example is
    available.

    Args:
        word: The word to look up.
    """
    diki_list = list(itertools.islice(diki.translation(word), 5))
    babla_list = list(itertools.islice(babla.translation(word), 5))

    unique_meanings = _interleave_unique(diki_list, babla_list)
    if unique_meanings:
        meaning_string = ', '.join(unique_meanings[:5])
    else:
        meaning_string = " "

    dataset.at[word_index, 'meaning'] = meaning_string
    dataset.at[word_index, 'phonetic'] = p.convert(word)

    # Only the first example is used; fall back to the blank marker.
    examples = list(itertools.islice(babla.example(word), 1))
    dataset.at[word_index, 'example'] = examples[0] if examples else " "

    print(dataset.at[word_index, 'phonetic'])
    print(dataset.at[word_index, 'meaning'])
    print(dataset.at[word_index, 'example'])
85
+
86
+
87
+
88
# Command-line interface: one positional argument naming the input file.
parser = argparse.ArgumentParser(description='English <-> Polish anki generator')
parser.add_argument('file')
args = parser.parse_args()

# Load the input file as a headerless table with four named columns, all
# values kept as objects.
# NOTE(review): the separator literal is a tab followed by a space - confirm
# this matches the input files.
columns = ['word_column', 'phonetic', 'meaning', 'example']
dataset = pd.read_csv(
    args.file,
    sep='\t ',
    names=columns,
    header=None,
    index_col=False,
    dtype=object,
)

# Translators used by meaningAdder.
diki = Diki("english")
babla = Babla("english", "polish")
99
+
100
+
101
# genanki card model: five fields and a single card template.
# NOTE(review): the model id is drawn at random on every run - confirm that a
# fresh id per generated package is intended.
my_model = genanki.Model(
    random.randrange(2 ** 30, 2 ** 31),
    'Translation_model',
    fields=[
        {'name': field_name}
        for field_name in ('word', 'phonetic', 'phonetic_media', 'meaning', 'example')
    ],
    templates=[
        dict(
            name='Card 1',
            # Front: word, phonetic transcription and the sound tag.
            qfmt='{{word}} <br> {{phonetic}} \t {{phonetic_media}}',
            # Back: the front side followed by meaning and example.
            afmt='{{FrontSide}}<hr id="answer">{{meaning}} <br><br> {{example}}',
        ),
    ],
    css=(
        '.card {\n'
        ' font-family: arial;\n'
        ' font-size: 20px;\n'
        ' text-align: center;\n'
        ' color: black;\n'
        ' background-color: white;\n'
        ' }\n'
        ' '
    ),
)
122
+
123
# Create the media folder that stores the generated tts files.
folder_path = 'media'
try:
    os.mkdir(folder_path)
except FileExistsError:
    # Only this error means the path is already taken. Other failures (for
    # example missing permissions) are no longer reported as "already exists"
    # and surface here instead of later, when the first audio file is saved.
    print('Media folder already exists')
else:
    print('Media folder created')
130
+
131
+
132
# Deck title carries the current day and month.
# NOTE(review): the title ends with a space, so the exported file names get a
# space before their extension - confirm this is intended.
current_day = datetime.datetime.now()
title = 'Translation_' + current_day.strftime("%d.%m") + ' '

# Deck with a randomly drawn id, wrapped in a package whose media list starts
# empty; ttsAdder appends to it.
my_deck = genanki.Deck(random.randrange(2 ** 30, 2 ** 31), title)
my_package = genanki.Package(my_deck)
my_package.media_files = []

# Words for which no meaning was found.
not_translated = []
140
+
141
+
142
# Main loop: look up every word, then either record it as untranslated or add
# its note to the deck.
# word_index is a module-level name on purpose: meaningAdder and ttsAdder read
# it to address the current row and to name the audio file.
for word_index in range(len(dataset)):
    # Public label-based accessor instead of the private DataFrame._get_value.
    current_word = dataset.at[word_index, 'word_column']
    print(word_index, current_word)
    meaningAdder(current_word)

    if dataset.at[word_index, 'meaning'] == " ":
        # meaningAdder stores a single space when no translation was found.
        not_translated.append(current_word)
        # drop() keeps the remaining row labels, so later indices stay valid.
        dataset = dataset.drop(word_index)
        continue

    noteAdder(
        dataset.at[word_index, 'word_column'],
        dataset.at[word_index, 'phonetic'],
        ttsAdder(current_word),
        dataset.at[word_index, 'meaning'],
        dataset.at[word_index, 'example'],
    )
153
+
154
+
155
# Export: show the final table, then write the CSV and the Anki package, both
# named after the deck title.
print(dataset)
dataset.to_csv(f'{title}.csv', index=False)
my_package.write_to_file(f'{title}.apkg')


# Remove the media folder together with everything inside it.
# NOTE(review): this also deletes a media folder that existed before the run -
# confirm that is acceptable.
try:
    shutil.rmtree(folder_path)
    print('Media folder and its content removed')
except Exception:
    print('Media folder not deleted')

# Final report of the words that got no translation.
print(f"Words not translated : {not_translated} ")
0 commit comments