Skip to content

Commit cbba926

Browse files
committed
Initial commit
0 parents  commit cbba926

File tree

3 files changed

+191
-0
lines changed

3 files changed

+191
-0
lines changed

README.md

+18
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# anki_ENG-PL
2+
### Skrypt CLI pozwalający na automatyczne tworzenie fiszek z tłumaczeniami wyrazów, przykładami zdań i wymową, w formie transkrypcji fonetycznej i TTS.
3+
\
4+
Do tłumaczenia wyrazów używane są słowniki [diki.pl](https://diki.pl) i [Bab.la](https://bab.la).\
5+
Przykłady zdań są wzięte ze słownika [Bab.la](https://bab.la).\
6+
A wymowa zostaje wygenerowana przez amerykański Google TTS.
7+
8+
## Obsługa
9+
```
10+
pip install -r requirements.txt
11+
```
12+
13+
Przygotuj dokument tekstowy z wyrazami do przetłumaczenia (każdy w kolejnej linii), a następnie uruchom skrypt:
14+
15+
```
16+
python anki.py words.txt
17+
```
18+

anki.py

+167
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,167 @@
1+
from diki_translate import Diki
2+
from babla_translate import Babla
3+
from gtts import gTTS
4+
import genanki
5+
import pandas as pd
6+
import eng_to_ipa as p
7+
import contextlib
8+
import random
9+
import datetime
10+
import argparse
11+
import itertools
12+
import os
13+
import shutil
14+
15+
16+
#adds note to deck
17+
def noteAdder(note_word, phonetic, phonetic_media, meaning, example):
    """Create a note from the given field values and add it to ``my_deck``.

    Args:
        note_word: Value for the note's first field (the word itself).
        phonetic: Phonetic transcription of the word.
        phonetic_media: Path of the audio file for the word. Only its file
            name is placed in the ``[sound:...]`` field.
        meaning: Translation text.
        example: Example sentence.

    A ``TypeError`` raised while building or adding the note (for instance
    when ``phonetic_media`` is ``None`` because no audio was produced) is
    suppressed, and the note is skipped.
    """
    with contextlib.suppress(TypeError):
        # Use the file name rather than a fixed-width slice, so the result
        # does not depend on the length of the media folder's name.
        # os.path.basename(None) raises TypeError, which keeps the
        # skip-on-missing-audio behaviour.
        sound_field = "[sound:" + os.path.basename(phonetic_media) + "]"
        my_note = genanki.Note(
            model=my_model,
            fields=[note_word, phonetic, sound_field, meaning, example],
        )
        my_deck.add_note(my_note)
23+
24+
25+
#generates tts from word
26+
def ttsAdder(word):
    """Generate an MP3 for ``word`` and register it with ``my_package``.

    The file is saved as ``media/<word_index>_<first three characters>.mp3``,
    where ``word_index`` is the module-level loop counter, and its path is
    appended to ``my_package.media_files``.

    Returns:
        The path of the saved file, or ``None`` if an ``AssertionError`` was
        raised along the way.
    """
    try:
        speech = gTTS(word, lang='en')
        mp3_path = "media/" + str(word_index) + "_" + word[:3] + ".mp3"
        speech.save(mp3_path)
        my_package.media_files.append(mp3_path)
    except AssertionError:
        return None
    return mp3_path
33+
34+
35+
#generates meaning, example and phonetic of word
36+
def meaningAdder(word):
    """Look up ``word`` and store its meaning, phonetics and example sentence.

    Up to five translations are taken from each of ``diki`` and ``babla``.
    They are interleaved position by position (diki first), repeated entries
    are dropped while keeping first-seen order, and the first five are joined
    with ``', '``. The result, the output of ``p.convert(word)`` and the first
    example from ``babla.example(word)`` are written into the global
    ``dataset`` at the row labelled by the global ``word_index``, then printed.

    A single space (``" "``) is stored when there is no translation or no
    example.
    """
    diki_list = list(itertools.islice(diki.translation(word), 5))
    babla_list = list(itertools.islice(babla.translation(word), 5))

    # Private sentinel pads the shorter list so that no real translation
    # value can be mistaken for padding.
    missing = object()
    interleaved = itertools.chain.from_iterable(
        itertools.zip_longest(diki_list, babla_list, fillvalue=missing)
    )
    # dict.fromkeys keeps the first occurrence of each entry, in order.
    unique = list(dict.fromkeys(t for t in interleaved if t is not missing))

    meaning_string = ', '.join(unique[:5]) if unique else " "

    dataset.at[word_index, 'meaning'] = meaning_string
    dataset.at[word_index, 'phonetic'] = p.convert(word)

    examples = list(itertools.islice(babla.example(word), 1))
    dataset.at[word_index, 'example'] = examples[0] if examples else " "

    print(dataset.at[word_index, 'phonetic'])
    print(dataset.at[word_index, 'meaning'])
    print(dataset.at[word_index, 'example'])
85+
86+
87+
88+
#cli and pd init
89+
# Command-line interface: a single positional argument, the input file path.
parser = argparse.ArgumentParser(
    description='English <-> Polish anki generator',
)
parser.add_argument('file')
args = parser.parse_args()

# Read the input as tab-separated text without a header row. Every column is
# read as object dtype and named according to ``columns``.
columns = ['word_column', 'phonetic', 'meaning', 'example']
dataset = pd.read_csv(
    args.file,
    sep='\t',
    header=None,
    names=columns,
    index_col=False,
    dtype=object,
)

# Translation back ends used by meaningAdder.
diki = Diki("english")
babla = Babla("english", "polish")
99+
100+
101+
#genanki card model
102+
# genanki recommends generating a model ID once (random.randrange(1 << 30,
# 1 << 31)) and hard-coding it, so that Anki recognises the same note type
# across runs. A fresh random ID on every run registers a new note type each
# time a generated package is imported.
TRANSLATION_MODEL_ID = 1839274651

# Card model: the front shows the word, its phonetic transcription and the
# audio; the back adds the meaning and an example sentence.
my_model = genanki.Model(
    TRANSLATION_MODEL_ID,
    'Translation_model',
    fields=[
        {'name': 'word'},
        {'name': 'phonetic'},
        {'name': 'phonetic_media'},
        {'name': 'meaning'},
        {'name': 'example'},
    ],
    templates=[
        {
            'name': 'Card 1',
            'qfmt': '{{word}} <br> {{phonetic}} \t {{phonetic_media}}',
            'afmt': '{{FrontSide}}<hr id="answer">{{meaning}} <br><br> {{example}}',
        },
    ],
    css='.card {\n font-family: arial;\n font-size: 20px;\n text-align: center;\n color: black;\n background-color: white;\n}\n',
)
122+
123+
#creates media folder to store tts and checks if it exist
124+
# Create the folder that holds the generated audio files. Only an
# already-existing folder is tolerated; any other failure (e.g. missing
# permissions) propagates instead of being reported as "already exists".
folder_path = 'media'
try:
    os.mkdir(folder_path)
except FileExistsError:
    print('Media folder already exists')
else:
    print('Media folder created')
130+
131+
132+
# The deck title carries today's date as day.month, e.g. "Translation_05.11".
current_day = datetime.datetime.now()
title = 'Translation_' + current_day.strftime("%d.%m")

# Deck with a random ID, wrapped in a package whose media list starts empty.
my_deck = genanki.Deck(random.randrange(2 ** 30, 2 ** 31), title)
my_package = genanki.Package(my_deck)
my_package.media_files = []

# Words for which no translation was found.
not_translated = []
140+
141+
142+
#main loop
143+
# ``word_index`` is deliberately a module-level name: meaningAdder and
# ttsAdder read it. The range is computed once, before any row is dropped;
# DataFrame.drop leaves the remaining row labels unchanged, so the
# label-based ``.at`` lookups below stay valid for later rows.
for word_index in range(len(dataset)):
    current_word = dataset.at[word_index, 'word_column']
    print(word_index, current_word)
    meaningAdder(current_word)

    if dataset.at[word_index, 'meaning'] == " ":
        # A single-space meaning marks a word with no translation: record it
        # and leave it out of the exported table.
        not_translated.append(current_word)
        dataset = dataset.drop(word_index)
    else:
        noteAdder(
            current_word,
            dataset.at[word_index, 'phonetic'],
            ttsAdder(current_word),
            dataset.at[word_index, 'meaning'],
            dataset.at[word_index, 'example'],
        )
153+
154+
155+
#export
156+
# Show the final table, then write it as CSV and write the deck package;
# both files are named after the deck title.
print(dataset)
dataset.to_csv(f'{title}.csv', index=False)
my_package.write_to_file(f'{title}.apkg')
159+
160+
161+
#removes media folder
162+
# Best-effort removal of the media folder. Only filesystem errors are
# tolerated here; anything else would point to a programming error and
# should not be hidden.
try:
    shutil.rmtree(folder_path)
except OSError:
    print('Media folder not deleted')
else:
    print('Media folder and its content removed')
print(f"Words not translated : {not_translated}")

requirements.txt

+6
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
diki_translate
2+
babla_translate
3+
gtts
4+
genanki
5+
pandas
6+
eng_to_ipa

0 commit comments

Comments
 (0)