From 35aef4c13c48bf8a2a65c023199921c4638f05c9 Mon Sep 17 00:00:00 2001
From: mdmujtabaraza <45493966+mdmujtabaraza@users.noreply.github.com>
Date: Sat, 2 Apr 2022 15:32:17 +0530
Subject: [PATCH] Fixed generate_cloze; improved the app; many other fixes.

---
 .idea/vcs.xml                                 |   1 +
 run_cli.py                                    |   9 +-
 src/__pycache__/app.cpython-38.pyc            | Bin 12066 -> 12802 bytes
 src/app.kv                                    |   2 +-
 src/app.py                                    |  53 +++-
 src/dict_scraper/spiders/cambridge.py         | 249 +++++++++++-------
 .../__pycache__/json_to_apkg.cpython-38.pyc   | Bin 3771 -> 3783 bytes
 src/lib/json_to_apkg.py                       |  14 +-
 8 files changed, 212 insertions(+), 116 deletions(-)

diff --git a/.idea/vcs.xml b/.idea/vcs.xml
index 94a25f7..8d7912f 100644
--- a/.idea/vcs.xml
+++ b/.idea/vcs.xml
@@ -2,5 +2,6 @@
[XML hunk lines stripped in transcription]
+
\ No newline at end of file
diff --git a/run_cli.py b/run_cli.py
index 0430540..94198c7 100644
--- a/run_cli.py
+++ b/run_cli.py
@@ -8,7 +8,7 @@
 from src.app_cli import run_spider
 from src.dict_scraper.spiders import cambridge
-
+from src.lib.json_to_apkg import generate_cloze

 if __name__ == '__main__':
     word_url = "https://dictionary.cambridge.org/dictionary/english/sit"
@@ -18,13 +18,14 @@
         'User-Agent': 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)',
         'Referer': 'https://www.google.com'
     }
+    # phrase = generate_cloze("an eye for an eye")
    # # response = requests.get(gcurl, headers=headers)
    # # print(response.content)
    # CONTAINER['url'] = gcurl
-    http = urllib3.PoolManager(timeout=urllib3.Timeout(connect=1.0, read=2.0))
-    response = http.request('GET', word_url, headers=headers, retries=urllib3.Retry(5, redirect=2))
-    print(response.status)
+    # http = urllib3.PoolManager(timeout=urllib3.Timeout(connect=1.0, read=2.0))
+    # response = http.request('GET', word_url, headers=headers, retries=urllib3.Retry(5, redirect=2))
+    # print(response.status)
    # print(response.data)
    # print(response["headers"])
diff --git a/src/__pycache__/app.cpython-38.pyc b/src/__pycache__/app.cpython-38.pyc
index b36fdcdad7d987772ddbd852669ca7828a4f65f7..ce8b09d6e7858645111c33cd1bbb3131962a7f5a 100644
GIT binary patch
delta 4843
[binary delta omitted]

delta 4138
[binary delta omitted]

[hunks for src/app.kv and src/app.py lost in transcription]
diff --git a/src/dict_scraper/spiders/cambridge.py b/src/dict_scraper/spiders/cambridge.py
--- a/src/dict_scraper/spiders/cambridge.py
+++ b/src/dict_scraper/spiders/cambridge.py
@@ ... @@
+def get_valid_filename(name):
+    """
+    Return the given string converted to a string that can be used for a clean
+    filename. Remove leading and trailing spaces; convert other spaces to
+    underscores; and remove anything that is not an alphanumeric, dash,
+    underscore, or dot.
+    >>> get_valid_filename("john's portrait in 2004.jpg")
+    'johns_portrait_in_2004.jpg'
+    """
+    s = str(name).strip().replace(" ", "_")
+    s = re.sub(r"(?u)[^-\w.]", "", s)
+    if s in {"", ".", ".."}:
+        raise ValueError("Could not derive file name from '%s'" % name)  # Django raises SuspiciousOperation here
+    return s
+
+
 def get_tree(branch, seen, *args, **kwargs):
     out = []
     for d in branch.find_all("div", class_="cid"):
@@ -50,73 +70,88 @@ def parse(self):
     count = 0
     sections = self.soup.select(".dsense")
-    print(len(sections))
+    for section in sections:
+        section_id = get_tree(section, set())
+        print(section_id)
+    idiom_block = self.soup.select(".idiom-block")
     last_true_section_id = None
     for section in sections:
         section_id = get_tree(section, set())
-        more_words = {section_id[0]: {}}
+        more_words = {section_id[0]: {}}
         # dphrase_block = section.css(".dphrase-block").extract()
         parts_of_speech = section.select(".dsense_pos")
         if not parts_of_speech:
             in_dsense = False
             print('not in_dsense:', section_id)
-            word = extract_text(section.select_one(".dphrase-title b"))
-            guide_word = ''
-            part_of_speech = self.soup.select_one(f"#{section_id[0]} ~ .dpos-h .dpos")
-            if not part_of_speech:
-                # print("pos None")
-                if last_true_section_id.split('-')[0] == section_id[0].split('-')[0]:
-                    part_of_speech = extract_text(self.soup.select_one(f"#{last_true_section_id} ~ .dsense_h .dsense_pos"))
-                    # print("last")
-                else:
-                    cid = '-'.join(section_id[0].split('-', 2)[:2])
-                    part_of_speech = extract_text(self.soup.select_one(f"#{cid} ~ .dpos-h .dpos"))
-                    # print("last not correct")
-            # combinators = ['', '>', '+', '~']
-            # for combinator in combinators:
-            #     part_of_speech = response.css(f"#{cid}{combinator} .dpos-h .dpos").css("::text").extract_first()
-            #     print(f"#{cid}{combinator} .dpos-h .dpos")
-            #     if part_of_speech is not None:
-            #         print("correct")
-            #         break
-            # slice_number = 0
-            # while bool(re.findall('[0-9]+', section_id[0].rsplit('-', slice_number)[0])) and part_of_speech is None:
-            #     combinators = ['', '>', '+', '~']
-            #     for combinator in combinators:
-            #         part_of_speech = response.css(f"#{section_id[0].rsplit('-', slice_number)[0]}{combinator} "
-            #                                       f".dpos-h .dpos").css("::text").extract_first()
-            #         # print(f"#{section_id[0][:slice_number]}{combinator} .dpos-h .dpos")
-            #         if part_of_speech is not None:
-            #             # print("correct")
-            #             break
-            #     if slice_number is None:
-            #         slice_number = 0
-            #     slice_number += 1
-            if not word:
-                word = extract_text(self.soup.select_one(".hw.dhw"))
-            domain = extract_text(section.select(".ddomain"), join_char='/')
-            word_meaning = extract_text(section.select(".ddef_d"))
-            dlu = extract_text(section.select(".dlu"), join_char='/')
-            cl = extract_text(section.select(".cl"), join_char=' ')
-            if domain:
-                word += f" ({domain})"
-                if dlu:
-                    word += f" ({dlu})"
-                if cl:
-                    word += f" ({cl})"
-            elif dlu:
-                word = f"{dlu}"
-                if cl:
-                    word += f" ({cl})"
-            elif cl:
-                word = f"{cl}"
-            else:
-                word += f" ({word_meaning.split(':')[0]})"
+            if idiom_block:
+                # cid = '-'.join(section_ids[0].split('-', 2)[:2])
+                # word = extract_text(self.soup.select(f"#{cid} ~ .idiom-block b"))
+                word = extract_text(self.soup.select_one(".idiom-block b"))
+                guide_word = '(' + extract_text(section.select(".dsense_b .ddef_d .query"), join_char=' ') + ')'
+                part_of_speech = 'idiom'
+            else:
+                word = extract_text(section.select_one(".dphrase-title b"))
+                guide_word = ''
+                part_of_speech = self.soup.select_one(f"#{section_id[0]} ~ .dpos-h .dpos")
+                # print("before not pos")
+                if not part_of_speech:
+                    # print("pos None")
+                    if last_true_section_id is not None:
+                        if last_true_section_id.split('-')[0] == section_id[0].split('-')[0]:
+                            part_of_speech = extract_text(self.soup.select_one(f"#{last_true_section_id} ~ .dsense_h .dsense_pos"))
+                            # print("last")
+                        else:
+                            cid = '-'.join(section_id[0].split('-', 2)[:2])
+                            part_of_speech = extract_text(self.soup.select_one(f"#{cid} ~ .dpos-h .dpos"))
+                            # print("last not correct")
+                    else:
+                        cid = '-'.join(section_id[0].split('-', 2)[:2])
+                        part_of_speech = extract_text(self.soup.select_one(f"#{cid} ~ .dpos-h .dpos"))
+                        # print("last not correct")
+                # combinators = ['', '>', '+', '~']
+                # for combinator in combinators:
+                #     part_of_speech = response.css(f"#{cid}{combinator} .dpos-h .dpos").css("::text").extract_first()
+                #     print(f"#{cid}{combinator} .dpos-h .dpos")
+                #     if part_of_speech is not None:
+                #         print("correct")
+                #         break
+                # slice_number = 0
+                # while bool(re.findall('[0-9]+', section_id[0].rsplit('-', slice_number)[0])) and part_of_speech is None:
+                #     combinators = ['', '>', '+', '~']
+                #     for combinator in combinators:
+                #         part_of_speech = response.css(f"#{section_id[0].rsplit('-', slice_number)[0]}{combinator} "
+                #                                       f".dpos-h .dpos").css("::text").extract_first()
+                #         # print(f"#{section_id[0][:slice_number]}{combinator} .dpos-h .dpos")
+                #         if part_of_speech is not None:
+                #             # print("correct")
+                #             break
+                #     if slice_number is None:
+                #         slice_number = 0
+                #     slice_number += 1
+                if not word:
+                    word = extract_text(self.soup.select_one(".hw.dhw"))
+                domain = extract_text(section.select(".ddomain"), join_char='/')
+                word_meaning = extract_text(section.select(".ddef_d"))
+                dlu = extract_text(section.select(".dlu"), join_char='/')
+                cl = extract_text(section.select(".cl"), join_char=' ')
+                if domain:
+                    word += f" ({domain})"
+                    if dlu:
+                        word += f" ({dlu})"
+                    if cl:
+                        word += f" ({cl})"
+                elif dlu:
+                    word = f"{dlu}"
+                    if cl:
+                        word += f" ({cl})"
+                elif cl:
+                    word = f"{cl}"
+                else:
+                    word += f" ({word_meaning.split(':')[0]})"
         else:
             in_dsense = True
             print('in_dsense:', section_id)
-
             last_true_section_id = section_id[0]
             # if len(section_id) > 1:
@@ -131,38 +166,45 @@ def parse(self):
             # ignore this word
             # else meaning found then:
             # keep this word
-
-            extracted_meanings = extract_text(section.select(".dsense_b > .ddef_block .ddef_d"))
-            meanings_list = extracted_meanings.split(':')[:-1]
-
-            if len(section_id) <= 1:
-                if len(meanings_list) > 1:
-                    for i in range(len(meanings_list)):
-                        more_words[section_id[0]][i + 1] = meanings_list[i]
+            if idiom_block:
+                # print("IDIOM")
+                # cid = '-'.join(section_ids[0].split('-', 2)[:2])
+                # word = extract_text(self.soup.select(f"#{cid} ~ .idiom-block b"))
+                word = extract_text(self.soup.select_one(".idiom-block b"))
+                guide_word = ''
+                part_of_speech = 'idiom'
             else:
-                if meanings_list:
-                    for i in range(len(meanings_list)):
-                        more_words[section_id[0]][i + 1] = meanings_list[i]
-                for bid in section_id[1:]:
-                    blue_block_title = extract_text(section.select(f"#{bid} ~ .dphrase_h b"))
-                    if not blue_block_title:
-                        blue_block_meaning = extract_text(section.select(f"#{bid} ~ .dphrase_b .ddef_d"))[:-1]
-                        more_words[section_id[0]][bid] = blue_block_meaning
-                    else:
-                        more_words[section_id[0]][bid] = blue_block_title
-            # if word has multiple meanings:
-            #     create another instances of those meanings
-            word = extract_text(section.select_one(".dsense_hw"))
-            guide_word = '(' + extract_text(section.select_one(".dsense_gw span")) + ')'
-            # b = section.css("b").css("::text").extract()
-            # if b:
-            #     if guide_word:
-            #         guide_word += f" ({' '.join(b)})"
-            #     else:
-            #         guide_word = f" ({' '.join(b)})"
-            part_of_speech = extract_text(section.select_one(".dsense_pos"))
-            # definitions = section.css(".ddef_d").css("::text").extract()
-            # sentences = section.css(".deg").css("::text").extract()
+                extracted_meanings = extract_text(section.select(".dsense_b > .ddef_block .ddef_d"))
+                meanings_list = extracted_meanings.split(':')[:-1]
+
+                if len(section_id) <= 1:
+                    if len(meanings_list) > 1:
+                        for i in range(len(meanings_list)):
+                            more_words[section_id[0]][i + 1] = meanings_list[i]
+                else:
+                    if meanings_list:
+                        for i in range(len(meanings_list)):
+                            more_words[section_id[0]][i + 1] = meanings_list[i]
+                    for bid in section_id[1:]:
+                        blue_block_title = extract_text(section.select(f"#{bid} ~ .dphrase_h b"))
+                        if not blue_block_title:
+                            blue_block_meaning = extract_text(section.select(f"#{bid} ~ .dphrase_b .ddef_d"))[:-1]
+                            more_words[section_id[0]][bid] = blue_block_meaning
+                        else:
+                            more_words[section_id[0]][bid] = blue_block_title
+                # if word has multiple meanings:
+                #     create another instances of those meanings
+                word = extract_text(section.select_one(".dsense_hw"))
+                guide_word = '(' + extract_text(section.select_one(".dsense_gw span")) + ')'
+                # b = section.css("b").css("::text").extract()
+                # if b:
+                #     if guide_word:
+                #         guide_word += f" ({' '.join(b)})"
+                #     else:
+                #         guide_word = f" ({' '.join(b)})"
+                part_of_speech = extract_text(section.select_one(".dsense_pos"))
+                # definitions = section.css(".ddef_d").css("::text").extract()
+                # sentences = section.css(".deg").css("::text").extract()
         if word:
             word = re.sub("\s\s+", " ", word)
             if guide_word:
@@ -172,6 +214,7 @@ def parse(self):
         count += 1
         # print(count)

+    print(meanings)
     return meanings
@@ -237,6 +280,7 @@ def parse(self):
 #  = #cid~ .dsense_b .ddef_d  (cbed-1-1, ..., cbed-1-8, )
     word = extract_text(self.soup.select_one(f".hw.dhw"))
+
     if in_dsense is True:
         # word = response.css(f"#{cid}~ .dsense_h .dsense_hw").css("::text").extract_first()
         if type(meaning) is tuple:
@@ -253,16 +297,23 @@ def parse(self):
             meaning_text = extract_text(self.soup.select(f"#{cid} ~ .dsense_b .ddef_d"))
             sentences = self.soup.select(f"#{cid} ~ .dsense_b .dexamp")
     else:  # in_dsense is False:
-        if len(section_ids) > 1:
-            cid = section_ids[1]
-            meaning_text = extract_text(self.soup.select(f"#{cid} ~ .dphrase_b .ddef_d"))
-            sentences = self.soup.select(f"#{cid} ~ .dphrase_b .dexamp")
+        if part_of_speech == 'idiom':
+            word = extract_text(self.soup.select_one(".idiom-block b"))
+            # cid = '-'.join(section_ids[0].split('-', 2)[:2])
+            # word = extract_text(self.soup.select(f"#{cid} ~ .idiom-block b"))
+            meaning_text = extract_text(self.soup.select(f"#{section_ids[0]} ~ .dsense_b .ddef_d"))
+            sentences = self.soup.select(f"#{section_ids[0]} ~ .dsense_b .dexamp")
         else:
-            cid = section_ids[0]
-            meaning_text = extract_text(self.soup.select(f"#{cid} ~ .dsense_b .ddef_d"))
-            sentences = self.soup.select(f"#{cid} ~ .dsense_b .dexamp")
+            if len(section_ids) > 1:
+                cid = section_ids[1]
+                meaning_text = extract_text(self.soup.select(f"#{cid} ~ .dphrase_b .ddef_d"))
+                sentences = self.soup.select(f"#{cid} ~ .dphrase_b .dexamp")
+            else:
+                cid = section_ids[0]
+                meaning_text = extract_text(self.soup.select(f"#{cid} ~ .dsense_b .ddef_d"))
+                sentences = self.soup.select(f"#{cid} ~ .dsense_b .dexamp")
     # print("MeaningText:", meaning_text)
-    print("Sentences:", type(sentences), len(sentences), type(sentences[0]))
+    # print("Sentences:", type(sentences), len(sentences), type(sentences[0]))

     if tld == "co.uk":
         accent_tld = "uk"
@@ -314,8 +365,8 @@ def parse(self):
     # us_pronunciation = response.css(".us #ampaudio2 source::attr(src)").extract_first()  # amp-audio

     def download_audio() -> str:
-        filename = word + '_' + accent_tld + '.mp3'
-
+        filename = get_valid_filename(word + '_' + accent_tld + '.mp3')
+        # print(filename)
         tts = gTTS(word, lang='en', tld=tld)
         if not os.path.exists('media'):
             os.makedirs('media')
@@ -358,14 +409,16 @@ def download_audio() -> str:
         'part_of_speech': part_of_speech,
         'meaning': meaning_text.split(':')[0],
         'sentences': ''.join(sentences_list),
-        'phonemic_script': '/' + phonemic_script + '/',
+        'phonemic_script': '' if not phonemic_script else '/' + phonemic_script + '/',
         'pronunciation_word': download_audio(),
-        'synonyms': f"Synonyms"
+        'synonyms': f"Synonyms"
     }
     # dictionary_item['sentences'] = ''.join(sentences).split('.')[:2]  # ''.join(sentences)
     # dictionary_item['sentences'] = re.findall('.*?[.!?]', ''.join(sentences))[:2]
     # dictionary_item['us_phonemic_script'] = '/' + us_phonemic_script + '/'
     # dictionary_item['us_pronunciation'] = download_audio('us', us_pronunciation)
     jta = JsonToApkg(dictionary_item)
+    # print(dictionary_item)
     jta.generate_apkg()
+    # print("Generated.")
     return dictionary_item
diff --git a/src/lib/__pycache__/json_to_apkg.cpython-38.pyc b/src/lib/__pycache__/json_to_apkg.cpython-38.pyc
index d87b2fe1fd004578da39d2a9117d2d18343038ec..977263be3dc565339408e4af10063848a3129d75 100644
GIT binary patch
delta 148
[binary delta omitted]

delta 135
[binary delta omitted]

diff --git a/src/lib/json_to_apkg.py b/src/lib/json_to_apkg.py
index 2039687..52aed3c 100644
--- a/src/lib/json_to_apkg.py
+++ b/src/lib/json_to_apkg.py
@@ -32,6 +32,8 @@

 def generate_cloze(phrase: str):
+    # print("Starting generate_cloze..")
+    # print(phrase)
     n = len(phrase) - phrase.count(' ')
     if (n % 2) == 0:
         u_count = int(n/2)
@@ -68,8 +70,12 @@ def generate_cloze(phrase: str):
             cloze_list[temp_index] = cloze_text
             phrase_list.remove(temp_word)
             # print(phrase_list)
+            if not phrase_list:
+                # print("Avoiding error")
+                u_count = 0
         # print(cloze_list)
         # print("end of loop:", temp_len, u_count)
+    # print("No Problem in generate_cloze")
     return ' '.join(cloze_list)
@@ -80,6 +86,7 @@ def __init__(self, j_dict):

     def generate_apkg(self):
         # create/initialize model
+        # print('before my_model')
         my_model = genanki.Model(
             1646879431108,  # todo: change id and also create new customized structure
             name='English Vocab',
@@ -112,6 +119,7 @@ def generate_apkg(self):
         # just do these steps
         # automatic fill
         # todo: cloze, picture, synonyms, arrange in order, if sound not there then?
+ # print('before list_of_fields') list_of_fields = [ self.j_dict.get("word", ""), self.j_dict.get("part_of_speech", ""), @@ -125,23 +133,27 @@ def generate_apkg(self): ] # list_of_fields = [x for x in self.j_dict.values()] + # print('Before my_note') my_note = genanki.Note( model=my_model, fields=list_of_fields ) - + # print('Before my_deck') my_deck = genanki.Deck( 1646145285163, # todo: change id and name "English Vocabulary (British Accent)") + # print('before adding a note to deck') my_deck.add_note(my_note) # add media + # print('before my_package') my_package = genanki.Package(my_deck) my_package.media_files = ['media/' + self.j_dict["pronunciation_word"][7:-1:]] # generate apkg # my_package.write_to_file('output-' + self.j_dict["word"] + '.apkg') # apkg_filename = 'output-' + dt.now().strftime("%Y%m%d%H%M%S") + '.apkg' apkg_filename = 'output' + '.apkg' + # print('before writing') my_package.write_to_file(apkg_filename) return apkg_filename
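
What follows are reviewer notes with small runnable sketches; none of this is part of the patch itself.

The new get_valid_filename helper in cambridge.py mirrors Django's utility of the same name (the doctest above is Django's), so it can be sanity-checked in isolation. A minimal sketch, assuming the package layout from the diffstat; the second sample name is hypothetical:

    # exercise the filename sanitiser added to cambridge.py
    from src.dict_scraper.spiders.cambridge import get_valid_filename

    print(get_valid_filename("john's portrait in 2004.jpg"))  # johns_portrait_in_2004.jpg
    print(get_valid_filename("sit_uk.mp3"))                   # already clean, returned unchanged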
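download_audio now routes its filename through that sanitiser before handing the word to gTTS. A standalone sketch of the equivalent flow, assuming gTTS is installed; the sample values stand in for word, tld, and accent_tld:

    import os
    from gtts import gTTS

    word, tld, accent_tld = "sit", "co.uk", "uk"  # sample values; tld selects the accent
    filename = word + '_' + accent_tld + '.mp3'   # the patch additionally passes this through get_valid_filename
    os.makedirs('media', exist_ok=True)
    gTTS(word, lang='en', tld=tld).save(os.path.join('media', filename))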
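The new guard in generate_cloze (u_count = 0 once phrase_list is empty) stops the selection loop from trying to blank out more words than the phrase contains. run_cli.py already carries a commented-out call that exercises it; a sketch, noting that the exact output varies because the function appears to pick at random which characters to hide, targeting roughly half of the non-space characters:

    from src.lib.json_to_apkg import generate_cloze

    print(generate_cloze("an eye for an eye"))  # e.g. 'an e_e f_r _n ey_' (varies per run)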
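Finally, for reviewing the json_to_apkg.py changes without running the scraper: generate_apkg follows the standard genanki model -> note -> deck -> package sequence, with fields mirroring the dictionary_item keys (word, part_of_speech, meaning, sentences, phonemic_script, pronunciation_word, synonyms). A self-contained sketch with placeholder IDs and a trimmed two-field model, not the app's real model definition:

    import genanki

    # placeholder model id and a minimal field set; the patch uses its own
    # fixed ids and carries a todo to regenerate them
    model = genanki.Model(
        1607392319,
        'Sketch Model',
        fields=[{'name': 'Front'}, {'name': 'Back'}],
        templates=[{
            'name': 'Card 1',
            'qfmt': '{{Front}}',
            'afmt': '{{FrontSide}}<hr id="answer">{{Back}}',
        }],
    )
    note = genanki.Note(model=model, fields=['sit', 'to rest with the weight on the buttocks'])
    deck = genanki.Deck(2059400110, 'English Vocabulary (British Accent)')  # placeholder deck id
    deck.add_note(note)

    package = genanki.Package(deck)
    package.media_files = ['media/sit_uk.mp3']  # file must exist on disk when writing
    package.write_to_file('output.apkg')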