From 26a0935e7eb7b4b94df52856b8c0da3447e05cd5 Mon Sep 17 00:00:00 2001 From: mdmujtabaraza <45493966+mdmujtabaraza@users.noreply.github.com> Date: Tue, 29 Mar 2022 20:49:29 +0530 Subject: [PATCH] confirm_generation function finalized and MeaningsSpider return values changed --- src/__pycache__/app.cpython-38.pyc | Bin 11840 -> 12048 bytes src/app.kv | 24 ++++++------- src/app.py | 32 +++++++++++------- .../__pycache__/cambridge.cpython-38.pyc | Bin 5675 -> 5727 bytes src/dict_scraper/spiders/cambridge.py | 28 ++++++++------- 5 files changed, 48 insertions(+), 36 deletions(-) diff --git a/src/__pycache__/app.cpython-38.pyc b/src/__pycache__/app.cpython-38.pyc index 7a37a56f1837bd3657966397d58819f81799cf71..6b97c9f4f6bb8343ed0d3927c83461262f0637da 100644 GIT binary patch delta 2962 zcmaJ@YiwM_6`r}Ty}RDstnKw<{oL4z?M(@kM2hM}i5=TONWml+s;cHRGr= zgXWX~Try9V=?bC5ql_lVv)C&M!qfzrQ`cxMK|B>lL8f`yva&*k$jPstQ0DYG!z0V8 zt9eF6!_z%uOc_(2A!C&3%y=ru0*lHDrR3~>M(!LTDRY>^fyl=QzM9Q3Ka?+W+nOnI z=44fnuK(0at#vx}lodt%v#Kqm`t-a{XMK7>XyGJ1BX)*|%et>S(v`D)obw zm7U5sj%D$04U(&D-Kj2YTHufx_G(zP|ubsf53%@5cbJ^_aV9Ej6aEoYVJ$i?o@vhKd(7@ z+tGiBn%VN~viho0*z|2s#O@I1=Bp zgQJ{sx6;j2y7QH87P|l+7q7+-bjN$fa2c~M;q^;hL~x550*HXNElNneo~xB^wI_KQsNF86u(b=(DfLIeSL7M z=-8{u_?Z0tyN^Ev;#JMp-JAV#H66Z$E668V!;3ofZF z=nmb_Z#Q|{(^Y1^`7ztrK68uME*s9l<8~kY{>>MOqak5(#X=j8;0Mo zv{6ZBr(vQW$lIAht}yP0*9gnC&%5bpwsLZ7blP=`1^z?aN#=-3p!Og?0 z>0xoXBNYkL7&T=KX*4cwcJ$MyM7Xo9EK?ySL5`V^W4f!HM~1shEH9z#6$Fe>jse4e zieMwi%_U@!?MD~`z^vyq0YF9W3mq|4_!}VuFdS+%H8W;vpeG)Pw21dQ&-KV6{02_$ zMaUz_1?AHH>jOU%*&R1&Nk%S){IA4)JEu1BzY%}l*|&+mE_QScZQ^f;C%f+L#iR)D zLApmBEuQDn?De?GhU357*f-&$#Bnp+y>_X4`0h7R?Y39PE4+hd$A<}5*ZCWw+(e66^2S#ebcVY^5%m?f_xCBA7{`An{0S*a|44z;n!@E-uAx81MgRZ+ delta 2767 zcmaJ@Yiu0V6~1?7cJ|@jwPTyu8^`t%JGR&4fyhI+5XW&G5D|4~4T3C5M(e#}XYJh` zb7x#TVbcu_p-GCt>QQ;ra%87b+EkS)WDpR96e1o{`-dO=2!q6*NKvDxf>74@8% zI5raTtmd0@&OP^a&bi;+=|h(f#Gi=8A`G8_hTg%SJ^V)eN+fjSpPVt-&){mLP}Z<5 zjBh2r_Fe>sonUi(o^e*kZv4m{)Tob6WaO~z1`B4Um@5un zzU9Ql;eFWRRdAF0eVvkSYZ1uy5JFV(7__liQ z9a}$#Ow9vL&(Iuq8sJN~RMWe%%QaY2yX(OfNBPU*wr)@fr_{8b&E)f@bOSlZEZFd? z+WT6I)WYSM|FQj#uFS=-%m6cs=CUK)B(bz|0;Su}*S#)LId{5?E+*Co9~ zeg$~0vnF7n_;+Yso#VbPQU4cQSZ(rWAzIfJ7)AXX*jqQv_rn);YZ1SxYuCLZI6(vG zA$0hbL3jOtE>M({@M`@AuEB@(A0?kfLpu0mu833HM?;fhcq6SQ6_SU^^`~$#(Zx@~ zwZsM|~7*{DR;Gf(im!3Oadb%HGO5u&fNm&Pw4F>bYr27-Wb)&tG-3bR zb^Kl^t?ei;op{D5UqO8F>@%Bk<1oQX{1%ZYLPa9crj%m}+5@N92$BnGDoTpnPB^C$ zJvo}tMG#3SlwdPVgi(rkQpIdQM3J79VMWBIv{TGbdcAG|>gF+1ygwb>Z54-dvapPd zkc)6Nw7eQxS`^_Fn`0$pz=a6oLKQ{hAr})bk;j6|yWmhAT`wvgL%J9Td zW!+9WMU5{Zq;x;TUUZkjz7*$MTt)I zjMwux59>&03jXThi>CHn6y(K+WZEHdi?=(6m6Q|6*Ax?ZKU(E6gp}%KJFN~nMR~|8 z_E*uo3kI9F$BA+y_Ft`Z~gQ6HAv&CtWg&JN+^FJx=gFNj(WOcW>#SL*mA5 z6B8ja@`&M>la9yWN3|qADaAiVv+(b`do~_H=X8WpZ{&(&hCEjQC- zbn4}C$gb;49UzfnDeDOK5!55Nid{&D?TMU|J#08uDh!{lTVF9l%~TljH3+mMo4;EL zZrDC-O&B;+W)IavKRnWs3Wu-^qFloam0NoFb8xw(DeY01MdJ`-Nunu#QC?AchabrH z3IWxZq~yzaf((H-u?GgJJp_XYBXECfb08Md@KQX`3LW14c#c}T2yz5o53g^}V#9Bt zZvCHmp2`gFZP)l)@Za_MW%}E2ylv+){T;a4*0)T57q+!O)J4pMMC2tw9keE;7hF0O z%hG#$VmD+J4J0e+`~lu<@89|x^1I{aXb%@CsKg8*9t}vcz`Obmusf3F2T^hGFO#DP z{G&95C5GHYx+>H~U1%jXq8R}Z@V>ghn*{Zs&HnZ%>XM&>jj0`Y35})_kq>B@#9IueQ&a8d$zcz{QjISK zAt+5R27iLS&inKW#aj0DpdA<30aV_;I*rQ}bPOWcRAy+Ykj;o(E&o T`B=27%*w30YE4y5WcB|6Db{1_ diff --git a/src/app.kv b/src/app.kv index 8a74fcb..f5b6bf0 100644 --- a/src/app.kv +++ b/src/app.kv @@ -27,7 +27,7 @@ MDToolbar: title: 'Vocab to Anki' anchor_title: 'center' - right_action_items: [["refresh", lambda x: app.get_running_app().restart()]] + # right_action_items: [["refresh", lambda x: app.get_running_app().restart()]] elevation: 10 FloatLayout: # MDFlatButton: @@ -38,7 +38,7 @@ id: dict_dropdown text: "Browse" font_style: 'Button' - pos_hint: {"center_x": 0.5, 'center_y': 0.9} + pos_hint: {"center_x": 0.5, 'center_y': 0.85} on_release: root.open_dropdown(dict_dropdown=True) MDTextField: id: word_input @@ -48,26 +48,26 @@ helper_text_mode: "on_focus" # icon_right: "clipboard-file" # icon_right_color: app.theme_cls.primary_color - pos_hint: {'center_x': 0.4, 'center_y': 0.75} + pos_hint: {'center_x': 0.4, 'center_y': 0.7} size_hint_x: None width: 240 MDIconButton: icon: "clipboard-file" - pos_hint: {"center_x": .9, "center_y": .75} + pos_hint: {"center_x": .9, "center_y": .7} on_release: word_input.text = Clipboard.paste() - MDLabel: - text: "Select Pronunciation Accent" - theme_text_color: 'Primary' - font_style: 'Body1' - halign: 'center' - pos_hint: {'center_x': 0.5, 'center_y': 0.63} + # MDLabel: + # text: "Select Pronunciation Accent" + # theme_text_color: 'Primary' + # font_style: 'Body1' + # halign: 'center' + # pos_hint: {'center_x': 0.5, 'center_y': 0.58} GridLayout: cols: 2 row_force_default: True row_default_height: 40 col_force_default: True col_default_width: 180 - pos_hint: {'center_x': 0.26, 'center_y': 0.5} + pos_hint: {'center_x': 0.26, 'center_y': 0.45} size_hint: (None, None) MDLabel: text: "English (United Kingdom)" @@ -81,7 +81,7 @@ MDRectangleFlatButton: text: 'Generate Anki Flashcard' font_style: 'Button' - pos_hint: {'center_x': 0.5, 'center_y': 0.35} + pos_hint: {'center_x': 0.5, 'center_y': 0.2} on_release: root.show_data() diff --git a/src/app.py b/src/app.py index 227a67b..408d44f 100644 --- a/src/app.py +++ b/src/app.py @@ -86,7 +86,7 @@ def run_spider(spider, *args): # ----------------------------------- KIVY ------------------------------------- -Window.size = (500, 500) +Window.size = (500, 400) Builder.load_file("src/app.kv") sm = ScreenManager() @@ -95,11 +95,13 @@ class MeaningsPanelContent(MDBoxLayout): def __init__(self, *args, **kwargs): super().__init__() menu_instance = args[0] - for meaning in args[1]['more_words']: + section_id = args[1]['cid'] + more_words = args[1]['more_words'][section_id[0]] + for key, value in more_words.items(): # root.ids.meanings_screen.ids.meanings_panel self.add_widget(OneLineListItem( - text=meaning, - on_release=lambda x, y=args[1]: menu_instance.confirm_generation(y) + text=value, + on_release=lambda x, y=section_id[0], z=(key, value): menu_instance.confirm_generation(y, z) )) @@ -305,22 +307,25 @@ def checkbox_click(self, instance, value, tld): if value is True: self.tld = tld - def confirm_generation(self, meaning): - confirm_button = MDFlatButton(text="Confirm", on_release=lambda x, y=meaning: self.generate_flashcard(x, y)) + def confirm_generation(self, section_id, meaning): + meaning_text = meaning[1] if type(meaning) is tuple else meaning + confirm_button = MDFlatButton( + text="Confirm", on_release=lambda x, y=section_id, z=meaning: self.generate_flashcard(x, y, z) + ) close_button = MDFlatButton(text="Close", on_release=self.close_dialog) if self.dialog: self.dialog.dismiss() self.dialog = MDDialog( title="Confirm generation", - text=f"Do you want to generate Anki flashcard for \"{meaning['word']} {meaning['gw']}\"?", + text=f"Do you want to generate Anki flashcard for \"{meaning_text}\"?", size_hint=(0.7, 1), buttons=[close_button, confirm_button] ) self.dialog.open() - def generate_flashcard(self, btn, meaning): + def generate_flashcard(self, btn, section_id, meaning): # run_spider(CambridgeSpider, gcurl, self.tld, self.timestamp) - print(meaning) # {'cid': 'cald4-1-1', 'word': 'run', 'gw': '(GO QUICKLY)', 'pos': 'verb'} + print(section_id, meaning) # {'cid': 'cald4-1-1', 'word': 'run', 'gw': '(GO QUICKLY)', 'pos': 'verb'} def show_data(self): # word_url = self.word_url.text @@ -378,15 +383,17 @@ def show_data(self): # print(CONTAINER['meanings']) meanings_screen = self.manager.get_screen("meanings_screen") for meaning in CONTAINER['meanings']: + section_id = meaning['cid'] word = meaning['word'] guide_word = meaning['gw'] part_of_speech = meaning['pos'] - if not meaning['more_words']: + meaning_text = word + " " + guide_word + if not meaning['more_words'][section_id[0]]: meanings_screen.ids.meanings_container.add_widget( TwoLineListItem( - text=f"{word} {guide_word}", + text=meaning_text, secondary_text=f"{part_of_speech}", - on_release=lambda x, y=meaning: self.confirm_generation(y) + on_release=lambda x, y=section_id[0], z=meaning_text: self.confirm_generation(y, z) ) ) else: @@ -447,6 +454,7 @@ def on_start(self): class MyApp(MDApp): def build(self): + self.title = 'Vocab to Anki' sm.add_widget(MenuScreen(name='menu_screen')) sm.add_widget(MeaningsScreen(name='meanings_screen')) self.theme_cls.primary_palette = "Blue" diff --git a/src/dict_scraper/spiders/__pycache__/cambridge.cpython-38.pyc b/src/dict_scraper/spiders/__pycache__/cambridge.cpython-38.pyc index 168830da4e8899dea9d44edba9d5ca48f1f2db29..a0302b7650b26011e7802790a2c6d9167a260841 100644 GIT binary patch delta 1276 zcma)6&ubG=5Pm;4+hnu5Nj95oQq!3JQfiAQwIaoX6^eS&qBjezQ%YM{+fBU4+vZSu zU)hVOl|=NSC-Ej8Joqmt5)^NO2;xbPb@pvZnxf#s4sX7h?|U=v&F+h-FH`1q!_Wvm z_lIYf^4DIPU-ir&x)F8@-})IPZfEx*^g*K z3t0UEB>Z%E)fj5(kn#;dk*md=hXm5Qkq(6H%swsi*UfdgIrrYmtV~~Nx@<1n#iHi3 z63g{rIf+?)n1xuALcR~oi!Aop5jxmVz?xASiz@E&Y2xd`!q}T`4CM~Q@&WVC%b0^a zRPDuFKqm7vsVG^fCG(sAAe@rnDs-iM-L1*o6pBZ^C7={vLvNdax?Axf`zn-Y86hsm4KNW9= zt0Gh!GD8Bk`NVxK3aIew|2R!a`hY?eAW!Cl{xRYn7?2?zTdIxvuU!#FV|Q3`l{ z?NDCza2^g>++JCf50vZ75H~AIh&1JQf*l|f@J-g>a1_shQ6dko);+vvj&&H8=m?_d zJ2)2(!LKJObTr2BTZzUsg{Sthk!oo$29D6%9PiuM$A5Nlqs=cPoww2=DKJ5pA}vxd z{zz?8EI@$@4xO@D6I?JjHbVqi(_2QgE|V21bJz_+36Oa*aXkEX9%C= z&W46HocOkfPxv`E8J@JBVsO!2S@terMu*q$EOpeyjg4g&JPQ%;3Yv+*@V7P3yoGQs zzYxC9kMa&(4WH*H=e(gPN1AH18bPy%Z~ZYE!d0eFC8be?YSg4UH>oPuHrg81SdEr= jl~$(1S^GrJrX^-#T9#IsinbDN*agL6Hh*=`en9^KtWreG delta 1216 zcmZWoOKTHR6uytibTY3zn@4Th^r>mPQ`CiSiW@ByM06vyv^}9AOJ&!xzSKhC<4;{xO`1D?{ZWr!< zbkFQW5w{ZC!ME{~hum4HoCITHtV4l!X@_-5V5psTzKe@^n>E%U7MS6NQ;Jy-5A3kz zOt&220I3;m>d!Vg*hiESp?6~7bScmx5r(wGeU14)1jTbqp&t`)PvvP*`>Z zODb%91WPF_7hx?)PKyNAey{x=S% zT9&dwPGmZUz^3B#O|{0RQ_^Pp2kM}isNJ&Ld8L~ll!Nn+WD1yf;XH5dub@y|6r>Pd zty!#y;nkFR&qZ+spU1)FNnk5%aexI5c9GQLZSKEnnjH%+!-TN990#&# zpF7@04jUb|;8>9%EMW^rU`=r4M3<=u4N7<`wEeQ+f~mt=@CQ*t$nrjKvq1P$Rj)2J3VsYXrea+m6Al|@@d#6X*)CYz#@)VLn* kXP4$R>e3{-C;A$f!T4qDkIrr8(kv}dmzmn9&)GNhFGw^v+yDRo diff --git a/src/dict_scraper/spiders/cambridge.py b/src/dict_scraper/spiders/cambridge.py index e0c84fa..65130ab 100644 --- a/src/dict_scraper/spiders/cambridge.py +++ b/src/dict_scraper/spiders/cambridge.py @@ -271,12 +271,13 @@ def parse(self, response): last_true_section_id = None for section in sections: section_id = section.css(".cid::attr(id)").extract() + more_words = {section_id[0]: {}} # dphrase_block = section.css(".dphrase-block").extract() parts_of_speech = section.css(".dsense_pos").extract() if not parts_of_speech: in_dsense = False - # print('not in_dsense:', section_id) + print('not in_dsense:', section_id) word = section.css(".dphrase-title b").css("::text").extract_first() guide_word = '' part_of_speech = response.css(f"#{section_id[0]}~ .dpos-h .dpos").css("::text").extract_first() @@ -388,8 +389,17 @@ def parse(self, response): # ignore this word # else meaning found then: # keep this word - more_words = [] - if len(section_id) > 1: + extracted_meanings = section.css(".dsense_b > .ddef_block .ddef_d").css("::text").extract() + meanings_list = ''.join(extracted_meanings).split(':')[:-1] + + if len(section_id) <= 1: + if len(meanings_list) > 1: + for i in range(len(meanings_list)): + more_words[section_id[0]][i + 1] = meanings_list[i] + else: + if meanings_list: + for i in range(len(meanings_list)): + more_words[section_id[0]][i + 1] = meanings_list[i] for bid in section_id[1:]: blue_block_title = ''.join( section.css(f"#{bid}~ .dphrase_h b").css("::text").extract() @@ -398,18 +408,12 @@ def parse(self, response): blue_block_meaning = ''.join( section.css(f"#{bid}~ .dphrase_b .ddef_d").css("::text").extract() )[:-1] - more_words.append(blue_block_meaning) + more_words[section_id[0]][bid] = blue_block_meaning else: - more_words.append(blue_block_title) - extracted_meanings = section.css(".dsense_b > .ddef_block .ddef_d").css("::text").extract() - meanings_list = ''.join(extracted_meanings).split(':')[:-1] - if len(meanings_list) > 1: - # print(len(extracted_meanings), meanings_list, len(meanings_list)) - more_words.extend(meanings_list) - + more_words[section_id[0]][bid] = blue_block_title # if word has multiple meanings: # create another instances of those meanings - # print('in_dsense:', section_id) + print('in_dsense:', section_id) word = section.css(".dsense_hw").css("::text").extract_first() guide_word = '(' + section.css(".dsense_gw span::text").extract_first() + ')' # b = section.css("b").css("::text").extract()