From f81f7d8af822056627e008dfb2a8aa2280189f4d Mon Sep 17 00:00:00 2001 From: ExamDay Date: Mon, 7 Dec 2020 23:20:34 -0600 Subject: [PATCH 1/5] Gave BaseSession the ability to set custom headers --- .requests_html.py.swp | Bin 0 -> 90112 bytes requests_html.py | 311 ++++++++++++++++++++++++++++-------------- 2 files changed, 211 insertions(+), 100 deletions(-) create mode 100644 .requests_html.py.swp diff --git a/.requests_html.py.swp b/.requests_html.py.swp new file mode 100644 index 0000000000000000000000000000000000000000..93079a3e0736c14c811a2b1125a1b6f66ab31366 GIT binary patch literal 90112 zcmeI537lkAeeX+&Vib%E8ub~y)I_^`?5P>x6U1hEWEch*U|^UT1YR?*y1M)JbkWsS zO;z>uv?GdpG#W9OMBEZ3Mx){qm&7emQ87WD3a)t?cST9`X++Hv2)U@Svc<4V>T#&HN~T&2S4b%I~G>|t3Cz#6zEf+Pk}xK`V{C> zpihB51^N{DwWL6M{;0y!NclZ|>NorQJv#13_~+yOeM`std;90Z{e6GO`y>7Hl)qor z@jiU_JKp{49q;e&pTqYzb-cfie-8IEeFj24e$T((Q+ zynmp7zs}#^-tqoH-u*;>|4PUE`+E0be}9B8kPzR4z59B9Kds~a{k;2He_!Z$f0TDW z&fovK<9(oe$-lp$Ujo;Ie*iBBF9QqUT(A|Cz!BhcF#fLuZLkvzfX9OSfp5VKUI>Q3AUF#AKN#6> zf^UEu!42SJ;8kD}JOMl!{2ur@O!K$F7r}Mlz2J4=<>2{X8teue!T$h3!Vm^3ho2GgFxg4 z@HTK6m;gtBAHqog8~6ga4!jOD!Q;TA!F|AYVaC4*o(z`2EI0!k1+Is0{vdb*sDNGI zaBy#M3q1M%2Co9I1b+<1KnWZL9t`dUeh1{Pi0h>JW@A2Xwo$#%qpnt5n=Y9~Ml>2p zF{5D4{3#ZTJG^&%wcBwsnr<}R2d2S+2lG4?Pe;wlq6IRj@P;j*k!WJ69k+&}b(^BI z8})d^Jf9SNmRy)Cq@)zrCmU0-1?hfQ>wAZiD3yj>bgg!?GcrqOXGG*JYR2t_W<84A z%{VT#8+=OGKBihc7uVavkwuU5=)gj~s+0^2mE>`L&?Q3Q#I=^sn4Wa`kd3ZMUZ>*j zoDe~?*=V*#2b%Fb+BF1Uun_oDoz4`DBb;6ZitG2_crC7wg}FvEjyBG;+w&vq*Do$E zmZqxv;%XiBo2B+lqkdee9=F%K{IyD=$ENY}P^q9GbP?ovE_bASz8Sa3dZk@$)T739 z)Sii>(|2z_E1GQ7+q@{~3l(=ldn0)HK*?Q{c z^Um5`-g@?yb57ZI_Gv>RSG85Gx7wBZWIU)(h9hfhqItg0aPftP(=$$TSXrpG%Nb9d zPWj}wW~=jMqMo6x5aRCULP!1MR+vT$FJ=W3^L!0kR z%1Bf`#U*&FIwN`mpQ+Htm92d^vJWc}d~x2+voal#MXQ!;KSt&&&B~nfxSQsM-)Oqo zn2Q!?s*^Kmi)%Eid#m+IJDwucRy<{W@A!CSqSdG^wBvHETAyu=kGopq169$CxCWaI zvJp+tJ}qb>rzH1A3-dJa!N$BAdZp&O10PB^3C_2qB?Z=Qs1HyfM(su}0iKK_k#ZL3 zzlx-k9Bm-dW~H?{X_;8R)tBH7QXMykjRdDJ%vI{GQem=IX|*DCJnroXgI(k=tA%rM zt5w+>Q(XR+drita%iq5r8u5W{uk7E+z)+1_gIo7c#zrJwy6~HoDzx0WP;Xb~;;rhy z2a7v{nxoYPsX12ZNTVIr0Y}G`j(^o|fePi_q>D#9666I8HKWn#RAXQCg>t6Z znIGR1;>~6k1vR$teRoT>PtJq5oA)w&1%F|<3eUa?XOe7TlX;g){JD2VH;P+HFYXzR zlAne01v@HjT#19%2G|W-ulVlcyVQ4r8UXaMKs91;c<)mkV#D;^!;aVy8WjLx; zXXEHJs&Qi|dXj4z>yOz`I;M0im3+F=tcEX&X=1EG6g4AiF(r!kSLO)6)M)NqpLz7qlXTCnqdKluop58JDspvh9$jY}Ool$*eppSeyEgb{E7l zA7z`IrQD_rKJzw7F8Bw%=m?}_wJgN%tCRCHU73x`Xnlw!z(>TbQrU)BpTFG<4UL7ib?`2{?k_^X zyJwnlWoid;ZQUPFF0>m>_XRTvRYWW4;1@T0wt2=vJ^g)gqgpTc$SbX-`ed~c9vM># z;{PAZ`1Nxw)WZKaxas;^c>T-3+2925cyK&80=x&l{*S?BU>0lxLtqfx3}63U@E!0$ z@OJRG;7>sdjDd5(N#KcK6L>hd9^U`$;Emu7;HBXCU@zDKjsw2~ZiDat8u%)B7kD8! zA8Y{;kZj4E(%fQpXC15vL3w{hge<5crCl~^Q;5zvG z7lX^eGbcg#nmYivFl1K0+SCwQV}8H`>sh z$uI<3WZ=zA&TYmbk#~e80uYevjU1s|7^Rdb;4(7eE(k)HQUA=SMhh9;l~zM@v1 zY-q~T_M)$le#Tj~-J$e*IP;lGtHKQ3*ZgAeTO@vku1h(ho>U@FhiH+hfg|`-WMQi6 zOjnz&cD_{|ysrq+6)YnE#~Mg@1G$q1(=&&@r|@Y@^AH~7F9SErYTkh@S;H5%NgYsTv9M~%Ye5KUQb zg+Nf<-TP4gDTT!fRX&+BmTDEoubIK*8wz$6h2Z*#)fa^)iKqvcfC#1)m&%%MmzhG^ z<6!vV3n@vEG7;XT+{tJZIJUsgBp>)3bbXWW7Pib(>c~ifFB5s!P0D4Xu9HivcVO_T zSVP+x65cWXy7ZT4n5X-`CN-kg|i5O>?ZyxwzBd*F@VH#327-JP`;o(6T@y3p6vJhJhw&+pdje`lBpG zM_{j8k_e0y*RiK}U5i3zi!VZoB)^Wh`H?eu87L7=RVUl&H`#10^QB7AeUR&JT>rp$ zWl>b-8uh)Tufa^E$#jLD)lW0?DMaRg9Q`w#Xe)OHC^n=oq3mQ#g>+7l{bDdMt;vTAn z`2Q!u`#u=nSo}X7hF!PA=U)w;4$cR|;4tt-c>JruGr&2Z2z~(n{$cP2@Dk7j7lPBk zF<=9D82Bjs`$xcA!FI3({66?OJo`=H+u)PnL*Vt`1>h1O{{D%e2!2XBKL_3oUI?B8 zP6bP>+TaYZ4IBkN1#kaJ@Om%-hQLk8Y`zUX4sHgjdFj04;4}1uv*>g4zcg-MWwtCw zpY)9K6|QY}4O_3^?n>4^6W%J>do2$gDt{k{VE5%G6J0VTH~Qoz6gJOTRyvSOH~I{} zQ|mZ*cM@2vLbi}Pxhs24I-X9E#=#i=9!&S$!tJNyvhu>zAx-vg1uO4ozjqBU-&qBg zVX%8L_;-xIPm9mb`VJy7Ne9i<$xZyEFUzj}5}EaEbIblVx-ZYJCSG&?5?9aecFIM} zv!7^|{L7R%U_S0-XyUvj64eZwDLEW-Dca3~AB&Wgxmarf+)x{ArBcNVBZ{Kg6oO7I z>$O-5^|M+pp*4mk>v*UBAYOo#`=FHQzMup`-mZim< zpHSM6M%0XXH3Y-tKQ8C5(OxXd7*LVm{pol^iC?evo*OlcsoYJ!>E zDPlK9WE6E!Vl*i|OAL-2w)CNFQYW>vT$jMxbf0`}KqE&X>GBw3HRmS$cJwp(B4{8T3y2D*?ZDjMw4GF zf8gmg>hCLi&a{Q}=gObBHaF^SD|_`NWpCE5NA2%HJemg0sB5|_ zd(E|sLYDs7-)_{jri=*Kwc_mi@Z0Cx(+=L4q@+sE7;bBjO&lUfF zl4Ji1-+|Y^3A_?4f&T^efQ{fDpa8xM?|%(=GmuPRFW3wo0d9fczX7}sTn4n}KL~yX zzkdUGA9xvfI%ojx1GoU31ilBK|DWLVU;)em@&Atn4*=5rzdw-f|CL}n5dVJ)_yPR< zhruhrrQj)`4t9dagNK11l2_^SzYRPcv_KPF03-)6T6jA2Z+!~%DbS}tp8~&l3TTL{ z*dJ54ko3k$A$qxKywfs=-s$oaT^k1VoKA`-^=MazTjuf3YCv?-Xo|I$Q8e~CKBPQMc0rO)B$-9rwVaM?~teHCj*>E1m{Kv=}vIz6qW)O|6l za4gOmzdsyJEJy(jEqIpcV3K1LSWQEnT`Ho}__!3%Z2MtUnVO2HlI1K{PFL9s{WWb( z!l;CEb@E}?>Dd^X@V|vXop4X|A%a7vxF)!3cXFAY2k8(>O|*TJqGjg`^69Hc;GPjd z){$ejgD8?M2HNQ1x$$)9BGR4`%XAb};I2do+a!<$Iq9;G>HeXY_Gtxy7h9Lya3Mcy zqRnXk&$0Jb`m{^L)@Cn>#F5gybrMYl_TnWww+W}_B z3-#s>ev_YjNd9`)>XY4e&uO%{ zr)+F3rC-W(>~>bRP|%kUAEn~_`w(E_&#TY*=d@V_k5tS`eQ9uP5>@6!ss5`Sdf#l4 zsnyDD_K+nn)l?_tMK#s!8I+<*2u*i+@d0GQYSB*T+2fe=8d6DpA4~P96i9euU-3FT zv!7^wDX)x|rX&-3J~SoCk(88M-!*@UW`E2+8qGXSwmJe2`UpquGuyY55tw&PgTo9s z4J#yASxfR9yy2K`qb6;Zr0_CNqBM63DSb*aNC{?kd8a;@USd}vEt@sb7T04&XjReo zpc1J!x|X{%Ie&0{Yiq5Q5mj?O=Bmwkl}=J=LfyU=j(p6RZqV+M}NI8DhqQe7EqqmUf8 zN6Q?dQlH&3KAwbUuh+Cte%p3Rr!!jt=i)8e=G^&)=Ol!!ZhK>tOG%~%d-ac`p7_1l zVKY0FV#rBp?!6m5N22p*;PkY2q-lC}wzy(;GcI00FEb&$AKbe_N$bUkHXwK%Jnd-D z6kyUO39r%)b;$yUUD#KJQHZSXNV4qHXuA%3r4cpOYP-7&)H3Dx_(nIi*+i9+R;V1L z6RGERCFqw%Tt1di6-IIt_S<1HReXhN?f4V++^|n5G#MDYlw35A(_p5wxhY$W68DSR z=sh|`-Nv-hK%AYnn;PS3+@EzoVJ)$^7DYg88f2+orp=~%v)$GhPruxXt7}Nf`lp|D zWGYX(T

BwRbsF+dM~Q9g*#6Rmo|Jr8U7!f4j;K0g45>imi(_>hC6R^gx9~IJ?#S z8DOlESf+_88f3w*wqTm5=1tZdh%gV~$F)~=0iCHg63Ds>GHUgD`DVua?QUHEk z^GTK3!g33nx$x87%Pv$!g%UG+zqE^XbikrUaR4pcj5Qp^l;Z*zt1`hdG^j?WL)2YZ zyT(P4#JrA?XNu0EOw;UPET^E9s&>g^TCq+imwuF6T}jPfl6LS4(q-1nyZFt~j(mzH zEs%sNcwCVtu0kbE%O@>660fG=jr*iH>S#=L7KdioY7RG|{q}xi_xf&v6_hPe%FUo$ zUG@PkOCv{yFu|d^^3wuRU=;d}AYOuZ%9*PQpCsES6?1SjQku0V>p`tmrS<8g;H}GE zRklM&ddP&;OI=Lc-jYtNIn9U6bY3~o)s@BGA3D$OuWy6M^kie|6tgt7jFN!`7Fu%Y zP>oV4x?D4q!y2)sHLSL zlY&n6>Fk#cWj9eIZ9&1LJ%eJht7qXA{LAh_Ohf3^w`4_*Zj1`zMkcjhIV(5XYs^Z> z;(A#TZz48Ep1Qcd-K-e#%ou|vS~mVMW3e=LRbMw0Z1uCc&IV-r|KaC%0`dQ_rglmH z|1;oe{@uCUZ-vjl5!?X&3cLyogCE1&zX!YPL&2j7Em{|RmcH-I;TXM$&di@>qq7$BbiGvKx0Z@?HB0k^`-{|vkb$Znr>{3}2< z`)-H7*S`MS!1uwm{_cxPH`Pv+d?jZCDjjB=NuJKaPe&48=QSU)uoT`J?9?tqDG5}bnc(ID&cCMpJPK%YtnD*1A5^x=d1 zjMd*KCf_-h_GvoN%5HSl7i-|Jm`T!H7A76@ArTnOhN)hoiJ2B{AXihV$BU6ma!1Ns z@M8n+<$!I?^CmT1_aY-6WfG2^Kg)7K@#>;G(Z-_A0Mc%un~o>yBlaih+)T~jU5LY4 z`mk+D*0Mu{9SKcID9axCw8BOn}$OiUYf=RR9*5tpv5&Q?g=`RuH<+?WQ7H ziS`=HV}3*3%Z<qF*t2ua`l$1p!GY9f)5ySZG%&}Q z*=~#s3}s|7A%VJE5}3hL&?Q*}XpIfXXXQKWmnL}NZ61oV)J^?4lJb5w~8@mhZOrr zbz_5>ehju_3?|V2{d_=!{!l8~%8l7nW9;vR51cr}S6v^7`V<7}nOYls=LT_a%1jkL zFsVIe(u3ZD2Q33FWUQfz#`?xpe`{0)*1*QlF%BX6j9nWr**|Vj+2> zQa$OBwv0PX4+2<=li3k7&GxccW-`UnQFo5Z-8*6t#mROnJ0?wn)uT5EKEj4@ z|D5)xp|<%K3FGUzO3<+3oHSWQS{ATX;f6fGxVha9;$U|fRb5}54L{&~#% z`BY|Q{kf*()fRlBv;6cgSF%k8)>{U!=VZq44l37n)(pLDxs}q!>;>3%4>d_l4#~2u zWRyXhLque{!E|R~Vx6NT^v7Ew{EY_V(%uq+*Y)cw>rc`UH9pR+07*t=^d^n#mnylo zhSNj-eK*OL9ldv1?S@`m1JDWs%ppjZ>ye8ml*>v2=A^1A3zK{F0 z;N9R#a6UK^$mYN7|Ggi)3CQmM9uRE(9m(%6BLnz%@LI43Tm%Ndf5Z1n=l^ry!yw2A zUdM0Q0{9bfD)<3%fOi4O2(AXQ@3#ZU#{WaWQQ+^937i8qfCmEE@xLB?99#{g=YMZ- zFYpt1|IdSGfyaZ#f%}1ffzSUKcp5kltOMVG*Z(^B7`PUE6ucQ|pa0XrF!&aH{u{wn z;5FbY@b_N^Zv@W+7l8AD*83j@w6|Y;|7*b_m;;Xlhk;MS>)#080d4@gRJAlWDx-=r z-}JVsP0Yh^nQ1h%&1p=$v&zy6)_sF|Nydt+1*ag*yC&w0ZW40lUv#M_+M1OVbIZov zY}QXnqIS9cdMxwPTjd;*0MlVs@sgFC=ySBwA^A?e;WkG>UUN#5`ceJKJNsrgiXhpR z4`8)O6Du@}&B{Dn$>0EMzF58AxZZ99L%~}bjI)}fY&iZQV;d_;R!;b?kcQHcN=cRz zG}Ud{y>Cfi^Z=R3lD;A0WmefG-M|hsIzHY^zexgZo!zVym$G%G)2?}k#^*q$sKirt zmZynC-0C3Y4-1N+J|S+&Sv~LG%Gdct*!9QoZj~s%u$Ymw8)d>=$`AaVHAG9jGLfDL zP8lV=9|lavP54hm(K~Ml-AJ$-R!wpy6W82P8|@)`UePt0YD_M;GbUtQaml9H;@>Fd ztQxD?Rx_qa3K(g9^BkYG<+Abm={u@oa%DV>$(saT?FA-T#%dwNF)@L?DYY;%i99FY zY88@3EByJ%KWBy5S1D5=KVlPtr?CN>2CI4@Y|09vK><^giLXFl`0qn>8 zC3drN(Jw!k3?u{?@QKk&a%O%q!c9H<4m!!J+%gXqRJq)$HQKFm85(;&JT$*MKNQ?` zop{9M)8q4fXU@4uq(5rs8kR%Hztz>JgY^AmEA8`nfOPy_Zo$Q5gA!=0YO$qzp+ymx zJYRy#IgUY@mO)~PMAAgKIK~F#YV(MJ^}fHzzwsjtZ->T4)*Z8FD0*DPLsF>}`&?t6 zjP@;Q!Npr4)+PmQEt%HBzLLQJ(zXwiFOyu7T>I^MF`k&vt+-&J{4two#eAk!B#cE^ z9Qhoo0_gG`C<-gcKnmvM|1hdJ*ZlD4+#tVJSe$TPmZF)6NgMF8b+^nIMnGxaGGA-gH_db zebHUxl@Dsfp)M^J-7256`kAYRA&UoDT1)~+Gk@oo7RCR+48Hz7K>WXFG41(s+*)8Q z_y&Bv_0P!Pm>)-!sAK;52X~_$++KOe+zsDJR3}a zv%s0)IB+vO|98Qs!27}bz$?M?z{y}Okj}tYkpTp|e^>B(8pPmA?EA~E|8d~w@c*}g4}v#+S{tudbLVA1#fLhySTb_Ekgx2#a*?_@SUG>x@EdEI3Z@2J0T|4 z`a9iVc1MW<(Upm*JR2`fG%C$0EME=|>>%<}FoDydwx@Y^vJY09-?pZbm`bwQ>W@k~Fqs+Rf{2eT}$v}`vMxgefQ%uh1 z+Xcw6J!yBKc#?2z+FOuQ_R565x6y74P7h$2v@#;aQ4@j;AFxqGT=Jz1Anq8j*64u& zztL^LEDsI(r)Vul^=#NMbZI_my^EwXYP#w>8UcBSHE=;HbMxN4^GYA74HZYohaULq&|JWUyPtVoVpu545ZmH`rh6jNSW^!&-7K zCNKAM5n2`S#=Q_AE~)`%^f_{hevI(Z=`ic;^{_DP@Bpm{ih;jaYd}6ZB|GG)S$F1t zjV$indboP|;V?Gs9nY8qvy9s;fH|5Zjn1P@1#GQ>D&!X)Tnh<8CMTy#+aerTVxWs- zAfT2vy(J!0^~kBjvphVKoc3dfph3E24TKFx$xcxxyiy=$WhrNjOyU`pr7k0EMj{*5 z12RsoXf^Xe8>pOl;d7mF$%d@Z)5)i^AFoDS(2H=Ww z-7}v~(mPgV6TCGBpPN_8D#%~<5(~n~JUUV{Wp9%ZQpF>E)2+Cb4ap}w{ni9HK^v@) z^%{OdBlz~c@qmq9A=&v9cNA0jG~Dk%tol?js{}0OvQwZagfO#rn}Rv1u}0W@8XY$s zm19Uo@HYR4Qh#M_lv3Cf-BL^Gb6 zVt$aX6#9-xdq!n)hK_h_$`Z(!EBDR`N)n_F#IVsMs0C&>nT@I#{UT)A`qZDKtt?ZA zBtR&HyjnM;3dsDyHhpSHK}LXBetg_Ep=sBrDTw6vecFfc>M1Gg6Sq$^8YFhDIwHJw zCD=+d^=uK%ZzuBxL=eO!2_kj$-zeXq<3fW}eyvp|3AM8ZV5XRh5uZj!ArmuK(+wZ0-Xi;Kp;JU>%d=tKL;0rlfi$%|9=(87QhtP3C;zN2NAd# z-v7Jc6W}UP1tVY(+(tTY1z!bP`+pNq+O_^KxxhIB-{I}A0tpuu4g$BTknIN8I7NkjVqdPec}7qzZyf zm;ThFVP~56sGi#n;ASwA5H|LQv+|Y)49$0D{42V+J`*v9YjshCDRg z5w>#ekq^(zyObxLdJslgw4w}~pv0`PzL4yF^3eqgQ`&9@BN{e`o3}aE44O-6T?n*u z9N{+hAMe!(;#E5Y(KeMMB|vYVT-qA5uYby})fZoPwED)eufE!L2cOgvHn<-&4y%=l z!N?||Q&)hFYu!ylNh@?F5(&tW|s$<>U*j{T`F%w_6kupj0Q)0qk;#^L8>SS*uN zpH6Q?Oxa@0-f4WkTWU!7;7mkx2{blVq}h~%nR1&1_zGB15E`*_;)f(}=-V*)If0JH zQw?<;9GnayBWu7-X1+5*&%ha{!U_2aGKoYZ#&M=1+KQISJH-)%Vzx*I*2!HVyLbD8EZ;;V@mG3jN*U-%uCKJkRYq@@uWi- ztv91K)yuE+ld`8pm$q2e&!}WAnB>zB1v5rtG%XGJ+-W0Rlo(&IW6qMdTf7yN9=)z* zs!Oz{4a6=vNf;qE)w>STweqx6W|u7~FEUGlRR&xp{hM$$RXIe$l+?@*`l>-(5_BzP z%0}gai<&97j4U(N8BjE~be60vn2Y-(el~Fro%~(7;!xZ{r>7N%j3IYLO(jW3kdWV?G<~kI}}2A;hxEht#9N!^e(v0Pmmw2J^}gML2^Hp z$;I1Wm-x+Em&i0UKVak}{;BqGW*)8m5b2BM2lLdA?-@H+B$ArtYFg?)Gu22P)2T(q zEt#4QoBhAn!}HeQg~k7`^^B-&|9=X+8(amh1eXEX|9>=i6!-x={x`v^!85@k7z0Ov zZ@}kY0TzJv08D`=fsG&n_X0P=>)#081zrGV!FF&okez@V;rCw({t}!5#P?qgwC4X6 z(yKH-0vru)ArD^!9|a!)*TBO+6FdX#0AZj1f5OL0um8>9YVZQE6Pyb+fTO_eTDJ#B zfgi%(e*s(z-Ui+ZUJ6bHvgP*$`2DNFOTbgXrQj*xJa9O8FMPk`0Dlk4;8<{9p!52^ z3%&?G0A36J4fMQ1=SKG;Hpoqe0nIF(1v6r^gzAR}AG#Gu~X6=(7crQ!os z)ZfhWcYJ3o-fVToYV)*qtYI*k>*NUjSQ!&^xua+#kdZYsS4b+Kzzc>Wa5D4PtQq03 zN}U|Ulk$QYBgTH>-wKBTbrhTmFSoQW2jAs{v8W(7T^*B9&kSeT5Ip(8(K;EnA_We^ za~2(JOgfX+X`@Rpna)=|%#0o-3Pd%8v({#Y*>WMM%IE8FwlwTGvrtK?pp!Y?fR zb_yw68!hC@!sJzla=Kag+KUVC0ukk1AEGm>-xqpH@2OLY*cW=9(dpSEYsFHt z3nynx8r&f=<2?iV zRrvd_fG-1`1E6yNUjbwb;E~|7@c6PDa2ePS#QVqKp5VXW?LQ4<1K<^47HkJUfv1;U zz>k0r1M&TT24n-^q2RvYzu@z=FW_1bgDLPN@JHZw_}PxFi{7Kfv4om;Mu4sOP6|G|Oow-+B$$k` z7s0@Z6PD!!|H1x>jK!p9^I2q;PG|T$Wj^%AGa$|mUmN4>+Bam059Tq<_&K|}>w8Cw z-pWjoeX1R1L%SDNeaM#RB(E!46)$?-o6MTM2?8ZJG(r}XVqHP_uixEUmHh!OQ<#vI zB^;*(c^vyF+Us5ZTBeGI-T91uAVGTbgV3ah_5R-aKuT7+=u$qJt;#E|yePqUlH@DY z28F(IK^Pqi@~7(;O5Kyy2v$K`r}k#ta%Iudn<4Pd%HoRjb#2GYG3EAVICmM{M1aua zjmnWX)=aF0#e>1HQ7}`)p;b8zBl$&zN^MOvw{HtZR{ps7LR;+_CppX+y~0zcQ$Dy> z#(D;xr4yF-;I_T+H}CB2QSgcXWv@GWh=@yxwoP^3-yzrz30+TFrQ@?}MopX*5{pqq zS}uE&C9W4zJGOUT#7f$o*$lH3z#${i9W|KDW>|qKN!2-}xe0x{6}06GWoJ2 zxF>iYx&bc)(ixZoHE4|)Ty0Kq2U>HK~aco29X_;>UM-UTiPF9H>C z7SQ1fIve0R@UKAU0zMa5*|=iq|4k^MO0A2{P`wZJDy5ZQ{(L*ko@*#)U8@bk`F7s@ z??d@^P54IlkQ~}#M#nV!hV{?+rL^DHNp$^#3G2*6Q#I54n5;!=cVMI&nN#iH{=tO( zg9+XKBkj>Kt4Pd6rO45>%_6Nt{eubF;mGWwa#tKoXcNxzsdgk_=mryGZ#lCKh49)R zG`Y+YV-G9S$(PwO>~Xwd;%{xbjKcH?>+y|dNQzEMO#Io@*q$_twH>bM*plUR#bVg; ze^PbnpKDleZyHw(3U(@uJy`Cb#^2I&4OzRR&X`@YW=8Z8apjb;5^mkt*cFYE266{h zAz1TVWnuKsHC(oy_Rlpmg$3g!lioWOBqb6L6N#9wPR2RyH&w%Rbp+vgOlybVSgWLv3Xrx=^SU~)$C0d4%%`u`OC{{!GNJNf^x{{KGs z{x^W9f+;Wwo&dfI&;O6$AHcJK_5iej^#2b7*TeU}7+ely6Y&4R-@gMq8$1ha0uKid z13!Sre+PIOxDv=7;PGHB2z&m&4PXCkAb$VR;J@MFzYlaKp!WG|?f->9>;6q}I{3fv z@;VPt`vSz{{~4GBvhVj`@MZY=8^H}gX9E5;sDXz9oe6jt_yj!tHQ@Q+dEfvL-~WeT z4R`>!3BLZ@;07Sx|CwMKJRDpPUoU?D3eW@>f(yVVFbW51vXxgjz-xJrs|;n8-U`8AxUZBr=SP3T6o zFVNENck5Am8C}I(q&#S6w>Hkm)=l>Cy}{zen6~uWw%K)9^atq`Gn-wP3#;oSzv7zW zawqu}Is*yMN>I<2%=QL*zR(xQgw(ja4W^=;ig*4fzhb4BA)%Ub|0q9JC{}lvUH>RQ z309oZO^unF}qx?d*pnH%HuSN>|i#f{AP3o6FkdHn~oowC= zLpYLerKA3G9i3-d`^Wl)%Af^44A{g>nvibSSmq0FIJ~Gm-}}6g{;_`8tMT(UvpaQg zx$kfFv3^cG>rdSa^~=Pb;?Gmb=v{HbpVeQ@FWfnTsYJk+SO4tRgD(3WdWWwv4MR#v z#-QN;W!hBy|5wkO%AoCj#jL919|FB=|XU0G$UQJAm&5e*>-n&jOMYl);7I0`OyG z0zntxJbn*@2Z9HHA0P{OIXDAs11Ew<0-X^knLq=`R^X99x&YU}`(Ftz0fS%w90BeJ z?h9mF;G^Ip!07)g%G&?JlBG(jj)CKTl3!BC3r3u!)`-eg#&QpuSF7~7N^d5Olx5EJ z%*174SjM?W5)bJV#z29C<-0Z=5^zUzw$gVBdxfG%-zLdSoRS}GSnDo2y}irTy$T8y zUhTKy485Xhc}ZHqmgKC2Nkl-}+do6kc2#nMpc!TRwK+qt7!?n4hF;Om&`S>ElkINV zhv=W7hslgz@iX+;w`oT8GD-zScR*pzW&V=fqn3Je4q7YDIfNCrtRo|6PPI7uE~`bH zbw|dXee^R$@6|JG zY+l+IwzQaPcP_J5ktC;kk5CYr)hqkry_Jc04&B5{eKa7I=C2A!MQ-T|2G2h{yGcr0pNCc`L}=;Vq}M}eQg z&wm?y3rIiU?VtvBgH7Om;Jfhl-vRFjmxC7p=>bfFCxOR;$AGWF%fAo27hDCN0``Ew z^WO}A|7q}S&;s`bzYD(4+WzOkRp2#1y8dI}Xz)X1GLp-@6X+~~D?tpVKm=|H1$9T5 zbJ|pLc|}FWr;HucZtX?+zuWEh_D>2FnHgD9o4y%Yhu-OOip%)|yBqBGW~xK~q`-nv zzQ>;ol@c4?{gVO%p>n!&2x$5z1@=!0balhAM2WPue^Ov2o!W^){gVO%Cv9~koco}o z(WyEqf52ebKPfPDw>CrTpA=ZDv@(ZMB*fu|I7xSspm;R&qTL~kPLr;GQlKLhO{ynl zq-QvgQ4y4H5j63fqiO%7KwFWz`%Vg!NYS6~qmuB8YMsL0gBdE&`hO9AST^*;|F84> z@{kMP*g1-WFFb8&m68JVUfKP(=f){{i0ImN& z415PUz)j#^z+Zt^fsH^m0IvoUpaRYVyTL}FGXQ=DZUCBZU<2?r^6?))XBsYn zNud4zXMlTxdw`qZ?LQ4}1n&TU4K4(m!Q;SB;qyNSJ`3IeE(K2kZ-v+20k(h#g9m|6 zz~}!Rcs;lrRDsS2dJWcD4(Z8cvd`=nLeY2LGgRs``|+Yq*+PdbP^r)C)85-0tk-$!Pq5&eJ0wVphJkVk z8|;;i!nt`FJ*3p&TV-m>4kq`0y*_=`YbIpH;8$Z18_qZCtvEfzK13hhBqGI?S+4L| z_wn}39HjvcgO)DN$akOF4>Z%gn{K}ZWjk^Z zyDr>*@;PTYRdo5v^kcqqP8ZF3ggTVb1J+CN>*(gCo1A)rmRQR@)ee{MYw7fQBqFKq z7KuMQq}y_!7fTUF_ys>~Gt08A#%^vc)hDGqvdgLe3=YX0M$RFLDIG}F)H?aN(N2fd zI*GiCARzB3ZJ|;9$vb=Hp3(VB2F>^c+ZIXG{3WHuW@Vl=?ZF}&OXg~uHmoh1-V#dM}R zC6m%gjW;?KEFb)#m^#12-eLNYuSp{j21y(Bv0|pSNa?e)N|`E;YGxWj-mJ~Ed*Rwf zC?rdb>@SM{|0F!`=i!UR|DWjjf9d~U2c8T5CwMf_nSbAar@tD!3A`N4g2#i0fm`9} ze+Iq`J`P?5wD*5MSOhWH2uk4h!9&5%;rDL?K^7nzf$s-z0vCZNfHj~9bUwg`kO6!U zydJy+TmjlZ_5vOb?g4H_9w1$TzXX2)BqJCF(h-o|fCmEE2>cOwwtV|M_iud)^eND% zK%WAA3Up8)4Bq}8a!{znmS6(=m@^n}-uf=Ay-HFCBL@ig0 zbY*AgzMJV4bqgv_H-)coa96S;vbS#zJJ{d$Dm5|`KOAKW#~m)4aTrP^f-Z(-k`t=b zuY~+6Cu~;AoI<&hUI+=z(mZjL4_bl|)f_!z)SIXZ%F7;TElD6NAG6>&IMXNKtF#ip z#r_s5t9S!N3GqZzT^g_MR%MmtB39%S1^Zq0#=S(BWB$WT`ACa9HuOr0YDF(qRB{~UbqR`_A@ z|Lgt4=QHs7{{UVB_Jec4vEaerXYl%82VVnMfxiT&ffK>u;5+d3Uji=#PY36M2;2_; z{(r!`!2z%vtOwc$a3i<@yb;I-;2hWm9uJNNx062c^*4g|gZF_q0`d6I01a>I8I3L~>R-y{luEGUQ}Mcq zT4Qo{WF&G&i>2?I3w0e`U5d8uuV@!uI;?M-j>gAbxZ~qmOl!@@lhx^JJjG_gHhcBh zNf__vXkRw&$x_vPv$3x_6;DNHV4rfAOKGK@iOEbHu*Q)Z10xDEbTyvMWh?jK!dW?A z*$>j+cbFt;5EyzsNc^kvYn~1k(j6!v7x|qEcvb1|U5D<#r>pv8Ri1hzX|>;F{rTT+ z7kMgDkaKaWfo0Wt#=?NP!W(ru?W@!lx=y#&Ts7lyHePDM!HuQOL81BC`0T!dp0d~M zK76y(m34C(7ij~{zBB&^h-D<&cWLe``RZodi!uSab-W$%auJ?Ee&2qqP1(A z@g)l!v_7?VZSPoBgAeY%``htxSBNb>Zp}0nYE!mV|z?JcB0u>Y{gB4RbG=&XQ6H}&q;V8Y^(FW%3~2#0TNKT-SE}hYr%QR zOX?%r9+X7iOXM^;h;`mxHH(13)$ajsW)qAAzTTF3>rDPX>Pgw03_l@E+tAe*q4FCj;5=dlb<5 zez%gJuK>%}Z}7CYpS3;(`V{C>pihC{01AXL+mGnBteQFdT$3QF6uN~%8AFwBpqq3g zR)g+}FjCISlHg}mun_)NAw-gX1KoY==i21gQ&ri#kwi>9ex6K!5-!(X?nla*R+iP+jKtXXhq zy!suY1Qa`=RHVbTkw8tvrXt&lr%GvYr<29p4B3j=PuD~KeLhOTnS#%!W3m}n5NB23 za5ev|H5&7#6YW1pBMozWWqQjD$9`8A<|qvkztmW0Ml%fpqk);Yx_73{0W+b&X!AkVBr)slO&qq;w??WB4(@?88l&`dna^FM9jNqWZ{)u9E&TZqgGuyMLl$=$MTVZTi)Py))#&cS&+pKGg3` z!=wK_5dRNbZr8Q&{jUZ~paRChR`65!{x^dO5CPfuw{QJ-{ac>`eG2p`(5FD30(}bf zDbS}tp8|af^eND%z+FXwAe!*9im^80B`qu?qL3hy132qu;eAiSF3aBga8wSGJ4xb7 zh{MWKSF0hIbQa?WZA%IkvBd8WwkxgKR^}LFdnSaj;47}T7MgK+dZEn?EtFD&hD|3Ww0aWdJ2Nr?(JE4+U)#widZhqY8#6bY!%7DP_fke4*3qTeGaH zBiBPdp(CD6P+>GML79kBLKI2@<-JyIdF~j-IiU@q?jQ=K0cR5sDBTO~Y8~YYG1rR~ zBoJ`eM3gG)$y7oVS0-mL`X5_h5>VsCD 5 except AssertionError: - raise RuntimeError('Requests-HTML requires Python 3.6+!') + raise RuntimeError("Requests-HTML requires Python 3.6+!") class MaxRetries(Exception): - def __init__(self, message): self.message = message @@ -77,7 +76,9 @@ class BaseParser: """ - def __init__(self, *, element, default_encoding: _DefaultEncoding = None, html: _HTML = None, url: _URL) -> None: + def __init__( + self, *, element, default_encoding: _DefaultEncoding = None, html: _HTML = None, url: _URL + ) -> None: self.element = element self.url = url self.skip_anchors = True @@ -95,7 +96,7 @@ def raw_html(self) -> _RawHTML: if self._html: return self._html else: - return etree.tostring(self.element, encoding='unicode').strip().encode(self.encoding) + return etree.tostring(self.element, encoding="unicode").strip().encode(self.encoding) @property def html(self) -> _BaseHTML: @@ -103,9 +104,9 @@ def html(self) -> _BaseHTML: (`learn more `_). """ if self._html: - return self.raw_html.decode(self.encoding, errors='replace') + return self.raw_html.decode(self.encoding, errors="replace") else: - return etree.tostring(self.element, encoding='unicode').strip() + return etree.tostring(self.element, encoding="unicode").strip() @html.setter def html(self, html: str) -> None: @@ -129,11 +130,10 @@ def encoding(self) -> _Encoding: self._encoding = html_to_unicode(self.default_encoding, self._html)[0] # Fall back to requests' detected encoding if decode fails. try: - self.raw_html.decode(self.encoding, errors='replace') + self.raw_html.decode(self.encoding, errors="replace") except UnicodeDecodeError: self._encoding = self.default_encoding - return self._encoding if self._encoding else self.default_encoding @encoding.setter @@ -158,7 +158,7 @@ def lxml(self) -> HtmlElement: """ if self._lxml is None: try: - self._lxml = soup_parse(self.html, features='html.parser') + self._lxml = soup_parse(self.html, features="html.parser") except ValueError: self._lxml = lxml.html.fromstring(self.raw_html) @@ -178,7 +178,15 @@ def full_text(self) -> _Text: """ return self.lxml.text_content() - def find(self, selector: str = "*", *, containing: _Containing = None, clean: bool = False, first: bool = False, _encoding: str = None) -> _Find: + def find( + self, + selector: str = "*", + *, + containing: _Containing = None, + clean: bool = False, + first: bool = False, + _encoding: str = None, + ) -> _Find: """Given a CSS Selector, returns a list of :class:`Element ` objects or a single one. @@ -234,7 +242,9 @@ def find(self, selector: str = "*", *, containing: _Containing = None, clean: bo return _get_first_or_list(elements, first) - def xpath(self, selector: str, *, clean: bool = False, first: bool = False, _encoding: str = None) -> _XPath: + def xpath( + self, selector: str, *, clean: bool = False, first: bool = False, _encoding: str = None + ) -> _XPath: """Given an XPath selector, returns a list of :class:`Element ` objects or a single one. @@ -257,7 +267,8 @@ def xpath(self, selector: str, *, clean: bool = False, first: bool = False, _enc elements = [ Element(element=selection, url=self.url, default_encoding=_encoding or self.encoding) - if not isinstance(selection, etree._ElementUnicodeResult) else str(selection) + if not isinstance(selection, etree._ElementUnicodeResult) + else str(selection) for selection in selected ] @@ -293,11 +304,15 @@ def links(self) -> _Links: """All found links on page, in as–is form.""" def gen(): - for link in self.find('a'): + for link in self.find("a"): try: - href = link.attrs['href'].strip() - if href and not (href.startswith('#') and self.skip_anchors) and not href.startswith(('javascript:', 'mailto:')): + href = link.attrs["href"].strip() + if ( + href + and not (href.startswith("#") and self.skip_anchors) + and not href.startswith(("javascript:", "mailto:")) + ): yield href except KeyError: pass @@ -311,12 +326,12 @@ def _make_absolute(self, link): parsed = urlparse(link)._asdict() # If link is relative, then join it with base_url. - if not parsed['netloc']: + if not parsed["netloc"]: return urljoin(self.base_url, link) # Link is absolute; if it lacks a scheme, add one from base_url. - if not parsed['scheme']: - parsed['scheme'] = urlparse(self.base_url).scheme + if not parsed["scheme"]: + parsed["scheme"] = urlparse(self.base_url).scheme # Reconstruct the URL to incorporate the new scheme. parsed = (v for v in parsed.values()) @@ -325,7 +340,6 @@ def _make_absolute(self, link): # Link is absolute and complete with scheme; nothing to be done here. return link - @property def absolute_links(self) -> _Links: """All found links on page, in absolute form @@ -344,9 +358,9 @@ def base_url(self) -> _URL: (`learn more `_).""" # Support for tag. - base = self.find('base', first=True) + base = self.find("base", first=True) if base: - result = base.attrs.get('href', '').strip() + result = base.attrs.get("href", "").strip() if result: return result @@ -354,7 +368,7 @@ def base_url(self) -> _URL: parsed = urlparse(self.url)._asdict() # Remove any part of the path after the last '/' - parsed['path'] = '/'.join(parsed['path'].split('/')[:-1]) + '/' + parsed["path"] = "/".join(parsed["path"].split("/")[:-1]) + "/" # Reconstruct the url with the modified path parsed = (v for v in parsed.values()) @@ -372,8 +386,16 @@ class Element(BaseParser): """ __slots__ = [ - 'element', 'url', 'skip_anchors', 'default_encoding', '_encoding', - '_html', '_lxml', '_pq', '_attrs', 'session' + "element", + "url", + "skip_anchors", + "default_encoding", + "_encoding", + "_html", + "_lxml", + "_pq", + "_attrs", + "session", ] def __init__(self, *, element, url: _URL, default_encoding: _DefaultEncoding = None) -> None: @@ -384,8 +406,8 @@ def __init__(self, *, element, url: _URL, default_encoding: _DefaultEncoding = N self._attrs = None def __repr__(self) -> str: - attrs = ['{}={}'.format(attr, repr(self.attrs[attr])) for attr in self.attrs] - return "".format(repr(self.element.tag), ' '.join(attrs)) + attrs = ["{}={}".format(attr, repr(self.attrs[attr])) for attr in self.attrs] + return "".format(repr(self.element.tag), " ".join(attrs)) @property def attrs(self) -> _Attrs: @@ -396,7 +418,7 @@ def attrs(self) -> _Attrs: self._attrs = {k: v for k, v in self.element.items()} # Split class and rel up, as there are usually many of them: - for attr in ['class', 'rel']: + for attr in ["class", "rel"]: if attr in self._attrs: self._attrs[attr] = tuple(self._attrs[attr].split()) @@ -411,7 +433,15 @@ class HTML(BaseParser): :param default_encoding: Which encoding to default to. """ - def __init__(self, *, session: Union['HTMLSession', 'AsyncHTMLSession'] = None, url: str = DEFAULT_URL, html: _HTML, default_encoding: str = DEFAULT_ENCODING, async_: bool = False) -> None: + def __init__( + self, + *, + session: Union["HTMLSession", "AsyncHTMLSession"] = None, + url: str = DEFAULT_URL, + html: _HTML, + default_encoding: str = DEFAULT_ENCODING, + async_: bool = False, + ) -> None: # Convert incoming unicode HTML into bytes. if isinstance(html, str): @@ -419,10 +449,10 @@ def __init__(self, *, session: Union['HTMLSession', 'AsyncHTMLSession'] = None, pq = PyQuery(html) super(HTML, self).__init__( - element=pq('html') or pq.wrapAll('')('html'), + element=pq("html") or pq.wrapAll("")("html"), html=html, url=url, - default_encoding=default_encoding + default_encoding=default_encoding, ) self.session = session or async_ and AsyncHTMLSession() or HTMLSession() self.page = None @@ -439,25 +469,25 @@ def next(self, fetch: bool = False, next_symbol: _NextSymbol = DEFAULT_NEXT_SYMB """ def get_next(): - candidates = self.find('a', containing=next_symbol) + candidates = self.find("a", containing=next_symbol) for candidate in candidates: - if candidate.attrs.get('href'): + if candidate.attrs.get("href"): # Support 'next' rel (e.g. reddit). - if 'next' in candidate.attrs.get('rel', []): - return candidate.attrs['href'] + if "next" in candidate.attrs.get("rel", []): + return candidate.attrs["href"] # Support 'next' in classnames. - for _class in candidate.attrs.get('class', []): - if 'next' in _class: - return candidate.attrs['href'] + for _class in candidate.attrs.get("class", []): + if "next" in _class: + return candidate.attrs["href"] - if 'page' in candidate.attrs['href']: - return candidate.attrs['href'] + if "page" in candidate.attrs["href"]: + return candidate.attrs["href"] try: # Resort to the last candidate. - return candidates[-1].attrs['href'] + return candidates[-1].attrs["href"] except IndexError: return None @@ -500,7 +530,20 @@ async def __anext__(self): def add_next_symbol(self, next_symbol): self.next_symbol.append(next_symbol) - async def _async_render(self, *, url: str, script: str = None, scrolldown, sleep: int, wait: float, reload, content: Optional[str], timeout: Union[float, int], keep_page: bool, cookies: list = [{}]): + async def _async_render( + self, + *, + url: str, + script: str = None, + scrolldown, + sleep: int, + wait: float, + reload, + content: Optional[str], + timeout: Union[float, int], + keep_page: bool, + cookies: list = [{}], + ): """ Handle page creation and js rendering. Internal use for render/arender methods. """ try: page = await self.browser.newPage() @@ -515,9 +558,11 @@ async def _async_render(self, *, url: str, script: str = None, scrolldown, sleep # Load the given page (GET request, obviously.) if reload: - await page.goto(url, options={'timeout': int(timeout * 1000)}) + await page.goto(url, options={"timeout": int(timeout * 1000)}) else: - await page.goto(f'data:text/html,{self.html}', options={'timeout': int(timeout * 1000)}) + await page.goto( + f"data:text/html,{self.html}", options={"timeout": int(timeout * 1000)} + ) result = None if script: @@ -525,13 +570,13 @@ async def _async_render(self, *, url: str, script: str = None, scrolldown, sleep if scrolldown: for _ in range(scrolldown): - await page._keyboard.down('PageDown') + await page._keyboard.down("PageDown") await asyncio.sleep(sleep) else: await asyncio.sleep(sleep) if scrolldown: - await page._keyboard.up('PageDown') + await page._keyboard.up("PageDown") # Return the content of the page, JavaScript evaluated. content = await page.content() @@ -563,25 +608,28 @@ def _convert_cookiejar_to_render(self, session_cookiejar): # | * ``secure`` (bool) # | * ``sameSite`` (str): ``'Strict'`` or ``'Lax'`` cookie_render = {} + def __convert(cookiejar, key): try: - v = eval ("cookiejar."+key) - if not v: kv = '' - else: kv = {key: v} + v = eval("cookiejar." + key) + if not v: + kv = "" + else: + kv = {key: v} except: - kv = '' + kv = "" return kv keys = [ - 'name', - 'value', - 'url', - 'domain', - 'path', - 'sameSite', - 'expires', - 'httpOnly', - 'secure', + "name", + "value", + "url", + "domain", + "path", + "sameSite", + "expires", + "httpOnly", + "secure", ] for key in keys: cookie_render.update(__convert(session_cookiejar, key)) @@ -598,7 +646,19 @@ def _convert_cookiesjar_to_render(self): cookies_render.append(self._convert_cookiejar_to_render(cookie)) return cookies_render - def render(self, retries: int = 8, script: str = None, wait: float = 0.2, scrolldown=False, sleep: int = 0, reload: bool = True, timeout: Union[float, int] = 8.0, keep_page: bool = False, cookies: list = [{}], send_cookies_session: bool = False): + def render( + self, + retries: int = 8, + script: str = None, + wait: float = 0.2, + scrolldown=False, + sleep: int = 0, + reload: bool = True, + timeout: Union[float, int] = 8.0, + keep_page: bool = False, + cookies: list = [{}], + send_cookies_session: bool = False, + ): """Reloads the response in Chromium, and replaces HTML content with an updated version, with JavaScript executed. @@ -654,13 +714,26 @@ def render(self, retries: int = 8, script: str = None, wait: float = 0.2, scroll reload = False if send_cookies_session: - cookies = self._convert_cookiesjar_to_render() + cookies = self._convert_cookiesjar_to_render() for i in range(retries): if not content: try: - content, result, page = self.session.loop.run_until_complete(self._async_render(url=self.url, script=script, sleep=sleep, wait=wait, content=self.html, reload=reload, scrolldown=scrolldown, timeout=timeout, keep_page=keep_page, cookies=cookies)) + content, result, page = self.session.loop.run_until_complete( + self._async_render( + url=self.url, + script=script, + sleep=sleep, + wait=wait, + content=self.html, + reload=reload, + scrolldown=scrolldown, + timeout=timeout, + keep_page=keep_page, + cookies=cookies, + ) + ) except TypeError: pass else: @@ -669,12 +742,26 @@ def render(self, retries: int = 8, script: str = None, wait: float = 0.2, scroll if not content: raise MaxRetries("Unable to render the page. Try increasing timeout") - html = HTML(url=self.url, html=content.encode(DEFAULT_ENCODING), default_encoding=DEFAULT_ENCODING) + html = HTML( + url=self.url, html=content.encode(DEFAULT_ENCODING), default_encoding=DEFAULT_ENCODING + ) self.__dict__.update(html.__dict__) self.page = page return result - async def arender(self, retries: int = 8, script: str = None, wait: float = 0.2, scrolldown=False, sleep: int = 0, reload: bool = True, timeout: Union[float, int] = 8.0, keep_page: bool = False, cookies: list = [{}], send_cookies_session: bool = False): + async def arender( + self, + retries: int = 8, + script: str = None, + wait: float = 0.2, + scrolldown=False, + sleep: int = 0, + reload: bool = True, + timeout: Union[float, int] = 8.0, + keep_page: bool = False, + cookies: list = [{}], + send_cookies_session: bool = False, + ): """ Async version of render. Takes same parameters. """ self.browser = await self.session.browser @@ -685,13 +772,24 @@ async def arender(self, retries: int = 8, script: str = None, wait: float = 0.2, reload = False if send_cookies_session: - cookies = self._convert_cookiesjar_to_render() + cookies = self._convert_cookiesjar_to_render() for _ in range(retries): if not content: try: - content, result, page = await self._async_render(url=self.url, script=script, sleep=sleep, wait=wait, content=self.html, reload=reload, scrolldown=scrolldown, timeout=timeout, keep_page=keep_page, cookies=cookies) + content, result, page = await self._async_render( + url=self.url, + script=script, + sleep=sleep, + wait=wait, + content=self.html, + reload=reload, + scrolldown=scrolldown, + timeout=timeout, + keep_page=keep_page, + cookies=cookies, + ) except TypeError: pass else: @@ -700,7 +798,9 @@ async def arender(self, retries: int = 8, script: str = None, wait: float = 0.2, if not content: raise MaxRetries("Unable to render the page. Try increasing timeout") - html = HTML(url=self.url, html=content.encode(DEFAULT_ENCODING), default_encoding=DEFAULT_ENCODING) + html = HTML( + url=self.url, html=content.encode(DEFAULT_ENCODING), default_encoding=DEFAULT_ENCODING + ) self.__dict__.update(html.__dict__) self.page = page return result @@ -711,7 +811,7 @@ class HTMLResponse(requests.Response): Effectively the same, but with an intelligent ``.html`` property added. """ - def __init__(self, session: Union['HTMLSession', 'AsyncHTMLSession']) -> None: + def __init__(self, session: Union["HTMLSession", "AsyncHTMLSession"]) -> None: super(HTMLResponse, self).__init__() self._html = None # type: HTML self.session = session @@ -719,12 +819,17 @@ def __init__(self, session: Union['HTMLSession', 'AsyncHTMLSession']) -> None: @property def html(self) -> HTML: if not self._html: - self._html = HTML(session=self.session, url=self.url, html=self.content, default_encoding=self.encoding) + self._html = HTML( + session=self.session, + url=self.url, + html=self.content, + default_encoding=self.encoding, + ) return self._html @classmethod - def _from_response(cls, response, session: Union['HTMLSession', 'AsyncHTMLSession']): + def _from_response(cls, response, session: Union["HTMLSession", "AsyncHTMLSession"]): html_r = cls(session=session) html_r.__dict__.update(response.__dict__) return html_r @@ -752,24 +857,30 @@ def _get_first_or_list(l, first=False): class BaseSession(requests.Session): - """ A consumable session, for cookie persistence and connection pooling, + """A consumable session, for cookie persistence and connection pooling, amongst other things. """ - def __init__(self, mock_browser : bool = True, verify : bool = True, - browser_args : list = ['--no-sandbox']): + def __init__( + self, + mock_browser: bool = True, + headers: dict, + verify: bool = True, + browser_args: list = ["--no-sandbox"], + ): super().__init__() # Mock a web browser's user agent. - if mock_browser: - self.headers['User-Agent'] = user_agent() + if mock_browser and not headers: + self.headers["User-Agent"] = user_agent() + if headers: + self.headers = headers - self.hooks['response'].append(self.response_hook) + self.hooks["response"].append(self.response_hook) self.verify = verify self.__browser_args = browser_args - def response_hook(self, response, **kwargs) -> HTMLResponse: """ Change response enconding and replace it by a HTMLResponse. """ if not response.encoding: @@ -779,13 +890,14 @@ def response_hook(self, response, **kwargs) -> HTMLResponse: @property async def browser(self): if not hasattr(self, "_browser"): - self._browser = await pyppeteer.launch(ignoreHTTPSErrors=not(self.verify), headless=True, args=self.__browser_args) + self._browser = await pyppeteer.launch( + ignoreHTTPSErrors=not (self.verify), headless=True, args=self.__browser_args + ) return self._browser class HTMLSession(BaseSession): - def __init__(self, **kwargs): super(HTMLSession, self).__init__(**kwargs) @@ -794,7 +906,9 @@ def browser(self): if not hasattr(self, "_browser"): self.loop = asyncio.get_event_loop() if self.loop.is_running(): - raise RuntimeError("Cannot use HTMLSession within an existing event loop. Use AsyncHTMLSession instead.") + raise RuntimeError( + "Cannot use HTMLSession within an existing event loop. Use AsyncHTMLSession instead." + ) self._browser = self.loop.run_until_complete(super().browser) return self._browser @@ -808,14 +922,13 @@ def close(self): class AsyncHTMLSession(BaseSession): """ An async consumable session. """ - def __init__(self, loop=None, workers=None, - mock_browser: bool = True, *args, **kwargs): - """ Set or create an event loop and a thread pool. + def __init__(self, loop=None, workers=None, mock_browser: bool = True, *args, **kwargs): + """Set or create an event loop and a thread pool. - :param loop: Asyncio loop to use. - :param workers: Amount of threads to use for executing async calls. - If not pass it will default to the number of processors on the - machine, multiplied by 5. """ + :param loop: Asyncio loop to use. + :param workers: Amount of threads to use for executing async calls. + If not pass it will default to the number of processors on the + machine, multiplied by 5.""" super().__init__(*args, **kwargs) self.loop = loop or asyncio.get_event_loop() @@ -833,11 +946,9 @@ async def close(self): super().close() def run(self, *coros): - """ Pass in all the coroutines you want to run, it will wrap each one - in a task, run it and wait for the result. Return a list with all - results, this is returned in the same order coros are passed in. """ - tasks = [ - asyncio.ensure_future(coro()) for coro in coros - ] + """Pass in all the coroutines you want to run, it will wrap each one + in a task, run it and wait for the result. Return a list with all + results, this is returned in the same order coros are passed in.""" + tasks = [asyncio.ensure_future(coro()) for coro in coros] done, _ = self.loop.run_until_complete(asyncio.wait(tasks)) return [t.result() for t in done] From 51bd98dc3bafe4f09b120c01778fc566a7997848 Mon Sep 17 00:00:00 2001 From: ExamDay Date: Sat, 13 Mar 2021 18:37:26 -0600 Subject: [PATCH 2/5] Gave BaseSession capacity for custom headers and port numbers. (this functionality allows for scraping through a proxy) --- .requests_html.py.swp | Bin 90112 -> 0 bytes requests_html.py | 336 +++++++++++++----------------------------- 2 files changed, 103 insertions(+), 233 deletions(-) delete mode 100644 .requests_html.py.swp diff --git a/.requests_html.py.swp b/.requests_html.py.swp deleted file mode 100644 index 93079a3e0736c14c811a2b1125a1b6f66ab31366..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 90112 zcmeI537lkAeeX+&Vib%E8ub~y)I_^`?5P>x6U1hEWEch*U|^UT1YR?*y1M)JbkWsS zO;z>uv?GdpG#W9OMBEZ3Mx){qm&7emQ87WD3a)t?cST9`X++Hv2)U@Svc<4V>T#&HN~T&2S4b%I~G>|t3Cz#6zEf+Pk}xK`V{C> zpihB51^N{DwWL6M{;0y!NclZ|>NorQJv#13_~+yOeM`std;90Z{e6GO`y>7Hl)qor z@jiU_JKp{49q;e&pTqYzb-cfie-8IEeFj24e$T((Q+ zynmp7zs}#^-tqoH-u*;>|4PUE`+E0be}9B8kPzR4z59B9Kds~a{k;2He_!Z$f0TDW z&fovK<9(oe$-lp$Ujo;Ie*iBBF9QqUT(A|Cz!BhcF#fLuZLkvzfX9OSfp5VKUI>Q3AUF#AKN#6> zf^UEu!42SJ;8kD}JOMl!{2ur@O!K$F7r}Mlz2J4=<>2{X8teue!T$h3!Vm^3ho2GgFxg4 z@HTK6m;gtBAHqog8~6ga4!jOD!Q;TA!F|AYVaC4*o(z`2EI0!k1+Is0{vdb*sDNGI zaBy#M3q1M%2Co9I1b+<1KnWZL9t`dUeh1{Pi0h>JW@A2Xwo$#%qpnt5n=Y9~Ml>2p zF{5D4{3#ZTJG^&%wcBwsnr<}R2d2S+2lG4?Pe;wlq6IRj@P;j*k!WJ69k+&}b(^BI z8})d^Jf9SNmRy)Cq@)zrCmU0-1?hfQ>wAZiD3yj>bgg!?GcrqOXGG*JYR2t_W<84A z%{VT#8+=OGKBihc7uVavkwuU5=)gj~s+0^2mE>`L&?Q3Q#I=^sn4Wa`kd3ZMUZ>*j zoDe~?*=V*#2b%Fb+BF1Uun_oDoz4`DBb;6ZitG2_crC7wg}FvEjyBG;+w&vq*Do$E zmZqxv;%XiBo2B+lqkdee9=F%K{IyD=$ENY}P^q9GbP?ovE_bASz8Sa3dZk@$)T739 z)Sii>(|2z_E1GQ7+q@{~3l(=ldn0)HK*?Q{c z^Um5`-g@?yb57ZI_Gv>RSG85Gx7wBZWIU)(h9hfhqItg0aPftP(=$$TSXrpG%Nb9d zPWj}wW~=jMqMo6x5aRCULP!1MR+vT$FJ=W3^L!0kR z%1Bf`#U*&FIwN`mpQ+Htm92d^vJWc}d~x2+voal#MXQ!;KSt&&&B~nfxSQsM-)Oqo zn2Q!?s*^Kmi)%Eid#m+IJDwucRy<{W@A!CSqSdG^wBvHETAyu=kGopq169$CxCWaI zvJp+tJ}qb>rzH1A3-dJa!N$BAdZp&O10PB^3C_2qB?Z=Qs1HyfM(su}0iKK_k#ZL3 zzlx-k9Bm-dW~H?{X_;8R)tBH7QXMykjRdDJ%vI{GQem=IX|*DCJnroXgI(k=tA%rM zt5w+>Q(XR+drita%iq5r8u5W{uk7E+z)+1_gIo7c#zrJwy6~HoDzx0WP;Xb~;;rhy z2a7v{nxoYPsX12ZNTVIr0Y}G`j(^o|fePi_q>D#9666I8HKWn#RAXQCg>t6Z znIGR1;>~6k1vR$teRoT>PtJq5oA)w&1%F|<3eUa?XOe7TlX;g){JD2VH;P+HFYXzR zlAne01v@HjT#19%2G|W-ulVlcyVQ4r8UXaMKs91;c<)mkV#D;^!;aVy8WjLx; zXXEHJs&Qi|dXj4z>yOz`I;M0im3+F=tcEX&X=1EG6g4AiF(r!kSLO)6)M)NqpLz7qlXTCnqdKluop58JDspvh9$jY}Ool$*eppSeyEgb{E7l zA7z`IrQD_rKJzw7F8Bw%=m?}_wJgN%tCRCHU73x`Xnlw!z(>TbQrU)BpTFG<4UL7ib?`2{?k_^X zyJwnlWoid;ZQUPFF0>m>_XRTvRYWW4;1@T0wt2=vJ^g)gqgpTc$SbX-`ed~c9vM># z;{PAZ`1Nxw)WZKaxas;^c>T-3+2925cyK&80=x&l{*S?BU>0lxLtqfx3}63U@E!0$ z@OJRG;7>sdjDd5(N#KcK6L>hd9^U`$;Emu7;HBXCU@zDKjsw2~ZiDat8u%)B7kD8! zA8Y{;kZj4E(%fQpXC15vL3w{hge<5crCl~^Q;5zvG z7lX^eGbcg#nmYivFl1K0+SCwQV}8H`>sh z$uI<3WZ=zA&TYmbk#~e80uYevjU1s|7^Rdb;4(7eE(k)HQUA=SMhh9;l~zM@v1 zY-q~T_M)$le#Tj~-J$e*IP;lGtHKQ3*ZgAeTO@vku1h(ho>U@FhiH+hfg|`-WMQi6 zOjnz&cD_{|ysrq+6)YnE#~Mg@1G$q1(=&&@r|@Y@^AH~7F9SErYTkh@S;H5%NgYsTv9M~%Ye5KUQb zg+Nf<-TP4gDTT!fRX&+BmTDEoubIK*8wz$6h2Z*#)fa^)iKqvcfC#1)m&%%MmzhG^ z<6!vV3n@vEG7;XT+{tJZIJUsgBp>)3bbXWW7Pib(>c~ifFB5s!P0D4Xu9HivcVO_T zSVP+x65cWXy7ZT4n5X-`CN-kg|i5O>?ZyxwzBd*F@VH#327-JP`;o(6T@y3p6vJhJhw&+pdje`lBpG zM_{j8k_e0y*RiK}U5i3zi!VZoB)^Wh`H?eu87L7=RVUl&H`#10^QB7AeUR&JT>rp$ zWl>b-8uh)Tufa^E$#jLD)lW0?DMaRg9Q`w#Xe)OHC^n=oq3mQ#g>+7l{bDdMt;vTAn z`2Q!u`#u=nSo}X7hF!PA=U)w;4$cR|;4tt-c>JruGr&2Z2z~(n{$cP2@Dk7j7lPBk zF<=9D82Bjs`$xcA!FI3({66?OJo`=H+u)PnL*Vt`1>h1O{{D%e2!2XBKL_3oUI?B8 zP6bP>+TaYZ4IBkN1#kaJ@Om%-hQLk8Y`zUX4sHgjdFj04;4}1uv*>g4zcg-MWwtCw zpY)9K6|QY}4O_3^?n>4^6W%J>do2$gDt{k{VE5%G6J0VTH~Qoz6gJOTRyvSOH~I{} zQ|mZ*cM@2vLbi}Pxhs24I-X9E#=#i=9!&S$!tJNyvhu>zAx-vg1uO4ozjqBU-&qBg zVX%8L_;-xIPm9mb`VJy7Ne9i<$xZyEFUzj}5}EaEbIblVx-ZYJCSG&?5?9aecFIM} zv!7^|{L7R%U_S0-XyUvj64eZwDLEW-Dca3~AB&Wgxmarf+)x{ArBcNVBZ{Kg6oO7I z>$O-5^|M+pp*4mk>v*UBAYOo#`=FHQzMup`-mZim< zpHSM6M%0XXH3Y-tKQ8C5(OxXd7*LVm{pol^iC?evo*OlcsoYJ!>E zDPlK9WE6E!Vl*i|OAL-2w)CNFQYW>vT$jMxbf0`}KqE&X>GBw3HRmS$cJwp(B4{8T3y2D*?ZDjMw4GF zf8gmg>hCLi&a{Q}=gObBHaF^SD|_`NWpCE5NA2%HJemg0sB5|_ zd(E|sLYDs7-)_{jri=*Kwc_mi@Z0Cx(+=L4q@+sE7;bBjO&lUfF zl4Ji1-+|Y^3A_?4f&T^efQ{fDpa8xM?|%(=GmuPRFW3wo0d9fczX7}sTn4n}KL~yX zzkdUGA9xvfI%ojx1GoU31ilBK|DWLVU;)em@&Atn4*=5rzdw-f|CL}n5dVJ)_yPR< zhruhrrQj)`4t9dagNK11l2_^SzYRPcv_KPF03-)6T6jA2Z+!~%DbS}tp8~&l3TTL{ z*dJ54ko3k$A$qxKywfs=-s$oaT^k1VoKA`-^=MazTjuf3YCv?-Xo|I$Q8e~CKBPQMc0rO)B$-9rwVaM?~teHCj*>E1m{Kv=}vIz6qW)O|6l za4gOmzdsyJEJy(jEqIpcV3K1LSWQEnT`Ho}__!3%Z2MtUnVO2HlI1K{PFL9s{WWb( z!l;CEb@E}?>Dd^X@V|vXop4X|A%a7vxF)!3cXFAY2k8(>O|*TJqGjg`^69Hc;GPjd z){$ejgD8?M2HNQ1x$$)9BGR4`%XAb};I2do+a!<$Iq9;G>HeXY_Gtxy7h9Lya3Mcy zqRnXk&$0Jb`m{^L)@Cn>#F5gybrMYl_TnWww+W}_B z3-#s>ev_YjNd9`)>XY4e&uO%{ zr)+F3rC-W(>~>bRP|%kUAEn~_`w(E_&#TY*=d@V_k5tS`eQ9uP5>@6!ss5`Sdf#l4 zsnyDD_K+nn)l?_tMK#s!8I+<*2u*i+@d0GQYSB*T+2fe=8d6DpA4~P96i9euU-3FT zv!7^wDX)x|rX&-3J~SoCk(88M-!*@UW`E2+8qGXSwmJe2`UpquGuyY55tw&PgTo9s z4J#yASxfR9yy2K`qb6;Zr0_CNqBM63DSb*aNC{?kd8a;@USd}vEt@sb7T04&XjReo zpc1J!x|X{%Ie&0{Yiq5Q5mj?O=Bmwkl}=J=LfyU=j(p6RZqV+M}NI8DhqQe7EqqmUf8 zN6Q?dQlH&3KAwbUuh+Cte%p3Rr!!jt=i)8e=G^&)=Ol!!ZhK>tOG%~%d-ac`p7_1l zVKY0FV#rBp?!6m5N22p*;PkY2q-lC}wzy(;GcI00FEb&$AKbe_N$bUkHXwK%Jnd-D z6kyUO39r%)b;$yUUD#KJQHZSXNV4qHXuA%3r4cpOYP-7&)H3Dx_(nIi*+i9+R;V1L z6RGERCFqw%Tt1di6-IIt_S<1HReXhN?f4V++^|n5G#MDYlw35A(_p5wxhY$W68DSR z=sh|`-Nv-hK%AYnn;PS3+@EzoVJ)$^7DYg88f2+orp=~%v)$GhPruxXt7}Nf`lp|D zWGYX(T

BwRbsF+dM~Q9g*#6Rmo|Jr8U7!f4j;K0g45>imi(_>hC6R^gx9~IJ?#S z8DOlESf+_88f3w*wqTm5=1tZdh%gV~$F)~=0iCHg63Ds>GHUgD`DVua?QUHEk z^GTK3!g33nx$x87%Pv$!g%UG+zqE^XbikrUaR4pcj5Qp^l;Z*zt1`hdG^j?WL)2YZ zyT(P4#JrA?XNu0EOw;UPET^E9s&>g^TCq+imwuF6T}jPfl6LS4(q-1nyZFt~j(mzH zEs%sNcwCVtu0kbE%O@>660fG=jr*iH>S#=L7KdioY7RG|{q}xi_xf&v6_hPe%FUo$ zUG@PkOCv{yFu|d^^3wuRU=;d}AYOuZ%9*PQpCsES6?1SjQku0V>p`tmrS<8g;H}GE zRklM&ddP&;OI=Lc-jYtNIn9U6bY3~o)s@BGA3D$OuWy6M^kie|6tgt7jFN!`7Fu%Y zP>oV4x?D4q!y2)sHLSL zlY&n6>Fk#cWj9eIZ9&1LJ%eJht7qXA{LAh_Ohf3^w`4_*Zj1`zMkcjhIV(5XYs^Z> z;(A#TZz48Ep1Qcd-K-e#%ou|vS~mVMW3e=LRbMw0Z1uCc&IV-r|KaC%0`dQ_rglmH z|1;oe{@uCUZ-vjl5!?X&3cLyogCE1&zX!YPL&2j7Em{|RmcH-I;TXM$&di@>qq7$BbiGvKx0Z@?HB0k^`-{|vkb$Znr>{3}2< z`)-H7*S`MS!1uwm{_cxPH`Pv+d?jZCDjjB=NuJKaPe&48=QSU)uoT`J?9?tqDG5}bnc(ID&cCMpJPK%YtnD*1A5^x=d1 zjMd*KCf_-h_GvoN%5HSl7i-|Jm`T!H7A76@ArTnOhN)hoiJ2B{AXihV$BU6ma!1Ns z@M8n+<$!I?^CmT1_aY-6WfG2^Kg)7K@#>;G(Z-_A0Mc%un~o>yBlaih+)T~jU5LY4 z`mk+D*0Mu{9SKcID9axCw8BOn}$OiUYf=RR9*5tpv5&Q?g=`RuH<+?WQ7H ziS`=HV}3*3%Z<qF*t2ua`l$1p!GY9f)5ySZG%&}Q z*=~#s3}s|7A%VJE5}3hL&?Q*}XpIfXXXQKWmnL}NZ61oV)J^?4lJb5w~8@mhZOrr zbz_5>ehju_3?|V2{d_=!{!l8~%8l7nW9;vR51cr}S6v^7`V<7}nOYls=LT_a%1jkL zFsVIe(u3ZD2Q33FWUQfz#`?xpe`{0)*1*QlF%BX6j9nWr**|Vj+2> zQa$OBwv0PX4+2<=li3k7&GxccW-`UnQFo5Z-8*6t#mROnJ0?wn)uT5EKEj4@ z|D5)xp|<%K3FGUzO3<+3oHSWQS{ATX;f6fGxVha9;$U|fRb5}54L{&~#% z`BY|Q{kf*()fRlBv;6cgSF%k8)>{U!=VZq44l37n)(pLDxs}q!>;>3%4>d_l4#~2u zWRyXhLque{!E|R~Vx6NT^v7Ew{EY_V(%uq+*Y)cw>rc`UH9pR+07*t=^d^n#mnylo zhSNj-eK*OL9ldv1?S@`m1JDWs%ppjZ>ye8ml*>v2=A^1A3zK{F0 z;N9R#a6UK^$mYN7|Ggi)3CQmM9uRE(9m(%6BLnz%@LI43Tm%Ndf5Z1n=l^ry!yw2A zUdM0Q0{9bfD)<3%fOi4O2(AXQ@3#ZU#{WaWQQ+^937i8qfCmEE@xLB?99#{g=YMZ- zFYpt1|IdSGfyaZ#f%}1ffzSUKcp5kltOMVG*Z(^B7`PUE6ucQ|pa0XrF!&aH{u{wn z;5FbY@b_N^Zv@W+7l8AD*83j@w6|Y;|7*b_m;;Xlhk;MS>)#080d4@gRJAlWDx-=r z-}JVsP0Yh^nQ1h%&1p=$v&zy6)_sF|Nydt+1*ag*yC&w0ZW40lUv#M_+M1OVbIZov zY}QXnqIS9cdMxwPTjd;*0MlVs@sgFC=ySBwA^A?e;WkG>UUN#5`ceJKJNsrgiXhpR z4`8)O6Du@}&B{Dn$>0EMzF58AxZZ99L%~}bjI)}fY&iZQV;d_;R!;b?kcQHcN=cRz zG}Ud{y>Cfi^Z=R3lD;A0WmefG-M|hsIzHY^zexgZo!zVym$G%G)2?}k#^*q$sKirt zmZynC-0C3Y4-1N+J|S+&Sv~LG%Gdct*!9QoZj~s%u$Ymw8)d>=$`AaVHAG9jGLfDL zP8lV=9|lavP54hm(K~Ml-AJ$-R!wpy6W82P8|@)`UePt0YD_M;GbUtQaml9H;@>Fd ztQxD?Rx_qa3K(g9^BkYG<+Abm={u@oa%DV>$(saT?FA-T#%dwNF)@L?DYY;%i99FY zY88@3EByJ%KWBy5S1D5=KVlPtr?CN>2CI4@Y|09vK><^giLXFl`0qn>8 zC3drN(Jw!k3?u{?@QKk&a%O%q!c9H<4m!!J+%gXqRJq)$HQKFm85(;&JT$*MKNQ?` zop{9M)8q4fXU@4uq(5rs8kR%Hztz>JgY^AmEA8`nfOPy_Zo$Q5gA!=0YO$qzp+ymx zJYRy#IgUY@mO)~PMAAgKIK~F#YV(MJ^}fHzzwsjtZ->T4)*Z8FD0*DPLsF>}`&?t6 zjP@;Q!Npr4)+PmQEt%HBzLLQJ(zXwiFOyu7T>I^MF`k&vt+-&J{4two#eAk!B#cE^ z9Qhoo0_gG`C<-gcKnmvM|1hdJ*ZlD4+#tVJSe$TPmZF)6NgMF8b+^nIMnGxaGGA-gH_db zebHUxl@Dsfp)M^J-7256`kAYRA&UoDT1)~+Gk@oo7RCR+48Hz7K>WXFG41(s+*)8Q z_y&Bv_0P!Pm>)-!sAK;52X~_$++KOe+zsDJR3}a zv%s0)IB+vO|98Qs!27}bz$?M?z{y}Okj}tYkpTp|e^>B(8pPmA?EA~E|8d~w@c*}g4}v#+S{tudbLVA1#fLhySTb_Ekgx2#a*?_@SUG>x@EdEI3Z@2J0T|4 z`a9iVc1MW<(Upm*JR2`fG%C$0EME=|>>%<}FoDydwx@Y^vJY09-?pZbm`bwQ>W@k~Fqs+Rf{2eT}$v}`vMxgefQ%uh1 z+Xcw6J!yBKc#?2z+FOuQ_R565x6y74P7h$2v@#;aQ4@j;AFxqGT=Jz1Anq8j*64u& zztL^LEDsI(r)Vul^=#NMbZI_my^EwXYP#w>8UcBSHE=;HbMxN4^GYA74HZYohaULq&|JWUyPtVoVpu545ZmH`rh6jNSW^!&-7K zCNKAM5n2`S#=Q_AE~)`%^f_{hevI(Z=`ic;^{_DP@Bpm{ih;jaYd}6ZB|GG)S$F1t zjV$indboP|;V?Gs9nY8qvy9s;fH|5Zjn1P@1#GQ>D&!X)Tnh<8CMTy#+aerTVxWs- zAfT2vy(J!0^~kBjvphVKoc3dfph3E24TKFx$xcxxyiy=$WhrNjOyU`pr7k0EMj{*5 z12RsoXf^Xe8>pOl;d7mF$%d@Z)5)i^AFoDS(2H=Ww z-7}v~(mPgV6TCGBpPN_8D#%~<5(~n~JUUV{Wp9%ZQpF>E)2+Cb4ap}w{ni9HK^v@) z^%{OdBlz~c@qmq9A=&v9cNA0jG~Dk%tol?js{}0OvQwZagfO#rn}Rv1u}0W@8XY$s zm19Uo@HYR4Qh#M_lv3Cf-BL^Gb6 zVt$aX6#9-xdq!n)hK_h_$`Z(!EBDR`N)n_F#IVsMs0C&>nT@I#{UT)A`qZDKtt?ZA zBtR&HyjnM;3dsDyHhpSHK}LXBetg_Ep=sBrDTw6vecFfc>M1Gg6Sq$^8YFhDIwHJw zCD=+d^=uK%ZzuBxL=eO!2_kj$-zeXq<3fW}eyvp|3AM8ZV5XRh5uZj!ArmuK(+wZ0-Xi;Kp;JU>%d=tKL;0rlfi$%|9=(87QhtP3C;zN2NAd# z-v7Jc6W}UP1tVY(+(tTY1z!bP`+pNq+O_^KxxhIB-{I}A0tpuu4g$BTknIN8I7NkjVqdPec}7qzZyf zm;ThFVP~56sGi#n;ASwA5H|LQv+|Y)49$0D{42V+J`*v9YjshCDRg z5w>#ekq^(zyObxLdJslgw4w}~pv0`PzL4yF^3eqgQ`&9@BN{e`o3}aE44O-6T?n*u z9N{+hAMe!(;#E5Y(KeMMB|vYVT-qA5uYby})fZoPwED)eufE!L2cOgvHn<-&4y%=l z!N?||Q&)hFYu!ylNh@?F5(&tW|s$<>U*j{T`F%w_6kupj0Q)0qk;#^L8>SS*uN zpH6Q?Oxa@0-f4WkTWU!7;7mkx2{blVq}h~%nR1&1_zGB15E`*_;)f(}=-V*)If0JH zQw?<;9GnayBWu7-X1+5*&%ha{!U_2aGKoYZ#&M=1+KQISJH-)%Vzx*I*2!HVyLbD8EZ;;V@mG3jN*U-%uCKJkRYq@@uWi- ztv91K)yuE+ld`8pm$q2e&!}WAnB>zB1v5rtG%XGJ+-W0Rlo(&IW6qMdTf7yN9=)z* zs!Oz{4a6=vNf;qE)w>STweqx6W|u7~FEUGlRR&xp{hM$$RXIe$l+?@*`l>-(5_BzP z%0}gai<&97j4U(N8BjE~be60vn2Y-(el~Fro%~(7;!xZ{r>7N%j3IYLO(jW3kdWV?G<~kI}}2A;hxEht#9N!^e(v0Pmmw2J^}gML2^Hp z$;I1Wm-x+Em&i0UKVak}{;BqGW*)8m5b2BM2lLdA?-@H+B$ArtYFg?)Gu22P)2T(q zEt#4QoBhAn!}HeQg~k7`^^B-&|9=X+8(amh1eXEX|9>=i6!-x={x`v^!85@k7z0Ov zZ@}kY0TzJv08D`=fsG&n_X0P=>)#081zrGV!FF&okez@V;rCw({t}!5#P?qgwC4X6 z(yKH-0vru)ArD^!9|a!)*TBO+6FdX#0AZj1f5OL0um8>9YVZQE6Pyb+fTO_eTDJ#B zfgi%(e*s(z-Ui+ZUJ6bHvgP*$`2DNFOTbgXrQj*xJa9O8FMPk`0Dlk4;8<{9p!52^ z3%&?G0A36J4fMQ1=SKG;Hpoqe0nIF(1v6r^gzAR}AG#Gu~X6=(7crQ!os z)ZfhWcYJ3o-fVToYV)*qtYI*k>*NUjSQ!&^xua+#kdZYsS4b+Kzzc>Wa5D4PtQq03 zN}U|Ulk$QYBgTH>-wKBTbrhTmFSoQW2jAs{v8W(7T^*B9&kSeT5Ip(8(K;EnA_We^ za~2(JOgfX+X`@Rpna)=|%#0o-3Pd%8v({#Y*>WMM%IE8FwlwTGvrtK?pp!Y?fR zb_yw68!hC@!sJzla=Kag+KUVC0ukk1AEGm>-xqpH@2OLY*cW=9(dpSEYsFHt z3nynx8r&f=<2?iV zRrvd_fG-1`1E6yNUjbwb;E~|7@c6PDa2ePS#QVqKp5VXW?LQ4<1K<^47HkJUfv1;U zz>k0r1M&TT24n-^q2RvYzu@z=FW_1bgDLPN@JHZw_}PxFi{7Kfv4om;Mu4sOP6|G|Oow-+B$$k` z7s0@Z6PD!!|H1x>jK!p9^I2q;PG|T$Wj^%AGa$|mUmN4>+Bam059Tq<_&K|}>w8Cw z-pWjoeX1R1L%SDNeaM#RB(E!46)$?-o6MTM2?8ZJG(r}XVqHP_uixEUmHh!OQ<#vI zB^;*(c^vyF+Us5ZTBeGI-T91uAVGTbgV3ah_5R-aKuT7+=u$qJt;#E|yePqUlH@DY z28F(IK^Pqi@~7(;O5Kyy2v$K`r}k#ta%Iudn<4Pd%HoRjb#2GYG3EAVICmM{M1aua zjmnWX)=aF0#e>1HQ7}`)p;b8zBl$&zN^MOvw{HtZR{ps7LR;+_CppX+y~0zcQ$Dy> z#(D;xr4yF-;I_T+H}CB2QSgcXWv@GWh=@yxwoP^3-yzrz30+TFrQ@?}MopX*5{pqq zS}uE&C9W4zJGOUT#7f$o*$lH3z#${i9W|KDW>|qKN!2-}xe0x{6}06GWoJ2 zxF>iYx&bc)(ixZoHE4|)Ty0Kq2U>HK~aco29X_;>UM-UTiPF9H>C z7SQ1fIve0R@UKAU0zMa5*|=iq|4k^MO0A2{P`wZJDy5ZQ{(L*ko@*#)U8@bk`F7s@ z??d@^P54IlkQ~}#M#nV!hV{?+rL^DHNp$^#3G2*6Q#I54n5;!=cVMI&nN#iH{=tO( zg9+XKBkj>Kt4Pd6rO45>%_6Nt{eubF;mGWwa#tKoXcNxzsdgk_=mryGZ#lCKh49)R zG`Y+YV-G9S$(PwO>~Xwd;%{xbjKcH?>+y|dNQzEMO#Io@*q$_twH>bM*plUR#bVg; ze^PbnpKDleZyHw(3U(@uJy`Cb#^2I&4OzRR&X`@YW=8Z8apjb;5^mkt*cFYE266{h zAz1TVWnuKsHC(oy_Rlpmg$3g!lioWOBqb6L6N#9wPR2RyH&w%Rbp+vgOlybVSgWLv3Xrx=^SU~)$C0d4%%`u`OC{{!GNJNf^x{{KGs z{x^W9f+;Wwo&dfI&;O6$AHcJK_5iej^#2b7*TeU}7+ely6Y&4R-@gMq8$1ha0uKid z13!Sre+PIOxDv=7;PGHB2z&m&4PXCkAb$VR;J@MFzYlaKp!WG|?f->9>;6q}I{3fv z@;VPt`vSz{{~4GBvhVj`@MZY=8^H}gX9E5;sDXz9oe6jt_yj!tHQ@Q+dEfvL-~WeT z4R`>!3BLZ@;07Sx|CwMKJRDpPUoU?D3eW@>f(yVVFbW51vXxgjz-xJrs|;n8-U`8AxUZBr=SP3T6o zFVNENck5Am8C}I(q&#S6w>Hkm)=l>Cy}{zen6~uWw%K)9^atq`Gn-wP3#;oSzv7zW zawqu}Is*yMN>I<2%=QL*zR(xQgw(ja4W^=;ig*4fzhb4BA)%Ub|0q9JC{}lvUH>RQ z309oZO^unF}qx?d*pnH%HuSN>|i#f{AP3o6FkdHn~oowC= zLpYLerKA3G9i3-d`^Wl)%Af^44A{g>nvibSSmq0FIJ~Gm-}}6g{;_`8tMT(UvpaQg zx$kfFv3^cG>rdSa^~=Pb;?Gmb=v{HbpVeQ@FWfnTsYJk+SO4tRgD(3WdWWwv4MR#v z#-QN;W!hBy|5wkO%AoCj#jL919|FB=|XU0G$UQJAm&5e*>-n&jOMYl);7I0`OyG z0zntxJbn*@2Z9HHA0P{OIXDAs11Ew<0-X^knLq=`R^X99x&YU}`(Ftz0fS%w90BeJ z?h9mF;G^Ip!07)g%G&?JlBG(jj)CKTl3!BC3r3u!)`-eg#&QpuSF7~7N^d5Olx5EJ z%*174SjM?W5)bJV#z29C<-0Z=5^zUzw$gVBdxfG%-zLdSoRS}GSnDo2y}irTy$T8y zUhTKy485Xhc}ZHqmgKC2Nkl-}+do6kc2#nMpc!TRwK+qt7!?n4hF;Om&`S>ElkINV zhv=W7hslgz@iX+;w`oT8GD-zScR*pzW&V=fqn3Je4q7YDIfNCrtRo|6PPI7uE~`bH zbw|dXee^R$@6|JG zY+l+IwzQaPcP_J5ktC;kk5CYr)hqkry_Jc04&B5{eKa7I=C2A!MQ-T|2G2h{yGcr0pNCc`L}=;Vq}M}eQg z&wm?y3rIiU?VtvBgH7Om;Jfhl-vRFjmxC7p=>bfFCxOR;$AGWF%fAo27hDCN0``Ew z^WO}A|7q}S&;s`bzYD(4+WzOkRp2#1y8dI}Xz)X1GLp-@6X+~~D?tpVKm=|H1$9T5 zbJ|pLc|}FWr;HucZtX?+zuWEh_D>2FnHgD9o4y%Yhu-OOip%)|yBqBGW~xK~q`-nv zzQ>;ol@c4?{gVO%p>n!&2x$5z1@=!0balhAM2WPue^Ov2o!W^){gVO%Cv9~koco}o z(WyEqf52ebKPfPDw>CrTpA=ZDv@(ZMB*fu|I7xSspm;R&qTL~kPLr;GQlKLhO{ynl zq-QvgQ4y4H5j63fqiO%7KwFWz`%Vg!NYS6~qmuB8YMsL0gBdE&`hO9AST^*;|F84> z@{kMP*g1-WFFb8&m68JVUfKP(=f){{i0ImN& z415PUz)j#^z+Zt^fsH^m0IvoUpaRYVyTL}FGXQ=DZUCBZU<2?r^6?))XBsYn zNud4zXMlTxdw`qZ?LQ4}1n&TU4K4(m!Q;SB;qyNSJ`3IeE(K2kZ-v+20k(h#g9m|6 zz~}!Rcs;lrRDsS2dJWcD4(Z8cvd`=nLeY2LGgRs``|+Yq*+PdbP^r)C)85-0tk-$!Pq5&eJ0wVphJkVk z8|;;i!nt`FJ*3p&TV-m>4kq`0y*_=`YbIpH;8$Z18_qZCtvEfzK13hhBqGI?S+4L| z_wn}39HjvcgO)DN$akOF4>Z%gn{K}ZWjk^Z zyDr>*@;PTYRdo5v^kcqqP8ZF3ggTVb1J+CN>*(gCo1A)rmRQR@)ee{MYw7fQBqFKq z7KuMQq}y_!7fTUF_ys>~Gt08A#%^vc)hDGqvdgLe3=YX0M$RFLDIG}F)H?aN(N2fd zI*GiCARzB3ZJ|;9$vb=Hp3(VB2F>^c+ZIXG{3WHuW@Vl=?ZF}&OXg~uHmoh1-V#dM}R zC6m%gjW;?KEFb)#m^#12-eLNYuSp{j21y(Bv0|pSNa?e)N|`E;YGxWj-mJ~Ed*Rwf zC?rdb>@SM{|0F!`=i!UR|DWjjf9d~U2c8T5CwMf_nSbAar@tD!3A`N4g2#i0fm`9} ze+Iq`J`P?5wD*5MSOhWH2uk4h!9&5%;rDL?K^7nzf$s-z0vCZNfHj~9bUwg`kO6!U zydJy+TmjlZ_5vOb?g4H_9w1$TzXX2)BqJCF(h-o|fCmEE2>cOwwtV|M_iud)^eND% zK%WAA3Up8)4Bq}8a!{znmS6(=m@^n}-uf=Ay-HFCBL@ig0 zbY*AgzMJV4bqgv_H-)coa96S;vbS#zJJ{d$Dm5|`KOAKW#~m)4aTrP^f-Z(-k`t=b zuY~+6Cu~;AoI<&hUI+=z(mZjL4_bl|)f_!z)SIXZ%F7;TElD6NAG6>&IMXNKtF#ip z#r_s5t9S!N3GqZzT^g_MR%MmtB39%S1^Zq0#=S(BWB$WT`ACa9HuOr0YDF(qRB{~UbqR`_A@ z|Lgt4=QHs7{{UVB_Jec4vEaerXYl%82VVnMfxiT&ffK>u;5+d3Uji=#PY36M2;2_; z{(r!`!2z%vtOwc$a3i<@yb;I-;2hWm9uJNNx062c^*4g|gZF_q0`d6I01a>I8I3L~>R-y{luEGUQ}Mcq zT4Qo{WF&G&i>2?I3w0e`U5d8uuV@!uI;?M-j>gAbxZ~qmOl!@@lhx^JJjG_gHhcBh zNf__vXkRw&$x_vPv$3x_6;DNHV4rfAOKGK@iOEbHu*Q)Z10xDEbTyvMWh?jK!dW?A z*$>j+cbFt;5EyzsNc^kvYn~1k(j6!v7x|qEcvb1|U5D<#r>pv8Ri1hzX|>;F{rTT+ z7kMgDkaKaWfo0Wt#=?NP!W(ru?W@!lx=y#&Ts7lyHePDM!HuQOL81BC`0T!dp0d~M zK76y(m34C(7ij~{zBB&^h-D<&cWLe``RZodi!uSab-W$%auJ?Ee&2qqP1(A z@g)l!v_7?VZSPoBgAeY%``htxSBNb>Zp}0nYE!mV|z?JcB0u>Y{gB4RbG=&XQ6H}&q;V8Y^(FW%3~2#0TNKT-SE}hYr%QR zOX?%r9+X7iOXM^;h;`mxHH(13)$ajsW)qAAzTTF3>rDPX>Pgw03_l@E+tAe*q4FCj;5=dlb<5 zez%gJuK>%}Z}7CYpS3;(`V{C>pihC{01AXL+mGnBteQFdT$3QF6uN~%8AFwBpqq3g zR)g+}FjCISlHg}mun_)NAw-gX1KoY==i21gQ&ri#kwi>9ex6K!5-!(X?nla*R+iP+jKtXXhq zy!suY1Qa`=RHVbTkw8tvrXt&lr%GvYr<29p4B3j=PuD~KeLhOTnS#%!W3m}n5NB23 za5ev|H5&7#6YW1pBMozWWqQjD$9`8A<|qvkztmW0Ml%fpqk);Yx_73{0W+b&X!AkVBr)slO&qq;w??WB4(@?88l&`dna^FM9jNqWZ{)u9E&TZqgGuyMLl$=$MTVZTi)Py))#&cS&+pKGg3` z!=wK_5dRNbZr8Q&{jUZ~paRChR`65!{x^dO5CPfuw{QJ-{ac>`eG2p`(5FD30(}bf zDbS}tp8|af^eND%z+FXwAe!*9im^80B`qu?qL3hy132qu;eAiSF3aBga8wSGJ4xb7 zh{MWKSF0hIbQa?WZA%IkvBd8WwkxgKR^}LFdnSaj;47}T7MgK+dZEn?EtFD&hD|3Ww0aWdJ2Nr?(JE4+U)#widZhqY8#6bY!%7DP_fke4*3qTeGaH zBiBPdp(CD6P+>GML79kBLKI2@<-JyIdF~j-IiU@q?jQ=K0cR5sDBTO~Y8~YYG1rR~ zBoJ`eM3gG)$y7oVS0-mL`X5_h5>VsCD 5 except AssertionError: - raise RuntimeError("Requests-HTML requires Python 3.6+!") + raise RuntimeError('Requests-HTML requires Python 3.6+!') class MaxRetries(Exception): + def __init__(self, message): self.message = message class BaseParser: """A basic HTML/Element Parser, for Humans. - :param element: The element from which to base the parsing upon. :param default_encoding: Which encoding to default to. :param html: HTML from which to base the parsing upon (optional). :param url: The URL from which the HTML originated, used for ``absolute_links``. - """ - def __init__( - self, *, element, default_encoding: _DefaultEncoding = None, html: _HTML = None, url: _URL - ) -> None: + def __init__(self, *, element, default_encoding: _DefaultEncoding = None, html: _HTML = None, url: _URL) -> None: self.element = element self.url = url self.skip_anchors = True @@ -96,7 +93,7 @@ def raw_html(self) -> _RawHTML: if self._html: return self._html else: - return etree.tostring(self.element, encoding="unicode").strip().encode(self.encoding) + return etree.tostring(self.element, encoding='unicode').strip().encode(self.encoding) @property def html(self) -> _BaseHTML: @@ -104,9 +101,9 @@ def html(self) -> _BaseHTML: (`learn more `_). """ if self._html: - return self.raw_html.decode(self.encoding, errors="replace") + return self.raw_html.decode(self.encoding, errors='replace') else: - return etree.tostring(self.element, encoding="unicode").strip() + return etree.tostring(self.element, encoding='unicode').strip() @html.setter def html(self, html: str) -> None: @@ -130,10 +127,11 @@ def encoding(self) -> _Encoding: self._encoding = html_to_unicode(self.default_encoding, self._html)[0] # Fall back to requests' detected encoding if decode fails. try: - self.raw_html.decode(self.encoding, errors="replace") + self.raw_html.decode(self.encoding, errors='replace') except UnicodeDecodeError: self._encoding = self.default_encoding + return self._encoding if self._encoding else self.default_encoding @encoding.setter @@ -158,7 +156,7 @@ def lxml(self) -> HtmlElement: """ if self._lxml is None: try: - self._lxml = soup_parse(self.html, features="html.parser") + self._lxml = soup_parse(self.html, features='html.parser') except ValueError: self._lxml = lxml.html.fromstring(self.raw_html) @@ -178,35 +176,22 @@ def full_text(self) -> _Text: """ return self.lxml.text_content() - def find( - self, - selector: str = "*", - *, - containing: _Containing = None, - clean: bool = False, - first: bool = False, - _encoding: str = None, - ) -> _Find: + def find(self, selector: str = "*", *, containing: _Containing = None, clean: bool = False, first: bool = False, _encoding: str = None) -> _Find: """Given a CSS Selector, returns a list of :class:`Element ` objects or a single one. - :param selector: CSS Selector to use. :param clean: Whether or not to sanitize the found HTML of ``