From d233bb1071af26419d1ac7c24f6cc9b075442af8 Mon Sep 17 00:00:00 2001 From: akaushika <31716836+akaushika@users.noreply.github.com> Date: Thu, 12 Oct 2017 02:43:34 -0400 Subject: [PATCH] Add files via upload --- MP3 Writeup.docx | Bin 0 -> 14730 bytes WordFinder.py | 137 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 137 insertions(+) create mode 100644 MP3 Writeup.docx create mode 100644 WordFinder.py diff --git a/MP3 Writeup.docx b/MP3 Writeup.docx new file mode 100644 index 0000000000000000000000000000000000000000..6aa9d3ac0c3d707fb523164686fb1bc49a0cfc45 GIT binary patch literal 14730 zcmeIZ1w$O!@;*Gc2X}XO0t5)|?(Qyw6FfKscXxMp2<{q!ySqcspnrDn-MhQF`@Ub` zcY9{Kr~90zx@NlSoH`})Qefa{07w8d001Bcu*BG^nSuZS1rPuLDgYW(OUTy7$;8G< zPub1R#8HRA)fz~Y3l2(^4FG-X|G(}3@IO!=KWf>_gd}no_~Jj=xFFd|RnRw_8)brP z{s0onCM$d+nAi5wvCYX`K*r2AJU}$D;xPfu>_54b3WPy!a3Md&{Fvw)r)$AMyEwl? z?`@1tu^G>_CNRwI17vG!$ChRXVIpnljU88ll}pb{_eWR12Sm~&M5&T-4PYY*dWB(M zJnEkNN}Z!L#}_U6BS10AhXhJ29;bS7kRij@p0rq$*GWfP5@mS~-0 z+yXQs8NnsKqek^}8Au^>0Jim)MLexErI?uyk-S6|3HMPocOH4F`)qbT$=8Q!^Axc+ zi&p~ev1r9$i=JPNzwo_D*6S-6K>ly)Nf3k6bpF;N`=)(xZ|bS%U;=bxWcY3WKehZn ztc!p7>E*HAZ)yq8fA0V6KiRIZ)Qy!V%V_j{8EX*=T1!G2b$P*Z{^glxVF6U<&_HZ- zYC7(_yJNcOr;T`>3!Fq1gwS@Fxu3edT90k6fP}!d0%q4ayDj*%y<5}g5mE{25&tj^ z^w0?$$e8=!=fqNIZJUZ52(_QA{{~a zt>3V{U`_dIl33}Zqk7hAb^4T#;+k0^EU9nkaDYsVeX@R=$$p!{{!kc@w|VTX_y6s!JZ}8AaXjEWu)}}BgPz-32qwmn+E-ia z4#e2wS?Is|s}P?5pY1Nw6Kr;Fsr;I3WC9&PG|Xvi^{NEPBbqYR%e*0Jp2bkGtaw( zGEs(!h^f55t3`8f9_%Z<1{cXMKF^s_+(`!CYn2FX2I;RHeEa(@?DAm`^Iod=m|Ixm z@+Q~C2A9K2)5IJb8k5H|s*L;cvLa{>tI{4|?&nxiOQ>bE{dVsy1_v_U;n>P)V#ulH zevgeKlYE@6xM*7Bpm&=r^>@K@(-DL;6EZlO%}~`FL5LF>G=6Cj8Y`>K)0&Cvtzr$v zl%TZ=YC`T*F;Sry=9{o*=?D}J?zil>7G7AXi_4E4sN{IbEj!dX+c~g-zqMs@#kpQp z$7`SyHPEQgF;bZ5kR7dRvpRauarJ41;q)dHb8{BowP9|mY2)HkPaC~4f?DOw@a`BF zV-Y!--xw5>Q1w$?~?Pr={m)lNFTAm{Pq zW1GbR=U6Z%j?~xPfiL&m)4a=sMJacssJ&?#$kW+!EDvyIM%k-=Eb=e32$(Ff?2{~g zSNc1L8yO`|NZtp5uG=&g7PXvB265VfX#s7hge`F!8;jjPyRUAG3BOK0=h^Ih7_Mbj 
zmF$Qp86ILEj~nGP_ngVpuWvoQp6KPA-69*Yczlwd2wJbZf@jtYT{rgUccTxXa4w=4 zU%ME}+wR?GWOR1h*&B1&j)4`>eH2T2A{a~DLeTLrCS%>X{wkaoWsv}K9re}`CMk@YEcK}2J;ocq}c+O`||XD z>q5V6pozBAs2a*WjTPRV{vFHyBZPa@#qCGU)31VI(xTLUR&7%x7%ZVuli&y+M~S$0 zJZAffPn91Rk)})S7?5h_HKjAYz9ZoxuAoaIY~XJQqDB?Yz(9mJoubq=uaa}UmQ&l? znh(k~UO;7JlQfXGyi6<5#-L;MdeSi+{dlQr#j5-S;Ohmil!2LAs(wu}T2DNSE zob;t~SV|#V^P4#G$TmSkH4?2nsG$OL$jaBy^V%xt*7Zaj3Zx7@*d(|p10( zTJ}peNhQ6qI?$;K{43;=dXyWkVov-{xUe4*9bXtAzJ35_$GMqvlfK zN6Sm_Ta=Ir5LH#blQmNTHV@vr`5fb^8z;;tv^7sdjt8Ns1%q6*eoK2Tlq7bO?Lc{V+Lq9*mm?mYHsLKi}|44=LTSHayTLzsnrl@=UIp3L=Y)P)goP_rw?i485==K55;^$MDJkg<# zaHw^@8ib88q<+utkdM4(qEs{$R;0`Bs#Q9aZM4}>e7H=)84(fLW{B^Wj$QZ$-Y!Gy zpG{WfOXCO%O>{(kr9Vt}j{zMP#m&E)%FHwshSMD(GDBcfA2cq0vC`<>kI5f(UW<2U z&C46(HRlI)_037TW?%u)59pX^3XA$zWFDz;+3#ZZeVliT~|-H#%l z`iQ7uiy)6t>;vBb_i^O`hQg!=a78t0MO?%{UKeayjeyVjTBr)*1*6)U?gp`?qc{;L zw+{jn(A8sYIyg zBx~RpUI3;06k7oSobmd#$D*ooLCf4sv34@QWx_Cg4LN|hyk6x)aZ*j?Jf)D=6#5sU zQKBjNw!~43Wn3Kqs6wnl-DvFY`M^G*Kw7)n0(bA^wEu8j@3RW0q{LH^(M)6#eiZZ3 zD`uW^>7ldOD%W^L!DH4?*C~bsgJ{-M*CRp#SW7Z)d$xG5xKw0?w&)r?>jSiTlmgbv z&;HNhwvJF{DN4u!ES?5co4gj0j$q~ku9s^vhp6h7Itkf zVwINI&3;fvbRtaF`zc>6G5vhih_Q2T{pOXqP*Ne4X01DYN$lh2*_LMP7N%;_esm~z z$RW4WrmB?m9+lQJuwDleUSuiKzH2110YaiYP~q6SEVep>83^ub(-kWX|3sJd8>4YB zEvDZ!`?PIEQ_LkKC6KOBfKV$s3Bv*;uFzD}Rc+wbH?p}zH8?E@4HB-(6JVE&b#YQI z4+OOcF1&4TtW$^Yhk$gEs1+Ms0{5*OOCVgK(7GSEB|vLmd5lkA7cX|2SKN^SK9zFr zCAy3}u4voVx5Kg07TfO{D3?9>jPq%T6~-WUu@5seujeavM$kamRLPLH!-{>A7%$h= zf`RZWH@ELZm^&6cwU?Mg=WlC{c04QkaMl(*X|Dqa&vy|_PqQ;WPPKuyt<|m(ZAaC3 z%Hf<~>HXYVtG#vNE%lRuSyaO}u8E#^Zo}DnreR$g2-h$JjL-EW?75OCPhSS!N!@&n zDPM;*7CxN$u*I^Ksd0{gx_oBpfF9bN3IIErL2(3pw7k;v(r8!)|AGs3T5FFcb7={t z!J8E8$duMzKuRwQ75B`}$Z&Btd1;V#H&-_wE0j^j?3FBjU$=d;J=1?sdgEAfwFU%9 zF2YM!wBXt6x?bB3=Vc`>nQ|8QI^@!7sH(LM*iu`Qw~^I&ObCHhB;eUyRB*9*Vf}pf zQSo98?p3DH$XH5QPH(p#NdTIhmVSn=t;qXZ~#|aH1g-hRcD~f%!lf z)WS`$%=)6fg|JV5gmWm^?Nd)2l0c8mKdUH=?RK735M)qEvqBSc-aFjd&s z+xBgP0&A^se>%`)<7%r3;<~C+@25$X>Os_GL8%2ZAfnT0z}2B{!-(J{`~o^lV9Eh5 
zYAPs0DO;r|S~_eeb>$p>k%kP&2#^-6#LmNmN0hievOvnC(~h@mOK%_r2+jO4wlc>w zr+PA`DgkQ49Lm+ z$`f-QO`O)sefH-9Z-_2WR1R~5RR6uHEKnaNG}iH!_!`}+>0=K#AWS?aLdRo;S6<v|j^LKVN;2-Hdg&ZBZ8cJZ8%NBM~G;YVJCOVh;d?^dI${FUz!))0V zCF4Zall#l{;m^1%Oe5Q+Re*+eqsL>fn)k=om4~aO^L5_GrEoKor?jjN*Q@38*PTqO z`VP0tz1^_+b+4a;Q4{cXS$+!GUg3&EV|Ybv zQ360_vN*xGMKMIWeas=2p&u@D6fyi1#VMDl8NYUhMfaR!(iO1(Y8bU9Sb3m=Z$S zt!U7^bfSR==rx4Z6_{gEB2Q?gRm>VKOtqKxAKAAM5m~PX3R^H`2>0^ms<7$u3yuRR z>9`;#d^tE9B9p zOD+cUZ9wtNbhvlI_+N2NQWOUseaJsZf7-p0L;f<;LxpbDo(`E_(L8%Z|6vd@UVHH8 z1xZoIh(ypfGyKoQL=~0mUD@#VB=Aw#6qqS72`9^li|{oF5_#P+3NS3V47v<(YT3`$9jF~?>a#>4o%oaD+0$tp$))=-mJhR;X3Kr(L0+?HKH zOi`f*-`YB{o?*k(=Li*s)nmUx`HYWtsHIl|WOlSO4%*yXhvj%J))iK!5pv{RSA z1?yx%9+#;SAoaH^&a&&uMNbOV{CM1ISuB?i-ckiI~D9Q4yJ~OTMi}+lo+>}azyS3*(!}gwkL!B)4Qbv+3zPR598UhgtuT- ztJu7a6{rx-&!QXbP^Xf07~tqlhvE;XvIV34z4PnB;@gpXZ(JAJBr-;cQI7(R$F@bVa$XRG10U+{HLq z%oLG9^hw`~VesBKkHvJ@s%T49Mi-!)%n%I5k)?=yjL;UXzz~r`cGR@8FgDT=Cr%5R z%YTJIet7TpHJVnVDINNlHS)cIyg!<$SON_2>9rQ58IJ7n+}dzLb&NytF!owBfR0=s(oJ9$Dm!Ihh(IU=!S zINX4D>g)v~i2xv)!PuSjI5g;zP|a+x|B`?Ip#D^L;v%vN0qX<`&0&^(9hLE2@rsy`jLab1frkrU#qH5ZDei05ZdsSo6=Xc_dbTK)Au{ zu-Knz>a3;g%2p+|;E>i&wZ`=p!9n0?^^G7u<>?V$($|p#44ht#*Dg!KH;B6^=y)Y8 z{HhDI#Blb_Ej^i7%>xgm1rN!eed>>VWsLRn^tEoI$sE zO0|w&d)Lo>dDUDkAwZNo|>pDB;Xa`Muitd=Sq`D~{>Z?T=i!cn9C8a#= z#bEpGJ{#UdPBjtSdScVi=;$K~vNuBp5lv`DfVt;YjvDE7ffC9hQ302L>ar6ybL;Bj zOrZLG&qelQlqL6JO~@bx{vMzFDLd@ivthN2-)WyM4sg0V5kiZ7;fMYx)H29*NMgi+u)@#R=@LLEsA2DBj%T4dt+~ho{ zJ5i_A?;fm9@fDx9j+yq$Gg`uO_6op+Rh zackDIHMGv{*32_So9vQAYYamz>DZV>=XLrHct7g;!uXF)LtgwJuHkRq#E+b^s#xXWo(NYnBNrXlX0R1W3Ybd=&9W19 zKq^h?dqjvbN!n(ChWZV|Z8^dO!@Q& zWYy2mT2ADD`!tL@Nob7O8+Lpwpd_!~(qR!xYr#o76*0p208m*eS!{YJ-w)&4QM(@8 zf;SR=l-d_HHCYh0KeulS@ngJhDOm>Br7<@Ogmm-WVcEhkc*k|TN9W}Y=2Sr5p!{MdPG2cBirQSb2y%gR3Z#QrN0F)B|fcVGL zb98b8n*2Vdw`FKKF1I2Nu5(YHcTFliZVdf+Ka7{f8i~r0Nf9Zx!#05gqfsm&xJ%yn zcnIf{Eo=!B3TN|dd|b=tbAL|Q_k-l+cF++8s?#+QE=5<`)b<@Eh@231$n~b-kL{O- zjZ?BAxI|I9MC#7uwhPZ6gXiAQqqcJAS6k$JZrK2f7hV`fin3hw&Met0yMENf&KRrA 
zQp6+{DfJv(Q^e^2lc?PXqWIxdbVFi8g*NG^cZd@d7;y+WL%WIrPkHd80Y!l!S*ARy z!YO;SHQ%h3rjeH7#&SPV2xabD=EXH{kxS>}j-7z}pdg#FB#mxGEMYB^`YUeM30>)- z*n$*sb|C}Ock#Y(HUwL|#9r)6L6 zNl@ZSY?xoG9xgD$7%9ZWZn)K2p(^eS6M1LOFRwQS+ZN*nwBY*NldDhky-L7}oO??< zBgS>kJ6I5^G^nJxff{xR#dUQ;2oZs|{ga1ZANubUqnK#nEO429j?N*uAxJBKsF_@~ zu_QdiGjBtS^{J9|7EVMjbuAxAu2K}S4Q0Y^Mpen-5J$UV*Jp}y)m(EL8n5eWITCsEgNMaDfu#Y43BgxZW>$fM|cr&ku;F2xDwpIUK#_urNtD{ zm@ZSauU8FKf+A2xu`x1;Ds(pnmy^%QVPE1)AiC~XBR-nqtki^mGzPZAUMn2h03Fk6PCcV1rR?Nthbup>2VTT*m+1itX+j6moEmiPRcYXjv)7FXL=z8h=PZXsrRq_AE=P)07QOv0Wa$#9yu*ShjRgB{|I$LAhp0+$0@m zd-`cI&Z?65F_J=OlrvF|CzteFn6zrkT>Vo_jB)fXANeu*XkbZ#X7J|Mhlc?7i-Za> z+wVGK!jgH*Mg9RHoY`6eo*l0bS%Iy_`bS8&(Wlx|#&mTi*wYC5 z2xPe3IO6O9EDUjMMJtBAq$~Qpd26~o@ytAaLJmQ9QPRT3hFDk$D3GG${=Nzp`uh2x z;hlm-Ah_NsvS92lni;U7_2A}R#XrKG?&O>ji$L;5Si%1M)NMzB=rPQaB->z4^DFLZ zgGn^yNx)^6n+()qlI#>E^D+5><)fc6#7TX2^p`FRYlew_!1^TJyijSUO!B$|S6gX; zVbTd58C=Vm7<;g=Y<%*CV(-Mx)>x;ysppns;r*W3%`=^jfu0-LNd6gBTgujnt84Y}f$GKZ z%f_i>F1i~R6H237BcH=pNgI(&#P9&?4ty*N&!y0lt6s6hkIoc5Zh9|3OCGTw8Fwa= z6wP&u9l}e(&IKE8dKeAmDB3|eyjp-+>^*FkC#I|BZDs}sB_?MFCCJB-hy&IImlvsn zkKCqHY^TL<|5yA>U~E!qol%DQlu4*aW+4* z$cv7}R^)eInr$O(zmH zELG$NzheoBIHCrX?o+@gOv>;d9`i!rB`!YbIM#R0;v+carbP{z7k-vf)GMR4h(` z9VuQDv=#W4^H2o;%FYV(b#WrjWag}*|Aq#=K!xSF{j8!A8;3!$yG@Tg_9}RMrYyX! zItOe;HSQZrm6}JfOY6lppsJSI4Lk8TYVihn<4yK)NRJ!NG7~>Z266Zj^cJ{0mC13hN=H({QV{?e`|cZ$=^9}R+gGB zngdH_Tjp7p{@miK;rNWZ5`k-)d&D7rD#3r%pJslQO@?3YfwCZ`xY#7~zHl!8T-k7iSl&JDbmb(qtFuX#NzU#V3fTaWdvlqzvA%D z(Ij`8%W_@a>PO{gjlNl>))_tyi+q6GEsTnaT%MU~&dk0VPh@B9(Xzwoi{)67$j#x& z_Z9NatDm_K-5h3~rlydT`b%fhH&eXSW6W!_a$UHb?QBS#e*(TMm7gj5}Z45}a>uUlL{F{}@~LXU~m>fd%v9 zi7wv|tVF%;NPl!0Voj8ktrHaVqhT^n)Ya>k7AgOJQ3?mK1k04kDuK$JkR8!L)3Z>G zurmW58C}^-<0M-lb%R4Y(&&WXDm;X?uTNc+xTSeewiX;I`OUpx>98ExE{ND4FaJF! 
zyr8Ec{lSX%%6H|L=$7gPzc6#PlS6;xkLrmg@{zbRH-uXg{i9lrDtbfis_^nSju;vS ztl~zVusX)h>wI0nuPc-crZ~m)PVN({!!sU*+;zxWOLjUHnGYDHwcl4bdx;#epnB`& z=h{t<3F$bw@&*`IwkyFez8pOjJLazy7%(2a#sncu7G2seSbKDR3?3%`UXEKU%d$` z5efkC_WoA}lg%eIRaK1$OfSqR&u_uvAol!1Na)QJd^Kt&nukLx1D-XoXn`Y^oMb3_ z?|}2_+QyyI7sDPrfsBqPw~a7&x!XU8xPR=Y1`I+67wy_;RUdW0sS1w~i9K)##GWb& zkC!yBVJa%n4^gKR4~c7#zcP+CEqSC&psJbcY7uA?<+8Egz4EGW5wsF-?BbzT6UTmP zGckq|3K}jl{(;lCCMzR8*4=^>N(`pcAdRIcm%sy3DI7EgO>PP{WooPTiDSj1KE8OIwY%ftBb>_Mxb~nlWRINSYeV#*hFlT zrm_fnN8PC-DVjF&7^2ZS7^*IuR3iuT)#yPIi^P-r;%mMeu?;&9Jd-Qfo9%{WxIlu^ zZm=w8M4vQR@xzRqrllZe+x+gS#Ap%h7@+!9OZQCwvo^*3t65F(B&Xi}Fdj0g*EIA|2MB)N@tZ#-U5#z%i4NU4fwBeb+-~v0D zy=Yxh*hISa;V;_)>FS7O#l(8{5r^BNhJ@SYGn#2sy<|5X$H(?uBZC&(ISrqX%%9XN zF$mM&E7(!5)O_PCqv|gl_3oRL5Qt~(a)MT^n z&<`osp%$w-At_&ID1xUc3mv~u>@g`bU)8NW4z>uPI-->(_txKmI{b!(qTOr$*Vptn zGF0d7KIxCIVJRnVIGQOWcUD+b(epGAep3@akI~~6_56?yMR<0m7Z0Df!ewxcBtjn!TA7jpm+@|LV*gNv5TY{MUAAD- zGk9(#=W%in(WOkl&}kENp=y=ChIk~DgfJk-2wQEYLHgT!a8x67`iBB z-FhTeVv(A6FatWu%L`W8(y`D3e7KGB+L&J6<}Ki2c{&shP4-$k+l?&FnOeQgj`DS} z30b*MZIO)&n+HB1%Y|!88pg%dqbi=-vXphW=G0 z?1|sVI0Ym?jL}_<*h;;}^xuD17PueO91p*Cmu;};@%tV-LfnIoh0G9kvdQ4-Vuu*G zE10No?0Tp`PnM7_Prl=xrv&mN&o)EO|o(I``Y8^F&|C=#O z##wN0bsf&ncP zF%tK*K5HSADCMGhOB9+g%J1yhQR6+ZHlrR&^N!%9o+HY4nOE7lcUB~dN!q?k1Rn55 zvlLv4mEbply|3ljn3dAAY+v^*UZ?jihhx3YKZ~=qvwXS&Ul&eA$J8nYroYY#B4!Tm z6bC||eSQEVux*n6XrH(Kk6y;T$WXhrHydK(%}0#%R~y2>&hCFE#M@u??=4-a+h&;w zz8!Ia537|!(vNHbRHQ%J@W4O|tj~HPRhqKLSR#5MdvQLh7FTZ3IaMx7?s^+lB4l~~ zn<{1Suxpb+-Eh3hM!jt~H*mq|`R5*384Arj)iFMB0UPCO!ufiL&NT=LjySqRbG?CB zU-cf(A*mCM%)!E>#IaFPbdT#shjl~{caZq? 
zP*ja#=n0~hIq8gM@ft{5O3-uWfipTkXAchIY-`*P=+!j%FIYsrzrW4a%k8eqM=_!5 zz!H16CyW@}Ch{qm1}{$A)Zd8gR#Rg^5z4p>2-@w9x<#Ozc_y0zya+f(HNUQr+@l0^79;uRqO(6_;E&@!l%UB={hh$y%VPcj{&viIQ?$Pn$@~iZd%eJ4ft_!OL;ru( z1HV%GHD&xyT99vv z!&iU8x$*xE|0S69D}`S}D1TD;N$>{){|Tu43jX`*>Q5*DfJ+Mi{MTCRSNN~??wL81zgfS(;(s*;{=^?M{Tu%`)8JS9-v{|MmiT LlTb(Y-_HIYziqbv literal 0 HcmV?d00001 diff --git a/WordFinder.py b/WordFinder.py new file mode 100644 index 0000000..63fa81c --- /dev/null +++ b/WordFinder.py @@ -0,0 +1,137 @@ +#TextMining Project: KeyWord Search +#This script will search through a given paper or papers for the occurences of several keywords +#it will then tell the user the most relevant paper based on the maximum number of occurences of a keyword or keywords +#Claire Kincaid +#April 18, 2016 (revised for MiniProject 5) +import doctest +import string +import random +import math +from random import randint +import requests + + + +Knight_full_text = requests.get('http://www.gutenberg.org/files/55708/55708-8.txt').text +America_full_text = requests.get('http://www.gutenberg.org/files/55713/55713-0.txt').text +Wolf_full_text = requests.get('http://www.gutenberg.org/files/55720/55720-8.txt').text +#first remove all punctuation from the texts +s1 = Knight_full_text # do you just put the whole text here? 
# Bind the remaining two downloaded texts alongside s1.
s2 = America_full_text
s3 = Wolf_full_text

# Remove every ASCII punctuation character from the three texts.
# `exclude` is kept as a module-level name; the actual stripping is done in a
# single str.translate pass per text.
exclude = set(string.punctuation)
_strip_punctuation = str.maketrans('', '', string.punctuation)
s1 = s1.translate(_strip_punctuation)
s2 = s2.translate(_strip_punctuation)
s3 = s3.translate(_strip_punctuation)

# Make all letters lowercase.
Knight_text = s1.lower()
America_text = s2.lower()
Wolf_text = s3.lower()

# Join with a space so the last word of one text does not fuse with the first
# word of the next one.
whole_text = ' '.join((Knight_text, America_text, Wolf_text))

# Split on any run of whitespace.  Splitting on a single ' ' would leave line
# breaks embedded inside tokens and produce empty-string tokens.
word_list = whole_text.split()

# Index of all words in the three texts: every word that has a successor maps
# to the list of words that directly follow it (duplicates are kept, so a more
# frequent follower is proportionally more likely to be drawn by quote()).
new_dict = {}
for word, following in zip(word_list, word_list[1:]):
    new_dict.setdefault(word, []).append(following)


def quote(data, length_quote):
    """Generate a random chain of words from a word -> followers mapping.

    A starting key is chosen at random.  Then, ``length_quote`` times, one of
    the current word's followers is chosen at random, appended to the output,
    and becomes the current word.

    data: dict mapping a word to the list of words observed right after it.
    length_quote: number of words to generate.

    Returns the generated words separated by single spaces, followed by the
    terminator '."'.

    Raises IndexError if ``data`` has no key with at least one follower.
    """
    # Only keys that actually have followers can start (or restart) a chain.
    starts = [word for word, followers in data.items() if followers]
    current = random.choice(starts)
    words = []
    while len(words) < length_quote:
        followers = data.get(current)
        if not followers:
            # Dead end: the current word never appears with a successor
            # (e.g. it is the very last word of the corpus).  Restart from a
            # random key instead of failing with KeyError.
            current = random.choice(starts)
            continue
        current = random.choice(followers)
        words.append(current)
    return ' '.join(words) + '."'


Knight_America_Wolf = quote(new_dict, 10000)
print(Knight_America_Wolf)


def make_data(data):
    """Takes a string, removes all punctuation, makes all letters lowercase
    and puts the words of the string into a list

    >>> make_data("I'm hilarious")
    ['im', 'hilarious']
    """
    cleaned = data.translate(str.maketrans('', '', string.punctuation))
    return cleaned.lower().split()


def word_count(data):
    """Takes a string, uses make_data to turn it into an analyzable list and
    creates a dictionary that counts all words within that list

    >>> word_count("I'm hilarious")
    {'im': 1, 'hilarious': 1}
    """
    words = {}
    for word in make_data(data):
        words[word] = words.get(word, 0) + 1
    return words


def word_find(data, keyword):
    """Takes a string, uses word_count to count all words in the string and
    returns the frequency of the word given as keyword (0 if absent)

    >>> word_find("I'm hilarious", "hilarious")
    1
    """
    return word_count(data).get(keyword, 0)


def multi_keywords_find(data, keywords):
    """Takes a string data and a list of keywords and returns a dict with the
    word count of each of those keywords

    >>> multi_keywords_find("I'm hilarious", ['im', 'hilarious'])
    {'im': 1, 'hilarious': 1}
    """
    # Build the histogram once rather than once per keyword.
    hist = word_count(data)
    return {keyword: hist.get(keyword, 0) for keyword in keywords}


def multi_paper_word_find(data, keyword):
    """Counts the occurrences of keyword in each paper of data.

    data: a list of paper texts; a single string is treated as one paper.
    keyword: the word to count.

    Returns a dict mapping each paper text to its count, ordered from the
    highest number of occurrences to the lowest (ties keep input order).

    >>> multi_paper_word_find(["a b", "b b"], "b")
    {'b b': 2, 'a b': 1}
    """
    # Iterating a bare string would walk it character by character.
    papers = [data] if isinstance(data, str) else data
    counts = {paper: word_find(paper, keyword) for paper in papers}
    ranked = sorted(counts.items(), key=lambda item: item[1], reverse=True)
    return dict(ranked)


def relevance(data, keyword):
    """Returns the most relevant paper in data for keyword, i.e. the paper
    text in which keyword occurs most often.

    data: a list of paper texts; a single string is treated as one paper.
    keyword: the word to rank the papers by.

    Raises ValueError if data contains no papers.

    >>> relevance(["a b", "b b"], "b")
    'b b'
    """
    data_keyword = multi_paper_word_find(data, keyword)
    if not data_keyword:
        raise ValueError("relevance() needs at least one paper")
    # Pick the key (paper) whose count is largest.
    most_relevant = max(data_keyword, key=data_keyword.get)
    return most_relevant


keywords = ['kingdom', 'wild', 'cold', 'learning', 'war', 'violence', 'journey', 'love','hero','freedom','morning','people','interested','the']


print(multi_keywords_find(Knight_America_Wolf, keywords))
# Only one text is passed here, so relevance() returns that text itself.
print(relevance(Knight_America_Wolf, keywords[1]))