From 118a096542fff97392ed076370521f030eb20c82 Mon Sep 17 00:00:00 2001 From: Linnas Date: Sat, 24 Jul 2021 13:30:43 +0800 Subject: [PATCH] code simplification --- .../database_helpers.cpython-37.pyc | Bin 3210 -> 3181 bytes .../__pycache__/free_energy.cpython-37.pyc | Bin 13717 -> 13698 bytes .../__pycache__/pipeline.cpython-37.pyc | Bin 12989 -> 12616 bytes .../analysis/__pycache__/views.cpython-37.pyc | Bin 3348 -> 3363 bytes rnai/analysis/database_helpers.py | 1 - rnai/analysis/free_energy.py | 1 - rnai/analysis/pipeline.py | 133 ++++++++---------- rnai/analysis/views.py | 1 + src/components/aligntable.vue | 25 +--- 9 files changed, 63 insertions(+), 98 deletions(-) diff --git a/rnai/analysis/__pycache__/database_helpers.cpython-37.pyc b/rnai/analysis/__pycache__/database_helpers.cpython-37.pyc index 712e6e87f9395ac52ff269ce4855b713cd62aa2f..e24768677d26bf14d7d2c4ec080919dde19e711e 100644 GIT binary patch delta 374 zcmXAky-Gtd7>4sDC+GD1*=kE$|Bx<lyVIEFHW=*ZBfF zhdw~qxgh!y`4NJy-GXMUIsE4T-v2TGH(!N^dB5rV38YWC`F&J;@&j`BtSgQp5Lf{q zj2wk6A3PMX*QA{E2q4Rj;3jF6$oJR_W_OzQ2wgPs`lfoXsAr49C zP>b4va8Edt1c*9C{oaXvfVdx3Qal}HXiUL0-j7n;;WN>!LYi5H@_O{&3FIsJSe#=S zz43%}&uE`b$bj`P_{2Hw@$L!TirkfCmA@aSnT06>tU?AOWl~eu)hM)@9XQ zftu{N$554TZW&tRPj}xK_2gE%W)fnT*Sw=MSY6&HYp^HZlck-uc2<41rY}YeN(N>l hwqdYqu&FU2d^qS`u_e>U8Z-=A25njKwyhwZ`voW|T(U-ik*-5KrP!L==3xDbhI%-}k-md&|Bz&)t1Ddo!C&Gdd6N z{k~f~%zmt_z0eu+m|`OsGQ|fA9t5CR2tgubLrxV=6|O+8dt5OAqr{MT9PR4z<6TdS za&(i&<$gk5Q<_M)np8qlTAsREY-q$9lLtccl1du^>6~w%XM1+{1~QM70``|u#|WrNvc&9ay`7bbxD zT3=$|6nIkp!ne*PZhQ0i-6=s0)2(_d7Z_?Oq)l1uEE9`18?LCFSaNk=mr|{$W!g)m zlw;*!ELzV1%<RaK&-NRAhj=xGhn<261Z9P;ZZ#)65$WMk=RWL= IZ(QDle;&BE5C8xG delta 710 zcmYk4!D|yi6vk(EH=9k;ZIa!zX_^GmM1m&Uq_$BHibB=mA5cgY3ZcxQvQpaV9C~<* z&{S_i?C4F(#-j&OBSpOl#ak~PJxD>1#XrE4Z#S`Y54+!d-}^mw-psE1(lr-NGfwdN z+&Sn}-AkUsU*KJipcR7_N$d{JmX80{(^5t>Y0 zsyyBlk$}TD*0^d<;Y*+~#nt#8(?H8$_blXY1qU3_#vCp)<^`}jC zt|DcOs2fO)G+&XaT0VcFO~S5P*p-J}L(RX*rCK|0uwn8uBm*6c!Dex$%Tj$p9@n1I zPjb1wN+;!q`u$|gj|bz$F)2NjughuYZY;{ikyH6cx$abA21|vj6I_+$p_48q0tX6y zBB;aB6+@jKdh)lks*Z_>yfb@?ewTZ*S7KRNQUSt3$>g_54O1-RcB#MxAy;AC?d;>|#!r*lQVZCsuyi!_XZ3FDB26r}M& z6rM%B3Zz*P_oMUHezUGxzAYQgoDEQg3Z7sUd`^wbkH{u}xFH6ob4S!T j2`;8?Pte5+^ZAJ0x}1d&bUg_Lei_ zVn;J9sgqDhowPl`0~ET8NFyYuii|)4@qmO-1%0X%35|s43o1dt6Me}``TyCFqy>o` z?RWn3pZ`B+&VSwgwf)XaVt+ht2=LkM-9LY0=G8=R>t&cxfs(3Vbw^^6a4uS6I7*fr zj*6v(qiU()7_lNSidC(qTYAl~41)SnHCl^Vv5=Ol@mj)4gtSsk)=bL`X%*;{l_J7n zf$ob??W#aEJG~@X8A?tI1^phr;qihPD5vd9M@|G93}1B1Ri{7#31}E16OH-0YqMAH z?jSGo1X-3raSgviM#%5@yJTI<(>-BY^$fs>rvPf63aEQgK*NgxM!h&-%u4{qy(D15 zGXaxc3efb@fGIBnn5G0ULq)(Wl>mFF449+1bDrYPy`DxRc>$(tB>^8xY$wliJ8`HLh5aVr>!jhb zuFwXfRm2VeUvY(s;>lE^^0Ihq##a|CSEY&@@yHVD zYrO(h^TLvNKsdM0k9cZZrxDliA|aRL!4-J{&+yb%Tla_;fd|y`!Zoo^Xh%B_1umUB zBY+3>t_R&%J5CK6r7_q{s3evMg>b4QX#&Dcn$tVP*F3FlcEAd76~%*#+#2>(c!5+$ z@O3xsW@wU{R|QXB5|@atpBZ|zxzNsfQh0Nul<@s@{$BFQ*kzGzfCP|h4Qg{@ZsJ?a z=g1p;+1xtb!=}J^fl|6a%Zz1!X2%guAXlBGjiy~^yO4u^XD0urnQH}xGe6U0jgsv+ zfifA&gi(W$6v$sLmfb*Sc9B-gb=zSlkt@}T%|M;61M6bJU>lI9IrgP_yI!){v$*+l z0G2dMsb!Sw&3SjaS#&Qj3zwb-2qF)TJizuMZ|os#LTV4dOYn8p00?4CjS*8c$sp+` zV`3J<)gYP}BN`bH4WRkoQ=3}92g(1AnOi@Gh*2u4F9Sv}ZZLlN;E=W(=wPu4A+2VmRvrBG+ zEwWi$kSC8!9%OSU{R9Hqshja@k!m1NfItP8K%@lGQlnOzua`?j*A8TKMj%%j<$562 zn^vUC|C|{jRsKa7e(e$%}-u>o9eu;=Sb@vFGeNczDEt;^x}T1z7?)70axx7Aju%N zebZH3)iak!JJJa`kWdCZv1<}YXdsaSZ^Wnw`3(GP;w+}8?i!V7hqPlIG-$j}@S$qA z6HwWqes`p^5kKQ+TYit9^YebM-{rH@ z1Tr2}Zm-A7fmXlQ3k@L&4P_Oy5FMIEAC14>xy~@(n)_f~4D83^0G8sES-r@xrrwJH9GW_QpKQ4I5qt_gd~beT$_yo-WY(Hh zyM|S>l>eZGSsA3<3pT4YrtR6;Qn^%ywL&J4%5^vjmb>+SCsOATwj;;@L7Lew*vzpX z+mzX`V5vpNU2NKH0i~4LYNO}|s@tfR9hafcgKVi-EzMVpdtDn(4G%3XV&@Vp@$f{T zmYr#6MQk@P3WH%WVB1j9C|1jJ^=Vpki|h>lu(uG!lxEivzQzlE2hOsupj@IlUvGwD z(+f45u@(wkL3j~i3E?WjHH5DNSSe~(-Qx7IgNLT=x;!v zxqpCvwm+t#XoB#Q{h#LlTD40J$V5O6^Bn`9+YpFj<51j+F8nyD7YM7ux!vl9h3&qv z2Mvpk-IwFH2lnqg2Hb+&wHnF>+5=t3Qkgx?pyXn0{LfBdb^JUqvMa-@buk(?`A@-| zseJ3uQMGFcTChVVTAE9%&F46SMC0bPH6AKA%b-7P=96ngPv zrJg6&4CKb4qBy;vAw*1RL>!b1ae(yl{cHX~Zu9ZAJ<;%_2Uq*m$N)LLQd_&8OkkyA zZy=y@LH*>rJ&K$#kUn;BH@N7^A4hf*!g+RdguJk_b2KWMk0LLOw-%o&tR+`g779O? z$PK<}<1qOdpWOHv^%Nd_j4yB8M}ErR-T2%5H;@q;Ep)2VeVyz4&e&$<7OeedIp;5!A)L;Q`?+gP#hV9Za+LPL9 zEv=o?6fLi<)lxu5wQ%zBzNwxySdOd(L<5UoQQ`j_;4fOah;O{rTUgE-hb<=X)36jT25A#2P)RMd6(_ zSW~eSI?}0@8u)0I9y~)<2%bvAXojtD;G;IoX2gnI^N(6luxpK2Gj7ELoepxsN?fxi zt>lA*hq!TW7vY9I)>o_)r^iSsd=+Q6P@=we%w9SCFbJUo?bViR3lJ61UH<59`l_6u z7o&XJIbx;xb{IA-zfY&>Z{!s^)8o`5W!(z{hCCC{@HD`%rvsW^1Tf-70i#|FFy_Sp z<6Z(V;Uxi+-WcGRmjXGI&RAG}D<-KW=i|HSNx2+9)Qf`4 z0l1wBSiVaJbV$0)WdlfbT^&*ix;o{Qvy1cz)>XQyt9dF{`t0SUuJ(-O>Rfe09_>SR z>qZ_bE=T(8AUV0Ot9#lY%yrlF^gv5*;b0^5g zR>5?lo)SzZHi+{uH+clYl6s;~In+@-$zz`CF|HNJ>F|(tLtbbwHiVSG$dor)2CJtB znUT+sUZNrc3t_wjLwLN;2Ag2rhMRSBJi(Lah-dU!pLWAu_!v12ndOFjKXHpDFjexC z#Ju8DA?K2h(V@JMyrY*F4}(Eq@rW+~_*(TzUK1jPiWvmlb8$BUR*0`XEo#tFy90T( z)3Js4BFd(-y3`Tvs_i(ww%G32tx+*yf%)pwm7448!fv*|Z2M}b;y&r?_A@o-kn8wi zVOMyg*0LS(2s$gxN~dIsohTcQy|QYzsvT~PX1W_mpY#iiE`EH>=(dN@lslp07TNLnC8K+<^# zbkQvAHI9OvSGaLe?P{J@hx+i?AThjFX86v-gFV+3gJe(^q&^0Pp>T7^y1JXLXP`zS zJbICJLvDJo$pfrsIrLjSBHv0q4!y$CrBp81Td@R-8TItT^4@grc3C}+^fCO6NsW$a7c4b_by9fXIN z5VJtKz|MmRaC+bgDTFxCp)ug@v7z!rsGII)x|_P$Zmyf}7QCF7&692sd}0rIF=)?m z=zRkYfEZAF%FB40yev2sy*$u<0wzBWM?@G7g#wShp6O0_I&A6$md#4^1o3p>xtNS+ z|FJCsi4jO(mgdw%t0F!R^3@Q)q2mi5)19S^oXO3|Q@OwNkaztt_eonc+hu!sxmv5% zfFVl>U%{=bfO*ttX_W3kI0azo6~|qJ?p%WxUt4apE6{*$yHRu8fZKh&=9J-J@H5p) zqq^FtxHg6lqF2|hYiB{d!OLO~L@P}Oy(#X*$Y!n8S#`_Ua^g|R3Z)44m^hE{6}h`` zQ1%L0{S3aIm6r>7dO`l6kY(bq{9R$nJPP_B!|gQWe+q?Js2!>^xD*3=&|oUPb}=K8 z1WStDOq1aW@G_Y^S}e+PG0x<`ui`L7gE@VHW}o(t`wOkh#PII^;?Lx;T*30?84V zqC8HZiy1VN)@QIa`a%`TH2C1*sla^{M4B% zUo0Lm#cMeB>j3h@;zKtH36dWIzJ}nf5*7G`qtF4Tz6!6Ca&dfrYyqU}vyyL)PbLD_ z!0QOu#@By69%hk%Pj12>n*8JB5&d}-`*Jv)jz^b;4JFvJ#p$)rY>`K&4$!~L*QUNi z-;3GfUyvHyFa=N%#KAWH1e+tFmdo+|@N0>rz z0ZQRf;trzXGQt}GR>ZMej$N*nA(i5Jxw555gGtFZw-obtVcLq|MnXwtnoTJto1l51 zx!-R2+rIB$j5h&F`i+rCYeV~4gy19!AV>=89$)`>>kfMJ^%w$4^#28tA8wtXjdgqb z9!g(%W+eC`pVYoXB&V)jeCrYaf`9lNE?qE8eBPJ+88rZ zM#R`%KgIn>wcTjgRkzk|IYAd& zAs8_S|3!p~qrQPoMx|A0tT{DD1Qm-N>MPEg322M$XCp>{62rIE7hMfyyU4aasxrfQ(^gW8`AwVdMak9Gkm1#Tgl6H!tEc RV`R+Ve3RRPk(r-I2ml2R7pMRL delta 104 zcmZ21HARZoiII4$`3KsuS(7`YgE7&#cZHqYb~XJm}oyphX{kuh)c3vLTWW x['gc_content']*100 > self.min_gc_range, json_lst)) + json_lst = list(filter(lambda x: self.max_gc_range > x['gc_content'] > self.min_gc_range, json_lst)) json_lst = list(filter(lambda x: self.gc_contiguous(x['sirna_sequence']), json_lst)) json.dump(json_lst, fp, indent=4) os.close(fd) @@ -134,43 +135,38 @@ def process_data(self, target): def run_bowtie(self, sequence): """Run BOWTIE alignment.""" - temp_bowtie_file = tempfile.mkstemp() os.chdir(self.bowtie_location) - # -a report all alignments per read; # -n max mismatches in seed # -y try hard to find valid alignments, at the expense of speed # -x index name # -f query input files are (multi-)FASTA .fa/.mfa - fd, path = tempfile.mkstemp(suffix=".fasta") - with open(path, 'w') as f: + fd, out_path = tempfile.mkstemp() + fdd, input_path = tempfile.mkstemp(suffix=".fasta") + + with os.fdopen(fdd, mode="r+") as fp: for i in range(0, len(sequence)-self.sirna_size+1): - f.write('> sirna'+str(i+1)+'\n') - f.write(sequence[i:i+self.sirna_size].upper() + '\n') - os.close(fd) + fp.write('> sirna'+str(i+1)+'\n') + fp.write(sequence[i:i+self.sirna_size].upper() + '\n') process = subprocess.Popen(["bowtie", "-a", "-v", str(self.mismatches), "-y", self.bowtie_db, "-f", - path, temp_bowtie_file[1]]) + input_path, out_path]) process.wait() - os.remove(path) - - if os.path.exists(temp_bowtie_file[1]): - bowtie_data = open(temp_bowtie_file[1], 'r').readlines() - bowtie_data_l = list(map(lambda x: x.strip().split('\t'), bowtie_data)) - + with os.fdopen(fd) as fp: + bowtie_data = fp.readlines() + bowtie_data = list(map(lambda x: x.strip().split('\t'), bowtie_data)) - return bowtie_data_l - else: - return [] + os.unlink(out_path) + os.unlink(input_path) + return bowtie_data def run_rnaplfold(self, query_name, sequence): os.chdir(self.rnaplfold_location) with tempfile.TemporaryDirectory() as fp: - print(fp) prc_stdout = subprocess.PIPE prc = subprocess.Popen(['RNAplfold', '-W', '%d'%self.winsize,'-L', '%d'% self.span, '-u', '%d'%self.sirna_size, '-T', '%.2f'%self.temperature], stdin=subprocess.PIPE, stdout=prc_stdout, cwd=fp) prc.stdin.write(sequence.encode()) @@ -184,13 +180,13 @@ def run_rnaplfold(self, query_name, sequence): return lunp_data - def data_to_json(self, query_name, input_data, no_target, lunp_data, main_targets): + def data_to_json(self, query_name, align_data, no_target, lunp_data, main_targets): """Extracts the data from bowtie results and put everything into json format. Efficiency is calculated for each siRNA. If no target is found, for design mode the siRNA fasta file is used instead of Bowtie data.""" json_lst = [] - for entity in input_data: + for entity in align_data: if not no_target: sirna_name = entity[0] strand = entity[1] @@ -200,13 +196,11 @@ def data_to_json(self, query_name, input_data, no_target, lunp_data, main_target sirna_sequence = entity[4] missmatches = entity[7] if self.mismatches else 0 - if self.mode == 0: - if hit_name == main_targets: - off_target = False - else: - off_target = True + if hit_name == main_targets: + off_target = False else: - off_target = None + off_target = True + else: # We use the siRNA fasta file to get the efficiency information for each siRNA sirna_name = entity[0] @@ -232,11 +226,13 @@ def data_to_json(self, query_name, input_data, no_target, lunp_data, main_target # We must ignore the first two siRNAs because we can not calculate free energy if query_position == 1 or query_position == 2: sirna_sequence_n2 = None + sirna_complement = sirna_sequence else: sirna_sequence_n2 = self.sirna_l[query_position-3].strip() + sirna_complement = str(Seq(sirna_sequence_n2).reverse_complement().strip())[::-1].upper() + lunp_data_xmer = lunp_data[query_position-1, :].astype(np.float).tolist()[self.accessibility_window] - SNP_exist = self.is_snp(main_targets, query_position-1) is_efficient,\ strand_selection,\ @@ -248,13 +244,14 @@ def data_to_json(self, query_name, input_data, no_target, lunp_data, main_target self.calculate_efficiency(sirna_sequence, sirna_sequence_n2, lunp_data_xmer) delta_MEF_enegery = anti_sense5_MFE_enegery - sense5_MFE_enegery - gc_content = self.calculate_gc_content(sirna_sequence) - + gc_percentage = SeqUtils.GC(sirna_sequence) + SNP_exist = self.is_snp(main_targets, query_position-1) json_dict = { "query_name": query_name, "sirna_name":sirna_name, "sirna_position": query_position, "sirna_sequence": sirna_sequence, + "sirna_complement": sirna_complement, "is_efficient": is_efficient, "SNP_exist": SNP_exist, "strand_selection": strand_selection, @@ -266,22 +263,16 @@ def data_to_json(self, query_name, input_data, no_target, lunp_data, main_target "accessibility_value": lunp_data_xmer, "is_off_target": off_target, "hit_name": hit_name, - "gc_content":gc_content, + "gc_content":gc_percentage, "reference_strand_pos":reference_strand_pos, "strand": strand, "mismatches": missmatches, "thermo_effcicient": thermo_effcicient } - json_lst.append(json_dict) - + if(is_efficient): + json_lst.append(json_dict) return json_lst - - def calculate_gc_content(self, sequence): - seq_letter_list = [i for i in sequence] - gc_content = (Counter(seq_letter_list)['C'] + Counter(seq_letter_list)['G']) / len(seq_letter_list) - return round(gc_content, 2) - def gc_contiguous(self, sequence): patterns = ['C'*self.contiguous_num, 'G'*self.contiguous_num] re_res = [re.findall(pattern, sequence) for pattern in patterns] @@ -329,7 +320,6 @@ def free_energy_dangling_ends(self, sirna_sequence, sirna_sequence_n2): # Anitsense5_MFE for sifi siRNA not zhangbing siRNA antisense_five_seq = Seq(sirna_sequence_n2).reverse_complement().strip()[self.sirna_start_position:self.end_nucleotides] antisense_c_seq = sirna_sequence[self.sirna_size-5:self.sirna_size-1] - print(antisense_five_seq) anti_sense5_MFE_enegery = free_energy.calculate_free_energy(antisense_five_seq, check=True, strict=True, c_seq=antisense_c_seq[::-1], shift=1) @@ -466,11 +456,6 @@ def is_snp(self, hit_name, start_position): pass return 'Yes' if bool(snp_sum) else 'No' - - - - - def calculate_efficiency(self, sirna_sequence, sirna_sequence_n2, lunp_data_xmer): """""" is_efficient = None diff --git a/rnai/analysis/views.py b/rnai/analysis/views.py index a6bb5c3..3a3e53f 100644 --- a/rnai/analysis/views.py +++ b/rnai/analysis/views.py @@ -84,6 +84,7 @@ def process_data(request): target = order['target'] table_data = sifi.process_data(target) response['table_data'] = table_data + print(table_data) # response['json_lst'] = json_lst # response['eff_sirna_plot'] = eff_sirna_plot # response['main_histo'] = main_histo diff --git a/src/components/aligntable.vue b/src/components/aligntable.vue index 537bb6b..6dd5547 100644 --- a/src/components/aligntable.vue +++ b/src/components/aligntable.vue @@ -147,45 +147,26 @@ export default { this.targets = [...new Set(hit_targets[2])].sort(collator.compare); var targets_counts = _.countBy(hit_targets[2]) - var snp_counts = _.countBy(hit_targets[hit_targets.length - 1]) var trace1 = { values: Object.values(targets_counts), labels: Object.keys(targets_counts), type:'pie', name:'hit Target', - domain:{row:0}, hole:.4, textinfo: "label+percent", textposition: "outside", automargin: true, - textinfo: "label+percent", - textposition: "outside", - hoverinfo: 'label+percent', - }; - var trace2 = { - values: Object.values(snp_counts), - labels: Object.keys(snp_counts), - type:'pie', - name:'SNP', - domain:{row:1}, - hole:.4, - textinfo: "label+percent", - textposition: "inside", - automargin: true, - // textinfo: "label+percent", - // textposition: "outside", hoverinfo: 'label+percent', }; var layout = { height:550, width:300, - ygap:0.2, margin: {"t": 0, "b": 0, "l": 0, "r": 0}, - showlegend:false, - grid:{rows:2, columns:1} + showlegend:false + }; - Plotly.newPlot('pieDiv', [trace1, trace2], layout) + Plotly.newPlot('pieDiv', [trace1], layout) // Plotly.newPlot('pieDiv2', [trace2], layout) var lunp_data = this.$store.state.lunaData;