From 07de2378f420544666abe92b262ac859e571aaa6 Mon Sep 17 00:00:00 2001 From: Gabriele Girelli Date: Fri, 27 Oct 2017 10:56:06 +0200 Subject: [PATCH] Implemented secondary structure formamide correction and melting curves production. --- CHANGELOG.md | 9 ++ README.md | 24 ++-- lib/__pycache__/meltlib.cpython-36.pyc | Bin 0 -> 11955 bytes lib/{oligomeltlib.py => meltlib.py} | 114 ++++++++++++----- oligomelt.py => melt_duplex.py | 21 +--- melt_second.py | 163 +++++++++++++++++++++++++ 6 files changed, 274 insertions(+), 57 deletions(-) create mode 100644 lib/__pycache__/meltlib.cpython-36.pyc rename lib/{oligomeltlib.py => meltlib.py} (85%) rename oligomelt.py => melt_duplex.py (93%) create mode 100755 melt_second.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 3fedaaf..d71198c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,14 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ## [unreleased] +## [1.4.4] +### Changed +- Renamed `oligomeltlib.py` to `meltlib.py`. +- Renamed `oligomelt.py` to `melt_duplex.py`. + +### Added +- `melt_second.py` to perform formamide correction and melting curve calculation for secondary structures predicted with OligoArrayAux. + ## [1.4.3] ### Added - Melting curve plotting script (from fish-conditions repo). @@ -50,6 +58,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. 
## [1.0.0] * [unreleased] https://github.com/ggirelli/oligo-melting +* [1.4.4] https://github.com/ggirelli/oligo-melting/releases/tag/v1.4.4 * [1.4.3] https://github.com/ggirelli/oligo-melting/releases/tag/v1.4.3 * [1.4.2] https://github.com/ggirelli/oligo-melting/releases/tag/v1.4.2 * [1.4.0] https://github.com/ggirelli/oligo-melting diff --git a/README.md b/README.md index 8012906..cc0b977 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,9 @@ oligo-melting === -The `oligomelt.py` script, implemented in Python, allows to calculate the melting temperature of a nucleic acid duplex, provided the sequence of one of the two strands. +## Duplexes + +The `melt_duplex.py` script, implemented in Python, calculates the melting temperature of a nucleic acid duplex, provided the sequence of one of the two strands. The hybridization delta free energy calculation is based on the N-N thermodynamic values in literature and is available for DNA:DNA[^3], RNA:RNA[^1] and DNA:RNA[^2] duplexes. The melting temperature calculation is based on Santalucia, 1998[^4]. Sodium and cagnesium concentration correction is based on the work of Owczarzy et al[^5][^6]. Formamide correction can be performed based on two different published models[^7][^8]. @@ -13,15 +15,13 @@ The hybridization delta free energy calculation is based on the N-N thermodynami * Use `-C` for the temperature in **degree Celsius** instead of Kelvin. * Use `--out-curve` to specify a file where to save estimated single-sequence **melting curves** with temperature range and step around the melting temperature as defined with `--t-curve`. 
-### Help page - ``` -usage: oligomelt.py [-h] [-t {DNA:DNA,RNA:RNA,RNA:DNA,DNA:RNA}] - [-o oligo_conc] [-n na_conc] [-m mg_conc] - [-f fa_conc] [--fa-mode fa_mode] [--fa-mvalue m] - [--t-curve range step] [--out-curve outname] - [-C] [-F] [-v] - seq +usage: melt_duplex.py [-h] [-t {DNA:DNA,RNA:RNA,RNA:DNA,DNA:RNA}] + [-o oligo_conc] [-n na_conc] [-m mg_conc] + [-f fa_conc] [--fa-mode fa_mode] [--fa-mvalue m] + [--t-curve range step] [--out-curve outname] + [-C] [-F] [-v] + seq Calculate melting temeprature of a DNA duplex at provided [oligo], [Na+], [Mg2+]. Either provide an oligo sequence or a file with one oligo @@ -74,9 +74,13 @@ optional arguments: -v, --verbose Verbose output. ``` +### Secondary structure + +The `melt_second.py` script, implemented in Python, corrects the melting temperature of a nucleic acid secondary structure, previously calculated with OligoArrayAux, and produces the corresponding melting curves. + ### Library -The `oligomeltlib.py` function library (available in `/lib/`) contains all the functions necessary to calculate and/or correct a duplex melting temperature based on a number of parameters. +The `meltlib.py` function library (available in `/lib/`) contains all the functions necessary to calculate and/or correct a duplex melting temperature based on a number of parameters. 
### Additional scripts diff --git a/lib/__pycache__/meltlib.cpython-36.pyc b/lib/__pycache__/meltlib.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9b50c62fbc1106945d819115d3321c8347a5ff8d GIT binary patch literal 11955 zcmbta31D1Dd49)UTCEPtvJTr()`wzkVp~3PI<{Ae91D`zAwHU-BvsZstJO-YUHR>k z?U=XVqJ$(RG)^fY2>}X}($GQymm{2ni4%{l1y^b|uBK zdOUl+H*e;fnScKOpL4ajt}goFpZ)B$C-!LCNo~fn6!9K}(lK4rgeLU7HlT?=9MHub z;%@O4ai4gbcu>4kyhprWJR&|MJ}N#g9u=PvpB0}MUldP>uZSnbO0h~j1-#(l_5nkD zLp&|MC7ux{#k6=<{6zd*yeM80FCW*1i4aDpMQ9MKfQw|=T@N=8FmDM$JHmMgD-b#n z5(w)MHX>Yta0S9PgkFTJ5nhe33*iQYy|T7dVmImw;QK~|n-B&O(g<0EQG_Bw3E?op z>k!_6Fo|#{!kZE9MR+U1+YsK4@BqSttQ+r#@XkNnHV_nJqHw!55E4Z(j(1ob6c*kQ zQ4%)ZHDW>>!aFJsiz9f~ird6dykp{Z!oj;vl*Q}ut`~0*$M9|t`d+Q;9WcUaT4#6m zfkM;d)R$jQ=6+~uT6Rc2OMAS?rk?DRU*+>$(u=h8WLZwrx_h#l~SqhE8-k0ktFB>fb0M>Tpf=|xuf9C()!^_RXY!Woo( zxBA6@UmxBhx@;K6!unafvk#S7Elq)0Awh ze8Dd7$v$_(r#kL<7Fn|RDY(?ZZEWPX{@{DmuQ}8+t?K!SZ`8(~Y1R6fWTH*kG7h1v zRfJB0RGifI3VupUuVhS`TUvEBKcyL}_LU}RV@4~X1t3T2K51a3|7FafebWYj550#* z>O&}f9pLCA+N4(29bM?7hP6}}!ps_FqtJnr08-Y9pa{v7n2p72QrER1ZLj7SBGRsn z7?YYTZ{oXV#<$_9bnQso(ROIJw3P!+z%fR{)(~om`n4P!x>0j9QHy^KcbM&3IhfVT zA(RPvWkMsSgStmhdQxxKSnl9MH)x zlyR$gC?_)Lnd~pxEDYsyX}c6Z49U^od?sxpJCz?FP94bD>vy|anli^vie>VK9a2!^)-5`5E?uJt1<6O3axq`?X zaRVd8T)_=aAkj*M2aH0>)$IWzbpRSq9k4c_Ay>Ct!kvwHK{bW&=U3b(Q94y6WR*11Bub*0iSsBsY4DKWwUUgD&$)l*2T2eCBg8$MhS zDpSE5@d06izAz9agg6GTFr8qtcDYv8L_ns!-U-7MOb)tojR-oC(U5hkqdPU^wQ9C* z8yK_!5n6_U*^G&zRa=JAoKWU+X691`iLmOj8y!odcd3c&@NJduyP?BYE<0?yk1gGEFv>w;mO~;?4nv9hsAdTmfU1f(5hv&vP+h1^v$YZUh+`^cUZ!$kfNH7; zE!LVf=Ae88L89dk7!UoCysEL~Mrk>d^BLTrpgZiip(Y z{YpP@&{|PLojQS54LYr*gF0|+C#>T9F2|tn8ZR&E)QTweiS#CMI zV`J2`nla7LTIMA5i!>c+T{gxD#{<(iWyqP*P}2rnm=#MOf{r)?SX{=e-zQKVbA7Yk z8`rsFfgN}iJHUAn)4LqdF~hnEUuilbAFhYYm!b}XjqOT3MB}`<8sbK!D^b7z+%4qP zP1cm7NUasdGED5{hU5B?4Nilf(yEQ>$93xf=UCM6HI!~}YPbZH8!_9~VP-vM>m!EK zSgYBl(?AEDM3+M4VDYKAJE@MNI87?LhNi=>NHPax4X zRY~li6OhBflL)!ZXKVHUE0a?{5`t7sLk2f_kT!-P)^PA*Y!K_!%5W7F@6BmikJXDW 
zi*~Sf5I9m2VLAEA!Qq;jBjeUiX3>JyYY3N-)GSAfbsZC`-oYio+C|t+kk0aI#;zgA z#Ux}~sX{j62JMoa8Mm%Sh8qwmJ7rzTs3Ed$NDUKL9|dc$IAPCqxR~c~=?UvlW`ORt z31zi5&VeyKX2$dujE@+;I*c~6(X3~x5jC34sGow}Ma;AWN~Pt}jtI|xl@W9ZAKXR5 z+yS9W^M{YziW$%QrqfyD7)4>V+bBomY#D)z8BSQa7-mxgTYwt5bVuQOVT?GVT%5Jn0gY=V)v^EP)^?IR5mR1?+B5J}mhhcRh}m^E9ymbAbv%>M(zAc2at zZY87$2MB3GWw)u;=a{$ck!t+_)yk2|X(*;QL9NYDSyU-5gY+~P6+T>^sYDZ@^a+&1 z;u5sAJGDFYAt*qYw?`aP9WctFyR^G-1QCIDEY*(x6~qoH)UFNbEgJF=gFwAHe8jKBi8Z1XUiLw z@`h*SHdW;Ha;x&9rb{baN<5nZiNP42DYN|c+osf&7DfGea2?Tbmp&Q->wYfETxxmL z6OC%SBi%1jXqlPEJ)=sO%Xnmni|=CJ@y;Y)aggDWxx~%3JjgVcb=4n-wuH^pfhM8L zmfxR%Z~f+MSUBPZI%~7nxJTygj3?(IH}_R6;iB8%-vYH(KdFcQD>23Oa^Z@%Ztm+ zXh1G(HCpT%rv;0A)1+B$#bO>9<$%=eC{hD5wXG0vTFY%t9n#-q z*E)@25$5%FJ0@$wvCXTkiM6AR2B#i1*O!+b+376Boy_)|jq{x#!RQxX_c{n)8@6h zylU0NGPR$4#Tevvp~O2kIh8cZTQd!JT3BbjX~Y-^;tr>Gzub?^oK0N)fEsvm<*Ljg zaX6$ui=6N84d$6aj$XNMI1{g4?s(T$yxy@>xk4#kC>GYuy4gw(r!4H-aq+a?t?^II z0#_pWeClw{)%PDwu*%}NjSX;oogMEi;iBr`1a59h@l<@r?%wLKzIC(-@Ay!5{f42=&Kc#&z;lzlkGMrLt{95%dL;v#-KmQtJ`L!aX1@Kc|Nj4}`IG@?PXDxjQ(yACdQuDtET$>)Hp9D8C54V3N7rZ%bD_d=gZ;bCk+xy1i+t9c8wec?b7MW~c!Y?7Mg5>oZhvJ=` zU5C~lN-S7JdIHN_3SU@&?fJ!mY^jCaMLV-4PEVTY5k=fq{BxNQf{I& z5Y3ea(d7fE)QyPZpmbh0Dqn+iMz>bQ)e*2xec*sMjw?rt7khJ#j03oM*=YS9GhPee z=f!Sd2qj$8#znI{*&JZDoY}njjO&71J7XOPa7yFxNo$XoI2wAFW%YW97b)2K1k4UI z9&8EWm>7%&qk6p&2!?R^jF=1|eW`(qHKc`bA>C{k=NU-%OY;gls^eFC7;E5-_Z*A% z-|GKxwRCD){aUtZ%KJ6)xyKWxbqA={n+SITJjy)WyGXm6@JHmTU;J98rtyKNdse+~ zMv8S0;Vpn^^|M@j%KLTuQa|!j%J9de-Ag!5E&`rTzq%{^$ZyEK4_Vfq5Z($nH}_9T zdmG{Y1-Wl0?E%7rh+Cc zOrhRCBkete_b$kNA8CJ1ct5#H-qpUCTYQ~qtoJWS`vBoD7vw%d+6M`LMJ@s!*iAJ6 zJ*X5}?_VQseTeX3fJa&D$xuSyl+|k~!{3nh5yD5S+%AuDVw=wmllw8!P7wZfLGH&% z`vl>W<_ z>bT&2sEUUkPV^SN?V`~zwKNccRts$Sn} zrz%`I;=e%J7YSb?7Xc6Im4(_Xo+G(WAj|qP;VS^IUT~E#m3pPTUnT8H!q>=^J-Kd! 
zC+0o(nj0-og_9_C3PMDtCo117=C2;`t@H-$%sy0pS$D(-20ya>cE_48J0m<*XkP zPLqp(XN}*bt9m)=Pxn0K9tV@$BD{_B+DM zYYET;*PvQ@p>HBDagQ1mQw*Rj;81zCOIWuccRgv{gbm~> zdHvBfgnGTdy<)u^N!vu&ydd{t(k>xfO0JR)`c;CXZ{A;UvEIu_yPR;vg4`{nT}jwV zu2L)Ht=LWVK&}PbNZU?GF39a6t(R~WxvDzsKve`gQ17qFI4ky%wu5jrxym0_`8K|M zYTEnrGIDp4)=%J1kzV(G{hq=)=l$Us_3&!at|45zAon`bb`f@ytMm|Jix)t}*7eL; zgV-x_UgPJ0ilc~TbrBMP|3N~d`t@fgixde3!YCn6*hc6f+(5XIFhID8AP5;kj&M6+ z-E7#1PdBm^f5kD8&ur)NQ{o;a5)MQS{eSl_GH(g_12%rdHe`rI{qOnTY&lXhfBx(o bvI97wMb curve_range / 2: + sys.exit("!!!ERROR! Curve step must be smaller than curve range.") +curve_range -= curve_range % curve_step + +# Remove output curve file if it exists +if do_curve and os.path.isfile(curve_outpath): + os.remove(curve_outpath) + +# FUNCTIONS ==================================================================== + +# RUN ========================================================================== + +# Build argument dictionary +data = { + 'fa_conc' : fa_conc, + 'celsius' : celsius, + 'do_curve' : do_curve, + 'curve_step' : curve_step, + 'curve_range' : curve_range, + 'curve_outpath' : curve_outpath +} + +# CALCULATE -------------------------------------------------------------------- + +if not os.path.exists(fin_path): + sys.exit("!!!ERROR! File not found: %s" % (fin_path,)) +else: + # Read file + first_line = True + with open(fin_path) as fin: + for row in fin: + + # Parse first line ------------------------------------------------- + if first_line: + print(row.strip()) + if not "Tm" in row.strip().split("\t"): + sys.exit("!!!ERROR! 
Missing 'Tm' column.") + else: + Tm_col = row.strip().split("\t").index("Tm") + dG_col = row.strip().split("\t").index("dG") + dH_col = row.strip().split("\t").index("dH") + dS_col = row.strip().split("\t").index("dS") + first_line = False + continue + + # Parse other lines ------------------------------------------------ + row = row.strip().split("\t") + oldTm = float(row[Tm_col]) + dG = float(row[dG_col]) + dH = float(row[dH_col]) + dS = float(row[dS_col]) / 1000 + + # Correct Tm per formamide + row[Tm_col] = ssMelt_fa_adj(oldTm + 273.15, fa_conc) + if celsius: + row[Tm_col] = row[Tm_col] - 273.15 + row[Tm_col] = "%.1f" % row[Tm_col] + + # Produce melting curves ------------------------------------------- + + if do_curve: + fout = open(curve_outpath, 'a+') + tab = ssMelt_curve(dH, dS, oldTm + 273.15, + fa_conc, curve_range, curve_step) + for (t, k) in tab: + if celsius: + fout.write("%s\t%f\t%f\n" % (row[0], t - 273.15, k)) + else: + fout.write("%s\t%f\t%f\n" % (row[0], t, k)) + fout.close() + + # Output ----------------------------------------------------------- + print("\t".join(row)) + +# END ========================================================================== + +################################################################################