From c9eee4aa3db2e557192bfe5179e621aaf1f2a7ec Mon Sep 17 00:00:00 2001 From: Agrima Bhatt Date: Tue, 9 Dec 2025 14:46:50 +0100 Subject: [PATCH 1/2] changed minor codes --- .DS_Store | Bin 0 -> 10244 bytes misc/.DS_Store | Bin 0 -> 6148 bytes models/.DS_Store | Bin 0 -> 6148 bytes models/bert_nc_finetune.py | 7 +++---- resources/.DS_Store | Bin 0 -> 6148 bytes .../{bert_nc_C2_final.h5 => bert_nc_C2.h5} | Bin utils/__pycache__/__init__.cpython-39.pyc | Bin 0 -> 165 bytes utils/__pycache__/tax_entry.cpython-39.pyc | Bin 0 -> 5786 bytes 8 files changed, 3 insertions(+), 4 deletions(-) create mode 100644 .DS_Store create mode 100644 misc/.DS_Store create mode 100644 models/.DS_Store create mode 100644 resources/.DS_Store rename resources/{bert_nc_C2_final.h5 => bert_nc_C2.h5} (100%) create mode 100644 utils/__pycache__/__init__.cpython-39.pyc create mode 100644 utils/__pycache__/tax_entry.cpython-39.pyc diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..ba18c593b4f0d89d4aa68d04da8d8d1e283e9621 GIT binary patch literal 10244 zcmeHM%}*0S6n|4eS`k@<`hhXo*o%oo`HI09W33P+#y}`RP_%BhL)oy*G`m|W5|WufGS)>EP$JRWpc~x;7K_bc(xpE~s#URQZ z^|rQ4YhG1cxdYM32hl4NouLr9I?iuPb0BHOO>Yn|2(%&~!lhT{>>jX&{C#`Y=WgET zd9)B#8EhASNJNYG#R&v(RP0k|6V>FNsUV);?>OGGYcqPNjtPo1ytQv?X z;)$GfYG-F+Y-+?FpBmd8v3IVFkB-=r6PI^)6Y*r~($$6BhQG=9Gr2P$`XK{->U^(% zj?&(~J$Fm*hkUB-=c{Gac9b~Md$jLZzcp}taA0U4nM|HIdFu4=nc_vd-&MIL7>t0Ce%8_~^Kw#ISAEyQ@PB!XC)b;NQ##H)FO*pRv3sOqJZ_%Neq&Z1_ zph{e1oY6!oJ+Dp^_`&+4T#BOwZV7kMiiFrTv`>V*Aayy-a4v9DxOx!JwVSu${pwF~=F2 zd}89mu0D(sa;*?;%P7g!8dr&lfc}d|hlF1nEKCq~ZV2pm!{*rNVF^ZWn*m6hoY0tSJOM1XWnXQxGZuT~Lh-la&% zY;2FQ2@-Zy#ia{&!bIq0IUY|$IbN1`P&X>Z1fiVPys9{AkpBCh0j2Z3%P-m>9?2q?&E^o`f23hy^x`>-f!U<~JOhwxae(%L}okPZ6tP%{SIL z8Pwb*i;K)+Th0kb%o(mjYjJS~oB?OR8E^)E!T`@~$z;#aduPBIa0b2@(EbqFgjL4g zFl`;|MjXmN7D}k=nk7U+&VV!UUl|DF`T4x&*X6VI+jr`$jck`} r62_PN9l~KGWLe5B6~_F`b8iS;+-?_3k-Y!THQXw literal 0 HcmV?d00001 diff --git a/models/.DS_Store b/models/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..aa473b3f287c5ca8ad21be7ac9e2140c83204b9d GIT binary patch literal 6148 zcmeHKOHRWu5Pc4n8mN%EWa$%B;s#Mv7OXiyp)E*}LQ;ih!7gXu033n~!W-L)+}3O> zgl4Mo^JgZnevV=rfGPX!Eiea=(*?6j773Ga@e|8J&y;A~Gb-${;%|Z7NwjwSMFnK- zW^TF&l{l0ED>$+VtLSB!OzW8>=8ZgBg&*-?~vf8QW&nS96Mz8Qfo`$)5^bS+` zUq@_LQGSC>iqm`?<;0vj{!?M>@Z{JNoSD4O@eHpc&YrmyYP_YGv+gJ}$17TF$ylf2 zA+w9sVN*>NPz6+hGb+G6TP&Y>)KV2t1yq4i0r@^yx?mA-@aR4rEab=};}bzK*5$A$ zXag1j2ag<~8J|k@sU}=8j8A934RJ-l!J|)yam;*9U^d}~VjMgBtqX@KJZh;5r~-!y zT=?c(_W!%z@BfEM+EN8nfq$idnJgccC8rej)|1J}UTf)(bTNq=JdP=>xUHD6vK8OZ aow46ag;)d}JhF$T2LUI87OKEm75D*$*mBeW literal 0 HcmV?d00001 diff --git a/models/bert_nc_finetune.py b/models/bert_nc_finetune.py index f786811..eb8a1f0 100644 --- a/models/bert_nc_finetune.py +++ b/models/bert_nc_finetune.py @@ -16,7 +16,6 @@ from typing import List, Optional from logging import warning import pickle -from tensorflow.keras.mixed_precision import experimental as mixed_precision from tensorflow.keras.callbacks import ModelCheckpoint, TensorBoard, EarlyStopping from os.path import splitext import pandas as pd @@ -49,7 +48,7 @@ def load_fragments(fragments_dir, shuffle_=True, balance=True, nr_seqs=None): nr_seqs = nr_seqs_max x = [] y = np.array([]) - y_species = np.array([], dtype=np.int) + y_species = np.array([], dtype=int) for index, fragments_i in enumerate(fragments): class_, class_fragments = fragments_i @@ -98,7 +97,7 @@ def __post_init__(self): def __len__(self): return np.ceil(len(self.x) - / float(self.batch_size)).astype(np.int) + / float(self.batch_size)).astype(int) def __getitem__(self, idx): batch_fragments = self.x[idx * self.batch_size: @@ -179,7 +178,7 @@ def get_class_vectors_multi_tax(self, taxid): def __len__(self): return np.ceil(len(self.x) - / float(self.batch_size)).astype(np.int) + / float(self.batch_size)).astype(int) def __getitem__(self, idx): batch_fragments = self.x[idx * self.batch_size: diff --git a/resources/.DS_Store b/resources/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..932d8a6ce32d10d764d913c7b6ace01c31ce0960 GIT binary patch literal 6148 zcmeHKyH3ME5S)V)k!Vs-P~I0rqM)}dr=aEo0_H7QL_*NJ%V)#v1B7Les3_2^v^#g} zox6AnZx%plUvEx<6@Vq(5PJ)Av*+eBJIG8S(s{-f_qfFqeLJiEK4IKr^kh8aWy0^c zyTc<-yTX7q9&mO(?5_J^*OOyExTUY(+_;iS0VyB_q<|EV0>4qfdM|Bx6sRZ#q<|Fo zRKUIum2TK1j)DH^VDJ%uxM13iYo8^E)e^)eaSUXJR)R_lYRM6!1fBVsbxqpiAO-%F0%g%%bS+;gd+Y4w wtk+ihCH=!#>*WmIiizHexv^G!@};iYn$MfWG0^GEJDsRM0;Y>h3jBov?>mqlVgLXD literal 0 HcmV?d00001 diff --git a/resources/bert_nc_C2_final.h5 b/resources/bert_nc_C2.h5 similarity index 100% rename from resources/bert_nc_C2_final.h5 rename to resources/bert_nc_C2.h5 diff --git a/utils/__pycache__/__init__.cpython-39.pyc b/utils/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e86a20e9106e0d5116af402efa2310a31b9a98fc GIT binary patch literal 165 zcmYe~<>g`k0@3egnIQTxh(HF6K#l_t7qb9~6oz01O-8?!3`HPe1o6vDKeRZts8~NS zy(lv`F)1Ulq(t8(wK%&Zzd%1IwIIJZvm`UWC^NNKKPk1SB(WmCq$n{nFEcM)zqBMX hr&vEe9>^<+kJl@xyv1RYo1apelWGUD<1-L5005@ZDf$2a literal 0 HcmV?d00001 diff --git a/utils/__pycache__/tax_entry.cpython-39.pyc b/utils/__pycache__/tax_entry.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c6636f083d05c75c5ca7f5b9d0a0a8adb40bb94d GIT binary patch literal 5786 zcmb_g&5s;M6|d^<>FN2{+1d3bvJnXrgit1gUB?NAn8jF$NpKRaP#iEB99q3oJ-fX- z(>+_&V|#~gE^CNEiU^9t1%YtJcA%J4k%@5|9Y))@OIHRhiZ8rM+NZ$ShTyw5uHw)%Wvbu3=+?2c{voQ@MZ zdnI9qu5dn~-WB1B@_pVZ3r|$;vyS&R6ID^W&qVFW>QvCGizT#{(5j-<5X)#Sqg4y* zqPcI$Ux(!*cDvMB>NGmbon~i6@SCi?`Uv}BTWzjuBJ2gDek!7Fs@-TOPGktS*A3Ee zCy|GrU)>nQ=|(S5>e9wgCVOEwRU5s5FT+6i-9GqHFT&_3iT!Re7zQ#@NxYE;_mVgn z9CkUaG+!E-LG+=^Ikqc|v~l%@QL=kDZPPOa45bB;?H6ZVO9#DDka z+{?V{?4t*HC952}nRUWVt(y%ubEqnxxkJqS^b3bVr&(Cn_2nEzFSN<4s+O%%~{-hg(-cJ zar65SbZ-AOv;SefoGoMjo&T}_y4k;t{WtTKV#U0McN3CnX7+J4t7Xjy=jAMOsFl{T zl_Oqo4*s*Sqt#c~r(DJJdlE~cai3+SWNsCNKRa5^zV;s{Q1H~AO@{?6GeRqBw`u+X!W+0^V~{V1~bq2XV!splRad(jf4DwB? zG(p-+(!vX}2~+JP!!XuXqO>y%(p~NBB~h$h3Asf>T~fn-lxjPQK>{JPI~u}%Ls_9e zC`D;FQ0+DCkxk>Iva{4p`o0MJ(ExpD%H-~>scUogjmFFbb#rDwg;DByVG@(psi6>3 z!mbRrQ#EGj_o9BNoqiOD+J%*kLZw|&P9!=WjquZZsph^mR_mvS!w@sc#EO}f$)+&8 zT5O37U#~sBBR`7o=;qw|ia3u+T34nzFGECX6NI@AcdVj#+~aE~Rs87P;OEWzl6A^m z1(*1*S*v`N%Wq(Qm#qHuAuFNwn<(lE2&|rf$j;af&z;N`eBYLrGb^(%z+aj=r#ePB z$EX=tZe*~bw!QU;yo|1^(J++zux*hH+LpXPOg9+9G(@|ktw_k{!IEDA=~TNTX;3N> z+MD3T;p1YclNd+|WK^$Unj-ULIGJnCTj!FvU1H8Zbh2wG>gymO>u|w3mZ%6z*zo2y zIVgB@$9Q+vDG3j!sOr)O!M*EmeE#CbZZZfrc9PdGZ%6lLPFIBC&=2o~Gv7+~fBKLq zldg5F{{Bq`j zdIGn(Z6Q{W&*5L&74tF*Eu`53SH3`mN{)v;|lP6 zSZ9>_K3Skz1z`~Vd6e02+%R%mKONeg7YhNUi$WSveGu3gf57&rSVwkleaw0gkg)DU zB*qomhGWlh9K&(uI3>d=&2ik+CR>FlA;P0lUe3s2x?2nINE;)FJV%>(7G#HDzmd~s zu&oR1_XhHf5FqasMm)3%SX`kxm#^Vf$cO+Dc-@awf!8;vhj4NHVj&I{2C}>B_hd2v zhRi|tbjlamn^E9pTJzgPj6X6M{T(#lAr|fBi>zR*>8usr{eP@rO!cf4=n55n#xZw% z1|7Nv0pGY@c+IwFbfQZs@eGm}u+Yyu0ldaBtt=p~VKwJGNHO=qB%YXe6UuaTYG$2c z?Lz8C{>B;_izl_I(| zT;K~pnYhmv+|q;wF0X;K?Ly8Dxk5<}qd5K4@Zfvr44y{)2!ke&VZjr*yr2z8WBlxsGN37h za<9^At_JcLuj}io?++3&>W99s>%M<53i?Hl{1$EF8WEDz+=iyRG6$gDDgq(Ts@A-U z;dRkmcnBM=p(sj{rY?hkiNRsmkd`A~M^~5r==7mFO*D!kb1p#NfIEC7;M+3bPN6&? z-at9uf}p>Pv63)VevimiBHssTy9LCY)jxMebK6M*w^5YIgQuWuZ0z)kOo4d|N4QQa z{eZ|@M2rNDv}VNoF&*`1D2k5f@#5&HE<83`*HBD+^0>@)kT3p)1Ki2k@|m-p@+qa} z9amH#+nTP!A|mk~!mnT_d@61kArk5>>Db}7V;N(An zkKMEUvJrb}OHz-hEUZcHQGB}Y#%HG6BJ*r@yFD2I%+}u6h)H?jQ9!fNG8YZgx%q? zNnd(QoI+8j2R@%9`@n@g>llJR!Q}vWQ?!kD>SRt@IuOB3aH8@oM~n_)U=UOAALkwfd)i41nRA$jk(QKc#>85M<)J z3!!$1YnGZ_0h4W-J8~Dj6Obx$OXI1hxV%1HeXCu5sxKjDBuPm!m@Z^io1Kp0B7tJo zL6+(^OI}$Lmz@3pUq^f&aRBM5y(-s;kMBVJKJXSFIhFb9ASpc5-rc}_)Ka Date: Mon, 12 Jan 2026 11:33:08 +0100 Subject: [PATCH 2/2] ete3 offline --- preprocessing/filter_fragments.py | 3 ++- utils/tax_entry.py | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/preprocessing/filter_fragments.py b/preprocessing/filter_fragments.py index 472ddc9..8c00f53 100644 --- a/preprocessing/filter_fragments.py +++ b/preprocessing/filter_fragments.py @@ -2,6 +2,7 @@ import argparse from ete3 import NCBITaxa +offline_db = "./taxa.sqlite" if __name__ == '__main__': parser = argparse.ArgumentParser() @@ -29,7 +30,7 @@ 'input either has to be a json and a txt-list or a fasta') print(('json + txt' if out_mode == 'json' else 'fasta') + ' has been provided as input, output will be of the same format') - ncbi = NCBITaxa() + ncbi = NCBITaxa(dbfile=offline_db) species_filter = [] # if (len(args.taxid) == 1 and str(args.taxid[0]).endswith('%')): # perc = int(str(args.taxid[0])[:str(args.taxid[0]).index('%')]) diff --git a/utils/tax_entry.py b/utils/tax_entry.py index 032f7d0..3ca6b01 100644 --- a/utils/tax_entry.py +++ b/utils/tax_entry.py @@ -98,9 +98,9 @@ def __init__(self, tax_path, scientific_names_path, common_names_path, phylo_nam class TaxidLineage: - def __init__(self): + def __init__(self, dbfile="./taxa.sqlite"): from ete3 import NCBITaxa - self.ncbi = NCBITaxa() + self.ncbi = NCBITaxa(dbfile=dbfile) self.cache = {} self.ncbi.db = sqlite3.connect(self.ncbi.dbfile, check_same_thread=False)