diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000..ba18c59 Binary files /dev/null and b/.DS_Store differ diff --git a/misc/.DS_Store b/misc/.DS_Store new file mode 100644 index 0000000..871ddcc Binary files /dev/null and b/misc/.DS_Store differ diff --git a/models/.DS_Store b/models/.DS_Store new file mode 100644 index 0000000..aa473b3 Binary files /dev/null and b/models/.DS_Store differ diff --git a/models/bert_nc_finetune.py b/models/bert_nc_finetune.py index f786811..eb8a1f0 100644 --- a/models/bert_nc_finetune.py +++ b/models/bert_nc_finetune.py @@ -16,7 +16,6 @@ from typing import List, Optional from logging import warning import pickle -from tensorflow.keras.mixed_precision import experimental as mixed_precision from tensorflow.keras.callbacks import ModelCheckpoint, TensorBoard, EarlyStopping from os.path import splitext import pandas as pd @@ -49,7 +48,7 @@ def load_fragments(fragments_dir, shuffle_=True, balance=True, nr_seqs=None): nr_seqs = nr_seqs_max x = [] y = np.array([]) - y_species = np.array([], dtype=np.int) + y_species = np.array([], dtype=int) for index, fragments_i in enumerate(fragments): class_, class_fragments = fragments_i @@ -98,7 +97,7 @@ def __post_init__(self): def __len__(self): return np.ceil(len(self.x) - / float(self.batch_size)).astype(np.int) + / float(self.batch_size)).astype(int) def __getitem__(self, idx): batch_fragments = self.x[idx * self.batch_size: @@ -179,7 +178,7 @@ def get_class_vectors_multi_tax(self, taxid): def __len__(self): return np.ceil(len(self.x) - / float(self.batch_size)).astype(np.int) + / float(self.batch_size)).astype(int) def __getitem__(self, idx): batch_fragments = self.x[idx * self.batch_size: diff --git a/preprocessing/filter_fragments.py b/preprocessing/filter_fragments.py index 472ddc9..8c00f53 100644 --- a/preprocessing/filter_fragments.py +++ b/preprocessing/filter_fragments.py @@ -2,6 +2,7 @@ import argparse from ete3 import NCBITaxa +offline_db = "./taxa.sqlite" if __name__ == '__main__': parser = argparse.ArgumentParser() @@ -29,7 +30,7 @@ 'input either has to be a json and a txt-list or a fasta') print(('json + txt' if out_mode == 'json' else 'fasta') + ' has been provided as input, output will be of the same format') - ncbi = NCBITaxa() + ncbi = NCBITaxa(dbfile=offline_db) species_filter = [] # if (len(args.taxid) == 1 and str(args.taxid[0]).endswith('%')): # perc = int(str(args.taxid[0])[:str(args.taxid[0]).index('%')]) diff --git a/resources/.DS_Store b/resources/.DS_Store new file mode 100644 index 0000000..932d8a6 Binary files /dev/null and b/resources/.DS_Store differ diff --git a/resources/bert_nc_C2_final.h5 b/resources/bert_nc_C2.h5 similarity index 100% rename from resources/bert_nc_C2_final.h5 rename to resources/bert_nc_C2.h5 diff --git a/utils/__pycache__/__init__.cpython-39.pyc b/utils/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000..e86a20e Binary files /dev/null and b/utils/__pycache__/__init__.cpython-39.pyc differ diff --git a/utils/__pycache__/tax_entry.cpython-39.pyc b/utils/__pycache__/tax_entry.cpython-39.pyc new file mode 100644 index 0000000..c6636f0 Binary files /dev/null and b/utils/__pycache__/tax_entry.cpython-39.pyc differ diff --git a/utils/tax_entry.py b/utils/tax_entry.py index 032f7d0..3ca6b01 100644 --- a/utils/tax_entry.py +++ b/utils/tax_entry.py @@ -98,9 +98,9 @@ def __init__(self, tax_path, scientific_names_path, common_names_path, phylo_nam class TaxidLineage: - def __init__(self): + def __init__(self, dbfile="./taxa.sqlite"): from ete3 import NCBITaxa - self.ncbi = NCBITaxa() + self.ncbi = NCBITaxa(dbfile=dbfile) self.cache = {} self.ncbi.db = sqlite3.connect(self.ncbi.dbfile, check_same_thread=False)