Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added .DS_Store
Binary file not shown.
Binary file added misc/.DS_Store
Binary file not shown.
Binary file added models/.DS_Store
Binary file not shown.
7 changes: 3 additions & 4 deletions models/bert_nc_finetune.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
from typing import List, Optional
from logging import warning
import pickle
from tensorflow.keras.mixed_precision import experimental as mixed_precision
from tensorflow.keras.callbacks import ModelCheckpoint, TensorBoard, EarlyStopping
from os.path import splitext
import pandas as pd
Expand Down Expand Up @@ -49,7 +48,7 @@ def load_fragments(fragments_dir, shuffle_=True, balance=True, nr_seqs=None):
nr_seqs = nr_seqs_max
x = []
y = np.array([])
y_species = np.array([], dtype=np.int)
y_species = np.array([], dtype=int)

for index, fragments_i in enumerate(fragments):
class_, class_fragments = fragments_i
Expand Down Expand Up @@ -98,7 +97,7 @@ def __post_init__(self):

def __len__(self):
return np.ceil(len(self.x)
/ float(self.batch_size)).astype(np.int)
/ float(self.batch_size)).astype(int)

def __getitem__(self, idx):
batch_fragments = self.x[idx * self.batch_size:
Expand Down Expand Up @@ -179,7 +178,7 @@ def get_class_vectors_multi_tax(self, taxid):

def __len__(self):
return np.ceil(len(self.x)
/ float(self.batch_size)).astype(np.int)
/ float(self.batch_size)).astype(int)

def __getitem__(self, idx):
batch_fragments = self.x[idx * self.batch_size:
Expand Down
3 changes: 2 additions & 1 deletion preprocessing/filter_fragments.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import argparse
from ete3 import NCBITaxa

offline_db = "./taxa.sqlite"

if __name__ == '__main__':
parser = argparse.ArgumentParser()
Expand Down Expand Up @@ -29,7 +30,7 @@
'input either has to be a json and a txt-list or a fasta')
print(('json + txt' if out_mode == 'json' else 'fasta')
+ ' has been provided as input, output will be of the same format')
ncbi = NCBITaxa()
ncbi = NCBITaxa(dbfile=offline_db)
species_filter = []
# if (len(args.taxid) == 1 and str(args.taxid[0]).endswith('%')):
# perc = int(str(args.taxid[0])[:str(args.taxid[0]).index('%')])
Expand Down
Binary file added resources/.DS_Store
Binary file not shown.
File renamed without changes.
Binary file added utils/__pycache__/__init__.cpython-39.pyc
Binary file not shown.
Binary file added utils/__pycache__/tax_entry.cpython-39.pyc
Binary file not shown.
4 changes: 2 additions & 2 deletions utils/tax_entry.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,9 +98,9 @@ def __init__(self, tax_path, scientific_names_path, common_names_path, phylo_nam


class TaxidLineage:
def __init__(self):
def __init__(self, dbfile="./taxa.sqlite"):
from ete3 import NCBITaxa
self.ncbi = NCBITaxa()
self.ncbi = NCBITaxa(dbfile=dbfile)
self.cache = {}
self.ncbi.db = sqlite3.connect(self.ncbi.dbfile, check_same_thread=False)

Expand Down