diff --git a/src/data/build_data.py b/src/data/build_data.py index 0eea6da..022d8ed 100644 --- a/src/data/build_data.py +++ b/src/data/build_data.py @@ -53,7 +53,7 @@ def build_load_save_data(file, hostfile, prefix, dataset, host, kmers_list=None, nb_features_keep = nb_features_keep) save_Xy_data(data, data_file) - # Assign kmers_list to variable ater extracting database data + # Assign kmers_list to variable after extracting database data if kmers_list is None: kmers_list = data['kmers'] diff --git a/src/models/classification.py b/src/models/classification.py index ec7593e..5cf1bd1 100644 --- a/src/models/classification.py +++ b/src/models/classification.py @@ -137,12 +137,11 @@ def _train_model(self, taxa): self._binary_training(taxa) else: self._multiclass_training(taxa) - if isinstance(self.models[taxa], KerasTFModel): - for file in glob(os.path.join(self._outdirs['data_dir'], '*sim*')): - if os.path.isdir(file): - rmtree(file) - else: - os.remove(file) + for file in glob(os.path.join(self._outdirs['data_dir'], '*sim*')): + if os.path.isdir(file): + rmtree(file) + else: + os.remove(file) def _binary_training(self, taxa): print('_binary_training') @@ -516,7 +515,6 @@ def _sim_4_cv(self, df, kmers_ds, name): cv_sim = readsSimulation(kmers_ds['fasta'], cls, sim_cls_dct['id'], 'miseq', sim_outdir, name) sim_data = cv_sim.simulation(self._k, self._database_data['kmers']) sim_ids = sim_data['ids'] - sim_ids = sim_data['ids'] sim_cls = pd.DataFrame({'sim_id':sim_ids}, dtype = object) sim_cls['id'] = sim_cls['sim_id'].str.replace('_[0-9]+_[0-9]+_[0-9]+', '', regex=True) sim_cls = sim_cls.set_index('id').join(cls.set_index('id'))