diff --git a/CHANGELOG.md b/CHANGELOG.md index 16ce3e1..76fddad 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,16 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +# [2.1.4] - 2023-05-08 + +### Changed +- slight refactoring + +# [2.1.3] - 2023-05-08 + +### Changed +- slight refactoring + # [2.1.2] - 2023-05-08 ### Changed diff --git a/deeplc/deeplc.py b/deeplc/deeplc.py index 2e420c1..c5cfc4a 100644 --- a/deeplc/deeplc.py +++ b/deeplc/deeplc.py @@ -125,27 +125,6 @@ def warn(*args, **kwargs): logger = logging.getLogger(__name__) - -def read_library(use_library): - global LIBRARY - - if not use_library: - logger.warning("Trying to read library, but no library file was provided.") - return - try: - library_file = open(use_library) - except IOError: - logger.warning("Could not find existing library file: %s", use_library) - return - - for line in library_file: - split_line = line.strip().split(",") - try: - LIBRARY[split_line[0]] = float(split_line[1]) - except: - logger.warning("Could not use this library entry due to an error: %s", line) - - def split_list(a, n): k, m = divmod(len(a), n) return (a[i*k+min(i, m):(i+1)*k+min(i+1, m)] for i in range(n)) @@ -505,49 +484,6 @@ def calibration_core(self,uncal_preds,cal_dict,cal_min,cal_max): cal_preds.append(slope * (uncal_pred) + intercept) return np.array(cal_preds) - - """ - def write_to_library(self): - # TODO repair function - try: - lib_file = open(self.use_library,"a") - except: - logger.debug("Could not append to the library file") - return - if type(m_name) == str: - for up, mn, sd in zip(uncal_preds, [m_name]*len(uncal_preds), seq_df["idents"]): - lib_file.write("%s,%s\n" % (sd+"|"+m_name,str(up))) - lib_file.close() - else: - for up, mn, sd in zip(uncal_preds, m_name, seq_df["idents"]): - lib_file.write("%s,%s\n" % (sd+"|"+m_name,str(up))) - lib_file.close() - if 
self.reload_library: read_library(self.use_library) - - - def _check_presence_library(self, - psm_list, - m_name - ): - psm_list_lib = [] - psm_list_lib_idx = [] - - psm_list_nonlib = [] - psm_list_nonlib_idx = [] - - for idx,psm in enumnerate(psm_list): - k = psm.peptidoform.proforma+"|"+m_name - if k in LIBRARY.keys(): - psm_list_lib.append(psm) - psm_list_lib_idx.append(idx) - else: - psm_list_nonlib.append(psm) - psm_list_nonlib_idx.append(idx) - - proforma_library = list(set(proforma_library)) - return psm_list_lib, psm_list_lib_idx, psm_list_nonlib, psm_list_nonlib_idx - """ - def make_preds_core_library(self, psm_list=[], calibrate=True, @@ -1018,27 +954,6 @@ def calibrate_preds(self, tf.config.threading.set_inter_op_parallelism_threads(1) - #if len(location_peprec_retraining) == 0: - # t_dir = TemporaryDirectory().name - # os.mkdir(t_dir) - #else: - # t_dir = location_peprec_retraining - # try: - # os.mkdir(t_dir) - # except: - # pass - - # For training new models we need to use a file, so write the train df to a file - - #df_train_file = os.path.join(t_dir,"train.csv") - #seq_df.to_csv(df_train_file,index=False) - - #peprec_name = os.path.join(t_dir,"train.peprec") - #write_file(psm_list,peprec_name,filetype="peprec") - - #peprec_name_csv = os.path.join(t_dir,"train.csv") - #pd.read_csv(peprec_name,sep=" ").rename({"observed_retention_time":"tr","peptide":"seq"},axis=1).to_csv(peprec_name_csv,sep=",") - if len(location_retraining_models) > 0: t_dir_models = TemporaryDirectory().name os.mkdir(t_dir_models) diff --git a/deeplc/feat_extractor.py b/deeplc/feat_extractor.py index d3797ad..40d0ee9 100644 --- a/deeplc/feat_extractor.py +++ b/deeplc/feat_extractor.py @@ -598,13 +598,6 @@ def rolling_sum(a, n=2): #ret_list_all = pd.DataFrame.from_dict(ret_list_all).T #ret_list_hc = pd.DataFrame.from_dict(ret_list_hc).T - logger.debug( - "Dicts to DF: %s seconds" % - (time.time() - t1)) - - logger.debug( - "To df: %s seconds" % - (time.time() - t1)) return ret_list 
diff --git a/setup.py b/setup.py index 368cc19..713871a 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ setup( name='deeplc', - version='2.1.3', + version='2.1.4', license='apache-2.0', description='DeepLC: Retention time prediction for (modified) peptides using Deep Learning.', long_description=LONG_DESCRIPTION, diff --git a/tests/test_deeplc.py b/tests/test_deeplc.py index b6d2d7f..e88d9b9 100644 --- a/tests/test_deeplc.py +++ b/tests/test_deeplc.py @@ -34,7 +34,6 @@ def test_cli_full(): preds_df = pd.read_csv(file_path_out) train_df = pd.read_csv(file_path_pred) model_r2 = r2_score(train_df['tr'], preds_df['predicted retention time']) - logging.info(f"{len(train_df.index)}{len(preds_df.index)}") logging.info("DeepLC R2 score on %s: %f", file_path_pred, model_r2) assert model_r2 > 0.90, f"DeepLC R2 score on {file_path_pred} below 0.9 \ (was {model_r2})"