Skip to content

Commit

Permalink
fix
Browse files Browse the repository at this point in the history
  • Loading branch information
RobbinBouwmeester committed Feb 16, 2024
2 parents 7516b3a + dbfba70 commit 763e62b
Show file tree
Hide file tree
Showing 9 changed files with 162 additions and 102 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ jobs:
run: |
python -m pip install --upgrade pip
pip install flake8 pytest
pip install --editable .
pip install --editable .[test]
- name: Lint with flake8
run: |
# stop the build if there are Python syntax errors or undefined names
Expand Down
2 changes: 0 additions & 2 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
include deeplc/expasy/*
include deeplc/mod_to_smiles/*
include deeplc/mods/*
include deeplc/package_data/**/*
include deeplc/unimod/*
Expand Down
11 changes: 4 additions & 7 deletions deeplc/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,17 +12,14 @@
import warnings

import pandas as pd
from matplotlib import pyplot as plt

from deeplc import __version__, DeepLC, FeatExtractor
from deeplc._argument_parser import parse_arguments
from deeplc._exceptions import DeepLCError

from psm_utils.io.peptide_record import peprec_to_proforma
from psm_utils.psm import PSM
from psm_utils.psm_list import PSMList
from psm_utils.io import read_file
from psm_utils.io import write_file

from deeplc import __version__, DeepLC, FeatExtractor
from deeplc._argument_parser import parse_arguments
from deeplc._exceptions import DeepLCError

logger = logging.getLogger(__name__)

Expand Down
35 changes: 14 additions & 21 deletions deeplc/deeplc.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,22 +31,18 @@

LIBRARY = {}

import os
import sys
import copy
import gc
import logging
import math
import multiprocessing
import multiprocessing.dummy
import pickle
import random
import sys
import warnings
from configparser import ConfigParser
from tempfile import TemporaryDirectory
from copy import deepcopy
import random
import math
from collections import ChainMap
from itertools import chain
from tempfile import TemporaryDirectory

# If CLI/GUI/frozen: disable TensorFlow info and warnings before importing
IS_CLI_GUI = os.path.basename(sys.argv[0]) in ["deeplc", "deeplc-gui"]
Expand All @@ -65,29 +61,25 @@
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.python.eager import context
from tensorflow.keras.models import load_model
import h5py

from deeplc._exceptions import CalibrationError, DeepLCError
from deeplc.trainl3 import train_en

from deeplcretrainer import deeplcretrainer
from psm_utils.io import read_file
from psm_utils.io.peptide_record import peprec_to_proforma
from psm_utils.psm import PSM
from psm_utils.psm_list import PSMList
from psm_utils.io import read_file
from psm_utils.io import write_file
from tensorflow.keras.models import load_model
from tensorflow.python.eager import context

from deeplcretrainer import deeplcretrainer
from deeplc._exceptions import CalibrationError
from deeplc.trainl3 import train_en

# "Custom" activation function
lrelu = lambda x: tf.keras.activations.relu(x, alpha=0.1, max_value=20.0)


try:
from tensorflow.compat.v1.keras.backend import set_session
from tensorflow.compat.v1.keras.backend import set_session # noqa: F401
except ImportError:
from tensorflow.keras.backend import set_session
from tensorflow.keras.backend import set_session # noqa: F401
try:
from tensorflow.compat.v1.keras.backend import clear_session
except ImportError:
Expand All @@ -112,9 +104,10 @@
# session = tf.compat.v1.Session(config=config)

# Feature extraction
from deeplc.feat_extractor import FeatExtractor
from pygam import LinearGAM, s

from deeplc.feat_extractor import FeatExtractor


def warn(*args, **kwargs):
    """Accept any positional and keyword arguments and do nothing.

    NOTE(review): presumably installed in place of a warning function to
    silence output; the code that uses it is outside this view -- confirm.
    """
    return None
Expand Down
14 changes: 0 additions & 14 deletions deeplc/feat_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,35 +12,21 @@
__email__ = ["Robbin.Bouwmeester@ugent.be", "Ralf.Gabriels@ugent.be"]

# Native imports
from operator import index
import os
import math
import time
from configparser import ConfigParser
import ast
from re import sub
import logging
from copy import deepcopy

# Numpy
import numpy as np

# Pandas
import pandas as pd

from psm_utils.io.peptide_record import peprec_to_proforma
from psm_utils.psm import PSM
from psm_utils.psm_list import PSMList
from pyteomics import mass

from functools import lru_cache

from psm_utils.io.peptide_record import peprec_to_proforma
from psm_utils.psm import PSM
from psm_utils.psm_list import PSMList
from psm_utils.io import read_file
from psm_utils.io import write_file

logger = logging.getLogger(__name__)


Expand Down
95 changes: 53 additions & 42 deletions deeplc/trainl3.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,35 +12,26 @@
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
This code is used to train retention time predictors and store
predictions from a CV procedure for further analysis.
This project was made possible by MASSTRPLAN. MASSTRPLAN received funding
from the Marie Sklodowska-Curie EU Framework for Research and Innovation
Horizon 2020, under Grant Agreement No. 675132.
"""

from sklearn.model_selection import RandomizedSearchCV
from sklearn.linear_model import ElasticNet
from sklearn.metrics import mean_absolute_error
from sklearn.feature_selection import SelectFromModel
from sklearn.model_selection import cross_val_predict
from sklearn.model_selection import KFold
from sklearn.base import clone
from sklearn.model_selection import GridSearchCV
from scipy.stats import randint
from scipy.stats import uniform
from numpy import arange
from scipy.stats import pearsonr
try:
from sklearn.base import clone
from sklearn.linear_model import ElasticNet
from sklearn.model_selection import GridSearchCV
except ImportError:
_has_sklearn = False
else:
_has_sklearn = True

from operator import itemgetter
from numpy import median
from collections import Counter

def train_en(X, y, n_jobs=16, cv=None):
    """
    Train Layer 3 of CALLC: an elastic net tuned with an exhaustive grid search.

    The grid covers ``alpha``, ``l1_ratio`` and ``fit_intercept``; coefficients
    are constrained to be non-negative (``positive=True``). Candidates are
    ranked on negative mean absolute error, and a fresh model with the best
    parameters is fitted on all of ``X`` and ``y``.

    Parameters
    ----------
    X : pd.DataFrame
        features used to fit the elastic net
    y : array-like
        target values, passed to ``fit`` together with ``X``
    n_jobs : int
        number of jobs to spawn
    cv : sklearn.model_selection.KFold
        cv object; forwarded unchanged to ``GridSearchCV`` (``None`` selects
        scikit-learn's default splitting strategy)

    Returns
    -------
    sklearn.linear_model.ElasticNet
        elastic net fitted on ``X`` and ``y`` with the best parameters found

    Raises
    ------
    ImportError
        if the optional dependency scikit-learn is not installed
    """
    if not _has_sklearn:
        raise ImportError(
            "This function requires the optional dependency `scikit-learn`. Run `pip install "
            "scikit-learn` and try again."
        )

    model = ElasticNet()

    set_reg = [
        0.01,
        1.0,
        10.0,
        100.0,
        1000.0,
        10000.0,
        100000.0,
        1000000.0,
        1000000000,
    ]
    set_reg.extend([x / 2 for x in set_reg])
    set_reg.extend([x / 3 for x in set_reg])
    # Guard against repeated alphas: every duplicate costs a full set of CV
    # fits without changing the outcome. dict.fromkeys keeps first-occurrence
    # order, so the candidate order seen by the grid search stays stable.
    set_reg = list(dict.fromkeys(set_reg))

    # `normalize` is deliberately not part of the grid: the parameter was
    # removed from ElasticNet in scikit-learn 1.2, and passing it makes
    # GridSearchCV fail with an invalid-parameter error. Leaving it out keeps
    # the non-normalising behaviour on older releases as well.
    params = {
        "alpha": set_reg,
        "l1_ratio": [0.01, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0],
        "copy_X": [True],
        "positive": [True],
        "fit_intercept": [True, False],
    }

    grid = GridSearchCV(
        model,
        params,
        cv=cv,
        scoring="neg_mean_absolute_error",
        verbose=0,
        n_jobs=n_jobs,
        refit=True,
    )
    grid.fit(X, y)

    # Return a standalone estimator rather than one owned by the search object.
    ret_mod = clone(model)
    ret_mod.set_params(**grid.best_params_)
    ret_mod.fit(X, y)

    return ret_mod
65 changes: 65 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
[project]
name = "deeplc"
version = "2.2.27"
description = "DeepLC: Retention time prediction for (modified) peptides using Deep Learning."
readme = "README.md"
license = { file = "LICENSE" }
authors = [
{ name = "Robbin Bouwmeester", email = "robbin.bouwmeester@ugent.be" },
{ name = "Niels Hulstaert" },
{ name = "Arthur Declercq" },
{ name = "Ralf Gabriels" },
{ name = "Lennart Martens" },
{ name = "Sven Degroeve" },
]
classifiers = [
"Intended Audience :: Science/Research",
"License :: OSI Approved :: Apache Software License",
"Operating System :: OS Independent",
"Programming Language :: Python :: 3",
"Topic :: Scientific/Engineering :: Bio-Informatics",
"Development Status :: 5 - Production/Stable",
]
requires-python = ">=3.7"
keywords = [
"DeepLC",
"Proteomics",
"deep learning",
"peptides",
"retention time",
"prediction",
]

dependencies = [
"tensorflow>=2.2,<2.13.0",
"numpy>=1.17,<2",
"pandas>=0.25,<2",
"h5py>=2.10.0,<4",
"pygam>=0.8.0,<1",
"deeplcretrainer>=0.1,<1",
"psm_utils>=0.2.3,<1",
"hdf5plugin>=4.1.1",
]

[project.optional-dependencies]
test = ["pytest", "matplotlib>=3,<4"]
gui = ["gooey>=1.0"]
plot = ["plotly>=5"]
deepcallc = ["scikit-learn<2,>=0.24.0"]

[project.scripts]
deeplc = "deeplc.__main__:main"
deeplc-gui = "deeplc.gui:start_gui"

[project.urls]
GitHub = "https://github.com/compomics/deeplc"
PyPI = "https://pypi.org/project/deeplc/"
CompOmics = "https://www.compomics.com"

[build-system]
requires = ["setuptools"]
build-backend = "setuptools.build_meta"

[tool.setuptools]
packages = ["deeplc"]
include-package-data = true
3 changes: 0 additions & 3 deletions setup.cfg

This file was deleted.

Loading

0 comments on commit 763e62b

Please sign in to comment.