Skip to content

Commit

Permalink
Merge pull request #431 from MannLabs/add-two-step-classifier
Browse files Browse the repository at this point in the history
Add two step classifier
  • Loading branch information
anna-charlotte authored Jan 24, 2025
2 parents a80b6c8 + 3d602af commit 781e34e
Show file tree
Hide file tree
Showing 7 changed files with 693 additions and 27 deletions.
1 change: 1 addition & 0 deletions alphadia/constants/default.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,7 @@ fdr:
keep_decoys: false
channel_wise_fdr: false
inference_strategy: "heuristic"
enable_two_step_classifier: false

search_output:
peptide_level_lfq: false
Expand Down
Empty file.
128 changes: 128 additions & 0 deletions alphadia/fdrx/models/logistic_regression.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
import logging

import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler

from alphadia.fdrexperimental import Classifier

# Module-level logger. getLogger() is called without a name, so this is the root logger.
logger = logging.getLogger()


class LogisticRegressionClassifier(Classifier):
    """Logistic regression classifier with built-in feature standardization.

    Features are standardized with a ``StandardScaler`` before being passed to
    a scikit-learn ``LogisticRegression`` model. The fitted parameters of both
    can be exported to and restored from a plain dictionary.
    """

    def __init__(self) -> None:
        """Binary classifier using a logistic regression model."""
        self.scaler = StandardScaler()
        self.model = LogisticRegression()
        self._fitted = False

    @property
    def fitted(self) -> bool:
        """Whether the classifier has been fitted or loaded from a fitted state."""
        return self._fitted

    def fit(self, x: np.ndarray, y: np.ndarray) -> None:
        """Fit the scaler and the logistic regression model to the data.

        Parameters
        ----------
        x : np.array, dtype=float
            Training data of shape (n_samples, n_features).
        y : np.array, dtype=int
            Target values of shape (n_samples,).
        """
        x_scaled = self.scaler.fit_transform(x)
        self.model.fit(x_scaled, y)
        self._fitted = True

    def predict(self, x: np.ndarray) -> np.ndarray:
        """Predict the class labels of the data.

        Parameters
        ----------
        x : np.array, dtype=float
            Data of shape (n_samples, n_features).

        Returns
        -------
        y : np.array
            Predicted class labels of shape (n_samples,).
        """
        x_scaled = self.scaler.transform(x)
        return self.model.predict(x_scaled)

    def predict_proba(self, x: np.ndarray) -> np.ndarray:
        """Predict the class probabilities of the data.

        Parameters
        ----------
        x : np.array, dtype=float
            Data of shape (n_samples, n_features).

        Returns
        -------
        y : np.array, dtype=float
            Predicted class probabilities of shape (n_samples, n_classes).
        """
        x_scaled = self.scaler.transform(x)
        return self.model.predict_proba(x_scaled)

    def to_state_dict(self) -> dict:
        """Return the state of the classifier as a dictionary.

        The dictionary always contains ``"_fitted"``. When the classifier is
        fitted it additionally contains the scaler statistics and the model
        parameters.

        Returns
        -------
        dict : dict
            Dictionary containing the state of the classifier.
        """
        state_dict = {"_fitted": self._fitted}

        if self._fitted:
            state_dict.update(
                {
                    "scaler_mean": self.scaler.mean_,
                    "scaler_var": self.scaler.var_,
                    "scaler_scale": self.scaler.scale_,
                    "scaler_n_samples_seen": self.scaler.n_samples_seen_,
                    "model_coef": self.model.coef_,
                    "model_intercept": self.model.intercept_,
                    "model_classes": self.model.classes_,
                    # Duplicates "_fitted"; kept so existing consumers of this
                    # key keep working.
                    "is_fitted": self._fitted,
                }
            )

        return state_dict

    def from_state_dict(self, state_dict: dict) -> None:
        """Load the state of the classifier from a dictionary.

        Parameters
        ----------
        state_dict : dict
            Dictionary containing the state of the classifier, as produced by
            ``to_state_dict``.

        Raises
        ------
        KeyError
            If ``"_fitted"`` is missing, or if the state is marked as fitted
            but one of the scaler/model entries is missing.
        """
        self._fitted = state_dict["_fitted"]

        # Always start from fresh estimators so that loading an unfitted state
        # into a previously fitted classifier does not leave stale parameters.
        self.scaler = StandardScaler()
        self.model = LogisticRegression()

        if not self._fitted:
            return

        self.scaler.mean_ = np.array(state_dict["scaler_mean"])
        self.scaler.var_ = np.array(state_dict["scaler_var"])
        self.scaler.scale_ = np.array(state_dict["scaler_scale"])
        self.scaler.n_samples_seen_ = np.array(state_dict["scaler_n_samples_seen"])

        self.model.coef_ = np.array(state_dict["model_coef"])
        self.model.intercept_ = np.array(state_dict["model_intercept"])
        self.model.classes_ = np.array(state_dict["model_classes"])

        # Restore the recorded feature count so scikit-learn validates the
        # width of inputs in transform/predict, as it does after a real fit.
        coef = self.model.coef_
        if coef.ndim >= 1:
            n_features = int(coef.shape[-1])
            self.scaler.n_features_in_ = n_features
            self.model.n_features_in_ = n_features
Loading

0 comments on commit 781e34e

Please sign in to comment.