diff --git a/alphadia/constants/default.yaml b/alphadia/constants/default.yaml
index d9843bc06..39efdaff8 100644
--- a/alphadia/constants/default.yaml
+++ b/alphadia/constants/default.yaml
@@ -233,6 +233,7 @@ fdr:
   keep_decoys: false
   channel_wise_fdr: false
   inference_strategy: "heuristic"
+  enable_two_step_classifier: false
 
 search_output:
   peptide_level_lfq: false
diff --git a/alphadia/fdrx/models/__init__.py b/alphadia/fdrx/models/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/alphadia/fdrx/models/logistic_regression.py b/alphadia/fdrx/models/logistic_regression.py
new file mode 100644
index 000000000..4d43f6879
--- /dev/null
+++ b/alphadia/fdrx/models/logistic_regression.py
@@ -0,0 +1,128 @@
+import logging
+
+import numpy as np
+from sklearn.linear_model import LogisticRegression
+from sklearn.preprocessing import StandardScaler
+
+from alphadia.fdrexperimental import Classifier
+
+logger = logging.getLogger()
+
+
+class LogisticRegressionClassifier(Classifier):
+    def __init__(self) -> None:
+        """Binary classifier using a logistic regression model."""
+        self.scaler = StandardScaler()
+        self.model = LogisticRegression()
+        self._fitted = False
+
+    @property
+    def fitted(self) -> bool:
+        return self._fitted
+
+    def fit(self, x: np.ndarray, y: np.ndarray) -> None:
+        """Fit the classifier to the data.
+
+        Parameters
+        ----------
+        x : np.array, dtype=float
+            Training data of shape (n_samples, n_features).
+        y : np.array, dtype=int
+            Target values of shape (n_samples,).
+
+        """
+        x_scaled = self.scaler.fit_transform(x)
+        self.model.fit(x_scaled, y)
+        self._fitted = True
+
+    def predict(self, x: np.ndarray) -> np.ndarray:
+        """Predict the class of the data.
+
+        Parameters
+        ----------
+        x : np.array, dtype=float
+            Data of shape (n_samples, n_features).
+
+        Returns
+        -------
+        y : np.array, dtype=int
+            Predicted class labels of shape (n_samples,).
+
+        """
+        x_scaled = self.scaler.transform(x)
+        return self.model.predict(x_scaled)
+
+    def predict_proba(self, x: np.ndarray) -> np.ndarray:
+        """Predict the class probabilities of the data.
+
+        Parameters
+        ----------
+        x : np.array, dtype=float
+            Data of shape (n_samples, n_features).
+
+        Returns
+        -------
+        y : np.array, dtype=float
+            Predicted class probabilities of shape (n_samples, n_classes).
+
+        """
+        x_scaled = self.scaler.transform(x)
+        return self.model.predict_proba(x_scaled)
+
+    def to_state_dict(self) -> dict:
+        """Return the state of the classifier as a dictionary.
+
+        Returns
+        -------
+        dict
+            Dictionary containing the state of the classifier.
+
+        """
+        state_dict = {"_fitted": self._fitted}
+
+        if self._fitted:
+            state_dict.update(
+                {
+                    "scaler_mean": self.scaler.mean_,
+                    "scaler_var": self.scaler.var_,
+                    "scaler_scale": self.scaler.scale_,
+                    "scaler_n_samples_seen": self.scaler.n_samples_seen_,
+                    "model_coef": self.model.coef_,
+                    "model_intercept": self.model.intercept_,
+                    "model_classes": self.model.classes_,
+                }
+            )
+
+        return state_dict
+
+    def from_state_dict(self, state_dict: dict) -> None:
+        """Load the state of the classifier from a dictionary.
+
+        Parameters
+        ----------
+        state_dict : dict
+            Dictionary containing the state of the classifier.
+ + """ + self._fitted = state_dict["_fitted"] + + if self._fitted: + self.scaler = StandardScaler() + self.scaler.mean_ = np.array(state_dict["scaler_mean"]) + self.scaler.var_ = np.array(state_dict["scaler_var"]) + self.scaler.scale_ = np.array(state_dict["scaler_scale"]) + self.scaler.n_samples_seen_ = np.array(state_dict["scaler_n_samples_seen"]) + + self.model = LogisticRegression() + self.model.coef_ = np.array(state_dict["model_coef"]) + self.model.intercept_ = np.array(state_dict["model_intercept"]) + self.model.classes_ = np.array(state_dict["model_classes"]) diff --git a/alphadia/fdrx/models/two_step_classifier.py b/alphadia/fdrx/models/two_step_classifier.py new file mode 100644 index 000000000..c33468d96 --- /dev/null +++ b/alphadia/fdrx/models/two_step_classifier.py @@ -0,0 +1,353 @@ +import logging + +import numpy as np +import pandas as pd + +from alphadia.fdr import get_q_values, keep_best +from alphadia.fdrexperimental import Classifier + +logger = logging.getLogger() + + +class TwoStepClassifier: + def __init__( + self, + first_classifier: Classifier, + second_classifier: Classifier, + first_fdr_cutoff: float = 0.6, + second_fdr_cutoff: float = 0.01, + min_precursors_for_update: int = 5000, + train_on_top_n: int = 1, + ): + """ + A two-step classifier, designed to refine classification results by applying a stricter second-stage classification after an initial filtering stage. + + Parameters + ---------- + first_classifier : Classifier + The first classifier used to initially filter the data. + second_classifier : Classifier + The second classifier used to further refine or confirm the classification based on the output from the first classifier. + first_fdr_cutoff : float, default=0.6 + The fdr threshold for the first classifier, determining how selective the first classification step is. + second_fdr_cutoff : float, default=0.01 + The fdr threshold for the second classifier, typically set stricter to ensure high confidence in the final classification results. + min_precursors_for_update : int, default=5000 + The minimum number of precursors required to update the first classifier. + + """ + self.first_classifier = first_classifier + self.second_classifier = second_classifier + self.first_fdr_cutoff = first_fdr_cutoff + self.second_fdr_cutoff = second_fdr_cutoff + + self._min_precursors_for_update = min_precursors_for_update + self._train_on_top_n = train_on_top_n + + def fit_predict( + self, + df: pd.DataFrame, + x_cols: list[str], + y_col: str = "decoy", + group_columns: list[str] | None = None, + max_iterations: int = 5, + ) -> pd.DataFrame: + """ + Train the two-step classifier and predict precursors using an iterative approach: + 1. First iteration: Train neural network on top-n candidates. + 2. Subsequent iterations: Use linear classifier to filter data, then refine with neural network. + 3. Update linear classifier if enough high-confidence predictions are found, else break. 
+
+        Parameters
+        ----------
+        df : pd.DataFrame
+            Input DataFrame containing the features and the target variable.
+        x_cols : list[str]
+            Feature column names.
+        y_col : str, optional
+            Target variable column name, defaults to 'decoy'.
+        group_columns : list[str] | None, optional
+            Columns to group by for the FDR calculations.
+        max_iterations : int
+            Maximum number of refinement iterations.
+
+        Returns
+        -------
+        pd.DataFrame
+            DataFrame containing predictions and q-values.
+
+        """
+        df = self._preprocess_data(df, x_cols)
+        best_result = None
+        best_precursor_count = -1
+
+        for i in range(max_iterations):
+            if self.first_classifier.fitted and i > 0:
+                df_train, df_predict = self._apply_filtering_with_first_classifier(
+                    df, x_cols, group_columns
+                )
+                self.second_classifier.epochs = 50
+            else:
+                df_train = df[df["rank"] < self._train_on_top_n]
+                df_predict = df
+                self.second_classifier.epochs = 10
+
+            predictions = self._train_and_apply_second_classifier(
+                df_train, df_predict, x_cols, y_col, group_columns
+            )
+
+            # Filter results and check for improvement
+            df_filtered = filter_by_qval(predictions, self.second_fdr_cutoff)
+            current_target_count = len(df_filtered[df_filtered["decoy"] == 0])
+
+            if current_target_count < best_precursor_count:
+                logger.info(
+                    f"Stopping training after iteration {i} "
+                    f"due to decreased target count ({current_target_count} < {best_precursor_count})"
+                )
+                return best_result
+
+            best_precursor_count = current_target_count
+            best_result = predictions
+
+            # Update first classifier if enough confident predictions
+            if current_target_count > self._min_precursors_for_update:
+                self._update_first_classifier(
+                    df_filtered, df, x_cols, y_col, group_columns
+                )
+            else:
+                logger.info(
+                    f"Stopping fitting after {i+1} / {max_iterations} iterations: "
+                    f"not enough precursors were detected to update the first classifier."
+                )
+                break
+        else:
+            logger.info(
+                f"Stopping fitting after reaching the maximum number of iterations: {max_iterations} / {max_iterations}."
+            )
+
+        return best_result
+
+    def _preprocess_data(self, df: pd.DataFrame, x_cols: list[str]) -> pd.DataFrame:
+        """
+        Prepare data by removing NaN values (in place) and applying absolute transformations.
+        """
+        df.dropna(subset=x_cols, inplace=True)
+        return apply_absolute_transformations(df)
+
+    def _apply_filtering_with_first_classifier(
+        self, df: pd.DataFrame, x_cols: list[str], group_columns: list[str]
+    ) -> tuple[pd.DataFrame, pd.DataFrame]:
+        """
+        Apply the first classifier to filter the data; the same filtered subset is returned for both training and prediction of the second classifier.
+        """
+        df["proba"] = self.first_classifier.predict_proba(df[x_cols].to_numpy())[:, 1]
+
+        filtered_df = compute_and_filter_q_values(
+            df, self.first_fdr_cutoff, group_columns, remove_decoys=False
+        )
+
+        return filtered_df, filtered_df
+
+    def _train_and_apply_second_classifier(
+        self,
+        train_df: pd.DataFrame,
+        predict_df: pd.DataFrame,
+        x_cols: list[str],
+        y_col: str,
+        group_columns: list[str],
+    ) -> pd.DataFrame:
+        """
+        Train the second classifier and apply it to obtain predictions.
+ """ + self.second_classifier.fit( + train_df[x_cols].to_numpy().astype(np.float32), + train_df[y_col].to_numpy().astype(np.float32), + ) + + x = predict_df[x_cols].to_numpy().astype(np.float32) + predict_df["proba"] = self.second_classifier.predict_proba(x)[:, 1] + + return compute_q_values(predict_df, group_columns) + + def _update_first_classifier( + self, + subset_df: pd.DataFrame, + full_df: pd.DataFrame, + x_cols: list[str], + y_col: str, + group_columns: list[str], + ) -> None: + """ + Update first classifier by finding and using target/decoy pairs. First extracts the corresponding + target/decoy partners from the full dataset for each entry in the subset, then uses these + pairs to retrain the classifier. + """ + df = get_target_decoy_partners(subset_df, full_df) + + x = df[x_cols].to_numpy() + y = df[y_col].to_numpy() + + previous_n_precursors = -1 + + if self.first_classifier.fitted: + df["proba"] = self.first_classifier.predict_proba(x)[:, 1] + df_targets = compute_and_filter_q_values( + df, self.first_fdr_cutoff, group_columns + ) + previous_n_precursors = len(df_targets) + previous_state_dict = self.first_classifier.to_state_dict() + + self.first_classifier.fit(x, y) + + df["proba"] = self.first_classifier.predict_proba(x)[:, 1] + df_targets = compute_and_filter_q_values( + df, self.first_fdr_cutoff, group_columns + ) + current_n_precursors = len(df_targets) + + if previous_n_precursors > current_n_precursors: + logger.info( + f"Reverted the first classifier back to the previous version " + f"(prev: {previous_n_precursors}, curr: {current_n_precursors})" + ) + self.first_classifier.from_state_dict(previous_state_dict) + else: + logger.info("Fitted the second classifier") + + @property + def fitted(self) -> bool: + """Return whether both classifiers have been fitted.""" + return self.second_classifier.fitted + + def to_state_dict(self) -> dict: + """Save classifier state. + + Returns + ------- + dict + State dictionary containing both classifiers + """ + return { + "first_classifier": self.first_classifier.to_state_dict(), + "second_classifier": self.second_classifier.to_state_dict(), + "first_fdr_cutoff": self.first_fdr_cutoff, + "second_fdr_cutoff": self.second_fdr_cutoff, + "train_on_top_n": self._train_on_top_n, + } + + def from_state_dict(self, state_dict: dict) -> None: + """Load classifier state. + + Parameters + ---------- + state_dict : dict + State dictionary containing both classifiers + """ + self.first_classifier.from_state_dict(state_dict["first_classifier"]) + self.second_classifier.from_state_dict(state_dict["second_classifier"]) + self.first_fdr_cutoff = state_dict["first_fdr_cutoff"] + self.second_fdr_cutoff = state_dict["second_fdr_cutoff"] + self._train_on_top_n = state_dict["train_on_top_n"] + + +def compute_q_values( + df: pd.DataFrame, group_columns: list[str] | None = None +) -> pd.DataFrame: + """ + Compute q-values for each entry after keeping only best entries per group. + """ + df.sort_values("proba", ascending=True, inplace=True) + df = keep_best(df, group_columns=group_columns) + return get_q_values(df, "proba", "decoy") + + +def filter_by_qval(df: pd.DataFrame, fdr_cutoff: float) -> pd.DataFrame: + """ + Filter dataframe by q-value threshold. If no entries pass the threshold, + return the single target entry with lowest q-value. 
+ """ + df_filtered = df[df["qval"] < fdr_cutoff] + + if len(df_filtered) == 0: + df_targets = df[df["decoy"] == 0] + df_filtered = df_targets.loc[[df_targets["qval"].idxmin()]] + + return df_filtered + + +def compute_and_filter_q_values( + df: pd.DataFrame, + fdr: float, + group_columns: list[str] | None = None, + remove_decoys: bool = True, +) -> pd.DataFrame: + """ + Returns entries in the DataFrame based on the FDR threshold and optionally removes decoy entries. + If no entries are found below the FDR threshold after filtering, returns the single best entry based on the q-value. + """ + df = compute_q_values(df, group_columns) + if remove_decoys: + df = df[df["decoy"] == 0] + return filter_by_qval(df, fdr) + + +def get_target_decoy_partners( + reference_df: pd.DataFrame, full_df: pd.DataFrame, group_by: list[str] | None = None +) -> pd.DataFrame: + """ + Identifies and returns the corresponding target and decoy partner rows in full_df given the subset reference_df. + This function is typically used to find target-decoy partners based on certain criteria like rank and elution group index. + + Parameters + ---------- + reference_df : pd.DataFrame + A subset DataFrame that contains reference values for matching. + full_df : pd.DataFrame + The main DataFrame from which rows will be matched against reference_df. + group_by : list[str] | None, optional + The columns to group by when performing the match. Defaults to ['rank', 'elution_group_idx'] if None is provided. + + Returns + ------- + pd.DataFrame + A DataFrame containing rows from full_df that match the grouping criteria. + + """ + if group_by is None: + group_by = ["rank", "elution_group_idx"] + valid_tuples = reference_df[group_by] + matching_rows = full_df.merge(valid_tuples, on=group_by, how="inner") + + return matching_rows + + +def apply_absolute_transformations( + df: pd.DataFrame, columns: list[str] | None = None +) -> pd.DataFrame: + """ + Applies absolute value transformations to predefined columns in a DataFrame inplace. + + Parameters + ---------- + df : pd.DataFrame + The input DataFrame containing the data to be transformed. + columns : list of str, optional + List of column names to transform. Defaults to ['delta_rt', 'top_3_ms2_mass_error', 'mean_ms2_mass_error']. + + Returns + ------- + pd.DataFrame + The transformed DataFrame. + """ + if columns is None: + columns = ["delta_rt", "top_3_ms2_mass_error", "mean_ms2_mass_error"] + + for col in columns: + if col in df.columns: + df[col] = np.abs(df[col]) + else: + logger.warning( + f"column '{col}' is not present in df, therefore abs() was not applied." + ) + + return df diff --git a/alphadia/workflow/manager.py b/alphadia/workflow/manager.py index 84c036782..4766d4749 100644 --- a/alphadia/workflow/manager.py +++ b/alphadia/workflow/manager.py @@ -19,6 +19,7 @@ import alphadia from alphadia import fdr from alphadia.calibration.property import Calibration, calibration_model_provider +from alphadia.fdrx.models.two_step_classifier import TwoStepClassifier from alphadia.workflow import reporting from alphadia.workflow.config import Config @@ -570,6 +571,34 @@ def fit_predict(self, update_dict): return self.predict() +def get_group_columns(competetive: bool, group_channels: bool) -> list[str]: + """ + Determine the group columns based on competitiveness and channel grouping. + + competitive : bool + If True, group candidates eluting at the same time by grouping them under the same 'elution_group_idx'. 
+    group_channels : bool
+        If True and 'competetive' is also True, candidates are additionally grouped by 'channel'.
+
+    Returns
+    -------
+    list
+        A list of column names to be used for grouping in the analysis. If competitive, this is either
+        ['elution_group_idx', 'channel'] or ['elution_group_idx'], depending on the `group_channels` flag.
+        If not competitive, the list is always ['precursor_idx'].
+
+    """
+    if competetive:
+        group_columns = (
+            ["elution_group_idx", "channel"]
+            if group_channels
+            else ["elution_group_idx"]
+        )
+    else:
+        group_columns = ["precursor_idx"]
+    return group_columns
+
+
 class FDRManager(BaseManager):
     def __init__(
         self,
@@ -597,6 +626,8 @@ def __init__(
         self.feature_columns = feature_columns
         self.classifier_store = defaultdict(list)
         self.classifier_base = classifier_base
+        self.is_two_step_classifier = isinstance(classifier_base, TwoStepClassifier)
+
         self._current_version = -1
         self.load_classifier_store()
 
@@ -665,17 +696,27 @@
 
         classifier = self.get_classifier(available_columns, version)
         if decoy_strategy == "precursor":
-            psm_df = fdr.perform_fdr(
-                classifier,
-                available_columns,
-                features_df[features_df["decoy"] == 0].copy(),
-                features_df[features_df["decoy"] == 1].copy(),
-                competetive=competetive,
-                group_channels=True,
-                df_fragments=df_fragments,
-                dia_cycle=dia_cycle,
-                figure_path=self.figure_path,
-            )
+            if not self.is_two_step_classifier:
+                psm_df = fdr.perform_fdr(
+                    classifier,
+                    available_columns,
+                    features_df[features_df["decoy"] == 0].copy(),
+                    features_df[features_df["decoy"] == 1].copy(),
+                    competetive=competetive,
+                    group_channels=True,
+                    df_fragments=df_fragments,
+                    dia_cycle=dia_cycle,
+                    figure_path=self.figure_path,
+                )
+            else:
+                group_columns = get_group_columns(competetive, group_channels=True)
+
+                psm_df = classifier.fit_predict(
+                    features_df,
+                    available_columns + ["score"],
+                    group_columns=group_columns,
+                )
+
         elif decoy_strategy == "precursor_channel_wise":
             channels = features_df["channel"].unique()
             psm_df_list = []
@@ -770,14 +811,17 @@ def load_classifier_store(self, path: None | str = None):
 
         logger.info(f"Loading classifier store from {path}")
 
-        for file in os.listdir(path):
-            if file.endswith(".pth"):
-                classifier_hash = file.split(".")[0]
-
-                if classifier_hash not in self.classifier_store:
-                    classifier = deepcopy(self.classifier_base)
-                    classifier.from_state_dict(torch.load(os.path.join(path, file)))
-                    self.classifier_store[classifier_hash].append(classifier)
+        if (
+            not self.is_two_step_classifier
+        ):  # TODO add pretrained model for TwoStepClassifier
+            for file in os.listdir(path):
+                if file.endswith(".pth"):
+                    classifier_hash = file.split(".")[0]
+
+                    if classifier_hash not in self.classifier_store:
+                        classifier = deepcopy(self.classifier_base)
+                        classifier.from_state_dict(torch.load(os.path.join(path, file)))
+                        self.classifier_store[classifier_hash].append(classifier)
 
     def get_classifier(self, available_columns: list, version: int = -1):
         """Gets the classifier for a given set of feature columns and version. If the classifier is not found in the store, gets the base classifier instead.
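
Note (illustrative, not part of the patch): the grouping logic introduced by get_group_columns above reduces to three cases. A minimal sketch, assuming the patched alphadia.workflow.manager module is importable:

    # Sketch only: expected outputs of the get_group_columns() helper added above.
    from alphadia.workflow.manager import get_group_columns

    # Competitive scoring groups co-eluting candidates; channels are optional.
    assert get_group_columns(competetive=True, group_channels=True) == [
        "elution_group_idx",
        "channel",
    ]
    assert get_group_columns(competetive=True, group_channels=False) == [
        "elution_group_idx"
    ]

    # Non-competitive scoring falls back to per-precursor grouping.
    assert get_group_columns(competetive=False, group_channels=True) == ["precursor_idx"]
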
diff --git a/alphadia/workflow/peptidecentric.py b/alphadia/workflow/peptidecentric.py
index 67c26644e..151763f11 100644
--- a/alphadia/workflow/peptidecentric.py
+++ b/alphadia/workflow/peptidecentric.py
@@ -15,6 +15,8 @@
 
 # alphadia imports
 from alphadia import fragcomp, plexscoring, utils
+from alphadia.fdrx.models.logistic_regression import LogisticRegressionClassifier
+from alphadia.fdrx.models.two_step_classifier import TwoStepClassifier
 from alphadia.peakgroup import search
 from alphadia.workflow import base, manager, optimization
 from alphadia.workflow.config import Config
@@ -94,13 +96,42 @@
     "mean_overlapping_mass_error",
 ]
 
-classifier_base = fdrx.BinaryClassifierLegacyNewBatching(
-    test_size=0.001,
-    batch_size=5000,
-    learning_rate=0.001,
-    epochs=10,
-    experimental_hyperparameter_tuning=True,
-)
+
+def get_classifier_base(
+    enable_two_step_classifier: bool = False, fdr_cutoff: float = 0.01
+):
+    """Creates and returns a classifier base instance.
+
+    Parameters
+    ----------
+    enable_two_step_classifier : bool, optional
+        If True, uses a two-step classifier (logistic regression followed by a
+        neural network). If False (default), uses only the neural network.
+    fdr_cutoff : float, optional
+        The FDR cutoff threshold used by the second classifier when two-step
+        classification is enabled. Default is 0.01.
+
+    Returns
+    -------
+    BinaryClassifierLegacyNewBatching | TwoStepClassifier
+        Neural network or two-step classifier, depending on enable_two_step_classifier.
+    """
+    nn_classifier = fdrx.BinaryClassifierLegacyNewBatching(
+        test_size=0.001,
+        batch_size=5000,
+        learning_rate=0.001,
+        epochs=10,
+        experimental_hyperparameter_tuning=True,
+    )
+
+    if enable_two_step_classifier:
+        return TwoStepClassifier(
+            first_classifier=LogisticRegressionClassifier(),
+            second_classifier=nn_classifier,
+            second_fdr_cutoff=fdr_cutoff,
+        )
+    else:
+        return nn_classifier
 
 
 class PeptideCentricWorkflow(base.WorkflowBase):
@@ -137,7 +168,10 @@ def load(
     def init_fdr_manager(self):
         self.fdr_manager = manager.FDRManager(
             feature_columns=feature_columns,
-            classifier_base=classifier_base,
+            classifier_base=get_classifier_base(
+                self.config["fdr"]["enable_two_step_classifier"],
+                self.config["fdr"]["fdr"],
+            ),
         )
 
     def init_spectral_library(self):
diff --git a/tests/unit_tests/test_fdrx_models.py b/tests/unit_tests/test_fdrx_models.py
new file mode 100644
index 000000000..d917f927f
--- /dev/null
+++ b/tests/unit_tests/test_fdrx_models.py
@@ -0,0 +1,106 @@
+from collections import Counter
+
+import pandas as pd
+import pytest
+
+from alphadia.fdrx.models.two_step_classifier import (
+    apply_absolute_transformations,
+    compute_and_filter_q_values,
+    get_target_decoy_partners,
+)
+
+
+def test_apply_absolute_transformations():
+    data = {
+        "delta_rt": [-1, -2, 3],
+        "top_3_ms2_mass_error": [-1, -2, -3],
+        "mean_ms2_mass_error": [1, -2, 3],
+        "extra_column": [-1, -2, -3],
+    }
+    df = pd.DataFrame(data)
+
+    transformed_df = apply_absolute_transformations(df)
+
+    assert (transformed_df["delta_rt"] >= 0).all(), "delta_rt contains negative values"
+    assert (
+        transformed_df["top_3_ms2_mass_error"] >= 0
+    ).all(), "top_3_ms2_mass_error contains negative values"
+    assert (
+        transformed_df["mean_ms2_mass_error"] >= 0
+    ).all(), "mean_ms2_mass_error contains negative values"
+
+    assert (
+        transformed_df["extra_column"] == df["extra_column"]
+    ).all(), "extra_column should not be transformed"
+
+
+@pytest.fixture
+def setup_data():
+    reference_df = pd.DataFrame(
+        {"decoy": [0, 1], "rank": [1, 0], "elution_group_idx": [100, 101]}
+    )
+
+    full_df = pd.DataFrame(
+        {
+            "decoy": [0, 0, 1, 1, 0],
+            "rank": [1, 0, 2, 1, 2],
+            "elution_group_idx": [100, 101, 102, 100, 102],
+            "intensity": [200, 150, 120, 130, 95],
+            "peptide": ["pepA", "pepB", "pepC", "pepD", "pepE"],
+        }
+    )
+
+    return reference_df, full_df
+
+
+def test_get_target_decoy_partners_correct_extraction(setup_data):
+    reference_df, full_df = setup_data
+    group_columns = ["elution_group_idx", "rank"]
+    result_df = get_target_decoy_partners(reference_df, full_df, group_by=group_columns)
+
+    assert (
+        len(result_df) == 3
+    )  # should match rows with ("rank", "elution_group_idx") = (1, 100) and (0, 101)
+    assert all(col in result_df.columns for col in full_df.columns)
+
+    assert Counter(result_df["decoy"]) == Counter([0, 0, 1])
+    assert Counter(result_df["peptide"]) == Counter(["pepA", "pepB", "pepD"])
+
+
+def test_handling_nonexistent_partners_in_get_target_decoy_partners(setup_data):
+    reference_df, full_df = setup_data
+
+    reference_df.loc[1] = [0, 3, 104]
+    result_df = get_target_decoy_partners(reference_df, full_df)
+
+    assert len(result_df) == 2
+    assert result_df[
+        (result_df["rank"] == 3) & (result_df["elution_group_idx"] == 104)
+    ].empty
+
+
+@pytest.mark.parametrize(
+    ["fdr", "remove_decoys", "expected_length", "expected_decoy_count"],
+    [
+        (0.5, True, 3, 0),
+        (0.01, True, 1, 0),
+        (0.5, False, 4, 1),
+        (0.01, False, 1, 0),
+    ],
+)
+def test_compute_and_filter_q_values(
+    fdr, remove_decoys, expected_length, expected_decoy_count
+):
+    df = pd.DataFrame(
+        {
+            "proba": [0.1, 0.3, 0.8, 0.9, 0.9, 0.2, 0.4, 0.5],
+            "decoy": [0, 1, 0, 1, 0, 1, 0, 1],
+            "group": ["A", "A", "B", "B", "C", "C", "D", "D"],
+        }
+    )
+    result = compute_and_filter_q_values(
+        df, fdr=fdr, group_columns=["group"], remove_decoys=remove_decoys
+    )
+    assert len(result) == expected_length
+    assert len(result[result["decoy"] == 1]) == expected_decoy_count
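
Usage note (illustrative, not part of the patch): with fdr.enable_two_step_classifier set to true in default.yaml, get_classifier_base wraps the existing neural network in a TwoStepClassifier with a LogisticRegressionClassifier as the permissive first stage, and FDRManager.fit_predict routes scoring through TwoStepClassifier.fit_predict using the columns from get_group_columns. A minimal sketch of driving the classifier directly follows; features_df is a hypothetical candidate DataFrame that must contain the feature columns plus 'decoy', 'rank', and 'elution_group_idx':

    from alphadia import fdrexperimental as fdrx
    from alphadia.fdrx.models.logistic_regression import LogisticRegressionClassifier
    from alphadia.fdrx.models.two_step_classifier import TwoStepClassifier

    # Mirrors get_classifier_base(enable_two_step_classifier=True, fdr_cutoff=0.01).
    classifier = TwoStepClassifier(
        first_classifier=LogisticRegressionClassifier(),
        second_classifier=fdrx.BinaryClassifierLegacyNewBatching(
            test_size=0.001, batch_size=5000
        ),
        first_fdr_cutoff=0.6,    # permissive first-stage filter
        second_fdr_cutoff=0.01,  # strict final cutoff
    )

    # features_df and the x_cols list are placeholders; in the workflow they come
    # from FDRManager.fit_predict as available_columns + ["score"].
    psm_df = classifier.fit_predict(
        features_df,
        x_cols=["score", "delta_rt", "mean_ms2_mass_error"],
        group_columns=["elution_group_idx"],  # competitive grouping without channels
    )
    precursors_passing = psm_df[(psm_df["qval"] < 0.01) & (psm_df["decoy"] == 0)]
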