diff --git a/alphadia/constants/default.yaml b/alphadia/constants/default.yaml
index d9843bc06..39efdaff8 100644
--- a/alphadia/constants/default.yaml
+++ b/alphadia/constants/default.yaml
@@ -233,6 +233,7 @@ fdr:
   keep_decoys: false
   channel_wise_fdr: false
   inference_strategy: "heuristic"
+  enable_two_step_classifier: false
 
 search_output:
   peptide_level_lfq: false
diff --git a/alphadia/fdrx/models/__init__.py b/alphadia/fdrx/models/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/alphadia/fdrx/models/logistic_regression.py b/alphadia/fdrx/models/logistic_regression.py
new file mode 100644
index 000000000..4d43f6879
--- /dev/null
+++ b/alphadia/fdrx/models/logistic_regression.py
@@ -0,0 +1,128 @@
+import logging
+
+import numpy as np
+from sklearn.linear_model import LogisticRegression
+from sklearn.preprocessing import StandardScaler
+
+from alphadia.fdrexperimental import Classifier
+
+logger = logging.getLogger()
+
+
+class LogisticRegressionClassifier(Classifier):
+    def __init__(self) -> None:
+        """Binary classifier using a logistic regression model."""
+        self.scaler = StandardScaler()
+        self.model = LogisticRegression()
+        self._fitted = False
+
+    @property
+    def fitted(self) -> bool:
+        return self._fitted
+
+    def fit(self, x: np.ndarray, y: np.ndarray) -> None:
+        """Fit the classifier to the data.
+
+        Parameters
+        ----------
+        x : np.array, dtype=float
+            Training data of shape (n_samples, n_features).
+        y : np.array, dtype=int
+            Target values of shape (n_samples,).
+
+        """
+        x_scaled = self.scaler.fit_transform(x)
+        self.model.fit(x_scaled, y)
+        self._fitted = True
+
+    def predict(self, x: np.ndarray) -> np.ndarray:
+        """Predict the class of the data.
+
+        Parameters
+        ----------
+        x : np.array, dtype=float
+            Data of shape (n_samples, n_features).
+
+        Returns
+        -------
+        y : np.array, dtype=int
+            Predicted class labels of shape (n_samples,).
+
+        """
+        x_scaled = self.scaler.transform(x)
+        return self.model.predict(x_scaled)
+
+    def predict_proba(self, x: np.ndarray) -> np.ndarray:
+        """Predict the class probabilities of the data.
+
+        Parameters
+        ----------
+        x : np.array, dtype=float
+            Data of shape (n_samples, n_features).
+
+        Returns
+        -------
+        y : np.array, dtype=float
+            Predicted class probabilities of shape (n_samples, n_classes).
+
+        """
+        x_scaled = self.scaler.transform(x)
+        return self.model.predict_proba(x_scaled)
+
+    def to_state_dict(self) -> dict:
+        """Return the state of the classifier as a dictionary.
+
+        Returns
+        -------
+        dict
+            Dictionary containing the state of the classifier.
+
+        """
+        state_dict = {"_fitted": self._fitted}
+
+        if self._fitted:
+            state_dict.update(
+                {
+                    "scaler_mean": self.scaler.mean_,
+                    "scaler_var": self.scaler.var_,
+                    "scaler_scale": self.scaler.scale_,
+                    "scaler_n_samples_seen": self.scaler.n_samples_seen_,
+                    "model_coef": self.model.coef_,
+                    "model_intercept": self.model.intercept_,
+                    "model_classes": self.model.classes_,
+                }
+            )
+
+        return state_dict
+
+    def from_state_dict(self, state_dict: dict) -> None:
+        """Load the state of the classifier from a dictionary.
+
+        Parameters
+        ----------
+        state_dict : dict
+            Dictionary containing the state of the classifier.
+ + """ + self._fitted = state_dict["_fitted"] + + if self._fitted: + self.scaler = StandardScaler() + self.scaler.mean_ = np.array(state_dict["scaler_mean"]) + self.scaler.var_ = np.array(state_dict["scaler_var"]) + self.scaler.scale_ = np.array(state_dict["scaler_scale"]) + self.scaler.n_samples_seen_ = np.array(state_dict["scaler_n_samples_seen"]) + + self.model = LogisticRegression() + self.model.coef_ = np.array(state_dict["model_coef"]) + self.model.intercept_ = np.array(state_dict["model_intercept"]) + self.model.classes_ = np.array(state_dict["model_classes"]) diff --git a/alphadia/fdrx/models/two_step_classifier.py b/alphadia/fdrx/models/two_step_classifier.py new file mode 100644 index 000000000..c33468d96 --- /dev/null +++ b/alphadia/fdrx/models/two_step_classifier.py @@ -0,0 +1,353 @@ +import logging + +import numpy as np +import pandas as pd + +from alphadia.fdr import get_q_values, keep_best +from alphadia.fdrexperimental import Classifier + +logger = logging.getLogger() + + +class TwoStepClassifier: + def __init__( + self, + first_classifier: Classifier, + second_classifier: Classifier, + first_fdr_cutoff: float = 0.6, + second_fdr_cutoff: float = 0.01, + min_precursors_for_update: int = 5000, + train_on_top_n: int = 1, + ): + """ + A two-step classifier, designed to refine classification results by applying a stricter second-stage classification after an initial filtering stage. + + Parameters + ---------- + first_classifier : Classifier + The first classifier used to initially filter the data. + second_classifier : Classifier + The second classifier used to further refine or confirm the classification based on the output from the first classifier. + first_fdr_cutoff : float, default=0.6 + The fdr threshold for the first classifier, determining how selective the first classification step is. + second_fdr_cutoff : float, default=0.01 + The fdr threshold for the second classifier, typically set stricter to ensure high confidence in the final classification results. + min_precursors_for_update : int, default=5000 + The minimum number of precursors required to update the first classifier. + + """ + self.first_classifier = first_classifier + self.second_classifier = second_classifier + self.first_fdr_cutoff = first_fdr_cutoff + self.second_fdr_cutoff = second_fdr_cutoff + + self._min_precursors_for_update = min_precursors_for_update + self._train_on_top_n = train_on_top_n + + def fit_predict( + self, + df: pd.DataFrame, + x_cols: list[str], + y_col: str = "decoy", + group_columns: list[str] | None = None, + max_iterations: int = 5, + ) -> pd.DataFrame: + """ + Train the two-step classifier and predict precursors using an iterative approach: + 1. First iteration: Train neural network on top-n candidates. + 2. Subsequent iterations: Use linear classifier to filter data, then refine with neural network. + 3. Update linear classifier if enough high-confidence predictions are found, else break. 
+
+        Parameters
+        ----------
+        df : pd.DataFrame
+            Input DataFrame containing the features and the target variable.
+        x_cols : list[str]
+            Feature column names.
+        y_col : str, optional
+            Target variable column name, defaults to 'decoy'.
+        group_columns : list[str] | None, optional
+            Columns to group by for the FDR calculations.
+        max_iterations : int
+            Maximum number of refinement iterations.
+
+        Returns
+        -------
+        pd.DataFrame
+            DataFrame containing predictions and q-values.
+
+        """
+        df = self._preprocess_data(df, x_cols)
+        best_result = None
+        best_precursor_count = -1
+
+        for i in range(max_iterations):
+            if self.first_classifier.fitted and i > 0:
+                df_train, df_predict = self._apply_filtering_with_first_classifier(
+                    df, x_cols, group_columns
+                )
+                self.second_classifier.epochs = 50
+            else:
+                df_train = df[df["rank"] < self._train_on_top_n]
+                df_predict = df
+                self.second_classifier.epochs = 10
+
+            predictions = self._train_and_apply_second_classifier(
+                df_train, df_predict, x_cols, y_col, group_columns
+            )
+
+            # Filter results and check for improvement
+            df_filtered = filter_by_qval(predictions, self.second_fdr_cutoff)
+            current_target_count = len(df_filtered[df_filtered["decoy"] == 0])
+
+            if current_target_count < best_precursor_count:
+                logger.info(
+                    f"Stopping training after iteration {i} "
+                    f"due to decreased target count ({current_target_count} < {best_precursor_count})"
+                )
+                return best_result
+
+            best_precursor_count = current_target_count
+            best_result = predictions
+
+            # Update first classifier if enough confident predictions
+            if current_target_count > self._min_precursors_for_update:
+                self._update_first_classifier(
+                    df_filtered, df, x_cols, y_col, group_columns
+                )
+            else:
+                logger.info(
+                    f"Stopping fitting after {i+1} / {max_iterations} iterations: "
+                    f"not enough precursors were detected to update the first classifier."
+                )
+                break
+        else:
+            logger.info(
+                f"Stopping fitting after reaching the maximum number of iterations: {max_iterations} / {max_iterations}."
+            )
+
+        return best_result
+
+    def _preprocess_data(self, df: pd.DataFrame, x_cols: list[str]) -> pd.DataFrame:
+        """
+        Prepare data by removing NaN values (in place) and applying absolute transformations.
+        """
+        df.dropna(subset=x_cols, inplace=True)
+        return apply_absolute_transformations(df)
+
+    def _apply_filtering_with_first_classifier(
+        self, df: pd.DataFrame, x_cols: list[str], group_columns: list[str]
+    ) -> tuple[pd.DataFrame, pd.DataFrame]:
+        """
+        Apply the first classifier to filter the data; the same filtered subset is returned for both training and prediction of the second classifier.
+        """
+        df["proba"] = self.first_classifier.predict_proba(df[x_cols].to_numpy())[:, 1]
+
+        filtered_df = compute_and_filter_q_values(
+            df, self.first_fdr_cutoff, group_columns, remove_decoys=False
+        )
+
+        return filtered_df, filtered_df
+
+    def _train_and_apply_second_classifier(
+        self,
+        train_df: pd.DataFrame,
+        predict_df: pd.DataFrame,
+        x_cols: list[str],
+        y_col: str,
+        group_columns: list[str],
+    ) -> pd.DataFrame:
+        """
+        Train the second classifier and apply it to obtain predictions.
+ """ + self.second_classifier.fit( + train_df[x_cols].to_numpy().astype(np.float32), + train_df[y_col].to_numpy().astype(np.float32), + ) + + x = predict_df[x_cols].to_numpy().astype(np.float32) + predict_df["proba"] = self.second_classifier.predict_proba(x)[:, 1] + + return compute_q_values(predict_df, group_columns) + + def _update_first_classifier( + self, + subset_df: pd.DataFrame, + full_df: pd.DataFrame, + x_cols: list[str], + y_col: str, + group_columns: list[str], + ) -> None: + """ + Update first classifier by finding and using target/decoy pairs. First extracts the corresponding + target/decoy partners from the full dataset for each entry in the subset, then uses these + pairs to retrain the classifier. + """ + df = get_target_decoy_partners(subset_df, full_df) + + x = df[x_cols].to_numpy() + y = df[y_col].to_numpy() + + previous_n_precursors = -1 + + if self.first_classifier.fitted: + df["proba"] = self.first_classifier.predict_proba(x)[:, 1] + df_targets = compute_and_filter_q_values( + df, self.first_fdr_cutoff, group_columns + ) + previous_n_precursors = len(df_targets) + previous_state_dict = self.first_classifier.to_state_dict() + + self.first_classifier.fit(x, y) + + df["proba"] = self.first_classifier.predict_proba(x)[:, 1] + df_targets = compute_and_filter_q_values( + df, self.first_fdr_cutoff, group_columns + ) + current_n_precursors = len(df_targets) + + if previous_n_precursors > current_n_precursors: + logger.info( + f"Reverted the first classifier back to the previous version " + f"(prev: {previous_n_precursors}, curr: {current_n_precursors})" + ) + self.first_classifier.from_state_dict(previous_state_dict) + else: + logger.info("Fitted the second classifier") + + @property + def fitted(self) -> bool: + """Return whether both classifiers have been fitted.""" + return self.second_classifier.fitted + + def to_state_dict(self) -> dict: + """Save classifier state. + + Returns + ------- + dict + State dictionary containing both classifiers + """ + return { + "first_classifier": self.first_classifier.to_state_dict(), + "second_classifier": self.second_classifier.to_state_dict(), + "first_fdr_cutoff": self.first_fdr_cutoff, + "second_fdr_cutoff": self.second_fdr_cutoff, + "train_on_top_n": self._train_on_top_n, + } + + def from_state_dict(self, state_dict: dict) -> None: + """Load classifier state. + + Parameters + ---------- + state_dict : dict + State dictionary containing both classifiers + """ + self.first_classifier.from_state_dict(state_dict["first_classifier"]) + self.second_classifier.from_state_dict(state_dict["second_classifier"]) + self.first_fdr_cutoff = state_dict["first_fdr_cutoff"] + self.second_fdr_cutoff = state_dict["second_fdr_cutoff"] + self._train_on_top_n = state_dict["train_on_top_n"] + + +def compute_q_values( + df: pd.DataFrame, group_columns: list[str] | None = None +) -> pd.DataFrame: + """ + Compute q-values for each entry after keeping only best entries per group. + """ + df.sort_values("proba", ascending=True, inplace=True) + df = keep_best(df, group_columns=group_columns) + return get_q_values(df, "proba", "decoy") + + +def filter_by_qval(df: pd.DataFrame, fdr_cutoff: float) -> pd.DataFrame: + """ + Filter dataframe by q-value threshold. If no entries pass the threshold, + return the single target entry with lowest q-value. 
+ """ + df_filtered = df[df["qval"] < fdr_cutoff] + + if len(df_filtered) == 0: + df_targets = df[df["decoy"] == 0] + df_filtered = df_targets.loc[[df_targets["qval"].idxmin()]] + + return df_filtered + + +def compute_and_filter_q_values( + df: pd.DataFrame, + fdr: float, + group_columns: list[str] | None = None, + remove_decoys: bool = True, +) -> pd.DataFrame: + """ + Returns entries in the DataFrame based on the FDR threshold and optionally removes decoy entries. + If no entries are found below the FDR threshold after filtering, returns the single best entry based on the q-value. + """ + df = compute_q_values(df, group_columns) + if remove_decoys: + df = df[df["decoy"] == 0] + return filter_by_qval(df, fdr) + + +def get_target_decoy_partners( + reference_df: pd.DataFrame, full_df: pd.DataFrame, group_by: list[str] | None = None +) -> pd.DataFrame: + """ + Identifies and returns the corresponding target and decoy partner rows in full_df given the subset reference_df. + This function is typically used to find target-decoy partners based on certain criteria like rank and elution group index. + + Parameters + ---------- + reference_df : pd.DataFrame + A subset DataFrame that contains reference values for matching. + full_df : pd.DataFrame + The main DataFrame from which rows will be matched against reference_df. + group_by : list[str] | None, optional + The columns to group by when performing the match. Defaults to ['rank', 'elution_group_idx'] if None is provided. + + Returns + ------- + pd.DataFrame + A DataFrame containing rows from full_df that match the grouping criteria. + + """ + if group_by is None: + group_by = ["rank", "elution_group_idx"] + valid_tuples = reference_df[group_by] + matching_rows = full_df.merge(valid_tuples, on=group_by, how="inner") + + return matching_rows + + +def apply_absolute_transformations( + df: pd.DataFrame, columns: list[str] | None = None +) -> pd.DataFrame: + """ + Applies absolute value transformations to predefined columns in a DataFrame inplace. + + Parameters + ---------- + df : pd.DataFrame + The input DataFrame containing the data to be transformed. + columns : list of str, optional + List of column names to transform. Defaults to ['delta_rt', 'top_3_ms2_mass_error', 'mean_ms2_mass_error']. + + Returns + ------- + pd.DataFrame + The transformed DataFrame. + """ + if columns is None: + columns = ["delta_rt", "top_3_ms2_mass_error", "mean_ms2_mass_error"] + + for col in columns: + if col in df.columns: + df[col] = np.abs(df[col]) + else: + logger.warning( + f"column '{col}' is not present in df, therefore abs() was not applied." + ) + + return df diff --git a/alphadia/workflow/manager.py b/alphadia/workflow/manager.py index 84c036782..4766d4749 100644 --- a/alphadia/workflow/manager.py +++ b/alphadia/workflow/manager.py @@ -19,6 +19,7 @@ import alphadia from alphadia import fdr from alphadia.calibration.property import Calibration, calibration_model_provider +from alphadia.fdrx.models.two_step_classifier import TwoStepClassifier from alphadia.workflow import reporting from alphadia.workflow.config import Config @@ -570,6 +571,34 @@ def fit_predict(self, update_dict): return self.predict() +def get_group_columns(competetive: bool, group_channels: bool) -> list[str]: + """ + Determine the group columns based on competitiveness and channel grouping. + + competitive : bool + If True, group candidates eluting at the same time by grouping them under the same 'elution_group_idx'. 
+    group_channels : bool
+        If True and 'competetive' is also True, candidates are additionally grouped by 'channel'.
+
+    Returns
+    -------
+    list
+        A list of column names to be used for grouping in the analysis. If competitive, this is either
+        ['elution_group_idx', 'channel'] or ['elution_group_idx'], depending on the `group_channels` flag.
+        If not competitive, the list is always ['precursor_idx'].
+
+    """
+    if competetive:
+        group_columns = (
+            ["elution_group_idx", "channel"]
+            if group_channels
+            else ["elution_group_idx"]
+        )
+    else:
+        group_columns = ["precursor_idx"]
+    return group_columns
+
+
 class FDRManager(BaseManager):
     def __init__(
         self,
@@ -597,6 +626,8 @@ def __init__(
         self.feature_columns = feature_columns
         self.classifier_store = defaultdict(list)
         self.classifier_base = classifier_base
+        self.is_two_step_classifier = isinstance(classifier_base, TwoStepClassifier)
+
         self._current_version = -1
         self.load_classifier_store()
 
@@ -665,17 +696,27 @@
 
         classifier = self.get_classifier(available_columns, version)
         if decoy_strategy == "precursor":
-            psm_df = fdr.perform_fdr(
-                classifier,
-                available_columns,
-                features_df[features_df["decoy"] == 0].copy(),
-                features_df[features_df["decoy"] == 1].copy(),
-                competetive=competetive,
-                group_channels=True,
-                df_fragments=df_fragments,
-                dia_cycle=dia_cycle,
-                figure_path=self.figure_path,
-            )
+            if not self.is_two_step_classifier:
+                psm_df = fdr.perform_fdr(
+                    classifier,
+                    available_columns,
+                    features_df[features_df["decoy"] == 0].copy(),
+                    features_df[features_df["decoy"] == 1].copy(),
+                    competetive=competetive,
+                    group_channels=True,
+                    df_fragments=df_fragments,
+                    dia_cycle=dia_cycle,
+                    figure_path=self.figure_path,
+                )
+            else:
+                group_columns = get_group_columns(competetive, group_channels=True)
+
+                psm_df = classifier.fit_predict(
+                    features_df,
+                    available_columns + ["score"],
+                    group_columns=group_columns,
+                )
+
         elif decoy_strategy == "precursor_channel_wise":
             channels = features_df["channel"].unique()
             psm_df_list = []
@@ -770,14 +811,17 @@ def load_classifier_store(self, path: None | str = None):
 
         logger.info(f"Loading classifier store from {path}")
 
-        for file in os.listdir(path):
-            if file.endswith(".pth"):
-                classifier_hash = file.split(".")[0]
-
-                if classifier_hash not in self.classifier_store:
-                    classifier = deepcopy(self.classifier_base)
-                    classifier.from_state_dict(torch.load(os.path.join(path, file)))
-                    self.classifier_store[classifier_hash].append(classifier)
+        if (
+            not self.is_two_step_classifier
+        ):  # TODO add pretrained model for TwoStepClassifier
+            for file in os.listdir(path):
+                if file.endswith(".pth"):
+                    classifier_hash = file.split(".")[0]
+
+                    if classifier_hash not in self.classifier_store:
+                        classifier = deepcopy(self.classifier_base)
+                        classifier.from_state_dict(torch.load(os.path.join(path, file)))
+                        self.classifier_store[classifier_hash].append(classifier)
 
     def get_classifier(self, available_columns: list, version: int = -1):
         """Gets the classifier for a given set of feature columns and version. If the classifier is not found in the store, gets the base classifier instead.
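
Note (illustrative, not part of the patch): the grouping logic introduced by get_group_columns above reduces to three cases. A minimal sketch, assuming the patched alphadia.workflow.manager module is importable:

    # Sketch only: expected outputs of the get_group_columns() helper added above.
    from alphadia.workflow.manager import get_group_columns

    # Competitive scoring groups co-eluting candidates; channels are optional.
    assert get_group_columns(competetive=True, group_channels=True) == [
        "elution_group_idx",
        "channel",
    ]
    assert get_group_columns(competetive=True, group_channels=False) == [
        "elution_group_idx"
    ]

    # Non-competitive scoring falls back to per-precursor grouping.
    assert get_group_columns(competetive=False, group_channels=True) == ["precursor_idx"]
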
diff --git a/alphadia/workflow/peptidecentric.py b/alphadia/workflow/peptidecentric.py
index 67c26644e..151763f11 100644
--- a/alphadia/workflow/peptidecentric.py
+++ b/alphadia/workflow/peptidecentric.py
@@ -15,6 +15,8 @@
 
 # alphadia imports
 from alphadia import fragcomp, plexscoring, utils
+from alphadia.fdrx.models.logistic_regression import LogisticRegressionClassifier
+from alphadia.fdrx.models.two_step_classifier import TwoStepClassifier
 from alphadia.peakgroup import search
 from alphadia.workflow import base, manager, optimization
 from alphadia.workflow.config import Config
@@ -94,13 +96,42 @@
     "mean_overlapping_mass_error",
 ]
 
-classifier_base = fdrx.BinaryClassifierLegacyNewBatching(
-    test_size=0.001,
-    batch_size=5000,
-    learning_rate=0.001,
-    epochs=10,
-    experimental_hyperparameter_tuning=True,
-)
+
+def get_classifier_base(
+    enable_two_step_classifier: bool = False, fdr_cutoff: float = 0.01
+):
+    """Creates and returns a classifier base instance.
+
+    Parameters
+    ----------
+    enable_two_step_classifier : bool, optional
+        If True, uses a two-step classifier (logistic regression followed by a
+        neural network). If False (default), uses only the neural network.
+    fdr_cutoff : float, optional
+        The FDR cutoff threshold used by the second classifier when two-step
+        classification is enabled. Default is 0.01.
+
+    Returns
+    -------
+    BinaryClassifierLegacyNewBatching | TwoStepClassifier
+        Neural network or two-step classifier, depending on enable_two_step_classifier.
+    """
+    nn_classifier = fdrx.BinaryClassifierLegacyNewBatching(
+        test_size=0.001,
+        batch_size=5000,
+        learning_rate=0.001,
+        epochs=10,
+        experimental_hyperparameter_tuning=True,
+    )
+
+    if enable_two_step_classifier:
+        return TwoStepClassifier(
+            first_classifier=LogisticRegressionClassifier(),
+            second_classifier=nn_classifier,
+            second_fdr_cutoff=fdr_cutoff,
+        )
+    else:
+        return nn_classifier
 
 
 class PeptideCentricWorkflow(base.WorkflowBase):
@@ -137,7 +168,10 @@ def load(
     def init_fdr_manager(self):
         self.fdr_manager = manager.FDRManager(
             feature_columns=feature_columns,
-            classifier_base=classifier_base,
+            classifier_base=get_classifier_base(
+                self.config["fdr"]["enable_two_step_classifier"],
+                self.config["fdr"]["fdr"],
+            ),
         )
 
     def init_spectral_library(self):
diff --git a/tests/unit_tests/test_fdrx_models.py b/tests/unit_tests/test_fdrx_models.py
new file mode 100644
index 000000000..d917f927f
--- /dev/null
+++ b/tests/unit_tests/test_fdrx_models.py
@@ -0,0 +1,106 @@
+from collections import Counter
+
+import pandas as pd
+import pytest
+
+from alphadia.fdrx.models.two_step_classifier import (
+    apply_absolute_transformations,
+    compute_and_filter_q_values,
+    get_target_decoy_partners,
+)
+
+
+def test_apply_absolute_transformations():
+    data = {
+        "delta_rt": [-1, -2, 3],
+        "top_3_ms2_mass_error": [-1, -2, -3],
+        "mean_ms2_mass_error": [1, -2, 3],
+        "extra_column": [-1, -2, -3],
+    }
+    df = pd.DataFrame(data)
+
+    transformed_df = apply_absolute_transformations(df)
+
+    assert (transformed_df["delta_rt"] >= 0).all(), "delta_rt contains negative values"
+    assert (
+        transformed_df["top_3_ms2_mass_error"] >= 0
+    ).all(), "top_3_ms2_mass_error contains negative values"
+    assert (
+        transformed_df["mean_ms2_mass_error"] >= 0
+    ).all(), "mean_ms2_mass_error contains negative values"
+
+    assert (
+        transformed_df["extra_column"] == df["extra_column"]
+    ).all(), "extra_column should not be transformed"
+
+
+@pytest.fixture
+def setup_data():
+    reference_df = pd.DataFrame(
+        {"decoy": [0, 1], "rank": [1, 0], "elution_group_idx": [100, 101]}
+    )
+
+    full_df = pd.DataFrame(
+        {
+            "decoy": [0, 0, 1, 1, 0],
+            "rank": [1, 0, 2, 1, 2],
+            "elution_group_idx": [100, 101, 102, 100, 102],
+            "intensity": [200, 150, 120, 130, 95],
+            "peptide": ["pepA", "pepB", "pepC", "pepD", "pepE"],
+        }
+    )
+
+    return reference_df, full_df
+
+
+def test_get_target_decoy_partners_correct_extraction(setup_data):
+    reference_df, full_df = setup_data
+    group_columns = ["elution_group_idx", "rank"]
+    result_df = get_target_decoy_partners(reference_df, full_df, group_by=group_columns)
+
+    assert (
+        len(result_df) == 3
+    )  # should match rows with ("rank", "elution_group_idx") = (1, 100) and (0, 101)
+    assert all(col in result_df.columns for col in full_df.columns)
+
+    assert Counter(result_df["decoy"]) == Counter([0, 0, 1])
+    assert Counter(result_df["peptide"]) == Counter(["pepA", "pepB", "pepD"])
+
+
+def test_handling_nonexistent_partners_in_get_target_decoy_partners(setup_data):
+    reference_df, full_df = setup_data
+
+    reference_df.loc[1] = [0, 3, 104]
+    result_df = get_target_decoy_partners(reference_df, full_df)
+
+    assert len(result_df) == 2
+    assert result_df[
+        (result_df["rank"] == 3) & (result_df["elution_group_idx"] == 104)
+    ].empty
+
+
+@pytest.mark.parametrize(
+    ["fdr", "remove_decoys", "expected_length", "expected_decoy_count"],
+    [
+        (0.5, True, 3, 0),
+        (0.01, True, 1, 0),
+        (0.5, False, 4, 1),
+        (0.01, False, 1, 0),
+    ],
+)
+def test_compute_and_filter_q_values(
+    fdr, remove_decoys, expected_length, expected_decoy_count
+):
+    df = pd.DataFrame(
+        {
+            "proba": [0.1, 0.3, 0.8, 0.9, 0.9, 0.2, 0.4, 0.5],
+            "decoy": [0, 1, 0, 1, 0, 1, 0, 1],
+            "group": ["A", "A", "B", "B", "C", "C", "D", "D"],
+        }
+    )
+    result = compute_and_filter_q_values(
+        df, fdr=fdr, group_columns=["group"], remove_decoys=remove_decoys
+    )
+    assert len(result) == expected_length
+    assert len(result[result["decoy"] == 1]) == expected_decoy_count
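
Usage note (illustrative, not part of the patch): with fdr.enable_two_step_classifier set to true in default.yaml, get_classifier_base wraps the existing neural network in a TwoStepClassifier with a LogisticRegressionClassifier as the permissive first stage, and FDRManager.fit_predict routes scoring through TwoStepClassifier.fit_predict using the columns from get_group_columns. A minimal sketch of driving the classifier directly follows; features_df is a hypothetical candidate DataFrame that must contain the feature columns plus 'decoy', 'rank', and 'elution_group_idx':

    from alphadia import fdrexperimental as fdrx
    from alphadia.fdrx.models.logistic_regression import LogisticRegressionClassifier
    from alphadia.fdrx.models.two_step_classifier import TwoStepClassifier

    # Mirrors get_classifier_base(enable_two_step_classifier=True, fdr_cutoff=0.01).
    classifier = TwoStepClassifier(
        first_classifier=LogisticRegressionClassifier(),
        second_classifier=fdrx.BinaryClassifierLegacyNewBatching(
            test_size=0.001, batch_size=5000
        ),
        first_fdr_cutoff=0.6,    # permissive first-stage filter
        second_fdr_cutoff=0.01,  # strict final cutoff
    )

    # features_df and the x_cols list are placeholders; in the workflow they come
    # from FDRManager.fit_predict as available_columns + ["score"].
    psm_df = classifier.fit_predict(
        features_df,
        x_cols=["score", "delta_rt", "mean_ms2_mass_error"],
        group_columns=["elution_group_idx"],  # competitive grouping without channels
    )
    precursors_passing = psm_df[(psm_df["qval"] < 0.01) & (psm_df["decoy"] == 0)]
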