From 9e2f7f1dc24468c4a45bffbcdccabef1394f7bd5 Mon Sep 17 00:00:00 2001 From: ammarcsj <70114795+ammarcsj@users.noreply.github.com> Date: Tue, 13 Aug 2024 16:22:51 +0200 Subject: [PATCH] add docstring and type hints --- directlfq/utils.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/directlfq/utils.py b/directlfq/utils.py index 4327d02..ba5734f 100644 --- a/directlfq/utils.py +++ b/directlfq/utils.py @@ -310,13 +310,26 @@ def index_and_log_transform_input_df(data_df): def remove_allnan_rows_input_df(data_df): return data_df.dropna(axis = 0, how = 'all') -def remove_potential_quant_id_duplicates(data_df): +def remove_potential_quant_id_duplicates(data_df : pd.DataFrame): + """ + Remove duplicate entries from a DataFrame based on the QUANT_ID column. + + This function removes duplicate rows from the input DataFrame, keeping only the first + occurrence of each unique QUANT_ID. It also logs a warning message if any duplicates + are found and removed. + + Args: + data_df (pd.DataFrame): dataframe in directLFQ format + + Returns: + pd.DataFrame: dataframe in directLFQ format with duplicate QUANT_ID entries removed. + """ before_drop = len(data_df) data_df = data_df.drop_duplicates(subset=config.QUANT_ID, keep='first') after_drop = len(data_df) if before_drop != after_drop: entries_removed = before_drop - after_drop - LOGGER.info(f"Duplicate quant_ids detected. {entries_removed} rows removed from input df.") + LOGGER.warning(f"Duplicate quant_ids detected. {entries_removed} rows removed from input df.") return data_df