Skip to content

Commit

Permalink
add docstring and type hints
Browse files Browse the repository at this point in the history
  • Loading branch information
ammarcsj committed Aug 13, 2024
1 parent 68a16d8 commit 9e2f7f1
Showing 1 changed file with 15 additions and 2 deletions.
17 changes: 15 additions & 2 deletions directlfq/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -310,13 +310,26 @@ def index_and_log_transform_input_df(data_df):
def remove_allnan_rows_input_df(data_df):
    """
    Drop the rows of a DataFrame in which every value is missing.

    Rows that contain at least one non-missing value are kept unchanged.

    Args:
        data_df: the DataFrame to filter.

    Returns:
        The result of ``data_df.dropna`` restricted to all-NaN rows; the input
        object itself is not modified.
    """
    rows_with_data = data_df.dropna(axis = 0, how = 'all')
    return rows_with_data

def remove_potential_quant_id_duplicates(data_df : pd.DataFrame) -> pd.DataFrame:
    """
    Remove duplicate entries from a DataFrame based on the QUANT_ID column.

    Rows are compared on the column named by ``config.QUANT_ID`` only. For each
    value in that column the first row is kept and every later row with the
    same value is dropped. If at least one row was dropped, a warning stating
    the number of removed rows is logged.

    Args:
        data_df (pd.DataFrame): dataframe in directLFQ format. It must contain
            the column named by ``config.QUANT_ID``.

    Returns:
        pd.DataFrame: dataframe in directLFQ format with duplicate QUANT_ID
            entries removed.
    """
    before_drop = len(data_df)
    data_df = data_df.drop_duplicates(subset=config.QUANT_ID, keep='first')
    after_drop = len(data_df)

    # Only report when something was actually dropped, so clean inputs stay silent.
    if before_drop != after_drop:
        entries_removed = before_drop - after_drop
        # Logged at WARNING level: silently discarding input rows is something
        # the user should notice.
        LOGGER.warning(f"Duplicate quant_ids detected. {entries_removed} rows removed from input df.")

    return data_df

Expand Down

0 comments on commit 9e2f7f1

Please sign in to comment.