diff --git a/google_sheets/data_processing/processing.py b/google_sheets/data_processing/processing.py index 54e0449..1e16ff7 100644 --- a/google_sheets/data_processing/processing.py +++ b/google_sheets/data_processing/processing.py @@ -285,10 +285,13 @@ def _validate_output_data_ad(df: pd.DataFrame) -> pd.DataFrame: # noqa: C901 description_columns = [col for col in df.columns if "Description" in col] for index, row in df.iterrows(): - # Check for duplicate headlines and descriptions - if len(set(row[headline_columns])) != len(row[headline_columns]): + headlines = [headline for headline in row[headline_columns] if headline] + descriptions = [ + description for description in row[description_columns] if description + ] + if len(set(headlines)) != len(headlines): df.loc[index, "Issues"] += "Duplicate headlines found.\n" - if len(set(row[description_columns])) != len(row[description_columns]): + if len(set(descriptions)) != len(descriptions): df.loc[index, "Issues"] += "Duplicate descriptions found.\n" # Check for the number of headlines and descriptions diff --git a/tests/data_processing/test_processing.py b/tests/data_processing/test_processing.py index e5481e2..32a56cd 100644 --- a/tests/data_processing/test_processing.py +++ b/tests/data_processing/test_processing.py @@ -515,6 +515,8 @@ def test_process_campaign_data_f( "Headline 1": ["H1", "H1", "H1", "H1"], "Headline 2": ["H2", "H2", "H2", "H2"], "Headline 3": ["H3", "H3", "H3", "H3"], + "Headline 4": ["", "", "", ""], + "Headline 5": ["", "", "", ""], "Description 1": ["D1", "D1", "D1", "D1"], "Description 2": ["D2", "D2", "D2", "D2"], "Path 1": ["P1", "P1", "P1", "P1"],