diff --git a/google_sheets/data_processing/processing.py b/google_sheets/data_processing/processing.py index e878fc5..2fc9903 100644 --- a/google_sheets/data_processing/processing.py +++ b/google_sheets/data_processing/processing.py @@ -33,6 +33,7 @@ def validate_input_data( INSERT_CRITERION_TYPE = "{INSERT_CRITERION_TYPE}" INSERT_LANGUAGE_CODE = "{INSERT_LANGUAGE_CODE}" INSERT_CATEGORY = "{INSERT_CATEGORY}" +INSERT_TICKET_PRICE = "{INSERT_TICKET_PRICE}" def _update_campaign_name( @@ -174,6 +175,13 @@ def _process_row( if not _use_template_row(new_campaign_row["Category"], template_row): return final_df + # Positive keywords (Keyword Match Type) should be the same as Match Type (which is used as a part of Ad Group Name) + if target_resource == "keyword" and ( + template_row["Negative"].lower() == "false" + and template_row["Keyword Match Type"] != template_row["Match Type"] + ): + return final_df + stations = [ { "Station From": new_campaign_row["Station From"], @@ -203,19 +211,28 @@ def _process_row( new_row = new_row.str.replace(INSERT_STATION_FROM, station["Station From"]) new_row = new_row.str.replace(INSERT_STATION_TO, station["Station To"]) new_row = new_row.str.replace(INSERT_CRITERION_TYPE, new_row["Match Type"]) - new_row = new_row.str.replace(INSERT_CATEGORY, new_campaign_row["Category"]) + new_row = new_row.str.replace( + INSERT_TICKET_PRICE, new_campaign_row["Ticket Price"] + ) if target_resource == "ad": new_row["Final URL"] = station["Final Url"] - elif ( - target_resource == "keyword" - and new_row["Negative"] - and new_row["Negative"].lower() == "true" - ): - new_row["Match Type"] = new_row["Keyword Match Type"] - if "Campaign" in new_row["Level"]: - new_row["Ad Group Name"] = None + elif target_resource == "keyword": + if new_row["Negative"] and new_row["Negative"].lower() == "true": + new_row["Match Type"] = new_row["Keyword Match Type"] + + if "Campaign" in new_row["Level"]: + new_row["Ad Group Name"] = None + elif ( + new_row["Target 
Category"].lower() == "false" + and new_row["Match Type"] == "Exact" + ): + new_row["Keyword"] = ( + new_row["Keyword"].replace(INSERT_CATEGORY, "").strip() + ) + + new_row = new_row.str.replace(INSERT_CATEGORY, new_campaign_row["Category"]) final_df = pd.concat([final_df, pd.DataFrame([new_row])], ignore_index=True) @@ -256,7 +273,7 @@ def process_data_f( new_campaign_row, template_row, final_df, target_resource ) - final_df = final_df.drop(columns=["Language Code", "Category"]) + final_df = final_df.drop(columns=["Language Code", "Category", "Target Category"]) if target_resource == "keyword": final_df = final_df.drop(columns=["Keyword Match Type"]) final_df = final_df.drop_duplicates(ignore_index=True) @@ -350,10 +367,53 @@ def _validate_output_data_ad(df: pd.DataFrame) -> pd.DataFrame: # noqa: C901 return df +MAX_SITELINK_TEXT_LENGTH = 25 +MAX_SITELINK_DESCRIPTION_LENGTH = 35 + + +def _validate_output_data_campaign(df: pd.DataFrame) -> pd.DataFrame: + df.insert(0, "Issues", "") + + sitelink_text_columns = [ + col for col in df.columns if col.startswith("Sitelink") and col.endswith("Text") + ] + + for index, row in df.iterrows(): + for site_text_column in sitelink_text_columns: + site_text = row[site_text_column] + if not site_text: + continue + error_msg = "" + + final_url_column = site_text_column.replace("Text", "Final URL") + if not row.get(final_url_column, None): + error_msg += f"{final_url_column} is missing.\n" + if len(site_text) > MAX_SITELINK_TEXT_LENGTH: + error_msg += f"Sitelink text length should be less than {MAX_SITELINK_TEXT_LENGTH} characters, found {len(site_text)} in column {site_text_column}.\n" + site_description_column = site_text_column.replace("Text", "Description") + for i in [1, 2]: + site_description = row.get(site_description_column + f" {i}", None) + if ( + site_description + and len(site_description) > MAX_SITELINK_DESCRIPTION_LENGTH + ): + error_msg += f"Sitelink description length should be less than 
{MAX_SITELINK_DESCRIPTION_LENGTH} characters, found {len(site_description)} in column {site_description_column} {i}.\n" + + if error_msg: + df.loc[index, "Issues"] += error_msg + + if not df["Issues"].any(): + df = df.drop(columns=["Issues"]) + + return df + + def validate_output_data( - df: pd.DataFrame, target_resource: Literal["ad", "campaign" "keyword"] + df: pd.DataFrame, target_resource: Literal["ad", "campaign", "keyword"] ) -> pd.DataFrame: if target_resource == "ad": return _validate_output_data_ad(df) - # No validation required for campaign and keyword data currently + elif target_resource == "campaign": + return _validate_output_data_campaign(df) + # No validation required for keyword data currently return df diff --git a/pyproject.toml b/pyproject.toml index fe2ac47..99787e6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -47,7 +47,7 @@ dependencies = [ "pydantic>=2.3,<3", "fastapi>=0.110.2", "prisma==0.13.1", - "google-api-python-client==2.143.0", + "google-api-python-client==2.144.0", "asyncify==0.10.0", "pandas==2.2.2" ] @@ -67,10 +67,10 @@ lint = [ "types-Pygments", "types-docutils", "mypy==1.11.2", - "ruff==0.6.3", + "ruff==0.6.4", "pyupgrade-directories==0.3.0", "bandit==1.7.9", - "semgrep==1.85.0", + "semgrep==1.86.0", "pytest-mypy-plugins==3.1.2", ] diff --git a/tests/app/test_app.py b/tests/app/test_app.py index f7e717f..8dd520a 100644 --- a/tests/app/test_app.py +++ b/tests/app/test_app.py @@ -409,6 +409,7 @@ class TestProcessData: "Final Url To", "Language Code", "Category", + "Ticket Price", ], [ "India", @@ -418,6 +419,7 @@ class TestProcessData: "https://www.example.com/to", "EN", "Bus", + "10.5", ], ] ), @@ -475,6 +477,7 @@ async def test_process_data_keywords( "Language Code": ["EN"], "Ad Group Name": ["{INSERT_STATION_FROM} - {INSERT_STATION_TO}"], "Match Type": ["Exact"], + "Target Category": ["True"], } ) if isinstance(detail, GoogleSheetValues): @@ -517,7 +520,7 @@ async def test_process_data_ads(self) -> None: "Bus", 
"https://www.example.com/from", "H" * 31, - "Headline 2", + "Headline 2 {INSERT_TICKET_PRICE}", "Headline 3", "Description Line 1", "Description Line 2", @@ -536,6 +539,7 @@ async def test_process_data_ads(self) -> None: "Final Url To", "Language Code", "Category", + "Ticket Price", ], [ "India", @@ -545,6 +549,7 @@ async def test_process_data_ads(self) -> None: "https://www.example.com/to", "EN", "Bus", + "10.5", ], ] ) @@ -556,6 +561,7 @@ async def test_process_data_ads(self) -> None: "Language Code": ["EN"], "Ad Group Name": ["{INSERT_STATION_FROM} - {INSERT_STATION_TO}"], "Match Type": ["Exact"], + "Target Category": ["False"], } ) result = await process_data( @@ -588,7 +594,7 @@ async def test_process_data_ads(self) -> None: "Exact", "https://www.example.com/from", "HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH", - "Headline 2", + "Headline 2 10.5", "Headline 3", "Description Line 1", "Description Line 2", @@ -602,7 +608,7 @@ async def test_process_data_ads(self) -> None: "Exact", "https://www.example.com/to", "HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH", - "Headline 2", + "Headline 2 10.5", "Headline 3", "Description Line 1", "Description Line 2", diff --git a/tests/data_processing/test_processing.py b/tests/data_processing/test_processing.py index 65f3796..f4c4320 100644 --- a/tests/data_processing/test_processing.py +++ b/tests/data_processing/test_processing.py @@ -10,6 +10,7 @@ _update_campaign_name, _use_template_row, _validate_language_codes, + _validate_output_data_campaign, process_campaign_data_f, process_data_f, validate_input_data, @@ -119,7 +120,7 @@ def test_process_row( { "Campaign Name": "USA - A - B - EN", "Ad Group Name": "A - B", - "Keyword": "k1", + "Keyword": "k1 {INSERT_CATEGORY}", "Max CPC": "", "Language Code": "EN", "Negative": "FALSE", @@ -127,6 +128,7 @@ def test_process_row( "Keyword Match Type": "Exact", "Match Type": "Exact", "Category": "Bus", + "Target Category": "False", } ) new_campaign_row = pd.Series( @@ -137,12 +139,15 @@ def test_process_row( 
"Station To": "B", "Language Code": "EN", "Category": category, + "Ticket Price": "100", } ) final_df = pd.DataFrame(columns=template_row.index) final_df = _process_row(new_campaign_row, template_row, final_df, "keyword") assert len(final_df) == expected_length + if expected_length == 1: + assert final_df["Keyword"].values[0] == "k1" @pytest.mark.parametrize( @@ -157,6 +162,7 @@ def test_process_row( "Language Code": ["EN"], "Ad Group Name": ["{INSERT_STATION_FROM} - {INSERT_STATION_TO}"], "Match Type": ["Exact"], + "Target Category": ["False"], } ), pd.DataFrame( @@ -177,6 +183,7 @@ def test_process_row( "Station To": ["C", "D"], "Language Code": ["EN", "EN"], "Category": ["Bus", "Bus"], + "Ticket Price": ["100", "200"], } ), pd.DataFrame( @@ -236,6 +243,7 @@ def test_process_row( "Language Code": ["EN"], "Ad Group Name": ["{INSERT_STATION_FROM} - {INSERT_STATION_TO}"], "Match Type": ["Exact"], + "Target Category": ["False"], } ), pd.DataFrame( @@ -256,6 +264,7 @@ def test_process_row( "Station To": ["C", "D"], "Language Code": ["EN", "EN"], "Category": ["Bus", "Bus"], + "Ticket Price": ["100", "200"], } ), pd.DataFrame( @@ -319,6 +328,7 @@ def test_process_row( "{INSERT_STATION_FROM} - {INSERT_STATION_TO}", ], "Match Type": ["Exact", "Exact"], + "Target Category": ["False", "False"], } ), pd.DataFrame( @@ -339,6 +349,7 @@ def test_process_row( "Station To": ["C", "D"], "Language Code": ["EN", "DE"], "Category": ["Bus", "Bus"], + "Ticket Price": ["100", "200"], } ), pd.DataFrame( @@ -376,6 +387,7 @@ def test_process_row( "{INSERT_STATION_FROM} - {INSERT_STATION_TO}", ], "Match Type": ["Exact", "Exact"], + "Target Category": ["False", "False"], } ), pd.DataFrame( @@ -396,6 +408,7 @@ def test_process_row( "Station To": ["C", "D"], "Language Code": ["EN", "DE"], "Category": ["Bus", "Bus"], + "Ticket Price": ["100", "200"], } ), pd.DataFrame( @@ -697,3 +710,59 @@ def test_validate_language_codes( _validate_language_codes(new_campaign_df, valid_language_codes, "table") 
@pytest.mark.parametrize(
    ("df", "expected_issues"),
    [
        # Text with its Final URL: nothing to report.
        (
            pd.DataFrame(
                {"Sitelink 1 Text": ["S1"], "Sitelink 1 Final URL": ["URL"]},
            ),
            None,
        ),
        # Short descriptions are accepted as well.
        (
            pd.DataFrame(
                {
                    "Sitelink 1 Text": ["S1"],
                    "Sitelink 1 Final URL": ["URL"],
                    "Sitelink 1 Description 1": ["D1"],
                    "Sitelink 1 Description 2": ["D2"],
                },
            ),
            None,
        ),
        # Text without a Final URL column is reported.
        (
            pd.DataFrame({"Sitelink 1 Text": ["S1"]}),
            "Sitelink 1 Final URL is missing.\n",
        ),
        # Over-long text and description each add their own message.
        (
            pd.DataFrame(
                {
                    "Sitelink 1 Text": ["S" * 26],
                    "Sitelink 1 Final URL": ["URL"],
                    "Sitelink 1 Description 1": ["D" * 36],
                    "Sitelink 1 Description 2": ["D2"],
                },
            ),
            "Sitelink text length should be less than 25 characters, found 26 in column Sitelink 1 Text.\n"
            "Sitelink description length should be less than 35 characters, found 36 in column Sitelink 1 Description 1.\n",
        ),
    ],
)
def test_validate_output_data_campaign(
    df: pd.DataFrame, expected_issues: Optional[str]
) -> None:
    """Check the "Issues" text for flagged rows, or an unchanged frame for clean data."""
    untouched = df.copy()
    validated = _validate_output_data_campaign(df)

    if not expected_issues:
        # Clean data must come back without an "Issues" column.
        assert validated.equals(untouched)
        return

    assert validated["Issues"].values[0] == expected_issues