Update ad group structure and add sitelink validation (#111)

* Add Target Category field to ad group template
* Update ad group and keywords structure
* Add sitelinks validation
airtai · Sep 10, 2024 · 07272ff
1 parent caa6d03
commit 07272ff
Show file tree

Hide file tree

Showing 3 changed files with 135 additions and 13 deletions.
diff --git a/google_sheets/data_processing/processing.py b/google_sheets/data_processing/processing.py
@@ -175,6 +175,13 @@ def _process_row(
     if not _use_template_row(new_campaign_row["Category"], template_row):
         return final_df
 
+    # Skip a positive keyword unless its Keyword Match Type equals the Match Type (which is used as a part of the Ad Group Name)
+    if target_resource == "keyword" and (
+        template_row["Negative"].lower() == "false"
+        and template_row["Keyword Match Type"] != template_row["Match Type"]
+    ):
+        return final_df
+
     stations = [
         {
             "Station From": new_campaign_row["Station From"],
@@ -204,22 +211,28 @@ def _process_row(
         new_row = new_row.str.replace(INSERT_STATION_FROM, station["Station From"])
         new_row = new_row.str.replace(INSERT_STATION_TO, station["Station To"])
         new_row = new_row.str.replace(INSERT_CRITERION_TYPE, new_row["Match Type"])
-        new_row = new_row.str.replace(INSERT_CATEGORY, new_campaign_row["Category"])
         new_row = new_row.str.replace(
             INSERT_TICKET_PRICE, new_campaign_row["Ticket Price"]
         )
 
         if target_resource == "ad":
             new_row["Final URL"] = station["Final Url"]
-        elif (
-            target_resource == "keyword"
-            and new_row["Negative"]
-            and new_row["Negative"].lower() == "true"
-        ):
-            new_row["Match Type"] = new_row["Keyword Match Type"]
 
-            if "Campaign" in new_row["Level"]:
-                new_row["Ad Group Name"] = None
+        elif target_resource == "keyword":
+            if new_row["Negative"] and new_row["Negative"].lower() == "true":
+                new_row["Match Type"] = new_row["Keyword Match Type"]
+
+                if "Campaign" in new_row["Level"]:
+                    new_row["Ad Group Name"] = None
+            elif (
+                new_row["Target Category"].lower() == "false"
+                and new_row["Match Type"] == "Exact"
+            ):
+                new_row["Keyword"] = (
+                    new_row["Keyword"].replace(INSERT_CATEGORY, "").strip()
+                )
+
+        new_row = new_row.str.replace(INSERT_CATEGORY, new_campaign_row["Category"])
 
         final_df = pd.concat([final_df, pd.DataFrame([new_row])], ignore_index=True)
 
@@ -260,7 +273,7 @@ def process_data_f(
                 new_campaign_row, template_row, final_df, target_resource
             )
 
-    final_df = final_df.drop(columns=["Language Code", "Category"])
+    final_df = final_df.drop(columns=["Language Code", "Category", "Target Category"])
     if target_resource == "keyword":
         final_df = final_df.drop(columns=["Keyword Match Type"])
     final_df = final_df.drop_duplicates(ignore_index=True)
@@ -354,10 +367,53 @@ def _validate_output_data_ad(df: pd.DataFrame) -> pd.DataFrame:  # noqa: C901
     return df
 
 
+MAX_SITELINK_TEXT_LENGTH = 25
+MAX_SITELINK_DESCRIPTION_LENGTH = 35
+
+
+def _validate_output_data_campaign(df: pd.DataFrame) -> pd.DataFrame:
+    df.insert(0, "Issues", "")
+
+    sitelink_text_columns = [
+        col for col in df.columns if col.startswith("Sitelink") and col.endswith("Text")
+    ]
+
+    for index, row in df.iterrows():
+        for site_text_column in sitelink_text_columns:
+            site_text = row[site_text_column]
+            if not site_text:
+                continue
+            error_msg = ""
+
+            final_url_column = site_text_column.replace("Text", "Final URL")
+            if not row.get(final_url_column, None):
+                error_msg += f"{final_url_column} is missing.\n"
+            if len(site_text) > MAX_SITELINK_TEXT_LENGTH:
+                error_msg += f"Sitelink text length should be less than {MAX_SITELINK_TEXT_LENGTH} characters, found {len(site_text)} in column {site_text_column}.\n"
+            site_description_column = site_text_column.replace("Text", "Description")
+            for i in [1, 2]:
+                site_description = row.get(site_description_column + f" {i}", None)
+                if (
+                    site_description
+                    and len(site_description) > MAX_SITELINK_DESCRIPTION_LENGTH
+                ):
+                    error_msg += f"Sitelink description length should be less than {MAX_SITELINK_DESCRIPTION_LENGTH} characters, found {len(site_description)} in column {site_description_column} {i}.\n"
+
+            if error_msg:
+                df.loc[index, "Issues"] += error_msg
+
+    if not df["Issues"].any():
+        df = df.drop(columns=["Issues"])
+
+    return df
+
+
 def validate_output_data(
-    df: pd.DataFrame, target_resource: Literal["ad", "campaign" "keyword"]
+    df: pd.DataFrame, target_resource: Literal["ad", "campaign", "keyword"]
 ) -> pd.DataFrame:
     if target_resource == "ad":
         return _validate_output_data_ad(df)
-    # No validation required for campaign and keyword data currently
+    elif target_resource == "campaign":
+        return _validate_output_data_campaign(df)
+    # No validation required for keyword data currently
     return df
diff --git a/tests/app/test_app.py b/tests/app/test_app.py
@@ -477,6 +477,7 @@ async def test_process_data_keywords(
                 "Language Code": ["EN"],
                 "Ad Group Name": ["{INSERT_STATION_FROM} - {INSERT_STATION_TO}"],
                 "Match Type": ["Exact"],
+                "Target Category": ["True"],
             }
         )
         if isinstance(detail, GoogleSheetValues):
@@ -560,6 +561,7 @@ async def test_process_data_ads(self) -> None:
                 "Language Code": ["EN"],
                 "Ad Group Name": ["{INSERT_STATION_FROM} - {INSERT_STATION_TO}"],
                 "Match Type": ["Exact"],
+                "Target Category": ["False"],
             }
         )
         result = await process_data(

diff --git a/tests/data_processing/test_processing.py b/tests/data_processing/test_processing.py
@@ -10,6 +10,7 @@
     _update_campaign_name,
     _use_template_row,
     _validate_language_codes,
+    _validate_output_data_campaign,
     process_campaign_data_f,
     process_data_f,
     validate_input_data,
@@ -119,14 +120,15 @@ def test_process_row(
         {
             "Campaign Name": "USA - A - B - EN",
             "Ad Group Name": "A - B",
-            "Keyword": "k1",
+            "Keyword": "k1 {INSERT_CATEGORY}",
             "Max CPC": "",
             "Language Code": "EN",
             "Negative": "FALSE",
             "Level": "",
             "Keyword Match Type": "Exact",
             "Match Type": "Exact",
             "Category": "Bus",
+            "Target Category": "False",
         }
     )
     new_campaign_row = pd.Series(
@@ -144,6 +146,8 @@ def test_process_row(
     final_df = _process_row(new_campaign_row, template_row, final_df, "keyword")
 
     assert len(final_df) == expected_length
+    if expected_length == 1:
+        assert final_df["Keyword"].values[0] == "k1"
 
 
 @pytest.mark.parametrize(
@@ -158,6 +162,7 @@ def test_process_row(
                     "Language Code": ["EN"],
                     "Ad Group Name": ["{INSERT_STATION_FROM} - {INSERT_STATION_TO}"],
                     "Match Type": ["Exact"],
+                    "Target Category": ["False"],
                 }
             ),
             pd.DataFrame(
@@ -238,6 +243,7 @@ def test_process_row(
                     "Language Code": ["EN"],
                     "Ad Group Name": ["{INSERT_STATION_FROM} - {INSERT_STATION_TO}"],
                     "Match Type": ["Exact"],
+                    "Target Category": ["False"],
                 }
             ),
             pd.DataFrame(
@@ -322,6 +328,7 @@ def test_process_row(
                         "{INSERT_STATION_FROM} - {INSERT_STATION_TO}",
                     ],
                     "Match Type": ["Exact", "Exact"],
+                    "Target Category": ["False", "False"],
                 }
             ),
             pd.DataFrame(
@@ -380,6 +387,7 @@ def test_process_row(
                         "{INSERT_STATION_FROM} - {INSERT_STATION_TO}",
                     ],
                     "Match Type": ["Exact", "Exact"],
+                    "Target Category": ["False", "False"],
                 }
             ),
             pd.DataFrame(
@@ -702,3 +710,59 @@ def test_validate_language_codes(
             _validate_language_codes(new_campaign_df, valid_language_codes, "table")
     else:
         _validate_language_codes(new_campaign_df, valid_language_codes, "table")
+
+
+@pytest.mark.parametrize(
+    ("df", "expected_issues"),
+    [
+        (
+            pd.DataFrame(
+                {
+                    "Sitelink 1 Text": ["S1"],
+                    "Sitelink 1 Final URL": ["URL"],
+                },
+            ),
+            None,
+        ),
+        (
+            pd.DataFrame(
+                {
+                    "Sitelink 1 Text": ["S1"],
+                    "Sitelink 1 Final URL": ["URL"],
+                    "Sitelink 1 Description 1": ["D1"],
+                    "Sitelink 1 Description 2": ["D2"],
+                },
+            ),
+            None,
+        ),
+        (
+            pd.DataFrame(
+                {
+                    "Sitelink 1 Text": ["S1"],
+                }
+            ),
+            "Sitelink 1 Final URL is missing.\n",
+        ),
+        (
+            pd.DataFrame(
+                {
+                    "Sitelink 1 Text": ["S" * 26],
+                    "Sitelink 1 Final URL": ["URL"],
+                    "Sitelink 1 Description 1": ["D" * 36],
+                    "Sitelink 1 Description 2": ["D2"],
+                },
+            ),
+            """Sitelink text length should be less than 25 characters, found 26 in column Sitelink 1 Text.
+Sitelink description length should be less than 35 characters, found 36 in column Sitelink 1 Description 1.\n""",
+        ),
+    ],
+)
+def test_validate_output_data_campaign(
+    df: pd.DataFrame, expected_issues: Optional[str]
+) -> None:
+    expected = df.copy()
+    result = _validate_output_data_campaign(df)
+    if expected_issues:
+        assert result["Issues"].values[0] == expected_issues
+    else:
+        assert result.equals(expected)