Merge pull request #112 from airtai/dev

2 PRs
airtai · Sep 10, 2024 · 75ea0b4 · 75ea0b4
2 parents 8f44374 + 07272ff
commit 75ea0b4
Show file tree

Hide file tree

Showing 4 changed files with 154 additions and 19 deletions.
diff --git a/google_sheets/data_processing/processing.py b/google_sheets/data_processing/processing.py
@@ -33,6 +33,7 @@ def validate_input_data(
 INSERT_CRITERION_TYPE = "{INSERT_CRITERION_TYPE}"
 INSERT_LANGUAGE_CODE = "{INSERT_LANGUAGE_CODE}"
 INSERT_CATEGORY = "{INSERT_CATEGORY}"
+INSERT_TICKET_PRICE = "{INSERT_TICKET_PRICE}"
 
 
 def _update_campaign_name(
@@ -174,6 +175,13 @@ def _process_row(
     if not _use_template_row(new_campaign_row["Category"], template_row):
         return final_df
 
+    # Skip positive (non-negative) keyword template rows whose Keyword Match Type differs from Match Type, because Match Type is used as part of the Ad Group Name
+    if target_resource == "keyword" and (
+        template_row["Negative"].lower() == "false"
+        and template_row["Keyword Match Type"] != template_row["Match Type"]
+    ):
+        return final_df
+
     stations = [
         {
             "Station From": new_campaign_row["Station From"],
@@ -203,19 +211,28 @@ def _process_row(
         new_row = new_row.str.replace(INSERT_STATION_FROM, station["Station From"])
         new_row = new_row.str.replace(INSERT_STATION_TO, station["Station To"])
         new_row = new_row.str.replace(INSERT_CRITERION_TYPE, new_row["Match Type"])
-        new_row = new_row.str.replace(INSERT_CATEGORY, new_campaign_row["Category"])
+        new_row = new_row.str.replace(
+            INSERT_TICKET_PRICE, new_campaign_row["Ticket Price"]
+        )
 
         if target_resource == "ad":
             new_row["Final URL"] = station["Final Url"]
-        elif (
-            target_resource == "keyword"
-            and new_row["Negative"]
-            and new_row["Negative"].lower() == "true"
-        ):
-            new_row["Match Type"] = new_row["Keyword Match Type"]
 
-            if "Campaign" in new_row["Level"]:
-                new_row["Ad Group Name"] = None
+        elif target_resource == "keyword":
+            if new_row["Negative"] and new_row["Negative"].lower() == "true":
+                new_row["Match Type"] = new_row["Keyword Match Type"]
+
+                if "Campaign" in new_row["Level"]:
+                    new_row["Ad Group Name"] = None
+            elif (
+                new_row["Target Category"].lower() == "false"
+                and new_row["Match Type"] == "Exact"
+            ):
+                new_row["Keyword"] = (
+                    new_row["Keyword"].replace(INSERT_CATEGORY, "").strip()
+                )
+
+        new_row = new_row.str.replace(INSERT_CATEGORY, new_campaign_row["Category"])
 
         final_df = pd.concat([final_df, pd.DataFrame([new_row])], ignore_index=True)
 
@@ -256,7 +273,7 @@ def process_data_f(
                 new_campaign_row, template_row, final_df, target_resource
             )
 
-    final_df = final_df.drop(columns=["Language Code", "Category"])
+    final_df = final_df.drop(columns=["Language Code", "Category", "Target Category"])
     if target_resource == "keyword":
         final_df = final_df.drop(columns=["Keyword Match Type"])
     final_df = final_df.drop_duplicates(ignore_index=True)
@@ -350,10 +367,53 @@ def _validate_output_data_ad(df: pd.DataFrame) -> pd.DataFrame:  # noqa: C901
     return df
 
 
+MAX_SITELINK_TEXT_LENGTH = 25
+MAX_SITELINK_DESCRIPTION_LENGTH = 35
+
+
+def _validate_output_data_campaign(df: pd.DataFrame) -> pd.DataFrame:
+    df.insert(0, "Issues", "")
+
+    sitelink_text_columns = [
+        col for col in df.columns if col.startswith("Sitelink") and col.endswith("Text")
+    ]
+
+    for index, row in df.iterrows():
+        for site_text_column in sitelink_text_columns:
+            site_text = row[site_text_column]
+            if not site_text:
+                continue
+            error_msg = ""
+
+            final_url_column = site_text_column.replace("Text", "Final URL")
+            if not row.get(final_url_column, None):
+                error_msg += f"{final_url_column} is missing.\n"
+            if len(site_text) > MAX_SITELINK_TEXT_LENGTH:
+                error_msg += f"Sitelink text length should be less than {MAX_SITELINK_TEXT_LENGTH} characters, found {len(site_text)} in column {site_text_column}.\n"
+            site_description_column = site_text_column.replace("Text", "Description")
+            for i in [1, 2]:
+                site_description = row.get(site_description_column + f" {i}", None)
+                if (
+                    site_description
+                    and len(site_description) > MAX_SITELINK_DESCRIPTION_LENGTH
+                ):
+                    error_msg += f"Sitelink description length should be less than {MAX_SITELINK_DESCRIPTION_LENGTH} characters, found {len(site_description)} in column {site_description_column} {i}.\n"
+
+            if error_msg:
+                df.loc[index, "Issues"] += error_msg
+
+    if not df["Issues"].any():
+        df = df.drop(columns=["Issues"])
+
+    return df
+
+
 def validate_output_data(
-    df: pd.DataFrame, target_resource: Literal["ad", "campaign" "keyword"]
+    df: pd.DataFrame, target_resource: Literal["ad", "campaign", "keyword"]
 ) -> pd.DataFrame:
     if target_resource == "ad":
         return _validate_output_data_ad(df)
-    # No validation required for campaign and keyword data currently
+    elif target_resource == "campaign":
+        return _validate_output_data_campaign(df)
+    # No validation required for keyword data currently
     return df
diff --git a/pyproject.toml b/pyproject.toml
@@ -47,7 +47,7 @@ dependencies = [
     "pydantic>=2.3,<3",
     "fastapi>=0.110.2",
     "prisma==0.13.1",
-    "google-api-python-client==2.143.0",
+    "google-api-python-client==2.144.0",
     "asyncify==0.10.0",
     "pandas==2.2.2"
 ]
@@ -67,10 +67,10 @@ lint = [
     "types-Pygments",
     "types-docutils",
     "mypy==1.11.2",
-    "ruff==0.6.3",
+    "ruff==0.6.4",
     "pyupgrade-directories==0.3.0",
     "bandit==1.7.9",
-    "semgrep==1.85.0",
+    "semgrep==1.86.0",
     "pytest-mypy-plugins==3.1.2",
 ]
 

diff --git a/tests/app/test_app.py b/tests/app/test_app.py
@@ -409,6 +409,7 @@ class TestProcessData:
                             "Final Url To",
                             "Language Code",
                             "Category",
+                            "Ticket Price",
                         ],
                         [
                             "India",
@@ -418,6 +419,7 @@ class TestProcessData:
                             "https://www.example.com/to",
                             "EN",
                             "Bus",
+                            "10.5",
                         ],
                     ]
                 ),
@@ -475,6 +477,7 @@ async def test_process_data_keywords(
                 "Language Code": ["EN"],
                 "Ad Group Name": ["{INSERT_STATION_FROM} - {INSERT_STATION_TO}"],
                 "Match Type": ["Exact"],
+                "Target Category": ["True"],
             }
         )
         if isinstance(detail, GoogleSheetValues):
@@ -517,7 +520,7 @@ async def test_process_data_ads(self) -> None:
                     "Bus",
                     "https://www.example.com/from",
                     "H" * 31,
-                    "Headline 2",
+                    "Headline 2 {INSERT_TICKET_PRICE}",
                     "Headline 3",
                     "Description Line 1",
                     "Description Line 2",
@@ -536,6 +539,7 @@ async def test_process_data_ads(self) -> None:
                     "Final Url To",
                     "Language Code",
                     "Category",
+                    "Ticket Price",
                 ],
                 [
                     "India",
@@ -545,6 +549,7 @@ async def test_process_data_ads(self) -> None:
                     "https://www.example.com/to",
                     "EN",
                     "Bus",
+                    "10.5",
                 ],
             ]
         )
@@ -556,6 +561,7 @@ async def test_process_data_ads(self) -> None:
                 "Language Code": ["EN"],
                 "Ad Group Name": ["{INSERT_STATION_FROM} - {INSERT_STATION_TO}"],
                 "Match Type": ["Exact"],
+                "Target Category": ["False"],
             }
         )
         result = await process_data(
@@ -588,7 +594,7 @@ async def test_process_data_ads(self) -> None:
                     "Exact",
                     "https://www.example.com/from",
                     "HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH",
-                    "Headline 2",
+                    "Headline 2 10.5",
                     "Headline 3",
                     "Description Line 1",
                     "Description Line 2",
@@ -602,7 +608,7 @@ async def test_process_data_ads(self) -> None:
                     "Exact",
                     "https://www.example.com/to",
                     "HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH",
-                    "Headline 2",
+                    "Headline 2 10.5",
                     "Headline 3",
                     "Description Line 1",
                     "Description Line 2",

diff --git a/tests/data_processing/test_processing.py b/tests/data_processing/test_processing.py
@@ -10,6 +10,7 @@
     _update_campaign_name,
     _use_template_row,
     _validate_language_codes,
+    _validate_output_data_campaign,
     process_campaign_data_f,
     process_data_f,
     validate_input_data,
@@ -119,14 +120,15 @@ def test_process_row(
         {
             "Campaign Name": "USA - A - B - EN",
             "Ad Group Name": "A - B",
-            "Keyword": "k1",
+            "Keyword": "k1 {INSERT_CATEGORY}",
             "Max CPC": "",
             "Language Code": "EN",
             "Negative": "FALSE",
             "Level": "",
             "Keyword Match Type": "Exact",
             "Match Type": "Exact",
             "Category": "Bus",
+            "Target Category": "False",
         }
     )
     new_campaign_row = pd.Series(
@@ -137,12 +139,15 @@ def test_process_row(
             "Station To": "B",
             "Language Code": "EN",
             "Category": category,
+            "Ticket Price": "100",
         }
     )
     final_df = pd.DataFrame(columns=template_row.index)
     final_df = _process_row(new_campaign_row, template_row, final_df, "keyword")
 
     assert len(final_df) == expected_length
+    if expected_length == 1:
+        assert final_df["Keyword"].values[0] == "k1"
 
 
 @pytest.mark.parametrize(
@@ -157,6 +162,7 @@ def test_process_row(
                     "Language Code": ["EN"],
                     "Ad Group Name": ["{INSERT_STATION_FROM} - {INSERT_STATION_TO}"],
                     "Match Type": ["Exact"],
+                    "Target Category": ["False"],
                 }
             ),
             pd.DataFrame(
@@ -177,6 +183,7 @@ def test_process_row(
                     "Station To": ["C", "D"],
                     "Language Code": ["EN", "EN"],
                     "Category": ["Bus", "Bus"],
+                    "Ticket Price": ["100", "200"],
                 }
             ),
             pd.DataFrame(
@@ -236,6 +243,7 @@ def test_process_row(
                     "Language Code": ["EN"],
                     "Ad Group Name": ["{INSERT_STATION_FROM} - {INSERT_STATION_TO}"],
                     "Match Type": ["Exact"],
+                    "Target Category": ["False"],
                 }
             ),
             pd.DataFrame(
@@ -256,6 +264,7 @@ def test_process_row(
                     "Station To": ["C", "D"],
                     "Language Code": ["EN", "EN"],
                     "Category": ["Bus", "Bus"],
+                    "Ticket Price": ["100", "200"],
                 }
             ),
             pd.DataFrame(
@@ -319,6 +328,7 @@ def test_process_row(
                         "{INSERT_STATION_FROM} - {INSERT_STATION_TO}",
                     ],
                     "Match Type": ["Exact", "Exact"],
+                    "Target Category": ["False", "False"],
                 }
             ),
             pd.DataFrame(
@@ -339,6 +349,7 @@ def test_process_row(
                     "Station To": ["C", "D"],
                     "Language Code": ["EN", "DE"],
                     "Category": ["Bus", "Bus"],
+                    "Ticket Price": ["100", "200"],
                 }
             ),
             pd.DataFrame(
@@ -376,6 +387,7 @@ def test_process_row(
                         "{INSERT_STATION_FROM} - {INSERT_STATION_TO}",
                     ],
                     "Match Type": ["Exact", "Exact"],
+                    "Target Category": ["False", "False"],
                 }
             ),
             pd.DataFrame(
@@ -396,6 +408,7 @@ def test_process_row(
                     "Station To": ["C", "D"],
                     "Language Code": ["EN", "DE"],
                     "Category": ["Bus", "Bus"],
+                    "Ticket Price": ["100", "200"],
                 }
             ),
             pd.DataFrame(
@@ -697,3 +710,59 @@ def test_validate_language_codes(
             _validate_language_codes(new_campaign_df, valid_language_codes, "table")
     else:
         _validate_language_codes(new_campaign_df, valid_language_codes, "table")
+
+
+@pytest.mark.parametrize(
+    ("df", "expected_issues"),
+    [
+        (
+            pd.DataFrame(
+                {
+                    "Sitelink 1 Text": ["S1"],
+                    "Sitelink 1 Final URL": ["URL"],
+                },
+            ),
+            None,
+        ),
+        (
+            pd.DataFrame(
+                {
+                    "Sitelink 1 Text": ["S1"],
+                    "Sitelink 1 Final URL": ["URL"],
+                    "Sitelink 1 Description 1": ["D1"],
+                    "Sitelink 1 Description 2": ["D2"],
+                },
+            ),
+            None,
+        ),
+        (
+            pd.DataFrame(
+                {
+                    "Sitelink 1 Text": ["S1"],
+                }
+            ),
+            "Sitelink 1 Final URL is missing.\n",
+        ),
+        (
+            pd.DataFrame(
+                {
+                    "Sitelink 1 Text": ["S" * 26],
+                    "Sitelink 1 Final URL": ["URL"],
+                    "Sitelink 1 Description 1": ["D" * 36],
+                    "Sitelink 1 Description 2": ["D2"],
+                },
+            ),
+            """Sitelink text length should be less than 25 characters, found 26 in column Sitelink 1 Text.
+Sitelink description length should be less than 35 characters, found 36 in column Sitelink 1 Description 1.\n""",
+        ),
+    ],
+)
+def test_validate_output_data_campaign(
+    df: pd.DataFrame, expected_issues: Optional[str]
+) -> None:
+    expected = df.copy()
+    result = _validate_output_data_campaign(df)
+    if expected_issues:
+        assert result["Issues"].values[0] == expected_issues
+    else:
+        assert result.equals(expected)