Skip to content

Commit

Permalink
Merge pull request #112 from airtai/dev
Browse files Browse the repository at this point in the history
2 PRs
  • Loading branch information
kumaranvpl authored Sep 10, 2024
2 parents 8f44374 + 07272ff commit 75ea0b4
Show file tree
Hide file tree
Showing 4 changed files with 154 additions and 19 deletions.
84 changes: 72 additions & 12 deletions google_sheets/data_processing/processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ def validate_input_data(
INSERT_CRITERION_TYPE = "{INSERT_CRITERION_TYPE}"
INSERT_LANGUAGE_CODE = "{INSERT_LANGUAGE_CODE}"
INSERT_CATEGORY = "{INSERT_CATEGORY}"
INSERT_TICKET_PRICE = "{INSERT_TICKET_PRICE}"


def _update_campaign_name(
Expand Down Expand Up @@ -174,6 +175,13 @@ def _process_row(
if not _use_template_row(new_campaign_row["Category"], template_row):
return final_df

# Positive keywords (Keyword Match Type) should be the same as Match Type (which is used as a part of Ad Group Name)
if target_resource == "keyword" and (
template_row["Negative"].lower() == "false"
and template_row["Keyword Match Type"] != template_row["Match Type"]
):
return final_df

stations = [
{
"Station From": new_campaign_row["Station From"],
Expand Down Expand Up @@ -203,19 +211,28 @@ def _process_row(
new_row = new_row.str.replace(INSERT_STATION_FROM, station["Station From"])
new_row = new_row.str.replace(INSERT_STATION_TO, station["Station To"])
new_row = new_row.str.replace(INSERT_CRITERION_TYPE, new_row["Match Type"])
new_row = new_row.str.replace(INSERT_CATEGORY, new_campaign_row["Category"])
new_row = new_row.str.replace(
INSERT_TICKET_PRICE, new_campaign_row["Ticket Price"]
)

if target_resource == "ad":
new_row["Final URL"] = station["Final Url"]
elif (
target_resource == "keyword"
and new_row["Negative"]
and new_row["Negative"].lower() == "true"
):
new_row["Match Type"] = new_row["Keyword Match Type"]

if "Campaign" in new_row["Level"]:
new_row["Ad Group Name"] = None
elif target_resource == "keyword":
if new_row["Negative"] and new_row["Negative"].lower() == "true":
new_row["Match Type"] = new_row["Keyword Match Type"]

if "Campaign" in new_row["Level"]:
new_row["Ad Group Name"] = None
elif (
new_row["Target Category"].lower() == "false"
and new_row["Match Type"] == "Exact"
):
new_row["Keyword"] = (
new_row["Keyword"].replace(INSERT_CATEGORY, "").strip()
)

new_row = new_row.str.replace(INSERT_CATEGORY, new_campaign_row["Category"])

final_df = pd.concat([final_df, pd.DataFrame([new_row])], ignore_index=True)

Expand Down Expand Up @@ -256,7 +273,7 @@ def process_data_f(
new_campaign_row, template_row, final_df, target_resource
)

final_df = final_df.drop(columns=["Language Code", "Category"])
final_df = final_df.drop(columns=["Language Code", "Category", "Target Category"])
if target_resource == "keyword":
final_df = final_df.drop(columns=["Keyword Match Type"])
final_df = final_df.drop_duplicates(ignore_index=True)
Expand Down Expand Up @@ -350,10 +367,53 @@ def _validate_output_data_ad(df: pd.DataFrame) -> pd.DataFrame: # noqa: C901
return df


MAX_SITELINK_TEXT_LENGTH = 25
MAX_SITELINK_DESCRIPTION_LENGTH = 35


def _validate_output_data_campaign(df: pd.DataFrame) -> pd.DataFrame:
df.insert(0, "Issues", "")

sitelink_text_columns = [
col for col in df.columns if col.startswith("Sitelink") and col.endswith("Text")
]

for index, row in df.iterrows():
for site_text_column in sitelink_text_columns:
site_text = row[site_text_column]
if not site_text:
continue
error_msg = ""

final_url_column = site_text_column.replace("Text", "Final URL")
if not row.get(final_url_column, None):
error_msg += f"{final_url_column} is missing.\n"
if len(site_text) > MAX_SITELINK_TEXT_LENGTH:
error_msg += f"Sitelink text length should be less than {MAX_SITELINK_TEXT_LENGTH} characters, found {len(site_text)} in column {site_text_column}.\n"
site_description_column = site_text_column.replace("Text", "Description")
for i in [1, 2]:
site_description = row.get(site_description_column + f" {i}", None)
if (
site_description
and len(site_description) > MAX_SITELINK_DESCRIPTION_LENGTH
):
error_msg += f"Sitelink description length should be less than {MAX_SITELINK_DESCRIPTION_LENGTH} characters, found {len(site_description)} in column {site_description_column} {i}.\n"

if error_msg:
df.loc[index, "Issues"] += error_msg

if not df["Issues"].any():
df = df.drop(columns=["Issues"])

return df


def validate_output_data(
df: pd.DataFrame, target_resource: Literal["ad", "campaign" "keyword"]
df: pd.DataFrame, target_resource: Literal["ad", "campaign", "keyword"]
) -> pd.DataFrame:
if target_resource == "ad":
return _validate_output_data_ad(df)
# No validation required for campaign and keyword data currently
elif target_resource == "campaign":
return _validate_output_data_campaign(df)
# No validation required for keyword data currently
return df
6 changes: 3 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ dependencies = [
"pydantic>=2.3,<3",
"fastapi>=0.110.2",
"prisma==0.13.1",
"google-api-python-client==2.143.0",
"google-api-python-client==2.144.0",
"asyncify==0.10.0",
"pandas==2.2.2"
]
Expand All @@ -67,10 +67,10 @@ lint = [
"types-Pygments",
"types-docutils",
"mypy==1.11.2",
"ruff==0.6.3",
"ruff==0.6.4",
"pyupgrade-directories==0.3.0",
"bandit==1.7.9",
"semgrep==1.85.0",
"semgrep==1.86.0",
"pytest-mypy-plugins==3.1.2",
]

Expand Down
12 changes: 9 additions & 3 deletions tests/app/test_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -409,6 +409,7 @@ class TestProcessData:
"Final Url To",
"Language Code",
"Category",
"Ticket Price",
],
[
"India",
Expand All @@ -418,6 +419,7 @@ class TestProcessData:
"https://www.example.com/to",
"EN",
"Bus",
"10.5",
],
]
),
Expand Down Expand Up @@ -475,6 +477,7 @@ async def test_process_data_keywords(
"Language Code": ["EN"],
"Ad Group Name": ["{INSERT_STATION_FROM} - {INSERT_STATION_TO}"],
"Match Type": ["Exact"],
"Target Category": ["True"],
}
)
if isinstance(detail, GoogleSheetValues):
Expand Down Expand Up @@ -517,7 +520,7 @@ async def test_process_data_ads(self) -> None:
"Bus",
"https://www.example.com/from",
"H" * 31,
"Headline 2",
"Headline 2 {INSERT_TICKET_PRICE}",
"Headline 3",
"Description Line 1",
"Description Line 2",
Expand All @@ -536,6 +539,7 @@ async def test_process_data_ads(self) -> None:
"Final Url To",
"Language Code",
"Category",
"Ticket Price",
],
[
"India",
Expand All @@ -545,6 +549,7 @@ async def test_process_data_ads(self) -> None:
"https://www.example.com/to",
"EN",
"Bus",
"10.5",
],
]
)
Expand All @@ -556,6 +561,7 @@ async def test_process_data_ads(self) -> None:
"Language Code": ["EN"],
"Ad Group Name": ["{INSERT_STATION_FROM} - {INSERT_STATION_TO}"],
"Match Type": ["Exact"],
"Target Category": ["False"],
}
)
result = await process_data(
Expand Down Expand Up @@ -588,7 +594,7 @@ async def test_process_data_ads(self) -> None:
"Exact",
"https://www.example.com/from",
"HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH",
"Headline 2",
"Headline 2 10.5",
"Headline 3",
"Description Line 1",
"Description Line 2",
Expand All @@ -602,7 +608,7 @@ async def test_process_data_ads(self) -> None:
"Exact",
"https://www.example.com/to",
"HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH",
"Headline 2",
"Headline 2 10.5",
"Headline 3",
"Description Line 1",
"Description Line 2",
Expand Down
71 changes: 70 additions & 1 deletion tests/data_processing/test_processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
_update_campaign_name,
_use_template_row,
_validate_language_codes,
_validate_output_data_campaign,
process_campaign_data_f,
process_data_f,
validate_input_data,
Expand Down Expand Up @@ -119,14 +120,15 @@ def test_process_row(
{
"Campaign Name": "USA - A - B - EN",
"Ad Group Name": "A - B",
"Keyword": "k1",
"Keyword": "k1 {INSERT_CATEGORY}",
"Max CPC": "",
"Language Code": "EN",
"Negative": "FALSE",
"Level": "",
"Keyword Match Type": "Exact",
"Match Type": "Exact",
"Category": "Bus",
"Target Category": "False",
}
)
new_campaign_row = pd.Series(
Expand All @@ -137,12 +139,15 @@ def test_process_row(
"Station To": "B",
"Language Code": "EN",
"Category": category,
"Ticket Price": "100",
}
)
final_df = pd.DataFrame(columns=template_row.index)
final_df = _process_row(new_campaign_row, template_row, final_df, "keyword")

assert len(final_df) == expected_length
if expected_length == 1:
assert final_df["Keyword"].values[0] == "k1"


@pytest.mark.parametrize(
Expand All @@ -157,6 +162,7 @@ def test_process_row(
"Language Code": ["EN"],
"Ad Group Name": ["{INSERT_STATION_FROM} - {INSERT_STATION_TO}"],
"Match Type": ["Exact"],
"Target Category": ["False"],
}
),
pd.DataFrame(
Expand All @@ -177,6 +183,7 @@ def test_process_row(
"Station To": ["C", "D"],
"Language Code": ["EN", "EN"],
"Category": ["Bus", "Bus"],
"Ticket Price": ["100", "200"],
}
),
pd.DataFrame(
Expand Down Expand Up @@ -236,6 +243,7 @@ def test_process_row(
"Language Code": ["EN"],
"Ad Group Name": ["{INSERT_STATION_FROM} - {INSERT_STATION_TO}"],
"Match Type": ["Exact"],
"Target Category": ["False"],
}
),
pd.DataFrame(
Expand All @@ -256,6 +264,7 @@ def test_process_row(
"Station To": ["C", "D"],
"Language Code": ["EN", "EN"],
"Category": ["Bus", "Bus"],
"Ticket Price": ["100", "200"],
}
),
pd.DataFrame(
Expand Down Expand Up @@ -319,6 +328,7 @@ def test_process_row(
"{INSERT_STATION_FROM} - {INSERT_STATION_TO}",
],
"Match Type": ["Exact", "Exact"],
"Target Category": ["False", "False"],
}
),
pd.DataFrame(
Expand All @@ -339,6 +349,7 @@ def test_process_row(
"Station To": ["C", "D"],
"Language Code": ["EN", "DE"],
"Category": ["Bus", "Bus"],
"Ticket Price": ["100", "200"],
}
),
pd.DataFrame(
Expand Down Expand Up @@ -376,6 +387,7 @@ def test_process_row(
"{INSERT_STATION_FROM} - {INSERT_STATION_TO}",
],
"Match Type": ["Exact", "Exact"],
"Target Category": ["False", "False"],
}
),
pd.DataFrame(
Expand All @@ -396,6 +408,7 @@ def test_process_row(
"Station To": ["C", "D"],
"Language Code": ["EN", "DE"],
"Category": ["Bus", "Bus"],
"Ticket Price": ["100", "200"],
}
),
pd.DataFrame(
Expand Down Expand Up @@ -697,3 +710,59 @@ def test_validate_language_codes(
_validate_language_codes(new_campaign_df, valid_language_codes, "table")
else:
_validate_language_codes(new_campaign_df, valid_language_codes, "table")


@pytest.mark.parametrize(
    ("df", "expected_issues"),
    [
        # Text with its Final URL: nothing to report.
        (
            pd.DataFrame(
                {
                    "Sitelink 1 Text": ["S1"],
                    "Sitelink 1 Final URL": ["URL"],
                },
            ),
            None,
        ),
        # Short descriptions are accepted as well.
        (
            pd.DataFrame(
                {
                    "Sitelink 1 Text": ["S1"],
                    "Sitelink 1 Final URL": ["URL"],
                    "Sitelink 1 Description 1": ["D1"],
                    "Sitelink 1 Description 2": ["D2"],
                },
            ),
            None,
        ),
        # Text without a Final URL column.
        (
            pd.DataFrame(
                {
                    "Sitelink 1 Text": ["S1"],
                }
            ),
            "Sitelink 1 Final URL is missing.\n",
        ),
        # Text and first description each one character over the limit.
        (
            pd.DataFrame(
                {
                    "Sitelink 1 Text": ["S" * 26],
                    "Sitelink 1 Final URL": ["URL"],
                    "Sitelink 1 Description 1": ["D" * 36],
                    "Sitelink 1 Description 2": ["D2"],
                },
            ),
            (
                "Sitelink text length should be less than 25 characters, found 26 in column Sitelink 1 Text.\n"
                "Sitelink description length should be less than 35 characters, found 36 in column Sitelink 1 Description 1.\n"
            ),
        ),
    ],
)
def test_validate_output_data_campaign(
    df: pd.DataFrame, expected_issues: Optional[str]
) -> None:
    """Check the reported issues, or that a clean frame comes back unchanged."""
    # Snapshot first: the validator is handed the original frame object.
    pristine = df.copy()
    validated = _validate_output_data_campaign(df)

    if not expected_issues:
        assert validated.equals(pristine)
        return

    assert validated["Issues"].values[0] == expected_issues

0 comments on commit 75ea0b4

Please sign in to comment.