Skip to content

Commit

Permalink
Update ad group structure and add sitelink validation (#111)
Browse files Browse the repository at this point in the history
* Add Target Category field to ad group template

* Update Ad group and keywords structure

* Add sitelinks validation
  • Loading branch information
rjambrecic authored Sep 10, 2024
1 parent caa6d03 commit 07272ff
Show file tree
Hide file tree
Showing 3 changed files with 135 additions and 13 deletions.
80 changes: 68 additions & 12 deletions google_sheets/data_processing/processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,13 @@ def _process_row(
if not _use_template_row(new_campaign_row["Category"], template_row):
return final_df

# Positive keywords (Keyword Match Type) should be the same as Match Type (which is used as a part of Ad Group Name)
if target_resource == "keyword" and (
template_row["Negative"].lower() == "false"
and template_row["Keyword Match Type"] != template_row["Match Type"]
):
return final_df

stations = [
{
"Station From": new_campaign_row["Station From"],
Expand Down Expand Up @@ -204,22 +211,28 @@ def _process_row(
new_row = new_row.str.replace(INSERT_STATION_FROM, station["Station From"])
new_row = new_row.str.replace(INSERT_STATION_TO, station["Station To"])
new_row = new_row.str.replace(INSERT_CRITERION_TYPE, new_row["Match Type"])
new_row = new_row.str.replace(INSERT_CATEGORY, new_campaign_row["Category"])
new_row = new_row.str.replace(
INSERT_TICKET_PRICE, new_campaign_row["Ticket Price"]
)

if target_resource == "ad":
new_row["Final URL"] = station["Final Url"]
elif (
target_resource == "keyword"
and new_row["Negative"]
and new_row["Negative"].lower() == "true"
):
new_row["Match Type"] = new_row["Keyword Match Type"]

if "Campaign" in new_row["Level"]:
new_row["Ad Group Name"] = None
elif target_resource == "keyword":
if new_row["Negative"] and new_row["Negative"].lower() == "true":
new_row["Match Type"] = new_row["Keyword Match Type"]

if "Campaign" in new_row["Level"]:
new_row["Ad Group Name"] = None
elif (
new_row["Target Category"].lower() == "false"
and new_row["Match Type"] == "Exact"
):
new_row["Keyword"] = (
new_row["Keyword"].replace(INSERT_CATEGORY, "").strip()
)

new_row = new_row.str.replace(INSERT_CATEGORY, new_campaign_row["Category"])

final_df = pd.concat([final_df, pd.DataFrame([new_row])], ignore_index=True)

Expand Down Expand Up @@ -260,7 +273,7 @@ def process_data_f(
new_campaign_row, template_row, final_df, target_resource
)

final_df = final_df.drop(columns=["Language Code", "Category"])
final_df = final_df.drop(columns=["Language Code", "Category", "Target Category"])
if target_resource == "keyword":
final_df = final_df.drop(columns=["Keyword Match Type"])
final_df = final_df.drop_duplicates(ignore_index=True)
Expand Down Expand Up @@ -354,10 +367,53 @@ def _validate_output_data_ad(df: pd.DataFrame) -> pd.DataFrame: # noqa: C901
return df


MAX_SITELINK_TEXT_LENGTH = 25
MAX_SITELINK_DESCRIPTION_LENGTH = 35


def _validate_output_data_campaign(df: pd.DataFrame) -> pd.DataFrame:
df.insert(0, "Issues", "")

sitelink_text_columns = [
col for col in df.columns if col.startswith("Sitelink") and col.endswith("Text")
]

for index, row in df.iterrows():
for site_text_column in sitelink_text_columns:
site_text = row[site_text_column]
if not site_text:
continue
error_msg = ""

final_url_column = site_text_column.replace("Text", "Final URL")
if not row.get(final_url_column, None):
error_msg += f"{final_url_column} is missing.\n"
if len(site_text) > MAX_SITELINK_TEXT_LENGTH:
error_msg += f"Sitelink text length should be less than {MAX_SITELINK_TEXT_LENGTH} characters, found {len(site_text)} in column {site_text_column}.\n"
site_description_column = site_text_column.replace("Text", "Description")
for i in [1, 2]:
site_description = row.get(site_description_column + f" {i}", None)
if (
site_description
and len(site_description) > MAX_SITELINK_DESCRIPTION_LENGTH
):
error_msg += f"Sitelink description length should be less than {MAX_SITELINK_DESCRIPTION_LENGTH} characters, found {len(site_description)} in column {site_description_column} {i}.\n"

if error_msg:
df.loc[index, "Issues"] += error_msg

if not df["Issues"].any():
df = df.drop(columns=["Issues"])

return df


def validate_output_data(
    df: pd.DataFrame, target_resource: Literal["ad", "campaign", "keyword"]
) -> pd.DataFrame:
    """Run the output validation that applies to ``target_resource``.

    Args:
        df: Generated output data to validate.
        target_resource: Kind of resource the data describes. The comma
            between "campaign" and "keyword" in the annotation matters:
            adjacent string literals are concatenated by Python, which would
            silently turn the two options into "campaignkeyword".

    Returns:
        The frame returned by the resource-specific validator for "ad" and
        "campaign" data, or ``df`` unchanged for any other value.
    """
    if target_resource == "ad":
        return _validate_output_data_ad(df)
    elif target_resource == "campaign":
        return _validate_output_data_campaign(df)
    # No validation required for keyword data currently
    return df
2 changes: 2 additions & 0 deletions tests/app/test_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -477,6 +477,7 @@ async def test_process_data_keywords(
"Language Code": ["EN"],
"Ad Group Name": ["{INSERT_STATION_FROM} - {INSERT_STATION_TO}"],
"Match Type": ["Exact"],
"Target Category": ["True"],
}
)
if isinstance(detail, GoogleSheetValues):
Expand Down Expand Up @@ -560,6 +561,7 @@ async def test_process_data_ads(self) -> None:
"Language Code": ["EN"],
"Ad Group Name": ["{INSERT_STATION_FROM} - {INSERT_STATION_TO}"],
"Match Type": ["Exact"],
"Target Category": ["False"],
}
)
result = await process_data(
Expand Down
66 changes: 65 additions & 1 deletion tests/data_processing/test_processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
_update_campaign_name,
_use_template_row,
_validate_language_codes,
_validate_output_data_campaign,
process_campaign_data_f,
process_data_f,
validate_input_data,
Expand Down Expand Up @@ -119,14 +120,15 @@ def test_process_row(
{
"Campaign Name": "USA - A - B - EN",
"Ad Group Name": "A - B",
"Keyword": "k1",
"Keyword": "k1 {INSERT_CATEGORY}",
"Max CPC": "",
"Language Code": "EN",
"Negative": "FALSE",
"Level": "",
"Keyword Match Type": "Exact",
"Match Type": "Exact",
"Category": "Bus",
"Target Category": "False",
}
)
new_campaign_row = pd.Series(
Expand All @@ -144,6 +146,8 @@ def test_process_row(
final_df = _process_row(new_campaign_row, template_row, final_df, "keyword")

assert len(final_df) == expected_length
if expected_length == 1:
assert final_df["Keyword"].values[0] == "k1"


@pytest.mark.parametrize(
Expand All @@ -158,6 +162,7 @@ def test_process_row(
"Language Code": ["EN"],
"Ad Group Name": ["{INSERT_STATION_FROM} - {INSERT_STATION_TO}"],
"Match Type": ["Exact"],
"Target Category": ["False"],
}
),
pd.DataFrame(
Expand Down Expand Up @@ -238,6 +243,7 @@ def test_process_row(
"Language Code": ["EN"],
"Ad Group Name": ["{INSERT_STATION_FROM} - {INSERT_STATION_TO}"],
"Match Type": ["Exact"],
"Target Category": ["False"],
}
),
pd.DataFrame(
Expand Down Expand Up @@ -322,6 +328,7 @@ def test_process_row(
"{INSERT_STATION_FROM} - {INSERT_STATION_TO}",
],
"Match Type": ["Exact", "Exact"],
"Target Category": ["False", "False"],
}
),
pd.DataFrame(
Expand Down Expand Up @@ -380,6 +387,7 @@ def test_process_row(
"{INSERT_STATION_FROM} - {INSERT_STATION_TO}",
],
"Match Type": ["Exact", "Exact"],
"Target Category": ["False", "False"],
}
),
pd.DataFrame(
Expand Down Expand Up @@ -702,3 +710,59 @@ def test_validate_language_codes(
_validate_language_codes(new_campaign_df, valid_language_codes, "table")
else:
_validate_language_codes(new_campaign_df, valid_language_codes, "table")


@pytest.mark.parametrize(
    ("df", "expected_issues"),
    [
        # Text with a Final URL and no descriptions: nothing to report.
        (
            pd.DataFrame(
                {
                    "Sitelink 1 Text": ["S1"],
                    "Sitelink 1 Final URL": ["URL"],
                },
            ),
            None,
        ),
        # Text, Final URL and two short descriptions: nothing to report.
        (
            pd.DataFrame(
                {
                    "Sitelink 1 Text": ["S1"],
                    "Sitelink 1 Final URL": ["URL"],
                    "Sitelink 1 Description 1": ["D1"],
                    "Sitelink 1 Description 2": ["D2"],
                },
            ),
            None,
        ),
        # Text without a Final URL column.
        (
            pd.DataFrame(
                {
                    "Sitelink 1 Text": ["S1"],
                }
            ),
            "Sitelink 1 Final URL is missing.\n",
        ),
        # Text and first description each one character over their limit.
        (
            pd.DataFrame(
                {
                    "Sitelink 1 Text": ["S" * 26],
                    "Sitelink 1 Final URL": ["URL"],
                    "Sitelink 1 Description 1": ["D" * 36],
                    "Sitelink 1 Description 2": ["D2"],
                },
            ),
            "Sitelink text length should be less than 25 characters, found 26 in column Sitelink 1 Text.\n"
            "Sitelink description length should be less than 35 characters, found 36 in column Sitelink 1 Description 1.\n",
        ),
    ],
)
def test_validate_output_data_campaign(
    df: pd.DataFrame, expected_issues: Optional[str]
) -> None:
    """Check the "Issues" output of ``_validate_output_data_campaign``.

    When no issues are expected the result must equal the frame as it was
    before validation; otherwise the first row's "Issues" cell must match the
    expected message text exactly.
    """
    # Snapshot taken before the call so the comparison does not depend on
    # whatever the validator does to the frame it receives.
    snapshot = df.copy()
    validated = _validate_output_data_campaign(df)

    if not expected_issues:
        assert validated.equals(snapshot)
        return

    assert validated["Issues"].values[0] == expected_issues

0 comments on commit 07272ff

Please sign in to comment.