
Merge pull request #64 from airtai/dev
kumaranvpl authored Jul 26, 2024
2 parents 1b37866 + e11d277 commit bc5344e
Showing 5 changed files with 178 additions and 27 deletions.
21 changes: 18 additions & 3 deletions google_sheets/app.py
@@ -8,6 +8,7 @@
import pandas as pd
from fastapi import Body, FastAPI, HTTPException, Query, Response, status
from fastapi.responses import RedirectResponse
from google.auth.exceptions import RefreshError
from googleapiclient.errors import HttpError

from . import __version__
@@ -69,7 +70,7 @@ async def get_login_url(
return {"login_url": "User is already authenticated"}

google_oauth_url = get_google_oauth_url(user_id, conv_uuid) # type: ignore
markdown_url = f"To navigate Google Ads waters, I require access to your account. Please [click here]({google_oauth_url}) to grant permission."
markdown_url = f"To navigate Google Sheets waters, I require access to your account. Please [click here]({google_oauth_url}) to grant permission."
return {"login_url": markdown_url}


@@ -263,7 +264,17 @@ async def get_all_file_names(
],
) -> Dict[str, str]:
service = await build_service(user_id=user_id, service_name="drive", version="v3")
files: List[Dict[str, str]] = await get_files_f(service=service)
try:
files: List[Dict[str, str]] = await get_files_f(service=service)
except RefreshError as e:
error_msg = "The user's credentials have expired. Please log in again with 'force_new_login' parameter set to 'True'.\n"
error_msg += f"Error: {e!s}"

raise HTTPException(
status_code=status.HTTP_401_UNAUTHORIZED, detail=error_msg
) from e
except Exception as e:
raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR) from e
# create dict where key is id and value is name
files_dict = {file["id"]: file["name"] for file in files}
return files_dict
@@ -392,9 +403,10 @@ async def process_data(
target_resource, # type: ignore
)

issues_present = "Issues" in validated_df.columns
values = [validated_df.columns.tolist(), *validated_df.values.tolist()]

return GoogleSheetValues(values=values)
return GoogleSheetValues(values=values, issues_present=issues_present)


async def process_campaigns_and_ad_groups(
@@ -544,5 +556,8 @@ async def process_spreadsheet(
sheet_values=processed_values,
)
response += f"Sheet with the name '{title}' has been created successfully.\n"
if processed_values.issues_present:
response += """But there are issues present in the data.
Please check the 'Issues' column and correct the data accordingly.\n\n"""

return response
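
For reference, the new RefreshError handling above maps expired Google credentials to an HTTP 401 so the caller can prompt the user to log in again, while any other failure is surfaced as a generic 500. Below is a minimal standalone sketch of the same pattern; fetch_drive_files() is a hypothetical stand-in for get_files_f and the built Drive service, not code from this repository.

from typing import Dict, List

from fastapi import FastAPI, HTTPException, status
from google.auth.exceptions import RefreshError

app = FastAPI()


async def fetch_drive_files() -> List[Dict[str, str]]:
    # Hypothetical stand-in for get_files_f(service=...); a real implementation
    # would call the Drive API with the user's stored credentials.
    raise RefreshError("token expired")


@app.get("/files")
async def list_files() -> Dict[str, str]:
    try:
        files = await fetch_drive_files()
    except RefreshError as e:
        # Expired or revoked token: ask the client to re-authenticate.
        error_msg = (
            "The user's credentials have expired. "
            "Please log in again with 'force_new_login' parameter set to 'True'.\n"
            f"Error: {e!s}"
        )
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED, detail=error_msg
        ) from e
    except Exception as e:
        # Any other failure surfaces as a generic 500 without leaking details.
        raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR) from e
    return {file["id"]: file["name"] for file in files}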
3 changes: 2 additions & 1 deletion google_sheets/data_processing/processing.py
@@ -116,7 +116,8 @@ def process_data_f(


def _validate_output_data_ad(df: pd.DataFrame) -> pd.DataFrame: # noqa: C901
df["Issues"] = ""
df.insert(0, "Issues", "")

headline_columns = [col for col in df.columns if "Headline" in col]
description_columns = [col for col in df.columns if "Description" in col]

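
The switch from df["Issues"] = "" to df.insert(0, "Issues", "") places the new Issues column first instead of appending it last, which is why the expected values in test_process_data_ads further down list "Issues" before "Campaign Name". A quick illustration with throwaway data (not taken from the repository):

import pandas as pd

df = pd.DataFrame({"Headline 1": ["H1"], "Description 1": ["D1"]})

appended = df.copy()
appended["Issues"] = ""            # new column is appended last

prepended = df.copy()
prepended.insert(0, "Issues", "")  # new column is placed first

print(list(appended.columns))   # ['Headline 1', 'Description 1', 'Issues']
print(list(prepended.columns))  # ['Issues', 'Headline 1', 'Description 1']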
5 changes: 5 additions & 0 deletions google_sheets/model.py
@@ -7,3 +7,8 @@ class GoogleSheetValues(BaseModel):
values: List[List[Any]] = Field(
..., title="Values", description="Values to be written to the Google Sheet."
)
issues_present: bool = Field(
default=False,
title="Issues Present",
description="Whether any issues are present.",
)
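
Since issues_present defaults to False, existing callers of GoogleSheetValues that only pass values keep working unchanged. A small usage sketch, assuming the package is importable as google_sheets:

from google_sheets.model import GoogleSheetValues

clean = GoogleSheetValues(values=[["Headline 1"], ["H1"]])
flagged = GoogleSheetValues(
    values=[["Issues", "Headline 1"], ["Headline too long.\n", "H" * 31]],
    issues_present=True,
)

print(clean.issues_present)    # False -- the default
print(flagged.issues_present)  # True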
112 changes: 111 additions & 1 deletion tests/app/test_app.py
@@ -294,7 +294,7 @@ class TestProcessData:
],
)
@pytest.mark.asyncio()
async def test_process_data(
async def test_process_data_keywords(
self,
template_sheet_values: GoogleSheetValues,
new_campaign_sheet_values: GoogleSheetValues,
@@ -328,6 +328,116 @@ async def test_process_data(
)
assert detail in exc.value.detail

@pytest.mark.asyncio()
async def test_process_data_ads(self) -> None:
template_sheet_values = GoogleSheetValues(
values=[
[
"Final URL",
"Headline 1",
"Headline 2",
"Headline 3",
"Description Line 1",
"Description Line 2",
"Path 1",
"Path 2",
],
[
"https://www.example.com/from",
"H" * 31,
"Headline 2",
"Headline 3",
"Description Line 1",
"Description Line 2",
"Path 1",
"Path 2",
],
]
)
new_campaign_sheet_values = GoogleSheetValues(
values=[
[
"Country",
"Station From",
"Station To",
"Final Url From",
"Final Url To",
],
[
"India",
"Delhi",
"Mumbai",
"https://www.example.com/from",
"https://www.example.com/to",
],
]
)
merged_campaigns_ad_groups_df = pd.DataFrame(
{
"Campaign Name": [
"INSERT_COUNTRY - INSERT_STATION_FROM - INSERT_STATION_TO"
],
"Ad Group Name": ["INSERT_STATION_FROM - INSERT_STATION_TO"],
"Match Type": ["Exact"],
}
)
result = await process_data(
template_sheet_values=template_sheet_values,
new_campaign_sheet_values=new_campaign_sheet_values,
merged_campaigns_ad_groups_df=merged_campaigns_ad_groups_df,
target_resource="ad",
)

expected = GoogleSheetValues(
values=[
[
"Issues",
"Campaign Name",
"Ad Group Name",
"Match Type",
"Final URL",
"Headline 1",
"Headline 2",
"Headline 3",
"Description Line 1",
"Description Line 2",
"Path 1",
"Path 2",
],
[
"Headline length should be less than 30 characters, found 31 in column Headline 1.\n",
"India - Delhi - Mumbai",
"Delhi - Mumbai",
"Exact",
"https://www.example.com/from",
"HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH",
"Headline 2",
"Headline 3",
"Description Line 1",
"Description Line 2",
"Path 1",
"Path 2",
],
[
"Headline length should be less than 30 characters, found 31 in column Headline 1.\n",
"India - Delhi - Mumbai",
"Mumbai - Delhi",
"Exact",
"https://www.example.com/to",
"HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH",
"Headline 2",
"Headline 3",
"Description Line 1",
"Description Line 2",
"Path 1",
"Path 2",
],
],
issues_present=True,
)

assert result.model_dump() == expected.model_dump()


class TestOpenAPIJSON:
def test_openapi(self) -> None:
64 changes: 42 additions & 22 deletions tests/data_processing/test_processing.py
@@ -167,33 +167,53 @@ def test_process_data_f(


@pytest.mark.parametrize(
"issues_column",
("df", "issues_column"),
[
[
"Duplicate headlines found.\nDuplicate descriptions found.\n",
"Duplicate descriptions found.\n",
"",
"Minimum 3 headlines are required, found 2.\nMinimum 2 descriptions are required, found 1.\nHeadline length should be less than 30 characters, found 31 in column Headline 2.\n",
],
None,
(
pd.DataFrame(
{
"Headline 1": ["H1", "H1", "H1", "H1"],
"Headline 2": ["H1", "H2", "H2", ("H" * 31)],
"Headline 3": ["H3", "H3", "H3", ""],
"Description 1": ["D1", "D1", "D2", "D3"],
"Description 2": ["D1", "D1", "D3", ""],
"Path 1": ["P1", "P1", "P1", "P1"],
"Path 2": ["P2", "P2", "P2", "P2"],
"Final URL": ["URL", "URL", "URL", "URL"],
}
),
[
"Duplicate headlines found.\nDuplicate descriptions found.\n",
"Duplicate descriptions found.\n",
"",
"Minimum 3 headlines are required, found 2.\nMinimum 2 descriptions are required, found 1.\nHeadline length should be less than 30 characters, found 31 in column Headline 2.\n",
],
),
(
pd.DataFrame(
{
"Headline 1": ["H1", "H1", "H1", "H1"],
"Headline 2": ["H2", "H2", "H2", "H2"],
"Headline 3": ["H3", "H3", "H3", "H3"],
"Description 1": ["D1", "D1", "D1", "D1"],
"Description 2": ["D2", "D2", "D2", "D2"],
"Path 1": ["P1", "P1", "P1", "P1"],
"Path 2": ["P2", "P2", "P2", "P2"],
"Final URL": ["URL", "URL", "URL", "URL"],
}
),
None,
),
],
)
def test_validate_output_data(issues_column: Optional[List[str]]) -> None:
df = pd.DataFrame(
{
"Headline 1": ["H1", "H1", "H1", "H1"],
"Headline 2": ["H1", "H2", "H2", ("H" * 31)],
"Headline 3": ["H3", "H3", "H3", ""],
"Description 1": ["D1", "D1", "D2", "D3"],
"Description 2": ["D1", "D1", "D3", ""],
"Path 1": ["P1", "P1", "P1", "P1"],
"Path 2": ["P2", "P2", "P2", "P2"],
"Final URL": ["URL", "URL", "URL", "URL"],
}
)
result = validate_output_data(df, "ad")
def test_validate_output_data(
df: pd.DataFrame, issues_column: Optional[List[str]]
) -> None:
expected = df.copy()
result = validate_output_data(df, "ad")

if issues_column:
expected.insert(0, "Issues", "")
expected["Issues"] = issues_column

assert result.equals(expected)
