token and retry added
dlubom committed Jul 24, 2024
1 parent a79af6b commit 6e47f77
Showing 2 changed files with 34 additions and 21 deletions.
.github/workflows/monthly_data_fetch.yml (2 additions & 2 deletions)
@@ -37,9 +37,9 @@ jobs:
run: |
git config --global user.name 'github-actions[bot]'
git config --global user.email 'github-actions[bot]@users.noreply.github.com'
-git add iata_code_fetcher/carrier_data_full_unique.jsonl iata_code_fetcher/airport_data_full_unique.jsonl
+git add iata_code_fetcher/carrier_data_full_unique.jsonl iata_code_fetcher/airport_data_full_unique.jsonl
git commit -m 'Update unique data files [skip ci]'
git push
env:
-GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+GITHUB_TOKEN: ${{ secrets.ACTIONS_PUSH_TOKEN }}

iata_code_fetcher/fetcher.py (32 additions & 19 deletions)
@@ -11,6 +11,8 @@
import logging
from bs4 import BeautifulSoup
import requests
+from requests.exceptions import RequestException
+from time import sleep

# Constants
BASE_URL: str = (
@@ -22,6 +24,9 @@
AIRPORT_FILE: str = "airport_data_full.jsonl"
# Frequency of processing status updates
REPORT_FREQUENCY: int = 100 # report every 100 codes
+MAX_RETRIES: int = 3
+RETRY_DELAY: int = 5 # seconds
+TIMEOUT: int = 20 # seconds

# Configure Logging
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
@@ -61,25 +66,33 @@ def fetch_and_process_data(code: str, code_type: CodeType) -> Tuple[Union[List[D
    )
    file_path = CARRIER_FILE if code_type == CodeType.CARRIER else AIRPORT_FILE

-    try:
-        response = requests.get(url, timeout=20)
-        response.raise_for_status()
-        soup = BeautifulSoup(response.text, "html.parser")
-        table = soup.find("table", {"class": "datatable"})
-
-        if not table:
-            return "No table found or error in response", file_path
-
-        headers = [th.text.strip() for th in table.find_all("td")]
-        rows = []
-        for row in table.find("tbody").find_all("tr"):
-            cols = [col.text.strip() for col in row.find_all("td")]
-            row_data = dict(zip(headers, cols))
-            rows.append(row_data)
-
-        return rows, file_path
-    except requests.RequestException as e:
-        return f"Request failed: {str(e)}", file_path
+    for attempt in range(MAX_RETRIES):
+        try:
+            response = requests.get(url, timeout=TIMEOUT)
+            response.raise_for_status()
+            soup = BeautifulSoup(response.text, "html.parser")
+            table = soup.find("table", {"class": "datatable"})
+
+            if not table:
+                return "No table found or error in response", file_path
+
+            headers = [th.text.strip() for th in table.find_all("td")]
+            rows = []
+            for row in table.find("tbody").find_all("tr"):
+                cols = [col.text.strip() for col in row.find_all("td")]
+                row_data = dict(zip(headers, cols))
+                rows.append(row_data)
+
+            return rows, file_path
+
+        except requests.RequestException as e:
+            if attempt < MAX_RETRIES - 1:
+                logging.warning(
+                    f"Request failed for {code}. Retrying in {RETRY_DELAY} seconds... (Attempt {attempt + 1}/{MAX_RETRIES})"
+                )
+                sleep(RETRY_DELAY)
+            else:
+                return f"Request failed after {MAX_RETRIES} attempts: {str(e)}", file_path


def process_and_save_data(code_type: CodeType) -> None:
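
A minimal usage sketch for the retried fetch, assuming the package is importable as iata_code_fetcher.fetcher and that the function keeps the return shape shown in the diff (parsed rows on success, an error string once the retries are exhausted). The driver loop and sample codes below are illustrative only and not part of this commit.

from iata_code_fetcher.fetcher import CodeType, fetch_and_process_data  # assumed import path

# Hypothetical carrier codes, used only to illustrate the two possible return values.
sample_codes = ["LH", "BA", "ZZZ"]

for code in sample_codes:
    result, file_path = fetch_and_process_data(code, CodeType.CARRIER)
    if isinstance(result, str):
        # An error string comes back when no data table is found, or after
        # MAX_RETRIES failed requests with RETRY_DELAY seconds between attempts.
        print(f"{code}: {result}")
    else:
        print(f"{code}: {len(result)} rows to be appended to {file_path}")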
