token and retry added
dlubom committed Jul 24, 2024
1 parent a79af6b commit 6e47f77
Showing 2 changed files with 34 additions and 21 deletions.
.github/workflows/monthly_data_fetch.yml (2 additions & 2 deletions)
@@ -37,9 +37,9 @@ jobs:
run: |
git config --global user.name 'github-actions[bot]'
git config --global user.email 'github-actions[bot]@users.noreply.github.com'
-git add iata_code_fetcher/carrier_data_full_unique.jsonl iata_code_fetcher/airport_data_full_unique.jsonl
+git add iata_code_fetcher/carrier_data_full_unique.jsonl iata_code_fetcher/airport_data_full_unique.jsonl
git commit -m 'Update unique data files [skip ci]'
git push
env:
-GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+GITHUB_TOKEN: ${{ secrets.ACTIONS_PUSH_TOKEN }}

iata_code_fetcher/fetcher.py (32 additions & 19 deletions)
@@ -11,6 +11,8 @@
import logging
from bs4 import BeautifulSoup
import requests
+from requests.exceptions import RequestException
+from time import sleep

# Constants
BASE_URL: str = (
@@ -22,6 +24,9 @@
AIRPORT_FILE: str = "airport_data_full.jsonl"
# Frequency of processing status updates
REPORT_FREQUENCY: int = 100 # report every 100 codes
+MAX_RETRIES: int = 3
+RETRY_DELAY: int = 5 # seconds
+TIMEOUT: int = 20 # seconds

# Configure Logging
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
@@ -61,25 +66,33 @@ def fetch_and_process_data(code: str, code_type: CodeType) -> Tuple[Union[List[D
    )
    file_path = CARRIER_FILE if code_type == CodeType.CARRIER else AIRPORT_FILE

-    try:
-        response = requests.get(url, timeout=20)
-        response.raise_for_status()
-        soup = BeautifulSoup(response.text, "html.parser")
-        table = soup.find("table", {"class": "datatable"})
-
-        if not table:
-            return "No table found or error in response", file_path
-
-        headers = [th.text.strip() for th in table.find_all("td")]
-        rows = []
-        for row in table.find("tbody").find_all("tr"):
-            cols = [col.text.strip() for col in row.find_all("td")]
-            row_data = dict(zip(headers, cols))
-            rows.append(row_data)
-
-        return rows, file_path
-    except requests.RequestException as e:
-        return f"Request failed: {str(e)}", file_path
+    for attempt in range(MAX_RETRIES):
+        try:
+            response = requests.get(url, timeout=TIMEOUT)
+            response.raise_for_status()
+            soup = BeautifulSoup(response.text, "html.parser")
+            table = soup.find("table", {"class": "datatable"})
+
+            if not table:
+                return "No table found or error in response", file_path
+
+            headers = [th.text.strip() for th in table.find_all("td")]
+            rows = []
+            for row in table.find("tbody").find_all("tr"):
+                cols = [col.text.strip() for col in row.find_all("td")]
+                row_data = dict(zip(headers, cols))
+                rows.append(row_data)
+
+            return rows, file_path
+
+        except requests.RequestException as e:
+            if attempt < MAX_RETRIES - 1:
+                logging.warning(
+                    f"Request failed for {code}. Retrying in {RETRY_DELAY} seconds... (Attempt {attempt + 1}/{MAX_RETRIES})"
+                )
+                sleep(RETRY_DELAY)
+            else:
+                return f"Request failed after {MAX_RETRIES} attempts: {str(e)}", file_path


def process_and_save_data(code_type: CodeType) -> None:
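
A minimal usage sketch for the retried fetch, assuming the package is importable as iata_code_fetcher.fetcher and that the function keeps the return shape shown in the diff (parsed rows on success, an error string once the retries are exhausted). The driver loop and sample codes below are illustrative only and not part of this commit.

from iata_code_fetcher.fetcher import CodeType, fetch_and_process_data  # assumed import path

# Hypothetical carrier codes, used only to illustrate the two possible return values.
sample_codes = ["LH", "BA", "ZZZ"]

for code in sample_codes:
    result, file_path = fetch_and_process_data(code, CodeType.CARRIER)
    if isinstance(result, str):
        # An error string comes back when no data table is found, or after
        # MAX_RETRIES failed requests with RETRY_DELAY seconds between attempts.
        print(f"{code}: {result}")
    else:
        print(f"{code}: {len(result)} rows to be appended to {file_path}")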
