diff --git a/iata_code_fetcher/__pycache__/__init__.cpython-310.pyc b/iata_code_fetcher/__pycache__/__init__.cpython-310.pyc
index b26edd5..ee4d03a 100644
Binary files a/iata_code_fetcher/__pycache__/__init__.cpython-310.pyc and b/iata_code_fetcher/__pycache__/__init__.cpython-310.pyc differ
diff --git a/iata_code_fetcher/fetcher.py b/iata_code_fetcher/fetcher.py
index 4df400e..28ec1c6 100644
--- a/iata_code_fetcher/fetcher.py
+++ b/iata_code_fetcher/fetcher.py
@@ -6,11 +6,13 @@
 import json
 from string import ascii_uppercase
 from itertools import product
-from typing import Generator, Union, List, Tuple, Dict
+from typing import Generator, List, Dict
 from enum import Enum
 import logging
+from time import sleep
 from bs4 import BeautifulSoup
 import requests
+from requests.exceptions import RequestException
 
 # Constants
 BASE_URL: str = (
@@ -22,6 +24,9 @@
 AIRPORT_FILE: str = "airport_data_full.jsonl"
 # Frequency of processing status updates
 REPORT_FREQUENCY: int = 100  # report every 100 codes
+MAX_RETRIES: int = 3
+RETRY_DELAY: int = 5  # seconds
+TIMEOUT: int = 20  # seconds
 
 # Configure Logging
 logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
@@ -46,40 +51,55 @@ def generate_codes(length: int) -> Generator[str, None, None]:
     return ("".join(letters) for letters in product(ascii_uppercase, repeat=length))
 
 
-def fetch_and_process_data(code: str, code_type: CodeType) -> Tuple[Union[List[Dict[str, str]], str], str]:
+def fetch_and_process_data(code: str, code_type: CodeType) -> List[Dict[str, str]]:
     """
     Fetch and process data from the IATA site based on the code and type.
 
     :param code: The IATA code.
     :param code_type: The type of the code (CodeType.CARRIER or CodeType.AIRPORT).
-    :return: A tuple of (rows, file_path). Rows is a list of dictionaries or an error message.
+    :return: List of dictionaries, one per table body row.
+    :raises ValueError: If the response contains no data table.
+    :raises RequestException: If the request still fails after MAX_RETRIES attempts.
     """
     url = BASE_URL.format(
         block=CARRIER_BLOCK if code_type == CodeType.CARRIER else AIRPORT_BLOCK,
         type="airline" if code_type == CodeType.CARRIER else "airport",
         code=code,
     )
 
-    file_path = CARRIER_FILE if code_type == CodeType.CARRIER else AIRPORT_FILE
-
-    try:
-        response = requests.get(url, timeout=10)
-        response.raise_for_status()
-        soup = BeautifulSoup(response.text, "html.parser")
-        table = soup.find("table", {"class": "datatable"})
-
-        if not table:
-            return "No table found or error in response", file_path
-        headers = [th.text.strip() for th in table.find_all("td")]
-        rows = []
-        for row in table.find("tbody").find_all("tr"):
-            cols = [col.text.strip() for col in row.find_all("td")]
-            row_data = dict(zip(headers, cols))
-            rows.append(row_data)
-
-        return rows, file_path
-    except requests.RequestException as e:
-        return f"Request failed: {str(e)}", file_path
+    for attempt in range(MAX_RETRIES):
+        try:
+            response = requests.get(url, timeout=TIMEOUT)
+            response.raise_for_status()
+            soup = BeautifulSoup(response.text, "html.parser")
+            table = soup.find("table", {"class": "datatable"})
+
+            if not table:
+                raise ValueError("No table found or error in response")
+
+            headers = [th.text.strip() for th in table.find_all("td")]
+            rows = []
+            for row in table.find("tbody").find_all("tr"):
+                cols = [col.text.strip() for col in row.find_all("td")]
+                row_data = dict(zip(headers, cols))
+                rows.append(row_data)
+
+            return rows
+
+        except RequestException as e:
+            if attempt < MAX_RETRIES - 1:
+                logging.warning(
+                    "Request failed for %s. Retrying in %d seconds... (Attempt %d/%d)",
+                    code,
+                    RETRY_DELAY,
+                    attempt + 1,
+                    MAX_RETRIES,
+                )
+                sleep(RETRY_DELAY)
+            else:
+                raise RequestException(f"Request failed after {MAX_RETRIES} attempts: {str(e)}") from e
+
+    return []
 
 
 def process_and_save_data(code_type: CodeType) -> None:
@@ -89,15 +109,17 @@ def process_and_save_data(code_type: CodeType) -> None:
     :param code_type: The type of the code (CodeType.CARRIER or CodeType.AIRPORT).
     """
     processed: int = 0
+    file_path = CARRIER_FILE if code_type == CodeType.CARRIER else AIRPORT_FILE
     for code in generate_codes(2 if code_type == CodeType.CARRIER else 3):
-        result, file_path = fetch_and_process_data(code, code_type)
-        if isinstance(result, list) and result:
+        try:
+            result = fetch_and_process_data(code, code_type)
             with open(file_path, "a", encoding="UTF-8") as file:
                 for item in result:
                     file.write(json.dumps(item) + "\n")
-        else:
-            if result != "No table found or error in response":
-                logging.error("Error for %s: %s", code, result)
+        except RequestException as e:
+            logging.error("For %s: %s", code, e)
+        except ValueError as e:
+            logging.warning("For %s, %s", code, e)
 
         processed += 1
         if processed % REPORT_FREQUENCY == 0:
diff --git a/tests/__pycache__/__init__.cpython-310.pyc b/tests/__pycache__/__init__.cpython-310.pyc
index e9731f0..7fcfe16 100644
Binary files a/tests/__pycache__/__init__.cpython-310.pyc and b/tests/__pycache__/__init__.cpython-310.pyc differ
diff --git a/tests/test_fetcher.py b/tests/test_fetcher.py
index df6c33d..aad42c7 100644
--- a/tests/test_fetcher.py
+++ b/tests/test_fetcher.py
@@ -77,7 +77,7 @@ def test_fetch_and_process_data_carrier(mock_get, carrier_response_mock):
     mock_get.return_value.status_code = 200
     mock_get.return_value.text = carrier_response_mock
 
-    data, file_path = fetch_and_process_data("AA", CodeType.CARRIER)
+    data = fetch_and_process_data("AA", CodeType.CARRIER)
 
     expected_data = [
         {
@@ -93,7 +93,6 @@ def test_fetch_and_process_data_carrier(mock_get, carrier_response_mock):
     ]
 
     assert data == expected_data
-    assert file_path == "carrier_data_full.jsonl"
 
 
 @patch("requests.get")
@@ -104,7 +103,7 @@ def test_fetch_and_process_data_airport(mock_get, airport_response_mock):
     mock_get.return_value.status_code = 200
     mock_get.return_value.text = airport_response_mock
 
-    data, file_path = fetch_and_process_data("AAA", CodeType.AIRPORT)
+    data = fetch_and_process_data("AAA", CodeType.AIRPORT)
 
     expected_data = [
         {
@@ -115,7 +114,6 @@ def test_fetch_and_process_data_airport(mock_get, airport_response_mock):
     ]
 
     assert data == expected_data
-    assert file_path == "airport_data_full.jsonl"
 
 
 @patch("requests.get")
@@ -125,10 +123,8 @@ def test_fetch_and_process_data_error(mock_get):
     """
     mock_get.side_effect = RequestException("Network error")
 
-    data, file_path = fetch_and_process_data("AA", CodeType.CARRIER)
-
-    assert data == "Request failed: Network error"
-    assert file_path == "carrier_data_full.jsonl"
+    with patch("iata_code_fetcher.fetcher.sleep"), pytest.raises(RequestException):
+        _ = fetch_and_process_data("AA", CodeType.CARRIER)
 
 
 @patch("builtins.open", new_callable=mock_open)
@@ -176,14 +172,10 @@ def test_generate_codes_for_two_letter_codes():
     """
     length = 2  # Test with two-letter codes
     codes = list(generate_codes(length))
-    expected_number_of_codes = (
-        26**2
-    )  # There are 26 letters, so 26^2 two-letter combinations
+    expected_number_of_codes = 26**2  # There are 26 letters, so 26^2 two-letter combinations
 
     # Check if all codes have the correct length
-    assert all(
-        len(code) == length for code in codes
-    ), "All codes must have the specified length of 2"
+    assert all(len(code) == length for code in codes), "All codes must have the specified length of 2"
 
     # Check the total number of generated codes
     assert (
@@ -201,14 +193,10 @@ def test_generate_codes_for_three_letter_codes():
     """
     length = 3  # Test with three-letter codes
     codes = list(generate_codes(length))
-    expected_number_of_codes = (
-        26**3
-    )  # There are 26 letters, so 26^3 three-letter combinations
+    expected_number_of_codes = 26**3  # There are 26 letters, so 26^3 three-letter combinations
 
     # Check if all codes have the correct length
-    assert all(
-        len(code) == length for code in codes
-    ), "All codes must have the specified length of 3"
+    assert all(len(code) == length for code in codes), "All codes must have the specified length of 3"
 
     # Check the total number of generated codes
     assert (