diff --git a/catalog/dags/common/requester.py b/catalog/dags/common/requester.py index c355321609b..1b91c3a7cae 100644 --- a/catalog/dags/common/requester.py +++ b/catalog/dags/common/requester.py @@ -43,7 +43,7 @@ class DelayedRequester: delay: an integer giving the minimum number of seconds to wait between consecutive requests via the `get` method. headers: a dict that will be passed in all requests, unless overridden - by kwargs in specific calls to the get method + by kwargs in specific calls to the `get` method """ def __init__(self, delay: int = 0, headers: dict | None = None): diff --git a/catalog/dags/providers/provider_api_scripts/provider_data_ingester.py b/catalog/dags/providers/provider_api_scripts/provider_data_ingester.py index 5c6c0032a30..982dc4bc91a 100644 --- a/catalog/dags/providers/provider_api_scripts/provider_data_ingester.py +++ b/catalog/dags/providers/provider_api_scripts/provider_data_ingester.py @@ -8,6 +8,7 @@ from airflow.exceptions import AirflowException from airflow.models import Variable +from common.loader import provider_details as prov from common.requester import DelayedRequester from common.storage.media import MediaStore from common.storage.util import get_media_store_class @@ -145,6 +146,9 @@ def __init__( # Keep track of number of records ingested self.record_count = 0 + # Set default headers + self.headers = {"User-Agent": prov.UA_STRING} | self.headers + # Initialize the DelayedRequester and all necessary Media Stores. self.delayed_requester = DelayedRequester( delay=self.delay, headers=self.headers