From 852e6b7d852728a7213f860d0a8657f06c584e00 Mon Sep 17 00:00:00 2001 From: Zachary Haroian <43462360+zaharoian@users.noreply.github.com> Date: Mon, 8 Apr 2024 16:06:50 -0700 Subject: [PATCH] Refactor: Remove `get_media_type()` redundant override in providers with a single media type (#4061) * Refactor: Remove `get_media_type()` redundant override * Chore: Remove unused imports to pass linting --- .../dags/providers/provider_api_scripts/auckland_museum.py | 4 ---- .../dags/providers/provider_api_scripts/brooklyn_museum.py | 4 ---- catalog/dags/providers/provider_api_scripts/cc_mixter.py | 5 ----- .../dags/providers/provider_api_scripts/cleveland_museum.py | 4 ---- .../dags/providers/provider_api_scripts/finnish_museums.py | 4 ---- catalog/dags/providers/provider_api_scripts/flickr.py | 5 ----- catalog/dags/providers/provider_api_scripts/freesound.py | 4 ---- catalog/dags/providers/provider_api_scripts/jamendo.py | 3 --- .../dags/providers/provider_api_scripts/justtakeitfree.py | 4 ---- .../providers/provider_api_scripts/metropolitan_museum.py | 5 ----- .../dags/providers/provider_api_scripts/museum_victoria.py | 4 ---- catalog/dags/providers/provider_api_scripts/nappy.py | 3 --- catalog/dags/providers/provider_api_scripts/nypl.py | 5 ----- catalog/dags/providers/provider_api_scripts/rawpixel.py | 3 --- .../dags/providers/provider_api_scripts/science_museum.py | 4 ---- catalog/dags/providers/provider_api_scripts/smithsonian.py | 4 ---- catalog/dags/providers/provider_api_scripts/smk.py | 4 ---- catalog/dags/providers/provider_api_scripts/stocksnap.py | 3 --- catalog/dags/providers/provider_api_scripts/wordpress.py | 3 --- 19 files changed, 75 deletions(-) diff --git a/catalog/dags/providers/provider_api_scripts/auckland_museum.py b/catalog/dags/providers/provider_api_scripts/auckland_museum.py index 2d57da0fc3e..49c04f8c582 100644 --- a/catalog/dags/providers/provider_api_scripts/auckland_museum.py +++ b/catalog/dags/providers/provider_api_scripts/auckland_museum.py @@ -20,7 +20,6 @@ import logging from datetime import datetime, timedelta -from common.constants import IMAGE from common.licenses import get_license_info from common.loader import provider_details as prov from providers.provider_api_scripts.provider_data_ingester import ProviderDataIngester @@ -99,9 +98,6 @@ def get_should_continue(self, response_json): return True - def get_media_type(self, record: dict): - return IMAGE - def get_record_data(self, data: dict) -> dict | list[dict] | None: # check if _id is empty then foreign_landing_url and # foreign_identifier doesn't exist diff --git a/catalog/dags/providers/provider_api_scripts/brooklyn_museum.py b/catalog/dags/providers/provider_api_scripts/brooklyn_museum.py index 80a871c5f78..e1e6e2804b5 100644 --- a/catalog/dags/providers/provider_api_scripts/brooklyn_museum.py +++ b/catalog/dags/providers/provider_api_scripts/brooklyn_museum.py @@ -3,7 +3,6 @@ import lxml.html as html from airflow.models import Variable -from common import constants from common.licenses import LicenseInfo, get_license_info from common.loader import provider_details as prov from providers.provider_api_scripts.provider_data_ingester import ProviderDataIngester @@ -22,9 +21,6 @@ def __init__(self, *args, **kwargs): self.api_key = Variable.get("API_KEY_BROOKLYN_MUSEUM") self.headers = {"api_key": self.api_key} - def get_media_type(self, record: dict) -> str: - return constants.IMAGE - def get_next_query_params(self, prev_query_params: dict | None, **kwargs) -> dict: if not prev_query_params: return { diff --git a/catalog/dags/providers/provider_api_scripts/cc_mixter.py b/catalog/dags/providers/provider_api_scripts/cc_mixter.py index 1f5254def95..284d1cbee03 100644 --- a/catalog/dags/providers/provider_api_scripts/cc_mixter.py +++ b/catalog/dags/providers/provider_api_scripts/cc_mixter.py @@ -14,9 +14,7 @@ import json import logging import re -from typing import Literal -from common import constants from common.licenses import get_license_info from common.loader import provider_details as prov from common.requester import DelayedRequester @@ -141,9 +139,6 @@ def get_should_continue(self, response_json): # less than the batch limit. return len(response_json) >= self.batch_limit - def get_media_type(self, record: dict) -> Literal["audio"]: - return constants.AUDIO - @staticmethod def _get_duration(ps: str | None) -> int | None: """ diff --git a/catalog/dags/providers/provider_api_scripts/cleveland_museum.py b/catalog/dags/providers/provider_api_scripts/cleveland_museum.py index 9d6b96ecccf..b63ab4e8d34 100644 --- a/catalog/dags/providers/provider_api_scripts/cleveland_museum.py +++ b/catalog/dags/providers/provider_api_scripts/cleveland_museum.py @@ -27,10 +27,6 @@ def get_next_query_params(self, prev_query_params, **kwargs): "skip": prev_query_params["skip"] + self.batch_limit, } - def get_media_type(self, record): - # This provider only supports Images. - return "image" - def get_batch_data(self, response_json): if response_json: return response_json.get("data") diff --git a/catalog/dags/providers/provider_api_scripts/finnish_museums.py b/catalog/dags/providers/provider_api_scripts/finnish_museums.py index 28ec1314783..66330b020ec 100644 --- a/catalog/dags/providers/provider_api_scripts/finnish_museums.py +++ b/catalog/dags/providers/provider_api_scripts/finnish_museums.py @@ -18,7 +18,6 @@ import logging from itertools import chain -from common import constants from common.licenses import LicenseInfo, get_license_info from common.loader import provider_details as prov from providers.provider_api_scripts.time_delineated_provider_data_ingester import ( @@ -91,9 +90,6 @@ def get_record_count_from_response(self, response_json): return response_json.get("resultCount", 0) return 0 - def get_media_type(self, record): - return constants.IMAGE - def get_batch_data(self, response_json): if ( not response_json diff --git a/catalog/dags/providers/provider_api_scripts/flickr.py b/catalog/dags/providers/provider_api_scripts/flickr.py index 89a4fa06d3a..97bdaa58a8f 100644 --- a/catalog/dags/providers/provider_api_scripts/flickr.py +++ b/catalog/dags/providers/provider_api_scripts/flickr.py @@ -17,7 +17,6 @@ import lxml.html as html from airflow.models import Variable -from common import constants from common.licenses import LicenseInfo, get_license_info from common.loader import provider_details as prov from common.loader.provider_details import ImageCategory @@ -175,10 +174,6 @@ def get_next_query_params(self, prev_query_params, **kwargs): # Increment the page number on subsequent requests return {**prev_query_params, "page": prev_query_params["page"] + 1} - def get_media_type(self, record): - # We only ingest images from Flickr - return constants.IMAGE - def get_batch_data(self, response_json): self.requests_count += 1 if response_json is None or response_json.get("stat") != "ok": diff --git a/catalog/dags/providers/provider_api_scripts/freesound.py b/catalog/dags/providers/provider_api_scripts/freesound.py index 97bfda4a49e..cb3d271bdfb 100644 --- a/catalog/dags/providers/provider_api_scripts/freesound.py +++ b/catalog/dags/providers/provider_api_scripts/freesound.py @@ -20,7 +20,6 @@ from requests.exceptions import ConnectionError, HTTPError, SSLError from retry import retry -from common import constants from common.licenses.licenses import get_license_info from common.loader import provider_details as prov from providers.provider_api_scripts.provider_data_ingester import ProviderDataIngester @@ -53,9 +52,6 @@ def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - def get_media_type(self, record: dict) -> str: - return constants.AUDIO - def get_next_query_params(self, prev_query_params: dict | None, **kwargs) -> dict: if not prev_query_params: start_date = "*" diff --git a/catalog/dags/providers/provider_api_scripts/jamendo.py b/catalog/dags/providers/provider_api_scripts/jamendo.py index 0b58c1f9418..9d814837149 100644 --- a/catalog/dags/providers/provider_api_scripts/jamendo.py +++ b/catalog/dags/providers/provider_api_scripts/jamendo.py @@ -45,9 +45,6 @@ class JamendoDataIngester(ProviderDataIngester): batch_limit = 200 headers = {"Accept": "application/json"} - def get_media_type(self, record): - return constants.AUDIO - def get_next_query_params(self, prev_query_params, **kwargs): if not prev_query_params: # On first request, build default params. diff --git a/catalog/dags/providers/provider_api_scripts/justtakeitfree.py b/catalog/dags/providers/provider_api_scripts/justtakeitfree.py index 503b8c8ed97..1b5242d0dee 100644 --- a/catalog/dags/providers/provider_api_scripts/justtakeitfree.py +++ b/catalog/dags/providers/provider_api_scripts/justtakeitfree.py @@ -14,7 +14,6 @@ from airflow.models import Variable -from common.constants import IMAGE from common.licenses import get_license_info from common.loader import provider_details as prov from providers.provider_api_scripts.provider_data_ingester import ProviderDataIngester @@ -45,9 +44,6 @@ def get_batch_data(self, response_json) -> list[list[dict]] | None: return data return None - def get_media_type(self, record: dict): - return IMAGE - def get_record_data(self, data: list[dict]) -> dict | None: data = data[0] if not (foreign_landing_url := data.get("page_link")): diff --git a/catalog/dags/providers/provider_api_scripts/metropolitan_museum.py b/catalog/dags/providers/provider_api_scripts/metropolitan_museum.py index 8942e45da78..2061bee2ad7 100644 --- a/catalog/dags/providers/provider_api_scripts/metropolitan_museum.py +++ b/catalog/dags/providers/provider_api_scripts/metropolitan_museum.py @@ -28,7 +28,6 @@ import argparse import logging -from common import constants from common.licenses import get_license_info from common.loader import provider_details as prov from providers.provider_api_scripts.provider_data_ingester import ProviderDataIngester @@ -167,10 +166,6 @@ def _get_title(self, object_json: dict) -> str | None: def _get_artist_name(self, object_json: dict) -> str | None: return object_json.get("artistDisplayName") - def get_media_type(self, object_json: dict): - # This provider only supports Images. - return constants.IMAGE - def main(date: str): logger.info("Begin: Metropolitan Museum data ingestion") diff --git a/catalog/dags/providers/provider_api_scripts/museum_victoria.py b/catalog/dags/providers/provider_api_scripts/museum_victoria.py index 7198a1e8b12..9b1bf4c470e 100644 --- a/catalog/dags/providers/provider_api_scripts/museum_victoria.py +++ b/catalog/dags/providers/provider_api_scripts/museum_victoria.py @@ -1,7 +1,6 @@ import logging from typing import TypedDict -from common import constants from common.licenses import LicenseInfo, get_license_info from common.loader import provider_details as prov from providers.provider_api_scripts.provider_data_ingester import ProviderDataIngester @@ -114,9 +113,6 @@ def _get_images(media_data) -> list[ImageDetails]: images.append(image) return images - def get_media_type(self, record: dict) -> str: - return constants.IMAGE - @staticmethod def _get_image_data( media: dict, diff --git a/catalog/dags/providers/provider_api_scripts/nappy.py b/catalog/dags/providers/provider_api_scripts/nappy.py index ffe9d6eb982..178d5c3cc45 100644 --- a/catalog/dags/providers/provider_api_scripts/nappy.py +++ b/catalog/dags/providers/provider_api_scripts/nappy.py @@ -52,9 +52,6 @@ def get_batch_data(self, response_json): def get_should_continue(self, response_json): return bool(response_json.get("next_page")) - def get_media_type(self, record: dict): - return constants.IMAGE - @staticmethod def _convert_filesize(raw_filesize_string: str) -> int: """Convert sizes from strings to byte integers, ex. "187.8kB" to 188.""" diff --git a/catalog/dags/providers/provider_api_scripts/nypl.py b/catalog/dags/providers/provider_api_scripts/nypl.py index 8f180628dce..3dd4196ecf9 100644 --- a/catalog/dags/providers/provider_api_scripts/nypl.py +++ b/catalog/dags/providers/provider_api_scripts/nypl.py @@ -4,7 +4,6 @@ from airflow.models import Variable -from common import constants from common.licenses import get_license_info from common.loader import provider_details as prov from common.loader.provider_details import ImageCategory @@ -75,10 +74,6 @@ def get_next_query_params(self, prev_query_params, **kwargs): "page": prev_query_params["page"] + 1, } - def get_media_type(self, record): - # This provider only supports Images. - return constants.IMAGE - def get_batch_data(self, response_json): if response_json: return response_json.get("nyplAPI", {}).get("response", {}).get("result") diff --git a/catalog/dags/providers/provider_api_scripts/rawpixel.py b/catalog/dags/providers/provider_api_scripts/rawpixel.py index dbe68df0370..2d0620a94f9 100644 --- a/catalog/dags/providers/provider_api_scripts/rawpixel.py +++ b/catalog/dags/providers/provider_api_scripts/rawpixel.py @@ -91,9 +91,6 @@ def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.api_key: str = Variable.get("API_KEY_RAWPIXEL") - def get_media_type(self, record: dict) -> str: - return constants.IMAGE - def _get_signature(self, query_params: dict) -> str: """ Get the query signature for a request. diff --git a/catalog/dags/providers/provider_api_scripts/science_museum.py b/catalog/dags/providers/provider_api_scripts/science_museum.py index 7876ecf4bda..cd425337019 100644 --- a/catalog/dags/providers/provider_api_scripts/science_museum.py +++ b/catalog/dags/providers/provider_api_scripts/science_museum.py @@ -98,10 +98,6 @@ def get_next_query_params(self, prev_query_params, **kwargs): "date[to]": to_, } - def get_media_type(self, record): - # This provider only supports Images. - return "image" - def get_batch_data(self, response_json): if response_json: return response_json.get("data") diff --git a/catalog/dags/providers/provider_api_scripts/smithsonian.py b/catalog/dags/providers/provider_api_scripts/smithsonian.py index fd14aefa98e..102ede4c091 100644 --- a/catalog/dags/providers/provider_api_scripts/smithsonian.py +++ b/catalog/dags/providers/provider_api_scripts/smithsonian.py @@ -14,7 +14,6 @@ from airflow.models import Variable from retry import retry -from common import constants from common.licenses import get_license_info from common.loader import provider_details as prov from providers.provider_api_scripts.provider_data_ingester import ProviderDataIngester @@ -115,9 +114,6 @@ def __init__(self, *args, **kwargs): license_url="https://creativecommons.org/publicdomain/zero/1.0/" ) - def get_media_type(self, record: dict) -> str: - return constants.IMAGE - def get_next_query_params(self, prev_query_params: dict | None, **kwargs) -> dict: # On the first request, `prev_query_params` will be `None`. We can detect this # and return our default params. diff --git a/catalog/dags/providers/provider_api_scripts/smk.py b/catalog/dags/providers/provider_api_scripts/smk.py index b17d1142eaf..474b089e859 100644 --- a/catalog/dags/providers/provider_api_scripts/smk.py +++ b/catalog/dags/providers/provider_api_scripts/smk.py @@ -11,7 +11,6 @@ import logging import urllib.parse -from common import constants from common.licenses import get_license_info from common.loader import provider_details as prov from providers.provider_api_scripts.provider_data_ingester import ProviderDataIngester @@ -27,9 +26,6 @@ class SmkDataIngester(ProviderDataIngester): headers = {"Accept": "application/json"} providers = {"image": prov.SMK_DEFAULT_PROVIDER} - def get_media_type(self, record: dict) -> str: - return constants.IMAGE - def get_next_query_params(self, prev_query_params: dict | None, **kwargs) -> dict: if not prev_query_params: return { diff --git a/catalog/dags/providers/provider_api_scripts/stocksnap.py b/catalog/dags/providers/provider_api_scripts/stocksnap.py index faca731e383..9beb5c1f328 100644 --- a/catalog/dags/providers/provider_api_scripts/stocksnap.py +++ b/catalog/dags/providers/provider_api_scripts/stocksnap.py @@ -45,9 +45,6 @@ def get_next_query_params(self, prev_query_params, **kwargs): self._page_counter += 1 return {} - def get_media_type(self, record): - return "image" - @property def endpoint(self): return f"{ENDPOINT_BASE}/{self._page_counter}" diff --git a/catalog/dags/providers/provider_api_scripts/wordpress.py b/catalog/dags/providers/provider_api_scripts/wordpress.py index 4c60d1b462a..560d50c8d68 100644 --- a/catalog/dags/providers/provider_api_scripts/wordpress.py +++ b/catalog/dags/providers/provider_api_scripts/wordpress.py @@ -53,9 +53,6 @@ def __init__(self, *args, **kwargs): self.total_pages = None self.current_page = 1 - def get_media_type(self, record: dict) -> str: - return constants.IMAGE - def get_next_query_params(self, prev_query_params: dict | None, **kwargs) -> dict: if self.total_pages is None: # On the first request, make a HEAD request to get the number of pages of