Skip to content

Commit

Permalink
Refactor: Remove get_media_type() redundant override in providers with a single media type (#4061)
Browse files Browse the repository at this point in the history

* Refactor: Remove `get_media_type()` redundant override

* Chore: Remove unused imports to pass linting
  • Loading branch information
zaharoian authored Apr 8, 2024
1 parent 3cf0fdd commit 852e6b7
Show file tree
Hide file tree
Showing 19 changed files with 0 additions and 75 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
import logging
from datetime import datetime, timedelta

from common.constants import IMAGE
from common.licenses import get_license_info
from common.loader import provider_details as prov
from providers.provider_api_scripts.provider_data_ingester import ProviderDataIngester
Expand Down Expand Up @@ -99,9 +98,6 @@ def get_should_continue(self, response_json):

return True

def get_media_type(self, record: dict):
return IMAGE

def get_record_data(self, data: dict) -> dict | list[dict] | None:
# check if _id is empty then foreign_landing_url and
# foreign_identifier doesn't exist
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import lxml.html as html
from airflow.models import Variable

from common import constants
from common.licenses import LicenseInfo, get_license_info
from common.loader import provider_details as prov
from providers.provider_api_scripts.provider_data_ingester import ProviderDataIngester
Expand All @@ -22,9 +21,6 @@ def __init__(self, *args, **kwargs):
self.api_key = Variable.get("API_KEY_BROOKLYN_MUSEUM")
self.headers = {"api_key": self.api_key}

def get_media_type(self, record: dict) -> str:
return constants.IMAGE

def get_next_query_params(self, prev_query_params: dict | None, **kwargs) -> dict:
if not prev_query_params:
return {
Expand Down
5 changes: 0 additions & 5 deletions catalog/dags/providers/provider_api_scripts/cc_mixter.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,7 @@
import json
import logging
import re
from typing import Literal

from common import constants
from common.licenses import get_license_info
from common.loader import provider_details as prov
from common.requester import DelayedRequester
Expand Down Expand Up @@ -141,9 +139,6 @@ def get_should_continue(self, response_json):
# less than the batch limit.
return len(response_json) >= self.batch_limit

def get_media_type(self, record: dict) -> Literal["audio"]:
return constants.AUDIO

@staticmethod
def _get_duration(ps: str | None) -> int | None:
"""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,6 @@ def get_next_query_params(self, prev_query_params, **kwargs):
"skip": prev_query_params["skip"] + self.batch_limit,
}

def get_media_type(self, record):
# This provider only supports Images.
return "image"

def get_batch_data(self, response_json):
if response_json:
return response_json.get("data")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
import logging
from itertools import chain

from common import constants
from common.licenses import LicenseInfo, get_license_info
from common.loader import provider_details as prov
from providers.provider_api_scripts.time_delineated_provider_data_ingester import (
Expand Down Expand Up @@ -91,9 +90,6 @@ def get_record_count_from_response(self, response_json):
return response_json.get("resultCount", 0)
return 0

def get_media_type(self, record):
return constants.IMAGE

def get_batch_data(self, response_json):
if (
not response_json
Expand Down
5 changes: 0 additions & 5 deletions catalog/dags/providers/provider_api_scripts/flickr.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
import lxml.html as html
from airflow.models import Variable

from common import constants
from common.licenses import LicenseInfo, get_license_info
from common.loader import provider_details as prov
from common.loader.provider_details import ImageCategory
Expand Down Expand Up @@ -175,10 +174,6 @@ def get_next_query_params(self, prev_query_params, **kwargs):
# Increment the page number on subsequent requests
return {**prev_query_params, "page": prev_query_params["page"] + 1}

def get_media_type(self, record):
# We only ingest images from Flickr
return constants.IMAGE

def get_batch_data(self, response_json):
self.requests_count += 1
if response_json is None or response_json.get("stat") != "ok":
Expand Down
4 changes: 0 additions & 4 deletions catalog/dags/providers/provider_api_scripts/freesound.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
from requests.exceptions import ConnectionError, HTTPError, SSLError
from retry import retry

from common import constants
from common.licenses.licenses import get_license_info
from common.loader import provider_details as prov
from providers.provider_api_scripts.provider_data_ingester import ProviderDataIngester
Expand Down Expand Up @@ -53,9 +52,6 @@ def __init__(self, *args, **kwargs):

super().__init__(*args, **kwargs)

def get_media_type(self, record: dict) -> str:
return constants.AUDIO

def get_next_query_params(self, prev_query_params: dict | None, **kwargs) -> dict:
if not prev_query_params:
start_date = "*"
Expand Down
3 changes: 0 additions & 3 deletions catalog/dags/providers/provider_api_scripts/jamendo.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,6 @@ class JamendoDataIngester(ProviderDataIngester):
batch_limit = 200
headers = {"Accept": "application/json"}

def get_media_type(self, record):
return constants.AUDIO

def get_next_query_params(self, prev_query_params, **kwargs):
if not prev_query_params:
# On first request, build default params.
Expand Down
4 changes: 0 additions & 4 deletions catalog/dags/providers/provider_api_scripts/justtakeitfree.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@

from airflow.models import Variable

from common.constants import IMAGE
from common.licenses import get_license_info
from common.loader import provider_details as prov
from providers.provider_api_scripts.provider_data_ingester import ProviderDataIngester
Expand Down Expand Up @@ -45,9 +44,6 @@ def get_batch_data(self, response_json) -> list[list[dict]] | None:
return data
return None

def get_media_type(self, record: dict):
return IMAGE

def get_record_data(self, data: list[dict]) -> dict | None:
data = data[0]
if not (foreign_landing_url := data.get("page_link")):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@
import argparse
import logging

from common import constants
from common.licenses import get_license_info
from common.loader import provider_details as prov
from providers.provider_api_scripts.provider_data_ingester import ProviderDataIngester
Expand Down Expand Up @@ -167,10 +166,6 @@ def _get_title(self, object_json: dict) -> str | None:
def _get_artist_name(self, object_json: dict) -> str | None:
return object_json.get("artistDisplayName")

def get_media_type(self, object_json: dict):
# This provider only supports Images.
return constants.IMAGE


def main(date: str):
logger.info("Begin: Metropolitan Museum data ingestion")
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import logging
from typing import TypedDict

from common import constants
from common.licenses import LicenseInfo, get_license_info
from common.loader import provider_details as prov
from providers.provider_api_scripts.provider_data_ingester import ProviderDataIngester
Expand Down Expand Up @@ -114,9 +113,6 @@ def _get_images(media_data) -> list[ImageDetails]:
images.append(image)
return images

def get_media_type(self, record: dict) -> str:
return constants.IMAGE

@staticmethod
def _get_image_data(
media: dict,
Expand Down
3 changes: 0 additions & 3 deletions catalog/dags/providers/provider_api_scripts/nappy.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,6 @@ def get_batch_data(self, response_json):
def get_should_continue(self, response_json):
return bool(response_json.get("next_page"))

def get_media_type(self, record: dict):
return constants.IMAGE

@staticmethod
def _convert_filesize(raw_filesize_string: str) -> int:
"""Convert sizes from strings to byte integers, ex. "187.8kB" to 188."""
Expand Down
5 changes: 0 additions & 5 deletions catalog/dags/providers/provider_api_scripts/nypl.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@

from airflow.models import Variable

from common import constants
from common.licenses import get_license_info
from common.loader import provider_details as prov
from common.loader.provider_details import ImageCategory
Expand Down Expand Up @@ -75,10 +74,6 @@ def get_next_query_params(self, prev_query_params, **kwargs):
"page": prev_query_params["page"] + 1,
}

def get_media_type(self, record):
# This provider only supports Images.
return constants.IMAGE

def get_batch_data(self, response_json):
if response_json:
return response_json.get("nyplAPI", {}).get("response", {}).get("result")
Expand Down
3 changes: 0 additions & 3 deletions catalog/dags/providers/provider_api_scripts/rawpixel.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,9 +91,6 @@ def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.api_key: str = Variable.get("API_KEY_RAWPIXEL")

def get_media_type(self, record: dict) -> str:
return constants.IMAGE

def _get_signature(self, query_params: dict) -> str:
"""
Get the query signature for a request.
Expand Down
4 changes: 0 additions & 4 deletions catalog/dags/providers/provider_api_scripts/science_museum.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,10 +98,6 @@ def get_next_query_params(self, prev_query_params, **kwargs):
"date[to]": to_,
}

def get_media_type(self, record):
# This provider only supports Images.
return "image"

def get_batch_data(self, response_json):
if response_json:
return response_json.get("data")
Expand Down
4 changes: 0 additions & 4 deletions catalog/dags/providers/provider_api_scripts/smithsonian.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
from airflow.models import Variable
from retry import retry

from common import constants
from common.licenses import get_license_info
from common.loader import provider_details as prov
from providers.provider_api_scripts.provider_data_ingester import ProviderDataIngester
Expand Down Expand Up @@ -115,9 +114,6 @@ def __init__(self, *args, **kwargs):
license_url="https://creativecommons.org/publicdomain/zero/1.0/"
)

def get_media_type(self, record: dict) -> str:
return constants.IMAGE

def get_next_query_params(self, prev_query_params: dict | None, **kwargs) -> dict:
# On the first request, `prev_query_params` will be `None`. We can detect this
# and return our default params.
Expand Down
4 changes: 0 additions & 4 deletions catalog/dags/providers/provider_api_scripts/smk.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
import logging
import urllib.parse

from common import constants
from common.licenses import get_license_info
from common.loader import provider_details as prov
from providers.provider_api_scripts.provider_data_ingester import ProviderDataIngester
Expand All @@ -27,9 +26,6 @@ class SmkDataIngester(ProviderDataIngester):
headers = {"Accept": "application/json"}
providers = {"image": prov.SMK_DEFAULT_PROVIDER}

def get_media_type(self, record: dict) -> str:
return constants.IMAGE

def get_next_query_params(self, prev_query_params: dict | None, **kwargs) -> dict:
if not prev_query_params:
return {
Expand Down
3 changes: 0 additions & 3 deletions catalog/dags/providers/provider_api_scripts/stocksnap.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,6 @@ def get_next_query_params(self, prev_query_params, **kwargs):
self._page_counter += 1
return {}

def get_media_type(self, record):
return "image"

@property
def endpoint(self):
return f"{ENDPOINT_BASE}/{self._page_counter}"
Expand Down
3 changes: 0 additions & 3 deletions catalog/dags/providers/provider_api_scripts/wordpress.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,6 @@ def __init__(self, *args, **kwargs):
self.total_pages = None
self.current_page = 1

def get_media_type(self, record: dict) -> str:
return constants.IMAGE

def get_next_query_params(self, prev_query_params: dict | None, **kwargs) -> dict:
if self.total_pages is None:
# On the first request, make a HEAD request to get the number of pages of
Expand Down

0 comments on commit 852e6b7

Please sign in to comment.