From 3c1ec3450ebbce1ee40dc57e75a5610f20a390e4 Mon Sep 17 00:00:00 2001 From: sarayourfriend Date: Tue, 7 May 2024 13:44:51 +1000 Subject: [PATCH] Cache repeated thumbnail failures within configured TTL (#4249) * Add pytest-pook and move pytest config into pyproject.toml * Remove unnecessary custom async test handling for image proxy * Remove unnecessary pook mock match assertions pytest-pook handles this check automatically * Cache repeated thumbnail failures within configured TTL * Fix media info interface to match actual usage * Fix documentation string * Fix typo Co-authored-by: zack <6351754+zackkrida@users.noreply.github.com> --------- Co-authored-by: zack <6351754+zackkrida@users.noreply.github.com> --- api/api/utils/image_proxy/__init__.py | 89 +++++++++- api/conf/settings/thumbnails.py | 14 ++ api/pdm.lock | 17 +- api/pyproject.toml | 19 ++ api/pytest.ini | 15 -- api/test/unit/utils/test_image_proxy.py | 219 ++++++++++++++---------- 6 files changed, 269 insertions(+), 104 deletions(-) delete mode 100644 api/pytest.ini diff --git a/api/api/utils/image_proxy/__init__.py b/api/api/utils/image_proxy/__init__.py index f6be7d1178f..5f47c034b8e 100644 --- a/api/api/utils/image_proxy/__init__.py +++ b/api/api/utils/image_proxy/__init__.py @@ -1,7 +1,10 @@ import logging from dataclasses import dataclass -from typing import Literal +from datetime import timedelta +from functools import wraps +from typing import Literal, Type from urllib.parse import urlparse +from uuid import UUID from django.conf import settings from django.http import HttpResponse @@ -11,6 +14,7 @@ import django_redis from aiohttp.client_exceptions import ClientResponseError from asgiref.sync import sync_to_async +from redis.client import Redis from redis.exceptions import ConnectionError from api.utils.aiohttp import get_aiohttp_session @@ -39,7 +43,7 @@ @dataclass class MediaInfo: media_provider: str - media_identifier: str + media_identifier: UUID image_url: str @@ -120,9 +124,90 @@ def _tally_client_response_errors(tallies, month: str, domain: str, status: int) logger.warning("Redis connect failed, thumbnail HTTP errors not tallied.") +# thmbfail == THuMBnail FAILures; this key path will exist for every thumbnail requested, so it needs to be space efficient +FAILURE_CACHE_KEY_TEMPLATE = "thmbfail:{ident}" + + +def _cache_repeated_failures(_get): + """ + Wrap ``image_proxy.get`` to cache repeated upstream failures + and avoid re-requesting images likely to fail. + + Do this by incrementing a counter for each media identifier each time the inner request + fails. Before making thumbnail requests, check this counter. If it is above the configured + threshold, assume the request will fail again, and eagerly return a failed response without + sending the request upstream. + + Additionally, if the request succeeds and the failure count is not 0, decrement the counter + to reflect the successful response, accounting for thumbnails that were temporarily flaky, + while still allowing them to get temporarily cached as a failure if additional requests fail + and push the counter over the threshold. + """ + logger = parent_logger.getChild("_cache_repeated_failures") + + @wraps(_get) + async def do_cache(*args, **kwargs): + media_info: MediaInfo = args[0] + compressed_ident = str(media_info.media_identifier).replace("-", "") + redis_key = FAILURE_CACHE_KEY_TEMPLATE.format(ident=compressed_ident) + tallies: Redis = django_redis.get_redis_connection("tallies") + + try: + cached_failure_count = await sync_to_async(tallies.get)( + redis_key, + ) + cached_failure_count = ( + int(cached_failure_count) if cached_failure_count is not None else 0 + ) + except ConnectionError: + # Ignore the connection error, treat it like it's never been cached + cached_failure_count = 0 + + if cached_failure_count > settings.THUMBNAIL_FAILURE_CACHE_TOLERANCE: + logger.info( + "%s thumbnail is too flaky, using cached failure response.", + media_info.media_identifier, + ) + raise UpstreamThumbnailException("Thumbnail unavailable from provider.") + + try: + response = await _get(*args, **kwargs) + if cached_failure_count > 0: + # Decrement the key + # Do not delete it, because if it isn't 0, then it has failed before + # meaning we should continue to monitor it within the cache window + # in case the upstream is flaky and eventually goes over the tolerance + try: + await sync_to_async(tallies.decr)(redis_key) + await sync_to_async(tallies.expire)( + redis_key, settings.THUMBNAIL_FAILURE_CACHE_WINDOW_SECONDS + ) + except ConnectionError: + logger.warning( + "Redis connect failed, thumbnail failure not decremented." + ) + return response + except: + try: + await sync_to_async(tallies.incr)(redis_key) + # Call expire each time the key is incremented + # This pushes expiration out each time a new failure is cached + await sync_to_async(tallies.expire)( + redis_key, settings.THUMBNAIL_FAILURE_CACHE_WINDOW_SECONDS + ) + except ConnectionError: + logger.warning( + "Redis connect failed, thumbnail failure not incremented." + ) + raise + + return do_cache + + _UPSTREAM_TIMEOUT = aiohttp.ClientTimeout(15) +@_cache_repeated_failures async def get( media_info: MediaInfo, request_config: RequestConfig = RequestConfig(), diff --git a/api/conf/settings/thumbnails.py b/api/conf/settings/thumbnails.py index b5bf94a7b6b..d538871c063 100644 --- a/api/conf/settings/thumbnails.py +++ b/api/conf/settings/thumbnails.py @@ -1,3 +1,5 @@ +from datetime import timedelta + from decouple import config @@ -15,3 +17,15 @@ # to cast them in assertions to match the parsed param types) THUMBNAIL_WIDTH_PX = config("THUMBNAIL_WIDTH_PX", default="600") THUMBNAIL_QUALITY = config("THUMBNAIL_JPG_QUALITY", default="80") + +# The length of time to cache repeated thumbnail failures +THUMBNAIL_FAILURE_CACHE_WINDOW_SECONDS = config( + "THUMBNAIL_FAILURE_CACHE_WINDOW_SECONDS", + default=int(timedelta(days=2).total_seconds()), + cast=int, +) + +# The number of times to try a thumbnail request before caching a failure +THUMBNAIL_FAILURE_CACHE_TOLERANCE = config( + "THUMBNAIL_FAILURE_CACHE_TOLERANCE", default=2, cast=int +) diff --git a/api/pdm.lock b/api/pdm.lock index a28fded04db..71ec5fc29d5 100644 --- a/api/pdm.lock +++ b/api/pdm.lock @@ -5,7 +5,7 @@ groups = ["default", "dev", "overrides", "test"] strategy = ["cross_platform", "inherit_metadata"] lock_version = "4.4.1" -content_hash = "sha256:f77cf4bb500db6a7da4bc51e7987b1510453ffca752368645186d0f07c170718" +content_hash = "sha256:c489a98705c98c79da029deab97b14edd4fe69f3d23ea3f0f4b1641c052776a7" [[package]] name = "adrf" @@ -1363,6 +1363,21 @@ files = [ {file = "pytest_django-4.8.0-py3-none-any.whl", hash = "sha256:ca1ddd1e0e4c227cf9e3e40a6afc6d106b3e70868fd2ac5798a22501271cd0c7"}, ] +[[package]] +name = "pytest-pook" +version = "1.0.0" +requires_python = ">=3.8" +summary = "Pytest plugin for pook" +groups = ["test"] +dependencies = [ + "pook", + "pytest", +] +files = [ + {file = "pytest_pook-1.0.0-py3-none-any.whl", hash = "sha256:16a8e7391401d818aa79a3ab63aee2450d18764821fd3f8b7ef9d34824090955"}, + {file = "pytest_pook-1.0.0.tar.gz", hash = "sha256:ee22a4bc133ffc49da569d1ecf76f3a674fccecf7dd394c21a46b704dbe67b47"}, +] + [[package]] name = "pytest-raises" version = "0.11" diff --git a/api/pyproject.toml b/api/pyproject.toml index 8d67aacd8bc..81f8ef101b1 100644 --- a/api/pyproject.toml +++ b/api/pyproject.toml @@ -60,7 +60,26 @@ test = [ "pook @ git+https://github.com/h2non/pook.git@master", "pytest >=7.4.4, <8", "pytest-django >=4.8.0, <5", + "pytest-pook>=1.0.0", "pytest-raises >=0.11, <0.12", "pytest-sugar >=0.9.7, <0.10", "schemathesis >=3.25.6, <4", ] + +[tool.pytest.ini_options] +DJANGO_SETTINGS_MODULE = "conf.settings" + +pythonpath = "." + +filterwarnings = [ + # Ignore warnings related to unverified HTTPS requests. + # Reason: This warning is suppressed to avoid raising warnings when making HTTP requests + # to servers with invalid or self-signed SSL certificates. It allows the tests to proceed + # without being interrupted by these warnings. + "ignore:Unverified HTTPS request", + + # Ignore warnings about the upcoming change in the default value of 'USE_TZ' setting in Django 5.0. + # Reason: The reason this warning is suppressed is actually because we already set USE_TZ to True. + # Since no changes are required on our part, we can safely ignore this to declutter the logs. + "ignore:The default value of USE_TZ will change from False to True in Django 5.0", +] diff --git a/api/pytest.ini b/api/pytest.ini deleted file mode 100644 index 19987697166..00000000000 --- a/api/pytest.ini +++ /dev/null @@ -1,15 +0,0 @@ -[pytest] -DJANGO_SETTINGS_MODULE = conf.settings - -pythonpath = . - -filterwarnings= -# Ignore warnings related to unverified HTTPS requests. -# Reason: This warning is suppressed to avoid raising warnings when making HTTP requests -# to servers with invalid or self-signed SSL certificates. It allows the tests to proceed -# without being interrupted by these warnings. - ignore:Unverified HTTPS request -# Ignore warnings about the upcoming change in the default value of 'USE_TZ' setting in Django 5.0. -# Reason: The reason this warning is suppressed is actually because we already set USE_TZ to True. -# Since no changes are required on our part, we can safely ignore this to declutter the logs. - ignore:The default value of USE_TZ will change from False to True in Django 5.0 diff --git a/api/test/unit/utils/test_image_proxy.py b/api/test/unit/utils/test_image_proxy.py index 5dce10b014d..bf364746e6b 100644 --- a/api/test/unit/utils/test_image_proxy.py +++ b/api/test/unit/utils/test_image_proxy.py @@ -1,6 +1,6 @@ -import asyncio from dataclasses import replace from urllib.parse import urlencode +from uuid import uuid4 from django.conf import settings from rest_framework.exceptions import UnsupportedMediaType @@ -10,8 +10,10 @@ import pytest from aiohttp import client_exceptions from aiohttp.client_reqrep import ConnectionKey +from asgiref.sync import async_to_sync from api.utils.image_proxy import ( + FAILURE_CACHE_KEY_TEMPLATE, HEADERS, MediaInfo, RequestConfig, @@ -28,7 +30,7 @@ f"{settings.PHOTON_ENDPOINT}{TEST_IMAGE_DOMAIN}/path_part1/part2/image_dot_jpg.jpg" ) TEST_IMAGE_URL = PHOTON_URL_FOR_TEST_IMAGE.replace(settings.PHOTON_ENDPOINT, "http://") -TEST_MEDIA_IDENTIFIER = "123" +TEST_MEDIA_IDENTIFIER = uuid4() TEST_MEDIA_PROVIDER = "foo" TEST_MEDIA_INFO = MediaInfo( @@ -67,25 +69,17 @@ def auth_key(): settings.PHOTON_AUTH_KEY = None -@pytest.fixture -def photon_get(session_loop): - """Run ``image_proxy.get`` and wait for all tasks to finish.""" - - def do(*args, **kwargs): - try: - res = session_loop.run_until_complete(_photon_get(*args, **kwargs)) - return res - finally: - tasks = asyncio.all_tasks(session_loop) - for task in tasks: - session_loop.run_until_complete(task) - - yield do +# Do not convert tests to async, because of this issue: +# https://github.com/pytest-dev/pytest-django/issues/580 +# While the transaction workaround technically works, it is +# tedious, easy to forget, and just wrapping tested functions +# with async_to_sync is much easier +photon_get = async_to_sync(_photon_get) -@pook.on -def test_get_successful_no_auth_key_default_args(photon_get, mock_image_data): - mock_get: pook.Mock = ( +@pytest.mark.pook +def test_get_successful_no_auth_key_default_args(mock_image_data): + ( pook.get(PHOTON_URL_FOR_TEST_IMAGE) .params( { @@ -97,27 +91,22 @@ def test_get_successful_no_auth_key_default_args(photon_get, mock_image_data): .header("Accept", "image/*") .reply(200) .body(MOCK_BODY) - .mock ) res = photon_get(TEST_MEDIA_INFO) assert res.content == MOCK_BODY.encode() assert res.status_code == 200 - assert mock_get.matched -@pook.on -def test_get_successful_original_svg_no_auth_key_default_args( - photon_get, mock_image_data -): - mock_get: pook.Mock = ( +@pytest.mark.pook +def test_get_successful_original_svg_no_auth_key_default_args(mock_image_data): + ( pook.get(TEST_IMAGE_URL.replace(".jpg", ".svg")) .header("User-Agent", UA_HEADER) .header("Accept", "image/*") .reply(200) .body(SVG_BODY) - .mock ) media_info = replace( @@ -128,14 +117,11 @@ def test_get_successful_original_svg_no_auth_key_default_args( assert res.content == SVG_BODY.encode() assert res.status_code == 200 - assert mock_get.matched -@pook.on -def test_get_successful_with_auth_key_default_args( - photon_get, mock_image_data, auth_key -): - mock_get: pook.Mock = ( +@pytest.mark.pook +def test_get_successful_with_auth_key_default_args(mock_image_data, auth_key): + ( pook.get(PHOTON_URL_FOR_TEST_IMAGE) .params( { @@ -148,19 +134,17 @@ def test_get_successful_with_auth_key_default_args( .header("X-Photon-Authentication", auth_key) .reply(200) .body(MOCK_BODY) - .mock ) res = photon_get(TEST_MEDIA_INFO) assert res.content == MOCK_BODY.encode() assert res.status_code == 200 - assert mock_get.matched -@pook.on -def test_get_successful_no_auth_key_not_compressed(photon_get, mock_image_data): - mock_get: pook.Mock = ( +@pytest.mark.pook +def test_get_successful_no_auth_key_not_compressed(mock_image_data): + ( pook.get(PHOTON_URL_FOR_TEST_IMAGE) .params( { @@ -171,19 +155,17 @@ def test_get_successful_no_auth_key_not_compressed(photon_get, mock_image_data): .header("Accept", "image/*") .reply(200) .body(MOCK_BODY) - .mock ) res = photon_get(TEST_MEDIA_INFO, RequestConfig(is_compressed=False)) assert res.content == MOCK_BODY.encode() assert res.status_code == 200 - assert mock_get.matched -@pook.on -def test_get_successful_no_auth_key_full_size(photon_get, mock_image_data): - mock_get: pook.Mock = ( +@pytest.mark.pook +def test_get_successful_no_auth_key_full_size(mock_image_data): + ( pook.get(PHOTON_URL_FOR_TEST_IMAGE) .params( { @@ -194,27 +176,22 @@ def test_get_successful_no_auth_key_full_size(photon_get, mock_image_data): .header("Accept", "image/*") .reply(200) .body(MOCK_BODY) - .mock ) res = photon_get(TEST_MEDIA_INFO, RequestConfig(is_full_size=True)) assert res.content == MOCK_BODY.encode() assert res.status_code == 200 - assert mock_get.matched -@pook.on -def test_get_successful_no_auth_key_full_size_not_compressed( - photon_get, mock_image_data -): - mock_get: pook.Mock = ( +@pytest.mark.pook +def test_get_successful_no_auth_key_full_size_not_compressed(mock_image_data): + ( pook.get(PHOTON_URL_FOR_TEST_IMAGE) .header("User-Agent", UA_HEADER) .header("Accept", "image/*") .reply(200) .body(MOCK_BODY) - .mock ) res = photon_get( @@ -224,12 +201,11 @@ def test_get_successful_no_auth_key_full_size_not_compressed( assert res.content == MOCK_BODY.encode() assert res.status_code == 200 - assert mock_get.matched -@pook.on -def test_get_successful_no_auth_key_png_only(photon_get, mock_image_data): - mock_get: pook.Mock = ( +@pytest.mark.pook +def test_get_successful_no_auth_key_png_only(mock_image_data): + ( pook.get(PHOTON_URL_FOR_TEST_IMAGE) .params( { @@ -241,20 +217,18 @@ def test_get_successful_no_auth_key_png_only(photon_get, mock_image_data): .header("Accept", "image/png") .reply(200) .body(MOCK_BODY) - .mock ) res = photon_get(TEST_MEDIA_INFO, RequestConfig(accept_header="image/png")) assert res.content == MOCK_BODY.encode() assert res.status_code == 200 - assert mock_get.matched -@pook.on -def test_get_successful_forward_query_params(photon_get, mock_image_data): +@pytest.mark.pook +def test_get_successful_forward_query_params(mock_image_data): params = urlencode({"hello": "world", 1: 2, "beep": "boop"}) - mock_get: pook.Mock = ( + ( pook.get(PHOTON_URL_FOR_TEST_IMAGE) .params( { @@ -267,7 +241,6 @@ def test_get_successful_forward_query_params(photon_get, mock_image_data): .header("Accept", "image/*") .reply(200) .body(MOCK_BODY) - .mock ) media_info_with_url_params = replace( @@ -278,7 +251,6 @@ def test_get_successful_forward_query_params(photon_get, mock_image_data): assert res.content == MOCK_BODY.encode() assert res.status_code == 200 - assert mock_get.matched @pytest.fixture @@ -292,10 +264,10 @@ async def raise_exc(*args, **kwargs): yield do -@pook.on +@pytest.mark.pook @cache_availability_params def test_get_successful_records_response_code( - photon_get, mock_image_data, is_cache_reachable, cache_name, request, caplog + mock_image_data, is_cache_reachable, cache_name, request, caplog ): cache = request.getfixturevalue(cache_name) ( @@ -369,7 +341,6 @@ def test_get_successful_records_response_code( @cache_availability_params @alert_count_params def test_get_exception_handles_error( - photon_get, exc, exc_name, count_start, @@ -410,8 +381,8 @@ def test_get_exception_handles_error( (500, "Internal Server Error"), ], ) +@pytest.mark.pook def test_get_http_exception_handles_error( - photon_get, status_code, text, count_start, @@ -429,9 +400,8 @@ def test_get_http_exception_handles_error( cache.set(key, count_start) with pytest.raises(UpstreamThumbnailException): - with pook.use(): - pook.get(PHOTON_URL_FOR_TEST_IMAGE).reply(status_code, text) - photon_get(TEST_MEDIA_INFO) + pook.get(PHOTON_URL_FOR_TEST_IMAGE).reply(status_code, text) + photon_get(TEST_MEDIA_INFO) sentry_capture_exception.assert_not_called() @@ -451,12 +421,93 @@ def test_get_http_exception_handles_error( ) -@pook.on -def test_get_successful_https_image_url_sends_ssl_parameter( - photon_get, mock_image_data +@pytest.mark.pook +def test_caches_failures_if_cache_surpasses_tolerance(mock_image_data, settings, redis): + tolerance = settings.THUMBNAIL_FAILURE_CACHE_TOLERANCE = 2 + + ( + pook.get(PHOTON_URL_FOR_TEST_IMAGE) + .params( + { + "w": settings.THUMBNAIL_WIDTH_PX, + "quality": settings.THUMBNAIL_QUALITY, + } + ) + .header("User-Agent", UA_HEADER) + .header("Accept", "image/*") + .times(tolerance) + .reply(401) + .body(MOCK_BODY) + ) + + for _ in range(tolerance + 1): + with pytest.raises( + UpstreamThumbnailException, match="Failed to render thumbnail" + ): + photon_get(TEST_MEDIA_INFO) + + assert ( + redis.ttl( + FAILURE_CACHE_KEY_TEMPLATE.format( + ident=str(TEST_MEDIA_INFO.media_identifier).replace("-", "") + ) + ) + is not None + ) + + with pytest.raises( + UpstreamThumbnailException, match="Thumbnail unavailable from provider" + ): + photon_get(TEST_MEDIA_INFO) + + +@pytest.mark.pook +def test_caches_failures_with_successes_extending_tolerance( + mock_image_data, settings, redis ): + settings.THUMBNAIL_FAILURE_CACHE_TOLERANCE = 1 + + def _make_failed_request(): + (pook.get(PHOTON_URL_FOR_TEST_IMAGE).reply(401)) + + with pytest.raises( + UpstreamThumbnailException, match="Failed to render thumbnail" + ): + photon_get(TEST_MEDIA_INFO) + + def _make_successful_request(): + (pook.get(PHOTON_URL_FOR_TEST_IMAGE).reply(200).body(MOCK_BODY)) + + res = photon_get(TEST_MEDIA_INFO) + assert res.content == MOCK_BODY.encode() + + # Before any requests, count = 0, within tolerance + + _make_failed_request() + # count = 1, maximum within tolerance + + _make_successful_request() + # count = 0, decremented due to successful request, allow additional failures before bypassing + + _make_failed_request() + # count = 1 again, maximum within tolerance + + _make_failed_request() + # count = 2, over tolerance, next request will fail without requesting upstream + + # "unavailable from provider" indicates the cached failure + # we do not add any pook mock for this request, proving it is never made + # and therefore bypassed by the cached failure + with pytest.raises( + UpstreamThumbnailException, match="Thumbnail unavailable from provider" + ): + photon_get(TEST_MEDIA_INFO) + + +@pytest.mark.pook +def test_get_successful_https_image_url_sends_ssl_parameter(mock_image_data): https_url = TEST_IMAGE_URL.replace("http://", "https://") - mock_get: pook.Mock = ( + ( pook.get(PHOTON_URL_FOR_TEST_IMAGE) .params( { @@ -469,7 +520,6 @@ def test_get_successful_https_image_url_sends_ssl_parameter( .header("Accept", "image/*") .reply(200) .body(MOCK_BODY) - .mock ) https_media_info = replace(TEST_MEDIA_INFO, image_url=https_url) @@ -478,20 +528,17 @@ def test_get_successful_https_image_url_sends_ssl_parameter( assert res.content == MOCK_BODY.encode() assert res.status_code == 200 - assert mock_get.matched -@pook.on -def test_get_unsuccessful_request_raises_custom_exception(photon_get): - mock_get: pook.Mock = pook.get(PHOTON_URL_FOR_TEST_IMAGE).reply(404).mock +@pytest.mark.pook +def test_get_unsuccessful_request_raises_custom_exception(): + pook.get(PHOTON_URL_FOR_TEST_IMAGE).reply(404).mock with pytest.raises( UpstreamThumbnailException, match=r"Failed to render thumbnail." ): photon_get(TEST_MEDIA_INFO) - assert mock_get.matched - @pytest.mark.parametrize( "image_url, expected_ext", @@ -536,7 +583,7 @@ def test_get_extension_from_content_type(content_type, expected_ext): @pytest.mark.django_db @pytest.mark.parametrize("image_type", ["apng", "tiff", "bmp"]) -def test_photon_get_raises_by_not_allowed_types(photon_get, image_type): +def test_photon_get_raises_by_not_allowed_types(image_type): image_url = TEST_IMAGE_URL.replace(".jpg", f".{image_type}") image = ImageFactory.create(url=image_url) media_info = MediaInfo( @@ -558,8 +605,8 @@ def test_photon_get_raises_by_not_allowed_types(photon_get, image_type): ], ) @cache_availability_params +@pytest.mark.pook(start_active=False) def test_photon_get_saves_image_type_to_cache( - photon_get, headers, expected_cache_val, is_cache_reachable, @@ -576,10 +623,10 @@ def test_photon_get_saves_image_type_to_cache( media_provider=image.provider, image_url=image_url, ) - with pook.use(): - pook.head(image_url, reply=200, response_headers=headers) - with pytest.raises(UnsupportedMediaType): - photon_get(media_info) + pook.on() + pook.head(image_url, reply=200, response_headers=headers) + with pytest.raises(UnsupportedMediaType): + photon_get(media_info) key = f"media:{image.identifier}:thumb_type" if is_cache_reachable: