Skip to content

Commit 4fd417d

Browse files
Convert entire image proxy route async
1 parent 1de2d21 commit 4fd417d

File tree

4 files changed

+110
-42
lines changed

4 files changed

+110
-42
lines changed

api/api/utils/asyncio.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
import asyncio
2+
import logging
3+
from collections.abc import Awaitable
4+
5+
6+
parent_logger = logging.getLogger(__name__)
7+
8+
9+
_fire_and_forget_logger = parent_logger.getChild("fire_and_forget")
10+
11+
12+
def fire_and_forget(awaitable: Awaitable) -> None:
13+
"""
14+
Consume an awaitable without waiting for it to finish.
15+
16+
This allows us to "fire and forget" an async function that
17+
we don't care about the result of. This is useful, for example,
18+
if some operation creates a side effect that isn't necessary for
19+
the response we're processing (e.g., Redis tallying).
20+
"""
21+
try:
22+
loop = asyncio.get_running_loop()
23+
except RuntimeError as exc:
24+
_fire_and_forget_logger.error(
25+
"`fire_and_forget` must be called inside a running" " event loop."
26+
)
27+
raise exc
28+
29+
loop.create_task(awaitable)

api/api/utils/image_proxy/__init__.py

Lines changed: 62 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import logging
2+
from collections.abc import Callable
23
from dataclasses import dataclass
34
from typing import Literal
45
from urllib.parse import urlparse
@@ -7,11 +8,15 @@
78
from django.http import HttpResponse
89
from rest_framework.exceptions import UnsupportedMediaType
910

11+
import aiohttp
1012
import django_redis
1113
import requests
1214
import sentry_sdk
15+
from asgiref.sync import sync_to_async
1316
from sentry_sdk import push_scope, set_context
1417

18+
from api.utils.aiohttp import get_aiohttp_session
19+
from api.utils.asyncio import fire_and_forget
1520
from api.utils.image_proxy.exception import UpstreamThumbnailException
1621
from api.utils.image_proxy.extension import get_image_extension
1722
from api.utils.image_proxy.photon import get_photon_request_params
@@ -75,11 +80,40 @@ def get_request_params_for_extension(
7580
)
7681

7782

78-
def get(
83+
@sync_to_async
84+
def _tally_response(
85+
tallies,
86+
media_info: MediaInfo,
87+
month: str,
88+
domain: str,
89+
response: aiohttp.ClientResponse,
90+
):
91+
"""
92+
Tally image proxy response without waiting for Redis to respond.
93+
94+
Pulled into a separate function to help reduce overload when skimming
95+
the `get` function, which is complex enough as is.
96+
"""
97+
tallies.incr(f"thumbnail_response_code:{month}:{response.status}"),
98+
tallies.incr(
99+
f"thumbnail_response_code_by_domain:{domain}:" f"{month}:{response.status}"
100+
)
101+
tallies.incr(
102+
f"thumbnail_response_code_by_provider:{media_info.media_provider}:"
103+
f"{month}:{response.status}"
104+
)
105+
106+
107+
_UPSTREAM_TIMEOUT = aiohttp.ClientTimeout(15)
108+
109+
110+
async def get(
79111
media_info: MediaInfo,
80112
request_config: RequestConfig = RequestConfig(),
81113
) -> HttpResponse:
82114
"""
115+
Retrieve proxied image from site accelerator.
116+
83117
Proxy an image through Photon if its file type is supported, else return the
84118
original image if the file type is SVG. Otherwise, raise an exception.
85119
"""
@@ -88,9 +122,10 @@ def get(
88122

89123
logger = parent_logger.getChild("get")
90124
tallies = django_redis.get_redis_connection("tallies")
125+
tallies_incr = sync_to_async(tallies.incr)
91126
month = get_monthly_timestamp()
92127

93-
image_extension = get_image_extension(image_url, media_identifier)
128+
image_extension = await get_image_extension(image_url, media_identifier)
94129

95130
headers = {"Accept": request_config.accept_header} | HEADERS
96131

@@ -106,26 +141,36 @@ def get(
106141
)
107142

108143
try:
109-
upstream_response = requests.get(
144+
# todo: refactor to use aiohttp shared session
145+
session = await get_aiohttp_session()
146+
147+
upstream_response = await session.get(
110148
upstream_url,
111-
timeout=15,
149+
timeout=_UPSTREAM_TIMEOUT,
112150
params=params,
113151
headers=headers,
114152
)
115-
tallies.incr(f"thumbnail_response_code:{month}:{upstream_response.status_code}")
116-
tallies.incr(
117-
f"thumbnail_response_code_by_domain:{domain}:"
118-
f"{month}:{upstream_response.status_code}"
119-
)
120-
tallies.incr(
121-
f"thumbnail_response_code_by_provider:{media_info.media_provider}:"
122-
f"{month}:{upstream_response.status_code}"
153+
fire_and_forget(
154+
_tally_response(tallies, media_info, month, domain, upstream_response)
123155
)
124156
upstream_response.raise_for_status()
157+
status_code = upstream_response.status
158+
content_type = upstream_response.headers.get("Content-Type")
159+
logger.debug(
160+
"Image proxy response status: %s, content-type: %s",
161+
status_code,
162+
content_type,
163+
)
164+
content = await upstream_response.content.read()
165+
return HttpResponse(
166+
content,
167+
status=status_code,
168+
content_type=content_type,
169+
)
125170
except Exception as exc:
126171
exception_name = f"{exc.__class__.__module__}.{exc.__class__.__name__}"
127172
key = f"thumbnail_error:{exception_name}:{domain}:{month}"
128-
count = tallies.incr(key)
173+
count = await tallies_incr(key)
129174
if count <= settings.THUMBNAIL_ERROR_INITIAL_ALERT_THRESHOLD or (
130175
count % settings.THUMBNAIL_ERROR_REPEATED_ALERT_FREQUENCY == 0
131176
):
@@ -144,24 +189,14 @@ def get(
144189
sentry_sdk.capture_exception(exc)
145190
if isinstance(exc, requests.exceptions.HTTPError):
146191
code = exc.response.status_code
147-
tallies.incr(
148-
f"thumbnail_http_error:{domain}:{month}:{code}:{exc.response.text}"
192+
fire_and_forget(
193+
tallies_incr(
194+
f"thumbnail_http_error:{domain}:{month}:{code}:{exc.response.text}"
195+
)
149196
)
150197
logger.warning(
151198
f"Failed to render thumbnail "
152199
f"{upstream_url=} {code=} "
153200
f"{media_info.media_provider=}"
154201
)
155202
raise UpstreamThumbnailException(f"Failed to render thumbnail. {exc}")
156-
157-
res_status = upstream_response.status_code
158-
content_type = upstream_response.headers.get("Content-Type")
159-
logger.debug(
160-
f"Image proxy response status: {res_status}, content-type: {content_type}"
161-
)
162-
163-
return HttpResponse(
164-
upstream_response.content,
165-
status=res_status,
166-
content_type=content_type,
167-
)

api/api/utils/image_proxy/extension.py

Lines changed: 18 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,43 +1,50 @@
11
from os.path import splitext
22
from urllib.parse import urlparse
33

4+
import aiohttp
45
import django_redis
5-
import requests
66
import sentry_sdk
7+
from asgiref.sync import sync_to_async
78

9+
from api.utils.aiohttp import get_aiohttp_session
10+
from api.utils.asyncio import fire_and_forget
811
from api.utils.image_proxy.exception import UpstreamThumbnailException
912

1013

11-
def get_image_extension(image_url: str, media_identifier: str) -> str | None:
14+
_HEAD_TIMEOUT = aiohttp.ClientTimeout(10)
15+
16+
17+
async def get_image_extension(image_url: str, media_identifier) -> str | None:
1218
cache = django_redis.get_redis_connection("default")
1319
key = f"media:{media_identifier}:thumb_type"
1420

1521
ext = _get_file_extension_from_url(image_url)
1622

1723
if not ext:
1824
# If the extension is not present in the URL, try to get it from the redis cache
19-
ext = cache.get(key)
25+
ext = await sync_to_async(cache.get)(key)
2026
ext = ext.decode("utf-8") if ext else None
2127

2228
if not ext:
2329
# If the extension is still not present, try getting it from the content type
2430
try:
25-
response = requests.head(image_url, timeout=10)
31+
session = await get_aiohttp_session()
32+
response = await session.head(image_url, timeout=_HEAD_TIMEOUT)
2633
response.raise_for_status()
34+
if response.headers and "Content-Type" in response.headers:
35+
content_type = response.headers["Content-Type"]
36+
ext = _get_file_extension_from_content_type(content_type)
37+
else:
38+
ext = None
39+
40+
fire_and_forget(sync_to_async(cache.set)(key, ext if ext else "unknown"))
2741
except Exception as exc:
2842
sentry_sdk.capture_exception(exc)
2943
raise UpstreamThumbnailException(
3044
"Failed to render thumbnail due to inability to check media "
3145
f"type. {exc}"
3246
)
33-
else:
34-
if response.headers and "Content-Type" in response.headers:
35-
content_type = response.headers["Content-Type"]
36-
ext = _get_file_extension_from_content_type(content_type)
37-
else:
38-
ext = None
3947

40-
cache.set(key, ext if ext else "unknown")
4148
return ext
4249

4350

api/api/views/media_views.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -40,9 +40,6 @@ class InvalidSource(APIException):
4040
default_code = "invalid_source"
4141

4242

43-
image_proxy_aget = sync_to_async(image_proxy.get)
44-
45-
4643
class MediaViewSet(AsyncViewSetMixin, AsyncAPIView, ReadOnlyModelViewSet):
4744
view_is_async = True
4845

@@ -296,7 +293,7 @@ async def thumbnail(self, request, *_, **__):
296293

297294
media_info = await self.get_image_proxy_media_info()
298295

299-
return await image_proxy_aget(
296+
return await image_proxy.get(
300297
media_info,
301298
request_config=image_proxy.RequestConfig(
302299
accept_header=request.headers.get("Accept", "image/*"),

0 commit comments

Comments
 (0)