Skip to content

Commit

Permalink
http: Add Caching Support
Browse files Browse the repository at this point in the history
This is a simple implementation which requires users to manage cache
objects per request. It doesn't implement conditional requests for
revalidation; instead, it fetches the complete content again as soon as
the cached response is stale.
  • Loading branch information
holesch committed Aug 3, 2024
1 parent b1b30a2 commit affbdd1
Showing 1 changed file with 95 additions and 3 deletions.
98 changes: 95 additions & 3 deletions not_my_board/_http.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import asyncio
import codecs
import contextlib
import datetime
import email.utils
import ipaddress
import json
import logging
Expand Down Expand Up @@ -47,15 +49,23 @@ def __init__(self, ca_files=None, proxies=None):
self._proxies[scheme] = self._parse_url(proxies[scheme])
self._no_proxy = proxies.get("no", "")

async def get_json(self, url):
return await self._request_json("GET", url)
async def get_json(self, url, cache=None):
    """GET *url* and return the decoded JSON response body.

    *cache*, if given, is forwarded to the request machinery, which may
    answer from it or refresh it.
    """
    decoded = await self._request_json("GET", url, cache=cache)
    return decoded

async def post_form(self, url, params):
    """POST *params* to *url* as an URL-encoded form; return decoded JSON."""
    encoded_body = urllib.parse.urlencode(params).encode()
    return await self._request_json(
        "POST", url, "application/x-www-form-urlencoded", encoded_body
    )

async def _request_json(self, method, url, content_type=None, body=None):
async def _request_json(
self, method, url, content_type=None, body=None, cache=None
):
now = datetime.datetime.now(tz=datetime.timezone.utc)

if cache and cache.url == url and now <= cache.fresh_until:
return json.loads(cache.content)

raw_url = url
url = self._parse_url(url)
headers = [
("Host", url.netloc),
Expand All @@ -78,7 +88,13 @@ async def _request_json(self, method, url, content_type=None, body=None):
to_send += conn.send(h11.Data(body))
to_send += conn.send(h11.EndOfMessage())

request_time = now
response = await self._request_response(conn, url, to_send)
response_time = datetime.datetime.now(tz=datetime.timezone.utc)

if cache:
self._update_cache(cache, raw_url, request_time, response_time, response)

await self._check_response_ok(response)

return json.loads(response.body)
Expand Down Expand Up @@ -166,6 +182,61 @@ def _parse_url(self, url):
ssl_,
)

def _update_cache(self, cache, url, request_time, response_time, response):
    """Store *response* in *cache* according to its caching headers.

    Derives ``cache.fresh_until`` from Cache-Control (``no-store``,
    ``no-cache``, ``max-age``) together with the Date/Age headers, or
    from Expires; falls back to a short 5-second heuristic lifetime.

    All datetimes are kept timezone-aware (UTC). The caller compares
    ``fresh_until`` against an aware "now", and aware-vs-naive
    comparison raises TypeError, so naive values (``datetime.min``,
    parsed headers without a timezone) must never leak into the cache.
    """
    headers = {k.decode("ascii"): v.decode("ascii") for k, v in response.headers}
    cache_control = _parse_dict_header(headers.get("cache-control", ""))

    # Aware "always stale" sentinel; naive datetime.min would make the
    # caller's `now <= cache.fresh_until` comparison raise TypeError.
    always_stale = datetime.datetime.min.replace(tzinfo=datetime.timezone.utc)

    if "no-store" in cache_control:
        # Response must not be stored at all; invalidate the entry.
        cache.url = None
        return

    cache.url = url
    cache.content = response.body
    # Heuristic freshness lifetime when no explicit directive applies.
    cache.fresh_until = response_time + datetime.timedelta(seconds=5)

    if "no-cache" in cache_control:
        cache.fresh_until = always_stale
    elif "max-age" in cache_control:
        try:
            max_age = datetime.timedelta(seconds=int(cache_control["max-age"]))
        except Exception as e:
            # Malformed directive: keep the heuristic lifetime instead
            # of letting the exception abort the whole request.
            logger.warning(
                'Error parsing "max-age" directive "%s": %s',
                cache_control["max-age"],
                e,
            )
            return

        date_value = response_time
        age = datetime.timedelta(seconds=0)

        if "date" in headers:
            try:
                parsed = email.utils.parsedate_to_datetime(headers["date"])
                # parsedate_to_datetime() returns a naive datetime for
                # some inputs (e.g. "-0000"); coerce to UTC so min()
                # below can compare it with the aware request_time.
                if parsed.tzinfo is None:
                    parsed = parsed.replace(tzinfo=datetime.timezone.utc)
                date_value = parsed
            except Exception as e:
                logger.warning(
                    'Error parsing "Date" header value "%s": %s', headers["date"], e
                )

        if "age" in headers:
            try:
                age = datetime.timedelta(seconds=int(headers["age"]))
            except Exception as e:
                logger.warning(
                    'Error parsing "Age" header value "%s": %s', headers["age"], e
                )

        # Conservative estimate of when the response was generated.
        generated_at = min(date_value, request_time - age)
        cache.fresh_until = generated_at + max_age
    elif "expires" in headers:
        expires_value = always_stale

        # "0" is a common, valid way to say "already expired".
        if headers["expires"] != "0":
            try:
                parsed = email.utils.parsedate_to_datetime(headers["expires"])
                if parsed.tzinfo is None:
                    parsed = parsed.replace(tzinfo=datetime.timezone.utc)
                expires_value = parsed
            except Exception as e:
                logger.warning(
                    'Error parsing "Expires" header value "%s": %s',
                    headers["expires"],
                    e,
                )

        cache.fresh_until = expires_value

@contextlib.asynccontextmanager
async def open_tunnel(
self, proxy_host, proxy_port, target_host, target_port, ssl_=False
Expand Down Expand Up @@ -359,6 +430,13 @@ class Response:
body: str


@dataclass
class CacheEntry:
    """Caller-managed cache slot for a single request URL.

    ``url`` is None while the entry is empty or has been invalidated
    (no-store). ``fresh_until`` defaults to a timezone-aware
    "always stale" value: the consumer compares it against an aware
    "now", and a naive ``datetime.min`` default would raise TypeError
    on that comparison.
    """

    # URL the cached content belongs to; None means empty/invalid.
    url: Optional[str] = None
    # Cached response body, stored verbatim.
    content: str = ""
    # Entry is served from cache while now <= fresh_until.
    fresh_until: datetime.datetime = datetime.datetime.min.replace(
        tzinfo=datetime.timezone.utc
    )


def is_proxy_disabled(host, no_proxy_env):
if not host or not no_proxy_env:
return False
Expand Down Expand Up @@ -444,6 +522,20 @@ def _is_proxy_disabled_host(host, patterns):
return False


def _parse_dict_header(value):
result = {}

for item in urllib.request.parse_http_list(value):
if "=" in item:
name, value = item.split("=", 1)
if len(value) >= len('""') and value[0] == value[-1] == '"':
value = value[1:-1]
result[name] = value
else:
result[item] = None
return result


# remove, if Python version < 3.11 is no longer supported
async def _start_tls(writer, url):
if hasattr(writer, "start_tls"):
Expand Down

0 comments on commit affbdd1

Please sign in to comment.