# judge/cache_handler.py -- two-level Django cache backend.
#
# Layers an optional short-lived local cache ("l0") in front of a shared
# "primary" cache (memcached in sample_local_settings.py).  None values are
# stored as the NONE_RESULT sentinel so a cached None can be told apart
# from a cache miss (which also surfaces as None).
from django.core.cache import caches
from django.core.cache.backends.base import BaseCache

# pymemcache can raise transient errors under multi-threaded load, so
# primary-cache reads are retried this many times.
NUM_CACHE_RETRY = 3
# Lifetime (seconds) of entries mirrored into the local L0 cache.
DEFAULT_L0_TIMEOUT = 300
# Sentinel stored in place of None (a raw None would look like a miss).
NONE_RESULT = "__None__"

l0_cache = caches["l0"] if "l0" in caches else None
primary_cache = caches["primary"] if "primary" in caches else None


class CacheHandler(BaseCache):
    """Custom Django cache backend combining the L0 and primary caches."""

    def __init__(self, location, params):
        # `location` is required by Django's backend API but unused here;
        # the actual stores are the module-level "l0"/"primary" aliases.
        super().__init__(params)

    def get(self, key, default=None):
        """Return the value for `key`: L0 first, then primary with retries."""
        if l0_cache:
            result = l0_cache.get(key)
            if result is not None:
                return None if result == NONE_RESULT else result

        for attempt in range(NUM_CACHE_RETRY):
            try:
                result = primary_cache.get(key)
            except Exception:
                # Transient backend error: retry, re-raise on last attempt.
                if attempt == NUM_CACHE_RETRY - 1:
                    raise
                continue
            if result is None:
                # Successful lookup, genuine miss: do not retry.
                # (Bug fix: the old loop kept retrying after a clean miss.)
                return default
            if l0_cache:
                l0_cache.set(key, result, DEFAULT_L0_TIMEOUT)
            return None if result == NONE_RESULT else result
        return default

    def set(self, key, value, timeout=None):
        """Store `value` in the primary cache and mirror it into L0."""
        value_to_store = NONE_RESULT if value is None else value
        if l0_cache:
            l0_cache.set(key, value_to_store, DEFAULT_L0_TIMEOUT)
        primary_cache.set(key, value_to_store, timeout)

    def delete(self, key):
        """Remove `key` from both cache layers."""
        if l0_cache:
            l0_cache.delete(key)
        primary_cache.delete(key)

    def add(self, key, value, timeout=None):
        """Store `value` only if `key` is absent; return True if stored.

        Bug fix: the old code seeded L0 before consulting the primary
        cache, so when the primary already held a different value, L0
        could serve a value the primary never stored.  The Django cache
        API's boolean result is now returned instead of discarded.
        """
        value_to_store = NONE_RESULT if value is None else value
        added = primary_cache.add(key, value_to_store, timeout)
        if added and l0_cache:
            l0_cache.set(key, value_to_store, DEFAULT_L0_TIMEOUT)
        return added

    def get_many(self, keys):
        """Return a {key: value} dict for the keys found in either layer."""
        results = {}
        if l0_cache:
            l0_results = l0_cache.get_many(keys)
            results.update(
                {
                    key: (None if value == NONE_RESULT else value)
                    for key, value in l0_results.items()
                }
            )
            keys = [key for key in keys if key not in l0_results]

        if not keys:
            return results

        for attempt in range(NUM_CACHE_RETRY):
            try:
                cache_results = primary_cache.get_many(keys)
            except Exception:
                if attempt == NUM_CACHE_RETRY - 1:
                    raise
                continue
            if l0_cache:
                for key, value in cache_results.items():
                    # get_many never yields None values, so no sentinel
                    # translation is needed before mirroring into L0.
                    l0_cache.set(key, value, DEFAULT_L0_TIMEOUT)
            results.update(
                {
                    key: (None if value == NONE_RESULT else value)
                    for key, value in cache_results.items()
                }
            )
            return results
        return results

    def set_many(self, data, timeout=None):
        """Store every {key: value} pair in both layers."""
        data_to_store = {
            key: (NONE_RESULT if value is None else value)
            for key, value in data.items()
        }
        if l0_cache:
            l0_cache.set_many(data_to_store, DEFAULT_L0_TIMEOUT)
        primary_cache.set_many(data_to_store, timeout)

    def delete_many(self, keys):
        """Remove every key in `keys` from both layers."""
        if l0_cache:
            l0_cache.delete_many(keys)
        primary_cache.delete_many(keys)

    def clear(self):
        """Flush both cache layers entirely."""
        if l0_cache:
            l0_cache.clear()
        primary_cache.clear()

    def incr(self, key, delta=1):
        """Increment `key` in the primary cache and refresh L0.

        Bug fix: the old code updated only L0 when the key was present
        there, so the shared primary cache -- and every other process --
        never observed the increment.
        """
        new_value = primary_cache.incr(key, delta)
        if l0_cache:
            l0_cache.set(key, new_value, DEFAULT_L0_TIMEOUT)
        return new_value

    def decr(self, key, delta=1):
        """Decrement `key` in the primary cache and refresh L0 (see incr)."""
        new_value = primary_cache.decr(key, delta)
        if l0_cache:
            l0_cache.set(key, new_value, DEFAULT_L0_TIMEOUT)
        return new_value
- for attempt in range(NUM_CACHE_RETRY): - try: - result = cache.get(key) - return result - except KeyError as e: - if attempt == NUM_CACHE_RETRY - 1: - raise e - - def _set_l0(key, value): - if l0_cache: - l0_cache.set(key, value, 30) - - def _set(key, value, timeout): - _set_l0(key, value) - cache.set(key, value, timeout) - def decorator(func): def _validate_type(cache_key, result): if expected_type and not isinstance(result, expected_type): - data = { - "function": f"{func.__module__}.{func.__qualname__}", - "result": str(result)[:30], - "expected_type": expected_type, - "type": type(result), - "key": cache_key, - } - log_debug("invalid_key", data) return False return True def wrapper(*args, **kwargs): cache_key = get_key(func, *args, **kwargs) - result = _get(cache_key) - if result is not None and _validate_type(cache_key, result): - _set_l0(cache_key, result) - if type(result) == str and result == NONE_RESULT: - result = None + result = cache.get(cache_key) + + if result is None or _validate_type(cache_key, result): return result + + # Call the original function result = func(*args, **kwargs) - if result is None: - cache_result = NONE_RESULT - else: - cache_result = result - _set(cache_key, cache_result, timeout) + cache.set(cache_key, result, timeout) return result def dirty(*args, **kwargs): cache_key = get_key(func, *args, **kwargs) cache.delete(cache_key) - if l0_cache: - l0_cache.delete(cache_key) def prefetch_multi(args_list): - keys = [] - for args in args_list: - keys.append(get_key(func, *args)) + keys = [get_key(func, *args) for args in args_list] results = cache.get_many(keys) - for key, result in results.items(): - if result is not None: - _set_l0(key, result) def dirty_multi(args_list): - keys = [] - for args in args_list: - keys.append(get_key(func, *args)) + keys = [get_key(func, *args) for args in args_list] cache.delete_many(keys) - if l0_cache: - l0_cache.delete_many(keys) wrapper.dirty = dirty wrapper.prefetch_multi = prefetch_multi @@ 
class CacheableModel(models.Model):
    """Abstract base model whose instances are cached as plain dicts.

    Relies on `models` and `cache` imported at the top of this module.
    Subclasses inherit per-id caching via `get_instance` and explicit
    invalidation via `dirty_cache`.
    """

    # Cache timeout in seconds; None falls through to the backend default.
    # (Old comment claimed "default: 1 hour", which the value contradicts.)
    cache_timeout = None

    class Meta:
        abstract = True  # abstract base class: no table is created

    @classmethod
    def _get_cache_key(cls, obj_id):
        """Build the cache key for one object id, e.g. "problem_42"."""
        return f"{cls.__name__.lower()}_{obj_id}"

    @classmethod
    def get_instance(cls, *ids):
        """Fetch one object (single id) or a list of objects (list/tuple of ids).

        Cached entries are stored as field dicts and rebuilt with
        ``cls(**dict)``.  NOTE(review): rebuilt instances are detached and
        many-to-many fields are not representable this way -- confirm
        callers treat them as read-only.  Raises KeyError if an id exists
        neither in cache nor in the database (unchanged behavior).
        """
        # Bug fix: model_to_dict was referenced without being imported
        # anywhere in this module.
        from django.forms.models import model_to_dict

        if not ids:
            return None

        ids = ids[0] if len(ids) == 1 and isinstance(ids[0], (list, tuple)) else ids
        cache_keys = {cls._get_cache_key(obj_id): obj_id for obj_id in ids}
        cached_objects = cache.get_many(cache_keys.keys())

        results = {
            cache_keys[key]: cls(**cached_objects[key]) for key in cached_objects
        }
        missing_ids = [obj_id for obj_id in ids if obj_id not in results]

        if missing_ids:
            missing_objects = cls.objects.filter(id__in=missing_ids)
            objects_to_cache = {}
            for obj in missing_objects:
                obj_dict = model_to_dict(obj)
                # Bug fix: model_to_dict omits non-editable fields such as
                # the AutoField pk, so rebuilt instances had no id; store
                # it explicitly so cached and fresh instances carry a pk.
                obj_dict["id"] = obj.id
                objects_to_cache[cls._get_cache_key(obj.id)] = obj_dict
                results[obj.id] = cls(**obj_dict)
            cache.set_many(objects_to_cache, timeout=cls.cache_timeout)

        return results[ids[0]] if len(ids) == 1 else [results[obj_id] for obj_id in ids]

    @classmethod
    def dirty_cache(cls, *ids):
        """Invalidate cached entries for one id, several ids, or a list."""
        if not ids:
            return

        ids = ids[0] if len(ids) == 1 and isinstance(ids[0], (list, tuple)) else ids
        cache.delete_many([cls._get_cache_key(obj_id) for obj_id in ids])

    def save(self, *args, **kwargs):
        """Persist, then invalidate so the next read re-caches fresh data."""
        super().save(*args, **kwargs)
        self.dirty_cache(self.id)

    def delete(self, *args, **kwargs):
        """Invalidate while the id is still set, then delete the row."""
        self.dirty_cache(self.id)
        super().delete(*args, **kwargs)