From db9b6516d8cf672b7538302c7ab8c094cdccac31 Mon Sep 17 00:00:00 2001 From: Soham Date: Sun, 15 Feb 2026 15:48:55 +0530 Subject: [PATCH 1/3] feat: Add model caching to batch_predict endpoint to eliminate redundant training --- app/services/gaze_tracker.py | 54 +++++++++++++++++++++++++++++++----- 1 file changed, 47 insertions(+), 7 deletions(-) diff --git a/app/services/gaze_tracker.py b/app/services/gaze_tracker.py index 3354551..c625193 100644 --- a/app/services/gaze_tracker.py +++ b/app/services/gaze_tracker.py @@ -1,6 +1,8 @@ # Necessary imports +import os import math import warnings +import pickle warnings.filterwarnings("ignore") @@ -249,6 +251,8 @@ def predict_new_data_simple( SQUASH_LIMIT_X = 1.0 SQUASH_LIMIT_Y = 1.0 Y_GAIN = 1.2 # adjustment to compensate for vertical bias + csv_filename = os.path.basename(calib_csv_path) + calib_id = csv_filename.replace("_fixed_train_data.csv", "") # ============================ # LOAD TRAIN @@ -309,14 +313,50 @@ def predict_new_data_simple( diff_y_norm, rel_y_norm ]) - # ============================ - # MODELS - # ============================ - model_x = make_pipeline(StandardScaler(), Ridge(alpha=1.0)) - model_y = make_pipeline(StandardScaler(), Ridge(alpha=1.0)) + model_x_pickle_filepath = Path().absolute() / "app/services/calib_validation/pickles" / f"{calib_id}_model_x.pkl" + model_y_pickle_filepath = Path().absolute() / "app/services/calib_validation/pickles" / f"{calib_id}_model_y.pkl" + + if os.path.exists(model_x_pickle_filepath) and os.path.exists(model_y_pickle_filepath): + + print(f'Loading cached models for {calib_id}') + try: + with open(model_x_pickle_filepath, 'rb') as f: + model_x = pickle.load(f) + + with open(model_y_pickle_filepath, 'rb') as f: + model_y = pickle.load(f) + print(f'Successfully loaded models for {calib_id}') + except Exception as e: + print(f'Unexpected error with caching for {calib_id}: {e}') + print(f'Falling back to training new models') + + model_x = make_pipeline(StandardScaler(), Ridge(alpha=1.0)) + model_y = make_pipeline(StandardScaler(), Ridge(alpha=1.0)) + + model_x.fit(X_train_x, y_train_x) + model_y.fit(X_train_y, y_train_y) + else: + + # ============================ + # MODELS + # ============================ + model_x = make_pipeline(StandardScaler(), Ridge(alpha=1.0)) + model_y = make_pipeline(StandardScaler(), Ridge(alpha=1.0)) + + model_x = make_pipeline(StandardScaler(), Ridge(alpha=1.0)) + model_y = make_pipeline(StandardScaler(), Ridge(alpha=1.0)) + + + model_x.fit(X_train_x, y_train_x) + model_y.fit(X_train_y, y_train_y) + + model_x_pickle_filepath.parent.mkdir(parents=True, exist_ok=True) + + with open(model_x_pickle_filepath, 'wb') as f: + pickle.dump(model_x, f) - model_x.fit(X_train_x, y_train_x) - model_y.fit(X_train_y, y_train_y) + with open(model_y_pickle_filepath, 'wb') as f: + pickle.dump(model_y, f) # ============================ # Real scale (calibration) - normalize predicted values to screen coordinates From 9535e823b10b95f24a6fd336a7a4d26a0fd2c531 Mon Sep 17 00:00:00 2001 From: Soham Date: Sun, 15 Feb 2026 21:14:26 +0530 Subject: [PATCH 2/3] refactor: Implement thread-safe in-memory model caching --- app/services/gaze_tracker.py | 43 ++++++++++-------------------------- 1 file changed, 12 insertions(+), 31 deletions(-) diff --git a/app/services/gaze_tracker.py b/app/services/gaze_tracker.py index c625193..a12f6aa 100644 --- a/app/services/gaze_tracker.py +++ b/app/services/gaze_tracker.py @@ -78,6 +78,7 @@ "mae": make_scorer(mean_absolute_error), } +model_cache={} def squash(v, limit=1.0): """Squash não-linear estilo WebGazer""" @@ -313,28 +314,12 @@ def predict_new_data_simple( diff_y_norm, rel_y_norm ]) - model_x_pickle_filepath = Path().absolute() / "app/services/calib_validation/pickles" / f"{calib_id}_model_x.pkl" - model_y_pickle_filepath = Path().absolute() / "app/services/calib_validation/pickles" / f"{calib_id}_model_y.pkl" - - if os.path.exists(model_x_pickle_filepath) and os.path.exists(model_y_pickle_filepath): - - print(f'Loading cached models for {calib_id}') - try: - with open(model_x_pickle_filepath, 'rb') as f: - model_x = pickle.load(f) - - with open(model_y_pickle_filepath, 'rb') as f: - model_y = pickle.load(f) - print(f'Successfully loaded models for {calib_id}') - except Exception as e: - print(f'Unexpected error with caching for {calib_id}: {e}') - print(f'Falling back to training new models') - - model_x = make_pipeline(StandardScaler(), Ridge(alpha=1.0)) - model_y = make_pipeline(StandardScaler(), Ridge(alpha=1.0)) - - model_x.fit(X_train_x, y_train_x) - model_y.fit(X_train_y, y_train_y) + if calib_id in model_cache: + print(f'Loading models from cache') + cached_models = model_cache.get(calib_id) + model_x = cached_models.get('x') + model_y = cached_models.get('y') + else: # ============================ @@ -343,20 +328,16 @@ def predict_new_data_simple( model_x = make_pipeline(StandardScaler(), Ridge(alpha=1.0)) model_y = make_pipeline(StandardScaler(), Ridge(alpha=1.0)) - model_x = make_pipeline(StandardScaler(), Ridge(alpha=1.0)) - model_y = make_pipeline(StandardScaler(), Ridge(alpha=1.0)) - - model_x.fit(X_train_x, y_train_x) model_y.fit(X_train_y, y_train_y) - model_x_pickle_filepath.parent.mkdir(parents=True, exist_ok=True) + cached_models={ + "x":model_x, + "y":model_y + } - with open(model_x_pickle_filepath, 'wb') as f: - pickle.dump(model_x, f) + model_cache[calib_id]=cached_models - with open(model_y_pickle_filepath, 'wb') as f: - pickle.dump(model_y, f) # ============================ # Real scale (calibration) - normalize predicted values to screen coordinates From 4eeb396fd25ae1494cd9422653ce55da5f1ac75a Mon Sep 17 00:00:00 2001 From: Soham Date: Sun, 15 Feb 2026 21:26:57 +0530 Subject: [PATCH 3/3] perf: Implement LRU cache for in-memory model caching --- app/services/gaze_tracker.py | 74 ++++++++++++++++++++++++------------ 1 file changed, 50 insertions(+), 24 deletions(-) diff --git a/app/services/gaze_tracker.py b/app/services/gaze_tracker.py index a12f6aa..f987b17 100644 --- a/app/services/gaze_tracker.py +++ b/app/services/gaze_tracker.py @@ -1,8 +1,8 @@ # Necessary imports import os +import threading import math import warnings -import pickle warnings.filterwarnings("ignore") @@ -44,7 +44,7 @@ func_total_accuracy, ) from app.services.config import hyperparameters - +from collections import defaultdict,OrderedDict # Machine learning models to use models = { @@ -78,7 +78,32 @@ "mae": make_scorer(mean_absolute_error), } -model_cache={} +class LRUCache: + def __init__(self, max_size=100): + self.max_size = max_size + self.cache = OrderedDict() + + def get(self, key): + if key in self.cache: + # Move to end + self.cache.move_to_end(key) + return self.cache[key] + return None + + def put(self, key, value): + if key in self.cache: + self.cache.move_to_end(key) + elif len(self.cache) >= self.max_size: + # Remove LRU + self.cache.popitem(last=False) + + self.cache[key] = value + + def __contains__(self, key): + return key in self.cache + +model_cache=LRUCache(max_size=100) +calibration_locks = defaultdict(threading.Lock) def squash(v, limit=1.0): """Squash não-linear estilo WebGazer""" @@ -314,29 +339,30 @@ def predict_new_data_simple( diff_y_norm, rel_y_norm ]) - if calib_id in model_cache: - print(f'Loading models from cache') + with calibration_locks[calib_id]: cached_models = model_cache.get(calib_id) - model_x = cached_models.get('x') - model_y = cached_models.get('y') - - else: - - # ============================ - # MODELS - # ============================ - model_x = make_pipeline(StandardScaler(), Ridge(alpha=1.0)) - model_y = make_pipeline(StandardScaler(), Ridge(alpha=1.0)) - - model_x.fit(X_train_x, y_train_x) - model_y.fit(X_train_y, y_train_y) - - cached_models={ - "x":model_x, - "y":model_y - } - model_cache[calib_id]=cached_models + if cached_models: + print(f'Loading models from cache') + model_x = cached_models.get('x') + model_y = cached_models.get('y') + else: + + # ============================ + # MODELS + # ============================ + model_x = make_pipeline(StandardScaler(), Ridge(alpha=1.0)) + model_y = make_pipeline(StandardScaler(), Ridge(alpha=1.0)) + + model_x.fit(X_train_x, y_train_x) + model_y.fit(X_train_y, y_train_y) + + model_cache.put(calib_id,{ + "x":model_x, + "y":model_y + }) + + # ============================