diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index dd51045..29480fc 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -1,31 +1,12 @@
-# This is a basic workflow to help you get started with Actions
-
 name: CI
 
-# Controls when the action will run.
 on:
-  # Triggers the workflow on push or pull request events but only for the main branch
-  push:
-    branches: [main]
   pull_request:
     branches: [main]
-
-  # Allows you to run this workflow manually from the Actions tab
   workflow_dispatch:
 
-# A workflow run is made up of one or more jobs that can run sequentially or in parallel
 jobs:
-  # This workflow contains a single job called "build"
   build:
-    # The type of runner that the job will run on
     runs-on: ubuntu-latest
-
-    # Steps represent a sequence of tasks that will be executed as part of the job
     steps:
-      # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
       - uses: actions/checkout@v2
-
-      - uses: akhileshns/heroku-deploy@v3.12.12 # This is the action
-        with:
-          heroku_api_key: ${{secrets.HEROKU_API_KEY}} # Located in GitHub secrets
-          heroku_app_name: "web-eye-tracker-1204" # Must be unique in Heroku
-          heroku_email: "karine.pistili@gmail.com"
diff --git a/app/services/config.py b/app/services/config.py
index 941c23f..e2db0d1 100644
--- a/app/services/config.py
+++ b/app/services/config.py
@@ -2,56 +2,18 @@
 hyperparameters = {
     "Lasso Regression": {
         "param_grid": {
-            "lasso__alpha": [
-                1e-15,
-                1e-10,
-                1e-8,
-                1e-3,
-                1e-2,
-                1e-1,
-                0.5,
-                1,
-                5,
-                10,
-                20,
-                30,
-                35,
-                40,
-                45,
-                50,
-                55,
-                100,
-            ]
+            "lasso__alpha": [10, 20, 30, 40, 45, 50, 55, 100, 200, 500]
         }
     },
     "Ridge Regression": {
         "param_grid": {
-            "ridge__alpha": [
-                1e-15,
-                1e-10,
-                1e-8,
-                1e-3,
-                1e-2,
-                1e-1,
-                0.5,
-                1,
-                5,
-                10,
-                20,
-                30,
-                35,
-                40,
-                45,
-                50,
-                55,
-                100,
-            ]
+            "ridge__alpha": [1e-3, 0.005, 0.01, 0.1, 0.5, 1.0, 10, 20, 50, 100]
         }
     },
     "Elastic Net": {
         "param_grid": {
-            "elasticnet__alpha": [1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 0.0, 1.0, 10.0, 100.0],
-            "elasticnet__l1_ratio": [0, 0.01, 0.2, 0.5, 0.8, 1],
+            "elasticnet__alpha": [0.1, 0.5, 1.0, 2.0, 5.0],
+            "elasticnet__l1_ratio": [0.5, 0.7, 0.8, 0.9, 1.0],
         }
     },
     "Bayesian Ridge": {
@@ -62,17 +24,24 @@
     },
     "SGD Regressor": {
         "param_grid": {
-            "sgdregressor__alpha": [0.0001, 0.001, 0.01, 0.1],
-            "sgdregressor__l1_ratio": [0, 0.2, 0.5, 0.7, 1],
-            "sgdregressor__max_iter": [500, 1000],
-            "sgdregressor__eta0": [0.0001, 0.001, 0.01],
+            "sgdregressor__alpha": [0.0001, 0.001],
+            "sgdregressor__l1_ratio": [0.5, 0.7, 0.8, 1],
+            "sgdregressor__max_iter": [1000],
+            "sgdregressor__eta0": [0.0001, 0.001],
         }
     },
     "Support Vector Regressor": {
         "param_grid": {
-            "svr__C": [0.1, 1, 10, 100, 1000],
-            "svr__gamma": [0.0001, 0.001, 0.01, 0.1, 1],
-            "svr__kernel": ["linear", "rbf", "poly"],
+            "svr__C": [50, 100, 200, 500, 1000, 2000],
+            "svr__gamma": [0.1, 0.5, 1, 2, 5],
+            "svr__kernel": ["rbf"],
+        }
+    },
+    "Random Forest Regressor": {
+        "param_grid": {
+            "randomforestregressor__n_estimators": [100],
+            "randomforestregressor__max_depth": [10],
+            "randomforestregressor__min_samples_split": [2, 5, 10],
         }
     },
 }
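The grids above are consumed by `GridSearchCV` inside `gaze_tracker.py` (the hunks below only show `refit="r2"` and `return_train_score=True`). As a minimal sketch of how a key such as `ridge__alpha` maps onto a named pipeline step, assuming a `StandardScaler` + `Ridge` pipeline, a multi-metric `scoring` list, and 5-fold CV (none of which are visible in this diff):

```python
# Sketch only: how a grid from config.py can drive GridSearchCV.
# The StandardScaler step, the scoring list, and cv=5 are assumptions;
# only refit="r2" and return_train_score=True appear in the diff below.
from sklearn.linear_model import Ridge
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

from app.services.config import hyperparameters  # the dict patched above

# make_pipeline names each step after its lowercased class name,
# which is why the grid keys look like "ridge__alpha".
pipeline = make_pipeline(StandardScaler(), Ridge())

grid_search = GridSearchCV(
    pipeline,
    hyperparameters["Ridge Regression"]["param_grid"],
    scoring=["r2", "neg_mean_squared_error"],  # assumed multi-metric scoring
    refit="r2",
    return_train_score=True,
    cv=5,
)
# grid_search.fit(X_train, y_train) would then select the best alpha by CV R².
```

`make_pipeline` lowercases each step's class name, which is why the keys in `config.py` carry prefixes such as `lasso__`, `svr__`, and `randomforestregressor__`.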
diff --git a/app/services/gaze_tracker.py b/app/services/gaze_tracker.py
index 3354551..00e272a 100644
--- a/app/services/gaze_tracker.py
+++ b/app/services/gaze_tracker.py
@@ -14,7 +14,7 @@
 from sklearn.pipeline import make_pipeline
 from sklearn.ensemble import RandomForestRegressor
 from sklearn.linear_model import Ridge
-
+import time
 
 # Model imports
 from sklearn import linear_model
@@ -87,13 +87,14 @@ def trian_and_predict(model_name, X_train, y_train, X_test, y_test, label):
     """
     if (
         model_name == "Linear Regression"
-        or model_name == "Elastic Net"
-        or model_name == "Support Vector Regressor"
     ):
         model = models[model_name]
+        start_time = time.time()
         model.fit(X_train, y_train)
+        end_time = time.time()
         y_pred = model.predict(X_test)
         print(f"Score {label}: {r2_score(y_test, y_pred)}")
+        print(f"Time {label}: {end_time - start_time}")
         return y_pred
     else:
         pipeline = models[model_name]
@@ -106,9 +107,12 @@ def trian_and_predict(model_name, X_train, y_train, X_test, y_test, label):
             refit="r2",
             return_train_score=True,
         )
+        start_time = time.time()
         grid_search.fit(X_train, y_train)
+        end_time = time.time()
         best_model = grid_search.best_estimator_
         y_pred = best_model.predict(X_test)
+        print(f"Time {label}: {end_time - start_time}")
         return y_pred
 
 
@@ -202,32 +206,25 @@ def predict(data, k, model_X, model_Y):
 
     # Create a dictionary to store the data
     data = {}
+    grouped = df_data.groupby("True XY")
 
-    # Iterate over the dataframe and store the data
-    for index, row in df_data.iterrows():
+    for (true_x, true_y), group in grouped:
 
-        # Get the outer and inner keys
-        outer_key = str(row["True X"]).split(".")[0]
-        inner_key = str(row["True Y"]).split(".")[0]
+        # keys
+        outer_key = str(true_x).split(".")[0]
+        inner_key = str(true_y).split(".")[0]
 
-        # If the outer key is not in the dictionary, add it
+        # create outer key if missing
         if outer_key not in data:
             data[outer_key] = {}
 
-        # Add the data to the dictionary
+        # fill data
         data[outer_key][inner_key] = {
-            "predicted_x": df_data[
-                (df_data["True X"] == row["True X"])
-                & (df_data["True Y"] == row["True Y"])
-            ]["Predicted X"].values.tolist(),
-            "predicted_y": df_data[
-                (df_data["True X"] == row["True X"])
-                & (df_data["True Y"] == row["True Y"])
-            ]["Predicted Y"].values.tolist(),
-            "PrecisionSD": precision_xy[(row["True X"], row["True Y"])],
-            "Accuracy": accuracy_xy[(row["True X"], row["True Y"])],
+            "predicted_x": group["Predicted X"].tolist(),
+            "predicted_y": group["Predicted Y"].tolist(),
+            "PrecisionSD": precision_xy[(true_x, true_y)],
+            "Accuracy": accuracy_xy[(true_x, true_y)],
         }
 
-    # Centroids of the clusters
     data["centroids"] = model.cluster_centers_.tolist()
 
diff --git a/app/services/reports/report.md b/app/services/reports/report.md
new file mode 100644
index 0000000..9635584
--- /dev/null
+++ b/app/services/reports/report.md
@@ -0,0 +1,200 @@
+# 🎯 Eye-Gaze Calibration Regression Study
+
+This project evaluates multiple regression models for **eye-gaze calibration**, aiming to map iris landmark coordinates to screen positions.
+
+The goal is to determine which regression model provides the best trade-off between:
+
+- 🎯 Prediction accuracy (screen point error)
+- 📏 Precision
+- ⚡ Execution speed
+- 🧠 Generalization (overfitting behavior)
+
+---
+
+## 📊 Dataset Overview
+
+- Total samples: **900**
+- Features:
+  - `left_iris_x`, `left_iris_y`
+  - `right_iris_x`, `right_iris_y`
+- Targets:
+  - `point_x`, `point_y`
+- Calibration points detected: **9 unique points (k = 9)**
+
+---
+
+## 🧠 Models Evaluated
+
+The following regression models were tested:
+
+- Linear Regression
+- Ridge Regression
+- Lasso Regression
+- Elastic Net
+- Bayesian Ridge
+- SGD Regressor
+- Support Vector Regressor (SVR)
+- Random Forest Regressor
+
+---
+
+## ⚙️ Evaluation Metrics
+
+The pipeline reports:
+
+- **Avg Accuracy** → positional error (lower is better)
+- **Avg Precision**
+- **Execution Time**
+- Axis-wise R² scores during calibration
+
+Hyperparameter tuning was performed with **GridSearchCV** for every model that defines a `param_grid` in `config.py`; Linear Regression is fit directly without a search.
+
+---
+
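The report lists the metrics but not how they are computed; `gaze_tracker.py` stores them per calibration point as `Accuracy` and `PrecisionSD` (see the `groupby` hunk above). A plausible sketch, assuming accuracy is the mean Euclidean error to the true target and precision is the RMS spread of the predictions around their own centroid; the helper `point_metrics` and both formulas are illustrative rather than taken from the repository:

```python
# Sketch only: one plausible reading of the per-point metrics stored in
# gaze_tracker.py ("Accuracy" and "PrecisionSD"); the exact formulas used by
# the project are not shown in this diff.
import numpy as np


def point_metrics(pred_x, pred_y, true_x, true_y):
    """Accuracy as mean Euclidean error, precision as RMS spread (assumed)."""
    pred = np.column_stack([pred_x, pred_y])
    true = np.array([true_x, true_y])

    # Positional error: mean distance from each prediction to the true target.
    accuracy = np.linalg.norm(pred - true, axis=1).mean()

    # Precision: RMS deviation of the predictions from their own centroid.
    precision_sd = np.sqrt(((pred - pred.mean(axis=0)) ** 2).sum(axis=1).mean())

    return float(accuracy), float(precision_sd)


# Example for one calibration point at (960, 540) with three predictions.
acc, prec = point_metrics([950.0, 965.0, 958.0], [545.0, 538.0, 536.0], 960.0, 540.0)
```

Under that reading, the Avg Accuracy and Avg Precision columns below would simply average these per-point values over the nine calibration points.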
+# 🧪 Experiment 1 — Baseline Results
+
+Initial run before pipeline modifications.
+
+### 🔎 Key Observations
+
+- Linear & Ridge produced stable baseline performance.
+- Elastic Net was very fast but less precise.
+- SVR achieved strong R² values.
+- Random Forest failed due to missing configuration.
+
+### 📋 Performance Summary
+
+| Model | Avg Accuracy | Avg Precision | Time (s) |
+|------|-------------|---------------|---------|
+| Linear Regression | 235.37 | 49.11 | 1.56 |
+| Ridge Regression | 236.13 | 48.81 | 0.67 |
+| Lasso Regression | 290.82 | 45.88 | 0.77 |
+| Elastic Net | 298.97 | 38.66 | 0.11 |
+| Bayesian Ridge | 235.39 | 48.12 | 1.69 |
+| SGD Regressor | 241.21 | 46.88 | 15.31 |
+| Support Vector Regressor | 266.18 | 48.07 | 0.13 |
+| Random Forest | ❌ Error | — | — |
+
+---
+
+# 🧪 Experiment 2 — Pipeline Improvements
+
+Changes made:
+
+- Added Random Forest configuration
+- Expanded hyperparameter grids
+- Improved pipeline stability
+
+### 📋 Updated Performance Summary
+
+| Model | Avg Accuracy | Avg Precision | Time (s) |
+|------|-------------|---------------|---------|
+| Linear Regression | 235.37 | 49.11 | 1.50 |
+| Ridge Regression | 235.87 | 48.71 | 0.39 |
+| Lasso Regression | 292.79 | 45.10 | 0.41 |
+| Elastic Net | 298.97 | 38.66 | 0.13 |
+| Bayesian Ridge | 235.39 | 48.12 | 1.68 |
+| SGD Regressor | 241.24 | 46.94 | 3.78 |
+| Support Vector Regressor | 266.18 | 48.07 | 0.36 |
+| Random Forest | **52.85** | 31.96 | 4.43 |
+
+### 💡 Insights
+
+- Random Forest dramatically reduced positional error.
+- Precision dropped, suggesting sensitivity or instability.
+- Ridge became faster after optimization.
+- SVR remained a strong non-linear alternative.
+
+---
+
+# 🧪 Experiment 3 — Train/Validation Split (Overfitting Check)
+
+To evaluate generalization:
+
+- Training: **765 samples**
+- Validation: **135 samples**
+
+Each model went through:
+
+1. Calibration phase (internal split)
+2. Validation phase (hold-out set)
+
+---
+
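The 765/135 split corresponds to holding out 15% of the 900 samples. A minimal sketch of such a split with scikit-learn, assuming the feature and target columns from the Dataset Overview and a hypothetical CSV of the collected samples (whether the real pipeline shuffles or stratifies by calibration point is not shown in this diff):

```python
# Sketch only: a hold-out split matching the reported sizes (765 / 135, i.e. 85% / 15%).
# The CSV path is hypothetical; shuffling vs. stratifying by calibration point
# is an open question not answered by this diff.
import pandas as pd
from sklearn.model_selection import train_test_split

df = pd.read_csv("calibration_samples.csv")  # hypothetical dump of the 900 samples

X = df[["left_iris_x", "left_iris_y", "right_iris_x", "right_iris_y"]]
y = df[["point_x", "point_y"]]

X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.15, random_state=42, shuffle=True
)
# With 900 rows this yields 765 training and 135 validation samples,
# matching the Experiment 3 setup above.
```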
+## 📋 Train vs Validation Results
+
+| Model | Calib Acc | Calib Prec | Valid Acc | Valid Prec | Time (s) |
+|------|-----------|------------|-----------|------------|---------|
+| Linear Regression | 245.73 | 49.84 | 229.45 | 51.55 | 1.55 |
+| Ridge Regression | 247.09 | 49.94 | 229.64 | 51.80 | 0.42 |
+| Lasso Regression | 251.61 | 47.05 | 265.08 | 44.32 | 0.44 |
+| Elastic Net | 301.30 | 41.36 | 294.61 | 38.25 | 0.09 |
+| Bayesian Ridge | 246.86 | 50.26 | 229.11 | 50.94 | 1.69 |
+| SGD Regressor | 247.04 | 49.60 | 239.26 | 48.56 | 3.98 |
+| Support Vector Regressor | 281.02 | 49.79 | 257.47 | 47.05 | 0.22 |
+| Random Forest | **69.50** | 46.49 | **46.65** | 29.15 | 6.57 |
+
+---
+
+## 🔍 Overfitting Analysis
+
+### ✅ Stable Models
+- Linear Regression
+- Ridge Regression
+- Bayesian Ridge
+
+These models showed consistent calibration and validation behavior.
+
+### ⚠️ Potential Overfitting
+- Random Forest achieved the lowest error but suffered a large precision drop.
+- Indicates high capacity and sensitivity to dataset structure.
+
+### ⚡ Best Balance
+- SVR provided a strong balance between:
+  - Accuracy
+  - Speed
+  - Generalization
+
+---
+
+# 🏆 Final Findings
+
+### 🥇 Best Raw Accuracy
+**Random Forest Regressor**
+
+- Lowest positional error
+- Higher computation cost
+- Possible overfitting
+
+### 🥈 Most Stable Models
+- Linear Regression
+- Ridge Regression
+
+### 🥉 Best Overall Trade-off
+**Support Vector Regressor (SVR)**
+
+---
+
+# 🚀 Future Improvements
+
+Possible next steps:
+
+- Feature scaling & normalization experiments
+- Temporal smoothing for gaze stability
+- Ensemble methods (Linear + Non-linear)
+- Neural network-based gaze regression
+- Real-time latency benchmarking
+
+---
+
+# 📌 Conclusion
+
+This study shows that:
+
+- Non-linear models improve gaze estimation accuracy.
+- Random Forest can greatly reduce error but may overfit.
+- Linear models remain strong baselines for robustness.
+- Proper train/validation splits are essential for realistic performance evaluation.
+
+---
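On the "Real-time latency benchmarking" item under Future Improvements: the timers added to `trian_and_predict()` in `gaze_tracker.py` measure fit and grid-search time, not per-frame inference. A rough sketch of a single-sample prediction benchmark, assuming an already fitted model and a NumPy feature matrix; the helper `benchmark_predict` is hypothetical:

```python
# Sketch only: a per-prediction latency micro-benchmark for live gaze tracking.
# benchmark_predict is a hypothetical helper; model is any fitted regressor
# (e.g. the Random Forest or SVR pipelines above) and X a NumPy feature matrix.
import time

import numpy as np


def benchmark_predict(model, X, n_iter=1000):
    """Return (mean, p95) single-sample prediction latency in milliseconds."""
    rng = np.random.default_rng(0)
    timings = []
    for _ in range(n_iter):
        row = X[rng.integers(len(X))][None, :]  # one sample, shape (1, n_features)
        t0 = time.perf_counter()
        model.predict(row)
        timings.append((time.perf_counter() - t0) * 1000.0)
    return float(np.mean(timings)), float(np.percentile(timings, 95))
```

For live use, the p95 of single-sample `predict` calls is usually more relevant than training time, since it bounds the worst-case per-frame delay.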