From 92f2c4701355fa7e722bb6517d9a0efc219cd490 Mon Sep 17 00:00:00 2001 From: midaa1 Date: Thu, 5 Feb 2026 16:45:46 +0200 Subject: [PATCH 01/10] Update GitHub Actions workflow to remove push trigger Removed push trigger for main branch and unused steps. --- .github/workflows/main.yml | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index dd51045..8d68e71 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -5,27 +5,13 @@ name: CI # Controls when the action will run. on: # Triggers the workflow on push or pull request events but only for the main branch - push: - branches: [main] pull_request: branches: [main] # Allows you to run this workflow manually from the Actions tab workflow_dispatch: - -# A workflow run is made up of one or more jobs that can run sequentially or in parallel jobs: - # This workflow contains a single job called "build" build: - # The type of runner that the job will run on runs-on: ubuntu-latest - - # Steps represent a sequence of tasks that will be executed as part of the job steps: - # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it - uses: actions/checkout@v2 - - uses: akhileshns/heroku-deploy@v3.12.12 # This is the action - with: - heroku_api_key: ${{secrets.HEROKU_API_KEY}} # Located in GitHub secrets - heroku_app_name: "web-eye-tracker-1204" # Must be unique in Heroku - heroku_email: "karine.pistili@gmail.com" From f425920fe58aa4e0a04800f162e3305c68f6717a Mon Sep 17 00:00:00 2001 From: midaa1 Date: Thu, 5 Feb 2026 16:46:46 +0200 Subject: [PATCH 02/10] Refactor CI workflow configuration --- .github/workflows/main.yml | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 8d68e71..29480fc 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -1,15 +1,10 @@ -# This is a basic workflow to help you get started with Actions - name: CI -# Controls when the action will run. on: - # Triggers the workflow on push or pull request events but only for the main branch pull_request: branches: [main] - - # Allows you to run this workflow manually from the Actions tab workflow_dispatch: + jobs: build: runs-on: ubuntu-latest From 98e1b5099971df5c3d6b4775e8531aaa149cfbe2 Mon Sep 17 00:00:00 2001 From: midaa1 Date: Thu, 5 Feb 2026 16:49:31 +0200 Subject: [PATCH 03/10] Enhance CI workflow with Heroku deployment steps Added push event trigger for CI workflow and included steps for Heroku CLI installation and deployment. 
---
 .github/workflows/main.yml | 23 ++++++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 29480fc..362ded0 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -1,6 +1,8 @@
 name: CI

 on:
+  push:
+    branches: [main]
   pull_request:
     branches: [main]
   workflow_dispatch:
@@ -8,5 +10,24 @@ on:
 jobs:
   build:
     runs-on: ubuntu-latest
+
     steps:
-      - uses: actions/checkout@v2
+      # Checkout repository
+      - name: Checkout code
+        uses: actions/checkout@v2
+
+      # Install Heroku CLI
+      - name: Install Heroku CLI
+        run: curl https://cli-assets.heroku.com/install-ubuntu.sh | sh
+
+      # Verify Heroku installation (optional but useful)
+      - name: Verify Heroku CLI
+        run: heroku --version
+
+      # Deploy to Heroku
+      - name: Deploy to Heroku
+        uses: akhileshns/heroku-deploy@v3.12.12
+        with:
+          heroku_api_key: ${{ secrets.HEROKU_API_KEY }}
+          heroku_app_name: "web-eye-tracker-1204"
+          heroku_email: "karine.pistili@gmail.com"

From a7a152511b8875c8eecdeb44f41a670abb960eda Mon Sep 17 00:00:00 2001
From: midaa1
Date: Thu, 5 Feb 2026 16:51:13 +0200
Subject: [PATCH 04/10] Update CI workflow to remove Heroku deployment

Removed push trigger and Heroku deployment steps from CI workflow.
---
 .github/workflows/main.yml | 23 +----------------------
 1 file changed, 1 insertion(+), 22 deletions(-)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 362ded0..29480fc 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -1,8 +1,6 @@
 name: CI

 on:
-  push:
-    branches: [main]
   pull_request:
     branches: [main]
   workflow_dispatch:
@@ -10,24 +8,5 @@ on:
 jobs:
   build:
     runs-on: ubuntu-latest
-
     steps:
-      # Checkout repository
-      - name: Checkout code
-        uses: actions/checkout@v2
-
-      # Install Heroku CLI
-      - name: Install Heroku CLI
-        run: curl https://cli-assets.heroku.com/install-ubuntu.sh | sh
-
-      # Verify Heroku installation (optional but useful)
-      - name: Verify Heroku CLI
-        run: heroku --version
-
-      # Deploy to Heroku
-      - name: Deploy to Heroku
-        uses: akhileshns/heroku-deploy@v3.12.12
-        with:
-          heroku_api_key: ${{ secrets.HEROKU_API_KEY }}
-          heroku_app_name: "web-eye-tracker-1204"
-          heroku_email: "karine.pistili@gmail.com"
+      - uses: actions/checkout@v2

From d0f508430f984316a5787b1f908655cc5fad40e0 Mon Sep 17 00:00:00 2001
From: midaa1
Date: Sun, 15 Feb 2026 12:46:41 +0200
Subject: [PATCH 05/10] Add random forest configuration and fix the other
 model configurations

---
 app/services/config.py         |  67 +---
 app/services/reports/report.md | 714 +++++++++++++++++++++++++++++++++
 2 files changed, 732 insertions(+), 49 deletions(-)
 create mode 100644 app/services/reports/report.md

diff --git a/app/services/config.py b/app/services/config.py
index 941c23f..e2db0d1 100644
--- a/app/services/config.py
+++ b/app/services/config.py
@@ -2,56 +2,18 @@
 hyperparameters = {
     "Lasso Regression": {
         "param_grid": {
-            "lasso__alpha": [
-                1e-15,
-                1e-10,
-                1e-8,
-                1e-3,
-                1e-2,
-                1e-1,
-                0.5,
-                1,
-                5,
-                10,
-                20,
-                30,
-                35,
-                40,
-                45,
-                50,
-                55,
-                100,
-            ]
+            "lasso__alpha": [10, 20, 30, 40, 45, 50, 55, 100, 200, 500]
         }
     },
     "Ridge Regression": {
        "param_grid": {
-            "ridge__alpha": [
-                1e-15,
-                1e-10,
-                1e-8,
-                1e-3,
-                1e-2,
-                1e-1,
-                0.5,
-                1,
-                5,
-                10,
-                20,
-                30,
-                35,
-                40,
-                45,
-                50,
-                55,
-                100,
-            ]
+            "ridge__alpha": [1e-3, 0.005, 0.01, 0.1, 0.5, 1.0, 10, 20, 50, 100]
        }
    },
    "Elastic Net": {
        "param_grid": {
-            "elasticnet__alpha": [1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 0.0, 1.0, 10.0, 100.0],
-            "elasticnet__l1_ratio": [0, 0.01, 0.2, 0.5, 0.8, 1],
+            "elasticnet__alpha": [0.1, 0.5, 1.0, 2.0, 5.0],
+            "elasticnet__l1_ratio": [0.5, 0.7, 0.8, 0.9, 1.0],
        }
    },
    "Bayesian Ridge": {
@@ -62,17 +24,24 @@
    },
    "SGD Regressor": {
        "param_grid": {
-            "sgdregressor__alpha": [0.0001, 0.001, 0.01, 0.1],
-            "sgdregressor__l1_ratio": [0, 0.2, 0.5, 0.7, 1],
-            "sgdregressor__max_iter": [500, 1000],
-            "sgdregressor__eta0": [0.0001, 0.001, 0.01],
+            "sgdregressor__alpha": [0.0001, 0.001],
+            "sgdregressor__l1_ratio": [0.5, 0.7, 0.8, 1],
+            "sgdregressor__max_iter": [1000],
+            "sgdregressor__eta0": [0.0001, 0.001],
        }
    },
    "Support Vector Regressor": {
        "param_grid": {
-            "svr__C": [0.1, 1, 10, 100, 1000],
-            "svr__gamma": [0.0001, 0.001, 0.01, 0.1, 1],
-            "svr__kernel": ["linear", "rbf", "poly"],
+            "svr__C": [50, 100, 200, 500, 1000, 2000],
+            "svr__gamma": [0.1, 0.5, 1, 2, 5],
+            "svr__kernel": ["rbf"],
+        }
+    },
+    "Random Forest Regressor": {
+        "param_grid": {
+            "randomforestregressor__n_estimators": [100],
+            "randomforestregressor__max_depth": [10],
+            "randomforestregressor__min_samples_split": [2, 5, 10],
        }
    },
 }
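For context, these per-model grids are what the GridSearchCV call in app/services/gaze_tracker.py consumes (its surrounding code is visible as context lines in PATCH 06 below). A minimal sketch of that wiring, assuming a make_pipeline setup whose lowercased step names are what the `ridge__alpha`-style keys address; the StandardScaler step and cv=5 are illustrative assumptions, not confirmed project code:

```python
# Sketch only: how a "param_grid" entry from config.py drives model selection.
from sklearn.linear_model import Ridge
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler  # assumed preprocessing step

# make_pipeline names each step after its lowercased class, so the Ridge
# step is addressed as "ridge" and its alpha as "ridge__alpha".
param_grid = {"ridge__alpha": [1e-3, 0.005, 0.01, 0.1, 0.5, 1.0, 10, 20, 50, 100]}
pipeline = make_pipeline(StandardScaler(), Ridge())

grid_search = GridSearchCV(
    pipeline,
    param_grid,
    scoring=["r2", "neg_mean_absolute_error"],  # R2 ranks candidates, MAE is reported
    refit="r2",
    cv=5,  # assumed fold count
    return_train_score=True,
)
# grid_search.fit(X_train, y_train) then exposes grid_search.best_estimator_,
# which is used per axis (X and Y are fit as separate targets).
```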
diff --git a/app/services/reports/report.md b/app/services/reports/report.md
new file mode 100644
index 0000000..7e3359c
--- /dev/null
+++ b/app/services/reports/report.md
@@ -0,0 +1,714 @@
+Detected 9 unique calibration points. Setting k=9.

+## Before any modifications

+====================================================================================================
+MODEL: Linear Regression
+====================================================================================================

+Model Linear Regression has no hyperparameter grid defined for GridSearchCV.

+Running full 'predict' pipeline...
+left_iris_x left_iris_y right_iris_x right_iris_y point_x point_y
+0 751.846863 270.624054 641.400879 275.499695 100.0 100.0
+1 739.366943 275.130646 641.746643 276.419403 100.0 100.0
+2 739.202148 273.828247 641.387695 275.756897 100.0 100.0
+3 739.340332 273.977570 641.849548 276.237335 100.0 100.0
+4 738.536682 274.158722 641.225525 276.203308 100.0 100.0
+Score X: 0.8210933512146253
+Time X: 0.0
+Score Y: 0.955456427962058
+Time Y: 0.0008034706115722656
+Pipeline Result -> Avg Accuracy: 235.3662, Avg Precision: 49.1126, Total Time: 1.56s

+====================================================================================================
+MODEL: Ridge Regression
+====================================================================================================

+Searching for top 5 parameter combinations (Axis X)...
+Rank | R2 Score | MAE | Parameters

+---

+1 | 0.7475 | 241.7027 | {'ridge__alpha': 0.01}
+2 | 0.6979 | 273.3379 | {'ridge__alpha': 100}
+3 | 0.6836 | 269.0442 | {'ridge__alpha': 55}
+4 | 0.6800 | 268.6203 | {'ridge__alpha': 50}
+5 | 0.6757 | 268.2162 | {'ridge__alpha': 45}

+Running full 'predict' pipeline...
+left_iris_x left_iris_y right_iris_x right_iris_y point_x point_y
+0 751.846863 270.624054 641.400879 275.499695 100.0 100.0
+1 739.366943 275.130646 641.746643 276.419403 100.0 100.0
+2 739.202148 273.828247 641.387695 275.756897 100.0 100.0
+3 739.340332 273.977570 641.849548 276.237335 100.0 100.0
+4 738.536682 274.158722 641.225525 276.203308 100.0 100.0
+Time X: 0.2734954357147217
+Time Y: 0.2774362564086914
+Pipeline Result -> Avg Accuracy: 236.1323, Avg Precision: 48.8059, Total Time: 0.66s

+====================================================================================================
+MODEL: Lasso Regression
+====================================================================================================

+Searching for top 5 parameter combinations (Axis X)...
+Rank | R2 Score | MAE | Parameters

+---

+1 | 0.6749 | 289.7889 | {'lasso__alpha': 100}
+2 | 0.6694 | 275.8380 | {'lasso__alpha': 55}
+3 | 0.6678 | 274.6908 | {'lasso__alpha': 50}
+4 | 0.6658 | 273.6146 | {'lasso__alpha': 45}
+5 | 0.6634 | 272.5937 | {'lasso__alpha': 40}

+Running full 'predict' pipeline...
+left_iris_x left_iris_y right_iris_x right_iris_y point_x point_y
+0 751.846863 270.624054 641.400879 275.499695 100.0 100.0
+1 739.366943 275.130646 641.746643 276.419403 100.0 100.0
+2 739.202148 273.828247 641.387695 275.756897 100.0 100.0
+3 739.340332 273.977570 641.849548 276.237335 100.0 100.0
+4 738.536682 274.158722 641.225525 276.203308 100.0 100.0
+Time X: 0.3323993682861328
+Time Y: 0.31626462936401367
+Pipeline Result -> Avg Accuracy: 290.8152, Avg Precision: 45.8779, Total Time: 0.77s

+====================================================================================================
+MODEL: Elastic Net
+====================================================================================================

+Searching for top 5 parameter combinations (Axis X)...
+Rank | R2 Score | MAE | Parameters

+---

+1 | 0.6996 | 274.9757 | {'elasticnet__alpha': 1.0, 'elasticnet__l1_ratio': 0.8}
+2 | 0.6896 | 291.7449 | {'elasticnet__alpha': 1.0, 'elasticnet__l1_ratio': 0.5}
+3 | 0.6851 | 269.2697 | {'elasticnet__alpha': 0.1, 'elasticnet__l1_ratio': 0}
+4 | 0.6848 | 269.2195 | {'elasticnet__alpha': 0.1, 'elasticnet__l1_ratio': 0.01}
+5 | 0.6768 | 268.2999 | {'elasticnet__alpha': 0.1, 'elasticnet__l1_ratio': 0.2}

+Running full 'predict' pipeline...
+left_iris_x left_iris_y right_iris_x right_iris_y point_x point_y
+0 751.846863 270.624054 641.400879 275.499695 100.0 100.0
+1 739.366943 275.130646 641.746643 276.419403 100.0 100.0
+2 739.202148 273.828247 641.387695 275.756897 100.0 100.0
+3 739.340332 273.977570 641.849548 276.237335 100.0 100.0
+4 738.536682 274.158722 641.225525 276.203308 100.0 100.0
+Score X: 0.7457386415719489
+Time X: 0.0
+Score Y: 0.9049978924542859
+Time Y: 0.0
+Pipeline Result -> Avg Accuracy: 298.9683, Avg Precision: 38.6566, Total Time: 0.11s

+====================================================================================================
+MODEL: Bayesian Ridge
+====================================================================================================

+Searching for top 5 parameter combinations (Axis X)...
+Rank | R2 Score | MAE | Parameters

+---

+1 | 0.2365 | 255.4301 | {'bayesianridge__alpha_init': 1, 'bayesianridge__lambda_init': 1e-09}
+2 | 0.2365 | 255.4301 | {'bayesianridge__alpha_init': 1.3, 'bayesianridge__lambda_init': 1e-09}
+3 | 0.2365 | 255.4301 | {'bayesianridge__alpha_init': 1.1, 'bayesianridge__lambda_init': 1e-09}
+4 | 0.2365 | 255.4301 | {'bayesianridge__alpha_init': 1.2, 'bayesianridge__lambda_init': 1e-09}
+5 | 0.2365 | 255.4301 | {'bayesianridge__alpha_init': 1.4, 'bayesianridge__lambda_init': 1e-09}

+Running full 'predict' pipeline...
+left_iris_x left_iris_y right_iris_x right_iris_y point_x point_y
+0 751.846863 270.624054 641.400879 275.499695 100.0 100.0
+1 739.366943 275.130646 641.746643 276.419403 100.0 100.0
+2 739.202148 273.828247 641.387695 275.756897 100.0 100.0
+3 739.340332 273.977570 641.849548 276.237335 100.0 100.0
+4 738.536682 274.158722 641.225525 276.203308 100.0 100.0
+Time X: 0.801609992980957
+Time Y: 0.7761216163635254
+Pipeline Result -> Avg Accuracy: 235.3889, Avg Precision: 48.1209, Total Time: 1.69s

+====================================================================================================
+MODEL: SGD Regressor
+====================================================================================================

+Searching for top 5 parameter combinations (Axis X)...
+Rank | R2 Score | MAE | Parameters

+---

+1 | 0.7123 | 256.6649 | {'sgdregressor__alpha': 0.0001, 'sgdregressor__eta0': 0.0001, 'sgdregressor__l1_ratio': 0.7, 'sgdregressor__max_iter': 1000}
+2 | 0.7123 | 256.6659 | {'sgdregressor__alpha': 0.0001, 'sgdregressor__eta0': 0.0001, 'sgdregressor__l1_ratio': 0, 'sgdregressor__max_iter': 1000}
+3 | 0.7123 | 256.6661 | {'sgdregressor__alpha': 0.0001, 'sgdregressor__eta0': 0.0001, 'sgdregressor__l1_ratio': 0.2, 'sgdregressor__max_iter': 1000}
+4 | 0.7123 | 256.6659 | {'sgdregressor__alpha': 0.0001, 'sgdregressor__eta0': 0.0001, 'sgdregressor__l1_ratio': 1, 'sgdregressor__max_iter': 1000}
+5 | 0.7123 | 256.6658 | {'sgdregressor__alpha': 0.0001, 'sgdregressor__eta0': 0.0001, 'sgdregressor__l1_ratio': 0.5, 'sgdregressor__max_iter': 1000}

+Running full 'predict' pipeline...
+left_iris_x left_iris_y right_iris_x right_iris_y point_x point_y
+0 751.846863 270.624054 641.400879 275.499695 100.0 100.0
+1 739.366943 275.130646 641.746643 276.419403 100.0 100.0
+2 739.202148 273.828247 641.387695 275.756897 100.0 100.0
+3 739.340332 273.977570 641.849548 276.237335 100.0 100.0
+4 738.536682 274.158722 641.225525 276.203308 100.0 100.0
+Time X: 7.285875558853149
+Time Y: 7.9095728397369385
+Pipeline Result -> Avg Accuracy: 241.2138, Avg Precision: 46.8787, Total Time: 15.31s

+====================================================================================================
+MODEL: Support Vector Regressor
+====================================================================================================

+Searching for top 5 parameter combinations (Axis X)...
+Rank | R2 Score | MAE | Parameters

+---

+1 | 0.9087 | 75.2710 | {'svr__C': 1000, 'svr__gamma': 1, 'svr__kernel': 'rbf'}
+2 | 0.9074 | 87.7716 | {'svr__C': 100, 'svr__gamma': 1, 'svr__kernel': 'rbf'}
+3 | 0.8694 | 120.2317 | {'svr__C': 1000, 'svr__gamma': 0.1, 'svr__kernel': 'rbf'}
+4 | 0.8157 | 137.6160 | {'svr__C': 100, 'svr__gamma': 0.1, 'svr__kernel': 'rbf'}
+5 | 0.7947 | 167.9109 | {'svr__C': 1000, 'svr__gamma': 0.01, 'svr__kernel': 'rbf'}

+Running full 'predict' pipeline...
+left_iris_x left_iris_y right_iris_x right_iris_y point_x point_y
+0 751.846863 270.624054 641.400879 275.499695 100.0 100.0
+1 739.366943 275.130646 641.746643 276.419403 100.0 100.0
+2 739.202148 273.828247 641.387695 275.756897 100.0 100.0
+3 739.340332 273.977570 641.849548 276.237335 100.0 100.0
+4 738.536682 274.158722 641.225525 276.203308 100.0 100.0
+Score X: 0.7693369168357224
+Time X: 0.0077130794525146484
+Score Y: 0.9476667108830322
+Time Y: 0.005934715270996094
+Pipeline Result -> Avg Accuracy: 266.1792, Avg Precision: 48.0700, Total Time: 0.13s

+====================================================================================================
+MODEL: Random Forest Regressor
+====================================================================================================

+Model Random Forest Regressor has no hyperparameter grid defined for GridSearchCV.

+Running full 'predict' pipeline...
+left_iris_x left_iris_y right_iris_x right_iris_y point_x point_y
+0 751.846863 270.624054 641.400879 275.499695 100.0 100.0
+1 739.366943 275.130646 641.746643 276.419403 100.0 100.0
+2 739.202148 273.828247 641.387695 275.756897 100.0 100.0
+3 739.340332 273.977570 641.849548 276.237335 100.0 100.0
+4 738.536682 274.158722 641.225525 276.203308 100.0 100.0
+Pipeline Error: 'Random Forest Regressor'

+================================================================================
+OVERALL PERFORMANCE SUMMARY
+================================================================================
+Model Name | Avg Accuracy | Avg Precision | Time (s)

+---

+Linear Regression | 235.3662 | 49.1126 | 1.5597
+Ridge Regression | 236.1323 | 48.8059 | 0.6650
+Lasso Regression | 290.8152 | 45.8779 | 0.7684
+Elastic Net | 298.9683 | 38.6566 | 0.1052
+Bayesian Ridge | 235.3889 | 48.1209 | 1.6910
+SGD Regressor | 241.2138 | 46.8787 | 15.3096
+Support Vector Regressor | 266.1792 | 48.0700 | 0.1342
+Random Forest Regressor | ERROR: 'Random Forest Regressor'

+---

+## After modifications and adding the Random Forest configuration

+Detected 9 unique calibration points. Setting k=9.

+====================================================================================================
+MODEL: Linear Regression
+====================================================================================================

+Model Linear Regression has no hyperparameter grid defined for GridSearchCV.

+Running full 'predict' pipeline...
+left_iris_x left_iris_y right_iris_x right_iris_y point_x point_y
+0 751.846863 270.624054 641.400879 275.499695 100.0 100.0
+1 739.366943 275.130646 641.746643 276.419403 100.0 100.0
+2 739.202148 273.828247 641.387695 275.756897 100.0 100.0
+3 739.340332 273.977570 641.849548 276.237335 100.0 100.0
+4 738.536682 274.158722 641.225525 276.203308 100.0 100.0
+Score X: 0.8210933512146253
+Time X: 0.0
+Score Y: 0.955456427962058
+Time Y: 0.0
+Pipeline Result -> Avg Accuracy: 235.3662, Avg Precision: 49.1126, Total Time: 1.50s

+====================================================================================================
+MODEL: Ridge Regression
+====================================================================================================

+Searching for top 5 parameter combinations (Axis X)...
+Rank | R2 Score | MAE | Parameters

+---

+1 | 0.7497 | 241.3809 | {'ridge__alpha': 0.005}
+2 | 0.7475 | 241.7027 | {'ridge__alpha': 0.01}
+3 | 0.7118 | 268.2630 | {'ridge__alpha': 100}
+4 | 0.6966 | 263.7161 | {'ridge__alpha': 50}
+5 | 0.6468 | 261.6515 | {'ridge__alpha': 20}

+Running full 'predict' pipeline...
+left_iris_x left_iris_y right_iris_x right_iris_y point_x point_y
+0 751.846863 270.624054 641.400879 275.499695 100.0 100.0
+1 739.366943 275.130646 641.746643 276.419403 100.0 100.0
+2 739.202148 273.828247 641.387695 275.756897 100.0 100.0
+3 739.340332 273.977570 641.849548 276.237335 100.0 100.0
+4 738.536682 274.158722 641.225525 276.203308 100.0 100.0
+Time X: 0.14182019233703613
+Time Y: 0.1409766674041748
+Pipeline Result -> Avg Accuracy: 235.8727, Avg Precision: 48.7106, Total Time: 0.39s

+====================================================================================================
+MODEL: Lasso Regression
+====================================================================================================

+Searching for top 5 parameter combinations (Axis X)...
+Rank | R2 Score | MAE | Parameters

+---

+1 | 0.6749 | 289.7889 | {'lasso__alpha': 100}
+2 | 0.6694 | 275.8380 | {'lasso__alpha': 55}
+3 | 0.6678 | 274.6908 | {'lasso__alpha': 50}
+4 | 0.6658 | 273.6146 | {'lasso__alpha': 45}
+5 | 0.6634 | 272.5937 | {'lasso__alpha': 40}

+Running full 'predict' pipeline...
+left_iris_x left_iris_y right_iris_x right_iris_y point_x point_y
+0 751.846863 270.624054 641.400879 275.499695 100.0 100.0
+1 739.366943 275.130646 641.746643 276.419403 100.0 100.0
+2 739.202148 273.828247 641.387695 275.756897 100.0 100.0
+3 739.340332 273.977570 641.849548 276.237335 100.0 100.0
+4 738.536682 274.158722 641.225525 276.203308 100.0 100.0
+Time X: 0.15945863723754883
+Time Y: 0.15405631065368652
+Pipeline Result -> Avg Accuracy: 292.7949, Avg Precision: 45.1035, Total Time: 0.41s

+====================================================================================================
+MODEL: Elastic Net
+====================================================================================================

+Searching for top 5 parameter combinations (Axis X)...
+Rank | R2 Score | MAE | Parameters

+---

+1 | 0.7005 | 277.8284 | {'elasticnet__alpha': 0.5, 'elasticnet__l1_ratio': 0.5}
+2 | 0.6999 | 280.8068 | {'elasticnet__alpha': 1.0, 'elasticnet__l1_ratio': 0.7}
+3 | 0.6998 | 275.1441 | {'elasticnet__alpha': 2.0, 'elasticnet__l1_ratio': 0.9}
+4 | 0.6996 | 274.9757 | {'elasticnet__alpha': 1.0, 'elasticnet__l1_ratio': 0.8}
+5 | 0.6957 | 286.5466 | {'elasticnet__alpha': 2.0, 'elasticnet__l1_ratio': 0.8}

+Running full 'predict' pipeline...
+left_iris_x left_iris_y right_iris_x right_iris_y point_x point_y
+0 751.846863 270.624054 641.400879 275.499695 100.0 100.0
+1 739.366943 275.130646 641.746643 276.419403 100.0 100.0
+2 739.202148 273.828247 641.387695 275.756897 100.0 100.0
+3 739.340332 273.977570 641.849548 276.237335 100.0 100.0
+4 738.536682 274.158722 641.225525 276.203308 100.0 100.0
+Score X: 0.7457386415719489
+Time X: 0.0
+Score Y: 0.9049978924542859
+Time Y: 0.0
+Pipeline Result -> Avg Accuracy: 298.9683, Avg Precision: 38.6566, Total Time: 0.13s

+====================================================================================================
+MODEL: Bayesian Ridge
+====================================================================================================

+Searching for top 5 parameter combinations (Axis X)...
+Rank | R2 Score | MAE | Parameters

+---

+1 | 0.2365 | 255.4301 | {'bayesianridge__alpha_init': 1, 'bayesianridge__lambda_init': 1e-09}
+2 | 0.2365 | 255.4301 | {'bayesianridge__alpha_init': 1.3, 'bayesianridge__lambda_init': 1e-09}
+3 | 0.2365 | 255.4301 | {'bayesianridge__alpha_init': 1.1, 'bayesianridge__lambda_init': 1e-09}
+4 | 0.2365 | 255.4301 | {'bayesianridge__alpha_init': 1.2, 'bayesianridge__lambda_init': 1e-09}
+5 | 0.2365 | 255.4301 | {'bayesianridge__alpha_init': 1.4, 'bayesianridge__lambda_init': 1e-09}

+Running full 'predict' pipeline...
+left_iris_x left_iris_y right_iris_x right_iris_y point_x point_y
+0 751.846863 270.624054 641.400879 275.499695 100.0 100.0
+1 739.366943 275.130646 641.746643 276.419403 100.0 100.0
+2 739.202148 273.828247 641.387695 275.756897 100.0 100.0
+3 739.340332 273.977570 641.849548 276.237335 100.0 100.0
+4 738.536682 274.158722 641.225525 276.203308 100.0 100.0
+Time X: 0.7946014404296875
+Time Y: 0.7750468254089355
+Pipeline Result -> Avg Accuracy: 235.3889, Avg Precision: 48.1209, Total Time: 1.68s

+====================================================================================================
+MODEL: SGD Regressor
+====================================================================================================

+Searching for top 5 parameter combinations (Axis X)...
+Rank | R2 Score | MAE | Parameters

+---

+1 | 0.7123 | 256.6659 | {'sgdregressor__alpha': 0.0001, 'sgdregressor__eta0': 0.0001, 'sgdregressor__l1_ratio': 0.8, 'sgdregressor__max_iter': 1000}
+2 | 0.7123 | 256.6640 | {'sgdregressor__alpha': 0.0001, 'sgdregressor__eta0': 0.0001, 'sgdregressor__l1_ratio': 1, 'sgdregressor__max_iter': 1000}
+3 | 0.7123 | 256.6659 | {'sgdregressor__alpha': 0.0001, 'sgdregressor__eta0': 0.0001, 'sgdregressor__l1_ratio': 0.7, 'sgdregressor__max_iter': 1000}
+4 | 0.7123 | 256.6657 | {'sgdregressor__alpha': 0.0001, 'sgdregressor__eta0': 0.0001, 'sgdregressor__l1_ratio': 0.5, 'sgdregressor__max_iter': 1000}
+5 | 0.7123 | 256.7432 | {'sgdregressor__alpha': 0.001, 'sgdregressor__eta0': 0.0001, 'sgdregressor__l1_ratio': 0.8, 'sgdregressor__max_iter': 1000}

+Running full 'predict' pipeline...
+left_iris_x left_iris_y right_iris_x right_iris_y point_x point_y
+0 751.846863 270.624054 641.400879 275.499695 100.0 100.0
+1 739.366943 275.130646 641.746643 276.419403 100.0 100.0
+2 739.202148 273.828247 641.387695 275.756897 100.0 100.0
+3 739.340332 273.977570 641.849548 276.237335 100.0 100.0
+4 738.536682 274.158722 641.225525 276.203308 100.0 100.0
+Time X: 1.793349027633667
+Time Y: 1.8706464767456055
+Pipeline Result -> Avg Accuracy: 241.2358, Avg Precision: 46.9392, Total Time: 3.78s

+====================================================================================================
+MODEL: Support Vector Regressor
+====================================================================================================

+Searching for top 5 parameter combinations (Axis X)...
+Rank | R2 Score | MAE | Parameters

+---

+1 | 0.9167 | 68.5502 | {'svr__C': 1000, 'svr__gamma': 2, 'svr__kernel': 'rbf'}
+2 | 0.9162 | 69.3407 | {'svr__C': 500, 'svr__gamma': 2, 'svr__kernel': 'rbf'}
+3 | 0.9149 | 68.6453 | {'svr__C': 2000, 'svr__gamma': 2, 'svr__kernel': 'rbf'}
+4 | 0.9144 | 68.0756 | {'svr__C': 500, 'svr__gamma': 5, 'svr__kernel': 'rbf'}
+5 | 0.9132 | 72.2102 | {'svr__C': 200, 'svr__gamma': 2, 'svr__kernel': 'rbf'}

+Running full 'predict' pipeline...
+left_iris_x left_iris_y right_iris_x right_iris_y point_x point_y
+0 751.846863 270.624054 641.400879 275.499695 100.0 100.0
+1 739.366943 275.130646 641.746643 276.419403 100.0 100.0
+2 739.202148 273.828247 641.387695 275.756897 100.0 100.0
+3 739.340332 273.977570 641.849548 276.237335 100.0 100.0
+4 738.536682 274.158722 641.225525 276.203308 100.0 100.0
+Score X: 0.7693369168357224
+Time X: 0.0162813663482666
+Score Y: 0.9476667108830322
+Time Y: 0.019759178161621094
+Pipeline Result -> Avg Accuracy: 266.1792, Avg Precision: 48.0700, Total Time: 0.36s

+====================================================================================================
+MODEL: Random Forest Regressor
+====================================================================================================

+Searching for top 5 parameter combinations (Axis X)...
+Rank | R2 Score | MAE | Parameters

+---

+1 | 0.9221 | 68.1083 | {'randomforestregressor__max_depth': 10, 'randomforestregressor__min_samples_split': 10, 'randomforestregressor__n_estimators': 100}
+2 | 0.9180 | 68.5070 | {'randomforestregressor__max_depth': 10, 'randomforestregressor__min_samples_split': 5, 'randomforestregressor__n_estimators': 100}
+3 | 0.9147 | 69.0029 | {'randomforestregressor__max_depth': 10, 'randomforestregressor__min_samples_split': 2, 'randomforestregressor__n_estimators': 100}

+Running full 'predict' pipeline...
+left_iris_x left_iris_y right_iris_x right_iris_y point_x point_y
+0 751.846863 270.624054 641.400879 275.499695 100.0 100.0
+1 739.366943 275.130646 641.746643 276.419403 100.0 100.0
+2 739.202148 273.828247 641.387695 275.756897 100.0 100.0
+3 739.340332 273.977570 641.849548 276.237335 100.0 100.0
+4 738.536682 274.158722 641.225525 276.203308 100.0 100.0
+Time X: 3.1839962005615234
+Time Y: 1.0933246612548828
+Pipeline Result -> Avg Accuracy: 52.8510, Avg Precision: 31.9563, Total Time: 4.43s

+================================================================================
+OVERALL PERFORMANCE SUMMARY
+================================================================================
+Model Name | Avg Accuracy | Avg Precision | Time (s)

+---

+Linear Regression | 235.3662 | 49.1126 | 1.4983
+Ridge Regression | 235.8727 | 48.7106 | 0.3908
+Lasso Regression | 292.7949 | 45.1035 | 0.4128
+Elastic Net | 298.9683 | 38.6566 | 0.1328
+Bayesian Ridge | 235.3889 | 48.1209 | 1.6843
+SGD Regressor | 241.2358 | 46.9392 | 3.7777
+Support Vector Regressor | 266.1792 | 48.0700 | 0.3644
+Random Forest Regressor | 52.8510 | 31.9563 | 4.4307

+---

+## After splitting into train and test sets to check for overfitting

+Full Dataset: 900 rows
+Training Split: 765 rows
+Validation Split: 135 rows
+Detected 9 unique calibration points.

+====================================================================================================
+MODEL: Linear Regression
+====================================================================================================

+Model Linear Regression has no hyperparameter grid defined.
+
+--- Phase 1: Calibration Phase (Internal split on Training set) ---
+left_iris_x left_iris_y right_iris_x right_iris_y point_x point_y
+0 725.386353 275.860901 627.103882 274.170715 1607.0 100.0
+1 735.130432 282.739563 636.170105 280.775909 100.0 769.0
+2 729.180908 284.334076 630.414978 282.022430 1607.0 769.0
+3 730.777405 280.959167 632.483826 277.992859 853.5 434.5
+4 723.184448 275.512177 625.264465 274.214630 1607.0 100.0
+Score X: 0.07148328938658532
+Time X: 0.002007007598876953
+Score Y: 0.9278236134235549
+Time Y: 0.0018939971923828125
+Calibration Result -> Acc: 245.7273, Prec: 49.8420, Time: 1.55s

+--- Phase 2: Validation Phase (Hold-out Split) ---
+Score X_Val: 0.8200780206843142
+Time X_Val: 0.0019996166229248047
+Score Y_Val: 0.9612407162914263
+Time Y_Val: 0.0010020732879638672
+Validation Result -> Acc: 229.4535, Prec: 51.5457

+====================================================================================================
+MODEL: Ridge Regression
+====================================================================================================

+Searching for top 5 parameter combinations...
+Rank | R2 (X) | MAE (X) | Parameters

+---

+1 | 0.7256 | 241.3973 | {'ridge__alpha': 0.001}
+2 | 0.7118 | 268.2630 | {'ridge__alpha': 100}
+3 | 0.6966 | 263.7161 | {'ridge__alpha': 50}
+4 | 0.6507 | 244.4687 | {'ridge__alpha': 0.005}
+5 | 0.6468 | 261.6515 | {'ridge__alpha': 20}

+--- Phase 1: Calibration Phase (Internal split on Training set) ---
+left_iris_x left_iris_y right_iris_x right_iris_y point_x point_y
+0 725.386353 275.860901 627.103882 274.170715 1607.0 100.0
+1 735.130432 282.739563 636.170105 280.775909 100.0 769.0
+2 729.180908 284.334076 630.414978 282.022430 1607.0 769.0
+3 730.777405 280.959167 632.483826 277.992859 853.5 434.5
+4 723.184448 275.512177 625.264465 274.214630 1607.0 100.0
+Time X: 0.15825796127319336
+Time Y: 0.15566420555114746
+Calibration Result -> Acc: 247.0915, Prec: 49.9354, Time: 0.42s

+--- Phase 2: Validation Phase (Hold-out Split) ---
+Time X_Val: 0.16522979736328125
+Time Y_Val: 0.15460491180419922
+Validation Result -> Acc: 229.6377, Prec: 51.8048

+====================================================================================================
+MODEL: Lasso Regression
+====================================================================================================

+Searching for top 5 parameter combinations...
+Rank | R2 (X) | MAE (X) | Parameters

+---

+1 | 0.6910 | 270.3284 | {'lasso__alpha': 55}
+2 | 0.6905 | 269.0582 | {'lasso__alpha': 50}
+3 | 0.6895 | 267.8433 | {'lasso__alpha': 45}
+4 | 0.6881 | 266.7249 | {'lasso__alpha': 40}
+5 | 0.6841 | 264.7345 | {'lasso__alpha': 30}

+--- Phase 1: Calibration Phase (Internal split on Training set) ---
+left_iris_x left_iris_y right_iris_x right_iris_y point_x point_y
+0 725.386353 275.860901 627.103882 274.170715 1607.0 100.0
+1 735.130432 282.739563 636.170105 280.775909 100.0 769.0
+2 729.180908 284.334076 630.414978 282.022430 1607.0 769.0
+3 730.777405 280.959167 632.483826 277.992859 853.5 434.5
+4 723.184448 275.512177 625.264465 274.214630 1607.0 100.0
+Time X: 0.16257333755493164
+Time Y: 0.16847848892211914
+Calibration Result -> Acc: 251.6126, Prec: 47.0489, Time: 0.44s

+--- Phase 2: Validation Phase (Hold-out Split) ---
+Time X_Val: 0.18091559410095215
+Time Y_Val: 0.16357016563415527
+Validation Result -> Acc: 265.0815, Prec: 44.3168

+====================================================================================================
+MODEL: Elastic Net
+====================================================================================================

+Searching for top 5 parameter combinations...
+Rank | R2 (X) | MAE (X) | Parameters

+---

+1 | 0.7136 | 273.3473 | {'elasticnet__alpha': 0.5, 'elasticnet__l1_ratio': 0.5}
+2 | 0.7136 | 270.6703 | {'elasticnet__alpha': 2.0, 'elasticnet__l1_ratio': 0.9}
+3 | 0.7135 | 270.5027 | {'elasticnet__alpha': 1.0, 'elasticnet__l1_ratio': 0.8}
+4 | 0.7123 | 276.3342 | {'elasticnet__alpha': 1.0, 'elasticnet__l1_ratio': 0.7}
+5 | 0.7108 | 267.5358 | {'elasticnet__alpha': 0.5, 'elasticnet__l1_ratio': 0.7}

+--- Phase 1: Calibration Phase (Internal split on Training set) ---
+left_iris_x left_iris_y right_iris_x right_iris_y point_x point_y
+0 725.386353 275.860901 627.103882 274.170715 1607.0 100.0
+1 735.130432 282.739563 636.170105 280.775909 100.0 769.0
+2 729.180908 284.334076 630.414978 282.022430 1607.0 769.0
+3 730.777405 280.959167 632.483826 277.992859 853.5 434.5
+4 723.184448 275.512177 625.264465 274.214630 1607.0 100.0
+Score X: 0.6531754054868382
+Time X: 0.0006241798400878906
+Score Y: 0.9074449357286609
+Time Y: 0.0008966922760009766
+Calibration Result -> Acc: 301.3013, Prec: 41.3590, Time: 0.09s

+--- Phase 2: Validation Phase (Hold-out Split) ---
+Score X_Val: 0.7340912725347744
+Time X_Val: 0.0018773078918457031
+Score Y_Val: 0.9125809129793833
+Time Y_Val: 0.0009999275207519531
+Validation Result -> Acc: 294.6057, Prec: 38.2533

+====================================================================================================
+MODEL: Bayesian Ridge
+====================================================================================================

+Searching for top 5 parameter combinations...
+Rank | R2 (X) | MAE (X) | Parameters

+---

+1 | 0.3463 | 251.4715 | {'bayesianridge__alpha_init': 1, 'bayesianridge__lambda_init': 0.001}
+2 | 0.3463 | 251.4715 | {'bayesianridge__alpha_init': 1.1, 'bayesianridge__lambda_init': 0.001}
+3 | 0.3463 | 251.4715 | {'bayesianridge__alpha_init': 1.2, 'bayesianridge__lambda_init': 0.001}
+4 | 0.3463 | 251.4715 | {'bayesianridge__alpha_init': 1.3, 'bayesianridge__lambda_init': 0.001}
+5 | 0.3463 | 251.4715 | {'bayesianridge__alpha_init': 1.4, 'bayesianridge__lambda_init': 0.001}

+--- Phase 1: Calibration Phase (Internal split on Training set) ---
+left_iris_x left_iris_y right_iris_x right_iris_y point_x point_y
+0 725.386353 275.860901 627.103882 274.170715 1607.0 100.0
+1 735.130432 282.739563 636.170105 280.775909 100.0 769.0
+2 729.180908 284.334076 630.414978 282.022430 1607.0 769.0
+3 730.777405 280.959167 632.483826 277.992859 853.5 434.5
+4 723.184448 275.512177 625.264465 274.214630 1607.0 100.0
+Time X: 0.7953126430511475
+Time Y: 0.79813551902771
+Calibration Result -> Acc: 246.8617, Prec: 50.2630, Time: 1.69s

+--- Phase 2: Validation Phase (Hold-out Split) ---
+Time X_Val: 0.8207540512084961
+Time Y_Val: 0.8074281215667725
+Validation Result -> Acc: 229.1080, Prec: 50.9408

+====================================================================================================
+MODEL: SGD Regressor
+====================================================================================================

+Searching for top 5 parameter combinations...
+Rank | R2 (X) | MAE (X) | Parameters

+---

+1 | 0.7240 | 252.4349 | {'sgdregressor__alpha': 0.0001, 'sgdregressor__eta0': 0.0001, 'sgdregressor__l1_ratio': 0.8, 'sgdregressor__max_iter': 1000}
+2 | 0.7240 | 252.4352 | {'sgdregressor__alpha': 0.0001, 'sgdregressor__eta0': 0.0001, 'sgdregressor__l1_ratio': 1, 'sgdregressor__max_iter': 1000}
+3 | 0.7240 | 252.4355 | {'sgdregressor__alpha': 0.0001, 'sgdregressor__eta0': 0.0001, 'sgdregressor__l1_ratio': 0.5, 'sgdregressor__max_iter': 1000}
+4 | 0.7240 | 252.4360 | {'sgdregressor__alpha': 0.0001, 'sgdregressor__eta0': 0.0001, 'sgdregressor__l1_ratio': 0.7, 'sgdregressor__max_iter': 1000}
+5 | 0.7239 | 252.5118 | {'sgdregressor__alpha': 0.001, 'sgdregressor__eta0': 0.0001, 'sgdregressor__l1_ratio': 0.5, 'sgdregressor__max_iter': 1000}

+--- Phase 1: Calibration Phase (Internal split on Training set) ---
+left_iris_x left_iris_y right_iris_x right_iris_y point_x point_y
+0 725.386353 275.860901 627.103882 274.170715 1607.0 100.0
+1 735.130432 282.739563 636.170105 280.775909 100.0 769.0
+2 729.180908 284.334076 630.414978 282.022430 1607.0 769.0
+3 730.777405 280.959167 632.483826 277.992859 853.5 434.5
+4 723.184448 275.512177 625.264465 274.214630 1607.0 100.0
+Time X: 1.9512312412261963
+Time Y: 1.9312074184417725
+Calibration Result -> Acc: 247.0398, Prec: 49.5953, Time: 3.98s

+--- Phase 2: Validation Phase (Hold-out Split) ---
+Time X_Val: 1.7748675346374512
+Time Y_Val: 1.8547911643981934
+Validation Result -> Acc: 239.2565, Prec: 48.5600

+====================================================================================================
+MODEL: Support Vector Regressor
+====================================================================================================

+Searching for top 5 parameter combinations...
+Rank | R2 (X) | MAE (X) | Parameters

+---

+1 | 0.9196 | 63.4319 | {'svr__C': 1000, 'svr__gamma': 5, 'svr__kernel': 'rbf'}
+2 | 0.9191 | 66.7000 | {'svr__C': 1000, 'svr__gamma': 2, 'svr__kernel': 'rbf'}
+3 | 0.9186 | 67.8033 | {'svr__C': 500, 'svr__gamma': 2, 'svr__kernel': 'rbf'}
+4 | 0.9181 | 65.6786 | {'svr__C': 500, 'svr__gamma': 5, 'svr__kernel': 'rbf'}
+5 | 0.9180 | 66.6653 | {'svr__C': 2000, 'svr__gamma': 2, 'svr__kernel': 'rbf'}

+--- Phase 1: Calibration Phase (Internal split on Training set) ---
+left_iris_x left_iris_y right_iris_x right_iris_y point_x point_y
+0 725.386353 275.860901 627.103882 274.170715 1607.0 100.0
+1 735.130432 282.739563 636.170105 280.775909 100.0 769.0
+2 729.180908 284.334076 630.414978 282.022430 1607.0 769.0
+3 730.777405 280.959167 632.483826 277.992859 853.5 434.5
+4 723.184448 275.512177 625.264465 274.214630 1607.0 100.0
+Score X: 0.6935454371850305
+Time X: 0.018875598907470703
+Score Y: 0.9363413011174913
+Time Y: 0.011532068252563477
+Calibration Result -> Acc: 281.0235, Prec: 49.7874, Time: 0.22s

+--- Phase 2: Validation Phase (Hold-out Split) ---
+Score X_Val: 0.7649424330422514
+Time X_Val: 0.013813972473144531
+Score Y_Val: 0.9553053450112502
+Time Y_Val: 0.016499757766723633
+Validation Result -> Acc: 257.4740, Prec: 47.0542

+====================================================================================================
+MODEL: Random Forest Regressor
+====================================================================================================

+Searching for top 5 parameter combinations...
+Rank | R2 (X) | MAE (X) | Parameters

+---

+1 | 0.9229 | 65.8699 | {'randomforestregressor__max_depth': 10, 'randomforestregressor__min_samples_split': 10, 'randomforestregressor__n_estimators': 100}
+2 | 0.9183 | 66.9455 | {'randomforestregressor__max_depth': 10, 'randomforestregressor__min_samples_split': 5, 'randomforestregressor__n_estimators': 100}
+3 | 0.9137 | 67.7311 | {'randomforestregressor__max_depth': 10, 'randomforestregressor__min_samples_split': 2, 'randomforestregressor__n_estimators': 100}

+--- Phase 1: Calibration Phase (Internal split on Training set) ---
+left_iris_x left_iris_y right_iris_x right_iris_y point_x point_y
+0 725.386353 275.860901 627.103882 274.170715 1607.0 100.0
+1 735.130432 282.739563 636.170105 280.775909 100.0 769.0
+2 729.180908 284.334076 630.414978 282.022430 1607.0 769.0
+3 730.777405 280.959167 632.483826 277.992859 853.5 434.5
+4 723.184448 275.512177 625.264465 274.214630 1607.0 100.0
+Time X: 3.5348854064941406
+Time Y: 2.837346315383911
+Calibration Result -> Acc: 69.4965, Prec: 46.4941, Time: 6.57s

+--- Phase 2: Validation Phase (Hold-out Split) ---
+Time X_Val: 3.585437297821045
+Time Y_Val: 3.094998598098755
+Validation Result -> Acc: 46.6466, Prec: 29.1507

+==============================================================================================================
+CONSOLIDATED TRAIN/VAL PERFORMANCE SUMMARY
+==============================================================================================================
+Model Name | Calib Acc | Calib Prec | Valid Acc | Valid Prec | Time (s)

+---

+Linear Regression | 245.7273 | 49.8420 | 229.4535 | 51.5457 | 1.5465
+Ridge Regression | 247.0915 | 49.9354 | 229.6377 | 51.8048 | 0.4171
+Lasso Regression | 251.6126 | 47.0489 | 265.0815 | 44.3168 | 0.4428
+Elastic Net | 301.3013 | 41.3590 | 294.6057 | 38.2533 | 0.0876
+Bayesian Ridge | 246.8617 | 50.2630 | 229.1080 | 50.9408 | 1.6934
+SGD Regressor | 247.0398 | 49.5953 | 239.2565 | 48.5600 | 3.9767
+Support Vector Regressor | 281.0235 | 49.7874 | 257.4740 | 47.0542 | 0.2224
+Random Forest Regressor | 69.4965 | 46.4941 | 46.6466 | 29.1507 | 6.5670

+---
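The 765/135 split used in the overfitting check above is an 85/15 hold-out on the 900-row dataset. A minimal sketch of such a split; the file name, seed, and use of train_test_split are illustrative assumptions, and only the feature/target columns and row counts come from the report:

```python
# Sketch only: reproduce the 765/135 train/validation hold-out from the report.
import pandas as pd
from sklearn.model_selection import train_test_split

df = pd.read_csv("calibration_samples.csv")  # hypothetical file name
features = ["left_iris_x", "left_iris_y", "right_iris_x", "right_iris_y"]
targets = ["point_x", "point_y"]

X_train, X_val, y_train, y_val = train_test_split(
    df[features], df[targets],
    test_size=0.15,    # 900 * 0.15 = 135 validation rows
    random_state=42,   # hypothetical seed
)
# Each model is fit on the 765 training rows and scored on the 135 held-out
# rows, so calibration vs. validation accuracy can be compared per model.
```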
From b863379c2d05c7e1b59ef029e2d6428545fd0040 Mon Sep 17 00:00:00 2001
From: midaa1
Date: Sun, 15 Feb 2026 13:01:04 +0200
Subject: [PATCH 06/10] Add timing to compare model performance

---
 app/services/gaze_tracker.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/app/services/gaze_tracker.py b/app/services/gaze_tracker.py
index 3354551..af3bac6 100644
--- a/app/services/gaze_tracker.py
+++ b/app/services/gaze_tracker.py
@@ -14,7 +14,7 @@
 from sklearn.pipeline import make_pipeline
 from sklearn.ensemble import RandomForestRegressor
 from sklearn.linear_model import Ridge
-
+import time
 # Model imports
 from sklearn import linear_model
@@ -91,9 +91,12 @@ def trian_and_predict(model_name, X_train, y_train, X_test, y_test, label):
         or model_name == "Support Vector Regressor"
     ):
         model = models[model_name]
+        start_time = time.time()
         model.fit(X_train, y_train)
+        end_time = time.time()
         y_pred = model.predict(X_test)
         print(f"Score {label}: {r2_score(y_test, y_pred)}")
+        print(f"Time {label}: {end_time - start_time}")
         return y_pred
     else:
         pipeline = models[model_name]
@@ -106,9 +109,12 @@
             refit="r2",
             return_train_score=True,
         )
+        start_time = time.time()
         grid_search.fit(X_train, y_train)
+        end_time = time.time()
         best_model = grid_search.best_estimator_
         y_pred = best_model.predict(X_test)
+        print(f"Time {label}: {end_time - start_time}")
         return y_pred
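PATCH 06 brackets each fit with time.time() deltas. An equivalent, illustrative pattern using time.perf_counter(), which is monotonic and higher-resolution and therefore better suited to timing short fit() calls; this is a sketch, not the project's code:

```python
# Sketch only: the timing pattern from PATCH 06, using a monotonic clock.
import time

def timed_fit(model, X_train, y_train, label):
    start = time.perf_counter()
    model.fit(X_train, y_train)
    elapsed = time.perf_counter() - start
    print(f"Time {label}: {elapsed}")
    return model
```

time.time() can jump if the system clock is adjusted mid-run, which would skew the per-model timings collected in the report; perf_counter() avoids that failure mode.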
From ddf3bdcfdb4163f15b4e592ba82147a524ac85ed Mon Sep 17 00:00:00 2001
From: midaa1
Date: Sun, 15 Feb 2026 13:36:27 +0200
Subject: [PATCH 07/10] Update report

---
 app/services/reports/report.md | 774 ++++++---------------------------
 1 file changed, 130 insertions(+), 644 deletions(-)

diff --git a/app/services/reports/report.md b/app/services/reports/report.md
index 7e3359c..9635584 100644
--- a/app/services/reports/report.md
+++ b/app/services/reports/report.md
@@ -1,714 +1,200 @@
-Detected 9 unique calibration points. Setting k=9.
+# 🎯 Eye-Gaze Calibration Regression Study

-## Before any modifications
+This project evaluates multiple regression models for **eye-gaze calibration**, aiming to map iris landmark coordinates to screen positions.

-====================================================================================================
-MODEL: Linear Regression
-====================================================================================================
+The goal is to determine which regression model provides the best trade-off between:

-Model Linear Regression has no hyperparameter grid defined for GridSearchCV.
+- 🎯 Prediction accuracy (screen point error)
+- 📏 Precision
+- ⚡ Execution speed
+- 🧠 Generalization (overfitting behavior)

-Running full 'predict' pipeline...
-left_iris_x left_iris_y right_iris_x right_iris_y point_x point_y
-0 751.846863 270.624054 641.400879 275.499695 100.0 100.0
-1 739.366943 275.130646 641.746643 276.419403 100.0 100.0
-2 739.202148 273.828247 641.387695 275.756897 100.0 100.0
-3 739.340332 273.977570 641.849548 276.237335 100.0 100.0
-4 738.536682 274.158722 641.225525 276.203308 100.0 100.0
-Score X: 0.8210933512146253
-Time X: 0.0
-Score Y: 0.955456427962058
-Time Y: 0.0008034706115722656
-Pipeline Result -> Avg Accuracy: 235.3662, Avg Precision: 49.1126, Total Time: 1.56s
+---

-====================================================================================================
-MODEL: Ridge Regression
-====================================================================================================
+## 📊 Dataset Overview

-Searching for top 5 parameter combinations (Axis X)...
-Rank | R2 Score | MAE | Parameters
+- Total samples: **900**
+- Features:
+  - `left_iris_x`, `left_iris_y`
+  - `right_iris_x`, `right_iris_y`
+- Targets:
+  - `point_x`, `point_y`
+- Calibration points detected: **9 unique points (k = 9)**

---

-1 | 0.7475 | 241.7027 | {'ridge__alpha': 0.01}
-2 | 0.6979 | 273.3379 | {'ridge__alpha': 100}
-3 | 0.6836 | 269.0442 | {'ridge__alpha': 55}
-4 | 0.6800 | 268.6203 | {'ridge__alpha': 50}
-5 | 0.6757 | 268.2162 | {'ridge__alpha': 45}

-Running full 'predict' pipeline...
-left_iris_x left_iris_y right_iris_x right_iris_y point_x point_y
-0 751.846863 270.624054 641.400879 275.499695 100.0 100.0
-1 739.366943 275.130646 641.746643 276.419403 100.0 100.0
-2 739.202148 273.828247 641.387695 275.756897 100.0 100.0
-3 739.340332 273.977570 641.849548 276.237335 100.0 100.0
-4 738.536682 274.158722 641.225525 276.203308 100.0 100.0
-Time X: 0.2734954357147217
-Time Y: 0.2774362564086914
-Pipeline Result -> Avg Accuracy: 236.1323, Avg Precision: 48.8059, Total Time: 0.66s
+## 🧠 Models Evaluated

-====================================================================================================
-MODEL: Lasso Regression
-====================================================================================================
+The following regression models were tested:

-Searching for top 5 parameter combinations (Axis X)...
-Rank | R2 Score | MAE | Parameters
+- Linear Regression
+- Ridge Regression
+- Lasso Regression
+- Elastic Net
+- Bayesian Ridge
+- SGD Regressor
+- Support Vector Regressor (SVR)
+- Random Forest Regressor

---

-1 | 0.6749 | 289.7889 | {'lasso__alpha': 100}
-2 | 0.6694 | 275.8380 | {'lasso__alpha': 55}
-3 | 0.6678 | 274.6908 | {'lasso__alpha': 50}
-4 | 0.6658 | 273.6146 | {'lasso__alpha': 45}
-5 | 0.6634 | 272.5937 | {'lasso__alpha': 40}

-Running full 'predict' pipeline...
-left_iris_x left_iris_y right_iris_x right_iris_y point_x point_y
-0 751.846863 270.624054 641.400879 275.499695 100.0 100.0
-1 739.366943 275.130646 641.746643 276.419403 100.0 100.0
-2 739.202148 273.828247 641.387695 275.756897 100.0 100.0
-3 739.340332 273.977570 641.849548 276.237335 100.0 100.0
-4 738.536682 274.158722 641.225525 276.203308 100.0 100.0
-Time X: 0.3323993682861328
-Time Y: 0.31626462936401367
-Pipeline Result -> Avg Accuracy: 290.8152, Avg Precision: 45.8779, Total Time: 0.77s

-====================================================================================================
-MODEL: Elastic Net
-====================================================================================================
+## ⚙️ Evaluation Metrics

-Searching for top 5 parameter combinations (Axis X)...
-Rank | R2 Score | MAE | Parameters
+The pipeline reports:

---

+- **Avg Accuracy** → positional error (lower is better)
+- **Avg Precision**
+- **Execution Time**
+- Axis-wise R² scores during calibration

-1 | 0.6996 | 274.9757 | {'elasticnet__alpha': 1.0, 'elasticnet__l1_ratio': 0.8}
-2 | 0.6896 | 291.7449 | {'elasticnet__alpha': 1.0, 'elasticnet__l1_ratio': 0.5}
-3 | 0.6851 | 269.2697 | {'elasticnet__alpha': 0.1, 'elasticnet__l1_ratio': 0}
-4 | 0.6848 | 269.2195 | {'elasticnet__alpha': 0.1, 'elasticnet__l1_ratio': 0.01}
-5 | 0.6768 | 268.2999 | {'elasticnet__alpha': 0.1, 'elasticnet__l1_ratio': 0.2}
+Hyperparameter tuning was performed using **GridSearchCV** when available.

-Running full 'predict' pipeline...
-left_iris_x left_iris_y right_iris_x right_iris_y point_x point_y
-0 751.846863 270.624054 641.400879 275.499695 100.0 100.0
-1 739.366943 275.130646 641.746643 276.419403 100.0 100.0
-2 739.202148 273.828247 641.387695 275.756897 100.0 100.0
-3 739.340332 273.977570 641.849548 276.237335 100.0 100.0
-4 738.536682 274.158722 641.225525 276.203308 100.0 100.0
-Score X: 0.7457386415719489
-Time X: 0.0
-Score Y: 0.9049978924542859
-Time Y: 0.0
-Pipeline Result -> Avg Accuracy: 298.9683, Avg Precision: 38.6566, Total Time: 0.11s

---

-====================================================================================================
-MODEL: Bayesian Ridge
-====================================================================================================
+# 🧪 Experiment 1 — Baseline Results

-Searching for top 5 parameter combinations (Axis X)...
-Rank | R2 Score | MAE | Parameters
+Initial run before pipeline modifications.

---

+### 🔎 Key Observations

-1 | 0.2365 | 255.4301 | {'bayesianridge__alpha_init': 1, 'bayesianridge__lambda_init': 1e-09}
-2 | 0.2365 | 255.4301 | {'bayesianridge__alpha_init': 1.3, 'bayesianridge__lambda_init': 1e-09}
-3 | 0.2365 | 255.4301 | {'bayesianridge__alpha_init': 1.1, 'bayesianridge__lambda_init': 1e-09}
-4 | 0.2365 | 255.4301 | {'bayesianridge__alpha_init': 1.2, 'bayesianridge__lambda_init': 1e-09}
-5 | 0.2365 | 255.4301 | {'bayesianridge__alpha_init': 1.4, 'bayesianridge__lambda_init': 1e-09}
+- Linear & Ridge produced stable baseline performance.
+- Elastic Net was very fast but less precise.
+- SVR achieved strong R² values.
+- Random Forest failed due to missing configuration.

-Running full 'predict' pipeline...
-left_iris_x left_iris_y right_iris_x right_iris_y point_x point_y
-0 751.846863 270.624054 641.400879 275.499695 100.0 100.0
-1 739.366943 275.130646 641.746643 276.419403 100.0 100.0
-2 739.202148 273.828247 641.387695 275.756897 100.0 100.0
-3 739.340332 273.977570 641.849548 276.237335 100.0 100.0
-4 738.536682 274.158722 641.225525 276.203308 100.0 100.0
-Time X: 0.801609992980957
-Time Y: 0.7761216163635254
-Pipeline Result -> Avg Accuracy: 235.3889, Avg Precision: 48.1209, Total Time: 1.69s
+### 📋 Performance Summary

-====================================================================================================
-MODEL: SGD Regressor
-====================================================================================================
+| Model | Avg Accuracy | Avg Precision | Time (s) |
+|------|-------------|---------------|---------|
+| Linear Regression | 235.37 | 49.11 | 1.56 |
+| Ridge Regression | 236.13 | 48.81 | 0.67 |
+| Lasso Regression | 290.82 | 45.88 | 0.77 |
+| Elastic Net | 298.97 | 38.66 | 0.11 |
+| Bayesian Ridge | 235.39 | 48.12 | 1.69 |
+| SGD Regressor | 241.21 | 46.88 | 15.31 |
+| Support Vector Regressor | 266.18 | 48.07 | 0.13 |
+| Random Forest | ❌ Error | — | — |

-Searching for top 5 parameter combinations (Axis X)...
-Rank | R2 Score | MAE | Parameters

---

-1 | 0.7123 | 256.6649 | {'sgdregressor__alpha': 0.0001, 'sgdregressor__eta0': 0.0001, 'sgdregressor__l1_ratio': 0.7, 'sgdregressor__max_iter': 1000}
-2 | 0.7123 | 256.6659 | {'sgdregressor__alpha': 0.0001, 'sgdregressor__eta0': 0.0001, 'sgdregressor__l1_ratio': 0, 'sgdregressor__max_iter': 1000}
-3 | 0.7123 | 256.6661 | {'sgdregressor__alpha': 0.0001, 'sgdregressor__eta0': 0.0001, 'sgdregressor__l1_ratio': 0.2, 'sgdregressor__max_iter': 1000}
-4 | 0.7123 | 256.6659 | {'sgdregressor__alpha': 0.0001, 'sgdregressor__eta0': 0.0001, 'sgdregressor__l1_ratio': 1, 'sgdregressor__max_iter': 1000}
-5 | 0.7123 | 256.6658 | {'sgdregressor__alpha': 0.0001, 'sgdregressor__eta0': 0.0001, 'sgdregressor__l1_ratio': 0.5, 'sgdregressor__max_iter': 1000}
+---

-Running full 'predict' pipeline...
-left_iris_x left_iris_y right_iris_x right_iris_y point_x point_y
-0 751.846863 270.624054 641.400879 275.499695 100.0 100.0
-1 739.366943 275.130646 641.746643 276.419403 100.0 100.0
-2 739.202148 273.828247 641.387695 275.756897 100.0 100.0
-3 739.340332 273.977570 641.849548 276.237335 100.0 100.0
-4 738.536682 274.158722 641.225525 276.203308 100.0 100.0
-Time X: 7.285875558853149
-Time Y: 7.9095728397369385
-Pipeline Result -> Avg Accuracy: 241.2138, Avg Precision: 46.8787, Total Time: 15.31s
+# 🧪 Experiment 2 — Pipeline Improvements

-====================================================================================================
-MODEL: Support Vector Regressor
-====================================================================================================
+Changes made:

-Searching for top 5 parameter combinations (Axis X)...
-Rank | R2 Score | MAE | Parameters
+- Added Random Forest configuration
+- Expanded hyperparameter grids
+- Improved pipeline stability

---

-1 | 0.9087 | 75.2710 | {'svr__C': 1000, 'svr__gamma': 1, 'svr__kernel': 'rbf'}
-2 | 0.9074 | 87.7716 | {'svr__C': 100, 'svr__gamma': 1, 'svr__kernel': 'rbf'}
-3 | 0.8694 | 120.2317 | {'svr__C': 1000, 'svr__gamma': 0.1, 'svr__kernel': 'rbf'}
-4 | 0.8157 | 137.6160 | {'svr__C': 100, 'svr__gamma': 0.1, 'svr__kernel': 'rbf'}
-5 | 0.7947 | 167.9109 | {'svr__C': 1000, 'svr__gamma': 0.01, 'svr__kernel': 'rbf'}
+### 📋 Updated Performance Summary

-Running full 'predict' pipeline...
-left_iris_x left_iris_y right_iris_x right_iris_y point_x point_y
-0 751.846863 270.624054 641.400879 275.499695 100.0 100.0
-1 739.366943 275.130646 641.746643 276.419403 100.0 100.0
-2 739.202148 273.828247 641.387695 275.756897 100.0 100.0
-3 739.340332 273.977570 641.849548 276.237335 100.0 100.0
-4 738.536682 274.158722 641.225525 276.203308 100.0 100.0
-Score X: 0.7693369168357224
-Time X: 0.0077130794525146484
-Score Y: 0.9476667108830322
-Time Y: 0.005934715270996094
-Pipeline Result -> Avg Accuracy: 266.1792, Avg Precision: 48.0700, Total Time: 0.13s
+| Model | Avg Accuracy | Avg Precision | Time (s) |
+|------|-------------|---------------|---------|
+| Linear Regression | 235.37 | 49.11 | 1.50 |
+| Ridge Regression | 235.87 | 48.71 | 0.39 |
+| Lasso Regression | 292.79 | 45.10 | 0.41 |
+| Elastic Net | 298.97 | 38.66 | 0.13 |
+| Bayesian Ridge | 235.39 | 48.12 | 1.68 |
+| SGD Regressor | 241.24 | 46.94 | 3.78 |
+| Support Vector Regressor | 266.18 | 48.07 | 0.36 |
+| Random Forest | **52.85** | 31.96 | 4.43 |

-====================================================================================================
-MODEL: Random Forest Regressor
-====================================================================================================
+### 💡 Insights

-Model Random Forest Regressor has no hyperparameter grid defined for GridSearchCV.
+- Random Forest dramatically reduced positional error.
+- Precision dropped, suggesting sensitivity or instability.
+- Ridge became faster after optimization.
+- SVR remained a strong non-linear alternative.

-Running full 'predict' pipeline...
-left_iris_x left_iris_y right_iris_x right_iris_y point_x point_y
-0 751.846863 270.624054 641.400879 275.499695 100.0 100.0
-1 739.366943 275.130646 641.746643 276.419403 100.0 100.0
-2 739.202148 273.828247 641.387695 275.756897 100.0 100.0
-3 739.340332 273.977570 641.849548 276.237335 100.0 100.0
-4 738.536682 274.158722 641.225525 276.203308 100.0 100.0
-Pipeline Error: 'Random Forest Regressor'
+---

-================================================================================
-OVERALL PERFORMANCE SUMMARY
-================================================================================
-Model Name | Avg Accuracy | Avg Precision | Time (s)
+# 🧪 Experiment 3 — Train/Validation Split (Overfitting Check)

---

+To evaluate generalization:

-Linear Regression | 235.3662 | 49.1126 | 1.5597
-Ridge Regression | 236.1323 | 48.8059 | 0.6650
-Lasso Regression | 290.8152 | 45.8779 | 0.7684
-Elastic Net | 298.9683 | 38.6566 | 0.1052
-Bayesian Ridge | 235.3889 | 48.1209 | 1.6910
-SGD Regressor | 241.2138 | 46.8787 | 15.3096
-Support Vector Regressor | 266.1792 | 48.0700 | 0.1342
-Random Forest Regressor | ERROR: 'Random Forest Regressor'
+- Training: **765 samples**
+- Validation: **135 samples**

---

-## After modifications and adding the Random Forest configuration
+Each model went through:

-Detected 9 unique calibration points. Setting k=9.

-====================================================================================================
-MODEL: Linear Regression
-====================================================================================================

-Model Linear Regression has no hyperparameter grid defined for GridSearchCV.

-Running full 'predict' pipeline...
-left_iris_x left_iris_y right_iris_x right_iris_y point_x point_y
-0 751.846863 270.624054 641.400879 275.499695 100.0 100.0
-1 739.366943 275.130646 641.746643 276.419403 100.0 100.0
-2 739.202148 273.828247 641.387695 275.756897 100.0 100.0
-3 739.340332 273.977570 641.849548 276.237335 100.0 100.0
-4 738.536682 274.158722 641.225525 276.203308 100.0 100.0
-Score X: 0.8210933512146253
-Time X: 0.0
-Score Y: 0.955456427962058
-Time Y: 0.0
-Pipeline Result -> Avg Accuracy: 235.3662, Avg Precision: 49.1126, Total Time: 1.50s

-====================================================================================================
-MODEL: Ridge Regression
-====================================================================================================

-Searching for top 5 parameter combinations (Axis X)...
-Rank | R2 Score | MAE | Parameters
+### 📋 Updated Performance Summary
---
+| Model | Avg Accuracy | Avg Precision | Time (s) |
+|------|-------------|---------------|---------|
+| Linear Regression | 235.37 | 49.11 | 1.50 |
+| Ridge Regression | 235.87 | 48.71 | 0.39 |
+| Lasso Regression | 292.79 | 45.10 | 0.41 |
+| Elastic Net | 298.97 | 38.66 | 0.13 |
+| Bayesian Ridge | 235.39 | 48.12 | 1.68 |
+| SGD Regressor | 241.24 | 46.94 | 3.78 |
+| Support Vector Regressor | 266.18 | 48.07 | 0.36 |
+| Random Forest | **52.85** | 31.96 | 4.43 |
-1 | 0.7497 | 241.3809 | {'ridge__alpha': 0.005}
-2 | 0.7475 | 241.7027 | {'ridge__alpha': 0.01}
-3 | 0.7118 | 268.2630 | {'ridge__alpha': 100}
-4 | 0.6966 | 263.7161 | {'ridge__alpha': 50}
-5 | 0.6468 | 261.6515 | {'ridge__alpha': 20}
-
-Running full 'predict' pipeline...
-left_iris_x left_iris_y right_iris_x right_iris_y point_x point_y
-0 751.846863 270.624054 641.400879 275.499695 100.0 100.0
-1 739.366943 275.130646 641.746643 276.419403 100.0 100.0
-2 739.202148 273.828247 641.387695 275.756897 100.0 100.0
-3 739.340332 273.977570 641.849548 276.237335 100.0 100.0
-4 738.536682 274.158722 641.225525 276.203308 100.0 100.0
-Time X: 0.14182019233703613
-Time Y: 0.1409766674041748
-Pipeline Result -> Avg Accuracy: 235.8727, Avg Precision: 48.7106, Total Time: 0.39s
-
-====================================================================================================
-MODEL: Lasso Regression
-====================================================================================================
-
-Searching for top 5 parameter combinations (Axis X)...
-Rank | R2 Score | MAE | Parameters
---
+### 💡 Insights
-1 | 0.6749 | 289.7889 | {'lasso__alpha': 100}
-2 | 0.6694 | 275.8380 | {'lasso__alpha': 55}
-3 | 0.6678 | 274.6908 | {'lasso__alpha': 50}
-4 | 0.6658 | 273.6146 | {'lasso__alpha': 45}
-5 | 0.6634 | 272.5937 | {'lasso__alpha': 40}
-
-Running full 'predict' pipeline...
-left_iris_x left_iris_y right_iris_x right_iris_y point_x point_y
-0 751.846863 270.624054 641.400879 275.499695 100.0 100.0
-1 739.366943 275.130646 641.746643 276.419403 100.0 100.0
-2 739.202148 273.828247 641.387695 275.756897 100.0 100.0
-3 739.340332 273.977570 641.849548 276.237335 100.0 100.0
-4 738.536682 274.158722 641.225525 276.203308 100.0 100.0
-Time X: 0.15945863723754883
-Time Y: 0.15405631065368652
-Pipeline Result -> Avg Accuracy: 292.7949, Avg Precision: 45.1035, Total Time: 0.41s
-
-====================================================================================================
-MODEL: Elastic Net
-====================================================================================================
-
-Searching for top 5 parameter combinations (Axis X)...
-Rank | R2 Score | MAE | Parameters
+- Random Forest dramatically reduced positional error.
+- Precision dropped, suggesting sensitivity or instability (see the metric sketch below).
+- Ridge became faster after optimization.
+- SVR remained a strong non-linear alternative.
---
-1 | 0.7005 | 277.8284 | {'elasticnet__alpha': 0.5, 'elasticnet__l1_ratio': 0.5}
-2 | 0.6999 | 280.8068 | {'elasticnet__alpha': 1.0, 'elasticnet__l1_ratio': 0.7}
-3 | 0.6998 | 275.1441 | {'elasticnet__alpha': 2.0, 'elasticnet__l1_ratio': 0.9}
-4 | 0.6996 | 274.9757 | {'elasticnet__alpha': 1.0, 'elasticnet__l1_ratio': 0.8}
-5 | 0.6957 | 286.5466 | {'elasticnet__alpha': 2.0, 'elasticnet__l1_ratio': 0.8}
-
-Running full 'predict' pipeline...
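+The accuracy/precision distinction matters for reading these numbers: a model can land far from the target while clustering tightly (good precision, poor accuracy), and vice versa. The helpers below are simplified stand-ins for the project's metric functions (`func_accuracy_x`, `func_presicion_y`, ...), assuming the usual definitions: accuracy as mean Euclidean distance to the true point, precision as the spread of predictions around their own centroid.
+
+```python
+# Simplified stand-ins for the project's metric helpers (illustrative, not project code).
+import numpy as np
+
+def accuracy_px(pred_x, pred_y, true_x, true_y):
+    """Mean Euclidean distance (pixels) between predictions and the true point."""
+    dx = np.asarray(pred_x) - true_x
+    dy = np.asarray(pred_y) - true_y
+    return float(np.mean(np.hypot(dx, dy)))
+
+def precision_sd(pred_x, pred_y):
+    """Spread of predictions around their own centroid (RMS of per-axis SDs)."""
+    return float(np.hypot(np.std(pred_x), np.std(pred_y)))
+
+# A tight but offset cluster: small spread (good precision), large error (poor accuracy).
+xs = [210.0, 212.0, 208.0, 211.0]
+ys = [305.0, 303.0, 306.0, 304.0]
+print(accuracy_px(xs, ys, 100.0, 100.0))  # ~232 px error
+print(precision_sd(xs, ys))               # ~1.9 px spread
+```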
-left_iris_x left_iris_y right_iris_x right_iris_y point_x point_y
-0 751.846863 270.624054 641.400879 275.499695 100.0 100.0
-1 739.366943 275.130646 641.746643 276.419403 100.0 100.0
-2 739.202148 273.828247 641.387695 275.756897 100.0 100.0
-3 739.340332 273.977570 641.849548 276.237335 100.0 100.0
-4 738.536682 274.158722 641.225525 276.203308 100.0 100.0
-Score X: 0.7457386415719489
-Time X: 0.0
-Score Y: 0.9049978924542859
-Time Y: 0.0
-Pipeline Result -> Avg Accuracy: 298.9683, Avg Precision: 38.6566, Total Time: 0.13s
-
-====================================================================================================
-MODEL: Bayesian Ridge
-====================================================================================================
-
-Searching for top 5 parameter combinations (Axis X)...
-Rank | R2 Score | MAE | Parameters
+# 🧪 Experiment 3 — Train/Validation Split (Overfitting Check)
----
+To evaluate generalization:
-1 | 0.2365 | 255.4301 | {'bayesianridge__alpha_init': 1, 'bayesianridge__lambda_init': 1e-09}
-2 | 0.2365 | 255.4301 | {'bayesianridge__alpha_init': 1.3, 'bayesianridge__lambda_init': 1e-09}
-3 | 0.2365 | 255.4301 | {'bayesianridge__alpha_init': 1.1, 'bayesianridge__lambda_init': 1e-09}
-4 | 0.2365 | 255.4301 | {'bayesianridge__alpha_init': 1.2, 'bayesianridge__lambda_init': 1e-09}
-5 | 0.2365 | 255.4301 | {'bayesianridge__alpha_init': 1.4, 'bayesianridge__lambda_init': 1e-09}
-
-Running full 'predict' pipeline...
-left_iris_x left_iris_y right_iris_x right_iris_y point_x point_y
-0 751.846863 270.624054 641.400879 275.499695 100.0 100.0
-1 739.366943 275.130646 641.746643 276.419403 100.0 100.0
-2 739.202148 273.828247 641.387695 275.756897 100.0 100.0
-3 739.340332 273.977570 641.849548 276.237335 100.0 100.0
-4 738.536682 274.158722 641.225525 276.203308 100.0 100.0
-Time X: 0.7946014404296875
-Time Y: 0.7750468254089355
-Pipeline Result -> Avg Accuracy: 235.3889, Avg Precision: 48.1209, Total Time: 1.68s
-
-====================================================================================================
-MODEL: SGD Regressor
-====================================================================================================
-
-Searching for top 5 parameter combinations (Axis X)...
-Rank | R2 Score | MAE | Parameters
+- Training: **765 samples**
+- Validation: **135 samples**
----
+Each model went through:
-1 | 0.7123 | 256.6659 | {'sgdregressor__alpha': 0.0001, 'sgdregressor__eta0': 0.0001, 'sgdregressor__l1_ratio': 0.8, 'sgdregressor__max_iter': 1000}
-2 | 0.7123 | 256.6640 | {'sgdregressor__alpha': 0.0001, 'sgdregressor__eta0': 0.0001, 'sgdregressor__l1_ratio': 1, 'sgdregressor__max_iter': 1000}
-3 | 0.7123 | 256.6659 | {'sgdregressor__alpha': 0.0001, 'sgdregressor__eta0': 0.0001, 'sgdregressor__l1_ratio': 0.7, 'sgdregressor__max_iter': 1000}
-4 | 0.7123 | 256.6657 | {'sgdregressor__alpha': 0.0001, 'sgdregressor__eta0': 0.0001, 'sgdregressor__l1_ratio': 0.5, 'sgdregressor__max_iter': 1000}
-5 | 0.7123 | 256.7432 | {'sgdregressor__alpha': 0.001, 'sgdregressor__eta0': 0.0001, 'sgdregressor__l1_ratio': 0.8, 'sgdregressor__max_iter': 1000}
-
-Running full 'predict' pipeline...
-left_iris_x left_iris_y right_iris_x right_iris_y point_x point_y
-0 751.846863 270.624054 641.400879 275.499695 100.0 100.0
-1 739.366943 275.130646 641.746643 276.419403 100.0 100.0
-2 739.202148 273.828247 641.387695 275.756897 100.0 100.0
-3 739.340332 273.977570 641.849548 276.237335 100.0 100.0
-4 738.536682 274.158722 641.225525 276.203308 100.0 100.0
-Time X: 1.793349027633667
-Time Y: 1.8706464767456055
-Pipeline Result -> Avg Accuracy: 241.2358, Avg Precision: 46.9392, Total Time: 3.78s
-
-====================================================================================================
-MODEL: Support Vector Regressor
-====================================================================================================
-
-Searching for top 5 parameter combinations (Axis X)...
-Rank | R2 Score | MAE | Parameters
+1. Calibration phase (internal split)
+2. Validation phase (hold-out set)
---
-1 | 0.9167 | 68.5502 | {'svr__C': 1000, 'svr__gamma': 2, 'svr__kernel': 'rbf'}
-2 | 0.9162 | 69.3407 | {'svr__C': 500, 'svr__gamma': 2, 'svr__kernel': 'rbf'}
-3 | 0.9149 | 68.6453 | {'svr__C': 2000, 'svr__gamma': 2, 'svr__kernel': 'rbf'}
-4 | 0.9144 | 68.0756 | {'svr__C': 500, 'svr__gamma': 5, 'svr__kernel': 'rbf'}
-5 | 0.9132 | 72.2102 | {'svr__C': 200, 'svr__gamma': 2, 'svr__kernel': 'rbf'}
-
-Running full 'predict' pipeline...
-left_iris_x left_iris_y right_iris_x right_iris_y point_x point_y
-0 751.846863 270.624054 641.400879 275.499695 100.0 100.0
-1 739.366943 275.130646 641.746643 276.419403 100.0 100.0
-2 739.202148 273.828247 641.387695 275.756897 100.0 100.0
-3 739.340332 273.977570 641.849548 276.237335 100.0 100.0
-4 738.536682 274.158722 641.225525 276.203308 100.0 100.0
-Score X: 0.7693369168357224
-Time X: 0.0162813663482666
-Score Y: 0.9476667108830322
-Time Y: 0.019759178161621094
-Pipeline Result -> Avg Accuracy: 266.1792, Avg Precision: 48.0700, Total Time: 0.36s
-
-====================================================================================================
-MODEL: Random Forest Regressor
-====================================================================================================
-
-Searching for top 5 parameter combinations (Axis X)...
-Rank | R2 Score | MAE | Parameters
-
---
+## 📋 Train vs Validation Results
-1 | 0.9221 | 68.1083 | {'randomforestregressor__max_depth': 10, 'randomforestregressor__min_samples_split': 10, 'randomforestregressor__n_estimators': 100}
-2 | 0.9180 | 68.5070 | {'randomforestregressor__max_depth': 10, 'randomforestregressor__min_samples_split': 5, 'randomforestregressor__n_estimators': 100}
-3 | 0.9147 | 69.0029 | {'randomforestregressor__max_depth': 10, 'randomforestregressor__min_samples_split': 2, 'randomforestregressor__n_estimators': 100}
-
-Running full 'predict' pipeline...
-left_iris_x left_iris_y right_iris_x right_iris_y point_x point_y -0 751.846863 270.624054 641.400879 275.499695 100.0 100.0 -1 739.366943 275.130646 641.746643 276.419403 100.0 100.0 -2 739.202148 273.828247 641.387695 275.756897 100.0 100.0 -3 739.340332 273.977570 641.849548 276.237335 100.0 100.0 -4 738.536682 274.158722 641.225525 276.203308 100.0 100.0 -Time X: 3.1839962005615234 -Time Y: 1.0933246612548828 -Pipeline Result -> Avg Accuracy: 52.8510, Avg Precision: 31.9563, Total Time: 4.43s - -================================================================================ -OVERALL PERFORMANCE SUMMARY -================================================================================ -Model Name | Avg Accuracy | Avg Precision | Time (s) +| Model | Calib Acc | Calib Prec | Valid Acc | Valid Prec | Time (s) | +|------|-----------|------------|-----------|------------|---------| +| Linear Regression | 245.73 | 49.84 | 229.45 | 51.55 | 1.55 | +| Ridge Regression | 247.09 | 49.94 | 229.64 | 51.80 | 0.42 | +| Lasso Regression | 251.61 | 47.05 | 265.08 | 44.32 | 0.44 | +| Elastic Net | 301.30 | 41.36 | 294.61 | 38.25 | 0.09 | +| Bayesian Ridge | 246.86 | 50.26 | 229.11 | 50.94 | 1.69 | +| SGD Regressor | 247.04 | 49.60 | 239.26 | 48.56 | 3.98 | +| Support Vector Regressor | 281.02 | 49.79 | 257.47 | 47.05 | 0.22 | +| Random Forest | **69.50** | 46.49 | **46.65** | 29.15 | 6.57 | --- -Linear Regression | 235.3662 | 49.1126 | 1.4983 -Ridge Regression | 235.8727 | 48.7106 | 0.3908 -Lasso Regression | 292.7949 | 45.1035 | 0.4128 -Elastic Net | 298.9683 | 38.6566 | 0.1328 -Bayesian Ridge | 235.3889 | 48.1209 | 1.6843 -SGD Regressor | 241.2358 | 46.9392 | 3.7777 -Support Vector Regressor | 266.1792 | 48.0700 | 0.3644 -Random Forest Regressor | 52.8510 | 31.9563 | 4.4307 +## 🔍 Overfitting Analysis ---- +### ✅ Stable Models +- Linear Regression +- Ridge Regression +- Bayesian Ridge -## After spliting to test and train to check if there any overfitting - -Full Dataset: 900 rows -Training Split: 765 rows -Validation Split: 135 rows -Detected 9 unique calibration points. - -==================================================================================================== -MODEL: Linear Regression -==================================================================================================== - -Model Linear Regression has no hyperparameter grid defined. - ---- Phase 1: Calibration Phase (Internal split on Training set) --- -left_iris_x left_iris_y right_iris_x right_iris_y point_x point_y -0 725.386353 275.860901 627.103882 274.170715 1607.0 100.0 -1 735.130432 282.739563 636.170105 280.775909 100.0 769.0 -2 729.180908 284.334076 630.414978 282.022430 1607.0 769.0 -3 730.777405 280.959167 632.483826 277.992859 853.5 434.5 -4 723.184448 275.512177 625.264465 274.214630 1607.0 100.0 -Score X: 0.07148328938658532 -Time X: 0.002007007598876953 -Score Y: 0.9278236134235549 -Time Y: 0.0018939971923828125 -Calibration Result -> Acc: 245.7273, Prec: 49.8420, Time: 1.55s - ---- Phase 2: Validation Phase (Hold-out Split) --- -Score X_Val: 0.8200780206843142 -Time X_Val: 0.0019996166229248047 -Score Y_Val: 0.9612407162914263 -Time Y_Val: 0.0010020732879638672 -Validation Result -> Acc: 229.4535, Prec: 51.5457 - -==================================================================================================== -MODEL: Ridge Regression -==================================================================================================== - -Searching for top 5 parameter combinations... 
-Rank | R2 (X) | MAE (X) | Parameters
+These models showed consistent calibration and validation behavior.
---
+### ⚠️ Potential Overfitting
+- Random Forest achieved lowest error but suffered large precision drop.
+- Indicates high capacity and sensitivity to dataset structure (a hold-out check is sketched below).
-1 | 0.7256 | 241.3973 | {'ridge__alpha': 0.001}
-2 | 0.7118 | 268.2630 | {'ridge__alpha': 100}
-3 | 0.6966 | 263.7161 | {'ridge__alpha': 50}
-4 | 0.6507 | 244.4687 | {'ridge__alpha': 0.005}
-5 | 0.6468 | 261.6515 | {'ridge__alpha': 20}
-
---- Phase 1: Calibration Phase (Internal split on Training set) ---
-left_iris_x left_iris_y right_iris_x right_iris_y point_x point_y
-0 725.386353 275.860901 627.103882 274.170715 1607.0 100.0
-1 735.130432 282.739563 636.170105 280.775909 100.0 769.0
-2 729.180908 284.334076 630.414978 282.022430 1607.0 769.0
-3 730.777405 280.959167 632.483826 277.992859 853.5 434.5
-4 723.184448 275.512177 625.264465 274.214630 1607.0 100.0
-Time X: 0.15825796127319336
-Time Y: 0.15566420555114746
-Calibration Result -> Acc: 247.0915, Prec: 49.9354, Time: 0.42s
-
---- Phase 2: Validation Phase (Hold-out Split) ---
-Time X_Val: 0.16522979736328125
-Time Y_Val: 0.15460491180419922
-Validation Result -> Acc: 229.6377, Prec: 51.8048
-
-====================================================================================================
-MODEL: Lasso Regression
-====================================================================================================
-
-Searching for top 5 parameter combinations...
-Rank | R2 (X) | MAE (X) | Parameters
+### ⚡ Best Balance
+- SVR provided a strong balance between:
+  - Accuracy
+  - Speed
+  - Generalization
---
-1 | 0.6910 | 270.3284 | {'lasso__alpha': 55}
-2 | 0.6905 | 269.0582 | {'lasso__alpha': 50}
-3 | 0.6895 | 267.8433 | {'lasso__alpha': 45}
-4 | 0.6881 | 266.7249 | {'lasso__alpha': 40}
-5 | 0.6841 | 264.7345 | {'lasso__alpha': 30}
-
---- Phase 1: Calibration Phase (Internal split on Training set) ---
-left_iris_x left_iris_y right_iris_x right_iris_y point_x point_y
-0 725.386353 275.860901 627.103882 274.170715 1607.0 100.0
-1 735.130432 282.739563 636.170105 280.775909 100.0 769.0
-2 729.180908 284.334076 630.414978 282.022430 1607.0 769.0
-3 730.777405 280.959167 632.483826 277.992859 853.5 434.5
-4 723.184448 275.512177 625.264465 274.214630 1607.0 100.0
-Time X: 0.16257333755493164
-Time Y: 0.16847848892211914
-Calibration Result -> Acc: 251.6126, Prec: 47.0489, Time: 0.44s
-
---- Phase 2: Validation Phase (Hold-out Split) ---
-Time X_Val: 0.18091559410095215
-Time Y_Val: 0.16357016563415527
-Validation Result -> Acc: 265.0815, Prec: 44.3168
-
-====================================================================================================
-MODEL: Elastic Net
-====================================================================================================
-
-Searching for top 5 parameter combinations...
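+One way to reproduce this check outside the full pipeline is to hold out a validation set up front and compare the two error figures directly; a large calibration/validation gap, or a precision collapse like Random Forest's, is the signal to look for. A minimal sketch, assuming a feature matrix `X`, pixel targets `y`, and a hypothetical `mean_error` helper:
+
+```python
+# Minimal hold-out overfitting check (X, y and mean_error are illustrative stand-ins).
+import numpy as np
+from sklearn.ensemble import RandomForestRegressor
+from sklearn.model_selection import train_test_split
+
+def mean_error(model, X, y):
+    """Mean absolute deviation between predictions and targets, in pixels."""
+    return float(np.mean(np.abs(model.predict(X) - y)))
+
+# A 15% hold-out reproduces the report's 765/135 split on a 900-row session.
+X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.15, random_state=42)
+
+model = RandomForestRegressor(n_estimators=200, max_depth=10, random_state=42)
+model.fit(X_train, y_train)
+
+print(f"calibration error: {mean_error(model, X_train, y_train):.2f} px")
+print(f"validation error:  {mean_error(model, X_val, y_val):.2f} px")
+```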
-Rank | R2 (X) | MAE (X) | Parameters
+# 🏆 Final Findings
---
+### 🥇 Best Raw Accuracy
+**Random Forest Regressor**
-1 | 0.7136 | 273.3473 | {'elasticnet__alpha': 0.5, 'elasticnet__l1_ratio': 0.5}
-2 | 0.7136 | 270.6703 | {'elasticnet__alpha': 2.0, 'elasticnet__l1_ratio': 0.9}
-3 | 0.7135 | 270.5027 | {'elasticnet__alpha': 1.0, 'elasticnet__l1_ratio': 0.8}
-4 | 0.7123 | 276.3342 | {'elasticnet__alpha': 1.0, 'elasticnet__l1_ratio': 0.7}
-5 | 0.7108 | 267.5358 | {'elasticnet__alpha': 0.5, 'elasticnet__l1_ratio': 0.7}
-
---- Phase 1: Calibration Phase (Internal split on Training set) ---
-left_iris_x left_iris_y right_iris_x right_iris_y point_x point_y
-0 725.386353 275.860901 627.103882 274.170715 1607.0 100.0
-1 735.130432 282.739563 636.170105 280.775909 100.0 769.0
-2 729.180908 284.334076 630.414978 282.022430 1607.0 769.0
-3 730.777405 280.959167 632.483826 277.992859 853.5 434.5
-4 723.184448 275.512177 625.264465 274.214630 1607.0 100.0
-Score X: 0.6531754054868382
-Time X: 0.0006241798400878906
-Score Y: 0.9074449357286609
-Time Y: 0.0008966922760009766
-Calibration Result -> Acc: 301.3013, Prec: 41.3590, Time: 0.09s
-
---- Phase 2: Validation Phase (Hold-out Split) ---
-Score X_Val: 0.7340912725347744
-Time X_Val: 0.0018773078918457031
-Score Y_Val: 0.9125809129793833
-Time Y_Val: 0.0009999275207519531
-Validation Result -> Acc: 294.6057, Prec: 38.2533
-
-====================================================================================================
-MODEL: Bayesian Ridge
-====================================================================================================
-
-Searching for top 5 parameter combinations...
-Rank | R2 (X) | MAE (X) | Parameters
+- Lowest positional error
+- Higher computation cost
+- Possible overfitting
---
+### 🥈 Most Stable Models
+- Linear Regression
+- Ridge Regression
-1 | 0.3463 | 251.4715 | {'bayesianridge__alpha_init': 1, 'bayesianridge__lambda_init': 0.001}
-2 | 0.3463 | 251.4715 | {'bayesianridge__alpha_init': 1.1, 'bayesianridge__lambda_init': 0.001}
-3 | 0.3463 | 251.4715 | {'bayesianridge__alpha_init': 1.2, 'bayesianridge__lambda_init': 0.001}
-4 | 0.3463 | 251.4715 | {'bayesianridge__alpha_init': 1.3, 'bayesianridge__lambda_init': 0.001}
-5 | 0.3463 | 251.4715 | {'bayesianridge__alpha_init': 1.4, 'bayesianridge__lambda_init': 0.001}
-
---- Phase 1: Calibration Phase (Internal split on Training set) ---
-left_iris_x left_iris_y right_iris_x right_iris_y point_x point_y
-0 725.386353 275.860901 627.103882 274.170715 1607.0 100.0
-1 735.130432 282.739563 636.170105 280.775909 100.0 769.0
-2 729.180908 284.334076 630.414978 282.022430 1607.0 769.0
-3 730.777405 280.959167 632.483826 277.992859 853.5 434.5
-4 723.184448 275.512177 625.264465 274.214630 1607.0 100.0
-Time X: 0.7953126430511475
-Time Y: 0.79813551902771
-Calibration Result -> Acc: 246.8617, Prec: 50.2630, Time: 1.69s
-
---- Phase 2: Validation Phase (Hold-out Split) ---
-Time X_Val: 0.8207540512084961
-Time Y_Val: 0.8074281215667725
-Validation Result -> Acc: 229.1080, Prec: 50.9408
-
-====================================================================================================
-MODEL: SGD Regressor
-====================================================================================================
-
-Searching for top 5 parameter combinations...
-Rank | R2 (X) | MAE (X) | Parameters
+### 🥉 Best Overall Trade-off
+**Support Vector Regressor (SVR)**
---
-1 | 0.7240 | 252.4349 | {'sgdregressor__alpha': 0.0001, 'sgdregressor__eta0': 0.0001, 'sgdregressor__l1_ratio': 0.8, 'sgdregressor__max_iter': 1000}
-2 | 0.7240 | 252.4352 | {'sgdregressor__alpha': 0.0001, 'sgdregressor__eta0': 0.0001, 'sgdregressor__l1_ratio': 1, 'sgdregressor__max_iter': 1000}
-3 | 0.7240 | 252.4355 | {'sgdregressor__alpha': 0.0001, 'sgdregressor__eta0': 0.0001, 'sgdregressor__l1_ratio': 0.5, 'sgdregressor__max_iter': 1000}
-4 | 0.7240 | 252.4360 | {'sgdregressor__alpha': 0.0001, 'sgdregressor__eta0': 0.0001, 'sgdregressor__l1_ratio': 0.7, 'sgdregressor__max_iter': 1000}
-5 | 0.7239 | 252.5118 | {'sgdregressor__alpha': 0.001, 'sgdregressor__eta0': 0.0001, 'sgdregressor__l1_ratio': 0.5, 'sgdregressor__max_iter': 1000}
-
---- Phase 1: Calibration Phase (Internal split on Training set) ---
-left_iris_x left_iris_y right_iris_x right_iris_y point_x point_y
-0 725.386353 275.860901 627.103882 274.170715 1607.0 100.0
-1 735.130432 282.739563 636.170105 280.775909 100.0 769.0
-2 729.180908 284.334076 630.414978 282.022430 1607.0 769.0
-3 730.777405 280.959167 632.483826 277.992859 853.5 434.5
-4 723.184448 275.512177 625.264465 274.214630 1607.0 100.0
-Time X: 1.9512312412261963
-Time Y: 1.9312074184417725
-Calibration Result -> Acc: 247.0398, Prec: 49.5953, Time: 3.98s
-
---- Phase 2: Validation Phase (Hold-out Split) ---
-Time X_Val: 1.7748675346374512
-Time Y_Val: 1.8547911643981934
-Validation Result -> Acc: 239.2565, Prec: 48.5600
-
-====================================================================================================
-MODEL: Support Vector Regressor
-====================================================================================================
-
-Searching for top 5 parameter combinations...
-Rank | R2 (X) | MAE (X) | Parameters
+# 🚀 Future Improvements
---
+Possible next steps:
-1 | 0.9196 | 63.4319 | {'svr__C': 1000, 'svr__gamma': 5, 'svr__kernel': 'rbf'}
-2 | 0.9191 | 66.7000 | {'svr__C': 1000, 'svr__gamma': 2, 'svr__kernel': 'rbf'}
-3 | 0.9186 | 67.8033 | {'svr__C': 500, 'svr__gamma': 2, 'svr__kernel': 'rbf'}
-4 | 0.9181 | 65.6786 | {'svr__C': 500, 'svr__gamma': 5, 'svr__kernel': 'rbf'}
-5 | 0.9180 | 66.6653 | {'svr__C': 2000, 'svr__gamma': 2, 'svr__kernel': 'rbf'}
-
---- Phase 1: Calibration Phase (Internal split on Training set) ---
-left_iris_x left_iris_y right_iris_x right_iris_y point_x point_y
-0 725.386353 275.860901 627.103882 274.170715 1607.0 100.0
-1 735.130432 282.739563 636.170105 280.775909 100.0 769.0
-2 729.180908 284.334076 630.414978 282.022430 1607.0 769.0
-3 730.777405 280.959167 632.483826 277.992859 853.5 434.5
-4 723.184448 275.512177 625.264465 274.214630 1607.0 100.0
-Score X: 0.6935454371850305
-Time X: 0.018875598907470703
-Score Y: 0.9363413011174913
-Time Y: 0.011532068252563477
-Calibration Result -> Acc: 281.0235, Prec: 49.7874, Time: 0.22s
-
---- Phase 2: Validation Phase (Hold-out Split) ---
-Score X_Val: 0.7649424330422514
-Time X_Val: 0.013813972473144531
-Score Y_Val: 0.9553053450112502
-Time Y_Val: 0.016499757766723633
-Validation Result -> Acc: 257.4740, Prec: 47.0542
-
-====================================================================================================
-MODEL: Random Forest Regressor
-====================================================================================================
-
-Searching for top 5 parameter combinations...
-Rank | R2 (X) | MAE (X) | Parameters
+- Feature scaling & normalization experiments
+- Temporal smoothing for gaze stability (see the sketch at the end of this report)
+- Ensemble methods (Linear + Non-linear)
+- Neural network-based gaze regression
+- Real-time latency benchmarking
---
-1 | 0.9229 | 65.8699 | {'randomforestregressor__max_depth': 10, 'randomforestregressor__min_samples_split': 10, 'randomforestregressor__n_estimators': 100}
-2 | 0.9183 | 66.9455 | {'randomforestregressor__max_depth': 10, 'randomforestregressor__min_samples_split': 5, 'randomforestregressor__n_estimators': 100}
-3 | 0.9137 | 67.7311 | {'randomforestregressor__max_depth': 10, 'randomforestregressor__min_samples_split': 2, 'randomforestregressor__n_estimators': 100}
-
---- Phase 1: Calibration Phase (Internal split on Training set) ---
-left_iris_x left_iris_y right_iris_x right_iris_y point_x point_y
-0 725.386353 275.860901 627.103882 274.170715 1607.0 100.0
-1 735.130432 282.739563 636.170105 280.775909 100.0 769.0
-2 729.180908 284.334076 630.414978 282.022430 1607.0 769.0
-3 730.777405 280.959167 632.483826 277.992859 853.5 434.5
-4 723.184448 275.512177 625.264465 274.214630 1607.0 100.0
-Time X: 3.5348854064941406
-Time Y: 2.837346315383911
-Calibration Result -> Acc: 69.4965, Prec: 46.4941, Time: 6.57s
-
---- Phase 2: Validation Phase (Hold-out Split) ---
-Time X_Val: 3.585437297821045
-Time Y_Val: 3.094998598098755
-Validation Result -> Acc: 46.6466, Prec: 29.1507
-
-==============================================================================================================
-CONSOLIDATED TRAIN/VAL PERFORMANCE SUMMARY
-==============================================================================================================
-Model Name | Calib Acc | Calib Prec | Valid Acc | Valid Prec | Time (s)
+# 📌 Conclusion
---
+This study shows that:
-Linear Regression | 245.7273 | 49.8420 | 229.4535 | 51.5457 | 1.5465
-Ridge Regression | 247.0915 | 49.9354 | 229.6377 | 51.8048 | 0.4171
-Lasso Regression | 251.6126 | 47.0489 | 265.0815 | 44.3168 | 0.4428
-Elastic Net | 301.3013 | 41.3590 | 294.6057 | 38.2533 | 0.0876
-Bayesian Ridge | 246.8617 | 50.2630 | 229.1080 | 50.9408 | 1.6934
-SGD Regressor | 247.0398 | 49.5953 | 239.2565 | 48.5600 | 3.9767
-Support Vector Regressor | 281.0235 | 49.7874 | 257.4740 | 47.0542 | 0.2224
-Random Forest Regressor | 69.4965 | 46.4941 | 46.6466 | 29.1507 | 6.5670
+- Non-linear models improve gaze estimation accuracy.
+- Random Forest can greatly reduce error but may overfit.
+- Linear models remain strong baselines for robustness.
+- Proper train/validation splits are essential for realistic performance evaluation.
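+As a pointer for the temporal-smoothing item in the future improvements above, a simple exponential moving average over successive predictions is a common first step. This is sketched only to illustrate the idea; it is not code that exists in the project.
+
+```python
+# Illustrative temporal smoothing of gaze predictions (not project code):
+# an exponential moving average damps frame-to-frame jitter at the cost of some lag.
+def smooth_gaze(points, alpha=0.3):
+    """points: iterable of (x, y) predictions; alpha: smoothing factor in (0, 1]."""
+    smoothed = []
+    sx = sy = None
+    for x, y in points:
+        if sx is None:
+            sx, sy = x, y  # seed with the first prediction
+        else:
+            sx = alpha * x + (1 - alpha) * sx
+            sy = alpha * y + (1 - alpha) * sy
+        smoothed.append((sx, sy))
+    return smoothed
+
+print(smooth_gaze([(100, 100), (130, 90), (95, 110)]))
+```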
---
+

From 757ecbb52a2664e03877cd139c5d3320ffc26cb3 Mon Sep 17 00:00:00 2001
From: midaa1
Date: Mon, 16 Feb 2026 04:37:19 +0200
Subject: [PATCH 08/10] fix if-condition handling in the models

---
 app/services/gaze_tracker.py | 10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/app/services/gaze_tracker.py b/app/services/gaze_tracker.py
index af3bac6..918cf9c 100644
--- a/app/services/gaze_tracker.py
+++ b/app/services/gaze_tracker.py
@@ -14,7 +14,7 @@ from sklearn.pipeline import make_pipeline
 from sklearn.ensemble import RandomForestRegressor
 from sklearn.linear_model import Ridge
-import time
+

 # Model imports
 from sklearn import linear_model
@@ -87,16 +87,11 @@ def trian_and_predict(model_name, X_train, y_train, X_test, y_test, label):
     """
     if (
         model_name == "Linear Regression"
-        or model_name == "Elastic Net"
-        or model_name == "Support Vector Regressor"
     ):
         model = models[model_name]
-        start_time = time.time()
         model.fit(X_train, y_train)
-        end_time = time.time()
         y_pred = model.predict(X_test)
         print(f"Score {label}: {r2_score(y_test, y_pred)}")
-        print(f"Time {label}: {end_time - start_time}")
         return y_pred
     else:
         pipeline = models[model_name]
@@ -109,12 +104,9 @@ def trian_and_predict(model_name, X_train, y_train, X_test, y_test, label):
             refit="r2",
             return_train_score=True,
         )
-        start_time = time.time()
         grid_search.fit(X_train, y_train)
-        end_time = time.time()
         best_model = grid_search.best_estimator_
         y_pred = best_model.predict(X_test)
-        print(f"Time {label}: {end_time - start_time}")
         return y_pred


From b1a0037f3105b599f633bfea7d87f5a83daf3a91 Mon Sep 17 00:00:00 2001
From: midaa1
Date: Wed, 18 Feb 2026 08:05:49 +0200
Subject: [PATCH 09/10] refactor iteration to avoid O(n^2) loop

---
 app/services/gaze_tracker.py | 565 ++++++++++++++++++-----------------
 1 file changed, 298 insertions(+), 267 deletions(-)

diff --git a/app/services/gaze_tracker.py b/app/services/gaze_tracker.py
index 918cf9c..d3377a6 100644
--- a/app/services/gaze_tracker.py
+++ b/app/services/gaze_tracker.py
@@ -1,5 +1,4 @@
 # Necessary imports
-import math
 import warnings

 warnings.filterwarnings("ignore")
@@ -12,17 +11,12 @@
 from sklearn.model_selection import train_test_split
 from sklearn.preprocessing import StandardScaler, PolynomialFeatures
 from sklearn.pipeline import make_pipeline
-from sklearn.ensemble import RandomForestRegressor
-from sklearn.linear_model import Ridge
-
 # Model imports
 from sklearn import linear_model
 from sklearn.svm import SVR
 from sklearn.cluster import KMeans
 from sklearn.model_selection import GridSearchCV
-from sklearn.model_selection import GroupShuffleSplit
-import matplotlib.pyplot as plt

 # Metrics imports
 from sklearn.metrics import make_scorer
@@ -39,7 +33,6 @@
     func_presicion_y,
     func_accuracy_x,
     func_accuracy_y,
-    func_total_accuracy,
 )

 from app.services.config import hyperparameters
@@ -61,14 +54,7 @@
     "Support Vector Regressor": make_pipeline(
         PolynomialFeatures(2), SVR(kernel="linear")
     ),
-    "Random Forest Regressor": make_pipeline(
-        RandomForestRegressor(
-            n_estimators=200,
-            max_depth=10,
-            min_samples_split=5,
-            random_state=42
-        )
-)}
+}

 # Set the scoring metrics for GridSearchCV to r2_score and mean_absolute_error
 scoring = {
@@ -77,39 +63,6 @@
 }


-def squash(v, limit=1.0):
-    """Squash não-linear estilo WebGazer"""
-    return np.tanh(v / limit)
-
-def trian_and_predict(model_name, X_train, y_train, X_test, y_test, label):
-    """
-    Helper to train a model (with or without GridSearchCV) and return predictions.
- """ - if ( - model_name == "Linear Regression" - ): - model = models[model_name] - model.fit(X_train, y_train) - y_pred = model.predict(X_test) - print(f"Score {label}: {r2_score(y_test, y_pred)}") - return y_pred - else: - pipeline = models[model_name] - param_grid = hyperparameters[model_name]["param_grid"] - grid_search = GridSearchCV( - pipeline, - param_grid, - cv=5, - scoring=scoring, - refit="r2", - return_train_score=True, - ) - grid_search.fit(X_train, y_train) - best_model = grid_search.best_estimator_ - y_pred = best_model.predict(X_test) - return y_pred - - def predict(data, k, model_X, model_Y): """ Predicts the gaze coordinates using machine learning models. @@ -123,52 +76,97 @@ def predict(data, k, model_X, model_Y): Returns: dict: A dictionary containing the predicted gaze coordinates, precision, accuracy, and cluster centroids. """ - + # Inicialize standard scaler + sc = StandardScaler() # Load data from csv file and drop unnecessary columns df = pd.read_csv(data) df = df.drop(["screen_height", "screen_width"], axis=1) - print(df.head()) - # Create groups (point_x, point_y) - df["group"] = list(zip(df["point_x"], df["point_y"])) # Data for X axis X_x = df[["left_iris_x", "right_iris_x"]] X_y = df["point_x"] - # groups = df["group"] + + # Normalize data using standard scaler and split data into training and testing sets + X_x = sc.fit_transform(X_x) + X_train_x, X_test_x, y_train_x, y_test_x = train_test_split( + X_x, X_y, test_size=0.2, random_state=42 + ) + + if ( + model_X == "Linear Regression" + or model_X == "Elastic Net" + or model_X == "Support Vector Regressor" + ): + model = models[model_X] + + # Fit the model and make predictions + model.fit(X_train_x, y_train_x) + y_pred_x = model.predict(X_test_x) + + else: + pipeline = models[model_X] + param_grid = hyperparameters[model_X]["param_grid"] + + # Initialize GridSearchCV with the pipeline and parameter grid + grid_search = GridSearchCV( + pipeline, + param_grid, + cv=5, + scoring=scoring, + refit="r2", + return_train_score=True, + ) + + # Fit the GridSearchCV to the training data for X + grid_search.fit(X_train_x, y_train_x) + + # Use the best estimator to predict the values and calculate the R2 score + best_model_x = grid_search.best_estimator_ + y_pred_x = best_model_x.predict(X_test_x) + # Data for Y axis - X_feature_y = df[["left_iris_y", "right_iris_y"]] + X_y = df[["left_iris_y", "right_iris_y"]] y_y = df["point_y"] - # Split data into training and testing sets then Normalize data using standard scaler - ( - X_train_x, X_test_x, - y_train_x, y_test_x, - X_train_y, X_test_y, - y_train_y, y_test_y - )= train_test_split( - X_x, - X_y, - X_feature_y, - y_y, - test_size=0.2, - random_state=42, + + # Normalize data using standard scaler and split data into training and testing sets + X_y = sc.fit_transform(X_y) + X_train_y, X_test_y, y_train_y, y_test_y = train_test_split( + X_y, y_y, test_size=0.2, random_state=42 ) - - # Scaling (fit on train only) - scaler_x = StandardScaler() - X_train_x = scaler_x.fit_transform(X_train_x) - X_test_x = scaler_x.transform(X_test_x) - - y_pred_x = trian_and_predict(model_X, X_train_x, y_train_x, X_test_x, y_test_x, "X") - - # Scaling (fit on train only) - scaler_y = StandardScaler() - X_train_y = scaler_y.fit_transform(X_train_y) - X_test_y = scaler_y.transform(X_test_y) - - - y_pred_y = trian_and_predict(model_Y, X_train_y, y_train_y, X_test_y, y_test_y, "Y") - + + if ( + model_Y == "Linear Regression" + or model_Y == "Elastic Net" + or model_Y == "Support Vector Regressor" + 
): + model = models[model_Y] + + # Fit the model and make predictions + model.fit(X_train_y, y_train_y) + y_pred_y = model.predict(X_test_y) + + else: + pipeline = models[model_Y] + param_grid = hyperparameters[model_Y]["param_grid"] + + # Initialize GridSearchCV with the pipeline and parameter grid + grid_search = GridSearchCV( + pipeline, + param_grid, + cv=5, + scoring=scoring, + refit="r2", + return_train_score=True, + ) + + # Fit the GridSearchCV to the training data for X + grid_search.fit(X_train_y, y_train_y) + + # Use the best estimator to predict the values and calculate the R2 score + best_model_y = grid_search.best_estimator_ + y_pred_y = best_model_y.predict(X_test_y) + # Convert the predictions to a numpy array and apply KMeans clustering data = np.array([y_pred_x, y_pred_y]).T model = KMeans(n_clusters=k, n_init="auto", init="k-means++") @@ -183,20 +181,25 @@ def predict(data, k, model_X, model_Y): } df_data = pd.DataFrame(data) df_data["True XY"] = list(zip(df_data["True X"], df_data["True Y"])) - + # Filter out negative values df_data = df_data[(df_data["Predicted X"] >= 0) & (df_data["Predicted Y"] >= 0)] - # Calculate the precision and accuracy for each + # Calculate the precision and accuracy for each precision_x = df_data.groupby("True XY").apply(func_precision_x) precision_y = df_data.groupby("True XY").apply(func_presicion_y) - # Calculate the average precision + # Calculate the average precision and accuracy precision_xy = (precision_x + precision_y) / 2 - - # Calculate the average accuracy (eculidian distance) - accuracy_xy = df_data.groupby("True XY").apply(func_total_accuracy) - + precision_xy = precision_xy / np.mean(precision_xy) + + # Calculate the accuracy for each axis + accuracy_x = df_data.groupby("True XY").apply(func_accuracy_x) + accuracy_y = df_data.groupby("True XY").apply(func_accuracy_y) + + # Calculate the average accuracy + accuracy_xy = (accuracy_x + accuracy_y) / 2 + accuracy_xy = accuracy_xy / np.mean(accuracy_xy) # Create a dictionary to store the data data = {} @@ -232,186 +235,214 @@ def predict(data, k, model_X, model_Y): # Return the data return data +def predict_new_data_simple(calib_csv_path, predict_csv_path, model_X, model_Y, k=3): + """ + Versão simplificada de predict_new_data. + Treina modelos nos dados de calibração e prevê coordenadas nos novos dados. + Retorna o mesmo formato que a função `predict`. 
+ """ + # -------------------- SCALERS -------------------- + sc_x = StandardScaler() + sc_y = StandardScaler() -def predict_new_data_simple( - calib_csv_path, - predict_csv_path, - iris_data, - screen_width=None, - screen_height=None, -): - # ============================ - # CONFIG (WebGazer-inspired) - # ============================ - BASELINE_ALPHA = 0.01 - SQUASH_LIMIT_X = 1.0 - SQUASH_LIMIT_Y = 1.0 - Y_GAIN = 1.2 # adjustment to compensate for vertical bias - - # ============================ - # LOAD TRAIN - # ============================ - df_train = pd.read_csv(calib_csv_path) - - x_center = screen_width / 2 - y_center = screen_height / 2 - - # normalize targets to [-1, 1] space - y_train_x = (df_train["point_x"].values.astype(float) - x_center) / (screen_width / 2) - y_train_y = (df_train["point_y"].values.astype(float) - y_center) / (screen_height / 2) - - # ensure laterality - if df_train["left_iris_x"].mean() < df_train["right_iris_x"].mean(): - df_train["left_iris_x"], df_train["right_iris_x"] = ( - df_train["right_iris_x"].copy(), - df_train["left_iris_x"].copy(), - ) - if df_train["left_iris_y"].mean() < df_train["right_iris_y"].mean(): - df_train["left_iris_y"], df_train["right_iris_y"] = ( - df_train["right_iris_y"].copy(), - df_train["left_iris_y"].copy(), - ) + # -------------------- TREINO -------------------- + df_train = pd.read_csv(calib_csv_path).drop(["screen_height", "screen_width"], axis=1) - left_x = df_train["left_iris_x"].values.astype(float) - right_x = df_train["right_iris_x"].values.astype(float) - left_y = df_train["left_iris_y"].values.astype(float) - right_y = df_train["right_iris_y"].values.astype(float) - - mean_x = (left_x + right_x) / 2 - diff_x = left_x - right_x - mean_y = (left_y + right_y) / 2 - diff_y = left_y - right_y - - # baseline inicial (WebGazer) - ref_mean_x = np.mean(mean_x) - ref_mean_y = np.mean(mean_y) - - rel_x = mean_x - ref_mean_x - rel_y = mean_y - ref_mean_y - - # ============================ - # PHYSICAL NORMALIZATION Y - # ============================ - iris_y_scale = np.std(mean_y) + 1e-6 - diff_y_norm = diff_y / iris_y_scale - rel_y_norm = rel_y / iris_y_scale - - # ============================ - # FEATURES - # ============================ - X_train_x = np.column_stack([ - left_x, right_x, mean_x, diff_x, rel_x - ]) - - X_train_y = np.column_stack([ - diff_y_norm, rel_y_norm - ]) - - # ============================ - # MODELS - # ============================ - model_x = make_pipeline(StandardScaler(), Ridge(alpha=1.0)) - model_y = make_pipeline(StandardScaler(), Ridge(alpha=1.0)) - - model_x.fit(X_train_x, y_train_x) - model_y.fit(X_train_y, y_train_y) - - # ============================ - # Real scale (calibration) - normalize predicted values to screen coordinates - # ============================ - x_range = np.percentile(y_train_x, 95) - np.percentile(y_train_x, 5) - y_range = np.percentile(y_train_y, 95) - np.percentile(y_train_y, 5) - - x_scale = max(x_range / 2, 1e-6) * (screen_width / 2) - y_scale = max(y_range / 2, 1e-6) * (screen_height / 2) - - # ============================ - # LOAD PREDICT - # ============================ - df_pred = pd.read_csv(predict_csv_path) - - if df_pred["left_iris_x"].mean() < df_pred["right_iris_x"].mean(): - df_pred["left_iris_x"], df_pred["right_iris_x"] = ( - df_pred["right_iris_x"].copy(), - df_pred["left_iris_x"].copy(), - ) - if df_pred["left_iris_y"].mean() < df_pred["right_iris_y"].mean(): - df_pred["left_iris_y"], df_pred["right_iris_y"] = ( - df_pred["right_iris_y"].copy(), - 
df_pred["left_iris_y"].copy(), - ) + X_train_x = df_train[["left_iris_x", "right_iris_x"]].values + y_train_x = df_train["point_x"].values + X_train_y = df_train[["left_iris_y", "right_iris_y"]].values + y_train_y = df_train["point_y"].values + + X_train_x_scaled = sc_x.fit_transform(X_train_x) + X_train_y_scaled = sc_y.fit_transform(X_train_y) + + # Modelos + model_fit_x = models[model_X].fit(X_train_x_scaled, y_train_x) + model_fit_y = models[model_Y].fit(X_train_y_scaled, y_train_y) + + # -------------------- NOVOS DADOS -------------------- + df_predict = pd.read_csv(predict_csv_path) + X_pred_x = sc_x.transform(df_predict[["left_iris_x", "right_iris_x"]].values) + X_pred_y = sc_y.transform(df_predict[["left_iris_y", "right_iris_y"]].values) + + y_pred_x = model_fit_x.predict(X_pred_x) + y_pred_y = model_fit_y.predict(X_pred_y) + + # Garantir valores não-negativos + y_pred_x = np.clip(y_pred_x, 0, None) + y_pred_y = np.clip(y_pred_y, 0, None) + + # -------------------- KMEANS -------------------- + data_pred = np.array([y_pred_x, y_pred_y]).T + kmeans_model = KMeans(n_clusters=k, n_init="auto", init="k-means++") + y_kmeans = kmeans_model.fit_predict(data_pred) + + # -------------------- FORMATA DADOS -------------------- + df_data = pd.DataFrame({ + "Predicted X": y_pred_x, + "Predicted Y": y_pred_y, + "True X": df_predict["point_x"] if "point_x" in df_predict else y_pred_x, + "True Y": df_predict["point_y"] if "point_y" in df_predict else y_pred_y + }) + + # Calcular métricas + precision_x = df_data.groupby(["True X", "True Y"]).apply(func_precision_x) + precision_y = df_data.groupby(["True X", "True Y"]).apply(func_presicion_y) + precision_xy = (precision_x + precision_y) / 2 + precision_xy /= np.mean(precision_xy) + + accuracy_x = df_data.groupby(["True X", "True Y"]).apply(func_accuracy_x) + accuracy_y = df_data.groupby(["True X", "True Y"]).apply(func_accuracy_y) + accuracy_xy = (accuracy_x + accuracy_y) / 2 + accuracy_xy /= np.mean(accuracy_xy) + + # Estrutura final + data = {} + for index, row in df_data.iterrows(): + outer_key = str(int(row["True X"])) + inner_key = str(int(row["True Y"])) + if outer_key not in data: + data[outer_key] = {} + data[outer_key][inner_key] = { + "predicted_x": df_data[ + (df_data["True X"] == row["True X"]) & + (df_data["True Y"] == row["True Y"]) + ]["Predicted X"].tolist(), + "predicted_y": df_data[ + (df_data["True X"] == row["True X"]) & + (df_data["True Y"] == row["True Y"]) + ]["Predicted Y"].tolist(), + "PrecisionSD": precision_xy[(row["True X"], row["True Y"])], + "Accuracy": accuracy_xy[(row["True X"], row["True Y"])], + } + + data["centroids"] = kmeans_model.cluster_centers_.tolist() + return data + + +def train_to_validate_calib(calib_csv_file, predict_csv_file): + dataset_train_path = calib_csv_file + dataset_predict_path = predict_csv_file + + # Carregue os dados de treinamento a partir do CSV + data = pd.read_csv(dataset_train_path) + + # Para evitar que retorne valores negativos: Aplicar uma transformação logarítmica aos rótulos (point_x e point_y) + # data['point_x'] = np.log(data['point_x']) + # data['point_y'] = np.log(data['point_y']) + + # Separe os recursos (X) e os rótulos (y) + X = data[["left_iris_x", "left_iris_y", "right_iris_x", "right_iris_y"]] + y = data[["point_x", "point_y"]] + + # Crie e ajuste um modelo de regressão linear + model = linear_model.LinearRegression() + model.fit(X, y) + + # Carregue os dados de teste a partir de um novo arquivo CSV + dados_teste = pd.read_csv(dataset_predict_path) + + # Faça previsões + 
previsoes = model.predict(dados_teste) + + # Para evitar que retorne valores negativos: Inverter a transformação logarítmica nas previsões + # previsoes = np.exp(previsoes) + + # Exiba as previsões + print("Previsões de point_x e point_y:") + print(previsoes) + return previsoes.tolist() + + +def train_model(session_id): + # Download dataset + dataset_train_path = ( + f"{Path().absolute()}/public/training/{session_id}/train_data.csv" + ) + dataset_session_path = ( + f"{Path().absolute()}/public/sessions/{session_id}/session_data.csv" + ) + + # Importing data from csv + raw_dataset = pd.read_csv(dataset_train_path) + session_dataset = pd.read_csv(dataset_session_path) + + train_stats = raw_dataset.describe() + train_stats = train_stats.transpose() + + dataset_t = raw_dataset + dataset_s = session_dataset.drop(["timestamp"], axis=1) + + # Drop the columns that will be predicted + X = dataset_t.drop(["timestamp", "mouse_x", "mouse_y"], axis=1) + + Y1 = dataset_t.mouse_x + Y2 = dataset_t.mouse_y + # print('Y1 is the mouse_x column ->', Y1) + # print('Y2 is the mouse_y column ->', Y2) + + MODEL_X = model_for_mouse_x(X, Y1) + MODEL_Y = model_for_mouse_y(X, Y2) + + GAZE_X = MODEL_X.predict(dataset_s) + GAZE_Y = MODEL_Y.predict(dataset_s) + + GAZE_X = np.abs(GAZE_X) + GAZE_Y = np.abs(GAZE_Y) + + return {"x": GAZE_X, "y": GAZE_Y} + + +def model_for_mouse_x(X, Y1): + print("-----------------MODEL FOR X------------------") + # split dataset into train and test sets (80/20 where 20 is for test) + X_train, X_test, Y1_train, Y1_test = train_test_split(X, Y1, test_size=0.2) + + model = linear_model.LinearRegression() + model.fit(X_train, Y1_train) + + Y1_pred_train = model.predict(X_train) + Y1_pred_test = model.predict(X_test) + + Y1_test = normalizeData(Y1_test) + Y1_pred_test = normalizeData(Y1_pred_test) + + print(f"Mean absolute error MAE = {mean_absolute_error(Y1_test, Y1_pred_test)}") + print(f"Mean squared error MSE = {mean_squared_error(Y1_test, Y1_pred_test)}") + print( + f"Mean squared log error MSLE = {mean_squared_log_error(Y1_test, Y1_pred_test)}" + ) + print(f"MODEL X SCORE R2 = {model.score(X, Y1)}") + + # print(f'TRAIN{Y1_pred_train}') + # print(f'TEST{Y1_pred_test}') + return model + + +def model_for_mouse_y(X, Y2): + print("-----------------MODEL FOR Y------------------") + # split dataset into train and test sets (80/20 where 20 is for test) + X_train, X_test, Y2_train, Y2_test = train_test_split(X, Y2, test_size=0.2) + + model = linear_model.LinearRegression() + model.fit(X_train, Y2_train) + + Y2_pred_train = model.predict(X_train) + Y2_pred_test = model.predict(X_test) + + Y2_test = normalizeData(Y2_test) + Y2_pred_test = normalizeData(Y2_pred_test) + + print(f"Mean absolute error MAE = {mean_absolute_error(Y2_test, Y2_pred_test)}") + print(f"Mean squared error MSE = {mean_squared_error(Y2_test, Y2_pred_test)}") + print( + f"Mean squared log error MSLE = {mean_squared_log_error(Y2_test, Y2_pred_test)}" + ) + print(f"MODEL X SCORE R2 = {model.score(X, Y2)}") - left_px = df_pred["left_iris_x"].values.astype(float) - right_px = df_pred["right_iris_x"].values.astype(float) - left_py = df_pred["left_iris_y"].values.astype(float) - right_py = df_pred["right_iris_y"].values.astype(float) - - mean_px = (left_px + right_px) / 2 - diff_px = left_px - right_px - mean_py = (left_py + right_py) / 2 - diff_py = left_py - right_py - - # baseline relativo - rel_px = mean_px - ref_mean_x - rel_py = mean_py - ref_mean_y - - diff_py_norm = diff_py / iris_y_scale - rel_py_norm = rel_py / iris_y_scale - 
- X_pred_x = np.column_stack([ - left_px, right_px, mean_px, diff_px, rel_px - ]) - - X_pred_y = np.column_stack([ - diff_py_norm, rel_py_norm - ]) - - y_pred_x = model_x.predict(X_pred_x) - y_pred_y = model_y.predict(X_pred_y) - - # remove bias vertical - y_pred_y = y_pred_y - np.mean(y_pred_y) - - y_pred_y = y_pred_y * Y_GAIN - - # ============================ - # PREDICTION LOOP (WebGazer) - # ============================ - predictions = [] - - for i in range(len(y_pred_x)): - # baseline dinâmico - ref_mean_x = BASELINE_ALPHA * mean_px[i] + (1 - BASELINE_ALPHA) * ref_mean_x - ref_mean_y = BASELINE_ALPHA * mean_py[i] + (1 - BASELINE_ALPHA) * ref_mean_y - - # squash não-linear - sx = squash(y_pred_x[i], SQUASH_LIMIT_X) - sy = squash(y_pred_y[i], SQUASH_LIMIT_Y) - - px = x_center + float(sx) * x_scale - py = y_center + float(sy) * y_scale - - predictions.append({ - "timestamp": iris_data[i].get("timestamp"), - "predicted_x": px, - "predicted_y": py, - "screen_width": screen_width, - "screen_height": screen_height, - }) - - # ============================ - # LOGS - # ============================ - print("====== MODEL DEBUG ======") - print(f"y_pred_x: {np.min(y_pred_x):.3f} → {np.max(y_pred_x):.3f}") - print(f"y_pred_y: {np.min(y_pred_y):.3f} → {np.max(y_pred_y):.3f}") - print("=========================") - - print("====== PIXEL SAMPLE ======") - for p in predictions[:15]: - print(f"x: {p['predicted_x']:.1f}, y: {p['predicted_y']:.1f}") - - return predictions + # print(f'TRAIN{Y2_pred_train}') + print(f"TEST{Y2_pred_test}") + return model def normalizeData(data): From aa83ce15eb658097e2df0ac0b24acfe05afdb78c Mon Sep 17 00:00:00 2001 From: midaa1 Date: Wed, 18 Feb 2026 08:17:09 +0200 Subject: [PATCH 10/10] refactor for itreration to avoid o(n^2) loop --- app/services/gaze_tracker.py | 600 +++++++++++++++++------------------ 1 file changed, 284 insertions(+), 316 deletions(-) diff --git a/app/services/gaze_tracker.py b/app/services/gaze_tracker.py index d3377a6..00e272a 100644 --- a/app/services/gaze_tracker.py +++ b/app/services/gaze_tracker.py @@ -1,4 +1,5 @@ # Necessary imports +import math import warnings warnings.filterwarnings("ignore") @@ -11,12 +12,17 @@ from sklearn.model_selection import train_test_split from sklearn.preprocessing import StandardScaler, PolynomialFeatures from sklearn.pipeline import make_pipeline +from sklearn.ensemble import RandomForestRegressor +from sklearn.linear_model import Ridge +import time # Model imports from sklearn import linear_model from sklearn.svm import SVR from sklearn.cluster import KMeans from sklearn.model_selection import GridSearchCV +from sklearn.model_selection import GroupShuffleSplit +import matplotlib.pyplot as plt # Metrics imports from sklearn.metrics import make_scorer @@ -33,6 +39,7 @@ func_presicion_y, func_accuracy_x, func_accuracy_y, + func_total_accuracy, ) from app.services.config import hyperparameters @@ -54,7 +61,14 @@ "Support Vector Regressor": make_pipeline( PolynomialFeatures(2), SVR(kernel="linear") ), -} + "Random Forest Regressor": make_pipeline( + RandomForestRegressor( + n_estimators=200, + max_depth=10, + min_samples_split=5, + random_state=42 + ) +)} # Set the scoring metrics for GridSearchCV to r2_score and mean_absolute_error scoring = { @@ -63,6 +77,45 @@ } +def squash(v, limit=1.0): + """Squash não-linear estilo WebGazer""" + return np.tanh(v / limit) + +def trian_and_predict(model_name, X_train, y_train, X_test, y_test, label): + """ + Helper to train a model (with or without GridSearchCV) and return 
predictions. + """ + if ( + model_name == "Linear Regression" + ): + model = models[model_name] + start_time = time.time() + model.fit(X_train, y_train) + end_time = time.time() + y_pred = model.predict(X_test) + print(f"Score {label}: {r2_score(y_test, y_pred)}") + print(f"Time {label}: {end_time - start_time}") + return y_pred + else: + pipeline = models[model_name] + param_grid = hyperparameters[model_name]["param_grid"] + grid_search = GridSearchCV( + pipeline, + param_grid, + cv=5, + scoring=scoring, + refit="r2", + return_train_score=True, + ) + start_time = time.time() + grid_search.fit(X_train, y_train) + end_time = time.time() + best_model = grid_search.best_estimator_ + y_pred = best_model.predict(X_test) + print(f"Time {label}: {end_time - start_time}") + return y_pred + + def predict(data, k, model_X, model_Y): """ Predicts the gaze coordinates using machine learning models. @@ -76,97 +129,52 @@ def predict(data, k, model_X, model_Y): Returns: dict: A dictionary containing the predicted gaze coordinates, precision, accuracy, and cluster centroids. """ - # Inicialize standard scaler - sc = StandardScaler() + # Load data from csv file and drop unnecessary columns df = pd.read_csv(data) df = df.drop(["screen_height", "screen_width"], axis=1) + print(df.head()) + # Create groups (point_x, point_y) + df["group"] = list(zip(df["point_x"], df["point_y"])) # Data for X axis X_x = df[["left_iris_x", "right_iris_x"]] X_y = df["point_x"] - - # Normalize data using standard scaler and split data into training and testing sets - X_x = sc.fit_transform(X_x) - X_train_x, X_test_x, y_train_x, y_test_x = train_test_split( - X_x, X_y, test_size=0.2, random_state=42 - ) - - if ( - model_X == "Linear Regression" - or model_X == "Elastic Net" - or model_X == "Support Vector Regressor" - ): - model = models[model_X] - - # Fit the model and make predictions - model.fit(X_train_x, y_train_x) - y_pred_x = model.predict(X_test_x) - - else: - pipeline = models[model_X] - param_grid = hyperparameters[model_X]["param_grid"] - - # Initialize GridSearchCV with the pipeline and parameter grid - grid_search = GridSearchCV( - pipeline, - param_grid, - cv=5, - scoring=scoring, - refit="r2", - return_train_score=True, - ) - - # Fit the GridSearchCV to the training data for X - grid_search.fit(X_train_x, y_train_x) - - # Use the best estimator to predict the values and calculate the R2 score - best_model_x = grid_search.best_estimator_ - y_pred_x = best_model_x.predict(X_test_x) - + # groups = df["group"] # Data for Y axis - X_y = df[["left_iris_y", "right_iris_y"]] + X_feature_y = df[["left_iris_y", "right_iris_y"]] y_y = df["point_y"] - - # Normalize data using standard scaler and split data into training and testing sets - X_y = sc.fit_transform(X_y) - X_train_y, X_test_y, y_train_y, y_test_y = train_test_split( - X_y, y_y, test_size=0.2, random_state=42 + # Split data into training and testing sets then Normalize data using standard scaler + ( + X_train_x, X_test_x, + y_train_x, y_test_x, + X_train_y, X_test_y, + y_train_y, y_test_y + )= train_test_split( + X_x, + X_y, + X_feature_y, + y_y, + test_size=0.2, + random_state=42, ) - - if ( - model_Y == "Linear Regression" - or model_Y == "Elastic Net" - or model_Y == "Support Vector Regressor" - ): - model = models[model_Y] - - # Fit the model and make predictions - model.fit(X_train_y, y_train_y) - y_pred_y = model.predict(X_test_y) - - else: - pipeline = models[model_Y] - param_grid = hyperparameters[model_Y]["param_grid"] - - # Initialize GridSearchCV with the 
pipeline and parameter grid - grid_search = GridSearchCV( - pipeline, - param_grid, - cv=5, - scoring=scoring, - refit="r2", - return_train_score=True, - ) - - # Fit the GridSearchCV to the training data for X - grid_search.fit(X_train_y, y_train_y) - - # Use the best estimator to predict the values and calculate the R2 score - best_model_y = grid_search.best_estimator_ - y_pred_y = best_model_y.predict(X_test_y) - + + # Scaling (fit on train only) + scaler_x = StandardScaler() + X_train_x = scaler_x.fit_transform(X_train_x) + X_test_x = scaler_x.transform(X_test_x) + + y_pred_x = trian_and_predict(model_X, X_train_x, y_train_x, X_test_x, y_test_x, "X") + + # Scaling (fit on train only) + scaler_y = StandardScaler() + X_train_y = scaler_y.fit_transform(X_train_y) + X_test_y = scaler_y.transform(X_test_y) + + + y_pred_y = trian_and_predict(model_Y, X_train_y, y_train_y, X_test_y, y_test_y, "Y") + # Convert the predictions to a numpy array and apply KMeans clustering data = np.array([y_pred_x, y_pred_y]).T model = KMeans(n_clusters=k, n_init="auto", init="k-means++") @@ -181,268 +189,228 @@ def predict(data, k, model_X, model_Y): } df_data = pd.DataFrame(data) df_data["True XY"] = list(zip(df_data["True X"], df_data["True Y"])) - + # Filter out negative values df_data = df_data[(df_data["Predicted X"] >= 0) & (df_data["Predicted Y"] >= 0)] - # Calculate the precision and accuracy for each + # Calculate the precision and accuracy for each precision_x = df_data.groupby("True XY").apply(func_precision_x) precision_y = df_data.groupby("True XY").apply(func_presicion_y) - # Calculate the average precision and accuracy + # Calculate the average precision precision_xy = (precision_x + precision_y) / 2 - precision_xy = precision_xy / np.mean(precision_xy) - - # Calculate the accuracy for each axis - accuracy_x = df_data.groupby("True XY").apply(func_accuracy_x) - accuracy_y = df_data.groupby("True XY").apply(func_accuracy_y) - - # Calculate the average accuracy - accuracy_xy = (accuracy_x + accuracy_y) / 2 - accuracy_xy = accuracy_xy / np.mean(accuracy_xy) + + # Calculate the average accuracy (eculidian distance) + accuracy_xy = df_data.groupby("True XY").apply(func_total_accuracy) + # Create a dictionary to store the data data = {} + grouped = df_data.groupby("True XY") - # Iterate over the dataframe and store the data - for index, row in df_data.iterrows(): + for (true_x, true_y), group in grouped: - # Get the outer and inner keys - outer_key = str(row["True X"]).split(".")[0] - inner_key = str(row["True Y"]).split(".")[0] + # keys + outer_key = str(true_x).split(".")[0] + inner_key = str(true_y).split(".")[0] - # If the outer key is not in the dictionary, add it + # create outer key if missing if outer_key not in data: data[outer_key] = {} - # Add the data to the dictionary + # fill data data[outer_key][inner_key] = { - "predicted_x": df_data[ - (df_data["True X"] == row["True X"]) - & (df_data["True Y"] == row["True Y"]) - ]["Predicted X"].values.tolist(), - "predicted_y": df_data[ - (df_data["True X"] == row["True X"]) - & (df_data["True Y"] == row["True Y"]) - ]["Predicted Y"].values.tolist(), - "PrecisionSD": precision_xy[(row["True X"], row["True Y"])], - "Accuracy": accuracy_xy[(row["True X"], row["True Y"])], + "predicted_x": group["Predicted X"].tolist(), + "predicted_y": group["Predicted Y"].tolist(), + "PrecisionSD": precision_xy[(true_x, true_y)], + "Accuracy": accuracy_xy[(true_x, true_y)], } - # Centroids of the clusters data["centroids"] = model.cluster_centers_.tolist() # Return 
the data return data -def predict_new_data_simple(calib_csv_path, predict_csv_path, model_X, model_Y, k=3): - """ - Versão simplificada de predict_new_data. - Treina modelos nos dados de calibração e prevê coordenadas nos novos dados. - Retorna o mesmo formato que a função `predict`. - """ - # -------------------- SCALERS -------------------- - sc_x = StandardScaler() - sc_y = StandardScaler() - - # -------------------- TREINO -------------------- - df_train = pd.read_csv(calib_csv_path).drop(["screen_height", "screen_width"], axis=1) - - X_train_x = df_train[["left_iris_x", "right_iris_x"]].values - y_train_x = df_train["point_x"].values - X_train_y = df_train[["left_iris_y", "right_iris_y"]].values - y_train_y = df_train["point_y"].values - - X_train_x_scaled = sc_x.fit_transform(X_train_x) - X_train_y_scaled = sc_y.fit_transform(X_train_y) - - # Modelos - model_fit_x = models[model_X].fit(X_train_x_scaled, y_train_x) - model_fit_y = models[model_Y].fit(X_train_y_scaled, y_train_y) - - # -------------------- NOVOS DADOS -------------------- - df_predict = pd.read_csv(predict_csv_path) - X_pred_x = sc_x.transform(df_predict[["left_iris_x", "right_iris_x"]].values) - X_pred_y = sc_y.transform(df_predict[["left_iris_y", "right_iris_y"]].values) - - y_pred_x = model_fit_x.predict(X_pred_x) - y_pred_y = model_fit_y.predict(X_pred_y) - - # Garantir valores não-negativos - y_pred_x = np.clip(y_pred_x, 0, None) - y_pred_y = np.clip(y_pred_y, 0, None) - - # -------------------- KMEANS -------------------- - data_pred = np.array([y_pred_x, y_pred_y]).T - kmeans_model = KMeans(n_clusters=k, n_init="auto", init="k-means++") - y_kmeans = kmeans_model.fit_predict(data_pred) - - # -------------------- FORMATA DADOS -------------------- - df_data = pd.DataFrame({ - "Predicted X": y_pred_x, - "Predicted Y": y_pred_y, - "True X": df_predict["point_x"] if "point_x" in df_predict else y_pred_x, - "True Y": df_predict["point_y"] if "point_y" in df_predict else y_pred_y - }) - - # Calcular métricas - precision_x = df_data.groupby(["True X", "True Y"]).apply(func_precision_x) - precision_y = df_data.groupby(["True X", "True Y"]).apply(func_presicion_y) - precision_xy = (precision_x + precision_y) / 2 - precision_xy /= np.mean(precision_xy) - - accuracy_x = df_data.groupby(["True X", "True Y"]).apply(func_accuracy_x) - accuracy_y = df_data.groupby(["True X", "True Y"]).apply(func_accuracy_y) - accuracy_xy = (accuracy_x + accuracy_y) / 2 - accuracy_xy /= np.mean(accuracy_xy) - - # Estrutura final - data = {} - for index, row in df_data.iterrows(): - outer_key = str(int(row["True X"])) - inner_key = str(int(row["True Y"])) - if outer_key not in data: - data[outer_key] = {} - data[outer_key][inner_key] = { - "predicted_x": df_data[ - (df_data["True X"] == row["True X"]) & - (df_data["True Y"] == row["True Y"]) - ]["Predicted X"].tolist(), - "predicted_y": df_data[ - (df_data["True X"] == row["True X"]) & - (df_data["True Y"] == row["True Y"]) - ]["Predicted Y"].tolist(), - "PrecisionSD": precision_xy[(row["True X"], row["True Y"])], - "Accuracy": accuracy_xy[(row["True X"], row["True Y"])], - } - - data["centroids"] = kmeans_model.cluster_centers_.tolist() - return data - - -def train_to_validate_calib(calib_csv_file, predict_csv_file): - dataset_train_path = calib_csv_file - dataset_predict_path = predict_csv_file - - # Carregue os dados de treinamento a partir do CSV - data = pd.read_csv(dataset_train_path) - - # Para evitar que retorne valores negativos: Aplicar uma transformação logarítmica aos rótulos 
-def train_to_validate_calib(calib_csv_file, predict_csv_file):
-    dataset_train_path = calib_csv_file
-    dataset_predict_path = predict_csv_file
-
-    # Load the training data from the CSV
-    data = pd.read_csv(dataset_train_path)
-
-    # To avoid negative predictions: apply a log transform to the labels (point_x and point_y)
-    # data['point_x'] = np.log(data['point_x'])
-    # data['point_y'] = np.log(data['point_y'])
-
-    # Separate the features (X) and the labels (y)
-    X = data[["left_iris_x", "left_iris_y", "right_iris_x", "right_iris_y"]]
-    y = data[["point_x", "point_y"]]
-
-    # Create and fit a linear regression model
-    model = linear_model.LinearRegression()
-    model.fit(X, y)
-
-    # Load the test data from a new CSV file
-    dados_teste = pd.read_csv(dataset_predict_path)
-
-    # Make predictions
-    previsoes = model.predict(dados_teste)
-
-    # To avoid negative predictions: invert the log transform on the predictions
-    # previsoes = np.exp(previsoes)
-
-    # Display the predictions
-    print("Predictions for point_x and point_y:")
-    print(previsoes)
-    return previsoes.tolist()
-
-
-def train_model(session_id):
-    # Download dataset
-    dataset_train_path = (
-        f"{Path().absolute()}/public/training/{session_id}/train_data.csv"
-    )
-    dataset_session_path = (
-        f"{Path().absolute()}/public/sessions/{session_id}/session_data.csv"
-    )
-
-    # Importing data from csv
-    raw_dataset = pd.read_csv(dataset_train_path)
-    session_dataset = pd.read_csv(dataset_session_path)
-
-    train_stats = raw_dataset.describe()
-    train_stats = train_stats.transpose()
-
-    dataset_t = raw_dataset
-    dataset_s = session_dataset.drop(["timestamp"], axis=1)
-
-    # Drop the columns that will be predicted
-    X = dataset_t.drop(["timestamp", "mouse_x", "mouse_y"], axis=1)
-
-    Y1 = dataset_t.mouse_x
-    Y2 = dataset_t.mouse_y
-    # print('Y1 is the mouse_x column ->', Y1)
-    # print('Y2 is the mouse_y column ->', Y2)
-
-    MODEL_X = model_for_mouse_x(X, Y1)
-    MODEL_Y = model_for_mouse_y(X, Y2)
-
-    GAZE_X = MODEL_X.predict(dataset_s)
-    GAZE_Y = MODEL_Y.predict(dataset_s)
-
-    GAZE_X = np.abs(GAZE_X)
-    GAZE_Y = np.abs(GAZE_Y)
-
-    return {"x": GAZE_X, "y": GAZE_Y}
-
-
-def model_for_mouse_x(X, Y1):
-    print("-----------------MODEL FOR X------------------")
-    # split dataset into train and test sets (80/20 where 20 is for test)
-    X_train, X_test, Y1_train, Y1_test = train_test_split(X, Y1, test_size=0.2)
-
-    model = linear_model.LinearRegression()
-    model.fit(X_train, Y1_train)
-
-    Y1_pred_train = model.predict(X_train)
-    Y1_pred_test = model.predict(X_test)
-
-    Y1_test = normalizeData(Y1_test)
-    Y1_pred_test = normalizeData(Y1_pred_test)
-
-    print(f"Mean absolute error MAE = {mean_absolute_error(Y1_test, Y1_pred_test)}")
-    print(f"Mean squared error MSE = {mean_squared_error(Y1_test, Y1_pred_test)}")
-    print(
-        f"Mean squared log error MSLE = {mean_squared_log_error(Y1_test, Y1_pred_test)}"
-    )
-    print(f"MODEL X SCORE R2 = {model.score(X, Y1)}")
-
-    # print(f'TRAIN{Y1_pred_train}')
-    # print(f'TEST{Y1_pred_test}')
-    return model
-
-
-def model_for_mouse_y(X, Y2):
-    print("-----------------MODEL FOR Y------------------")
-    # split dataset into train and test sets (80/20 where 20 is for test)
-    X_train, X_test, Y2_train, Y2_test = train_test_split(X, Y2, test_size=0.2)
-
-    model = linear_model.LinearRegression()
-    model.fit(X_train, Y2_train)
-
-    Y2_pred_train = model.predict(X_train)
-    Y2_pred_test = model.predict(X_test)
-
-    Y2_test = normalizeData(Y2_test)
-    Y2_pred_test = normalizeData(Y2_pred_test)
+def predict_new_data_simple(
+    calib_csv_path,
+    predict_csv_path,
+    iris_data,
+    screen_width=None,
+    screen_height=None,
+):
+    # ============================
+    # CONFIG (WebGazer-inspired)
+    # ============================
+    BASELINE_ALPHA = 0.01
+    SQUASH_LIMIT_X = 1.0
+    SQUASH_LIMIT_Y = 1.0
+    Y_GAIN = 1.2  # adjustment to compensate for vertical bias
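+    # How these constants are used below: BASELINE_ALPHA is the smoothing
+    # factor of the exponential moving average that tracks the iris centre
+    # (larger values adapt faster), SQUASH_LIMIT_X/Y bound the normalized
+    # predictions before they are mapped back to pixels, and Y_GAIN stretches
+    # the vertical axis to compensate for the bias noted above.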
+
+    # ============================
+    # LOAD TRAIN
+    # ============================
+    df_train = pd.read_csv(calib_csv_path)
+
+    # Screen dimensions are required to map normalized gaze back to pixels
+    if screen_width is None or screen_height is None:
+        raise ValueError("screen_width and screen_height must be provided")
+
+    x_center = screen_width / 2
+    y_center = screen_height / 2
+
+    # normalize targets to [-1, 1] space
+    y_train_x = (df_train["point_x"].values.astype(float) - x_center) / (screen_width / 2)
+    y_train_y = (df_train["point_y"].values.astype(float) - y_center) / (screen_height / 2)

+    # ensure laterality (swap columns if the left/right iris labels are reversed)
+    if df_train["left_iris_x"].mean() < df_train["right_iris_x"].mean():
+        df_train["left_iris_x"], df_train["right_iris_x"] = (
+            df_train["right_iris_x"].copy(),
+            df_train["left_iris_x"].copy(),
+        )
+    if df_train["left_iris_y"].mean() < df_train["right_iris_y"].mean():
+        df_train["left_iris_y"], df_train["right_iris_y"] = (
+            df_train["right_iris_y"].copy(),
+            df_train["left_iris_y"].copy(),
+        )

-    print(f"Mean absolute error MAE = {mean_absolute_error(Y2_test, Y2_pred_test)}")
-    print(f"Mean squared error MSE = {mean_squared_error(Y2_test, Y2_pred_test)}")
-    print(
-        f"Mean squared log error MSLE = {mean_squared_log_error(Y2_test, Y2_pred_test)}"
-    )
-    print(f"MODEL X SCORE R2 = {model.score(X, Y2)}")
+    left_x = df_train["left_iris_x"].values.astype(float)
+    right_x = df_train["right_iris_x"].values.astype(float)
+    left_y = df_train["left_iris_y"].values.astype(float)
+    right_y = df_train["right_iris_y"].values.astype(float)
+
+    mean_x = (left_x + right_x) / 2
+    diff_x = left_x - right_x
+    mean_y = (left_y + right_y) / 2
+    diff_y = left_y - right_y
+
+    # initial baseline (WebGazer-style)
+    ref_mean_x = np.mean(mean_x)
+    ref_mean_y = np.mean(mean_y)
+
+    rel_x = mean_x - ref_mean_x
+    rel_y = mean_y - ref_mean_y
+
+    # ============================
+    # PHYSICAL NORMALIZATION Y
+    # ============================
+    iris_y_scale = np.std(mean_y) + 1e-6
+    diff_y_norm = diff_y / iris_y_scale
+    rel_y_norm = rel_y / iris_y_scale
+
+    # ============================
+    # FEATURES
+    # ============================
+    X_train_x = np.column_stack([
+        left_x, right_x, mean_x, diff_x, rel_x
+    ])
+
+    X_train_y = np.column_stack([
+        diff_y_norm, rel_y_norm
+    ])
+
+    # ============================
+    # MODELS
+    # ============================
+    model_x = make_pipeline(StandardScaler(), Ridge(alpha=1.0))
+    model_y = make_pipeline(StandardScaler(), Ridge(alpha=1.0))
+
+    model_x.fit(X_train_x, y_train_x)
+    model_y.fit(X_train_y, y_train_y)
+
+    # ============================
+    # Real scale (calibration): map normalized predictions back to screen coordinates
+    # ============================
+    x_range = np.percentile(y_train_x, 95) - np.percentile(y_train_x, 5)
+    y_range = np.percentile(y_train_y, 95) - np.percentile(y_train_y, 5)
+
+    x_scale = max(x_range / 2, 1e-6) * (screen_width / 2)
+    y_scale = max(y_range / 2, 1e-6) * (screen_height / 2)
+
+    # ============================
+    # LOAD PREDICT
+    # ============================
+    df_pred = pd.read_csv(predict_csv_path)
+
+    if df_pred["left_iris_x"].mean() < df_pred["right_iris_x"].mean():
+        df_pred["left_iris_x"], df_pred["right_iris_x"] = (
+            df_pred["right_iris_x"].copy(),
+            df_pred["left_iris_x"].copy(),
+        )
+    if df_pred["left_iris_y"].mean() < df_pred["right_iris_y"].mean():
+        df_pred["left_iris_y"], df_pred["right_iris_y"] = (
+            df_pred["right_iris_y"].copy(),
+            df_pred["left_iris_y"].copy(),
+        )

-    # print(f'TRAIN{Y2_pred_train}')
-    print(f"TEST{Y2_pred_test}")
-    return model
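+    # squash() is defined elsewhere in this module; the prediction loop below
+    # relies on it as a soft limiter. A minimal sketch of the assumed behavior:
+    #     def squash(v, limit):
+    #         return limit * np.tanh(v / limit)
+    # i.e. values stay inside (-limit, limit) without hard clipping.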
df_pred["left_iris_y"].values.astype(float) + right_py = df_pred["right_iris_y"].values.astype(float) + + mean_px = (left_px + right_px) / 2 + diff_px = left_px - right_px + mean_py = (left_py + right_py) / 2 + diff_py = left_py - right_py + + # baseline relativo + rel_px = mean_px - ref_mean_x + rel_py = mean_py - ref_mean_y + + diff_py_norm = diff_py / iris_y_scale + rel_py_norm = rel_py / iris_y_scale + + X_pred_x = np.column_stack([ + left_px, right_px, mean_px, diff_px, rel_px + ]) + + X_pred_y = np.column_stack([ + diff_py_norm, rel_py_norm + ]) + + y_pred_x = model_x.predict(X_pred_x) + y_pred_y = model_y.predict(X_pred_y) + + # remove bias vertical + y_pred_y = y_pred_y - np.mean(y_pred_y) + + y_pred_y = y_pred_y * Y_GAIN + + # ============================ + # PREDICTION LOOP (WebGazer) + # ============================ + predictions = [] + + for i in range(len(y_pred_x)): + # baseline dinâmico + ref_mean_x = BASELINE_ALPHA * mean_px[i] + (1 - BASELINE_ALPHA) * ref_mean_x + ref_mean_y = BASELINE_ALPHA * mean_py[i] + (1 - BASELINE_ALPHA) * ref_mean_y + + # squash não-linear + sx = squash(y_pred_x[i], SQUASH_LIMIT_X) + sy = squash(y_pred_y[i], SQUASH_LIMIT_Y) + + px = x_center + float(sx) * x_scale + py = y_center + float(sy) * y_scale + + predictions.append({ + "timestamp": iris_data[i].get("timestamp"), + "predicted_x": px, + "predicted_y": py, + "screen_width": screen_width, + "screen_height": screen_height, + }) + + # ============================ + # LOGS + # ============================ + print("====== MODEL DEBUG ======") + print(f"y_pred_x: {np.min(y_pred_x):.3f} → {np.max(y_pred_x):.3f}") + print(f"y_pred_y: {np.min(y_pred_y):.3f} → {np.max(y_pred_y):.3f}") + print("=========================") + + print("====== PIXEL SAMPLE ======") + for p in predictions[:15]: + print(f"x: {p['predicted_x']:.1f}, y: {p['predicted_y']:.1f}") + + return predictions def normalizeData(data):