From 92f2c4701355fa7e722bb6517d9a0efc219cd490 Mon Sep 17 00:00:00 2001 From: midaa1 Date: Thu, 5 Feb 2026 16:45:46 +0200 Subject: [PATCH 01/10] Update GitHub Actions workflow to remove push trigger Removed push trigger for main branch and unused steps. --- .github/workflows/main.yml | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index dd51045..8d68e71 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -5,27 +5,13 @@ name: CI # Controls when the action will run. on: # Triggers the workflow on push or pull request events but only for the main branch - push: - branches: [main] pull_request: branches: [main] # Allows you to run this workflow manually from the Actions tab workflow_dispatch: - -# A workflow run is made up of one or more jobs that can run sequentially or in parallel jobs: - # This workflow contains a single job called "build" build: - # The type of runner that the job will run on runs-on: ubuntu-latest - - # Steps represent a sequence of tasks that will be executed as part of the job steps: - # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it - uses: actions/checkout@v2 - - uses: akhileshns/heroku-deploy@v3.12.12 # This is the action - with: - heroku_api_key: ${{secrets.HEROKU_API_KEY}} # Located in GitHub secrets - heroku_app_name: "web-eye-tracker-1204" # Must be unique in Heroku - heroku_email: "karine.pistili@gmail.com" From f425920fe58aa4e0a04800f162e3305c68f6717a Mon Sep 17 00:00:00 2001 From: midaa1 Date: Thu, 5 Feb 2026 16:46:46 +0200 Subject: [PATCH 02/10] Refactor CI workflow configuration --- .github/workflows/main.yml | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 8d68e71..29480fc 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -1,15 +1,10 @@ -# This is a basic workflow to help you get started with Actions - name: CI -# Controls when the action will run. on: - # Triggers the workflow on push or pull request events but only for the main branch pull_request: branches: [main] - - # Allows you to run this workflow manually from the Actions tab workflow_dispatch: + jobs: build: runs-on: ubuntu-latest From 98e1b5099971df5c3d6b4775e8531aaa149cfbe2 Mon Sep 17 00:00:00 2001 From: midaa1 Date: Thu, 5 Feb 2026 16:49:31 +0200 Subject: [PATCH 03/10] Enhance CI workflow with Heroku deployment steps Added push event trigger for CI workflow and included steps for Heroku CLI installation and deployment. 
---
 .github/workflows/main.yml | 23 ++++++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 29480fc..362ded0 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -1,6 +1,8 @@
 name: CI

 on:
+  push:
+    branches: [main]
   pull_request:
     branches: [main]
   workflow_dispatch:
@@ -8,5 +10,24 @@ on:
 jobs:
   build:
     runs-on: ubuntu-latest
+
     steps:
-      - uses: actions/checkout@v2
+      # Checkout repository
+      - name: Checkout code
+        uses: actions/checkout@v2
+
+      # Install Heroku CLI
+      - name: Install Heroku CLI
+        run: curl https://cli-assets.heroku.com/install-ubuntu.sh | sh
+
+      # Verify Heroku installation (optional but useful)
+      - name: Verify Heroku CLI
+        run: heroku --version
+
+      # Deploy to Heroku
+      - name: Deploy to Heroku
+        uses: akhileshns/heroku-deploy@v3.12.12
+        with:
+          heroku_api_key: ${{ secrets.HEROKU_API_KEY }}
+          heroku_app_name: "web-eye-tracker-1204"
+          heroku_email: "karine.pistili@gmail.com"

From a7a152511b8875c8eecdeb44f41a670abb960eda Mon Sep 17 00:00:00 2001
From: midaa1
Date: Thu, 5 Feb 2026 16:51:13 +0200
Subject: [PATCH 04/10] Update CI workflow to remove Heroku deployment

Removed push trigger and Heroku deployment steps from CI workflow.
---
 .github/workflows/main.yml | 23 +----------------------
 1 file changed, 1 insertion(+), 22 deletions(-)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 362ded0..29480fc 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -1,8 +1,6 @@
 name: CI

 on:
-  push:
-    branches: [main]
   pull_request:
     branches: [main]
   workflow_dispatch:
@@ -10,24 +8,5 @@ on:
 jobs:
   build:
     runs-on: ubuntu-latest
-
     steps:
-      # Checkout repository
-      - name: Checkout code
-        uses: actions/checkout@v2
-
-      # Install Heroku CLI
-      - name: Install Heroku CLI
-        run: curl https://cli-assets.heroku.com/install-ubuntu.sh | sh
-
-      # Verify Heroku installation (optional but useful)
-      - name: Verify Heroku CLI
-        run: heroku --version
-
-      # Deploy to Heroku
-      - name: Deploy to Heroku
-        uses: akhileshns/heroku-deploy@v3.12.12
-        with:
-          heroku_api_key: ${{ secrets.HEROKU_API_KEY }}
-          heroku_app_name: "web-eye-tracker-1204"
-          heroku_email: "karine.pistili@gmail.com"
+      - uses: actions/checkout@v2

From d0f508430f984316a5787b1f908655cc5fad40e0 Mon Sep 17 00:00:00 2001
From: midaa1
Date: Sun, 15 Feb 2026 12:46:41 +0200
Subject: [PATCH 05/10] Add random forest configuration and fix the other
 model configurations

---
 app/services/config.py         |  67 +---
 app/services/reports/report.md | 714 +++++++++++++++++++++++++++++++++
 2 files changed, 732 insertions(+), 49 deletions(-)
 create mode 100644 app/services/reports/report.md

diff --git a/app/services/config.py b/app/services/config.py
index 941c23f..e2db0d1 100644
--- a/app/services/config.py
+++ b/app/services/config.py
@@ -2,56 +2,18 @@
 hyperparameters = {
     "Lasso Regression": {
         "param_grid": {
-            "lasso__alpha": [
-                1e-15,
-                1e-10,
-                1e-8,
-                1e-3,
-                1e-2,
-                1e-1,
-                0.5,
-                1,
-                5,
-                10,
-                20,
-                30,
-                35,
-                40,
-                45,
-                50,
-                55,
-                100,
-            ]
+            "lasso__alpha": [10, 20, 30, 40, 45, 50, 55, 100, 200, 500]
         }
     },
     "Ridge Regression": {
        "param_grid": {
-            "ridge__alpha": [
-                1e-15,
-                1e-10,
-                1e-8,
-                1e-3,
-                1e-2,
-                1e-1,
-                0.5,
-                1,
-                5,
-                10,
-                20,
-                30,
-                35,
-                40,
-                45,
-                50,
-                55,
-                100,
-            ]
+            "ridge__alpha": [1e-3, 0.005, 0.01, 0.1, 0.5, 1.0, 10, 20, 50, 100]
        }
    },
    "Elastic Net": {
        "param_grid": {
-            "elasticnet__alpha": [1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 0.0, 1.0, 10.0, 100.0],
-            "elasticnet__l1_ratio": [0, 0.01, 0.2, 0.5, 0.8, 1],
+            "elasticnet__alpha": [0.1, 0.5, 1.0, 2.0, 5.0],
+            "elasticnet__l1_ratio": [0.5, 0.7, 0.8, 0.9, 1.0],
        }
    },
    "Bayesian Ridge": {
@@ -62,17 +24,24 @@
    },
    "SGD Regressor": {
        "param_grid": {
-            "sgdregressor__alpha": [0.0001, 0.001, 0.01, 0.1],
-            "sgdregressor__l1_ratio": [0, 0.2, 0.5, 0.7, 1],
-            "sgdregressor__max_iter": [500, 1000],
-            "sgdregressor__eta0": [0.0001, 0.001, 0.01],
+            "sgdregressor__alpha": [0.0001, 0.001],
+            "sgdregressor__l1_ratio": [0.5, 0.7, 0.8, 1],
+            "sgdregressor__max_iter": [1000],
+            "sgdregressor__eta0": [0.0001, 0.001],
        }
    },
    "Support Vector Regressor": {
        "param_grid": {
-            "svr__C": [0.1, 1, 10, 100, 1000],
-            "svr__gamma": [0.0001, 0.001, 0.01, 0.1, 1],
-            "svr__kernel": ["linear", "rbf", "poly"],
+            "svr__C": [50, 100, 200, 500, 1000, 2000],
+            "svr__gamma": [0.1, 0.5, 1, 2, 5],
+            "svr__kernel": ["rbf"],
+        }
+    },
+    "Random Forest Regressor": {
+        "param_grid": {
+            "randomforestregressor__n_estimators": [100],
+            "randomforestregressor__max_depth": [10],
+            "randomforestregressor__min_samples_split": [2, 5, 10],
        }
    },
 }
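For context, these per-model grids are what the GridSearchCV call in app/services/gaze_tracker.py consumes (its surrounding code is visible as context lines in PATCH 06 below). A minimal sketch of that wiring, assuming a make_pipeline setup whose lowercased step names are what the `ridge__alpha`-style keys address; the StandardScaler step and cv=5 are illustrative assumptions, not confirmed project code:

```python
# Sketch only: how a "param_grid" entry from config.py drives model selection.
from sklearn.linear_model import Ridge
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler  # assumed preprocessing step

# make_pipeline names each step after its lowercased class, so the Ridge
# step is addressed as "ridge" and its alpha as "ridge__alpha".
param_grid = {"ridge__alpha": [1e-3, 0.005, 0.01, 0.1, 0.5, 1.0, 10, 20, 50, 100]}
pipeline = make_pipeline(StandardScaler(), Ridge())

grid_search = GridSearchCV(
    pipeline,
    param_grid,
    scoring=["r2", "neg_mean_absolute_error"],  # R2 ranks candidates, MAE is reported
    refit="r2",
    cv=5,  # assumed fold count
    return_train_score=True,
)
# grid_search.fit(X_train, y_train) then exposes grid_search.best_estimator_,
# which is used per axis (X and Y are fit as separate targets).
```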
diff --git a/app/services/reports/report.md b/app/services/reports/report.md
new file mode 100644
index 0000000..7e3359c
--- /dev/null
+++ b/app/services/reports/report.md
@@ -0,0 +1,714 @@
+Detected 9 unique calibration points. Setting k=9.

+## Before any modifications

+====================================================================================================
+MODEL: Linear Regression
+====================================================================================================

+Model Linear Regression has no hyperparameter grid defined for GridSearchCV.

+Running full 'predict' pipeline...
+left_iris_x left_iris_y right_iris_x right_iris_y point_x point_y
+0 751.846863 270.624054 641.400879 275.499695 100.0 100.0
+1 739.366943 275.130646 641.746643 276.419403 100.0 100.0
+2 739.202148 273.828247 641.387695 275.756897 100.0 100.0
+3 739.340332 273.977570 641.849548 276.237335 100.0 100.0
+4 738.536682 274.158722 641.225525 276.203308 100.0 100.0
+Score X: 0.8210933512146253
+Time X: 0.0
+Score Y: 0.955456427962058
+Time Y: 0.0008034706115722656
+Pipeline Result -> Avg Accuracy: 235.3662, Avg Precision: 49.1126, Total Time: 1.56s

+====================================================================================================
+MODEL: Ridge Regression
+====================================================================================================

+Searching for top 5 parameter combinations (Axis X)...
+Rank | R2 Score | MAE | Parameters

+---

+1 | 0.7475 | 241.7027 | {'ridge__alpha': 0.01}
+2 | 0.6979 | 273.3379 | {'ridge__alpha': 100}
+3 | 0.6836 | 269.0442 | {'ridge__alpha': 55}
+4 | 0.6800 | 268.6203 | {'ridge__alpha': 50}
+5 | 0.6757 | 268.2162 | {'ridge__alpha': 45}

+Running full 'predict' pipeline...
+left_iris_x left_iris_y right_iris_x right_iris_y point_x point_y
+0 751.846863 270.624054 641.400879 275.499695 100.0 100.0
+1 739.366943 275.130646 641.746643 276.419403 100.0 100.0
+2 739.202148 273.828247 641.387695 275.756897 100.0 100.0
+3 739.340332 273.977570 641.849548 276.237335 100.0 100.0
+4 738.536682 274.158722 641.225525 276.203308 100.0 100.0
+Time X: 0.2734954357147217
+Time Y: 0.2774362564086914
+Pipeline Result -> Avg Accuracy: 236.1323, Avg Precision: 48.8059, Total Time: 0.66s

+====================================================================================================
+MODEL: Lasso Regression
+====================================================================================================

+Searching for top 5 parameter combinations (Axis X)...
+Rank | R2 Score | MAE | Parameters

+---

+1 | 0.6749 | 289.7889 | {'lasso__alpha': 100}
+2 | 0.6694 | 275.8380 | {'lasso__alpha': 55}
+3 | 0.6678 | 274.6908 | {'lasso__alpha': 50}
+4 | 0.6658 | 273.6146 | {'lasso__alpha': 45}
+5 | 0.6634 | 272.5937 | {'lasso__alpha': 40}

+Running full 'predict' pipeline...
+left_iris_x left_iris_y right_iris_x right_iris_y point_x point_y
+0 751.846863 270.624054 641.400879 275.499695 100.0 100.0
+1 739.366943 275.130646 641.746643 276.419403 100.0 100.0
+2 739.202148 273.828247 641.387695 275.756897 100.0 100.0
+3 739.340332 273.977570 641.849548 276.237335 100.0 100.0
+4 738.536682 274.158722 641.225525 276.203308 100.0 100.0
+Time X: 0.3323993682861328
+Time Y: 0.31626462936401367
+Pipeline Result -> Avg Accuracy: 290.8152, Avg Precision: 45.8779, Total Time: 0.77s

+====================================================================================================
+MODEL: Elastic Net
+====================================================================================================

+Searching for top 5 parameter combinations (Axis X)...
+Rank | R2 Score | MAE | Parameters

+---

+1 | 0.6996 | 274.9757 | {'elasticnet__alpha': 1.0, 'elasticnet__l1_ratio': 0.8}
+2 | 0.6896 | 291.7449 | {'elasticnet__alpha': 1.0, 'elasticnet__l1_ratio': 0.5}
+3 | 0.6851 | 269.2697 | {'elasticnet__alpha': 0.1, 'elasticnet__l1_ratio': 0}
+4 | 0.6848 | 269.2195 | {'elasticnet__alpha': 0.1, 'elasticnet__l1_ratio': 0.01}
+5 | 0.6768 | 268.2999 | {'elasticnet__alpha': 0.1, 'elasticnet__l1_ratio': 0.2}

+Running full 'predict' pipeline...
+left_iris_x left_iris_y right_iris_x right_iris_y point_x point_y
+0 751.846863 270.624054 641.400879 275.499695 100.0 100.0
+1 739.366943 275.130646 641.746643 276.419403 100.0 100.0
+2 739.202148 273.828247 641.387695 275.756897 100.0 100.0
+3 739.340332 273.977570 641.849548 276.237335 100.0 100.0
+4 738.536682 274.158722 641.225525 276.203308 100.0 100.0
+Score X: 0.7457386415719489
+Time X: 0.0
+Score Y: 0.9049978924542859
+Time Y: 0.0
+Pipeline Result -> Avg Accuracy: 298.9683, Avg Precision: 38.6566, Total Time: 0.11s

+====================================================================================================
+MODEL: Bayesian Ridge
+====================================================================================================

+Searching for top 5 parameter combinations (Axis X)...
+Rank | R2 Score | MAE | Parameters

+---

+1 | 0.2365 | 255.4301 | {'bayesianridge__alpha_init': 1, 'bayesianridge__lambda_init': 1e-09}
+2 | 0.2365 | 255.4301 | {'bayesianridge__alpha_init': 1.3, 'bayesianridge__lambda_init': 1e-09}
+3 | 0.2365 | 255.4301 | {'bayesianridge__alpha_init': 1.1, 'bayesianridge__lambda_init': 1e-09}
+4 | 0.2365 | 255.4301 | {'bayesianridge__alpha_init': 1.2, 'bayesianridge__lambda_init': 1e-09}
+5 | 0.2365 | 255.4301 | {'bayesianridge__alpha_init': 1.4, 'bayesianridge__lambda_init': 1e-09}

+Running full 'predict' pipeline...
+left_iris_x left_iris_y right_iris_x right_iris_y point_x point_y
+0 751.846863 270.624054 641.400879 275.499695 100.0 100.0
+1 739.366943 275.130646 641.746643 276.419403 100.0 100.0
+2 739.202148 273.828247 641.387695 275.756897 100.0 100.0
+3 739.340332 273.977570 641.849548 276.237335 100.0 100.0
+4 738.536682 274.158722 641.225525 276.203308 100.0 100.0
+Time X: 0.801609992980957
+Time Y: 0.7761216163635254
+Pipeline Result -> Avg Accuracy: 235.3889, Avg Precision: 48.1209, Total Time: 1.69s

+====================================================================================================
+MODEL: SGD Regressor
+====================================================================================================

+Searching for top 5 parameter combinations (Axis X)...
+Rank | R2 Score | MAE | Parameters

+---

+1 | 0.7123 | 256.6649 | {'sgdregressor__alpha': 0.0001, 'sgdregressor__eta0': 0.0001, 'sgdregressor__l1_ratio': 0.7, 'sgdregressor__max_iter': 1000}
+2 | 0.7123 | 256.6659 | {'sgdregressor__alpha': 0.0001, 'sgdregressor__eta0': 0.0001, 'sgdregressor__l1_ratio': 0, 'sgdregressor__max_iter': 1000}
+3 | 0.7123 | 256.6661 | {'sgdregressor__alpha': 0.0001, 'sgdregressor__eta0': 0.0001, 'sgdregressor__l1_ratio': 0.2, 'sgdregressor__max_iter': 1000}
+4 | 0.7123 | 256.6659 | {'sgdregressor__alpha': 0.0001, 'sgdregressor__eta0': 0.0001, 'sgdregressor__l1_ratio': 1, 'sgdregressor__max_iter': 1000}
+5 | 0.7123 | 256.6658 | {'sgdregressor__alpha': 0.0001, 'sgdregressor__eta0': 0.0001, 'sgdregressor__l1_ratio': 0.5, 'sgdregressor__max_iter': 1000}

+Running full 'predict' pipeline...
+left_iris_x left_iris_y right_iris_x right_iris_y point_x point_y
+0 751.846863 270.624054 641.400879 275.499695 100.0 100.0
+1 739.366943 275.130646 641.746643 276.419403 100.0 100.0
+2 739.202148 273.828247 641.387695 275.756897 100.0 100.0
+3 739.340332 273.977570 641.849548 276.237335 100.0 100.0
+4 738.536682 274.158722 641.225525 276.203308 100.0 100.0
+Time X: 7.285875558853149
+Time Y: 7.9095728397369385
+Pipeline Result -> Avg Accuracy: 241.2138, Avg Precision: 46.8787, Total Time: 15.31s

+====================================================================================================
+MODEL: Support Vector Regressor
+====================================================================================================

+Searching for top 5 parameter combinations (Axis X)...
+Rank | R2 Score | MAE | Parameters

+---

+1 | 0.9087 | 75.2710 | {'svr__C': 1000, 'svr__gamma': 1, 'svr__kernel': 'rbf'}
+2 | 0.9074 | 87.7716 | {'svr__C': 100, 'svr__gamma': 1, 'svr__kernel': 'rbf'}
+3 | 0.8694 | 120.2317 | {'svr__C': 1000, 'svr__gamma': 0.1, 'svr__kernel': 'rbf'}
+4 | 0.8157 | 137.6160 | {'svr__C': 100, 'svr__gamma': 0.1, 'svr__kernel': 'rbf'}
+5 | 0.7947 | 167.9109 | {'svr__C': 1000, 'svr__gamma': 0.01, 'svr__kernel': 'rbf'}

+Running full 'predict' pipeline...
+left_iris_x left_iris_y right_iris_x right_iris_y point_x point_y
+0 751.846863 270.624054 641.400879 275.499695 100.0 100.0
+1 739.366943 275.130646 641.746643 276.419403 100.0 100.0
+2 739.202148 273.828247 641.387695 275.756897 100.0 100.0
+3 739.340332 273.977570 641.849548 276.237335 100.0 100.0
+4 738.536682 274.158722 641.225525 276.203308 100.0 100.0
+Score X: 0.7693369168357224
+Time X: 0.0077130794525146484
+Score Y: 0.9476667108830322
+Time Y: 0.005934715270996094
+Pipeline Result -> Avg Accuracy: 266.1792, Avg Precision: 48.0700, Total Time: 0.13s

+====================================================================================================
+MODEL: Random Forest Regressor
+====================================================================================================

+Model Random Forest Regressor has no hyperparameter grid defined for GridSearchCV.

+Running full 'predict' pipeline...
+left_iris_x left_iris_y right_iris_x right_iris_y point_x point_y
+0 751.846863 270.624054 641.400879 275.499695 100.0 100.0
+1 739.366943 275.130646 641.746643 276.419403 100.0 100.0
+2 739.202148 273.828247 641.387695 275.756897 100.0 100.0
+3 739.340332 273.977570 641.849548 276.237335 100.0 100.0
+4 738.536682 274.158722 641.225525 276.203308 100.0 100.0
+Pipeline Error: 'Random Forest Regressor'

+================================================================================
+OVERALL PERFORMANCE SUMMARY
+================================================================================
+Model Name | Avg Accuracy | Avg Precision | Time (s)

+---

+Linear Regression | 235.3662 | 49.1126 | 1.5597
+Ridge Regression | 236.1323 | 48.8059 | 0.6650
+Lasso Regression | 290.8152 | 45.8779 | 0.7684
+Elastic Net | 298.9683 | 38.6566 | 0.1052
+Bayesian Ridge | 235.3889 | 48.1209 | 1.6910
+SGD Regressor | 241.2138 | 46.8787 | 15.3096
+Support Vector Regressor | 266.1792 | 48.0700 | 0.1342
+Random Forest Regressor | ERROR: 'Random Forest Regressor'

+---

+## After modifications and adding the Random Forest configuration

+Detected 9 unique calibration points. Setting k=9.

+====================================================================================================
+MODEL: Linear Regression
+====================================================================================================

+Model Linear Regression has no hyperparameter grid defined for GridSearchCV.

+Running full 'predict' pipeline...
+left_iris_x left_iris_y right_iris_x right_iris_y point_x point_y
+0 751.846863 270.624054 641.400879 275.499695 100.0 100.0
+1 739.366943 275.130646 641.746643 276.419403 100.0 100.0
+2 739.202148 273.828247 641.387695 275.756897 100.0 100.0
+3 739.340332 273.977570 641.849548 276.237335 100.0 100.0
+4 738.536682 274.158722 641.225525 276.203308 100.0 100.0
+Score X: 0.8210933512146253
+Time X: 0.0
+Score Y: 0.955456427962058
+Time Y: 0.0
+Pipeline Result -> Avg Accuracy: 235.3662, Avg Precision: 49.1126, Total Time: 1.50s

+====================================================================================================
+MODEL: Ridge Regression
+====================================================================================================

+Searching for top 5 parameter combinations (Axis X)...
+Rank | R2 Score | MAE | Parameters

+---

+1 | 0.7497 | 241.3809 | {'ridge__alpha': 0.005}
+2 | 0.7475 | 241.7027 | {'ridge__alpha': 0.01}
+3 | 0.7118 | 268.2630 | {'ridge__alpha': 100}
+4 | 0.6966 | 263.7161 | {'ridge__alpha': 50}
+5 | 0.6468 | 261.6515 | {'ridge__alpha': 20}

+Running full 'predict' pipeline...
+left_iris_x left_iris_y right_iris_x right_iris_y point_x point_y
+0 751.846863 270.624054 641.400879 275.499695 100.0 100.0
+1 739.366943 275.130646 641.746643 276.419403 100.0 100.0
+2 739.202148 273.828247 641.387695 275.756897 100.0 100.0
+3 739.340332 273.977570 641.849548 276.237335 100.0 100.0
+4 738.536682 274.158722 641.225525 276.203308 100.0 100.0
+Time X: 0.14182019233703613
+Time Y: 0.1409766674041748
+Pipeline Result -> Avg Accuracy: 235.8727, Avg Precision: 48.7106, Total Time: 0.39s

+====================================================================================================
+MODEL: Lasso Regression
+====================================================================================================

+Searching for top 5 parameter combinations (Axis X)...
+Rank | R2 Score | MAE | Parameters

+---

+1 | 0.6749 | 289.7889 | {'lasso__alpha': 100}
+2 | 0.6694 | 275.8380 | {'lasso__alpha': 55}
+3 | 0.6678 | 274.6908 | {'lasso__alpha': 50}
+4 | 0.6658 | 273.6146 | {'lasso__alpha': 45}
+5 | 0.6634 | 272.5937 | {'lasso__alpha': 40}

+Running full 'predict' pipeline...
+left_iris_x left_iris_y right_iris_x right_iris_y point_x point_y
+0 751.846863 270.624054 641.400879 275.499695 100.0 100.0
+1 739.366943 275.130646 641.746643 276.419403 100.0 100.0
+2 739.202148 273.828247 641.387695 275.756897 100.0 100.0
+3 739.340332 273.977570 641.849548 276.237335 100.0 100.0
+4 738.536682 274.158722 641.225525 276.203308 100.0 100.0
+Time X: 0.15945863723754883
+Time Y: 0.15405631065368652
+Pipeline Result -> Avg Accuracy: 292.7949, Avg Precision: 45.1035, Total Time: 0.41s

+====================================================================================================
+MODEL: Elastic Net
+====================================================================================================

+Searching for top 5 parameter combinations (Axis X)...
+Rank | R2 Score | MAE | Parameters

+---

+1 | 0.7005 | 277.8284 | {'elasticnet__alpha': 0.5, 'elasticnet__l1_ratio': 0.5}
+2 | 0.6999 | 280.8068 | {'elasticnet__alpha': 1.0, 'elasticnet__l1_ratio': 0.7}
+3 | 0.6998 | 275.1441 | {'elasticnet__alpha': 2.0, 'elasticnet__l1_ratio': 0.9}
+4 | 0.6996 | 274.9757 | {'elasticnet__alpha': 1.0, 'elasticnet__l1_ratio': 0.8}
+5 | 0.6957 | 286.5466 | {'elasticnet__alpha': 2.0, 'elasticnet__l1_ratio': 0.8}

+Running full 'predict' pipeline...
+left_iris_x left_iris_y right_iris_x right_iris_y point_x point_y
+0 751.846863 270.624054 641.400879 275.499695 100.0 100.0
+1 739.366943 275.130646 641.746643 276.419403 100.0 100.0
+2 739.202148 273.828247 641.387695 275.756897 100.0 100.0
+3 739.340332 273.977570 641.849548 276.237335 100.0 100.0
+4 738.536682 274.158722 641.225525 276.203308 100.0 100.0
+Score X: 0.7457386415719489
+Time X: 0.0
+Score Y: 0.9049978924542859
+Time Y: 0.0
+Pipeline Result -> Avg Accuracy: 298.9683, Avg Precision: 38.6566, Total Time: 0.13s

+====================================================================================================
+MODEL: Bayesian Ridge
+====================================================================================================

+Searching for top 5 parameter combinations (Axis X)...
+Rank | R2 Score | MAE | Parameters

+---

+1 | 0.2365 | 255.4301 | {'bayesianridge__alpha_init': 1, 'bayesianridge__lambda_init': 1e-09}
+2 | 0.2365 | 255.4301 | {'bayesianridge__alpha_init': 1.3, 'bayesianridge__lambda_init': 1e-09}
+3 | 0.2365 | 255.4301 | {'bayesianridge__alpha_init': 1.1, 'bayesianridge__lambda_init': 1e-09}
+4 | 0.2365 | 255.4301 | {'bayesianridge__alpha_init': 1.2, 'bayesianridge__lambda_init': 1e-09}
+5 | 0.2365 | 255.4301 | {'bayesianridge__alpha_init': 1.4, 'bayesianridge__lambda_init': 1e-09}

+Running full 'predict' pipeline...
+left_iris_x left_iris_y right_iris_x right_iris_y point_x point_y
+0 751.846863 270.624054 641.400879 275.499695 100.0 100.0
+1 739.366943 275.130646 641.746643 276.419403 100.0 100.0
+2 739.202148 273.828247 641.387695 275.756897 100.0 100.0
+3 739.340332 273.977570 641.849548 276.237335 100.0 100.0
+4 738.536682 274.158722 641.225525 276.203308 100.0 100.0
+Time X: 0.7946014404296875
+Time Y: 0.7750468254089355
+Pipeline Result -> Avg Accuracy: 235.3889, Avg Precision: 48.1209, Total Time: 1.68s

+====================================================================================================
+MODEL: SGD Regressor
+====================================================================================================

+Searching for top 5 parameter combinations (Axis X)...
+Rank | R2 Score | MAE | Parameters

+---

+1 | 0.7123 | 256.6659 | {'sgdregressor__alpha': 0.0001, 'sgdregressor__eta0': 0.0001, 'sgdregressor__l1_ratio': 0.8, 'sgdregressor__max_iter': 1000}
+2 | 0.7123 | 256.6640 | {'sgdregressor__alpha': 0.0001, 'sgdregressor__eta0': 0.0001, 'sgdregressor__l1_ratio': 1, 'sgdregressor__max_iter': 1000}
+3 | 0.7123 | 256.6659 | {'sgdregressor__alpha': 0.0001, 'sgdregressor__eta0': 0.0001, 'sgdregressor__l1_ratio': 0.7, 'sgdregressor__max_iter': 1000}
+4 | 0.7123 | 256.6657 | {'sgdregressor__alpha': 0.0001, 'sgdregressor__eta0': 0.0001, 'sgdregressor__l1_ratio': 0.5, 'sgdregressor__max_iter': 1000}
+5 | 0.7123 | 256.7432 | {'sgdregressor__alpha': 0.001, 'sgdregressor__eta0': 0.0001, 'sgdregressor__l1_ratio': 0.8, 'sgdregressor__max_iter': 1000}

+Running full 'predict' pipeline...
+left_iris_x left_iris_y right_iris_x right_iris_y point_x point_y
+0 751.846863 270.624054 641.400879 275.499695 100.0 100.0
+1 739.366943 275.130646 641.746643 276.419403 100.0 100.0
+2 739.202148 273.828247 641.387695 275.756897 100.0 100.0
+3 739.340332 273.977570 641.849548 276.237335 100.0 100.0
+4 738.536682 274.158722 641.225525 276.203308 100.0 100.0
+Time X: 1.793349027633667
+Time Y: 1.8706464767456055
+Pipeline Result -> Avg Accuracy: 241.2358, Avg Precision: 46.9392, Total Time: 3.78s

+====================================================================================================
+MODEL: Support Vector Regressor
+====================================================================================================

+Searching for top 5 parameter combinations (Axis X)...
+Rank | R2 Score | MAE | Parameters

+---

+1 | 0.9167 | 68.5502 | {'svr__C': 1000, 'svr__gamma': 2, 'svr__kernel': 'rbf'}
+2 | 0.9162 | 69.3407 | {'svr__C': 500, 'svr__gamma': 2, 'svr__kernel': 'rbf'}
+3 | 0.9149 | 68.6453 | {'svr__C': 2000, 'svr__gamma': 2, 'svr__kernel': 'rbf'}
+4 | 0.9144 | 68.0756 | {'svr__C': 500, 'svr__gamma': 5, 'svr__kernel': 'rbf'}
+5 | 0.9132 | 72.2102 | {'svr__C': 200, 'svr__gamma': 2, 'svr__kernel': 'rbf'}

+Running full 'predict' pipeline...
+left_iris_x left_iris_y right_iris_x right_iris_y point_x point_y
+0 751.846863 270.624054 641.400879 275.499695 100.0 100.0
+1 739.366943 275.130646 641.746643 276.419403 100.0 100.0
+2 739.202148 273.828247 641.387695 275.756897 100.0 100.0
+3 739.340332 273.977570 641.849548 276.237335 100.0 100.0
+4 738.536682 274.158722 641.225525 276.203308 100.0 100.0
+Score X: 0.7693369168357224
+Time X: 0.0162813663482666
+Score Y: 0.9476667108830322
+Time Y: 0.019759178161621094
+Pipeline Result -> Avg Accuracy: 266.1792, Avg Precision: 48.0700, Total Time: 0.36s

+====================================================================================================
+MODEL: Random Forest Regressor
+====================================================================================================

+Searching for top 5 parameter combinations (Axis X)...
+Rank | R2 Score | MAE | Parameters

+---

+1 | 0.9221 | 68.1083 | {'randomforestregressor__max_depth': 10, 'randomforestregressor__min_samples_split': 10, 'randomforestregressor__n_estimators': 100}
+2 | 0.9180 | 68.5070 | {'randomforestregressor__max_depth': 10, 'randomforestregressor__min_samples_split': 5, 'randomforestregressor__n_estimators': 100}
+3 | 0.9147 | 69.0029 | {'randomforestregressor__max_depth': 10, 'randomforestregressor__min_samples_split': 2, 'randomforestregressor__n_estimators': 100}

+Running full 'predict' pipeline...
+left_iris_x left_iris_y right_iris_x right_iris_y point_x point_y
+0 751.846863 270.624054 641.400879 275.499695 100.0 100.0
+1 739.366943 275.130646 641.746643 276.419403 100.0 100.0
+2 739.202148 273.828247 641.387695 275.756897 100.0 100.0
+3 739.340332 273.977570 641.849548 276.237335 100.0 100.0
+4 738.536682 274.158722 641.225525 276.203308 100.0 100.0
+Time X: 3.1839962005615234
+Time Y: 1.0933246612548828
+Pipeline Result -> Avg Accuracy: 52.8510, Avg Precision: 31.9563, Total Time: 4.43s

+================================================================================
+OVERALL PERFORMANCE SUMMARY
+================================================================================
+Model Name | Avg Accuracy | Avg Precision | Time (s)

+---

+Linear Regression | 235.3662 | 49.1126 | 1.4983
+Ridge Regression | 235.8727 | 48.7106 | 0.3908
+Lasso Regression | 292.7949 | 45.1035 | 0.4128
+Elastic Net | 298.9683 | 38.6566 | 0.1328
+Bayesian Ridge | 235.3889 | 48.1209 | 1.6843
+SGD Regressor | 241.2358 | 46.9392 | 3.7777
+Support Vector Regressor | 266.1792 | 48.0700 | 0.3644
+Random Forest Regressor | 52.8510 | 31.9563 | 4.4307

+---

+## After splitting into train and test sets to check for overfitting

+Full Dataset: 900 rows
+Training Split: 765 rows
+Validation Split: 135 rows
+Detected 9 unique calibration points.

+====================================================================================================
+MODEL: Linear Regression
+====================================================================================================

+Model Linear Regression has no hyperparameter grid defined.
+
+--- Phase 1: Calibration Phase (Internal split on Training set) ---
+left_iris_x left_iris_y right_iris_x right_iris_y point_x point_y
+0 725.386353 275.860901 627.103882 274.170715 1607.0 100.0
+1 735.130432 282.739563 636.170105 280.775909 100.0 769.0
+2 729.180908 284.334076 630.414978 282.022430 1607.0 769.0
+3 730.777405 280.959167 632.483826 277.992859 853.5 434.5
+4 723.184448 275.512177 625.264465 274.214630 1607.0 100.0
+Score X: 0.07148328938658532
+Time X: 0.002007007598876953
+Score Y: 0.9278236134235549
+Time Y: 0.0018939971923828125
+Calibration Result -> Acc: 245.7273, Prec: 49.8420, Time: 1.55s

+--- Phase 2: Validation Phase (Hold-out Split) ---
+Score X_Val: 0.8200780206843142
+Time X_Val: 0.0019996166229248047
+Score Y_Val: 0.9612407162914263
+Time Y_Val: 0.0010020732879638672
+Validation Result -> Acc: 229.4535, Prec: 51.5457

+====================================================================================================
+MODEL: Ridge Regression
+====================================================================================================

+Searching for top 5 parameter combinations...
+Rank | R2 (X) | MAE (X) | Parameters

+---

+1 | 0.7256 | 241.3973 | {'ridge__alpha': 0.001}
+2 | 0.7118 | 268.2630 | {'ridge__alpha': 100}
+3 | 0.6966 | 263.7161 | {'ridge__alpha': 50}
+4 | 0.6507 | 244.4687 | {'ridge__alpha': 0.005}
+5 | 0.6468 | 261.6515 | {'ridge__alpha': 20}

+--- Phase 1: Calibration Phase (Internal split on Training set) ---
+left_iris_x left_iris_y right_iris_x right_iris_y point_x point_y
+0 725.386353 275.860901 627.103882 274.170715 1607.0 100.0
+1 735.130432 282.739563 636.170105 280.775909 100.0 769.0
+2 729.180908 284.334076 630.414978 282.022430 1607.0 769.0
+3 730.777405 280.959167 632.483826 277.992859 853.5 434.5
+4 723.184448 275.512177 625.264465 274.214630 1607.0 100.0
+Time X: 0.15825796127319336
+Time Y: 0.15566420555114746
+Calibration Result -> Acc: 247.0915, Prec: 49.9354, Time: 0.42s

+--- Phase 2: Validation Phase (Hold-out Split) ---
+Time X_Val: 0.16522979736328125
+Time Y_Val: 0.15460491180419922
+Validation Result -> Acc: 229.6377, Prec: 51.8048

+====================================================================================================
+MODEL: Lasso Regression
+====================================================================================================

+Searching for top 5 parameter combinations...
+Rank | R2 (X) | MAE (X) | Parameters

+---

+1 | 0.6910 | 270.3284 | {'lasso__alpha': 55}
+2 | 0.6905 | 269.0582 | {'lasso__alpha': 50}
+3 | 0.6895 | 267.8433 | {'lasso__alpha': 45}
+4 | 0.6881 | 266.7249 | {'lasso__alpha': 40}
+5 | 0.6841 | 264.7345 | {'lasso__alpha': 30}

+--- Phase 1: Calibration Phase (Internal split on Training set) ---
+left_iris_x left_iris_y right_iris_x right_iris_y point_x point_y
+0 725.386353 275.860901 627.103882 274.170715 1607.0 100.0
+1 735.130432 282.739563 636.170105 280.775909 100.0 769.0
+2 729.180908 284.334076 630.414978 282.022430 1607.0 769.0
+3 730.777405 280.959167 632.483826 277.992859 853.5 434.5
+4 723.184448 275.512177 625.264465 274.214630 1607.0 100.0
+Time X: 0.16257333755493164
+Time Y: 0.16847848892211914
+Calibration Result -> Acc: 251.6126, Prec: 47.0489, Time: 0.44s

+--- Phase 2: Validation Phase (Hold-out Split) ---
+Time X_Val: 0.18091559410095215
+Time Y_Val: 0.16357016563415527
+Validation Result -> Acc: 265.0815, Prec: 44.3168

+====================================================================================================
+MODEL: Elastic Net
+====================================================================================================

+Searching for top 5 parameter combinations...
+Rank | R2 (X) | MAE (X) | Parameters

+---

+1 | 0.7136 | 273.3473 | {'elasticnet__alpha': 0.5, 'elasticnet__l1_ratio': 0.5}
+2 | 0.7136 | 270.6703 | {'elasticnet__alpha': 2.0, 'elasticnet__l1_ratio': 0.9}
+3 | 0.7135 | 270.5027 | {'elasticnet__alpha': 1.0, 'elasticnet__l1_ratio': 0.8}
+4 | 0.7123 | 276.3342 | {'elasticnet__alpha': 1.0, 'elasticnet__l1_ratio': 0.7}
+5 | 0.7108 | 267.5358 | {'elasticnet__alpha': 0.5, 'elasticnet__l1_ratio': 0.7}

+--- Phase 1: Calibration Phase (Internal split on Training set) ---
+left_iris_x left_iris_y right_iris_x right_iris_y point_x point_y
+0 725.386353 275.860901 627.103882 274.170715 1607.0 100.0
+1 735.130432 282.739563 636.170105 280.775909 100.0 769.0
+2 729.180908 284.334076 630.414978 282.022430 1607.0 769.0
+3 730.777405 280.959167 632.483826 277.992859 853.5 434.5
+4 723.184448 275.512177 625.264465 274.214630 1607.0 100.0
+Score X: 0.6531754054868382
+Time X: 0.0006241798400878906
+Score Y: 0.9074449357286609
+Time Y: 0.0008966922760009766
+Calibration Result -> Acc: 301.3013, Prec: 41.3590, Time: 0.09s

+--- Phase 2: Validation Phase (Hold-out Split) ---
+Score X_Val: 0.7340912725347744
+Time X_Val: 0.0018773078918457031
+Score Y_Val: 0.9125809129793833
+Time Y_Val: 0.0009999275207519531
+Validation Result -> Acc: 294.6057, Prec: 38.2533

+====================================================================================================
+MODEL: Bayesian Ridge
+====================================================================================================

+Searching for top 5 parameter combinations...
+Rank | R2 (X) | MAE (X) | Parameters

+---

+1 | 0.3463 | 251.4715 | {'bayesianridge__alpha_init': 1, 'bayesianridge__lambda_init': 0.001}
+2 | 0.3463 | 251.4715 | {'bayesianridge__alpha_init': 1.1, 'bayesianridge__lambda_init': 0.001}
+3 | 0.3463 | 251.4715 | {'bayesianridge__alpha_init': 1.2, 'bayesianridge__lambda_init': 0.001}
+4 | 0.3463 | 251.4715 | {'bayesianridge__alpha_init': 1.3, 'bayesianridge__lambda_init': 0.001}
+5 | 0.3463 | 251.4715 | {'bayesianridge__alpha_init': 1.4, 'bayesianridge__lambda_init': 0.001}

+--- Phase 1: Calibration Phase (Internal split on Training set) ---
+left_iris_x left_iris_y right_iris_x right_iris_y point_x point_y
+0 725.386353 275.860901 627.103882 274.170715 1607.0 100.0
+1 735.130432 282.739563 636.170105 280.775909 100.0 769.0
+2 729.180908 284.334076 630.414978 282.022430 1607.0 769.0
+3 730.777405 280.959167 632.483826 277.992859 853.5 434.5
+4 723.184448 275.512177 625.264465 274.214630 1607.0 100.0
+Time X: 0.7953126430511475
+Time Y: 0.79813551902771
+Calibration Result -> Acc: 246.8617, Prec: 50.2630, Time: 1.69s

+--- Phase 2: Validation Phase (Hold-out Split) ---
+Time X_Val: 0.8207540512084961
+Time Y_Val: 0.8074281215667725
+Validation Result -> Acc: 229.1080, Prec: 50.9408

+====================================================================================================
+MODEL: SGD Regressor
+====================================================================================================

+Searching for top 5 parameter combinations...
+Rank | R2 (X) | MAE (X) | Parameters

+---

+1 | 0.7240 | 252.4349 | {'sgdregressor__alpha': 0.0001, 'sgdregressor__eta0': 0.0001, 'sgdregressor__l1_ratio': 0.8, 'sgdregressor__max_iter': 1000}
+2 | 0.7240 | 252.4352 | {'sgdregressor__alpha': 0.0001, 'sgdregressor__eta0': 0.0001, 'sgdregressor__l1_ratio': 1, 'sgdregressor__max_iter': 1000}
+3 | 0.7240 | 252.4355 | {'sgdregressor__alpha': 0.0001, 'sgdregressor__eta0': 0.0001, 'sgdregressor__l1_ratio': 0.5, 'sgdregressor__max_iter': 1000}
+4 | 0.7240 | 252.4360 | {'sgdregressor__alpha': 0.0001, 'sgdregressor__eta0': 0.0001, 'sgdregressor__l1_ratio': 0.7, 'sgdregressor__max_iter': 1000}
+5 | 0.7239 | 252.5118 | {'sgdregressor__alpha': 0.001, 'sgdregressor__eta0': 0.0001, 'sgdregressor__l1_ratio': 0.5, 'sgdregressor__max_iter': 1000}

+--- Phase 1: Calibration Phase (Internal split on Training set) ---
+left_iris_x left_iris_y right_iris_x right_iris_y point_x point_y
+0 725.386353 275.860901 627.103882 274.170715 1607.0 100.0
+1 735.130432 282.739563 636.170105 280.775909 100.0 769.0
+2 729.180908 284.334076 630.414978 282.022430 1607.0 769.0
+3 730.777405 280.959167 632.483826 277.992859 853.5 434.5
+4 723.184448 275.512177 625.264465 274.214630 1607.0 100.0
+Time X: 1.9512312412261963
+Time Y: 1.9312074184417725
+Calibration Result -> Acc: 247.0398, Prec: 49.5953, Time: 3.98s

+--- Phase 2: Validation Phase (Hold-out Split) ---
+Time X_Val: 1.7748675346374512
+Time Y_Val: 1.8547911643981934
+Validation Result -> Acc: 239.2565, Prec: 48.5600

+====================================================================================================
+MODEL: Support Vector Regressor
+====================================================================================================

+Searching for top 5 parameter combinations...
+Rank | R2 (X) | MAE (X) | Parameters

+---

+1 | 0.9196 | 63.4319 | {'svr__C': 1000, 'svr__gamma': 5, 'svr__kernel': 'rbf'}
+2 | 0.9191 | 66.7000 | {'svr__C': 1000, 'svr__gamma': 2, 'svr__kernel': 'rbf'}
+3 | 0.9186 | 67.8033 | {'svr__C': 500, 'svr__gamma': 2, 'svr__kernel': 'rbf'}
+4 | 0.9181 | 65.6786 | {'svr__C': 500, 'svr__gamma': 5, 'svr__kernel': 'rbf'}
+5 | 0.9180 | 66.6653 | {'svr__C': 2000, 'svr__gamma': 2, 'svr__kernel': 'rbf'}

+--- Phase 1: Calibration Phase (Internal split on Training set) ---
+left_iris_x left_iris_y right_iris_x right_iris_y point_x point_y
+0 725.386353 275.860901 627.103882 274.170715 1607.0 100.0
+1 735.130432 282.739563 636.170105 280.775909 100.0 769.0
+2 729.180908 284.334076 630.414978 282.022430 1607.0 769.0
+3 730.777405 280.959167 632.483826 277.992859 853.5 434.5
+4 723.184448 275.512177 625.264465 274.214630 1607.0 100.0
+Score X: 0.6935454371850305
+Time X: 0.018875598907470703
+Score Y: 0.9363413011174913
+Time Y: 0.011532068252563477
+Calibration Result -> Acc: 281.0235, Prec: 49.7874, Time: 0.22s

+--- Phase 2: Validation Phase (Hold-out Split) ---
+Score X_Val: 0.7649424330422514
+Time X_Val: 0.013813972473144531
+Score Y_Val: 0.9553053450112502
+Time Y_Val: 0.016499757766723633
+Validation Result -> Acc: 257.4740, Prec: 47.0542

+====================================================================================================
+MODEL: Random Forest Regressor
+====================================================================================================

+Searching for top 5 parameter combinations...
+Rank | R2 (X) | MAE (X) | Parameters

+---

+1 | 0.9229 | 65.8699 | {'randomforestregressor__max_depth': 10, 'randomforestregressor__min_samples_split': 10, 'randomforestregressor__n_estimators': 100}
+2 | 0.9183 | 66.9455 | {'randomforestregressor__max_depth': 10, 'randomforestregressor__min_samples_split': 5, 'randomforestregressor__n_estimators': 100}
+3 | 0.9137 | 67.7311 | {'randomforestregressor__max_depth': 10, 'randomforestregressor__min_samples_split': 2, 'randomforestregressor__n_estimators': 100}

+--- Phase 1: Calibration Phase (Internal split on Training set) ---
+left_iris_x left_iris_y right_iris_x right_iris_y point_x point_y
+0 725.386353 275.860901 627.103882 274.170715 1607.0 100.0
+1 735.130432 282.739563 636.170105 280.775909 100.0 769.0
+2 729.180908 284.334076 630.414978 282.022430 1607.0 769.0
+3 730.777405 280.959167 632.483826 277.992859 853.5 434.5
+4 723.184448 275.512177 625.264465 274.214630 1607.0 100.0
+Time X: 3.5348854064941406
+Time Y: 2.837346315383911
+Calibration Result -> Acc: 69.4965, Prec: 46.4941, Time: 6.57s

+--- Phase 2: Validation Phase (Hold-out Split) ---
+Time X_Val: 3.585437297821045
+Time Y_Val: 3.094998598098755
+Validation Result -> Acc: 46.6466, Prec: 29.1507

+==============================================================================================================
+CONSOLIDATED TRAIN/VAL PERFORMANCE SUMMARY
+==============================================================================================================
+Model Name | Calib Acc | Calib Prec | Valid Acc | Valid Prec | Time (s)

+---

+Linear Regression | 245.7273 | 49.8420 | 229.4535 | 51.5457 | 1.5465
+Ridge Regression | 247.0915 | 49.9354 | 229.6377 | 51.8048 | 0.4171
+Lasso Regression | 251.6126 | 47.0489 | 265.0815 | 44.3168 | 0.4428
+Elastic Net | 301.3013 | 41.3590 | 294.6057 | 38.2533 | 0.0876
+Bayesian Ridge | 246.8617 | 50.2630 | 229.1080 | 50.9408 | 1.6934
+SGD Regressor | 247.0398 | 49.5953 | 239.2565 | 48.5600 | 3.9767
+Support Vector Regressor | 281.0235 | 49.7874 | 257.4740 | 47.0542 | 0.2224
+Random Forest Regressor | 69.4965 | 46.4941 | 46.6466 | 29.1507 | 6.5670

+---
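The 765/135 split used in the overfitting check above is an 85/15 hold-out on the 900-row dataset. A minimal sketch of such a split; the file name, seed, and use of train_test_split are illustrative assumptions, and only the feature/target columns and row counts come from the report:

```python
# Sketch only: reproduce the 765/135 train/validation hold-out from the report.
import pandas as pd
from sklearn.model_selection import train_test_split

df = pd.read_csv("calibration_samples.csv")  # hypothetical file name
features = ["left_iris_x", "left_iris_y", "right_iris_x", "right_iris_y"]
targets = ["point_x", "point_y"]

X_train, X_val, y_train, y_val = train_test_split(
    df[features], df[targets],
    test_size=0.15,    # 900 * 0.15 = 135 validation rows
    random_state=42,   # hypothetical seed
)
# Each model is fit on the 765 training rows and scored on the 135 held-out
# rows, so calibration vs. validation accuracy can be compared per model.
```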
From b863379c2d05c7e1b59ef029e2d6428545fd0040 Mon Sep 17 00:00:00 2001
From: midaa1
Date: Sun, 15 Feb 2026 13:01:04 +0200
Subject: [PATCH 06/10] Add timing to compare model performance

---
 app/services/gaze_tracker.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/app/services/gaze_tracker.py b/app/services/gaze_tracker.py
index 3354551..af3bac6 100644
--- a/app/services/gaze_tracker.py
+++ b/app/services/gaze_tracker.py
@@ -14,7 +14,7 @@
 from sklearn.pipeline import make_pipeline
 from sklearn.ensemble import RandomForestRegressor
 from sklearn.linear_model import Ridge
-
+import time
 # Model imports
 from sklearn import linear_model
@@ -91,9 +91,12 @@ def trian_and_predict(model_name, X_train, y_train, X_test, y_test, label):
         or model_name == "Support Vector Regressor"
     ):
         model = models[model_name]
+        start_time = time.time()
         model.fit(X_train, y_train)
+        end_time = time.time()
         y_pred = model.predict(X_test)
         print(f"Score {label}: {r2_score(y_test, y_pred)}")
+        print(f"Time {label}: {end_time - start_time}")
         return y_pred
     else:
         pipeline = models[model_name]
@@ -106,9 +109,12 @@
             refit="r2",
             return_train_score=True,
         )
+        start_time = time.time()
         grid_search.fit(X_train, y_train)
+        end_time = time.time()
         best_model = grid_search.best_estimator_
         y_pred = best_model.predict(X_test)
+        print(f"Time {label}: {end_time - start_time}")
         return y_pred
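PATCH 06 brackets each fit with time.time() deltas. An equivalent, illustrative pattern using time.perf_counter(), which is monotonic and higher-resolution and therefore better suited to timing short fit() calls; this is a sketch, not the project's code:

```python
# Sketch only: the timing pattern from PATCH 06, using a monotonic clock.
import time

def timed_fit(model, X_train, y_train, label):
    start = time.perf_counter()
    model.fit(X_train, y_train)
    elapsed = time.perf_counter() - start
    print(f"Time {label}: {elapsed}")
    return model
```

time.time() can jump if the system clock is adjusted mid-run, which would skew the per-model timings collected in the report; perf_counter() avoids that failure mode.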
From ddf3bdcfdb4163f15b4e592ba82147a524ac85ed Mon Sep 17 00:00:00 2001
From: midaa1
Date: Sun, 15 Feb 2026 13:36:27 +0200
Subject: [PATCH 07/10] Update report

---
 app/services/reports/report.md | 774 ++++++---------------------------
 1 file changed, 130 insertions(+), 644 deletions(-)

diff --git a/app/services/reports/report.md b/app/services/reports/report.md
index 7e3359c..9635584 100644
--- a/app/services/reports/report.md
+++ b/app/services/reports/report.md
@@ -1,714 +1,200 @@
-Detected 9 unique calibration points. Setting k=9.
+# 🎯 Eye-Gaze Calibration Regression Study

-## Before any modifications
+This project evaluates multiple regression models for **eye-gaze calibration**, aiming to map iris landmark coordinates to screen positions.

-====================================================================================================
-MODEL: Linear Regression
-====================================================================================================
+The goal is to determine which regression model provides the best trade-off between:

-Model Linear Regression has no hyperparameter grid defined for GridSearchCV.
+- 🎯 Prediction accuracy (screen point error)
+- 📏 Precision
+- ⚡ Execution speed
+- 🧠 Generalization (overfitting behavior)

-Running full 'predict' pipeline...
-left_iris_x left_iris_y right_iris_x right_iris_y point_x point_y
-0 751.846863 270.624054 641.400879 275.499695 100.0 100.0
-1 739.366943 275.130646 641.746643 276.419403 100.0 100.0
-2 739.202148 273.828247 641.387695 275.756897 100.0 100.0
-3 739.340332 273.977570 641.849548 276.237335 100.0 100.0
-4 738.536682 274.158722 641.225525 276.203308 100.0 100.0
-Score X: 0.8210933512146253
-Time X: 0.0
-Score Y: 0.955456427962058
-Time Y: 0.0008034706115722656
-Pipeline Result -> Avg Accuracy: 235.3662, Avg Precision: 49.1126, Total Time: 1.56s
+---

-====================================================================================================
-MODEL: Ridge Regression
-====================================================================================================
+## 📊 Dataset Overview

-Searching for top 5 parameter combinations (Axis X)...
-Rank | R2 Score | MAE | Parameters
+- Total samples: **900**
+- Features:
+  - `left_iris_x`, `left_iris_y`
+  - `right_iris_x`, `right_iris_y`
+- Targets:
+  - `point_x`, `point_y`
+- Calibration points detected: **9 unique points (k = 9)**

---

-1 | 0.7475 | 241.7027 | {'ridge__alpha': 0.01}
-2 | 0.6979 | 273.3379 | {'ridge__alpha': 100}
-3 | 0.6836 | 269.0442 | {'ridge__alpha': 55}
-4 | 0.6800 | 268.6203 | {'ridge__alpha': 50}
-5 | 0.6757 | 268.2162 | {'ridge__alpha': 45}

-Running full 'predict' pipeline...
-left_iris_x left_iris_y right_iris_x right_iris_y point_x point_y
-0 751.846863 270.624054 641.400879 275.499695 100.0 100.0
-1 739.366943 275.130646 641.746643 276.419403 100.0 100.0
-2 739.202148 273.828247 641.387695 275.756897 100.0 100.0
-3 739.340332 273.977570 641.849548 276.237335 100.0 100.0
-4 738.536682 274.158722 641.225525 276.203308 100.0 100.0
-Time X: 0.2734954357147217
-Time Y: 0.2774362564086914
-Pipeline Result -> Avg Accuracy: 236.1323, Avg Precision: 48.8059, Total Time: 0.66s
+## 🧠 Models Evaluated

-====================================================================================================
-MODEL: Lasso Regression
-====================================================================================================
+The following regression models were tested:

-Searching for top 5 parameter combinations (Axis X)...
-Rank | R2 Score | MAE | Parameters
+- Linear Regression
+- Ridge Regression
+- Lasso Regression
+- Elastic Net
+- Bayesian Ridge
+- SGD Regressor
+- Support Vector Regressor (SVR)
+- Random Forest Regressor

---

-1 | 0.6749 | 289.7889 | {'lasso__alpha': 100}
-2 | 0.6694 | 275.8380 | {'lasso__alpha': 55}
-3 | 0.6678 | 274.6908 | {'lasso__alpha': 50}
-4 | 0.6658 | 273.6146 | {'lasso__alpha': 45}
-5 | 0.6634 | 272.5937 | {'lasso__alpha': 40}

-Running full 'predict' pipeline...
-left_iris_x left_iris_y right_iris_x right_iris_y point_x point_y
-0 751.846863 270.624054 641.400879 275.499695 100.0 100.0
-1 739.366943 275.130646 641.746643 276.419403 100.0 100.0
-2 739.202148 273.828247 641.387695 275.756897 100.0 100.0
-3 739.340332 273.977570 641.849548 276.237335 100.0 100.0
-4 738.536682 274.158722 641.225525 276.203308 100.0 100.0
-Time X: 0.3323993682861328
-Time Y: 0.31626462936401367
-Pipeline Result -> Avg Accuracy: 290.8152, Avg Precision: 45.8779, Total Time: 0.77s

-====================================================================================================
-MODEL: Elastic Net
-====================================================================================================
+## ⚙️ Evaluation Metrics

-Searching for top 5 parameter combinations (Axis X)...
-Rank | R2 Score | MAE | Parameters
+The pipeline reports:

---

+- **Avg Accuracy** → positional error (lower is better)
+- **Avg Precision**
+- **Execution Time**
+- Axis-wise R² scores during calibration

-1 | 0.6996 | 274.9757 | {'elasticnet__alpha': 1.0, 'elasticnet__l1_ratio': 0.8}
-2 | 0.6896 | 291.7449 | {'elasticnet__alpha': 1.0, 'elasticnet__l1_ratio': 0.5}
-3 | 0.6851 | 269.2697 | {'elasticnet__alpha': 0.1, 'elasticnet__l1_ratio': 0}
-4 | 0.6848 | 269.2195 | {'elasticnet__alpha': 0.1, 'elasticnet__l1_ratio': 0.01}
-5 | 0.6768 | 268.2999 | {'elasticnet__alpha': 0.1, 'elasticnet__l1_ratio': 0.2}
+Hyperparameter tuning was performed using **GridSearchCV** when available.

-Running full 'predict' pipeline...
-left_iris_x left_iris_y right_iris_x right_iris_y point_x point_y
-0 751.846863 270.624054 641.400879 275.499695 100.0 100.0
-1 739.366943 275.130646 641.746643 276.419403 100.0 100.0
-2 739.202148 273.828247 641.387695 275.756897 100.0 100.0
-3 739.340332 273.977570 641.849548 276.237335 100.0 100.0
-4 738.536682 274.158722 641.225525 276.203308 100.0 100.0
-Score X: 0.7457386415719489
-Time X: 0.0
-Score Y: 0.9049978924542859
-Time Y: 0.0
-Pipeline Result -> Avg Accuracy: 298.9683, Avg Precision: 38.6566, Total Time: 0.11s

---

-====================================================================================================
-MODEL: Bayesian Ridge
-====================================================================================================
+# 🧪 Experiment 1 — Baseline Results

-Searching for top 5 parameter combinations (Axis X)...
-Rank | R2 Score | MAE | Parameters
+Initial run before pipeline modifications.

---

+### 🔎 Key Observations

-1 | 0.2365 | 255.4301 | {'bayesianridge__alpha_init': 1, 'bayesianridge__lambda_init': 1e-09}
-2 | 0.2365 | 255.4301 | {'bayesianridge__alpha_init': 1.3, 'bayesianridge__lambda_init': 1e-09}
-3 | 0.2365 | 255.4301 | {'bayesianridge__alpha_init': 1.1, 'bayesianridge__lambda_init': 1e-09}
-4 | 0.2365 | 255.4301 | {'bayesianridge__alpha_init': 1.2, 'bayesianridge__lambda_init': 1e-09}
-5 | 0.2365 | 255.4301 | {'bayesianridge__alpha_init': 1.4, 'bayesianridge__lambda_init': 1e-09}
+- Linear & Ridge produced stable baseline performance.
+- Elastic Net was very fast but less precise.
+- SVR achieved strong R² values.
+- Random Forest failed due to missing configuration.

-Running full 'predict' pipeline...
-left_iris_x left_iris_y right_iris_x right_iris_y point_x point_y
-0 751.846863 270.624054 641.400879 275.499695 100.0 100.0
-1 739.366943 275.130646 641.746643 276.419403 100.0 100.0
-2 739.202148 273.828247 641.387695 275.756897 100.0 100.0
-3 739.340332 273.977570 641.849548 276.237335 100.0 100.0
-4 738.536682 274.158722 641.225525 276.203308 100.0 100.0
-Time X: 0.801609992980957
-Time Y: 0.7761216163635254
-Pipeline Result -> Avg Accuracy: 235.3889, Avg Precision: 48.1209, Total Time: 1.69s
+### 📋 Performance Summary

-====================================================================================================
-MODEL: SGD Regressor
-====================================================================================================
+| Model | Avg Accuracy | Avg Precision | Time (s) |
+|------|-------------|---------------|---------|
+| Linear Regression | 235.37 | 49.11 | 1.56 |
+| Ridge Regression | 236.13 | 48.81 | 0.67 |
+| Lasso Regression | 290.82 | 45.88 | 0.77 |
+| Elastic Net | 298.97 | 38.66 | 0.11 |
+| Bayesian Ridge | 235.39 | 48.12 | 1.69 |
+| SGD Regressor | 241.21 | 46.88 | 15.31 |
+| Support Vector Regressor | 266.18 | 48.07 | 0.13 |
+| Random Forest | ❌ Error | — | — |

-Searching for top 5 parameter combinations (Axis X)...
-Rank | R2 Score | MAE | Parameters

---

-1 | 0.7123 | 256.6649 | {'sgdregressor__alpha': 0.0001, 'sgdregressor__eta0': 0.0001, 'sgdregressor__l1_ratio': 0.7, 'sgdregressor__max_iter': 1000}
-2 | 0.7123 | 256.6659 | {'sgdregressor__alpha': 0.0001, 'sgdregressor__eta0': 0.0001, 'sgdregressor__l1_ratio': 0, 'sgdregressor__max_iter': 1000}
-3 | 0.7123 | 256.6661 | {'sgdregressor__alpha': 0.0001, 'sgdregressor__eta0': 0.0001, 'sgdregressor__l1_ratio': 0.2, 'sgdregressor__max_iter': 1000}
-4 | 0.7123 | 256.6659 | {'sgdregressor__alpha': 0.0001, 'sgdregressor__eta0': 0.0001, 'sgdregressor__l1_ratio': 1, 'sgdregressor__max_iter': 1000}
-5 | 0.7123 | 256.6658 | {'sgdregressor__alpha': 0.0001, 'sgdregressor__eta0': 0.0001, 'sgdregressor__l1_ratio': 0.5, 'sgdregressor__max_iter': 1000}
+---

-Running full 'predict' pipeline...
-left_iris_x left_iris_y right_iris_x right_iris_y point_x point_y
-0 751.846863 270.624054 641.400879 275.499695 100.0 100.0
-1 739.366943 275.130646 641.746643 276.419403 100.0 100.0
-2 739.202148 273.828247 641.387695 275.756897 100.0 100.0
-3 739.340332 273.977570 641.849548 276.237335 100.0 100.0
-4 738.536682 274.158722 641.225525 276.203308 100.0 100.0
-Time X: 7.285875558853149
-Time Y: 7.9095728397369385
-Pipeline Result -> Avg Accuracy: 241.2138, Avg Precision: 46.8787, Total Time: 15.31s
+# 🧪 Experiment 2 — Pipeline Improvements

-====================================================================================================
-MODEL: Support Vector Regressor
-====================================================================================================
+Changes made:

-Searching for top 5 parameter combinations (Axis X)...
-Rank | R2 Score | MAE | Parameters
+- Added Random Forest configuration
+- Expanded hyperparameter grids
+- Improved pipeline stability

---

-1 | 0.9087 | 75.2710 | {'svr__C': 1000, 'svr__gamma': 1, 'svr__kernel': 'rbf'}
-2 | 0.9074 | 87.7716 | {'svr__C': 100, 'svr__gamma': 1, 'svr__kernel': 'rbf'}
-3 | 0.8694 | 120.2317 | {'svr__C': 1000, 'svr__gamma': 0.1, 'svr__kernel': 'rbf'}
-4 | 0.8157 | 137.6160 | {'svr__C': 100, 'svr__gamma': 0.1, 'svr__kernel': 'rbf'}
-5 | 0.7947 | 167.9109 | {'svr__C': 1000, 'svr__gamma': 0.01, 'svr__kernel': 'rbf'}
+### 📋 Updated Performance Summary

-Running full 'predict' pipeline...
-left_iris_x left_iris_y right_iris_x right_iris_y point_x point_y
-0 751.846863 270.624054 641.400879 275.499695 100.0 100.0
-1 739.366943 275.130646 641.746643 276.419403 100.0 100.0
-2 739.202148 273.828247 641.387695 275.756897 100.0 100.0
-3 739.340332 273.977570 641.849548 276.237335 100.0 100.0
-4 738.536682 274.158722 641.225525 276.203308 100.0 100.0
-Score X: 0.7693369168357224
-Time X: 0.0077130794525146484
-Score Y: 0.9476667108830322
-Time Y: 0.005934715270996094
-Pipeline Result -> Avg Accuracy: 266.1792, Avg Precision: 48.0700, Total Time: 0.13s
+| Model | Avg Accuracy | Avg Precision | Time (s) |
+|------|-------------|---------------|---------|
+| Linear Regression | 235.37 | 49.11 | 1.50 |
+| Ridge Regression | 235.87 | 48.71 | 0.39 |
+| Lasso Regression | 292.79 | 45.10 | 0.41 |
+| Elastic Net | 298.97 | 38.66 | 0.13 |
+| Bayesian Ridge | 235.39 | 48.12 | 1.68 |
+| SGD Regressor | 241.24 | 46.94 | 3.78 |
+| Support Vector Regressor | 266.18 | 48.07 | 0.36 |
+| Random Forest | **52.85** | 31.96 | 4.43 |

-====================================================================================================
-MODEL: Random Forest Regressor
-====================================================================================================
+### 💡 Insights

-Model Random Forest Regressor has no hyperparameter grid defined for GridSearchCV.
+- Random Forest dramatically reduced positional error.
+- Precision dropped, suggesting sensitivity or instability.
+- Ridge became faster after optimization.
+- SVR remained a strong non-linear alternative.

-Running full 'predict' pipeline...
-left_iris_x left_iris_y right_iris_x right_iris_y point_x point_y
-0 751.846863 270.624054 641.400879 275.499695 100.0 100.0
-1 739.366943 275.130646 641.746643 276.419403 100.0 100.0
-2 739.202148 273.828247 641.387695 275.756897 100.0 100.0
-3 739.340332 273.977570 641.849548 276.237335 100.0 100.0
-4 738.536682 274.158722 641.225525 276.203308 100.0 100.0
-Pipeline Error: 'Random Forest Regressor'
+---

-================================================================================
-OVERALL PERFORMANCE SUMMARY
-================================================================================
-Model Name | Avg Accuracy | Avg Precision | Time (s)
+# 🧪 Experiment 3 — Train/Validation Split (Overfitting Check)

---

+To evaluate generalization:

-Linear Regression | 235.3662 | 49.1126 | 1.5597
-Ridge Regression | 236.1323 | 48.8059 | 0.6650
-Lasso Regression | 290.8152 | 45.8779 | 0.7684
-Elastic Net | 298.9683 | 38.6566 | 0.1052
-Bayesian Ridge | 235.3889 | 48.1209 | 1.6910
-SGD Regressor | 241.2138 | 46.8787 | 15.3096
-Support Vector Regressor | 266.1792 | 48.0700 | 0.1342
-Random Forest Regressor | ERROR: 'Random Forest Regressor'
+- Training: **765 samples**
+- Validation: **135 samples**

---

-## After modifications and adding the Random Forest configuration
+Each model went through:

-Detected 9 unique calibration points. Setting k=9.

-====================================================================================================
-MODEL: Linear Regression
-====================================================================================================

-Model Linear Regression has no hyperparameter grid defined for GridSearchCV.

-Running full 'predict' pipeline...
-left_iris_x left_iris_y right_iris_x right_iris_y point_x point_y
-0 751.846863 270.624054 641.400879 275.499695 100.0 100.0
-1 739.366943 275.130646 641.746643 276.419403 100.0 100.0
-2 739.202148 273.828247 641.387695 275.756897 100.0 100.0
-3 739.340332 273.977570 641.849548 276.237335 100.0 100.0
-4 738.536682 274.158722 641.225525 276.203308 100.0 100.0
-Score X: 0.8210933512146253
-Time X: 0.0
-Score Y: 0.955456427962058
-Time Y: 0.0
-Pipeline Result -> Avg Accuracy: 235.3662, Avg Precision: 49.1126, Total Time: 1.50s

-====================================================================================================
-MODEL: Ridge Regression
-====================================================================================================

-Searching for top 5 parameter combinations (Axis X)...
-Rank | R2 Score | MAE | Parameters
+### 📋 Updated Performance Summary
---
+| Model | Avg Accuracy | Avg Precision | Time (s) |
+|------|-------------|---------------|---------|
+| Linear Regression | 235.37 | 49.11 | 1.50 |
+| Ridge Regression | 235.87 | 48.71 | 0.39 |
+| Lasso Regression | 292.79 | 45.10 | 0.41 |
+| Elastic Net | 298.97 | 38.66 | 0.13 |
+| Bayesian Ridge | 235.39 | 48.12 | 1.68 |
+| SGD Regressor | 241.24 | 46.94 | 3.78 |
+| Support Vector Regressor | 266.18 | 48.07 | 0.36 |
+| Random Forest | **52.85** | 31.96 | 4.43 |
-1 | 0.7497 | 241.3809 | {'ridge__alpha': 0.005}
-2 | 0.7475 | 241.7027 | {'ridge__alpha': 0.01}
-3 | 0.7118 | 268.2630 | {'ridge__alpha': 100}
-4 | 0.6966 | 263.7161 | {'ridge__alpha': 50}
-5 | 0.6468 | 261.6515 | {'ridge__alpha': 20}
-
-Running full 'predict' pipeline...
-left_iris_x left_iris_y right_iris_x right_iris_y point_x point_y
-0 751.846863 270.624054 641.400879 275.499695 100.0 100.0
-1 739.366943 275.130646 641.746643 276.419403 100.0 100.0
-2 739.202148 273.828247 641.387695 275.756897 100.0 100.0
-3 739.340332 273.977570 641.849548 276.237335 100.0 100.0
-4 738.536682 274.158722 641.225525 276.203308 100.0 100.0
-Time X: 0.14182019233703613
-Time Y: 0.1409766674041748
-Pipeline Result -> Avg Accuracy: 235.8727, Avg Precision: 48.7106, Total Time: 0.39s
-
-====================================================================================================
-MODEL: Lasso Regression
-====================================================================================================
-
-Searching for top 5 parameter combinations (Axis X)...
-Rank | R2 Score | MAE | Parameters
---
+### 💡 Insights
-1 | 0.6749 | 289.7889 | {'lasso__alpha': 100}
-2 | 0.6694 | 275.8380 | {'lasso__alpha': 55}
-3 | 0.6678 | 274.6908 | {'lasso__alpha': 50}
-4 | 0.6658 | 273.6146 | {'lasso__alpha': 45}
-5 | 0.6634 | 272.5937 | {'lasso__alpha': 40}
-
-Running full 'predict' pipeline...
-left_iris_x left_iris_y right_iris_x right_iris_y point_x point_y
-0 751.846863 270.624054 641.400879 275.499695 100.0 100.0
-1 739.366943 275.130646 641.746643 276.419403 100.0 100.0
-2 739.202148 273.828247 641.387695 275.756897 100.0 100.0
-3 739.340332 273.977570 641.849548 276.237335 100.0 100.0
-4 738.536682 274.158722 641.225525 276.203308 100.0 100.0
-Time X: 0.15945863723754883
-Time Y: 0.15405631065368652
-Pipeline Result -> Avg Accuracy: 292.7949, Avg Precision: 45.1035, Total Time: 0.41s
-
-====================================================================================================
-MODEL: Elastic Net
-====================================================================================================
-
-Searching for top 5 parameter combinations (Axis X)...
-Rank | R2 Score | MAE | Parameters
+- Random Forest dramatically reduced positional error.
+- Precision dropped, suggesting sensitivity or instability (see the metric sketch below).
+- Ridge became faster after optimization.
+- SVR remained a strong non-linear alternative.
---
-1 | 0.7005 | 277.8284 | {'elasticnet__alpha': 0.5, 'elasticnet__l1_ratio': 0.5}
-2 | 0.6999 | 280.8068 | {'elasticnet__alpha': 1.0, 'elasticnet__l1_ratio': 0.7}
-3 | 0.6998 | 275.1441 | {'elasticnet__alpha': 2.0, 'elasticnet__l1_ratio': 0.9}
-4 | 0.6996 | 274.9757 | {'elasticnet__alpha': 1.0, 'elasticnet__l1_ratio': 0.8}
-5 | 0.6957 | 286.5466 | {'elasticnet__alpha': 2.0, 'elasticnet__l1_ratio': 0.8}
-
-Running full 'predict' pipeline...
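+The accuracy/precision distinction matters for reading these numbers: a model can land far from the target while clustering tightly (good precision, poor accuracy), and vice versa. The helpers below are simplified stand-ins for the project's metric functions (`func_accuracy_x`, `func_presicion_y`, ...), assuming the usual definitions: accuracy as mean Euclidean distance to the true point, precision as the spread of predictions around their own centroid.
+
+```python
+# Simplified stand-ins for the project's metric helpers (illustrative, not project code).
+import numpy as np
+
+def accuracy_px(pred_x, pred_y, true_x, true_y):
+    """Mean Euclidean distance (pixels) between predictions and the true point."""
+    dx = np.asarray(pred_x) - true_x
+    dy = np.asarray(pred_y) - true_y
+    return float(np.mean(np.hypot(dx, dy)))
+
+def precision_sd(pred_x, pred_y):
+    """Spread of predictions around their own centroid (RMS of per-axis SDs)."""
+    return float(np.hypot(np.std(pred_x), np.std(pred_y)))
+
+# A tight but offset cluster: small spread (good precision), large error (poor accuracy).
+xs = [210.0, 212.0, 208.0, 211.0]
+ys = [305.0, 303.0, 306.0, 304.0]
+print(accuracy_px(xs, ys, 100.0, 100.0))  # ~232 px error
+print(precision_sd(xs, ys))               # ~1.9 px spread
+```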
-left_iris_x left_iris_y right_iris_x right_iris_y point_x point_y
-0 751.846863 270.624054 641.400879 275.499695 100.0 100.0
-1 739.366943 275.130646 641.746643 276.419403 100.0 100.0
-2 739.202148 273.828247 641.387695 275.756897 100.0 100.0
-3 739.340332 273.977570 641.849548 276.237335 100.0 100.0
-4 738.536682 274.158722 641.225525 276.203308 100.0 100.0
-Score X: 0.7457386415719489
-Time X: 0.0
-Score Y: 0.9049978924542859
-Time Y: 0.0
-Pipeline Result -> Avg Accuracy: 298.9683, Avg Precision: 38.6566, Total Time: 0.13s
-
-====================================================================================================
-MODEL: Bayesian Ridge
-====================================================================================================
-
-Searching for top 5 parameter combinations (Axis X)...
-Rank | R2 Score | MAE | Parameters
+# 🧪 Experiment 3 — Train/Validation Split (Overfitting Check)
----
+To evaluate generalization:
-1 | 0.2365 | 255.4301 | {'bayesianridge__alpha_init': 1, 'bayesianridge__lambda_init': 1e-09}
-2 | 0.2365 | 255.4301 | {'bayesianridge__alpha_init': 1.3, 'bayesianridge__lambda_init': 1e-09}
-3 | 0.2365 | 255.4301 | {'bayesianridge__alpha_init': 1.1, 'bayesianridge__lambda_init': 1e-09}
-4 | 0.2365 | 255.4301 | {'bayesianridge__alpha_init': 1.2, 'bayesianridge__lambda_init': 1e-09}
-5 | 0.2365 | 255.4301 | {'bayesianridge__alpha_init': 1.4, 'bayesianridge__lambda_init': 1e-09}
-
-Running full 'predict' pipeline...
-left_iris_x left_iris_y right_iris_x right_iris_y point_x point_y
-0 751.846863 270.624054 641.400879 275.499695 100.0 100.0
-1 739.366943 275.130646 641.746643 276.419403 100.0 100.0
-2 739.202148 273.828247 641.387695 275.756897 100.0 100.0
-3 739.340332 273.977570 641.849548 276.237335 100.0 100.0
-4 738.536682 274.158722 641.225525 276.203308 100.0 100.0
-Time X: 0.7946014404296875
-Time Y: 0.7750468254089355
-Pipeline Result -> Avg Accuracy: 235.3889, Avg Precision: 48.1209, Total Time: 1.68s
-
-====================================================================================================
-MODEL: SGD Regressor
-====================================================================================================
-
-Searching for top 5 parameter combinations (Axis X)...
-Rank | R2 Score | MAE | Parameters
+- Training: **765 samples**
+- Validation: **135 samples**
----
+Each model went through:
-1 | 0.7123 | 256.6659 | {'sgdregressor__alpha': 0.0001, 'sgdregressor__eta0': 0.0001, 'sgdregressor__l1_ratio': 0.8, 'sgdregressor__max_iter': 1000}
-2 | 0.7123 | 256.6640 | {'sgdregressor__alpha': 0.0001, 'sgdregressor__eta0': 0.0001, 'sgdregressor__l1_ratio': 1, 'sgdregressor__max_iter': 1000}
-3 | 0.7123 | 256.6659 | {'sgdregressor__alpha': 0.0001, 'sgdregressor__eta0': 0.0001, 'sgdregressor__l1_ratio': 0.7, 'sgdregressor__max_iter': 1000}
-4 | 0.7123 | 256.6657 | {'sgdregressor__alpha': 0.0001, 'sgdregressor__eta0': 0.0001, 'sgdregressor__l1_ratio': 0.5, 'sgdregressor__max_iter': 1000}
-5 | 0.7123 | 256.7432 | {'sgdregressor__alpha': 0.001, 'sgdregressor__eta0': 0.0001, 'sgdregressor__l1_ratio': 0.8, 'sgdregressor__max_iter': 1000}
-
-Running full 'predict' pipeline...
-left_iris_x left_iris_y right_iris_x right_iris_y point_x point_y
-0 751.846863 270.624054 641.400879 275.499695 100.0 100.0
-1 739.366943 275.130646 641.746643 276.419403 100.0 100.0
-2 739.202148 273.828247 641.387695 275.756897 100.0 100.0
-3 739.340332 273.977570 641.849548 276.237335 100.0 100.0
-4 738.536682 274.158722 641.225525 276.203308 100.0 100.0
-Time X: 1.793349027633667
-Time Y: 1.8706464767456055
-Pipeline Result -> Avg Accuracy: 241.2358, Avg Precision: 46.9392, Total Time: 3.78s
-
-====================================================================================================
-MODEL: Support Vector Regressor
-====================================================================================================
-
-Searching for top 5 parameter combinations (Axis X)...
-Rank | R2 Score | MAE | Parameters
+1. Calibration phase (internal split)
+2. Validation phase (hold-out set)
---
-1 | 0.9167 | 68.5502 | {'svr__C': 1000, 'svr__gamma': 2, 'svr__kernel': 'rbf'}
-2 | 0.9162 | 69.3407 | {'svr__C': 500, 'svr__gamma': 2, 'svr__kernel': 'rbf'}
-3 | 0.9149 | 68.6453 | {'svr__C': 2000, 'svr__gamma': 2, 'svr__kernel': 'rbf'}
-4 | 0.9144 | 68.0756 | {'svr__C': 500, 'svr__gamma': 5, 'svr__kernel': 'rbf'}
-5 | 0.9132 | 72.2102 | {'svr__C': 200, 'svr__gamma': 2, 'svr__kernel': 'rbf'}
-
-Running full 'predict' pipeline...
-left_iris_x left_iris_y right_iris_x right_iris_y point_x point_y
-0 751.846863 270.624054 641.400879 275.499695 100.0 100.0
-1 739.366943 275.130646 641.746643 276.419403 100.0 100.0
-2 739.202148 273.828247 641.387695 275.756897 100.0 100.0
-3 739.340332 273.977570 641.849548 276.237335 100.0 100.0
-4 738.536682 274.158722 641.225525 276.203308 100.0 100.0
-Score X: 0.7693369168357224
-Time X: 0.0162813663482666
-Score Y: 0.9476667108830322
-Time Y: 0.019759178161621094
-Pipeline Result -> Avg Accuracy: 266.1792, Avg Precision: 48.0700, Total Time: 0.36s
-
-====================================================================================================
-MODEL: Random Forest Regressor
-====================================================================================================
-
-Searching for top 5 parameter combinations (Axis X)...
-Rank | R2 Score | MAE | Parameters
-
---
+## 📋 Train vs Validation Results
-1 | 0.9221 | 68.1083 | {'randomforestregressor__max_depth': 10, 'randomforestregressor__min_samples_split': 10, 'randomforestregressor__n_estimators': 100}
-2 | 0.9180 | 68.5070 | {'randomforestregressor__max_depth': 10, 'randomforestregressor__min_samples_split': 5, 'randomforestregressor__n_estimators': 100}
-3 | 0.9147 | 69.0029 | {'randomforestregressor__max_depth': 10, 'randomforestregressor__min_samples_split': 2, 'randomforestregressor__n_estimators': 100}
-
-Running full 'predict' pipeline...
-left_iris_x left_iris_y right_iris_x right_iris_y point_x point_y -0 751.846863 270.624054 641.400879 275.499695 100.0 100.0 -1 739.366943 275.130646 641.746643 276.419403 100.0 100.0 -2 739.202148 273.828247 641.387695 275.756897 100.0 100.0 -3 739.340332 273.977570 641.849548 276.237335 100.0 100.0 -4 738.536682 274.158722 641.225525 276.203308 100.0 100.0 -Time X: 3.1839962005615234 -Time Y: 1.0933246612548828 -Pipeline Result -> Avg Accuracy: 52.8510, Avg Precision: 31.9563, Total Time: 4.43s - -================================================================================ -OVERALL PERFORMANCE SUMMARY -================================================================================ -Model Name | Avg Accuracy | Avg Precision | Time (s) +| Model | Calib Acc | Calib Prec | Valid Acc | Valid Prec | Time (s) | +|------|-----------|------------|-----------|------------|---------| +| Linear Regression | 245.73 | 49.84 | 229.45 | 51.55 | 1.55 | +| Ridge Regression | 247.09 | 49.94 | 229.64 | 51.80 | 0.42 | +| Lasso Regression | 251.61 | 47.05 | 265.08 | 44.32 | 0.44 | +| Elastic Net | 301.30 | 41.36 | 294.61 | 38.25 | 0.09 | +| Bayesian Ridge | 246.86 | 50.26 | 229.11 | 50.94 | 1.69 | +| SGD Regressor | 247.04 | 49.60 | 239.26 | 48.56 | 3.98 | +| Support Vector Regressor | 281.02 | 49.79 | 257.47 | 47.05 | 0.22 | +| Random Forest | **69.50** | 46.49 | **46.65** | 29.15 | 6.57 | --- -Linear Regression | 235.3662 | 49.1126 | 1.4983 -Ridge Regression | 235.8727 | 48.7106 | 0.3908 -Lasso Regression | 292.7949 | 45.1035 | 0.4128 -Elastic Net | 298.9683 | 38.6566 | 0.1328 -Bayesian Ridge | 235.3889 | 48.1209 | 1.6843 -SGD Regressor | 241.2358 | 46.9392 | 3.7777 -Support Vector Regressor | 266.1792 | 48.0700 | 0.3644 -Random Forest Regressor | 52.8510 | 31.9563 | 4.4307 +## 🔍 Overfitting Analysis ---- +### ✅ Stable Models +- Linear Regression +- Ridge Regression +- Bayesian Ridge -## After spliting to test and train to check if there any overfitting - -Full Dataset: 900 rows -Training Split: 765 rows -Validation Split: 135 rows -Detected 9 unique calibration points. - -==================================================================================================== -MODEL: Linear Regression -==================================================================================================== - -Model Linear Regression has no hyperparameter grid defined. - ---- Phase 1: Calibration Phase (Internal split on Training set) --- -left_iris_x left_iris_y right_iris_x right_iris_y point_x point_y -0 725.386353 275.860901 627.103882 274.170715 1607.0 100.0 -1 735.130432 282.739563 636.170105 280.775909 100.0 769.0 -2 729.180908 284.334076 630.414978 282.022430 1607.0 769.0 -3 730.777405 280.959167 632.483826 277.992859 853.5 434.5 -4 723.184448 275.512177 625.264465 274.214630 1607.0 100.0 -Score X: 0.07148328938658532 -Time X: 0.002007007598876953 -Score Y: 0.9278236134235549 -Time Y: 0.0018939971923828125 -Calibration Result -> Acc: 245.7273, Prec: 49.8420, Time: 1.55s - ---- Phase 2: Validation Phase (Hold-out Split) --- -Score X_Val: 0.8200780206843142 -Time X_Val: 0.0019996166229248047 -Score Y_Val: 0.9612407162914263 -Time Y_Val: 0.0010020732879638672 -Validation Result -> Acc: 229.4535, Prec: 51.5457 - -==================================================================================================== -MODEL: Ridge Regression -==================================================================================================== - -Searching for top 5 parameter combinations... 
-Rank | R2 (X) | MAE (X) | Parameters
+These models showed consistent calibration and validation behavior.
---
+### ⚠️ Potential Overfitting
+- Random Forest achieved lowest error but suffered large precision drop.
+- Indicates high capacity and sensitivity to dataset structure (a hold-out check is sketched below).
-1 | 0.7256 | 241.3973 | {'ridge__alpha': 0.001}
-2 | 0.7118 | 268.2630 | {'ridge__alpha': 100}
-3 | 0.6966 | 263.7161 | {'ridge__alpha': 50}
-4 | 0.6507 | 244.4687 | {'ridge__alpha': 0.005}
-5 | 0.6468 | 261.6515 | {'ridge__alpha': 20}
-
---- Phase 1: Calibration Phase (Internal split on Training set) ---
-left_iris_x left_iris_y right_iris_x right_iris_y point_x point_y
-0 725.386353 275.860901 627.103882 274.170715 1607.0 100.0
-1 735.130432 282.739563 636.170105 280.775909 100.0 769.0
-2 729.180908 284.334076 630.414978 282.022430 1607.0 769.0
-3 730.777405 280.959167 632.483826 277.992859 853.5 434.5
-4 723.184448 275.512177 625.264465 274.214630 1607.0 100.0
-Time X: 0.15825796127319336
-Time Y: 0.15566420555114746
-Calibration Result -> Acc: 247.0915, Prec: 49.9354, Time: 0.42s
-
---- Phase 2: Validation Phase (Hold-out Split) ---
-Time X_Val: 0.16522979736328125
-Time Y_Val: 0.15460491180419922
-Validation Result -> Acc: 229.6377, Prec: 51.8048
-
-====================================================================================================
-MODEL: Lasso Regression
-====================================================================================================
-
-Searching for top 5 parameter combinations...
-Rank | R2 (X) | MAE (X) | Parameters
+### ⚡ Best Balance
+- SVR provided a strong balance between:
+  - Accuracy
+  - Speed
+  - Generalization
---
-1 | 0.6910 | 270.3284 | {'lasso__alpha': 55}
-2 | 0.6905 | 269.0582 | {'lasso__alpha': 50}
-3 | 0.6895 | 267.8433 | {'lasso__alpha': 45}
-4 | 0.6881 | 266.7249 | {'lasso__alpha': 40}
-5 | 0.6841 | 264.7345 | {'lasso__alpha': 30}
-
---- Phase 1: Calibration Phase (Internal split on Training set) ---
-left_iris_x left_iris_y right_iris_x right_iris_y point_x point_y
-0 725.386353 275.860901 627.103882 274.170715 1607.0 100.0
-1 735.130432 282.739563 636.170105 280.775909 100.0 769.0
-2 729.180908 284.334076 630.414978 282.022430 1607.0 769.0
-3 730.777405 280.959167 632.483826 277.992859 853.5 434.5
-4 723.184448 275.512177 625.264465 274.214630 1607.0 100.0
-Time X: 0.16257333755493164
-Time Y: 0.16847848892211914
-Calibration Result -> Acc: 251.6126, Prec: 47.0489, Time: 0.44s
-
---- Phase 2: Validation Phase (Hold-out Split) ---
-Time X_Val: 0.18091559410095215
-Time Y_Val: 0.16357016563415527
-Validation Result -> Acc: 265.0815, Prec: 44.3168
-
-====================================================================================================
-MODEL: Elastic Net
-====================================================================================================
-
-Searching for top 5 parameter combinations...
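+One way to reproduce this check outside the full pipeline is to hold out a validation set up front and compare the two error figures directly; a large calibration/validation gap, or a precision collapse like Random Forest's, is the signal to look for. A minimal sketch, assuming a feature matrix `X`, pixel targets `y`, and a hypothetical `mean_error` helper:
+
+```python
+# Minimal hold-out overfitting check (X, y and mean_error are illustrative stand-ins).
+import numpy as np
+from sklearn.ensemble import RandomForestRegressor
+from sklearn.model_selection import train_test_split
+
+def mean_error(model, X, y):
+    """Mean absolute deviation between predictions and targets, in pixels."""
+    return float(np.mean(np.abs(model.predict(X) - y)))
+
+# A 15% hold-out reproduces the report's 765/135 split on a 900-row session.
+X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.15, random_state=42)
+
+model = RandomForestRegressor(n_estimators=200, max_depth=10, random_state=42)
+model.fit(X_train, y_train)
+
+print(f"calibration error: {mean_error(model, X_train, y_train):.2f} px")
+print(f"validation error:  {mean_error(model, X_val, y_val):.2f} px")
+```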
-Rank | R2 (X) | MAE (X) | Parameters
+# 🏆 Final Findings
---
+### 🥇 Best Raw Accuracy
+**Random Forest Regressor**
-1 | 0.7136 | 273.3473 | {'elasticnet__alpha': 0.5, 'elasticnet__l1_ratio': 0.5}
-2 | 0.7136 | 270.6703 | {'elasticnet__alpha': 2.0, 'elasticnet__l1_ratio': 0.9}
-3 | 0.7135 | 270.5027 | {'elasticnet__alpha': 1.0, 'elasticnet__l1_ratio': 0.8}
-4 | 0.7123 | 276.3342 | {'elasticnet__alpha': 1.0, 'elasticnet__l1_ratio': 0.7}
-5 | 0.7108 | 267.5358 | {'elasticnet__alpha': 0.5, 'elasticnet__l1_ratio': 0.7}
-
---- Phase 1: Calibration Phase (Internal split on Training set) ---
-left_iris_x left_iris_y right_iris_x right_iris_y point_x point_y
-0 725.386353 275.860901 627.103882 274.170715 1607.0 100.0
-1 735.130432 282.739563 636.170105 280.775909 100.0 769.0
-2 729.180908 284.334076 630.414978 282.022430 1607.0 769.0
-3 730.777405 280.959167 632.483826 277.992859 853.5 434.5
-4 723.184448 275.512177 625.264465 274.214630 1607.0 100.0
-Score X: 0.6531754054868382
-Time X: 0.0006241798400878906
-Score Y: 0.9074449357286609
-Time Y: 0.0008966922760009766
-Calibration Result -> Acc: 301.3013, Prec: 41.3590, Time: 0.09s
-
---- Phase 2: Validation Phase (Hold-out Split) ---
-Score X_Val: 0.7340912725347744
-Time X_Val: 0.0018773078918457031
-Score Y_Val: 0.9125809129793833
-Time Y_Val: 0.0009999275207519531
-Validation Result -> Acc: 294.6057, Prec: 38.2533
-
-====================================================================================================
-MODEL: Bayesian Ridge
-====================================================================================================
-
-Searching for top 5 parameter combinations...
-Rank | R2 (X) | MAE (X) | Parameters
+- Lowest positional error
+- Higher computation cost
+- Possible overfitting
---
+### 🥈 Most Stable Models
+- Linear Regression
+- Ridge Regression
-1 | 0.3463 | 251.4715 | {'bayesianridge__alpha_init': 1, 'bayesianridge__lambda_init': 0.001}
-2 | 0.3463 | 251.4715 | {'bayesianridge__alpha_init': 1.1, 'bayesianridge__lambda_init': 0.001}
-3 | 0.3463 | 251.4715 | {'bayesianridge__alpha_init': 1.2, 'bayesianridge__lambda_init': 0.001}
-4 | 0.3463 | 251.4715 | {'bayesianridge__alpha_init': 1.3, 'bayesianridge__lambda_init': 0.001}
-5 | 0.3463 | 251.4715 | {'bayesianridge__alpha_init': 1.4, 'bayesianridge__lambda_init': 0.001}
-
---- Phase 1: Calibration Phase (Internal split on Training set) ---
-left_iris_x left_iris_y right_iris_x right_iris_y point_x point_y
-0 725.386353 275.860901 627.103882 274.170715 1607.0 100.0
-1 735.130432 282.739563 636.170105 280.775909 100.0 769.0
-2 729.180908 284.334076 630.414978 282.022430 1607.0 769.0
-3 730.777405 280.959167 632.483826 277.992859 853.5 434.5
-4 723.184448 275.512177 625.264465 274.214630 1607.0 100.0
-Time X: 0.7953126430511475
-Time Y: 0.79813551902771
-Calibration Result -> Acc: 246.8617, Prec: 50.2630, Time: 1.69s
-
---- Phase 2: Validation Phase (Hold-out Split) ---
-Time X_Val: 0.8207540512084961
-Time Y_Val: 0.8074281215667725
-Validation Result -> Acc: 229.1080, Prec: 50.9408
-
-====================================================================================================
-MODEL: SGD Regressor
-====================================================================================================
-
-Searching for top 5 parameter combinations...
-Rank | R2 (X) | MAE (X) | Parameters
+### 🥉 Best Overall Trade-off
+**Support Vector Regressor (SVR)**
---
-1 | 0.7240 | 252.4349 | {'sgdregressor__alpha': 0.0001, 'sgdregressor__eta0': 0.0001, 'sgdregressor__l1_ratio': 0.8, 'sgdregressor__max_iter': 1000}
-2 | 0.7240 | 252.4352 | {'sgdregressor__alpha': 0.0001, 'sgdregressor__eta0': 0.0001, 'sgdregressor__l1_ratio': 1, 'sgdregressor__max_iter': 1000}
-3 | 0.7240 | 252.4355 | {'sgdregressor__alpha': 0.0001, 'sgdregressor__eta0': 0.0001, 'sgdregressor__l1_ratio': 0.5, 'sgdregressor__max_iter': 1000}
-4 | 0.7240 | 252.4360 | {'sgdregressor__alpha': 0.0001, 'sgdregressor__eta0': 0.0001, 'sgdregressor__l1_ratio': 0.7, 'sgdregressor__max_iter': 1000}
-5 | 0.7239 | 252.5118 | {'sgdregressor__alpha': 0.001, 'sgdregressor__eta0': 0.0001, 'sgdregressor__l1_ratio': 0.5, 'sgdregressor__max_iter': 1000}
-
---- Phase 1: Calibration Phase (Internal split on Training set) ---
-left_iris_x left_iris_y right_iris_x right_iris_y point_x point_y
-0 725.386353 275.860901 627.103882 274.170715 1607.0 100.0
-1 735.130432 282.739563 636.170105 280.775909 100.0 769.0
-2 729.180908 284.334076 630.414978 282.022430 1607.0 769.0
-3 730.777405 280.959167 632.483826 277.992859 853.5 434.5
-4 723.184448 275.512177 625.264465 274.214630 1607.0 100.0
-Time X: 1.9512312412261963
-Time Y: 1.9312074184417725
-Calibration Result -> Acc: 247.0398, Prec: 49.5953, Time: 3.98s
-
---- Phase 2: Validation Phase (Hold-out Split) ---
-Time X_Val: 1.7748675346374512
-Time Y_Val: 1.8547911643981934
-Validation Result -> Acc: 239.2565, Prec: 48.5600
-
-====================================================================================================
-MODEL: Support Vector Regressor
-====================================================================================================
-
-Searching for top 5 parameter combinations...
-Rank | R2 (X) | MAE (X) | Parameters
+# 🚀 Future Improvements
---
+Possible next steps:
-1 | 0.9196 | 63.4319 | {'svr__C': 1000, 'svr__gamma': 5, 'svr__kernel': 'rbf'}
-2 | 0.9191 | 66.7000 | {'svr__C': 1000, 'svr__gamma': 2, 'svr__kernel': 'rbf'}
-3 | 0.9186 | 67.8033 | {'svr__C': 500, 'svr__gamma': 2, 'svr__kernel': 'rbf'}
-4 | 0.9181 | 65.6786 | {'svr__C': 500, 'svr__gamma': 5, 'svr__kernel': 'rbf'}
-5 | 0.9180 | 66.6653 | {'svr__C': 2000, 'svr__gamma': 2, 'svr__kernel': 'rbf'}
-
---- Phase 1: Calibration Phase (Internal split on Training set) ---
-left_iris_x left_iris_y right_iris_x right_iris_y point_x point_y
-0 725.386353 275.860901 627.103882 274.170715 1607.0 100.0
-1 735.130432 282.739563 636.170105 280.775909 100.0 769.0
-2 729.180908 284.334076 630.414978 282.022430 1607.0 769.0
-3 730.777405 280.959167 632.483826 277.992859 853.5 434.5
-4 723.184448 275.512177 625.264465 274.214630 1607.0 100.0
-Score X: 0.6935454371850305
-Time X: 0.018875598907470703
-Score Y: 0.9363413011174913
-Time Y: 0.011532068252563477
-Calibration Result -> Acc: 281.0235, Prec: 49.7874, Time: 0.22s
-
---- Phase 2: Validation Phase (Hold-out Split) ---
-Score X_Val: 0.7649424330422514
-Time X_Val: 0.013813972473144531
-Score Y_Val: 0.9553053450112502
-Time Y_Val: 0.016499757766723633
-Validation Result -> Acc: 257.4740, Prec: 47.0542
-
-====================================================================================================
-MODEL: Random Forest Regressor
-====================================================================================================
-
-Searching for top 5 parameter combinations...
-Rank | R2 (X) | MAE (X) | Parameters
+- Feature scaling & normalization experiments
+- Temporal smoothing for gaze stability (see the sketch at the end of this report)
+- Ensemble methods (Linear + Non-linear)
+- Neural network-based gaze regression
+- Real-time latency benchmarking
---
-1 | 0.9229 | 65.8699 | {'randomforestregressor__max_depth': 10, 'randomforestregressor__min_samples_split': 10, 'randomforestregressor__n_estimators': 100}
-2 | 0.9183 | 66.9455 | {'randomforestregressor__max_depth': 10, 'randomforestregressor__min_samples_split': 5, 'randomforestregressor__n_estimators': 100}
-3 | 0.9137 | 67.7311 | {'randomforestregressor__max_depth': 10, 'randomforestregressor__min_samples_split': 2, 'randomforestregressor__n_estimators': 100}
-
---- Phase 1: Calibration Phase (Internal split on Training set) ---
-left_iris_x left_iris_y right_iris_x right_iris_y point_x point_y
-0 725.386353 275.860901 627.103882 274.170715 1607.0 100.0
-1 735.130432 282.739563 636.170105 280.775909 100.0 769.0
-2 729.180908 284.334076 630.414978 282.022430 1607.0 769.0
-3 730.777405 280.959167 632.483826 277.992859 853.5 434.5
-4 723.184448 275.512177 625.264465 274.214630 1607.0 100.0
-Time X: 3.5348854064941406
-Time Y: 2.837346315383911
-Calibration Result -> Acc: 69.4965, Prec: 46.4941, Time: 6.57s
-
---- Phase 2: Validation Phase (Hold-out Split) ---
-Time X_Val: 3.585437297821045
-Time Y_Val: 3.094998598098755
-Validation Result -> Acc: 46.6466, Prec: 29.1507
-
-==============================================================================================================
-CONSOLIDATED TRAIN/VAL PERFORMANCE SUMMARY
-==============================================================================================================
-Model Name | Calib Acc | Calib Prec | Valid Acc | Valid Prec | Time (s)
+# 📌 Conclusion
---
+This study shows that:
-Linear Regression | 245.7273 | 49.8420 | 229.4535 | 51.5457 | 1.5465
-Ridge Regression | 247.0915 | 49.9354 | 229.6377 | 51.8048 | 0.4171
-Lasso Regression | 251.6126 | 47.0489 | 265.0815 | 44.3168 | 0.4428
-Elastic Net | 301.3013 | 41.3590 | 294.6057 | 38.2533 | 0.0876
-Bayesian Ridge | 246.8617 | 50.2630 | 229.1080 | 50.9408 | 1.6934
-SGD Regressor | 247.0398 | 49.5953 | 239.2565 | 48.5600 | 3.9767
-Support Vector Regressor | 281.0235 | 49.7874 | 257.4740 | 47.0542 | 0.2224
-Random Forest Regressor | 69.4965 | 46.4941 | 46.6466 | 29.1507 | 6.5670
+- Non-linear models improve gaze estimation accuracy.
+- Random Forest can greatly reduce error but may overfit.
+- Linear models remain strong baselines for robustness.
+- Proper train/validation splits are essential for realistic performance evaluation.
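+As a pointer for the temporal-smoothing item in the future improvements above, a simple exponential moving average over successive predictions is a common first step. This is sketched only to illustrate the idea; it is not code that exists in the project.
+
+```python
+# Illustrative temporal smoothing of gaze predictions (not project code):
+# an exponential moving average damps frame-to-frame jitter at the cost of some lag.
+def smooth_gaze(points, alpha=0.3):
+    """points: iterable of (x, y) predictions; alpha: smoothing factor in (0, 1]."""
+    smoothed = []
+    sx = sy = None
+    for x, y in points:
+        if sx is None:
+            sx, sy = x, y  # seed with the first prediction
+        else:
+            sx = alpha * x + (1 - alpha) * sx
+            sy = alpha * y + (1 - alpha) * sy
+        smoothed.append((sx, sy))
+    return smoothed
+
+print(smooth_gaze([(100, 100), (130, 90), (95, 110)]))
+```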
---
+

From 757ecbb52a2664e03877cd139c5d3320ffc26cb3 Mon Sep 17 00:00:00 2001
From: midaa1
Date: Mon, 16 Feb 2026 04:37:19 +0200
Subject: [PATCH 08/10] fix if-condition handling in the models

---
 app/services/gaze_tracker.py | 10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/app/services/gaze_tracker.py b/app/services/gaze_tracker.py
index af3bac6..918cf9c 100644
--- a/app/services/gaze_tracker.py
+++ b/app/services/gaze_tracker.py
@@ -14,7 +14,7 @@ from sklearn.pipeline import make_pipeline
 from sklearn.ensemble import RandomForestRegressor
 from sklearn.linear_model import Ridge
-import time
+

 # Model imports
 from sklearn import linear_model
@@ -87,16 +87,11 @@ def trian_and_predict(model_name, X_train, y_train, X_test, y_test, label):
     """
     if (
         model_name == "Linear Regression"
-        or model_name == "Elastic Net"
-        or model_name == "Support Vector Regressor"
     ):
         model = models[model_name]
-        start_time = time.time()
         model.fit(X_train, y_train)
-        end_time = time.time()
         y_pred = model.predict(X_test)
         print(f"Score {label}: {r2_score(y_test, y_pred)}")
-        print(f"Time {label}: {end_time - start_time}")
         return y_pred
     else:
         pipeline = models[model_name]
@@ -109,12 +104,9 @@ def trian_and_predict(model_name, X_train, y_train, X_test, y_test, label):
             refit="r2",
             return_train_score=True,
         )
-        start_time = time.time()
         grid_search.fit(X_train, y_train)
-        end_time = time.time()
         best_model = grid_search.best_estimator_
         y_pred = best_model.predict(X_test)
-        print(f"Time {label}: {end_time - start_time}")
         return y_pred


From b1a0037f3105b599f633bfea7d87f5a83daf3a91 Mon Sep 17 00:00:00 2001
From: midaa1
Date: Wed, 18 Feb 2026 08:05:49 +0200
Subject: [PATCH 09/10] refactor iteration to avoid O(n^2) loop

---
 app/services/gaze_tracker.py | 565 ++++++++++++++++++-----------------
 1 file changed, 298 insertions(+), 267 deletions(-)

diff --git a/app/services/gaze_tracker.py b/app/services/gaze_tracker.py
index 918cf9c..d3377a6 100644
--- a/app/services/gaze_tracker.py
+++ b/app/services/gaze_tracker.py
@@ -1,5 +1,4 @@
 # Necessary imports
-import math
 import warnings

 warnings.filterwarnings("ignore")
@@ -12,17 +11,12 @@
 from sklearn.model_selection import train_test_split
 from sklearn.preprocessing import StandardScaler, PolynomialFeatures
 from sklearn.pipeline import make_pipeline
-from sklearn.ensemble import RandomForestRegressor
-from sklearn.linear_model import Ridge
-
 # Model imports
 from sklearn import linear_model
 from sklearn.svm import SVR
 from sklearn.cluster import KMeans
 from sklearn.model_selection import GridSearchCV
-from sklearn.model_selection import GroupShuffleSplit
-import matplotlib.pyplot as plt

 # Metrics imports
 from sklearn.metrics import make_scorer
@@ -39,7 +33,6 @@
     func_presicion_y,
     func_accuracy_x,
     func_accuracy_y,
-    func_total_accuracy,
 )

 from app.services.config import hyperparameters
@@ -61,14 +54,7 @@
     "Support Vector Regressor": make_pipeline(
         PolynomialFeatures(2), SVR(kernel="linear")
     ),
-    "Random Forest Regressor": make_pipeline(
-        RandomForestRegressor(
-            n_estimators=200,
-            max_depth=10,
-            min_samples_split=5,
-            random_state=42
-        )
-)}
+}

 # Set the scoring metrics for GridSearchCV to r2_score and mean_absolute_error
 scoring = {
@@ -77,39 +63,6 @@
 }


-def squash(v, limit=1.0):
-    """Squash não-linear estilo WebGazer"""
-    return np.tanh(v / limit)
-
-def trian_and_predict(model_name, X_train, y_train, X_test, y_test, label):
-    """
-    Helper to train a model (with or without GridSearchCV) and return predictions.
- """ - if ( - model_name == "Linear Regression" - ): - model = models[model_name] - model.fit(X_train, y_train) - y_pred = model.predict(X_test) - print(f"Score {label}: {r2_score(y_test, y_pred)}") - return y_pred - else: - pipeline = models[model_name] - param_grid = hyperparameters[model_name]["param_grid"] - grid_search = GridSearchCV( - pipeline, - param_grid, - cv=5, - scoring=scoring, - refit="r2", - return_train_score=True, - ) - grid_search.fit(X_train, y_train) - best_model = grid_search.best_estimator_ - y_pred = best_model.predict(X_test) - return y_pred - - def predict(data, k, model_X, model_Y): """ Predicts the gaze coordinates using machine learning models. @@ -123,52 +76,97 @@ def predict(data, k, model_X, model_Y): Returns: dict: A dictionary containing the predicted gaze coordinates, precision, accuracy, and cluster centroids. """ - + # Inicialize standard scaler + sc = StandardScaler() # Load data from csv file and drop unnecessary columns df = pd.read_csv(data) df = df.drop(["screen_height", "screen_width"], axis=1) - print(df.head()) - # Create groups (point_x, point_y) - df["group"] = list(zip(df["point_x"], df["point_y"])) # Data for X axis X_x = df[["left_iris_x", "right_iris_x"]] X_y = df["point_x"] - # groups = df["group"] + + # Normalize data using standard scaler and split data into training and testing sets + X_x = sc.fit_transform(X_x) + X_train_x, X_test_x, y_train_x, y_test_x = train_test_split( + X_x, X_y, test_size=0.2, random_state=42 + ) + + if ( + model_X == "Linear Regression" + or model_X == "Elastic Net" + or model_X == "Support Vector Regressor" + ): + model = models[model_X] + + # Fit the model and make predictions + model.fit(X_train_x, y_train_x) + y_pred_x = model.predict(X_test_x) + + else: + pipeline = models[model_X] + param_grid = hyperparameters[model_X]["param_grid"] + + # Initialize GridSearchCV with the pipeline and parameter grid + grid_search = GridSearchCV( + pipeline, + param_grid, + cv=5, + scoring=scoring, + refit="r2", + return_train_score=True, + ) + + # Fit the GridSearchCV to the training data for X + grid_search.fit(X_train_x, y_train_x) + + # Use the best estimator to predict the values and calculate the R2 score + best_model_x = grid_search.best_estimator_ + y_pred_x = best_model_x.predict(X_test_x) + # Data for Y axis - X_feature_y = df[["left_iris_y", "right_iris_y"]] + X_y = df[["left_iris_y", "right_iris_y"]] y_y = df["point_y"] - # Split data into training and testing sets then Normalize data using standard scaler - ( - X_train_x, X_test_x, - y_train_x, y_test_x, - X_train_y, X_test_y, - y_train_y, y_test_y - )= train_test_split( - X_x, - X_y, - X_feature_y, - y_y, - test_size=0.2, - random_state=42, + + # Normalize data using standard scaler and split data into training and testing sets + X_y = sc.fit_transform(X_y) + X_train_y, X_test_y, y_train_y, y_test_y = train_test_split( + X_y, y_y, test_size=0.2, random_state=42 ) - - # Scaling (fit on train only) - scaler_x = StandardScaler() - X_train_x = scaler_x.fit_transform(X_train_x) - X_test_x = scaler_x.transform(X_test_x) - - y_pred_x = trian_and_predict(model_X, X_train_x, y_train_x, X_test_x, y_test_x, "X") - - # Scaling (fit on train only) - scaler_y = StandardScaler() - X_train_y = scaler_y.fit_transform(X_train_y) - X_test_y = scaler_y.transform(X_test_y) - - - y_pred_y = trian_and_predict(model_Y, X_train_y, y_train_y, X_test_y, y_test_y, "Y") - + + if ( + model_Y == "Linear Regression" + or model_Y == "Elastic Net" + or model_Y == "Support Vector Regressor" + 
): + model = models[model_Y] + + # Fit the model and make predictions + model.fit(X_train_y, y_train_y) + y_pred_y = model.predict(X_test_y) + + else: + pipeline = models[model_Y] + param_grid = hyperparameters[model_Y]["param_grid"] + + # Initialize GridSearchCV with the pipeline and parameter grid + grid_search = GridSearchCV( + pipeline, + param_grid, + cv=5, + scoring=scoring, + refit="r2", + return_train_score=True, + ) + + # Fit the GridSearchCV to the training data for X + grid_search.fit(X_train_y, y_train_y) + + # Use the best estimator to predict the values and calculate the R2 score + best_model_y = grid_search.best_estimator_ + y_pred_y = best_model_y.predict(X_test_y) + # Convert the predictions to a numpy array and apply KMeans clustering data = np.array([y_pred_x, y_pred_y]).T model = KMeans(n_clusters=k, n_init="auto", init="k-means++") @@ -183,20 +181,25 @@ def predict(data, k, model_X, model_Y): } df_data = pd.DataFrame(data) df_data["True XY"] = list(zip(df_data["True X"], df_data["True Y"])) - + # Filter out negative values df_data = df_data[(df_data["Predicted X"] >= 0) & (df_data["Predicted Y"] >= 0)] - # Calculate the precision and accuracy for each + # Calculate the precision and accuracy for each precision_x = df_data.groupby("True XY").apply(func_precision_x) precision_y = df_data.groupby("True XY").apply(func_presicion_y) - # Calculate the average precision + # Calculate the average precision and accuracy precision_xy = (precision_x + precision_y) / 2 - - # Calculate the average accuracy (eculidian distance) - accuracy_xy = df_data.groupby("True XY").apply(func_total_accuracy) - + precision_xy = precision_xy / np.mean(precision_xy) + + # Calculate the accuracy for each axis + accuracy_x = df_data.groupby("True XY").apply(func_accuracy_x) + accuracy_y = df_data.groupby("True XY").apply(func_accuracy_y) + + # Calculate the average accuracy + accuracy_xy = (accuracy_x + accuracy_y) / 2 + accuracy_xy = accuracy_xy / np.mean(accuracy_xy) # Create a dictionary to store the data data = {} @@ -232,186 +235,214 @@ def predict(data, k, model_X, model_Y): # Return the data return data +def predict_new_data_simple(calib_csv_path, predict_csv_path, model_X, model_Y, k=3): + """ + Versão simplificada de predict_new_data. + Treina modelos nos dados de calibração e prevê coordenadas nos novos dados. + Retorna o mesmo formato que a função `predict`. 
+ """ + # -------------------- SCALERS -------------------- + sc_x = StandardScaler() + sc_y = StandardScaler() -def predict_new_data_simple( - calib_csv_path, - predict_csv_path, - iris_data, - screen_width=None, - screen_height=None, -): - # ============================ - # CONFIG (WebGazer-inspired) - # ============================ - BASELINE_ALPHA = 0.01 - SQUASH_LIMIT_X = 1.0 - SQUASH_LIMIT_Y = 1.0 - Y_GAIN = 1.2 # adjustment to compensate for vertical bias - - # ============================ - # LOAD TRAIN - # ============================ - df_train = pd.read_csv(calib_csv_path) - - x_center = screen_width / 2 - y_center = screen_height / 2 - - # normalize targets to [-1, 1] space - y_train_x = (df_train["point_x"].values.astype(float) - x_center) / (screen_width / 2) - y_train_y = (df_train["point_y"].values.astype(float) - y_center) / (screen_height / 2) - - # ensure laterality - if df_train["left_iris_x"].mean() < df_train["right_iris_x"].mean(): - df_train["left_iris_x"], df_train["right_iris_x"] = ( - df_train["right_iris_x"].copy(), - df_train["left_iris_x"].copy(), - ) - if df_train["left_iris_y"].mean() < df_train["right_iris_y"].mean(): - df_train["left_iris_y"], df_train["right_iris_y"] = ( - df_train["right_iris_y"].copy(), - df_train["left_iris_y"].copy(), - ) + # -------------------- TREINO -------------------- + df_train = pd.read_csv(calib_csv_path).drop(["screen_height", "screen_width"], axis=1) - left_x = df_train["left_iris_x"].values.astype(float) - right_x = df_train["right_iris_x"].values.astype(float) - left_y = df_train["left_iris_y"].values.astype(float) - right_y = df_train["right_iris_y"].values.astype(float) - - mean_x = (left_x + right_x) / 2 - diff_x = left_x - right_x - mean_y = (left_y + right_y) / 2 - diff_y = left_y - right_y - - # baseline inicial (WebGazer) - ref_mean_x = np.mean(mean_x) - ref_mean_y = np.mean(mean_y) - - rel_x = mean_x - ref_mean_x - rel_y = mean_y - ref_mean_y - - # ============================ - # PHYSICAL NORMALIZATION Y - # ============================ - iris_y_scale = np.std(mean_y) + 1e-6 - diff_y_norm = diff_y / iris_y_scale - rel_y_norm = rel_y / iris_y_scale - - # ============================ - # FEATURES - # ============================ - X_train_x = np.column_stack([ - left_x, right_x, mean_x, diff_x, rel_x - ]) - - X_train_y = np.column_stack([ - diff_y_norm, rel_y_norm - ]) - - # ============================ - # MODELS - # ============================ - model_x = make_pipeline(StandardScaler(), Ridge(alpha=1.0)) - model_y = make_pipeline(StandardScaler(), Ridge(alpha=1.0)) - - model_x.fit(X_train_x, y_train_x) - model_y.fit(X_train_y, y_train_y) - - # ============================ - # Real scale (calibration) - normalize predicted values to screen coordinates - # ============================ - x_range = np.percentile(y_train_x, 95) - np.percentile(y_train_x, 5) - y_range = np.percentile(y_train_y, 95) - np.percentile(y_train_y, 5) - - x_scale = max(x_range / 2, 1e-6) * (screen_width / 2) - y_scale = max(y_range / 2, 1e-6) * (screen_height / 2) - - # ============================ - # LOAD PREDICT - # ============================ - df_pred = pd.read_csv(predict_csv_path) - - if df_pred["left_iris_x"].mean() < df_pred["right_iris_x"].mean(): - df_pred["left_iris_x"], df_pred["right_iris_x"] = ( - df_pred["right_iris_x"].copy(), - df_pred["left_iris_x"].copy(), - ) - if df_pred["left_iris_y"].mean() < df_pred["right_iris_y"].mean(): - df_pred["left_iris_y"], df_pred["right_iris_y"] = ( - df_pred["right_iris_y"].copy(), - 
df_pred["left_iris_y"].copy(), - ) + X_train_x = df_train[["left_iris_x", "right_iris_x"]].values + y_train_x = df_train["point_x"].values + X_train_y = df_train[["left_iris_y", "right_iris_y"]].values + y_train_y = df_train["point_y"].values + + X_train_x_scaled = sc_x.fit_transform(X_train_x) + X_train_y_scaled = sc_y.fit_transform(X_train_y) + + # Modelos + model_fit_x = models[model_X].fit(X_train_x_scaled, y_train_x) + model_fit_y = models[model_Y].fit(X_train_y_scaled, y_train_y) + + # -------------------- NOVOS DADOS -------------------- + df_predict = pd.read_csv(predict_csv_path) + X_pred_x = sc_x.transform(df_predict[["left_iris_x", "right_iris_x"]].values) + X_pred_y = sc_y.transform(df_predict[["left_iris_y", "right_iris_y"]].values) + + y_pred_x = model_fit_x.predict(X_pred_x) + y_pred_y = model_fit_y.predict(X_pred_y) + + # Garantir valores não-negativos + y_pred_x = np.clip(y_pred_x, 0, None) + y_pred_y = np.clip(y_pred_y, 0, None) + + # -------------------- KMEANS -------------------- + data_pred = np.array([y_pred_x, y_pred_y]).T + kmeans_model = KMeans(n_clusters=k, n_init="auto", init="k-means++") + y_kmeans = kmeans_model.fit_predict(data_pred) + + # -------------------- FORMATA DADOS -------------------- + df_data = pd.DataFrame({ + "Predicted X": y_pred_x, + "Predicted Y": y_pred_y, + "True X": df_predict["point_x"] if "point_x" in df_predict else y_pred_x, + "True Y": df_predict["point_y"] if "point_y" in df_predict else y_pred_y + }) + + # Calcular métricas + precision_x = df_data.groupby(["True X", "True Y"]).apply(func_precision_x) + precision_y = df_data.groupby(["True X", "True Y"]).apply(func_presicion_y) + precision_xy = (precision_x + precision_y) / 2 + precision_xy /= np.mean(precision_xy) + + accuracy_x = df_data.groupby(["True X", "True Y"]).apply(func_accuracy_x) + accuracy_y = df_data.groupby(["True X", "True Y"]).apply(func_accuracy_y) + accuracy_xy = (accuracy_x + accuracy_y) / 2 + accuracy_xy /= np.mean(accuracy_xy) + + # Estrutura final + data = {} + for index, row in df_data.iterrows(): + outer_key = str(int(row["True X"])) + inner_key = str(int(row["True Y"])) + if outer_key not in data: + data[outer_key] = {} + data[outer_key][inner_key] = { + "predicted_x": df_data[ + (df_data["True X"] == row["True X"]) & + (df_data["True Y"] == row["True Y"]) + ]["Predicted X"].tolist(), + "predicted_y": df_data[ + (df_data["True X"] == row["True X"]) & + (df_data["True Y"] == row["True Y"]) + ]["Predicted Y"].tolist(), + "PrecisionSD": precision_xy[(row["True X"], row["True Y"])], + "Accuracy": accuracy_xy[(row["True X"], row["True Y"])], + } + + data["centroids"] = kmeans_model.cluster_centers_.tolist() + return data + + +def train_to_validate_calib(calib_csv_file, predict_csv_file): + dataset_train_path = calib_csv_file + dataset_predict_path = predict_csv_file + + # Carregue os dados de treinamento a partir do CSV + data = pd.read_csv(dataset_train_path) + + # Para evitar que retorne valores negativos: Aplicar uma transformação logarítmica aos rótulos (point_x e point_y) + # data['point_x'] = np.log(data['point_x']) + # data['point_y'] = np.log(data['point_y']) + + # Separe os recursos (X) e os rótulos (y) + X = data[["left_iris_x", "left_iris_y", "right_iris_x", "right_iris_y"]] + y = data[["point_x", "point_y"]] + + # Crie e ajuste um modelo de regressão linear + model = linear_model.LinearRegression() + model.fit(X, y) + + # Carregue os dados de teste a partir de um novo arquivo CSV + dados_teste = pd.read_csv(dataset_predict_path) + + # Faça previsões + 
previsoes = model.predict(dados_teste) + + # Para evitar que retorne valores negativos: Inverter a transformação logarítmica nas previsões + # previsoes = np.exp(previsoes) + + # Exiba as previsões + print("Previsões de point_x e point_y:") + print(previsoes) + return previsoes.tolist() + + +def train_model(session_id): + # Download dataset + dataset_train_path = ( + f"{Path().absolute()}/public/training/{session_id}/train_data.csv" + ) + dataset_session_path = ( + f"{Path().absolute()}/public/sessions/{session_id}/session_data.csv" + ) + + # Importing data from csv + raw_dataset = pd.read_csv(dataset_train_path) + session_dataset = pd.read_csv(dataset_session_path) + + train_stats = raw_dataset.describe() + train_stats = train_stats.transpose() + + dataset_t = raw_dataset + dataset_s = session_dataset.drop(["timestamp"], axis=1) + + # Drop the columns that will be predicted + X = dataset_t.drop(["timestamp", "mouse_x", "mouse_y"], axis=1) + + Y1 = dataset_t.mouse_x + Y2 = dataset_t.mouse_y + # print('Y1 is the mouse_x column ->', Y1) + # print('Y2 is the mouse_y column ->', Y2) + + MODEL_X = model_for_mouse_x(X, Y1) + MODEL_Y = model_for_mouse_y(X, Y2) + + GAZE_X = MODEL_X.predict(dataset_s) + GAZE_Y = MODEL_Y.predict(dataset_s) + + GAZE_X = np.abs(GAZE_X) + GAZE_Y = np.abs(GAZE_Y) + + return {"x": GAZE_X, "y": GAZE_Y} + + +def model_for_mouse_x(X, Y1): + print("-----------------MODEL FOR X------------------") + # split dataset into train and test sets (80/20 where 20 is for test) + X_train, X_test, Y1_train, Y1_test = train_test_split(X, Y1, test_size=0.2) + + model = linear_model.LinearRegression() + model.fit(X_train, Y1_train) + + Y1_pred_train = model.predict(X_train) + Y1_pred_test = model.predict(X_test) + + Y1_test = normalizeData(Y1_test) + Y1_pred_test = normalizeData(Y1_pred_test) + + print(f"Mean absolute error MAE = {mean_absolute_error(Y1_test, Y1_pred_test)}") + print(f"Mean squared error MSE = {mean_squared_error(Y1_test, Y1_pred_test)}") + print( + f"Mean squared log error MSLE = {mean_squared_log_error(Y1_test, Y1_pred_test)}" + ) + print(f"MODEL X SCORE R2 = {model.score(X, Y1)}") + + # print(f'TRAIN{Y1_pred_train}') + # print(f'TEST{Y1_pred_test}') + return model + + +def model_for_mouse_y(X, Y2): + print("-----------------MODEL FOR Y------------------") + # split dataset into train and test sets (80/20 where 20 is for test) + X_train, X_test, Y2_train, Y2_test = train_test_split(X, Y2, test_size=0.2) + + model = linear_model.LinearRegression() + model.fit(X_train, Y2_train) + + Y2_pred_train = model.predict(X_train) + Y2_pred_test = model.predict(X_test) + + Y2_test = normalizeData(Y2_test) + Y2_pred_test = normalizeData(Y2_pred_test) + + print(f"Mean absolute error MAE = {mean_absolute_error(Y2_test, Y2_pred_test)}") + print(f"Mean squared error MSE = {mean_squared_error(Y2_test, Y2_pred_test)}") + print( + f"Mean squared log error MSLE = {mean_squared_log_error(Y2_test, Y2_pred_test)}" + ) + print(f"MODEL X SCORE R2 = {model.score(X, Y2)}") - left_px = df_pred["left_iris_x"].values.astype(float) - right_px = df_pred["right_iris_x"].values.astype(float) - left_py = df_pred["left_iris_y"].values.astype(float) - right_py = df_pred["right_iris_y"].values.astype(float) - - mean_px = (left_px + right_px) / 2 - diff_px = left_px - right_px - mean_py = (left_py + right_py) / 2 - diff_py = left_py - right_py - - # baseline relativo - rel_px = mean_px - ref_mean_x - rel_py = mean_py - ref_mean_y - - diff_py_norm = diff_py / iris_y_scale - rel_py_norm = rel_py / iris_y_scale - 
- X_pred_x = np.column_stack([ - left_px, right_px, mean_px, diff_px, rel_px - ]) - - X_pred_y = np.column_stack([ - diff_py_norm, rel_py_norm - ]) - - y_pred_x = model_x.predict(X_pred_x) - y_pred_y = model_y.predict(X_pred_y) - - # remove bias vertical - y_pred_y = y_pred_y - np.mean(y_pred_y) - - y_pred_y = y_pred_y * Y_GAIN - - # ============================ - # PREDICTION LOOP (WebGazer) - # ============================ - predictions = [] - - for i in range(len(y_pred_x)): - # baseline dinâmico - ref_mean_x = BASELINE_ALPHA * mean_px[i] + (1 - BASELINE_ALPHA) * ref_mean_x - ref_mean_y = BASELINE_ALPHA * mean_py[i] + (1 - BASELINE_ALPHA) * ref_mean_y - - # squash não-linear - sx = squash(y_pred_x[i], SQUASH_LIMIT_X) - sy = squash(y_pred_y[i], SQUASH_LIMIT_Y) - - px = x_center + float(sx) * x_scale - py = y_center + float(sy) * y_scale - - predictions.append({ - "timestamp": iris_data[i].get("timestamp"), - "predicted_x": px, - "predicted_y": py, - "screen_width": screen_width, - "screen_height": screen_height, - }) - - # ============================ - # LOGS - # ============================ - print("====== MODEL DEBUG ======") - print(f"y_pred_x: {np.min(y_pred_x):.3f} → {np.max(y_pred_x):.3f}") - print(f"y_pred_y: {np.min(y_pred_y):.3f} → {np.max(y_pred_y):.3f}") - print("=========================") - - print("====== PIXEL SAMPLE ======") - for p in predictions[:15]: - print(f"x: {p['predicted_x']:.1f}, y: {p['predicted_y']:.1f}") - - return predictions + # print(f'TRAIN{Y2_pred_train}') + print(f"TEST{Y2_pred_test}") + return model def normalizeData(data): From aa83ce15eb658097e2df0ac0b24acfe05afdb78c Mon Sep 17 00:00:00 2001 From: midaa1 Date: Wed, 18 Feb 2026 08:17:09 +0200 Subject: [PATCH 10/10] refactor for itreration to avoid o(n^2) loop --- app/services/gaze_tracker.py | 600 +++++++++++++++++------------------ 1 file changed, 284 insertions(+), 316 deletions(-) diff --git a/app/services/gaze_tracker.py b/app/services/gaze_tracker.py index d3377a6..00e272a 100644 --- a/app/services/gaze_tracker.py +++ b/app/services/gaze_tracker.py @@ -1,4 +1,5 @@ # Necessary imports +import math import warnings warnings.filterwarnings("ignore") @@ -11,12 +12,17 @@ from sklearn.model_selection import train_test_split from sklearn.preprocessing import StandardScaler, PolynomialFeatures from sklearn.pipeline import make_pipeline +from sklearn.ensemble import RandomForestRegressor +from sklearn.linear_model import Ridge +import time # Model imports from sklearn import linear_model from sklearn.svm import SVR from sklearn.cluster import KMeans from sklearn.model_selection import GridSearchCV +from sklearn.model_selection import GroupShuffleSplit +import matplotlib.pyplot as plt # Metrics imports from sklearn.metrics import make_scorer @@ -33,6 +39,7 @@ func_presicion_y, func_accuracy_x, func_accuracy_y, + func_total_accuracy, ) from app.services.config import hyperparameters @@ -54,7 +61,14 @@ "Support Vector Regressor": make_pipeline( PolynomialFeatures(2), SVR(kernel="linear") ), -} + "Random Forest Regressor": make_pipeline( + RandomForestRegressor( + n_estimators=200, + max_depth=10, + min_samples_split=5, + random_state=42 + ) +)} # Set the scoring metrics for GridSearchCV to r2_score and mean_absolute_error scoring = { @@ -63,6 +77,45 @@ } +def squash(v, limit=1.0): + """Squash não-linear estilo WebGazer""" + return np.tanh(v / limit) + +def trian_and_predict(model_name, X_train, y_train, X_test, y_test, label): + """ + Helper to train a model (with or without GridSearchCV) and return 
predictions. + """ + if ( + model_name == "Linear Regression" + ): + model = models[model_name] + start_time = time.time() + model.fit(X_train, y_train) + end_time = time.time() + y_pred = model.predict(X_test) + print(f"Score {label}: {r2_score(y_test, y_pred)}") + print(f"Time {label}: {end_time - start_time}") + return y_pred + else: + pipeline = models[model_name] + param_grid = hyperparameters[model_name]["param_grid"] + grid_search = GridSearchCV( + pipeline, + param_grid, + cv=5, + scoring=scoring, + refit="r2", + return_train_score=True, + ) + start_time = time.time() + grid_search.fit(X_train, y_train) + end_time = time.time() + best_model = grid_search.best_estimator_ + y_pred = best_model.predict(X_test) + print(f"Time {label}: {end_time - start_time}") + return y_pred + + def predict(data, k, model_X, model_Y): """ Predicts the gaze coordinates using machine learning models. @@ -76,97 +129,52 @@ def predict(data, k, model_X, model_Y): Returns: dict: A dictionary containing the predicted gaze coordinates, precision, accuracy, and cluster centroids. """ - # Inicialize standard scaler - sc = StandardScaler() + # Load data from csv file and drop unnecessary columns df = pd.read_csv(data) df = df.drop(["screen_height", "screen_width"], axis=1) + print(df.head()) + # Create groups (point_x, point_y) + df["group"] = list(zip(df["point_x"], df["point_y"])) # Data for X axis X_x = df[["left_iris_x", "right_iris_x"]] X_y = df["point_x"] - - # Normalize data using standard scaler and split data into training and testing sets - X_x = sc.fit_transform(X_x) - X_train_x, X_test_x, y_train_x, y_test_x = train_test_split( - X_x, X_y, test_size=0.2, random_state=42 - ) - - if ( - model_X == "Linear Regression" - or model_X == "Elastic Net" - or model_X == "Support Vector Regressor" - ): - model = models[model_X] - - # Fit the model and make predictions - model.fit(X_train_x, y_train_x) - y_pred_x = model.predict(X_test_x) - - else: - pipeline = models[model_X] - param_grid = hyperparameters[model_X]["param_grid"] - - # Initialize GridSearchCV with the pipeline and parameter grid - grid_search = GridSearchCV( - pipeline, - param_grid, - cv=5, - scoring=scoring, - refit="r2", - return_train_score=True, - ) - - # Fit the GridSearchCV to the training data for X - grid_search.fit(X_train_x, y_train_x) - - # Use the best estimator to predict the values and calculate the R2 score - best_model_x = grid_search.best_estimator_ - y_pred_x = best_model_x.predict(X_test_x) - + # groups = df["group"] # Data for Y axis - X_y = df[["left_iris_y", "right_iris_y"]] + X_feature_y = df[["left_iris_y", "right_iris_y"]] y_y = df["point_y"] - - # Normalize data using standard scaler and split data into training and testing sets - X_y = sc.fit_transform(X_y) - X_train_y, X_test_y, y_train_y, y_test_y = train_test_split( - X_y, y_y, test_size=0.2, random_state=42 + # Split data into training and testing sets then Normalize data using standard scaler + ( + X_train_x, X_test_x, + y_train_x, y_test_x, + X_train_y, X_test_y, + y_train_y, y_test_y + )= train_test_split( + X_x, + X_y, + X_feature_y, + y_y, + test_size=0.2, + random_state=42, ) - - if ( - model_Y == "Linear Regression" - or model_Y == "Elastic Net" - or model_Y == "Support Vector Regressor" - ): - model = models[model_Y] - - # Fit the model and make predictions - model.fit(X_train_y, y_train_y) - y_pred_y = model.predict(X_test_y) - - else: - pipeline = models[model_Y] - param_grid = hyperparameters[model_Y]["param_grid"] - - # Initialize GridSearchCV with the 
pipeline and parameter grid - grid_search = GridSearchCV( - pipeline, - param_grid, - cv=5, - scoring=scoring, - refit="r2", - return_train_score=True, - ) - - # Fit the GridSearchCV to the training data for X - grid_search.fit(X_train_y, y_train_y) - - # Use the best estimator to predict the values and calculate the R2 score - best_model_y = grid_search.best_estimator_ - y_pred_y = best_model_y.predict(X_test_y) - + + # Scaling (fit on train only) + scaler_x = StandardScaler() + X_train_x = scaler_x.fit_transform(X_train_x) + X_test_x = scaler_x.transform(X_test_x) + + y_pred_x = trian_and_predict(model_X, X_train_x, y_train_x, X_test_x, y_test_x, "X") + + # Scaling (fit on train only) + scaler_y = StandardScaler() + X_train_y = scaler_y.fit_transform(X_train_y) + X_test_y = scaler_y.transform(X_test_y) + + + y_pred_y = trian_and_predict(model_Y, X_train_y, y_train_y, X_test_y, y_test_y, "Y") + # Convert the predictions to a numpy array and apply KMeans clustering data = np.array([y_pred_x, y_pred_y]).T model = KMeans(n_clusters=k, n_init="auto", init="k-means++") @@ -181,268 +189,228 @@ def predict(data, k, model_X, model_Y): } df_data = pd.DataFrame(data) df_data["True XY"] = list(zip(df_data["True X"], df_data["True Y"])) - + # Filter out negative values df_data = df_data[(df_data["Predicted X"] >= 0) & (df_data["Predicted Y"] >= 0)] - # Calculate the precision and accuracy for each + # Calculate the precision and accuracy for each precision_x = df_data.groupby("True XY").apply(func_precision_x) precision_y = df_data.groupby("True XY").apply(func_presicion_y) - # Calculate the average precision and accuracy + # Calculate the average precision precision_xy = (precision_x + precision_y) / 2 - precision_xy = precision_xy / np.mean(precision_xy) - - # Calculate the accuracy for each axis - accuracy_x = df_data.groupby("True XY").apply(func_accuracy_x) - accuracy_y = df_data.groupby("True XY").apply(func_accuracy_y) - - # Calculate the average accuracy - accuracy_xy = (accuracy_x + accuracy_y) / 2 - accuracy_xy = accuracy_xy / np.mean(accuracy_xy) + + # Calculate the average accuracy (eculidian distance) + accuracy_xy = df_data.groupby("True XY").apply(func_total_accuracy) + # Create a dictionary to store the data data = {} + grouped = df_data.groupby("True XY") - # Iterate over the dataframe and store the data - for index, row in df_data.iterrows(): + for (true_x, true_y), group in grouped: - # Get the outer and inner keys - outer_key = str(row["True X"]).split(".")[0] - inner_key = str(row["True Y"]).split(".")[0] + # keys + outer_key = str(true_x).split(".")[0] + inner_key = str(true_y).split(".")[0] - # If the outer key is not in the dictionary, add it + # create outer key if missing if outer_key not in data: data[outer_key] = {} - # Add the data to the dictionary + # fill data data[outer_key][inner_key] = { - "predicted_x": df_data[ - (df_data["True X"] == row["True X"]) - & (df_data["True Y"] == row["True Y"]) - ]["Predicted X"].values.tolist(), - "predicted_y": df_data[ - (df_data["True X"] == row["True X"]) - & (df_data["True Y"] == row["True Y"]) - ]["Predicted Y"].values.tolist(), - "PrecisionSD": precision_xy[(row["True X"], row["True Y"])], - "Accuracy": accuracy_xy[(row["True X"], row["True Y"])], + "predicted_x": group["Predicted X"].tolist(), + "predicted_y": group["Predicted Y"].tolist(), + "PrecisionSD": precision_xy[(true_x, true_y)], + "Accuracy": accuracy_xy[(true_x, true_y)], } - # Centroids of the clusters data["centroids"] = model.cluster_centers_.tolist() # Return 
the data return data -def predict_new_data_simple(calib_csv_path, predict_csv_path, model_X, model_Y, k=3): - """ - Versão simplificada de predict_new_data. - Treina modelos nos dados de calibração e prevê coordenadas nos novos dados. - Retorna o mesmo formato que a função `predict`. - """ - # -------------------- SCALERS -------------------- - sc_x = StandardScaler() - sc_y = StandardScaler() - - # -------------------- TREINO -------------------- - df_train = pd.read_csv(calib_csv_path).drop(["screen_height", "screen_width"], axis=1) - - X_train_x = df_train[["left_iris_x", "right_iris_x"]].values - y_train_x = df_train["point_x"].values - X_train_y = df_train[["left_iris_y", "right_iris_y"]].values - y_train_y = df_train["point_y"].values - - X_train_x_scaled = sc_x.fit_transform(X_train_x) - X_train_y_scaled = sc_y.fit_transform(X_train_y) - - # Modelos - model_fit_x = models[model_X].fit(X_train_x_scaled, y_train_x) - model_fit_y = models[model_Y].fit(X_train_y_scaled, y_train_y) - - # -------------------- NOVOS DADOS -------------------- - df_predict = pd.read_csv(predict_csv_path) - X_pred_x = sc_x.transform(df_predict[["left_iris_x", "right_iris_x"]].values) - X_pred_y = sc_y.transform(df_predict[["left_iris_y", "right_iris_y"]].values) - - y_pred_x = model_fit_x.predict(X_pred_x) - y_pred_y = model_fit_y.predict(X_pred_y) - - # Garantir valores não-negativos - y_pred_x = np.clip(y_pred_x, 0, None) - y_pred_y = np.clip(y_pred_y, 0, None) - - # -------------------- KMEANS -------------------- - data_pred = np.array([y_pred_x, y_pred_y]).T - kmeans_model = KMeans(n_clusters=k, n_init="auto", init="k-means++") - y_kmeans = kmeans_model.fit_predict(data_pred) - - # -------------------- FORMATA DADOS -------------------- - df_data = pd.DataFrame({ - "Predicted X": y_pred_x, - "Predicted Y": y_pred_y, - "True X": df_predict["point_x"] if "point_x" in df_predict else y_pred_x, - "True Y": df_predict["point_y"] if "point_y" in df_predict else y_pred_y - }) - - # Calcular métricas - precision_x = df_data.groupby(["True X", "True Y"]).apply(func_precision_x) - precision_y = df_data.groupby(["True X", "True Y"]).apply(func_presicion_y) - precision_xy = (precision_x + precision_y) / 2 - precision_xy /= np.mean(precision_xy) - - accuracy_x = df_data.groupby(["True X", "True Y"]).apply(func_accuracy_x) - accuracy_y = df_data.groupby(["True X", "True Y"]).apply(func_accuracy_y) - accuracy_xy = (accuracy_x + accuracy_y) / 2 - accuracy_xy /= np.mean(accuracy_xy) - - # Estrutura final - data = {} - for index, row in df_data.iterrows(): - outer_key = str(int(row["True X"])) - inner_key = str(int(row["True Y"])) - if outer_key not in data: - data[outer_key] = {} - data[outer_key][inner_key] = { - "predicted_x": df_data[ - (df_data["True X"] == row["True X"]) & - (df_data["True Y"] == row["True Y"]) - ]["Predicted X"].tolist(), - "predicted_y": df_data[ - (df_data["True X"] == row["True X"]) & - (df_data["True Y"] == row["True Y"]) - ]["Predicted Y"].tolist(), - "PrecisionSD": precision_xy[(row["True X"], row["True Y"])], - "Accuracy": accuracy_xy[(row["True X"], row["True Y"])], - } - - data["centroids"] = kmeans_model.cluster_centers_.tolist() - return data - - -def train_to_validate_calib(calib_csv_file, predict_csv_file): - dataset_train_path = calib_csv_file - dataset_predict_path = predict_csv_file - - # Carregue os dados de treinamento a partir do CSV - data = pd.read_csv(dataset_train_path) - - # Para evitar que retorne valores negativos: Aplicar uma transformação logarítmica aos rótulos 
-def train_to_validate_calib(calib_csv_file, predict_csv_file):
-    dataset_train_path = calib_csv_file
-    dataset_predict_path = predict_csv_file
-
-    # Load the training data from the CSV
-    data = pd.read_csv(dataset_train_path)
-
-    # To avoid negative predictions: apply a log transform to the labels (point_x and point_y)
-    # data['point_x'] = np.log(data['point_x'])
-    # data['point_y'] = np.log(data['point_y'])
-
-    # Separate the features (X) and the labels (y)
-    X = data[["left_iris_x", "left_iris_y", "right_iris_x", "right_iris_y"]]
-    y = data[["point_x", "point_y"]]
-
-    # Create and fit a linear regression model
-    model = linear_model.LinearRegression()
-    model.fit(X, y)
-
-    # Load the test data from a new CSV file
-    dados_teste = pd.read_csv(dataset_predict_path)
-
-    # Make predictions
-    previsoes = model.predict(dados_teste)
-
-    # To avoid negative predictions: invert the log transform on the predictions
-    # previsoes = np.exp(previsoes)
-
-    # Display the predictions
-    print("Predictions for point_x and point_y:")
-    print(previsoes)
-    return previsoes.tolist()
-
-
-def train_model(session_id):
-    # Download dataset
-    dataset_train_path = (
-        f"{Path().absolute()}/public/training/{session_id}/train_data.csv"
-    )
-    dataset_session_path = (
-        f"{Path().absolute()}/public/sessions/{session_id}/session_data.csv"
-    )
-
-    # Importing data from csv
-    raw_dataset = pd.read_csv(dataset_train_path)
-    session_dataset = pd.read_csv(dataset_session_path)
-
-    train_stats = raw_dataset.describe()
-    train_stats = train_stats.transpose()
-
-    dataset_t = raw_dataset
-    dataset_s = session_dataset.drop(["timestamp"], axis=1)
-
-    # Drop the columns that will be predicted
-    X = dataset_t.drop(["timestamp", "mouse_x", "mouse_y"], axis=1)
-
-    Y1 = dataset_t.mouse_x
-    Y2 = dataset_t.mouse_y
-    # print('Y1 is the mouse_x column ->', Y1)
-    # print('Y2 is the mouse_y column ->', Y2)
-
-    MODEL_X = model_for_mouse_x(X, Y1)
-    MODEL_Y = model_for_mouse_y(X, Y2)
-
-    GAZE_X = MODEL_X.predict(dataset_s)
-    GAZE_Y = MODEL_Y.predict(dataset_s)
-
-    GAZE_X = np.abs(GAZE_X)
-    GAZE_Y = np.abs(GAZE_Y)
-
-    return {"x": GAZE_X, "y": GAZE_Y}
-
-
-def model_for_mouse_x(X, Y1):
-    print("-----------------MODEL FOR X------------------")
-    # split dataset into train and test sets (80/20 where 20 is for test)
-    X_train, X_test, Y1_train, Y1_test = train_test_split(X, Y1, test_size=0.2)
-
-    model = linear_model.LinearRegression()
-    model.fit(X_train, Y1_train)
-
-    Y1_pred_train = model.predict(X_train)
-    Y1_pred_test = model.predict(X_test)
-
-    Y1_test = normalizeData(Y1_test)
-    Y1_pred_test = normalizeData(Y1_pred_test)
-
-    print(f"Mean absolute error MAE = {mean_absolute_error(Y1_test, Y1_pred_test)}")
-    print(f"Mean squared error MSE = {mean_squared_error(Y1_test, Y1_pred_test)}")
-    print(
-        f"Mean squared log error MSLE = {mean_squared_log_error(Y1_test, Y1_pred_test)}"
-    )
-    print(f"MODEL X SCORE R2 = {model.score(X, Y1)}")
-
-    # print(f'TRAIN{Y1_pred_train}')
-    # print(f'TEST{Y1_pred_test}')
-    return model
-
-
-def model_for_mouse_y(X, Y2):
-    print("-----------------MODEL FOR Y------------------")
-    # split dataset into train and test sets (80/20 where 20 is for test)
-    X_train, X_test, Y2_train, Y2_test = train_test_split(X, Y2, test_size=0.2)
-
-    model = linear_model.LinearRegression()
-    model.fit(X_train, Y2_train)
-
-    Y2_pred_train = model.predict(X_train)
-    Y2_pred_test = model.predict(X_test)
-
-    Y2_test = normalizeData(Y2_test)
-    Y2_pred_test = normalizeData(Y2_pred_test)
+def predict_new_data_simple(
+    calib_csv_path,
+    predict_csv_path,
+    iris_data,
+    screen_width=None,
+    screen_height=None,
+):
+    # ============================
+    # CONFIG (WebGazer-inspired)
+    # ============================
+    BASELINE_ALPHA = 0.01
+    SQUASH_LIMIT_X = 1.0
+    SQUASH_LIMIT_Y = 1.0
+    Y_GAIN = 1.2  # adjustment to compensate for vertical bias
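+    # How these constants are used below: BASELINE_ALPHA is the smoothing
+    # factor of the exponential moving average that tracks the iris centre
+    # (larger values adapt faster), SQUASH_LIMIT_X/Y bound the normalized
+    # predictions before they are mapped back to pixels, and Y_GAIN stretches
+    # the vertical axis to compensate for the bias noted above.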
+
+    # ============================
+    # LOAD TRAIN
+    # ============================
+    df_train = pd.read_csv(calib_csv_path)
+
+    # Screen dimensions are required to map normalized gaze back to pixels
+    if screen_width is None or screen_height is None:
+        raise ValueError("screen_width and screen_height must be provided")
+
+    x_center = screen_width / 2
+    y_center = screen_height / 2
+
+    # normalize targets to [-1, 1] space
+    y_train_x = (df_train["point_x"].values.astype(float) - x_center) / (screen_width / 2)
+    y_train_y = (df_train["point_y"].values.astype(float) - y_center) / (screen_height / 2)

+    # ensure laterality (swap columns if the left/right iris labels are reversed)
+    if df_train["left_iris_x"].mean() < df_train["right_iris_x"].mean():
+        df_train["left_iris_x"], df_train["right_iris_x"] = (
+            df_train["right_iris_x"].copy(),
+            df_train["left_iris_x"].copy(),
+        )
+    if df_train["left_iris_y"].mean() < df_train["right_iris_y"].mean():
+        df_train["left_iris_y"], df_train["right_iris_y"] = (
+            df_train["right_iris_y"].copy(),
+            df_train["left_iris_y"].copy(),
+        )

-    print(f"Mean absolute error MAE = {mean_absolute_error(Y2_test, Y2_pred_test)}")
-    print(f"Mean squared error MSE = {mean_squared_error(Y2_test, Y2_pred_test)}")
-    print(
-        f"Mean squared log error MSLE = {mean_squared_log_error(Y2_test, Y2_pred_test)}"
-    )
-    print(f"MODEL X SCORE R2 = {model.score(X, Y2)}")
+    left_x = df_train["left_iris_x"].values.astype(float)
+    right_x = df_train["right_iris_x"].values.astype(float)
+    left_y = df_train["left_iris_y"].values.astype(float)
+    right_y = df_train["right_iris_y"].values.astype(float)
+
+    mean_x = (left_x + right_x) / 2
+    diff_x = left_x - right_x
+    mean_y = (left_y + right_y) / 2
+    diff_y = left_y - right_y
+
+    # initial baseline (WebGazer-style)
+    ref_mean_x = np.mean(mean_x)
+    ref_mean_y = np.mean(mean_y)
+
+    rel_x = mean_x - ref_mean_x
+    rel_y = mean_y - ref_mean_y
+
+    # ============================
+    # PHYSICAL NORMALIZATION Y
+    # ============================
+    iris_y_scale = np.std(mean_y) + 1e-6
+    diff_y_norm = diff_y / iris_y_scale
+    rel_y_norm = rel_y / iris_y_scale
+
+    # ============================
+    # FEATURES
+    # ============================
+    X_train_x = np.column_stack([
+        left_x, right_x, mean_x, diff_x, rel_x
+    ])
+
+    X_train_y = np.column_stack([
+        diff_y_norm, rel_y_norm
+    ])
+
+    # ============================
+    # MODELS
+    # ============================
+    model_x = make_pipeline(StandardScaler(), Ridge(alpha=1.0))
+    model_y = make_pipeline(StandardScaler(), Ridge(alpha=1.0))
+
+    model_x.fit(X_train_x, y_train_x)
+    model_y.fit(X_train_y, y_train_y)
+
+    # ============================
+    # Real scale (calibration): map normalized predictions back to screen coordinates
+    # ============================
+    x_range = np.percentile(y_train_x, 95) - np.percentile(y_train_x, 5)
+    y_range = np.percentile(y_train_y, 95) - np.percentile(y_train_y, 5)
+
+    x_scale = max(x_range / 2, 1e-6) * (screen_width / 2)
+    y_scale = max(y_range / 2, 1e-6) * (screen_height / 2)
+
+    # ============================
+    # LOAD PREDICT
+    # ============================
+    df_pred = pd.read_csv(predict_csv_path)
+
+    if df_pred["left_iris_x"].mean() < df_pred["right_iris_x"].mean():
+        df_pred["left_iris_x"], df_pred["right_iris_x"] = (
+            df_pred["right_iris_x"].copy(),
+            df_pred["left_iris_x"].copy(),
+        )
+    if df_pred["left_iris_y"].mean() < df_pred["right_iris_y"].mean():
+        df_pred["left_iris_y"], df_pred["right_iris_y"] = (
+            df_pred["right_iris_y"].copy(),
+            df_pred["left_iris_y"].copy(),
+        )

-    # print(f'TRAIN{Y2_pred_train}')
-    print(f"TEST{Y2_pred_test}")
-    return model
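+    # squash() is defined elsewhere in this module; the prediction loop below
+    # relies on it as a soft limiter. A minimal sketch of the assumed behavior:
+    #     def squash(v, limit):
+    #         return limit * np.tanh(v / limit)
+    # i.e. values stay inside (-limit, limit) without hard clipping.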
df_pred["left_iris_y"].values.astype(float) + right_py = df_pred["right_iris_y"].values.astype(float) + + mean_px = (left_px + right_px) / 2 + diff_px = left_px - right_px + mean_py = (left_py + right_py) / 2 + diff_py = left_py - right_py + + # baseline relativo + rel_px = mean_px - ref_mean_x + rel_py = mean_py - ref_mean_y + + diff_py_norm = diff_py / iris_y_scale + rel_py_norm = rel_py / iris_y_scale + + X_pred_x = np.column_stack([ + left_px, right_px, mean_px, diff_px, rel_px + ]) + + X_pred_y = np.column_stack([ + diff_py_norm, rel_py_norm + ]) + + y_pred_x = model_x.predict(X_pred_x) + y_pred_y = model_y.predict(X_pred_y) + + # remove bias vertical + y_pred_y = y_pred_y - np.mean(y_pred_y) + + y_pred_y = y_pred_y * Y_GAIN + + # ============================ + # PREDICTION LOOP (WebGazer) + # ============================ + predictions = [] + + for i in range(len(y_pred_x)): + # baseline dinâmico + ref_mean_x = BASELINE_ALPHA * mean_px[i] + (1 - BASELINE_ALPHA) * ref_mean_x + ref_mean_y = BASELINE_ALPHA * mean_py[i] + (1 - BASELINE_ALPHA) * ref_mean_y + + # squash não-linear + sx = squash(y_pred_x[i], SQUASH_LIMIT_X) + sy = squash(y_pred_y[i], SQUASH_LIMIT_Y) + + px = x_center + float(sx) * x_scale + py = y_center + float(sy) * y_scale + + predictions.append({ + "timestamp": iris_data[i].get("timestamp"), + "predicted_x": px, + "predicted_y": py, + "screen_width": screen_width, + "screen_height": screen_height, + }) + + # ============================ + # LOGS + # ============================ + print("====== MODEL DEBUG ======") + print(f"y_pred_x: {np.min(y_pred_x):.3f} → {np.max(y_pred_x):.3f}") + print(f"y_pred_y: {np.min(y_pred_y):.3f} → {np.max(y_pred_y):.3f}") + print("=========================") + + print("====== PIXEL SAMPLE ======") + for p in predictions[:15]: + print(f"x: {p['predicted_x']:.1f}, y: {p['predicted_y']:.1f}") + + return predictions def normalizeData(data):