feat: working v2 (fit a linear-regression growth model in place of the LP team solver)

martinohansen · martinohansen · commit 6b481b765646 · 2023-07-31T13:26:41.000+02:00
diff --git a/.gitignore b/.gitignore
@@ -1,3 +1,3 @@
 __pycache__/
-.mypy_cache
-.sofascore_cache
+*_cache
+.vscode
diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -23,6 +23,7 @@ rich = "^13.3.5"
 requests-cache = "^1.0.1"
 pyrate-limiter = "^2.10.0"
 requests-ratelimiter = "^0.4.0"
+pandas = "^2.0.2"
 
 
 [tool.poetry.group.dev.dependencies]
diff --git a/sofascore/sofascore.py b/sofascore/sofascore.py
@@ -114,6 +114,28 @@ def decisive_goal_for_win(self) -> bool:
     # TODO: We need to read yellow and red cards from the incident call:
     # https://api.sofascore.com/api/v1/event/11227333/incidents
 
+    @property
+    def features(self) -> dict:
+        """Return this stat line as a flat ``{feature name: value}`` dict.
+
+        Raw stats are copied unchanged; outcome flags are cast with ``int``.
+        Keys appear in the order listed below.
+        """
+        raw_names = (
+            "assists", "expectedAssists", "expectedGoals", "goals",
+            "goalsPrevented", "minutesPlayed", "onTargetScoringAttempt",
+            "savedShotsFromInsideTheBox", "saves", "team_goals",
+            "team_goals_conceded",
+        )
+        flag_names = (
+            "win", "loss", "draw", "clean_sheet",
+            "decisive_goal_for_draw", "decisive_goal_for_win",
+        )
+        # Raw stats are read first, then the flags, one attribute at a time.
+        row = {name: getattr(self, name) for name in raw_names}
+        row.update((name, int(getattr(self, name))) for name in flag_names)
+        return row
+
     def __init__(self, **kwargs):
         for key, value in kwargs.items():
             setattr(self, key, value)
diff --git a/v2.py b/v2.py
@@ -1,16 +1,19 @@
 #!/usr/bin/env python3
-
 import logging
 from dataclasses import dataclass
 from datetime import datetime, timezone
 
+import numpy as np
+import pandas as pd
 from rich import print
 from rich.console import Console
 from rich.logging import RichHandler
 from rich.table import Table
+from sklearn.linear_model import LinearRegression  # type: ignore
+from sklearn.metrics import mean_squared_error  # type: ignore
+from sklearn.model_selection import train_test_split  # type: ignore
 
 from holdet import holdet
-from lp import lp
 from sofascore import sofascore
 
 
@@ -71,9 +74,13 @@ def xGrowth(self) -> float:
 
         return growth
 
+    @property
+    def growth(self) -> int:  # convenience accessor for ``self.values.growth``
+        return self.values.growth
+
     @property
     def diff(self) -> float:
-        return self.xGrowth - self.values.growth
+        return self.xGrowth - self.growth  # self.growth reads self.values.growth
 
     def __lt__(self, other: "Round") -> bool:
         return self.number < other.number
@@ -375,8 +382,42 @@ def get_holdet(game: holdet.Game) -> list[Holdet]:
             candidates.append(Candidate(h, s))
         status.console.log(f"Found {len(candidates)} players on Sofascore")
 
-    with console.status("Finding optimal team..."):
-        solution = lp.find_optimal_team(candidates, 70 * 1000000)
-        status.console.log(f"Found optimal 11 out of {len(candidates)} players")
-
-    print(Formation(solution))
+    # Flatten every (candidate, round, stat) triple into one training row.
+    data = []
+    for candidate in candidates:
+        for rnd in candidate.rounds:  # not ``round``: keep the builtin usable
+            for stat in rnd.stats:
+                row = {"id": candidate.id, "round": rnd.number}
+                row.update(stat.features)
+                # Target: the round's growth, repeated on each of its stat rows.
+                row["growth"] = rnd.growth
+                data.append(row)
+
+    # Build the table; "growth" is the regression target.
+    df = pd.DataFrame(data)
+
+    # Select columns by name instead of position. "id" is the candidate's
+    # identifier, not a measurement, so it is kept out of the regressors: a
+    # linear coefficient on an arbitrary identifier carries no meaning.
+    X = df.drop(columns=["id", "growth"])
+    y = df["growth"]
+
+    # Fix the seed so the split, and thus the reported RMSE, is reproducible.
+    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
+
+    # Fit an ordinary least-squares model on the training split.
+    model = LinearRegression()
+    model.fit(X_train, y_train)
+
+    # Predict the held-out rows.
+    y_pred = model.predict(X_test)
+
+    # Report the root mean squared error on the held-out rows.
+    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
+    print("Root Mean Squared Error:", rmse)
+
+    # with console.status("Finding optimal team..."):
+    #     solution = lp.find_optimal_team(candidates, 70 * 1000000)
+    #     status.console.log(f"Found optimal 11 out of {len(candidates)} players")
+
+    # print(Formation(solution))