From 5eda6951603e3435d3aa870615d6dd40cc5f4e9d Mon Sep 17 00:00:00 2001 From: Matteo Morella <107872970+mat126@users.noreply.github.com> Date: Tue, 22 Oct 2024 17:04:49 +0200 Subject: [PATCH] Add file for github pages --- docs/xG-Model.html | 9703 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 9703 insertions(+) create mode 100644 docs/xG-Model.html diff --git a/docs/xG-Model.html b/docs/xG-Model.html new file mode 100644 index 0000000..29d017f --- /dev/null +++ b/docs/xG-Model.html @@ -0,0 +1,9703 @@ + + + + +
+ + + + + + + + + +library(tools)
+library(stringi)
+library(gtsummary)
+library(pROC)
+## Type 'citation("pROC")' for a citation.
+##
+## Attaching package: 'pROC'
+## The following objects are masked from 'package:stats':
+##
+## cov, smooth, var
+library(caret)
+## Loading required package: ggplot2
+## Loading required package: lattice
+library(predtools)
+library(magrittr)
+library(probably)
+##
+## Attaching package: 'probably'
+## The following objects are masked from 'package:base':
+##
+## as.factor, as.ordered
+library(tidymodels)
+## ── Attaching packages ────────────────────────────────────── tidymodels 1.2.0 ──
+## ✔ broom 1.0.6 ✔ rsample 1.2.1
+## ✔ dials 1.3.0 ✔ tibble 3.2.1
+## ✔ dplyr 1.1.4 ✔ tidyr 1.3.1
+## ✔ infer 1.0.7 ✔ tune 1.2.1
+## ✔ modeldata 1.4.0 ✔ workflows 1.1.4
+## ✔ parsnip 1.2.1 ✔ workflowsets 1.1.0
+## ✔ purrr 1.0.2 ✔ yardstick 1.3.1
+## ✔ recipes 1.1.0
+## ── Conflicts ───────────────────────────────────────── tidymodels_conflicts() ──
+## ✖ purrr::discard() masks scales::discard()
+## ✖ tidyr::extract() masks magrittr::extract()
+## ✖ dplyr::filter() masks stats::filter()
+## ✖ dplyr::lag() masks stats::lag()
+## ✖ purrr::lift() masks caret::lift()
+## ✖ yardstick::precision() masks caret::precision()
+## ✖ yardstick::recall() masks caret::recall()
+## ✖ yardstick::sensitivity() masks caret::sensitivity()
+## ✖ purrr::set_names() masks magrittr::set_names()
+## ✖ yardstick::specificity() masks caret::specificity()
+## ✖ recipes::step() masks stats::step()
+## • Dig deeper into tidy modeling with R at https://www.tmwr.org
+library(caTools)
+library(boot)
+##
+## Attaching package: 'boot'
+## The following object is masked from 'package:lattice':
+##
+## melanoma
+library(randomForest)
+## randomForest 4.7-1.2
+## Type rfNews() to see new features/changes/bug fixes.
+##
+## Attaching package: 'randomForest'
+## The following object is masked from 'package:dplyr':
+##
+## combine
+## The following object is masked from 'package:ggplot2':
+##
+## margin
+library(MASS)
+##
+## Attaching package: 'MASS'
+## The following object is masked from 'package:dplyr':
+##
+## select
+## The following object is masked from 'package:gtsummary':
+##
+## select
+library(rpart)
+##
+## Attaching package: 'rpart'
+## The following object is masked from 'package:dials':
+##
+## prune
+library(ranger)
+##
+## Attaching package: 'ranger'
+## The following object is masked from 'package:randomForest':
+##
+## importance
+library(gbm)
+## Loaded gbm 2.2.2
+## This version of gbm is no longer under development. Consider transitioning to gbm3, https://github.com/gbm-developers/gbm3
+library(xgboost)
+##
+## Attaching package: 'xgboost'
+## The following object is masked from 'package:dplyr':
+##
+## slice
+library(e1071)
+##
+## Attaching package: 'e1071'
+## The following object is masked from 'package:tune':
+##
+## tune
+## The following object is masked from 'package:rsample':
+##
+## permutations
+## The following object is masked from 'package:parsnip':
+##
+## tune
+library(nnet)
+library(dplyr)
+library(ggplot2)
+library(plotly)
+##
+## Attaching package: 'plotly'
+## The following object is masked from 'package:xgboost':
+##
+## slice
+## The following object is masked from 'package:MASS':
+##
+## select
+## The following object is masked from 'package:ggplot2':
+##
+## last_plot
+## The following object is masked from 'package:stats':
+##
+## filter
+## The following object is masked from 'package:graphics':
+##
+## layout
+library(ggsoccer)
+library(ggtext)
+library(viridis)
+## Loading required package: viridisLite
+##
+## Attaching package: 'viridis'
+## The following object is masked from 'package:scales':
+##
+## viridis_pal
+library(kableExtra)
+##
+## Attaching package: 'kableExtra'
+## The following object is masked from 'package:dplyr':
+##
+## group_rows
+library(rms)
+## Loading required package: Hmisc
+##
+## Attaching package: 'Hmisc'
+## The following object is masked from 'package:plotly':
+##
+## subplot
+## The following object is masked from 'package:e1071':
+##
+## impute
+## The following object is masked from 'package:parsnip':
+##
+## translate
+## The following objects are masked from 'package:dplyr':
+##
+## src, summarize
+## The following objects are masked from 'package:base':
+##
+## format.pval, units
+shots_dataset <- read.csv("~/Cose Mie/shots_dataset.csv")
+FullData <- read.csv("~/Cose Mie/FullData.csv")
+players <- read.csv("~/Cose Mie/players.csv")
+new.players <- read.csv("~/Cose Mie/new.players.csv")
+
+kable(head(shots_dataset, 3), "html") %>%
+ kable_styling(full_width = T)
++id + | ++minute + | ++result + | ++X + | ++Y + | ++xG + | ++player + | ++h_a + | ++player_id + | ++situation + | ++season + | ++shotType + | ++match_id + | ++h_team + | ++a_team + | ++h_goals + | ++a_goals + | ++date + | ++player_assisted + | ++lastAction + | +
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
+445975 + | ++0 + | ++MissedShots + | ++0.759 + | ++0.644 + | ++0.0186014 + | ++Jorrit Hendrix + | ++h + | ++9392 + | ++OpenPlay + | ++2021 + | ++LeftFoot + | ++16265 + | ++Spartak Moscow + | ++FK Akhmat + | ++2 + | ++1 + | ++2021-12-04 14:00:00 + | ++Quincy Promes + | ++Pass + | +
+445976 + | ++6 + | ++MissedShots + | ++0.738 + | ++0.352 + | ++0.0136572 + | ++Jorrit Hendrix + | ++h + | ++9392 + | ++FromCorner + | ++2021 + | ++LeftFoot + | ++16265 + | ++Spartak Moscow + | ++FK Akhmat + | ++2 + | ++1 + | ++2021-12-04 14:00:00 + | ++ | ++None + | +
+445977 + | ++9 + | ++SavedShot + | ++0.732 + | ++0.250 + | ++0.0310316 + | ++Quincy Promes + | ++h + | ++2815 + | ++DirectFreekick + | ++2021 + | ++RightFoot + | ++16265 + | ++Spartak Moscow + | ++FK Akhmat + | ++2 + | ++1 + | ++2021-12-04 14:00:00 + | ++ | ++Standard + | +
shots_dataset$player <- stri_trans_general(shots_dataset$player, "Latin-ASCII")
+giocatori <- as.data.frame(unique(shots_dataset$player))
+kable(head(giocatori, 3), "html") %>%
+ kable_styling(full_width = T)
++unique(shots_dataset$player) + | +
---|
+Jorrit Hendrix + | +
+Quincy Promes + | +
+Victor Moses + | +
names(giocatori)[names(giocatori) == "unique(shots_dataset$player)"] <- "Name"
+foot_dataset <- FullData[, c("Name", "Preffered_Foot")]
+foot_dataset$Name <- stri_trans_general(foot_dataset$Name, "Latin-ASCII")
+kable(head(foot_dataset, 3), "html") %>%
+ kable_styling(full_width = T)
++Name + | ++Preffered_Foot + | +
---|---|
+Cristiano Ronaldo + | ++Right + | +
+Lionel Messi + | ++Left + | +
+Neymar + | ++Right + | +
giocatori2 <- merge(giocatori, foot_dataset, by = "Name", all.x = TRUE)
+kable(head(giocatori2, 3), "html") %>%
+ kable_styling(full_width = T)
++Name + | ++Preffered_Foot + | +
---|---|
+Aaron Connolly + | ++NA + | +
+Aaron Cresswell + | ++Left + | +
+Aaron Hickey + | ++NA + | +
colSums((is.na(giocatori2)))
+## Name Preffered_Foot
+## 0 2499
+player2 <- players[, c("name", "foot")]
+kable(head(player2, 3), "html") %>%
+ kable_styling(full_width = T)
++name + | ++foot + | +
---|---|
+Timo Hildebrand + | ++ | +
+Martin Petrov + | ++ | +
+Martin Amedick + | ++ | +
player2 <- na.omit(player2)
+player2$name <- stri_trans_general(player2$name, "Latin-ASCII")
+names(player2)[names(player2) == "name"] <- "Name"
+giocatori3 <- merge(giocatori2, player2, by = "Name", all.x = TRUE)
+kable(head(giocatori3, 3), "html") %>%
+ kable_styling(full_width = T)
++Name + | ++Preffered_Foot + | ++foot + | +
---|---|---|
+Aaron Connolly + | ++NA + | ++right + | +
+Aaron Cresswell + | ++Left + | ++left + | +
+Aaron Hickey + | ++NA + | ++both + | +
giocatori3$Preffered_Foot <- ifelse(is.na(giocatori3$Preffered_Foot), giocatori3$foot, giocatori3$Preffered_Foot)
+colSums(is.na(giocatori3))
+## Name Preffered_Foot foot
+## 0 514 651
+new.players <- new.players[, c("name", "foot")]
+##
+nomi_da_cambiare <- c("Andrej Galabinov", "Anssumane Fati", "Arnaud Kalimuendo Muinga", "Daniel Parejo", "Emile Smith-Rowe", "Kephren Thuram", "Kylian Mbappe-Lottin", "Marco Faraoni", "Nemanja Vidci", "Pablo Daniel Osvaldo", "Luca Toni", "Tanguy NDombele Alvaro", "Thiago Alcantara", "Yeremi Pino", "Papis Demba Cisse", "Kevin Kuranyi", "Dimitar Berbatov")
+
+giocatori3 <- giocatori3 %>%
+ mutate(Preffered_Foot = ifelse(Name %in% nomi_da_cambiare, "Right", Preffered_Foot))
+giocatori3 <- subset(giocatori3, select = -c(foot))
+
+nomi_da_cambiare2 <- c("Dimitri Kombarov", "Lee Kang-In", "Mohammed Ali-Cho", "Yaroslav Rakitskiy", "Pape Alassane Gueye")
+
+giocatori3 <- giocatori3 %>%
+ mutate(Preffered_Foot = ifelse(Name %in% nomi_da_cambiare2, "Left", Preffered_Foot))
+
+nomi_da_cambiare3 <- c("Kostas Mitroglu", "Santiago Cazorla", "Son Heung-Min")
+
+giocatori3 <- giocatori3 %>%
+ mutate(Preffered_Foot = ifelse(Name %in% nomi_da_cambiare3, "both", Preffered_Foot))
+
+giocatori3 <- giocatori3 %>%
+ mutate(across(everything(), na_if, ""))
+## Warning: There was 1 warning in `mutate()`.
+## ℹ In argument: `across(everything(), na_if, "")`.
+## Caused by warning:
+## ! The `...` argument of `across()` is deprecated as of dplyr 1.1.0.
+## Supply arguments directly to `.fns` through an anonymous function instead.
+##
+## # Previously
+## across(a:b, mean, na.rm = TRUE)
+##
+## # Now
+## across(a:b, \(x) mean(x, na.rm = TRUE))
+giocatori3$Preffered_Foot <- toTitleCase(giocatori3$Preffered_Foot)
+
+giocatori3$Preffered_Foot <- ifelse(giocatori3$Preffered_Foot == "Right", "RightFoot", ifelse(giocatori3$Preffered_Foot == "Left", "LeftFoot", giocatori3$Preffered_Foot))
+
+names(giocatori3)[names(giocatori3) == "Name"] <- "player"
+
+shots_dataset2 <- merge(shots_dataset, giocatori3, by = "player", all.x = TRUE)
+
+shots_dataset2 <- shots_dataset2 %>%
+ mutate(Preffered_Foot = ifelse(player == "Franck Zambo", "Right", Preffered_Foot))
+
+shots_dataset2 <- shots_dataset2[shots_dataset2$result != "OwnGoal", ]
+
+shots_dataset2$result <- ifelse(shots_dataset2$result == "Goal", 1, 0)
+
+shots_dataset2$result <- as.factor(shots_dataset2$result)
+
+shots_dataset2 <- na.omit(shots_dataset2)
+
+names(shots_dataset2)[names(shots_dataset2) == "xG"] <- "xG Understat"
+
+kable(head(shots_dataset2, 3), "html") %>%
+ kable_styling(full_width = T)
++player + | ++id + | ++minute + | ++result + | ++X + | ++Y + | ++xG Understat + | ++h_a + | ++player_id + | ++situation + | ++season + | ++shotType + | ++match_id + | ++h_team + | ++a_team + | ++h_goals + | ++a_goals + | ++date + | ++player_assisted + | ++lastAction + | ++Preffered_Foot + | +
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
+Aaron Connolly + | ++427269 + | ++51 + | ++0 + | ++0.913 + | ++0.536 + | ++0.5629963 + | ++h + | ++7991 + | ++OpenPlay + | ++2021 + | ++LeftFoot + | ++16391 + | ++Brighton + | ++Watford + | ++2 + | ++0 + | ++2021-08-21 16:30:00 + | ++ | ++None + | ++RightFoot + | +
+Aaron Connolly + | ++329180 + | ++49 + | ++0 + | ++0.944 + | ++0.383 + | ++0.0140796 + | ++h + | ++7991 + | ++FromCorner + | ++2019 + | ++Head + | ++11746 + | ++Brighton + | ++Norwich + | ++2 + | ++0 + | ++2019-11-02 15:00:00 + | ++Pascal Groß + | ++Aerial + | ++RightFoot + | +
+Aaron Connolly + | ++338614 + | ++91 + | ++0 + | ++0.797 + | ++0.377 + | ++0.0308471 + | ++h + | ++7991 + | ++OpenPlay + | ++2019 + | ++RightFoot + | ++11815 + | ++Brighton + | ++Sheffield United + | ++0 + | ++1 + | ++2019-12-21 15:00:00 + | ++ | ++None + | ++RightFoot + | +
premier_league <- c("Watford", "Norwich", "Sheffield United", "Brighton", "Southampton", "Chelsea", "Liverpool", "Manchester United", "Wolverhampton Wanderers", "Tottenham", "Aston Villa", "Burnley", "Swansea", "West Ham", "Leeds", "Crystal Palace", "Manchester City", "Newcastle United", "Fulham", "Hull", "Arsenal", "Bournemouth", "West Bromwich Albion", "Stoke", "Sunderland", "Huddersfield", "Everton", "Leicester", "Cardiff", "Middlesbrough", "Queens Park Rangers", "Brentford")
+seriea <- c("Bologna", "Genoa", "Salernitana", "AC Milan", "Fiorentina", "Cagliari", "Lazio", "Juventus", "Parma Calcio 1913", "Crotone", "SPAL 2013", "Sassuolo", "Lecce", "Udinese", "Roma", "Torino", "Inter", "Brescia", "Verona", "Parma", "Palermo", "Napoli", "Carpi", "Frosinone", "Venezia", "Sampdoria", "Benevento", "Spezia", "Empoli", "Chievo", "Atalanta", "Cesena", "Pescara")
+bundesliga <- c("Hamburger SV", "Wolfsburg", "Eintracht Frankfurt", "RasenBallsport Leipzig", "Freiburg", "Bayern Munich", "Bayer Leverkusen", "Schalke 04", "Hertha Berlin", "Werder Bremen", "VfB Stuttgart", "Darmstadt", "Bochum", "Mainz 05", "Hannover 96", "Arminia Bielefeld", "Fortuna Duesseldorf", "Augsburg", "Hoffenheim", "Borussia M.Gladbach", "Paderborn", "FC Cologne", "Borussia Dortmund", "Greuther Fuerth", "Nuernberg", "Ingolstadt", "Union Berlin")
+ligue1 <- c("Toulouse", "Marseille", "Nice", "Bordeaux", "Saint-Etienne", "Lille", "Strasbourg", "Guingamp", "Metz", "Brest", "Lyon", "Montpellier", "Reims", "Dijon", "Nantes", "Angers", "Rennes", "SC Bastia", "Paris Saint Germain", "Monaco", "Caen", "GFC Ajaccio", "Nimes", "Lorient", "Amiens", "Lens", "Troyes", "Evian Thonon Gaillard", "Nancy", "Clermont Foot")
+liga <- c("Real Betis", "Celta Vigo", "Alaves", "Levante", "Elche", "Granada", "Deportivo La Coruna", "Villarreal", "Cordoba", "Athletic Club", "Rayo Vallecano", "Mallorca", "Real Valladolid", "Real Sociedad", "Sevilla", "Atletico Madrid", "Real Madrid", "Eibar", "Getafe", "Barcelona", "Malaga", "Girona", "Las Palmas", "Valencia", "SD Huesca", "Sporting Gijon", "Osasuna", "Cadiz", "Leganes", "Almeria")
+shots_dataset2$lega <- ifelse(shots_dataset2$h_team %in% premier_league | shots_dataset2$a_team %in% premier_league, "Premier League",
+ ifelse(shots_dataset2$h_team %in% seriea | shots_dataset2$a_team %in% seriea, "Serie A",
+ ifelse(shots_dataset2$h_team %in% bundesliga | shots_dataset2$a_team %in% bundesliga, "Bundesliga",
+ ifelse(shots_dataset2$h_team %in% ligue1 | shots_dataset2$a_team %in% ligue1, "Ligue 1",
+ ifelse(shots_dataset2$h_team %in% liga | shots_dataset2$a_team %in% liga, "Liga", "Russian PL")))))
+subset_penalty <- subset(shots_dataset2, situation == "Penalty")
+
+subset_openplay <- subset(shots_dataset2, situation == "OpenPlay")
+
+subset_setpiece <- subset(shots_dataset2, situation %in% c("SetPiece", "FromCorner", "DirectFreekick"))
+
+kable(head(subset_openplay, 3), "html") %>%
+ kable_styling(full_width = T)
++ | ++player + | ++id + | ++minute + | ++result + | ++X + | ++Y + | ++xG Understat + | ++h_a + | ++player_id + | ++situation + | ++season + | ++shotType + | ++match_id + | ++h_team + | ++a_team + | ++h_goals + | ++a_goals + | ++date + | ++player_assisted + | ++lastAction + | ++Preffered_Foot + | ++lega + | +
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
+1 + | ++Aaron Connolly + | ++427269 + | ++51 + | ++0 + | ++0.913 + | ++0.536 + | ++0.5629963 + | ++h + | ++7991 + | ++OpenPlay + | ++2021 + | ++LeftFoot + | ++16391 + | ++Brighton + | ++Watford + | ++2 + | ++0 + | ++2021-08-21 16:30:00 + | ++ | ++None + | ++RightFoot + | ++Premier League + | +
+3 + | ++Aaron Connolly + | ++338614 + | ++91 + | ++0 + | ++0.797 + | ++0.377 + | ++0.0308471 + | ++h + | ++7991 + | ++OpenPlay + | ++2019 + | ++RightFoot + | ++11815 + | ++Brighton + | ++Sheffield United + | ++0 + | ++1 + | ++2019-12-21 15:00:00 + | ++ | ++None + | ++RightFoot + | ++Premier League + | +
+4 + | ++Aaron Connolly + | ++403235 + | ++49 + | ++0 + | ++0.847 + | ++0.382 + | ++0.1247877 + | ++a + | ++7991 + | ++OpenPlay + | ++2020 + | ++RightFoot + | ++14656 + | ++Burnley + | ++Brighton + | ++1 + | ++1 + | ++2021-02-06 15:00:00 + | ++Yves Bissouma + | ++Tackle + | ++RightFoot + | ++Premier League + | +
openplay_foot <- subset(subset_openplay, shotType %in% c("LeftFoot", "RightFoot"))
+openplay_head <- subset(subset_openplay, shotType == "Head")
+
+setpiece_foot <- subset(subset_setpiece, shotType %in% c("LeftFoot", "RightFoot"))
+setpiece_head <- subset(subset_setpiece, shotType == "Head")
+
+tbl_summary(openplay_foot, include = c(minute, result, X, Y, shotType, lastAction))
+Characteristic | +N = 288,8871 | +
---|---|
minute | +49 (26, 71) |
result | +|
    0 | +260,327 (90%) |
    1 | +28,560 (9.9%) |
X | +0.85 (0.77, 0.89) |
Y | +0.50 (0.39, 0.62) |
shotType | +|
    LeftFoot | +106,100 (37%) |
    RightFoot | +182,787 (63%) |
lastAction | +|
    Aerial | +3,641 (1.3%) |
    BallRecovery | +9,647 (3.3%) |
    BallTouch | +5,416 (1.9%) |
    BlockedPass | +472 (0.2%) |
    Card | +27 (<0.1%) |
    Challenge | +29 (<0.1%) |
    ChanceMissed | +2 (<0.1%) |
    Chipped | +15,477 (5.4%) |
    Clearance | +47 (<0.1%) |
    CornerAwarded | +197 (<0.1%) |
    Cross | +16,612 (5.8%) |
    Dispossessed | +2,383 (0.8%) |
    End | +104 (<0.1%) |
    Error | +3 (<0.1%) |
    FormationChange | +12 (<0.1%) |
    Foul | +272 (<0.1%) |
    Goal | +143 (<0.1%) |
    GoodSkill | +69 (<0.1%) |
    HeadPass | +6,590 (2.3%) |
    Interception | +526 (0.2%) |
    KeeperPickup | +7 (<0.1%) |
    KeeperSweeper | +1 (<0.1%) |
    LayOff | +3,836 (1.3%) |
    None | +32,994 (11%) |
    OffsidePass | +81 (<0.1%) |
    OffsideProvoked | +7 (<0.1%) |
    Pass | +148,079 (51%) |
    Punch | +1 (<0.1%) |
    Rebound | +11,636 (4.0%) |
    Save | +13 (<0.1%) |
    ShieldBallOpp | +2 (<0.1%) |
    Smother | +1 (<0.1%) |
    Start | +10 (<0.1%) |
    SubstitutionOn | +36 (<0.1%) |
    Tackle | +969 (0.3%) |
    TakeOn | +21,584 (7.5%) |
    Throughball | +7,961 (2.8%) |
1 Median (Q1, Q3); n (%) | +
tbl_summary(openplay_head, include = c(minute, result, X, Y, shotType, lastAction))
+Characteristic | +N = 30,8551 | +
---|---|
minute | +49 (26, 73) |
result | +|
    0 | +27,181 (88%) |
    1 | +3,674 (12%) |
X | +0.911 (0.890, 0.929) |
Y | +0.50 (0.46, 0.55) |
shotType | +|
    Head | +30,855 (100%) |
lastAction | +|
    Aerial | +9,053 (29%) |
    BallRecovery | +8 (<0.1%) |
    BallTouch | +65 (0.2%) |
    BlockedPass | +7 (<0.1%) |
    Card | +6 (<0.1%) |
    Challenge | +10 (<0.1%) |
    Chipped | +2,743 (8.9%) |
    Clearance | +2 (<0.1%) |
    CornerAwarded | +59 (0.2%) |
    Cross | +16,320 (53%) |
    Dispossessed | +3 (<0.1%) |
    End | +37 (0.1%) |
    Error | +2 (<0.1%) |
    FormationChange | +1 (<0.1%) |
    Foul | +67 (0.2%) |
    Goal | +26 (<0.1%) |
    HeadPass | +392 (1.3%) |
    Interception | +7 (<0.1%) |
    KeeperPickup | +3 (<0.1%) |
    LayOff | +6 (<0.1%) |
    None | +846 (2.7%) |
    OffsidePass | +7 (<0.1%) |
    Pass | +315 (1.0%) |
    Rebound | +735 (2.4%) |
    Save | +1 (<0.1%) |
    SubstitutionOff | +1 (<0.1%) |
    SubstitutionOn | +11 (<0.1%) |
    Tackle | +12 (<0.1%) |
    TakeOn | +21 (<0.1%) |
    Throughball | +89 (0.3%) |
1 Median (Q1, Q3); n (%) | +
tbl_summary(setpiece_foot, include = c(minute, result, X, Y, shotType, lastAction))
+Characteristic | +N = 69,4751 | +
---|---|
minute | +49 (26, 71) |
result | +|
    0 | +64,065 (92%) |
    1 | +5,410 (7.8%) |
X | +0.80 (0.75, 0.89) |
Y | +0.51 (0.41, 0.61) |
shotType | +|
    LeftFoot | +24,145 (35%) |
    RightFoot | +45,330 (65%) |
lastAction | +|
    Aerial | +3,397 (4.9%) |
    BallRecovery | +65 (<0.1%) |
    BallTouch | +1,316 (1.9%) |
    BlockedPass | +24 (<0.1%) |
    Card | +14 (<0.1%) |
    Challenge | +9 (<0.1%) |
    Chipped | +1,528 (2.2%) |
    Clearance | +5 (<0.1%) |
    CornerAwarded | +154 (0.2%) |
    Cross | +5,988 (8.6%) |
    Dispossessed | +118 (0.2%) |
    End | +103 (0.1%) |
    Error | +2 (<0.1%) |
    FormationChange | +3 (<0.1%) |
    Foul | +139 (0.2%) |
    Goal | +55 (<0.1%) |
    GoodSkill | +3 (<0.1%) |
    HeadPass | +2,736 (3.9%) |
    Interception | +47 (<0.1%) |
    KeeperPickup | +4 (<0.1%) |
    KeeperSweeper | +1 (<0.1%) |
    LayOff | +182 (0.3%) |
    None | +16,313 (23%) |
    OffsidePass | +9 (<0.1%) |
    OffsideProvoked | +1 (<0.1%) |
    Pass | +9,535 (14%) |
    Rebound | +5,585 (8.0%) |
    Save | +5 (<0.1%) |
    Standard | +20,936 (30%) |
    Start | +1 (<0.1%) |
    SubstitutionOn | +7 (<0.1%) |
    Tackle | +93 (0.1%) |
    TakeOn | +987 (1.4%) |
    Throughball | +110 (0.2%) |
1 Median (Q1, Q3); n (%) | +
tbl_summary(setpiece_head, include = c(minute, result, X, Y, shotType, lastAction))
+Characteristic | +N = 45,8331 | +
---|---|
minute | +49 (26, 71) |
result | +|
    0 | +41,601 (91%) |
    1 | +4,232 (9.2%) |
X | +0.917 (0.899, 0.934) |
Y | +0.50 (0.46, 0.55) |
shotType | +|
    Head | +45,833 (100%) |
lastAction | +|
    Aerial | +16,390 (36%) |
    BallRecovery | +13 (<0.1%) |
    BallTouch | +157 (0.3%) |
    BlockedPass | +42 (<0.1%) |
    Card | +23 (<0.1%) |
    Challenge | +16 (<0.1%) |
    Chipped | +2,327 (5.1%) |
    Clearance | +9 (<0.1%) |
    CornerAwarded | +146 (0.3%) |
    Cross | +23,526 (51%) |
    Dispossessed | +16 (<0.1%) |
    End | +98 (0.2%) |
    Error | +1 (<0.1%) |
    FormationChange | +2 (<0.1%) |
    Foul | +160 (0.3%) |
    Goal | +93 (0.2%) |
    HeadPass | +918 (2.0%) |
    Interception | +16 (<0.1%) |
    KeeperPickup | +4 (<0.1%) |
    LayOff | +6 (<0.1%) |
    None | +894 (2.0%) |
    OffsidePass | +35 (<0.1%) |
    OffsideProvoked | +2 (<0.1%) |
    Pass | +162 (0.4%) |
    Rebound | +710 (1.5%) |
    Save | +2 (<0.1%) |
    ShieldBallOpp | +3 (<0.1%) |
    Standard | +2 (<0.1%) |
    Start | +1 (<0.1%) |
    SubstitutionOn | +9 (<0.1%) |
    Tackle | +20 (<0.1%) |
    TakeOn | +25 (<0.1%) |
    Throughball | +5 (<0.1%) |
1 Median (Q1, Q3); n (%) | +
Removing last action <10
+openplay_foot <- subset(openplay_foot, !lastAction %in% c("ChanceMissed", "Error", "KeeperPickup", "KeeperSweeper", "OffsideProvoked", "Punch", "ShieldBallOpp", "Smother"))
+openplay_head <- subset(openplay_head, !lastAction %in% c("BallRecovery", "BlockedPass", "Card", "Clearance", "Dispossessed", "Error", "FormationChange", "Interception", "KeeperPickup", "LayOff", "OffsidePass", "Save", "SubstiotutionOff"))
+setpiece_foot <- subset(setpiece_foot, !lastAction %in% c("Challenge", "Clearance", "Error", "FormationChange", "KeeperPickup", "KeeperSweeper", "OffsidePass", "OffsideProvoked", "Save", "Start", "SubstiotutionOn"))
+setpiece_head <- subset(setpiece_head, !lastAction %in% c("Clearance", "Error", "FormationChange", "KeeperPickup", "LayOff", "OffsideProvoked", "Save", "ShieldBallOpp", "Standard", "Start", "SubstiotutionOn", "Throughball"))
+Introduction of is_weakfoot variable
+openplay_foot$is_weakfoot <- ifelse(openplay_foot$Preffered_Foot == openplay_foot$shotType, "No", ifelse(openplay_foot$Preffered_Foot == "both", "No", "Yes"))
+setpiece_foot$is_weakfoot <- ifelse(setpiece_foot$Preffered_Foot == setpiece_foot$shotType, "No", ifelse(setpiece_foot$Preffered_Foot == "both", "No", "Yes"))
+
+understat_xG_op_foot <- openplay_foot$xG
+understat_xG_op_head <- openplay_head$xG
+understat_xG_sp_foot <- setpiece_foot$xG
+understat_xG_sp_head <- setpiece_head$xG
+
+kable(head(openplay_foot, 3), "html") %>%
+ kable_styling(full_width = T)
++ | ++player + | ++id + | ++minute + | ++result + | ++X + | ++Y + | ++xG Understat + | ++h_a + | ++player_id + | ++situation + | ++season + | ++shotType + | ++match_id + | ++h_team + | ++a_team + | ++h_goals + | ++a_goals + | ++date + | ++player_assisted + | ++lastAction + | ++Preffered_Foot + | ++lega + | ++is_weakfoot + | +
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
+1 + | ++Aaron Connolly + | ++427269 + | ++51 + | ++0 + | ++0.913 + | ++0.536 + | ++0.5629963 + | ++h + | ++7991 + | ++OpenPlay + | ++2021 + | ++LeftFoot + | ++16391 + | ++Brighton + | ++Watford + | ++2 + | ++0 + | ++2021-08-21 16:30:00 + | ++ | ++None + | ++RightFoot + | ++Premier League + | ++Yes + | +
+3 + | ++Aaron Connolly + | ++338614 + | ++91 + | ++0 + | ++0.797 + | ++0.377 + | ++0.0308471 + | ++h + | ++7991 + | ++OpenPlay + | ++2019 + | ++RightFoot + | ++11815 + | ++Brighton + | ++Sheffield United + | ++0 + | ++1 + | ++2019-12-21 15:00:00 + | ++ | ++None + | ++RightFoot + | ++Premier League + | ++No + | +
+4 + | ++Aaron Connolly + | ++403235 + | ++49 + | ++0 + | ++0.847 + | ++0.382 + | ++0.1247877 + | ++a + | ++7991 + | ++OpenPlay + | ++2020 + | ++RightFoot + | ++14656 + | ++Burnley + | ++Brighton + | ++1 + | ++1 + | ++2021-02-06 15:00:00 + | ++Yves Bissouma + | ++Tackle + | ++RightFoot + | ++Premier League + | ++No + | +
For better interpretation, this is how I refer to the different +models:
+For shot type, the numbering is as follows:
+For modeling, the numbering is as follows: 1. Logit 2. Logit with +interactions 3. Discriminant Analysis 4. Random Forest 5. Bagging 8. +Neural Network
+For example model1.5 is bagging model for Open Play +Foot
+Note: For brevity, the stepwise procedure is not +shown in this report. Where there is no model with interactions, it is +because there is no significant improvement that would justify such a +model, which is very costly in terms of computational time.
+set.seed(123)
+index_opf <- sample.split(Y = openplay_foot$result, SplitRatio = 0.75)
+train_opf <- openplay_foot[index_opf, ]
+
+indice_test_opf <- which(index_opf == FALSE)
+test_opf <- openplay_foot[indice_test_opf, ]
+modello1.1 <- glm(result ~ minute + lastAction + is_weakfoot + X + exp(Y^2) + h_a,
+ data = train_opf, family = binomial)
+summary(modello1.1)
+##
+## Call:
+## glm(formula = result ~ minute + lastAction + is_weakfoot + X +
+## exp(Y^2) + h_a, family = binomial, data = train_opf)
+##
+## Coefficients:
+## Estimate Std. Error z value Pr(>|z|)
+## (Intercept) -1.327e+01 1.594e-01 -83.287 < 2e-16 ***
+## minute 1.444e-03 2.824e-04 5.113 3.16e-07 ***
+## lastActionBallRecovery 8.943e-01 1.114e-01 8.029 9.85e-16 ***
+## lastActionBallTouch 4.734e-01 1.169e-01 4.049 5.13e-05 ***
+## lastActionBlockedPass 6.401e-01 2.185e-01 2.929 0.003400 **
+## lastActionCard 1.393e-01 7.542e-01 0.185 0.853428
+## lastActionChallenge 4.390e-01 1.049e+00 0.418 0.675667
+## lastActionChipped 3.860e-01 1.055e-01 3.660 0.000253 ***
+## lastActionClearance 1.148e-01 7.561e-01 0.152 0.879348
+## lastActionCornerAwarded -2.692e+00 1.010e+00 -2.666 0.007675 **
+## lastActionCross 4.258e-01 1.040e-01 4.095 4.23e-05 ***
+## lastActionDispossessed 5.274e-01 1.384e-01 3.811 0.000139 ***
+## lastActionEnd -2.009e+00 1.015e+00 -1.978 0.047884 *
+## lastActionFormationChange 8.962e-01 1.093e+00 0.820 0.412368
+## lastActionFoul 3.885e-03 2.842e-01 0.014 0.989093
+## lastActionGoal -1.739e+00 7.240e-01 -2.402 0.016307 *
+## lastActionGoodSkill 1.241e+00 4.443e-01 2.793 0.005219 **
+## lastActionHeadPass 4.361e-01 1.135e-01 3.842 0.000122 ***
+## lastActionInterception 7.496e-01 2.350e-01 3.190 0.001422 **
+## lastActionLayOff 6.903e-01 1.306e-01 5.285 1.26e-07 ***
+## lastActionNone 5.927e-01 1.031e-01 5.747 9.08e-09 ***
+## lastActionOffsidePass -2.190e-02 5.418e-01 -0.040 0.967758
+## lastActionPass 6.951e-01 1.010e-01 6.884 5.83e-12 ***
+## lastActionRebound 1.110e+00 1.042e-01 10.652 < 2e-16 ***
+## lastActionSave -7.818e+00 3.653e+01 -0.214 0.830549
+## lastActionStart 2.035e+00 1.197e+00 1.699 0.089265 .
+## lastActionSubstitutionOn 1.017e+00 5.857e-01 1.736 0.082476 .
+## lastActionTackle 7.795e-01 1.732e-01 4.501 6.76e-06 ***
+## lastActionTakeOn 7.280e-01 1.039e-01 7.009 2.40e-12 ***
+## lastActionThroughball 1.488e+00 1.047e-01 14.216 < 2e-16 ***
+## is_weakfootYes -1.602e-01 1.742e-02 -9.194 < 2e-16 ***
+## X 1.291e+01 1.289e-01 100.099 < 2e-16 ***
+## exp(Y^2) -5.834e-01 4.082e-02 -14.294 < 2e-16 ***
+## h_ah 2.449e-02 1.511e-02 1.621 0.105116
+## ---
+## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
+##
+## (Dispersion parameter for binomial family taken to be 1)
+##
+## Null deviance: 139769 on 216646 degrees of freedom
+## Residual deviance: 123770 on 216613 degrees of freedom
+## AIC: 123838
+##
+## Number of Fisher Scoring iterations: 9
+predict1.1 <- predict.glm(modello1.1, newdata = test_opf, type = "response")
+
+roc1.1 <- roc(test_opf$result ~ predict1.1, plot = T)
+## Setting levels: control = 0, case = 1
+## Setting direction: controls < cases
+legend("bottomright", legend = paste("AUC =", round(auc(roc1.1), 4)), lwd = 2, box.lwd = 0, bg = "white")
+confusione1.1 <- confusionMatrix(as.factor(ifelse(predict1.1>0.3, 1, 0)), as.factor(test_opf$result))
+confusione1.1
+## Confusion Matrix and Statistics
+##
+## Reference
+## Prediction 0 1
+## 0 63353 6307
+## 1 1723 833
+##
+## Accuracy : 0.8888
+## 95% CI : (0.8865, 0.8911)
+## No Information Rate : 0.9011
+## P-Value [Acc > NIR] : 1
+##
+## Kappa : 0.1263
+##
+## Mcnemar's Test P-Value : <2e-16
+##
+## Sensitivity : 0.9735
+## Specificity : 0.1167
+## Pos Pred Value : 0.9095
+## Neg Pred Value : 0.3259
+## Prevalence : 0.9011
+## Detection Rate : 0.8773
+## Detection Prevalence : 0.9646
+## Balanced Accuracy : 0.5451
+##
+## 'Positive' Class : 0
+##
+rms::val.prob(as.numeric(as.character(predict1.1)), as.numeric(as.character(test_opf$result)))
+## Dxy C (ROC) R2 D D:Chi-sq
+## 4.937122e-01 7.468561e-01 1.422587e-01 7.001681e-02 5.057334e+03
+## D:p U U:Chi-sq U:p Q
+## NA 3.125414e-05 4.257049e+00 1.190128e-01 6.998556e-02
+## Brier Intercept Slope Emax E90
+## 8.249545e-02 -5.805545e-02 9.688800e-01 1.046446e-01 5.777724e-03
+## Eavg S:z S:p
+## 2.633787e-03 9.329611e-01 3.508401e-01
+modello1.2 <- glm(formula = result ~ minute + lastAction + is_weakfoot + X +
+ exp(Y^2) + h_a + lastAction:X + is_weakfoot:exp(Y^2) + X:exp(Y^2) +
+ is_weakfoot:X + exp(Y^2):h_a + minute:lastAction + minute:h_a +
+ minute:exp(Y^2) + lastAction:h_a, family = binomial, data = train_opf)
+## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
+summary(modello1.2)
+##
+## Call:
+## glm(formula = result ~ minute + lastAction + is_weakfoot + X +
+## exp(Y^2) + h_a + lastAction:X + is_weakfoot:exp(Y^2) + X:exp(Y^2) +
+## is_weakfoot:X + exp(Y^2):h_a + minute:lastAction + minute:h_a +
+## minute:exp(Y^2) + lastAction:h_a, family = binomial, data = train_opf)
+##
+## Coefficients:
+## Estimate Std. Error z value Pr(>|z|)
+## (Intercept) -1.493e+01 1.521e+00 -9.817 < 2e-16 ***
+## minute 1.603e-02 4.342e-03 3.692 0.000222 ***
+## lastActionBallRecovery 7.784e-01 1.427e+00 0.545 0.585464
+## lastActionBallTouch -1.297e+00 1.591e+00 -0.815 0.414990
+## lastActionBlockedPass -3.624e+00 3.283e+00 -1.104 0.269698
+## lastActionCard 1.157e+01 1.284e+01 0.901 0.367548
+## lastActionChallenge -5.879e+02 1.246e+04 -0.047 0.962374
+## lastActionChipped 3.317e+00 1.405e+00 2.361 0.018243 *
+## lastActionClearance -5.464e+00 1.576e+03 -0.003 0.997233
+## lastActionCornerAwarded 1.727e+02 2.529e+03 0.068 0.945571
+## lastActionCross -5.733e+00 1.488e+00 -3.854 0.000116 ***
+## lastActionDispossessed -1.104e+00 1.880e+00 -0.587 0.556960
+## lastActionEnd 5.735e+02 3.299e+03 0.174 0.861998
+## lastActionFormationChange 2.008e+02 1.270e+05 0.002 0.998739
+## lastActionFoul 7.324e+00 3.945e+00 1.857 0.063370 .
+## lastActionGoal 1.089e+01 8.492e+02 0.013 0.989772
+## lastActionGoodSkill 1.025e+00 5.205e+00 0.197 0.843953
+## lastActionHeadPass -1.060e+00 1.551e+00 -0.683 0.494455
+## lastActionInterception -4.864e+00 3.025e+00 -1.608 0.107889
+## lastActionLayOff -9.187e-01 1.790e+00 -0.513 0.607762
+## lastActionNone -1.188e+00 1.349e+00 -0.881 0.378568
+## lastActionOffsidePass 4.325e+00 7.721e+00 0.560 0.575313
+## lastActionPass -1.781e+00 1.319e+00 -1.351 0.176837
+## lastActionRebound -5.337e+00 1.421e+00 -3.755 0.000173 ***
+## lastActionSave -5.817e+00 2.059e+04 0.000 0.999775
+## lastActionStart -1.121e+01 6.523e+03 -0.002 0.998629
+## lastActionSubstitutionOn -1.680e+01 1.601e+01 -1.050 0.293930
+## lastActionTackle -8.704e-01 2.259e+00 -0.385 0.700041
+## lastActionTakeOn 4.561e-01 1.361e+00 0.335 0.737519
+## lastActionThroughball 1.381e+01 1.473e+00 9.374 < 2e-16 ***
+## is_weakfootYes 1.098e-01 2.893e-01 0.379 0.704371
+## X 1.398e+01 1.743e+00 8.022 1.04e-15 ***
+## exp(Y^2) 2.016e+00 5.946e-01 3.390 0.000698 ***
+## h_ah -5.780e-01 2.296e-01 -2.517 0.011825 *
+## lastActionBallRecovery:X 4.389e-01 1.648e+00 0.266 0.789939
+## lastActionBallTouch:X 2.787e+00 1.820e+00 1.531 0.125701
+## lastActionBlockedPass:X 5.876e+00 3.709e+00 1.584 0.113156
+## lastActionCard:X -1.265e+01 1.444e+01 -0.876 0.380987
+## lastActionChallenge:X 2.126e+02 6.053e+03 0.035 0.971982
+## lastActionChipped:X -2.796e+00 1.609e+00 -1.738 0.082216 .
+## lastActionClearance:X -1.139e+01 4.333e+00 -2.630 0.008541 **
+## lastActionCornerAwarded:X -2.254e+02 3.388e+03 -0.067 0.946963
+## lastActionCross:X 7.177e+00 1.687e+00 4.255 2.09e-05 ***
+## lastActionDispossessed:X 2.644e+00 2.156e+00 1.226 0.220024
+## lastActionEnd:X -8.521e+02 4.825e+03 -0.177 0.859831
+## lastActionFormationChange:X -3.728e+02 1.731e+05 -0.002 0.998282
+## lastActionFoul:X -8.592e+00 4.424e+00 -1.942 0.052096 .
+## lastActionGoal:X -2.884e+01 1.176e+01 -2.452 0.014193 *
+## lastActionGoodSkill:X -1.715e+00 5.784e+00 -0.297 0.766777
+## lastActionHeadPass:X 2.285e+00 1.773e+00 1.289 0.197357
+## lastActionInterception:X 6.750e+00 3.388e+00 1.992 0.046331 *
+## lastActionLayOff:X 2.191e+00 2.089e+00 1.049 0.294095
+## lastActionNone:X 2.571e+00 1.548e+00 1.661 0.096766 .
+## lastActionOffsidePass:X -3.654e+00 7.972e+00 -0.458 0.646686
+## lastActionPass:X 3.227e+00 1.516e+00 2.129 0.033218 *
+## lastActionRebound:X 7.672e+00 1.619e+00 4.740 2.14e-06 ***
+## lastActionSave:X -1.085e+01 2.480e+04 0.000 0.999651
+## lastActionStart:X -5.294e-01 1.248e+01 -0.042 0.966174
+## lastActionSubstitutionOn:X 1.010e+01 1.490e+01 0.678 0.497963
+## lastActionTackle:X 2.135e+00 2.569e+00 0.831 0.405788
+## lastActionTakeOn:X 8.590e-01 1.562e+00 0.550 0.582282
+## lastActionThroughball:X -1.324e+01 1.684e+00 -7.860 3.83e-15 ***
+## is_weakfootYes:exp(Y^2) -8.210e-01 9.906e-02 -8.288 < 2e-16 ***
+## X:exp(Y^2) -2.709e+00 6.750e-01 -4.013 5.99e-05 ***
+## is_weakfootYes:X 9.331e-01 3.061e-01 3.048 0.002306 **
+## exp(Y^2):h_ah 2.060e-01 8.160e-02 2.525 0.011577 *
+## minute:lastActionBallRecovery -8.250e-03 4.226e-03 -1.952 0.050886 .
+## minute:lastActionBallTouch -1.175e-02 4.456e-03 -2.636 0.008381 **
+## minute:lastActionBlockedPass -1.606e-02 8.260e-03 -1.944 0.051918 .
+## minute:lastActionCard 2.070e-03 3.606e-02 0.057 0.954232
+## minute:lastActionChallenge 5.181e+00 1.032e+02 0.050 0.959962
+## minute:lastActionChipped -8.817e-03 4.026e-03 -2.190 0.028521 *
+## minute:lastActionClearance -1.062e-02 2.520e-02 -0.421 0.673493
+## minute:lastActionCornerAwarded -9.022e-01 1.472e+01 -0.061 0.951122
+## minute:lastActionCross -9.620e-03 3.972e-03 -2.422 0.015441 *
+## minute:lastActionDispossessed -1.557e-02 5.282e-03 -2.947 0.003210 **
+## minute:lastActionEnd 1.572e+00 1.062e+01 0.148 0.882327
+## minute:lastActionFormationChange 1.324e+00 1.670e+02 0.008 0.993674
+## minute:lastActionFoul -1.878e-02 1.103e-02 -1.703 0.088586 .
+## minute:lastActionGoal -3.517e-02 3.015e-02 -1.167 0.243331
+## minute:lastActionGoodSkill 3.062e-02 2.189e-02 1.399 0.161903
+## minute:lastActionHeadPass -1.166e-02 4.309e-03 -2.705 0.006827 **
+## minute:lastActionInterception 2.194e-03 9.279e-03 0.236 0.813085
+## minute:lastActionLayOff -5.453e-03 4.961e-03 -1.099 0.271714
+## minute:lastActionNone -1.066e-02 3.946e-03 -2.702 0.006897 **
+## minute:lastActionOffsidePass -4.063e-02 2.443e-02 -1.664 0.096210 .
+## minute:lastActionPass -9.567e-03 3.871e-03 -2.471 0.013461 *
+## minute:lastActionRebound -1.211e-02 3.993e-03 -3.034 0.002417 **
+## minute:lastActionSave -1.389e-02 7.405e+01 0.000 0.999850
+## minute:lastActionStart -3.936e-01 1.449e+02 -0.003 0.997833
+## minute:lastActionSubstitutionOn 1.278e-01 7.682e-02 1.663 0.096248 .
+## minute:lastActionTackle -7.866e-03 6.632e-03 -1.186 0.235583
+## minute:lastActionTakeOn -1.117e-02 3.976e-03 -2.811 0.004943 **
+## minute:lastActionThroughball -1.113e-02 3.997e-03 -2.786 0.005339 **
+## minute:h_ah -9.621e-04 5.706e-04 -1.686 0.091747 .
+## minute:exp(Y^2) -3.052e-03 1.528e-03 -1.998 0.045769 *
+## lastActionBallRecovery:h_ah 3.278e-01 2.222e-01 1.475 0.140144
+## lastActionBallTouch:h_ah 3.752e-02 2.339e-01 0.160 0.872590
+## lastActionBlockedPass:h_ah -4.142e-02 4.531e-01 -0.091 0.927167
+## lastActionCard:h_ah -1.666e+01 2.035e+03 -0.008 0.993468
+## lastActionChallenge:h_ah 1.521e+01 1.010e+03 0.015 0.987981
+## lastActionChipped:h_ah 2.693e-01 2.102e-01 1.281 0.200194
+## lastActionClearance:h_ah 1.664e+01 1.576e+03 0.011 0.991573
+## lastActionCornerAwarded:h_ah 1.093e+01 6.754e+02 0.016 0.987086
+## lastActionCross:h_ah 4.412e-01 2.075e-01 2.126 0.033506 *
+## lastActionDispossessed:h_ah 3.282e-01 2.773e-01 1.183 0.236643
+## lastActionEnd:h_ah -2.914e+02 1.725e+03 -0.169 0.865815
+## lastActionFormationChange:h_ah 6.069e+01 2.276e+04 0.003 0.997872
+## lastActionFoul:h_ah 2.016e+00 8.014e-01 2.515 0.011904 *
+## lastActionGoal:h_ah 1.591e+01 8.491e+02 0.019 0.985052
+## lastActionGoodSkill:h_ah 3.968e-01 9.520e-01 0.417 0.676807
+## lastActionHeadPass:h_ah 3.384e-01 2.269e-01 1.491 0.135855
+## lastActionInterception:h_ah -4.266e-01 4.976e-01 -0.857 0.391232
+## lastActionLayOff:h_ah 9.304e-02 2.614e-01 0.356 0.721864
+## lastActionNone:h_ah 2.946e-01 2.060e-01 1.430 0.152650
+## lastActionOffsidePass:h_ah 1.294e+00 1.343e+00 0.963 0.335473
+## lastActionPass:h_ah 4.271e-01 2.017e-01 2.118 0.034190 *
+## lastActionRebound:h_ah 3.789e-01 2.085e-01 1.817 0.069197 .
+## lastActionSave:h_ah 4.704e-01 4.439e+03 0.000 0.999915
+## lastActionStart:h_ah 1.464e+01 6.523e+03 0.002 0.998210
+## lastActionSubstitutionOn:h_ah -1.440e+00 1.878e+00 -0.767 0.443286
+## lastActionTackle:h_ah 5.257e-01 3.483e-01 1.509 0.131209
+## lastActionTakeOn:h_ah 3.459e-01 2.073e-01 1.668 0.095298 .
+## lastActionThroughball:h_ah 4.527e-01 2.087e-01 2.169 0.030077 *
+## ---
+## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
+##
+## (Dispersion parameter for binomial family taken to be 1)
+##
+## Null deviance: 139769 on 216646 degrees of freedom
+## Residual deviance: 122830 on 216523 degrees of freedom
+## AIC: 123078
+##
+## Number of Fisher Scoring iterations: 17
+predict1.2 <- predict.glm(modello1.2, newdata = test_opf, type = "response")
+
+confusione1.2 <- confusionMatrix(as.factor(ifelse(predict1.2>0.3, 1, 0)), as.factor(test_opf$result))
+confusione1.2
+## Confusion Matrix and Statistics
+##
+## Reference
+## Prediction 0 1
+## 0 63054 6204
+## 1 2022 936
+##
+## Accuracy : 0.8861
+## 95% CI : (0.8838, 0.8884)
+## No Information Rate : 0.9011
+## P-Value [Acc > NIR] : 1
+##
+## Kappa : 0.1353
+##
+## Mcnemar's Test P-Value : <2e-16
+##
+## Sensitivity : 0.9689
+## Specificity : 0.1311
+## Pos Pred Value : 0.9104
+## Neg Pred Value : 0.3164
+## Prevalence : 0.9011
+## Detection Rate : 0.8731
+## Detection Prevalence : 0.9590
+## Balanced Accuracy : 0.5500
+##
+## 'Positive' Class : 0
+##
+rms::val.prob(as.numeric(as.character(predict1.2)), as.numeric(as.character(test_opf$result)))
+## Warning in rms::val.prob(as.numeric(as.character(predict1.2)),
+## as.numeric(as.character(test_opf$result))): 2 observations deleted from
+## logistic calibration due to probs. of 0 or 1
+## Dxy C (ROC) R2 D D:Chi-sq
+## 4.992168e-01 7.496084e-01 1.392605e-01 6.849007e-02 4.946942e+03
+## D:p U U:Chi-sq U:p Q
+## NA 5.514068e-04 4.181929e+01 8.299584e-10 6.793867e-02
+## Brier Intercept Slope Emax E90
+## 8.220725e-02 -1.447628e-01 9.225944e-01 2.886326e-01 3.951237e-03
+## Eavg S:z S:p
+## 2.745378e-03 1.345419e+00 1.784899e-01
+roc1.2 <- roc(test_opf$result ~ predict1.2, plot = T)
+## Setting levels: control = 0, case = 1
+## Setting direction: controls < cases
+legend("bottomright", legend = paste("AUC =", round(auc(roc1.2), 4)), lwd = 2, box.lwd = 0, bg = "white")
+set.seed(1234)
+
+new_train_opf <- train_opf
+
+new_train_opf$Y <- exp(new_train_opf$Y^2)
+
+new_test_opf <- test_opf
+
+new_test_opf$Y <- exp(new_test_opf$Y^2)
+
+set.seed(1234)
+
+
+modello1.3 <- lda(result ~ minute + lastAction + is_weakfoot + X + exp(Y^2) + h_a, data = train_opf)
+
+summary(modello1.3)
+## Length Class Mode
+## prior 2 -none- numeric
+## counts 2 -none- numeric
+## means 66 -none- numeric
+## scaling 33 -none- numeric
+## lev 2 -none- character
+## svd 1 -none- numeric
+## N 1 -none- numeric
+## call 3 -none- call
+## terms 3 terms call
+## xlevels 3 -none- list
+predict1.3.1 <- predict(modello1.3, newdata = test_opf)
+
+predict1.3 <- predict1.3.1$posterior[, 2]
+
+
+confusione1.3.1 <- confusionMatrix(as.factor(predict1.3.1$class), as.factor(test_opf$result))
+confusione1.3.1
+## Confusion Matrix and Statistics
+##
+## Reference
+## Prediction 0 1
+## 0 64747 6980
+## 1 329 160
+##
+## Accuracy : 0.8988
+## 95% CI : (0.8966, 0.901)
+## No Information Rate : 0.9011
+## P-Value [Acc > NIR] : 0.9825
+##
+## Kappa : 0.0296
+##
+## Mcnemar's Test P-Value : <2e-16
+##
+## Sensitivity : 0.99494
+## Specificity : 0.02241
+## Pos Pred Value : 0.90269
+## Neg Pred Value : 0.32720
+## Prevalence : 0.90113
+## Detection Rate : 0.89657
+## Detection Prevalence : 0.99323
+## Balanced Accuracy : 0.50868
+##
+## 'Positive' Class : 0
+##
+confusione1.3 <- confusionMatrix(as.factor(ifelse(predict1.3>0.3, 1, 0)), as.factor(test_opf$result))
+
+
+roc1.3 <- roc(test_opf$result ~ predict1.3, plot = T)
+## Setting levels: control = 0, case = 1
+## Setting direction: controls < cases
+legend("bottomright", legend = paste("AUC =", round(auc(roc1.3), 4)), lwd = 2, box.lwd = 0, bg = "white")
+rms::val.prob(as.numeric(as.character(predict1.3)), as.numeric(as.character(test_opf$result)))
+## Dxy C (ROC) R2 D D:Chi-sq
+## 4.895863e-01 7.447931e-01 1.329346e-01 6.527350e-02 4.714791e+03
+## D:p U U:Chi-sq U:p Q
+## NA 3.116984e-04 2.450961e+01 4.762170e-06 6.496181e-02
+## Brier Intercept Slope Emax E90
+## 8.333969e-02 -1.391553e-01 9.295095e-01 2.800603e-01 2.064602e-02
+## Eavg S:z S:p
+## 1.156119e-02 3.082489e+00 2.052772e-03
+set.seed(1234)
+
+modello1.4 <- ranger(train_opf$result ~ minute + lastAction + is_weakfoot + X + Y + h_a,
+ data = new_train_opf, num.trees = 500, mtry = 3, seed = 42, probability = T)
+
+train_predict_1.4.1 <- predict(modello1.4, data = new_train_opf)
+train_predict1.4 <- train_predict_1.4.1$predictions[, 2]
+
+predict1.4.1 <- predict(modello1.4, data = new_test_opf)
+
+predict1.4 <- predict1.4.1$predictions[, 2]
+
+confusione1.4 <- confusionMatrix(as.factor(ifelse(predict1.4 >0.3, 1, 0)), as.factor(test_opf$result))
+confusione1.4
+## Confusion Matrix and Statistics
+##
+## Reference
+## Prediction 0 1
+## 0 61892 3957
+## 1 3184 3183
+##
+## Accuracy : 0.9011
+## 95% CI : (0.8989, 0.9033)
+## No Information Rate : 0.9011
+## P-Value [Acc > NIR] : 0.5081
+##
+## Kappa : 0.417
+##
+## Mcnemar's Test P-Value : <2e-16
+##
+## Sensitivity : 0.9511
+## Specificity : 0.4458
+## Pos Pred Value : 0.9399
+## Neg Pred Value : 0.4999
+## Prevalence : 0.9011
+## Detection Rate : 0.8570
+## Detection Prevalence : 0.9118
+## Balanced Accuracy : 0.6984
+##
+## 'Positive' Class : 0
+##
+roc1.4 <- roc(test_opf$result ~ as.numeric(predict1.4), plot = T)
+## Setting levels: control = 0, case = 1
+## Setting direction: controls < cases
+legend("bottomright", legend = paste("AUC =", round(auc(roc1.4), 4)), lwd = 2, box.lwd = 0, bg = "white")
+rms::val.prob(as.numeric(as.character(predict1.4)), as.numeric(as.character(new_test_opf$result)))
+## Warning in rms::val.prob(as.numeric(as.character(predict1.4)),
+## as.numeric(as.character(new_test_opf$result))): 929 observations deleted from
+## logistic calibration due to probs. of 0 or 1
+## Dxy C (ROC) R2 D D:Chi-sq
+## 6.706364e-01 8.353182e-01 3.232237e-01 1.679441e-01 1.197323e+04
+## D:p U U:Chi-sq U:p Q
+## NA Inf Inf 0.000000e+00 -Inf
+## Brier Intercept Slope Emax E90
+## 6.651975e-02 -2.583708e-01 8.922615e-01 4.151460e-02 3.601427e-02
+## Eavg S:z S:p
+## 1.228770e-02 -3.416589e+00 6.341090e-04
+set.seed(1234)
+
+modello1.5 <- ranger(train_opf$result ~ minute + lastAction + is_weakfoot + X + Y + h_a,
+ data = new_train_opf, num.trees = 500, seed = 42, probability = T)
+
+predict1.5.1 <- predict(modello1.5, data = new_test_opf)
+
+predict1.5 <- predict1.5.1$predictions[, 2]
+
+confusione1.5 <- confusionMatrix(as.factor(ifelse(predict1.5>0.3, 1, 0)), as.factor(new_test_opf$result))
+confusione1.5
+## Confusion Matrix and Statistics
+##
+## Reference
+## Prediction 0 1
+## 0 62474 4157
+## 1 2602 2983
+##
+## Accuracy : 0.9064
+## 95% CI : (0.9043, 0.9085)
+## No Information Rate : 0.9011
+## P-Value [Acc > NIR] : 8.749e-07
+##
+## Kappa : 0.4184
+##
+## Mcnemar's Test P-Value : < 2.2e-16
+##
+## Sensitivity : 0.9600
+## Specificity : 0.4178
+## Pos Pred Value : 0.9376
+## Neg Pred Value : 0.5341
+## Prevalence : 0.9011
+## Detection Rate : 0.8651
+## Detection Prevalence : 0.9227
+## Balanced Accuracy : 0.6889
+##
+## 'Positive' Class : 0
+##
+roc1.5 <- roc(new_test_opf$result ~ as.numeric(predict1.5), plot = T)
+## Setting levels: control = 0, case = 1
+## Setting direction: controls < cases
+legend("bottomright", legend = paste("AUC =", round(auc(roc1.5), 4)), lwd = 2, box.lwd = 0, bg = "white")
+rms::val.prob(as.numeric(as.character(predict1.5)), as.numeric(as.character(new_test_opf$result)))
+## Dxy C (ROC) R2 D D:Chi-sq
+## 6.784610e-01 8.392305e-01 3.261361e-01 1.684718e-01 1.216736e+04
+## D:p U U:Chi-sq U:p Q
+## NA 9.957298e-04 7.390762e+01 1.110223e-16 1.674760e-01
+## Brier Intercept Slope Emax E90
+## 6.710256e-02 1.193215e-01 1.096641e+00 1.542791e-01 1.849012e-02
+## Eavg S:z S:p
+## 1.020388e-02 -8.226908e+00 1.921075e-16
+modello1.8 <- nnet(result ~ minute + lastAction + is_weakfoot + X + Y + h_a, data = new_train_opf, size = 10, maxit = 1000, linout = FALSE)
+## # weights: 351
+## initial value 118545.093917
+## iter 10 value 69981.009756
+## iter 20 value 69192.592115
+## iter 30 value 67888.711525
+## iter 40 value 63628.809594
+## iter 50 value 62525.075031
+## iter 60 value 62057.467967
+## iter 70 value 61604.685356
+## iter 80 value 61532.876008
+## iter 90 value 61487.160400
+## iter 100 value 61233.657946
+## iter 110 value 60338.128851
+## iter 120 value 59616.426996
+## iter 130 value 58733.031793
+## iter 140 value 58349.312834
+## iter 150 value 58148.958166
+## iter 160 value 57935.848822
+## iter 170 value 57756.338123
+## iter 180 value 57639.865936
+## iter 190 value 57573.532413
+## iter 200 value 57512.936093
+## iter 210 value 57426.532572
+## iter 220 value 57327.745162
+## iter 230 value 57217.958189
+## iter 240 value 57172.723167
+## iter 250 value 57148.220125
+## iter 260 value 57133.696190
+## iter 270 value 57116.527271
+## iter 280 value 57102.667221
+## iter 290 value 57084.939379
+## iter 300 value 57056.657224
+## iter 310 value 57048.010413
+## iter 320 value 57038.515424
+## iter 330 value 57021.976784
+## iter 340 value 57009.160524
+## iter 350 value 57001.456556
+## iter 360 value 56993.776476
+## iter 370 value 56988.466171
+## iter 380 value 56985.363909
+## iter 390 value 56984.264328
+## iter 400 value 56983.744595
+## iter 410 value 56983.261082
+## iter 420 value 56982.689945
+## iter 430 value 56981.621792
+## iter 440 value 56979.853764
+## iter 450 value 56977.893636
+## iter 460 value 56975.779385
+## iter 470 value 56973.815073
+## iter 480 value 56972.577633
+## iter 490 value 56971.942203
+## iter 500 value 56971.328528
+## iter 510 value 56969.780842
+## iter 520 value 56969.663506
+## iter 530 value 56969.309645
+## iter 540 value 56968.354652
+## iter 550 value 56967.126957
+## iter 560 value 56964.815551
+## iter 570 value 56961.936920
+## iter 580 value 56957.827947
+## iter 590 value 56955.201372
+## iter 600 value 56954.353889
+## iter 610 value 56953.875346
+## iter 620 value 56953.530963
+## iter 630 value 56953.236351
+## iter 640 value 56953.022966
+## iter 650 value 56952.857124
+## iter 660 value 56952.730886
+## iter 670 value 56952.602141
+## iter 680 value 56952.471966
+## iter 690 value 56952.311041
+## iter 700 value 56952.158581
+## iter 710 value 56951.968581
+## iter 720 value 56951.865165
+## iter 730 value 56951.791552
+## iter 740 value 56951.713886
+## iter 750 value 56951.575373
+## iter 760 value 56951.479369
+## iter 770 value 56951.335809
+## iter 780 value 56951.186863
+## iter 790 value 56951.084841
+## iter 800 value 56950.981560
+## iter 810 value 56950.861977
+## iter 820 value 56950.727533
+## iter 830 value 56950.537039
+## iter 840 value 56950.345343
+## iter 850 value 56950.119654
+## iter 860 value 56949.843306
+## iter 870 value 56949.747925
+## iter 880 value 56949.637210
+## iter 890 value 56949.555767
+## iter 900 value 56949.517758
+## iter 910 value 56949.381139
+## iter 920 value 56949.296510
+## iter 930 value 56949.210179
+## iter 940 value 56949.124307
+## iter 950 value 56948.983340
+## iter 960 value 56948.841103
+## iter 970 value 56948.594921
+## iter 980 value 56948.099597
+## iter 990 value 56947.901399
+## iter1000 value 56947.590591
+## final value 56947.590591
+## stopped after 1000 iterations
+predict1.8 <- predict(modello1.8, newdata = new_test_opf)
+
+confusione1.8 <- confusionMatrix(as.factor(ifelse(predict1.8>0.3, 1, 0)), new_test_opf$result)
+confusione1.8
+## Confusion Matrix and Statistics
+##
+## Reference
+## Prediction 0 1
+## 0 61956 4927
+## 1 3120 2213
+##
+## Accuracy : 0.8886
+## 95% CI : (0.8863, 0.8909)
+## No Information Rate : 0.9011
+## P-Value [Acc > NIR] : 1
+##
+## Kappa : 0.2953
+##
+## Mcnemar's Test P-Value : <2e-16
+##
+## Sensitivity : 0.9521
+## Specificity : 0.3099
+## Pos Pred Value : 0.9263
+## Neg Pred Value : 0.4150
+## Prevalence : 0.9011
+## Detection Rate : 0.8579
+## Detection Prevalence : 0.9262
+## Balanced Accuracy : 0.6310
+##
+## 'Positive' Class : 0
+##
+roc1.8 <- roc(new_test_opf$result ~ as.numeric(predict1.8), plot = T)
+## Setting levels: control = 0, case = 1
+## Setting direction: controls < cases
+legend("bottomright", legend = paste("AUC =", round(auc(roc1.8), 4)), lwd = 2, box.lwd = 0, bg = "white")
+rms::val.prob(as.numeric(as.character(predict1.8)), as.numeric(as.character(new_test_opf$result)))
+## Warning in rms::val.prob(as.numeric(as.character(predict1.8)),
+## as.numeric(as.character(new_test_opf$result))): 315 observations deleted from
+## logistic calibration due to probs. of 0 or 1
+## Dxy C (ROC) R2 D D:Chi-sq
+## 5.834815e-01 7.917408e-01 2.184253e-01 1.097806e-01 7.894334e+03
+## D:p U U:Chi-sq U:p Q
+## NA Inf Inf 0.000000e+00 -Inf
+## Brier Intercept Slope Emax E90
+## 7.652105e-02 -7.947524e-02 9.565840e-01 4.063039e-02 7.167874e-03
+## Eavg S:z S:p
+## 3.731212e-03 1.660904e+00 9.673279e-02
+confusione1.us <- confusionMatrix(as.factor(ifelse(test_opf$`xG Understat`>0.3, 1, 0)), test_opf$result)
+confusione1.us
+## Confusion Matrix and Statistics
+##
+## Reference
+## Prediction 0 1
+## 0 60545 3589
+## 1 4531 3551
+##
+## Accuracy : 0.8876
+## 95% CI : (0.8852, 0.8899)
+## No Information Rate : 0.9011
+## P-Value [Acc > NIR] : 1
+##
+## Kappa : 0.404
+##
+## Mcnemar's Test P-Value : <2e-16
+##
+## Sensitivity : 0.9304
+## Specificity : 0.4973
+## Pos Pred Value : 0.9440
+## Neg Pred Value : 0.4394
+## Prevalence : 0.9011
+## Detection Rate : 0.8384
+## Detection Prevalence : 0.8881
+## Balanced Accuracy : 0.7139
+##
+## 'Positive' Class : 0
+##
+roc1.us <- roc(test_opf$result ~ as.numeric(test_opf$`xG Understat`), plot = T)
+## Setting levels: control = 0, case = 1
+## Setting direction: controls < cases
+legend("bottomright", legend = paste("AUC =", round(auc(roc1.us), 4)), lwd = 2, box.lwd = 0, bg = "white")
+rms::val.prob(as.numeric(as.character(test_opf$`xG Understat`)), as.numeric(as.character(test_opf$result)))
+## Warning in rms::val.prob(as.numeric(as.character(test_opf$`xG Understat`)), : 6
+## observations deleted from logistic calibration due to probs. of 0 or 1
+## Dxy C (ROC) R2 D D:Chi-sq
+## 6.294039e-01 8.147019e-01 2.776084e-01 1.415280e-01 1.022074e+04
+## D:p U U:Chi-sq U:p Q
+## NA Inf Inf 0.000000e+00 -Inf
+## Brier Intercept Slope Emax E90
+## 7.128048e-02 -1.240680e-01 9.296240e-01 8.520251e-02 8.860956e-03
+## Eavg S:z S:p
+## 5.630629e-03 3.822074e+00 1.323339e-04
+set.seed(123)
+index_oph <- sample.split(Y = openplay_head$result, SplitRatio = 0.75)
+train_oph <- openplay_head[index_oph, ]
+
+indice_test_oph <- which(index_oph == FALSE)
+test_oph <- openplay_head[indice_test_oph, ]
+modello2.1 <- glm(result ~ minute + lastAction + X + exp(Y^2) + h_a,
+ data = train_oph, family = binomial)
+
+summary2.1 <- summary(modello2.1)
+summary2.1
+##
+## Call:
+## glm(formula = result ~ minute + lastAction + X + exp(Y^2) + h_a,
+## family = binomial, data = train_oph)
+##
+## Coefficients:
+## Estimate Std. Error z value Pr(>|z|)
+## (Intercept) -2.680e+01 7.831e-01 -34.219 < 2e-16 ***
+## minute -9.752e-04 7.921e-04 -1.231 0.21826
+## lastActionBallTouch -2.660e-01 6.064e-01 -0.439 0.66098
+## lastActionChallenge -1.277e+01 6.023e+02 -0.021 0.98309
+## lastActionChipped 9.195e-01 8.387e-02 10.964 < 2e-16 ***
+## lastActionCornerAwarded -1.305e+01 2.282e+02 -0.057 0.95440
+## lastActionCross 7.696e-01 5.820e-02 13.222 < 2e-16 ***
+## lastActionEnd -1.284e+01 2.877e+02 -0.045 0.96441
+## lastActionFoul -1.302e+01 1.854e+02 -0.070 0.94400
+## lastActionGoal -1.338e+01 3.162e+02 -0.042 0.96626
+## lastActionHeadPass 9.408e-01 1.567e-01 6.002 1.95e-09 ***
+## lastActionNone 5.923e-01 1.292e-01 4.585 4.54e-06 ***
+## lastActionPass 7.710e-01 1.802e-01 4.279 1.88e-05 ***
+## lastActionRebound 1.274e+00 1.125e-01 11.322 < 2e-16 ***
+## lastActionSubstitutionOff 1.729e+01 1.455e+03 0.012 0.99052
+## lastActionSubstitutionOn -4.954e-01 1.063e+00 -0.466 0.64106
+## lastActionTackle -1.282e+01 4.519e+02 -0.028 0.97737
+## lastActionTakeOn 1.697e+00 6.706e-01 2.530 0.01141 *
+## lastActionThroughball 2.603e+00 2.779e-01 9.368 < 2e-16 ***
+## X 2.731e+01 7.882e-01 34.656 < 2e-16 ***
+## exp(Y^2) -7.093e-01 2.478e-01 -2.862 0.00421 **
+## h_ah 5.532e-02 4.326e-02 1.279 0.20097
+## ---
+## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
+##
+## (Dispersion parameter for binomial family taken to be 1)
+##
+## Null deviance: 16885 on 23101 degrees of freedom
+## Residual deviance: 15006 on 23080 degrees of freedom
+## AIC: 15050
+##
+## Number of Fisher Scoring iterations: 14
+predict2.1 <- predict.glm(modello2.1, newdata = test_oph, type = "response")
+
+roc2.1 <- roc(test_oph$result ~ predict2.1, plot = T)
+## Setting levels: control = 0, case = 1
+## Setting direction: controls < cases
+legend("bottomright", legend = paste("AUC =", round(auc(roc2.1), 4)), lwd = 2, box.lwd = 0, bg = "white")
+confusione2.1 <- confusionMatrix(as.factor(ifelse(predict2.1>0.3, 1, 0)), as.factor(test_oph$result))
+confusione2.1
+## Confusion Matrix and Statistics
+##
+## Reference
+## Prediction 0 1
+## 0 6430 743
+## 1 352 175
+##
+## Accuracy : 0.8578
+## 95% CI : (0.8498, 0.8655)
+## No Information Rate : 0.8808
+## P-Value [Acc > NIR] : 1
+##
+## Kappa : 0.17
+##
+## Mcnemar's Test P-Value : <2e-16
+##
+## Sensitivity : 0.9481
+## Specificity : 0.1906
+## Pos Pred Value : 0.8964
+## Neg Pred Value : 0.3321
+## Prevalence : 0.8808
+## Detection Rate : 0.8351
+## Detection Prevalence : 0.9316
+## Balanced Accuracy : 0.5694
+##
+## 'Positive' Class : 0
+##
+rms::val.prob(as.numeric(as.character(predict2.1)), as.numeric(as.character(test_oph$result)))
+## Dxy C (ROC) R2 D D:Chi-sq
+## 4.741098e-01 7.370549e-01 1.357465e-01 7.284670e-02 5.619196e+02
+## D:p U U:Chi-sq U:p Q
+## NA 2.890682e-05 2.222582e+00 3.291337e-01 7.281779e-02
+## Brier Intercept Slope Emax E90
+## 9.674780e-02 -1.001238e-01 9.372446e-01 1.817344e-01 1.510059e-02
+## Eavg S:z S:p
+## 6.637031e-03 8.586321e-01 3.905435e-01
+set.seed(1234)
+
+new_train_oph <- train_oph
+
+new_train_oph$Y <- exp(new_train_oph$Y^2)
+
+new_test_oph <- test_oph
+
+new_test_oph$Y <- exp(new_test_oph$Y^2)
+
+set.seed(1234)
+
+
+modello2.3 <- lda(result ~ minute + lastAction + X + exp(Y^2) + h_a, data = train_oph)
+
+summary(modello2.3)
+## Length Class Mode
+## prior 2 -none- numeric
+## counts 2 -none- numeric
+## means 42 -none- numeric
+## scaling 21 -none- numeric
+## lev 2 -none- character
+## svd 1 -none- numeric
+## N 1 -none- numeric
+## call 3 -none- call
+## terms 3 terms call
+## xlevels 2 -none- list
+predict2.3.1 <- predict(modello2.3, newdata = test_oph)
+
+predict2.3 <- predict2.3.1$posterior[, 2]
+
+confusione2.3.1 <- confusionMatrix(as.factor(predict2.3.1$class), as.factor(test_oph$result))
+confusione2.3.1
+## Confusion Matrix and Statistics
+##
+## Reference
+## Prediction 0 1
+## 0 6735 885
+## 1 47 33
+##
+## Accuracy : 0.879
+## 95% CI : (0.8715, 0.8862)
+## No Information Rate : 0.8808
+## P-Value [Acc > NIR] : 0.6961
+##
+## Kappa : 0.0479
+##
+## Mcnemar's Test P-Value : <2e-16
+##
+## Sensitivity : 0.99307
+## Specificity : 0.03595
+## Pos Pred Value : 0.88386
+## Neg Pred Value : 0.41250
+## Prevalence : 0.88078
+## Detection Rate : 0.87468
+## Detection Prevalence : 0.98961
+## Balanced Accuracy : 0.51451
+##
+## 'Positive' Class : 0
+##
+confusione2.3 <- confusionMatrix(as.factor(ifelse(predict2.3>0.3, 1, 0)), as.factor(test_oph$result))
+
+
+roc2.3 <- roc(test_oph$result ~ predict2.3, plot = T)
+## Setting levels: control = 0, case = 1
+## Setting direction: controls < cases
+legend("bottomright", legend = paste("AUC =", round(auc(roc2.3), 4)), lwd = 2, box.lwd = 0, bg = "white")
+rms::val.prob(as.numeric(as.character(predict2.3)), as.numeric(as.character(test_oph$result)))
+## Dxy C (ROC) R2 D D:Chi-sq
+## 4.699917e-01 7.349958e-01 1.318688e-01 7.068646e-02 5.452857e+02
+## D:p U U:Chi-sq U:p Q
+## NA 4.950705e-04 5.812043e+00 5.469290e-02 7.019139e-02
+## Brier Intercept Slope Emax E90
+## 9.774986e-02 -1.619795e-01 9.020178e-01 3.755251e-01 2.307149e-02
+## Eavg S:z S:p
+## 1.214045e-02 1.746477e+00 8.072811e-02
+set.seed(1234)
+
+modello2.4 <- ranger(train_oph$result ~ minute + lastAction + X + Y + h_a,
+ data = new_train_oph, num.trees = 500, mtry = 3, seed = 42, probability = T)
+
+predict2.4.1 <- predict(modello2.4, data = new_test_oph)
+
+predict2.4 <- predict2.4.1$predictions[, 2]
+
+confusione2.4 <- confusionMatrix(as.factor(ifelse(predict2.4 >0.3, 1, 0)), as.factor(test_oph$result))
+confusione2.4
+## Confusion Matrix and Statistics
+##
+## Reference
+## Prediction 0 1
+## 0 6272 522
+## 1 510 396
+##
+## Accuracy : 0.866
+## 95% CI : (0.8582, 0.8735)
+## No Information Rate : 0.8808
+## P-Value [Acc > NIR] : 1.000
+##
+## Kappa : 0.3582
+##
+## Mcnemar's Test P-Value : 0.732
+##
+## Sensitivity : 0.9248
+## Specificity : 0.4314
+## Pos Pred Value : 0.9232
+## Neg Pred Value : 0.4371
+## Prevalence : 0.8808
+## Detection Rate : 0.8145
+## Detection Prevalence : 0.8823
+## Balanced Accuracy : 0.6781
+##
+## 'Positive' Class : 0
+##
+roc2.4 <- roc(test_oph$result ~ as.numeric(predict2.4), plot = T)
+## Setting levels: control = 0, case = 1
+## Setting direction: controls < cases
+legend("bottomright", legend = paste("AUC =", round(auc(roc2.4), 4)), lwd = 2, box.lwd = 0, bg = "white")
+rms::val.prob(as.numeric(as.character(predict2.4)), as.numeric(as.character(new_test_oph$result)))
+## Warning in rms::val.prob(as.numeric(as.character(predict2.4)),
+## as.numeric(as.character(new_test_oph$result))): 245 observations deleted from
+## logistic calibration due to probs. of 0 or 1
+## Dxy C (ROC) R2 D D:Chi-sq
+## 5.957707e-01 7.978854e-01 2.638290e-01 1.492232e-01 1.113459e+03
+## D:p U U:Chi-sq U:p Q
+## NA 7.485408e-03 5.780371e+01 2.805534e-13 1.417378e-01
+## Brier Intercept Slope Emax E90
+## 8.376312e-02 -4.099566e-01 7.751907e-01 9.897154e-02 5.407294e-02
+## Eavg S:z S:p
+## 2.153259e-02 1.694824e+00 9.010883e-02
+set.seed(1234)
+
+modello2.5 <- ranger(train_oph$result ~ minute + lastAction + X + Y + h_a,
+ data = new_train_oph, num.trees = 500, seed = 42, probability = T)
+
+predict2.5.1 <- predict(modello2.5, data = new_test_oph)
+
+predict2.5 <- predict2.5.1$predictions[, 2]
+
+confusione2.5 <- confusionMatrix(as.factor(ifelse(predict2.5>0.3, 1, 0)), as.factor(new_test_oph$result))
+confusione2.5
+## Confusion Matrix and Statistics
+##
+## Reference
+## Prediction 0 1
+## 0 6332 530
+## 1 450 388
+##
+## Accuracy : 0.8727
+## 95% CI : (0.8651, 0.8801)
+## No Information Rate : 0.8808
+## P-Value [Acc > NIR] : 0.98542
+##
+## Kappa : 0.3703
+##
+## Mcnemar's Test P-Value : 0.01162
+##
+## Sensitivity : 0.9336
+## Specificity : 0.4227
+## Pos Pred Value : 0.9228
+## Neg Pred Value : 0.4630
+## Prevalence : 0.8808
+## Detection Rate : 0.8223
+## Detection Prevalence : 0.8912
+## Balanced Accuracy : 0.6782
+##
+## 'Positive' Class : 0
+##
+roc2.5 <- roc(new_test_oph$result ~ as.numeric(predict2.5), plot = T)
+## Setting levels: control = 0, case = 1
+## Setting direction: controls < cases
+legend("bottomright", legend = paste("AUC =", round(auc(roc2.5), 4)), lwd = 2, box.lwd = 0, bg = "white")
+rms::val.prob(as.numeric(as.character(predict2.5)), as.numeric(as.character(new_test_oph$result)))
+## Dxy C (ROC) R2 D D:Chi-sq
+## 6.244548e-01 8.122274e-01 2.908950e-01 1.633468e-01 1.258770e+03
+## D:p U U:Chi-sq U:p Q
+## NA 1.468163e-03 1.330485e+01 1.290887e-03 1.618786e-01
+## Brier Intercept Slope Emax E90
+## 8.252587e-02 -2.167724e-01 8.903037e-01 6.141308e-02 3.284286e-02
+## Eavg S:z S:p
+## 1.285068e-02 3.637999e-01 7.160074e-01
+modello2.8 <- nnet(result ~ minute + lastAction + X + Y + h_a, data = new_train_oph, size = 10, maxit = 1000, linout = FALSE)
+## # weights: 231
+## initial value 8722.578293
+## iter 10 value 8435.748320
+## iter 20 value 8279.696679
+## iter 30 value 8089.025131
+## iter 40 value 8065.396259
+## iter 50 value 7820.389397
+## iter 60 value 7620.533019
+## iter 70 value 7527.182160
+## iter 80 value 7347.012489
+## iter 90 value 7306.470349
+## iter 100 value 7270.602773
+## iter 110 value 7209.485135
+## iter 120 value 7149.740344
+## iter 130 value 7108.479248
+## iter 140 value 7089.482769
+## iter 150 value 7081.089074
+## iter 160 value 7075.318128
+## iter 170 value 7072.006341
+## iter 180 value 7065.815611
+## iter 190 value 7061.494404
+## iter 200 value 7059.751906
+## iter 210 value 7057.881921
+## iter 220 value 7056.740067
+## iter 230 value 7055.132476
+## iter 240 value 7052.237977
+## iter 250 value 7051.064152
+## iter 260 value 7050.361597
+## iter 270 value 7049.824061
+## iter 280 value 7049.270668
+## iter 290 value 7048.685893
+## iter 300 value 7047.999021
+## iter 310 value 7047.421214
+## iter 320 value 7047.147571
+## iter 330 value 7046.797723
+## iter 340 value 7046.235374
+## iter 350 value 7045.552039
+## iter 360 value 7043.659123
+## iter 370 value 7043.059688
+## iter 380 value 7042.916778
+## iter 390 value 7042.807888
+## iter 400 value 7042.674153
+## iter 410 value 7042.430997
+## iter 420 value 7042.132006
+## iter 430 value 7041.922490
+## iter 440 value 7041.629317
+## iter 450 value 7041.465949
+## iter 460 value 7041.325280
+## iter 470 value 7041.021732
+## iter 480 value 7040.845830
+## iter 490 value 7040.576073
+## iter 500 value 7040.369471
+## iter 510 value 7040.308663
+## iter 520 value 7040.267512
+## iter 530 value 7040.203691
+## iter 540 value 7040.109465
+## iter 550 value 7040.064034
+## iter 560 value 7039.967961
+## iter 570 value 7039.905303
+## iter 580 value 7039.802194
+## iter 590 value 7039.726709
+## iter 600 value 7039.678707
+## iter 610 value 7039.625371
+## iter 620 value 7039.569185
+## iter 630 value 7039.529814
+## iter 640 value 7039.473146
+## iter 650 value 7039.436103
+## iter 660 value 7039.409985
+## final value 7039.386145
+## converged
+predict2.8 <- predict(modello2.8, newdata = new_test_oph)
+
+confusione2.8 <- confusionMatrix(as.factor(ifelse(predict2.8>0.3, 1, 0)), new_test_oph$result)
+confusione2.8
+## Confusion Matrix and Statistics
+##
+## Reference
+## Prediction 0 1
+## 0 6407 655
+## 1 375 263
+##
+## Accuracy : 0.8662
+## 95% CI : (0.8584, 0.8738)
+## No Information Rate : 0.8808
+## P-Value [Acc > NIR] : 1
+##
+## Kappa : 0.2663
+##
+## Mcnemar's Test P-Value : <2e-16
+##
+## Sensitivity : 0.9447
+## Specificity : 0.2865
+## Pos Pred Value : 0.9073
+## Neg Pred Value : 0.4122
+## Prevalence : 0.8808
+## Detection Rate : 0.8321
+## Detection Prevalence : 0.9171
+## Balanced Accuracy : 0.6156
+##
+## 'Positive' Class : 0
+##
+roc2.8 <- roc(new_test_oph$result ~ as.numeric(predict2.8), plot = T)
+## Setting levels: control = 0, case = 1
+## Setting direction: controls < cases
+legend("bottomright", legend = paste("AUC =", round(auc(roc2.8), 4)), lwd = 2, box.lwd = 0, bg = "white")
+rms::val.prob(as.numeric(as.character(predict2.8)), as.numeric(as.character(new_test_oph$result)))
+## Warning in rms::val.prob(as.numeric(as.character(predict2.8)),
+## as.numeric(as.character(new_test_oph$result))): 64 observations deleted from
+## logistic calibration due to probs. of 0 or 1
+## Dxy C (ROC) R2 D D:Chi-sq
+## 0.536170235 0.768085117 0.187960276 0.102744421 785.556401386
+## D:p U U:Chi-sq U:p Q
+## NA Inf Inf 0.000000000 -Inf
+## Brier Intercept Slope Emax E90
+## 0.092247046 -0.148281506 0.904902415 0.153865186 0.008181917
+## Eavg S:z S:p
+## 0.006748645 1.625219257 0.104115849
+confusione2.us <- confusionMatrix(as.factor(ifelse(test_oph$`xG Understat`>0.3, 1, 0)), test_oph$result)
+confusione2.us
+## Confusion Matrix and Statistics
+##
+## Reference
+## Prediction 0 1
+## 0 6021 391
+## 1 761 527
+##
+## Accuracy : 0.8504
+## 95% CI : (0.8422, 0.8583)
+## No Information Rate : 0.8808
+## P-Value [Acc > NIR] : 1
+##
+## Kappa : 0.3933
+##
+## Mcnemar's Test P-Value : <2e-16
+##
+## Sensitivity : 0.8878
+## Specificity : 0.5741
+## Pos Pred Value : 0.9390
+## Neg Pred Value : 0.4092
+## Prevalence : 0.8808
+## Detection Rate : 0.7819
+## Detection Prevalence : 0.8327
+## Balanced Accuracy : 0.7309
+##
+## 'Positive' Class : 0
+##
+roc2.us <- roc(test_oph$result ~ as.numeric(test_oph$`xG Understat`), plot = T)
+## Setting levels: control = 0, case = 1
+## Setting direction: controls < cases
+legend("bottomright", legend = paste("AUC =", round(auc(roc2.us), 4)), lwd = 2, box.lwd = 0, bg = "white")
+rms::val.prob(as.numeric(as.character(test_oph$`xG Understat`)), as.numeric(as.character(test_oph$result)))
+## Dxy C (ROC) R2 D D:Chi-sq
+## 6.478921e-01 8.239461e-01 2.930995e-01 1.646936e-01 1.269140e+03
+## D:p U U:Chi-sq U:p Q
+## NA 2.090539e-03 1.809715e+01 1.175584e-04 1.626030e-01
+## Brier Intercept Slope Emax E90
+## 8.339068e-02 -1.780625e-01 9.907059e-01 4.643716e-02 3.833526e-02
+## Eavg S:z S:p
+## 1.470120e-02 -3.084414e+00 2.039538e-03
+set.seed(123)
+index_spf <- sample.split(Y = setpiece_foot$result, SplitRatio = 0.75)
+train_spf <- setpiece_foot[index_spf, ]
+
+indice_test_spf <- which(index_spf == FALSE)
+test_spf <- setpiece_foot[indice_test_spf, ]
+modello3.1 <- glm(result ~ minute + lastAction + is_weakfoot + X + exp(Y^2) + h_a + situation,
+ data = train_spf, family = binomial)
+
+summary(modello3.1)
+##
+## Call:
+## glm(formula = result ~ minute + lastAction + is_weakfoot + X +
+## exp(Y^2) + h_a + situation, family = binomial, data = train_spf)
+##
+## Coefficients: (1 not defined because of singularities)
+## Estimate Std. Error z value Pr(>|z|)
+## (Intercept) -1.421e+01 2.918e-01 -48.687 < 2e-16 ***
+## minute 3.158e-04 6.464e-04 0.489 0.62515
+## lastActionBallRecovery 2.785e-01 7.601e-01 0.366 0.71411
+## lastActionBallTouch -4.691e-03 1.859e-01 -0.025 0.97986
+## lastActionBlockedPass -1.258e+01 3.202e+02 -0.039 0.96867
+## lastActionCard -1.336e+01 4.690e+02 -0.028 0.97728
+## lastActionChipped 1.790e-01 1.591e-01 1.125 0.26060
+## lastActionCornerAwarded -2.713e-01 3.710e-01 -0.731 0.46466
+## lastActionCross 1.187e-01 1.261e-01 0.941 0.34681
+## lastActionDispossessed -5.646e-01 7.315e-01 -0.772 0.44014
+## lastActionEnd -1.317e+01 1.526e+02 -0.086 0.93122
+## lastActionFoul 2.640e-01 3.382e-01 0.781 0.43504
+## lastActionGoal -1.334e+01 2.110e+02 -0.063 0.94958
+## lastActionGoodSkill -1.355e+01 8.259e+02 -0.016 0.98691
+## lastActionHeadPass 8.723e-01 1.301e-01 6.705 2.01e-11 ***
+## lastActionInterception -4.658e-01 1.049e+00 -0.444 0.65714
+## lastActionLayOff 9.714e-01 3.336e-01 2.912 0.00360 **
+## lastActionNone 5.073e-01 1.203e-01 4.216 2.48e-05 ***
+## lastActionPass 6.736e-01 1.242e-01 5.424 5.83e-08 ***
+## lastActionRebound 1.219e+00 1.216e-01 10.030 < 2e-16 ***
+## lastActionStandard 1.470e+00 1.248e-01 11.781 < 2e-16 ***
+## lastActionSubstitutionOn 2.382e+00 1.061e+00 2.246 0.02473 *
+## lastActionTackle -2.138e-01 7.419e-01 -0.288 0.77326
+## lastActionTakeOn 2.452e-02 2.147e-01 0.114 0.90908
+## lastActionThroughball 1.445e+00 2.679e-01 5.393 6.95e-08 ***
+## is_weakfootYes -1.214e-01 4.351e-02 -2.791 0.00526 **
+## X 1.400e+01 2.924e-01 47.871 < 2e-16 ***
+## exp(Y^2) -6.425e-01 8.422e-02 -7.629 2.36e-14 ***
+## h_ah -8.534e-02 3.469e-02 -2.460 0.01389 *
+## situationFromCorner -9.978e-02 4.192e-02 -2.380 0.01730 *
+## situationSetPiece NA NA NA NA
+## ---
+## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
+##
+## (Dispersion parameter for binomial family taken to be 1)
+##
+## Null deviance: 28504 on 52076 degrees of freedom
+## Residual deviance: 24469 on 52047 degrees of freedom
+## AIC: 24529
+##
+## Number of Fisher Scoring iterations: 14
+predict3.1 <- predict.glm(modello3.1, newdata = test_spf, type = "response")
+
+roc3.1 <- roc(test_spf$result ~ predict3.1, plot = T)
+## Setting levels: control = 0, case = 1
+## Setting direction: controls < cases
+legend("bottomright", legend = paste("AUC =", round(auc(roc3.1), 4)), lwd = 2, box.lwd = 0, bg = "white")
+confusione3.1 <- confusionMatrix(as.factor(ifelse(predict3.1>0.3, 1, 0)), as.factor(test_spf$result))
+confusione3.1
+## Confusion Matrix and Statistics
+##
+## Reference
+## Prediction 0 1
+## 0 15617 1107
+## 1 389 245
+##
+## Accuracy : 0.9138
+## 95% CI : (0.9095, 0.9179)
+## No Information Rate : 0.9221
+## P-Value [Acc > NIR] : 1
+##
+## Kappa : 0.2073
+##
+## Mcnemar's Test P-Value : <2e-16
+##
+## Sensitivity : 0.9757
+## Specificity : 0.1812
+## Pos Pred Value : 0.9338
+## Neg Pred Value : 0.3864
+## Prevalence : 0.9221
+## Detection Rate : 0.8997
+## Detection Prevalence : 0.9635
+## Balanced Accuracy : 0.5785
+##
+## 'Positive' Class : 0
+##
+rms::val.prob(as.numeric(as.character(predict3.1)), as.numeric(as.character(test_spf$result)))
+## Dxy C (ROC) R2 D D:Chi-sq
+## 5.080409e-01 7.540205e-01 1.505219e-01 6.547547e-02 1.137523e+03
+## D:p U U:Chi-sq U:p Q
+## NA 3.787373e-04 8.574122e+00 1.374527e-02 6.509673e-02
+## Brier Intercept Slope Emax E90
+## 6.520744e-02 -1.766042e-01 9.150318e-01 1.923828e-02 1.603711e-02
+## Eavg S:z S:p
+## 7.930469e-03 3.248913e-01 7.452633e-01
+set.seed(1234)
+
+new_train_spf <- train_spf
+
+new_train_spf$Y <- exp(new_train_spf$Y^2)
+
+new_test_spf <- test_spf
+
+new_test_spf$Y <- exp(new_test_spf$Y^2)
+
+set.seed(1234)
+
+
+modello3.3 <- lda(result ~ minute + lastAction + is_weakfoot + X + exp(Y^2) + h_a, data = train_spf)
+
+summary(modello3.3)
+## Length Class Mode
+## prior 2 -none- numeric
+## counts 2 -none- numeric
+## means 56 -none- numeric
+## scaling 28 -none- numeric
+## lev 2 -none- character
+## svd 1 -none- numeric
+## N 1 -none- numeric
+## call 3 -none- call
+## terms 3 terms call
+## xlevels 3 -none- list
+predict3.3.1 <- predict(modello3.3, newdata = test_spf)
+
+predict3.3 <- predict3.3.1$posterior[, 2]
+
+confusione3.3.1 <- confusionMatrix(as.factor(predict3.3.1$class), as.factor(test_spf$result))
+confusione3.3.1
+## Confusion Matrix and Statistics
+##
+## Reference
+## Prediction 0 1
+## 0 15775 1193
+## 1 231 159
+##
+## Accuracy : 0.918
+## 95% CI : (0.9138, 0.922)
+## No Information Rate : 0.9221
+## P-Value [Acc > NIR] : 0.9794
+##
+## Kappa : 0.153
+##
+## Mcnemar's Test P-Value : <2e-16
+##
+## Sensitivity : 0.9856
+## Specificity : 0.1176
+## Pos Pred Value : 0.9297
+## Neg Pred Value : 0.4077
+## Prevalence : 0.9221
+## Detection Rate : 0.9088
+## Detection Prevalence : 0.9775
+## Balanced Accuracy : 0.5516
+##
+## 'Positive' Class : 0
+##
+confusione3.3 <- confusionMatrix(as.factor(ifelse(predict3.3>0.3, 1, 0)), as.factor(test_spf$result))
+
+roc3.3 <- roc(test_spf$result ~ predict3.3, plot = T)
+## Setting levels: control = 0, case = 1
+## Setting direction: controls < cases
+legend("bottomright", legend = paste("AUC =", round(auc(roc3.3), 4)), lwd = 2, box.lwd = 0, bg = "white")
+rms::val.prob(as.numeric(as.character(predict3.3)), as.numeric(as.character(test_spf$result)))
+## Dxy C (ROC) R2 D D:Chi-sq
+## 0.487283753 0.743641877 0.129926651 0.056251210 977.408498151
+## D:p U U:Chi-sq U:p Q
+## NA 0.006455379 114.052465074 0.000000000 0.049795831
+## Brier Intercept Slope Emax E90
+## 0.066883628 -0.556372588 0.751043723 0.343102307 0.019185729
+## Eavg S:z S:p
+## 0.016310743 3.167461664 0.001537760
+set.seed(1234)
+
+modello3.4 <- ranger(train_spf$result ~ minute + lastAction + is_weakfoot + X + Y + h_a,
+ data = new_train_spf, num.trees = 500, mtry = 3, seed = 42, probability = T)
+
+train_predict_3.4.1 <- predict(modello3.4, data = new_train_spf)
+train_predict3.4 <- train_predict_3.4.1$predictions[, 2]
+
+predict3.4.1 <- predict(modello3.4, data = new_test_spf)
+
+predict3.4 <- predict3.4.1$predictions[, 2]
+
+confusione3.4 <- confusionMatrix(as.factor(ifelse(predict3.4 >0.3, 1, 0)), as.factor(test_spf$result))
+confusione3.4
+## Confusion Matrix and Statistics
+##
+## Reference
+## Prediction 0 1
+## 0 15519 717
+## 1 487 635
+##
+## Accuracy : 0.9306
+## 95% CI : (0.9268, 0.9344)
+## No Information Rate : 0.9221
+## P-Value [Acc > NIR] : 1.086e-05
+##
+## Kappa : 0.4763
+##
+## Mcnemar's Test P-Value : 4.121e-11
+##
+## Sensitivity : 0.9696
+## Specificity : 0.4697
+## Pos Pred Value : 0.9558
+## Neg Pred Value : 0.5660
+## Prevalence : 0.9221
+## Detection Rate : 0.8941
+## Detection Prevalence : 0.9354
+## Balanced Accuracy : 0.7196
+##
+## 'Positive' Class : 0
+##
+roc3.4 <- roc(test_spf$result ~ as.numeric(predict3.4), plot = T)
+## Setting levels: control = 0, case = 1
+## Setting direction: controls < cases
+legend("bottomright", legend = paste("AUC =", round(auc(roc3.4), 4)), lwd = 2, box.lwd = 0, bg = "white")
+rms::val.prob(as.numeric(as.character(predict3.4)), as.numeric(as.character(new_test_spf$result)))
+## Warning in rms::val.prob(as.numeric(as.character(predict3.4)),
+## as.numeric(as.character(new_test_spf$result))): 383 observations deleted from
+## logistic calibration due to probs. of 0 or 1
+## Dxy C (ROC) R2 D D:Chi-sq
+## 6.650859e-01 8.325429e-01 3.375476e-01 1.553417e-01 2.637926e+03
+## D:p U U:Chi-sq U:p Q
+## NA 2.683426e-03 4.755116e+01 4.724943e-11 1.526583e-01
+## Brier Intercept Slope Emax E90
+## 5.104138e-02 -3.530922e-01 8.691969e-01 6.181181e-02 4.470280e-02
+## Eavg S:z S:p
+## 1.567332e-02 -2.138949e+00 3.243979e-02
+set.seed(1234)
+
+modello3.5 <- ranger(train_spf$result ~ minute + lastAction + is_weakfoot + X + Y + h_a,
+ data = new_train_spf, num.trees = 500, seed = 42, probability = T)
+
+predict3.5.1 <- predict(modello3.5, data = new_test_spf)
+
+predict3.5 <- predict3.5.1$predictions[, 2]
+
+confusione3.5 <- confusionMatrix(as.factor(ifelse(predict3.5>0.3, 1, 0)), as.factor(new_test_spf$result))
+confusione3.5
+## Confusion Matrix and Statistics
+##
+## Reference
+## Prediction 0 1
+## 0 15591 758
+## 1 415 594
+##
+## Accuracy : 0.9324
+## 95% CI : (0.9286, 0.9361)
+## No Information Rate : 0.9221
+## P-Value [Acc > NIR] : 1.239e-07
+##
+## Kappa : 0.4677
+##
+## Mcnemar's Test P-Value : < 2.2e-16
+##
+## Sensitivity : 0.9741
+## Specificity : 0.4393
+## Pos Pred Value : 0.9536
+## Neg Pred Value : 0.5887
+## Prevalence : 0.9221
+## Detection Rate : 0.8982
+## Detection Prevalence : 0.9419
+## Balanced Accuracy : 0.7067
+##
+## 'Positive' Class : 0
+##
+roc3.5 <- roc(new_test_spf$result ~ as.numeric(predict3.5), plot = T)
+## Setting levels: control = 0, case = 1
+## Setting direction: controls < cases
+legend("bottomright", legend = paste("AUC =", round(auc(roc3.5), 4)), lwd = 2, box.lwd = 0, bg = "white")
+rms::val.prob(as.numeric(as.character(predict3.5)), as.numeric(as.character(new_test_spf$result)))
+## Dxy C (ROC) R2 D D:Chi-sq
+## 6.772853e-01 8.386426e-01 3.486116e-01 1.588321e-01 2.758008e+03
+## D:p U U:Chi-sq U:p Q
+## NA 4.519128e-04 9.844303e+00 7.283445e-03 1.583802e-01
+## Brier Intercept Slope Emax E90
+## 5.136852e-02 4.054414e-02 1.056891e+00 9.273364e-02 2.470800e-02
+## Eavg S:z S:p
+## 8.638274e-03 -3.800341e+00 1.444973e-04
+set.seed(1234)
+
+modello3.8 <- nnet(result ~ minute + lastAction + is_weakfoot + X + Y + h_a, data = new_train_spf, size = 10, maxit = 1000, linout = FALSE)
+## # weights: 301
+## initial value 23522.705483
+## iter 10 value 14179.880092
+## iter 20 value 13625.005698
+## iter 30 value 13418.429983
+## iter 40 value 13173.057450
+## iter 50 value 12640.458928
+## iter 60 value 12265.364331
+## iter 70 value 12038.069262
+## iter 80 value 11934.605007
+## iter 90 value 11896.243136
+## iter 100 value 11893.367623
+## iter 110 value 11884.155700
+## iter 120 value 11866.894186
+## iter 130 value 11850.193146
+## iter 140 value 11837.232488
+## iter 150 value 11820.460297
+## iter 160 value 11805.131341
+## iter 170 value 11790.186472
+## iter 180 value 11778.943767
+## iter 190 value 11768.430048
+## iter 200 value 11754.212725
+## iter 210 value 11746.100902
+## iter 220 value 11740.054020
+## iter 230 value 11733.960416
+## iter 240 value 11728.417755
+## iter 250 value 11724.163176
+## iter 260 value 11721.026534
+## iter 270 value 11718.333051
+## iter 280 value 11714.357414
+## iter 290 value 11713.123854
+## iter 300 value 11712.121098
+## iter 310 value 11711.590404
+## iter 320 value 11710.029488
+## iter 330 value 11706.959369
+## iter 340 value 11705.784964
+## iter 350 value 11704.471415
+## iter 360 value 11703.823626
+## iter 370 value 11703.137776
+## iter 380 value 11702.167481
+## iter 390 value 11700.827529
+## iter 400 value 11699.873236
+## iter 410 value 11698.942583
+## final value 11698.629976
+## converged
+predict3.8 <- predict(modello3.8, newdata = new_test_spf)
+
+confusione3.8 <- confusionMatrix(as.factor(ifelse(predict3.8>0.3, 1, 0)), new_test_spf$result)
+confusione3.8
+## Confusion Matrix and Statistics
+##
+## Reference
+## Prediction 0 1
+## 0 15425 951
+## 1 581 401
+##
+## Accuracy : 0.9117
+## 95% CI : (0.9074, 0.9159)
+## No Information Rate : 0.9221
+## P-Value [Acc > NIR] : 1
+##
+## Kappa : 0.2976
+##
+## Mcnemar's Test P-Value : <2e-16
+##
+## Sensitivity : 0.9637
+## Specificity : 0.2966
+## Pos Pred Value : 0.9419
+## Neg Pred Value : 0.4084
+## Prevalence : 0.9221
+## Detection Rate : 0.8886
+## Detection Prevalence : 0.9434
+## Balanced Accuracy : 0.6301
+##
+## 'Positive' Class : 0
+##
+roc3.8 <- roc(new_test_spf$result ~ as.numeric(predict3.8), plot = T)
+## Setting levels: control = 0, case = 1
+## Setting direction: controls < cases
+legend("bottomright", legend = paste("AUC =", round(auc(roc3.8), 4)), lwd = 2, box.lwd = 0, bg = "white")
+rms::val.prob(as.numeric(as.character(predict3.8)), as.numeric(as.character(new_test_spf$result)))
+## Warning in rms::val.prob(as.numeric(as.character(predict3.8)),
+## as.numeric(as.character(new_test_spf$result))): 73 observations deleted from
+## logistic calibration due to probs. of 0 or 1
+## Dxy C (ROC) R2 D D:Chi-sq
+## 5.362365e-01 7.681183e-01 1.831183e-01 8.020757e-02 1.387388e+03
+## D:p U U:Chi-sq U:p Q
+## NA Inf Inf 0.000000e+00 -Inf
+## Brier Intercept Slope Emax E90
+## 6.358302e-02 -2.002981e-01 9.038563e-01 1.008896e-01 8.304276e-03
+## Eavg S:z S:p
+## 5.422550e-03 1.609048e+00 1.076059e-01
+confusione3.us <- confusionMatrix(as.factor(ifelse(test_spf$`xG Understat`>0.3, 1, 0)), test_spf$result)
+confusione3.us
+## Confusion Matrix and Statistics
+##
+## Reference
+## Prediction 0 1
+## 0 15438 770
+## 1 568 582
+##
+## Accuracy : 0.9229
+## 95% CI : (0.9188, 0.9268)
+## No Information Rate : 0.9221
+## P-Value [Acc > NIR] : 0.3524
+##
+## Kappa : 0.424
+##
+## Mcnemar's Test P-Value : 3.907e-08
+##
+## Sensitivity : 0.9645
+## Specificity : 0.4305
+## Pos Pred Value : 0.9525
+## Neg Pred Value : 0.5061
+## Prevalence : 0.9221
+## Detection Rate : 0.8894
+## Detection Prevalence : 0.9337
+## Balanced Accuracy : 0.6975
+##
+## 'Positive' Class : 0
+##
+roc3.us <- roc(test_spf$result ~ as.numeric(test_spf$`xG Understat`), plot = T)
+## Setting levels: control = 0, case = 1
+## Setting direction: controls < cases
+legend("bottomright", legend = paste("AUC =", round(auc(roc3.us), 4)), lwd = 2, box.lwd = 0, bg = "white")
+rms::val.prob(as.numeric(as.character(test_spf$`xG Understat`)), as.numeric(as.character(test_spf$result)))
+## Warning in rms::val.prob(as.numeric(as.character(test_spf$`xG Understat`)), :
+## 13 observations deleted from logistic calibration due to probs. of 0 or 1
+## Dxy C (ROC) R2 D D:Chi-sq
+## 5.895407e-01 7.947703e-01 2.724341e-01 1.219437e-01 2.116114e+03
+## D:p U U:Chi-sq U:p Q
+## NA 5.332985e-04 1.125006e+01 3.606451e-03 1.214104e-01
+## Brier Intercept Slope Emax E90
+## 5.646719e-02 -1.819309e-01 9.393373e-01 4.516177e-02 6.395073e-03
+## Eavg S:z S:p
+## 5.191630e-03 -5.115445e-02 9.592024e-01
+set.seed(123)
+index_sph <- sample.split(Y = setpiece_head$result, SplitRatio = 0.75)
+train_sph <- setpiece_head[index_sph, ]
+
+indice_test_sph <- which(index_sph == FALSE)
+test_sph <- setpiece_head[indice_test_sph, ]
+modello4.1 <- glm(result ~ minute + lastAction + X + exp(Y^2) + h_a,
+ data = train_sph, family = binomial)
+
+summary(modello4.1)
+##
+## Call:
+## glm(formula = result ~ minute + lastAction + X + exp(Y^2) + h_a,
+## family = binomial, data = train_sph)
+##
+## Coefficients:
+## Estimate Std. Error z value Pr(>|z|)
+## (Intercept) -2.420e+01 7.364e-01 -32.868 < 2e-16 ***
+## minute -1.725e-03 7.315e-04 -2.358 0.01836 *
+## lastActionBallRecovery -1.282e+01 4.131e+02 -0.031 0.97524
+## lastActionBallTouch -1.418e-01 3.709e-01 -0.382 0.70231
+## lastActionBlockedPass -1.300e+01 2.409e+02 -0.054 0.95697
+## lastActionCard -1.281e+01 3.355e+02 -0.038 0.96954
+## lastActionChallenge -1.291e+01 4.496e+02 -0.029 0.97708
+## lastActionChipped 6.573e-01 1.014e-01 6.481 9.14e-11 ***
+## lastActionCornerAwarded -1.219e+00 7.181e-01 -1.698 0.08954 .
+## lastActionCross 7.615e-01 4.764e-02 15.986 < 2e-16 ***
+## lastActionDispossessed 7.975e-01 1.060e+00 0.753 0.45168
+## lastActionEnd -4.132e-01 5.973e-01 -0.692 0.48913
+## lastActionFoul -8.870e-01 5.903e-01 -1.502 0.13297
+## lastActionGoal -1.269e+01 1.798e+02 -0.071 0.94375
+## lastActionHeadPass 1.108e+00 1.048e-01 10.566 < 2e-16 ***
+## lastActionInterception -1.269e+01 4.071e+02 -0.031 0.97513
+## lastActionNone 3.315e-01 1.422e-01 2.332 0.01970 *
+## lastActionOffsidePass -1.249e+01 2.712e+02 -0.046 0.96326
+## lastActionPass 1.859e+00 2.146e-01 8.660 < 2e-16 ***
+## lastActionRebound 1.199e+00 1.118e-01 10.724 < 2e-16 ***
+## lastActionSubstitutionOn -1.292e+01 5.695e+02 -0.023 0.98190
+## lastActionTackle -1.305e+01 3.234e+02 -0.040 0.96782
+## lastActionTakeOn -1.262e+01 3.072e+02 -0.041 0.96722
+## X 2.461e+01 7.397e-01 33.267 < 2e-16 ***
+## exp(Y^2) -9.621e-01 2.060e-01 -4.671 2.99e-06 ***
+## h_ah -1.052e-01 3.876e-02 -2.715 0.00663 **
+## ---
+## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
+##
+## (Dispersion parameter for binomial family taken to be 1)
+##
+## Null deviance: 21149 on 34346 degrees of freedom
+## Residual deviance: 19194 on 34321 degrees of freedom
+## AIC: 19246
+##
+## Number of Fisher Scoring iterations: 14
+predict4.1 <- predict.glm(modello4.1, newdata = test_sph, type = "response")
+
+confusione4.1 <- confusionMatrix(as.factor(ifelse(predict4.1>0.3, 1, 0)), as.factor(test_sph$result))
+confusione4.1
+## Confusion Matrix and Statistics
+##
+## Reference
+## Prediction 0 1
+## 0 10215 962
+## 1 177 95
+##
+## Accuracy : 0.9005
+## 95% CI : (0.8949, 0.9059)
+## No Information Rate : 0.9077
+## P-Value [Acc > NIR] : 0.9958
+##
+## Kappa : 0.1093
+##
+## Mcnemar's Test P-Value : <2e-16
+##
+## Sensitivity : 0.98297
+## Specificity : 0.08988
+## Pos Pred Value : 0.91393
+## Neg Pred Value : 0.34926
+## Prevalence : 0.90768
+## Detection Rate : 0.89222
+## Detection Prevalence : 0.97624
+## Balanced Accuracy : 0.53642
+##
+## 'Positive' Class : 0
+##
+roc4.1 <- roc(test_sph$result ~ predict4.1, plot = T)
+## Setting levels: control = 0, case = 1
+## Setting direction: controls < cases
+legend("bottomright", legend = paste("AUC =", round(auc(roc4.1), 4)), lwd = 2, box.lwd = 0, bg = "white")
+rms::val.prob(as.numeric(as.character(predict4.1)), as.numeric(as.character(test_sph$result)))
+## Dxy C (ROC) R2 D D:Chi-sq
+## 4.251367e-01 7.125684e-01 1.037063e-01 4.876773e-02 5.593418e+02
+## D:p U U:Chi-sq U:p Q
+## NA 1.560155e-04 3.786222e+00 1.506026e-01 4.861172e-02
+## Brier Intercept Slope Emax E90
+## 7.906927e-02 -1.739121e-01 9.218147e-01 2.812030e-02 1.657528e-02
+## Eavg S:z S:p
+## 5.413573e-03 -2.727723e-02 9.782386e-01
+set.seed(1234)
+
+new_train_sph <- train_sph
+
+new_train_sph$Y <- exp(new_train_sph$Y^2)
+
+new_test_sph <- test_sph
+
+new_test_sph$Y <- exp(new_test_sph$Y^2)
+
+set.seed(1234)
+
+
+modello4.3 <- lda(result ~ minute + lastAction + X + exp(Y^2) + h_a, data = train_sph)
+
+summary(modello4.3)
+## Length Class Mode
+## prior 2 -none- numeric
+## counts 2 -none- numeric
+## means 50 -none- numeric
+## scaling 25 -none- numeric
+## lev 2 -none- character
+## svd 1 -none- numeric
+## N 1 -none- numeric
+## call 3 -none- call
+## terms 3 terms call
+## xlevels 2 -none- list
+predict4.3.1 <- predict(modello4.3, newdata = test_sph)
+
+predict4.3 <- predict4.3.1$posterior[, 2]
+
+confusione4.3.1 <- confusionMatrix(as.factor(predict4.3.1$class), as.factor(test_sph$result))
+confusione4.3.1
+## Confusion Matrix and Statistics
+##
+## Reference
+## Prediction 0 1
+## 0 10324 1011
+## 1 68 46
+##
+## Accuracy : 0.9058
+## 95% CI : (0.9003, 0.911)
+## No Information Rate : 0.9077
+## P-Value [Acc > NIR] : 0.7668
+##
+## Kappa : 0.0617
+##
+## Mcnemar's Test P-Value : <2e-16
+##
+## Sensitivity : 0.99346
+## Specificity : 0.04352
+## Pos Pred Value : 0.91081
+## Neg Pred Value : 0.40351
+## Prevalence : 0.90768
+## Detection Rate : 0.90174
+## Detection Prevalence : 0.99004
+## Balanced Accuracy : 0.51849
+##
+## 'Positive' Class : 0
+##
+confusione4.3 <- confusionMatrix(as.factor(ifelse(predict4.3>0.3, 1, 0)), as.factor(test_sph$result))
+
+roc4.3 <- roc(test_sph$result ~ predict4.3, plot = T)
+## Setting levels: control = 0, case = 1
+## Setting direction: controls < cases
+legend("bottomright", legend = paste("AUC =", round(auc(roc4.3), 4)), lwd = 2, box.lwd = 0, bg = "white")
+rms::val.prob(as.numeric(as.character(predict4.3)), as.numeric(as.character(test_sph$result)))
+## Dxy C (ROC) R2 D D:Chi-sq
+## 4.176003e-01 7.088001e-01 9.620260e-02 4.515158e-02 5.179404e+02
+## D:p U U:Chi-sq U:p Q
+## NA 1.869583e-03 2.340486e+01 8.273686e-06 4.328199e-02
+## Brier Intercept Slope Emax E90
+## 8.005259e-02 -3.824800e-01 8.242660e-01 3.101720e-01 1.341532e-02
+## Eavg S:z S:p
+## 1.179437e-02 1.354577e+00 1.755522e-01
+set.seed(1234)
+
+modello4.4 <- ranger(train_sph$result ~ minute + lastAction + X + Y + h_a,
+ data = new_train_sph, num.trees = 500, mtry = 3, seed = 42, probability = T)
+
+train_predict_4.4.1 <- predict(modello4.4, data = new_train_sph)
+train_predict4.4 <- train_predict_4.4.1$predictions[, 2]
+
+predict4.4.1 <- predict(modello4.4, data = new_test_sph)
+
+predict4.4 <- predict4.4.1$predictions[, 2]
+
+confusione4.4 <- confusionMatrix(as.factor(ifelse(predict4.4 >0.3, 1, 0)), as.factor(test_sph$result))
+confusione4.4
+## Confusion Matrix and Statistics
+##
+## Reference
+## Prediction 0 1
+## 0 9942 631
+## 1 450 426
+##
+## Accuracy : 0.9056
+## 95% CI : (0.9001, 0.9109)
+## No Information Rate : 0.9077
+## P-Value [Acc > NIR] : 0.786
+##
+## Kappa : 0.3897
+##
+## Mcnemar's Test P-Value : 4.383e-08
+##
+## Sensitivity : 0.9567
+## Specificity : 0.4030
+## Pos Pred Value : 0.9403
+## Neg Pred Value : 0.4863
+## Prevalence : 0.9077
+## Detection Rate : 0.8684
+## Detection Prevalence : 0.9235
+## Balanced Accuracy : 0.6799
+##
+## 'Positive' Class : 0
+##
+roc4.4 <- roc(test_sph$result ~ as.numeric(predict4.4), plot = T)
+## Setting levels: control = 0, case = 1
+## Setting direction: controls < cases
+legend("bottomright", legend = paste("AUC =", round(auc(roc4.4), 4)), lwd = 2, box.lwd = 0, bg = "white")
+rms::val.prob(as.numeric(as.character(predict4.4)), as.numeric(as.character(new_test_sph$result)))
+## Warning in rms::val.prob(as.numeric(as.character(predict4.4)),
+## as.numeric(as.character(new_test_sph$result))): 869 observations deleted from
+## logistic calibration due to probs. of 0 or 1
+## Dxy C (ROC) R2 D D:Chi-sq
+## 0.60666454 0.80333227 0.28605354 0.14680716 1554.21971840
+## D:p U U:Chi-sq U:p Q
+## NA Inf Inf 0.00000000 -Inf
+## Brier Intercept Slope Emax E90
+## 0.06304306 -0.46515693 0.76715854 0.09687157 0.05778390
+## Eavg S:z S:p
+## 0.02110086 0.94272703 0.34582054
+set.seed(1234)
+
+modello4.5 <- ranger(train_sph$result ~ minute + lastAction + X + Y + h_a,
+ data = new_train_sph, num.trees = 500, seed = 42, probability = T)
+
+predict4.5.1 <- predict(modello4.5, data = new_test_sph)
+
+predict4.5 <- predict4.5.1$predictions[, 2]
+
+confusione4.5 <- confusionMatrix(as.factor(ifelse(predict4.5>0.3, 1, 0)), as.factor(new_test_sph$result))
+confusione4.5
+## Confusion Matrix and Statistics
+##
+## Reference
+## Prediction 0 1
+## 0 10042 658
+## 1 350 399
+##
+## Accuracy : 0.912
+## 95% CI : (0.9066, 0.9171)
+## No Information Rate : 0.9077
+## P-Value [Acc > NIR] : 0.05793
+##
+## Kappa : 0.3956
+##
+## Mcnemar's Test P-Value : < 2e-16
+##
+## Sensitivity : 0.9663
+## Specificity : 0.3775
+## Pos Pred Value : 0.9385
+## Neg Pred Value : 0.5327
+## Prevalence : 0.9077
+## Detection Rate : 0.8771
+## Detection Prevalence : 0.9346
+## Balanced Accuracy : 0.6719
+##
+## 'Positive' Class : 0
+##
+roc4.5 <- roc(new_test_sph$result ~ as.numeric(predict4.5), plot = T)
+## Setting levels: control = 0, case = 1
+## Setting direction: controls < cases
+legend("bottomright", legend = paste("AUC =", round(auc(roc4.5), 4)), lwd = 2, box.lwd = 0, bg = "white")
+rms::val.prob(as.numeric(as.character(predict4.5)), as.numeric(as.character(new_test_sph$result)))
+## Warning in rms::val.prob(as.numeric(as.character(predict4.5)),
+## as.numeric(as.character(new_test_sph$result))): 32 observations deleted from
+## logistic calibration due to probs. of 0 or 1
+## Dxy C (ROC) R2 D D:Chi-sq
+## 6.444300e-01 8.222150e-01 3.168346e-01 1.575874e-01 1.800175e+03
+## D:p U U:Chi-sq U:p Q
+## NA 6.581974e-04 9.514640e+00 8.588598e-03 1.569292e-01
+## Brier Intercept Slope Emax E90
+## 6.248466e-02 -1.808876e-01 9.179875e-01 1.028428e-01 3.948918e-02
+## Eavg S:z S:p
+## 1.451649e-02 -8.034238e-01 4.217298e-01
+modello4.8 <- nnet(result ~ minute + lastAction + X + Y + h_a, data = new_train_sph, size = 10, maxit = 1000, linout = FALSE)
+## # weights: 271
+## initial value 14444.509965
+## iter 10 value 10571.777439
+## iter 20 value 10555.297209
+## iter 30 value 10535.397340
+## iter 40 value 10531.264920
+## iter 50 value 10529.572248
+## iter 60 value 10528.543519
+## final value 10528.519092
+## converged
+predict4.8 <- predict(modello4.8, newdata = new_test_sph)
+
+confusione4.8 <- confusionMatrix(as.factor(ifelse(predict4.8>0.3, 1, 0)), new_test_sph$result)
+confusione4.8
+## Confusion Matrix and Statistics
+##
+## Reference
+## Prediction 0 1
+## 0 10390 1054
+## 1 2 3
+##
+## Accuracy : 0.9078
+## 95% CI : (0.9023, 0.913)
+## No Information Rate : 0.9077
+## P-Value [Acc > NIR] : 0.4953
+##
+## Kappa : 0.0048
+##
+## Mcnemar's Test P-Value : <2e-16
+##
+## Sensitivity : 0.999808
+## Specificity : 0.002838
+## Pos Pred Value : 0.907899
+## Neg Pred Value : 0.600000
+## Prevalence : 0.907678
+## Detection Rate : 0.907503
+## Detection Prevalence : 0.999563
+## Balanced Accuracy : 0.501323
+##
+## 'Positive' Class : 0
+##
+roc4.8 <- roc(new_test_sph$result ~ as.numeric(predict4.8), plot = T)
+## Setting levels: control = 0, case = 1
+## Setting direction: controls < cases
+legend("bottomright", legend = paste("AUC =", round(auc(roc4.8), 4)), lwd = 2, box.lwd = 0, bg = "white")
+rms::val.prob(as.numeric(as.character(predict4.8)), as.numeric(as.character(new_test_sph$result)))
+## Dxy C (ROC) R2 D D:Chi-sq
+## 1.874104e-02 5.093705e-01 2.306491e-03 9.736699e-04 1.214755e+01
+## D:p U U:Chi-sq U:p Q
+## NA 6.033001e-05 2.690718e+00 2.604461e-01 9.133399e-04
+## Brier Intercept Slope Emax E90
+## 8.366746e-02 -6.346217e-01 7.213535e-01 8.956164e-02 1.159593e-03
+## Eavg S:z S:p
+## 2.291678e-03 9.299804e-02 9.259051e-01
+confusione4.us <- confusionMatrix(as.factor(ifelse(test_sph$`xG Understat`>0.3, 1, 0)), test_sph$result)
+confusione4.us
+## Confusion Matrix and Statistics
+##
+## Reference
+## Prediction 0 1
+## 0 9805 626
+## 1 587 431
+##
+## Accuracy : 0.8941
+## 95% CI : (0.8883, 0.8996)
+## No Information Rate : 0.9077
+## P-Value [Acc > NIR] : 1.0000
+##
+## Kappa : 0.3572
+##
+## Mcnemar's Test P-Value : 0.2752
+##
+## Sensitivity : 0.9435
+## Specificity : 0.4078
+## Pos Pred Value : 0.9400
+## Neg Pred Value : 0.4234
+## Prevalence : 0.9077
+## Detection Rate : 0.8564
+## Detection Prevalence : 0.9111
+## Balanced Accuracy : 0.6756
+##
+## 'Positive' Class : 0
+##
+roc4.us <- roc(test_sph$result ~ as.numeric(test_sph$`xG Understat`), plot = T)
+## Setting levels: control = 0, case = 1
+## Setting direction: controls < cases
+legend("bottomright", legend = paste("AUC =", round(auc(roc4.us), 4)), lwd = 2, box.lwd = 0, bg = "white")
+rms::val.prob(as.numeric(as.character(test_sph$`xG Understat`)), as.numeric(as.character(test_sph$result)))
+## Warning in rms::val.prob(as.numeric(as.character(test_sph$`xG Understat`)), : 2
+## observations deleted from logistic calibration due to probs. of 0 or 1
+## Dxy C (ROC) R2 D D:Chi-sq
+## 5.820363e-01 7.910181e-01 2.272736e-01 1.102882e-01 1.263469e+03
+## D:p U U:Chi-sq U:p Q
+## NA 5.036564e-04 7.765355e+00 2.059561e-02 1.097845e-01
+## Brier Intercept Slope Emax E90
+## 7.092230e-02 -1.467926e-01 9.228842e-01 1.822378e-01 4.303519e-03
+## Eavg S:z S:p
+## 4.759339e-03 1.399728e+00 1.615947e-01
+accuracy_opf <- data.frame(confusione1.us$overall["Accuracy"], confusione1.1$overall["Accuracy"], confusione1.2$overall["Accuracy"], confusione1.3$overall["Accuracy"], confusione1.4$overall["Accuracy"], confusione1.5$overall["Accuracy"], confusione1.8$overall["Accuracy"])
+nomi_acc1 <- c("Understat", "Lineare", "Interazione", "Discriminante", "Random_Forest", "Bagging", "Neural_Net")
+names(accuracy_opf) <- nomi_acc1
+
+auc_opf <- data.frame(roc1.us$auc, roc1.1$auc, roc1.2$auc, roc1.3$auc, roc1.4$auc, roc1.5$auc, roc1.8$auc)
+names(auc_opf) <- nomi_acc1
+
+acc_auc1 <- rbind(accuracy_opf, auc_opf)
+rownames(acc_auc1) <- c("Accuracy", "AUC")
+acc_auc1
+## Understat Lineare Interazione Discriminante Random_Forest Bagging
+## Accuracy 0.8875595 0.8888058 0.8860917 0.8850532 0.9011161 0.9064058
+## AUC 0.8145883 0.7468567 0.7495843 0.7447884 0.8374263 0.8392269
+## Neural_Net
+## Accuracy 0.8885704
+## AUC 0.7913828
+accuracy_oph <- data.frame(confusione2.us$overall["Accuracy"], confusione2.1$overall["Accuracy"], confusione2.3$overall["Accuracy"], confusione2.4$overall["Accuracy"], confusione2.5$overall["Accuracy"], confusione2.8$overall["Accuracy"])
+nomi_acc2 <- c("Understat", "Lineare", "Discriminante", "Random_Forest", "Bagging", "Neural_Net")
+names(accuracy_oph) <- nomi_acc2
+
+auc_oph <- data.frame(roc1.us$auc, roc1.1$auc, roc1.3$auc, roc1.4$auc, roc1.5$auc, roc1.8$auc)
+names(auc_oph) <- nomi_acc2
+
+acc_auc2 <- rbind.data.frame(accuracy_oph, auc_oph)
+rownames(acc_auc2) <- c("Accuracy", "AUC")
+acc_auc2
+## Understat Lineare Discriminante Random_Forest Bagging Neural_Net
+## Accuracy 0.8503896 0.8577922 0.8579221 0.8659740 0.8727273 0.8662338
+## AUC 0.8145883 0.7468567 0.7447884 0.8374263 0.8392269 0.7913828
+accuracy_spf <- data.frame(confusione3.us$overall["Accuracy"], confusione3.1$overall["Accuracy"], confusione3.3$overall["Accuracy"], confusione3.4$overall["Accuracy"], confusione3.5$overall["Accuracy"], confusione3.8$overall["Accuracy"])
+nomi_acc3 <- c("Understat", "Lineare", "Discriminante", "Random_Forest", "Bagging", "Neural_Net")
+names(accuracy_spf) <- nomi_acc3
+
+auc_spf <- data.frame(roc3.us$auc, roc3.1$auc, roc3.3$auc, roc3.4$auc, roc3.5$auc, roc3.8$auc)
+names(auc_spf) <- nomi_acc3
+
+
+acc_auc3 <- rbind.data.frame(accuracy_spf, auc_spf)
+rownames(acc_auc3) <- c("Accuracy", "AUC")
+acc_auc3
+## Understat Lineare Discriminante Random_Forest Bagging Neural_Net
+## Accuracy 0.9229174 0.9138150 0.9033875 0.9306372 0.9324231 0.9117410
+## AUC 0.7949585 0.7540138 0.7436472 0.8365386 0.8386365 0.7650791
+accuracy_sph <- data.frame(confusione4.us$overall["Accuracy"], confusione4.1$overall["Accuracy"], confusione4.3$overall["Accuracy"], confusione4.4$overall["Accuracy"], confusione4.5$overall["Accuracy"], confusione4.8$overall["Accuracy"])
+nomi_acc2 <- c("Understat", "Lineare", "Discriminante", "Random_Forest", "Bagging", "Neural_Net")
+names(accuracy_sph) <- nomi_acc2
+
+auc_sph <- data.frame(roc4.us$auc, roc4.1$auc, roc4.3$auc, roc4.4$auc, roc4.5$auc, roc4.8$auc)
+names(auc_sph) <- nomi_acc2
+
+acc_auc4 <- rbind.data.frame(accuracy_sph, auc_sph)
+rownames(acc_auc4) <- c("Accuracy", "AUC")
+acc_auc4
+## Understat Lineare Discriminante Random_Forest Bagging Neural_Net
+## Accuracy 0.8940519 0.9005153 0.8967595 0.9055813 0.9119574 0.9077649
+## AUC 0.7910585 0.7125744 0.7087912 0.8190310 0.8227611 0.5094168
+test_opf_rf <- cbind(test_opf, predict1.4)
+names(test_opf_rf)[names(test_opf_rf) == "predict1.4"] <- "xG_Random_Forest"
+train_opf_rf <- cbind(train_opf, train_predict1.4)
+names(train_opf_rf)[names(train_opf_rf) == "train_predict1.4"] <- "xG_Random_Forest"
+
+messi_shots_opf_test <- test_opf_rf %>% filter(player == "Lionel Messi")
+messi_shots_opf_train <- train_opf_rf %>% filter(player == "Lionel Messi")
+messi_shots_opf <- bind_rows(messi_shots_opf_train, messi_shots_opf_test)
+
+messi_shots_opf$shotType <- ifelse(messi_shots_opf$shotType == "LeftFoot", "Left", "Right")
+messi_shots_opf <- messi_shots_opf %>%
+ mutate(X = X * 100, Y = Y * 100)
+
+messi_shots_opf <- messi_shots_opf %>%
+ mutate(result = factor(result, levels = c(0, 1), labels = c("No Goal", "Goal")),
+ match_info = paste(h_team, "vs", a_team, "<br>", h_goals, "-", a_goals),
+ date_formatted = format(as.Date(date), "%d-%m-%Y"))
+
+generate_center_circle_arc <- function(center_x = 50, center_y = 50, r = 9.15, n_points = 100) {
+ theta <- seq(-pi / 2, pi / 2, length.out = n_points) # Modificato per metà arco, da -pi/2 a pi/2
+ data.frame(
+ x = center_x + r * cos(theta), # Mantiene il centro sul bordo visibile del campo
+ y = center_y + r * sin(theta)
+ )
+}
+
+# Genera i punti della lunetta del centrocampo
+center_circle_arc <- generate_center_circle_arc(center_x = 50, center_y = 50)
+
+# Esempio di dataset con coordinate normalizzate (da 0 a 1)
+p <- ggplot() +
+ annotate_pitch(colour = "black", fill = "white") + # disegna il campo
+ theme_pitch() + # tema predefinito per il campo
+ theme(panel.background = element_rect(fill = "green4")) + # colore sfondo campo
+ # Aggiungi la lunetta del centrocampo
+ geom_path(data = center_circle_arc, aes(x = x, y = y), color = "black", size = 0.8) +
+ geom_point(data = messi_shots_opf,
+ aes(x = X, y = Y,
+ color = factor(result), # colori differenti per goal e no goal
+ shape = factor(result), # forme differenti per goal e no goal
+ text = paste("Random Forest xG:", round(xG_Random_Forest, 4),
+ "<br>X:", round(X, 2),
+ "<br>Y:", round(Y, 2),
+ "<br>Foot:", shotType,
+ "<br>", match_info,
+ "<br>", date_formatted,
+ "<br>Minute:", minute)),
+ size = 2, stroke = 0.3) +
+ scale_color_manual(values = c("No Goal" = "black", "Goal" = "red"),
+ name = "Esito Tiro") +
+ scale_shape_manual(values = c("No Goal" = 1, "Goal" = 16),
+ name = "Esito Tiro") +
+ labs(title = "<span style='font-size:16pt;'>Tiri effettuati da Lionel Messi in campionato</span><br><span style='font-size:12pt;'>di piede e in situazione di Open Play tra il 2014 e il 2021", x = " ", y = " ") +
+ theme_minimal() +
+ coord_cartesian(xlim = c(50, 100))
+## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
+## ℹ Please use `linewidth` instead.
+## This warning is displayed once every 8 hours.
+## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
+## generated.
+## Warning in geom_point(data = messi_shots_opf, aes(x = X, y = Y, color =
+## factor(result), : Ignoring unknown aesthetics: text
+p_interactive <- ggplotly(p, tooltip = "text")
+## Warning in geom2trace.default(dots[[1L]][[1L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomCurve() has yet to be implemented in plotly.
+## If you'd like to see this geom implemented,
+## Please open an issue with your example code at
+## https://github.com/ropensci/plotly/issues
+## Warning in geom2trace.default(dots[[1L]][[1L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomCurve() has yet to be implemented in plotly.
+## If you'd like to see this geom implemented,
+## Please open an issue with your example code at
+## https://github.com/ropensci/plotly/issues
+## Warning in geom2trace.default(dots[[1L]][[1L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomCurve() has yet to be implemented in plotly.
+## If you'd like to see this geom implemented,
+## Please open an issue with your example code at
+## https://github.com/ropensci/plotly/issues
+## Warning in geom2trace.default(dots[[1L]][[1L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomCurve() has yet to be implemented in plotly.
+## If you'd like to see this geom implemented,
+## Please open an issue with your example code at
+## https://github.com/ropensci/plotly/issues
+p_interactive
+
+
+test_spf_rf <- cbind(test_spf, predict3.4)
+names(test_spf_rf)[names(test_spf_rf) == "predict3.4"] <- "xG_Random_Forest"
+test_sph_rf <- cbind(test_sph, predict4.4)
+names(test_sph_rf)[names(test_sph_rf) == "predict4.4"] <- "xG_Random_Forest"
+train_spf_rf <- cbind(new_train_spf, train_predict3.4)
+names(test_spf_rf)[names(test_spf_rf) == "train_predict3.4"] <- "xG_Random_Forest"
+train_sph_rf <- cbind(new_train_sph, train_predict4.4)
+names(test_sph_rf)[names(test_sph_rf) == "train_predict4.4"] <- "xG_Random_Forest"
+
+seriea_corner_spf_test <- test_spf_rf %>%
+ filter(situation == "FromCorner", lega == "Serie A")
+
+seriea_corner_sph_test <- test_sph_rf %>%
+ filter(situation == "FromCorner", lega == "Serie A")
+
+seriea_corner_spf_train <- train_spf_rf %>%
+ filter(situation == "FromCorner", lega == "Serie A")
+
+seriea_corner_sph_train <- train_sph_rf %>%
+ filter(situation == "FromCorner", lega == "Serie A")
+
+
+seriea_corner <- bind_rows(seriea_corner_spf_test, seriea_corner_sph_test, seriea_corner_spf_train, seriea_corner_sph_train)
+seriea_corner$foot <- ifelse(seriea_corner$shotType %in% c("RightFoot", "LeftFoot"),
+ "foot",
+ "head")
+
+seriea_corner <- seriea_corner %>%
+ mutate(team_name = ifelse(h_a == "h", h_team, a_team),
+ match_info = paste(h_team, "vs", a_team, "<br>", h_goals, "-", a_goals),
+ date_formatted = format(as.Date(date), "%d-%m-%Y"),
+ result = factor(result, levels = c(0, 1), labels = c("No Goal", "Goal")))
+
+seriea_corner <- seriea_corner %>%
+ mutate(X = X * 100, Y = Y * 100)
+
+seriea_corner_20_21 <- seriea_corner %>%
+ filter(season == "2020")
+seriea_corner_19_21 <- seriea_corner %>%
+ filter(season %in% c("2019", "2020"))
+
+generate_center_circle_arc <- function(center_x = 50, center_y = 50, r = 9.15, n_points = 100) {
+ theta <- seq(-pi / 2, pi / 2, length.out = n_points) # Modificato per metà arco, da -pi/2 a pi/2
+ data.frame(
+ x = center_x + r * cos(theta), # Mantiene il centro sul bordo visibile del campo
+ y = center_y + r * sin(theta)
+ )
+}
+
+# Genera i punti della lunetta del centrocampo
+center_circle_arc <- generate_center_circle_arc(center_x = 50, center_y = 50)
+
+# Esempio di dataset con coordinate normalizzate (da 0 a 1)
+p_seriea <- ggplot() +
+ annotate_pitch(colour = "black", fill = "white") + # disegna il campo
+ theme_pitch() + # tema predefinito per il campo
+ theme(panel.background = element_rect(fill = "green4")) + # colore sfondo campo
+ # Aggiungi la lunetta del centrocampo
+ geom_path(data = center_circle_arc, aes(x = x, y = y), color = "black", size = 0.8) +
+ geom_point(data = seriea_corner_19_21,
+ aes(x = X, y = Y,
+ color = factor(result), # colori differenti per goal e no goal
+ shape = factor(result), # forme differenti per goal e no goal
+ text = paste("Random Forest xG:", round(xG_Random_Forest, 4),
+ "<br>X:", round(X, 2),
+ "<br>Y:", round(Y, 2),
+ "<br>Player:", player,
+ "<br>Type:", shotType,
+ "<br>Assist:", player_assisted,
+ "<br>Team:", team_name,
+ "<br>", match_info,
+ "<br>", date_formatted,
+ "<br>Minute:", minute)),
+ size = 2, stroke = 0.3) +
+ scale_color_manual(values = c("No Goal" = "black", "Goal" = "red"),
+ name = "Esito Tiro") +
+ scale_shape_manual(values = c("No Goal" = 1, "Goal" = 16),
+ name = "Esito Tiro") +
+ labs(title = "<span style='font-size:16pt;'>Tiri effettuati in Serie A da calcio d'angolo</span><br><span style='font-size:12pt;'>nelle stagioni 2019/20 e 2020/21</span>", x = " ", y = " ") +
+ theme_minimal() +
+ coord_cartesian(xlim = c(50, 100))
+## Warning in geom_point(data = seriea_corner_19_21, aes(x = X, y = Y, color =
+## factor(result), : Ignoring unknown aesthetics: text
+p_interactive_seriea <- ggplotly(p_seriea, tooltip = "text")
+## Warning in geom2trace.default(dots[[1L]][[1L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomCurve() has yet to be implemented in plotly.
+## If you'd like to see this geom implemented,
+## Please open an issue with your example code at
+## https://github.com/ropensci/plotly/issues
+## Warning in geom2trace.default(dots[[1L]][[1L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomCurve() has yet to be implemented in plotly.
+## If you'd like to see this geom implemented,
+## Please open an issue with your example code at
+## https://github.com/ropensci/plotly/issues
+## Warning in geom2trace.default(dots[[1L]][[1L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomCurve() has yet to be implemented in plotly.
+## If you'd like to see this geom implemented,
+## Please open an issue with your example code at
+## https://github.com/ropensci/plotly/issues
+## Warning in geom2trace.default(dots[[1L]][[1L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomCurve() has yet to be implemented in plotly.
+## If you'd like to see this geom implemented,
+## Please open an issue with your example code at
+## https://github.com/ropensci/plotly/issues
+p_interactive_seriea
+
+
+tot_opf_rf <- bind_rows(train_opf_rf, test_opf_rf)
+tot_opf_rf <- tot_opf_rf %>%
+ mutate(X = X * 100, Y = Y * 100)
+
+p_heatmap_custom <- ggplot(tot_opf_rf, aes(x = X, y = Y)) +
+ annotate_pitch(colour = "black", fill = "white") +
+ theme_pitch() +
+ theme(panel.background = element_rect(fill = "lightblue")) +
+
+ # Usa stat_summary_2d invece di geom_tile, per calcolare densità in ogni area
+ stat_summary_2d(aes(z = xG_Random_Forest), fun = mean, bins = 65, alpha = 0.8) +
+ scale_fill_viridis(option = "plasma", name = "xG Medio", direction = -1,
+ breaks = pretty_breaks(n = 5),
+ limits = c(0, max(tot_opf_rf$xG_Random_Forest, na.rm = TRUE))) +
+
+ # Migliora i dettagli del grafico
+ labs(
+ title = "Heat Map degli Expected Goals (xG)\nTiri Effettuati in Situazione di Open Play",
+ x = NULL, y = NULL
+ ) +
+
+ # Coordinata per mostrare solo la metà campo
+ coord_cartesian(xlim = c(50, 100), ylim = c(0, 100)) +
+ theme_minimal() +
+ theme(
+ axis.text = element_blank(), # Nascondi i valori degli assi
+ axis.ticks = element_blank(), # Nascondi i ticks degli assi
+ panel.grid = element_blank(), # Rimuovi le griglie
+ legend.position = "bottom", # Posiziona la legenda in basso
+ plot.title = element_text(size = 14, face = "bold", hjust = 0.5) # Centra il titolo e lo rende più visibile
+ )
+
+# Visualizza il grafico
+p_heatmap_custom
+