-
Notifications
You must be signed in to change notification settings - Fork 0
/
2. Replication results.Rmd
359 lines (310 loc) · 14.2 KB
/
2. Replication results.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
---
title: "Replicaton study"
author: "Christoph Völtzke"
date: "2022-12-02"
output: html_document
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```
## Packages
```{r, include=FALSE}
# needed packages
library(readr)
library(lme4)
library(lmerTest)
library(tidyverse)
library(texreg)
library(stargazer)
library(ggplot2)
library(cowplot)
library(gmodels)
```
## Replication data set
```{r, include=FALSE}
# The following code loads the data used in the study by Wunderlich et al. (2020). This is the data set needed to reproduce the results of the initial analysis. See report for description of variables needed.
dat <- read_csv("Data/Full_sets/rep.csv")
```
```{r, include=FALSE}
dat <- dat %>%
mutate(Season_c = Season -10) # centering the first covid season as 0
dat_0 <- filter(dat, Season_c == 0) # filtering to have only the first covid season in the data set
#write.csv(dat_0,"Data/Full_sets/rep_one_season.csv")
```
## Model used for all 10 seasons
```{r}
# set a seed for reproducible results
set.seed(123)
```
```{r}
# original 10 seasons
# Spectators (Yes/No), Season (-9 to 0) as fixed effects and Division as Random intercept, Restricted MLE as this is the preferred option for ML analysis
options(scipen=999)
model1 <- lmer(goalsDiff ~ Spectators + Season_c + (1|Division), REML = FALSE, data = dat)
#summary(model1)
model2 <- lmer(pointsDiff ~ Spectators + Season_c + (1|Division), REML = FALSE, data = dat)
#summary(model2)
model3 <- lmer(expPointsDiff ~ Spectators + Season_c + (1|Division), REML = FALSE, data = dat)
#summary(model3)
model4 <- lmer(shotsDiff~ Spectators + Season_c + (1|Division), REML = FALSE, data = dat)
#summary(model4)
model5 <- lmer(shotsTargetDiff ~ Spectators + Season_c + (1|Division), REML = FALSE, data = dat)
#summary(model5)
model6 <- lmer(foulsDiff ~ Spectators + Season_c + (1|Division), REML = FALSE, data = dat)
#summary(model6)
model7 <- lmer(yellowDiff ~ Spectators + Season_c + (1|Division), REML = FALSE, data = dat)
#summary(model7)
model8 <- lmer(redDiff ~ Spectators + Season_c + (1|Division), REML = FALSE, data = dat) # the model with the red cards procudes a warning. It might be useful to exclude this dependent variable
#summary(model8)
```
## Summary of the results for 10 seasons
```{r}
table1 <-screenreg(list(model5, model6, model7, model8),
custom.model.names=c("ShotsTarget", "Fouls", "Yellow", "Red"))
table1
```
```{r}
table1_0 <-screenreg(list(model1, model2, model3, model4),
custom.model.names=c("Goals","Points", "Exp.Points","Shots"))
table1_0
```
### Latex code for plots with Stargazer
```{r}
# needed to use the stargazer package
class(model1) <- "lmerMod"
class(model2) <- "lmerMod"
class(model3) <- "lmerMod"
class(model4) <- "lmerMod"
class(model5) <- "lmerMod"
class(model6) <- "lmerMod"
class(model7) <- "lmerMod"
class(model8) <- "lmerMod"
# if you want to produce the latex code just remove the hashtag
#stargazer(model1, model2, model3, model4, title="Replication results over ten seasons first four DVs", align=TRUE, star.cutoffs = c(0.05, 0.01, 0.001))
#stargazer(model5, model6, model7, model8, title="Replication results over ten seasons last four DVs", align=TRUE,star.cutoffs = c(0.05, 0.01, 0.001))
```
## Model used for only the last season
```{r, include=FALSE}
# original only one season
options(scipen=999)
model1 <- lmer(goalsDiff ~ Spectators + (1|Division), REML = FALSE, data = dat_0)
#summary(model1)
model2 <- lmer(pointsDiff ~ Spectators + (1|Division), REML = FALSE, data = dat_0)
#summary(model2)
model3 <- lmer(expPointsDiff ~ Spectators + (1|Division), REML = FALSE, data = dat_0)
#summary(model3)
model4 <- lmer(shotsDiff~ Spectators + (1|Division), REML = FALSE, data = dat_0)
#summary(model4)
model5 <- lmer(shotsTargetDiff ~ Spectators + (1|Division), REML = FALSE, data = dat_0)
#summary(model5)
model6 <- lmer(foulsDiff ~ Spectators + (1|Division), REML = FALSE, data = dat_0)
#summary(model6)
model7 <- lmer(yellowDiff ~ Spectators + (1|Division), REML = FALSE, data = dat_0)
#summary(model7)
model8 <- lmer(redDiff ~ Spectators + (1|Division), REML = FALSE, data = dat_0)
#summary(model8)
```
## Summary of the results for one season
```{r}
table1 <-screenreg(list(model5, model6, model7, model8),
custom.model.names=c("ShotsTarget", "Fouls", "Yellow", "Red"))
table1
```
```{r}
table1_0 <-screenreg(list(model1, model2, model3, model4),
custom.model.names=c("Goals","Points", "Exp.Points","Shots"))
table1_0
```
### Latex code for plots with Stargazer
```{r}
# needed to use the stargazer package
class(model1) <- "lmerMod"
class(model2) <- "lmerMod"
class(model3) <- "lmerMod"
class(model4) <- "lmerMod"
class(model5) <- "lmerMod"
class(model6) <- "lmerMod"
class(model7) <- "lmerMod"
class(model8) <- "lmerMod"
# if you want to produce the latex code just remove the hashtag
#stargazer(model1, model2, model3, model4, title="Replication results last season first four DVs", align=TRUE,star.cutoffs = c(0.05, 0.01, 0.001))
#stargazer(model5, model6, model7, model8, title="Replication results last season last four DVs", align=TRUE,star.cutoffs = c(0.05, 0.01, 0.001))
```
## Plots over the seasons initial data set
```{r, warning=FALSE}
plot_dat_season_1 <- dat %>%
group_by(Season) %>%
summarise(Goals =ci(goalsDiff, na.rm=T)[1],
GoalslowCI = ci(goalsDiff, na.rm=T)[2],
GoalshiCI = ci(goalsDiff, na.rm=T)[3],
Points =ci(pointsDiff, na.rm=T)[1],
PointslowCI = ci(pointsDiff, na.rm=T)[2],
PointshiCI = ci(pointsDiff, na.rm=T)[3],
ExpPoints =ci(expPointsDiff, na.rm=T)[1],
ExpPointslowCI = ci(expPointsDiff, na.rm=T)[2],
ExpPointshiCI = ci(expPointsDiff, na.rm=T)[3],
Shots =ci(shotsDiff, na.rm=T)[1],
ShotslowCI = ci(shotsDiff, na.rm=T)[2],
ShotshiCI = ci(shotsDiff, na.rm=T)[3],
ShotsTarget =ci(shotsTargetDiff, na.rm=T)[1],
ShotsTargetlowCI = ci(shotsTargetDiff, na.rm=T)[2],
ShotsTargethiCI = ci(shotsTargetDiff, na.rm=T)[3],
Fouls =ci(foulsDiff, na.rm=T)[1],
FoulslowCI = ci(foulsDiff, na.rm=T)[2],
FoulshiCI = ci(foulsDiff, na.rm=T)[3],
Yellow =ci(yellowDiff, na.rm=T)[1],
YellowlowCI = ci(yellowDiff, na.rm=T)[2],
YellowhiCI = ci(yellowDiff, na.rm=T)[3],
Red =ci(redDiff, na.rm=T)[1],
RedlowCI = ci(redDiff, na.rm=T)[2],
RedhiCI = ci(redDiff, na.rm=T)[3])
# this step is needed to have a mean and CIs for the seasons with and without spectators. This is especially of interest when considering the Season when Covid-19 interrupted the season half way (Season 10).
plot_dat_Spect_1 <- dat %>%
group_by(Spectators,Season) %>%
summarise(Goals =ci(goalsDiff, na.rm=T)[1],
GoalslowCI = ci(goalsDiff, na.rm=T)[2],
GoalshiCI = ci(goalsDiff, na.rm=T)[3],
Points =ci(pointsDiff, na.rm=T)[1],
PointslowCI = ci(pointsDiff, na.rm=T)[2],
PointshiCI = ci(pointsDiff, na.rm=T)[3],
ExpPoints =ci(expPointsDiff, na.rm=T)[1],
ExpPointslowCI = ci(expPointsDiff, na.rm=T)[2],
ExpPointshiCI = ci(expPointsDiff, na.rm=T)[3],
Shots =ci(shotsDiff, na.rm=T)[1],
ShotslowCI = ci(shotsDiff, na.rm=T)[2],
ShotshiCI = ci(shotsDiff, na.rm=T)[3],
ShotsTarget =ci(shotsTargetDiff, na.rm=T)[1],
ShotsTargetlowCI = ci(shotsTargetDiff, na.rm=T)[2],
ShotsTargethiCI = ci(shotsTargetDiff, na.rm=T)[3],
Fouls =ci(foulsDiff, na.rm=T)[1],
FoulslowCI = ci(foulsDiff, na.rm=T)[2],
FoulshiCI = ci(foulsDiff, na.rm=T)[3],
Yellow =ci(yellowDiff, na.rm=T)[1],
YellowlowCI = ci(yellowDiff, na.rm=T)[2],
YellowhiCI = ci(yellowDiff, na.rm=T)[3],
Red =ci(redDiff, na.rm=T)[1],
RedlowCI = ci(redDiff, na.rm=T)[2],
RedhiCI = ci(redDiff, na.rm=T)[3])
plot_data <- dplyr::bind_rows(plot_dat_season_1,plot_dat_Spect_1)
plot_data$Name <- paste(plot_data$Season,plot_data$Spectators)
plot_data <- plot_data %>%
dplyr::select(Name,Goals:RedhiCI)
```
```{r, include=FALSE}
# Rename the seasons to have a numeric variable, which can be used in an interactive way (Shiny app, attached to the repo)
plot_data$Name[plot_data$Name == "1 NA"] <- 1
plot_data$Name[plot_data$Name == "2 NA"] <- 2
plot_data$Name[plot_data$Name == "3 NA"] <- 3
plot_data$Name[plot_data$Name == "4 NA"] <- 4
plot_data$Name[plot_data$Name == "5 NA"] <- 5
plot_data$Name[plot_data$Name == "6 NA"] <- 6
plot_data$Name[plot_data$Name == "7 NA"] <- 7
plot_data$Name[plot_data$Name == "8 NA"] <- 8
plot_data$Name[plot_data$Name == "9 NA"] <- 9
plot_data$Name[plot_data$Name == "11 NA"] <- 11
plot_data$Name[plot_data$Name == "12 NA"] <- 12
plot_data$Name[plot_data$Name == "10 Yes"] <- 10
plot_data$Name[plot_data$Name == "10 No"] <- 10.5
plot_data <- subset(plot_data, Name == "1" | Name == "2" | Name == "3" | Name == "4" | Name == "5" | Name == "6" | Name == "7" | Name == "8" | Name == "9" | Name == "10" | Name == "10.5" |Name == "11" | Name == "12")
plot_data <- plot_data %>%
na.omit()
plot_data$Name <- as.numeric(plot_data$Name)
```
```{r}
# To be able to have a factor which can be used to differentiate the DVs
Goals <- as.tibble(plot_data$Goals)
Goals$dv <- "Goals"
Goals$season <- plot_data$Name
GoalslowCI <- as.tibble(plot_data$GoalslowCI)
GoalshiCI <- as.tibble(plot_data$GoalshiCI)
Points <- as.tibble(plot_data$Points)
Points$dv <- "Points"
Points$season <- plot_data$Name
PointslowCI <- as.tibble(plot_data$PointslowCI)
PointshiCI <- as.tibble(plot_data$PointshiCI)
ExpPoints <- as.tibble(plot_data$ExpPoints)
ExpPoints$dv <- "ExpPoints"
ExpPoints$season <- plot_data$Name
ExpPointslowCI <- as.tibble(plot_data$ExpPointslowCI)
ExpPointshiCI <- as.tibble(plot_data$ExpPointshiCI)
Shots <- as.tibble(plot_data$Shots)
Shots$dv <- "Shots"
Shots$season <- plot_data$Name
ShotslowCI <- as.tibble(plot_data$ShotslowCI)
ShotshiCI <- as.tibble(plot_data$ShotshiCI)
ShotsTarget <- as.tibble(plot_data$ShotsTarget)
ShotsTarget$dv <- "ShotsTarget"
ShotsTarget$season <- plot_data$Name
ShotsTargetlowCI <- as.tibble(plot_data$ShotsTargetlowCI)
ShotsTargethiCI <- as.tibble(plot_data$ShotsTargethiCI)
Fouls <- as.tibble(plot_data$Fouls)
Fouls$dv <- "Fouls"
Fouls$season <- plot_data$Name
FoulslowCI <- as.tibble(plot_data$FoulslowCI)
FoulshiCI <- as.tibble(plot_data$FoulshiCI)
Yellow <- as.tibble(plot_data$Yellow)
Yellow$dv <- "Yellow"
Yellow$season <- plot_data$Name
YellowlowCI <- as.tibble(plot_data$YellowlowCI)
YellowhiCI <- as.tibble(plot_data$YellowhiCI)
Red <- as.tibble(plot_data$Red)
Red$dv <- "Red"
Red$season <- plot_data$Name
RedlowCI <- as.tibble(plot_data$RedlowCI)
RedhiCI <- as.tibble(plot_data$RedhiCI)
```
```{r}
plot_data_mean <- rbind(Red,Yellow,Fouls,ShotsTarget,Shots,ExpPoints,Points,Goals)
plot_data_hi <- rbind(RedhiCI,YellowhiCI,FoulshiCI,ShotsTargethiCI,ShotshiCI,ExpPointshiCI,PointshiCI,GoalshiCI)
plot_data_lo <- rbind(RedlowCI,YellowlowCI,FoulslowCI,ShotsTargetlowCI,ShotslowCI,ExpPointslowCI,PointslowCI,GoalslowCI)
plot_data_lo <- plot_data_lo %>%
rename(CI_low = value)
plot_data_hi <- plot_data_hi %>%
rename(CI_hi = value)
plot_data_mean <- plot_data_mean %>%
rename(mean = value)
plot_data <- cbind(plot_data_mean,plot_data_lo,plot_data_hi)
plot_data <- plot_data %>%
select(season,dv,mean,CI_low,CI_hi)
plot_data <- plot_data %>%
mutate(dv = recode(dv, "ExpPoints" = "Expected Points", "ShotsTarget" = "Shots on Target"))
```
```{r}
# Data used for the plots.
saveRDS(plot_data,"Data/Plots/plot_data_rep.rds")
```
## Plotting
```{r}
# helper function to plot
source("Functions/helper_plot_manus.R")
source("Functions/helper_filter_dvs.R")
```
```{r}
filtered_dat <- filtered_df(plot_data,c("Points"))
p1 <- plot_function_manus(filtered_dat,conf = "Include", slope="No",covid="No",season_start=1,season_end=10, title="Points", type="rep",legend = "Exclude")
filtered_dat <- filtered_df(plot_data,c("Goals"))
p2 <- plot_function_manus(filtered_dat,conf = "Include", slope="No",covid="No",season_start=1,season_end=10, title="Goals", type="rep",legend = "Exclude")
filtered_dat <- filtered_df(plot_data,c("Expected Points"))
p3 <- plot_function_manus(filtered_dat,conf = "Include", slope="No",covid="No",season_start=1,season_end=10, title="Expected Points", type="rep",legend = "Exclude")
filtered_dat <- filtered_df(plot_data,c("Shots"))
p4 <- plot_function_manus(filtered_dat,conf = "Include", slope="No",covid="No",season_start=1,season_end=10, title="Shots", type="rep",legend = "Exclude")
filtered_dat <- filtered_df(plot_data,c("Shots on Target"))
p5 <- plot_function_manus(filtered_dat,conf = "Include", slope="No",covid="No",season_start=1,season_end=10, title="Shots on Target", type="rep",legend = "Exclude")
filtered_dat <- filtered_df(plot_data,c("Fouls"))
p6 <- plot_function_manus(filtered_dat,conf = "Include", slope="No",covid="No",season_start=1,season_end=10, title="Fouls", type="rep",legend = "Exclude")
filtered_dat <- filtered_df(plot_data,c("Yellow"))
p7 <- plot_function_manus(filtered_dat,conf = "Include", slope="No",covid="No",season_start=1,season_end=10, title="Yellow", type="rep",legend = "Exclude")
filtered_dat <- filtered_df(plot_data,c("Red"))
p8 <- plot_function_manus(filtered_dat,conf = "Include", slope="No",covid="No",season_start=1,season_end=10, title="Red", type="rep",legend = "Exclude")
```
```{r}
Replicated_plot <- plot_grid(p1,p2,p3,p4,p5,p6,p7,p8, ncol=2)
#this visualization gives nice insights about the overall decreasing trend of a home advantage plus the more drastically decrease for games without spectators. However, for points and goals it is not clear if this is just the regular trend or due to the spectator absence. (Also not significant regression)
# moreover, the Expected points suggest a heavy mispricing of home teams during this first time with spectator absence! This is a very interesting finding as it means the betting companies have had flaws in their algorithms during this time
Replicated_plot
ggsave("Manuscript/Figures/Replicated_plot.png", width = 8.5, height = 6,bg="white")
```
```{r}
# as all of the other rmd files use the same packages I can do this step only for one of the files
writeLines(capture.output(sessionInfo()), "Requirements.txt")
```