-
Notifications
You must be signed in to change notification settings - Fork 1
/
team_bias.R
104 lines (98 loc) · 3.93 KB
/
team_bias.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
library(vroom)
library(xtable)
## simulation function
##
## Simulates, n times, a net count of call outcomes for team `name`.
##
## Arguments:
##   name               team identifier, compared against df$committing_team
##                      and df$disadvantaged_team.
##   df                 data frame with columns committing_team,
##                      disadvantaged_team and call_type (one row per call).
##   league_recall_call data frame with columns violation, p1 and p2; p1 and
##                      p2 are cumulative thresholds on a uniform draw for
##                      calls whose call_type equals `violation`.
##   n                  number of simulated replicates (default 100).
##
## For each replicate one uniform draw r is made per row, first for the rows
## where `name` is the committing team and then for the rows where it is the
## disadvantaged team:
##   committing row:     r < p1 -> +1,  p1 <= r < p2 -> -1
##   disadvantaged row:  r < p1 -> -1,  p1 <= r < p2 -> +1
## Call types without a row in league_recall_call (including NA call types)
## use the hard-coded league-wide thresholds below.
##
## Returns a numeric vector of length n holding the net count per replicate.
empirical_netsim_team <- function(name, df, league_recall_call, n = 100){
  LEAGUE_AVG_recall = 0.22 ## from data
  LEAGUE_AVG_precision = 0.31

  ## Resolve the (p1, p2) thresholds for a vector of call types once, instead
  ## of re-querying league_recall_call for every row of every replicate.
  thresholds_for <- function(call_types){
    p1 <- rep(LEAGUE_AVG_recall, length(call_types))
    p2 <- rep(LEAGUE_AVG_precision, length(call_types))
    ## incomparables = NA keeps an NA call type on the league-average path,
    ## matching the behaviour of an `==` comparison.
    idx <- match(call_types, league_recall_call$violation, incomparables = NA)
    known <- which(!is.na(idx))
    if (length(known) > 0){
      p1[known] <- league_recall_call$p1[idx[known]]
      p2[known] <- league_recall_call$p2[idx[known]]
    }
    if (anyNA(p1) || anyNA(p2)){
      stop("league_recall_call has a missing p1/p2 for a call type involving ",
           name, call. = FALSE)
    }
    list(p1 = p1, p2 = p2)
  }

  ## which() drops NA comparisons, so rows with a missing team never match.
  commit <- thresholds_for(df$call_type[which(df$committing_team == name)])
  disadv <- thresholds_for(df$call_type[which(df$disadvantaged_team == name)])
  n_commit <- length(commit$p1)
  n_disadv <- length(disadv$p1)

  sims <- numeric(n)
  ## seq_len() (not 1:n) so that n = 0, or a team with no rows in one role,
  ## produces no phantom iterations.
  for (i in seq_len(n)){
    ## Draw order is kept: all committing rows first, then all disadvantaged
    ## rows, so results for a given seed match a row-by-row loop.
    r_commit <- runif(n_commit)
    r_disadv <- runif(n_disadv)

    commit_first <- r_commit < commit$p1
    commit_second <- !commit_first & r_commit < commit$p2
    disadv_first <- r_disadv < disadv$p1
    disadv_second <- !disadv_first & r_disadv < disadv$p2

    plus <- sum(commit_first) + sum(disadv_second)
    minus <- sum(commit_second) + sum(disadv_first)
    sims[i] <- plus - minus
  }
  return(sims)
}
## Load the L2M call-level data and, for every team with enough calls, compare
## the team's observed net count of incorrect decisions with the distribution
## simulated by empirical_netsim_team() under league-wide rates that exclude
## that team. The summary is rendered with xtable().
## NOTE(review): decision codes are presumably CC = correct call,
## IC = incorrect call, INC = incorrect non-call, CNC = correct non-call;
## confirm against the dataset's documentation.
df_3y <- vroom("https://raw.githubusercontent.com/atlhawksfanatic/L2M/master/1-tidy/L2M/L2M.csv")
## Keep only rows that have a decision, and drop the "CNC" rows.
df_3y <- df_3y[which(!is.na(df_3y$decision)), ]
df_3y <- df_3y[as.character(df_3y$decision) != "CNC", ]

teams <- unique(c(df_3y$disadvantaged_team, df_3y$committing_team))
## A missing team name matches no rows and could never pass the size filter.
teams <- teams[!is.na(teams)]

min_calls <- 100  ## a team needs more than this many calls to be analysed
n_sims <- 100     ## number of simulated replicates per team

decisions <- as.character(df_3y$decision)
call_types <- unique(df_3y$call_type)

results.df <- data.frame(team = c(), pval = c(), effect = c(), size = c())
team_results <- list()
for (team in teams){
  commit_rows <- which(df_3y$committing_team == team)
  disadv_rows <- which(df_3y$disadvantaged_team == team)
  n_calls <- length(commit_rows) + length(disadv_rows)
  ## Skip teams with too few calls for a meaningful comparison.
  if (n_calls <= min_calls){
    next
  }

  ## League-wide outcome rates per call type, computed only from calls that do
  ## not involve this team. which() also drops rows where either team is NA.
  other_rows <- which(df_3y$committing_team != team &
                        df_3y$disadvantaged_team != team)
  other_types <- df_3y$call_type[other_rows]
  other_decisions <- decisions[other_rows]
  rate_rows <- lapply(call_types, function(ct){
    dec <- other_decisions[which(other_types == ct)]
    if (length(dec) == 0){
      return(NULL)
    }
    inc <- sum(dec == "INC", na.rm = TRUE)
    ic <- sum(dec == "IC", na.rm = TRUE)
    cc <- sum(dec == "CC", na.rm = TRUE)
    total <- inc + ic + cc
    ## p1 and p2 are cumulative thresholds: P(INC) and P(INC or IC).
    data.frame(violation = ct, p1 = inc / total, p2 = (inc + ic) / total)
  })
  rate_rows <- Filter(Negate(is.null), rate_rows)
  league_recall_call <- data.frame(violation = c(), p1 = c(), p2 = c())
  if (length(rate_rows) > 0){
    league_recall_call <- do.call(rbind, rate_rows)
  }

  ## Observed counts for this team in each role.
  inc_commit <- sum(decisions[commit_rows] == "INC", na.rm = TRUE)
  ic_commit <- sum(decisions[commit_rows] == "IC", na.rm = TRUE)
  inc_disadv <- sum(decisions[disadv_rows] == "INC", na.rm = TRUE)
  ic_disadv <- sum(decisions[disadv_rows] == "IC", na.rm = TRUE)

  ## The observed statistic must be the same quantity the simulation returns:
  ## (INC as committing + IC as disadvantaged) minus
  ## (INC as disadvantaged + IC as committing).
  ## NOTE(review): the earlier code overwrote this with the INC-only
  ## difference, which is not what empirical_netsim_team() simulates.
  t_real <- (inc_commit + ic_disadv) - (inc_disadv + ic_commit)

  t_sim <- empirical_netsim_team(team, df = df_3y, league_recall_call, n = n_sims)
  ## One-sided empirical p-value: share of replicates at least as large as the
  ## observed statistic.
  emp_pval <- sum(t_sim >= t_real, na.rm = TRUE) / n_sims

  team_results[[length(team_results) + 1]] <- data.frame(
    team = team,
    pval = emp_pval,
    effect = t_real - mean(t_sim),
    size = n_calls
  )
}
## Bind once at the end instead of growing the data frame inside the loop.
if (length(team_results) > 0){
  results.df <- do.call(rbind, team_results)
}
xtable(results.df)