-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathplot_alarm_results_supplementary.R
253 lines (211 loc) · 14 KB
/
plot_alarm_results_supplementary.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
library(ggplot2)
library(plyr)
library(RColorBrewer)
library(reshape2)
# ---- Load cost-ratio results for both classifiers ----
# Every file found in each results directory is read as a ';'-separated table
# with a header row and tagged with the classifier name in a `cls` column.
# NOTE(review): list.files() returns everything in the directory, so the
# directories are assumed to contain only result tables.
setwd("/home/irene/Repos/AlarmBasedProcessPrediction/results_lgbm_ratios_nottrunc/")
files <- list.files()
lgbm_tables <- lapply(files, function(path) {
  tbl <- read.table(path, sep = ";", header = TRUE)
  tbl$cls <- "lgbm"
  tbl
})

setwd("/home/irene/Repos/AlarmBasedProcessPrediction/results_rf_ratios_nottrunc/")
files <- list.files()
rf_tables <- lapply(files, function(path) {
  tbl <- read.table(path, sep = ";", header = TRUE)
  tbl$cls <- "rf"
  tbl
})

# Bind everything in a single call instead of growing `data` with rbind() on
# every iteration. The empty seed keeps `data` a data frame even when no
# files were found.
data <- do.call(rbind, c(list(data.frame()), lgbm_tables, rf_tables))

# Remove "_mean" from the metric names.
data$metric <- gsub("_mean", "", data$metric)
#data$cost_weights_str <- sprintf("%s\n%s\n%s", data$c_miss, data$c_action, data$c_postpone)
# Cost-ratio label of the form "<c_miss>:<c_action>".
data$ratio <- paste(data$c_miss, data$c_action, sep = ":")

# Shared figure settings used by the plots further down the script.
base_size <- 36
line_size <- 1
point_size <- 4
width <- 1300
height <- 900

# Drop the "bpic2018" and "uwv" datasets. This happens before "uwv_all" is
# relabelled to "uwv" below, so the relabelled rows are kept.
data <- subset(data, dataset != "bpic2018")
data <- subset(data, dataset != "uwv")

data$method <- as.character(data$method)
data$dataset <- as.character(data$dataset)

# Human-readable method and dataset labels for the figures.
data$method[data$method == "fixed0"] <- "always alarm"
data$method[data$method == "fixed110"] <- "never alarm"
data$method[data$method == "fixed50"] <- "tau=0.5"
data$method[data$method == "opt_threshold"] <- "optimized"
data$dataset[data$dataset == "uwv_all"] <- "uwv"
data$dataset[data$dataset == "traffic_fines_1"] <- "traffic_fines"
head(data)
# ---- Average cost per case vs. cost ratio, one figure per classifier ----
# Uses `data` and the figure settings (`width`, `height`, `base_size`,
# `line_size`, `point_size`) defined earlier in the script.
color_palette <- c("#0072B2", "#000000", "#E69F00", "#009E73", "#56B4E9","#D55E00", "#999999", "#F0E442", "#CC79A7")

# Mapping and layers common to both figures; the list is added to the plot
# in the order given.
ratio_plot_aes <- aes(x = c_miss, y = value, color = method, shape = method)
ratio_plot_layers <- list(
  geom_point(size = point_size),
  geom_line(size = line_size),
  scale_x_continuous(
    breaks = c(1, 5, 10, 20),
    labels = c("1:1", "5:1", "10:1", "20:1")
  ),
  theme_bw(base_size = base_size),
  ylab("Average cost per case"),
  xlab("c_out : c_in"),
  facet_wrap(~ dataset, ncol = 2),
  scale_color_manual(values = color_palette),
  theme(legend.position = "top")
)

# Rows plotted: no postponement cost, the average-cost metric, and only
# methods whose name does not contain "fixed".
# LightGBM
png("/home/irene/Repos/AlarmBasedProcessPrediction/images/results_ratios_lgbm.png", width = width, height = height)
lgbm_ratio_rows <- subset(
  data,
  cls == "lgbm" & c_postpone == 0 & metric == "cost_avg" & !grepl("fixed", method)
)
ggplot(lgbm_ratio_rows, ratio_plot_aes) + ratio_plot_layers
dev.off()

# Random forest
png("/home/irene/Repos/AlarmBasedProcessPrediction/images/results_ratios_rf.png", width = width, height = height)
rf_ratio_rows <- subset(
  data,
  cls == "rf" & c_postpone == 0 & metric == "cost_avg" & !grepl("fixed", method)
)
ggplot(rf_ratio_rows, ratio_plot_aes) + ratio_plot_layers
dev.off()
# ---- Average cost per case vs. alarm threshold, one figure per classifier ----
# One coloured series per cost ratio; rows of the "optimized" method are
# overlaid as red crosses.
color_palette <- c("#0072B2", "#000000", "#E69F00", "#009E73", "#56B4E9","#D55E00", "#999999", "#F0E442", "#CC79A7")

threshold_plot_aes <- aes(x = threshold, y = value, color = factor(ratio))
# Theme, labels, facets and colour scale shared by both figures.
threshold_plot_finish <- list(
  theme_bw(base_size = base_size),
  ylab("Average cost per case"),
  xlab("Threshold (tau)"),
  facet_wrap(~ dataset, scales = "free", ncol = 2),
  scale_color_manual(values = color_palette, name = "c_out:c_in")
)

# LightGBM
png("/home/irene/Repos/AlarmBasedProcessPrediction/images/results_thresholds_lgbm.png", width = width, height = height)
lgbm_cost_rows <- subset(data, cls == "lgbm" & c_postpone == 0 & metric == "cost_avg")
lgbm_optimized_rows <- subset(lgbm_cost_rows, method == "optimized")
ggplot(lgbm_cost_rows, threshold_plot_aes) +
  geom_point(size = point_size) +
  geom_line(size = line_size) +
  # This layer sets a constant colour, size, stroke and shape next to its
  # own mapping.
  geom_point(
    mapping = aes(x = threshold, y = value, color = factor(c_miss)),
    data = lgbm_optimized_rows,
    size = 4, color = "red", stroke = 2, shape = 4
  ) +
  threshold_plot_finish
dev.off()

# Random forest
png("/home/irene/Repos/AlarmBasedProcessPrediction/images/results_thresholds_rf.png", width = width, height = height)
rf_cost_rows <- subset(data, cls == "rf" & c_postpone == 0 & metric == "cost_avg")
rf_optimized_rows <- subset(rf_cost_rows, method == "optimized")
ggplot(rf_cost_rows, threshold_plot_aes) +
  geom_point(size = point_size) +
  geom_line(size = line_size) +
  geom_point(
    mapping = aes(x = threshold, y = value, color = factor(c_miss)),
    data = rf_optimized_rows,
    size = 4, color = "red", stroke = 2, shape = 4
  ) +
  threshold_plot_finish
dev.off()
# ---- Earliness and F-score of the optimized method vs. cost ratio ----
# Keep only the "optimized" method and the two metrics being compared.
dt_melt <- subset(data, method == "optimized" & metric %in% c("earliness", "fscore"))

# Mapping and layers shared by the two classifier figures.
metric_plot_aes <- aes(x = c_miss, y = value, color = metric)
metric_plot_layers <- list(
  geom_point(size = point_size),
  geom_line(size = line_size),
  theme_bw(base_size = base_size),
  ylab("value"),
  xlab("Ratio c_out:c_in"),
  facet_wrap(~ dataset, scales = "free", ncol = 2)
)

# Random forest
png("/home/irene/Repos/AlarmBasedProcessPrediction/images/results_earliness_fscore_rf.png", width = width, height = height)
rf_metric_rows <- subset(dt_melt, cls == "rf")
ggplot(rf_metric_rows, metric_plot_aes) + metric_plot_layers
dev.off()

# LightGBM
png("/home/irene/Repos/AlarmBasedProcessPrediction/images/results_earliness_fscore_lgbm.png", width = width, height = height)
lgbm_metric_rows <- subset(dt_melt, cls == "lgbm")
ggplot(lgbm_metric_rows, metric_plot_aes) + metric_plot_layers
dev.off()
### heatmaps effectiveness
# Load the mitigation-effectiveness results for both classifiers. Every file
# in each directory is read as a ';'-separated table with a header row and
# tagged with the classifier name in `cls`.
setwd("/home/irene/Repos/AlarmBasedProcessPrediction/results_lgbm_ratios_nottrunc_effectiveness/")
files <- list.files()
lgbm_tables <- lapply(files, function(path) {
  tbl <- read.table(path, sep = ";", header = TRUE)
  tbl$cls <- "lgbm"
  tbl
})

setwd("/home/irene/Repos/AlarmBasedProcessPrediction/results_rf_ratios_nottrunc_effectiveness/")
files <- list.files()
rf_tables <- lapply(files, function(path) {
  tbl <- read.table(path, sep = ";", header = TRUE)
  tbl$cls <- "rf"
  tbl
})

# Bind once instead of growing `data` inside a loop; the empty seed keeps
# `data` a data frame even when no files were found.
data <- do.call(rbind, c(list(data.frame()), lgbm_tables, rf_tables))

# Remove "_mean" from the metric names and build the "<c_miss>:<c_action>"
# ratio label.
data$metric <- gsub("_mean", "", data$metric)
data$ratio <- paste(data$c_miss, data$c_action, sep = ":")

data$dataset <- as.character(data$dataset)
data$dataset[data$dataset == "uwv_all"] <- "uwv"
data$dataset[data$dataset == "traffic_fines_1"] <- "traffic_fines"
data$early_type <- as.character(data$early_type)
#data$early_type[data$early_type=="linear"] <- "linear eff. decay"
#data$early_type[data$early_type=="const"] <- "constant eff."
head(data)

# Pair each baseline cost row with the matching cost row of the alarm system;
# benefit = baseline cost - alarm-system cost.
dt_as_is <- subset(data, metric == "cost_avg_baseline")
dt_to_be <- subset(data, metric == "cost_avg")
# NOTE(review): column 10 of the baseline rows is dropped by position before
# merging; confirm which column that is if the result file layout changes.
dt_merged <- merge(
  dt_as_is[, -10], dt_to_be,
  by = c("dataset", "method", "c_miss", "c_action", "c_postpone", "eff", "early_type", "cls", "ratio"),
  suffixes = c("_as_is", "_to_be")
)
dt_merged$benefit <- dt_merged$value_as_is - dt_merged$value_to_be

# Reorder the ratio levels for the heatmap axis.
# NOTE(review): the index vector assumes exactly seven ratio levels in the
# default sorted order of as.factor(); verify if the set of ratios changes.
dt_merged$ratio <- as.factor(dt_merged$ratio)
dt_merged$ratio <- factor(dt_merged$ratio, levels(dt_merged$ratio)[c(2, 4, 5, 7, 1, 3, 6)])
# ---- Benefit heatmaps: cost ratio vs. mitigation effectiveness ----
# Four figures: {"const" early_type, every other early_type} x {lgbm, rf}.
# Tile shade encodes `benefit` from `dt_merged`.
base_size <- 30

# Mapping and layers shared by all four heatmaps.
eff_heatmap_aes <- aes(eff, factor(ratio))
eff_heatmap_layers <- list(
  geom_tile(aes(fill = benefit), colour = "white"),
  scale_x_continuous(breaks = c(0, 0.2, 0.4, 0.6, 0.8, 1)),
  theme_bw(base_size = base_size),
  scale_fill_gradient(low = "white", high = "black"),
  facet_wrap(~ dataset, ncol = 2),
  xlab("mitigation effectiveness (eff)"),
  ylab("c_out : c_in")
)

# Constant early_type, LightGBM
png("/home/irene/Repos/AlarmBasedProcessPrediction/images/results_effectiveness_const_lgbm.png", width = width, height = height)
eff_rows <- subset(dt_merged, c_miss %in% c(1, 2, 3, 5, 10, 20))
eff_const_rows <- subset(eff_rows, grepl("const", early_type))
ggplot(subset(eff_const_rows, cls == "lgbm"), eff_heatmap_aes) + eff_heatmap_layers
dev.off()

# Constant early_type, random forest
png("/home/irene/Repos/AlarmBasedProcessPrediction/images/results_effectiveness_const_rf.png", width = width, height = height)
ggplot(subset(eff_const_rows, cls == "rf"), eff_heatmap_aes) + eff_heatmap_layers
dev.off()

# Every early_type that does not contain "const", LightGBM
png("/home/irene/Repos/AlarmBasedProcessPrediction/images/results_effectiveness_linear_lgbm.png", width = width, height = height)
eff_nonconst_rows <- subset(eff_rows, !grepl("const", early_type))
ggplot(subset(eff_nonconst_rows, cls == "lgbm"), eff_heatmap_aes) + eff_heatmap_layers
dev.off()

# Every early_type that does not contain "const", random forest
png("/home/irene/Repos/AlarmBasedProcessPrediction/images/results_effectiveness_linear_rf.png", width = width, height = height)
ggplot(subset(eff_nonconst_rows, cls == "rf"), eff_heatmap_aes) + eff_heatmap_layers
dev.off()
### heatmaps cost of compensation
# Load the compensation-cost results for both classifiers. Every file in each
# directory is read as a ';'-separated table with a header row and tagged
# with the classifier name in `cls`.
setwd("/home/irene/Repos/AlarmBasedProcessPrediction/results_lgbm_ratios_nottrunc_compensation/")
files <- list.files()
lgbm_tables <- lapply(files, function(path) {
  tbl <- read.table(path, sep = ";", header = TRUE)
  tbl$cls <- "lgbm"
  tbl
})

setwd("/home/irene/Repos/AlarmBasedProcessPrediction/results_rf_ratios_nottrunc_compensation/")
files <- list.files()
rf_tables <- lapply(files, function(path) {
  tbl <- read.table(path, sep = ";", header = TRUE)
  tbl$cls <- "rf"
  tbl
})

# Bind once instead of growing `data` inside a loop; the empty seed keeps
# `data` a data frame even when no files were found.
data <- do.call(rbind, c(list(data.frame()), lgbm_tables, rf_tables))

# Remove "_mean" from the metric names and build the "<c_miss>:<c_action>"
# ratio label.
data$metric <- gsub("_mean", "", data$metric)
data$ratio <- paste(data$c_miss, data$c_action, sep = ":")

# Label for the compensation cost:
#   c_com == 0 -> "1:0"
#   c_com  > 1 -> "1:<c_com>"
#   otherwise  -> "<1 / c_com>:1"
data$ratio_com <- ifelse(
  data$c_com == 0,
  "1:0",
  ifelse(data$c_com > 1, sprintf("1:%s", data$c_com), sprintf("%s:1", 1 / data$c_com))
)

data$dataset <- as.character(data$dataset)
data$dataset[data$dataset == "uwv_all"] <- "uwv"
data$dataset[data$dataset == "traffic_fines_1"] <- "traffic_fines"
data$early_type <- as.character(data$early_type)
data$early_type[data$early_type == "linear"] <- "linearly increasing c_in"
data$early_type[data$early_type == "const"] <- "constant c_in"
head(data)

# Pair each baseline cost row with the matching cost row of the alarm system;
# benefit = baseline cost - alarm-system cost.
dt_as_is <- subset(data, metric == "cost_avg_baseline")
dt_to_be <- subset(data, metric == "cost_avg")
# NOTE(review): column 10 of the baseline rows is dropped by position before
# merging; confirm which column that is if the result file layout changes.
dt_merged <- merge(
  dt_as_is[, -10], dt_to_be,
  by = c("dataset", "method", "c_miss", "c_action", "c_postpone", "c_com", "early_type", "cls", "ratio", "ratio_com"),
  suffixes = c("_as_is", "_to_be")
)
dt_merged$benefit <- dt_merged$value_as_is - dt_merged$value_to_be

# Reorder the factor levels for the heatmap axes.
# NOTE(review): both index vectors assume a fixed number of levels (7 and 14)
# in the default sorted order of as.factor(); verify if the inputs change.
dt_merged$ratio <- as.factor(dt_merged$ratio)
dt_merged$ratio <- factor(dt_merged$ratio, levels(dt_merged$ratio)[c(2, 4, 5, 7, 1, 3, 6)])
dt_merged$ratio_com <- as.factor(dt_merged$ratio_com)
dt_merged$ratio_com <- factor(dt_merged$ratio_com, levels(dt_merged$ratio_com)[c(1, 13, 10, 2, 14, 12, 11, 3, 5, 7, 9, 4, 6, 8)])
# ---- Benefit heatmaps: cost ratio vs. compensation-cost ratio ----
# Four figures: {"const" early_type, every other early_type} x {lgbm, rf}.
# Each one keeps the listed c_miss values, drops ratio "3:1" and drops the
# listed ratio_com labels (including NA). Tile shade encodes `benefit`.

# Constant early_type, LightGBM
png("/home/irene/Repos/AlarmBasedProcessPrediction/images/results_compensation_const_lgbm.png", width = width, height = height)
ggplot(
  subset(
    dt_merged,
    cls == "lgbm" & c_miss %in% c(1, 2, 3, 5, 10, 20) & grepl("const", early_type) & ratio != "3:1" &
      !(ratio_com %in% c("3:1", "1:3", "1:40", "40:1", NA, "20:1", "1:20"))
  ),
  aes(ratio_com, factor(ratio))
) +
  geom_tile(aes(fill = benefit), colour = "white") +
  theme_bw(base_size = base_size) +
  scale_fill_gradient(low = "white", high = "black") +
  facet_wrap(~ dataset, ncol = 2) +
  xlab("c_in : c_com") +
  ylab("c_out : c_in") +
  theme(axis.text.x = element_text(size = 20))
dev.off()

# Constant early_type, random forest
png("/home/irene/Repos/AlarmBasedProcessPrediction/images/results_compensation_const_rf.png", width = width, height = height)
ggplot(
  subset(
    dt_merged,
    cls == "rf" & c_miss %in% c(1, 2, 3, 5, 10, 20) & grepl("const", early_type) & ratio != "3:1" &
      !(ratio_com %in% c("3:1", "1:3", "1:40", "40:1", NA, "20:1", "1:20"))
  ),
  aes(ratio_com, factor(ratio))
) +
  geom_tile(aes(fill = benefit), colour = "white") +
  theme_bw(base_size = base_size) +
  scale_fill_gradient(low = "white", high = "black") +
  facet_wrap(~ dataset, ncol = 2) +
  xlab("c_in : c_com") +
  ylab("c_out : c_in") +
  theme(axis.text.x = element_text(size = 20))
dev.off()

# Every early_type that does not contain "const", LightGBM
png("/home/irene/Repos/AlarmBasedProcessPrediction/images/results_compensation_linear_lgbm.png", width = width, height = height)
ggplot(
  subset(
    dt_merged,
    cls == "lgbm" & c_miss %in% c(1, 2, 3, 5, 10, 20) & !grepl("const", early_type) & ratio != "3:1" &
      !(ratio_com %in% c("3:1", "1:3", "1:40", "40:1", NA, "20:1", "1:20"))
  ),
  aes(ratio_com, factor(ratio))
) +
  geom_tile(aes(fill = benefit), colour = "white") +
  theme_bw(base_size = base_size) +
  scale_fill_gradient(low = "white", high = "black") +
  facet_wrap(~ dataset, ncol = 2) +
  xlab("c_in : c_com") +
  ylab("c_out : c_in") +
  theme(axis.text.x = element_text(size = 20))
dev.off()

# Every early_type that does not contain "const", random forest.
# The filter selects the "rf" rows so the figure matches its file name; it
# previously selected "lgbm" and reproduced the LightGBM figure.
png("/home/irene/Repos/AlarmBasedProcessPrediction/images/results_compensation_linear_rf.png", width = width, height = height)
ggplot(
  subset(
    dt_merged,
    cls == "rf" & c_miss %in% c(1, 2, 3, 5, 10, 20) & !grepl("const", early_type) & ratio != "3:1" &
      !(ratio_com %in% c("3:1", "1:3", "1:40", "40:1", NA, "20:1", "1:20"))
  ),
  aes(ratio_com, factor(ratio))
) +
  geom_tile(aes(fill = benefit), colour = "white") +
  theme_bw(base_size = base_size) +
  scale_fill_gradient(low = "white", high = "black") +
  facet_wrap(~ dataset, ncol = 2) +
  xlab("c_in : c_com") +
  ylab("c_out : c_in") +
  theme(axis.text.x = element_text(size = 20))
dev.off()
# Reliability plot
# Compares calibrated and original predictions for bpic2017_refused: per
# prefix length and probability bin, the mean predicted probability is
# plotted against the observed share of positive cases.
setwd("/home/irene/Repos/AlarmBasedProcessPrediction/")
data <- read.table("predictions_calibrated/preds_bpic2017_refused.csv", sep = ";", header = TRUE)
data$params <- "calibrated"
tmp <- read.table("predictions/preds_bpic2017_refused.csv", sep = ";", header = TRUE)
tmp$params <- "original"
# The original predictions store the probability in `preds`; rename it so the
# two tables can be row-bound.
names(tmp)[names(tmp) == "preds"] <- "predicted_proba"
data <- rbind(data, tmp)
head(data)
head(tmp)

# 20 equal-width bins over [0, 1], each labelled by its lower edge.
# include.lowest = TRUE puts a prediction of exactly 0 into the first bin;
# without it cut() returns NA for that value.
data$bin <- cut(
  data$predicted_proba,
  breaks = seq(0, 1, 0.05),
  labels = seq(0, 0.95, 0.05),
  include.lowest = TRUE
)

# reliability plots
dt_reliability <- ddply(
  data, .(params, bin, prefix_nr), summarise,
  mean_predicted = mean(predicted_proba),
  ratio_pos = sum(actual) / length(actual)
)
head(dt_reliability)

png("/home/irene/Repos/AlarmBasedProcessPrediction/images/reliability_bpic2017_refused.png", width = width, height = height)
ggplot(dt_reliability, aes(x = mean_predicted, y = ratio_pos, color = params)) +
  geom_point() +
  geom_line() +
  theme_bw(base_size = 22) +
  facet_wrap(~ prefix_nr) +
  # Diagonal y = x for reference.
  geom_abline(slope = 1, intercept = 0) +
  scale_x_continuous(breaks = c(0, 0.3, 0.6, 0.9))
dev.off()