-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathsupervised_randomization.R
91 lines (75 loc) · 3.31 KB
/
supervised_randomization.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
# #### Static Quantile-based Mapping ####
# map_propensity <- function(model_score, target_ratio, groups=9){
# # Platt scaling
# # platt_scaler <- glm(y~prob, family=binomial(link='logit'),
# # weights = ifelse(exp$none$y==1, 1/(mean(exp$none$y==1)), 1),
# # data = data.frame(cbind('y'=exp$none$y,'prob'=churn_pred))
# # )
# # treat_prob <- predict(platt_scaler, newdata=data.frame(prob=churn_pred), type='response')
# # Cut into groups based on the score quantiles
# score_quantiles <- quantile(model_score,seq(0,1,1/groups))
#
# if(length(score_quantiles) != length(unique(score_quantiles))){
# score_quantiles <- unique(score_quantiles)
# warning(paste("Reduced number of groups to", length(score_quantiles)-1, "because scores between groups were constant"))
# }
#
# model_score <- cut(model_score,breaks=score_quantiles,labels=FALSE, include.lowest = TRUE)-1
#
# # Adjust to expected target ratio by shifting min or max
# if(target_ratio<=0.5){
# new_max <- 2*target_ratio - 0.05
# model_score <- 0.05 + model_score* (new_max-0.05)/(groups-1)
# }
# if(target_ratio>0.5){
# new_min <- 2*target_ratio - 0.95
# model_score <- new_min + model_score* (0.95-new_min)/(groups-1)
# }
#
# return(model_score)
# }
#### Static Quantile-based Mapping ####
get_score_quantiles_ <- function(model_score, groups=9){
# Cut into groups based on the score quantiles
score_quantiles <- quantile(model_score,seq(0,1,1/groups))
if(length(score_quantiles) != length(unique(score_quantiles))){
score_quantiles <- unique(score_quantiles)
warning(paste("Reduced number of groups to", length(score_quantiles)-1, "because scores between groups were constant"))
}
return(score_quantiles)
}
map_propensity_quantiles <- function(model_score, target_ratio, score_breaks){
# Platt scaling
# platt_scaler <- glm(y~prob, family=binomial(link='logit'),
# weights = ifelse(exp$none$y==1, 1/(mean(exp$none$y==1)), 1),
# data = data.frame(cbind('y'=exp$none$y,'prob'=churn_pred))
# )
# treat_prob <- predict(platt_scaler, newdata=data.frame(prob=churn_pred), type='response')
model_score <- cut(model_score, breaks=score_breaks,labels=FALSE, include.lowest = TRUE)-1
n_groups <- length(unique(model_score))
# Adjust to expected target ratio by shifting min or max
if(target_ratio<=0.5){
new_max <- 2*target_ratio - 0.05
model_score <- 0.05 + model_score* (new_max-0.05)/(n_groups-1)
}
if(target_ratio>0.5){
new_min <- 2*target_ratio - 0.95
model_score <- new_min + model_score* (0.95-new_min)/(n_groups-1)
}
return(model_score)
}
#### Generalized Logistic Mapping ####
generalized_logistic_ <- function(x, A=0, K=1, B=1, nu=1, Q=1, C=1){
# B: growth rate
# nu: skew
return(A + (K-A)/((C+Q*exp(-B*x))^(1/nu)))
}
map_propensity_logistic <- function(model_score, min_score, max_score, target_ratio=NULL){
if(!is.null(target_ratio)){
stop("target_ratio not implemented")
}
# Min-Max scaling based on e.g. training data model scores
model_score_normalized <- -1 + (model_score - min_score)*(2)/(max_score-min_score)
prop_score <- generalized_logistic_(model_score_normalized, A=0.05, K=0.95, B=5)
return(prop_score)
}