-
Notifications
You must be signed in to change notification settings - Fork 1
/
evaluate_estimator.R
91 lines (78 loc) · 3.05 KB
/
evaluate_estimator.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
# this topic is concerned with taking a random simulation
# and demonstrating bias and efficiency in an estimator
# we generate a random sample of some normals
# we create the following types of estimators
rm(list = ls());
library(ggplot2);
library(dplyr);
library(gridExtra)
estimator_mean <- function(x){
return(mean(x));
}
estimator_constant <- function(x){
return(x[1]);
}
# so let's get a bunch of random normals
mu <- 10;
sd <- 1;
N <- 1e4;
x <- rnorm(n = N, mean = mu, sd = sd);
df <- data.frame(x = x);
df %>%
ggplot(aes(x = x)) +
geom_histogram(aes(alpha = .1)) +
geom_vline(aes(xintercept = mu), colour = "red") +
labs(title = "Evaluating Estimators",
subtitle = sprintf("%s random normals with mean %s and sd %s", N, mu, sd),
caption = "Christian Bitter") +
theme_light()
# we know the true mean for comparison,
# but we also know that the emperical mean is the best estimator of the true mean mu
# so let's create M samples, estimate and see how our estimator stacks against the
# true mean
M <- 1e4;
L <- N;
estimates <- replicate(n = M,
expr = (function() {
s <- sample(x = x,
size = L,
replace = T);
mu_hat <- estimator_mean(s);
e_hat <- estimator_constant(s);
return(c(mu_hat, e_hat));
})(),
simplify = T);
m <- matrix(estimates, byrow = T, ncol = 2);
df_e <- data.frame(x = m[, 1], type = rep("Mean Estimator", M));
df_e <- rbind(df_e,
data.frame(x = m[, 2], type = rep("Fixed Estimator", M)));
df_e$type <- as.factor(df_e$type);
df_e_mu <- apply(m, mean, MARGIN = 2);
# instead of faceting, we plot the two estimators separately, so that
# the shape of their distribution becomes apparent.
p1 <-
df_e[df_e$type == "Mean Estimator", ] %>%
ggplot(aes(x = x)) +
geom_histogram(aes(x = x), alpha = 0.3) +
geom_vline(aes(xintercept = mu, colour = "true mean")) +
geom_vline(aes(xintercept = df_e_mu[1], colour = "empirical mean estimate")) +
geom_vline(aes(xintercept = df_e_mu[2], colour = "fixed estimate")) +
labs(title = "Evaluating Estimators",
subtitle = sprintf("Distribution of empirical mean"),
caption = "Christian Bitter") +
theme_light();
# notice how concentrated the mean estimator is (see the range of the histogram)
# whereas the fixed estimators has huge spread
p2 <-
df_e[df_e$type == "Fixed Estimator", ] %>%
ggplot(aes(x = x)) +
geom_histogram(aes(x = x), alpha = 0.3) +
geom_vline(aes(xintercept = mu, colour = "true mean")) +
geom_vline(aes(xintercept = df_e_mu[1], colour = "empirical mean estimate")) +
geom_vline(aes(xintercept = df_e_mu[2], colour = "fixed estimate")) +
xlim(7, 13) +
labs(title = "Evaluating Estimators",
subtitle = sprintf("Distribution of fixed estimate"),
caption = "Christian Bitter") +
theme_light();
plot(gridExtra::arrangeGrob(p1, p2));