-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmeldoistream.Rmd
257 lines (208 loc) · 6.74 KB
/
meldoistream.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
---
title: "Leveraging Bayesian Analysis for Data-Driven Product Decisions: A Case Study with MelodiStream"
output: html_notebook
---
```{r}
# Install and load required packages
# if (!requireNamespace("rstan", quietly = TRUE)) install.packages("rstan")
# if (!requireNamespace("brms", quietly = TRUE)) install.packages("brms")
# if (!requireNamespace("bayesplot", quietly = TRUE)) install.packages("bayesplot")
# if (!requireNamespace("tidyverse", quietly = TRUE)) install.packages("tidyverse")
library(rstan)
library(brms)
library(bayesplot)
library(tidyverse)
```
```{r}
# Set seed for reproducibility
set.seed(123)
simulate_experiment <- function(n_per_group, true_effect) {
control <- rnorm(n_per_group, 45, 15)
treatment <- rnorm(n_per_group, 45 + true_effect, 15)
data <- list(y = c(control, treatment),
group = c(rep(0, n_per_group), rep(1, n_per_group)),
N = 2 * n_per_group)
model <- "
data {
int<lower=0> N;
vector[N] y;
array[N] int<lower=0, upper=1> group;
}
parameters {
real mu;
real effect;
real<lower=0> sigma;
}
transformed parameters {
vector[N] mu_effect;
for (i in 1:N) {
mu_effect[i] = mu + effect * group[i];
}
}
model {
mu ~ normal(45, 10);
effect ~ normal(0, 5);
sigma ~ cauchy(0, 5);
y ~ normal(mu_effect, sigma);
}
"
fit <- stan(model_code = model, data = data, chains = 2, iter = 2000, warmup = 1000, thin = 2, refresh = 0)
samples <- as.data.frame(fit) # Extract the samples as a data frame
prob_positive = mean(samples$effect > 0)
return(prob_positive > 0.95)
}
```
```{r}
sample_sizes <- seq(100, 300, by = 50)
results <- sapply(sample_sizes, function(n) {
mean(replicate(10, simulate_experiment(n, true_effect = 5)))
})
min_sample_size <- sample_sizes[which(results > 0.8)[1]]
print(paste("Recommended sample size per group:", min_sample_size))
```
```{r}
# 2. Data Generation
n <- min_sample_size * 2 # Total sample size
group_assignment <- sample(c("control", "treatment"), size = n, replace = TRUE)
# Simulate data collection (in practice, this would be real user data)
current_algo <- rnorm(sum(group_assignment == "control"), mean = 45, sd = 15)
new_algo <- rnorm(sum(group_assignment == "treatment"), mean = 50, sd = 15)
data <- data.frame(
listening_time = c(current_algo, new_algo),
group = group_assignment)
```
```{r}
library(ggplot2)
# Create the boxplot
ggplot(data, aes(x = group, y = listening_time, fill = group)) +
geom_boxplot() +
labs(title = "Listening Time Comparison Between Algorithms",
x = "Algorithm",
y = "Listening Time") +
theme_minimal()
```
```{r}
library(ggplot2)
ggplot(data, aes(x = listening_time, fill = group)) +
geom_density(alpha = 0.7) +
labs(title = "Density Plot of Listening Time by Algorithm",
x = "Listening Time",
y = "Density") +
theme_minimal()
```
```{r}
# 3. Bayesian Model
model <- brm(
formula = listening_time ~ group,
data = data,
family = gaussian(),
prior = c(
prior(normal(45, 10), class = "Intercept"),
prior(normal(0, 5), class = "b"),
prior(cauchy(0, 5), class = "sigma")
),
chains = 4,
iter = 4000,
warmup = 2000,
refresh = 0
)
```
```{r}
# 4. Results Interpretation
summary(model)
hypothesis(model, "grouptreatment > 0")
posterior_samples <- posterior_samples(model)
mean_diff <- mean(posterior_samples$b_grouptreatment)
ci <- quantile(posterior_samples$b_grouptreatment, c(0.025, 0.975))
print(paste("Expected difference:", round(mean_diff, 2), "minutes"))
print(paste("95% Credible Interval:", round(ci[1], 2), "to", round(ci[2], 2), "minutes"))
```
```{r}
# 5. Visualization
plot <- mcmc_areas(posterior_samples, pars = "b_grouptreatment", prob = 0.95) +
ggtitle("Posterior Distribution of the Effect")
print(plot)
```
```{r}
# Trace plot for the parameters of the model
stanplot(model, type = "trace")
```
```{r}
# Posterior predictive check
pp_check(model)
```
```{r}
# Extract posterior samples for multiple parameters
posterior_samples <- posterior_samples(model)
# Posterior density plot for more parameters
mcmc_areas(posterior_samples, pars = c("b_grouptreatment", "Intercept", "sigma"), prob = 0.95) +
ggtitle("Posterior Distribution of Parameters")
```
```{r}
# Pairwise relationships between parameters
mcmc_pairs(posterior_samples, pars = c("b_grouptreatment", "Intercept", "sigma"))
```
```{r}
mcmc_intervals(posterior_samples, pars = c("b_grouptreatment", "Intercept")) +
ggtitle("Posterior Intervals for Treatment Effect and Intercept")
```
# Rather than running these plots individually we can put them on dashboard by creating a shiny application
```{r}
library(shiny)
# Define UI for the dashboard
ui <- fluidPage(
titlePanel("Bayesian Analysis Dashboard"),
# Sidebar layout to choose different plots
sidebarLayout(
sidebarPanel(
selectInput(
"plot_type",
"Choose a plot:",
choices = c(
"Posterior Distribution of Effect" = "posterior_effect",
"Trace Plot" = "trace",
"Posterior Predictive Check" = "pp_check",
"Posterior Distributions of All Parameters" = "posterior_all",
"Pairwise Relationships" = "pairs",
"Posterior Intervals" = "intervals"
)
)
),
# Main panel to display plots
mainPanel(
plotOutput("bayesianPlot")
)
)
)
# Define server logic for the plots
server <- function(input, output) {
output$bayesianPlot <- renderPlot({
# Depending on the user choice, render the respective plot
if (input$plot_type == "posterior_effect") {
# Posterior Distribution of Treatment Effect
plot <- mcmc_areas(posterior_samples, pars = "b_grouptreatment", prob = 0.95) +
ggtitle("Posterior Distribution of the Effect")
print(plot)
} else if (input$plot_type == "trace") {
# Trace plot for the model parameters
stanplot(model, type = "trace")
} else if (input$plot_type == "pp_check") {
# Posterior Predictive Check
pp_check(model)
} else if (input$plot_type == "posterior_all") {
# Posterior density plot for multiple parameters
mcmc_areas(posterior_samples, pars = c("b_grouptreatment", "Intercept", "sigma"), prob = 0.95) +
ggtitle("Posterior Distribution of Parameters")
} else if (input$plot_type == "pairs") {
# Pairwise relationships between parameters
mcmc_pairs(posterior_samples, pars = c("b_grouptreatment", "Intercept", "sigma"))
} else if (input$plot_type == "intervals") {
# Posterior Intervals for Treatment Effect and Intercept
mcmc_intervals(posterior_samples, pars = c("b_grouptreatment", "Intercept")) +
ggtitle("Posterior Intervals for Treatment Effect and Intercept")
}
})
}
# Run the application
shinyApp(ui = ui, server = server)
```