Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Getting bayesian anova modules up to speed with changes in master #31

Merged
merged 5 commits into from
Sep 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
110 changes: 0 additions & 110 deletions R/ssp_eq_anova.R

This file was deleted.

99 changes: 0 additions & 99 deletions R/ssp_rope_anova.R

This file was deleted.

6 changes: 3 additions & 3 deletions data-raw/bf_anova_precalculations.r
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ future::plan(multisession, workers = n_cores)
# Calculation configurations ---------------------------------------------------

# The dataframe is created with "./data-raw/options/anova_options_bayesian.R"
bayes_anova_options <- readr::read_csv(here("data/options/ssp_anova_options_bayesian.csv")) |>
mutate(iter = row_number())
bayes_anova_options <- readr::read_csv(here("data/options/ssp_anova_options_bayesian.csv")) %>%
mutate(row_id = row_number())

# Set file directory -----------------------------------------------------------

Expand All @@ -43,7 +43,7 @@ n_batches <- 75

# First, we split all the iterations into a list (one element per row)
bayes_anova_options_split <-
split(bayes_anova_options, bayes_anova_options$iter)
split(bayes_anova_options, bayes_anova_options$row_id)

# Since each save holds up to 75 configurations, the number of saves we need is:
n_saves <- ceiling(length(bayes_anova_options_split) / n_batches)
Expand Down
124 changes: 35 additions & 89 deletions data-raw/bf_anova_precalculations_results.R
Original file line number Diff line number Diff line change
@@ -1,100 +1,46 @@
# Load necessary packages
library(tidyverse)

# Input params are not saved with the result outputs, so we join them here
bayes_anova_options <-
tidyr::expand_grid(
m11 = 0,
m12 = seq(0, 1, by = 0.25),
m21 = seq(0, 1, by = 0.25),
m22 = seq(0, 1, by = 0.25),
tpr = seq(0.5, 0.95, by = 0.05),
effect = c("Main Effect 1", "Main Effect 2", "Interaction Effect"),
thresh = c(3, 6, 10),
prior_scale = c(1 / sqrt(2), 1, sqrt(2))
) %>%
dplyr::filter(m12 <= m21) %>%
dplyr::slice(-1) %>%
dplyr::rowwise() %>%
dplyr::mutate(mu = mapply(c, m11, m12, m21, m22, SIMPLIFY = FALSE)) %>%
dplyr::ungroup() %>%
dplyr::mutate(iter = row_number()) |>
dplyr::mutate(batch_id = str_pad(ceiling(row_number() / 75), width = 3, pad = "0"),
intra_batch_id = str_pad(((iter - 1) %% 75) + 1, width = 3, pad = "0"))

# Read the outputs of all iterations
bayes_anova_output <- tibble(filename = list.files(path = "./data/bayes-anova-res/", pattern = "\\.rds$", full.names = TRUE)) %>%
bayes_anova_data <-
tibble(filename = list.files(
path = "./data/bayes-anova-res/",
pattern = "\\.rds$",
full.names = TRUE
)) %>%
mutate(batch_id = str_extract(filename, "(?<=set-)[\\d]+") %>% str_pad(width = 3, pad = "0")) %>%
mutate(data = map(filename, readRDS)) %>%
unnest(data) %>%
group_by(batch_id) %>%
mutate(intra_batch_id = str_pad(row_number(), width = 3, pad = "0")) %>%
mutate(calculation_id = str_pad(row_number(), width = 3, pad = "0")) %>%
ungroup() %>%
mutate(result = map(data, "result"),
error = map(data, "error"),
error_message = map_chr(error, ~ pluck(.x, "message", .default = NA_character_)),
result_not_null = if_else(map_lgl(result, ~ !is.null(.x)),
1L,
0L),
n1 = map_dbl(result, ~ pluck(.x, "n1", .default = NA_real_)),
tpr_out = map_dbl(result, ~ pluck(.x, "tpr_out", .default = NA_real_)),
effect_out = map_chr(result, ~ pluck(.x, "effect", .default = NA_character_)),
# The iteration ids in the filenames do not have leading zeros, so we pad them here to keep the order needed to join the results with the input params
batch_id = str_extract(filename, "\\d+"),
batch_id = str_pad(batch_id, width = 3, pad = "0")
) |>
select(-data)

bayes_anova_data <- left_join( bayes_anova_output, bayes_anova_options, by = c("batch_id", "intra_batch_id")) |>
arrange(batch_id, intra_batch_id)

# Randomly checking if some results match by recalculating them here
source(here::here("R/ssp_bayesian_anova.R"))
source(here::here("R/tpr_optim.R"))

# Create a function to safely run Bayesian Anova
safe_ssp_anova_bf <- purrr::safely(ssp_anova_bf)

# For batch_id 001 and intra_batch_id 018
safe_ssp_anova_bf(
tpr = 0.50,
effect = "Interaction Effect",
thresh = 3,
prior_scale = 0.7071068,
iter = 1000,
max_n = 500,
mu = c(0, 0, 0, 0),
sigma = 1
)

# For batch_id 010 and intra_batch_id 071
safe_ssp_anova_bf(
tpr = 0.85,
effect = "Main Effect 2",
thresh = 10,
prior_scale = 1.4142136,
iter = 1000,
max_n = 500,
mu = c(0, 0, 0, 0.5),
sigma = 1
)

# For batch_id 015 and intra_batch_id 029
safe_ssp_anova_bf(
tpr = 0.95,
effect = "Interaction Effect",
thresh = 10,
prior_scale = 1.4142136,
iter = 1000,
max_n = 500,
mu = c(0, 0, 0, 0.75),
sigma = 1
)

# Prepare data for saving
# bayes_anova_precalculation_results <-
# bayes_anova_data %>%
# select(iterate, tpr_out, delta, thresh, prior_scale, n1, h0, ha, error_message) %>%
# arrange(iterate)
mutate(parameters = map(data, "parameters"),
output = map(data, "output")) |>
mutate(
tpr_in = map_dbl(parameters, "tpr"),
effect = map_chr(parameters, "effect"),
thresh = map_int(parameters, "thresh"),
prior_scale = map_dbl(parameters, "prior_scale"),
mu = map(parameters, "mu"),
sigma = map_dbl(parameters, "sigma"),
result = map(output, "result"),
error = map(output, "error"),
error_message = map_chr(error, ~ pluck(.x, "message", .default = NA_character_)),
result_not_null = if_else(map_lgl(result, ~ !is.null(.x)),
1L,
0L),
n1 = map_dbl(result, ~ pluck(.x, "n1", .default = NA_real_)),
tpr_out = map_dbl(result, ~ pluck(.x, "tpr_out", .default = NA_real_)),
effect_out = map_chr(result, ~ pluck(.x, "effect", .default = NA_character_)),
# The iteration ids in the filenames do not have leading zeros, so we pad them here to keep the order needed to join the results with the input params
batch_id = str_extract(filename, "\\d+"),
batch_id = str_pad(batch_id, width = 3, pad = "0")
) |>
select(-data, -parameters, -output, -result, -error) |>
arrange(batch_id, calculation_id)

# Count how many calculations returned a result (1) versus none (0)
bayes_anova_data |>
count(result_not_null)

usethis::use_data(bayes_anova_data, overwrite = TRUE)
Loading
Loading