diff --git a/.Rhistory b/.Rhistory index 9fc633c..de293a8 100644 --- a/.Rhistory +++ b/.Rhistory @@ -1,512 +1,512 @@ -to = nrow(raw_120_365))] -R_hat_sparcc = matrix(NA, -nrow = nrow(raw_30) + nrow(raw_120) + nrow(raw_365), -ncol = nrow(raw_30) + nrow(raw_120) + nrow(raw_365)) -R_hat_sparcc[seq_len(nrow(raw_30)), seq_len(nrow(raw_30))] = R_hat_sparcc11 -R_hat_sparcc[seq_len(nrow(raw_30)), -seq(from = nrow(raw_30) + 1, -to = nrow(raw_30) + nrow(raw_120))] = R_hat_sparcc12 -R_hat_sparcc[seq_len(nrow(raw_30)), -seq(from = nrow(raw_30) + nrow(raw_120) + 1, -to = nrow(raw_30) + nrow(raw_120) + nrow(raw_365))] = R_hat_sparcc13 -R_hat_sparcc[seq(from = nrow(raw_30) + 1, -to = nrow(raw_30) + nrow(raw_120)), -seq(from = nrow(raw_30) + 1, -to = nrow(raw_30) + nrow(raw_120))] = R_hat_sparcc22 -R_hat_sparcc[seq(from = nrow(raw_30) + 1, -to = nrow(raw_30) + nrow(raw_120)), -seq(from = nrow(raw_30) + nrow(raw_120) + 1, -to = nrow(raw_30) + nrow(raw_120) + nrow(raw_365))] = R_hat_sparcc23 -R_hat_sparcc[seq(from = nrow(raw_30) + nrow(raw_120) + 1, -to = nrow(raw_30) + nrow(raw_120) + nrow(raw_365)), -seq(from = nrow(raw_30) + nrow(raw_120) + 1, -to = nrow(raw_30) + nrow(raw_120) + nrow(raw_365))] = R_hat_sparcc33 -View(R_hat_sparcc) -R_hat_sparcc[lower.tri(R_hat_sparcc)] -R_hat_sparcc[lower.tri(R_hat_sparcc)] = R_hat_sparcc[upper.tri(R_hat_sparcc)] -R_hat_sparcc = hard_thresh(R_hat_sparcc, 0.3) -col_ind = match(family_level, colnames(R_hat_sparcc)) -R_hat_sparcc = R_hat_sparcc[col_ind[!is.na(col_ind)], col_ind[!is.na(col_ind)]] -df_sparcc = data.frame(get_upper_tri(R_hat_sparcc)) %>% -rownames_to_column("var1") %>% -pivot_longer(cols = -var1, names_to = "var2", values_to = "value") %>% -mutate(var2 = gsub("\\...", " - ", var2)) %>% -filter(!is.na(value)) %>% -mutate(value = round(value, 2)) -family_level -R_hat_sparcc = matrix(NA, -nrow = nrow(raw_30) + nrow(raw_120) + nrow(raw_365), -ncol = nrow(raw_30) + nrow(raw_120) + nrow(raw_365)) -R_hat_sparcc[seq_len(nrow(raw_30)), seq_len(nrow(raw_30))] = R_hat_sparcc11 -R_hat_sparcc[seq_len(nrow(raw_30)), -seq(from = nrow(raw_30) + 1, -to = nrow(raw_30) + nrow(raw_120))] = R_hat_sparcc12 -R_hat_sparcc[seq_len(nrow(raw_30)), -seq(from = nrow(raw_30) + nrow(raw_120) + 1, -to = nrow(raw_30) + nrow(raw_120) + nrow(raw_365))] = R_hat_sparcc13 -R_hat_sparcc[seq(from = nrow(raw_30) + 1, -to = nrow(raw_30) + nrow(raw_120)), -seq(from = nrow(raw_30) + 1, -to = nrow(raw_30) + nrow(raw_120))] = R_hat_sparcc22 -R_hat_sparcc[seq(from = nrow(raw_30) + 1, -to = nrow(raw_30) + nrow(raw_120)), -seq(from = nrow(raw_30) + nrow(raw_120) + 1, -to = nrow(raw_30) + nrow(raw_120) + nrow(raw_365))] = R_hat_sparcc23 -R_hat_sparcc[seq(from = nrow(raw_30) + nrow(raw_120) + 1, -to = nrow(raw_30) + nrow(raw_120) + nrow(raw_365)), -seq(from = nrow(raw_30) + nrow(raw_120) + 1, -to = nrow(raw_30) + nrow(raw_120) + nrow(raw_365))] = R_hat_sparcc33 -R_hat_sparcc[lower.tri(R_hat_sparcc)] = R_hat_sparcc[upper.tri(R_hat_sparcc)] -dimnames(R_hat_sparcc) = list(c(rownames(raw_30), rownames(raw_120), rownames(raw_365)), -c(rownames(raw_30), rownames(raw_120), rownames(raw_365))) -View(R_hat_sparcc) -R_hat_sparcc = hard_thresh(R_hat_sparcc, 0.3) -col_ind = match(family_level, colnames(R_hat_sparcc)) -R_hat_sparcc = R_hat_sparcc[col_ind[!is.na(col_ind)], col_ind[!is.na(col_ind)]] -df_sparcc = data.frame(get_upper_tri(R_hat_sparcc)) %>% -rownames_to_column("var1") %>% -pivot_longer(cols = -var1, names_to = "var2", values_to = "value") %>% -mutate(var2 = gsub("\\...", " - ", var2)) %>% -filter(!is.na(value)) %>% -mutate(value = round(value, 2)) -family_level = sort(union(df_sparcc$var1, df_sparcc$var2)) -family_label = sapply(family_level, function(x) strsplit(x, " - ")[[1]][2]) -txt_color = case_when(grepl("data1", family_level) ~ "#1B9E77", -grepl("data2", family_level) ~ "#D95F02", -TRUE ~ "#7570B3") -df_sparcc$var1 = factor(df_sparcc$var1, levels = family_level) -df_sparcc$var2 = factor(df_sparcc$var2, levels = family_level) -p_sparcc = df_sparcc %>% -ggplot(aes(var2, var1, fill = value)) + -geom_tile(color = "black") + -scale_fill_gradient2(low = "blue", high = "red", mid = "white", na.value = "grey", -midpoint = 0, limit = c(-1,1), space = "Lab", -name = NULL) + -scale_x_discrete(drop = FALSE, labels = family_label) + -scale_y_discrete(drop = FALSE, labels = family_label) + -geom_text(aes(var2, var1, label = value), color = "black", size = 4) + -labs(x = NULL, y = NULL, title = NULL) + -guides(fill = guide_colorbar(barwidth = 1, barheight = 7, -title.position = "right")) + +labs(x = "Sampling fraction", y = "Correlation estimate", +title = "Bias of Estimating Zero Correlation Coefficients") + theme_bw() + -geom_vline(xintercept = c(8.5, 15.5), color = "blue", linetype = "dashed") + -geom_hline(yintercept = c(8.5, 15.5), color = "blue", linetype = "dashed") + -theme(axis.text.x = element_text(angle = 45, vjust = 1, size = 12, hjust = 1, -face = "italic", color = txt_color), -axis.text.y = element_text(size = 12, face = "italic", color = txt_color), -strip.text.x = element_text(size = 14), -strip.text.y = element_text(size = 14), -legend.text = element_text(size = 12), -plot.title = element_text(hjust = 0.5, size = 15), -panel.grid.major = element_blank(), -axis.ticks = element_blank()) + -coord_fixed() -p_sparcc -p_sparcc -res_linear = read_rds("../data/nomic/phylum/res_linear.rds") -res_dist = read_rds("../data/nomic/phylum/res_dist.rds") -p_filter = function(mat, mat_p, max_p){ -ind_p = mat_p -ind_p[mat_p > max_p] = 0 -ind_p[mat_p <= max_p] = 1 -mat_filter = mat * ind_p -return(mat_filter) -} -res_linear$corr_fl = p_filter(res_linear$corr, res_linear$corr_p, 0.1) -res_dist$dcorr_fl = p_filter(res_linear$dcorr, res_linear$dcorr_p, 0.1) -cooccur_linear = res_linear$mat_cooccur -# Filter by co-occurrence -overlap = 10 -corr_linear[cooccur_linear < overlap] = 0 -df_linear = data.frame(get_upper_tri(corr_linear)) %>% -rownames_to_column("var1") %>% -pivot_longer(cols = -var1, names_to = "var2", values_to = "value") %>% -filter(!is.na(value)) %>% -mutate(var2 = gsub("\\...", " - ", var2), -value = round(value, 2), -metric = "Pearson") -# Distance relationships -corr_dist = res_dist$dcorr_fl -cooccur_dist = res_dist$mat_cooccur -# Filter by co-occurrence -overlap = 10 -corr_dist[cooccur_dist < overlap] = 0 -df_dist = data.frame(get_upper_tri(corr_dist)) %>% -rownames_to_column("var1") %>% -pivot_longer(cols = -var1, names_to = "var2", values_to = "value") %>% -filter(!is.na(value)) %>% -mutate(var2 = gsub("\\...", " - ", var2), -value = round(value, 2), -metric = "Distance") -View(res_linear$corr_fl) -pseqs = readRDS("../data/nomic/physeq_obj.rds") -pseqs = readRDS("data/nomic/physeq_obj.rds") -# OTU object -otu_table = abundances(pseqs$day30) -tax = tax_table(pseqs$day30)@.Data -meta_data = meta(pseqs$day30) -col_30 = colnames(otu_table) -new_30 = meta_data$kortnr[which(col_30 %in% meta_data$X.SampleID)] -colnames(otu_table) = new_30 -col_30 -new_30 -OTU = otu_table(otu_table, taxa_are_rows = TRUE) -META = sample_data(meta_data) -sample_names(META) = meta_data$kortnr -TAX = tax_table(tax) -otu_data30 = phyloseq(OTU, TAX, META) -# Aggregate to phylum level -phylum_data30 = aggregate_taxa(otu_data30, "phylum") -phylum_data30 = subset_taxa(phylum_data30, phylum != "Unknown") -# Aggregate to family level -family_data30 = aggregate_taxa(otu_data30, "family") -family_data30 = subset_taxa(family_data30, family != "Unknown") -top_phylum = c("Acidobacteria", "Actinobacteria", "Bacteroidetes", "Cyanobacteria", "Firmicutes", -"Fusobacteria", "Proteobacteria", "Tenericutes", "TM7", "Verrucomicrobia") -top_family = c("Bacteroidaceae", "Bifidobacteriaceae", "Clostridiaceae", "Enterobacteriaceae", "Fusobacteriaceae", -"Lachnospiraceae", "Moraxellaceae", "Pasteurellaceae", "Ruminococcaceae", "Staphylococcaceae") -otu_table = abundances(pseqs$day30) -meta_data = meta(pseqs$day30) -tax = tax_table(pseqs$day30)@.Data -col_30 = colnames(otu_table) -new_30 = meta_data$kortnr[which(col_30 %in% meta_data$X.SampleID)] -colnames(otu_table) = new_30 -OTU = otu_table(otu_table, taxa_are_rows = TRUE) -META = sample_data(meta_data) -sample_names(META) = meta_data$kortnr -TAX = tax_table(tax) -otu_data30 = phyloseq(OTU, TAX, META) -# Aggregate to phylum level -phylum_data30 = aggregate_taxa(otu_data30, "phylum") -phylum_data30 = subset_taxa(phylum_data30, phylum != "Unknown") -core_phylum30 = subset_taxa(phylum_data30, phylum %in% top_phylum) -# Aggregate to family level -family_data30 = aggregate_taxa(otu_data30, "family") -family_data30 = subset_taxa(family_data30, family != "Unknown") -core_family30 = subset_taxa(family_data30, family %in% top_family) -otu_table = abundances(pseqs$day365) -meta_data = meta(pseqs$day365) -tax = tax_table(pseqs$day365)@.Data -col_365 = colnames(otu_table) -new_365 = meta_data$kortnr[which(col_365 %in% meta_data$X.SampleID)] -colnames(otu_table) = new_365 -OTU = otu_table(otu_table, taxa_are_rows = TRUE) -META = sample_data(meta_data) -sample_names(META) = meta_data$kortnr -TAX = tax_table(tax) -otu_data365 = phyloseq(OTU, TAX, META) -# Aggregate to phylum level -phylum_data365 = aggregate_taxa(otu_data365, "phylum") -phylum_data365 = subset_taxa(phylum_data365, phylum != "Unknown") -core_phylum365 = subset_taxa(phylum_data365, phylum %in% top_phylum) -# Aggregate to family level -family_data365 = aggregate_taxa(otu_data365, "family") -family_data365 = subset_taxa(family_data365, family != "Unknown") -core_family365 = subset_taxa(family_data365, family %in% top_family) -otu_table = abundances(pseqs$day120) -meta_data = meta(pseqs$day120) -tax = tax_table(pseqs$day120)@.Data -col_120 = colnames(otu_table) -new_120 = meta_data$kortnr[which(col_120 %in% meta_data$X.SampleID)] -colnames(otu_table) = new_120 -OTU = otu_table(otu_table, taxa_are_rows = TRUE) -META = sample_data(meta_data) -sample_names(META) = meta_data$kortnr -TAX = tax_table(tax) -otu_data120 = phyloseq(OTU, TAX, META) -# Aggregate to phylum level -phylum_data120 = aggregate_taxa(otu_data120, "phylum") -phylum_data120 = subset_taxa(phylum_data120, phylum != "Unknown") -core_phylum120 = subset_taxa(phylum_data120, phylum %in% top_phylum) -# Aggregate to family level -family_data120 = aggregate_taxa(otu_data120, "family") -family_data120 = subset_taxa(family_data120, family != "Unknown") -core_family120 = subset_taxa(family_data120, family %in% top_family) -pseqs = list(data1 = c(otu_data30, core_phylum30), -data2 = c(otu_data120, core_phylum120), -data3 = c(otu_data365, core_phylum365)) -pseudo = 0; prv_cut = 0.1; lib_cut = 0; corr_cut = 0.5 -wins_quant = c(0.05, 0.95); method = "pearson"; soft = FALSE; thresh_len = 20 -n_cv = 10; thresh_hard = 0; max_p_linear = 0.1; n_cl = 2 -set.seed(123) -res_linear = secom_linear(pseqs, pseudo, prv_cut, lib_cut, corr_cut, -wins_quant, method, soft, thresh_len, n_cv, -thresh_hard, max_p_linear, n_cl) -R = 1000; max_p_dist = 0.1 -set.seed(123) -res_dist = secom_dist(pseqs, pseudo, prv_cut, corr_cut, lib_cut, -wins_quant, R, thresh_hard, max_p_dist, n_cl) -# Data organization -# Linear relationships -corr_linear = res_linear$corr_fl -cooccur_linear = res_linear$mat_cooccur -# Filter by co-occurrence -overlap = 10 -corr_linear[cooccur_linear < overlap] = 0 -df_linear = data.frame(get_upper_tri(corr_linear)) %>% -rownames_to_column("var1") %>% -pivot_longer(cols = -var1, names_to = "var2", values_to = "value") %>% -filter(!is.na(value)) %>% -mutate(var2 = gsub("\\...", " - ", var2), -value = round(value, 2), -metric = "Pearson") -# Distance relationships -corr_dist = res_dist$dcorr_fl -cooccur_dist = res_dist$mat_cooccur -# Filter by co-occurrence -overlap = 10 -corr_dist[cooccur_dist < overlap] = 0 -df_dist = data.frame(get_upper_tri(corr_dist)) %>% -rownames_to_column("var1") %>% -pivot_longer(cols = -var1, names_to = "var2", values_to = "value") %>% -filter(!is.na(value)) %>% -mutate(var2 = gsub("\\...", " - ", var2), -value = round(value, 2), -metric = "Distance") -# Merge datasets -df_corr = df_linear %>% -bind_rows( -df_dist +theme(plot.title = element_text(hjust = 0.5)) +p_impute2 +p_impute2 = df_fig2 %>% +ggplot(aes(x = S, y = value, fill = type)) + +stat_summary(fun = mean, geom = "bar", position = "dodge", color = "black") + +stat_summary(fun.data = mean_sdl, fun.args = list(mult = 1), color = "black", +geom = "errorbar", width = .3, position = position_dodge(.9)) + +geom_point(aes(x = S), size = .1, alpha = .3, +position = position_jitterdodge(jitter.width = .2, dodge.width = .9)) + +scale_fill_npg(name = NULL) + +guides(fill = guide_legend(override.aes = list(shape = NA))) + +geom_hline(yintercept = 0.8, linetype = "dashed") + +scale_x_discrete(breaks = seq(0, 1, 0.2), limits = c(0, 1)) + +labs(x = "Sampling fraction", y = "Correlation estimate", +title = "Bias of Estimating Zero Correlation Coefficients") + +theme_bw() + +theme(plot.title = element_text(hjust = 0.5)) +p_impute2 +?scale_x_discrete +p_impute2 = df_fig2 %>% +ggplot(aes(x = S, y = value, fill = type)) + +stat_summary(fun = mean, geom = "bar", position = "dodge", color = "black") + +stat_summary(fun.data = mean_sdl, fun.args = list(mult = 1), color = "black", +geom = "errorbar", width = .3, position = position_dodge(.9)) + +geom_point(aes(x = S), size = .1, alpha = .3, +position = position_jitterdodge(jitter.width = .2, dodge.width = .9)) + +scale_fill_npg(name = NULL) + +guides(fill = guide_legend(override.aes = list(shape = NA))) + +geom_hline(yintercept = 0.8, linetype = "dashed") + +scale_x_discrete(limits = c(0, 1)) + +labs(x = "Sampling fraction", y = "Correlation estimate", +title = "Bias of Estimating Zero Correlation Coefficients") + +theme_bw() + +theme(plot.title = element_text(hjust = 0.5)) +p_impute2 +p_impute2 = df_fig2 %>% +ggplot(aes(x = S, y = value, fill = type)) + +stat_summary(fun = mean, geom = "bar", position = "dodge", color = "black") + +stat_summary(fun.data = mean_sdl, fun.args = list(mult = 1), color = "black", +geom = "errorbar", width = .3, position = position_dodge(.9)) + +geom_point(aes(x = S), size = .1, alpha = .3, +position = position_jitterdodge(jitter.width = .2, dodge.width = .9)) + +scale_fill_npg(name = NULL) + +guides(fill = guide_legend(override.aes = list(shape = NA))) + +geom_hline(yintercept = 0.8, linetype = "dashed") + +scale_y_discrete(limits = c(0, 1)) + +labs(x = "Sampling fraction", y = "Correlation estimate", +title = "Bias of Estimating Zero Correlation Coefficients") + +theme_bw() + +theme(plot.title = element_text(hjust = 0.5)) +p_impute2 +p_impute2 = df_fig2 %>% +ggplot(aes(x = S, y = value, fill = type)) + +stat_summary(fun = mean, geom = "bar", position = "dodge", color = "black") + +stat_summary(fun.data = mean_sdl, fun.args = list(mult = 1), color = "black", +geom = "errorbar", width = .3, position = position_dodge(.9)) + +geom_point(aes(x = S), size = .1, alpha = .3, +position = position_jitterdodge(jitter.width = .2, dodge.width = .9)) + +scale_fill_npg(name = NULL) + +guides(fill = guide_legend(override.aes = list(shape = NA))) + +geom_hline(yintercept = 0.8, linetype = "dashed") + +scale_y_discrete(limits = c(0, 1), breaks = seq(0, 1, 0.2)) + +labs(x = "Sampling fraction", y = "Correlation estimate", +title = "Bias of Estimating Zero Correlation Coefficients") + +theme_bw() + +theme(plot.title = element_text(hjust = 0.5)) +p_impute2 +seq(0, 1, 0.2) +#>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> +stat_summary(fun = mean, geom = "bar", position = "dodge", color = "black") + +stat_summary(fun.data = mean_sdl, fun.args = list(mult = 1), color = "black", +geom = "errorbar", width = .3, position = position_dodge(.9)) + +geom_point(aes(x = setting), size = .1, alpha = .3, +position = position_jitterdodge(jitter.width = .2, dodge.width = .9)) + +scale_fill_nejm(name = NULL) + +guides(fill = guide_legend(override.aes = list(shape = NA))) + +p_impute2 = df_fig2 %>% +ggplot(aes(x = S, y = value, fill = type)) + +stat_summary(fun = mean, geom = "bar", position = "dodge", color = "black") + +stat_summary(fun.data = mean_sdl, fun.args = list(mult = 1), color = "black", +geom = "errorbar", width = .3, position = position_dodge(.9)) + +geom_point(aes(x = S), size = .1, alpha = .3, +position = position_jitterdodge(jitter.width = .2, dodge.width = .9)) + +scale_fill_npg(name = NULL) + +guides(fill = guide_legend(override.aes = list(shape = NA))) + +geom_hline(yintercept = 0.8, linetype = "dashed") + +scale_y_discrete(breaks = seq(0, 1, 0.2)) + +labs(x = "Sampling fraction", y = "Correlation estimate", +title = "Bias of Estimating Zero Correlation Coefficients") + +theme_bw() + +theme(plot.title = element_text(hjust = 0.5)) +p_impute2 +#>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> +stat_summary(fun = mean, geom = "bar", position = "dodge", color = "black") + +stat_summary(fun.data = mean_sdl, fun.args = list(mult = 1), color = "black", +geom = "errorbar", width = .3, position = position_dodge(.9)) + +geom_point(aes(x = setting), size = .1, alpha = .3, +position = position_jitterdodge(jitter.width = .2, dodge.width = .9)) + +scale_fill_nejm(name = NULL) + +guides(fill = guide_legend(override.aes = list(shape = NA))) + +p_impute2 = df_fig2 %>% +ggplot(aes(x = S, y = value, fill = type)) + +stat_summary(fun = mean, geom = "bar", position = "dodge", color = "black") + +stat_summary(fun.data = mean_sdl, fun.args = list(mult = 1), color = "black", +geom = "errorbar", width = .3, position = position_dodge(.9)) + +geom_point(aes(x = S), size = .1, alpha = .3, +position = position_jitterdodge(jitter.width = .2, dodge.width = .9)) + +scale_fill_npg(name = NULL) + +guides(fill = guide_legend(override.aes = list(shape = NA))) + +geom_hline(yintercept = 0.8, linetype = "dashed") + +# scale_y_discrete(breaks = seq(0, 1, 0.2)) + +labs(x = "Sampling fraction", y = "Correlation estimate", +title = "Bias of Estimating Zero Correlation Coefficients") + +theme_bw() + +theme(plot.title = element_text(hjust = 0.5)) +p_impute2 +res2 = read_csv("../outputs/others/cca_vs_impute_corr.csv") +res2$S = factor(res2$S, levels = c("0.1", "0.05", "0.02", "0.01", "0.005")) +df_fig2 = res2 %>% +pivot_longer(cols = r_cca:r_gbm, names_to = "measure", values_to = "value") %>% +mutate( +type = case_when( +measure == "r_cca" ~ "CCA", +measure == "r_mice" ~ "MICE", +TRUE ~ "GBM" ) -phylum_level = sort(union(df_corr$var1, df_corr$var2)) -phylum_label = sapply(phylum_level, function(x) strsplit(x, " - ")[[1]][2]) -txt_color = case_when(grepl("data1", phylum_level) ~ "#1B9E77", -grepl("data2", phylum_level) ~ "#D95F02", -TRUE ~ "#7570B3") -df_corr$var1 = factor(df_corr$var1, levels = phylum_level) -df_corr$var2 = factor(df_corr$var2, levels = phylum_level) -df_corr$metric = factor(df_corr$metric, levels = c("Pearson", "Distance")) -p_corr = df_corr %>% -ggplot(aes(var2, var1, fill = value)) + -geom_tile(color = "black") + -scale_fill_gradient2(low = "blue", high = "red", mid = "white", na.value = "grey", -midpoint = 0, limit = c(-1,1), space = "Lab", -name = NULL) + -scale_x_discrete(drop = FALSE, labels = phylum_label) + -scale_y_discrete(drop = FALSE, labels = phylum_label) + -facet_grid(rows = vars(metric)) + -geom_text(aes(var2, var1, label = value), color = "black", size = 4) + -labs(x = NULL, y = NULL, title = NULL) + -guides(fill = guide_colorbar(barwidth = 1, barheight = 7, -title.position = "right")) + +) +p_impute2 = df_fig2 %>% +ggplot(aes(x = S, y = value, fill = type)) + +stat_summary(fun = mean, geom = "bar", position = "dodge", color = "black") + +stat_summary(fun.data = mean_sdl, fun.args = list(mult = 1), color = "black", +geom = "errorbar", width = .3, position = position_dodge(.9)) + +geom_point(aes(x = S), size = .1, alpha = .3, +position = position_jitterdodge(jitter.width = .2, dodge.width = .9)) + +scale_fill_npg(name = NULL) + +guides(fill = guide_legend(override.aes = list(shape = NA))) + +geom_hline(yintercept = 0.8, linetype = "dashed") + +# scale_y_discrete(breaks = seq(0, 1, 0.2)) + +labs(x = "Sampling fraction", y = "Correlation estimate", +title = "Bias of Estimating Zero Correlation Coefficients") + +theme_bw() + +theme(plot.title = element_text(hjust = 0.5)) +p_impute2 +#>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> +stat_summary(fun = mean, geom = "bar", position = "dodge", color = "black") + +stat_summary(fun.data = mean_sdl, fun.args = list(mult = 1), color = "black", +geom = "errorbar", width = .3, position = position_dodge(.9)) + +geom_point(aes(x = setting), size = .1, alpha = .3, +position = position_jitterdodge(jitter.width = .2, dodge.width = .9)) + +scale_fill_nejm(name = NULL) + +guides(fill = guide_legend(override.aes = list(shape = NA))) + +p_impute2 = df_fig2 %>% +ggplot(aes(x = S, y = value, fill = type)) + +stat_summary(fun = mean, geom = "bar", position = "dodge", color = "black") + +stat_summary(fun.data = mean_sdl, fun.args = list(mult = 1), color = "black", +geom = "errorbar", width = .3, position = position_dodge(.9)) + +geom_point(aes(x = S), size = .1, alpha = .3, +position = position_jitterdodge(jitter.width = .2, dodge.width = .9)) + +scale_fill_npg(name = NULL) + +guides(fill = guide_legend(override.aes = list(shape = NA))) + +geom_hline(yintercept = 0.8, linetype = "dashed") + +scale_y_discrete(breaks = seq(0, 1, 0.2)) + +labs(x = "Sampling fraction", y = "Correlation estimate", +title = "Bias of Estimating Zero Correlation Coefficients") + theme_bw() + -geom_vline(xintercept = c(4.5, 9.5), color = "blue", linetype = "dashed") + -geom_hline(yintercept = c(4.5, 9.5), color = "blue", linetype = "dashed") + -theme(axis.text.x = element_text(angle = 45, vjust = 1, size = 12, hjust = 1, -face = "italic", color = txt_color), -axis.text.y = element_text(size = 12, face = "italic", color = txt_color), -strip.text.x = element_text(size = 14), -strip.text.y = element_text(size = 14), -legend.text = element_text(size = 12), -plot.title = element_text(hjust = 0.5, size = 15), -panel.grid.major = element_blank(), -axis.ticks = element_blank()) + -coord_fixed() -p_corr -p_corr -saveRDS(res_linear, file = "../data/nomic/phylum/res_linear.rds") -saveRDS(res_dist, file = "../data/nomic/phylum/res_dist.rds") -pseqs = list(data1 = c(otu_data30, core_family30), -data2 = c(otu_data120, core_family120), -data3 = c(otu_data365, core_family365)) -pseudo = 0; prv_cut = 0.1; lib_cut = 0; corr_cut = 0.5 -wins_quant = c(0.05, 0.95); method = "pearson"; soft = FALSE; thresh_len = 20 -n_cv = 10; thresh_hard = 0; max_p_linear = 0.1; n_cl = 2 -set.seed(123) -res_linear = secom_linear(pseqs, pseudo, prv_cut, lib_cut, corr_cut, -wins_quant, method, soft, thresh_len, n_cv, -thresh_hard, max_p_linear, n_cl) -R = 1000; max_p_dist = 0.1 -set.seed(123) -res_dist = secom_dist(pseqs, pseudo, prv_cut, corr_cut, lib_cut, -wins_quant, R, thresh_hard, max_p_dist, n_cl) -# Data organization -# Linear relationships -corr_linear = res_linear$corr_fl -cooccur_linear = res_linear$mat_cooccur -# Filter by co-occurrence -overlap = 10 -corr_linear[cooccur_linear < overlap] = 0 -df_linear = data.frame(get_upper_tri(corr_linear)) %>% -rownames_to_column("var1") %>% -pivot_longer(cols = -var1, names_to = "var2", values_to = "value") %>% -filter(!is.na(value)) %>% -mutate(var2 = gsub("\\...", " - ", var2), -value = round(value, 2), -metric = "Pearson") -# Distance relationships -corr_dist = res_dist$dcorr_fl -cooccur_dist = res_dist$mat_cooccur -# Filter by co-occurrence -overlap = 10 -corr_dist[cooccur_dist < overlap] = 0 -df_dist = data.frame(get_upper_tri(corr_dist)) %>% -rownames_to_column("var1") %>% -pivot_longer(cols = -var1, names_to = "var2", values_to = "value") %>% -filter(!is.na(value)) %>% -mutate(var2 = gsub("\\...", " - ", var2), -value = round(value, 2), -metric = "Distance") -# Merge datasets -df_corr = df_linear %>% -bind_rows( -df_dist +theme(plot.title = element_text(hjust = 0.5)) +res2 = read_csv("../outputs/others/cca_vs_impute_corr.csv") +res2$S = factor(res2$S, levels = c("0.1", "0.05", "0.02", "0.01", "0.005")) +df_fig2 = res2 %>% +pivot_longer(cols = r_cca:r_gbm, names_to = "measure", values_to = "value") %>% +mutate( +type = case_when( +measure == "r_cca" ~ "CCA", +measure == "r_mice" ~ "MICE", +TRUE ~ "GBM" +) ) -family_level = sort(union(df_corr$var1, df_corr$var2)) -family_label = sapply(family_level, function(x) strsplit(x, " - ")[[1]][2]) -txt_color = case_when(grepl("data1", family_level) ~ "#1B9E77", -grepl("data2", family_level) ~ "#D95F02", -TRUE ~ "#7570B3") -df_corr$var1 = factor(df_corr$var1, levels = family_level) -df_corr$var2 = factor(df_corr$var2, levels = family_level) -df_corr$metric = factor(df_corr$metric, levels = c("Pearson", "Distance")) -p_corr = df_corr %>% -ggplot(aes(var2, var1, fill = value)) + -geom_tile(color = "black") + -scale_fill_gradient2(low = "blue", high = "red", mid = "white", na.value = "grey", -midpoint = 0, limit = c(-1,1), space = "Lab", -name = NULL) + -scale_x_discrete(drop = FALSE, labels = family_label) + -scale_y_discrete(drop = FALSE, labels = family_label) + -facet_grid(rows = vars(metric)) + -geom_text(aes(var2, var1, label = value), color = "black", size = 4) + -labs(x = NULL, y = NULL, title = NULL) + -guides(fill = guide_colorbar(barwidth = 1, barheight = 7, -title.position = "right")) + +#>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> +stat_summary(fun = mean, geom = "bar", position = "dodge", color = "black") + +stat_summary(fun.data = mean_sdl, fun.args = list(mult = 1), color = "black", +geom = "errorbar", width = .3, position = position_dodge(.9)) + +geom_point(aes(x = setting), size = .1, alpha = .3, +position = position_jitterdodge(jitter.width = .2, dodge.width = .9)) + +scale_fill_nejm(name = NULL) + +guides(fill = guide_legend(override.aes = list(shape = NA))) + +p_impute2 = df_fig2 %>% +ggplot(aes(x = S, y = value, fill = type)) + +stat_summary(fun = mean, geom = "bar", position = "dodge", color = "black") + +stat_summary(fun.data = mean_sdl, fun.args = list(mult = 1), color = "black", +geom = "errorbar", width = .3, position = position_dodge(.9)) + +geom_point(aes(x = S), size = .1, alpha = .3, +position = position_jitterdodge(jitter.width = .2, dodge.width = .9)) + +scale_fill_npg(name = NULL) + +guides(fill = guide_legend(override.aes = list(shape = NA))) + +geom_hline(yintercept = 0.8, linetype = "dashed") + +scale_y_discrete(breaks = seq(0, 1, 0.2)) + +labs(x = "Sampling fraction", y = "Correlation estimate", +title = "Bias of Estimating Zero Correlation Coefficients") + +theme_bw() + +theme(plot.title = element_text(hjust = 0.5)) +#>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> +stat_summary(fun = mean, geom = "bar", position = "dodge", color = "black") + +stat_summary(fun.data = mean_sdl, fun.args = list(mult = 1), color = "black", +geom = "errorbar", width = .3, position = position_dodge(.9)) + +geom_point(aes(x = setting), size = .1, alpha = .3, +position = position_jitterdodge(jitter.width = .2, dodge.width = .9)) + +scale_fill_nejm(name = NULL) + +guides(fill = guide_legend(override.aes = list(shape = NA))) + +p_impute2 = df_fig2 %>% +ggplot(aes(x = S, y = value, fill = type)) + +stat_summary(fun = mean, geom = "bar", position = "dodge", color = "black") + +stat_summary(fun.data = mean_sdl, fun.args = list(mult = 1), color = "black", +geom = "errorbar", width = .3, position = position_dodge(.9)) + +geom_point(aes(x = S), size = .1, alpha = .3, +position = position_jitterdodge(jitter.width = .2, dodge.width = .9)) + +scale_fill_npg(name = NULL) + +guides(fill = guide_legend(override.aes = list(shape = NA))) + +scale_y_discrete(breaks = seq(0, 1, 0.2)) + +geom_hline(yintercept = 0.8, linetype = "dashed") + +labs(x = "Sampling fraction", y = "Correlation estimate", +title = "Bias of Estimating Zero Correlation Coefficients") + +theme_bw() + +theme(plot.title = element_text(hjust = 0.5)) +#>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> +stat_summary(fun = mean, geom = "bar", position = "dodge", color = "black") + +stat_summary(fun.data = mean_sdl, fun.args = list(mult = 1), color = "black", +geom = "errorbar", width = .3, position = position_dodge(.9)) + +geom_point(aes(x = setting), size = .1, alpha = .3, +position = position_jitterdodge(jitter.width = .2, dodge.width = .9)) + +scale_fill_nejm(name = NULL) + +guides(fill = guide_legend(override.aes = list(shape = NA))) + +p_impute2 = df_fig2 %>% +ggplot(aes(x = S, y = value, fill = type)) + +stat_summary(fun = mean, geom = "bar", position = "dodge", color = "black") + +stat_summary(fun.data = mean_sdl, fun.args = list(mult = 1), color = "black", +geom = "errorbar", width = .3, position = position_dodge(.9)) + +geom_point(aes(x = S), size = .1, alpha = .3, +position = position_jitterdodge(jitter.width = .2, dodge.width = .9)) + +scale_fill_npg(name = NULL) + +guides(fill = guide_legend(override.aes = list(shape = NA))) + +# scale_y_discrete(breaks = seq(0, 1, 0.2)) + +geom_hline(yintercept = 0.8, linetype = "dashed") + +labs(x = "Sampling fraction", y = "Correlation estimate", +title = "Bias of Estimating Zero Correlation Coefficients") + +theme_bw() + +theme(plot.title = element_text(hjust = 0.5)) +rlang::last_error() +res2 = read_csv("../outputs/others/cca_vs_impute_corr.csv") +res2$S = factor(res2$S, levels = c("0.1", "0.05", "0.02", "0.01", "0.005")) +df_fig2 = res2 %>% +pivot_longer(cols = r_cca:r_gbm, names_to = "measure", values_to = "value") %>% +mutate( +type = case_when( +measure == "r_cca" ~ "CCA", +measure == "r_mice" ~ "MICE", +TRUE ~ "GBM" +) +) +#>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> +stat_summary(fun = mean, geom = "bar", position = "dodge", color = "black") + +stat_summary(fun.data = mean_sdl, fun.args = list(mult = 1), color = "black", +geom = "errorbar", width = .3, position = position_dodge(.9)) + +geom_point(aes(x = setting), size = .1, alpha = .3, +position = position_jitterdodge(jitter.width = .2, dodge.width = .9)) + +scale_fill_nejm(name = NULL) + +guides(fill = guide_legend(override.aes = list(shape = NA))) + +p_impute2 = df_fig2 %>% +ggplot(aes(x = S, y = value, fill = type)) + +stat_summary(fun = mean, geom = "bar", position = "dodge", color = "black") + +stat_summary(fun.data = mean_sdl, fun.args = list(mult = 1), color = "black", +geom = "errorbar", width = .3, position = position_dodge(.9)) + +geom_point(aes(x = S), size = .1, alpha = .3, +position = position_jitterdodge(jitter.width = .2, dodge.width = .9)) + +scale_fill_npg(name = NULL) + +# guides(fill = guide_legend(override.aes = list(shape = NA))) + +scale_y_discrete(breaks = seq(0, 1, 0.2)) + +geom_hline(yintercept = 0.8, linetype = "dashed") + +labs(x = "Sampling fraction", y = "Correlation estimate", +title = "Bias of Estimating Zero Correlation Coefficients") + +theme_bw() + +theme(plot.title = element_text(hjust = 0.5)) +p_impute2 +p_impute2 = df_fig2 %>% +ggplot(aes(x = S, y = value, fill = type)) + +stat_summary(fun = mean, geom = "bar", position = "dodge", color = "black") + +stat_summary(fun.data = mean_sdl, fun.args = list(mult = 1), color = "black", +geom = "errorbar", width = .3, position = position_dodge(.9)) + +geom_point(aes(x = S), size = .1, alpha = .3, +position = position_jitterdodge(jitter.width = .2, dodge.width = .9)) +p_impute2 +p_impute2 = df_fig2 %>% +ggplot(aes(x = S, y = value, fill = type)) + +stat_summary(fun = mean, geom = "bar", position = "dodge", color = "black") + +stat_summary(fun.data = mean_sdl, fun.args = list(mult = 1), color = "black", +geom = "errorbar", width = .3, position = position_dodge(.9)) + +geom_point(aes(x = S), size = .1, alpha = .3, +position = position_jitterdodge(jitter.width = .2, dodge.width = .9)) + +scale_fill_npg(name = NULL) +p_impute2 +p_impute2 = df_fig2 %>% +ggplot(aes(x = S, y = value, fill = type)) + +stat_summary(fun = mean, geom = "bar", position = "dodge", color = "black") + +stat_summary(fun.data = mean_sdl, fun.args = list(mult = 1), color = "black", +geom = "errorbar", width = .3, position = position_dodge(.9)) + +geom_point(aes(x = S), size = .1, alpha = .3, +position = position_jitterdodge(jitter.width = .2, dodge.width = .9)) + +scale_fill_npg(name = NULL) + +# guides(fill = guide_legend(override.aes = list(shape = NA))) + +scale_y_discrete(breaks = seq(0, 1, 0.2)) +p_impute2 +p_impute2 = df_fig2 %>% +ggplot(aes(x = S, y = value, fill = type)) + +stat_summary(fun = mean, geom = "bar", position = "dodge", color = "black") + +stat_summary(fun.data = mean_sdl, fun.args = list(mult = 1), color = "black", +geom = "errorbar", width = .3, position = position_dodge(.9)) + +geom_point(aes(x = S), size = .1, alpha = .3, +position = position_jitterdodge(jitter.width = .2, dodge.width = .9)) + +scale_fill_npg(name = NULL) + +# guides(fill = guide_legend(override.aes = list(shape = NA))) + +scale_y_discrete(breaks = seq(0, 1, 0.2)) + +geom_hline(yintercept = 0.8, linetype = "dashed") +p_impute2 = df_fig2 %>% +ggplot(aes(x = S, y = value, fill = type)) + +stat_summary(fun = mean, geom = "bar", position = "dodge", color = "black") + +stat_summary(fun.data = mean_sdl, fun.args = list(mult = 1), color = "black", +geom = "errorbar", width = .3, position = position_dodge(.9)) + +geom_point(aes(x = S), size = .1, alpha = .3, +position = position_jitterdodge(jitter.width = .2, dodge.width = .9)) + +scale_fill_npg(name = NULL) + +# guides(fill = guide_legend(override.aes = list(shape = NA))) + +scale_y_discrete(breaks = seq(0, 1, 0.2)) + +geom_hline(yintercept = 0.8, linetype = "dashed") + +labs(x = "Sampling fraction", y = "Correlation estimate", +title = "Bias of Estimating Zero Correlation Coefficients") +#>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> +stat_summary(fun = mean, geom = "bar", position = "dodge", color = "black") + +stat_summary(fun.data = mean_sdl, fun.args = list(mult = 1), color = "black", +geom = "errorbar", width = .3, position = position_dodge(.9)) + +geom_point(aes(x = setting), size = .1, alpha = .3, +position = position_jitterdodge(jitter.width = .2, dodge.width = .9)) + +scale_fill_nejm(name = NULL) + +guides(fill = guide_legend(override.aes = list(shape = NA))) + +p_impute2 = df_fig2 %>% +ggplot(aes(x = S, y = value, fill = type)) + +stat_summary(fun = mean, geom = "bar", position = "dodge", color = "black") + +stat_summary(fun.data = mean_sdl, fun.args = list(mult = 1), color = "black", +geom = "errorbar", width = .3, position = position_dodge(.9)) + +geom_point(aes(x = S), size = .1, alpha = .3, +position = position_jitterdodge(jitter.width = .2, dodge.width = .9)) + +scale_fill_npg(name = NULL) + +# guides(fill = guide_legend(override.aes = list(shape = NA))) + +scale_y_discrete(breaks = seq(0, 1, 0.2)) + +geom_hline(yintercept = 0.8, linetype = "dashed") + +labs(x = "Sampling fraction", y = "Correlation estimate", +title = "Bias of Estimating Zero Correlation Coefficients") + +theme_bw() + +theme(plot.title = element_text(hjust = 0.5)) +p_impute2 = df_fig2 %>% +ggplot(aes(x = S, y = value, fill = type)) + +stat_summary(fun = mean, geom = "bar", position = "dodge", color = "black") + +stat_summary(fun.data = mean_sdl, fun.args = list(mult = 1), color = "black", +geom = "errorbar", width = .3, position = position_dodge(.9)) + +geom_point(aes(x = S), size = .1, alpha = .3, +position = position_jitterdodge(jitter.width = .2, dodge.width = .9)) + +scale_fill_npg(name = NULL) + +# guides(fill = guide_legend(override.aes = list(shape = NA))) + +scale_y_discrete(breaks = seq(0, 1, 0.2)) + +geom_hline(yintercept = 0.8, linetype = "dashed") + +labs(x = "Sampling fraction", y = "Correlation estimate", +title = "Bias of Estimating Zero Correlation Coefficients") +p_impute2 = df_fig2 %>% +ggplot(aes(x = S, y = value, fill = type)) + +stat_summary(fun = mean, geom = "bar", position = "dodge", color = "black") + +stat_summary(fun.data = mean_sdl, fun.args = list(mult = 1), color = "black", +geom = "errorbar", width = .3, position = position_dodge(.9)) + +geom_point(aes(x = S), size = .1, alpha = .3, +position = position_jitterdodge(jitter.width = .2, dodge.width = .9)) + +scale_fill_npg(name = NULL) + +# guides(fill = guide_legend(override.aes = list(shape = NA))) + +scale_y_discrete(breaks = seq(0, 1, 0.2)) + +geom_hline(yintercept = 0.8, linetype = "dashed") + +labs(x = "Sampling fraction", y = "Correlation estimate", +title = "Bias of Estimating Zero Correlation Coefficients") + +theme_bw() +p_impute2 = df_fig2 %>% +ggplot(aes(x = S, y = value, fill = type)) + +stat_summary(fun = mean, geom = "bar", position = "dodge", color = "black") + +stat_summary(fun.data = mean_sdl, fun.args = list(mult = 1), color = "black", +geom = "errorbar", width = .3, position = position_dodge(.9)) + +geom_point(aes(x = S), size = .1, alpha = .3, +position = position_jitterdodge(jitter.width = .2, dodge.width = .9)) + +scale_fill_npg(name = NULL) + +# guides(fill = guide_legend(override.aes = list(shape = NA))) + +scale_y_discrete(breaks = seq(0, 1, 0.2)) + +geom_hline(yintercept = 0.8, linetype = "dashed") + +labs(x = "Sampling fraction", y = "Correlation estimate", +title = "Bias of Estimating Zero Correlation Coefficients") + +theme_bw() + +theme(plot.title = element_text(hjust = 0.5)) +#>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> +stat_summary(fun = mean, geom = "bar", position = "dodge", color = "black") + +stat_summary(fun.data = mean_sdl, fun.args = list(mult = 1), color = "black", +geom = "errorbar", width = .3, position = position_dodge(.9)) + +geom_point(aes(x = setting), size = .1, alpha = .3, +position = position_jitterdodge(jitter.width = .2, dodge.width = .9)) + +scale_fill_nejm(name = NULL) + +guides(fill = guide_legend(override.aes = list(shape = NA))) + +p_impute2 = df_fig2 %>% +ggplot(aes(x = S, y = value, fill = type)) + +stat_summary(fun = mean, geom = "bar", position = "dodge", color = "black") + +stat_summary(fun.data = mean_sdl, fun.args = list(mult = 1), color = "black", +geom = "errorbar", width = .3, position = position_dodge(.9)) + +geom_point(aes(x = S), size = .1, alpha = .3, +position = position_jitterdodge(jitter.width = .2, dodge.width = .9)) + +scale_fill_npg(name = NULL) + +# guides(fill = guide_legend(override.aes = list(shape = NA))) + +scale_y_discrete(breaks = seq(0, 1, 0.2)) + +geom_hline(yintercept = 0.8, linetype = "dashed") + +labs(x = "Sampling fraction", y = "Correlation estimate", +title = "Bias of Estimating Zero Correlation Coefficients") + +theme_bw() + +theme(plot.title = element_text(hjust = 0.5)) +#>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> +stat_summary(fun = mean, geom = "bar", position = "dodge", color = "black") + +stat_summary(fun.data = mean_sdl, fun.args = list(mult = 1), color = "black", +geom = "errorbar", width = .3, position = position_dodge(.9)) + +geom_point(aes(x = setting), size = .1, alpha = .3, +position = position_jitterdodge(jitter.width = .2, dodge.width = .9)) + +scale_fill_nejm(name = NULL) + +guides(fill = guide_legend(override.aes = list(shape = NA))) + +p_impute2 = df_fig2 %>% +ggplot(aes(x = S, y = value, fill = type)) + +stat_summary(fun = mean, geom = "bar", position = "dodge", color = "black") + +stat_summary(fun.data = mean_sdl, fun.args = list(mult = 1), color = "black", +geom = "errorbar", width = .3, position = position_dodge(.9)) + +geom_point(aes(x = S), size = .1, alpha = .3, +position = position_jitterdodge(jitter.width = .2, dodge.width = .9)) + +scale_fill_npg(name = NULL) + +# guides(fill = guide_legend(override.aes = list(shape = NA))) + +scale_y_discrete(breaks = seq(0, 1, 0.2)) + +geom_hline(yintercept = 0.8, linetype = "dashed") + +labs(x = "Sampling fraction", y = "Correlation estimate", +title = "Bias of Estimating Zero Correlation Coefficients") + +theme_bw() + +theme(plot.title = element_text(hjust = 0.5)) +p_impute2 = df_fig2 %>% +ggplot(aes(x = S, y = value, fill = type)) + +stat_summary(fun = mean, geom = "bar", position = "dodge", color = "black") + +stat_summary(fun.data = mean_sdl, fun.args = list(mult = 1), color = "black", +geom = "errorbar", width = .3, position = position_dodge(.9)) + +geom_point(aes(x = S), size = .1, alpha = .3, +position = position_jitterdodge(jitter.width = .2, dodge.width = .9)) + +scale_fill_npg(name = NULL) + +# guides(fill = guide_legend(override.aes = list(shape = NA))) + +scale_y_discrete(breaks = seq(0, 1, 0.2)) + +geom_hline(yintercept = 0.8, linetype = "dashed") + +labs(x = "Sampling fraction", y = "Correlation estimate", +title = "Bias of Estimating Zero Correlation Coefficients") + +theme_bw() + +theme(plot.title = element_text(hjust = 0.5)) +p_impute2 +p_impute2 = df_fig2 %>% +ggplot(aes(x = S, y = value, fill = type)) + +stat_summary(fun = mean, geom = "bar", position = "dodge", color = "black") + +stat_summary(fun.data = mean_sdl, fun.args = list(mult = 1), color = "black", +geom = "errorbar", width = .3, position = position_dodge(.9)) + +geom_point(aes(x = S), size = .1, alpha = .3, +position = position_jitterdodge(jitter.width = .2, dodge.width = .9)) + +scale_fill_npg(name = NULL) + +guides(fill = guide_legend(override.aes = list(shape = NA))) + +scale_y_discrete(limits = c(0, 1), breaks = seq(0, 1, 0.2)) + +geom_hline(yintercept = 0.8, linetype = "dashed") + +labs(x = "Sampling fraction", y = "Correlation estimate", +title = "Bias of Estimating Zero Correlation Coefficients") + +theme_bw() + +theme(plot.title = element_text(hjust = 0.5)) +p_impute2 +p_impute2 = df_fig2 %>% +ggplot(aes(x = S, y = value, fill = type)) + +stat_summary(fun = mean, geom = "bar", position = "dodge", color = "black") + +stat_summary(fun.data = mean_sdl, fun.args = list(mult = 1), color = "black", +geom = "errorbar", width = .3, position = position_dodge(.9)) + +geom_point(aes(x = S), size = .1, alpha = .3, +position = position_jitterdodge(jitter.width = .2, dodge.width = .9)) + +scale_fill_npg(name = NULL) + +guides(fill = guide_legend(override.aes = list(shape = NA))) + +scale_y_continuous(limits = c(0, 1), breaks = seq(0, 1, 0.2)) + +geom_hline(yintercept = 0.8, linetype = "dashed") + +labs(x = "Sampling fraction", y = "Correlation estimate", +title = "Bias of Estimating Zero Correlation Coefficients") + theme_bw() + -geom_vline(xintercept = c(8.5, 15.5), color = "blue", linetype = "dashed") + -geom_hline(yintercept = c(8.5, 15.5), color = "blue", linetype = "dashed") + -theme(axis.text.x = element_text(angle = 45, vjust = 1, size = 12, hjust = 1, -face = "italic", color = txt_color), -axis.text.y = element_text(size = 12, face = "italic", color = txt_color), -strip.text.x = element_text(size = 14), -strip.text.y = element_text(size = 14), -legend.text = element_text(size = 12), -plot.title = element_text(hjust = 0.5, size = 15), -panel.grid.major = element_blank(), -axis.ticks = element_blank()) + -coord_fixed() -p_corr -p_corr -saveRDS(res_linear, file = "../data/nomic/family/res_linear.rds") -saveRDS(res_dist, file = "../data/nomic/family/res_dist.rds") -raw_30 = abundances(family_data30) -meta_30 = meta(family_data30) -col_30 = colnames(raw_30) -new_30 = meta_30$kortnr[which(col_30 %in% meta_30$X.SampleID)] -colnames(raw_30) = new_30 -rownames(raw_30) = paste0("data1 - ", rownames(raw_30)) -raw_120 = abundances(family_data120) -meta_120 = meta(family_data120) -col_120 = colnames(raw_120) -new_120 = meta_120$kortnr[which(col_120 %in% meta_120$X.SampleID)] -colnames(raw_120) = new_120 -rownames(raw_120) = paste0("data2 - ", rownames(raw_120)) -raw_365 = abundances(family_data365) -meta_365 = meta(family_data365) -col_365 = colnames(raw_365) -new_365 = meta_365$kortnr[which(col_365 %in% meta_365$X.SampleID)] -colnames(raw_365) = new_365 -rownames(raw_365) = paste0("data3 - ", rownames(raw_365)) -raw_30_120 = data.frame(raw_30, check.names = FALSE) %>% -bind_rows(data.frame(raw_120, check.names = FALSE)) %>% -select_if(~ !any(is.na(.))) -raw_30_365 = data.frame(raw_30, check.names = FALSE) %>% -bind_rows(data.frame(raw_365, check.names = FALSE)) %>% -select_if(~ !any(is.na(.))) -raw_120_365 = data.frame(raw_120, check.names = FALSE) %>% -bind_rows(data.frame(raw_365, check.names = FALSE)) %>% -select_if(~ !any(is.na(.))) -# SparCC cannot handle missing values, so we have to calculate the correlation -# matrix separately -set.seed(123) -R_hat_sparcc11 = sparcc(t(raw_30))$Cor -R_hat_sparcc22 = sparcc(t(raw_120))$Cor -R_hat_sparcc33 = sparcc(t(raw_365))$Cor -R_hat_sparcc12 = sparcc(t(raw_30_120))$Cor -R_hat_sparcc12 = R_hat_sparcc12[seq_len(nrow(raw_30)), -seq(from = nrow(raw_30) + 1, -to = nrow(raw_30_120))] -R_hat_sparcc13 = sparcc(t(raw_30_365))$Cor -R_hat_sparcc13 = R_hat_sparcc13[seq_len(nrow(raw_30)), -seq(from = nrow(raw_30) + 1, -to = nrow(raw_30_365))] -R_hat_sparcc23 = sparcc(t(raw_120_365))$Cor -R_hat_sparcc23 = R_hat_sparcc23[seq_len(nrow(raw_120)), -seq(from = nrow(raw_120) + 1, -to = nrow(raw_120_365))] -R_hat_sparcc = matrix(NA, -nrow = nrow(raw_30) + nrow(raw_120) + nrow(raw_365), -ncol = nrow(raw_30) + nrow(raw_120) + nrow(raw_365)) -R_hat_sparcc[seq_len(nrow(raw_30)), seq_len(nrow(raw_30))] = R_hat_sparcc11 -R_hat_sparcc[seq_len(nrow(raw_30)), -seq(from = nrow(raw_30) + 1, -to = nrow(raw_30) + nrow(raw_120))] = R_hat_sparcc12 -R_hat_sparcc[seq_len(nrow(raw_30)), -seq(from = nrow(raw_30) + nrow(raw_120) + 1, -to = nrow(raw_30) + nrow(raw_120) + nrow(raw_365))] = R_hat_sparcc13 -R_hat_sparcc[seq(from = nrow(raw_30) + 1, -to = nrow(raw_30) + nrow(raw_120)), -seq(from = nrow(raw_30) + 1, -to = nrow(raw_30) + nrow(raw_120))] = R_hat_sparcc22 -R_hat_sparcc[seq(from = nrow(raw_30) + 1, -to = nrow(raw_30) + nrow(raw_120)), -seq(from = nrow(raw_30) + nrow(raw_120) + 1, -to = nrow(raw_30) + nrow(raw_120) + nrow(raw_365))] = R_hat_sparcc23 -R_hat_sparcc[seq(from = nrow(raw_30) + nrow(raw_120) + 1, -to = nrow(raw_30) + nrow(raw_120) + nrow(raw_365)), -seq(from = nrow(raw_30) + nrow(raw_120) + 1, -to = nrow(raw_30) + nrow(raw_120) + nrow(raw_365))] = R_hat_sparcc33 -R_hat_sparcc[lower.tri(R_hat_sparcc)] = R_hat_sparcc[upper.tri(R_hat_sparcc)] -dimnames(R_hat_sparcc) = list(c(rownames(raw_30), rownames(raw_120), rownames(raw_365)), -c(rownames(raw_30), rownames(raw_120), rownames(raw_365))) -saveRDS(R_hat_sparcc, file = "../data/nomic/sparcc/R_hat_sparcc.rds") -R_hat_sparcc = read_rds("../data/nomic/sparcc/R_hat_sparcc.rds") -R_hat_sparcc = hard_thresh(R_hat_sparcc, 0.3) -col_ind = match(family_level, colnames(R_hat_sparcc)) -R_hat_sparcc = R_hat_sparcc[col_ind[!is.na(col_ind)], col_ind[!is.na(col_ind)]] -df_sparcc = data.frame(get_upper_tri(R_hat_sparcc)) %>% -rownames_to_column("var1") %>% -pivot_longer(cols = -var1, names_to = "var2", values_to = "value") %>% -mutate(var2 = gsub("\\...", " - ", var2)) %>% -filter(!is.na(value)) %>% -mutate(value = round(value, 2)) -family_level = sort(union(df_sparcc$var1, df_sparcc$var2)) -family_label = sapply(family_level, function(x) strsplit(x, " - ")[[1]][2]) -txt_color = case_when(grepl("data1", family_level) ~ "#1B9E77", -grepl("data2", family_level) ~ "#D95F02", -TRUE ~ "#7570B3") -df_sparcc$var1 = factor(df_sparcc$var1, levels = family_level) -df_sparcc$var2 = factor(df_sparcc$var2, levels = family_level) -p_sparcc = df_sparcc %>% -ggplot(aes(var2, var1, fill = value)) + -geom_tile(color = "black") + -scale_fill_gradient2(low = "blue", high = "red", mid = "white", na.value = "grey", -midpoint = 0, limit = c(-1,1), space = "Lab", -name = NULL) + -scale_x_discrete(drop = FALSE, labels = family_label) + -scale_y_discrete(drop = FALSE, labels = family_label) + -geom_text(aes(var2, var1, label = value), color = "black", size = 4) + -labs(x = NULL, y = NULL, title = NULL) + -guides(fill = guide_colorbar(barwidth = 1, barheight = 7, -title.position = "right")) + +theme(plot.title = element_text(hjust = 0.5)) +p_impute2 +p_impute2 = df_fig2 %>% +ggplot(aes(x = S, y = value, fill = type)) + +stat_summary(fun = mean, geom = "bar", position = "dodge", color = "black") + +stat_summary(fun.data = mean_sdl, fun.args = list(mult = 1), color = "black", +geom = "errorbar", width = .3, position = position_dodge(.9)) + +geom_point(aes(x = S), size = .1, alpha = .3, +position = position_jitterdodge(jitter.width = .2, dodge.width = .9)) + +scale_fill_npg(name = NULL) + +guides(fill = guide_legend(override.aes = list(shape = NA))) + +scale_y_continuous(limits = c(0, 1), breaks = seq(0, 0.9, 0.2)) + +geom_hline(yintercept = 0.8, linetype = "dashed") + +labs(x = "Sampling fraction", y = "Correlation estimate", +title = "Bias of Estimating Zero Correlation Coefficients") + theme_bw() + -geom_vline(xintercept = c(8.5, 15.5), color = "blue", linetype = "dashed") + -geom_hline(yintercept = c(8.5, 15.5), color = "blue", linetype = "dashed") + -theme(axis.text.x = element_text(angle = 45, vjust = 1, size = 12, hjust = 1, -face = "italic", color = txt_color), -axis.text.y = element_text(size = 12, face = "italic", color = txt_color), -strip.text.x = element_text(size = 14), -strip.text.y = element_text(size = 14), -legend.text = element_text(size = 12), -plot.title = element_text(hjust = 0.5, size = 15), -panel.grid.major = element_blank(), -axis.ticks = element_blank()) + -coord_fixed() -p_sparcc -p_sparcc -library(ANCOMBC) -detach("package:ANCOMBC", unload = TRUE) -BiocManager::install("ANCOMBC") -library(ANCOMBC) -?secom_dist +theme(plot.title = element_text(hjust = 0.5)) +p_impute2 diff --git a/.Rproj.user/shared/notebooks/paths b/.Rproj.user/shared/notebooks/paths index 054eb60..91a0a45 100644 --- a/.Rproj.user/shared/notebooks/paths +++ b/.Rproj.user/shared/notebooks/paths @@ -7,6 +7,7 @@ /Users/huanglin/Google Drive/Methodological Research/03_secom/SECOM-Code-Archive/simulations/complex/secom.R="4D6DAE01" /Users/huanglin/Google Drive/Methodological Research/03_secom/SECOM-Code-Archive/simulations/concordance/linear.R="CA33CEF3" /Users/huanglin/Google Drive/Methodological Research/03_secom/SECOM-Code-Archive/simulations/concordance/nonlinear.R="2D4D8054" +/Users/huanglin/Google Drive/Methodological Research/03_secom/SECOM-Code-Archive/simulations/linear/prop.R="1BBDBD29" /Users/huanglin/Google Drive/Methodological Research/03_secom/SECOM-Code-Archive/simulations/linear/sample.R="B76BCE44" /Users/huanglin/Google Drive/Methodological Research/03_secom/SECOM-Code-Archive/simulations/linear/secom.R="E8979B6F" /Users/huanglin/Google Drive/Methodological Research/03_secom/SECOM-Code-Archive/simulations/nonlinear/sample.R="C099902C" diff --git a/CITATION.cff b/CITATION.cff new file mode 100644 index 0000000..a48b64a --- /dev/null +++ b/CITATION.cff @@ -0,0 +1,15 @@ +# This CITATION.cff file was generated with cffinit. +# Visit https://bit.ly/cffinit to generate yours today! + +cff-version: 1.2.0 +title: SECOM-Code-Archive +message: 'If you use this software, please cite it as below.' +type: software +authors: + - given-names: Huang + family-names: Lin + email: huang.lin@nih.gov + affiliation: >- + Eunice Kennedy Shriver National Institute of + Child Health and Human Development + orcid: 'https://orcid.org/0000-0002-4892-7871' diff --git a/images/main/sim_complex.jpeg b/images/main/sim_complex.jpeg index 005b459..3965171 100644 Binary files a/images/main/sim_complex.jpeg and b/images/main/sim_complex.jpeg differ diff --git a/images/main/sim_complex.pdf b/images/main/sim_complex.pdf index f05d649..8cee238 100644 Binary files a/images/main/sim_complex.pdf and b/images/main/sim_complex.pdf differ diff --git a/images/main/sim_linear.jpeg b/images/main/sim_linear.jpeg index 1131727..57ebece 100644 Binary files a/images/main/sim_linear.jpeg and b/images/main/sim_linear.jpeg differ diff --git a/images/main/sim_linear.pdf b/images/main/sim_linear.pdf index 92b309d..c946284 100644 Binary files a/images/main/sim_linear.pdf and b/images/main/sim_linear.pdf differ diff --git a/images/main/sim_nonlinear.jpeg b/images/main/sim_nonlinear.jpeg index 58ad1d9..a8330ba 100644 Binary files a/images/main/sim_nonlinear.jpeg and b/images/main/sim_nonlinear.jpeg differ diff --git a/images/main/sim_nonlinear.pdf b/images/main/sim_nonlinear.pdf index ca73d38..7c7a2a5 100644 Binary files a/images/main/sim_nonlinear.pdf and b/images/main/sim_nonlinear.pdf differ diff --git a/images/supp/supp_diff_corr.pdf b/images/supp/supp_diff_corr.pdf index 85b149a..092c21a 100644 Binary files a/images/supp/supp_diff_corr.pdf and b/images/supp/supp_diff_corr.pdf differ diff --git a/images/supp/supp_impute.jpeg b/images/supp/supp_impute.jpeg index 238f31a..8519f57 100644 Binary files a/images/supp/supp_impute.jpeg and b/images/supp/supp_impute.jpeg differ diff --git a/images/supp/supp_impute.pdf b/images/supp/supp_impute.pdf index 3f8178e..bcb7778 100644 Binary files a/images/supp/supp_impute.pdf and b/images/supp/supp_impute.pdf differ diff --git a/images/supp/supp_org_rank.pdf b/images/supp/supp_org_rank.pdf index b6e16e0..d056c37 100644 Binary files a/images/supp/supp_org_rank.pdf and b/images/supp/supp_org_rank.pdf differ diff --git a/images/supp/supp_pseudo.pdf b/images/supp/supp_pseudo.pdf index 13c4b02..495b3ef 100644 Binary files a/images/supp/supp_pseudo.pdf and b/images/supp/supp_pseudo.pdf differ diff --git a/images/supp/supp_sim_complex.jpeg b/images/supp/supp_sim_complex.jpeg index 7282d08..d69e7ed 100644 Binary files a/images/supp/supp_sim_complex.jpeg and b/images/supp/supp_sim_complex.jpeg differ diff --git a/images/supp/supp_sim_complex.pdf b/images/supp/supp_sim_complex.pdf index 7d8f40a..746c38d 100644 Binary files a/images/supp/supp_sim_complex.pdf and b/images/supp/supp_sim_complex.pdf differ diff --git a/images/supp/supp_sim_linear.jpeg b/images/supp/supp_sim_linear.jpeg index bcdfbff..4643579 100644 Binary files a/images/supp/supp_sim_linear.jpeg and b/images/supp/supp_sim_linear.jpeg differ diff --git a/images/supp/supp_sim_linear.pdf b/images/supp/supp_sim_linear.pdf index eeb71b4..9290576 100644 Binary files a/images/supp/supp_sim_linear.pdf and b/images/supp/supp_sim_linear.pdf differ diff --git a/images/supp/supp_sim_nonlinear.jpeg b/images/supp/supp_sim_nonlinear.jpeg index 001bf6d..cbd0b16 100644 Binary files a/images/supp/supp_sim_nonlinear.jpeg and b/images/supp/supp_sim_nonlinear.jpeg differ diff --git a/images/supp/supp_sim_nonlinear.pdf b/images/supp/supp_sim_nonlinear.pdf index cbe8a3b..37e806a 100644 Binary files a/images/supp/supp_sim_nonlinear.pdf and b/images/supp/supp_sim_nonlinear.pdf differ diff --git a/programs/02_simulation.Rmd b/programs/02_simulation.Rmd index c0a5589..425a5e2 100644 --- a/programs/02_simulation.Rmd +++ b/programs/02_simulation.Rmd @@ -530,10 +530,8 @@ df_linear = rbind(linear_secom1, linear_secom2, linear_secom3, linear_prop, # Relative norm loss df_linear_fig1 = df_linear %>% dplyr::select(rel_F, rel_S, method, setting) %>% - pivot_longer(cols = rel_F:rel_S, names_to = "measure", values_to = "value") %>% - group_by(method, setting, measure) %>% - summarise(measure_mean = mean(value), - measure_sd = sd(value)) + pivot_longer(cols = rel_F:rel_S, names_to = "measure", values_to = "value") + df_linear_fig1$method = factor(df_linear_fig1$method, levels = c("SECOM (Pearson1)", "SECOM (Pearson2)", "SECOM (Distance)", "Proportionality", "SparCC", @@ -543,11 +541,14 @@ df_linear_fig1$setting = factor(df_linear_fig1$setting, "100, 200, 0.5", "100, 200, 2")) p_linear1 = df_linear_fig1 %>% - ggplot(aes(x = setting, y = measure_mean, fill = method)) + - geom_bar(position = "dodge", stat = "identity", color = "black") + - geom_errorbar(aes(ymin = measure_mean - measure_sd, ymax = measure_mean + measure_sd), - width = .2, position = position_dodge(.9)) + + ggplot(aes(x = setting, y = value, fill = method)) + + stat_summary(fun = mean, geom = "bar", position = "dodge", color = "black") + + stat_summary(fun.data = mean_sdl, fun.args = list(mult = 1), color = "black", + geom = "errorbar", width = .3, position = position_dodge(.9)) + + geom_point(aes(x = setting), size = .1, alpha = .3, + position = position_jitterdodge(jitter.width = .2, dodge.width = .9)) + scale_fill_nejm(name = NULL) + + guides(fill = guide_legend(override.aes = list(shape = NA))) + scale_x_discrete(labels = c("50, 100, 0.5" = "(n = 50, d = 100, \u03B1 = 0.5)", "50, 100, 2" = "(n = 50, d = 100, \u03B1 = 2)", "100, 200, 0.5" = "(n = 100, d = 200, \u03B1 = 0.5)", @@ -562,10 +563,8 @@ p_linear1 = df_linear_fig1 %>% # TPR/FPR df_linear_fig2 = df_linear %>% dplyr::select(tpr, fpr, method, setting) %>% - pivot_longer(cols = tpr:fpr, names_to = "measure", values_to = "value") %>% - group_by(method, setting, measure) %>% - summarise(measure_mean = mean(value), - measure_sd = sd(value)) + pivot_longer(cols = tpr:fpr, names_to = "measure", values_to = "value") + df_linear_fig2$method = factor(df_linear_fig2$method, levels = c("SECOM (Pearson1)", "SECOM (Pearson2)", "SECOM (Distance)", "Proportionality", "SparCC", @@ -575,11 +574,14 @@ df_linear_fig2$setting = factor(df_linear_fig2$setting, "100, 200, 0.5", "100, 200, 2")) p_linear2 = df_linear_fig2 %>% - ggplot(aes(x = setting, y = measure_mean, fill = method)) + - geom_bar(position = "dodge", stat = "identity", color = "black") + - geom_errorbar(aes(ymin = measure_mean - measure_sd, ymax = measure_mean + measure_sd), - width = .2, position = position_dodge(.9)) + + ggplot(aes(x = setting, y = value, fill = method)) + + stat_summary(fun = mean, geom = "bar", position = "dodge", color = "black") + + stat_summary(fun.data = mean_sdl, fun.args = list(mult = 1), color = "black", + geom = "errorbar", width = .3, position = position_dodge(.9)) + + geom_point(aes(x = setting), size = .1, alpha = .3, + position = position_jitterdodge(jitter.width = .2, dodge.width = .9)) + scale_fill_nejm(name = NULL) + + guides(fill = guide_legend(override.aes = list(shape = NA))) + scale_x_discrete(labels = c("50, 100, 0.5" = "(n = 50, d = 100, \u03B1 = 0.5)", "50, 100, 2" = "(n = 50, d = 100, \u03B1 = 2)", "100, 200, 0.5" = "(n = 100, d = 200, \u03B1 = 0.5)", @@ -588,10 +590,9 @@ p_linear2 = df_linear_fig2 %>% labeller = labeller(measure = c(tpr = "TPR", fpr = "FPR"))) + labs(title = "FPR/TPR", x = NULL, y = NULL) + theme_bw() + - theme(plot.title = element_text(hjust = 0.5), + theme(plot.title = element_text(hjust = .5), strip.background = element_rect(fill = "white")) - p_linear = ggarrange(p_linear1, p_linear2, ncol = 2, nrow = 1, labels = c("a", "b"), common.legend = TRUE, legend = "bottom") print(p_linear) @@ -608,10 +609,8 @@ df_linear = rbind(linear_secom1, linear_secom2, linear_secom3, linear_prop, # Relative norm loss df_linear_fig1 = df_linear %>% dplyr::select(rel_F, rel_S, method, setting) %>% - pivot_longer(cols = rel_F:rel_S, names_to = "measure", values_to = "value") %>% - group_by(method, setting, measure) %>% - summarise(measure_mean = mean(value), - measure_sd = sd(value)) + pivot_longer(cols = rel_F:rel_S, names_to = "measure", values_to = "value") + df_linear_fig1$method = factor(df_linear_fig1$method, levels = c("SECOM (Pearson1)", "SECOM (Pearson2)", "SECOM (Distance)", "Proportionality", "SparCC", @@ -621,11 +620,14 @@ df_linear_fig1$setting = factor(df_linear_fig1$setting, "100, 200, 0.5", "100, 200, 2")) p_linear1 = df_linear_fig1 %>% - ggplot(aes(x = setting, y = measure_mean, fill = method)) + - geom_bar(position = "dodge", stat = "identity", color = "black") + - geom_errorbar(aes(ymin = measure_mean - measure_sd, ymax = measure_mean + measure_sd), - width = .2, position = position_dodge(.9)) + + ggplot(aes(x = setting, y = value, fill = method)) + + stat_summary(fun = mean, geom = "bar", position = "dodge", color = "black") + + stat_summary(fun.data = mean_sdl, fun.args = list(mult = 1), color = "black", + geom = "errorbar", width = .3, position = position_dodge(.9)) + + geom_point(aes(x = setting), size = .1, alpha = .3, + position = position_jitterdodge(jitter.width = .2, dodge.width = .9)) + scale_fill_nejm(name = NULL) + + guides(fill = guide_legend(override.aes = list(shape = NA))) + scale_x_discrete(labels = c("50, 100, 0.5" = "(n = 50, d = 100, \u03B1 = 0.5)", "50, 100, 2" = "(n = 50, d = 100, \u03B1 = 2)", "100, 200, 0.5" = "(n = 100, d = 200, \u03B1 = 0.5)", @@ -640,10 +642,8 @@ p_linear1 = df_linear_fig1 %>% # TPR/FPR df_linear_fig2 = df_linear %>% dplyr::select(tpr, fpr, method, setting) %>% - pivot_longer(cols = tpr:fpr, names_to = "measure", values_to = "value") %>% - group_by(method, setting, measure) %>% - summarise(measure_mean = mean(value), - measure_sd = sd(value)) + pivot_longer(cols = tpr:fpr, names_to = "measure", values_to = "value") + df_linear_fig2$method = factor(df_linear_fig2$method, levels = c("SECOM (Pearson1)", "SECOM (Pearson2)", "SECOM (Distance)", "Proportionality", "SparCC", @@ -653,11 +653,14 @@ df_linear_fig2$setting = factor(df_linear_fig2$setting, "100, 200, 0.5", "100, 200, 2")) p_linear2 = df_linear_fig2 %>% - ggplot(aes(x = setting, y = measure_mean, fill = method)) + - geom_bar(position = "dodge", stat = "identity", color = "black") + - geom_errorbar(aes(ymin = measure_mean - measure_sd, ymax = measure_mean + measure_sd), - width = .2, position = position_dodge(.9)) + + ggplot(aes(x = setting, y = value, fill = method)) + + stat_summary(fun = mean, geom = "bar", position = "dodge", color = "black") + + stat_summary(fun.data = mean_sdl, fun.args = list(mult = 1), color = "black", + geom = "errorbar", width = .3, position = position_dodge(.9)) + + geom_point(aes(x = setting), size = .1, alpha = .3, + position = position_jitterdodge(jitter.width = .2, dodge.width = .9)) + scale_fill_nejm(name = NULL) + + guides(fill = guide_legend(override.aes = list(shape = NA))) + scale_x_discrete(labels = c("50, 100, 0.5" = "(n = 50, d = 100, \u03B1 = 0.5)", "50, 100, 2" = "(n = 50, d = 100, \u03B1 = 2)", "100, 200, 0.5" = "(n = 100, d = 200, \u03B1 = 0.5)", @@ -1089,10 +1092,8 @@ df_nonlinear = rbind(nonlinear_secom1, nonlinear_secom2, nonlinear_secom3, # TPR/FPR df_nonlinear_fig = df_nonlinear %>% dplyr::select(tpr, fpr, method, setting) %>% - pivot_longer(cols = tpr:fpr, names_to = "measure", values_to = "value") %>% - group_by(method, setting, measure) %>% - summarise(measure_mean = mean(value), - measure_sd = sd(value)) + pivot_longer(cols = tpr:fpr, names_to = "measure", values_to = "value") + df_nonlinear_fig$method = factor(df_nonlinear_fig$method, levels = c("SECOM (Pearson1)", "SECOM (Pearson2)", "SECOM (Distance)", "Proportionality", "SparCC", @@ -1102,12 +1103,14 @@ df_nonlinear_fig$setting = factor(df_nonlinear_fig$setting, "100, 200, 0.5", "100, 200, 2")) p_nonlinear = df_nonlinear_fig %>% - ggplot(aes(x = setting, y = measure_mean, fill = method)) + - geom_bar(position = "dodge", stat = "identity", color = "black") + - geom_errorbar(aes(ymin = measure_mean - measure_sd, ymax = measure_mean + measure_sd), - width = .2, position = position_dodge(.9)) + - # brewer.pal(n = 7, name = "Dark2") + ggplot(aes(x = setting, y = value, fill = method)) + + stat_summary(fun = mean, geom = "bar", position = "dodge", color = "black") + + stat_summary(fun.data = mean_sdl, fun.args = list(mult = 1), color = "black", + geom = "errorbar", width = .3, position = position_dodge(.9)) + + geom_point(aes(x = setting), size = .1, alpha = .3, + position = position_jitterdodge(jitter.width = .2, dodge.width = .9)) + scale_fill_nejm(name = NULL) + + guides(fill = guide_legend(override.aes = list(shape = NA))) + scale_x_discrete(labels = c("50, 100, 0.5" = "(n = 50, d = 100, \u03B1 = 0.5)", "50, 100, 2" = "(n = 50, d = 100, \u03B1 = 2)", "100, 200, 0.5" = "(n = 100, d = 200, \u03B1 = 0.5)", @@ -1137,10 +1140,8 @@ df_nonlinear = rbind(nonlinear_secom1, nonlinear_secom2, nonlinear_secom3, # TPR/FPR df_nonlinear_fig = df_nonlinear %>% dplyr::select(tpr, fpr, method, setting) %>% - pivot_longer(cols = tpr:fpr, names_to = "measure", values_to = "value") %>% - group_by(method, setting, measure) %>% - summarise(measure_mean = mean(value), - measure_sd = sd(value)) + pivot_longer(cols = tpr:fpr, names_to = "measure", values_to = "value") + df_nonlinear_fig$method = factor(df_nonlinear_fig$method, levels = c("SECOM (Pearson1)", "SECOM (Pearson2)", "SECOM (Distance)", "Proportionality", "SparCC", @@ -1150,12 +1151,14 @@ df_nonlinear_fig$setting = factor(df_nonlinear_fig$setting, "100, 200, 0.5", "100, 200, 2")) p_nonlinear = df_nonlinear_fig %>% - ggplot(aes(x = setting, y = measure_mean, fill = method)) + - geom_bar(position = "dodge", stat = "identity", color = "black") + - geom_errorbar(aes(ymin = measure_mean - measure_sd, ymax = measure_mean + measure_sd), - width = .2, position = position_dodge(.9)) + - # brewer.pal(n = 7, name = "Dark2") + ggplot(aes(x = setting, y = value, fill = method)) + + stat_summary(fun = mean, geom = "bar", position = "dodge", color = "black") + + stat_summary(fun.data = mean_sdl, fun.args = list(mult = 1), color = "black", + geom = "errorbar", width = .3, position = position_dodge(.9)) + + geom_point(aes(x = setting), size = .1, alpha = .3, + position = position_jitterdodge(jitter.width = .2, dodge.width = .9)) + scale_fill_nejm(name = NULL) + + guides(fill = guide_legend(override.aes = list(shape = NA))) + scale_x_discrete(labels = c("50, 100, 0.5" = "(n = 50, d = 100, \u03B1 = 0.5)", "50, 100, 2" = "(n = 50, d = 100, \u03B1 = 2)", "100, 200, 0.5" = "(n = 100, d = 200, \u03B1 = 0.5)", @@ -1994,10 +1997,8 @@ df_complex = rbind(complex_secom1, complex_secom2, complex_secom3, complex_prop, # Relative norm loss df_complex_fig1 = df_complex %>% dplyr::select(rel_F, rel_S, method, setting) %>% - pivot_longer(cols = rel_F:rel_S, names_to = "measure", values_to = "value") %>% - group_by(method, setting, measure) %>% - summarise(measure_mean = mean(value), - measure_sd = sd(value)) + pivot_longer(cols = rel_F:rel_S, names_to = "measure", values_to = "value") + df_complex_fig1$method = factor(df_complex_fig1$method, levels = c("SECOM (Pearson1)", "SECOM (Pearson2)", "SECOM (Distance)", "Proportionality", "SparCC", @@ -2007,11 +2008,14 @@ df_complex_fig1$setting = factor(df_complex_fig1$setting, "100, 200, 0.5", "100, 200, 2")) p_complex1 = df_complex_fig1 %>% - ggplot(aes(x = setting, y = measure_mean, fill = method)) + - geom_bar(position = "dodge", stat = "identity", color = "black") + - geom_errorbar(aes(ymin = measure_mean - measure_sd, ymax = measure_mean + measure_sd), - width = .2, position = position_dodge(.9)) + - scale_fill_nejm(name = NULL) + + ggplot(aes(x = setting, y = value, fill = method)) + + stat_summary(fun = mean, geom = "bar", position = "dodge", color = "black") + + stat_summary(fun.data = mean_sdl, fun.args = list(mult = 1), color = "black", + geom = "errorbar", width = .3, position = position_dodge(.9)) + + geom_point(aes(x = setting), size = .1, alpha = .3, + position = position_jitterdodge(jitter.width = .2, dodge.width = .9)) + + scale_fill_nejm(name = NULL) + + guides(fill = guide_legend(override.aes = list(shape = NA))) + scale_x_discrete(labels = c("50, 100, 0.5" = "(n = 50, d = 100, \u03B1 = 0.5)", "50, 100, 2" = "(n = 50, d = 100, \u03B1 = 2)", "100, 200, 0.5" = "(n = 100, d = 200, \u03B1 = 0.5)", @@ -2026,10 +2030,8 @@ p_complex1 = df_complex_fig1 %>% # TPR/FPR df_complex_fig2 = df_complex %>% dplyr::select(tpr, fpr, method, setting) %>% - pivot_longer(cols = tpr:fpr, names_to = "measure", values_to = "value") %>% - group_by(method, setting, measure) %>% - summarise(measure_mean = mean(value), - measure_sd = sd(value)) + pivot_longer(cols = tpr:fpr, names_to = "measure", values_to = "value") + df_complex_fig2$method = factor(df_complex_fig2$method, levels = c("SECOM (Pearson1)", "SECOM (Pearson2)", "SECOM (Distance)", "Proportionality", "SparCC", @@ -2039,11 +2041,14 @@ df_complex_fig2$setting = factor(df_complex_fig2$setting, "100, 200, 0.5", "100, 200, 2")) p_complex2 = df_complex_fig2 %>% - ggplot(aes(x = setting, y = measure_mean, fill = method)) + - geom_bar(position = "dodge", stat = "identity", color = "black") + - geom_errorbar(aes(ymin = measure_mean - measure_sd, ymax = measure_mean + measure_sd), - width = .2, position = position_dodge(.9)) + - scale_fill_nejm(name = NULL) + + ggplot(aes(x = setting, y = value, fill = method)) + + stat_summary(fun = mean, geom = "bar", position = "dodge", color = "black") + + stat_summary(fun.data = mean_sdl, fun.args = list(mult = 1), color = "black", + geom = "errorbar", width = .3, position = position_dodge(.9)) + + geom_point(aes(x = setting), size = .1, alpha = .3, + position = position_jitterdodge(jitter.width = .2, dodge.width = .9)) + + scale_fill_nejm(name = NULL) + + guides(fill = guide_legend(override.aes = list(shape = NA))) + scale_x_discrete(labels = c("50, 100, 0.5" = "(n = 50, d = 100, \u03B1 = 0.5)", "50, 100, 2" = "(n = 50, d = 100, \u03B1 = 2)", "100, 200, 0.5" = "(n = 100, d = 200, \u03B1 = 0.5)", @@ -2071,10 +2076,8 @@ df_complex = rbind(complex_secom1, complex_secom2, complex_secom3, complex_prop, # Relative norm loss df_complex_fig1 = df_complex %>% dplyr::select(rel_F, rel_S, method, setting) %>% - pivot_longer(cols = rel_F:rel_S, names_to = "measure", values_to = "value") %>% - group_by(method, setting, measure) %>% - summarise(measure_mean = mean(value), - measure_sd = sd(value)) + pivot_longer(cols = rel_F:rel_S, names_to = "measure", values_to = "value") + df_complex_fig1$method = factor(df_complex_fig1$method, levels = c("SECOM (Pearson1)", "SECOM (Pearson2)", "SECOM (Distance)", "Proportionality", "SparCC", @@ -2084,11 +2087,14 @@ df_complex_fig1$setting = factor(df_complex_fig1$setting, "100, 200, 0.5", "100, 200, 2")) p_complex1 = df_complex_fig1 %>% - ggplot(aes(x = setting, y = measure_mean, fill = method)) + - geom_bar(position = "dodge", stat = "identity", color = "black") + - geom_errorbar(aes(ymin = measure_mean - measure_sd, ymax = measure_mean + measure_sd), - width = .2, position = position_dodge(.9)) + - scale_fill_nejm(name = NULL) + + ggplot(aes(x = setting, y = value, fill = method)) + + stat_summary(fun = mean, geom = "bar", position = "dodge", color = "black") + + stat_summary(fun.data = mean_sdl, fun.args = list(mult = 1), color = "black", + geom = "errorbar", width = .3, position = position_dodge(.9)) + + geom_point(aes(x = setting), size = .1, alpha = .3, + position = position_jitterdodge(jitter.width = .2, dodge.width = .9)) + + scale_fill_nejm(name = NULL) + + guides(fill = guide_legend(override.aes = list(shape = NA))) + scale_x_discrete(labels = c("50, 100, 0.5" = "(n = 50, d = 100, \u03B1 = 0.5)", "50, 100, 2" = "(n = 50, d = 100, \u03B1 = 2)", "100, 200, 0.5" = "(n = 100, d = 200, \u03B1 = 0.5)", @@ -2103,10 +2109,8 @@ p_complex1 = df_complex_fig1 %>% # TPR/FPR df_complex_fig2 = df_complex %>% dplyr::select(tpr, fpr, method, setting) %>% - pivot_longer(cols = tpr:fpr, names_to = "measure", values_to = "value") %>% - group_by(method, setting, measure) %>% - summarise(measure_mean = mean(value), - measure_sd = sd(value)) + pivot_longer(cols = tpr:fpr, names_to = "measure", values_to = "value") + df_complex_fig2$method = factor(df_complex_fig2$method, levels = c("SECOM (Pearson1)", "SECOM (Pearson2)", "SECOM (Distance)", "Proportionality", "SparCC", @@ -2116,11 +2120,14 @@ df_complex_fig2$setting = factor(df_complex_fig2$setting, "100, 200, 0.5", "100, 200, 2")) p_complex2 = df_complex_fig2 %>% - ggplot(aes(x = setting, y = measure_mean, fill = method)) + - geom_bar(position = "dodge", stat = "identity", color = "black") + - geom_errorbar(aes(ymin = measure_mean - measure_sd, ymax = measure_mean + measure_sd), - width = .2, position = position_dodge(.9)) + - scale_fill_nejm(name = NULL) + + ggplot(aes(x = setting, y = value, fill = method)) + + stat_summary(fun = mean, geom = "bar", position = "dodge", color = "black") + + stat_summary(fun.data = mean_sdl, fun.args = list(mult = 1), color = "black", + geom = "errorbar", width = .3, position = position_dodge(.9)) + + geom_point(aes(x = setting), size = .1, alpha = .3, + position = position_jitterdodge(jitter.width = .2, dodge.width = .9)) + + scale_fill_nejm(name = NULL) + + guides(fill = guide_legend(override.aes = list(shape = NA))) + scale_x_discrete(labels = c("50, 100, 0.5" = "(n = 50, d = 100, \u03B1 = 0.5)", "50, 100, 2" = "(n = 50, d = 100, \u03B1 = 2)", "100, 200, 0.5" = "(n = 100, d = 200, \u03B1 = 0.5)", diff --git a/programs/02_simulation.html b/programs/02_simulation.html index 12e1a56..7b85c07 100644 --- a/programs/02_simulation.html +++ b/programs/02_simulation.html @@ -12,7 +12,7 @@ - +
ggsave(plot = p_linear, "../images/main/sim_linear.pdf", height = 8, width = 15)
ggsave(plot = p_linear, "../images/main/sim_linear.jpeg", height = 8, width = 15, dpi = 300)
@@ -3634,10 +3635,8 @@ ggsave(plot = p_linear, "../images/supp/supp_sim_linear.pdf", height = 8, width = 15)
ggsave(plot = p_linear, "../images/supp/supp_sim_linear.jpeg", height = 8, width = 15, dpi = 300)
@@ -4103,10 +4106,8 @@ ggsave(filename = "../images/main/sim_nonlinear.jpeg",
plot = p_nonlinear, width = 8, height = 6, units = "in", dpi = 300)
ggsave(filename = "../images/main/sim_nonlinear.pdf",
@@ -4149,10 +4152,8 @@ 2.8 Visualization 2: all groups
# TPR/FPR
df_nonlinear_fig = df_nonlinear %>%
dplyr::select(tpr, fpr, method, setting) %>%
- pivot_longer(cols = tpr:fpr, names_to = "measure", values_to = "value") %>%
- group_by(method, setting, measure) %>%
- summarise(measure_mean = mean(value),
- measure_sd = sd(value))
+ pivot_longer(cols = tpr:fpr, names_to = "measure", values_to = "value")
+
df_nonlinear_fig$method = factor(df_nonlinear_fig$method,
levels = c("SECOM (Pearson1)", "SECOM (Pearson2)",
"SECOM (Distance)", "Proportionality", "SparCC",
@@ -4162,12 +4163,14 @@ 2.8 Visualization 2: all groups
"100, 200, 0.5", "100, 200, 2"))
p_nonlinear = df_nonlinear_fig %>%
- ggplot(aes(x = setting, y = measure_mean, fill = method)) +
- geom_bar(position = "dodge", stat = "identity", color = "black") +
- geom_errorbar(aes(ymin = measure_mean - measure_sd, ymax = measure_mean + measure_sd),
- width = .2, position = position_dodge(.9)) +
- # brewer.pal(n = 7, name = "Dark2")
+ ggplot(aes(x = setting, y = value, fill = method)) +
+ stat_summary(fun = mean, geom = "bar", position = "dodge", color = "black") +
+ stat_summary(fun.data = mean_sdl, fun.args = list(mult = 1), color = "black",
+ geom = "errorbar", width = .3, position = position_dodge(.9)) +
+ geom_point(aes(x = setting), size = .1, alpha = .3,
+ position = position_jitterdodge(jitter.width = .2, dodge.width = .9)) +
scale_fill_nejm(name = NULL) +
+ guides(fill = guide_legend(override.aes = list(shape = NA))) +
scale_x_discrete(labels = c("50, 100, 0.5" = "(n = 50, d = 100, \u03B1 = 0.5)",
"50, 100, 2" = "(n = 50, d = 100, \u03B1 = 2)",
"100, 200, 0.5" = "(n = 100, d = 200, \u03B1 = 0.5)",
@@ -4180,7 +4183,7 @@ 2.8 Visualization 2: all groups
strip.background = element_rect(fill = "white"),
legend.position = "bottom")
print(p_nonlinear)
-
+
ggsave(filename = "../images/supp/supp_sim_nonlinear.jpeg",
plot = p_nonlinear, width = 8, height = 6, units = "in", dpi = 300)
ggsave(filename = "../images/supp/supp_sim_nonlinear.pdf",
@@ -4308,8 +4311,8 @@ 3.1 Linear correlations
", FN = ", compare_linear[12], ")"))
colnames(compare_linear) = c("(1, 1)", "(1, 0)", "(0, 1)", "(0, 0)")
datatable(compare_linear)
-
-
+
+
ggsave(plot = p_complex, "../images/main/sim_complex.pdf", height = 8, width = 15)
ggsave(plot = p_complex, "../images/main/sim_complex.jpeg", height = 8, width = 15, dpi = 300)
ggsave(plot = p_complex, "../images/supp/supp_sim_complex.pdf", height = 8, width = 15)
ggsave(plot = p_complex, "../images/supp/supp_sim_complex.jpeg", height = 8, width = 15, dpi = 300)
@@ -5145,7 +5152,7 @@ df_t = read_csv("../outputs/others/cpu_time.csv")
datatable(df_t)
-
-
+
+
p_impute = ggarrange(p_impute1, p_impute2, nrow = 2, labels = c("a", "b"),
common.legend = TRUE, legend = "right")
p_impute
-
-ggsave(plot = p_impute, "../images/supp/supp_impute.pdf", height = 10, width = 12)
-ggsave(plot = p_impute, "../images/supp/supp_impute.jpeg", height = 10, width = 12, dpi = 300)
+
+ggsave(plot = p_impute, "../images/supp/supp_impute.pdf", height = 8, width = 10)
+ggsave(plot = p_impute, "../images/supp/supp_impute.jpeg", height = 8, width = 10, dpi = 300)