fix examples

neurogenomics · Nov 8, 2023 · d20c960 · d20c960
1 parent a4f800b
commit d20c960
Show file tree

Hide file tree

Showing 6 changed files with 30 additions and 20 deletions.
diff --git a/R/get_unfinished_list_names.R b/R/get_unfinished_list_names.R
@@ -20,7 +20,7 @@
 #'                        list_names = list_names,
 #'                        reps = 10,
 #'                        save_dir_tmp = save_dir_tmp)
-#' unfinished <- get_unfinished_list_names(list_names = gene_data$hpo_name,
+#' unfinished <- get_unfinished_list_names(list_names = gene_data$hpo_id,
 #'                                         save_dir_tmp = save_dir_tmp)
 get_unfinished_list_names <- function (list_names,
                                        save_dir_tmp) {

diff --git a/R/merge_results.R b/R/merge_results.R
@@ -13,17 +13,19 @@
 #' @export
 #' @importFrom data.table rbindlist
 #' @examples
+#' set.seed(2023)
 #' gene_data <- HPOExplorer::load_phenotype_to_genes()
-#' ctd <- MultiEWCE::load_example_ctd()
-#' list_names <- unique(gene_data$hpo_name)[seq(3)]
+#' ctd <- load_example_ctd()
+#' list_names <- unique(gene_data$hpo_id)[seq(3)]
 #' res_files <- ewce_para(ctd = ctd,
 #'                        gene_data = gene_data,
 #'                        list_names = list_names,
+#'                        force_new = TRUE,
 #'                        reps = 10)
 #' all_results <- merge_results(res_files=res_files)
 merge_results <- function(save_dir=NULL,
                           res_files=NULL,
-                          list_name_column = "hpo_name") {
+                          list_name_column = "hpo_id") {
 
   if(is.null(res_files)){
     if(is.null(save_dir)) stop("Must provided save_dir when res_files=NULL.")

diff --git a/man/get_unfinished_list_names.Rd b/man/get_unfinished_list_names.Rd
diff --git a/man/merge_results.Rd b/man/merge_results.Rd
diff --git a/tests/testthat/test-ewce_para.R b/tests/testthat/test-ewce_para.R
@@ -33,7 +33,7 @@ test_that("ewce_para works", {
   }
 
   #### Tests get_unfinished_list_names ####
-  all_phenotypes <- unique(gene_data$hpo_name)
+  all_phenotypes <- unique(gene_data$hpo_id)
   unfinished <- get_unfinished_list_names(list_names = all_phenotypes,
                                           save_dir_tmp = save_dir_tmp)
   testthat::expect_lte(length(unfinished),
@@ -44,17 +44,17 @@ test_that("ewce_para works", {
   all_results1 <- merge_results(res_files=res_files)
   all_results2 <- merge_results(res_files=res_files2)
   ## Confirm both methods have the correct phenotypes
-  testthat::expect_gte(sum(list_names %in% unique(all_results1$hpo_name)),
+  testthat::expect_gte(sum(list_names %in% unique(all_results1$hpo_id)),
                        length(list_names)-3)
-  testthat::expect_gte(sum(list_names %in% unique(all_results2$hpo_name)),
+  testthat::expect_gte(sum(list_names %in% unique(all_results2$hpo_id)),
                        length(list_names)-3)
   ## Confirm both methods are identical
   data.table::setkey(all_results1,"CellType")
   data.table::setkey(all_results2,"CellType")
   testthat::expect_equal(nrow(all_results1),
                          nrow(all_results2))
-  testthat::expect_equal(sort(unique(all_results1$hpo_name)),
-                         sort(unique(all_results2$hpo_name)))
+  testthat::expect_equal(sort(unique(all_results1$hpo_id)),
+                         sort(unique(all_results2$hpo_id)))
   testthat::expect_equal(sort(unique(all_results1$CellType)),
                          sort(unique(all_results2$CellType)))
   # testthat::expect_equal(all_results1, all_results2)

diff --git a/vignettes/MultiEWCE.Rmd b/vignettes/MultiEWCE.Rmd
@@ -25,7 +25,7 @@ This package is useful in cases where you have a large number of related, but se
 # Loading Phenotype Associated Gene Lists from the HPO
 The MultiEWCE package requires the gene data to be in a particular format. It must be a data.frame that includes one column of gene list names, and another column of genes. For example: 
 
-| Phenotype        | Gene   |
+| hpo_name         | Gene   |
 | ---------------- | ------ |
 | "Abnormal heart" | gene X |
 | "Abnormal heart" | gene Y |
@@ -49,7 +49,7 @@ In this example our **gene list names** column is called `Phenotype` and our col
 ```{r setup_args}
 # Loading CTD file 
 ctd <- load_example_ctd()
-list_names <- unique(gene_data$hpo_name)[seq(3)] 
+list_names <- unique(gene_data$hpo_id)[seq(10)] 
 reps <- 10 # in practice would use more reps
 cores <- 1 # in practice would use more cores
 save_dir <- tempdir()
@@ -96,10 +96,11 @@ Now we have set up all our desired inputs, we can run the analysis.
 all_results <- MultiEWCE::gen_results(ctd = ctd,
                                       gene_data = gene_data,
                                       list_names = list_names, 
-                                      list_name_column = "hpo_name",
+                                      list_name_column = "hpo_id",
                                       reps = reps,
                                       cores = cores,
                                       save_dir = save_dir, 
+                                      force_new = TRUE,
                                       save_dir_tmp = save_dir_tmp) 
 ```
 
@@ -109,11 +110,14 @@ Just as an example, we will create a plot showing the number of significant enri
 library(ggplot2)
 library(data.table)
 #### Aggregate results ####
-n_signif <- all_results[q<=0.05 & !is.na(q), .(sig_enrichments = .N), by="hpo_name"]
+n_signif <- all_results[q<=0.05 & !is.na(q),
+                        list(sig_enrichments = .N,
+                             mean_fold_change=mean(fold_change)), 
+                        by="hpo_id"]
 #### Plot #### 
-plot1 <-  ggplot(n_signif, aes(x = stringr::str_wrap(hpo_name,width = 10),
+plot1 <-  ggplot(n_signif, aes(x = stringr::str_wrap(hpo_id,width = 10),
                                y = sig_enrichments,
-                               fill = sig_enrichments)) +
+                               fill = mean_fold_change)) +
     geom_col() +
     labs(x="Phenotype",y="Enrichments (n)") + 
     theme_bw() 
@@ -149,7 +153,7 @@ cat(paste(length(unique(gene_set$gene_symbol)),
 This function is used to find which gene lists you have not yet analysed.
 
 ```{r get_unfinished} 
-all_phenotypes <- unique(gene_data$hpo_name)
+all_phenotypes <- unique(gene_data$hpo_id)
 unfinished <- MultiEWCE::get_unfinished_list_names(list_names = all_phenotypes,
                                                    save_dir_tmp = save_dir_tmp)
 
@@ -176,6 +180,7 @@ all_results <- MultiEWCE::gen_results(ctd = ctd,
                                       list_name_column = "disease_id",
                                       list_names = list_names,
                                       annotLevel = 1,
+                                      force_new = TRUE,
                                       reps = 10)
 ```
 
@@ -193,6 +198,7 @@ ctd <- load_example_ctd("ctd_DescartesHuman.rds")
 all_results <- MultiEWCE::gen_results(ctd = ctd,
                                       list_name_column = "hpo_id", 
                                       gene_data = gene_data,  
+                                      annotLevel = 2,
                                       reps = 100000,
                                       cores = 10)
 ```