From 17b1eed639d59218e271f5b0d46f40b852e3f13b Mon Sep 17 00:00:00 2001 From: Robrecht Cannoodt Date: Wed, 7 Aug 2019 17:37:31 +0200 Subject: [PATCH 01/15] update for news md support in R 3.6 --- inst/NEWS.md => NEWS.md | 0 README.Rmd | 4 ++-- README.md | 2 +- inst/NEWS | 31 ------------------------------- 4 files changed, 3 insertions(+), 34 deletions(-) rename inst/NEWS.md => NEWS.md (100%) delete mode 100644 inst/NEWS diff --git a/inst/NEWS.md b/NEWS.md similarity index 100% rename from inst/NEWS.md rename to NEWS.md diff --git a/README.Rmd b/README.Rmd index d5d9f5b..822e696 100755 --- a/README.Rmd +++ b/README.Rmd @@ -23,9 +23,9 @@ The plotting of the top features is nicely intergrated into [dynplot](https://gi ## Latest changes -Check out `news(package = "dynwrap")` or [NEWS.md](inst/NEWS.md) for a full list of changes. +Check out `news(package = "dynwrap")` or [NEWS.md](NEWS.md) for a full list of changes. - + ```{r news, echo=FALSE, results="asis"} dynutils::update_news() diff --git a/README.md b/README.md index a6b2a52..f21ae7e 100755 --- a/README.md +++ b/README.md @@ -23,7 +23,7 @@ heatmap](https://raw.githubusercontent.com/dynverse/dynplot/devel/.readme_files/ ## Latest changes -Check out `news(package = "dynwrap")` or [NEWS.md](inst/NEWS.md) for a +Check out `news(package = "dynwrap")` or [NEWS.md](NEWS.md) for a full list of changes. diff --git a/inst/NEWS b/inst/NEWS deleted file mode 100644 index 5cd73e6..0000000 --- a/inst/NEWS +++ /dev/null @@ -1,31 +0,0 @@ -dynfeature 1.0.0 (28-03-2019) - -* MINOR CHANGE: Use only one core by default. - -* MINOR CHANGE: Support sparse matrices - -dynfeature 0.2.0 (25-10-2018) - -* SPEED UP: Added `fi_ranger_rf_lite()`, which scales much better w.r.t. the number of samples and features, - at the cost of increasing loss of accuracy at higher dimension sizes. - -* MAJOR CHANGES: Large cleanup of the code. Most notably, - - The format of feature importance method specification and its parameters, - with format `fi_method = fi_example_method(param1 = 10, param2 = 4)`. - Before, it had to be specified as `method = "example_method", method_params = list(param1 = 10, param2 = 4)`. - -* MINOR CHANGE: Whenever possible, output columns are now factors instead of characters. - -* MINOR CHANGE: Add NEWS, and add news section to README. - -* DOCUMENTATION: Turned on markdown for Roxygen. - -* DOCUMENTATION: Improved documentation on expression_source. - -* TESTING: Improved testing with a larger dataset, and will check whether the overall feature importance produces decent results. - -* MINOR CHANGE: Feature importance functions will always return factors instead of characters. - -dynfeature 0.1.0 (26-04-2018) - -* INITIAL RELEASE: dynfeature, calculating feature importance scores from trajectories. From 93d8964ac235c1d00ce2e83d809b8181cb2a64a7 Mon Sep 17 00:00:00 2001 From: Robrecht Cannoodt Date: Wed, 7 Aug 2019 17:38:23 +0200 Subject: [PATCH 02/15] forgot to remove one line of code --- README.Rmd | 1 - 1 file changed, 1 deletion(-) diff --git a/README.Rmd b/README.Rmd index 822e696..c302767 100755 --- a/README.Rmd +++ b/README.Rmd @@ -28,7 +28,6 @@ Check out `news(package = "dynwrap")` or [NEWS.md](NEWS.md) for a full list of c ```{r news, echo=FALSE, results="asis"} -dynutils::update_news() cat(dynutils::recent_news()) ``` From 563aef82d9d78544bec82146299046e0bfbf8f4d Mon Sep 17 00:00:00 2001 From: Robrecht Cannoodt Date: Wed, 12 Feb 2020 16:01:15 +0100 Subject: [PATCH 03/15] update roxygen --- DESCRIPTION | 2 +- NAMESPACE | 1 + man/calculate_feature_importances.Rd | 8 ++- man/calculate_overall_feature_importance.Rd | 54 ++++++++++++++------- man/dynfeature.Rd | 1 - man/fi_methods.Rd | 18 ++++++- 6 files changed, 61 insertions(+), 23 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index d89ff55..578b94d 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -23,7 +23,7 @@ Imports: tibble Suggests: caret -RoxygenNote: 6.1.1 +RoxygenNote: 7.0.2 Roxygen: list(markdown = TRUE) Remotes: dynverse/dynutils@devel, diff --git a/NAMESPACE b/NAMESPACE index dbba772..1ffd00f 100755 --- a/NAMESPACE +++ b/NAMESPACE @@ -9,6 +9,7 @@ export(calculate_waypoint_feature_importance) export(fi_caret) export(fi_ranger_rf) export(fi_ranger_rf_lite) +export(fi_ranger_rf_tiny) import(dplyr) import(dynutils) import(dynwrap) diff --git a/man/calculate_feature_importances.Rd b/man/calculate_feature_importances.Rd index a6f984d..e84f04e 100644 --- a/man/calculate_feature_importances.Rd +++ b/man/calculate_feature_importances.Rd @@ -4,8 +4,12 @@ \alias{calculate_feature_importances} \title{Calculate feature importance scores} \usage{ -calculate_feature_importances(X, Y, fi_method = fi_ranger_rf_lite(), - verbose = FALSE) +calculate_feature_importances( + X, + Y, + fi_method = fi_ranger_rf_lite(), + verbose = FALSE +) } \arguments{ \item{X}{A data frame containing the features as columns.} diff --git a/man/calculate_overall_feature_importance.Rd b/man/calculate_overall_feature_importance.Rd index 42fc9d6..5ac3b52 100755 --- a/man/calculate_overall_feature_importance.Rd +++ b/man/calculate_overall_feature_importance.Rd @@ -14,30 +14,50 @@ \alias{calculate_waypoint_feature_importance} \title{Calculating feature importances across trajectories} \usage{ -calculate_branch_feature_importance(trajectory, - expression_source = "expression", fi_method = fi_ranger_rf_lite(), - verbose = FALSE) +calculate_branch_feature_importance( + trajectory, + expression_source = "expression", + fi_method = fi_ranger_rf_lite(), + verbose = FALSE +) -calculate_branching_point_feature_importance(trajectory, +calculate_branching_point_feature_importance( + trajectory, expression_source = "expression", milestones_oi = trajectory$milestone_ids, - fi_method = fi_ranger_rf_lite(), verbose = FALSE) + fi_method = fi_ranger_rf_lite(), + verbose = FALSE +) -calculate_cell_feature_importance(trajectory, - expression_source = "expression", fi_method = fi_ranger_rf_lite(), - verbose = FALSE) +calculate_cell_feature_importance( + trajectory, + expression_source = "expression", + fi_method = fi_ranger_rf_lite(), + verbose = FALSE +) -calculate_milestone_feature_importance(trajectory, - expression_source = "expression", milestones_oi = NULL, - fi_method = fi_ranger_rf_lite(), verbose = FALSE) +calculate_milestone_feature_importance( + trajectory, + expression_source = "expression", + milestones_oi = NULL, + fi_method = fi_ranger_rf_lite(), + verbose = FALSE +) -calculate_overall_feature_importance(trajectory, - expression_source = "expression", fi_method = fi_ranger_rf_lite(), - verbose = FALSE) +calculate_overall_feature_importance( + trajectory, + expression_source = "expression", + fi_method = fi_ranger_rf_lite(), + verbose = FALSE +) -calculate_waypoint_feature_importance(trajectory, - expression_source = "expression", waypoints = NULL, - fi_method = fi_ranger_rf_lite(), verbose = FALSE) +calculate_waypoint_feature_importance( + trajectory, + expression_source = "expression", + waypoints = NULL, + fi_method = fi_ranger_rf_lite(), + verbose = FALSE +) } \arguments{ \item{trajectory}{A trajectory object containing expression values and a trajectory.} diff --git a/man/dynfeature.Rd b/man/dynfeature.Rd index 6c23920..f3ebf48 100755 --- a/man/dynfeature.Rd +++ b/man/dynfeature.Rd @@ -3,7 +3,6 @@ \docType{package} \name{dynfeature} \alias{dynfeature} -\alias{dynfeature-package} \title{Dynfeature feature importance} \description{ Dynfeature feature importance diff --git a/man/fi_methods.Rd b/man/fi_methods.Rd index aafac26..5ba63f6 100644 --- a/man/fi_methods.Rd +++ b/man/fi_methods.Rd @@ -4,14 +4,28 @@ \alias{fi_ranger_rf_lite} \alias{fi_ranger_rf} \alias{fi_caret} +\alias{fi_ranger_rf_tiny} \title{Feature Importance methods} \usage{ -fi_ranger_rf_lite(num_trees = 2000, num_variables_per_split = 50, - num_samples_per_tree = 250, min_node_size = 20, ...) +fi_ranger_rf_lite( + num_trees = 2000, + num_variables_per_split = 50, + num_samples_per_tree = 250, + min_node_size = 20, + ... +) fi_ranger_rf(...) fi_caret(caret_method, ...) + +fi_ranger_rf_tiny( + num_trees = 100, + num_variables_per_split = 50, + num_samples_per_tree = 250, + min_node_size = 20, + ... +) } \arguments{ \item{num_trees}{(fi_ranger_rf_lite) The number of trees to use} From 60c053e0720bce10c7073c95b480de422e29d632 Mon Sep 17 00:00:00 2001 From: Robrecht Cannoodt Date: Thu, 3 Sep 2020 11:10:47 +0200 Subject: [PATCH 04/15] avoid deprecated functions --- R/calculate_feature_importances.R | 4 ++-- R/package.R | 6 ++++-- tests/testthat/test-calculate_feature_importance.R | 6 +++--- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/R/calculate_feature_importances.R b/R/calculate_feature_importances.R index 9836c56..63033e9 100644 --- a/R/calculate_feature_importances.R +++ b/R/calculate_feature_importances.R @@ -11,7 +11,7 @@ calculate_feature_importances <- function(X, Y, fi_method = fi_ranger_rf_lite(), if (dynutils::is_sparse(Y)) { Y <- as.matrix(Y) } - Y <- as_data_frame(Y) + Y <- as_tibble(Y, .name_repair = "minimal") } # convert expression to regular matrix if sparse @@ -36,7 +36,7 @@ calculate_feature_importances <- function(X, Y, fi_method = fi_ranger_rf_lite(), importance <- fi_method$fun(X, y, verbose = verbose) } - data_frame( + tibble( predictor_id = factor(colnames(Y)[[i]], levels = colnames(Y)), feature_id = factor(names(importance), levels = names(importance)), importance diff --git a/R/package.R b/R/package.R index be0c517..da9b302 100755 --- a/R/package.R +++ b/R/package.R @@ -1,9 +1,11 @@ -#' Dynfeature feature importance +#' Dynamic feature importance +#' +#' Calculating feature importance scores from trajectories using the random forests algorithm. #' #' @import dplyr #' @import tidyr #' @import methods -#' @import tibble +#' @importFrom tibble tibble tribble as_tibble #' @import dynutils #' @import dynwrap #' @importFrom purrr %>% map map_df map_chr map_lgl map_int map_dbl keep set_names list_modify invoke diff --git a/tests/testthat/test-calculate_feature_importance.R b/tests/testthat/test-calculate_feature_importance.R index 4b7c898..0a04241 100755 --- a/tests/testthat/test-calculate_feature_importance.R +++ b/tests/testthat/test-calculate_feature_importance.R @@ -73,7 +73,7 @@ suppressWarnings({ module_expression <- distance_from_module %>% reshape2::melt(varnames = c("module_id", "cell_id"), value.name = "distance") %>% - as_data_frame() %>% + as_tibble(.name_repair = "minimal") %>% left_join(module_progressions %>% select(-from, -to, -percentage), by = "module_id") %>% mutate( expr = dnorm(distance, mean = 0, sd = sd) * mult + basal @@ -83,7 +83,7 @@ suppressWarnings({ module_expression <- module_expression[module_ids, cell_ids] # generate gene expression -feature_info <- data_frame( +feature_info <- tibble( feature_id = feature_ids, module = sample.int(num_modules, num_features, replace = TRUE), mean = rnorm(num_features, 10, 3) %>% pmax(0), @@ -95,7 +95,7 @@ expression <- feature_info %>% group_by(feature_id) %>% purrr::pmap_df(function(feature_id, module, mean, sd, dropout) { - data_frame( + tibble( cell_id = colnames(module_expression), feature_id, expression = ifelse(runif(num_cells) < dropout, 0, module_expression[module, , drop = FALSE] + rnorm(num_cells, mean, sd)) From b5e1f2092447f90f789d4291adb1d43ee9c4ed57 Mon Sep 17 00:00:00 2001 From: Robrecht Cannoodt Date: Thu, 3 Sep 2020 11:10:58 +0200 Subject: [PATCH 05/15] update docs --- DESCRIPTION | 7 +-- NAMESPACE | 5 +- R/calculate_waypoint_feature_importance.R | 2 +- man/calculate_feature_importances.Rd | 8 ++- man/calculate_overall_feature_importance.Rd | 54 ++++++++++++++------- man/dynfeature.Rd | 5 +- man/fi_methods.Rd | 18 ++++++- 7 files changed, 68 insertions(+), 31 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index d89ff55..ebb808a 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -5,7 +5,7 @@ Version: 1.0.0.9000 Authors@R: c( person("Robrecht", "Cannoodt", email = "rcannood@gmail.com", role = c("aut")), person("Wouter", "Saelens", email = "wouter.saelens@ugent.be", role = c("aut", "cre"))) -Description: Calculating feature importance scores from trajectories +Description: Calculating feature importance scores from trajectories using the random forests algorithm. License: GPL-3 Encoding: UTF-8 LazyData: true @@ -23,11 +23,8 @@ Imports: tibble Suggests: caret -RoxygenNote: 6.1.1 +RoxygenNote: 7.1.1 Roxygen: list(markdown = TRUE) -Remotes: - dynverse/dynutils@devel, - dynverse/dynwrap@devel Collate: 'calculate_feature_importances.R' 'calculate_branch_feature_importance.R' diff --git a/NAMESPACE b/NAMESPACE index dbba772..8d78b31 100755 --- a/NAMESPACE +++ b/NAMESPACE @@ -9,11 +9,11 @@ export(calculate_waypoint_feature_importance) export(fi_caret) export(fi_ranger_rf) export(fi_ranger_rf_lite) +export(fi_ranger_rf_tiny) import(dplyr) import(dynutils) import(dynwrap) import(methods) -import(tibble) import(tidyr) importFrom(purrr,"%>%") importFrom(purrr,invoke) @@ -28,3 +28,6 @@ importFrom(purrr,map_lgl) importFrom(purrr,set_names) importFrom(ranger,ranger) importFrom(reshape2,acast) +importFrom(tibble,as_tibble) +importFrom(tibble,tibble) +importFrom(tibble,tribble) diff --git a/R/calculate_waypoint_feature_importance.R b/R/calculate_waypoint_feature_importance.R index 02d321f..f0b3dd2 100644 --- a/R/calculate_waypoint_feature_importance.R +++ b/R/calculate_waypoint_feature_importance.R @@ -25,7 +25,7 @@ calculate_waypoint_feature_importance <- inherit_default_params( calculate_feature_importances( X = expression, - Y = t(waypoints$geodesic_distances)[rownames(expression),], + Y = t(waypoints$geodesic_distances)[rownames(expression),unique(rownames(waypoints$geodesic_distances))], fi_method = fi_method, verbose = verbose ) %>% diff --git a/man/calculate_feature_importances.Rd b/man/calculate_feature_importances.Rd index a6f984d..e84f04e 100644 --- a/man/calculate_feature_importances.Rd +++ b/man/calculate_feature_importances.Rd @@ -4,8 +4,12 @@ \alias{calculate_feature_importances} \title{Calculate feature importance scores} \usage{ -calculate_feature_importances(X, Y, fi_method = fi_ranger_rf_lite(), - verbose = FALSE) +calculate_feature_importances( + X, + Y, + fi_method = fi_ranger_rf_lite(), + verbose = FALSE +) } \arguments{ \item{X}{A data frame containing the features as columns.} diff --git a/man/calculate_overall_feature_importance.Rd b/man/calculate_overall_feature_importance.Rd index 42fc9d6..5ac3b52 100755 --- a/man/calculate_overall_feature_importance.Rd +++ b/man/calculate_overall_feature_importance.Rd @@ -14,30 +14,50 @@ \alias{calculate_waypoint_feature_importance} \title{Calculating feature importances across trajectories} \usage{ -calculate_branch_feature_importance(trajectory, - expression_source = "expression", fi_method = fi_ranger_rf_lite(), - verbose = FALSE) +calculate_branch_feature_importance( + trajectory, + expression_source = "expression", + fi_method = fi_ranger_rf_lite(), + verbose = FALSE +) -calculate_branching_point_feature_importance(trajectory, +calculate_branching_point_feature_importance( + trajectory, expression_source = "expression", milestones_oi = trajectory$milestone_ids, - fi_method = fi_ranger_rf_lite(), verbose = FALSE) + fi_method = fi_ranger_rf_lite(), + verbose = FALSE +) -calculate_cell_feature_importance(trajectory, - expression_source = "expression", fi_method = fi_ranger_rf_lite(), - verbose = FALSE) +calculate_cell_feature_importance( + trajectory, + expression_source = "expression", + fi_method = fi_ranger_rf_lite(), + verbose = FALSE +) -calculate_milestone_feature_importance(trajectory, - expression_source = "expression", milestones_oi = NULL, - fi_method = fi_ranger_rf_lite(), verbose = FALSE) +calculate_milestone_feature_importance( + trajectory, + expression_source = "expression", + milestones_oi = NULL, + fi_method = fi_ranger_rf_lite(), + verbose = FALSE +) -calculate_overall_feature_importance(trajectory, - expression_source = "expression", fi_method = fi_ranger_rf_lite(), - verbose = FALSE) +calculate_overall_feature_importance( + trajectory, + expression_source = "expression", + fi_method = fi_ranger_rf_lite(), + verbose = FALSE +) -calculate_waypoint_feature_importance(trajectory, - expression_source = "expression", waypoints = NULL, - fi_method = fi_ranger_rf_lite(), verbose = FALSE) +calculate_waypoint_feature_importance( + trajectory, + expression_source = "expression", + waypoints = NULL, + fi_method = fi_ranger_rf_lite(), + verbose = FALSE +) } \arguments{ \item{trajectory}{A trajectory object containing expression values and a trajectory.} diff --git a/man/dynfeature.Rd b/man/dynfeature.Rd index 6c23920..2fd5af8 100755 --- a/man/dynfeature.Rd +++ b/man/dynfeature.Rd @@ -3,8 +3,7 @@ \docType{package} \name{dynfeature} \alias{dynfeature} -\alias{dynfeature-package} -\title{Dynfeature feature importance} +\title{Dynamic feature importance} \description{ -Dynfeature feature importance +Calculating feature importance scores from trajectories using the random forests algorithm. } diff --git a/man/fi_methods.Rd b/man/fi_methods.Rd index aafac26..5ba63f6 100644 --- a/man/fi_methods.Rd +++ b/man/fi_methods.Rd @@ -4,14 +4,28 @@ \alias{fi_ranger_rf_lite} \alias{fi_ranger_rf} \alias{fi_caret} +\alias{fi_ranger_rf_tiny} \title{Feature Importance methods} \usage{ -fi_ranger_rf_lite(num_trees = 2000, num_variables_per_split = 50, - num_samples_per_tree = 250, min_node_size = 20, ...) +fi_ranger_rf_lite( + num_trees = 2000, + num_variables_per_split = 50, + num_samples_per_tree = 250, + min_node_size = 20, + ... +) fi_ranger_rf(...) fi_caret(caret_method, ...) + +fi_ranger_rf_tiny( + num_trees = 100, + num_variables_per_split = 50, + num_samples_per_tree = 250, + min_node_size = 20, + ... +) } \arguments{ \item{num_trees}{(fi_ranger_rf_lite) The number of trees to use} From 132f1630c6ce65aa192e09f2c1f0512094c7907b Mon Sep 17 00:00:00 2001 From: Robrecht Cannoodt Date: Thu, 3 Sep 2020 11:11:05 +0200 Subject: [PATCH 06/15] use github actions --- .Rbuildignore | 3 +- .github/.gitignore | 1 + .github/workflows/R-CMD-check.yml | 83 +++++++++++++++++++++++++++++ .github/workflows/test_coverage.yml | 54 +++++++++++++++++++ 4 files changed, 140 insertions(+), 1 deletion(-) create mode 100644 .github/.gitignore create mode 100644 .github/workflows/R-CMD-check.yml create mode 100644 .github/workflows/test_coverage.yml diff --git a/.Rbuildignore b/.Rbuildignore index 7791a8d..42b7713 100755 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -2,4 +2,5 @@ ^\.Rproj\.user$ ^\.travis ^README\.Rmd$ -^\.git$ \ No newline at end of file +^\.git$ +^\.github$ diff --git a/.github/.gitignore b/.github/.gitignore new file mode 100644 index 0000000..2d19fc7 --- /dev/null +++ b/.github/.gitignore @@ -0,0 +1 @@ +*.html diff --git a/.github/workflows/R-CMD-check.yml b/.github/workflows/R-CMD-check.yml new file mode 100644 index 0000000..27ba7d1 --- /dev/null +++ b/.github/workflows/R-CMD-check.yml @@ -0,0 +1,83 @@ +on: + push: + branches: + - master + - devel + pull_request: + branches: + - master + schedule: + - cron: "0 0 * * 1" + +name: R-CMD-check + +jobs: + R-CMD-check: + runs-on: ${{ matrix.config.os }} + + name: ${{ matrix.config.os }} (${{ matrix.config.r }}) + + strategy: + fail-fast: false + matrix: + config: + - {os: windows-latest, r: 'release'} + - {os: macos-latest, r: 'release'} + - {os: ubuntu-16.04, r: '3.6', rspm: "https://demo.rstudiopm.com/all/__linux__/xenial/latest"} + - {os: ubuntu-16.04, r: 'release', rspm: "https://demo.rstudiopm.com/all/__linux__/xenial/latest"} + + env: + R_REMOTES_NO_ERRORS_FROM_WARNINGS: true + RSPM: ${{ matrix.config.rspm }} + + steps: + - uses: actions/checkout@v2 + + - uses: r-lib/actions/setup-r@master + with: + r-version: ${{ matrix.config.r }} + + - uses: r-lib/actions/setup-pandoc@master + + - name: Query dependencies + run: | + install.packages('remotes') + saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2) + shell: Rscript {0} + + - name: Cache R packages + if: runner.os != 'Windows' + uses: actions/cache@v1 + with: + path: ${{ env.R_LIBS_USER }} + key: ${{ runner.os }}-r-${{ matrix.config.r }}-${{ hashFiles('.github/depends.Rds') }} + restore-keys: ${{ runner.os }}-r-${{ matrix.config.r }}- + + - name: Install system dependencies + if: runner.os == 'Linux' + env: + RHUB_PLATFORM: linux-x86_64-ubuntu-gcc + run: | + Rscript -e "remotes::install_github('r-hub/sysreqs')" + sysreqs=$(Rscript -e "cat(sysreqs::sysreq_commands('DESCRIPTION'))") + sudo -s eval "$sysreqs" + + - name: Install dependencies + run: | + remotes::install_deps(dependencies = TRUE) + remotes::install_cran("rcmdcheck") + shell: Rscript {0} + + - name: Check + env: + _R_CHECK_CRAN_INCOMING_REMOTE_: false + run: rcmdcheck::rcmdcheck(args = c("--no-manual", "--as-cran"), error_on = "warning", check_dir = "check") + shell: Rscript {0} + + - name: Upload check results + if: failure() + uses: actions/upload-artifact@master + with: + name: ${{ runner.os }}-r${{ matrix.config.r }}-results + path: check + diff --git a/.github/workflows/test_coverage.yml b/.github/workflows/test_coverage.yml new file mode 100644 index 0000000..b74fee0 --- /dev/null +++ b/.github/workflows/test_coverage.yml @@ -0,0 +1,54 @@ +on: + push: + branches: + - master + - devel + pull_request: + branches: + - master + +name: test-coverage + +jobs: + test-coverage: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + + - uses: r-lib/actions/setup-r@master + + - uses: r-lib/actions/setup-pandoc@master + + - name: Query dependencies + run: | + install.packages('remotes') + saveRDS(remotes::dev_package_deps(dependencies = TRUE), "depends.Rds", version = 2) + shell: Rscript {0} + + - name: Cache R packages + uses: actions/cache@v1 + with: + path: ${{ env.R_LIBS_USER }} + key: ubuntu-r-release-${{ hashFiles('depends.Rds') }} + restore-keys: ubuntu-r-release- + + - name: Install system dependencies + if: runner.os == 'Linux' + env: + RHUB_PLATFORM: linux-x86_64-ubuntu-gcc + run: | + Rscript -e "remotes::install_github('rcannood/sysreqs')" + sysreqs=$(Rscript -e "pkgs <- unique(c(sysreqs:::get_cran_sysreqs(sysreqs:::get_cran_deps('covr')), sysreqs::sysreqs('DESCRIPTION'))); cat(sysreqs:::sysreq_commands_pkgs(pkgs))") + sudo -s eval "$sysreqs" + + - name: Install dependencies + run: | + install.packages(c("remotes")) + remotes::install_deps(dependencies = TRUE) + remotes::install_cran("covr") + shell: Rscript {0} + + - name: Test coverage + run: covr::codecov() + shell: Rscript {0} + From 7219028877e753ea37a3f85d1a5df6bcf8d03db9 Mon Sep 17 00:00:00 2001 From: Robrecht Cannoodt Date: Thu, 3 Sep 2020 12:24:49 +0200 Subject: [PATCH 07/15] update desc --- DESCRIPTION | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index ebb808a..b78c102 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,10 +1,18 @@ Package: dynfeature Type: Package -Title: Dynamic feature importance +Title: Dynamic Feature Importance Version: 1.0.0.9000 -Authors@R: c( - person("Robrecht", "Cannoodt", email = "rcannood@gmail.com", role = c("aut")), - person("Wouter", "Saelens", email = "wouter.saelens@ugent.be", role = c("aut", "cre"))) +Authors@R: + c(person(given = "Robrecht", + family = "Cannoodt", + role = c("aut", "cre"), + email = "rcannood@gmail.com", + comment = c(ORCID = "0000-0003-3641-729X", github = "rcannood")), + person(given = "Wouter", + family = "Saelens", + role = "aut", + email = "wouter.saelens@gmail.com", + comment = c(ORCID = "0000-0002-7114-6248", github = "zouter"))) Description: Calculating feature importance scores from trajectories using the random forests algorithm. License: GPL-3 Encoding: UTF-8 From 85f738d428e3db8fb87d09add2463f2d6e415116 Mon Sep 17 00:00:00 2001 From: Robrecht Cannoodt Date: Wed, 9 Jun 2021 12:50:27 +0200 Subject: [PATCH 08/15] update ci setup --- .github/workflows/R-CMD-check.yml | 28 ++++++++++++++-------------- .github/workflows/test_coverage.yml | 10 +++++----- 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/.github/workflows/R-CMD-check.yml b/.github/workflows/R-CMD-check.yml index 27ba7d1..ce702bd 100644 --- a/.github/workflows/R-CMD-check.yml +++ b/.github/workflows/R-CMD-check.yml @@ -14,17 +14,17 @@ name: R-CMD-check jobs: R-CMD-check: runs-on: ${{ matrix.config.os }} - + if: "!contains(github.event.head_commit.message, 'ci skip')" name: ${{ matrix.config.os }} (${{ matrix.config.r }}) strategy: fail-fast: false matrix: config: - - {os: windows-latest, r: 'release'} - - {os: macos-latest, r: 'release'} - - {os: ubuntu-16.04, r: '3.6', rspm: "https://demo.rstudiopm.com/all/__linux__/xenial/latest"} - - {os: ubuntu-16.04, r: 'release', rspm: "https://demo.rstudiopm.com/all/__linux__/xenial/latest"} + #- {os: windows-latest, r: 'release'} + #- {os: macOS-latest, r: 'release'} + - {os: ubuntu-20.04, r: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"} + - {os: ubuntu-20.04, r: 'devel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"} env: R_REMOTES_NO_ERRORS_FROM_WARNINGS: true @@ -43,24 +43,24 @@ jobs: run: | install.packages('remotes') saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2) + writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version") shell: Rscript {0} - - name: Cache R packages + - name: Restore R package cache if: runner.os != 'Windows' - uses: actions/cache@v1 + uses: actions/cache@v2 with: path: ${{ env.R_LIBS_USER }} - key: ${{ runner.os }}-r-${{ matrix.config.r }}-${{ hashFiles('.github/depends.Rds') }} - restore-keys: ${{ runner.os }}-r-${{ matrix.config.r }}- + key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }} + restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1- - name: Install system dependencies if: runner.os == 'Linux' - env: - RHUB_PLATFORM: linux-x86_64-ubuntu-gcc run: | - Rscript -e "remotes::install_github('r-hub/sysreqs')" - sysreqs=$(Rscript -e "cat(sysreqs::sysreq_commands('DESCRIPTION'))") - sudo -s eval "$sysreqs" + while read -r cmd + do + eval sudo $cmd + done < <(Rscript -e 'writeLines(remotes::system_requirements("ubuntu", "20.04"))') - name: Install dependencies run: | diff --git a/.github/workflows/test_coverage.yml b/.github/workflows/test_coverage.yml index b74fee0..6ee47ce 100644 --- a/.github/workflows/test_coverage.yml +++ b/.github/workflows/test_coverage.yml @@ -12,6 +12,7 @@ name: test-coverage jobs: test-coverage: runs-on: ubuntu-latest + if: "!contains(github.event.head_commit.message, 'ci skip')" steps: - uses: actions/checkout@v2 @@ -34,12 +35,11 @@ jobs: - name: Install system dependencies if: runner.os == 'Linux' - env: - RHUB_PLATFORM: linux-x86_64-ubuntu-gcc run: | - Rscript -e "remotes::install_github('rcannood/sysreqs')" - sysreqs=$(Rscript -e "pkgs <- unique(c(sysreqs:::get_cran_sysreqs(sysreqs:::get_cran_deps('covr')), sysreqs::sysreqs('DESCRIPTION'))); cat(sysreqs:::sysreq_commands_pkgs(pkgs))") - sudo -s eval "$sysreqs" + while read -r cmd + do + eval sudo $cmd + done < <(Rscript -e 'writeLines(remotes::system_requirements("ubuntu", "20.04"))') - name: Install dependencies run: | From cf5d76b7dd1ec13486bde5ce8f05e1c4a804d6ba Mon Sep 17 00:00:00 2001 From: Robrecht Cannoodt Date: Wed, 9 Jun 2021 12:50:59 +0200 Subject: [PATCH 09/15] update metadata and examples --- DESCRIPTION | 8 ++++---- R/calculate_overall_feature_importance.R | 6 ++++++ R/fi_methods.R | 6 ++++++ R/package.R | 2 +- man/calculate_overall_feature_importance.Rd | 6 ++++++ man/dynfeature.Rd | 2 +- man/fi_methods.Rd | 6 ++++++ 7 files changed, 30 insertions(+), 6 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index b78c102..ae006f2 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: dynfeature Type: Package -Title: Dynamic Feature Importance -Version: 1.0.0.9000 +Title: Feature Importance for Dynamic Processes +Version: 1.0.0 Authors@R: c(person(given = "Robrecht", family = "Cannoodt", @@ -13,10 +13,9 @@ Authors@R: role = "aut", email = "wouter.saelens@gmail.com", comment = c(ORCID = "0000-0002-7114-6248", github = "zouter"))) -Description: Calculating feature importance scores from trajectories using the random forests algorithm. +Description: Calculating feature importance scores from trajectories using the random forests algorithm and more. License: GPL-3 Encoding: UTF-8 -LazyData: true Imports: dplyr, dynutils (>= 1.0.2), @@ -31,6 +30,7 @@ Imports: tibble Suggests: caret + covr RoxygenNote: 7.1.1 Roxygen: list(markdown = TRUE) Collate: diff --git a/R/calculate_overall_feature_importance.R b/R/calculate_overall_feature_importance.R index baa0e90..3cf3f26 100644 --- a/R/calculate_overall_feature_importance.R +++ b/R/calculate_overall_feature_importance.R @@ -20,6 +20,12 @@ #' @export #' #' @include calculate_feature_importances.R +#' +#' @examples +#' library(dynwrap) +#' data(example_trajectory) +#' +#' calculate_overall_feature_importance(example_trajectory) calculate_overall_feature_importance <- inherit_default_params( super_functions = list(calculate_feature_importances), fun = function( diff --git a/R/fi_methods.R b/R/fi_methods.R index ad56084..6f7082d 100644 --- a/R/fi_methods.R +++ b/R/fi_methods.R @@ -19,6 +19,12 @@ apply_function_params <- function(params, nrow, ncol) { #' @rdname fi_methods #' #' @export +#' +#' @examples +#' library(dynwrap) +#' data(example_trajectory) +#' +#' calculate_overall_feature_importance(example_trajectory, fi_method = fi_ranger_rf()) fi_ranger_rf_lite <- function( num_trees = 2000, num_variables_per_split = 50, diff --git a/R/package.R b/R/package.R index da9b302..2958831 100755 --- a/R/package.R +++ b/R/package.R @@ -1,4 +1,4 @@ -#' Dynamic feature importance +#' Feature Importance for Dynamic Processes #' #' Calculating feature importance scores from trajectories using the random forests algorithm. #' diff --git a/man/calculate_overall_feature_importance.Rd b/man/calculate_overall_feature_importance.Rd index 5ac3b52..ddcaa8b 100755 --- a/man/calculate_overall_feature_importance.Rd +++ b/man/calculate_overall_feature_importance.Rd @@ -82,3 +82,9 @@ Uses the feature importance measures of \code{\link[ranger]{ranger}} or \code{ca \code{calculate_overall_feature_importance} calculates the importance for the whole trajectory, \code{calculate_milestone_feature_importance} calculates it for individual milestones (e.g. branching points) } +\examples{ +library(dynwrap) +data(example_trajectory) + +calculate_overall_feature_importance(example_trajectory) +} diff --git a/man/dynfeature.Rd b/man/dynfeature.Rd index 2fd5af8..74d7cec 100755 --- a/man/dynfeature.Rd +++ b/man/dynfeature.Rd @@ -3,7 +3,7 @@ \docType{package} \name{dynfeature} \alias{dynfeature} -\title{Dynamic feature importance} +\title{Feature Importance for Dynamic Processes} \description{ Calculating feature importance scores from trajectories using the random forests algorithm. } diff --git a/man/fi_methods.Rd b/man/fi_methods.Rd index 5ba63f6..c5503fb 100644 --- a/man/fi_methods.Rd +++ b/man/fi_methods.Rd @@ -43,3 +43,9 @@ fi_ranger_rf_tiny( \description{ Feature Importance methods } +\examples{ +library(dynwrap) +data(example_trajectory) + +calculate_overall_feature_importance(example_trajectory, fi_method = fi_ranger_rf()) +} From 5ebbfbaadbe42f9d5a6890019ac76e8eeeda6b46 Mon Sep 17 00:00:00 2001 From: Robrecht Cannoodt Date: Wed, 9 Jun 2021 12:51:37 +0200 Subject: [PATCH 10/15] update news --- NEWS.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/NEWS.md b/NEWS.md index 5c5858a..e45e1ea 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,8 +1,10 @@ -# dynfeature 1.0.0 (28-03-2019) +# dynfeature 1.0.0 * MINOR CHANGE: Use only one core by default. -* MINOR CHANGE: Support sparse matrices +* MINOR CHANGE: Support sparse matrices. + +* DOCUMENTATION: Add examples. # dynfeature 0.2.0 (25-10-2018) From bfaec91491d60be84a11f97a84680ade8f868af0 Mon Sep 17 00:00:00 2001 From: Robrecht Cannoodt Date: Wed, 9 Jun 2021 12:52:21 +0200 Subject: [PATCH 11/15] fix description --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index ae006f2..51b6ed3 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -29,7 +29,7 @@ Imports: tidyr, tibble Suggests: - caret + caret, covr RoxygenNote: 7.1.1 Roxygen: list(markdown = TRUE) From 2adb8dedbbd4b521441046a06aa807c502f6f2c1 Mon Sep 17 00:00:00 2001 From: Robrecht Cannoodt Date: Wed, 9 Jun 2021 12:52:25 +0200 Subject: [PATCH 12/15] add cran comments --- .Rbuildignore | 1 + cran-comments.md | 22 ++++++++++++++++++++++ 2 files changed, 23 insertions(+) create mode 100644 cran-comments.md diff --git a/.Rbuildignore b/.Rbuildignore index 42b7713..65fea1a 100755 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -4,3 +4,4 @@ ^README\.Rmd$ ^\.git$ ^\.github$ +^cran-comments\.md$ diff --git a/cran-comments.md b/cran-comments.md new file mode 100644 index 0000000..0a194bb --- /dev/null +++ b/cran-comments.md @@ -0,0 +1,22 @@ +# dynfeature 1.0.0 + +Initial release of dynfeature on CRAN. + +* MINOR CHANGE: Use only one core by default. + +* MINOR CHANGE: Support sparse matrices. + +* DOCUMENTATION: Add examples. + +## Test environments +* local Fedora install, R 4.0 +* ubuntu 20.04, mac os x, windows (on github actions), R 4.0 +* win-builder (devel and release) + +## R CMD check results + +``` +── R CMD check results ─────────────────────────────────── dynfeature 1.0.0 ──── +... +``` + From 68b21c1d1e36ed0ba30ffc2a5496a5ebadbafbf6 Mon Sep 17 00:00:00 2001 From: Robrecht Cannoodt Date: Wed, 9 Jun 2021 13:39:00 +0200 Subject: [PATCH 13/15] clean up documentation and tests --- DESCRIPTION | 10 -- R/calculate_branch_feature_importance.R | 82 ++++++------- ...ulate_branching_point_feature_importance.R | 113 +++++++++--------- R/calculate_cell_feature_importance.R | 68 +++++------ R/calculate_feature_importances.R | 9 +- R/calculate_milestone_feature_importance.R | 79 ++++++------ R/calculate_overall_feature_importance.R | 37 +++--- R/calculate_waypoint_feature_importance.R | 51 ++++---- R/fi_methods.R | 4 +- .../test-calculate_feature_importance.R | 6 +- 10 files changed, 212 insertions(+), 247 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 51b6ed3..9862155 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -33,13 +33,3 @@ Suggests: covr RoxygenNote: 7.1.1 Roxygen: list(markdown = TRUE) -Collate: - 'calculate_feature_importances.R' - 'calculate_branch_feature_importance.R' - 'calculate_branching_point_feature_importance.R' - 'calculate_cell_feature_importance.R' - 'calculate_milestone_feature_importance.R' - 'calculate_overall_feature_importance.R' - 'calculate_waypoint_feature_importance.R' - 'fi_methods.R' - 'package.R' diff --git a/R/calculate_branch_feature_importance.R b/R/calculate_branch_feature_importance.R index 0a07c6f..008e21d 100644 --- a/R/calculate_branch_feature_importance.R +++ b/R/calculate_branch_feature_importance.R @@ -1,52 +1,46 @@ #' @rdname calculate_overall_feature_importance #' #' @export -#' -#' @include calculate_feature_importances.R -calculate_branch_feature_importance <- inherit_default_params( - super_functions = list(calculate_feature_importances), - fun = function( - trajectory, - expression_source = "expression", - fi_method, - verbose - ) { - # assign name to each edge - milestone_network <- - trajectory$milestone_network %>% - mutate(edge_id = as.character(row_number())) %>% - select(from, to, edge_id) +calculate_branch_feature_importance <- function( + trajectory, + expression_source = "expression", + fi_method = fi_ranger_rf_lite(), + verbose = FALSE +) { + # assign name to each edge + milestone_network <- + trajectory$milestone_network %>% + mutate(edge_id = as.character(row_number())) %>% + select(.data$from, .data$to, .data$edge_id) - # determine which cell is part of which edge - edge_membership <- - trajectory$progressions %>% - group_by(cell_id) %>% - top_n(1, percentage) %>% - ungroup() %>% - left_join(milestone_network, c("from", "to")) %>% - reshape2::acast(cell_id ~ edge_id, value.var = "percentage") %>% - {!is.na(.)} + # determine which cell is part of which edge + edge_membership <- + trajectory$progressions %>% + group_by(.data$cell_id) %>% + top_n(1, .data$percentage) %>% + ungroup() %>% + left_join(milestone_network, c("from", "to")) %>% + mutate(contains = TRUE) %>% + reshape2::acast(cell_id ~ edge_id, value.var = "contains", fill = FALSE) - expression <- get_expression(trajectory, expression_source) + expression <- get_expression(trajectory, expression_source) + out <- calculate_feature_importances( + X = expression, + Y = edge_membership, + fi_method = fi_method, + verbose = verbose + ) + suppressWarnings({ + out <- out %>% + left_join(milestone_network, c("predictor_id" = "edge_id")) + }) - out <- calculate_feature_importances( - X = expression, - Y = edge_membership, - fi_method = fi_method, - verbose = verbose + out %>% + transmute( + .data$feature_id, + from = factor(.data$from, trajectory$milestone_ids), + to = factor(.data$to, trajectory$milestone_ids), + .data$importance ) - suppressWarnings({ - out <- out %>% - left_join(milestone_network, c("predictor_id" = "edge_id")) - }) - - out %>% - transmute( - feature_id, - from = factor(from, trajectory$milestone_ids), - to = factor(to, trajectory$milestone_ids), - importance - ) - } -) \ No newline at end of file +} \ No newline at end of file diff --git a/R/calculate_branching_point_feature_importance.R b/R/calculate_branching_point_feature_importance.R index 06833cc..b55b152 100644 --- a/R/calculate_branching_point_feature_importance.R +++ b/R/calculate_branching_point_feature_importance.R @@ -1,70 +1,65 @@ #' @rdname calculate_overall_feature_importance #' #' @export -#' -#' @include calculate_feature_importances.R -calculate_branching_point_feature_importance <- inherit_default_params( - super_functions = list(calculate_feature_importances), - fun = function( - trajectory, - expression_source = "expression", - milestones_oi = trajectory$milestone_ids, - fi_method, - verbose - ) { - # assign name to each edge - milestone_network <- - trajectory$milestone_network %>% - mutate(edge_id = as.character(row_number())) %>% - select(from, to, edge_id) +calculate_branching_point_feature_importance <- function( + trajectory, + expression_source = "expression", + milestones_oi = trajectory$milestone_ids, + fi_method = fi_ranger_rf_lite(), + verbose = FALSE +) { + # assign name to each edge + milestone_network <- + trajectory$milestone_network %>% + mutate(edge_id = as.character(row_number())) %>% + select(.data$from, .data$to, .data$edge_id) - # determine which cell is part of which edge - edge_membership <- trajectory$progressions %>% - group_by(cell_id) %>% - top_n(1, percentage) %>% - ungroup() %>% - left_join(milestone_network, c("from", "to")) %>% - reshape2::acast(cell_id~edge_id, value.var="percentage") %>% - {!is.na(.)} + # determine which cell is part of which edge + edge_membership <- trajectory$progressions %>% + group_by(.data$cell_id) %>% + top_n(1, .data$percentage) %>% + ungroup() %>% + left_join(milestone_network, c("from", "to")) %>% + mutate(contains = TRUE) %>% + reshape2::acast(cell_id~edge_id, value.var = "contains", fill = FALSE) - expression <- get_expression(trajectory, expression_source) + expression <- get_expression(trajectory, expression_source) - map_df( - seq_along(milestones_oi), - function(i) { - if (verbose) cat("Processing milestone ", i , "/", length(milestones_oi), "\n", sep = "") + map_df( + seq_along(milestones_oi), + function(i) { + if (verbose) cat("Processing milestone ", i , "/", length(milestones_oi), "\n", sep = "") - milestone_oi <- milestones_oi[[i]] + milestone_oi <- milestones_oi[[i]] - # select the cells which are close to the milestone - prog <- - trajectory$progressions %>% - filter(from == milestone_oi | to == milestone_oi) %>% - mutate(milestone_other = ifelse(from == milestone_oi, to, from)) %>% - group_by(cell_id) %>% - top_n(1, percentage) %>% - ungroup() + # select the cells which are close to the milestone + prog <- + trajectory$progressions %>% + filter(.data$from == milestone_oi | .data$to == milestone_oi) %>% + mutate(milestone_other = ifelse(.data$from == milestone_oi, .data$to, .data$from)) %>% + group_by(.data$cell_id) %>% + top_n(1, .data$percentage) %>% + ungroup() - expression_oi <- expression[prog$cell_id, ] - outcome <- as.character(prog$milestone_other) + expression_oi <- expression[prog$cell_id, ] + outcome <- as.character(prog$milestone_other) - if(length(unique(prog$milestone_other)) < 2) { - warning("Could not find features specific for milestone ", milestone_oi) - tibble() - } else { - calculate_feature_importances( - X = expression_oi, - Y = outcome, - fi_method = fi_method, - verbose = verbose - ) %>% - transmute( - milestone_id = factor(milestone_oi, levels = trajectory$milestone_ids), - feature_id, - importance - ) - } + if(length(unique(prog$milestone_other)) < 2) { + warning("Could not find features specific for milestone ", milestone_oi) + tibble() + } else { + calculate_feature_importances( + X = expression_oi, + Y = outcome, + fi_method = fi_method, + verbose = verbose + ) %>% + transmute( + milestone_id = factor(milestone_oi, levels = trajectory$milestone_ids), + .data$feature_id, + .data$importance + ) } - ) - } -) \ No newline at end of file + } + ) +} \ No newline at end of file diff --git a/R/calculate_cell_feature_importance.R b/R/calculate_cell_feature_importance.R index 10ceefc..3679029 100644 --- a/R/calculate_cell_feature_importance.R +++ b/R/calculate_cell_feature_importance.R @@ -1,44 +1,38 @@ #' @rdname calculate_overall_feature_importance #' #' @export -#' -#' @include calculate_feature_importances.R -calculate_cell_feature_importance <- inherit_default_params( - super_functions = list(calculate_feature_importances), - fun = function( - trajectory, - expression_source = "expression", - fi_method, - verbose - ) { - if (!is_wrapper_with_waypoints(trajectory)) { - trajectory <- trajectory %>% dynwrap::add_waypoints() - } +calculate_cell_feature_importance <- function( + trajectory, + expression_source = "expression", + fi_method = fi_ranger_rf_lite(), + verbose = FALSE +) { + if (!is_wrapper_with_waypoints(trajectory)) { + trajectory <- trajectory %>% dynwrap::add_waypoints() + } - waypoints <- trajectory$waypoints + waypoints <- trajectory$waypoints - waypoint_feature_importances <- - calculate_waypoint_feature_importance( - trajectory = trajectory, - expression_source = expression_source, - waypoints = waypoints, - fi_method = fi_method, - verbose = verbose - ) + waypoint_feature_importances <- + calculate_waypoint_feature_importance( + trajectory = trajectory, + expression_source = expression_source, + waypoints = waypoints, + fi_method = fi_method, + verbose = verbose + ) - closest_waypoints <- - trajectory$waypoints$geodesic_distances %>% { - tibble( - cell_id = factor(colnames(.), levels = colnames(.)), - waypoint_id = factor(rownames(.)[apply(., 2, which.min)], levels = waypoints$waypoints$waypoint_id %>% sort) - ) - } + geo <- trajectory$waypoints$geodesic_distances - full_join( - closest_waypoints, - waypoint_feature_importances, - by = "waypoint_id" - ) %>% - select(-waypoint_id) - } -) \ No newline at end of file + closest_waypoints <- tibble( + cell_id = factor(colnames(geo), levels = colnames(geo)), + waypoint_id = factor(rownames(geo)[apply(geo, 2, which.min)], levels = waypoints$waypoints$waypoint_id %>% sort) + ) + + full_join( + closest_waypoints, + waypoint_feature_importances, + by = "waypoint_id" + ) %>% + select(-.data$waypoint_id) +} \ No newline at end of file diff --git a/R/calculate_feature_importances.R b/R/calculate_feature_importances.R index 63033e9..98052ee 100644 --- a/R/calculate_feature_importances.R +++ b/R/calculate_feature_importances.R @@ -4,7 +4,12 @@ #' @param Y A data frame of predictor variables, with `nrow(Y) == nrow(X)`. #' @param fi_method A feature importance method. Default: `fi_ranger_rf_lite()`. Check `?fi_methods` for a full list of available feature importance methods. #' @param verbose Whether to print out extra information. -calculate_feature_importances <- function(X, Y, fi_method = fi_ranger_rf_lite(), verbose = FALSE) { +calculate_feature_importances <- function( + X, + Y, + fi_method = fi_ranger_rf_lite(), + verbose = FALSE +) { # if Y is a vector or a matrix, turn it into a data frame if (!is.data.frame(Y)) { # convert to regular matrix if sparse @@ -45,5 +50,5 @@ calculate_feature_importances <- function(X, Y, fi_method = fi_ranger_rf_lite(), # return importances ordered by value importances %>% - arrange(desc(importance)) + arrange(desc(.data$importance)) } \ No newline at end of file diff --git a/R/calculate_milestone_feature_importance.R b/R/calculate_milestone_feature_importance.R index d362bdb..8c2f178 100644 --- a/R/calculate_milestone_feature_importance.R +++ b/R/calculate_milestone_feature_importance.R @@ -1,50 +1,45 @@ #' @rdname calculate_overall_feature_importance #' #' @export -#' -#' @include calculate_feature_importances.R -calculate_milestone_feature_importance <- inherit_default_params( - super_functions = list(calculate_feature_importances), - fun = function( - trajectory, - expression_source = "expression", - milestones_oi = NULL, - fi_method, - verbose - ) { - # get expression from trajectory source or expression source - expression <- get_expression(trajectory, expression_source) +calculate_milestone_feature_importance <- function( + trajectory, + expression_source = "expression", + milestones_oi = NULL, + fi_method = fi_ranger_rf_lite(), + verbose = FALSE +) { + # get expression from trajectory source or expression source + expression <- get_expression(trajectory, expression_source) - # check trajectory and extract some variables - testthat::expect_true(dynwrap::is_wrapper_with_trajectory(trajectory)) - milestone_percentages <- trajectory$milestone_percentages - cell_ids <- trajectory$cell_ids + # check trajectory and extract some variables + testthat::expect_true(dynwrap::is_wrapper_with_trajectory(trajectory)) + milestone_percentages <- trajectory$milestone_percentages + cell_ids <- trajectory$cell_ids - testthat::expect_true(all(cell_ids %in% rownames(expression))) - testthat::expect_true(length(cell_ids) >= 3, info = "Need 3 or more cells in a trajectory to determine important features") + testthat::expect_true(all(cell_ids %in% rownames(expression))) + testthat::expect_true(length(cell_ids) >= 3, info = "Need 3 or more cells in a trajectory to determine important features") - # process milestones - if (is.null(milestones_oi)) { - milestones_oi <- trajectory$milestone_ids - } + # process milestones + if (is.null(milestones_oi)) { + milestones_oi <- trajectory$milestone_ids + } - # construct milestone percentages matrix: - # * only retain relevant milestones - # * transform from long format to matrix - # * if cells were filtered out, add them back to the matrix - milenet_m <- - milestone_percentages %>% - filter(milestone_id %in% milestones_oi) %>% - reshape2::acast(cell_id ~ milestone_id, value.var = "percentage", fill = 0) %>% - expand_matrix(rownames = cell_ids) + # construct milestone percentages matrix: + # * only retain relevant milestones + # * transform from long format to matrix + # * if cells were filtered out, add them back to the matrix + milenet_m <- + milestone_percentages %>% + filter(.data$milestone_id %in% milestones_oi) %>% + reshape2::acast(cell_id ~ milestone_id, value.var = "percentage", fill = 0) %>% + expand_matrix(rownames = cell_ids) - # calculate feature importance scores - calculate_feature_importances( - X = expression, - Y = milenet_m, - fi_method = fi_method, - verbose = verbose - ) %>% - rename(milestone_id = predictor_id) - } -) \ No newline at end of file + # calculate feature importance scores + calculate_feature_importances( + X = expression, + Y = milenet_m, + fi_method = fi_method, + verbose = verbose + ) %>% + rename(milestone_id = .data$predictor_id) +} \ No newline at end of file diff --git a/R/calculate_overall_feature_importance.R b/R/calculate_overall_feature_importance.R index 3cf3f26..1e16dd1 100644 --- a/R/calculate_overall_feature_importance.R +++ b/R/calculate_overall_feature_importance.R @@ -19,29 +19,24 @@ #' #' @export #' -#' @include calculate_feature_importances.R -#' #' @examples #' library(dynwrap) #' data(example_trajectory) #' #' calculate_overall_feature_importance(example_trajectory) -calculate_overall_feature_importance <- inherit_default_params( - super_functions = list(calculate_feature_importances), - fun = function( - trajectory, - expression_source = "expression", - fi_method, - verbose - ) { - calculate_milestone_feature_importance( - trajectory = trajectory, - expression_source = expression_source, - fi_method = fi_method, - verbose = verbose - ) %>% - group_by(feature_id) %>% - summarise(importance = mean(importance)) %>% - arrange(desc(importance)) - } -) \ No newline at end of file +calculate_overall_feature_importance <- function( + trajectory, + expression_source = "expression", + fi_method = fi_ranger_rf_lite(), + verbose = FALSE +) { + calculate_milestone_feature_importance( + trajectory = trajectory, + expression_source = expression_source, + fi_method = fi_method, + verbose = verbose + ) %>% + group_by(.data$feature_id) %>% + summarise(importance = mean(.data$importance)) %>% + arrange(desc(.data$importance)) +} \ No newline at end of file diff --git a/R/calculate_waypoint_feature_importance.R b/R/calculate_waypoint_feature_importance.R index f0b3dd2..a5463a5 100644 --- a/R/calculate_waypoint_feature_importance.R +++ b/R/calculate_waypoint_feature_importance.R @@ -1,34 +1,29 @@ #' @rdname calculate_overall_feature_importance #' #' @export -#' -#' @include calculate_feature_importances.R -calculate_waypoint_feature_importance <- inherit_default_params( - super_functions = list(calculate_feature_importances), - fun = function( - trajectory, - expression_source = "expression", - waypoints = NULL, - fi_method, - verbose - ) { - if (is.null(waypoints)) { - if (!is_wrapper_with_waypoints(trajectory)) { - message("Adding waypoints to prediction") - trajectory <- trajectory %>% dynwrap::add_waypoints() - } - - waypoints <- trajectory$waypoints +calculate_waypoint_feature_importance <- function( + trajectory, + expression_source = "expression", + waypoints = NULL, + fi_method = fi_ranger_rf_lite(), + verbose = FALSE +) { + if (is.null(waypoints)) { + if (!is_wrapper_with_waypoints(trajectory)) { + message("Adding waypoints to prediction") + trajectory <- trajectory %>% dynwrap::add_waypoints() } - expression <- get_expression(trajectory, expression_source) - - calculate_feature_importances( - X = expression, - Y = t(waypoints$geodesic_distances)[rownames(expression),unique(rownames(waypoints$geodesic_distances))], - fi_method = fi_method, - verbose = verbose - ) %>% - rename(waypoint_id = predictor_id) + waypoints <- trajectory$waypoints } -) \ No newline at end of file + + expression <- get_expression(trajectory, expression_source) + + calculate_feature_importances( + X = expression, + Y = t(waypoints$geodesic_distances)[rownames(expression),unique(rownames(waypoints$geodesic_distances))], + fi_method = fi_method, + verbose = verbose + ) %>% + rename(waypoint_id = .data$predictor_id) +} \ No newline at end of file diff --git a/R/fi_methods.R b/R/fi_methods.R index 6f7082d..b0578b2 100644 --- a/R/fi_methods.R +++ b/R/fi_methods.R @@ -122,7 +122,9 @@ fi_caret <- function( model <- do.call(caret::train, method_params) - caret::varImp(model)[[1]] %>% {set_names(.[, 1], rownames(.))} + vi <- caret::varImp(model)[[1]] + + set_names(vi[, 1], rownames(vi)) } ) } diff --git a/tests/testthat/test-calculate_feature_importance.R b/tests/testthat/test-calculate_feature_importance.R index 0a04241..94ef3e4 100755 --- a/tests/testthat/test-calculate_feature_importance.R +++ b/tests/testthat/test-calculate_feature_importance.R @@ -11,8 +11,8 @@ milestone_network <- tibble::tribble( "A", "D", 1, TRUE ) -num_cells <- 200 -num_features <- 300 +num_cells <- 100 +num_features <- 20 cell_ids <- paste0("cell_", seq_len(num_cells)) feature_ids <- paste0("feature_", seq_len(num_features)) @@ -39,7 +39,7 @@ trajectory <- ) # generate module positions -num_modules <- 20 +num_modules <- 10 module_ids <- paste0("module_", seq_len(num_modules)) module_progressions <- milestone_network %>% From 501b06e504eb693aa14f77a8f298d7d0c62b2231 Mon Sep 17 00:00:00 2001 From: Robrecht Cannoodt Date: Wed, 9 Jun 2021 13:39:04 +0200 Subject: [PATCH 14/15] update news --- NEWS.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/NEWS.md b/NEWS.md index e45e1ea..d03a97d 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,7 @@ # dynfeature 1.0.0 +Initial release of dynfeature on CRAN. + * MINOR CHANGE: Use only one core by default. * MINOR CHANGE: Support sparse matrices. From ab72f406f6b05f436488baff58f7528a59e3347d Mon Sep 17 00:00:00 2001 From: Robrecht Cannoodt Date: Thu, 10 Jun 2021 10:37:08 +0200 Subject: [PATCH 15/15] Add documentation --- DESCRIPTION | 3 ++- NEWS.md | 2 +- R/calculate_feature_importances.R | 9 +++++++++ R/calculate_overall_feature_importance.R | 2 ++ R/fi_methods.R | 4 +++- cran-comments.md | 8 ++++++-- man/calculate_feature_importances.Rd | 10 ++++++++++ man/calculate_overall_feature_importance.Rd | 3 +++ man/fi_methods.Rd | 5 ++++- 9 files changed, 40 insertions(+), 6 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 9862155..d554431 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -13,7 +13,8 @@ Authors@R: role = "aut", email = "wouter.saelens@gmail.com", comment = c(ORCID = "0000-0002-7114-6248", github = "zouter"))) -Description: Calculating feature importance scores from trajectories using the random forests algorithm and more. +Description: Calculating feature importance scores from trajectories using the random forests algorithm and more. Saelens and Cannoodt et + al. (2019) . License: GPL-3 Encoding: UTF-8 Imports: diff --git a/NEWS.md b/NEWS.md index d03a97d..f7f58e8 100644 --- a/NEWS.md +++ b/NEWS.md @@ -6,7 +6,7 @@ Initial release of dynfeature on CRAN. * MINOR CHANGE: Support sparse matrices. -* DOCUMENTATION: Add examples. +* DOCUMENTATION: Add examples and returns. # dynfeature 0.2.0 (25-10-2018) diff --git a/R/calculate_feature_importances.R b/R/calculate_feature_importances.R index 98052ee..5123a7c 100644 --- a/R/calculate_feature_importances.R +++ b/R/calculate_feature_importances.R @@ -4,6 +4,15 @@ #' @param Y A data frame of predictor variables, with `nrow(Y) == nrow(X)`. #' @param fi_method A feature importance method. Default: `fi_ranger_rf_lite()`. Check `?fi_methods` for a full list of available feature importance methods. #' @param verbose Whether to print out extra information. +#' +#' @returns A data frame with three columns, `predictor_id`, `feature_id`, and `importance`. `predictor_id` is a column in `Y`, while `feature_id` is a column in `X`. +#' +#' @examples +#' X <- data.frame(matrix(runif(25*10), ncol = 10)) +#' Y <- data.frame(matrix(runif(25*2), ncol = 2)) +#' +#' # don't run since this function is not exported +#' # calculate_feature_importances(X, Y) calculate_feature_importances <- function( X, Y, diff --git a/R/calculate_overall_feature_importance.R b/R/calculate_overall_feature_importance.R index 1e16dd1..77f2fb4 100644 --- a/R/calculate_overall_feature_importance.R +++ b/R/calculate_overall_feature_importance.R @@ -12,6 +12,8 @@ #' @param milestones_oi The milestone(s) for which to calculate feature importance #' @param waypoints The waypoints, optional #' +#' @returns A data frame with two or more columns, `feature_id`, and `importance`. `feature_id` is a column in the trajectory expression matrix. Additional columns may be available depending on the function called. +#' #' @inheritParams calculate_feature_importances #' #' @importFrom reshape2 acast diff --git a/R/fi_methods.R b/R/fi_methods.R index b0578b2..c3a6701 100644 --- a/R/fi_methods.R +++ b/R/fi_methods.R @@ -14,7 +14,9 @@ apply_function_params <- function(params, nrow, ncol) { #' @param num_variables_per_split (fi_ranger_rf_lite) The number of variables to sample per split #' @param num_samples_per_tree (fi_ranger_rf_lite) The number of samples to bootstrap per split #' @param min_node_size (fi_ranger_rf_lite) The minimum node size, no split will be made if the node size is less than this value. -#' @param ... Extra parameters to pass onto the underlying feature importnce function. +#' @param ... Extra parameters to pass onto the underlying feature importance function. +#' +#' @returns A list containing a helper function for calling a feature importance function. #' #' @rdname fi_methods #' diff --git a/cran-comments.md b/cran-comments.md index 0a194bb..093ddf6 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -6,7 +6,7 @@ Initial release of dynfeature on CRAN. * MINOR CHANGE: Support sparse matrices. -* DOCUMENTATION: Add examples. +* DOCUMENTATION: Add examples and returns. ## Test environments * local Fedora install, R 4.0 @@ -17,6 +17,10 @@ Initial release of dynfeature on CRAN. ``` ── R CMD check results ─────────────────────────────────── dynfeature 1.0.0 ──── -... +Duration: 3m 0.1s + +0 errors ✓ | 0 warnings ✓ | 0 notes ✓ + +R CMD check succeeded ``` diff --git a/man/calculate_feature_importances.Rd b/man/calculate_feature_importances.Rd index e84f04e..ad201ed 100644 --- a/man/calculate_feature_importances.Rd +++ b/man/calculate_feature_importances.Rd @@ -20,6 +20,16 @@ calculate_feature_importances( \item{verbose}{Whether to print out extra information.} } +\value{ +A data frame with three columns, \code{predictor_id}, \code{feature_id}, and \code{importance}. \code{predictor_id} is a column in \code{Y}, while \code{feature_id} is a column in \code{X}. +} \description{ Calculate feature importance scores } +\examples{ +X <- data.frame(matrix(runif(25*10), ncol = 10)) +Y <- data.frame(matrix(runif(25*2), ncol = 2)) + +# don't run since this function is not exported +# calculate_feature_importances(X, Y) +} diff --git a/man/calculate_overall_feature_importance.Rd b/man/calculate_overall_feature_importance.Rd index ddcaa8b..f5c548b 100755 --- a/man/calculate_overall_feature_importance.Rd +++ b/man/calculate_overall_feature_importance.Rd @@ -77,6 +77,9 @@ calculate_waypoint_feature_importance( \item{waypoints}{The waypoints, optional} } +\value{ +A data frame with two or more columns, \code{feature_id}, and \code{importance}. \code{feature_id} is a column in the trajectory expression matrix. Additional columns may be available depending on the function called. +} \description{ Uses the feature importance measures of \code{\link[ranger]{ranger}} or \code{caret}. \code{calculate_overall_feature_importance} calculates the importance for the whole trajectory, diff --git a/man/fi_methods.Rd b/man/fi_methods.Rd index c5503fb..ce17fe7 100644 --- a/man/fi_methods.Rd +++ b/man/fi_methods.Rd @@ -36,10 +36,13 @@ fi_ranger_rf_tiny( \item{min_node_size}{(fi_ranger_rf_lite) The minimum node size, no split will be made if the node size is less than this value.} -\item{...}{Extra parameters to pass onto the underlying feature importnce function.} +\item{...}{Extra parameters to pass onto the underlying feature importance function.} \item{caret_method}{(fi_caret) Which caret method to use for feature importance.} } +\value{ +A list containing a helper function for calling a feature importance function. +} \description{ Feature Importance methods }