From 8223497bade0899cb1e6603140cadf327d1380c1 Mon Sep 17 00:00:00 2001 From: be-marc Date: Thu, 14 Mar 2024 19:18:55 +0100 Subject: [PATCH 01/43] feat: ensemble feature selection --- DESCRIPTION | 1 + NAMESPACE | 1 + R/ensemble_fselect.R | 67 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 69 insertions(+) create mode 100644 R/ensemble_fselect.R diff --git a/DESCRIPTION b/DESCRIPTION index 5af055c1..5b638898 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -68,6 +68,7 @@ Collate: 'assertions.R' 'auto_fselector.R' 'bibentries.R' + 'ensemble_fselect.R' 'extract_inner_fselect_archives.R' 'extract_inner_fselect_results.R' 'fselect.R' diff --git a/NAMESPACE b/NAMESPACE index 6e326f11..05027c4d 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -26,6 +26,7 @@ export(auto_fselector) export(callback_fselect) export(clbk) export(clbks) +export(ensemble_fselect) export(extract_inner_fselect_archives) export(extract_inner_fselect_results) export(fs) diff --git a/R/ensemble_fselect.R b/R/ensemble_fselect.R new file mode 100644 index 00000000..42ff17cb --- /dev/null +++ b/R/ensemble_fselect.R @@ -0,0 +1,67 @@ +#' @export +ensemble_fselect = function(task, learners, outer_resampling, inner_resampling, fselector, terminator) { + assert_task(task) + assert_learners(learners) + assert_resampling(outer_resampling) + assert_resampling(inner_resampling) + assert_fselector(fselector) + assert_terminator(terminator) + + outer_resampling$instantiate(task) + + grid = map_dtr(seq(outer_resampling$iters), function(i) { + + afs = auto_fselector( + fselector = fselector, + learner = learners[[i]], + resampling = inner_resampling, + measure = measure, + terminator = terminator, + store_models = TRUE + ) + + task_subset = task$clone()$filter(outer_resampling$train_set(i)) + resampling = rsmp("insample")$instantiate(task_subset) + + data.table( + iter = i, + base_learner_id = learners[[i]]$id, + base_learner = list(learners[[i]]), + learner = list(afs), + task = list(task_subset), + resampling 
= list(resampling) + ) + }) + + design = grid[, list(learner, task, resampling)] + + bmr = benchmark(design, store_models = TRUE) + + # extract + afss = bmr$score()$learner + features = map(afss, function(afs) { + afs$fselect_result$features[[1]] + }) + + n_features = map_int(afss, function(afs) { + afs$fselect_result$n_features[[1]] + }) + + set(grid, j = "features", value = features) + set(grid, j = "n_features", value = n_features) + + grid +} + +if (FALSE) { + task = tsk("sonar") + learners = lrns(c("classif.rpart", "classif.rpart")) + outer_resampling = rsmp("subsampling", repeats = 2) + inner_resampling = rsmp("cv", folds = 3) + measure = msr("classif.ce") + fselector = fs("random_search") + terminator = trm("evals", n_evals = 10) + + ensemble_fselect(task, learners, outer_resampling, inner_resampling, fselector, terminator) + +} From e06f8c9247c5e0d8862ce32c0ac03e52e6883144 Mon Sep 17 00:00:00 2001 From: be-marc Date: Thu, 4 Apr 2024 19:02:27 +0200 Subject: [PATCH 02/43] docs: improve documentation --- R/ensemble_fselect.R | 56 ++++++++++++++++++-------- man/ensemble_fselect.Rd | 56 ++++++++++++++++++++++++++ tests/testthat/test_ensemble_fselect.R | 15 +++++++ 3 files changed, 111 insertions(+), 16 deletions(-) create mode 100644 man/ensemble_fselect.Rd create mode 100644 tests/testthat/test_ensemble_fselect.R diff --git a/R/ensemble_fselect.R b/R/ensemble_fselect.R index 42ff17cb..85ee2c01 100644 --- a/R/ensemble_fselect.R +++ b/R/ensemble_fselect.R @@ -1,12 +1,49 @@ +#' @title Ensemble Feature Selection +#' +#' @description +#' Ensemble feature selection using multiple learners. +#' +#' @param learners (list of [mlr3::Learner])\cr +#' The learners to be used for feature selection. +#' @param outer_resampling ([mlr3::Resampling])\cr +#' The outer resampling strategy. +#' The number of iterations must match the number of learners. +#' @param inner_resampling ([mlr3::Resampling])\cr +#' The inner resampling strategy used by the [FSelector]. 
+#' +#' @template param_fselector +#' @template param_task +#' @template param_measure +#' @template param_terminator +#' #' @export -ensemble_fselect = function(task, learners, outer_resampling, inner_resampling, fselector, terminator) { +#' @examples +#' \donttest{ +#' +#' ensemble_fselect( +#' fselector = fs("random_search"), +#' task = tsk("sonar"), +#' learners = lrns(c("classif.rpart", "classif.featureless")), +#' outer_resampling = rsmp("subsampling", repeats = 2), +#' inner_resampling = rsmp("cv", folds = 3), +#' measure = msr("classif.ce"), +#' terminator = trm("evals", n_evals = 10) +#' ) +#' } +ensemble_fselect = function(fselector, task, learners, outer_resampling, inner_resampling, measure, terminator) { assert_task(task) - assert_learners(learners) + assert_learners(as_learners(learners), task = task) assert_resampling(outer_resampling) assert_resampling(inner_resampling) + assert_measure(measure) assert_fselector(fselector) assert_terminator(terminator) + if (length(learners) != outer_resampling$iters) { + stopf("Number of learners %i must match number of outer resampling iterations %i.", + length(learners), outer_resampling$iters) + } + outer_resampling$instantiate(task) grid = map_dtr(seq(outer_resampling$iters), function(i) { @@ -33,7 +70,7 @@ ensemble_fselect = function(task, learners, outer_resampling, inner_resampling, ) }) - design = grid[, list(learner, task, resampling)] + design = grid[, c("learner", "task", "resampling"), with = FALSE] bmr = benchmark(design, store_models = TRUE) @@ -52,16 +89,3 @@ ensemble_fselect = function(task, learners, outer_resampling, inner_resampling, grid } - -if (FALSE) { - task = tsk("sonar") - learners = lrns(c("classif.rpart", "classif.rpart")) - outer_resampling = rsmp("subsampling", repeats = 2) - inner_resampling = rsmp("cv", folds = 3) - measure = msr("classif.ce") - fselector = fs("random_search") - terminator = trm("evals", n_evals = 10) - - ensemble_fselect(task, learners, outer_resampling, 
inner_resampling, fselector, terminator) - -} diff --git a/man/ensemble_fselect.Rd b/man/ensemble_fselect.Rd new file mode 100644 index 00000000..d5439996 --- /dev/null +++ b/man/ensemble_fselect.Rd @@ -0,0 +1,56 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ensemble_fselect.R +\name{ensemble_fselect} +\alias{ensemble_fselect} +\title{Ensemble Feature Selection} +\usage{ +ensemble_fselect( + fselector, + task, + learners, + outer_resampling, + inner_resampling, + measure, + terminator +) +} +\arguments{ +\item{fselector}{(\link{FSelector})\cr +Optimization algorithm.} + +\item{task}{(\link[mlr3:Task]{mlr3::Task})\cr +Task to operate on.} + +\item{learners}{(list of \link[mlr3:Learner]{mlr3::Learner})\cr +The learners to be used for feature selection.} + +\item{outer_resampling}{(\link[mlr3:Resampling]{mlr3::Resampling})\cr +The outer resampling strategy. +The number of iterations must match the number of learners.} + +\item{inner_resampling}{(\link[mlr3:Resampling]{mlr3::Resampling})\cr +The inner resampling strategy used by the \link{FSelector}.} + +\item{measure}{(\link[mlr3:Measure]{mlr3::Measure})\cr +Measure to optimize. If \code{NULL}, default measure is used.} + +\item{terminator}{(\link{Terminator})\cr +Stop criterion of the feature selection.} +} +\description{ +Ensemble feature selection using multiple learners. 
+} +\examples{ +\donttest{ + + ensemble_fselect( + fselector = fs("random_search"), + task = tsk("sonar"), + learners = lrns(c("classif.rpart", "classif.featureless")), + outer_resampling = rsmp("subsampling", repeats = 2), + inner_resampling = rsmp("cv", folds = 3), + measure = msr("classif.ce"), + terminator = trm("evals", n_evals = 10) + ) +} +} diff --git a/tests/testthat/test_ensemble_fselect.R b/tests/testthat/test_ensemble_fselect.R new file mode 100644 index 00000000..65a53f18 --- /dev/null +++ b/tests/testthat/test_ensemble_fselect.R @@ -0,0 +1,15 @@ +test_that("esemble feature selection works", { + res = ensemble_fselect( + fselector = fs("random_search"), + task = tsk("sonar"), + learners = lrns(c("classif.rpart", "classif.featureless")), + outer_resampling = rsmp("subsampling", repeats = 2), + inner_resampling = rsmp("cv", folds = 3), + measure = msr("classif.ce"), + terminator = trm("evals", n_evals = 10) + ) + + expect_data_table(res, nrows = 2) +}) + + From 2b724aeb90d145bc2021a76d535e86fbeb38f357 Mon Sep 17 00:00:00 2001 From: be-marc Date: Wed, 10 Apr 2024 15:30:53 +0200 Subject: [PATCH 03/43] fix: outer iterations times learners --- R/ensemble_fselect.R | 26 ++++++++++++-------------- tests/testthat/test_ensemble_fselect.R | 2 +- 2 files changed, 13 insertions(+), 15 deletions(-) diff --git a/R/ensemble_fselect.R b/R/ensemble_fselect.R index 85ee2c01..e3597b77 100644 --- a/R/ensemble_fselect.R +++ b/R/ensemble_fselect.R @@ -39,32 +39,30 @@ ensemble_fselect = function(fselector, task, learners, outer_resampling, inner_r assert_fselector(fselector) assert_terminator(terminator) - if (length(learners) != outer_resampling$iters) { - stopf("Number of learners %i must match number of outer resampling iterations %i.", - length(learners), outer_resampling$iters) - } - - outer_resampling$instantiate(task) - - grid = map_dtr(seq(outer_resampling$iters), function(i) { - - afs = auto_fselector( + # create fselector for each learner + afss = map(learners, 
function(learner) { + auto_fselector( fselector = fselector, - learner = learners[[i]], + learner = learner, resampling = inner_resampling, measure = measure, terminator = terminator, store_models = TRUE ) + }) + + outer_resampling$instantiate(task) + grid = map_dtr(seq(outer_resampling$iters), function(i) { + # create task and resampling for each outer iteration task_subset = task$clone()$filter(outer_resampling$train_set(i)) resampling = rsmp("insample")$instantiate(task_subset) data.table( iter = i, - base_learner_id = learners[[i]]$id, - base_learner = list(learners[[i]]), - learner = list(afs), + base_learner_id = map(learners, "id"), + base_learner = learners, + learner = afss, task = list(task_subset), resampling = list(resampling) ) diff --git a/tests/testthat/test_ensemble_fselect.R b/tests/testthat/test_ensemble_fselect.R index 65a53f18..00a28f55 100644 --- a/tests/testthat/test_ensemble_fselect.R +++ b/tests/testthat/test_ensemble_fselect.R @@ -9,7 +9,7 @@ test_that("esemble feature selection works", { terminator = trm("evals", n_evals = 10) ) - expect_data_table(res, nrows = 2) + expect_data_table(res, nrows = 4) }) From 2e05bd3d85a0b730932ee5081445b510d38e826c Mon Sep 17 00:00:00 2001 From: be-marc Date: Wed, 10 Apr 2024 15:46:11 +0200 Subject: [PATCH 04/43] feat: allow callbacks --- R/ensemble_fselect.R | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/R/ensemble_fselect.R b/R/ensemble_fselect.R index e3597b77..644ea0b4 100644 --- a/R/ensemble_fselect.R +++ b/R/ensemble_fselect.R @@ -30,14 +30,10 @@ #' terminator = trm("evals", n_evals = 10) #' ) #' } -ensemble_fselect = function(fselector, task, learners, outer_resampling, inner_resampling, measure, terminator) { +ensemble_fselect = function(fselector, task, learners, outer_resampling, inner_resampling, measure, terminator, callbacks = list()) { assert_task(task) assert_learners(as_learners(learners), task = task) assert_resampling(outer_resampling) - 
assert_resampling(inner_resampling) - assert_measure(measure) - assert_fselector(fselector) - assert_terminator(terminator) # create fselector for each learner afss = map(learners, function(learner) { @@ -47,7 +43,8 @@ ensemble_fselect = function(fselector, task, learners, outer_resampling, inner_r resampling = inner_resampling, measure = measure, terminator = terminator, - store_models = TRUE + store_models = TRUE, + callbacks = callbacks ) }) From 12a78fdab07ca9144141796d688094ed57ae5b64 Mon Sep 17 00:00:00 2001 From: be-marc Date: Wed, 10 Apr 2024 15:53:01 +0200 Subject: [PATCH 05/43] docs: callback --- R/ensemble_fselect.R | 1 + man/ensemble_fselect.Rd | 6 +++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/R/ensemble_fselect.R b/R/ensemble_fselect.R index 644ea0b4..c75aba29 100644 --- a/R/ensemble_fselect.R +++ b/R/ensemble_fselect.R @@ -15,6 +15,7 @@ #' @template param_task #' @template param_measure #' @template param_terminator +#' @template param_callbacks #' #' @export #' @examples diff --git a/man/ensemble_fselect.Rd b/man/ensemble_fselect.Rd index d5439996..f5679063 100644 --- a/man/ensemble_fselect.Rd +++ b/man/ensemble_fselect.Rd @@ -11,7 +11,8 @@ ensemble_fselect( outer_resampling, inner_resampling, measure, - terminator + terminator, + callbacks = list() ) } \arguments{ @@ -36,6 +37,9 @@ Measure to optimize. If \code{NULL}, default measure is used.} \item{terminator}{(\link{Terminator})\cr Stop criterion of the feature selection.} + +\item{callbacks}{(list of \link{CallbackFSelect})\cr +List of callbacks.} } \description{ Ensemble feature selection using multiple learners. 
From b8e28308b53b7fe8765b4899b7c96ee889d9f057 Mon Sep 17 00:00:00 2001 From: be-marc Date: Thu, 18 Apr 2024 14:23:17 +0200 Subject: [PATCH 06/43] feat: add store_models option --- R/ensemble_fselect.R | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/R/ensemble_fselect.R b/R/ensemble_fselect.R index c75aba29..9e37245d 100644 --- a/R/ensemble_fselect.R +++ b/R/ensemble_fselect.R @@ -31,7 +31,17 @@ #' terminator = trm("evals", n_evals = 10) #' ) #' } -ensemble_fselect = function(fselector, task, learners, outer_resampling, inner_resampling, measure, terminator, callbacks = list()) { +ensemble_fselect = function( + fselector, + task, + learners, + outer_resampling, + inner_resampling, + measure, + terminator, + callbacks = list(), + store_models = TRUE + ) { assert_task(task) assert_learners(as_learners(learners), task = task) assert_resampling(outer_resampling) @@ -44,7 +54,7 @@ ensemble_fselect = function(fselector, task, learners, outer_resampling, inner_r resampling = inner_resampling, measure = measure, terminator = terminator, - store_models = TRUE, + store_models = store_models, callbacks = callbacks ) }) From 6408dd3cd3dba276ebbe3927509b6984572f4e39 Mon Sep 17 00:00:00 2001 From: be-marc Date: Thu, 18 Apr 2024 14:50:12 +0200 Subject: [PATCH 07/43] feat: add scores --- R/ensemble_fselect.R | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/R/ensemble_fselect.R b/R/ensemble_fselect.R index 9e37245d..7d7424f6 100644 --- a/R/ensemble_fselect.R +++ b/R/ensemble_fselect.R @@ -80,18 +80,27 @@ ensemble_fselect = function( bmr = benchmark(design, store_models = TRUE) - # extract + afss = bmr$score()$learner + + # extract features features = map(afss, function(afs) { afs$fselect_result$features[[1]] }) + # extract n_features n_features = map_int(afss, function(afs) { afs$fselect_result$n_features[[1]] }) + # extract scores + scores = map_dbl(afss, function(afs) { + afs$fselect_instance$archive$best()[, 
measure$id, with = FALSE][[1]] + }) + set(grid, j = "features", value = features) set(grid, j = "n_features", value = n_features) + set(grid, j = measure$id, value = scores) grid } From f545bac116f004ff317f09e0c9654dd8073aa4b7 Mon Sep 17 00:00:00 2001 From: john Date: Thu, 16 May 2024 15:41:00 +0200 Subject: [PATCH 08/43] refactor input arg + add doc --- R/ensemble_fselect.R | 38 ++++++++++++++++++++++++++++---------- man/ensemble_fselect.Rd | 29 +++++++++++++++++++++++------ 2 files changed, 51 insertions(+), 16 deletions(-) diff --git a/R/ensemble_fselect.R b/R/ensemble_fselect.R index 7d7424f6..d15b4349 100644 --- a/R/ensemble_fselect.R +++ b/R/ensemble_fselect.R @@ -2,12 +2,29 @@ #' #' @description #' Ensemble feature selection using multiple learners. +#' The ensemble feature selection method is designed to identify the +#' most informative features from a given dataset by leveraging multiple +#' machine learning models and resampling techniques. +#' +#' @details +#' The method begins by applying an initial resampling technique specified +#' by the user, to create **multiple subsamples** from the original dataset. +#' This resampling process helps in generating diverse subsets of data for +#' robust feature selection. +#' +#' For each subsample generated in the previous step, the method performs +#' **wrapped-based feature selection** using each provided learner, an inner +#' resampling method and a performance measure. +#' This process generates a best feature subset for each combination of +#' subsample and learner. +#' Results are stored in a [data.table] object. #' #' @param learners (list of [mlr3::Learner])\cr #' The learners to be used for feature selection. -#' @param outer_resampling ([mlr3::Resampling])\cr -#' The outer resampling strategy. -#' The number of iterations must match the number of learners. 
+#' @param init_resampling ([mlr3::Resampling])\cr +#' The initial resampling strategy of the data, from which each train set +#' will be passed on to the learners. +#' Can only be [mlr_resamplings_subsampling] or [mlr_resamplings_bootstrap]. #' @param inner_resampling ([mlr3::Resampling])\cr #' The inner resampling strategy used by the [FSelector]. #' @@ -25,7 +42,7 @@ #' fselector = fs("random_search"), #' task = tsk("sonar"), #' learners = lrns(c("classif.rpart", "classif.featureless")), -#' outer_resampling = rsmp("subsampling", repeats = 2), +#' init_resampling = rsmp("subsampling", repeats = 2), #' inner_resampling = rsmp("cv", folds = 3), #' measure = msr("classif.ce"), #' terminator = trm("evals", n_evals = 10) @@ -35,7 +52,7 @@ ensemble_fselect = function( fselector, task, learners, - outer_resampling, + init_resampling, inner_resampling, measure, terminator, @@ -44,7 +61,9 @@ ensemble_fselect = function( ) { assert_task(task) assert_learners(as_learners(learners), task = task) - assert_resampling(outer_resampling) + assert_resampling(init_resampling) + assert_choice(class(init_resampling)[1], + choices = c("ResamplingBootstrap", "ResamplingSubsampling")) # create fselector for each learner afss = map(learners, function(learner) { @@ -59,11 +78,11 @@ ensemble_fselect = function( ) }) - outer_resampling$instantiate(task) - grid = map_dtr(seq(outer_resampling$iters), function(i) { + init_resampling$instantiate(task) + grid = map_dtr(seq(init_resampling$iters), function(i) { # create task and resampling for each outer iteration - task_subset = task$clone()$filter(outer_resampling$train_set(i)) + task_subset = task$clone()$filter(init_resampling$train_set(i)) resampling = rsmp("insample")$instantiate(task_subset) data.table( @@ -80,7 +99,6 @@ ensemble_fselect = function( bmr = benchmark(design, store_models = TRUE) - afss = bmr$score()$learner # extract features diff --git a/man/ensemble_fselect.Rd b/man/ensemble_fselect.Rd index f5679063..bdcf9e39 100644 --- 
a/man/ensemble_fselect.Rd +++ b/man/ensemble_fselect.Rd @@ -8,11 +8,12 @@ ensemble_fselect( fselector, task, learners, - outer_resampling, + init_resampling, inner_resampling, measure, terminator, - callbacks = list() + callbacks = list(), + store_models = TRUE ) } \arguments{ @@ -25,9 +26,10 @@ Task to operate on.} \item{learners}{(list of \link[mlr3:Learner]{mlr3::Learner})\cr The learners to be used for feature selection.} -\item{outer_resampling}{(\link[mlr3:Resampling]{mlr3::Resampling})\cr -The outer resampling strategy. -The number of iterations must match the number of learners.} +\item{init_resampling}{(\link[mlr3:Resampling]{mlr3::Resampling})\cr +The initial resampling strategy of the data, from which each train set +will be passed on to the learners. +Can only be \link{mlr_resamplings_subsampling} or \link{mlr_resamplings_bootstrap}.} \item{inner_resampling}{(\link[mlr3:Resampling]{mlr3::Resampling})\cr The inner resampling strategy used by the \link{FSelector}.} @@ -43,6 +45,21 @@ List of callbacks.} } \description{ Ensemble feature selection using multiple learners. +The ensemble feature selection method is designed to identify the +most informative features from a given dataset by leveraging multiple +machine learning models and resampling techniques. +} +\details{ +The method begins by applying an initial resampling technique specified +by the user, to create multiple subsamples from the original dataset. +This resampling process helps in generating diverse subsets of data for +robust feature selection. +For each subsample generated in the previous step, the method performs +wrapped-based feature selection using each provided learner, an inner +resampling method and a performance measure. +This process generates a best feature subset for each combination of +subsample and learner. +All the results are stored in a \link{data.table} object. } \examples{ \donttest{ @@ -51,7 +68,7 @@ Ensemble feature selection using multiple learners. 
fselector = fs("random_search"), task = tsk("sonar"), learners = lrns(c("classif.rpart", "classif.featureless")), - outer_resampling = rsmp("subsampling", repeats = 2), + init_resampling = rsmp("subsampling", repeats = 2), inner_resampling = rsmp("cv", folds = 3), measure = msr("classif.ce"), terminator = trm("evals", n_evals = 10) From 13f52ff315950f729599c9203cd9c418c1339dde Mon Sep 17 00:00:00 2001 From: john Date: Thu, 16 May 2024 16:56:23 +0200 Subject: [PATCH 09/43] better doc --- R/ensemble_fselect.R | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/R/ensemble_fselect.R b/R/ensemble_fselect.R index d15b4349..a113eac5 100644 --- a/R/ensemble_fselect.R +++ b/R/ensemble_fselect.R @@ -13,8 +13,9 @@ #' robust feature selection. #' #' For each subsample generated in the previous step, the method performs -#' **wrapped-based feature selection** using each provided learner, an inner -#' resampling method and a performance measure. +#' **wrapped-based feature selection** ([auto_fselector]) using each provided +#' learner, the given inner resampling method, performance measure and +#' optimization algorithm. #' This process generates a best feature subset for each combination of #' subsample and learner. #' Results are stored in a [data.table] object. @@ -27,6 +28,8 @@ #' Can only be [mlr_resamplings_subsampling] or [mlr_resamplings_bootstrap]. #' @param inner_resampling ([mlr3::Resampling])\cr #' The inner resampling strategy used by the [FSelector]. +#' @param store_model (`logical(1)`)\cr +#' Whether to store models in [auto_fselector] or not. 
#' #' @template param_fselector #' @template param_task @@ -37,7 +40,6 @@ #' @export #' @examples #' \donttest{ -#' #' ensemble_fselect( #' fselector = fs("random_search"), #' task = tsk("sonar"), @@ -45,7 +47,7 @@ #' init_resampling = rsmp("subsampling", repeats = 2), #' inner_resampling = rsmp("cv", folds = 3), #' measure = msr("classif.ce"), -#' terminator = trm("evals", n_evals = 10) +#' terminator = trm("evals", n_evals = 5) #' ) #' } ensemble_fselect = function( From 9f0edf3623510f19b27a8493f6a23a8b82061345 Mon Sep 17 00:00:00 2001 From: john Date: Thu, 16 May 2024 16:57:36 +0200 Subject: [PATCH 10/43] remove base_learner, correct iter --- R/ensemble_fselect.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/ensemble_fselect.R b/R/ensemble_fselect.R index a113eac5..dbceddf3 100644 --- a/R/ensemble_fselect.R +++ b/R/ensemble_fselect.R @@ -89,8 +89,7 @@ ensemble_fselect = function( data.table( iter = i, - base_learner_id = map(learners, "id"), - base_learner = learners, + learner_id = map(learners, "id"), learner = afss, task = list(task_subset), resampling = list(resampling) @@ -118,6 +117,7 @@ ensemble_fselect = function( afs$fselect_instance$archive$best()[, measure$id, with = FALSE][[1]] }) + set(grid, j = "iter", value = 1:bmr$n_resample_results) set(grid, j = "features", value = features) set(grid, j = "n_features", value = n_features) set(grid, j = measure$id, value = scores) From 7bbefd0d42a7218c275d09235aaa3c9faf7c90c0 Mon Sep 17 00:00:00 2001 From: john Date: Thu, 16 May 2024 17:11:55 +0200 Subject: [PATCH 11/43] revert back example --- R/ensemble_fselect.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/ensemble_fselect.R b/R/ensemble_fselect.R index dbceddf3..f568d112 100644 --- a/R/ensemble_fselect.R +++ b/R/ensemble_fselect.R @@ -47,7 +47,7 @@ #' init_resampling = rsmp("subsampling", repeats = 2), #' inner_resampling = rsmp("cv", folds = 3), #' measure = msr("classif.ce"), -#' terminator = trm("evals", 
n_evals = 5) +#' terminator = trm("evals", n_evals = 10) #' ) #' } ensemble_fselect = function( From dcf6728a2df7f4cd18f836c138f7a549094c8995 Mon Sep 17 00:00:00 2001 From: john Date: Thu, 16 May 2024 19:03:01 +0200 Subject: [PATCH 12/43] get importance scores from RFE --- R/ensemble_fselect.R | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/R/ensemble_fselect.R b/R/ensemble_fselect.R index f568d112..422a4b77 100644 --- a/R/ensemble_fselect.R +++ b/R/ensemble_fselect.R @@ -122,5 +122,13 @@ ensemble_fselect = function( set(grid, j = "n_features", value = n_features) set(grid, j = measure$id, value = scores) + # extract importance scores if RFE optimization was used + if (class(fselector)[1] == "FSelectorRFE") { + imp_scores = map(afss, function(afs) { + afs$fselect_result$importance[[1]] + }) + set(grid, j = "importance", value = imp_scores) + } + grid } From 82e9f7fb533b8747cceaaa33f0c8796614d5f775 Mon Sep 17 00:00:00 2001 From: john Date: Thu, 16 May 2024 19:04:17 +0200 Subject: [PATCH 13/43] update docs --- man/ensemble_fselect.Rd | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/man/ensemble_fselect.Rd b/man/ensemble_fselect.Rd index bdcf9e39..7e609c28 100644 --- a/man/ensemble_fselect.Rd +++ b/man/ensemble_fselect.Rd @@ -42,6 +42,9 @@ Stop criterion of the feature selection.} \item{callbacks}{(list of \link{CallbackFSelect})\cr List of callbacks.} + +\item{store_model}{(\code{logical(1)})\cr +Whether to store models in \link{auto_fselector} or not.} } \description{ Ensemble feature selection using multiple learners. @@ -51,19 +54,20 @@ machine learning models and resampling techniques. } \details{ The method begins by applying an initial resampling technique specified -by the user, to create multiple subsamples from the original dataset. +by the user, to create \strong{multiple subsamples} from the original dataset. This resampling process helps in generating diverse subsets of data for robust feature selection. 
+ For each subsample generated in the previous step, the method performs -wrapped-based feature selection using each provided learner, an inner -resampling method and a performance measure. +\strong{wrapped-based feature selection} (\link{auto_fselector}) using each provided +learner, the given inner resampling method, performance measure and +optimization algorithm. This process generates a best feature subset for each combination of subsample and learner. -All the results are stored in a \link{data.table} object. +Results are stored in a \link{data.table} object. } \examples{ \donttest{ - ensemble_fselect( fselector = fs("random_search"), task = tsk("sonar"), From a563ec81425ac4b53912d87f492da5919ad3bff6 Mon Sep 17 00:00:00 2001 From: john Date: Thu, 16 May 2024 19:11:31 +0200 Subject: [PATCH 14/43] update test --- tests/testthat/test_ensemble_fselect.R | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/tests/testthat/test_ensemble_fselect.R b/tests/testthat/test_ensemble_fselect.R index 00a28f55..bebac790 100644 --- a/tests/testthat/test_ensemble_fselect.R +++ b/tests/testthat/test_ensemble_fselect.R @@ -1,15 +1,19 @@ -test_that("esemble feature selection works", { +test_that("ensemble feature selection works", { res = ensemble_fselect( - fselector = fs("random_search"), + fselector = fs("rfe", n_features = 2, feature_fraction = 0.8), task = tsk("sonar"), learners = lrns(c("classif.rpart", "classif.featureless")), - outer_resampling = rsmp("subsampling", repeats = 2), + init_resampling = rsmp("subsampling", repeats = 2), inner_resampling = rsmp("cv", folds = 3), measure = msr("classif.ce"), - terminator = trm("evals", n_evals = 10) + terminator = trm("none") ) expect_data_table(res, nrows = 4) + expect_list(res$features, any.missing = FALSE, len = 4) + expect_vector(res$n_features, size = 4) + expect_vector(res$classif.ce, size = 4) + expect_list(res$importance, any.missing = FALSE, len = 4) }) From 
b3f1678ebb9c0b4e491698bc50a13bf31551f9ef Mon Sep 17 00:00:00 2001 From: john Date: Thu, 16 May 2024 22:38:33 +0200 Subject: [PATCH 15/43] fix typo --- R/ensemble_fselect.R | 2 +- man/ensemble_fselect.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/ensemble_fselect.R b/R/ensemble_fselect.R index 422a4b77..2edb698b 100644 --- a/R/ensemble_fselect.R +++ b/R/ensemble_fselect.R @@ -28,7 +28,7 @@ #' Can only be [mlr_resamplings_subsampling] or [mlr_resamplings_bootstrap]. #' @param inner_resampling ([mlr3::Resampling])\cr #' The inner resampling strategy used by the [FSelector]. -#' @param store_model (`logical(1)`)\cr +#' @param store_models (`logical(1)`)\cr #' Whether to store models in [auto_fselector] or not. #' #' @template param_fselector diff --git a/man/ensemble_fselect.Rd b/man/ensemble_fselect.Rd index 7e609c28..0393ff6d 100644 --- a/man/ensemble_fselect.Rd +++ b/man/ensemble_fselect.Rd @@ -43,7 +43,7 @@ Stop criterion of the feature selection.} \item{callbacks}{(list of \link{CallbackFSelect})\cr List of callbacks.} -\item{store_model}{(\code{logical(1)})\cr +\item{store_models}{(\code{logical(1)})\cr Whether to store models in \link{auto_fselector} or not.} } \description{ From f16a62106b54075ec86fe2645a050d6727df15bf Mon Sep 17 00:00:00 2001 From: john Date: Thu, 16 May 2024 23:03:06 +0200 Subject: [PATCH 16/43] fix warning 'Missing link' --- R/ensemble_fselect.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/R/ensemble_fselect.R b/R/ensemble_fselect.R index 2edb698b..0f58ccf2 100644 --- a/R/ensemble_fselect.R +++ b/R/ensemble_fselect.R @@ -1,5 +1,7 @@ #' @title Ensemble Feature Selection #' +#' @include CallbackFSelect.R +#' #' @description #' Ensemble feature selection using multiple learners. 
#' The ensemble feature selection method is designed to identify the @@ -67,7 +69,7 @@ ensemble_fselect = function( assert_choice(class(init_resampling)[1], choices = c("ResamplingBootstrap", "ResamplingSubsampling")) - # create fselector for each learner + # create auto_fselector for each learner afss = map(learners, function(learner) { auto_fselector( fselector = fselector, From be13f33c69acf08f1c5f2fb88a492c2e71ea5745 Mon Sep 17 00:00:00 2001 From: john Date: Tue, 21 May 2024 11:57:40 +0200 Subject: [PATCH 17/43] fixes after main merge --- R/ensemble_fselect.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/ensemble_fselect.R b/R/ensemble_fselect.R index 0f58ccf2..021956af 100644 --- a/R/ensemble_fselect.R +++ b/R/ensemble_fselect.R @@ -1,6 +1,6 @@ #' @title Ensemble Feature Selection #' -#' @include CallbackFSelect.R +#' @include CallbackBatchFSelect.R #' #' @description #' Ensemble feature selection using multiple learners. @@ -125,7 +125,7 @@ ensemble_fselect = function( set(grid, j = measure$id, value = scores) # extract importance scores if RFE optimization was used - if (class(fselector)[1] == "FSelectorRFE") { + if (class(fselector)[1] == "FSelectorBatchRFE") { imp_scores = map(afss, function(afs) { afs$fselect_result$importance[[1]] }) From 3f4b684d3941a57d08cdc448c1ed555f8c0fcb3b Mon Sep 17 00:00:00 2001 From: john Date: Tue, 21 May 2024 11:58:56 +0200 Subject: [PATCH 18/43] updocs --- man/AutoFSelector.Rd | 1 - man/ensemble_fselect.Rd | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/man/AutoFSelector.Rd b/man/AutoFSelector.Rd index bc2c8bb8..383a14ee 100644 --- a/man/AutoFSelector.Rd +++ b/man/AutoFSelector.Rd @@ -149,7 +149,6 @@ Hash (unique identifier) for this partial object, excluding some components whic \if{html}{\out{
Inherited methods
    -
  • mlr3::Learner$estimate_memory_usage()
  • mlr3::Learner$format()
  • mlr3::Learner$help()
  • mlr3::Learner$predict()
  • diff --git a/man/ensemble_fselect.Rd b/man/ensemble_fselect.Rd index 0393ff6d..24878d94 100644 --- a/man/ensemble_fselect.Rd +++ b/man/ensemble_fselect.Rd @@ -40,7 +40,7 @@ Measure to optimize. If \code{NULL}, default measure is used.} \item{terminator}{(\link{Terminator})\cr Stop criterion of the feature selection.} -\item{callbacks}{(list of \link{CallbackFSelect})\cr +\item{callbacks}{(list of \link{CallbackBatchFSelect})\cr List of callbacks.} \item{store_models}{(\code{logical(1)})\cr From 150d9a3d0823cf858a1778abdbc8ef6b966ffd32 Mon Sep 17 00:00:00 2001 From: john Date: Tue, 21 May 2024 17:13:54 +0200 Subject: [PATCH 19/43] fix bug in one-se callback and refactor --- R/mlr_callbacks.R | 19 ++++++++++++++----- man/mlr3fselect.one_se_rule.Rd | 4 +++- 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/R/mlr_callbacks.R b/R/mlr_callbacks.R index 99bad922..ea2d26cf 100644 --- a/R/mlr_callbacks.R +++ b/R/mlr_callbacks.R @@ -116,7 +116,9 @@ load_callback_svm_rfe = function() { #' #' @description #' Selects the smallest feature set within one standard error of the best as the result. -#' If there are multiple feature sets with the same performance and number of features, the first one is selected. +#' If there are multiple such feature sets with the same number of features, the first one is selected. +#' If the sets have exactly the same performance but different number of features, +#' the one with the smallest number of features is selected. 
#' #' @source #' `r format_bib("kuhn2013")` @@ -152,10 +154,17 @@ load_callback_one_se_rule = function() { y = data[[archive$cols_y]] se = sd(y) / sqrt(length(y)) - # select smallest future set within one standard error of the best - best_y = context$instance$result_y - data = data[y > best_y - se & y < best_y + se, ][which.min(n_features)] - context$instance$.__enclos_env__$private$.result = data[, setdiff(names(context$instance$result), "x_domain"), with = FALSE] + columns_to_keep = setdiff(names(context$instance$result), "x_domain") + if (se == 0) { + # select smallest future set when all scores are the same + context$instance$.__enclos_env__$private$.result = + data[,columns_to_keep, with = FALSE][which.min(n_features)] + } else { + # select smallest future set within one standard error of the best + best_y = context$instance$result_y + context$instance$.__enclos_env__$private$.result = + data[y > best_y - se & y < best_y + se, columns_to_keep, with = FALSE][which.min(n_features)] + } } ) } diff --git a/man/mlr3fselect.one_se_rule.Rd b/man/mlr3fselect.one_se_rule.Rd index 90faa355..ccea397b 100644 --- a/man/mlr3fselect.one_se_rule.Rd +++ b/man/mlr3fselect.one_se_rule.Rd @@ -12,7 +12,9 @@ ISBN 978-1-4614-6849-3. } \description{ Selects the smallest feature set within one standard error of the best as the result. -If there are multiple feature sets with the same performance and number of features, the first one is selected. +If there are multiple such feature sets with the same number of features, the first one is selected. +If the sets have exactly the same performance but different number of features, +the one with the smallest number of features is selected. 
} \examples{ clbk("mlr3fselect.one_se_rule") From f0b1098c58f012235d5e75a0eec0099bfe9de7f4 Mon Sep 17 00:00:00 2001 From: john Date: Wed, 22 May 2024 14:09:10 +0200 Subject: [PATCH 20/43] add citations --- R/bibentries.R | 40 +++++++++++++++++++++++++++++++++++++++- R/ensemble_fselect.R | 2 ++ man/ensemble_fselect.Rd | 16 ++++++++++++++++ 3 files changed, 57 insertions(+), 1 deletion(-) diff --git a/R/bibentries.R b/R/bibentries.R index bf93a7e6..a7ecb1df 100644 --- a/R/bibentries.R +++ b/R/bibentries.R @@ -66,6 +66,44 @@ bibentries = c( address = "New York, NY", pages = "61--92", isbn = "978-1-4614-6849-3" + ), + + saeys2008 = bibentry("article", + author = "Saeys, Yvan and Abeel, Thomas and Van De Peer, Yves", + doi = "10.1007/978-3-540-87481-2_21", + isbn = "3540874801", + journal = "Machine Learning and Knowledge Discovery in Databases", + pages = "313--325", + publisher = "Springer, Berlin, Heidelberg", + title = "Robust feature selection using ensemble feature selection techniques", + volume = "5212 LNAI", + year = "2008" + ), + + abeel2010 = bibentry("article", + author = "Abeel, Thomas and Helleputte, Thibault and Van de Peer, Yves and Dupont, Pierre and Saeys, Yvan", + doi = "10.1093/BIOINFORMATICS/BTP630", + issn = "1367-4803", + journal = "Bioinformatics", + month = "feb", + pages = "392--398", + publisher = "Oxford Academic", + title = "Robust biomarker identification for cancer diagnosis with ensemble feature selection methods", + volume = "26", + year = "2010" + ), + + pes2020 = bibentry("article", + author = "Pes, Barbara", + doi = "10.1007/s00521-019-04082-3", + issn = "14333058", + journal = "Neural Computing and Applications", + month = "may", + number = "10", + pages = "5951--5973", + publisher = "Springer", + title = "Ensemble feature selection for high-dimensional data: a stability analysis across multiple domains", + volume = "32", + year = "2020" ) ) - diff --git a/R/ensemble_fselect.R b/R/ensemble_fselect.R index 021956af..93b4c2ab 100644 
--- a/R/ensemble_fselect.R +++ b/R/ensemble_fselect.R @@ -39,6 +39,8 @@ #' @template param_terminator #' @template param_callbacks #' +#' @source +#' `r format_bib("saeys2008", "abeel2010", "pes2020")` #' @export #' @examples #' \donttest{ diff --git a/man/ensemble_fselect.Rd b/man/ensemble_fselect.Rd index 24878d94..f92d82b7 100644 --- a/man/ensemble_fselect.Rd +++ b/man/ensemble_fselect.Rd @@ -3,6 +3,22 @@ \name{ensemble_fselect} \alias{ensemble_fselect} \title{Ensemble Feature Selection} +\source{ +Saeys, Yvan, Abeel, Thomas, Van De Peer, Yves (2008). +\dQuote{Robust feature selection using ensemble feature selection techniques.} +\emph{Machine Learning and Knowledge Discovery in Databases}, \bold{5212 LNAI}, 313--325. +\doi{10.1007/978-3-540-87481-2_21}. + +Abeel, Thomas, Helleputte, Thibault, Van de Peer, Yves, Dupont, Pierre, Saeys, Yvan (2010). +\dQuote{Robust biomarker identification for cancer diagnosis with ensemble feature selection methods.} +\emph{Bioinformatics}, \bold{26}, 392--398. +ISSN 1367-4803, \doi{10.1093/BIOINFORMATICS/BTP630}. + +Pes, Barbara (2020). +\dQuote{Ensemble feature selection for high-dimensional data: a stability analysis across multiple domains.} +\emph{Neural Computing and Applications}, \bold{32}(10), 5951--5973. +ISSN 14333058, \doi{10.1007/s00521-019-04082-3}. 
+} \usage{ ensemble_fselect( fselector, From a50d0405189e5f85b93d2df91cd03b5b307acc01 Mon Sep 17 00:00:00 2001 From: be-marc Date: Fri, 31 May 2024 11:27:44 +0200 Subject: [PATCH 21/43] feat: add result object --- DESCRIPTION | 4 +- R/EnsembleFSResult.R | 75 +++++++++++++++ R/ensemble_fselect.R | 2 +- man/EnsembleFSResult.Rd | 123 +++++++++++++++++++++++++ tests/testthat/test_ensemble_fselect.R | 27 ++++-- 5 files changed, 223 insertions(+), 8 deletions(-) create mode 100644 R/EnsembleFSResult.R create mode 100644 man/EnsembleFSResult.Rd diff --git a/DESCRIPTION b/DESCRIPTION index bc854ec2..2559273d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -31,7 +31,8 @@ Imports: lgr, mlr3misc (>= 0.15.0.9000), paradox (>= 1.0.0), - R6 + R6, + stabm Suggests: e1071, genalg, @@ -55,6 +56,7 @@ Collate: 'AutoFSelector.R' 'CallbackBatchFSelect.R' 'ContextBatchFSelect.R' + 'EnsembleFSResult.R' 'FSelectInstanceBatchSingleCrit.R' 'FSelectInstanceBatchMultiCrit.R' 'mlr_fselectors.R' diff --git a/R/EnsembleFSResult.R b/R/EnsembleFSResult.R new file mode 100644 index 00000000..80794c0b --- /dev/null +++ b/R/EnsembleFSResult.R @@ -0,0 +1,75 @@ +#' @title Ensemble Feature Selection Result +#' +#' @description +#' The `EnsembleFSResult` stores the results of the ensemble feature selection. +#' The function [ensemble_fselect()] returns an object of this class. 
+#' +#' @examples +#' \donttest{ +#' efsr = ensemble_fselect( +#' fselector = fs("rfe", n_features = 2, feature_fraction = 0.8), +#' task = tsk("sonar"), +#' learners = lrns(c("classif.rpart", "classif.featureless")), +#' init_resampling = rsmp("subsampling", repeats = 2), +#' inner_resampling = rsmp("cv", folds = 3), +#' measure = msr("classif.ce"), +#' terminator = trm("none") +#' ) +#' +#' # contains the benchmark result +#' efsr$benchmark_result +#' +#' # contains the selected features for each iteration +#' efsr$grid +#' +#' # returns the stability of the selected features +#' efsr$stability(stability_measure = "jaccard") +#' } +EnsembleFSResult = R6Class("EnsembleFSResult", + public = list( + + #' @field benchmark_result (`BenchmarkResult`)\cr + #' The benchmark result object. + benchmark_result = NULL, + + #' @field grid (`data.table`)\cr + #' The grid of feature selection results. + grid = NULL, + + #' @description + #' Creates a new instance of this [R6][R6::R6Class] class. + #' + #' @param benchmark_result (`BenchmarkResult`)\cr + #' The benchmark result object. + #' @param grid (`data.table`)\cr + #' The grid of feature selection results. + initialize = function(benchmark_result, grid) { + self$benchmark_result = assert_benchmark_result(benchmark_result) + self$grid = assert_data_table(grid) + }, + + #' @description + #' Returns the feature ranking. + feature_ranking = function() { + + }, + + #' @description + #' Calculates the stability of the selected features with the `stabm` package. + #' + #' @param stability_measure (`character(1)`)\cr + #' The stability measure to be used. + #' One of the measures returned by [stabm::listStabilityMeasures()] in lower case. + #' Default is `"jaccard"`. + #' @param ... (`any`)\cr + #' Additional arguments passed to the stability measure function. + stability = function(stability_measure = "jaccard", ...) 
{ + funs = stabm::listStabilityMeasures()$Name + keys = tolower(gsub("stability", "", funs)) + assert_choice(stability_measure, choices = keys) + + fun = get(funs[which(stability_measure == keys)], envir = asNamespace("stabm")) + fun(self$grid$features, ...) + } + ) +) diff --git a/R/ensemble_fselect.R b/R/ensemble_fselect.R index 93b4c2ab..2244ce38 100644 --- a/R/ensemble_fselect.R +++ b/R/ensemble_fselect.R @@ -134,5 +134,5 @@ ensemble_fselect = function( set(grid, j = "importance", value = imp_scores) } - grid + EnsembleFSResult$new(bmr, grid) } diff --git a/man/EnsembleFSResult.Rd b/man/EnsembleFSResult.Rd new file mode 100644 index 00000000..6c679277 --- /dev/null +++ b/man/EnsembleFSResult.Rd @@ -0,0 +1,123 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/EnsembleFSResult.R +\name{EnsembleFSResult} +\alias{EnsembleFSResult} +\title{Ensemble Feature Selection Result} +\description{ +The \code{EnsembleFSResult} stores the results of the ensemble feature selection. +The function \code{\link[=ensemble_fselect]{ensemble_fselect()}} returns an object of this class. +} +\examples{ +\donttest{ +efsr = ensemble_fselect( + fselector = fs("rfe", n_features = 2, feature_fraction = 0.8), + task = tsk("sonar"), + learners = lrns(c("classif.rpart", "classif.featureless")), + init_resampling = rsmp("subsampling", repeats = 2), + inner_resampling = rsmp("cv", folds = 3), + measure = msr("classif.ce"), + terminator = trm("none") +) + +# contains the benchmark result +efsr$benchmark_result + +# contains the selected features for each iteration +efsr$grid + +# returns the stability of the selected features +efsr$stability(stability_measure = "jaccard") +} +} +\section{Public fields}{ +\if{html}{\out{
    }} +\describe{ +\item{\code{benchmark_result}}{(\code{BenchmarkResult})\cr +The benchmark result object.} + +\item{\code{grid}}{(\code{data.table})\cr +The grid of feature selection results.} +} +\if{html}{\out{
    }} +} +\section{Methods}{ +\subsection{Public methods}{ +\itemize{ +\item \href{#method-EnsembleFSResult-new}{\code{EnsembleFSResult$new()}} +\item \href{#method-EnsembleFSResult-feature_ranking}{\code{EnsembleFSResult$feature_ranking()}} +\item \href{#method-EnsembleFSResult-stability}{\code{EnsembleFSResult$stability()}} +\item \href{#method-EnsembleFSResult-clone}{\code{EnsembleFSResult$clone()}} +} +} +\if{html}{\out{
    }} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-EnsembleFSResult-new}{}}} +\subsection{Method \code{new()}}{ +Creates a new instance of this \link[R6:R6Class]{R6} class. +\subsection{Usage}{ +\if{html}{\out{
    }}\preformatted{EnsembleFSResult$new(benchmark_result, grid)}\if{html}{\out{
    }} +} + +\subsection{Arguments}{ +\if{html}{\out{
    }} +\describe{ +\item{\code{benchmark_result}}{(\code{BenchmarkResult})\cr +The benchmark result object.} + +\item{\code{grid}}{(\code{data.table})\cr +The grid of feature selection results.} +} +\if{html}{\out{
    }} +} +} +\if{html}{\out{
    }} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-EnsembleFSResult-feature_ranking}{}}} +\subsection{Method \code{feature_ranking()}}{ +Returns the feature ranking. +\subsection{Usage}{ +\if{html}{\out{
    }}\preformatted{EnsembleFSResult$feature_ranking()}\if{html}{\out{
    }} +} + +} +\if{html}{\out{
    }} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-EnsembleFSResult-stability}{}}} +\subsection{Method \code{stability()}}{ +Calculates the stability of the selected features with the \code{stabm} package. +\subsection{Usage}{ +\if{html}{\out{
    }}\preformatted{EnsembleFSResult$stability(stability_measure = "jaccard", ...)}\if{html}{\out{
    }} +} + +\subsection{Arguments}{ +\if{html}{\out{
    }} +\describe{ +\item{\code{stability_measure}}{(\code{character(1)})\cr +The stability measure to be used. +One of the measures returned by \code{\link[stabm:listStabilityMeasures]{stabm::listStabilityMeasures()}} in lower case. +Default is \code{"jaccard"}.} + +\item{\code{...}}{(\code{any})\cr +Additional arguments passed to the stability measure function.} +} +\if{html}{\out{
    }} +} +} +\if{html}{\out{
    }} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-EnsembleFSResult-clone}{}}} +\subsection{Method \code{clone()}}{ +The objects of this class are cloneable with this method. +\subsection{Usage}{ +\if{html}{\out{
    }}\preformatted{EnsembleFSResult$clone(deep = FALSE)}\if{html}{\out{
    }} +} + +\subsection{Arguments}{ +\if{html}{\out{
    }} +\describe{ +\item{\code{deep}}{Whether to make a deep clone.} +} +\if{html}{\out{
    }} +} +} +} diff --git a/tests/testthat/test_ensemble_fselect.R b/tests/testthat/test_ensemble_fselect.R index bebac790..39be1201 100644 --- a/tests/testthat/test_ensemble_fselect.R +++ b/tests/testthat/test_ensemble_fselect.R @@ -1,5 +1,5 @@ test_that("ensemble feature selection works", { - res = ensemble_fselect( + efsr = ensemble_fselect( fselector = fs("rfe", n_features = 2, feature_fraction = 0.8), task = tsk("sonar"), learners = lrns(c("classif.rpart", "classif.featureless")), @@ -9,11 +9,26 @@ test_that("ensemble feature selection works", { terminator = trm("none") ) - expect_data_table(res, nrows = 4) - expect_list(res$features, any.missing = FALSE, len = 4) - expect_vector(res$n_features, size = 4) - expect_vector(res$classif.ce, size = 4) - expect_list(res$importance, any.missing = FALSE, len = 4) + expect_data_table(efsr$grid, nrows = 4) + expect_list(efsr$grid$features, any.missing = FALSE, len = 4) + expect_vector(efsr$grid$n_features, size = 4) + expect_vector(efsr$grid$classif.ce, size = 4) + expect_list(efsr$grid$importance, any.missing = FALSE, len = 4) + expect_benchmark_result(efsr$benchmark_result) +}) + +test_that("stability method works", { + efsr = ensemble_fselect( + fselector = fs("rfe", n_features = 2, feature_fraction = 0.8), + task = tsk("sonar"), + learners = lrns(c("classif.rpart", "classif.featureless")), + init_resampling = rsmp("subsampling", repeats = 2), + inner_resampling = rsmp("cv", folds = 3), + measure = msr("classif.ce"), + terminator = trm("none") + ) + + expect_number(efsr$stability(stability_measure = "jaccard")) }) From a391bc3740e31043eb47a151a6b0d42a97928fe4 Mon Sep 17 00:00:00 2001 From: john Date: Fri, 31 May 2024 11:32:30 +0200 Subject: [PATCH 22/43] add John as author --- DESCRIPTION | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 2559273d..f4fe8cdf 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -9,7 +9,9 @@ Authors@R: c( person("Michel", "Lang", , 
"michellang@gmail.com", role = "aut", comment = c(ORCID = "0000-0001-9754-0393")), person("Bernd", "Bischl", , "bernd_bischl@gmx.net", role = "aut", - comment = c(ORCID = "0000-0001-6002-6980")) + comment = c(ORCID = "0000-0001-6002-6980")), + person("John", "Zobolas", , "bblodfon@gmail.com", role = "aut", + comment = c(ORCID = "0000-0002-3609-8674")) ) Description: Feature selection package of the 'mlr3' ecosystem. It selects the optimal feature set for any 'mlr3' learner. The package works with From 4bcc8a9bc9fbc689b4ba7af3e8530ef81e4096c8 Mon Sep 17 00:00:00 2001 From: be-marc Date: Fri, 31 May 2024 11:45:34 +0200 Subject: [PATCH 23/43] refactor: remove r6 objects from grid --- NAMESPACE | 1 + R/EnsembleFSResult.R | 38 ++++++++++++++++++-------- R/ensemble_fselect.R | 3 ++ man/EnsembleFSResult.Rd | 21 ++++++++------ tests/testthat/test_ensemble_fselect.R | 10 +++---- 5 files changed, 49 insertions(+), 24 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 9fe1de0a..85d360a8 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -2,6 +2,7 @@ S3method(as.data.table,ArchiveBatchFSelect) S3method(as.data.table,DictionaryFSelector) +S3method(as.data.table,EnsembleFSResult) S3method(extract_inner_fselect_archives,BenchmarkResult) S3method(extract_inner_fselect_archives,ResampleResult) S3method(extract_inner_fselect_results,BenchmarkResult) diff --git a/R/EnsembleFSResult.R b/R/EnsembleFSResult.R index 80794c0b..68731a45 100644 --- a/R/EnsembleFSResult.R +++ b/R/EnsembleFSResult.R @@ -20,7 +20,7 @@ #' efsr$benchmark_result #' #' # contains the selected features for each iteration -#' efsr$grid +#' efsr$result #' #' # returns the stability of the selected features #' efsr$stability(stability_measure = "jaccard") @@ -29,23 +29,19 @@ EnsembleFSResult = R6Class("EnsembleFSResult", public = list( #' @field benchmark_result (`BenchmarkResult`)\cr - #' The benchmark result object. + #' The benchmark result. 
benchmark_result = NULL, - #' @field grid (`data.table`)\cr - #' The grid of feature selection results. - grid = NULL, - #' @description #' Creates a new instance of this [R6][R6::R6Class] class. #' #' @param benchmark_result (`BenchmarkResult`)\cr #' The benchmark result object. - #' @param grid (`data.table`)\cr - #' The grid of feature selection results. - initialize = function(benchmark_result, grid) { + #' @param result ([data.table::data.table])\cr + #' The result of the ensemble feature selection results. + initialize = function(benchmark_result, result) { self$benchmark_result = assert_benchmark_result(benchmark_result) - self$grid = assert_data_table(grid) + private$.result = assert_data_table(result) }, #' @description @@ -69,7 +65,27 @@ EnsembleFSResult = R6Class("EnsembleFSResult", assert_choice(stability_measure, choices = keys) fun = get(funs[which(stability_measure == keys)], envir = asNamespace("stabm")) - fun(self$grid$features, ...) + fun(self$result$features, ...) + } + ), + + active = list( + + #' @field result ([data.table::data.table])\cr + #' Returns the result of the ensemble feature selection. + result = function(rhs) { + assert_ro_binding(rhs) + tab = as.data.table(self$benchmark_result)[, c("task", "learner", "resampling"), with = FALSE] + cbind(private$.result, tab) } + ), + + private = list( + .result = NULL ) ) + +#' @export +as.data.table.EnsembleFSResult = function(x, ...) 
{ + x$result +} diff --git a/R/ensemble_fselect.R b/R/ensemble_fselect.R index 2244ce38..2f0fdf71 100644 --- a/R/ensemble_fselect.R +++ b/R/ensemble_fselect.R @@ -134,5 +134,8 @@ ensemble_fselect = function( set(grid, j = "importance", value = imp_scores) } + set(grid, j = "learner", value = NULL) + set(grid, j = "task", value = NULL) + set(grid, j = "resampling", value = NULL) EnsembleFSResult$new(bmr, grid) } diff --git a/man/EnsembleFSResult.Rd b/man/EnsembleFSResult.Rd index 6c679277..306047d9 100644 --- a/man/EnsembleFSResult.Rd +++ b/man/EnsembleFSResult.Rd @@ -23,7 +23,7 @@ efsr = ensemble_fselect( efsr$benchmark_result # contains the selected features for each iteration -efsr$grid +efsr$result # returns the stability of the selected features efsr$stability(stability_measure = "jaccard") @@ -33,10 +33,15 @@ efsr$stability(stability_measure = "jaccard") \if{html}{\out{
    }} \describe{ \item{\code{benchmark_result}}{(\code{BenchmarkResult})\cr -The benchmark result object.} - -\item{\code{grid}}{(\code{data.table})\cr -The grid of feature selection results.} +The benchmark result.} +} +\if{html}{\out{
    }} +} +\section{Active bindings}{ +\if{html}{\out{
    }} +\describe{ +\item{\code{result}}{(\link[data.table:data.table]{data.table::data.table})\cr +Returns the result of the ensemble feature selection.} } \if{html}{\out{
    }} } @@ -55,7 +60,7 @@ The grid of feature selection results.} \subsection{Method \code{new()}}{ Creates a new instance of this \link[R6:R6Class]{R6} class. \subsection{Usage}{ -\if{html}{\out{
    }}\preformatted{EnsembleFSResult$new(benchmark_result, grid)}\if{html}{\out{
    }} +\if{html}{\out{
    }}\preformatted{EnsembleFSResult$new(benchmark_result, result)}\if{html}{\out{
    }} } \subsection{Arguments}{ @@ -64,8 +69,8 @@ Creates a new instance of this \link[R6:R6Class]{R6} class. \item{\code{benchmark_result}}{(\code{BenchmarkResult})\cr The benchmark result object.} -\item{\code{grid}}{(\code{data.table})\cr -The grid of feature selection results.} +\item{\code{result}}{(\link[data.table:data.table]{data.table::data.table})\cr +The result of the ensemble feature selection results.} } \if{html}{\out{}} } diff --git a/tests/testthat/test_ensemble_fselect.R b/tests/testthat/test_ensemble_fselect.R index 39be1201..02ea0ced 100644 --- a/tests/testthat/test_ensemble_fselect.R +++ b/tests/testthat/test_ensemble_fselect.R @@ -9,11 +9,11 @@ test_that("ensemble feature selection works", { terminator = trm("none") ) - expect_data_table(efsr$grid, nrows = 4) - expect_list(efsr$grid$features, any.missing = FALSE, len = 4) - expect_vector(efsr$grid$n_features, size = 4) - expect_vector(efsr$grid$classif.ce, size = 4) - expect_list(efsr$grid$importance, any.missing = FALSE, len = 4) + expect_data_table(efsr$result, nrows = 4) + expect_list(efsr$result$features, any.missing = FALSE, len = 4) + expect_vector(efsr$result$n_features, size = 4) + expect_vector(efsr$result$classif.ce, size = 4) + expect_list(efsr$result$importance, any.missing = FALSE, len = 4) expect_benchmark_result(efsr$benchmark_result) }) From 62011f30e5c377697413f72b2de52ceef03c8c42 Mon Sep 17 00:00:00 2001 From: be-marc Date: Fri, 31 May 2024 12:06:17 +0200 Subject: [PATCH 24/43] feat: add feature_ranking method --- R/EnsembleFSResult.R | 21 +++++++++++++++++++-- man/EnsembleFSResult.Rd | 13 +++++++++++-- man/mlr3fselect-package.Rd | 1 + tests/testthat/test_ensemble_fselect.R | 17 +++-------------- 4 files changed, 34 insertions(+), 18 deletions(-) diff --git a/R/EnsembleFSResult.R b/R/EnsembleFSResult.R index 68731a45..99ed0ab7 100644 --- a/R/EnsembleFSResult.R +++ b/R/EnsembleFSResult.R @@ -45,9 +45,26 @@ EnsembleFSResult = R6Class("EnsembleFSResult", }, #' @description - #' 
Returns the feature ranking. - feature_ranking = function() { + #' Calculates the feature ranking. + #' + #' @param method (`character(1)`)\cr + #' The method to calculate the feature ranking. + #' Currently, only `"inclusion_probability"` is supported. + feature_ranking = function(method = "inclusion_probability") { + assert_choice(method, choices = "inclusion_probability") + + features = self$benchmark_result$tasks$task[[1]]$feature_names + + count = map_int(features, function(feature) { + sum(map_lgl(self$result$features, function(iteration) { + feature %in% iteration + })) + }) + + res = data.table(feature = features, inclusion_probability = count / nrow(self$result)) + setorderv(res, "inclusion_probability", order = -1L) + res }, #' @description diff --git a/man/EnsembleFSResult.Rd b/man/EnsembleFSResult.Rd index 306047d9..15e70ed2 100644 --- a/man/EnsembleFSResult.Rd +++ b/man/EnsembleFSResult.Rd @@ -79,11 +79,20 @@ The result of the ensemble feature selection results.} \if{html}{\out{}} \if{latex}{\out{\hypertarget{method-EnsembleFSResult-feature_ranking}{}}} \subsection{Method \code{feature_ranking()}}{ -Returns the feature ranking. +Calculates the feature ranking. \subsection{Usage}{ -\if{html}{\out{
    }}\preformatted{EnsembleFSResult$feature_ranking()}\if{html}{\out{
    }} +\if{html}{\out{
    }}\preformatted{EnsembleFSResult$feature_ranking(method = "inclusion_probability")}\if{html}{\out{
    }} } +\subsection{Arguments}{ +\if{html}{\out{
    }} +\describe{ +\item{\code{method}}{(\code{character(1)})\cr +The method to calculate the feature ranking. +Currently, only \code{"inclusion_probability"} is supported.} +} +\if{html}{\out{
    }} +} } \if{html}{\out{
    }} \if{html}{\out{}} diff --git a/man/mlr3fselect-package.Rd b/man/mlr3fselect-package.Rd index 2548f8f9..51086bdc 100644 --- a/man/mlr3fselect-package.Rd +++ b/man/mlr3fselect-package.Rd @@ -27,6 +27,7 @@ Authors: \item Patrick Schratz \email{patrick.schratz@gmail.com} (\href{https://orcid.org/0000-0003-0748-6624}{ORCID}) \item Michel Lang \email{michellang@gmail.com} (\href{https://orcid.org/0000-0001-9754-0393}{ORCID}) \item Bernd Bischl \email{bernd_bischl@gmx.net} (\href{https://orcid.org/0000-0001-6002-6980}{ORCID}) + \item John Zobolas \email{bblodfon@gmail.com} (\href{https://orcid.org/0000-0002-3609-8674}{ORCID}) } } diff --git a/tests/testthat/test_ensemble_fselect.R b/tests/testthat/test_ensemble_fselect.R index 02ea0ced..74ff1678 100644 --- a/tests/testthat/test_ensemble_fselect.R +++ b/tests/testthat/test_ensemble_fselect.R @@ -15,20 +15,9 @@ test_that("ensemble feature selection works", { expect_vector(efsr$result$classif.ce, size = 4) expect_list(efsr$result$importance, any.missing = FALSE, len = 4) expect_benchmark_result(efsr$benchmark_result) -}) - -test_that("stability method works", { - efsr = ensemble_fselect( - fselector = fs("rfe", n_features = 2, feature_fraction = 0.8), - task = tsk("sonar"), - learners = lrns(c("classif.rpart", "classif.featureless")), - init_resampling = rsmp("subsampling", repeats = 2), - inner_resampling = rsmp("cv", folds = 3), - measure = msr("classif.ce"), - terminator = trm("none") - ) expect_number(efsr$stability(stability_measure = "jaccard")) + feature_ranking = efsr$feature_ranking() + expect_data_table(feature_ranking, nrows = 60) + expect_names(names(feature_ranking), identical.to = c("feature", "inclusion_probability")) }) - - From df1fd1567f228105b463f9969cb41c1bf9f20b50 Mon Sep 17 00:00:00 2001 From: be-marc Date: Fri, 31 May 2024 12:22:06 +0200 Subject: [PATCH 25/43] feat: cache results --- R/EnsembleFSResult.R | 30 ++++++++++++++++++++++++------ man/EnsembleFSResult.Rd | 15 ++++++++++++--- 2 files 
changed, 36 insertions(+), 9 deletions(-) diff --git a/R/EnsembleFSResult.R b/R/EnsembleFSResult.R index 99ed0ab7..a5dc32bf 100644 --- a/R/EnsembleFSResult.R +++ b/R/EnsembleFSResult.R @@ -28,14 +28,14 @@ EnsembleFSResult = R6Class("EnsembleFSResult", public = list( - #' @field benchmark_result (`BenchmarkResult`)\cr + #' @field benchmark_result ([mlr3::BenchmarkResult])\cr #' The benchmark result. benchmark_result = NULL, #' @description #' Creates a new instance of this [R6][R6::R6Class] class. #' - #' @param benchmark_result (`BenchmarkResult`)\cr + #' @param benchmark_result ([mlr3::BenchmarkResult])\cr #' The benchmark result object. #' @param result ([data.table::data.table])\cr #' The result of the ensemble feature selection results. @@ -53,6 +53,11 @@ EnsembleFSResult = R6Class("EnsembleFSResult", feature_ranking = function(method = "inclusion_probability") { assert_choice(method, choices = "inclusion_probability") + # cached results + if (!is.null(private$.feature_ranking[[method]])) { + return(private$.feature_ranking[[method]]) + } + features = self$benchmark_result$tasks$task[[1]]$feature_names count = map_int(features, function(feature) { @@ -64,11 +69,14 @@ EnsembleFSResult = R6Class("EnsembleFSResult", res = data.table(feature = features, inclusion_probability = count / nrow(self$result)) setorderv(res, "inclusion_probability", order = -1L) - res + private$.feature_ranking[[method]] = res + private$.feature_ranking[[method]] }, #' @description #' Calculates the stability of the selected features with the `stabm` package. + #' The results are cached. + #' When the same stability measure is requested again with different arguments, the cache must be reset. #' #' @param stability_measure (`character(1)`)\cr #' The stability measure to be used. @@ -76,13 +84,21 @@ EnsembleFSResult = R6Class("EnsembleFSResult", #' Default is `"jaccard"`. #' @param ... (`any`)\cr #' Additional arguments passed to the stability measure function. 
- stability = function(stability_measure = "jaccard", ...) { + #' @param reset_cache (`logical(1)`)\cr + #' If `TRUE`, the cached results are ignored. + stability = function(stability_measure = "jaccard", ..., reset_cache = FALSE) { funs = stabm::listStabilityMeasures()$Name keys = tolower(gsub("stability", "", funs)) assert_choice(stability_measure, choices = keys) + # cached results + if (!is.null(private$.stability[[stability_measure]]) && !reset_cache) { + return(private$.stability[[stability_measure]]) + } + fun = get(funs[which(stability_measure == keys)], envir = asNamespace("stabm")) - fun(self$result$features, ...) + private$.stability[[stability_measure]] = fun(self$result$features, ...) + private$.stability[[stability_measure]] } ), @@ -98,7 +114,9 @@ EnsembleFSResult = R6Class("EnsembleFSResult", ), private = list( - .result = NULL + .result = NULL, + .stability = NULL, + .feature_ranking = NULL ) ) diff --git a/man/EnsembleFSResult.Rd b/man/EnsembleFSResult.Rd index 15e70ed2..f854803b 100644 --- a/man/EnsembleFSResult.Rd +++ b/man/EnsembleFSResult.Rd @@ -32,7 +32,7 @@ efsr$stability(stability_measure = "jaccard") \section{Public fields}{ \if{html}{\out{
    }} \describe{ -\item{\code{benchmark_result}}{(\code{BenchmarkResult})\cr +\item{\code{benchmark_result}}{(\link[mlr3:BenchmarkResult]{mlr3::BenchmarkResult})\cr The benchmark result.} } \if{html}{\out{
    }} @@ -66,7 +66,7 @@ Creates a new instance of this \link[R6:R6Class]{R6} class. \subsection{Arguments}{ \if{html}{\out{
    }} \describe{ -\item{\code{benchmark_result}}{(\code{BenchmarkResult})\cr +\item{\code{benchmark_result}}{(\link[mlr3:BenchmarkResult]{mlr3::BenchmarkResult})\cr The benchmark result object.} \item{\code{result}}{(\link[data.table:data.table]{data.table::data.table})\cr @@ -99,8 +99,14 @@ Currently, only \code{"inclusion_probability"} is supported.} \if{latex}{\out{\hypertarget{method-EnsembleFSResult-stability}{}}} \subsection{Method \code{stability()}}{ Calculates the stability of the selected features with the \code{stabm} package. +The results are cached. +When the same stability measure is requested again with different arguments, the cache must be reset. \subsection{Usage}{ -\if{html}{\out{
    }}\preformatted{EnsembleFSResult$stability(stability_measure = "jaccard", ...)}\if{html}{\out{
    }} +\if{html}{\out{
    }}\preformatted{EnsembleFSResult$stability( + stability_measure = "jaccard", + ..., + reset_cache = FALSE +)}\if{html}{\out{
    }} } \subsection{Arguments}{ @@ -113,6 +119,9 @@ Default is \code{"jaccard"}.} \item{\code{...}}{(\code{any})\cr Additional arguments passed to the stability measure function.} + +\item{\code{reset_cache}}{(\code{logical(1)})\cr +If \code{TRUE}, the cached results are ignored.} } \if{html}{\out{
    }} } From bb55020ea66acb07cd58e53a39b8e99b1f11a91c Mon Sep 17 00:00:00 2001 From: be-marc Date: Fri, 31 May 2024 12:51:11 +0200 Subject: [PATCH 26/43] feat: allow different callbacks --- R/ensemble_fselect.R | 14 ++++++++------ tests/testthat/test_ensemble_fselect.R | 12 ++++++++++++ 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/R/ensemble_fselect.R b/R/ensemble_fselect.R index 2f0fdf71..0feef2f9 100644 --- a/R/ensemble_fselect.R +++ b/R/ensemble_fselect.R @@ -32,12 +32,14 @@ #' The inner resampling strategy used by the [FSelector]. #' @param store_models (`logical(1)`)\cr #' Whether to store models in [auto_fselector] or not. +#' @param callbacks (list of lists of [CallbackBatchFSelect])\cr +#' Callbacks to be used for each learner. +#' The lists must have the same length as the number of learners. #' #' @template param_fselector #' @template param_task #' @template param_measure #' @template param_terminator -#' @template param_callbacks #' #' @source #' `r format_bib("saeys2008", "abeel2010", "pes2020")` @@ -62,17 +64,17 @@ ensemble_fselect = function( inner_resampling, measure, terminator, - callbacks = list(), + callbacks = NULL, store_models = TRUE ) { assert_task(task) assert_learners(as_learners(learners), task = task) assert_resampling(init_resampling) - assert_choice(class(init_resampling)[1], - choices = c("ResamplingBootstrap", "ResamplingSubsampling")) + assert_choice(class(init_resampling)[1], choices = c("ResamplingBootstrap", "ResamplingSubsampling")) + assert_list(callbacks, types = "list", len = length(learners), null.ok = TRUE) # create auto_fselector for each learner - afss = map(learners, function(learner) { + afss = imap(unname(learners), function(learner, i) { auto_fselector( fselector = fselector, learner = learner, @@ -80,7 +82,7 @@ ensemble_fselect = function( measure = measure, terminator = terminator, store_models = store_models, - callbacks = callbacks + callbacks = callbacks[[i]] ) }) diff --git 
a/tests/testthat/test_ensemble_fselect.R b/tests/testthat/test_ensemble_fselect.R index 74ff1678..cb3eed5d 100644 --- a/tests/testthat/test_ensemble_fselect.R +++ b/tests/testthat/test_ensemble_fselect.R @@ -21,3 +21,15 @@ test_that("ensemble feature selection works", { expect_data_table(feature_ranking, nrows = 60) expect_names(names(feature_ranking), identical.to = c("feature", "inclusion_probability")) }) + +test_that("different callbacks can be set", { + efsr = ensemble_fselect( + fselector = fs("rfe", n_features = 2, feature_fraction = 0.8), + task = tsk("sonar"), + learners = lrns(c("classif.rpart", "classif.featureless")), + init_resampling = rsmp("subsampling", repeats = 2), + inner_resampling = rsmp("cv", folds = 3), + measure = msr("classif.ce"), + terminator = trm("none") + ) +}) From 3502ebd4be0505a19082e31025de5827a60949a1 Mon Sep 17 00:00:00 2001 From: be-marc Date: Fri, 31 May 2024 14:47:21 +0200 Subject: [PATCH 27/43] fix: callbacks --- R/ContextBatchFSelect.R | 16 ++++++------ R/EnsembleFSResult.R | 16 ++++++++++++ R/ObjectiveFSelectBatch.R | 3 +++ man/EnsembleFSResult.Rd | 36 ++++++++++++++++++++++++++ man/ensemble_fselect.Rd | 7 ++--- tests/testthat/test_ensemble_fselect.R | 13 +++++++++- 6 files changed, 79 insertions(+), 12 deletions(-) diff --git a/R/ContextBatchFSelect.R b/R/ContextBatchFSelect.R index bba0c7f7..b69b6015 100644 --- a/R/ContextBatchFSelect.R +++ b/R/ContextBatchFSelect.R @@ -19,9 +19,9 @@ ContextBatchFSelect = R6Class("ContextBatchFSelect", #' The feature sets of the latest batch. xss = function(rhs) { if (missing(rhs)) { - return(get_private(self$objective_fselect)$.xss) + return(get_private(self$instance$objective)$.xss) } else { - get_private(self$objective_fselect)$.xss = rhs + get_private(self$instance$objective)$.xss = rhs } }, @@ -29,9 +29,9 @@ ContextBatchFSelect = R6Class("ContextBatchFSelect", #' The benchmark design of the latest batch. 
design = function(rhs) { if (missing(rhs)) { - return(get_private(self$objective_fselect)$.design) + return(get_private(self$instance$objective)$.design) } else { - get_private(self$objective_fselect)$.design = rhs + get_private(self$instance$objective)$.design = rhs } }, @@ -39,9 +39,9 @@ ContextBatchFSelect = R6Class("ContextBatchFSelect", #' The benchmark result of the latest batch. benchmark_result = function(rhs) { if (missing(rhs)) { - return(get_private(self$objective_fselect)$.benchmark_result) + return(get_private(self$instance$objective)$.benchmark_result) } else { - get_private(self$objective_fselect)$.benchmark_result = rhs + get_private(self$instance$objective)$.benchmark_result = rhs } }, @@ -51,9 +51,9 @@ ContextBatchFSelect = R6Class("ContextBatchFSelect", #' A callback can add additional columns which are also written to the archive. aggregated_performance = function(rhs) { if (missing(rhs)) { - return(get_private(self$objective_fselect)$.aggregated_performance) + return(get_private(self$instance$objective)$.aggregated_performance) } else { - get_private(self$objective_fselect)$.aggregated_performance = rhs + get_private(self$instance$objective)$.aggregated_performance = rhs } } ) diff --git a/R/EnsembleFSResult.R b/R/EnsembleFSResult.R index a5dc32bf..b4c98101 100644 --- a/R/EnsembleFSResult.R +++ b/R/EnsembleFSResult.R @@ -44,6 +44,22 @@ EnsembleFSResult = R6Class("EnsembleFSResult", private$.result = assert_data_table(result) }, + #' @description + #' Helper for print outputs. + #' @param ... (ignored). + format = function(...) { + sprintf("<%s>", class(self)[1L]) + }, + + #' @description + #' Printer. + #' + #' @param ... (ignored). + print = function(...) { + catf(format(self)) + print(self$result[, c("learner_id", "n_features"), with = FALSE]) + }, + #' @description #' Calculates the feature ranking. 
#' diff --git a/R/ObjectiveFSelectBatch.R b/R/ObjectiveFSelectBatch.R index 61a95c34..975410c8 100644 --- a/R/ObjectiveFSelectBatch.R +++ b/R/ObjectiveFSelectBatch.R @@ -89,6 +89,9 @@ ObjectiveFSelectBatch = R6Class("ObjectiveFSelectBatch", self$archive$benchmark_result$combine(private$.benchmark_result) set(private$.aggregated_performance, j = "uhash", value = private$.benchmark_result$uhashes) } + + call_back("on_eval_before_archive", self$callbacks, self$context) + private$.aggregated_performance }, diff --git a/man/EnsembleFSResult.Rd b/man/EnsembleFSResult.Rd index f854803b..6b52dc5a 100644 --- a/man/EnsembleFSResult.Rd +++ b/man/EnsembleFSResult.Rd @@ -49,6 +49,8 @@ Returns the result of the ensemble feature selection.} \subsection{Public methods}{ \itemize{ \item \href{#method-EnsembleFSResult-new}{\code{EnsembleFSResult$new()}} +\item \href{#method-EnsembleFSResult-format}{\code{EnsembleFSResult$format()}} +\item \href{#method-EnsembleFSResult-print}{\code{EnsembleFSResult$print()}} \item \href{#method-EnsembleFSResult-feature_ranking}{\code{EnsembleFSResult$feature_ranking()}} \item \href{#method-EnsembleFSResult-stability}{\code{EnsembleFSResult$stability()}} \item \href{#method-EnsembleFSResult-clone}{\code{EnsembleFSResult$clone()}} @@ -76,6 +78,40 @@ The result of the ensemble feature selection results.} } } \if{html}{\out{
    }} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-EnsembleFSResult-format}{}}} +\subsection{Method \code{format()}}{ +Helper for print outputs. +\subsection{Usage}{ +\if{html}{\out{
    }}\preformatted{EnsembleFSResult$format(...)}\if{html}{\out{
    }} +} + +\subsection{Arguments}{ +\if{html}{\out{
    }} +\describe{ +\item{\code{...}}{(ignored).} +} +\if{html}{\out{
    }} +} +} +\if{html}{\out{
    }} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-EnsembleFSResult-print}{}}} +\subsection{Method \code{print()}}{ +Printer. +\subsection{Usage}{ +\if{html}{\out{
    }}\preformatted{EnsembleFSResult$print(...)}\if{html}{\out{
    }} +} + +\subsection{Arguments}{ +\if{html}{\out{
    }} +\describe{ +\item{\code{...}}{(ignored).} +} +\if{html}{\out{
    }} +} +} +\if{html}{\out{
    }} \if{html}{\out{}} \if{latex}{\out{\hypertarget{method-EnsembleFSResult-feature_ranking}{}}} \subsection{Method \code{feature_ranking()}}{ diff --git a/man/ensemble_fselect.Rd b/man/ensemble_fselect.Rd index f92d82b7..4111b0af 100644 --- a/man/ensemble_fselect.Rd +++ b/man/ensemble_fselect.Rd @@ -28,7 +28,7 @@ ensemble_fselect( inner_resampling, measure, terminator, - callbacks = list(), + callbacks = NULL, store_models = TRUE ) } @@ -56,8 +56,9 @@ Measure to optimize. If \code{NULL}, default measure is used.} \item{terminator}{(\link{Terminator})\cr Stop criterion of the feature selection.} -\item{callbacks}{(list of \link{CallbackBatchFSelect})\cr -List of callbacks.} +\item{callbacks}{(list of lists of \link{CallbackBatchFSelect})\cr +Callbacks to be used for each learner. +The lists must have the same length as the number of learners.} \item{store_models}{(\code{logical(1)})\cr Whether to store models in \link{auto_fselector} or not.} diff --git a/tests/testthat/test_ensemble_fselect.R b/tests/testthat/test_ensemble_fselect.R index cb3eed5d..3ef87036 100644 --- a/tests/testthat/test_ensemble_fselect.R +++ b/tests/testthat/test_ensemble_fselect.R @@ -23,6 +23,13 @@ test_that("ensemble feature selection works", { }) test_that("different callbacks can be set", { + + callback_test = callback_batch_fselect("mlr3fselect.test", + on_eval_before_archive = function(callback, context) { + context$aggregated_performance[, callback_active := context$instance$objective$learner$id == "classif.rpart"] + } + ) + efsr = ensemble_fselect( fselector = fs("rfe", n_features = 2, feature_fraction = 0.8), task = tsk("sonar"), @@ -30,6 +37,10 @@ test_that("different callbacks can be set", { init_resampling = rsmp("subsampling", repeats = 2), inner_resampling = rsmp("cv", folds = 3), measure = msr("classif.ce"), - terminator = trm("none") + terminator = trm("none"), + callbacks = list(list(callback_test), list()) ) + + 
expect_true(all(efsr$benchmark_result$score()$learner[[1]]$fselect_instance$archive$data$callback_active)) + expect_null(efsr$benchmark_result$score()$learner[[2]]$fselect_instance$archive$data$callback_active) }) From 9532ab9fc778de5dd5a5be88c238ed89d59c731c Mon Sep 17 00:00:00 2001 From: john Date: Thu, 6 Jun 2024 12:29:37 +0200 Subject: [PATCH 28/43] add help() method --- R/EnsembleFSResult.R | 18 ++++++++++++++- ...embleFSResult.Rd => ensemble_fs_result.Rd} | 22 +++++++++++++++++-- tests/testthat/test_ensemble_fselect.R | 6 +++-- 3 files changed, 41 insertions(+), 5 deletions(-) rename man/{EnsembleFSResult.Rd => ensemble_fs_result.Rd} (89%) diff --git a/R/EnsembleFSResult.R b/R/EnsembleFSResult.R index b4c98101..f2a2c05d 100644 --- a/R/EnsembleFSResult.R +++ b/R/EnsembleFSResult.R @@ -1,7 +1,12 @@ #' @title Ensemble Feature Selection Result #' +#' @name ensemble_fs_result +#' #' @description -#' The `EnsembleFSResult` stores the results of the ensemble feature selection. +#' The `EnsembleFSResult` stores the results of the ensemble feature selection +#' and incorporates methods for assessing the stability of the feature selection +#' and ranking the features. +#' #' The function [ensemble_fselect()] returns an object of this class. #' #' @examples @@ -32,6 +37,10 @@ EnsembleFSResult = R6Class("EnsembleFSResult", #' The benchmark result. benchmark_result = NULL, + #' @field man (`character(1)`)\cr + #' Manual page for this object. + man = NULL, + #' @description #' Creates a new instance of this [R6][R6::R6Class] class. 
#' @@ -42,6 +51,7 @@ EnsembleFSResult = R6Class("EnsembleFSResult", initialize = function(benchmark_result, result) { self$benchmark_result = assert_benchmark_result(benchmark_result) private$.result = assert_data_table(result) + self$man = "mlr3fselect::ensemble_fs_result" }, #' @description @@ -60,6 +70,12 @@ EnsembleFSResult = R6Class("EnsembleFSResult", print(self$result[, c("learner_id", "n_features"), with = FALSE]) }, + #' @description + #' Opens the corresponding help page referenced by field `$man`. + help = function() { + open_help(self$man) + }, + #' @description #' Calculates the feature ranking. #' diff --git a/man/EnsembleFSResult.Rd b/man/ensemble_fs_result.Rd similarity index 89% rename from man/EnsembleFSResult.Rd rename to man/ensemble_fs_result.Rd index 6b52dc5a..d78f216b 100644 --- a/man/EnsembleFSResult.Rd +++ b/man/ensemble_fs_result.Rd @@ -1,10 +1,14 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/EnsembleFSResult.R -\name{EnsembleFSResult} +\name{ensemble_fs_result} +\alias{ensemble_fs_result} \alias{EnsembleFSResult} \title{Ensemble Feature Selection Result} \description{ -The \code{EnsembleFSResult} stores the results of the ensemble feature selection. +The \code{EnsembleFSResult} stores the results of the ensemble feature selection +and incorporates methods for assessing the stability of the feature selection +and ranking the features. + The function \code{\link[=ensemble_fselect]{ensemble_fselect()}} returns an object of this class. 
} \examples{ @@ -34,6 +38,9 @@ efsr$stability(stability_measure = "jaccard") \describe{ \item{\code{benchmark_result}}{(\link[mlr3:BenchmarkResult]{mlr3::BenchmarkResult})\cr The benchmark result.} + +\item{\code{man}}{(\code{character(1)})\cr +Manual page for this object.} } \if{html}{\out{}} } @@ -51,6 +58,7 @@ Returns the result of the ensemble feature selection.} \item \href{#method-EnsembleFSResult-new}{\code{EnsembleFSResult$new()}} \item \href{#method-EnsembleFSResult-format}{\code{EnsembleFSResult$format()}} \item \href{#method-EnsembleFSResult-print}{\code{EnsembleFSResult$print()}} +\item \href{#method-EnsembleFSResult-help}{\code{EnsembleFSResult$help()}} \item \href{#method-EnsembleFSResult-feature_ranking}{\code{EnsembleFSResult$feature_ranking()}} \item \href{#method-EnsembleFSResult-stability}{\code{EnsembleFSResult$stability()}} \item \href{#method-EnsembleFSResult-clone}{\code{EnsembleFSResult$clone()}} @@ -110,6 +118,16 @@ Printer. } \if{html}{\out{}} } +} +\if{html}{\out{
    }} +\if{html}{\out{}} +\if{latex}{\out{\hypertarget{method-EnsembleFSResult-help}{}}} +\subsection{Method \code{help()}}{ +Opens the corresponding help page referenced by field \verb{$man}. +\subsection{Usage}{ +\if{html}{\out{
    }}\preformatted{EnsembleFSResult$help()}\if{html}{\out{
    }} +} + } \if{html}{\out{
    }} \if{html}{\out{}} diff --git a/tests/testthat/test_ensemble_fselect.R b/tests/testthat/test_ensemble_fselect.R index 3ef87036..c75a778c 100644 --- a/tests/testthat/test_ensemble_fselect.R +++ b/tests/testthat/test_ensemble_fselect.R @@ -1,7 +1,8 @@ test_that("ensemble feature selection works", { + task = tsk("sonar") efsr = ensemble_fselect( fselector = fs("rfe", n_features = 2, feature_fraction = 0.8), - task = tsk("sonar"), + task = task, learners = lrns(c("classif.rpart", "classif.featureless")), init_resampling = rsmp("subsampling", repeats = 2), inner_resampling = rsmp("cv", folds = 3), @@ -9,6 +10,7 @@ test_that("ensemble feature selection works", { terminator = trm("none") ) + expect_character(efsr$man) expect_data_table(efsr$result, nrows = 4) expect_list(efsr$result$features, any.missing = FALSE, len = 4) expect_vector(efsr$result$n_features, size = 4) @@ -18,7 +20,7 @@ test_that("ensemble feature selection works", { expect_number(efsr$stability(stability_measure = "jaccard")) feature_ranking = efsr$feature_ranking() - expect_data_table(feature_ranking, nrows = 60) + expect_data_table(feature_ranking, nrows = length(task$feature_names)) expect_names(names(feature_ranking), identical.to = c("feature", "inclusion_probability")) }) From c17dff904cc2b2d7b904d39a8eeb86b076ed153a Mon Sep 17 00:00:00 2001 From: john Date: Fri, 7 Jun 2024 10:15:14 +0200 Subject: [PATCH 29/43] return result without R6 classes --- R/EnsembleFSResult.R | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/R/EnsembleFSResult.R b/R/EnsembleFSResult.R index f2a2c05d..bdd152c8 100644 --- a/R/EnsembleFSResult.R +++ b/R/EnsembleFSResult.R @@ -120,7 +120,7 @@ EnsembleFSResult = R6Class("EnsembleFSResult", #' If `TRUE`, the cached results are ignored. 
stability = function(stability_measure = "jaccard", ..., reset_cache = FALSE) { funs = stabm::listStabilityMeasures()$Name - keys = tolower(gsub("stability", "", funs)) + keys = tolower(gsub("stability", "", funs)) assert_choice(stability_measure, choices = keys) # cached results @@ -140,8 +140,7 @@ EnsembleFSResult = R6Class("EnsembleFSResult", #' Returns the result of the ensemble feature selection. result = function(rhs) { assert_ro_binding(rhs) - tab = as.data.table(self$benchmark_result)[, c("task", "learner", "resampling"), with = FALSE] - cbind(private$.result, tab) + private$.result } ), From 766f10278cd8c54ada96de588561a6ec11e92130 Mon Sep 17 00:00:00 2001 From: john Date: Fri, 7 Jun 2024 14:27:03 +0200 Subject: [PATCH 30/43] add task features in initialize() --- R/EnsembleFSResult.R | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/R/EnsembleFSResult.R b/R/EnsembleFSResult.R index bdd152c8..1eaf2118 100644 --- a/R/EnsembleFSResult.R +++ b/R/EnsembleFSResult.R @@ -48,9 +48,23 @@ EnsembleFSResult = R6Class("EnsembleFSResult", #' The benchmark result object. #' @param result ([data.table::data.table])\cr #' The result of the ensemble feature selection results. - initialize = function(benchmark_result, result) { - self$benchmark_result = assert_benchmark_result(benchmark_result) - private$.result = assert_data_table(result) + #' @param features ([character()])\cr + #' The vector of features of the task that was used in the ensemble feature + #' selection. Ignored if `benchmark_result` is given and mandatory to have + #' if `benchmark_result` is `NULL`. 
+ initialize = function(benchmark_result = NULL, result, features) { + if (is.null(benchmark_result)) { + assert_character(features, any.missing = FALSE, null.ok = FALSE) + private$.features = features + } else { + self$benchmark_result = assert_benchmark_result(benchmark_result) + private$.features = self$benchmark_result$tasks$task[[1]]$feature_names + } + + assert_data_table(result) + assert_names(names(result), must.include = c("iter", "learner_id", "features", "n_features")) + + private$.result = result self$man = "mlr3fselect::ensemble_fs_result" }, @@ -147,7 +161,8 @@ EnsembleFSResult = R6Class("EnsembleFSResult", private = list( .result = NULL, .stability = NULL, - .feature_ranking = NULL + .feature_ranking = NULL, + .features = NULL ) ) From e90b1b2de83b4c87b15fa53cdf260ab487f9202e Mon Sep 17 00:00:00 2001 From: john Date: Fri, 7 Jun 2024 14:29:27 +0200 Subject: [PATCH 31/43] faster calculation of inclusion probabilities --- R/EnsembleFSResult.R | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/R/EnsembleFSResult.R b/R/EnsembleFSResult.R index 1eaf2118..4865130b 100644 --- a/R/EnsembleFSResult.R +++ b/R/EnsembleFSResult.R @@ -96,6 +96,10 @@ EnsembleFSResult = R6Class("EnsembleFSResult", #' @param method (`character(1)`)\cr #' The method to calculate the feature ranking. #' Currently, only `"inclusion_probability"` is supported. 
+ #' + #' @return A [data.table][data.table::data.table] listing all the features, + #' ordered by decreasing inclusion probability scores (depending on the + #' `method`) feature_ranking = function(method = "inclusion_probability") { assert_choice(method, choices = "inclusion_probability") @@ -104,16 +108,21 @@ EnsembleFSResult = R6Class("EnsembleFSResult", return(private$.feature_ranking[[method]]) } - features = self$benchmark_result$tasks$task[[1]]$feature_names + count_tbl = sort(table(unlist(self$result$features)), decreasing = TRUE) + features_selected = names(count_tbl) + features_not_selected = setdiff(private$.features, features_selected) + + res_fs = data.table( + feature = features_selected, + inclusion_probability = as.vector(count_tbl) / nrow(self$result) + ) - count = map_int(features, function(feature) { - sum(map_lgl(self$result$features, function(iteration) { - feature %in% iteration - })) - }) + res_fns = data.table( + feature = features_not_selected, + inclusion_probability = 0 + ) - res = data.table(feature = features, inclusion_probability = count / nrow(self$result)) - setorderv(res, "inclusion_probability", order = -1L) + res = rbindlist(list(res_fs, res_fns)) private$.feature_ranking[[method]] = res private$.feature_ranking[[method]] From f20ddbee6eb71ab53a89ce2e328282b104b43a3d Mon Sep 17 00:00:00 2001 From: john Date: Fri, 7 Jun 2024 14:31:53 +0200 Subject: [PATCH 32/43] test init from data.table result --- tests/testthat/test_ensemble_fselect.R | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tests/testthat/test_ensemble_fselect.R b/tests/testthat/test_ensemble_fselect.R index c75a778c..380d1d0a 100644 --- a/tests/testthat/test_ensemble_fselect.R +++ b/tests/testthat/test_ensemble_fselect.R @@ -24,6 +24,18 @@ test_that("ensemble feature selection works", { expect_names(names(feature_ranking), identical.to = c("feature", "inclusion_probability")) }) +test_that("EnsembleFSResult initialization", { + features = LETTERS + 
result = data.table(a = 1) # not proper column name + expect_error(EnsembleFSResult$new(result = result, features = features)) + + result = data.table(iter = 1:2, learner_id = list("l1", "l2"), + features = list(LETTERS[1], LETTERS[1:3]), + n_features = c(1,3)) + # works without benchmark result object + expect_class(EnsembleFSResult$new(result = result, features = features), "EnsembleFSResult") +}) + test_that("different callbacks can be set", { callback_test = callback_batch_fselect("mlr3fselect.test", From d023a215f39ddad872aef3977358d37955d78f6f Mon Sep 17 00:00:00 2001 From: john Date: Fri, 7 Jun 2024 14:36:06 +0200 Subject: [PATCH 33/43] refine doc --- R/EnsembleFSResult.R | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/R/EnsembleFSResult.R b/R/EnsembleFSResult.R index 4865130b..dd834cab 100644 --- a/R/EnsembleFSResult.R +++ b/R/EnsembleFSResult.R @@ -3,9 +3,9 @@ #' @name ensemble_fs_result #' #' @description -#' The `EnsembleFSResult` stores the results of the ensemble feature selection -#' and incorporates methods for assessing the stability of the feature selection -#' and ranking the features. +#' The `EnsembleFSResult` class stores the results of ensemble feature selection. +#' It includes methods for evaluating the stability of the feature selection +#' process and for ranking the selected features. #' #' The function [ensemble_fselect()] returns an object of this class. #' @@ -29,6 +29,9 @@ #' #' # returns the stability of the selected features #' efsr$stability(stability_measure = "jaccard") +#' +#' # returns a ranking of all features +#' head(efsr$feature_ranking()) #' } EnsembleFSResult = R6Class("EnsembleFSResult", public = list( @@ -46,12 +49,12 @@ EnsembleFSResult = R6Class("EnsembleFSResult", #' #' @param benchmark_result ([mlr3::BenchmarkResult])\cr #' The benchmark result object. + #' Default is `NULL`, but the task's `"features"` must be given. 
#' @param result ([data.table::data.table])\cr - #' The result of the ensemble feature selection results. + #' The result of the ensemble feature selection. #' @param features ([character()])\cr #' The vector of features of the task that was used in the ensemble feature - #' selection. Ignored if `benchmark_result` is given and mandatory to have - #' if `benchmark_result` is `NULL`. + #' selection. Ignored if `"benchmark_result"` is given. initialize = function(benchmark_result = NULL, result, features) { if (is.null(benchmark_result)) { assert_character(features, any.missing = FALSE, null.ok = FALSE) @@ -129,7 +132,7 @@ EnsembleFSResult = R6Class("EnsembleFSResult", }, #' @description - #' Calculates the stability of the selected features with the `stabm` package. + #' Calculates the stability of the selected features with the \CRANpkg{stabm} package. #' The results are cached. #' When the same stability measure is requested again with different arguments, the cache must be reset. #' From 38b37e0cb438d79d8e421659b23ce9df4b80b0b7 Mon Sep 17 00:00:00 2001 From: john Date: Fri, 7 Jun 2024 15:24:45 +0200 Subject: [PATCH 34/43] update docs --- man/ensemble_fs_result.Rd | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/man/ensemble_fs_result.Rd b/man/ensemble_fs_result.Rd index d78f216b..e395334f 100644 --- a/man/ensemble_fs_result.Rd +++ b/man/ensemble_fs_result.Rd @@ -5,9 +5,9 @@ \alias{EnsembleFSResult} \title{Ensemble Feature Selection Result} \description{ -The \code{EnsembleFSResult} stores the results of the ensemble feature selection -and incorporates methods for assessing the stability of the feature selection -and ranking the features. +The \code{EnsembleFSResult} class stores the results of ensemble feature selection. +It includes methods for evaluating the stability of the feature selection +process and for ranking the selected features. 
The function \code{\link[=ensemble_fselect]{ensemble_fselect()}} returns an object of this class. } @@ -31,6 +31,9 @@ efsr$result # returns the stability of the selected features efsr$stability(stability_measure = "jaccard") + +# returns a ranking of all features +head(efsr$feature_ranking()) } } \section{Public fields}{ @@ -70,17 +73,22 @@ Returns the result of the ensemble feature selection.} \subsection{Method \code{new()}}{ Creates a new instance of this \link[R6:R6Class]{R6} class. \subsection{Usage}{ -\if{html}{\out{
    }}\preformatted{EnsembleFSResult$new(benchmark_result, result)}\if{html}{\out{
    }} +\if{html}{\out{
    }}\preformatted{EnsembleFSResult$new(benchmark_result = NULL, result, features)}\if{html}{\out{
    }} } \subsection{Arguments}{ \if{html}{\out{
    }} \describe{ \item{\code{benchmark_result}}{(\link[mlr3:BenchmarkResult]{mlr3::BenchmarkResult})\cr -The benchmark result object.} +The benchmark result object. +Default is \code{NULL}, but the task's \code{"features"} must be given.} \item{\code{result}}{(\link[data.table:data.table]{data.table::data.table})\cr -The result of the ensemble feature selection results.} +The result of the ensemble feature selection.} + +\item{\code{features}}{(\code{\link[=character]{character()}})\cr +The vector of features of the task that was used in the ensemble feature +selection. Ignored if \code{"benchmark_result"} is given.} } \if{html}{\out{
    }} } @@ -147,12 +155,17 @@ Currently, only \code{"inclusion_probability"} is supported.} } \if{html}{\out{}} } +\subsection{Returns}{ +A \link[data.table:data.table]{data.table} listing all the features, +ordered by decreasing inclusion probability scores (depending on the +\code{method}) +} } \if{html}{\out{
    }} \if{html}{\out{}} \if{latex}{\out{\hypertarget{method-EnsembleFSResult-stability}{}}} \subsection{Method \code{stability()}}{ -Calculates the stability of the selected features with the \code{stabm} package. +Calculates the stability of the selected features with the \CRANpkg{stabm} package. The results are cached. When the same stability measure is requested again with different arguments, the cache must be reset. \subsection{Usage}{ From 602830567dd9dcc7a0b517115a42c81ffd943ea2 Mon Sep 17 00:00:00 2001 From: be-marc Date: Mon, 10 Jun 2024 16:59:23 +0200 Subject: [PATCH 35/43] refactor: make bmr optional --- R/EnsembleFSResult.R | 47 +++++++++++++------------- R/ensemble_fselect.R | 29 ++++++++-------- man/ensemble_fs_result.Rd | 29 +++++++++++----- man/ensemble_fselect.Rd | 24 ++++++------- tests/testthat/test_ensemble_fselect.R | 37 +++++++++++++++++++- 5 files changed, 104 insertions(+), 62 deletions(-) diff --git a/R/EnsembleFSResult.R b/R/EnsembleFSResult.R index dd834cab..59d87323 100644 --- a/R/EnsembleFSResult.R +++ b/R/EnsembleFSResult.R @@ -3,12 +3,18 @@ #' @name ensemble_fs_result #' #' @description -#' The `EnsembleFSResult` class stores the results of ensemble feature selection. -#' It includes methods for evaluating the stability of the feature selection -#' process and for ranking the selected features. -#' +#' The `EnsembleFSResult` stores the results of ensemble feature selection. +#' It includes methods for evaluating the stability of the feature selection process and for ranking the selected features. #' The function [ensemble_fselect()] returns an object of this class. 
#' +#' @section S3 Methods: +#' * `as.data.table.EnsembleFSResult(x, benchmark_result = TRUE)`\cr +#' Returns a tabular view of the ensemble feature selection.\cr +#' [EnsembleFSResult] -> [data.table::data.table()]\cr +#' * `x` ([EnsembleFSResult]) +#' * `benchmark_result` (`logical(1)`)\cr +#' Whether to add the learner, task and resampling information from the benchmark result. +#' #' @examples #' \donttest{ #' efsr = ensemble_fselect( @@ -47,27 +53,20 @@ EnsembleFSResult = R6Class("EnsembleFSResult", #' @description #' Creates a new instance of this [R6][R6::R6Class] class. #' - #' @param benchmark_result ([mlr3::BenchmarkResult])\cr - #' The benchmark result object. - #' Default is `NULL`, but the task's `"features"` must be given. #' @param result ([data.table::data.table])\cr #' The result of the ensemble feature selection. #' @param features ([character()])\cr #' The vector of features of the task that was used in the ensemble feature #' selection. Ignored if `"benchmark_result"` is given. - initialize = function(benchmark_result = NULL, result, features) { - if (is.null(benchmark_result)) { - assert_character(features, any.missing = FALSE, null.ok = FALSE) - private$.features = features - } else { - self$benchmark_result = assert_benchmark_result(benchmark_result) - private$.features = self$benchmark_result$tasks$task[[1]]$feature_names - } - + #' @param benchmark_result ([mlr3::BenchmarkResult])\cr + #' The benchmark result object. + initialize = function(result, features, benchmark_result = NULL) { assert_data_table(result) assert_names(names(result), must.include = c("iter", "learner_id", "features", "n_features")) - private$.result = result + private$.features = assert_character(features, any.missing = FALSE, null.ok = FALSE) + self$benchmark_result = if (!is.null(benchmark_result)) assert_benchmark_result(benchmark_result) + self$man = "mlr3fselect::ensemble_fs_result" }, @@ -84,7 +83,7 @@ EnsembleFSResult = R6Class("EnsembleFSResult", #' @param ... 
(ignored). print = function(...) { catf(format(self)) - print(self$result[, c("learner_id", "n_features"), with = FALSE]) + print(private$.result[, c("iter", "learner_id", "n_features"), with = FALSE]) }, #' @description @@ -111,13 +110,13 @@ EnsembleFSResult = R6Class("EnsembleFSResult", return(private$.feature_ranking[[method]]) } - count_tbl = sort(table(unlist(self$result$features)), decreasing = TRUE) + count_tbl = sort(table(unlist(private$.result$features)), decreasing = TRUE) features_selected = names(count_tbl) features_not_selected = setdiff(private$.features, features_selected) res_fs = data.table( feature = features_selected, - inclusion_probability = as.vector(count_tbl) / nrow(self$result) + inclusion_probability = as.vector(count_tbl) / nrow(private$.result) ) res_fns = data.table( @@ -155,7 +154,7 @@ EnsembleFSResult = R6Class("EnsembleFSResult", } fun = get(funs[which(stability_measure == keys)], envir = asNamespace("stabm")) - private$.stability[[stability_measure]] = fun(self$result$features, ...) + private$.stability[[stability_measure]] = fun(private$.result$features, ...) private$.stability[[stability_measure]] } ), @@ -166,7 +165,9 @@ EnsembleFSResult = R6Class("EnsembleFSResult", #' Returns the result of the ensemble feature selection. result = function(rhs) { assert_ro_binding(rhs) - private$.result + if (is.null(self$benchmark_result)) return(private$.result) + tab = as.data.table(self$benchmark_result)[, c("task", "learner", "resampling"), with = FALSE] + cbind(private$.result, tab) } ), @@ -179,6 +180,6 @@ EnsembleFSResult = R6Class("EnsembleFSResult", ) #' @export -as.data.table.EnsembleFSResult = function(x, ...) { +as.data.table.EnsembleFSResult = function(x, ...) { x$result } diff --git a/R/ensemble_fselect.R b/R/ensemble_fselect.R index 0feef2f9..30f0b802 100644 --- a/R/ensemble_fselect.R +++ b/R/ensemble_fselect.R @@ -4,23 +4,15 @@ #' #' @description #' Ensemble feature selection using multiple learners. 
-#' The ensemble feature selection method is designed to identify the -#' most informative features from a given dataset by leveraging multiple -#' machine learning models and resampling techniques. +#' The ensemble feature selection method is designed to identify the most informative features from a given dataset by leveraging multiple machine learning models and resampling techniques. #' #' @details -#' The method begins by applying an initial resampling technique specified -#' by the user, to create **multiple subsamples** from the original dataset. -#' This resampling process helps in generating diverse subsets of data for -#' robust feature selection. +#' The method begins by applying an initial resampling technique specified by the user, to create **multiple subsamples** from the original dataset. +#' This resampling process helps in generating diverse subsets of data for robust feature selection. #' -#' For each subsample generated in the previous step, the method performs -#' **wrapped-based feature selection** ([auto_fselector]) using each provided -#' learner, the given inner resampling method, performance measure and -#' optimization algorithm. -#' This process generates a best feature subset for each combination of -#' subsample and learner. -#' Results are stored in a [data.table] object. +#' For each subsample generated in the previous step, the method performs **wrapped-based feature selection** ([auto_fselector]) using each provided learner, the given inner resampling method, performance measure and optimization algorithm. +#' This process generates the best feature subset for each combination of subsample and learner. +#' Results are stored in an [EnsembleFSResult]. #' #' @param learners (list of [mlr3::Learner])\cr #' The learners to be used for feature selection. @@ -30,6 +22,8 @@ #' Can only be [mlr_resamplings_subsampling] or [mlr_resamplings_bootstrap]. 
#' @param inner_resampling ([mlr3::Resampling])\cr #' The inner resampling strategy used by the [FSelector]. +#' @param store_benchmark_result (`logical(1)`)\cr +#' Whether to store the benchmark result in [EnsembleFSResult] or not. #' @param store_models (`logical(1)`)\cr #' Whether to store models in [auto_fselector] or not. #' @param callbacks (list of lists of [CallbackBatchFSelect])\cr @@ -65,6 +59,7 @@ ensemble_fselect = function( measure, terminator, callbacks = NULL, + store_benchmark_result = TRUE, store_models = TRUE ) { assert_task(task) @@ -72,6 +67,7 @@ ensemble_fselect = function( assert_resampling(init_resampling) assert_choice(class(init_resampling)[1], choices = c("ResamplingBootstrap", "ResamplingSubsampling")) assert_list(callbacks, types = "list", len = length(learners), null.ok = TRUE) + assert_flag(store_benchmark_result) # create auto_fselector for each learner afss = imap(unname(learners), function(learner, i) { @@ -139,5 +135,8 @@ ensemble_fselect = function( set(grid, j = "learner", value = NULL) set(grid, j = "task", value = NULL) set(grid, j = "resampling", value = NULL) - EnsembleFSResult$new(bmr, grid) + EnsembleFSResult$new( + result = grid, + features = task$feature_names, + benchmark_result = if (store_benchmark_result) bmr) } diff --git a/man/ensemble_fs_result.Rd b/man/ensemble_fs_result.Rd index e395334f..ea861eea 100644 --- a/man/ensemble_fs_result.Rd +++ b/man/ensemble_fs_result.Rd @@ -5,12 +5,24 @@ \alias{EnsembleFSResult} \title{Ensemble Feature Selection Result} \description{ -The \code{EnsembleFSResult} class stores the results of ensemble feature selection. -It includes methods for evaluating the stability of the feature selection -process and for ranking the selected features. - +The \code{EnsembleFSResult} stores the results of ensemble feature selection. +It includes methods for evaluating the stability of the feature selection process and for ranking the selected features. 
The function \code{\link[=ensemble_fselect]{ensemble_fselect()}} returns an object of this class. } +\section{S3 Methods}{ + +\itemize{ +\item \code{as.data.table.EnsembleFSResult(x, benchmark_result = TRUE)}\cr +Returns a tabular view of the ensemble feature selection.\cr +\link{EnsembleFSResult} -> \code{\link[data.table:data.table]{data.table::data.table()}}\cr +\itemize{ +\item \code{x} (\link{EnsembleFSResult}) +\item \code{benchmark_result} (\code{logical(1)})\cr +Whether to add the learner, task and resampling information from the benchmark result. +} +} +} + \examples{ \donttest{ efsr = ensemble_fselect( @@ -73,22 +85,21 @@ Returns the result of the ensemble feature selection.} \subsection{Method \code{new()}}{ Creates a new instance of this \link[R6:R6Class]{R6} class. \subsection{Usage}{ -\if{html}{\out{
    }}\preformatted{EnsembleFSResult$new(benchmark_result = NULL, result, features)}\if{html}{\out{
    }} +\if{html}{\out{
    }}\preformatted{EnsembleFSResult$new(result, features, benchmark_result = NULL)}\if{html}{\out{
    }} } \subsection{Arguments}{ \if{html}{\out{
    }} \describe{ -\item{\code{benchmark_result}}{(\link[mlr3:BenchmarkResult]{mlr3::BenchmarkResult})\cr -The benchmark result object. -Default is \code{NULL}, but the task's \code{"features"} must be given.} - \item{\code{result}}{(\link[data.table:data.table]{data.table::data.table})\cr The result of the ensemble feature selection.} \item{\code{features}}{(\code{\link[=character]{character()}})\cr The vector of features of the task that was used in the ensemble feature selection. Ignored if \code{"benchmark_result"} is given.} + +\item{\code{benchmark_result}}{(\link[mlr3:BenchmarkResult]{mlr3::BenchmarkResult})\cr +The benchmark result object.} } \if{html}{\out{
    }} } diff --git a/man/ensemble_fselect.Rd b/man/ensemble_fselect.Rd index 4111b0af..2d166203 100644 --- a/man/ensemble_fselect.Rd +++ b/man/ensemble_fselect.Rd @@ -29,6 +29,7 @@ ensemble_fselect( measure, terminator, callbacks = NULL, + store_benchmark_result = TRUE, store_models = TRUE ) } @@ -60,28 +61,23 @@ Stop criterion of the feature selection.} Callbacks to be used for each learner. The lists must have the same length as the number of learners.} +\item{store_benchmark_result}{(\code{logical(1)})\cr +Whether to store the benchmark result in \link{EnsembleFSResult} or not.} + \item{store_models}{(\code{logical(1)})\cr Whether to store models in \link{auto_fselector} or not.} } \description{ Ensemble feature selection using multiple learners. -The ensemble feature selection method is designed to identify the -most informative features from a given dataset by leveraging multiple -machine learning models and resampling techniques. +The ensemble feature selection method is designed to identify the most informative features from a given dataset by leveraging multiple machine learning models and resampling techniques. } \details{ -The method begins by applying an initial resampling technique specified -by the user, to create \strong{multiple subsamples} from the original dataset. -This resampling process helps in generating diverse subsets of data for -robust feature selection. +The method begins by applying an initial resampling technique specified by the user, to create \strong{multiple subsamples} from the original dataset. +This resampling process helps in generating diverse subsets of data for robust feature selection. -For each subsample generated in the previous step, the method performs -\strong{wrapped-based feature selection} (\link{auto_fselector}) using each provided -learner, the given inner resampling method, performance measure and -optimization algorithm. 
-This process generates a best feature subset for each combination of -subsample and learner. -Results are stored in a \link{data.table} object. +For each subsample generated in the previous step, the method performs \strong{wrapped-based feature selection} (\link{auto_fselector}) using each provided learner, the given inner resampling method, performance measure and optimization algorithm. +This process generates the best feature subset for each combination of subsample and learner. +Results are stored in an \link{EnsembleFSResult}. } \examples{ \donttest{ diff --git a/tests/testthat/test_ensemble_fselect.R b/tests/testthat/test_ensemble_fselect.R index 380d1d0a..6c61268f 100644 --- a/tests/testthat/test_ensemble_fselect.R +++ b/tests/testthat/test_ensemble_fselect.R @@ -1,4 +1,32 @@ test_that("ensemble feature selection works", { + task = tsk("sonar") + efsr = ensemble_fselect( + fselector = fs("random_search"), + task = task, + learners = lrns(c("classif.rpart", "classif.featureless")), + init_resampling = rsmp("subsampling", repeats = 2), + inner_resampling = rsmp("cv", folds = 3), + measure = msr("classif.ce"), + terminator = trm("evals", n_evals = 5) + ) + + expect_character(efsr$man) + expect_data_table(efsr$result, nrows = 4) + expect_list(efsr$result$features, any.missing = FALSE, len = 4) + expect_vector(efsr$result$n_features, size = 4) + expect_vector(efsr$result$classif.ce, size = 4) + expect_benchmark_result(efsr$benchmark_result) + + expect_number(efsr$stability(stability_measure = "jaccard")) + feature_ranking = efsr$feature_ranking() + expect_data_table(feature_ranking, nrows = length(task$feature_names)) + expect_names(names(feature_ranking), identical.to = c("feature", "inclusion_probability")) + + tab = as.data.table(efsr) + tab +}) + +test_that("ensemble feature selection works with rfe", { task = tsk("sonar") efsr = ensemble_fselect( fselector = fs("rfe", n_features = 2, feature_fraction = 0.8), @@ -22,6 +50,9 @@ test_that("ensemble feature 
selection works", { feature_ranking = efsr$feature_ranking() expect_data_table(feature_ranking, nrows = length(task$feature_names)) expect_names(names(feature_ranking), identical.to = c("feature", "inclusion_probability")) + + tab = as.data.table(efsr) + tab }) test_that("EnsembleFSResult initialization", { @@ -33,7 +64,11 @@ test_that("EnsembleFSResult initialization", { features = list(LETTERS[1], LETTERS[1:3]), n_features = c(1,3)) # works without benchmark result object - expect_class(EnsembleFSResult$new(result = result, features = features), "EnsembleFSResult") + efsr = EnsembleFSResult$new(result = result, features = features) + expect_class(efsr, "EnsembleFSResult") + tab = as.data.table(efsr) + expect_data_table(tab) + expect_names(names(tab), identical.to = c("iter", "learner_id", "features", "n_features")) }) test_that("different callbacks can be set", { From f51500a8a9c727766e04aed05789a821ccfa3208 Mon Sep 17 00:00:00 2001 From: john Date: Mon, 10 Jun 2024 18:06:38 +0200 Subject: [PATCH 36/43] correct 'iter' to 'resampling_id' --- R/EnsembleFSResult.R | 4 ++-- R/ensemble_fselect.R | 8 +++++--- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/R/EnsembleFSResult.R b/R/EnsembleFSResult.R index 59d87323..064adcad 100644 --- a/R/EnsembleFSResult.R +++ b/R/EnsembleFSResult.R @@ -57,12 +57,12 @@ EnsembleFSResult = R6Class("EnsembleFSResult", #' The result of the ensemble feature selection. #' @param features ([character()])\cr #' The vector of features of the task that was used in the ensemble feature - #' selection. Ignored if `"benchmark_result"` is given. + #' selection. #' @param benchmark_result ([mlr3::BenchmarkResult])\cr #' The benchmark result object. 
initialize = function(result, features, benchmark_result = NULL) { assert_data_table(result) - assert_names(names(result), must.include = c("iter", "learner_id", "features", "n_features")) + assert_names(names(result), must.include = c("resampling_id", "learner_id", "features", "n_features")) private$.result = result private$.features = assert_character(features, any.missing = FALSE, null.ok = FALSE) self$benchmark_result = if (!is.null(benchmark_result)) assert_benchmark_result(benchmark_result) diff --git a/R/ensemble_fselect.R b/R/ensemble_fselect.R index 30f0b802..c179ef1a 100644 --- a/R/ensemble_fselect.R +++ b/R/ensemble_fselect.R @@ -35,6 +35,8 @@ #' @template param_measure #' @template param_terminator #' +#' @returns an [EnsembleFSResult] object. +#' #' @source #' `r format_bib("saeys2008", "abeel2010", "pes2020")` #' @export @@ -90,7 +92,7 @@ ensemble_fselect = function( resampling = rsmp("insample")$instantiate(task_subset) data.table( - iter = i, + resampling_id = i, learner_id = map(learners, "id"), learner = afss, task = list(task_subset), @@ -119,7 +121,6 @@ ensemble_fselect = function( afs$fselect_instance$archive$best()[, measure$id, with = FALSE][[1]] }) - set(grid, j = "iter", value = 1:bmr$n_resample_results) set(grid, j = "features", value = features) set(grid, j = "n_features", value = n_features) set(grid, j = measure$id, value = scores) @@ -138,5 +139,6 @@ ensemble_fselect = function( EnsembleFSResult$new( result = grid, features = task$feature_names, - benchmark_result = if (store_benchmark_result) bmr) + benchmark_result = if (store_benchmark_result) bmr + ) } From 122f2a4c8a81680428ee88b8deb9050974b7f3d3 Mon Sep 17 00:00:00 2001 From: john Date: Mon, 10 Jun 2024 18:14:25 +0200 Subject: [PATCH 37/43] rename baseline feature ranking method to approval voting + add some doc --- R/EnsembleFSResult.R | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/R/EnsembleFSResult.R b/R/EnsembleFSResult.R index 
064adcad..14035764 100644 --- a/R/EnsembleFSResult.R +++ b/R/EnsembleFSResult.R @@ -95,15 +95,23 @@ EnsembleFSResult = R6Class("EnsembleFSResult", #' @description #' Calculates the feature ranking. #' + #' @details + #' The feature ranking process is built on the following framework: models + #' act as voters, features act as candidates, and voters select certain + #' candidates (features). The primary objective is to compile these selections + #' into a consensus ranked list of features, effectively forming a committee. + #' Currently, only `"approval_voting"` method is supported, which selects the + #' candidates/features that have the highest approval score or selection + #' frequency, i.e. appear the most often. + #' #' @param method (`character(1)`)\cr #' The method to calculate the feature ranking. - #' Currently, only `"inclusion_probability"` is supported. #' #' @return A [data.table][data.table::data.table] listing all the features, #' ordered by decreasing inclusion probability scores (depending on the #' `method`) - feature_ranking = function(method = "inclusion_probability") { - assert_choice(method, choices = "inclusion_probability") + feature_ranking = function(method = "approval_voting") { + assert_choice(method, choices = "approval_voting") # cached results if (!is.null(private$.feature_ranking[[method]])) { From f4cabba54e3cc3321d5a66eb5845e03e2eac0443 Mon Sep 17 00:00:00 2001 From: john Date: Mon, 10 Jun 2024 18:14:48 +0200 Subject: [PATCH 38/43] updocs --- man/ensemble_fs_result.Rd | 17 +++++++++++++---- man/ensemble_fselect.Rd | 3 +++ 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/man/ensemble_fs_result.Rd b/man/ensemble_fs_result.Rd index ea861eea..2cb41150 100644 --- a/man/ensemble_fs_result.Rd +++ b/man/ensemble_fs_result.Rd @@ -96,7 +96,7 @@ The result of the ensemble feature selection.} \item{\code{features}}{(\code{\link[=character]{character()}})\cr The vector of features of the task that was used in the ensemble feature 
-selection. Ignored if \code{"benchmark_result"} is given.} +selection.} \item{\code{benchmark_result}}{(\link[mlr3:BenchmarkResult]{mlr3::BenchmarkResult})\cr The benchmark result object.} @@ -154,18 +154,27 @@ Opens the corresponding help page referenced by field \verb{$man}. \subsection{Method \code{feature_ranking()}}{ Calculates the feature ranking. \subsection{Usage}{ -\if{html}{\out{
    }}\preformatted{EnsembleFSResult$feature_ranking(method = "inclusion_probability")}\if{html}{\out{
    }} +\if{html}{\out{
    }}\preformatted{EnsembleFSResult$feature_ranking(method = "approval_voting")}\if{html}{\out{
    }} } \subsection{Arguments}{ \if{html}{\out{
    }} \describe{ \item{\code{method}}{(\code{character(1)})\cr -The method to calculate the feature ranking. -Currently, only \code{"inclusion_probability"} is supported.} +The method to calculate the feature ranking.} } \if{html}{\out{
    }} } +\subsection{Details}{ +The feature ranking process is built on the following framework: models +act as voters, features act as candidates, and voters select certain +candidates (features). The primary objective is to compile these selections +into a consensus ranked list of features, effectively forming a committee. +Currently, only \code{"approval_voting"} method is supported, which selects the +candidates/features that have the highest approval score or selection +frequency, i.e. appear the most often. +} + \subsection{Returns}{ A \link[data.table:data.table]{data.table} listing all the features, ordered by decreasing inclusion probability scores (depending on the diff --git a/man/ensemble_fselect.Rd b/man/ensemble_fselect.Rd index 2d166203..72dd0ff7 100644 --- a/man/ensemble_fselect.Rd +++ b/man/ensemble_fselect.Rd @@ -67,6 +67,9 @@ Whether to store the benchmark result in \link{EnsembleFSResult} or not.} \item{store_models}{(\code{logical(1)})\cr Whether to store models in \link{auto_fselector} or not.} } +\value{ +an \link{EnsembleFSResult} object. +} \description{ Ensemble feature selection using multiple learners. The ensemble feature selection method is designed to identify the most informative features from a given dataset by leveraging multiple machine learning models and resampling techniques. 
From 641465292bf21ebdee2da95d808aa323ce3ed81f Mon Sep 17 00:00:00 2001 From: john Date: Mon, 10 Jun 2024 18:24:43 +0200 Subject: [PATCH 39/43] fix test --- tests/testthat/test_ensemble_fselect.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/testthat/test_ensemble_fselect.R b/tests/testthat/test_ensemble_fselect.R index 6c61268f..0c39814c 100644 --- a/tests/testthat/test_ensemble_fselect.R +++ b/tests/testthat/test_ensemble_fselect.R @@ -60,7 +60,7 @@ test_that("EnsembleFSResult initialization", { result = data.table(a = 1) # not proper column name expect_error(EnsembleFSResult$new(result = result, features = features)) - result = data.table(iter = 1:2, learner_id = list("l1", "l2"), + result = data.table(resampling_id = 1:2, learner_id = list("l1", "l2"), features = list(LETTERS[1], LETTERS[1:3]), n_features = c(1,3)) # works without benchmark result object @@ -68,7 +68,7 @@ test_that("EnsembleFSResult initialization", { expect_class(efsr, "EnsembleFSResult") tab = as.data.table(efsr) expect_data_table(tab) - expect_names(names(tab), identical.to = c("iter", "learner_id", "features", "n_features")) + expect_names(names(tab), identical.to = c("resampling_id", "learner_id", "features", "n_features")) }) test_that("different callbacks can be set", { From 3628733b229a3628ff66585799c30ba3b5e125b7 Mon Sep 17 00:00:00 2001 From: john Date: Mon, 10 Jun 2024 18:27:56 +0200 Subject: [PATCH 40/43] document result data.table columns --- R/EnsembleFSResult.R | 2 ++ man/ensemble_fs_result.Rd | 4 +++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/R/EnsembleFSResult.R b/R/EnsembleFSResult.R index 14035764..9c27636f 100644 --- a/R/EnsembleFSResult.R +++ b/R/EnsembleFSResult.R @@ -55,6 +55,8 @@ EnsembleFSResult = R6Class("EnsembleFSResult", #' #' @param result ([data.table::data.table])\cr #' The result of the ensemble feature selection. 
+ #' Column names should include `"resampling_id"`, `"learner_id"`, `"features"` + #' and `"n_features"`. #' @param features ([character()])\cr #' The vector of features of the task that was used in the ensemble feature #' selection. diff --git a/man/ensemble_fs_result.Rd b/man/ensemble_fs_result.Rd index 2cb41150..c394dda7 100644 --- a/man/ensemble_fs_result.Rd +++ b/man/ensemble_fs_result.Rd @@ -92,7 +92,9 @@ Creates a new instance of this \link[R6:R6Class]{R6} class. \if{html}{\out{
    }} \describe{ \item{\code{result}}{(\link[data.table:data.table]{data.table::data.table})\cr -The result of the ensemble feature selection.} +The result of the ensemble feature selection. +Column names should include \code{"resampling_id"}, \code{"learner_id"}, \code{"features"} +and \code{"n_features"}.} \item{\code{features}}{(\code{\link[=character]{character()}})\cr The vector of features of the task that was used in the ensemble feature From bc12f35840a6e5b312bb23c2b65511921905853c Mon Sep 17 00:00:00 2001 From: be-marc Date: Tue, 11 Jun 2024 11:22:34 +0200 Subject: [PATCH 41/43] feat: per learner stability --- R/EnsembleFSResult.R | 52 ++++++++++++++--------- R/ensemble_fselect.R | 4 +- tests/testthat/test_ensemble_fselect.R | 59 +++++++++++++++++++++++--- 3 files changed, 88 insertions(+), 27 deletions(-) diff --git a/R/EnsembleFSResult.R b/R/EnsembleFSResult.R index 9c27636f..e37250f7 100644 --- a/R/EnsembleFSResult.R +++ b/R/EnsembleFSResult.R @@ -64,7 +64,7 @@ EnsembleFSResult = R6Class("EnsembleFSResult", #' The benchmark result object. initialize = function(result, features, benchmark_result = NULL) { assert_data_table(result) - assert_names(names(result), must.include = c("resampling_id", "learner_id", "features", "n_features")) + assert_names(names(result), must.include = c("resampling_iteration", "learner_id", "features", "n_features")) private$.result = result private$.features = assert_character(features, any.missing = FALSE, null.ok = FALSE) self$benchmark_result = if (!is.null(benchmark_result)) assert_benchmark_result(benchmark_result) @@ -85,7 +85,7 @@ EnsembleFSResult = R6Class("EnsembleFSResult", #' @param ... (ignored). print = function(...) 
{ catf(format(self)) - print(private$.result[, c("iter", "learner_id", "n_features"), with = FALSE]) + print(private$.result[, c("resampling_iteration", "learner_id", "n_features"), with = FALSE]) }, #' @description @@ -98,20 +98,14 @@ EnsembleFSResult = R6Class("EnsembleFSResult", #' Calculates the feature ranking. #' #' @details - #' The feature ranking process is built on the following framework: models - #' act as voters, features act as candidates, and voters select certain - #' candidates (features). The primary objective is to compile these selections - #' into a consensus ranked list of features, effectively forming a committee. - #' Currently, only `"approval_voting"` method is supported, which selects the - #' candidates/features that have the highest approval score or selection - #' frequency, i.e. appear the most often. + #' The feature ranking process is built on the following framework: models act as voters, features act as candidates, and voters select certain candidates (features). + #' The primary objective is to compile these selections into a consensus ranked list of features, effectively forming a committee. + #' Currently, only `"approval_voting"` method is supported, which selects the candidates/features that have the highest approval score or selection frequency, i.e. appear the most often. #' #' @param method (`character(1)`)\cr #' The method to calculate the feature ranking. #' - #' @return A [data.table][data.table::data.table] listing all the features, - #' ordered by decreasing inclusion probability scores (depending on the - #' `method`) + #' @return A [data.table::data.table] listing all the features, ordered by decreasing inclusion probability scores (depending on the `method`) feature_ranking = function(method = "approval_voting") { assert_choice(method, choices = "approval_voting") @@ -151,21 +145,38 @@ EnsembleFSResult = R6Class("EnsembleFSResult", #' Default is `"jaccard"`. #' @param ... 
(`any`)\cr #' Additional arguments passed to the stability measure function. + #' @param global (`logical(1)`)\cr + #' Whether to calculate the stability globally or for each learner. #' @param reset_cache (`logical(1)`)\cr #' If `TRUE`, the cached results are ignored. - stability = function(stability_measure = "jaccard", ..., reset_cache = FALSE) { + stability = function(stability_measure = "jaccard", ..., global = TRUE, reset_cache = FALSE) { funs = stabm::listStabilityMeasures()$Name keys = tolower(gsub("stability", "", funs)) assert_choice(stability_measure, choices = keys) - # cached results - if (!is.null(private$.stability[[stability_measure]]) && !reset_cache) { - return(private$.stability[[stability_measure]]) + if (global) { + # cached results + if (!is.null(private$.stability_global[[stability_measure]]) && !reset_cache) { + return(private$.stability_global[[stability_measure]]) + } + + fun = get(funs[which(stability_measure == keys)], envir = asNamespace("stabm")) + private$.stability_global[[stability_measure]] = fun(private$.result$features, ...) + private$.stability_global[[stability_measure]] + } else { + # cached results + if (!is.null(private$.stability_learner[[stability_measure]]) && !reset_cache) { + return(private$.stability_learner[[stability_measure]]) + } + + fun = get(funs[which(stability_measure == keys)], envir = asNamespace("stabm")) + + tab = private$.result[, list(score = fun(.SD$features, ...)), by = learner_id] + private$.stability_learner[[stability_measure]] = set_names(tab$score, tab$learner_id) + private$.stability_learner[[stability_measure]] } - fun = get(funs[which(stability_measure == keys)], envir = asNamespace("stabm")) - private$.stability[[stability_measure]] = fun(private$.result$features, ...) 
- private$.stability[[stability_measure]] + } ), @@ -183,7 +194,8 @@ EnsembleFSResult = R6Class("EnsembleFSResult", private = list( .result = NULL, - .stability = NULL, + .stability_global = NULL, + .stability_learner = NULL, .feature_ranking = NULL, .features = NULL ) diff --git a/R/ensemble_fselect.R b/R/ensemble_fselect.R index c179ef1a..87942adf 100644 --- a/R/ensemble_fselect.R +++ b/R/ensemble_fselect.R @@ -92,8 +92,8 @@ ensemble_fselect = function( resampling = rsmp("insample")$instantiate(task_subset) data.table( - resampling_id = i, - learner_id = map(learners, "id"), + resampling_iteration = i, + learner_id = map_chr(learners, "id"), learner = afss, task = list(task_subset), resampling = list(resampling) diff --git a/tests/testthat/test_ensemble_fselect.R b/tests/testthat/test_ensemble_fselect.R index 0c39814c..d6a03a0a 100644 --- a/tests/testthat/test_ensemble_fselect.R +++ b/tests/testthat/test_ensemble_fselect.R @@ -17,13 +17,56 @@ test_that("ensemble feature selection works", { expect_vector(efsr$result$classif.ce, size = 4) expect_benchmark_result(efsr$benchmark_result) + # stability expect_number(efsr$stability(stability_measure = "jaccard")) + stability = efsr$stability(stability_measure = "jaccard", global = FALSE) + expect_numeric(stability, len = 2) + expect_names(names(stability), identical.to = c("classif.rpart", "classif.featureless")) + + # feature ranking + feature_ranking = efsr$feature_ranking() + expect_data_table(feature_ranking, nrows = length(task$feature_names)) + expect_names(names(feature_ranking), identical.to = c("feature", "inclusion_probability")) + + # data.table conversion + tab = as.data.table(efsr) + expect_names(names(tab), identical.to = c("resampling_iteration", "learner_id", "features", "n_features", "classif.ce", "task", "learner", "resampling")) +}) + +test_that("ensemble feature selection works without benchmark result", { + task = tsk("sonar") + efsr = ensemble_fselect( + fselector = fs("random_search"), + task = 
task, + learners = lrns(c("classif.rpart", "classif.featureless")), + init_resampling = rsmp("subsampling", repeats = 2), + inner_resampling = rsmp("cv", folds = 3), + measure = msr("classif.ce"), + terminator = trm("evals", n_evals = 5), + store_benchmark_result = FALSE + ) + + expect_character(efsr$man) + expect_data_table(efsr$result, nrows = 4) + expect_list(efsr$result$features, any.missing = FALSE, len = 4) + expect_vector(efsr$result$n_features, size = 4) + expect_vector(efsr$result$classif.ce, size = 4) + expect_null(efsr$benchmark_result) + + # stability + expect_number(efsr$stability(stability_measure = "jaccard")) + stability = efsr$stability(stability_measure = "jaccard", global = FALSE) + expect_numeric(stability, len = 2) + expect_names(names(stability), identical.to = c("classif.rpart", "classif.featureless")) + + # feature ranking feature_ranking = efsr$feature_ranking() expect_data_table(feature_ranking, nrows = length(task$feature_names)) expect_names(names(feature_ranking), identical.to = c("feature", "inclusion_probability")) + # data.table conversion tab = as.data.table(efsr) - tab + expect_names(names(tab), identical.to = c("resampling_iteration", "learner_id", "features", "n_features", "classif.ce")) }) test_that("ensemble feature selection works with rfe", { @@ -46,13 +89,20 @@ test_that("ensemble feature selection works with rfe", { expect_list(efsr$result$importance, any.missing = FALSE, len = 4) expect_benchmark_result(efsr$benchmark_result) + # stability expect_number(efsr$stability(stability_measure = "jaccard")) + stability = efsr$stability(stability_measure = "jaccard", global = FALSE) + expect_numeric(stability, len = 2) + expect_names(names(stability), identical.to = c("classif.rpart", "classif.featureless")) + + # feature ranking feature_ranking = efsr$feature_ranking() expect_data_table(feature_ranking, nrows = length(task$feature_names)) expect_names(names(feature_ranking), identical.to = c("feature", "inclusion_probability")) + 
# data.table conversion tab = as.data.table(efsr) - tab + expect_names(names(tab), identical.to = c("resampling_iteration", "learner_id", "features", "n_features", "classif.ce", "importance", "task", "learner", "resampling")) }) test_that("EnsembleFSResult initialization", { @@ -60,7 +110,7 @@ test_that("EnsembleFSResult initialization", { result = data.table(a = 1) # not proper column name expect_error(EnsembleFSResult$new(result = result, features = features)) - result = data.table(resampling_id = 1:2, learner_id = list("l1", "l2"), + result = data.table(resampling_iteration = 1:2, learner_id = list("l1", "l2"), features = list(LETTERS[1], LETTERS[1:3]), n_features = c(1,3)) # works without benchmark result object @@ -68,11 +118,10 @@ test_that("EnsembleFSResult initialization", { expect_class(efsr, "EnsembleFSResult") tab = as.data.table(efsr) expect_data_table(tab) - expect_names(names(tab), identical.to = c("resampling_id", "learner_id", "features", "n_features")) + expect_names(names(tab), identical.to = c("resampling_iteration", "learner_id", "features", "n_features")) }) test_that("different callbacks can be set", { - callback_test = callback_batch_fselect("mlr3fselect.test", on_eval_before_archive = function(callback, context) { context$aggregated_performance[, callback_active := context$instance$objective$learner$id == "classif.rpart"] From 7ac4b6ca0b3f02a781d24deb85ab7b22d6a6751f Mon Sep 17 00:00:00 2001 From: be-marc Date: Tue, 11 Jun 2024 11:27:40 +0200 Subject: [PATCH 42/43] docs: update --- R/EnsembleFSResult.R | 39 ++++++++++++++------------- R/ensemble_fselect.R | 4 ++- man/ensemble_fs_result.Rd | 56 ++++++++++++++++++++------------------- man/ensemble_fselect.Rd | 4 ++- 4 files changed, 55 insertions(+), 48 deletions(-) diff --git a/R/EnsembleFSResult.R b/R/EnsembleFSResult.R index e37250f7..54ad93d5 100644 --- a/R/EnsembleFSResult.R +++ b/R/EnsembleFSResult.R @@ -17,27 +17,27 @@ #' #' @examples #' \donttest{ -#' efsr = ensemble_fselect( -#' 
fselector = fs("rfe", n_features = 2, feature_fraction = 0.8), -#' task = tsk("sonar"), -#' learners = lrns(c("classif.rpart", "classif.featureless")), -#' init_resampling = rsmp("subsampling", repeats = 2), -#' inner_resampling = rsmp("cv", folds = 3), -#' measure = msr("classif.ce"), -#' terminator = trm("none") -#' ) +#' efsr = ensemble_fselect( +#' fselector = fs("rfe", n_features = 2, feature_fraction = 0.8), +#' task = tsk("sonar"), +#' learners = lrns(c("classif.rpart", "classif.featureless")), +#' init_resampling = rsmp("subsampling", repeats = 2), +#' inner_resampling = rsmp("cv", folds = 3), +#' measure = msr("classif.ce"), +#' terminator = trm("none") +#' ) #' -#' # contains the benchmark result -#' efsr$benchmark_result +#' # contains the benchmark result +#' efsr$benchmark_result #' -#' # contains the selected features for each iteration -#' efsr$result +#' # contains the selected features for each iteration +#' efsr$result #' -#' # returns the stability of the selected features -#' efsr$stability(stability_measure = "jaccard") +#' # returns the stability of the selected features +#' efsr$stability(stability_measure = "jaccard") #' -#' # returns a ranking of all features -#' head(efsr$feature_ranking()) +#' # returns a ranking of all features +#' head(efsr$feature_ranking()) #' } EnsembleFSResult = R6Class("EnsembleFSResult", public = list( @@ -149,6 +149,9 @@ EnsembleFSResult = R6Class("EnsembleFSResult", #' Whether to calculate the stability globally or for each learner. #' @param reset_cache (`logical(1)`)\cr #' If `TRUE`, the cached results are ignored. + #' + #' @return A `numeric()` value representing the stability of the selected features. + #' Or a `numeric()` vector with the stability of the selected features for each learner. 
stability = function(stability_measure = "jaccard", ..., global = TRUE, reset_cache = FALSE) { funs = stabm::listStabilityMeasures()$Name keys = tolower(gsub("stability", "", funs)) @@ -175,8 +178,6 @@ EnsembleFSResult = R6Class("EnsembleFSResult", private$.stability_learner[[stability_measure]] = set_names(tab$score, tab$learner_id) private$.stability_learner[[stability_measure]] } - - } ), diff --git a/R/ensemble_fselect.R b/R/ensemble_fselect.R index 87942adf..afde7803 100644 --- a/R/ensemble_fselect.R +++ b/R/ensemble_fselect.R @@ -5,6 +5,7 @@ #' @description #' Ensemble feature selection using multiple learners. #' The ensemble feature selection method is designed to identify the most informative features from a given dataset by leveraging multiple machine learning models and resampling techniques. +#' Returns an [EnsembleFSResult]. #' #' @details #' The method begins by applying an initial resampling technique specified by the user, to create **multiple subsamples** from the original dataset. 
@@ -42,7 +43,7 @@ #' @export #' @examples #' \donttest{ -#' ensemble_fselect( +#' efsr = ensemble_fselect( #' fselector = fs("random_search"), #' task = tsk("sonar"), #' learners = lrns(c("classif.rpart", "classif.featureless")), @@ -51,6 +52,7 @@ #' measure = msr("classif.ce"), #' terminator = trm("evals", n_evals = 10) #' ) +#' efsr #' } ensemble_fselect = function( fselector, diff --git a/man/ensemble_fs_result.Rd b/man/ensemble_fs_result.Rd index c394dda7..d5720cfb 100644 --- a/man/ensemble_fs_result.Rd +++ b/man/ensemble_fs_result.Rd @@ -25,27 +25,27 @@ Whether to add the learner, task and resampling information from the benchmark r \examples{ \donttest{ -efsr = ensemble_fselect( - fselector = fs("rfe", n_features = 2, feature_fraction = 0.8), - task = tsk("sonar"), - learners = lrns(c("classif.rpart", "classif.featureless")), - init_resampling = rsmp("subsampling", repeats = 2), - inner_resampling = rsmp("cv", folds = 3), - measure = msr("classif.ce"), - terminator = trm("none") -) + efsr = ensemble_fselect( + fselector = fs("rfe", n_features = 2, feature_fraction = 0.8), + task = tsk("sonar"), + learners = lrns(c("classif.rpart", "classif.featureless")), + init_resampling = rsmp("subsampling", repeats = 2), + inner_resampling = rsmp("cv", folds = 3), + measure = msr("classif.ce"), + terminator = trm("none") + ) -# contains the benchmark result -efsr$benchmark_result + # contains the benchmark result + efsr$benchmark_result -# contains the selected features for each iteration -efsr$result + # contains the selected features for each iteration + efsr$result -# returns the stability of the selected features -efsr$stability(stability_measure = "jaccard") + # returns the stability of the selected features + efsr$stability(stability_measure = "jaccard") -# returns a ranking of all features -head(efsr$feature_ranking()) + # returns a ranking of all features + head(efsr$feature_ranking()) } } \section{Public fields}{ @@ -168,19 +168,13 @@ The method to calculate the 
feature ranking.} \if{html}{\out{
    }} } \subsection{Details}{ -The feature ranking process is built on the following framework: models -act as voters, features act as candidates, and voters select certain -candidates (features). The primary objective is to compile these selections -into a consensus ranked list of features, effectively forming a committee. -Currently, only \code{"approval_voting"} method is supported, which selects the -candidates/features that have the highest approval score or selection -frequency, i.e. appear the most often. +The feature ranking process is built on the following framework: models act as voters, features act as candidates, and voters select certain candidates (features). +The primary objective is to compile these selections into a consensus ranked list of features, effectively forming a committee. +Currently, only \code{"approval_voting"} method is supported, which selects the candidates/features that have the highest approval score or selection frequency, i.e. appear the most often. } \subsection{Returns}{ -A \link[data.table:data.table]{data.table} listing all the features, -ordered by decreasing inclusion probability scores (depending on the -\code{method}) +A \link[data.table:data.table]{data.table::data.table} listing all the features, ordered by decreasing inclusion probability scores (depending on the \code{method}) } } \if{html}{\out{
    }} @@ -194,6 +188,7 @@ When the same stability measure is requested again with different arguments, the \if{html}{\out{
    }}\preformatted{EnsembleFSResult$stability( stability_measure = "jaccard", ..., + global = TRUE, reset_cache = FALSE )}\if{html}{\out{
    }} } @@ -209,11 +204,18 @@ Default is \code{"jaccard"}.} \item{\code{...}}{(\code{any})\cr Additional arguments passed to the stability measure function.} +\item{\code{global}}{(\code{logical(1)})\cr +Whether to calculate the stability globally or for each learner.} + \item{\code{reset_cache}}{(\code{logical(1)})\cr If \code{TRUE}, the cached results are ignored.} } \if{html}{\out{}} } +\subsection{Returns}{ +A \code{numeric()} value representing the global stability of the selected features, +or a \code{numeric()} vector with the stability of the selected features for each learner if \code{global = FALSE}. +} }
    }} \if{html}{\out{}} diff --git a/man/ensemble_fselect.Rd b/man/ensemble_fselect.Rd index 72dd0ff7..6a8454f1 100644 --- a/man/ensemble_fselect.Rd +++ b/man/ensemble_fselect.Rd @@ -73,6 +73,7 @@ an \link{EnsembleFSResult} object. \description{ Ensemble feature selection using multiple learners. The ensemble feature selection method is designed to identify the most informative features from a given dataset by leveraging multiple machine learning models and resampling techniques. +Returns an \link{EnsembleFSResult}. } \details{ The method begins by applying an initial resampling technique specified by the user, to create \strong{multiple subsamples} from the original dataset. @@ -84,7 +85,7 @@ Results are stored in an \link{EnsembleFSResult}. } \examples{ \donttest{ - ensemble_fselect( + efsr = ensemble_fselect( fselector = fs("random_search"), task = tsk("sonar"), learners = lrns(c("classif.rpart", "classif.featureless")), @@ -93,5 +94,6 @@ Results are stored in an \link{EnsembleFSResult}. measure = msr("classif.ce"), terminator = trm("evals", n_evals = 10) ) + efsr } } From da3762af27b1b4a30c7bcd72334d0b93ed503dc8 Mon Sep 17 00:00:00 2001 From: be-marc Date: Tue, 11 Jun 2024 11:34:26 +0200 Subject: [PATCH 43/43] chore: news --- NEWS.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/NEWS.md b/NEWS.md index 69450d1c..44852289 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,7 @@ # mlr3fselect (development version) +* feat: Add ensemble feature selection function `ensemble_fselect()`. + # mlr3fselect 0.12.0 * feat: Add number of features to `instance$result`.