Skip to content

Commit

Permalink
combine EnsembleFSResult objects (#128)
Browse files Browse the repository at this point in the history
* remove bmr_score class

* feat: combine EnsembleFSResults

* add tests for combining efsr objects

* update news

* use assert_set_equal

* fix test
  • Loading branch information
bblodfon authored Dec 10, 2024
1 parent 003f6e9 commit ab6360a
Show file tree
Hide file tree
Showing 8 changed files with 286 additions and 3 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
S3method(as.data.table,ArchiveBatchFSelect)
S3method(as.data.table,DictionaryFSelector)
S3method(as.data.table,EnsembleFSResult)
S3method(c,EnsembleFSResult)
S3method(extract_inner_fselect_archives,BenchmarkResult)
S3method(extract_inner_fselect_archives,ResampleResult)
S3method(extract_inner_fselect_results,BenchmarkResult)
Expand Down
7 changes: 4 additions & 3 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
# mlr3fselect (development version)

* Use [fastVoteR](https://github.com/bblodfon/fastVoteR) for feature ranking in `EnsembleFSResult()` objects
* Add embedded ensemble feature selection `embedded_ensemble_fselect()`
* Refactor `ensemble_fselect()` and `EnsembleFSResult()`
* refactor: Use [fastVoteR](https://github.com/bblodfon/fastVoteR) for feature ranking in `EnsembleFSResult()` objects
* feat: Add embedded ensemble feature selection `embedded_ensemble_fselect()`
* refactor/perf: `ensemble_fselect()` and `EnsembleFSResult()`
* feat: Add `c.EnsembleFSResult(...)` and `EnsembleFSResult$combine(...)` methods

# mlr3fselect 1.2.1

Expand Down
89 changes: 89 additions & 0 deletions R/EnsembleFSResult.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@
#' * `x` ([EnsembleFSResult])
#' * `benchmark_result` (`logical(1)`)\cr
#' Whether to add the learner, task and resampling information from the benchmark result.
#' * `c(...)`\cr
#' ([EnsembleFSResult], ...) -> [EnsembleFSResult]\cr
#' Combines multiple [EnsembleFSResult] objects into a new [EnsembleFSResult].
#'
#' @references
#' `r format_bib("das1999", "meinshausen2010")`
Expand Down Expand Up @@ -166,6 +169,75 @@ EnsembleFSResult = R6Class("EnsembleFSResult",
private$.active_measure = which
},

#' @description
#' Combines a second [EnsembleFSResult] into the current object, modifying it **in-place**.
#' If the second [EnsembleFSResult] (`efsr`) is `NULL`, the method returns the object unmodified.
#'
#' Both objects must have the same task features and `measure`.
#' If the `inner_measure` differs between the objects or is `NULL` in either, it will be set to `NULL` in the combined object.
#' Additionally, the `importance` column will be removed if it is missing in either object.
#' The result tables are bound by matching column names, so the column order may differ between the two objects.
#' If both objects contain a `benchmark_result`, these will be combined.
#' Otherwise, the combined object will have a `NULL` value for `benchmark_result`.
#'
#' This method modifies the object by reference.
#' To preserve the original state, explicitly `$clone()` the object beforehand.
#' Alternatively, you can use the [c()] function, which internally calls this method.
#'
#' @param efsr ([EnsembleFSResult])\cr
#' A second [EnsembleFSResult] object to combine with the current object.
#'
#' @return
#' Returns the object itself, but modified **by reference**.
combine = function(efsr) {
  # Nothing to merge: hand the unchanged object back
  if (is.null(efsr)) {
    return(invisible(self))
  }

  assert_class(efsr, "EnsembleFSResult")

  # Ensure both objects have the same task features
  assert_set_equal(private$.features, get_private(efsr)$.features)

  # Ensure both objects have the same (outer) measure
  assert_set_equal(private$.measure$id, get_private(efsr)$.measure$id)

  # Set inner measure to NULL if the measure ids are different or one of them is NULL
  inner_msr = private$.inner_measure
  inner_msr2 = get_private(efsr)$.inner_measure
  # `result2` is only modified via `[[<-` below, i.e. through a re-assignment
  # of this local variable and not through `efsr`'s private field
  result2 = get_private(efsr)$.result
  if (is.null(inner_msr) || is.null(inner_msr2) || inner_msr$id != inner_msr2$id) {
    private$.inner_measure = NULL
    # NOTE(review): `private$.active_measure` is left untouched here - confirm
    # it cannot still refer to the inner measure that was just dropped

    # Remove associated inner measure scores from results
    if (!is.null(inner_msr)) {
      private$.result[[sprintf("%s_inner", inner_msr$id)]] = NULL
    }
    if (!is.null(inner_msr2)) {
      result2[[sprintf("%s_inner", inner_msr2$id)]] = NULL
    }
  }

  # Remove importance scores if missing in either object
  has_imp = "importance" %in% names(private$.result)
  has_imp2 = "importance" %in% names(result2)
  if (!has_imp || !has_imp2) {
    if (has_imp) private$.result[["importance"]] = NULL
    if (has_imp2) result2[["importance"]] = NULL
  }

  # Combine results from both objects.
  # `use.names = TRUE` matches columns by name instead of by position, so two
  # tables holding the same columns in a different order are bound correctly;
  # with `fill = FALSE` a column present in only one table raises an error
  # instead of being bound to an unrelated column.
  private$.result = data.table::rbindlist(
    list(private$.result, result2),
    use.names = TRUE,
    fill = FALSE
  )

  # Merge benchmark results if available in both objects
  has_bmr = !is.null(self$benchmark_result)
  has_bmr2 = !is.null(efsr$benchmark_result)
  if (has_bmr && has_bmr2) {
    self$benchmark_result = self$benchmark_result$combine(efsr$benchmark_result)
  } else {
    self$benchmark_result = NULL
  }

  invisible(self)
},

#' @description
#' Calculates the feature ranking via [fastVoteR::rank_candidates()].
#'
Expand Down Expand Up @@ -499,3 +571,20 @@ EnsembleFSResult = R6Class("EnsembleFSResult",
as.data.table.EnsembleFSResult = function(x, ...) {
  # The conversion simply exposes the `result` field of the object;
  # arguments passed via `...` are accepted but not used.
  result_table = x$result
  result_table
}

#' @export
c.EnsembleFSResult = function(...) {
  objects = list(...)

  # Start from a deep clone of the first argument; all `$combine()` calls below
  # are made on this clone rather than on the first argument itself.
  combined = objects[[1L]]$clone(deep = TRUE)

  # Fold the remaining arguments into the clone, one by one and in argument order.
  # With a single argument there is nothing to iterate over and the clone is
  # returned as is.
  for (other in objects[-1L]) {
    combined = combined$combine(other)
  }

  combined
}
2 changes: 2 additions & 0 deletions R/embedded_ensemble_fselect.R
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,8 @@ embedded_ensemble_fselect = function(

# extract scores on the test sets
scores = bmr$score(measure)
# remove `bmr_score` class
class(scores) = c("data.table", "data.frame")

set(scores, j = "features", value = features)
set(scores, j = "n_features", value = n_features)
Expand Down
2 changes: 2 additions & 0 deletions R/ensemble_fselect.R
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,8 @@ ensemble_fselect = function(

# extract scores on the test sets
scores = bmr$score(measure)
# remove `bmr_score` class
class(scores) = c("data.table", "data.frame")

set(scores, j = "features", value = features)
set(scores, j = "n_features", value = n_features)
Expand Down
36 changes: 36 additions & 0 deletions man/ensemble_fs_result.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

41 changes: 41 additions & 0 deletions tests/testthat/test_embedded_ensemble_fselect.R
Original file line number Diff line number Diff line change
Expand Up @@ -62,3 +62,44 @@ test_that("embedded efs works", {
expect_data_table(feature_ranking, nrows = length(task$feature_names))
expect_equal(names(feature_ranking), c("feature", "score", "norm_score", "borda_score"))
})

test_that("combine embedded efs results", {
  task = tsk("sonar")

  # Runs embedded ensemble feature selection on `task` under a fixed seed;
  # only the seed and the number of subsampling repeats vary between the runs.
  run_embedded_efs = function(seed, repeats) {
    with_seed(seed, {
      embedded_ensemble_fselect(
        task = task,
        learners = lrns(c("classif.rpart", "classif.featureless")),
        init_resampling = rsmp("subsampling", repeats = repeats),
        measure = msr("classif.ce")
      )
    })
  }

  efsr1 = run_embedded_efs(seed = 42, repeats = 2)
  efsr2 = run_embedded_efs(seed = 43, repeats = 3)

  # Both ways of combining must yield an equivalent object: the in-place
  # `$combine()` on a deep clone and the `c()` method.
  combined = list(
    efsr1$clone(deep = TRUE)$combine(efsr2),
    c(efsr1, efsr2)
  )

  for (comb in combined) {
    expect_class(comb, "EnsembleFSResult")
    expect_data_table(comb$result, nrows = 10L)
    expect_equal(comb$n_learners, 2L)
    expect_equal(get_private(comb)$.measure$id, "classif.ce")
    expect_null(get_private(comb)$.inner_measure)

    # the benchmark results of both objects are merged as well
    assert_benchmark_result(comb$benchmark_result)
    expect_equal(comb$benchmark_result$n_resample_results, 4L)
    expect_equal(nrow(get_private(comb$benchmark_result)$.data$data$fact), 10L)
  }
})
111 changes: 111 additions & 0 deletions tests/testthat/test_ensemble_fselect.R
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,117 @@ test_that("EnsembleFSResult initialization", {
expect_false(efsr$measure$minimize)
})

test_that("combining EnsembleFSResult objects", {
  # Hand-crafted selections: 3 resampling iterations x 2 learners
  selected_features = list(
    c("V3", "V20"),
    c("V3", "V5", "V19", "V15"),
    c("V11", "V7", "V6", "V8"),
    c("V11"),
    c("V17", "V2", "V12", "V9", "V1"),
    c("V11", "V18", "V9")
  )
  feats = paste0("V", 1:20)

  res1 = data.table(
    resampling_iteration = c(1, 1, 2, 2, 3, 3),
    learner_id = rep(c("lrn1", "lrn2"), 3),
    n_features = c(2, 4, 4, 1, 5, 3),
    features = selected_features,
    classif.ce = runif(6),
    classif.acc_inner = runif(6) # inner measure column carries the `_inner` suffix
  )

  # same result, just different learners
  res2 = data.table(
    resampling_iteration = c(1, 1, 2, 2, 3, 3),
    learner_id = rep(c("lrn3", "lrn4"), 3),
    n_features = c(2, 4, 4, 1, 5, 3),
    features = selected_features,
    classif.ce = runif(6),
    classif.acc_inner = runif(6) # inner measure column carries the `_inner` suffix
  )

  # no `inner_measure`
  res3 = res2[, -c("classif.acc_inner")]
  # different `measure`
  res4 = setnames(copy(res3), "classif.ce", "classif.auc")
  # different `inner_measure`
  res5 = setnames(copy(res2), "classif.acc_inner", "classif.ce_inner")

  # initialize efsr objects
  m1 = msr("classif.ce")
  m2 = msr("classif.acc")
  m3 = msr("classif.auc")
  efsr1 = EnsembleFSResult$new(res1, features = feats, measure = m1, inner_measure = m2)
  efsr2 = EnsembleFSResult$new(res2, features = feats, measure = m1, inner_measure = m2)
  efsr3 = EnsembleFSResult$new(res3, features = feats, measure = m1)
  efsr4 = EnsembleFSResult$new(res4, features = feats, measure = m3)
  efsr5 = EnsembleFSResult$new(res5, features = feats, measure = m1, inner_measure = m1)

  # combine efsr with nothing gives the same object back deep-cloned
  efsr11 = c(efsr1)
  # `expect_class()` (not `assert_class()`) so that the check is registered as
  # a test expectation
  expect_class(efsr11, "EnsembleFSResult")
  expect_equal(efsr1$result$classif.ce, efsr11$result$classif.ce)

  # combine efsrs with same inner and outer measures
  comb1 = efsr1$clone(deep = TRUE)$combine(efsr2)
  comb11 = c(efsr1, efsr2) # same as above
  # efsr1 doesn't change
  expect_data_table(efsr1$result, nrows = 6L)
  expect_equal(efsr1$n_learners, 2L)
  expect_equal(get_private(efsr1)$.measure$id, "classif.ce")
  expect_equal(get_private(efsr1)$.inner_measure$id, "classif.acc")
  # efsr2 doesn't change either
  expect_data_table(efsr2$result, nrows = 6L)
  expect_equal(efsr2$n_learners, 2L)
  expect_equal(get_private(efsr2)$.measure$id, "classif.ce")
  expect_equal(get_private(efsr2)$.inner_measure$id, "classif.acc")
  # combined object has more rows
  expect_data_table(comb1$result, nrows = 12L)
  expect_data_table(comb11$result, nrows = 12L)
  expect_equal(comb1$n_learners, 4L)
  expect_equal(comb11$n_learners, 4L)
  expect_equal(get_private(comb1)$.measure$id, "classif.ce")
  expect_equal(get_private(comb11)$.measure$id, "classif.ce")
  expect_equal(get_private(comb1)$.inner_measure$id, "classif.acc")
  expect_equal(get_private(comb11)$.inner_measure$id, "classif.acc")

  # no `inner_measure` in the 2nd efsr
  comb2 = efsr1$clone(deep = TRUE)$combine(efsr3)
  comb22 = c(efsr1, efsr3)
  expect_equal(get_private(efsr1)$.measure$id, "classif.ce")
  expect_equal(get_private(efsr1)$.inner_measure$id, "classif.acc")
  expect_null(get_private(efsr3)$.inner_measure)
  expect_data_table(comb2$result, nrows = 12L)
  expect_data_table(comb22$result, nrows = 12L)
  expect_equal(comb2$n_learners, 4L)
  expect_equal(comb22$n_learners, 4L)
  expect_equal(get_private(comb2)$.measure$id, "classif.ce")
  expect_equal(get_private(comb22)$.measure$id, "classif.ce")
  expect_null(get_private(comb2)$.inner_measure$id)
  expect_null(get_private(comb22)$.inner_measure$id)

  # different (outer) measure => not possible to combine
  expect_error(efsr1$clone(deep = TRUE)$combine(efsr4))

  # different `inner_measure`
  comb3 = efsr1$clone(deep = TRUE)$combine(efsr5)
  expect_data_table(comb3$result, nrows = 12L)
  expect_equal(comb3$n_learners, 4L)
  expect_equal(get_private(comb3)$.measure$id, "classif.ce")
  expect_null(get_private(comb3)$.inner_measure$id)
  # `inner_measure`s of the individual objects did not change
  expect_equal(get_private(efsr1)$.inner_measure$id, "classif.acc")
  expect_equal(get_private(efsr5)$.inner_measure$id, "classif.ce")

  # multi-combine works
  comb_all = c(efsr1, efsr2, efsr3, efsr5)
  expect_data_table(comb_all$result, nrows = 24L)
  expect_equal(comb_all$n_learners, 4L)
  expect_equal(get_private(comb_all)$.measure$id, "classif.ce")
  expect_null(get_private(comb_all)$.inner_measure$id)
})

test_that("different callbacks can be set", {
callback_test = callback_batch_fselect("mlr3fselect.test",
on_eval_before_archive = function(callback, context) {
Expand Down

0 comments on commit ab6360a

Please sign in to comment.