From f2c73062d5ce5e1f6a5f2cb7f4799688df01a220 Mon Sep 17 00:00:00 2001 From: be-marc Date: Thu, 14 Dec 2023 14:43:41 +0100 Subject: [PATCH 1/3] feat: add n features to data table view of archive --- R/ArchiveFSelect.R | 1 + tests/testthat/test_ArchiveFSelect.R | 32 ++++++++++++++-------------- 2 files changed, 17 insertions(+), 16 deletions(-) diff --git a/R/ArchiveFSelect.R b/R/ArchiveFSelect.R index 05880920..2dfa659d 100644 --- a/R/ArchiveFSelect.R +++ b/R/ArchiveFSelect.R @@ -157,6 +157,7 @@ as.data.table.ArchiveFSelect = function(x, ..., exclude_columns = "uhash", measu # add feature vector tab[, "features" := lapply(transpose(.SD), function(col) x$cols_x[col]), .SDcols = x$cols_x] + tab[, "n_features" := map(get("features"), length)] if (x$benchmark_result$n_resample_results) { # add extra measures diff --git a/tests/testthat/test_ArchiveFSelect.R b/tests/testthat/test_ArchiveFSelect.R index 4615cc39..3f5caaa9 100644 --- a/tests/testthat/test_ArchiveFSelect.R +++ b/tests/testthat/test_ArchiveFSelect.R @@ -61,45 +61,45 @@ test_that("ArchiveFSelect as.data.table function works", { # default tab = as.data.table(instance$archive) - expect_data_table(tab, nrows = 4, ncols = 16) + expect_data_table(tab, nrows = 4, ncols = 17) expect_named(tab, c("age", "glucose", "insulin", "mass", "pedigree", "pregnant", "pressure", "triceps", "classif.ce", - "runtime_learners", "timestamp", "batch_nr", "warnings", "errors", "features", "resample_result")) + "runtime_learners", "timestamp", "batch_nr", "warnings", "errors", "features", "n_features", "resample_result")) # extra measure tab = as.data.table(instance$archive, measures = msr("classif.acc")) - expect_data_table(tab, nrows = 4, ncols = 17) + expect_data_table(tab, nrows = 4, ncols = 18) expect_named(tab, c("age", "glucose", "insulin", "mass", "pedigree", "pregnant", "pressure", "triceps", "classif.ce", - "classif.acc", "runtime_learners", "timestamp", "batch_nr", "warnings", "errors", "features", "resample_result")) + "classif.acc", "runtime_learners", "timestamp", "batch_nr", "warnings", "errors", "features", "n_features", "resample_result")) # extra measures tab = as.data.table(instance$archive, measures = msrs(c("classif.acc", "classif.mcc"))) - expect_data_table(tab, nrows = 4, ncols = 18) + expect_data_table(tab, nrows = 4, ncols = 19) expect_named(tab, c("age", "glucose", "insulin", "mass", "pedigree", "pregnant", "pressure", "triceps", "classif.ce", - "classif.acc", "classif.mcc", "runtime_learners", "timestamp", "batch_nr", "warnings", "errors", "features", "resample_result")) + "classif.acc", "classif.mcc", "runtime_learners", "timestamp", "batch_nr", "warnings", "errors", "features", "n_features", "resample_result")) # exclude column tab = as.data.table(instance$archive, exclude_columns = "timestamp") - expect_data_table(tab, nrows = 4, ncols = 16) + expect_data_table(tab, nrows = 4, ncols = 17) expect_named(tab, c("age", "glucose", "insulin", "mass", "pedigree", "pregnant", "pressure", "triceps", "classif.ce", - "runtime_learners", "batch_nr", "uhash", "warnings", "errors", "features", "resample_result")) + "runtime_learners", "batch_nr", "uhash", "warnings", "errors", "features", "n_features", "resample_result")) # exclude columns tab = as.data.table(instance$archive, exclude_columns = c("timestamp", "uhash")) - expect_data_table(tab, nrows = 4, ncols = 15) + expect_data_table(tab, nrows = 4, ncols = 16) expect_named(tab, c("age", "glucose", "insulin", "mass", "pedigree", "pregnant", "pressure", "triceps", "classif.ce", - "runtime_learners", "batch_nr", "warnings", "errors", "features", "resample_result")) + "runtime_learners", "batch_nr", "warnings", "errors", "features", "n_features", "resample_result")) # no exclude tab = as.data.table(instance$archive, exclude_columns = NULL) - expect_data_table(tab, nrows = 4, ncols = 17) + expect_data_table(tab, nrows = 4, ncols = 18) expect_named(tab, c("age", "glucose", "insulin", "mass", "pedigree", "pregnant", "pressure", "triceps", "classif.ce", - "runtime_learners", "timestamp", "batch_nr", "uhash", "warnings", "errors", "features", "resample_result")) + "runtime_learners", "timestamp", "batch_nr", "uhash", "warnings", "errors", "features", "n_features", "resample_result")) # no unnest tab = as.data.table(instance$archive, unnest = NULL) - expect_data_table(tab, nrows = 4, ncols = 16) + expect_data_table(tab, nrows = 4, ncols = 17) expect_named(tab, c("age", "glucose", "insulin", "mass", "pedigree", "pregnant", "pressure", "triceps", "classif.ce", - "runtime_learners", "timestamp", "batch_nr", "warnings", "errors", "features", "resample_result")) + "runtime_learners", "timestamp", "batch_nr", "warnings", "errors", "features", "n_features", "resample_result")) # without benchmark result instance = FSelectInstanceSingleCrit$new( @@ -113,9 +113,9 @@ test_that("ArchiveFSelect as.data.table function works", { fselector$optimize(instance) tab = as.data.table(instance$archive) - expect_data_table(tab, nrows = 4, ncols = 15) + expect_data_table(tab, nrows = 4, ncols = 16) expect_named(tab, c("age", "glucose", "insulin", "mass", "pedigree", "pregnant", "pressure", "triceps", "classif.ce", - "runtime_learners", "timestamp", "batch_nr", "warnings", "errors", "features")) + "runtime_learners", "timestamp", "batch_nr", "warnings", "errors", "features", "n_features")) # empty archive instance = FSelectInstanceSingleCrit$new( From 72dd876a7e1267f6312e88c07ed29e7faab50ecc Mon Sep 17 00:00:00 2001 From: be-marc Date: Thu, 14 Dec 2023 14:45:13 +0100 Subject: [PATCH 2/3] chore: update news --- NEWS.md | 1 + 1 file changed, 1 insertion(+) diff --git a/NEWS.md b/NEWS.md index 773aa8b8..f2e4b0a2 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,6 @@ # mlr3fselect (development version) +* feat: Add number of features to `as.data.table.ArchiveFSelect()`. * feat: Features can be always included with the `always_include` column role. * fix: Add `$phash()` method to `AutoFSelector`. * fix: Include `FSelector` in hash of `AutoFSelector`. From e142e5fa13a3299e2a122e4461bc59af894a4cbc Mon Sep 17 00:00:00 2001 From: be-marc Date: Thu, 14 Dec 2023 14:55:31 +0100 Subject: [PATCH 3/3] test: extract_inner_fselect_archives --- .../testthat/test_extract_inner_fselect_archives.R | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/testthat/test_extract_inner_fselect_archives.R b/tests/testthat/test_extract_inner_fselect_archives.R index 16de03b4..41e63e11 100644 --- a/tests/testthat/test_extract_inner_fselect_archives.R +++ b/tests/testthat/test_extract_inner_fselect_archives.R @@ -4,7 +4,7 @@ test_that("extract_inner_fselect_archives function works with resample and cv", irr = extract_inner_fselect_archives(rr) expect_data_table(irr, nrows = 8) - expect_named(irr, c("iteration", "Petal.Length", "Petal.Width", "Sepal.Length", "Sepal.Width", "classif.ce", "runtime_learners", "timestamp", "batch_nr", "warnings", "errors", "features", "resample_result", "task_id", "learner_id", "resampling_id")) + expect_named(irr, c("iteration", "Petal.Length", "Petal.Width", "Sepal.Length", "Sepal.Width", "classif.ce", "runtime_learners", "timestamp", "batch_nr", "warnings", "errors", "features", "n_features", "resample_result", "task_id", "learner_id", "resampling_id")) }) test_that("extract_inner_fselect_archives function works with resample and repeated cv", { @@ -13,7 +13,7 @@ test_that("extract_inner_fselect_archives function works with resample and repea irr = extract_inner_fselect_archives(rr) expect_data_table(irr, nrows = 24) - expect_named(irr, c("iteration", "Petal.Length", "Petal.Width", "Sepal.Length", "Sepal.Width", "classif.ce", "runtime_learners", "timestamp", "batch_nr", "warnings", "errors", "features", "resample_result", "task_id", "learner_id", "resampling_id")) + expect_named(irr, c("iteration", "Petal.Length", "Petal.Width", "Sepal.Length", "Sepal.Width", "classif.ce", "runtime_learners", "timestamp", "batch_nr", "warnings", "errors", "features", "n_features", "resample_result", "task_id", "learner_id", "resampling_id")) }) test_that("extract_inner_fselect_archives function works with benchmark and cv", { @@ -25,7 +25,7 @@ test_that("extract_inner_fselect_archives function works with benchmark and cv", ibmr = extract_inner_fselect_archives(bmr) expect_data_table(ibmr, nrows = 16) - expect_named(ibmr, c("experiment", "iteration", "Petal.Length", "Petal.Width", "Sepal.Length", "Sepal.Width", "classif.ce", "runtime_learners", "timestamp", "batch_nr", "warnings", "errors", "features", "resample_result", "task_id", "learner_id", "resampling_id")) + expect_named(ibmr, c("experiment", "iteration", "Petal.Length", "Petal.Width", "Sepal.Length", "Sepal.Width", "classif.ce", "runtime_learners", "timestamp", "batch_nr", "warnings", "errors", "features", "n_features", "resample_result", "task_id", "learner_id", "resampling_id")) expect_equal(unique(ibmr$experiment), c(1, 2)) }) @@ -38,7 +38,7 @@ test_that("extract_inner_fselect_archives function works with benchmark and repe ibmr = extract_inner_fselect_archives(bmr) expect_data_table(ibmr, nrows = 48) - expect_named(ibmr, c("experiment", "iteration", "Petal.Length", "Petal.Width", "Sepal.Length", "Sepal.Width", "classif.ce", "runtime_learners", "timestamp", "batch_nr", "warnings", "errors", "features", "resample_result", "task_id", "learner_id", "resampling_id")) + expect_named(ibmr, c("experiment", "iteration", "Petal.Length", "Petal.Width", "Sepal.Length", "Sepal.Width", "classif.ce", "runtime_learners", "timestamp", "batch_nr", "warnings", "errors", "features", "n_features", "resample_result", "task_id", "learner_id", "resampling_id")) expect_equal(unique(ibmr$experiment), c(1, 2)) }) @@ -51,7 +51,7 @@ test_that("extract_inner_fselect_archives function works with multiple tasks", { ibmr = extract_inner_fselect_archives(bmr) expect_data_table(ibmr, nrows = 32) - expect_named(ibmr, c("experiment", "iteration", "Petal.Length", "Petal.Width", "Sepal.Length", "Sepal.Width", "age", "glucose", "insulin", "mass", "pedigree", "pregnant", "pressure", "triceps", "classif.ce", "runtime_learners", "timestamp", "batch_nr", "warnings", "errors", "features", "resample_result", "task_id", "learner_id", "resampling_id")) + expect_named(ibmr, c("experiment", "iteration", "Petal.Length", "Petal.Width", "Sepal.Length", "Sepal.Width", "age", "glucose", "insulin", "mass", "pedigree", "pregnant", "pressure", "triceps", "classif.ce", "runtime_learners", "timestamp", "batch_nr", "warnings", "errors", "features", "n_features", "resample_result", "task_id", "learner_id", "resampling_id")) expect_equal(unique(ibmr$experiment), c(1, 2, 3, 4)) }) @@ -92,7 +92,7 @@ test_that("extract_inner_fselect_archives function works with mixed store instan bmr = benchmark(grid, store_models = TRUE) ibmr = extract_inner_fselect_archives(bmr) - expect_data_table(ibmr, ncols = 17) + expect_data_table(ibmr, ncols = 18) expect_equal(unique(ibmr$experiment), 2) }) @@ -105,6 +105,6 @@ test_that("extract_inner_fselect_archives function works with autofselector and ibmr = extract_inner_fselect_archives(bmr) expect_data_table(ibmr, nrows = 8) - expect_named(ibmr, c("experiment", "iteration", "Petal.Length", "Petal.Width", "Sepal.Length", "Sepal.Width", "classif.ce", "runtime_learners", "timestamp", "batch_nr", "warnings", "errors", "features", "resample_result", "task_id", "learner_id", "resampling_id")) + expect_named(ibmr, c("experiment", "iteration", "Petal.Length", "Petal.Width", "Sepal.Length", "Sepal.Width", "classif.ce", "runtime_learners", "timestamp", "batch_nr", "warnings", "errors", "features", "n_features", "resample_result", "task_id", "learner_id", "resampling_id")) expect_equal(unique(ibmr$experiment), 1) })