From d014269d7be468a4b7bfb48f8d0a3c129bbbe72b Mon Sep 17 00:00:00 2001 From: Maximilian Muecke Date: Wed, 4 Sep 2024 11:24:41 +0200 Subject: [PATCH] refactor: add missing train tags and only extract task data once See @mllg's PR for the weights changes. This commit adds the missing train tags, calls `task$data()` only once, and uses keys for reflection subsetting. --- R/LearnerClustAffinityPropagation.R | 2 +- R/LearnerClustAgnes.R | 4 ++-- R/LearnerClustDBSCAN.R | 5 +++-- R/LearnerClustDBSCANfpc.R | 5 +++-- R/LearnerClustDiana.R | 4 ++-- R/LearnerClustHDBSCAN.R | 5 +++-- R/LearnerClustHclust.R | 4 ++-- R/LearnerClustKKMeans.R | 5 +++-- R/LearnerClustMiniBatchKMeans.R | 10 ++++++---- R/LearnerClustOPTICS.R | 5 +++-- R/zzz.R | 6 ++---- 11 files changed, 30 insertions(+), 25 deletions(-) diff --git a/R/LearnerClustAffinityPropagation.R b/R/LearnerClustAffinityPropagation.R index a6ed9013..1b17f36d 100644 --- a/R/LearnerClustAffinityPropagation.R +++ b/R/LearnerClustAffinityPropagation.R @@ -66,7 +66,7 @@ LearnerClustAP = R6Class("LearnerClustAP", }, .predict = function(task) { - pv = self$param_set$get_values() + pv = self$param_set$get_values(tags = "train") sim_func = pv$s exemplar_data = attributes(self$model)$exemplar_data diff --git a/R/LearnerClustAgnes.R b/R/LearnerClustAgnes.R index 74643b0e..4dcb9e2c 100644 --- a/R/LearnerClustAgnes.R +++ b/R/LearnerClustAgnes.R @@ -32,7 +32,7 @@ LearnerClustAgnes = R6Class("LearnerClustAgnes", tags = "train" ), trace.lev = p_int(0L, default = 0L, tags = "train"), - k = p_int(1L, default = 2L, tags = "predict"), + k = p_int(1L, default = 2L, tags = c("train", "predict")), par.method = p_uty( tags = "train", depends = quote(method %in% c("flexible", "gaverage")), @@ -65,7 +65,7 @@ LearnerClustAgnes = R6Class("LearnerClustAgnes", ), private = list( .train = function(task) { - pv = self$param_set$get_values() + pv = self$param_set$get_values(tags = "train") m = invoke(cluster::agnes, x = task$data(), diss = FALSE, diff --git
a/R/LearnerClustDBSCAN.R b/R/LearnerClustDBSCAN.R index af360995..7c457b1d 100644 --- a/R/LearnerClustDBSCAN.R +++ b/R/LearnerClustDBSCAN.R @@ -52,8 +52,9 @@ LearnerClustDBSCAN = R6Class("LearnerClustDBSCAN", private = list( .train = function(task) { pv = self$param_set$get_values(tags = "train") - m = invoke(dbscan::dbscan, x = task$data(), .args = pv) - m = insert_named(m, list(data = task$data())) + data = task$data() + m = invoke(dbscan::dbscan, x = data, .args = pv) + m = insert_named(m, list(data = data)) if (self$save_assignments) { self$assignments = m$cluster } diff --git a/R/LearnerClustDBSCANfpc.R b/R/LearnerClustDBSCANfpc.R index 8f730a97..9007cc93 100644 --- a/R/LearnerClustDBSCANfpc.R +++ b/R/LearnerClustDBSCANfpc.R @@ -62,8 +62,9 @@ LearnerClustDBSCANfpc = R6Class("LearnerClustDBSCANfpc", private = list( .train = function(task) { pv = self$param_set$get_values(tags = "train") - m = invoke(fpc::dbscan, data = task$data(), .args = pv) - m = insert_named(m, list(data = task$data())) + data = task$data() + m = invoke(fpc::dbscan, data = data, .args = pv) + m = insert_named(m, list(data = data)) if (self$save_assignments) { self$assignments = m$cluster } diff --git a/R/LearnerClustDiana.R b/R/LearnerClustDiana.R index 859687b3..1ad446e7 100644 --- a/R/LearnerClustDiana.R +++ b/R/LearnerClustDiana.R @@ -27,7 +27,7 @@ LearnerClustDiana = R6Class("LearnerClustDiana", metric = p_fct(default = "euclidean", levels = c("euclidean", "manhattan"), tags = "train"), stand = p_lgl(default = FALSE, tags = "train"), trace.lev = p_int(0L, default = 0L, tags = "train"), - k = p_int(1L, default = 2L, tags = "predict") + k = p_int(1L, default = 2L, tags = c("train", "predict")) ) param_set$set_values(k = 2L) @@ -46,7 +46,7 @@ LearnerClustDiana = R6Class("LearnerClustDiana", ), private = list( .train = function(task) { - pv = self$param_set$get_values() + pv = self$param_set$get_values(tags = "train") m = invoke(cluster::diana, x = task$data(), diss = FALSE, diff --git 
a/R/LearnerClustHDBSCAN.R b/R/LearnerClustHDBSCAN.R index 447a534c..6005c5dd 100644 --- a/R/LearnerClustHDBSCAN.R +++ b/R/LearnerClustHDBSCAN.R @@ -43,8 +43,9 @@ LearnerClustHDBSCAN = R6Class("LearnerClustHDBSCAN", private = list( .train = function(task) { pv = self$param_set$get_values(tags = "train") - m = invoke(dbscan::hdbscan, x = task$data(), .args = pv) - m = insert_named(m, list(data = task$data())) + data = task$data() + m = invoke(dbscan::hdbscan, x = data, .args = pv) + m = insert_named(m, list(data = data)) if (self$save_assignments) { self$assignments = m$cluster diff --git a/R/LearnerClustHclust.R b/R/LearnerClustHclust.R index e5204d50..c1348b83 100644 --- a/R/LearnerClustHclust.R +++ b/R/LearnerClustHclust.R @@ -35,7 +35,7 @@ LearnerClustHclust = R6Class("LearnerClustHclust", diag = p_lgl(default = FALSE, tags = c("train", "dist")), upper = p_lgl(default = FALSE, tags = c("train", "dist")), p = p_dbl(default = 2, tags = c("train", "dist"), depends = quote(distmethod == "minkowski")), - k = p_int(1L, default = 2L, tags = "predict") + k = p_int(1L, default = 2L, tags = c("train", "predict")) ) param_set$set_values(k = 2L, distmethod = "euclidean") @@ -54,7 +54,7 @@ LearnerClustHclust = R6Class("LearnerClustHclust", ), private = list( .train = function(task) { - pv = self$param_set$get_values() + pv = self$param_set$get_values(tags = "train") dist = invoke(stats::dist, x = task$data(), method = pv$d %??% "euclidean", diff --git a/R/LearnerClustKKMeans.R b/R/LearnerClustKKMeans.R index 0e682a32..1b63025b 100644 --- a/R/LearnerClustKKMeans.R +++ b/R/LearnerClustKKMeans.R @@ -78,12 +78,13 @@ LearnerClustKKMeans = R6Class("LearnerClustKKMeans", c = kernlab::centers(self$model) K = kernlab::kernelf(self$model) + data = task$data() # kernel product between each new datapoint and the centers - d_xc = matrix(kernlab::kernelMatrix(K, as.matrix(task$data()), c), ncol = nrow(c)) + d_xc = matrix(kernlab::kernelMatrix(K, as.matrix(data), c), ncol = nrow(c)) # 
kernel product between each new datapoint and itself: rows are identical d_xx = matrix( - rep(diag(kernlab::kernelMatrix(K, as.matrix(task$data()))), each = ncol(d_xc)), + rep(diag(kernlab::kernelMatrix(K, as.matrix(data))), each = ncol(d_xc)), ncol = ncol(d_xc), byrow = TRUE ) # kernel product between each center and itself: columns are identical diff --git a/R/LearnerClustMiniBatchKMeans.R b/R/LearnerClustMiniBatchKMeans.R index 789edc0c..d4dc2d6f 100644 --- a/R/LearnerClustMiniBatchKMeans.R +++ b/R/LearnerClustMiniBatchKMeans.R @@ -66,18 +66,20 @@ LearnerClustMiniBatchKMeans = R6Class("LearnerClustMiniBatchKMeans", stopf("`CENTROIDS` must have same number of rows as `clusters`") } - m = invoke(ClusterR::MiniBatchKmeans, data = task$data(), .args = pv) + data = task$data() + m = invoke(ClusterR::MiniBatchKmeans, data = data, .args = pv) if (self$save_assignments) { - self$assignments = as.integer(invoke(predict, m, newdata = task$data())) + self$assignments = as.integer(invoke(predict, m, newdata = data)) } m }, .predict = function(task) { - partition = as.integer(invoke(predict, self$model, newdata = task$data())) + data = task$data() + partition = as.integer(invoke(predict, self$model, newdata = data)) prob = NULL if (self$predict_type == "prob") { - prob = invoke(predict, self$model, newdata = task$data(), fuzzy = TRUE) + prob = invoke(predict, self$model, newdata = data, fuzzy = TRUE) colnames(prob) = seq_len(ncol(prob)) } PredictionClust$new(task = task, partition = partition, prob = prob) diff --git a/R/LearnerClustOPTICS.R b/R/LearnerClustOPTICS.R index a91ac884..8c48fa7f 100644 --- a/R/LearnerClustOPTICS.R +++ b/R/LearnerClustOPTICS.R @@ -51,8 +51,9 @@ LearnerClustOPTICS = R6Class("LearnerClustOPTICS", private = list( .train = function(task) { pv = self$param_set$get_values(tags = "train") - m = invoke(dbscan::optics, x = task$data(), .args = remove_named(pv, "eps_cl")) - m = insert_named(m, list(data = task$data())) + data = task$data() + m = 
invoke(dbscan::optics, x = data, .args = remove_named(pv, "eps_cl")) + m = insert_named(m, list(data = data)) m = invoke(dbscan::extractDBSCAN, object = m, eps_cl = pv$eps_cl) if (self$save_assignments) { diff --git a/R/zzz.R b/R/zzz.R index 0a91339a..c250697b 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -10,8 +10,6 @@ #' @importFrom stats model.frame terms predict runif dist "_PACKAGE" -utils::globalVariables("type") - mlr3cluster_tasks = new.env() mlr3cluster_learners = new.env() @@ -28,7 +26,7 @@ register_learner = function(name, constructor) { register_mlr3 = function() { # reflections mlr_reflections = utils::getFromNamespace("mlr_reflections", ns = "mlr3") - mlr_reflections$task_types = mlr_reflections$task_types[type != "clust"] + mlr_reflections$task_types = mlr_reflections$task_types[!"clust"] mlr_reflections$task_types = setkeyv(rbind(mlr_reflections$task_types, rowwise_table( ~type, ~package, ~task, ~learner, ~prediction, ~prediction_data, ~measure, "clust", "mlr3cluster", "TaskClust", "LearnerClust", "PredictionClust", "PredictionDataClust", "MeasureClust" @@ -70,7 +68,7 @@ register_mlr3 = function() { walk(names(mlr3cluster_learners), function(id) mlr_learners$remove(id)) walk(names(measures), function(id) mlr_measures$remove(paste("clust", id, sep = "."))) - mlr_reflections$task_types = mlr_reflections$task_types[type != "clust"] + mlr_reflections$task_types = mlr_reflections$task_types[!"clust"] reflections = c( "measure_properties", "default_measures", "learner_properties", "learner_predict_types", "task_properties", "task_col_roles"