diff --git a/DESCRIPTION b/DESCRIPTION index 300d147e3..e636b5822 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -117,6 +117,7 @@ Collate: 'PipeOpEncodeLmer.R' 'PipeOpFeatureUnion.R' 'PipeOpFilter.R' + 'PipeOpFilterRows.R' 'PipeOpFixFactors.R' 'PipeOpHistBin.R' 'PipeOpICA.R' diff --git a/NAMESPACE b/NAMESPACE index f4c424ba8..318c490af 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -44,6 +44,7 @@ export(PipeOpEncodeLmer) export(PipeOpEnsemble) export(PipeOpFeatureUnion) export(PipeOpFilter) +export(PipeOpFilterRows) export(PipeOpFixFactors) export(PipeOpHistBin) export(PipeOpICA) diff --git a/NEWS.md b/NEWS.md index f56e76b75..92776eec9 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,6 @@ # mlr3pipelines 0.3.4-9000 +* New PipeOps: + - PipeOpFilterRows # mlr3pipelines 0.3.4 diff --git a/R/PipeOpFilterRows.R b/R/PipeOpFilterRows.R new file mode 100644 index 000000000..c3dc595ce --- /dev/null +++ b/R/PipeOpFilterRows.R @@ -0,0 +1,131 @@ +#' @title PipeOpFilterRows +#' +#' @usage NULL +#' @name mlr_pipeops_filterrows +#' @format [`R6Class`] object inheriting from [`PipeOpTaskPreproc`]/[`PipeOp`]. +#' +#' @description +#' Filter rows of the data of a [`Task`][mlr3::Task]. +#' +#' @section Construction: +#' ``` +#' PipeOpFilterRows$new(id = "filterrows", param_vals = list()) +#' ``` +#' +#' * `id` :: `character(1)` \cr +#' Identifier of resulting object, default `"filterrows"`. +#' * `param_vals` :: named `list` \cr +#' List of hyperparameter settings, overwriting the hyperparameter settings that would otherwise +#' be set during construction. Default `list()`. +#' +#' @section Input and Output Channels: +#' Input and output channels are inherited from [`PipeOpTaskPreproc`]. +#' +#' The output is the input [`Task`][mlr3::Task] with rows kept according to the filtering expression. +#' Whether filtering is performed during training and/or prediction can be specified via the `phase` parameter, see below. 
+#' +#' @section State: +#' The `$state` is a named `list` with the `$state` elements inherited from [`PipeOpTaskPreproc`]. +#' +#' @section Parameters: +#' The parameters are the parameters inherited from [`PipeOpTaskPreproc`], as well as: +#' * `filter_formula` :: `formula` | `NULL` \cr +#' Expression of the filtering to be performed, in the form of a `formula` that evaluates to `TRUE` or `FALSE` +#' for each row within the frame of the [`data.table`] [`DataBackend`][mlr3::DataBackend] of the [`Task`][mlr3::Task]. +#' Rows for which the evaluation is `TRUE` are kept in the output [`Task`][mlr3::Task], others are removed. +#' Initialized to `NULL`, i.e., no filtering is performed and all rows are kept. +#' * `SDcols` :: `function` | [`Selector`] \cr +#' [`Selector`] function, takes a [`Task`][mlr3::Task] as an argument and returns a `character` vector of features. +#' This character vector is set as the `.SDcols` argument when the formula above is evaluated within the frame of the +#' [`data.table`] [`DataBackend`][mlr3::DataBackend] of the [`Task`][mlr3::Task]. +#' Initialized to [`selector_all()`], i.e., all features can be used as the `.SD` variable. +#' * `phase` :: `character(1)` \cr +#' Character specifying the phase when filtering should be performed. Can either be `"always"`, `"train"`, or `"predict"`. +#' Initialized to `"always"`, i.e., filtering is performed both during training and prediction. +#' +#' @section Internals: +#' A `formula` created using the `~` operator always contains a reference to the `environment` in which +#' the `formula` is created. This makes it possible for `~`-expressions to reference both column names +#' and variables defined in that `environment`. +#' +#' @section Methods: +#' Only methods inherited from [`PipeOpTaskPreproc`]/[`PipeOp`].
+#'
+#' @examples
+#' library("mlr3")
+#' task = tsk("pima")
+#' # filter based on some formula
+#' po = PipeOpFilterRows$new(param_vals = list(
+#'   filter_formula = ~ age < 31 & glucose > median(glucose, na.rm = TRUE))
+#' )
+#' po$train(list(task))
+#' # missing value removal for all features
+#' po$param_set$values$filter_formula = ~ !apply(is.na(.SD), MARGIN = 1L, FUN = any)
+#' po$train(list(task))
+#' # missing value removal only for some features
+#' po$param_set$values$SDcols = selector_name(c("mass", "pressure"))
+#' po$train(list(task))
+#' @family PipeOps
+#' @include PipeOpTaskPreproc.R
+#' @export
+PipeOpFilterRows = R6Class("PipeOpFilterRows",
+  inherit = PipeOpTaskPreproc,
+  public = list(
+    initialize = function(id = "filterrows", param_vals = list()) {
+      ps = ParamSet$new(params = list(
+        ParamUty$new("filter_formula", tags = c("train", "predict", "required"), custom_check = check_filter_formulae),
+        ParamUty$new("SDcols", tags = c("train", "predict", "required"), custom_check = check_function),
+        ParamFct$new("phase", levels = c("always", "train", "predict"), tags = c("train", "predict", "required"))
+      ))
+      ps$values = list(filter_formula = NULL, SDcols = selector_all(), phase = "always")
+      super$initialize(id, param_set = ps, param_vals = param_vals)
+    }
+  ),
+  private = list(
+    .train_task = function(task) {
+      self$state = list()
+      if (self$param_set$values$phase %in% c("always", "train") && length(self$param_set$values$filter_formula)) {
+        filter_task(task, frm = self$param_set$values$filter_formula, SDcols = self$param_set$values$SDcols(task))
+      } else {
+        task
+      }
+    },
+
+    .predict_task = function(task) {
+      if (self$param_set$values$phase %in% c("always", "predict") && length(self$param_set$values$filter_formula)) {
+        filter_task(task, frm = self$param_set$values$filter_formula, SDcols = self$param_set$values$SDcols(task))
+      } else {
+        task
+      }
+    }
+  )
+)
+
+# check the `filter_formula` parameter of PipeOpFilterRows
+# @param x [formula] whatever `filter_formula` is being set to
+# checks that `filter_formula` is `formula` with only a rhs (or NULL)
+check_filter_formulae = function(x) {
+  check_formula(x, null.ok = TRUE) %check&&%
+    if (!is.null(x) && length(x) != 2L) {
+      sprintf("formula %s must not have a left hand side.", deparse(x, nlines = 1L, width.cutoff = 500))
+    } else {
+      TRUE
+    }
+}
+
+# helper function to filter a task based on a one-sided formula: rows for which the rhs is TRUE are kept (NA drops the row)
+# the rhs is evaluated in `j` of `task$data()`; `.SDcols` is only passed on if the rhs refers to the variable `.SD`
+# NOTE(review): `envir` is searched first, so formula-environment variables presumably shadow equally named columns - confirm
+# @param task [Task] task to filter, `$filter()` is called on it with the row ids to keep
+# @param frm [formula] one-sided filter formula (see check_filter_formulae)
+# @param SDcols [character] column names handed to data.table as `.SDcols`
+filter_task = function(task, frm, SDcols) {
+  row_ids = if (".SD" %in% all.vars(frm[[2L]])) {  # exact symbol match; a regex would also hit e.g. `xUSD` or `.SD2`
+    task$row_ids[which(task$data()[, (eval(frm[[2L]], envir = as.list(environment(frm)))), .SDcols = SDcols])]
+  } else {
+    task$row_ids[which(task$data()[, (eval(frm[[2L]], envir = as.list(environment(frm))))])]
+  }
+  task$filter(row_ids)
+}
+
+mlr_pipeops$add("filterrows", PipeOpFilterRows)
diff --git a/man/PipeOp.Rd b/man/PipeOp.Rd
index 15c71495a..619508427 100644
--- a/man/PipeOp.Rd
+++ b/man/PipeOp.Rd
@@ -240,6 +240,7 @@ Other PipeOps:
 \code{\link{mlr_pipeops_encodelmer}},
 \code{\link{mlr_pipeops_encode}},
 \code{\link{mlr_pipeops_featureunion}},
+\code{\link{mlr_pipeops_filterrows}},
 \code{\link{mlr_pipeops_filter}},
 \code{\link{mlr_pipeops_fixfactors}},
 \code{\link{mlr_pipeops_histbin}},
diff --git a/man/PipeOpEnsemble.Rd b/man/PipeOpEnsemble.Rd
index f9dc38e0e..66c07bc78 100644
--- a/man/PipeOpEnsemble.Rd
+++ b/man/PipeOpEnsemble.Rd
@@ -117,6 +117,7 @@ Other PipeOps:
 \code{\link{mlr_pipeops_encodelmer}},
 \code{\link{mlr_pipeops_encode}},
 \code{\link{mlr_pipeops_featureunion}},
+\code{\link{mlr_pipeops_filterrows}},
 \code{\link{mlr_pipeops_filter}},
 \code{\link{mlr_pipeops_fixfactors}},
 \code{\link{mlr_pipeops_histbin}},
diff --git a/man/PipeOpImpute.Rd b/man/PipeOpImpute.Rd
index 2e254b0c8..824fb1ca4 100644
--- a/man/PipeOpImpute.Rd
+++
b/man/PipeOpImpute.Rd @@ -147,6 +147,7 @@ Other PipeOps: \code{\link{mlr_pipeops_encodelmer}}, \code{\link{mlr_pipeops_encode}}, \code{\link{mlr_pipeops_featureunion}}, +\code{\link{mlr_pipeops_filterrows}}, \code{\link{mlr_pipeops_filter}}, \code{\link{mlr_pipeops_fixfactors}}, \code{\link{mlr_pipeops_histbin}}, diff --git a/man/PipeOpTargetTrafo.Rd b/man/PipeOpTargetTrafo.Rd index 9a567930c..ed6d3105a 100644 --- a/man/PipeOpTargetTrafo.Rd +++ b/man/PipeOpTargetTrafo.Rd @@ -158,6 +158,7 @@ Other PipeOps: \code{\link{mlr_pipeops_encodelmer}}, \code{\link{mlr_pipeops_encode}}, \code{\link{mlr_pipeops_featureunion}}, +\code{\link{mlr_pipeops_filterrows}}, \code{\link{mlr_pipeops_filter}}, \code{\link{mlr_pipeops_fixfactors}}, \code{\link{mlr_pipeops_histbin}}, diff --git a/man/PipeOpTaskPreproc.Rd b/man/PipeOpTaskPreproc.Rd index 54d44c0bb..14c8eaedd 100644 --- a/man/PipeOpTaskPreproc.Rd +++ b/man/PipeOpTaskPreproc.Rd @@ -207,6 +207,7 @@ Other PipeOps: \code{\link{mlr_pipeops_encodelmer}}, \code{\link{mlr_pipeops_encode}}, \code{\link{mlr_pipeops_featureunion}}, +\code{\link{mlr_pipeops_filterrows}}, \code{\link{mlr_pipeops_filter}}, \code{\link{mlr_pipeops_fixfactors}}, \code{\link{mlr_pipeops_histbin}}, diff --git a/man/PipeOpTaskPreprocSimple.Rd b/man/PipeOpTaskPreprocSimple.Rd index 73d30ad7e..08e303088 100644 --- a/man/PipeOpTaskPreprocSimple.Rd +++ b/man/PipeOpTaskPreprocSimple.Rd @@ -150,6 +150,7 @@ Other PipeOps: \code{\link{mlr_pipeops_encodelmer}}, \code{\link{mlr_pipeops_encode}}, \code{\link{mlr_pipeops_featureunion}}, +\code{\link{mlr_pipeops_filterrows}}, \code{\link{mlr_pipeops_filter}}, \code{\link{mlr_pipeops_fixfactors}}, \code{\link{mlr_pipeops_histbin}}, diff --git a/man/mlr_graphs_robustify.Rd b/man/mlr_graphs_robustify.Rd index a6043ff89..f30fb6552 100644 --- a/man/mlr_graphs_robustify.Rd +++ b/man/mlr_graphs_robustify.Rd @@ -45,7 +45,6 @@ Performs the following steps: \item Imputes \code{factor} features using \code{\link{PipeOpImputeOOR}} 
\item Encodes \code{factors} using \code{one-hot-encoding}. Factors with a cardinality > max_cardinality are collapsed using \code{\link{PipeOpCollapseFactors}} -\item If \code{scaling}, numeric features are scaled to mean 0 and standard deviation 1 } The graph is built conservatively, i.e. the function always tries to assure everything works. diff --git a/man/mlr_pipeops.Rd b/man/mlr_pipeops.Rd index 156975a4d..e1b72dc5a 100644 --- a/man/mlr_pipeops.Rd +++ b/man/mlr_pipeops.Rd @@ -88,6 +88,7 @@ Other PipeOps: \code{\link{mlr_pipeops_encodelmer}}, \code{\link{mlr_pipeops_encode}}, \code{\link{mlr_pipeops_featureunion}}, +\code{\link{mlr_pipeops_filterrows}}, \code{\link{mlr_pipeops_filter}}, \code{\link{mlr_pipeops_fixfactors}}, \code{\link{mlr_pipeops_histbin}}, diff --git a/man/mlr_pipeops_boxcox.Rd b/man/mlr_pipeops_boxcox.Rd index cf7b8b976..9d4c6c04b 100644 --- a/man/mlr_pipeops_boxcox.Rd +++ b/man/mlr_pipeops_boxcox.Rd @@ -99,6 +99,7 @@ Other PipeOps: \code{\link{mlr_pipeops_encodelmer}}, \code{\link{mlr_pipeops_encode}}, \code{\link{mlr_pipeops_featureunion}}, +\code{\link{mlr_pipeops_filterrows}}, \code{\link{mlr_pipeops_filter}}, \code{\link{mlr_pipeops_fixfactors}}, \code{\link{mlr_pipeops_histbin}}, diff --git a/man/mlr_pipeops_branch.Rd b/man/mlr_pipeops_branch.Rd index 256afebab..eef8b06b5 100644 --- a/man/mlr_pipeops_branch.Rd +++ b/man/mlr_pipeops_branch.Rd @@ -119,6 +119,7 @@ Other PipeOps: \code{\link{mlr_pipeops_encodelmer}}, \code{\link{mlr_pipeops_encode}}, \code{\link{mlr_pipeops_featureunion}}, +\code{\link{mlr_pipeops_filterrows}}, \code{\link{mlr_pipeops_filter}}, \code{\link{mlr_pipeops_fixfactors}}, \code{\link{mlr_pipeops_histbin}}, diff --git a/man/mlr_pipeops_chunk.Rd b/man/mlr_pipeops_chunk.Rd index e7dc01689..74b94fa62 100644 --- a/man/mlr_pipeops_chunk.Rd +++ b/man/mlr_pipeops_chunk.Rd @@ -98,6 +98,7 @@ Other PipeOps: \code{\link{mlr_pipeops_encodelmer}}, \code{\link{mlr_pipeops_encode}}, \code{\link{mlr_pipeops_featureunion}}, 
+\code{\link{mlr_pipeops_filterrows}}, \code{\link{mlr_pipeops_filter}}, \code{\link{mlr_pipeops_fixfactors}}, \code{\link{mlr_pipeops_histbin}}, diff --git a/man/mlr_pipeops_classbalancing.Rd b/man/mlr_pipeops_classbalancing.Rd index 4e87e9ac5..cd6b67983 100644 --- a/man/mlr_pipeops_classbalancing.Rd +++ b/man/mlr_pipeops_classbalancing.Rd @@ -139,6 +139,7 @@ Other PipeOps: \code{\link{mlr_pipeops_encodelmer}}, \code{\link{mlr_pipeops_encode}}, \code{\link{mlr_pipeops_featureunion}}, +\code{\link{mlr_pipeops_filterrows}}, \code{\link{mlr_pipeops_filter}}, \code{\link{mlr_pipeops_fixfactors}}, \code{\link{mlr_pipeops_histbin}}, diff --git a/man/mlr_pipeops_classifavg.Rd b/man/mlr_pipeops_classifavg.Rd index f9aab7eb4..7f36e41a4 100644 --- a/man/mlr_pipeops_classifavg.Rd +++ b/man/mlr_pipeops_classifavg.Rd @@ -113,6 +113,7 @@ Other PipeOps: \code{\link{mlr_pipeops_encodelmer}}, \code{\link{mlr_pipeops_encode}}, \code{\link{mlr_pipeops_featureunion}}, +\code{\link{mlr_pipeops_filterrows}}, \code{\link{mlr_pipeops_filter}}, \code{\link{mlr_pipeops_fixfactors}}, \code{\link{mlr_pipeops_histbin}}, diff --git a/man/mlr_pipeops_classweights.Rd b/man/mlr_pipeops_classweights.Rd index deed5fcb7..f72b4a9de 100644 --- a/man/mlr_pipeops_classweights.Rd +++ b/man/mlr_pipeops_classweights.Rd @@ -107,6 +107,7 @@ Other PipeOps: \code{\link{mlr_pipeops_encodelmer}}, \code{\link{mlr_pipeops_encode}}, \code{\link{mlr_pipeops_featureunion}}, +\code{\link{mlr_pipeops_filterrows}}, \code{\link{mlr_pipeops_filter}}, \code{\link{mlr_pipeops_fixfactors}}, \code{\link{mlr_pipeops_histbin}}, diff --git a/man/mlr_pipeops_colapply.Rd b/man/mlr_pipeops_colapply.Rd index ec8ff0d99..d7bc35ca8 100644 --- a/man/mlr_pipeops_colapply.Rd +++ b/man/mlr_pipeops_colapply.Rd @@ -128,6 +128,7 @@ Other PipeOps: \code{\link{mlr_pipeops_encodelmer}}, \code{\link{mlr_pipeops_encode}}, \code{\link{mlr_pipeops_featureunion}}, +\code{\link{mlr_pipeops_filterrows}}, \code{\link{mlr_pipeops_filter}}, 
\code{\link{mlr_pipeops_fixfactors}}, \code{\link{mlr_pipeops_histbin}}, diff --git a/man/mlr_pipeops_collapsefactors.Rd b/man/mlr_pipeops_collapsefactors.Rd index 4404732c0..ea0f14a8a 100644 --- a/man/mlr_pipeops_collapsefactors.Rd +++ b/man/mlr_pipeops_collapsefactors.Rd @@ -95,6 +95,7 @@ Other PipeOps: \code{\link{mlr_pipeops_encodelmer}}, \code{\link{mlr_pipeops_encode}}, \code{\link{mlr_pipeops_featureunion}}, +\code{\link{mlr_pipeops_filterrows}}, \code{\link{mlr_pipeops_filter}}, \code{\link{mlr_pipeops_fixfactors}}, \code{\link{mlr_pipeops_histbin}}, diff --git a/man/mlr_pipeops_colroles.Rd b/man/mlr_pipeops_colroles.Rd index f342d33d5..93fe3e463 100644 --- a/man/mlr_pipeops_colroles.Rd +++ b/man/mlr_pipeops_colroles.Rd @@ -87,6 +87,7 @@ Other PipeOps: \code{\link{mlr_pipeops_encodelmer}}, \code{\link{mlr_pipeops_encode}}, \code{\link{mlr_pipeops_featureunion}}, +\code{\link{mlr_pipeops_filterrows}}, \code{\link{mlr_pipeops_filter}}, \code{\link{mlr_pipeops_fixfactors}}, \code{\link{mlr_pipeops_histbin}}, diff --git a/man/mlr_pipeops_copy.Rd b/man/mlr_pipeops_copy.Rd index 02ae18124..8a4dcf242 100644 --- a/man/mlr_pipeops_copy.Rd +++ b/man/mlr_pipeops_copy.Rd @@ -117,6 +117,7 @@ Other PipeOps: \code{\link{mlr_pipeops_encodelmer}}, \code{\link{mlr_pipeops_encode}}, \code{\link{mlr_pipeops_featureunion}}, +\code{\link{mlr_pipeops_filterrows}}, \code{\link{mlr_pipeops_filter}}, \code{\link{mlr_pipeops_fixfactors}}, \code{\link{mlr_pipeops_histbin}}, diff --git a/man/mlr_pipeops_datefeatures.Rd b/man/mlr_pipeops_datefeatures.Rd index 5c84d7451..18b337934 100644 --- a/man/mlr_pipeops_datefeatures.Rd +++ b/man/mlr_pipeops_datefeatures.Rd @@ -134,6 +134,7 @@ Other PipeOps: \code{\link{mlr_pipeops_encodelmer}}, \code{\link{mlr_pipeops_encode}}, \code{\link{mlr_pipeops_featureunion}}, +\code{\link{mlr_pipeops_filterrows}}, \code{\link{mlr_pipeops_filter}}, \code{\link{mlr_pipeops_fixfactors}}, \code{\link{mlr_pipeops_histbin}}, diff --git a/man/mlr_pipeops_encode.Rd 
b/man/mlr_pipeops_encode.Rd index 80e336189..d86055038 100644 --- a/man/mlr_pipeops_encode.Rd +++ b/man/mlr_pipeops_encode.Rd @@ -120,6 +120,7 @@ Other PipeOps: \code{\link{mlr_pipeops_encodeimpact}}, \code{\link{mlr_pipeops_encodelmer}}, \code{\link{mlr_pipeops_featureunion}}, +\code{\link{mlr_pipeops_filterrows}}, \code{\link{mlr_pipeops_filter}}, \code{\link{mlr_pipeops_fixfactors}}, \code{\link{mlr_pipeops_histbin}}, diff --git a/man/mlr_pipeops_encodeimpact.Rd b/man/mlr_pipeops_encodeimpact.Rd index 0be88b7da..bd34746be 100644 --- a/man/mlr_pipeops_encodeimpact.Rd +++ b/man/mlr_pipeops_encodeimpact.Rd @@ -112,6 +112,7 @@ Other PipeOps: \code{\link{mlr_pipeops_encodelmer}}, \code{\link{mlr_pipeops_encode}}, \code{\link{mlr_pipeops_featureunion}}, +\code{\link{mlr_pipeops_filterrows}}, \code{\link{mlr_pipeops_filter}}, \code{\link{mlr_pipeops_fixfactors}}, \code{\link{mlr_pipeops_histbin}}, diff --git a/man/mlr_pipeops_encodelmer.Rd b/man/mlr_pipeops_encodelmer.Rd index aebf5291b..8d857dc01 100644 --- a/man/mlr_pipeops_encodelmer.Rd +++ b/man/mlr_pipeops_encodelmer.Rd @@ -123,6 +123,7 @@ Other PipeOps: \code{\link{mlr_pipeops_encodeimpact}}, \code{\link{mlr_pipeops_encode}}, \code{\link{mlr_pipeops_featureunion}}, +\code{\link{mlr_pipeops_filterrows}}, \code{\link{mlr_pipeops_filter}}, \code{\link{mlr_pipeops_fixfactors}}, \code{\link{mlr_pipeops_histbin}}, diff --git a/man/mlr_pipeops_featureunion.Rd b/man/mlr_pipeops_featureunion.Rd index c99233a66..7a8f7b992 100644 --- a/man/mlr_pipeops_featureunion.Rd +++ b/man/mlr_pipeops_featureunion.Rd @@ -132,6 +132,7 @@ Other PipeOps: \code{\link{mlr_pipeops_encodeimpact}}, \code{\link{mlr_pipeops_encodelmer}}, \code{\link{mlr_pipeops_encode}}, +\code{\link{mlr_pipeops_filterrows}}, \code{\link{mlr_pipeops_filter}}, \code{\link{mlr_pipeops_fixfactors}}, \code{\link{mlr_pipeops_histbin}}, diff --git a/man/mlr_pipeops_filter.Rd b/man/mlr_pipeops_filter.Rd index a87ccb638..b2a7dbbcd 100644 --- a/man/mlr_pipeops_filter.Rd 
+++ b/man/mlr_pipeops_filter.Rd @@ -142,6 +142,7 @@ Other PipeOps: \code{\link{mlr_pipeops_encodelmer}}, \code{\link{mlr_pipeops_encode}}, \code{\link{mlr_pipeops_featureunion}}, +\code{\link{mlr_pipeops_filterrows}}, \code{\link{mlr_pipeops_fixfactors}}, \code{\link{mlr_pipeops_histbin}}, \code{\link{mlr_pipeops_ica}}, diff --git a/man/mlr_pipeops_filterrows.Rd b/man/mlr_pipeops_filterrows.Rd new file mode 100644 index 000000000..a6aee8c34 --- /dev/null +++ b/man/mlr_pipeops_filterrows.Rd @@ -0,0 +1,159 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/PipeOpFilterRows.R +\name{mlr_pipeops_filterrows} +\alias{mlr_pipeops_filterrows} +\alias{PipeOpFilterRows} +\title{PipeOpFilterRows} +\format{ +\code{\link{R6Class}} object inheriting from \code{\link{PipeOpTaskPreproc}}/\code{\link{PipeOp}}. +} +\description{ +Filter rows of the data of a \code{\link[mlr3:Task]{Task}}. +} +\section{Construction}{ +\preformatted{PipeOpFilterRows$new(id = "filterrows", param_vals = list()) +} +\itemize{ +\item \code{id} :: \code{character(1)} \cr +Identifier of resulting object, default \code{"filterrows"}. +\item \code{param_vals} :: named \code{list} \cr +List of hyperparameter settings, overwriting the hyperparameter settings that would otherwise +be set during construction. Default \code{list()}. +} +} + +\section{Input and Output Channels}{ + +Input and output channels are inherited from \code{\link{PipeOpTaskPreproc}}. + +The output is the input \code{\link[mlr3:Task]{Task}} with rows kept according to the filtering expression. +Whether filtering is performed during training and/or prediction can be specified via the \code{phase} parameter, see below. +} + +\section{State}{ + +The \verb{$state} is a named \code{list} with the \verb{$state} elements inherited from \code{\link{PipeOpTaskPreproc}}. 
+} + +\section{Parameters}{ + +The parameters are the parameters inherited from \code{\link{PipeOpTaskPreproc}}, as well as: +\itemize{ +\item \code{filter_formula} :: \code{formula} | \code{NULL} \cr +Expression of the filtering to be performed, in the form of a \code{formula} that evaluates to \code{TRUE} or \code{FALSE} +for each row within the frame of the \code{\link{data.table}} \code{\link[mlr3:DataBackend]{DataBackend}} of the \code{\link[mlr3:Task]{Task}}. +Rows for which the evaluation is \code{TRUE} are kept in the output \code{\link[mlr3:Task]{Task}}, others are removed. +Initialized to \code{NULL}, i.e., no filtering is performed and all rows are kept. +\item \code{SDcols} :: \code{function} | \code{\link{Selector}} \cr +\code{\link{Selector}} function, takes a \code{\link[mlr3:Task]{Task}} as an argument and returns a \code{character} vector of features. +This character vector is set as the \code{.SDcols} argument when the formula above is evaluated within the frame of the +\code{\link{data.table}} \code{\link[mlr3:DataBackend]{DataBackend}} of the \code{\link[mlr3:Task]{Task}}. +Initialized to \code{\link[=selector_all]{selector_all()}}, i.e., all features can be used as the \code{.SD} variable. +\item \code{phase} :: \code{character(1)} \cr +Character specifying the phase when filtering should be performed. Can either be \code{"always"}, \code{"train"}, or \code{"predict"}. +Initialized to \code{"always"}, i.e., filtering is performed both during training and prediction. +} +} + +\section{Internals}{ + +A \code{formula} created using the \code{~} operator always contains a reference to the \code{environment} in which +the \code{formula} is created. This makes it possible for \code{~}-expressions to reference both column names +and variables defined in that \code{environment}. +} + +\section{Methods}{ + +Only methods inherited from \code{\link{PipeOpTaskPreproc}}/\code{\link{PipeOp}}.
+} + +\examples{ +library("mlr3") +task = tsk("pima") +# filter based on some formula +po = PipeOpFilterRows$new(param_vals = list( + filter_formula = ~ age < 31 & glucose > median(glucose, na.rm = TRUE)) +) +po$train(list(task)) +# missing value removal for all features +po$param_set$values$filter_formula = ~ !apply(is.na(.SD), MARGIN = 1L, FUN = any) +po$train(list(task)) +# missing value removal only for some features +po$param_set$values$SDcols = selector_name(c("mass", "pressure")) +po$train(list(task)) +} +\seealso{ +Other PipeOps: +\code{\link{PipeOpEnsemble}}, +\code{\link{PipeOpImpute}}, +\code{\link{PipeOpTargetTrafo}}, +\code{\link{PipeOpTaskPreprocSimple}}, +\code{\link{PipeOpTaskPreproc}}, +\code{\link{PipeOp}}, +\code{\link{mlr_pipeops_boxcox}}, +\code{\link{mlr_pipeops_branch}}, +\code{\link{mlr_pipeops_chunk}}, +\code{\link{mlr_pipeops_classbalancing}}, +\code{\link{mlr_pipeops_classifavg}}, +\code{\link{mlr_pipeops_classweights}}, +\code{\link{mlr_pipeops_colapply}}, +\code{\link{mlr_pipeops_collapsefactors}}, +\code{\link{mlr_pipeops_colroles}}, +\code{\link{mlr_pipeops_copy}}, +\code{\link{mlr_pipeops_datefeatures}}, +\code{\link{mlr_pipeops_encodeimpact}}, +\code{\link{mlr_pipeops_encodelmer}}, +\code{\link{mlr_pipeops_encode}}, +\code{\link{mlr_pipeops_featureunion}}, +\code{\link{mlr_pipeops_filter}}, +\code{\link{mlr_pipeops_fixfactors}}, +\code{\link{mlr_pipeops_histbin}}, +\code{\link{mlr_pipeops_ica}}, +\code{\link{mlr_pipeops_imputeconstant}}, +\code{\link{mlr_pipeops_imputehist}}, +\code{\link{mlr_pipeops_imputelearner}}, +\code{\link{mlr_pipeops_imputemean}}, +\code{\link{mlr_pipeops_imputemedian}}, +\code{\link{mlr_pipeops_imputemode}}, +\code{\link{mlr_pipeops_imputeoor}}, +\code{\link{mlr_pipeops_imputesample}}, +\code{\link{mlr_pipeops_kernelpca}}, +\code{\link{mlr_pipeops_learner}}, +\code{\link{mlr_pipeops_missind}}, +\code{\link{mlr_pipeops_modelmatrix}}, +\code{\link{mlr_pipeops_multiplicityexply}}, 
+\code{\link{mlr_pipeops_multiplicityimply}}, +\code{\link{mlr_pipeops_mutate}}, +\code{\link{mlr_pipeops_nmf}}, +\code{\link{mlr_pipeops_nop}}, +\code{\link{mlr_pipeops_ovrsplit}}, +\code{\link{mlr_pipeops_ovrunite}}, +\code{\link{mlr_pipeops_pca}}, +\code{\link{mlr_pipeops_proxy}}, +\code{\link{mlr_pipeops_quantilebin}}, +\code{\link{mlr_pipeops_randomprojection}}, +\code{\link{mlr_pipeops_randomresponse}}, +\code{\link{mlr_pipeops_regravg}}, +\code{\link{mlr_pipeops_removeconstants}}, +\code{\link{mlr_pipeops_renamecolumns}}, +\code{\link{mlr_pipeops_replicate}}, +\code{\link{mlr_pipeops_scalemaxabs}}, +\code{\link{mlr_pipeops_scalerange}}, +\code{\link{mlr_pipeops_scale}}, +\code{\link{mlr_pipeops_select}}, +\code{\link{mlr_pipeops_smote}}, +\code{\link{mlr_pipeops_spatialsign}}, +\code{\link{mlr_pipeops_subsample}}, +\code{\link{mlr_pipeops_targetinvert}}, +\code{\link{mlr_pipeops_targetmutate}}, +\code{\link{mlr_pipeops_targettrafoscalerange}}, +\code{\link{mlr_pipeops_textvectorizer}}, +\code{\link{mlr_pipeops_threshold}}, +\code{\link{mlr_pipeops_tunethreshold}}, +\code{\link{mlr_pipeops_unbranch}}, +\code{\link{mlr_pipeops_updatetarget}}, +\code{\link{mlr_pipeops_vtreat}}, +\code{\link{mlr_pipeops_yeojohnson}}, +\code{\link{mlr_pipeops}} +} +\concept{PipeOps} diff --git a/man/mlr_pipeops_fixfactors.Rd b/man/mlr_pipeops_fixfactors.Rd index 66a9226da..b8f87a024 100644 --- a/man/mlr_pipeops_fixfactors.Rd +++ b/man/mlr_pipeops_fixfactors.Rd @@ -88,6 +88,7 @@ Other PipeOps: \code{\link{mlr_pipeops_encodelmer}}, \code{\link{mlr_pipeops_encode}}, \code{\link{mlr_pipeops_featureunion}}, +\code{\link{mlr_pipeops_filterrows}}, \code{\link{mlr_pipeops_filter}}, \code{\link{mlr_pipeops_histbin}}, \code{\link{mlr_pipeops_ica}}, diff --git a/man/mlr_pipeops_histbin.Rd b/man/mlr_pipeops_histbin.Rd index 7cef85cce..7e18b7bf0 100644 --- a/man/mlr_pipeops_histbin.Rd +++ b/man/mlr_pipeops_histbin.Rd @@ -100,6 +100,7 @@ Other PipeOps: \code{\link{mlr_pipeops_encodelmer}}, 
\code{\link{mlr_pipeops_encode}}, \code{\link{mlr_pipeops_featureunion}}, +\code{\link{mlr_pipeops_filterrows}}, \code{\link{mlr_pipeops_filter}}, \code{\link{mlr_pipeops_fixfactors}}, \code{\link{mlr_pipeops_ica}}, diff --git a/man/mlr_pipeops_ica.Rd b/man/mlr_pipeops_ica.Rd index da0800c70..294ff885b 100644 --- a/man/mlr_pipeops_ica.Rd +++ b/man/mlr_pipeops_ica.Rd @@ -126,6 +126,7 @@ Other PipeOps: \code{\link{mlr_pipeops_encodelmer}}, \code{\link{mlr_pipeops_encode}}, \code{\link{mlr_pipeops_featureunion}}, +\code{\link{mlr_pipeops_filterrows}}, \code{\link{mlr_pipeops_filter}}, \code{\link{mlr_pipeops_fixfactors}}, \code{\link{mlr_pipeops_histbin}}, diff --git a/man/mlr_pipeops_imputeconstant.Rd b/man/mlr_pipeops_imputeconstant.Rd index 4ffd9ecdb..4aeb431fd 100644 --- a/man/mlr_pipeops_imputeconstant.Rd +++ b/man/mlr_pipeops_imputeconstant.Rd @@ -102,6 +102,7 @@ Other PipeOps: \code{\link{mlr_pipeops_encodelmer}}, \code{\link{mlr_pipeops_encode}}, \code{\link{mlr_pipeops_featureunion}}, +\code{\link{mlr_pipeops_filterrows}}, \code{\link{mlr_pipeops_filter}}, \code{\link{mlr_pipeops_fixfactors}}, \code{\link{mlr_pipeops_histbin}}, diff --git a/man/mlr_pipeops_imputehist.Rd b/man/mlr_pipeops_imputehist.Rd index 43a3beb86..87884ac7e 100644 --- a/man/mlr_pipeops_imputehist.Rd +++ b/man/mlr_pipeops_imputehist.Rd @@ -87,6 +87,7 @@ Other PipeOps: \code{\link{mlr_pipeops_encodelmer}}, \code{\link{mlr_pipeops_encode}}, \code{\link{mlr_pipeops_featureunion}}, +\code{\link{mlr_pipeops_filterrows}}, \code{\link{mlr_pipeops_filter}}, \code{\link{mlr_pipeops_fixfactors}}, \code{\link{mlr_pipeops_histbin}}, diff --git a/man/mlr_pipeops_imputelearner.Rd b/man/mlr_pipeops_imputelearner.Rd index f86074f27..e7a1d0f03 100644 --- a/man/mlr_pipeops_imputelearner.Rd +++ b/man/mlr_pipeops_imputelearner.Rd @@ -116,6 +116,7 @@ Other PipeOps: \code{\link{mlr_pipeops_encodelmer}}, \code{\link{mlr_pipeops_encode}}, \code{\link{mlr_pipeops_featureunion}}, 
+\code{\link{mlr_pipeops_filterrows}}, \code{\link{mlr_pipeops_filter}}, \code{\link{mlr_pipeops_fixfactors}}, \code{\link{mlr_pipeops_histbin}}, diff --git a/man/mlr_pipeops_imputemean.Rd b/man/mlr_pipeops_imputemean.Rd index 9a34246aa..f3a954f13 100644 --- a/man/mlr_pipeops_imputemean.Rd +++ b/man/mlr_pipeops_imputemean.Rd @@ -87,6 +87,7 @@ Other PipeOps: \code{\link{mlr_pipeops_encodelmer}}, \code{\link{mlr_pipeops_encode}}, \code{\link{mlr_pipeops_featureunion}}, +\code{\link{mlr_pipeops_filterrows}}, \code{\link{mlr_pipeops_filter}}, \code{\link{mlr_pipeops_fixfactors}}, \code{\link{mlr_pipeops_histbin}}, diff --git a/man/mlr_pipeops_imputemedian.Rd b/man/mlr_pipeops_imputemedian.Rd index b89c02ee3..596605719 100644 --- a/man/mlr_pipeops_imputemedian.Rd +++ b/man/mlr_pipeops_imputemedian.Rd @@ -87,6 +87,7 @@ Other PipeOps: \code{\link{mlr_pipeops_encodelmer}}, \code{\link{mlr_pipeops_encode}}, \code{\link{mlr_pipeops_featureunion}}, +\code{\link{mlr_pipeops_filterrows}}, \code{\link{mlr_pipeops_filter}}, \code{\link{mlr_pipeops_fixfactors}}, \code{\link{mlr_pipeops_histbin}}, diff --git a/man/mlr_pipeops_imputemode.Rd b/man/mlr_pipeops_imputemode.Rd index 1ec28fc65..520a68ff7 100644 --- a/man/mlr_pipeops_imputemode.Rd +++ b/man/mlr_pipeops_imputemode.Rd @@ -94,6 +94,7 @@ Other PipeOps: \code{\link{mlr_pipeops_encodelmer}}, \code{\link{mlr_pipeops_encode}}, \code{\link{mlr_pipeops_featureunion}}, +\code{\link{mlr_pipeops_filterrows}}, \code{\link{mlr_pipeops_filter}}, \code{\link{mlr_pipeops_fixfactors}}, \code{\link{mlr_pipeops_histbin}}, diff --git a/man/mlr_pipeops_imputeoor.Rd b/man/mlr_pipeops_imputeoor.Rd index c141c4d33..a0418ed9b 100644 --- a/man/mlr_pipeops_imputeoor.Rd +++ b/man/mlr_pipeops_imputeoor.Rd @@ -116,6 +116,7 @@ Other PipeOps: \code{\link{mlr_pipeops_encodelmer}}, \code{\link{mlr_pipeops_encode}}, \code{\link{mlr_pipeops_featureunion}}, +\code{\link{mlr_pipeops_filterrows}}, \code{\link{mlr_pipeops_filter}}, 
\code{\link{mlr_pipeops_fixfactors}}, \code{\link{mlr_pipeops_histbin}}, diff --git a/man/mlr_pipeops_imputesample.Rd b/man/mlr_pipeops_imputesample.Rd index e31bcb461..3aef56dd4 100644 --- a/man/mlr_pipeops_imputesample.Rd +++ b/man/mlr_pipeops_imputesample.Rd @@ -89,6 +89,7 @@ Other PipeOps: \code{\link{mlr_pipeops_encodelmer}}, \code{\link{mlr_pipeops_encode}}, \code{\link{mlr_pipeops_featureunion}}, +\code{\link{mlr_pipeops_filterrows}}, \code{\link{mlr_pipeops_filter}}, \code{\link{mlr_pipeops_fixfactors}}, \code{\link{mlr_pipeops_histbin}}, diff --git a/man/mlr_pipeops_kernelpca.Rd b/man/mlr_pipeops_kernelpca.Rd index 1b426e65d..24cd3e749 100644 --- a/man/mlr_pipeops_kernelpca.Rd +++ b/man/mlr_pipeops_kernelpca.Rd @@ -101,6 +101,7 @@ Other PipeOps: \code{\link{mlr_pipeops_encodelmer}}, \code{\link{mlr_pipeops_encode}}, \code{\link{mlr_pipeops_featureunion}}, +\code{\link{mlr_pipeops_filterrows}}, \code{\link{mlr_pipeops_filter}}, \code{\link{mlr_pipeops_fixfactors}}, \code{\link{mlr_pipeops_histbin}}, diff --git a/man/mlr_pipeops_learner.Rd b/man/mlr_pipeops_learner.Rd index 9a5a12024..8bec201bb 100644 --- a/man/mlr_pipeops_learner.Rd +++ b/man/mlr_pipeops_learner.Rd @@ -120,6 +120,7 @@ Other PipeOps: \code{\link{mlr_pipeops_encodelmer}}, \code{\link{mlr_pipeops_encode}}, \code{\link{mlr_pipeops_featureunion}}, +\code{\link{mlr_pipeops_filterrows}}, \code{\link{mlr_pipeops_filter}}, \code{\link{mlr_pipeops_fixfactors}}, \code{\link{mlr_pipeops_histbin}}, diff --git a/man/mlr_pipeops_missind.Rd b/man/mlr_pipeops_missind.Rd index 2e04a6645..8790ea307 100644 --- a/man/mlr_pipeops_missind.Rd +++ b/man/mlr_pipeops_missind.Rd @@ -116,6 +116,7 @@ Other PipeOps: \code{\link{mlr_pipeops_encodelmer}}, \code{\link{mlr_pipeops_encode}}, \code{\link{mlr_pipeops_featureunion}}, +\code{\link{mlr_pipeops_filterrows}}, \code{\link{mlr_pipeops_filter}}, \code{\link{mlr_pipeops_fixfactors}}, \code{\link{mlr_pipeops_histbin}}, diff --git a/man/mlr_pipeops_modelmatrix.Rd 
b/man/mlr_pipeops_modelmatrix.Rd index a001d496c..47e32cc7d 100644 --- a/man/mlr_pipeops_modelmatrix.Rd +++ b/man/mlr_pipeops_modelmatrix.Rd @@ -93,6 +93,7 @@ Other PipeOps: \code{\link{mlr_pipeops_encodelmer}}, \code{\link{mlr_pipeops_encode}}, \code{\link{mlr_pipeops_featureunion}}, +\code{\link{mlr_pipeops_filterrows}}, \code{\link{mlr_pipeops_filter}}, \code{\link{mlr_pipeops_fixfactors}}, \code{\link{mlr_pipeops_histbin}}, diff --git a/man/mlr_pipeops_multiplicityexply.Rd b/man/mlr_pipeops_multiplicityexply.Rd index bd0398108..1dab9b119 100644 --- a/man/mlr_pipeops_multiplicityexply.Rd +++ b/man/mlr_pipeops_multiplicityexply.Rd @@ -99,6 +99,7 @@ Other PipeOps: \code{\link{mlr_pipeops_encodelmer}}, \code{\link{mlr_pipeops_encode}}, \code{\link{mlr_pipeops_featureunion}}, +\code{\link{mlr_pipeops_filterrows}}, \code{\link{mlr_pipeops_filter}}, \code{\link{mlr_pipeops_fixfactors}}, \code{\link{mlr_pipeops_histbin}}, diff --git a/man/mlr_pipeops_multiplicityimply.Rd b/man/mlr_pipeops_multiplicityimply.Rd index e7fa51394..5f3dcf1a7 100644 --- a/man/mlr_pipeops_multiplicityimply.Rd +++ b/man/mlr_pipeops_multiplicityimply.Rd @@ -105,6 +105,7 @@ Other PipeOps: \code{\link{mlr_pipeops_encodelmer}}, \code{\link{mlr_pipeops_encode}}, \code{\link{mlr_pipeops_featureunion}}, +\code{\link{mlr_pipeops_filterrows}}, \code{\link{mlr_pipeops_filter}}, \code{\link{mlr_pipeops_fixfactors}}, \code{\link{mlr_pipeops_histbin}}, diff --git a/man/mlr_pipeops_mutate.Rd b/man/mlr_pipeops_mutate.Rd index d8b9aa8d4..08ce9ac07 100644 --- a/man/mlr_pipeops_mutate.Rd +++ b/man/mlr_pipeops_mutate.Rd @@ -110,6 +110,7 @@ Other PipeOps: \code{\link{mlr_pipeops_encodelmer}}, \code{\link{mlr_pipeops_encode}}, \code{\link{mlr_pipeops_featureunion}}, +\code{\link{mlr_pipeops_filterrows}}, \code{\link{mlr_pipeops_filter}}, \code{\link{mlr_pipeops_fixfactors}}, \code{\link{mlr_pipeops_histbin}}, diff --git a/man/mlr_pipeops_nmf.Rd b/man/mlr_pipeops_nmf.Rd index 69de35de1..9f03efd46 100644 --- 
a/man/mlr_pipeops_nmf.Rd +++ b/man/mlr_pipeops_nmf.Rd @@ -139,6 +139,7 @@ Other PipeOps: \code{\link{mlr_pipeops_encodelmer}}, \code{\link{mlr_pipeops_encode}}, \code{\link{mlr_pipeops_featureunion}}, +\code{\link{mlr_pipeops_filterrows}}, \code{\link{mlr_pipeops_filter}}, \code{\link{mlr_pipeops_fixfactors}}, \code{\link{mlr_pipeops_histbin}}, diff --git a/man/mlr_pipeops_nop.Rd b/man/mlr_pipeops_nop.Rd index 72e23ec84..966bcf8a3 100644 --- a/man/mlr_pipeops_nop.Rd +++ b/man/mlr_pipeops_nop.Rd @@ -95,6 +95,7 @@ Other PipeOps: \code{\link{mlr_pipeops_encodelmer}}, \code{\link{mlr_pipeops_encode}}, \code{\link{mlr_pipeops_featureunion}}, +\code{\link{mlr_pipeops_filterrows}}, \code{\link{mlr_pipeops_filter}}, \code{\link{mlr_pipeops_fixfactors}}, \code{\link{mlr_pipeops_histbin}}, diff --git a/man/mlr_pipeops_ovrsplit.Rd b/man/mlr_pipeops_ovrsplit.Rd index 7d7e62379..c929fd56b 100644 --- a/man/mlr_pipeops_ovrsplit.Rd +++ b/man/mlr_pipeops_ovrsplit.Rd @@ -110,6 +110,7 @@ Other PipeOps: \code{\link{mlr_pipeops_encodelmer}}, \code{\link{mlr_pipeops_encode}}, \code{\link{mlr_pipeops_featureunion}}, +\code{\link{mlr_pipeops_filterrows}}, \code{\link{mlr_pipeops_filter}}, \code{\link{mlr_pipeops_fixfactors}}, \code{\link{mlr_pipeops_histbin}}, diff --git a/man/mlr_pipeops_ovrunite.Rd b/man/mlr_pipeops_ovrunite.Rd index 4c58a76fe..95c3e896e 100644 --- a/man/mlr_pipeops_ovrunite.Rd +++ b/man/mlr_pipeops_ovrunite.Rd @@ -105,6 +105,7 @@ Other PipeOps: \code{\link{mlr_pipeops_encodelmer}}, \code{\link{mlr_pipeops_encode}}, \code{\link{mlr_pipeops_featureunion}}, +\code{\link{mlr_pipeops_filterrows}}, \code{\link{mlr_pipeops_filter}}, \code{\link{mlr_pipeops_fixfactors}}, \code{\link{mlr_pipeops_histbin}}, diff --git a/man/mlr_pipeops_pca.Rd b/man/mlr_pipeops_pca.Rd index df07ac656..47f50e755 100644 --- a/man/mlr_pipeops_pca.Rd +++ b/man/mlr_pipeops_pca.Rd @@ -104,6 +104,7 @@ Other PipeOps: \code{\link{mlr_pipeops_encodelmer}}, \code{\link{mlr_pipeops_encode}}, 
\code{\link{mlr_pipeops_featureunion}}, +\code{\link{mlr_pipeops_filterrows}}, \code{\link{mlr_pipeops_filter}}, \code{\link{mlr_pipeops_fixfactors}}, \code{\link{mlr_pipeops_histbin}}, diff --git a/man/mlr_pipeops_proxy.Rd b/man/mlr_pipeops_proxy.Rd index 343e40014..99703ba73 100644 --- a/man/mlr_pipeops_proxy.Rd +++ b/man/mlr_pipeops_proxy.Rd @@ -116,6 +116,7 @@ Other PipeOps: \code{\link{mlr_pipeops_encodelmer}}, \code{\link{mlr_pipeops_encode}}, \code{\link{mlr_pipeops_featureunion}}, +\code{\link{mlr_pipeops_filterrows}}, \code{\link{mlr_pipeops_filter}}, \code{\link{mlr_pipeops_fixfactors}}, \code{\link{mlr_pipeops_histbin}}, diff --git a/man/mlr_pipeops_quantilebin.Rd b/man/mlr_pipeops_quantilebin.Rd index 59c70c60e..0dd0df3ba 100644 --- a/man/mlr_pipeops_quantilebin.Rd +++ b/man/mlr_pipeops_quantilebin.Rd @@ -92,6 +92,7 @@ Other PipeOps: \code{\link{mlr_pipeops_encodelmer}}, \code{\link{mlr_pipeops_encode}}, \code{\link{mlr_pipeops_featureunion}}, +\code{\link{mlr_pipeops_filterrows}}, \code{\link{mlr_pipeops_filter}}, \code{\link{mlr_pipeops_fixfactors}}, \code{\link{mlr_pipeops_histbin}}, diff --git a/man/mlr_pipeops_randomprojection.Rd b/man/mlr_pipeops_randomprojection.Rd index 7567e8ef0..e0644bb20 100644 --- a/man/mlr_pipeops_randomprojection.Rd +++ b/man/mlr_pipeops_randomprojection.Rd @@ -104,6 +104,7 @@ Other PipeOps: \code{\link{mlr_pipeops_encodelmer}}, \code{\link{mlr_pipeops_encode}}, \code{\link{mlr_pipeops_featureunion}}, +\code{\link{mlr_pipeops_filterrows}}, \code{\link{mlr_pipeops_filter}}, \code{\link{mlr_pipeops_fixfactors}}, \code{\link{mlr_pipeops_histbin}}, diff --git a/man/mlr_pipeops_randomresponse.Rd b/man/mlr_pipeops_randomresponse.Rd index 557be29e7..2339fcfef 100644 --- a/man/mlr_pipeops_randomresponse.Rd +++ b/man/mlr_pipeops_randomresponse.Rd @@ -119,6 +119,7 @@ Other PipeOps: \code{\link{mlr_pipeops_encodelmer}}, \code{\link{mlr_pipeops_encode}}, \code{\link{mlr_pipeops_featureunion}}, +\code{\link{mlr_pipeops_filterrows}}, 
\code{\link{mlr_pipeops_filter}}, \code{\link{mlr_pipeops_fixfactors}}, \code{\link{mlr_pipeops_histbin}}, diff --git a/man/mlr_pipeops_regravg.Rd b/man/mlr_pipeops_regravg.Rd index 054da76d8..ffeaa2d32 100644 --- a/man/mlr_pipeops_regravg.Rd +++ b/man/mlr_pipeops_regravg.Rd @@ -105,6 +105,7 @@ Other PipeOps: \code{\link{mlr_pipeops_encodelmer}}, \code{\link{mlr_pipeops_encode}}, \code{\link{mlr_pipeops_featureunion}}, +\code{\link{mlr_pipeops_filterrows}}, \code{\link{mlr_pipeops_filter}}, \code{\link{mlr_pipeops_fixfactors}}, \code{\link{mlr_pipeops_histbin}}, diff --git a/man/mlr_pipeops_removeconstants.Rd b/man/mlr_pipeops_removeconstants.Rd index e4743aff6..9dab9d453 100644 --- a/man/mlr_pipeops_removeconstants.Rd +++ b/man/mlr_pipeops_removeconstants.Rd @@ -97,6 +97,7 @@ Other PipeOps: \code{\link{mlr_pipeops_encodelmer}}, \code{\link{mlr_pipeops_encode}}, \code{\link{mlr_pipeops_featureunion}}, +\code{\link{mlr_pipeops_filterrows}}, \code{\link{mlr_pipeops_filter}}, \code{\link{mlr_pipeops_fixfactors}}, \code{\link{mlr_pipeops_histbin}}, diff --git a/man/mlr_pipeops_renamecolumns.Rd b/man/mlr_pipeops_renamecolumns.Rd index 714611a68..de53687c6 100644 --- a/man/mlr_pipeops_renamecolumns.Rd +++ b/man/mlr_pipeops_renamecolumns.Rd @@ -96,6 +96,7 @@ Other PipeOps: \code{\link{mlr_pipeops_encodelmer}}, \code{\link{mlr_pipeops_encode}}, \code{\link{mlr_pipeops_featureunion}}, +\code{\link{mlr_pipeops_filterrows}}, \code{\link{mlr_pipeops_filter}}, \code{\link{mlr_pipeops_fixfactors}}, \code{\link{mlr_pipeops_histbin}}, diff --git a/man/mlr_pipeops_replicate.Rd b/man/mlr_pipeops_replicate.Rd index 5a5a4ab15..06d7856f9 100644 --- a/man/mlr_pipeops_replicate.Rd +++ b/man/mlr_pipeops_replicate.Rd @@ -89,6 +89,7 @@ Other PipeOps: \code{\link{mlr_pipeops_encodelmer}}, \code{\link{mlr_pipeops_encode}}, \code{\link{mlr_pipeops_featureunion}}, +\code{\link{mlr_pipeops_filterrows}}, \code{\link{mlr_pipeops_filter}}, \code{\link{mlr_pipeops_fixfactors}}, 
\code{\link{mlr_pipeops_histbin}}, diff --git a/man/mlr_pipeops_scale.Rd b/man/mlr_pipeops_scale.Rd index 1189e238b..35844a8be 100644 --- a/man/mlr_pipeops_scale.Rd +++ b/man/mlr_pipeops_scale.Rd @@ -111,6 +111,7 @@ Other PipeOps: \code{\link{mlr_pipeops_encodelmer}}, \code{\link{mlr_pipeops_encode}}, \code{\link{mlr_pipeops_featureunion}}, +\code{\link{mlr_pipeops_filterrows}}, \code{\link{mlr_pipeops_filter}}, \code{\link{mlr_pipeops_fixfactors}}, \code{\link{mlr_pipeops_histbin}}, diff --git a/man/mlr_pipeops_scalemaxabs.Rd b/man/mlr_pipeops_scalemaxabs.Rd index cf765c8dc..cc5da116c 100644 --- a/man/mlr_pipeops_scalemaxabs.Rd +++ b/man/mlr_pipeops_scalemaxabs.Rd @@ -86,6 +86,7 @@ Other PipeOps: \code{\link{mlr_pipeops_encodelmer}}, \code{\link{mlr_pipeops_encode}}, \code{\link{mlr_pipeops_featureunion}}, +\code{\link{mlr_pipeops_filterrows}}, \code{\link{mlr_pipeops_filter}}, \code{\link{mlr_pipeops_fixfactors}}, \code{\link{mlr_pipeops_histbin}}, diff --git a/man/mlr_pipeops_scalerange.Rd b/man/mlr_pipeops_scalerange.Rd index 34c58e39d..cd1298489 100644 --- a/man/mlr_pipeops_scalerange.Rd +++ b/man/mlr_pipeops_scalerange.Rd @@ -91,6 +91,7 @@ Other PipeOps: \code{\link{mlr_pipeops_encodelmer}}, \code{\link{mlr_pipeops_encode}}, \code{\link{mlr_pipeops_featureunion}}, +\code{\link{mlr_pipeops_filterrows}}, \code{\link{mlr_pipeops_filter}}, \code{\link{mlr_pipeops_fixfactors}}, \code{\link{mlr_pipeops_histbin}}, diff --git a/man/mlr_pipeops_select.Rd b/man/mlr_pipeops_select.Rd index ffaf3c5a7..1b85c73fe 100644 --- a/man/mlr_pipeops_select.Rd +++ b/man/mlr_pipeops_select.Rd @@ -107,6 +107,7 @@ Other PipeOps: \code{\link{mlr_pipeops_encodelmer}}, \code{\link{mlr_pipeops_encode}}, \code{\link{mlr_pipeops_featureunion}}, +\code{\link{mlr_pipeops_filterrows}}, \code{\link{mlr_pipeops_filter}}, \code{\link{mlr_pipeops_fixfactors}}, \code{\link{mlr_pipeops_histbin}}, diff --git a/man/mlr_pipeops_smote.Rd b/man/mlr_pipeops_smote.Rd index c6870bda0..d5360da4a 100644 --- 
a/man/mlr_pipeops_smote.Rd +++ b/man/mlr_pipeops_smote.Rd @@ -108,6 +108,7 @@ Other PipeOps: \code{\link{mlr_pipeops_encodelmer}}, \code{\link{mlr_pipeops_encode}}, \code{\link{mlr_pipeops_featureunion}}, +\code{\link{mlr_pipeops_filterrows}}, \code{\link{mlr_pipeops_filter}}, \code{\link{mlr_pipeops_fixfactors}}, \code{\link{mlr_pipeops_histbin}}, diff --git a/man/mlr_pipeops_spatialsign.Rd b/man/mlr_pipeops_spatialsign.Rd index e8b2ee70c..d88320369 100644 --- a/man/mlr_pipeops_spatialsign.Rd +++ b/man/mlr_pipeops_spatialsign.Rd @@ -86,6 +86,7 @@ Other PipeOps: \code{\link{mlr_pipeops_encodelmer}}, \code{\link{mlr_pipeops_encode}}, \code{\link{mlr_pipeops_featureunion}}, +\code{\link{mlr_pipeops_filterrows}}, \code{\link{mlr_pipeops_filter}}, \code{\link{mlr_pipeops_fixfactors}}, \code{\link{mlr_pipeops_histbin}}, diff --git a/man/mlr_pipeops_subsample.Rd b/man/mlr_pipeops_subsample.Rd index a66619dd4..611cc596f 100644 --- a/man/mlr_pipeops_subsample.Rd +++ b/man/mlr_pipeops_subsample.Rd @@ -101,6 +101,7 @@ Other PipeOps: \code{\link{mlr_pipeops_encodelmer}}, \code{\link{mlr_pipeops_encode}}, \code{\link{mlr_pipeops_featureunion}}, +\code{\link{mlr_pipeops_filterrows}}, \code{\link{mlr_pipeops_filter}}, \code{\link{mlr_pipeops_fixfactors}}, \code{\link{mlr_pipeops_histbin}}, diff --git a/man/mlr_pipeops_targetinvert.Rd b/man/mlr_pipeops_targetinvert.Rd index e76f0f094..64b268385 100644 --- a/man/mlr_pipeops_targetinvert.Rd +++ b/man/mlr_pipeops_targetinvert.Rd @@ -86,6 +86,7 @@ Other PipeOps: \code{\link{mlr_pipeops_encodelmer}}, \code{\link{mlr_pipeops_encode}}, \code{\link{mlr_pipeops_featureunion}}, +\code{\link{mlr_pipeops_filterrows}}, \code{\link{mlr_pipeops_filter}}, \code{\link{mlr_pipeops_fixfactors}}, \code{\link{mlr_pipeops_histbin}}, diff --git a/man/mlr_pipeops_targetmutate.Rd b/man/mlr_pipeops_targetmutate.Rd index 6c4953cdb..8415e782f 100644 --- a/man/mlr_pipeops_targetmutate.Rd +++ b/man/mlr_pipeops_targetmutate.Rd @@ -132,6 +132,7 @@ Other 
PipeOps: \code{\link{mlr_pipeops_encodelmer}}, \code{\link{mlr_pipeops_encode}}, \code{\link{mlr_pipeops_featureunion}}, +\code{\link{mlr_pipeops_filterrows}}, \code{\link{mlr_pipeops_filter}}, \code{\link{mlr_pipeops_fixfactors}}, \code{\link{mlr_pipeops_histbin}}, diff --git a/man/mlr_pipeops_targettrafoscalerange.Rd b/man/mlr_pipeops_targettrafoscalerange.Rd index 53f983901..556cd9510 100644 --- a/man/mlr_pipeops_targettrafoscalerange.Rd +++ b/man/mlr_pipeops_targettrafoscalerange.Rd @@ -98,6 +98,7 @@ Other PipeOps: \code{\link{mlr_pipeops_encodelmer}}, \code{\link{mlr_pipeops_encode}}, \code{\link{mlr_pipeops_featureunion}}, +\code{\link{mlr_pipeops_filterrows}}, \code{\link{mlr_pipeops_filter}}, \code{\link{mlr_pipeops_fixfactors}}, \code{\link{mlr_pipeops_histbin}}, diff --git a/man/mlr_pipeops_textvectorizer.Rd b/man/mlr_pipeops_textvectorizer.Rd index fccc3503c..3e092024f 100644 --- a/man/mlr_pipeops_textvectorizer.Rd +++ b/man/mlr_pipeops_textvectorizer.Rd @@ -196,6 +196,7 @@ Other PipeOps: \code{\link{mlr_pipeops_encodelmer}}, \code{\link{mlr_pipeops_encode}}, \code{\link{mlr_pipeops_featureunion}}, +\code{\link{mlr_pipeops_filterrows}}, \code{\link{mlr_pipeops_filter}}, \code{\link{mlr_pipeops_fixfactors}}, \code{\link{mlr_pipeops_histbin}}, diff --git a/man/mlr_pipeops_threshold.Rd b/man/mlr_pipeops_threshold.Rd index 8aa23ccc0..ed18a621c 100644 --- a/man/mlr_pipeops_threshold.Rd +++ b/man/mlr_pipeops_threshold.Rd @@ -91,6 +91,7 @@ Other PipeOps: \code{\link{mlr_pipeops_encodelmer}}, \code{\link{mlr_pipeops_encode}}, \code{\link{mlr_pipeops_featureunion}}, +\code{\link{mlr_pipeops_filterrows}}, \code{\link{mlr_pipeops_filter}}, \code{\link{mlr_pipeops_fixfactors}}, \code{\link{mlr_pipeops_histbin}}, diff --git a/man/mlr_pipeops_tunethreshold.Rd b/man/mlr_pipeops_tunethreshold.Rd index 56947c7ef..094d5cbb9 100644 --- a/man/mlr_pipeops_tunethreshold.Rd +++ b/man/mlr_pipeops_tunethreshold.Rd @@ -112,6 +112,7 @@ Other PipeOps: 
\code{\link{mlr_pipeops_encodelmer}}, \code{\link{mlr_pipeops_encode}}, \code{\link{mlr_pipeops_featureunion}}, +\code{\link{mlr_pipeops_filterrows}}, \code{\link{mlr_pipeops_filter}}, \code{\link{mlr_pipeops_fixfactors}}, \code{\link{mlr_pipeops_histbin}}, diff --git a/man/mlr_pipeops_unbranch.Rd b/man/mlr_pipeops_unbranch.Rd index 8cbb4dacc..3d6f111b2 100644 --- a/man/mlr_pipeops_unbranch.Rd +++ b/man/mlr_pipeops_unbranch.Rd @@ -98,6 +98,7 @@ Other PipeOps: \code{\link{mlr_pipeops_encodelmer}}, \code{\link{mlr_pipeops_encode}}, \code{\link{mlr_pipeops_featureunion}}, +\code{\link{mlr_pipeops_filterrows}}, \code{\link{mlr_pipeops_filter}}, \code{\link{mlr_pipeops_fixfactors}}, \code{\link{mlr_pipeops_histbin}}, diff --git a/man/mlr_pipeops_updatetarget.Rd b/man/mlr_pipeops_updatetarget.Rd index 245314651..7b2b233fe 100644 --- a/man/mlr_pipeops_updatetarget.Rd +++ b/man/mlr_pipeops_updatetarget.Rd @@ -111,6 +111,7 @@ Other PipeOps: \code{\link{mlr_pipeops_encodelmer}}, \code{\link{mlr_pipeops_encode}}, \code{\link{mlr_pipeops_featureunion}}, +\code{\link{mlr_pipeops_filterrows}}, \code{\link{mlr_pipeops_filter}}, \code{\link{mlr_pipeops_fixfactors}}, \code{\link{mlr_pipeops_histbin}}, diff --git a/man/mlr_pipeops_vtreat.Rd b/man/mlr_pipeops_vtreat.Rd index d2747fbcb..1285795e7 100644 --- a/man/mlr_pipeops_vtreat.Rd +++ b/man/mlr_pipeops_vtreat.Rd @@ -164,6 +164,7 @@ Other PipeOps: \code{\link{mlr_pipeops_encodelmer}}, \code{\link{mlr_pipeops_encode}}, \code{\link{mlr_pipeops_featureunion}}, +\code{\link{mlr_pipeops_filterrows}}, \code{\link{mlr_pipeops_filter}}, \code{\link{mlr_pipeops_fixfactors}}, \code{\link{mlr_pipeops_histbin}}, diff --git a/man/mlr_pipeops_yeojohnson.Rd b/man/mlr_pipeops_yeojohnson.Rd index 32eb7f47c..4e56289d1 100644 --- a/man/mlr_pipeops_yeojohnson.Rd +++ b/man/mlr_pipeops_yeojohnson.Rd @@ -101,6 +101,7 @@ Other PipeOps: \code{\link{mlr_pipeops_encodelmer}}, \code{\link{mlr_pipeops_encode}}, \code{\link{mlr_pipeops_featureunion}}, 
+\code{\link{mlr_pipeops_filterrows}}, \code{\link{mlr_pipeops_filter}}, \code{\link{mlr_pipeops_fixfactors}}, \code{\link{mlr_pipeops_histbin}}, diff --git a/tests/testthat/test_pipeop_filterrows.R b/tests/testthat/test_pipeop_filterrows.R new file mode 100644 index 000000000..8e9daed4c --- /dev/null +++ b/tests/testthat/test_pipeop_filterrows.R @@ -0,0 +1,111 @@ +context("PipeOpFilterRows") + +test_that("PipeOpFilterRows - basic properties", { + op = PipeOpFilterRows$new() + task = mlr_tasks$get("pima") + expect_pipeop(op) + expect_equal(train_pipeop(op, inputs = list(task))[[1L]], task) + expect_equal(predict_pipeop(op, inputs = list(task))[[1L]], task) + expect_datapreproc_pipeop_class(PipeOpFilterRows, task = task) +}) + +test_that("PipeOpFilterRows - filtering", { + set.seed(1) + task = tsk("pima") + train_ids = sample(task$row_ids, size = 200) + task_train = task$clone(deep = TRUE)$filter(train_ids) + task_predict = task$clone(deep = TRUE)$filter(setdiff(task$row_ids, train_ids)) + dt_train = task_train$data(cols = task_train$feature_names) + dt_predict = task_predict$data(cols = task_predict$feature_names) + + op = PipeOpFilterRows$new(param_vals = list( + filter_formula = ~ (age < 31 & glucose > median(glucose, na.rm = TRUE)) | + pedigree < mean(pedigree, na.rm = TRUE))) + + train_out = op$train(list(task_train))[[1L]] + + expect_equal(dt_train[(age < 31 & glucose > median(glucose, na.rm = TRUE)) | + pedigree < mean(pedigree, na.rm = TRUE), ], train_out$data(cols = task_train$feature_names)) + + predict_out = op$predict(list(task_predict))[[1L]] + + expect_equal(dt_predict[(age < 31 & glucose > median(glucose, na.rm = TRUE)) | + pedigree < mean(pedigree, na.rm = TRUE), ], predict_out$data(cols = task_predict$feature_names)) + + # Works with variables from an env + env = new.env() + assign("some_test_val", 7, envir = env) + some_test_val = -100 # this should not be taken! 
+ filter_formula = ~ pregnant == some_test_val + environment(filter_formula) = env + op$param_set$values$filter_formula = filter_formula + expect_true(all(op$train(list(task))[[1L]]$data(cols = "pregnant")[[1L]] == 7L)) + + filter_formula = ~ pregnant == some_test_val & !apply(is.na(.SD), MARGIN = 1L, FUN = any) + environment(filter_formula) = env + op$param_set$values$filter_formula = filter_formula + expect_equal(op$train(list(task))[[1L]]$data(), na.omit(task$data())[pregnant == 7, ]) +}) + +test_that("PipeOpFilterRows - missing values removal", { + set.seed(2) + task = tsk("pima") + train_ids = sample(task$row_ids, size = 200) + task_train = task$clone(deep = TRUE)$filter(train_ids) + task_predict = task$clone(deep = TRUE)$filter(setdiff(task$row_ids, train_ids)) + dt_train = task_train$data(cols = task_train$feature_names) + dt_predict = task_predict$data(cols = task_predict$feature_names) + + op = PipeOpFilterRows$new(param_vals = list(filter_formula = ~ !is.na(insulin))) + + train_out = op$train(list(task_train))[[1L]] + + expect_equal(dt_train[!is.na(insulin), ], + train_out$data(cols = task_train$feature_names)) + + predict_out = op$predict(list(task_predict))[[1L]] + + expect_equal(dt_predict[!is.na(insulin), ], + predict_out$data(cols = task_predict$feature_names)) + + op$param_set$values$phase = "train" + expect_equal(op$predict(list(task_predict))[[1L]], task_predict) + + op$param_set$values$phase = "predict" + expect_equal(op$train(list(task_train))[[1L]], task_train) +}) + +test_that("PipeOpFilterRows - filtering and missing values removal", { + set.seed(3) + task = tsk("pima") + train_ids = sample(task$row_ids, size = 200) + task_train = task$clone(deep = TRUE)$filter(train_ids) + task_predict = task$clone(deep = TRUE)$filter(setdiff(task$row_ids, train_ids)) + dt_train = task_train$data(cols = task_train$feature_names) + dt_predict = task_predict$data(cols = task_predict$feature_names) + + op = PipeOpFilterRows$new(param_vals = list( + 
filter_formula = ~ age > median(age, na.rm = TRUE) & + !apply(is.na(.SD), MARGIN = 1L, FUN = any))) + + train_out = op$train(list(task_train))[[1L]] + + expect_equal(na.omit(dt_train[age > median(age, na.rm = TRUE)]), + train_out$data(cols = task_train$feature_names)) + + predict_out = op$predict(list(task_predict))[[1L]] + + expect_equal(na.omit(dt_predict[age > median(age, na.rm = TRUE)]), + predict_out$data(cols = task_predict$feature_names)) + + # Test with SDcols selector being explicitly set + op$param_set$values$filter_formula = ~ !apply(is.na(.SD), MARGIN = 1L, FUN = any) + op$param_set$values$SDcols = selector_name("insulin") + expect_equal(op$train(list(task))[[1L]]$data(), task$data()[!is.na(insulin), ]) +}) + +test_that("PipeOpFilterRows - check_filter_formulae", { + expect_true(check_filter_formulae(NULL)) + expect_true(check_filter_formulae(~ age < 1)) + expect_character(check_filter_formulae(y ~ x)) +})