Skip to content

Commit b65df4d

Browse files
authored
rescale() can expand ranges (#467)
* rescale() can expand ranges * no need for RD file * simplify code * test * typo * change behaviour of `add`. allow multiple values * typo
1 parent b2d3b1b commit b65df4d

File tree

5 files changed

+209
-13
lines changed

5 files changed

+209
-13
lines changed

DESCRIPTION

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
Type: Package
22
Package: datawizard
33
Title: Easy Data Wrangling and Statistical Transformations
4-
Version: 0.9.0.1
4+
Version: 0.9.0.2
55
Authors@R: c(
66
person("Indrajeet", "Patil", , "patilindrajeet.science@gmail.com", role = "aut",
77
comment = c(ORCID = "0000-0003-1995-6531", Twitter = "@patilindrajeets")),
@@ -33,7 +33,7 @@ BugReports: https://github.com/easystats/datawizard/issues
3333
Depends:
3434
R (>= 3.6)
3535
Imports:
36-
insight (>= 0.19.4),
36+
insight (>= 0.19.6),
3737
stats,
3838
utils
3939
Suggests:

NEWS.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,10 @@
11
# datawizard 0.9.0.9000 (development version)
22

3+
CHANGES
4+
5+
* `rescale()` gains `multiply` and `add` arguments, to expand ranges by a given
6+
factor or value.
7+
38
# datawizard 0.9.0
49

510
NEW FUNCTIONS

R/data_rescale.R

Lines changed: 94 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,26 @@
11
#' Rescale Variables to a New Range
22
#'
3-
#' Rescale variables to a new range.
4-
#' Can also be used to reverse-score variables (change the keying/scoring direction).
3+
#' Rescale variables to a new range. Can also be used to reverse-score variables
4+
#' (change the keying/scoring direction), or to expand a range.
55
#'
66
#' @inheritParams categorize
77
#' @inheritParams find_columns
88
#' @inheritParams standardize.data.frame
99
#'
10-
#' @param to Numeric vector of length 2 giving the new range that the variable will have after rescaling.
11-
#' To reverse-score a variable, the range should be given with the maximum value first.
12-
#' See examples.
10+
#' @param to Numeric vector of length 2 giving the new range that the variable
11+
#' will have after rescaling. To reverse-score a variable, the range should
12+
#' be given with the maximum value first. See examples.
13+
#' @param multiply If not `NULL`, `to` is ignored and `multiply` will be used,
14+
#' giving the factor by which the actual range of `x` should be expanded.
15+
#' For example, if a vector ranges from 5 to 15 and `multiply = 1.1`, the current
16+
#' range of 10 will be expanded by the factor of 1.1, giving a new range of
17+
#' 11. Thus, the rescaled vector would range from 4.5 to 15.5.
18+
#' @param add A vector of length 1 or 2. If not `NULL`, `to` is ignored and `add`
19+
#' will be used, giving the amount by which the minimum and maximum of the
20+
#' actual range of `x` should be expanded. For example, if a vector ranges from
21+
#' 5 to 15 and `add = 1`, the new range will be from 4 to 16. If `add` is
22+
#' of length 2, then the first value is used for the lower bound and the second
23+
#' value for the upper bound.
1324
#' @param range Initial (old) range of values. If `NULL`, will take the range of
1425
#' the input vector (`range(x)`).
1526
#' @param ... Arguments passed to or from other methods.
@@ -37,6 +48,21 @@
3748
#' "Sepal.Length" = c(0, 1),
3849
#' "Petal.Length" = c(-1, 0)
3950
#' )))
51+
#'
52+
#' # "expand" ranges by a factor or a given value
53+
#' x <- 5:15
54+
#' x
55+
#' # both will expand the range by 10%
56+
#' rescale(x, multiply = 1.1)
57+
#' rescale(x, add = 0.5)
58+
#'
59+
#' # expand range by different values
60+
#' rescale(x, add = c(1, 3))
61+
#'
62+
#' # Specify list of multipliers
63+
#' d <- data.frame(x = 5:15, y = 5:15)
64+
#' rescale(d, multiply = list(x = 1.1, y = 0.5))
65+
#'
4066
#' @inherit data_rename
4167
#'
4268
#' @return A rescaled object.
@@ -75,6 +101,8 @@ rescale.default <- function(x, verbose = TRUE, ...) {
75101
#' @export
76102
rescale.numeric <- function(x,
77103
to = c(0, 100),
104+
multiply = NULL,
105+
add = NULL,
78106
range = NULL,
79107
verbose = TRUE,
80108
...) {
@@ -91,6 +119,9 @@ rescale.numeric <- function(x,
91119
range <- c(min(x, na.rm = TRUE), max(x, na.rm = TRUE))
92120
}
93121

122+
# check if user specified "multiply" or "add", and then update "to"
123+
to <- .update_to(x, to, multiply, add)
124+
94125
# called from "makepredictcal()"? Then we have additional arguments
95126
dot_args <- list(...)
96127
required_dot_args <- c("min_value", "max_value", "new_min", "new_max")
@@ -144,6 +175,8 @@ rescale.grouped_df <- function(x,
144175
select = NULL,
145176
exclude = NULL,
146177
to = c(0, 100),
178+
multiply = NULL,
179+
add = NULL,
147180
range = NULL,
148181
append = FALSE,
149182
ignore_case = FALSE,
@@ -188,6 +221,8 @@ rescale.grouped_df <- function(x,
188221
select = select,
189222
exclude = exclude,
190223
to = to,
224+
multiply = multiply,
225+
add = add,
191226
range = range,
192227
append = FALSE, # need to set to FALSE here, else variable will be doubled
193228
add_transform_class = FALSE,
@@ -207,6 +242,8 @@ rescale.data.frame <- function(x,
207242
select = NULL,
208243
exclude = NULL,
209244
to = c(0, 100),
245+
multiply = NULL,
246+
add = NULL,
210247
range = NULL,
211248
append = FALSE,
212249
ignore_case = FALSE,
@@ -245,9 +282,61 @@ rescale.data.frame <- function(x,
245282
if (!is.list(to)) {
246283
to <- stats::setNames(rep(list(to), length(select)), select)
247284
}
285+
# Transform the 'multiply' so that it is a list now
286+
if (!is.null(multiply) && !is.list(multiply)) {
287+
multiply <- stats::setNames(rep(list(multiply), length(select)), select)
288+
}
289+
# Transform the 'add' so that it is a list now
290+
if (!is.null(add) && !is.list(add)) {
291+
add <- stats::setNames(rep(list(add), length(select)), select)
292+
}
293+
# update "to" if user specified "multiply" or "add"
294+
to[] <- lapply(names(to), function(i) {
295+
.update_to(x[[i]], to[[i]], multiply[[i]], add[[i]])
296+
})
248297

249298
x[select] <- as.data.frame(sapply(select, function(n) {
250299
rescale(x[[n]], to = to[[n]], range = range[[n]], add_transform_class = FALSE)
251300
}, simplify = FALSE))
252301
x
253302
}
303+
304+
305+
# helper ----------------------------------------------------------------------
306+
307+
# Expand the new target range by multiplying or adding.
#
# x:        vector whose observed minimum/maximum define the current range.
# to:       target range; returned unchanged when neither `multiply` nor
#           `add` is given, or when `x` has no non-missing values.
# multiply: `NULL` or a single factor by which the width of the current
#           range is scaled (the expansion is split evenly on both sides).
# add:      `NULL` or a vector of length 1 or 2; the amount subtracted from
#           the minimum and added to the maximum (first value = lower bound,
#           last value = upper bound).
#
# Returns the (possibly updated) target range. Errors if both `multiply` and
# `add` are given, or if either has an invalid length.
.update_to <- function(x, to, multiply, add) {
  # check if user specified "multiply" or "add", and if not, return "to"
  if (is.null(multiply) && is.null(add)) {
    return(to)
  }
  # only one of "multiply" or "add" can be specified
  if (!is.null(multiply) && !is.null(add)) {
    insight::format_error("Only one of `multiply` or `add` can be specified.")
  }
  # validate lengths up-front. A zero-length value must be rejected as well,
  # otherwise it would silently produce a zero-length target range.
  if (!is.null(multiply) && length(multiply) != 1L) {
    insight::format_error("The length of `multiply` must be 1.")
  }
  if (!is.null(add) && !length(add) %in% c(1L, 2L)) {
    insight::format_error("The length of `add` must be 1 or 2.")
  }
  # empty or all-missing input has no observed range: min()/max() would warn
  # and return infinite bounds, so leave "to" untouched instead
  if (all(is.na(x))) {
    return(to)
  }
  current <- range(x, na.rm = TRUE)
  # multiply? If yes, calculate the equivalent "add" value: the growth of the
  # range width, split evenly between lower and upper bound
  if (!is.null(multiply)) {
    add <- (diff(current) * (multiply - 1)) / 2
  }
  # first value is used for the lower bound, last value for the upper bound
  # (for a length-1 "add", both are the same value)
  add_low <- add[1]
  add_high <- add[length(add)]
  c(current[1] - add_low, current[2] + add_high)
}

man/rescale.Rd

Lines changed: 44 additions & 6 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tests/testthat/test-data_rescale.R

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,3 +109,67 @@ test_that("data_rescale regex", {
109109
ignore_attr = TRUE
110110
)
111111
})
112+
113+
114+
# expanding range ------------------------------
115+
test_that("data_rescale can expand range", {
  # for vectors
  x <- 5:15
  expect_equal(
    rescale(x, multiply = 1.1),
    c(4.5, 5.6, 6.7, 7.8, 8.9, 10, 11.1, 12.2, 13.3, 14.4, 15.5),
    ignore_attr = TRUE
  )
  # a factor of 1.1 on a range of width 10 equals adding 0.5 on each side
  expect_equal(
    rescale(x, multiply = 1.1),
    rescale(x, add = 0.5),
    ignore_attr = TRUE
  )
  # `regexp` is spelled out in full instead of relying on partial matching
  expect_error(rescale(x, multiply = 0.9, add = 1), regexp = "Only one of")
  expect_error(rescale(x, multiply = c(1.2, 1.4)), regexp = "The length of")

  # different values for add
  expect_equal(
    rescale(x, add = c(1, 3)),
    c(4, 5.4, 6.8, 8.2, 9.6, 11, 12.4, 13.8, 15.2, 16.6, 18),
    ignore_attr = TRUE
  )
  expect_error(rescale(x, add = 1:3), regexp = "The length of")

  # works with NA
  expect_equal(
    rescale(rep(NA_real_, 3), multiply = 1.1),
    rep(NA_real_, 3),
    ignore_attr = TRUE
  )
  expect_equal(
    rescale(rep(NA_real_, 3), add = 2),
    rep(NA_real_, 3),
    ignore_attr = TRUE
  )

  # for data frames
  d <- data.frame(x = 5:15, y = 5:15)
  expect_equal(
    rescale(d, multiply = 1.1),
    rescale(d, add = 0.5),
    ignore_attr = TRUE
  )
  # per-column values; a factor below 1 shrinks the range (negative add)
  expect_equal(
    rescale(d, multiply = list(x = 1.1, y = 0.5)),
    rescale(d, add = list(x = 0.5, y = -2.5)),
    ignore_attr = TRUE
  )
  # data frames accept multiple add-values per column
  out <- rescale(d, add = list(x = c(1, 3), y = c(2, 4)))
  expect_equal(
    out$x,
    rescale(d$x, add = c(1, 3)),
    ignore_attr = TRUE
  )
  expect_equal(
    out$y,
    rescale(d$y, add = c(2, 4)),
    ignore_attr = TRUE
  )

  expect_error(rescale(d, multiply = 0.9, add = 1), regexp = "Only one of")
  expect_error(
    rescale(d, multiply = list(x = 0.9, y = 2), add = list(y = 1)),
    regexp = "Only one of"
  )
  expect_error(rescale(d, multiply = c(0.9, 1.5)), regexp = "The length of")
})

0 commit comments

Comments
 (0)