Skip to content

Commit 60c2fa5

Browse files
New argument zeros in ranktransform() (#573)
* init * minor * fix lints
1 parent 0faf924 commit 60c2fa5

File tree

5 files changed

+89
-15
lines changed

5 files changed

+89
-15
lines changed

DESCRIPTION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
Type: Package
22
Package: datawizard
33
Title: Easy Data Wrangling and Statistical Transformations
4-
Version: 0.13.0.18
4+
Version: 0.13.0.19
55
Authors@R: c(
66
person("Indrajeet", "Patil", , "patilindrajeet.science@gmail.com", role = "aut",
77
comment = c(ORCID = "0000-0003-1995-6531")),

NEWS.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,9 @@ CHANGES
3939
* `data_summary()` also accepts the results of `bayestestR::ci()` as summary
4040
function (#483).
4141

42+
* `ranktransform()` has a new argument `zeros` to determine how zeros should be
43+
handled when `sign = TRUE` (#573).
44+
4245
BUG FIXES
4346

4447
* `describe_distribution()` no longer errors if the sample was too sparse to compute

R/ranktransform.R

Lines changed: 45 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,9 @@
1010
#' @param method Treatment of ties. Can be one of `"average"` (default),
1111
#' `"first"`, `"last"`, `"random"`, `"max"` or `"min"`. See [rank()] for
1212
#' details.
13+
#' @param zeros How to handle zeros. If `"na"` (default), they are marked as
14+
#' `NA`. If `"signrank"`, they are included in the ranking and returned as zeros.
15+
#' This is only used when `sign = TRUE`.
1316
#' @param ... Arguments passed to or from other methods.
1417
#' @inheritParams extract_column_names
1518
#' @inheritParams standardize.data.frame
@@ -19,8 +22,11 @@
1922
#' @examples
2023
#' ranktransform(c(0, 1, 5, -5, -2))
2124
#'
22-
#' # Won't work
23-
#' # ranktransform(c(0, 1, 5, -5, -2), sign = TRUE)
25+
#' # By default, zeros are converted to NA
26+
#' suppressWarnings(
27+
#' ranktransform(c(0, 1, 5, -5, -2), sign = TRUE)
28+
#' )
29+
#' ranktransform(c(0, 1, 5, -5, -2), sign = TRUE, zeros = "signrank")
2430
#'
2531
#' head(ranktransform(trees))
2632
#' @return A rank-transformed object.
@@ -38,13 +44,20 @@ ranktransform <- function(x, ...) {
3844
ranktransform.numeric <- function(x,
3945
sign = FALSE,
4046
method = "average",
47+
zeros = "na",
4148
verbose = TRUE,
4249
...) {
4350
# no change if all values are `NA`s
4451
if (all(is.na(x))) {
4552
return(x)
4653
}
4754

55+
zeros <- match.arg(zeros, c("na", "signrank"))
56+
method <- match.arg(
57+
method,
58+
c("average", "first", "last", "random", "max", "min")
59+
)
60+
4861
# Warning if only one value and return early
4962
if (insight::has_single_value(x)) {
5063
if (is.null(names(x))) {
@@ -54,7 +67,13 @@ ranktransform.numeric <- function(x,
5467
}
5568

5669
if (verbose) {
57-
insight::format_warning(paste0("Variable `", name, "` contains only one unique value and will not be normalized."))
70+
insight::format_warning(
71+
paste0(
72+
"Variable `",
73+
name,
74+
"` contains only one unique value and will not be normalized."
75+
)
76+
)
5877
}
5978

6079
return(x)
@@ -70,16 +89,31 @@ ranktransform.numeric <- function(x,
7089
}
7190

7291
if (verbose) {
73-
insight::format_warning(paste0("Variable `", name, "` contains only two different values. Consider converting it to a factor."))
92+
# nolint
93+
insight::format_warning(
94+
paste0(
95+
"Variable `",
96+
name,
97+
"` contains only two different values. Consider converting it to a factor."
98+
)
99+
)
74100
}
75101
}
76102

77-
78103
if (sign) {
79-
ZEROES <- x == 0
80-
if (any(ZEROES) && verbose) insight::format_warning("Zeros detected. These cannot be sign-rank transformed.")
81-
out <- rep(NA, length(x))
82-
out[!ZEROES] <- sign(x[!ZEROES]) * rank(abs(x[!ZEROES]), ties.method = method, na.last = "keep")
104+
if (zeros == "na") {
105+
out <- rep(NA, length(x))
106+
ZEROES <- x == 0
107+
if (any(ZEROES) && verbose) {
108+
insight::format_warning("Zeros detected. These cannot be sign-rank transformed.") # nolint
109+
}
110+
out[!ZEROES] <- sign(x[!ZEROES]) * rank(abs(x[!ZEROES]),
111+
ties.method = method,
112+
na.last = "keep"
113+
)
114+
} else if (zeros == "signrank") {
115+
out <- sign(x) * rank(abs(x), ties.method = method, na.last = "keep")
116+
}
83117
} else {
84118
out <- rank(x, ties.method = method, na.last = "keep")
85119
}
@@ -102,6 +136,7 @@ ranktransform.grouped_df <- function(x,
102136
method = "average",
103137
ignore_case = FALSE,
104138
regex = FALSE,
139+
zeros = "na",
105140
verbose = TRUE,
106141
...) {
107142
info <- attributes(x)
@@ -143,6 +178,7 @@ ranktransform.data.frame <- function(x,
143178
method = "average",
144179
ignore_case = FALSE,
145180
regex = FALSE,
181+
zeros = "na",
146182
verbose = TRUE,
147183
...) {
148184
# evaluate arguments

man/ranktransform.Rd

Lines changed: 18 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tests/testthat/test-ranktransform.R

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,22 @@ test_that("signed rank works as expected", {
4646
))))
4747
})
4848

49+
test_that("argument 'zeros' works", {
50+
x <- c(-1, 0, 2, -3, 4)
51+
expect_warning(
52+
ranktransform(x, sign = TRUE),
53+
"cannot be sign-rank"
54+
)
55+
expect_identical(
56+
ranktransform(x, sign = TRUE, zeros = "signrank"),
57+
c(-2, 0, 3, -4, 5)
58+
)
59+
expect_error(
60+
ranktransform(x, sign = TRUE, zeros = "foo"),
61+
"should be one of"
62+
)
63+
})
64+
4965
test_that("ranktransform works with data frames", {
5066
set.seed(123)
5167
expect_snapshot(ranktransform(BOD))
@@ -58,9 +74,9 @@ test_that("ranktransform works with data frames (grouped data)", {
5874
skip_if_not_installed("poorman")
5975

6076
set.seed(123)
61-
value1 <- sample(1:20, 9, replace = TRUE)
77+
value1 <- sample.int(20, 9, replace = TRUE)
6278
set.seed(456)
63-
value2 <- sample(1:20, 9, replace = TRUE)
79+
value2 <- sample.int(20, 9, replace = TRUE)
6480

6581
test_df <- data.frame(
6682
id = rep(c("A", "B", "C"), each = 3),
@@ -69,6 +85,7 @@ test_that("ranktransform works with data frames (grouped data)", {
6985
stringsAsFactors = FALSE
7086
)
7187

88+
# nolint start: nested_pipe_linter
7289
expect_identical(
7390
test_df %>%
7491
poorman::group_by(id) %>%
@@ -81,6 +98,7 @@ test_that("ranktransform works with data frames (grouped data)", {
8198
stringsAsFactors = FALSE
8299
)
83100
)
101+
# nolint end
84102
})
85103

86104

@@ -99,6 +117,7 @@ test_that("ranktransform works with data frames containing NAs (grouped data)",
99117
stringsAsFactors = FALSE
100118
)
101119

120+
# nolint start: nested_pipe_linter
102121
expect_identical(
103122
test_df %>%
104123
poorman::group_by(id) %>%
@@ -111,6 +130,7 @@ test_that("ranktransform works with data frames containing NAs (grouped data)",
111130
stringsAsFactors = FALSE
112131
)
113132
)
133+
# nolint end
114134
})
115135

116136
# select helpers ------------------------------

0 commit comments

Comments
 (0)