-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Updates: Added
directOutlyingness
function for univariate dataset
- Loading branch information
1 parent
4123d8f
commit dc9a7b7
Showing
4 changed files
with
183 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,125 @@ | ||
#' @title Directional Outlyingness for Skewed Distribution | ||
#' | ||
#' @author Christian L. Goueguel | ||
#' | ||
#' @description | ||
#' This function computes the directional outlyingness of a numeric vector, as | ||
#' proposed by Rousseeuw *et al.* (2017). | ||
#' | ||
#' @details | ||
#' Directional outlyingness takes the potential skewness of the underlying | ||
#' distribution into account, while attaining a smaller computation time and bias. | ||
#' The main idea is to split the sample into two half samples, and then to apply | ||
#' a robust scale estimator to each of them. | ||
#' | ||
#' @references | ||
#' - Rousseeuw, P.J., Raymaekers, J., Hubert, M., (2018). | ||
#' A Measure of Directional Outlyingness With Applications to Image Data and Video. | ||
#' Journal of Computational and Graphical Statistics, 27(2):345–359. | ||
#' | ||
#' @param x A numeric vector | ||
#' @param cutoff.quantile A numeric value between 0 and 1 specifying the quantile for outlier detection (default: 0.995). | ||
#' @param rmZeroes A logical value. If `TRUE`, removes values close to zero (default: `FALSE`). | ||
#' @param maxRatio A numeric value greater than 2. If provided, constrains the ratio between positive and negative scales (default: `NULL`). | ||
#' @param precScale A numeric value specifying the precision scale for near-zero comparisons (default: 1e-10). | ||
#' | ||
#' @return A tibble with columns: | ||
#' \item{data}{The original input data} | ||
#' \item{score}{The calculated outlyingness score} | ||
#' \item{outlier}{Logical; TRUE if the point is identified as potantial outlier} | ||
#' | ||
#' @export directOutlyingness | ||
#' | ||
#' @examples | ||
#' vec <- c(1, 5, 3, 9, 2, 6, 4, 8, 7, 1e3) | ||
#' directOutlyingness(vec) | ||
#' | ||
directOutlyingness <- function(x, cutoff.quantile = 0.995, rmZeroes = FALSE, maxRatio = NULL, precScale = 1e-10) { | ||
|
||
x <- x[!is.na(x)] | ||
med <- stats::median(x) | ||
xc <- x - med | ||
n <- length(xc) | ||
h <- n %/% 2 | ||
xa <- xc[xc > 0] | ||
xb <- xc[xc < 0] | ||
xa <- c(rep(0, (n - h - length(xa))), xa) | ||
xb <- c(rep(0, (n - h - length(xb))), abs(xb)) | ||
|
||
if (rmZeroes){ | ||
xa <- xa[xa > precScale] | ||
xb <- xb[xb > precScale] | ||
} | ||
|
||
if (!is.null(maxRatio)) { | ||
if (maxRatio < 2) { | ||
stop("maxRatio must be at least 2") | ||
} else { | ||
sall <- scale1StepM(x = xc, precScale = precScale) | ||
sa <- min(c(max(sa, sall / maxRatio, na.rm = TRUE), sall * maxRatio), na.rm = TRUE) | ||
sb <- min(c(max(sb, sall / maxRatio, na.rm = TRUE), sall * maxRatio), na.rm = TRUE) | ||
} | ||
} else { | ||
sa <- scale1StepM(x = xa, precScale = precScale) | ||
sb <- scale1StepM(x = xb, precScale = precScale) | ||
} | ||
|
||
res <- dplyr::if_else(x >= med, (x - med) / sa, (med - x) / sb) | ||
cutoff <- computeCutoff(res, cutoff.quantile) | ||
|
||
tbl <- tibble::tibble( | ||
data = x, | ||
score = res, | ||
outlier = dplyr::if_else(res > cutoff, TRUE, FALSE) | ||
) %>% | ||
dplyr::arrange(dplyr::desc(score)) | ||
|
||
return(tbl) | ||
} | ||
|
||
rhoHuber <- function(x, c = 2.1){ | ||
rho <- (x / c)^2 | ||
rho[rho > 1] <- 1 | ||
r <- 1.54^2 * rho | ||
return(r) | ||
} | ||
|
||
loc1StepM <- function(x, c1 = 3, precScale) { | ||
x <- x[!is.na(x)] | ||
medx <- stats::median(x) | ||
ax <- abs(x - medx) | ||
denom <- c1 * stats::median(ax) | ||
mu <- if (denom > precScale) { | ||
ax = ax/denom | ||
w = 1 - ax * ax | ||
w = ((abs(w) + w)/2)^2 | ||
sum(x * w ) / sum(w) | ||
} | ||
else { | ||
medx | ||
} | ||
return(mu) | ||
} | ||
|
||
scale1StepM <- function(x, precScale) { | ||
x <- x[!is.na(x)] | ||
n <- length(x) | ||
if (n == 0) { | ||
return(0.0) | ||
} else { | ||
sigma0 <- 1.4826 * stats::median(abs(x)) | ||
if(sigma0 < precScale) { | ||
return(0.0) | ||
} else { | ||
rho <- rhoHuber(x / sigma0) | ||
return(sigma0 * sqrt(sum(rho) * 2 / n)) | ||
} | ||
} | ||
} | ||
|
||
|
||
computeCutoff <- function(outl, quant) { | ||
Ltemp <- log(0.1 + outl) | ||
cutoff <- exp(stats::qnorm(quant) * stats::mad(Ltemp) + stats::median(Ltemp)) - 0.1 | ||
return(cutoff) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,6 +9,7 @@ reference: | |
contents: | ||
- zscore | ||
- iqrMethod | ||
- directOutlyingness | ||
|
||
- title: Multivariate filtering | ||
|
||
|
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.