diff --git a/R/calc_npc.R b/R/calc_npc.R index 5ae4222..dd48e36 100644 --- a/R/calc_npc.R +++ b/R/calc_npc.R @@ -21,35 +21,44 @@ #' calc_npc <- function(var, N=1, cutoff_divisor=10) { + ## only use the first 90% of var - ignore outliers in last 10% + l <- length(var) + if (l > 0) { + l <- as.integer(0.9 * l) + if (l == 0) { + l <- len(var) + } + var <- var[1:l] + } NPC_DEFAULT <- 4 d <- var[-length(var)] - var[-1] descending_d <- sort(d, decreasing=T) max_d <- descending_d[1] mean_N_max <- mean(descending_d[1:N]) - # get measure of spread of data as a proportion of the largest 3 differences + ## get measure of spread of data as a proportion of the largest 3 differences spread <- mean(d)/mean(descending_d[1:3])*100 - #print(paste0("spread = ", spread)) if (spread > 15) { - ## data too evenly spread ? + ## data too evenly spread for this algorithm to converge? return(NPC_DEFAULT) } else if (spread > 10) { cutoff_divisor <- 5 } cutoff <- mean_N_max/cutoff_divisor - #print(paste0("cutoff: ", cutoff)) groups <- list() groups[[1]] <- c(1) group_index <- 1 for (i in 2:length(var)) { if (d[i-1] < cutoff) { - #print(d[i-1]) - # include in current group + ## include in current group groups[[group_index]] <- c(groups[[group_index]], i) ## check stopping criteria len_curr_group <- length(groups[[group_index]]) if (len_curr_group > 7 || (i > 10 && len_curr_group > 3)) { - #if (len_curr_group > 10) { ## return the last index in the previous group + ## catch case where only 1 group + if (group_index == 1) { + return(NPC_DEFAULT) + } prev_group <- groups[[group_index - 1]] nPC <- prev_group[length(prev_group)] ## not helpful to return nPC=1, so use default in this case @@ -59,7 +68,7 @@ calc_npc <- function(var, N=1, cutoff_divisor=10) { return(nPC) } } else { - # start new group + ## start new group group_index <- group_index + 1 groups[[group_index]] <- i }