Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Prepare next release #57

Merged
merged 12 commits into from
Mar 28, 2024
6 changes: 4 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: kwb.utils
Title: General Utility Functions Developed at KWB
Version: 0.14.1
Version: 0.15.0
Authors@R:
c(person(given = "Hauke",
family = "Sonnenberg",
Expand All @@ -20,6 +20,8 @@ Description: This package contains some small helper
License: MIT + file LICENSE
URL: https://github.com/kwb-r/kwb.utils
BugReports: https://github.com/kwb-r/kwb.utils/issues
Imports:
methods
Suggests:
PKI,
testthat,
Expand All @@ -30,4 +32,4 @@ Suggests:
VignetteBuilder:
knitr
Encoding: UTF-8
RoxygenNote: 7.2.3
RoxygenNote: 7.3.1
3 changes: 3 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ export(atLeastOneRowIn)
export(backspace)
export(breakInSequence)
export(callWith)
export(callWithData)
export(callWithStringsAsFactors)
export(catAndRun)
export(catChanges)
Expand Down Expand Up @@ -170,6 +171,7 @@ export(limitToRange)
export(linearCombination)
export(listObjects)
export(listToDepth)
export(loadFunctions)
export(loadObject)
export(mainClass)
export(makeUnique)
Expand Down Expand Up @@ -302,4 +304,5 @@ export(warningDeprecated)
export(windowsPath)
export(writeDictionary)
export(writeText)
importFrom(methods,formalArgs)
importFrom(utils,capture.output)
19 changes: 18 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,21 @@
# [kwb.utils 0.14.1](https://github.com/KWB-R/kwb.utils/releases/tag/v0.14.0) <small>2023-04-21</small>
# [kwb.utils 0.15.0](https://github.com/KWB-R/kwb.utils/releases/tag/v0.15.0) <small>2024-03-28</small>

- add loadFunctions() with private function
- mergeEnvironments()
- add callWithData() with private functions
- splitIntoIdenticalRows(),
- rbindFirstRows(),
- seqAlongRows(),
- nDims(),
- expandToVector()
- multiColumnLookup()
- allow to lookup more than one value column
- add arguments "drop", "includeKeys"
- findPartialDuplicates()
- reimplement the function in order to fix a :bug:
- use catAndRun() within sourceScripts()

# [kwb.utils 0.14.1](https://github.com/KWB-R/kwb.utils/releases/tag/v0.14.1) <small>2023-04-21</small>

- Fix bug in listToDepth(): pass ... arguments to FUN also if pattern is NULL
- Add argument "silent" to private function listFiles()
Expand Down
137 changes: 137 additions & 0 deletions R/callWithData.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
# callWithData -----------------------------------------------------------------

#' Call a Function with Argument Combinations from a Data Frame
#'
#' @param FUN function to be called
#' @param data data frame with one column per argument of \code{FUN}
#' @param \dots further (constant) arguments to \code{FUN} that are passed to
#' \code{\link{mapply}} via \code{MoreArgs}
#' @param threshold if the ratio of unique value combinations in the relevant
#' columns in data to all value combinations in these columns is below this
#' threshold value then FUN will be called only with the unique value
#' combinations. This should increase performance.
#' @param SIMPLIFY passed to\code{\link{mapply}}, default: \code{TRUE}
#' @param USE.NAMES passed to\code{\link{mapply}}, default: \code{TRUE}
#' @return vector of length \code{nrow(data)} with the result values returned by
#' \code{FUN}
#' @importFrom methods formalArgs
#' @export
#' @examples
#' combis <- expand.grid(x = 1:2, y = c(10, 20, 30))
#' combis
#'
#' callWithData(`+`, combis)
callWithData <- function(
FUN,
data,
...,
threshold = 0.5,
SIMPLIFY = TRUE,
USE.NAMES = TRUE
)
{
# What arguments does FUN have?
all_args <- methods::formalArgs(FUN)

# Select the columns from data that are arguments of FUN
arg_data <- selectColumns(data, intersect(all_args, names(data)))

# Split arg_data into sets of identical rows
sets <- splitIntoIdenticalRows(arg_data)

# Number of all value combinations
n_all <- nrow(arg_data)

# Number of unique value combinations
n_unique <- length(sets)

# Should we run FUN only for the unique value combinations?
run_unique <- (n_unique / n_all < threshold)

# Name of the function to be called
fun_name <- deparse(substitute(FUN))

# Run FUN for each row of run_data
results <- catAndRun(
messageText = if (run_unique) {
sprintf(
"-> Calling %s() for %d unique value combinations",
fun_name, n_unique
)
} else {
sprintf(
"-> Calling %s() for all %d value combinations",
fun_name, n_all
)
},
expr = {

more_args <- list(...)

mapply_args_fix <- list(
FUN = FUN,
MoreArgs = if (length(more_args)) more_args,
SIMPLIFY = SIMPLIFY,
USE.NAMES = USE.NAMES
)

mapply_args_var <- if (run_unique) {
removeColumns(rbindFirstRows(sets), "row.")
} else {
arg_data
}

do.call(mapply, c(mapply_args_fix, mapply_args_var))
}
)

if (!run_unique) {
return(results)
}

catAndRun(
"-> Expanding the results to the extent of the input",
expandToVector(
x = results,
indices = lapply(sets, selectColumns, "row.")
)
)
}

# splitIntoIdenticalRows -------------------------------------------------------
splitIntoIdenticalRows <- function(data)
{
split(cbind(data, row. = seqAlongRows(data)), f = data, drop = TRUE)
}

# seqAlongRows ---------------------------------------------------------------
seqAlongRows <- function(data)
{
seq_len(nrow(data))
}

# rbindFirstRows ---------------------------------------------------------------
rbindFirstRows <- function(x)
{
stopifnot(is.list(x), all(sapply(x, nDims) == 2L))

resetRowNames(do.call(rbind, lapply(x, utils::head, 1L)))
}

# nDims: number of dimensions --------------------------------------------------
nDims <- function(x)
{
length(dim(x))
}

# expandToVector ---------------------------------------------------------------
expandToVector <- function(x, indices)
{
stopifnot(length(x) == length(indices))

result <- list()

result[unlist(indices)] <- rep(x, lengths(indices))

result
}
5 changes: 2 additions & 3 deletions R/dataFrame.R
Original file line number Diff line number Diff line change
Expand Up @@ -483,9 +483,8 @@ rbindAll <- function(
#'
resetRowNames <- function(x)
{
if (length(dim(x)) != 2) {

stop(deparse(substitute(x)), " must be have two dimensions", call. = FALSE)
if (length(dim(x)) != 2L) {
stop(deparse(substitute(x)), " must have two dimensions", call. = FALSE)
}

row.names(x) <- NULL
Expand Down
51 changes: 20 additions & 31 deletions R/findPartialDuplicates.R
Original file line number Diff line number Diff line change
Expand Up @@ -31,38 +31,27 @@
#'
findPartialDuplicates <- function(data, key_columns, skip_columns = NULL)
{
keys <- selectColumns(data, key_columns, drop = FALSE)

# Columns to be considered in duplicate check
columns <- setdiff(names(data), skip_columns)
row_sets <- unname(split(
x = seq_len(nrow(data)),
f = selectColumns(data, key_columns, drop = FALSE),
drop = TRUE
))

# Are the rows identical in all columns (except those in skip_columns)?
is_row_duplicate <- duplicated(data[, columns])

# Are the keys (combination of key values) duplicated?
is_key_duplicate <- duplicated(keys)
row_sets <- row_sets[lengths(row_sets) > 1L]

# Are the rows duplicated in the key columns but not fully duplicated rows?
is_partial_duplicate <- is_key_duplicate & ! is_row_duplicate

# For any partial duplicate, extract the differing values
if (any(is_partial_duplicate)) {

lapply(which(is_partial_duplicate), function(index) {

# Get the value(s) of the (composed) key
values <- keys[index, , drop = FALSE]

# Which rows are matching in all key columns?
matches <- sapply(names(values), function(x) values[[x]] == keys[[x]])
match_indices <- which(rowSums(matches) == length(values))

# Select the corresponding rows
result <- data[match_indices, , drop = FALSE]

# Select only columns in which values are actually differing
columns <- c(key_columns, names(result)[! sapply(result, allAreEqual)])
result[, columns, drop = FALSE]
})
if (length(row_sets) == 0L) {
return(NULL)
}

row_sets <- row_sets[order(sapply(row_sets, "[", 1L))]

columns_to_consider <- setdiff(names(data), skip_columns)

result <- lapply(row_sets, function(rows) {
y <- data[rows, columns_to_consider]
differing_columns <- names(which(!sapply(y, kwb.utils::allAreEqual)))
y[, c(key_columns, differing_columns), drop = FALSE]
})

stats::setNames(result, NULL)
}
85 changes: 85 additions & 0 deletions R/loadFunctions.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
#' Load Functions from Scripts into Attached Namespace
#'
#' This function parses R scripts at the given paths and provides all objects in
#' an environment that is attached to the search path.
#'
#' @param paths vector of character with paths to files (R scripts) or
#' directories (in which to look for R scripts).
#' @param attach if \code{TRUE} the environment with all parsed objects is
#' attached to the search path, otherwise the environment is returned
#' @param name name that is used when attaching the environment with all parsed
#' objects to the search path. Default: kwb
#' @param dbg logical indicating whether or not to show debug messages
#' @return if \code{attach} is \code{FALSE} the environment with all parsed
#' objects is returned, otherwise \code{NULL}, invisibly.
#' @export
loadFunctions <- function(paths, attach = TRUE, name = "kwb", dbg = TRUE)
{
# Which paths refer to directories?
isDirectory <- file.info(paths)[, "isdir"]

# List all R script files
files <- dir(paths[isDirectory], full.names = TRUE, "\\.R$")

# Add the paths that refer to (script) files
files <- c(files, paths[!isDirectory])

# Create a new environment in which to store all functions
resultEnv <- new.env()

for (file in files) {

catAndRun(
paste("Loading functions from", file),
dbg = dbg,
expr = {

# Create a new environment in which to store the functions of the script
scriptEnv <- new.env()

# Load the functions defined in the file into the script environment
source(file, local = scriptEnv)

# Check for duplicated names in result and script environment
duplicates <- intersect(ls(scriptEnv), ls(resultEnv))

# Stop if there are duplicates
if (length(duplicates)) {
stopFormatted(
"The function(s) are already defined: %s",
stringList(duplicates)
)
}

# Copy all elements from script environment to result environment
resultEnv <- mergeEnvironments(resultEnv, scriptEnv)
}
)
}

if (!attach) {
return(resultEnv)
}

# Detach the environment if it is already attached
if (name %in% search()) {
detach(name, character.only = TRUE)
}

# Attach the environment
attach(resultEnv, name = name)
}

# mergeEnvironments ------------------------------------------------------------

# Helper function to copy the elements of one environment to another
mergeEnvironments <- function(targetEnv, sourceEnv)
{
# Assign all elements of the source environment in the target environment
for (name in ls(sourceEnv, all.names = TRUE)) {
assign(name, get(name, sourceEnv), targetEnv)
}

# Return the target environment
targetEnv
}
Loading
Loading