Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
Package: units
Version: 0.8-7.3
Version: 0.8-7.4
Title: Measurement Units for R Vectors
Authors@R: c(person("Edzer", "Pebesma", role = c("aut", "cre"), email = "edzer.pebesma@uni-muenster.de", comment = c(ORCID = "0000-0001-8049-7069")),
person("Thomas", "Mailund", role = "aut", email = "mailund@birc.au.dk"),
Expand Down
9 changes: 8 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,11 @@
# version devel
# version 1.0-0

* Breaking change: a new tokenizer fixes longstanding issues with parsing
complex unit expressions, but may break existing code that relied on the
previous (buggy) behavior. The major change is that now numbers are
consistently treated as prefixes, so that units like `ml / min / 1.73m^2`
used in physiology are now correctly parsed as `ml / (min * 1.73 * m^2)`.
See `?as_units` for details; #416 addressing #221, #383

* Vectorize `ud_*()` helpers; #405 addressing #404

Expand Down
4 changes: 4 additions & 0 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
# Generated by using Rcpp::compileAttributes() -> do not edit by hand
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393

parse_unit <- function(x, strict = FALSE) {
.Call('_units_parse_unit', PACKAGE = 'units', x, strict)
}

ud_exit <- function() {
invisible(.Call('_units_ud_exit', PACKAGE = 'units'))
}
Expand Down
3 changes: 2 additions & 1 deletion R/conversion.R
Original file line number Diff line number Diff line change
Expand Up @@ -233,9 +233,10 @@ set_units.numeric <- function(x, value, ..., mode = units_options("set_units_mod
value <- unitless
else if (mode == "symbols") {
value <- substitute(value)

if(is.numeric(value) && !identical(value, 1) && !identical(value, 1L))
stop("The only valid number defining a unit is '1', signifying a unitless unit")
if (is.name(value) || is.call(value))
value <- format(value)
}

units(x) <- as_units(value, ...)
Expand Down
250 changes: 81 additions & 169 deletions R/make_units.R
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@
#' # or
#' drop_units(y)
make_units <- function(bare_expression, check_is_valid = TRUE) {
as_units.call(substitute(bare_expression), check_is_valid = check_is_valid)
as_units(format(substitute(bare_expression)), check_is_valid = check_is_valid)
}

#' @name units
Expand Down Expand Up @@ -189,75 +189,76 @@ as_units.difftime <- function(x, value, ...) {

# ----- as_units.character helpers ------

backtick <- function(x) {
# backtick all character runs uninterrupted by one of ^()*^/`- or a space
# don't double up backticks
x <- gsub("`?([^() \\*^/`-]+)`?", "`\\1`", x)
gsub("`([0-9]*\\.?[0-9]+)`", "\\1", x) # unbacktick bare numbers
is_udunits_time <- function(s) {
ud_is_parseable(s) && ud_are_convertible(s, "seconds since 1970-01-01")
}

are_exponents_implicit <- function(s) {
s <- trimws(s)
has <- function(chr, regex = FALSE)
grepl(chr, s, fixed = !regex, perl = regex)
!has("^") && !has("*") && !has("/") && has("\\s|\\D.*\\d$", regex = TRUE)
# from package:yasp, paste collapse with serial (oxford) comma
pc_and <- function(..., sep = "") {
  # Paste the inputs element-wise; nothing is collapsed at this stage.
  items <- paste(..., sep = sep, collapse = NULL)
  n_items <- length(items)

  # Zero, one and two items need no serial comma.
  if (n_items == 0L)
    return("")
  if (n_items == 1L)
    return(items)
  if (n_items == 2L)
    return(paste0(items, collapse = " and "))

  # Three or more: "a, b, and c".
  leading <- paste0(items[-n_items], collapse = ", ")
  paste0(leading, ", and ", items[n_items])
}

is_udunits_time <- function(s) {
ud_is_parseable(s) && ud_are_convertible(s, "seconds since 1970-01-01")
# Build the error message reported when a unit expression contains symbols
# that udunits does not recognize.
#
# unrecognized_symbols: character vector with the offending symbols.
# full_expr: the complete unit expression, as a character string or as a
#   language object (which is deparsed for display).
# Returns the message text.
.msg_units_not_recognized <- function(unrecognized_symbols, full_expr) {

  # deparse() splits long expressions over several strings; collapse them so
  # that a single message is produced rather than one per fragment.
  if (is.language(full_expr))
    full_expr <- paste(trimws(deparse(full_expr)), collapse = " ")

  is_are <- if (length(unrecognized_symbols) > 1L) "are" else "is"

  paste0("In ", sQuote(full_expr), ", ",
         pc_and(sQuote(unrecognized_symbols)), " ", is_are, " not recognized by udunits.\n\n",
         "See a table of valid unit symbols and names with valid_udunits().\n",
         "Custom user-defined units can be added with install_unit().\n\n",
         "See a table of valid unit prefixes with valid_udunits_prefixes().\n",
         "Prefixes will automatically work with any user-defined unit.")
}

#' @name units
#' @export
#'
#' @param check_is_valid throw an error if any of the unit symbols is neither
#' recognized by udunits2 nor a custom unit defined by the user via
#' \code{install_unit()}. If \code{FALSE}, no check
#' for validity is performed.
#'
#' @param force_single_symbol Whether to perform no string parsing and force
#' treatment of the string as a single symbol.
#'
#' @param implicit_exponents If the unit string is in product power form (e.g.
#' \code{"km m-2 s-1"}). Defaults to \code{NULL}, in which case a guess is made
#' based on the supplied string. Set to \code{TRUE} or \code{FALSE} if the guess is
#' incorrect.
#'
#' @section Character strings:
#'
#' Generally speaking, three types of unit strings are accepted in
#' \code{as_units} (and by extension, \code{`units<-`}).
#'
#' The first, and likely most common, is a "standard" format unit
#' The first type, and likely most common, is a "standard" format unit
#' specification where the relationship between unit symbols or names is
#' specified explicitly with arithmetic symbols for division \code{/},
#' multiplication \code{*} and power exponents \code{^}, or other mathematical
#' functions like \code{log()}. In this case, the string is parsed as an R
#' expression via \code{parse(text = )} after backticking all unit symbols and
#' names, and then passed on to \code{as_units.call()}. A heuristic is used to
#' perform backticking, such that any continuous set of characters
#' uninterrupted by one of \code{()\\*^-} are backticked (unless the character
#' sequence consists solely of numbers \code{0-9}), with some care to not
#' double up on pre-existing backticks. This heuristic appears to be quite
#' robust, and works for units that would otherwise not be valid R syntax. For
#' example, percent (\code{"\%"}), feet (\code{"'"}), inches (\code{"in"}),
#' and Tesla (\code{"T"}) are all backticked and parsed correctly.
#'
#' Nevertheless, for certain complex unit expressions, this backticking heuristic
#' may give incorrect results. If the string supplied fails to parse as an R
#' expression, then the string is treated as a single symbolic unit and
#' \code{symbolic_unit(chr)} is used as a fallback with a warning. In that
#' case, automatic unit simplification may not work properly when performing
#' operations on unit objects, but unit conversion and other Math operations
#' should still give correct results so long as the unit string supplied
#' returns \code{TRUE} for \code{ud_is_parseable()}.
#' multiplication \code{*} and power exponents \code{^}.
#'
#' The second type of unit string accepted is one with implicit exponents. In
#' this format, \code{/}, \code{*}, and \code{^}, may not be present in the
#' string, and unit symbols or names must be separated by a space. Each unit
#' symbol may optionally be followed by a single number, specifying the power.
#' For example \code{"m2 s-2"} is equivalent to \code{"(m^2)*(s^-2)"}.
#'
#' It must be noted that prepended numbers are supported too, but their
#' interpretation slightly varies depending on whether they are separated from
#' the unit string or not. E.g., \code{"1000 m"} is interpreted as magnitude
#' and unit, but \code{"1000m"} is interpreted as a prefixed unit, and it is
#' equivalent to \code{"km"} for all practical purposes.
#' If the string supplied fails to parse, then the string is treated as a
#' single symbolic unit and \code{symbolic_unit(chr)} is used as a fallback
#' with a warning. In that case, automatic unit simplification may not work
#' properly when performing operations on unit objects, but unit conversion
#' and other Math operations should still give correct results so long as
#' the unit string supplied returns \code{TRUE} for \code{ud_is_parseable()}.
#'
#' It must be noted that prepended numbers are supported too, but are not
#' treated as magnitudes. For example, \code{"1000 m"} is interpreted as
#' a prefixed unit, and it is equivalent to \code{"km"} for all practical purposes.
#'
#' The third type of unit string format accepted is the special case of
#' udunits time duration with a reference origin, for example \code{"hours
Expand All @@ -268,157 +269,69 @@ is_udunits_time <- function(s) {
#' otherwise encouraged to use \code{R}'s date and time functionality provided
#' by \code{Date} and \code{POSIXt} classes.
#'
as_units.character <- function(x,
#' @note By default, unit names are automatically substituted with unit symbols
#' (e.g., kilogram --> kg). To turn off this behavior, set
#' \code{units_options(auto_convert_names_to_symbols = FALSE)}
#'
#' @seealso \code{\link{install_unit}}, \code{\link{valid_udunits}}
as_units.character <- function(x, ...,
check_is_valid = TRUE,
implicit_exponents = NULL,
force_single_symbol = FALSE, ...) {
force_single_symbol = FALSE) {

stopifnot(is.character(x), length(x) == 1)

if (isTRUE(x == "")) return(unitless)
if (any(is.na(x)))
stop("a missing value for units is not allowed")

if (isTRUE(x == "" || x == "1"))
return(.as.units(1, unitless))

if(force_single_symbol || is_udunits_time(x))
return(symbolic_unit(x, check_is_valid = check_is_valid))

if(is.null(implicit_exponents))
implicit_exponents <- are_exponents_implicit(x)

if(implicit_exponents)
x <- convert_implicit_to_explicit_exponents(x)

x <- backtick(x)
o <- try(expr <- parse(text = x)[[1]], silent = TRUE)

o <- try(su <- parse_unit(x, units_options("strict_tokenizer")), silent=TRUE)
if(inherits(o, "try-error")) {
warning("Could not parse expression: ", sQuote(x), # nocov
". Returning as a single symbolic unit()", call. = FALSE) # nocov
return(symbolic_unit(x, check_is_valid = check_is_valid)) # nocov
warning("Could not parse expression: ", sQuote(x), # nocov
". Returning as a single symbolic unit()", call. = FALSE) # nocov
return(symbolic_unit(x, check_is_valid = check_is_valid)) # nocov
}

as_units.call(expr, check_is_valid = check_is_valid)
}


convert_implicit_to_explicit_exponents <- function(x) {
  # Product-power form has no explicit operators, so reject them up front.
  has_operator <- any(grepl("[*/]", x))
  if (has_operator)
    stop("If 'implicit_exponents = TRUE', strings cannot contain `*' or `/'")

  # Attach trailing (optionally negative) integers as powers: "s-2" -> "s^(-2)".
  with_powers <- gsub("\\b([^\\d-]+)([-]?\\d+)\\b", "\\1^(\\2)", x, perl = TRUE)

  # Whitespace between terms stands for multiplication.
  gsub("\\s+", " * ", trimws(with_powers), perl = TRUE)
}

# ----- as_units.call helpers ------

# from package:yasp, paste collapse with serial (oxford) comma
pc_and <- function(..., sep = "") {
  # Paste the inputs element-wise; nothing is collapsed at this stage.
  items <- paste(..., sep = sep, collapse = NULL)
  n_items <- length(items)

  # Zero, one and two items need no serial comma.
  if (n_items == 0L)
    return("")
  if (n_items == 1L)
    return(items)
  if (n_items == 2L)
    return(paste0(items, collapse = " and "))

  # Three or more: "a, b, and c".
  leading <- paste0(items[-n_items], collapse = ", ")
  paste0(leading, ", and ", items[n_items])
}

#`%not_in%` <- function(x, table) match(x, table, nomatch = 0L) == 0L

# Build the error message reported when a unit expression contains symbols
# that udunits does not recognize.
#
# unrecognized_symbols: character vector with the offending symbols.
# full_expr: the complete unit expression, as a character string or as a
#   language object (which is deparsed for display).
# Returns the message text.
.msg_units_not_recognized <- function(unrecognized_symbols, full_expr) {

  # deparse() splits long expressions over several strings; collapse them so
  # that a single message is produced rather than one per fragment.
  if (is.language(full_expr))
    full_expr <- paste(trimws(deparse(full_expr)), collapse = " ")

  is_are <- if (length(unrecognized_symbols) > 1L) "are" else "is"

  paste0("In ", sQuote(full_expr), ", ",
         pc_and(sQuote(unrecognized_symbols)), " ", is_are, " not recognized by udunits.\n\n",
         "See a table of valid unit symbols and names with valid_udunits().\n",
         "Custom user-defined units can be added with install_unit().\n\n",
         "See a table of valid unit prefixes with valid_udunits_prefixes().\n",
         "Prefixes will automatically work with any user-defined unit.")
}

# Environment holding the logarithm shorthands ln (natural), lg (base 10)
# and lb (base 2); its parent is the base environment.
units_eval_env <- list2env(
  list(
    ln = function(x) base::log(x),
    lg = function(x) base::log(x, base = 10),
    lb = function(x) base::log(x, base = 2)
  ),
  envir = new.env(parent = baseenv())
)


#' @name units
#' @export
#'
#' @param check_is_valid throw an error if any of the unit symbols is neither
#' recognized by udunits2 nor a custom unit defined by the user via
#' \code{install_unit()}. If \code{FALSE}, no check
#' for validity is performed.
#'
#' @note By default, unit names are automatically substituted with unit symbols
#' (e.g., kilogram --> kg). To turn off this behavior, set
#' \code{units_options(auto_convert_names_to_symbols = FALSE)}
#'
#' @section Expressions:
#'
#' In \code{as_units()}, each of the symbols in the unit expression is treated
#' individually, such that each symbol must be recognized by the udunits
#' database, \emph{or} be a custom,
#' user-defined unit symbol that was defined by \code{install_unit()}. To
#' see which symbols and names are currently recognized by the udunits
#' database, see \code{valid_udunits()}.
#'
#' @seealso \code{\link{install_unit}}, \code{\link{valid_udunits}}
as_units.call <- function(x, check_is_valid = TRUE, ...) {

if(missing(x) || identical(x, quote(expr =)) ||
identical(x, 1) || identical(x, 1L))
return(.as.units(1, unitless))

if (is.vector(x) && !is.expression(x) && any(is.na(x)))
stop("a missing value for units is not allowed")

stopifnot(is.language(x))

vars <- all.vars(x)
if(!length(vars))
stop(call. = FALSE,
"No symbols found. Please supply bare expressions with this approach.
See ?as_units for usage examples.")

if (check_is_valid) {
vars <- c(su$numerator, su$denominator)
valid <- vapply(vars, ud_is_parseable, logical(1L))
if (!all(valid))
stop(.msg_units_not_recognized(vars[!valid], x), call. = FALSE)
}

names(vars) <- vars
tmp_env <- lapply(vars, symbolic_unit, check_is_valid = FALSE)

if (dont_simplify_here <- is.na(.units.simplify())) {
units_options(simplify = FALSE)
on.exit(units_options(simplify = NA))
if (units_options("auto_convert_names_to_symbols")) {
name_to_symbol <- function(chr)
if (ud_is_parseable(chr) && length(sym <- ud_get_symbol(chr))) sym else chr
su$numerator <- vapply(su$numerator, name_to_symbol, character(1), USE.NAMES=FALSE)
su$denominator <- vapply(su$denominator, name_to_symbol, character(1), USE.NAMES=FALSE)
}

unit <- tryCatch( eval(x, tmp_env, units_eval_env),
error = function(e) stop( paste0( conditionMessage(e), "\n",
"Did you try to supply a value in a context where a bare expression was expected?"
), call. = FALSE ))

# if(as.numeric(unit) %not_in% c(1, 0)) # 0 if log() used.
# stop(call. = FALSE,
#"In ", sQuote(deparse(x)), " the numeric multiplier ", sQuote(as.numeric(unit)), " is invalid.
#Use `install_unit()` to define a new unit that is a multiple of another unit.")
if (is.na(.units.simplify())) {
units_options(simplify = FALSE)
on.exit(units_options(simplify = NA))
}
.simplify_units(1, su)
}

.as.units(as.numeric(unit), units(unit))
#' @name units
#' @export
as_units.call <- function(x, ...) {
  # Render the unevaluated call as text, then let as_units() dispatch on it.
  unit_text <- format(x)
  as_units(unit_text, ...)
}

#' @name units
#' @export
as_units.expression <- as_units.call
as_units.expression <- function(x, ...) {
  # Coerce the expression to character, then let as_units() dispatch on it.
  unit_text <- as.character(x)
  as_units(unit_text, ...)
}

#' @name units
#' @export
as_units.name <- as_units.call
as_units.name <- as_units.expression

#' @name units
#' @export
Expand All @@ -442,7 +355,6 @@ as_units.Date = function(x, value, ...) {


symbolic_unit <- function(chr, check_is_valid = TRUE) {

stopifnot(is.character(chr), length(chr) == 1)

if (check_is_valid && !ud_is_parseable(chr)) {
Expand Down
Loading
Loading