Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
de30b78
first draft of a mechanism to consume the unit name in the conversion
nevrome Feb 23, 2026
1675a6b
look at this @archaeothommy - it seems we can very easily define our …
nevrome Feb 23, 2026
fd8e933
another way to express "weight percent"
nevrome Feb 23, 2026
9e99370
assigning the unit percent to percent errors
nevrome Feb 24, 2026
ba2e709
defining at% and wt% as a derivative of % does not work, unfortunatel…
nevrome Feb 24, 2026
0b61cd9
update of basic archchem golden test
nevrome Feb 24, 2026
ab5e0c1
working through the archchem example code and addressing various issu…
nevrome Feb 24, 2026
00d99b9
satisfied lintr
nevrome Feb 24, 2026
6b41372
added missing test dependency
nevrome Feb 24, 2026
3e7ab7f
split colnames_to_constructors into two steps: 1. creating a table wi…
nevrome Feb 24, 2026
a62b65c
split colnames_to_constructors into two functions
nevrome Feb 24, 2026
32a900a
introduced a mechanism to assign errors columns the units of their so…
nevrome Feb 24, 2026
e84db6e
added an optional feature to remove_units that recovers the unit name…
nevrome Feb 24, 2026
2cbbc68
brought back the established drop_columns behaviour
nevrome Feb 24, 2026
6f9aa67
fix typos [skip ci]
Mar 1, 2026
b4bad32
replace functions deprecated in dplyr 1.2.0
Mar 15, 2026
20509ec
add tests for class archchem
Mar 15, 2026
3b7d2ce
fix linting
Mar 15, 2026
b1b55ce
use reference file consistently from one directory, add test for exce…
Mar 15, 2026
9c1a025
clean-up example data
Mar 15, 2026
3e033d7
re-organise package dependencies in test
Mar 15, 2026
2417e03
add handling of relative units
Mar 15, 2026
4ba9d22
include draft for unit conversion
Mar 15, 2026
a69aebe
attempt to link wtP to SI units
Mar 15, 2026
3255fa2
defined wtP and mass_basis explicitly as a dimensionless unit
nevrome Mar 18, 2026
2b903db
add additional information about supported relative units.
Mar 19, 2026
5171944
attempt to define methods for setting archchem column classes.
Mar 19, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ Imports:
tibble,
tidyselect,
units,
vctrs,
robCompositions,
ggplot2,
ks,
Expand Down
84 changes: 51 additions & 33 deletions R/archchem_basic.R
Original file line number Diff line number Diff line change
Expand Up @@ -93,11 +93,11 @@
#' conc <- get_concentration_columns(arch) # see also other get_..._columns functions
#'
#' # unit-aware arithmetics on archchem columns thanks to the units package
#' conc$Sb_ppm + conc$Ag_ppb # works
#' \dontrun{conc$Sb_ppm + conc$`Sn_µg/ml`} # fails with: cannot convert µg/ml into ppm
#' conc$Sb + conc$Ag # works
#' \dontrun{conc$Sb + conc$Sn} # fails with: cannot convert µg/ml into ppm
#'
#' # converting units
#' conc$Sb_ppb <- units::set_units(arch$Sb_ppm, "ppb") %>%
#' conc$Sb <- units::set_units(arch$Sb, "ppb") %>%
#' magrittr::set_attr("archchem_class", "archchem_concentration")
#'
#' # removing all units from archchem tables
Expand All @@ -106,14 +106,13 @@
#' # applying tidyverse data manipulation on archchem tables
#' arch %>%
#' dplyr::group_by(Site) %>%
#' dplyr::summarise(mean_Na2O = mean(`Na2O_wt%`))
#' dplyr::summarise(mean_Na2O = mean(Na2O))
#' conc_subset <- conc %>%
#' dplyr::select(-`Sn_µg/ml`, -`Sb_ppm`) %>%
#' dplyr::filter(`Na2O_wt%` > units::set_units(1, "%"))
#' dplyr::select(-Sn, -Sb) %>%
#' dplyr::filter(Na2O > units::set_units(4, "wtP"))
#'
#' # unify all concentration units
#' unify_concentration_unit(conc_subset, "ppm")
#' # note that the column names are inaccurate now
#' unify_concentration_unit(conc_subset, "ppb")
#'
#' @export
as_archchem <- function(
Expand Down Expand Up @@ -142,44 +141,62 @@ as_archchem <- function(
}
context <- append(context, id_column)
# add ID column to context for the following operation
df <- df %>%
df1 <- df %>%
dplyr::mutate(ID = .data[[id_column]]) %>%
dplyr::relocate("ID", .before = 1)
# handle ID duplicates
if (length(unique(df$ID)) != nrow(df)) {
if (length(unique(df1$ID)) != nrow(df1)) {
warning(
"Detected multiple data rows with the same ID. They will be renamed ",
"consecutively using the following convention: _1, _2, ... _n"
)
}
df <- df %>%
df2 <- df1 %>%
dplyr::group_by(.data[["ID"]]) %>%
dplyr::mutate(
ID = dplyr::case_when(
dplyr::n() > 1 ~ paste0(.data[["ID"]], "_", as.character(dplyr::row_number())),
.default = .data[["ID"]]
)
ID = if (dplyr::n() > 1) {
paste0(.data[["ID"]], "_", as.character(dplyr::row_number()))
} else {
.data[["ID"]]
}
) %>%
dplyr::ungroup()
# determine and apply column types
constructors <- colnames_to_constructors(
df, context, bdl, bdl_strategy, guess_context_type, na, drop_columns
)
df <- purrr::map2(df, constructors, function(col, f) f(col)) %>%
column_table <- parse_colnames(df2, context, drop_columns)
constructors <- build_constructors(column_table, bdl, bdl_strategy, guess_context_type, na)
col_list <- purrr::map2(df2, constructors, function(col, f) f(col)) %>%
purrr::discard(is.null)
df3 <- as.data.frame(col_list, check.names = FALSE)
# remove unit names from columns if they got a unit
df4 <- remove_unit_substrings(df3)
# turn into tibble-derived object
df <- tibble::new_tibble(df, nrow = nrow(df), class = "archchem")
df5 <- tibble::new_tibble(df4, nrow = nrow(df4), class = "archchem")
# post-reading validation
if (validate) {
validation_output <- validate(df, quiet = FALSE)
validation_output <- validate(df5, quiet = FALSE)
if (nrow(validation_output) > 0) {
warning(
"See the full list of validation output with: ",
"ASTR::validate(<your archchem object>)."
)
}
}
return(df)
return(df5)
}

# helper function to rename column names
remove_unit_substrings <- function(x, ...) {
dplyr::rename_with(
x,
remove_suffix,
tidyselect::where(function(y) {
class(y) == "units" && !is_archchem_class(y, "archchem_error")
})
)
}

remove_suffix <- function(colname) {
sub("_.*$", "", colname, perl = TRUE)
}

#' @param path path to the file that should be read
Expand Down Expand Up @@ -359,33 +376,34 @@ print.archchem <- function(x, ...) {

# carry over archchem_class column attribute
preserve_archchem_attrs <- function(modified, original) {
purrr::map2(modified, original, function(new_col, old_col) {
arch_attr <- attr(old_col, "archchem_class")
if (!is.null(arch_attr)) attr(new_col, "archchem_class") <- arch_attr
new_col
}) %>%
magrittr::set_names(names(modified)) %>%
tibble::new_tibble(nrow = nrow(modified), class = class(original))
for (nm in intersect(names(modified), names(original))) {
arch_attr <- attr(original[[nm]], "archchem_class")
if (!is.null(arch_attr)) {
attr(modified[[nm]], "archchem_class") <- arch_attr
}
}
tibble::new_tibble(modified, class = class(original))
}

# row-slice method
#' @exportS3Method dplyr::dplyr_row_slice
dplyr_row_slice.archchem <- function(data, i, ...) {
sliced <- purrr::map(data, function(x) x[i])
sliced_tbl <- tibble::new_tibble(sliced, nrow = length(i), class = class(data))
sliced <- purrr::map(data, vctrs::vec_slice, i = i)
sliced_tbl <- tibble::new_tibble(sliced, class = class(data))
preserve_archchem_attrs(sliced_tbl, data)
}

# column modification method
#' @exportS3Method dplyr::dplyr_col_modify
dplyr_col_modify.archchem <- function(data, cols) {
modified_list <- utils::modifyList(as.list(data), cols)
modified_tbl <- tibble::new_tibble(modified_list, nrow = nrow(data), class = class(data))
modified_tbl <- tibble::new_tibble(modified_list, class = class(data))
preserve_archchem_attrs(modified_tbl, data)
}


# final reconstruction
#' @exportS3Method dplyr::dplyr_reconstruct
dplyr_reconstruct.archchem <- function(data, template) {
tibble::new_tibble(data, nrow = nrow(data), class = class(template))
preserve_archchem_attrs(data, template)
}
Loading
Loading