Skip to content

Commit

Permalink
Merge pull request #399 from Olink-Proteomics/optimization_develop_ch…
Browse files Browse the repository at this point in the history
…eck_names_of_df

Optimization develop check names of df
  • Loading branch information
klevdiamanti authored Jul 8, 2024
2 parents 6b71d3e + 97bdfe7 commit 8a4b4e4
Show file tree
Hide file tree
Showing 15 changed files with 1,080 additions and 186 deletions.
6 changes: 6 additions & 0 deletions OlinkAnalyze/R/read_npx_delim.R
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,8 @@ read_npx_delim <- function(file,

# additional checks ----

# only one column

if (length(names(df_olink)) == 1L) {

cli::cli_warn(
Expand All @@ -102,6 +104,10 @@ read_npx_delim <- function(file,

}

# top row is as expected for the corresponding format

read_npx_format_colnames(df = df_olink, file = file)

# convert df class ----

# if needed convert the object to the requested output
Expand Down
4 changes: 4 additions & 0 deletions OlinkAnalyze/R/read_npx_excel.R
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,10 @@ read_npx_excel <- function(file,

}

# top row is as expected for the corresponding format

read_npx_format_colnames(df = df_olink, file = file)

# if needed convert the object to the requested output
df_olink <- convert_read_npx_output(df = df_olink,
out_df = out_df)
Expand Down
8 changes: 8 additions & 0 deletions OlinkAnalyze/R/read_npx_legacy.R
Original file line number Diff line number Diff line change
Expand Up @@ -441,6 +441,14 @@ read_npx_legacy <- function(file,
olink_platform = NULL,
data_type = NULL,
quiet = TRUE) {
cli::cli_warn(
c("You are using the function read_npx_legacy()!",
"This function imports Olink data in wide format from MS Excel files
exported by \"Olink NPX Manager\" or \"Olink NPX Signature\" version
earlier than 1.8.0, but fails for data exported from more recent software
versions.")
)

# check input ----

check_is_scalar_boolean(bool = quiet,
Expand Down
291 changes: 291 additions & 0 deletions OlinkAnalyze/R/read_npx_utils.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,291 @@
#' Help function validating the output class requested from the read_npx*
#' functions.
#'
#' @author Klev Diamanti
#'
#' @param out_df The class of output data frame. One of `tibble` (default) or
#' `arrow` for ArrowObject.
#'
#' @return An error if `out_df` is not one of the values listed in the global
#' variable `read_npx_df_output`. `NULL` (invisibly) otherwise.
#'
check_out_df_arg <- function(out_df) {

  # out_df has to be a single string
  check_is_scalar_character(string = out_df, error = TRUE)

  # requested class is among the supported ones; nothing to report
  if (out_df %in% read_npx_df_output) {
    return(invisible(NULL))
  }

  # the error is attributed to the caller of this help function
  cli::cli_abort(
    message = c(
      "x" = "Unknown output argument {.arg {rlang::caller_arg(out_df)}}!",
      "i" = "Acceptable {.arg {rlang::caller_arg(out_df)}}:
{read_npx_df_output}"
    ),
    call = rlang::caller_env(),
    wrap = FALSE
  )

}

#' Help function turning the data frame produced by a read_npx* function into
#' a tibble or an ArrowObject.
#'
#' @author
#' Klev Diamanti
#'
#' @param df The data frame to be converted.
#' @param out_df The class of output data frame. One of `tibble` (default) or
#' `arrow` for ArrowObject.
#'
#' @return The data frame in the requested class. An error if `out_df` is not
#' acceptable or if `df` does not pass `check_is_arrow_or_tibble`.
#'
convert_read_npx_output <- function(df,
                                    out_df) {

  # validate the requested output class first
  check_out_df_arg(out_df = out_df)

  # reject any input that is neither a tibble nor an ArrowObject
  if (!check_is_arrow_or_tibble(df = df, error = FALSE)) {

    cli::cli_abort(
      message = c(
        "x" = "Unexpected input data frame {.arg {rlang::caller_arg(df)}}!",
        "i" = "Expecting: { cli::ansi_collapse(x = read_npx_df_output,
last = \", or \") }"
      ),
      call = rlang::caller_env(),
      wrap = FALSE
    )

  }

  # dispatch on the requested class; any other value yields an invisible NULL
  switch(
    out_df,
    tibble = dplyr::as_tibble(df),
    arrow = arrow::as_arrow_table(df)
  )

}

#' Help function validating the name of an Olink platform.
#'
#' @param x The name of the Olink platform. One of `Explore 3072`, `Explore HT`,
#' `Target 96`, `Target 48`, `Flex` or `Focus`.
#' @param broader_platform Name of the broader Olink platform. One of `qPCR` or
#' `NGS`. If `NULL` (default), all entries of the global variable
#' `accepted_olink_platforms` are considered.
#'
#' @return
#' Nothing if platform is ok, otherwise an error.
#'
check_olink_platform <- function(x,
                                 broader_platform = NULL) {

  # input check ----

  check_is_scalar_character(string = x, error = TRUE)

  # check platform ----

  # start from every platform in the global variable accepted_olink_platforms;
  # when a broader platform is provided, validate it and keep only the
  # platforms belonging to it.
  olink_platforms <- accepted_olink_platforms

  if (!is.null(broader_platform)) {

    check_olink_broader_platform(x = broader_platform)

    olink_platforms <- dplyr::filter(
      olink_platforms,
      .data[["broader_platform"]] == .env[["broader_platform"]]
    )

  }

  # platform is among the accepted ones; nothing to report
  if (x %in% olink_platforms$name) {
    return(invisible(NULL))
  }

  # unexpected platform
  cli::cli_abort(
    message = c(
      "x" = "Unexpected Olink platform {.arg {rlang::caller_arg(x)}}!",
      "i" = "Expected one of: {olink_platforms$name}"
    ),
    call = rlang::caller_env(),
    wrap = FALSE
  )

}

#' Help function validating the name of an Olink data type.
#'
#' @param x The name of the Olink data type. One of `NPX`, `Quantified` or `Ct`.
#' @param broader_platform Name of the broader Olink platform. One of `qPCR` or
#' `NGS`. If `NULL` (default), the data types of all entries of the global
#' variable `accepted_olink_platforms` are considered.
#'
#' @return
#' Nothing if data_type is ok, otherwise an error.
#'
check_olink_data_type <- function(x,
                                  broader_platform = NULL) {

  # input check ----

  check_is_scalar_character(string = x, error = TRUE)

  # check data_type ----

  # start from every entry of the global variable accepted_olink_platforms;
  # when a broader platform is provided, validate it and keep only the entries
  # belonging to it.
  olink_quant_methods <- accepted_olink_platforms

  if (!is.null(broader_platform)) {

    check_olink_broader_platform(x = broader_platform)

    olink_quant_methods <- dplyr::filter(
      olink_quant_methods,
      .data[["broader_platform"]] == .env[["broader_platform"]]
    )

  }

  # flatten the quant_method column into the distinct data types available
  olink_quant_methods <- unique(
    unlist(
      dplyr::pull(olink_quant_methods, .data[["quant_method"]])
    )
  )

  # data type is among the accepted ones; nothing to report
  if (x %in% olink_quant_methods) {
    return(invisible(NULL))
  }

  # unexpected data_type
  cli::cli_abort(
    message = c(
      "x" = "Unexpected Olink data type {.arg {rlang::caller_arg(x)}}!",
      "i" = "Expected one of: {olink_quant_methods}"
    ),
    call = rlang::caller_env(),
    wrap = FALSE
  )

}

#' Help function validating the name of the broader Olink platform.
#'
#' @param x Name of the broader Olink platform. One of `qPCR` or `NGS`.
#'
#' @return
#' Nothing if broader Olink platform is ok, otherwise an error.
#'
check_olink_broader_platform <- function(x) {

  # input check ----

  check_is_scalar_character(string = x, error = TRUE)

  # check broader platform ----

  # distinct broader platforms listed in the global variable
  # accepted_olink_platforms
  known_broader_platforms <- unique(accepted_olink_platforms$broader_platform)

  # broader platform is among the accepted ones; nothing to report
  if (x %in% known_broader_platforms) {
    return(invisible(NULL))
  }

  cli::cli_abort(
    message = c(
      "x" = "Unexpected Olink broader platform
{.arg {rlang::caller_arg(x)}}!",
      "i" = "Expected one of:
{unique(accepted_olink_platforms$broader_platform)}"
    ),
    call = rlang::caller_env(),
    wrap = FALSE
  )

}

#' Help function checking that the top row of an Olink data set is as expected
#' for its format.
#'
#' If all column names start with "V" the data set is treated as wide format,
#' and exactly two cells of its first row are expected to be populated (A1 and
#' B1). Otherwise the data set is treated as long format, and its column names
#' must not contain `NA` or empty strings.
#'
#' @author Klev Diamanti
#'
#' @param df Tibble or ArrowObject with Olink data in wide or long format.
#' @param file Path to Olink software output file in wide or long format.
#' Expecting file extensions `csv`, `txt`, `xls`, `xlsx`, `parquet` or `zip`.
#' Used here only to name the file in error messages.
#'
#' @return Error if file contains an unexpected first row (wide format) or
#' problematic column names (long format). `NULL` (invisibly) otherwise.
#'
read_npx_format_colnames <- function(df,
                                     file) {

  # check input ----
  check_is_arrow_or_tibble(df = df,
                           error = TRUE)

  # check columns names ----

  # check if column names are correct
  # in wide format we expect only cells A1 and B1 to be populated
  # in long format all column names should be non-missing and non-empty
  if (all(grepl(pattern = "^V", x = names(df)))) { # wide format

    # get first row of df
    df_row_1 <- df |>
      dplyr::slice_head(n = 1L) |>
      dplyr::collect()

    # cells of the first row that are neither NA nor empty strings
    num_of_cells_with_vals <- ncol(df_row_1) -
      sum(is.na(df_row_1)) - sum(df_row_1 == "", na.rm = TRUE)

    if (ncol(df_row_1) < 3L || num_of_cells_with_vals != 2L) {

      cli::cli_abort(
        message = c(
          "x" = "Unexpected first row in file {.file {file}}!",
          "i" = "Detected file in wide format. Expected only cells in A1 and B1
to be populated."
        ),
        call = rlang::caller_env(),
        wrap = FALSE
      )

    }

  } else { # long format

    col_names <- names(df)

    # NA names are tested explicitly: comparing NA to "" yields NA, which
    # would make the condition of the if statement NA and fail with an
    # uninformative error instead of the message below. nzchar() reports TRUE
    # for NA, so the two tests do not overlap.
    has_na_name <- anyNA(col_names)
    has_empty_name <- any(!nzchar(col_names))

    if (has_na_name
        || has_empty_name
        || !isTRUE(check_is_character(string = names(df),
                                      error = FALSE))) {

      cli::cli_abort(
        message = c(
          "x" = "Unexpected columns in file {.file {file}}!",
          "i" = "The dataset contains column names that are `NA` or `empty
string` (\"\")."
        ),
        call = rlang::caller_env(),
        wrap = FALSE
      )

    }

  }

}
Binary file modified OlinkAnalyze/R/sysdata.rda
Binary file not shown.
5 changes: 5 additions & 0 deletions OlinkAnalyze/data-raw/column_name_dict.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,11 @@ column_name_dict <- list(
"sample_type",
"sampletype",
NA_character_),
assay_type = c("Assay_Type",
"AssayType",
"assay_type",
"assaytype",
NA_character_),
olink_id = c("OlinkID",
"OID",
"olinkid",
Expand Down
8 changes: 7 additions & 1 deletion OlinkAnalyze/man/check_olink_broader_platform.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 8a4b4e4

Please sign in to comment.