-
Notifications
You must be signed in to change notification settings - Fork 24
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #399 from Olink-Proteomics/optimization_develop_check_names_of_df
Optimization develop check names of df
- Loading branch information
Showing
15 changed files
with
1,080 additions
and
186 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,291 @@ | ||
#' Help function validating the output class requested from the read_npx*
#' functions.
#'
#' The argument is first checked to be a scalar character and then matched
#' against the values listed in `read_npx_df_output`.
#'
#' @author Klev Diamanti
#'
#' @param out_df The class of output data frame. One of `tibble` (default) or
#' `arrow` for ArrowObject.
#'
#' @return An error if the argument is not as expected.
#'
check_out_df_arg <- function(out_df) {
  # check that out_df is a string
  check_is_scalar_character(string = out_df, error = TRUE)

  # a known output class needs no further action
  if (out_df %in% read_npx_df_output) {
    return(invisible(NULL))
  }

  # report the error from the point of view of the calling function
  cli::cli_abort(
    message = c(
      "x" = "Unknown output argument {.arg {rlang::caller_arg(out_df)}}!",
      "i" = "Acceptable {.arg {rlang::caller_arg(out_df)}}:
      {read_npx_df_output}"
    ),
    call = rlang::caller_env(),
    wrap = FALSE
  )
}
|
||
#' Help function that returns the data frame produced by a read_npx* function
#' as a tibble or as an ArrowObject.
#'
#' @author
#' Klev Diamanti
#'
#' @param df The data frame to be converted.
#' @param out_df The class of output data frame. One of `tibble` (default) or
#' `arrow` for ArrowObject.
#'
#' @return The data frame in the requested class.
#'
convert_read_npx_output <- function(df,
                                    out_df) {
  # check that out_df is ok
  check_out_df_arg(out_df = out_df)

  # df did not pass the arrow/tibble check, so throw an error
  if (!check_is_arrow_or_tibble(df = df, error = FALSE)) {
    cli::cli_abort(
      message = c(
        "x" = "Unexpected input data frame {.arg {rlang::caller_arg(df)}}!",
        "i" = "Expecting: { cli::ansi_collapse(x = read_npx_df_output,
        last = \", or \") }"
      ),
      call = rlang::caller_env(),
      wrap = FALSE
    )
  }

  # convert to the requested class
  if (out_df == "tibble") {
    dplyr::as_tibble(df)
  } else if (out_df == "arrow") {
    arrow::as_arrow_table(df)
  }
}
|
||
#' Help function checking that the olink_platform is acceptable.
#'
#' The platform name is looked up in the `name` column of the global variable
#' `accepted_olink_platforms`, optionally restricted to the rows of one broader
#' platform.
#'
#' @param x The name of the Olink platform. One of `Explore 3072`, `Explore HT`,
#' `Target 96`, `Target 48`, `Flex` or `Focus`.
#' @param broader_platform Name of the broader Olink platform. One of `qPCR` or
#' `NGS`.
#'
#' @return
#' Nothing if platform is ok, otherwise an error.
#'
check_olink_platform <- function(x,
                                 broader_platform = NULL) {
  # input check ----

  check_is_scalar_character(string = x, error = TRUE)
  if (!is.null(broader_platform)) {
    check_olink_broader_platform(x = broader_platform)
  }

  # check platform ----

  # start from all accepted platforms and narrow them down only when a broader
  # platform was provided
  olink_platforms <- accepted_olink_platforms
  if (!is.null(broader_platform)) {
    olink_platforms <- dplyr::filter(
      olink_platforms,
      .data[["broader_platform"]] == .env[["broader_platform"]]
    )
  }

  # known platform, nothing to report
  if (x %in% olink_platforms$name) {
    return(invisible(NULL))
  }

  # Throw an error if unexpected platform
  cli::cli_abort(
    message = c(
      "x" = "Unexpected Olink platform {.arg {rlang::caller_arg(x)}}!",
      "i" = "Expected one of: {olink_platforms$name}"
    ),
    call = rlang::caller_env(),
    wrap = FALSE
  )
}
|
||
#' Help function checking that the Olink data_type is acceptable.
#'
#' The data type is looked up among the unique values of the `quant_method`
#' column of the global variable `accepted_olink_platforms`, optionally
#' restricted to the rows of one broader platform.
#'
#' @param x The name of the Olink data type. One of `NPX`, `Quantified` or `Ct`.
#' @param broader_platform Name of the broader Olink platform. One of `qPCR` or
#' `NGS`.
#'
#' @return
#' Nothing if data_type is ok, otherwise an error.
#'
check_olink_data_type <- function(x,
                                  broader_platform = NULL) {
  # input check ----

  check_is_scalar_character(string = x, error = TRUE)
  if (!is.null(broader_platform)) {
    check_olink_broader_platform(x = broader_platform)
  }

  # check data_type ----

  # start from all accepted platforms and narrow them down only when a broader
  # platform was provided
  platforms_tbl <- accepted_olink_platforms
  if (!is.null(broader_platform)) {
    platforms_tbl <- dplyr::filter(
      platforms_tbl,
      .data[["broader_platform"]] == .env[["broader_platform"]]
    )
  }

  # flatten the quant_method column into a vector of unique data types
  olink_quant_methods <- unique(
    unlist(
      dplyr::pull(platforms_tbl, .data[["quant_method"]])
    )
  )

  # known data type, nothing to report
  if (x %in% olink_quant_methods) {
    return(invisible(NULL))
  }

  # Throw an error if unexpected data_type
  cli::cli_abort(
    message = c(
      "x" = "Unexpected Olink data type {.arg {rlang::caller_arg(x)}}!",
      "i" = "Expected one of: {olink_quant_methods}"
    ),
    call = rlang::caller_env(),
    wrap = FALSE
  )
}
|
||
#' Help function checking that the broader Olink platform is acceptable.
#'
#' The name is looked up among the unique values of the `broader_platform`
#' column of the global variable `accepted_olink_platforms`.
#'
#' @param x Name of the broader Olink platform. One of `qPCR` or `NGS`.
#'
#' @return
#' Nothing if broader Olink platform is ok, otherwise an error.
#'
check_olink_broader_platform <- function(x) {
  # input check ----

  check_is_scalar_character(string = x, error = TRUE)

  # check broader platform ----

  is_known <- x %in% unique(accepted_olink_platforms$broader_platform)

  # known broader platform, nothing to report
  if (is_known) {
    return(invisible(NULL))
  }

  cli::cli_abort(
    message = c(
      "x" = "Unexpected Olink broader platform
      {.arg {rlang::caller_arg(x)}}!",
      "i" = "Expected one of:
      {unique(accepted_olink_platforms$broader_platform)}"
    ),
    call = rlang::caller_env(),
    wrap = FALSE
  )
}
|
||
#' Help function checking whether a data set contains NA or empty strings on
#' its column names
#'
#' When all column names start with `V` the data set is treated as wide format
#' and its first row is inspected instead of the column names. Otherwise the
#' data set is treated as long format and every column name has to be a
#' non-missing, non-empty string.
#'
#' @author Klev Diamanti
#'
#' @param df Tibble or ArrowObject with Olink data in wide or long format.
#' @param file Path to Olink software output file in wide or long format.
#' Expecting file extensions `csv`, `txt`, `xls`, `xlsx`, `parquet` or `zip`.
#' Used here only to name the file in error messages.
#'
#' @return Error if file contains problematic column names. `NULL` otherwise.
#'
read_npx_format_colnames <- function(df,
                                     file) {

  # check input ----
  check_is_arrow_or_tibble(df = df,
                           error = TRUE)

  # check columns names ----

  df_names <- names(df)

  # check if column names are correct
  # in wide format we expect only cells A1 and B1 to be populated
  # in long format no column name should be empty
  if (all(grepl(pattern = "^V", x = df_names))) { # wide format

    # get first row of df
    df_row_1 <- df |>
      dplyr::slice_head(n = 1L) |>
      dplyr::collect()

    # cells of the first row that are neither NA nor an empty string
    # note: only the number of populated cells is checked, not their position
    num_of_cells_with_vals <- ncol(df_row_1) -
      sum(is.na(df_row_1)) - sum(df_row_1 == "", na.rm = TRUE)

    if (ncol(df_row_1) < 3L || num_of_cells_with_vals != 2L) {

      cli::cli_abort(
        message = c(
          "x" = "Unexpected first row in file {.file {file}}!",
          "i" = "Detected file in wide format. Expected only cells in A1 and B1
          to be populated."
        ),
        call = rlang::caller_env(),
        wrap = FALSE
      )

    }

  } else { # long format

    # NA names are tested explicitly and removed from the empty-string
    # comparison, so that the condition is always TRUE or FALSE and never NA;
    # an NA here would make `if` fail before the intended error is raised
    has_bad_names <- anyNA(df_names) ||
      any(df_names == "", na.rm = TRUE) ||
      !isTRUE(check_is_character(string = df_names,
                                 error = FALSE))

    if (has_bad_names) {

      cli::cli_abort(
        message = c(
          "x" = "Unexpected columns in file {.file {file}}!",
          "i" = "The dataset contains column names that are `NA` or `empty
          string` (\"\")."
        ),
        call = rlang::caller_env(),
        wrap = FALSE
      )

    }

  }

}
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
Oops, something went wrong.