-
Notifications
You must be signed in to change notification settings - Fork 24
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #349 from Olink-Proteomics/optimization_develop_in…
…st_extdata MacOS still has issues with arrow. All other tests pass seamlessly.
- Loading branch information
Showing
33 changed files
with
2,278 additions
and
371,244 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
# readme ---- | ||
|
||
# This script creates a synthetic manifest file to generate the sample manifest | ||
# data/manifest.rda which is used throughout OlinkAnalyze. | ||
# | ||
# As this script did not exist prior to 2024-04-08, we have stored the original | ||
# manifest.rds file under data-raw/ref_manifest.rds to compare to the dataset | ||
# generated by this script. | ||
# | ||
# A new data/manifest.rda will be generated ONLY IF manifest from this script | ||
# matches ref_manifest! | ||
# | ||
|
||
# manifest ---- | ||
|
||
## create random manifest ---- | ||
|
||
# study design: 23 subjects, each with 6 visits | ||
n_subject <- 23L | ||
n_visit <- 6L | ||
|
||
# number of rows (one per subject and visit) assigned to each site: the first | ||
# half of the subjects, rounded up, goes to "Site1" and the rest to "Site2" | ||
n_site1 <- ceiling(n_subject / 2) * n_visit | ||
n_site2 <- floor(n_subject / 2) * n_visit | ||
|
||
# one row per subject and visit; SampleID is built from the SubjectID and | ||
# Visit columns defined just above it in the same call | ||
manifest <- dplyr::tibble( | ||
  SubjectID = rep(x = LETTERS[seq_len(n_subject)], each = n_visit), | ||
  Visit = rep(x = seq_len(n_visit), times = n_subject), | ||
  SampleID = paste(SubjectID, Visit), | ||
  Site = rep(x = c("Site1", "Site2"), times = c(n_site1, n_site2)) | ||
) | ||
|
||
# clean up | ||
rm(n_subject, n_visit, n_site1, n_site2) | ||
|
||
# compare to reference manifest ---- | ||
|
||
## load reference manifest ---- | ||
|
||
# path to the stored reference copy of the manifest; with mustWork = TRUE | ||
# system.file() errors if the file cannot be found | ||
ref_manifest_file <- system.file( | ||
  "data-raw", "ref_manifest.rds", | ||
  package = "OlinkAnalyze", | ||
  mustWork = TRUE | ||
) | ||
|
||
ref_manifest <- ref_manifest_file |> | ||
  readRDS() | ||
|
||
## check columns ---- | ||
|
||
# the generated and the reference manifest must have the same column names in | ||
# the same order | ||
stopifnot( | ||
  identical(colnames(manifest), colnames(ref_manifest)) | ||
) | ||
|
||
# clean up | ||
rm(ref_manifest_file) | ||
|
||
# check identical ---- | ||
|
||
# at this stage manifest should be identical to the reference dataset | ||
# ref_manifest. The comparison is done by all.equal() with tolerance = 1e-4, | ||
# also checking attributes and names. | ||
|
||
# all.equal() returns TRUE on a match and otherwise a character vector that | ||
# describes the differences, so its result is tested with isTRUE() | ||
manifest_eq <- all.equal(target = ref_manifest, | ||
                         current = manifest, | ||
                         tolerance = 1e-4, | ||
                         check.attributes = TRUE, | ||
                         check.names = TRUE) | ||
|
||
# stop here and report the differences found by all.equal() | ||
if (!isTRUE(manifest_eq)) { | ||
  stop("manifest does not match ref_manifest:\n", | ||
       paste(manifest_eq, collapse = "\n"), | ||
       call. = FALSE) | ||
} | ||
|
||
#### IMPORTANT | ||
# It is extremely important that the two datasets are identical with some minor | ||
# rounding tolerance!! | ||
|
||
# save to data/manifest.rda ---- | ||
|
||
# overwrite data/manifest.rda only when the comparison above succeeded | ||
if (isTRUE(manifest_eq)) { | ||
  usethis::use_data(manifest, | ||
                    overwrite = TRUE, | ||
                    compress = "xz", | ||
                    version = 2L) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,209 @@ | ||
# readme ---- | ||
|
||
# This script uses the raw data files: | ||
# 1. inst/extdata/npx_data1_meta.csv | ||
# 2. inst/extdata/npx_data1.xlsx | ||
# to generate the sample dataset data/npx_data1.rda which is used throughout | ||
# OlinkAnalyze. | ||
# | ||
# As this script did not exist prior to 2024-04-08, we have stored the original | ||
# npx_data1.rds file under data-raw/ref_npx_data1.rds to compare to the dataset | ||
# generated by this script. | ||
# | ||
# A new data/npx_data1.rda will be generated ONLY IF npx_data1 from this script | ||
# matches ref_npx_data1! | ||
# | ||
|
||
# manifest ---- | ||
|
||
## load manifest ---- | ||
|
||
# path to the sample manifest csv; with mustWork = TRUE system.file() errors | ||
# if the file cannot be found | ||
manifest_data1_file <- system.file( | ||
  "extdata", "npx_data1_meta.csv", | ||
  package = "OlinkAnalyze", | ||
  mustWork = TRUE | ||
) | ||
|
||
# the file is semicolon-delimited with a header row; empty strings and "NA" | ||
# are read as missing values | ||
manifest_data1 <- manifest_data1_file |> | ||
  read.delim( | ||
    header = TRUE, | ||
    sep = ";", | ||
    na.strings = c("", "NA") | ||
  ) | ||
|
||
## modify manifest ---- | ||
|
||
manifest_data1 <- manifest_data1 |> | ||
  # drop rows that duplicate another row | ||
  dplyr::distinct() |> | ||
  dplyr::mutate( | ||
    # coerce every column to a character vector | ||
    dplyr::across(dplyr::everything(), as.character), | ||
    # set every non-missing Project to "data1" to match the reference; missing | ||
    # values stay NA | ||
    Project = dplyr::if_else(is.na(.data[["Project"]]), | ||
                             NA_character_, | ||
                             "data1") | ||
  ) | ||
|
||
## clean up | ||
rm(manifest_data1_file) | ||
|
||
# npx_data1 ---- | ||
|
||
# note that this data frame is quite large and for the purposes of this package | ||
# we will use only 2 panels. | ||
# | ||
# we want the outcome from this section to be identical to the reference data | ||
# frame npx_data1 | ||
|
||
## load npx_data1 ---- | ||
|
||
# path to the raw NPX excel file; with mustWork = TRUE system.file() errors if | ||
# the file cannot be found | ||
npx_data1_file <- system.file( | ||
  "extdata", "npx_data1.xlsx", | ||
  package = "OlinkAnalyze", | ||
  mustWork = TRUE | ||
) | ||
|
||
# read the file with read_npx, stating the data type and the olink platform | ||
# explicitly | ||
npx_data1 <- read_npx( | ||
  filename = npx_data1_file, | ||
  out_df = "tibble", | ||
  long_format = FALSE, | ||
  data_type = "NPX", | ||
  olink_platform = "Target 96" | ||
) | ||
# the following are expected and can be ignored: | ||
# 1. a warning message about 2 duplicate samples; this is driven by control | ||
#    samples | ||
# 2. a warning that the olink platform could not be determined from the file | ||
#    and that the user input "Target 96" should be accepted | ||
|
||
## modify npx_data1 ---- | ||
|
||
npx_data1 <- npx_data1 |> | ||
  # restrict the dataset to 2 panels: cardiometabolic and inflammation | ||
  dplyr::filter( | ||
    .data[["Panel"]] %in% c("Olink CARDIOMETABOLIC", "Olink INFLAMMATION") | ||
  ) |> | ||
  dplyr::mutate( | ||
    # title-case Panel: first letter of every word capital, the rest lower case | ||
    Panel = stringr::str_to_title(string = .data[["Panel"]]), | ||
    # Panel_Version is NA from read_npx as it cannot be determined from the | ||
    # input file, so it is filled in manually from the title-cased Panel | ||
    Panel_Version = dplyr::case_match( | ||
      .data[["Panel"]], | ||
      "Olink Cardiometabolic" ~ "v.1201", | ||
      "Olink Inflammation" ~ "v.1002", | ||
      .default = NA_character_ | ||
    ), | ||
    # convert NPX, LOD and MissingFreq to numeric with 5 significant digits | ||
    dplyr::across( | ||
      dplyr::all_of(c("NPX", "LOD", "MissingFreq")), | ||
      \(x) signif(as.numeric(x), digits = 5L) | ||
    ) | ||
  ) |> | ||
  # drop the column that is missing from the reference npx_data1 | ||
  dplyr::select(-dplyr::all_of("Olink NPX Signature Version")) | ||
|
||
## join with manifest ---- | ||
|
||
# add the sample info from the manifest; only samples present in both tables | ||
# are kept, and each row of npx_data1 may match at most one manifest row | ||
npx_data1 <- dplyr::inner_join( | ||
  x = npx_data1, | ||
  y = manifest_data1, | ||
  by = "SampleID", | ||
  relationship = "many-to-one" | ||
) | ||
|
||
## order dataset ---- | ||
|
||
# sort rows by OlinkID, PlateID and SampleID to match the reference npx_data1 | ||
npx_data1 <- dplyr::arrange( | ||
  npx_data1, | ||
  .data[["OlinkID"]], .data[["PlateID"]], .data[["SampleID"]] | ||
) | ||
|
||
# clean up | ||
rm(npx_data1_file, manifest_data1) | ||
|
||
# compare to reference npx_data1 ---- | ||
|
||
## load reference npx_data1 ---- | ||
|
||
# path to the stored reference copy of npx_data1; with mustWork = TRUE | ||
# system.file() errors if the file cannot be found | ||
ref_npx_data1_file <- system.file( | ||
  "data-raw", "ref_npx_data1.rds", | ||
  package = "OlinkAnalyze", | ||
  mustWork = TRUE | ||
) | ||
|
||
ref_npx_data1 <- ref_npx_data1_file |> | ||
  readRDS() | ||
|
||
## check columns ---- | ||
|
||
stopifnot( | ||
  # every column of npx_data1 must also exist in the reference | ||
  all(colnames(npx_data1) %in% colnames(ref_npx_data1)), | ||
  # expected number of columns in the generated dataset | ||
  ncol(npx_data1) == 16L, | ||
  # expected number of columns in the reference dataset | ||
  ncol(ref_npx_data1) == 17L | ||
) | ||
|
||
## modify reference npx_data1 ---- | ||
|
||
# keep only the columns that are present in npx_data1, in the same order as in | ||
# npx_data1. this should remove only column "Index" from ref_npx_data1 | ||
ref_npx_data1 <- dplyr::select( | ||
  ref_npx_data1, | ||
  dplyr::all_of(colnames(npx_data1)) | ||
) | ||
|
||
# sort rows by OlinkID, PlateID and SampleID to match npx_data1 | ||
ref_npx_data1 <- dplyr::arrange( | ||
  ref_npx_data1, | ||
  .data[["OlinkID"]], .data[["PlateID"]], .data[["SampleID"]] | ||
) | ||
|
||
# clean up | ||
rm(ref_npx_data1_file) | ||
|
||
# check identical ---- | ||
|
||
# at this stage npx_data1 should be identical to the reference dataset | ||
# ref_npx_data1. The comparison is done by all.equal() with tolerance = 1e-4, | ||
# also checking attributes and names. | ||
|
||
# all.equal() returns TRUE on a match and otherwise a character vector that | ||
# describes the differences, so its result is tested with isTRUE() | ||
npx_data1_eq <- all.equal(target = ref_npx_data1, | ||
                          current = npx_data1, | ||
                          tolerance = 1e-4, | ||
                          check.attributes = TRUE, | ||
                          check.names = TRUE) | ||
|
||
# stop here and report the differences found by all.equal() | ||
if (!isTRUE(npx_data1_eq)) { | ||
  stop("npx_data1 does not match ref_npx_data1:\n", | ||
       paste(npx_data1_eq, collapse = "\n"), | ||
       call. = FALSE) | ||
} | ||
|
||
#### IMPORTANT | ||
# It is extremely important that the two datasets are identical with some minor | ||
# rounding tolerance!! | ||
|
||
# save to data/npx_data1.rda ---- | ||
|
||
# overwrite data/npx_data1.rda only when the comparison above succeeded | ||
if (isTRUE(npx_data1_eq)) { | ||
  usethis::use_data(npx_data1, | ||
                    overwrite = TRUE, | ||
                    compress = "xz", | ||
                    version = 2L) | ||
} |
Oops, something went wrong.