From 3a12d6fb880d05e7e5686572663e01a65a41625b Mon Sep 17 00:00:00 2001 From: Ilja Kocken Date: Tue, 30 Jan 2024 22:03:52 +0100 Subject: [PATCH] docs: convert org file to slightly cleaner vignette --- .gitignore | 3 + DESCRIPTION | 5 +- NAMESPACE | 1 + R/our_summary.R | 1 + vignettes/.gitignore | 2 + .../bootstrapped_clumped_calibration.Rmd | 178 ++++++++++++++++++ 6 files changed, 189 insertions(+), 1 deletion(-) create mode 100644 vignettes/.gitignore create mode 100644 vignettes/bootstrapped_clumped_calibration.Rmd diff --git a/.gitignore b/.gitignore index c6f2271..84c7ae1 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,6 @@ /.Rbuildignore docs /out/*.rds +inst/doc +/doc/ +/Meta/ diff --git a/DESCRIPTION b/DESCRIPTION index 8fd4187..0185af8 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -11,7 +11,7 @@ Description: Calculate bootstrapped York regression from clumped isotope License: GPL (>= 3) Encoding: UTF-8 Roxygen: list(markdown = TRUE) -RoxygenNote: 7.2.3 +RoxygenNote: 7.3.1 Imports: bfsl, boot, @@ -27,6 +27,9 @@ Imports: tidylog, tidyr Suggests: + knitr, + rmarkdown, testthat (>= 3.0.0) Config/testthat/edition: 3 URL: https://japhir.github.io/clumpedcalib/ +VignetteBuilder: knitr diff --git a/NAMESPACE b/NAMESPACE index 7dcf096..87a3089 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -5,6 +5,7 @@ export(bootstrap_means) export(clumped_calib_boot) export(d18Osw_calc) export(filter_outliers) +export(our_summary) export(supported_equations) export(temp_calc) export(temp_d18Osw_calc) diff --git a/R/our_summary.R b/R/our_summary.R index 6589e92..a5a5251 100644 --- a/R/our_summary.R +++ b/R/our_summary.R @@ -2,6 +2,7 @@ #' #' @param boot Output of `apply_calib_and_d18O_boot()` #' @param group The group to summarize by. +#' @export our_summary <- function(boot, group) { boot |> dplyr::group_by({{group}}) |> diff --git a/vignettes/.gitignore b/vignettes/.gitignore new file mode 100644 index 0000000..097b241 --- /dev/null +++ b/vignettes/.gitignore @@ -0,0 +1,2 @@ +*.html +*.R diff --git a/vignettes/bootstrapped_clumped_calibration.Rmd b/vignettes/bootstrapped_clumped_calibration.Rmd new file mode 100644 index 0000000..b9bbd53 --- /dev/null +++ b/vignettes/bootstrapped_clumped_calibration.Rmd @@ -0,0 +1,178 @@ +--- +title: "Bootstraped Clumped Isotope Calibration" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{Bootstraped Clumped Isotope Calibration} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>", + fig.width = 8, fig.height = 6 +) +``` + +This vignette shows you how to: + +1. Calculate a bootstrapped York regression for your clumped isotope calibration dataset. +2. Apply this calibration to a set of sample replicates, with propagated calibration uncertainty. + +```{r setup} +library(dplyr) +library(tibble) +library(ggplot2) +library(ggdist) +theme_set(theme_bw()) +library(clumpedcalib) +set.seed(123) +``` + +## load calibration data and calculate bootstrapped York regression + +Here I show this with a very small artificial set of calibration data. +Be sure to replace with your own! + +The column names must be identical to `X`, `D47`, `sd_X`, and `sd_D47`. + +```{r calibration} +raw <- tibble::tribble( + ~ X, ~ D47, ~ sd_X, ~ sd_D47, + 11.25,0.607,0.062,0.007, + 11.26,0.602,0.070,0.006, + 11.26,0.603,0.069,0.005, + 11.60,0.604,0.068,0.005, + 12.81,0.674,0.091,.004, + 13.28,0.672,0.092,0.012, + 13.05,0.670,0.093,0.003, + ) + +# or if you have your data saved as an CSV +## raw <- readr::read_csv("dat/example_calib.csv", +## col_names = c("X", "D47", "sd_X", "sd_D47")) + +# this is very much a toy example with only 100 bootstraps! +calib <- clumped_calib_boot(raw, Nsim = 100) #|> + # for the real deal, increase Nsim to something like 1e5 and save the results + # for re-use + ## readr::write_csv(glue::glue("calib_clumped_boot.csv")) + +# then you can read these stored results like so: +## calib <- readr::read_csv("calib_clumped_boot.csv") +``` + + +```{r, calibrationplot} +raw |> + ggplot(aes(x = X, y = D47)) + + labs(x = 10^6 / T^2 ~ "(K)", y = Delta[47] ~ "(I-CDES)") + + scale_x_continuous(sec.axis = sec_axis( + "Temperature (°C)", + trans = \(x) sqrt(1e6 / x) - 273.15, + breaks = seq(0, 30, 5))) + + geom_point() + + geom_errorbar(aes(xmin = X - sd_X, xmax = X + sd_X)) + + geom_errorbar(aes(ymin = D47 - sd_D47, ymax = D47 + sd_D47)) + + # for now just draw all the lines since we're only sampling a few + # setting alpha lower allows you to overplot the draws + # don't forget to subset to just a few 100 though, otherwise it will be slow + geom_abline(aes(slope = slope, intercept = intercept), + alpha = .2, data = calib) +``` + +## load sample data + +I've come up with some fake replicate-level data. + +```{r fakedata} +dat <- tibble::tribble( + ~ age, ~ bins, ~ d18O_PDB_vit, ~ d13C_PDB_vit, ~ D47_final, ~ outlier, ~ identifier_1, ~ broadid, + 15.2,1,12,13,0.6,FALSE,"smp1","other", + 15.4,1,8,9,0.61,FALSE,"smp1","other", + 15.7,1,9,15,0.599,FALSE,"smp2","other", + 33.2,2,12,13,0.62,FALSE,"smp3","other", + 33.7,2,8,9,0.65,FALSE,"smp4","other", + 33.6,2,8,14,0.67,FALSE,"smp5","other", + 33.9,2,9,15,0.63,FALSE,"smp5","other", +) + +# You can save your data to a CSV with the same column names and load it like so: +## dat <- readr::read_csv("fake_data.csv") + +pl_raw <- dat |> + ggplot(aes(x = age, y = D47_final, colour = factor(bins))) + + geom_point() +pl_raw +``` + +## calculate bootstrapped means and apply the temperature calibration + +### just give me the results! + +This is a simple wrapper function to quickly get the end result. If you want more control, see below. + +```{r bootstraped} +# calculate other "normal" summary stats if desired, like N +oth <- dat |> + group_by(bins) |> + summarize(n = n()) + +# calculate d18Osw and temp median and quantile interval +sum <- apply_calib_and_d18O_boot(data = dat, + calib = calib, + group = bins, + Nsim = 100, + output = "summary") |> + # add N back in + left_join(oth) +``` + + +```{r bootplot} +# make a plot +sum |> + ggplot(aes(x = age, y = temp)) + + ggdist::geom_pointinterval(aes(ymin = temp.lower, ymax = temp.upper, + linewidth = factor(.width))) + + scale_linewidth_manual(values = c("0.68" = 9, "0.95" = 2), guide = "none") + + geom_text(aes(label = paste("N =", n)), nudge_x = 1.5) +``` + +### individual functions + +If you want to have more control, I recommend not using the above wrapper +function, but in stead take it one step at a time: + +1. `filter_outliers()`: What it says on the tin. +2. `bootstrap_means()`: Calculate bootstrapped means for our sample bins. +3. `temp_d18Osw_calc()`: Apply the temperature calibration and use the + clumped-derived temperature and a d18Occ--d18Osw--temperature relationship + to calculate d18Osw. +4. `our_summary()`: Summarize by calculating the median and 68% and 95% + confidence intervals of the mean. + +You can overwrite `filter_outliers()` with your own filtering function or +comment it out if you have already cleaned up your data. It assumes there's a +logical column `outlier` and a character column `broadid` with samples equal to +`"other"`. + +```{r manual} +sim <- dat |> + filter_outliers(group = bins) |> + bootstrap_means(group = bins, # specify which columns to use + age = age, + d13C = d13C_PDB_vit, + d18O = d18O_PDB_vit, + D47 = D47_final, + # obviously set Nsim way higher for real data! + Nsim = 100) |> + # you can choose several d18Occ--d18Osw--temperature equations, see `equations_supported()`. + temp_d18Osw_calc(calib = calib, equation = "KimONeil1997", Nsim = 100) |> + our_summary(group = bins) +``` + +# acknowledgements + +We would like to thank Alvaro Fernandez for writing and sharing the Matlab code that inspired this R package.