diff --git a/docs/source/literature.bib b/docs/source/literature.bib index e21e6b8..bf3dec7 100644 --- a/docs/source/literature.bib +++ b/docs/source/literature.bib @@ -212,3 +212,78 @@ @article{harris2020array publisher = {Springer Science and Business Media {LLC}}, url = {https://doi.org/10.1038/s41586-020-2649-2} } + +@article{vivo2012bayesian, + title={{Bayesian} approach for peak detection in two-dimensional chromatography}, + author={Viv{\'o}-Truyols, Gabriel}, + journal={Analytical Chemistry}, + volume={84}, + number={6}, + pages={2622--2630}, + year={2012}, + publisher={ACS Publications} +} + +@article{woldegebriel2015probabilistic, + title={Probabilistic model for untargeted peak detection in {LC--MS} using {Bayesian} statistics}, + author={Woldegebriel, Michael and Viv{\'o}-Truyols, Gabriel}, + journal={Analytical Chemistry}, + volume={87}, + number={14}, + pages={7345--7355}, + year={2015}, + publisher={ACS Publications} +} + +@article{briskot2019prediction, + title={Prediction uncertainty assessment of chromatography models using {Bayesian} inference}, + author={Briskot, Till and St{\"u}ckler, Ferdinand and Wittkopp, Felix and Williams, Christopher and Yang, Jessica and Konrad, Susanne and Doninger, Katharina and Griesbach, Jan and Bennecke, Moritz and Hepbildikler, Stefan and others}, + journal={Journal of Chromatography A}, + volume={1587}, + pages={101--110}, + year={2019}, + publisher={Elsevier} +} + +@article{yamamoto2021uncertainty, + title={Uncertainty quantification for chromatography model parameters by {Bayesian} inference using sequential {Monte Carlo} method}, + author={Yamamoto, Yota and Yajima, Tomoyuki and Kawajiri, Yoshiaki}, + journal={Chemical Engineering Research and Design}, + volume={175}, + pages={223--237}, + year={2021}, + publisher={Elsevier} +} + +@article{wiczling2016much, + title={How much can we learn from a single chromatographic experiment? 
A {Bayesian} perspective}, + author={Wiczling, Pawe{\l} and Kaliszan, Roman}, + journal={Analytical Chemistry}, + volume={88}, + number={1}, + pages={997--1002}, + year={2016}, + publisher={ACS Publications} +} + +@article{kelly1971estimation, + title={Estimation of chromatographic peaks with particular consideration of effects of base-line noise}, + author={Kelly, P. C. and Harris, W. E.}, + journal={Analytical Chemistry}, + volume={43}, + number={10}, + pages={1170--1183}, + year={1971}, + publisher={ACS Publications} +} + +@article{kelly1971application, + title={Application of method of maximum posterior probability to estimation of gas-chromatographic peak parameters}, + author={Kelly, P. C. and Harris, W. E.}, + journal={Analytical Chemistry}, + volume={43}, + number={10}, + pages={1184--1195}, + year={1971}, + publisher={ACS Publications} +} diff --git a/paper/paper.md b/paper/paper.md index 1e580be..9537c72 100644 --- a/paper/paper.md +++ b/paper/paper.md @@ -56,6 +56,8 @@ Since this is a time-consuming, not to mention tedious, procedure and introduces The advantage of this approach is the complete integration of all relevant parameters – i.e. baseline, peak area and height, mean, signal-to-noise ratio etc. – into one single model through which all parameters are estimated simultaneously. Furthermore, Bayesian inference comes with uncertainty quantification for all peak model parameters, and thus does not merely yield a point estimate as would commonly be the case. It also grants access to novel metrics for avoiding false positives and negatives by rejecting signals where a) a convergence criterion of the peak fitting procedure was not fulfilled or b) the uncertainty of the estimated parameters exceeded a user-defined threshold. +By employing peak fitting to uncover peak parameters – most importantly the area – this approach thus differs from recent applications of Bayesian statistics to chromatographic peak data which e.g. 
focussed on peak detection [@vivo2012bayesian; @woldegebriel2015probabilistic], method optimization [@wiczling2016much] and simulations of chromatography [@briskot2019prediction; @yamamoto2021uncertainty]. +The first studies to be published about this topic contain perhaps the technique most similar in spirit to the present one, since functions made of an idealized peak shape and a noise term are fitted, but beyond this common starting point the methodology is quite distinct [@kelly1971estimation; @kelly1971application]. # Materials and Methods ## Implementation @@ -63,7 +65,7 @@ $\texttt{PeakPerformance}$ is an open source Python package compatible with Wind At the time of manuscript submission, it features three modules: `pipeline`, `models`, and `plotting`. Due to its modular design, $\texttt{PeakPerformance}$ can easily be expanded by adding e.g. additional models for deviating peak shapes or different plots. Currently, the featured peak models describe peaks in the shape of normal or skew normal distributions, as well as double peaks of normal or skewed normal shape. -The normal distribution is regarded as the ideal peak shape and common phenomena like tailing and fronting can be expressed by the skew normal distribution [@RN144].\\ +The normal distribution is regarded as the ideal peak shape and common phenomena like tailing and fronting can be expressed by the skew normal distribution [@RN144]. Bayesian inference is conducted utilizing the PyMC package [@RN150] with the external sampler $\texttt{nutpie}$ for improved performance [@nutpie]. Both model selection and analysis of inference data objects are realized with the ArviZ package [@RN147]. Since the inference data is stored alongside graphs and report sheets, users may employ the ArviZ package or others for further analysis of the results if necessary. 
@@ -93,6 +95,8 @@ Upon passing the first filter, a Markov chain Monte Carlo (MCMC) simulation is c Before sampling from the posterior distribution, a prior predictive check is performed. When a posterior distribution has been obtained, the main filtering step is next in line which checks the convergence of the Markov chains via the potential scale reduction factor [@RN152] or $\hat{R}$ statistic and based on the uncertainty of the determined peak parameters. If a signal was accepted as a peak, a posterior predictive check is conducted and added to the inference data object resulting from the model simulation. +Regarding the performance of the simulation, in our tests an analysis of a single peak took 20 s to 30 s and of a double peak 25 s to 90 s. +This is of course dependent on the power of the computer as well as on whether an additional simulation with an increased number of samples needs to be conducted. ## Peak fitting results and diagnostic plots