-
Notifications
You must be signed in to change notification settings - Fork 1
/
peakData_Report.Rmd
287 lines (227 loc) · 11.5 KB
/
peakData_Report.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
---
title: "Summary of Data Processing"
output:
html_document: default
params:
upload: NA
emeta: NA
processed: NA
C13_ID: NA
groups_list: NA
db_tables_info: NA
bibliography: report_refs.bib
nocite: |
@Koch_2006, @Hughey_2001, @Kim_2003, @Koch_2016_erratum, @LAROWE20112030, @Rivas_Ubach_2018, @BAILEY2017133
---
<!-- For testing using postmortem objects: -->
<!-- report(revals_postmortem$uploaded_data, revals_postmortem$peakData2, emeta, output_file = 'testreport', C13_ID = '1', groups_list = revals_postmortem$groups_list, db_tables_info = tables_postmortem$saved_db_info) -->
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = FALSE)
# params list passed to report should have uploaded data, processed data, and the identifier for c13.
# If additional parameters need to be included in the future, simply pass them to report() as a named element of the 'params' argument, then access them in this file using params$<newvaluename>
# get parameters passed to report()
upload <- params$upload
emeta <- params$emeta
processed <- params$processed
c13 <- params$C13_ID
groups <- params$groups_list
dbinfo <- params$db_tables_info
library(dplyr)
library(kableExtra)
library(readr)
panderOptions('knitr.auto.asis', FALSE)
# tables of calculation options, column names, and display names
# calc_funs <- read.csv("calculation_options.csv", stringsAsFactors = FALSE)
calc_cols <- read.csv("calculation_variables.csv", stringsAsFactors = FALSE)
```
### **Input Data**
Your data started with **`r nrow(upload$f_data)` samples** and measurements for **`r nrow(upload$e_data)` peaks**.
```{r}
# get data scale: will almost always be 'Raw Abundance'
scale <- switch(attributes(upload)$data_info$data_scale,
"abundance" = "Raw Abundance",
"log2" = "log-2 Abundance",
"log10" = "log-10 Abundance",
"log" = "log-e Abundance",
"pres" = "Presence-Absence")
# proportion missing
pct_mis <- mean(upload$e_data[-which(colnames(upload$e_data) == getEDataColName(upload))] == 0)
```
**`r round(pct_mis*100, 2)`**% of the observations in the data file were missing.
Measurements were of the form: **`r scale`**.
The molecular identification file included the following fields before adding additional variables:
```{r original columns}
# get all non-null column names that are not edata_cname, fdata_cname, mass_cname
temp <- colnames(emeta[-which(colnames(emeta) == getEDataColName(upload))])
# temp <- attributes(upload)$cnames[which(!sapply(attributes(upload)$cnames, is.null))]
# temp <- temp[which(!(names(temp) %in% c("edata_cname", "fdata_cname", "mass_cname")))]
tags$b(paste(temp, collapse=', '))
```
****
The following column selections were made for mass, isotopic notation/symbol , molecule form and/or elemental counts:
```{r column selections}
# display names
selection_names <- c("Mass", "Isotopic Notation/Symbol", "Molecule Formula", "Carbon", "Hydrogen", "Oxygen", "Nitrogen", "Phosphorus")
# actual attribute names
selection_values <- list("mass_cname", "isotopic_cname", "mf_cname", "c_cname", "h_cname", "o_cname", "n_cname", "p_cname")
# make vector of columns that the user selected. 'No Selection' if they didnt make a selection
selection_values <- lapply(selection_values, function(name){
res <- ifelse(is.null(attributes(processed)$cnames[[name]]), "No Selection", attributes(processed)$cnames[[name]])
if(name == "isotopic_cname" & res != "No Selection") res <- paste0(res, " : ", c13)
res
}) %>% unlist()
kable(data.frame("Value" = selection_names, "Column Selected" = selection_values), align = c("l", "c"))
```
****
```{r groups}
if(length(groups) > 0){
groups_df <- data.frame("Group Name" = names(groups), "Samples" = sapply(groups, paste, collapse = ' | '), check.names = FALSE)
# scrollable elements for samples
groups_df <- groups_df %>%
mutate_at('Samples', function(x){paste0('<div style="width:500px;overflow-x:auto;display:inline-block;">', x, '</div>')})
tagList(
tags$h3(tags$b('Groups')),
DT::datatable(groups_df, options = list(
columnDefs = list(list(className = 'dt-center', targets = '_all')),
scrollX = TRUE, dom = 't'
), escape = FALSE, rownames = FALSE, height = '100%'
),
tags$hr()
)
}
```
```{r}
# find the columns that were added to the original data
proc_cols <- processed$e_meta %>%
dplyr::select(which(colnames(processed$e_meta) %in% calc_cols$ColumnName)) %>%
colnames() %>% setdiff(temp)
if(length(proc_cols) > 0){
tagList(
tags$h3(tags$b('Processed Variables')),
tags$p("The following variables were added in the 'Preprocessing' tab:")
)
}
### **Reference html strings**
href1 <- "<a href = 'https://pubs.acs.org/doi/pdf/10.1021/ac010560w'> [Hughey et al., 2001]</a>"
href2 <- "<a href = 'https://onlinelibrary.wiley.com/doi/abs/10.1002/rcm.2386'> [Koch & Dittmar, 2006]</a><a href = 'https://onlinelibrary.wiley.com/doi/full/10.1002/rcm.7433'> | [Erratum]</a>"
href3 <- "<a href = 'https://www.sciencedirect.com/science/article/pii/S0016703711000378'> [LaRowe & Van Cappellen, 2011]</a>"
href4 <- "<a href = 'https://pubs.acs.org/doi/abs/10.1021/ac034415p'> [Kim et al., 2003]</a>"
href5 <- "<a href = 'https://www.sciencedirect.com/science/article/pii/S0038071716306447'> [Bailey et al., 2017]</a>"
href6 <- "<a href = 'https://pubs.acs.org/doi/abs/10.1021/acs.analchem.8b00529'> [Rivas-Ubach et al., 2018]</a>"
# list of equations and references, if applicable
equations <- list("$\\frac{O}{C}$",
"$\\frac{H}{C}$",
"$\\frac{N}{C}$",
"$\\frac{P}{C}$",
"$\\frac{N}{P}$",
paste0("$IUPAC mass*(14/14.01565)$", href1),
paste0("Nominal Kendrick Mass - Kendrick Mass", href1),
paste0("$-(\\frac{4C + H - 3N - 2O + 5P - 2S}{C})$", href2),
paste0("$60.3 - 28.5*NOSC$", href3),
paste0("$\\frac{1 + C - O - S - 0.5*(N + P + H)}{C - O - S - N - P}$", href2),
paste0("$\\frac{1 + C - 0.5O - S - 0.5*(N + P + H)}{C - 0.5*O - S - N - P}$", href2),
paste0("$1 + C - O - S - 0.5*(N + P + H)$", href2),
paste0("$1 + C - O - S - 0.5*(N + P + H) - O$", href2),
"Composition of C,H,N,O,S,P",
paste0("Compound Class Set 1", href4),
paste0("Compound Class Set 2", href5),
paste0("Compound Class Set 3", href6)
)
```
```{r, results = 'asis', message = FALSE}
# get display names that have a corresponding column in the processed columns
cols_for_display <- calc_cols %>% filter(ColumnName %in% proc_cols) %>% purrr::pluck("DisplayName")
# This is only for the element ratios which occupy the first 5 rows of the calculated columns table.
if(all(calc_cols[1:5,]$DisplayName %in% cols_for_display)){
pander(paste(cols_for_display[1:5]), collapse = ", ") # row of names for element ratios
pander(" \n\n ")
pander(paste(unlist(equations)[1:5], collapse = " ")) # element ratios as actual math-mode ratio
pander(" \n\n ")
}
# for the rest (6 and onward), if the column corresponding to a particular equation is present in the calculated columns, paste an html string:
dontoutput <-lapply(6:length(equations), function(i){
if(calc_cols$ColumnName[i] %in% proc_cols){
# paste the display name inside a fixed width, inline-block div
cat(paste0("<div style = 'display:inline-block;width:275px'>",
cols_for_display[which(cols_for_display == calc_cols$DisplayName[i])],
"</div><div style = 'display:inline-block'>")
)
# use pander() to add the equation, notice the previous string started another inline-block div
pander(equations[[i]])
# end the second div and add two breaks
cat("</div><br><br>")
}
})
# old code
# cat("<div style = 'display:inline-block;width:275px'>hi</div><div style = 'display:inline-block'>")
# pander(equations[[1]])
# cat("</div>")
```
`r if(length(proc_cols) > 0) tags$hr()`
```{r filter resources}
# get the filters applied and make a df to display
# row filters
filters <- attributes(processed)$filters %>% names()
filters_names <- sapply(filters, function(x){ switch(x, "massFilt" = "Mass Filter",
"moleculeFilt" = "Molecule Filter",
"formulaFilt" = "Elemental Formula Filter",
paste0("Custom Filter on column: ", gsub("emetaFilt_", "", x)))})
num_filtered <- sapply(attributes(processed)$filters, function(x){
as.character(length(x$filtered))
})
# sample filter
removed_samples <- setdiff(upload$f_data[,getFDataColName(upload)], processed$f_data[,getFDataColName(processed)])
retained_samples <- setdiff(upload$f_data[,getFDataColName(upload)], removed_samples)
if(length(removed_samples) > 0){
num_filtered <- c(nrow(upload$e_data) - nrow(subset(upload, samples = retained_samples, check_rows = TRUE)$e_data), num_filtered)
filters_names <- c("Sample Filter", filters_names)
}
filt_df <- data.frame("Filter" = filters_names, "Number of Peaks Removed" = num_filtered, row.names = NULL, check.names = FALSE)
```
```{r filters html}
if(length(filters_names) > 0){
if(length(removed_samples) > 0){
samples_tags <- tagList(
tags$p(tags$b("Samples Removed: "), paste(removed_samples, collapse = ", ")),
tags$p(tags$b("Samples Kept: "), paste(retained_samples, collapse = ", "))
)
}
else samples_tags <- NULL
tagList(
tags$h3(tags$b('Filters')),
tags$p('The following filters were applied to the data:'),
lapply(filters_names, function(x) tags$div(tags$b(x))),
tags$br(),
samples_tags,
tags$p("The following table shows the number of peaks removed by each filter. Filters are applied *in order* from top to bottom, i.e. 'Peaks Removed' for any given filter is the peaks removed after previous filters have already been applied."),
DT::datatable(filt_df, height = '100%', rownames = FALSE,
options = list(columnDefs = list(list(className = 'dt-center', targets = '_all')),
dom = 't')
)
)
}
```
After all preprocessing steps, **`r nrow(processed$f_data)` samples** and measurements for **`r nrow(processed$e_data)` peaks** remained.
***
```{r database html}
if(inherits(dbinfo, 'data.frame')){
tagList(
tags$h3(style='font-weight:bold', 'Database Mapping'),
tags$p(sprintf('The following database tables were calculated using KeggData version %s and MetaCycData version %s',
packageVersion('KeggData'), packageVersion('MetaCycData'))),
DT::datatable(dbinfo, height = '100%', rownames = FALSE,
options = list(columnDefs = list(list(className = 'dt-center', targets = '_all')),
dom = 't')
)
)
}
```
***
### **References**
<!-- <div id = 'refs'></div> -->
<!-- $(1)$ [Hughey et al., 2001](https://pubs.acs.org/doi/pdf/10.1021/ac010560w) -->
<!-- $(2)$ [Koch \& Dittmar, 2006](https://onlinelibrary.wiley.com/doi/abs/10.1002/rcm.2386) | [Erratum](https://onlinelibrary.wiley.com/doi/full/10.1002/rcm.7433) -->
<!-- $(3)$ [LaRowe \& Van Cappellen, 2011](https://www.sciencedirect.com/science/article/pii/S0016703711000378) -->
<!-- $(4)$ [Kim et al., 2003](https://pubs.acs.org/doi/abs/10.1021/ac034415p) -->
<!-- $(5)$ [Bailey et al., 2017](https://www.sciencedirect.com/science/article/pii/S0038071716306447) -->
<!-- $(6)$ [Rivas-Ubach et al., 2018](https://pubs.acs.org/doi/abs/10.1021/acs.analchem.8b00529) -->