-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathUI.R
135 lines (126 loc) · 11 KB
/
UI.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
## Shiny UI for MB MassArray Classification
## Machine learning model and classifier code by Dr Reza Rafiee 2014-2017
## Adaptation and Shiny code: Dr Matthew Bashton
library(shiny) # load shiny at the beginning of both scripts
library(shinythemes)
# UI definition for the MIMIC Shiny app.
#
# Layout: a title, then a sidebar (CSV upload plus a link to a test file) next
# to a main panel. The main panel holds a tabset (id "sequenom") with the
# result tabs, an About tab and a Help tab, followed by a research-use warning
# and two images (nicr.png, ncl.png).
#
# IDs declared here:
#   input:    file1
#   outputs:  classification_table, time, classifierPlot, mp, fs, fc,
#             BS_Eff, Beta
#   download: downloadClassification, PlotDownload, downloadMissing,
#             downloadBS_Eff, downloadBeta
# The code that reads/fills these IDs is not part of this file (presumably the
# companion server script), so do not rename them here in isolation.
shinyUI(fluidPage(
  theme = shinytheme("united"),
  titlePanel("MIMIC: MInimal MethylatIon Classifier"), # give the interface a title
  br(),
  sidebarLayout(
    sidebarPanel( # all the UI controls go in here
      width = 3,
      fileInput(
        "file1", "MassARRAY CSV file upload:",
        accept = c(
          "text/csv",
          "text/comma-separated-values,text/plain",
          ".csv"
        )
      ),
      helpText("MIMIC will classify MassARRAY medulloblastoma methylation data into one of four molecular subgroups"),
      "Download test data to try classifier:",
      a(href = "test_samples.csv", "Test CSV file", target = "_blank")
    ), # End sidebarPanel
    mainPanel( # all of the output elements go here
      tabsetPanel(
        id = "sequenom",
        # The "time" text output is shown on this tab, below the download button.
        tabPanel(
          "Classification Table",
          dataTableOutput("classification_table"),
          br(),
          helpText("Unclassifiable samples are those for which a confident subgroup call could not be made"),
          br(),
          downloadButton("downloadClassification", "Download table as .csv"),
          p(), br(),
          textOutput("time")
        ),
        tabPanel(
          "Classification Plot",
          plotOutput("classifierPlot", height = "640", width = "auto"),
          br(),
          helpText("Unclassifiable and Probe QC failed samples are not shown in this plot. Boxes show the confidence interval for subgroup assignment generated by bootstrapping, and the individual data points represent the final probability associated with each subgroup call."),
          br(),
          downloadButton("PlotDownload", "Download plot as high-res .png"),
          p()
        ),
        tabPanel(
          "Informative Probes Table",
          dataTableOutput("mp"),
          br(),
          downloadButton("downloadMissing", "Download table as .csv")
        ),
        tabPanel(
          "Sample QC",
          br(), p(),
          textOutput("fs"),
          br(),
          textOutput("fc")
        ),
        tabPanel(
          "Bisulphite Conversion Efficiency",
          dataTableOutput("BS_Eff"),
          br(),
          downloadButton("downloadBS_Eff", "Download table as .csv")
        ),
        tabPanel(
          "β-values",
          dataTableOutput("Beta"),
          br(),
          downloadButton("downloadBeta", "Download table as .csv")
        ),
        tabPanel(
          "About",
          h4("MIMIC version 3.3.4-2"),
          br(),
          # Spaces in the mailto subject are percent-encoded so the href is a valid URL.
          p("Machine learning and classifier code: ", a("Reza Rafiee", href = "mailto:Gholamreza.Rafiee@newcastle.ac.uk?subject=Sequenom%20classifier%20website")),
          p("Shiny web app code and adaptation: ", a("Matthew Bashton", href = "mailto:matthew.bashton@newcastle.ac.uk?subject=Sequenom%20classifier%20website")),
          p("Project concept and MassARRAY file parser: ", a("Ed Schwalbe", href = "mailto:ed.schwalbe@newcastle.ac.uk?subject=Sequenom%20classifier%20website")),
          br(),
          h4("Overview"),
          p("MIMIC will classify MassARRAY medulloblastoma methylation data into one of four molecular subgroups: WNT, SHH, Group 3 and Group 4."),
          p("In summary the classifier works as described below:"),
          # The empty p() elements between list items are kept as they were;
          # they act as vertical spacers in the rendered list.
          tags$ol(
            tags$li(
              "We use 17 different methylation probes in an",
              a("Agena iPLEX assay", href = "http://agenabio.com/products/applications/genotyping-and-mutation-detection/"),
              ", the readout is performed by the",
              a("MassARRAY", href = "http://agenabio.com/products/massarray-system/"),
              "mass spectrometer."
            ), p(),
            tags$li("Peak heights from the Mass Spectrometer, corresponding to the 17 probes for each sample are outputted as a comma separated .csv file; these values are submitted to MIMIC and converted to β values for each probe."), p(),
            tags$li(
              "The number of probes successfully reporting β values out of the 17 is assessed for each sample,",
              a("imputation", href = "https://en.wikipedia.org/wiki/Imputation_%28statistics%29"),
              "(exploiting our own MassARRAY cohort) is used to impute any missing values using",
              a("multiple imputation (MI)", href = "https://en.wikipedia.org/wiki/Imputation_%28statistics%29#Multiple_imputation"),
              "modelling utilising a",
              a("Bootstrap Expectation Maximisation", href = "https://en.wikipedia.org/wiki/Expectation%E2%80%93maximization_algorithm"),
              "(BEM) algorithm implemented in the",
              a("Amelia package.", href = "http://gking.harvard.edu/amelia"),
              "We can efficiently impute missing values of up to 6 missing probes, if a sample has more missing values it is said to have failed Probe QC and is not classified."
            ), p(),
            tags$li(
              "A multi-class optimised ",
              a("Support Vector Machine", href = "https://en.wikipedia.org/wiki/Support_vector_machine"),
              "(SVM) validated and trained on our extensive 450k medulloblastoma cohort is used to robustly assign a subgroup to samples by their 17 β values."
            ), p(),
            tags$ul(
              tags$li("Our SVM is validated using a bootstrapping technique via 1,000 random iterations of 80% of the training set, confidence interval derived from this is plotted on the Classification Plot as a box plot."), p(),
              tags$li("The final probability assignment for a subgroup call is made by creating an SVM model with the whole 450k training set; these probabilities are given in the Classification Table in the initial tab."), p(),
              tags$li("Calls made with a probability below our predefined threshold are considered unreliable and samples will be labeled as Unclassifiable in the Classification Table, these samples will not be plotted in the Classification Plot."), p()
            ),
            tags$li(
              "Various post processing and formatting operations on the data take place with the interactive website being implemented in the R",
              a("Shiny", href = "http://shiny.rstudio.com"),
              "reactive web application framework."
            ), p()
          ), # End ol
          # The "time" output lives on the Classification Table tab, so the text points there.
          p("For a typical dataset with 14 samples this whole computational procedure will take around 4 seconds - total classification time is given below the Classification Table. "),
          p("For more detailed explanation of our classifier including various optimisation and validation exercise see our manuscript and corresponding supplementary information (manuscript in preparation)."),
          p(), br(),
          h4("Reference"),
          p("A manuscript is in preparation."),
          p(), br(),
          h4("Download"),
          "The R code for this",
          a("Shiny", href = "http://shiny.rstudio.com/"),
          "based website including training and validation cohorts can be downloaded from",
          a("GitHub", href = "https://github.com/MattBashton/MIMIC"),
          "the website can also be run locally using",
          a("Rstudio", href = "https://www.rstudio.com/"),
          "instructions and dependencies are outlined on GitHub.",
          p(), br(),
          h4("Funding"),
          p("MIMIC development was funded by a Cancer Research UK program grant.")
        ),
        tabPanel(
          "Help",
          h4("How to use our Classifier"),
          p("MIMIC will classify MassARRAY medulloblastoma methylation data into one of four molecular subgroups. To use the classifier follow the steps outlined below:"),
          tags$ol(
            tags$li("A Comma separated value (.csv) file produced by the MassARRAY scanner is needed as input to use the classifier. If you would like to test drive the classifier, or would like to see how it should be formatted a test file can be downloaded using the link in the grey box on the left."),
            p(), img(src = "step1.png"), p(), br(),
            tags$li("A MassARRAY .csv file can then be uploaded by clicking on the 'Choose File' or 'Browse...' (browser dependent) button on the left, once uploaded the classification happens automatically."),
            p(), img(src = "step2.png"), p(), br(),
            tags$li("By default the Classification Table output is preselected and will present you with a four subgroup Medulloblastoma classification for each of your samples. Other tabs presenting other information can then be accessed by clicking their names present at the top of the main panel."),
            p(), img(src = "step3.png"), p(), br(),
            tags$li("The contents of Tables can be downloaded by clicking the grey download button, these .csv files can then be loaded into Excel or other spreadsheet software if required."),
            p(), img(src = "step4.png"), p(), br(),
            tags$li("The Classification Plot can also be downloaded as a .png by clicking on the grey Download button at the bottom of the Classification Plot tab."),
            p(), img(src = "step5.png"), br()
          ), # End of list
          p(), br(),
          h4("Input file format"),
          "The input file used for this version of MIMIC is exported from the Agena MassARRAY software. It is key that all input lines for a particular sample have a unifying sample name in the second ",
          tags$i("Sample Description"),
          "column. The ",
          tags$i("Assay Id"),
          "column will also need to have appropriately named probe names we provide an example file in",
          a(href = "test_samples.csv", "test_samples.csv", target = "_blank"),
          p(),
          # This string literal spans several source lines; the continuation
          # lines stay flush left so no indentation ends up inside the text.
          p("Probe names (corresponding to 450k probes) we use are:
cg00583535,
cg18788664,
cg08123444,
cg17185060,
cg04541368,
cg25923609,
cg06795768,
cg19336198,
cg05851505,
cg20912770,
cg09190051,
cg01986767,
cg01561259,
cg12373208,
cg24280645,
cg00388871,
cg09923107 and a final probe Conv_2 is used to assess bisulphite conversion efficiency."),
          p(), br(),
          h4("Support"),
          "If you have any issues with using MIMIC please contact ",
          a("Matthew Bashton.", href = "mailto:matthew.bashton@newcastle.ac.uk?subject=Sequenom%20classifier%20website")
        ) # End of Help tab
      ), # End of tabsetPanel
      br(),
      hr(),
      p("WARNING: MIMIC is for research use only, and should only be used on samples with a confirmed histopathological diagnosis of medulloblastoma. MassARRAY is a registered trademark of Agena Bioscience."),
      hr(),
      img(src = "nicr.png"), img(src = "ncl.png"),
      br()
    ) # End of mainPanel
  ) # End sidebarLayout
)) # End fluidPage and shinyUI