-
Notifications
You must be signed in to change notification settings - Fork 1
/
50_Response_x_Time_Analysis.Rmd
422 lines (312 loc) · 12.9 KB
/
50_Response_x_Time_Analysis.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
---
title: "Genes Change over time associated with Response"
output: github_document
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
# Clear the environment
rm(list = ls())
# Free up memory by forcing garbage collection
invisible(gc())
# Manually set the seed to an arbitrary number for consistency in reports
myseed <- 9
##_ Set knitr root directory to correspond to project working directory
##_ setting based on structure with code in <project dir>/code
knitr::opts_knit$set(root.dir = here::here())
```
## Procedure
1. Load rma data, nlme results, sample annotation, probeset annotation
1. Load clinical data file, used to annotate 42 paired sample Patients
1. Filter nlme results to 189 genes associated with response change over time
1. AND with >1.25-fold change in responders
1. I am not using criteria for a difference at Week 4 (56prbs)
1. Calculate the effect size
1. Subset the RMA data to the 42 paired samples
1. Make dataframe of Diff in RMA values (Week4 - Screen) per Patient
1. Make Annotated Heatmap of Change in Expression
1. Make boxplot of top hit
1. Save table of 189 genes
# Paths and Packages
```{r paths_packages}
# Provide paths
data_dir <- "./data/import"
results_dir <- "./results"
work_dir <- "./work"
## Load packages ##
#tidyverse bundles: ggplot2, dplyr, tidyr, readr, purrr and tibble
suppressPackageStartupMessages(library(tidyverse))
library(ComplexHeatmap)
library(circlize)
library(ggpubr)
```
```{r functions}
source("./code/ggplot_theme_dj_prm.R")
#' Custom ggplot theme with bolded text for easier legibility
```
```{r palettes}
# shape map for prior therapy
shape_prior <- c("PriorTherapy_Yes" = 16,
"PriorTherapy_No" = 18)
# color map for response
color_response <- c("Not20pct" = "black",
"20pctDec" = "goldenrod2")
```
## Data Sources
```{r loaddata}
# nlme Results
TumorRESPfile <- paste(data_dir, "CA209009-tumorAffy-table02-v01.csv", sep = "/")
BiopsyRESP <- read.csv(TumorRESPfile, stringsAsFactors=FALSE, header=TRUE, na.strings = "NA")
# SDRF (Sample and Data Relationship Format) file from Array Express
sdrf_file <- paste(data_dir, "E-MTAB-3218 sdrf_response.txt", sep = "/")
sdrf <- read_tsv(sdrf_file)
# Affymetrix probeset to Gene annotation
probeset_file <- paste(data_dir, "U219_BrainArray_Locus_Symbol_IRIS.txt", sep = "/")
probeset <- read_tsv(probeset_file)
# Expression values
rma_file <- paste(data_dir, "CA209009-tumorAffy-HGU219_HS_ENTREZG.rma", sep = "/")
rma <- read_tsv(rma_file)
# Patient Annotation
# Anonymized
clinicalData_file <- paste(data_dir, "CM9_Patient_Annotation.txt", sep = "/" )
clinicalData <- read_tsv(clinicalData_file)
```
## Time.response Gene Results Table
I am going to select the 189 genes using:
+ p.response < P05 (Umbrella test for difference in means at any timepoint)
+ p.response.time < P01 (Contrast for difference in slope of change over time)
+ absolute difference in Responder means >= 0.32165 (1.25-fold effect size over Time in Responders)
```{r results}
# Filter Parameters
P01 <- 0.01
P05 <- 0.05
Fold13 <- 0.3785
Fold12 <- 0.263
Fold125 <- 0.32165
# To subset the 189 probesets with a Time.Response effect, with 1.25x in Responders
TumTimeRESPeffect <- filter(BiopsyRESP,
p.response < P05,
p.response.time < P01,
#p.response28 < P01,
#abs(response28) > Fold125
abs(DecGE20pc_Day28_GrpAvg-DecGE20pc_Day0_GrpAvg) > Fold125) %>%
dplyr::select(Gene, p.response,
p.response.time, q.response.time,
p.response28,
DecLT20pc_Day0_GrpAvg, DecGE20pc_Day0_GrpAvg,
DecLT20pc_Day28_GrpAvg, DecGE20pc_Day28_GrpAvg)
# Merge in updated gene annotation
TumTimeRESPeffect <- right_join(probeset [,1:5], TumTimeRESPeffect,
by = c("Probeset" = "Gene"))
# calculate Effect size using modeled group means
TumTimeRESPeffect <- mutate(TumTimeRESPeffect,
Effect.Size.Resp = round(DecGE20pc_Day28_GrpAvg-DecGE20pc_Day0_GrpAvg,2),
Effect.Size.NonResp = round(DecLT20pc_Day28_GrpAvg-DecLT20pc_Day0_GrpAvg,2))
# calculate Foldchange using modeled group means
TumTimeRESPeffect <- mutate(TumTimeRESPeffect,
Fold.Change.Resp = round(2^(DecGE20pc_Day28_GrpAvg-DecGE20pc_Day0_GrpAvg),2),
Fold.Change.NonResp = round(2^(DecLT20pc_Day28_GrpAvg-DecLT20pc_Day0_GrpAvg),2))
# Round modeled group mean RMA values
TumTimeRESPeffect <- TumTimeRESPeffect %>%
mutate(Responder_Day0_GrpAvg = round(DecGE20pc_Day0_GrpAvg,2))%>%
mutate(NonResponder_Day0_GrpAvg = round(DecLT20pc_Day0_GrpAvg,2))%>%
mutate(Responder_Day28_GrpAvg = round(DecGE20pc_Day28_GrpAvg,2))%>%
mutate(NonResponder_Day28_GrpAvg = round(DecLT20pc_Day28_GrpAvg,2))%>%
select(-DecGE20pc_Day0_GrpAvg, -DecLT20pc_Day0_GrpAvg,
-DecGE20pc_Day28_GrpAvg, -DecLT20pc_Day28_GrpAvg)
```
## Time.Response Subject sample Annotation
Subset the Sample annotation to the 42 paired samples
```{r samples}
sdrfTime <- sdrf %>%
filter(Subject_Affy_Status == "42Pairs", Response20pct != "NE")
annot42 <- clinicalData %>%
filter(Subject_Affy_Status == "42Pairs", Response20pct != "NE")
# Insert MPCT biopsy where the lesion is the same pre/post
annot42$MPCTbiopsy <- annot42$MPCTbiopsyS1
annot42$MPCTbiopsy[annot42$MatchedLesionBiopsy == "FALSE"] <- NA
```
## Screen and Week4 Gene RMA expression Values
Obtain the RMA data for only the paired samples
```{r rma_diff}
# Make Colnames match "Assay.Name" in sdrf annotation
colnames(rma) <- sub("ENTREZG-", "", colnames(rma))
rma <- dplyr::rename(rma, Probeset = X1)
# Filter rma data to 189 probesets and 84 paired Array columns plus Probeset column
rma189 <- rma[rma$Probeset %in% TumTimeRESPeffect$Probeset, ]
rma189 <- select(rma189, one_of("Probeset",sdrfTime$Assay.Name))
# Each observation in one row
# 189 probesets x 84 observations
rma189rows <- rma189 %>%
gather(key = "Assay.Name", value = "RMA", -Probeset) %>%
left_join(select(sdrf,
c("Assay.Name", "biopsy.timepoint", "Response20pct",
"individual", "clinical.history")))
# Make dataframe of diff in RNA values (Week4 - Screen) per subject
# 189 probesets by 42 subjects
rma189diff <- rma189rows %>%
select(-Assay.Name) %>% #if not there are 2 assays per subject per probeset
group_by(individual,Probeset) %>% #one row per subject per probeset
spread(biopsy.timepoint,RMA) %>% #2 timepoint columns with RMA value
mutate(Change = Week4 - Screen) %>% #caculate Diff
select(Probeset,individual,Change) %>% #only necessary columns
group_by(Probeset) %>% #one row per probeset
spread(individual,Change) #42 Individuals columns with Change value
#Put data columns in the same order as the annotation dataframe
colorder <- annot42$SUBJID
rma189diff <- rma189diff[,c("Probeset",colorder)]
```
## Heatmap of the change in expression values
```{r heatmap}
#make a row annotation matrix in the rma datset order
row189 <- left_join(rma189diff[,1], probeset[,1:5], by = "Probeset")
# Matrix will be the heatmap of Diff values (log fold change)
mat = rma189diff[,-1]
rownames(mat) = rma189diff$Probeset
# Top heatmap annotation is tumor burden reduction; MPCT and MPCT biopsy
ha_top = HeatmapAnnotation(barplot2 = anno_barplot(annot42$MPCTbiopsy, axis = TRUE,baseline = 0,
gp = gpar(fill = ifelse(annot42$MPCTbiopsy > -19,
"black", "goldenrod2"))),
annotation_height = unit(c(3), "cm"))
#draw(ha_top,1:42)
# Bottom heatmap annotation is response annotation, BOR?
ha_bottom = HeatmapAnnotation(response = annot42$Response20pct,
Matched = annot42$MatchedLesionBiopsy,
colname = anno_text(annot42$SUBJID, rot = 90, just = "right", offset = unit(1, "npc") - unit(2, "mm"),
gp = gpar(fontsize = 8)),
col = list(Matched = c("TRUE"="goldenrod2","FALSE"="Black"),
response = c ("Not20pct" ="black", "20pctDec"= "goldenrod2")),
na_col = "white",
annotation_height = unit(c(0.5, 0.5,1), "cm"))
# draw(ha_bottom,1:42)
# Provide column order as descending MPCT (ie waterfall plot)
ordering = order(-annot42$MPCTrank)
heatmap_object = Heatmap(mat,
col = colorRamp2(c(-1, 0, 1), c("dodgerblue", "white", "firebrick")),
cluster_rows = TRUE,
km = 2,
show_row_names = FALSE,
cluster_columns = FALSE,
column_order = ordering,
column_title = "CA209-009: 42 pairs for 189 Response.Time",
show_column_names = FALSE,
width = unit(10, "cm"),
top_annotation = ha_top,
top_annotation_height = unit(3, "cm"),
bottom_annotation = ha_bottom,
bottom_annotation_height = unit(2, "cm")) +
rowAnnotation(IRIS = row189$IRIS_Most_Specific,
col = list(IRIS =
c(`T Cell` = "darkolivegreen1",
`NK Cell` = "darkolivegreen1",
`B Cell` = "darkolivegreen1",
Lymphoid = "darkolivegreen1",
`Dendritic Cell` = "blue",
Neutrophil = "blue",
Monocyte = "blue",
Myeloid = "blue",
Multiple = "goldenrod2")),
na_col = "white",
width = unit(0.5, "cm"),
show_annotation_name = TRUE)
heatmap_file <- paste0(results_dir,"/","GEP_Heatmap_Biopsy_All_TimeResponseEffect.pdf")
pdf(file=heatmap_file)
draw(heatmap_object, padding = unit(c(2, 20, 2, 2), "mm"))
# Decorate the heatmap object, adding layers of texts or lines
decorate_annotation("Matched",
{grid.text("Matched Lesion",
unit(-2, "mm"), just = "right",
gp = gpar(fontsize = 8))})
decorate_annotation("response",
{grid.text("Response",
unit(-2, "mm"), just = "right",
gp = gpar(fontsize = 10))})
# decorate_annotation("barplot1",
# {grid.text("MPCT",
# unit(-10, "mm"), just = "bottom",
# rot = 90, check.overlap = T,
# gp = gpar(fontsize = 10))})
decorate_annotation("barplot2",
{grid.text("Matched Biopsy\n\ Reduction",
unit(-10, "mm"), just = "bottom",
rot = 90, check.overlap = T,
gp = gpar(fontsize = 10))})
dev.off()
```
## Boxplot of a selected Probeset
```{r boxplot}
mycomparisons <- list(c("Screen","Week4"))
# Define the desired probeset and get its Symbol
#3606_at LOC3606 IL18
probesetwanted <- "3606_at"
genewanted <- as.character(probeset[probeset$Probeset == probesetwanted, 4])
# Get Gene values, then transpose to Array ID and RMA values in columns
onegene <- rma %>%
filter(rma$Probeset == probesetwanted) %>%
gather(key = "Assay.Name", value = "RMA", -Probeset) %>%
left_join(select(sdrf,
c("Assay.Name", "biopsy.timepoint", "Response20pct",
"individual", "clinical.history"))) %>%
filter(Response20pct != "NE")
# Set levels of Response factor to determine plotting order
onegene$Response20pct <-factor(onegene$Response20pct,
levels = c("Not20pct","20pctDec"))
# Set levels of biopsy.timepoint factor to determine plotting order
onegene$biopsy.timepoint <-factor(onegene$biopsy.timepoint,
levels = c("Screen","Week4"))
# Convert "individual" from numeric
onegene$individual <- as.factor(as.character(onegene$individual))
# Count number of datapoints plotted
plotcount <- nrow(onegene)
# Plot baseline vs Week 4 expression values for gene
boxplot <- ggplot(onegene,
aes(x = biopsy.timepoint, y = RMA)) +
geom_boxplot(aes(biopsy.timepoint),
outlier.shape = NA,
alpha=0.2) +
geom_point(aes(color = Response20pct,shape= clinical.history),
size = 2,
position = position_jitter(w = 0.2, h = 0)) +
scale_colour_manual(name = 'Response',
values = c("black", "goldenrod2")) +
scale_shape_manual(name = 'Prior Therapy',
values=c(16,18)) +
geom_line(aes(group = individual),
colour = "black", alpha=0.3) +
scale_y_continuous(breaks=seq(3,9,1),
limits=c(3, 9))+
facet_grid(~Response20pct)+
labs(title = paste("CA209-009 Biopsy: ",probesetwanted, "=", genewanted),
subtitle = paste("Screen and/or Week4 Affymetrix, N=",plotcount),
x = "Response Category",
y = "Expression signal, RMA") +
theme(legend.position = "bottom",
axis.text=element_text(size=12),
axis.title=element_text(size=14,face="bold")) +
guides(shape = guide_legend(nrow=2,byrow=TRUE),
fill = guide_legend(nrow=2,byrow=TRUE)) +
scale_x_discrete(labels=c("Screen" = "Screen",
"Week4" = "Day28"))+
stat_compare_means(method="t.test", size = 6,
aes(label = paste0("P = ", ..p.format..)),
comparisons = mycomparisons,
vjust = "inward")+
theme_dj(16)
print(boxplot)
```
## Output Results
```{r output}
# Table of 189 genes
# Count the hits and make an output filename
countTimeRESP <- nrow(TumTimeRESPeffect)
TumTimeRESP_file <- paste0(results_dir,"/",
"GEP_Table_BiopsyAll_TimeResponseEffect_",countTimeRESP,"prbs",".txt")
# Write tsv file of 189prbs, annotation and statistics
write_tsv(TumTimeRESPeffect, TumTimeRESP_file)
# Save box plot of selected gene
box_file <- paste(results_dir, "/GEP_Boxplot_Time_By_Response_",
paste0(probesetwanted,"_",genewanted),
".png",
sep="")
ggsave(boxplot, file = box_file, width=7, height=4,
units = "in", dpi = 96)
```