Skip to content

Commit

Permalink
RUV and gene plot changes
Browse files Browse the repository at this point in the history
  • Loading branch information
lpantano committed Apr 29, 2024
1 parent b70f9e8 commit a9cb331
Showing 1 changed file with 54 additions and 28 deletions.
82 changes: 54 additions & 28 deletions inst/rmarkdown/templates/rnaseq/skeleton/DE/DEG.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ params:
numerator: tumor
denominator: normal
subset_value: NA
ruv: false
params_file: params_de.R
project_file: ../information.R
---
Expand All @@ -43,8 +44,9 @@ library(org.Hs.eg.db)
library(knitr)
library(EnhancedVolcano)
library(bcbioR)
library(ggprism)
ggplot2::theme_set(theme_light(base_size = 14))
ggplot2::theme_set(theme_prism(base_size = 14))
opts_chunk[["set"]](
cache = F,
cache.lazy = FALSE,
Expand All @@ -57,6 +59,9 @@ opts_chunk[["set"]](
warning = FALSE,
echo = T,
fig.height = 4)
# set seed for reproducibility
set.seed(1234567890L)
```

```{r sanitize_datatable}
Expand Down Expand Up @@ -85,6 +90,7 @@ if (!is.na(params$subset_value)){
}
contrasts = c(column,params$numerator,params$denominator)
coef=paste0(column,"_",params$numerator,"_vs_",params$denominator)
name_expression_fn=file.path(
basedir,
Expand Down Expand Up @@ -123,43 +129,56 @@ rdata = AnnotationDbi::select(org.Hs.eg.db, rownames(counts), 'SYMBOL', 'ENSEMBL
```

# Remove Unwanted Variability

When performing differential expression analysis, it is important to ensure that any detected differences are truly a result of the experimental comparison being made and not any additional variability in the data.


```{r setup_RUV}
dds_before <- DESeqDataSetFromMatrix(counts, coldata, design = ~1)
vsd_before <- vst(dds_before)
dds_to_use <- DESeqDataSetFromMatrix(counts, coldata, design = ~1)
vsd_before <- vst(dds_to_use)
norm_matrix = assay(vsd_before)
```

# Covariate analysis

```{r covariates, fig.height = 6, fig.width = 10}
se <- readRDS(se_object) #local
metrics <- metadata(se)$metrics %>% mutate(sample = toupper(sample)) %>%
left_join(coldata %>% rownames_to_column('sample')) %>% column_to_rownames('sample')
degCovariates(
assay(vsd_before),
norm_matrix,
metrics
)
```

# PCA analysis

```{r before_RUV}
pca1 <- degPCA(assay(vsd_before), colData(vsd_before),
pca1 <- degPCA(norm_matrix, colData(dds_to_use),
condition = column) + ggtitle('Before RUV')
pca1 + scale_color_cb_friendly()
```

```{r do_RUV}
```{r init_DESEQ}
formula <- as.formula(paste0("~ ", " + ", column))
dds_to_use <- DESeqDataSetFromMatrix(counts, coldata, design = formula)
vsd_before <- vst(dds_to_use)
norm_matrix = assay(vsd_before)
```

```{eval=params$ruv, results='asis'}
# Remove Unwanted Variability
When performing differential expression analysis, it is important to ensure that any detected differences are truly a result of the experimental comparison being made and not any additional variability in the data.
```

```{r do_RUV, eval=params$ruv}
# If you want to skip the code, just set up formula to be your model in the next chunk of code
design <- coldata[[column]]
names(design) <- coldata$name
diffs <- makeGroups(design)
dat <- assay(vsd_before)
ruvset <- RUVs(dat, cIdx=rownames(dat), k=1, diffs, isLog = T, round = F)
Expand All @@ -173,17 +192,19 @@ formula <- as.formula(paste0("~ ",
collapse = " + "
), " + ", column)
)
norm_matrix=ruvset$normalizedCounts
pca2 <- degPCA(norm_matrix, new_cdata,
condition = column) + ggtitle('After RUV')
pca2 + scale_color_cb_friendly()
```

```{r after_RUV}
```{r after_RUV, eval=params$ruv}
## Check if sample name matches
stopifnot(all(names(counts) == rownames(new_cdata)))
dds_after <- DESeqDataSetFromMatrix(counts, new_cdata, design = formula)
vsd_after<- vst(dds_after, blind=FALSE)
pca2 <- degPCA(ruvset$normalizedCounts, new_cdata,
condition = column) + ggtitle('After RUV')
pca2 + scale_color_cb_friendly()
dds_to_use <- DESeqDataSetFromMatrix(counts, new_cdata, design = formula)
vsd_to_use<- vst(dds_to_use, blind=FALSE)
```

Expand All @@ -197,7 +218,7 @@ Before fitting the model, we often look at a metric called dispersion, which is
We use the below dispersion plot, which should show an inverse relationship between dispersion and mean expression, to get an idea of whether our data is a good fit for the model.

```{r DE}
de <- DESeq(dds_after)
de <- DESeq(dds_to_use)
DESeq2::plotDispEsts(de)
```
Expand All @@ -207,7 +228,7 @@ Because it is difficult to accurately detect and quantify the expression of lowl
```{r lfc_shrink}
# resultsNames(de) # check the order is right
resLFC = results(de, contrast=contrasts)
resLFCS <- lfcShrink(de, coef=resultsNames(de)[ncol(vars)+2], type="apeglm")
resLFCS <- lfcShrink(de, coef=coef, type="apeglm")
res <- as.data.frame(resLFCS) %>%
rownames_to_column('gene_id') %>% left_join(rdata, by = 'gene_id') %>%
Expand Down Expand Up @@ -249,19 +270,24 @@ EnhancedVolcano(res_mod,
res_sig %>% sanitize_datatable
```

## Plot top 16 genes

```{r top n DEGs, fig.height = 6, fig.width = 8}
n = 16
top_n <- res_sig %>% slice_min(order_by = padj, n = n, with_ties = F) %>%
dplyr::select(gene_name, gene_id)
top_n_exp <- assays(vsd_after) %>% as.data.frame() %>%
top_n_exp <- norm_matrix %>% as.data.frame() %>%
rownames_to_column('gene_id') %>%
dplyr::select(-group, -group_name) %>%
# dplyr::select(-group, -group_name) %>%
pivot_longer(!gene_id, names_to = 'sample', values_to = 'log2_expression') %>%
right_join(top_n) %>%
right_join(top_n, relationship = "many-to-many") %>%
left_join(coldata, by = c('sample' = 'description'))
ggplot(top_n_exp, aes_string(x = column, y = 'log2_expression')) + geom_boxplot() +
facet_wrap(~gene_name) + ggtitle(str_interp('Expression of Top ${n} DEGs'))
ggplot(top_n_exp, aes_string(x = column, y = 'log2_expression')) +
geom_boxplot(outlier.shape = NA, linewidth=0.5, color="grey") +
geom_point() +
facet_wrap(~gene_name) +
ggtitle(str_interp('Expression of Top ${n} DEGs'))
```

Expand Down Expand Up @@ -322,7 +348,7 @@ pathways_ora_all %>% sanitize_datatable()


```{r write-files}
counts_norm=ruvset$normalizedCounts %>% as.data.frame() %>%
counts_norm=norm_matrix %>% as.data.frame() %>%
rownames_to_column("gene_id") %>%
mutate(comparison = str_interp("${params$numerator}_vs_${params$denominator}"))
Expand Down

0 comments on commit a9cb331

Please sign in to comment.