From b499e471c5a4a5dfa4138a161b65e7c6df4821b4 Mon Sep 17 00:00:00 2001 From: alexq Date: Thu, 7 Nov 2024 15:38:43 +1100 Subject: [PATCH] changes --- vignettes/scdneySpatial.Rmd | 114 ++++++++++++++++++++---------------- 1 file changed, 65 insertions(+), 49 deletions(-) diff --git a/vignettes/scdneySpatial.Rmd b/vignettes/scdneySpatial.Rmd index 8bc6945..d6fcc0d 100644 --- a/vignettes/scdneySpatial.Rmd +++ b/vignettes/scdneySpatial.Rmd @@ -13,7 +13,7 @@ editor_options: wrap: 72 --- -```{r} +```{r, echo = FALSE} knitr::knit_hooks$set(time_it = local({ now <- NULL function(before, options) { @@ -39,7 +39,7 @@ knitr::opts_chunk$set( cache = FALSE, message = FALSE, warning = FALSE, - time_it = TRUE + time_it = FALSE ) ``` @@ -136,11 +136,10 @@ the immune environment can be used to predict primary tumour progression or metastases in patients. We will use our spicy workflow to reach a similar conclusion. -## Workshop -## Setup +# Setup -### Installation +## Installation ```{r install, warning=FALSE, message=FALSE, eval = FALSE} if (!requireNamespace("BiocManager", quietly = TRUE)) { @@ -150,7 +149,7 @@ if (!requireNamespace("BiocManager", quietly = TRUE)) { BiocManager::install(c( "simpleSeg", "cytomapper", "scClassify", "scHOT", "FuseSOM","glmnet", "spicyR", "lisaClust","Statial", "scFeatures", "ClassifyR", "tidyverse", "scater", "SingleCellExperiment", "STexampleData", "SpatialDatasets", "tidySingleCellExperiment", "scuttle", "batchelor")) ``` -### Load packages +## Load packages ```{r library, warning=FALSE, message=FALSE} @@ -182,7 +181,7 @@ options("restore_SingleCellExperiment_show" = TRUE) ``` -### Download images +## Download images Feel free to follow along for this section. Much of the segmentation pipeline is computationally demanding, and most likely won't be able to @@ -230,12 +229,12 @@ names(images) <- nam # Reassigning image names mcols(images)[["imageID"]] <- nam # Reassigning image names ``` -## SimpleSeg: Segmentation +# SimpleSeg: Segmentation [SimpleSeg Package](https://www.bioconductor.org/packages/release/bioc/html/simpleSeg.html) -### Run simpleSeg +## Run simpleSeg If your images are stored in a `list` or `CytoImageList` they can be segmented with a simple call to `simpleSeg()`. To summarise, @@ -284,7 +283,7 @@ masks <- simpleSeg(images, ) ``` -### Visualise outlines +## Visualise outlines The `plotPixels` function in `cytomapper` makes it easy to overlay the mask on top of the nucleus intensity marker to see how well our @@ -348,6 +347,7 @@ knitr::include_graphics(system.file("images", "see-seg-markers-1.png", package = 3. What are some intrinsic limitations of the simpleSeg method? ::: + In order to characterise the phenotypes of each of the segmented cells, `measureObjects()` from `cytomapper` will calculate the average intensity of each channel within each cell as well as a few @@ -393,13 +393,13 @@ clinical <- clinical[names(images), ] colData(cells) <- cbind(colData(cells), clinical[cells$imageID, ]) ``` -## simpleSeg: Normalisation +# simpleSeg: Normalisation We've made it easy to follow on from the normalisation section. Our `SpatialDatasets` package has the segmented SCE file stored in the function `spe_Ferguson_2022()`. -```{r load-spe} +```{r load-spe, time_it = TRUE} #Download data now cells = SpatialDatasets::spe_Ferguson_2022() @@ -407,7 +407,7 @@ cells$x = spatialCoords(cells)[, "x"] cells$y = spatialCoords(cells)[, "y"] ``` -### Normalise the data +## Normalise the data We should check to see if the marker intensities of each cell require some form of transformation or normalisation. The reason we do this is @@ -438,14 +438,14 @@ cells |> Can we see what is a CD3+ vs a CD3- cell here? ::: -### Dimension reduction and visualisation +## Dimension reduction and visualisation As our data is stored in a `SpatialExperiment` we can also use `scater` to perform and visualise our data in a lower dimension to look for batch effects in our images. We can see that before normalisation, our UMAP shows a clear batch effect between images. -```{r umap} +```{r umap, time_it = TRUE} # Usually we specify a subset of the original markers which are informative to separating out distinct cell types for the UMAP and clustering. ct_markers <- c("podoplanin", "CD13", "CD31", "panCK", "CD3", "CD4", "CD8a", @@ -487,7 +487,7 @@ see that this normalised data appears more bimodal, not perfectly, but likely to a sufficient degree for clustering, as we can at least observe a clear CD3+ peak at 1.00, and a CD3- peak at around 0.3. -```{r normalisation, fig.width=5, fig.height=5} +```{r normalisation, fig.width=8, fig.height=5, time_it = TRUE} # Leave out the nuclei markers from our normalisation process. useMarkers <- rownames(cells)[!rownames(cells) %in% c("DNA1", "DNA2", "HH3")] @@ -512,9 +512,12 @@ cells |> **Questions** Can we see what is a CD3+ vs a CD3- cell here? + ::: -```{r norm-umap} + + +```{r norm-umap, time_it = TRUE} set.seed(51773) # Perform dimension reduction using UMAP. cells <- scater::runUMAP( @@ -542,7 +545,8 @@ scater::plotReducedDim( post-normalisation correction UMAP? ::: -## FuseSOM + +# FuseSOM [FuseSOM Package](https://www.bioconductor.org/packages/release/bioc/html/FuseSOM.html) @@ -554,7 +558,7 @@ The `runFuseSOM()` function from the `FuseSOM` package conveniently runs clustering on any `SingleCellExperiment` object, by specifying the number of clusters under `numClusters`. -```{r FuseSOM} +```{r FuseSOM, time_it = TRUE} # Set seed. set.seed(51773) @@ -589,7 +593,7 @@ optiPlot(cells, method = "gap") 3. From a biological perspective, what would increasing `k` do? ::: -### Attempt to interpret the phenotype of each cluster +## Attempt to interpret the phenotype of each cluster We can begin the process of understanding what each of these cell clusters are by using the `plotGroupedHeatmap` function from `scater`. @@ -665,6 +669,7 @@ cells$cellType |> the least expressed cell type for this dataset? ::: + We can also use the UMAP we computed earlier to visualise our data in a lower dimension and see how well our annotated cell types cluster out. @@ -677,7 +682,7 @@ scater::plotReducedDim( ) ``` -## spicyR: Test spatial relationships +# spicyR: Test spatial relationships [spicyR Package](https://www.bioconductor.org/packages/release/bioc/html/spicyR.html) @@ -701,10 +706,13 @@ spicyTest <- spicy(cells, ``` ::: question -**Questions** 1. What does an L-function value of 0 suggest? 2. How -would you choose the optimal radii (`Rs`) for a given dataset? +**Questions** + +1. What does an L-function value of 0 suggest? +2. How would you choose the optimal radii (`Rs`) for a given dataset? ::: + To save time we have saved the output of the above code. ```{r topPairs spicy} @@ -716,8 +724,7 @@ We can visualise these tests using the `signifPlot` function. ```{r signifPlot} # Visualise which relationships are changing the most. -signifPlot(spicyTest, - fdr = TRUE) +signifPlot(spicyTest) ``` ::: question @@ -726,6 +733,7 @@ co-localistion/dispersion? 2. Does this align with what we might expect to see in a biological context? ::: + `spicyR` also has functionality for plotting out individual pairwise relationships. @@ -744,7 +752,7 @@ spicyBoxPlot(spicyTest, rank = 1) ``` -## lisaClust: Find cellular neighbourhoods +# lisaClust: Find cellular neighbourhoods [lisaClust Package](https://www.bioconductor.org/packages/release/bioc/html/lisaClust.html) @@ -754,12 +762,12 @@ Package](https://www.bioconductor.org/packages/release/bioc/html/lisaClust.html) between cell types is similar. Here we use the `lisaClust` function to clusters cells into 5 regions with distinct spatial ordering. -```{r lisaClust} +```{r lisaClust, time_it = TRUE} set.seed(51773) cells <- lisaClust( cells, - k = 5, + k = 4, sigma = 50, cellType = "cellType", BPPARAM = BPPARAM @@ -770,7 +778,7 @@ cells <- lisaClust( To save time the results of the above code have been saved and can be loaded in. -### Region - cell type enrichment heatmap +## Region - cell type enrichment heatmap We can interpret which spatial orderings the regions are quantifying using the `regionMap` function. This plots the frequency of each cell @@ -781,7 +789,7 @@ type in a region relative to what you would expect by chance. regionMap(cells, cellType = "cellType", limit = c(0.2, 2)) ``` -### Visualise regions +## Visualise regions By default, these identified regions are stored in the `regions` column in the `colData` of our `SpatialExperiment` object. We can quickly @@ -797,7 +805,7 @@ cells |> While much slower, the `hatchingPlot` function can also be used to view regions of co-localisation simultaneously with the cell type calls. -```{r hatchingPlot} +```{r hatchingPlot, time_it = TRUE} # Use hatching to visualise regions and cell types. hatchingPlot( cells, @@ -808,17 +816,19 @@ hatchingPlot( ``` ::: question -**Questions** 1. How could the choice of `k` affect our clustering -results? 2. Looking at the graph above, would a different value of `k` -be better? +**Questions** + +1. How could the choice of `k` affect our clustering results? +2. Looking at the graph above, would a different value of `k` be better? ::: -### Test for association with progression + +## Test for association with progression We can use the `colTest` function to test for associations between the proportions of cells in each region and progression status. -```{r} +```{r, time_it = TRUE} # Test if the proportion of each region is associated # with progression status. testRegion <- colTest( @@ -831,16 +841,18 @@ testRegion ``` ::: question -**Questions** 1. How could results from `lisaClust` be used in -conjunction with results from `spicyR`? +**Questions** + +How could results from `lisaClust` be used in conjunction with results from `spicyR`? ::: -## Statial + +# Statial [Statial Package](https://www.bioconductor.org/packages/release/bioc/html/Statial.html) -### SpatioMark: Continuous changes in marker expression associated with varying levels of localisation. +## SpatioMark: Continuous changes in marker expression associated with varying levels of localisation. The first step in analysing these changes is to calculate the spatial proximity (`getDistances`) of each cell to every cell type. These values @@ -850,7 +862,7 @@ provides functionality to look into proximal cell abundance using the `getAbundance()` function, which is further explored within the `Statial` package vignette -```{r getDistances} +```{r getDistances, time_it = TRUE} cells <- getDistances(cells, maxDist = 200, nCores = nCores, @@ -896,7 +908,7 @@ image, `calcStateChanges()` will examine the most significant correlations between distance and marker expression across the entire dataset. -```{r stateChanges} +```{r stateChanges, time_it = TRUE} state_dist <- calcStateChanges( cells = cells, cellType = "cellType", @@ -917,7 +929,7 @@ parameter, with the coefficient being the default extracted parameter. We can see that with SpatioMark, we get some features which are significant after adjusting for FDR. -```{r spatioMarkOutcome} +```{r spatioMarkOutcome, time_it = TRUE} # Preparing outcome vector outcome <- cells$group[!duplicated(cells$imageID)] names(outcome) <- cells$imageID[!duplicated(cells$imageID)] @@ -935,11 +947,11 @@ survivalResults <- colTest(distMat, outcome, type = "ttest") head(survivalResults) ``` -### Kontextual +## Kontextual [Pre-print](https://www.biorxiv.org/content/10.1101/2024.09.03.611109v1) -```{r hierarchy} +```{r hierarchy, time_it = TRUE} # Cluster cell type hierarchy hierarchy <- treekoR::getClusterTree(t(assay(cells, "norm")), clusters = cells$cellType, @@ -983,7 +995,7 @@ We can use the `spicyR::spicy` to test for associations between the `Kontextual` values and progression status. To do so we use the `alternateResult` argument. -```{r kontextOutcome, fig.height = 5, fig.width = 10} +```{r kontextOutcome, fig.height = 5, fig.width = 10, time_it = TRUE} # Converting Kontextual result into data matrix kontextMat <- Statial::prepMatrix(kontext) @@ -1014,7 +1026,9 @@ signifPlot(kontextOutcome) (example below), is there anything that strikes out to you? ::: -```{r viewingKontextual} + + +```{r viewingKontextual, time_it = TRUE} spicyR::signifPlot(spicyTest) relationship = "TC_CD4__EC__parent_5" @@ -1049,7 +1063,7 @@ Cell type localisation in reference to a parent cell type from `Kontextual` 5) Cell changes in response to proximal changes from `SpatioMark` -```{r features} +```{r features, time_it = TRUE} # Create list to store data.frames data <- list() @@ -1107,7 +1121,7 @@ We have saved the results for convenience. load(system.file("extdata", "cv.rda", package = "scdneySpatialBiocAsia2024")) ``` -### Visualise cross-validated prediction performance +## Visualise cross-validated prediction performance Here we use the `performancePlot` function to assess the AUC from each repeat of the 5-fold cross-validation. @@ -1164,6 +1178,8 @@ plot = performancePlot( knitr::include_graphics(system.file("images", "plot.png", package = "scdneySpatialBiocAsia2024")) ``` +# Session Information + ```{r} sessionInfo() ```