diff --git a/.gitignore b/.gitignore index 630dcff..d2a4d04 100644 --- a/.gitignore +++ b/.gitignore @@ -7,6 +7,6 @@ TidyGenomicsTranscriptomicsWorkshop_bioc2023.Rproj .DS_Store /doc/ /Meta/ -vignettes/tidySpatialWorkshop2025_cache/ -vignettes/tidySpatialWorkshop2025_files/ -tidySpatialWorkshop2025.Rproj +vignettes/tidySpatialWorkshop_cache/ +vignettes/tidySpatialWorkshop_files/ +tidySpatialWorkshop.Rproj diff --git a/DESCRIPTION b/DESCRIPTION index 976a7c7..2a3ba1c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,4 +1,4 @@ -Package: tidySpatialWorkshop2025 +Package: tidySpatialWorkshop Title: Workshop Materials for Tidy Spatial Analysis Version: 0.18.5 Authors@R: c( @@ -65,5 +65,5 @@ Suggests: scatterpie, ggcorrplot VignetteBuilder: knitr -URL: https://github.com/tidyomics/tidySpatialWorkshop2025 -BugReports: https://github.com/tidyomics/tidySpatialWorkshop2025/issues +URL: https://github.com/tidyomics/tidySpatialWorkshop +BugReports: https://github.com/tidyomics/tidySpatialWorkshop/issues diff --git a/README.md b/README.md index bf9ebac..b110807 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,8 @@ -# tidySpatialWorkshop2025 +# tidySpatialWorkshop [![DOI](https://zenodo.org/badge/379767139.svg)](https://zenodo.org/badge/latestdoi/379767139) -[![Check, build, and push image](https://github.com/tidyomics/tidySpatialWorkshop2025/actions/workflows/basic_checks.yaml/badge.svg)](https://github.com/tidyomics/tidySpatialWorkshop2025/actions/workflows/basic_checks.yaml) +[![Check, build, and push image](https://github.com/tidyomics/tidySpatialWorkshop/actions/workflows/basic_checks.yaml/badge.svg)](https://github.com/tidyomics/tidySpatialWorkshop/actions/workflows/basic_checks.yaml) ## Instructor names and contact information @@ -12,7 +12,7 @@ ## Syllabus -Material [web page](https://tidyomics.github.io/tidySpatialWorkshop2025/) +Material [web page](https://tidyomics.github.io/tidySpatialWorkshop/) More details on the workshop are below. 
@@ -33,42 +33,31 @@ Bioconductor 3.19. # Install workshop package #install.packages('BiocManager') -BiocManager::install("tidyomics/tidySpatialWorkshop2025", dependencies = TRUE) - -# In May 2025, the following packages should be installed from github repositories, to use the latest features. In case you have them pre installed, run the following command -BiocManager::install(c("lmweber/ggspavis", - "stemangiola/tidySummarizedExperiment", - "william-hutchison/tidySpatialExperiment", - "stemangiola/tidybulk", - "stemangiola/tidygate", - "stemangiola/CuratedAtlasQueryR"), - update = FALSE) - -BiocManager::install("ggcorrplot") - +BiocManager::install("tidyomics/tidySpatialWorkshop", dependencies = TRUE) + # Then build the vignettes -BiocManager::install("tidyomics/tidySpatialWorkshop2025", build_vignettes = TRUE, force=TRUE) +BiocManager::install("tidyomics/tidySpatialWorkshop", build_vignettes = TRUE, force=TRUE) # To view vignette -library(tidySpatialWorkshop2025) -vignette("Introduction") +library(tidySpatialWorkshop) +vignette("Session_1_sequencing_assays") ``` ## Interactive execution of the vignettes -From command line, and enter the tidySpatialWorkshop2025 directory. +From command line, and enter the tidySpatialWorkshop directory. ``` # Open the command line -git clone git@github.com:tidyomics/tidySpatialWorkshop2025.git +git clone git@github.com:tidyomics/tidySpatialWorkshop.git ``` -Alternatively download the [git zipped package](https://github.com/tidyomics/tidySpatialWorkshop2025/archive/refs/heads/devel.zip). Uncompress it. And enter the directory. +Alternatively download the [git zipped package](https://github.com/tidyomics/tidySpatialWorkshop/archive/refs/heads/devel.zip). Uncompress it. And enter the directory. 
To run the code, you could then copy and paste the code from the workshop vignette or -[R markdown file](https://github.com/tidyomics/tidySpatialWorkshop2025/blob/devel/vignettes/Session_1_sequencing_assays.Rmd) +[R markdown file](https://github.com/tidyomics/tidySpatialWorkshop/blob/devel/vignettes/Session_1_sequencing_assays.Rmd) into a new R Markdown file on your computer. ## Workshop Description diff --git a/_pkgdown.yml b/_pkgdown.yml index 76b0d89..5793c57 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -1,4 +1,4 @@ -url: https://tidyomics.github.io/tidySpatialWorkshop2025 +url: https://tidyomics.github.io/tidySpatialWorkshop template: params: @@ -6,10 +6,10 @@ template: #ganalytics: UA-99999999-9 home: - title: "tidySpatialWorkshop2025" + title: "tidySpatialWorkshop" type: inverse navbar: right: - icon: fa-github - href: https://github.com/tidyomics/tidySpatialWorkshop2025 + href: https://github.com/tidyomics/tidySpatialWorkshop diff --git a/inst/images/stomics.png b/inst/images/stomics.png new file mode 100644 index 0000000..b744865 Binary files /dev/null and b/inst/images/stomics.png differ diff --git a/inst/images/three_technologies.png b/inst/images/three_technologies.png new file mode 100644 index 0000000..2065c9e Binary files /dev/null and b/inst/images/three_technologies.png differ diff --git a/inst/images/visium.png b/inst/images/visium.png new file mode 100644 index 0000000..f800eb1 Binary files /dev/null and b/inst/images/visium.png differ diff --git a/inst/images/visiumhd.png b/inst/images/visiumhd.png new file mode 100644 index 0000000..8eeb15f Binary files /dev/null and b/inst/images/visiumhd.png differ diff --git a/vignettes/Introduction.Rmd b/vignettes/Introduction.Rmd index a0fdc1c..7b956cb 100644 --- a/vignettes/Introduction.Rmd +++ b/vignettes/Introduction.Rmd @@ -4,7 +4,7 @@ author: - Stefano Mangiola, South Australian immunoGENomics Cancer Institute^[], Walter and Eliza Hall Institute^[] - Luciano Martellotto, Adelaide Centre for 
Epigenetics, South Australian immunoGENomics Cancer Institute^[] output: rmarkdown::html_vignette -# bibliography: "`r file.path(system.file(package='tidySpatialWorkshop2025', 'bibliography'), 'bibliography.bib')`" +# bibliography: "`r file.path(system.file(package='tidySpatialWorkshop', 'bibliography'), 'bibliography.bib')`" vignette: > %\VignetteIndexEntry{Introduction to Spatial omic analyses} %\VignetteEncoding{UTF-8} @@ -54,7 +54,7 @@ knitr::include_graphics(here("inst/images/physalia-min.png")) You can view the material at the workshop webpage -[here](https://tidyomics.github.io/tidySpatialWorkshop2025/articles/main.html). +[here](https://tidyomics.github.io/tidySpatialWorkshop/index.html). ## Workshop package installation @@ -72,24 +72,25 @@ BiocManager::install("tidyomics/tidySpatialWorkshop", dependencies = TRUE) BiocManager::install("tidyomics/tidySpatialWorkshop", build_vignettes = TRUE, force=TRUE) # To view vignette -library(tidySpatialWorkshop2025) +library(tidySpatialWorkshop) vignette("Introduction") ``` ## Interactive execution of the vignettes -From command line, and enter the tidySpatialWorkshop2025 directory. +From command line, and enter the tidySpatialWorkshop directory. ``` # Open the command line -git clone git@github.com:tidyomics/tidySpatialWorkshop2025.git +git clone git@github.com:tidyomics/tidySpatialWorkshop.git ``` -Alternatively download the [git zipped package](https://github.com/tidyomics/tidySpatialWorkshop2025/archive/refs/heads/devel.zip). Uncompress it. And enter the directory. +Alternatively download the [git zipped package](https://github.com/tidyomics/tidySpatialWorkshop/archive/refs/heads/devel.zip). Uncompress it. And enter the directory. # Announcements +Tidyomics is now published in [Nature Methods](https://www.nature.com/articles/s41592-024-02299-2), and is available for free [here](https://www.biorxiv.org/content/10.1101/2023.09.10.557072v3). 
# Introduction to Spatial Omics @@ -100,35 +101,65 @@ sequencing in experimental and analytical contexts. ### Workshop Structure -#### 1. Welcome and Introduction +#### Day 1 -- Overview of the workshop. -- Goals for Day 1. +##### 1. Welcome and Introduction -#### 2. What is Spatial Omics? +- Introduction of the instructor +- Introduction of the crowd +- Overview and goals of the workshop. + +##### 2. What is Spatial Omics? - Definition and significance in modern biology. - Key applications and impact. - -#### 3. Technologies in Spatial Omics - - Overview of different spatial omics technologies. - Comparison of imaging-based vs sequencing-based approaches. -#### 4. Sequencing Spatial Omics +##### 3. Sequencing Spatial Omics - Detailed comparison of methodologies. - Experimental design considerations. - Data analysis challenges and solutions. -#### 5. Overview of Analysis Frameworks +##### 4. Analysis of sequencing-based spatial data + +- Getting Started with SpatialExperiment. +- Data Visualisation and Manipulation. +- Quality control and filtering. +- Dimensionality reduction. +- Spatial Clustering. +- Deconvolution of pixel-based spatial data. + +#### Day 2 + +##### 1. Introduction to tidyomics + +- Use tidyverse on spatial, single-cell, pseudobulk and bulk genomic data -- Introduction to various analysis frameworks. -- Brief mention of 'tidy' data principles in spatial omics. +##### 2. Working with tidySpatialExperiment + +- tidySpatialExperiment package +- Tidyverse commands +- Advanced filtering/gating and pseudobulk +- Work with features +- Summarisation/aggregation +- Tidying your workflow +- Visualisation + +#### Day 3 + +##### 1. Imaging Spatial Omics + +- Detailed comparison of methodologies. +- Experimental design considerations. +- Data analysis challenges and solutions. -#### 6. Wrap-Up and Q&A +##### 2. Spatial analyses of imaging data -- Summarize key takeaways. -- Open floor for questions and discussions. 
+- Working with imaging-based data in Bioconductor with MoleculeExperiment +- Aggregation and analysis +- Clustering +- Neighborhood analyses diff --git a/vignettes/Session_1_sequencing_assays.Rmd b/vignettes/Session_1_sequencing_assays.Rmd index 97025d2..23f47cf 100644 --- a/vignettes/Session_1_sequencing_assays.Rmd +++ b/vignettes/Session_1_sequencing_assays.Rmd @@ -3,7 +3,7 @@ title: "Sequencing assays" author: - Stefano Mangiola, South Australian immunoGENomics Cancer Institute^[], Walter and Eliza Hall Institute^[] output: rmarkdown::html_vignette -# bibliography: "`r file.path(system.file(package='tidySpatialWorkshop2025', 'vignettes'), 'tidyomics.bib')`" +# bibliography: "`r file.path(system.file(package='tidySpatialWorkshop', 'vignettes'), 'tidyomics.bib')`" vignette: > %\VignetteIndexEntry{Sequencing assays} %\VignetteEncoding{UTF-8} @@ -17,8 +17,6 @@ knitr::opts_chunk$set(echo = TRUE, cache = FALSE) # Session 1: Spatial Analysis of Sequencing Data -Web rendering: https://rpubs.com/mangiolas/1186971 - ## Overview This workshop introduces spatial transcriptomics analysis using the Bioconductor framework, with a particular focus on the `SpatialExperiment` package. Participants will learn essential concepts and practical skills for analyzing spatially-resolved genomic data. @@ -39,6 +37,57 @@ By the end of this session, participants will be able to: - Familiarity with genomic data concepts - Understanding of basic statistical methods +## Experimental technologies + +**Spatial-omics** encompasses a suite of powerful methods that reveal not only which genes are active in a tissue but also exactly where those genes are switched on. One widely used strategy involves laying a thin slice of tissue onto a specially prepared glass slide that carries an array of microscopic “spots,” each spot marked with its own unique molecular barcode. 
As the tissue is gently broken down, the messenger RNA molecules released from each cell adhere to the underlying spots and pick up that spot’s barcode. By sequencing the barcodes together with the captured RNA, researchers can reconstruct a two-dimensional map of gene expression. For example, the Visium platform from 10x Genomics uses this barcoded-surface approach to chart gene activity across tumour biopsies, helping oncologists to identify pockets of treatment-resistant cells within a cancerous mass. + +An alternative method, known as **combinatorial FISH** (fluorescence in situ hybridisation), skips the need for physical barcodes by using fluorescent probes that bind directly to RNA molecules within intact tissue. Each probe is tagged with a small coloured label, and by carrying out multiple rounds of staining, imaging and probe removal, a unique sequence of coloured dots is generated for each target gene. It’s akin to reading a barcode of coloured spots: once the entire sequence of images has been captured, computational decoding reveals which gene each pattern corresponds to and pinpoints its exact location. This technique underlies MERFISH (Multiplexed Error-Robust FISH), which neuroscientists often employ to map hundreds of genes simultaneously in brain sections, illuminating the molecular identities of different neuronal subtypes. + +**In-situ sequencing** offers yet another route to spatially resolved transcriptomics by performing the sequencing reactions directly within fixed tissue sections. Rather than relying on pre-made probes, this approach uses a series of enzymatic ligation or polymerisation steps to read out the RNA sequence base by base. At each cycle, fluorescently labelled reagents indicate which nucleotide (A, C, G or T) has been incorporated, and repeated imaging across multiple cycles yields short sequence reads in situ. Once these reads are matched to a reference genome, they reveal precisely where specific transcripts lie. 
Developmental biologists have harnessed this method—pioneered by technologies such as Fluorescent In Situ Sequencing (FISSEQ)—to follow gene expression patterns during embryo formation, tracking how cells differentiate according to their spatial context. + +```{r, echo=FALSE, out.width="700px"} +library(here) + +knitr::include_graphics(here("inst/images/three_technologies.png")) +``` + +The **Visium CytAssist** platform from 10x Genomics brings the power of spatial transcriptomics into a streamlined, sequencing-based workflow. At its heart lies a standard glass slide bearing an 11 mm by 11 mm capture area patterned with roughly 14 000 microscopic spots (or 5 000 spots on a smaller 6.5 mm by 6.5 mm format). Each spot is densely coated with millions of identical oligonucleotides, each bearing a unique spatial barcode, a unique molecular identifier (UMI) and a poly(dT) tail designed to bind the polyadenylated tails of mRNA. When a fresh‐frozen or FFPE tissue section is mounted onto this slide, RNA molecules released during permeabilisation will hybridise to these oligos, effectively “stamping” each transcript with its precise tissue coordinates. + +The CytAssist instrument automates the critical steps of permeabilisation, RNA digestion and probe release. Rather than capturing native transcripts directly, Visium employs probe hybridisation: a comprehensive set of probes tiles the entire transcriptome (v2 chemistry covers some 18 000 human or 19 000 mouse genes), binding selectively to their target RNAs. Once the tissue has been permeabilised, these probes are enzymatically released and immediately recaptured by the underlying barcoded array. A short extension reaction then attaches the probe insert to the spatial barcode and UMI, before a denaturation step frees the complete construct for library preparation. 
+ +Sequencing libraries are configured so that Read 1 decodes the slide’s spatial barcode and the UMI, while Read 2 reads into the ligated probe insert, revealing the gene identity. To ensure robust detection of both abundant and rare messages, Visium recommends a minimum of 25 000 read‐pairs per covered spot. Optional immunofluorescence staining can be performed in parallel, providing morphological and protein‐level context alongside the transcriptomic data. + +In practice, Visium CytAssist has found widespread use across many fields. Cancer researchers have applied it to map immune cell infiltration and stromal niches within melanoma or breast carcinoma biopsies. Developmental biologists use it to chart gene expression gradients in embryonic tissues, revealing how cells acquire distinct identities in different locations. Even neuroscientists have begun to dissect the molecular architecture of brain regions, linking spatial patterns of gene activity with anatomy and function. By combining a turnkey instrument with a comprehensive probe set and high‐throughput sequencing, Visium offers an accessible route to the spatial “geography” of gene expression in virtually any tissue. + +```{r, echo=FALSE, out.width="700px"} +library(here) + +knitr::include_graphics(here("inst/images/visium.png")) +``` + +The **Visium HD** system represents a next-generation leap in spatial transcriptomics, offering subcellular resolution on a standard CytAssist instrument. Instead of discrete 55 µm spots, the Visium HD slide presents a continuous lawn of capture oligonucleotides across a 6.5 mm × 6.5 mm area, each oligo bearing a unique spatial barcode and UMI. These barcodes are patterned in a fine grid of 2 µm × 2 µm squares, which are digitally binned into 8 µm × 8 µm “pixels” for data analysis. In practice, this means that gene expression can be mapped at roughly one-cell or even subcellular scale—more than a six-fold improvement in resolution compared with the original Visium array. 
+ +As with the standard Visium workflow, fresh-frozen or FFPE tissue sections are first stained (H&E or immunofluorescence, if desired) and imaged for morphological context. The CytAssist then automates permeabilisation, RNA digestion and probe‐release steps: a comprehensive probe set tiles the entire transcriptome, binding each target mRNA; released probes are recaptured by the underlying barcoded oligo lawn; and a short extension reaction fuses the probe insert to its spatial barcode and UMI. After denaturation frees these constructs, they undergo library preparation and high-throughput sequencing. Read 1 decodes the spatial barcode and UMI, while Read 2 reads into the probe insert to identify the gene. To cover the full 6.5 mm capture area at HD resolution, Visium HD recommends approximately 275 million read-pairs per run. + +```{r, echo=FALSE, out.width="700px"} +library(here) + +knitr::include_graphics(here("inst/images/visiumhd.png")) +``` + +**BGI’s STOmics** system brings spatial transcriptomics onto DNA nanoball (DNB) patterned chips that can cover areas from a few square millimetres right up to an entire microscope slide, offering both enormous scale and subcellular resolution. The process begins with the creation of a dense array of molecular “nanoballs,” each just 220 nm across and stamped onto the chip in a precise grid. During chip manufacture, each nanoball is endowed with three key elements: a poly-T tail for capturing polyadenylated mRNA, a unique molecular identifier (UMI) to count individual transcripts, and a coordinate identifier (CID) that records its exact X–Y position on the array. + +Once a freshly frozen or paraformaldehyde-fixed tissue section has been mounted and (optionally) stained for nuclei or protein markers, the chip is brought into contact with the specimen so that mRNA diffuses down into the nanoball layer and hybridises to the poly-T oligos. 
Reverse transcription then converts these captured RNAs into complementary DNA, preserving both their sequence information and spatial tag. Library construction and high-throughput sequencing follow much as in conventional RNA-seq, but every read now carries the CID and UMI, which bioinformatics pipelines use to reconstruct a high-density map of gene expression. + +The sheer density of the DNB pattern—over 25 000 spots per 100 µm² in the highest-resolution formats—means that STOmics can detect transcripts at nearly subcellular scale, revealing fine-grained differences in gene activity within single cells or across tiny tissue niches. At the same time, chip formats up to 174 cm² in area allow researchers to profile entire organs or large tissue biopsies in one run, without stitching together multiple fields of view. In practice, developmental biologists have used this platform to survey gene expression across whole zebrafish embryos, while tumour biologists have mapped the spatial organisation of immune infiltrates in large cancer resections. By marrying nanometre-scale resolution with slide-wide coverage, BGI’s STOmics empowers scientists to explore biological landscapes from the level of subcellular compartments all the way up to entire tissue architectures. + +```{r, echo=FALSE, out.width="700px"} +library(here) + +knitr::include_graphics(here("inst/images/stomics.png")) +``` + + ## Introduction to Bioconductor Bioconductor is an open-source, open-development software project built on the R programming language. It provides powerful tools for analyzing and comprehending high-throughput genomic data. 
@@ -146,6 +195,10 @@ Maynard and Torres et al., doi: [10.1038/s41593-020-00787-0](https://www.ncbi.nl library(spatialLIBD) library(ExperimentHub) +# To avoid error for SPE loading +# https://support.bioconductor.org/p/9161859/#9161863 +setClassUnion("ExpData", c("matrix", "SummarizedExperiment")) + spatial_data <- ExperimentHub::ExperimentHub() |> spatialLIBD::fetch_data( eh = _, type = "spe") @@ -221,7 +274,7 @@ ggspavis::plotSpots( Explore additional visualisation features offered by the Visium platform, exposing the H&E (hematoxylin and eosin) image. ```{r, fig.width=6, fig.height=6} -ggspavis::plotVisium(spatial_data) +ggspavis::plotVisium(spatial_data, point_size = 0.5) ``` This visualisation focuses on specific tissue features within the dataset, emphasising areas of interest. @@ -233,7 +286,8 @@ This visualisation focuses on specific tissue features within the dataset, empha ggspavis::plotVisium( spatial_data, annotate = "spatialLIBD", - highlight = "in_tissue" + highlight = "in_tissue", + point_size =0.5 ) + facet_wrap(~sample_id) @@ -763,19 +817,7 @@ SPOTlight uses a seeded non-negative matrix factorization regression, initialize #### Producing the reference for single-cell databases -[cellNexus](https://stemangiola.github.io/cellNexus/) is a query interface that allow the programmatic exploration and retrieval of the harmonised, curated and reannotated CELLxGENE single-cell human cell atlas. Data can be retrieved at cell, sample, or dataset levels based on filtering criteria. - -Harmonised data is stored in the ARDC Nectar Research Cloud, and most cellNexus functions interact with Nectar via web requests, so a network connection is required for most functionality. 
- Mangiola et al., 2025 doi [doi.org/10.1101/2023.06.08.542671](https://www.biorxiv.org/content/10.1101/2023.06.08.542671v3) - -```{r, echo=FALSE, out.width="700px"} -knitr::include_graphics(here("inst/images/curated_atlas_query.png")) -``` - - - - +Here, we retrieve and prepare a single-cell RNA reference. The dataset in question, zhong-prefrontal-2018, originates from a study by Zhong et al. (2018), which offers a comprehensive single-cell transcriptomic survey of the human prefrontal cortex during development. Utilising the scRNAseq package, the dataset is fetched and subsequently processed to aggregate counts across cells sharing the same sample and cell type, thereby reducing data complexity and enhancing interpretability. Further filtering steps ensure the removal of empty columns and entries with missing cell type annotations. Finally, the logNormCounts function from the scuttle package is applied to perform log-normalisation, a crucial step for mitigating technical variability and preparing the data for accurate comparative analyses. ```{r, message=FALSE, warning=FALSE, fig.width=6, fig.height=6} # Get reference @@ -942,6 +984,9 @@ No, let's look at the correlation matrices to see which cell type are most often plotCorrelationMatrix(res$mat) ``` +```{r} +mat_df = as.data.frame(res$mat) +``` #### Excercise @@ -954,36 +999,112 @@ Rather than looking at the correlation matrix, overall, let's observe whether th ::: -```{r, fig.width=6, fig.height=6} -res_spatialLIBD = split(data.frame(res$mat), colData(spatial_data_gene_name)$spatialLIBD ) +::: {.note} +**Exercise 1.5** + +## Exercise 1.5 (adapted to your current cell types) + +Some of the most positive correlations in the new matrix are seen between: + +- **Microglia** and **Neurons** +- **Astrocytes** and **Stem.cells** + +> **Microglia** are the resident immune cells of the central nervous system, constantly surveying the parenchyma and clearing debris. 
+> **Neurons** are the electrically excitable cells that transmit and process information via synaptic connections. +> **Astrocytes** are star-shaped glia that support neuronal metabolism, regulate extracellular ions and neurotransmitter uptake. +> **Stem.cells** denote undifferentiated progenitors capable of self-renewal and differentiation into multiple neural lineages. + +Let us now **visualise** where these pairs of cell types most co-occur in your spatial map. For **each** pair, carry out the following: + +1. **Label** any pixel where both cell types exceed 10 % abundance (i.e. > 0.1). +2. **Label** any pixel where the _sum_ of their abundances exceeds 40 % (i.e. > 0.4). +3. **Plot** the spatial coordinates of all pixels, **colouring** them by this new label, for example: + - `0` = neither condition met + - `1` = both abundances > 0.1 + - `2` = summed abundance > 0.4 + +You should end up with two analogous visualisations: + +- **Microglia + Neurons** +- **Astrocytes + Stem.cells** + +Feel free to reuse your previous code, simply substituting the cell-type columns and updating the thresholds as above. + +::: + + + +#### Bonus - Alternative reference from the Human Cell Atlas - using cellNexus -lapply(res_spatialLIBD, function(x) plotCorrelationMatrix(as.matrix(x[,-10]))) +[cellNexus](https://stemangiola.github.io/cellNexus/) is a query interface that allows the programmatic exploration and retrieval of the harmonised, curated and reannotated CELLxGENE single-cell human cell atlas. Data can be retrieved at cell, sample, or dataset levels based on filtering criteria. + +Harmonised data is stored in the ARDC Nectar Research Cloud, and most cellNexus functions interact with Nectar via web requests, so a network connection is required for most functionality. 
+ +Mangiola et al., 2025 doi [doi.org/10.1101/2023.06.08.542671](https://www.biorxiv.org/content/10.1101/2023.06.08.542671v3) + +```{r, echo=FALSE, out.width="700px"} +knitr::include_graphics(here("inst/images/curated_atlas_query.png")) ``` +```{r, eval = FALSE, message=FALSE, warning=FALSE, fig.width=3, fig.height=3} +# Get reference +library(cellNexus) +library(HDF5Array) -::: {.note} -**Exercise 1.5** +tmp_file_path = tempfile() -Some of the most positive correlations involve the endothelial cells with Oligodendrocytes and Leptomeningeal cells. +brain_reference = + + # Query metadata across 30M cells + get_metadata() |> + + # Filter your data of interest + dplyr::filter(tissue_groups=="cerebral lobes and cortical areas", disease == "Normal") |> + + # Collect pseudobulk as SummarizedExperiment + get_pseudobulk() |> + + # Normalise for Spotlight + scuttle::logNormCounts() |> + + # Save for fast reading + HDF5Array::saveHDF5SummarizedExperiment(tmp_file_path, replace = TRUE) +``` -Leptomeningeal cells refer to the cells that make up the leptomeninges, which consist of two of the three layers olet's meninges surrounding the brain and spinal cord: the arachnoid mater and the pia mater. These layers play a critical role in protecting the central nervous system and assisting in various physiological processes. +```{r, eval = FALSE, message=FALSE} +library(HDF5Array) -Oligodendrocytes are a type of glial cell in the central nervous system (CNS) of vertebrates, including humans and mouse. These cells are crucial for the formation and maintenance of the myelin sheath, a fatty layer that encases the axons of many neurons. +brain_reference = HDF5Array::loadHDF5SummarizedExperiment(tmp_file_path) -Let's try to visualise the pixel where these cell types most occur. 
+my_metadata = colData(brain_reference) +knitr::kable(head(my_metadata), format = "html") +``` -- Label pixel that have > 10% (> 0.1) endothelial_cell and leptomeningeal_cell -- Label pixels that have > 40% (> 0.4) across these two cells -- Plot pixels colouring by the new label +These are the cell types included in our reference, and the number of pseudobulk samples we have for each cell type. -::: +```{r, eval = FALSE} -```{r} -mat_df = as.data.frame(res$mat) +table(brain_reference$cell_type_harmonised) + +``` + +These are the number of samples we have for each of the three data sets. + +```{r, eval = FALSE} + +table(brain_reference$dataset_id) +``` + +The `collection_id` can be used to gather information on the cell database. e.g. + +```{r, eval = FALSE} +table(brain_reference$collection_id) ``` + + **Session Information** ```{r} diff --git a/vignettes/Session_2_Tidy_spatial_analyses.Rmd b/vignettes/Session_2_Tidy_spatial_analyses.Rmd index 22e866e..533120e 100644 --- a/vignettes/Session_2_Tidy_spatial_analyses.Rmd +++ b/vignettes/Session_2_Tidy_spatial_analyses.Rmd @@ -3,7 +3,7 @@ title: "Tidy spatial analyses" author: - Stefano Mangiola, South Australian immunoGENomics Cancer Institute^[], Walter and Eliza Hall Institute^[] output: rmarkdown::html_vignette -# bibliography: "`r file.path(system.file(package='tidySpatialWorkshop2025', 'vignettes'), 'tidyomics.bib')`" +# bibliography: "`r file.path(system.file(package='tidySpatialWorkshop', 'vignettes'), 'tidyomics.bib')`" vignette: > %\VignetteIndexEntry{Tidy spatial analyses} %\VignetteEncoding{UTF-8} @@ -22,9 +22,22 @@ library(here) # Session 2: Tidying spatial data -A good introduction of `tidyomics` can be found here +## Introduction to tidyomics -[tidySpatialWorkshop2025](https://github.com/tidyomics/tidySpatialWorkshop2025) +`tidyomics` represents a significant advancement in bioinformatics analysis by bridging the gap between Bioconductor and the tidyverse ecosystem. 
This integration provides several key benefits: + +1. **Unified Analysis Framework**: Combines the power of Bioconductor's specialized biological data structures with tidyverse's intuitive data manipulation +2. **Maintained Compatibility**: Preserves original data containers and methods, ensuring long-term support +3. **Enhanced Workflow Efficiency**: Enables streamlined analysis pipelines using familiar tidyverse syntax + +The ecosystem includes several specialized packages: +- `tidySummarizedExperiment`: For bulk RNA-seq analysis +- `tidySingleCellExperiment`: For single-cell data +- `tidySpatialExperiment`: For spatial transcriptomics +- Additional tools: `plyranges`, `nullranges`, `tidyseurat`, `tidybulk`, `tidytof` + + +[tidySpatialWorkshop](https://github.com/tidyomics/tidySpatialWorkshop) [tidy transcriptomic manifesto](https://tidyomics.github.io/tidyomicsBlog/post/2021-07-07-tidy-transcriptomics-manifesto/) `tidyomics` is an interoperable software ecosystem that bridges Bioconductor and the tidyverse. `tidyomics` is installable with a single homonymous meta-package. This ecosystem includes three new packages: tidySummarizedExperiment, tidySingleCellExperiment, and tidySpatialExperiment, and five publicly available R packages: `plyranges`, `nullranges`, `tidyseurat`, `tidybulk`, `tidytof`. Importantly, `tidyomics` leaves the original data containers and methods unaltered, ensuring compatibility with existing software, maintainability and long-term Bioconductor support. @@ -61,7 +74,7 @@ BiocManager::install(c("lmweber/ggspavis", ``` -**Then please restart your R session** to make sure the packages we will load will be the ones we intalled mode recently. +**Important:** Please restart your R session after installation to ensure the updated packages are loaded correctly. 
Let's load the libraries needed for this session @@ -99,6 +112,10 @@ doi: [10.1038/s41593-020-00787-0](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC8 library(spatialLIBD) library(ExperimentHub) +# To avoid error for SPE loading +# https://support.bioconductor.org/p/9161859/#9161863 +setClassUnion("ExpData", c("matrix", "SummarizedExperiment")) + spatial_data <- ExperimentHub::ExperimentHub() |> spatialLIBD::fetch_data( eh = _, type = "spe") @@ -114,9 +131,17 @@ rownames(spatialCoords(spatial_data)) = colnames(spatial_data) # Bug? spatial_data ``` ::: {.note} -If `ExperimentHub` should not work. The `spatial_data` object from the previous code block can be downloaded from [Zenodo - 10.5281/zenodo.11233385](https://zenodo.org/records/11233385/files/tidySpatialWorkshop2025_spatial_data.rds?download=1) +If `ExperimentHub` does not work, the `spatial_data` object from the previous code block can be downloaded from [Zenodo - 10.5281/zenodo.11233385](https://zenodo.org/records/11233385/files/tidySpatialWorkshop2025_spatial_data.rds?download=1) ::: +## Working with tidySpatialExperiment + +The `tidySpatialExperiment` package creates a bridge between `SpatialExperiment` objects and the tidyverse ecosystem. It provides: + +1. A tidy data view of `SpatialExperiment` objects +2. Compatible dplyr, tidyr, ggplot and plotly functions +3. Seamless integration with existing SpatialExperiment functionality + ### 1. tidySpatialExperiment package `tidySpatialExperiment` provides a bridge between the `SpatialExperiment` single-cell package and the tidyverse [@wickham2019welcome]. 
It creates an invisible layer that enables viewing the `SpatialExperiment` object as a tidyverse tibble, and provides `SpatialExperiment`-compatible `dplyr`, `tidyr`, `ggplot` @@ -359,7 +384,7 @@ spatial_data = filter(in_tissue, sample_id=="151673") |> # Gate based on tissue morphology - tidySpatialExperiment::gate(alpha = 0.1, colour = "spatialLIBD", programmatic_gates = tidySpatialWorkshop2025::spatial_data_gated) + tidySpatialExperiment::gate(alpha = 0.1, colour = "spatialLIBD", programmatic_gates = tidySpatialWorkshop::spatial_data_gated) ``` @@ -726,7 +751,7 @@ Here, we will show how to use ad-hoc spatial visualisation, as well as `ggplot` #### Ad-hoc visualisation: Plotting the regions -Let’s visualise the regions that spatialLIBD labelled across three Visium 10X samples. +Let's visualise the regions that spatialLIBD labelled across three Visium 10X samples. ```{r, fig.width=7, fig.height=8} spatial_data_filtered |> diff --git a/vignettes/Session_3_imaging_assays.Rmd b/vignettes/Session_3_imaging_assays.Rmd index 4441c51..f6c5819 100644 --- a/vignettes/Session_3_imaging_assays.Rmd +++ b/vignettes/Session_3_imaging_assays.Rmd @@ -4,7 +4,7 @@ author: - Stefano Mangiola, South Australian immunoGENomics Cancer Institute^[], Walter and Eliza Hall Institute^[] - Luciano Martellotto, Adelaide Centre for Epigenetics, South Australian immunoGENomics Cancer Institute^[] output: rmarkdown::html_vignette -# bibliography: "`r file.path(system.file(package='tidySpatialWorkshop2025', 'vignettes'), 'tidyomics.bib')`" +# bibliography: "`r file.path(system.file(package='tidySpatialWorkshop', 'vignettes'), 'tidyomics.bib')`" vignette: > %\VignetteIndexEntry{Imaging assays (tidy)} %\VignetteEncoding{UTF-8} @@ -77,7 +77,11 @@ This [data package](https://bioconductor.org/packages/release/data/experiment/ht ```{r, eval=FALSE} -eh = ExperimentHub(cache = "/vast/scratch/users/mangiola.s") +# To avoid error for SPE loading +# https://support.bioconductor.org/p/9161859/#9161863 
+setClassUnion("ExpData", c("matrix", "SummarizedExperiment")) + +eh = ExperimentHub() query(eh, "SubcellularSpatialData") # Brain Mouse data @@ -94,6 +98,10 @@ tx_small = tx[sample(seq_len(nrow(tx)), size = nrow(tx)/500),] ``` ```{r, echo=FALSE} +# To avoid error for SPE loading +# https://support.bioconductor.org/p/9161859/#9161863 +setClassUnion("ExpData", c("matrix", "SummarizedExperiment")) + options(timeout = max(300, getOption("timeout"))) tx_small_file = tempfile() utils:: download.file("https://zenodo.org/records/11213118/files/tx_small.rda?download=1", destfile = tx_small_file) diff --git a/vignettes/Solutions.Rmd b/vignettes/Solutions.Rmd index 6d1555b..a3196f5 100644 --- a/vignettes/Solutions.Rmd +++ b/vignettes/Solutions.Rmd @@ -3,9 +3,9 @@ title: "Solutions to exercises" author: - Stefano Mangiola, South Australian immunoGENomics Cancer Institute^[], Walter and Eliza Hall Institute^[] output: rmarkdown::html_vignette -# bibliography: "`r file.path(system.file(package='tidySpatialWorkshop2025', 'vignettes'), 'tidyomics.bib')`" +# bibliography: "`r file.path(system.file(package='tidySpatialWorkshop', 'vignettes'), 'tidyomics.bib')`" vignette: > - %\VignetteIndexEntry{Sequencing assays} + %\VignetteIndexEntry{Solutions to exercises} %\VignetteEncoding{UTF-8} %\VignetteEngine{knitr::rmarkdown} --- @@ -96,31 +96,31 @@ lapply(res_spatialLIBD, function(x) plotCorrelationMatrix(as.matrix(x[,-10]))) ```{r, fig.width=7, fig.height=8, eval=FALSE} - - -is_endothelial_leptomeningeal = mat_df$endothelial_cell >0.1 & mat_df$leptomeningeal_cell>0.1 & mat_df$endothelial_cell + mat_df$leptomeningeal_cell > 0.4 - -spatial_data$is_endothelial_leptomeningeal = is_endothelial_leptomeningeal - -ggspavis::plotSpots(spatial_data, annotate = "is_endothelial_leptomeningeal") + - facet_wrap(~sample_id) + - scale_color_manual(values = c("TRUE"= "red", "FALSE" = "grey")) -theme(legend.position = "none") + - labs(title = "endothelial + leptomeningeal") - - - - 
-is_endothelial_oligodendrocytes = mat_df$endothelial_cell >0.1 & mat_df$oligodendrocyte>0.05 & mat_df$endothelial_cell + mat_df$oligodendrocyte > 0.4 - -spatial_data$is_endothelial_oligodendrocyte = is_endothelial_oligodendrocytes - -ggspavis::plotSpots(spatial_data, annotate = "is_endothelial_oligodendrocyte") + - facet_wrap(~sample_id) + - scale_color_manual(values = c("TRUE"= "blue", "FALSE" = "grey")) -theme(legend.position = "none") + - labs(title = "endothelial + oligodendrocyte") - +# 1. Microglia + Neurons +is_microglia_neuron <- mat_df$Microglia > 0.1 & + mat_df$Neurons > 0.1 & + (mat_df$Microglia + mat_df$Neurons) > 0.4 +spatial_data$is_microglia_neuron <- is_microglia_neuron + +ggspavis::plotSpots(spatial_data, annotate = "is_microglia_neuron") + + facet_wrap(~sample_id) + + scale_color_manual(values = c("TRUE" = "red", "FALSE" = "grey")) + + theme(legend.position = "none") + + labs(title = "Microglia + Neurons") + + +# 2. Astrocytes + Stem cells +# note the space in the column name — use backticks +is_astrocyte_stem <- mat_df$Astrocytes > 0.1 & + mat_df$`Stem cells` > 0.1 & + (mat_df$Astrocytes + mat_df$`Stem cells`) > 0.4 +spatial_data$is_astrocyte_stem <- is_astrocyte_stem + +ggspavis::plotSpots(spatial_data, annotate = "is_astrocyte_stem") + + facet_wrap(~sample_id) + + scale_color_manual(values = c("TRUE" = "blue", "FALSE" = "grey")) + + theme(legend.position = "none") + + labs(title = "Astrocytes + Stem cells") ``` **Excercise 1.6**