Skip to content

Commit

Permalink
fix software versions and accelerate CSV writing #4
Browse files Browse the repository at this point in the history
  • Loading branch information
sreichl committed Feb 22, 2024
1 parent 35da3b7 commit f4e4e6c
Show file tree
Hide file tree
Showing 10 changed files with 47 additions and 60 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# scCRISPR-seq Perturbation Analysis Snakemake Workflow using Seurat's Mixscape
A [Snakemake](https://snakemake.readthedocs.io/en/stable/) workflow for performing perturbation analyses of pooled (multimodal) CRISPR screens with sc/snRNA-seq read-out (scCRISPR-seq) powered by the R package [Seurat's](https://satijalab.org/seurat/index.html) method [Mixscape](https://satijalab.org/seurat/articles/mixscape_vignette.html).

This workflow adheres to the module specifications of [MR.PARETO](https://github.com/epigen/mr.pareto), an effort to augment research by modularizing (biomedical) data science. For more details and modules check out the project's repository.
This workflow adheres to the module specifications of [MR.PARETO](https://github.com/epigen/mr.pareto), an effort to augment research by modularizing (biomedical) data science. For more details, instructions and modules check out the project's repository. Please consider starring and sharing modules that are useful to you, this helps me in prioritizing my efforts!

**If you use this workflow in a publication, please don't forget to give credit to the authors by citing it using this DOI [10.5281/zenodo.8424761](https://doi.org/10.5281/zenodo.8424761).**

Expand Down
7 changes: 2 additions & 5 deletions workflow/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,7 @@ import pandas as pd
import yaml
from snakemake.utils import min_version

min_version("6.0.3")

SDIR = os.path.realpath(os.path.dirname(srcdir("Snakefile")))
shell.prefix(f"set -eo pipefail;")
min_version("7.15.2")

##### container image #####
# containerized: "docker://sreichl/..."
Expand All @@ -31,7 +28,7 @@ rule all:
prtb_score_plot = expand(os.path.join(result_path,'{sample}','plots','MIXSCAPE_ALL_PerturbScores_{split_by}.png'),
sample=samples,
split_by=['',config["CalcPerturbSig"]["split_by_col"], config["RunMixscape"]["split_by_col"]]),
envs = expand(os.path.join(config["result_path"],'envs','mixscape_seurat','{env}.yaml'),env=['seurat']),
envs = expand(os.path.join(config["result_path"],'envs','mixscape_seurat','{env}.yaml'),env=['seurat_mixscape','seurat_lda']),
configs = os.path.join(config["result_path"],'configs','mixscape_seurat','{}_config.yaml'.format(config["project_name"])),
annotations = os.path.join(config["result_path"],'configs','mixscape_seurat','{}_annot.csv'.format(config["project_name"])),
resources:
Expand Down
13 changes: 13 additions & 0 deletions workflow/envs/seurat_lda.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
channels:
- conda-forge
- bioconda
- defaults
dependencies:
- r-seurat=4.4.0
- r-irlba=2.3.5.1
- r-matrix=1.6_1.1
- r-mixtools=2.0.0
- r-ggplot2=3.4.4
- r-scales=1.3.0
- r-patchwork=1.2.0
- data.table
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,10 @@ channels:
- defaults
dependencies:
- r-seurat=4.4.0
- r-matrix=1.6_5
- r-irlba=2.3.5.1
- r-matrix=1.6_1
- r-mixtools=2.0.0
- r-ggplot2=3.4.4
- r-scales=1.3.0
- r-patchwork=1.2.0
- data.table
8 changes: 4 additions & 4 deletions workflow/rules/mixscape.smk
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@ rule mixscape:
subcategory="{sample}"),
resources:
mem_mb=config.get("mem", "16000"),
threads: config.get("threads", 1)
threads: 4*config.get("threads", 1)
conda:
"../envs/seurat.yaml"
"../envs/seurat_mixscape.yaml"
log:
os.path.join("logs","rules","mixscape_{sample}.log"),
params:
Expand All @@ -27,7 +27,7 @@ rule mixscape:
grna_split_symbol = config["grna_split_symbol"],
script:
"../scripts/mixscape.R"

# perform LDA on perturbed subset
rule lda:
input:
Expand All @@ -45,7 +45,7 @@ rule lda:
mem_mb=config.get("mem", "16000"),
threads: config.get("threads", 1)
conda:
"../envs/seurat.yaml"
"../envs/seurat_lda.yaml"
log:
os.path.join("logs","rules","lda_{sample}.log"),
params:
Expand Down
2 changes: 1 addition & 1 deletion workflow/rules/visualize.smk
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ rule visualize:
mem_mb=config.get("mem", "16000"),
threads: config.get("threads", 1)
conda:
"../envs/seurat.yaml"
"../envs/seurat_mixscape.yaml"
log:
os.path.join("logs","rules","visualize_{sample}.log"),
params:
Expand Down
17 changes: 8 additions & 9 deletions workflow/scripts/lda.R
Original file line number Diff line number Diff line change
@@ -1,21 +1,21 @@
#### load libraries & utility function
library(Seurat)
library(ggplot2)
library(scales)
library("Seurat")
library("ggplot2")
library("scales")

# source utility functions
# source("workflow/scripts/utils.R")
snakemake@source("./utils.R")

# inputs
mixscape_object_path <- snakemake@input[["mixscape_object"]] #"/nobackup/lab_bock/projects/macroIC/results/AKsmall/mixscape_seurat/condition_24h_cytokines/MIXSCAPE_ALL_object.rds"
mixscape_object_path <- snakemake@input[["mixscape_object"]]

# outputs
lda_object_path <- snakemake@output[["lda_object"]] #"/nobackup/lab_bock/projects/macroIC/results/AKsmall/mixscape_seurat/condition_24h_cytokines/MIXSCAPE_FILTERED_LDA_object.rds"
lda_object_path <- snakemake@output[["lda_object"]]
lda_plot_path <- snakemake@output[["lda_plot"]]

# parameters
assay <- snakemake@params[["assay"]] #"SCT" #"RNA"
assay <- snakemake@params[["assay"]]
calcPerturbSig_params <- snakemake@params[["CalcPerturbSig_params"]]
runMixscape_params <- snakemake@params[["RunMixscape_params"]]
mixscapeLDA_params <- snakemake@params[["MixscapeLDA_params"]]
Expand Down Expand Up @@ -121,8 +121,7 @@ save_seurat_object(seurat_obj=sub,
prefix="MIXSCAPE_FILTERED_LDA_")

# save matrix of LDA values
write.csv(lda_data, file=file.path(result_dir, paste0('MIXSCAPE_FILTERED_LDA_data.csv')), row.names=TRUE)
fwrite(as.data.frame(lda_data), file=file.path(result_dir, paste0('MIXSCAPE_FILTERED_LDA_data.csv')), row.names=TRUE)

# save matrix of PRTB values
slot <- "data"
write.csv(GetAssayData(object = sub, slot = slot, assay = "PRTB"), file=file.path(result_dir, paste0('MIXSCAPE_FILTERED_PRTB_',slot,'.csv')), row.names=TRUE)
fwrite(as.data.frame(GetAssayData(object = sub, slot = "data", assay = "PRTB")), file=file.path(result_dir, paste0('MIXSCAPE_FILTERED_PRTB_',slot,'.csv')), row.names=TRUE)
23 changes: 8 additions & 15 deletions workflow/scripts/mixscape.R
Original file line number Diff line number Diff line change
@@ -1,20 +1,20 @@
#### load libraries & utility function
library(Seurat)
library(ggplot2)
library("Seurat")
library("ggplot2")

# source utility functions
# source("workflow/scripts/utils.R")
snakemake@source("./utils.R")

# inputs
filtered_object_path <- snakemake@input[[1]] #"/nobackup/lab_bock/projects/macroIC/results/AKsmall/scrnaseq_processing_seurat/condition_24h_cytokines/FILTERED_object.rds"
filtered_object_path <- snakemake@input[[1]]

# outputs
mixscape_object_path <- snakemake@output[["mixscape_object"]] #"/nobackup/lab_bock/projects/macroIC/results/AKsmall/mixscape_seurat/condition_24h_cytokines/MIXSCAPE_ALL_object.rds"
mixscape_object_path <- snakemake@output[["mixscape_object"]]
mixscape_plot_path <- snakemake@output[["mixscape_plot"]]

# parameters
assay <- snakemake@params[["assay"]] #"SCT" #"RNA"
assay <- snakemake@params[["assay"]]
variable_features_only <- snakemake@params[["variable_features_only"]]
calcPerturbSig_params <- snakemake@params[["CalcPerturbSig_params"]]
runMixscape_params <- snakemake@params[["RunMixscape_params"]]
Expand Down Expand Up @@ -164,14 +164,7 @@ save_seurat_object(seurat_obj=data,
prefix="MIXSCAPE_ALL_")

# save mixscape statistics
write.csv(t(stat_table), file=file.path(result_dir, paste0('MIXSCAPE_ALL_stats.csv')), row.names=TRUE)
fwrite(as.data.frame(t(stat_table)), file=file.path(result_dir, paste0('MIXSCAPE_ALL_stats.csv')), row.names=TRUE)

# save matrix of PRTB values
slot <- "data"
write.csv(GetAssayData(object = data, slot = slot, assay = "PRTB"), file=file.path(result_dir, paste0('MIXSCAPE_ALL_PRTB_',slot,'.csv')), row.names=TRUE)


# slots counts and scale.data are emtpy -> only save data slot
# for (slot in c("counts","data","scale.data")){
# write.csv(GetAssayData(object = data, slot = slot, assay = "PRTB"), file=file.path(result_dir, paste0('MIXSCAPE_ALL_PRTB_',slot,'.csv')), row.names=TRUE)
# }
# save matrix of PRTB values; slots counts and scale.data are emtpy -> only save data slot
fwrite(as.data.frame(GetAssayData(object = data, slot = "data", assay = "PRTB")), file=file.path(result_dir, paste0('MIXSCAPE_ALL_PRTB_',slot,'.csv')), row.names=TRUE)
18 changes: 6 additions & 12 deletions workflow/scripts/utils.R
Original file line number Diff line number Diff line change
@@ -1,29 +1,23 @@
### Utility Functions
library("data.table")

# save Seurat object results
save_seurat_object <- function (seurat_obj, result_dir, prefix){

# make result directory if not exist
# if (!dir.exists(result_dir)){
# dir.create(result_dir, recursive = TRUE)
# }

# save seurat object
saveRDS(seurat_obj, file=file.path(result_dir, paste0(prefix,"object",".rds")))
saveRDS(seurat_obj, file=file.path(result_dir, paste0(prefix,"object.rds")))

# save metadata
write.csv(seurat_obj[[]], file=file.path(result_dir, paste0(prefix,"metadata",".csv")), row.names=TRUE)
fwrite(as.data.frame(seurat_obj[[]]), file=file.path(result_dir, paste0(prefix,"metadata.csv")), row.names=TRUE)

# save stats
stats <- paste0("cells: ",ncol(seurat_obj),"\nfeatures: ",nrow(seurat_obj))
write(stats, file=file.path(result_dir, paste0(prefix,"stats",".txt")))
write(stats, file=file.path(result_dir, paste0(prefix,"stats.txt")))
}


# extended ggsave
ggsave_new <- function(filename, results_path, plot, width=5, height=5){
# make result directory if not exist
# if (!dir.exists(results_path)){
# dir.create(results_path, recursive = TRUE)
# }

# failsafe for large plots
width <- min(100, width)
Expand Down
14 changes: 2 additions & 12 deletions workflow/scripts/visualize.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,26 +8,16 @@ library(patchwork)
snakemake@source("./utils.R")

# inputs
mixscape_object_path <- snakemake@input[["mixscape_object"]] #"/nobackup/lab_bock/projects/macroIC/results/AKsmall/mixscape_seurat/condition_24h_cytokines/MIXSCAPE_ALL_object.rds"
mixscape_object_path <- snakemake@input[["mixscape_object"]]

# outputs
post_prob_plot_path <- dirname(file.path(snakemake@output[["post_prob_plot"]][1])) #"/nobackup/lab_bock/projects/macroIC/results/AKsmall/mixscape_seurat/condition_24h_cytokines/plots/MIXSCAPE_ALL_PerturbScores_.png"
post_prob_plot_path <- dirname(file.path(snakemake@output[["post_prob_plot"]][1]))

# parameters
calcPerturbSig_params <- snakemake@params[["CalcPerturbSig_params"]]
runMixscape_params <- snakemake@params[["RunMixscape_params"]]
antibody_capture_flag <- snakemake@params[["antibody_capture_flag"]]

# for testing
# calcPerturbSig_params <- list()
# calcPerturbSig_params[["split_by_col"]] <- "batch"
# calcPerturbSig_params[["gene_col"]] <- "KOcall"
# calcPerturbSig_params[["nt_term"]] <- "NonTargeting"
# runMixscape_params <- list()
# runMixscape_params[["split_by_col"]] <- "condition"
# runMixscape_params[["prtb_type"]] <- "KO"
# antibody_capture_flag <- "AB"

### load mixscape data
data <- readRDS(file = file.path(mixscape_object_path))
Idents(data) <- "mixscape_class"
Expand Down

0 comments on commit f4e4e6c

Please sign in to comment.