Skip to content
This repository has been archived by the owner on Apr 19, 2023. It is now read-only.

Commit

Permalink
Merge pull request #100 from vib-singlecell-nf/develop
Browse files Browse the repository at this point in the history
Develop

Former-commit-id: cc19ce9
  • Loading branch information
cflerin authored Jan 22, 2020
2 parents 29ab89d + ba5e9a6 commit f56f454
Show file tree
Hide file tree
Showing 29 changed files with 455 additions and 174 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,5 @@ work/
out/
tests/
debug/
*.swp
*.swo
237 changes: 179 additions & 58 deletions README.md

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion data/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ tar -xzvf pbmc_1k_v3_filtered_feature_bc_matrix.tar.gz -C data/10x/1k_pbmc/1k_pb

Download the small meta data to annotate the samples:
```
wget https://raw.githubusercontent.com/aertslab/SingleCellTxBenchmark/master/data/10x/1k_pbmc/metadata.tsv -O data/10x/1k_pbmc/metadata.tsv
wget https://raw.githubusercontent.com/vib-singlecell-nf/vsn-pipelines/master/data/10x/1k_pbmc/metadata.tsv -O data/10x/1k_pbmc/metadata.tsv
```

If these links do not appear to work, you can always download the data from https://support.10xgenomics.com/single-cell-gene-expression/datasets.
Expand Down
18 changes: 15 additions & 3 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ workflow bbknn_scenic {
// run single_sample, output a scope loom file
workflow single_sample {

include single_sample as SINGLE_SAMPLE from './workflows/single_sample' params(params)
include single_sample_standalone as SINGLE_SAMPLE from './workflows/single_sample' params(params)
SINGLE_SAMPLE()

}
Expand All @@ -43,7 +43,7 @@ workflow single_sample {
workflow single_sample_scenic {

include SCENIC_append from './src/scenic/main.nf' params(params)
include single_sample as SINGLE_SAMPLE from './workflows/single_sample' params(params)
include single_sample_standalone as SINGLE_SAMPLE from './workflows/single_sample' params(params)
SINGLE_SAMPLE()
SCENIC_append( SINGLE_SAMPLE.out.filteredloom, SINGLE_SAMPLE.out.scopeloom )

Expand Down Expand Up @@ -75,7 +75,19 @@ workflow cellranger {
// runs mkfastq, CellRanger count, then single_sample:
workflow single_sample_cellranger {

cellranger | single_sample
include single_sample as SINGLE_SAMPLE from './workflows/single_sample' params(params)
cellranger | SINGLE_SAMPLE

}

workflow h5ad_single_sample {

include getChannel as getH5ADChannel from './src/channels/h5ad' params(params)
include single_sample as SINGLE_SAMPLE from './workflows/single_sample' params(params)
data = getH5ADChannel(
params.data.h5ad.file_paths,
params.data.h5ad.suffix
).view() | SINGLE_SAMPLE

}

Expand Down
9 changes: 6 additions & 3 deletions nextflow.config
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@

manifest {
name = 'vib-singlecell-nf/vib-singlecell-nf'
name = 'vib-singlecell-nf/vsn-pipelines'
description = 'A repository of pipelines for single-cell data in Nextflow DSL2'
homePage = 'https://github.com/vib-singlecell-nf/vib-singlecell-nf'
version = '0.6.1'
homePage = 'https://github.com/vib-singlecell-nf/vsn-pipelines'
version = '0.8.0'
mainScript = 'main.nf'
defaultBranch = 'master'
nextflowVersion = '!19.12.0-edge' // with ! prefix, stop execution if current version does not match required version.
Expand Down Expand Up @@ -110,6 +110,9 @@ profiles {
tenx {
includeConfig 'src/channels/conf/tenx.config'
}
h5ad {
includeConfig 'src/channels/conf/h5ad.config'
}
sra {
includeConfig 'src/channels/conf/sra.config'
includeConfig 'src/utils/conf/sra_metadata.config'
Expand Down
2 changes: 1 addition & 1 deletion src/cellranger
Submodule cellranger updated 1 files
+1 −8 cellranger.config
8 changes: 8 additions & 0 deletions src/channels/conf/h5ad.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
// Default parameters of the h5ad input channel (see src/channels/h5ad.nf).
params {
data {
h5ad {
// Glob matching the input .h5ad files; several globs can be given separated by commas.
file_paths = ''
// Suffix following the sample name in the file paths; it is stripped to obtain the sample name.
suffix = ''
}
}
}
33 changes: 33 additions & 0 deletions src/channels/h5ad.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
nextflow.preview.dsl=2

/**
 * Extract the sample name from the path of an .h5ad file.
 *
 * The path is searched for "<parent dir>/<sample name><suffix>" and the
 * "<sample name>" part is returned. With an empty suffix the whole file name
 * (extension included) is returned.
 *
 * @param path   Path of the data file as a string; it must end with ".h5ad".
 * @param suffix Text following the sample name in the path. Dots are matched
 *               literally; any other character keeps its regular expression meaning.
 * @return The sample name.
 * @throws Exception if the path does not end with ".h5ad", or if the path
 *                   does not contain a directory separator followed by a
 *                   sample name and the given suffix.
 */
def extractSample(path, suffix) {
    if(!path.endsWith(".h5ad"))
        throw new Exception("Wrong channel used for data: "+ path)
    // Every variable is declared with 'def': undeclared variables live in the
    // script binding and would be shared by all invocations of this function.
    // Escape the dots so that e.g. ".h5ad" does not match "xh5ad".
    def escapedSuffix = suffix.replace(".","\\.")
    def pattern = /(.+)\/(.+)${escapedSuffix}/
    def matcher = (path =~ pattern)
    // Fail with a meaningful message instead of an index-out-of-range error
    // when the suffix is not part of the path.
    if(!matcher.find())
        throw new Exception("Cannot extract the sample name from '" + path + "' using the suffix '" + suffix + "'")
    // Group 1 is the parent directory, group 2 is the sample name.
    return matcher.group(2)
}

// Build a channel of (sample name, file) tuples from the .h5ad files matching the given glob(s).
workflow getChannel {

take:
glob // Glob of the input files, or several globs separated by commas
sampleSuffixWithExtension // Suffix after the sample name in the file paths

main:
// Check whether multiple globs are provided
if(glob.contains(',')) {
// Split the comma-separated string into a list of globs (entries are not trimmed)
glob = Arrays.asList(glob.split(','));
}
channel = Channel
.fromPath(glob, checkIfExists: true)
.map {
// Pair each matched path with the sample name extracted from it
path -> tuple(extractSample( "${path}", sampleSuffixWithExtension ), file("${path}"))
}

emit:
channel

}
3 changes: 1 addition & 2 deletions src/channels/singleend.nf
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ nextflow.preview.dsl=2
def extractSample(path) {
pattern = /(.+)\/(.+)_R[1-2](.*)\.fastq(\.gz)?/
(full, parentDir, id, whateverSuffix, compressionExtension) = (path =~ pattern)[0]

return id
}

Expand All @@ -20,7 +19,7 @@ workflow getChannel {
channel = Channel
.fromPath(glob, checkIfExists: true)
.map {
path -> tuple(extractSample( "${path}" ), path("${path}"))
path -> tuple(extractSample( "${path}" ), file("${path}"))
}

emit:
Expand Down
2 changes: 1 addition & 1 deletion src/dropletutils
2 changes: 1 addition & 1 deletion src/fastp
Submodule fastp updated 1 files
+1 −1 fastp.config
2 changes: 1 addition & 1 deletion src/flybaser
Submodule flybaser updated 1 files
+1 −1 flybaser.config
2 changes: 1 addition & 1 deletion src/pcacv
Submodule pcacv updated 1 files
+2 −2 pcacv.config
2 changes: 1 addition & 1 deletion src/picard
Submodule picard updated 1 files
+1 −1 picard.config
2 changes: 1 addition & 1 deletion src/scenic
Submodule scenic updated 3 files
+6 −5 README.md
+1 −1 conf/test.config
+1 −1 scenic.config
2 changes: 1 addition & 1 deletion src/sratoolkit
Submodule sratoolkit updated 1 files
+2 −2 sratoolkit.config
2 changes: 1 addition & 1 deletion src/utils/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ ENV PATH="/opt/venv/bin:$PATH"

RUN python3 -m pip install ipykernel && \
pip install --no-cache-dir papermill && \
pip install --no-cache-dir pysradb
pip install --no-cache-dir pysradb==0.9.9

FROM python:3.6.8-slim-stretch AS build-image
RUN apt-get -y update && \
Expand Down
31 changes: 27 additions & 4 deletions src/utils/bin/sc_file_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,15 @@
action='store'
)

parser.add_argument(
"-t", "--tag-cell-with-sample-id",
action="store_true",
dest="tag_cell_with_sample_id",
default=False,
help="Tag each cell with the given sample_id."
)


parser.add_argument(
"-o", "--output-format",
action="store", # optional because action defaults to "store"
Expand Down Expand Up @@ -83,6 +92,12 @@ def check_10x_cellranger_mex_path(path):
)


def add_sample_id(adata, args):
    """Store the sample ID in the ``sample_id`` entry of ``adata.obs``.

    Parameters:
        adata: Object exposing an ``obs`` attribute that supports item assignment.
        args: Object exposing the ``sample_id`` attribute to record.

    Returns:
        The same ``adata`` object, modified in place.
    """
    observations = adata.obs
    observations["sample_id"] = args.sample_id
    return adata


if INPUT_FORMAT == '10x_cellranger_mex' and OUTPUT_FORMAT == 'h5ad':
check_10x_cellranger_mex_path(path=FILE_PATH_IN)
# Convert
Expand All @@ -92,8 +107,12 @@ def check_10x_cellranger_mex_path(path):
var_names='gene_symbols', # use gene symbols for the variable names (variables-axis index)
cache=False
)
# If is sample_id is given, add the sample ID as suffix
if args.sample_id is not None:
adata = add_sample_id(
adata=adata,
args=args
)
# If tag_cell_with_sample_id is set, replace the numeric suffix of each cell ID with the sample ID
if args.tag_cell_with_sample_id:
adata.obs.index = map(lambda x: re.sub('-[0-9]+', f"-{args.sample_id}", x), adata.obs.index)
print("Writing 10x data to h5ad...")
adata.write_h5ad(filename="{}.h5ad".format(FILE_PATH_OUT_BASENAME))
Expand All @@ -106,8 +125,12 @@ def check_10x_cellranger_mex_path(path):
adata = sc.read_10x_h5(
FILE_PATH_IN
)
# If is sample_id is given, add the sample ID as suffix
if args.sample_id is not None:
adata = add_sample_id(
adata=adata,
args=args
)
# If tag_cell_with_sample_id is set, replace the numeric suffix of each cell ID with the sample ID
if args.tag_cell_with_sample_id:
adata.obs.index = map(lambda x: re.sub('-[0-9]+', f"-{args.sample_id}", x), adata.obs.index)
print("Writing 10x data to h5ad...")
adata.write_h5ad(filename="{}.h5ad".format(FILE_PATH_OUT_BASENAME))
Expand Down
6 changes: 3 additions & 3 deletions src/utils/bin/sra_to_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,15 +76,15 @@
# Get the metadata
#

if "sra_db" in args:
if args.sra_db is not None:
db = SRAdb(args.sra_db.name)
print(f"Using local SRA SQLite database to query...")
else:
print(f"Using NCBi's esearch and esummary interface to query...")
db = SRAweb()

metadata = db.sra_metadata(
acc=args.sra_project_id,
args.sra_project_id,
detailed=True,
expand_sample_attributes=True,
sample_attribute=True
Expand All @@ -99,7 +99,7 @@
[
metadata,
metadata["experiment_title"].str.extract(
r'^(.*): ([a-zA-Z0-9_-]*); (.*); (.*)$', expand=True
r'^(.*): ([a-zA-Z0-9\s,_-]*); (.*); (.*)$', expand=True
).rename(
columns={
0: 'geo_accession',
Expand Down
4 changes: 2 additions & 2 deletions src/utils/conf/base.config
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
params {
utils {
container = 'dweemx/sctx-utils:0.2.0'
container = 'vibsinglecellnf/utils:0.2.1'
workflow_configuration {
report_ipynb = "/src/utils/bin/reports/workflow_configuration_template.ipynb"
}
Expand All @@ -22,4 +22,4 @@ params {
off = 'tsv'
}
}
}
}
9 changes: 5 additions & 4 deletions src/utils/conf/sra_metadata.config
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,12 @@ params {
outdir = 'out'
}
utils {
container = 'dweemx/sctx-utils:0.0.1'
container = 'vibsinglecellnf/utils:0.2.1'
sra_metadata {
sraDb = ''
sraDbForceDownload = false
sraDbOutDir = '/ddn1/vol1/staging/leuven/stg_00002/lcb/dwmax/documents/resources/sra'
mode = 'web' // or db
// sraDb = ''
// sraDbForceDownload = false
// sraDbOutDir = ''
}
}
}
2 changes: 1 addition & 1 deletion src/utils/conf/test.config
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
params {
sc {
scanpy {
container = 'aertslab/sctx-scanpy:0.5.0'
container = 'vibsinglecellnf/scanpy:0.5.0'
}
file_converter {
iff = '10x_cellranger_mex'
Expand Down
25 changes: 19 additions & 6 deletions src/utils/conf/test/downloadSRARunCellRanger.test.config
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,15 @@ params {
global {
outdir = 'out'
}
data {
// Based on SRA Project Identifiers
sra = [
[
id: '',
samples: [""] // Use Unix globbing
]
]
}
sratoolkit {
container = 'dweemx/sctx-sratoolkit:2.9.4-1.1.0'
downloadFastqs {
Expand Down Expand Up @@ -30,12 +39,16 @@ params {
// indicies = ''
}
}
utils {
sra_metadata {
sraDb = ''
sraDbForceDownload = false
sraDbOutDir = '/ddn1/vol1/staging/leuven/stg_00002/lcb/dwmax/documents/resources/sra'
}
}

utils {
container = 'vibsinglecellnf/utils:0.2.1'

sra_metadata {
mode = 'web' // or db
// sraDb = ''
// sraDbForceDownload = false
// sraDbOutDir = ''
}
}
}
Expand Down
10 changes: 9 additions & 1 deletion src/utils/main.test.nf
Original file line number Diff line number Diff line change
Expand Up @@ -77,14 +77,22 @@ workflow {
db = file(params.utils.sra_metadata.sraDbOutDir + "/SRAmetadb.sqlite")
SRA_TO_METADATA( sra, db )
break;
case "GET_METADATA_FROM_SRA_WEB":
// Imports
include getChannel as getSRAChannel from './../channels/sra' params(params)
include SRA_TO_METADATA from './processes/sra' params(params)
// Run
sra = getSRAChannel( params.data.sra )
SRA_TO_METADATA( sra, file('NO_FILE') )
break;
case "DOWNLOAD_FROM_SRA":
// Imports
include DOWNLOAD_FROM_SRA from './workflows/downloadFromSRA' params(params)
include SC__CELLRANGER__PREPARE_FOLDER from './../cellranger/processes/utils.nf'
include SC__CELLRANGER__COUNT from './../cellranger/processes/count' params(params)
// Run
DOWNLOAD_FROM_SRA(
tuple('SRP125768', ["w1118_15d_*"]) //["DGRP-551_*d_r*","w1118_*d_r*"]
tuple('SRP162698', ["10x, sample 1", "10x, sample 2"])
)
break;
case "DOWNLOAD_FROM_SRA_AND_RUN_CELLRANGER":
Expand Down
16 changes: 12 additions & 4 deletions src/utils/processes/sra.nf
Original file line number Diff line number Diff line change
Expand Up @@ -42,12 +42,20 @@ process SRA_TO_METADATA {
file "${sraId}_metadata.tsv"

script:
if(sraDb.name != 'NO_FILE') {
sraDbAsArgument = "--sra-db ${sraDb}"
if(processParams.mode == 'db') {
if(sraDb.name != 'NO_FILE') {
sraDbAsArgument = "--sra-db ${sraDb}"
} else {
if(!processParams.containsKey('sraDb') || processParams.sraDb == '')
throw new Exception("The db modue requires sraDb to be specified")
sraDbAsArgument = '--sra-db ' + processParams.sraDb
}
} else if(processParams.mode == 'web') {
sraDbAsArgument = ''
} else {
sraDbAsArgument = (processParams.containsKey('sraDb') && processParams.sraDb != '') ? '--sra-db ' + processParams.sraDb : ''
throw new Exception("The "+ processParams.mode +" mode does not exist. Choose one of: web, db.")
}
def sampleFiltersAsArguments = sampleFilters.collect({ '--sample-filter' + ' ' + it }).join(' ')
def sampleFiltersAsArguments = sampleFilters.collect({ '--sample-filter' + ' "' + it + '"'}).join(' ')
"""
${binDir}sra_to_metadata.py \
${sraId} \
Expand Down
Loading

0 comments on commit f56f454

Please sign in to comment.