Skip to content
This repository was archived by the owner on Apr 19, 2023. It is now read-only.

Commit

Permalink
Merge branch 'master' into hotfixes.
Browse files Browse the repository at this point in the history
Former-commit-id: 5aee4c1
  • Loading branch information
dweemx committed Nov 18, 2019
2 parents 4b9db24 + 039e4da commit 3b2140a
Show file tree
Hide file tree
Showing 38 changed files with 2,438 additions and 864 deletions.
2 changes: 0 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -175,12 +175,10 @@ src/
├── scenic
│ ├── bin
│ │ ├── grnboost2_without_dask.py
│ │ └── merge_SCENIC_motif_track_loom.py
│ ├── processes
│ │ ├── aucell.nf
│ │ ├── cistarget.nf
│ │ ├── grnboost2withoutDask.nf
│ │ └── mergeScenicLooms.nf
│ ├── main.nf
│ └── scenic.config
Expand Down
5 changes: 1 addition & 4 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ manifest {
name = 'aertslab/SingleCellTxBenchmark'
description = 'A repository of pipelines for single-cell data in Nextflow DSL2'
homePage = 'https://github.com/aertslab/SingleCellTxBenchmark'
version = '0.3.1'
version = '0.3.3'
mainScript = 'main.nf'
defaultBranch = 'master'
nextflowVersion = '!19.09.0-edge' // with ! prefix, stop execution if current version does not match required version.
Expand All @@ -12,11 +12,8 @@ manifest {
params {
// These parameters are passed to all processes
global {
// baseFilePath = "/opt/SingleCellTxBenchmark"
// project_name = "MCF7"
project_name = "10x_PBMC"
outdir = "out"
// tenx_folder = "/ddn1/vol1/staging/leuven/stg_00002/lcb/lcb_projects/TWE/MCF7/10x/ControlvsTSA/cellranger/TEW*/outs/"
tenx_folder = "data/10x/1k_pbmc/1k_pbmc_*/outs/"
tracedir = "${params.global.outdir}/pipeline_reports"
qsubaccount = ""
Expand Down
Empty file removed src/__init__.py
Empty file.
59 changes: 58 additions & 1 deletion src/scenic/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,63 @@

## Running the pipeline

### Generate the config file

- Single SCENIC run

*Note*: Omit the `qsub` profile if you are not running the pipeline on a cluster.

```{bash}
nextflow config \
-profile scenic,qsub,singularity aertslab/SingleCellTxBenchmark \
> nextflow.config
```

- Multi-runs SCENIC

*Note*: Omit the `qsub` profile if you are not running the pipeline on a cluster.

```{bash}
nextflow config \
-profile scenic_multiruns,qsub,singularity aertslab/SingleCellTxBenchmark \
> nextflow.config
```

### Update the config file

Make sure the following parameters are correctly set:
- `params.global.project_name`
- `params.global.qsubaccount` if running on a cluster (SGE cluster)
- `params.sc.scenic.filteredloom`
- `params.sc.scenic.grn.TFs`
- `params.sc.scenic.cistarget.mtfDB`
- `params.sc.scenic.cistarget.mtfANN`
- `params.sc.scenic.cistarget.trkDB` if commented, track-based cisTarget won't run
- `params.sc.scenic.cistarget.trkANN` if commented, track-based cisTarget won't run
- `params.sc.scenic.numRuns` if running SCENIC in multi-runs mode
- `singularity.runOptions` Specify the paths to mount
- `params.sc.scope.tree`

Additionally, you can update the other parameters for the different steps.

### Run

```{bash}
nextflow -C nextflow.config run \
aertslab/SingleCellTxBenchmark \
-entry scenic \
-with-report report.html \
-with-trace
```

### Miscellaneous

Here is the DAG summarizing the multi-runs SCENIC workflow:

![Multi-Runs Motif and Track based SCENIC](assets/multi_runs_motif_track_scenic.svg)

## Testing the pipeline

```{bash}
nextflow -C conf/test.config,scenic.config run main.nf --test
nextflow -C conf/test.config,conf/test_multi_runs.config run main.nf --test
```
714 changes: 714 additions & 0 deletions src/scenic/assets/multi_runs_motif_track_scenic.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
102 changes: 20 additions & 82 deletions src/scenic/bin/add_visualization.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
import umap
from MulticoreTSNE import MulticoreTSNE as TSNE

import export_to_loom

################################################################################
################################################################################

Expand All @@ -39,43 +41,24 @@
args = parser.parse_args()


def df_to_named_matrix(df):
    """Convert a DataFrame into a NumPy structured (record) array.

    Each row of ``df`` becomes one record, and each column becomes a named
    field whose dtype is that column's dtype. The DataFrame index is not
    included in the result.

    Args:
        df: DataFrame whose column labels are usable as NumPy field names.

    Returns:
        A one-dimensional structured ``numpy.ndarray`` with one element per
        row of ``df`` and one field per column.
    """
    # DataFrame.as_matrix() was removed in pandas 1.0; ``.values`` yields the
    # same 2-D array and is available in both old and new pandas releases.
    records = [tuple(row) for row in df.values]
    # Field spec: (column name, column dtype) pairs, in column order.
    record_dtype = np.dtype(list(zip(df.dtypes.index, df.dtypes)))
    return np.array(records, dtype=record_dtype)


def visualize_AUCell(args):

################################################################################
# load data from loom
################################################################################

# scenic output
lf = lp.connect(args.loom_input, mode='r', validate=False)
meta = json.loads(zlib.decompress(base64.b64decode(lf.attrs.MetaData)))
auc_mtx = pd.DataFrame(lf.ca.RegulonsAUC, index=lf.ca.CellID)
regulons = pd.DataFrame(lf.ra.Regulons, index=lf.ra.Gene)
lf.close()


################################################################################
# Fix regulon objects to display properly in SCope:
# Load the data from the loom and merge if needed
################################################################################

# add underscore for SCope compatibility:
auc_mtx.columns = auc_mtx.columns.str.replace('\\(', '_(')

# add underscore for SCope compatibility:
regulons.columns = regulons.columns.str.replace('\\(', '_(')

# Rename regulons in the thresholds object, motif
rt = meta['regulonThresholds']
for i, x in enumerate(rt):
tmp = x.get('regulon').replace("(", "_(") # + '-motif'
x.update({'regulon': tmp})
with lp.connect(args.loom_input, mode='r', validate=False) as lf:

if "RegulonsAUC" in lf.ca.keys():
auc_mtx = pd.DataFrame(lf.ca.RegulonsAUC, index=lf.ca.CellID)
else:
print("Loom with motif & track regulons detected, merging the regulons AUC matrices...")
mtf_auc_mtx = pd.DataFrame(lf.ca.MotifRegulonsAUC, index=lf.ca.CellID)
trk_auc_mtx = pd.DataFrame(lf.ca.TrackRegulonsAUC, index=lf.ca.CellID)
# merge the AUC matrices:
auc_mtx = pd.concat([mtf_auc_mtx, trk_auc_mtx], sort=False, axis=1, join='outer')
# fill NAs (if any) with 0s:
auc_mtx.fillna(0, inplace=True)

################################################################################
# Visualize AUC matrix:
Expand All @@ -84,65 +67,20 @@ def visualize_AUCell(args):
# UMAP
run_umap = umap.UMAP(n_neighbors=10, min_dist=0.4, metric='correlation').fit_transform
dr_umap = run_umap(auc_mtx.dropna())

# tSNE
tsne = TSNE(n_jobs=args.num_workers)
dr_tsne = tsne.fit_transform(auc_mtx.dropna())


################################################################################
# embeddings
# Add visualization data
################################################################################

default_embedding = pd.DataFrame(dr_umap, columns=['_X', '_Y'], index=auc_mtx.dropna().index)

embeddings_x = pd.DataFrame(dr_tsne, columns=['_X', '_Y'], index=auc_mtx.dropna().index)[['_X']].astype('float32')
embeddings_y = pd.DataFrame(dr_tsne, columns=['_X', '_Y'], index=auc_mtx.dropna().index)[['_Y']].astype('float32')

embeddings_x.columns = ['1']
embeddings_y.columns = ['1']


################################################################################
# copy loom
################################################################################

copyfile(args.loom_input, args.loom_output)


################################################################################
# update scenic data
################################################################################

lf = lp.connect(args.loom_output, mode='r+', validate=False)

# write regulon information:
lf.ca['RegulonsAUC'] = df_to_named_matrix(auc_mtx)
lf.ra['Regulons'] = df_to_named_matrix(regulons)

# write embeddings:
lf.ca['Embedding'] = df_to_named_matrix(default_embedding)
lf.ca['Embeddings_X'] = df_to_named_matrix(embeddings_x)
lf.ca['Embeddings_Y'] = df_to_named_matrix(embeddings_y)

metaJson = {}
metaJson['embeddings'] = [
{
"id": -1,
"name": "SCENIC AUC UMAP"
},
{
"id": 1,
"name": "SCENIC AUC t-SNE"
},
]

metaJson["regulonThresholds"] = rt

lf.attrs['MetaData'] = base64.b64encode(zlib.compress(json.dumps(metaJson).encode('ascii'))).decode('ascii')

lf.close()
scope_loom = export_to_loom.SCopeLoom.read_loom(filename=args.loom_input)
scope_loom.add_embedding(embedding=dr_umap, embedding_name="SCENIC AUC UMAP", is_default=True)
scope_loom.add_embedding(embedding=dr_tsne, embedding_name="SCENIC AUC t-SNE", is_default=False)
scope_loom.export(out_fname=args.loom_output)


if __name__ == "__main__":
visualize_AUCell(args)

99 changes: 0 additions & 99 deletions src/scenic/bin/aggregate_SCENIC_multi_runs_features.py

This file was deleted.

Loading

0 comments on commit 3b2140a

Please sign in to comment.