From ead5caa554406b2a8c3064db4c00b11fb00c241d Mon Sep 17 00:00:00 2001 From: cflerin Date: Thu, 31 Oct 2019 14:25:37 +0100 Subject: [PATCH 1/7] Organized report output directory - Intermediate reports are saved to an 'intermediate' sub-directory - The final report is saved to the root of the notebooks directory (based on a publishDir pattern) - Reports are now hard links Former-commit-id: 646d12a1a51228a9bc907da473acc64dac481420 --- src/scanpy/processes/reports.nf | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/scanpy/processes/reports.nf b/src/scanpy/processes/reports.nf index 85a72d2c..729679f4 100644 --- a/src/scanpy/processes/reports.nf +++ b/src/scanpy/processes/reports.nf @@ -9,7 +9,7 @@ outputs ipynb named by the value in ${report_title} process SC__SCANPY__GENERATE_REPORT { container params.sc.scanpy.container - publishDir "${params.outdir}/notebooks", mode: 'symlink' + publishDir "${params.outdir}/notebooks/intermediate", mode: 'link', overwrite: true input: file ipynb @@ -30,7 +30,7 @@ process SC__SCANPY__GENERATE_REPORT { process SC__SCANPY__FILTER_QC_REPORT { container params.sc.scanpy.container - publishDir "${params.outdir}/notebooks", mode: 'symlink' + publishDir "${params.outdir}/notebooks/intermediate", mode: 'link', overwrite: true input: file(ipynb) @@ -51,7 +51,9 @@ process SC__SCANPY__FILTER_QC_REPORT { process SC__SCANPY__REPORT_TO_HTML { container params.sc.scanpy.container - publishDir "${params.outdir}/notebooks", mode: 'symlink' + publishDir "${params.outdir}/notebooks/intermediate", mode: 'link', overwrite: true + // copy final "merged_report" to notebooks root: + publishDir "${params.outdir}/notebooks", pattern: '*merged_report*', mode: 'link', overwrite: true input: file ipynb @@ -68,7 +70,9 @@ process SC__SCANPY__REPORT_TO_HTML { process SC__SCANPY__MERGE_REPORTS { container params.sc.scanpy.container - publishDir "${params.outdir}/notebooks", mode: 'symlink' + publishDir 
"${params.outdir}/notebooks/intermediate", mode: 'link', overwrite: true + // copy final "merged_report" to notebooks root: + publishDir "${params.outdir}/notebooks", pattern: '*merged_report*', mode: 'link', overwrite: true input: file(ipynbs) From dab4d741f500fd151195d47da6038213f3a64cdc Mon Sep 17 00:00:00 2001 From: cflerin Date: Thu, 31 Oct 2019 14:38:24 +0100 Subject: [PATCH 2/7] Changed cellranger publishDir to hard links Former-commit-id: 3507f4b9cac29ef0aa15fd12dde6aca70d342462 --- src/cellranger/processes/count.nf | 3 ++- src/cellranger/processes/mkfastq.nf | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/cellranger/processes/count.nf b/src/cellranger/processes/count.nf index daba2ecd..1fbb6406 100644 --- a/src/cellranger/processes/count.nf +++ b/src/cellranger/processes/count.nf @@ -2,7 +2,7 @@ nextflow.preview.dsl=2 process SC__CELLRANGER__COUNT { - publishDir "${params.global.outdir}/counts", mode: 'symlink' + publishDir "${params.global.outdir}/counts", mode: 'link', overwrite: true container params.sc.cellranger.container input: @@ -36,3 +36,4 @@ process SC__CELLRANGER__COUNT { ${(params.sc.cellranger.count.containsKey('indicies')) ? 
'--indicies ' + params.sc.cellranger.count.indicies: ''} """ } + diff --git a/src/cellranger/processes/mkfastq.nf b/src/cellranger/processes/mkfastq.nf index 52453d84..827e6757 100644 --- a/src/cellranger/processes/mkfastq.nf +++ b/src/cellranger/processes/mkfastq.nf @@ -2,7 +2,7 @@ nextflow.preview.dsl=2 process SC__CELLRANGER__MKFASTQ { - publishDir "${params.global.outdir}/fastqs", saveAs: { filename -> dirname = filename =~ /(.*)_fastqOut/; "${dirname[0][1]}" }, mode: 'symlink' + publishDir "${params.global.outdir}/fastqs", saveAs: { filename -> dirname = filename =~ /(.*)_fastqOut/; "${dirname[0][1]}" }, mode: 'link', overwrite: true container params.sc.cellranger.container input: @@ -35,3 +35,4 @@ process SC__CELLRANGER__MKFASTQ { done """ } + From e918bca4dce468d2f2a1bfe664419b6bafcd4893 Mon Sep 17 00:00:00 2001 From: cflerin Date: Thu, 31 Oct 2019 14:39:44 +0100 Subject: [PATCH 3/7] Utilities functions publishDir changes: - For endpoint loom files, changed to hard links - For intermediate data files, keep as symlink, and store in an 'intermediate' sub-directory Former-commit-id: e42744c03e458c97669f9c9c5d33213c4a4babc2 --- src/utils/processes/h5ad_to_loom.nf | 4 ++-- src/utils/processes/utils.nf | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/utils/processes/h5ad_to_loom.nf b/src/utils/processes/h5ad_to_loom.nf index 7c355a3e..ac118309 100644 --- a/src/utils/processes/h5ad_to_loom.nf +++ b/src/utils/processes/h5ad_to_loom.nf @@ -5,7 +5,7 @@ include getBaseName from './files.nf' process SC__H5AD_TO_LOOM { container params.sc.scanpy.container - publishDir "${params.outdir}/loom", mode: 'symlink' + publishDir "${params.outdir}/loom", mode: 'link', overwrite: true input: file(f) @@ -22,7 +22,7 @@ process SC__H5AD_TO_LOOM { process SC__H5AD_TO_FILTERED_LOOM { container params.sc.scanpy.container - publishDir "${params.outdir}/loom", mode: 'symlink' + publishDir "${params.outdir}/loom", mode: 'link', overwrite: true input: file(f) 
diff --git a/src/utils/processes/utils.nf b/src/utils/processes/utils.nf index 2005976f..9921bc1d 100644 --- a/src/utils/processes/utils.nf +++ b/src/utils/processes/utils.nf @@ -12,7 +12,7 @@ process SC__FILE_CONVERTER { cache 'deep' container params.sc.scanpy.container - publishDir "${params.outdir}/data", mode: 'symlink' + publishDir "${params.outdir}/data/intermediate", mode: 'symlink', overwrite: true input: set id, file(f) @@ -64,7 +64,7 @@ process SC__FILE_CONVERTER_HELP { process SC__FILE_CONCATENATOR() { container params.sc.scanpy.container - publishDir "${params.outdir}/data", mode: 'symlink' + publishDir "${params.outdir}/data/intermediate", mode: 'symlink', overwrite: true input: file(f) @@ -82,7 +82,7 @@ process SC__FILE_CONCATENATOR() { process SC__STAR_CONCATENATOR() { container params.sc.scanpy.container - publishDir "${params.outdir}/data", mode: 'symlink' + publishDir "${params.outdir}/data/intermediate", mode: 'symlink', overwrite: true input: file(f) @@ -102,7 +102,7 @@ include getBaseName from './files.nf' process SC__FILE_ANNOTATOR() { container params.sc.scanpy.container - publishDir "${params.outdir}/data", mode: 'symlink' + publishDir "${params.outdir}/data/intermediate", mode: 'symlink', overwrite: true input: file(f) From a8ae3dbb5b496ac7995d299c748f5df12fcd1612 Mon Sep 17 00:00:00 2001 From: cflerin Date: Thu, 31 Oct 2019 14:41:50 +0100 Subject: [PATCH 4/7] SCENIC publishDir changes - change all to hard links Former-commit-id: 1d2e634a8eb19d5ee0f25ea87e061fbd3aa0e518 --- src/scenic/processes/aggregateMultiRunsFeatures.nf | 2 +- src/scenic/processes/aggregateMultiRunsRegulons.nf | 2 +- src/scenic/processes/aucell.nf | 3 ++- src/scenic/processes/aucellGeneSigsFromFolder.nf | 3 ++- src/scenic/processes/cistarget.nf | 3 ++- src/scenic/processes/grnboost2withoutDask.nf | 2 +- src/scenic/processes/saveScenicMultiRunsToLoom.nf | 3 ++- src/scenic/processes/scenicLoomHandler.nf | 2 +- 8 files changed, 12 insertions(+), 8 deletions(-) diff --git 
a/src/scenic/processes/aggregateMultiRunsFeatures.nf b/src/scenic/processes/aggregateMultiRunsFeatures.nf index d0b6b4c8..fb03451d 100644 --- a/src/scenic/processes/aggregateMultiRunsFeatures.nf +++ b/src/scenic/processes/aggregateMultiRunsFeatures.nf @@ -9,7 +9,7 @@ if(!params.containsKey("test")) { process SC__SCENIC__AGGR_MULTI_RUNS_FEATURES { cache 'deep' container params.sc.scenic.container - publishDir "${params.sc.scenic.scenicoutdir}/multi_runs_cistarget/", mode: 'copy' + publishDir "${params.sc.scenic.scenicoutdir}/multi_runs_cistarget/", mode: 'link', overwrite: true clusterOptions "-l nodes=1:ppn=${params.sc.scenic.numWorkers} -l pmem=2gb -l walltime=24:00:00 -A ${params.global.qsubaccount}" input: diff --git a/src/scenic/processes/aggregateMultiRunsRegulons.nf b/src/scenic/processes/aggregateMultiRunsRegulons.nf index 0dd00d22..1a5dbc08 100644 --- a/src/scenic/processes/aggregateMultiRunsRegulons.nf +++ b/src/scenic/processes/aggregateMultiRunsRegulons.nf @@ -9,7 +9,7 @@ if(!params.containsKey("test")) { process SC__SCENIC__AGGR_MULTI_RUNS_REGULONS { cache 'deep' container params.sc.scenic.container - publishDir "${params.sc.scenic.scenicoutdir}", mode: 'copy' + publishDir "${params.sc.scenic.scenicoutdir}", mode: 'link', overwrite: true clusterOptions "-l nodes=1:ppn=${params.sc.scenic.numWorkers} -l pmem=2gb -l walltime=24:00:00 -A ${params.global.qsubaccount}" input: diff --git a/src/scenic/processes/aucell.nf b/src/scenic/processes/aucell.nf index 16e897cb..2f159333 100644 --- a/src/scenic/processes/aucell.nf +++ b/src/scenic/processes/aucell.nf @@ -5,7 +5,7 @@ nextflow.preview.dsl=2 process SC__SCENIC__AUCELL { cache 'deep' container params.sc.scenic.container - publishDir "${params.sc.scenic.scenicoutdir}/aucell/${params.sc.scenic.numRuns > 1 ? "run_" + runId : ""}", mode: 'symlink' + publishDir "${params.sc.scenic.scenicoutdir}/aucell/${params.sc.scenic.numRuns > 1 ? 
"run_" + runId : ""}", mode: 'link', overwrite: true clusterOptions "-l nodes=1:ppn=${params.sc.scenic.numWorkers} -l pmem=2gb -l walltime=24:00:00 -A ${params.global.qsubaccount}" maxForks params.sc.scenic.maxForks @@ -29,3 +29,4 @@ process SC__SCENIC__AUCELL { --num_workers ${params.sc.scenic.numWorkers} """ } + diff --git a/src/scenic/processes/aucellGeneSigsFromFolder.nf b/src/scenic/processes/aucellGeneSigsFromFolder.nf index 4507d2c7..de96e473 100644 --- a/src/scenic/processes/aucellGeneSigsFromFolder.nf +++ b/src/scenic/processes/aucellGeneSigsFromFolder.nf @@ -11,7 +11,7 @@ if(!params.containsKey("test")) { process SC__SCENIC__AUCELL_GENESIGS_FROM_FOLDER { cache 'deep' container params.sc.scenic.container - publishDir "${params.sc.scenic.scenicoutdir}/multi_runs_aucell/", mode: 'copy' + publishDir "${params.sc.scenic.scenicoutdir}/multi_runs_aucell/", mode: 'link', overwrite: true clusterOptions "-l nodes=1:ppn=${params.sc.scenic.numWorkers} -l pmem=2gb -l walltime=24:00:00 -A ${params.global.qsubaccount}" input: @@ -37,3 +37,4 @@ process SC__SCENIC__AUCELL_GENESIGS_FROM_FOLDER { --gene-attribute ${params.sc.scenic.gene_attribute} """ } + diff --git a/src/scenic/processes/cistarget.nf b/src/scenic/processes/cistarget.nf index 63c49130..ea15d511 100644 --- a/src/scenic/processes/cistarget.nf +++ b/src/scenic/processes/cistarget.nf @@ -5,7 +5,7 @@ nextflow.preview.dsl=2 process SC__SCENIC__CISTARGET { cache 'deep' container params.sc.scenic.container - publishDir "${params.sc.scenic.scenicoutdir}/cistarget/${params.sc.scenic.numRuns > 1 ? "run_" + runId : ""}", mode: 'symlink' + publishDir "${params.sc.scenic.scenicoutdir}/cistarget/${params.sc.scenic.numRuns > 1 ? 
"run_" + runId : ""}", mode: 'link', overwrite: true clusterOptions "-l nodes=1:ppn=${params.sc.scenic.numWorkers} -l pmem=2gb -l walltime=24:00:00 -A ${params.global.qsubaccount}" maxForks params.sc.scenic.maxForks @@ -54,3 +54,4 @@ process SC__SCENIC__CISTARGET { --min_genes MIN_GENES --expression_mtx_fname EXPRESSION_MTX_FNAME */ + diff --git a/src/scenic/processes/grnboost2withoutDask.nf b/src/scenic/processes/grnboost2withoutDask.nf index 030b26e5..aee4232b 100644 --- a/src/scenic/processes/grnboost2withoutDask.nf +++ b/src/scenic/processes/grnboost2withoutDask.nf @@ -11,7 +11,7 @@ if(!params.containsKey("test")) { process SC__SCENIC__GRNBOOST2WITHOUTDASK { cache 'deep' container params.sc.scenic.container - publishDir "${params.sc.scenic.scenicoutdir}/grnboost2withoutDask/${params.sc.scenic.numRuns > 1 ? "run_" + runId : ""}", mode: 'symlink' + publishDir "${params.sc.scenic.scenicoutdir}/grnboost2withoutDask/${params.sc.scenic.numRuns > 1 ? "run_" + runId : ""}", mode: 'link', overwrite: true clusterOptions "-l nodes=1:ppn=${params.sc.scenic.numWorkers} -l pmem=2gb -l walltime=24:00:00 -A ${params.global.qsubaccount}" maxForks params.sc.scenic.maxForks diff --git a/src/scenic/processes/saveScenicMultiRunsToLoom.nf b/src/scenic/processes/saveScenicMultiRunsToLoom.nf index 969de855..5278cf4a 100644 --- a/src/scenic/processes/saveScenicMultiRunsToLoom.nf +++ b/src/scenic/processes/saveScenicMultiRunsToLoom.nf @@ -11,7 +11,7 @@ if(!params.containsKey("test")) { process SC__SCENIC__SAVE_SCENIC_MULTI_RUNS_TO_LOOM { cache 'deep' container params.sc.scenic.container - publishDir "${params.sc.scenic.scenicoutdir}/multi_runs_looms/", mode: 'copy' + publishDir "${params.sc.scenic.scenicoutdir}/multi_runs_looms/", mode: 'link', overwrite: true clusterOptions "-l nodes=1:ppn=${params.sc.scenic.numWorkers} -l pmem=2gb -l walltime=24:00:00 -A ${params.global.qsubaccount}" input: @@ -43,3 +43,4 @@ process SC__SCENIC__SAVE_SCENIC_MULTI_RUNS_TO_LOOM { --scope-tree-level-3 
"${params.sc.scope.tree.level_3}" """ } + diff --git a/src/scenic/processes/scenicLoomHandler.nf b/src/scenic/processes/scenicLoomHandler.nf index 001e6ef3..7652e8f6 100644 --- a/src/scenic/processes/scenicLoomHandler.nf +++ b/src/scenic/processes/scenicLoomHandler.nf @@ -9,7 +9,7 @@ if(!params.containsKey("test")) { process SC__SCENIC__MERGESCENICLOOMS { cache 'deep' container params.sc.scenic.container - publishDir "${params.sc.scenic.scenicoutdir}", mode: 'copy' + publishDir "${params.sc.scenic.scenicoutdir}", mode: 'link', overwrite: true clusterOptions "-l nodes=1:ppn=${params.sc.scenic.numWorkers} -l pmem=2gb -l walltime=24:00:00 -A ${params.global.qsubaccount}" input: From 44c27fb906f251b307fce2dbb4eba85fef6b7ef9 Mon Sep 17 00:00:00 2001 From: cflerin Date: Thu, 31 Oct 2019 14:44:28 +0100 Subject: [PATCH 5/7] Scanpy publishDir changes - intermediate data files are kept as symlinks and moved to 'intermediate' sub-directory Former-commit-id: 60f0dbbc93c8ae932421d0c904e6201a48548714 --- src/scanpy/processes/adjust.nf | 3 ++- src/scanpy/processes/batch_effect_correct.nf | 3 ++- src/scanpy/processes/cluster.nf | 3 ++- src/scanpy/processes/dim_reduction.nf | 3 ++- src/scanpy/processes/feature_selection.nf | 3 ++- src/scanpy/processes/filter.nf | 4 ++-- src/scanpy/processes/marker_genes.nf | 3 ++- src/scanpy/processes/transform.nf | 7 ++++--- 8 files changed, 18 insertions(+), 11 deletions(-) diff --git a/src/scanpy/processes/adjust.nf b/src/scanpy/processes/adjust.nf index 84bcc00c..4c257a24 100644 --- a/src/scanpy/processes/adjust.nf +++ b/src/scanpy/processes/adjust.nf @@ -8,7 +8,7 @@ normalizationVariablesToRegressOutAsArguments = params.normalizationVariablesToR process SC__SCANPY__ADJUSTMENT { container params.sc.scanpy.container - publishDir "${params.outdir}/data", mode: 'symlink' + publishDir "${params.outdir}/data/intermediate", mode: 'symlink', overwrite: true input: file(f) @@ -23,3 +23,4 @@ process SC__SCANPY__ADJUSTMENT { 
"${getBaseName(f)}.SC__SCANPY__ADJUSTMENT.${params.off}" """ } + diff --git a/src/scanpy/processes/batch_effect_correct.nf b/src/scanpy/processes/batch_effect_correct.nf index 24e8bca3..07aba3db 100644 --- a/src/scanpy/processes/batch_effect_correct.nf +++ b/src/scanpy/processes/batch_effect_correct.nf @@ -5,7 +5,7 @@ include getBaseName from '../../utils/processes/files.nf' process SC__SCANPY__BATCH_EFFECT_CORRECTION { container params.sc.scanpy.container - publishDir "${params.outdir}/data", mode: 'symlink' + publishDir "${params.outdir}/data/intermediate", mode: 'symlink', overwrite: true input: file(f) @@ -28,3 +28,4 @@ process SC__SCANPY__BATCH_EFFECT_CORRECTION { $f """ } + diff --git a/src/scanpy/processes/cluster.nf b/src/scanpy/processes/cluster.nf index c7eea561..b3988183 100644 --- a/src/scanpy/processes/cluster.nf +++ b/src/scanpy/processes/cluster.nf @@ -5,7 +5,7 @@ include getBaseName from '../../utils/processes/files.nf' process SC__SCANPY__CLUSTERING { container params.sc.scanpy.container - publishDir "${params.outdir}/data", mode: 'symlink' + publishDir "${params.outdir}/data/intermediate", mode: 'symlink', overwrite: true input: file(f) @@ -20,3 +20,4 @@ process SC__SCANPY__CLUSTERING { "${getBaseName(f)}.SC__SCANPY__CLUSTERING.${params.off}" """ } + diff --git a/src/scanpy/processes/dim_reduction.nf b/src/scanpy/processes/dim_reduction.nf index 81f80d28..a3e49292 100644 --- a/src/scanpy/processes/dim_reduction.nf +++ b/src/scanpy/processes/dim_reduction.nf @@ -5,7 +5,7 @@ include getBaseName from '../../utils/processes/files.nf' process SC__SCANPY__DIM_REDUCTION { container params.sc.scanpy.container - publishDir "${params.outdir}/data", mode: 'symlink' + publishDir "${params.outdir}/data/intermediate", mode: 'symlink', overwrite: true input: file(f) @@ -26,3 +26,4 @@ process SC__SCANPY__DIM_REDUCTION { "${getBaseName(f)}.SC__SCANPY__DIM_REDUCTION_${method}.${params.off}" """ } + diff --git a/src/scanpy/processes/feature_selection.nf 
b/src/scanpy/processes/feature_selection.nf index e0703a49..97ce7d5a 100644 --- a/src/scanpy/processes/feature_selection.nf +++ b/src/scanpy/processes/feature_selection.nf @@ -5,7 +5,7 @@ include getBaseName from '../../utils/processes/files.nf' process SC__SCANPY__FEATURE_SELECTION { container params.sc.scanpy.container - publishDir "${params.outdir}/data", mode: 'symlink' + publishDir "${params.outdir}/data/intermediate", mode: 'symlink', overwrite: true input: file(f) @@ -23,3 +23,4 @@ process SC__SCANPY__FEATURE_SELECTION { "${getBaseName(f)}.SC__SCANPY__FEATURE_SELECTION.${params.off}" """ } + diff --git a/src/scanpy/processes/filter.nf b/src/scanpy/processes/filter.nf index 9756d35e..fa2c23ab 100644 --- a/src/scanpy/processes/filter.nf +++ b/src/scanpy/processes/filter.nf @@ -30,7 +30,7 @@ process SC__SCANPY__COMPUTE_QC_STATS { process SC__SCANPY__GENE_FILTER { container params.sc.scanpy.container - publishDir "${params.outdir}/data", mode: 'symlink' + publishDir "${params.outdir}/data/intermediate", mode: 'symlink', overwrite: true input: file(f) @@ -50,7 +50,7 @@ process SC__SCANPY__GENE_FILTER { process SC__SCANPY__CELL_FILTER { container params.sc.scanpy.container - publishDir "${params.outdir}/data", mode: 'symlink' + publishDir "${params.outdir}/data/intermediate", mode: 'symlink', overwrite: true input: file(f) diff --git a/src/scanpy/processes/marker_genes.nf b/src/scanpy/processes/marker_genes.nf index 83646023..b15f87bc 100644 --- a/src/scanpy/processes/marker_genes.nf +++ b/src/scanpy/processes/marker_genes.nf @@ -5,7 +5,7 @@ include getBaseName from '../../utils/processes/files.nf' process SC__SCANPY__MARKER_GENES { container params.sc.scanpy.container - publishDir "${params.outdir}/data", mode: 'symlink' + publishDir "${params.outdir}/data/intermediate", mode: 'symlink', overwrite: true input: file(f) @@ -21,3 +21,4 @@ process SC__SCANPY__MARKER_GENES { "${getBaseName(f)}.SC__SCANPY__MARKER_GENES.${params.off}" """ } + diff --git 
a/src/scanpy/processes/transform.nf b/src/scanpy/processes/transform.nf index 188d6a62..91ae9343 100644 --- a/src/scanpy/processes/transform.nf +++ b/src/scanpy/processes/transform.nf @@ -5,7 +5,7 @@ include getBaseName from '../../utils/processes/files.nf' process SC__SCANPY__NORMALIZATION { container params.sc.scanpy.container - publishDir "${params.outdir}/data", mode: 'symlink' + publishDir "${params.outdir}/data/intermediate", mode: 'symlink', overwrite: true input: file(f) @@ -24,7 +24,7 @@ process SC__SCANPY__NORMALIZATION { process SC__SCANPY__DATA_TRANSFORMATION { container params.sc.scanpy.container - publishDir "${params.outdir}/data", mode: 'symlink' + publishDir "${params.outdir}/data/intermediate", mode: 'symlink', overwrite: true input: file(f) @@ -42,7 +42,7 @@ process SC__SCANPY__DATA_TRANSFORMATION { process SC__SCANPY__FEATURE_SCALING { container params.sc.scanpy.container - publishDir "${params.outdir}/data", mode: 'symlink' + publishDir "${params.outdir}/data/intermediate", mode: 'symlink', overwrite: true input: file(f) @@ -57,3 +57,4 @@ process SC__SCANPY__FEATURE_SCALING { "${getBaseName(f)}.SC__SCANPY__FEATURE_SCALING.${params.off}" """ } + From 58f2722fa8ee64d9efe52812d4ba5b2b4f21b383 Mon Sep 17 00:00:00 2001 From: cflerin Date: Thu, 31 Oct 2019 16:03:46 +0100 Subject: [PATCH 6/7] Publish the final anndata file to outdir - Added a process to symlink, then publish the final anndata file - Updated the workflows to name, and publish the final anndata file - Fixes to the single_sample_star workflow Former-commit-id: cc73a12edf3f075a5c081bc3c2025ce8f1ce6576 --- src/scanpy/workflows/bec_bbknn.nf | 3 +++ src/utils/processes/utils.nf | 16 ++++++++++++++++ workflows/single_sample.nf | 3 +++ workflows/single_sample_star.nf | 12 ++++++++---- 4 files changed, 30 insertions(+), 4 deletions(-) diff --git a/src/scanpy/workflows/bec_bbknn.nf b/src/scanpy/workflows/bec_bbknn.nf index e6552f03..d5ed1b59 100644 --- a/src/scanpy/workflows/bec_bbknn.nf +++ 
b/src/scanpy/workflows/bec_bbknn.nf @@ -30,6 +30,7 @@ include SC__SCANPY__CLUSTERING from '../processes/cluster.nf' params(params.sc.s include SC__SCANPY__DIM_REDUCTION as SC__SCANPY__DIM_REDUCTION__UMAP from '../processes/dim_reduction.nf' params(params.sc.scanpy.dim_reduction.umap + params.global + params) include SC__H5AD_TO_LOOM from '../../utils/processes/h5ad_to_loom.nf' params(params.global + params) include CLUSTER_IDENTIFICATION from './cluster_identification.nf' params(params + params.global) +include SC__PUBLISH_H5AD from '../../utils/processes/utils.nf' params(params + params.global) ////////////////////////////////////////////////////// // Define the workflow @@ -41,6 +42,8 @@ workflow BEC_BBKNN { SC__SCANPY__BATCH_EFFECT_CORRECTION( data ) CLUSTER_IDENTIFICATION( SC__SCANPY__BATCH_EFFECT_CORRECTION.out ) SC__SCANPY__DIM_REDUCTION__UMAP( CLUSTER_IDENTIFICATION.out.marker_genes ) + SC__PUBLISH_H5AD( SC__SCANPY__DIM_REDUCTION__UMAP.out, + params.global.project_name+".BEC_BBKNN.output") SC__H5AD_TO_LOOM(SC__SCANPY__DIM_REDUCTION__UMAP.out ) // Not using t-SNE as it does not use the neighbour graph (which BBKNN alters) when constructing its dimensionality reduction emit: diff --git a/src/utils/processes/utils.nf b/src/utils/processes/utils.nf index 9921bc1d..d83b1eb0 100644 --- a/src/utils/processes/utils.nf +++ b/src/utils/processes/utils.nf @@ -118,3 +118,19 @@ process SC__FILE_ANNOTATOR() { "${getBaseName(f)}.SC__FILE_ANNOTATOR.${params.off}" """ } + +process SC__PUBLISH_H5AD { + + publishDir "${params.outdir}/data", mode: 'link', overwrite: true + + input: + file f_in + val f_out + output: + file "${f_out}.h5ad" + script: + """ + ln -s ${f_in} ${f_out}.h5ad + """ +} + diff --git a/workflows/single_sample.nf b/workflows/single_sample.nf index 4e77f895..cd337053 100644 --- a/workflows/single_sample.nf +++ b/workflows/single_sample.nf @@ -31,6 +31,7 @@ include DIM_REDUCTION from '../src/scanpy/workflows/dim_reduction.nf' params(par include 
CLUSTER_IDENTIFICATION from '../src/scanpy/workflows/cluster_identification.nf' params(params + params.global) include SC__H5AD_TO_LOOM from '../src/utils/processes/h5ad_to_loom.nf' params(params + params.global) include SC__H5AD_TO_FILTERED_LOOM from '../src/utils/processes/h5ad_to_loom.nf' params(params + params.global) +include SC__PUBLISH_H5AD from '../src/utils/processes/utils.nf' params(params + params.global) // data channel to start from 10x data: include getChannel as getTenXChannel from '../src/channels/tenx.nf' params(params) @@ -49,6 +50,8 @@ workflow single_sample { DIM_REDUCTION( HVG_SELECTION.out.scaled ) CLUSTER_IDENTIFICATION( DIM_REDUCTION.out.dimred ) scopeloom = SC__H5AD_TO_LOOM( CLUSTER_IDENTIFICATION.out.marker_genes ) + SC__PUBLISH_H5AD( CLUSTER_IDENTIFICATION.out.marker_genes, + params.global.project_name+".single_sample.output") // reporting: SC__SCANPY__MERGE_REPORTS( QC_FILTER.out.report.mix( diff --git a/workflows/single_sample_star.nf b/workflows/single_sample_star.nf index b1c67d99..7d898d13 100644 --- a/workflows/single_sample_star.nf +++ b/workflows/single_sample_star.nf @@ -30,6 +30,7 @@ include HVG_SELECTION from '../src/scanpy/workflows/hvg_selection.nf' params(par include DIM_REDUCTION from '../src/scanpy/workflows/dim_reduction.nf' params(params + params.global) include CLUSTER_IDENTIFICATION from '../src/scanpy/workflows/cluster_identification.nf' params(params + params.global) include SC__H5AD_TO_LOOM from '../src/utils/processes/h5ad_to_loom.nf' params(params + params.global) +include SC__PUBLISH_H5AD from '../src/utils/processes/utils.nf' params(params + params.global) // data channel to start from 10x data: include getChannel as getTenXChannel from '../src/channels/tenx.nf' params(params) @@ -39,10 +40,13 @@ workflow single_sample_star { data = STAR() QC_FILTER( data ) - NORMALIZE_TRANSFORM( QC_FILTER.out ) + NORMALIZE_TRANSFORM( QC_FILTER.out.filtered ) HVG_SELECTION( NORMALIZE_TRANSFORM.out ) - DIM_REDUCTION( 
HVG_SELECTION.out ) - CLUSTER_IDENTIFICATION( DIM_REDUCTION.out ) - filteredloom = SC__H5AD_TO_LOOM( CLUSTER_IDENTIFICATION.out ) + DIM_REDUCTION( HVG_SELECTION.out.scaled ) + CLUSTER_IDENTIFICATION( DIM_REDUCTION.out.dimred ) + SC__PUBLISH_H5AD( CLUSTER_IDENTIFICATION.out.marker_genes, + params.global.project_name+".single_sample_star.output") + filteredloom = SC__H5AD_TO_LOOM( CLUSTER_IDENTIFICATION.out.marker_genes ) } + From 5419c45711a1a1062bd8fe668cc32e8624c0ddb5 Mon Sep 17 00:00:00 2001 From: Chris Flerin Date: Thu, 7 Nov 2019 10:02:39 +0100 Subject: [PATCH 7/7] publishDir updates for scenic loom handler Former-commit-id: 82957f5802a2f69dfc6d3c2aca77578af1a0ec12 --- src/scenic/processes/scenicLoomHandler.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/scenic/processes/scenicLoomHandler.nf b/src/scenic/processes/scenicLoomHandler.nf index 83eeada6..22760b51 100644 --- a/src/scenic/processes/scenicLoomHandler.nf +++ b/src/scenic/processes/scenicLoomHandler.nf @@ -68,7 +68,7 @@ process SC__SCENIC__MERGE_MOTIF_TRACK_LOOMS { process SC__SCENIC__APPEND_SCENIC_LOOM { cache 'deep' container params.sc.scenic.container - publishDir "${params.sc.scenic.scenicoutdir}", mode: 'copy' + publishDir "${params.sc.scenic.scenicoutdir}", mode: 'link', overwrite: true input: file scopeloom