-
Notifications
You must be signed in to change notification settings - Fork 1
/
nextflow.config
101 lines (90 loc) · 4.54 KB
/
nextflow.config
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
/*
* -------------------------------------------------
* TRON-Bioinformatics/tronflow-vcf-postprocessing Nextflow config file
* -------------------------------------------------
*/
// Named configuration profiles; each one overrides a small group of settings.
profiles {

    // conda: switches conda on, both through the pipeline parameter
    // `enable_conda` and through the `conda.enabled` configuration setting.
    conda {
        params.enable_conda = true
        conda.enabled = true
    }

    // debug: runs `echo $HOSTNAME` as the process beforeScript.
    debug {
        process {
            beforeScript = 'echo $HOSTNAME'
        }
    }

    // ci: small resource parameters and no timeline/report/trace/DAG output.
    ci {
        params {
            cpus = 1
            memory = "3g"
        }
        timeline.enabled = false
        report.enabled = false
        trace.enabled = false
        dag.enabled = false
    }

    // test: points the pipeline inputs at the data shipped under test/data.
    test {
        params {
            reference = "$baseDir/test/data/ucsc.hg19.minimal.fasta"
            input_vcfs = "$baseDir/test/data/test_input.txt"
        }
    }
}
// Exported to the task environment so that Python libraries installed locally
// for the user cannot conflict with the ones provided by the container.
env.PYTHONNOUSERSITE = 1

// Task scripts run under bash with -e, -u and -o pipefail, so that the exit
// code of an upstream command in a pipe is captured.
process {
    shell = ['/bin/bash', '-euo', 'pipefail']
}
// Pipeline version, declared once and reused by the manifest below and by the
// help message.
VERSION = '3.1.0'

// Pipeline metadata.
manifest {
    name = 'TRON-Bioinformatics/tronflow-vcf-postprocessing'
    version = VERSION
    description = 'VCF postprocessing pipeline'
    author = 'Pablo Riesgo-Ferreiro'
    homePage = 'https://github.com/TRON-Bioinformatics/tronflow-vcf-postprocessing'
    mainScript = 'main.nf'
    // Version constraint on the Nextflow runtime.
    nextflowVersion = '>=19.10.0'
}
// Usage text for the pipeline, stored as a parameter; it interpolates VERSION.
// NOTE(review): presumably printed by main.nf when help is requested - confirm.
// Fixes relative to the previous text:
//   * --input_vcf is documented as incompatible with --input_vcfs (the text
//     named a non-existent --input_files option)
//   * the --input_bams example uses .bam paths instead of .vcf paths
//   * the second, contradictory --input_bams entry was removed
//   * typo "a either a" corrected
params.help_message = """
TronFlow VCF postprocessing v${VERSION}
Usage:
nextflow run main.nf --input_vcfs input_vcfs --reference reference.fasta
Input:
* --input_vcf: the path to a single VCF to normalize (not compatible with --input_vcfs)
* --input_vcfs: the path to a tab-separated values file containing in each row the sample name and path to the VCF file (not compatible with --input_vcf)
The input file does not have header!
Example input file:
sample1 /path/to/your/file.vcf
sample2 /path/to/your/file2.vcf
Optional input:
* --input_bams: a tab-separated values file containing in each row the sample name and path to a BAM file for VAFator annotations
The input file does not have header!
Example input file:
sample1 primary:/path/to/your/file.bam
sample1 metastasis:/path/to/your/file2.bam
* --input_purities: a tab-separated values file containing in each row the sample name and a purity value for VAFator annotations
The input file does not have header!
Example input file:
sample1 primary:0.5
sample1 metastasis:0.6
* --input_clonalities: a tab-separated values file containing in each row the sample name and either a
genome-wide clonality value or a Bedgraph file with local clonality values for VAFator annotations
The input file does not have header!
Example input file:
sample1 primary:3
sample1 metastasis:/path/to/metastasis.local_clonalities.bed
* --reference: absolute path to the FASTA genome reference (indexes expected *.fai, *.dict) [required for normalization and for functional annotation with BCFtools]
* --gff: absolute path to a GFF gene annotations file [required for functional annotation with BCFtools, only Ensembl-like GFF files]
* --vcf-without-ad: indicate when the VCFs to normalize do not have the FORMAT/AD annotation
* --output: the folder where to publish output
* --skip_normalization: flag indicating to skip all normalization steps
* --skip_decompose_complex: flag indicating not to split complex variants (ie: MNVs and combinations of SNVs and indels)
* --filter: specify the filter to apply if any (e.g.: PASS), only variants with this value will be kept
* --skip_multiallelic_filter: after VAFator annotations if any multiallelic variant is present (ie: two different
mutations in the same position) only the highest VAF variant is kept unless this flag is passed
* --snpeff_organism: the SnpEff organism name (eg: hg19, hg38, GRCh37.75, GRCh38.99)
* --snpeff_datadir: the SnpEff data folder where the reference genomes were previously downloaded. Required if --snpeff_organism is provided
* --snpeff_args: additional SnpEff arguments
* --snpeff_memory: for some samples SnpEff may require to use more memory (default: 3g)
* --mapping_quality: VAFator minimum mapping quality (default: 0)
* --base_call_quality: VAFator minimum base call quality (default: 0)
* --phasing-sample: enables phasing with whatshap and defines the sample BAM to phase the VCF (BAMs need to be provided)
Output:
* Normalized VCF file
* Tab-separated values file with the absolute paths to the normalized VCF files, normalized_vcfs.txt
* Summary statistics before and after normalization
"""