// nextflow.config

/*
 * -------------------------------------------------
 *  TRON-Bioinformatics/tronflow-vcf-postprocessing Nextflow config file
 * -------------------------------------------------
 */

profiles {
  // Turn on Conda, both through the pipeline parameter and the Nextflow conda scope
  conda {
    params.enable_conda = true
    conda.enabled = true
  }

  // Print the host name before each process script runs
  debug {
    process {
      beforeScript = 'echo $HOSTNAME'
    }
  }

  // Continuous integration: small resource footprint and no execution reports
  ci {
    params {
      cpus = 1
      memory = "3g"
    }
    timeline { enabled = false }
    report { enabled = false }
    trace { enabled = false }
    dag { enabled = false }
  }

  // Point the pipeline at the test data shipped under test/data
  test {
    params {
      reference = "$baseDir/test/data/ucsc.hg19.minimal.fasta"
      input_vcfs = "$baseDir/test/data/test_input.txt"
    }
  }
}

// Exported to every process environment so that Python libraries installed
// locally by the user do not conflict with those in the container
env.PYTHONNOUSERSITE = 1

// Run process scripts with bash in strict mode (-e, -u, -o pipefail) so that
// exit codes from upstream commands in a pipe are captured
process {
  shell = ['/bin/bash', '-euo', 'pipefail']
}

// Pipeline version, shared by the manifest and the help message below
VERSION = '3.1.0'

// Pipeline metadata
manifest {
  // Identity
  name = 'TRON-Bioinformatics/tronflow-vcf-postprocessing'
  version = VERSION
  description = 'VCF postprocessing pipeline'

  // Provenance
  author = 'Pablo Riesgo-Ferreiro'
  homePage = 'https://github.com/TRON-Bioinformatics/tronflow-vcf-postprocessing'

  // Entry point and minimum Nextflow version required
  mainScript = 'main.nf'
  nextflowVersion = '>=19.10.0'
}

// Usage text of the pipeline, stored as a parameter.
// The version in the title is interpolated from VERSION.
// Example rows of the tab-separated input files use a literal tab between columns.
params.help_message = """
TronFlow VCF postprocessing v${VERSION}

Usage:
    nextflow run main.nf --input_vcfs input_vcfs --reference reference.fasta


Input:
    * --input_vcf: the path to a single VCF to normalize (not compatible with --input_vcfs)
    * --input_vcfs: the path to a tab-separated values file containing in each row the sample name and path to the VCF file (not compatible with --input_vcf)
    The input file does not have header!
    Example input file:
    sample1	/path/to/your/file.vcf
    sample2	/path/to/your/file2.vcf

Optional input:
    * --input_bams: a tab-separated values file containing in each row the sample name and path to a BAM file for VAFator annotations
    The input file does not have header!
    Example input file:
    sample1	primary:/path/to/your/file.bam
    sample1	metastasis:/path/to/your/file2.bam
    * --input_purities: a tab-separated values file containing in each row the sample name and a purity value for VAFator annotations
    The input file does not have header!
    Example input file:
    sample1	primary:0.5
    sample1	metastasis:0.6
    * --input_clonalities: a tab-separated values file containing in each row the sample name and either a
    genome-wide clonality value or a Bedgraph file with local clonality values for VAFator annotations
    The input file does not have header!
    Example input file:
    sample1	primary:3
    sample1	metastasis:/path/to/metastasis.local_clonalities.bed
    * --reference: absolute path to the FASTA genome reference (indexes expected *.fai, *.dict) [required for normalization and for functional annotation with BCFtools]
    * --gff: absolute path to a GFF gene annotations file [required for functional annotation with BCFtools, only Ensembl-like GFF files]
    * --vcf-without-ad: indicate when the VCFs to normalize do not have the FORMAT/AD annotation
    * --output: the folder where to publish output
    * --skip_normalization: flag indicating to skip all normalization steps
    * --skip_decompose_complex: flag indicating not to split complex variants (ie: MNVs and combinations of SNVs and indels)
    * --filter: specify the filter to apply if any (e.g.: PASS), only variants with this value will be kept
    * --skip_multiallelic_filter: after VAFator annotations if any multiallelic variant is present (ie: two different
    mutations in the same position) only the highest VAF variant is kept unless this flag is passed
    * --snpeff_organism: the SnpEff organism name (eg: hg19, hg38, GRCh37.75, GRCh38.99)
    * --snpeff_datadir: the SnpEff data folder where the reference genomes were previously downloaded. Required if --snpeff_organism is provided
    * --snpeff_args: additional SnpEff arguments
    * --snpeff_memory: for some samples SnpEff may require to use more memory (default: 3g)
    * --mapping_quality: VAFator minimum mapping quality (default: 0)
    * --base_call_quality: VAFator minimum base call quality (default: 0)
    * --phasing-sample: enables phasing with whatshap and defines the sample BAM to phase the VCF (BAMs need to be provided)

Output:
    * Normalized VCF file
    * Tab-separated values file with the absolute paths to the normalized VCF files, normalized_vcfs.txt
    * Summary statistics before and after normalization
    """