This repository has been archived by the owner on Dec 3, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
align.nf
executable file
·136 lines (124 loc) · 5.07 KB
/
align.nf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
#! /usr/bin/env nextflow
/*
* Aligns fasta/fastq files to a reference and reports on the alignments and raw fastq file.
*
* Uses bwa mem for alignment.
* Generates QualiMap and Nanoplot reports for alignments.
* Generates FastQC reports for fastq files.
*
* usage : ./align.nf --threads # --reference <fasta ref file name>
*
*
* @author Dave Roe
*/
params.home = baseDir
home = params.home + "/"
params.raw = "/opt/kass/raw"
params.output = home + "/output"
output = params.output + "/"
params.reference = ""
// for raw fastq, use --bwa "-xpacbio" (todo: how to run on command line?)
// for exact haplotype references, use this
// but be aware: susceptible to soft clipping
// params.bwa = "-k1800 -W9000 -r10 -A1 -B100 -O40 -E40 -L50"
// for inexact haplotype references, can start with this or just leave all default
params.threads = "7"
params.minimap = "minimap2 -Y -B 77 -O 25 -E 25 -a -N 2 -t ${params.threads} -x map-pb "
params.container = "droeatumn/kass:latest"
params.nocontainer = "null"
refFile = file("${params.raw}/${params.reference}")
raw = "${params.raw}/*{fastq,fq,fastq.gz,fq.gz,fasta,fa,fasta.gz,fa.gz}"
reads = Channel.fromPath(raw).ifEmpty { error "cannot find any reads matching ${raw}" }.map { path -> tuple(sample(path), path) }
alignmentReads = Channel.create()
readTap = reads.tap(alignmentReads).filter{ it[1] != refFile }
//System.err.println readTap
process align {
if(params.nocontainer == "null") {
container = params.container
}
publishDir params.output, mode: 'copy', overwrite: true
errorStrategy 'ignore'
tag { s }
// r is the input to be aligned
input:
set s, file(r) from readTap
file ref from refFile
output:
// set s, file {"*_sorted.bam*"} into bamOutput
tuple val(s), file("*_sorted.bam"), file("*_sorted.bam.bai"), file(r) into bamOutput
set s, file {"*unaligned.sam"} into unaligned
set s, file {"*.fasta.*" } into refs
set s, file {"bowtie_indexes" } into btIndexes
script:
// todo: modularize this chunk
def refName = ref.name.replaceFirst(".fasta", "")
def bamName = r.name.replaceFirst(".fastq.gz", "").replaceFirst(".fasta.gz", "").replaceFirst(".fastq", "").replaceFirst(".fasta", "")
def outName = bamName + "_" + refName
def fastaFlag = "" // tell bowtie it is a fasta or fastq
if((r.name.endsWith("fasta") || r.name.endsWith("fa")) ||
r.name.endsWith("fasta.gz") || r.name.endsWith("fa.gz")) {
fastaFlag = "-f"
}
"""
# indexing
# todo: split this; only needs to happen once
echo ${s} ${r} ${params.reference} ${refName} $fastaFlag
samtools faidx ${params.reference}
bwa index ${params.reference}
mkdir -p bowtie_indexes
cd bowtie_indexes
bowtie2-build ../${params.reference} ${refName}_index
cd ..
# alignment
${params.minimap} ${ref} ${r} > ${outName}.sam 2> ${outName}_err.txt
samtools view -h -O SAM -f 4 ${outName}.sam > ${outName}_unaligned.sam
samtools view -h -O SAM -F 0x04 ${outName}.sam > ${outName}_aligned.sam
samtools view -h -O BAM ${outName}_aligned.sam > ${outName}.bam
samtools sort ${outName}.bam -o ${outName}_sorted.bam
samtools index ${outName}_sorted.bam
# rm ${outName}.[bs]am
"""
} // align
process report {
if(params.nocontainer == "null") {
container = params.container
}
errorStrategy 'ignore'
publishDir params.output, mode: 'copy', overwrite: true
tag { s }
// r is the input to be aligned
input:
set s, file(bamFile), file(bamIndexFile), file(r) from bamOutput
file ref from refFile
output:
set s, file {"*_reports" } into reports
script:
// todo: modularize this chunk
def refName = ref.name.replaceFirst(".fasta", "")
def bamName = r.name.replaceFirst(".fastq.gz", "").replaceFirst(".fasta.gz", "").replaceFirst(".fastq", "").replaceFirst(".fasta", "")
def outName = bamName + "_" + refName
def fastaFlag = "" // tell bowtie it is a fasta or fastq
if((r.name.endsWith("fasta") || r.name.endsWith("fa")) ||
r.name.endsWith("fasta.gz") || r.name.endsWith("fa.gz")) {
fastaFlag = "-f"
}
"""
mkdir -p ${outName}_reports
qualimap bamqc -bam ${outName}_sorted.bam -gd HUMAN -outdir ${outName}_reports -outformat PDF
mv ${outName}_reports/report.pdf ${outName}_reports/${outName}_qualimap.pdf
mv ${outName}_reports/genome_results.txt ${outName}_reports/${outName}_qualimap_genome_results.txt
NanoPlot -t ${params.threads} --bam ${outName}_sorted.bam -o ${outName}_reports -p ${outName} -f pdf --N50 || true
if ["$fastaFlag" == ""]
then
fastqc -t ${params.threads} -o ${outName}_reports -f fastq ${r}
fi
# quast.py --no-sv --k-mer-size 900 --min-alignment 20000 --extensive-mis-size 30000 ${r} -r ${ref} || true
quast.py --no-sv --k-mer-size 900 --min-alignment 10000 --extensive-mis-size 30000 ${r} -r ${ref} || true
mv quast_results/results* ${outName}_reports/quast || true
"""
} // report
def sample(Path path) {
def name = path.getFileName().toString()
int start = Math.max(0, name.lastIndexOf('/'))
return name.substring(start, name.indexOf('.'))
}