-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.nf
151 lines (111 loc) · 3.24 KB
/
main.nf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
#!/usr/bin/env nextflow
nextflow.enable.dsl=2
//--------------------------------------------------------------------------
// Param Checking
//--------------------------------------------------------------------------
// Both parameters are mandatory; fail fast with a clear message before any
// work is scheduled.
if(!params.fastaSubsetSize) {
throw new Exception("Missing params.fastaSubsetSize")
}
if(params.inputFilePath) {
// Split the input FASTA into chunks of fastaSubsetSize sequences so the
// signalp processes can run on them in parallel. checkIfExists makes a
// bad path fail at channel creation rather than inside a process task.
seqs = Channel.fromPath( params.inputFilePath, checkIfExists: true )
.splitFasta( by:params.fastaSubsetSize, file:true )
}
else {
throw new Exception("Missing params.inputFilePath")
}
//--------------------------------------------------------------------------
// Main Workflow
//--------------------------------------------------------------------------
workflow {
// First pass: run signalp5 over the initial FASTA chunks.
signalp5(seqs)

// Keep only the proteins that pass the score cutoffs, judged from the
// collected signalp5 prediction summaries.
filterInputFastaByResults(params.inputFilePath, signalp5.out.pred_summary.collectFile())

// Re-chunk the (smaller) filtered FASTA for signalp4 and signalp6.
// Chunk size is overridable via params.filteredFastaSubsetSize and
// defaults to the previous hard-coded value of 200.
filteredSeq = filterInputFastaByResults.out.splitFasta( by:(params.filteredFastaSubsetSize ?: 200), file:true )
signalp4(filteredSeq)
signalp6(filteredSeq)

// Merge the gff output of all three signalp versions into one file,
// then sort/compress/index it for downstream use.
collectedGff = signalp5.out.gff.mix(signalp4.out.gff).mix(signalp6.out.gff).collectFile(name: 'result.gff3')
indexResults(collectedGff)
}
// Filters the full input FASTA, keeping only proteins selected by
// filterProteinsByScore.pl from the signalp5 prediction summaries.
process filterInputFastaByResults {
container = 'bioperl/bioperl:stable'
input:
// full, unsplit input FASTA (params.inputFilePath)
path fasta
// concatenated signalp5 short-format prediction summaries
path predictions
output:
path "filtered.fasta"
script:
// Cutoffs come from task.ext (filter_score_cutoff,
// filter_min_protein_percent_cutoff), configured per-process in the
// nextflow config.
"""
filterProteinsByScore.pl --fasta $fasta \
--predictions $predictions \
--score_cutoff ${task.ext.filter_score_cutoff} \
--pct_proteins_cutoff ${task.ext.filter_min_protein_percent_cutoff} \
--output_file filtered.fasta
"""
}
// Runs SignalP 6 (fast mode) on one FASTA chunk and normalises its two
// gff outputs into a single combined gff3 via fixAndCombineGff.pl.
process signalp6 {
label = "signalp"
input:
path subsetFasta
output:
path "combined.gff3", emit: gff
script:
// NOTE(review): output.gff3 / region_output.gff3 are assumed to be the
// default file names signalp6 writes into --output_dir — confirm against
// the installed signalp6 version.
"""
signalp6 --fastafile $subsetFasta \
--format none \
--organism $params.org \
--mode fast \
--output_dir .
fixAndCombineGff.pl --gff output.gff3 --region_gff region_output.gff3 --sp_version 6 --output_file combined.gff3
"""
}
// Runs SignalP 4 on one FASTA chunk and converts its gff2 output to gff3.
// The short-format prediction summary is written but not emitted as an
// output channel.
process signalp4 {
label = "signalp"
input:
path subsetFasta
output:
path "signalp4.gff3", emit: gff
script:
"""
signalp4 -f short \
-t $params.org \
-n signalp4.gff2 \
$subsetFasta >sp4_prediction_summary.txt
# make gff3 format (remove the group column)
fixAndCombineGff.pl --gff signalp4.gff2 --sp_version 4 --output_file signalp4.gff3
"""
}
// Runs SignalP 5 on one FASTA chunk. Emits both the short-format prediction
// summary (consumed downstream by filterInputFastaByResults) and a
// normalised gff3.
process signalp5 {
label = "signalp"
input:
path subsetFasta
output:
// captured stdout of signalp (-stdout redirected below)
path 'sp5_prediction_summary.txt', emit: pred_summary
// gff3 produced by fixAndCombineGff.pl from signalp5.gff3
path "subsetFasta.gff3", emit: gff
script:
"""
signalp -fasta $subsetFasta \
-format short \
-gff3 \
-org $params.org \
-plot 'none' \
-prefix signalp5 \
-stdout >sp5_prediction_summary.txt
fixAndCombineGff.pl --gff signalp5.gff3 --sp_version 5 --output_file subsetFasta.gff3
"""
}
// TODO: consider extracting indexResults into a reusable module.
// Sorts, bgzip-compresses, and tabix-indexes the merged gff, publishing
// the compressed file and its index to params.outputDir.
process indexResults {
container = 'biocontainers/tabix:v1.9-11-deb_cv1'
publishDir params.outputDir, mode: 'copy'
input:
path gff
output:
path '*.gff.gz'
path '*.tbi'
script:
// Sort by sequence id, then numeric start position — the coordinate
// ordering tabix requires before indexing.
"""
sort -k1,1 -k4,4n $gff > sorted.gff
bgzip sorted.gff
tabix -p gff sorted.gff.gz
"""
}