-
Notifications
You must be signed in to change notification settings - Fork 1
/
fastq_quality_control.nf
390 lines (319 loc) · 11.3 KB
/
fastq_quality_control.nf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
#!/usr/bin/env nextflow
/*================================================================
The AGUILAR LAB presents...
The fastq QC processing Pipeline
- A fastq trimming and BEFORE and AFTER comparisson tool
==================================================================
Version: 0.0.2
Project repository: https://github.com/Iaguilaror/fastq_QC_processing
==================================================================
Authors:
- Bioinformatics Design
Israel Aguilar-Ordonez (iaguilaror@gmail.com)
- Bioinformatics Development
Israel Aguilar-Ordonez (iaguilaror@gmail.com)
- Nextflow Port
Israel Aguilar-Ordonez (iaguilaror@gmail.com)
=============================
Pipeline Processes In Brief:
.
Pre-processing:
_pre1_fastqc_before
Core-processing:
_001_trimmomatic
Pos-processing
_pos1_gather_sample
Anlysis
_an1_trimreport
ENDING
_register_configs
================================================================*/
/* Define the help message as a function to call when needed *//////////////////////////////
def helpMessage() {
log.info"""
==========================================
The fastq QC processing Pipeline
- A fastq trimming and BEFORE and AFTER comparisson tool
v${version}
==========================================
Usage:
nextflow run fastq_quality_control.nf --input_dir <path to inputs> [--output_dir path to results ]
--input_dir <- To-do;
To-do;
To-do;
--output_dir <- directory where results, intermediate and log files will bestored;
default: same dir where --query_fasta resides
-resume <- Use cached results if the executed project has been run before;
default: not activated
This native NF option checks if anything has changed from a previous pipeline execution.
Then, it resumes the run from the last successful stage.
i.e. If for some reason your previous run got interrupted,
running the -resume option will take it from the last successful pipeline stage
instead of starting over
Read more here: https://www.nextflow.io/docs/latest/getstarted.html#getstart-resume
--help <- Shows Pipeline Information
--version <- Show Pipeline version
""".stripIndent()
}
/*//////////////////////////////
Define pipeline version
If you bump the number, remember to bump it in the header description at the begining of this script too
*/
version = "0.0.2"
/*//////////////////////////////
Define pipeline Name
This will be used as a name to include in the results and intermediates directory names
*/
pipeline_name = "fastq_quality_control"
/*
Initiate default values for parameters
to avoid "WARN: Access to undefined parameter" messages
*/
params.input_dir = false //if no inputh path is provided, value is false to provoke the error during the parameter validation block
params.help = false //default is false to not trigger help message automatically at every run
params.version = false //default is false to not trigger version message automatically at every run
/*//////////////////////////////
If the user inputs the --help flag
print the help message and exit pipeline
*/
if (params.help){
helpMessage()
exit 0
}
/*//////////////////////////////
If the user inputs the --version flag
print the pipeline version
*/
if (params.version){
println "Pipeline v${version}"
exit 0
}
/*//////////////////////////////
Define the Nextflow version under which this pipeline was developed or successfuly tested
Updated by iaguilar at MAY 2021
*/
nextflow_required_version = '20.10.0'
/*
Try Catch to verify compatible Nextflow version
If user Nextflow version is lower than the required version pipeline will continue
but a message is printed to tell the user maybe it's a good idea to update her/his Nextflow
*/
try {
if( ! nextflow.version.matches(">= $nextflow_required_version") ){
throw GroovyException('Your Nextflow version is older than Pipeline required version')
}
} catch (all) {
log.error "-----\n" +
" This pipeline requires Nextflow version: $nextflow_required_version \n" +
" But you are running version: $workflow.nextflow.version \n" +
" The pipeline will continue but some things may not work as intended\n" +
" You may want to run `nextflow self-update` to update Nextflow\n" +
"============================================================"
}
/*//////////////////////////////
INPUT PARAMETER VALIDATION BLOCK
TODO (iaguilar) check the extension of input queries; see getExtension() at https://www.nextflow.io/docs/latest/script.html#check-file-attributes
*/
/* Check if inputs provided
if they were not provided, they keep the 'false' value assigned in the parameter initiation block above
and this test fails
*/
if ( !params.input_dir ) {
log.error " Please provide the following params: --input_dir \n\n" +
" For more information, execute: nextflow run fastq_quality_control.nf --help"
exit 1
}
/*
Output directory definition
Default value to create directory is the parent dir of --input_dir
*/
params.output_dir = file(params.input_dir).getParent()
/*
Results and Intermediate directory definition
They are always relative to the base Output Directory
and they always include the pipeline name in the variable (pipeline_name) defined by this Script
This directories will be automatically created by the pipeline to store files during the run
*/
results_dir = "${params.output_dir}/${pipeline_name}-results/"
intermediates_dir = "${params.output_dir}/${pipeline_name}-intermediate/"
/*
Useful functions definition
*/
/*//////////////////////////////
LOG RUN INFORMATION
*/
log.info"""
==========================================
The fastq QC processing Pipeline
- A fastq trimming and BEFORE and AFTER comparisson tool
v${version}
==========================================
"""
log.info "--Nextflow metadata--"
/* define function to store nextflow metadata summary info */
def nfsummary = [:]
/* log parameter values beign used into summary */
/* For the following runtime metadata origins, see https://www.nextflow.io/docs/latest/metadata.html */
nfsummary['Resumed run?'] = workflow.resume
nfsummary['Run Name'] = workflow.runName
nfsummary['Current user'] = workflow.userName
/* string transform the time and date of run start; remove : chars and replace spaces by underscores */
nfsummary['Start time'] = workflow.start.toString().replace(":", "").replace(" ", "_")
nfsummary['Script dir'] = workflow.projectDir
nfsummary['Working dir'] = workflow.workDir
nfsummary['Current dir'] = workflow.launchDir
nfsummary['Launch command'] = workflow.commandLine
log.info nfsummary.collect { k,v -> "${k.padRight(15)}: $v" }.join("\n")
log.info "\n\n--Pipeline Parameters--"
/* define function to store nextflow metadata summary info */
def pipelinesummary = [:]
/* log parameter values beign used into summary */
pipelinesummary['trimmomatic: paired-end'] = params.pe
pipelinesummary['trimmomatic: avgqual'] = params.trim_avgqual
pipelinesummary['trimmomatic: leading'] = params.trim_leading
pipelinesummary['trimmomatic: trailing'] = params.trim_trailing
pipelinesummary['trimmomatic: slide size'] = params.trim_slide_size
pipelinesummary['trimmomatic: slide qual'] = params.trim_slide_qual
pipelinesummary['trimmomatic: minlen'] = params.trim_minlen
pipelinesummary['input directory'] = params.input_dir
pipelinesummary['Results Dir'] = results_dir
pipelinesummary['Intermediate Dir'] = intermediates_dir
/* print stored summary info */
log.info pipelinesummary.collect { k,v -> "${k.padRight(15)}: $v" }.join("\n")
log.info "==========================================\nPipeline Start"
/*//////////////////////////////
PIPELINE START
*/
/*
READ INPUTS
*/
/* Define function for finding files that share sample name */
/* in this case, the file name comes from the 1st ".",
since tokenize array starts at 0, array index shoould be 0 */
def get_sample_prefix = { file -> file.name.toString().tokenize('R')[0] }
/* Load fq files into channel as pairs */
Channel
.fromPath( "${params.input_dir}/*.fastq.gz" )
.map{ file -> tuple(get_sample_prefix(file), file) }
.groupTuple()
// .view()
.into{ fastq_inputs; fastq_inputs_again }
/* _pre1_fastqc_before */
/* Read mkfile module files */
Channel
.fromPath("${workflow.projectDir}/mkmodules/mk-fastqc/*")
.toList()
.set{ mkfiles_pre1 }
process _pre1_fastqc_before {
publishDir "${results_dir}/_pre1_fastqc_before/",mode:"copy"
input:
set val( sample_name ), file( sample ) from fastq_inputs
file mk_files from mkfiles_pre1
output:
file "*" into results_pre1_fastqc_before
"""
bash runmk.sh
"""
}
if (params.pe){ // If the data is paired end, we will load the normal module
/* Process _001_trimmomatic */
/* Read mkfile module files */
Channel
.fromPath("${workflow.projectDir}/mkmodules/mk-trimmomatic/*")
.toList()
.set{ mkfiles_001 }
} else {
/* Process _001_trimmomatic */
/* Read mkfile module files */
Channel
.fromPath("${workflow.projectDir}/mkmodules/mk-trimmomatic-SE/*")
.toList()
.set{ mkfiles_001 }
}
process _001_trimmomatic {
publishDir "${results_dir}/_001_trimmomatic/",mode:"copy"
input:
set val( sample_name ), file( sample ) from fastq_inputs_again
file mk_files from mkfiles_001
output:
file "*.fastq.gz" into results_001_trimmomatic_trimmed_fq
file "*.trimlog.txt" into results_001_trimmomatic_trimlog
file "*.trimreport.txt" into results_001_trimmomatic_trimreport
"""
export TRIM_AVGQUAL="${params.trim_avgqual}"
export TRIM_LEADING="${params.trim_leading}"
export TRIM_TRAILING="${params.trim_trailing}"
export TRIM_SLIDE_SIZE="${params.trim_slide_size}"
export TRIM_SLIDE_QUAL="${params.trim_slide_qual}"
export TRIM_MINLEN="${params.trim_minlen}"
bash runmk.sh
"""
}
/* gather all trimreport.txt files */
results_001_trimmomatic_trimreport
.flatten()
.toList()
// .view()
.set{ all_trimreports }
if (params.pe){ // If the data is paired end, we will load the normal module
/* Process _an1_trimreport */
/* Read mkfile module files */
Channel
.fromPath("${workflow.projectDir}/mkmodules/mk-trimreport/*")
.toList()
.set{ mkfiles_an1 }
} else {
/* Process _an1_trimreport */
/* Read mkfile module files */
Channel
.fromPath("${workflow.projectDir}/mkmodules/mk-trimreport-SE/*")
.toList()
.set{ mkfiles_an1 }
}
process _an1_trimreport {
publishDir "${results_dir}/_an1_trimreport/",mode:"copy"
input:
file reports from all_trimreports
file mk_files from mkfiles_an1
output:
file "*.pdf"
"""
bash runmk.sh
"""
}
if (params.pe){ // this last process is only required for PE data
/* Process _pos1_gather_sample */
/* Read mkfile module files */
Channel
.fromPath("${workflow.projectDir}/mkmodules/mk-gather-sample/*")
.toList()
.set{ mkfiles_pos1 }
process _pos1_gather_sample {
publishDir "${results_dir}/_pos1_gather_sample/",mode:"copy"
input:
file fq from results_001_trimmomatic_trimmed_fq
file mk_files from mkfiles_pos1
output:
file "*"
"""
bash runmk.sh
"""
}
}
/* prepare for ENDing pipeline */
/* _register_configs
to know which parameters where used for each results dir */
/* Read nextflow config file */
Channel
.fromPath("${workflow.projectDir}/nextflow.config")
.set{ nfconfig }
process _register_configs {
publishDir "${params.output_dir}",mode:"copy"
input:
file config from nfconfig
output:
file "${config}_used.txt"
"""
cat ${config} > ${config}_used.txt
"""
}