Skip to content

Commit

Permalink
Small fixes (#21)
Browse files Browse the repository at this point in the history
* Add support for collect_outputs

* Fixes to provenance and collected outputs

* Whitespace fixes

* whitespace fixes
  • Loading branch information
dfornika authored Feb 20, 2024
1 parent 6f6a459 commit f6e4e3e
Show file tree
Hide file tree
Showing 4 changed files with 57 additions and 48 deletions.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,8 @@ are stored to the same directory.
```yml
- pipeline_name: BCCDC-PHL/mlst-nf
pipeline_version: 0.1.4
nextflow_session_id: f18b89aa-06f7-41e4-b016-3519dfd5a5cb
nextflow_run_name: sharp_bhaskara
timestamp_analysis_start: 2024-02-20T22:59:37.862710
- input_filename: NC-000913.3.fa
input_path: /home/runner/work/mlst-nf/mlst-nf/.github/data/assemblies/NC-000913.3.fa
Expand Down
2 changes: 1 addition & 1 deletion bin/parse_quast_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ def main():
]

report = parse_transposed_quast_report(args.transposed_quast_report)
writer = csv.DictWriter(sys.stdout, fieldnames=output_fieldnames)
writer = csv.DictWriter(sys.stdout, fieldnames=output_fieldnames, dialect='unix', extrasaction='ignore', quoting=csv.QUOTE_MINIMAL)
writer.writeheader()
for record in report:
writer.writerow(record)
Expand Down
67 changes: 36 additions & 31 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -13,18 +13,20 @@ include { mlst } from './modules/mlst.nf'
include { parse_alleles } from './modules/mlst.nf'

workflow {
ch_start_time = Channel.of(LocalDateTime.now())
ch_pipeline_name = Channel.of(workflow.manifest.name)
ch_pipeline_version = Channel.of(workflow.manifest.version)

ch_pipeline_provenance = pipeline_provenance(ch_pipeline_name.combine(ch_pipeline_version).combine(ch_start_time))
ch_workflow_metadata = Channel.value([
workflow.sessionId,
workflow.runName,
workflow.manifest.name,
workflow.manifest.version,
workflow.start,
])

if (params.samplesheet_input != 'NO_FILE') {
ch_assemblies = Channel.fromPath(params.samplesheet_input).splitCsv(header: true).map{ it -> [it['ID'], it['ASSEMBLY']] }
ch_assemblies = Channel.fromPath(params.samplesheet_input).splitCsv(header: true).map{ it -> [it['ID'], it['ASSEMBLY']] }
} else {
ch_assemblies = Channel.fromPath( params.assembly_search_path ).map{ it -> [it.baseName.split('_')[0], it] }.unique{ it -> it[0] }
ch_assemblies = Channel.fromPath( params.assembly_search_path ).map{ it -> [it.baseName.split('_')[0], it] }.unique{ it -> it[0] }
}


main:
hash_files(ch_assemblies.combine(Channel.of("assembly-input")))
Expand All @@ -34,30 +36,33 @@ workflow {
parse_alleles(mlst.out.mlst)

if (params.collect_outputs) {
parse_quast_report.out.map{ it -> it[1] }.collectFile(
name: params.collected_outputs_prefix + "_quast.csv",
storeDir: params.outdir,
keepHeader: true,
sort: { it -> it.readLines()[1].split(',')[0] }
)
parse_alleles.out.alleles.map{ it -> it[1] }.collectFile(
name: params.collected_outputs_prefix + "_alleles.csv",
storeDir: params.outdir,
keepHeader: true,
sort: { it -> it.readLines()[1].split(',')[0] }
)
parse_alleles.out.sequence_type.map{ it -> it[1] }.collectFile(
name: params.collected_outputs_prefix + "_sequence_type.csv",
storeDir: params.outdir,
keepHeader: true,
sort: { it -> it.readLines()[1].split(',')[0] }
)
parse_quast_report.out.map{ it -> it[1] }.collectFile(
name: params.collected_outputs_prefix + "_quast.csv",
storeDir: params.outdir,
keepHeader: true,
sort: { it -> it.readLines()[1].split(',')[0] }
)
parse_alleles.out.alleles.map{ it -> it[1] }.collectFile(
name: params.collected_outputs_prefix + "_alleles.csv",
storeDir: params.outdir,
keepHeader: true,
sort: { it -> it.readLines()[1].split(',')[0] }
)
parse_alleles.out.sequence_type.map{ it -> it[1] }.collectFile(
name: params.collected_outputs_prefix + "_sequence_type.csv",
storeDir: params.outdir,
keepHeader: true,
sort: { it -> it.readLines()[1].split(',')[0] }
)
}

ch_provenance = mlst.out.provenance
ch_provenance = ch_provenance.join(hash_files.out.provenance).map{ it -> [it[0], [it[1]] << it[2]] }
ch_provenance = ch_provenance.join(quast.out.provenance).map{ it -> [it[0], it[1] << it[2]] }
ch_provenance = ch_provenance.join(ch_assemblies.map{ it -> it[0] }.combine(ch_pipeline_provenance)).map{ it -> [it[0], it[1] << it[2]] }

ch_sample_ids = ch_assemblies.map{ it -> it[0] }
ch_provenance = ch_sample_ids
ch_pipeline_provenance = pipeline_provenance(ch_workflow_metadata)
ch_provenance = ch_provenance.combine(ch_pipeline_provenance).map{ it -> [it[0], [it[1]]] }
ch_provenance = ch_provenance.join(hash_files.out.provenance).map{ it -> [it[0], it[1] << it[2]] }
ch_provenance = ch_provenance.join(mlst.out.provenance).map{ it -> [it[0], it[1] << it[2]] }
ch_provenance = ch_provenance.join(quast.out.provenance).map{ it -> [it[0], it[1] << it[2]] }

collect_provenance(ch_provenance)

}
34 changes: 18 additions & 16 deletions modules/provenance.nf
Original file line number Diff line number Diff line change
Expand Up @@ -20,20 +20,22 @@ process collect_provenance {

process pipeline_provenance {

tag { pipeline_name + " / " + pipeline_version }

executor 'local'

input:
tuple val(pipeline_name), val(pipeline_version), val(analysis_start)

output:
file("pipeline_provenance.yml")

script:
"""
printf -- "- pipeline_name: ${pipeline_name}\\n" >> pipeline_provenance.yml
printf -- " pipeline_version: ${pipeline_version}\\n" >> pipeline_provenance.yml
printf -- " timestamp_analysis_start: ${analysis_start}\\n" >> pipeline_provenance.yml
"""
tag { pipeline_name + " / " + pipeline_version }

executor 'local'

input:
tuple val(session_id), val(run_name), val(pipeline_name), val(pipeline_version), val(analysis_start_time)

output:
file("pipeline_provenance.yml")

script:
"""
printf -- "- pipeline_name: ${pipeline_name}\\n" >> pipeline_provenance.yml
printf -- " pipeline_version: ${pipeline_version}\\n" >> pipeline_provenance.yml
printf -- " nextflow_session_id: ${session_id}\\n" >> pipeline_provenance.yml
printf -- " nextflow_run_name: ${run_name}\\n" >> pipeline_provenance.yml
printf -- " timestamp_analysis_start: ${analysis_start_time}\\n" >> pipeline_provenance.yml
"""
}

0 comments on commit f6e4e3e

Please sign in to comment.