Small fixes (#21)

* Add support for collect_outputs * Fixes to provenance and collected outputs * Whitespace fixes * whitespace fixes
BCCDC-PHL · Feb 20, 2024 · f6e4e3e · f6e4e3e
1 parent 6f6a459
commit f6e4e3e
Show file tree

Hide file tree

Showing 4 changed files with 57 additions and 48 deletions.
diff --git a/README.md b/README.md
@@ -131,6 +131,8 @@ are stored to the same directory.
 ```yml
 - pipeline_name: BCCDC-PHL/mlst-nf
   pipeline_version: 0.1.4
+  nextflow_session_id: f18b89aa-06f7-41e4-b016-3519dfd5a5cb
+  nextflow_run_name: sharp_bhaskara
   timestamp_analysis_start: 2024-02-20T22:59:37.862710
 - input_filename: NC-000913.3.fa
   input_path: /home/runner/work/mlst-nf/mlst-nf/.github/data/assemblies/NC-000913.3.fa

diff --git a/bin/parse_quast_report.py b/bin/parse_quast_report.py
@@ -118,7 +118,7 @@ def main():
     ]
 
     report = parse_transposed_quast_report(args.transposed_quast_report)
-    writer = csv.DictWriter(sys.stdout, fieldnames=output_fieldnames)
+    writer = csv.DictWriter(sys.stdout, fieldnames=output_fieldnames, dialect='unix', extrasaction='ignore', quoting=csv.QUOTE_MINIMAL)
     writer.writeheader()
     for record in report:
         writer.writerow(record)

diff --git a/main.nf b/main.nf
@@ -13,18 +13,20 @@ include { mlst }                   from './modules/mlst.nf'
 include { parse_alleles }          from './modules/mlst.nf'
 
 workflow {
-    ch_start_time = Channel.of(LocalDateTime.now())
-    ch_pipeline_name = Channel.of(workflow.manifest.name)
-    ch_pipeline_version = Channel.of(workflow.manifest.version)
 
-    ch_pipeline_provenance = pipeline_provenance(ch_pipeline_name.combine(ch_pipeline_version).combine(ch_start_time))
+    ch_workflow_metadata = Channel.value([
+        workflow.sessionId,
+        workflow.runName,
+        workflow.manifest.name,
+        workflow.manifest.version,
+        workflow.start,
+    ])
 
     if (params.samplesheet_input != 'NO_FILE') {
-	ch_assemblies = Channel.fromPath(params.samplesheet_input).splitCsv(header: true).map{ it -> [it['ID'], it['ASSEMBLY']] }
+	      ch_assemblies = Channel.fromPath(params.samplesheet_input).splitCsv(header: true).map{ it -> [it['ID'], it['ASSEMBLY']] }
     } else {
-	ch_assemblies = Channel.fromPath( params.assembly_search_path ).map{ it -> [it.baseName.split('_')[0], it] }.unique{ it -> it[0] }  
+	      ch_assemblies = Channel.fromPath( params.assembly_search_path ).map{ it -> [it.baseName.split('_')[0], it] }.unique{ it -> it[0] }  
     }
-
 
     main:
     hash_files(ch_assemblies.combine(Channel.of("assembly-input")))
@@ -34,30 +36,33 @@ workflow {
     parse_alleles(mlst.out.mlst)
 
     if (params.collect_outputs) {
-	parse_quast_report.out.map{ it -> it[1] }.collectFile(
-	    name: params.collected_outputs_prefix + "_quast.csv",
-	    storeDir: params.outdir,
-	    keepHeader: true,
-	    sort: { it -> it.readLines()[1].split(',')[0] }
-	)
-	parse_alleles.out.alleles.map{ it -> it[1] }.collectFile(
-	    name: params.collected_outputs_prefix + "_alleles.csv",
-	    storeDir: params.outdir,
-	    keepHeader: true,
-	    sort: { it -> it.readLines()[1].split(',')[0] }
-	)
-	parse_alleles.out.sequence_type.map{ it -> it[1] }.collectFile(
-	    name: params.collected_outputs_prefix + "_sequence_type.csv",
-	    storeDir: params.outdir,
-	    keepHeader: true,
-	    sort: { it -> it.readLines()[1].split(',')[0] }
-	)
+        parse_quast_report.out.map{ it -> it[1] }.collectFile(
+            name: params.collected_outputs_prefix + "_quast.csv",
+            storeDir: params.outdir,
+            keepHeader: true,
+            sort: { it -> it.readLines()[1].split(',')[0] }
+        )
+        parse_alleles.out.alleles.map{ it -> it[1] }.collectFile(
+            name: params.collected_outputs_prefix + "_alleles.csv",
+            storeDir: params.outdir,
+            keepHeader: true,
+            sort: { it -> it.readLines()[1].split(',')[0] }
+        )
+        parse_alleles.out.sequence_type.map{ it -> it[1] }.collectFile(
+            name: params.collected_outputs_prefix + "_sequence_type.csv",
+            storeDir: params.outdir,
+            keepHeader: true,
+            sort: { it -> it.readLines()[1].split(',')[0] }
+        )
     }
-
-    ch_provenance = mlst.out.provenance
-    ch_provenance = ch_provenance.join(hash_files.out.provenance).map{ it -> [it[0], [it[1]] << it[2]] }
-    ch_provenance = ch_provenance.join(quast.out.provenance).map{ it -> [it[0], it[1] << it[2]] }
-    ch_provenance = ch_provenance.join(ch_assemblies.map{ it -> it[0] }.combine(ch_pipeline_provenance)).map{ it -> [it[0], it[1] << it[2]] }
+
+    ch_sample_ids = ch_assemblies.map{ it -> it[0] }
+    ch_provenance = ch_sample_ids
+    ch_pipeline_provenance = pipeline_provenance(ch_workflow_metadata)
+    ch_provenance = ch_provenance.combine(ch_pipeline_provenance).map{ it -> [it[0], [it[1]]] }
+    ch_provenance = ch_provenance.join(hash_files.out.provenance).map{ it -> [it[0], it[1] << it[2]] }
+    ch_provenance = ch_provenance.join(mlst.out.provenance).map{ it ->       [it[0], it[1] << it[2]] }
+    ch_provenance = ch_provenance.join(quast.out.provenance).map{ it ->      [it[0], it[1] << it[2]] }
+
     collect_provenance(ch_provenance)
-
 }
diff --git a/modules/provenance.nf b/modules/provenance.nf
@@ -20,20 +20,22 @@ process collect_provenance {
 
 process pipeline_provenance {
 
-  tag { pipeline_name + " / " + pipeline_version }
-
-  executor 'local'
-
-  input:
-  tuple val(pipeline_name), val(pipeline_version), val(analysis_start)
-
-  output:
-  file("pipeline_provenance.yml")
-
-  script:
-  """
-  printf -- "- pipeline_name: ${pipeline_name}\\n"             >> pipeline_provenance.yml
-  printf -- "  pipeline_version: ${pipeline_version}\\n"       >> pipeline_provenance.yml
-  printf -- "  timestamp_analysis_start: ${analysis_start}\\n" >> pipeline_provenance.yml
-  """
+    tag { pipeline_name + " / " + pipeline_version }
+
+    executor 'local'
+
+    input:
+    tuple val(session_id), val(run_name), val(pipeline_name), val(pipeline_version), val(analysis_start_time)
+
+    output:
+    file("pipeline_provenance.yml")
+
+    script:
+    """
+    printf -- "- pipeline_name: ${pipeline_name}\\n"                  >> pipeline_provenance.yml
+    printf -- "  pipeline_version: ${pipeline_version}\\n"            >> pipeline_provenance.yml
+    printf -- "  nextflow_session_id: ${session_id}\\n"               >> pipeline_provenance.yml
+    printf -- "  nextflow_run_name: ${run_name}\\n"                   >> pipeline_provenance.yml
+    printf -- "  timestamp_analysis_start: ${analysis_start_time}\\n" >> pipeline_provenance.yml
+    """
 }