diff --git a/bin/vtfp.pl b/bin/vtfp.pl index d7c7ea44d..16aeb7c49 100755 --- a/bin/vtfp.pl +++ b/bin/vtfp.pl @@ -678,18 +678,13 @@ sub fetch_param_entry { } } - my $candidate; if(exists $param_entry->{_value}) { - $candidate = $param_entry; # already evaluated, return cached value (allowing undef) + return $param_entry; # already evaluated, return cached value (allowing undef) } push @{$aux->{irp}}, $param_name; $retval = resolve_subst_constructor($param_name, $param_entry->{subst_constructor}, $params, $ewi, $aux); - if(not $retval and $candidate) { - $retval = $candidate->{_value}; - } - if(defined $retval) { $param_entry->{_value} = $retval; } diff --git a/data/vtlib/bambi_i2b.json b/data/vtlib/bambi_i2b.json new file mode 100644 index 000000000..80d52de3d --- /dev/null +++ b/data/vtlib/bambi_i2b.json @@ -0,0 +1,119 @@ +{ +"version":"2.0", +"description":"Subgraph which creates the initial BAM stream from the data generated by the Illumina machine using bambi i2b, writing it to stdout.", +"subgraph_io":{ + "ports":{ + "inputs":{}, + "outputs":{ + "_stdout_":"i2b" + } + } +}, +"subst_params":[ + {"id":"rpt","description":"Run/Position/TagIndex, though here it is generally only Run/Position. Used in defaults/constructors for i2b_rg,decoder_metrics,md5_file,seqchksum_file,spatial_filter_file and filtered_bam"}, + {"id":"i2b_run_path","description":"path to runfolder. Provides a base on which i2b_runfolder_path can be constructed"}, + {"id":"i2b_runfolder","description":"runfolder directory. Provides a base on which platform unit and i2b_runfolder_path can be constructed"}, + {"id":"i2b_runfolder_path","description":"full path to runfolder. 
Provides a base on which platform unit, intensities directory and (indirectly) basecalls directories can be constructed", + "subst_constructor":{ + "vals":[ {"subst":"i2b_run_path","required":"yes"}, {"subst":"i2b_runfolder","required":"yes"} ], + "postproc":{"op":"concat","pad":"/"} + } + + }, + { + "id":"i2b_intensity_dir", + "required":"yes", + "description":"Illumina intensities directory including config xml file, and clocs, locs or pos files under lane directory, using Data/Intensities directory under runfolder if not given", + "subst_constructor":{ + "vals":[ {"subst":"i2b_runfolder_path","required":"yes"}, "/", {"subst":"i2b_data_intensities_dir_suffix","ifnull":"Data/Intensities"} ], + "postproc":{"op":"concat","pad":""} + } + }, + { + "id":"i2b_basecalls_dir", + "required":"no", + "description":"Illumina basecalls directory including config xml file, and filter files, bcl, maybe scl files under lane cycle directory, using BaseCalls directory under intensities if not given.", + "subst_constructor":{ + "vals":[ {"subst":"i2b_intensity_dir","required":"yes"}, "/", {"subst":"i2b_basecalls_dir_suffix","ifnull":"BaseCalls"} ], + "postproc":{"op":"concat","pad":""} + } + }, + { + "id":"i2b_bam_basecalls_dir", + "required":"no", + "description":"full path to the BAM_basecalls directory; a default parameter value for the tag_metrics qc check", + "subst_constructor":{ + "vals":[ {"subst":"i2b_intensity_dir","required":"yes"}, "/", {"subst":"i2b_bam_basecalls_dir_suffix","required":"yes"} ], + "postproc":{"op":"concat","pad":""} + } + }, + {"id":"i2b_lane","required":"yes","comment":"Lane number"}, + { + "id":"i2b_pu", + "description":"The platform unit, using runfolder name plus lane number if not given", + "subst_constructor":{ + "vals":[ {"subst":"i2b_runfolder"}, "_", {"subst":"i2b_lane"} ], + "postproc":{"op":"concat","pad":""} + } + }, + {"id":"i2b_library_name","description":"The name of the sequenced library"}, + {"id":"i2b_rg", "description":"ID used to 
link RG header record with RG tag in SAM record", "default":{"subst":"rpt"}}, + {"id":"i2b_bc_seq_val","required":"no","description":"Tag name for barcode sequence. Illumina2bam default is BC"}, + {"id":"i2b_bc_qual_val","required":"no","description":"Tag name for barcode quality. Illumina2bam default is QT"}, + {"id":"i2b_sec_bc_seq_val","required":"no","description":"Tag name for second barcode sequence. Illumina2bam default is null"}, + {"id":"i2b_sec_bc_qual_val","required":"no","description":"Tag name for second barcode quality. Illumina2bam default is null"}, + {"id":"i2b_study_name","description":"The name of the study"}, + { + "id":"i2b_sample_aliases", + "default":"UNSPECIFIED", + "comment":"produces a comma-separated string from array of i2b_sample_alias values", + "subst_constructor":{ + "vals":{"subst":"i2b_sample_alias", "ifnull":["UNSPEC"]}, + "postproc":{"op":"concat","pad":","} + } + } +], +"nodes":[ + { + "id":"i2b", + "type":"EXEC", + "description":"Create the initial BAM file from the data generated by the Illumina machine using bambi i2b", + "use_STDIN":false, + "use_STDOUT":true, + "cmd":[ + "bambi", + "i2b", + {"subst":"i2b_intensity_flag","required":true,"ifnull":{"subst_constructor":{"vals":[ "--intensity-dir", {"subst":"i2b_intensity_dir","required":true} ],"postproc":{"op":"concat","pad":"="}}}}, + {"subst":"i2b_basecalls_flag","required":true,"ifnull":{"subst_constructor":{"vals":[ "--basecalls-dir", {"subst":"i2b_basecalls_dir","required":true} ],"postproc":{"op":"concat","pad":"="}}}}, + {"subst":"i2b_lane_flag","required":true,"ifnull":{"subst_constructor":{"vals":[ "--lane", {"subst":"i2b_lane","required":true} ],"postproc":{"op":"concat","pad":"="}}}}, + {"subst":"i2b_pu_flag","ifnull":{"subst_constructor":{"vals":[ "--platform-unit", {"subst":"i2b_pu"} ],"postproc":{"op":"concat","pad":"="}}}}, + {"subst":"i2b_rg_flag","ifnull":{"subst_constructor":{"vals":[ "--read-group-id", {"subst":"i2b_rg"} 
],"postproc":{"op":"concat","pad":"="}}}}, + {"subst":"i2b_bc_seq_flag","ifnull":{"subst_constructor":{"vals":[ "--barcode-tag", {"subst":"i2b_bc_seq_val"} ],"postproc":{"op":"concat","pad":"="}}}}, + {"subst":"i2b_bc_qual_flag","ifnull":{"subst_constructor":{"vals":[ "--quality-tag", {"subst":"i2b_bc_qual_val"} ],"postproc":{"op":"concat","pad":"="}}}}, + {"subst":"i2b_sec_bc_seq_flag","ifnull":{"subst_constructor":{"vals":[ "--sec-barcode-tag", {"subst":"i2b_sec_bc_seq_val"} ],"postproc":{"op":"concat","pad":"="}}}}, + {"subst":"i2b_sec_bc_qual_flag","ifnull":{"subst_constructor":{"vals":[ "--sec-quality-tag", {"subst":"i2b_sec_bc_qual_val"} ],"postproc":{"op":"concat","pad":"="}}}}, + {"subst":"i2b_first_tile_flag","ifnull":{"subst_constructor":{"vals":[ "--first-tile", {"subst":"i2b_first_tile"} ],"postproc":{"op":"concat","pad":"="}}}}, + {"subst":"i2b_tile_limit_flag","ifnull":{"subst_constructor":{"vals":[ "--tile-limit", {"subst":"i2b_tile_limit"} ],"postproc":{"op":"concat","pad":"="}}}}, + {"subst":"i2b_library_flag","ifnull":{"subst_constructor":{"vals":[ "--library-name", {"subst":"i2b_library_name"} ],"postproc":{"op":"concat","pad":"="}}}}, + {"subst":"i2b_study_name_flag","ifnull":{"subst_constructor":{"vals":[ "--study-name", {"subst":"i2b_study_name"} ],"postproc":{"op":"concat","pad":"="}}}}, + {"subst":"i2b_sample_alias_flag","ifnull":{"subst_constructor":{"vals":[ "--sample-alias", {"subst":"i2b_sample_aliases"} ],"postproc":{"op":"concat","pad":"="}}}}, + {"subst":"i2b_bc_read_flag","ifnull":{"subst_constructor":{"vals":[ "--bc-read", {"subst":"i2b_bc_read"} ],"postproc":{"op":"concat","pad":"="}}}}, + {"subst":"i2b_first_index_0_flag","ifnull":{"subst_constructor":{"vals":[ "--first-index-cycle", {"subst":"i2b_first_index_0"} ],"postproc":{"op":"concat","pad":"="}}}}, + {"subst":"i2b_final_index_0_flag","ifnull":{"subst_constructor":{"vals":[ "--final-index-cycle", {"subst":"i2b_final_index_0"} ],"postproc":{"op":"concat","pad":"="}}}}, + 
{"subst":"i2b_first_index_1_flag","ifnull":{"subst_constructor":{"vals":[ "--first-index-cycle", {"subst":"i2b_first_index_1"} ],"postproc":{"op":"concat","pad":"="}}}}, + {"subst":"i2b_final_index_1_flag","ifnull":{"subst_constructor":{"vals":[ "--final-index-cycle", {"subst":"i2b_final_index_1"} ],"postproc":{"op":"concat","pad":"="}}}}, + {"subst":"i2b_first_0_flag","ifnull":{"subst_constructor":{"vals":[ "--first-cycle", {"subst":"i2b_first_0"} ],"postproc":{"op":"concat","pad":"="}}}}, + {"subst":"i2b_final_0_flag","ifnull":{"subst_constructor":{"vals":[ "--final-cycle", {"subst":"i2b_final_0"} ],"postproc":{"op":"concat","pad":"="}}}}, + {"subst":"i2b_first_1_flag","ifnull":{"subst_constructor":{"vals":[ "--first-cycle", {"subst":"i2b_first_1"} ],"postproc":{"op":"concat","pad":"="}}}}, + {"subst":"i2b_final_1_flag","ifnull":{"subst_constructor":{"vals":[ "--final-cycle", {"subst":"i2b_final_1"} ],"postproc":{"op":"concat","pad":"="}}}}, + {"subst":"i2b_thread_count_flag","ifnull":{"subst_constructor":{"vals":[ "--threads", {"subst":"i2b_thread_count"} ],"postproc":{"op":"concat","pad":"="}}}}, + {"subst":"i2b_tqlen_flag","ifnull":{"subst_constructor":{"vals":[ "--queue-len", {"subst":"i2b_tqlen"} ],"postproc":{"op":"concat","pad":"="}}}}, + {"select":"i2b_nocall_qual_switch", "required":true, "select_range":[1], "default":"off", "cases":{ "on": "--nocall-quality", "off":[]}}, + {"subst":"i2b_arbitrary_flags", "comment":"this allows arbitrary sets of flag strings to be inserted in the command" }, + "--output-file=-", + "--compression-level=0" + ] + } +], +"edges":[] +} diff --git a/data/vtlib/bcl2bam_phix_deplex_wtsi_stage1_template.json b/data/vtlib/bcl2bam_phix_deplex_wtsi_stage1_template.json index 52aa889cc..dd6c56255 100644 --- a/data/vtlib/bcl2bam_phix_deplex_wtsi_stage1_template.json +++ b/data/vtlib/bcl2bam_phix_deplex_wtsi_stage1_template.json @@ -2,117 +2,25 @@ "description":"This pipeline starts with Illumina2Bam, and ends by running 
SplitBamByReadGroup to create separate BAM files for each sample.", "version":"2.0", "subst_params":[ - {"id":"rpt","description":"Run/Position/TagIndex, though here it is generally only Run/Position. Used in defaults/constructors for i2b_rg,decoder_metrics,md5_file,seqchksum_file,spatial_filter_file and filtered_bam"}, - {"id":"i2b_run_path","description":"path to runfolder. Provides a base on which i2b_runfolder_path can be constructed"}, - {"id":"i2b_runfolder","description":"runfolder directory. Provides a base on which platform unit and i2b_runfolder_path can be constructed"}, - {"id":"i2b_runfolder_path","description":"full path to runfolder. Provides a base on which platform unit, intensities directory and (indirectly) basecalls directories can be constructed", - "subst_constructor":{ - "vals":[ {"subst":"i2b_run_path","required":"yes"}, {"subst":"i2b_runfolder","required":"yes"} ], - "postproc":{"op":"concat","pad":"/"} - } - - }, - { - "id":"i2b_intensity_dir", - "required":"yes", - "description":"Illumina intensities directory including config xml file, and clocs, locs or pos files under lane directory, using Data/Intensities directory under runfolder if not given", - "subst_constructor":{ - "vals":[ {"subst":"i2b_runfolder_path","required":"yes"}, "/", {"subst":"i2b_data_intensities_dir_suffix","ifnull":"Data/Intensities"} ], - "postproc":{"op":"concat","pad":""} - } - }, - { - "id":"i2b_basecalls_dir", - "required":"no", - "description":"Illumina basecalls directory including config xml file, and filter files, bcl, maybe scl files under lane cycle directory, using BaseCalls directory under intensities if not given.", - "subst_constructor":{ - "vals":[ {"subst":"i2b_intensity_dir","required":"yes"}, "/", {"subst":"i2b_basecalls_dir_suffix","ifnull":"BaseCalls"} ], - "postproc":{"op":"concat","pad":""} - } - }, - { - "id":"i2b_bam_basecalls_dir", - "required":"no", - "description":"full path to the BAM_basecalls directory; a default parameter value for 
the tag_metrics qc check", - "subst_constructor":{ - "vals":[ {"subst":"i2b_intensity_dir","required":"yes"}, "/", {"subst":"i2b_bam_basecalls_dir_suffix","required":"yes"} ], - "postproc":{"op":"concat","pad":""} - } - }, - {"id":"i2b_lane","required":"yes","comment":"Lane number"}, - { - "id":"i2b_pu", - "description":"The platform unit, using runfolder name plus lane number if not given", - "subst_constructor":{ - "vals":[ {"subst":"i2b_runfolder"}, "_", {"subst":"i2b_lane"} ], - "postproc":{"op":"concat","pad":""} - } - }, - {"id":"i2b_library_name","description":"The name of the sequenced library"}, - {"id":"i2b_rg", "description":"ID used to link RG header record with RG tag in SAM record", "default":{"subst":"rpt"}}, - {"id":"i2b_bc_seq_val","required":"no","description":"Tag name for barcode sequence. Illumina2bam default is BC"}, - {"id":"i2b_bc_qual_val","required":"no","description":"Tag name for barcode quality. Illumina2bam default is QT"}, - {"id":"i2b_sec_bc_seq_val","required":"no","description":"Tag name for second barcode sequence. Illumina2bam default is null"}, - {"id":"i2b_sec_bc_qual_val","required":"no","description":"Tag name for second barcode quality. 
Illumina2bam default is null"}, - {"id":"i2b_study_name","description":"The name of the study"}, - { - "id":"i2b_sample_aliases", - "default":"UNSPECIFIED", - "comment":"produces a comma-separated string from array of i2b_sample_alias values", - "subst_constructor":{ - "vals":{"subst":"i2b_sample_alias", "ifnull":["UNSPEC"]}, - "postproc":{"op":"concat","pad":","} - } - }, + {"id":"s1_runfolder_path","required":"yes","default":{"subst":"i2b_runfolder_path"}, "comment":"Stage1 runfolder path"}, + {"id":"s1_basecalls_dir","required":"yes","default":{"subst":"i2b_basecalls_dir"}, "comment":"Stage1 BAM_basecalls directory"}, + {"id":"s1_lane","required":"yes","default":{"subst":"i2b_lane"}, "comment":"Stage1 lane number"}, {"id":"qc_check_id_run","required":"yes"}, - {"id":"qc_check_position","required":"yes","default":{"subst":"i2b_lane"}}, - {"id":"qc_check_qc_in_dir","required":"yes","default":{"subst_constructor":{"vals":[{"subst":"i2b_runfolder_path"}, "Data/Intensities", {"subst":"i2b_bam_basecalls_dir"}],"postproc":{"op":"concat","pad":"/"}}}}, + {"id":"qc_check_position","required":"yes","default":{"subst":"s1_lane"}}, + {"id":"qc_check_qc_in_dir","required":"yes","default":{"subst_constructor":{"vals":[{"subst":"s1_runfolder_path"}, "Data/Intensities", {"subst":"s1_bam_basecalls_dir"}],"postproc":{"op":"concat","pad":"/"}}}}, {"id":"qc_check_qc_out_dir","required":"yes","default":{"subst_constructor":{"vals":[{"subst":"qc_check_qc_in_dir"}, "no_cal/archive/qc"],"postproc":{"op":"concat","pad":"/"}}}}, - {"id":"run_lane_label", "description":"label constructed from id_run and position", "default":{"subst_constructor":{"vals":[{"subst":"qc_check_id_run"}, "_", {"subst":"i2b_lane"}],"postproc":{"op":"concat","pad":""}}}}, + {"id":"run_lane_label", "description":"label constructed from id_run and position", "default":{"subst_constructor":{"vals":[{"subst":"qc_check_id_run"}, "_", {"subst":"s1_lane"}],"postproc":{"op":"concat","pad":""}}}}, 
{"id":"s1_output_format", "description":"output format for deplexed reads (bam/cram/etc)", "default":"cram"} ], "nodes":[ - { - "id":"illumina2bam", - "type":"EXEC", - "use_STDIN":false, - "use_STDOUT":true, - "comment":"Actual executable used depends on the value of the 12b_implementation parameter: java - use illumina2bam (default); bambi - use new bambi i2b", - "cmd":[ - "bambi", - "i2b", - {"subst":"i2b_intensity_flag","required":true,"ifnull":{"subst_constructor":{"vals":[ "--intensity-dir", {"subst":"i2b_intensity_dir","required":true} ],"postproc":{"op":"concat","pad":"="}}}}, - {"subst":"i2b_basecalls_flag","required":true,"ifnull":{"subst_constructor":{"vals":[ "--basecalls-dir", {"subst":"i2b_basecalls_dir","required":true} ],"postproc":{"op":"concat","pad":"="}}}}, - {"subst":"i2b_lane_flag","required":true,"ifnull":{"subst_constructor":{"vals":[ "--lane", {"subst":"i2b_lane","required":true} ],"postproc":{"op":"concat","pad":"="}}}}, - {"subst":"i2b_pu_flag","ifnull":{"subst_constructor":{"vals":[ "--platform-unit", {"subst":"i2b_pu"} ],"postproc":{"op":"concat","pad":"="}}}}, - {"subst":"i2b_rg_flag","ifnull":{"subst_constructor":{"vals":[ "--read-group-id", {"subst":"i2b_rg"} ],"postproc":{"op":"concat","pad":"="}}}}, - {"subst":"i2b_bc_seq_flag","ifnull":{"subst_constructor":{"vals":[ "--barcode-tag", {"subst":"i2b_bc_seq_val"} ],"postproc":{"op":"concat","pad":"="}}}}, - {"subst":"i2b_bc_qual_flag","ifnull":{"subst_constructor":{"vals":[ "--quality-tag", {"subst":"i2b_bc_qual_val"} ],"postproc":{"op":"concat","pad":"="}}}}, - {"subst":"i2b_sec_bc_seq_flag","ifnull":{"subst_constructor":{"vals":[ "--sec-barcode-tag", {"subst":"i2b_sec_bc_seq_val"} ],"postproc":{"op":"concat","pad":"="}}}}, - {"subst":"i2b_sec_bc_qual_flag","ifnull":{"subst_constructor":{"vals":[ "--sec-quality-tag", {"subst":"i2b_sec_bc_qual_val"} ],"postproc":{"op":"concat","pad":"="}}}}, - {"subst":"i2b_first_tile_flag","ifnull":{"subst_constructor":{"vals":[ "--first-tile", 
{"subst":"i2b_first_tile"} ],"postproc":{"op":"concat","pad":"="}}}}, - {"subst":"i2b_tile_limit_flag","ifnull":{"subst_constructor":{"vals":[ "--tile-limit", {"subst":"i2b_tile_limit"} ],"postproc":{"op":"concat","pad":"="}}}}, - {"subst":"i2b_library_flag","ifnull":{"subst_constructor":{"vals":[ "--library-name", {"subst":"i2b_library_name"} ],"postproc":{"op":"concat","pad":"="}}}}, - {"subst":"i2b_study_name_flag","ifnull":{"subst_constructor":{"vals":[ "--study-name", {"subst":"i2b_study_name"} ],"postproc":{"op":"concat","pad":"="}}}}, - {"subst":"i2b_sample_alias_flag","ifnull":{"subst_constructor":{"vals":[ "--sample-alias", {"subst":"i2b_sample_aliases"} ],"postproc":{"op":"concat","pad":"="}}}}, - {"subst":"i2b_bc_read_flag","ifnull":{"subst_constructor":{"vals":[ "--bc-read", {"subst":"i2b_bc_read"} ],"postproc":{"op":"concat","pad":"="}}}}, - {"subst":"i2b_first_index_0_flag","ifnull":{"subst_constructor":{"vals":[ "--first-index-cycle", {"subst":"i2b_first_index_0"} ],"postproc":{"op":"concat","pad":"="}}}}, - {"subst":"i2b_final_index_0_flag","ifnull":{"subst_constructor":{"vals":[ "--final-index-cycle", {"subst":"i2b_final_index_0"} ],"postproc":{"op":"concat","pad":"="}}}}, - {"subst":"i2b_first_index_1_flag","ifnull":{"subst_constructor":{"vals":[ "--first-index-cycle", {"subst":"i2b_first_index_1"} ],"postproc":{"op":"concat","pad":"="}}}}, - {"subst":"i2b_final_index_1_flag","ifnull":{"subst_constructor":{"vals":[ "--final-index-cycle", {"subst":"i2b_final_index_1"} ],"postproc":{"op":"concat","pad":"="}}}}, - {"subst":"i2b_first_0_flag","ifnull":{"subst_constructor":{"vals":[ "--first-cycle", {"subst":"i2b_first_0"} ],"postproc":{"op":"concat","pad":"="}}}}, - {"subst":"i2b_final_0_flag","ifnull":{"subst_constructor":{"vals":[ "--final-cycle", {"subst":"i2b_final_0"} ],"postproc":{"op":"concat","pad":"="}}}}, - {"subst":"i2b_first_1_flag","ifnull":{"subst_constructor":{"vals":[ "--first-cycle", {"subst":"i2b_first_1"} 
],"postproc":{"op":"concat","pad":"="}}}}, - {"subst":"i2b_final_1_flag","ifnull":{"subst_constructor":{"vals":[ "--final-cycle", {"subst":"i2b_final_1"} ],"postproc":{"op":"concat","pad":"="}}}}, - {"subst":"i2b_thread_count_flag","ifnull":{"subst_constructor":{"vals":[ "--threads", {"subst":"i2b_thread_count"} ],"postproc":{"op":"concat","pad":"="}}}}, - {"subst":"i2b_tqlen_flag","ifnull":{"subst_constructor":{"vals":[ "--queue-len", {"subst":"i2b_tqlen"} ],"postproc":{"op":"concat","pad":"="}}}}, - {"select":"i2b_nocall_qual_switch", "required":true, "select_range":[1], "default":"off", "cases":{ "on": "--nocall-quality", "off":[]}}, - {"subst":"i2b_arbitrary_flags", "comment":"this allows arbitrary sets of flag strings to be inserted in the command" }, - "--output-file=-", - "--compression-level=0" - ], - "description":"Create the initial BAM file from the data generated by the Illumina machine" - }, + { + "id":"produce_init_bam", + "type":"VTFILE", + "comment":"inputs: none; outputs: _stdout_ (bam)", + "node_prefix":"pib_", + "name":{"subst":"s1_produce_init_bam_method", "required":true, "ifnull":{"select":"i2b_switch", "default":"i2b", "select_range":[1], "cases":{"i2b":"bambi_i2b.json", "reanalysis":"pib_reanalysis.json"}}}, + "description":"subgraph containing i2b or reanalysis initialisation" + }, { "id":"tee_i2b", "type":"EXEC", @@ -435,7 +343,7 @@ } ], "edges":[ - { "id":"illumina2bam_to_ti2b", "from":"illumina2bam", "to":"tee_i2b" }, + { "id":"illumina2bam_to_ti2b", "from":"produce_init_bam", "to":"tee_i2b" }, { "id":"ti2b_to_bamadapterfind", "from":"tee_i2b:baf", "to":"bamindexdecoder" }, { "id":"decoder_to_metrics", "from":"bamindexdecoder:metrics_file", "to":"decoder_metrics" }, { "id":"decoder_metrics_to_qc_tag_metrics", "from":"decoder_metrics", "to":"qc_tag_metrics_check" }, diff --git a/data/vtlib/pib_reanalysis.json b/data/vtlib/pib_reanalysis.json new file mode 100644 index 000000000..26e3723fb --- /dev/null +++ 
b/data/vtlib/pib_reanalysis.json @@ -0,0 +1,123 @@ +{ +"version":"2.0", +"description":"produce the initial bam stream for reanalysis: merge the input cram/bam files, remove unwanted RG header lines, reset the bam stream and name-collate it", +"subgraph_io":{ + "ports":{ + "inputs":{}, + "outputs":{ + "_stdout_":"name_collate" + } + } +}, +"subst_params":[], +"nodes":[ + { + "id":"merge", + "type":"EXEC", + "use_STDIN": false, + "use_STDOUT": true, + "cmd": [ + "samtools", + "merge", + {"select":"pib_merge_input_order", "default":"coord", "select_range":[1], "cases":{ + "coord":[], + "qname_alpha_numeric":"-n", + "qname_lexicographical":"-N" + }}, + {"subst_constructor":{"vals":["-t", {"subst":"pib_sort_tag", "required":false}]}}, + "-c", + "-r", + "-O", "BAM", + "-l", "0", + {"select":"s1_input_format", "default":"cram", "select_range":[1], "cases":{ + "cram":["--input-fmt-option", "no_ref=1"], + "bam":[] + }}, + {"subst":"merge_hdr_file_flag","ifnull":{ + "subst_constructor":{ + "vals":[ + "-h", + {"subst":"merge_hdr_file","required":true, "ifnull":{"subst_constructor":{"vals":[{"subst":"reanalysis_root"}, "/auxdata/", {"subst":"qc_check_id_run"}, "/", {"subst":"qc_check_id_run"},"_", {"subst":"s1_lane"}, ".rg_hdr.sam"], "postproc":{"op":"concat","pad":""}}}} + ] + }}}, + {"subst":"pib_merge_arbitrary_flags", "required":false}, + "-", + {"subst":"incrams"} + ], + "description":"merge individual cram files from a sample into one bam file" + }, + { + "id":"reheader", + "type":"EXEC", + "use_STDIN": true, + "use_STDOUT": true, + "comment": "remove unwanted RG header lines", + "cmd": [ + {"subst":"samtools_executable", "required":true, "ifnull":"samtools"}, "reheader", + "--no-PG", + "-c", + {"subst":"reheader_script","required":true, + "ifnull":{"subst_constructor":{"vals":[ + "perl -ne \"(!/^\\@RG/ || /\\tID:", + {"subst":"reheader_rg_id", "required":true, "ifnull":{"subst":"i2b_rg"}}, + "\\t/) && print;\"" + ], "postproc":{"op":"concat","pad":""}}}}, + {"subst":"pib_reanalysis_reheader_extra_flags", "required":false}, + "-" + ] + }, + { + "id":"reset", 
+ "type":"EXEC", + "comment": "reset bam stream", + "use_STDIN": true, + "use_STDOUT": true, + "cmd": [ + {"subst":"samtools_executable", "required":true, "ifnull":"samtools"}, "reset", + {"subst":"reset_keep_tag_flag","ifnull":{ + "subst_constructor":{ + "vals":[ + "--keep-tag", + {"subst":"reset_keep_tags","required":false, "ifnull":{"subst_constructor":{"vals":["RG","BC", "QT", "FI", "RT", "TC"], "postproc":{"op":"concat","pad":","}}}} + ] + }}}, + {"subst":"reset_reject_PG_flag","ifnull":{ + "subst_constructor":{ + "vals":[ + "--reject-PG", + {"subst":"reset_reject_PG_id","required":false, "ifnull":"SCS"} + ] + }}}, + {"subst":"reset_output_format_flag","ifnull":{"subst_constructor":{"vals":["--output-fmt", {"subst":"reset_output_format", "ifnull":"BAM", "required":false}]}}}, + "--threads", {"subst":"reset_threads","required":true,"ifnull":4}, + {"subst":"reset_extra_flags", "required":false}, + "-" + ] + }, + { + "id":"name_collate", + "type":"EXEC", + "use_STDIN": true, + "use_STDOUT": true, + "cmd": [ + {"subst":"samtools_executable", "required":true, "ifnull":"samtools"}, "collate", "-O", + "-l", {"subst":"pib_reanalysis_collate_compression","required":true,"ifnull":["0"]}, + "--threads", {"subst":"pib_reanalysis_collate_threads","required":true,"ifnull":4}, + {"subst":"pib_reanalysis_collate_extra_flags", "required":false}, + {"subst":"pib_collate_tempfile_flag","ifnull":{ + "subst_constructor":{ + "vals":[ + "-T", + {"subst":"pib_collate_tempfile_prefix","required":false, "ifnull":{"subst_constructor":{"vals":[ {"subst":"qc_check_id_run"}, {"subst":"s1_lane"}, "collate_tmp"], "postproc":{"op":"concat","pad":"_"}}}} + ] + }}}, + "-" + ] + } +], +"edges":[ + { "id":"merge_to_reheader", "from":"merge", "to":"reheader" }, + { "id":"reheader_to_reset", "from":"reheader", "to":"reset" }, + { "id":"reset_to_collate", "from":"reset", "to":"name_collate" } +] +} diff --git a/t/10-vtfp-vtfile_v2.t b/t/10-vtfp-vtfile_v2.t index d4cdedd47..f276a7af4 100644 --- 
a/t/10-vtfp-vtfile_v2.t +++ b/t/10-vtfp-vtfile_v2.t @@ -1,7 +1,7 @@ use strict; use warnings; use Carp; -use Test::More tests => 6; +use Test::More tests => 7; use Test::Cmd; use File::Slurp; use Perl6::Slurp; @@ -568,7 +568,7 @@ subtest 'multilevel_vtf_required_param' => sub { { id => 'vfile', type => 'OUTFILE', - name => { subst_constructor => { vals => [ 'tmp.', {subst => 'ext', 'required' => 'true'} ], postproc => { op => 'concat', pad => ''} }, } + name => { subst_constructor => { vals => [ 'tmp.', {subst => 'ext', required => JSON::true} ], postproc => { op => 'concat', pad => ''} }, } }, ] }; @@ -628,4 +628,186 @@ subtest 'multilevel_vtf_required_param' => sub { is_deeply ($vtfp_results, $expected_result, 'multilevel local param reeval'); }; +subtest 'multilevel_vtf_forced_undef' => sub { + plan tests => 4; + + my $top_container = { + version => '2.0', + description => 'outermost of a nest of test VTFILEs', + subst_params => [ + { id => 'top_sp_contents', required => 'false', default => 'top SP' }, + { + id => 'top_box', + required => 'false', + default => 'TB_DFLT', + subst_constructor => { + vals => [ 'TB [', {subst => 'top_sp_contents'}, '] TB' ], + postproc => {op => 'concat', pad => ''} + } + } + ], + nodes => [ + { + id => 'middle', + type => 'VTFILE', + node_prefix => 'mid_', + name => 'middle.json' + } + ], + edges => [] + }; + + my $middle = { + version => '2.0', + description => 'middle of a nest of test VTFILEs', + subst_params => [ + {id => 'mid_sp_contents', required => 'false', default => 'mid SP'}, + { + id => 'mid_box', + required => 'false', + default => 'MID_DFLT', + subst_constructor => { + vals => [ 'MIDB [', {subst => 'mid_sp_contents'}, '] MIDB' ], + postproc => {op => 'concat', pad => ''} + } + } + ], + nodes => [ + { + id => 'bottom', + type => 'VTFILE', + node_prefix => 'bot_', + name => 'bottom.json' + }, + { + id => 'blather', + type => 'EXEC', + use_STDIN => 'false', + use_STDOUT => 'true', + cmd => [ + 'echo', + {subst => 
'top_box'}, {subst => 'mid_box'}, {subst => 'bot_box'} + ] + } + ], + edges => [] + }; + + my $bottom = { + version => '2.0', + description => 'innermost of a nest of test VTFILEs', + subst_params => [ + {id => 'bot_sp_contents', required => 'false', default => 'bot SP'}, + { + id => 'bot_box', + required => 'false', + default => 'BOT_DFLT', + subst_constructor => { + vals => [ 'BOTB [', {subst => 'bot_sp_contents'}, '] BOTB' ], + postproc => {op => 'concat', pad => ''} + } + } + ], + nodes => [ + { + id => 'haver', + type => 'EXEC', + use_STDIN => 'false', + use_STDOUT => 'true', + cmd => [ + 'echo', + {subst => 'top_box'}, {subst => 'mid_box'}, {subst => 'bot_box'} + ] + } + ], + edges => [] + }; + + my ($template, $fn); + $fn = $template = $tdir.q[/10-vtfp-multilevel_vtf_forced_undef.json]; + my $contents = to_json($top_container); + write_file($fn, $contents); + + $fn = $tdir.q[/middle.json]; + $contents = to_json($middle); + write_file($fn, $contents); + + $fn = $tdir.q[/bottom.json]; + $contents = to_json($bottom); + write_file($fn, $contents); + + my $exit_status = $test->run(chdir => $test->curdir, args => qq[-no-absolute_program_paths -verbosity_level 0 -template_path $tdir $template]); + ok($exit_status>>8 == 0, "non-zero exit: $exit_status"); + my $vtfp_results = from_json($test->stdout); + my $vtfp_err = $test->stderr; + + my $expected_result = { + 'version' => '2.0', + 'edges' => [], + 'nodes' => [ + { + 'id' => 'mid_blather', + 'type' => 'EXEC', + 'use_STDIN' => 'false', + 'use_STDOUT' => 'true', + 'cmd' => [ + 'echo', + 'TB [top SP] TB', + 'MIDB [mid SP] MIDB' + ] + }, + { + 'id' => 'mid_bot_haver', + 'type' => 'EXEC', + 'use_STDIN' => 'false', + 'use_STDOUT' => 'true', + 'cmd' => [ + 'echo', + 'TB [top SP] TB', + 'MIDB [mid SP] MIDB', + 'BOTB [bot SP] BOTB' + ], + } + ] + }; + is_deeply ($vtfp_results, $expected_result, 'multilevel vtf forced undef (no nullkeys)'); + + $exit_status = $test->run(chdir => $test->curdir, args => 
qq[-no-absolute_program_paths -verbosity_level 0 -template_path $tdir -nullkeys mid_sp_contents $template]); + ok($exit_status>>8 == 0, "non-zero exit: $exit_status"); + $vtfp_results = from_json($test->stdout); + $vtfp_err = $test->stderr; + + $expected_result = { + 'version' => '2.0', + 'edges' => [], + 'nodes' => [ + { + 'id' => 'mid_blather', + 'type' => 'EXEC', + 'use_STDIN' => 'false', + 'use_STDOUT' => 'true', + 'cmd' => [ + 'echo', + 'TB [top SP] TB', + 'MID_DFLT' + ] + }, + { + 'id' => 'mid_bot_haver', + 'type' => 'EXEC', + 'use_STDIN' => 'false', + 'use_STDOUT' => 'true', + 'cmd' => [ + 'echo', + 'TB [top SP] TB', + 'MID_DFLT', + 'BOTB [bot SP] BOTB' + ], + } + ] + }; + + is_deeply ($vtfp_results, $expected_result, 'multilevel vtf forced undef (nullkeys: mid_sp_contents)'); +}; + 1;