Skip to content

Commit

Permalink
Merge pull request #349 from dozy/stage2_init_options
Browse files Browse the repository at this point in the history
Stage2 init options
  • Loading branch information
dozy authored Sep 6, 2024
2 parents 5e76668 + bc8b2d2 commit a325e41
Show file tree
Hide file tree
Showing 11 changed files with 377 additions and 39 deletions.
34 changes: 29 additions & 5 deletions bin/vtfp.pl
Original file line number Diff line number Diff line change
Expand Up @@ -451,6 +451,12 @@ sub apply_subst {
$ewi->{removelabel}->();
}

if($cfg->{subgraph_io}) {
$ewi->{addlabel}->(q{subgraph_io});
$cfg->{subgraph_io} = subst_walk($cfg->{subgraph_io}, $params, $ewi);
$ewi->{removelabel}->();
}

return;
}

Expand Down Expand Up @@ -1412,9 +1418,9 @@ sub validate_splice_candidates {
}
}

# all edge termini must be unique (over replacement and pruning edges) except for STDIN/STDOUT
# all edge termini must be unique (over replacement edges) except for STDIN/STDOUT
my %endpoints;
for my $edge (@{$splice_candidates->{replacement_edges}}, @{$prune_edges}) {
for my $edge (@{$splice_candidates->{replacement_edges}}) {
my $from_end = $edge->{from};
if($from_end and $from_end !~ /:/) { $from_end .= q[:STDOUT] };

Expand Down Expand Up @@ -1472,15 +1478,33 @@ sub final_splice {
# add new edges
push @{$flat_graph->{edges}}, @{$splice_candidates->{replacement_edges}};

# remove pruned ports - prune edges are not required to be two-ended; just disregard undefined to/from attributes
# remove pruned ports - prune edges are not required to be two-ended; just disregard undefined to/from attributes; only remove ports
# that do not appear in splice edges (aka replacement edges)
for my $prune_edge (@{$splice_candidates->{prune_edges}}) {
if($prune_edge->{from}) { remove_port($prune_edge->{from}, $SRC, $flat_graph); }
if($prune_edge->{to}) { remove_port($prune_edge->{to}, $DST, $flat_graph); }
if($prune_edge->{from} and not _in_replacement_edges($prune_edge->{from}, $splice_candidates, $SRC)) { remove_port($prune_edge->{from}, $SRC, $flat_graph); }
if($prune_edge->{to} and not _in_replacement_edges($prune_edge->{to}, $splice_candidates, $DST)) { remove_port($prune_edge->{to}, $DST, $flat_graph); }
}

return $flat_graph;
}

# _in_replacement_edges:
#  report whether a port appears as an end point of any replacement (splice) edge
#
#  $port_spec         - port to look for; compared as a string against each edge end
#  $splice_candidates - hash ref whose replacement_edges entry is an array ref of edges
#                       (hash refs with from/to attributes)
#  $type              - $SRC to inspect the "from" end of each edge, otherwise the "to" end
#
#  An edge end given without an explicit port name is expanded to the standard port for
#  that end before comparison: "from" ends default to STDOUT and "to" ends to STDIN.
#
#  returns 1 if the port is found, 0 otherwise
sub _in_replacement_edges {
    my ($port_spec, $splice_candidates, $type) = @_;

    my $direction = ($type == $SRC)? q[from]: q[to];
    # a source ("from") end writes to STDOUT; a destination ("to") end reads from STDIN
    my $std_port = ($type == $SRC)? q[STDOUT]: q[STDIN];

    for my $edge (@{$splice_candidates->{replacement_edges}}) {
        my $end = $edge->{$direction};

        # an edge without this end has nothing to compare (and would warn in the eq below)
        if(not defined $end) { next; }

        if($end and $end !~ /:/) { $end .= qq[:$std_port]; }

        if($end eq $port_spec) { return 1; }
    }

    return 0;
}

################################################################################################
# resolve_ports:
# given a splice_pair specification, fully determine the [set of] source and destination ports
Expand Down
13 changes: 13 additions & 0 deletions data/static_params/stage2_reanalysis/align_bwa_mem2.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
{
"assign": [
{
"alignment_method": "bwa_mem",
"bwa_executable": "bwa-mem2"
}
],
"assign_local": {},
"ops": {
"splice": [],
"prune": []
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
{
"assign": [
{
"spatial_filter_switch":"off",
"markdup_optical_distance_value": "100",
"s2_se_pe": "pe",
"samtools_executable": "samtools",
"s2_input_format": "cram",
"markdup_method": "duplexseq",
"s2_ppi_switch":"s2_ppi",
"pp_read2tags":"on",
"pp_import_method":"crammerge",
"fastq_s2_pi_fq1": "DUMMY",
"fastq_s2_pi_fq2": "DUMMY",
"fastq_s2_pi_RG_ID": "DUMMY",
"s2_filter_files": "DUMMY",
"spatial_filter_file": "DUMMY",
"phix_reference_genome_fasta":"DUMMY",
"realignment_switch":0
}
],
"assign_local": {},
"ops": {
"splice": [
"aln_bam12auxmerge:-foptgt_000_fixmate:",
"foptgt_seqchksum_file:-scs_cmp_seqchksum:outputchk"
],
"prune": [
"foptgt.*_bmd_multiway:calibration_pu-",
"foptgt_cram_tee:c2a-",
"foptgt.*samtools_stats_F0.*_target.*-",
"foptgt.*samtools_stats_F0.*00_bait.*-",
"aln_tee3_tee3:to_phix_aln-scs_cmp_seqchksum:outputchk",
"ssfqc_tee_ssfqc:subsample-"
]
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
{
"assign": [
{
"spatial_filter_switch":"off",
"markdup_optical_distance_value": "100",
"s2_se_pe": "pe",
"samtools_executable": "samtools",
"s2_input_format": "cram",
"markdup_method": "duplexseq",
"s2_ppi_switch":"s2_ppi",
"pp_read2tags":"on",
"pp_import_method":"fastq",
"incrams": "DUMMY",
"s2_filter_files": "DUMMY",
"spatial_filter_file": "DUMMY",
"phix_reference_genome_fasta":"DUMMY",
"realignment_switch":0
}
],
"assign_local": {},
"ops": {
"splice": [
"aln_bam12auxmerge:-foptgt_000_fixmate:",
"foptgt_seqchksum_file:-scs_cmp_seqchksum:outputchk"
],
"prune": [
"foptgt.*_bmd_multiway:calibration_pu-",
"foptgt_cram_tee:c2a-",
"foptgt.*samtools_stats_F0.*_target.*-",
"foptgt.*samtools_stats_F0.*00_bait.*-",
"aln_tee3_tee3:to_phix_aln-scs_cmp_seqchksum:outputchk",
"ssfqc_tee_ssfqc:subsample-"
]
}
}
32 changes: 14 additions & 18 deletions data/vtlib/alignment_wtsi_stage2_template.json
Original file line number Diff line number Diff line change
Expand Up @@ -95,24 +95,20 @@
],
"nodes":[
{
"id":"crammerge",
"type":"EXEC",
"use_STDIN": false,
"use_STDOUT": true,
"cmd": [
"samtools",
"merge",
"-n",
"-O", "BAM",
"-l", "0",
{"select":"s2_input_format", "default":"cram", "select_range":[1], "cases":{
"cram":["--input-fmt-option", "no_ref=1"],
"bam":["--input-fmt", "bam"]
"id":"preprocess_inputs",
"type":"VTFILE",
"name":{"subst":"s2_preprocess_inputs_method", "required":true,
"ifnull":{
"select":"s2_ppi_switch", "default":"crammerge","select_range":[1],
"cases":{
"crammerge":"crammerge.json",
"s2_ppi":"stage2_preprocess_inputs.json"
}
}},
"-",
{"subst":"incrams"}
],
"description":"merge individual cram files from a sample into one bam file"
"subst_map":{"input_format":{"subst":"s2_input_format"}},
"comment":"inputs: NONE; outputs: _stdout_ (bam), subst_map_parameters:[input_format]",
"node_prefix":"ppi_",
"description":"subgraph to preprocess inputs. Default: merge individual cram files from a sample into one bam file"
},
{
"id":"spatial_filter",
Expand Down Expand Up @@ -264,7 +260,7 @@
}
],
"edges":[
{ "id":"src_to_bc2", "from":"crammerge", "to":{"subst":"post_cm","required":true} },
{ "id":"src_to_bc2", "from":"preprocess_inputs", "to":{"subst":"post_cm","required":true} },
{"select":"spatial_filter_switch", "required":true, "select_range":[1], "default":"on", "allow_unspec_keys":true,
"cases":{
"on": [
Expand Down
37 changes: 37 additions & 0 deletions data/vtlib/crammerge.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
{
"version":"2.0",
"description":"merge individual cram files from a sample into one bam file",
"subgraph_io":{
"ports":{
"inputs":{
},
"outputs":{
"_stdout_":"crammerge"
}
}
},
"subst_params":[],
"nodes":[
{
"id":"crammerge",
"type":"EXEC",
"use_STDIN": false,
"use_STDOUT": true,
"cmd": [
"samtools",
"merge",
"-n",
"-O", "BAM",
"-l", "0",
{"select":"input_format", "default":"cram", "select_range":[1], "cases":{
"cram":["--input-fmt-option", "no_ref=1"],
"bam":["--input-fmt", "bam"]
}},
"-",
{"subst":"incrams", "required":true}
],
"description":"merge individual cram files from a sample into one bam file"
}
],
"edges":[]
}
54 changes: 54 additions & 0 deletions data/vtlib/read2tags.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
{
"version":"2.0",
"description":"read2tags for NanoSeq processing, including preparatory collation and reset",
"subgraph_io":{
"ports":{
"inputs":{"_stdin_":"collate"},
"outputs":{ "_stdout_":"read2tags" }
}
},
"nodes":[
{
"id":"collate",
"type": "EXEC",
"use_STDIN": true,
"use_STDOUT": true,
"cmd": [
{"subst":"samtools_executable", "required":true, "ifnull":"samtools"}, "collate",
"--threads", {"subst":"s2_r2t_coll_threads","required":true,"ifnull":2},
"-u",
"-O",
"-"
]
},
{
"id":"reset",
"type": "EXEC",
"use_STDIN": true,
"use_STDOUT": true,
"cmd": [
{"subst":"samtools_executable", "required":true, "ifnull":"samtools"}, "reset",
"--threads", {"subst":"s2_r2t_rs_threads","required":true,"ifnull":4},
"--output-fmt", "BAM,level=0"
]
},
{
"id":"read2tags",
"type": "EXEC",
"use_STDIN": true,
"use_STDOUT": true,
"cmd": [
{"subst":"bambi_executable", "required":true, "ifnull":"bambi"}, "read2tags",
"--tags", "rb,mb,br,rb,mb,br",
"--qtags", "rq,mq,bq,rq,mq,bq",
"--positions", "1:1:1:3,1:2:1:3,1:1:4:7,2:2:1:3,2:1:1:3,2:2:4:7",
"--compression-level", 0,
"--output-fmt", "bam"
]
}
],
"edges":[
{ "id":"collate_to_reset", "from":"collate","to":"reset" },
{ "id":"reset_to_read2tags", "from":"reset", "to":"read2tags" }
]
}
81 changes: 81 additions & 0 deletions data/vtlib/stage2_preprocess_inputs.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
{
"version":"2.0",
"description":"alternate pre-processing method for stage2 inputs accepting FASTQ input (for e.g. Elembio NanoSeq)",
"subgraph_io":{
"ports":{
"inputs":{},
"outputs":{ "_stdout_":
{"select":"pp_read2tags", "required":true, "default":"off",
"cases":{
"off": "import",
"on": "read2tags"
}
}
}
}
},
"nodes":[
{
"id":"import",
"type":"EXEC",
"use_STDIN": false,
"use_STDOUT": true,
"cmd": {
"select":"pp_import_method",
"required":true,
"select_range":[1],
"default":"crammerge",
"cases":{
"crammerge":
[
{"subst":"samtools_executable", "required":true, "ifnull":"samtools"}, "merge",
"-n",
"-O", "BAM",
"-l", "0",
{"select":"input_format", "default":"cram", "select_range":[1], "cases":{
"cram":["--input-fmt-option", "no_ref=1"],
"bam":["--input-fmt", "bam"]
}},
"-",
{"subst":"incrams", "required":true}
],
"fastq":
[
{"subst":"samtools_executable", "required":true, "ifnull":"samtools"}, "import",
"-R", {"subst":"fastq_s2_pi_RG_ID","required":true, "comment":"readgroup"},
"-1", {"subst":"fastq_s2_pi_fq1","required":true, "comment":"FASTQ read 1"},
"-2", {"subst":"fastq_s2_pi_fq2","required":true, "comment":"FASTQ read 2"},
{"select":"parse_casava_id", "default":"on", "select_range":[1], "cases":{ "on":["-i"], "off":[] }},
{"subst":"parse_import_tags_flag", "ifnull":["-T", {"subst":"parse_import_tags","required":true,"ifnull":"*"}]},
"-u",
"-O", "bam"
]
}
}
},
{
"id":"read2tags",
"type":{
"select":"pp_read2tags",
"required":true,
"select_range":[1],
"default":"off",
"cases":{
"on":"VTFILE",
"off":"INACTIVE"
}
},
"use_STDIN": true,
"use_STDOUT": true,
"name":"read2tags.json",
"node_prefix":"r2t_"
}
],
"edges":[
{"select":"pp_read2tags", "required":true, "default":"off", "cases":{
"off": [],
"on": [ { "id":"import_to_read2tags", "from":"import", "to":"read2tags" }]
}
}
]
}
Loading

0 comments on commit a325e41

Please sign in to comment.