11version 1.0
22
3- import "../../../tasks/Utility/PBUtils.wdl"
4- import "../../../tasks/Utility/BAMutils.wdl" as BU
53import "../../../tasks/Utility/GeneralUtils.wdl" as GU
6-
7- import "../../../tasks/Utility/Finalize.wdl" as FF
4+ import "../../../tasks/Utility/Utils.wdl"
85
96import "../../../tasks/Alignment/AlignAndCheckFingerprintCCS.wdl" as major
10- import "../../../tasks/QC/AlignedMetrics.wdl"
7+
8+ import "../../TechAgnostic/Utility/LongReadsContaminationEstimation.wdl" as QC1
9+ import "../../TechAgnostic/Utility/SexCheckNaive.wdl" as QC2
10+ import "../../TechAgnostic/Utility/CountTheBeans.wdl" as QC3
11+
12+ import "../../../tasks/Utility/Finalize.wdl" as FF
1113
1214workflow ProcessOnInstrumentDemuxedChunk {
1315
1416 meta {
1517 desciption : "!!! WARN: THIS IS PROJECT-CENTER SPECIFIC !!! Given an on-instrument demultiplexed hifi_reads.bam, perform alignment and QC check."
1618 }
1719
20+ parameter_meta {
21+ readgroup_id : "ID of a readgroup; used for storing outputs; no whitespaces allowed"
22+ bam_SM_field : "value to place in the SM field of the resulting BAM header's @RG lines"
23+
24+ platform : "PacBio platform used for generating the data; accepted value: [Sequel, Revio]"
25+
26+ fingerprint_store : "Path to the GCS folder holding fingerprint VCFs"
27+ sample_id_at_store : "Sampld ID at the fingerprint store for uniquely locating the fingerprint VCF (assumes some naming convention)"
28+ vbid2_config_json : "Path to Json file specifying config to be passed to VBID2 workflow (cross-individual contamination); currently only supports GRCh38."
29+ expected_sex_type : "Expected sex type of the sample that generated the BAM; accepted value: [M, F, NA]; if provided, performs sex concordance check"
30+ check_postaln_methyl_tags : "if true, gather statistics and reads without MM/ML tags from the aligned BAM"
31+ }
32+
1833 input {
1934 File uBAM
2035 File ? uPBI
@@ -23,13 +38,20 @@ workflow ProcessOnInstrumentDemuxedChunk {
2338
2439 String bam_SM_field
2540
26- String fingerprint_store
27- String sample_id_at_store
28- Boolean turn_off_fingperprint_check = false
41+ String platform
2942
3043 File ref_map_file
3144
3245 String gcs_out_root_dir
46+
47+ String disk_type = "SSD"
48+
49+ # args for optional QC subworkflows
50+ String ? fingerprint_store
51+ String ? sample_id_at_store
52+ File ? vbid2_config_json
53+ String ? expected_sex_type
54+ Boolean check_postaln_methyl_tags = true
3355 }
3456
3557 ###################################################################################
@@ -39,38 +61,83 @@ workflow ProcessOnInstrumentDemuxedChunk {
3961 String workflow_name = "ProcessOnInstrumentDemuxedChunk"
4062 String outdir = sub (gcs_out_root_dir , "/$" , "" ) + "/" + workflow_name
4163
42- ###################################################################################
43- call BU .GetReadGroupInfo as RG {input : bam = uBAM , keys = ['SM' , 'LB' , 'PU' ]}
64+ if (defined (fingerprint_store ) != defined (sample_id_at_store )) {
65+ call Utils .StopWorkflow { input : reason = "fingerprint_store and sample_id_at_store must be specified together or omitted together" }
66+ }
4467
68+ ###################################################################################
4569 # major work
70+ String fp_store = select_first ([fingerprint_store , 'None' ])
71+ String fp_smid = select_first ([sample_id_at_store , 'None' ])
72+
4673 call major .AlignAndCheckFingerprintCCS {
4774 input :
4875 uBAM = uBAM ,
4976 uPBI = uPBI ,
5077 bam_sample_name = bam_SM_field ,
51- library = RG .read_group_info ['LB' ],
5278
53- turn_off_fingperprint_check = turn_off_fingperprint_check ,
54- fp_store = fingerprint_store ,
55- sample_id_at_store = sample_id_at_store ,
79+ turn_off_fingperprint_check = !( defined ( fingerprint_store )) ,
80+ fp_store = fp_store ,
81+ sample_id_at_store = fp_smid ,
5682 ref_map_file = ref_map_file
5783 }
84+ File aBAM = AlignAndCheckFingerprintCCS .aligned_bam
85+ File aBAI = AlignAndCheckFingerprintCCS .aligned_bai
86+ File aPBI = AlignAndCheckFingerprintCCS .aligned_pbi
87+
88+ ###################################################################################
89+ # more QCs and metrics
90+
91+ # (optional) contamination
92+ if (defined (vbid2_config_json )) {
93+ Map [String , String ] vbid2_config = read_json (select_first ([vbid2_config_json ]))
94+ call QC1 .LongReadsContaminationEstimation as VBID2 { input :
95+ bam =aBAM ,
96+ bai =aBAI ,
97+ ref_map_file =ref_map_file ,
98+ tech = platform ,
99+ gt_sites_bed = vbid2_config ['genotyping_sites_bed' ],
100+ is_hgdp_sites = vbid2_config ['is_HGDP_sites' ],
101+ is_100k_sites = vbid2_config ['is_100K_sites' ],
102+ disable_baq = vbid2_config ['disable_BAQ' ],
103+ disk_type = disk_type ,
104+ }
105+ }
106+
107+ # (optional) sex concordance
108+ if (defined (expected_sex_type )) {
109+ call QC2 .SexCheckNaive as SexConcordance { input :
110+ bam =aBAM ,
111+ bai =aBAI ,
112+ expected_sex_type =select_first ([expected_sex_type ]),
113+ mosdepth_summary_txt =AlignAndCheckFingerprintCCS .coverage_per_chr
114+ }
115+ }
116+
117+ # (optional) verify methylation tags aren't missing
118+ if (check_postaln_methyl_tags ) {
119+ call QC3 .CountTheBeans as NoMissingBeans { input :
120+ bam =aBAM ,
121+ bai =aBAI ,
122+ bam_descriptor ="POST_ALN" ,
123+ gcs_out_root_dir =gcs_out_root_dir ,
124+ use_local_ssd =disk_type =='LOCAL'
125+ }
126+ }
58127
59128 ###################################################################################
60129 # finalize
61- String movie_name = RG .read_group_info ['PU' ]
62130 String bc_specific_aln_out = outdir + '/alignments/' + readgroup_id
63131 String bc_specific_metric_out = outdir + "/metrics/" + readgroup_id
64132
65- call FF .FinalizeToFile as FinalizeAlignedBam { input : outdir = bc_specific_aln_out , file = AlignAndCheckFingerprintCCS . aligned_bam , name = readgroup_id + '.bam' }
66- call FF .FinalizeToFile as FinalizeAlignedBai { input : outdir = bc_specific_aln_out , file = AlignAndCheckFingerprintCCS . aligned_bai , name = readgroup_id + '.bai' }
67- call FF .FinalizeToFile as FinalizeAlignedPbi { input : outdir = bc_specific_aln_out , file = AlignAndCheckFingerprintCCS . aligned_pbi , name = readgroup_id + '.pbi' }
133+ call FF .FinalizeToFile as FinalizeAlignedBam { input : outdir = bc_specific_aln_out , file = aBAM , name = readgroup_id + '.bam' }
134+ call FF .FinalizeToFile as FinalizeAlignedBai { input : outdir = bc_specific_aln_out , file = aBAI , name = readgroup_id + '.bai' }
135+ call FF .FinalizeToFile as FinalizeAlignedPbi { input : outdir = bc_specific_aln_out , file = aPBI , name = readgroup_id + '.pbi' }
68136
69137 call FF .FinalizeToFile as FinalizePerChrCov { input : outdir = bc_specific_metric_out , file = AlignAndCheckFingerprintCCS .coverage_per_chr }
70-
71138 call FF .FinalizeToFile as FinalizeAlnMetrics { input : outdir = bc_specific_metric_out , file = AlignAndCheckFingerprintCCS .alignment_metrics_tar_gz }
72139
73- if (! turn_off_fingperprint_check ) {
140+ if (defined ( fingerprint_store ) ) {
74141 call FF .FinalizeToFile as FinalizeFPDetails { input : outdir = bc_specific_metric_out , file = select_first ([AlignAndCheckFingerprintCCS .fingerprint_detail_tar_gz ]) }
75142 }
76143
@@ -86,15 +153,27 @@ workflow ProcessOnInstrumentDemuxedChunk {
86153
87154 File coverage_per_chr = FinalizePerChrCov .gcs_path
88155
89- Map [String , Float ] alignment_metrics = AlignAndCheckFingerprintCCS .alignment_metrics
90156 File alignment_metrics_tar_gz = FinalizeAlnMetrics .gcs_path
157+ Map [String , Float ] alignment_metrics = AlignAndCheckFingerprintCCS .alignment_metrics
158+ Map [String , Float ] sam_flag_stats = AlignAndCheckFingerprintCCS .sam_flag_stats
91159
92- String movie = movie_name
160+ String movie = AlignAndCheckFingerprintCCS . movie
93161
162+ # the following QC/metrics aren't always available
163+ # fingerprint QC
94164 String ? fingerprint_check_result = AlignAndCheckFingerprintCCS .fp_status
95165 Float ? fingerprint_check_LOD = AlignAndCheckFingerprintCCS .fp_lod_expected_sample
96166 File ? fingerprint_check_tar_gz = FinalizeFPDetails .gcs_path
97167
168+ # contamination QC
169+ Float ? contamination_est = VBID2 .contamination_est
170+
171+ # sex concordance QC
172+ Map [String , String ]? inferred_sex_info = SexConcordance .inferred_sex_info
173+
174+ # post-alignment methylation
175+ Map [String , String ]? methyl_tag_simple_stats = NoMissingBeans .methyl_tag_simple_stats
176+
98177 String last_processing_date = today .yyyy_mm_dd
99178 }
100179}
0 commit comments