Skip to content

Commit b068401

Browse files
czyszCTDS, Kyle Hernandez, Shenglai
authored
Feat/BINF-309: DNA-Seq revamp (#103)
* feat(BINF-309): removing old
* feat(BINF-309): streamline bamfastq
* feat(clean): tmp mv
* feat(extract): standardize
* feat(BINF-309): standardize extract
* feat(bwa): std bwa wfs
* feat(BINF-309): rg matching tests
* feat(metrics): add metrics wfs
* feat(decider): markdup
* feat(format): format tools
* feat(picard): sqlite
* fix(format): validation fixes
* feat(picard): reduce validation
* feat(integrity): remove wf
* chore(docs): starting rewrite of docs
* feat(inputs): rm header file
* chore(rename): rename and organize
* chore(organize): group wfs
* chore(doc): update main wf name
* fix(tests): update wf paths
* feat(expr): add function to expression_lib for fastq_cleaner
* BINF-372: add example input json for external users
* chore(doc): update readme

Co-authored-by: Kyle Hernandez <khernandez@bsd.uchicago.edu>
Co-authored-by: Shenglai <sli6@uchicago.edu>
1 parent 1046947 commit b068401

File tree

455 files changed

+3151
-45245
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

455 files changed

+3151
-45245
lines changed

README.md

Lines changed: 117 additions & 261 deletions
Large diffs are not rendered by default.
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
{
2+
"bam_name": "16b6472c-a4fa-4bd2-9b94-789f30c192aa_new_gdc_aln_wgs_test.bam",
3+
"job_uuid": "16b6472c-a4fa-4bd2-9b94-789f30c192aa",
4+
"collect_wgs_metrics": true,
5+
"amplicon_kit_set_file_list": [],
6+
"capture_kit_set_file_list": [],
7+
"readgroup_fastq_pe_file_list": [],
8+
"readgroup_fastq_se_file_list": [],
9+
"readgroups_bam_file_list": [
10+
{
11+
"bam": {"class":"File", "path": "{PATH_TO}/A77474_1_lane_dupsFlagged.bam"},
12+
"readgroup_meta_list": [
13+
{
14+
"CN": "BCGSC",
15+
"DT": "2017-09-07T10:26:26-07:00",
16+
"ID": "CBGL8ANXX.3",
17+
"LB": "A77474",
18+
"PL": "ILLUMINA",
19+
"PM": "Illumina HiSeq 2500",
20+
"PU": "CBGL8ANXX.3.CGGCCT",
21+
"SM": "BLGSP-71-06-00090-01B-01E-A77H-33"
22+
}
23+
]
24+
}
25+
],
26+
"common_biallelic_vcf": {
27+
"class": "File",
28+
"path": "{PATH_TO}/af-only-gnomad-biallelic-autoallo.hg38.vcf.gz",
29+
"secondaryFiles": [
30+
{
31+
"class": "File",
32+
"path": "{PATH_TO}/af-only-gnomad-biallelic-autoallo.hg38.vcf.gz.tbi"
33+
}
34+
]
35+
},
36+
"known_snp": {
37+
"class": "File",
38+
"path": "{PATH_TO}/dbsnp_144.hg38.vcf.gz",
39+
"secondaryFiles": [
40+
{
41+
"class": "File",
42+
"path": "{PATH_TO}/dbsnp_144.hg38.vcf.gz.tbi"
43+
}
44+
]
45+
},
46+
"run_markduplicates": true,
47+
"reference_sequence": {
48+
"class": "File",
49+
"path": "{PATH_TO}/GRCh38.d1.vd1.fa",
50+
"secondaryFiles": [
51+
{
52+
"class": "File",
53+
"path": "{PATH_TO}/GRCh38.d1.vd1.dict"
54+
},
55+
{
56+
"class": "File",
57+
"path": "{PATH_TO}/GRCh38.d1.vd1.fa.amb"
58+
},
59+
{
60+
"class": "File",
61+
"path": "{PATH_TO}/GRCh38.d1.vd1.fa.ann"
62+
},
63+
{
64+
"class": "File",
65+
"path": "{PATH_TO}/GRCh38.d1.vd1.fa.bwt"
66+
},
67+
{
68+
"class": "File",
69+
"path": "{PATH_TO}/GRCh38.d1.vd1.fa.fai"
70+
},
71+
{
72+
"class": "File",
73+
"path": "{PATH_TO}/GRCh38.d1.vd1.fa.pac"
74+
},
75+
{
76+
"class": "File",
77+
"path": "{PATH_TO}/GRCh38.d1.vd1.fa.sa"
78+
}
79+
]
80+
},
81+
"thread_count": 4
82+
}
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
{
2+
"INPUT": {"class": "File", "location": "./data/test.bam"},
3+
"MODE": "lenient"
4+
}

tests/bam_rg_matching_tests.yml

Lines changed: 178 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,178 @@
1+
- output:
2+
OUTPUT: [
3+
"{\"ID\": \"1\", \"CN\": \"fake\", \"PL\": \"ILLUMINA\", \"SM\": \"fake\"}",
4+
"{\"ID\": \"2\", \"CN\": \"fake\", \"PL\": \"ILLUMINA\", \"SM\": \"fake\"}",
5+
"{\"ID\": \"3\", \"CN\": \"fake\", \"PL\": \"ILLUMINA\", \"SM\": \"fake2\", \"PU\": \"3-PU\"}"
6+
]
7+
log:
8+
class: "File"
9+
basename: output.log
10+
job: ./tests/bam_readgroup_to_contents.job.json
11+
tool: ./tools/bam_readgroup_to_contents.cwl
12+
doc: Extracting readgroup from bam header.
13+
14+
- output:
15+
pe_file_list:
16+
- forward_fastq:
17+
class: "File"
18+
basename: 1_1.fq.gz
19+
reverse_fastq:
20+
class: "File"
21+
basename: 1_2.fq.gz
22+
readgroup_meta:
23+
LB: "A"
24+
CN: "fake"
25+
PU: "1-PU"
26+
FO: null
27+
ID: "1"
28+
KS: null
29+
SM: "newSample"
30+
DT: null
31+
PI: null
32+
DS: null
33+
PL: "ILLUMINA"
34+
PM: null
35+
- forward_fastq:
36+
class: "File"
37+
basename: 2_1.fq.gz
38+
reverse_fastq:
39+
class: "File"
40+
basename: 2_2.fq.gz
41+
readgroup_meta:
42+
LB: "newSample"
43+
CN: "fake"
44+
PU: null
45+
FO: null
46+
ID: "2"
47+
KS: null
48+
SM: "newSample"
49+
DT: null
50+
PI: null
51+
DS: null
52+
PL: "ILLUMINA"
53+
PM: null
54+
- forward_fastq:
55+
class: "File"
56+
basename: 3_1.fq.gz
57+
reverse_fastq:
58+
class: "File"
59+
basename: 3_2.fq.gz
60+
readgroup_meta:
61+
LB: "newSample"
62+
CN: "fake"
63+
PU: "3-PU"
64+
FO: null
65+
ID: "3"
66+
KS: null
67+
SM: "newSample"
68+
DT: null
69+
PI: null
70+
DS: null
71+
PL: "ILLUMINA"
72+
PM: null
73+
se_file_list: []
74+
o1_file_list:
75+
- forward_fastq:
76+
class: "File"
77+
basename: 3_o1.fq.gz
78+
reverse_fastq: null
79+
readgroup_meta:
80+
LB: "newSample"
81+
CN: "fake"
82+
PU: "3-PU"
83+
FO: null
84+
ID: "3"
85+
KS: null
86+
SM: "newSample"
87+
DT: null
88+
PI: null
89+
DS: null
90+
PL: "ILLUMINA"
91+
PM: null
92+
o2_file_list: []
93+
job: ./tests/readgroups_bam_to_readgroups_fastq_lists.job.1.json
94+
tool: ./workflows/utils/readgroups_bam_to_readgroups_fastq_lists.cwl
95+
doc: Processing bam and matching to readgroups.
96+
97+
- output:
98+
pe_file_list:
99+
- forward_fastq:
100+
class: "File"
101+
basename: 1_1.fq.gz
102+
reverse_fastq:
103+
class: "File"
104+
basename: 1_2.fq.gz
105+
readgroup_meta:
106+
LB: "A"
107+
CN: "fake"
108+
PU: "1-PU"
109+
FO: null
110+
ID: "1"
111+
KS: null
112+
SM: "newSample"
113+
DT: null
114+
PI: null
115+
DS: null
116+
PL: "ILLUMINA"
117+
PM: null
118+
- forward_fastq:
119+
class: "File"
120+
basename: 2_1.fq.gz
121+
reverse_fastq:
122+
class: "File"
123+
basename: 2_2.fq.gz
124+
readgroup_meta:
125+
LB: "B"
126+
CN: "fake"
127+
PU: "2-PU"
128+
FO: null
129+
ID: "2"
130+
KS: null
131+
SM: "newSample"
132+
DT: null
133+
PI: null
134+
DS: null
135+
PL: "ILLUMINA"
136+
PM: null
137+
- forward_fastq:
138+
class: "File"
139+
basename: 3_1.fq.gz
140+
reverse_fastq:
141+
class: "File"
142+
basename: 3_2.fq.gz
143+
readgroup_meta:
144+
LB: "newSample"
145+
CN: "fake"
146+
PU: "3-PU"
147+
FO: null
148+
ID: "3"
149+
KS: null
150+
SM: "newSample"
151+
DT: null
152+
PI: null
153+
DS: null
154+
PL: "ILLUMINA"
155+
PM: null
156+
se_file_list: []
157+
o1_file_list:
158+
- forward_fastq:
159+
class: "File"
160+
basename: 3_o1.fq.gz
161+
reverse_fastq: null
162+
readgroup_meta:
163+
LB: "newSample"
164+
CN: "fake"
165+
PU: "3-PU"
166+
FO: null
167+
ID: "3"
168+
KS: null
169+
SM: "newSample"
170+
DT: null
171+
PI: null
172+
DS: null
173+
PL: "ILLUMINA"
174+
PM: null
175+
o2_file_list: []
176+
job: ./tests/readgroups_bam_to_readgroups_fastq_lists.job.2.json
177+
tool: ./workflows/utils/readgroups_bam_to_readgroups_fastq_lists.cwl
178+
doc: Processing bam and matching to readgroups - second situation.

tests/data/test.bam

1.19 KB
Binary file not shown.
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
{
2+
"readgroups_bam_file": {
3+
"bam": {"class": "File", "location": "./data/test.bam"},
4+
"readgroup_meta_list": [
5+
{"ID": "1", "PL": "ILLUMINA", "CN": "fake", "SM": "newSample", "LB": "A", "PU": "1-PU"},
6+
{"ID": "Unknown", "PL": "ILLUMINA", "CN": "fake", "SM": "newSample", "LB": "B", "PU": "2-PU"},
7+
{"ID": "Other", "PL": "ILLUMINA", "CN": "fake", "SM": "newSample", "LB": "C", "PU": "3-PU"}
8+
]
9+
}
10+
}
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
{
2+
"readgroups_bam_file": {
3+
"bam": {"class": "File", "location": "./data/test.bam"},
4+
"readgroup_meta_list": [
5+
{"ID": "1", "PL": "ILLUMINA", "CN": "fake", "SM": "newSample", "LB": "A", "PU": "1-PU"},
6+
{"ID": "2", "PL": "illumina", "CN": "fake", "SM": "newSample", "LB": "B", "PU": "2-PU"},
7+
{"ID": "Other", "PL": "ILLUMINA", "CN": "fake", "SM": "newSample", "LB": "C", "PU": "3-PU"}
8+
]
9+
}
10+
}

0 commit comments

Comments
 (0)