NCI-GDC
diff --git a/‎README.md
Lines changed: 117 additions & 261 deletions b/‎README.md
Lines changed: 117 additions & 261 deletions
diff --git a/‎example/main_workflow_example_wgs_input.json
Lines changed: 82 additions & 0 deletions b/‎example/main_workflow_example_wgs_input.json
Lines changed: 82 additions & 0 deletions
diff --git a/‎tests/bam_readgroup_to_contents.job.json
Lines changed: 4 additions & 0 deletions b/‎tests/bam_readgroup_to_contents.job.json
Lines changed: 4 additions & 0 deletions
diff --git a/‎tests/bam_rg_matching_tests.yml
Lines changed: 178 additions & 0 deletions b/‎tests/bam_rg_matching_tests.yml
Lines changed: 178 additions & 0 deletions
diff --git a/‎tests/data/test.bam
1.19 KB b/‎tests/data/test.bam
1.19 KB
diff --git a/‎tests/readgroups_bam_to_readgroups_fastq_lists.job.1.json
Lines changed: 10 additions & 0 deletions b/‎tests/readgroups_bam_to_readgroups_fastq_lists.job.1.json
Lines changed: 10 additions & 0 deletions
diff --git a/‎tests/readgroups_bam_to_readgroups_fastq_lists.job.2.json
Lines changed: 10 additions & 0 deletions b/‎tests/readgroups_bam_to_readgroups_fastq_lists.job.2.json
Lines changed: 10 additions & 0 deletions
@@ -0,0 +1,82 @@
+{
+    "bam_name": "16b6472c-a4fa-4bd2-9b94-789f30c192aa_new_gdc_aln_wgs_test.bam",
+    "job_uuid": "16b6472c-a4fa-4bd2-9b94-789f30c192aa",
+    "collect_wgs_metrics": true,
+    "amplicon_kit_set_file_list": [],
+    "capture_kit_set_file_list": [],
+    "readgroup_fastq_pe_file_list": [],
+    "readgroup_fastq_se_file_list": [],
+    "readgroups_bam_file_list": [
+        {
+            "bam": {"class":"File", "path": "{PATH_TO}/A77474_1_lane_dupsFlagged.bam"},
+            "readgroup_meta_list": [
+                {
+                  "CN": "BCGSC",
+                  "DT": "2017-09-07T10:26:26-07:00",
+                  "ID": "CBGL8ANXX.3",
+                  "LB": "A77474",
+                  "PL": "ILLUMINA",
+                  "PM": "Illumina HiSeq 2500",
+                  "PU": "CBGL8ANXX.3.CGGCCT",
+                  "SM": "BLGSP-71-06-00090-01B-01E-A77H-33"
+                }
+              ]
+        }
+    ],
+    "common_biallelic_vcf": {
+        "class": "File",
+        "path": "{PATH_TO}/af-only-gnomad-biallelic-autoallo.hg38.vcf.gz",
+        "secondaryFiles": [
+            {
+              "class": "File",
+              "path": "{PATH_TO}/af-only-gnomad-biallelic-autoallo.hg38.vcf.gz.tbi"
+            }
+        ]
+      },
+    "known_snp": {
+        "class": "File",
+        "path": "{PATH_TO}/dbsnp_144.hg38.vcf.gz",
+        "secondaryFiles": [
+            {
+              "class": "File",
+              "path": "{PATH_TO}/dbsnp_144.hg38.vcf.gz.tbi"
+            }
+        ]
+      },
+    "run_markduplicates": true,
+    "reference_sequence": {
+        "class": "File",
+        "path": "{PATH_TO}/GRCh38.d1.vd1.fa",
+        "secondaryFiles": [
+            {
+              "class": "File",
+              "path": "{PATH_TO}/GRCh38.d1.vd1.dict"
+            },
+            {
+              "class": "File",
+              "path": "{PATH_TO}/GRCh38.d1.vd1.fa.amb"
+            },
+            {
+              "class": "File",
+              "path": "{PATH_TO}/GRCh38.d1.vd1.fa.ann"
+            },
+            {
+              "class": "File",
+              "path": "{PATH_TO}/GRCh38.d1.vd1.fa.bwt"
+            },
+            {
+              "class": "File",
+              "path": "{PATH_TO}/GRCh38.d1.vd1.fa.fai"
+            },
+            {
+              "class": "File",
+              "path": "{PATH_TO}/GRCh38.d1.vd1.fa.pac"
+            },
+            {
+              "class": "File",
+              "path": "{PATH_TO}/GRCh38.d1.vd1.fa.sa"
+            }
+        ]
+      },
+    "thread_count": 4
+}
@@ -0,0 +1,4 @@
+{
+  "INPUT": {"class": "File", "location": "./data/test.bam"},
+  "MODE": "lenient"
+}
@@ -0,0 +1,178 @@
+- output:
+    OUTPUT: [
+        "{\"ID\": \"1\", \"CN\": \"fake\", \"PL\": \"ILLUMINA\", \"SM\": \"fake\"}", 
+        "{\"ID\": \"2\", \"CN\": \"fake\", \"PL\": \"ILLUMINA\", \"SM\": \"fake\"}", 
+        "{\"ID\": \"3\", \"CN\": \"fake\", \"PL\": \"ILLUMINA\", \"SM\": \"fake2\", \"PU\": \"3-PU\"}" 
+    ]
+    log:
+      class: "File"
+      basename: output.log 
+  job: ./tests/bam_readgroup_to_contents.job.json
+  tool: ./tools/bam_readgroup_to_contents.cwl
+  doc: Extracting all readgroups from the BAM header.
+
+- output:
+    pe_file_list:
+      - forward_fastq:
+          class: "File"
+          basename: 1_1.fq.gz
+        reverse_fastq:
+          class: "File"
+          basename: 1_2.fq.gz
+        readgroup_meta:
+          LB: "A" 
+          CN: "fake" 
+          PU: "1-PU" 
+          FO: null 
+          ID: "1" 
+          KS: null 
+          SM: "newSample" 
+          DT: null 
+          PI: null 
+          DS: null 
+          PL: "ILLUMINA" 
+          PM: null
+      - forward_fastq:
+          class: "File"
+          basename: 2_1.fq.gz
+        reverse_fastq:
+          class: "File"
+          basename: 2_2.fq.gz
+        readgroup_meta:
+          LB: "newSample" 
+          CN: "fake" 
+          PU: null
+          FO: null
+          ID: "2" 
+          KS: null 
+          SM: "newSample" 
+          DT: null 
+          PI: null 
+          DS: null 
+          PL: "ILLUMINA" 
+          PM: null
+      - forward_fastq:
+          class: "File"
+          basename: 3_1.fq.gz
+        reverse_fastq:
+          class: "File"
+          basename: 3_2.fq.gz
+        readgroup_meta:
+          LB: "newSample" 
+          CN: "fake" 
+          PU: "3-PU"
+          FO: null
+          ID: "3" 
+          KS: null 
+          SM: "newSample" 
+          DT: null 
+          PI: null 
+          DS: null 
+          PL: "ILLUMINA" 
+          PM: null
+    se_file_list: []
+    o1_file_list:
+      - forward_fastq:
+          class: "File"
+          basename: 3_o1.fq.gz
+        reverse_fastq: null
+        readgroup_meta:
+          LB: "newSample" 
+          CN: "fake" 
+          PU: "3-PU"
+          FO: null
+          ID: "3" 
+          KS: null 
+          SM: "newSample" 
+          DT: null 
+          PI: null 
+          DS: null 
+          PL: "ILLUMINA" 
+          PM: null
+    o2_file_list: []
+  job: ./tests/readgroups_bam_to_readgroups_fastq_lists.job.1.json
+  tool: ./workflows/utils/readgroups_bam_to_readgroups_fastq_lists.cwl
+  doc: Processing bam and matching to readgroups - only supplied readgroup ID 1 matches the BAM header.
+
+- output:
+    pe_file_list:
+      - forward_fastq:
+          class: "File"
+          basename: 1_1.fq.gz
+        reverse_fastq:
+          class: "File"
+          basename: 1_2.fq.gz
+        readgroup_meta:
+          LB: "A" 
+          CN: "fake" 
+          PU: "1-PU" 
+          FO: null 
+          ID: "1" 
+          KS: null 
+          SM: "newSample" 
+          DT: null 
+          PI: null 
+          DS: null 
+          PL: "ILLUMINA" 
+          PM: null
+      - forward_fastq:
+          class: "File"
+          basename: 2_1.fq.gz
+        reverse_fastq:
+          class: "File"
+          basename: 2_2.fq.gz
+        readgroup_meta:
+          LB: "B" 
+          CN: "fake" 
+          PU: "2-PU" 
+          FO: null
+          ID: "2" 
+          KS: null 
+          SM: "newSample" 
+          DT: null 
+          PI: null 
+          DS: null 
+          PL: "ILLUMINA" 
+          PM: null
+      - forward_fastq:
+          class: "File"
+          basename: 3_1.fq.gz
+        reverse_fastq:
+          class: "File"
+          basename: 3_2.fq.gz
+        readgroup_meta:
+          LB: "newSample" 
+          CN: "fake" 
+          PU: "3-PU"
+          FO: null
+          ID: "3" 
+          KS: null 
+          SM: "newSample" 
+          DT: null 
+          PI: null 
+          DS: null 
+          PL: "ILLUMINA" 
+          PM: null
+    se_file_list: []
+    o1_file_list:
+      - forward_fastq:
+          class: "File"
+          basename: 3_o1.fq.gz
+        reverse_fastq: null
+        readgroup_meta:
+          LB: "newSample" 
+          CN: "fake" 
+          PU: "3-PU"
+          FO: null
+          ID: "3" 
+          KS: null 
+          SM: "newSample" 
+          DT: null 
+          PI: null 
+          DS: null 
+          PL: "ILLUMINA" 
+          PM: null
+    o2_file_list: []
+  job: ./tests/readgroups_bam_to_readgroups_fastq_lists.job.2.json
+  tool: ./workflows/utils/readgroups_bam_to_readgroups_fastq_lists.cwl
+  doc: Processing bam and matching to readgroups - second case, supplied readgroup IDs 1 and 2 match the BAM header.
@@ -0,0 +1,10 @@
+{
+  "readgroups_bam_file": {
+    "bam": {"class": "File", "location": "./data/test.bam"},
+    "readgroup_meta_list": [
+      {"ID": "1", "PL": "ILLUMINA", "CN": "fake", "SM": "newSample", "LB": "A", "PU": "1-PU"},
+      {"ID": "Unknown", "PL": "ILLUMINA", "CN": "fake", "SM": "newSample", "LB": "B", "PU": "2-PU"},
+      {"ID": "Other", "PL": "ILLUMINA", "CN": "fake", "SM": "newSample", "LB": "C", "PU": "3-PU"}
+    ]
+  }
+}
@@ -0,0 +1,10 @@
+{
+  "readgroups_bam_file": {
+    "bam": {"class": "File", "location": "./data/test.bam"},
+    "readgroup_meta_list": [
+      {"ID": "1", "PL": "ILLUMINA", "CN": "fake", "SM": "newSample", "LB": "A", "PU": "1-PU"},
+      {"ID": "2", "PL": "illumina", "CN": "fake", "SM": "newSample", "LB": "B", "PU": "2-PU"},
+      {"ID": "Other", "PL": "ILLUMINA", "CN": "fake", "SM": "newSample", "LB": "C", "PU": "3-PU"}
+    ]
+  }
+}
-Original file line number
+Diff line change
@@ @@ -0,0 +1,4 @@ @@
 +{
 +  "INPUT": {"class": "File", "location": "./data/test.bam"},
 +  "MODE": "lenient"
 +}