Skip to content

Commit 5030939

Browse files
authored
Merge pull request #19 from ikmb/devel
Devel
2 parents 1b5b938 + 89cc0f9 commit 5030939

File tree

5 files changed

+46
-35
lines changed

5 files changed

+46
-35
lines changed

bin/samplesheet_from_folder.rb

+27-13
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
opts.on("-p", "--[no-]pacbio [FLAG]", TrueClass, "Pacbio data") {|argument| options.pacbio = argument.nil? ? true : argument }
3737
opts.on("-c","--centre", "=CENTRE","Name of sequencing centre") {|argument| options.centre = argument }
3838
opts.on("-s","--platform","=PLATFORM", "Name of the sequencing platform") {|argument| options.platform = argument }
39+
opts.on("-l","--lookup", "=LOOKUP", "Lookup file with lib id <> other id") {|argument| options.lookup = argument }
3940
opts.on("-h","--help","Display the usage information") {
4041
puts opts
4142
exit
@@ -49,12 +50,19 @@
4950
options.centre ? center = options.centre : center = "IKMB"
5051
options.platform ? platform = options.platform : platform = "NovaSeq6000"
5152

53+
lookup = {}
54+
if options.lookup
55+
IO.readlines(options.lookup).each do |line|
56+
key,value = line.strip.split("\t")
57+
lookup[key] = value
58+
end
59+
end
5260

5361
if options.pacbio
5462

5563
fastq_files = Dir["#{options.folder}/*.fastq.gz"]
5664

57-
puts "IndivID;SampleID;R1"
65+
puts "patient;sample;R1"
5866

5967
fastq_files.each do |file|
6068

@@ -72,27 +80,33 @@
7280

7381
groups = fastq_files.group_by{|f| f.split("/")[-1].split(/_R[1,2]/)[0] }
7482

75-
puts "IndivID;SampleID;libraryID;rgID;rgPU;R1;R2"
83+
puts "patient;sample;library;rgid;rgpu;R1;R2"
7684

7785
groups.each do |group, files|
7886

79-
left,right = files.sort.collect{|f| File.absolute_path(f)}
87+
left,right = files.sort.collect{|f| File.absolute_path(f)}
88+
89+
abort "Missing one member of the pair for #{group}" unless left && right
90+
91+
library = group.split("_")[1]
92+
sample = library
8093

81-
library = group.split("_L00")[0]
82-
sample = group.split("_L00")[0]
94+
if lookup.has_key?(library)
95+
sample = "#{lookup[library]}"
96+
end
8397

84-
e = `zcat #{left} | head -n1 `
85-
e.gsub!("@", "")
86-
header = e
98+
e = `zcat #{left} | head -n1 `
99+
e.gsub!("@", "")
100+
header = e
87101

88-
instrument,run_id,flowcell_id,lane,tile,x,y = header.split(" ")[0].split(":")
102+
instrument,run_id,flowcell_id,lane,tile,x,y = header.split(" ")[0].split(":")
89103

90-
index = header.split(" ")[-1].split(":")[-1]
91-
readgroup = flowcell_id + "." + lane + "." + library
104+
index = header.split(" ")[-1].split(":")[-1]
105+
readgroup = flowcell_id + "." + lane + "." + library
92106

93-
pgu = flowcell_id + "." + lane + "." + index
107+
pgu = flowcell_id + "." + lane + "." + index
94108

95-
puts "#{sample};Sample_#{sample};#{library};#{readgroup};#{pgu};#{left};#{right}"
109+
puts "#{sample};#{sample};#{library};#{readgroup};#{pgu};#{left};#{right}"
96110

97111
end
98112

modules/helper/bam_select_reads.nf

+2-2
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,8 @@ process BAM_SELECT_READS {
1818
path("versions.yml"), emit: versions
1919

2020
script:
21-
R1 = bam.getBaseName() + "_R1_001.fastq.gz"
22-
R2 = bam.getBaseName() + "_R2_001.fastq.gz"
21+
R1 = meta.sample_id + "_R1_001.fastq.gz"
22+
R2 = meta.sample_id + "_R2_001.fastq.gz"
2323

2424
"""
2525
samtools view --reference $fasta -hb -o mapped.cram -L $bed $bam

modules/picard/collect_wgs_metrics.nf

+1-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ process PICARD_WGS_METRICS {
33

44
tag "${meta.patient_id}|${meta.sample_id}"
55

6-
publishDir "${params.outdir}/${meta.patient_id}/${meta.sample_id}", mode: 'copy'
6+
publishDir "${params.outdir}/${meta.patient_id}/${meta.sample_id}/QC", mode: 'copy'
77

88
container "docker://quay.io/biocontainers/picard:2.26.11--hdfd78af_0"
99

subworkflows/trim_and_align.nf

+3-4
Original file line numberDiff line numberDiff line change
@@ -35,9 +35,8 @@ workflow TRIM_AND_ALIGN {
3535
new_meta = [:]
3636
new_meta.patient_id = meta.patient_id
3737
new_meta.sample_id = meta.sample_id
38-
def groupKey = meta.sample_id
39-
tuple( groupKey, new_meta, bam)
40-
}.groupTuple(by: [0,1]).map { g ,new_meta ,bam -> [ new_meta, bam ] }
38+
tuple( new_meta, bam)
39+
}.groupTuple()
4140

4241
bam_mapped.branch {
4342
single: it[1].size() == 1
@@ -60,7 +59,7 @@ workflow TRIM_AND_ALIGN {
6059
SAMTOOLS_INDEX.out.bam,
6160
ch_fasta
6261
)
63-
62+
6463
ch_versions = ch_versions.mix(SAMTOOLS_MARKDUP.out.versions)
6564

6665
emit:

workflows/deepvariant_pipeline.nf

+13-15
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@ workflow DEEPVARIANT_PIPELINE {
110110
if ('intersect' in tools) {
111111
BAM_SELECT_READS(
112112
TRIM_AND_ALIGN.out.bam,
113-
ch_bed,
113+
ch_bed.collect(),
114114
ch_fasta
115115
)
116116

@@ -121,7 +121,7 @@ workflow DEEPVARIANT_PIPELINE {
121121
if ('deepvariant' in tools) {
122122
DEEPVARIANT_SHORT_READS(
123123
TRIM_AND_ALIGN.out.bam,
124-
ch_bed,
124+
ch_bed.collect(),
125125
ch_fasta
126126
)
127127
ch_vcf = ch_vcf.mix(DEEPVARIANT_SHORT_READS.out.vcf)
@@ -150,13 +150,13 @@ workflow DEEPVARIANT_PIPELINE {
150150
MOSDEPTH(
151151
ch_bam,
152152
ch_fasta,
153-
ch_bed
153+
ch_bed.collect()
154154
)
155155

156156
PICARD_WGS_METRICS(
157157
ch_bam,
158158
ch_fasta,
159-
ch_bed
159+
ch_bed.collect()
160160
)
161161

162162
CUSTOM_DUMPSOFTWAREVERSIONS (
@@ -176,16 +176,14 @@ workflow DEEPVARIANT_PIPELINE {
176176

177177
def create_fastq_channel(LinkedHashMap row) {
178178

179-
// IndivID;SampleID;libraryID;rgID;rgPU;platform;platform_model;Center;Date;R1;R2
179+
// patient;sample;library;rgid;rgpu;R1;R2
180180

181181
def meta = [:]
182-
meta.patient_id = row.IndivID
183-
meta.sample_id = row.SampleID
184-
meta.library_id = row.libraryID
185-
meta.readgroup_id = row.rgID
186-
meta.center = row.Center
187-
meta.date = row.Date
188-
meta.platform_unit = row.rgPU
182+
meta.patient_id = row.patient
183+
meta.sample_id = row.sample
184+
meta.library_id = row.library
185+
meta.readgroup_id = row.rgid
186+
meta.platform_unit = row.rgpu
189187

190188
def array = []
191189
array = [ meta, file(row.R1), file(row.R2) ]
@@ -195,11 +193,11 @@ def create_fastq_channel(LinkedHashMap row) {
195193

196194
def create_pacbio_channel(LinkedHashMap row) {
197195

198-
// IndivID;SampleID;libraryID;rgID;rgPU;platform;platform_model;Center;Date;R1;R2
196+
// patient;sample;R1
199197

200198
def meta = [:]
201-
meta.patient_id = row.IndivID
202-
meta.sample_id = row.SampleID
199+
meta.patient_id = row.patient
200+
meta.sample_id = row.sample
203201

204202
def array = []
205203
array = [ meta, file(row.R1) ]

0 commit comments

Comments
 (0)