|
36 | 36 | opts.on("-p", "--[no-]pacbio [FLAG]", TrueClass, "Pacbio data") {|argument| options.pacbio = argument.nil? ? true : argument }
|
37 | 37 | opts.on("-c","--centre", "=CENTRE","Name of sequencing centre") {|argument| options.centre = argument }
|
38 | 38 | opts.on("-s","--platform","=PLATFORM", "Name of the sequencing platform") {|argument| options.platform = argument }
|
| 39 | +opts.on("-l","--lookup", "=LOOKUP", "Lookup file with lib id <> other id") {|argument| options.lookup = argument } |
39 | 40 | opts.on("-h","--help","Display the usage information") {
|
40 | 41 | puts opts
|
41 | 42 | exit
|
|
49 | 50 | options.centre ? center = options.centre : center = "IKMB"
|
50 | 51 | options.platform ? platform = options.platform : platform = "NovaSeq6000"
|
51 | 52 |
|
| 53 | +lookup = {} |
| 54 | +if options.lookup |
| 55 | + IO.readlines(options.lookup).each do |line| |
| 56 | + key,value = line.strip.split("\t") |
| 57 | + lookup[key] = value |
| 58 | + end |
| 59 | +end |
52 | 60 |
|
53 | 61 | if options.pacbio
|
54 | 62 |
|
55 | 63 | fastq_files = Dir["#{options.folder}/*.fastq.gz"]
|
56 | 64 |
|
57 |
| - puts "IndivID;SampleID;R1" |
| 65 | + puts "patient;sample;R1" |
58 | 66 |
|
59 | 67 | fastq_files.each do |file|
|
60 | 68 |
|
|
72 | 80 |
|
73 | 81 | groups = fastq_files.group_by{|f| f.split("/")[-1].split(/_R[1,2]/)[0] }
|
74 | 82 |
|
75 |
| - puts "IndivID;SampleID;libraryID;rgID;rgPU;R1;R2" |
| 83 | + puts "patient;sample;library;rgid;rgpu;R1;R2" |
76 | 84 |
|
77 | 85 | groups.each do |group, files|
|
78 | 86 |
|
79 |
| - left,right = files.sort.collect{|f| File.absolute_path(f)} |
| 87 | + left,right = files.sort.collect{|f| File.absolute_path(f)} |
| 88 | + |
| 89 | + abort "Missing one member of the pair for #{group}" unless left && right |
| 90 | + |
| 91 | + library = group.split("_")[1] |
| 92 | + sample = library |
80 | 93 |
|
81 |
| - library = group.split("_L00")[0] |
82 |
| - sample = group.split("_L00")[0] |
| 94 | + if lookup.has_key?(library) |
| 95 | + sample = "#{lookup[library]}" |
| 96 | + end |
83 | 97 |
|
84 |
| - e = `zcat #{left} | head -n1 ` |
85 |
| - e.gsub!("@", "") |
86 |
| - header = e |
| 98 | + e = `zcat #{left} | head -n1 ` |
| 99 | + e.gsub!("@", "") |
| 100 | + header = e |
87 | 101 |
|
88 |
| - instrument,run_id,flowcell_id,lane,tile,x,y = header.split(" ")[0].split(":") |
| 102 | + instrument,run_id,flowcell_id,lane,tile,x,y = header.split(" ")[0].split(":") |
89 | 103 |
|
90 |
| - index = header.split(" ")[-1].split(":")[-1] |
91 |
| - readgroup = flowcell_id + "." + lane + "." + library |
| 104 | + index = header.split(" ")[-1].split(":")[-1] |
| 105 | + readgroup = flowcell_id + "." + lane + "." + library |
92 | 106 |
|
93 |
| - pgu = flowcell_id + "." + lane + "." + index |
| 107 | + pgu = flowcell_id + "." + lane + "." + index |
94 | 108 |
|
95 |
| - puts "#{sample};Sample_#{sample};#{library};#{readgroup};#{pgu};#{left};#{right}" |
| 109 | + puts "#{sample};#{sample};#{library};#{readgroup};#{pgu};#{left};#{right}" |
96 | 110 |
|
97 | 111 | end
|
98 | 112 |
|
|
0 commit comments