Skip to content

Commit

Permalink
Merge pull request #122 from wtsi-npg/devel
Browse files Browse the repository at this point in the history
prep for release 41.1
  • Loading branch information
dozy authored Jun 23, 2019
2 parents 0fb8131 + b6c6208 commit 1165161
Show file tree
Hide file tree
Showing 5 changed files with 34 additions and 21 deletions.
1 change: 1 addition & 0 deletions MANIFEST
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,7 @@ t/data/runfolders/110804_HS22_06642_A_B020JACXX/Data/Intensities/PB_basecalls_20
t/data/runfolders/110804_HS22_06642_A_B020JACXX/Data/Intensities/PB_basecalls_20110813-160456/no_cal/archive/lane2/qc/6642_2#4.bam_flagstats.json
t/data/runfolders/110804_HS22_06642_A_B020JACXX/Data/Intensities/PB_basecalls_20110813-160456/no_cal/archive/lane2/qc/6642_2#4_human.bam_flagstats.json
t/data/runfolders/110804_HS22_06642_A_B020JACXX/Data/Intensities/PB_basecalls_20110813-160456/no_cal/archive/lane2/qc/6642_2#5_xahuman.bam_flagstats.json
t/data/runfolders/110804_HS22_06642_A_B020JACXX/Data/Intensities/PB_basecalls_20110813-160456/no_cal/archive/lane2/qc/6642_2#5.bam_flagstats.json
t/data/runfolders/110804_HS22_06642_A_B020JACXX/Data/Intensities/PB_basecalls_20110813-160456/no_cal/archive/lane2/qc/6642_2#6_yhuman.bam_flagstats.json
t/data/runfolders/110804_HS22_06642_A_B020JACXX/Data/Intensities/PB_basecalls_20110813-160456/no_cal/archive/qc/6642_1.bam_flagstats.json
t/data/runfolders/110804_HS22_06642_A_B020JACXX/Data/Intensities/PB_basecalls_20110813-160456/no_cal/archive/qc/6642_2.tag_metrics.json
Expand Down
39 changes: 22 additions & 17 deletions lib/npg_warehouse/loader/autoqc.pm
Original file line number Diff line number Diff line change
Expand Up @@ -303,30 +303,32 @@ sub _tag_decode_stats {
sub _bam_flagstats {
my ($self, $result, $composition) = @_;

my $check_name = $result->check_name;
if ($check_name =~ /phix/xsmg) {
return ();
my $subset = $composition->get_component(0)->subset();

if ($subset && ($subset !~ /human/xsm)) {
return ();
}

my $c = $result->composition->get_component(0)->subset ?
# Only results with no subset or with some flavour of human subset get past this point
my $c = $subset ?
$self->_composition_without_subset($composition) : $composition;
my $data = $self->_basic_data($c);

$check_name =~ s/[ ]flagstats//xsmg;
$check_name =~ s/[ ]/_/xsmg;
foreach my $method (qw(percent_mapped_reads percent_duplicate_reads)) {
#####
# Separate columns for 'human' split data might have been originally
# created for pathogen studies. Eventually the data for y and xa
# splits got saved to these columns. So the names of these columns
# are now slightly misleading.
my $column_name_prefix = $subset ? 'human_' : q[];
$column_name_prefix = q[bam_] . $column_name_prefix;

foreach my $m (qw(percent_mapped percent_duplicate)) {
my $method = join q[_], $m, 'reads';
if (my $r = $result->$method ) {
my $m = $method;
$m =~ s/_reads\z//xsmg;
my $check = $check_name;
$check =~ s/_nonhuman//xsmg;
$check =~ s/_xahuman/_human/xsmg;
$check =~ s/_yhuman/_human/xsmg;
$data->{$check.q[_].$m} = $r;
$data->{$column_name_prefix . $m} = $r;
}
}

if ($check_name !~ /_human/xsmg) {
if (not $subset) { # No splits below here
my $num_reads = $result->total_reads;
$data->{'bam_num_reads'} = $num_reads;
my $chimeric_reads = $self->_truncate_float(
Expand All @@ -337,7 +339,10 @@ sub _bam_flagstats {
foreach my $method ( map { 'target_' . $_ } qw(
filter length mapped_reads mapped_bases
proper_pair_mapped_reads coverage_threshold
percent_gt_coverage_threshold)) {
percent_gt_coverage_threshold
autosome_coverage_threshold
autosome_percent_gt_coverage_threshold
)) {
if (my $r = $result->$method ) {
$data->{$method} = $r;
}
Expand Down
12 changes: 9 additions & 3 deletions t/10-npg_warehouse-loader-autoqc.t
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,7 @@ subtest 'retrieve data for run 6624' => sub {
};

subtest 'retrieve data for run 6642' => sub {
plan tests => 16;
plan tests => 20;

my $id_run = 6642;
lives_ok {$schema_npg->resultset('Run')->update_or_create({folder_path_glob => $folder_glob, id_run => $id_run, })}
Expand Down Expand Up @@ -259,12 +259,16 @@ subtest 'retrieve data for run 6642' => sub {
$d = $compos_pkg->new(components =>
[$compon_pkg->new(id_run => $id_run, position => 2, tag_index => 5)])->digest;
cmp_ok(sprintf('%.2f',$auto->{$d}->{bam_human_percent_mapped}), q(==), 55.3, 'bam xahuman mapped percent as human');
cmp_ok(sprintf('%.2f',$auto->{$d}->{bam_human_percent_duplicate}), q(==), 68.09, 'bam xahuman duplicate percent as human');
cmp_ok(sprintf('%.2f',$auto->{$d}->{bam_human_percent_duplicate}), q(==), 68.09, 'bam duplicate percent as human');
cmp_ok(sprintf('%.2f',$auto->{$d}->{bam_percent_mapped}), q(==), 96.3, 'bam xahuman mapped percent as human');
cmp_ok(sprintf('%.2f',$auto->{$d}->{bam_percent_duplicate}), q(==), 6.34, 'bam duplicate percent as human');

$d = $compos_pkg->new(components =>
[$compon_pkg->new(id_run => $id_run, position => 2, tag_index => 6)])->digest;
cmp_ok(sprintf('%.2f',$auto->{$d}->{bam_human_percent_mapped}), q(==), 55.3, 'bam yhuman mapped percent as human');
cmp_ok(sprintf('%.2f',$auto->{$d}->{bam_human_percent_duplicate}), q(==), 68.09, 'bam yhuman duplicate percent as human');
is ($auto->{$d}->{bam_percent_mapped}, undef, 'data not set');
is ($auto->{$d}->{bam_percent_duplicate}, undef, 'data not set');
};

subtest 'retrieve rna data' => sub {
Expand Down Expand Up @@ -313,7 +317,7 @@ subtest 'retrieve gbs data' => sub {
};

subtest 'retrieve target stats data' => sub {
plan tests => 11;
plan tests => 13;

my $id_run = 27116;
lives_ok {$schema_npg->resultset('Run')->update_or_create({folder_path_glob => $folder_glob, id_run => $id_run, })}
Expand All @@ -333,6 +337,8 @@ subtest 'retrieve target stats data' => sub {
cmp_ok(sprintf('%.2f',$auto->{$d}->{target_mapped_reads}), q(==), 58704583, 'target - target_mapped_reads');
cmp_ok(sprintf('%.2f',$auto->{$d}->{target_percent_gt_coverage_threshold}), q(==), 0.15, 'target - target_percent_gt_coverage_threshold');
cmp_ok(sprintf('%.2f',$auto->{$d}->{target_proper_pair_mapped_reads}), q(==), 57355728, 'target - target_proper_pair_mapped_reads');
cmp_ok(sprintf('%.2f',$auto->{$d}->{target_autosome_coverage_threshold}), q(==), 15, 'target - target_autosome_coverage_threshold');
cmp_ok(sprintf('%.2f',$auto->{$d}->{target_autosome_percent_gt_coverage_threshold}), q(==), 0.17, 'target - target_autosome_percent_gt_coverage_threshold');
cmp_ok(sprintf('%.2f',$auto->{$d}->{nrd_percent}), q(==), 0.00, 'nrd');
};

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"id_run":6642,"mate_mapped_defferent_chr":588198,"percent_duplicate":0.063378,"unpaired_mapped_reads":610139,"library_size":537513816,"position":2,"mate_mapped_defferent_chr_5":460869,"library":"3055213","histogram":{"71.0":"8.591995","70.0":"8.591822","38.0":"8.515275","36.0":"8.493366","55.0":"8.583787","93.0":"8.593224","58.0":"8.58674","7.0":"4.978982","30.0":"8.383338","54.0":"8.582532","47.0":"8.567684","8.0":"5.399609","41.0":"8.539472","35.0":"8.480202","29.0":"8.355684","20.0":"7.869717","40.0":"8.532381","17.0":"7.544506","78.0":"8.592758","11.0":"6.389905","45.0":"8.560489","67.0":"8.591153","68.0":"8.591404","42.0":"8.545738","31.0":"8.407775","66.0":"8.590869","4.0":"3.354564","74.0":"8.592403","89.0":"8.593169","62.0":"8.589305","22.0":"8.028337","64.0":"8.590183","16.0":"7.406373","48.0":"8.570666","63.0":"8.589771","69.0":"8.591626","60.0":"8.58818","59.0":"8.587505","77.0":"8.592685","81.0":"8.592929","72.0":"8.592149","28.0":"8.324387","3.0":"2.664594","95.0":"8.593243","90.0":"8.593186","94.0":"8.593235","76.0":"8.592602","98.0":"8.593264","97.0":"8.593258","53.0":"8.581113","99.0":"8.59327","14.0":"7.073132","82.0":"8.592974","33.0":"8.448446","18.0":"7.666563","83.0":"8.593013","6.0":"4.502957","65.0":"8.590547","15.0":"7.250047","1.0":"1.000069","49.0":"8.573302","86.0":"8.593105","23.0":"8.094087","26.0":"8.248885","75.0":"8.592509","21.0":"7.953927","51.0":"8.577688","44.0":"8.556166","5.0":"3.964236","12.0":"6.646332","43.0":"8.551274","34.0":"8.465305","56.0":"8.584895","24.0":"8.152186","9.0":"5.771285","52.0":"8.579506","19.0":"7.774416","57.0":"8.585875","10.0":"6.099705","50.0":"8.57563","92.0":"8.593213","32.0":"8.429367","79.0":"8.592822","88.0":"8.59315","27.0":"8.288969","61.0":"8.588777","39.0":"8.524356","46.0":"8.564309","80.0":"8.592879","100.0":"8.593275","85.0":"8.593078","2.0":"1.883752","87.0":"8.593129","73.0":"8.592284","37.0":"8.504997","84.0":"8.593048","25.0":"8.203523","96.0":"8.593251","91.0":"8.5932","13.0":"6.87291
7"},"tag_index":5,"read_pair_optical_duplicates":4864,"paired_mapped_reads":66504129,"unmapped_reads":5138227,"__CLASS__":"npg_qc::autoqc::results::bam_flagstats","paired_read_duplicates":3953871,"path":"archive/lane2","info":{"Picard-tools":"1.47","Samtools":"0.1.16 (r963:234)"},"unpaired_read_duplicates":560724,"composition":{"components":[{"__CLASS__":"npg_tracking::glossary::composition::component::illumina-87.3-54-gf4380c3","position":2,"id_run":6642,"tag_index":5}],"__CLASS__":"npg_tracking::glossary::composition-87.3-54-gf4380c3"},"num_total_reads":138756624,"proper_mapped_pair":130900013}
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"__CLASS__":"npg_qc::autoqc::results::bam_flagstats-65.4-129-gb693417","composition":{"__CLASS__":"npg_tracking::glossary::composition-87.3-66-g000d8a0","components":[{"__CLASS__":"npg_tracking::glossary::composition::component::illumina-87.3-66-g000d8a0","id_run":27116,"position":1,"tag_index":1}]},"histogram":{"1":"1.01646","10":"5.60454","100":"7.14416","11":"5.8236","12":"6.01149","13":"6.17264","14":"6.31087","15":"6.42943","16":"6.53112","17":"6.61834","18":"6.69316","19":"6.75733","2":"1.8883","20":"6.81237","21":"6.85957","22":"6.90006","23":"6.9348","24":"6.96458","25":"6.99013","26":"7.01205","27":"7.03085","28":"7.04697","29":"7.0608","3":"2.6361","30":"7.07266","31":"7.08283","32":"7.09156","33":"7.09905","34":"7.10546","35":"7.11097","36":"7.11569","37":"7.11975","38":"7.12322","39":"7.1262","4":"3.2775","40":"7.12876","41":"7.13095","42":"7.13283","43":"7.13444","44":"7.13583","45":"7.13701","46":"7.13803","47":"7.1389","48":"7.13965","49":"7.14029","5":"3.82764","50":"7.14085","51":"7.14132","52":"7.14172","53":"7.14207","54":"7.14237","55":"7.14262","56":"7.14284","57":"7.14303","58":"7.14319","59":"7.14333","6":"4.29951","60":"7.14345","61":"7.14355","62":"7.14364","63":"7.14371","64":"7.14378","65":"7.14383","66":"7.14388","67":"7.14392","68":"7.14396","69":"7.14399","7":"4.70425","70":"7.14401","71":"7.14403","72":"7.14405","73":"7.14407","74":"7.14408","75":"7.14409","76":"7.1441","77":"7.14411","78":"7.14412","79":"7.14413","8":"5.05139","80":"7.14413","81":"7.14414","82":"7.14414","83":"7.14415","84":"7.14415","85":"7.14415","86":"7.14415","87":"7.14415","88":"7.14416","89":"7.14416","9":"5.34915","90":"7.14416","91":"7.14416","92":"7.14416","93":"7.14416","94":"7.14416","95":"7.14416","96":"7.14416","97":"7.14416","98":"7.14416","99":"7.14416"},"id_run":27116,"info":{"Check":"npg_qc::autoqc::checks::bam_flagstats","Check_version":"65.4-129-gb693417","markdups_metrics_header":"# 
/software/sciops/pkg/biobambam2/2.0.79/bin/bamstreamingmarkduplicates level=0 verbose=0 tmpfile=/nfs/sf32/ILorHSany_sf32/analysis/181008_HX1_27116_B_HNYKNCCXY/Data/Intensities/BAM_basecalls_20181015-171739/no_cal/archive/lane1/bmdfopt_27116_1#1.tmp M=/nfs/sf32/ILorHSany_sf32/analysis/181008_HX1_27116_B_HNYKNCCXY/Data/Intensities/BAM_basecalls_20181015-171739/no_cal/archive/lane1/27116_1#1.markdups_metrics.txt\n"},"library":"22101385","library_size":209058671,"mate_mapped_defferent_chr":769326,"mate_mapped_defferent_chr_5":375354,"num_total_reads":64898020,"paired_mapped_reads":32191694,"paired_read_duplicates":2822602,"path":"/nfs/team105/ces/NPG_GIT_2/191018/test/qc","percent_duplicate":0.0881866,"position":1,"proper_mapped_pair":63187140,"read_pair_optical_duplicates":560856,"read_pairs_examined":32085453,"tag_index":1,"target_coverage_threshold":15,"target_filter":"F0xF04_target","target_length":2945869055,"target_mapped_bases":8700474137,"target_mapped_reads":58704583,"target_percent_gt_coverage_threshold":0.15,"target_proper_pair_mapped_reads":57355728,"unmapped_reads":268575,"unpaired_mapped_reads":246057,"unpaired_read_duplicates":35442}
{"__CLASS__":"npg_qc::autoqc::results::bam_flagstats-65.4-129-gb693417","composition":{"__CLASS__":"npg_tracking::glossary::composition-87.3-66-g000d8a0","components":[{"__CLASS__":"npg_tracking::glossary::composition::component::illumina-87.3-66-g000d8a0","id_run":27116,"position":1,"tag_index":1}]},"histogram":{"1":"1.01646","10":"5.60454","100":"7.14416","11":"5.8236","12":"6.01149","13":"6.17264","14":"6.31087","15":"6.42943","16":"6.53112","17":"6.61834","18":"6.69316","19":"6.75733","2":"1.8883","20":"6.81237","21":"6.85957","22":"6.90006","23":"6.9348","24":"6.96458","25":"6.99013","26":"7.01205","27":"7.03085","28":"7.04697","29":"7.0608","3":"2.6361","30":"7.07266","31":"7.08283","32":"7.09156","33":"7.09905","34":"7.10546","35":"7.11097","36":"7.11569","37":"7.11975","38":"7.12322","39":"7.1262","4":"3.2775","40":"7.12876","41":"7.13095","42":"7.13283","43":"7.13444","44":"7.13583","45":"7.13701","46":"7.13803","47":"7.1389","48":"7.13965","49":"7.14029","5":"3.82764","50":"7.14085","51":"7.14132","52":"7.14172","53":"7.14207","54":"7.14237","55":"7.14262","56":"7.14284","57":"7.14303","58":"7.14319","59":"7.14333","6":"4.29951","60":"7.14345","61":"7.14355","62":"7.14364","63":"7.14371","64":"7.14378","65":"7.14383","66":"7.14388","67":"7.14392","68":"7.14396","69":"7.14399","7":"4.70425","70":"7.14401","71":"7.14403","72":"7.14405","73":"7.14407","74":"7.14408","75":"7.14409","76":"7.1441","77":"7.14411","78":"7.14412","79":"7.14413","8":"5.05139","80":"7.14413","81":"7.14414","82":"7.14414","83":"7.14415","84":"7.14415","85":"7.14415","86":"7.14415","87":"7.14415","88":"7.14416","89":"7.14416","9":"5.34915","90":"7.14416","91":"7.14416","92":"7.14416","93":"7.14416","94":"7.14416","95":"7.14416","96":"7.14416","97":"7.14416","98":"7.14416","99":"7.14416"},"id_run":27116,"info":{"Check":"npg_qc::autoqc::checks::bam_flagstats","Check_version":"65.4-129-gb693417","markdups_metrics_header":"# 
/software/sciops/pkg/biobambam2/2.0.79/bin/bamstreamingmarkduplicates level=0 verbose=0 tmpfile=/nfs/sf32/ILorHSany_sf32/analysis/181008_HX1_27116_B_HNYKNCCXY/Data/Intensities/BAM_basecalls_20181015-171739/no_cal/archive/lane1/bmdfopt_27116_1#1.tmp M=/nfs/sf32/ILorHSany_sf32/analysis/181008_HX1_27116_B_HNYKNCCXY/Data/Intensities/BAM_basecalls_20181015-171739/no_cal/archive/lane1/27116_1#1.markdups_metrics.txt\n"},"library":"22101385","library_size":209058671,"mate_mapped_defferent_chr":769326,"mate_mapped_defferent_chr_5":375354,"num_total_reads":64898020,"paired_mapped_reads":32191694,"paired_read_duplicates":2822602,"path":"/nfs/team105/ces/NPG_GIT_2/191018/test/qc","percent_duplicate":0.0881866,"position":1,"proper_mapped_pair":63187140,"read_pair_optical_duplicates":560856,"read_pairs_examined":32085453,"tag_index":1,"target_coverage_threshold":15,"target_filter":"F0xF04_target","target_length":2945869055,"target_mapped_bases":8700474137,"target_mapped_reads":58704583,"target_percent_gt_coverage_threshold":0.15,"target_proper_pair_mapped_reads":57355728,"target_autosome_coverage_threshold":15,"target_autosome_filter":"F0xF04_target_autosome","target_autosome_length":2762956253,"target_autosome_mapped_bases":8282053108,"target_autosome_mapped_reads":55438055,"target_autosome_percent_gt_coverage_threshold":0.17,"target_autosome_proper_pair_mapped_reads":54492577,"unmapped_reads":268575,"unpaired_mapped_reads":246057,"unpaired_read_duplicates":35442}

0 comments on commit 1165161

Please sign in to comment.