Skip to content

Commit

Permalink
Merge pull request #188 from hall-lab/develop
Browse files Browse the repository at this point in the history
v0.3.1 Release
  • Loading branch information
ernfrid authored Dec 12, 2016
2 parents 87e89a4 + 46274f0 commit 59c8e55
Show file tree
Hide file tree
Showing 3 changed files with 90 additions and 58 deletions.
140 changes: 85 additions & 55 deletions svtools/bin/svtyper/svtyper
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ from collections import Counter
from argparse import RawTextHelpFormatter

__author__ = "Colby Chiang (colbychiang@wustl.edu)"
__version__ = "v0.1.0"
__version__ = "v0.1.1"

# --------------------------------------
# define functions
Expand Down Expand Up @@ -358,7 +358,7 @@ def bayes_gt(ref, alt, is_dup):

total = ref + alt
log_combo = log_choose(total, alt)

lp_homref = log_combo + alt * math.log(p_alt[0], 10) + ref * math.log(1 - p_alt[0], 10)
lp_het = log_combo + alt * math.log(p_alt[1], 10) + ref * math.log(1 - p_alt[1], 10)
lp_homalt = log_combo + alt * math.log(p_alt[2], 10) + ref * math.log(1 - p_alt[2], 10)
Expand Down Expand Up @@ -736,7 +736,7 @@ class Sample(object):
genome_size = float(sum(self.bam.lengths))
weighted_mean_span = sum([(lib.mean - 2 * lib.read_length + 2 * min_aligned) * lib.prevalence for lib in self.lib_dict.values()])
exp_spanning_depth = (weighted_mean_span * self.bam_mapped) / genome_size

self.exp_spanning_depth = exp_spanning_depth

return
Expand Down Expand Up @@ -779,7 +779,7 @@ class SamFragment(object):
return
else:
self.read_set.add(read_hash)

if is_primary(read):
# add the primary reads
self.primary_reads.append(read)
Expand Down Expand Up @@ -880,7 +880,7 @@ class SamFragment(object):
return False

return True

# calculate the probability that a read pair is concordant at a breakpoint,
# given the putative variant size and insert distribution of the library.
def p_concordant(self, var_length=None):
Expand Down Expand Up @@ -1008,10 +1008,13 @@ class SplitRead(object):
# set query_left and query_right splitter by alignment position on the reference
# this is used for non-overlap and off-diagonal filtering
# (query_left and query_right are random when on different chromosomes
if (self.read.reference_name == mate_chrom
and self.read.pos > mate_pos):
self.query_left = b
self.query_right = a
if self.read.reference_name == mate_chrom:
if self.read.pos > mate_pos:
self.query_left = b
self.query_right = a
else:
self.query_left = a
self.query_right = b
else:
self.set_order_by_clip(a, b)

Expand Down Expand Up @@ -1059,11 +1062,26 @@ class SplitRead(object):

return non_overlap

@staticmethod
def check_split_support(split, chrom, pos, is_reverse, split_slop):
if split.chrom != chrom:
return False

if is_reverse:
coord = split.reference_start
else:
coord = split.reference_end

if (coord > pos + split_slop
or coord < pos - split_slop):
return False
return True

def is_split_straddle(self,
chromA, posA, ciA,
chromB, posB, ciB,
o1_is_reverse, o2_is_reverse,
split_slop):
svtype, split_slop):

# arrange the SV breakends from left to right
if (chromA != chromB
Expand All @@ -1087,44 +1105,56 @@ class SplitRead(object):
is_reverse_right = o2_is_reverse


left_split = True
right_split = True

# check split chromosomes against variant
if self.query_left.chrom != chrom_left:
left_split = False
if self.query_right.chrom != chrom_right:
right_split = False

''' Check left side of variant '''
# left side of variant is negative strand
if is_reverse_left:
if self.query_left.reference_start > pos_left + split_slop:
left_split = False
if self.query_left.reference_start < pos_left - split_slop:
left_split = False

# left side of variant is positive strand
else:
if self.query_left.reference_end > pos_left + split_slop:
left_split = False
if self.query_left.reference_end < pos_left - split_slop:
left_split = False

''' Check right side of variant '''
# right side of variant is negative strand
if is_reverse_right:
if self.query_right.reference_start > pos_right + split_slop:
right_split = False
if self.query_right.reference_start < pos_right - split_slop:
right_split = False

# right side of variant is positive strand
else:
if self.query_right.reference_end < pos_right - split_slop:
right_split = False
if self.query_right.reference_end > pos_right + split_slop:
right_split = False
left_split = False
right_split = False

if (not self.is_soft_clip) or svtype == 'DEL' or svtype == 'INS':
left_split = self.check_split_support(self.query_left,
chrom_left,
pos_left,
is_reverse_left,
split_slop)
right_split = self.check_split_support(self.query_right,
chrom_right,
pos_right,
is_reverse_right,
split_slop)
elif svtype == 'DUP':
left_split = self.check_split_support(self.query_left,
chrom_right,
pos_right,
is_reverse_right,
split_slop)
right_split = self.check_split_support(self.query_right,
chrom_left,
pos_left,
is_reverse_left,
split_slop)
elif svtype == 'INV':
# check all possible sides
left_split_left = self.check_split_support(self.query_left,
chrom_left,
pos_left,
is_reverse_left,
split_slop)
left_split_right = self.check_split_support(self.query_left,
chrom_right,
pos_right,
is_reverse_right,
split_slop)
left_split = left_split_left or left_split_right
right_split_left = self.check_split_support(self.query_right,
chrom_left,
pos_left,
is_reverse_left,
split_slop)
right_split_right = self.check_split_support(self.query_right,
chrom_right,
pos_right,
is_reverse_right,
split_slop)
right_split = right_split_left or right_split_right

return (left_split, right_split)

Expand All @@ -1136,12 +1166,12 @@ class SplitRead(object):
value = 'A'
else:
value = 'R'

self.read.set_tag('XV', value)

return

def set_order_by_clip(self, a, b):
def set_order_by_clip(self, a, b):
'''
Determine which SplitPiece is the leftmost based
on the side of the longest clipping operation
Expand Down Expand Up @@ -1331,7 +1361,7 @@ def sv_genotype(bam_string,
else:
sys.stderr.write('Reading library metrics from %s...' % lib_info_path)
sample = Sample.from_lib_info(bam_list[i], lib_info, min_lib_prevalence)

sample.set_exp_seq_depth(min_aligned)
sample.set_exp_spanning_depth(min_aligned)
sample_list.append(sample)
Expand Down Expand Up @@ -1416,7 +1446,7 @@ def sv_genotype(bam_string,
sys.stderr.write('Warning: Unsupported SV type at variant %s (%s). Skipping.\n' % (var.var_id, svtype))
vcf_out.write(var.get_var_string() + '\n')
continue

if svtype == 'BND':
if var.info['MATEID'] in breakend_dict:
var2 = var
Expand All @@ -1436,7 +1466,7 @@ def sv_genotype(bam_string,
if var2.alt[-1] == '[' or var2.alt[-1] == ']':
o2_is_reverse = False
else: o2_is_reverse = True

# remove the BND from the breakend_dict
# to free up memory
del breakend_dict[var.var_id]
Expand Down Expand Up @@ -1504,7 +1534,7 @@ def sv_genotype(bam_string,
split_lr = split.is_split_straddle(chromA, posA, ciA,
chromB, posB, ciB,
o1_is_reverse, o2_is_reverse,
split_slop)
svtype, split_slop)
# p_alt = prob_mapq(split.query_left) * prob_mapq(split.query_right)
p_alt = (prob_mapq(split.query_left) * split_lr[0] + prob_mapq(split.query_right) * split_lr[1]) / 2.0
if split.is_soft_clip:
Expand All @@ -1519,7 +1549,7 @@ def sv_genotype(bam_string,
# -------------------------------------
# Check for paired-end evidence
# -------------------------------------

# tally spanning alternate pairs
if svtype == 'DEL' and posB - posA < 2 * fragment.lib.sd:
alt_straddle = False
Expand Down Expand Up @@ -1588,7 +1618,7 @@ def sv_genotype(bam_string,
if p_conc is not None:
p_reference = p_conc * prob_mapq(fragment.readA) * prob_mapq(fragment.readB)
ref_span += (ref_straddle_A + ref_straddle_B) * p_reference / 2

fragment.tag_span(1 - p_conc)
write_fragment = True

Expand Down
6 changes: 3 additions & 3 deletions svtools/bin/svtyper/test/example.gt.vcf
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
##fileformat=VCFv4.1
##fileDate=20161116
##fileDate=20161212
##reference=/shared/genomes/b37/full/human_g1k_v37.fasta
##INFO=<ID=TOOL,Number=1,Type=String,Description="Tool used to generate variant call">
##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">
Expand Down Expand Up @@ -268,7 +268,7 @@
3 20311457 156765 A <DEL> 0.00 . TOOL=LUMPY;SVTYPE=DEL;SVLEN=-6276;END=20317733;STR=+-:20;IMPRECISE;CIPOS=-3,35;CIEND=0,0;EVENT=156765;SUP=20;PESUP=20;SRSUP=0;EVTYPE=PE;PRIN GT:GQ:DP:RO:AO:SQ:GL:QR:QA:RS:AS:ASC:RP:AP:AB 0/0:200:115:115:0:0.00:-0,-34,-114:114:0:50:0:0:64:0:0
3 20492792 156827 G <DEL> 0.00 . TOOL=LUMPY;SVTYPE=DEL;SVLEN=-65;END=20492857;STR=+-:11;CIPOS=0,0;CIEND=0,0;EVENT=156827;SUP=11;PESUP=0;SRSUP=11;EVTYPE=SR;PRIN GT:GQ:DP:RO:AO:SQ:GL:QR:QA:RS:AS:ASC:RP:AP:AB 0/0:147:49:49:0:0.00:-0,-15,-49:49:0:49:0:0:0:0:0
3 20919493 156974 T <DEL> 0.00 . TOOL=LUMPY;SVTYPE=DEL;SVLEN=-272;END=20919765;STR=+-:20;CIPOS=0,0;CIEND=0,0;EVENT=156974;SUP=20;PESUP=6;SRSUP=14;EVTYPE=PE,SR;PRIN GT:GQ:DP:RO:AO:SQ:GL:QR:QA:RS:AS:ASC:RP:AP:AB 0/0:200:115:114:0:0.00:-0,-34,-113:113:0:53:0:0:60:0:0
3 21723705 157253 T <DEL> 650.38 . TOOL=LUMPY;SVTYPE=DEL;SVLEN=-2260;END=21725965;STR=+-:167;IMPRECISE;CIPOS=-1,8;CIEND=-8,0;EVENT=157253;SUP=167;PESUP=167;SRSUP=0;EVTYPE=PE;PRIN GT:GQ:DP:RO:AO:SQ:GL:QR:QA:RS:AS:ASC:RP:AP:AB 0/1:200:86:54:31:650.38:-67,-2,-32:53:30:27:1:2:26:27:0.36
3 21723705 157253 T <DEL> 650.38 . TOOL=LUMPY;SVTYPE=DEL;SVLEN=-2260;END=21725965;STR=+-:167;IMPRECISE;CIPOS=-1,8;CIEND=-8,0;EVENT=157253;SUP=167;PESUP=167;SRSUP=0;EVTYPE=PE;PRIN GT:GQ:DP:RO:AO:SQ:GL:QR:QA:RS:AS:ASC:RP:AP:AB 0/1:200:86:54:31:650.38:-67,-2,-32:53:30:27:0:2:26:27:0.36
3 25030887 158374 A <DEL> 797.16 . TOOL=LUMPY;SVTYPE=DEL;SVLEN=-1361;END=25032248;STR=+-:515;CIPOS=0,0;CIEND=0,0;EVENT=158374;SUP=515;PESUP=378;SRSUP=137;EVTYPE=PE,SR;PRIN GT:GQ:DP:RO:AO:SQ:GL:QR:QA:RS:AS:ASC:RP:AP:AB 0/1:200:117:77:39:797.16:-84,-4,-47:76:38:36:4:5:40:28:0.33
3 26123848 158740 A <DEL> 649.93 . TOOL=LUMPY;SVTYPE=DEL;SVLEN=-126;END=26123974;STR=+-:212;CIPOS=0,0;CIEND=0,0;EVENT=158740;SUP=212;PESUP=3;SRSUP=209;EVTYPE=PE,SR;PRIN GT:GQ:DP:RO:AO:SQ:GL:QR:QA:RS:AS:ASC:RP:AP:AB 1/1:56:22:0:22:649.93:-66,-7,-1:0:22:0:22:0:0:0:1
3 26450970 158848 T <DEL> 935.17 . TOOL=LUMPY;SVTYPE=DEL;SVLEN=-1327;END=26452297;STR=+-:907;CIPOS=0,0;CIEND=0,0;EVENT=158848;SUP=907;PESUP=654;SRSUP=253;EVTYPE=PE,SR;PRIN GT:GQ:DP:RO:AO:SQ:GL:QR:QA:RS:AS:ASC:RP:AP:AB 0/1:200:110:67:43:935.17:-96,-2,-38:66:42:32:8:2:34:31:0.39
Expand All @@ -287,7 +287,7 @@
3 61666417 170381 G <DEL> 0.00 . TOOL=LUMPY;SVTYPE=DEL;SVLEN=-267;END=61666684;STR=+-:93;CIPOS=0,0;CIEND=0,0;EVENT=170381;SUP=93;PESUP=20;SRSUP=73;EVTYPE=PE,SR;PRIN GT:GQ:DP:RO:AO:SQ:GL:QR:QA:RS:AS:ASC:RP:AP:AB 0/0:200:126:126:0:0.00:-0,-38,-125:125:0:60:0:0:65:0:0
3 65188871 172115 T <DEL> 644.36 . TOOL=LUMPY;SVTYPE=DEL;SVLEN=-25876;END=65214747;STR=+-:227;CIPOS=0,0;CIEND=0,0;EVENT=172115;SUP=227;PESUP=191;SRSUP=36;EVTYPE=PE,SR;PRIN GT:GQ:DP:RO:AO:SQ:GL:QR:QA:RS:AS:ASC:RP:AP:AB 0/1:150:88:56:31:644.36:-67,-3,-33:55:30:25:7:1:30:22:0.35
3 68634773 173189 T <DEL> 959.09 . TOOL=LUMPY;SVTYPE=DEL;SVLEN=-5479;END=68640252;STR=+-:333;CIPOS=0,0;CIEND=0,0;EVENT=173189;SUP=333;PESUP=258;SRSUP=75;EVTYPE=PE,SR;PRIN GT:GQ:DP:RO:AO:SQ:GL:QR:QA:RS:AS:ASC:RP:AP:AB 0/1:149:123:77:45:959.09:-99,-3,-45:76:44:36:11:3:40:29:0.37
3 68739682 173219 A <DEL> 812.31 . TOOL=LUMPY;SVTYPE=DEL;SVLEN=-8180;END=68747862;STR=+-:687;CIPOS=0,0;CIEND=0,0;EVENT=173219;SUP=687;PESUP=531;SRSUP=156;EVTYPE=PE,SR;PRIN GT:GQ:DP:RO:AO:SQ:GL:QR:QA:RS:AS:ASC:RP:AP:AB 0/1:200:91:54:37:812.31:-83,-2,-30:53:36:25:10:2:28:23:0.4
3 68739682 173219 A <DEL> 785.32 . TOOL=LUMPY;SVTYPE=DEL;SVLEN=-8180;END=68747862;STR=+-:687;CIPOS=0,0;CIEND=0,0;EVENT=173219;SUP=687;PESUP=531;SRSUP=156;EVTYPE=PE,SR;PRIN GT:GQ:DP:RO:AO:SQ:GL:QR:QA:RS:AS:ASC:RP:AP:AB 0/1:152:91:54:36:785.32:-80,-2,-30:53:35:25:9:2:28:23:0.4
3 73063077 174617 C <DEL> 1244.15 . TOOL=LUMPY;SVTYPE=DEL;SVLEN=-476;END=73063553;STR=+-:66;CIPOS=0,0;CIEND=0,0;EVENT=174617;SUP=66;PESUP=0;SRSUP=66;EVTYPE=SR;PRIN GT:GQ:DP:RO:AO:SQ:GL:QR:QA:RS:AS:ASC:RP:AP:AB 0/1:200:107:54:53:1244.15:-126,-1,-25:53:52:21:13:2:32:36:0.5
3 73939035 174881 A <DEL> 0.00 . TOOL=LUMPY;SVTYPE=DEL;SVLEN=-342;END=73939377;STR=+-:219;CIPOS=0,0;CIEND=0,0;EVENT=174881;SUP=219;PESUP=118;SRSUP=101;EVTYPE=PE,SR;PRIN GT:GQ:DP:RO:AO:SQ:GL:QR:QA:RS:AS:ASC:RP:AP:AB 0/0:200:140:140:0:0.00:-0,-42,-139:139:0:65:0:0:74:0:0
3 78779423 176268 A <DEL> 2008.88 . TOOL=LUMPY;SVTYPE=DEL;SVLEN=-294;END=78779717;STR=+-:686;CIPOS=0,0;CIEND=0,0;EVENT=176268;SUP=686;PESUP=333;SRSUP=353;EVTYPE=PE,SR;PRIN GT:GQ:DP:RO:AO:SQ:GL:QR:QA:RS:AS:ASC:RP:AP:AB 1/1:200:69:0:69:2008.88:-204,-20,-3:0:68:0:20:3:0:44:1
Expand Down
2 changes: 2 additions & 0 deletions svtools/bin/svtyper/test/test.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
#!/usr/bin/env bash

set -e

# # make truth set and sort the BAMs
# BAM=/gscmnt/gc2802/halllab/sv_aggregate/MISC/realigned_BAMs/NA12878/NA12878.bam
# BAM_BASE=`basename $BAM .bam`
Expand Down

0 comments on commit 59c8e55

Please sign in to comment.