Skip to content

Commit

Permalink
Merge branch 'develop' into feature/rust
Browse files Browse the repository at this point in the history
  • Loading branch information
ACEnglish committed Feb 3, 2024
2 parents 7c0a018 + 9041ef7 commit 5170101
Show file tree
Hide file tree
Showing 5 changed files with 28 additions and 21 deletions.
4 changes: 4 additions & 0 deletions docs/api/truvari.rst
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,10 @@ performance_metrics
^^^^^^^^^^^^^^^^^^^
.. autofunction:: performance_metrics

region_filter
^^^^^^^^^^^^^
.. autofunction:: region_filter

restricted_float
^^^^^^^^^^^^^^^^
.. autofunction:: restricted_float
Expand Down
4 changes: 3 additions & 1 deletion truvari/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@
:meth:`opt_gz_open`
:meth:`optimize_df_memory`
:meth:`performance_metrics`
:meth:`region_filter`
:meth:`restricted_float`
:meth:`restricted_int`
:meth:`setup_logging`
Expand Down Expand Up @@ -145,7 +146,8 @@

from truvari.region_vcf_iter import (
RegionVCFIterator,
build_anno_tree
build_anno_tree,
region_filter,
)

from truvari.stratify import (
Expand Down
2 changes: 2 additions & 0 deletions truvari/phab.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,8 @@ def make_consensus(data, ref_fn):
vcf_fn, sample, prefix = data
reference = pysam.FastaFile(ref_fn)
vcf = pysam.VariantFile(vcf_fn)
# could swap these fors with data structures and more memory..
# or I could do the work to iter chroms and pretty much -T it
o_samp = 'p:' + sample if prefix else sample
ret = {}
for ref in list(reference.references):
Expand Down
23 changes: 4 additions & 19 deletions truvari/refine.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,21 +169,6 @@ def make_variant_report(regions):
return summary


def region_filter(vcf, tree, inside=True):
"""
Given a VariantRecord iter and defaultdict(IntervalTree), yield variants which are inside/outside the tree regions
"""
for entry in vcf:
start, end = truvari.entry_boundaries(entry)
m_ovl = list(tree[entry.chrom].overlap(start, end))
if len(m_ovl) == 1:
end_within = truvari.entry_variant_type(entry) != truvari.SV.INS
is_within = truvari.coords_within(start, end, m_ovl[0].begin, m_ovl[0].end - 1, end_within)
if is_within == inside:
yield entry
elif not inside:
yield entry

def recount_variant_report(orig_dir, phab_dir, regions):
"""
Count original variants not in refined regions and
Expand All @@ -201,19 +186,19 @@ def recount_variant_report(orig_dir, phab_dir, regions):
summary.update(json.load(fh))

vcf = pysam.VariantFile(os.path.join(orig_dir, 'tp-base.vcf.gz'))
tpb = len(list(region_filter(vcf, tree, False)))
tpb = len(list(truvari.region_filter(vcf, tree, False)))
summary["TP-base"] += tpb

vcf = pysam.VariantFile(os.path.join(orig_dir, 'tp-comp.vcf.gz'))
tpc = len(list(region_filter(vcf, tree, False)))
tpc = len(list(truvari.region_filter(vcf, tree, False)))
summary["TP-comp"] += tpc

vcf = pysam.VariantFile(os.path.join(orig_dir, 'fp.vcf.gz'))
fp = len(list(region_filter(vcf, tree, False)))
fp = len(list(truvari.region_filter(vcf, tree, False)))
summary["FP"] += fp

vcf = pysam.VariantFile(os.path.join(orig_dir, 'fn.vcf.gz'))
fn = len(list(region_filter(vcf, tree, False)))
fn = len(list(truvari.region_filter(vcf, tree, False)))
summary["FN"] += fn

summary["comp cnt"] += tpc + fp
Expand Down
16 changes: 15 additions & 1 deletion truvari/region_vcf_iter.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
from intervaltree import IntervalTree
import truvari


class RegionVCFIterator():
"""
Helper class to specify include regions of the genome when iterating a VCF
Expand Down Expand Up @@ -164,3 +163,18 @@ def build_anno_tree(filename, chrom_col=0, start_col=1, end_col=2, one_based=Fal
tree[chrom].addi(start, end + 1, data=m_idx)
idx += 1
return tree, idx

def region_filter(vcf, tree, inside=True):
"""
Given a VariantRecord iter and defaultdict(IntervalTree), yield variants which are inside/outside the tree regions
"""
for entry in vcf:
start, end = truvari.entry_boundaries(entry)
m_ovl = list(tree[entry.chrom].overlap(start, end))
if len(m_ovl) == 1:
end_within = truvari.entry_variant_type(entry) != truvari.SV.INS
is_within = truvari.coords_within(start, end, m_ovl[0].begin, m_ovl[0].end - 1, end_within)
if is_within == inside:
yield entry
elif not inside:
yield entry

0 comments on commit 5170101

Please sign in to comment.