From 65db15126e814afbe23af13850c5dc310f3e7632 Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Fri, 16 Apr 2021 12:04:23 -0700
Subject: [PATCH 001/137] Use snakemake to manage mavis scheduling

- replace mavis scheduler with snakemake
- use JSON config for snakemake instead of many command line arguments
---
 .github/workflows/build.yml                   |   1 -
 .gitignore                                    |   9 +-
 MANIFEST.in                                   |   3 +-
 Snakefile                                     | 198 ++++
 docs/configuration/pipeline.md                |   3 +
 docs/hooks.py                                 |   2 -
 .../snakemake.cluster.full-tutorial.png       | Bin 0 -> 28669 bytes
 .../snakemake.cluster.mini-tutorial.png       | Bin 0 -> 12934 bytes
 .../snakemake.validate.mini-tutorial.png      | Bin 0 -> 34485 bytes
 docs/inputs/support.md                        |  23 +-
 docs/tutorials/mini.md                        | 106 +-
 mavis/annotate/file_io.py                     |  17 +-
 mavis/annotate/main.py                        |  86 +-
 mavis/cluster/main.py                         | 128 ++-
 mavis/config.py                               | 814 +++------------
 mavis/constants.py                            |   8 +-
 mavis/main.py                                 | 597 +++--------
 mavis/overlay.py                              | 159 +++
 mavis/pairing/main.py                         |  46 +-
 mavis/schedule/constants.py                   | 109 --
 mavis/schedule/job.py                         | 265 -----
 mavis/schedule/local.py                       | 161 ---
 mavis/schedule/pipeline.py                    | 945 ------------------
 mavis/schedule/scheduler.py                   | 942 -----------------
 mavis/schemas/config.json                     | 781 +++++++++++++++
 mavis/schemas/overlay.json                    | 142 +++
 mavis/summary/main.py                         |  67 +-
 mavis/validate/base.py                        |   5 +-
 mavis/validate/constants.py                   |   2 +-
 mavis/validate/main.py                        | 116 ++-
 setup.py                                      |   8 +-
 tests/end_to_end/__init__.py                  |  21 -
 tests/end_to_end/test_config.py               | 202 ----
 tests/end_to_end/test_convert.py              |   6 +-
 tests/end_to_end/test_full_pipeline.py        | 406 --------
 tests/end_to_end/test_help.py                 |   8 -
 tests/end_to_end/test_overlay.py              | 158 +--
 tests/end_to_end/test_pairing.py              |  51 -
 tests/end_to_end/test_ref_alt_count.py        |   7 +-
 tests/full-tutorial.config.json               |  98 ++
 tests/integration/schedule/__init__.py        |   0
 tests/integration/schedule/test_pipeline.py   | 157 ---
 tests/integration/schedule/test_sge.py        | 748 --------------
 tests/integration/schedule/test_slurm.py      | 617 ------------
 tests/integration/schedule/test_torque.py     | 441 --------
 tests/integration/test_args.py                | 518 +++++-----
 tests/integration/test_checker.py             | 111 --
 tests/integration/test_config.py              |  64 --
 tests/integration/test_mains.py               | 135 ---
 tests/mini-tutorial.config.json               |  64 ++
 .../schedule => tests/snakemake}/__init__.py  |   0
 tests/snakemake/test_mini_workflow.py         |  55 +
 tests/unit/test_config.py                     |  67 --
 tests/util.py                                 |  24 +
 54 files changed, 2419 insertions(+), 7282 deletions(-)
 create mode 100644 Snakefile
 create mode 100644 docs/images/snakemake.cluster.full-tutorial.png
 create mode 100644 docs/images/snakemake.cluster.mini-tutorial.png
 create mode 100644 docs/images/snakemake.validate.mini-tutorial.png
 create mode 100644 mavis/overlay.py
 delete mode 100644 mavis/schedule/constants.py
 delete mode 100644 mavis/schedule/job.py
 delete mode 100644 mavis/schedule/local.py
 delete mode 100644 mavis/schedule/pipeline.py
 delete mode 100644 mavis/schedule/scheduler.py
 create mode 100644 mavis/schemas/config.json
 create mode 100644 mavis/schemas/overlay.json
 delete mode 100644 tests/end_to_end/test_config.py
 delete mode 100644 tests/end_to_end/test_full_pipeline.py
 delete mode 100644 tests/end_to_end/test_pairing.py
 create mode 100644 tests/full-tutorial.config.json
 delete mode 100644 tests/integration/schedule/__init__.py
 delete mode 100644 tests/integration/schedule/test_pipeline.py
 delete mode 100644 tests/integration/schedule/test_sge.py
 delete mode 100644 tests/integration/schedule/test_slurm.py
 delete mode 100644 tests/integration/schedule/test_torque.py
 delete mode 100644 tests/integration/test_checker.py
 delete mode 100644 tests/integration/test_config.py
 delete mode 100644 tests/integration/test_mains.py
 create mode 100644 tests/mini-tutorial.config.json
 rename {mavis/schedule => tests/snakemake}/__init__.py (100%)
 create mode 100644 tests/snakemake/test_mini_workflow.py
 delete mode 100644 tests/unit/test_config.py

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index aa09b829..1601aeba 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -11,7 +11,6 @@ jobs:
     strategy:
       matrix:
         python-version: [3.6, 3.7, 3.8]
-
     steps:
     - uses: actions/checkout@v2
     - name: Set up Python ${{ matrix.python-version }}
diff --git a/.gitignore b/.gitignore
index 01fe3a1e..26638751 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,7 +1,7 @@
 # python generated files
 /.eggs
 /coverage
-/venv
+/venv*
 /.coverage
 *.pyc
 *__pycache__
@@ -32,3 +32,10 @@ junit
 /docs/package/mavis/*/*.md
 # don't ignore subpackage summary files
 !/docs/package/mavis/*/index.md
+
+.snakemake
+output_dir*
+bin
+dag*
+tutorial_data
+reference_inputs
diff --git a/MANIFEST.in b/MANIFEST.in
index 7b0f98c6..165d54e6 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,10 +1,11 @@
 recursive-include docs *
 recursive-include tests *.py 
 include tests/*/data/*
-recursive-include mavis *.py
+recursive-include mavis *.py *.json
 recursive-include tools *.pl *.py *.pm
 recursive-include tab *.py
 include README.md
 include LICENSE.txt
+include mavis/config-schema.json
 prune docs/build
 prune docs/source/auto
diff --git a/Snakefile b/Snakefile
new file mode 100644
index 00000000..2e617c92
--- /dev/null
+++ b/Snakefile
@@ -0,0 +1,198 @@
+from snakemake.utils import validate
+from snakemake import WorkflowError
+import os
+from typing import List, Dict
+import re
+import json
+import pandas as pd
+
+CONTAINER = 'creisle/mavis:latest'
+
+def output_dir(*paths):  # join the given path parts onto config['output_dir']
+    return os.path.join(config['output_dir'], *paths)
+
+# Validate the config first so a bad or missing setting surfaces as a short WorkflowError.
+try:
+    # TODO: replace with URL so that the user does not need a copy of the config schema
+    validate(
+        config,
+        os.path.join(os.getcwd(), 'mavis/schemas/config.json')
+    )
+    for key in [
+        "libraries",
+        "reference.annotations",
+        "output_dir"
+    ]:
+        if key not in config:
+            raise ValueError(f'missing required property: {key}')
+except Exception as err:
+    short_msg = ' '.join(str(err).split('\n')[:2]) # these can get super long
+    raise WorkflowError(short_msg)
+
+INITIALIZED_CONFIG = output_dir('config.json')  # reads config['output_dir'], so built after the checks
+
+libraries = sorted(list(config['libraries']))  # library names, in sorted order
+VALIDATE_OUTPUT = output_dir('{library}/validate/batch-{job_id}/validation-passed.tab')  # template: validate rule output
+CLUSTER_OUTPUT = output_dir('{library}/cluster/batch-{job_id}.tab')  # template: batch file the validate rule reads
+
+# create the cluster inputs and guess the cluster sizes
+def count_total_rows(filenames):
+    """Sum, over the given tab-delimited files, each file's row count after dropping duplicate rows."""
+    return sum(
+        pd.read_csv(path, sep='\t').drop_duplicates().shape[0]
+        for path in filenames
+    )
+
+
+for library in libraries:
+    lib_config = config['libraries'][library]
+    if 'total_batches' in lib_config:
+        continue  # the user fixed the batch count; nothing to estimate
+    # gather the raw files behind each assignment (convert aliases expand to their inputs)
+    inputs = []
+    for assignment in lib_config['assign']:
+        if assignment in config['convert']:
+            inputs.extend(config['convert'][assignment]['inputs'])
+        else:
+            inputs.append(assignment)
+    # if not input by user, estimate the clusters based on the input files
+    max_files = config['cluster.max_files']
+    min_rows = config['cluster.min_clusters_per_file']
+    total_rows = count_total_rows(inputs)
+    if round(total_rows / max_files) >= min_rows:
+        # use max number of jobs
+        lib_config['total_batches'] = max_files
+    else:
+        # never fall to 0 batches: a library with fewer than min_rows rows would get no jobs at all
+        lib_config['total_batches'] = max(1, total_rows // min_rows)
+
+# Parallel lists: libs_args[i] and jobs_args[i] name one (library, batch) job; batch ids start at 1.
+libs_args = []
+jobs_args = []
+for library in libraries:
+    for job_id in range(1, config['libraries'][library]['total_batches'] + 1):
+        libs_args.append(library)
+        jobs_args.append(job_id)
+
+# First rule in the file (Snakemake's default target): asks for the summary stage's stamp file.
+rule all:
+    input: output_dir('summary/MAVIS.COMPLETE')
+
+# Write the in-memory workflow config to config.raw.json as key-sorted, indented JSON.
+rule copy_config:
+    output: output_dir('config.raw.json')
+    run:
+        with open(output_dir('config.raw.json'), 'w') as fh:
+            fh.write(json.dumps(config, sort_keys=True, indent='  '))
+
+# Run `mavis setup` on the raw config copy to produce the config.json passed to the later stages.
+rule init_config:
+    input: rules.copy_config.output
+    output: INITIALIZED_CONFIG
+    container: CONTAINER
+    shell: 'mavis setup --config {input} --outputfile {output}'
+
+# Run `mavis convert` for one config['convert'] alias, writing converted_outputs/{alias}.tab.
+rule convert:
+    output: output_dir('converted_outputs/{alias}.tab')
+    input: rules.init_config.output
+    log: output_dir('converted_outputs/snakemake.{alias}.log.txt')
+    params:
+        file_type=lambda w: config['convert'][w.alias]['file_type'],
+        strand_specific=lambda w: config['convert'][w.alias]['strand_specific'],
+        assume_no_untemplated=lambda w: config['convert'][w.alias]['assume_no_untemplated'],
+        input_files=lambda w: config['convert'][w.alias]['inputs']
+    container: CONTAINER
+    shell:
+        'mavis convert --file_type {params.file_type}'
+            + ' --strand_specific {params.strand_specific}'
+            + ' --assume_no_untemplated {params.assume_no_untemplated}'
+            + ' --inputs {params.input_files}'
+            + ' --outputfile {output}'
+            + ' &> {log}'
+
+
+def get_cluster_inputs(w):
+    """List the inputs assigned to library `w.library`, replacing convert aliases by the convert rule's output."""
+    aliases = config['convert']
+    resolved = []
+    for name in config['libraries'][w.library]['assign']:
+        if name not in aliases:
+            resolved.append(name)  # not an alias: passed through as given
+        else:
+            resolved.extend(expand(rules.convert.output, alias=name))
+    return resolved
+
+# Run `mavis cluster` once per library; the whole {library}/cluster directory is the declared output.
+rule cluster:
+    input: files=get_cluster_inputs,
+        config=rules.init_config.output
+    output: directory(output_dir('{library}/cluster'))
+    log: output_dir('snakemake.cluster.{library}.log.txt')
+    container: CONTAINER
+    shell:
+        'mavis cluster --config {input.config}'
+            + ' --library {wildcards.library}'
+            + ' --inputs {input.files}'
+            + ' --output {output}'
+            + ' &> {log}'
+
+# Only defined when 'skip_stage.validate' is false: run `mavis validate` on one cluster batch file.
+if not config['skip_stage.validate']:
+    rule validate:
+        input: rules.cluster.output
+        params:
+            dirname=lambda w: output_dir(f'{w.library}/validate/batch-{w.job_id}'),
+            inputfile=lambda w: expand(CLUSTER_OUTPUT, library=[w.library], job_id=[w.job_id])
+        output: VALIDATE_OUTPUT
+        log: output_dir('{library}/validate/snakemake.batch-{job_id}.log.txt')
+        container: CONTAINER
+        shell:
+            'mavis validate --config {rules.init_config.output}'
+                + ' --library {wildcards.library}'
+                + ' --inputs {params.inputfile}'
+                + ' --output {params.dirname}'
+                + ' &> {log}'
+
+# Run `mavis annotate` on one batch. NOTE(review): if validate is skipped, {input} is the cluster directory - confirm.
+rule annotate:
+    input: rules.validate.output if not config['skip_stage.validate'] else rules.cluster.output
+    output: stamp=output_dir('{library}/annotate/batch-{job_id}/MAVIS.COMPLETE'),
+        result=output_dir('{library}/annotate/batch-{job_id}/annotations.tab')
+    log: output_dir('{library}/annotate/snakemake.batch-{job_id}.log.txt')
+    container: CONTAINER
+    shell:
+        'mavis annotate --config {rules.init_config.output}'
+            + ' --library {wildcards.library}'
+            + ' --inputs {input}'
+            + ' --output ' + output_dir('{wildcards.library}/annotate/batch-{wildcards.job_id}')
+            + ' &> {log}'
+
+# Run `mavis pairing` over the annotations.tab of every (library, batch) pair in libs_args/jobs_args.
+rule pairing:
+    input: expand(rules.annotate.output.result, zip, library=libs_args, job_id=jobs_args)
+    output: stamp=output_dir('pairing/MAVIS.COMPLETE'),
+        result=output_dir('pairing/mavis_paired.tab')
+    params:
+        dirname=output_dir('pairing')
+    log: output_dir('snakemake.pairing.log.txt')
+    container: CONTAINER
+    shell:
+        'mavis pairing --config {rules.init_config.output}'
+            + ' --inputs {input}'
+            + ' --output {params.dirname}'
+            + ' &> {log}'
+
+# Run `mavis summary` on the pairing result; its MAVIS.COMPLETE stamp is the file rule all requests.
+rule summary:
+    input: rules.pairing.output.result,
+    output: output_dir('summary/MAVIS.COMPLETE')
+    params:
+        dirname=output_dir('summary')
+    log: output_dir('snakemake.summary.log.txt')
+    container: CONTAINER
+    shell:
+        'mavis summary --config {rules.init_config.output}'
+            + ' --inputs {input}'
+            + ' --output {params.dirname}'
+            + ' &> {log}'
diff --git a/docs/configuration/pipeline.md b/docs/configuration/pipeline.md
index 73e2126e..76ffdbbf 100644
--- a/docs/configuration/pipeline.md
+++ b/docs/configuration/pipeline.md
@@ -2,6 +2,9 @@
 
 ## Running MAVIS using a Job Scheduler
 
+MAVIS v3 uses [snakemake](https://snakemake.readthedocs.io/en/stable/) to handle job scheduling
+and setup.
+
 The setup step of MAVIS is set up to use a job scheduler on a
 compute cluster. will generate submission scripts and a wrapper bash
 script for the user to execute on their cluster head node.
diff --git a/docs/hooks.py b/docs/hooks.py
index 3727c646..30314742 100644
--- a/docs/hooks.py
+++ b/docs/hooks.py
@@ -7,7 +7,6 @@
 from mavis.config import REFERENCE_DEFAULTS
 from mavis.illustrate.constants import DEFAULTS as ILLUSTRATION_DEFAULTS
 from mavis.pairing.constants import DEFAULTS as PAIRING_DEFAULTS
-from mavis.schedule.constants import OPTIONS as SUBMIT_OPTIONS
 from mavis.summary.constants import DEFAULTS as SUMMARY_DEFAULTS
 from mavis.util import ENV_VAR_PREFIX
 from mavis.validate.constants import DEFAULTS as VALIDATION_DEFAULTS
@@ -21,7 +20,6 @@ def generate_settings_doc():
             'configuration/settings.md',
             'Configurable Settings',
             [
-                SUBMIT_OPTIONS,
                 REFERENCE_DEFAULTS,
                 SUMMARY_DEFAULTS,
                 PAIRING_DEFAULTS,
diff --git a/docs/images/snakemake.cluster.full-tutorial.png b/docs/images/snakemake.cluster.full-tutorial.png
new file mode 100644
index 0000000000000000000000000000000000000000..cae3105155640b6081170207d6a16a1227dbcf2e
GIT binary patch
literal 28669
zcmZU4Wmr{F*X^NGQo2LBMOwN+TDn^a>FyAa4h5v7LAtveq@+Q*ySwh<eZTMCANTPn
zcw+Ch=bmfKF~^uEOi4lN6*3_*1Oj;_BQ351fk0b<-}e#W!M|K4<2c|4tcA!25eTFr
z8s)(V4*Va=QAJ7=QaVDs3;siHBCYZP0`Z`MK>UIrkUQ{{-wp)g%m#t%8A2d@Ne~F0
zU23DU0C)l3SWZeD^8E7WS4)08cm~N%TFVjql>X0O$pI@t2*fr^MqEVAZT|4l-HT|d
zMd+zbvdNlxiCO0?ZdNAMZ9sBOSac4V4EgoVWi1p79C9_@j;P#_itPdl+)<q{O!XLa
zZuP5NW*ekeW~0HP6x6WLS~h)JEz)O{I<+;&d~HIeABx@LZDgqAj<?r40yT$r{-nNr
zdz{R>?akAR4?_`zA&Mv?N12HtjQH|9jZ_EW%TH;-_?*3v<_=7^KrzIiMaa&;zOEgH
z2<4ziR5c<;h4#+cAMGT#)w63#j4SePFHt-xC1VAsLLLN$LNsDp+(w?NA7Rcv>7A((
zFi_ezkHyFjB$IA~Srnk{W4<~3O28FmnMT%2;x%G|wfKeZA45d3h11A$PKv;uvqroj
z{fz==o5nO*24xx_zxYUujo0PZOaoEtFJ?0$h7`It!E_pB!TK$vaD&>A)G>*laY7q@
zbElZ{S0wmXq!LNNK`biTnD#2K84}xn&G|W8YD-Im&5g&?>+CrRPDd&=cNn#Ph`8Zv
z=cxW5PEUPHr&{7KQbig(_(uAAoecup$#TIX=s)m8QJmg|Smie+F(}>eS0$lnrsJ0u
zdP(!=>7i9Dp?1HHjT=N87+-&WHu4kmL6*<Sq?bJ!V)@$4PRhNf@KR%3H_-A|hXkG+
zikz6V5;a%4lQ=d(^2Jz<JnQ27la|Zrr?l&;xt@!NJtX+T`|TOy>o@qQMw#JMp}L>w
zf8adRUhFxG;+cs)#b6&!<Bjv4u_BMDFez4GM828HQfhGOUH)KMZhlQr(e|vR`Tp?1
zic;a*-+wrCgIZs~0lF!Z#}*i=f;t6x9$WD3>N?j1F&rzIhV}gO{u0G7GUGHR85;5k
zUJ%Zguu|Y(os@0M$|>jR!;~~5JJ^%*usoKD$A_`Qf3ZX4GNAP!&S=N3K06-h{>;SK
zh&AQDO+ivROV9~Ee@E@yh^y^xCONh1^_wU&NDD)%SjXOJ2PddkU{7u6;w8A+`cT29
zK0*ws(sT!bjAPMIu0%|w-iTG?&-%&=?|&pNA=b#VI<ZT!J4HR9*aZ3p3vH|zL<**j
z4;E#{Yy_?sFnUtLc9HTtyf3f6>e>^8<;uHH)c1xblq(GujU&50!@@1T<P+-2Z;7UQ
z9d)6qEPYmyb0TComMRjB688tMvdeTis%yIBL4gjtsJ!;!o^G%ZuUNOE953Ta&|h#n
z|G{c=&wO&rAOL!k()4`5hGCoqRx0Ev%x6aM=@NN(yIkek0K;CTV-UA+qHlcrA;Pyr
zx5c(vVd9znK{wYC+(I&kwfJ-?wjEm{?_jauE%`}YUDT3huuXEE_>+kF?Pl+=mJg*U
zRg6E}x+?!{QW>iZ)`EE6r~8jTK|O2B962zt)Ds8u%5|9C_%L#UsO!~U+kq&;cEt+%
zBg;T3aR0-(7L1EAJe)nTvaESPLPaH}m9%+!qp5Q994Mai+Jndp$G1OBEYMf1uYUo8
zFP@4&B-iH8aL7x~8qUoV5iO72xwQ`qDKLZQ{H`CpmI|NAJ<<Gk`vDb!DP<K+69Ney
z<m24`f@f+N#?2E1-rh60d-j(IwuKx+g>NcSN%u+6%hU%$!I#yq-z0rAmz&Z+nbBz+
z78|k2-tFWKF_{kxs*xQ=X2k{{)1Nk_n6n!(G%xF=ZlTk`*dM<NH>^I*>CsHM2X<K!
z++fRJp+iB8V~MQ>ZG#n@=c|{~9LuN8Jkdc!;33lD$f@dAdOK=GK}$4RC4Chr8JF*Q
zJrd))K5f=@{t~1zYa>JTEN{6do%Pa#P)3{0B=|-LTx)&%*WmU#@JJmZoaulxt?@ve
z>uMihgf6)AL^kWa2q{p={+nqVq~nUPbo|5)E(hYL@6D(F-5c9Ds<9Cw+-61Qa{Ul+
z%up_RZBQ_sp7@soWHbMV=b`0GCN-80?Z?BI@OPqL(u=2g&(P2`eWKY=M7qQ5zphG6
z=wEz0Zn<9aw73t1Il}dtzn5cUqoa1`san&2y)}u{LQ<KoZ7VEeOXWdKM)l`m$MsO`
z)6p5hd$J7XuWT+UWb0*XxEp$YQ=7|qFgL`Z5*+xP`>V(oT}N*_v8y`)J~Vi|QaPQC
zrHD}c6K&k@h!2_C)fAnHRO)2%i<TE!XYK#-+6QS!?n_XpoEBF>b$;Bg(r@iB{XSYm
zb~2_?m={nW{-pd(w!J+6_){y<H-LA;cMBB5XDG!^G5MOI0urx0NWjtpb{r2y8XDaP
zJ7lYlWOh*IDw?+0gR~Hh6~3B3k|+%^1}(vxs<md}T3y!puXiH`d*949hxw|f%S#or
zW{=h&K$P<MUVX^r_RIb^<Vdqdt(kjI^UdK06eQy6Fgv;_{y5Z`@lMYZ3S##AomzfA
zMx|}gU%nGOO{u_Z*~2+aZ6`PmUY;~&07Eb9+sZ+#f(Wt;eG~5g9=Cji?;diTt)5VY
zTI95xW5^eouvOYFyEqKtc~ZVq%igo_?tagC!+j&B0BQeSsZ-e)E!xO1@=4rez!?Hj
z9Q#Hk#!7p6E#NQm#Lamoo2u@@=&bvJ9Fw+V{_L1aN*fx&!+CR^q^vi9Q8?iX9h&N<
zz<7@~wF?_mK$OYjC7Fsp2T%T2@r%SR_e+IgRAT8xUi5{6cu^3cX+5W3EJs`!z*Blp
zPXF-DfUe;*9kXs<>l}iDxENd~;JH^JPg}PtI7K$2K*kEy#WFgdluXTWxAfS4=3w%0
zT@dQ}Kjl2C54C69`ZzGkwn0@$vs~<15GQL!=EdzpiQl1;5Xv9<x0$UddZn?jKJrcN
zzE;uW7UeaZL*<&EF%Ld!qeBb@(Bnuqpoa$8!FMNe$zU&Rn-SbH&Me%xD<-_)Sf0f3
zAz;uVfc#p#Tg&;+lA>O<Ab({OPlbKeGlSSt`l+n66zxmP<qzSuk<;iBkX_ha1N0%Z
zY=~9sR1jeRY_I9?P^w#ZDUzK_RZG@Xp`7NHKFpI4yBb2w`&b}zs`-v3b!q@EB%5j3
zR&1dlO#Mx4kq)76G$^mOd*k-W*SZavg4VmrXiJLyV^Gp+`-c<a{?QPg;2}_qj#DZG
z<i!y|#E(K<BBvMYM~9s{GRx6hvPp0+_a<&bV<Pa-LNL)#GJ13v%OquNu_5iI%drX8
z=aTm!p(D4A^(!iw8d_|Hzt4Mu6D542;l}s>r6nx0nJK5yR4%C~bBnU`x<DX2cA9E-
zVPicqjpDW8mH~zkHhve1gq%5;KO=e%e_-X1+Y)}hdZ(B>#M?++heYW9#;>2(gYqrf
zw1-~CDaq8UpO*n8sZ*bOBDt3k13Mrk>fx`5xY}*&U?35aR4h#g9ItV4vl<%-0Uof5
zX2wIw+So9Yx~)K^x^E?G)>;#c@NN?)4Vy#Sv!eySIgY;P-l}_g*cQqzW#Jffc5}n$
z{$~es;ql&ykHofN6SscxD9LQu_xVw?&Xxq1Nz)I7wHc!?nyk-xMlWP<@AJb+h|p_O
zPl^3Oyh@|739i1}NWAx<R-02YM3$n?J}Ul|XtFX!rVEOdj-%fPDzMxC?Q<^dG2=K>
z!NS8=y?3~7rpt#ymgz|Sk!W)W&Ermt_Vs;v1lqg|XVv@4Wd=aDl#mnCD2Vo5S+Z=p
zw)O+Mz7Re`aftz_Hqu$(uAGVTiVW`TNcMSeG{S!@aP_^+EBF;EYPlg>zsu3T0bLe6
zl#s52>InrC?nNhT$EL-D*$NZu@hpjhSSixgHXT*fE-vfYc49OHlZc8G+s3d(xAn*f
z_TIa@yCgLRv&F^5hi%s86Va=77<9H?{HB!vl-YMPx?PfzlC%B&h)o-Da<evdi?~$>
z@2sZFV`DLU@l96kl9;tBgIaF4QUYukd+||%{vB*~X;hRP5@{{88(=qD8;p(A)_yU4
zxqz#V#JE~Cwe1SDi!bIcPxz9y@~z&l&GNk$xQXUvW+d8YPIjhx9a0wZ1)HSn%c#5i
z+$@Wzk&x$R|2u1|dr6mH&*(^H*-AY9PwqE+<C|X;Zq6(yUjml;B<S<Z$KGu7?;fO&
z;LaUcZH+h8{jYsrSe?SFD;FB9u$##D>T+@@@zT^@Z{GIK9{XJLPtVTwKkI?G=)0}L
zJsoT4y9bwORs=lXqZr&k-_MQu^y2f+vGMWq&&5(nC5clSxiqCcAEdwcOj5`X*Sgvo
zuHcv9<Am+S_uTHbd0F69g}bU~BL9wHH+QAr_EJtl+MZXl?ZW+oOWnOZlAcS}9;eQK
zt}%hMkM@D@m3!3iC#|`87JQR~<uunS5Vbk9br>YAB~6!%u(E9MNhAZG=E?>i3)VmS
z{Acf8E^1-<`QH9`<c7UKh*)L@&^9E8A`D1lW22HTU_ver-R(5-ND>kfN`(qEi8}Rm
z6!Lkq3*RIpXtA*wzRU6u)AB<bcplccJ>70YLBRcno-Q+p{{5T5$;Aa4GF_|{*x1N7
zIW@KVjj>di4PTtxuJT#L-MztZ!JOf2JS-ubUIWYQ;4E&6rC_8lK3vguzk6nPz~*hD
z>EN99mw{rU%<kH-<~dfxxdj%8uwdh19QzIRm1gP#^-lc2)?M0r+dsS>UJFU5N&9Rl
z(0;!r3e`S-G_te9;7YO4HDSX?#U~~Xn%LbS5|ffbK!q3kH`I1eIb=8SfqS)TRIuxO
zR4Dq3_r4Cq@7p(IX=<#4g*tTPVB^2Xj=I*Q46$_Yu|g%^j)ZhR1+j1o<Yik|emzx9
zi>Ad*OBEqcT^Sh+`#oV@QfCU9ldL`;!=rom+FBID1ACs+LrR>!H8)eo$B`a28>;uR
zJYsS`m^zXz(!)}|v=XQn;bbZyHn|)IEFAV$nhlLjP6qKGH)E4VBhV_Owu_;^585wn
zVmaDm{4VQtJ7mxs?Q=mz%;$t?@N||4CI0og)S!)&d)ezPJOTm~#Jq~&OY>=O!qCL7
z6OY$T`B;jLel}l5M!{0Qc!8cN*YD--l*A4~Nb}8Fc+7xlL0OrUkrBmF{W@BZD1{wy
zNQq8erzuB=A=}D9)o8Cq>lu#Mb<=X%_*q%YmA3Ul?W;rsA6`zv4}XpsULzN4*B}!{
z!f$8z3Zjwlac))~wcgtbJl(AKkB;`0)GaYw<fYUVjBL~X=H|fsjF%j_$6Wy$)|3If
zrK1}bz~0m>N4obazo6*(aKMly`+NmleS`6{z53KMjdg>0pVaK)ISQoR9fj0i8~6Fp
zn!{6Fk-ye;83v+haEkqUo0Hml&b&|I;OO=ZWThD{+Wf=Wgt(0PB4Vz|#&dbwvn#}p
zby5(fL6g7ljd}j!`?i-3bCLsQMCU|WH1Yk4Z3o)=BgF45<FQ1hL*ZDFocT)22*1&}
zbEbR^?(iL1cQf~w<6=xv&{dnlAW#3Nx&Dg}XZ6x=Q+$dDc=z&34Cjvo!;;KQC=j9f
zdS6;qP$`z&x6^W-N34Avhkh{g99B=LXtz2^98$eSQZA!z$rVdU+O4Q-Ya_ErbH+V8
zKeq=dnA_Yu@B_`6W7?^2z=_U;Px>ychU{@i%F;6FnJQD-LFv9<K3TiW+1c4`zoagR
zrMH>WV7_}oQ7H24%mEc0eVx1QHWJjs-?vJ%=a~_OF&D4<H1wRmkL~j)@DRht2x^4~
zQ(x5nx{6KyTcICTz9pBkv$G@Y?cFlgBOL}lRLEe`NOo=U@wczQMAXpe4ZRuz&SxvD
z(?4%V<?rSy>e$G<PDCcBrxos6LP$Ljad2?@hla$=%-(Q)S^Cp<J#SUEs?l~gm9RPA
z<jVSk)gY?5S-@)*#Xwk55&c;o1m&ph*{4{i4h;pDsXYvvmQ6q)g2=WW%A<vpFAc5k
zp99K2Tkhwy-9i2z1E!*4VlX~;6B*E<2PfO9j%d}|%rE$Hb#;|As#te0Kd&ut(hkFO
z{^xCl^&Cg4;|MQX%J9QI*h}0?Zd9o7FyI5AB0-uK(I6A0GrsAUll3;S(r@Q%=jH|T
zv=euIxaVteccxdOSla^Igyz1UbL+;A@ujj}YU}8P_mO&{7O9trDkz|_wB8t_zIwHq
zviv{;j)}{n8|u33&3)vY^ttVRi@Pmsy)Sybalcf|+4-H%O&8gSTa)QeCu{5p63_hk
zw$i2}KI^$kxM+bh#NS0lr+Y<Zl&q}i_g9DOZEa>lxF8%XrIiX7I{J#n`S<)&Lg}CS
znDLtZ2iK-PsnV*Zl|fd<GQ})yZNJLq61tWW>GUQa9Z!(R(efRQn(@omq@NOdK3x<=
zpDb&%g)@%c-qs)bC6Ayy`_kH)3Ave)cs@?tYw{4AKR;c!Y1!JcEVOuXKPTVad0c^&
zMBQHl8q(g_)I|HR^z?YIB_aZ~zqe-`!Oq3T)(+xuYqkXV$w6_|$j@rsckkX6mzG93
zJ2|mM_jQ%FJ$blX%~|v{x0w$Ua&d5AB`*sSo8#%)K{ajtw0bG(L<9sziMkF*O^3A$
zH^=fHmv%UoeeP|h=jR7ODCR)s4zqf04t_uIT)rngzx67vsrgQT3SaASxkte4+4c3+
zpH;gTuCSYzG7DH*f15AZRTX7kn~c_mhJJ8?BDId&3R?~h_$JnWFXul!JkL&FMHV+R
z`2MR+LW%dLU6^l;wXs?5KM-86-v4oUXwvAMyyu#2vwy$*EVxg=+n(<BD5|dZqw7bR
zE>}Juu|Yn6?_?o;=_Sr@eT`Dx0fkhaj;|phbH6_p&2|Q(%#l6cNNru;&#gxbjQ|SA
zmd%&)xlO0i6v^{^nA>EKZJcBI_C&(xUP~7NTN`kC_U1?f+Ant0rH9FTT$rO3u8og(
zAnNAV%f3R-b?QZCp4Ug4&pxL;cv_B*j=E-PJ9SUzL>4dnW{#h>HRFV@vL3CHOrSFA
z@v22`*01Ot^IlLz;iv|<6V|t9n+N@|RDTX#s)ktG1k5x3CpD%n#rqx`tV0T?-=>==
zu785R>P%;?(98R<9`znHFE4Z{<4T~Tt=4{Bvi8$N-Y252!d{=Ul9I?mHHNFRA=Yz7
zZftOZg8|x%jb~C~VpB-cxaG$3a<lu}=coTiReWtm)8EVIPkgu|$}hF}@ZnVxAj(==
zS_JofLQfrSJ|`<8G>Jnd)9d*{kDvZPQKg%;opho+ESEK(A?s8a!vUK3Khz1HYg%pp
z9hW9i6Z6*Iq@hit^U&25`&6U+brS+Qsi|ykoVx%i7oa^aYuE5CV}H}3-8j7uD0koU
zBcPMzfH9u<E?9HH`@KR(@6)r0KXi@wKwSKx`p><Oi-kI@6?REejE!-l$rpk9OZkAB
zxr3qLH+wnoR^xnxPIb8wY4Xmk#?}vl58&$i4jl90txk`QPbV)6{}agFX~*B2l0#?f
zeQo_OD`=8W=_T<rpR%yo=EAOh|4jGwp6shMbpU83K$7G?F8yHEIhi$y?y%&Hx?lR6
zEBAt*h7)-o>cP#gFzSN0p0EBSNrJ-2$aw3Ehl@+e&5Z|YHoTbWT{Zynr^k^I(D?p*
zXLU2vxylyMK@RN?$x+!&*o^epE6gu<*p$_EnKOH^0-rH6GwUB1ST!grEDZD15eI;L
z`mE0I-HwMC;#XSA@){3MtcXNtaY$_+2leaCOs{RYc(}!)`CW_rXEN22A*VJgt`uc$
zZRB84s6^T`AA5H+h}7S`w~PDE)-9V`TfGk?iwJ27v}b2$Th+_MrjLUR6K$+YlsP&n
zNk_9pqdxDktL75LI{JNM-sCH|*uGtz8kSnYzky&?=$`N;?|I=k?+FS_3E~|;k$%z3
z@L)+jbV;&(zdol?4JZ4woKNa${?B4~{<wqnXf{~qc{_aP=t$DSf|jLW1FQT~9{#&z
z`I|kBwr?L&xIWyq6qPn2FVuaJGB6<f!J_wVzpTwi(ro|eNKi|m2!|yR#p7{+_Z4r{
z`m>(0GE|s&fcO1XW=jjn;o%{V!v-QeJUsb#*>wh13<BN_mnQq+cQfR#zrl)u+7lQU
z_^=Eao0;LxwqgGc$SNutTBmtcr0194muuazXLc*bwLQ9F_3;SxbEP_5HPMCBW{t~J
z`<FY%$00NO5_H77_Z(yE@G<pw0yPR<Ofl1!gk=p!uI)E~!!o}m7Sx(CW|6Vje2r~S
zTXxvRl=R~z7muCpXT(8)ZPsUfuABNC>F%3%f0lyqgkT(oXso=pAYl-V(^hYS*XlCk
zaPVpQ2d*>6)Gb5uTFHLJ|L*T4?>B#-A#6qqN%rNqb~5BYLX}#Dp~(NTpIpuuWL$Z=
zG?AzjzLra5Njvehv9WP`e!O12zK@NM4}5s=lBT9bM*~C=J{_bLz3+M$ZMv|A2A-E7
z*-_&@i`S83#`^Kk)Zaiz*q<ru8|Ge)=;#oE#1>V)l-JTTNq`oBWfJWi9`;OW7(^Pf
zg@U3oIXStqo1Xy>nYHE;xId_}{}R5-q*V!Pk?u}o+j?uRS)vX|=D~aoifx_e;N=+{
zRb;5#Nthmu`YC{#qvWYZGu8W~&w6?Hi{NNX`px)4w*!hRXK0=OR@<K+4|&8>c|JMs
zy^8wNIGmTIzy5CK5e))5Y&0@KfkTeK)p*+LiUkM3_^_#mww8|7>i<^Zg*KYX&r=M|
zppSew>vciR7;b%dTF8jB9qq0|DPu&f5Qps&edU!ITU`)eIHEDYnbMd(<dXFH{raK?
zGzFet*g|haU`_AW@AVf};ir+Tf|0L?rq??V$hsx)^f(W%!G{03VB7R+czAfW+^DPm
zxM9)upU?dv9Rq`?y*>Mq=W$C?+Cd<?VEgjZRUL@+-F{hLZleCTxHv5QNXduWQQ!2l
zbJo@y%Gbzn>LuE6k&=O+6(I{jBl%X;Yyx)T8n~f-D=Xxs&1bT(fg;Tpf8}N?%}{=(
zq}cc1Xc(^dMuJGY{r`;q>9a%D<!2jyhy}Sr@PK{T@NCwPv}XQ~Dv??xAd}HkhPgHy
z<u99v`&PkvI*%5c#;aGb9@e6w?Voz*lbT4w3Mb-4iia22_qkHOc|-#Fr>W_<l_*;1
ziRXp20v@W+C~rXLU_`x^(;xeTrrI_LYk`JuqVj|Ib@V_~OZz0qaxR<sAe)6+qszUi
z`m;es`_*8)UcKqlwo7+nUN5hISMmu{A2Pbk40OKnbGHT#jXZm3Ps%v)%e%D7<|R*G
z>eh9c?h}l*VwqFGb_qf%wkPSLhBpWxkSY^eAoXHF>>5`?=!W%@FJ38ZjaWWIGA^G8
zA?2-WeRLzA<af@N51V0J47jV$7*bUq%g2gRPVw=dj&JH#khI6ozM!!_U*QWrUQ7f%
z^$zfr^zu%jBee{k_s%xbk9=ys`<8*IN3C7PIllh@OO`?g1p(r^RPyIx;zXV$&^iE1
zMwp(y8}413!cF|^IAW=<`jQLnC=kDMTmcT2q>R(FcX~&krNv{Ve$m{cWzIx*4_H29
zKz_w+WYW=|B9Q?V3Ia(fLqW?>-kBP9Q@^Hg3y5uf8$8&U^YB%Kq+~+j_XGjH4)^#Y
zCAQtlubEUR2)<KYClN7Lqx5-{{Y5H0&Sn;;tAvUV)%=!#2;*0QLq}agneUbYx!*Mu
z-DaG7JAP_XZ<oRUiHcsixJGV?G9{#1LPM=bgFr6!-nPFn`cZ*=2`|pyKt^zvKUS1_
z8Wm-0f=MgVn#D!mrkqKjlV67TKjBa<0p=bO&Z`vy{3x^*1}FP1oRH-R!EIZJa6==n
zIff!-q@K(DikxT}SD09aBl<yWKcs$u<;2{1x|F#dmRKBEMcVM`cpmQu67YfWjt&ox
zN!!!;fU@1%k3^R%F;=9AfYA`ny}?U)@y()GaN7b1>@X0uPoq8GiQ0aiNc|G7R-j6r
zkGwim%eUvLvE?wc6H!YyhWh46xl70=S=+J7jii_3zW&SXZ6_0K6m~X^uetXd3sHu_
z7vB!j?srdqmzsgpvS*4M%d-~^p@c#CUA--T)1U=srS1Oi_Zekm=IAYfd{mlf8EbNP
z*s9HgOl|E7)&@;Qm7~~z1_kZ_6x+f6v(2ASo;duc7#QHWK#5CkGc(Wg)WIoaXY$l4
zh-H%)4!9MT5@m9J_j(1)ouuIMvSWfEEu|gG+!mm@lt&0IYR0$<a-Mz5dD7P#m-M+f
zf*LZv5gQook_Ij69p!USW$6hcdv<Bx&&%`jTnUX6l*NVn*I!%ug7hsMki+*OXi<^}
zvN9C7c)ID6%S87?AiD+ky`fRx61IuvVVH|}{XM7q2ZiD{>&>Q>@RyQ_C6xN2=DzWi
zerh-9MFlU|#L-N;euhBQM~%Ssl#EHpTA9?v>K6C>?c6{(<)}cG3#UAXYl6XjITGB|
zr+$#MQpK^h*xK$mxEpCgyB0lw?H#vN-C~AGM+x+ZCXw)i^IkMv{i1e-+rT=s#+Aad
zp7lf}OYx1uI2U1xTSx8k1>$__#WM3BE$Eawm=R&t)F;v#F_(nj>SLifyvSALR~PTY
z1h+|F1Zqc3buT!qG*rA$RK??i197_7qM&&}cHApa$d4Ipl)>xay#yPf1DiN#cn0Y!
z+c(F(w;pL0K-lbLnGi$#x(Ix}RNHzK2CXymXaYuq3XD=eI2QxFfBsPQ58h*i3NU?d
zT1PpO8-U*6HGR*6FB%gz1tf)ade#$jl42;~(rQ=`VUM&7O8l!%fGLFTW`l%O$(qq7
zc!EcBL58z1eqBJf$fT~Lf73SXAE@9=2>g&$lsUGw_feR~_Q3X{P*4e(m)}O`tr&V>
zTfWXwYJj!6Tt-M-5wGv+>H@8Idxc5giXG3&shg6%zMyt~(fIZ}wralF?}@l+8IM-!
zkBd1(AdBUl2|ZFX$T1u`_jqALmQm9_f9%BO9MyO%^}gx9Acr=hg%~y*?b9jl`o;JA
zSX~ka@UuahmP18c8J&KA^pZrNj2MY^1&)yh*nq%L+`n+8RID%@JpMzk2?Hs~BE+h|
zf+-CIF9MLjFct*ffZ3zJz|;c;E!eJ7`|Sqpl0+TABgs@bZ%Ft-#esapiCV9-f)d$|
zvm=&qu3E6^u^}rLv?PBnn{ocu>)$ydut*@WoFv0{{N$MQ+qEmoK;LR<X|eqBC1lPz
zYJj0sy+~P24K`L97Lyi$oO-K*XIfKLqe8qM_pSm{oLFKL7VfnVaDaFoXf$>2f5mEu
z&@;bobPGTVTIAt8nHh(Ig90Wcup_}%5*Zxow5Qxr%QxzGgiT)|2QcVC+yu<=%oGOb
z6ZnvdvS%$~0uKUI)EBFAs4XOp=WqQV9{M9wfCg(5OSCr%o5(VPFV-9UrH6D+gXov+
zh%J;kqslLRM*HfmRE6@zoxoe7eUq=<9ALyMc<2r7JrYlaGc+|^Fx&<UHz@H-iU!-n
zu)Gm{b!SZIHa1vz<8)qj(f7>;W&i$VR(?JLxOhZ(#Va$+@E$sPdjAf?&}3A8hJ}4j
z9Ot*o;5uLS8T?{Nc9~tlSZdhMhw!`>ofsK#2jrtjc&xp{GUgTk<2ql*{ZM7rH^OaS
zYPB$|_HA!dk82_hM`4jN8d2H%8)KUO3um#`cYSA!(JLY+;k9wvs)FU_lsz+*5m5YQ
zy(JHhpGk_-<0p7SdFW*w6SLVY=pAzTq{SUF>vSX$zqZ)jd9v@=2KRVlM5LuQW^=m2
z{gnM8z1KhL#?D4e=0S2e&72)=M&*$6=4E(8T_3JNGv)G;4#2H}9qfNc3{GS|`uF#D
znn`G6@g1K#KBcX$8YK;H*~cpb`EuvdW5<avv`1B__uJ&Im1SM)Yt{Uhv5dkQQ{A+@
z-?m#-Y$&Utmt;JA7s#T2M%zk%>}IN_!ACY5SAQ^7SHeKOUpc!51_^w)m9ISQQ&y@6
z6KjioXJKGa#~7HcsDAbI&fKX$MTN(Msr0LEoK$oLHZ_Jr$L?`pPYap)btXGZ6%YPo
zes<;tA8>Z<Sy?kXv|$4WPBG`<pP*e0g?A3WuY6pGuy{*#7MgiX;`!q6P5nk%{$gVj
ztX(1r+S{DJlS@!1|93D1v1)faDBj%)6Keq@ZseG&JNSFdE#r#n9|5W4(D*EE&!Zi-
zhq>*2&|{TrG69Lou0fam_*0EGh{S6goZ%aB!V=YNZ_htHqMf;`K8Xd51Dr?g5N(8~
z2-3bjLqrt4R2h3C-XuESPnHh3xdp6GiV4wMbn@K)Z=NJbi~Llmy*tM9M`z*$@>u^V
zzWcXeOQr_g;KAItPqtVAHHh$EDHQp^V9k#6(N}IB$_tmD?{cdMIF6x%sJra8Y_J=7
zD$Iyl82%Q`L<@fXnY3uzx6}g6UCLO*;Nak~{mZe>s{9@^WDe~IH;6R%3aFmZ3OuR0
ztNLickEm?1cu}3c<V>}>F4;7Qc<Q2Woe0D`yz1XAtPXujzT!7l_y%o8g8K9FC1cb$
z?^$4ht_}9XI;rzFaZ1pmW@lwVH#9gNXJII&jRb$*Q3*}L^69~JbG)W;ntL1D-#Fm3
zv~psZGV70NAmtEvOAvE&sl+!Ue8Bp(<bs!NMCdx0^6z23d<{JOum_kHG8A+|Kn&8`
zvG;<G_eKDW2=NWM-8#AC(Ft749d3KRb)eL}&vgFX!1rCH($A8?<}jRRgIcV#pE~9X
zfe9?&PXT6!{?u76cy%JBOreLV1@$nV1+k#<gp%WIi_+6a@Fz|Hbt{U5pX+Gkn|8@p
z{W2*)^To3!1#6M3Fm(L|VMSSjlF%cdy!BX6(Zx&MdW_L<!YBKc<y5pw#ZMapcRss<
z=Ck_x46dP{@n=1eUy`aYjXH&fHb;JJPy{>Zk}fVT4Bgy#w%+HegaJ_i>=|k5fw{c=
z>0MoR`08T<XcQQ@T;v@F$F1f+$+BvTrbTQh0F9-{Hx#5(l28)g`;~UZO8MOdGgzXS
zRq3l=@D#_q7G{}7Y}9w`(L@Br5abU9SCi>h1XSL-0?$_}D7z@_fZsiYrqW+R30BF!
zs-=b#e`3$c>L#vI!7U!Pc>n%AkH-Za#PD#wMu}nj*pfA*3#w`kHM}RF;v;LUv=Yz}
zfQ=eVdD+z{mWct}fbQ$uxS>r;OG`i5cWq?sylDyt%WWBLRW)v>#tGlQzj<&4;>O+m
zy%9(9%4IL9_4)a^vbs7~kyM%}k*Ac~T%{Qui-J$7a2yc$W^1hQ9(;t5k&(w{XX%_`
zIiO8YL1$mCNhy>0j_}pbpFi^spJms$g}!LzBuP4+{T?u{8s0bjP@<j<I#?7u7I9x+
zp+DwtL3;~M3Vs0((7<HQ>?>;~{an5FX(Iz#Q_uv#fK)<CN_cOtWdG*5FWWcS+|IMj
zp<BO0AhltHiPJHE__KnZKM|ImPI_^DO+_p?CuP~FoOT{O>~e;AGy515Gq=Kn1`M)?
zv~6MXn6sy*<gd^*lfMj^(HOoXV}-$X7y>Wq6<?ad4&`vNJdK!y1RT(+fpIgwurMUV
zMMO&4dvd~-Ap2%;a8OBI{YQ15InXv<);5agE}){M<YzYY!)!9DRHYC^A`}Jzjr=w8
z>d9&sgl*q1lAWt`B+J3U;YKPgx%^dj$4}AB{?L^)#Il?mSa1UkBa93UA)A{CU#x2t
zUQK58HOli`<q#oHujPzevT<_0cFfH{;)xOi+59a$eAT6Cd}1Q7wN()G>}~?2sXX@3
zdU|><^-I4@Q99(J03dsHP0eXE;?Uq=x#f74`*i^e>3Gp_1r$-AJso|RP-gM+adZO_
zpSn}Kw2x2gSkX@<AWA?~1TnsvvXrQ!0O6@@1|_^lv}2x$z1QyMWHm|H1}K{{P35C5
z)gRY$rarTCa>7RU!L%)-CtF{Us7`zYk~}aal=bu?{r#bm<Z0>`Z3TzCzkE4PRe2vV
zH8T@JB^7n@xb^$@ZzVlF<_j8W2GCA}8%iwTh5={@$QlTnO*@yCkXWG_a+(4{i+)C|
zYZ@@Pq5b<2-yu&+Q{v#^by;!(38-WL68W#9Yb(Fk4I4;QC~@eTygVA@$+*y3>(t(K
zCTlJoNmr*I($o*@qaYR+S69ZsYm8tA>IZoAn{t3O7b%+=tsj*FvKDYrA>x0ffgWNi
z>6*0lwnUvsqa7D5<kpXOK9QB-Cu3Q4HRmRkn<~AB26N*gqD;bn^#Mpb==@RP1Hp)-
zMienG2a8g6tM7nBhj!}Z=Xc_im_Q{waPgExurk@)-lh@~${3w{34<^>q-{AG$bk5f
zz4eZ@!Ck9K3tK=By1cy9a-~eCGsFh@%)mfcmzJH?a0<-OX{YLF7v31z+;fi@SQ#OZ
zue`jxc1??*ZoMuZZXeH<0y&C8&Ve8C-;7oO<>7P9&$g-r8<4NS7r{Rdo!oUgbM*sa
zTVC@zHY)u1-@lz-_yLCX_V&8JuXJGn2Fl+4K6pC7jwhnFmdlcpaQ5&jNh-~;A&_@j
zVnMbR?wtCatgP(V+#C`BnCQOvn!c>$7v%Bq@PLRn4lDio!3UP(r(P61Y#unV3tEH0
zK}^>1YV6$H-1R#ZUSc#JVl<Gelr$D8Gy6Rqfe0Xx!Q_qYJ5Em1M}^XHOTf0#;s?5g
zjH6`LmfLR!x6e;+*D5y^C}m14%i$ro;cy>6eiX%{FCpP~VWLseY4viKyjoPy0H+nc
z$mzfjT(ljnk9B>Eqd3A<qdknS0-+@F1ilGJW7(9G0*SNdZ{SYtT<&t*R<S9((nXTt
z%vC}9E(@1GA=5F>30#F2+$67Dq*Ic6k34O@GARN-M_MfL;d|Bas@+M(ge9kDvzz@i
z{AXZhnOC`Ye)<K%#)OLkZuHBCK{x{fpfkgnk}S({4}FBCIT8{Q+IO8g4_&%;oks2n
zRO42=!>}I=QNcTL7}b@2(Llgygehamfz-^B$jEqK^1;G-q&6xF&B@iZulY;eLWR{#
zY0ieVoQA(J#M|2&drOkvIWM%cQxxoP3SzYEni`zQ$Vg!KVSC`fVlK7;|5sfQo~BS)
zMI`{6Rsj+!fShZ{tPWgtvI6?o$ei!qDgUCu0B4mhn(EwiI3?-W6~_u^$DODXVZs&5
zpD+T&?Gx|9-`K8IVX$@UKmE%48CX+LfC${ub(q>f1axz=c3f2WRfh)17?o0-u0uoW
zakzDbX1U#}Xl8f<SFgU^(53^EL2K&SI8tap4Z+{fH9NbzB0R)jyH<aVjGCnK5i|jO
zH8G*`t@#;|YO_mCjMnwJ23XrY8;CHJh0udTL;1sz>S}61xpIkXz+}@RDp&S>7bpAX
z&71N$>yulr@`Xbuc`0hVtyy84o^Lp>3y>nfi3mN<ev#Z$gT3|R2X1Thym)*CEr*2;
z-|7A1%K{V?73FQKcXGyoIRAEU8Gm>c@71ifwl+X(p0e5jU2zz_sCCO{N*N7Z-3XJu
zD1UbN@SeC!8(ZtY4#<;#5#X%eQYQ}GI=$vfiHxH+elR__y}bn{eFkUEfUaY0E=tNU
zeAV2aW%`J?0aH3==Clb;B@>uhCAuIGw5-BHdPW86aega??+lC+>oJpYvbir-{MZ-<
zNTAQ3KXa7oNH#}En#li>j24^#XRG=(Zh2$KypRUsrI6pX))T(YH#-dW&X{QC%V$8L
zTrOjfEm0RMo5{|}p{78%Ut2Q{>N2XXtLuZcP8Q(fm~VmnlKMsM47M#OB0<9BezCE!
z#l*W`IFfHVa5<7uKYaL*K*tlQRL+Gm&WQq2%(jl-D!)@^!}Q>E!<ygu!ju6>reK%~
z)YQHW$#D480m4NPGvr=;pzKBfZ5h-#Q>XSVJDwXTf;l_9*qo58EJ`0f5fD1(j-Laj
zou})4FYvrMlxT72&a9^lPL;o<Gt?`YRy;79KRCw)yk-E=bl+utvfRU{-9IF;f6dIy
zL~_gq>6(z32y*g`w^EQoK8#;6;;f||6lQ=XlO*s%L1|`ZWAl>;$IZ354i_gPDv-%l
z0r_pV$;-U>a+-#Q27sA}u+SiB=olHX8>Q*7TZSf<m(j`NzGY=*QmoQWSMV+<4Bs4|
zy4kIDBRM%czj<SkvV1J{<qIo7wUz68lO;wvIzLcV-8;2GN$SN1I3G6-wJ_UYLZ5*x
zlM4y~FeQLIY-DMP2-pPxr`<p1j`F{1y>8En`+a%Iz)Ix6AclI;yP~@iV2xJS)*73d
z!U}H$&5w=6Toje3rV<0|z86(mT5_aT^?q1Fh8hcy?=C|&M2{YV7P?S)Yrr!8)CXEQ
zQPVAcq$*U`)I_Y%{Btcgd|_BpSBKXZMFJ!2Kkp?BYBlIz07-Rg(ZBI()r!7LVf|~w
z;Swg^DfA?-Hd4C_kD9X(I$2rC0n&t+-vt%i@%!uJ(WJm)FnfdWTd7=9<pVWVCfM$e
zkB{ZrC5XYI1pI`&^UL_5#2Q3IL~qyd;|Az7Q*u?$OC(g8O({dV{J~~>iZ$eu?-iU$
zwXv-u0w@K%hfNq*a))+>Yuzuv$cbPl7V;6uhOVs$3z~3Vi2tLmqS86aXI!I={TH2k
z@#6CGKSYVrUz^4yzh4}{d7O61mK6G&b#26}X>g-&H;RR+l?qCL(5cy+ykQ-VHcG~C
zX>{NK$e0R;AvtE&?PC_yf<Fwx)+1u6Wg>L4yd*tLC^=w~w@>}J;cwOh-XTwJu>#zq
z|EcF~YD#T#Uy?j*-l_=GXAcf50Y;t1dVVSi@GfN4J@yTbj!J21%{8ZHOd7KJv3~_X
z+J1a}?mD|=)sDLuy;DkvDeE*HyR@V)cs%b_?-nlQ>+3833k;S?`_Bu+k&@VN6<U%O
zRCwEPh&XXvS^lEI4OccW$f)(7FU;ay0Ja3zZnj8+Zd6Y=UUp$2Qcq9MIg3*G?}CCA
z+Kj9FM;^NshyZCcSW$)LvZCKv5vGyEaUOdw8xIeilRcI(Fxk;3yN%fi2?;HQQY^p?
z2~@o=F$(}#Gd->TGW!@SrNXFQj6scgXg##Nun?{&^!U1@q=Y>XzIV$MIQlQno8avR
zHlQHxB-R>gYQKK}{$^12cz@jmsFhwZLFU6}YQ2rk%~P0OCfx9`p5!V(>Rv{hBG?6$
zvc0XJGW&ix{+jySVNWbXXm5Q~0<b`soUQC`srxN~XCvP`JRLPB@FfmIKgn!=KnB31
z2@|fItV3<CtkYH~AU`)=f?$e^)rQva%1r=_JF!z8W&uf@35-)<HHK0Ty>LBK4kJ@j
z(|?xFVC;|%^kx6*6y9+u0eS>_7h!$<^syews`iQsq}=`pZ!qy-$Cox{$yrcb90Ul+
zqHV~ct(2MBW^ulO?7PIF;Fmm&lK1fu1mXWNNdUdYaF=Av1p^<@VJJURTY(i@^?{Cr
zgalZf@EGos{95f?oSX?~-L1+}(WDXp5CD||)gC+mT?EQtOh!gD;G80L6clJ7FrHTj
zP!O<<Uq(p4)_E}?myi2EhcZFAbL!E87}AA(rAk6fEUu$NL{{*Qiz`nedEg-DIGJY{
z04(67J_$KsxpH!He=Pp)MyUZWwOacy$n_44VUEqu^A!!u2EV}37CH&z5){lp|J)BW
zFzEn+faPwHg|Qg43eXtt1IrK00l@OF_HR0{^{%)yy>tqoeQCo(gw3-q3d#upO|dRZ
z$mn(#faTcIYfdmx3pinK?hyeY;kSltN&oo=sg8M6aC2VZ0W?JZ!iMvs`9n^;04l&#
zY={~|&)|1P6X@t2#7uGd-gIm59`mYX^xs)ID)hkc1I63o($csTP>A6GIFxq$z;VkU
z!1ln_EH~~!?l-MW!fXXYrvY$Q@%DUaUQ(9H^6+597wm-mOA`G!USPX|;szeR6iKE2
zKLHqnSah!1$(Qy3$Oh<1NsJ~2FsijR9l8cpSgM^MW@r<{*B<Wk_-KA0piaPt^Jy#q
z_a;<4E9XORx^!Wxks%FO&?oi4Fecr4j8!{);L!uI0}T5FD&)V696l$58pJ~!m4Ln+
zeNEYWd>y#XgFGqy_x8~ToN}bd-xyq^ZzcAJD7}v#g8?3p6QhB*FzM9Fmmi@~SM1R7
zh=t=acjjmK5Ke@7bL?UiH|VkeU^{)8e;^k0!YA%~<IQnZ0eWRHSz6(Cx>hgfyC>{N
z7c1R8I+0u)wewPN?bl#s<>ZXm@YfnsEGjg?9aQz0sjRKVjbI1DL;Lz^H|%F+$!ttO
z#^kh;{cf&N3e|q9BPGdf0nPz(|C~k>1wIh4oiKN148#-H;cVAQI;A0EfT%H|nZjTY
zDvC@<aC2LnQd#@Lk<aB|wtnHpBqSRMfSg=hG&R04ed`@B#2hH!pyS_7OLc=s1-(-4
zg@PQX#R3?kFu5B5MGQa;cx-HM1MZ=iywq*Y)n!{pg^!xY3DK&y#7tG#&Y{f&fB}Z9
z6KMWMa%+M?2$^v2P}7iL6uiz|Cr|-R=SPtjGwSm5Ee4&$z}(p{VdkzE9s(Z-1p@=4
zsvn#y=3CRz(Ls+c046<k$!f*J(V9aA`HpsXJL2BR84IqT-;M!fOHWU?k4{=g3Q`fF
ziO&Rz6?~wPt}yR^SjDOgjBH3(M;%VpTm&FLVB;nbpF~QoSG%G_NUqoO$<S12qE=`M
zY%cDCiMWIxKk|y@gTSOEu(-Vdo_tEnXPa*^VgS80h;70RS0GrRTsR(UuSJG8*QagO
zOfrATe2SmnMz>%39UmVbl|jQA?Qu}euZtj*u9tw;k;h8+Mt1%$CA{?WH(ldF19(rz
z!V<-Au}~Pz2vQv+1%Ps6hsZbD!08A1k^iU*WDzinr8;lE>2O=0K(9<0OLx?_8AKf?
zPATH;{r>7`k;w6%EG?+EC-;x4J~a5KYbzarme$ree#~M@={3CgDHD5Y;7s(#D4ajW
zEmOHPVJKaB)k}4gPsGcC%m-SxouCeDFk<@8_PYL4EI>A}dAAoY6gv!K+TObD;WU6&
zZqYWwj=x~)^GkYcj%>|#P@MwVk=|RN$@lq5=J;-Ua`FUw3Y#+M>B$EWPsO{ZZ#ayw
z1Oy}aU9=fqSl4Mn6`Bz*z4<JM@Ew{(3Ib^VX%6?PHqvT9x%h<P{`<cKS-~@x3p*_>
z4HTyS99NPObw;uR3I6jk{T4z{$b@BN2AYv4{racefGz{v^}gYEpR49*7SYDLQwEZT
z-m<cOk?gwk^z>XO-AqM{KO_T9vPX-c*(SLu<v9QSZm)4gH%Wm+bcv|YCPkP?w8*wF
zHI@;WlAmq!eXhR_`X`cA$hU@r7V@HMq$zOcFPlZ2&h6$?B@O{8N>$L1d`@9_&YY7l
z|DhS6S*!OwXd9Bo#!JQxhi&V!o0^E!i<}2XrS3a&K3DIF-y`(dYgU>LG^0$*nEx&*
z2_D+~mRzT9IYFZ67cZjy@gol0X98-+kvQmqp&<Z4;WE)sh<L@kX(RS#$@0x}PwSY-
z!FD@inV$j!=xVeh!PK)ztuh1M_rZFy_cxC|FZ0hpE<idKDj2sM13iSsm^qjjQdW+g
z^l?(ge$_5x&W9^%&hRte8VrN2o6BOZ1po!aPPcTR0`LZKp8>}FHTENtVM3n9`jwXl
zFD53&9*q?|ua%cVC_xi1l0T7IT+C#Yb`9eX0fgT?A-f;+X#1CWi;w_uK4-%YHPD5C
z7nC$KLe|#Sf)d{lRs+NXQxk%HOPb^VJ`^HmDYhgj)4eYE{ag0#7>ffN^cw)Ty1+Oi
z9GM;4oZdUI`O*|Z1_x!4|8i)G!sEkg;kWo82&_W9tnW-KlKa8ePS0-!Rww6sgE1IY
zn$~YZNbE>c+@$N&Qc&FONlo1u$o(~fDay!3vT1^v-yWamF|mr4w&L?r(tpe0RSMre
zX1-^`zk6h<@APr&gbHf$i6Bn(@#_<JV35nC`!IsISP;{Ppg2nYAQqK0BcXy+{o&n;
z_&|#zhLeY(kdp%>D6bR?%X!bCB^L#WRy0cb8&)B*?D#{A3tBmb$D;x`=FQs^;*}cI
zu0`iFXsN1dWUP;STcxk~{*~ADU5RaV8`H_2XO7|cVdpDeJMz+gRiTzhmVbqmD7g9a
z7`T~pqToA!+{XFYM19R-h!q#aTdA^mueLMiX%S#f<O|^_Se#*OdQj^JwP8P;Y)~=f
z^F)ZrqR%@D*o9*GLYR@H?Z@p%9XhKPDD<+}(pd?_@na9w=#-61=KU`BWG5L=VvpV@
zdaaVX@OzebjYq1x3VE%@FSxbnMQ$F=A&_NkHJ0lfidX6=`6vZUjSw@P(PP?_9$S%E
zeryEZ;HLsnN?m@8hQQg<ixs`YhkoJ=CSpj7e4z6lvh|o+mi&)-`gU^@o}CVb5KR8V
z#hsH^R0*qLa3d9;$|tdr#`GD8McX)?VH{d{5~7iPH`bOJjd^1BJN=2#A9U~1U3*dy
z!(YSkh^dB6OyP|ESr0#TVp}(~0mn0dM>7n`=3jRmNgjL^Za-y6(P?<&Js>RW>zT`l
zm(Q<eNIXV=p2^%7Xq$}AB!j8=T`}sJJ1f{UfXtA1MG9XZQ%tFQWgGuN*X`mN)kk?_
zSp>5SGZ>F_{01gUKd*PHXQ)E^yAM?*h)pdTNheX4)GCUiKuq>O)<6cPuS|bLMtJ%=
zDf<Z!+;2A86rVBz-2%p{KkRlOX!EPs^GpYIYwrH(QHh2|v&UzBjZSmvDgm+GV+~G3
zSr0a5Z2si+7qhc6)mT@j85-sFKuZ@N5eyXG6Q>=7j9=bRrY%*P%otu*P|feJ-)kM@
zuZFR?G@6Xa(x*69P+nPVXj}CaLVd5=EPZTT+53zF^N=-g-ccCDHRu_{(H|Kyqkh%+
zie^$&^J<BmmM%>`xQ5|9!>0S#H=Ho9PA*pW`;b+#Y7td~w8~}E4vd44M)F}*5&byy
z3V{aIi*hzG!)_=$Vx9`bR87=aP1sNgjIf?|RI<$5PKt{=5=&<tyY@vqN6d0cjSwPP
zjZC$lq$H!Rd=YL-mFQv}@w;5r#=O(NUuhQ53Hys9om#sDu)&}NW1t0h1W=TZKZiTw
z(0iwnp24l`lLd&U!D#t793Mp-@PADmROv_l`^=)k(ID!R^oVxge7RPzy#c{MBOaMz
zX`kx((2s+%j7Q(!Is4hBi%~4Zgw%P@jIdbwd$wq4Fjtpiw~;SK?KGMtu7+@G%1>u!
z*Gn<#L{a59p>>lY415?#QGKU-$&;|Ih$p{zUq$-36}jNc`W!=c@lVeO)GggO#qOO@
z4_Ov?R}GZ(1U!)ZB1Sv=4y;KA%clxeXIIy3&=Z@r;i1C6rFjf~ztQF5iC3$=u!1OY
z*HPHb;nntlI=P*Is9ws3|71EBJrfdrQpT)~yDEg7n8hx|-2`EdWbcg=Tq~)ndYg;A
zFpBU9MY}}68^s+GP02Q2lcSs0t9SP{GakvcV-4~4)N&N9x`9d&mB;QTwH6s~lU1}v
z;PL3`O+4sZG?^6U`Zv`M!UvotO?s@lq{@fv9sIf(KJvEe)S`WJ|EKtNDi1WpFt!4g
zOK91waaMUMr9H25i^|oKZ!oQrja!d(ciRch*1rTYyLoQl<5ND8wJJ9fZd>BzG&8l@
zVYD4A29Ku3j;Fa}!IjeMM_vA8Xyi(TnOCHl*>*LqGGo1p(kCzK;rLf-5X}b_)zsoc
zOUmKvr|-LgHXteiWy_=|fwyNWslAB%(CUjW|0Ml9$>2_U#E}{%DzgXw`HzJvyj8j#
zL}=G15q)y{_r;CN9ACc#7@`qJgd8{|otQ57odk=mK>lgBqFEHEy3WM;uVS7n9JokI
z&Z!U9)XVwOe9M{Rc8JZvg*h2i;@8LL0Yjp>(gF7jqY}9StY&6*L0&iZ*KK3mgav7t
zrR~gSRut<y#KPtycxY>3OVPLMg(7(!j~}-wq!Rv>@+S*>y?l3=f1HS-_vAbs{Z{Zb
zh<N9)nUy6Siwu3BZeXu2iJ%Mqs)f;k<E62CDcUW4BjNo}J5;m9J_N$KxpzX<r8OpP
zsfH~hh$fs?iO83X+?4hnv7GIy;jUa?FFCGH3JSOL*<wD5Wf>8&0+M3&>)$CSH1aB%
zRdF6YRVa(uXp<$1SusJUTWig-LRhI$`aI5E{kop{Z!o3O`pE!Lb#*G^xM(oNOR^w!
zyQ6+d>nkWB<pa%c&12*$4_tnp4vrZ?gmVSZmsy1Z?=)i)7Je0WYJ{gi#3hXd$M5ZB
zeXbQ@5bV0&B0_jHnjl2sggdChTM;lJW#5atwLhAot$3H}KXiFW?-=K{AO+Su`Jt3W
zrRU%>oSl|b)>Ox1iM-oNSZj>?4uM#WMJ9&CH8~<^BKlSzz(5Lm9R3w9vz5jFh`T+E
z3wXohS>3iJ;wmKQk`#C6xhP1qM9K>f(frNSJ0quo(t;!nzuLaKU^ZMp=$-SdxKDu!
z8<(<R-<rav=?Opt3KtpyZJF+U<Q;7)(BNLane%h3r=JgRW6C(J5)pDTKen1gu^M)@
zH-@PR9qWEm%3v$beyO~!e2>@b{D$!t$p@yphxVtOk=NyOm3S1rlrWx7`68*g(Sn5~
zw9&fM9lpEjfiNo;6c3*aGhXJW*00(H+v1NOT%%l|aPy2*dgYY?XQ^BAklQi-LqzNc
zU``Yk4GOncr%NjSjGqFxmnfT_@Au|yK!orx{N$4bjTCjy_nt`bO?a`sJW=VK^(Bhx
zMrQE+r47~qAJRX0F-HX@ipA>6p`)LQpv2+hB@ilOPJ7|377l5*_nWGJDeRoZ(gVS*
zgBDMr=CixtPX{?D2>-!AvV=)n5qeY%I>b)Ph~&)|dDXeur_phi6@;0Ca@SG&PuSrs
z8n9>ve+(!u7_d!UblQpy>7S3I<D8)4kihZzNn%NAz-c+4;@OagKi91F5`75*f{S`l
z&(~K(hIP>yZEXMZjh5#nFqAs|f9m=UXsZAJ|Epw&sO(ij*`tV%n~VlZR`%YR85LJt
zD`aIQWE4vFigN9d6&YFCdnbF`|MT7F|NDNwzw`V3&T$UMx$paay<hwFSnp4|pv(V$
zg9<q%d3t&RP*OkBo=AWiJ(v-9I@od2b02w}m7Cj0h1t1*ss^_6QTaI0Kf;h<=T?5l
zN>!Ow@FO%FZjXjPUhFfj@jW5oWa1X>-)XscIkJbPG=j>W^>%F>&h(E@>S^VCN%6z?
zmA6jV;OB#n2ma_KLPb{DZgqvckT*8^d3ErJ$@z9<+_)$W;3od><Fa?Hd)*|WT;SPE
zd`rgP(n)(0ql`x82sm{0K}!qL`JZJDYyklQYa1!}XN^UK%o2_$D%k@eIK(~&d}wK)
zh#0t%6R2yuQ_)%3*)?E82n5CcwVg3Pln-y-7aCxAL2M1qJE1#VwB84kY)yRIppb4T
zwIBN4+w1@OHDju{#<x%R7<L$r4-N-Y^`(XTXwJl%+pEaIBYf1+Ct!~<$o6tAwn+Q4
zlSI#&bc<iNg8&L&rgFyGRv{$}^Fhv}p;)PCv=J}G!cDCbQx`}z2~$rUY|!C!g&O<w
zLp5dH8GsTv@WBBt1fcRGTnn@GlhdnJ`vUN6Z#5P7f``GyR6ds$vNo3<{ySyAudJ#e
zj+aTan)fEnT*f6>JmVDv1!x-1P_A)*v>PGLI?ODy@nn^dkD7Vy<z^&y0(~yt%d~?)
zM=0!c$>tw5=^1U?t)UYM+2=<ZhIK+Z8c0yPI*$8KMG(qZKw!o_YT#jo>&0Zx83r&w
zkOIk44t@||0!8L$rKF??P_}(Ja{Br#3kwU5plN2Or#J`DLJF^7XNRJTskP$%vIOn3
zGc(LZR8!{jVmv)ZKN8WL;~R$;Dh{P;2Od+tElp-^EWNxRZz^>4BA0fE3Nr=!$D#*X
z|MGthc7Awp2_WkuhmXCC8%G*i`u@v9W$Af&RG^S=0D*NUj+>_8(d56apFkCZVyy|V
zE+HWyRQ8`KSLTFg{%7*6BpEQiQX@o788@2yynF^Rdy_-5x9#?+utkf^*X^aHm6|e$
z$+6LKQ5G_-2Vb4e#`5X-igsS3MLe20n!IqNaa$fd^J7=kruTz}2bhNlP=IfBUfZC+
z8vt_m3xoH9)7RA06uYdf?0Z3ACF;iO0(Wu1G3I+8J{f+t(N51C`sb!_Vmr93#j(FQ
zIYZR;xd(g)B@+YzrmYVVtRhLc6951MnAj&?O08Vy5tEr1Ew-kGUKr5(QET!WZ(hAW
zok25h7R(U)%Vp^R#=|Mz1J<xL@O}PX9rfBC@i<s-iUa^s*WqgnUf&=6T;rprTzc2o
z!A56O>J70o#LE0Ocurk4UG5Enrk^eH8f-r+*A}}xXITwBPThJpkXYfDQI?WWYMSzq
zuH*&!z@gWZR5y3Pp=n#{TnjHw*ITlKp2bs9Npd&O6p!VfsypFXQeNPyQ5(D8w<dg0
zQ%H<jvs*S-3-5>rmUDB5rTo)*=l<V)g^!;-;c})K(K-J&_sI10KMu_d;s5E-r0N66
zBY)!)1U!`L|33cZ@JUAW<A3~_EUB%^O5MN;k0ptpTlOBhAY1*i>+ty&fyk&B{IjIr
zAaUJY=GPXhkJ3J7aZ`zr)CP|waj2EDz3s3|v%nZjs$W`e_cNa$@rwQHwXXoyoCODf
z8qaBA+7$(T&*_*k34s~hocz~N;bFHSdpM|*0=Nrc&DvTW)y8PjiWdq#JQ-`KrPYKR
z?dp1dTi`PxOqe`;eD#R!79qnlWZ0v;_PcKj-ZU^^1<cN=Q+nH|ZS2>@(eTUz+U$=X
z>!58exR!8_bn8}O{aa&6^h3+P%R~%%u`}^~oxZKDX$X-#6X@lX4uS!rTBA?DCpNRP
zUhjS%0RDZALiAhbjMVxie*T6e_g}%_VX*J+9EKTf&EwhurZrA*wsqQ7ay?M?fB;7E
zPH&#KKQXHZDpB0w<)RmRl!Vh#VLx~>d2`EsF?cXF0Bm~`P|m@jA>*QfwPgp4RPJw(
zb?i>ZrN4cP0zh#UhnG3}lPNn6=CaVmLyvDMq!vR}mjn&M3%@0(n%&(;Egzr1zwL6*
zbN$H!O|5rkURj5hn)|B#SEN@9?g13BWQo~l8Txr7{{GYD{MoXqW_r_{_wNIw57z2(
zzyt-nlpDAUM2ZHO4;cXrG~OO@XS03u$RZ{tW@)Y~eaTXGp8@$lkaoy*hv45dHci?~
za=`2lEkVD3x!VTLi%u{YeKYXd)Yz)0Rn2e7F~lVwLbQXWy5~|sleUY?MtOf;`2?98
z7;1_<kd|Iz8r^Bk`Q>13ZG0{==&_sI?>vnM5AykSOV=>?aVm@v*u8^4J_BUi@~@lD
za34G$9ndo;o?xyly0`L|2?2Ws{HZozHkDxZrZ)|RXDM}GH8^qsV~9!HRxXq}TxO3)
z_p|c_^-D2JX(0k8njd3EV8y(X-j#oX{A@nvaMAE!wz9Hv0L&cUKr8k4+h&uWY75a%
zjrQA=EJ-=s99+P-SBZ|VCJk~;0$J2~#o>?%M|JIBTXwBA-HW~IO4R&5$~3Z~5`VNm
z01Nz2aIg&wSf;eYYKA{@U3O2~68Yg}*1p^?q+xzBdX|UV&JbxB!~|4nkUsi(+uw3$
z3%`vy@)Hy+!S4(zzAwYKbPyETAMZM}x;H|jS?I>FKe4Y`KxMBF0THa=DYg)zj|V#>
z_}7+@O~LTytKbPnqcyN;vf`oMyFkqX2PZwR&0gVE9JuIymfAA`%BrTW?g!BsUqFfu
z>-Nsbs(0Wj2EV@p>mT=FhxGQapMIrVYI?dK2LE|BxeliIa9GKybxZA(ALj4%r~?8i
zWHeIoSePMG^$Z2kIyje1z>Zq<1Yd?b<d?zkcBX$=nCe}i@?JZBFfE9wfxQaQ7_e*!
zj)7G${NuAO-Z=Hym$tUHH`uA;w+<6x*-**Pek&t3D7fPd5dFa8)s&ZQAjtx822V&q
zgf+LgSO@Mt$G!PXQ&0<)jkqyY9c|_v57Im7J%8H}4H{FZYWNRkToAKM?hS_jjw}OJ
z`)HS&$nY=)C(TKRs$H(Z!NGj978+!@q9z=0v?ZM$+ow=!i?<!OT(-xwn#xoMt7w7;
zWlJzd3og}mgV$YU_qLBl)C74HTWJN4p(vRpd;;0L>80G3szJ`UwKU*YV%yJ^zBe0g
z)pMmcsRV%4sPGk_MNQK-Vb@aXxiNF`d~Fz$cp(__)-kGol90cju6zy@KKofDro}9$
ztjvAT%0)m+11aDB&SW?eZSfN}e+dsivC<tXBAqqo^XG)!XH^Wi)y*e}-*_Ks<&8aH
z0}!7rJ&MT3{%8L4tIRLUztNHTYl92Cay)=3zIECJ2r~1Mk`f9Oh~$!?vU3f40HObh
zBq(e=yyo7Q{B-5&(JD+(KA&`o0`Nl?MeEZxFd}J!IW43PR@`x>AYl>4iH{_e%nwe<
zzpp4#_*<ciE3gob;k~D0w0;7rOmE7UyCrpRq|=PeIJCfl*(<@`4$t(obXBx!In>@T
zxT;8DIb{&jBR}giQ2Kn%+iYuKARu$a%=7lgfxuFMK7i%H*7p=9y@b~3*QF=T{K{8y
zG&GZ!aKNYs4G-%9rEydOp33-62QY?%yAJ5X3y3!yHr8v=y((Kth$$WeJI5OOzmXK=
z0_7;lB!TSb3sBG?Xe1Lrl*_@U=Lie;q$+0bM|r2$>eJD>CN@kc*z=ByB-k~s;wRbg
zK1!Um=5hLsIIt}{)tsO+0#h}pTIg9>6=16c&i0A&Z`?G&YtWnPObXsY+Ku7C1f5f_
zh8D0Ka-TF+_m5nsF~CvmQho}~3M|-6N=#(ha0dw<3K1Nci69*QrQVk84v>G5M7M}7
zj@~95B(`Fs|Cp5(gfF|FjjbC;`IHZ>$}fxl@*+e@{u&H+&)Xwi(R<vLvCJEpb*R3d
z(qT7P{qs<XFOFmGHwuNAJHanV1gk1qS(#J-LSu@Hi#bd9Gb-ba?$#z9hz3Qo5W+0C
zi_!h0S*|AlM~9=5f-;@ET(nhty?G{Jwf(n|G%EhorA*l3cgW(OfsFAB%xXJOF+otp
z9MeoUF2$Uqr}&K#ZA4ZpCa{+_LGXs3udgqJ3~YP%KqyFvZQj=_7Tl%Ue2CT2y=sF6
zXbNDg$B{e$nVqrYM?Tl|cac>ctlhOH1&Cdrgvd`D=Pg}t;-t5#s&SMD6SGn1bKwAx
zQXV~yw0ia|?|$~!=969AC(Xs2#cG&^936!$m|R_5tBoWE1`OZbuMypQDiY~YJ1SM;
z)o}ZoMI4Fn-)hBC-MNswMc!8Bbu4mg^A{Cs-S5iW8o@35#A2`Z0JA%veNAH(|D${I
z$da$)V?)lFS1x?`nD4dk23(=>eD}NZqrFA-U##8AZJJs^oCi4l)acIQ6psl+oKiEp
zF%Gc>wX-tYpPh(PcpyO``@stkNx_`KB8lxc#CYxb2FdRX_E!b0Lcaq6M>)jzpiuvE
zlj4{_xdqRM2e~Z@^r@huglhH`CIYc1ZnP6)xiQl&#27};z)-{F@a4H8t0$r1UUwWy
zo^c&9nu0%`4G!##uv<dhu{oHE?Q{^nJx7w5-s4D7(BKa=H78&XfgM^7_%Z)C0|O*?
zSr0x~w-Tc&-B$y;tm><)QOJyNu-A`kgwqjI|NeR`{s9C{1X8iR^tAtH=iSWt<)58*
zTMpCI{~f(Dv;FYb2mvah;?QO9tNO)y(`^D-yaUoAL5kOw-E)Z)0&0}&zw`CnxDeGa
zQZ<npdX;!*2cy29xy5$5>#9+%ef8OEo&|X{t>63l0+8ne!7->SL7Ei^M4M)a4ArN+
zJWIhZ2Q;IG<J=g}*TQF|Xp~@Ghq2FY;i?S&`lm41O_M(zO>xT_1372`6NhEL!BhUq
zi5I*(_s&Oq1qwGxN=ni<IsMI3zz^!Xk2G{hF6JnwOhQ0RGUgZCp=w^gXydu_hALhT
zV8(>i7rf<C+ruu%L>%2ZZMQzru#_6eYLcoJB;5G3y1Kf2^_UTz;WOyRiX(6!SPB*G
zh|gaFLfdpU$(^;ewbk_Dr&zC}jTB2_O6DL~5I1=*T{00#2>tZ1<i*#`K3(C~l!Zf&
zR?G+dJ`QgRLToeG;Vwyy?ovVk032M4W@ao(!JLb1+Y0eVt9aJ_Ks!4x*{z?Zkjx`6
ziKiqyChTPdtNl+p+|Er+HMQzXmoEJb#~hfRB_Z7DT$Sx<B#RR?*=;?WKRx>>ertQ;
z%<`<uj{>du(!T1d_2JIl=`NY;`_*QTx3`8IieMo-TAd$Um47T7?KKrKR2%EHAsb&&
z5z22^W>LF!c}T`KvFs>GmiXFQ|LAY?^exq8S(`MMYQ3TAzM3m;eXE$Z2Qly3SpO7`
zRJs@aP>~AD?zKTm)giNQ*9WKB^sP765{)LSw}17mb3Hkr?72j?hSNw<wH}*ut3>3I
zNmA~JOz$1;?o2q<Qqs=jRc@3Io7Nlu#y8zq0X!g!>#1MKZ$%Q~vOkC7_*2M=sUqgD
zSq_AV2ZW>BoIEOK?pxLW#_}gVACH~PGnSZ>Q-5^P+f0J@-J-&>tkhxtqP?p};609h
z6ZK6zI6JJZhwElHUn?ng1336+FG<v3%ucv*4R*FhAZjd*j=Jb&75*osC7MSqMLlY5
zT5+2|RU)WUPx_*+;2?3cYpv1Gd7)z{Mi*DolRYS(<AIwCWT@*Qt-g`M;h9lkgBUlF
zeU`a3%HyajS(eG6vbv#9NuIrbPo_(xTZ8%{;z}Yu-^ckCkzPOibQU>c&cBO;L`crC
z#~y)B)n^i3PE$O-ARq6gxZJ|m*(JvtTX**NcuJpJjyhA+Zz)q~yovSXPkvNy)9B6w
zml~)C_XmWt13M8o=ybQ`MMfT734Up_aFZH}=BN-g=*mS;mlJbKL*nkcrbn3!s{@iw
z-UcVnULvz7S}G52m{3;4QPVV_4t^EVQBd4h!IE#-0vu=Zvd(`uWWB7$Z|={@$&pCP
zXEF}P_xr=(Fvy}M;e$KLOo%#t+oeE7dSm|kM1k5ppS?FU0MB{2Owo6=e|&E6K6&3a
z^E%W&%cge0$S+cql_rzgx%M*^O4hcu!sY$1a(rIV`aK^{IO;SB=%qRL%D%42v7WZ&
zhl_Fz&8u4)E_xGFRoeOV@6w51;wV|`uUGT`S%~J_R&15|si0_Op#FtfM8MPQB2KfA
z#^mLVApi9{qnX0ib|lJ#h7v)eTNomu>y5$dy%T2;;qqO2r~;Su2Hy(+CnkB1?32zU
z*91lkNR2Htu@P?_Ly=v$TK>hz`aVYWTd>pe+$5V~b6{Zm&^RgaY^fUBV!kA`Zl{c2
z<cvD>t=G}6TKpdX{onPBl<HqjCb%|)PxzkbV@6FBV7TcW`WKBZtbNB5J42$MQ+#Ef
zg2+%HM^XI|r|r5x;3*8V*MYp!2bK-WaXM+KQzjJ7k@RJY{Y_JZcVd4$c%moyq^o<m
zXuK;qY3+Ws!u_QiPycdQV4<5WYwdeC<&JwejBWU)6tL@V&$6>`M4m*@{6<*Fu5_t^
zyQ7(cp4Yi`?OpAZ!81!=r);Ul-`L|j%G;Gfdv++Fn9t@rk9OyUgK0^X-|JJ-CNJ&N
zS2>a}`{=gzkB6RqNpE80QU8UuQjv-;#+iSmCq-H7Ip_R2zb}L9wbgQv{dtpSSj$rY
zc(04Re&?r}!YfDRMy)J-M0?JZzq7Ny&sG!FHcd{vF2CAzu-?P|W9;a|M#T9jGX3IW
zadS|T`w@t<ia024TUBdv^iKptX=ul)^SKTv6S)K&><k%^Bak-H^|s)(u<l+7#Y*;R
z^L76lJLap3J8cZglc_O0j(69cfdjqZ8Oqb5>E+FRMR&_2?;{=R?b%kxN^j|fSJ|>F
zTYE_!%?AW2->g&5v5Cj#qCcgn&9%$VL~53uz^#rRj~=B&Wkt=73)Cg>yyEc}82ln+
zpHC#el7EQsRAH<VKFw$P8cu{*2VNjGr`pXQ`Q`tq?}~&%=F|JZ5M#T`pxlyuHV+sn
zJ7A;&{u?7jyv{uHgp>1w>{z|j`KI-mwR?mLQk#qBHpAJnw30f%iF)odo%%i5mCR!0
zLWK&-&;cHb@gE+_aIRgU`}A?gwtvVcTQOO7RE}oMBw#xi=R2Y<PLTEaG`B3oq}Bmb
z7cyU#9-W=9@J{iaUdXiq6BXLi(MwY&iMtlv(|YY|<lNcNrmv@742uk_RlHzJxL6#{
ztZ>i;RH7Ue{<Q*M)58*i%P*cy-F_9}n0EExSj&}{5p00_Zb?3MYO|d-oUI=J^2L9r
zbAoH?eT``vJ2^so=25V`Y@vP|XYKg<1M?YY{$i68bEhLl=uPQKJ-eohjLYh<M?N8c
zIY3#mMJK14iHzCYHZdn++k16#R7WC?C@JO<VQ!760SO{Kv(_fz&vc402H5D~UTs13
z$&Ww{-_j8H^C5haoVeie9Q7}H88Wt$670(Pe3|t9F*!=Dm74!%b4giTxlEZE^=|RR
z(@XVIrfNlVM#N9%%c=W+eDOL$@E(c&8&B)Tnuo6dLxUj)EV;x2OWL(-438qUo^L1?
zqfli$p?%EaTeIckbQe~P^m$rp9+t2!_Qq&u+`%_30$-OR`Io#uTjqbPOj+!+OMZR2
ze30pmVod}@irof6Ue6EsbM3rbd7Y@}?_9Nx-~FJLm-3mq#+hDkHdKZNe;?B{@fRf~
z<9gwGm8Z0p<NZPMMgo>L6}W@V*JL^BTAa^pitR)fjqG#S$<t-Wu)L>z!n;U$X?Fvc
z93SgeALCxs;yiT&NMuiI8TqJzFiP6zH*&RKpp^xBOcK6-ylP895$nSQOh&Tf^|^_R
zGi6=J97d_xC)DzvAUUACFq<<+Qe5aB_8h@*SJ%#MNR5#akA;W^KarrumN}5fw32A=
zOf^(#UsPQ26;X1Yr)+!1Im_96ruA*~4El~fnud#`hOvbb9C&<Z!5LAvER)b>_}8z1
za2d{5^`#@VA(77qw1#l7g806Ru1-PKfU?7Z9dz=wroLh^xBNkAa+*&4Jpo$fGDdJ(
z>6;UCU^h5iV`FP=O42~W0}d!y3_%(QU;vn@<UnfV?DE6(I*Nl+8t`ebG=Y(*k<c2J
zOF%?;`0n6ysjwvezL!OK!Ho&%Afh{kBJ|pvFs1lM1&Sp2!L|s5vi<^>IOMN`=n&90
z5N*-#vnzh=P5bbHY<G8FJjge~^+pY20xejt;3xA3X><!=NW-y4dO)S&NB=Ty)H`rO
zfm{=FC+HF%pFEsQ#TdiB-{ds=_0}ki6MO3&8V-(dSOwzzAD7eA%uE_o4&eE(Lc${`
zW?Ut^Mn=Gz^*p|xoSgighQ^5!$wS2ony5!sJ_O1yz?B5%7ZA!Btxb{K2mhB}jSpZu
zLGohvd6>SV{Fa1DZ_A%X++!Ez;O5TGRx<i%ixA(-Q4gTf;ECYe0M?9yMQ}1rPftUn
zV}#Nikpow{5r5Hu-4)201koQB!_Sxj9v->ujL&SO3A~cGqcUhh8bDFTBPbZIynl*>
z5U7|F(Jk`UvLp$C@Z_++9CLj0#@?rUhdK&$jFGP-E-x$oVaM1EQa#Z_D?KTC_-Srw
z3CWH$3?Z@xI5DDS#=+SZ-F@bSE$8%v^1fVp1Bo(Q^o9EOD-_raqGf$~p;ude5r58}
z>_)zP-D9W!qSATJ5-Y*;z9H`qL9XOGVE4Y54NJl&DU=1>!W)-@(HDpVH>|IM3vHB%
z7~`u97I2w>)$rDSB!`vQ+<~x8gu*^6M{Sx>SjMLo73M!A7T&2w{D*gA=^|HbXrox?
z?kIL^iY<exoB@EU9dA*j2=zfk){xo>dwPsMoGgum)FKp=UPE(cNJlCmHK5F%*Uu!%
zuXp<70(8N9`3f^<Ztcr8FWrk!Xg-jK{u0NtOw6(F@>vfm|G=9j1}Fi*-73mCINOm=
zm&|v2tBo>6wZBo}hAcU-OHgk-fgTAOytkn36FA4gNeNG5b{d|;+e*x8A#9mTNE1wk
z(2uemxH&c`pbpR98w~#=SMzWIx)mHk!5|cm37eDvYZGY3$!@*7KX;A>ZSlNAlk<E-
z;+7v*GFL+Ne~qUk48P=y3XqoHAEFlNgd&MN{DalFfBzWt?es2^xYnE2vPuf{iQl+x
z@tytQPr|_v03GU##LJMDUJ%;>Sl#&U@U2Qe=g3WNt<{y4fT86XLpreP3ko9GjPwLI
zXWe(!yM-O{p9fKDgH%y{X>t$$M0%gztmEX|=t{m@s41p3lzlY|>PeZqt40I{pG-<P
zMxQGhs{49g{|^#Nv<Pz;(FL;}vw@MATNh-tpR2qORZN$&v@AT;{wg3KVhIK&M`n=9
zqY?02UT3ey2}HMmI94G%(E{vE3Bjc}b(a&6vHIY_gX@_;x{?ggg-~tSJ0^uqs$zqW
zoZ&zT)~=Zna0^gSQ20zrfRwWF3wNZF)%B*V7T;n3p5W5&lOE_oB){^GX<?T;82DgA
z{>sjkqHHJ(&NEGiyI?lvdsqzoJvrFokO;G<2VgZ-)6oeA*H3DQ63Hp#>b7e67Da;B
zwoW;nGW!%NnWDL!xq6>PY`IUR<b-pEDR5!mynt*>3**Aibo27$AlCy!p2=V*Vhs})
zC)Xc-1*RXTSDGJhf>s!yFf7tX+QApn%zVdfLgQmE3{{<h&gR(-<NjRzAKtlzg)|`E
zrLHKyD5|5IT3LvVn1@d*9a9?&*GA;k?G}l@MKhD!7@b{Q`~_^f2bSGn0uQew*9FWC
z`fApLOGx3BU4SZzx*i=i7s4gPFDMuY6MaRzaz9jgx?;G>o;M&{kTQARRXExvO>vsJ
zsY|aI8^Mrn6c)i5&$q1TYQz53fC{3o-~%9qHJR;5ieFkQG^#lUfCQO-TLyxxSz21Q
zvA?p4P&gk*%26Z-&8jTcn=u5)15D?jB1hASUIa;Fqx4>g4dglk8(0xUY7T;4n36z7
zjEg<Fd51(<2DmKc$lz{o`PFi0I5RywKUl4A1X6}Q)lcs-0-PF6=Wq**R4YxdK*<b~
z(;FfB%=#8=gggxGh|`HsTx3<?<(>Q{bwxzvSb_GNzVjqt&4xT@2GOx&C~(n&=+s}}
zR|d@Yu!4c!m7C=>%~#VwC<r7x;#j@f?y5(fLem7l<ichELhlbhKjyJ5W)tdH!T+QE
zsju9*1oB90=3UbY3WV*WzrAQM0mcz=j=<n8FYag_g~b5G@~BAbLY0%3>c$ZtEj;LD
zW5fCvFJ9!(Ux)XW@B<PXRLymJUwGh6#Q%3WP?H8RGBKsXf@Tm#dXkbd4YDEeQnIpa
z;3Y%T*`iI4Uq(S)LNF##lhc7u<jTq;piGZM$^1c?&9QjDh#Gw#Rt<D_y_vPF%1IIk
zvlNz)XvPVi4e0_yRDRnw-)Z#QJSvVja=W&XbRB;3>R;o=SOqFka%Y!&V*M|-OoZ#N
zUX3_?M()j9X7fxUn|mO<M?(&cyZiR5#}ND40y86rwZ(^GHW)*og=g4iiB!S{1Q3B<
zy>mr8Nku~Th<=MjYV7JWdb>MDB3fH69nYW6-*-%<ej)A<VD*w<OuIXZCMz*9bs(SM
z_rih>zv+kh0+^JPWup=kTUB$D?M2n8(O6_+u(l~IW{Vz|paiN#^mUxrl%vsF9>#xp
z6t80Cb0v1vsQUZ6I5g<`fB$;IYdv(^KtC+XHdjvN#*N|8V0Tt^aG%0L+uhxr5t9P1
zhe5t_!>88<Ve9k+`d_6?N>GAm_Qt(?T|MMx6m>(THGA8*^I{eW-D&5tUMF(oeemU!
zl{Iz`-wF?Xdybp<1nDi)Q(GQmtIPh2)db~3gFeDuhdu4>JfBx2mEMy+1S}S@04zJS
zOgkeHrySE<*oXn7V|q^PtWOFh-1*`C5{wP{6|UX);@VafSAmTwaN)+k1}x~bo*Tof
z+p#ERk1zcL1HoXv%J{QNMoCq-wTdyFOTo)~h=m1k%ChYZIRvJay49}MGiBWb%d;vQ
ziWaRDg-j#Ys40lR(<*>Pr<TifIxzzSFS1gg-)!MGKOQ(P?M8uO|1q!Te7M=HUvMsa
z+A{RQ*KjRIGeSRLT<lhHYy(6h4Bx5_?V)u{A-^!avjDu-5X~0T^P3jkY{sK~-i&o_
zVF7JvIU8E#VpH9C;7z|;NtN71+ULT^l!{xv5+q_bLpW5qnuZ^+tbt%<6JZMX(Xcm-
z3aBFTo;2v&_tezb|16wB7gC2^<>qYnuR~H&7%<_&0g#IcFfBo7>UKYF=J@4h9m^WW
z?GXmLECQdcVHe|_ReXSw05Q4$_l=*XYl-beWIf;l2e1?-QWJ|y?AVA0X;QY>jH|GD
z0CNNk3-H+>gJZNtLeY`&z?%@5YE;Bf58)vdhD4xFbXm%68Qm%9O&~<Hs$*%KQ6)3u
z^>UIuy5+4J4?B2K@r_6Pu&*^Zi6%*f7To1yrc|Y`Z}@VBvBz5GfZEf~FKcfU-`*@`
zLtF4@!1tH?sDEZI<zR*6RGwZ7F@gp<zdMDVIRP_Ti=!8A_oUrW+4)z|ehaJ50#|V{
zC$)L7xl*gxPBUMn?0R3rBbq3&q}L%xH}!%mj5sP;wzfx!j}chK@wI%oei9{7K!2g%
z5bTwS?OR(zXu4o(v;`8yEE4ZEtCHX4%H|)Z&*kbw82RHn>gL^(IB60Nvnr(I6f-{y
zj$h|Y88jm`DSB>k_ypxMY8T)Ta8ZqfL*5Em8dBp&sXDEG>uSWR#$~V;9j)|friR=g
z;Sfc`h++6P`lGHhAzM2ijvpkq?t?#4;W%oo{hTEW2bLrlDZzv0`6)bqODo`Rz9EnN
z^6NNU@4?*_pu=$9D7fx}7w3*7(Pyr45@A5GU$dyBMmK|@l97Yc9LV4`NKg*j7l%OU
z7TCieS<_LCaN)l-4m2|mIw%k7g?|S#LpmLrBw;(NUzmf_7$K1$mJ9YAV@TRy1D#<g
zC^p6+wV$@a3QH~G(3Nx<tyHTAjCEF47OKffVlk)45%OmzL2E@<p$#9&`{yHnBa5ZC
z=wW4mz6Bd(;CFaj3*b=<U@@eQqJ|Bn;o*W7a=UThTZvJ6k9#$YLP_<-pg8?=!aN+D
z97}FeAVLB@hYTg*I!A4QIeeTNa+8E(gB4M3aMlCf?l@#$miqZK15BZam<sX~Ise|}
z*WbGcA@uzU1P&^X0=1s7HVRe2r5G+pioDzEe~IB%KpN>q^sQY?k&zlS(aYUK6e{u#
z*MGD@u*M}A2a%`X1x@91SE$1V1mL0BkiA4eRBQ}ETC>2)@*Fj^9vRqD2bNK&&o%6>
zPXB7c;3;PwTU!EDhkm6GU$Sy_fh`?=^^JKe#KUl4+elz*WG1w^2bq2tL$)tX;pEWN
zKY?<F*=CU=FBe{@1$R4-bhH{V%Qplne4uDF^wxo*){b-^TMed-Toz{hLIa)RZ-2X!
z({<gYPX~pXT^Vs?kzjX?y#K-<J%_JjL!r{%Fk8vuJyS#Ep?Cg$@Z6nmyd2+J2vFMI
z_@`KX0kFx7+<-852N41k8Gcfhy$eo<G*C=r2WKWxpc#^u_v{1;=J1=y;6F=7s-KhL
zvNY&l!>@}%iTDxh8t(Xhzj+QDp@R$_z6iX(;B~J(c!65~^W$^)b_WqjBbTz`x2rAO
z1dZ$+?fyr^vSa%zr=aw8!m$mLyQD4W_^5r>GiCi}=kB4dtC*hu=1_TZVpuOVBvuVV
z+;8HGI(_}oWA!HVD^*Na!@|&XFzPMBU?3GL{jXCsdM>S92d3%cm?P08wr42zC`*;?
zPY!Y;!+fq?&`2uCIBONcv3*sQKGD>XbYKgdvYO+7xtf5^F-LgG*qg|)H6c6vk$Aw-
zt}0fV73G7hBT_3qY3~*0CZOnVVB{%XG5L=r&^4<#i{wLz?LTWE+Z!I~%94QLM}$wF
zj!Sw_uk`iB4>0sB_N7i$EV5tZp{|IBVM~oPaRZu?m<Ye8_4DY^As^|FZNXnE7%%i2
zguK~%k{SYA2-2I-VF_Ui(h)8j;MGLwLCHo$P6%(M(aU)EPEB0=Mv=Gl-Sam|TD0N&
z&?fn6Bm^NRI5KJ579ojoGPr?yb_V_M0pY!XFtZETh+yc&|4bU)McYEi5FsHz<`(!v
z>ES;9_!M#Gn8XU_3wUka1=pL}u4cxr=8~q)=I{$8A|xWjFD%S2BKAO7SW-+xQuH#v
zkc6a=kpI4?=6}1u-oeb;!t?*W;LrP-1-L*Sxr2+FrKE|mi#h*eXKR#&wVk=7prD|m
zvxAMfsjG`1+}hg3&fL(^*+$UP+Evir*v;C-(8Suo;<Aviv7ni;r2u?^<zA9Id>=~r
Mriy$n+Q|3+0Xl!95dZ)H

literal 0
HcmV?d00001

diff --git a/docs/images/snakemake.cluster.mini-tutorial.png b/docs/images/snakemake.cluster.mini-tutorial.png
new file mode 100644
index 0000000000000000000000000000000000000000..98565be61cef5e3b1dac0fb66b7aa28aea9c5c8f
GIT binary patch
literal 12934
zcmchebwHF~x9=ai8>AbFp}Rqb6c7*r5$R6pkXE`uQcz$-r9nWH?vO?i>5}g5x|`p7
z&OPV8=iYy>HN!m5vuDTJYk${gEu*wFl<}~suptP-d#IwI4M9jS@O~2$9sE0cJ{1jK
zP;KNM$w5#>0?w5g8u%H>U0WFrl?~FYgC7_yRJ0#KkS{9)1&2b=1^6g<4T3!RAZWuB
zf+R8^h}`8}-D4?m0o`0pSpmAe`<>gEpA0_1a#4Ba4qjFM`K#39AOk`CoDUV`bbKba
zXI}a|v6__K+wJ-F`O%bz8hM>)*tf3?q=a8FU+f7)&KFnqu&%Lh^s@^xuNCiVDp;c`
zzI{b$MW}?7`cy%VkW`V7l#~`RPP>-<E8tP`w~6dIdOLzgr|`+-<r~AN<;6YX;<fH>
zr#n48FLxp=V)+T-k+dXOv=C$HQ5ZHEW5>=z#|k18!92rqxvGg^J!|3%;Uu~rVpvP&
zqNr+(3@c)YcE*o$W%XL4)X(=qKBL+Qcm8lF42#n66vyRYQ~HokTsWIlGa-qa%~Uwd
zNc4;IMK0TjL+rrWmkp*tk+c?Em`1!tZxsK)(=ZH_WQP3Lid#C(IH^(m{(4z)A=Tox
z!qn%N@-q$mt(>@zcd*%61P~?!>tS#Ce^X{X9yW+@&(sF*&aGt3y}5MayWU4rAlg|z
z#sreVN+*Jwjih5ts>#wYkW`MNI-~=c2{{kRFN6~}TUrtei`!mhI$Ey5&MtR#I_7`Q
zyArw|N#j(|2iz-C4Y^1Cdk&#9hY3Y`opwgglXm!_V$(`iB;^R%ZxyM^SIc2QDhIf2
zIc?hOc?=cQjrWRJlj(Tx1%(JSqC<Q=k~Es(>Y?>2TprUMT|%tVv+U9#TPrZ=!G{Q9
zUM2Og(D;%_+BJThNByy)<=M+Iza4LVqy)G0VA>kJ&$q(KnLe9-@D_s@>T9tjIR|lQ
z?=eEY96Gm8-@j5fx(}IFYLe1v&!W~Hbk#l-=MSWpAYz`B`IBS&qx(A^8azT#<ut5F
zy}b2jof;G@_6*VskIiRVX8QP!A_!kiF}Q;S+B5T%@URXUHX|Y*b3#n&zC~vilt;x+
z2#XC#$xHAnQ(*M!{*}uQ1*2Qw$P^BeO0fL>TSr=?9l*?E+Xq43mqmojJDeV|8uu9)
zkxT;5KMk85*}d@=Q|gn4pze*sVRdC)g$G91quz0f5Z9RMz)1qRgiOAfNEY}LhHvp;
z=a?D^;^QN^8S&6vbBV;Mg6ZJ4Omvw;4;mtfRSON*&MrQE{*0a=b%5E{-j2{>=X-fz
zDI{W0b_wI)XniN{PDn@?ytZarVpM;Rb)@>$h^wosesiGoi~4PmSQ>F+?`c;IRW&sv
z$hqm9b*JG_qj>dVK7=4KF%g0aii_K$$v6q`-3w1lB$V3kAQ@iw(3=WePJPN}_lu>Y
zqr<c-mhwkmUwfrhA0aJm+-kO;#+uPsvE@rX*~xkJ=f;FgsKszyr(r}}E}E(MkvHk#
z>QIDIo}7UJmA!+*{>5gwJX2CIfB5BYGd%)<&?+81Tgh_XTZo|lyBl;{Kf0JeD;dt)
zbcX!w*|W%mgl;w7MpQ`ut1VH}?TuebO3L0Qcyp;=X^91yw!TK=ql){{+Y5j3g6Frr
zu(AKXVzCpwj6X$pJgxlW$3*e8lIW?bVMg^{h%a9VsYM(KeaDVfQSn9n!jQfgmbKVA
zvJqi#&cjD^MR-@4@BXHh7MsU2u(3r%DKP1on9Nx{Q&K`tR$)=k`?+_zJzbj|M<pEH
zT&i6-KHKPbJ{5saYi`jMyWo7gBHC<L-gE)8h^;s{aAQqXeP(9HSnsh;3?442s7Nw7
zy*fYSVqxj9pW1Ca&DK@l-{0Sg;cX)H{eC>Pe<3Bq20MNPkB~P^86`^X`nVc&>Yj+y
zrKqT=1xwXpCpAhd9JNP9M{n!vi)rQ+5xHkyJ!#+A(&EtmN=iy9_owaIlv+AZXSTE-
z1o@rrDB9b@K79C~rLT_(W$>9H6B84I^$k8hTy3`%YBR0z`ulSHiy3_P1NC3drZf!N
z!;nOT<!C~0OWFG^&lCj7<HwH+%F2}F<>k}O$>`|fH#Y2rgoXDzi*j?Jg@x-|eMy@^
z8mXJJ#k1vflP@+ybW+E^pFqKpk(eqh#2-I>l4IShEpUHN^*c%-zQSb6vq7ly!6Txm
z2Q-`IpQ*vfYqq~leQ{e>{r&rQi>1IqH+@j3Mt=K`BxZi!Bij<=Myi2<fgeLd-Cc3i
z6^;{?fzEU7?I;8?XAH(Z3uu-$HjAwo^r)t_{S3mwWSmdS6g)kJq1>-uNtF}mwmtnA
z85w^J43PPLC;F>J)1-ye0@}wF6%}Q<f1i<o0UDCI<WgrP5sPnsg@Tc*S4;#3IOW_-
z>%KbJIie{mox9CzR7VO-s6F6x27{iSzIZjY$5Q3t!&eCj1fL&eg>P+b8Gk>%Ivld-
zOXJ=<JZz`G*~371(=y_>7WuVBg(F`TU)ew{&x@X2@nQe13mny(=iLC)<C)~G?MKgq
znJ@N%DO5}prV~n7uN)RlXntCtMFih6ZpP7da}$ucy*lJ+mW-*X5u=d}G+g7`nyf&H
z;19g=L9{dQtkuOA+)V-t8ygO-BqgM5$1AVmUFPt4DGY}kn^tt4P1%7Pd~(KIghwr6
z+A}`2zdrs+OyZhgym4_(TBcx8xfxtw;NihHHa3ooiD^ICo+|w_Q{&3a#Kbhexf%20
zhf2fgtlw4TknAnZ?OC_1uYWe^i{oqqB?kw`-qBHq^yzfS<fMVr`BE}4&B(a8&L|SL
zv*Szq^<U2=GvdKce3P8Kf6&K!T6aj(*r;wY#lg<r!rWX-kI^L?yGS6Da5dc*lK49G
zNd9tdsnPGW^0rT^eZ}gQzTRR;t^Wy+jqL4}pscSr2S*gGjDJnvE--1U&d$z$N0BP$
z=IgW`NvWN`oiY7$b2(&WWc*ze#YIKtZ{NPX5*aIb*8W-J{p4b#U8-tlS3GUs!QtUE
zBO_Y1hY!svDk>aLcV^Vf$EvuWfBTY=LD5BFVO{k$j$Srf8W^*B{zwI|rY{Ta5mlnI
zJ|T0nM&2`>D}(R*ii(Ode%e~v3}ssxl%@rhl{epHT3K6H0yq23Y3iGdvd8Jy#+!~0
zz%MNIB%crBC&=E2`7I?WQ&^1E36YYMA8rn1@Q0L|G-va7jUTO#OuV_TrXLU>U0hyn
zQ)NA1UaVJIIe8L_MRFETY+Y45RfgB-)D1>kVbNXLsVUt>r+yneaCU`vk60u&j#~7E
zit+$S8-<0QlW|kw(CTyFY0*=pZ+0WBfzLF<imF~7uIwlD_4b;BCEl8E4e^(wzy#Cb
zwLRH6XAF*lEZ`D?>Jf-nNT!~io&n!H)=O#CFWpOx>eIr)!z(pqufGnB|7+zQYHHf-
z?CfkAS|R0-GzYojk-?ZSZ)iqut}iRRMvB?ld6313YPzgh3c^ocDtrsN4ZJgy`}gk?
zMky2&7Ros~azPCI{6tgVox8!_`C|K%Nj+PNGE2mXYB=gF-bs7Ot#;H&Q?rjS=kw>!
zGSBOd{@{O}0oySS4BdS2op?Z(FzL3H@ylg0sncn9y=|+en+pdh?L=U^-&%0aSP7Qu
zHQ_w7KK}W0{dP)P8k3L^semRy?w?920TC*j%j-cRb4jL_gNSOMee<gq*D)EM2i*M<
z`vmG);wUAj6SL>tPLp4;fN`;NadnhDt16-U?y>;)Kb`Sr=jBy4H)l%YHIZTG{b-?<
z?nV2&&chL7BpEEZT>gkz&zt*ttI2gEQzsYEpJZ41-=b6-417<w+l5V5G^%T8;JdlG
zxvc-XucNEGuZX=?f438`aB$>-w=#$w^VJ&F)g$QjQ&urifQHA%F$;B~lbDr}A*w^w
z{bTZgpwyY0nvM<y=z*7)NIZ>r8}CqWZ(HG${M@n4h4pnTSy@>Xma!&VDc>XZ*}zMu
ztcJuUgMX&)eR?{1Je_oOWu*X@!B<SByo&Bs_oEYm04FvM4lGzI_HMJqxprsn&xJ+r
z)A6N7Cd*uf;N=AW(IQ=a*ULt+{oz~rK}ih$ogIz<nVM@?5;lzvVAd68?dbjQ#FI_R
zM%MRMWp628y?O;sC$X0YEM8mVt-P`U7eX=@Ys{b{H(x#03u)Dr-D+u+<7wo;fbVhX
zn~s)N2`W4Dj!bz0-^-Tc|GFHXrfdHt@BB3VZG6}v`M<hSGTFcfX|B@Cg~`RBCOl}M
zdAW+`m?<V9NTIhvs}xd!X0eR$;kgn)$TRWuZgikwFpRPhLy1#{aYf%K?IImi$MkYr
z$C06o=``|COhg}HW{=Ech)eltguQ!cyr!0YHPz3bKWA>X=9*2`_Rwc+oOYY*Rbuy@
zMy!dLLV4tE+ot{b^ys2-CIx89hrHMAX4>1^&&&6X1`;xfEjM6D&<H8)hr*cfWHe#c
znwQo=WiEfB3sXkxm&Pkuq07h^JHBdjZ!0S30b|m_s}`2KewIza$u?Hv-Uc#5&{G5Z
ziqb2aOy)5~iVUnQ)n?_m3QA~CYu?r;`lD!#F8qahf|Kz*R95C5K}={bz=B_<piZT;
zw}&zV>v`Ycg0?#zG_Rq^_2Q|*o%)p~hw-zu(MQZgYl%8TEen%|SKAxUzGkjl<O=@E
zI6717v3ob7AAZi_%f7GT5)E`Ma_)$y&P<_z?S9dPWXcBjw%Uj30B{ZaQPz?|;ID#;
zVi?{idY|sa!)&>;C;G6ZqojoKC>b0w$aVnzuJNm)(l1h&X`7`x`_!-W-!>1IzcR!S
zq^R`J71K(N!!<$+x_uw5FGckZnO}MPDC&CcDFPWHu{bLc!N72+`f1-Jg3j}MfSRNs
zoUv_&yvh8OC&Vo3nvXwR!a@++U}Nve6ON5ZjSQ4v_*Q&GN}g*ylhPU=*7}FJup%Qc
zSM+*D9TQ!|8S6)N?a6ND>W8`utt~7U1<G50f^l@~fi4dmPMMGWjQWzhnvZ{VV?esm
z7%A)_a`ki0z`YlB`*@Q?q8hOZn)kIEnO&>ADWnP>5J`H^gcUI}3+7D^qrQKbpvP7#
z_LVUY`n;l7bMDu!woKEmETur~6E#h=#2SHlMD$(^KPxr{EPH7E`Se-%){h{C`@<i|
zuq$wxr9;B-BM8%#OtuN(TXbYFc;rkTToJRvPp1Hr(JSGf$ni0PDRf-=-vz+`pPPvh
zC|@uPO7HnL&Thyg_n<<<o5w)+R~fJ-_slOWFnWxK*5JnTcZK4^ZNXl7`SRtpanSFA
z$9t!z<Pwf>L-c?n6c9P1#xGt}m+8R5a&mHp8A<+X7ib~5xVwkI5o=?0s-G(76ypfZ
zsdWk_QPI#QJfz`~+gb?wJRKM#@o|&n^vuj<z!I1gT9}R#3-QC8)$BFfXhJhB#71s8
zTmfjJySEH{e1zr|HB(mE+1Yys2gDKza6@9UB76lVB4lLbIwK|JC!v@T*t%@eWGrAV
z|9N|JbCan0N$>QfpWjF#B^*^DtcWCjk&uGI){~z05fKwt7M|*#tjuA@lJM4#C?OBm
z#ysVx{c#^mR8KLLQ|YX4lm&%&J?MDINcOVr2^DIT3af{hXvbT(0ZKfjvYFxBrSL!S
z6rsR`;!O8pN?6#etVsK-5Y18INOfbrn!=w;K`LvBju(>+Gt7i50XS^|+KlPwIJDZ$
zVh&~Eg3nhbJ;{pfjaYwidgmdkwblaoZKZ_2w;4&IOLTs>J^n!~@lG!9h$if@oZ7-7
z9ft?|j9ZwYJDKPk2NakNcbCDHK`qWxVOirTNDS4Nl~F~8%J>B$G4kEwNB5Y)(AOpC
zu@sva731RQjGK=h4X>#^o;||Zcz$qxDXtMq(K$m=XlN_Co5Q)BDOl-Qb_~cw_0odj
z`lc<v!QiNh0v*MUEcN4PNrq%Go2lg2uaQB#he_L&Yx^r5k$b~Q`kOxF?X$tdvl?Yn
z?Qtp})zsHrE77$p{f<cv*DxK>uZ8r3pccy)Xpq90y4)K#B1>Wu#A$1;d8~MV8hKn3
zX|rBK49rLZYBBoI^6|^)UUDZEaYFG*CTC5O;bxWAsF2N~dXI0$TOt~^u7X^v$m45X
zgo$7UNBFyq>mP*^1dt1fpPA~CuTO0lz8z_bL)rX=Tj>~l^kZ}MlTO|?(_irZS~Kl4
zdv<AMtZUUC1`d0%S2)X4NPOS{bMRbERD`uZ^KzyGrES@<Piy|$w=QKzcogae5REoG
z=R=398yI~Og&2A8azoqs!S{%zmmFh;Ll4|byWzT8xA%p7g&_vUJ-N`An#32?NsNT_
zF0yTn&S+4I=rsn}<X5>Mjb7m~p+v*YZ(7xvREbZV&T-eb@5K|{+;Sf|)S314^qhgP
zAq>AA$%TTp0a?3(21+!p#j&=g`D&AA=x%jARQ&=+*X-Jrq94psLGC9)S98Lc&`?zW
z^W}7S1b@MF$E*sbtt?k_^ugy_+)4MxWyi5@+nFzLQVrbiLGzq3iiuaO2$Ps6{8<Bh
zSp)EhpZOGXc3|32px>E-AtzEP4k6ud-AdIE_O{%ipGV!A7^c~D*T(ojBqi(v1S=sO
z9ex=6RZlWo9+1m@zD-R{Dr#zr&bxEX^vSG7lDw6!i%JR#3N2$}<Vi_MJ|LKdAWfNb
zmV<+XFOCxe3d+ihwX50S>qbY-I-|)jf!<M3!OZsgi?!w<5$FeCgwKD`1O{1I`V)<g
zhkX5;*3bjR5wx3v?lkr>_OalbWrK9MZfU(p8F&BXJ%mYnaDR?0JyjgKot<4lX=&$X
zc{6^Lr70d$;Pvs-hLcGLG4~bRjm=Hho2!@8{#90NO~-$pYZdB@S38hn<KWzB7pUbm
z8jh(DRZ?nd*z~kv!Ki)1+20twDQW*RK0pe>KYU;+(19Vmb(?V=BnzF)(SHk@d^<nk
z0Nt59PIWLkal8x?^R6I}VnK2tt*x!S6Fz`xoO6`EsI{8!;Z0RVjBUCuw4s4G{XkZ_
z*8f~U<DJ+m5GnciJ;yg0D}5gER?z0}fau)PnBPWW{#gypvb}d0SLUrz9&h!zvS6=p
zZ?#td7&tSmu@(!O-!YZH7abj)s{?cPnQa78&~dJb7AR-9mKLFPf5xlW*jTzTUM?<F
zNIioO2@4BLMMb3rU<NZGDl<Ncv*U0)YStqjDn8KXAh+kvzsCJ9Qyfwpwl8|CdBS@8
zxp?(ECQ9;W6C<Oe;XtBMQBhqOj`j87aC38CZD(du6))qE^K^L61w;U(!NSUFetxh#
z=B1ziP!+_A3`r`9X=&l_-q9>AEzSS>q*hQ=RB3Ow#eO>>LE@S=W@;WzJ+1xp?h9#;
z(;PHLqQzun=)TW*QR|f%MuFBd)LF027Qs<VHTCJ^$9cd)AV^3^$n^4L`>L{}gzKGz
zC$?`R-JS10Jgky9B_t+(4VLZ33l2xte)6OO-~*kfPf;MivRseW9wWT5C|Ob*<cI9A
z$sqr${cdm=$!lv9gOg%!t7@pta$~Z{Yvl7IF|REOfE6OpaPHl)tSf={8WFO$=R=sd
zxS`sG9l-EG;DH}_emEqX0HVZ}p&^2xvv&Hk|G@wp5#AV-3g8=y`*N4))kHrutV{V@
zRn$mt=pq)GkuR?f1Skf$BM+O&dwM%XW<-{$)(gHHx)(76Md1G7-xa~JenFb%T<iCu
zwefFEAUN+%peL7jQUBnwO@w{yVrAp<gNd>ZZ^Irk2(827N9{`VE0AfrJuj;d?m9X?
zuK&YQ0GC<>_iig@W@VNB5%*Xl1mi>T_Vx}93p1Pg=5$pFEWAHkni}kBOnm$<fYw6e
zO}2V11zc!sj*}A;TX^RxEHGF#-eJB~w-)yCD$pvM&~vW+qftD{#L9|pZ2YG*__8;(
z$*r{9aHrvFyLvU(c|CAtGmxwZzy~G?3EvjUb>B&KF;$1?&C%b#Ljx{PTIc5ICB1jH
zDz9e!b~~S%1cZU81aJ)+u#bI!Q#)S<jP!mQk5RPj&3*zq4Av$(=O5y_*oE|^Khu3K
z;PkEm5KfADZjjCf-3By(ZGE*!O+__4J>7qMXZHXwuBKyPU|1Y)jOth0r!rhdxD9v#
z8}|YjkqJ<+y6L&+Wk+md7ro1aeTP*J`#)s`>j~a?9)$R!NG1v<C?;T9Mi9_j$MUD8
zq;%Ytfgs+Yys=9ufo^~l8!e090-ckadzro*d|v0dIo=UP^32kbr5<c=zvI!TZ&g2;
zyVbrc)PbFp2VM7Vh)c?@csF0t)?c3ht};<&L!hRv{!jbQzrV1Oeq5-t1w4SIMR&Xv
z9UYw`h#E^ie-4)hsI|YlJEF6r;|t)j;U>*BH5mih(pl%1msJG?>?gm}c=_{w{@^Vy
zE3@X*D={@QGrPMVkl@PjaP*tnyQmn@3@}q%fSTMz^2aOhy=Tn;3)?cey&!)AO9kBH
zw5CiW-=fN9$oh0HNH%EhY=3d0xV&j{`vS;3T!@o{V`10t`grUah+MyK+E`lhC8=}(
z_gT+1)$md<OU#u?(%+8bZqhzI-7)*7RLkmLc6~IW>3FpEb88qOug;TxT2ne$D==)h
z$&)V0o8$8oHA{%QVal++44w)s3>zC;-`!pCd6R$bDD@pi1<2!$gMjo()kOcv&P-o)
zOw8K}N9(!3OYY>p<*n76pq+hwU0q#u*5rz-EC0av^}9`3jlkhM4rB^DZu~B6WE~o>
zuy__2C{tWqYyl7zmxxH(_H1L;`mygftG;{pNJ!edMF2iK0p3RNy+(JLVQpJD0sY{t
z&mvx+pZAO>d6uL%&mBTi%1e4L?U!a(ZBV?_)8B6i;zUQVaY}fTSs$4PU5w=DTUan3
z9vxZc_#Zq>_x$!^ruX&h*BLr6D;XJ?Er4}m`MWYRGby@Vw}35h#>B;00X^z?vNcgs
zP!I{o&vIWnCG=UZxb%%O3$bU`M?hMHvSb2$zq?MVAwC#VHyDm=9&dfldSN~nbSr)N
zH-^`jTj#y|r{>x}Pk6k)Pmiee-W=S}e&f_}Zalu%9O&s;t!t$VOses8cGDAMFiSK6
z(2Sl9yxpq|0D&sfjeVo|t<N==R#v)tdwVbZ-F0AOQ{YGp^1Hq`c5Dm9y4nWjgC0CO
zI$9AMi_4m%lA)BhkiPpLq?^A>A7BiUC<T9Q?^8pEu`(;b?6&|U-4^`T->;4yY}DYB
z1jhA2F=o8~tzdo0*zeyRfd5GXC+oLW(S3Gn4lEKEq84{!sdrn(zFQ%HS*^ex7X(Y5
z+|vQv*#yWqWat%JrSqCt19@<@xn1z-Q`nv11OMSkUED|t>#cSevf4C!4%pS@wsTM)
zg#}0}?YRBty!dxr5jkE)J(~gg+9;cllCtNPj*t+lHH`oDOZ%p2(24bAcP`=gZyg0C
zCE%v00483>&^<ZIAmh|C1w_y^)@&$SIzuKv0uF~?kePHl^(?y;Pl2Q7YSp{iAg-n5
zL2+^<&_i0q+yQScMg|wRzt43&5}gSZcV7W{f$@qE+4FJ!mG_d~=KUG`6w<yzkm*FF
z6*y}$Mw=WG#JRX|NAfkhxxY`pzOR-BLG-I2+2IG&#@WS%`AnVX*?NAC*0X0=Nh+Zs
z7z7_GD~W;W)c^`x0a#Q`ZSDSb%w)jDdOm<^dahH<U^*P%I;8U$wE=ir0i30biG_FV
zuQ|g_j`aHH*?f9Y&80G9PPj(cx_-rIxvN+yF)=aca}M_Qxuf<(_wIGgja3YU8kBYc
zh*|-tApn$lAVC5^WaWid?x6)_I{DR(@2&cs)Aib&jRVtkR4b1GlaitVhGJ`P|96*@
zo&AB2k67?c_2Kw8Cu$D~+BB|bZ3L<TM?aNYxs;0mJi~w}aM8CWsxW|7!o<Ro12XU)
zuQ4uI^zS*__!hXp<v=api?>VTB2+)M0qFYp*|VsvEk}wtrH`LK!@0g?3z-25uV3fE
zF+Dx)JEOeKE!ttYyJ<wwE4<*g4YpfBNr?g=bYRi;hUEzWl4E3HK?RVu+IKelALA&q
z;PI{^WvwDw?tC<}0u<#k>w*20?a*Af<4heH$gbErIdx_VJMdZck_JY2|M?e%`)|<g
z*D5t9an#;kJHg^xn{2s9kMOJqGSMKhf0)~p2#Ey%bHsVd30)hb#XqN~V}XZqoUS1P
zc>$E+W-mIxU+-=OTkj5hQpC+@p1Q9MGujGK8P>QUfg=!T>-NqV@_pIcL)o*_=3Bfj
z3IhX!J&-0_JXr24C@=2@!0$`oweQtfkWtp3tBP*A#nYfCPe_ad&#k6?H%l?Gv5}!G
z5LB>paLDQEk^#mW7Ibr%qhIe;s>^kLetxG?>A<S+xMr%ee*OBj1tfqRMc{KEB7U0E
zrBEhwjD_}9E5_VI<mLvO+zbWXkbq<9u3O-2ICnZu<7`|uFsYq>dA^@)VmdcPNg*lz
zamsL0yP#pF{e0;sb-hS2MlkSe7QoAa&cGi&#F9Ci4>6w0)h*UD``ZzfA@x!~#Bn?r
zXy22~0iz~=@jJ=RW!#YAjO%rAWakeoM&uz~{G!Wz3lb3Jp%D>_CsWSd)3xsV;AlDA
zJj#C`VaU@B*r=9~Q5?{1`kv1nzq_w$+`&zdOu7oHy-QXe_6yed9>X-UrS7&7*lx@q
zIn>(P>T|Xb;R=L=+2)_pE3a(79dWQlQ5L)6<iRO!@cem&^PEiJIhf|eH*a3=?YV$_
z6B^K<?I|2jO3WvKAbL@|eD8*!E^i=96p%oayYx}u%{h60{W^)%_jzic`mokN>|hp!
z2?z*^tpWoBue!VAl{FfdRe^ST%HwU^WPa|<zrVvLb<u25KTyr%WK3m%<67y02Qi4-
zV*S0co>+>jqmy`#FDEA_=6Aw%`z#Ch{nvwScE>YxbB$<9*C{|FCOv?7|7c`V`)4^)
zb%Ac+Hvfqz{;dCt0JGS;k?FCa_6+k2GM9-w({M!h`!wF5f|TOW@j<rsk5ko#%dU>)
z_rTDLG=Cl25kf9{!H=)9)LGd{vDs5q{SSKB<eOJ9u{WAu&svJ_Q}0OoxbHkJOS)My
z_#OtEJmoPgl*SEt77^Pkt)-Me^{en(r+LNbcV<gl5aT92siKGGmql<HjTh0kYuU#P
zp9i^@PHh%UcVe@k%3wmnYoj$fhlRuC5xA7$CY-Zjtsl{#SZo84f~pR!vf9d<Rf7a_
z|E{ya`62qH-~O+)2MFNJFacCI2N}h}E;rgokz;3!=j=+gE1BqT9Hj5Th}gP1%z|F)
zaACB#G;ML~tp5rNbh0EQ6Mpx=>$n?&c)x3X43*+%8B(iV@kP&cFyhf%EBP!=ZKL|9
zy+3wdj9-Z(o#~G=dZeJ)kHXoZ9M1n6R2YJzLH`4295DW134}M>yM+i{XkQuZ^8}$T
z@;nAC#Qv=3FKQH!oiArp%8rY*kRAl6$pmr*;Bj|JK*0)2uNH2qh(6VXy!nA7yxym!
zbxVf5k<i-D^;Gt<M%f%Z{|`$6Lo~cYDLgvRuz5*md1}3j?%`rJUa)-dstRh4a=RK8
zcH#I5IXdx_t+3tjZ-Z#J?U%p5FCCt`zoX!98sQUgL1JMdK(wPis9jlee1FG`wu?O9
zs+3R10cwRfjgU`Ti)iKg*~0Y;lNMWZT^u{_by{1u^g)-X2<8_5SO@xj3Y*Yp+H4-|
z_{&Oe+jZEw_29t*>Y@3f6`HT%*oe~*BX<XlV4HTGjQe=QXY>$BH8wE@2L9|05$0hN
z+Ja1BqFPAUtnjrtZ!N39%l7&eBo}q$(%q+gHRN+=O7By+-M7QBb(PVR?@3q)<Fay7
zsEanqu{|*^QFIFXB-YdTDKhp{iP!0qBY*;oWIX_$$#J=n`+83kBHRWXt=WIrD>zgw
zi_7ON?cum>1&C9iGmkmy6{yS!2nwFRNCs*b6b+C|u>8`D<^PzUpI0_`;L6LygbdDZ
zc*L@R?dK)Nl#C2akko}9ynHFHs;U}_A9Xa6%K7w+>5~quPz8@4rXvrM*o+`kBJdVB
z_>-Txl@+MrNRk1E<Rb9$5ajIaEJ1t^A}eBCSy>5xoTntmRDs*nKQN$ZVX=t!A{95S
zs0c<%N(z1Hr+N48-L!?Ki-1po7FNs12+6~T59h`3t*a=(FQd;+Y;7a9LO*(XkWB3`
zlEJO3mK2sQS_tCE@^W6KSPG;T0S6T&C8dOci=JTzJ3BdFUkQ-)5{thJY(e5|*sPWd
zrh|+P4p#*QR7h=T2WYsAffed$SnXq_JfijW^{Wo7_6UNA9D2MNB4RSwtIn>jgC|D9
zLP89pqEFWn4XG?}?-Hj%x*+9`iAOuhL;R5wrsKsRBa_|ihyX$a9Fp*erAjw<_s1qC
z1TAiqa1W2Kpj=4{QHjHpgb9$=-rg77UtC;V3W{iW52@^FPwU}`2XOf7Va6m8or0}U
z9hjn<Tji(~&i3s@&-|tj3ukgqVoC~XQsj-)7Fr{TR|l9F&<+qcMuVw|tt0!>xe83J
zYd`bOC*DK_<if3NKpYmd?7r%p6S*xcD#|P<II<xSdXg~T5eT{=58Ng|7|5oz+4<K5
zQR8W?re7|U3F+wxOiWA=?MySEh5QeBc|<gh>vJQ+!!0I^j~w$3BZK2Fv&QMcU`rN*
z&f+Zwf5dHybyY4BaaCx7ZtGu)lW1zHIq&`bYo3ytibb2&zByj~VNqPpytzPYLOe?0
zHCX!nIXF1c-tNXIpqE+De~F)~B)(MIO+-oTv7`{TPXU8ADAim0LZbm<93G=O)Z9%^
zhmNMl2sgsn+1abqU#!Vkz!Cs~2iD04L=-UNR*Fpl;^1*vm=3-|q0U|49lTr_cDJzV
zLP<&{N50a(Mn=v>Bxt+!h`S$z^|k_0YYwV8Ag;KX%Bm{4L-!+)CkJyx0yqv-Qm;$c
zEf>~{1Tvia*GcyF_G^`4#-N1aFrCq3>sFBlhYW`CyLQnMoZGVJP%SGjZ$eJ?U1oi^
zK$V67TpX>9I0gGd>&X*l4}Gt^k`i2y6$oAy2n909MP7W7vw{P3wPdk`J(eG|n^#!)
zaP14~)924)`qFt(zVu%Rf|`yx+}2hwYL~lF|9stJ2?v%8&fYM5H9n9~zkQ9k7C`m(
z&h(o33GPBhLPEOgZv+VnE8gd8{*-W_@fsQ%!|-FqAu<@-UA@MaFMQAgAcHC^E9(ru
znF_p3%*xsv=C%NBCZ(s3Z98oG2vL%gM*!uHXumH459^HrY8w?575XBhiJ~7b!eiVJ
z38>ah7N7F(z&8zxk1bXY-9d2U|C4p_6?Q7gf*5{OR20ZMAe)}&@o;m8=@oYZ$Z=O`
zb4g2NfxEoC3~_~>0Rasb4-`Z#Ht!#uo_6mow1?rRj#G56Ra)ndOio4vf|grdO-9o#
z2Le8E8_&Z5QORlj?*Iw{_w;;*ElK6fT^q<m3PEe!4)cm(1Tl3UGmLF%XXnMJ6QZ7E
zb6c4;{$o9J>j~57EZ(c=6o8JFKZ1G$Uuiq=*p8M3>alM?Ir&$rIHkJz(I_x|0>Hh1
z>ZyRiZ_{J+O^t;G1#8t`1iD<#53H6}S8-@zl&Crd?;=}GTY&O>tgAa(eoVW*XcE*6
z-DyZYy)=1~*K&2>YZUse4};6v%1TQ;Ba6p3H#g5G%rDwpX@D9p&=RM-_%noM2b65B
zTbQ%+uPjgtQJ?ebzNBPUPAozc$RI#p{hwm}l>+xxL`P$T?>?QZ)Mrh;6NT`IN#?wt
zk@SIQh%sgsAe=y|;X$DeHZ2b&i_pjEC^QTVW{~{>&IeS8&1A>E0L~9oam+;y&Fbjr
z9O-{#A;w~2W(KC9!EN}C23Q5#y%-{D>X_kSE#P`R{5~|6l=K6ZuE};45)uNg^qnp;
z4aUh_)MnJxNq|11NxYA5PC()78X_Q;l!X1vRR3Xb`=w43p#^GV;^|ox3{hww*HW19
z@^%QKdp^V}3VGc!J<|#;(TXDveHB8qSN-zorhQ<}1S%ItnF`AA&2Db4`hRwK4ilxu
zn4+U>fV}sBO-}2ymr&>Q4B{;xQr(iJ3Kca`jW@3;+j`4KZ5^KH=v}Uf`T6ae=_W0d
zxDXW8EQ<`v<;*v9S_b2hd**ZD%gA4W0sWovHY+HY-zDi=-pk}es$6I{J2+O1GEUGU
zAn=)b*f(4BM^A_fmx47Fu45FpvgIiBUY6j`0=4>E)v$SG`mmBxI|f%{u;`Q)1>!Ey
z=;g`2vg*#ypP2shNI$yc(b!TAN?-6%Fl>9|Xw}7ZPEX?w)Vdc;G}DKF<3$rIBxU4$
z+GcZLiI8UviNsD#HfJU2{q7{cxIO~X>RPV|q*3Q2cTpAMr5RqW9~Yc&3I3*&!n77b
zrilo~pnEVsT?>y;LdODYq`N;TSpe7{#DIsdbe1XX;$B&nK9J6X@EBpum~5id;W5U&
z_1h(r;1a;3xisNi*V{(O`zO8W>}1^F4RWnxLWAg-l^8ZspE5YrGc|665DXvX++xz+
z`^Tcy-8JxwWUdH7>)7zKjZs3$eRZy1^QNJt&rp&gFj^>jzr6@+edX7dyaRR#tF<6(
zHh|m|mk9vUQe(UdIHQM7$e=V6a(nhm^m(z2=LTBGtr+kK1;<$64+>>nkxRqEAn5k?
z#<@mW0h9JuVp0MnO3u^^6v%ckJB)`@Z7nep546nWnG=NHa^U(S=nog7ozksWF@}pC
z3F$Yj43~#kM>yCHQE8k{bDcL|uZy}{r#pN!eAO{VF7xCw1brpvZTl@EP2uVf7E59A
ztsn$xaD@+Xf5)VaH}%(kf}~%@Jn~MG4YZ>K+QBliW`$aq`G4Nm$UW__=)hE&H2Cv%
zm*npmxQNn~%gc=h{V+EVjaSmA!8rXEFx;dWxtIrEM(yY3PD8MKd6)0%xqaF4`!|aR
z`#%l7IssGsC3yz}PF(GIZP|Df%@z-8oheE0GFJ`TZc*{gByQ6GRIB~(WOZ2|$TOTh
zy!ZF--!B1|UZC7TBX+Ri%_cD@4^3v%WCGA2Uo)rcu4pVJg*294)_83=lH>z3%*_ZC
zh1}H)y3j;`@ar!K&04{0D1M9>hr|b_F{|5~>v4b`fk<N@jsoXTh<>K!D_mljj%5pA
zOn@cCg7*KWeRBnlnFrOgnl&U$<{}Bn!O`(fbC^#p<xWgYh`B9Af}#gDC1voKQanx6
ze}tuXb(MuW8RG7r|G63#ZJ6%4`Ns^v7-^oNA3xSkyy0aS0Y}XLo~IW_DwB60FHYm-
z>7D*F8|L6}3a6n5w$1RS=gi!ysGMAarj>H)<dvXi!-oCcGQ=<DJ34c)6G~8-@rVEU
z7BIgPIx=1lgXw6r336aT!|&eXJ&_L?Ucmf({_8%)<^}|8Tp61dzk&$d@?lZvah&WG
zgNN8_yQ>>ND9x_DflY%{g2tQ~SV$-r5B>`T0t&9nQ*OmYN61sTbzS%h1)kDEeWbe{
z@x3p#fH?*_jnq)XU|O56pJYC5yp;h;Rqyh(7zSv*<8)ARfsPgrDv<UeEWLjp9wB52
z9L^s7WrK4gS++-DZy{)bm5JzeIxmJ8{>mS{l%|tRwRif?+#Ic=&9}5*`@F@Kiy_CM
zC9VL<S)+@Y5vezV4jH0=aUue@g)@Yx5bab#;nCPBFQ#qJe{{QIUSIDrVqp?WaG6_x
zAtc^zP3$#ZzJMO|rE;?SE}-SVn{5CSL3+J=+oq+925Ipapz>}^?4*b(IZ?hR7JRGK
z<U~!SnS%~RF1fLj^Mvjic_T4(-`rZha8d;B0*qu>z%p`?aIXag^PS|=wc#HO2>5b8
zh5W!Pl!Dps1h#%zGkG5f0i#GvpvBREkx8^6@r#PeyYJB0pX{zCe7JLXno_MM%1ZA@
z$YA8TjV?&3yNwtDFU5m$RCppzL$MO^=<y4in`iEEyE|~=rl#5s4k(VCADTL$#3S`p
zTeK#~U_Voj9X5L-rxez!$QOd-sT`lXQZ;1yXFU{uHC*NDnn)~2hV)53G7Z?umY86U
z`TI>DjD+C~zY|B<?u@xYdvl_(wTM#gx;USlX-_$}HorRYqXpb&72B$1g!jZEFM!U<
zl|~^uJMrvN{_iHIhmgX9e-)U-=V1M3naOofvtWMKoZ@9G`2Q34JrwmlEX_QuBrM#l
zzzZb6C&0(e&(AF&q{A;H!7nPoC&0}oCc($2@H&O(|9HU3+0xGX<^THu;W@51-~olZ
zZ*cdrkuW!Nx8nA6vxBVd9IYgHd3jyjob9bFJluJ~SKGNeTA8@G+4I`idGI=!dD*#}
jnA<s93-j@t@miYM@PGzvF5Y{C_MnG~8VY4_)7SqCkou1%

literal 0
HcmV?d00001

diff --git a/docs/images/snakemake.validate.mini-tutorial.png b/docs/images/snakemake.validate.mini-tutorial.png
new file mode 100644
index 0000000000000000000000000000000000000000..3badbe426045803d109ab25c0dc4fdd5b0658952
GIT binary patch
literal 34485
zcmaI81wfVC)-L?gUD92mbP0%bBO$4Ph%|_RbW3+5-3<aFCDJL~jdYg?(w%p%z4!UP
z^PhX}&&CalSZmHPM?KFoCLt<HvRLS3=nw>9$vu}+gCK+i2!bm@MFw9vt1aRH9}wS(
zzYvF@iYSa510?Vm&R$Jc0xBJ%*ai=1jGn8#fFQR=5ajI-L08~Q-oGHokrRS`>qC%0
z5(E+3q}8hmfo~uiD#%Jf_ptx6nhIjUSI}&pzqSV-pa1nwrq@CQ{5m8jCH~T7_RoT=
z3;9N!=*_-arE!1E2iK+5jz}XxT7OIbhioHiO0Tu66Y_Uel(J90i#?0WIBR6h-{)XE
z>HavY=BD<`;5|lApcQLC0&a{%U(BaZgEQ&VF`B*FY`wFax9y`+kxyr|Wxh3M?)-iw
z8zWoO_4G;omg}Y`qAGzjF5X{ngP$b!py0pW7#k|eAJW0za{LQps%Sr&p`Lw0Sq!F5
zdz0#Vgh=QRD)jPwX!e6RyP#OyO>I-7Qq_9SDaN-+7IEwYyz+@J*Jr~q6!7R~n@k&b
zW`jOtwA%zW+3iz@`l+d8slEx|4PfIhwt)-(0$(SY^1z-=kn4YcnRt^dUxC~HMUvGq
z6<+JdQm00<1z(>mg*^{(G!?N{b4iv`HbHU*=BD|T@E<y<!vDM`?AI{tBgPm;by779
zW}XRo7hK&%B8Y^YBx$RxVBQLS|81_C&F(7`{e7k?&Ny-eS0D)GAbJbe4c=+{P$+;b
zG5%{Cv4VyKlX2IFnt`g_E}dKlgMZss?@(1OMnBF1%Hi?i6c+^=JS6|pO~*hxq7svh
zQDwciNzW`L^5Wy+?`7GGf{!gc`?n{)b+yG;7(I!X`@K4;|1!@1ER?j4NYmgl@dwUf
zgAW@ntppR}K|oA=?bmfhGeWYub2tXGmf`S%>qUvJ0|?6ED<e3c((&Q05>IeRvZy2m
zTMej_*>3x1l@DVq`cSCd4}SFO9#q>uy({pdhr|j-miJN=Y&=I}zV5%2rk9^YPP5SQ
zzstf;Z`Y`8)7_=`-CBV8;eBR(t4`{|f7&rxC-r;zJg)<$=pR=?Iw^wU6PLm}GS-pF
zo?BO@-Z)=)7L3%DC`6l)B`oLZAo@Fzue{$SXi?vX_|zqk|HqS=e6HhLSx6ch!M$tQ
zYoTG4`Bb2?)|}F>n#<LD`z+RUG)LOybW*eX#{CNxIiLRJ(S}`6svW%dPTz3n?=~W+
zm1^JiX7Nl{doi_+&$lkYZJ-KoplZUXG(x{LpUVCNqCMMbk_CN|1%7IIx%-bR98Bpz
zILKK;H`sL+y-rlG&wc;nQhSbcSj@MQgao3-!!B|=uRKybXkOJ3Zu31098`ww>A#qS
zty<KklS;Zdd%`tk8d%5YF;JCkQ2K(}{)1_f#dx<)YO+pWl`Tep?wxMq+B^cJRg@X^
z1S56hPKU{HkasF8J6n8@liOEDWyK=?9v%r1v6C}{d^&FI+^BSeeN)?<Yf8zmG@r=+
z&%=Lzkhi@)TN|*!;zup+k6M-0QDyS3|JJqZ)a!o#zVn^Pak6UE?O9EWv_gN6&7XS5
zoso$N#3tRS&z~QC)obx=`_PH)?CjvMJa0c7FlyEjO2|^~b$_Q?mTuXgcYifyyWWF0
z@3IzQW@grUd%lDNmCyaD-AH(&qOR`O*C#)3+f8}k27<E<jdwkM?TK_495g2^tB*ac
zcBVVPyGi!fVt;c^$rzR9W7~h=+0_2$fl^}~-{*3QNW=<{U#O6NL^ff3iea5-e{E^g
znRes#Uc;~7zhkJb2dzE*cFI~jjZt`b^v6HHuxVIux!NwU9Tq;<pR4{5Yo$&gtz+9s
zsP)Yl4H+5vV7GF3wdMZ0MRtE$uKGGm?*=|1BEsct=AG^CG}qTRHQ101a20w2bc3a4
z4}P0vXu$MI^Zo69e-p-I$M?O=Qr3gF1r}Zsb1A6y5p~F%Q-)kqYGHLwAk9=ISv>yV
z2dlkRTmc(F8IofX1s3RUHtsO-zZ4d>x^AThE!~~fK;F&GB9`;DcvEkkDZQgbwcfs^
zL=<nGd6yXx#$Jz_`qVnZA@$?=UT3q%^+7xJ9WGe}@`n!}YX4Nv$uybP&j~yH!d>*d
z{vdSNfzL@60p49MR)P-OFgSX^N?(2E({C;%gAOtpgc<gK=opp44OD3XpEfK_@6b_T
z+nkmAp9du_b{_@|A6_@6Lr{}im^708`W<F${YoJDfgkk^biU*zs_U|X^!0Tmvi)Xw
zN25VIve4bd8Zf`FM7H;!X?vsV@uP)|PDAlmuLzHhkMq09$jN2@TBFl038dc{N;qZ|
z{Twk{1@(tMu6dRA-U5I26Aa?ztTJ+8I6-7ds<^@vxdjnmPNGg7!H~KH?SZP;B#WZ|
z&ja39r4dZ5x%Q?z$9-91Za+Jcixk-F5cRHSr^-F9orLahj%$yH1Z741X2N$$8?aOP
zKD51lmyV>r{zyla;PHa)ix*g5HCWnKIOF=FrFB1k{HVN_%3ocq5>AC!T3qZWpWil=
zE|@)OzOiUB&i%AZ=W6CbIm4}3C$OFLe-A8Jl)Z?zF#6o)*5(O`*~Y|j6FLY`HlO5k
ztbAO$rkb6bJIv?sK!cX;>asM7M`s1Nb(R(vBd2sfe(3wdrc;XxT<>Co!wU_v5lDCW
zT~v^_wKijAV#0t;!B2DBke`oa@$>7CKQ&9(#gpjz>%Y5!qqCpyrK+haV%N4e=UHdg
zb?Jotmm~e>d{daGx(E`)xiBFW4EZ{vsD7-c^WIOF-0>52-Iy?u5umJ+5-jH>kEcUI
z$BJLyHW2OnENKrQ*5%)*2Cmr8(Gm{Q<?_Ij;f`0DZv$A$8r9rHGX8j;H!O83dtIp$
z5EHMZF5NtWmd{)6g}-aFC9uCm_xs^J?6GHWX01FZbYVQ!u`WjQK>q41*<zrIadXxT
zc%4CMM3;^N<-b2jFS2`qt4xY;*EM=df+}DeX^Xd4*eJGlAt!?%{<zuJA<yK9GM;#u
z!pM#&G{Ue{mEb?>lVZF@XtHJ8KkY?m0K?xs2ZR5A`RrjaM!$3z(aNNzUc^q-ix)3k
zc1mhuuBgcQEL)E@`ZpG_;*dd?@vlPqcNvV;)F4h+x?VglH&e9})XP5-&T#w9l;*mb
zJYrs@Rb~E4zzL5*j&CD{F|-!srV9NZ@V^`cc=-5MyG8G8>zxQ07*p-ANZ#j)ST72h
zRcXJ})Lh%qyBBb8UN6g)=}r7lN5sp+<6mdLB~dnf!+UBbRlP9_R){5zD|2hTHI)8-
zT0O4NvcGCZgS>DZn+9)6$Nq&zldQBf(q6;9h~->07Vvm^*WEawnK%m&ZB$Vx_s635
zejtroE;UnXzI7z*>+iQ6fAKW`#$Ltn8`BiDjG=b{Z`E|Fk)-(O<m6<zSnrmZ%Iy~|
z$b#$}nSKvq!J<xY_r0j?HwS*Swu<$w6|DDk>3m%FL9w~NJy*CN>hD*$e3#W>@}_jK
zdX}bGmenXIr=h;yrk5eH)tEawI~xv1<btMc<)CnCf4TypAk$MIG&I!a2NH2sL&Hby
z{n61;ANyfpVwqe#7LBj1cemHFO=n=&ox@&t$7nUD0UYjdnmV>f%*@Q|i;g@mz}DpR
zAGBOemX*7mn#Uz3uK8lHw@MSg^)Go-)7M1KgbjWIf_;yNmp8k%mc)K*Nc#0_Qfx|r
zhv!y#{Kjue;aX*>J^X8yJjr2Mcw^Cn2WuwDf;k>tyF}wElLpmpUnW!%*xs@j>sx(!
z_ycLYL_N=EU%q_N=S!`yU6Z-lb8cz(x}EZha9b$zy0y5u9QKm2w&nmoVe_YIN@+*9
zw00Gp_4PMI1Ox<FT{&F3zg^1JJqE{Q*dB;ww~sBKZ8=b-!s!2Du6iZ>$<U{d=c7Ev
z>ZP|gTcWV@440u_blswycbs}_w=a?sXxUq(&lx;t)qvG<(a8Z4$;RRb9)y_~3+P}y
zRyn7-ny6T#q)lB}nBlxM_kG`Iozxem(gt<bOCr1Tb@)-jXGspqGoUm_y~}Vlo!Du<
z9JI~>C!<|L6eV)S2z#ld!Ohx5qy5FE=^ATl&x>|yiQG|GYN%Oonk$@w6~+C=<H-32
zKQ%^j`#-RdSA2vIbDTP7>KBrH8?%E?m%^h&??}p;ub4}lugoW=ruv(%&B(qnOnKg{
zQ5$Xa#cNfXM$9*an=-z!`|~-~s*U>oLe_HjJDT&nEy`Zq78U3<boUly$3L;v@y!{Z
z3MZ(VR+8rD=jU3jXFj!FOzH@}{bt0StfHa<g?0MQ&dxF|wHsDsefffZd3iZz1Tv|e
zmxFvIh>(W8dJNF2!rk4Sw@BuW_WZRFrYUrEbnwgGDJf(Q&dzIBJ7wJoY;V>V0|*K%
zd#e0Ua13v+PA#)yHonn<$T-ftZ#+4^xjeSLxv;(2eA@D(<!+~?uL*-m2^hX|@GD8h
z*OHQeN%Mg2wP>oHU%yCV3DVQk$A<Y*u_PrW3yX?Ec7)B%&GoCaWd{fM{``r*&Q(t_
zQ78?`3AT1$BfkJy!KY@?4TFJ!!ERq5<uf>Bb?`lU*~q`wOFLC<IpceG=lQ+5I_KNB
z2YQ!%n#-qds;^)7UUyeM=Ubn3nK0*-$o@3n5cQe%jXm4kU})PnaAsk|99<sg3n-bx
zLAkO~tFoe3cvhexRV}-o8TJfZ>y9#VaUof{9CAL`R`wDoWqAOV+pI{y5>;Z$4FlA=
z?GTx4e-F&F*84a!&4zr%hQzal8r1F_#<u-)dR)|htxrm|15~ES!PKW~AYucQ3jCJ#
zWjRUW{_<wY(#u;<p{rlH{Gd&tz((P-UJRF|az(s9;<%?f-^+d96R6X0HfsS(0O^9x
zh9JSYtcE<!8s9DlA)BWAgniCfvNS+~R_J)(39Qp-Uugx!FEaCH{c-Kh5lGUoxcy|&
zx?79|oSg|3{tlS5&5|z{gu_;vJ$CcW2FKc)rK_KXQw#3LgBPpBEx|`eb`Fk?D>W^*
zf4(oSZ6xY-1!9rgY$m^3{Zd@o7O$!M>^JY5YRez@cl-BBwGrLaUK9f9*sw>DIHEVW
z;PYb5vV?#3H)F{MT$bwN*{-;wBQGfX;GMl;m;3LKSWl72ZUz2TZaPTz^5x5An8)WQ
z>V{_C?!NmwsC#(Rm4ZNnyzSN5*@=q?7at#=wwWpYm-k(6Wp>9fD5$)C9n!GhOn$!L
zObrW>zn;2!9hRB^uI`*h0fkW**;gseNXK80nd6gSvs_hm^(xF$ZSF3&`YHRq^58nG
zlFQ0kX|&`X!KWE1?)g>@@P_{Xb`0N~I>xMO+JPxya6`o`;Xlwf@n*`+w{PV)zjC8E
zo%eL+-F8aMtgMvG-i#tr?0eOH-@}ZE`ZwhGH*Qa+Wl1D|U15ksfi0_6{p!t|Fr)6s
z-rHzK$^U@n81`&Wj5mV+soy4%Xc+y}Y4#|(9;dIMR)DbbGE-Ah4t~C|?xl2=S#Exg
zy*WEk%Ms&O$H%)Dwyt0LBI26vS(1hN+TY)@^U&+yHN0%v_rM7wVyJ0<$=(342@NVd
z=*_x?&KLGQiH|o2{WOZD!67!^otCw49Y?qg>fId?!&11bt1AfQ=%2%I-XL<;09v2}
z^-D!n)dz*j1*(Yfs?ugd6el@t0jO{LvLM3^9pu<Ypc4{lIcxmBPrj*b$2Ii`bUW;u
zvmHqmy`4I#B=&4|07x2@GE5o&Tg{(}a&!NplgvEzxo=~7OF4Di_Ycl3p27WvMQ-;i
zEnWLT-Y37y2|}cBL!{L11ZO(d4wr#HNyy2eWq4j~a}9f~VCc1iiuM#%$V+M#Va@E}
zh$N2Oa0@`AwV;`|0f=b%;&5%c)|MW4CNhKzN*m|{<>lo^CMN@6as0I963g>2Oz(Ni
zzJtKZ@nzn9iB<(XXdU8D-2Z7b=0R@xwoARKeZ)ESs9Q&YYIAlSY^+@;mBOCQHOV3t
zY;5@D|Mm@}#}~$R50pz^#P?SWTf3|&+kAI6+Q;{ze%e)w-aox!1x;5Np(u&oIuH;N
zu7GMT=ImT^qNA>k?+MU{{^XZeNrKMokyOH=jypfIi;B?Pny5g_rq$wkTlgpQN)SYN
zjSUEnNH+ikkIBDRRl)kYzxxNH59a1=Yf+++`3i}2qN3EqZyoTEQBV#~%e*K-#{mw$
zea^att*N=V7$Z@~4h<AFGBUD*Wj`D`3T%bpIZHm6a~Fa<4{+~nmBor(T~}k-jAa+_
zLz!FJuH)4(j-2vx-21Kj8)~8Rd3#i3S<M@eBV@qxL&L(jKzPK($FGcJOO74iUuNEi
zT<^C~Kb^NhU}j+%9UJo{WYGYWo`3+zc}hY@J@kyMtWpXJ7<;vAk*b<KM@L6lljZ=C
zz#ulPhpU;l>n~idTMcCa9*C!G{*V$trUbx<ZE6F0EXe|}H2_3|2=b^y4tHqsxc*af
z_{ZAo<A8nahP+T6-z)ZR5z&HdI9CRw1B%MX>(>Arn2V!9xU`0JEx`8=w?ywro0^&m
zzkF${n$i{geW|JIie%k<`6!ZFG_tEphUdRRZ2Y?+1RNo6fD@@HDJjQ}hefVgt(q@Q
zVHjP#`pN^OqQj$pj@*U@a`5C@APEc!?67K;Xuy2w;<sjtz!B;Hv&)j3tN;C_c-g6;
z0@w%>8+&zBmO4P5w?26fhKq}oJ<#vZtnULsIO>m0gHU*{^YKqr?PiJvC+McZ@=XRT
zJnxP(b8~Xw!I6VD279l5haIAqT%N134q^;7v$XW@HEv@_)bSk}Qi48}Zld~fK59OU
zInivnA{-LjZ?rA*OiMv<Mdyu)_#16;0aF8<55gK?)xYr9;GPZ!vL6hZAHWC&^V4;&
zC3Sm-MV(Z}L}d?8-S?EpB0`~;Pd&|VbnulAZ`WGz%I5ElDjMrF+&AJir9d(dz#x_v
zeagod3hVLmrrwlj;CW9^PBIAJ1NapjTX)pQ(%DV*QSs{BrNtinPAHDhfj47l&h*R-
zlQ|DrML~gVn!J%w?gijC<nKM#qh&jQcRjy%5=f_mMy+bXK%v5W#T@_3sUd$W6WHnS
z`?wOmv?{^*{t^c>0s82QjpBta=+?8eUcF+tTfy;4;;wG`cpeIMD2rT55lE{xf^yx7
z!O>_lup>kjWyY7<36P}v&6L-z<{^mT9L7*XP0d79F-r>zd>m>K*4aN!hy8kY-)ikP
z<N*uJMS&d_M#T2Ox^9EW*39g=ki)M}(-RX9Jr=b*Tl83&nDF%6w)1U`JZ=uVJ3Cp1
zaA6iY?7I~ua>k-eJvx2?D9O4<p?^_Eu!BfN1KiOIf@F)jPZm)^N5L+0A8JCtBD`*Q
z^g4ZgeQj%abxS)^E$}yviiYohbF|=Xy><QUayYj{`E;r)T<fQ~BK>3qNyb5+l!r&t
zNoB9Ggt#~yD^b{qmZRfo;gt1sIUGRW;vOEtu%rMQqIPpiVSl&Z`ydM0TtLd938l;M
zx^qqQy1Oj=`c+EEu;!JD3g_=7(2qhgn@>^)P2&<0BJ0td78_kTQwJd#D<?J<7Qa!m
zw1cJ$_anJ9khp8tV-)hbVNh~=b7ndTb}+EI1DZHw2*#?p91lK~{nKDJz{|_a4=4`X
zpReDofVH4NOhaDRHC~cNM)aT}9UQddthM8aN_lyS5}+g9ofL>>x3o}WVPRdg5jT5V
zl-3~w`Y0eEAa`*M$Lj)t0KK!YR+*RLnJYu?rj+hO{J*)ER5I|f3nY<F!&0uP98gC4
zf9eRj9MJqM(~F|M{UxjJ*>!05FTZZK`vV`B!G%ISf4*7^<LY#hL}N(sxO~^{d~6Y-
zus0CskHdok0|66AFH9sjZ_bx$yWlvrQqfH%`R?80NJ_!LFpg%Imrrxly-*;L^SWVL
z3Tz{gm0`w}8DvpTn4Q%IX~IQ_YP%vb5_{t3Pk3l^AQ=IWAqC^lR`@(G`}Nv(PFI0F
z5;yg{PK)&d7(+o-16c-=39QHjheN?{1A)#8oKFXUQ$aTT;V=-?_9yJGRvUs~)m%?c
z)Kst2+|sfGFdToi+%+{Y{%<gaSA{t~SND8ET^ZC*D{q<HWSch`J<nwenO|@LcK#vx
z-(~|vkjx%SA4U@;S*QX|*Qm5N(E@am;@~*6-@E}$dq|o41`FUpVGH?L@<z64U#jkb
zb*Xgc=24M|4YfBT{384H-$u;N<QPm|*xDg;&J_hcyA1#!m^e6Vf=id!BDdSh3W7H;
z*%L9$tH?@9N+9pEGbdPJs^1w9q9a+=Z$E;;#_}1<+KW}dw3fBp2>@5?-QMi21(2!S
z<<M{*@I42Y?)+{ijGiaC)GJ-&)`$?RRymY;HBn##N=CbBB{+vQ02qU&cH*nEr*;m3
zfpZHAPylbmCXqc-yMGFx0+Lk?xuDZ-E9f&|sTsV;<BO>K3!Iy)sg^*-(1Yt+my=Oh
z+g~HJ{1n)hi;bipNKx-L^zNi*hhQD5{Phy(VX}j$+I~+KtLo}v>#uJvmX@+(Vqz`>
z60{RvS;z+{4mK}YWuvG%Kfg%LyIijT?A?v0=x)2!MH`%dkR<<;GA5?$>zDr|1wh6o
z0h%dUA2=`2Fhr+dOmPw1Fsv2oz*iP47&iboL#x@n?t4kEoyj+yROUpZOi_LtiZ?Zl
zVIIoO0r8oypWI<nSV!*Nzq<@Gv;2ZPe7R9#cz&>)<T@dGcNo>#8Acq2>?aPR8}pw{
zjBecnkQp_*=ykW{6*@RLxSGOi(LruiE8YmIR>!{kmSF5%B*=`ivO$cfzAk|4m5cJ&
z5?sOqMRFHyi#fBt1i%cPMpzo~Lh<{ddfXgEEz0f*x;b10dHIg|dV$01rTb<QOyvA2
z98An$&^`ERsjAYFMabCxnXSs{-yl2I)h*M!*?lL<q6>I#LmFHX{3#@29hybZyckjK
z@9z^=X_wRj>cZB=MXM3`Md!C~+^V^w!5}@X3ZKu@yMX{Z%~pAS15^OE6vamswY6G}
zyWjeyDrZ=Q??Im*3UGK1D4&V!Z<(7xJ886d+^}?8TI+WD+xC1ZH%f#V9`wND2^s94
z(f`RHuoL)8-T3P$1gDgEM}-sAH~OUhZbQn{iaUH$3@a`-mBUbw1-m^s#0fgNV)LV}
zJ|0qcdm-S_Y5d+=o8wJ<w87l!;M$}MxE>?GPL9nR$v0tpQ8$0QaSg_;P+SVWHfL9!
z`N%b82Mfmks~J)NBEp_P!twWb{_a{-=pcIwchQ;p`85hi6R_x#Ss0NoBF2@}S~`Cu
z6%1I?c@-zCRq}M8_&CE>{;!tmf1X<L@zhIvVcQqm1+eD|Z1cRyIWJ?>BsR$46Amj6
zY&Rf@kb(7l09`8GQw^xnc|ZPm7n*w~0=0HBu&5L<DP3N3Pub!8x5f55*ngNWDpzd?
zj5J8!F!Aq14SnV2-m)iOw81&ZPUpRJV%=7J7Tx(?xj)ly|EDLKym<6<;97@HKjnY$
zmE5s9>LM=Y7+**TQ|Jha(JL10N$#FQ_cni|o2n^Zy{~1K*v2Q2ypZ1Cap|wafK*DD
zew>@PusT8tD4W_=`v0a&!t~bU&Dp#F254YXgGM>Ki^}@3uOZ2P0}c%?nn80ZI<q)h
zpVhU8sgUU<zSGAj1jzhY{%~QIBm4QQPiI;n5W&g(r<fS46cBL=4CuLdOcqEl4vAIB
zM{y);tg%XBpcA;-?IWaoZoi{y<g3NHci5NZflDA5=zKe==dYA4XEqmNSUL#Pf&ZPM
zjS1HYYbZzb9$JADnml)=IC!1cJ>mP{9l?O4f+rjEg!FsL$byTI98_5s)bw_QiynKD
zCb))^G>z;ZCZAtj81~9D()3U=Lz~)s|ABX5YV&t=1^soIp>v%-E~E8G2;zzJ*AI)$
zq!{2OwDve3qN8dVnRROR4atPab2#%GaZZ`y{393tOGB08nfm-zOC|(&bdPsU*jZ5I
zh$8R_&%^WP9xY~7pN$N2o-2ie>#?S9M2Yg^K94>SY7kd1e{DnJ?BjguRi9+x{V1p7
z#sELXonhLNA33Ox!}+<oVX28oM4?n=8NugI*eF$eCuZ|@K|J&WysjlqleC4wtvR;Y
zf(rJGx&u`wuh+f=!NX&lmHe7AmL2E?5>QFKH(v3)<8=H|n4|zXU6SfE9+rnsrc8CT
zGWtl*3^{*wsTh+OV&B_#J+VC1FKkDLdRSPb#3irhxVO-l2^gyiRftV)BPIcuHSoe6
zTA&|Sbl{a9c}3hyIp&{aF(VlsTy9NW_e`{<nq-zacb@hI044~*bjYH6h;(${<B`{=
z;(I7n<vFLuSn1lDD=`fD;a|wXokW<}$;R=4CEHN%N{(zJ5Wt5fPKc{~epi>E7!#_a
z&cMQ-%S|m~(rhXCICm1EA-#c%pFem7?&+rHfKP7+ua_zFV|zC0=~CymwL`IT6dUbv
ziaPo}G|(pQZ=D(f^6Sc*tmJZlrOw!*6eJc*85wq^0WOVf?iRJZPiE+VR06~;38pLr
z4Nlq{^ldqRwu~#P+dRHJ$UY{4vg*H~A7gCwQI36${<u-6n39^BtrjtHh=NTa2FNeL
zhke{8CiI$?O}ZR`f!xk+XeNoizR^q^157DsWm7D-6oU*oQm5zV2vD3nLugo?2pvOW
zr`@+vl@~7}=2r%oSy{&)a&X`PhzofG))(-SeUuMOl0!q$x3_-DB{Hgx8n$j7)5<x^
zs9=XQ(L`rvy3*svd4R$QkS$!ej>;>>i&yb6k14QoYifug6*aZCHI10VmY+O8eEcMn
z4kWNJgp`EFMj=pAAXZi;*PH;8hYx&)w_*h;`nX5i)QTr1iU#(se=t4)y_Xan{kVSa
zQgU!iK4<_|4xZz`h}3vQjnU7OD(WS|lM*d0gF`9czp`RbCz-?gK)Cnn*0Xk@vf-L8
zUv_s=TPY}iJ9HDX;D_2bP}d;{P?q5HG3kOIza0eP;s%CHjqD5!l1)K31Bxf)4M=eW
z6zm?b<7hzEfgSfIGU6d2A>sN*cSgO|8gmHFVT`b=EYCAW4Uib)nI0Qc+n$z+!_+7y
z^t|t}`%qO?Wf$~>rJ_8}r`I@UFR^m&&#&$6XmBbIK>sVV))Rx9hON+>rNRi1HC(t=
zhXfBHIF*IO{<fGH1ATq5zM^T2BB@?n%958wFLSRnRF2Ba<KzIG2Vfk6WFsk7nqCmp
z{OWZAexy{OzGq`br2kGU1&wbn+c<2fBcv%<J`NKQz&898koT7_%!~LLbgZl<Ici1a
zKoNrIDZc<|uQf89AQ9By_infa?P}aoVepy5k9h}aQslpv6CnBoFfg!^V9M*=7!BJ?
z^#a)eE}Ww5OL6fA_Pd{k228oDtA<Q<=}KFLj*h{Pax{unU%o`tKlx6ZlN=hU6`Iqt
zT~1JARtnT7b~JIcH6AVbczsUB1rcNJ<h(gi#!Ht+pI!sh0O(36D>v6qeUy=XG}ej+
zciBNpUTgQ(7WjnjE~-K=!R;z_!*?Q8Za|kSRN78|e_W-d)u))Pk_B{0UmBVRA^P`s
zw-i1(Or1{sM)=h7FFid)1lz~Hca(nYdtV+`I31QpQIco;Kw8y2rf6!%b>AwqqI>{C
z_JebDbcCR&Za=`O^@FqKkC$gCX)&(ScIEPY@Ze(<1%^VXtYwBxN6Z7lk3Ky0x=&QO
zf%`nS=(7LLeX6Bp8XOpi{Oi;W5(6+|-l}GvRy8mz!`<%(&8}0!{EA9~k%8R!4{)MP
zv%yLQ<JslqDzJkJIpa_*o@_LWMrLJw;!lHW_Wr#O5U?3Tr(3*48=j2i$tx)22a@h$
zD};m+p#ai*#YSt-hOu3Gzw;F%fm$Y%lMK3An>rncUt6ws!>%rG{!3}8X>0}0GSv_b
z@Y19w@36h2W1T>=2bP4a&Ky7HFzw?5ryHE*U+#nRLe`;!`|M69CpkDBx&NAlDLE%M
z-j@bQ81j2IcJLVenCg0Z91B{Oe5v`o>A_w2!e~M527vSEqr^6;m~M+1H+aun5x}5I
zm-EC5=<Lr*QeqzAqCT&CrK=kW0=iDJ4-M>01u#e;w?NCSJ_u<EGWgudifW}c0;~)Y
z#gp>m{#sgN#y~ch!1N1%7yx<#SrS?{V*pvmFq(|~=fp%0rP7eTL9!2EF*k?OG4$6L
z{a~sDI48yc3Aor^L<qo(MpW@9T|?yr95BZRd8iL;=QUk)@b`}3Q8Q!7LLLy+*+Bou
zyWVnhbJMS!S;r3HetBX%unio;EV{KV<`Mo!9Ci!^d8v~-`6k`r_mgov9v&X3$bP`O
z$yQ(=TK4u&UNRFx%gaCf#=IU%RRE$gPiO^rsnqx9sEF_&1K0&gk}|ukh!?#CdxKf2
zC_9^Wn+^%&7;fp*v-ORQ?C;-S0^_0s<0d2~1_3b?qSrgYrV?Uwejewel#bo+>ut*H
z)Zv|*z=H?dOYk4$e&D>U*g=v4GLsV%<ra=9z-w&?2?@vGrp&ACq(Zw0n`X&Fr1(Fi
zn1-htFj0-pFM}YHI=K%2p}*Yolp$iq5}DI(wCUS7HSp5d82zlAUU%+hJY*2Px?%3Z
zMQ{$_2D3Ga;lQB`rfC$%*JFZ=c8KUFzFmz;l)$47kd;U#4kXLM<a*=TGGURElOu7Z
zE4AI;#*I-pX+DrCO5OTA_57bqNg|G$cZ~|N|1-)2Q&w%(4n8y5(9ovk)}nGA^)zIK
zB9KAqI)Vp4+=PI93DgJ(0&y}Slg!90K&e3>4bpsFdp2w5P&p4NJFi49uCOfFt?i1N
zd?>)nU`hp|udH=(k3j)D83T+BrFRs?GrAY_wD2-YBcn{?6`Fn_pXmkA2{DoJP!ZR$
zgSZvHmxJ`lifSg4rt}y9HYid<?das}Oun*;Uz>@VDB-nKzM8=gUnEGTbO1nfF|wnm
zP7^sh&69_JZ1}qfkdz8y&$q_nKASH;S>y^bEjYaUIm$FfftBSl{pXBpNiN3&GPGWy
z3Du;*^<J=|K6Y_-ehTK%#^utFm~LO%p@@yE>*$34{%sC&SNpa@8xRVst2Nx3K@EeU
z$uF;p^Bz&e3HJ5%=~rmwZI=UV1B(8yogJWI!IT&NKC#$PHWEuda&Q3O7t^}eeynze
z0VuqI9XqBEgi<hN#K_1f1~MKx2FA*v3shd7cLBtuA1gMk`kYx`zhZ7|Y^WKK3ibyw
z0(eH;;Pl5=sX!9Upum*mnz{TiU5b)8VqnpU>1ou41#BSK0g3}mR7mElt$@fKdqok{
z{)2_@S&tbHP-AIuQI~t7>4T-{ax=EvlpMZ*N(u9IFYw&HG-3qO@1wW1ET>}xfyxP@
zt&5VjT)PAWliDcAynsS>4bSG1QuQC-2Jaow^s;%;Hjx@*ic$p)SMDMdBJL#JQPhdZ
zY9MbUl0=h&W#y?bjsS)lXmfNR(GO<`gK*0g@BySAQNpv1ozomZF9DN{5lpLx$>$s<
z4VqGjdJ2_Jfx6B}(C9~}spy19i;JocYATtBTG30bIy;8B>v6^cU9;eHT@S!$16lw8
zr60!Jhtm?a&m7GicON_lMH&=OP7+MJpyW#6uuNc@1PHd^*|#5EXsr#37R<Z@mAAop
zKeEB^u`K?tm!On@3LjFeiX+Qn-|z6b;pfy;Wntd?5GlH`PPBZrkgF><m>Nw<WK!Fk
zpniPm4$~XJp{W853e;fFeYq#PRDEiGetxk|cPDO5mJ=}upxCRteEHCsB(4)!1W-=+
zQx>jzZ>@TVhog2Bl7Ry6@q(w+rDX!cw-vjBAkl^FW&MKH_nI1`snWM#w9p<bJ=yH(
z^g@HP^0T&9)&BVraLT!(B;aW_BD>VVoRsEhPaAg)BLTWu)hv_VQ=k+m8RhwZ|Nb3#
zaW5sZiI0n>NKb!%tcF_F$x1R!n7;%qOacJZmTgl;fQc1Q48b(m?wS0PN2u^1PYZ7B
z@DuW#D>`z4a)%3?&n`%glsO|T8O&}ND5HkX8LJ!Qg7%AuLziMIfJ>eo1mVN_NRvh+
zM2!CV>`}9kpFjCF<kILt9*phvUtL{A%OnQ9hzWMeZ)@1hN}XgLtA>sa#raEjI{n{x
z3IP&6V2I|>odF*_Q<oW!A(*PPT?oSkL<MiR+1V2kOhB#231apc#uB3KqNb)kBClX&
zW7`m3<S_!H2Y|e4T$UT>eht%ZiP&{|O3S%dYy4UB)QZ4L0uvGvm|A|>PH@*uOib8d
zJx<{Q6N!MVv&~$aT$<-mH6YzTdEfT|pD+SF03L&w-T1LVN_EKRHgsR_pFcI?;^KNJ
z4V8=xk~_iVNV&uIm|PiSW%+>c@z!wWNk7e~@gwt&wl3j1xq|VCkMlt9SnWhksX)RY
z5-S6)f^3o<BEt%Pba<$Z)H}c=frXK=mGoU2*9xQG__ox;2hoCx^0~imks2Q;K8Xfp
z1yE~TQQ-Mspa&Q#)(ma4s!G;<XbgJ*NL$MbsV5JxgJ?rnle)JCQ-eTHSl3QpUY<q>
zJWE?!8w44AzS^_jb<ZEd7mPDG2gjM>FEq5_fer{T&pc#<h28uD@eNA1b5V)ZQiu8+
zkbH+rZai@N!s6msK^_5d24^TvC~(YuptsojQbiPgG1h)0>Xh35iSivs0GT*O$u~&)
zH##Gt$twAPDjpZ+$8zF!0}l8cuT|nrxGr-x3Zl%DPo2&XGgsBME*!%VfdI)w{s<Gs
z#^T#&Y$T+C^wU8r!Njy&%~dTqC$G#gsYEL@N+HZ;M&!oc2xW%C=z<>E=z4f_q{MzX
ze-?hodK$~im~nd!+gyoCGL8(XjX{axcrW88-}g}XbruW*75gtKX{#b$4|`R986$}2
zYs0k*Aw^!FDPFi`h<=l0j(UcV=F*;fSHdyqmt$u(zlIm}><G8xuq%$q;^ZPqKXd0T
z#fdwr<_J23E5$%IU#z<F7E!GA(`@L%=frjFS2+X|h2vFOgXKhn0W6;imI)e)Fl7U6
zT5oA*-|lDk*HEwUhatkj6Y;Z4MSB^~&JKNBYAaJ*Lai2m8;EojZ^rvoJU&6QX#58u
zMXSj9M>FSJe1!IHT0_$y7dZ4Zt0uBp6+CxlK@lboS`#h|T$O~UC?$cFBO0}VdWfsI
zc<sLqJbgV%&uuy?5!#t<?A{$cHE3RQhguERWV9SnOdCstq~XX;-+=`p+aD`$Nq6xi
zZC}!prle&`7g@teNtKtaE8AGCz6l~<zN8<|J5G9nXOx9q;-?2en46THKXRVJ864SD
zL_OQ?Y~xm-q?`?TfiT=IV3~2i+u3eE`b&uT%X6ygMA5J;y54pyxvU2h{7t_qwJho%
zXft38s9&~Ax=fut5V5sPI}28i<Bka$x773Hv+Ricw#D_;q5tT${IR*M)y!#<X1P?t
zdFCHWI1Ij1(I|RJK_610*aGIWr}{o?64T#hZ3=%4@_0Oei$GGavAR6vd~ZH>k}*S1
z7g*7L=xkZlglRn;pKv}A5Ncb0O6D@DvCDVs*+tBv6ko=pU$u>%rh4+sb4J0(I8F4y
z<gW|Nz8f}v(+d>0<ASGslwn&qmp!%v$-%u-{+C0?D-%h!Sr@!g{s-SZGrQ9!KxCVw
z{eWwnycgx)onaS#B<2(7JI?vo3s0VBGWT;_00JH+#c5W#Vi<gE<9SE0A!5cSN{{w#
zn~__%`E2p+XX)Q?GufNS6hen$dAk(9jUN(KYw7uOxPQZCeVII>Q~Q9>6>Fq8los`H
z;SuFY!06nb#3>zpT*}0fHf1=b5$8z6+M+N&-*<94CB9GQV;t?S8&BJNREzRZWdaXz
zb`+-fE?DSRdt)tKbrCjq65Hc6V@Kk?HQ%zEVVRQ@J3d+LGIlq3YS~UiabziZf55eE
z8<5rUEeGSEXpH6Ai!*<AJ;A%ha~|ZthP-Ybbjp;IewE@kkY7E4JT20T2C3zXGx3)^
z#1a-sGiU9FxTRTyrb}=XXO^-wc6M$bUh7hdXxg&;b|`tjR8aYfdzd<;+!0eeJecuA
zcnbpSFm<wIwb2mb{B8SzR*>q98mc&#>o6*n{rZnK?yw|}>qC@&qp=W)V%F9#F$xtu
zHhkzWyU@Ip38aA%LqKFYbc;?W<={GZc(+VF%Jp5r%{}_i(!F_AFPp&b{A{V5q=UR^
zr=q(`+Rf7Hhv8S|I!wpQ5?szoWTD&JTQH~G)330fc#{1I|D#Uoq4Hd2UeS|Fc?9<2
zOFnZ)w)YVx)rbDOE;C0)Cs$GMBurgOPd$kg!tRmsQ)UrsJXeK<s4i|jfwK1b=2#2C
z)ZNkxKa>BdytG}u6IE$p9gf>8+rA~ySdU#7eDdqe_%nr&&tw*gdJ^_4=!O0hdOSiv
z_}CmwO^o{x&;eoBi?)5N#v;LVKp^Q|IPSA<BbDANxTzd4EfVk=i2G}#c{0wzO4%Rg
zj~)Z0!>z3_&FJ&OX-0S2iGe{f!&%*8=T1|Lkgc`9H-mI$XMsXl2I12h>LbhcuDf^3
znAi{mwpE+6xs?5CbCFE*t{jEq4OPHQ4K<|yGI6ExD0L)HkKNe}?Ktr5H1`3Wh;1{L
zn7^9JT3g@TsHql<v)St+uL}~sB(H}NH&YumEA|^tb6Lwow)YuA4YdW)aWI`%_dQx<
z63Zwdl|tqpiRLYSM~#8sJ3lc=1y4`kV^+^CK~N!~D=Y)_hl*zY#IPyq@hK;1bGp_$
zEd6s1JQfZD--sGcSc*G?H{)Br^oyZYk9=0tYNYg=33llNWdqyyk+Xx@<GYDlemc}B
z3dLfpZu9YtN>jHaIrRwXK;_(SFOs2UNQah%wb@V0Oe<C0ewp5ogq&40p;>Y2SBkCU
zf>(}2YMLs1T@j(H*%*f(QyH18kRUFRAOHF#D&Z`0oY|KRY>whuUxbUdQ<4Y%fInf7
z-dc(IVx4dKl4GHNf}k*5%lL+;%@7s%IwGA2g^iatQoy^-d8C3tgqC*Mvng<G>J?Y%
zms=_#f($fivd>&y^3vFhh)CuR7{JDXAcJvI9Oi(P8gTi8bTM{y`HgazI*v8jZa@x+
z%3duhz$B2IWUpg?@E*m03pGL_sQC9C!iy^tyP$Xn<^bdmfxP7?ZU$HmKM23I5q`^l
z-6F9!GPZfT_}I3m5(^s9t|d#Hx)D4gIO4#MwKQ>cEF89OhdW0>4HKszo%MNiJ0@3P
zKQ7mfM}0SSgTU{53wZ;Z2P-G)rYBtIkI8v^gH>g{BFZxQh40@12{e5iHN7Bs`M$A0
zR;eK^^<b#g8ZMlNqptdH<@F^)y_kuV#frIui*p;f(~sRboZNa$V8=Sielj7igPUkf
zp-rA1O6{MiZ<nM@lRuHPQnN<U;vR~7!|Q#$+x*&GAjVW^t2&uJ_HKS{K0H0LW*wV_
z%}HP}W-p{@ylOS)i(sW^U#~H6-V4Lir?g@|VjV%ET2!$m76#RqZ<fmPWYCM{*?%0Z
zVr*bD<tGs91;1iQgh=W}&T5(q*0Gt`V99G(JBxd@pZ9$D_qF2b7en`=P0u1RmnV;g
z-`o9bkq1e!m;mli2l<t?;_V_pY3fdFWEcs3lxlrUNQCTVb(+5~synafZdOjp;^UO=
z4sb!LM&z_p9x&lu%9zwc&jCNZn)a;>f+&87xbt5S10uxbZL`y^{Rbq7XFG9i++?bU
zQcc^vewXb1=Pr?{k7L5d03+SK!zGebL6z6hz1@FSQC*<=Hn9K8et?7u9cy5D#XTwP
zY5rOytI*~@i;QD2Zol^PLgpMJw3Y6awhM~m6EL9_dE<%i9i5N5Sk?5<`8m^R=^uuo
z@qJJc215fC54Bb8xR{ma5l-%wGgr_75yE5?61-f}{0<5pVf@bDvidXaJs*u&>m{LH
zfJ`<L4T$FZC+29rGn-l0k`(8Aa1blYmCzT*V_@bF<_?2e8Yiw$tL;7&g@YGhF-Rk}
za56+=e7Y&9>|+9e2hSRK9ZPjH!)3CyaJ_35lNp@iNF<V2+@5BvBF+2|NH5~PI*kgM
z^@x7ZaM(}9hJznsX(9FY);A^*-1UkT%nP3cIbD0}kagUI@g<<>)wBt}$d`XvR|D*&
ztx+OV$_EjCTU9Q-_Z=x?EbG{h|NM;eBt`m?6YXKJ^?X9g<qothuU!UKxE{X-Fz6xP
zpS|d9tJBMp%B$wS-n7?8(h&WQ9i@ZULuY;_+typ<?Zr=(ii!P{k*r{Ih5gj8^IS5o
z-$FWY?}e^)_TfmjBWq7OCMR_&8!y`Ma}&g_LvFd|_<cQool(O<uqI-i2^|>q4`^Ij
zxiV0oUuTrQ!XqreG9mwFnUDYzWbSM1=N1ChM5WP!6f+YWHFCM!*YlOf3dYwV*Bj`s
z9&N8Dh0Q9h*Rhxd%s2zekXz|F=JKa^>#Ke4x9CN+eY~R#DA4k!EqebB?|E8=h37wn
zjyCucr+!&HNfSE%<9rYJVD1OQ)buWQ&oQH(@gL5(4#*)BC6E@5H|%#P!-KTM<jnsK
zk0z_m229%Sevp;I?59*ylkO;=@XxEWF&TQ{9LXEAMW5Ny-=GP5{jz&Tcsy!o{e%k+
zBKoBXN0N*@y|e#GFfC8elkUF%=(lQ4Q4H7&NS>+*nmEv}buI(U3{j=kOL{J$&G2Qe
z$IrkS(7b8I)X#hKihUtiG$XXJ4)Xp);(y#x?@<{*KmGWH(HEh^XZG<*uTepN0qJ9D
zfkR!>3K|?6uFGJl*a@F8H7J!5Ro`HJJuQMqI2)~#8R4}hb$DY7wi>d+we^t7{y}#e
zH^OBb736)_5z$fimBy4>>b&0)@7fjJ6a>KG7>n%Bn4{ruE%w|OvK#Wz^D8~fXDx(!
z(+tl1a>T;y>sL#p&`Dpk$FlI~X&hL-5l@DwX=Ha=tjmBL1t9MTL~IGUiGSv#gvyUR
z7aguf`qXat{3_YYLM)%;^38D9_2vV@)N6ThHeg@1PWL+%V<$>{@1!+R7scRBH<tv;
z>sX(ys8N@@q-Rw2N~>It0nHuq_gz$4R@V120Er!dNr=1~=jXaNg6fS)EOkC!??Q>~
z17dt!<KAj%J^1TFiK+xJXVL#XiMRzDO6{SKT2{}zA^>O*HjxNOr;b~9O-*9<y7dR(
zh6Na&TmFGUEyXYNZN7V|<<61fnH}QOMHhN7UIe>Np-eh1)nZgLXWP$KllA#CG8kI{
zbAW<i=zC+aiPZ0})i5|KR_k3le`ch@X9O{pT^kZ->p69qRcm!0zq&6#op!*>(Rkns
z8w!)5!#hlOUc|#dNACc0{?FZ-D=JiV8k|bN1Q;0m>MZm4W3dVbMaia)qn}IZfT4(s
zsxprkfB{)uX%FhWJl+Zuz3f%aThc4n_ql+rSDP#1TyQJXtJ&rDbSexGqz?QPtDVGp
ztczfdZ|t}`=qz;SOpmE_&MV)=-K=chD<v;<t8$}n={3Px|FzC(luu%wPV#g14S5Hk
zZ7eg?-Sfy155G&bY#I98VlU*HS^Tfu@hqe}t;4p1Tj8z8=_J6pel)23*SnRrqte6%
zU_K~U*M2MAS)yzT0b;NJMF)5h;nJ=1KXXBVty%@sMSg&30}97p%e^OTd>pNtU<Zs~
zy1>RE!5B!Z8Bd!T&+1ALbr+asqdDIjB>CsWjlSIWwLF0ndZco6kR*8s7)I8r|8m(a
zm$-(FjV<2u=e7f&0d0k?`BGK2wV5dqA^pE6y^<7#xH<*?+ub)uD~@95U|;djsB}HD
z=#1rl2=O3Ethe0=yE81ngorzJuojFbmcJi=l;-&JHT2iuRu1DMlWQPs0YdM<MQF@x
z7Ytc~nbdgQTyS-5H}dgkb+3ZbQfzR8MbLTAAJEAkYfLHb(wTGz;^(9LT6ph=Dp8`)
z;y@7_aj;>wB(kg1Q7N7@`v8g~V64`_pr^v@+1&qgI4Dk69gHslu33=<_DVqe`L{c1
zP`MT>%o{cqJ4yeJ7Y66NBkRGI<J})d@C+DH&v+<VSu0iEydmFfzP5s0m(pr*VgnOU
zLBRH$@8^gS;k>{F5d&aCz{lASJwpMb*NKNE)*cFOgU&72Z*RaTo3yPh7wpzb7vXqO
z3j)*vZes#Uv%+Cp4QQC|d-j=<A3rwCH}KQLENciX`|_^j(m6xeMKQ~Gg)8W@ByKD(
zhi}YgKxxp(_CnCr4WEwsxi`o`U&b47I2?o#q1w-Ta1&cKy_63`--L4RKRzoxJ$D(d
zhJy~$k?vVQPPrcSDTW|`Bid%iIQl8E|NgPA<8Ct{uK|F?A0H;g@<;w#miXU%@xPy$
z(&Te~%K$~Ck1}*u!cgq$b;$@&Thy2WVvBG6FKc8NxsPfz&?<NK(=eG=*2&(0{F@tv
z<_d1XJB+S*X<_z)-^-~RGHTNj@(Kqpegas*X-hsBe_Md%NeWLOwCUZQX1pwrK>ECZ
z;&uN!#`RBv1T)g-;sMsQ^G$FheSzQ#42`pv9Nw*9x95<~CYm4|Bw&&~pKVO>L9x_D
z0-IzKbh{5zr|&fkK?2WK|Gk1X!$siqu(8B)>sbYkiy}$b-+5FAh{kh&0{b_77p#KX
zS3dppU=<@p_ci%@2`~XkF(o>{AYGir-N0GtvCEPT3Y2AThUS)r@avff6d~gWbWD&I
z|DwG6hIrW3G{qPS@gDg7qTFkxOJj3>{_QYe!3Ih8X9D+ob681=+f3JCt+%eFFTm&*
zx#!y6nn_}L^a4wWR?zKMb^}}5$>T11t%b?@>^2?0^SE0M*oYyN4(Nj~rRsIqrRbu$
z%J324+Fh|3r5K`~f!at47*m9FOLF#A?`wp*_UH&yJd_1$3DVK<lmuK<#F43~Adl;_
zmCXB_$4-A{kt$~}fmZC?GoFJRS2$S(!O)|B@ci4$cMpeUMZDfd)(lcB0~8AII<)M4
z?E#)~5^lI>Ygql4N>MuSBDtrJy=WnQ{aSoKA1m<PLH#Upg!XjE+l?{`tzh+X?O}$f
z3;?%agkK+AKAx_3WO3S^_5n8~fUFSC%zS)&torIzKy@|WSKTJEtIxPPDOObWphAX%
ztM<to=8hNuQT`yM5No&=W@Qpk-uq-cy1jtmY=)}>ZD3R-Wja6FbrJ;cKrF(;4#EqO
z%Il7#cm`jh5eCiz97Jwj9-@4{+Fle_kw!ZiVC@10jU^Lngx^ik&2706L;5O-IT}-}
z&itgZ3}z!=^?Za&+4$FI0>wITA|zOxwcZnlW3GLAM-J(GuJpZjrB*C9zyi%WJdI)I
zPE*=jO{>fu;q9%2wn}iE^oKWGbehA|KxcJopj0Zr2mkjG)1RN}^yC1ft*>?o)u~X_
zGkUN+Y|ui<iCL_WHHuz5c5s_BIQx@Ws<;`0gQ5q=IaUigC!`|+?U%1!WrN#bpqL3H
z&06xk|IY2!SqcOnKoHC1stP0#5t%N=ts(^!ADlPq(LY2kpFFcu^5iPNz|H}rAT(mC
zg(3C{w7(PPT7F<(Fe0bo7V9hbTW06y?`sC{$b6#>Ah9<qWOeOwA#{#NUmRb6R<jMe
zd#NI4U1FWdwA*9jkSy||eEgCK%95E1H++#lPiq3s)Nbbn2xEEs!eEP@Enf3e?|I0!
z8<fG_jiaMS)^=S+*ip}V^I$leI!+LktoUIWStkFIgcPO$aFqgL527|!5&ewcpz*Y<
zx`l&mWDYOQewI<3HqxMuY&$Trv&#To30x`2|18qp-VP?)av0?`Pk`<UJQ$^HDhyD|
zB^nq35_ieV%;H$q(;-y%2K;`vg(E`&a_Vg0*ytrXsOzcQReG?i=x~T%Eret}g^w^j
zi;!aY@?1SqGUE>ky5Pu~Xt;D&Wwf)Qn<cwV5b|MS#=V3VX0}in+*X{I$xkYLM|9z{
zhWn5#{%}Ap%=?Z5yi&r4bA#R8)Bp!<#V>VSr+49@2jHTGqM}L)d3B_T9uZsuXw3tz
zn1S;b78d^X>sKofr#Lw|VbcMC9!Alu(#8UE1W@uo`;=8tfoEc30&Yo!H=rOzJrmk|
zhyi-T_=29GU{rvXL8{7DHB-%f12WwE6^w?%puJ3e{?!1b*TFDgXC9g_0|aH5_5%cS
zFj)z1oQUVE(SlhElw}o0MDUV8tOr^Xf4UMPJY3=AZ!UGou_Eoc@V7%Xcn=T&;AoR*
zFEHKUMqj(MIUwA1`~U}i<$CLbUO`Ijb;AYJLcr+*0geW>veG4q9S$BasTFf|qy}7w
zfOIbjoUE*_4P}t%C>9p&ZnYtX;1Yk%3m21<Bp;dUpZy7XG99(Jk_&(t7IKx`Uinzt
zs$g)-sVEj6-R3$++l+}`H1N*V^Xt)>nULG-^Y)!nL%Cb+3{g(IJ3y!a<mN9$2UFFG
zJ_)&Ba9&+qfotb7FYLk4KO-}<1i0S>WTG&jfWuTbeNM$^0?+XN${s`@yuQj$`8!Q%
zT+mDM?=ASrF9-od0r-IYHbdN&r)9BLSt4btk0z8Vvmzr+6XN24no0!vQ{a8T+d$L~
zn3;lWJ3J|9;qy_Sh74u$M`)Cz0cA{W1kB&*jEjMl=_~QcONuu>(nX|8@8dnWcQ6Q{
zW|i9)6oPKBff*p^g9Zntz|x=sWwj?5&$Nzo(xe=vfs1oK;D;CvSP2$VK`*67-v_D-
zN+-c3I8N?`57LJ)5N<~}>pH#RPL9VF4tq`U6=t*e%8za-Px!rw8>qx-R>9weSvgAi
z7+uEQN2wtLx;B2hbyRRE7_neH1VjfAUsL+De;tPldA;<)N2jg)$ebqs7u|Vse!d3o
zOu=q3x_$iEO%Ma-HGsYb^l|X_F2ulu0th<;9<p~j&m)8WLf}9#LsmzXAL!K!ZYp)g
z1N7GJC5huD8)u=y1~wZ5*hSgt4ZLiS69g$e$?#K-*G)cfdc_R1a|Dp}WFkzF;o<z-
z4WP%ePp+@8^G64OV@AltCTu7&R~;{7Vr6+<@B@}@*GCsH8zu`O+`6Vqo;MZOj{(VK
zMj)W`>Rg!Mi+o-eJj|v+?Lf69p`h>wL?#Wcn7Fw3Pj-%2RxG42TVpf9Mg!u3W`q%D
z43&~p4reW<%E3E<I|K0N3RDs_qxvnHvGQq_F#Q~uR7h0|xmQ412MG|=1c^$|{0T}I
z7)yK6v~eHsgdc)xvyLpDJ*Bi38kOD^6cCVc6nRP(kpw2hz_x~|9}iI{vui&G7p!42
zv5*Vy!otGXZOo`6LpRIA;^Jae9UaVcrNnO-OMeqBz9fUjeOkaqhJKf_xOhH*3_T{S
zF^INxDtWzu^YNg2FEa~tZjef`#g9qHCFM)v=PgS(se{O=gM8*cmbHMq_$U?)yh}?<
zGYJ;Tc5ni|P13>b9G;}k=jJaz0L5r{cHzyT+i)yu2plS|jq6|&2qNkx4mvzAYLr!3
zX+M$3B#x`1tsPolUmtGS6~|t>3}x)xPz6fRhxu|a3E*V1w3ujkL=)isNbn&BWOycA
zIIEUNdm44l^S89lX78(9mz~E%fQn!%J?K$5F%&EKHIEF?i^%;^c~z*I@j9WE{eX&K
zof%la=suP;4IK&(z2i7r?9r-7f3u;Rnc7Vu55!s4is4gs-Thmb?|6HUM{KPEK8r^0
z{U^ci{*-^S%z4E!j`bQky7nC3<vq05z`29@GcctCj=tmI;2>#AYX1XgvZAfyn^bnM
zX+OZ?NdmgKwP&LH|Frkr@mTin-=`TVD?1UwC3_|#;mVAx+sc+bvI)rwWmQC0R#usj
zQ7Ahqo064;?2+v99Ou>j{r>LXc>a3+e15%p-MN$VI?vDf`5edlIF9%6J|O=XzRMW+
zH75DoQZqcp+8xgm2$h<AEE(^0xxUWpR^YPeqKPEZTn49sBmredp%jnl3-@$?Qn0oV
z|A?O=k6lWZGUbJO16kL`0&H<hKr}l6lVy2_<d9McEZUOB^rUgXa&RlOLdvat{5LUj
zvE*%w@j!A#@eV%b_7Y12MKqgp%^~gT=O97veN+8FJjke7YmP|=KD7`cfo|`W6<3rU
z;|}F~=$IWG8q)CZp4wFmf_38FW5J7|c>wpOlq<|R$w}R0P0kq#v6S%s)^vPQ=9`l~
zc*q6~k>oos9y-QuCHbTer1We#??&Ucj~qtDWt;X-7cllTz1AB`qB>FrlnUTmHM`an
zc#DbhMXV`J`Y*Y=Cb6VlEoXpar=n?))9|T-2nEe0cb9fKms`zl*GQ0iWny2Rrv?;w
z9~O5^hm1HY-MV#H_!=6b|E7C*tX#N!<!xLtznl`}bAs>B|0p;q7oFF37^<UDGB%}=
zCT`K*v2||!jwc8E*SBIn)s|&Uv^`ljpIX(mBn$z*Skpfg?GNJV)3c79SASGy{@DA2
zan9oXM!V6|{BKJs6D7+IxYD(~8po#cGJ591;ILo0(o=YQ37S)ZHVBZBcAJE*hjku|
z%#@pC{tH4C2&k$)zc-J3)WMueiq`KLncrOfbr{z2c)8yipT-_z7`}IK3Dc)%vlO@!
z#n7(xHl%Ti91@NszV=%Fngx8V;EtN++k4)gnyn3O%JL%eaC37T9QuZNsCN6`xSqTI
zT03`l>?fHu{6Nc^O~dS`giw;zzJHD$(uFY@dkj%KLJZ&K3w$Z!jxUEqC735cool^r
zBhBKGr9scd)4}lG4f&FU`9GpY0=kHtk|({$0GC06)w%NUuRVMOnYrN!xqae<e6HLy
zDnCWdPxy3b9qe$;9amC=>~zKO=1lQEb;4Ol2_H19&Yu`}SGK1y2J@M#^zm5k(_?Cv
zY-fWcsl!FZkm%LZ<N?)el;?Me>iNo8<_yMCJr~8LGc<wP6PpqX1Ryj_1$qdhdGWVh
z4soE}ng}F&HH|F$#P3Vx>5zss-RoPl?+Al+MHv=PA(LM(yT)AHVq2%E8l|Q+y_wTv
zRWjdIyl`O_I`N(*7BPR=jEr1^CxWzW)IEmutyQ}R{q@Fy`AZQ`#+g~kBl|{2qf1Lm
zwFhFMvkMqY(61DKa4=GtKce>^rCGddE0(fy*Ay-ahu>?KYvLsoqvLC`>$-kXmAs^r
zz6@R=ic^QSo{I(8$V6NeA4{FkHt!PEXzULb&*v?F^4*j1>bEm#n7cYZ=#rf_CwoA>
z19DJpZJmr6bGsbwASeP54SowGJ2n4k00N=Xnaa9*%)YFxH{N#=*8xgD2hUsDaf@&n
zYTvy-vc2N1@^wia2WM+?O5Bd8k{Q_k3T>~@wRDykEtHtj?t7Npr{MW=z#@Xf4E9yj
z&6RD7t!ldu$%IhG`0#fbzFOF3)w*(|cVr}GL(p;L%|9#h8H~DyB$QgRRW#x&Gv!Y4
zQ(Ap#!aY1%j}7AKS{$orOOss0*we7BLQ?gaz!{KbKJ^*zmY<5hfSlpoOVzwzbSb`;
zuk-Kd`P8*Mw0}tS$iR*G^D!r(bBCMhyB^$gw}l?deWhO>W0EaUPWCzf5TN$rPZUSN
z(TP!-E`CjG{oGoU>)nuboQa`fH#?N|O7GGBE~lpoKQU?Xz=4q8OHDRrVZ&&jwHk95
z5J!LF$esMoZ*z0GAWOo@$)WCic`lrWs>O|Xe~K)ClJb}Pq1j7JR=9o&Mc7lZ<>QZl
zI>$#xR;<5R_^hk*k#SNixL>FlF~l9SX-)17;b8GQqAXCK*geo<XdDcjg1h3hTBLV0
zPW5=N+&iww#vRB;;lEW!xp^B8uh3EaqVDNK*+t_^ct`av8CCFMML?ec$Z^ot#m$kw
z^5Z~QzdqwC85x<S5W|xQX|<W91C|qI--SC^JKy7r8pNOX08VY9LPc)M3Gb-154!f>
z9=x|@NR$CVClJ|E1R5UbCxLj5=+l#5oqIvl0d1zesHjLV`@XY5^JPl#hjtMIXSR7@
z_bsO&*M_N2V@E=P%F5JM>6)*U%Q!?B#a)>KO|;*amv4_avxr|hKe6>IF%XE|0o!rK
zFARXr2d!7{fOZdE*l53WR1OJHf}pc|c97&I7dSgeMU{^#Uy<wnIdlIT%_~9`cpy<1
zQpy}>paQFK^{sfohO197UAoB(&_Kz0)@S?uHBlqCbR9e3F$3}>K*Y|S%SG9^_nw=b
zp>C>n8U6BO7r5d;Akv|sxpqad%DH`7-IkO%3`oGB*p%So`q#6ee%S}aVIhYNl$>~E
z>N&@#q-E{g+$g8kKJ8z#z7o!rz%#|E;c*|5ByCoV179;aj?SMrdb`|ZR06vC6pQq%
zMxNXB&Wm`aJaKm4S+g|c135Yn)Y39A;PzFiG!tS#!JDt|cHw+tI@Nu%x@tlaZ7%d>
zgyisd7f5N(&dsUo+S<K*qFVY@R%_$==<=r*PV~X|bAmb_Z~``d0qPqUcE6oKf}1Sf
z0JYJNPlauwg}+Q!x<<2_P^D^$yyrf49+n7020@R*`Gh+!hj|{VIdt)@Qrs~I)Ckl{
zK00+JFPHb6M<`gZSFTVXS<r_M+$OlXH&{SeCt|=~KnR<8m+ZND?h@==z)bD$ecle0
z4$M<uRZ%Lai946eO`nXgb%{aZ{rTl3)nVJpCeRw$e`&A$>U$K|mGXoH&x|?~TJp#W
zkn@^vd}H%2$({AeKnTH+-qBGq1VxiL61;;x)g>y}7q*oti{J9K>(ztHJ$bclEn!J!
ztdhklMx1kBW-7U>Cd|W^-<OuY(RYf$%i0009O&3BaM#pb^0M;zq#<BtuU7V{yW|A_
z2cmSoGqFl&Bdw%pB&Maj{7kA%L_~y;q@?7019yGbN!Ee^(&#q)XG2nsavlp;Jx)RD
z9^`2dQ~(3VF4Sp{b_s43mrv+e-9{VTA(%~P%2cSJ$4;B#KY<eu;CH3>jy*Lzy=}!j
zpM#E$4t_#sYu|hKawBEOOYBsE(D%&1#1wj(#IV|#@#^7~?1qL@K$t*+L0bYyyHJSJ
zw-zlMw3^e;83X<_CK9`P^^OGIUH%6-wNE-*Lz_-RgaV0FWsUQ*8~AsA_{r?-Ovkg`
zW#9Cd1!1nPzAvP}^j#W%MgQjQ<`QuCI#1~gKmYsOEa$S!Z7ANk<0IiKl>xgOw)Tt*
z6SndQY-wiNJJ>I7lCT<4O-=GUfsMjtRD^iYOpIn8Ac+A1^{x%838m~`XiG0}`U%i3
z-7w(jLj_e5EClP#O)9uOU>_z4ns-Ob4YVExC9Do|UQE}VtECqhse5Se13Mavd|hm&
zfAxfP)tAw-X4JC6u$@o-4ORU7iAz+_X&MaKMpzt>@jFHwHfCv)2;J$;jxX>LsJCe8
z>vQ?47;!KhL#e*7{!JEI{^_FRwp@=rm8$NEF&qQl35JI!M5icG+KXL{iY^H&rnI?G
z3F>uUzI+MaZfTXk>-s5ptX`S-e7%`q_4Ua(l{29S2Z4N7t7G<;!zU)p&{kqZ3o-{l
zFoXv?TCXg1-ik~$vPJl*WPIDK8@4VQ#KtsufRXR=W!Y!H)*O?w-33&{tlC;hyR<Sm
zT__vbs-pbaw`1NuJ~_F$1PEltqd{1qPVvF`+8TP2B5?6u81(M1k?qs`>m+em7|iXo
z3gA&+pBsMgfI4oEdPWF+1ebL_2kLzr4*E32{ZxG`s`msw8gT;w#N5I{6-WTS2-tQ7
zNQ;lq@f)}D44;5_{qM8WtEmgd1@gB8^XXNdPR?QH0K8PvQ{A!WgG(pg`y{aeuwPF!
z#t>e0fAW1`p{FzPfT6{msc{ErBL38PQyL5as{AHs{<u=N<khQJXvZ&d5ilooMnMK>
zdQG8w)18!(T}=YLs@}Q;o(Q~+l*{n@`~cxw969@q<BK?^nVG@*bt67HG@E^Kg-#C=
zgWy`MLuBz@PsRfhgsiCB!Y+;G@|e^J^=Y)1{jMrYaukLIjugh`6QO;eGZ*!1(^o#K
zPRAbme0G4>4GEK~uZdsQD~yH*t;jmRZw|Xj%FKG3;D<*+shpE4PXk)EN|Pwa$>UO=
z({Yl7ilv?(EifetlLxXohzWt(kxIPhZdihq@{&i1g9a4yK#Ali0-7-Ly1EHtY@OX;
z#jGk+=uhU<*7rpD*~TDCV`|tUFsD)1rnmPkD@TBh&1Iq`U9H{f?1JA)G|K&PU?2?I
zy6_Q*_ksP%4vYc%S4(#_3tFDrytDnK@*BG|+0kkR=Gm))T+&EI>EzQ7?b8^l&Um<E
zn{bsg>^J38i|=EBdw!W~ptxbX1Jrt~Dq@V-**+>?+gcjoSWbWxF=Tn*fOkdZM4giM
zQ8_%0$_hXp6!J;}|2icBgVf4(GC8c$iyRZ27ojP^bM}G{`vZD1b@ncm0SA1VQy<bb
zOj&PGjM#@<L{L8u8^*Y#9sIoAa^brQ>khOz9k~cZRY)yC#oeJT;NF$HHJ;CmOCp*q
zf<fuvXALE#_N)~gZbrcg*NDpX#_WIN`t()*k;%z;=wL@KIymqE2^i=Ms&ngpYwI1X
ziZqu1D_MhcJnxAIjw0vTGpG0VHwC;k;xg(00zhtyYiW$>)ctF0s@N2O5>z%Berd@3
zPk%UZpgm{vi(asF;G7lY(HR&R5CoXjS<ptYa_<2b^_?#u3e(=(O9>)}K<mB#^M*97
zHjU*AW3Zl{9vL5X1yeP=%FavIP-{zAz^xkzO1Yr!%C+Xo2w~xqCug3megMM>=iq&(
z;Me~OfhzZ|GcJDb%LUacd<3$V7TxLR!BdLV1!i9Z-HX%-sXnp8PYNPZ4dMoA=cu%|
z(2uB8gK`RJTz&a+ddX+MP~bsDPHqYaLje`u=*G{<f1i?6fpen;<dEK88mT;jX#EQn
z=`@=N(EF^6k!sUGi~^Ohexu!*AN#+)udFCtxpJlZ@^V~*`4|)t9I-S`5}GsIIb+SR
zMQvYPJT2rX=2_nt;{4j2hdwO>HFBtYLh$UYEhN-qZ)lvRd|OK_H{Ax>Lnvt33A_NL
zbw1OVG|=$m5t*u2HyCpkb`R`V+kL*H?CintDrWA%t7r?FdYAt~6uPqh_$4$2-eEVr
zaSXJ?U@x_Ry@PZqszj8bdBoyd^sj8iNB~>CpFf}VEu@B&94aUW)!vhTk+4d6kwDG}
z2?kRMDiB?#9Q&c{1yBjPLA2w0@@G7AwgVwbt2HTAm99}LnDzvNlNt+Ed+qL{xMWDb
z{Vdk0nHFjQe+|T#7->zo;<);V($R+lSL9AXCj*+|Q^=+iV>*ti)6}&jg0Y^4vN59c
z^0UObb1l>BF}ue`E)e-t&zS?}XcdS|j1^^zipoc=ejn+2aYl;EfbFu6G#}l&N1V*I
z@1nGq>@&^EB0HuTe;%OF*w%P{q9FjbS>CN@LymATwJ>6~gW@N_+`yy2$Hl*l7aCQ5
zKb<j8DJ#)h%AK-IPYNFphJJvg)$7af+9fl(-OUU7?4R&dri{5+S5HjSkw>nBCXR+5
z>(%_r{Ir*-uGYAo+O`7b6DT0&=PRuU*<Gt1pMJ-AF)GS70xqS^oL}i<yBTfE6=(Qw
zpLFe}K{qt}K%-YyL4jTKM>s@ZKp-YTTCdhbU<q0#n&-rsj)O_BbR-=4*J<>V@ZpTJ
ztHyuRU4q3X006^^5mcCCkKn3H9ravWEW8PwiQ0=BHv3YckVt|a*+*_qMAQKEg0{wU
zdPz{sgOvZH4e!7H`U|=R-hUD&yQCcsqIx6F-3#7&zk=kz0Rl*Pl5y3VtP>niaou+X
z?3p9rNwB1#%m_Ld;G+6%SyH4vgQ-R1MDg1S=H{$htCOtTi&y@rN_8^*;LZ%hk8Gj(
z=mcyCv>hNORUa0Y-6|PLDb_Y{R<`$X<z!D~L9eTtR5D<j@$cAcUr5y5(fV(>5<g8#
zXs3`@R19;jfL4F!@t59X`<t419~b+Ehm(Xg0-@LG<*QeH0|N<v=@v{ck3H3YrYt_%
zxoK;H#iqodi+@+c?sIYHwNPkJ^?q{ZYHCI8xa$%~5jFP}V4&GWv|CcKS`t>U0;GR=
z%nd+kY<6}whN0+vwe1xsZ86>Vg3zPRWZKN@3&c>^q82#Dm{_aVh}~Y*2s8`>P=O>X
zyw~~l%f=VjX^u`o`y5!nM{DAtr!9Ui3_Yi9fi_l1Q$1e00r@oOd4O)E%Sk&{AZYim
z6J_^#LripeER|H005%OTx-24H5e}6j<zH^X>O%XR<doL{NEYQldtK5eKuZ^FZ+<9j
zZdL&OtxhS0jN`Y=5dgCf@@q5)>%_NSKLpo%*(Dr{6&nR`&E9nyoV!GthOx9~y@G0q
zzWOEGmnUOBE&HI^9`(zY?Xd4=Jern$xNnXtK~)BduuUM#cBAy=OD5>xxH1}a_b71~
zlt&k9dxnOFELX<rR2x*#iY@4vwYRs|T2&hIp{5VKJ4;U@m4g7D0`?Dy$EGmlavDDm
zkLo)T4fgFX%YcRf4nbP;(YfnpI@4e*Bth<GZ+~A|_HkxrJixFgfv?k_LbKEIGGJs~
zAWk?q$g8TV>P$L8BT{DI25I00R@8_!n8~dyiu`2x3?W4VKOXgH-HRtruv^fcj1@!O
zlXg<nudgnZBTD1j473?X4RQECaf2rYTr1c%h+f^jkN?~a+yT9fCstLBjVvI9s9`K)
zsf{fYBylnqWH#-<Tfa7?H|Q@#<EWvw-BA?rQcB3V=JA-|o<d?zTKv(Q7mf~fz5?;e
z-8Il|RB1D`vEiiSGZb172T*8gYHF>NHnXsIGJ|-<ODa!$Y5nbag)3O>ItslL^Tu4a
z6F;b1y-qCf@Zz5ECI<<pW6^CQ3%*W{j-UuGHoeVq^wL`PQ0z9b$*0v#HU`G?B@a)B
z|G0ESO-N_0k)NT0b>6gn+P{MJcGSi2U>60v>THBv>28fk2@|h|e=*OwtO~c;wtjHC
zP)rwJP=G=Y=;qQwQ~*8c?`<oW78BcF5t4w{rLy5hw_N4L{L5)z(9q7jg$1U8^7d;Y
z;Dh6@A_n^UIdye6j5%x-vxUs0Y+KsRBK0mmRwJddyW`3iL4PE>?W%(IgYARXyOiw)
zt#~_GZ>}m(y%buvcW?<}Da|y9;N&D4@TaI5jfEDi_?OB${)aj25ZP$oZqzBlg4CU(
zlap?v{%ldk)l=jhwd%p(^B7{=wHH;Nd-8(I#@RYDB8Zo}mawr`a?HQ$ii>>nl7g58
z6hEnYit?+fXrRT(y(E4|Kb}C4rLy^yOdVGt&z0K`^KRSs&Se|shT?NaCj<|Wo0l=R
z6BXon)WnN8FDk!rP?rBCgVEzIAfQlY|Gq_B-Z9nY+CgdM^wow-*433yIYnP~W(y3I
z&+3Hi%FDunRC`Ackq=&^l(~J6w4JP|srufdXDoH*>#6f<LscX8m5)|dR(QzXL*T$?
zQtQ^Nz`;OO@mou<dNU(^xURMqNgjHb^#a415bL=ccmc|uRpR;UeljN{dhd7;D|{MT
zZO)W(VK^E~V@dpideZfoe6;&ls!NeAA+iBmmyPtELPu(fs9nFU+8v8IeRlS{DB4T3
zwp#VBgiw4~AcmHo7?kw@UY<C68K8hWG$z-YIf*g3e|9K{P03#*y}kb3jBHC<Iq3w?
zz*Bc!>+?xzo9-^>66!)N>;0UO>)VoS_*%S#V-qHKUd%)Ym%SKq2BF%?a3`Pkeh^=-
z^RVg9PXgfqsM~^n1O(7d_W&*ZoaE$W&v}nxaZE>>=tAQ6su7UfwY0a#tb9Cqj5r;F
zg#NeshBWIf<F-rQF*Hpdh)=DU8WLDjOFy#>Eg^7~xsi1w<gH3{W&r(g%-Qt)y*)Sd
z=JV;4fsd74T<om0-s;0%J-Q5i;nR%*KBSbWAL-ISxrHj!t5qYP6q>Jz$mgci3dfGS
zc9fdF%0CrtR@Q;`lIlNOZw+B_*jrq9t}r&DLo~$avi5v(zU8H??M^e*_r#+z^-oxS
z65Rkh)(*%hzd`nod*CSj*gjhKSBZYSV|5Gacw6E5qPS@I#i0HS$<=TUeN+%PaYQ$*
zV(Ie>GM`sBl~yw|Gb!wB2!e)$f%z-Gu=cCXyRy14X3x)!Bz{GlgB_pNSbU{OZ^}=(
zBc{f;ozd=%1%q^n#Dr@z8Y@NRqhK-Wt3-lBC0HxyO$HB=(ih-Q$oV^l+P=#8AX9WU
z%PmRcR>oW2S^0z6?}&aBOL4GW|3{_OY?+^lX22jr876de3IzinYCU-&5yYNTgl^Lu
zMygBhgTH)F#QH$fBAcAtt$uBFNbI3P*x6dp_IjG(7(4Wl#t+?*FJq;Wf0L6$WT<U+
zX1hc4B-I<i3!kFdUz4?jQuFfOwdi$#K1R?-&Wn)jh&z?$HmPr5kPF(m0?F!*wW4O+
zMyMaa-a+>#R+S1G74MgKGBsSrW^19uJqYUX5Lo)Lt)3PVC2YEWdi-jpKKHir<yT<>
ztiLleGAQv(j15AaW*<8@iwX$^LlCD~+j>MuM@Z8vRS<(=5NE&PNN)y1P3FHS=wPvl
zG$3mMd<C5&{hu5v!HVhr4uZ5Iuqy$ppmGog11rixB04mtW_Azur#o{-vJ{^Qv+k#x
z)sL>W4|%?$JVxjS(X?U7FwZTy>cT<C7k`BQRZR;(>6`?O8?Zb%LyjmZ2*45OX<}ZD
z$H5lQ<|tzbrp}k4HICkx`7Qa}NY^atrlw$Vi~Q0;mBrgYW~l$KOqM-;ssN!I?p-9}
z7KKMVA+8+Yxkff#R4;qkHFA$Sm-i`ow|ADaS<;WwGoNZ0_<d8G#z>EvO1ldk;gXZc
z#LS$&zS<qh`d#j>d;&2lXMBssXlF?xkVxOY^=(Yht$g(cm;j>znIv|0HAZb3C_!=E
zjMXWENHRh#@4@d|Dm3M7QsKc_Z*Dx~0#vMW=7DF1+7V*nUI>(f6>e)uh6hG5%+sHI
z4JsoLet@t=og6t8)dwr|m|<)nvjo;!nKrgU?%B|S`$y%mxwWr~SLu;2)Vhq9&-=&Q
zR5A8v`gmJrwnSIw1Rf|maYDsv^Y!-)z7w)^{3}Ml+S5M!=V|)v#h|W#PddiIsLGxS
zr75BcbxJ)2#!E~|>rhPp(q#oN))D>WD64->{jfa<%-1gRd&<q8GdMjG5fz1L7yoD?
zx-xxV<y!w7{7VlbS8w^#>^bS+NW%{gdJa)FQFlnvpz`Htf)%j<C~5EL7;09g3}Pc7
zw<ovYOV4i{ZeV`%ug~2SL2CrX#xITnB53Q&Rql{E{O??2l~F0+G<2VzV+LA_CCps#
z0}aUXz+I0zUkWNJ?S9+?8qLR^n|PCUz3~o+IufFi>p=e`L(thBps)rio9mkcSEe5a
ztf0wnuCZ=Z5D%Rf0Nvrr*4ERH<|YGwXdHlkDms+{-NG_!ZwL@2sjsN=80yW@2^*u6
zzv)mp(U}Q;ENoEJ{zlp6Nw3rrK*)#$-#XmB)d9Mi1eGH(&&nE^0DM6+vt8y*2^^Uh
zPGa-&7);54y^aXE9vOvydICgW5jT|myT7%J1cTrCf)aYaXGg+m?;9J70daJVm2o)S
ziPZHSxR1jL<^UiCnGis=^@qRr{UE{GH#9^9A{J>MB5wSc)u_!_SXfY<EC3L@XH+l%
zfRg|L_>QuJKu%+R7h1>be92(()Nq!{s3<pAajy=rAZY97V7|UwENy{Ra|206JO+n$
z(Gd^k=~$iiTtKOiw`E{qNxJ(E${>GV!tE>{>{&Ir!`&Vby+(6dAdgCs2~v$vazP*C
z^XH?8{q^e|PGt>A54O!=|G^lI^*00*U;)1Zh~F713*y{%WrI^hiZV_6ioIZN!PkCu
z!8XXmlsgWcMfKY*xw!EEi)xGE<wbeW0`+UniS@5fllbk<B(K9&XhF^mB%9lRel(8<
z4RZArQ$<)guTaU616G*H#JJ??x87SSfV?4yUhLIRlq;FM4nn$uw|3RjeJ5^s+)C7D
zx$%t(%|Hk??tHBZTFo{!{WQ0hQ-Wco;44kow}$f`+f-DJzWog#wPihXdUA0BsuHN`
zWgv()CQ0*wm3%U-X9$1LoCl^fs+kYng}?_5T)aBj4C-kDzJ~DgsvWr9lhG1M@$M8p
z@&y&@hFKJxz6|KaJJ8Qabl1C^pdLs#auf-1BSf;a-#gErfa~g^xE9N(T&AxkCymbr
ze9H_`24aNfX(FjBoJov0O6u2cFY0}oeL-02;}m0(G_z4%n5E$_kR#J*R9bO^{($y2
z^z%jHbSQhR2Jw&!RWI+qOR$uS+C3OB^|=jK!L<fxD;ewBBFLu*MSEYr8FFQb)vZ?u
zl6S*V?4%cHlt2Uk#bCJ8`}z-Q)jl|ytyFbFy9@ctKmgKs$Z*ohH^Z{<?IPSADfRQv
z?p(H^LM-B2qFp57$Jp{7l6?6XxzC3GRu|(I<b8jP4Vn=&q9CZCvMJ_0c^M<q$F`Ni
z@yjjqdt54+nJ`68%k&Y%M8_OomnHcz7hs@2PoW>8cIVkiH9Umi_i2_<J9*5J;TH#f
zy<zlH#0WwwCEjRw6|#}Aun9PCz_?uhls(<lk`P()-CJw@%|gggSyMZ<x;a&_EE^TY
zF%Yuq^6Y++8SKa|J)A9`zFAU-Pj-dxMTPg9*hpXRrt@S4ZLX)>rXydy`a-6B8re}&
zr(Gc1_(mQb6=km$ikW?r4jZn^|FITB{PK&eU)bb92p7(YCt2623PMk$CsB)Ewl$LG
z55BjM7T@)HY3pZ?u(51-{x&p$OJx`cPK>4Ni$r&>I}fWR9YZ%%vuuAY}OlIbCo
z65FrV3N%kfC{Q86@+GYeWKQM5PWtfH=}-WVHL|mgKOx9`l3|Fm`V1iRe1-QzbJ;4n
zp*nBhiww}mWp&WMVTq&<U{;K3F!icajZb5Gr=9=TyHi|LRDD%An@}u9b96e-TEcnB
z``S&>%+CihW4(qE1JPvP$YFQbE8*U#{*MUuo!*7^00P%4=>}DP5&Gsx2|qmKbtF%b
zd$Pljp+mp5Mjc-4?ax&!qOfzX;dXANk|9msDn;{&V=b(j2b;5tm2W<NJdR+D>e;83
zd5BJ%=V6QVi5k>+wpU;c)tDZR;la7%jMkyl${Rpb7Xm!r7qzdSkjvCh7u@dPfMqr`
zL2V=Yiy8;${y+aJ=Ycc=t9=;~2Ot(Lwjlg^Qt+2^&8V_$f!<j#%HPP-;DB?$;Dfs!
zN93{CGv~l)qw`EwK0*L$1@bi>ks5GpsU&HW0FDLgKLefcnEjtP@r}#y4Po@{Q)N(I
zuPE%Z#|etm!r@{>^Ldal1Y}4_$mURnz8P8^pwNWA45+C+-tY$59xxhAC}hfE(4~zv
zE=d}<0HdXebEX(2NSD*xy>lfVc!(a6aAFhPaUazNdbLnjIdQ=pt%pUS9~O!GFdnW{
z@@GI^LCODq|M+W=RLasRYM1HmQ-e=h$4%vvx~-56`S|%q9V5r#x$FaR_tx(2NCmWI
zbUqS#crhGB)RTnZ8f__CEC~YwsJ8ss_I{DN@KOXDQ&Q4gLN01!2v=9e?+_L0DP#}G
zn;IXrvCJNOh5o1mp|*^BJIw#<Qn;VZRtu9AO1CG%t{(<PHCgmmFB`w2n`ny;Q6Ze^
zkv($zV_KEVvv*xt=5^E&;!asE&WY9^;q-GJN7trHZ7Q4XlbJthT<pmN<IC+w_4VHP
zw9&p@T*9tDE6ZQ1{5CY4v$VIUU4BiqYa$Fj`yuW?4!hU?q$?I|j4B`Hm(7QvBDEfz
z8%#`DX9Xg%R5VY9jB9c~ePR4)W8=Z|hPP`%_~YYiD<fgq<F7(gu>P8_of*5XH!?c<
zWI}k!hbG{no<}eo%kll?Ut8^bpM}@3o`EyjG~!@b|03ms-;Yu7q@nSl_Fep$v){a$
zU)&INzO{FPT~QHS1jyT5zjg9uQqthE3cP_cxRY<hwzU3c<9qJf@+5`DH<q@IG}q9e
z-HpV?GLSdc(JGJIa~7Z{HVdTcu9+mpQiU4Fc8y+-v|Rq2*eD3E<T=hV#c)To2pz6P
zuC0udA-&?~a%U$3vc;Hyp;t6^wI*k-lbekgKlpSO{I#x`<Ox_(0^E}N$zG9R)40WN
z(!lhW?*u6uk*=<22)vv}A4?Ql=fV^U6o0i)aFjKgt(1fP;Ch(eO$-}~=d0f$e^KG!
zi|X!$9BRaipTF?;>^Z#K_*9t`NgXWeMjx-j<6Xm<T}ECRF~Y@YL11GRJf9tf9^Okf
zdQJU#nEfsT1Zir#aP0StphqNJRg4GcHY=P38_w|a=mZUDXwlI@WANuhlR)9BAG3G}
z@!^48xU)o^BACEbO56jez(>M<z(aNmGLix`oC)z}&P^U8!Dnx2DL-7k?xBocnC*{0
z9y>fJ@VcHo4+BNWmFZs2&ffod9<O%tCcVXLS=e4$IIqT0SM5rK6kg_n7SKz#orV!4
zRG&8x6=sAeq^}mYquQxjEO)k6k03OqCZr^xP#Mf$doP;T!)n@*0NZZh&nr2xw3v|J
zH2VQ<26?Un(J&kfgv5~VpBTZtnI(R+@@Xwd&s{4JW5SD>5QG~}!40yqVHT!&5Y&C!
zo3|jR*B(KvPM`M0z;~x{{u;i(lHykQKfXE>24!q08RGDgD$aU5?}`uF<Ny+7c3~mO
z&!0d2U;xUKIp98^teW{rbP5)#k9F&uxcC$l;yU{C)Inu39vw;tB1t<S?@SKb!GNEq
zVEoJD^~H}6B>}4(fHqVdEMbUFt`!Ow{~5E?H2&mI4_K*#DM&Cp2pwbRv1$wZ!Qqb^
zMpct`*5hx*Y)=GGkVg`rWP1Hllk+7msLV2oTKmSwV<EF$4AUs=GQ8;^VTfYfKv4Y7
zSYJHJ#DsRtK_Xq|*XBU&+~WW!R8?z_27pZd+HBhOuR!4TgPB!pzxP*j0mA$LM)jp(
zWD(zXDHm)T;J<?f6^BG5eh2~$H+ax8(cj<i5WfOy<)-NP30NFAxQmu+BB=IyB$PlQ
zjEVxiE=lG%#)jVu&&rhk+w?w`pT7YtbH{KeQSk8dx6!}exg-NUj7TtOTsx0EC;Di9
zt6}U_Di~PfKQ;+E^4j)(F0OyFj?b>IXNIn$xeB|U#||zKP#EvkWo20mzJ?AUJmm4C
zpX>!c_RvX|P(~>%D^muZmD#Od-jFCDon%E^L17=AMggV%9=SuAZn?uiJ$?PIhG*bF
z^8cWwho~a_Axz8xYEAU;@UZrP(ocU`&^p_^p7tTBx)x?0mcCk%lG*xX4OPxow9>oY
zMz2WIrVTArmV>Y5{<B!S8?>yxt+Ipb+s6GtWstH*J2ipT|F)+`88~x#XCH1Jz*bs^
zPEyeH^OEWi>rv%Qen<4eI2H&M$i;vFX)_qzz;o+Lwia$&j;ZfB5d>-wfJ=S|cnF6G
z*RqRo@@p^~3s`Ny@bjaSy(C;dhr`?zHUHnx_}PL+w!j;jf;Z9*!z^~I+Th_L&>1@D
zT$bTd(`e}3oq`#OF^j%1*TNP}kfHMz2OYgXZ}Wof%pqJKJTC?3N94!KXk=+53qCR^
zyM}Do?QWa|sWouzXjc}9Xvxg(!goV`Cog=fd6B&ma^xF%GQ7)UxE{bx6`D(^Aa@J!
zt=jp|!n!;UIlHa_@T~7Lko*5bA{s&tI0lS3tAFMM=s-C-prMO%+n>ND{E=pvl@JDu
zMKmFBUFCD{OVhx&-EpyeOzz-fUZx^>Bs4M#EP3H0iMMZiL_!Swcd+3;NJ~qp$@fZl
ztd9+@aP-wUiE#;n;B;ou01d5MQrR%e7loJR5g}iGuPL66G-&s@wHC6uYq$TL7tTZj
zZf7cq6dMsBBqPpi)iuzj?`*Q#2#V3kNeUmwpcM?(>W}$hEOHw<TVxc=ubljK?UW43
z<nLby((?I5Fz6}+4EozpD(QUp!tA}bLy!sCfG;D}8Y}nu)oJ<;7tINBNG_SN-0hY#
zUqOK>7cQ?Uk~H7No2Oa$t4AgboHbAkM0dJ)*D~!*^gKE_wNfts@e_Z{c|{@-5vF0!
z5xMAQfq>TuV>pWTi`=;In}I;E9<g|$=-NsD8b}6k&wqzFCEx3RSjg-<Bf;R}Hf7cc
zTP?I!%5~anZprxv?dhJT4grVvp<!E1y$sgO<}A)ste<V%68|P>LV~zmAHQ1~$*S~e
zJ`u?Mp6LI<q)u=^l!SUWr%%3pR+fwlb$Lj09<@Y=!3G;936Lq5*<uLfPGZqcewclb
zpO$6bXtL5veTZ}Fet6v)X^3`smVWNW)~K>n6D*w#-X9Cqszq{h@ncKfjmk~Krmt<7
zx+RfsHPyD`9?Wb7CrUr0S?&*HQZh2(BbejFgJS`LUSxD{PqDy<j{S{#T*sn9=*=M9
z(f>;)wD}?L{d2wodY3LyCaTr7FdMYaDP|QMf!88bWk*4IN2O;zSx|Xb<D9%HfXPR&
zW&^mW;kk~VJxO)4S*Z1Nd-3(z1Ow2~JL!6{X};jysiW|oWBvq2bnE_8u%~J#11Ki8
zww}<XG~Onga&aO+9t9olKWQ%!;0+6!QDYb`4ympXF4>QP13rS@W?wN|L<`B*HpkNX
z#m)T_6OY^G6l;2pAcopH5B!4FI1_hwwH_uZ6v2SGSFlYUD1rVN+*>C*=3%eiUz{!h
z3)USltO@7-x&AlqLD4Dt>mL!{|HeJ&%&Y(BxTj4hbiyoL(Jaamh5>gGWa*QVL|`HA
zktdmGg185R$<~_B8it)JkQm^LhAgqC@8)29DPgmU(vlu;8b4$+xw+aj_lZK7`ts*$
z)4MO$&e9P?Wyu^c)8w3U7}a-rmnCRUs@TS4e>_%X={y+zZ-4L-4>MEf@^^JNLkX@I
zk78(P&RXc2g~(K#Xqz(7-7?xZ>ZmPo=uh(~>B$nLX`*!Q;0*n#$Qgda`1&ZfUI=-*
zM6=5SIX8};%<896q<k=-WU-CM{gk&#h<1k4Cg47VJQJ>d%K<`SwI?Y#`aSphZ7}9;
zqJze`kZ0+S8HivY&G9|-ul281iKk8EQ&MfQc`N8hM94P3dIM--pxYSp>Odt&0gu~6
zHqBCciyNzeMm+7osRND2O|{h*e9nMrdVup~#gyd!79&3~fpNbOr4?;HbGH+ldGB=h
z8gnoTD)LQI3zbAB43c~7=}9$sQIs@}desx;iZlNu-nnb}$oGKKY~KtC?*0^$+P1Aw
zB;0k%Sf_5kI>&A@fszC7BBfU4&O{=8H)=3|LkpQbvz|uoC-Y^uX`g8i`EV1qEtv?v
zgIDsS+dC@!rWVG#(EPX1#^z6t$V=c@i@teDoQ7Nr6|5x$JG3t#*zx!#TAk(JcMeZJ
zMCH?wccmRV!#^Qx#(?+@nqE8_8DSy2!5(}+;H(qftpI)chuTDW>e|Dbw8+KYkC9E&
zw?Lg7UajgMpKc?)_n_*D%5G>{mL5GW(it>8<}xLNy!o;&$MFqSC;$@+BF=DarMuyQ
zum|vw4Ry_T4O|kT!5c*92q-B@DOy~2)5c)S{VPd$#`h4pjYoqZM4GBtc|Ezj05Xl^
zILRHacH3|CHE~Pv6TW!mli(|Cxc)>&CO5C*Ax~vaY(yKQ3C-KIPJlmN(_ZM1uWNyX
z9}`LvWbcy9VTkCdU`4bH8>uM?fyJs}xtoG0U3mSm0`zNO^ZlJa)$IMqtr7NtHys!F
zJSEE>PQpj<73u!dwWhL^_lzDI0H%1Z>llM5t<uA~T*hrSkvIlYRXMWeCG#4Ovq$rY
zk<&`}|G+v}cSA^*!;HWoVL6CosnB~vf2rXjLfls*xW7Xdhf5Tpzyg%$)YR1SpkMyD
z$gzuQDe58?O9%tj@nDLs0_4!q>6>WCqn<3#h7JWLkA-(S?cNG|hU5)Qn2Uw}VUp0`
z%AN}+(Xnzc-;xiPur><efM^1XNH~8AhRH6>K)cYw&;V-|2qurmEaB#Ch;S?X<8O?O
z&Ok+D8stl=Q)vjKnj)0|dC}r>N*<4ao@-p9<mAMQWC0z#tYItl&oz#tI8(dZ$;qi@
z+;=iov`6Ly<dc9y)e7UAF@Np^muI31j${pDfbo@U;XSJ(m3Cv9fJUQqEz$qN6UpO}
zJLp#+gY);_;l9F5liW?<M_N@t2k9C;=@AHe6*&G8Y%(er4XevV;*&#O-Bj`mA%YqD
zFX;2-QIezLh{dOI9Qr&mPO^uhPkESf@K5xqrC>0KkGO5$B8kd@sJVUo5HI+CPp0`G
z*FT|Ur&a~bD`cu}TC`6FxuaVDNeIYa<n_{ugBHhsV9T?7=A7csDozC$!A(zjvV^01
z)1uG>ZR|%6+26qViPpT9-Va73*w>9hhF}!LqZ+VOzm5cf?w&$QWKQajnK)Pxdj(wd
zZ=Eu0%HyrDEVpy4SstY3^FCP2{Q;mNbJT@ZT>nfx7^?k+r*^W87$N*cP`>1U%<9Y;
z=&PNp2#N{e5JVW}?_Cv{i~eV3qwlU`cUZ*&z)!eS+5N}C45ZMY+_l5J-&Z+T5X=yw
zP^9?bYd~I}0o;%XE$+<u(UzDi2CXX~!LP3KzPu2`Fcqx3QvRTOyoT0PIw%T^5DETM
zx|;+ypnBB;dq4HT>AUCaB^hAn#k%zpfQ9-ZU9lNud^KWYHZ4o=s(`Qkom<txWCY4)
z!Ic5f{TZqd+ci9T#VeE(PB<?6hjEI!Pr0{M7FlTr&{MwCDpAZN;vh%=;@i?0$^Xf~
zuiP%R4;h<+-QE`a$NLo6a1h7U3q4gYK3hh3dxrPEfh7G|4Jtx;9^XgPcf)Uz_yv49
zH5SIU&vG#akf(ZbZeWv!mead$<d8r4J4Dt;jj8k=HFHv|(d2vXvMf5AAJ;F2%zM=-
zf=Nn6N&VQwA$B&wF#^`+@qL#1xUFIyk)h8b-`^QD!wxc5%*AcKKU2X!Fy$~P^?r<p
zH4$D)mQJrA<V<`TSXh;(_4KEQGFlNsTUB7)(ZlgSWzIJa3A&7fr)#Cz_7Mc(^txo|
zb<NVtM#|d52L3=qghYh+g@yS=#Po#4q=Y4<L?!u!B&CFeGJTRk6!ITG;Bx(%gYAv~
z`wy(}3O<J)z@nev>0>8lW$9_d@9p7$*g81dNC^rGx_Ml8w6XT`6oij<@N~8@ck^%*
tv~%zhbg}et@HDq_xNa*hBy1^o&C*T)E?~Ep;S1M8R4-{_OBF0a{||S?lgt1B

literal 0
HcmV?d00001

diff --git a/docs/inputs/support.md b/docs/inputs/support.md
index 89c6c482..af3a5e90 100644
--- a/docs/inputs/support.md
+++ b/docs/inputs/support.md
@@ -11,28 +11,7 @@ to be compatible with MAVIS.
 
 ## Job Schedulers
 
-MAVIS can be run locally without a job scheduler
-(`MAVIS_SCHEDULER=LOCAL`) however, due to the computational resources
-generally required, it is recommended that you use one of the supported
-schedulers listed below.
-
-| Name                             | Version(s)  | Environment Setting      |
-| -------------------------------- | ----------- | ------------------------ |
-| [TORQUE](../../glossary/#torque) | `6.1.2`     | `MAVIS_SCHEDULER=TORQUE` |
-| [SGE](../../glossary/#sge)       | `8.1.8`     | `MAVIS_SCHEDULER=SGE`    |
-| [SLURM](../../glossary/#slurm)   | `17.02.1-2` | `MAVIS_SCHEDULER=SLURM`  |
-
-Users requiring support for other schedulers may make a request by
-[submitting an issue to our github
-page](https://github.com/bcgsc/mavis/issues). Additionally, developers
-looking to extend the functionality may submit a pull request (Please
-see the
-[guidelines for contributors](../../development/)
-
-MAVIS running locally uses the python
-`concurrent.futures` library to manage
-jobs.
-##
+MAVIS v3 uses [snakemake](https://snakemake.readthedocs.io/en/stable/) to handle job scheduling.
 
 ## Aligners
 
diff --git a/docs/tutorials/mini.md b/docs/tutorials/mini.md
index 929a3dad..bb7f00f2 100644
--- a/docs/tutorials/mini.md
+++ b/docs/tutorials/mini.md
@@ -14,112 +14,24 @@ installed
 
 ```bash
 git clone https://github.com/bcgsc/mavis.git
-git checkout v2.0.0
+git checkout <VERSION_TAG>
 mv mavis/tests .
 rm -r mavis
 ```
 
-Now you should have a folder called `tests` in your current directory.
-You will need to specify the scheduler if you want to test one that is
-not the default. For example
-
-```bash
-export MAVIS_SCHEDULER=LOCAL
-```
-
-Since this is a trivial example, it can easily be run locally. By
-default MAVIS in local mode will run a maximum of 1 less than the
-current cpu count processes. If you are running other things on the same
-machine you may find it useful to set this directly.
-
-```bash
-export MAVIS_CONCURRENCY_LIMIT=2
-```
-
-The above will limit mavis to running 2 processes concurrently.
-
-Now you are ready to run MAVIS itself. This can be done in two commands
-(since the config file we are going to use is already built). First set
-up the pipeline
-
-```bash
-mavis setup tests/data/pipeline_config.cfg -o output_dir
-```
-
-Now if you run the schedule step (without the submit flag, schedule acts
-as a checker) you should see something like
-
-```bash
-mavis schedule -o output_dir/
-```
+Now you should have a folder called `tests` in your current directory. Since this is a trivial
+example, it can easily be run locally. However, in order to run the snakemake file, you will need
+a copy of the config schema definition file, which is included in MAVIS by default.
 
 ```text
-                        MAVIS: 1.8.4
-                        hostname: gphost08.bcgsc.ca
-[2018-06-01 12:19:31] arguments
-                        command = 'schedule'
-                        log = None
-                        log_level = 'INFO'
-                        output = 'output_dir/'
-                        resubmit = False
-                        submit = False
-[2018-06-01 12:19:31] validate
-                        MV_mock-A36971_batch-s4W2Go4tinn49nkhSuusrE-1 is NOT SUBMITTED
-                        MV_mock-A36971_batch-s4W2Go4tinn49nkhSuusrE-2 is NOT SUBMITTED
-                        MV_mock-A47933_batch-s4W2Go4tinn49nkhSuusrE-1 is NOT SUBMITTED
-                        MV_mock-A47933_batch-s4W2Go4tinn49nkhSuusrE-2 is NOT SUBMITTED
-                        MV_mock-A47933_batch-s4W2Go4tinn49nkhSuusrE-3 is NOT SUBMITTED
-[2018-06-01 12:19:31] annotate
-                        MA_mock-A36971_batch-s4W2Go4tinn49nkhSuusrE-1 is NOT SUBMITTED
-                        MA_mock-A36971_batch-s4W2Go4tinn49nkhSuusrE-2 is NOT SUBMITTED
-                        MA_mock-A47933_batch-s4W2Go4tinn49nkhSuusrE-1 is NOT SUBMITTED
-                        MA_mock-A47933_batch-s4W2Go4tinn49nkhSuusrE-2 is NOT SUBMITTED
-                        MA_mock-A47933_batch-s4W2Go4tinn49nkhSuusrE-3 is NOT SUBMITTED
-[2018-06-01 12:19:31] pairing
-                        MP_batch-s4W2Go4tinn49nkhSuusrE is NOT SUBMITTED
-[2018-06-01 12:19:31] summary
-                        MS_batch-s4W2Go4tinn49nkhSuusrE is NOT SUBMITTED
-                        rewriting: output_dir/build.cfg
+mavis/schemas/config.json
 ```
 
-Adding the submit argument will start the pipeline
+Now you are ready to run MAVIS. This can be done in a single command using snakemake.
 
 ```bash
-mavis schedule -o output_dir/ --submit
-```
-
-After this completes, run schedule without the submit flag again and you
-should see something like
-
-```text
-                        MAVIS: 1.8.4
-                        hostname: gphost08.bcgsc.ca
-[2018-06-01 13:15:28] arguments
-                        command = 'schedule'
-                        log = None
-                        log_level = 'INFO'
-                        output = 'output_dir/'
-                        resubmit = False
-                        submit = False
-[2018-06-01 13:15:28] validate
-                        MV_mock-A36971_batch-s4W2Go4tinn49nkhSuusrE-1 (zQJYndSMimaoALwcSSiYwi) is COMPLETED
-                        MV_mock-A36971_batch-s4W2Go4tinn49nkhSuusrE-2 (BHFVf3BmXVrDUA5X4GGSki) is COMPLETED
-                        MV_mock-A47933_batch-s4W2Go4tinn49nkhSuusrE-1 (tUpx3iabCrpR9iKu9rJtES) is COMPLETED
-                        MV_mock-A47933_batch-s4W2Go4tinn49nkhSuusrE-2 (hgmH7nqPXZ49a8yTsxSUWZ) is COMPLETED
-                        MV_mock-A47933_batch-s4W2Go4tinn49nkhSuusrE-3 (cEoRN582An3eAGALaSKmpJ) is COMPLETED
-[2018-06-01 13:15:28] annotate
-                        MA_mock-A36971_batch-s4W2Go4tinn49nkhSuusrE-1 (tMHiVR8ueNokhBDnghXYo6) is COMPLETED
-                        MA_mock-A36971_batch-s4W2Go4tinn49nkhSuusrE-2 (AsNpNdvUyhNtKmRZqRSPpR) is COMPLETED
-                        MA_mock-A47933_batch-s4W2Go4tinn49nkhSuusrE-1 (k7qQiAzxfC2dnZwsGH7BzD) is COMPLETED
-                        MA_mock-A47933_batch-s4W2Go4tinn49nkhSuusrE-2 (dqAuhhcVKejDvHGBXn22xb) is COMPLETED
-                        MA_mock-A47933_batch-s4W2Go4tinn49nkhSuusrE-3 (eB69Ghed2xAdp2VRdaCJBf) is COMPLETED
-[2018-06-01 13:15:28] pairing
-                        MP_batch-s4W2Go4tinn49nkhSuusrE (6LfEgBtBsmGhQpLQp9rXmi) is COMPLETED
-[2018-06-01 13:15:28] summary
-                        MS_batch-s4W2Go4tinn49nkhSuusrE (HDJhXgKjRmseahcQ7mgNoD) is COMPLETED
-                        rewriting: output_dir/build.cfg
-                        run time (hh/mm/ss): 0:00:00
-                        run time (s): 0
+snakemake -j 1 --configfile tests/mini-tutorial.config.json
 ```
 
-If you see the above, then MAVIS has completed correctly!
+This will run the mini tutorial and write the output files into a folder called `output_dir` in the
+current directory.
diff --git a/mavis/annotate/file_io.py b/mavis/annotate/file_io.py
index fb7ec50d..2683806f 100644
--- a/mavis/annotate/file_io.py
+++ b/mavis/annotate/file_io.py
@@ -2,20 +2,19 @@
 module which holds all functions relating to loading reference files
 """
 import json
+import os
 import re
 import warnings
-import os
 
-from Bio import SeqIO
 import tab
+from Bio import SeqIO
 
-from .base import BioInterval, ReferenceName
-from .genomic import Exon, Gene, Template, Transcript, PreTranscript
-from .protein import Domain, Translation
 from ..constants import CODON_SIZE, GIEMSA_STAIN, START_AA, STOP_AA, STRAND, translate
 from ..interval import Interval
-from ..util import DEVNULL, LOG, filepath, WeakMavisNamespace
-
+from ..util import DEVNULL, LOG, WeakMavisNamespace, filepath
+from .base import BioInterval, ReferenceName
+from .genomic import Exon, Gene, PreTranscript, Template, Transcript
+from .protein import Domain, Translation
 
 REFERENCE_DEFAULTS = WeakMavisNamespace()
 REFERENCE_DEFAULTS.add(
@@ -552,3 +551,7 @@ def load(self, ignore_cache=False, verbose=True):
             message = 'Error in loading files: {}. {}'.format(', '.join(self.name), err)
             raise err.__class__(message)
         return self
+
+    @classmethod
+    def load_from_config(cls, config, file_type: str, **kwargs):
+        return ReferenceFile(file_type, *config[f'reference.{file_type}'], **kwargs)
diff --git a/mavis/annotate/main.py b/mavis/annotate/main.py
index b968b85d..61c7fd57 100644
--- a/mavis/annotate/main.py
+++ b/mavis/annotate/main.py
@@ -1,29 +1,27 @@
+import hashlib
 import json
 import os
-import re
 import time
-import warnings
-import hashlib
+from typing import Dict, List
 
-from .constants import DEFAULTS, PASS_FILENAME
+from ..constants import COLUMNS, PRIME, PROTOCOL, sort_columns
+from ..error import DrawingFitError, NotSpecifiedError
+from ..illustrate.constants import DEFAULTS as ILLUSTRATION_DEFAULTS
+from ..illustrate.constants import DiagramSettings
+from ..illustrate.diagram import draw_sv_summary_diagram
+from ..util import LOG, generate_complete_stamp, mkdirp, read_inputs
+from .constants import PASS_FILENAME
+from .file_io import ReferenceFile
+from .fusion import determine_prime
 from .genomic import PreTranscript
 from .variant import (
     annotate_events,
+    call_protein_indel,
     choose_more_annotated,
     choose_transcripts_by_priority,
-    call_protein_indel,
     flatten_fusion_transcript,
     flatten_fusion_translation,
 )
-from .fusion import determine_prime
-from ..cluster.constants import DEFAULTS as CLUSTER_DEFAULTS
-from ..constants import COLUMNS, PRIME, PROTOCOL, sort_columns
-from ..error import DrawingFitError, NotSpecifiedError
-from ..illustrate.constants import DEFAULTS as ILLUSTRATION_DEFAULTS
-from ..illustrate.constants import DiagramSettings
-from ..illustrate.diagram import draw_sv_summary_diagram
-from ..util import LOG, mkdirp, read_inputs
-
 
 ACCEPTED_FILTERS = {
     'choose_more_annotated': choose_more_annotated,
@@ -114,54 +112,38 @@ def draw(drawing_config, ann, reference_genome, template_metadata, drawings_dire
 
 
 def main(
-    inputs,
-    output,
-    library,
-    protocol,
-    reference_genome,
-    annotations,
-    template_metadata,
-    min_domain_mapping_match=DEFAULTS.min_domain_mapping_match,
-    min_orf_size=DEFAULTS.min_orf_size,
-    max_orf_cap=DEFAULTS.max_orf_cap,
-    annotation_filters=DEFAULTS.annotation_filters,
+    inputs: List[str],
+    output: str,
+    library: str,
+    config: Dict,
     start_time=int(time.time()),
-    draw_fusions_only=DEFAULTS.draw_fusions_only,
-    draw_non_synonymous_cdna_only=DEFAULTS.draw_non_synonymous_cdna_only,
-    max_proximity=CLUSTER_DEFAULTS.max_proximity,
     **kwargs
 ):
     """
     Args:
         inputs (List[str]): list of input files to read
         output (str): path to the output directory
-        reference_genome (mavis.annotate.file_io.ReferenceFile): see :func:`mavis.annotate.file_io.load_reference_genome`
-        annotations (mavis.annotate.file_io.ReferenceFile): see :func:`mavis.annotate.file_io.load_reference_genes`
-        template_metadata (mavis.annotate.file_io.ReferenceFile): see :func:`mavis.annotate.file_io.load_templates`
-        min_domain_mapping_match (float): min mapping match percent (0-1) to count a domain as mapped
-        min_orf_size (int): minimum size of an [open reading frame](/glossary/#open-reading-frame) to keep as a putative translation
-        max_orf_cap (int): the maximum number of [open reading frame](/glossary/#open-reading-frame) s to collect for any given event
     """
-    # error early on missing input files
-    annotations.files_exist()
-    reference_genome.files_exist()
-    template_metadata.files_exist()
-    if not template_metadata.is_loaded():
-        template_metadata.load()
+    reference_genome = ReferenceFile.load_from_config(config, 'reference_genome')
+    annotations = ReferenceFile.load_from_config(config, 'annotations')
+    template_metadata = ReferenceFile.load_from_config(config, 'template_metadata', eager_load=True)
 
     drawings_directory = os.path.join(output, 'drawings')
     tabbed_output_file = os.path.join(output, PASS_FILENAME)
     fa_output_file = os.path.join(output, 'annotations.fusion-cdna.fa')
 
-    annotation_filters = [] if not annotation_filters else annotation_filters.split(',')
-    annotation_filters = [ACCEPTED_FILTERS[a] for a in annotation_filters]
+    annotation_filters = [ACCEPTED_FILTERS[a] for a in config['annotate.annotation_filters']]
 
     mkdirp(drawings_directory)
     # test that the sequence makes sense for a random transcript
     bpps = read_inputs(
         inputs,
         in_={COLUMNS.protocol: PROTOCOL.values()},
-        add_default={COLUMNS.protocol: protocol, COLUMNS.library: library, COLUMNS.stranded: False},
+        add_default={
+            COLUMNS.protocol: config['libraries'][library]['protocol'],
+            COLUMNS.library: library,
+            COLUMNS.stranded: False,
+        },
         require=[COLUMNS.protocol, COLUMNS.library],
         expand_strand=False,
         expand_orient=True,
@@ -171,14 +153,15 @@ def main(
 
     annotations.load()
     reference_genome.load()
+
     annotated_events = annotate_events(
         bpps,
         reference_genome=reference_genome.content,
         annotations=annotations.content,
-        min_orf_size=min_orf_size,
-        min_domain_mapping_match=min_domain_mapping_match,
-        max_proximity=max_proximity,
-        max_orf_cap=max_orf_cap,
+        min_orf_size=config['annotate.min_orf_size'],
+        min_domain_mapping_match=config['annotate.min_domain_mapping_match'],
+        max_proximity=config['cluster.max_proximity'],
+        max_orf_cap=config['annotate.max_orf_cap'],
         log=LOG,
         filters=annotation_filters,
     )
@@ -288,9 +271,11 @@ def main(
             # draw the annotation and add the path to all applicable rows (one drawing for multiple annotated_events)
             if any(
                 [
-                    not ann.fusion and not draw_fusions_only,
-                    ann.fusion and not draw_non_synonymous_cdna_only,
-                    ann.fusion and draw_non_synonymous_cdna_only and not cdna_synon_all,
+                    not ann.fusion and not config['annotate.draw_fusions_only'],
+                    ann.fusion and not config['annotate.draw_non_synonymous_cdna_only'],
+                    ann.fusion
+                    and config['annotate.draw_non_synonymous_cdna_only']
+                    and not cdna_synon_all,
                 ]
             ):
                 drawing, legend = draw(
@@ -307,6 +292,7 @@ def main(
                 rows = [ann_row]
             for row in rows:
                 tabbed_fh.write('\t'.join([str(row.get(k, None)) for k in header]) + '\n')
+        generate_complete_stamp(output, LOG, start_time=start_time)
     finally:
         LOG('closing:', tabbed_output_file)
         tabbed_fh.close()
diff --git a/mavis/cluster/main.py b/mavis/cluster/main.py
index c1628ea6..3ea459fd 100644
--- a/mavis/cluster/main.py
+++ b/mavis/cluster/main.py
@@ -1,27 +1,34 @@
-import inspect
 import itertools
 import os
-from shortuuid import uuid
 import time
+from typing import Dict, List
 
-from .cluster import merge_breakpoint_pairs
-from .constants import DEFAULTS
-from ..constants import COLUMNS
+from shortuuid import uuid
+
+from ..annotate.file_io import ReferenceFile
+from ..breakpoint import BreakpointPair
+from ..constants import COLUMNS, SUBCOMMAND
 from ..util import (
+    LOG,
     filter_on_overlap,
     filter_uninformative,
     generate_complete_stamp,
-    LOG,
-    log_arguments,
     mkdirp,
     output_tabbed_file,
     read_inputs,
     write_bed_file,
 )
+from .cluster import merge_breakpoint_pairs
+
+SECTION = SUBCOMMAND.CLUSTER
 
 
 def split_clusters(
-    clusters, outputdir, batch_id, min_clusters_per_file=0, max_files=1, write_bed_summary=True
+    clusters: List[BreakpointPair],
+    outputdir: str,
+    total_batches: int,
+    min_clusters_per_file: int = 0,
+    write_bed_summary: bool = True,
 ):
     """
     For a set of clusters creates a bed file representation of all clusters.
@@ -36,13 +43,7 @@ def split_clusters(
             bedfile, itertools.chain.from_iterable([b.get_bed_repesentation() for b in clusters])
         )
 
-    number_of_jobs = len(clusters) // min_clusters_per_file
-    if number_of_jobs > max_files:
-        number_of_jobs = max_files
-    elif number_of_jobs == 0:
-        number_of_jobs = 1
-
-    jobs = [[] for j in range(0, number_of_jobs)]
+    jobs: List[List[BreakpointPair]] = [[] for j in range(0, total_batches)]
     clusters = sorted(
         clusters, key=lambda x: (x.break1.chr, x.break1.start, x.break2.chr, x.break2.start)
     )
@@ -55,70 +56,57 @@ def split_clusters(
     output_files = []
     for i, job in enumerate(jobs):
         # generate an output file
-        filename = os.path.join(outputdir, '{}-{}.tab'.format(batch_id, i + 1))
+        filename = os.path.join(outputdir, 'batch-{}.tab'.format(i + 1))
         output_files.append(filename)
         output_tabbed_file(job, filename)
     return output_files
 
 
 def main(
-    inputs,
-    output,
-    strand_specific,
-    library,
-    protocol,
-    disease_status,
-    masking,
-    annotations,
-    limit_to_chr=DEFAULTS.limit_to_chr,
-    cluster_initial_size_limit=DEFAULTS.cluster_initial_size_limit,
-    cluster_radius=DEFAULTS.cluster_radius,
-    uninformative_filter=DEFAULTS.uninformative_filter,
-    max_proximity=DEFAULTS.max_proximity,
-    min_clusters_per_file=DEFAULTS.min_clusters_per_file,
-    max_files=DEFAULTS.max_files,
-    batch_id=None,
-    split_only=False,
+    inputs: List[str],
+    output: str,
+    library: str,
+    config: Dict,
     start_time=int(time.time()),
-    **kwargs
+    **kwargs,
 ):
     """
     Args:
-        inputs (List[str]): list of input files to read
-        output (str): path to the output directory
-        strand_specific (bool): is the bam using a strand specific protocol
-        library (str): the library to look for in each of the input files
-        protocol (PROTOCOL): the sequence protocol (genome or transcriptome)
-        masking (object): see :func:`mavis.annotate.file_io.load_masking_regions`
-        cluster_clique_size (int): the maximum size of cliques to search for using the exact algorithm
-        cluster_radius (int): distance (in breakpoint pairs) used in deciding to join bpps in a cluster
-        uninformative_filter (bool): if True then clusters should be filtered out if they are not
-          within a specified (max_proximity) distance to any annotation
-        max_proximity (int): the maximum distance away an annotation can be before the uninformative_filter
-          is applied
+        inputs: list of input files to read
+        output: path to the output directory
+        library: the library to look for in each of the input files
+        masking (ReferenceFile): see :func:`mavis.annotate.file_io.load_masking_regions`
         annotations (ReferenceFile): see :func:`mavis.annotate.file_io.load_reference_genes`
-        min_clusters_per_file (int): the minimum number of clusters to output to a file
-        max_files (int): the maximum number of files to split clusters into
     """
-    if uninformative_filter:
+    masking = ReferenceFile.load_from_config(config, 'masking', eager_load=True)
+    annotations = ReferenceFile.load_from_config(config, 'annotations')
+
+    if config[f'{SECTION}.uninformative_filter'] and not annotations.is_empty():
         annotations.load()
-    if masking:
+    if not masking.is_empty():
         masking.load()
 
+    lib_config = config['libraries'][library]
+
     # output files
-    batch_id = 'batch-' + str(uuid()) if batch_id is None else batch_id
     filtered_output = os.path.join(output, 'filtered_pairs.tab')
     cluster_assign_output = os.path.join(output, 'cluster_assignment.tab')
 
     # load the input files
     breakpoint_pairs = read_inputs(
         inputs,
-        cast={COLUMNS.tools: lambda x: set(x.split(';')) if x else set() if not split_only else x},
+        cast={
+            COLUMNS.tools: lambda x: set(x.split(';'))
+            if x
+            else set()
+            if not config[f'{SECTION}.split_only']
+            else x
+        },
         add_default={
             COLUMNS.library: library,
-            COLUMNS.protocol: protocol,
+            COLUMNS.protocol: lib_config['protocol'],
             COLUMNS.tools: '',
-            COLUMNS.disease_status: disease_status,
+            COLUMNS.disease_status: lib_config['disease_status'],
             COLUMNS.stranded: False,
             COLUMNS.tracking_id: '',
         },
@@ -139,15 +127,17 @@ def main(
             other_libs.add(bpp.library)
             bpp.data[COLUMNS.filter_comment] = 'Not the target library name'
             filtered_pairs.append(bpp)
-        elif None in limit_to_chr or (
-            bpp.break1.chr in limit_to_chr and bpp.break2.chr in limit_to_chr
+        elif not config[f'{SECTION}.limit_to_chr'] or (
+            bpp.break1.chr in config[f'{SECTION}.limit_to_chr']
+            and bpp.break2.chr in config[f'{SECTION}.limit_to_chr']
         ):
             unfiltered_breakpoint_pairs.append(bpp)
         else:
             other_chr.update({bpp.break1.chr, bpp.break2.chr})
             bpp.data[COLUMNS.filter_comment] = 'Non standard chromosome name'
             filtered_pairs.append(bpp)
-    other_chr -= set(limit_to_chr)
+    if config[f'{SECTION}.limit_to_chr']:
+        other_chr -= set(config[f'{SECTION}.limit_to_chr'])
     breakpoint_pairs = unfiltered_breakpoint_pairs
     if other_libs:
         LOG(
@@ -161,10 +151,10 @@ def main(
     for bpp in masked_pairs:
         filtered_pairs.append(bpp)
     # filter by informative
-    if uninformative_filter:
+    if config[f'{SECTION}.uninformative_filter']:
         LOG('filtering from', len(breakpoint_pairs), 'breakpoint pairs using informative filter')
         pass_clusters, uninformative_clusters = filter_uninformative(
-            annotations.content, breakpoint_pairs, max_proximity=max_proximity
+            annotations.content, breakpoint_pairs, max_proximity=config[f'{SECTION}.max_proximity']
         )
         LOG(
             'filtered from',
@@ -180,19 +170,20 @@ def main(
     else:
         LOG('did not apply uninformative filter')
 
-    output_tabbed_file(filtered_pairs, filtered_output)
     mkdirp(output)
+    output_tabbed_file(filtered_pairs, filtered_output)
 
-    if not split_only:
+    if not config[f'{SECTION}.split_only']:
         LOG('computing clusters')
         clusters = merge_breakpoint_pairs(
             breakpoint_pairs,
-            cluster_radius=cluster_radius,
-            cluster_initial_size_limit=cluster_initial_size_limit,
+            cluster_radius=config[f'{SECTION}.cluster_radius'],
+            cluster_initial_size_limit=config[f'{SECTION}.cluster_initial_size_limit'],
         )
 
-        hist = {}
-        length_hist = {}
+        hist: Dict[int, int] = {}
+        length_hist: Dict[float, int] = {}
+
         for cluster in clusters:
             input_pairs = clusters[cluster]
             hist[len(input_pairs)] = hist.get(len(input_pairs), 0) + 1
@@ -242,11 +233,10 @@ def main(
     output_files = split_clusters(
         breakpoint_pairs,
         output,
-        batch_id,
-        min_clusters_per_file=min_clusters_per_file,
-        max_files=max_files,
+        total_batches=lib_config['total_batches'],
+        min_clusters_per_file=config[f'{SECTION}.min_clusters_per_file'],
         write_bed_summary=True,
     )
 
-    generate_complete_stamp(output, LOG, start_time=start_time, prefix='MAVIS-{}.'.format(batch_id))
+    generate_complete_stamp(output, LOG, start_time=start_time)
     return output_files
diff --git a/mavis/config.py b/mavis/config.py
index bfeb7a52..f38483bb 100644
--- a/mavis/config.py
+++ b/mavis/config.py
@@ -1,39 +1,16 @@
 import argparse
-from configparser import ConfigParser, ExtendedInterpolation
-from copy import copy as _copy
-import logging
 import os
-import re
-import sys
-import warnings
+from copy import copy as _copy
+from typing import Dict, Optional
 
+import snakemake
 import tab
 
-from . import __version__
-from .align import SUPPORTED_ALIGNER
-from .annotate.constants import DEFAULTS as ANNOTATION_DEFAULTS
-from .annotate.file_io import REFERENCE_DEFAULTS
-from .bam.cache import BamCache
+from .annotate.file_io import ReferenceFile
 from .bam import stats
-from .cluster.constants import DEFAULTS as CLUSTER_DEFAULTS
-from .constants import DISEASE_STATUS, SUBCOMMAND, PROTOCOL, float_fraction
-from .illustrate.constants import DEFAULTS as ILLUSTRATION_DEFAULTS
-from .pairing.constants import DEFAULTS as PAIRING_DEFAULTS
-from .schedule.constants import OPTIONS as SUBMIT_OPTIONS
-from .schedule.constants import SCHEDULER
-from .summary.constants import DEFAULTS as SUMMARY_DEFAULTS
-from .tools import SUPPORTED_TOOL
-from .util import (
-    bash_expands,
-    cast,
-    DEVNULL,
-    MavisNamespace,
-    WeakMavisNamespace,
-    filepath,
-    NullableType,
-)
-from .validate.constants import DEFAULTS as VALIDATION_DEFAULTS
-
+from .bam.cache import BamCache
+from .constants import PROTOCOL, SUBCOMMAND, float_fraction
+from .util import WeakMavisNamespace, bash_expands, filepath
 
 CONVERT_OPTIONS = WeakMavisNamespace()
 CONVERT_OPTIONS.add(
@@ -43,6 +20,49 @@
 )
 
 
+def calculate_bam_stats(config: Dict, library_name: str) -> Dict:
+    """
+    Calculate the read stats for a library from a given bam file
+    """
+    library = config['libraries'][library_name]
+    annotations = ReferenceFile('annotations', *config['reference.annotations'])
+
+    if library['protocol'] == PROTOCOL.TRANS:
+        if annotations is None or annotations.is_empty():
+            raise AttributeError(
+                'missing required attribute: annotations. Annotations must be given for transcriptomes'
+            )
+        annotations.load()
+    bam = BamCache(library['bam_file'], stranded=library['strand_specific'])
+    if library['protocol'] == PROTOCOL.TRANS:
+        bam_stats = stats.compute_transcriptome_bam_stats(
+            bam,
+            annotations=annotations.content,
+            sample_size=config['bam_stats.sample_size'],
+            sample_cap=config['bam_stats.sample_cap'],
+            distribution_fraction=config['bam_stats.distribution_fraction'],
+        )
+        return {
+            'median_fragment_size': int(bam_stats.median_fragment_size),
+            'read_length': int(bam_stats.read_length),
+            'stdev_fragment_size': int(bam_stats.stdev_fragment_size),
+            'strand_specific': bam_stats.stranded,
+            'strand_determining_read': bam_stats.strand_determining_read,
+        }
+    bam_stats = stats.compute_genome_bam_stats(
+        bam,
+        sample_size=config['bam_stats.sample_size'],
+        sample_bin_size=config['bam_stats.sample_bin_size'],
+        sample_cap=config['bam_stats.sample_cap'],
+        distribution_fraction=config['bam_stats.distribution_fraction'],
+    )
+    return {
+        'median_fragment_size': int(bam_stats.median_fragment_size),
+        'read_length': int(bam_stats.read_length),
+        'stdev_fragment_size': int(bam_stats.stdev_fragment_size),
+    }
+
+
 class CustomHelpFormatter(argparse.ArgumentDefaultsHelpFormatter):
     """
     subclass the default help formatter to stop default printing for required arguments
@@ -96,385 +116,89 @@ def __call__(self, parser, namespace, values, option_string=None):
         setattr(namespace, self.dest, items)
 
 
-class LibraryConfig(MavisNamespace):
+def validate_config(config: Dict, bam_stats: Optional[bool] = False, stage: str = '') -> None:
     """
-    holds library specific configuration information
+    Check that the input JSON config conforms to the expected schema and run the
+    other relevant checks, such as verifying that the expected input files exist
     """
+    schema = 'config' if stage != SUBCOMMAND.OVERLAY else 'overlay'
 
-    def __init__(
-        self,
-        library,
-        protocol,
-        disease_status,
-        bam_file=None,
-        inputs=None,
-        read_length=None,
-        median_fragment_size=None,
-        stdev_fragment_size=None,
-        strand_specific=False,
-        strand_determining_read=2,
-        **kwargs
-    ):
-        MavisNamespace.__init__(self)
-        self.library = library
-        self.protocol = PROTOCOL.enforce(protocol)
-        self.bam_file = bam_file
-        self.read_length = NullableType(int)(read_length)
-        self.median_fragment_size = NullableType(int)(median_fragment_size)
-        self.stdev_fragment_size = NullableType(int)(stdev_fragment_size)
-        self.strand_specific = cast(strand_specific, bool)
-        self.strand_determining_read = int(strand_determining_read)
-        self.disease_status = DISEASE_STATUS.enforce(disease_status)
-        try:
-            self.inputs = [f for f in re.split(r'[;\s]+', inputs) if f]
-        except TypeError:
-            self.inputs = inputs if inputs is not None else []
-
-        for attr, value in kwargs.items():
-            for namespace in [CLUSTER_DEFAULTS, VALIDATION_DEFAULTS, ANNOTATION_DEFAULTS]:
-                if attr not in namespace:
-                    continue
-                self.add(
-                    attr,
-                    value,
-                    listable=namespace.is_listable(attr),
-                    nullable=namespace.is_nullable(attr),
-                    cast_type=namespace.type(attr),
-                )
-                break
-
-    def flatten(self):
-        result = MavisNamespace.items(self)
-        result['inputs'] = '\n'.join(result['inputs'])
-        return result
-
-    def is_trans(self):
-        return True if self.protocol == PROTOCOL.TRANS else False
-
-    @staticmethod
-    def build(
-        library,
-        protocol,
-        bam_file,
-        inputs,
-        annotations=None,
-        log=DEVNULL,
-        distribution_fraction=0.98,
-        sample_cap=3000,
-        sample_bin_size=1000,
-        sample_size=500,
-        **kwargs
-    ):
-        """
-        Builds a library config section and gathers the bam stats
-        """
-        PROTOCOL.enforce(protocol)
-
-        if protocol == PROTOCOL.TRANS:
-            if annotations is None or annotations.is_empty():
-                raise AttributeError(
-                    'missing required attribute: annotations. Annotations must be given for transcriptomes'
-                )
-            annotations.load()
-        bam = BamCache(bam_file)
-        if protocol == PROTOCOL.TRANS:
-            bamstats = stats.compute_transcriptome_bam_stats(
-                bam,
-                annotations=annotations.content,
-                sample_size=sample_size,
-                sample_cap=sample_cap,
-                distribution_fraction=distribution_fraction,
-            )
-        elif protocol == PROTOCOL.GENOME:
-            bamstats = stats.compute_genome_bam_stats(
-                bam,
-                sample_size=sample_size,
-                sample_bin_size=sample_bin_size,
-                sample_cap=sample_cap,
-                distribution_fraction=distribution_fraction,
-            )
-        else:
-            raise ValueError('unrecognized value for protocol', protocol)
-        log(bamstats)
-
-        return LibraryConfig(
-            library=library,
-            protocol=protocol,
-            bam_file=bam_file,
-            inputs=inputs,
-            median_fragment_size=bamstats.median_fragment_size,
-            stdev_fragment_size=bamstats.stdev_fragment_size,
-            read_length=bamstats.read_length,
-            strand_determining_read=bamstats.strand_determining_read,
-            **kwargs
-        )
-
-    @classmethod
-    def parse_args(cls, *args):
-        # '<name>', '(genome|transcriptome)', '<diseased|normal>', '[strand_specific]', '[/path/to/bam/file]'
-        if len(args) < 4:
-            return LibraryConfig(args[0], protocol=args[1], disease_status=args[2])
-        elif len(args) < 5:
-            return LibraryConfig(
-                args[0], protocol=args[1], disease_status=args[2], strand_specific=args[3]
-            )
-        return LibraryConfig(
-            args[0],
-            protocol=args[1],
-            disease_status=args[2],
-            strand_specific=args[3],
-            bam_file=args[4],
+    try:
+        snakemake.utils.validate(
+            config, os.path.join(os.path.dirname(__file__), f'schemas/{schema}.json')
         )
+    except Exception as err:
+        short_msg = '. '.join(
+            [line for line in str(err).split('\n') if line.strip()][:3]
+        )  # these can get super long
+        raise snakemake.WorkflowError(short_msg)
+
+    required = []
+    if (
+        stage not in {SUBCOMMAND.CONVERT}
+        or stage == SUBCOMMAND.CLUSTER
+        and not config['cluster.uninformative_filter']
+    ):
+        required.append('reference.annotations')
+
+    if stage == SUBCOMMAND.VALIDATE:
+        required.extend(['reference.aligner_reference', 'reference.reference_genome'])
+
+    for req in required:
+        if req not in config:
+            raise snakemake.WorkflowError(f'missing required property: {req}')
+
+    if schema == 'config':
+        conversion_dir = os.path.join(config['output_dir'], 'converted_outputs')
+        # check that all assignments are conversion aliases or existing files
+        for libname, library in config['libraries'].items():
+            assignments = []
+            for i, assignment in enumerate(library['assign']):
+                if assignment in config.get('convert', {}):
+                    # replace the alias with the expected output path
+                    converted_output = os.path.join(conversion_dir, f'{assignment}.tab')
+                    assignments.append(converted_output)
+                elif (
+                    not os.path.exists(assignment) and os.path.dirname(assignment) != conversion_dir
+                ):
+                    raise FileNotFoundError(f'cannot find the expected input file {assignment}')
+                else:
+                    assignments.append(assignment)
+            library['assign'] = assignments
 
-
-class MavisConfig(MavisNamespace):
-    def __init__(self, **kwargs):
-        # section can be named schedule or qsub to support older versions
-        MavisNamespace.__init__(self)
-        try:
-            content = validate_section(
-                kwargs.pop('schedule', kwargs.pop('qsub', {})), SUBMIT_OPTIONS, True
-            )
-            self.schedule = content
-        except Exception as err:
-            err.args = [
-                'Error in validating the schedule section in the config. '
-                + ' '.join([str(a) for a in err.args])
-            ]
-            raise err
-
-        # set the global defaults
-        for sec, defaults in [
-            ('pairing', PAIRING_DEFAULTS),
-            ('summary', SUMMARY_DEFAULTS),
-            ('validate', VALIDATION_DEFAULTS),
-            ('annotate', ANNOTATION_DEFAULTS),
-            ('illustrate', ILLUSTRATION_DEFAULTS),
-            ('cluster', CLUSTER_DEFAULTS),
-            ('reference', REFERENCE_DEFAULTS),
-        ]:
-            try:
-                self[sec] = validate_section(kwargs.pop(sec, {}), defaults, True)
-            except Exception as err:
-                err.args = [
-                    'Error in validating the {} section in the config. '.format(sec)
-                    + ' '.join([str(a) for a in err.args])
-                ]
-
-                raise err
-
-        SUPPORTED_ALIGNER.enforce(self.validate.aligner)
-        for attr, fnames in self.reference.items():
-            if attr != 'aligner_reference':
-                self.reference[attr] = [f for f in [NullableType(filepath)(v) for v in fnames] if f]
-            if not self.reference[attr] and attr not in {
-                'dgv_annotation',
-                'masking',
-                'template_metadata',
+            if not config['skip_stage.validate'] and stage in {
+                SUBCOMMAND.VALIDATE,
+                SUBCOMMAND.SETUP,
             }:
-                raise FileNotFoundError(
-                    'Error in validating the convert section of the config for tag={}. '
-                    'Required reference file does not exist'.format(attr)
-                )
-
-        # set the conversion section
-        self.convert = kwargs.pop('convert', {})
-        for attr, val in self.convert.items():
-            if attr in CONVERT_OPTIONS:
-                self.convert[attr] = CONVERT_OPTIONS.type(attr)(val)
-                continue
-            val = [v for v in re.split(r'[;\s]+', val) if v]
-            if not val:
-                raise UserWarning(
-                    'Error in validating convert section of the config for tag={}. Tag requires arguments'.format(
-                        attr
-                    )
-                )
-            if val[0] == 'convert_tool_output':
-                try:
-                    val[-1] = tab.cast_boolean(val[-1])
-                except TypeError:
-                    val.append(False)
-                if len(val) < 4 or val[-2] not in SUPPORTED_TOOL.values():
-                    raise UserWarning(
-                        'Error in validating the convert section of the config for tag={}. '.format(
-                            attr
-                        ),
-                        'Conversion using the built-in convert_tool_output requires specifying the input file(s) and '
-                        'tool name. Currently supported tools include:',
-                        SUPPORTED_TOOL.values(),
-                        'given',
-                        val,
-                    )
-                expanded_inputs = []
-                for file_expr in val[1:-2]:
-                    expanded = bash_expands(file_expr)
-                    if not expanded:
-                        raise FileNotFoundError(
-                            'Error in validating the config for tag={}. '
-                            'Input file(s) do not exist'.format(attr),
-                            val[1:-2],
-                        )
-                    expanded_inputs.extend(expanded)
-                val = [val[0]] + expanded_inputs + val[-2:]
-            self.convert[attr] = val
-        self.convert = MavisNamespace(**self.convert)
-
-        # now add the library specific sections
-        self.libraries = {}
-
-        for libname, val in kwargs.items():  # all other sections already popped
-            libname = nameable_string(libname)
-            d = {}
-            d.update(self.cluster.items())
-            d.update(self.validate.items())
-            d.update(self.annotate.items())
-            d.update(val)
-            d['library'] = libname
-            val['library'] = libname
-            self.libraries[libname] = LibraryConfig(**val)
-            # now try building the LibraryConfig object
-            try:
-                lc = LibraryConfig(**d)
-                self.libraries[libname] = lc
-            except TypeError as terr:  # missing required argument
-                try:
-                    lc = LibraryConfig.build(**d)
-                    self.libraries[libname] = lc
-                except Exception as err:
-                    raise UserWarning(
-                        'Error in validating the library section of the config.', libname, err, terr
-                    )
-            for inputfile in lc.inputs:
-                if inputfile not in self.convert and not os.path.exists(inputfile):
+                if not library.get('bam_file', None) or not os.path.exists(library['bam_file']):
                     raise FileNotFoundError(
-                        'Error in validating the library section of the config. Input file does not exist',
-                        libname,
-                        inputfile,
+                        f'missing bam file for library ({libname}), it is a required input when the validate stage is not skipped'
                     )
-
-    def has_transcriptome(self):
-        return any([lib.is_trans() for lib in self.libraries.values()])
-
-    @staticmethod
-    def read(filepath):
-        """
-        reads the configuration settings from the configuration file
-
-        Args:
-            filepath (str): path to the input configuration file
-
-        Returns:
-            List[Namespace]: namespace arguments for each library
-        """
-        if not os.path.exists(filepath):
-            raise FileNotFoundError('File does not exist: {}'.format(filepath))
-        parser = ConfigParser(interpolation=ExtendedInterpolation())
-        parser.read(filepath)
-        config_dict = {}
-
-        # get the library sections and add the default settings
-        for sec in parser.sections():
-            config_dict.setdefault(sec, {}).update(parser[sec].items())
-        return MavisConfig(**config_dict)
-
-
-def write_config(filename, include_defaults=False, libraries=[], conversions={}, log=DEVNULL):
-    """
-    Args:
-        filename (str): path to the output file
-        include_defaults (bool): True if default parameters should be written to the config, False otherwise
-        libraries (List[LibraryConfig]): library configuration sections
-        conversions (Dict[str,List]): conversion commands by alias name
-        log (Callable): function to pass output logging to
-    """
-    config = {}
-
-    config['reference'] = REFERENCE_DEFAULTS.to_dict()
-    for filetype, fname in REFERENCE_DEFAULTS.items():
-        if fname is None:
-            warnings.warn(
-                'filetype {} has not been set. This must be done manually before the configuration file is used'.format(
-                    filetype
-                )
-            )
-
-    if libraries:
-        for lib in libraries:
-            config[lib.library] = lib.to_dict()
-
-    if include_defaults:
-        config['schedule'] = SUBMIT_OPTIONS.to_dict()
-        config['validate'] = VALIDATION_DEFAULTS.to_dict()
-        config['cluster'] = CLUSTER_DEFAULTS.to_dict()
-        config['annotate'] = ANNOTATION_DEFAULTS.to_dict()
-        config['illustrate'] = ILLUSTRATION_DEFAULTS.to_dict()
-        config['summary'] = SUMMARY_DEFAULTS.to_dict()
-
-    config['convert'] = CONVERT_OPTIONS.to_dict()
-    for alias, command in conversions.items():
-        if alias in CONVERT_OPTIONS:
-            raise UserWarning(
-                'error in writing config. Alias for conversion product cannot be a setting',
-                alias,
-                CONVERT_OPTIONS.keys(),
-            )
-        config['convert'][alias] = '\n'.join(command)
-
-    for sec in config:
-        for tag, value in config[sec].items():
-            if '_regex_' in tag:
-                config[sec][tag] = re.sub(r'\$', '$$', config[sec][tag])
-                continue
-            elif not isinstance(value, str):
-                try:
-                    config[sec][tag] = '\n'.join([str(v) for v in value])
-                    continue
-                except TypeError:
-                    pass
-            config[sec][tag] = str(value)
-
-    conf = ConfigParser()
-    for sec in config:
-        conf[sec] = {}
-        for tag, val in config[sec].items():
-            conf[sec][tag] = val
-    log('writing:', filename)
-    with open(filename, 'w') as configfile:
-        conf.write(configfile)
-
-
-def validate_section(section, namespace, use_defaults=False):
-    """
-    given a dictionary of values, returns a new dict with the values casted to their appropriate type or set
-    to a default if the value was not given
-    """
-    new_namespace = MavisNamespace()
-    if use_defaults:
-        new_namespace.copy_from(namespace)
-
-    for attr, value in section.items():
-        if attr not in namespace:
-            raise KeyError('tag not recognized', attr)
-        else:
-            cast_type = namespace.type(attr)
-            if namespace.is_listable(attr):
-                value = MavisNamespace.parse_listable_string(
-                    value, cast_type, namespace.is_nullable(attr)
-                )
-            else:
-                value = cast_type(value)
-            try:
-                new_namespace.add(
-                    attr,
-                    value,
-                    cast_type=cast_type,
-                    listable=namespace.is_listable(attr),
-                    nullable=namespace.is_nullable(attr),
+                # calculate the bam_stats if they have not been given
+                missing_stats = any(
+                    [
+                        col not in library
+                        for col in ['median_fragment_size', 'read_length', 'stdev_fragment_size']
+                    ]
                 )
-            except Exception as err:
-                raise ValueError('failed adding {}. {}'.format(attr, err))
-    return new_namespace
+                if missing_stats and bam_stats:
+                    library.update(calculate_bam_stats(config, libname))
+
+        # expand and check the input files exist for any conversions
+        for conversion in config.get('convert', {}).values():
+            expanded = []
+            for input_file in conversion['inputs']:
+                expanded.extend(bash_expands(input_file))
+            conversion['inputs'] = expanded
+
+    # make sure all the reference files specified exist and overload with environment variables where applicable
+    for ref_type in list(config.keys()):
+        if not ref_type.startswith('reference.'):
+            continue
+        expanded = []
+        for input_file in config[ref_type]:
+            expanded.extend(bash_expands(input_file))
+        config[ref_type] = expanded
 
 
 def get_metavar(arg_type):
@@ -496,291 +220,5 @@ def get_metavar(arg_type):
     return None
 
 
-def nameable_string(input_string):
-    """
-    A string that can be used for library and/or filenames
-    """
-    input_string = str(input_string)
-    if re.search(r'[;,_\s]', input_string):
-        raise TypeError('names cannot contain the reserved characters [;,_\\s]', input_string)
-    if input_string.lower() == 'none':
-        raise TypeError('names cannot be none', input_string)
-    if not input_string:
-        raise TypeError('names cannot be an empty string', input_string)
-    if not re.search(r'^[a-zA-Z]', input_string):
-        raise TypeError('names must start with a letter', input_string)
-    return input_string
-
-
-def augment_parser(arguments, parser, required=None):
-    """
-    Adds options to the argument parser. Separate function to facilitate the pipeline steps
-    all having a similar look/feel
-    """
-    if required is None:
-        try:
-            required = bool(parser.title.startswith('required'))
-        except AttributeError:
-            pass
-
-    for arg in arguments:
-
-        if arg == 'help':
-            parser.add_argument(
-                '-h', '--help', action='help', help='show this help message and exit'
-            )
-        elif arg == 'version':
-            parser.add_argument(
-                '-v',
-                '--version',
-                action='version',
-                version='%(prog)s version ' + __version__,
-                help='Outputs the version number',
-            )
-        elif arg == 'log':
-            parser.add_argument('--log', help='redirect stdout to a log file', default=None)
-        elif arg == 'log_level':
-            parser.add_argument(
-                '--log_level',
-                help='level of logging to output',
-                choices=['INFO', 'DEBUG'],
-                default='INFO',
-            )
-        elif arg == 'aligner_reference':
-            default = REFERENCE_DEFAULTS[arg]
-            parser.add_argument(
-                '--{}'.format(arg),
-                default=default,
-                required=required if not default else False,
-                help=REFERENCE_DEFAULTS.define(arg),
-                type=filepath,
-            )
-        elif arg in REFERENCE_DEFAULTS:
-            default = REFERENCE_DEFAULTS[arg]
-            parser.add_argument(
-                '--{}'.format(arg),
-                default=default,
-                required=required if not default else False,
-                help=REFERENCE_DEFAULTS.define(arg),
-                type=filepath if required else NullableType(filepath),
-                nargs='*',
-            )
-        elif arg == 'config':
-            parser.add_argument('config', help='path to the config file', type=filepath)
-        elif arg == 'bam_file':
-            parser.add_argument(
-                '--bam_file', help='path to the input bam file', required=required, type=filepath
-            )
-        elif arg == 'read_length':
-            parser.add_argument(
-                '--read_length',
-                type=int,
-                help='the length of the reads in the bam file',
-                required=required,
-            )
-        elif arg == 'stdev_fragment_size':
-            parser.add_argument(
-                '--stdev_fragment_size',
-                type=int,
-                help='expected standard deviation in insert sizes',
-                required=required,
-            )
-        elif arg == 'median_fragment_size':
-            parser.add_argument(
-                '--median_fragment_size',
-                type=int,
-                help='median inset size for pairs in the bam file',
-                required=required,
-            )
-        elif arg == 'library':
-            parser.add_argument(
-                '--library', help='library name', required=required, type=nameable_string
-            )
-        elif arg == 'protocol':
-            parser.add_argument(
-                '--protocol', choices=PROTOCOL.values(), help='library protocol', required=required
-            )
-        elif arg == 'disease_status':
-            parser.add_argument(
-                '--disease_status',
-                choices=DISEASE_STATUS.values(),
-                help='library disease status',
-                required=required,
-            )
-        elif arg == 'skip_stage':
-            parser.add_argument(
-                '--skip_stage',
-                choices=[SUBCOMMAND.CLUSTER, SUBCOMMAND.VALIDATE],
-                action='append',
-                default=[],
-                help='Use flag once per stage to skip. Can skip clustering or validation or both',
-            )
-        elif arg == 'strand_specific':
-            parser.add_argument(
-                '--strand_specific',
-                type=tab.cast_boolean,
-                default=False,
-                help='indicates that the input is strand specific',
-            )
-        else:
-            value_type = None
-            help_msg = None
-            default_value = None
-            choices = None
-            nargs = None
-            if arg == 'aligner':
-                choices = SUPPORTED_ALIGNER.values()
-                help_msg = 'aligner to use for aligning contigs'
-            if arg == 'uninformative_filter':
-                help_msg = 'If flag is False then the clusters will not be filtered based on lack of annotation'
-            if arg == 'scheduler':
-                choices = SCHEDULER.keys()
-
-            # get default values
-            for nspace in [
-                CLUSTER_DEFAULTS,
-                VALIDATION_DEFAULTS,
-                ANNOTATION_DEFAULTS,
-                ILLUSTRATION_DEFAULTS,
-                PAIRING_DEFAULTS,
-                SUMMARY_DEFAULTS,
-                SUBMIT_OPTIONS,
-                CONVERT_OPTIONS,
-            ]:
-                if arg in nspace:
-                    default_value = nspace[arg]
-                    if nspace.is_listable(arg):
-                        nargs = '*'
-                    value_type = nspace.type(arg, None)
-                    if nspace.is_nullable(arg):
-                        value_type = NullableType(value_type)
-                    if not help_msg:
-                        help_msg = nspace.define(arg)
-                    break
-
-            if help_msg is None:
-                raise KeyError('invalid argument', arg)
-            parser.add_argument(
-                '--{}'.format(arg),
-                choices=choices,
-                nargs=nargs,
-                help=help_msg,
-                required=required,
-                default=default_value,
-                type=value_type,
-            )
-
-
-def generate_config(args, parser, log=DEVNULL):
-    """
-    Args:
-        parser (argparse.ArgumentParser): the main parser
-        required: the argparse required arguments group
-        optional: the argparse optional arguments group
-    """
-    libs = []
-    inputs_by_lib = {}
-    convert = {}
-    try:
-        # process the libraries by input argument (--input)
-        for libconf in [LibraryConfig.parse_args(*a) for a in args.library]:
-            if not libconf.bam_file and SUBCOMMAND.VALIDATE not in args.skip_stage:
-                raise KeyError(
-                    'argument --library: bam file must be given if validation is not being skipped'
-                )
-            libs.append(libconf)
-            inputs_by_lib[libconf.library] = set()
-            if (
-                SUBCOMMAND.VALIDATE not in args.skip_stage
-                and libconf.protocol == PROTOCOL.TRANS
-                and (not args.annotations or args.annotations.is_empty())
-            ):
-                parser.error(
-                    'argument --annotations is required to build configuration files for transcriptome libraries'
-                )
-
-        for arg_list in args.input:
-            inputfile = arg_list[0]
-            for lib in arg_list[1:]:
-                if lib not in inputs_by_lib:
-                    raise KeyError(
-                        'argument --input: specified a library that was not configured. Please input all libraries using '
-                        'the --library flag',
-                        lib,
-                    )
-                inputs_by_lib[lib].add(inputfile)
-        # process the inputs by library argument (--assign)
-        for arg_list in args.assign:
-            lib = arg_list[0]
-            if lib not in inputs_by_lib:
-                raise KeyError(
-                    'argument --assign: specified a library that was not configured. Please input all libraries using '
-                    'the --library flag',
-                    lib,
-                )
-            inputs_by_lib[lib].update(arg_list[1:])
-
-        for libconf in libs:
-            if not inputs_by_lib[libconf.library]:
-                raise KeyError(
-                    'argument --input: no input was given for the library', libconf.library
-                )
-            libconf.inputs = inputs_by_lib[libconf.library]
-
-        for alias, command in args.external_conversion:
-            if alias in convert:
-                raise KeyError('duplicate alias names are not allowed', alias)
-            convert[alias] = []
-            open_option = False
-            for item in re.split(r'\s+', command):
-                if convert[alias]:
-                    if open_option:
-                        convert[alias][-1] += ' ' + item
-                        open_option = False
-                    else:
-                        convert[alias].append(item)
-                        if item[0] == '-':
-                            open_option = True
-                else:
-                    convert[alias].append(item)
-
-        for arg in args.convert:
-            # should follow the pattern: alias file [file...] toolname [stranded]
-            alias = arg[0]
-            if alias in convert:
-                raise KeyError('duplicate alias names are not allowed: {}'.format(alias))
-            if arg[-1] in SUPPORTED_TOOL.values():
-                toolname = arg[-1]
-                stranded = False
-                inputfiles = arg[1:-1]
-            else:
-                toolname, stranded = arg[-2:]
-                inputfiles = arg[1:-2]
-            if not inputfiles:
-                raise KeyError('argument --convert is missing input file path(s): {}'.format(arg))
-            stranded = str(tab.cast_boolean(stranded))
-            SUPPORTED_TOOL.enforce(toolname)
-            convert[alias] = ['convert_tool_output'] + inputfiles + [toolname, stranded]
-    except KeyError as err:
-        parser.error(' '.join(err.args))
-
-    if SUBCOMMAND.VALIDATE not in args.skip_stage:
-        for i, libconf in enumerate(libs):
-            log('generating the config section for:', libconf.library)
-            libs[i] = LibraryConfig.build(
-                library=libconf.library,
-                protocol=libconf.protocol,
-                bam_file=libconf.bam_file,
-                inputs=inputs_by_lib[libconf.library],
-                strand_specific=libconf.strand_specific,
-                disease_status=libconf.disease_status,
-                annotations=args.annotations,
-                log=log,
-                sample_size=args.genome_bins
-                if libconf.protocol == PROTOCOL.GENOME
-                else args.transcriptome_bins,
-                distribution_fraction=args.distribution_fraction,
-            )
-    write_config(
-        args.write, include_defaults=args.add_defaults, libraries=libs, conversions=convert, log=log
-    )
+def get_by_prefix(config, prefix):
+    return {k.replace(prefix, ''): v for k, v in config.items() if k.startswith(prefix)}
diff --git a/mavis/constants.py b/mavis/constants.py
index 46a33890..72fccdf4 100644
--- a/mavis/constants.py
+++ b/mavis/constants.py
@@ -2,8 +2,8 @@
 module responsible for small utility functions and constants used throughout the structural_variant package
 """
 import argparse
-import re
 import os
+import re
 
 from Bio.Alphabet import Gapped
 from Bio.Alphabet.IUPAC import ambiguous_dna
@@ -11,7 +11,6 @@
 from Bio.Seq import Seq
 from tab import cast_boolean, cast_null
 
-
 PROGNAME = 'mavis'
 EXIT_OK = 0
 EXIT_ERROR = 1
@@ -431,14 +430,12 @@ def float_fraction(num):
 SUBCOMMAND = MavisNamespace(
     ANNOTATE='annotate',
     VALIDATE='validate',
-    SETUP='setup',
-    SCHEDULE='schedule',
     CLUSTER='cluster',
     PAIR='pairing',
     SUMMARY='summary',
-    CONFIG='config',
     CONVERT='convert',
     OVERLAY='overlay',
+    SETUP='setup',
 )
 """MavisNamespace: holds controlled vocabulary for allowed pipeline stage values
 
@@ -448,7 +445,6 @@ def float_fraction(num):
 - convert
 - pairing
 - pipeline
-- schedule
 - summary
 - validate
 """
diff --git a/mavis/main.py b/mavis/main.py
index f3aec4cc..9c6d678c 100644
--- a/mavis/main.py
+++ b/mavis/main.py
@@ -1,178 +1,29 @@
 #!python
 import argparse
+import json
 import logging
-import platform
 import os
-import time
+import platform
 import sys
+import time
+from typing import Dict
 
 import tab
 
 from . import __version__
+from . import config as _config
+from . import util as _util
 from .align import get_aligner_version
-from . import annotate as _annotate
 from .annotate import main as annotate_main
-from .cluster.constants import DEFAULTS as CLUSTER_DEFAULTS
 from .cluster import main as cluster_main
-from . import config as _config
-from .constants import SUBCOMMAND, PROTOCOL, float_fraction, EXIT_OK
-from .error import DrawingFitError
-from .illustrate.constants import DEFAULTS as ILLUSTRATION_DEFAULTS, DiagramSettings
-from .illustrate.diagram import draw_multi_transcript_overlay
-from .illustrate.scatter import bam_to_scatter
-from .pairing.constants import DEFAULTS as PAIRING_DEFAULTS
+from .constants import SUBCOMMAND
+from .overlay import check_overlay_args
+from .overlay import main as overlay_main
 from .pairing import main as pairing_main
-from .summary.constants import DEFAULTS as SUMMARY_DEFAULTS
 from .summary import main as summary_main
-from .tools import convert_tool_output, SUPPORTED_TOOL
-from . import util as _util
-from .validate.constants import DEFAULTS as VALIDATION_DEFAULTS
+from .tools import SUPPORTED_TOOL, convert_tool_output
+from .util import filepath
 from .validate import main as validate_main
-from .schedule import pipeline as _pipeline
-
-
-def check_overlay_args(args, parser):
-    """
-    parse the overlay options and check the formatting
-    """
-    # check complex options
-    for marker in args.markers:
-        if len(marker) < 3:
-            marker.append(marker[-1])
-        try:
-            marker[1] = int(marker[1])
-            marker[2] = int(marker[2])
-        except ValueError:
-            parser.error('argument --marker: start and end must be integers: {}'.format(marker))
-
-    defaults = [None, None, 0.5, None, True]
-    bam_file, density, ymax, stranded = range(1, 5)
-
-    for plot in args.read_depth_plots:
-        for i, d in enumerate(defaults):
-            if i >= len(plot):
-                plot.append(d)
-        if not os.path.exists(plot[bam_file]):
-            parser.error(
-                'argument --read_depth_plots: the bam file given does not exist: {}'.format(
-                    plot[bam_file]
-                )
-            )
-        try:
-            plot[density] = float(plot[density])
-            if plot[density] < 0 or plot[density] > 1:
-                raise ValueError()
-        except ValueError:
-            parser.error(
-                'argument --read_depth_plots: density must be an float between 0 and 1: {}'.format(
-                    plot[density]
-                )
-            )
-        try:
-            if str(plot[ymax]).lower() in ['null', 'none']:
-                plot[ymax] = None
-            else:
-                plot[ymax] = int(plot[ymax])
-        except ValueError:
-            parser.error(
-                'argument --read_depth_plots: ymax must be an integer: {}'.format(plot[ymax])
-            )
-        try:
-            plot[stranded] = tab.cast_boolean(plot[stranded])
-        except TypeError:
-            parser.error(
-                'argument --read_depth_plots: stranded must be an boolean: {}'.format(
-                    plot[stranded]
-                )
-            )
-    return args
-
-
-def overlay_main(
-    gene_name,
-    output,
-    buffer_length,
-    read_depth_plots,
-    markers,
-    annotations,
-    drawing_width_iter_increase,
-    max_drawing_retries,
-    min_mapping_quality,
-    ymax_color='#FF0000',
-    **kwargs
-):
-    """
-    generates an overlay diagram
-    """
-    annotations.load()
-    # check options formatting
-    gene_to_draw = None
-
-    for chrom in annotations.content:
-        for gene in annotations.content[chrom]:
-            if gene_name in gene.aliases or gene_name == gene.name:
-                gene_to_draw = gene
-                _util.LOG(
-                    'Found target gene: {}(aka. {}) {}:{}-{}'.format(
-                        gene.name, gene.aliases, gene.chr, gene.start, gene.end
-                    )
-                )
-                break
-    if gene_to_draw is None:
-        raise KeyError('Could not find gene alias or id in annotations file', gene_name)
-
-    settings = DiagramSettings(**kwargs)
-
-    genomic_min = max(gene_to_draw.start - buffer_length, 1)
-    genomic_max = gene_to_draw.end + buffer_length
-
-    plots = []
-    for axis_name, bam_file, density, ymax, stranded in read_depth_plots:
-        # one plot per bam
-        plots.append(
-            bam_to_scatter(
-                bam_file,
-                gene_to_draw.chr,
-                genomic_min,
-                genomic_max,
-                strand=gene_to_draw.get_strand() if stranded else None,
-                ymax=ymax,
-                density=density,
-                axis_name=axis_name,
-                min_mapping_quality=min_mapping_quality,
-                ymax_color=ymax_color,
-            )
-        )
-
-    for i, (marker_name, marker_start, marker_end) in enumerate(markers):
-        markers[i] = _annotate.base.BioInterval(
-            gene_to_draw.chr, marker_start, marker_end, name=marker_name
-        )
-
-    canvas = None
-    attempts = 1
-    while True:
-        try:
-            canvas = draw_multi_transcript_overlay(
-                settings,
-                gene_to_draw,
-                vmarkers=markers,
-                plots=plots,
-                window_buffer=buffer_length,
-                log=_util.LOG,
-            )
-            break
-        except DrawingFitError as err:
-            if attempts > max_drawing_retries:
-                raise err
-            _util.LOG('Drawing fit: extending window', drawing_width_iter_increase)
-            settings.width += drawing_width_iter_increase
-            attempts += 1
-
-    svg_output_file = os.path.join(output, '{}_{}_overlay.svg'.format(gene_to_draw.name, gene_name))
-    _util.LOG('writing:', svg_output_file)
-
-    canvas.saveas(svg_output_file)
 
 
 def convert_main(inputs, outputfile, file_type, strand_specific=False, assume_no_untemplated=True):
@@ -189,20 +40,15 @@ def convert_main(inputs, outputfile, file_type, strand_specific=False, assume_no
     _util.output_tabbed_file(bpp_results, outputfile)
 
 
-def main(argv=None):
-    """
-    sets up the parser and checks the validity of command line args
-    loads reference files and redirects into subcommand main functions
-
-    Args:
-        argv (list): List of arguments, defaults to command line arguments
-    """
-    if argv is None:  # need to do at run time or patching will not behave as expected
-        argv = sys.argv[1:]
-    start_time = int(time.time())
-
+def create_parser(argv):
     parser = argparse.ArgumentParser(formatter_class=_config.CustomHelpFormatter)
-    _config.augment_parser(['version'], parser)
+    parser.add_argument(
+        '-v',
+        '--version',
+        action='version',
+        version='%(prog)s version ' + __version__,
+        help='Outputs the version number',
+    )
     subp = parser.add_subparsers(
         dest='command', help='specifies which step/stage in the pipeline or which subprogram to use'
     )
@@ -215,87 +61,27 @@ def main(argv=None):
         )
         required[command] = subparser.add_argument_group('required arguments')
         optional[command] = subparser.add_argument_group('optional arguments')
-        _config.augment_parser(['help', 'version', 'log', 'log_level'], optional[command])
-
-    # config arguments
-    required[SUBCOMMAND.CONFIG].add_argument(
-        '-w',
-        '--write',
-        help='path to the new configuration file',
-        required=True,
-        metavar='FILEPATH',
-    )
-    optional[SUBCOMMAND.CONFIG].add_argument(
-        '--library',
-        metavar='<name> {genome,transcriptome} {diseased,normal} [strand_specific] [/path/to/bam/file]',
-        action=_config.RangeAppendAction,
-        help='configuration for libraries to be analyzed by mavis',
-        nmin=3,
-        nmax=5,
-    )
-    optional[SUBCOMMAND.CONFIG].add_argument(
-        '--input',
-        help='path to an input file or filter for mavis followed by the library names it '
-        'should be used for',
-        nmin=2,
-        action=_config.RangeAppendAction,
-        metavar='FILEPATH <name> [<name> ...]',
-    )
-    optional[SUBCOMMAND.CONFIG].add_argument(
-        '--assign',
-        help='library name followed by path(s) to input file(s) or filter names. This represents the list'
-        ' of inputs that should be used for the library',
-        action=_config.RangeAppendAction,
-        nmin=2,
-        metavar='<name> FILEPATH [FILEPATH ...]',
-    )
-    optional[SUBCOMMAND.CONFIG].add_argument(
-        '--genome_bins',
-        default=_util.get_env_variable('genome_bins', 100),
-        type=int,
-        metavar=_config.get_metavar(int),
-        help='number of bins/samples to use in calculating the fragment size stats for genomes',
-    )
-    optional[SUBCOMMAND.CONFIG].add_argument(
-        '--transcriptome_bins',
-        default=_util.get_env_variable('transcriptome_bins', 500),
-        type=int,
-        metavar=_config.get_metavar(int),
-        help='number of genes to use in calculating the fragment size stats for genomes',
-    )
-    optional[SUBCOMMAND.CONFIG].add_argument(
-        '--distribution_fraction',
-        default=_util.get_env_variable('distribution_fraction', 0.97),
-        type=float_fraction,
-        metavar=_config.get_metavar(float),
-        help='the proportion of the distribution of calculated fragment sizes to use in determining the stdev',
-    )
-    optional[SUBCOMMAND.CONFIG].add_argument(
-        '--convert',
-        nmin=3,
-        metavar='<alias> FILEPATH [FILEPATH ...] {{{}}} [stranded]'.format(
-            ','.join(SUPPORTED_TOOL.values())
-        ),
-        help='input file conversion for internally supported tools',
-        action=_config.RangeAppendAction,
-    )
-    optional[SUBCOMMAND.CONFIG].add_argument(
-        '--external_conversion',
-        metavar=('<alias>', '<"command">'),
-        nargs=2,
-        default=[],
-        help='alias for use in inputs and full command (quoted)',
-        action='append',
-    )
-    optional[SUBCOMMAND.CONFIG].add_argument(
-        '--add_defaults',
-        default=False,
-        action='store_true',
-        help='write current defaults for all non-specified options to the config output',
-    )
-    _config.augment_parser(['annotations'], optional[SUBCOMMAND.CONFIG])
-    # add the optional annotations file (only need this is auto generating bam stats for the transcriptome)
-    _config.augment_parser(['skip_stage'], optional[SUBCOMMAND.CONFIG])
+        optional[command].add_argument(
+            '-h', '--help', action='help', help='show this help message and exit'
+        )
+        optional[command].add_argument(
+            '-v',
+            '--version',
+            action='version',
+            version='%(prog)s version ' + __version__,
+            help='Outputs the version number',
+        )
+        optional[command].add_argument('--log', help='redirect stdout to a log file', default=None)
+        optional[command].add_argument(
+            '--log_level',
+            help='level of logging to output',
+            choices=['INFO', 'DEBUG'],
+            default='INFO',
+        )
+        if command not in SUBCOMMAND.CONVERT:
+            optional[command].add_argument(
+                '--config', '-c', help='path to the JSON config file', type=filepath, required=True
+            )
 
     # convert
     required[SUBCOMMAND.CONVERT].add_argument(
@@ -304,42 +90,22 @@ def main(argv=None):
         required=True,
         help='Indicates the input file type to be parsed',
     )
-    _config.augment_parser(
-        ['strand_specific', 'assume_no_untemplated'], optional[SUBCOMMAND.CONVERT]
+    optional[SUBCOMMAND.CONVERT].add_argument(
+        '--strand_specific', type=tab.cast_boolean, default=False
     )
-    required[SUBCOMMAND.CONVERT].add_argument(
-        '--outputfile', '-o', required=True, help='path to the outputfile', metavar='FILEPATH'
+    optional[SUBCOMMAND.CONVERT].add_argument(
+        '--assume_no_untemplated', type=tab.cast_boolean, default=True
     )
+    for command in [SUBCOMMAND.CONVERT, SUBCOMMAND.SETUP]:
+        required[command].add_argument(
+            '--outputfile', '-o', required=True, help='path to the outputfile', metavar='FILEPATH'
+        )
 
-    for command in set(SUBCOMMAND.values()) - {SUBCOMMAND.CONFIG, SUBCOMMAND.CONVERT}:
+    for command in set(SUBCOMMAND.values()) - {SUBCOMMAND.CONVERT, SUBCOMMAND.SETUP}:
         required[command].add_argument(
             '-o', '--output', help='path to the output directory', required=True
         )
 
-    # pipeline
-    _config.augment_parser(['config'], required[SUBCOMMAND.SETUP])
-    optional[SUBCOMMAND.SETUP].add_argument(
-        '--skip_stage',
-        choices=[SUBCOMMAND.CLUSTER, SUBCOMMAND.VALIDATE],
-        action='append',
-        default=[],
-        help='Use flag once per stage to skip. Can skip clustering or validation or both',
-    )
-
-    # schedule arguments
-    optional[SUBCOMMAND.SCHEDULE].add_argument(
-        '--submit',
-        action='store_true',
-        default=False,
-        help='submit jobs to the the scheduler specified',
-    )
-    optional[SUBCOMMAND.SCHEDULE].add_argument(
-        '--resubmit',
-        action='store_true',
-        default=False,
-        help='resubmit jobs in error states to the the scheduler specified',
-    )
-
     # add the inputs argument
     for command in [
         SUBCOMMAND.CLUSTER,
@@ -358,79 +124,14 @@ def main(argv=None):
             metavar='FILEPATH',
         )
 
-    # cluster
-    _config.augment_parser(
-        ['library', 'protocol', 'strand_specific', 'disease_status'], required[SUBCOMMAND.CLUSTER]
-    )
-    _config.augment_parser(
-        list(CLUSTER_DEFAULTS.keys()) + ['masking', 'annotations'], optional[SUBCOMMAND.CLUSTER]
-    )
-    optional[SUBCOMMAND.CLUSTER].add_argument(
-        '--batch_id', help='batch id to use for prefix of split files', type=_config.nameable_string
-    )
-    optional[SUBCOMMAND.CLUSTER].add_argument(
-        '--split_only',
-        help='Cluster the files or simply split them without clustering',
-        type=tab.cast_boolean,
-    )
-
-    # validate
-    _config.augment_parser(
-        [
-            'library',
-            'protocol',
-            'bam_file',
-            'read_length',
-            'stdev_fragment_size',
-            'median_fragment_size',
-            'strand_specific',
-            'reference_genome',
-            'aligner_reference',
-        ],
-        required[SUBCOMMAND.VALIDATE],
-    )
-    _config.augment_parser(VALIDATION_DEFAULTS.keys(), optional[SUBCOMMAND.VALIDATE])
-    _config.augment_parser(['masking', 'annotations'], optional[SUBCOMMAND.VALIDATE])
-
-    # annotate
-    _config.augment_parser(
-        ['library', 'protocol', 'annotations', 'reference_genome'], required[SUBCOMMAND.ANNOTATE]
-    )
-    _config.augment_parser(
-        ['max_proximity', 'masking', 'template_metadata'], optional[SUBCOMMAND.ANNOTATE]
-    )
-    _config.augment_parser(
-        list(_annotate.constants.DEFAULTS.keys()) + list(ILLUSTRATION_DEFAULTS.keys()),
-        optional[SUBCOMMAND.ANNOTATE],
-    )
-
-    # pair
-    _config.augment_parser(['annotations'], required[SUBCOMMAND.PAIR], optional[SUBCOMMAND.PAIR])
-    _config.augment_parser(
-        ['max_proximity'] + list(PAIRING_DEFAULTS.keys()), optional[SUBCOMMAND.PAIR]
-    )
-
-    # summary
-    _config.augment_parser(
-        [
-            'annotations',
-            'flanking_call_distance',
-            'split_call_distance',
-            'contig_call_distance',
-            'spanning_call_distance',
-        ],
-        required[SUBCOMMAND.SUMMARY],
-    )
-    _config.augment_parser(SUMMARY_DEFAULTS.keys(), optional[SUBCOMMAND.SUMMARY])
-    _config.augment_parser(['dgv_annotation'], optional[SUBCOMMAND.SUMMARY])
+    # library specific commands
+    for command in [SUBCOMMAND.CLUSTER, SUBCOMMAND.VALIDATE, SUBCOMMAND.ANNOTATE]:
+        required[command].add_argument(
+            '--library', '-l', required=True, help='The library to run the current step on'
+        )
 
     # overlay arguments
     required[SUBCOMMAND.OVERLAY].add_argument('gene_name', help='Gene ID or gene alias to be drawn')
-    _config.augment_parser(['annotations'], required[SUBCOMMAND.OVERLAY])
-    _config.augment_parser(
-        ['drawing_width_iter_increase', 'max_drawing_retries', 'width', 'min_mapping_quality'],
-        optional[SUBCOMMAND.OVERLAY],
-    )
     optional[SUBCOMMAND.OVERLAY].add_argument(
         '--buffer_length',
         default=0,
@@ -457,13 +158,25 @@ def main(argv=None):
         action=_config.RangeAppendAction,
     )
 
-    args = _util.MavisNamespace(**parser.parse_args(argv).__dict__)
+    return parser, _util.MavisNamespace(**parser.parse_args(argv).__dict__)
+
+
+def main(argv=None):
+    """
+    sets up the parser and checks the validity of command line args
+    loads reference files and redirects into subcommand main functions
+
+    Args:
+        argv (list): List of arguments, defaults to command line arguments
+    """
+    if argv is None:  # need to do at run time or patching will not behave as expected
+        argv = sys.argv[1:]
+    start_time = int(time.time())
+    parser, args = create_parser(argv)
+
     if args.command == SUBCOMMAND.OVERLAY:
         args = check_overlay_args(args, parser)
 
-    if args.command == SUBCOMMAND.VALIDATE:
-        args.aligner_version = get_aligner_version(args.aligner)
-
     log_conf = {'format': '{message}', 'style': '{', 'level': args.log_level}
 
     original_logging_handlers = logging.root.handlers[:]
@@ -476,16 +189,23 @@ def main(argv=None):
     _util.LOG('MAVIS: {}'.format(__version__))
     _util.LOG('hostname:', platform.node(), time_stamp=False)
     _util.log_arguments(args)
-    rfile_args = args
 
-    if args.command == SUBCOMMAND.SETUP:  # load the configuration file
-        config = _config.MavisConfig.read(args.config)
-        config.output = args.output
-        config.skip_stage = args.skip_stage
-        config.command = SUBCOMMAND.SETUP
-        rfile_args = config.reference
-        args = config
+    config: Dict = dict()
+
+    try:
+        if args.command != SUBCOMMAND.CONVERT:
+            with open(args.config, 'r') as fh:
+                config = json.load(fh)
+                _config.validate_config(
+                    config,
+                    args.command in {SUBCOMMAND.SETUP, SUBCOMMAND.VALIDATE},
+                    args.command,
+                )
+    except AttributeError as err:
+        raise err
 
+    if args.command == SUBCOMMAND.VALIDATE:
+        args.aligner_version = get_aligner_version(config['validate.aligner'])
     # try checking the input files exist
     try:
         args.inputs = _util.bash_expands(*args.inputs)
@@ -494,95 +214,70 @@ def main(argv=None):
     except FileNotFoundError:
         parser.error('--inputs file(s) for {} {} do not exist'.format(args.command, args.inputs))
 
-    # convert reference files to objects to store both content and name for rewrite
-    for arg in [f for f in _annotate.file_io.REFERENCE_DEFAULTS.keys() if f != 'aligner_reference']:
-        try:
-            rfile_args[arg] = _annotate.file_io.ReferenceFile(
-                arg, assert_exists=True, *rfile_args[arg]
-            )
-        except AttributeError:
-            pass
-        except FileNotFoundError:
-            parser.error('--{} The file specified does not exist: {}'.format(arg, rfile_args[arg]))
-
-    # throw an error if MAVIS can't find the aligner reference
-    try:
-        rfile_args.aligner_reference = _annotate.file_io.ReferenceFile(
-            'aligner_reference', rfile_args.aligner_reference, assert_exists=True
-        )
-    except AttributeError:
-        pass
-    except FileNotFoundError:
-        parser.error(
-            '--aligner_reference file does not exist at: {}'.format(rfile_args.aligner_reference)
-        )
-
-    # for specific cases throw an argument error if missing annotations
-    if any(
-        [
-            args.command == SUBCOMMAND.CLUSTER and args.uninformative_filter,
-            args.command == SUBCOMMAND.CONFIG
-            and any([PROTOCOL.TRANS in values for values in args.library])
-            and SUBCOMMAND.VALIDATE not in args.skip_stage,
-            args.command == SUBCOMMAND.VALIDATE and args.protocol == PROTOCOL.TRANS,
-            args.command
-            in {
-                SUBCOMMAND.PAIR,
-                SUBCOMMAND.ANNOTATE,
-                SUBCOMMAND.SUMMARY,
-                SUBCOMMAND.OVERLAY,
-                SUBCOMMAND.SETUP,
-            },
-        ]
-    ):
-        try:
-            rfile_args.annotations.files_exist(not_empty=True)
-        except FileNotFoundError:
-            parser.error('--annotations file(s) are required and do not exist')
-
     # decide which main function to execute
-    ret_val = EXIT_OK
     command = args.command
-    log_to_file = args.get('log', None)
-
-    # discard any arguments needed for redirect/setup only
-    for init_arg in ['command', 'log', 'log_level']:
-        args.discard(init_arg)
 
     try:
         if command == SUBCOMMAND.CLUSTER:
-            ret_val = cluster_main.main(**args, start_time=start_time)
+            cluster_main.main(
+                inputs=args.inputs,
+                output=args.output,
+                start_time=start_time,
+                config=config,
+                library=args.library,
+            )
         elif command == SUBCOMMAND.VALIDATE:
-            validate_main.main(**args, start_time=start_time)
+            validate_main.main(
+                inputs=args.inputs,
+                output=args.output,
+                start_time=start_time,
+                config=config,
+                library=args.library,
+            )
         elif command == SUBCOMMAND.ANNOTATE:
-            annotate_main.main(**args, start_time=start_time)
+            annotate_main.main(
+                inputs=args.inputs,
+                output=args.output,
+                start_time=start_time,
+                config=config,
+                library=args.library,
+            )
         elif command == SUBCOMMAND.PAIR:
-            pairing_main.main(**args, start_time=start_time)
+            pairing_main.main(
+                inputs=args.inputs,
+                output=args.output,
+                start_time=start_time,
+                config=config,
+            )
         elif command == SUBCOMMAND.SUMMARY:
-            summary_main.main(**args, start_time=start_time)
+            summary_main.main(
+                inputs=args.inputs,
+                output=args.output,
+                start_time=start_time,
+                config=config,
+            )
         elif command == SUBCOMMAND.CONVERT:
-            convert_main(**args)
-        elif command == SUBCOMMAND.OVERLAY:
-            overlay_main(**args)
-        elif command == SUBCOMMAND.CONFIG:
-            _config.generate_config(args, parser, log=_util.LOG)
-        elif command == SUBCOMMAND.SCHEDULE:
-            build_file = os.path.join(args.output, 'build.cfg')
-            args.discard('output')
-            pipeline = _pipeline.Pipeline.read_build_file(build_file)
-            try:
-                code = pipeline.check_status(log=_util.LOG, **args)
-            finally:
-                _util.LOG('rewriting:', build_file)
-                pipeline.write_build_file(build_file)
-            if code != EXIT_OK:
-                sys.exit(code)  # EXIT
-        else:  # PIPELINE
-            config.reference = rfile_args
-            pipeline = _pipeline.Pipeline.build(config)
-            build_file = os.path.join(config.output, 'build.cfg')
-            _util.LOG('writing:', build_file)
-            pipeline.write_build_file(build_file)
+            convert_main(
+                args.inputs,
+                args.outputfile,
+                args.file_type,
+                args.strand_specific,
+                args.assume_no_untemplated,
+            )
+        elif command == SUBCOMMAND.SETUP:
+            _util.LOG(f'writing: {args.outputfile}')
+            with open(args.outputfile, 'w') as fh:
+                fh.write(json.dumps(config, sort_keys=True, indent='  '))
+        else:
+            print(args)
+            overlay_main(
+                buffer_length=args.buffer_length,
+                gene_name=args.gene_name,
+                markers=args.markers,
+                read_depth_plots=args.read_depth_plots,
+                config=config,
+                output=args.output,
+            )
 
         duration = int(time.time()) - start_time
         hours = duration - duration % 3600
@@ -593,16 +288,16 @@ def main(argv=None):
             time_stamp=False,
         )
         _util.LOG('run time (s): {}'.format(duration), time_stamp=False)
-        return ret_val
     except Exception as err:
-        if log_to_file:
-            logging.exception(err)  # capture the error in the logging output file
         raise err
     finally:
-        for handler in logging.root.handlers:
-            logging.root.removeHandler(handler)
-        for handler in original_logging_handlers:
-            logging.root.addHandler(handler)
+        try:
+            for handler in logging.root.handlers:
+                logging.root.removeHandler(handler)
+            for handler in original_logging_handlers:
+                logging.root.addHandler(handler)
+        except Exception as err:
+            print(err)
 
 
 if __name__ == '__main__':
diff --git a/mavis/overlay.py b/mavis/overlay.py
new file mode 100644
index 00000000..868a90fa
--- /dev/null
+++ b/mavis/overlay.py
@@ -0,0 +1,159 @@
+import os
+from typing import Dict, List, Tuple, Union
+
+import tab
+
+from . import annotate as _annotate
+from . import util as _util
+from .annotate.file_io import ReferenceFile
+from .error import DrawingFitError
+from .illustrate.constants import DiagramSettings
+from .illustrate.diagram import draw_multi_transcript_overlay
+from .illustrate.scatter import bam_to_scatter
+
+
+def check_overlay_args(args, parser):
+    """
+    Normalize and validate the multi-value overlay options (markers and read depth plots)
+
+    Each marker is read as [name, start, end] and each read depth plot as
+    [axis_name, bam_file, density, ymax, stranded]; both lists are modified in place.
+
+    Args:
+        args: parsed arguments providing `markers` and `read_depth_plots`
+        parser: argument parser whose `error` method is called for any invalid value
+
+    Returns:
+        the `args` object that was passed in
+    """
+    for region in args.markers:
+        # end omitted: reuse the last value given as the end
+        if len(region) < 3:
+            region.append(region[-1])
+        try:
+            region[1] = int(region[1])
+            region[2] = int(region[2])
+        except ValueError:
+            parser.error('argument --marker: start and end must be integers: {}'.format(region))
+
+    # positions of the fields within each read depth plot entry
+    bam_file, density, ymax, stranded = range(1, 5)
+    fallbacks = [None, None, 0.5, None, True]
+
+    for entry in args.read_depth_plots:
+        # fill in any trailing fields that were left out with their defaults
+        entry.extend(fallbacks[len(entry) :])
+        if not os.path.exists(entry[bam_file]):
+            message = 'argument --read_depth_plots: the bam file given does not exist: {}'
+            parser.error(message.format(entry[bam_file]))
+        try:
+            entry[density] = float(entry[density])
+            if entry[density] < 0 or entry[density] > 1:
+                raise ValueError()
+        except ValueError:
+            message = 'argument --read_depth_plots: density must be an float between 0 and 1: {}'
+            parser.error(message.format(entry[density]))
+        try:
+            # the literal strings null/none (any case) mean that no ymax is set
+            is_null = str(entry[ymax]).lower() in ['null', 'none']
+            entry[ymax] = None if is_null else int(entry[ymax])
+        except ValueError:
+            parser.error(
+                'argument --read_depth_plots: ymax must be an integer: {}'.format(entry[ymax])
+            )
+        try:
+            entry[stranded] = tab.cast_boolean(entry[stranded])
+        except TypeError:
+            message = 'argument --read_depth_plots: stranded must be an boolean: {}'
+            parser.error(message.format(entry[stranded]))
+    return args
+
+
+def main(
+    gene_name: str,
+    output: str,
+    config: Dict,
+    buffer_length: int,
+    read_depth_plots,
+    markers: List[Tuple[str, int, int]],
+    ymax_color='#FF0000',
+    **kwargs,
+):
+    """
+    Generate the overlay diagram (SVG) of a single gene with optional markers and read depth plots
+
+    Args:
+        gene_name: name or alias of the gene to draw
+        output: directory the SVG file is written to
+        config: mapping with dotted keys; supplies the annotations and drawing settings
+        buffer_length: distance the plotted window extends past each end of the gene
+        read_depth_plots: (axis_name, bam_file, density, ymax, stranded) entries, one per plot
+        markers: (name, start, end) intervals placed on the chromosome of the gene
+        ymax_color: forwarded to `bam_to_scatter` for every plot
+        kwargs: forwarded to `DiagramSettings`
+    Raises:
+        KeyError: no annotated gene has `gene_name` as its name or among its aliases
+        DrawingFitError: the drawing still does not fit once the allowed retries are used up
+    """
+    annotations = ReferenceFile.load_from_config(config, 'annotations')
+    annotations.load()
+    drawing_width_iter_increase = config['illustrate.drawing_width_iter_increase']
+    max_drawing_retries = config['illustrate.max_drawing_retries']
+    min_mapping_quality = config['validate.min_mapping_quality']
+
+    # stop at the first matching gene so a later chromosome cannot silently replace it
+    genes = (gene for chrom in annotations.content for gene in annotations.content[chrom])
+    matches = (gene for gene in genes if gene_name in gene.aliases or gene_name == gene.name)
+    target = next(matches, None)
+    if target is None:
+        raise KeyError('Could not find gene alias or id in annotations file', gene_name)
+    _util.LOG(
+        'Found target gene: {}(aka. {}) {}:{}-{}'.format(
+            target.name, target.aliases, target.chr, target.start, target.end
+        )
+    )
+    settings = DiagramSettings(**kwargs)
+    genomic_min = max(target.start - buffer_length, 1)
+    genomic_max = target.end + buffer_length
+    plots = [
+        bam_to_scatter(
+            bam_file,
+            target.chr,
+            genomic_min,
+            genomic_max,
+            strand=target.get_strand() if stranded else None,
+            ymax=ymax,
+            density=density,
+            axis_name=axis_name,
+            min_mapping_quality=min_mapping_quality,
+            ymax_color=ymax_color,
+        )
+        for axis_name, bam_file, density, ymax, stranded in read_depth_plots
+    ]
+    vmarkers = [
+        _annotate.base.BioInterval(target.chr, start, end, name=name)
+        for name, start, end in markers
+    ]
+
+    attempts = 1
+    while True:
+        try:
+            canvas = draw_multi_transcript_overlay(
+                settings,
+                target,
+                vmarkers=vmarkers,
+                plots=plots,
+                window_buffer=buffer_length,
+                log=_util.LOG,
+            )
+            break
+        except DrawingFitError:
+            if attempts > max_drawing_retries:
+                raise
+            _util.LOG('Drawing fit: extending window', drawing_width_iter_increase)
+            settings.width += drawing_width_iter_increase
+            attempts += 1
+
+    svg_output_file = os.path.join(output, '{}_{}_overlay.svg'.format(target.name, gene_name))
+    _util.LOG('writing:', svg_output_file)
+    canvas.saveas(svg_output_file)
diff --git a/mavis/pairing/main.py b/mavis/pairing/main.py
index 1f220d04..debd823d 100644
--- a/mavis/pairing/main.py
+++ b/mavis/pairing/main.py
@@ -1,40 +1,35 @@
 import itertools
 import os
 import time
+from typing import Dict, List
 
-from .pairing import inferred_equivalent, product_key, pair_by_distance
-from .constants import DEFAULTS
 from ..annotate.constants import SPLICE_TYPE
+from ..annotate.file_io import ReferenceFile
 from ..constants import CALL_METHOD, COLUMNS, PROTOCOL, SVTYPE
-from ..util import generate_complete_stamp, LOG, output_tabbed_file, read_inputs
+from ..util import LOG, generate_complete_stamp, output_tabbed_file, read_inputs
+from .pairing import inferred_equivalent, pair_by_distance, product_key
 
 
 def main(
-    inputs,
-    output,
-    annotations,
-    flanking_call_distance=DEFAULTS.flanking_call_distance,
-    split_call_distance=DEFAULTS.split_call_distance,
-    contig_call_distance=DEFAULTS.contig_call_distance,
-    spanning_call_distance=DEFAULTS.spanning_call_distance,
+    inputs: List[str],
+    output: str,
+    config: Dict,
     start_time=int(time.time()),
-    **kwargs
+    **kwargs,
 ):
     """
     Args:
         inputs (List[str]): list of input files to read
         output (str): path to the output directory
-        flanking_call_distance (int): pairing distance for pairing with an event called by [flanking read pair](/glossary/#flanking-read-pair)
-        split_call_distance (int): pairing distance for pairing with an event called by [split read](/glossary/#split-read)
-        contig_call_distance (int): pairing distance for pairing with an event called by contig or [spanning read](/glossary/#spanning-read)
     """
-    annotations.load()
+    annotations = ReferenceFile.load_from_config(config, 'annotations', eager_load=True)
+
     # load the file
     distances = {
-        CALL_METHOD.FLANK: flanking_call_distance,
-        CALL_METHOD.SPLIT: split_call_distance,
-        CALL_METHOD.CONTIG: contig_call_distance,
-        CALL_METHOD.SPAN: spanning_call_distance,
+        CALL_METHOD.FLANK: config['pairing.flanking_call_distance'],
+        CALL_METHOD.SPLIT: config['pairing.split_call_distance'],
+        CALL_METHOD.CONTIG: config['pairing.contig_call_distance'],
+        CALL_METHOD.SPAN: config['pairing.spanning_call_distance'],
     }
 
     bpps = []
@@ -93,7 +88,15 @@ def main(
         bpp.data[COLUMNS.inferred_pairing] = ''
 
         if product_key(bpp) in bpp_by_product_key:
-            raise KeyError('duplicate bpp is not unique within lib', product_key(bpp))
+            diffs = {}
+            other = bpp_by_product_key[product_key(bpp)]
+            for key in (set(other.data.keys()) | set(bpp.data.keys())) - {'line_no'}:
+                if bpp.data.get(key) != other.data.get(key):
+                    diffs[key] = (bpp.data.get(key), other.data.get(key))
+            if diffs:
+                raise KeyError(
+                    f'duplicate bpp ({product_key(bpp)}) is not unique within lib (diffs: {diffs})'
+                )
         bpp_by_product_key[product_key(bpp)] = bpp
 
     distance_pairings = {}
@@ -137,5 +140,6 @@ def main(
         bpp = bpp_by_product_key[pkey]
         bpp.data[COLUMNS.inferred_pairing] = ';'.join(sorted(pkeys))
 
-    fname = os.path.join(output, 'mavis_paired_{}.tab'.format('_'.join(sorted(list(libraries)))))
+    fname = os.path.join(output, 'mavis_paired.tab')
     output_tabbed_file(bpps, fname)
+    generate_complete_stamp(output, LOG)
diff --git a/mavis/schedule/constants.py b/mavis/schedule/constants.py
deleted file mode 100644
index 5ee37a8c..00000000
--- a/mavis/schedule/constants.py
+++ /dev/null
@@ -1,109 +0,0 @@
-from ..constants import MavisNamespace
-from ..util import WeakMavisNamespace
-
-
-JOB_STATUS = MavisNamespace(
-    'SUBMITTED',
-    'COMPLETED',
-    'ERROR',
-    'RUNNING',
-    'FAILED',
-    'PENDING',
-    'CANCELLED',
-    NOT_SUBMITTED='NOT SUBMITTED',
-    UNKNOWN='UNKNOWN',
-    __name__='mavis.schedule.constants.JOB_STATUS',
-)
-
-
-def cumulative_job_state(states):
-    """
-    Given a set of states, return a single state based on the reporting priority
-    """
-    priority = [
-        JOB_STATUS.ERROR,
-        JOB_STATUS.FAILED,
-        JOB_STATUS.CANCELLED,
-        JOB_STATUS.NOT_SUBMITTED,
-        JOB_STATUS.PENDING,
-        JOB_STATUS.SUBMITTED,
-        JOB_STATUS.RUNNING,
-        JOB_STATUS.COMPLETED,
-    ]
-    for state in priority:
-        if state in states:
-            return state
-    return JOB_STATUS.NOT_SUBMITTED
-
-
-SCHEDULER = MavisNamespace(
-    'SGE', 'SLURM', 'TORQUE', 'LOCAL', __name__='mavis.schedule.constants.SCHEDULER'
-)
-"""MavisNamespace: scheduler types
-
-- [LOCAL](/glossary/#LOCAL)
-- [SGE](/glossary/#SGE)
-- [SLURM](/glossary/#SLURM)
-- [TORQUE](/glossary/#TORQUE)
-"""
-
-MAIL_TYPE = MavisNamespace(
-    'BEGIN', 'END', 'FAIL', 'ALL', 'NONE', __name__='mavis.schedule.constants.MAIL_TYPE'
-)
-"""
-When the scheduler should notify [mail_user](/configuration/settings/#mail_user) about a job
-
-- ``ALL`` - All other options (except none)
-- ``BEGIN`` - Send an email when the job starts
-- ``END`` - Send an email when the job has terminated
-- ``FAIL`` - Send an email if the job fails
-- ``NONE`` - Do not send email
-"""
-
-STD_OPTIONS = ['memory_limit', 'queue', 'time_limit', 'import_env', 'mail_user', 'mail_type']
-
-OPTIONS = WeakMavisNamespace(__name__='mavis.schedule.constants.options')
-"""MavisNamespace: submission options
-
-- [annotation_memory](/configuration/settings/#annotation_memory)
-- [concurrency_limit](/configuration/settings/#concurrency_limit)
-- [import_env](/configuration/settings/#import_env)
-- [mail_type](/configuration/settings/#mail_type)
-- [mail_user](/configuration/settings/#mail_user)
-- [memory_limit](/configuration/settings/#memory_limit)
-- [queue](/configuration/settings/#queue)
-- [remote_head_ssh](/configuration/settings/#remote_head_ssh)
-- [scheduler](/configuration/settings/#scheduler)
-- [time_limit](/configuration/settings/#time_limit)
-- [trans_validation_memory](/configuration/settings/#trans_validation_memory)
-- [validation_memory](/configuration/settings/#validation_memory)
-
-"""
-OPTIONS.add('annotation_memory', 12000, defn='default memory limit (MB) for the annotation stage')
-OPTIONS.add('import_env', True, defn='flag to import environment variables')
-OPTIONS.add(
-    'mail_type', MAIL_TYPE.NONE, cast_type=MAIL_TYPE, defn='When to notify the mail_user (if given)'
-)
-OPTIONS.add('mail_user', '', defn='User(s) to send notifications to')
-OPTIONS.add(
-    'memory_limit', 16000, defn='the maximum number of megabytes (MB) any given job is allowed'
-)  # 16 GB
-OPTIONS.add('queue', '', cast_type=str, defn='the queue jobs are to be submitted to')
-OPTIONS.add('scheduler', SCHEDULER.SLURM, defn='The scheduler being used', cast_type=SCHEDULER)
-OPTIONS.add(
-    'time_limit', 16 * 60 * 60, defn='the time in seconds any given jobs is allowed'
-)  # 16 hours
-OPTIONS.add(
-    'trans_validation_memory',
-    18000,
-    defn='default memory limit (MB) for the validation stage (for transcriptomes)',
-)
-OPTIONS.add('validation_memory', 16000, defn='default memory limit (MB) for the validation stage')
-OPTIONS.add(
-    'concurrency_limit',
-    None,
-    nullable=True,
-    cast_type=int,
-    defn='The concurrency limit for tasks in any given job array or the number of concurrent processes allowed for a local run',
-)
-OPTIONS.add('remote_head_ssh', '', cast_type=str, defn='ssh target for remote scheduler commands')
diff --git a/mavis/schedule/job.py b/mavis/schedule/job.py
deleted file mode 100644
index 76557baf..00000000
--- a/mavis/schedule/job.py
+++ /dev/null
@@ -1,265 +0,0 @@
-from copy import copy as _copy
-import os
-import re
-import time
-
-from ..constants import SUBCOMMAND, MavisNamespace
-from .constants import JOB_STATUS, OPTIONS, STD_OPTIONS
-
-
-class LogFile:
-    """
-    stores information about the log status
-    """
-
-    STATUS = MavisNamespace('EMPTY', 'CRASH', 'INCOMPLETE', 'COMPLETE')
-    """MavisNamespace: The status of the job based on parsing of the logfile"""
-
-    def __init__(self, filename, status, message=None):
-        """
-        Args:
-            filename (str): path to the logfile
-            status (LogFile.STATUS): the status of the logfile
-            message (str): the message parsed from the logfile. Generally this is an error from the log
-        """
-        self.filename = filename
-        self.status = self.STATUS.enforce(status)
-        self.message = message.strip() if message is not None else None
-
-    @classmethod
-    def parse(cls, filename):
-        """
-        given a file parse to see if it looks like a complete log file (contains run time),
-        was truncated, or reported an error
-        """
-        if not os.path.isfile(filename):
-            raise FileNotFoundError('Log file does not exist', filename)
-        log = None
-        with open(filename, 'r') as fh:
-            lines = [line.strip() for line in fh.readlines() if line.strip()]
-            for line in lines[::-1]:
-                line = line.strip().lower()
-                if (
-                    line and line[0] != '\x1b'
-                ):  # ignore lines starting with terminal control characters
-                    if re.search(
-                        r'(\b|^)((\S+)?error|fault|fatal|aborted|core dumped|killed|died|command not found)(\b|$)',
-                        line,
-                    ):
-                        log = LogFile(filename, cls.STATUS.CRASH, line)
-                    elif re.match(r'^\s*run time \(s\): (\d+)\s*$', line):
-                        log = LogFile(filename, cls.STATUS.COMPLETE)
-                    else:
-                        log = LogFile(filename, cls.STATUS.INCOMPLETE, line)
-                    return log
-            return LogFile(filename, cls.STATUS.EMPTY)
-
-
-class Job:
-    def __init__(
-        self,
-        stage,
-        output_dir,
-        stdout=None,
-        job_ident=None,
-        name=None,
-        dependencies=None,
-        script=None,
-        created_at=None,
-        status=JOB_STATUS.NOT_SUBMITTED,
-        status_comment='',
-        **options
-    ):
-        """
-        Args:
-            stage (str): the mavis pipleine stage this job belongs to
-            job_ident (int): the job number/id according to the scheduler being used
-            output_dir (str): path to the output directory where logs/stamps for this job will be written
-            name (str): the job name according to the scheduler being used
-            dependencies (List[Job]): list of jobs which must complete for this job to run
-            stdout (str): basename of the file to write std output to
-            script (str): path to the script which contains the commands for the job
-            created_at (int): the time stamp for when the job was created (created != submitted)
-            status (mavis.schedule.constants.JOB_STATUS): The current (since last checked) status of the job
-            status_comment (str): the comment which describes the status, generally this is used for reporting errors from the log file or failed dependencies (SLURM)
-            options (**dict): override default options specified by OPTIONS
-        """
-        self.stage = SUBCOMMAND.enforce(stage)
-        self.job_ident = job_ident
-        self.name = name
-        self.dependencies = dependencies if dependencies else []
-        self.script = script
-        self.status = JOB_STATUS.enforce(status)
-        self.output_dir = output_dir
-        self.stdout = (
-            os.path.join(output_dir, 'job-{name}-{job_ident}.log') if not stdout else stdout
-        )
-
-        self.created_at = int(created_at if created_at else time.time())
-        self.status = status
-        self.status_comment = status_comment
-
-        # inputs to the function call should override the default values
-        for option, value in [(o, OPTIONS[o]) for o in STD_OPTIONS]:
-            setattr(self, option, OPTIONS.type(option)(options.get(option, value)))
-
-        # check that nothing weird was passed in the kwargs
-        for option in options:
-            if option not in STD_OPTIONS:
-                raise AttributeError('unexpected attribute: {}'.format(option))
-
-    @property
-    def display_name(self):
-        """
-        Used for identifying this job in an ini config file
-        """
-        display_name = (
-            self.name if self.job_ident is None else '{}_{}'.format(self.name, self.job_ident)
-        )
-        display_name = re.sub(r'[\[\]#;]', '_', display_name)
-        return display_name
-
-    def flatten(self):
-        result = {}
-        for attr, value in self.__dict__.items():
-            if attr == 'dependencies':
-                value = [j.display_name for j in value]
-            try:
-                if not isinstance(value, str):
-                    value = '\n'.join([str(v) for v in value])
-            except TypeError:
-                pass
-            result[attr] = str(value)
-        return result
-
-    def logfile(self):
-        """
-        returns the path to the logfile with job name and job id substituted into the stdout pattern
-        """
-        return self.stdout.format(name=self.name, job_ident=self.job_ident)
-
-    def complete_stamp(self):
-        """
-        returns the path to the expected complete stamp
-        """
-        return os.path.join(self.output_dir, 'MAVIS-{job_ident}.COMPLETE').format(
-            job_ident=self.job_ident, name=self.name
-        )
-
-    def reset(self):
-        self.status = JOB_STATUS.NOT_SUBMITTED
-        self.status_comment = ''
-        self.job_ident = None
-
-
-class ArrayJob(Job):
-    """
-    Class for dealing with array jobs. Jobs with many tasks
-    """
-
-    def __init__(self, stage, task_list, **kwargs):
-        """
-        Args:
-            task_list (Union[List,int]): the ids of tasks in the job array
-        """
-        Job.__init__(self, stage, **kwargs)
-        self.stdout = (
-            os.path.join(self.output_dir, 'job-{name}-{job_ident}-{task_ident}.log')
-            if 'stdout' not in kwargs
-            else kwargs['stdout']
-        )
-
-        if isinstance(task_list, int):
-            task_list = list(range(1, task_list + 1))
-        self.task_list = [Task(self, n) for n in task_list]
-
-    @property
-    def tasks(self):
-        return len(self.task_list)
-
-    def get_task(self, task_ident):
-        """
-        returns a task by task id
-        """
-        task_ident = int(task_ident)
-        for task in self.task_list:
-            if task.task_ident == task_ident:
-                return task
-        raise KeyError('task id not found', task_ident, self.task_list)
-
-    def has_task(self, task_ident):
-        task_ident = int(task_ident)
-        for task in self.task_list:
-            if task.task_ident == task_ident:
-                return True
-        return False
-
-    def remove_task(self, task_ident):
-        self.task_list = [task for task in self.task_list if task.task_ident != task_ident]
-
-    def logfile(self, task_ident):
-        return self.stdout.format(name=self.name, job_ident=self.job_ident, task_ident=task_ident)
-
-    def complete_stamp(self, task_ident):
-        """
-        returns the path to the expected complete stamp
-        """
-        return os.path.join(self.output_dir, 'MAVIS-{job_ident}.COMPLETE').format(
-            job_ident=self.job_ident, name=self.name, task_ident=task_ident
-        )
-
-    def flatten(self):
-        result = {k: v for k, v in Job.flatten(self).items() if k != 'task_list'}
-        result['task_list'] = '\n'.join([str(t.task_ident) for t in self.task_list])
-        return result
-
-    def copy_with_tasks(self, task_list):
-        copy = _copy(self)
-        copy.task_list = [Task(self, n) for n in task_list]
-        copy.dependencies = []
-        copy.reset()
-        return copy
-
-    def reset(self):
-        Job.reset(self)
-        for task in self.task_list:
-            task.reset()
-
-    def __repr__(self):
-        return '{}(job_ident={}, name={}, stage={}, status={})'.format(
-            self.__class__.__name__, self.job_ident, self.name, self.stage, self.status
-        )
-
-
-class TorqueArrayJob(ArrayJob):
-    def complete_stamp(self, task_ident):
-        # example: MAVIS-136[1].torque01.bcgsc.ca.COMPLETE
-        job_ident = re.sub(r'\[\]', '[{}]'.format(task_ident), self.job_ident)
-        return os.path.join(self.output_dir, 'MAVIS-{job_ident}.COMPLETE').format(
-            job_ident=job_ident, name=self.name, task_ident=task_ident
-        )
-
-    def logfile(self, task_ident):
-        # example: job-MV_mock-A47933_batch-B9PE6YAtnHu4cHA2GrsEzX-1-136[1].torque01.bcgsc.ca-1.log-1
-        name = '{}-{}'.format(self.name, task_ident)
-        job_ident = re.sub(r'\[\]', '[{}]'.format(task_ident), self.job_ident)
-        log = self.stdout.format(name=name, job_ident=job_ident, task_ident=task_ident)
-        return '{}-{}'.format(log, task_ident)
-
-
-class Task:
-    def __init__(self, array_job, task_ident):
-        self.array_job = array_job
-        self.task_ident = int(task_ident)
-        self.status = JOB_STATUS.NOT_SUBMITTED
-        self.status_comment = ''
-
-    def logfile(self):
-        return self.array_job.logfile(self.task_ident)
-
-    def complete_stamp(self):
-        return self.array_job.complete_stamp(self.task_ident)
-
-    def reset(self):
-        self.status = JOB_STATUS.NOT_SUBMITTED
-        self.status_comment = ''
diff --git a/mavis/schedule/local.py b/mavis/schedule/local.py
deleted file mode 100644
index 8037c3a1..00000000
--- a/mavis/schedule/local.py
+++ /dev/null
@@ -1,161 +0,0 @@
-import atexit
-from concurrent import futures
-from datetime import datetime
-import logging
-import multiprocessing
-import os
-
-import shortuuid
-
-from ..util import LOG
-from ..annotate.file_io import REFERENCE_DEFAULTS, ReferenceFile
-
-from .job import Job
-from .scheduler import Scheduler
-from .constants import JOB_STATUS, SCHEDULER
-
-
-class LocalJob(Job):
-    def __init__(self, args, func, rank=None, response=None, *pos, **kwargs):
-        """
-        Args:
-            args (list): A list of arguments to passed to the function given
-            func (Callable): the function to be run
-            rank (int): rank of the job within the pool
-            response (concurrent.futures.Future): the result from the subprocess
-        """
-        self.args = args
-        self.func = func
-        self.response = response
-        self.rank = rank
-        for filetype in REFERENCE_DEFAULTS:
-            setattr(self, filetype, kwargs.pop(filetype, None))
-        Job.__init__(self, *pos, **kwargs)
-
-    def check_complete(self):
-        """
-        check that the complete stamp associated with this job exists
-        """
-        return os.path.exists(self.complete_stamp())
-
-    def flatten(self):
-        result = Job.flatten(self)
-        omit = {
-            'script',
-            'rank',
-            'response',
-            'func',
-            'queue',
-            'import _env',
-            'mail_user',
-            'mail_type',
-        }
-        return {k: v for k, v in result.items() if k not in omit}
-
-
-def write_stamp_callback(response):
-    if response.exception() or response.cancelled() or response.running():
-        return
-    try:
-        LOG('writing:', response.complete_stamp, time_stamp=True, indent_level=1)
-        with open(response.complete_stamp, 'w') as fh:
-            fh.write('end: {}\n'.format(int(datetime.timestamp(datetime.utcnow()))))
-    except Exception as err:
-        LOG('error writing the complete stamp', level=logging.CRITICAL, indent_level=1)
-        raise err
-
-
-class LocalScheduler(Scheduler):
-    """
-    Scheduler class for dealing with running mavis locally
-    """
-
-    NAME = SCHEDULER.LOCAL
-    """:attr:`mavis.schedule.constants.SCHEDULER`: the type of scheduler"""
-
-    def __init__(self, *pos, **kwargs):
-        Scheduler.__init__(self, *pos, **kwargs)
-        self.concurrency_limit = (
-            multiprocessing.cpu_count() - 1
-            if not self.concurrency_limit
-            else self.concurrency_limit
-        )
-        self.pool = None  # set this at the first submission
-        self.submitted = {}  # submitted jobs process response objects by job ID
-        atexit.register(self.close)  # makes the pool 'auto close' on normal python exit
-
-    def submit(self, job):
-        """
-        Add a job to the pool
-
-        Args:
-            job (LocalJob): the job to be submitted
-        """
-        if self.pool is None:
-            self.pool = futures.ProcessPoolExecutor(max_workers=self.concurrency_limit)
-        if not job.job_ident:
-            job.job_ident = str(shortuuid.uuid())
-            job.status = JOB_STATUS.SUBMITTED
-        args = [arg.format(job_ident=job.job_ident, name=job.name) for arg in job.args]
-        # if this job exists in the pool, return its response object
-        if job.job_ident in self.submitted:
-            return self.submitted[job.job_ident]
-
-        # load any reference files not cached into the parent memory space
-        for filetype in [f for f in REFERENCE_DEFAULTS.keys() if f != 'aligner_reference']:
-            if getattr(job, filetype) is not None:
-                ref = ReferenceFile(filetype, getattr(job, filetype))
-                ref.load(verbose=False)
-        # otherwise add it to the pool
-        job.response = self.pool.submit(
-            job.func, args
-        )  # no arguments, defined all in the job object
-        setattr(job.response, 'complete_stamp', job.complete_stamp())
-        job.response.add_done_callback(write_stamp_callback)
-        self.submitted[job.job_ident] = job
-        job.rank = len(self.submitted)
-        LOG('submitted', job.name, indent_level=1)
-        return job
-
-    def wait(self):
-        """
-        wait for everything in the current pool to finish
-        """
-        if self.pool is None:
-            return
-        self.pool.shutdown(True)
-        self.pool = None
-        for job in self.submitted.values():
-            self.update_info(job)
-
-    def update_info(self, job):
-        """
-        Args:
-            job (LocalJob): the job to check and update the status for
-        """
-        # check if the job has been submitted already and completed or partially run
-        if not job.job_ident:
-            job.status = JOB_STATUS.NOT_SUBMITTED
-        elif os.path.exists(job.complete_stamp()):
-            job.status = JOB_STATUS.COMPLETED
-        elif os.path.exists(job.logfile()) and job.job_ident not in self.submitted:
-            job.status = JOB_STATUS.UNKNOWN
-        elif job.job_ident in self.submitted:
-            if job.response.done():
-                excpt = job.response.exception()
-                if excpt is None:
-                    job.status = JOB_STATUS.COMPLETED
-                else:
-                    job.status = JOB_STATUS.FAILED
-                    job.status_comment = str(excpt)
-            elif job.response.running():
-                job.status = JOB_STATUS.RUNNING
-            else:
-                job.status = JOB_STATUS.PENDING
-        else:
-            job.status = JOB_STATUS.UNKNOWN
-
-    def close(self):
-        if self.pool is not None:
-            self.pool.shutdown()
-            self.pool = None
diff --git a/mavis/schedule/pipeline.py b/mavis/schedule/pipeline.py
deleted file mode 100644
index bdb42f63..00000000
--- a/mavis/schedule/pipeline.py
+++ /dev/null
@@ -1,945 +0,0 @@
-from configparser import ConfigParser, ExtendedInterpolation
-import os
-import re
-import shutil
-import subprocess
-
-from shortuuid import uuid
-
-from ..cluster import constants as _CLUSTER
-from ..constants import SUBCOMMAND, PROTOCOL, EXIT_ERROR, EXIT_OK, EXIT_INCOMPLETE
-from ..tools import convert_tool_output
-from ..util import mkdirp, output_tabbed_file, LOG, DEVNULL
-from ..validate import constants as _VALIDATE
-from ..annotate import constants as _ANNOTATE
-from ..annotate import file_io as _file_io
-from ..summary import constants as _SUMMARY
-from .job import Job, ArrayJob, LogFile, TorqueArrayJob
-from .scheduler import SlurmScheduler, TorqueScheduler, SgeScheduler, consecutive_ranges
-from .local import LocalJob, LocalScheduler
-from .constants import JOB_STATUS, STD_OPTIONS, OPTIONS, SCHEDULER
-
-PROGNAME = shutil.which('mavis')
-SHEBANG = '#!/bin/bash'
-SCHEDULERS_BY_NAME = {
-    sched.NAME: sched for sched in [SlurmScheduler, TorqueScheduler, LocalScheduler, SgeScheduler]
-}
-
-
-def stringify_args_to_command(args):
-    """
-    takes a list of arguments and prepares them for writing to a bash script
-    """
-    command = []
-    for argname, value in args.items():
-        if isinstance(value, _file_io.ReferenceFile):
-            value = value.name
-        if isinstance(value, str):
-            command.append('--{} "{}"'.format(argname, value))
-        else:
-            try:
-                value = ' '.join([str(v) for v in value])
-            except TypeError:
-                pass
-            command.append('--{} {}'.format(argname, value))
-    return command
-
-
-def parse_run_time(filename):
-    """
-    parses the run time listed at the end of a file following mavis conventions
-    """
-    with open(filename, 'r') as fh:
-        content = fh.read().strip()
-        for line in [line.strip() for line in content.split('\n')][::-1]:
-            match = re.match(r'^\s*run time \(s\): (\d+)\s*$', line)  # older style complete stamp
-            if match:
-                return int(match.group(1))
-            match = re.search(r'start:\s*(\d+)\s*end:\s*(\d+)', line)
-            if match:
-                return int(match.group(2)) - int(match.group(1))
-    return -1
-
-
-def run_conversion(config, libconf, conversion_dir, assume_no_untemplated=True):
-    """
-    Converts files if not already converted. Returns a list of filenames
-    """
-    inputs = []
-    # run the conversions
-    for input_file in libconf.inputs:
-        output_filename = os.path.join(conversion_dir, input_file + '.tab')
-        if input_file in config.convert:
-            if not os.path.exists(output_filename):
-                command = config.convert[input_file]
-                if (
-                    command[0] == 'convert_tool_output'
-                ):  # convert_tool_output FILEPATH [FILEPATH...] TOOL stranded
-                    LOG('converting input command:', command)
-                    output_tabbed_file(
-                        convert_tool_output(
-                            command[1:-2],
-                            command[-2],
-                            command[-1],
-                            log=LOG,
-                            assume_no_untemplated=assume_no_untemplated,
-                        ),
-                        output_filename,
-                    )
-                else:
-                    command = ' '.join(command) + ' -o {}'.format(output_filename)
-                    LOG('converting input command:')
-                    LOG('>>>', command, time_stamp=False)
-                    subprocess.check_output(command, shell=True)
-            inputs.append(output_filename)
-        else:
-            inputs.append(input_file)
-    return inputs
-
-
-def validate_args(config, libconf):
-    """
-    Pull arguments from the main config and library specific config to pass to validate
-
-    Args:
-        config (MavisConfig): the main program config
-        libconf (LibraryConfig): library specific configuration
-    """
-    allowed_args = [
-        'masking',
-        'reference_genome',
-        'aligner_reference',
-        'library',
-        'bam_file',
-        'protocol',
-        'read_length',
-        'stdev_fragment_size',
-        'median_fragment_size',
-        'strand_specific',
-        'annotations',
-    ] + list(_VALIDATE.DEFAULTS.keys())
-
-    # overwrite args in order of increasing specificity
-    args = {}
-    args.update(_VALIDATE.DEFAULTS.items())
-    args.update({k: v.name for k, v in config.reference.items()})
-    args.update(config.validate.items())
-    args.update(libconf.items())
-    args = {k: v for k, v in args.items() if k in allowed_args}
-    return args
-
-
-def annotate_args(config, libconf):
-    """
-    Pull arguments from the main config and library specific config to pass to annotate
-
-    Args:
-        config (MavisConfig): the main program config
-        libconf (LibraryConfig): library specific configuration
-    """
-    allowed_args = [
-        'reference_genome',
-        'template_metadata',
-        'masking',
-        'annotations',
-        'min_orf_size',
-        'max_orf_cap',
-        'library',
-        'protocol',
-        'min_domain_mapping_match',
-        'domain_name_regex_filter',
-        'max_proximity',
-    ] + list(_ANNOTATE.DEFAULTS.keys())
-    args = {}
-    args.update(_ANNOTATE.DEFAULTS.items())
-    args.update({k: v.name for k, v in config.reference.items()})
-    args.update(config.cluster.items())
-    args.update(config.illustrate.items())
-    args.update(config.annotate.items())
-    args.update(libconf.items())
-    args = {k: v for k, v in args.items() if k in allowed_args}
-    return args
-
-
-def summary_args(config):
-    """
-    Pull arguments from the main config and library specific config to pass to summary
-
-    Args:
-        config (MavisConfig): the main program config
-        libconf (LibraryConfig): library specific configuration
-    """
-    allowed_args = [
-        'flanking_call_distance',
-        'split_call_distance',
-        'contig_call_distance',
-        'spanning_call_distance',
-        'dgv_annotation',
-        'annotations',
-    ] + list(_SUMMARY.DEFAULTS.keys())
-    args = {}
-    args.update({k: v.name for k, v in config.reference.items()})
-    args.update(config.pairing.items())
-    args.update(config.summary.items())
-    args = {k: v for k, v in args.items() if k in allowed_args}
-    return args
-
-
-def cluster_args(config, libconf):
-    """
-    Pull arguments from the main config and library specific config to pass to cluster
-
-    Args:
-        config (MavisConfig): the main program config
-        libconf (LibraryConfig): library specific configuration
-    """
-    allowed_args = [
-        'masking',
-        'annotations',
-        'library',
-        'protocol',
-        'disease_status',
-        'strand_specific',
-    ] + list(_CLUSTER.DEFAULTS.keys())
-    args = {}
-    args.update(_CLUSTER.DEFAULTS.items())
-    args.update({k: v.name for k, v in config.reference.items()})
-    args.update(config.cluster.items())
-    args.update(config.illustrate.items())
-    args.update(config.annotate.items())
-    args.update(libconf.items())
-    args = {k: v for k, v in args.items() if k in allowed_args}
-    return args
-
-
-class Pipeline:
-    ERROR_STATES = {
-        JOB_STATUS.ERROR,
-        JOB_STATUS.FAILED,
-        JOB_STATUS.CANCELLED,
-        JOB_STATUS.UNKNOWN,
-        JOB_STATUS.NOT_SUBMITTED,
-    }
-
-    def __init__(
-        self,
-        output_dir,
-        scheduler,
-        validations=None,
-        annotations=None,
-        pairing=None,
-        summary=None,
-        checker=None,
-        batch_id='batch-{}'.format(uuid()),
-    ):
-        """
-        Args:
-            output_dir (str): path to main output directory for all mavis pipeline results
-            scheduler (Scheduler): the class for interacting with a job scheduler
-            validations (List[Job]): list of validation jobs
-            annotations (List[Job]): list of annotation jobs
-            pairing (Job): pairing job
-            summary (Job): summary job
-            batch_id (str): the batch id for this pipeline run. Used in avoinfing job name conflicts
-        """
-        self.scheduler = scheduler
-        self.output_dir = output_dir
-        self.validations = [] if validations is None else validations
-        self.annotations = [] if annotations is None else annotations
-        self.pairing = pairing
-        self.summary = summary
-        self.checker = checker
-        self.batch_id = batch_id
-        self.args = {}  # for local runs only, store config to be passed to MAVIS stage
-
-    def write_submission_script(self, subcommand, job, args, aligner_path=None):
-        """
-        Args:
-            subcommand (SUBCOMMAND): the pipeline step this script will run
-            job (Job): the job the script is for
-            args (dict): arguments for the subcommand
-        """
-        LOG('writing:', job.script, time_stamp=True)
-        with open(job.script, 'w') as fh:
-            fh.write(
-                """{shebang}
-{aligner_path}
-cd {cwd}
-START_TIME=$(date +%s)\n\n""".format(
-                    shebang=SHEBANG,
-                    aligner_path='export PATH={}:$PATH'.format(os.path.dirname(aligner_path))
-                    if aligner_path
-                    else '',
-                    cwd=os.getcwd(),
-                )
-            )
-            commands = [PROGNAME, subcommand] + stringify_args_to_command(args)
-            fh.write(' \\\n\t'.join(commands) + '\n\n')
-            fh.write(
-                """
-code=$?
-
-if [ "$code" -ne "0" ]
-then
-    exit $code
-fi
-
-END_TIME=$(date +%s)
-
-echo "start: $START_TIME end: $END_TIME" > {}/MAVIS-${}.COMPLETE
-
-            """.format(
-                    args['output'],
-                    self.scheduler.ENV_JOB_IDENT
-                    if not isinstance(job, ArrayJob)
-                    else self.scheduler.ENV_ARRAY_IDENT,
-                )
-            )
-
-    @classmethod
-    def format_args(cls, subcommand, args):
-        command = [subcommand]
-        for arg, val in args.items():
-            command.append('--{}'.format(arg))
-            if isinstance(val, str):
-                command.append(val)
-            else:
-                try:
-                    command.extend(iter(val))
-                except TypeError:
-                    command.append(val)
-        return [str(v) for v in command]
-
-    @classmethod
-    def build(cls, config):
-        """
-        Args:
-            config (MavisConfig): the main configuration. Note this is the config after all reference inputs have been loaded
-        Returns:
-            Pipeline: the pipeline instance with job dependencies information etc.
-        """
-        from ..main import main as _main
-
-        conversion_dir = mkdirp(os.path.join(config.output, 'converted_inputs'))
-        config.output = os.path.abspath(config.output)
-        if config.schedule.scheduler not in SCHEDULERS_BY_NAME:
-            raise NotImplementedError(
-                'unsupported scheduler', config.schedule.scheduler, list(SCHEDULERS_BY_NAME.keys())
-            )
-
-        scheduler = SCHEDULERS_BY_NAME[config.schedule.scheduler](
-            config.schedule.get('concurrency_limit', OPTIONS.concurrency_limit),
-            remote_head_ssh=config.schedule.get('remote_head_ssh', OPTIONS.remote_head_ssh),
-        )
-        pipeline = Pipeline(output_dir=config.output, scheduler=scheduler)
-
-        annotation_output_files = []
-        for libconf in config.libraries.values():
-            base = os.path.join(
-                config.output,
-                '{}_{}_{}'.format(libconf.library, libconf.disease_status, libconf.protocol),
-            )
-            LOG('setting up the directory structure for', libconf.library, 'as', base)
-            libconf.inputs = run_conversion(config, libconf, conversion_dir)
-
-            # run the cluster stage
-            cluster_output = mkdirp(
-                os.path.join(base, SUBCOMMAND.CLUSTER)
-            )  # creates the clustering output dir
-            args = cluster_args(config, libconf)
-            args.update({'batch_id': pipeline.batch_id, 'output': cluster_output})
-            args['split_only'] = SUBCOMMAND.CLUSTER in config.get('skip_stage', [])
-            args['inputs'] = libconf.inputs
-            LOG('clustering', '(split only)' if args['split_only'] else '', time_stamp=True)
-            clustering_log = os.path.join(
-                args['output'], 'MC_{}_{}.log'.format(libconf.library, pipeline.batch_id)
-            )
-            LOG('writing:', clustering_log, time_stamp=True)
-            args['log'] = clustering_log
-            clustered_files = _main(cls.format_args(SUBCOMMAND.CLUSTER, args))
-
-            # make a validation job for each cluster file
-            validate_jobs = []
-
-            if SUBCOMMAND.VALIDATE not in config.skip_stage:
-                mkdirp(os.path.join(base, SUBCOMMAND.VALIDATE))
-                for task_ident in range(1, len(clustered_files) + 1):
-                    mkdirp(
-                        os.path.join(
-                            base, SUBCOMMAND.VALIDATE, '{}-{}'.format(pipeline.batch_id, task_ident)
-                        )
-                    )
-                args = validate_args(config, libconf)
-
-                script_name = os.path.join(base, SUBCOMMAND.VALIDATE, 'submit.sh')
-                job_options = {k: v for k, v in config.schedule.items() if k in STD_OPTIONS}
-                job_options['memory_limit'] = config.schedule.validation_memory
-
-                if libconf.protocol == PROTOCOL.TRANS:
-                    job_options['memory_limit'] = config.schedule.trans_validation_memory
-
-                if scheduler.NAME == SCHEDULER.LOCAL:
-                    job_options['reference_genome'] = args['reference_genome']
-                    if libconf.protocol == PROTOCOL.TRANS:
-                        job_options['annotations'] = args['annotations']
-
-                    for task_ident in range(1, len(clustered_files) + 1):
-                        args['inputs'] = [
-                            os.path.join(
-                                cluster_output, '{}-{}.tab'.format(pipeline.batch_id, task_ident)
-                            )
-                        ]
-                        args['output'] = os.path.join(
-                            base, SUBCOMMAND.VALIDATE, '{}-{}'.format(pipeline.batch_id, task_ident)
-                        )
-                        job_name = 'MV_{}_{}-{}'.format(
-                            libconf.library, pipeline.batch_id, task_ident
-                        )
-                        args['log'] = os.path.join(args['output'], 'job-{name}-{job_ident}.log')
-                        validate_job = LocalJob(
-                            stage=SUBCOMMAND.VALIDATE,
-                            output_dir=args['output'],
-                            stdout=args['log'],
-                            name=job_name,
-                            args=cls.format_args(SUBCOMMAND.VALIDATE, args),
-                            func=_main,
-                            **job_options
-                        )
-                        pipeline.validations.append(validate_job)
-                        validate_jobs.append(validate_job)
-                else:
-                    args['inputs'] = os.path.join(
-                        cluster_output,
-                        '{}-${}.tab'.format(pipeline.batch_id, scheduler.ENV_TASK_IDENT),
-                    )
-                    args['output'] = os.path.join(
-                        base,
-                        SUBCOMMAND.VALIDATE,
-                        '{}-${}'.format(pipeline.batch_id, scheduler.ENV_TASK_IDENT),
-                    )
-                    aligner_path = shutil.which(args['aligner'].split(' ')[0])
-                    job_class = ArrayJob if scheduler.NAME != SCHEDULER.TORQUE else TorqueArrayJob
-                    validate_job = job_class(
-                        stage=SUBCOMMAND.VALIDATE,
-                        task_list=len(clustered_files),
-                        output_dir=os.path.join(
-                            base, SUBCOMMAND.VALIDATE, '{}-{{task_ident}}'.format(pipeline.batch_id)
-                        ),
-                        script=script_name,
-                        name='MV_{}_{}'.format(libconf.library, pipeline.batch_id),
-                        **job_options
-                    )
-                    pipeline.write_submission_script(
-                        SUBCOMMAND.VALIDATE, validate_job, args, aligner_path=aligner_path
-                    )
-                    pipeline.validations.append(validate_job)
-                    validate_jobs.append(validate_job)
-
-            # make an annotation job for each validation/cluster job/file
-            mkdirp(os.path.join(base, SUBCOMMAND.ANNOTATE))
-            for task_ident in range(1, len(clustered_files) + 1):
-                mkdirp(
-                    os.path.join(
-                        base, SUBCOMMAND.ANNOTATE, '{}-{}'.format(pipeline.batch_id, task_ident)
-                    )
-                )
-            args = annotate_args(config, libconf)
-
-            script_name = os.path.join(base, SUBCOMMAND.ANNOTATE, 'submit.sh')
-            job_options = {k: v for k, v in config.schedule.items() if k in STD_OPTIONS}
-            job_options['memory_limit'] = config.schedule.annotation_memory
-
-            if isinstance(scheduler, LocalScheduler):
-                job_options['annotations'] = args['annotations']
-                job_options['reference_genome'] = args['reference_genome']
-                if args['template_metadata']:
-                    job_options['template_metadata'] = args['template_metadata']
-                for task_ident in range(1, len(clustered_files) + 1):
-                    args['output'] = os.path.join(
-                        base, SUBCOMMAND.ANNOTATE, '{}-{}'.format(pipeline.batch_id, task_ident)
-                    )
-                    # annotate 'clustered' files if the pipeline does not include the validation step
-                    if SUBCOMMAND.VALIDATE not in config.skip_stage:
-                        args['inputs'] = [
-                            os.path.join(
-                                base,
-                                SUBCOMMAND.VALIDATE,
-                                '{}-{}'.format(pipeline.batch_id, task_ident),
-                                _VALIDATE.PASS_FILENAME,
-                            )
-                        ]
-                    else:
-                        args['inputs'] = [
-                            os.path.join(
-                                cluster_output, '{}-{}.tab'.format(pipeline.batch_id, task_ident)
-                            )
-                        ]
-                    job_name = 'MA_{}_{}-{}'.format(libconf.library, pipeline.batch_id, task_ident)
-                    args['log'] = os.path.join(args['output'], 'job-{name}-{job_ident}.log')
-                    annotate_job = LocalJob(
-                        stage=SUBCOMMAND.ANNOTATE,
-                        script=script_name,
-                        name=job_name,
-                        stdout=args['log'],
-                        output_dir=args['output'],
-                        args=cls.format_args(SUBCOMMAND.ANNOTATE, args),
-                        func=_main,
-                        **job_options
-                    )
-                    pipeline.annotations.append(annotate_job)
-                    annotation_output_files.append(
-                        os.path.join(args['output'], _ANNOTATE.PASS_FILENAME)
-                    )
-                    if validate_jobs:
-                        annotate_job.dependencies.append(validate_jobs[task_ident - 1])
-            else:
-                args['output'] = os.path.join(
-                    base,
-                    SUBCOMMAND.ANNOTATE,
-                    '{}-${}'.format(pipeline.batch_id, scheduler.ENV_TASK_IDENT),
-                )
-                # annotate 'clustered' files if the pipeline does not include the validation step
-                if SUBCOMMAND.VALIDATE not in config.skip_stage:
-                    args['inputs'] = [
-                        os.path.join(
-                            base,
-                            SUBCOMMAND.VALIDATE,
-                            '{}-${}'.format(pipeline.batch_id, scheduler.ENV_TASK_IDENT),
-                            _VALIDATE.PASS_FILENAME,
-                        )
-                    ]
-                else:
-                    args['inputs'] = [
-                        os.path.join(
-                            cluster_output,
-                            '{}-${}.tab'.format(pipeline.batch_id, scheduler.ENV_TASK_IDENT),
-                        )
-                    ]
-
-                job_class = ArrayJob if scheduler.NAME != SCHEDULER.TORQUE else TorqueArrayJob
-                annotate_job = job_class(
-                    stage=SUBCOMMAND.ANNOTATE,
-                    task_list=len(clustered_files),
-                    script=script_name,
-                    name='MA_{}_{}'.format(libconf.library, pipeline.batch_id),
-                    output_dir=os.path.join(
-                        base, SUBCOMMAND.ANNOTATE, '{}-{{task_ident}}'.format(pipeline.batch_id)
-                    ),
-                    **job_options
-                )
-                pipeline.write_submission_script(SUBCOMMAND.ANNOTATE, annotate_job, args)
-                pipeline.annotations.append(annotate_job)
-                if validate_jobs:
-                    annotate_job.dependencies.extend(validate_jobs)
-
-                # add the expected output file names for input to pairing
-                for taskid in range(1, len(clustered_files) + 1):
-                    fname = os.path.join(args['output'], _ANNOTATE.PASS_FILENAME)
-                    fname = re.sub(r'\${}'.format(scheduler.ENV_TASK_IDENT), str(taskid), fname)
-                    annotation_output_files.append(fname)
-
-        # set up the pairing job
-        args = {}
-        args.update(config.pairing.items())
-        args['output'] = os.path.join(config.output, SUBCOMMAND.PAIR)
-        args['annotations'] = config.reference.annotations
-        mkdirp(args['output'])
-        args['inputs'] = annotation_output_files
-        job_name = 'MP_{}'.format(pipeline.batch_id)
-
-        script_name = os.path.join(config.output, SUBCOMMAND.PAIR, 'submit.sh')
-
-        if isinstance(scheduler, LocalScheduler):
-            args['log'] = os.path.join(args['output'], 'job-{name}-{job_ident}.log')
-            pipeline.pairing = LocalJob(
-                stage=SUBCOMMAND.PAIR,
-                script=script_name,
-                output_dir=args['output'],
-                stdout=args['log'],
-                name=job_name,
-                dependencies=pipeline.annotations,
-                args=cls.format_args(SUBCOMMAND.PAIR, args),
-                func=_main,
-                **{k: v for k, v in config.schedule.items() if k in STD_OPTIONS}
-            )
-        else:
-            pipeline.pairing = Job(
-                SUBCOMMAND.PAIR,
-                script=script_name,
-                output_dir=args['output'],
-                name=job_name,
-                dependencies=pipeline.annotations,
-                **{k: v for k, v in config.schedule.items() if k in STD_OPTIONS}
-            )
-            pipeline.write_submission_script(SUBCOMMAND.PAIR, pipeline.pairing, args)
-        # set up the summary job
-        args = summary_args(config)
-        args['output'] = os.path.join(config.output, SUBCOMMAND.SUMMARY)
-        mkdirp(args['output'])
-        args['inputs'] = [os.path.join(config.output, SUBCOMMAND.PAIR, 'mavis_paired*.tab')]
-        script_name = os.path.join(args['output'], 'submit.sh')
-        job_name = 'MS_{}'.format(pipeline.batch_id)
-        if isinstance(scheduler, LocalScheduler):
-            args['log'] = os.path.join(args['output'], 'job-{name}-{job_ident}.log')
-            pipeline.summary = LocalJob(
-                stage=SUBCOMMAND.SUMMARY,
-                name=job_name,
-                output_dir=args['output'],
-                stdout=args['log'],
-                script=script_name,
-                dependencies=[pipeline.pairing],
-                args=cls.format_args(SUBCOMMAND.SUMMARY, args),
-                func=_main,
-                **{k: v for k, v in config.schedule.items() if k in STD_OPTIONS}
-            )
-        else:
-            pipeline.summary = Job(
-                stage=SUBCOMMAND.SUMMARY,
-                name=job_name,
-                output_dir=args['output'],
-                script=script_name,
-                dependencies=[pipeline.pairing],
-                **{k: v for k, v in config.schedule.items() if k in STD_OPTIONS}
-            )
-            pipeline.write_submission_script(SUBCOMMAND.SUMMARY, pipeline.summary, args)
-        return pipeline
-
-    def _resubmit_job(self, job):
-        """
-        Given a failed job, cancel it and all of its dependencies and then resubmit them
-        """
-        # resubmit the job or all failed tasks for the job. Update any dependencies
-        failed_tasks = set()
-        try:
-            for task in job.task_list:
-                if task.status in self.ERROR_STATES:
-                    failed_tasks.add(task.task_ident)
-            if len(failed_tasks) == len(job.task_list):
-                failed_tasks = []
-        except AttributeError:  # non-array jobs
-            pass
-        # SGE cannot submit a task list that is non-consecutive so we will cancel the entire array
-        if self.scheduler.NAME == SCHEDULER.SGE and len(consecutive_ranges(failed_tasks)) != 1:
-            failed_tasks = set()
-
-        if failed_tasks:  # resubmit failed tasks only and create a new job
-            new_job = job.copy_with_tasks(failed_tasks)
-            for task_ident in failed_tasks:
-                # cancel and remove the failed task
-                try:
-                    self.scheduler.cancel(job, task_ident=task_ident)
-                    job.remove_task(task_ident)
-                except subprocess.CalledProcessError:  # ignore cancelling errors
-                    pass
-            self.scheduler.submit(new_job)
-        else:
-            # 'clean' the current job so that it is no longer 'submitted'
-            self.scheduler.cancel(job)
-            job.reset()
-            try:
-                for task in job.task_list:
-                    task.status = JOB_STATUS.NOT_SUBMITTED
-                    task.status_comment = ''
-            except AttributeError:
-                pass
-            self.scheduler.submit(job)
-            new_job = job
-
-        if new_job.stage == SUBCOMMAND.VALIDATE:
-            if new_job not in self.validations:
-                self.validations.append(new_job)
-            # cancel and resubmit annotate, pairing and summary jobs
-            new_annotations = []
-            for ajob in self.annotations:
-                if ajob.dependencies == [job] and failed_tasks:  # only dependent on this job
-                    try:
-                        new_ajob = ajob.copy_with_tasks(failed_tasks)
-                        new_annotations.append(new_ajob)
-                        for task in failed_tasks:
-                            self.scheduler.cancel(ajob, task_ident=task)
-                            ajob.remove_task(task)
-                        new_ajob.dependencies = [new_job]
-                        self.scheduler.submit(new_ajob)
-                    except AttributeError:
-                        self.scheduler.cancel(ajob)
-                        ajob.reset()
-                        if new_job not in ajob.dependencies:
-                            ajob.dependencies.append(new_job)
-                        self.scheduler.submit(ajob)
-                elif job in ajob.dependencies:
-                    # dependent on multiple jobs
-                    self.scheduler.cancel(ajob)
-                    ajob.reset()
-                    if new_job not in ajob.dependencies:
-                        ajob.dependencies.append(new_job)
-                    self.scheduler.submit(ajob)
-                # ignore annotation jobs not related to the failed validation job
-            self.annotations.extend(new_annotations)
-        elif new_job.stage == SUBCOMMAND.ANNOTATE:
-            if new_job not in self.annotations:
-                self.annotations.append(new_job)
-
-        if new_job.stage in {SUBCOMMAND.VALIDATE, SUBCOMMAND.ANNOTATE}:
-            # cancel pairing
-            self.scheduler.cancel(self.pairing)
-            self.pairing.reset()
-            self.pairing.dependencies = self.annotations[:]
-
-        # all resubmissions result in cancelling summary
-        self.scheduler.cancel(self.summary)
-        self.summary.reset()
-
-    def _job_status(self, job, submit=False, resubmit=False, log=DEVNULL):
-        """
-        report information regarding a particular job status
-        """
-        run_time = -1
-        if not job.job_ident and (submit or resubmit):
-            self.scheduler.submit(job)
-        elif job.job_ident and resubmit and job.status in self.ERROR_STATES:
-            self._resubmit_job(job)
-        if job.job_ident:
-            log('{} ({}) is {}'.format(job.name, job.job_ident, job.status))
-        else:
-            log('{} is {}'.format(job.name, job.status))
-        if job.status == JOB_STATUS.COMPLETED:
-            if isinstance(job, ArrayJob):
-                for task in job.task_list:
-                    if not os.path.exists(task.complete_stamp()):
-                        log('complete stamp is expected but does not exist', indent_level=1)
-                        log(task.complete_stamp(), indent_level=2)
-                    else:
-                        run_time = max(run_time, parse_run_time(task.complete_stamp()))
-            elif not os.path.exists(job.complete_stamp()):
-                with log.indent() as log:
-                    log('complete stamp is expected but does not exist')
-                    log(job.complete_stamp())
-            else:
-                run_time = max(run_time, parse_run_time(job.complete_stamp()))
-            if run_time >= 0:
-                if isinstance(job, ArrayJob):
-                    log(
-                        '{} {} COMPLETED'.format(
-                            job.tasks, 'task is' if job.tasks == 1 else 'tasks are'
-                        ),
-                        indent_level=1,
-                    )
-                log('run time: {}'.format(run_time), indent_level=1)
-        else:
-            if isinstance(job, ArrayJob):
-                tasks_by_status = {}
-                for task in job.task_list:
-                    tasks_by_status.setdefault(task.status, []).append(task)
-                for status, tasks in tasks_by_status.items():
-                    comments = set([t.status_comment for t in tasks if t.status_comment])
-                    context = 'tasks are' if len(tasks) != 1 else 'task is'
-                    LOG('{} {} {}'.format(len(tasks), context, status), indent_level=2)
-                    for comment in comments:
-                        LOG('comment:', comment, indent_level=3)
-            elif job.status not in {
-                JOB_STATUS.PENDING,
-                JOB_STATUS.NOT_SUBMITTED,
-                JOB_STATUS.SUBMITTED,
-            }:
-                try:
-                    content = LogFile.parse(job.logfile())
-                    log('{}: {}'.format(content.status, content.message), indent_level=1)
-                except FileNotFoundError:
-                    log('missing log file:', job.logfile(), indent_level=1)
-
-        return run_time
-
-    def check_status(self, submit=False, resubmit=False, log=DEVNULL):
-        """
-        Check all jobs for completetion. Report any failures, etc.
-
-        Args:
-            submit (bool): submit any pending jobs
-        """
-        # update the information for all jobs where possible
-        run_times = [[], [], [], []]
-        jobs_not_complete = 0
-        jobs_with_errors = 0
-
-        for job in self.validations + self.annotations + [self.pairing, self.summary]:
-            self.scheduler.update_info(job)
-        log('validate', time_stamp=True)
-        for job in self.validations:
-            run_time = self._job_status(job, submit=submit, resubmit=resubmit, log=log.indent())
-            if job.status == JOB_STATUS.COMPLETED:
-                if run_time >= 0:
-                    run_times[0].append(run_time)
-        self.scheduler.wait()
-
-        log('annotate', time_stamp=True)
-        if (
-            not all([job.status == JOB_STATUS.COMPLETED for job in self.validations])
-            and self.scheduler.NAME == 'LOCAL'
-            and (submit or resubmit)
-        ):
-            log('Stopping submission. Dependencies not complete', indent_level=1)
-            submit = False
-            resubmit = False
-
-        for job in self.annotations:
-            self._job_status(job, submit=submit, resubmit=resubmit, log=log.indent())
-            if job.status == JOB_STATUS.COMPLETED:
-                if run_time >= 0:
-                    run_times[1].append(run_time)
-        self.scheduler.wait()
-
-        log('pairing', time_stamp=True)
-        if (
-            not all([job.status == JOB_STATUS.COMPLETED for job in self.annotations])
-            and self.scheduler.NAME == 'LOCAL'
-            and (submit or resubmit)
-        ):
-            log('Stopping submission. Dependencies not complete', indent_level=1)
-            submit = False
-            resubmit = False
-
-        run_time = self._job_status(
-            self.pairing, submit=submit, resubmit=resubmit, log=log.indent()
-        )
-        if self.pairing.status == JOB_STATUS.COMPLETED:
-            if run_time >= 0:
-                run_times[2].append(run_time)
-        self.scheduler.wait()
-
-        log('summary', time_stamp=True)
-        if (
-            self.pairing.status != JOB_STATUS.COMPLETED
-            and self.scheduler.NAME == 'LOCAL'
-            and (submit or resubmit)
-        ):
-            log('Stopping submission. Dependencies not complete', indent_level=1)
-            submit = False
-            resubmit = False
-
-        run_time = self._job_status(
-            self.summary, submit=submit, resubmit=resubmit, log=log.indent()
-        )
-        if self.summary.status == JOB_STATUS.COMPLETED:
-            if run_time >= 0:
-                run_times[3].append(run_time)
-        self.scheduler.wait()
-
-        for job in self.validations + self.annotations + [self.pairing, self.summary]:
-            if submit or resubmit and job.status != JOB_STATUS.COMPLETED:
-                self.scheduler.update_info(job)
-            if job.status in self.ERROR_STATES:
-                jobs_with_errors += 1
-            elif job.status != JOB_STATUS.COMPLETED:
-                jobs_not_complete += 1
-
-        if jobs_not_complete + jobs_with_errors == 0:
-            if all([r for r in run_times]):
-                log('parallel run time:', sum([max(r) for r in run_times]))
-            return EXIT_OK
-        elif not jobs_with_errors:
-            return EXIT_INCOMPLETE
-        else:
-            return EXIT_ERROR
-
-    @classmethod
-    def read_build_file(cls, filepath):
-        """
-        read the configuration file which stored the build information concerning jobs and dependencies
-
-        Args:
-            filepath (str): path to the input config file
-        """
-        from ..main import main as _main
-
-        if not os.path.exists(filepath):
-            raise FileNotFoundError('File does not exist: {}'.format(filepath))
-        parser = ConfigParser(interpolation=ExtendedInterpolation())
-        parser.read(filepath)
-        cast = {'None': None, 'False': False, 'True': True}
-
-        pipeline = cls(
-            output_dir=parser['general']['output_dir'],
-            scheduler=SCHEDULERS_BY_NAME[parser['general']['scheduler']](
-                concurrency_limit=parser['general']['concurrency_limit']
-                if 'concurrency_limit' in parser['general']
-                else OPTIONS.concurrency_limit,
-                remote_head_ssh=parser['general']['remote_head_ssh']
-                if 'remote_head_ssh' in parser['general']
-                else OPTIONS.remote_head_ssh,
-            ),
-            batch_id=parser['general']['batch_id'],
-        )
-
-        jobs = {}
-        for sec in parser.sections():
-            if sec != 'general':
-                section = {}
-                for attr, value in parser[sec].items():
-                    if attr in ['dependencies', 'inputs', 'outputs', 'args', 'task_list'] and value:
-                        section[attr] = [s.strip() for s in re.split(r'\n', value)]
-                    elif value == 'None':
-                        section[attr] = None
-                    elif value in cast:
-                        value = cast[value]
-                    else:
-                        section[attr] = value
-                if pipeline.scheduler.NAME == SCHEDULER.LOCAL:
-                    jobs[sec] = LocalJob(func=_main, **section)
-                elif 'task_list' in section:
-                    if pipeline.scheduler.NAME == SCHEDULER.TORQUE:
-                        jobs[sec] = TorqueArrayJob(**section)
-                    else:
-                        jobs[sec] = ArrayJob(**section)
-                else:
-                    jobs[sec] = Job(**section)
-
-        for job in jobs.values():
-            for i, prior_job_name in enumerate(job.dependencies):
-                job.dependencies[i] = jobs[prior_job_name]
-
-        for job in jobs.values():
-            if job.stage == SUBCOMMAND.VALIDATE:
-                pipeline.validations.append(job)
-            elif job.stage == SUBCOMMAND.ANNOTATE:
-                pipeline.annotations.append(job)
-            elif job.stage == SUBCOMMAND.PAIR:
-                if pipeline.pairing:
-                    raise ValueError('mavis pipeline expects a single pairing job')
-                pipeline.pairing = job
-            elif job.stage == SUBCOMMAND.SUMMARY:
-                if pipeline.summary:
-                    raise ValueError('mavis pipeline expects a single summary job')
-                pipeline.summary = job
-            elif job.stage == SUBCOMMAND.CHECKER:
-                if pipeline.checker:
-                    raise ValueError('mavis pipeline expects a single checker job')
-                pipeline.checker = job
-            else:
-                raise NotImplementedError(
-                    'unexpected job stage for MAVIS pipeline: {}'.format(job.stage), job
-                )
-
-        return pipeline
-
-    def write_build_file(self, filename):
-        """
-        write the build.cfg file for the current pipeline. This is the file used in re-loading the pipeline
-        to check the status and report failures, etc. later.
-
-        Args:
-            filename (str): path to the output config file
-        """
-        parser = ConfigParser(interpolation=ExtendedInterpolation())
-        parser['general'] = {
-            'batch_id': self.batch_id,
-            'output_dir': self.output_dir,
-            'scheduler': self.scheduler.NAME,
-            'remote_head_ssh': self.scheduler.remote_head_ssh,
-            'concurrency_limit': str(self.scheduler.concurrency_limit),
-        }
-
-        for job in [self.summary, self.pairing] + self.validations + self.annotations:
-            parser[job.display_name] = {k: re.sub(r'\$', '$$', v) for k, v in job.flatten().items()}
-
-        with open(filename, 'w') as configfile:
-            parser.write(configfile)
diff --git a/mavis/schedule/scheduler.py b/mavis/schedule/scheduler.py
deleted file mode 100644
index 58c8c2a5..00000000
--- a/mavis/schedule/scheduler.py
+++ /dev/null
@@ -1,942 +0,0 @@
-from datetime import timedelta
-import subprocess
-import re
-import logging
-import socket
-
-from ..util import LOG
-from ..config import NullableType
-
-from .job import ArrayJob
-from .constants import SCHEDULER, JOB_STATUS, cumulative_job_state, MAIL_TYPE
-
-
-def time_format(total_seconds):
-    """
-    Converts a total seconds to a str format "H:M:S"
-    """
-    hours, remainder = divmod(total_seconds, 60 * 60)
-    minutes, seconds = divmod(remainder, 60)
-    return "{}:{:02d}:{:02d}".format(hours, minutes, seconds)
-
-
-def consecutive_ranges(numbers):
-    """
-    Given a list of integers, return a list of ranges
-
-    Example:
-        >>> consecutive_ranges([1, 2, 3, 4, 5, 9, 10, 14, 18])
-        [(1, 5), (9, 10), (14, 14), (18, 18)]
-    """
-    ranges = []
-    for number in sorted(set(numbers)):
-        if not ranges or ranges[-1][1] + 1 != number:
-            ranges.append((number, number))
-        else:
-            ranges[-1] = ranges[-1][0], number
-    return ranges
-
-
-class Scheduler:  # pragma: no cover
-    """
-    Class responsible for methods interacting with the scheduler
-    """
-
-    ENV_TASK_IDENT = '{TASK_IDENT}'
-    """str: the expected pattern of environment variables which store the task id"""
-    ENV_JOB_IDENT = '{JOB_IDENT}'
-    """str: the expected pattern of environment variables which store the job id"""
-    HEADER_PREFIX = '#'
-
-    def __init__(self, concurrency_limit=None, remote_head_ssh=''):
-        """
-        Args:
-            concurrency_limit (int): the maximum allowed concurrent processes. Defaults to one less than the total number available
-        """
-        self.concurrency_limit = NullableType(int)(concurrency_limit)
-        self.remote_head_ssh = remote_head_ssh
-
-    def command(self, command, shell=False):
-        """
-        Wrapper to deal with subprocess commands. If configured and not on the head node currently, will send the command through ssh
-
-        Args:
-            command (list or str): the command can be a list or a string and is passed to the subprocess to be run
-
-        Returns:
-            str: the content returns from stdout of the subprocess
-        """
-        if self.remote_head_ssh and self.remote_head_ssh != socket.gethostname():
-            # ssh to remote head and run the command there
-            if not isinstance(command, str):
-                command = ' '.join(command)
-            return (
-                subprocess.check_output(['ssh', str(self.remote_head_ssh), command])
-                .decode('utf8')
-                .strip()
-            )
-        return subprocess.check_output(command, shell=shell).decode('utf8').strip()
-
-    def wait(self):
-        pass
-
-    def submit(self, job):
-        """
-        submit a job to the scheduler
-        """
-        raise NotImplementedError('abstract method')
-
-    def update_info(self, job):
-        """
-        update the information about the job from the scheduler
-        """
-        raise NotImplementedError('abstract method')
-
-    def cancel(self, job, task_ident=None):
-        raise NotImplementedError('abstract method')
-
-    def format_dependencies(self, job):
-        """
-        returns a string representing the dependency argument
-        """
-        raise NotImplementedError('abstract method')
-
-
-class SlurmScheduler(Scheduler):
-    """
-    Class for formatting commands to match a SLURM scheduler system
-    SLURM docs can be found here https://slurm.schedmd.com
-    """
-
-    NAME = SCHEDULER.SLURM
-    """:attr:`mavis.schedule.constants.SCHEDULER`: the type of scheduler"""
-
-    ENV_TASK_IDENT = 'SLURM_ARRAY_TASK_ID'
-    ENV_JOB_IDENT = 'SLURM_JOB_ID'
-    ENV_ARRAY_IDENT = 'SLURM_ARRAY_JOB_ID'
-
-    def submit(self, job):
-        """
-        runs a subprocess sbatch command
-
-        Args:
-            job (Job): the job to be submitted
-        """
-        command = ['sbatch']
-        if job.job_ident:
-            raise ValueError('Job has already been submitted and has the job number', job.job_ident)
-        if job.queue:
-            command.append('--partition={}'.format(job.queue))
-        if job.memory_limit:
-            command.extend(['--mem', str(job.memory_limit) + 'M'])
-        if job.time_limit:
-            command.extend(['-t', time_format(job.time_limit)])
-        if job.import_env:
-            command.append('--export=ALL')
-        if job.dependencies:
-            command.append(self.format_dependencies(job))
-        if job.name:
-            command.extend(['-J', job.name])
-        if job.stdout:
-            command.extend(
-                [
-                    '-o',
-                    job.stdout.format(
-                        name='%x',
-                        job_ident='%A' if isinstance(job, ArrayJob) else '%j',
-                        task_ident='%a',
-                    ),
-                ]
-            )
-        if job.mail_type and job.mail_user:
-            command.append('--mail-type={}'.format(job.mail_type))
-            command.append('--mail-user={}'.format(job.mail_user))
-        # options specific to job arrays
-        if isinstance(job, ArrayJob):
-            concurrency_limit = (
-                '' if self.concurrency_limit is None else '%{}'.format(self.concurrency_limit)
-            )
-            task_ranges = [
-                '{}{}'.format(s, '-{}'.format(t) if s != t else '')
-                for s, t in consecutive_ranges([task.task_ident for task in job.task_list])
-            ]
-            command.append('--array={}{}'.format(','.join(task_ranges), concurrency_limit))
-
-        command.append(job.script)
-        LOG('submitting', job.name)
-        content = self.command(command)
-
-        match = re.match(r'^submitted batch job (\d+)$', content, re.IGNORECASE)
-        if not match:
-            raise NotImplementedError(
-                'Error in retrieving the submitted job number. Did not match the expected pattern',
-                content,
-            )
-        job.job_ident = match.group(1)
-        job.status = JOB_STATUS.SUBMITTED
-
-        try:
-            for task in job.task_list:
-                task.status = job.status
-                task.status_comment = job.status_comment
-        except AttributeError:
-            pass
-
-    @classmethod
-    def parse_sacct(cls, content):
-        """
-        parses content returned from the sacct command
-
-        Args:
-            content (str): the content returned from the sacct command
-        """
-        lines = content.strip().split('\n')
-        header = lines[0].split('|')
-        rows = []
-        for line in lines[1:]:
-            row = {col: val for col, val in zip(header, line.split('|'))}
-            rows.append(row)
-        # now combine the .batch split jobs
-        results = {}
-        for row in rows:
-            jobid = re.sub(r'\.batch$', '', row['JobID'])
-            if row['JobName'] != 'batch':
-                results[jobid] = row
-        for row in rows:
-            jobid = re.sub(r'\.batch$', '', row['JobID'])
-            if row['JobName'] == 'batch' and jobid in results:
-                results[jobid].update(
-                    {k: v for k, v in row.items() if k not in ['JobName', 'JobID']}
-                )
-        rows = []
-        for row in results.values():
-            row['State'] = row['State'].split(' ')[0]
-            task_ident = None
-            if re.match(r'^\d+_\d+$', row['JobID']):
-                job_ident, task_ident = row['JobID'].rsplit('_', 1)
-                task_ident = int(task_ident)
-            elif re.match(r'^(\d+)_\[\d+(-\d+)?\]$', row['JobID']):
-                job_ident = row['JobID'].split('_', 1)[0]
-            else:
-                job_ident = row['JobID']
-            rows.append(
-                {
-                    'job_ident': job_ident,
-                    'task_ident': task_ident,
-                    'name': row['JobName'],
-                    'status': row['State'],
-                    'status_comment': '',
-                }
-            )
-
-        return rows
-
-    @classmethod
-    def parse_scontrol_show(cls, content):
-        """
-        parse the content from the command: scontrol show job <JOBID>
-
-        Args:
-            content (str): the content to be parsed
-        """
-        rows = []
-        for job_content in re.split(r'\n\s*\n', content):
-            job_content = job_content.strip()
-            if not job_content:  # ignore empty
-                continue
-            row = {}
-            for pair in re.split(r'\s+', job_content):
-                if '=' not in pair:
-                    continue
-                col, val = pair.split('=', 1)
-                row[col] = val
-            try:
-                task_ident = int(row.get('ArrayTaskId', ''))
-            except ValueError:
-                task_ident = None
-            rows.append(
-                {
-                    'job_ident': row['JobId'],
-                    'status': row['JobState'],
-                    'name': row['JobName'],
-                    'status_comment': row['Reason'] if row['Reason'].lower() != 'none' else '',
-                    'task_ident': task_ident,
-                }
-            )
-        return rows
-
-    def update_info(self, job):
-        """
-        Pull job information about status etc from the scheduler. Updates the input job
-
-        Args:
-            job (Job): the job to be updated
-        """
-        if not job.job_ident:
-            return
-        command = ['sacct', '-j', job.job_ident, '--long', '--parsable2']
-        content = self.command(command)
-        rows = self.parse_sacct(content)
-        updated = False
-        updated_tasks = set()
-
-        for row in rows:
-            if row['job_ident'] == job.job_ident:
-                if row['task_ident'] is not None:
-                    if job.has_task(row['task_ident']):
-                        task = job.get_task(row['task_ident'])
-                        task.status = row['status']
-                        task.status_comment = row['status_comment']
-                        updated_tasks.add(task.task_ident)
-                else:
-                    job.status = row['status']
-                    job.status_comment = row['status_comment']
-                    updated = True
-        try:
-            if not updated:
-                job.status = cumulative_job_state([t.status for t in job.task_list])
-            else:
-                for task in job.task_list:
-                    if task.task_ident not in updated_tasks:
-                        task.status = job.status
-        except AttributeError:
-            pass
-
-    def cancel(self, job, task_ident=None):
-        """
-        cancel a job
-
-        Args:
-            job (Job): the job to be cancelled
-            task_ident (int): the task id to be cancelled (instead of the entire array)
-        """
-        if not job.job_ident:
-            return
-        if task_ident is not None:
-            self.command(['scancel', '{}_{}'.format(job.job_ident, task_ident)])
-            job.get_task(task_ident).status = JOB_STATUS.CANCELLED
-            LOG('cancelled task', job.name, job.job_ident, task_ident)
-        else:
-            self.command(['scancel', job.job_ident])
-            job.status = JOB_STATUS.CANCELLED
-            LOG('cancelled job', job.name, job.job_ident)
-
-            try:
-                for task in job.task_list:
-                    task.status = JOB_STATUS.CANCELLED
-            except AttributeError:
-                pass
-
-    def format_dependencies(self, job):
-        """
-        returns a string representing the dependency argument
-
-        Args:
-            job (Job): the job the argument is being built for
-        """
-        try:
-            if len(job.dependencies) == 1 and job.tasks == job.dependencies[0].tasks:
-                # array job dependent on only another array job with the same number of tasks
-                dependency = job.dependencies[0]
-                if not dependency.job_ident:
-                    raise ValueError(
-                        'The dependencies must be submitted before the dependent job',
-                        job,
-                        dependency,
-                    )
-                return '--dependency=aftercorr:{}'.format(dependency.job_ident)
-        except AttributeError:
-            pass
-
-        dep_jobs = []
-        for dependency in job.dependencies:
-            if not dependency.job_ident:
-                raise ValueError(
-                    'The dependencies must be submitted before the dependent job', job, dependency
-                )
-            try:
-                for task in dependency.task_list:
-                    dep_jobs.append('{}_{}'.format(dependency.job_ident, task.task_ident))
-            except AttributeError:
-                dep_jobs.append(str(dependency.job_ident))
-
-        return '--dependency=afterok:{}'.format(':'.join(dep_jobs))
-
-
-class SgeScheduler(Scheduler):
-    """
-    Class for managing interactions with the SGE scheduler
-    """
-
-    NAME = SCHEDULER.SGE
-    """:attr:`mavis.schedule.constants.SCHEDULER`: the type of scheduler"""
-    ENV_TASK_IDENT = 'SGE_TASK_ID'
-    ENV_JOB_IDENT = 'JOB_ID'
-    """str: expected pattern for environment variables which store the job id"""
-    ENV_ARRAY_IDENT = ENV_JOB_IDENT
-    ENV_JOB_NAME = 'JOB_NAME'
-    """str: expected pattern for environment variables which store the job name"""
-    HEADER_PREFIX = '#$'
-
-    STATE_MAPPING = {
-        'q': JOB_STATUS.PENDING,
-        'h': JOB_STATUS.PENDING,
-        'R': JOB_STATUS.RUNNING,
-        'r': JOB_STATUS.RUNNING,
-        'd': JOB_STATUS.CANCELLED,
-        's': JOB_STATUS.ERROR,
-        'w': JOB_STATUS.PENDING,
-        'E': JOB_STATUS.ERROR,
-        'T': JOB_STATUS.ERROR,
-        't': JOB_STATUS.RUNNING,
-    }
-    """dict: mapping from SGE job states to their MAVIS JOB_STATUS equivalent"""
-    MAIL_TYPE_MAPPING = {
-        MAIL_TYPE.BEGIN: 'b',
-        MAIL_TYPE.NONE: 'n',
-        MAIL_TYPE.FAIL: 'as',
-        MAIL_TYPE.END: 'e',
-        MAIL_TYPE.ALL: 'abes',
-    }
-    """dict: mapping from MAVIS mail type options to SGE mail options"""
-
-    @classmethod
-    def parse_qacct(cls, content):
-        """
-        parses the information produced by qacct
-
-        Args:
-            content (str): the content returned from the qacct command
-
-        Raises
-            ValueError: when no job information is reported (this may happen due to a bad or too old job ID where information is no longer stored)
-        """
-        if re.match(r'^\s*Total System Usage.*', content):
-            raise ValueError('Job information not found')
-        rows = []
-        for section in re.split(r'=+\n', content)[1:]:  # initial item will be empty
-            row = {}
-            for line in section.split('\n'):
-                if re.match(r'^[\s=]*$', line):
-                    continue
-                col, val = re.split(r'\s+', line, 1)
-                val = val.strip()
-                if val == 'undefined':
-                    val = None
-                row[col] = val
-
-            if row['exit_status'] == '0' and row['failed'] == '0':
-                status = JOB_STATUS.COMPLETED
-            elif '(Killed)' in row['exit_status']:
-                status = JOB_STATUS.CANCELLED
-            else:
-                status = JOB_STATUS.FAILED
-            if ':' in row['failed']:
-                status_comment = row['failed'].split(':', 1)[1].strip()
-            else:
-                status_comment = ''
-            rows.append(
-                {
-                    'name': row['jobname'],
-                    'job_ident': row['jobnumber'],
-                    'task_ident': row['taskid'],
-                    'status': status,
-                    'status_comment': status_comment,
-                }
-            )
-        return rows
-
-    @classmethod
-    def parse_qstat(cls, content, job_id):
-        """
-        parses the qstat content into rows/dicts representing individual jobs
-
-        Args:
-            content (str): content returned from the qstat command
-        """
-        header = [
-            'job-ID',
-            'prior',
-            'name',
-            'user',
-            'state',
-            'submit/start at',
-            'queue',
-            'slots',
-            'ja-task-ID',
-        ]
-        content = content.strip()
-        if not content:
-            return []
-        lines = [line for line in content.split('\n') if line.strip()]
-        column_sizes = []
-        for col in header:
-            match = re.search(col + r'\s*', lines[0])
-            if not match:
-                raise ValueError(
-                    'Error in parsing the qstat content for the column from', col, lines[0]
-                )
-            column_sizes.append(len(match.group(0)))
-        rows = []
-
-        for line in lines[1:]:
-            if re.match(r'^[\-]+$', line):
-                continue  # ignore dashed separators
-            row = {}
-            pos = 0
-            for col, size in zip(header, column_sizes):
-                row[col] = line[pos : pos + size].strip()
-                pos += size
-            task_ident = row['ja-task-ID']
-            if not task_ident or set(task_ident) & set(',:-'):
-                task_ident = None
-            if row['job-ID'] == job_id:
-                rows.append(
-                    {
-                        'task_ident': task_ident,
-                        'job_ident': row['job-ID'],
-                        'name': row['name'],
-                        'status': cls.convert_state(row['state']),
-                        'status_comment': '',
-                    }
-                )
-        return rows
-
-    @classmethod
-    def convert_state(cls, state):
-        states = set()
-        for char in state:
-            states.add(cls.STATE_MAPPING[char])
-        return cumulative_job_state(states)
-
-    def submit(self, job):
-        """
-        runs a subprocess sbatch command
-
-        Args:
-            job (Job): the job to be submitted
-        """
-        command = ['qsub', '-j', 'y']  # always join output
-        if job.job_ident:
-            raise ValueError('Job has already been submitted and has the job number', job.job_ident)
-        if job.queue:
-            command.extend(['-q', job.queue])
-        if job.memory_limit:
-            command.extend(
-                ['-l', 'mem_free={0}M,mem_token={0}M,h_vmem={0}M'.format(job.memory_limit)]
-            )
-        if job.time_limit:
-            command.extend(['-l', 'h_rt={}'.format(time_format(job.time_limit))])
-        if job.import_env:
-            command.append('-V')
-        if job.dependencies:
-            command.append(self.format_dependencies(job))
-        if job.name:
-            command.extend(['-N', job.name])
-        if job.mail_type and job.mail_user:
-            command.extend(['-m', self.MAIL_TYPE_MAPPING[job.mail_type]])
-            command.extend(['-M', job.mail_user])
-        # options specific to job arrays
-        if isinstance(job, ArrayJob):
-            task_ranges = consecutive_ranges([t.task_ident for t in job.task_list])
-            if len(task_ranges) != 1:
-                raise ValueError(
-                    'SGE does not support array jobs with non-consecutive task ranges', task_ranges
-                )
-            command.extend(['-t', '{}-{}'.format(*task_ranges[0])])
-        if job.stdout:
-            command.extend(
-                [
-                    '-o',
-                    job.stdout.format(
-                        name='\\${}'.format(self.ENV_JOB_NAME),
-                        job_ident='\\${}'.format(self.ENV_JOB_IDENT),
-                        task_ident='\\$TASK_ID',
-                    ),
-                ]
-            )
-
-        command.append(job.script)
-        command = ' '.join(command)
-        LOG(command, level=logging.DEBUG)
-        LOG('submitting', job.name)
-        content = self.command(command, shell=True)
-
-        # example: Your job-array 3760559.1-1:1 ("MV_mock-A36971_batch-E6aEZJnTQAau598tcsMjAE") has been submitted
-        # example: Your job 3766949 ("MP_batch-TvkFvM52v3ncuNQZb2M9TD") has been submitted
-        match = re.match(
-            r'^Your job(-array)? (\d+)(\.\d+-\d+:1)? .* has been submitted$', content, re.IGNORECASE
-        )
-        if not match:
-            raise NotImplementedError(
-                'Error in retrieving the submitted job number. Did not match the expected pattern',
-                content,
-            )
-        job.job_ident = match.group(2)
-        job.status = JOB_STATUS.SUBMITTED
-
-        try:
-            for task in job.task_list:
-                task.status = job.status
-                task.status_comment = job.status_comment
-        except AttributeError:
-            pass
-
-    def update_info(self, job):
-        """
-        runs a subprocess scontrol command to get job details and add them to the current job
-
-        Args:
-            job (Job): the job information is being gathered for
-
-        Raises
-            ValueError: if the job information could not be retrieved
-        """
-        if not job.job_ident:
-            return
-        try:
-            content = self.command(['qstat', '-u', "*"])
-            rows = self.parse_qstat(content, job.job_ident)
-        except subprocess.CalledProcessError:  # job not queued
-            rows = []
-
-        updated = False
-        if not rows:
-            # job no longer scheduled
-            command = ['qacct', '-j', job.job_ident]
-            content = self.command(command)
-            rows = self.parse_qacct(content)
-            # job is still on the scheduler
-        for row in rows:
-            if row['job_ident'] != job.job_ident:
-                continue
-            try:
-                if row['task_ident'] and not job.has_task(row['task_ident']):
-                    continue
-            except AttributeError:
-                pass
-            if row['task_ident']:
-                task_ident = int(row['task_ident'])
-                task = job.get_task(task_ident)
-                task.status = row['status']
-                task.status_comment = row['status_comment'].strip()
-            else:
-                job.status = row['status']
-                job.status_comment = row['status_comment'].strip()
-                updated = True
-
-        try:
-            if not updated:
-                job.status = cumulative_job_state([task.status for task in job.task_list])
-        except AttributeError:
-            pass  # only applies to array jobs
-
-    def cancel(self, job, task_ident=None):
-        """
-        cancel a job or a specific task of an array job
-
-        Args:
-            job (Job): the job to cancel
-            task_ident (int): if specified, will cancel the given task instead of the whole array or job
-        """
-        if not job.job_ident:
-            return
-        try:
-            if task_ident is not None:
-                self.command(['qdel', job.job_ident, '-t', str(task_ident)])
-                job.get_task(int(task_ident)).status = JOB_STATUS.CANCELLED
-                LOG('cancelled task', job.name, job.job_ident, task_ident)
-            else:
-                self.command(['qdel', job.job_ident])
-                job.status = JOB_STATUS.CANCELLED
-                LOG('cancelled job', job.name, job.job_ident)
-
-                try:
-                    for task in job.task_list:
-                        task.status = JOB_STATUS.CANCELLED
-                except AttributeError:
-                    pass
-        except subprocess.CalledProcessError:
-            LOG('unable to cancel job', job.job_ident)
-
-    def format_dependencies(self, job):
-        """
-        returns a string representing the dependency argument
-        """
-        # special case array dependency
-        try:
-            if len(job.dependencies) == 1 and job.tasks == job.dependencies[0].tasks:
-                dependency = job.dependencies[0]
-                if not dependency.job_ident:
-                    raise ValueError(
-                        'The dependencies must be submitted before the dependent job',
-                        job,
-                        dependency,
-                    )
-                return '-hold_jid_ad {}'.format(dependency.job_ident)
-        except AttributeError:
-            pass
-        for dependency in job.dependencies:
-            if not dependency.job_ident:
-                raise ValueError(
-                    'The dependencies must be submitted before the dependent job', job, dependency
-                )
-
-        return '-hold_jid {}'.format(','.join([d.job_ident for d in job.dependencies]))
-
-
-class TorqueScheduler(SgeScheduler):
-    """
-    Class for managing interactions with the Torque scheduler
-    """
-
-    NAME = SCHEDULER.TORQUE
-    """:attr:`mavis.schedule.constants.SCHEDULER`: the type of scheduler"""
-    ENV_TASK_IDENT = 'PBS_ARRAYID'
-    ENV_JOB_IDENT = 'PBS_JOBID'
-    """str: expected pattern for environment variables which store the job id"""
-    ENV_ARRAY_IDENT = ENV_JOB_IDENT
-    ENV_JOB_NAME = 'PBS_JOBNAME'
-    """str: expected pattern for environment variables which store the job name"""
-    TAB_SIZE = 8
-    MAIL_TYPE_MAPPING = {
-        MAIL_TYPE.BEGIN: 'b',
-        MAIL_TYPE.NONE: 'p',
-        MAIL_TYPE.FAIL: 'fa',
-        MAIL_TYPE.END: 'e',
-        MAIL_TYPE.ALL: 'abef',
-    }
-    """dict: mapping from MAVIS mail type options to Torque mail options"""
-    STATE_MAPPING = {
-        'C': JOB_STATUS.COMPLETED,
-        'E': JOB_STATUS.RUNNING,
-        'H': JOB_STATUS.PENDING,
-        'Q': JOB_STATUS.PENDING,
-        'T': JOB_STATUS.RUNNING,
-        'W': JOB_STATUS.PENDING,
-        'S': JOB_STATUS.ERROR,
-        'R': JOB_STATUS.RUNNING,
-    }
-    """dict: mapping from Torque job states to their MAVIS JOB_STATUS equivalent"""
-
-    def format_dependencies(self, job):
-        """
-        returns a string representing the dependency argument
-        """
-        arr_dependencies = []
-        job_dependencies = []
-
-        for dep in job.dependencies:
-            if not dep.job_ident:
-                raise ValueError('Dependencies must be submitted beforehand', job, dep)
-
-            if isinstance(dep, ArrayJob):
-                task_ident = re.sub(
-                    r'\[\]', '[][{}]'.format(dep.tasks) if dep.tasks > 1 else '[]', dep.job_ident
-                )
-                arr_dependencies.append(task_ident)
-            else:
-                job_dependencies.append(dep.job_ident)
-
-        result = []
-        if arr_dependencies:
-            result.append('afterokarray:{}'.format(':'.join(arr_dependencies)))
-        if job_dependencies:
-            result.append('afterok:{}'.format(':'.join(job_dependencies)))
-
-        return '-W depend={}'.format(','.join(result))
-
-    @classmethod
-    def parse_qstat(cls, content):
-        """
-        parses the qstat content into rows/dicts representing individual jobs
-
-        Args:
-            content (str): content returned from the qstat command
-        """
-        content = re.sub(r'\t', ' ' * cls.TAB_SIZE, content)  # PBS  torque tab size is 8
-        jobs = re.split(r'\s*\n\n\s*', content.strip())
-        rows = []
-
-        for job in jobs:
-            if job.startswith('request_version') or not job:
-                continue
-            row = {}
-            lines = job.split('\n')
-            task_ident = None
-            row['Job Id'] = lines[0].split(':', 1)[1].strip()
-            match = re.match(r'^(\d+)\[(\d+)\](.*)$', row['Job Id'])
-            if match:
-                row['Job Id'] = '{}[]{}'.format(match.group(1), match.group(3))
-                task_ident = int(match.group(2))
-            tab_size = None
-            columns = []
-            values = []
-            for line in lines[1:]:
-                if not line.strip():
-                    continue
-                match = re.match(r'^(\s*)(\S.*)', line)
-                curr_tab_size = len(match.group(1))
-                if tab_size is None:
-                    tab_size = curr_tab_size
-
-                if curr_tab_size > tab_size or '=' not in line:
-                    if not values:
-                        raise NotImplementedError(
-                            'Unexpected indentation prior to setting column', line
-                        )
-                    values[-1] = values[-1] + line.strip()
-                elif curr_tab_size == tab_size:
-                    col, val = line.split('=', 1)
-                    columns.append(col.strip())
-                    values.append(val.strip())
-                else:
-                    raise NotImplementedError('Unexpected indentation', line)
-            for col, val in zip(columns, values):
-                row[col] = val
-            status = cls.STATE_MAPPING[row['job_state']]
-            if status == JOB_STATUS.COMPLETED:
-                if 'exit_status' in row:
-                    if row['exit_status'] != '0':
-                        status = JOB_STATUS.FAILED
-                else:
-                    status = JOB_STATUS.CANCELLED
-            rows.append(
-                {
-                    'job_ident': row['Job Id'],
-                    'name': row['Job_Name'],
-                    'status': status,
-                    'task_ident': task_ident,
-                    'status_comment': '',
-                }
-            )
-        return rows
-
-    def submit(self, job):
-        """
-        runs a subprocess qsub command
-
-        Args:
-            job (Job): the job to be submitted
-        """
-        command = ['qsub', '-j', 'oe']  # always join output as stdout
-        if job.job_ident:
-            raise ValueError('Job has already been submitted and has the job number', job.job_ident)
-        if job.queue:
-            command.extend(['-q', job.queue])
-        if job.memory_limit:
-            command.extend(['-l', 'mem={0}mb'.format(job.memory_limit)])
-        if job.time_limit:
-            command.extend(['-l', 'walltime={}'.format(time_format(job.time_limit))])
-        if job.import_env:
-            command.append('-V')
-        if job.dependencies:
-            command.append(self.format_dependencies(job))
-        if job.name:
-            command.extend(['-N', job.name])
-        if job.stdout:
-            command.extend(
-                [
-                    '-o',
-                    job.stdout.format(
-                        name='${}'.format(self.ENV_JOB_NAME),
-                        job_ident='${}'.format(self.ENV_JOB_IDENT),
-                        task_ident='${}'.format(self.ENV_TASK_IDENT),
-                    ),
-                ]
-            )
-        if job.mail_type and job.mail_user:
-            command.extend(['-m', self.MAIL_TYPE_MAPPING[job.mail_type]])
-            command.extend(['-M', job.mail_user])
-        # options specific to job arrays
-        if isinstance(job, ArrayJob):
-            concurrency_limit = (
-                '' if self.concurrency_limit is None else '%{}'.format(self.concurrency_limit)
-            )
-            task_ranges = [
-                '{}{}'.format(s, '-{}'.format(t) if s != t else '')
-                for s, t in consecutive_ranges([task.task_ident for task in job.task_list])
-            ]
-            command.extend(['-t', '{}{}'.format(','.join(task_ranges), concurrency_limit)])
-
-        command.append(job.script)
-        LOG('submitting', job.name)
-        content = self.command(command)
-
-        job.job_ident = content.strip()
-        job.status = JOB_STATUS.SUBMITTED
-        job.status_comment = ''
-
-        # update task status
-        try:
-            for task in job.task_list:
-                task.status = job.status
-                task.status_comment = job.status_comment
-        except AttributeError:
-            pass
-
-    def update_info(self, job):
-        """
-        runs a subprocess scontrol command to get job details and add them to the current job
-
-        Args:
-            job (Job): the job information is being gathered for
-
-        Raises
-            ValueError: if the job information could not be retrieved
-        """
-        if job.job_ident is None:
-            job.status = JOB_STATUS.NOT_SUBMITTED
-            return
-        command = ['qstat', '-f', '-t', job.job_ident]  # always split into tasks
-        content = self.command(command)
-        rows = self.parse_qstat(content)
-        tasks_updated = False
-
-        for row in rows:
-            if row['job_ident'] != job.job_ident:
-                continue
-            if isinstance(job, ArrayJob) and row['task_ident']:
-                task_ident = int(row['task_ident'])
-                try:
-                    task = job.get_task(task_ident)
-                except KeyError:
-                    pass
-                else:
-                    task.status = row['status']
-                    task.status_comment = row['status_comment']
-                    tasks_updated = True
-            else:
-                job.status = row['status']
-                job.status_comment = row['status_comment']
-
-        if tasks_updated:
-            job.status = cumulative_job_state([t.status for t in job.task_list])
-
-    def cancel(self, job, task_ident=None):
-        """
-        cancel a job
-
-        Args:
-            job (Job): the job to be cancelled
-            task_ident (int): if specified then a single task will be cancelled instead of the whole job or array
-        """
-        if not job.job_ident:
-            return
-        try:
-            if task_ident is not None:
-                self.command(['qdel', job.job_ident, '-t', str(task_ident)])
-                job.get_task(int(task_ident)).status = JOB_STATUS.CANCELLED
-                LOG('cancelled task', job.name, job.job_ident, task_ident)
-            else:
-                self.command(['qdel', job.job_ident])
-                job.status = JOB_STATUS.CANCELLED
-                LOG('cancelled job', job.name, job.job_ident)
-
-                try:
-                    for task in job.task_list:
-                        task.status = JOB_STATUS.CANCELLED
-                except AttributeError:
-                    pass
-        except subprocess.CalledProcessError:
-            LOG('failed to cancel {}'.format(job.job_ident), level=logging.DEBUG)
diff --git a/mavis/schemas/config.json b/mavis/schemas/config.json
new file mode 100644
index 00000000..c50a8632
--- /dev/null
+++ b/mavis/schemas/config.json
@@ -0,0 +1,781 @@
+{
+    "$schema": "http://json-schema.org/draft-07/schema#",
+    "additionalProperties": false,
+    "properties": {
+        "annotate.annotation_filters": {
+            "default": [
+                "choose_more_annotated",
+                "choose_transcripts_by_priority"
+            ],
+            "description": "A list of filters to apply to putative annotations",
+            "items": {
+                "enum": [
+                    "choose_more_annotated",
+                    "choose_transcripts_by_priority"
+                ],
+                "type": "string"
+            },
+            "type": "array"
+        },
+        "annotate.draw_fusions_only": {
+            "default": true,
+            "description": "Flag to indicate if events which do not produce a fusion transcript should produce illustrations",
+            "type": "boolean"
+        },
+        "annotate.draw_non_synonymous_cdna_only": {
+            "default": true,
+            "description": "Flag to indicate if events which are synonymous at the cdna level should produce illustrations",
+            "type": "boolean"
+        },
+        "annotate.max_orf_cap": {
+            "default": 3,
+            "description": "The maximum number of orfs to return (best putative orfs will be retained)",
+            "type": "integer"
+        },
+        "annotate.min_domain_mapping_match": {
+            "default": 0.9,
+            "description": "A number between 0 and 1 representing the minimum percent match a domain must map to the fusion transcript to be displayed",
+            "maximum": 1,
+            "minimum": 0,
+            "type": "number"
+        },
+        "annotate.min_orf_size": {
+            "default": 300,
+            "description": "The minimum length (in base pairs) to retain a putative open reading frame (orf)",
+            "type": "integer"
+        },
+        "bam_stats.distribution_fraction": {
+            "default": 0.97,
+            "description": "the proportion of the distribution to use in computing stdev",
+            "maximum": 1,
+            "minimum": 0.01,
+            "type": "number"
+        },
+        "bam_stats.sample_bin_size": {
+            "default": 1000,
+            "description": "how large to make the sample bin (in bp)",
+            "type": "integer"
+        },
+        "bam_stats.sample_cap": {
+            "default": 1000,
+            "description": "maximum number of reads to collect for any given sample region",
+            "type": "integer"
+        },
+        "bam_stats.sample_size": {
+            "default": 500,
+            "description": "the number of genes/bins to compute stats over",
+            "type": "integer"
+        },
+        "cluster.cluster_initial_size_limit": {
+            "default": 25,
+            "description": "The maximum cumulative size of both breakpoints for breakpoint pairs to be used in the initial clustering phase (combining based on overlap)",
+            "type": "integer"
+        },
+        "cluster.cluster_radius": {
+            "default": 100,
+            "description": "Maximum distance allowed between paired breakpoint pairs",
+            "type": "integer"
+        },
+        "cluster.limit_to_chr": {
+            "default": [
+                "1",
+                "2",
+                "3",
+                "4",
+                "5",
+                "6",
+                "7",
+                "8",
+                "9",
+                "10",
+                "11",
+                "12",
+                "13",
+                "14",
+                "15",
+                "16",
+                "17",
+                "18",
+                "19",
+                "20",
+                "21",
+                "22",
+                "X",
+                "Y"
+            ],
+            "description": "A list of chromosome names to use. Breakpoint pairs on other chromosomes will be filtered out. For example '1 2 3 4' would filter out events/breakpoint pairs on any chromosomes but 1, 2, 3, and 4",
+            "items": {
+                "type": "string"
+            },
+            "type": [
+                "array",
+                "null"
+            ]
+        },
+        "cluster.max_files": {
+            "default": 200,
+            "description": "The maximum number of files to output from clustering/splitting",
+            "minimum": 1,
+            "type": "integer"
+        },
+        "cluster.max_proximity": {
+            "default": 5000,
+            "description": "The maximum distance away from an annotation before the region is considered to be uninformative",
+            "type": "integer"
+        },
+        "cluster.min_clusters_per_file": {
+            "default": 50,
+            "description": "The minimum number of breakpoint pairs to output to a file",
+            "minimum": 1,
+            "type": "integer"
+        },
+        "cluster.split_only": {
+            "default": false,
+            "description": "just split the input files, do not merge input breakpoints into clusters",
+            "type": "boolean"
+        },
+        "cluster.uninformative_filter": {
+            "default": false,
+            "description": "Flag that determines if breakpoint pairs which are not within max_proximity to any annotations are filtered out prior to clustering",
+            "type": "boolean"
+        },
+        "convert": {
+            "additionalProperties": {
+                "properties": {
+                    "assume_no_untemplated": {
+                        "default": false,
+                        "description": "Assume the lack of untemplated information means that there IS no untemplated sequence expected at the breakpoints",
+                        "type": "boolean"
+                    },
+                    "file_type": {
+                        "description": "the tool the file is input from or 'mavis' for standard mavis-style tab files",
+                        "enum": [
+                            "manta",
+                            "delly",
+                            "transabyss",
+                            "pindel",
+                            "chimerascan",
+                            "mavis",
+                            "defuse",
+                            "breakdancer",
+                            "vcf",
+                            "breakseq",
+                            "cnvnator",
+                            "strelka",
+                            "starfusion"
+                        ],
+                        "type": "string"
+                    },
+                    "inputs": {
+                        "items": {
+                            "type": "string"
+                        },
+                        "minItems": 1,
+                        "type": "array"
+                    },
+                    "strand_specific": {
+                        "default": false,
+                        "type": "boolean"
+                    }
+                },
+                "required": [
+                    "inputs",
+                    "file_type"
+                ],
+                "type": "object"
+            },
+            "type": "object"
+        },
+        "illustrate.domain_color": {
+            "default": "#ccccb3",
+            "description": "Domain fill color",
+            "type": "string"
+        },
+        "illustrate.domain_mismatch_color": {
+            "default": "#b2182b",
+            "description": "Domain fill color on 0%% match",
+            "type": "string"
+        },
+        "illustrate.domain_name_regex_filter": {
+            "default": "^PF\\d+$",
+            "description": "The regular expression used to select domains to be displayed (filtered by name)",
+            "type": "string"
+        },
+        "illustrate.domain_scaffold_color": {
+            "default": "#000000",
+            "description": "The color of the domain scaffold",
+            "type": "string"
+        },
+        "illustrate.drawing_width_iter_increase": {
+            "default": 500,
+            "description": "The amount (in pixels) by which to increase the drawing width upon failure to fit",
+            "type": "integer"
+        },
+        "illustrate.exon_min_focus_size": {
+            "default": 10,
+            "description": "Minimum size of an exon for it to be granted a label or min exon width",
+            "type": "integer"
+        },
+        "illustrate.gene1_color": {
+            "default": "#657e91",
+            "description": "The color of genes near the first gene",
+            "pattern": "^#[a-zA-Z0-9]{6}",
+            "type": "string"
+        },
+        "illustrate.gene1_color_selected": {
+            "default": "#518dc5",
+            "description": "The color of the first gene",
+            "pattern": "^#[a-zA-Z0-9]{6}",
+            "type": "string"
+        },
+        "illustrate.gene2_color": {
+            "default": "#325556",
+            "description": "The color of genes near the second gene",
+            "pattern": "^#[a-zA-Z0-9]{6}",
+            "type": "string"
+        },
+        "illustrate.gene2_color_selected": {
+            "default": "#4c9677",
+            "description": "The color of the second gene",
+            "pattern": "^#[a-zA-Z0-9]{6}",
+            "type": "string"
+        },
+        "illustrate.label_color": {
+            "default": "#000000",
+            "description": "The label color",
+            "pattern": "^#[a-zA-Z0-9]{6}",
+            "type": "string"
+        },
+        "illustrate.mask_fill": {
+            "default": "#ffffff",
+            "description": "Color of mask (for deleted region etc.)",
+            "pattern": "^#[a-zA-Z0-9]{6}",
+            "type": "string"
+        },
+        "illustrate.mask_opacity": {
+            "default": 0.7,
+            "description": "Opacity of the mask layer",
+            "maximum": 1,
+            "minimum": 0,
+            "type": "number"
+        },
+        "illustrate.max_drawing_retries": {
+            "default": 5,
+            "description": "The maximum number of retries for attempting a drawing. Each iteration the width is extended. If it is still insufficient after this number, a gene-level only drawing will be output",
+            "type": "integer"
+        },
+        "illustrate.novel_exon_color": {
+            "default": "#5D3F6A",
+            "description": "Novel exon fill color",
+            "pattern": "^#[a-zA-Z0-9]{6}",
+            "type": "string"
+        },
+        "illustrate.scaffold_color": {
+            "default": "#000000",
+            "description": "The color used for the gene/transcripts scaffolds",
+            "pattern": "^#[a-zA-Z0-9]{6}",
+            "type": "string"
+        },
+        "illustrate.splice_color": {
+            "default": "#000000",
+            "description": "Splicing lines color",
+            "pattern": "^#[a-zA-Z0-9]{6}",
+            "type": "string"
+        },
+        "illustrate.width": {
+            "default": 1000,
+            "description": "The drawing width in pixels",
+            "type": "integer"
+        },
+        "illustratebreakpoint_color": {
+            "default": "#000000",
+            "description": "Breakpoint outline color",
+            "type": "string"
+        },
+        "libraries": {
+            "additionalProperties": {
+                "additionalProperties": false,
+                "properties": {
+                    "assign": {
+                        "items": {
+                            "type": "string"
+                        },
+                        "minItems": 1,
+                        "type": "array"
+                    },
+                    "total_batches": {
+                        "type": "integer",
+                        "minimum": 1,
+                        "description": "The number of jobs to split a library into for cluster/validate/annotate"
+                    },
+                    "bam_file": {
+                        "type": "string"
+                    },
+                    "disease_status": {
+                        "enum": [
+                            "diseased",
+                            "normal"
+                        ],
+                        "type": "string"
+                    },
+                    "median_fragment_size": {
+                        "type": "integer"
+                    },
+                    "protocol": {
+                        "enum": [
+                            "genome",
+                            "transcriptome"
+                        ],
+                        "type": "string"
+                    },
+                    "read_length": {
+                        "type": "integer"
+                    },
+                    "stdev_fragment_size": {
+                        "type": "integer"
+                    },
+                    "strand_determining_read": {
+                        "default": 2,
+                        "description": "1 or 2. The read in the pair which determines if (assuming a stranded protocol) the first or second read in the pair matches the strand sequenced",
+                        "type": "integer"
+                    },
+                    "strand_specific": {
+                        "default": false,
+                        "type": "boolean"
+                    }
+                },
+                "required": [
+                    "disease_status",
+                    "protocol",
+                    "assign"
+                ],
+                "type": "object"
+            },
+            "minProperties": 1,
+            "type": "object"
+        },
+        "log": {
+            "type": "string"
+        },
+        "log_level": {
+            "default": "INFO",
+            "enum": [
+                "INFO",
+                "DEBUG"
+            ],
+            "type": "string"
+        },
+        "output_dir": {
+            "type": "string"
+        },
+        "pairing.contig_call_distance": {
+            "default": 10,
+            "description": "The maximum distance allowed between breakpoint pairs (called by contig) in order for them to pair",
+            "type": "integer"
+        },
+        "pairing.flanking_call_distance": {
+            "default": 50,
+            "description": "The maximum distance allowed between breakpoint pairs (called by flanking pairs) in order for them to pair",
+            "type": "integer"
+        },
+        "pairing.input_call_distance": {
+            "default": 20,
+            "description": "The maximum distance allowed between breakpoint pairs (called by input tools, not validated) in order for them to pair",
+            "type": "integer"
+        },
+        "pairing.spanning_call_distance": {
+            "default": 20,
+            "description": "The maximum distance allowed between breakpoint pairs (called by spanning reads) in order for them to pair",
+            "type": "integer"
+        },
+        "pairing.split_call_distance": {
+            "default": 20,
+            "description": "The maximum distance allowed between breakpoint pairs (called by split reads) in order for them to pair",
+            "type": "integer"
+        },
+        "reference.aligner_reference": {
+            "examples": [
+                "tests/data/mock_reference_genome.2bit"
+            ],
+            "items": {
+                "type": "string"
+            },
+            "maxItems": 1,
+            "minItems": 1,
+            "type": "array"
+        },
+        "reference.annotations": {
+            "examples": [
+                "tests/data/mock_annotations.json"
+            ],
+            "items": {
+                "type": "string"
+            },
+            "minItems": 1,
+            "type": "array"
+        },
+        "reference.dgv_annotation": {
+            "examples": [
+                [
+                    "tests/data/mock_dgv_annotation.txt"
+                ]
+            ],
+            "items": {
+                "type": "string"
+            },
+            "minItems": 1,
+            "type": "array"
+        },
+        "reference.masking": {
+            "examples": [
+                [
+                    "tests/data/mock_masking.tab"
+                ]
+            ],
+            "items": {
+                "type": "string"
+            },
+            "minItems": 1,
+            "type": "array"
+        },
+        "reference.reference_genome": {
+            "examples": [
+                [
+                    "tests/data/mock_reference_genome.fa"
+                ]
+            ],
+            "items": {
+                "type": "string"
+            },
+            "minItems": 1,
+            "type": "array"
+        },
+        "reference.template_metadata": {
+            "examples": [
+                [
+                    "tests/data/cytoBand.txt"
+                ]
+            ],
+            "items": {
+                "type": "string"
+            },
+            "minItems": 1,
+            "type": "array"
+        },
+        "skip_stage.validate": {
+            "default": false,
+            "description": "skip the validation stage of the MAVIS pipeline",
+            "type": "boolean"
+        },
+        "summary.filter_cdna_synon": {
+            "default": true,
+            "description": "Filter all annotations synonymous at the cdna level",
+            "type": "boolean"
+        },
+        "summary.filter_min_complexity": {
+            "default": 0.2,
+            "description": "Filter event calls based on call sequence complexity",
+            "maximum": 1,
+            "minimum": 0,
+            "type": "number"
+        },
+        "summary.filter_min_flanking_reads": {
+            "default": 10,
+            "description": "Minimum number of flanking pairs for a call by flanking pairs",
+            "type": "integer"
+        },
+        "summary.filter_min_linking_split_reads": {
+            "default": 1,
+            "description": "Minimum number of linking split reads for a call by split reads",
+            "type": "integer"
+        },
+        "summary.filter_min_remapped_reads": {
+            "default": 5,
+            "description": "Minimum number of remapped reads for a call by contig",
+            "type": "integer"
+        },
+        "summary.filter_min_spanning_reads": {
+            "default": 5,
+            "description": "Minimum number of spanning reads for a call by spanning reads",
+            "type": "integer"
+        },
+        "summary.filter_min_split_reads": {
+            "default": 5,
+            "description": "Minimum number of split reads for a call by split reads",
+            "type": "integer"
+        },
+        "summary.filter_protein_synon": {
+            "default": false,
+            "description": "Filter all annotations synonymous at the protein level",
+            "type": "boolean"
+        },
+        "summary.filter_trans_homopolymers": {
+            "default": true,
+            "description": "Filter all single bp ins/del/dup events that are in a homopolymer region of at least 3 bps and are not paired to a genomic event",
+            "type": "boolean"
+        },
+        "validate.aligner": {
+            "default": "blat",
+            "description": "The aligner to use to map the contigs/reads back to the reference, e.g. 'blat' or 'bwa mem'",
+            "enum": [
+                "bwa mem",
+                "blat"
+            ],
+            "type": "string"
+        },
+        "validate.assembly_kmer_size": {
+            "default": 0.74,
+            "description": "The percent of the read length to make kmers for assembly",
+            "maximum": 1,
+            "minimum": 0,
+            "type": "number"
+        },
+        "validate.assembly_max_paths": {
+            "default": 8,
+            "description": "The maximum number of paths to resolve. This is used to limit when there is a messy assembly graph to resolve. The assembly will pre-calculate the number of paths (or putative assemblies) and stop if it is greater than the given setting",
+            "type": "integer"
+        },
+        "validate.assembly_min_edge_trim_weight": {
+            "default": 3,
+            "description": "This is used to simplify the debruijn graph before path finding. edges with less than this frequency will be discarded if they are non-cutting, at a fork, or the end of a path",
+            "type": "integer"
+        },
+        "validate.assembly_min_exact_match_to_remap": {
+            "default": 15,
+            "description": "The minimum length of exact matches to initiate remapping a read to a contig",
+            "type": "integer"
+        },
+        "validate.assembly_min_remap_coverage": {
+            "default": 0.9,
+            "description": "Minimum fraction of the contig sequence which the remapped sequences must align over",
+            "maximum": 1,
+            "minimum": 0,
+            "type": "number"
+        },
+        "validate.assembly_min_remapped_seq": {
+            "default": 3,
+            "description": "The minimum input sequences that must remap for an assembled contig to be used",
+            "type": "integer"
+        },
+        "validate.assembly_min_uniq": {
+            "default": 0.1,
+            "description": "Minimum percent uniq required to keep separate assembled contigs. if contigs are more similar than this, the lower scoring, then shorter, contig is dropped",
+            "maximum": 1,
+            "minimum": 0,
+            "type": "number"
+        },
+        "validate.assembly_strand_concordance": {
+            "default": 0.51,
+            "description": "When the number of remapped reads from each strand are compared, the ratio must be above this number to decide on the strand",
+            "maximum": 1,
+            "minimum": 0,
+            "type": "number"
+        },
+        "validate.blat_limit_top_aln": {
+            "default": 10,
+            "description": "Number of results to return from blat (ranking based on score)",
+            "type": "integer"
+        },
+        "validate.blat_min_identity": {
+            "default": 0.9,
+            "description": "The minimum percent identity match required for blat results when aligning contigs",
+            "maximum": 1,
+            "minimum": 0,
+            "type": "number"
+        },
+        "validate.call_error": {
+            "default": 10,
+            "description": "Buffer zone for the evidence window",
+            "type": "integer"
+        },
+        "validate.clean_aligner_files": {
+            "default": false,
+            "description": "Remove the aligner output files after the validation stage is complete. not required for subsequent steps but can be useful in debugging and deep investigation of events",
+            "type": "boolean"
+        },
+        "validate.contig_aln_max_event_size": {
+            "default": 50,
+            "description": "Relates to determining breakpoints when pairing contig alignments. for any given read in a putative pair the soft clipping is extended to include any events of greater than this size. the softclipping is added to the side of the alignment as indicated by the breakpoint we are assigning pairs to",
+            "type": "integer"
+        },
+        "validate.contig_aln_merge_inner_anchor": {
+            "default": 20,
+            "description": "The minimum number of consecutive exact match base pairs to not merge events within a contig alignment",
+            "type": "integer"
+        },
+        "validate.contig_aln_merge_outer_anchor": {
+            "default": 15,
+            "description": "Minimum consecutively aligned exact matches to anchor an end for merging internal events",
+            "type": "integer"
+        },
+        "validate.contig_aln_min_anchor_size": {
+            "default": 50,
+            "description": "The minimum number of aligned bases for a contig (m or =) in order to simplify. do not have to be consecutive",
+            "type": "integer"
+        },
+        "validate.contig_aln_min_extend_overlap": {
+            "default": 10,
+            "description": "Minimum number of bases the query coverage interval must be extended by in order to pair alignments as a single split alignment",
+            "type": "integer"
+        },
+        "validate.contig_aln_min_query_consumption": {
+            "default": 0.9,
+            "description": "Minimum fraction of the original query sequence that must be used by the read(s) of the alignment",
+            "maximum": 1,
+            "minimum": 0,
+            "type": "number"
+        },
+        "validate.contig_aln_min_score": {
+            "default": 0.9,
+            "description": "Minimum score for a contig to be used as evidence in a call by contig",
+            "maximum": 1,
+            "minimum": 0,
+            "type": "number"
+        },
+        "validate.fetch_min_bin_size": {
+            "default": 50,
+            "description": "The minimum size of any bin for reading from a bam file. increasing this number will result in smaller bins being merged or fewer bins being created (depending on the fetch method)",
+            "type": "integer"
+        },
+        "validate.fetch_reads_bins": {
+            "default": 5,
+            "description": "Number of bins to split an evidence window into to ensure more even sampling of high coverage regions",
+            "type": "integer"
+        },
+        "validate.fetch_reads_limit": {
+            "default": 3000,
+            "description": "Maximum number of reads, cap, to loop over for any given evidence window",
+            "type": "integer"
+        },
+        "validate.filter_secondary_alignments": {
+            "default": true,
+            "description": "Filter secondary alignments when gathering read evidence",
+            "type": "boolean"
+        },
+        "validate.fuzzy_mismatch_number": {
+            "default": 1,
+            "description": "The number of events/mismatches allowed to be considered a fuzzy match",
+            "type": "integer"
+        },
+        "validate.max_sc_preceeding_anchor": {
+            "default": 6,
+            "description": "When remapping a softclipped read this determines the amount of softclipping allowed on the side opposite of where we expect it. for example for a softclipped read on a breakpoint with a left orientation this limits the amount of softclipping that is allowed on the right. if this is set to none then there is no limit on softclipping",
+            "type": "integer"
+        },
+        "validate.min_anchor_exact": {
+            "default": 6,
+            "description": "Applies to re-aligning softclipped reads to the opposing breakpoint. the minimum number of consecutive exact matches to anchor a read to initiate targeted realignment",
+            "type": "integer"
+        },
+        "validate.min_anchor_fuzzy": {
+            "default": 10,
+            "description": "Applies to re-aligning softclipped reads to the opposing breakpoint. the minimum length of a fuzzy match to anchor a read to initiate targeted realignment",
+            "type": "integer"
+        },
+        "validate.min_anchor_match": {
+            "default": 0.9,
+            "description": "Minimum percent match for a read to be kept as evidence",
+            "maximum": 1,
+            "minimum": 0,
+            "type": "number"
+        },
+        "validate.min_call_complexity": {
+            "default": 0.1,
+            "description": "The minimum complexity score for a call sequence. is an average for non-contig calls. filters low complexity contigs before alignment. see [contig_complexity](#contig_complexity)",
+            "maximum": 1,
+            "minimum": 0,
+            "type": "number"
+        },
+        "validate.min_double_aligned_to_estimate_insertion_size": {
+            "default": 2,
+            "description": "The minimum number of reads which map soft-clipped to both breakpoints to assume the size of the untemplated sequence between the breakpoints is at most the read length - 2 * min_softclipping",
+            "type": "integer"
+        },
+        "validate.min_flanking_pairs_resolution": {
+            "default": 10,
+            "description": "The minimum number of flanking reads required to call a breakpoint by flanking evidence",
+            "type": "integer"
+        },
+        "validate.min_linking_split_reads": {
+            "default": 2,
+            "description": "The minimum number of split reads which aligned to both breakpoints",
+            "type": "integer"
+        },
+        "validate.min_mapping_quality": {
+            "default": 5,
+            "description": "The minimum mapping quality of reads to be used as evidence",
+            "type": "integer"
+        },
+        "validate.min_non_target_aligned_split_reads": {
+            "default": 1,
+            "description": "The minimum number of split reads aligned to a breakpoint by the input bam and not forced by local alignment to the target region to call a breakpoint by split read evidence",
+            "type": "integer"
+        },
+        "validate.min_sample_size_to_apply_percentage": {
+            "default": 10,
+            "description": "Minimum number of aligned bases to compute a match percent. if there are fewer than this number of aligned bases (match or mismatch) the percent comparator is not used",
+            "type": "integer"
+        },
+        "validate.min_softclipping": {
+            "default": 6,
+            "description": "Minimum number of soft-clipped bases required for a read to be used as soft-clipped evidence",
+            "type": "integer"
+        },
+        "validate.min_spanning_reads_resolution": {
+            "default": 5,
+            "description": "Minimum number of spanning reads required to call an event by spanning evidence",
+            "type": "integer"
+        },
+        "validate.min_splits_reads_resolution": {
+            "default": 3,
+            "description": "Minimum number of split reads required to call a breakpoint by split reads",
+            "type": "integer"
+        },
+        "validate.outer_window_min_event_size": {
+            "default": 125,
+            "description": "The minimum size of an event in order for flanking read evidence to be collected",
+            "type": "integer"
+        },
+        "validate.stdev_count_abnormal": {
+            "default": 3,
+            "description": "The number of standard deviations away from the normal considered expected and therefore not qualifying as flanking reads",
+            "type": "number"
+        },
+        "validate.trans_fetch_reads_limit": {
+            "default": 12000,
+            "description": "Related to [fetch_reads_limit](#fetch_reads_limit). overrides fetch_reads_limit for transcriptome libraries when set. if this has a value of none then fetch_reads_limit will be used for transcriptome libraries instead",
+            "type": [
+                "integer",
+                "null"
+            ]
+        },
+        "validate.trans_min_mapping_quality": {
+            "default": 0,
+            "description": "Related to [min_mapping_quality](#min_mapping_quality). overrides the min_mapping_quality if the library is a transcriptome and this is set to any number not none. if this value is none, min_mapping_quality is used for transcriptomes as well as genomes",
+            "type": [
+                "integer",
+                "null"
+            ]
+        },
+        "validate.write_evidence_files": {
+            "default": true,
+            "description": "Write the intermediate bam and bed files containing the raw evidence collected and contigs aligned. not required for subsequent steps but can be useful in debugging and deep investigation of events",
+            "type": "boolean"
+        }
+    },
+    "anyOf": [
+        {
+            "not": {
+                "properties": {
+                    "skip_stage.validate": {
+                        "const": true
+                    }
+                },
+                "required": [
+                    "reference.aligner_reference"
+                ]
+            }
+        }
+    ],
+    "type": "object"
+}
diff --git a/mavis/schemas/overlay.json b/mavis/schemas/overlay.json
new file mode 100644
index 00000000..3fe89cf5
--- /dev/null
+++ b/mavis/schemas/overlay.json
@@ -0,0 +1,142 @@
+{
+    "$schema": "http://json-schema.org/draft-07/schema#",
+    "additionalProperties": false,
+    "properties": {
+        "illustrate.breakpoint_color": {
+            "default": "#000000",
+            "description": "Breakpoint outline color",
+            "type": "string"
+        },
+        "illustrate.domain_color": {
+            "default": "#ccccb3",
+            "description": "Domain fill color",
+            "type": "string"
+        },
+        "illustrate.domain_mismatch_color": {
+            "default": "#b2182b",
+            "description": "Domain fill color on 0%% match",
+            "type": "string"
+        },
+        "illustrate.domain_name_regex_filter": {
+            "default": "^PF\\d+$",
+            "description": "The regular expression used to select domains to be displayed (filtered by name)",
+            "type": "string"
+        },
+        "illustrate.domain_scaffold_color": {
+            "default": "#000000",
+            "description": "The color of the domain scaffold",
+            "type": "string"
+        },
+        "illustrate.drawing_width_iter_increase": {
+            "default": 500,
+            "description": "The amount (in pixels) by which to increase the drawing width upon failure to fit",
+            "type": "integer"
+        },
+        "illustrate.exon_min_focus_size": {
+            "default": 10,
+            "description": "Minimum size of an exon for it to be granted a label or min exon width",
+            "type": "integer"
+        },
+        "illustrate.gene1_color": {
+            "default": "#657e91",
+            "description": "The color of genes near the first gene",
+            "pattern": "^#[a-zA-Z0-9]{6}",
+            "type": "string"
+        },
+        "illustrate.gene1_color_selected": {
+            "default": "#518dc5",
+            "description": "The color of the first gene",
+            "pattern": "^#[a-zA-Z0-9]{6}",
+            "type": "string"
+        },
+        "illustrate.gene2_color": {
+            "default": "#325556",
+            "description": "The color of genes near the second gene",
+            "pattern": "^#[a-zA-Z0-9]{6}",
+            "type": "string"
+        },
+        "illustrate.gene2_color_selected": {
+            "default": "#4c9677",
+            "description": "The color of the second gene",
+            "pattern": "^#[a-zA-Z0-9]{6}",
+            "type": "string"
+        },
+        "illustrate.label_color": {
+            "default": "#000000",
+            "description": "The label color",
+            "pattern": "^#[a-zA-Z0-9]{6}",
+            "type": "string"
+        },
+        "illustrate.mask_fill": {
+            "default": "#ffffff",
+            "description": "Color of mask (for deleted region etc.)",
+            "pattern": "^#[a-zA-Z0-9]{6}",
+            "type": "string"
+        },
+        "illustrate.mask_opacity": {
+            "default": 0.7,
+            "description": "Opacity of the mask layer",
+            "maximum": 1,
+            "minimum": 0,
+            "type": "number"
+        },
+        "illustrate.max_drawing_retries": {
+            "default": 5,
+            "description": "The maximum number of retries for attempting a drawing. each iteration the width is extended. if it is still insufficient after this number a gene-level only drawing will be output",
+            "type": "integer"
+        },
+        "illustrate.novel_exon_color": {
+            "default": "#5D3F6A",
+            "description": "Novel exon fill color",
+            "pattern": "^#[a-zA-Z0-9]{6}",
+            "type": "string"
+        },
+        "illustrate.scaffold_color": {
+            "default": "#000000",
+            "description": "The color used for the gene/transcripts scaffolds",
+            "pattern": "^#[a-zA-Z0-9]{6}",
+            "type": "string"
+        },
+        "illustrate.splice_color": {
+            "default": "#000000",
+            "description": "Splicing lines color",
+            "pattern": "^#[a-zA-Z0-9]{6}",
+            "type": "string"
+        },
+        "illustrate.width": {
+            "default": 1000,
+            "description": "The drawing width in pixels",
+            "type": "integer"
+        },
+        "log": {
+            "type": "string"
+        },
+        "log_level": {
+            "default": "INFO",
+            "enum": [
+                "INFO",
+                "DEBUG"
+            ],
+            "type": "string"
+        },
+        "reference.annotations": {
+            "examples": [
+                "tests/data/mock_annotations.json"
+            ],
+            "items": {
+                "type": "string"
+            },
+            "minItems": 1,
+            "type": "array"
+        },
+        "validate.min_mapping_quality": {
+            "default": 5,
+            "description": "The minimum mapping quality of reads to be used as evidence",
+            "type": "integer"
+        }
+    },
+    "required": [
+        "reference.annotations"
+    ],
+    "type": "object"
+}
diff --git a/mavis/summary/main.py b/mavis/summary/main.py
index b0a51352..7e46f22d 100644
--- a/mavis/summary/main.py
+++ b/mavis/summary/main.py
@@ -1,11 +1,15 @@
-from functools import partial
 import os
 import re
 import time
+from functools import partial
+from typing import Dict, List
 
 import tab
 
-from .constants import DEFAULTS, HOMOPOLYMER_MIN_LENGTH
+from ..annotate.file_io import ReferenceFile
+from ..constants import CALL_METHOD, COLUMNS, PROTOCOL, SVTYPE
+from ..util import LOG, generate_complete_stamp, output_tabbed_file, read_inputs, soft_cast
+from .constants import HOMOPOLYMER_MIN_LENGTH
 from .summary import (
     annotate_dgv,
     filter_by_annotations,
@@ -14,9 +18,6 @@
     get_pairing_state,
     group_by_distance,
 )
-from ..constants import CALL_METHOD, COLUMNS, PROTOCOL, SVTYPE
-from ..pairing.constants import DEFAULTS as PAIRING_DEFAULTS
-from ..util import generate_complete_stamp, LOG, output_tabbed_file, read_inputs, soft_cast
 
 
 def soft_cast_null(value):
@@ -26,36 +27,17 @@ def soft_cast_null(value):
         return value
 
 
-def main(
-    inputs,
-    output,
-    annotations,
-    dgv_annotation=None,
-    filter_cdna_synon=DEFAULTS.filter_cdna_synon,
-    filter_protein_synon=DEFAULTS.filter_protein_synon,
-    filter_min_remapped_reads=DEFAULTS.filter_min_remapped_reads,
-    filter_min_spanning_reads=DEFAULTS.filter_min_spanning_reads,
-    filter_min_flanking_reads=DEFAULTS.filter_min_flanking_reads,
-    filter_min_split_reads=DEFAULTS.filter_min_split_reads,
-    filter_trans_homopolymers=DEFAULTS.filter_trans_homopolymers,
-    filter_min_linking_split_reads=DEFAULTS.filter_min_linking_split_reads,
-    filter_min_complexity=DEFAULTS.filter_min_complexity,
-    flanking_call_distance=PAIRING_DEFAULTS.flanking_call_distance,
-    split_call_distance=PAIRING_DEFAULTS.split_call_distance,
-    contig_call_distance=PAIRING_DEFAULTS.contig_call_distance,
-    spanning_call_distance=PAIRING_DEFAULTS.spanning_call_distance,
-    start_time=int(time.time()),
-    **kwargs
-):
-    annotations.load()
-    if dgv_annotation:
+def main(inputs: List[str], output: str, config: Dict, start_time=int(time.time())):
+    annotations = ReferenceFile.load_from_config(config, 'annotations', eager_load=True)
+    dgv_annotation = ReferenceFile.load_from_config(config, 'dgv_annotation')
+    if not dgv_annotation.is_empty():
         dgv_annotation.load()
     # pairing threshold parameters to be defined in config file
     distances = {
-        CALL_METHOD.FLANK: flanking_call_distance,
-        CALL_METHOD.SPLIT: split_call_distance,
-        CALL_METHOD.CONTIG: contig_call_distance,
-        CALL_METHOD.SPAN: spanning_call_distance,
+        CALL_METHOD.FLANK: config['pairing.flanking_call_distance'],
+        CALL_METHOD.SPLIT: config['pairing.split_call_distance'],
+        CALL_METHOD.CONTIG: config['pairing.contig_call_distance'],
+        CALL_METHOD.SPAN: config['pairing.spanning_call_distance'],
     }
 
     bpps = []
@@ -147,17 +129,17 @@ def main(
 
     for bpp in bpps:
         # filter by synonymous and RNA homopolymers
-        if filter_protein_synon and bpp.protein_synon:
+        if config['summary.filter_protein_synon'] and bpp.protein_synon:
             bpp.data[COLUMNS.filter_comment] = 'synonymous protein'
             filtered_pairs.append(bpp)
             continue
-        elif filter_cdna_synon and bpp.cdna_synon:
+        elif config['summary.filter_cdna_synon'] and bpp.cdna_synon:
             bpp.data[COLUMNS.filter_comment] = 'synonymous cdna'
             filtered_pairs.append(bpp)
             continue
         elif all(
             [
-                filter_trans_homopolymers,
+                config['summary.filter_trans_homopolymers'],
                 bpp.protocol == PROTOCOL.TRANS,
                 bpp.data.get(COLUMNS.repeat_count, None),
                 bpp.event_type in [SVTYPE.DUP, SVTYPE.INS, SVTYPE.DEL],
@@ -182,7 +164,7 @@ def main(
                     continue
         # filter based on the sequence call complexity
         sc = str(bpp.data.get(COLUMNS.call_sequence_complexity, 'none')).lower()
-        if sc != 'none' and float(sc) < filter_min_complexity:
+        if sc != 'none' and float(sc) < config['summary.filter_min_complexity']:
             bpp.data[COLUMNS.filter_comment] = 'low complexity'
             filtered_pairs.append(bpp)
             continue
@@ -192,11 +174,11 @@ def main(
     # filter based on minimum evidence levels
     bpps, filtered = filter_by_evidence(
         bpps,
-        filter_min_remapped_reads=filter_min_remapped_reads,
-        filter_min_spanning_reads=filter_min_spanning_reads,
-        filter_min_flanking_reads=filter_min_flanking_reads,
-        filter_min_split_reads=filter_min_split_reads,
-        filter_min_linking_split_reads=filter_min_linking_split_reads,
+        filter_min_remapped_reads=config['summary.filter_min_remapped_reads'],
+        filter_min_spanning_reads=config['summary.filter_min_spanning_reads'],
+        filter_min_flanking_reads=config['summary.filter_min_flanking_reads'],
+        filter_min_split_reads=config['summary.filter_min_split_reads'],
+        filter_min_linking_split_reads=config['summary.filter_min_linking_split_reads'],
     )
     for pair in filtered:
         pair.data[COLUMNS.filter_comment] = 'low evidence'
@@ -340,7 +322,7 @@ def main(
     rows = []
     for lib in bpps_by_library:
         LOG('annotating dgv for', lib)
-        if dgv_annotation:
+        if not dgv_annotation.is_empty():
             annotate_dgv(
                 bpps_by_library[lib], dgv_annotation.content, distance=10
             )  # TODO make distance a parameter
@@ -401,3 +383,4 @@ def main(
             ):
                 lib_rows.append(row)
         output_tabbed_file(lib_rows, filename, header=output_columns)
+    generate_complete_stamp(output, LOG)
diff --git a/mavis/validate/base.py b/mavis/validate/base.py
index e47b12ae..911e4984 100644
--- a/mavis/validate/base.py
+++ b/mavis/validate/base.py
@@ -1,6 +1,6 @@
 import itertools
 import logging
-from .constants import DEFAULTS
+
 from ..assemble import assemble
 from ..bam import cigar as _cigar
 from ..bam import read as _read
@@ -13,13 +13,14 @@
     ORIENT,
     PROTOCOL,
     PYSAM_READ_FLAGS,
-    reverse_complement,
     STRAND,
     SVTYPE,
+    reverse_complement,
 )
 from ..error import NotSpecifiedError
 from ..interval import Interval
 from ..util import DEVNULL
+from .constants import DEFAULTS
 
 
 class Evidence(BreakpointPair):
diff --git a/mavis/validate/constants.py b/mavis/validate/constants.py
index 652cef7b..a1a84ed5 100644
--- a/mavis/validate/constants.py
+++ b/mavis/validate/constants.py
@@ -1,5 +1,5 @@
-from ..constants import float_fraction
 from ..align import SUPPORTED_ALIGNER
+from ..constants import float_fraction
 from ..util import WeakMavisNamespace
 
 PASS_FILENAME = 'validation-passed.tab'
diff --git a/mavis/validate/main.py b/mavis/validate/main.py
index 547d1452..6dc14203 100644
--- a/mavis/validate/main.py
+++ b/mavis/validate/main.py
@@ -4,36 +4,38 @@
 import re
 import time
 import warnings
+from typing import Dict, List
 
 import pysam
 from shortuuid import uuid
 
-from .call import call_events
-from .constants import DEFAULTS, PASS_FILENAME
-from .evidence import GenomeEvidence, TranscriptomeEvidence
-from ..align import align_sequences, select_contig_alignments, SUPPORTED_ALIGNER
+from ..align import SUPPORTED_ALIGNER, align_sequences, select_contig_alignments
 from ..annotate.base import BioInterval
+from ..annotate.file_io import ReferenceFile
 from ..bam import cigar as _cigar
 from ..bam.cache import BamCache
 from ..breakpoint import BreakpointPair
-from ..constants import CALL_METHOD, COLUMNS, MavisNamespace, PROTOCOL
-from ..util import filter_on_overlap, LOG, mkdirp, output_tabbed_file, read_inputs, write_bed_file
+from ..config import get_by_prefix
+from ..constants import CALL_METHOD, COLUMNS, PROTOCOL
+from ..util import (
+    LOG,
+    filter_on_overlap,
+    generate_complete_stamp,
+    mkdirp,
+    output_tabbed_file,
+    read_inputs,
+    write_bed_file,
+)
+from .call import call_events
+from .constants import PASS_FILENAME
+from .evidence import GenomeEvidence, TranscriptomeEvidence
 
 
 def main(
-    inputs,
-    output,
-    bam_file,
-    strand_specific,
-    library,
-    protocol,
-    median_fragment_size,
-    stdev_fragment_size,
-    read_length,
-    reference_genome,
-    annotations,
-    masking,
-    aligner_reference,
+    inputs: List[str],
+    output: str,
+    library: str,
+    config: Dict,
     start_time=int(time.time()),
     **kwargs
 ):
@@ -52,16 +54,15 @@ def main(
         aligner_reference (mavis.annotate.file_io.ReferenceFile): path to the aligner reference file (e.g 2bit file for blat)
     """
     mkdirp(output)
-    # check the files exist early to avoid waiting for errors
-    if protocol == PROTOCOL.TRANS:
-        annotations.load()
-    reference_genome.load()
-    masking.load()
-
-    validation_settings = {}
-    validation_settings.update(DEFAULTS.items())
-    validation_settings.update({k: v for k, v in kwargs.items() if k in DEFAULTS})
-    validation_settings = MavisNamespace(**validation_settings)
+    reference_genome = ReferenceFile.load_from_config(config, 'reference_genome', eager_load=True)
+    annotations = ReferenceFile.load_from_config(
+        config,
+        'annotations',
+        eager_load=bool(config['libraries'][library]['protocol'] == PROTOCOL.TRANS),
+    )
+    masking = ReferenceFile.load_from_config(config, 'masking')
+    if not masking.is_empty():
+        masking.load()
 
     raw_evidence_bam = os.path.join(output, 'raw_evidence.bam')
     contig_bam = os.path.join(output, 'contigs.bam')
@@ -71,21 +72,23 @@ def main(
     passed_bed_file = os.path.join(output, 'validation-passed.bed')
     failed_output_file = os.path.join(output, 'validation-failed.tab')
     contig_aligner_fa = os.path.join(output, 'contigs.fa')
-    if validation_settings.aligner == SUPPORTED_ALIGNER.BLAT:
+    if config['validate.aligner'] == SUPPORTED_ALIGNER.BLAT:
         contig_aligner_output = os.path.join(output, 'contigs.blat_out.pslx')
         contig_aligner_log = os.path.join(output, 'contigs.blat.log')
-    elif validation_settings.aligner == SUPPORTED_ALIGNER.BWA_MEM:
+    elif config['validate.aligner'] == SUPPORTED_ALIGNER.BWA_MEM:
         contig_aligner_output = os.path.join(output, 'contigs.bwa_mem.sam')
         contig_aligner_log = os.path.join(output, 'contigs.bwa_mem.log')
     else:
-        raise NotImplementedError('unsupported aligner', validation_settings.aligner)
+        raise NotImplementedError('unsupported aligner', config['validate.aligner'])
     igv_batch_file = os.path.join(output, 'igv.batch')
-    input_bam_cache = BamCache(bam_file, strand_specific)
+    input_bam_cache = BamCache(
+        config['libraries'][library]['bam_file'], config['libraries'][library]['strand_specific']
+    )
 
     bpps = read_inputs(
         inputs,
         add_default={COLUMNS.cluster_id: None, COLUMNS.stranded: False},
-        add={COLUMNS.protocol: protocol, COLUMNS.library: library},
+        add={COLUMNS.protocol: config['libraries'][library]['protocol'], COLUMNS.library: library},
         expand_strand=False,
         expand_orient=True,
         cast={COLUMNS.cluster_id: lambda x: str(uuid()) if not x else x},
@@ -103,10 +106,10 @@ def main(
                     stranded=bpp.stranded,
                     untemplated_seq=bpp.untemplated_seq,
                     data=bpp.data,
-                    stdev_fragment_size=stdev_fragment_size,
-                    read_length=read_length,
-                    median_fragment_size=median_fragment_size,
-                    **dict(validation_settings.items())
+                    stdev_fragment_size=config['libraries'][library]['stdev_fragment_size'],
+                    read_length=config['libraries'][library]['read_length'],
+                    median_fragment_size=config['libraries'][library]['median_fragment_size'],
+                    **get_by_prefix(config, 'validate.')
                 )
                 evidence_clusters.append(evidence)
             except ValueError as err:
@@ -125,10 +128,11 @@ def main(
                     stranded=bpp.stranded,
                     untemplated_seq=bpp.untemplated_seq,
                     data=bpp.data,
-                    stdev_fragment_size=stdev_fragment_size,
-                    read_length=read_length,
-                    median_fragment_size=median_fragment_size,
-                    **dict(validation_settings.items())
+                    stdev_fragment_size=config['libraries'][library]['stdev_fragment_size'],
+                    read_length=config['libraries'][library]['read_length'],
+                    median_fragment_size=config['libraries'][library]['median_fragment_size'],
+                    strand_determining_read=config['libraries'][library]['strand_determining_read'],
+                    **get_by_prefix(config, 'validate.')
                 )
                 evidence_clusters.append(evidence)
             except ValueError as err:
@@ -141,7 +145,12 @@ def main(
         extended_masks[chrom] = []
         for mask in masks:
             extended_masks[chrom].append(
-                BioInterval(chrom, mask.start - read_length, mask.end + read_length, name=mask.name)
+                BioInterval(
+                    chrom,
+                    mask.start - config['libraries'][library]['read_length'],
+                    mask.end + config['libraries'][library]['read_length'],
+                    name=mask.name,
+                )
             )
 
     evidence_clusters, filtered_evidence_clusters = filter_on_overlap(
@@ -215,12 +224,12 @@ def main(
         reference_genome=reference_genome.content,
         aligner_fa_input_file=contig_aligner_fa,
         aligner_output_file=contig_aligner_output,
-        clean_files=validation_settings.clean_aligner_files,
-        aligner=kwargs.get('aligner', validation_settings.aligner),
-        aligner_reference=aligner_reference.name[0],
+        clean_files=config['validate.clean_aligner_files'],
+        aligner=kwargs.get('aligner', config['validate.aligner']),
+        aligner_reference=config['reference.aligner_reference'][0],
         aligner_output_log=contig_aligner_log,
-        blat_min_identity=kwargs.get('blat_min_identity', validation_settings.blat_min_identity),
-        blat_limit_top_aln=kwargs.get('blat_limit_top_aln', validation_settings.blat_limit_top_aln),
+        blat_min_identity=kwargs.get('blat_min_identity', config['validate.blat_min_identity']),
+        blat_limit_top_aln=kwargs.get('blat_limit_top_aln', config['validate.blat_limit_top_aln']),
         log=LOG,
     )
     for evidence in evidence_clusters:
@@ -331,7 +340,7 @@ def main(
         itertools.chain.from_iterable([e.get_bed_repesentation() for e in event_calls]),
     )
 
-    if validation_settings.write_evidence_files:
+    if config['validate.write_evidence_files']:
         with pysam.AlignmentFile(contig_bam, 'wb', template=input_bam_cache.fh) as fh:
             LOG('writing:', contig_bam, time_stamp=True)
             for evidence in evidence_clusters:
@@ -376,4 +385,11 @@ def main(
             fh.write('load {} name="{}"\n'.format(contig_bam, 'aligned contigs'))
             fh.write('load {} name="{}"\n'.format(evidence_bed, 'evidence windows'))
             fh.write('load {} name="{}"\n'.format(raw_evidence_bam, 'raw evidence'))
-            fh.write('load {} name="{} {} input"\n'.format(bam_file, library, protocol))
+            library_config = config['libraries'][library]
+            fh.write(
+                'load {} name="{} {} input"\n'.format(
+                    library_config['bam_file'], library, library_config['protocol']
+                )
+            )
+    # NOTE(review): dedented so the stamp is written even when evidence files are skipped
+    generate_complete_stamp(output, LOG, start_time=start_time)
diff --git a/setup.py b/setup.py
index 07683e14..dd23fb29 100644
--- a/setup.py
+++ b/setup.py
@@ -3,7 +3,7 @@
 
 from setuptools import find_packages, setup
 
-VERSION = '2.2.7'
+VERSION = '2.2.8'
 
 
 def parse_md_readme():
@@ -15,7 +15,7 @@ def parse_md_readme():
 
         rst_lines = parse_from_file('README.md').split('\n')
         long_description = [
-            '.. image:: http://mavis.bcgsc.ca/images/acronym.svg\n\n|\n'
+            '.. image:: http://mavis.bcgsc.ca/docs/latest/_static/acronym.svg\n\n|\n'
         ]  # backup since pip can't handle raw directives
         i = 0
         while i < len(rst_lines):
@@ -85,10 +85,12 @@ def check_nonpython_dependencies():
     'colour',
     'networkx==1.11.0',
     'numpy>=1.13.1',
+    'pandas>=1.1, <2',
     'pysam>=0.9, <=0.15.2',
     'pyvcf==0.6.8',
     'shortuuid>=0.5.0',
     'svgwrite',
+    'snakemake>=6.1.1, <7',
 ]
 
 DEPLOY_REQS = ['twine', 'm2r', 'wheel']
@@ -122,6 +124,8 @@ def check_nonpython_dependencies():
             'calculate_ref_alt_counts = tools.calculate_ref_alt_counts:main',
         ]
     },
+    include_package_data=True,
+    package_data={'mavis': ['schemas/config.json', 'schemas/overlay.json']},  # kept in-package
     project_urls={'mavis': 'http://mavis.bcgsc.ca'},
 )
 check_nonpython_dependencies()
diff --git a/tests/end_to_end/__init__.py b/tests/end_to_end/__init__.py
index 3422c76b..e69de29b 100644
--- a/tests/end_to_end/__init__.py
+++ b/tests/end_to_end/__init__.py
@@ -1,21 +0,0 @@
-import glob
-import os
-
-
-def glob_exists(*pos, strict=False, n=1):
-    globexpr = os.path.join(*pos)
-    file_list = glob.glob(globexpr)
-    if strict and len(file_list) == n:
-        return file_list[0] if len(file_list) == 1 else file_list
-    elif not strict and len(file_list) > 0:
-        return file_list
-    else:
-        print(globexpr)
-        print(file_list)
-        return False
-
-
-def glob_not_exists(*pos):
-    globexpr = os.path.join(*pos)
-    file_list = glob.glob(globexpr)
-    return not file_list
diff --git a/tests/end_to_end/test_config.py b/tests/end_to_end/test_config.py
deleted file mode 100644
index 59c3f47d..00000000
--- a/tests/end_to_end/test_config.py
+++ /dev/null
@@ -1,202 +0,0 @@
-import argparse
-import glob
-import itertools
-import os
-import shutil
-import statistics
-import sys
-import tempfile
-import unittest
-from unittest import mock
-
-from mavis.constants import SUBCOMMAND
-from mavis.main import main
-from mavis.tools import SUPPORTED_TOOL
-from mavis.util import unique_exists
-
-from ..util import get_data
-
-
-ARGERROR_EXIT_CODE = 2
-
-
-class TestConfig(unittest.TestCase):
-    def setUp(self):
-        if 'MAVIS_ANNOTATIONS' in os.environ:
-            del os.environ['MAVIS_ANNOTATIONS']
-        self.temp_output = tempfile.mkdtemp()
-        # [--library <name> {genome,transcriptome} {diseased,normal} [strand_specific] [/path/to/bam/file]]
-        self.genome = ['--library', 'mock_genome', 'genome', 'diseased']
-        self.genome_bam = get_data('mock_reads_for_events.sorted.bam')
-        self.trans = ['--library', 'mock_trans', 'transcriptome', 'diseased']
-        self.trans_bam = get_data('mock_trans_reads_for_events.sorted.bam')
-        self.annotations = get_data('mock_reference_annotations.json')
-        self.args = ['mavis', SUBCOMMAND.CONFIG]
-        self.input = get_data('mock_sv_events.tsv')
-
-    def run_main(self, exit_status=0):
-        outputfile = os.path.join(self.temp_output, 'config.cfg')
-        self.args.extend(['-w', outputfile])
-        with mock.patch.object(sys, 'argv', [str(a) for a in self.args]):
-            print('sys.argv', sys.argv)
-            try:
-                return_code = main()
-            except SystemExit as ex:
-                return_code = ex.code
-            self.assertEqual(exit_status, return_code)
-
-    def test_no_libs_no_annotations(self):
-        self.run_main()
-
-    def test_no_input_error(self):
-        self.args.extend(self.genome + ['False', self.genome_bam])
-        self.run_main(ARGERROR_EXIT_CODE)
-
-    def test_input_missing_library(self):
-        self.args.extend(
-            self.genome
-            + ['False', self.genome_bam, '--input', self.input, 'mock_genome', 'bad_genome']
-        )
-        self.run_main(ARGERROR_EXIT_CODE)
-
-    def test_assign_missing_library(self):
-        self.args.extend(
-            self.genome
-            + [
-                'False',
-                self.genome_bam,
-                '--input',
-                self.input,
-                'mock_genome',
-                '--assign',
-                'bad_genome',
-                self.input,
-            ]
-        )
-        self.run_main(ARGERROR_EXIT_CODE)
-
-    def test_skip_no_annotations(self):
-        self.args.extend(
-            self.trans
-            + [
-                'False',
-                self.trans_bam,
-                '--input',
-                self.input,
-                'mock_trans',
-                '--skip_stage',
-                SUBCOMMAND.VALIDATE,
-            ]
-        )
-        self.run_main()
-
-    def test_requires_annotations_trans(self):
-        self.args.extend(
-            self.trans + ['False', self.trans_bam, '--input', self.input, 'mock_trans']
-        )
-        self.run_main(ARGERROR_EXIT_CODE)
-
-    def test_require_bam_noskip_error(self):
-        self.args.extend(
-            self.genome + ['--annotations', self.annotations, '--input', self.input, 'mock_genome']
-        )
-        self.run_main(ARGERROR_EXIT_CODE)
-
-    def test_genome_only(self):
-        # should be ok without the annotations file
-        self.args.extend(
-            self.genome + ['False', self.genome_bam, '--input', self.input, 'mock_genome']
-        )
-        self.run_main()
-
-    def test_genome_include_defaults(self):
-        # should be ok without the annotations file
-        self.args.extend(
-            self.genome
-            + ['False', self.genome_bam, '--input', self.input, 'mock_genome', '--add_defaults']
-        )
-        self.run_main()
-
-    def test_trans_with_annotations(self):
-        self.args.extend(
-            itertools.chain(
-                self.genome,
-                [False, self.genome_bam],
-                self.trans,
-                [
-                    True,
-                    self.trans_bam,
-                    '--input',
-                    self.input,
-                    'mock_genome',
-                    'mock_trans',
-                    '--annotations',
-                    self.annotations,
-                ],
-            )
-        )
-        with self.assertRaises(statistics.StatisticsError):  # too few annotations to calc median
-            self.run_main()
-
-    def test_convert_multiple(self):
-        self.args.extend(self.genome + ['False', self.genome_bam])
-        self.args.extend(
-            [
-                '--convert',
-                'ta',
-                'transabyss_events.tab',
-                'transabyss_indels_output.tab',
-                'transabyss',
-            ]
-        )
-        self.args.extend(['--assign', 'mock_genome', 'ta'])
-        self.run_main()
-
-    def test_convert_multiple_strand(self):
-        self.args.extend(self.genome + ['False', self.genome_bam])
-        self.args.extend(
-            [
-                '--convert',
-                'ta',
-                'transabyss_events.tab',
-                'transabyss_indels_output.tab',
-                'transabyss',
-                'False',
-            ]
-        )
-        self.args.extend(['--assign', 'mock_genome', 'ta'])
-        self.run_main()
-
-    def test_convert_quoted(self):
-        self.args.extend(self.genome + ['False', self.genome_bam])
-        self.args.extend(['--convert', 'ta', 'transabyss_{events,indels_output}.tab', 'transabyss'])
-        self.args.extend(['--assign', 'mock_genome', 'ta'])
-        self.run_main()
-
-    def test_convert_quoted_strand(self):
-        self.args.extend(self.genome + ['False', self.genome_bam])
-        self.args.extend(
-            ['--convert', 'ta', 'transabyss_{events,indels_output}.tab', 'transabyss', 'False']
-        )
-        self.args.extend(['--assign', 'mock_genome', 'ta'])
-        self.run_main()
-
-    def test_convert_argument_error(self):
-        self.args.extend(self.genome + ['False', self.genome_bam])
-        self.args.extend(['--convert', 'ta', 'transabyss', 'False'])
-        self.args.extend(['--assign', 'mock_genome', 'ta'])
-        self.run_main(ARGERROR_EXIT_CODE)
-
-    def test_convert_argument_error2(self):
-        self.args.extend(self.genome + ['False', self.genome_bam])
-        self.args.extend(['--convert', 'ta', 'transabyss'])
-        self.args.extend(['--assign', 'mock_genome', 'ta'])
-        self.run_main(ARGERROR_EXIT_CODE)
-
-    def tearDown(self):
-        # remove the temp directory and outputs
-        shutil.rmtree(self.temp_output)
-
-
-if __name__ == '__main__':
-    unittest.main()
diff --git a/tests/end_to_end/test_convert.py b/tests/end_to_end/test_convert.py
index d92a7eae..ff58b064 100644
--- a/tests/end_to_end/test_convert.py
+++ b/tests/end_to_end/test_convert.py
@@ -6,15 +6,13 @@
 import unittest
 from unittest.mock import patch
 
-
 from mavis.constants import ORIENT, SUBCOMMAND, SVTYPE
 from mavis.main import main
 from mavis.tools import SUPPORTED_TOOL
-from mavis.util import unique_exists, read_bpp_from_input_file
+from mavis.util import read_bpp_from_input_file, unique_exists
 
 from ..util import get_data
 
-
 TEMP_OUTPUT = None
 
 
@@ -41,7 +39,7 @@ def run_main(self, inputfile, file_type, strand_specific=False):
             strand_specific,
         ]
         with patch.object(sys, 'argv', args):
-            self.assertEqual(0, main())
+            main()
             print('output', outputfile)
             self.assertTrue(unique_exists(outputfile))
         result = {}
diff --git a/tests/end_to_end/test_full_pipeline.py b/tests/end_to_end/test_full_pipeline.py
deleted file mode 100644
index 29ddae18..00000000
--- a/tests/end_to_end/test_full_pipeline.py
+++ /dev/null
@@ -1,406 +0,0 @@
-import os
-import shutil
-import sys
-import tempfile
-import unittest
-from unittest import mock
-
-from mavis.constants import SUBCOMMAND, EXIT_OK, EXIT_ERROR
-from mavis.main import main
-from mavis.util import unique_exists
-
-from . import glob_exists, glob_not_exists
-from ..util import get_data
-
-
-CONFIG = get_data('pipeline_config.cfg')
-BWA_CONFIG = get_data('bwa_pipeline_config.cfg')
-CLEAN_CONFIG = get_data('clean_pipeline_config.cfg')
-MOCK_GENOME = 'mock-A36971'
-MOCK_TRANS = 'mock-A47933'
-ENV = {e: v for e, v in os.environ.items() if not e.startswith('MAVIS_')}
-ENV.update({'MAVIS_SCHEDULER': 'LOCAL', 'MAVIS_CONCURRENCY_LIMIT': '2'})
-
-
-def print_file_tree(dirname):
-    for root, dirs, files in os.walk(dirname):
-        level = root.replace(dirname, '').count(os.sep)
-        indent = ' ' * 4 * (level)
-        print('{}{}/'.format(indent, os.path.basename(root)))
-        subindent = ' ' * 4 * (level + 1)
-        for f in files:
-            print('{}{}'.format(subindent, f))
-
-
-@unittest.skipIf(
-    not int(os.environ.get('RUN_FULL', 1)),
-    'slower tests will not be run unless the environment variable RUN_FULL is given',
-)
-class TestPipeline(unittest.TestCase):
-    def setUp(self):
-        # create the temp output directory to store file outputs
-        self.temp_output = tempfile.mkdtemp()
-        print('output dir', self.temp_output)
-
-    def check_annotate(self, lib):
-        # run annotation
-        self.assertTrue(glob_exists(self.temp_output, lib, SUBCOMMAND.ANNOTATE))
-        # check the generated files
-        for filename in [
-            'annotations.tab',
-            'annotations.fusion-cdna.fa',
-            'drawings',
-            'drawings/*svg',
-            'drawings/*json',
-            'MAVIS-*.COMPLETE',
-        ]:
-            filename = os.path.join(self.temp_output, lib, SUBCOMMAND.ANNOTATE, '*-1', filename)
-            self.assertTrue(glob_exists(filename), msg=filename)
-
-    def check_validate(self, lib):
-        # run validation
-        self.assertTrue(glob_exists(self.temp_output, lib, SUBCOMMAND.VALIDATE))
-
-        for suffix in [
-            'contigs.bam',
-            'contigs.fa',
-            'contigs.sorted.bam',
-            'contigs.sorted.bam.bai',
-            'evidence.bed',
-            'igv.batch',
-            'raw_evidence.bam',
-            'raw_evidence.sorted.bam',
-            'raw_evidence.sorted.bam.bai',
-            'validation-failed.tab',
-            'validation-passed.tab',
-            'MAVIS-*.COMPLETE',
-        ]:
-            self.assertTrue(
-                glob_exists(self.temp_output, lib, SUBCOMMAND.VALIDATE + '/*-1', suffix), msg=suffix
-            )
-
-    def check_aligner_output_files(self, lib, mem=False):
-        if mem:
-            self.assertTrue(
-                glob_exists(
-                    self.temp_output, lib, SUBCOMMAND.VALIDATE + '/*-1', 'contigs.bwa_mem.sam'
-                )
-            )
-            self.assertTrue(
-                glob_exists(
-                    self.temp_output, lib, SUBCOMMAND.VALIDATE + '/*-1', 'contigs.bwa_mem.log'
-                )
-            )
-        else:
-            self.assertTrue(
-                glob_exists(
-                    self.temp_output, lib, SUBCOMMAND.VALIDATE + '/*-1', 'contigs.blat_out.pslx'
-                )
-            )
-
-    def check_cluster(self, lib, skipped=False):
-        self.assertTrue(glob_exists(self.temp_output, lib, SUBCOMMAND.CLUSTER))
-        logfile = os.path.join(
-            self.temp_output, lib, SUBCOMMAND.CLUSTER, 'MC_{}*batch-*.log'.format(lib)
-        )
-        self.assertTrue(glob_exists(logfile), msg=logfile)
-        self.assertTrue(glob_exists(self.temp_output, lib, SUBCOMMAND.CLUSTER, 'batch-*-1.tab'))
-        self.assertTrue(
-            glob_exists(self.temp_output, lib, SUBCOMMAND.CLUSTER, 'filtered_pairs.tab')
-        )
-        self.assertTrue(glob_exists(self.temp_output, lib, SUBCOMMAND.CLUSTER, 'clusters.bed'))
-        if skipped:
-            self.assertFalse(
-                glob_exists(self.temp_output, lib, SUBCOMMAND.CLUSTER, 'cluster_assignment.tab')
-            )
-        else:
-            self.assertTrue(
-                glob_exists(self.temp_output, lib, SUBCOMMAND.CLUSTER, 'cluster_assignment.tab')
-            )
-        self.assertTrue(glob_exists(self.temp_output, lib, SUBCOMMAND.CLUSTER, 'MAVIS-*.COMPLETE'))
-
-    def check_pairing(self):
-        self.assertTrue(glob_exists(self.temp_output, SUBCOMMAND.PAIR))
-        self.assertTrue(glob_exists(self.temp_output, SUBCOMMAND.PAIR, 'mavis_paired*.tab'))
-        self.assertTrue(glob_exists(self.temp_output, SUBCOMMAND.PAIR, 'MAVIS-*.COMPLETE'))
-
-    def check_summary(self, count=3):
-        self.assertTrue(glob_exists(self.temp_output, SUBCOMMAND.SUMMARY))
-        self.assertTrue(
-            glob_exists(self.temp_output, SUBCOMMAND.SUMMARY, 'mavis_summary*.tab', n=count)
-        )
-        self.assertTrue(glob_exists(self.temp_output, SUBCOMMAND.SUMMARY, 'MAVIS-*.COMPLETE'))
-
-    @mock.patch('os.environ', ENV.copy())
-    def test_pipeline_with_bwa(self):
-        main([SUBCOMMAND.SETUP, BWA_CONFIG, '-o', self.temp_output])
-        self.assertTrue(glob_exists(self.temp_output, 'build.cfg'))
-        main([SUBCOMMAND.SCHEDULE, '-o', self.temp_output, '--submit'])
-        # check that the subdirectories were built
-        for lib in [MOCK_GENOME + '_*', MOCK_TRANS + '_*']:
-            self.check_cluster(lib)
-            self.check_validate(lib)
-            self.check_aligner_output_files(lib, mem=True)
-            self.check_annotate(lib)
-
-        self.check_pairing()
-        self.check_summary()
-
-    @mock.patch('os.environ', ENV.copy())
-    def test_error_on_bad_config(self):
-        with self.assertRaises(SystemExit) as err:
-            main([SUBCOMMAND.SETUP, 'thing/that/doesnot/exist.cfg', '-o', self.temp_output])
-        self.assertEqual(2, err.exception.code)
-
-    @mock.patch('os.environ', ENV.copy())
-    def test_error_on_bad_input_file(self):
-        with self.assertRaises(FileNotFoundError):
-            main([SUBCOMMAND.SETUP, get_data('bad_input_file.cfg'), '-o', self.temp_output])
-
-    @mock.patch('os.environ', ENV.copy())
-    def test_missing_reference(self):
-        with self.assertRaises(OSError):
-            main([SUBCOMMAND.SETUP, get_data('missing_reference.cfg'), '-o', self.temp_output])
-
-    @mock.patch('os.environ', ENV.copy())
-    def test_full_pipeline(self):
-        main([SUBCOMMAND.SETUP, CONFIG, '-o', self.temp_output])
-        self.assertTrue(glob_exists(self.temp_output, 'build.cfg'))
-
-        main([SUBCOMMAND.SCHEDULE, '-o', self.temp_output, '--submit'])
-        # check that the subdirectories were built
-        for lib in [MOCK_GENOME + '_*', MOCK_TRANS + '_*']:
-            self.check_cluster(lib)
-            self.check_validate(lib)
-            self.check_aligner_output_files(lib)
-            self.check_annotate(lib)
-
-        self.check_pairing()
-        self.check_summary()
-
-        retcode = main([SUBCOMMAND.SCHEDULE, '-o', self.temp_output])
-        self.assertEqual(EXIT_OK, retcode)
-
-    @mock.patch('os.environ', ENV.copy())
-    def test_no_optional_files(self):
-        main([SUBCOMMAND.SETUP, get_data('no_opt_pipeline.cfg'), '-o', self.temp_output])
-        self.assertTrue(glob_exists(self.temp_output, 'build.cfg'))
-
-        main([SUBCOMMAND.SCHEDULE, '-o', self.temp_output, '--submit'])
-        # check that the subdirectories were built
-        for lib in [MOCK_GENOME + '_*', MOCK_TRANS + '_*']:
-            self.check_cluster(lib)
-            self.check_validate(lib)
-            self.check_aligner_output_files(lib)
-            self.check_annotate(lib)
-        self.check_pairing()
-        self.check_summary()
-
-    @mock.patch('os.environ', ENV.copy())
-    def test_reference_from_env(self):
-        os.environ.update(
-            {
-                'MAVIS_TEMPLATE_METADATA': get_data('cytoBand.txt'),
-                'MAVIS_ANNOTATIONS': get_data('mock_annotations.json'),
-                'MAVIS_MASKING': get_data('mock_masking.tab'),
-                'MAVIS_REFERENCE_GENOME': get_data('mock_reference_genome.fa'),
-                'MAVIS_ALIGNER_REFERENCE': get_data('mock_reference_genome.2bit'),
-                'MAVIS_DGV_ANNOTATION': get_data('mock_dgv_annotation.txt'),
-            }
-        )
-        main([SUBCOMMAND.SETUP, get_data('reference_from_env.cfg'), '-o', self.temp_output])
-        self.assertTrue(glob_exists(self.temp_output, 'build.cfg'))
-        main([SUBCOMMAND.SCHEDULE, '-o', self.temp_output, '--submit'])
-        # check that the subdirectories were built
-        for lib in [MOCK_GENOME + '_*']:
-            self.check_cluster(lib)
-            self.check_validate(lib)
-            self.check_aligner_output_files(lib)
-            self.check_annotate(lib)
-        self.check_pairing()
-        self.check_summary(count=2)
-
-    @mock.patch('os.environ', ENV.copy())
-    def test_clean_files(self):
-        main([SUBCOMMAND.SETUP, CLEAN_CONFIG, '-o', self.temp_output])
-
-        self.assertTrue(glob_exists(self.temp_output, 'build.cfg'))
-        main([SUBCOMMAND.SCHEDULE, '-o', self.temp_output, '--submit'])
-
-        # check that the subdirectories were built
-        for lib in [MOCK_GENOME + '_*', MOCK_TRANS + '_*']:
-            self.check_cluster(lib)
-            self.assertTrue(glob_exists(self.temp_output, lib, SUBCOMMAND.VALIDATE))
-
-            for suffix in ['evidence.bed', 'validation-failed.tab', 'validation-passed.tab']:
-                self.assertTrue(
-                    glob_exists(self.temp_output, lib, SUBCOMMAND.VALIDATE + '/*-1', suffix)
-                )
-            for suffix in [
-                'contigs.bam',
-                'contigs.blat_out.pslx',
-                'contigs.fa',
-                'contigs.sorted.bam',
-                'contigs.sorted.bam.bai',
-                'igv.batch',
-                'raw_evidence.bam',
-                'raw_evidence.sorted.bam',
-                'raw_evidence.sorted.bam.bai',
-            ]:
-                self.assertFalse(
-                    glob_exists(self.temp_output, lib, SUBCOMMAND.VALIDATE + '/*-1', suffix),
-                    msg=suffix,
-                )
-            self.assertTrue(
-                glob_exists(self.temp_output, lib, SUBCOMMAND.VALIDATE + '/*-1', 'MAVIS-*.COMPLETE')
-            )
-
-            self.check_annotate(lib)
-        self.check_pairing()
-        self.check_summary()
-
-    @mock.patch('os.environ', ENV.copy())
-    def test_skip_clustering(self):
-        main([SUBCOMMAND.SETUP, CONFIG, '-o', self.temp_output, '--skip_stage', SUBCOMMAND.CLUSTER])
-        self.assertTrue(glob_exists(self.temp_output, 'build.cfg'))
-        main([SUBCOMMAND.SCHEDULE, '-o', self.temp_output, '--submit'])
-
-        # check that the subdirectories were built
-        for lib in [MOCK_GENOME + '_*', MOCK_TRANS + '_*']:
-            self.check_cluster(lib, skipped=True)
-            self.check_validate(lib)
-            self.check_aligner_output_files(lib)
-            self.check_annotate(lib)
-        self.check_pairing()
-        self.check_summary()
-
-    @mock.patch('os.environ', ENV.copy())
-    def test_skip_validation(self):
-        main(
-            [SUBCOMMAND.SETUP, CONFIG, '-o', self.temp_output, '--skip_stage', SUBCOMMAND.VALIDATE]
-        )
-
-        self.assertTrue(glob_exists(self.temp_output, 'build.cfg'))
-        main([SUBCOMMAND.SCHEDULE, '-o', self.temp_output, '--submit'])
-
-        # check that the subdirectories were built
-        for lib in [MOCK_GENOME + '_*', MOCK_TRANS + '_*']:
-            self.check_cluster(lib)
-            self.assertFalse(glob_exists(self.temp_output, lib, SUBCOMMAND.VALIDATE))
-            self.check_annotate(lib)
-        self.check_pairing()
-        self.check_summary()
-
-    @mock.patch('os.environ', ENV.copy())
-    def test_skip_cluster_and_validate(self):
-        args = [
-            SUBCOMMAND.SETUP,
-            CONFIG,
-            '-o',
-            self.temp_output,
-            '--skip_stage',
-            SUBCOMMAND.VALIDATE,
-            '--skip_stage',
-            SUBCOMMAND.CLUSTER,
-        ]
-        main(args)
-        self.assertTrue(glob_exists(self.temp_output, 'build.cfg'))
-        main([SUBCOMMAND.SCHEDULE, '-o', self.temp_output, '--submit'])
-
-        # check that the subdirectories were built
-        for lib in [MOCK_GENOME + '_*', MOCK_TRANS + '_*']:
-            self.check_cluster(lib, skipped=True)
-            self.assertFalse(glob_exists(self.temp_output, lib, SUBCOMMAND.VALIDATE))
-            self.check_annotate(lib)
-        self.check_pairing()
-        self.check_summary()
-
-    def tearDown(self):
-        # remove the temp directory and outputs
-        print_file_tree(self.temp_output)
-        shutil.rmtree(self.temp_output)
-
-
-class TestSetUp(unittest.TestCase):
-    def setUp(self):
-        # create the temp output directory to store file outputs
-        self.temp_output = tempfile.mkdtemp()
-        print('output dir', self.temp_output)
-
-    @mock.patch('os.environ', ENV.copy())
-    def test_slurm(self):
-        os.environ['MAVIS_SCHEDULER'] = 'SLURM'
-        args = [SUBCOMMAND.SETUP, CONFIG, '-o', self.temp_output]
-        main(args)
-        self.assertTrue(glob_exists(self.temp_output, 'build.cfg'))
-        self.assertTrue(glob_exists(self.temp_output, '*', SUBCOMMAND.VALIDATE, 'submit.sh', n=2))
-        self.assertTrue(glob_exists(self.temp_output, '*', SUBCOMMAND.ANNOTATE, 'submit.sh', n=2))
-        self.assertTrue(glob_exists(self.temp_output, SUBCOMMAND.PAIR, 'submit.sh'))
-        self.assertTrue(glob_exists(self.temp_output, SUBCOMMAND.SUMMARY, 'submit.sh'))
-
-    @mock.patch('os.environ', ENV.copy())
-    def test_slurm_skip_validate(self):
-        os.environ['MAVIS_SCHEDULER'] = 'SLURM'
-        args = [
-            SUBCOMMAND.SETUP,
-            CONFIG,
-            '-o',
-            self.temp_output,
-            '--skip_stage',
-            SUBCOMMAND.VALIDATE,
-        ]
-        main(args)
-        self.assertTrue(glob_exists(self.temp_output, 'build.cfg'))
-        self.assertTrue(glob_not_exists(self.temp_output, '*', SUBCOMMAND.VALIDATE, 'submit.sh'))
-        self.assertTrue(glob_exists(self.temp_output, '*', SUBCOMMAND.ANNOTATE, 'submit.sh', n=2))
-        self.assertTrue(glob_exists(self.temp_output, SUBCOMMAND.PAIR, 'submit.sh'))
-        self.assertTrue(glob_exists(self.temp_output, SUBCOMMAND.SUMMARY, 'submit.sh'))
-
-    @mock.patch('os.environ', ENV.copy())
-    def test_slurm_skip_cluster(self):
-        os.environ['MAVIS_SCHEDULER'] = 'SLURM'
-        args = [
-            SUBCOMMAND.SETUP,
-            CONFIG,
-            '-o',
-            self.temp_output,
-            '--skip_stage',
-            SUBCOMMAND.CLUSTER,
-        ]
-        main(args)
-        self.assertTrue(glob_exists(self.temp_output, 'build.cfg'))
-        self.assertTrue(glob_exists(self.temp_output, '*', SUBCOMMAND.VALIDATE, 'submit.sh', n=2))
-        self.assertTrue(glob_exists(self.temp_output, '*', SUBCOMMAND.ANNOTATE, 'submit.sh', n=2))
-        self.assertTrue(glob_exists(self.temp_output, SUBCOMMAND.PAIR, 'submit.sh'))
-        self.assertTrue(glob_exists(self.temp_output, SUBCOMMAND.SUMMARY, 'submit.sh'))
-
-    @mock.patch('os.environ', ENV.copy())
-    def test_sge(self):
-        os.environ['MAVIS_SCHEDULER'] = 'SGE'
-        args = [SUBCOMMAND.SETUP, CONFIG, '-o', self.temp_output]
-        main(args)
-        self.assertTrue(glob_exists(self.temp_output, 'build.cfg'))
-        self.assertTrue(glob_exists(self.temp_output, '*', SUBCOMMAND.VALIDATE, 'submit.sh', n=2))
-        self.assertTrue(glob_exists(self.temp_output, '*', SUBCOMMAND.ANNOTATE, 'submit.sh', n=2))
-        self.assertTrue(glob_exists(self.temp_output, SUBCOMMAND.PAIR, 'submit.sh'))
-        self.assertTrue(glob_exists(self.temp_output, SUBCOMMAND.SUMMARY, 'submit.sh'))
-
-    @mock.patch('os.environ', ENV.copy())
-    def test_torque(self):
-        os.environ['MAVIS_SCHEDULER'] = 'TORQUE'
-        args = [SUBCOMMAND.SETUP, CONFIG, '-o', self.temp_output]
-        main(args)
-        self.assertTrue(glob_exists(self.temp_output, 'build.cfg'))
-        self.assertTrue(glob_exists(self.temp_output, '*', SUBCOMMAND.VALIDATE, 'submit.sh', n=2))
-        self.assertTrue(glob_exists(self.temp_output, '*', SUBCOMMAND.ANNOTATE, 'submit.sh', n=2))
-        self.assertTrue(glob_exists(self.temp_output, SUBCOMMAND.PAIR, 'submit.sh'))
-        self.assertTrue(glob_exists(self.temp_output, SUBCOMMAND.SUMMARY, 'submit.sh'))
-
-    def tearDown(self):
-        # remove the temp directory and outputs
-        print_file_tree(self.temp_output)
-        shutil.rmtree(self.temp_output)
-
-
-if __name__ == '__main__':
-    unittest.main()
diff --git a/tests/end_to_end/test_help.py b/tests/end_to_end/test_help.py
index df10b85e..76823d7f 100644
--- a/tests/end_to_end/test_help.py
+++ b/tests/end_to_end/test_help.py
@@ -28,14 +28,6 @@ def test_pipeline(self):
             else:
                 self.assertEqual(0, returncode)
 
-    def test_config(self):
-        with patch.object(sys, 'argv', ['mavis', SUBCOMMAND.CONFIG, '-h']):
-            try:
-                returncode = main()
-            except SystemExit as err:
-                self.assertEqual(0, err.code)
-            else:
-                self.assertEqual(0, returncode)
 
     def test_cluster(self):
         with patch.object(sys, 'argv', ['mavis', SUBCOMMAND.CLUSTER, '-h']):
diff --git a/tests/end_to_end/test_overlay.py b/tests/end_to_end/test_overlay.py
index 2759f0c1..db664c55 100644
--- a/tests/end_to_end/test_overlay.py
+++ b/tests/end_to_end/test_overlay.py
@@ -1,30 +1,39 @@
+import json
 import os
 import shutil
 import subprocess
 import sys
 import tempfile
-import unittest
-
 from unittest.mock import patch
 
+import pytest
 from mavis.constants import SUBCOMMAND
 from mavis.main import main
 
-from . import glob_exists
-from ..util import get_data
-
+from ..util import get_data, glob_exists
 
 ANNOTATIONS = get_data('annotations_subsample.json')
 BAM = get_data('mock_reads_for_events.sorted.bam')
 
 
-class TestOverlayOptions(unittest.TestCase):
-    def setUp(self):
-        # create the temp output directory to store file outputs
-        self.temp_output = tempfile.mkdtemp()
-        print('output dir', self.temp_output)
+@pytest.fixture
+def output_dir():
+    temp_output = tempfile.mkdtemp()  # fresh scratch directory for each test
+    yield temp_output
+    shutil.rmtree(temp_output)  # teardown: remove the directory and anything written to it
+
 
-    def test_basic(self):
+@pytest.fixture(scope='module')
+def config_json():
+    fd, p = tempfile.mkstemp()  # open fd is adopted below, not leaked
+    with os.fdopen(fd, 'w') as fh:
+        fh.write(json.dumps({'reference.annotations': [ANNOTATIONS]}))
+    yield p  # path passed to the overlay command via --config
+    os.remove(p)  # teardown: delete the temporary config file
+
+
+class TestOverlayOptions:
+    def test_basic(self, config_json, output_dir):
         with patch.object(
             sys,
             'argv',
@@ -32,22 +41,21 @@ def test_basic(self):
                 'mavis',
                 SUBCOMMAND.OVERLAY,
                 'GAGE4',
-                '--annotations',
-                ANNOTATIONS,
+                '--config',
+                config_json,
                 '--output',
-                self.temp_output,
+                output_dir,
             ],
         ):
             try:
-                print(sys.argv)
                 returncode = main()
             except SystemExit as err:
-                self.assertEqual(0, err.code)
+                assert err.code == 0
             else:
-                self.assertEqual(0, returncode)
-                self.assertTrue(glob_exists(os.path.join(self.temp_output, '*GAGE4*.svg')))
+                assert returncode is None
+                assert glob_exists(os.path.join(output_dir, '*GAGE4*.svg'))
 
-    def test_marker(self):
+    def test_marker(self, config_json, output_dir):
         with patch.object(
             sys,
             'argv',
@@ -55,10 +63,10 @@ def test_marker(self):
                 'mavis',
                 SUBCOMMAND.OVERLAY,
                 'GAGE4',
-                '--annotations',
-                ANNOTATIONS,
+                '--config',
+                config_json,
                 '--output',
-                self.temp_output,
+                output_dir,
                 '--marker',
                 'm',
                 '49364900',
@@ -67,12 +75,12 @@ def test_marker(self):
             try:
                 returncode = main()
             except SystemExit as err:
-                self.assertEqual(0, err.code)
+                assert err.code == 0
             else:
-                self.assertEqual(0, returncode)
-                self.assertTrue(glob_exists(os.path.join(self.temp_output, '*GAGE4*.svg')))
+                assert returncode is None
+                assert glob_exists(os.path.join(output_dir, '*GAGE4*.svg'))
 
-    def test_marker_range(self):
+    def test_marker_range(self, config_json, output_dir):
         with patch.object(
             sys,
             'argv',
@@ -80,10 +88,10 @@ def test_marker_range(self):
                 'mavis',
                 SUBCOMMAND.OVERLAY,
                 'GAGE4',
-                '--annotations',
-                ANNOTATIONS,
+                '--config',
+                config_json,
                 '--output',
-                self.temp_output,
+                output_dir,
                 '--marker',
                 'm',
                 '49364900',
@@ -93,12 +101,12 @@ def test_marker_range(self):
             try:
                 returncode = main()
             except SystemExit as err:
-                self.assertEqual(0, err.code)
+                assert err.code == 0
             else:
-                self.assertEqual(0, returncode)
-                self.assertTrue(glob_exists(os.path.join(self.temp_output, '*GAGE4*.svg')))
+                assert returncode is None
+                assert glob_exists(os.path.join(output_dir, '*GAGE4*.svg'))
 
-    def test_marker_not_enough_args(self):
+    def test_marker_not_enough_args(self, config_json, output_dir):
         with patch.object(
             sys,
             'argv',
@@ -106,10 +114,10 @@ def test_marker_not_enough_args(self):
                 'mavis',
                 SUBCOMMAND.OVERLAY,
                 'GAGE4',
-                '--annotations',
-                ANNOTATIONS,
+                '--config',
+                config_json,
                 '--output',
-                self.temp_output,
+                output_dir,
                 '--marker',
                 'm',
             ],
@@ -117,10 +125,10 @@ def test_marker_not_enough_args(self):
             try:
                 returncode = main()
             except SystemExit as err:
-                self.assertNotEqual(0, err.code)
+                assert err.code != 0
             else:
-                self.assertNotEqual(0, returncode)
-                self.assertTrue(glob_exists(os.path.join(self.temp_output, '*GAGE4*.svg')))
+                # reaching this branch means the incomplete --marker was accepted
+                pytest.fail('expected a non-zero exit when --marker is missing arguments')
 
         with patch.object(
             sys,
@@ -128,23 +136,23 @@ def test_marker_not_enough_args(self):
             [
                 'mavis',
                 SUBCOMMAND.OVERLAY,
-                '--annotations',
-                ANNOTATIONS,
+                '--config',
+                config_json,
                 'GAGE4',
                 '--output',
-                self.temp_output,
+                output_dir,
                 '--marker',
             ],
         ):
             try:
                 returncode = main()
             except SystemExit as err:
-                self.assertNotEqual(0, err.code)
+                assert err.code != 0
             else:
-                self.assertNotEqual(0, returncode)
-                self.assertTrue(glob_exists(os.path.join(self.temp_output, '*GAGE4*.svg')))
+                # reaching this branch means a bare --marker flag was accepted
+                pytest.fail('expected a non-zero exit when --marker has no arguments')
 
-    def test_marker_not_int(self):
+    def test_marker_not_int(self, config_json, output_dir):
         with patch.object(
             sys,
             'argv',
@@ -152,10 +160,10 @@ def test_marker_not_int(self):
                 'mavis',
                 SUBCOMMAND.OVERLAY,
                 'GAGE4',
-                '--annotations',
-                ANNOTATIONS,
+                '--config',
+                config_json,
                 '--output',
-                self.temp_output,
+                output_dir,
                 '--marker',
                 'm',
                 'k',
@@ -164,12 +172,12 @@ def test_marker_not_int(self):
             try:
                 returncode = main()
             except SystemExit as err:
-                self.assertNotEqual(0, err.code)
+                assert err.code != 0
             else:
-                self.assertNotEqual(0, returncode)
-                self.assertTrue(glob_exists(os.path.join(self.temp_output, '*GAGE4*.svg')))
+                # reaching this branch means the non-integer marker value was accepted
+                pytest.fail('expected a non-zero exit for a non-integer --marker value')
 
-    def test_read_depth_plot(self):
+    def test_read_depth_plot(self, config_json, output_dir):
         with patch.object(
             sys,
             'argv',
@@ -177,10 +185,10 @@ def test_read_depth_plot(self):
                 'mavis',
                 SUBCOMMAND.OVERLAY,
                 'GAGE4',
-                '--annotations',
-                ANNOTATIONS,
+                '--config',
+                config_json,
                 '--output',
-                self.temp_output,
+                output_dir,
                 '--read_depth_plot',
                 'axis',
                 BAM,
@@ -189,12 +197,12 @@ def test_read_depth_plot(self):
             try:
                 returncode = main()
             except SystemExit as err:
-                self.assertEqual(0, err.code)
+                assert err.code == 0
             else:
-                self.assertEqual(0, returncode)
-                self.assertTrue(glob_exists(os.path.join(self.temp_output, '*GAGE4*.svg')))
+                assert returncode is None
+                assert glob_exists(os.path.join(output_dir, '*GAGE4*.svg'))
 
-    def test_read_depth_plot_binned(self):
+    def test_read_depth_plot_binned(self, config_json, output_dir):
         with patch.object(
             sys,
             'argv',
@@ -202,10 +210,10 @@ def test_read_depth_plot_binned(self):
                 'mavis',
                 SUBCOMMAND.OVERLAY,
                 'GAGE4',
-                '--annotations',
-                ANNOTATIONS,
+                '--config',
+                config_json,
                 '--output',
-                self.temp_output,
+                output_dir,
                 '--read_depth_plot',
                 'axis',
                 BAM,
@@ -215,12 +223,12 @@ def test_read_depth_plot_binned(self):
             try:
                 returncode = main()
             except SystemExit as err:
-                self.assertEqual(0, err.code)
+                assert err.code == 0
             else:
-                self.assertEqual(0, returncode)
-                self.assertTrue(glob_exists(os.path.join(self.temp_output, '*GAGE4*.svg')))
+                assert returncode is None
+                assert glob_exists(os.path.join(output_dir, '*GAGE4*.svg'))
 
-    def test_read_depth_plot_not_binned_but_stranded(self):
+    def test_read_depth_plot_not_binned_but_stranded(self, config_json, output_dir):
         # no ymax
         with patch.object(
             sys,
@@ -229,10 +237,10 @@ def test_read_depth_plot_not_binned_but_stranded(self):
                 'mavis',
                 SUBCOMMAND.OVERLAY,
                 'GAGE4',
-                '--annotations',
-                ANNOTATIONS,
+                '--config',
+                config_json,
                 '--output',
-                self.temp_output,
+                output_dir,
                 '--read_depth_plot',
                 'axis',
                 BAM,
@@ -244,11 +252,7 @@ def test_read_depth_plot_not_binned_but_stranded(self):
             try:
                 returncode = main()
             except SystemExit as err:
-                self.assertEqual(0, err.code)
+                assert err.code == 0
             else:
-                self.assertEqual(0, returncode)
-                self.assertTrue(glob_exists(os.path.join(self.temp_output, '*GAGE4*.svg')))
-
-    def tearDown(self):
-        # remove the temp directory and outputs
-        shutil.rmtree(self.temp_output)
+                assert returncode is None
+                assert glob_exists(os.path.join(output_dir, '*GAGE4*.svg'))
diff --git a/tests/end_to_end/test_pairing.py b/tests/end_to_end/test_pairing.py
deleted file mode 100644
index 769c1c9e..00000000
--- a/tests/end_to_end/test_pairing.py
+++ /dev/null
@@ -1,51 +0,0 @@
-import os
-import shutil
-import sys
-import tempfile
-import unittest
-from unittest.mock import patch
-
-from mavis.constants import SUBCOMMAND
-from mavis.main import main
-from mavis.util import read_bpp_from_input_file
-
-from ..util import get_data
-
-TEMP_OUTPUT = None
-
-
-def setUpModule():
-    global TEMP_OUTPUT
-    # create the temp output directory to store file outputs
-    TEMP_OUTPUT = tempfile.mkdtemp()
-
-
-class TestPairing(unittest.TestCase):
-    def test_pairing(self):
-        args = [
-            'mavis',
-            SUBCOMMAND.PAIR,
-            '-n',
-            get_data('pairing_annotations.tab'),
-            '-o',
-            TEMP_OUTPUT,
-            '--annotations',
-            get_data('pairing_reference_annotations_file.tab'),
-        ]
-        with patch.object(sys, 'argv', args):
-            self.assertEqual(0, main())
-        # make sure the output file exists
-        output = os.path.join(TEMP_OUTPUT, 'mavis_paired_A36971_A36973.tab')
-        self.assertTrue(os.path.exists(output))
-        # check that the expected pairings are present
-        bpps = read_bpp_from_input_file(output, expand_strand=False, expand_orient=False)
-        self.assertEqual(6, len(bpps))
-
-
-def tearDownModule():
-    # remove the temp directory and outputs
-    shutil.rmtree(TEMP_OUTPUT)
-
-
-if __name__ == '__main__':
-    unittest.main()
diff --git a/tests/end_to_end/test_ref_alt_count.py b/tests/end_to_end/test_ref_alt_count.py
index e5ec5a3d..4c30fb81 100644
--- a/tests/end_to_end/test_ref_alt_count.py
+++ b/tests/end_to_end/test_ref_alt_count.py
@@ -1,15 +1,14 @@
-import tempfile
-import unittest
 import os
 import shutil
+import tempfile
+import unittest
 
 from mavis.annotate.file_io import load_reference_genome
 from mavis.breakpoint import Breakpoint, BreakpointPair
 from mavis.constants import ORIENT, SVTYPE
 from tools.calculate_ref_alt_counts import RefAltCalculator
 
-from ..util import get_data
-from . import glob_exists
+from ..util import get_data, glob_exists
 
 
 def setUpModule():
diff --git a/tests/full-tutorial.config.json b/tests/full-tutorial.config.json
new file mode 100644
index 00000000..bf125870
--- /dev/null
+++ b/tests/full-tutorial.config.json
@@ -0,0 +1,98 @@
+{
+    "annotate.draw_fusions_only": true,
+    "cluster.min_clusters_per_file": 100,
+    "cluster.uninformative_filter": true,
+    "convert": {
+        "breakdancer": {
+            "assume_no_untemplated": true,
+            "file_type": "breakdancer",
+            "inputs": [
+                "tutorial_data/breakdancer-1.4.5/*txt"
+            ]
+        },
+        "breakseq": {
+            "assume_no_untemplated": true,
+            "file_type": "breakseq",
+            "inputs": [
+                "tutorial_data/breakseq-2.2/breakseq.vcf.gz"
+            ]
+        },
+        "chimerascan": {
+            "assume_no_untemplated": true,
+            "file_type": "chimerascan",
+            "inputs": [
+                "tutorial_data/chimerascan-0.4.5/chimeras.bedpe"
+            ]
+        },
+        "defuse": {
+            "assume_no_untemplated": true,
+            "file_type": "defuse",
+            "inputs": [
+                "tutorial_data/defuse-0.6.2/results.classify.tsv"
+            ]
+        },
+        "manta": {
+            "assume_no_untemplated": true,
+            "file_type": "manta",
+            "inputs": [
+                "tutorial_data/manta-1.0.0/diploidSV.vcf.gz",
+                "tutorial_data/manta-1.0.0/somaticSV.vcf"
+            ]
+        }
+    },
+    "libraries": {
+        "L1522785992-normal": {
+            "assign": [
+                "breakdancer",
+                "breakseq",
+                "manta"
+            ],
+            "bam_file": "tutorial_data/L1522785992_normal.sorted.bam",
+            "disease_status": "normal",
+            "protocol": "genome"
+        },
+        "L1522785992-trans": {
+            "assign": [
+                "chimerascan",
+                "defuse"
+            ],
+            "bam_file": "tutorial_data/L1522785992_trans.sorted.bam",
+            "disease_status": "diseased",
+            "protocol": "transcriptome",
+            "strand_specific": true
+        },
+        "L1522785992-tumour": {
+            "assign": [
+                "breakdancer",
+                "breakseq",
+                "manta"
+            ],
+            "bam_file": "tutorial_data/L1522785992_tumour.sorted.bam",
+            "disease_status": "diseased",
+            "protocol": "genome"
+        }
+    },
+    "output_dir": "output_dir_full",
+    "reference.aligner_reference": [
+        "reference_inputs/hg19.2bit"
+    ],
+    "reference.annotations": [
+        "reference_inputs/ensembl69_hg19_annotations.json"
+    ],
+    "reference.dgv_annotation": [
+        "reference_inputs/dgv_hg19_variants.tab"
+    ],
+    "reference.masking": [
+        "reference_inputs/hg19_masking.tab"
+    ],
+    "reference.reference_genome": [
+        "reference_inputs/hg19.fa"
+    ],
+    "reference.template_metadata": [
+        "reference_inputs/cytoBand.txt"
+    ],
+    "summary.filter_min_flanking_reads": 10,
+    "summary.filter_min_linking_split_reads": 1,
+    "summary.filter_min_remapped_reads": 5,
+    "summary.filter_min_spanning_reads": 5
+}
diff --git a/tests/integration/schedule/__init__.py b/tests/integration/schedule/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/tests/integration/schedule/test_pipeline.py b/tests/integration/schedule/test_pipeline.py
deleted file mode 100644
index 4ae43b33..00000000
--- a/tests/integration/schedule/test_pipeline.py
+++ /dev/null
@@ -1,157 +0,0 @@
-import unittest
-from unittest import mock
-import configparser
-import tempfile
-import shutil
-import os
-
-from mavis.schedule import pipeline as _pipeline
-from mavis.schedule import scheduler
-from mavis.main import main
-
-from ...util import get_data
-
-
-class TestTime(unittest.TestCase):
-    def test_time(self):
-        self.assertEqual('0:20:00', scheduler.time_format(1200))
-        self.assertEqual('1:00:00', scheduler.time_format(3600))
-        self.assertEqual('25:25:25', scheduler.time_format(91525))
-
-
-class TestReadBuildFile(unittest.TestCase):
-
-    # TODO: test_skip_validate
-    # TODO: test_no_skip
-    # TODO: test_missing_summary
-    # TODO: test_missing_pairing
-    # TODO: test_missing_annotations
-    # TODO: test_error_on_config_not_exists
-    # TODO: test_loading_unsubmitted
-    # TODO: test_loading_submitted
-    # TODO: test_loading_completed
-    # TODO: test_missing_validations
-    # TODO: test_missing_dependency_job
-
-    def setUp(self):
-        self.exists_patcher = mock.patch('os.path.exists')
-        self.exists_patcher.start().return_value = True
-
-    def read_mock_config(self, content):
-        with mock.patch('configparser.ConfigParser.read', configparser.ConfigParser.read_string):
-            return _pipeline.Pipeline.read_build_file(content)
-
-    def test_torque(self):
-        pipeline = _pipeline.Pipeline.read_build_file(get_data('torque_build.cfg'))
-        self.assertEqual(3, len(pipeline.validations))
-        self.assertEqual(3, len(pipeline.annotations))
-        self.assertIn(pipeline.annotations[0].dependencies[0], pipeline.validations)
-        self.assertIn(pipeline.pairing, pipeline.summary.dependencies)
-
-    def test_basic(self):
-        content = """
-[general]
-output_dir = temp
-scheduler = SLURM
-batch_id = 1
-
-[job1]
-stage = validate
-task_list = 1
-    2
-    3
-    4
-    5
-    6
-    7
-    8
-    9
-    10
-name = job1
-output_dir = temp2
-
-
-[job2]
-stage = annotate
-name = job2
-dependencies = job1
-output_dir = temp3
-
-[job3]
-stage = pairing
-name = job3
-dependencies = job2
-output_dir = temp4
-
-[job4]
-stage = summary
-name = job4
-dependencies = job3
-output_dir = temp5
-        """
-        result = self.read_mock_config(content)
-        self.assertEqual('job3', result.pairing.name)
-        self.assertEqual('job1', result.validations[0].name)
-        self.assertEqual('job2', result.annotations[0].name)
-        self.assertEqual(result.validations[0], result.annotations[0].dependencies[0])
-        self.assertEqual(result.annotations[0], result.pairing.dependencies[0])
-        self.assertEqual(result.pairing, result.summary.dependencies[0])
-
-    def test_parsed_types(self):
-        build = _pipeline.Pipeline.read_build_file(get_data('build.cfg'))
-        self.assertIs(build.validations[0].import_env, True)
-        self.assertIs(build.scheduler.concurrency_limit, None)
-
-    def tearDown(self):
-        self.exists_patcher.stop()
-
-
-class TestBuildPipeline(unittest.TestCase):
-    def setUp(self):
-        self.temp_output = tempfile.mkdtemp()
-        # clear any environment variables
-        self.env_patch = mock.patch(
-            'os.environ', {k: v for k, v in os.environ.items() if not k.startswith('MAVIS_')}
-        )
-        self.env_patch.start()
-
-    def test_basic_slurm(self):
-        os.environ['MAVIS_SCHEDULER'] = 'SLURM'
-        config = get_data('pipeline_config.cfg')
-
-        with mock.patch('sys.argv', ['mavis', 'setup', '--output', self.temp_output, config]):
-            self.assertEqual(0, main())
-        build_file = os.path.join(self.temp_output, 'build.cfg')
-        with open(build_file, 'r') as fh:
-            print(fh.read())
-        build = _pipeline.Pipeline.read_build_file(build_file)
-        print(build)
-        self.assertGreaterEqual(len(build.validations), 1)
-        self.assertGreaterEqual(len(build.annotations), 1)
-        self.assertEqual(2, len(build.pairing.dependencies))
-        self.assertIsNotNone(build.pairing)
-        self.assertIsNotNone(build.summary)
-
-    def test_basic_sge(self):
-        os.environ['MAVIS_SCHEDULER'] = 'SGE'
-        config = get_data('pipeline_config.cfg')
-
-        with mock.patch('sys.argv', ['mavis', 'setup', '--output', self.temp_output, config]):
-            self.assertEqual(0, main())
-        build_file = os.path.join(self.temp_output, 'build.cfg')
-        with open(build_file, 'r') as fh:
-            print(fh.read())
-        build = _pipeline.Pipeline.read_build_file(build_file)
-        print(build)
-        self.assertGreaterEqual(len(build.validations), 1)
-        self.assertGreaterEqual(len(build.annotations), 1)
-        self.assertIsNotNone(build.pairing)
-        self.assertIsNotNone(build.summary)
-
-    # TODO: test_basic_submit
-    # TODO: test pipeline failure
-    # TODO: test conversion failure
-
-    def tearDown(self):
-        shutil.rmtree(self.temp_output)
-        self.env_patch.stop()
diff --git a/tests/integration/schedule/test_sge.py b/tests/integration/schedule/test_sge.py
deleted file mode 100644
index 172c3931..00000000
--- a/tests/integration/schedule/test_sge.py
+++ /dev/null
@@ -1,748 +0,0 @@
-import subprocess
-import unittest
-from unittest import mock
-
-from mavis.schedule import job as _job
-from mavis.schedule import scheduler as _scheduler
-from mavis.schedule import constants as _constants
-from mavis.constants import SUBCOMMAND
-
-QACCT_ARR3_OK = """
-==============================================================
-qname        merge.q
-hostname     n601.numbers.bcgsc.ca
-group        users
-owner        creisle
-project      NONE
-department   defaultdepartment
-jobname      arrtest
-jobnumber    3757289
-taskid       1
-account      sge
-priority     0
-qsub_time    Thu May 24 10:54:05 2018
-start_time   Thu May 24 10:54:12 2018
-end_time     Thu May 24 10:55:12 2018
-granted_pe   NONE
-slots        1
-failed       0
-exit_status  0
-ru_wallclock 60s
-ru_utime     0.057s
-ru_stime     0.087s
-ru_maxrss    5.160KB
-ru_ixrss     0.000B
-ru_ismrss    0.000B
-ru_idrss     0.000B
-ru_isrss     0.000B
-ru_minflt    20948
-ru_majflt    0
-ru_nswap     0
-ru_inblock   0
-ru_oublock   8
-ru_msgsnd    0
-ru_msgrcv    0
-ru_nsignals  0
-ru_nvcsw     224
-ru_nivcsw    59
-cpu          0.144s
-mem          0.000GBs
-io           0.001GB
-iow          0.000s
-maxvmem      1.934MB
-arid         undefined
-ar_sub_time  undefined
-category     -U transabyss_users
-==============================================================
-qname        merge.q
-hostname     n602.numbers.bcgsc.ca
-group        users
-owner        creisle
-project      NONE
-department   defaultdepartment
-jobname      arrtest
-jobnumber    3757289
-taskid       3
-account      sge
-priority     0
-qsub_time    Thu May 24 10:54:05 2018
-start_time   Thu May 24 10:54:12 2018
-end_time     Thu May 24 10:55:12 2018
-granted_pe   NONE
-slots        1
-failed       0
-exit_status  0
-ru_wallclock 60s
-ru_utime     0.063s
-ru_stime     0.079s
-ru_maxrss    5.156KB
-ru_ixrss     0.000B
-ru_ismrss    0.000B
-ru_idrss     0.000B
-ru_isrss     0.000B
-ru_minflt    20954
-ru_majflt    0
-ru_nswap     0
-ru_inblock   0
-ru_oublock   8
-ru_msgsnd    0
-ru_msgrcv    0
-ru_nsignals  0
-ru_nvcsw     220
-ru_nivcsw    65
-cpu          0.142s
-mem          0.000GBs
-io           0.001GB
-iow          0.000s
-maxvmem      1.934MB
-arid         undefined
-ar_sub_time  undefined
-category     -U transabyss_users
-==============================================================
-qname        merge.q
-hostname     n604.numbers.bcgsc.ca
-group        users
-owner        creisle
-project      NONE
-department   defaultdepartment
-jobname      arrtest
-jobnumber    3757289
-taskid       2
-account      sge
-priority     0
-qsub_time    Thu May 24 10:54:05 2018
-start_time   Thu May 24 10:54:17 2018
-end_time     Thu May 24 10:55:17 2018
-granted_pe   NONE
-slots        1
-failed       0
-exit_status  0
-ru_wallclock 60s
-ru_utime     0.055s
-ru_stime     0.086s
-ru_maxrss    5.156KB
-ru_ixrss     0.000B
-ru_ismrss    0.000B
-ru_idrss     0.000B
-ru_isrss     0.000B
-ru_minflt    20954
-ru_majflt    0
-ru_nswap     0
-ru_inblock   0
-ru_oublock   8
-ru_msgsnd    0
-ru_msgrcv    0
-ru_nsignals  0
-ru_nvcsw     218
-ru_nivcsw    66
-cpu          0.141s
-mem          0.000GBs
-io           0.001GB
-iow          0.000s
-maxvmem      1.930MB
-arid         undefined
-ar_sub_time  undefined
-category     -U transabyss_users
-"""
-
-
-class TestUpdate(unittest.TestCase):
-    # TODO: status of array job
-    # TODO: status of single job
-    # TODO: status of job waiting on dependency
-
-    @mock.patch('subprocess.check_output')
-    def test_job_array_waiting(self, patch_check):
-        content = """
-job-ID  prior   name       user         state submit/start at     queue                          slots ja-task-ID
------------------------------------------------------------------------------------------------------------------
-3751935 0.00000 subtest.sh creisle      qw    05/23/2018 13:44:04                                    1 1-10:1
-        """.encode(
-            'utf8'
-        )
-        patch_check.return_value = content
-        job = _job.ArrayJob(output_dir='temp', job_ident='3751935', task_list=10, stage='validate')
-        _scheduler.SgeScheduler().update_info(job)
-        self.assertEqual(_constants.JOB_STATUS.PENDING, job.status)
-
-    @mock.patch('subprocess.check_output')
-    def test_job_array(self, patch_check):
-        content = """
-job-ID  prior   name       user         state submit/start at     queue                          slots ja-task-ID
------------------------------------------------------------------------------------------------------------------
-3751935 0.50500 subtest.sh creisle      r     05/23/2018 13:44:12 merge.q@n601.numbers.bcgsc.ca      1 1
-3751935 0.50500 subtest.sh creisle      r     05/23/2018 13:44:12 merge.q@n602.numbers.bcgsc.ca      1 2
-3751935 0.50500 subtest.sh creisle      r     05/23/2018 13:44:12 merge.q@n604.numbers.bcgsc.ca      1 3
-3751935 0.50500 subtest.sh creisle      r     05/23/2018 13:44:12 merge.q@n603.numbers.bcgsc.ca      1 4
-3751935 0.50500 subtest.sh creisle      r     05/23/2018 13:44:12 merge.q@n601.numbers.bcgsc.ca      1 5
-3751935 0.50500 subtest.sh creisle      r     05/23/2018 13:44:12 merge.q@n602.numbers.bcgsc.ca      1 6
-3751935 0.50500 subtest.sh creisle      r     05/23/2018 13:44:12 merge.q@n604.numbers.bcgsc.ca      1 7
-3751935 0.50500 subtest.sh creisle      r     05/23/2018 13:44:12 merge.q@n603.numbers.bcgsc.ca      1 8
-3751935 0.50500 subtest.sh creisle      r     05/23/2018 13:44:12 merge.q@n601.numbers.bcgsc.ca      1 9
-3751935 0.50500 subtest.sh creisle      qw    05/23/2018 13:44:12 merge.q@n602.numbers.bcgsc.ca      1 10
-        """.encode(
-            'utf8'
-        )
-        patch_check.return_value = content
-        job = _job.ArrayJob(output_dir='temp', job_ident='3751935', task_list=10, stage='validate')
-        _scheduler.SgeScheduler().update_info(job)
-
-        for task in job.task_list[:9]:
-            self.assertEqual(_constants.JOB_STATUS.RUNNING, task.status)
-        self.assertEqual(_constants.JOB_STATUS.PENDING, job.task_list[-1].status)
-        self.assertEqual(_constants.JOB_STATUS.PENDING, job.status)
-
-    @mock.patch('subprocess.check_output')
-    def test_single_job(self, patch_check):
-        content = """
-job-ID  prior   name       user         state submit/start at     queue                          slots ja-task-ID
------------------------------------------------------------------------------------------------------------------
- 217940 1.50000 subtest.sh creisle      qw    05/22/2018 23:39:55                                    1
-        """.encode(
-            'utf8'
-        )
-        patch_check.return_value = content
-        job = _job.Job(output_dir='temp', job_ident='217940', stage='validate')
-        _scheduler.SgeScheduler().update_info(job)
-        self.assertEqual(_constants.JOB_STATUS.PENDING, job.status)
-
-    @mock.patch('subprocess.check_output')
-    def test_completed_array(self, patch_check):
-        patch_check.side_effect = [''.encode('utf8'), QACCT_ARR3_OK.encode('utf8')]
-        job = _job.ArrayJob(
-            output_dir='temp', job_ident='3757289', stage='validate', name='arrtest', task_list=3
-        )
-        _scheduler.SgeScheduler().update_info(job)
-        self.assertEqual(_constants.JOB_STATUS.COMPLETED, job.status)
-        for task in job.task_list:
-            self.assertEqual(_constants.JOB_STATUS.COMPLETED, task.status)
-
-
-class TestParseQacct(unittest.TestCase):
-    def test_job_array(self):
-        content = QACCT_ARR3_OK
-        rows = _scheduler.SgeScheduler().parse_qacct(content)
-        expected = {
-            'job_ident': '3757289',
-            'name': 'arrtest',
-            'status': _constants.JOB_STATUS.COMPLETED,
-            'status_comment': '',
-        }
-        for task_id, row in zip([1, 3, 2], rows):
-            exp = {'task_ident': str(task_id)}
-            exp.update(expected)
-            self.assertEqual(exp, row)
-
-    def test_passed(self):
-        content = """
-==============================================================
-qname        transabyss.q
-hostname     tac3n15.hpc.bcgsc.ca
-group        users
-owner        bioapps
-project      NONE
-department   defaultdepartment
-jobname      A89009negative
-jobnumber    3744253
-taskid       40
-account      sge
-priority     0
-qsub_time    Tue May 22 09:26:31 2018
-start_time   Tue May 22 10:32:42 2018
-end_time     Tue May 22 13:28:32 2018
-granted_pe   openmpi
-slots        8
-failed       0
-exit_status  0
-ru_wallclock 10550s
-ru_utime     42298.581s
-ru_stime     34509.422s
-ru_maxrss    2.608MB
-ru_ixrss     0.000B
-ru_ismrss    0.000B
-ru_idrss     0.000B
-ru_isrss     0.000B
-ru_minflt    5382919
-ru_majflt    978
-ru_nswap     0
-ru_inblock   14027520
-ru_oublock   9259368
-ru_msgsnd    0
-ru_msgrcv    0
-ru_nsignals  0
-ru_nvcsw     20635137
-ru_nivcsw    14100587
-cpu          76808.002s
-mem          119.207KGBs
-io           579.782GB
-iow          0.000s
-maxvmem      14.885GB
-arid         undefined
-ar_sub_time  undefined
-category     -U transabyss_users -q transabyss.q -l h_vmem=3.85G,mem_free=3.85G,mem_token=3.85G -pe openmpi 8
-        """
-        rows = _scheduler.SgeScheduler().parse_qacct(content)
-        self.assertEqual(1, len(rows))
-        expected = {
-            'job_ident': '3744253',
-            'task_ident': '40',
-            'name': 'A89009negative',
-            'status': _constants.JOB_STATUS.COMPLETED,
-            'status_comment': '',
-        }
-        self.assertEqual(expected, rows[0])
-
-    def test_non_zero_exit(self):
-        content = """
-==============================================================
-qname        merge.q
-hostname     n603.numbers.bcgsc.ca
-group        users
-owner        creisle
-project      NONE
-department   defaultdepartment
-jobname      error
-jobnumber    3755560
-taskid       undefined
-account      sge
-priority     0
-qsub_time    Thu May 24 09:42:58 2018
-start_time   Thu May 24 09:43:12 2018
-end_time     Thu May 24 09:44:12 2018
-granted_pe   NONE
-slots        1
-failed       0
-exit_status  1
-ru_wallclock 60s
-ru_utime     0.054s
-ru_stime     0.088s
-ru_maxrss    5.148KB
-ru_ixrss     0.000B
-ru_ismrss    0.000B
-ru_idrss     0.000B
-ru_isrss     0.000B
-ru_minflt    21134
-ru_majflt    0
-ru_nswap     0
-ru_inblock   8
-ru_oublock   16
-ru_msgsnd    0
-ru_msgrcv    0
-ru_nsignals  0
-ru_nvcsw     228
-ru_nivcsw    62
-cpu          0.142s
-mem          0.000GBs
-io           0.001GB
-iow          0.000s
-maxvmem      1.926MB
-arid         undefined
-ar_sub_time  undefined
-category     -U transabyss_users
-        """
-
-        rows = _scheduler.SgeScheduler().parse_qacct(content)
-        self.assertEqual(1, len(rows))
-        expected = {
-            'job_ident': '3755560',
-            'task_ident': None,
-            'name': 'error',
-            'status': _constants.JOB_STATUS.FAILED,
-            'status_comment': '',
-        }
-        self.assertEqual(expected, rows[0])
-
-    def test_failed(self):
-        content = """
-==============================================================
-qname        merge.q
-hostname     n603.numbers.bcgsc.ca
-group        users
-owner        creisle
-project      NONE
-department   defaultdepartment
-jobname      MV_mock-A36971_batch-E6aEZJnTQAau598tcsMjAE
-jobnumber    3760712
-taskid       1
-account      sge
-priority     0
-qsub_time    Thu May 24 13:35:02 2018
-start_time   -/-
-end_time     -/-
-granted_pe   NONE
-slots        1
-failed       26  : opening input/output file
-exit_status  0
-ru_wallclock 0s
-ru_utime     0.000s
-ru_stime     0.000s
-ru_maxrss    0.000B
-ru_ixrss     0.000B
-ru_ismrss    0.000B
-ru_idrss     0.000B
-ru_isrss     0.000B
-ru_minflt    0
-ru_majflt    0
-ru_nswap     0
-ru_inblock   0
-ru_oublock   0
-ru_msgsnd    0
-ru_msgrcv    0
-ru_nsignals  0
-ru_nvcsw     0
-ru_nivcsw    0
-cpu          0.000s
-mem          0.000GBs
-io           0.000GB
-iow          0.000s
-maxvmem      0.000B
-arid         undefined
-ar_sub_time  undefined
-category     -U transabyss_users -l h_rt=57600,h_vmem=16000M,mem_free=16000M,mem_token=16000M
-        """
-        rows = _scheduler.SgeScheduler().parse_qacct(content)
-        self.assertEqual(1, len(rows))
-        expected = {
-            'job_ident': '3760712',
-            'task_ident': '1',
-            'name': 'MV_mock-A36971_batch-E6aEZJnTQAau598tcsMjAE',
-            'status': _constants.JOB_STATUS.FAILED,
-            'status_comment': 'opening input/output file',
-        }
-        self.assertEqual(expected, rows[0])
-
-    def test_cancelled(self):
-        content = """
-==============================================================
-qname        merge.q
-hostname     n603.numbers.bcgsc.ca
-group        users
-owner        creisle
-project      NONE
-department   defaultdepartment
-jobname      arrtest
-jobnumber    3757249
-taskid       undefined
-account      sge
-priority     0
-qsub_time    Thu May 24 10:50:27 2018
-start_time   Thu May 24 10:50:45 2018
-end_time     Thu May 24 10:51:09 2018
-granted_pe   NONE
-slots        1
-failed       100 : assumedly after job
-exit_status  137                  (Killed)
-ru_wallclock 24s
-ru_utime     0.052s
-ru_stime     0.088s
-ru_maxrss    5.160KB
-ru_ixrss     0.000B
-ru_ismrss    0.000B
-ru_idrss     0.000B
-ru_isrss     0.000B
-ru_minflt    20737
-ru_majflt    0
-ru_nswap     0
-ru_inblock   0
-ru_oublock   8
-ru_msgsnd    0
-ru_msgrcv    0
-ru_nsignals  0
-ru_nvcsw     215
-ru_nivcsw    63
-cpu          0.140s
-mem          0.000GBs
-io           0.001GB
-iow          0.000s
-maxvmem      1.934MB
-arid         undefined
-ar_sub_time  undefined
-category     -U transabyss_users
-        """
-        rows = _scheduler.SgeScheduler().parse_qacct(content)
-        self.assertEqual(1, len(rows))
-        expected = {
-            'job_ident': '3757249',
-            'task_ident': None,
-            'name': 'arrtest',
-            'status': _constants.JOB_STATUS.CANCELLED,
-            'status_comment': 'assumedly after job',
-        }
-        self.assertEqual(expected, rows[0])
-
-    def test_job_not_found(self):
-        content = """
-Total System Usage
-    WALLCLOCK         UTIME         STIME           CPU             MEMORY                 IO                IOW
-================================================================================================================
-   3786481073 6713770428.951 4374477378.582 11585461604.347   187237653407.317      156350319.140              0.000
-        """
-        with self.assertRaises(ValueError):
-            _scheduler.SgeScheduler().parse_qacct(content)
-
-
-class TestParseQstat(unittest.TestCase):
-    def test_single_job(self):
-        content = """
-job-ID  prior   name       user         state submit/start at     queue                          slots ja-task-ID
------------------------------------------------------------------------------------------------------------------
- 217940 1.50000 subtest.sh creisle      qw    05/22/2018 23:39:55                                    1
-        """
-        rows = _scheduler.SgeScheduler().parse_qstat(content, '217940')
-        self.assertEqual(1, len(rows))
-        expected = {
-            'job_ident': '217940',
-            'task_ident': None,
-            'status': _constants.JOB_STATUS.PENDING,
-            'name': 'subtest.sh',
-            'status_comment': '',
-        }
-        self.assertEqual(expected, rows[0])
-
-    def test_no_jobs_found(self):
-        rows = _scheduler.SgeScheduler().parse_qstat("", '217940')
-        self.assertEqual([], rows)
-
-
-class TestCancel(unittest.TestCase):
-    @mock.patch('mavis.schedule.scheduler.SgeScheduler.command')
-    def test_single_job(self, patcher):
-        sched = _scheduler.SgeScheduler()
-        job = _job.Job(SUBCOMMAND.VALIDATE, '', job_ident='1234')
-        sched.cancel(job)
-        self.assertEqual(_constants.JOB_STATUS.CANCELLED, job.status)
-        patcher.assert_called_with(['qdel', '1234'])
-
-    @mock.patch('mavis.schedule.scheduler.SgeScheduler.command')
-    def test_array_job(self, patcher):
-        sched = _scheduler.SgeScheduler()
-        job = _job.ArrayJob(SUBCOMMAND.VALIDATE, 10, output_dir='', job_ident='1234')
-        sched.cancel(job)
-        self.assertEqual(_constants.JOB_STATUS.CANCELLED, job.status)
-        for task in job.task_list:
-            self.assertEqual(_constants.JOB_STATUS.CANCELLED, task.status)
-        patcher.assert_called_with(['qdel', '1234'])
-
-    @mock.patch('mavis.schedule.scheduler.SgeScheduler.command')
-    def test_array_job_task(self, patcher):
-        sched = _scheduler.SgeScheduler()
-        job = _job.ArrayJob(SUBCOMMAND.VALIDATE, 10, output_dir='', job_ident='1234')
-        sched.cancel(job, task_ident=4)
-        self.assertEqual(_constants.JOB_STATUS.NOT_SUBMITTED, job.status)
-        for i, task in enumerate(job.task_list):
-            if i == 3:
-                self.assertEqual(_constants.JOB_STATUS.CANCELLED, task.status)
-            else:
-                self.assertEqual(_constants.JOB_STATUS.NOT_SUBMITTED, task.status)
-        patcher.assert_called_with(['qdel', '1234', '-t', '4'])
-
-    @mock.patch('mavis.schedule.scheduler.SgeScheduler.command')
-    def test_bad_command(self, patcher):
-        patcher.side_effect = [subprocess.CalledProcessError(1, 'command')]
-        sched = _scheduler.SgeScheduler()
-        job = _job.Job(SUBCOMMAND.VALIDATE, '', job_ident='1234')
-        sched.cancel(job)
-        self.assertEqual(_constants.JOB_STATUS.NOT_SUBMITTED, job.status)
-
-
-class TestSubmit(unittest.TestCase):
-    @mock.patch('mavis.schedule.scheduler.SgeScheduler.command')
-    def test_job(self, patcher):
-        patcher.side_effect = ['Your job 3891651 ("MV1") has been submitted']
-        job = _job.Job(
-            SUBCOMMAND.VALIDATE,
-            queue='all',
-            output_dir='output_dir',
-            script='script.sh',
-            name='MV1',
-            memory_limit=1,
-        )
-        sched = _scheduler.SgeScheduler()
-        sched.submit(job)
-        self.assertEqual('3891651', job.job_ident)
-        self.assertEqual(_constants.JOB_STATUS.SUBMITTED, job.status)
-        patcher.assert_called_with(
-            'qsub -j y -q all -l mem_free=1M,mem_token=1M,h_vmem=1M -l h_rt=16:00:00 -V '
-            '-N MV1 -o output_dir/job-\\$JOB_NAME-\\$JOB_ID.log script.sh',
-            shell=True,
-        )
-
-    @mock.patch('mavis.schedule.scheduler.SgeScheduler.command')
-    def test_job_with_array_dep(self, patcher):
-        patcher.side_effect = ['Your job 3891651 ("MV1") has been submitted']
-        job = _job.Job(
-            SUBCOMMAND.VALIDATE,
-            queue='all',
-            output_dir='output_dir',
-            script='script.sh',
-            name='MV1',
-            memory_limit=1,
-            mail_user='me@example.com',
-            mail_type=_constants.MAIL_TYPE.ALL,
-        )
-        dep = _job.ArrayJob(
-            job_ident='1234', task_list=10, output_dir='', stage=SUBCOMMAND.VALIDATE
-        )
-        job.dependencies.append(dep)
-        sched = _scheduler.SgeScheduler()
-        sched.submit(job)
-        self.assertEqual('3891651', job.job_ident)
-        self.assertEqual(_constants.JOB_STATUS.SUBMITTED, job.status)
-        patcher.assert_called_with(
-            'qsub -j y -q all -l mem_free=1M,mem_token=1M,h_vmem=1M -l h_rt=16:00:00 -V '
-            '-hold_jid 1234 -N MV1 -m abes -M me@example.com '
-            '-o output_dir/job-\\$JOB_NAME-\\$JOB_ID.log script.sh',
-            shell=True,
-        )
-
-    @mock.patch('mavis.schedule.scheduler.SgeScheduler.command')
-    def test_job_with_job_dep(self, patcher):
-        patcher.side_effect = ['Your job 3891651 ("MV1") has been submitted']
-        job = _job.Job(
-            SUBCOMMAND.VALIDATE,
-            queue='all',
-            output_dir='output_dir',
-            script='script.sh',
-            name='MV1',
-            memory_limit=1,
-            mail_user='me@example.com',
-            mail_type=_constants.MAIL_TYPE.ALL,
-        )
-        dep = _job.Job(job_ident='1234', output_dir='', stage=SUBCOMMAND.VALIDATE)
-        job.dependencies.append(dep)
-        sched = _scheduler.SgeScheduler()
-        sched.submit(job)
-        self.assertEqual('3891651', job.job_ident)
-        self.assertEqual(_constants.JOB_STATUS.SUBMITTED, job.status)
-        patcher.assert_called_with(
-            'qsub -j y -q all -l mem_free=1M,mem_token=1M,h_vmem=1M -l h_rt=16:00:00 -V '
-            '-hold_jid 1234 -N MV1 -m abes -M me@example.com '
-            '-o output_dir/job-\\$JOB_NAME-\\$JOB_ID.log script.sh',
-            shell=True,
-        )
-
-    @mock.patch('mavis.schedule.scheduler.SgeScheduler.command')
-    def test_array_job(self, patcher):
-        patcher.side_effect = ['Your job-array 3891657.2-4:1 ("MV1") has been submitted']
-        job = _job.ArrayJob(
-            stage=SUBCOMMAND.VALIDATE,
-            output_dir='output_dir',
-            script='script.sh',
-            name='MV1',
-            task_list=[2, 3, 4],
-            memory_limit=1,
-        )
-        sched = _scheduler.SgeScheduler(concurrency_limit=2)
-        sched.submit(job)
-        self.assertEqual('3891657', job.job_ident)
-        self.assertEqual(_constants.JOB_STATUS.SUBMITTED, job.status)
-
-        patcher.assert_called_with(
-            'qsub -j y -l mem_free=1M,mem_token=1M,h_vmem=1M -l h_rt=16:00:00 -V '
-            '-N MV1 -t 2-4 -o output_dir/job-\\$JOB_NAME-\\$JOB_ID-\\$TASK_ID.log script.sh',
-            shell=True,
-        )
-
-    @mock.patch('mavis.schedule.scheduler.SgeScheduler.command')
-    def test_array_job_with_job_dep(self, patcher):
-        patcher.side_effect = ['Your job-array 3891657.2-4:1 ("MV1") has been submitted']
-        job = _job.ArrayJob(
-            stage=SUBCOMMAND.VALIDATE,
-            output_dir='output_dir',
-            script='script.sh',
-            name='MV1',
-            task_list=[2, 3, 4],
-            memory_limit=1,
-        )
-        sched = _scheduler.SgeScheduler(concurrency_limit=2)
-
-        dep = _job.Job(job_ident='1234', output_dir='', stage=SUBCOMMAND.VALIDATE)
-        job.dependencies.append(dep)
-
-        sched.submit(job)
-        self.assertEqual('3891657', job.job_ident)
-        self.assertEqual(_constants.JOB_STATUS.SUBMITTED, job.status)
-
-        patcher.assert_called_with(
-            'qsub -j y -l mem_free=1M,mem_token=1M,h_vmem=1M -l h_rt=16:00:00 -V '
-            '-hold_jid 1234 '
-            '-N MV1 -t 2-4 -o output_dir/job-\\$JOB_NAME-\\$JOB_ID-\\$TASK_ID.log script.sh',
-            shell=True,
-        )
-
-    @mock.patch('mavis.schedule.scheduler.SgeScheduler.command')
-    def test_array_job_with_array_dep(self, patcher):
-        patcher.side_effect = ['Your job-array 3891657.2-4:1 ("MV1") has been submitted']
-        job = _job.ArrayJob(
-            stage=SUBCOMMAND.VALIDATE,
-            output_dir='output_dir',
-            script='script.sh',
-            name='MV1',
-            task_list=[2, 3, 4],
-            memory_limit=1,
-        )
-        sched = _scheduler.SgeScheduler(concurrency_limit=2)
-
-        dep = _job.ArrayJob(
-            job_ident='1234', task_list=[2, 3, 4], output_dir='', stage=SUBCOMMAND.VALIDATE
-        )
-        job.dependencies.append(dep)
-
-        sched.submit(job)
-        self.assertEqual('3891657', job.job_ident)
-        self.assertEqual(_constants.JOB_STATUS.SUBMITTED, job.status)
-
-        patcher.assert_called_with(
-            'qsub -j y -l mem_free=1M,mem_token=1M,h_vmem=1M -l h_rt=16:00:00 -V '
-            '-hold_jid_ad 1234 '
-            '-N MV1 -t 2-4 -o output_dir/job-\\$JOB_NAME-\\$JOB_ID-\\$TASK_ID.log script.sh',
-            shell=True,
-        )
-
-    @mock.patch('mavis.schedule.scheduler.SgeScheduler.command')
-    def test_array_job_with_diff_array(self, patcher):
-        patcher.side_effect = ['Your job-array 3891657.2-4:1 ("MV1") has been submitted']
-        job = _job.ArrayJob(
-            stage=SUBCOMMAND.VALIDATE,
-            output_dir='output_dir',
-            script='script.sh',
-            name='MV1',
-            task_list=[2, 3, 4],
-            memory_limit=1,
-        )
-        sched = _scheduler.SgeScheduler(concurrency_limit=2)
-
-        dep = _job.ArrayJob(
-            job_ident='1234', task_list=[2, 3, 4, 5], output_dir='', stage=SUBCOMMAND.VALIDATE
-        )
-        job.dependencies.append(dep)
-
-        sched.submit(job)
-        self.assertEqual('3891657', job.job_ident)
-        self.assertEqual(_constants.JOB_STATUS.SUBMITTED, job.status)
-
-        patcher.assert_called_with(
-            'qsub -j y -l mem_free=1M,mem_token=1M,h_vmem=1M -l h_rt=16:00:00 -V '
-            '-hold_jid 1234 '
-            '-N MV1 -t 2-4 -o output_dir/job-\\$JOB_NAME-\\$JOB_ID-\\$TASK_ID.log script.sh',
-            shell=True,
-        )
-
-    def test_array_job_non_consec_error(self):
-        job = _job.ArrayJob(
-            stage=SUBCOMMAND.VALIDATE,
-            output_dir='output_dir',
-            script='script.sh',
-            name='MV1',
-            task_list=[2, 3, 4, 7],
-            memory_limit=1,
-        )
-        sched = _scheduler.SgeScheduler(concurrency_limit=2)
-        with self.assertRaises(ValueError):
-            sched.submit(job)
-
-    def test_already_submitted_error(self):
-        job = _job.Job(stage=SUBCOMMAND.VALIDATE, output_dir='output_dir', job_ident='1')
-        sched = _scheduler.SgeScheduler(concurrency_limit=2)
-        with self.assertRaises(ValueError):
-            sched.submit(job)
diff --git a/tests/integration/schedule/test_slurm.py b/tests/integration/schedule/test_slurm.py
deleted file mode 100644
index a8566531..00000000
--- a/tests/integration/schedule/test_slurm.py
+++ /dev/null
@@ -1,617 +0,0 @@
-import subprocess
-import unittest
-from unittest import mock
-
-from mavis.schedule import job as _job
-from mavis.schedule import constants as _constants
-from mavis.schedule import scheduler as _scheduler
-from mavis.constants import SUBCOMMAND
-
-
-class TestSubmit(unittest.TestCase):
-
-    # TODO: test initial submission
-    # TODO: test submit after failure
-    # TODO: test reporting errors
-
-    @mock.patch('subprocess.check_output')
-    def test_single_job(self, patch_check):
-        patch_check.return_value = "Submitted batch job 1665695".encode('utf8')
-        job = _job.Job(output_dir='temp', name='job1', stage='validate', script='submit.sh')
-        print(job)
-        _scheduler.SlurmScheduler().submit(job)
-        self.assertEqual(_constants.JOB_STATUS.SUBMITTED, job.status)
-        self.assertEqual('1665695', job.job_ident)
-        patch_check.assert_called_with(
-            [
-                'sbatch',
-                '--mem',
-                '16000M',
-                '-t',
-                '16:00:00',
-                '--export=ALL',
-                '-J',
-                'job1',
-                '-o',
-                'temp/job-%x-%j.log',
-                'submit.sh',
-            ],
-            shell=False,
-        )
-
-    @mock.patch('subprocess.check_output')
-    def test_partition(self, patch_check):
-        patch_check.return_value = "Submitted batch job 1665695".encode('utf8')
-        job = _job.Job(
-            output_dir='temp', name='job1', stage='validate', script='submit.sh', queue='all'
-        )
-        print(job)
-        _scheduler.SlurmScheduler().submit(job)
-        self.assertEqual(_constants.JOB_STATUS.SUBMITTED, job.status)
-        self.assertEqual('1665695', job.job_ident)
-        patch_check.assert_called_with(
-            [
-                'sbatch',
-                '--partition=all',
-                '--mem',
-                '16000M',
-                '-t',
-                '16:00:00',
-                '--export=ALL',
-                '-J',
-                'job1',
-                '-o',
-                'temp/job-%x-%j.log',
-                'submit.sh',
-            ],
-            shell=False,
-        )
-
-    @mock.patch('subprocess.check_output')
-    def test_mail_options(self, patch_check):
-        patch_check.return_value = "Submitted batch job 1665695".encode('utf8')
-        job = _job.Job(
-            output_dir='temp',
-            name='job1',
-            stage='validate',
-            script='submit.sh',
-            mail_user='me@example.com',
-            mail_type=_constants.MAIL_TYPE.ALL,
-        )
-        print(job)
-        _scheduler.SlurmScheduler().submit(job)
-        self.assertEqual(_constants.JOB_STATUS.SUBMITTED, job.status)
-        self.assertEqual('1665695', job.job_ident)
-        patch_check.assert_called_with(
-            [
-                'sbatch',
-                '--mem',
-                '16000M',
-                '-t',
-                '16:00:00',
-                '--export=ALL',
-                '-J',
-                'job1',
-                '-o',
-                'temp/job-%x-%j.log',
-                '--mail-type=ALL',
-                '--mail-user=me@example.com',
-                'submit.sh',
-            ],
-            shell=False,
-        )
-
-    @mock.patch('subprocess.check_output')
-    def test_dependent_job(self, patch_check):
-        patch_check.side_effect = ["Submitted batch job 1665695".encode('utf8')]
-        job = _job.Job(
-            output_dir='temp',
-            name='job1',
-            stage='validate',
-            script='submit.sh',
-            dependencies=[
-                _job.Job(
-                    output_dir='temp',
-                    name='job2',
-                    stage='cluster',
-                    script='submit2.sh',
-                    job_ident='12345678',
-                )
-            ],
-        )
-        print(job)
-        _scheduler.SlurmScheduler().submit(job)
-        self.assertEqual(_constants.JOB_STATUS.SUBMITTED, job.status)
-        self.assertEqual('1665695', job.job_ident)
-        patch_check.assert_called_with(
-            [
-                'sbatch',
-                '--mem',
-                '16000M',
-                '-t',
-                '16:00:00',
-                '--export=ALL',
-                '--dependency=afterok:12345678',
-                '-J',
-                'job1',
-                '-o',
-                'temp/job-%x-%j.log',
-                'submit.sh',
-            ],
-            shell=False,
-        )
-
-    @mock.patch('subprocess.check_output')
-    def test_dependency_error(self, patch_check):
-        patch_check.side_effect = [
-            "Submitted batch job 12345678".encode('utf8'),
-            "Submitted batch job 1665695".encode('utf8'),
-        ]
-        job = _job.Job(
-            output_dir='temp',
-            name='job1',
-            stage='validate',
-            script='submit.sh',
-            dependencies=[
-                _job.Job(output_dir='temp', name='job2', stage='cluster', script='submit2.sh')
-            ],
-        )
-        print(job)
-        with self.assertRaises(ValueError):
-            _scheduler.SlurmScheduler().submit(job)
-
-    @mock.patch('subprocess.check_output')
-    def test_job_array(self, patch_check):
-        patch_check.return_value = "Submitted batch job 1665695".encode('utf8')
-        job = _job.ArrayJob(
-            output_dir='temp', name='job1', stage='validate', script='submit.sh', task_list=10
-        )
-        print(job)
-        _scheduler.SlurmScheduler().submit(job)
-        self.assertEqual(_constants.JOB_STATUS.SUBMITTED, job.status)
-        self.assertEqual('1665695', job.job_ident)
-        patch_check.assert_called_with(
-            [
-                'sbatch',
-                '--mem',
-                '16000M',
-                '-t',
-                '16:00:00',
-                '--export=ALL',
-                '-J',
-                'job1',
-                '-o',
-                'temp/job-%x-%A-%a.log',
-                '--array=1-10',
-                'submit.sh',
-            ],
-            shell=False,
-        )
-
-    @mock.patch('subprocess.check_output')
-    def test_job_array_concurrency_limit(self, patch_check):
-        patch_check.side_effect = ["Submitted batch job 1665695".encode('utf8')]
-        print(patch_check)
-        job = _job.ArrayJob(
-            output_dir='temp',
-            name='job1',
-            stage='validate',
-            script='submit.sh',
-            task_list=[1, 2, 3, 4, 5, 14, 16],
-        )
-        _scheduler.SlurmScheduler(concurrency_limit=2).submit(job)
-        self.assertEqual(_constants.JOB_STATUS.SUBMITTED, job.status)
-        self.assertEqual('1665695', job.job_ident)
-        exp = [
-            'sbatch',
-            '--mem',
-            '16000M',
-            '-t',
-            '16:00:00',
-            '--export=ALL',
-            '-J',
-            'job1',
-            '-o',
-            'temp/job-%x-%A-%a.log',
-            '--array=1-5,14,16%2',
-            'submit.sh',
-        ]
-        patch_check.assert_called_with(exp, shell=False)
-
-
-class TestUpdate(unittest.TestCase):
-    # TODO: status of array job
-    # TODO: status of single job
-    # TODO: status of job waiting on dependency
-
-    @mock.patch('subprocess.check_output')
-    def test_job_array(self, patch_check):
-        content = """
-JobID|JobIDRaw|JobName|Partition|MaxVMSize|MaxVMSizeNode|MaxVMSizeTask|AveVMSize|MaxRSS|MaxRSSNode|MaxRSSTask|AveRSS|MaxPages|MaxPagesNode|MaxPagesTask|AvePages|MinCPU|MinCPUNode|MinCPUTask|AveCPU|NTasks|AllocCPUS|Elapsed|State|ExitCode|AveCPUFreq|ReqCPUFreqMin|ReqCPUFreqMax|ReqCPUFreqGov|ReqMem|ConsumedEnergy|MaxDiskRead|MaxDiskReadNode|MaxDiskReadTask|AveDiskRead|MaxDiskWrite|MaxDiskWriteNode|MaxDiskWriteTask|AveDiskWrite|AllocGRES|ReqGRES|ReqTRES|AllocTRES|
-1671879_1|1671879|MV_mock-A36971_batch-tX8SW6tEiEfZ8ZLHDPDa83|short||||||||||||||||||1|00:00:00|FAILED|1:0||Unknown|Unknown|Unknown|16000Mn||||||||||||cpu=1,mem=16000M,node=1|cpu=1,mem=16000M,node=1|
-1671879_1.batch|1671879.batch|batch||||||||||||||||||1|1|00:00:00|FAILED|1:0||0|0|0|16000Mn|||||||||||||cpu=1,mem=16000M,node=1|
-1671880_1|1671880|MV_mock-A47933_batch-tX8SW6tEiEfZ8ZLHDPDa83|short||||||||||||||||||1|00:00:00|FAILED|1:0||Unknown|Unknown|Unknown|18000Mn||||||||||||cpu=1,mem=18000M,node=1|cpu=1,mem=18000M,node=1|
-1671880_1.batch|1671880.batch|batch||||||||||||||||||1|1|00:00:00|FAILED|1:0||0|0|0|18000Mn|||||||||||||cpu=1,mem=18000M,node=1|
-1671893_1|1671893|MV_mock-A36971_batch-tX8SW6tEiEfZ8ZLHDPDa83|short||||||||||||||||||1|00:00:01|FAILED|1:0||Unknown|Unknown|Unknown|16000Mn||||||||||||cpu=1,mem=16000M,node=1|cpu=1,mem=16000M,node=1|
-1671893_1.batch|1671893.batch|batch||||||||||||||||||1|1|00:00:01|FAILED|1:0||0|0|0|16000Mn|||||||||||||cpu=1,mem=16000M,node=1|
-1671894_1|1671894|MV_mock-A47933_batch-tX8SW6tEiEfZ8ZLHDPDa83|short||||||||||||||||||1|00:00:00|FAILED|1:0||Unknown|Unknown|Unknown|18000Mn||||||||||||cpu=1,mem=18000M,node=1|cpu=1,mem=18000M,node=1|
-1671894_1.batch|1671894.batch|batch||||||||||||||||||1|1|00:00:00|FAILED|1:0||0|0|0|18000Mn|||||||||||||cpu=1,mem=18000M,node=1|
-1671915_1|1671915|MV_mock-A36971_batch-ezPmnHmYjZjsj8gfCynbsX|short||||||||||||||||||1|00:00:20|CANCELLED by 1365|0:0||Unknown|Unknown|Unknown|16000Mn||||||||||||cpu=1,mem=16000M,node=1|cpu=1,mem=16000M,node=1|
-1671915_1.batch|1671915.batch|batch||125588K|n305|0|125588K|900K|n305|0|900K|0|n305|0|0|00:00:00|n305|0|00:00:00|1|1|00:00:20|CANCELLED|0:15|2.19M|0|0|0|16000Mn|0|0|n305|65534|0|0|n305|65534|0||||cpu=1,mem=16000M,node=1|
-1671916_1|1671916|MV_mock-A47933_batch-ezPmnHmYjZjsj8gfCynbsX|short||||||||||||||||||1|00:00:20|CANCELLED by 1365|0:0||Unknown|Unknown|Unknown|18000Mn||||||||||||cpu=1,mem=18000M,node=1|cpu=1,mem=18000M,node=1|
-1671916_1.batch|1671916.batch|batch||125588K|n305|0|125588K|900K|n305|0|900K|0|n305|0|0|00:00:00|n305|0|00:00:00|1|1|00:00:20|CANCELLED|0:15|2.19M|0|0|0|18000Mn|0|0|n305|65534|0|0|n305|65534|0||||cpu=1,mem=18000M,node=1|
-1671970_1|1671970|MV_mock-A36971_batch-ezPmnHmYjZjsj8gfCynbsX|short||||||||||||||||||1|00:00:21|COMPLETED|0:0||Unknown|Unknown|Unknown|16000Mn||||||||||||cpu=1,mem=16000M,node=1|cpu=1,mem=16000M,node=1|
-1671970_1.batch|1671970.batch|batch||125588K|n305|0|125588K|908K|n305|0|908K|0|n305|0|0|00:00:00|n305|0|00:00:00|1|1|00:00:21|COMPLETED|0:0|2.19M|0|0|0|16000Mn|0|0|n305|65534|0|0|n305|65534|0||||cpu=1,mem=16000M,node=1|
-1671971_1|1671971|MV_mock-A47933_batch-ezPmnHmYjZjsj8gfCynbsX|short||||||||||||||||||1|00:00:20|COMPLETED|0:0||Unknown|Unknown|Unknown|18000Mn||||||||||||cpu=1,mem=18000M,node=1|cpu=1,mem=18000M,node=1|
-1671971_1.batch|1671971.batch|batch||125588K|n305|0|125588K|904K|n305|0|904K|0|n305|0|0|00:00:00|n305|0|00:00:00|1|1|00:00:20|COMPLETED|0:0|2.19M|0|0|0|18000Mn|0|0|n305|65534|0|0|n305|65534|0||||cpu=1,mem=18000M,node=1|
-1671974_1|1671974|MV_mock-A36971_batch-ezPmnHmYjZjsj8gfCynbsX|short||||||||||||||||||1|00:00:11|COMPLETED|0:0||Unknown|Unknown|Unknown|16000Mn||||||||||||cpu=1,mem=16000M,node=1|cpu=1,mem=16000M,node=1|
-1671974_1.batch|1671974.batch|batch||125588K|n305|0|125588K|900K|n305|0|900K|0|n305|0|0|00:00:00|n305|0|00:00:00|1|1|00:00:11|COMPLETED|0:0|2.19M|0|0|0|16000Mn|0|0|n305|65534|0|0|n305|65534|0||||cpu=1,mem=16000M,node=1|
-1671975_1|1671975|MV_mock-A47933_batch-ezPmnHmYjZjsj8gfCynbsX|short||||||||||||||||||1|00:00:10|COMPLETED|0:0||Unknown|Unknown|Unknown|18000Mn||||||||||||cpu=1,mem=18000M,node=1|cpu=1,mem=18000M,node=1|
-1671975_1.batch|1671975.batch|batch||125588K|n305|0|125588K|908K|n305|0|908K|0|n305|0|0|00:00:00|n305|0|00:00:00|1|1|00:00:10|COMPLETED|0:0|2.19M|0|0|0|18000Mn|0|0|n305|65534|0|0|n305|65534|0||||cpu=1,mem=18000M,node=1|
-1671981_1|1671981|MV_mock-A36971_batch-ezPmnHmYjZjsj8gfCynbsX|short||||||||||||||||||1|00:00:12|COMPLETED|0:0||Unknown|Unknown|Unknown|16000Mn||||||||||||cpu=1,mem=16000M,node=1|cpu=1,mem=16000M,node=1|
-1671981_1.batch|1671981.batch|batch||125588K|n305|0|125588K|904K|n305|0|904K|0|n305|0|0|00:00:00|n305|0|00:00:00|1|1|00:00:12|COMPLETED|0:0|2.19M|0|0|0|16000Mn|0|0|n305|65534|0|0|n305|65534|0||||cpu=1,mem=16000M,node=1|
-1671982_1|1671982|MV_mock-A47933_batch-ezPmnHmYjZjsj8gfCynbsX|short||||||||||||||||||1|00:00:11|COMPLETED|0:0||Unknown|Unknown|Unknown|18000Mn||||||||||||cpu=1,mem=18000M,node=1|cpu=1,mem=18000M,node=1|
-1671982_1.batch|1671982.batch|batch||125588K|n305|0|125588K|900K|n305|0|900K|0|n305|0|0|00:00:00|n305|0|00:00:00|1|1|00:00:11|COMPLETED|0:0|2.19M|0|0|0|18000Mn|0|0|n305|65534|0|0|n305|65534|0||||cpu=1,mem=18000M,node=1|
-1671983_1|1671983|MA_mock-A36971_batch-ezPmnHmYjZjsj8gfCynbsX|short||||||||||||||||||1|00:00:05|COMPLETED|0:0||Unknown|Unknown|Unknown|12000Mn||||||||||||cpu=1,mem=12000M,node=1|cpu=1,mem=12000M,node=1|
-1671983_1.batch|1671983.batch|batch||125588K|n305|0|125588K|896K|n305|0|896K|0|n305|0|0|00:00:00|n305|0|00:00:00|1|1|00:00:05|COMPLETED|0:0|2.19M|0|0|0|12000Mn|0|0|n305|65534|0|0|n305|65534|0||||cpu=1,mem=12000M,node=1|
-1671984_1|1671984|MA_mock-A47933_batch-ezPmnHmYjZjsj8gfCynbsX|short||||||||||||||||||1|00:00:04|COMPLETED|0:0||Unknown|Unknown|Unknown|12000Mn||||||||||||cpu=1,mem=12000M,node=1|cpu=1,mem=12000M,node=1|
-1671984_1.batch|1671984.batch|batch||125588K|n305|0|125588K|896K|n305|0|896K|0|n305|0|0|00:00:00|n305|0|00:00:00|1|1|00:00:04|COMPLETED|0:0|2.19M|0|0|0|12000Mn|0|0|n305|65534|0|0|n305|65534|0||||cpu=1,mem=12000M,node=1|
-1671985|1671985|MP_batch-ezPmnHmYjZjsj8gfCynbsX|short||||||||||||||||||1|00:00:02|FAILED|2:0||Unknown|Unknown|Unknown|16000Mn||||||||||||cpu=1,mem=16000M,node=1|cpu=1,mem=16000M,node=1|
-1671985.batch|1671985.batch|batch||125588K|n305|0|125588K|900K|n305|0|900K|0|n305|0|0|00:00:00|n305|0|00:00:00|1|1|00:00:02|FAILED|2:0|2.19M|0|0|0|16000Mn|0|0|n305|65534|0|0|n305|65534|0||||cpu=1,mem=16000M,node=1|
-1671986|1671986|MS_batch-ezPmnHmYjZjsj8gfCynbsX|short||||||||||||||||||1|00:00:00|CANCELLED by 1365|0:0||Unknown|Unknown|Unknown|16000Mn||||||||||||cpu=1,mem=16000M,node=1||
-1672141|1672141|subtest.sh|all||||||||||||||||||1|00:01:01|COMPLETED|0:0||Unknown|Unknown|Unknown|7000Mc||||||||||||cpu=1,mem=7000M,node=1|cpu=1,mem=7000M,node=1|
-1672141.batch|1672141.batch|batch||207004K|n106|0|207004K|1760K|n106|0|1760K|1K|n106|0|1K|00:00:00|n106|0|00:00:00|1|1|00:01:01|COMPLETED|0:0|2.19M|0|0|0|7000Mc|0|0.06M|n106|0|0.06M|0.00M|n106|0|0.00M||||cpu=1,mem=7000M,node=1|
-1672166|1672166|subtest.sh|all||||||||||||||||||1|00:01:03|COMPLETED|0:0||Unknown|Unknown|Unknown|7000Mc||||||||||||cpu=1,mem=7000M,node=1|cpu=1,mem=7000M,node=1|
-1672166.batch|1672166.batch|batch||207004K|n106|0|207004K|1760K|n106|0|1760K|0|n106|0|0|00:00:00|n106|0|00:00:00|1|1|00:01:03|COMPLETED|0:0|2.19M|0|0|0|7000Mc|0|0.06M|n106|0|0.06M|0.00M|n106|0|0.00M||||cpu=1,mem=7000M,node=1|
-1672169_10|1672169|subtest.sh|all||||||||||||||||||1|00:01:02|COMPLETED|0:0||Unknown|Unknown|Unknown|7000Mc||||||||||||cpu=1,mem=7000M,node=1|cpu=1,mem=7000M,node=1|
-1672169_10.batch|1672169.batch|batch||207004K|n130|0|207004K|1764K|n130|0|1764K|0|n130|0|0|00:00:00|n130|0|00:00:00|1|1|00:01:02|COMPLETED|0:0|2.19M|0|0|0|7000Mc|0|0.06M|n130|0|0.06M|0.00M|n130|0|0.00M||||cpu=1,mem=7000M,node=1|
-1672169_1|1672171|subtest.sh|all||||||||||||||||||1|00:01:00|COMPLETED|0:0||Unknown|Unknown|Unknown|7000Mc||||||||||||cpu=1,mem=7000M,node=1|cpu=1,mem=7000M,node=1|
-1672169_1.batch|1672171.batch|batch||207004K|n106|0|207004K|1764K|n106|0|1764K|0|n106|0|0|00:00:00|n106|0|00:00:00|1|1|00:01:00|COMPLETED|0:0|2.19M|0|0|0|7000Mc|0|0.06M|n106|0|0.06M|0.00M|n106|0|0.00M||||cpu=1,mem=7000M,node=1|
-1672169_2|1672172|subtest.sh|all||||||||||||||||||1|00:01:00|COMPLETED|0:0||Unknown|Unknown|Unknown|7000Mc||||||||||||cpu=1,mem=7000M,node=1|cpu=1,mem=7000M,node=1|
-1672169_2.batch|1672172.batch|batch||207004K|n106|0|207004K|1764K|n106|0|1764K|0|n106|0|0|00:00:00|n106|0|00:00:00|1|1|00:01:00|COMPLETED|0:0|2.19M|0|0|0|7000Mc|0|0.06M|n106|0|0.06M|0.00M|n106|0|0.00M||||cpu=1,mem=7000M,node=1|
-1672169_3|1672173|subtest.sh|all||||||||||||||||||1|00:01:01|COMPLETED|0:0||Unknown|Unknown|Unknown|7000Mc||||||||||||cpu=1,mem=7000M,node=1|cpu=1,mem=7000M,node=1|
-1672169_3.batch|1672173.batch|batch||207004K|n106|0|207004K|1764K|n106|0|1764K|0|n106|0|0|00:00:00|n106|0|00:00:00|1|1|00:01:01|COMPLETED|0:0|2.19M|0|0|0|7000Mc|0|0.06M|n106|0|0.06M|0.00M|n106|0|0.00M||||cpu=1,mem=7000M,node=1|
-1672169_4|1672174|subtest.sh|all||||||||||||||||||1|00:01:01|COMPLETED|0:0||Unknown|Unknown|Unknown|7000Mc||||||||||||cpu=1,mem=7000M,node=1|cpu=1,mem=7000M,node=1|
-1672169_4.batch|1672174.batch|batch||207004K|n106|0|207004K|1760K|n106|0|1760K|0|n106|0|0|00:00:00|n106|0|00:00:00|1|1|00:01:01|COMPLETED|0:0|2.19M|0|0|0|7000Mc|0|0.06M|n106|0|0.06M|0.00M|n106|0|0.00M||||cpu=1,mem=7000M,node=1|
-1672169_5|1672175|subtest.sh|all||||||||||||||||||1|00:01:02|COMPLETED|0:0||Unknown|Unknown|Unknown|7000Mc||||||||||||cpu=1,mem=7000M,node=1|cpu=1,mem=7000M,node=1|
-1672169_5.batch|1672175.batch|batch||207004K|n130|0|207004K|1764K|n130|0|1764K|0|n130|0|0|00:00:00|n130|0|00:00:00|1|1|00:01:02|COMPLETED|0:0|2.19M|0|0|0|7000Mc|0|0.06M|n130|0|0.06M|0.00M|n130|0|0.00M||||cpu=1,mem=7000M,node=1|
-1672169_6|1672176|subtest.sh|all||||||||||||||||||1|00:01:02|COMPLETED|0:0||Unknown|Unknown|Unknown|7000Mc||||||||||||cpu=1,mem=7000M,node=1|cpu=1,mem=7000M,node=1|
-1672169_6.batch|1672176.batch|batch||207004K|n130|0|207004K|1764K|n130|0|1764K|1K|n130|0|1K|00:00:00|n130|0|00:00:00|1|1|00:01:02|COMPLETED|0:0|2.19M|0|0|0|7000Mc|0|0.06M|n130|0|0.06M|0.00M|n130|0|0.00M||||cpu=1,mem=7000M,node=1|
-1672169_7|1672177|subtest.sh|all||||||||||||||||||1|00:01:01|COMPLETED|0:0||Unknown|Unknown|Unknown|7000Mc||||||||||||cpu=1,mem=7000M,node=1|cpu=1,mem=7000M,node=1|
-1672169_7.batch|1672177.batch|batch||207004K|n130|0|207004K|1756K|n130|0|1756K|1K|n130|0|1K|00:00:00|n130|0|00:00:00|1|1|00:01:01|COMPLETED|0:0|2.19M|0|0|0|7000Mc|0|0.06M|n130|0|0.06M|0.00M|n130|0|0.00M||||cpu=1,mem=7000M,node=1|
-1672169_8|1672178|subtest.sh|all||||||||||||||||||1|00:01:02|COMPLETED|0:0||Unknown|Unknown|Unknown|7000Mc||||||||||||cpu=1,mem=7000M,node=1|cpu=1,mem=7000M,node=1|
-1672169_8.batch|1672178.batch|batch||207004K|n130|0|207004K|1764K|n130|0|1764K|0|n130|0|0|00:00:00|n130|0|00:00:00|1|1|00:01:02|COMPLETED|0:0|2.19M|0|0|0|7000Mc|0|0.06M|n130|0|0.06M|0.00M|n130|0|0.00M||||cpu=1,mem=7000M,node=1|
-1672169_9|1672179|subtest.sh|all||||||||||||||||||1|00:01:02|COMPLETED|0:0||Unknown|Unknown|Unknown|7000Mc||||||||||||cpu=1,mem=7000M,node=1|cpu=1,mem=7000M,node=1|
-1672169_9.batch|1672179.batch|batch||207004K|n130|0|207004K|1760K|n130|0|1760K|1K|n130|0|1K|00:00:00|n130|0|00:00:00|1|1|00:01:02|COMPLETED|0:0|2.19M|0|0|0|7000Mc|0|0.06M|n130|0|0.06M|0.00M|n130|0|0.00M||||cpu=1,mem=7000M,node=1|
-1672268_2|1672268|MV_mock-A36971_batch-iJUMYRdLFDsuu9eVzGmmKm|short||||||||||||||||||1|00:00:36|COMPLETED|0:0||Unknown|Unknown|Unknown|16000Mn||||||||||||cpu=1,mem=16000M,node=1|cpu=1,mem=16000M,node=1|
-1672268_2.batch|1672268.batch|batch||2025280K|n305|0|2025280K|58776K|n305|0|58776K|0|n305|0|0|00:00:03|n305|0|00:00:03|1|1|00:00:36|COMPLETED|0:0|2.19M|0|0|0|16000Mn|0|13.01M|n305|0|13.01M|0.05M|n305|0|0.05M||||cpu=1,mem=16000M,node=1|
-1672269_3|1672269|MV_mock-A47933_batch-iJUMYRdLFDsuu9eVzGmmKm|short||||||||||||||||||1|00:00:33|COMPLETED|0:0||Unknown|Unknown|Unknown|18000Mn||||||||||||cpu=1,mem=18000M,node=1|cpu=1,mem=18000M,node=1|
-1672269_3.batch|1672269.batch|batch||2019424K|n305|0|2019424K|55844K|n305|0|55844K|0|n305|0|0|00:00:03|n305|0|00:00:03|1|1|00:00:33|COMPLETED|0:0|2.19M|0|0|0|18000Mn|0|11.60M|n305|0|11.60M|0.05M|n305|0|0.05M||||cpu=1,mem=18000M,node=1|
-1672270_2|1672270|MA_mock-A36971_batch-iJUMYRdLFDsuu9eVzGmmKm|short||||||||||||||||||1|00:00:02|COMPLETED|0:0||Unknown|Unknown|Unknown|12000Mn||||||||||||cpu=1,mem=12000M,node=1|cpu=1,mem=12000M,node=1|
-1672270_2.batch|1672270.batch|batch||125588K|n305|0|125588K|904K|n305|0|904K|0|n305|0|0|00:00:00|n305|0|00:00:00|1|1|00:00:02|COMPLETED|0:0|2.19M|0|0|0|12000Mn|0|0|n305|65534|0|0|n305|65534|0||||cpu=1,mem=12000M,node=1|
-1672271_3|1672271|MA_mock-A47933_batch-iJUMYRdLFDsuu9eVzGmmKm|short||||||||||||||||||1|00:00:02|COMPLETED|0:0||Unknown|Unknown|Unknown|12000Mn||||||||||||cpu=1,mem=12000M,node=1|cpu=1,mem=12000M,node=1|
-1672271_3.batch|1672271.batch|batch||125588K|n305|0|125588K|904K|n305|0|904K|0|n305|0|0|00:00:00|n305|0|00:00:00|1|1|00:00:02|COMPLETED|0:0|2.19M|0|0|0|12000Mn|0|0|n305|65534|0|0|n305|65534|0||||cpu=1,mem=12000M,node=1|
-1672272|1672272|MP_batch-iJUMYRdLFDsuu9eVzGmmKm|short||||||||||||||||||1|00:00:02|FAILED|2:0||Unknown|Unknown|Unknown|16000Mn||||||||||||cpu=1,mem=16000M,node=1|cpu=1,mem=16000M,node=1|
-1672272.batch|1672272.batch|batch||125588K|n305|0|125588K|900K|n305|0|900K|0|n305|0|0|00:00:00|n305|0|00:00:00|1|1|00:00:02|FAILED|2:0|2.19M|0|0|0|16000Mn|0|0|n305|65534|0|0|n305|65534|0||||cpu=1,mem=16000M,node=1|
-1672273|1672273|MS_batch-iJUMYRdLFDsuu9eVzGmmKm|short||||||||||||||||||1|00:00:00|CANCELLED by 1365|0:0||Unknown|Unknown|Unknown|16000Mn||||||||||||cpu=1,mem=16000M,node=1||
-1672270_1|1672274|MA_mock-A36971_batch-iJUMYRdLFDsuu9eVzGmmKm|short||||||||||||||||||1|00:00:04|COMPLETED|0:0||Unknown|Unknown|Unknown|12000Mn||||||||||||cpu=1,mem=12000M,node=1|cpu=1,mem=12000M,node=1|
-1672270_1.batch|1672274.batch|batch||125588K|n305|0|125588K|896K|n305|0|896K|0|n305|0|0|00:00:00|n305|0|00:00:00|1|1|00:00:04|COMPLETED|0:0|2.19M|0|0|0|12000Mn|0|0|n305|65534|0|0|n305|65534|0||||cpu=1,mem=12000M,node=1|
-1672271_1|1672275|MA_mock-A47933_batch-iJUMYRdLFDsuu9eVzGmmKm|short||||||||||||||||||1|00:00:02|COMPLETED|0:0||Unknown|Unknown|Unknown|12000Mn||||||||||||cpu=1,mem=12000M,node=1|cpu=1,mem=12000M,node=1|
-1672271_1.batch|1672275.batch|batch||125588K|n305|0|125588K|900K|n305|0|900K|0|n305|0|0|00:00:00|n305|0|00:00:00|1|1|00:00:02|COMPLETED|0:0|2.19M|0|0|0|12000Mn|0|0|n305|65534|0|0|n305|65534|0||||cpu=1,mem=12000M,node=1|
-1672271_2|1672276|MA_mock-A47933_batch-iJUMYRdLFDsuu9eVzGmmKm|short||||||||||||||||||1|00:00:02|COMPLETED|0:0||Unknown|Unknown|Unknown|12000Mn||||||||||||cpu=1,mem=12000M,node=1|cpu=1,mem=12000M,node=1|
-1672271_2.batch|1672276.batch|batch||125588K|n305|0|125588K|896K|n305|0|896K|0|n305|0|0|00:00:00|n305|0|00:00:00|1|1|00:00:02|COMPLETED|0:0|2.19M|0|0|0|12000Mn|0|0|n305|65534|0|0|n305|65534|0||||cpu=1,mem=12000M,node=1|
-1672268_1|1672277|MV_mock-A36971_batch-iJUMYRdLFDsuu9eVzGmmKm|short||||||||||||||||||1|00:00:32|COMPLETED|0:0||Unknown|Unknown|Unknown|16000Mn||||||||||||cpu=1,mem=16000M,node=1|cpu=1,mem=16000M,node=1|
-1672268_1.batch|1672277.batch|batch||2018912K|n305|0|2018912K|51952K|n305|0|51952K|0|n305|0|0|00:00:03|n305|0|00:00:03|1|1|00:00:32|COMPLETED|0:0|2.19M|0|0|0|16000Mn|0|10.79M|n305|0|10.79M|0.05M|n305|0|0.05M||||cpu=1,mem=16000M,node=1|
-1672269_1|1672278|MV_mock-A47933_batch-iJUMYRdLFDsuu9eVzGmmKm|short||||||||||||||||||1|00:00:32|COMPLETED|0:0||Unknown|Unknown|Unknown|18000Mn||||||||||||cpu=1,mem=18000M,node=1|cpu=1,mem=18000M,node=1|
-1672269_1.batch|1672278.batch|batch||2018660K|n305|0|2018660K|57016K|n305|0|57016K|0|n305|0|0|00:00:03|n305|0|00:00:03|1|1|00:00:32|COMPLETED|0:0|2.19M|0|0|0|18000Mn|0|12.79M|n305|0|12.79M|0.05M|n305|0|0.05M||||cpu=1,mem=18000M,node=1|
-1672269_2|1672279|M1673291_mock-A47933_batch-iJUMYRdLFDsuu9eVzGmmKm|short||||||||||||||||||1|00:00:33|COMPLETED|0:0||Unknown|Unknown|Unknown|18000Mn||||||||||||cpu=1,mem=18000M,node=1|cpu=1,mem=18000M,node=1|
-1672269_2.batch|1671673291279.batch|batch||2019424K|n305|0|2019424K|54212K|n305|0|54212K|0|n305|0|0|00:00:03|n305|0|00:00:03|1|1|00:00:33|COMPLETED|0:0|2.19M|0|0|0|18000Mn|0|11.51M|n305|0|11.51M|0.05M|n305|0|0.05M||||cpu=1,mem=18000M,node=1|
-1672454_2|1672454|M1673291_mock-A36971_batch-uKEUyUuWbi2mgd75KjP4k5|short||||||||||||||||||1|00:00:29|COMPLETED|0:0||Unknown|Unknown|Unknown|16000Mn||||||||||||cpu=1,mem=16000M,node=1|cpu=1,mem=16000M,node=1|
-1672454_2.batch|1671673291454.batch|batch||125588K|n305|0|125588K|908K|n305|0|908K|0|n305|0|0|00:00:00|n305|0|00:00:00|1|1|00:00:29|COMPLETED|0:0|2.19M|0|0|0|16000Mn|0|0|n305|65534|0|0|n305|65534|0||||cpu=1,mem=16000M,node=1|
-1672455_3|1672455|MV_mock-A47933_batch-uKEUyUuWbi2mgd75KjP4k5|short||||||||||||||||||1|00:00:26|COMPLETED|0:0||Unknown|Unknown|Unknown|18000Mn||||||||||||cpu=1,mem=18000M,node=1|cpu=1,mem=18000M,node=1|
-1672455_3.batch|1672455.batch|batch||125588K|n305|0|125588K|904K|n305|0|904K|0|n305|0|0|00:00:00|n305|0|00:00:00|1|1|00:00:26|COMPLETED|0:0|2.19M|0|0|0|18000Mn|0|0|n305|65534|0|0|n305|65534|0||||cpu=1,mem=18000M,node=1|
-1672456_2|1672456|MA_mock-A36971_batch-uKEUyUuWbi2mgd75KjP4k5|short||||||||||||||||||1|00:00:02|COMPLETED|0:0||Unknown|Unknown|Unknown|12000Mn||||||||||||cpu=1,mem=12000M,node=1|cpu=1,mem=12000M,node=1|
-1672456_2.batch|1672456.batch|batch||125588K|n305|0|125588K|904K|n305|0|904K|0|n305|0|0|00:00:00|n305|0|00:00:00|1|1|00:00:02|COMPLETED|0:0|2.19M|0|0|0|12000Mn|0|0|n305|65534|0|0|n305|65534|0||||cpu=1,mem=12000M,node=1|
-1672457_3|1672457|MA_mock-A47933_batch-uKEUyUuWbi2mgd75KjP4k5|short||||||||||||||||||1|00:00:03|COMPLETED|0:0||Unknown|Unknown|Unknown|12000Mn||||||||||||cpu=1,mem=12000M,node=1|cpu=1,mem=12000M,node=1|
-1672457_3.batch|1672457.batch|batch||125588K|n305|0|125588K|900K|n305|0|900K|0|n305|0|0|00:00:00|n305|0|00:00:00|1|1|00:00:03|COMPLETED|0:0|2.19M|0|0|0|12000Mn|0|0|n305|65534|0|0|n305|65534|0||||cpu=1,mem=12000M,node=1|
-1672458|1672458|MP_batch-uKEUyUuWbi2mgd75KjP4k5|short||||||||||||||||||1|00:00:02|COMPLETED|0:0||Unknown|Unknown|Unknown|16000Mn||||||||||||cpu=1,mem=16000M,node=1|cpu=1,mem=16000M,node=1|
-1672458.batch|1672458.batch|batch||125588K|n305|0|125588K|896K|n305|0|896K|0|n305|0|0|00:00:00|n305|0|00:00:00|1|1|00:00:02|COMPLETED|0:0|2.19M|0|0|0|16000Mn|0|0|n305|65534|0|0|n305|65534|0||||cpu=1,mem=16000M,node=1|
-1672456_1|1672459|MA_mock-A36971_batch-uKEUyUuWbi2mgd75KjP4k5|short||||||||||||||||||1|00:00:04|COMPLETED|0:0||Unknown|Unknown|Unknown|12000Mn||||||||||||cpu=1,mem=12000M,node=1|cpu=1,mem=12000M,node=1|
-1672456_1.batch|1672459.batch|batch||125588K|n305|0|125588K|900K|n305|0|900K|0|n305|0|0|00:00:00|n305|0|00:00:00|1|1|00:00:04|COMPLETED|0:0|2.19M|0|0|0|12000Mn|0|0|n305|65534|0|0|n305|65534|0||||cpu=1,mem=12000M,node=1|
-1672457_1|1672460|MA_mock-A47933_batch-uKEUyUuWbi2mgd75KjP4k5|short||||||||||||||||||1|00:00:03|COMPLETED|0:0||Unknown|Unknown|Unknown|12000Mn||||||||||||cpu=1,mem=12000M,node=1|cpu=1,mem=12000M,node=1|
-1672457_1.batch|1672460.batch|batch||125588K|n305|0|125588K|900K|n305|0|900K|0|n305|0|0|00:00:00|n305|0|00:00:00|1|1|00:00:03|COMPLETED|0:0|2.19M|0|0|0|12000Mn|0|0|n305|65534|0|0|n305|65534|0||||cpu=1,mem=12000M,node=1|
-1672457_2|1672461|MA_mock-A47933_batch-uKEUyUuWbi2mgd75KjP4k5|short||||||||||||||||||1|00:00:02|COMPLETED|0:0||Unknown|Unknown|Unknown|12000Mn||||||||||||cpu=1,mem=12000M,node=1|cpu=1,mem=12000M,node=1|
-1672457_2.batch|1672461.batch|batch||125588K|n305|0|125588K|896K|n305|0|896K|0|n305|0|0|00:00:00|n305|0|00:00:00|1|1|00:00:02|COMPLETED|0:0|2.19M|0|0|0|12000Mn|0|0|n305|65534|0|0|n305|65534|0||||cpu=1,mem=12000M,node=1|
-1672454_1|1672462|MV_mock-A36971_batch-uKEUyUuWbi2mgd75KjP4k5|short||||||||||||||||||1|00:00:25|COMPLETED|0:0||Unknown|Unknown|Unknown|16000Mn||||||||||||cpu=1,mem=16000M,node=1|cpu=1,mem=16000M,node=1|
-1672454_1.batch|1672462.batch|batch||125588K|n305|0|125588K|904K|n305|0|904K|0|n305|0|0|00:00:00|n305|0|00:00:00|1|1|00:00:25|COMPLETED|0:0|2.19M|0|0|0|16000Mn|0|0|n305|65534|0|0|n305|65534|0||||cpu=1,mem=16000M,node=1|
-1672455_1|1672463|MV_mock-A47933_batch-uKEUyUuWbi2mgd75KjP4k5|short||||||||||||||||||1|00:00:25|COMPLETED|0:0||Unknown|Unknown|Unknown|18000Mn||||||||||||cpu=1,mem=18000M,node=1|cpu=1,mem=18000M,node=1|
-1672455_1.batch|1672463.batch|batch||125588K|n305|0|125588K|904K|n305|0|904K|0|n305|0|0|00:00:00|n305|0|00:00:00|1|1|00:00:25|COMPLETED|0:0|2.19M|0|0|0|18000Mn|0|0|n305|65534|0|0|n305|65534|0||||cpu=1,mem=18000M,node=1|
-1672455_2|1672464|MV_mock-A47933_batch-uKEUyUuWbi2mgd75KjP4k5|short||||||||||||||||||1|00:00:27|COMPLETED|0:0||Unknown|Unknown|Unknown|18000Mn||||||||||||cpu=1,mem=18000M,node=1|cpu=1,mem=18000M,node=1|
-1672455_2.batch|1672464.batch|batch||125588K|n305|0|125588K|900K|n305|0|900K|0|n305|0|0|00:00:00|n305|0|00:00:00|1|1|00:00:27|COMPLETED|0:0|2.19M|0|0|0|18000Mn|0|0|n305|65534|0|0|n305|65534|0||||cpu=1,mem=18000M,node=1|
-1672465|1672465|MS_batch-uKEUyUuWbi2mgd75KjP4k5|short||||||||||||||||||1|00:00:02|COMPLETED|0:0||Unknown|Unknown|Unknown|16000Mn||||||||||||cpu=1,mem=16000M,node=1|cpu=1,mem=16000M,node=1|
-1672465.batch|1672465.batch|batch||125588K|n305|0|125588K|896K|n305|0|896K|0|n305|0|0|00:00:00|n305|0|00:00:00|1|1|00:00:02|COMPLETED|0:0|2.19M|0|0|0|16000Mn|0|0|n305|65534|0|0|n305|65534|0||||cpu=1,mem=16000M,node=1|
-        """.encode(
-            'utf8'
-        )
-        patch_check.return_value = content
-        job = _job.ArrayJob(output_dir='temp', job_ident='1672457', task_list=3, stage='validate')
-        _scheduler.SlurmScheduler().update_info(job)
-        self.assertEqual(_constants.JOB_STATUS.COMPLETED, job.status)
-        self.assertEqual(3, len(job.task_list))
-
-
-class TestParseScontrolShow(unittest.TestCase):
-    def test_pending_job(self):
-        content = """
-JobId=1673292 JobName=MP_batch-8PyNX8EN4cBdD9vQd9FrRG
-   UserId=creisle(1365) GroupId=users(100) MCS_label=N/A
-   Priority=31 Nice=0 Account=all QOS=normal
-   JobState=PENDING Reason=DependencyNeverSatisfied Dependency=afterok:1673291_*
-   Requeue=1 Restarts=0 BatchFlag=1 Reboot=0 ExitCode=0:0
-   RunTime=00:00:00 TimeLimit=16:00:00 TimeMin=N/A
-   SubmitTime=2018-05-24T11:32:44 EligibleTime=Unknown
-   StartTime=Unknown EndTime=Unknown Deadline=N/A
-   PreemptTime=None SuspendTime=None SecsPreSuspend=0
-   Partition=short AllocNode:Sid=n104:47409
-   ReqNodeList=(null) ExcNodeList=(null)
-   NodeList=(null)
-   NumNodes=1 NumCPUs=1 NumTasks=1 CPUs/Task=1 ReqB:S:C:T=0:0:*:*
-   TRES=cpu=1,mem=16000,node=1
-   Socks/Node=* NtasksPerN:B:S:C=0:0:*:* CoreSpec=*
-   MinCPUsNode=1 MinMemoryNode=16000M MinTmpDiskNode=0
-   Features=(null) DelayBoot=00:00:00
-   Gres=(null) Reservation=(null)
-   OverSubscribe=OK Contiguous=0 Licenses=(null) Network=(null)
-   Command=/projects/trans_scratch/validations/workspace/creisle/temp/test_submission/output_slurm/pairing/submit.sh
-   WorkDir=/projects/trans_scratch/validations/workspace/creisle/temp/test_submission
-   StdErr=/projects/trans_scratch/validations/workspace/creisle/temp/test_submission/output_slurm/pairing/job-%x-1673292.log
-   StdIn=/dev/null
-   StdOut=/projects/trans_scratch/validations/workspace/creisle/temp/test_submission/output_slurm/pairing/job-%x-1673292.log
-   Power=
-
-        """
-        rows = _scheduler.SlurmScheduler().parse_scontrol_show(content)
-        self.assertEqual(1, len(rows))
-        self.assertEqual(
-            {
-                'job_ident': '1673292',
-                'task_ident': None,
-                'status': 'PENDING',
-                'status_comment': 'DependencyNeverSatisfied',
-                'name': 'MP_batch-8PyNX8EN4cBdD9vQd9FrRG',
-            },
-            rows[0],
-        )
-
-    def test_job_array(self):
-        content = """
-JobId=1673301 ArrayJobId=1673301 ArrayTaskId=3 JobName=subtest.sh
-   UserId=creisle(1365) GroupId=users(100) MCS_label=N/A
-   Priority=31 Nice=0 Account=all QOS=normal
-   JobState=RUNNING Reason=None Dependency=(null)
-   Requeue=1 Restarts=0 BatchFlag=1 Reboot=0 ExitCode=0:0
-   RunTime=00:00:11 TimeLimit=90-00:00:00 TimeMin=N/A
-   SubmitTime=2018-05-24T11:38:28 EligibleTime=2018-05-24T11:38:28
-   StartTime=2018-05-24T11:38:29 EndTime=2018-08-22T11:38:29 Deadline=N/A
-   PreemptTime=None SuspendTime=None SecsPreSuspend=0
-   Partition=all AllocNode:Sid=n104:47409
-   ReqNodeList=(null) ExcNodeList=(null)
-   NodeList=n245
-   BatchHost=n245
-   NumNodes=1 NumCPUs=1 NumTasks=1 CPUs/Task=1 ReqB:S:C:T=0:0:*:*
-   TRES=cpu=1,mem=7000M,node=1
-   Socks/Node=* NtasksPerN:B:S:C=0:0:*:* CoreSpec=*
-   MinCPUsNode=1 MinMemoryCPU=7000M MinTmpDiskNode=0
-   Features=(null) DelayBoot=00:00:00
-   Gres=(null) Reservation=(null)
-   OverSubscribe=OK Contiguous=0 Licenses=(null) Network=(null)
-   Command=/projects/trans_scratch/validations/workspace/creisle/temp/subtest.sh
-   WorkDir=/projects/trans_scratch/validations/workspace/creisle/temp
-   StdErr=/projects/trans_scratch/validations/workspace/creisle/temp/slurm-1673301_3.out
-   StdIn=/dev/null
-   StdOut=/projects/trans_scratch/validations/workspace/creisle/temp/slurm-1673301_3.out
-   Power=
-
-JobId=1673303 ArrayJobId=1673301 ArrayTaskId=2 JobName=subtest.sh
-   UserId=creisle(1365) GroupId=users(100) MCS_label=N/A
-   Priority=31 Nice=0 Account=all QOS=normal
-   JobState=RUNNING Reason=None Dependency=(null)
-   Requeue=1 Restarts=0 BatchFlag=1 Reboot=0 ExitCode=0:0
-   RunTime=00:00:11 TimeLimit=90-00:00:00 TimeMin=N/A
-   SubmitTime=2018-05-24T11:38:28 EligibleTime=2018-05-24T11:38:28
-   StartTime=2018-05-24T11:38:29 EndTime=2018-08-22T11:38:29 Deadline=N/A
-   PreemptTime=None SuspendTime=None SecsPreSuspend=0
-   Partition=all AllocNode:Sid=n104:47409
-   ReqNodeList=(null) ExcNodeList=(null)
-   NodeList=n235
-   BatchHost=n235
-   NumNodes=1 NumCPUs=1 NumTasks=1 CPUs/Task=1 ReqB:S:C:T=0:0:*:*
-   TRES=cpu=1,mem=7000M,node=1
-   Socks/Node=* NtasksPerN:B:S:C=0:0:*:* CoreSpec=*
-   MinCPUsNode=1 MinMemoryCPU=7000M MinTmpDiskNode=0
-   Features=(null) DelayBoot=00:00:00
-   Gres=(null) Reservation=(null)
-   OverSubscribe=OK Contiguous=0 Licenses=(null) Network=(null)
-   Command=/projects/trans_scratch/validations/workspace/creisle/temp/subtest.sh
-   WorkDir=/projects/trans_scratch/validations/workspace/creisle/temp
-   StdErr=/projects/trans_scratch/validations/workspace/creisle/temp/slurm-1673301_2.out
-   StdIn=/dev/null
-   StdOut=/projects/trans_scratch/validations/workspace/creisle/temp/slurm-1673301_2.out
-   Power=
-
-JobId=1673302 ArrayJobId=1673301 ArrayTaskId=1 JobName=subtest.sh
-   UserId=creisle(1365) GroupId=users(100) MCS_label=N/A
-   Priority=31 Nice=0 Account=all QOS=normal
-   JobState=RUNNING Reason=None Dependency=(null)
-   Requeue=1 Restarts=0 BatchFlag=1 Reboot=0 ExitCode=0:0
-   RunTime=00:00:11 TimeLimit=90-00:00:00 TimeMin=N/A
-   SubmitTime=2018-05-24T11:38:28 EligibleTime=2018-05-24T11:38:28
-   StartTime=2018-05-24T11:38:29 EndTime=2018-08-22T11:38:29 Deadline=N/A
-   PreemptTime=None SuspendTime=None SecsPreSuspend=0
-   Partition=all AllocNode:Sid=n104:47409
-   ReqNodeList=(null) ExcNodeList=(null)
-   NodeList=n137
-   BatchHost=n137
-   NumNodes=1 NumCPUs=1 NumTasks=1 CPUs/Task=1 ReqB:S:C:T=0:0:*:*
-   TRES=cpu=1,mem=7000M,node=1
-   Socks/Node=* NtasksPerN:B:S:C=0:0:*:* CoreSpec=*
-   MinCPUsNode=1 MinMemoryCPU=7000M MinTmpDiskNode=0
-   Features=(null) DelayBoot=00:00:00
-   Gres=(null) Reservation=(null)
-   OverSubscribe=OK Contiguous=0 Licenses=(null) Network=(null)
-   Command=/projects/trans_scratch/validations/workspace/creisle/temp/subtest.sh
-   WorkDir=/projects/trans_scratch/validations/workspace/creisle/temp
-   StdErr=/projects/trans_scratch/validations/workspace/creisle/temp/slurm-1673301_1.out
-   StdIn=/dev/null
-   StdOut=/projects/trans_scratch/validations/workspace/creisle/temp/slurm-1673301_1.out
-   Power=
-
-        """
-        rows = _scheduler.SlurmScheduler().parse_scontrol_show(content)
-        self.assertEqual(3, len(rows))
-
-    def test_cancelled_task(self):
-        content = """
-
-JobId=1697512 ArrayJobId=1697503 ArrayTaskId=1 JobName=MV_mock-A47933_batch-uwSwW68EW43XNdvq85NxJ7
-   UserId=creisle(1365) GroupId=users(100) MCS_label=N/A
-   Priority=42 Nice=0 Account=all QOS=normal
-   JobState=CANCELLED Reason=None Dependency=(null)
-   Requeue=1 Restarts=0 BatchFlag=1 Reboot=0 ExitCode=0:15
-   RunTime=00:00:02 TimeLimit=16:00:00 TimeMin=N/A
-   SubmitTime=2018-05-31T20:01:46 EligibleTime=2018-05-31T20:01:49
-   StartTime=2018-05-31T20:02:05 EndTime=2018-05-31T20:02:07 Deadline=N/A
-   PreemptTime=None SuspendTime=None SecsPreSuspend=0
-   Partition=all AllocNode:Sid=n104:173998
-   ReqNodeList=(null) ExcNodeList=(null)
-   NodeList=n245
-   BatchHost=n245
-   NumNodes=1 NumCPUs=1 NumTasks=0 CPUs/Task=1 ReqB:S:C:T=0:0:*:*
-   TRES=cpu=1,mem=18000M,node=1
-   Socks/Node=* NtasksPerN:B:S:C=0:0:*:* CoreSpec=*
-   MinCPUsNode=1 MinMemoryNode=18000M MinTmpDiskNode=0
-   Features=(null) DelayBoot=00:00:00
-   Gres=(null) Reservation=(null)
-   OverSubscribe=OK Contiguous=0 Licenses=(null) Network=(null)
-   Command=/projects/trans_scratch/validations/workspace/creisle/temp/test_submission/slurm/mock-A47933_diseased_transcriptome/validate/submit.sh
-   WorkDir=/home/creisle
-   StdErr=/projects/trans_scratch/validations/workspace/creisle/temp/test_submission/slurm/mock-A47933_diseased_transcriptome/validate/batch-uwSwW68EW43XNdvq85NxJ7-1/job-%x-1697503-1.log
-   StdIn=/dev/null
-   StdOut=/projects/trans_scratch/validations/workspace/creisle/temp/test_submission/slurm/mock-A47933_diseased_transcriptome/validate/batch-uwSwW68EW43XNdvq85NxJ7-1/job-%x-1697503-1.log
-   Power=
-
-        """
-        rows = _scheduler.SlurmScheduler().parse_scontrol_show(content)
-        self.assertEqual(1, len(rows))
-        row = rows[0]
-        self.assertEqual(_constants.JOB_STATUS.CANCELLED, row['status'])
-
-
-class TestParseSacctTable(unittest.TestCase):
-    def test_basic_table(self):
-        content = """
-JobID|JobIDRaw|JobName|Partition|MaxVMSize|MaxVMSizeNode|MaxVMSizeTask|AveVMSize|MaxRSS|MaxRSSNode|MaxRSSTask|AveRSS|MaxPages|MaxPagesNode|MaxPagesTask|AvePages|MinCPU|MinCPUNode|MinCPUTask|AveCPU|NTasks|AllocCPUS|Elapsed|State|ExitCode|AveCPUFreq|ReqCPUFreqMin|ReqCPUFreqMax|ReqCPUFreqGov|ReqMem|ConsumedEnergy|MaxDiskRead|MaxDiskReadNode|MaxDiskReadTask|AveDiskRead|MaxDiskWrite|MaxDiskWriteNode|MaxDiskWriteTask|AveDiskWrite|AllocGRES|ReqGRES|ReqTRES|AllocTRES
-1672273|1672273|MS_batch-iJUMYRdLFDsuu9eVzGmmKm|short||||||||||||||||||1|00:00:00|CANCELLED by 1365|0:0||Unknown|Unknown|Unknown|16000Mn||||||||||||cpu=1,mem=16000M,node=1|
-        """
-        rows = _scheduler.SlurmScheduler().parse_sacct(content)
-        self.assertEqual(1, len(rows))
-        row = rows[0]
-        self.assertEqual(_constants.JOB_STATUS.CANCELLED, row['status'])
-
-    # TODO: test empty header
-
-    def test_cancelled_task(self):
-        content = """
-JobID|JobName|User|ReqMem|Elapsed|State|MaxRSS|AveRSS|Partition
-1697503_3|MV_mock-A47933_batch-uwSwW68EW43XNdvq85NxJ7|creisle|18000Mn|00:00:10|COMPLETED|||all
-1697503_3.batch|batch||18000Mn|00:00:10|COMPLETED|904K|904K|
-1697503_1|MV_mock-A47933_batch-uwSwW68EW43XNdvq85NxJ7|creisle|18000Mn|00:00:02|CANCELLED by 1365|||all
-1697503_1.batch|batch||18000Mn|00:00:02|CANCELLED|896K|896K|
-1697503_2|MV_mock-A47933_batch-uwSwW68EW43XNdvq85NxJ7|creisle|18000Mn|00:00:10|COMPLETED|||all
-1697503_2.batch|batch||18000Mn|00:00:10|COMPLETED|904K|904K|
-        """
-        rows = _scheduler.SlurmScheduler().parse_sacct(content)
-        self.assertEqual(3, len(rows))
-        self.assertEqual(_constants.JOB_STATUS.CANCELLED, rows[1]['status'])
-        self.assertEqual(_constants.JOB_STATUS.COMPLETED, rows[0]['status'])
-
-    def test_pending_array(self):
-        content = """
-JobID|JobName|User|ReqMem|Elapsed|State|MaxRSS|AveRSS|Partition
-1701003_[37-200]|MA_L1522785992-normal_batch-aUmErftiY7eEWvENfSeJwc|creisle|12000Mn|00:00:00|PENDING|||all
-1701003_1|MA_L1522785992-normal_batch-aUmErftiY7eEWvENfSeJwc|creisle|12000Mn|00:05:00|RUNNING|||all
-        """
-        rows = _scheduler.SlurmScheduler().parse_sacct(content)
-        self.assertEqual(2, len(rows))
-        self.assertEqual(_constants.JOB_STATUS.PENDING, rows[0]['status'])
-        self.assertEqual(_constants.JOB_STATUS.RUNNING, rows[1]['status'])
-        self.assertIs(None, rows[0]['task_ident'])
-        self.assertEqual(1, rows[1]['task_ident'])
-
-    def test_resubmission_array(self):
-        content = """
-JobID|JobIDRaw|JobName|Partition|MaxVMSize|MaxVMSizeNode|MaxVMSizeTask|AveVMSize|MaxRSS|MaxRSSNode|MaxRSSTask|AveRSS|MaxPages|MaxPagesNode|MaxPagesTask|AvePages|MinCPU|MinCPUNode|MinCPUTask|AveCPU|NTasks|AllocCPUS|Elapsed|State|ExitCode|AveCPUFreq|ReqCPUFreqMin|ReqCPUFreqMax|ReqCPUFreqGov|ReqMem|ConsumedEnergy|MaxDiskRead|MaxDiskReadNode|MaxDiskReadTask|AveDiskRead|MaxDiskWrite|MaxDiskWriteNode|MaxDiskWriteTask|AveDiskWrite|AllocGRES|ReqGRES|ReqTRES|AllocTRES
-1873472_162|1873671|MV_P02300_batch-egprnnYFaJtPtnECYfGiKf|all||||||||||||||||||1|10:18:26|COMPLETED|0:0||Unknown|Unknown|Unknown|16000Mn||||||||||||cpu=1,mem=16000M,node=1|cpu=1,mem=16000M,node=1
-1873472_162.batch|1873671.batch|batch||13703984K|n106|0|8708976K|11725204K|n106|0|6743424K|53K|n106|0|53K|10:06:31|n106|0|10:06:31|1|1|10:18:26|COMPLETED|0:0|2.19M|0|0|0|16000Mn|0|11767.09M|n106|0|11767.09M|29.74M|n106|0|29.74M||||cpu=1,mem=16000M,node=1
-1873472_163|1873672|MV_P02300_batch-egprnnYFaJtPtnECYfGiKf|all||||||||||||||||||1|08:09:50|COMPLETED|0:0||Unknown|Unknown|Unknown|16000Mn||||||||||||cpu=1,mem=16000M,node=1|cpu=1,mem=16000M,node=1
-1873472_163.batch|1873672.batch|batch||13690948K|n106|0|8686468K|11712556K|n106|0|6721328K|45K|n106|0|45K|07:57:40|n106|0|07:57:40|1|1|08:09:50|COMPLETED|0:0|2.19M|0|0|0|16000Mn|0|13345.62M|n106|0|13345.62M|26.77M|n106|0|26.77M||||cpu=1,mem=16000M,node=1
-1873472_164|1873673|MV_P02300_batch-egprnnYFaJtPtnECYfGiKf|all||||||||||||||||||1|12:26:33|COMPLETED|0:0||Unknown|Unknown|Unknown|16000Mn||||||||||||cpu=1,mem=16000M,node=1|cpu=1,mem=16000M,node=1
-1873472_164.batch|1873673.batch|batch||13730588K|n106|0|9577424K|11750552K|n106|0|6777084K|55K|n106|0|55K|12:13:52|n106|0|12:13:52|1|1|12:26:33|COMPLETED|0:0|2.19M|0|0|0|16000Mn|0|17065.30M|n106|0|17065.30M|34.39M|n106|0|34.39M||||cpu=1,mem=16000M,node=1
-1873472_165|1873674|MV_P02300_batch-egprnnYFaJtPtnECYfGiKf|all||||||||||||||||||1|05:32:32|COMPLETED|0:0||Unknown|Unknown|Unknown|16000Mn||||||||||||cpu=1,mem=16000M,node=1|cpu=1,mem=16000M,node=1
-1873472_165.batch|1873674.batch|batch||13735224K|n106|0|9574752K|11756988K|n106|0|6773916K|52K|n106|0|52K|05:21:46|n106|0|05:21:46|1|1|05:32:32|COMPLETED|0:0|2.18M|0|0|0|16000Mn|0|15997.17M|n106|0|15997.17M|37.74M|n106|0|37.74M||||cpu=1,mem=16000M,node=1
-1873472_166|1873675|MV_P02300_batch-egprnnYFaJtPtnECYfGiKf|all||||||||||||||||||1|07:30:37|COMPLETED|0:0||Unknown|Unknown|Unknown|16000Mn||||||||||||cpu=1,mem=16000M,node=1|cpu=1,mem=16000M,node=1
-1873472_166.batch|1873675.batch|batch||13722476K|n106|0|8669768K|11742400K|n106|0|6702776K|53K|n106|0|53K|07:18:31|n106|0|07:18:31|1|1|07:30:37|COMPLETED|0:0|2.19M|0|0|0|16000Mn|0|14716.82M|n106|0|14716.82M|21.39M|n106|0|21.39M||||cpu=1,mem=16000M,node=1
-1873472_167|1873676|MV_P02300_batch-egprnnYFaJtPtnECYfGiKf|all||||||||||||||||||1|06:45:32|COMPLETED|0:0||Unknown|Unknown|Unknown|16000Mn||||||||||||cpu=1,mem=16000M,node=1|cpu=1,mem=16000M,node=1
-1873472_167.batch|1873676.batch|batch||13686828K|n106|0|8596932K|11707132K|n106|0|6565180K|49K|n106|0|49K|06:35:26|n106|0|06:35:26|1|1|06:45:32|COMPLETED|0:0|2.19M|0|0|0|16000Mn|0|10274.82M|n106|0|10274.82M|39.37M|n106|0|39.37M||||cpu=1,mem=16000M,node=1
-1873472_168|1873677|MV_P02300_batch-egprnnYFaJtPtnECYfGiKf|all||||||||||||||||||1|16:00:06|TIMEOUT|0:0||Unknown|Unknown|Unknown|16000Mn||||||||||||cpu=1,mem=16000M,node=1|cpu=1,mem=16000M,node=1
-1873472_168.batch|1873677.batch|batch||13749848K|n106|0|8700272K|11771032K|n106|0|6734652K|46K|n106|0|46K|15:48:39|n106|0|15:48:39|1|1|16:00:07|CANCELLED|0:15|2.19M|0|0|0|16000Mn|0|10613.36M|n106|0|10613.36M|25.00M|n106|0|25.00M||||cpu=1,mem=16000M,node=1
-        """
-        rows = _scheduler.SlurmScheduler().parse_sacct(content)
-        complete = [
-            row['status'] for row in rows if row['status'] == _constants.JOB_STATUS.COMPLETED
-        ]
-        fail = [row['status'] for row in rows if row['status'] == _constants.JOB_STATUS.CANCELLED]
-        self.assertEqual(6, len(complete))
-        self.assertEqual(1, len(fail))
-
-
-class TestCancel(unittest.TestCase):
-    @mock.patch('mavis.schedule.scheduler.SlurmScheduler.command')
-    def test_single_job(self, patcher):
-        sched = _scheduler.SlurmScheduler()
-        job = _job.Job(SUBCOMMAND.VALIDATE, '', job_ident='1234')
-        sched.cancel(job)
-        self.assertEqual(_constants.JOB_STATUS.CANCELLED, job.status)
-        patcher.assert_called_with(['scancel', '1234'])
-
-    @mock.patch('mavis.schedule.scheduler.SlurmScheduler.command')
-    def test_array_job(self, patcher):
-        sched = _scheduler.SlurmScheduler()
-        job = _job.ArrayJob(SUBCOMMAND.VALIDATE, 10, output_dir='', job_ident='1234')
-        sched.cancel(job)
-        self.assertEqual(_constants.JOB_STATUS.CANCELLED, job.status)
-        for task in job.task_list:
-            self.assertEqual(_constants.JOB_STATUS.CANCELLED, task.status)
-        patcher.assert_called_with(['scancel', '1234'])
-
-    @mock.patch('mavis.schedule.scheduler.SlurmScheduler.command')
-    def test_array_job_task(self, patcher):
-        sched = _scheduler.SlurmScheduler()
-        job = _job.ArrayJob(SUBCOMMAND.VALIDATE, 10, output_dir='', job_ident='1234')
-        sched.cancel(job, task_ident=4)
-        self.assertEqual(_constants.JOB_STATUS.NOT_SUBMITTED, job.status)
-        for i, task in enumerate(job.task_list):
-            if i == 3:
-                self.assertEqual(_constants.JOB_STATUS.CANCELLED, task.status)
-            else:
-                self.assertEqual(_constants.JOB_STATUS.NOT_SUBMITTED, task.status)
-        patcher.assert_called_with(['scancel', '1234_4'])
-
-    @mock.patch('mavis.schedule.scheduler.SlurmScheduler.command')
-    def test_bad_command(self, patcher):
-        patcher.side_effect = [subprocess.CalledProcessError(1, 'cmd')]
-        sched = _scheduler.SlurmScheduler()
-        job = _job.Job(SUBCOMMAND.VALIDATE, '', job_ident='1234')
-        with self.assertRaises(subprocess.CalledProcessError):
-            sched.cancel(job)
-        patcher.assert_called_with(['scancel', '1234'])
diff --git a/tests/integration/schedule/test_torque.py b/tests/integration/schedule/test_torque.py
deleted file mode 100644
index 50773687..00000000
--- a/tests/integration/schedule/test_torque.py
+++ /dev/null
@@ -1,441 +0,0 @@
-import subprocess
-import unittest
-from unittest import mock
-
-from mavis.schedule import scheduler as _scheduler
-from mavis.schedule import constants as _constants
-from mavis.schedule import job as _job
-from mavis.constants import SUBCOMMAND
-
-
-class TestParseQstat(unittest.TestCase):
-
-    # TODO: single job running
-    # TODO: batch job running
-    # TODO: single job complete
-
-    def test_single_job_complete(self):
-        content = """
-Job Id: 9.torque01.bcgsc.ca
-    Job_Name = subtest.sh
-    Job_Owner = creisle@torque01.bcgsc.ca
-    resources_used.cput = 00:00:00
-    resources_used.vmem = 346716kb
-    resources_used.walltime = 00:01:00
-    resources_used.mem = 3624kb
-    resources_used.energy_used = 0
-    job_state = C
-    queue = batch
-    server = torque01.bcgsc.ca
-    Checkpoint = u
-    ctime = Tue May 29 09:37:00 2018
-    Error_Path = torque01.bcgsc.ca:/projects/trans_scratch/validations/workspa
-        ce/creisle/temp/subtest.sh.e9
-    exec_host = torque01.bcgsc.ca/0
-    Hold_Types = n
-    Join_Path = n
-    Keep_Files = n
-    Mail_Points = a
-    mtime = Tue May 29 09:38:01 2018
-    Output_Path = torque01.bcgsc.ca:/projects/trans_scratch/validations/worksp
-        ace/creisle/temp/subtest.sh.o9
-    Priority = 0
-    qtime = Tue May 29 09:37:00 2018
-    Rerunable = True
-    Resource_List.walltime = 01:00:00
-    Resource_List.nodes = 1
-    Resource_List.nodect = 1
-    session_id = 25438
-    Variable_List = PBS_O_QUEUE=batch,PBS_O_HOME=/home/creisle,
-        PBS_O_LOGNAME=creisle,
-        PBS_O_PATH=/home/creisle/applications/node-v10.1.0-linux-x64/bin:/hom
-        e/creisle/.npm-packages/bin:/home/creisle/bin:/home/creisle/applicatio
-        ns/centos06/python-3.6.1/bin:/projects/tumour_char/analysis_scripts/bi
-        n/pog:/gsc/software/linux-x86_64-centos6/git-2.12.0/bin/:/usr/local/bi
-        n:/usr/local/sbin:/usr/local/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/p
-        rojects/trans_scratch/software/pipeline_commands/:/home/creisle/bin,
-        PBS_O_MAIL=/var/spool/mail/creisle,PBS_O_SHELL=/bin/bash,
-        PBS_O_LANG=en_US.UTF-8,
-        PBS_O_WORKDIR=/projects/trans_scratch/validations/workspace/creisle/t
-        emp,PBS_O_HOST=torque01.bcgsc.ca,PBS_O_SERVER=torque01.bcgsc.ca
-    euser = creisle
-    egroup = users
-    queue_type = E
-    comment = Job started on Tue May 29 at 09:37
-    etime = Tue May 29 09:37:00 2018
-    exit_status = 0
-    submit_args = subtest.sh
-    start_time = Tue May 29 09:37:01 2018
-    start_count = 1
-    fault_tolerant = False
-    comp_time = Tue May 29 09:38:01 2018
-    job_radix = 0
-    total_runtime = 60.481239
-    submit_host = torque01.bcgsc.ca
-    init_work_dir = /projects/trans_scratch/validations/workspace/creisle/temp
-
-    request_version = 1
-
-        """
-        rows = _scheduler.TorqueScheduler().parse_qstat(content)
-        self.assertEqual(1, len(rows))
-        row = rows[0]
-        self.assertEqual(_constants.JOB_STATUS.COMPLETED, row['status'])
-        self.assertEqual('9.torque01.bcgsc.ca', row['job_ident'])
-        self.assertEqual('subtest.sh', row['name'])
-        self.assertIs(None, row['task_ident'])
-        self.assertEqual('', row['status_comment'])
-
-    def test_array_job(self):
-        content = """
-Job Id: 48[1].torque01.bcgsc.ca
-    Job_Name = MA_mock-A47933_batch-JT3CUggKXNStHcoFXYaGR3-1
-    Job_Owner = creisle@torque01.bcgsc.ca
-    job_state = C
-    queue = batch
-    server = torque01.bcgsc.ca
-    Checkpoint = u
-    ctime = Tue May 29 18:27:33 2018
-    depend = afterokarray:43[].torque01.bcgsc.ca
-    Error_Path = torque01.bcgsc.ca:/projects/trans_scratch/validations/workspa
-        ce/creisle/temp/test_submission/output_torque/mock-A47933_diseased_tra
-        nscriptome/annotate/batch-JT3CUggKXNStHcoFXYaGR3-/job---.log-1
-    Join_Path = oe
-    Keep_Files = n
-    Mail_Points = a
-    mtime = Tue May 29 18:27:33 2018
-    Output_Path = torque01.bcgsc.ca:/projects/trans_scratch/validations/worksp
-        ace/creisle/temp/test_submission/output_torque/mock-A47933_diseased_tr
-        anscriptome/annotate/batch-JT3CUggKXNStHcoFXYaGR3-/job---.log-1
-    Priority = 0
-    qtime = Tue May 29 18:27:33 2018
-    Rerunable = True
-    Resource_List.mem = 12000mb
-    Resource_List.walltime = 16:00:00
-    Resource_List.nodes = 1
-    Resource_List.nodect = 1
-    Variable_List = PBS_ARRAYID=1,PBS_O_QUEUE=batch,PBS_O_HOME=/home/creisle,
-        PBS_O_LOGNAME=creisle,
-        PBS_O_PATH=/home/creisle/git/mavis/venv/bin:/home/creisle/application
-        s/node-v10.1.0-linux-x64/bin:/home/creisle/.npm-packages/bin:/home/cre
-        isle/bin:/home/creisle/applications/centos06/python-3.6.1/bin:/project
-        s/tumour_char/analysis_scripts/bin/pog:/gsc/software/linux-x86_64-cent
-        os6/git-2.12.0/bin/:/usr/local/bin:/usr/local/sbin:/usr/local/bin:/usr
-        /bin:/usr/local/sbin:/usr/sbin:/projects/trans_scratch/software/pipeli
-        ne_commands/:/home/creisle/bin,PBS_O_MAIL=/var/spool/mail/creisle,
-        PBS_O_SHELL=/bin/bash,PBS_O_LANG=en_US.UTF-8,
-        PBS_O_WORKDIR=/projects/trans_scratch/validations/workspace/creisle/t
-        emp/test_submission,PBS_O_HOST=torque01.bcgsc.ca,
-        PBS_O_SERVER=torque01.bcgsc.ca,
-        MANPATH=/home/creisle/.npm-packages/share/man:/home/creisle/applicati
-        ons/centos06/python-3.6.1/man:/usr/local/share/man:/usr/share/man/over
-        rides:/usr/share/man,XDG_SESSION_ID=1340,HOSTNAME=torque01.bcgsc.ca,
-        SHELL=/bin/bash,TERM=xterm-256color,HISTSIZE=1000,CLICOLOR=1,
-        SSH_CLIENT=10.9.202.242 35994 22,TMPDIR=/var/tmp/,
-        PYTHONUNBUFFERED=True,MAVIS_MIN_CLUSTERS_PER_FILE=2,
-        NODE_OPTIONS=--trace-warnings,SSH_TTY=/dev/pts/0,USER=creisle,
-        SVN_EDITOR=vim,LS_COLORS=di=34;01;47:mi=100;31;01:ln=36;01:ex=01;32,
-        MAVIS_SCHEDULER=TORQUE,VIRTUAL_ENV=/home/creisle/git/mavis/venv,
-        SACCT_FORMAT=jobid%-18\\,jobname%45\\,user%-8\\,reqmem\\,elapsed\\,state\\,
-        MaxRSS\\,AveRSS\\,Partition,
-        PATH=/home/creisle/git/mavis/venv/bin:/home/creisle/applications/node
-        -v10.1.0-linux-x64/bin:/home/creisle/.npm-packages/bin:/home/creisle/b
-        in:/home/creisle/applications/centos06/python-3.6.1/bin:/projects/tumo
-        ur_char/analysis_scripts/bin/pog:/gsc/software/linux-x86_64-centos6/gi
-        t-2.12.0/bin/:/usr/local/bin:/usr/local/sbin:/usr/local/bin:/usr/bin:/
-        usr/local/sbin:/usr/sbin:/projects/trans_scratch/software/pipeline_com
-        mands/:/home/creisle/bin,MAIL=/var/spool/mail/creisle,
-        _=/usr/local/bin/qsub,
-        PWD=/projects/trans_scratch/validations/workspace/creisle/temp/test_s
-        ubmission,XMODIFIERS=@im=none,LANG=en_US.UTF-8,
-        MODULEPATH=/usr/share/Modules/modulefiles:/etc/modulefiles,
-        LOADEDMODULES=,
-        NODE_PATH=/home/creisle/.npm-packages/lib/node_modules,
-        SQUEUE_FORMAT=%.12i %9P %45j %.8u %.2t %.10M %.6D %.8m %.14l %.4c %.2
-        0R %E,HISTCONTROL=ignoredups,MAVIS_MAX_FILES=1,HOME=/home/creisle,
-        SHLVL=2,LOGNAME=creisle,
-        PYTHONPATH=/home/creisle/applications/centos06/python-3.6.1/bin:,
-        SSH_CONNECTION=10.9.202.242 35994 10.9.220.231 22,
-        ORIENTDB_HOME=/home/creisle/applications/orientdb/orientdb-community-
-        2.2.34,MODULESHOME=/usr/share/Modules,
-        LESSOPEN=||/usr/bin/lesspipe.sh %s,BROWSER=/usr/bin/google-chrome,
-        NPM_PACKAGES=/home/creisle/.npm-packages,
-        XDG_RUNTIME_DIR=/run/user/1365,
-        BASH_FUNC_module()=() {  eval `/usr/bin/modulecmd bash $*`\\
-}
-    euser = creisle
-    egroup = users
-    queue_type = E
-    comment = Job 48[].torque01.bcgsc.ca deleted because its dependency of arr
-        ay 43[].torque01.bcgsc.ca can never be satisfied
-    etime = Tue May 29 18:27:33 2018
-    exit_status = 271
-    submit_args = -j oe -l mem=12000mb -l walltime=16:00:00 -V -W depend=after
-        okarray:43[].torque01.bcgsc.ca -N MA_mock-A47933_batch-JT3CUggKXNStHco
-        FXYaGR3 -o /projects/trans_scratch/validations/workspace/creisle/temp/
-        test_submission/output_torque/mock-A47933_diseased_transcriptome/annot
-        ate/batch-JT3CUggKXNStHcoFXYaGR3-/job---.log -t 1 /projects/trans_scra
-        tch/validations/workspace/creisle/temp/test_submission/output_torque/m
-        ock-A47933_diseased_transcriptome/annotate/submit.sh
-    job_array_id = 1
-    fault_tolerant = False
-    job_radix = 0
-    submit_host = torque01.bcgsc.ca
-    init_work_dir = /projects/trans_scratch/validations/workspace/creisle/temp
-        /test_submission
-    request_version = 1
-
-        """
-        rows = _scheduler.TorqueScheduler().parse_qstat(content)
-        self.assertEqual(1, len(rows))
-        row = rows[0]
-        self.assertEqual('48[].torque01.bcgsc.ca', row['job_ident'])
-        self.assertIs(1, row['task_ident'])
-
-    # TODO: single job error
-    # TODO: batch job error
-    # TODO: single job exiting
-    # TODO: batch job exiting
-
-
-class TestCancel(unittest.TestCase):
-    @mock.patch('mavis.schedule.scheduler.TorqueScheduler.command')
-    def test_single_job(self, patcher):
-        sched = _scheduler.TorqueScheduler()
-        job = _job.Job(SUBCOMMAND.VALIDATE, '', job_ident='1234')
-        sched.cancel(job)
-        self.assertEqual(_constants.JOB_STATUS.CANCELLED, job.status)
-        patcher.assert_called_with(['qdel', '1234'])
-
-    @mock.patch('mavis.schedule.scheduler.TorqueScheduler.command')
-    def test_array_job(self, patcher):
-        sched = _scheduler.TorqueScheduler()
-        job = _job.ArrayJob(SUBCOMMAND.VALIDATE, 10, output_dir='', job_ident='1234')
-        sched.cancel(job)
-        self.assertEqual(_constants.JOB_STATUS.CANCELLED, job.status)
-        for task in job.task_list:
-            self.assertEqual(_constants.JOB_STATUS.CANCELLED, task.status)
-        patcher.assert_called_with(['qdel', '1234'])
-
-    @mock.patch('mavis.schedule.scheduler.TorqueScheduler.command')
-    def test_array_job_task(self, patcher):
-        sched = _scheduler.TorqueScheduler()
-        job = _job.ArrayJob(SUBCOMMAND.VALIDATE, 10, output_dir='', job_ident='1234')
-        sched.cancel(job, task_ident='4')
-        self.assertEqual(_constants.JOB_STATUS.NOT_SUBMITTED, job.status)
-        for i, task in enumerate(job.task_list):
-            if i == 3:
-                self.assertEqual(_constants.JOB_STATUS.CANCELLED, task.status)
-            else:
-                self.assertEqual(_constants.JOB_STATUS.NOT_SUBMITTED, task.status)
-        patcher.assert_called_with(['qdel', '1234', '-t', '4'])
-
-    @mock.patch('mavis.schedule.scheduler.TorqueScheduler.command')
-    def test_bad_command(self, patcher):
-        patcher.side_effect = [subprocess.CalledProcessError(1, 'cmd')]
-        sched = _scheduler.TorqueScheduler()
-        job = _job.Job(SUBCOMMAND.VALIDATE, '', job_ident='1234')
-        sched.cancel(job)
-        patcher.assert_called_with(['qdel', '1234'])
-        self.assertNotEqual(_constants.JOB_STATUS.CANCELLED, job.status)
-
-
-class TestSubmit(unittest.TestCase):
-    @mock.patch('mavis.schedule.scheduler.TorqueScheduler.command')
-    def test_job(self, patcher):
-        patcher.side_effect = ['141.torque01.bcgsc.ca\n']
-        job = _job.Job(
-            stage=SUBCOMMAND.VALIDATE,
-            queue='all',
-            output_dir='output_dir',
-            name='MV1',
-            memory_limit=1,
-            mail_user='me@example.com',
-            mail_type=_constants.MAIL_TYPE.ALL,
-            script='script.sh',
-        )
-
-        sched = _scheduler.TorqueScheduler()
-        sched.submit(job)
-        self.assertEqual('141.torque01.bcgsc.ca', job.job_ident)
-        patcher.assert_called_with(
-            [
-                'qsub',
-                '-j',
-                'oe',
-                '-q',
-                'all',
-                '-l',
-                'mem=1mb',
-                '-l',
-                'walltime=16:00:00',
-                '-V',
-                '-N',
-                'MV1',
-                '-o',
-                'output_dir/job-$PBS_JOBNAME-$PBS_JOBID.log',
-                '-m',
-                'abef',
-                '-M',
-                'me@example.com',
-                'script.sh',
-            ]
-        )
-
-    @mock.patch('mavis.schedule.scheduler.TorqueScheduler.command')
-    def test_job_with_job_deps(self, patcher):
-        patcher.side_effect = ['141.torque01.bcgsc.ca\n']
-        job = _job.Job(
-            stage=SUBCOMMAND.VALIDATE,
-            queue='all',
-            output_dir='output_dir',
-            name='MV1',
-            memory_limit=1,
-            mail_user='me@example.com',
-            mail_type=_constants.MAIL_TYPE.ALL,
-            script='script.sh',
-            dependencies=[
-                _job.Job(
-                    stage=SUBCOMMAND.VALIDATE,
-                    output_dir='output_dir',
-                    job_ident='1234.torque01.bcgsc.ca',
-                ),
-                _job.Job(
-                    stage=SUBCOMMAND.VALIDATE,
-                    output_dir='output_dir',
-                    job_ident='54.torque01.bcgsc.ca',
-                ),
-            ],
-        )
-
-        sched = _scheduler.TorqueScheduler()
-        sched.submit(job)
-        self.assertEqual('141.torque01.bcgsc.ca', job.job_ident)
-        patcher.assert_called_with(
-            [
-                'qsub',
-                '-j',
-                'oe',
-                '-q',
-                'all',
-                '-l',
-                'mem=1mb',
-                '-l',
-                'walltime=16:00:00',
-                '-V',
-                '-W depend=afterok:1234.torque01.bcgsc.ca:54.torque01.bcgsc.ca',
-                '-N',
-                'MV1',
-                '-o',
-                'output_dir/job-$PBS_JOBNAME-$PBS_JOBID.log',
-                '-m',
-                'abef',
-                '-M',
-                'me@example.com',
-                'script.sh',
-            ]
-        )
-
-    @mock.patch('mavis.schedule.scheduler.TorqueScheduler.command')
-    def test_job_with_mixed_deps(self, patcher):
-        patcher.side_effect = ['141.torque01.bcgsc.ca\n']
-        job = _job.Job(
-            stage=SUBCOMMAND.VALIDATE,
-            queue='all',
-            output_dir='output_dir',
-            name='MV1',
-            memory_limit=1,
-            mail_user='me@example.com',
-            mail_type=_constants.MAIL_TYPE.ALL,
-            script='script.sh',
-            dependencies=[
-                _job.Job(
-                    stage=SUBCOMMAND.VALIDATE,
-                    output_dir='output_dir',
-                    job_ident='1234.torque01.bcgsc.ca',
-                ),
-                _job.Job(
-                    stage=SUBCOMMAND.VALIDATE,
-                    output_dir='output_dir',
-                    job_ident='54.torque01.bcgsc.ca',
-                ),
-                _job.TorqueArrayJob(
-                    stage=SUBCOMMAND.VALIDATE,
-                    output_dir='output_dir',
-                    job_ident='99[].torque01.bcgsc.ca',
-                    task_list=5,
-                ),
-            ],
-        )
-
-        sched = _scheduler.TorqueScheduler()
-        sched.submit(job)
-        self.assertEqual('141.torque01.bcgsc.ca', job.job_ident)
-        patcher.assert_called_with(
-            [
-                'qsub',
-                '-j',
-                'oe',
-                '-q',
-                'all',
-                '-l',
-                'mem=1mb',
-                '-l',
-                'walltime=16:00:00',
-                '-V',
-                '-W depend=afterokarray:99[][5].torque01.bcgsc.ca,afterok:1234.torque01.bcgsc.ca:54.torque01.bcgsc.ca',
-                '-N',
-                'MV1',
-                '-o',
-                'output_dir/job-$PBS_JOBNAME-$PBS_JOBID.log',
-                '-m',
-                'abef',
-                '-M',
-                'me@example.com',
-                'script.sh',
-            ]
-        )
-
-    @mock.patch('mavis.schedule.scheduler.TorqueScheduler.command')
-    def test_array(self, patcher):
-        patcher.side_effect = ['142[].torque01.bcgsc.ca\n']
-        job = _job.TorqueArrayJob(
-            stage=SUBCOMMAND.VALIDATE,
-            queue='all',
-            output_dir='output_dir',
-            name='MV1',
-            memory_limit=1,
-            mail_user='me@example.com',
-            mail_type=_constants.MAIL_TYPE.ALL,
-            script='script.sh',
-            task_list=[1, 2, 3, 6, 9],
-        )
-
-        sched = _scheduler.TorqueScheduler(concurrency_limit=2)
-        sched.submit(job)
-        self.assertEqual('142[].torque01.bcgsc.ca', job.job_ident)
-        patcher.assert_called_with(
-            [
-                'qsub',
-                '-j',
-                'oe',
-                '-q',
-                'all',
-                '-l',
-                'mem=1mb',
-                '-l',
-                'walltime=16:00:00',
-                '-V',
-                '-N',
-                'MV1',
-                '-o',
-                'output_dir/job-$PBS_JOBNAME-$PBS_JOBID-$PBS_ARRAYID.log',
-                '-m',
-                'abef',
-                '-M',
-                'me@example.com',
-                '-t',
-                '1-3,6,9%2',
-                'script.sh',
-            ]
-        )
diff --git a/tests/integration/test_args.py b/tests/integration/test_args.py
index 33210a06..0dea660f 100644
--- a/tests/integration/test_args.py
+++ b/tests/integration/test_args.py
@@ -1,348 +1,412 @@
 import argparse
+import json
 import os
-import unittest
-from unittest.mock import patch
 import sys
-from mavis.main import main as mavis_main
+import tempfile
+from unittest.mock import patch
+
+import pytest
+from mavis import util
 from mavis.cluster import main as cluster_main
+from mavis.main import main as mavis_main
 from mavis.validate import main as validate_main
-from mavis import util
 
-from . import ARGUMENT_ERROR
 from ..util import get_data
 
 
-def expect_error(testcase, func, catchtype):
+@pytest.fixture
+def output_dir():
+    """Yield a scratch output directory that is deleted when the test finishes."""
+    with tempfile.TemporaryDirectory() as temp_output:
+        yield temp_output
+
+
+@pytest.fixture
+def configpath(tmp_path):
+    """Return the (not yet written) path of a JSON config file under tmp_path."""
+    p = tmp_path / "config.json"
+    return p
+
+
+def expect_error(testcase, func, catchtype=None):
+    """
+    Call ``func`` and return the error it raises.
+
+    Args:
+        testcase: unused; kept so existing call sites keep working
+        func: zero-argument callable that is expected to fail
+        catchtype: expected error class (or tuple of classes); None accepts any error
+
+    Raises:
+        AssertionError: if ``func`` does not raise, or raises something other than ``catchtype``
+    """
     try:
         func()
-    except catchtype as err:
-        return err
-    else:
+    except (SystemExit, Exception) as err:
+        if catchtype is None or isinstance(err, catchtype):
+            return err
-        raise AssertionError('Did not throw the expected error', catchtype)
+    # reached when func() returned normally or raised the wrong kind of error
+    raise AssertionError('Did not throw the expected error', catchtype)
 
 
-class TestCluster(unittest.TestCase):
-    def test_trans_multiple_annotations_no_masking(self):
+class TestCluster:
+    def test_trans_multiple_annotations_no_masking(self, configpath, output_dir):
+        configpath.write_text(
+            json.dumps(
+                {
+                    'reference.annotations': [
+                        get_data('example_genes.json'),
+                        get_data('mock_annotations.json'),
+                    ],
+                    'libraries': {
+                        'translib': {
+                            'disease_status': 'diseased',
+                            'protocol': 'transcriptome',
+                            'assign': [get_data('mock_sv_events.tsv')],
+                        }
+                    },
+                    'output_dir': output_dir,
+                }
+            )
+        )
         args = [
             'mavis',
             'cluster',
-            '--annotations',
-            get_data('example_genes.json'),
-            get_data('mock_annotations.json'),
             '--library',
             'translib',
-            '--protocol',
-            'transcriptome',
-            '--disease_status',
-            'diseased',
-            '--input',
+            '--inputs',
             get_data('mock_sv_events.tsv'),
             '--output',
-            'outdir',
+            output_dir,
+            '--config',
+            str(configpath),
         ]
         with patch.object(cluster_main, 'main', util.DEVNULL):
             with patch.object(sys, 'argv', args):
                 mavis_main()
 
-    def test_trans_multiple_annotations_with_masking(self):
+    def test_trans_multiple_annotations_with_masking(self, configpath, output_dir):
+        configpath.write_text(
+            json.dumps(
+                {
+                    'libraries': {
+                        'translib': {
+                            'disease_status': 'diseased',
+                            'protocol': 'transcriptome',
+                            'assign': [get_data('mock_sv_events.tsv')],
+                        }
+                    },
+                    'cluster.uninformative_filter': True,
+                    'reference.annotations': [
+                        get_data('example_genes.json'),
+                        get_data('mock_annotations.json'),
+                    ],
+                    'reference.masking': [get_data('mock_masking.tab')],
+                    'output_dir': output_dir,
+                }
+            )
+        )
         args = [
             'mavis',
             'cluster',
-            '--annotations',
-            get_data('example_genes.json'),
-            get_data('mock_annotations.json'),
             '--library',
             'translib',
-            '--protocol',
-            'transcriptome',
-            '--disease_status',
-            'diseased',
-            '--input',
+            '--inputs',
             get_data('mock_sv_events.tsv'),
             '--output',
-            'outdir',
-            '--masking',
-            get_data('mock_masking.tab'),
+            output_dir,
+            '--config',
+            str(configpath),
         ]
         with patch.object(cluster_main, 'main', util.DEVNULL):
             with patch.object(sys, 'argv', args):
                 mavis_main()
 
-    def test_error_missing_annotations_translib_uninform(self):
-        args = [
-            'mavis',
-            'cluster',
-            '--library',
-            'translib',
-            '--protocol',
-            'transcriptome',
-            '--disease_status',
-            'diseased',
-            '--input',
-            get_data('mock_sv_events.tsv'),
-            '--output',
-            'outdir',
-            '--uninformative_filter',
-            'True',
-        ]
-        with patch.object(cluster_main, 'main', util.DEVNULL):
-            with patch.object(sys, 'argv', args):
-                err = expect_error(self, mavis_main, SystemExit)
-                self.assertEqual(ARGUMENT_ERROR, err.code)
-
-    def test_ok_missing_annotations_translib_nofilter(self):
-        args = [
-            'mavis',
-            'cluster',
-            '--library',
-            'translib',
-            '--protocol',
-            'transcriptome',
-            '--disease_status',
-            'diseased',
-            '--input',
-            get_data('mock_sv_events.tsv'),
-            '--output',
-            'outdir',
-        ]
+    def test_error_missing_annotations_translib_uninform(self, configpath, output_dir):
+        configpath.write_text(
+            json.dumps(
+                {
+                    'libraries': {
+                        'translib': {
+                            'disease_status': 'diseased',
+                            'protocol': 'transcriptome',
+                            'assign': [get_data('mock_sv_events.tsv')],
+                        }
+                    },
+                    'cluster.uninformative_filter': True,
+                    'output_dir': output_dir,
+                }
+            )
+        )
+        args = ['mavis', 'cluster', '--library', 'translib', '--output', output_dir]
         with patch.object(cluster_main, 'main', util.DEVNULL):
             with patch.object(sys, 'argv', args):
-                mavis_main()
-
+                expect_error(self, mavis_main)
 
-class TestValidate(unittest.TestCase):
-    def test_error_missing_annotations_translib(self):
-        args = [
-            'mavis',
-            'validate',
-            '--library',
-            'translib',
-            '--protocol',
-            'transcriptome',
-            '--bam_file',
-            get_data('mock_trans_reads_for_events.sorted.bam'),
-            '--stdev_fragment_size',
-            '50',
-            '--median_fragment_size',
-            '200',
-            '--input',
-            get_data('mock_sv_events.tsv'),
-            '--output',
-            'outdir',
-            '--reference_genome',
-            get_data('mock_reference_genome.fa'),
-            '--aligner_reference',
-            get_data('mock_reference_genome.fa'),
-            '--read_length',
-            '125',
-        ]
-        with patch.object(validate_main, 'main', util.DEVNULL):
-            with patch.object(sys, 'argv', args):
-                err = expect_error(self, mavis_main, SystemExit)
-                self.assertEqual(ARGUMENT_ERROR, err.code)
 
-    def test_ok_missing_annotations_genome(self):
+class TestValidate:
+    def test_error_missing_annotations_translib(self, configpath, output_dir):
+        configpath.write_text(
+            json.dumps(
+                {
+                    'libraries': {
+                        'translib': {
+                            'disease_status': 'diseased',
+                            'protocol': 'transcriptome',
+                            'assign': [get_data('mock_sv_events.tsv')],
+                            'bam_file': get_data('mock_trans_reads_for_events.sorted.bam'),
+                            'read_length': 125,
+                            'median_fragment_size': 200,
+                            'stdev_fragment_size': 50,
+                        }
+                    },
+                    'cluster.uninformative_filter': True,
+                    'reference.reference_genome': [get_data('mock_reference_genome.fa')],
+                    'reference.aligner_reference': [get_data('mock_reference_genome.fa')],
+                    'output_dir': output_dir,
+                }
+            )
+        )
         args = [
             'mavis',
             'validate',
             '--library',
             'translib',
-            '--protocol',
-            'genome',
-            '--bam_file',
-            get_data('mock_trans_reads_for_events.sorted.bam'),
-            '--stdev_fragment_size',
-            '50',
-            '--median_fragment_size',
-            '200',
             '--input',
             get_data('mock_sv_events.tsv'),
             '--output',
-            'outdir',
-            '--reference_genome',
-            get_data('mock_reference_genome.fa'),
-            '--aligner_reference',
-            get_data('mock_reference_genome.fa'),
-            '--read_length',
-            '125',
+            output_dir,
+            '--config',
+            str(configpath),
         ]
         with patch.object(validate_main, 'main', util.DEVNULL):
             with patch.object(sys, 'argv', args):
-                mavis_main()
+                expect_error(self, mavis_main)
 
-    def test_ok_multi_ref_genome(self):
+    def test_ok_multi_ref_genome(self, configpath, output_dir):
+        configpath.write_text(
+            json.dumps(
+                {
+                    'libraries': {
+                        'translib': {
+                            'disease_status': 'diseased',
+                            'protocol': 'genome',
+                            'assign': [get_data('mock_sv_events.tsv')],
+                            'bam_file': get_data('mock_trans_reads_for_events.sorted.bam'),
+                            'read_length': 125,
+                            'median_fragment_size': 200,
+                            'stdev_fragment_size': 50,
+                        }
+                    },
+                    'reference.annotations': [
+                        get_data('example_genes.json'),
+                        get_data('mock_annotations.json'),
+                    ],
+                    'cluster.uninformative_filter': True,
+                    'reference.reference_genome': [
+                        get_data('mock_reference_genome.fa'),
+                        get_data('example_genes.fa'),
+                    ],
+                    'reference.aligner_reference': [get_data('mock_reference_genome.fa')],
+                    'output_dir': output_dir,
+                }
+            )
+        )
         args = [
             'mavis',
             'validate',
             '--library',
             'translib',
-            '--protocol',
-            'genome',
-            '--bam_file',
-            get_data('mock_trans_reads_for_events.sorted.bam'),
-            '--stdev_fragment_size',
-            '50',
-            '--median_fragment_size',
-            '200',
             '--input',
             get_data('mock_sv_events.tsv'),
             '--output',
-            'outdir',
-            '--reference_genome',
-            get_data('mock_reference_genome.fa'),
-            get_data('example_genes.fa'),
-            '--aligner_reference',
-            get_data('mock_reference_genome.fa'),
-            '--read_length',
-            '125',
+            output_dir,
+            '--config',
+            str(configpath),
         ]
         with patch.object(validate_main, 'main', util.DEVNULL):
             with patch.object(sys, 'argv', args):
                 mavis_main()
 
-    def test_error_multi_aligner_ref(self):
-        args = [
-            'mavis',
-            'validate',
-            '--library',
-            'translib',
-            '--protocol',
-            'genome',
-            '--bam_file',
-            get_data('mock_trans_reads_for_events.sorted.bam'),
-            '--stdev_fragment_size',
-            '50',
-            '--median_fragment_size',
-            '200',
-            '--input',
-            get_data('mock_sv_events.tsv'),
-            '--output',
-            'outdir',
-            '--reference_genome',
-            get_data('mock_reference_genome.fa'),
-            '--aligner_reference',
-            get_data('mock_reference_genome.fa'),
-            get_data('example_genes.fa'),
-            '--read_length',
-            '125',
-        ]
-        with patch.object(validate_main, 'main', util.DEVNULL):
-            with patch.object(sys, 'argv', args):
-                err = expect_error(self, mavis_main, SystemExit)
-                self.assertEqual(ARGUMENT_ERROR, err.code)
-
-    def test_error_missing_aligner_ref(self):
+    def test_error_multi_aligner_ref(self, configpath, output_dir):
+        configpath.write_text(
+            json.dumps(
+                {
+                    'libraries': {
+                        'translib': {
+                            'disease_status': 'diseased',
+                            'protocol': 'genome',
+                            'assign': [get_data('mock_sv_events.tsv')],
+                            'bam_file': get_data('mock_trans_reads_for_events.sorted.bam'),
+                            'read_length': 125,
+                            'median_fragment_size': 200,
+                            'stdev_fragment_size': 50,
+                        }
+                    },
+                    'reference.annotations': [
+                        get_data('example_genes.json'),
+                        get_data('mock_annotations.json'),
+                    ],
+                    'cluster.uninformative_filter': True,
+                    'reference.reference_genome': [
+                        get_data('mock_reference_genome.fa'),
+                        get_data('example_genes.fa'),
+                    ],
+                    'reference.aligner_reference': [
+                        get_data('mock_reference_genome.fa'),
+                        get_data('example_genes.fa'),
+                    ],
+                    'output_dir': output_dir,
+                }
+            )
+        )
         args = [
             'mavis',
             'validate',
             '--library',
             'translib',
-            '--protocol',
-            'genome',
-            '--bam_file',
-            get_data('mock_trans_reads_for_events.sorted.bam'),
-            '--stdev_fragment_size',
-            '50',
-            '--median_fragment_size',
-            '200',
             '--input',
             get_data('mock_sv_events.tsv'),
             '--output',
-            'outdir',
-            '--reference_genome',
-            get_data('mock_reference_genome.fa'),
-            '--read_length',
-            '125',
+            output_dir,
+            '--config',
+            str(configpath),
         ]
         with patch.object(validate_main, 'main', util.DEVNULL):
             with patch.object(sys, 'argv', args):
-                err = expect_error(self, mavis_main, SystemExit)
-                self.assertEqual(ARGUMENT_ERROR, err.code)
+                expect_error(self, mavis_main)
 
-    def test_error_missing_reference_genome(self):
+    def test_error_missing_aligner_ref(self, configpath, output_dir):
+        configpath.write_text(
+            json.dumps(
+                {
+                    'libraries': {
+                        'translib': {
+                            'disease_status': 'diseased',
+                            'protocol': 'genome',
+                            'assign': [get_data('mock_sv_events.tsv')],
+                            'bam_file': get_data('mock_trans_reads_for_events.sorted.bam'),
+                            'read_length': 125,
+                            'median_fragment_size': 200,
+                            'stdev_fragment_size': 50,
+                        }
+                    },
+                    'reference.annotations': [
+                        get_data('example_genes.json'),
+                        get_data('mock_annotations.json'),
+                    ],
+                    'cluster.uninformative_filter': True,
+                    'reference.reference_genome': [
+                        get_data('mock_reference_genome.fa'),
+                        get_data('example_genes.fa'),
+                    ],
+                    'output_dir': output_dir,
+                }
+            )
+        )
         args = [
             'mavis',
             'validate',
             '--library',
             'translib',
-            '--protocol',
-            'genome',
-            '--bam_file',
-            get_data('mock_trans_reads_for_events.sorted.bam'),
-            '--stdev_fragment_size',
-            '50',
-            '--median_fragment_size',
-            '200',
             '--input',
             get_data('mock_sv_events.tsv'),
             '--output',
-            'outdir',
-            '--aligner_reference',
-            get_data('mock_reference_genome.fa'),
-            '--read_length',
-            '125',
+            output_dir,
+            '--config',
+            str(configpath),
         ]
         with patch.object(validate_main, 'main', util.DEVNULL):
             with patch.object(sys, 'argv', args):
-                err = expect_error(self, mavis_main, SystemExit)
-                self.assertEqual(ARGUMENT_ERROR, err.code)
+                expect_error(self, mavis_main)
 
-    def test_error_bad_aligner_ref(self):
+    def test_error_missing_reference_genome(self, configpath, output_dir):
+        configpath.write_text(
+            json.dumps(
+                {
+                    'libraries': {
+                        'translib': {
+                            'disease_status': 'diseased',
+                            'protocol': 'genome',
+                            'assign': [get_data('mock_sv_events.tsv')],
+                            'bam_file': get_data('mock_trans_reads_for_events.sorted.bam'),
+                            'read_length': 125,
+                            'median_fragment_size': 200,
+                            'stdev_fragment_size': 50,
+                        }
+                    },
+                    'reference.annotations': [
+                        get_data('example_genes.json'),
+                        get_data('mock_annotations.json'),
+                    ],
+                    'cluster.uninformative_filter': True,
+                    'reference.aligner_reference': [
+                        get_data('mock_reference_genome.fa'),
+                        get_data('example_genes.fa'),
+                    ],
+                    'output_dir': output_dir,
+                }
+            )
+        )
         args = [
             'mavis',
             'validate',
             '--library',
             'translib',
-            '--protocol',
-            'genome',
-            '--bam_file',
-            get_data('mock_trans_reads_for_events.sorted.bam'),
-            '--stdev_fragment_size',
-            '50',
-            '--median_fragment_size',
-            '200',
             '--input',
             get_data('mock_sv_events.tsv'),
             '--output',
-            'outdir',
-            '--reference_genome',
-            get_data('mock_reference_genome.fa'),
-            '--aligner_reference',
-            'bad',
-            '--read_length',
-            '125',
+            output_dir,
+            '--config',
+            str(configpath),
         ]
         with patch.object(validate_main, 'main', util.DEVNULL):
             with patch.object(sys, 'argv', args):
-                err = expect_error(self, mavis_main, SystemExit)
-                self.assertEqual(ARGUMENT_ERROR, err.code)
+                expect_error(self, mavis_main)
 
-    def test_error_none_aligner_ref(self):
+    def test_error_bad_aligner_ref(self, configpath, output_dir):
+        configpath.write_text(
+            json.dumps(
+                {
+                    'libraries': {
+                        'translib': {
+                            'disease_status': 'diseased',
+                            'protocol': 'genome',
+                            'assign': [get_data('mock_sv_events.tsv')],
+                            'bam_file': get_data('mock_trans_reads_for_events.sorted.bam'),
+                            'read_length': 125,
+                            'median_fragment_size': 200,
+                            'stdev_fragment_size': 50,
+                        }
+                    },
+                    'reference.annotations': [
+                        get_data('example_genes.json'),
+                        get_data('mock_annotations.json'),
+                    ],
+                    'cluster.uninformative_filter': True,
+                    'reference.reference_genome': [
+                        get_data('mock_reference_genome.fa'),
+                        get_data('example_genes.fa'),
+                    ],
+                    'reference.aligner_reference': [
+                        'fake_path',
+                    ],
+                    'output_dir': output_dir,
+                }
+            )
+        )
         args = [
             'mavis',
             'validate',
             '--library',
             'translib',
-            '--protocol',
-            'genome',
-            '--bam_file',
-            get_data('mock_trans_reads_for_events.sorted.bam'),
-            '--stdev_fragment_size',
-            '50',
-            '--median_fragment_size',
-            '200',
             '--input',
             get_data('mock_sv_events.tsv'),
             '--output',
-            'outdir',
-            '--reference_genome',
-            get_data('mock_reference_genome.fa'),
-            '--aligner_reference',
-            'none',
-            '--read_length',
-            '125',
+            output_dir,
+            '--config',
+            str(configpath),
         ]
         with patch.object(validate_main, 'main', util.DEVNULL):
             with patch.object(sys, 'argv', args):
-                err = expect_error(self, mavis_main, SystemExit)
-                self.assertEqual(ARGUMENT_ERROR, err.code)
+                expect_error(self, mavis_main)
diff --git a/tests/integration/test_checker.py b/tests/integration/test_checker.py
deleted file mode 100644
index d77c4453..00000000
--- a/tests/integration/test_checker.py
+++ /dev/null
@@ -1,111 +0,0 @@
-import errno
-import os
-import unittest
-from unittest.mock import mock_open, patch
-
-from mavis.schedule import job as _job
-from mavis.schedule import pipeline as _pipeline
-
-MOCK_GENOME = 'mock-A36971'
-MOCK_TRANS = 'mock-A47933'
-ERROR_MESSAGE = """Traceback (most recent call last):
-  File "/home/dpaulino/gitrepo/mavis/venv/bin/mavis_run.py", line 6, in <module>
-    exec(compile(open(__file__).read(), __file__, 'exec'))
-  File "/home/dpaulino/gitrepo/mavis/bin/mavis_run.py", line 7, in <module>
-    from mavis.annotate import load_reference_genes, load_reference_genome, load_masking_regions, load_templates
-  File "/home/dpaulino/gitrepo/mavis/mavis/__init__.py", line 6, in <module>
-    __version__ = get_version()
-  File "/home/dpaulino/gitrepo/mavis/mavis/util.py", line 32, in get_version
-    v = subprocess.check_output('cd {}; git describe'.format(os.path.dirname(__file__)), shell=True)
-  File "/projects/tumour_char/analysis_scripts/python/centos06/python-3.6.0/lib/python3.6/subprocess.py", line 336, in check_output
-    **kwargs).stdout
-  File "/projects/tumour_char/analysis_scripts/python/centos06/python-3.6.0/lib/python3.6/subprocess.py", line 418, in run
-    output=stdout, stderr=stderr)
-subprocess.CalledProcessError: Command 'cd /home/dpaulino/gitrepo/mavis/mavis; git describe' returned non-zero exit status 127."""
-
-
-def mkdirs(newdir, mode=0o777):
-    """
-    make directories and ignores if it already exists.
-    """
-    try:
-        os.makedirs(newdir, mode)
-    except OSError as err:
-        # Reraise the error unless it's about an already existing directory
-        if err.errno != errno.EEXIST or not os.path.isdir(newdir):
-            raise err
-
-
-class TestParseLogFile(unittest.TestCase):
-    def mock_log(self, content):
-        mockopen = mock_open(read_data=content)
-        with patch('builtins.open', mockopen), patch('os.path.isfile') as isfile, patch(
-            '__main__.open', mockopen
-        ):
-            isfile.return_value = True
-            return _job.LogFile.parse('log')
-
-    def test_command_not_found_error(self):
-        log = self.mock_log(
-            "stty: standard input: Inappropriate ioctl for device\n"
-            "/opt/slurm/spool/slurmd/job814329/slurm_script: line 9: mavis: command not found\n"
-        )
-        self.assertEqual(_job.LogFile.STATUS.CRASH, log.status)
-
-    def test_python_index_error(self):
-        content = """
-Traceback (most recent call last):
-    File "/home/creisle/git/mavis/venv/bin/mavis", line 11, in <module>
-        load_entry_point('mavis===v0.1.0-220-g3f65e68', 'console_scripts', 'mavis')()
-    File "/home/creisle/git/mavis/venv/lib/python3.6/site-packages/mavis-v0.1.0_220_g3f65e68-py3.6.egg/mavis/main.py", line 554, in main
-        check_completion(args.output)
-    File "/home/creisle/git/mavis/venv/lib/python3.6/site-packages/mavis-v0.1.0_220_g3f65e68-py3.6.egg/mavis/main.py", line 450, in check_completion
-        cur_time = check_single_job(d)
-    File "/home/creisle/git/mavis/venv/lib/python3.6/site-packages/mavis-v0.1.0_220_g3f65e68-py3.6.egg/mavis/main.py", line 429, in check_single_job
-        check_log(max(log_files, key=os.path.getctime))
-    File "/home/creisle/git/mavis/venv/lib/python3.6/site-packages/mavis-v0.1.0_220_g3f65e68-py3.6.egg/mavis/main.py", line 359, in check_log
-        if 'error' in lines[-1].lower():
-IndexError: list index out of range"""
-        log = self.mock_log(content)
-        self.assertEqual(_job.LogFile.STATUS.CRASH, log.status)
-
-    def test_python_keyerror(self):
-        content = "KeyError: ('cannot check membership column. column not found in header', 'protocol', {'break2_orientation', 'break1_chromosome', 'break1_orientation', 'tools', 'defuse_cluster_id', 'break1_position_end', 'event_type', 'defuse_split_read_count', 'break2_chromosome', 'break2_position_end', 'stranded', 'defuse_spanning_read_count', 'break2_strand', 'library', 'break1_position_start', 'defuse_probability', 'untemplated_seq', 'opposing_strands', 'break1_strand', 'break2_position_start'})"
-        log = self.mock_log(content)
-        self.assertEqual(_job.LogFile.STATUS.CRASH, log.status)
-
-    def test_empty_log(self):
-        log = self.mock_log("")
-        self.assertEqual(_job.LogFile.STATUS.EMPTY, log.status)
-        log = self.mock_log("\n\n")
-        self.assertEqual(_job.LogFile.STATUS.EMPTY, log.status)
-
-    def test_incomplete_log(self):
-        log = self.mock_log("other\n")
-        self.assertEqual(_job.LogFile.STATUS.INCOMPLETE, log.status)
-        log = self.mock_log("thing")
-        self.assertEqual(_job.LogFile.STATUS.INCOMPLETE, log.status)
-
-
-class TestModule(unittest.TestCase):
-    def test_parse_run_time_none(self):
-        content = ""
-        mockopen = mock_open(read_data=content)
-        with patch('builtins.open', mockopen), patch('os.path.isfile') as isfile, patch(
-            '__main__.open', mockopen
-        ), patch('os.path.getmtime') as getmtime:
-            getmtime.return_value = 1
-            isfile.return_value = True
-            result = _pipeline.parse_run_time('log')
-        self.assertEqual(-1, result)
-
-    def test_parse_valid_run_time(self):
-        content = "[2018-03-06 15:25:46.153560] complete: MAVIS.COMPLETE\nrun time (hh/mm/ss): 0:06:41\nrun time (s): 1\n"
-        mockopen = mock_open(read_data=content)
-        with patch('builtins.open', mockopen), patch('os.path.isfile') as isfile, patch(
-            '__main__.open', mockopen
-        ), patch('os.path.getmtime') as getmtime:
-            getmtime.return_value = 1
-            isfile.return_value = True
-            result = _pipeline.parse_run_time('log')
-        self.assertEqual(1, result)
diff --git a/tests/integration/test_config.py b/tests/integration/test_config.py
deleted file mode 100644
index bb1c5e1f..00000000
--- a/tests/integration/test_config.py
+++ /dev/null
@@ -1,64 +0,0 @@
-import unittest
-from unittest.mock import mock_open, patch
-import configparser
-
-from mavis.config import MavisConfig
-
-
-STUB = """
-[reference]
-template_metadata = tests/data/cytoBand.txt
-annotations = tests/data/mock_annotations.json
-masking = tests/data/mock_masking.tab
-reference_genome = tests/data/mock_reference_genome.fa
-aligner_reference = tests/data/mock_reference_genome.2bit
-dgv_annotation = tests/data/mock_dgv_annotation.txt
-
-[mock-A36971]
-read_length = 150
-median_fragment_size = 400
-stdev_fragment_size = 97
-bam_file = tests/data/mock_reads_for_events.sorted.bam
-protocol = genome
-inputs = mock_converted
-strand_specific = False
-disease_status=diseased
-
-[mock-A47933]
-read_length = 75
-median_fragment_size = 188
-stdev_fragment_size = 50
-bam_file = tests/data/mock_trans_reads_for_events.sorted.bam
-protocol = transcriptome
-inputs = mock_converted
-strand_specific = True
-disease_status=diseased
-
-[convert]
-assume_no_untemplated = True
-# addfile twice to check this notation is ok (will collapse them anyway)
-mock_converted = convert_tool_output
-    tests/data/mock_sv_events.tsv
-    tests/data/mock_sv_events.tsv
-    mavis
-    False
-"""
-
-
-class TestConfig(unittest.TestCase):
-    def mock_config(self, content=""):
-        with patch('configparser.ConfigParser.read', configparser.ConfigParser.read_string), patch(
-            'os.path.isfile'
-        ) as isfile, patch('os.path.exists') as exists:
-            isfile.return_value = True
-            exists.return_value = True
-            return MavisConfig.read(content)
-
-    def test_error_in_schedule(self):
-        with self.assertRaises(TypeError):
-            content = STUB + '\n[schedule]\nmail_type=\n'
-            print(content)
-            self.mock_config(content)
-
-    def test_ok(self):
-        self.mock_config(STUB)
diff --git a/tests/integration/test_mains.py b/tests/integration/test_mains.py
deleted file mode 100644
index 4fa1478c..00000000
--- a/tests/integration/test_mains.py
+++ /dev/null
@@ -1,135 +0,0 @@
-import glob
-import os
-import re
-import shutil
-from tempfile import mkdtemp
-import unittest
-from unittest import mock
-
-from mavis.annotate.file_io import (
-    load_reference_genes,
-    load_reference_genome,
-    load_templates,
-    ReferenceFile,
-    load_annotations,
-)
-from mavis.annotate.main import main as annotate_main
-from mavis.cluster.main import main as cluster_main
-from mavis.constants import DISEASE_STATUS, PROTOCOL
-from mavis.validate.main import main as validate_main
-import pysam
-
-from . import RUN_FULL
-from ..util import get_data
-
-annotations = None
-reference_genome = None
-template_metadata = None
-trans_bam_fh = None
-genome_bam_fh = None
-masking = mock.Mock(content={})  # do not mask
-
-
-def setUpModule():
-    global annotations, reference_genome, template_metadata, genome_bam_fh, trans_bam_fh, masking
-    print('setup start')
-    annotations = ReferenceFile('annotations', get_data('mock_annotations.json'))
-    reference_genome = ReferenceFile(
-        'reference_genome', get_data('mock_reference_genome.fa'), eager_load=True
-    )
-    template_metadata = ReferenceFile(
-        'template_metadata', get_data('cytoBand.txt'), eager_load=True
-    )
-    genome_bam_fh = pysam.AlignmentFile(get_data('mock_reads_for_events.sorted.bam'))
-    trans_bam_fh = pysam.AlignmentFile(get_data('mock_trans_reads_for_events.sorted.bam'))
-    print('setup loading is complete')
-
-
-def tearDownModule():
-    trans_bam_fh.close()
-    genome_bam_fh.close()
-
-
-@unittest.skipIf(
-    not RUN_FULL, 'slower tests will not be run unless the environment variable RUN_FULL is given'
-)
-class TestPipeline(unittest.TestCase):
-    def setUp(self):
-        self.output = mkdtemp()
-
-    def tearDown(self):
-        shutil.rmtree(self.output)
-
-    @unittest.skipIf(not shutil.which('blat'), 'missing the blat command')
-    def test_mains(self):
-        # test the clustering
-        cluster_files = cluster_main(
-            [get_data('mock_sv_events.tsv')],
-            self.output,
-            False,
-            'mock-A36971',
-            PROTOCOL.GENOME,
-            DISEASE_STATUS.DISEASED,
-            limit_to_chr=[None],
-            log_args=True,
-            masking=masking,
-            cluster_clique_size=15,
-            cluster_radius=20,
-            uninformative_filter=True,
-            max_proximity=5000,
-            annotations=annotations,
-            min_clusters_per_file=5,
-            max_files=1,
-        )
-        self.assertGreaterEqual(100, len(cluster_files))
-        self.assertLessEqual(1, len(cluster_files))
-        # next test the validate runs without errors
-        validate_main(
-            [cluster_files[0]],
-            self.output,
-            genome_bam_fh,
-            False,
-            'mock-A36971',
-            PROTOCOL.GENOME,
-            median_fragment_size=427,
-            stdev_fragment_size=106,
-            read_length=150,
-            reference_genome=reference_genome,
-            annotations=annotations,
-            masking=masking,
-            aligner_reference=ReferenceFile(
-                'aligner_reference', get_data('mock_reference_genome.2bit')
-            ),
-        )
-        for suffix in [
-            'validation-passed.tab',
-            'validation-failed.tab',
-            'raw_evidence.bam',
-            'raw_evidence.sorted.bam',
-            'raw_evidence.sorted.bam.bai',
-            'contigs.sorted.bam',
-            'contigs.sorted.bam.bai',
-            'contigs.bam',
-            'igv.batch',
-        ]:
-            self.assertTrue(os.path.exists(os.path.join(self.output, suffix)))
-
-        # test the annotation
-        annotate_main(
-            [os.path.join(self.output, 'validation-passed.tab')],
-            self.output,
-            'mock-A36971',
-            PROTOCOL.GENOME,
-            reference_genome,
-            annotations,
-            template_metadata,
-            min_domain_mapping_match=0.95,
-            min_orf_size=300,
-            max_orf_cap=3,
-        )
-        self.assertTrue(os.path.exists(os.path.join(self.output, 'annotations.tab')))
-        self.assertTrue(os.path.exists(os.path.join(self.output, 'annotations.fusion-cdna.fa')))
-        drawings_dir = os.path.join(self.output, 'drawings')
-        self.assertTrue(os.path.exists(drawings_dir))
-        self.assertLessEqual(1, len(glob.glob(os.path.join(drawings_dir, '*.svg'))))
-        self.assertLessEqual(1, len(glob.glob(os.path.join(drawings_dir, '*.legend.json'))))
diff --git a/tests/mini-tutorial.config.json b/tests/mini-tutorial.config.json
new file mode 100644
index 00000000..00fd4d50
--- /dev/null
+++ b/tests/mini-tutorial.config.json
@@ -0,0 +1,64 @@
+{
+    "annotate.draw_fusions_only": false,
+    "convert": {
+        "mock_converted": {
+            "inputs": [
+                "tests/data/mock_sv_events.tsv"
+            ],
+            "file_type": "mavis",
+            "assume_no_untemplated": true
+        }
+    },
+    "cluster.uninformative_filter": true,
+    "cluster.limit_to_chr": null,
+    "cluster.min_clusters_per_file": 5,
+    "libraries": {
+        "mock-A47933": {
+            "assign": [
+                "tests/data/mock_trans_sv_events.tsv"
+            ],
+            "bam_file": "tests/data/mock_trans_reads_for_events.sorted.bam",
+            "disease_status": "diseased",
+            "median_fragment_size": 188,
+            "protocol": "transcriptome",
+            "read_length": 75,
+            "stdev_fragment_size": 50,
+            "strand_specific": true
+        },
+        "mock-A36971": {
+            "assign": [
+                "mock_converted"
+            ],
+            "bam_file": "tests/data/mock_reads_for_events.sorted.bam",
+            "disease_status": "diseased",
+            "median_fragment_size": 400,
+            "protocol": "genome",
+            "read_length": 150,
+            "stdev_fragment_size": 97,
+            "strand_specific": false
+        }
+    },
+    "output_dir": "output_dir",
+    "reference.aligner_reference": [
+        "tests/data/mock_reference_genome.2bit"
+    ],
+    "reference.annotations": [
+        "tests/data/mock_annotations.json"
+    ],
+    "reference.dgv_annotation": [
+        "tests/data/mock_dgv_annotation.txt"
+    ],
+    "reference.masking": [
+        "tests/data/mock_masking.tab"
+    ],
+    "reference.reference_genome": [
+        "tests/data/mock_reference_genome.fa"
+    ],
+    "reference.template_metadata": [
+        "tests/data/cytoBand.txt"
+    ],
+    "summary.filter_min_remapped_reads": 5,
+    "summary.filter_min_spanning_reads": 5,
+    "summary.filter_min_linking_split_reads": 1,
+    "summary.filter_min_flanking_reads": 10
+}
diff --git a/mavis/schedule/__init__.py b/tests/snakemake/__init__.py
similarity index 100%
rename from mavis/schedule/__init__.py
rename to tests/snakemake/__init__.py
diff --git a/tests/snakemake/test_mini_workflow.py b/tests/snakemake/test_mini_workflow.py
new file mode 100644
index 00000000..37e81b56
--- /dev/null
+++ b/tests/snakemake/test_mini_workflow.py
@@ -0,0 +1,55 @@
+import json
+import os
+import shutil
+import sys
+import tempfile
+from unittest.mock import patch
+
+import pytest
+
+from snakemake import main as snakemake_main
+
+from ..util import glob_exists, package_relative_file
+
+
+@pytest.fixture
+def output_dir():
+    temp_output = tempfile.mkdtemp()
+
+    os.makedirs(os.path.join(temp_output, 'mavis/schemas'))
+
+    with open(package_relative_file('tests/mini-tutorial.config.json'), 'r') as fh:
+        config = json.load(fh)
+    config['output_dir'] = os.path.join(temp_output, 'output_dir')
+    with open(os.path.join(temp_output, 'mini-tutorial.config.json'), 'w') as fh:
+        fh.write(json.dumps(config))
+    yield temp_output
+    shutil.rmtree(temp_output)
+
+
+def test_workflow(output_dir):
+    argv = [
+        'snakemake',
+        '-s',
+        package_relative_file('Snakefile'),
+        '-j',
+        '1',
+        '--configfile',
+        os.path.join(output_dir, 'mini-tutorial.config.json'),
+        '-d',
+        package_relative_file(),
+    ]
+    with patch.object(sys, 'argv', argv):
+        try:
+            snakemake_main()
+            assert glob_exists(os.path.join(output_dir, 'summary', 'MAVIS.COMPLETE'))
+            assert glob_exists(os.path.join(output_dir, 'pairing', 'MAVIS.COMPLETE'))
+            assert glob_exists(os.path.join(output_dir, 'mock-A47933', 'cluster', 'MAVIS.COMPLETE'))
+            assert glob_exists(os.path.join(output_dir, 'mock-A47933', 'validate', '*', 'MAVIS.COMPLETE'))
+            assert glob_exists(os.path.join(output_dir, 'mock-A47933', 'annotate', '*', 'MAVIS.COMPLETE'))
+            assert glob_exists(os.path.join(output_dir, 'mock-A36971', 'cluster', 'MAVIS.COMPLETE'))
+            assert glob_exists(os.path.join(output_dir, 'mock-A36971', 'validate', '*', 'MAVIS.COMPLETE'))
+            assert glob_exists(os.path.join(output_dir, 'mock-A36971', 'annotate', '*', 'MAVIS.COMPLETE'))
+        except SystemExit as err:
+            if err.code != 0:
+                raise err
diff --git a/tests/unit/test_config.py b/tests/unit/test_config.py
deleted file mode 100644
index 0edf3629..00000000
--- a/tests/unit/test_config.py
+++ /dev/null
@@ -1,67 +0,0 @@
-import unittest
-from argparse import ArgumentTypeError
-
-from mavis.config import float_fraction, nameable_string
-
-
-class TestFloatFraction(unittest.TestCase):
-    def test_bad_string(self):
-        with self.assertRaises(ArgumentTypeError):
-            float_fraction('a')
-
-    def test_float_too_big(self):
-        with self.assertRaises(ArgumentTypeError):
-            float_fraction('1.1')
-
-    def test_float_negative_error(self):
-        with self.assertRaises(ArgumentTypeError):
-            float_fraction('-0.1')
-
-    def test_zero_ok(self):
-        self.assertEqual(0, float_fraction('0'))
-
-    def test_one_ok(self):
-        self.assertEqual(1, float_fraction('1'))
-
-
-class TestNoReservedChars(unittest.TestCase):
-    def test_semicolon_error(self):
-        with self.assertRaises(TypeError):
-            nameable_string('thing;thing')
-
-    def test_comma_error(self):
-        with self.assertRaises(TypeError):
-            nameable_string('thing,thing')
-
-    def test_underscore_error(self):
-        with self.assertRaises(TypeError):
-            nameable_string('thing_thing')
-
-    def test_space_error(self):
-        with self.assertRaises(TypeError):
-            nameable_string(' ')
-
-        with self.assertRaises(TypeError):
-            nameable_string('thing thing')
-
-    def test_ok(self):
-        lib = 'libName'
-        self.assertEqual('libName', nameable_string(lib))
-
-    def test_number_start_error(self):
-        with self.assertRaises(TypeError):
-            nameable_string('1thing')
-
-        with self.assertRaises(TypeError):
-            nameable_string('1')
-
-    def test_empty_error(self):
-        with self.assertRaises(TypeError):
-            nameable_string('')
-
-    def test_none_error(self):
-        with self.assertRaises(TypeError):
-            nameable_string('none')
-
-        with self.assertRaises(TypeError):
-            nameable_string(None)
diff --git a/tests/util.py b/tests/util.py
index 089985bf..55db2d11 100644
--- a/tests/util.py
+++ b/tests/util.py
@@ -1,7 +1,31 @@
+import glob
 import os
 
 DATA_DIR = os.path.join(os.path.dirname(__file__), 'data')
 
 
+def package_relative_file(*paths):
+    return os.path.abspath(os.path.join(os.path.dirname(__file__), '..', *paths))
+
+
 def get_data(*paths):
     return os.path.join(DATA_DIR, *paths)
+
+
+def glob_exists(*pos, strict=False, n=1):
+    globexpr = os.path.join(*pos)
+    file_list = glob.glob(globexpr)
+    if strict and len(file_list) == n:
+        return file_list[0] if len(file_list) == 1 else file_list
+    elif not strict and len(file_list) > 0:
+        return file_list
+    else:
+        print(globexpr)
+        print(file_list)
+        return False
+
+
+def glob_not_exists(*pos):
+    globexpr = os.path.join(*pos)
+    file_list = glob.glob(globexpr)
+    return not file_list

From 57e0254eba0b2fcdf74e5513f4fca38049d14289 Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Mon, 19 Apr 2021 10:31:07 -0700
Subject: [PATCH 002/137] Include branch coverage

---
 .github/workflows/build.yml | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 1601aeba..ded37cdc 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -45,14 +45,26 @@ jobs:
     - name: run short tests with pytest
       run: |
         export PATH=$PATH:$(pwd):$(pwd)/bwa
-        pytest tests -v --junitxml=junit/test-results-${{ matrix.python-version }}.xml --cov mavis --cov-report term --cov-report xml --durations=10
+        pytest tests -v \
+          --junitxml=junit/test-results-${{ matrix.python-version }}.xml \
+          --cov mavis \
+          --cov-report term-missing \
+          --cov-report xml \
+          --durations=10 \
+          --cov-branch
       env:
         RUN_FULL: 0
       if: github.event_name != 'pull_request'
     - name: run full tests with pytest
       run: |
         export PATH=$PATH:$(pwd):$(pwd)/bwa
-        pytest tests -v --junitxml=junit/test-results-${{ matrix.python-version }}.xml --cov mavis --cov-report term --cov-report xml --durations=10
+        pytest tests -v \
+          --junitxml=junit/test-results-${{ matrix.python-version }}.xml \
+          --cov mavis \
+          --cov-report term-missing \
+          --cov-report xml \
+          --durations=10 \
+          --cov-branch
       env:
         RUN_FULL: 1
       if: github.event_name == 'pull_request'
@@ -72,4 +84,4 @@ jobs:
         env_vars: OS,PYTHON
         name: codecov-umbrella
         fail_ci_if_error: true
-      if: matrix.python-version == 3.8
+      if: matrix.python-version == 3.7 && github.event_name == 'pull_request'

From d99112a9907bc9ab9fb02dcac2259caf5f3b16ce Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Mon, 19 Apr 2021 11:01:46 -0700
Subject: [PATCH 003/137] Increase test timeout for slow tests due to increased
 coverage metrics

---
 tests/integration/test_assemble.py | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/tests/integration/test_assemble.py b/tests/integration/test_assemble.py
index f206ca5e..e732cfbc 100644
--- a/tests/integration/test_assemble.py
+++ b/tests/integration/test_assemble.py
@@ -3,15 +3,14 @@
 import unittest
 
 import timeout_decorator
-
 from mavis.assemble import Contig, assemble, filter_contigs
-from mavis.interval import Interval
 from mavis.constants import reverse_complement
-from mavis.validate.constants import DEFAULTS
+from mavis.interval import Interval
 from mavis.util import LOG
+from mavis.validate.constants import DEFAULTS
 
-from . import MockObject, RUN_FULL
 from ..util import get_data
+from . import RUN_FULL, MockObject
 
 
 class TestFilterContigs(unittest.TestCase):
@@ -629,7 +628,7 @@ def test_assemble_short_contig(self):
             print(len(contig.seq), contig.remap_score(), contig.seq)
         self.assertTrue({target, reverse_complement(target)} & {c.seq for c in contigs})
 
-    @timeout_decorator.timeout(60)
+    @timeout_decorator.timeout(120)
     @unittest.skipIf(
         not RUN_FULL,
         'slower tests will not be run unless the environment variable RUN_FULL is given',

From 770c29832a6a67d92be651b66bfebf43819a6be3 Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Mon, 19 Apr 2021 15:09:35 -0700
Subject: [PATCH 004/137] Add type annotations

---
 mavis/align.py                |  13 +-
 mavis/annotate/constants.py   |  42 ------
 mavis/annotate/file_io.py     |  46 +-----
 mavis/annotate/main.py        |   5 +-
 mavis/breakpoint.py           |  94 ++++++------
 mavis/cluster/constants.py    |  46 ------
 mavis/config.py               |  16 +--
 mavis/illustrate/constants.py |  68 +--------
 mavis/illustrate/scatter.py   |   8 +-
 mavis/interval.py             |   9 +-
 mavis/main.py                 |   1 -
 mavis/pairing/constants.py    |  46 +-----
 mavis/pairing/pairing.py      |   3 +-
 mavis/schemas/config.json     |   2 +-
 mavis/summary/constants.py    |  55 +------
 mavis/tools/__init__.py       |  48 ++++---
 mavis/tools/chimerascan.py    |   7 +-
 mavis/tools/vcf.py            |   6 +-
 mavis/util.py                 |  16 +--
 mavis/validate/base.py        | 210 ++++++++++++++++++---------
 mavis/validate/call.py        |  13 +-
 mavis/validate/constants.py   | 262 ----------------------------------
 mavis/validate/evidence.py    | 102 +++++++------
 mavis/validate/main.py        |  10 +-
 24 files changed, 349 insertions(+), 779 deletions(-)
 delete mode 100644 mavis/cluster/constants.py

diff --git a/mavis/align.py b/mavis/align.py
index 8fccc613..cdf0a287 100644
--- a/mavis/align.py
+++ b/mavis/align.py
@@ -1,33 +1,32 @@
 """
 Should take in a sam file from a aligner like bwa aln or bwa mem and convert it into a
 """
-from copy import copy
 import itertools
 import os
 import re
 import subprocess
 import warnings
+from copy import copy
 
 import pysam
 
 from .bam import cigar as _cigar
 from .bam import read as _read
-from .breakpoint import BreakpointPair, Breakpoint
+from .breakpoint import Breakpoint, BreakpointPair
 from .constants import (
     CIGAR,
     COLUMNS,
-    MavisNamespace,
+    NA_MAPPING_QUALITY,
     ORIENT,
-    reverse_complement,
     STRAND,
     SVTYPE,
-    NA_MAPPING_QUALITY,
+    MavisNamespace,
+    reverse_complement,
 )
 from .error import InvalidRearrangement
 from .interval import Interval
 from .util import DEVNULL
 
-
 SUPPORTED_ALIGNER = MavisNamespace(
     BWA_MEM='bwa mem', BLAT='blat', __name__='mavis.align.SUPPORTED_ALIGNER'
 )
@@ -229,9 +228,9 @@ def convert_to_duplication(alignment, reference_genome):
                 ),
                 untemplated_seq=alignment.untemplated_seq[dup_len:],
                 opposing_strands=alignment.opposing_strands,
-                data=alignment.data,
                 read1=alignment.read1,
                 read2=alignment.read2,
+                **alignment.data
             )
             return result
     return alignment
diff --git a/mavis/annotate/constants.py b/mavis/annotate/constants.py
index a7645efd..d2fbf5c2 100644
--- a/mavis/annotate/constants.py
+++ b/mavis/annotate/constants.py
@@ -5,50 +5,8 @@
 from ..constants import MavisNamespace, float_fraction
 from ..util import WeakMavisNamespace
 
-
 PASS_FILENAME = 'annotations.tab'
 
-DEFAULTS = WeakMavisNamespace()
-"""
-- [annotation_filters](/configuration/settings/#annotation_filters)
-- [max_orf_cap](/configuration/settings/#max_orf_cap)
-- [min_domain_mapping_match](/configuration/settings/#min_domain_mapping_match)
-- [min_orf_size](/configuration/settings/#min_orf_size)
-"""
-DEFAULTS.add(
-    'min_domain_mapping_match',
-    0.9,
-    cast_type=float_fraction,
-    defn='a number between 0 and 1 representing the minimum percent match a domain must map to the fusion transcript '
-    'to be displayed',
-)
-DEFAULTS.add(
-    'min_orf_size',
-    300,
-    defn='the minimum length (in base pairs) to retain a putative open reading frame (ORF)',
-)
-DEFAULTS.add(
-    'max_orf_cap',
-    3,
-    defn='the maximum number of ORFs to return (best putative ORFs will be retained)',
-)
-DEFAULTS.add(
-    'annotation_filters',
-    'choose_more_annotated,choose_transcripts_by_priority',
-    defn='a comma separated list of filters to apply to putative annotations',
-)
-DEFAULTS.add(
-    'draw_fusions_only',
-    True,
-    cast_type=tab.cast_boolean,
-    defn='flag to indicate if events which do not produce a fusion transcript should produce illustrations',
-)
-DEFAULTS.add(
-    'draw_non_synonymous_cdna_only',
-    True,
-    cast_type=tab.cast_boolean,
-    defn='flag to indicate if events which are synonymous at the cdna level should produce illustrations',
-)
 
 SPLICE_TYPE = MavisNamespace(
     RETAIN='retained intron',
diff --git a/mavis/annotate/file_io.py b/mavis/annotate/file_io.py
index 2683806f..ed4f45cb 100644
--- a/mavis/annotate/file_io.py
+++ b/mavis/annotate/file_io.py
@@ -11,55 +11,11 @@
 
 from ..constants import CODON_SIZE, GIEMSA_STAIN, START_AA, STOP_AA, STRAND, translate
 from ..interval import Interval
-from ..util import DEVNULL, LOG, WeakMavisNamespace, filepath
+from ..util import DEVNULL, LOG, filepath
 from .base import BioInterval, ReferenceName
 from .genomic import Exon, Gene, PreTranscript, Template, Transcript
 from .protein import Domain, Translation
 
-REFERENCE_DEFAULTS = WeakMavisNamespace()
-REFERENCE_DEFAULTS.add(
-    'template_metadata',
-    [],
-    cast_type=filepath,
-    listable=True,
-    defn='file containing the cytoband template information. Used for illustrations only',
-)
-REFERENCE_DEFAULTS.add(
-    'masking',
-    [],
-    cast_type=filepath,
-    listable=True,
-    defn='file containing regions for which input events overlapping them are dropped prior to validation',
-)
-REFERENCE_DEFAULTS.add(
-    'annotations',
-    [],
-    cast_type=filepath,
-    listable=True,
-    defn='path to the reference annotations of genes, transcript, exons, domains, etc',
-)
-REFERENCE_DEFAULTS.add(
-    'aligner_reference',
-    None,
-    cast_type=filepath,
-    nullable=True,
-    defn='path to the aligner reference file used for aligning the contig sequences',
-)
-REFERENCE_DEFAULTS.add(
-    'dgv_annotation',
-    [],
-    cast_type=filepath,
-    listable=True,
-    defn='Path to the dgv reference processed to look like the cytoband file.',
-)
-REFERENCE_DEFAULTS.add(
-    'reference_genome',
-    [],
-    cast_type=filepath,
-    listable=True,
-    defn='Path to the human reference genome fasta file',
-)
-
 
 def load_masking_regions(*filepaths):
     """
diff --git a/mavis/annotate/main.py b/mavis/annotate/main.py
index 61c7fd57..ffd54bd7 100644
--- a/mavis/annotate/main.py
+++ b/mavis/annotate/main.py
@@ -6,9 +6,9 @@
 
 from ..constants import COLUMNS, PRIME, PROTOCOL, sort_columns
 from ..error import DrawingFitError, NotSpecifiedError
-from ..illustrate.constants import DEFAULTS as ILLUSTRATION_DEFAULTS
 from ..illustrate.constants import DiagramSettings
 from ..illustrate.diagram import draw_sv_summary_diagram
+from ..schemas import DEFAULTS
 from ..util import LOG, generate_complete_stamp, mkdirp, read_inputs
 from .constants import PASS_FILENAME
 from .file_io import ReferenceFile
@@ -167,8 +167,9 @@ def main(
     )
 
     # now try generating the svg
+    illustration_defaults = get_by_prefix(DEFAULTS, 'illustrate.')
     drawing_config = DiagramSettings(
-        **{k: v for k, v in kwargs.items() if k in ILLUSTRATION_DEFAULTS}
+        **{k: v for k, v in kwargs.items() if k in illustration_defaults}
     )
 
     header_req = {
diff --git a/mavis/breakpoint.py b/mavis/breakpoint.py
index 63384d4d..2f903cd6 100644
--- a/mavis/breakpoint.py
+++ b/mavis/breakpoint.py
@@ -1,7 +1,9 @@
 from __future__ import division
+
 from copy import copy as _copy
+from typing import Callable, Dict, List, Optional, Set, Tuple
 
-from .constants import CIGAR, COLUMNS, DNA_ALPHABET, ORIENT, reverse_complement, STRAND, SVTYPE
+from .constants import CIGAR, COLUMNS, DNA_ALPHABET, ORIENT, STRAND, SVTYPE, reverse_complement
 from .error import InvalidRearrangement, NotSpecifiedError
 from .interval import Interval
 
@@ -12,6 +14,11 @@ class for storing information about a SV breakpoint
     coordinates are given as 1-indexed
     """
 
+    orient: str
+    chr: str
+    strand: str
+    seq: str
+
     @property
     def key(self):
         return (self.chr, self.start, self.end, self.orient, self.strand)
@@ -73,18 +80,23 @@ def to_dict(self):
 
 
 class BreakpointPair:
-    """"""
-
-    def __getattr__(self, attr):
-        data = object.__getattribute__(self, 'data')
-        try:
-            return data[COLUMNS[attr]]
-        except (KeyError, AttributeError):
-            try:
-                return data[attr]
-            except KeyError:
-                pass
-        raise AttributeError(attr)
+    break1: Breakpoint
+    break2: Breakpoint
+    stranded: bool
+    opposing_strands: bool
+    untemplated_seq: Optional[str]
+    data: Dict
+
+    # def __getattr__(self, attr):
+    #     data = object.__getattribute__(self, 'data')
+    #     try:
+    #         return data[COLUMNS[attr]]
+    #     except (KeyError, AttributeError):
+    #         try:
+    #             return data[attr]
+    #         except KeyError:
+    #             pass
+    #     raise AttributeError(attr)
 
     def __getitem__(self, index):
         try:
@@ -128,26 +140,26 @@ def __lt__(self, other):
         return self.untemplated_seq < other.untemplated_seq
 
     @property
-    def interchromosomal(self):
+    def interchromosomal(self) -> bool:
         """bool: True if the breakpoints are on different chromosomes, False otherwise"""
         if self.break1.chr == self.break2.chr:
             return False
         return True
 
     @property
-    def LL(self):
+    def LL(self) -> bool:
         return self.break1.orient == ORIENT.LEFT and self.break2.orient == ORIENT.LEFT
 
     @property
-    def LR(self):
+    def LR(self) -> bool:
         return self.break1.orient == ORIENT.LEFT and self.break2.orient == ORIENT.RIGHT
 
     @property
-    def RL(self):
+    def RL(self) -> bool:
         return self.break1.orient == ORIENT.RIGHT and self.break2.orient == ORIENT.LEFT
 
     @property
-    def RR(self):
+    def RR(self) -> bool:
         return self.break1.orient == ORIENT.RIGHT and self.break2.orient == ORIENT.RIGHT
 
     def copy(self):
@@ -160,22 +172,21 @@ def copy(self):
 
     def __init__(
         self,
-        b1,
-        b2,
-        stranded=False,
-        opposing_strands=None,
-        untemplated_seq=None,
-        data=None,
+        b1: Breakpoint,
+        b2: Breakpoint,
+        stranded: bool = False,
+        opposing_strands: Optional[bool] = None,
+        untemplated_seq: Optional[str] = None,
         **kwargs
     ):
         """
         Args:
-            b1 (Breakpoint): the first breakpoint
-            b2 (Breakpoint): the second breakpoint
-            stranded (bool): if not stranded then +/- is equivalent to -/+
-            opposing_strands (bool): are the strands at the breakpoint opposite? i.e. +/- instead of +/+
-            untemplated_seq (str): seq between the breakpoints that is not part of either breakpoint
-            data (dict): optional dictionary of attributes associated with this pair
+            b1: the first breakpoint
+            b2: the second breakpoint
+            stranded: if not stranded then +/- is equivalent to -/+
+            opposing_strands: are the strands at the breakpoint opposite? i.e. +/- instead of +/+
+            untemplated_seq: seq between the breakpoints that is not part of either breakpoint
+            data: optional dictionary of attributes associated with this pair
 
         Note:
             untemplated_seq should always be given wrt to the positive/forward reference strand
@@ -192,7 +203,7 @@ def __init__(
             self.break1 = b1
             self.break2 = b2
         self.stranded = stranded
-        self.opposing_strands = opposing_strands
+        self.opposing_strands = opposing_strands  # type: ignore
 
         if self.break1.orient != ORIENT.NS and self.break2.orient != ORIENT.NS:
             if self.opposing_strands is not None:
@@ -209,13 +220,7 @@ def __init__(
                 self.opposing_strands = self.break1.orient == self.break2.orient
         # between break1 and break2 not in either
         self.untemplated_seq = untemplated_seq
-        self.data = {}
-        if data is not None:
-            self.data.update(data)
-            conflicts = set(data.keys()) & set(kwargs.keys())
-            if conflicts:
-                raise TypeError('data got multiple values for data elements:', conflicts)
-        self.data.update(kwargs)
+        self.data = kwargs
 
         if self.break1.strand != STRAND.NS and self.break2.strand != STRAND.NS:
             opposing = self.break1.strand != self.break2.strand
@@ -275,16 +280,16 @@ def flatten(self):
         return row
 
     @classmethod
-    def classify(cls, pair, distance=None):
+    def classify(cls, pair, distance: Optional[Callable] = None) -> Set[str]:
         """
         uses the chr, orientations and strands to determine the
         possible structural_variant types that this pair could support
 
         Args:
             pair (BreakpointPair): the pair to classify
-            distance (Callable): if defined, will be passed to net size to use in narrowing the list of putative types (del vs ins)
+            distance: if defined, will be passed to net size to use in narrowing the list of putative types (del vs ins)
         Returns:
-            List[SVTYPE]: a list of possible SVTYPE
+            a list of possible SVTYPE
 
         Example:
             >>> bpp = BreakpointPair(Breakpoint('1', 1), Breakpoint('1', 9999), opposing_strands=True)
@@ -327,6 +332,7 @@ def classify(cls, pair, distance=None):
                     return {SVTYPE.DEL, SVTYPE.INS}
                 elif pair.break1.orient == ORIENT.RIGHT or pair.break2.orient == ORIENT.LEFT:
                     return {SVTYPE.DUP}
+                raise InvalidRearrangement(pair)
         else:  # interchromosomal
             if pair.opposing_strands:
                 if pair.LR or pair.RL:
@@ -337,7 +343,7 @@ def classify(cls, pair, distance=None):
                     raise InvalidRearrangement(pair)
                 return {SVTYPE.TRANS}
 
-    def net_size(self, distance=lambda x, y: Interval(abs(x - y))):
+    def net_size(self, distance=lambda x, y: Interval(abs(x - y))) -> Interval:
         """
         Returns the size of the event for a given pair. Mainly applicable to indels
         """
@@ -358,7 +364,7 @@ def net_size(self, distance=lambda x, y: Interval(abs(x - y))):
         return size
 
     @property
-    def is_putative_indel(self):
+    def is_putative_indel(self) -> bool:
         if self.interchromosomal or self.opposing_strands or self.break1.orient == ORIENT.RIGHT:
             return False
         return True
@@ -521,7 +527,7 @@ def untemplated_shift(self, reference_genome):
         )
         return (break2_shift, break1_shift)
 
-    def get_bed_repesentation(self):
+    def get_bed_repesentation(self) -> List[Tuple[str, int, int, Optional[str]]]:
         bed = []
         if self.interchromosomal:
             bed.append(
diff --git a/mavis/cluster/constants.py b/mavis/cluster/constants.py
deleted file mode 100644
index 107100bf..00000000
--- a/mavis/cluster/constants.py
+++ /dev/null
@@ -1,46 +0,0 @@
-from ..util import WeakMavisNamespace
-
-
-DEFAULTS = WeakMavisNamespace()
-"""
-- [cluster_initial_size_limit](/configuration/settings/#cluster_initial_size_limit)
-- [cluster_radius](/configuration/settings/#cluster_radius)
-- [limit_to_chr](/configuration/settings/#limit_to_chr)
-- [max_files](/configuration/settings/#max_files)
-- [max_proximity](/configuration/settings/#max_proximity)
-- [min_clusters_per_file](/configuration/settings/#min_clusters_per_file)
-- [uninformative_filter](/configuration/settings/#uninformative_filter)
-"""
-DEFAULTS.add(
-    'min_clusters_per_file', 50, defn='the minimum number of breakpoint pairs to output to a file'
-)
-DEFAULTS.add(
-    'max_files', 200, defn='The maximum number of files to output from clustering/splitting'
-)
-DEFAULTS.add(
-    'cluster_initial_size_limit',
-    25,
-    defn='the maximum cumulative size of both breakpoints for breakpoint pairs to be used in the initial clustering '
-    'phase (combining based on overlap)',
-)
-DEFAULTS.add('cluster_radius', 100, defn='maximum distance allowed between paired breakpoint pairs')
-DEFAULTS.add(
-    'max_proximity',
-    5000,
-    defn='the maximum distance away from an annotation before the region in considered to be uninformative',
-)
-DEFAULTS.add(
-    'uninformative_filter',
-    False,
-    defn='flag that determines if breakpoint pairs which are not within max_proximity to any annotations are filtered '
-    'out prior to clustering',
-)
-DEFAULTS.add(
-    'limit_to_chr',
-    [str(x) for x in range(1, 23)] + ['X', 'Y'],
-    cast_type=str,
-    listable=True,
-    nullable=True,
-    defn='A list of chromosome names to use. BreakpointPairs on other chromosomes will be filtered'
-    'out. For example \'1 2 3 4\' would filter out events/breakpoint pairs on any chromosomes but 1, 2, 3, and 4',
-)
diff --git a/mavis/config.py b/mavis/config.py
index f38483bb..480eb301 100644
--- a/mavis/config.py
+++ b/mavis/config.py
@@ -5,6 +5,8 @@
 
 import snakemake
 import tab
+from snakemake.exceptions import WorkflowError
+from snakemake.utils import validate as snakemake_validate
 
 from .annotate.file_io import ReferenceFile
 from .bam import stats
@@ -124,14 +126,16 @@ def validate_config(config: Dict, bam_stats: Optional[bool] = False, stage: str
     schema = 'config' if stage != SUBCOMMAND.OVERLAY else 'overlay'
 
     try:
-        snakemake.utils.validate(
-            config, os.path.join(os.path.dirname(__file__), f'schemas/{schema}.json')
+        snakemake_validate(
+            config,
+            os.path.join(os.path.dirname(__file__), f'schemas/{schema}.json'),
+            set_default=True,
         )
     except Exception as err:
         short_msg = '. '.join(
             [line for line in str(err).split('\n') if line.strip()][:3]
         )  # these can get super long
-        raise snakemake.WorkflowError(short_msg)
+        raise WorkflowError(short_msg)
 
     required = []
     if (
@@ -146,7 +150,7 @@ def validate_config(config: Dict, bam_stats: Optional[bool] = False, stage: str
 
     for req in required:
         if req not in config:
-            raise snakemake.WorkflowError(f'missing required property: {req}')
+            raise WorkflowError(f'missing required property: {req}')
 
     if schema == 'config':
         conversion_dir = os.path.join(config['output_dir'], 'converted_outputs')
@@ -218,7 +222,3 @@ def get_metavar(arg_type):
     elif arg_type == filepath:
         return 'FILEPATH'
     return None
-
-
-def get_by_prefix(config, prefix):
-    return {k.replace(prefix, ''): v for k, v in config.items() if k.startswith(prefix)}
diff --git a/mavis/illustrate/constants.py b/mavis/illustrate/constants.py
index 59e60121..670e5364 100644
--- a/mavis/illustrate/constants.py
+++ b/mavis/illustrate/constants.py
@@ -1,64 +1,7 @@
 from colour import Color
-from ..constants import GIEMSA_STAIN, float_fraction
-from ..util import WeakMavisNamespace
-
-DEFAULTS = WeakMavisNamespace()
-"""
-- [breakpoint_color](/configuration/settings/#breakpoint_color)
-- [domain_color](/configuration/settings/#domain_color)
-- [domain_mismatch_color](/configuration/settings/#domain_mismatch_color)
-- [domain_name_regex_filter](/configuration/settings/#domain_name_regex_filter)
-- [domain_scaffold_color](/configuration/settings/#domain_scaffold_color)
-- [drawing_width_iter_increase](/configuration/settings/#drawing_width_iter_increase)
-- [gene1_color_selected](/configuration/settings/#gene1_color_selected)
-- [gene1_color](/configuration/settings/#gene1_color)
-- [gene2_color_selected](/configuration/settings/#gene2_color_selected)
-- [gene2_color](/configuration/settings/#gene2_color)
-- [label_color](/configuration/settings/#label_color)
-- [mask_fill](/configuration/settings/#mask_fill)
-- [mask_opacity](/configuration/settings/#mask_opacity)
-- [max_drawing_retries](/configuration/settings/#max_drawing_retries)
-- [novel_exon_color](/configuration/settings/#novel_exon_color)
-- [scaffold_color](/configuration/settings/#scaffold_color)
-- [splice_color](/configuration/settings/#splice_color)
-- [width](/configuration/settings/#width)
-"""
-DEFAULTS.add('width', 1000, defn='The drawing width in pixels')
-DEFAULTS.add(
-    'domain_name_regex_filter',
-    r'^PF\d+$',
-    defn='The regular expression used to select domains to be displayed (filtered by name)',
-)
-DEFAULTS.add(
-    'max_drawing_retries',
-    5,
-    defn='The maximum number of retries for attempting a drawing. Each iteration the width is extended. If it '
-    'is still insufficient after this number a gene-level only drawing will be output',
-)
-DEFAULTS.add('scaffold_color', '#000000', defn='The color used for the gene/transcripts scaffolds')
-DEFAULTS.add('gene1_color_selected', '#518dc5', defn='The color of the first gene')
-DEFAULTS.add('gene2_color_selected', '#4c9677', defn='The color of the second gene')
-DEFAULTS.add('gene1_color', '#657e91', defn='The color of genes near the first gene')
-DEFAULTS.add('gene2_color', '#325556', defn='The color of genes near the second gene')
-DEFAULTS.add('label_color', '#000000', defn='The label color')
-DEFAULTS.add('domain_color', '#ccccb3', defn='Domain fill color')
-DEFAULTS.add('domain_mismatch_color', '#b2182b', defn='Domain fill color on 0%% match')
-DEFAULTS.add('novel_exon_color', '#5D3F6A', defn='Novel Exon fill color')
-DEFAULTS.add('splice_color', '#000000', defn='Splicing lines color')
-DEFAULTS.add('breakpoint_color', '#000000', defn='Breakpoint outline color')
-DEFAULTS.add('mask_fill', '#ffffff', defn='Color of mask (for deleted region etc.)')
-DEFAULTS.add('mask_opacity', 0.7, defn='opacity of the mask layer', cast_type=float_fraction)
-DEFAULTS.add('domain_scaffold_color', '#000000', defn='The color of the domain scaffold')
-DEFAULTS.add(
-    'drawing_width_iter_increase',
-    500,
-    defn='The amount (in  pixels) by which to increase the drawing width upon failure to fit',
-)
-DEFAULTS.add(
-    'exon_min_focus_size',
-    10,
-    defn='minimum size of an exon for it to be granted a label or min exon width',
-)
+
+from ..constants import GIEMSA_STAIN
+from ..schemas import DEFAULTS, get_by_prefix
 
 
 class DiagramSettings:
@@ -68,10 +11,11 @@ class DiagramSettings:
 
     def __init__(self, **kwargs):
         inputs = {}
-        inputs.update(DEFAULTS.items())
+        defaults = get_by_prefix(DEFAULTS, 'illustrate.')
+        inputs.update(defaults)
         inputs.update(kwargs)
         for arg, val in inputs.items():
-            if arg not in DEFAULTS:
+            if arg not in defaults:
                 raise KeyError('unrecognized argument', arg)
             setattr(self, arg, val)
         self.min_width = 10  # no element (exon, gene, etc can be less than this wide)
diff --git a/mavis/illustrate/scatter.py b/mavis/illustrate/scatter.py
index 5e0078df..976a6a85 100644
--- a/mavis/illustrate/scatter.py
+++ b/mavis/illustrate/scatter.py
@@ -1,9 +1,8 @@
 import os
 
-from ..bam.read import sequenced_strand, pileup
-from ..util import LOG, DEVNULL
+from ..bam.read import pileup, sequenced_strand
 from ..interval import Interval
-from ..validate.constants import DEFAULTS as VALIDATION_DEFAULTS
+from ..util import DEVNULL, LOG
 
 
 def bam_to_scatter(
@@ -16,6 +15,7 @@ def bam_to_scatter(
     axis_name=None,
     ymax=None,
     min_mapping_quality=0,
+    strand_determining_read=2,
     ymax_color='#FF0000',
 ):
     """
@@ -50,7 +50,7 @@ def read_filter(read):
         if strand is None:
             return False
         try:
-            return sequenced_strand(read, VALIDATION_DEFAULTS.strand_determining_read) != strand
+            return sequenced_strand(read, strand_determining_read) != strand
         except ValueError:
             return True
 
diff --git a/mavis/interval.py b/mavis/interval.py
index 941ee707..c78e3aa8 100644
--- a/mavis/interval.py
+++ b/mavis/interval.py
@@ -1,7 +1,12 @@
+from typing import Callable, Optional
+
+
 class Interval:
-    """"""
+    start: int
+    end: int
+    freq: int = 1
 
-    def __init__(self, start, end=None, freq=1, number_type=None):
+    def __init__(self, start: int, end: Optional[int] = None, freq: int = 1, number_type=None):
         """
         Args:
             start (int): the start of the interval (inclusive)
diff --git a/mavis/main.py b/mavis/main.py
index 9c6d678c..dfa9f127 100644
--- a/mavis/main.py
+++ b/mavis/main.py
@@ -269,7 +269,6 @@ def main(argv=None):
             with open(args.outputfile, 'w') as fh:
                 fh.write(json.dumps(config, sort_keys=True, indent='  '))
         else:
-            print(args)
             overlay_main(
                 buffer_length=args.buffer_length,
                 gene_name=args.gene_name,
diff --git a/mavis/pairing/constants.py b/mavis/pairing/constants.py
index 73c9d8ca..eebeffec 100644
--- a/mavis/pairing/constants.py
+++ b/mavis/pairing/constants.py
@@ -1,46 +1,12 @@
 from ..constants import CALL_METHOD, MavisNamespace
-from ..util import WeakMavisNamespace
-
-
-DEFAULTS = WeakMavisNamespace()
-"""
-- [contig_call_distance](/configuration/settings/#contig_call_distance)
-- [flanking_call_distance](/configuration/settings/#flanking_call_distance)
-- [spanning_call_distance](/configuration/settings/#spanning_call_distance)
-- [split_call_distance](/configuration/settings/#split_call_distance)
-"""
-DEFAULTS.add(
-    'flanking_call_distance',
-    50,
-    defn='the maximum distance allowed between breakpoint pairs (called by flanking pairs) in order for them to pair',
-)
-DEFAULTS.add(
-    'split_call_distance',
-    20,
-    defn='the maximum distance allowed between breakpoint pairs (called by split reads) in order for them to pair',
-)
-DEFAULTS.add(
-    'contig_call_distance',
-    10,
-    defn='the maximum distance allowed between breakpoint pairs (called by contig) in order for them to pair',
-)
-DEFAULTS.add(
-    'spanning_call_distance',
-    20,
-    defn='the maximum distance allowed between breakpoint pairs (called by spanning reads) in order for them to pair',
-)
-DEFAULTS.add(
-    'input_call_distance',
-    20,
-    defn='the maximum distance allowed between breakpoint pairs (called by input tools, not validated) in order for them to pair',
-)
+from ..schemas import DEFAULTS
 
 PAIRING_DISTANCES = MavisNamespace(
     **{
-        CALL_METHOD.FLANK: DEFAULTS.flanking_call_distance,
-        CALL_METHOD.SPAN: DEFAULTS.spanning_call_distance,
-        CALL_METHOD.SPLIT: DEFAULTS.split_call_distance,
-        CALL_METHOD.CONTIG: DEFAULTS.contig_call_distance,
-        CALL_METHOD.INPUT: DEFAULTS.input_call_distance,
+        CALL_METHOD.FLANK: DEFAULTS['pairing.flanking_call_distance'],
+        CALL_METHOD.SPAN: DEFAULTS['pairing.spanning_call_distance'],
+        CALL_METHOD.SPLIT: DEFAULTS['pairing.split_call_distance'],
+        CALL_METHOD.CONTIG: DEFAULTS['pairing.contig_call_distance'],
+        CALL_METHOD.INPUT: DEFAULTS['pairing.input_call_distance'],
     }
 )
diff --git a/mavis/pairing/pairing.py b/mavis/pairing/pairing.py
index e94bde37..316598c2 100644
--- a/mavis/pairing/pairing.py
+++ b/mavis/pairing/pairing.py
@@ -1,11 +1,10 @@
-from .constants import DEFAULTS, PAIRING_DISTANCES
-
 from ..annotate.variant import determine_prime
 from ..breakpoint import Breakpoint
 from ..constants import CALL_METHOD, COLUMNS, ORIENT, PRIME, PROTOCOL, STRAND
 from ..error import NotSpecifiedError
 from ..interval import Interval
 from ..util import DEVNULL
+from .constants import PAIRING_DISTANCES
 
 
 def product_key(bpp):
diff --git a/mavis/schemas/config.json b/mavis/schemas/config.json
index c50a8632..9769754c 100644
--- a/mavis/schemas/config.json
+++ b/mavis/schemas/config.json
@@ -287,7 +287,7 @@
             "description": "The drawing width in pixels",
             "type": "integer"
         },
-        "illustratebreakpoint_color": {
+        "illustrate.breakpoint_color": {
             "default": "#000000",
             "description": "Breakpoint outline color",
             "type": "string"
diff --git a/mavis/summary/constants.py b/mavis/summary/constants.py
index 4d087d7a..27de3695 100644
--- a/mavis/summary/constants.py
+++ b/mavis/summary/constants.py
@@ -1,60 +1,7 @@
-from ..constants import MavisNamespace, float_fraction
-from ..util import WeakMavisNamespace
+from ..constants import MavisNamespace
 
-
-DEFAULTS = WeakMavisNamespace()
 HOMOPOLYMER_MIN_LENGTH = 3
 
-"""
-- [filter_cdna_synon](/configuration/settings/#filter_cdna_synon)
-- [filter_min_flanking_reads](/configuration/settings/#filter_min_flanking_reads)
-- [filter_min_linking_split_reads](/configuration/settings/#filter_min_linking_split_reads)
-- [filter_min_remapped_reads](/configuration/settings/#filter_min_remapped_reads)
-- [filter_min_spanning_reads](/configuration/settings/#filter_min_spanning_reads)
-- [filter_min_split_reads](/configuration/settings/#filter_min_split_reads)
-- [filter_protein_synon](/configuration/settings/#filter_protein_synon)
-- [filter_min_complexity](/configuration/settings/#filter_min_complexity)
-- [filter_trans_homopolymers](/configuration/settings/#filter_trans_homopolymers)
-"""
-DEFAULTS.add(
-    'filter_min_remapped_reads', 5, defn='Minimum number of remapped reads for a call by contig'
-)
-DEFAULTS.add(
-    'filter_min_spanning_reads',
-    5,
-    defn='Minimum number of spanning reads for a call by spanning reads',
-)
-DEFAULTS.add(
-    'filter_min_flanking_reads',
-    10,
-    defn='Minimum number of flanking pairs for a call by flanking pairs',
-)
-DEFAULTS.add(
-    'filter_min_split_reads', 5, defn='Minimum number of split reads for a call by split reads'
-)
-DEFAULTS.add(
-    'filter_min_linking_split_reads',
-    1,
-    defn='Minimum number of linking split reads for a call by split reads',
-)
-DEFAULTS.add('filter_cdna_synon', True, defn='Filter all annotations synonymous at the cdna level')
-DEFAULTS.add(
-    'filter_protein_synon', False, defn='Filter all annotations synonymous at the protein level'
-)
-DEFAULTS.add(
-    'filter_trans_homopolymers',
-    True,
-    defn='Filter all single bp ins/del/dup events that are in a homopolymer region of at least '
-    '{} bps and are not paired to a genomic event'.format(HOMOPOLYMER_MIN_LENGTH),
-)
-DEFAULTS.add(
-    'filter_min_complexity',
-    0.2,
-    cast_type=float_fraction,
-    defn='Filter event calls based on call sequence complexity',
-)
-
-
 PAIRING_STATE = MavisNamespace(
     EXP='expressed',
     NO_EXP='not expressed',
diff --git a/mavis/tools/__init__.py b/mavis/tools/__init__.py
index 94719b81..307f4af5 100644
--- a/mavis/tools/__init__.py
+++ b/mavis/tools/__init__.py
@@ -1,30 +1,30 @@
 import itertools
+from typing import Callable, Dict, List
 
-from shortuuid import uuid
 import tab
+from shortuuid import uuid
 
 from ..breakpoint import Breakpoint, BreakpointPair
 from ..constants import COLUMNS, ORIENT, STRAND, SVTYPE
 from ..error import InvalidRearrangement
 from ..util import DEVNULL, read_bpp_from_input_file
-
-from .constants import SUPPORTED_TOOL, TRACKING_COLUMN, TOOL_SVTYPE_MAPPING
-from .transabyss import convert_row as _parse_transabyss
-from .cnvnator import convert_row as _parse_cnvnator
-from .vcf import convert_file as read_vcf
 from .breakdancer import convert_file as _convert_breakdancer_file
-from .starfusion import convert_row as _parse_starfusion
 from .chimerascan import convert_row as _parse_chimerascan
+from .cnvnator import convert_row as _parse_cnvnator
+from .constants import SUPPORTED_TOOL, TOOL_SVTYPE_MAPPING, TRACKING_COLUMN
+from .starfusion import convert_row as _parse_starfusion
+from .transabyss import convert_row as _parse_transabyss
+from .vcf import convert_file as read_vcf
 
 
 def convert_tool_output(
-    fnames,
-    file_type=SUPPORTED_TOOL.MAVIS,
-    stranded=False,
-    log=DEVNULL,
-    collapse=True,
-    assume_no_untemplated=True,
-):
+    fnames: List[str],
+    file_type: str = SUPPORTED_TOOL.MAVIS,
+    stranded: bool = False,
+    log: Callable = DEVNULL,
+    collapse: bool = True,
+    assume_no_untemplated: bool = True,
+) -> List[BreakpointPair]:
     """
     Reads output from a given SV caller and converts to a set of MAVIS breakpoint pairs. Also collapses duplicates
     """
@@ -36,7 +36,7 @@ def convert_tool_output(
             )
         )
     if collapse:
-        collapse_mapping = {}
+        collapse_mapping: Dict[BreakpointPair, List[BreakpointPair]] = {}
         for bpp in result:
             collapse_mapping.setdefault(bpp, []).append(bpp)
         log('collapsed', len(result), 'to', len(collapse_mapping), 'calls')
@@ -62,7 +62,9 @@ def convert_tool_output(
     return result
 
 
-def _convert_tool_row(row, file_type, stranded, assume_no_untemplated=True):
+def _convert_tool_row(
+    row: Dict, file_type: str, stranded: bool, assume_no_untemplated: bool = True
+) -> List[BreakpointPair]:
     """
     converts a row parsed from an input file to the appropriate column names for it to be converted to MAVIS style row
     """
@@ -214,8 +216,8 @@ def _convert_tool_row(row, file_type, stranded, assume_no_untemplated=True):
                 opposing_strands=oppose,
                 untemplated_seq=untemplated_seq,
                 event_type=event_type,
-                data={COLUMNS.tools: file_type, COLUMNS.tracking_id: std_row[COLUMNS.tracking_id]},
                 stranded=stranded,
+                **{COLUMNS.tools: file_type, COLUMNS.tracking_id: std_row[COLUMNS.tracking_id]}
             )
 
             for col, value in std_row.items():
@@ -237,12 +239,12 @@ def _convert_tool_row(row, file_type, stranded, assume_no_untemplated=True):
 
 
 def _convert_tool_output(
-    input_file,
-    file_type=SUPPORTED_TOOL.MAVIS,
-    stranded=False,
-    log=DEVNULL,
-    assume_no_untemplated=True,
-):
+    input_file: str,
+    file_type: str = SUPPORTED_TOOL.MAVIS,
+    stranded: bool = False,
+    log: Callable = DEVNULL,
+    assume_no_untemplated: bool = True,
+) -> List[BreakpointPair]:
     log('reading:', input_file)
     result = []
     rows = None
diff --git a/mavis/tools/chimerascan.py b/mavis/tools/chimerascan.py
index fa71a8cb..1d4be17b 100644
--- a/mavis/tools/chimerascan.py
+++ b/mavis/tools/chimerascan.py
@@ -1,9 +1,10 @@
-from ..constants import COLUMNS, ORIENT
+from typing import Dict
 
-from .constants import TRACKING_COLUMN, SUPPORTED_TOOL
+from ..constants import COLUMNS, ORIENT
+from .constants import SUPPORTED_TOOL, TRACKING_COLUMN
 
 
-def convert_row(row):
+def convert_row(row: Dict) -> Dict:
     """
     transforms the chimerscan output into the common format for expansion. Maps the input column
     names to column names which MAVIS can read
diff --git a/mavis/tools/vcf.py b/mavis/tools/vcf.py
index 5a5ec4e7..4ffa1e2b 100644
--- a/mavis/tools/vcf.py
+++ b/mavis/tools/vcf.py
@@ -1,14 +1,14 @@
 import re
+from typing import Dict, List, Tuple
 
 from pysam import VariantFile
 
 from ..constants import COLUMNS, ORIENT, SVTYPE
 from ..util import DEVNULL
-
 from .constants import SUPPORTED_TOOL
 
 
-def parse_bnd_alt(alt):
+def parse_bnd_alt(alt: str) -> Tuple[str, int, str, str, str, str]:
     """
     parses the alt statement from vcf files using the specification in vcf 4.2/4.2.
 
@@ -74,7 +74,7 @@ def parse_bnd_alt(alt):
         raise NotImplementedError('alt specification in unexpected format', alt)
 
 
-def convert_record(record, record_mapping={}, log=DEVNULL):
+def convert_record(record, record_mapping={}, log=DEVNULL) -> List[Dict]:
     """
     converts a vcf record
 
diff --git a/mavis/util.py b/mavis/util.py
index dee6b1e9..db673fa2 100644
--- a/mavis/util.py
+++ b/mavis/util.py
@@ -1,21 +1,21 @@
-from argparse import Namespace
-from datetime import datetime
 import errno
-from functools import partial
-from glob import glob
 import itertools
+import logging
 import os
 import re
-import time
-import logging
 import sys
+import time
+from argparse import Namespace
+from datetime import datetime
+from functools import partial
+from glob import glob
 
 from braceexpand import braceexpand
-from tab import tab
 from shortuuid import uuid
+from tab import tab
 
 from .breakpoint import Breakpoint, BreakpointPair
-from .constants import COLUMNS, ORIENT, PROTOCOL, sort_columns, STRAND, SVTYPE, MavisNamespace
+from .constants import COLUMNS, ORIENT, PROTOCOL, STRAND, SVTYPE, MavisNamespace, sort_columns
 from .error import InvalidRearrangement
 from .interval import Interval
 
diff --git a/mavis/validate/base.py b/mavis/validate/base.py
index 911e4984..49220ae9 100644
--- a/mavis/validate/base.py
+++ b/mavis/validate/base.py
@@ -1,29 +1,42 @@
 import itertools
 import logging
+from abc import abstractproperty
+from typing import Dict, List, Optional, Set, Tuple
+
+import pysam
 
 from ..assemble import assemble
 from ..bam import cigar as _cigar
 from ..bam import read as _read
 from ..bam.cache import BamCache
-from ..breakpoint import BreakpointPair
-from ..constants import (
-    CIGAR,
-    COLUMNS,
-    NA_MAPPING_QUALITY,
-    ORIENT,
-    PROTOCOL,
-    PYSAM_READ_FLAGS,
-    STRAND,
-    SVTYPE,
-    reverse_complement,
-)
+from ..breakpoint import Breakpoint, BreakpointPair
+from ..constants import (CIGAR, COLUMNS, NA_MAPPING_QUALITY, ORIENT, PROTOCOL,
+                         PYSAM_READ_FLAGS, STRAND, SVTYPE, reverse_complement)
 from ..error import NotSpecifiedError
 from ..interval import Interval
+from ..schemas import DEFAULTS
 from ..util import DEVNULL
-from .constants import DEFAULTS
 
 
 class Evidence(BreakpointPair):
+    bam_cache: BamCache
+    classification: Optional[str]
+    reference_genome: Dict
+    read_length: int
+    stdev_fragment_size: int
+    median_fragment_size: int
+    split_reads: Tuple[Set, Set]
+    flanking_pairs: Set
+    compatible_flanking_pairs: Set
+    spanning_reads: Set
+    counts: List[int]
+    contigs: List
+    half_mapped: Tuple[Set, Set]
+    compatible_window1: Optional[Interval]
+    compatible_window2: Optional[Interval]
+    config: Dict
+    assembly_max_kmer_size: int  # assigned in __init__; falls back to int(read_length * 0.7)
+
     @property
     def min_expected_fragment_size(self):
         # cannot be negative
@@ -48,6 +61,30 @@ def max_expected_fragment_size(self):
             )
         )
 
+    # hooks for the concrete evidence classes to override as read-only properties
+    @abstractproperty
+    def strand_determining_read(self):
+        """read-only hook; this base implementation returns None"""
+        pass
+
+    @abstractproperty
+    def min_mapping_quality(self):
+        """read-only hook; this base implementation returns None"""
+        pass
+
+    # The four evidence windows are plain annotations rather than abstract
+    # properties: concrete classes assign them in __init__ (for example
+    # `self.outer_window1 = self.generate_window(self.break1)`), and a
+    # getter-only property inherited from this class is a data descriptor that
+    # turns every such assignment into an AttributeError. Redeclaring the name
+    # as a bare annotation in the subclass does not shadow the property.
+    # outer windows: assigned from generate_window() by GenomeEvidence
+    outer_window1: Interval
+    outer_window2: Interval
+    # inner windows: breakpoint padded by call_error + read_length - 1 in GenomeEvidence
+    inner_window1: Interval
+    inner_window2: Interval
+
     def __init__(
         self,
         break1,
@@ -60,9 +97,10 @@ def __init__(
         stranded=False,
         opposing_strands=None,
         untemplated_seq=None,
-        data={},
         classification=None,
-        **kwargs
+        config=DEFAULTS,
+        assembly_max_kmer_size=None,
+        **kwargs,
     ):
         """
         Args:
@@ -77,6 +115,7 @@ def __init__(
         # initialize the breakpoint pair
         self.bam_cache = bam_cache
         self.stranded = stranded and bam_cache.stranded
+        self.config = config
         BreakpointPair.__init__(
             self,
             break1,
@@ -84,7 +123,7 @@ def __init__(
             stranded=stranded,
             opposing_strands=opposing_strands,
             untemplated_seq=untemplated_seq,
-            **data
+            **kwargs,
         )
         # check that the breakpoints are within the reference length
         if reference_genome:
@@ -110,16 +149,9 @@ def __init__(
                         len(reference_genome[self.break2.chr].seq),
                     )
                 )
-        defaults = dict()
-        for arg in kwargs:
-            if arg not in DEFAULTS:
-                raise AttributeError('unrecognized attribute', arg)
-        defaults.update(DEFAULTS.items())
-        kwargs.setdefault('assembly_max_kmer_size', int(read_length * 0.7))
-        defaults.update(kwargs)  # input arguments should override the defaults
-        for arg, val in defaults.items():
-            setattr(self, arg, val)
-
+        self.assembly_max_kmer_size = (
+            assembly_max_kmer_size if assembly_max_kmer_size is not None else int(read_length * 0.7)
+        )
         self.bam_cache = bam_cache
         self.classification = classification
         self.reference_genome = reference_genome
@@ -163,11 +195,11 @@ def __init__(
             pass
 
     @staticmethod
-    def distance(start, end):
+    def distance(start: int, end: int):
         return Interval(abs(end - start))
 
     @staticmethod
-    def traverse(start, distance, direction):
+    def traverse(start: int, distance: int, direction: str) -> Interval:
         if direction == ORIENT.LEFT:
             return Interval(start - distance)
         return Interval(start + distance)
@@ -195,7 +227,9 @@ def standardize_read(self, read):
         )
         prefix = 0
         try:
-            cigar, prefix = _cigar.extend_softclipping(cigar, self.min_anchor_exact)
+            cigar, prefix = _cigar.extend_softclipping(
+                cigar, self.config['validate.min_anchor_exact']
+            )
         except AttributeError:
             pass
         read.cigar = _cigar.join(cigar)
@@ -229,11 +263,8 @@ def compatible_type(self):
             return SVTYPE.INS
         return None
 
-    def compute_fragment_size(self, read, mate):
+    def compute_fragment_size(self, read: pysam.AlignedSegment, mate: pysam.AlignedSegment):
         """
-        Args:
-            read (pysam.AlignedSegment):
-            mate (pysam.AlignedSegment):
         Returns:
             Interval: interval representing the range of possible fragment sizes for this read pair
         """
@@ -251,7 +282,7 @@ def supporting_reads(self):
         result.update(self.spanning_reads)
         return result
 
-    def collect_spanning_read(self, read):
+    def collect_spanning_read(self, read: pysam.AlignedSegment):
         """
         spanning read: a read covering BOTH breakpoints
 
@@ -259,7 +290,7 @@ def collect_spanning_read(self, read):
         here since they will be collected already
 
         Args:
-            read (pysam.AlignedSegment): the putative spanning read
+            read: the putative spanning read
 
         Returns:
             bool:
@@ -302,14 +333,16 @@ def collect_spanning_read(self, read):
                         return True
         return False
 
-    def collect_compatible_flanking_pair(self, read, mate, compatible_type):
+    def collect_compatible_flanking_pair(
+        self, read: pysam.AlignedSegment, mate: pysam.AlignedSegment, compatible_type: str
+    ) -> bool:
         """
         checks if a given read meets the minimum quality criteria to be counted as evidence as stored as support for
         this event
 
         Args:
-            read (pysam.AlignedSegment): the read to add
-            mate (pysam.AlignedSegment): the mate
+            read: the read to add
+            mate: the mate
             compatible_type (SVTYPE): the type we are collecting for
 
         Returns:
@@ -394,14 +427,14 @@ def collect_compatible_flanking_pair(self, read, mate, compatible_type):
 
         return False
 
-    def collect_flanking_pair(self, read, mate):
+    def collect_flanking_pair(self, read: pysam.AlignedSegment, mate: pysam.AlignedSegment):
         """
         checks if a given read meets the minimum quality criteria to be counted as evidence as stored as support for
         this event
 
         Args:
-            read (pysam.AlignedSegment): the read to add
-            mate (pysam.AlignedSegment): the mate
+            read: the read to add
+            mate: the mate
 
         Returns:
             bool:
@@ -505,11 +538,11 @@ def collect_flanking_pair(self, read, mate):
 
         return False
 
-    def collect_half_mapped(self, read, mate):
+    def collect_half_mapped(self, read: pysam.AlignedSegment, mate: pysam.AlignedSegment):
         """
         Args:
-            read (pysam.AlignedSegment): the read to add
-            mate (pysam.AlignedSegment): the unmapped mate
+            read: the read to add
+            mate: the unmapped mate
 
         Returns:
             bool:
@@ -534,12 +567,12 @@ def collect_half_mapped(self, read, mate):
             added = True
         return added
 
-    def collect_split_read(self, read, first_breakpoint):
+    def collect_split_read(self, read: pysam.AlignedSegment, first_breakpoint: bool):
         """
         adds a split read if it passes the criteria filters and raises a warning if it does not
 
         Args:
-            read (pysam.AlignedSegment): the read to add
+            read: the read to add
             first_breakpoint (bool): add to the first breakpoint (or second if false)
         Returns:
             bool:
@@ -605,7 +638,10 @@ def collect_split_read(self, read, first_breakpoint):
                 len(read.query_sequence),
             )
 
-        if len(primary) < self.min_anchor_exact or len(clipped) < self.min_softclipping:
+        if (
+            len(primary) < self.config['validate.min_anchor_exact']
+            or len(clipped) < self.config['validate.min_softclipping']
+        ):
             # split read does not meet the minimum anchor criteria
             return False
         if not read.has_tag(PYSAM_READ_FLAGS.RECOMPUTED_CIGAR) or not read.get_tag(
@@ -614,14 +650,17 @@ def collect_split_read(self, read, first_breakpoint):
             read = self.standardize_read(read)
         # data quality filters
         if (
-            _cigar.alignment_matches(read.cigar) >= self.min_sample_size_to_apply_percentage
-            and _cigar.match_percent(read.cigar) < self.min_anchor_match
+            _cigar.alignment_matches(read.cigar)
+            >= self.config['validate.min_sample_size_to_apply_percentage']
+            and _cigar.match_percent(read.cigar) < self.config['validate.min_anchor_match']
         ):
             return False  # too poor quality of an alignment
         if (
-            _cigar.longest_exact_match(read.cigar) < self.min_anchor_exact
-            and _cigar.longest_fuzzy_match(read.cigar, self.fuzzy_mismatch_number)
-            < self.min_anchor_fuzzy
+            _cigar.longest_exact_match(read.cigar) < self.config['validate.min_anchor_exact']
+            and _cigar.longest_fuzzy_match(
+                read.cigar, self.config['validate.fuzzy_mismatch_number']
+            )
+            < self.config['validate.min_anchor_fuzzy']
         ):
             return False  # too poor quality of an alignment
         else:
@@ -636,14 +675,14 @@ def collect_split_read(self, read, first_breakpoint):
         putative_alignments = None
         # figure out how much of the read must match when remaped
         min_match_tgt = read.cigar[-1][1] if breakpoint.orient == ORIENT.LEFT else read.cigar[0][1]
-        min_match_tgt = min(min_match_tgt * self.min_anchor_match, min_match_tgt - 1) / len(
-            read.query_sequence
-        )
+        min_match_tgt = min(
+            min_match_tgt * self.config['validate.min_anchor_match'], min_match_tgt - 1
+        ) / len(read.query_sequence)
         if not self.opposing_strands:  # same strand
             sc_align = _read.nsb_align(
                 opposite_breakpoint_ref,
                 read.query_sequence,
-                min_consecutive_match=self.min_anchor_exact,
+                min_consecutive_match=self.config['validate.min_anchor_exact'],
                 min_match=min_match_tgt,
                 min_overlap_percent=min_match_tgt,
             )  # split half to this side
@@ -657,7 +696,7 @@ def collect_split_read(self, read, first_breakpoint):
             revcomp_sc_align = _read.nsb_align(
                 opposite_breakpoint_ref,
                 revcomp_sc_align,
-                min_consecutive_match=self.min_anchor_exact,
+                min_consecutive_match=self.config['validate.min_anchor_exact'],
                 min_match=min_match_tgt,
                 min_overlap_percent=min_match_tgt,
             )
@@ -683,7 +722,9 @@ def collect_split_read(self, read, first_breakpoint):
             alignment.next_reference_id = read.next_reference_id
             alignment.mapping_quality = NA_MAPPING_QUALITY
             try:
-                cigar, offset = _cigar.extend_softclipping(alignment.cigar, self.min_anchor_exact)
+                cigar, offset = _cigar.extend_softclipping(
+                    alignment.cigar, self.config['validate.min_anchor_exact']
+                )
                 alignment.cigar = cigar
                 alignment.reference_start = alignment.reference_start + offset
             except AttributeError:
@@ -705,27 +746,31 @@ def collect_split_read(self, read, first_breakpoint):
                 alignment.template_length = 0
             if (
                 _cigar.alignment_matches(alignment.cigar)
-                >= self.min_sample_size_to_apply_percentage
-                and _cigar.match_percent(alignment.cigar) < self.min_anchor_match
+                >= self.config['validate.min_sample_size_to_apply_percentage']
+                and _cigar.match_percent(alignment.cigar) < self.config['validate.min_anchor_match']
             ):
                 continue
             if (
-                _cigar.longest_exact_match(alignment.cigar) < self.min_anchor_exact
-                and _cigar.longest_fuzzy_match(alignment.cigar, self.fuzzy_mismatch_number)
-                < self.min_anchor_fuzzy
+                _cigar.longest_exact_match(alignment.cigar)
+                < self.config['validate.min_anchor_exact']
+                and _cigar.longest_fuzzy_match(
+                    alignment.cigar, self.config['validate.fuzzy_mismatch_number']
+                )
+                < self.config['validate.min_anchor_fuzzy']
             ):
                 continue
-            if self.max_sc_preceeding_anchor is not None:
+            if self.config['validate.max_sc_preceeding_anchor'] is not None:
                 if opposite_breakpoint.orient == ORIENT.LEFT:
                     if (
                         alignment.cigar[0][0] == CIGAR.S
-                        and alignment.cigar[0][1] > self.max_sc_preceeding_anchor
+                        and alignment.cigar[0][1] > self.config['validate.max_sc_preceeding_anchor']
                     ):
                         continue
                 elif opposite_breakpoint.orient == ORIENT.RIGHT:
                     if (
                         alignment.cigar[-1][0] == CIGAR.S
-                        and alignment.cigar[-1][1] > self.max_sc_preceeding_anchor
+                        and alignment.cigar[-1][1]
+                        > self.config['validate.max_sc_preceeding_anchor']
                     ):
                         continue
             alignment.set_key()  # set the hash key before we add the read as evidence
@@ -747,7 +792,7 @@ def collect_split_read(self, read, first_breakpoint):
             )  # add to the opposite breakpoint
         return True
 
-    def decide_sequenced_strand(self, reads):
+    def decide_sequenced_strand(self, reads: Set[pysam.AlignedSegment]):
         """
         given a set of reads, determines the sequenced strand (if possible) and then returns the majority
         strand found
@@ -780,9 +825,9 @@ def decide_sequenced_strand(self, reads):
         else:
             ratio = strand_calls[STRAND.POS] / (strand_calls[STRAND.NEG] + strand_calls[STRAND.POS])
             neg_ratio = 1 - ratio
-            if ratio >= self.assembly_strand_concordance:
+            if ratio >= self.config['validate.assembly_strand_concordance']:
                 return STRAND.POS
-            elif neg_ratio >= self.assembly_strand_concordance:
+            elif neg_ratio >= self.config['validate.assembly_strand_concordance']:
                 return STRAND.NEG
             raise ValueError(
                 'Could not determine the strand. Equivocal POS/(NEG + POS) ratio',
@@ -910,9 +955,9 @@ def assemble_contig(self, log=DEVNULL):
                         build_strand[STRAND.NEG] + build_strand[STRAND.POS]
                     )
                     neg_ratio = 1 - ratio
-                    if ratio >= self.assembly_strand_concordance:
+                    if ratio >= self.config['validate.assembly_strand_concordance']:
                         flipped_build = False
-                    elif neg_ratio >= self.assembly_strand_concordance:
+                    elif neg_ratio >= self.config['validate.assembly_strand_concordance']:
                         flipped_build = True
                     else:
                         continue
@@ -1180,3 +1225,26 @@ def get_bed_repesentation(self):
         bed.append((self.break2.chr, self.outer_window2[0] - 1, self.outer_window2[1], name))
         bed.append((self.break2.chr, self.inner_window2[0] - 1, self.inner_window2[1], name))
         return bed
+
+    def generate_window(self, breakpoint: Breakpoint) -> Interval:
+        """
+        given some input breakpoint uses the current evidence setting to determine an
+        appropriate window/range of where one should search for supporting reads
+
+        The read length (self.read_length) and the call error buffer
+        (self.config['validate.call_error']) are read from the evidence object itself
+
+        Args:
+            breakpoint: the breakpoint we are generating the evidence window for
+        Returns:
+            Interval: the range where reads should be read from the bam looking for evidence for this event
+        """
+        call_error = self.config['validate.call_error']
+        start = breakpoint.start - self.max_expected_fragment_size - call_error + 1
+        end = breakpoint.end + self.max_expected_fragment_size + call_error - 1
+
+        if breakpoint.orient == ORIENT.LEFT:
+            end = breakpoint.end + call_error + self.read_length - 1
+        elif breakpoint.orient == ORIENT.RIGHT:
+            start = breakpoint.start - call_error - self.read_length + 1
+        return Interval(max([1, start]), max([end, 1]))
diff --git a/mavis/validate/call.py b/mavis/validate/call.py
index 2c69d981..a2f49bfc 100644
--- a/mavis/validate/call.py
+++ b/mavis/validate/call.py
@@ -1,10 +1,10 @@
 import itertools
 import math
 import statistics
+from typing import Optional, Set
 
-from ..align import SplitAlignment, call_read_events, call_paired_read_event, convert_to_duplication
+from ..align import SplitAlignment, call_paired_read_event, call_read_events, convert_to_duplication
 from ..bam import read as _read
-
 from ..breakpoint import Breakpoint, BreakpointPair
 from ..constants import (
     CALL_METHOD,
@@ -25,6 +25,15 @@ class for holding evidence and the related calls since we can't freeze the evide
     just a reference to the evidence object and decisions on class, exact breakpoints, etc
     """
 
+    spanning_reads: Set
+    flanking_pairs: Set
+    break1_split_reads: Set
+    break2_split_reads: Set
+    compatible_flanking_pairs: Set
+    compatible_type: str
+    contig: Optional
+    contig_alignment: Optional
+
     @property
     def has_compatible(self):
         return False if self.compatible_type is None else True
diff --git a/mavis/validate/constants.py b/mavis/validate/constants.py
index a1a84ed5..a8c884d6 100644
--- a/mavis/validate/constants.py
+++ b/mavis/validate/constants.py
@@ -1,263 +1 @@
-from ..align import SUPPORTED_ALIGNER
-from ..constants import float_fraction
-from ..util import WeakMavisNamespace
-
 PASS_FILENAME = 'validation-passed.tab'
-
-DEFAULTS = WeakMavisNamespace()
-"""
-see [settings section](/configuration/settings)
-"""
-DEFAULTS.add(
-    'min_call_complexity',
-    0.10,
-    cast_type=float_fraction,
-    defn='The minimum complexity score for a call sequence. Is an average for non-contig calls. Filters '
-    'low complexity contigs before alignment. see [contig_complexity](#contig_complexity)',
-)
-DEFAULTS.add(
-    'aligner',
-    SUPPORTED_ALIGNER.BLAT,
-    cast_type=SUPPORTED_ALIGNER,
-    defn='the aligner to use to map the contigs/reads back to the reference e.g blat or bwa',
-)
-DEFAULTS.add(
-    'assembly_kmer_size',
-    0.74,
-    cast_type=float_fraction,
-    defn='The percent of the read length to make kmers for assembly',
-)
-DEFAULTS.add(
-    'assembly_max_paths',
-    8,
-    defn='the maximum number of paths to resolve. This is used to limit when there is a messy assembly graph to '
-    'resolve. The assembly will pre-calculate the number of paths (or putative assemblies) and stop if it is greater '
-    'than the given setting.',
-)
-DEFAULTS.add(
-    'assembly_min_uniq',
-    0.10,
-    cast_type=float_fraction,
-    defn='Minimum percent uniq required to keep separate assembled contigs. If contigs are more similar then the lower scoring, then shorter, contig is dropped',
-)
-DEFAULTS.add(
-    'assembly_min_exact_match_to_remap',
-    15,
-    defn='The minimum length of exact matches to initiate remapping a read to a contig',
-)
-DEFAULTS.add(
-    'assembly_min_edge_trim_weight',
-    3,
-    defn='this is used to simplify the DeBruijn graph before path finding. Edges with less than this frequency will '
-    'be discarded if they are non-cutting, at a fork, or the end of a path',
-)
-DEFAULTS.add(
-    'assembly_min_remap_coverage',
-    0.9,
-    cast_type=float_fraction,
-    defn='Minimum fraction of the contig sequence which the remapped sequences must align over',
-)
-DEFAULTS.add(
-    'assembly_min_remapped_seq',
-    3,
-    defn='The minimum input sequences that must remap for an assembled contig to be used',
-)
-DEFAULTS.add(
-    'assembly_strand_concordance',
-    0.51,
-    cast_type=float_fraction,
-    defn='When the number of remapped reads from each strand are compared, the ratio must be above this number to '
-    'decide on the strand',
-)
-DEFAULTS.add(
-    'blat_min_identity',
-    0.9,
-    cast_type=float_fraction,
-    defn='The minimum percent identity match required for blat results when aligning contigs',
-)
-DEFAULTS.add(
-    'blat_limit_top_aln', 10, defn='Number of results to return from blat (ranking based on score)'
-)
-DEFAULTS.add('call_error', 10, defn='buffer zone for the evidence window')
-DEFAULTS.add(
-    'contig_aln_max_event_size',
-    50,
-    defn='relates to determining breakpoints when pairing contig alignments. For any given read in a putative pair '
-    'the soft clipping is extended to include any events of greater than this size. The softclipping is added to the '
-    'side of the alignment as indicated by the breakpoint we are assigning pairs to',
-)
-DEFAULTS.add(
-    'contig_aln_merge_inner_anchor',
-    20,
-    defn='the minimum number of consecutive exact match base pairs to not merge events within a contig alignment',
-)
-DEFAULTS.add(
-    'contig_aln_merge_outer_anchor',
-    15,
-    defn='minimum consecutively aligned exact matches to anchor an end for merging internal events',
-)
-DEFAULTS.add(
-    'contig_aln_min_anchor_size',
-    50,
-    defn='the minimum number of aligned bases for a contig (M or =) in order to simplify. Do not have to be consecutive.',
-)
-DEFAULTS.add(
-    'contig_aln_min_query_consumption',
-    0.9,
-    cast_type=float_fraction,
-    defn='minimum fraction of the original query sequence that must be used by the read(s) of the alignment',
-)
-DEFAULTS.add(
-    'contig_aln_min_extend_overlap',
-    10,
-    defn='minimum number of bases the query coverage interval must be extended by in order to pair alignments as a single split alignment',
-)
-DEFAULTS.add(
-    'contig_aln_min_score',
-    0.9,
-    cast_type=float_fraction,
-    defn='minimum score for a contig to be used as evidence in a call by contig',
-)
-DEFAULTS.add(
-    'fetch_min_bin_size',
-    50,
-    defn='the minimum size of any bin for reading from a bam file. Increasing this number will result in smaller bins '
-    'being merged or less bins being created (depending on the fetch method)',
-)
-DEFAULTS.add(
-    'fetch_reads_bins',
-    5,
-    defn='number of bins to split an evidence window into to ensure more even sampling of high coverage regions',
-)
-DEFAULTS.add(
-    'fetch_reads_limit',
-    3000,
-    defn='maximum number of reads, cap, to loop over for any given evidence window',
-)
-DEFAULTS.add(
-    'trans_fetch_reads_limit',
-    12000,
-    cast_type=int,
-    nullable=True,
-    defn='Related to [fetch_reads_limit](#fetch_reads_limit). Overrides fetch_reads_limit for transcriptome libraries when set. '
-    'If this has a value of None then fetch_reads_limit will be used for transcriptome libraries instead',
-)
-DEFAULTS.add(
-    'filter_secondary_alignments',
-    True,
-    defn='filter secondary alignments when gathering read evidence',
-)
-DEFAULTS.add(
-    'fuzzy_mismatch_number',
-    1,
-    defn='The number of events/mismatches allowed to be considered a fuzzy match',
-)
-DEFAULTS.add(
-    'max_sc_preceeding_anchor',
-    6,
-    defn='when remapping a softclipped read this determines the amount of softclipping allowed on the side opposite of '
-    'where we expect it. For example for a softclipped read on a breakpoint with a left orientation this limits the '
-    'amount of softclipping that is allowed on the right. If this is set to None then there is no limit on softclipping',
-)
-DEFAULTS.add(
-    'min_anchor_exact',
-    6,
-    defn='Applies to re-aligning softclipped reads to the opposing breakpoint. The minimum '
-    'number of consecutive exact matches to anchor a read to initiate targeted realignment',
-)
-DEFAULTS.add(
-    'min_anchor_fuzzy',
-    10,
-    defn='Applies to re-aligning softclipped reads to the opposing breakpoint. The minimum '
-    'length of a fuzzy match to anchor a read to initiate targeted realignment',
-)
-DEFAULTS.add(
-    'min_anchor_match',
-    0.9,
-    cast_type=float_fraction,
-    defn='Minimum percent match for a read to be kept as evidence',
-)
-DEFAULTS.add(
-    'min_double_aligned_to_estimate_insertion_size',
-    2,
-    defn='The minimum number of reads which map soft-clipped to both breakpoints to assume the size of the '
-    'untemplated sequence between the breakpoints is at most the read length - 2 * min_softclipping',
-)
-DEFAULTS.add(
-    'min_flanking_pairs_resolution',
-    10,
-    defn='the minimum number of flanking reads required to call a breakpoint by flanking evidence',
-)
-DEFAULTS.add(
-    'min_linking_split_reads',
-    2,
-    defn='The minimum number of split reads which aligned to both breakpoints',
-)
-DEFAULTS.add(
-    'min_mapping_quality', 5, defn='the minimum mapping quality of reads to be used as evidence'
-)
-DEFAULTS.add(
-    'trans_min_mapping_quality',
-    0,
-    cast_type=int,
-    nullable=True,
-    defn='Related to [min_mapping_quality](#min_mapping_quality). Overrides the min_mapping_quality if the library is a transcriptome '
-    'and this is set to any number not None. If this value is None, min_mapping_quality is used for transcriptomes as'
-    'well as genomes',
-)
-DEFAULTS.add(
-    'min_non_target_aligned_split_reads',
-    1,
-    defn='The minimum number of split reads aligned to a breakpoint by the input bam and no forced by local '
-    'alignment to the target region to call a breakpoint by split read evidence',
-)
-DEFAULTS.add(
-    'min_sample_size_to_apply_percentage',
-    10,
-    defn='Minimum number of aligned bases to compute a match percent. '
-    'If there are less than this number of aligned bases (match or mismatch) the percent comparator is not used',
-)
-DEFAULTS.add(
-    'min_softclipping',
-    6,
-    defn='minimum number of soft-clipped bases required for a read to be used as soft-clipped evidence',
-)
-DEFAULTS.add(
-    'min_spanning_reads_resolution',
-    5,
-    defn='Minimum number of spanning reads required to call an event by spanning evidence',
-)
-DEFAULTS.add(
-    'min_splits_reads_resolution',
-    3,
-    defn='minimum number of split reads required to call a breakpoint by split reads',
-)
-DEFAULTS.add(
-    'stdev_count_abnormal',
-    3.0,
-    defn='the number of standard deviations away from the normal considered expected and therefore not qualifying as '
-    'flanking reads',
-)
-DEFAULTS.add(
-    'strand_determining_read',
-    2,
-    defn='1 or 2. The read in the pair which determines if (assuming a stranded protocol) the first or second read in '
-    'the pair matches the strand sequenced',
-)
-DEFAULTS.add(
-    'outer_window_min_event_size',
-    125,
-    defn='the minimum size of an event in order for flanking read evidence to be collected',
-)
-DEFAULTS.add(
-    'write_evidence_files',
-    True,
-    defn='write the intermediate bam and bed files containing the raw evidence collected and '
-    'contigs aligned. Not required for subsequent steps but can be useful in debugging and deep investigation of events',
-)
-DEFAULTS.add(
-    'clean_aligner_files',
-    False,
-    defn='Remove the aligner output files after the validation stage is complete. Not'
-    ' required for subsequent steps but can be useful in debugging and deep investigation of events',
-)
diff --git a/mavis/validate/evidence.py b/mavis/validate/evidence.py
index 0d065d7e..8719210c 100644
--- a/mavis/validate/evidence.py
+++ b/mavis/validate/evidence.py
@@ -1,28 +1,49 @@
 import itertools
+from typing import Optional
+
+import pysam
 
-from .base import Evidence
 from ..align import SplitAlignment, call_read_events
-from ..bam import cigar as _cigar
 from ..annotate.variant import overlapping_transcripts
+from ..bam import cigar as _cigar
 from ..breakpoint import Breakpoint
-from ..constants import ORIENT, PROTOCOL, STRAND, SVTYPE, CIGAR
+from ..constants import CIGAR, ORIENT, PROTOCOL, STRAND, SVTYPE
 from ..interval import Interval
+from ..schemas import DEFAULTS
+from .base import Evidence
 
 
 class GenomeEvidence(Evidence):
+    outer_window1: Interval
+    outer_window2: Interval
+    inner_window1: Interval
+    inner_window2: Interval
+    compatible_window1: Interval
+    compatible_window2: Interval
+    protocol: str
+
+    @property
+    def min_mapping_quality(self):
+        return self.config['validate.min_mapping_quality']
+
+    @property
+    def fetch_reads_limit(self):
+        return self.config['validate.fetch_reads_limit']
+
     def __init__(self, *pos, **kwargs):
         Evidence.__init__(self, *pos, **kwargs)
         self.protocol = PROTOCOL.GENOME
 
         self.outer_window1 = self.generate_window(self.break1)
         self.outer_window2 = self.generate_window(self.break2)
+        call_error = self.config['validate.call_error']
         self.inner_window1 = Interval(
-            max([self.break1.start - self.call_error - self.read_length + 1, 1]),
-            self.break1.end + self.call_error + self.read_length - 1,
+            max([self.break1.start - call_error - self.read_length + 1, 1]),
+            self.break1.end + call_error + self.read_length - 1,
         )
         self.inner_window2 = Interval(
-            max([self.break2.start - self.call_error - self.read_length + 1, 1]),
-            self.break2.end + self.call_error + self.read_length - 1,
+            max([self.break2.start - call_error - self.read_length + 1, 1]),
+            self.break2.end + call_error + self.read_length - 1,
         )
 
         if SVTYPE.INS in self.putative_event_types():
@@ -64,42 +85,32 @@ def __init__(self, *pos, **kwargs):
             self.compatible_window1 = self.generate_window(compt_break1)
             self.compatible_window2 = self.generate_window(compt_break2)
 
-    def generate_window(self, breakpoint):
-        """
-        given some input breakpoint uses the current evidence setting to determine an
-        appropriate window/range of where one should search for supporting reads
-
-        Args:
-            breakpoint (Breakpoint): the breakpoint we are generating the evidence window for
-            read_length (int): the read length
-            call_error (int):
-                adds a buffer to the calculations if confidence in the breakpoint calls is low can increase this
-        Returns:
-            Interval: the range where reads should be read from the bam looking for evidence for this event
-        """
-        start = breakpoint.start - self.max_expected_fragment_size - self.call_error + 1
-        end = breakpoint.end + self.max_expected_fragment_size + self.call_error - 1
-
-        if breakpoint.orient == ORIENT.LEFT:
-            end = breakpoint.end + self.call_error + self.read_length - 1
-        elif breakpoint.orient == ORIENT.RIGHT:
-            start = breakpoint.start - self.call_error - self.read_length + 1
-        return Interval(max([1, start]), max([end, 1]))
-
-    def compute_fragment_size(self, read, mate=None):
+    def compute_fragment_size(
+        self, read: pysam.AlignedSegment, mate: Optional[pysam.AlignedSegment] = None
+    ):
         return Interval(abs(read.template_length))
 
 
 class TranscriptomeEvidence(Evidence):
+    outer_window1: Interval
+    outer_window2: Interval
+    inner_window1: Interval
+    inner_window2: Interval
+    compatible_window1: Interval
+    compatible_window2: Interval
+    protocol: str
+
+    @property
+    def min_mapping_quality(self):
+        return self.config['validate.trans_min_mapping_quality']
+
+    @property
+    def fetch_reads_limit(self):
+        return self.config['validate.trans_fetch_reads_limit']
+
     def __init__(self, annotations, *pos, **kwargs):
         Evidence.__init__(self, *pos, **kwargs)
 
-        # set the transcriptome specific overrides
-        if self.trans_min_mapping_quality is not None:
-            self.min_mapping_quality = self.trans_min_mapping_quality
-        if self.trans_fetch_reads_limit is not None:
-            self.fetch_reads_limit = self.trans_fetch_reads_limit
-
         self.protocol = PROTOCOL.TRANS
         # get the list of overlapping transcripts
         self.overlapping_transcripts = overlapping_transcripts(
@@ -108,7 +119,8 @@ def __init__(self, annotations, *pos, **kwargs):
 
         self.outer_window1 = self.generate_window(self.break1)
         self.outer_window2 = self.generate_window(self.break2)
-        tgt = self.call_error + self.read_length - 1
+        call_error = self.config['validate.call_error']
+        tgt = call_error + self.read_length - 1
 
         self.inner_window1 = self.traverse(self.break1.end, tgt, ORIENT.RIGHT) | self.traverse(
             self.break1.start, tgt, ORIENT.LEFT
@@ -156,7 +168,14 @@ def __init__(self, annotations, *pos, **kwargs):
             self.compatible_window1 = self.generate_window(compt_break1)
             self.compatible_window2 = self.generate_window(compt_break2)
 
-    def traverse(self, start, distance, direction, strand=STRAND.NS, chrom=None):
+    def traverse(  # type: ignore
+        self,
+        start: int,
+        distance: int,
+        direction: str,
+        strand: str = STRAND.NS,
+        chrom: Optional[str] = None,
+    ):
         """
         given some genomic position and a distance. Uses the input transcripts to
         compute all possible genomic end positions at that distance if intronic
@@ -166,7 +185,6 @@ def traverse(self, start, distance, direction, strand=STRAND.NS, chrom=None):
             start (int): the genomic start position
             distance (int): the amount of exonic/intergenic units to traverse
             direction (ORIENT): the direction wrt to the positive/forward reference strand to traverse
-            transcripts (List[PreTranscript]): list of transcripts to use
         """
         transcripts = self._select_transcripts(chrom, strand)
         is_left = True if direction == ORIENT.LEFT else False
@@ -230,7 +248,7 @@ def _select_transcripts(self, chrom=None, strand=STRAND.NS):
                 result.append(transcript)
         return result
 
-    def distance(self, start, end, strand=STRAND.NS, chrom=None):
+    def distance(self, start: int, end: int, strand: str = STRAND.NS, chrom: Optional[str] = None):
         """
         give the current list of transcripts, computes the putative exonic/intergenic distance
         given two genomic positions. Intronic positions are ignored
@@ -265,7 +283,7 @@ def distance(self, start, end, strand=STRAND.NS, chrom=None):
             return Interval.from_iterable(inter)
         return Evidence.distance(start, end)
 
-    def generate_window(self, breakpoint):
+    def generate_window(self, breakpoint: Breakpoint):
         """
         given some input breakpoint uses the current evidence setting to determine an
         appropriate window/range of where one should search for supporting reads
@@ -282,7 +300,7 @@ def generate_window(self, breakpoint):
         Returns:
             Interval: the range where reads should be read from the bam looking for evidence for this event
         """
-        window = GenomeEvidence.generate_window(self, breakpoint)
+        window = Evidence.generate_window(self, breakpoint)
         tgt_left = Evidence.distance(window.start, breakpoint.start)  # amount to expand to the left
         tgt_right = Evidence.distance(breakpoint.end, window.end)  # amount to expand to the right
         window1 = self.traverse(
diff --git a/mavis/validate/main.py b/mavis/validate/main.py
index 6dc14203..65855c37 100644
--- a/mavis/validate/main.py
+++ b/mavis/validate/main.py
@@ -15,8 +15,8 @@
 from ..bam import cigar as _cigar
 from ..bam.cache import BamCache
 from ..breakpoint import BreakpointPair
-from ..config import get_by_prefix
 from ..constants import CALL_METHOD, COLUMNS, PROTOCOL
+from ..schemas import get_by_prefix
 from ..util import (
     LOG,
     filter_on_overlap,
@@ -105,11 +105,11 @@ def main(
                     opposing_strands=bpp.opposing_strands,
                     stranded=bpp.stranded,
                     untemplated_seq=bpp.untemplated_seq,
-                    data=bpp.data,
                     stdev_fragment_size=config['libraries'][library]['stdev_fragment_size'],
                     read_length=config['libraries'][library]['read_length'],
                     median_fragment_size=config['libraries'][library]['median_fragment_size'],
-                    **get_by_prefix(config, 'validate.')
+                    config=config,
+                    **bpp.data
                 )
                 evidence_clusters.append(evidence)
             except ValueError as err:
@@ -127,12 +127,12 @@ def main(
                     opposing_strands=bpp.opposing_strands,
                     stranded=bpp.stranded,
                     untemplated_seq=bpp.untemplated_seq,
-                    data=bpp.data,
                     stdev_fragment_size=config['libraries'][library]['stdev_fragment_size'],
                     read_length=config['libraries'][library]['read_length'],
                     median_fragment_size=config['libraries'][library]['median_fragment_size'],
                     strand_determining_read=config['libraries'][library]['strand_determining_read'],
-                    **get_by_prefix(config, 'validate.')
+                    config=config,
+                    **bpp.data
                 )
                 evidence_clusters.append(evidence)
             except ValueError as err:

From 089c0327b554bcecb24058293607a50a0e3a4544 Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Mon, 19 Apr 2021 23:37:14 -0700
Subject: [PATCH 005/137] use config not constants

- replace the constants objects with the input config object
---
 mavis/align.py                              | 20 +++--
 mavis/annotate/main.py                      |  7 +-
 mavis/annotate/variant.py                   |  9 +-
 mavis/breakpoint.py                         | 73 +++++++++++++++-
 mavis/pairing/main.py                       |  7 +-
 mavis/schemas/__init__.py                   | 31 +++++++
 mavis/summary/summary.py                    | 44 +++++-----
 mavis/validate/base.py                      | 94 +++++++++------------
 mavis/validate/call.py                      | 38 +++++----
 mavis/validate/evidence.py                  |  4 +-
 mavis/validate/main.py                      |  7 +-
 tests/end_to_end/test_convert.py            |  4 +-
 tests/integration/test_align.py             | 38 +++++----
 tests/integration/test_assemble.py          | 16 ++--
 tests/integration/test_breakpoint.py        |  7 +-
 tests/integration/test_illustrate.py        | 37 ++++----
 tests/integration/test_pairing.py           | 24 +++---
 tests/integration/test_validate.py          | 38 +++++----
 tests/integration/test_validate_call.py     | 92 +++++++++++---------
 tests/integration/test_validate_evidence.py | 49 +++++++----
 tests/setup_subprocess_cov.py               | 10 +++
 tests/unit/test_breakpoint.py               | 21 -----
 tests/unit/test_summary.py                  |  8 +-
 tests/unit/test_tool.py                     | 27 +++---
 24 files changed, 410 insertions(+), 295 deletions(-)
 create mode 100644 mavis/schemas/__init__.py
 create mode 100644 tests/setup_subprocess_cov.py

diff --git a/mavis/align.py b/mavis/align.py
index cdf0a287..f5f493a7 100644
--- a/mavis/align.py
+++ b/mavis/align.py
@@ -534,11 +534,13 @@ def select_contig_alignments(evidence, reads_by_query):
     def filter_pass(alignment):
         return not any(
             [
-                alignment.query_consumption() < evidence.contig_aln_min_query_consumption,
-                alignment.score() < evidence.contig_aln_min_score,
+                alignment.query_consumption()
+                < evidence.config['validate.contig_aln_min_query_consumption'],
+                alignment.score() < evidence.config['validate.contig_aln_min_score'],
                 alignment.mapping_quality() == Interval(0),
                 alignment.read2 is not None
-                and alignment.query_overlap_extension() < evidence.contig_aln_min_extend_overlap,
+                and alignment.query_overlap_extension()
+                < evidence.config['validate.contig_aln_min_extend_overlap'],
             ]
         )
 
@@ -563,8 +565,8 @@ def supports_primary_event(alignment):
             read = evidence.standardize_read(raw_read)
             read.cigar = _cigar.merge_internal_events(
                 read.cigar,
-                inner_anchor=evidence.contig_aln_merge_inner_anchor,
-                outer_anchor=evidence.contig_aln_merge_outer_anchor,
+                inner_anchor=evidence.config['validate.contig_aln_merge_inner_anchor'],
+                outer_anchor=evidence.config['validate.contig_aln_merge_outer_anchor'],
             )
             read = evidence.standardize_read(
                 read
@@ -581,8 +583,8 @@ def supports_primary_event(alignment):
                 _read.convert_events_to_softclipping(
                     read,
                     evidence.break1.orient,
-                    max_event_size=evidence.contig_aln_max_event_size,
-                    min_anchor_size=evidence.contig_aln_min_anchor_size,
+                    max_event_size=evidence.config['validate.contig_aln_max_event_size'],
+                    min_anchor_size=evidence.config['validate.contig_aln_min_anchor_size'],
                 )
             )
             if evidence.break1.orient == evidence.break2.orient:
@@ -591,8 +593,8 @@ def supports_primary_event(alignment):
                 _read.convert_events_to_softclipping(
                     read,
                     evidence.break2.orient,
-                    max_event_size=evidence.contig_aln_max_event_size,
-                    min_anchor_size=evidence.contig_aln_min_anchor_size,
+                    max_event_size=evidence.config['validate.contig_aln_max_event_size'],
+                    min_anchor_size=evidence.config['validate.contig_aln_min_anchor_size'],
                 )
             )
 
diff --git a/mavis/annotate/main.py b/mavis/annotate/main.py
index ffd54bd7..6103ea44 100644
--- a/mavis/annotate/main.py
+++ b/mavis/annotate/main.py
@@ -8,7 +8,7 @@
 from ..error import DrawingFitError, NotSpecifiedError
 from ..illustrate.constants import DiagramSettings
 from ..illustrate.diagram import draw_sv_summary_diagram
-from ..schemas import DEFAULTS
+from ..schemas import DEFAULTS, get_by_prefix
 from ..util import LOG, generate_complete_stamp, mkdirp, read_inputs
 from .constants import PASS_FILENAME
 from .file_io import ReferenceFile
@@ -167,10 +167,7 @@ def main(
     )
 
     # now try generating the svg
-    illustration_defaults = get_by_prefix(DEFAULTS, 'illustrate.')
-    drawing_config = DiagramSettings(
-        **{k: v for k, v in kwargs.items() if k in illustration_defaults}
-    )
+    drawing_config = DiagramSettings(**get_by_prefix(config, 'illustrate.'))
 
     header_req = {
         COLUMNS.break1_strand,
diff --git a/mavis/annotate/variant.py b/mavis/annotate/variant.py
index 050242fa..64b3732e 100644
--- a/mavis/annotate/variant.py
+++ b/mavis/annotate/variant.py
@@ -47,14 +47,9 @@ def __init__(
             opposing_strands=bpp.opposing_strands,
             stranded=bpp.stranded,
             untemplated_seq=bpp.untemplated_seq,
+            **bpp.data,
+            **kwargs
         )
-        self.data.update(bpp.data)
-        if data is not None:
-            conflicts = set(kwargs.keys()) & set(data.keys())
-            self.data.update(data)
-            if conflicts:
-                raise TypeError('got multiple values for data elements:', conflicts)
-        self.data.update(kwargs)
 
         # match transcript to breakpoint if reveresed
         if bpp.break1.key[0:3] < bpp.break2.key[0:3]:
diff --git a/mavis/breakpoint.py b/mavis/breakpoint.py
index 2f903cd6..dfdda6a5 100644
--- a/mavis/breakpoint.py
+++ b/mavis/breakpoint.py
@@ -139,6 +139,74 @@ def __lt__(self, other):
             return True
         return self.untemplated_seq < other.untemplated_seq
 
+    @property
+    def library(self) -> Optional[str]:
+        return self.data.get(COLUMNS.library)
+
+    @property
+    def cdna_synon(self) -> Optional[bool]:
+        return self.data.get(COLUMNS.cdna_synon)
+
+    @property
+    def contig_remapped_reads(self) -> Optional[int]:
+        return self.data.get(COLUMNS.contig_remapped_reads)
+
+    @property
+    def disease_status(self) -> Optional[str]:
+        return self.data.get(COLUMNS.disease_status)
+
+    @property
+    def event_type(self) -> Optional[str]:
+        return self.data.get(COLUMNS.event_type)
+
+    @property
+    def inferred_pairing(self) -> Optional[str]:
+        return self.data.get(COLUMNS.inferred_pairing)
+
+    @property
+    def pairing(self) -> Optional[str]:
+        return self.data.get(COLUMNS.pairing)
+
+    @property
+    def protocol(self) -> Optional[str]:
+        return self.data.get(COLUMNS.protocol)
+
+    @property
+    def fusion_cdna_coding_start(self) -> Optional[int]:
+        return self.data.get(COLUMNS.fusion_cdna_coding_start)
+
+    @property
+    def fusion_cdna_coding_end(self) -> Optional[int]:
+        return self.data.get(COLUMNS.fusion_cdna_coding_end)
+
+    @property
+    def fusion_sequence_fasta_id(self) -> Optional[str]:
+        return self.data.get(COLUMNS.fusion_sequence_fasta_id)
+
+    @property
+    def fusion_splicing_pattern(self) -> Optional[str]:
+        return self.data.get(COLUMNS.fusion_splicing_pattern)
+
+    @property
+    def linking_split_reads(self) -> Optional[int]:
+        return self.data.get(COLUMNS.linking_split_reads)
+
+    @property
+    def repeat_count(self) -> Optional[int]:
+        return self.data.get(COLUMNS.repeat_count)
+
+    @property
+    def tracking_id(self) -> Optional[str]:
+        return self.data.get(COLUMNS.tracking_id)
+
+    @property
+    def cluster_id(self) -> Optional[str]:
+        return self.data.get(COLUMNS.cluster_id)
+
+    @property
+    def annotation_id(self) -> Optional[str]:
+        return self.data.get(COLUMNS.annotation_id)
+
     @property
     def interchromosomal(self) -> bool:
         """bool: True if the breakpoints are on different chromosomes, False otherwise"""
@@ -242,6 +310,9 @@ def __init__(
         # try classifying to make sure it's a valid combination
         BreakpointPair.classify(self)
 
+    def column(self, colname: str):
+        return self.data.get(COLUMNS[colname])
+
     def __str__(self):
         return 'BPP({}, {}{}{})'.format(
             str(self.break1),
@@ -332,7 +403,7 @@ def classify(cls, pair, distance: Optional[Callable] = None) -> Set[str]:
                     return {SVTYPE.DEL, SVTYPE.INS}
                 elif pair.break1.orient == ORIENT.RIGHT or pair.break2.orient == ORIENT.LEFT:
                     return {SVTYPE.DUP}
-                raise InvalidRearrangement(pair)
+                return {SVTYPE.DEL, SVTYPE.INS, SVTYPE.DUP}
         else:  # interchromosomal
             if pair.opposing_strands:
                 if pair.LR or pair.RL:
diff --git a/mavis/pairing/main.py b/mavis/pairing/main.py
index debd823d..8a6dbe1a 100644
--- a/mavis/pairing/main.py
+++ b/mavis/pairing/main.py
@@ -15,7 +15,6 @@ def main(
     output: str,
     config: Dict,
     start_time=int(time.time()),
-    **kwargs,
 ):
     """
     Args:
@@ -82,8 +81,10 @@ def main(
         category = (bpp.break1.chr, bpp.break2.chr, bpp.opposing_strands, bpp.event_type)
         bpp.data[COLUMNS.product_id] = product_key(bpp)
         calls_by_cat.setdefault(category, []).append(bpp)
-        if bpp.gene1 or bpp.gene2:
-            calls_by_ann.setdefault((bpp.transcript1, bpp.transcript2), []).append(bpp)
+        if bpp.data.get(COLUMNS.gene1) or bpp.data.get(COLUMNS.gene2):
+            calls_by_ann.setdefault(
+                (bpp.data.get(COLUMNS.transcript1), bpp.data.get(COLUMNS.transcript2)), []
+            ).append(bpp)
         bpp.data[COLUMNS.pairing] = ''
         bpp.data[COLUMNS.inferred_pairing] = ''
 
diff --git a/mavis/schemas/__init__.py b/mavis/schemas/__init__.py
new file mode 100644
index 00000000..a0568bac
--- /dev/null
+++ b/mavis/schemas/__init__.py
@@ -0,0 +1,31 @@
+import collections
+import os
+
+from snakemake.utils import validate as snakemake_validate
+
+
+class ImmutableDict(collections.Mapping):
+    def __init__(self, data):
+        self._data = data
+
+    def __getitem__(self, key):
+        return self._data[key]
+
+    def __len__(self):
+        return len(self._data)
+
+    def __iter__(self):
+        return iter(self._data)
+
+
+def get_by_prefix(config, prefix):
+    return {k.replace(prefix, ''): v for k, v in config.items() if k.startswith(prefix)}
+
+
+DEFAULTS = {}
+snakemake_validate(
+    DEFAULTS,
+    os.path.join(os.path.dirname(__file__), 'config.json'),
+    set_default=True,
+)
+DEFAULTS = ImmutableDict(DEFAULTS)
diff --git a/mavis/summary/summary.py b/mavis/summary/summary.py
index 524bbe95..a3ab1ecc 100644
--- a/mavis/summary/summary.py
+++ b/mavis/summary/summary.py
@@ -30,13 +30,13 @@ def sort_key(bpp):
 
         result.extend(
             [
-                0 if bpp.transcript1 in best_transcripts else 1,
-                0 if bpp.transcript2 in best_transcripts else 1,
-                sum([bpp.transcript1 is None, bpp.transcript2 is None]),
-                string_ranks[bpp.gene1],
-                string_ranks[bpp.gene2],
-                string_ranks[bpp.transcript1],
-                string_ranks[bpp.transcript2],
+                0 if bpp.data['transcript1'] in best_transcripts else 1,
+                0 if bpp.data['transcript2'] in best_transcripts else 1,
+                sum([bpp.data['transcript1'] is None, bpp.data['transcript2'] is None]),
+                string_ranks[bpp.data['gene1']],
+                string_ranks[bpp.data['gene2']],
+                string_ranks[bpp.data['transcript1']],
+                string_ranks[bpp.data['transcript2']],
             ]
         )
         return tuple(result)
@@ -281,35 +281,39 @@ def filter_by_evidence(
     filtered = []
     removed = []
     for bpp in bpps:
-        if bpp.call_method == CALL_METHOD.CONTIG:
+        if bpp.column('call_method') == CALL_METHOD.CONTIG:
             # inherently the breakpoints have been linked
             if int(bpp.contig_remapped_reads) < filter_min_remapped_reads:
                 removed.append(bpp)
                 continue
-        elif bpp.call_method == CALL_METHOD.SPAN:
+        elif bpp.column('call_method') == CALL_METHOD.SPAN:
             if bpp.spanning_reads < filter_min_spanning_reads:
                 removed.append(bpp)
                 continue
-        elif bpp.call_method == CALL_METHOD.SPLIT:
-            linking_split_reads = bpp.linking_split_reads
+        elif bpp.column('call_method') == CALL_METHOD.SPLIT:
+            linking_split_reads = bpp.column('linking_split_reads')
             if bpp.event_type == SVTYPE.INS:
-                linking_split_reads += bpp.flanking_pairs
+                linking_split_reads += bpp.column('flanking_pairs')
             if any(
                 [
-                    bpp.break1_split_reads + bpp.break1_split_reads_forced < filter_min_split_reads,
-                    bpp.break2_split_reads + bpp.break2_split_reads_forced < filter_min_split_reads,
+                    bpp.column('break1_split_reads') + bpp.column('break1_split_reads_forced')
+                    < filter_min_split_reads,
+                    bpp.column('break2_split_reads') + bpp.column('break2_split_reads_forced')
+                    < filter_min_split_reads,
                     linking_split_reads < filter_min_linking_split_reads,
-                    bpp.break1_split_reads < 1,
-                    bpp.break2_split_reads < 1,
+                    bpp.column('break1_split_reads') < 1,
+                    bpp.column('break2_split_reads') < 1,
                 ]
             ):
                 removed.append(bpp)
                 continue
-        elif bpp.call_method == CALL_METHOD.FLANK:
-            if bpp.flanking_pairs < filter_min_flanking_reads:
+        elif bpp.column('call_method') == CALL_METHOD.FLANK:
+            if bpp.column('flanking_pairs') < filter_min_flanking_reads:
                 removed.append(bpp)
                 continue
-        elif bpp.call_method != CALL_METHOD.INPUT:
-            raise AssertionError('unexpected value for call_method: {}'.format(bpp.call_method))
+        elif bpp.column('call_method') != CALL_METHOD.INPUT:
+            raise AssertionError(
+                'unexpected value for call_method: {}'.format(bpp.column('call_method'))
+            )
         filtered.append(bpp)
     return filtered, removed
diff --git a/mavis/validate/base.py b/mavis/validate/base.py
index 49220ae9..3b2da0aa 100644
--- a/mavis/validate/base.py
+++ b/mavis/validate/base.py
@@ -1,6 +1,6 @@
 import itertools
 import logging
-from abc import abstractproperty
+from abc import abstractmethod
 from typing import Dict, List, Optional, Set, Tuple
 
 import pysam
@@ -45,7 +45,7 @@ def min_expected_fragment_size(self):
                 max(
                     [
                         self.median_fragment_size
-                        - self.stdev_fragment_size * self.stdev_count_abnormal,
+                        - self.stdev_fragment_size * self.config['validate.stdev_count_abnormal'],
                         0,
                     ]
                 ),
@@ -57,31 +57,14 @@ def min_expected_fragment_size(self):
     def max_expected_fragment_size(self):
         return int(
             round(
-                self.median_fragment_size + self.stdev_fragment_size * self.stdev_count_abnormal, 0
+                self.median_fragment_size
+                + self.stdev_fragment_size * self.config['validate.stdev_count_abnormal'],
+                0,
             )
         )
 
-    @abstractproperty
-    def strand_determining_read(self):
-        pass
-
-    @abstractproperty
-    def outer_window1(self):
-        pass
-
-    @abstractproperty
-    def outer_window2(self):
-        pass
-
-    @abstractproperty
-    def inner_window1(self):
-        pass
-
-    @abstractproperty
-    def inner_window2(self):
-        pass
-
-    @abstractproperty
+    @property
+    @abstractmethod
     def min_mapping_quality(self):
         pass
 
@@ -100,6 +83,7 @@ def __init__(
         classification=None,
         config=DEFAULTS,
         assembly_max_kmer_size=None,
+        strand_determining_read=2,
         **kwargs,
     ):
         """
@@ -115,7 +99,8 @@ def __init__(
         # initialize the breakpoint pair
         self.bam_cache = bam_cache
         self.stranded = stranded and bam_cache.stranded
-        self.config = config
+        self.config = dict(**DEFAULTS)
+        self.config.update(config)
         BreakpointPair.__init__(
             self,
             break1,
@@ -154,12 +139,13 @@ def __init__(
         )
         self.bam_cache = bam_cache
         self.classification = classification
-        self.reference_genome = reference_genome
-        self.read_length = read_length
-        self.stdev_fragment_size = stdev_fragment_size
-        self.median_fragment_size = median_fragment_size
         self.compatible_window1 = None
         self.compatible_window2 = None
+        self.median_fragment_size = median_fragment_size
+        self.read_length = read_length
+        self.reference_genome = reference_genome
+        self.stdev_fragment_size = stdev_fragment_size
+        self.strand_determining_read = strand_determining_read
 
         if self.classification is not None and self.classification not in BreakpointPair.classify(
             self
@@ -235,8 +221,8 @@ def standardize_read(self, read):
         read.cigar = _cigar.join(cigar)
         read.cigar = _cigar.merge_internal_events(
             read.cigar,
-            inner_anchor=self.contig_aln_merge_inner_anchor,
-            outer_anchor=self.contig_aln_merge_outer_anchor,
+            inner_anchor=self.config['validate.contig_aln_merge_inner_anchor'],
+            outer_anchor=self.config['validate.contig_aln_merge_outer_anchor'],
         )
         read.reference_start = read.reference_start + prefix
 
@@ -876,22 +862,22 @@ def assemble_contig(self, log=DEVNULL):
 
         log('assembly size of {} sequences'.format(len(assembly_sequences) // 2))
 
-        kmer_size = self.read_length * self.assembly_kmer_size
+        kmer_size = self.read_length * self.config['validate.assembly_kmer_size']
         remap_min_overlap = max(
-            self.read_length - self.assembly_min_exact_match_to_remap, kmer_size
+            self.read_length - self.config['validate.assembly_min_exact_match_to_remap'], kmer_size
         )
 
         contigs = assemble(
             assembly_sequences,
             kmer_size,
-            min_edge_trim_weight=self.assembly_min_edge_trim_weight,
-            assembly_max_paths=self.assembly_max_paths,
+            min_edge_trim_weight=self.config['validate.assembly_min_edge_trim_weight'],
+            assembly_max_paths=self.config['validate.assembly_max_paths'],
             min_contig_length=self.read_length,
             log=log,
             remap_min_overlap=remap_min_overlap,
-            remap_min_exact_match=self.assembly_min_exact_match_to_remap,
-            assembly_min_uniq=self.assembly_min_uniq,
-            min_complexity=self.min_call_complexity,
+            remap_min_exact_match=self.config['validate.assembly_min_exact_match_to_remap'],
+            assembly_min_uniq=self.config['validate.assembly_min_uniq'],
+            min_complexity=self.config['validate.min_call_complexity'],
         )
 
         # add the input reads
@@ -970,8 +956,8 @@ def assemble_contig(self, log=DEVNULL):
         for contig in sorted(contigs, key=lambda x: (x.remap_score() * -1, x.seq)):
             # filter on evidence level
             if (
-                contig.remap_score() < self.assembly_min_remapped_seq
-                or contig.remap_coverage() < self.assembly_min_remap_coverage
+                contig.remap_score() < self.config['validate.assembly_min_remapped_seq']
+                or contig.remap_coverage() < self.config['validate.assembly_min_remap_coverage']
             ):
                 continue
             if self.stranded and self.bam_cache.stranded:
@@ -995,7 +981,7 @@ def cache_if_true(read):
                 return True
             elif any(
                 [
-                    self.filter_secondary_alignments and read.is_secondary,
+                    self.config['validate.filter_secondary_alignments'] and read.is_secondary,
                     read.mapping_quality < self.min_mapping_quality,
                 ]
             ):
@@ -1028,7 +1014,7 @@ def filter_if_true(read):
             if not cache_if_true(read):
                 if any(
                     [
-                        self.filter_secondary_alignments and read.is_secondary,
+                        self.config['validate.filter_secondary_alignments'] and read.is_secondary,
                         read.mapping_quality < self.min_mapping_quality,
                     ]
                 ):
@@ -1049,9 +1035,9 @@ def filter_if_true(read):
             '{0}'.format(self.break1.chr),
             self.outer_window1[0],
             self.outer_window1[1],
-            read_limit=self.fetch_reads_limit,
-            sample_bins=self.fetch_reads_bins,
-            min_bin_size=self.fetch_min_bin_size,
+            read_limit=self.config['validate.fetch_reads_limit'],
+            sample_bins=self.config['validate.fetch_reads_bins'],
+            min_bin_size=self.config['validate.fetch_min_bin_size'],
             cache=True,
             cache_if=cache_if_true,
             filter_if=filter_if_true,
@@ -1080,9 +1066,9 @@ def filter_if_true(read):
             '{0}'.format(self.break2.chr),
             self.outer_window2[0],
             self.outer_window2[1],
-            read_limit=self.fetch_reads_limit,
-            sample_bins=self.fetch_reads_bins,
-            min_bin_size=self.fetch_min_bin_size,
+            read_limit=self.config['validate.fetch_reads_limit'],
+            sample_bins=self.config['validate.fetch_reads_bins'],
+            min_bin_size=self.config['validate.fetch_min_bin_size'],
             cache=True,
             cache_if=cache_if_true,
             filter_if=filter_if_true,
@@ -1132,9 +1118,9 @@ def filter_if_true(read):
                 '{0}'.format(self.break1.chr),
                 self.compatible_window1[0],
                 self.compatible_window1[1],
-                read_limit=self.fetch_reads_limit,
-                sample_bins=self.fetch_reads_bins,
-                min_bin_size=self.fetch_min_bin_size,
+                read_limit=self.config['validate.fetch_reads_limit'],
+                sample_bins=self.config['validate.fetch_reads_bins'],
+                min_bin_size=self.config['validate.fetch_min_bin_size'],
                 cache=True,
                 cache_if=cache_if_true,
                 filter_if=filter_if_true,
@@ -1146,9 +1132,9 @@ def filter_if_true(read):
                 '{0}'.format(self.break2.chr),
                 self.compatible_window2[0],
                 self.compatible_window2[1],
-                read_limit=self.fetch_reads_limit,
-                sample_bins=self.fetch_reads_bins,
-                min_bin_size=self.fetch_min_bin_size,
+                read_limit=self.config['validate.fetch_reads_limit'],
+                sample_bins=self.config['validate.fetch_reads_bins'],
+                min_bin_size=self.config['validate.fetch_min_bin_size'],
                 cache=True,
                 cache_if=cache_if_true,
                 filter_if=filter_if_true,
diff --git a/mavis/validate/call.py b/mavis/validate/call.py
index a2f49bfc..c5a8048e 100644
--- a/mavis/validate/call.py
+++ b/mavis/validate/call.py
@@ -87,7 +87,7 @@ def __init__(
             event_type, self.compatible_type = self.compatible_type, event_type
             putative_types = BreakpointPair.classify(self, source_evidence.distance)
 
-        self.event_type = SVTYPE.enforce(event_type)
+        self.data[COLUMNS.event_type] = SVTYPE.enforce(event_type)
         if event_type not in putative_types | {self.compatible_type}:
             raise ValueError(
                 'event_type is not compatible with the breakpoint call',
@@ -634,8 +634,9 @@ def _call_by_spanning_reads(source_evidence, consumed_evidence):
             event = convert_to_duplication(event, source_evidence.reference_genome)
             if all(
                 [
-                    event.query_consumption() >= source_evidence.contig_aln_min_query_consumption,
-                    event.score() >= source_evidence.contig_aln_min_score,
+                    event.query_consumption()
+                    >= source_evidence.config['validate.contig_aln_min_query_consumption'],
+                    event.score() >= source_evidence.config['validate.contig_aln_min_score'],
                 ]
             ):
                 spanning_calls.setdefault(event, set()).add(read)
@@ -643,7 +644,7 @@ def _call_by_spanning_reads(source_evidence, consumed_evidence):
     for event, reads in spanning_calls.items():
         if any(
             [
-                len(reads) < source_evidence.min_spanning_reads_resolution,
+                len(reads) < source_evidence.config['validate.min_spanning_reads_resolution'],
                 source_evidence.opposing_strands != event.opposing_strands,
             ]
         ):
@@ -734,12 +735,15 @@ def call_events(source_evidence):
 
         try:
             call = _call_by_flanking_pairs(source_evidence, event_type, type_consumed_evidence)
-            if len(call.flanking_pairs) < source_evidence.min_flanking_pairs_resolution:
+            if (
+                len(call.flanking_pairs)
+                < source_evidence.config['validate.min_flanking_pairs_resolution']
+            ):
                 errors.add(
                     'flanking call ({}) failed to supply the minimum evidence required ({} < {})'.format(
                         event_type,
                         len(call.flanking_pairs),
-                        source_evidence.min_flanking_pairs_resolution,
+                        source_evidence.config['validate.min_flanking_pairs_resolution'],
                     )
                 )
             else:
@@ -883,7 +887,7 @@ def _compute_coverage_intervals(pairs):
             ]
         else:
             break
-    if len(selected_flanking_pairs) < evidence.min_flanking_pairs_resolution:
+    if len(selected_flanking_pairs) < evidence.config['validate.min_flanking_pairs_resolution']:
         raise AssertionError(
             'insufficient flanking pairs ({}) to call {} by flanking reads'.format(
                 len(selected_flanking_pairs), event_type
@@ -940,7 +944,7 @@ def _compute_coverage_intervals(pairs):
     if call.has_compatible:
         call.add_flanking_support(evidence.compatible_flanking_pairs, is_compatible=True)
 
-    if len(call.flanking_pairs) < evidence.min_flanking_pairs_resolution:
+    if len(call.flanking_pairs) < evidence.config['validate.min_flanking_pairs_resolution']:
         raise AssertionError(
             'insufficient flanking pairs ({}) to call {} by flanking reads'.format(
                 len(call.flanking_pairs), event_type
@@ -973,7 +977,7 @@ def _call_by_split_reads(evidence, event_type, consumed_evidence=None):
                 pass
         putative_positions = list(pos_dict.keys())
         for pos in putative_positions:
-            if len(pos_dict[pos]) < evidence.min_splits_reads_resolution:
+            if len(pos_dict[pos]) < evidence.config['validate.min_splits_reads_resolution']:
                 del pos_dict[pos]
             else:
                 count = 0
@@ -982,7 +986,7 @@ def _call_by_split_reads(evidence, event_type, consumed_evidence=None):
                         PYSAM_READ_FLAGS.TARGETED_ALIGNMENT
                     ):
                         count += 1
-                if count < evidence.min_non_target_aligned_split_reads:
+                if count < evidence.config['validate.min_non_target_aligned_split_reads']:
                     del pos_dict[pos]
 
     linked_pairings = []
@@ -1000,15 +1004,15 @@ def _call_by_split_reads(evidence, event_type, consumed_evidence=None):
                 links += 1
             if (read.query_name, read.query_sequence) in reads:
                 tgt_align += 1
-        if links < evidence.min_linking_split_reads:
+        if links < evidence.config['validate.min_linking_split_reads']:
             continue
         deletion_size = second - first - 1
-        if tgt_align >= evidence.min_double_aligned_to_estimate_insertion_size:
+        if tgt_align >= evidence.config['validate.min_double_aligned_to_estimate_insertion_size']:
             # we can estimate the fragment size
-            max_insert = evidence.read_length - 2 * evidence.min_softclipping
+            max_insert = evidence.read_length - 2 * evidence.config['validate.min_softclipping']
             if event_type == SVTYPE.INS and max_insert < deletion_size:
                 continue
-        elif links >= evidence.min_double_aligned_to_estimate_insertion_size:
+        elif links >= evidence.config['validate.min_double_aligned_to_estimate_insertion_size']:
             if deletion_size > evidence.max_expected_fragment_size and event_type == SVTYPE.INS:
                 continue
 
@@ -1095,12 +1099,12 @@ def _call_by_split_reads(evidence, event_type, consumed_evidence=None):
                 if not any(
                     [
                         len(call.break1_split_read_names(both=True))
-                        < evidence.min_splits_reads_resolution,
+                        < evidence.config['validate.min_splits_reads_resolution'],
                         len(call.break2_split_read_names(both=True))
-                        < evidence.min_splits_reads_resolution,
+                        < evidence.config['validate.min_splits_reads_resolution'],
                         len(call.break1_split_read_names()) < 1,
                         len(call.break2_split_read_names()) < 1,
-                        linking_reads < evidence.min_linking_split_reads,
+                        linking_reads < evidence.config['validate.min_linking_split_reads'],
                         call.event_type != event_type,
                     ]
                 ):
diff --git a/mavis/validate/evidence.py b/mavis/validate/evidence.py
index 8719210c..f371a72e 100644
--- a/mavis/validate/evidence.py
+++ b/mavis/validate/evidence.py
@@ -31,8 +31,8 @@ def fetch_reads_limit(self):
         return self.config['validate.fetch_reads_limit']
 
     def __init__(self, *pos, **kwargs):
+        kwargs[COLUMNS.protocol] = PROTOCOL.GENOME
         Evidence.__init__(self, *pos, **kwargs)
-        self.protocol = PROTOCOL.GENOME
 
         self.outer_window1 = self.generate_window(self.break1)
         self.outer_window2 = self.generate_window(self.break2)
@@ -109,9 +109,9 @@ def fetch_reads_limit(self):
         return self.config['validate.trans_fetch_reads_limit']
 
     def __init__(self, annotations, *pos, **kwargs):
+        kwargs[COLUMNS.protocol] = PROTOCOL.TRANS
         Evidence.__init__(self, *pos, **kwargs)
 
-        self.protocol = PROTOCOL.TRANS
         # get the list of overlapping transcripts
         self.overlapping_transcripts = overlapping_transcripts(
             annotations, self.break1
diff --git a/mavis/validate/main.py b/mavis/validate/main.py
index 65855c37..1136ff81 100644
--- a/mavis/validate/main.py
+++ b/mavis/validate/main.py
@@ -37,7 +37,6 @@ def main(
     library: str,
     config: Dict,
     start_time=int(time.time()),
-    **kwargs
 ):
     """
     Args:
@@ -225,11 +224,11 @@ def main(
         aligner_fa_input_file=contig_aligner_fa,
         aligner_output_file=contig_aligner_output,
         clean_files=config['validate.clean_aligner_files'],
-        aligner=kwargs.get('aligner', config['validate.aligner']),
+        aligner=config['validate.aligner'],
         aligner_reference=config['reference.aligner_reference'][0],
         aligner_output_log=contig_aligner_log,
-        blat_min_identity=kwargs.get('blat_min_identity', config['validate.blat_min_identity']),
-        blat_limit_top_aln=kwargs.get('blat_limit_top_aln', config['validate.blat_limit_top_aln']),
+        blat_min_identity=config['validate.blat_min_identity'],
+        blat_limit_top_aln=config['validate.blat_limit_top_aln'],
         log=LOG,
     )
     for evidence in evidence_clusters:
diff --git a/tests/end_to_end/test_convert.py b/tests/end_to_end/test_convert.py
index ff58b064..2b110802 100644
--- a/tests/end_to_end/test_convert.py
+++ b/tests/end_to_end/test_convert.py
@@ -44,7 +44,7 @@ def run_main(self, inputfile, file_type, strand_specific=False):
             self.assertTrue(unique_exists(outputfile))
         result = {}
         for pair in read_bpp_from_input_file(outputfile):
-            result.setdefault(pair.tracking_id, []).append(pair)
+            result.setdefault(pair.data['tracking_id'], []).append(pair)
         return result
 
     def test_chimerascan(self):
@@ -58,6 +58,8 @@ def test_delly(self):
         # test the contents were converted successfully
         self.assertEqual(1, len(result['delly-DUP00000424']))
         bpp = result['delly-DUP00000424'][0]
+        print(bpp.data)
+        print(bpp)
         self.assertEqual(SVTYPE.DUP, bpp.event_type)
         self.assertEqual('1', bpp.break1.chr)
         self.assertEqual('1', bpp.break2.chr)
diff --git a/tests/integration/test_align.py b/tests/integration/test_align.py
index c9590a0a..45c9cb1a 100644
--- a/tests/integration/test_align.py
+++ b/tests/integration/test_align.py
@@ -2,20 +2,20 @@
 import unittest
 from unittest import mock
 
+import mavis.bam.cigar as _cigar
 from mavis import align
 from mavis.annotate.file_io import load_reference_genome
 from mavis.assemble import Contig
 from mavis.bam.cache import BamCache
-import mavis.bam.cigar as _cigar
+from mavis.bam.read import SamRead
 from mavis.breakpoint import Breakpoint, BreakpointPair
-from mavis.constants import CIGAR, ORIENT, reverse_complement, STRAND, SVTYPE
+from mavis.constants import CIGAR, ORIENT, STRAND, SVTYPE, reverse_complement
 from mavis.interval import Interval
+from mavis.schemas import DEFAULTS
 from mavis.validate.evidence import GenomeEvidence
-from mavis.validate.constants import DEFAULTS
-from mavis.bam.read import SamRead
 
-from . import MockBamFileHandle, MockObject, MockLongString, MockRead
 from ..util import get_data
+from . import MockBamFileHandle, MockLongString, MockObject, MockRead
 
 REFERENCE_GENOME = None
 
@@ -66,9 +66,11 @@ def test_blat_contigs(self):
             read_length=40,
             stdev_fragment_size=25,
             median_fragment_size=100,
-            stdev_count_abnormal=2,
-            min_splits_reads_resolution=1,
-            min_flanking_pairs_resolution=1,
+            config={
+                'validate.stdev_count_abnormal': 2,
+                'validate.min_splits_reads_resolution': 1,
+                'validate.min_flanking_pairs_resolution': 1,
+            },
         )
         ev.contigs = [
             Contig(
@@ -110,9 +112,11 @@ def test_bwa_contigs(self):
             read_length=40,
             stdev_fragment_size=25,
             median_fragment_size=100,
-            stdev_count_abnormal=2,
-            min_splits_reads_resolution=1,
-            min_flanking_pairs_resolution=1,
+            config={
+                'validate.stdev_count_abnormal': 2,
+                'validate.min_splits_reads_resolution': 1,
+                'validate.min_flanking_pairs_resolution': 1,
+            },
         )
         ev.contigs = [
             Contig(
@@ -717,13 +721,11 @@ def test_inversion_and_deletion(self):
             break2=MockObject(orient=ORIENT.RIGHT, chr='3'),
             contigs=[MockObject(seq=s, alignments=set())],
             standardize_read=lambda x: x,
-            contig_aln_max_event_size=DEFAULTS.contig_aln_max_event_size,
-            contig_aln_merge_inner_anchor=5,
-            contig_aln_merge_outer_anchor=DEFAULTS.contig_aln_merge_outer_anchor,
-            contig_aln_min_query_consumption=0.9,
-            contig_aln_min_extend_overlap=DEFAULTS.contig_aln_min_extend_overlap,
-            contig_aln_min_anchor_size=DEFAULTS.contig_aln_min_anchor_size,
-            contig_aln_min_score=DEFAULTS.contig_aln_min_score,
+            config={
+                **DEFAULTS,
+                'validate.contig_aln_merge_inner_anchor': 5,
+                'validate.contig_aln_min_query_consumption': 0.9,
+            },
             outer_window1=Interval(1000, 1200),
             outer_window2=Interval(2000, 2200),
             LR=False,
diff --git a/tests/integration/test_assemble.py b/tests/integration/test_assemble.py
index e732cfbc..9903f8ff 100644
--- a/tests/integration/test_assemble.py
+++ b/tests/integration/test_assemble.py
@@ -6,8 +6,8 @@
 from mavis.assemble import Contig, assemble, filter_contigs
 from mavis.constants import reverse_complement
 from mavis.interval import Interval
+from mavis.schemas import DEFAULTS
 from mavis.util import LOG
-from mavis.validate.constants import DEFAULTS
 
 from ..util import get_data
 from . import RUN_FULL, MockObject
@@ -334,12 +334,12 @@ def test_multiple_events(self):
         assemblies = assemble(
             sequences,
             kmer_size,
-            min_edge_trim_weight=DEFAULTS.assembly_min_edge_trim_weight,
+            min_edge_trim_weight=DEFAULTS['validate.assembly_min_edge_trim_weight'],
             remap_min_match=0.95,
             remap_min_overlap=75 * 0.9,
             min_contig_length=75,
-            remap_min_exact_match=DEFAULTS.assembly_min_exact_match_to_remap,
-            assembly_max_paths=DEFAULTS.assembly_max_paths,
+            remap_min_exact_match=DEFAULTS['validate.assembly_min_exact_match_to_remap'],
+            assembly_max_paths=DEFAULTS['validate.assembly_max_paths'],
             assembly_min_uniq=0.01,
             log=self.log,
         )
@@ -359,17 +359,17 @@ def test_multiple_events(self):
     def test_large_assembly(self):
         # simply testing that this will complete before the timeout
         sequences = self.large_assembly_seq
-        kmer_size = 150 * DEFAULTS.assembly_kmer_size
+        kmer_size = 150 * DEFAULTS['validate.assembly_kmer_size']
         print('read inputs')
         contigs = assemble(
             sequences,
             kmer_size,
-            min_edge_trim_weight=DEFAULTS.assembly_min_edge_trim_weight,
-            assembly_max_paths=DEFAULTS.assembly_max_paths,
+            min_edge_trim_weight=DEFAULTS['validate.assembly_min_edge_trim_weight'],
+            assembly_max_paths=DEFAULTS['validate.assembly_max_paths'],
             min_contig_length=150,
             log=LOG,
             remap_min_exact_match=30,
-            assembly_min_uniq=DEFAULTS.assembly_min_uniq,
+            assembly_min_uniq=DEFAULTS['validate.assembly_min_uniq'],
         )
         for contig in contigs:
             print(len(contig.seq), contig.remap_score())
diff --git a/tests/integration/test_breakpoint.py b/tests/integration/test_breakpoint.py
index b3cdce42..659cf486 100644
--- a/tests/integration/test_breakpoint.py
+++ b/tests/integration/test_breakpoint.py
@@ -1,15 +1,14 @@
 import unittest
+from functools import partial
 
 from mavis.annotate.file_io import load_reference_genome
 from mavis.breakpoint import Breakpoint, BreakpointPair
-from mavis.constants import CIGAR, ORIENT, reverse_complement, STRAND
+from mavis.constants import CIGAR, ORIENT, STRAND, reverse_complement
 from mavis.interval import Interval
 from mavis.validate.evidence import TranscriptomeEvidence
-from mavis.validate.constants import DEFAULTS
-from functools import partial
 
-from . import MockRead, MockObject, get_example_genes
 from ..util import get_data
+from . import MockObject, MockRead, get_example_genes
 
 REFERENCE_GENOME = None
 REF_CHR = 'fake'
diff --git a/tests/integration/test_illustrate.py b/tests/integration/test_illustrate.py
index 2589cfaa..ce377207 100644
--- a/tests/integration/test_illustrate.py
+++ b/tests/integration/test_illustrate.py
@@ -1,22 +1,19 @@
+import os
 import random
 import unittest
-import os
 
+from mavis.annotate import fusion, genomic, protein, variant
 from mavis.annotate.base import BioInterval
 from mavis.annotate.file_io import load_templates
-from mavis.annotate import genomic
-from mavis.annotate import protein
-from mavis.annotate import variant
-from mavis.annotate import fusion
 from mavis.breakpoint import Breakpoint, BreakpointPair
 from mavis.constants import ORIENT, PROTOCOL, STRAND, SVTYPE
-from mavis.illustrate.constants import DiagramSettings, DEFAULTS
+from mavis.illustrate.constants import DEFAULTS, DiagramSettings
 from mavis.illustrate.diagram import (
+    HEX_BLACK,
+    HEX_WHITE,
     draw_multi_transcript_overlay,
     draw_sv_summary_diagram,
     generate_interval_mapping,
-    HEX_BLACK,
-    HEX_WHITE,
 )
 from mavis.illustrate.elements import draw_genes, draw_legend, draw_template, draw_ustranscript
 from mavis.illustrate.scatter import ScatterPlot
@@ -24,8 +21,8 @@
 from mavis.interval import Interval
 from svgwrite import Drawing
 
-from . import build_transcript, MockObject, MockString, OUTPUT_SVG
 from ..util import get_data
+from . import OUTPUT_SVG, MockObject, MockString, build_transcript
 
 TEMPLATE_METADATA = None
 DEFAULTS.domain_name_regex_filter = r'.*'
@@ -65,7 +62,7 @@ def test_generate_gene_mapping_err(self):
         #  _generate_interval_mapping [genomic.IntergenicRegion(11:77361962_77361962+)] 1181.39453125 5 30 None 77356962 77366962)
         ir = genomic.IntergenicRegion('11', 5000, 5000, STRAND.POS)
         tgt_width = 1000
-        d = DiagramSettings()
+        d = DiagramSettings(domain_name_regex_filter=r'.*')
         d.gene_min_buffer = 10
         # (self, canvas, gene, width, height, fill, label='', reference_genome=None)
         draw_genes(d, self.canvas, [ir], tgt_width, [])
@@ -91,7 +88,7 @@ def test_draw_genes(self):
         y = genomic.Gene('1', 5000, 7000, strand=STRAND.NEG)
         z = genomic.Gene('1', 1500, 2500, strand=STRAND.POS)
 
-        d = DiagramSettings()
+        d = DiagramSettings(domain_name_regex_filter=r'.*')
         breakpoints = [Breakpoint('1', 1100, 1200, orient=ORIENT.RIGHT)]
         g = draw_genes(
             d,
@@ -121,7 +118,7 @@ def test_draw_genes(self):
         self.assertEqual(breakpoints[0], g.labels['B1'])
 
     def test_draw_ustranscript(self):
-        d = DiagramSettings()
+        d = DiagramSettings(domain_name_regex_filter=r'.*')
         # domains = [protein.Domain()]
         d1 = protein.Domain('first', [(55, 61), (71, 73)])
         d2 = protein.Domain('second', [(10, 20), (30, 34)])
@@ -172,7 +169,7 @@ def test_draw_ustranscript(self):
         self.assertEqual(d2.name, g.labels['D2'])
 
     def test_draw_consec_exons(self):
-        d = DiagramSettings()
+        d = DiagramSettings(domain_name_regex_filter=r'.*')
         # domains = [protein.Domain()]
         t = build_transcript(
             gene=None,
@@ -209,7 +206,7 @@ def test_dynamic_label_color(self):
         self.assertEqual(HEX_BLACK, dynamic_label_color(HEX_WHITE))
 
     def test_draw_legend(self):
-        d = DiagramSettings()
+        d = DiagramSettings(domain_name_regex_filter=r'.*')
         swatches = [
             ('#000000', 'black'),
             ('#FF0000', 'red'),
@@ -233,7 +230,7 @@ def test_draw_legend(self):
         )
 
     def test_draw_layout_single_transcript(self):
-        d = DiagramSettings()
+        d = DiagramSettings(domain_name_regex_filter=r'.*')
         d1 = protein.Domain('first', [(55, 61), (71, 73)])
         d2 = protein.Domain('second', [(10, 20), (30, 34)])
         g1 = genomic.Gene('1', 150, 1000, strand=STRAND.POS)
@@ -275,7 +272,7 @@ def test_draw_layout_single_transcript(self):
         self.assertEqual(expected_height, canvas.attribs['height'])
 
     def test_draw_layout_single_genomic(self):
-        d = DiagramSettings()
+        d = DiagramSettings(domain_name_regex_filter=r'.*')
         d1 = protein.Domain('first', [(55, 61), (71, 73)])
         d2 = protein.Domain('second', [(10, 20), (30, 34)])
         g1 = genomic.Gene('1', 150, 1000, strand=STRAND.POS)
@@ -339,7 +336,7 @@ def test_draw_layout_single_genomic(self):
             canvas.saveas('test_draw_layout_single_genomic.svg')
 
     def test_draw_layout_translocation(self):
-        d = DiagramSettings()
+        d = DiagramSettings(domain_name_regex_filter=r'.*')
         d1 = protein.Domain('first', [(55, 61), (71, 73)])
         d2 = protein.Domain('second', [(10, 20), (30, 34)])
         g1 = genomic.Gene('1', 150, 1000, strand=STRAND.POS)
@@ -406,7 +403,7 @@ def test_draw_layout_translocation(self):
 
     def test_draw_template(self):
         # def draw_template(self, canvas, template, target_width, height, labels=None, colors=None):
-        d = DiagramSettings()
+        d = DiagramSettings(domain_name_regex_filter=r'.*')
         canvas = Drawing(size=(1000, 50))
         t = genomic.Template(
             '1',
@@ -428,7 +425,7 @@ def test_draw_template(self):
         self.assertEqual(2, len(canvas.elements))
 
     def test_draw_translocation_with_template(self):
-        d = DiagramSettings()
+        d = DiagramSettings(domain_name_regex_filter=r'.*')
         d1 = protein.Domain('PF0001', [(55, 61), (71, 73)])
         d2 = protein.Domain('PF0002', [(10, 20), (30, 34)])
         g1 = genomic.Gene(TEMPLATE_METADATA['1'], 150, 1000, strand=STRAND.POS, aliases=['HUGO2'])
@@ -547,7 +544,7 @@ def test_draw_overlay(self):
             gene=gene,
             domains=[],
         )
-        d = DiagramSettings()
+        d = DiagramSettings(domain_name_regex_filter=r'.*')
         for i, t in enumerate(gene.transcripts):
             t.name = 'transcript {}'.format(i + 1)
         scatterx = [x + 100 for x in range(gene.start, gene.end + 1, 400)]
diff --git a/tests/integration/test_pairing.py b/tests/integration/test_pairing.py
index d1fe8044..3a0064f8 100644
--- a/tests/integration/test_pairing.py
+++ b/tests/integration/test_pairing.py
@@ -12,7 +12,7 @@ def setUp(self):
             Breakpoint('1', 1),
             Breakpoint('1', 10),
             opposing_strands=True,
-            data={
+            **{
                 COLUMNS.event_type: SVTYPE.DEL,
                 COLUMNS.call_method: CALL_METHOD.CONTIG,
                 COLUMNS.fusion_sequence_fasta_id: None,
@@ -23,7 +23,7 @@ def setUp(self):
             Breakpoint('1', 1),
             Breakpoint('1', 10),
             opposing_strands=True,
-            data={
+            **{
                 COLUMNS.event_type: SVTYPE.DEL,
                 COLUMNS.call_method: CALL_METHOD.CONTIG,
                 COLUMNS.fusion_sequence_fasta_id: None,
@@ -100,7 +100,7 @@ def test_mixed_protocol_fusions_same_sequence(self):
             Breakpoint('1', 1),
             Breakpoint('1', 10),
             opposing_strands=True,
-            data={
+            **{
                 COLUMNS.event_type: SVTYPE.DEL,
                 COLUMNS.call_method: CALL_METHOD.CONTIG,
                 COLUMNS.fusion_sequence_fasta_id: None,
@@ -115,7 +115,7 @@ def test_mixed_protocol_fusions_same_sequence(self):
             Breakpoint('1', 50),
             Breakpoint('1', 60),
             opposing_strands=True,
-            data={
+            **{
                 COLUMNS.event_type: SVTYPE.DEL,
                 COLUMNS.call_method: CALL_METHOD.CONTIG,
                 COLUMNS.fusion_sequence_fasta_id: None,
@@ -136,7 +136,7 @@ def test_mixed_protocol_fusions_same_sequence_diff_translation(self):
             Breakpoint('1', 1),
             Breakpoint('1', 10),
             opposing_strands=True,
-            data={
+            **{
                 COLUMNS.event_type: SVTYPE.DEL,
                 COLUMNS.call_method: CALL_METHOD.CONTIG,
                 COLUMNS.fusion_sequence_fasta_id: 'a',
@@ -151,7 +151,7 @@ def test_mixed_protocol_fusions_same_sequence_diff_translation(self):
             Breakpoint('1', 50),
             Breakpoint('1', 60),
             opposing_strands=True,
-            data={
+            **{
                 COLUMNS.event_type: SVTYPE.DEL,
                 COLUMNS.call_method: CALL_METHOD.CONTIG,
                 COLUMNS.fusion_sequence_fasta_id: 'a',
@@ -169,7 +169,7 @@ def test_mixed_protocol_fusions_different_sequence(self):
             Breakpoint('1', 1),
             Breakpoint('1', 10),
             opposing_strands=True,
-            data={
+            **{
                 COLUMNS.event_type: SVTYPE.DEL,
                 COLUMNS.call_method: CALL_METHOD.CONTIG,
                 COLUMNS.fusion_sequence_fasta_id: 'a',
@@ -184,7 +184,7 @@ def test_mixed_protocol_fusions_different_sequence(self):
             Breakpoint('1', 50),
             Breakpoint('1', 60),
             opposing_strands=True,
-            data={
+            **{
                 COLUMNS.event_type: SVTYPE.DEL,
                 COLUMNS.call_method: CALL_METHOD.CONTIG,
                 COLUMNS.fusion_sequence_fasta_id: 'b',
@@ -202,7 +202,7 @@ def test_mixed_protocol_one_predicted_one_match(self):
             Breakpoint('1', 350, orient=ORIENT.LEFT),
             Breakpoint('1', 400, orient=ORIENT.RIGHT),
             opposing_strands=False,
-            data={
+            **{
                 COLUMNS.event_type: SVTYPE.DEL,
                 COLUMNS.call_method: CALL_METHOD.CONTIG,
                 COLUMNS.fusion_sequence_fasta_id: None,
@@ -215,7 +215,7 @@ def test_mixed_protocol_one_predicted_one_match(self):
             Breakpoint('1', 350, orient=ORIENT.LEFT),
             Breakpoint('1', 400, orient=ORIENT.RIGHT),
             opposing_strands=False,
-            data={
+            **{
                 COLUMNS.event_type: SVTYPE.DEL,
                 COLUMNS.call_method: CALL_METHOD.CONTIG,
                 COLUMNS.fusion_sequence_fasta_id: None,
@@ -239,7 +239,7 @@ def test_mixed_protocol_one_predicted_one_mismatch(self):
             Breakpoint('1', 350, orient=ORIENT.LEFT),
             Breakpoint('1', 400, orient=ORIENT.RIGHT),
             opposing_strands=False,
-            data={
+            **{
                 COLUMNS.event_type: SVTYPE.DEL,
                 COLUMNS.call_method: CALL_METHOD.CONTIG,
                 COLUMNS.fusion_sequence_fasta_id: None,
@@ -252,7 +252,7 @@ def test_mixed_protocol_one_predicted_one_mismatch(self):
             Breakpoint('1', 350, orient=ORIENT.LEFT),
             Breakpoint('1', 400, orient=ORIENT.RIGHT),
             opposing_strands=False,
-            data={
+            **{
                 COLUMNS.event_type: SVTYPE.DEL,
                 COLUMNS.call_method: CALL_METHOD.CONTIG,
                 COLUMNS.fusion_sequence_fasta_id: None,
diff --git a/tests/integration/test_validate.py b/tests/integration/test_validate.py
index a1527541..3458aa68 100644
--- a/tests/integration/test_validate.py
+++ b/tests/integration/test_validate.py
@@ -1,16 +1,17 @@
 import unittest
 
 from mavis.annotate.file_io import load_reference_genome
+from mavis.bam import cigar as _cigar
 from mavis.bam.cache import BamCache
+from mavis.bam.read import SamRead
 from mavis.breakpoint import Breakpoint
-from mavis.constants import ORIENT, PYSAM_READ_FLAGS, NA_MAPPING_QUALITY
-from mavis.validate.evidence import GenomeEvidence
+from mavis.constants import NA_MAPPING_QUALITY, ORIENT, PYSAM_READ_FLAGS
+from mavis.schemas import DEFAULTS
 from mavis.validate.base import Evidence
-from mavis.bam.read import SamRead
-from mavis.bam import cigar as _cigar
+from mavis.validate.evidence import GenomeEvidence
 
-from . import mock_read_pair, MockRead, RUN_FULL, MockObject, MockLongString
 from ..util import get_data
+from . import RUN_FULL, MockLongString, MockObject, MockRead, mock_read_pair
 
 REFERENCE_GENOME = None
 
@@ -57,11 +58,13 @@ def genome_evidence(self, break1, break2, opposing_strands):
             read_length=125,
             stdev_fragment_size=100,
             median_fragment_size=380,
-            stdev_count_abnormal=3,
-            min_flanking_pairs_resolution=3,
-            max_sc_preceeding_anchor=3,
-            outer_window_min_event_size=0,
-            min_mapping_quality=20,
+            config={
+                'validate.stdev_count_abnormal': 3,
+                'validate.min_flanking_pairs_resolution': 3,
+                'validate.max_sc_preceeding_anchor': 3,
+                'validate.outer_window_min_event_size': 0,
+                'validate.min_mapping_quality': 20,
+            },
         )
         print(ge.min_expected_fragment_size, ge.max_expected_fragment_size)
         print(ge.break1.chr, ge.outer_window1)
@@ -565,9 +568,11 @@ def setUp(self):
             read_length=125,
             stdev_fragment_size=100,
             median_fragment_size=380,
-            stdev_count_abnormal=3,
-            min_flanking_pairs_resolution=3,
-            assembly_min_edge_trim_weight=3,
+            config={
+                'validate.stdev_count_abnormal': 3,
+                'validate.min_flanking_pairs_resolution': 3,
+                'validate.assembly_min_edge_trim_weight': 3,
+            },
         )
 
     def test_collect_split_read(self):
@@ -738,8 +743,11 @@ def setUp(self):
                 )
             },
             bam_cache=MockObject(get_read_reference_name=lambda x: x.reference_name),
-            contig_aln_merge_inner_anchor=10,
-            contig_aln_merge_outer_anchor=20,
+            config={
+                **DEFAULTS,
+                'validate.contig_aln_merge_inner_anchor': 10,
+                'validate.contig_aln_merge_outer_anchor': 20,
+            },
         )
 
     def test_bwa_mem(self):
diff --git a/tests/integration/test_validate_call.py b/tests/integration/test_validate_call.py
index a7dcec29..5be1962a 100644
--- a/tests/integration/test_validate_call.py
+++ b/tests/integration/test_validate_call.py
@@ -4,10 +4,10 @@
 from mavis.align import call_paired_read_event, select_contig_alignments
 from mavis.annotate.file_io import load_reference_genome
 from mavis.annotate.genomic import PreTranscript, Transcript
+from mavis.bam import cigar as _cigar
 from mavis.bam.cache import BamCache
-from mavis.bam.read import sequenced_strand, SamRead, read_pair_type
 from mavis.bam.cigar import convert_string_to_cigar
-from mavis.bam import cigar as _cigar
+from mavis.bam.read import SamRead, read_pair_type, sequenced_strand
 from mavis.breakpoint import Breakpoint, BreakpointPair
 from mavis.constants import CALL_METHOD, CIGAR, ORIENT, PYSAM_READ_FLAGS, STRAND, SVTYPE
 from mavis.interval import Interval
@@ -15,8 +15,8 @@
 from mavis.validate.base import Evidence
 from mavis.validate.evidence import GenomeEvidence, TranscriptomeEvidence
 
-from . import mock_read_pair, MockBamFileHandle, MockRead, get_example_genes, MockLongString
 from ..util import get_data
+from . import MockBamFileHandle, MockLongString, MockRead, get_example_genes, mock_read_pair
 
 REFERENCE_GENOME = None
 
@@ -448,12 +448,14 @@ def build_genome_evidence(self, b1, b2, opposing_strands=False):
             read_length=100,
             median_fragment_size=200,
             stdev_fragment_size=50,
-            stdev_count_abnormal=3,
-            min_flanking_pairs_resolution=1,
-            min_splits_reads_resolution=1,
-            min_spanning_reads_resolution=3,
-            min_linking_split_reads=1,
-            min_call_complexity=0,
+            config={
+                'validate.stdev_count_abnormal': 3,
+                'validate.min_flanking_pairs_resolution': 1,
+                'validate.min_splits_reads_resolution': 1,
+                'validate.min_spanning_reads_resolution': 3,
+                'validate.min_linking_split_reads': 1,
+                'validate.min_call_complexity': 0,
+            },
         )
         return evidence
 
@@ -850,12 +852,14 @@ def setUp(self):
             read_length=40,
             stdev_fragment_size=25,
             median_fragment_size=100,
-            stdev_count_abnormal=2,
-            min_splits_reads_resolution=1,
-            min_flanking_pairs_resolution=1,
-            min_linking_split_reads=1,
-            min_spanning_reads_resolution=3,
-            min_call_complexity=0,
+            config={
+                'validate.stdev_count_abnormal': 2,
+                'validate.min_splits_reads_resolution': 1,
+                'validate.min_flanking_pairs_resolution': 1,
+                'validate.min_linking_split_reads': 1,
+                'validate.min_spanning_reads_resolution': 3,
+                'validate.min_call_complexity': 0,
+            },
         )
         self.dup = GenomeEvidence(
             Breakpoint('fake', 50, orient=ORIENT.RIGHT),
@@ -866,12 +870,14 @@ def setUp(self):
             read_length=40,
             stdev_fragment_size=25,
             median_fragment_size=100,
-            stdev_count_abnormal=2,
-            min_splits_reads_resolution=1,
-            min_flanking_pairs_resolution=1,
-            min_linking_split_reads=1,
-            min_spanning_reads_resolution=3,
-            min_call_complexity=0,
+            config={
+                'validate.stdev_count_abnormal': 2,
+                'validate.min_splits_reads_resolution': 1,
+                'validate.min_flanking_pairs_resolution': 1,
+                'validate.min_linking_split_reads': 1,
+                'validate.min_spanning_reads_resolution': 3,
+                'validate.min_call_complexity': 0,
+            },
         )
 
     def test_empty(self):
@@ -1108,10 +1114,12 @@ def test_call_by_split_reads_consume_flanking(self):
             read_length=125,
             stdev_fragment_size=100,
             median_fragment_size=380,
-            stdev_count_abnormal=3,
-            min_flanking_pairs_resolution=1,
-            min_splits_reads_resolution=1,
-            min_linking_split_reads=1,
+            config={
+                'validate.stdev_count_abnormal': 3,
+                'validate.min_flanking_pairs_resolution': 1,
+                'validate.min_splits_reads_resolution': 1,
+                'validate.min_linking_split_reads': 1,
+            },
         )
         evidence.split_reads[0].add(
             MockRead(
@@ -1195,9 +1203,11 @@ def setUp(self):
             read_length=25,
             stdev_fragment_size=25,
             median_fragment_size=100,
-            stdev_count_abnormal=2,
-            min_flanking_pairs_resolution=1,
-            min_call_complexity=0,
+            config={
+                'validate.stdev_count_abnormal': 2,
+                'validate.min_flanking_pairs_resolution': 1,
+                'validate.min_call_complexity': 0,
+            },
         )
 
     def test_call_coverage_too_large(self):
@@ -1423,8 +1433,10 @@ def test_close_to_zero(self):
             read_length=40,
             stdev_fragment_size=25,
             median_fragment_size=180,
-            stdev_count_abnormal=2,
-            min_flanking_pairs_resolution=1,
+            config={
+                'validate.stdev_count_abnormal': 2,
+                'validate.min_flanking_pairs_resolution': 1,
+            },
         )
         ev.flanking_pairs.add(
             mock_read_pair(
@@ -1475,7 +1487,7 @@ def test_call_with_overlapping_coverage_intervals(self):
             read_length=150,
             stdev_fragment_size=98,
             median_fragment_size=433,
-            min_flanking_pairs_resolution=1,
+            config={'validate.min_flanking_pairs_resolution': 1},
         )
         evidence.flanking_pairs.add(
             mock_read_pair(
@@ -1510,11 +1522,13 @@ def build_transcriptome_evidence(self, b1, b2, opposing_strands=False):
             read_length=50,
             stdev_fragment_size=100,
             median_fragment_size=100,
-            stdev_count_abnormal=3,
-            min_splits_reads_resolution=1,
-            min_flanking_pairs_resolution=1,
-            strand_determining_read=2,
-            min_call_complexity=0,
+            config={
+                'validate.stdev_count_abnormal': 3,
+                'validate.min_splits_reads_resolution': 1,
+                'validate.min_flanking_pairs_resolution': 1,
+                'validate.strand_determining_read': 2,
+                'validate.min_call_complexity': 0,
+            },
         )
 
     def test_call_translocation(self):
@@ -1585,8 +1599,10 @@ def test_deletion(self):
             read_length=40,
             stdev_fragment_size=25,
             median_fragment_size=180,
-            min_flanking_pairs_resolution=1,
-            min_spanning_reads_resolution=1,
+            config={
+                'validate.min_flanking_pairs_resolution': 1,
+                'validate.min_spanning_reads_resolution': 1,
+            },
         )
         print(ev.outer_window1, ev.outer_window2)
         spanning_reads = [
diff --git a/tests/integration/test_validate_evidence.py b/tests/integration/test_validate_evidence.py
index 44b3709a..d4b747af 100644
--- a/tests/integration/test_validate_evidence.py
+++ b/tests/integration/test_validate_evidence.py
@@ -1,19 +1,18 @@
-from functools import partial
 import unittest
+from functools import partial
 
-from mavis.annotate.genomic import Gene, Transcript, PreTranscript
+from mavis.annotate.genomic import Gene, PreTranscript, Transcript
+from mavis.bam import cigar as _cigar
 from mavis.bam.cache import BamCache
 from mavis.bam.read import SamRead
-from mavis.bam import cigar as _cigar
 from mavis.breakpoint import Breakpoint, BreakpointPair
 from mavis.constants import CIGAR, ORIENT, STRAND
 from mavis.interval import Interval
-from mavis.validate.constants import DEFAULTS
+from mavis.schemas import DEFAULTS
 from mavis.validate.base import Evidence
 from mavis.validate.evidence import GenomeEvidence, TranscriptomeEvidence
 
-from . import mock_read_pair, MockBamFileHandle, MockRead, MockObject
-
+from . import MockBamFileHandle, MockObject, MockRead, mock_read_pair
 
 REFERENCE_GENOME = None
 
@@ -173,7 +172,7 @@ def setUp(self):
             read_length=self.read_length,
             stdev_fragment_size=100,
             median_fragment_size=100,
-            stdev_count_abnormal=1,
+            config={'validate.stdev_count_abnormal': 1},
         )
         self.genomic_ev = GenomeEvidence(
             b1,
@@ -184,7 +183,7 @@ def setUp(self):
             read_length=self.read_length,
             stdev_fragment_size=100,
             median_fragment_size=100,
-            stdev_count_abnormal=1,
+            config={'validate.stdev_count_abnormal': 1},
         )
 
     def test_genomic_vs_trans_no_annotations(self):
@@ -366,14 +365,17 @@ def setUp(self):
             self.pre_transcript.transcripts.append(Transcript(self.pre_transcript, spl))
         self.annotations = {gene.chr: [gene]}
         self.genome_evidence = MockObject(
-            annotations={}, read_length=100, max_expected_fragment_size=550, call_error=11
+            annotations={},
+            read_length=100,
+            max_expected_fragment_size=550,
+            config={**DEFAULTS, 'validate.call_error': 11},
         )
         self.trans_evidence = MockObject(
             annotations={},
             read_length=100,
             max_expected_fragment_size=550,
-            call_error=11,
             overlapping_transcripts={self.pre_transcript},
+            config={**DEFAULTS, 'validate.call_error': 11},
         )
         setattr(
             self.trans_evidence,
@@ -506,7 +508,12 @@ class TestGenomeEvidenceWindow(unittest.TestCase):
     def test_orient_ns(self):
         bpp = Breakpoint(chr='1', start=1000, end=1000, orient=ORIENT.NS)
         window = GenomeEvidence.generate_window(
-            MockObject(read_length=100, max_expected_fragment_size=550, call_error=11), bpp
+            MockObject(
+                read_length=100,
+                max_expected_fragment_size=550,
+                config={**DEFAULTS, 'validate.call_error': 11},
+            ),
+            bpp,
         )
         self.assertEqual(440, window.start)
         self.assertEqual(1560, window.end)
@@ -515,7 +522,12 @@ def test_orient_ns(self):
     def test_orient_left(self):
         bpp = Breakpoint(chr='1', start=1000, end=1000, orient=ORIENT.LEFT)
         window = GenomeEvidence.generate_window(
-            MockObject(read_length=100, call_error=11, max_expected_fragment_size=550), bpp
+            MockObject(
+                read_length=100,
+                max_expected_fragment_size=550,
+                config={**DEFAULTS, 'validate.call_error': 11},
+            ),
+            bpp,
         )
         self.assertEqual(440, window.start)
         self.assertEqual(1110, window.end)
@@ -524,7 +536,12 @@ def test_orient_left(self):
     def test_orient_right(self):
         bpp = Breakpoint(chr='1', start=1000, end=1000, orient=ORIENT.RIGHT)
         window = GenomeEvidence.generate_window(
-            MockObject(read_length=100, call_error=11, max_expected_fragment_size=550), bpp
+            MockObject(
+                read_length=100,
+                max_expected_fragment_size=550,
+                config={**DEFAULTS, 'validate.call_error': 11},
+            ),
+            bpp,
         )
         self.assertEqual(890, window.start)
         self.assertEqual(1560, window.end)
@@ -540,8 +557,7 @@ def test_window_accessors(self):
             read_length=150,
             stdev_fragment_size=500,
             median_fragment_size=100,
-            call_error=0,
-            stdev_count_abnormal=1,
+            config={'validate.stdev_count_abnormal': 1, 'validate.call_error': 0},
         )
         self.assertEqual(901, ge.outer_window1.start)
         self.assertEqual(1649, ge.outer_window1.end)
@@ -565,8 +581,7 @@ def setUp(self):
             read_length=150,
             stdev_fragment_size=500,
             median_fragment_size=100,
-            call_error=0,
-            stdev_count_abnormal=1,
+            config={'validate.stdev_count_abnormal': 1, 'validate.call_error': 0},
         )
         # outer windows (901, 1649)  (5852, 6600)
         # inner windows (1351, 1649)  (5852, 6150)
diff --git a/tests/setup_subprocess_cov.py b/tests/setup_subprocess_cov.py
new file mode 100644
index 00000000..c14c3359
--- /dev/null
+++ b/tests/setup_subprocess_cov.py
@@ -0,0 +1,10 @@
+import os
+import sys
+
+for p in sys.path:
+    if p.endswith('site-packages'):
+        pth_file = os.path.join(p, 'subprocess-coverage.pth')
+        print('writing path file:', pth_file)
+        with open(pth_file, 'w') as fh:
+            fh.write('import coverage\n\ncoverage.process_startup()\n')
+        break
diff --git a/tests/unit/test_breakpoint.py b/tests/unit/test_breakpoint.py
index a76770c0..56bdb9da 100644
--- a/tests/unit/test_breakpoint.py
+++ b/tests/unit/test_breakpoint.py
@@ -199,27 +199,6 @@ def test___init__invalid_inter_lr_opp(self):
                 opposing_strands=True,
             )
 
-    def test_accessing_data_attributes(self):
-        bp1 = Breakpoint(1, 1, 2, ORIENT.LEFT)
-        bp2 = Breakpoint(2, 1, 2, ORIENT.LEFT)
-        bpp = BreakpointPair(bp1, bp2, opposing_strands=True)
-        bpp.data['a'] = 1
-        print(bpp.data)
-        self.assertEqual(1, bpp.a)
-        with self.assertRaises(AttributeError):
-            bpp.random_attr
-
-        with self.assertRaises(AttributeError):
-            bpp.call_method
-
-        bpp.data[COLUMNS.call_method] = 1
-        print(bpp.data)
-        self.assertEqual(1, bpp.call_method)
-
-        COLUMNS.call_method = 'bbreak2_call_method'
-        bpp.data[COLUMNS.call_method] = 2
-        self.assertEqual(2, bpp.call_method)
-
 
 class TestClassifyBreakpointPair(unittest.TestCase):
     def test_inverted_translocation(self):
diff --git a/tests/unit/test_summary.py b/tests/unit/test_summary.py
index de2760a3..f2a81ef8 100644
--- a/tests/unit/test_summary.py
+++ b/tests/unit/test_summary.py
@@ -11,27 +11,27 @@ def setUp(self):
             Breakpoint('1', 1),
             Breakpoint('1', 10),
             opposing_strands=True,
-            data={
+            **{
                 COLUMNS.event_type: SVTYPE.DEL,
                 COLUMNS.call_method: CALL_METHOD.CONTIG,
                 COLUMNS.fusion_sequence_fasta_id: None,
                 COLUMNS.protocol: PROTOCOL.GENOME,
                 COLUMNS.fusion_cdna_coding_end: None,
                 COLUMNS.fusion_cdna_coding_start: None,
-            },
+            }
         )
         self.gev2 = BreakpointPair(
             Breakpoint('1', 1),
             Breakpoint('1', 100),
             opposing_strands=True,
-            data={
+            **{
                 COLUMNS.event_type: SVTYPE.DEL,
                 COLUMNS.call_method: CALL_METHOD.CONTIG,
                 COLUMNS.fusion_sequence_fasta_id: None,
                 COLUMNS.protocol: PROTOCOL.GENOME,
                 COLUMNS.fusion_cdna_coding_start: None,
                 COLUMNS.fusion_cdna_coding_end: None,
-            },
+            }
         )
         self.best_transcripts = {'ABCA': True, 'ABCD': True}
 
diff --git a/tests/unit/test_tool.py b/tests/unit/test_tool.py
index 17228569..fffaa36a 100644
--- a/tests/unit/test_tool.py
+++ b/tests/unit/test_tool.py
@@ -1,12 +1,9 @@
 import unittest
 
 from mavis.constants import COLUMNS, ORIENT, STRAND, SVTYPE
-from mavis.tools import (
-    _convert_tool_row,
-    SUPPORTED_TOOL,
-    _parse_transabyss,
-)
-from mavis.tools.vcf import parse_bnd_alt as _parse_bnd_alt, convert_record as _parse_vcf_record
+from mavis.tools import SUPPORTED_TOOL, _convert_tool_row, _parse_transabyss
+from mavis.tools.vcf import convert_record as _parse_vcf_record
+from mavis.tools.vcf import parse_bnd_alt as _parse_bnd_alt
 
 from .mock import Mock
 
@@ -285,8 +282,8 @@ def test_convert_deletion(self):
         self.assertEqual(9412400, bpp.break2.start)
         self.assertEqual(9412404, bpp.break2.end)
         self.assertEqual('21', bpp.break2.chr)
-        print(bpp, bpp.tracking_id)
-        self.assertEqual('manta-MantaDEL:20644:0:2:0:0:0', bpp.tracking_id)
+        print(bpp, bpp.data['tracking_id'])
+        self.assertEqual('manta-MantaDEL:20644:0:2:0:0:0', bpp.data['tracking_id'])
 
     def test_convert_duplication(self):
         row = Mock(
@@ -302,7 +299,7 @@ def test_convert_duplication(self):
         bpp = bpp_list[0]
         self.assertEqual('1', bpp.break1.chr)
         self.assertEqual('1', bpp.break2.chr)
-        self.assertEqual('manta-MantaDUP:TANDEM:22477:0:1:0:9:0', bpp.tracking_id)
+        self.assertEqual('manta-MantaDUP:TANDEM:22477:0:1:0:9:0', bpp.data['tracking_id'])
 
     def test_non_trans_bnd(self):
         row = Mock(
@@ -330,7 +327,7 @@ def test_non_trans_bnd(self):
         self.assertEqual(234912188, bpp.break2.start)
         self.assertEqual('R', bpp.break1.orient)
         self.assertEqual('R', bpp.break2.orient)
-        self.assertEqual('manta-MantaBND:207:0:1:0:0:0:0', bpp.tracking_id)
+        self.assertEqual('manta-MantaBND:207:0:1:0:0:0:0', bpp.data['tracking_id'])
         self.assertEqual(1, len(bpp_list))
 
     def test_non_trans_bnd_from_mate(self):
@@ -359,7 +356,7 @@ def test_non_trans_bnd_from_mate(self):
         self.assertEqual(234912188, bpp.break2.start)
         self.assertEqual('R', bpp.break1.orient)
         self.assertEqual('R', bpp.break2.orient)
-        self.assertEqual('manta-MantaBND:207:0:1:0:0:0:1', bpp.tracking_id)
+        self.assertEqual('manta-MantaBND:207:0:1:0:0:0:1', bpp.data['tracking_id'])
         self.assertEqual(1, len(bpp_list))
 
 
@@ -386,7 +383,7 @@ def test_convert_inverted_translocation(self):
         self.assertEqual(ORIENT.RIGHT, bpp.break1.orient)
         self.assertEqual(ORIENT.LEFT, bpp.break2.orient)
         self.assertEqual(False, bpp.stranded)
-        self.assertEqual('defuse-1', bpp.tracking_id)
+        self.assertEqual('defuse-1', bpp.data['tracking_id'])
 
     def test_convert_translocation(self):
         row = {
@@ -410,7 +407,7 @@ def test_convert_translocation(self):
         self.assertEqual(ORIENT.LEFT, bpp.break1.orient)
         self.assertEqual(ORIENT.LEFT, bpp.break2.orient)
         self.assertEqual(False, bpp.stranded)
-        self.assertEqual('defuse-1', bpp.tracking_id)
+        self.assertEqual('defuse-1', bpp.data['tracking_id'])
 
     def test_convert_indel(self):
         row = {
@@ -434,7 +431,7 @@ def test_convert_indel(self):
         self.assertEqual(ORIENT.LEFT, bpp.break1.orient)
         self.assertEqual(ORIENT.RIGHT, bpp.break2.orient)
         self.assertEqual(False, bpp.stranded)
-        self.assertEqual('defuse-1', bpp.tracking_id)
+        self.assertEqual('defuse-1', bpp.data['tracking_id'])
 
     def test_convert_inversion(self):
         row = {
@@ -458,7 +455,7 @@ def test_convert_inversion(self):
         self.assertEqual(ORIENT.LEFT, bpp.break1.orient)
         self.assertEqual(ORIENT.LEFT, bpp.break2.orient)
         self.assertEqual(False, bpp.stranded)
-        self.assertEqual('defuse-1', bpp.tracking_id)
+        self.assertEqual('defuse-1', bpp.data['tracking_id'])
 
 
 class TestChimerascan(unittest.TestCase):

From 559de2ab17cc8027d5cc2db86fbe3e9530a9e571 Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Mon, 19 Apr 2021 23:39:05 -0700
Subject: [PATCH 006/137] Add type annotations

---
 mavis/annotate/base.py     |  8 ++++
 mavis/annotate/variant.py  | 91 +++++++++++++++++++++++---------------
 mavis/pairing/main.py      | 15 ++++---
 mavis/pairing/pairing.py   | 26 +++++++----
 mavis/summary/main.py      | 17 +++----
 mavis/summary/summary.py   | 11 +++--
 mavis/validate/base.py     | 41 +++++++++++------
 mavis/validate/call.py     | 30 +++++++------
 mavis/validate/evidence.py | 18 +-------
 9 files changed, 150 insertions(+), 107 deletions(-)

diff --git a/mavis/annotate/base.py b/mavis/annotate/base.py
index e174ad25..bf0dea8f 100644
--- a/mavis/annotate/base.py
+++ b/mavis/annotate/base.py
@@ -1,4 +1,5 @@
 import re
+from typing import Any, Dict, Optional
 
 from ..constants import STRAND
 from ..interval import Interval
@@ -49,6 +50,13 @@ def __le__(self, other):
 
 
 class BioInterval:
+    position: Interval
+    name: Optional[str]
+    data: Dict
+    seq: Optional[str]
+    reference_object: Any
+    strand: Optional[str]
+
     def __init__(
         self, reference_object, start, end=None, name=None, seq=None, data=None, strand=None
     ):
diff --git a/mavis/annotate/variant.py b/mavis/annotate/variant.py
index 64b3732e..0a81f34b 100644
--- a/mavis/annotate/variant.py
+++ b/mavis/annotate/variant.py
@@ -1,14 +1,16 @@
 import itertools
 import json
+from typing import Callable, Dict, List, Optional, Set, Tuple, Union
+
 from shortuuid import uuid
 
-from .fusion import determine_prime, FusionTranscript
-from .genomic import IntergenicRegion
 from ..breakpoint import Breakpoint, BreakpointPair
 from ..constants import COLUMNS, GENE_PRODUCT_TYPE, PROTOCOL, STOP_AA, STRAND, SVTYPE
 from ..error import NotSpecifiedError
 from ..interval import Interval
 from ..util import DEVNULL
+from .fusion import FusionTranscript, determine_prime
+from .genomic import Gene, IntergenicRegion, PreTranscript, Transcript
 
 
 class Annotation(BreakpointPair):
@@ -17,8 +19,22 @@ class Annotation(BreakpointPair):
     will also hold the other annotations for overlapping and encompassed and nearest genes
     """
 
+    encompassed_genes: Set[Gene]
+    genes_proximal_to_break1: Set[Gene]
+    genes_proximal_to_break2: Set[Gene]
+    genes_overlapping_break1: Set[Gene]
+    genes_overlapping_break2: Set[Gene]
+    proximity: int
+    fusion: Optional[FusionTranscript]
+    transcript1: Optional[Transcript]
+    transcript2: Optional[Transcript]
+
+    @property
+    def validation_id(self) -> Optional[str]:
+        return self.data.get(COLUMNS.validation_id)
+
     def __init__(
-        self, bpp, transcript1=None, transcript2=None, proximity=5000, data=None, **kwargs
+        self, bpp: BreakpointPair, transcript1=None, transcript2=None, proximity=5000, **kwargs
     ):
         """
         Holds a breakpoint call and a set of transcripts, other information is gathered relative to these
@@ -27,8 +43,6 @@ def __init__(
             bpp (BreakpointPair): the breakpoint pair call. Will be adjusted and then stored based on the transcripts
             transcript1 (Transcript): transcript at the first breakpoint
             transcript2 (Transcript): Transcript at the second breakpoint
-            data (dict): optional dictionary to hold related attributes
-            event_type (SVTYPE): the type of event
         """
         # narrow the breakpoint windows by the transcripts being used for annotation
         temp = bpp.break1 if transcript1 is None else bpp.break1 & transcript1
@@ -528,15 +542,19 @@ def overlapping_transcripts(ref_ann, breakpoint):
     return putative_annotations
 
 
-def _gather_breakpoint_annotations(ref_ann, breakpoint):
+def _gather_breakpoint_annotations(
+    ref_ann: Dict[str, List[Gene]], breakpoint: Breakpoint
+) -> Tuple[
+    List[Union[PreTranscript, IntergenicRegion]], List[Union[PreTranscript, IntergenicRegion]]
+]:
     """
     Args:
-        ref_ann (Dict[str,List[Gene]]): the reference annotations split
+        ref_ann: the reference annotations split
             into lists of genes by chromosome
-        breakpoint (Breakpoint): the breakpoint annotations are to be gathered for
+        breakpoint: the breakpoint annotations are to be gathered for
 
     Returns:
-        Tuple[List[Union[PreTranscript,IntergenicRegion]],List[Union[PreTranscript,IntergenicRegion]]]:
+        transcripts:
             - transcripts or intergenic regions overlapping the breakpoint on the positive strand
             - transcripts or intergenic regions overlapping the breakpoint on the negative strand
 
@@ -618,16 +636,15 @@ def _gather_breakpoint_annotations(ref_ann, breakpoint):
     )
 
 
-def _gather_annotations(ref, bp, proximity=None):
+def _gather_annotations(ref: Dict[str, List[Gene]], bp: BreakpointPair, proximity=None):
     """
     each annotation is defined by the annotations selected at the breakpoints
     the other annotations are given relative to this
     the annotation at the breakpoint can be a transcript or an intergenic region
 
     Args:
-        ref (Dict[str,List[Gene]]): the list of reference genes hashed
-            by chromosomes
-        breakpoint_pairs (List[BreakpointPair]): breakpoint pairs we wish to annotate as events
+        ref: the reference genes, keyed by chromosome
+        bp: the breakpoint pair we wish to annotate
 
     Returns:
         List[Annotation]: The annotations
@@ -636,7 +653,9 @@ def _gather_annotations(ref, bp, proximity=None):
     break1_pos, break1_neg = _gather_breakpoint_annotations(ref, bp.break1)
     break2_pos, break2_neg = _gather_breakpoint_annotations(ref, bp.break2)
 
-    combinations = []
+    combinations: List[
+        Tuple[Union[PreTranscript, IntergenicRegion], Union[PreTranscript, IntergenicRegion]]
+    ] = []
 
     if bp.stranded:
         if bp.break1.strand == STRAND.POS:
@@ -653,7 +672,7 @@ def _gather_annotations(ref, bp, proximity=None):
         # single transcript starts ....
         for t in (set(break1_pos) | set(break1_neg)) & (set(break2_pos) | set(break2_neg)):
             try:
-                t.gene
+                t.gene  # type: ignore
             except AttributeError:
                 pass
             else:
@@ -682,7 +701,7 @@ def _gather_annotations(ref, bp, proximity=None):
         if (a1, a2) in annotations:  # ignore duplicates
             continue
         try:
-            if a1.gene == a2.gene and a1 != a2:
+            if a1.gene == a2.gene and a1 != a2:  # type: ignore
                 continue
         except AttributeError:
             pass
@@ -719,7 +738,7 @@ def _gather_annotations(ref, bp, proximity=None):
     return filtered
 
 
-def choose_more_annotated(ann_list):
+def choose_more_annotated(ann_list: List[Annotation]) -> List[Annotation]:
     """
     for a given set of annotations if there are annotations which contain transcripts and
     annotations that are simply intergenic regions, discard the intergenic region annotations
@@ -729,18 +748,18 @@ def choose_more_annotated(ann_list):
     that land in the intergenic region
 
     Args:
-        ann_list (List[Annotation]): list of input annotations
+        ann_list: list of input annotations
 
     Warning:
         input annotations are assumed to be the same event (the same validation_id)
         the logic used would not apply to different events
 
     Returns:
-        List[Annotation]: the filtered list
+        the filtered list
     """
-    two_transcript = []
-    one_transcript = []
-    intergenic = []
+    two_transcript: List[Annotation] = []
+    one_transcript: List[Annotation] = []
+    intergenic: List[Annotation] = []
 
     for ann in ann_list:
         if isinstance(ann.transcript1, IntergenicRegion) and isinstance(
@@ -762,7 +781,7 @@ def choose_more_annotated(ann_list):
         return intergenic
 
 
-def choose_transcripts_by_priority(ann_list):
+def choose_transcripts_by_priority(ann_list: List[Annotation]):
     """
     for each set of annotations with the same combinations of genes, choose the
     annotation with the most "best_transcripts" or most "alphanumeric" choices
@@ -778,8 +797,10 @@ def choose_transcripts_by_priority(ann_list):
     Returns:
         List[Annotation]: the filtered list
     """
-    annotations_by_gene_combination = {}
-    genes = set()
+    annotations_by_gene_combination: Dict[
+        Tuple[Optional[Gene], Optional[Gene]], List[Annotation]
+    ] = {}
+    genes: Set[Gene] = set()
 
     for ann in ann_list:
         gene1 = None
@@ -822,16 +843,16 @@ def choose_transcripts_by_priority(ann_list):
 
 
 def annotate_events(
-    bpps,
-    annotations,
-    reference_genome,
-    max_proximity=5000,
-    min_orf_size=200,
-    min_domain_mapping_match=0.95,
-    max_orf_cap=3,
-    log=DEVNULL,
-    filters=None,
-):
+    bpps: List[BreakpointPair],
+    annotations: Dict[str, List[Gene]],
+    reference_genome: Dict[str, str],
+    max_proximity: int = 5000,
+    min_orf_size: int = 200,
+    min_domain_mapping_match: float = 0.95,
+    max_orf_cap: int = 3,
+    log: Callable = DEVNULL,
+    filters: Optional[List[Callable]] = None,
+) -> List[Annotation]:
     """
     Args:
         bpps (List[mavis.breakpoint.BreakpointPair]): list of events
diff --git a/mavis/pairing/main.py b/mavis/pairing/main.py
index 8a6dbe1a..1e332002 100644
--- a/mavis/pairing/main.py
+++ b/mavis/pairing/main.py
@@ -1,10 +1,11 @@
 import itertools
 import os
 import time
-from typing import Dict, List
+from typing import Dict, List, Set, Tuple
 
 from ..annotate.constants import SPLICE_TYPE
 from ..annotate.file_io import ReferenceFile
+from ..breakpoint import BreakpointPair
 from ..constants import CALL_METHOD, COLUMNS, PROTOCOL, SVTYPE
 from ..util import LOG, generate_complete_stamp, output_tabbed_file, read_inputs
 from .pairing import inferred_equivalent, pair_by_distance, product_key
@@ -70,9 +71,9 @@ def main(
                 reference_transcripts[unspliced_t.name] = unspliced_t
 
     # map the calls by library and ensure there are no name/key conflicts
-    calls_by_cat = dict()
-    calls_by_ann = dict()
-    bpp_by_product_key = dict()
+    calls_by_cat: Dict[Tuple[str, str, bool, str], List[BreakpointPair]] = dict()
+    calls_by_ann: Dict[Tuple[str, str], List[BreakpointPair]] = dict()
+    bpp_by_product_key: Dict[str, BreakpointPair] = dict()
     libraries = set()
 
     # initialize the pairing mappings
@@ -100,8 +101,8 @@ def main(
                 )
         bpp_by_product_key[product_key(bpp)] = bpp
 
-    distance_pairings = {}
-    product_pairings = {}
+    distance_pairings: Dict[str, Set[str]] = {}
+    product_pairings: Dict[str, Set[str]] = {}
     LOG('computing distance based pairings')
     # pairwise comparison of breakpoints between all libraries
     for set_num, (category, calls) in enumerate(
@@ -117,7 +118,7 @@ def main(
 
     LOG('computing inferred (by product) pairings')
     for calls in calls_by_ann.values():
-        calls_by_lib = {}
+        calls_by_lib: Dict[str, List[BreakpointPair]] = {}
         for call in calls:
             calls_by_lib.setdefault(call.library, []).append(call)
 
diff --git a/mavis/pairing/pairing.py b/mavis/pairing/pairing.py
index 316598c2..f1fb52b5 100644
--- a/mavis/pairing/pairing.py
+++ b/mavis/pairing/pairing.py
@@ -1,5 +1,7 @@
+from typing import Callable, Dict, List, Optional, Set
+
 from ..annotate.variant import determine_prime
-from ..breakpoint import Breakpoint
+from ..breakpoint import Breakpoint, BreakpointPair
 from ..constants import CALL_METHOD, COLUMNS, ORIENT, PRIME, PROTOCOL, STRAND
 from ..error import NotSpecifiedError
 from ..interval import Interval
@@ -7,7 +9,7 @@
 from .constants import PAIRING_DISTANCES
 
 
-def product_key(bpp):
+def product_key(bpp: BreakpointPair) -> str:
     """
     unique id for the product row
     """
@@ -26,7 +28,7 @@ def product_key(bpp):
     )
 
 
-def predict_transcriptome_breakpoint(breakpoint, transcript):
+def predict_transcriptome_breakpoint(breakpoint: Breakpoint, transcript):
     """
     for a given genomic breakpoint and the target transcript. Predicts the possible transcriptomic
     breakpoints that would be expected based on the splicing model for abrogated splice sites
@@ -121,7 +123,7 @@ def predict_transcriptome_breakpoint(breakpoint, transcript):
     return sorted(tbreaks)
 
 
-def _equivalent_events(event1, event2):
+def _equivalent_events(event1: BreakpointPair, event2: BreakpointPair) -> bool:
     # basic checks
     if any(
         [
@@ -138,7 +140,9 @@ def _equivalent_events(event1, event2):
     return True
 
 
-def comparison_distance(event1, event2, input_distances=None):
+def comparison_distance(
+    event1: BreakpointPair, event2: BreakpointPair, input_distances: Optional[Dict] = None
+) -> int:
     distances = {}
     distances.update(PAIRING_DISTANCES.items())
     if input_distances is not None:
@@ -150,7 +154,7 @@ def comparison_distance(event1, event2, input_distances=None):
     return max_distance
 
 
-def equivalent(event1, event2, distances=None):
+def equivalent(event1: BreakpointPair, event2: BreakpointPair, distances=None) -> bool:
     """
     compares two events by breakpoint position to see if they are equivalent
     """
@@ -178,11 +182,13 @@ def equivalent(event1, event2, distances=None):
     return True
 
 
-def pair_by_distance(calls, distances, log=DEVNULL, against_self=False):
+def pair_by_distance(
+    calls: List[BreakpointPair], distances, log: Callable = DEVNULL, against_self: bool = False
+) -> Dict[str, Set[str]]:
     """
     for a set of input calls, pair by distance
     """
-    distance_pairings = {}
+    distance_pairings: Dict[str, Set[str]] = {}
     break1_sorted = sorted(calls, key=lambda b: b.break1.start)
     break2_sorted = sorted(calls, key=lambda b: b.break2.start)
     lowest_resolution = max([len(b.break1) for b in calls] + [len(b.break2) for b in calls] + [1])
@@ -239,7 +245,9 @@ def pair_by_distance(calls, distances, log=DEVNULL, against_self=False):
     return distance_pairings
 
 
-def inferred_equivalent(event1, event2, reference_transcripts, distances=None):
+def inferred_equivalent(
+    event1: BreakpointPair, event2: BreakpointPair, reference_transcripts: Dict, distances=None
+) -> bool:
     """
     comparison of events using product prediction and breakpoint prediction
     """
diff --git a/mavis/summary/main.py b/mavis/summary/main.py
index 7e46f22d..c34cb5e8 100644
--- a/mavis/summary/main.py
+++ b/mavis/summary/main.py
@@ -2,11 +2,12 @@
 import re
 import time
 from functools import partial
-from typing import Dict, List
+from typing import Dict, List, Tuple
 
 import tab
 
 from ..annotate.file_io import ReferenceFile
+from ..breakpoint import BreakpointPair
 from ..constants import CALL_METHOD, COLUMNS, PROTOCOL, SVTYPE
 from ..util import LOG, generate_complete_stamp, output_tabbed_file, read_inputs, soft_cast
 from .constants import HOMOPOLYMER_MIN_LENGTH
@@ -184,7 +185,7 @@ def main(inputs: List[str], output: str, config: Dict, start_time=int(time.time(
         pair.data[COLUMNS.filter_comment] = 'low evidence'
         filtered_pairs.append(pair)
 
-    bpps_by_library = {}  # split the input pairs by library
+    bpps_by_library: Dict[str, List[BreakpointPair]] = {}  # split the input pairs by library
     libraries = {}
     for bpp in bpps:
         bpps_by_library.setdefault(bpp.library, []).append(bpp)
@@ -192,12 +193,12 @@ def main(inputs: List[str], output: str, config: Dict, start_time=int(time.time(
 
     # collapse identical calls with different call methods
     for library in bpps_by_library:
-        uncollapsed = dict()
+        uncollapsed: Dict[Tuple, List[BreakpointPair]] = dict()
         for bpp in bpps_by_library[library]:
-            group = (
+            group: Tuple[BreakpointPair, str, str, str, str, int, int] = (
                 bpp,
-                bpp.transcript1,
-                bpp.transcript2,
+                bpp.data.get(COLUMNS.transcript1),
+                bpp.data.get(COLUMNS.transcript2),
                 bpp.fusion_sequence_fasta_id,
                 bpp.fusion_splicing_pattern,
                 bpp.fusion_cdna_coding_start,
@@ -242,8 +243,8 @@ def main(inputs: List[str], output: str, config: Dict, start_time=int(time.time(
                     bpp.opposing_strands,
                     bpp.break1.strand,
                     bpp.break2.strand,
-                    bpp.transcript1 if bpp.gene1 else None,
-                    bpp.transcript2 if bpp.gene2 else None,
+                    bpp.data.get(COLUMNS.transcript1) if bpp.data.get(COLUMNS.gene1) else None,
+                    bpp.data.get(COLUMNS.transcript2) if bpp.data.get(COLUMNS.gene2) else None,
                     bpp.fusion_sequence_fasta_id,  # id is a hash of the sequence
                     bpp.fusion_cdna_coding_start,
                     bpp.fusion_cdna_coding_end,
diff --git a/mavis/summary/summary.py b/mavis/summary/summary.py
index a3ab1ecc..67ef7420 100644
--- a/mavis/summary/summary.py
+++ b/mavis/summary/summary.py
@@ -1,16 +1,19 @@
-from .constants import PAIRING_STATE
+from typing import Dict, List
+
+from ..annotate.genomic import Transcript
 from ..breakpoint import Breakpoint, BreakpointPair
 from ..constants import CALL_METHOD, COLUMNS, DISEASE_STATUS, PROTOCOL, SVTYPE
 from ..interval import Interval
 from ..pairing.pairing import pair_by_distance, product_key
 from ..util import get_connected_components
+from .constants import PAIRING_STATE
 
 
-def filter_by_annotations(bpp_list, best_transcripts):
+def filter_by_annotations(bpp_list: List[BreakpointPair], best_transcripts: Dict[str, Transcript]):
     """
     Args:
-        bpp_list (List[BreakpointPair]): list of pairs to filter
-        best_transcripts (Dict[str,Transcript]): the best transcripts of the annotations
+        bpp_list: list of pairs to filter
+        best_transcripts: the best transcripts of the annotations
           based on their names
 
     """
diff --git a/mavis/validate/base.py b/mavis/validate/base.py
index 3b2da0aa..e6767d3a 100644
--- a/mavis/validate/base.py
+++ b/mavis/validate/base.py
@@ -10,8 +10,17 @@
 from ..bam import read as _read
 from ..bam.cache import BamCache
 from ..breakpoint import Breakpoint, BreakpointPair
-from ..constants import (CIGAR, COLUMNS, NA_MAPPING_QUALITY, ORIENT, PROTOCOL,
-                         PYSAM_READ_FLAGS, STRAND, SVTYPE, reverse_complement)
+from ..constants import (
+    CIGAR,
+    COLUMNS,
+    NA_MAPPING_QUALITY,
+    ORIENT,
+    PROTOCOL,
+    PYSAM_READ_FLAGS,
+    STRAND,
+    SVTYPE,
+    reverse_complement,
+)
 from ..error import NotSpecifiedError
 from ..interval import Interval
 from ..schemas import DEFAULTS
@@ -19,23 +28,29 @@
 
 
 class Evidence(BreakpointPair):
+    assembly_max_kmer_size: int
     bam_cache: BamCache
     classification: Optional[str]
-    reference_genome: Dict
-    read_length: int
-    stdev_fragment_size: int
-    median_fragment_size: int
-    split_reads: Tuple[Set, Set]
-    flanking_pairs: Set
     compatible_flanking_pairs: Set
-    spanning_reads: Set
-    counts: List[int]
-    contigs: List
-    half_mapped: Tuple[Set, Set]
     compatible_window1: Optional[Interval]
     compatible_window2: Optional[Interval]
     config: Dict
-    assenmbly_max_kmer_size: int
+    contigs: List
+    counts: List[int]
+    flanking_pairs: Set
+    half_mapped: Tuple[Set, Set]
+    median_fragment_size: int
+    read_length: int
+    reference_genome: Dict
+    spanning_reads: Set
+    split_reads: Tuple[Set, Set]
+    stdev_fragment_size: int
+    strand_determining_read: int
+    # abstract properties
+    inner_window1: Interval
+    inner_window2: Interval
+    outer_window1: Interval
+    outer_window2: Interval
 
     @property
     def min_expected_fragment_size(self):
diff --git a/mavis/validate/call.py b/mavis/validate/call.py
index c5a8048e..002534a3 100644
--- a/mavis/validate/call.py
+++ b/mavis/validate/call.py
@@ -1,9 +1,10 @@
 import itertools
 import math
 import statistics
-from typing import Optional, Set
+from typing import List, Optional, Set
 
 from ..align import SplitAlignment, call_paired_read_event, call_read_events, convert_to_duplication
+from ..assemble import Contig
 from ..bam import read as _read
 from ..breakpoint import Breakpoint, BreakpointPair
 from ..constants import (
@@ -16,6 +17,7 @@
     reverse_complement,
 )
 from ..interval import Interval
+from ..validate.base import Evidence
 
 
 class EventCall(BreakpointPair):
@@ -30,9 +32,9 @@ class for holding evidence and the related calls since we can't freeze the evide
     break1_split_reads: Set
     break2_split_reads: Set
     compatible_flanking_pairs: Set
-    compatible_type: str
-    contig: Optional
-    contig_alignment: Optional
+    compatible_type: Optional[str]
+    contig: Optional[Contig]
+    contig_alignment: Optional[SplitAlignment]
 
     @property
     def has_compatible(self):
@@ -40,14 +42,14 @@ def has_compatible(self):
 
     def __init__(
         self,
-        b1,
-        b2,
-        source_evidence,
-        event_type,
-        call_method,
-        contig=None,
-        contig_alignment=None,
-        untemplated_seq=None,
+        b1: Breakpoint,
+        b2: Breakpoint,
+        source_evidence: Evidence,
+        event_type: str,
+        call_method: str,
+        contig: Optional[Contig] = None,
+        contig_alignment: Optional[SplitAlignment] = None,
+        untemplated_seq: Optional[str] = None,
     ):
         """
         Args:
@@ -624,7 +626,7 @@ def filter_consumed_pairs(pairs, consumed_reads):
     return temp
 
 
-def _call_by_spanning_reads(source_evidence, consumed_evidence):
+def _call_by_spanning_reads(source_evidence: Evidence, consumed_evidence):
     spanning_calls = {}
     available_flanking_pairs = filter_consumed_pairs(
         source_evidence.flanking_pairs, consumed_evidence
@@ -695,7 +697,7 @@ def _call_by_spanning_reads(source_evidence, consumed_evidence):
     return filtered_events
 
 
-def call_events(source_evidence):
+def call_events(source_evidence) -> List[EventCall]:
     """
     generates a set of event calls based on the evidence associated with the source_evidence object
     will also narrow down the event type
diff --git a/mavis/validate/evidence.py b/mavis/validate/evidence.py
index f371a72e..a689170c 100644
--- a/mavis/validate/evidence.py
+++ b/mavis/validate/evidence.py
@@ -7,21 +7,13 @@
 from ..annotate.variant import overlapping_transcripts
 from ..bam import cigar as _cigar
 from ..breakpoint import Breakpoint
-from ..constants import CIGAR, ORIENT, PROTOCOL, STRAND, SVTYPE
+from ..constants import CIGAR, COLUMNS, ORIENT, PROTOCOL, STRAND, SVTYPE
 from ..interval import Interval
 from ..schemas import DEFAULTS
 from .base import Evidence
 
 
 class GenomeEvidence(Evidence):
-    outer_window1: Interval
-    outer_window2: Interval
-    inner_window1: Interval
-    inner_window2: Interval
-    compatible_window1: Interval
-    compatible_window2: Interval
-    protocol: str
-
     @property
     def min_mapping_quality(self):
         return self.config['validate.min_mapping_quality']
@@ -92,14 +84,6 @@ def compute_fragment_size(
 
 
 class TranscriptomeEvidence(Evidence):
-    outer_window1: Interval
-    outer_window2: Interval
-    inner_window1: Interval
-    inner_window2: Interval
-    compatible_window1: Interval
-    compatible_window2: Interval
-    protocol: str
-
     @property
     def min_mapping_quality(self):
         return self.config['validate.trans_min_mapping_quality']

From 99a8b10d6afc1d132593889a07b1c4259ef3df2e Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Mon, 19 Apr 2021 23:39:53 -0700
Subject: [PATCH 007/137] reference data dict directly

---
 tools/calculate_ref_alt_counts.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tools/calculate_ref_alt_counts.py b/tools/calculate_ref_alt_counts.py
index ee43dcfe..d873daba 100644
--- a/tools/calculate_ref_alt_counts.py
+++ b/tools/calculate_ref_alt_counts.py
@@ -7,7 +7,6 @@
 import statistics as stats
 
 import pysam
-
 from mavis.annotate.file_io import load_reference_genome
 from mavis.constants import SVTYPE
 from mavis.util import LOG as log
@@ -196,7 +195,7 @@ def calculate_all_counts(self, input_files, output_file):
         for bpp in bpps:
             # only use precise bpps that are within a certain event size
             try:
-                processed_bpps[bpp.product_id] = self.calculate_ref_counts(bpp)
+                processed_bpps[bpp.data['product_id']] = self.calculate_ref_counts(bpp)
             except ValueError:
                 # wrong event type to calculate a ref/alt count
                 filtered_events.append(bpp)

From 426a22644a5bdbd6aae3a7c41e378e3de75494fb Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Mon, 19 Apr 2021 23:41:26 -0700
Subject: [PATCH 008/137] Add codecov .pth file

---
 .github/workflows/build.yml | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index ded37cdc..37c862f0 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -47,14 +47,13 @@ jobs:
         export PATH=$PATH:$(pwd):$(pwd)/bwa
         pytest tests -v \
           --junitxml=junit/test-results-${{ matrix.python-version }}.xml \
-          --cov mavis \
-          --cov-report term-missing \
-          --cov-report xml \
-          --durations=10 \
-          --cov-branch
+          --durations=10
       env:
         RUN_FULL: 0
       if: github.event_name != 'pull_request'
+    - name: set up .pth file
+      run: |
+        python tests/setup_subprocess_cov.py
     - name: run full tests with pytest
       run: |
         export PATH=$PATH:$(pwd):$(pwd)/bwa

From e2e81ade4a6bda0883c4d29238402793c786e3cf Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Tue, 20 Apr 2021 15:15:15 -0700
Subject: [PATCH 009/137] Use class-style syntax for constants

---
 Snakefile                    |    2 +-
 mavis/align.py               |   18 +-
 mavis/annotate/constants.py  |   43 +-
 mavis/config.py              |    9 +-
 mavis/constants.py           | 1001 +++++++++++++---------------------
 mavis/main.py                |    2 +-
 mavis/pairing/constants.py   |   18 +-
 mavis/summary/constants.py   |   26 +-
 mavis/tools/constants.py     |   55 +-
 mavis/util.py                |   10 +-
 tests/unit/test_constants.py |   72 +--
 tests/unit/test_util.py      |   57 +-
 12 files changed, 480 insertions(+), 833 deletions(-)

diff --git a/Snakefile b/Snakefile
index 2e617c92..7710c0d9 100644
--- a/Snakefile
+++ b/Snakefile
@@ -1,5 +1,5 @@
 from snakemake.utils import validate
-from snakemake import WorkflowError
+from snakemake.exceptions import WorkflowError
 import os
 from typing import List, Dict
 import re
diff --git a/mavis/align.py b/mavis/align.py
index f5f493a7..be81b28d 100644
--- a/mavis/align.py
+++ b/mavis/align.py
@@ -27,14 +27,18 @@
 from .interval import Interval
 from .util import DEVNULL
 
-SUPPORTED_ALIGNER = MavisNamespace(
-    BWA_MEM='bwa mem', BLAT='blat', __name__='mavis.align.SUPPORTED_ALIGNER'
-)
-"""MavisNamespace: supported aligners
 
-- [blat](/glossary/#blat)
-- [bwa mem<BWA>](/glossary/#bwa-mem<BWA>)
-"""
+class SUPPORTED_ALIGNER(MavisNamespace):
+    """
+    holds controlled vocabulary for the supported aligners
+
+    Attributes:
+        BWA_MEM: [bwa mem<BWA>](/glossary/#bwa-mem<BWA>)
+        BLAT: [blat](/glossary/#blat)
+    """
+
+    BWA_MEM: str = 'bwa mem'
+    BLAT: str = 'blat'
 
 
 class SplitAlignment(BreakpointPair):
diff --git a/mavis/annotate/constants.py b/mavis/annotate/constants.py
index d2fbf5c2..1a4e324d 100644
--- a/mavis/annotate/constants.py
+++ b/mavis/annotate/constants.py
@@ -3,30 +3,35 @@
 import tab
 
 from ..constants import MavisNamespace, float_fraction
-from ..util import WeakMavisNamespace
 
 PASS_FILENAME = 'annotations.tab'
 
 
-SPLICE_TYPE = MavisNamespace(
-    RETAIN='retained intron',
-    SKIP='skipped exon',
-    NORMAL='normal',
-    MULTI_RETAIN='retained multiple introns',
-    MULTI_SKIP='skipped multiple exons',
-    COMPLEX='complex',
-)
-"""MavisNamespace: holds controlled vocabulary for allowed splice type classification values
-
-- ``RETAIN``: an intron was retained
-- ``SKIP``: an exon was skipped
-- ``NORMAL``: no exons were skipped and no introns were retained. the normal/expected splicing pattern was followed
-- ``MULTI_RETAIN``: multiple introns were retained
-- ``MULTI_SKIP``: multiple exons were skipped
-- ``COMPLEX``: some combination of exon skipping and intron retention
-"""
+class SPLICE_TYPE(MavisNamespace):
+    """
+    holds controlled vocabulary for allowed splice type classification values
+
+    Attributes:
+        RETAIN: an intron was retained
+        SKIP: an exon was skipped
+        NORMAL: no exons were skipped and no introns were retained. the normal/expected splicing pattern was followed
+        MULTI_RETAIN: multiple introns were retained
+        MULTI_SKIP: multiple exons were skipped
+        COMPLEX: some combination of exon skipping and intron retention
+    """
+
+    RETAIN: str = 'retained intron'
+    SKIP: str = 'skipped exon'
+    NORMAL: str = 'normal'
+    MULTI_RETAIN: str = 'retained multiple introns'
+    MULTI_SKIP: str = 'skipped multiple exons'
+    COMPLEX: str = 'complex'
+
+
+class SPLICE_SITE_TYPE(MavisNamespace):
+    DONOR: int = 3
+    ACCEPTOR: int = 5
 
-SPLICE_SITE_TYPE = MavisNamespace(DONOR=3, ACCEPTOR=5)
 
 SPLICE_SITE_RADIUS = 2
 """int: number of bases away from an exon boundary considered to be part of the splice site such that if it were altered
diff --git a/mavis/config.py b/mavis/config.py
index 480eb301..6ff74392 100644
--- a/mavis/config.py
+++ b/mavis/config.py
@@ -12,14 +12,7 @@
 from .bam import stats
 from .bam.cache import BamCache
 from .constants import PROTOCOL, SUBCOMMAND, float_fraction
-from .util import WeakMavisNamespace, bash_expands, filepath
-
-CONVERT_OPTIONS = WeakMavisNamespace()
-CONVERT_OPTIONS.add(
-    'assume_no_untemplated',
-    True,
-    defn='assume that if not given there is no untemplated sequence between the breakpoints',
-)
+from .util import bash_expands, filepath
 
 
 def calculate_bam_stats(config: Dict, library_name: str) -> Dict:
diff --git a/mavis/constants.py b/mavis/constants.py
index 72fccdf4..da2a8d40 100644
--- a/mavis/constants.py
+++ b/mavis/constants.py
@@ -4,6 +4,7 @@
 import argparse
 import os
 import re
+from typing import List
 
 from Bio.Alphabet import Gapped
 from Bio.Alphabet.IUPAC import ambiguous_dna
@@ -11,234 +12,48 @@
 from Bio.Seq import Seq
 from tab import cast_boolean, cast_null
 
-PROGNAME = 'mavis'
-EXIT_OK = 0
-EXIT_ERROR = 1
-EXIT_INCOMPLETE = 2
+PROGNAME: str = 'mavis'
+EXIT_OK: int = 0
+EXIT_ERROR: int = 1
+EXIT_INCOMPLETE: int = 2
 
 
-class MavisNamespace:
-    """
-    Namespace to hold module constants
-
-    Example:
-        >>> nspace = MavisNamespace(thing=1, otherthing=2)
-        >>> nspace.thing
-        1
-        >>> nspace.otherthing
-        2
-    """
-
-    DELIM = r'[;,\s]+'
-    """str: delimiter to use is parsing listable variables from the environment or config file"""
-
-    def __init__(self, *pos, **kwargs):
-        object.__setattr__(self, '_defns', {})
-        object.__setattr__(self, '_types', {})
-        object.__setattr__(self, '_members', {})
-        object.__setattr__(self, '_nullable', set())
-        object.__setattr__(self, '_listable', set())
-        object.__setattr__(self, '_env_overwritable', set())
-        object.__setattr__(self, '_env_prefix', 'MAVIS')
-        if '__name__' in kwargs:  # for building auto documentation
-            object.__setattr__(self, '__name__', kwargs.pop('__name__'))
-
-        for k in pos:
-            if k in self._members:
-                raise AttributeError('Cannot respecify existing attribute', k, self._members[k])
-            self[k] = k
-
-        for attr, val in kwargs.items():
-            if attr in self._members:
-                raise AttributeError(
-                    'Cannot respecify existing attribute', attr, self._members[attr]
-                )
-            self[attr] = val
-
-        for attr, value in self._members.items():
-            self._set_type(attr, type(value))
-
-    def __repr__(self):
-        return '{}({})'.format(
-            self.__class__.__name__,
-            ', '.join(sorted(['{}={}'.format(k, repr(v)) for k, v in self.items()])),
-        )
-
-    def discard(self, attr):
-        """
-        Remove a variable if it exists
-        """
-        self._members.pop(attr, None)
-        self._listable.discard(attr)
-        self._nullable.discard(attr)
-        self._defns.pop(attr, None)
-        self._types.pop(attr, None)
-        self._env_overwritable.discard(attr)
-
-    def get_env_name(self, attr):
-        """
-        Get the name of the corresponding environment variable
+class EnumType(type):
+    def __contains__(cls, member):
+        return member in cls.values()
 
-        Example:
-            >>> nspace = MavisNamespace(a=1)
-            >>> nspace.get_env_name('a')
-            'MAVIS_A'
-        """
-        if self._env_prefix:
-            return '{}_{}'.format(self._env_prefix, attr).upper()
-        return attr.upper()
+    def __getitem__(cls, item):
+        for k, v in cls.items():
+            if k == item:
+                return v
+        raise KeyError(item)
 
-    def get_env_var(self, attr):
+    def __iter__(cls):
         """
-        retrieve the environment variable definition of a given attribute
+        Returns an iterator over the member values in definition order.
         """
-        env_name = self.get_env_name(attr)
-        env = os.environ[env_name].strip()
-        attr_type = self._types.get(attr, str)
+        return iter(cls.values())
 
-        if attr in self._listable:
-            return self.parse_listable_string(env, attr_type, attr in self._nullable)
-        if attr in self._nullable and env.lower() == 'none':
-            return None
-        return attr_type(env)
 
+class MavisNamespace(metaclass=EnumType):
     @classmethod
-    def parse_listable_string(cls, string, cast_type=str, nullable=False):
-        """
-        Given some string, parse it into a list
-
-        Example:
-            >>> MavisNamespace.parse_listable_string('1,2,3', int)
-            [1, 2, 3]
-            >>> MavisNamespace.parse_listable_string('1;2,None', int, True)
-            [1, 2, None]
-        """
-        result = []
-        string = string.strip()
-        for val in re.split(cls.DELIM, string) if string else []:
-            if nullable and val.lower() == 'none':
-                result.append(None)
-            else:
-                result.append(cast_type(val))
-        return result
-
-    def is_env_overwritable(self, attr):
-        """
-        Returns:
-            bool: True if the variable is overrided by specifying the environment variable equivalent
-        """
-        return attr in self._env_overwritable
-
-    def is_listable(self, attr):
-        """
-        Returns:
-            bool: True if the variable should be parsed as a list
-        """
-        return attr in self._listable
-
-    def is_nullable(self, attr):
-        """
-        Returns:
-            bool: True if the variable can be set to None
-        """
-        return attr in self._nullable
-
-    def __getattribute__(self, attr):
-        try:
-            return object.__getattribute__(self, attr)
-        except AttributeError as err:
-            variables = object.__getattribute__(self, '_members')
-            if attr not in variables:
-                raise err
-            if self.is_env_overwritable(attr):
-                try:
-                    return self.get_env_var(attr)
-                except KeyError:
-                    pass
-            return variables[attr]
-
-    def items(self):
-        """
-        Example:
-            >>> MavisNamespace(thing=1, otherthing=2).items()
-            [('thing', 1), ('otherthing', 2)]
-        """
-        return [(k, self[k]) for k in self.keys()]
-
-    def to_dict(self):
-        return dict(self.items())
-
-    def __getitem__(self, key):
-        return getattr(self, key)
-
-    def __setitem__(self, key, val):
-        self.__setattr__(key, val)
-
-    def __setattr__(self, attr, val):
-        if attr.startswith('_'):
-            raise ValueError('cannot set private', attr)
-        object.__getattribute__(self, '_members')[attr] = val
-
-    def copy_from(self, source, attrs=None):
-        """
-        Copy variables from one namespace onto the current namespace
-        """
-        if attrs is None:
-            attrs = source.keys()
-        for attr in attrs:
-            self.add(
-                attr,
-                source[attr],
-                listable=source.is_listable(attr),
-                nullable=source.is_nullable(attr),
-                defn=source.define(attr, None),
-                cast_type=source.type(attr, None),
-            )
-
-    def get(self, key, *pos):
-        """
-        get an attribute, return a default (if given) if the attribute does not exist
-
-        Example:
-            >>> nspace = MavisNamespace(thing=1, otherthing=2)
-            >>> nspace.get('thing', 2)
-            1
-            >>> nspace.get('nonexistant_thing', 2)
-            2
-            >>> nspace.get('nonexistant_thing')
-            Traceback (most recent call last):
-            ....
-        """
-        if len(pos) > 1:
-            raise TypeError('too many arguments. get takes a single \'default\' value argument')
-        try:
-            return self[key]
-        except AttributeError as err:
-            if pos:
-                return pos[0]
-            raise err
-
-    def keys(self):
-        """
-        get the attribute keys as a list
+    def items(cls):
+        return [(k, v) for k, v in cls.__dict__.items() if not k.startswith('_')]
 
-        Example:
-            >>> MavisNamespace(thing=1, otherthing=2).keys()
-            ['thing', 'otherthing']
-        """
-        return [k for k in self._members]
+    @classmethod
+    def to_dict(cls):
+        return dict(cls.items())
 
-    def values(self):
-        """
-        get the attribute values as a list
+    @classmethod
+    def keys(cls):
+        return [k for k, v in cls.items()]
 
-        Example:
-            >>> MavisNamespace(thing=1, otherthing=2).values()
-            [1, 2]
-        """
-        return [self[k] for k in self._members]
+    @classmethod
+    def values(cls):
+        return [v for k, v in cls.items()]
 
-    def enforce(self, value):
+    @classmethod
+    def enforce(cls, value):
         """
         checks that the current namespace has a given value
 
@@ -249,18 +64,18 @@ def enforce(self, value):
             KeyError: the value did not exist
 
         Example:
-            >>> nspace = MavisNamespace(thing=1, otherthing=2)
             >>> nspace.enforce(1)
             1
             >>> nspace.enforce(3)
             Traceback (most recent call last):
             ....
         """
-        if value not in self.values():
-            raise KeyError('value {0} is not a valid member of '.format(repr(value)), self.values())
+        if value not in cls.values():
+            raise KeyError('value {0} is not a valid member of '.format(repr(value)), cls.values())
         return value
 
-    def reverse(self, value):
+    @classmethod
+    def reverse(cls, value):
         """
         for a given value, return the associated key
 
@@ -272,13 +87,12 @@ def reverse(self, value):
             KeyError: the value is not assigned
 
         Example:
-            >>> nspace = MavisNamespace(thing=1, otherthing=2)
             >>> nspace.reverse(1)
             'thing'
         """
         result = []
-        for key in self.keys():
-            if self[key] == value:
+        for key in cls.keys():
+            if cls[key] == value:
                 result.append(key)
         if len(result) > 1:
             raise KeyError('could not reverse, the mapping is not unique', value, result)
@@ -286,121 +100,6 @@ def reverse(self, value):
             raise KeyError('input value is not assigned to a key', value)
         return result[0]
 
-    def __iter__(self):
-        return iter(self.keys())
-
-    def _set_type(self, attr, cast_type):
-        if cast_type == bool:
-            self._types[attr] = cast_boolean
-        else:
-            self._types[attr] = cast_type
-
-    def type(self, attr, *pos):
-        """
-        returns the type
-
-        Example:
-            >>> nspace = MavisNamespace(thing=1, otherthing=2)
-            >>> nspace.type('thing')
-            <class 'int'>
-        """
-        if len(pos) > 1:
-            raise TypeError('too many arguments. type takes a single \'default\' value argument')
-        try:
-            return self._types[attr]
-        except AttributeError as err:
-            if pos:
-                return pos[0]
-            raise err
-
-    def define(self, attr, *pos):
-        """
-        Get the definition of a given attribute or return a default (when given) if the attribute does not exist
-
-        Returns:
-            str: definition for the attribute
-
-        Raises:
-            KeyError: the attribute does not exist and a default was not given
-
-        Example:
-            >>> nspace = MavisNamespace()
-            >>> nspace.add('thing', 1, defn='I am a thing')
-            >>> nspace.add('otherthing', 2)
-            >>> nspace.define('thing')
-            'I am a thing'
-            >>> nspace.define('otherthing')
-            Traceback (most recent call last):
-            ....
-            >>> nspace.define('otherthing', 'I am some other thing')
-            'I am some other thing'
-        """
-        if len(pos) > 1:
-            raise TypeError('too many arguments. define takes a single \'default\' value argument')
-        try:
-            return self._defns[attr]
-        except KeyError as err:
-            if pos:
-                return pos[0]
-            raise err
-
-    def add(
-        self,
-        attr,
-        value,
-        defn=None,
-        cast_type=None,
-        nullable=False,
-        env_overwritable=False,
-        listable=False,
-    ):
-        """
-        Add an attribute to the name space
-
-        Args:
-            attr (str): name of the attribute being added
-            value: the value of the attribute
-            defn (str): the definition, will be used in generating documentation and help menus
-            cast_type (Callable): the function to use in casting the value
-            nullable (bool): True if this attribute can have a None value
-            env_overwritable (bool): True if this attribute will be overriden by its environment variable equivalent
-            listable (bool): True if this attribute can have multiple values
-
-        Example:
-            >>> nspace = MavisNamespace()
-            >>> nspace.add('thing', 1, int, 'I am a thing')
-            >>> nspace = MavisNamespace()
-            >>> nspace.add('thing', 1, int)
-            >>> nspace = MavisNamespace()
-            >>> nspace.add('thing', 1)
-            >>> nspace = MavisNamespace()
-            >>> nspace.add('thing', value=1, cast_type=int, defn='I am a thing')
-        """
-        if cast_type:
-            self._set_type(attr, cast_type)
-        else:
-            self._set_type(attr, type(value))
-        if defn:
-            self._defns[attr] = defn
-
-        if nullable:
-            self._nullable.add(attr)
-        if env_overwritable:
-            self._env_overwritable.add(attr)
-        if listable:
-            self._listable.add(attr)
-        self[attr] = value
-
-    def __call__(self, value):
-        try:
-            return self.enforce(value)
-        except KeyError:
-            raise TypeError(
-                'Invalid value {} for {}. Must be a valid member: {}'.format(
-                    repr(value), self.__class__.__name__, self.values()
-                )
-            )
-
 
 def float_fraction(num):
     """
@@ -424,34 +123,27 @@ def float_fraction(num):
     return num
 
 
-COMPLETE_STAMP = 'MAVIS.COMPLETE'
-"""str: Filename for all complete stamp files"""
-
-SUBCOMMAND = MavisNamespace(
-    ANNOTATE='annotate',
-    VALIDATE='validate',
-    CLUSTER='cluster',
-    PAIR='pairing',
-    SUMMARY='summary',
-    CONVERT='convert',
-    OVERLAY='overlay',
-    SETUP='setup',
-)
-"""MavisNamespace: holds controlled vocabulary for allowed pipeline stage values
-
-- annotate
-- cluster
-- config
-- convert
-- pairing
-- pipeline
-- summary
-- validate
-"""
+COMPLETE_STAMP: str = 'MAVIS.COMPLETE'
+"""Filename for all complete stamp files"""
+
+
+class SUBCOMMAND(MavisNamespace):
+    """
+    Controlled vocabulary of the allowed pipeline stage (subcommand) names
+    """
+
+    ANNOTATE: str = 'annotate'
+    VALIDATE: str = 'validate'
+    CLUSTER: str = 'cluster'
+    PAIR: str = 'pairing'  # NOTE: the member is PAIR but its value is 'pairing'
+    SUMMARY: str = 'summary'
+    CONVERT: str = 'convert'
+    OVERLAY: str = 'overlay'
+    SETUP: str = 'setup'
 
 
-CODON_SIZE = 3
-"""int: the number of bases making up a codon"""
+CODON_SIZE: int = 3
+"""the number of bases making up a codon"""
 
 
 def reverse_complement(s):
@@ -478,16 +170,16 @@ def reverse_complement(s):
     return str(input_string.reverse_complement())
 
 
-def translate(s, reading_frame=0):
+def translate(s: str, reading_frame: int = 0) -> str:
     """
     given a DNA sequence, translates it and returns the protein amino acid sequence
 
     Args:
-        s (str): the input DNA sequence
-        reading_frame (int): where to start translating the sequence
+        s: the input DNA sequence
+        reading_frame: where to start translating the sequence
 
     Returns:
-        str: the amino acid sequence
+        the amino acid sequence
     """
     reading_frame = reading_frame % CODON_SIZE
 
@@ -497,117 +189,167 @@ def translate(s, reading_frame=0):
     elif len(temp) % 3 == 2:
         temp = temp[:-2]
     temp = Seq(temp, DNA_ALPHABET)
-    return str(temp.translate())
+    return str(temp.translate())  # type: ignore
 
 
-GAP = '-'
+GAP: str = '-'
 
-ORIENT = MavisNamespace(LEFT='L', RIGHT='R', NS='?')
-"""MavisNamespace: holds controlled vocabulary for allowed orientation values
 
-- ``LEFT``: left wrt to the positive/forward strand
-- ``RIGHT``: right wrt to the positive/forward strand
-- ``NS``: orientation is not specified
-"""
-setattr(ORIENT, 'expand', lambda x: [ORIENT.LEFT, ORIENT.RIGHT] if x == ORIENT.NS else [x])
-setattr(ORIENT, 'compare', lambda x, y: True if ORIENT.NS in [x, y] else (x == y))
+class ORIENT(MavisNamespace):
+    """
+    holds controlled vocabulary for allowed orientation values
 
-PROTOCOL = MavisNamespace(GENOME='genome', TRANS='transcriptome')
-"""MavisNamespace: holds controlled vocabulary for allowed protocol values
+    Attributes:
+        LEFT: left with respect to the positive/forward strand
+        RIGHT: right with respect to the positive/forward strand
+        NS: orientation is not specified
+    """
 
-- ``GENOME``: genome
-- ``TRANS``: transcriptome
-"""
+    LEFT: str = 'L'
+    RIGHT: str = 'R'
+    NS: str = '?'
 
-DISEASE_STATUS = MavisNamespace(DISEASED='diseased', NORMAL='normal')
-"""MavisNamespace: holds controlled vocabulary for allowed disease status
+    @classmethod
+    def expand(cls, orientation: str) -> List[str]:
+        """list the concrete orientations covered by `orientation` (NS covers both)"""
+        # NS acts as a wildcard, so it expands to every specified orientation
+        return [cls.LEFT, cls.RIGHT] if orientation == cls.NS else [orientation]
 
-- ``DISEASED``: diseased
-- ``NORMAL``: normal
-"""
+    @classmethod
+    def compare(cls, first, second) -> bool:
+        """True when both orientations are equal or either one is NS (not specified)"""
+        # tuple (not set) membership: a set literal raises TypeError on unhashable input
+        return cls.NS in (first, second) or first == second
 
-STRAND = MavisNamespace(POS='+', NEG='-', NS='?')
-"""MavisNamespace: holds controlled vocabulary for allowed strand values
 
-- ``POS``: the positive/forward strand
-- ``NEG``: the negative/reverse strand
-- ``NS``: strand is not specified
-"""
-setattr(STRAND, 'expand', lambda x: [STRAND.POS, STRAND.NEG] if x == STRAND.NS else [x])
-setattr(STRAND, 'compare', lambda x, y: True if STRAND.NS in [x, y] else (x == y))
-
-SVTYPE = MavisNamespace(
-    DEL='deletion',
-    TRANS='translocation',
-    ITRANS='inverted translocation',
-    INV='inversion',
-    INS='insertion',
-    DUP='duplication',
-)
-"""MavisNamespace: holds controlled vocabulary for acceptable structural variant classifications
-
-- ``DEL``: deletion
-- ``TRANS``: translocation
-- ``ITRANS``: inverted translocation
-- ``INV``: inversion
-- ``INS``: insertion
-- ``DUP``: duplication
-"""
+class PROTOCOL(MavisNamespace):
+    """
+    holds controlled vocabulary for allowed protocol values
+    """
 
-CIGAR = MavisNamespace(M=0, I=1, D=2, N=3, S=4, H=5, P=6, X=8, EQ=7)  # noqa
-"""MavisNamespace: Enum-like. For readable cigar values
+    GENOME: str = 'genome'
+    TRANS: str = 'transcriptome'
 
-- ``M``: alignment match (can be a sequence match or mismatch)
-- ``I``: insertion to the reference
-- ``D``: deletion from the reference
-- ``N``: skipped region from the reference
-- ``S``: soft clipping (clipped sequences present in SEQ)
-- ``H``: hard clipping (clipped sequences NOT present in SEQ)
-- ``P``: padding (silent deletion from padded reference)
-- ``EQ``: sequence match (=)
-- ``X``: sequence mismatch
 
-note: descriptions are taken from the `samfile documentation <https://samtools.github.io/hts-specs/SAMv1.pdf>`_
-"""
+class DISEASE_STATUS(MavisNamespace):
+    """
+    holds controlled vocabulary for allowed disease status
+    """
 
-NA_MAPPING_QUALITY = 255
-"""int: mapping quality value to indicate mapping was not performed/calculated"""
-
-PYSAM_READ_FLAGS = MavisNamespace(
-    REVERSE=16,
-    MATE_REVERSE=32,
-    UNMAPPED=4,
-    MATE_UNMAPPED=8,
-    FIRST_IN_PAIR=64,
-    LAST_IN_PAIR=128,
-    SECONDARY=256,
-    MULTIMAP=1,
-    SUPPLEMENTARY=2048,
-    TARGETED_ALIGNMENT='ta',
-    RECOMPUTED_CIGAR='rc',
-    BLAT_RANK='br',
-    BLAT_SCORE='bs',
-    BLAT_ALIGNMENTS='ba',
-    BLAT_PERCENT_IDENTITY='bi',
-    BLAT_PMS='bp',
-)
-
-"""MavisNamespace: Enum-like. For readable PYSAM flag constants
-
-- ``MULTIMAP``: template having multiple segments in sequencing
-- ``UNMAPPED``: segment unmapped
-- ``MATE_UNMAPPED``: next segment in the template unmapped
-- ``REVERSE``: SEQ being reverse complemented
-- ``MATE_REVERSE``: SEQ of the next segment in the template being reverse complemented
-- ``FIRST_IN_PAIR``: the first segment in the template
-- ``LAST_IN_PAIR``: the last segment in the template
-- ``SECONDARY``: secondary alignment
-- ``SUPPLEMENTARY``: supplementary alignment
-
-note: descriptions are taken from the `samfile documentation <https://samtools.github.io/hts-specs/SAMv1.pdf>`_
-"""
+    DISEASED: str = 'diseased'
+    NORMAL: str = 'normal'
+
+
+class STRAND(MavisNamespace):
+    """
+    holds controlled vocabulary for allowed strand values
+
+    Attributes:
+        POS: the positive/forward strand
+        NEG: the negative/reverse strand
+        NS: strand is not specified
+    """
+
+    POS: str = '+'
+    NEG: str = '-'
+    NS: str = '?'
+
+    @classmethod
+    def expand(cls, strand: str) -> List[str]:
+        """list the concrete strands covered by `strand` (NS covers both POS and NEG)"""
+        # NS acts as a wildcard, so it expands to every specified strand
+        return [cls.POS, cls.NEG] if strand == cls.NS else [strand]
+
+    @classmethod
+    def compare(cls, first, second) -> bool:
+        """True when both strands are equal or either one is NS (not specified)"""
+        # tuple (not set) membership: a set literal raises TypeError on unhashable input
+        return cls.NS in (first, second) or first == second
+
+
+class SVTYPE(MavisNamespace):
+    """
+    holds controlled vocabulary for acceptable structural variant classifications
+    """
 
-# read paired, read mapped in proper pair, mate reverse strand, first in pair
+    DEL: str = 'deletion'
+    TRANS: str = 'translocation'
+    ITRANS: str = 'inverted translocation'
+    INV: str = 'inversion'
+    INS: str = 'insertion'
+    DUP: str = 'duplication'
+
+
+class CIGAR(MavisNamespace):
+    """
+    Enum-like. For readable cigar values
+
+
+    Attributes:
+        M: alignment match (can be a sequence match or mismatch)
+        I: insertion to the reference
+        D: deletion from the reference
+        N: skipped region from the reference
+        S: soft clipping (clipped sequences present in SEQ)
+        H: hard clipping (clipped sequences NOT present in SEQ)
+        P: padding (silent deletion from padded reference)
+        EQ: sequence match (=)
+        X: sequence mismatch
+
+    Note:
+        descriptions are taken from the `samfile documentation <https://samtools.github.io/hts-specs/SAMv1.pdf>`_
+    """
+
+    M: int = 0
+    I: int = 1  # noqa: E741
+    D: int = 2
+    N: int = 3
+    S: int = 4
+    H: int = 5
+    P: int = 6
+    X: int = 8  # declared before EQ although EQ (7) comes first numerically
+    EQ: int = 7
+
+
+NA_MAPPING_QUALITY: int = 255
+"""mapping quality value to indicate mapping was not performed/calculated"""
+
+
+class PYSAM_READ_FLAGS(MavisNamespace):
+    """
+    Enum-like. For readable PYSAM flag constants
+
+    Attributes:
+        MULTIMAP: template having multiple segments in sequencing
+        UNMAPPED: segment unmapped
+        MATE_UNMAPPED: next segment in the template unmapped
+        REVERSE: SEQ being reverse complemented
+        MATE_REVERSE: SEQ of the next segment in the template being reverse complemented
+        FIRST_IN_PAIR: the first segment in the template
+        LAST_IN_PAIR: the last segment in the template
+        SECONDARY: secondary alignment
+        SUPPLEMENTARY: supplementary alignment
+
+    Note:
+        descriptions are taken from the `samfile documentation <https://samtools.github.io/hts-specs/SAMv1.pdf>`_
+    """
+
+    REVERSE: int = 16
+    MATE_REVERSE: int = 32
+    UNMAPPED: int = 4
+    MATE_UNMAPPED: int = 8
+    FIRST_IN_PAIR: int = 64
+    LAST_IN_PAIR: int = 128
+    SECONDARY: int = 256
+    MULTIMAP: int = 1
+    SUPPLEMENTARY: int = 2048
+    TARGETED_ALIGNMENT: str = 'ta'  # str codes from here on, not bit flags; presumably custom read tags - TODO confirm
+    RECOMPUTED_CIGAR: str = 'rc'
+    BLAT_RANK: str = 'br'
+    BLAT_SCORE: str = 'bs'
+    BLAT_ALIGNMENTS: str = 'ba'
+    BLAT_PERCENT_IDENTITY: str = 'bi'
+    BLAT_PMS: str = 'bp'
 
 
 def _match_ambiguous_dna(x, y):
@@ -631,177 +373,200 @@ def _match_ambiguous_dna(x, y):
 DNA_ALPHABET = alphabet = Gapped(ambiguous_dna, '-')
 DNA_ALPHABET.match = lambda x, y: _match_ambiguous_dna(x, y)
 
-FLAGS = MavisNamespace(LQ='LOWQUAL')
 
-READ_PAIR_TYPE = MavisNamespace(RR='RR', LL='LL', RL='RL', LR='LR')
+class FLAGS(MavisNamespace):
+    LQ: str = 'LOWQUAL'
 
-CALL_METHOD = MavisNamespace(
-    CONTIG='contig',
-    SPLIT='split reads',
-    FLANK='flanking reads',
-    SPAN='spanning reads',
-    INPUT='input',
-)
-"""MavisNamespace: holds controlled vocabulary for allowed call methods
 
-- ``CONTIG``: a contig was assembled and aligned across the breakpoints
-- ``SPLIT``: the event was called by [split read](/glossary/#split-read)
-- ``FLANK``: the event was called by [flanking read pair](/glossary/#flanking-read-pair)
-- ``SPAN``: the event was called by [spanning read](/glossary/#spanning-read)
-"""
+class READ_PAIR_TYPE(MavisNamespace):
+    RR: str = 'RR'
+    LL: str = 'LL'
+    RL: str = 'RL'
+    LR: str = 'LR'
 
-GENE_PRODUCT_TYPE = MavisNamespace(SENSE='sense', ANTI_SENSE='anti-sense')
-"""MavisNamespace: controlled vocabulary for gene products
 
-- ``SENSE``: the gene product is a sense fusion
-- ``ANTI_SENSE``: the gene product is anti-sense
-"""
+class CALL_METHOD(MavisNamespace):
+    """
+    holds controlled vocabulary for allowed call methods
 
-PRIME = MavisNamespace(FIVE=5, THREE=3)
-"""MavisNamespace: holds controlled vocabulary
+    Attributes:
+        CONTIG: a contig was assembled and aligned across the breakpoints
+        SPLIT: the event was called by [split read](/glossary/#split-read)
+        FLANK: the event was called by [flanking read pair](/glossary/#flanking-read-pair)
+        SPAN: the event was called by [spanning read](/glossary/#spanning-read)"""
 
-- ``FIVE``: five prime
-- ``THREE``: three prime
-"""
+    CONTIG: str = 'contig'
+    SPLIT: str = 'split reads'
+    FLANK: str = 'flanking reads'
+    SPAN: str = 'spanning reads'
+    INPUT: str = 'input'
+
+
+class GENE_PRODUCT_TYPE(MavisNamespace):
+    """
+    controlled vocabulary for gene products
+
+    Attributes:
+        SENSE: the gene product is a sense fusion
+        ANTI_SENSE: the gene product is anti-sense
+    """
+
+    SENSE: str = 'sense'
+    ANTI_SENSE: str = 'anti-sense'
+
+
+class PRIME(MavisNamespace):
+    """
+    Attributes:
+        FIVE: five prime
+        THREE: three prime
+    """
+
+    FIVE: int = 5
+    THREE: int = 3
 
-START_AA = 'M'
-"""str: The amino acid expected to start translation
+
+START_AA: str = 'M'
+"""The amino acid expected to start translation
 """
-STOP_AA = '*'
-"""str: The amino acid expected to end translation
+STOP_AA: str = '*'
+"""The amino acid expected to end translation
 """
 
-GIEMSA_STAIN = MavisNamespace(
-    GNEG='gneg',
-    GPOS33='gpos33',
-    GPOS50='gpos50',
-    GPOS66='gpos66',
-    GPOS75='gpos75',
-    GPOS25='gpos25',
-    GPOS100='gpos100',
-    ACEN='acen',
-    GVAR='gvar',
-    STALK='stalk',
-)
-"""MavisNamespace: holds controlled vocabulary relating to stains of chromosome bands"""
+
+class GIEMSA_STAIN(MavisNamespace):
+    """
+    holds controlled vocabulary relating to stains of chromosome bands
+    """
+
+    GNEG: str = 'gneg'
+    GPOS33: str = 'gpos33'
+    GPOS50: str = 'gpos50'
+    GPOS66: str = 'gpos66'
+    GPOS75: str = 'gpos75'
+    GPOS25: str = 'gpos25'
+    GPOS100: str = 'gpos100'
+    ACEN: str = 'acen'
+    GVAR: str = 'gvar'
+    STALK: str = 'stalk'
+
 
 # content related to tabbed files for input/output
 # ensure that we don't have to change ALL the code when we update column names
+class COLUMNS(MavisNamespace):
+    """
+    Column names for i/o files used throughout the pipeline
 
+    see [column descriptions](/outputs/columns)
+    """
 
-COLUMNS = MavisNamespace(
-    tracking_id='tracking_id',
-    library='library',
-    cluster_id='cluster_id',
-    cluster_size='cluster_size',
-    validation_id='validation_id',
-    annotation_id='annotation_id',
-    product_id='product_id',
-    event_type='event_type',
-    pairing='pairing',
-    inferred_pairing='inferred_pairing',
-    gene1='gene1',
-    gene1_direction='gene1_direction',
-    gene2='gene2',
-    gene2_direction='gene2_direction',
-    gene1_aliases='gene1_aliases',
-    gene2_aliases='gene2_aliases',
-    gene_product_type='gene_product_type',
-    transcript1='transcript1',
-    transcript2='transcript2',
-    fusion_splicing_pattern='fusion_splicing_pattern',
-    fusion_cdna_coding_start='fusion_cdna_coding_start',
-    fusion_cdna_coding_end='fusion_cdna_coding_end',
-    fusion_mapped_domains='fusion_mapped_domains',
-    fusion_sequence_fasta_id='fusion_sequence_fasta_id',
-    fusion_sequence_fasta_file='fusion_sequence_fasta_file',
-    fusion_protein_hgvs='fusion_protein_hgvs',
-    annotation_figure='annotation_figure',
-    annotation_figure_legend='annotation_figure_legend',
-    genes_encompassed='genes_encompassed',
-    genes_overlapping_break1='genes_overlapping_break1',
-    genes_overlapping_break2='genes_overlapping_break2',
-    genes_proximal_to_break1='genes_proximal_to_break1',
-    genes_proximal_to_break2='genes_proximal_to_break2',
-    break1_chromosome='break1_chromosome',
-    break1_position_start='break1_position_start',
-    break1_position_end='break1_position_end',
-    break1_orientation='break1_orientation',
-    exon_last_5prime='exon_last_5prime',
-    exon_first_3prime='exon_first_3prime',
-    break1_strand='break1_strand',
-    break1_seq='break1_seq',
-    break2_chromosome='break2_chromosome',
-    break2_position_start='break2_position_start',
-    break2_position_end='break2_position_end',
-    break2_orientation='break2_orientation',
-    break2_strand='break2_strand',
-    break2_seq='break2_seq',
-    opposing_strands='opposing_strands',
-    stranded='stranded',
-    protocol='protocol',
-    disease_status='disease_status',
-    tools='tools',
-    call_method='call_method',
-    break1_ewindow='break1_ewindow',
-    break1_ewindow_count='break1_ewindow_count',
-    break1_ewindow_practical_coverage='break1_ewindow_practical_coverage',
-    break1_homologous_seq='break1_homologous_seq',
-    break1_split_read_names='break1_split_read_names',
-    break1_split_reads='break1_split_reads',
-    break1_split_reads_forced='break1_split_reads_forced',
-    break2_ewindow='break2_ewindow',
-    break2_ewindow_count='break2_ewindow_count',
-    break2_ewindow_practical_coverage='break2_ewindow_practical_coverage',
-    break2_homologous_seq='break2_homologous_seq',
-    break2_split_read_names='break2_split_read_names',
-    break2_split_reads='break2_split_reads',
-    break2_split_reads_forced='break2_split_reads_forced',
-    contig_alignment_query_consumption='contig_alignment_query_consumption',
-    contig_alignment_score='contig_alignment_score',
-    contig_alignment_query_name='contig_alignment_query_name',
-    contig_read_depth='contig_read_depth',
-    contig_break1_read_depth='contig_break1_read_depth',
-    contig_break2_read_depth='contig_break2_read_depth',
-    contig_alignment_rank='contig_alignment_rank',
-    contig_build_score='contig_build_score',
-    contig_remap_score='contig_remap_score',
-    contig_remap_coverage='contig_remap_coverage',
-    contig_remapped_read_names='contig_remapped_read_names',
-    contig_remapped_reads='contig_remapped_reads',
-    contig_seq='contig_seq',
-    contig_strand_specific='contig_strand_specific',
-    contigs_assembled='contigs_assembled',
-    call_sequence_complexity='call_sequence_complexity',
-    spanning_reads='spanning_reads',
-    spanning_read_names='spanning_read_names',
-    flanking_median_fragment_size='flanking_median_fragment_size',
-    flanking_pairs='flanking_pairs',
-    flanking_pairs_compatible='flanking_pairs_compatible',
-    flanking_pairs_read_names='flanking_pairs_read_names',
-    flanking_pairs_compatible_read_names='flanking_pairs_compatible_read_names',
-    flanking_stdev_fragment_size='flanking_stdev_fragment_size',
-    linking_split_read_names='linking_split_read_names',
-    linking_split_reads='linking_split_reads',
-    raw_break1_half_mapped_reads='raw_break1_half_mapped_reads',
-    raw_break1_split_reads='raw_break1_split_reads',
-    raw_break2_half_mapped_reads='raw_break2_half_mapped_reads',
-    raw_break2_split_reads='raw_break2_split_reads',
-    raw_flanking_pairs='raw_flanking_pairs',
-    raw_spanning_reads='raw_spanning_reads',
-    untemplated_seq='untemplated_seq',
-    filter_comment='filter_comment',
-    cdna_synon='cdna_synon',
-    protein_synon='protein_synon',
-    supplementary_call='supplementary_call',
-    net_size='net_size',
-    repeat_count='repeat_count',
-    assumed_untemplated='assumed_untemplated',
-)
-"""MavisNamespace: Column names for i/o files used throughout the pipeline
-
-see [column descriptions](/outputs/columns)
-"""
+    tracking_id: str = 'tracking_id'
+    library: str = 'library'
+    cluster_id: str = 'cluster_id'
+    cluster_size: str = 'cluster_size'
+    validation_id: str = 'validation_id'
+    annotation_id: str = 'annotation_id'
+    product_id: str = 'product_id'
+    event_type: str = 'event_type'
+    pairing: str = 'pairing'
+    inferred_pairing: str = 'inferred_pairing'
+    gene1: str = 'gene1'
+    gene1_direction: str = 'gene1_direction'
+    gene2: str = 'gene2'
+    gene2_direction: str = 'gene2_direction'
+    gene1_aliases: str = 'gene1_aliases'
+    gene2_aliases: str = 'gene2_aliases'
+    gene_product_type: str = 'gene_product_type'
+    transcript1: str = 'transcript1'
+    transcript2: str = 'transcript2'
+    fusion_splicing_pattern: str = 'fusion_splicing_pattern'
+    fusion_cdna_coding_start: str = 'fusion_cdna_coding_start'
+    fusion_cdna_coding_end: str = 'fusion_cdna_coding_end'
+    fusion_mapped_domains: str = 'fusion_mapped_domains'
+    fusion_sequence_fasta_id: str = 'fusion_sequence_fasta_id'
+    fusion_sequence_fasta_file: str = 'fusion_sequence_fasta_file'
+    fusion_protein_hgvs: str = 'fusion_protein_hgvs'
+    annotation_figure: str = 'annotation_figure'
+    annotation_figure_legend: str = 'annotation_figure_legend'
+    genes_encompassed: str = 'genes_encompassed'
+    genes_overlapping_break1: str = 'genes_overlapping_break1'
+    genes_overlapping_break2: str = 'genes_overlapping_break2'
+    genes_proximal_to_break1: str = 'genes_proximal_to_break1'
+    genes_proximal_to_break2: str = 'genes_proximal_to_break2'
+    break1_chromosome: str = 'break1_chromosome'
+    break1_position_start: str = 'break1_position_start'
+    break1_position_end: str = 'break1_position_end'
+    break1_orientation: str = 'break1_orientation'
+    exon_last_5prime: str = 'exon_last_5prime'
+    exon_first_3prime: str = 'exon_first_3prime'
+    break1_strand: str = 'break1_strand'
+    break1_seq: str = 'break1_seq'
+    break2_chromosome: str = 'break2_chromosome'
+    break2_position_start: str = 'break2_position_start'
+    break2_position_end: str = 'break2_position_end'
+    break2_orientation: str = 'break2_orientation'
+    break2_strand: str = 'break2_strand'
+    break2_seq: str = 'break2_seq'
+    opposing_strands: str = 'opposing_strands'
+    stranded: str = 'stranded'
+    protocol: str = 'protocol'
+    disease_status: str = 'disease_status'
+    tools: str = 'tools'
+    call_method: str = 'call_method'
+    break1_ewindow: str = 'break1_ewindow'
+    break1_ewindow_count: str = 'break1_ewindow_count'
+    break1_ewindow_practical_coverage: str = 'break1_ewindow_practical_coverage'
+    break1_homologous_seq: str = 'break1_homologous_seq'
+    break1_split_read_names: str = 'break1_split_read_names'
+    break1_split_reads: str = 'break1_split_reads'
+    break1_split_reads_forced: str = 'break1_split_reads_forced'
+    break2_ewindow: str = 'break2_ewindow'
+    break2_ewindow_count: str = 'break2_ewindow_count'
+    break2_ewindow_practical_coverage: str = 'break2_ewindow_practical_coverage'
+    break2_homologous_seq: str = 'break2_homologous_seq'
+    break2_split_read_names: str = 'break2_split_read_names'
+    break2_split_reads: str = 'break2_split_reads'
+    break2_split_reads_forced: str = 'break2_split_reads_forced'
+    contig_alignment_query_consumption: str = 'contig_alignment_query_consumption'
+    contig_alignment_score: str = 'contig_alignment_score'
+    contig_alignment_query_name: str = 'contig_alignment_query_name'
+    contig_read_depth: str = 'contig_read_depth'
+    contig_break1_read_depth: str = 'contig_break1_read_depth'
+    contig_break2_read_depth: str = 'contig_break2_read_depth'
+    contig_alignment_rank: str = 'contig_alignment_rank'
+    contig_build_score: str = 'contig_build_score'
+    contig_remap_score: str = 'contig_remap_score'
+    contig_remap_coverage: str = 'contig_remap_coverage'
+    contig_remapped_read_names: str = 'contig_remapped_read_names'
+    contig_remapped_reads: str = 'contig_remapped_reads'
+    contig_seq: str = 'contig_seq'
+    contig_strand_specific: str = 'contig_strand_specific'
+    contigs_assembled: str = 'contigs_assembled'
+    call_sequence_complexity: str = 'call_sequence_complexity'
+    spanning_reads: str = 'spanning_reads'
+    spanning_read_names: str = 'spanning_read_names'
+    flanking_median_fragment_size: str = 'flanking_median_fragment_size'
+    flanking_pairs: str = 'flanking_pairs'
+    flanking_pairs_compatible: str = 'flanking_pairs_compatible'
+    flanking_pairs_read_names: str = 'flanking_pairs_read_names'
+    flanking_pairs_compatible_read_names: str = 'flanking_pairs_compatible_read_names'
+    flanking_stdev_fragment_size: str = 'flanking_stdev_fragment_size'
+    linking_split_read_names: str = 'linking_split_read_names'
+    linking_split_reads: str = 'linking_split_reads'
+    raw_break1_half_mapped_reads: str = 'raw_break1_half_mapped_reads'
+    raw_break1_split_reads: str = 'raw_break1_split_reads'
+    raw_break2_half_mapped_reads: str = 'raw_break2_half_mapped_reads'
+    raw_break2_split_reads: str = 'raw_break2_split_reads'
+    raw_flanking_pairs: str = 'raw_flanking_pairs'
+    raw_spanning_reads: str = 'raw_spanning_reads'
+    untemplated_seq: str = 'untemplated_seq'
+    filter_comment: str = 'filter_comment'
+    cdna_synon: str = 'cdna_synon'
+    protein_synon: str = 'protein_synon'
+    supplementary_call: str = 'supplementary_call'
+    net_size: str = 'net_size'
+    repeat_count: str = 'repeat_count'
+    assumed_untemplated: str = 'assumed_untemplated'
 
 
 def sort_columns(input_columns):
diff --git a/mavis/main.py b/mavis/main.py
index dfa9f127..1d86fc17 100644
--- a/mavis/main.py
+++ b/mavis/main.py
@@ -158,7 +158,7 @@ def create_parser(argv):
         action=_config.RangeAppendAction,
     )
 
-    return parser, _util.MavisNamespace(**parser.parse_args(argv).__dict__)
+    return parser, parser.parse_args(argv)
 
 
 def main(argv=None):
diff --git a/mavis/pairing/constants.py b/mavis/pairing/constants.py
index eebeffec..69077f4d 100644
--- a/mavis/pairing/constants.py
+++ b/mavis/pairing/constants.py
@@ -1,12 +1,12 @@
+from typing import Dict
+
 from ..constants import CALL_METHOD, MavisNamespace
 from ..schemas import DEFAULTS
 
-PAIRING_DISTANCES = MavisNamespace(
-    **{
-        CALL_METHOD.FLANK: DEFAULTS['pairing.flanking_call_distance'],
-        CALL_METHOD.SPAN: DEFAULTS['pairing.spanning_call_distance'],
-        CALL_METHOD.SPLIT: DEFAULTS['pairing.split_call_distance'],
-        CALL_METHOD.CONTIG: DEFAULTS['pairing.contig_call_distance'],
-        CALL_METHOD.INPUT: DEFAULTS['pairing.input_call_distance'],
-    }
-)
+PAIRING_DISTANCES: Dict[str, int] = {
+    CALL_METHOD.FLANK: DEFAULTS['pairing.flanking_call_distance'],
+    CALL_METHOD.SPAN: DEFAULTS['pairing.spanning_call_distance'],
+    CALL_METHOD.SPLIT: DEFAULTS['pairing.split_call_distance'],
+    CALL_METHOD.CONTIG: DEFAULTS['pairing.contig_call_distance'],
+    CALL_METHOD.INPUT: DEFAULTS['pairing.input_call_distance'],
+}
diff --git a/mavis/summary/constants.py b/mavis/summary/constants.py
index 27de3695..4ac58d00 100644
--- a/mavis/summary/constants.py
+++ b/mavis/summary/constants.py
@@ -2,16 +2,16 @@
 
 HOMOPOLYMER_MIN_LENGTH = 3
 
-PAIRING_STATE = MavisNamespace(
-    EXP='expressed',
-    NO_EXP='not expressed',
-    SOMATIC='somatic',
-    GERMLINE='germline',
-    CO_EXP='co-expressed',
-    GERMLINE_EXP='germline expression',
-    SOMATIC_EXP='somatic expression',
-    MATCH='matched',
-    NO_MATCH='not matched',
-    GENOMIC='genomic support',
-    NO_GENOMIC='no genomic support',
-)
+
+class PAIRING_STATE(MavisNamespace):
+    EXP: str = 'expressed'
+    NO_EXP: str = 'not expressed'
+    SOMATIC: str = 'somatic'
+    GERMLINE: str = 'germline'
+    CO_EXP: str = 'co-expressed'
+    GERMLINE_EXP: str = 'germline expression'
+    SOMATIC_EXP: str = 'somatic expression'
+    MATCH: str = 'matched'
+    NO_MATCH: str = 'not matched'
+    GENOMIC: str = 'genomic support'
+    NO_GENOMIC: str = 'no genomic support'
diff --git a/mavis/tools/constants.py b/mavis/tools/constants.py
index 3df79b7d..d2412dfa 100644
--- a/mavis/tools/constants.py
+++ b/mavis/tools/constants.py
@@ -1,32 +1,35 @@
-from ..constants import MavisNamespace, SVTYPE
+from ..constants import SVTYPE, MavisNamespace
 
-SUPPORTED_TOOL = MavisNamespace(
-    MANTA='manta',
-    DELLY='delly',
-    TA='transabyss',
-    PINDEL='pindel',
-    CHIMERASCAN='chimerascan',
-    MAVIS='mavis',
-    DEFUSE='defuse',
-    BREAKDANCER='breakdancer',
-    VCF='vcf',
-    BREAKSEQ='breakseq',
-    CNVNATOR='cnvnator',
-    STRELKA='strelka',
-    STARFUSION='starfusion',
-)
-"""
-Supported Tools used to call SVs and then used as input into MAVIS
 
-- chimerascan [Iyer-2011]_
-- defuse [McPherson-2011]_
-- delly [Rausch-2012]_
-- manta [Chen-2016]_
-- pindel [Ye-2009]_
-- transabyss [Robertson-2010]_
-"""
+class SUPPORTED_TOOL(MavisNamespace):
+    """
+    Names of the supported SV-calling tools whose output can be used as input into MAVIS
+
+    Attributes:
+        CHIMERASCAN: chimerascan [Iyer-2011]_
+        DEFUSE: defuse [McPherson-2011]_
+        DELLY: delly [Rausch-2012]_
+        MANTA: manta [Chen-2016]_
+        PINDEL: pindel [Ye-2009]_
+        TA: transabyss [Robertson-2010]_
+    """
+
+    MANTA: str = 'manta'
+    DELLY: str = 'delly'
+    TA: str = 'transabyss'
+    PINDEL: str = 'pindel'
+    CHIMERASCAN: str = 'chimerascan'
+    MAVIS: str = 'mavis'
+    DEFUSE: str = 'defuse'
+    BREAKDANCER: str = 'breakdancer'
+    VCF: str = 'vcf'
+    BREAKSEQ: str = 'breakseq'
+    CNVNATOR: str = 'cnvnator'
+    STRELKA: str = 'strelka'
+    STARFUSION: str = 'starfusion'
+
 
-TOOL_SVTYPE_MAPPING = {v: [v] for v in SVTYPE.values()}
+TOOL_SVTYPE_MAPPING = {v: [v] for v in SVTYPE.values()}  # type: ignore
 TOOL_SVTYPE_MAPPING.update(
     {
         'DEL': [SVTYPE.DEL],
diff --git a/mavis/util.py b/mavis/util.py
index db673fa2..8cf5c558 100644
--- a/mavis/util.py
+++ b/mavis/util.py
@@ -15,7 +15,8 @@
 from tab import tab
 
 from .breakpoint import Breakpoint, BreakpointPair
-from .constants import COLUMNS, ORIENT, PROTOCOL, STRAND, SVTYPE, MavisNamespace, sort_columns
+from .constants import (COLUMNS, ORIENT, PROTOCOL, STRAND, SVTYPE,
+                        MavisNamespace, sort_columns)
 from .error import InvalidRearrangement
 from .interval import Interval
 
@@ -132,11 +133,6 @@ def get_env_variable(arg, default, cast_type=None):
     return default
 
 
-class WeakMavisNamespace(MavisNamespace):
-    def is_env_overwritable(self, attr):
-        return True
-
-
 def bash_expands(*expressions):
     """
     expand a file glob expression, allowing bash-style brackets.
@@ -169,7 +165,7 @@ def log_arguments(args):
     """
     LOG('arguments', time_stamp=True)
     with LOG.indent() as log:
-        for arg, val in sorted(args.items()):
+        for arg, val in sorted(args.__dict__.items()):
             if isinstance(val, list):
                 if len(val) <= 1:
                     log(arg, '= {}'.format(val))
diff --git a/tests/unit/test_constants.py b/tests/unit/test_constants.py
index 9da45677..b69571db 100644
--- a/tests/unit/test_constants.py
+++ b/tests/unit/test_constants.py
@@ -1,11 +1,12 @@
 import unittest
+
 from mavis.constants import (
     COLUMNS,
-    MavisNamespace,
     ORIENT,
+    STRAND,
+    MavisNamespace,
     reverse_complement,
     sort_columns,
-    STRAND,
     translate,
 )
 
@@ -51,70 +52,3 @@ def test_column_matches_column_name(self):
         self.assertEqual(COLUMNS.library, COLUMNS.library)
         s = set([COLUMNS.library, COLUMNS.library])
         self.assertEqual(1, len(s))
-
-
-class TestMavisNamespace(unittest.TestCase):
-    def setUp(self):
-        self.namespace = MavisNamespace(a=1, b=2, c=3)
-
-    def test_get_item(self):
-        self.assertEqual(1, self.namespace['a'])
-        self.assertEqual(1, self.namespace.a)
-        self.assertEqual(1, self.namespace.get('a', None))
-
-    def test_to_dict(self):
-        self.assertEqual({'a': 1, 'b': 2, 'c': 3}, self.namespace.to_dict())
-
-    def test_get_with_default(self):
-        self.assertEqual(4, self.namespace.get('d', 4))
-
-    def test_get_without_default_errors(self):
-        self.assertEqual(None, self.namespace.get('d', None))
-
-    def test_error_on_undefined(self):
-        with self.assertRaises(KeyError):
-            self.namespace.define('a')
-
-    def test_infered_typing(self):
-        self.assertEqual(int, self.namespace.type('a'))
-
-    def test_keys(self):
-        self.assertEqual(['a', 'b', 'c'], self.namespace.keys())
-
-    def test_add(self):
-        self.namespace.add('d', 4, defn='this is the letter d', cast_type=float)
-        self.assertEqual(float, self.namespace.type('d'))
-        self.assertEqual('this is the letter d', self.namespace.define('d'))
-        self.assertEqual(4, self.namespace.d)
-
-    def test_add_infer_type(self):
-        self.namespace.add('d', 4, defn='this is the letter d')
-        self.assertEqual(int, self.namespace.type('d'))
-        self.assertEqual('this is the letter d', self.namespace.define('d'))
-        self.assertEqual(4, self.namespace.d)
-
-    def test_error_on_enforce_bad_value(self):
-        with self.assertRaises(KeyError):
-            self.namespace.enforce(5)
-
-    def test_reverse(self):
-        self.assertEqual('a', self.namespace.reverse(1))
-
-    def test_reverse_nonunique_error(self):
-        self.namespace['d'] = 1
-        with self.assertRaises(KeyError):
-            self.namespace.reverse(1)
-
-    def test_reverse_bad_value_error(self):
-        with self.assertRaises(KeyError):
-            self.namespace.reverse(5)
-
-    def test_get_argument_error(self):
-        with self.assertRaises(TypeError):
-            self.namespace.get('a', 1, 1)
-        with self.assertRaises(AttributeError):
-            self.namespace.get('d')
-
-    def test_iterating(self):
-        for act, exp in zip(self.namespace, ['a', 'b', 'c']):
-            self.assertEqual(exp, act)
diff --git a/tests/unit/test_util.py b/tests/unit/test_util.py
index 00d8c90d..62b6910e 100644
--- a/tests/unit/test_util.py
+++ b/tests/unit/test_util.py
@@ -4,13 +4,11 @@
 from mavis.constants import COLUMNS, ORIENT, STRAND
 from mavis.error import NotSpecifiedError
 from mavis.util import (
-    cast,
     ENV_VAR_PREFIX,
+    cast,
+    get_connected_components,
     get_env_variable,
-    MavisNamespace,
-    WeakMavisNamespace,
     read_bpp_from_input_file,
-    get_connected_components,
 )
 
 from .mock import Mock
@@ -74,57 +72,6 @@ def test_needs_casting(self):
         self.assertEqual(15, get_env_variable('test_env', 1))
 
 
-class TestMavisNamespace(unittest.TestCase):
-    def setUp(self):
-        self.namespace = MavisNamespace()
-
-    def test_item_getter(self):
-        self.namespace.thing = 2
-        self.assertEqual(2, self.namespace['thing'])
-        self.assertEqual(2, self.namespace.thing)
-
-    def test_items(self):
-        print(self.namespace)
-        self.namespace.thing = 2
-        print(self.namespace)
-        self.namespace.otherthing = 3
-        print(self.namespace)
-        self.assertEqual({'thing': 2, 'otherthing': 3}, self.namespace._members)
-        self.assertEqual([('otherthing', 3), ('thing', 2)], list(sorted(self.namespace.items())))
-
-
-class TestWeakMavisNamespace(unittest.TestCase):
-    def setUp(self):
-        self.namespace = WeakMavisNamespace(a=1, b=2, c=3)
-        print(self.namespace._members)
-        for v in ['a', 'b', 'c']:
-            v = ENV_VAR_PREFIX + v.upper()
-            if v in os.environ:
-                del os.environ[v]
-
-    def test_no_env_set(self):
-        self.assertEqual(1, self.namespace.a)
-        self.assertEqual(1, self.namespace['a'])
-
-    def test_env_overrides_default(self):
-        os.environ['MAVIS_A'] = '5'
-        env_name = self.namespace.get_env_name('a')
-        self.assertEqual('MAVIS_A', env_name)
-        self.assertEqual('5', os.environ[env_name])
-        self.assertTrue(self.namespace.is_env_overwritable('a'))
-        self.assertEqual(5, self.namespace.a)
-        self.assertEqual(1, self.namespace._members['a'])
-        self.assertEqual(5, self.namespace['a'])
-
-    def test_error_on_invalid_attr(self):
-        with self.assertRaises(AttributeError):
-            self.namespace.other
-
-    def test_iterate_keys(self):
-        self.assertEqual(['a', 'b', 'c'], list(self.namespace.keys()))
-        self.assertEqual(['a', 'b', 'c'], [k for k in self.namespace])
-
-
 class TestReadBreakpointPairsFromFile(unittest.TestCase):
     def build_filehandle(self, row):
         header = [c for c in row]

From f88b6c4cd26e0f91b715507c2fa4c17871d9c84b Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Tue, 20 Apr 2021 17:02:52 -0700
Subject: [PATCH 010/137] Update documentation

---
 docs/configuration/general.md  |   68 +-
 docs/configuration/pipeline.md |  122 +--
 docs/configuration/settings.md | 1829 +++++++++++++++++++++-----------
 docs/hooks.py                  |  109 +-
 docs/outputs/columns.md        |   24 +-
 docs/tutorials/full.md         |  289 +++--
 6 files changed, 1422 insertions(+), 1019 deletions(-)

diff --git a/docs/configuration/general.md b/docs/configuration/general.md
index 443cedee..e22f5339 100644
--- a/docs/configuration/general.md
+++ b/docs/configuration/general.md
@@ -4,54 +4,31 @@ An exhaustive list of the various configurable settings can be found [here](../s
 
 ## Pipeline Configuration File
 
-The pipeline can be run in steps or it can be configured using a
+The pipeline can be run in steps or it can be configured using a JSON
 configuration file and setup in a single step. Scripts will be generated
-to run all steps following clustering. The configuration file can be
-built from scratch or a template can be output as shown below
+to run all steps following clustering.
 
-```bash
-mavis config --write template.cfg
-```
+The config schema is found in the mavis package under `mavis/schemas/config.json`
 
-This will create a template config file called template.cfg which can
-then be edited by the user. However this will be a simple config with no
-library information. To generate a configuration file with the library
-information as well as estimates for the fragment size parameters more
-inputs are required (see
-[generating the config file](../../tutorials/full/#generating-the-config-file) for more information).
-
-## Environment Variables
-
-Most of the default settings can be changed by using environment
-variables. The value given by the environment variables will be used as
-the new default. Config or command-line parameters will still override
-these settings.
-
-All environment variables are prefixed with MAVIS and an underscore.
-Otherwise the variable name is the same as that used for the command
-line parameter or config setting (uppercased). For example to change the
-default minimum mapping quality used during the validate stage
-
-```bash
-export MAVIS_MIN_MAPPING_QUALITY=10
-```
+Top level settings follow the pattern `<section>.<setting>`. The convert and library
+sections are nested objects.
 
 ## Adjusting the Resource Requirements
 
 ### Choosing the Number of Validation/Annotation Jobs
 
 MAVIS chooses the number of jobs to split validate/annotate stages into
-based on two settings: [max_files](../../configuration/settings/#max_files) and
-[min_clusters_per_file](../../configuration/settings/#min-clusters-per-file).
+based on two settings: [cluster.max_files](../../configuration/settings/#clustermax_files) and
+[cluster.min_clusters_per_file](../../configuration/settings/#clustermin_clusters_per_file).
 
 For example, in the following situation say you have: 1000 clusters,
-`max_files=10`, and `min_clusters_per_file=10`. Then MAVIS will set up
+`cluster.max_files=10`, and `cluster.min_clusters_per_file=10`. Then MAVIS will set up
 10 validation jobs each with 100 events.
 
-However, if `min_clusters_per_file=500`, then MAVIS would only set up 2
+However, if `cluster.min_clusters_per_file=500`, then MAVIS would only set up 2
 jobs each with 500 events. This is because
-[min_clusters_per_file](../../configuration/settings/#min-clusters-per-file) takes precedence
-over [max_files](../../configuration/settings/#max_files).
+[cluster.min_clusters_per_file](../../configuration/settings/#clustermin_clusters_per_file) takes precedence
+over [cluster.max_files](../../configuration/settings/#clustermax_files).
 
 Splitting into more jobs will lower the resource requirements per job
 (see [resource requirements](../performance/)). The memory and time requirements for validation are linear
@@ -60,27 +37,8 @@ with respect to the number of events to be validated.
 ### Uninformative Filter
 
 For example, if the user is only interested in events in genes, then the
-[uninformative_filter](../../configuration/settings/#uninformative_filter) can be used. This
+[cluster.uninformative_filter](../../configuration/settings/#clusteruninformative_filter) can be used. This
 will drop all events that are not within a certain distance
-([max_proximity](../../configuration/settings/#max_proximity)) to any annotation in
+([cluster.max_proximity](../../configuration/settings/#clustermax_proximity)) to any annotation in
 the annotations reference file. These events will be dropped prior to
 the validation stage which results in significant speed up.
-
-This can be set using the environment variable
-
-```bash
-export MAVIS_UNINFORMATIVE_FILTER=True
-```
-
-or in the pipeline config file
-
-```text
-[cluster]
-uninformative_filter = True
-```
-
-or as a command line argument to the cluster stage
-
-```bash
-mavis cluster --uninformative_filter True ....
-```
diff --git a/docs/configuration/pipeline.md b/docs/configuration/pipeline.md
index 76ffdbbf..d8073a1c 100644
--- a/docs/configuration/pipeline.md
+++ b/docs/configuration/pipeline.md
@@ -5,135 +5,21 @@
 MAVIS v3 uses [snakemake](https://snakemake.readthedocs.io/en/stable/) to handle job scheduling
 and setup
 
-The setup step of MAVIS is set up to use a job scheduler on a
-compute cluster. will generate submission scripts and a wrapper bash
-script for the user to execute on their cluster head node.
-
-![](../images/pipeline_options.svg)
-
 The MAVIS pipeline is highly configurable. Some pipeline steps
 (cluster, validate) are optional and can be automatically skipped. The
 standard pipeline is
 far-left.
 
-
-The most common use case is
-[auto-generating a configuration file](../../tutorials/full/#generating-the-config-file) and then running the pipeline setup step. The pipeline setup
-step will run clustering and create scripts for running the other steps.
+The most common use case is running the pipeline through snakemake
 
 ```bash
-mavis config .... -w config.cfg
-mavis setup config.cfg -o /path/to/top/output_dir
+snakemake -j <MAX JOBS> --configfile <YOUR CONFIG>
 ```
 
-This will create the build.cfg configuration file, which is used by the
-scheduler to submit jobs. To use a particular scheduler you will need to
-set the `MAVIS_SCHEDULER` environment variable. After the
-build configuration file has been created you can run the mavis schedule
-option to submit your jobs
+If you are submitting to a cluster, use the [snakemake profiles](https://snakemake.readthedocs.io/en/stable/executing/cli.html#profiles)
 
 ```bash
-ssh cluster_head_node
-mavis schedule -o /path/to/output_dir --submit
+snakemake -j <MAX JOBS> --configfile <YOUR CONFIG> --profile <YOUR PROFILE NAME>
 ```
 
 This will submit a series of jobs with dependencies.
-
-![](../images/pipeline_dependency_graph.svg)
-
-Dependency graph of MAVIS jobs for the standard pipeline setup. The
-notation on the arrows indicates the SLURM setting on the job to add the
-dependency on the previous
-job.
-
-
-### Configuring Scheduler Settings
-
-There are multiple ways to configure the scheduler settings. Some of the
-configurable options are listed below
-
--   [MAVIS_QUEUE](../../configuration/settings/#queue)
--   [MAVIS_MEMORY_LIMIT](../../configuration/settings/#memory_limit)
--   [MAVIS_TIME_LIMIT](../../configuration/settings/#time_limit)
--   [MAVIS_IMPORT_ENV](../../configuration/settings/#import_env)
--   [MAVIS_SCHEDULER](../../configuration/settings/#scheduler)
-
-For example to set the job queue default using an
-[environment variable](../../configuration/general/#environment-variables)
-
-```bash
-export MAVIS_QUEUE=QUEUENAME
-```
-
-Or it can also be added to the config file manually
-
-    [schedule]
-    queue = QUEUENAME
-
-### Troubleshooting Dependency Failures
-
-The most common error to occur when running MAVIS on the cluster is a
-memory or time limit exception. These can be detected by running the
-schedule step or looking for dependency failures reported on the
-cluster. The suffix of the job name will be a number and will correspond
-to the suffix of the job directory.
-
-```bash
-mavis schedule -o /path/to/output/dir
-```
-
-This will report any failed jobs. For example if this were a crash
-report for one of the validation jobs we might expect to see something
-like below in the schedule output
-
-    [2018-05-31 13:02:06] validate
-                            MV_<library>_<batch id>-<task id> (<job id>) is FAILED
-                              CRASH: <error from log file>
-
-Any jobs in an error, failed, etc. state can be resubmitted by running
-mavis schedule with the resubmit flag
-
-```bash
-mavis schedule -o /path/to/output/dir --resubmit
-```
-
-If a job has failed due to memory or time limits, editing the
-`/path/to/output/dir/build.cfg` file can allow the user to change a job
-without resetting up and rerunning the other jobs. For example, below is
-the configuration for a validation job
-
-    [MV_mock-A47933_batch-D2nTiy9AhGye4UZNapAik6]
-    stage = validate
-    job_ident = 1691742
-    name = MV_mock-A47933_batch-D2nTiy9AhGye4UZNapAik6
-    dependencies =
-    script = /path/to/output/dir/mock-A47933_diseased_transcriptome/validate/submit.sh
-    status = FAILED
-    output_dir = /path/to/output/dir/mock-A47933_diseased_transcriptome/validate/batch-D2nTiy9AhGye4UZNapAik6-{task_ident}
-    stdout = /path/to/output/dir/mock-A47933_diseased_transcriptome/validate/batch-D2nTiy9AhGye4UZNapAik6-{task_ident}/job-{name}-{job_ident}-{task_ident}.log
-    created_at = 1527641526
-    status_comment =
-    memory_limit = 18000
-    queue = short
-    time_limit = 57600
-    import_env = True
-    mail_user =
-    mail_type = NONE
-    concurrency_limit = None
-    task_list = 1
-        2
-        3
-
-The memory\_limit is in Mb and the time\_limit is in seconds. Editing
-the values here will cause the job to be resubmitted with the new
-values.
-
-!!! warning
-    Incorrectly editing the build.cfg file may have unanticipated results
-    and require re-setting up MAVIS to fix. Generally the user should ONLY
-    edit `memory_limit` and `time_limit` values.
-
-    If memory errors are frequent then it would be better to adjust the
-    default values ([trans_validation_memory](../../configuration/settings/#trans_validation_memory),
-    [validation_memory](../../configuration/settings/#validation_memory),
-    [time_limit](../../configuration/settings/#time_limit))
diff --git a/docs/configuration/settings.md b/docs/configuration/settings.md
index 1921e93b..9b388424 100644
--- a/docs/configuration/settings.md
+++ b/docs/configuration/settings.md
@@ -1,1211 +1,1798 @@
 
 
 # Configurable Settings
-## aligner
+## annotate.annotation_filters
 
-**type**: `#!python mavis.align.SUPPORTED_ALIGNER`
+**type**: `#!python List[str]`
 
-**environment variable**: `MAVIS_ALIGNER`
+**default**: `#!python ['choose_more_annotated', 'choose_transcripts_by_priority']`
 
-**default**: `#!python 'blat'`
+A list of filters to apply to putative annotations
 
-**accepted values**: `'bwa mem'`, `'blat'`
+**schema definition**:
+```json
+{
+    "items": {
+        "enum": [
+            "choose_more_annotated",
+            "choose_transcripts_by_priority"
+        ],
+        "type": "string"
+    },
+    "type": "array"
+}
+```
 
 
-The aligner to use to map the contigs/reads back to the reference e.g blat or bwa
-        
+## annotate.draw_fusions_only
 
-## aligner_reference
+**type**: `#!python boolean`
 
-**type**: `#!python filepath`
+**default**: `#!python True`
 
-**environment variable**: `MAVIS_ALIGNER_REFERENCE`
+Flag to indicate if events which do not produce a fusion transcript should produce illustrations
 
-**default**: `#!python None`
+**schema definition**:
+```json
+{
+    "type": "boolean"
+}
+```
 
-Path to the aligner reference file used for aligning the contig sequences
-        
 
-## annotation_filters
+## annotate.draw_non_synonymous_cdna_only
 
-**type**: `#!python str`
+**type**: `#!python boolean`
+
+**default**: `#!python True`
 
-**environment variable**: `MAVIS_ANNOTATION_FILTERS`
+Flag to indicate if events which are synonymous at the cdna level should produce illustrations
 
-**default**: `#!python 'choose_more_annotated,choose_transcripts_by_priority'`
+**schema definition**:
+```json
+{
+    "type": "boolean"
+}
+```
 
-A comma separated list of filters to apply to putative annotations
-        
 
-## annotation_memory
+## annotate.max_orf_cap
 
 **type**: `#!python int`
 
-**environment variable**: `MAVIS_ANNOTATION_MEMORY`
+**default**: `#!python 3`
 
-**default**: `#!python 12000`
+The maximum number of orfs to return (best putative orfs will be retained)
 
-Default memory limit (mb) for the annotation stage
-        
+**schema definition**:
+```json
+{
+    "type": "integer"
+}
+```
 
-## annotations
 
-**type**: `#!python filepath`
+## annotate.min_domain_mapping_match
 
-**environment variable**: `MAVIS_ANNOTATIONS`
+**type**: `#!python number`
 
-**default**: `#!python []`
+**default**: `#!python 0.9`
 
-Path to the reference annotations of genes, transcript, exons, domains, etc
-        
+A number between 0 and 1 representing the minimum percent match a domain must map to the fusion transcript to be displayed
 
-## assembly_kmer_size
+**schema definition**:
+```json
+{
+    "maximum": 1,
+    "minimum": 0,
+    "type": "number"
+}
+```
 
-**type**: `#!python float_fraction`
 
-**environment variable**: `MAVIS_ASSEMBLY_KMER_SIZE`
+## annotate.min_orf_size
 
-**default**: `#!python 0.74`
+**type**: `#!python int`
 
-The percent of the read length to make kmers for assembly
-        
+**default**: `#!python 300`
+
+The minimum length (in base pairs) to retain a putative open reading frame (orf)
+
+**schema definition**:
+```json
+{
+    "type": "integer"
+}
+```
+
+
+## bam_stats.distribution_fraction
+
+**type**: `#!python number`
+
+**default**: `#!python 0.97`
 
-## assembly_max_paths
+The proportion of the distribution to use in computing stdev
+
+**schema definition**:
+```json
+{
+    "maximum": 1,
+    "minimum": 0.01,
+    "type": "number"
+}
+```
+
+
+## bam_stats.sample_bin_size
 
 **type**: `#!python int`
 
-**environment variable**: `MAVIS_ASSEMBLY_MAX_PATHS`
+**default**: `#!python 1000`
 
-**default**: `#!python 8`
+How large to make the sample bin (in bp)
+
+**schema definition**:
+```json
+{
+    "type": "integer"
+}
+```
 
-The maximum number of paths to resolve. this is used to limit when there is a messy assembly graph to resolve. the assembly will pre-calculate the number of paths (or putative assemblies) and stop if it is greater than the given setting
-        
 
-## assembly_min_edge_trim_weight
+## bam_stats.sample_cap
 
 **type**: `#!python int`
 
-**environment variable**: `MAVIS_ASSEMBLY_MIN_EDGE_TRIM_WEIGHT`
+**default**: `#!python 1000`
 
-**default**: `#!python 3`
+Maximum number of reads to collect for any given sample region
+
+**schema definition**:
+```json
+{
+    "type": "integer"
+}
+```
 
-This is used to simplify the debruijn graph before path finding. edges with less than this frequency will be discarded if they are non-cutting, at a fork, or the end of a path
-        
 
-## assembly_min_exact_match_to_remap
+## bam_stats.sample_size
 
 **type**: `#!python int`
 
-**environment variable**: `MAVIS_ASSEMBLY_MIN_EXACT_MATCH_TO_REMAP`
+**default**: `#!python 500`
 
-**default**: `#!python 15`
+The number of genes/bins to compute stats over
 
-The minimum length of exact matches to initiate remapping a read to a contig
-        
+**schema definition**:
+```json
+{
+    "type": "integer"
+}
+```
 
-## assembly_min_remap_coverage
 
-**type**: `#!python float_fraction`
+## cluster.cluster_initial_size_limit
 
-**environment variable**: `MAVIS_ASSEMBLY_MIN_REMAP_COVERAGE`
+**type**: `#!python int`
 
-**default**: `#!python 0.9`
+**default**: `#!python 25`
+
+The maximum cumulative size of both breakpoints for breakpoint pairs to be used in the initial clustering phase (combining based on overlap)
+
+**schema definition**:
+```json
+{
+    "type": "integer"
+}
+```
 
-Minimum fraction of the contig sequence which the remapped sequences must align over
-        
 
-## assembly_min_remapped_seq
+## cluster.cluster_radius
 
 **type**: `#!python int`
 
-**environment variable**: `MAVIS_ASSEMBLY_MIN_REMAPPED_SEQ`
+**default**: `#!python 100`
 
-**default**: `#!python 3`
+Maximum distance allowed between paired breakpoint pairs
 
-The minimum input sequences that must remap for an assembled contig to be used
-        
+**schema definition**:
+```json
+{
+    "type": "integer"
+}
+```
 
-## assembly_min_uniq
 
-**type**: `#!python float_fraction`
+## cluster.limit_to_chr
 
-**environment variable**: `MAVIS_ASSEMBLY_MIN_UNIQ`
+**type**: `#!python Union[List, null]`
 
-**default**: `#!python 0.1`
+**default**: `#!python ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', 'X', 'Y']`
 
-Minimum percent uniq required to keep separate assembled contigs. if contigs are more similar then the lower scoring, then shorter, contig is dropped
-        
+A list of chromosome names to use. Breakpoint pairs on other chromosomes will be filtered out. For example '1 2 3 4' would filter out events/breakpoint pairs on any chromosomes but 1, 2, 3, and 4
 
-## assembly_strand_concordance
+**schema definition**:
+```json
+{
+    "items": {
+        "type": "string"
+    },
+    "type": [
+        "array",
+        "null"
+    ]
+}
+```
 
-**type**: `#!python float_fraction`
 
-**environment variable**: `MAVIS_ASSEMBLY_STRAND_CONCORDANCE`
+## cluster.max_files
 
-**default**: `#!python 0.51`
+**type**: `#!python int`
 
-When the number of remapped reads from each strand are compared, the ratio must be above this number to decide on the strand
-        
+**default**: `#!python 200`
+
+The maximum number of files to output from clustering/splitting
+
+**schema definition**:
+```json
+{
+    "minimum": 1,
+    "type": "integer"
+}
+```
 
-## blat_limit_top_aln
+
+## cluster.max_proximity
 
 **type**: `#!python int`
 
-**environment variable**: `MAVIS_BLAT_LIMIT_TOP_ALN`
+**default**: `#!python 5000`
 
-**default**: `#!python 10`
+The maximum distance away from an annotation before the region is considered to be uninformative
 
-Number of results to return from blat (ranking based on score)
-        
+**schema definition**:
+```json
+{
+    "type": "integer"
+}
+```
 
-## blat_min_identity
 
-**type**: `#!python float_fraction`
+## cluster.min_clusters_per_file
 
-**environment variable**: `MAVIS_BLAT_MIN_IDENTITY`
+**type**: `#!python int`
 
-**default**: `#!python 0.9`
+**default**: `#!python 50`
 
-The minimum percent identity match required for blat results when aligning contigs
-        
+The minimum number of breakpoint pairs to output to a file
 
-## breakpoint_color
+**schema definition**:
+```json
+{
+    "minimum": 1,
+    "type": "integer"
+}
+```
+
+
+## cluster.split_only
+
+**type**: `#!python boolean`
+
+**default**: `#!python False`
+
+Just split the input files, do not merge input breakpoints into clusters
+
+**schema definition**:
+```json
+{
+    "type": "boolean"
+}
+```
+
+
+## cluster.uninformative_filter
+
+**type**: `#!python boolean`
+
+**default**: `#!python False`
+
+Flag that determines if breakpoint pairs which are not within max_proximity to any annotations are filtered out prior to clustering
+
+**schema definition**:
+```json
+{
+    "type": "boolean"
+}
+```
 
-**type**: `#!python str`
 
-**environment variable**: `MAVIS_BREAKPOINT_COLOR`
+## illustrate.breakpoint_color
+
+**type**: `#!python str`
 
 **default**: `#!python '#000000'`
 
 Breakpoint outline color
-        
 
-## call_error
+**schema definition**:
+```json
+{
+    "type": "string"
+}
+```
 
-**type**: `#!python int`
 
-**environment variable**: `MAVIS_CALL_ERROR`
+## illustrate.domain_color
 
-**default**: `#!python 10`
+**type**: `#!python str`
 
-Buffer zone for the evidence window
-        
+**default**: `#!python '#ccccb3'`
 
-## clean_aligner_files
+Domain fill color
 
-**type**: `#!python cast_boolean`
+**schema definition**:
+```json
+{
+    "type": "string"
+}
+```
 
-**environment variable**: `MAVIS_CLEAN_ALIGNER_FILES`
 
-**default**: `#!python False`
+## illustrate.domain_mismatch_color
 
-Remove the aligner output files after the validation stage is complete. not required for subsequent steps but can be useful in debugging and deep investigation of events
-        
+**type**: `#!python str`
 
-## cluster_initial_size_limit
+**default**: `#!python '#b2182b'`
 
-**type**: `#!python int`
+Domain fill color on 0% match
 
-**environment variable**: `MAVIS_CLUSTER_INITIAL_SIZE_LIMIT`
+**schema definition**:
+```json
+{
+    "type": "string"
+}
+```
 
-**default**: `#!python 25`
 
-The maximum cumulative size of both breakpoints for breakpoint pairs to be used in the initial clustering phase (combining based on overlap)
-        
+## illustrate.domain_name_regex_filter
+
+**type**: `#!python str`
 
-## cluster_radius
+**default**: `#!python '^PF\\d+$'`
 
-**type**: `#!python int`
+The regular expression used to select domains to be displayed (filtered by name)
 
-**environment variable**: `MAVIS_CLUSTER_RADIUS`
+**schema definition**:
+```json
+{
+    "type": "string"
+}
+```
 
-**default**: `#!python 100`
 
-Maximum distance allowed between paired breakpoint pairs
-        
+## illustrate.domain_scaffold_color
 
-## concurrency_limit
+**type**: `#!python str`
 
-**type**: `#!python int`
+**default**: `#!python '#000000'`
 
-**environment variable**: `MAVIS_CONCURRENCY_LIMIT`
+The color of the domain scaffold
 
-**default**: `#!python None`
+**schema definition**:
+```json
+{
+    "type": "string"
+}
+```
 
-The concurrency limit for tasks in any given job array or the number of concurrent processes allowed for a local run
-        
 
-## contig_aln_max_event_size
+## illustrate.drawing_width_iter_increase
 
 **type**: `#!python int`
 
-**environment variable**: `MAVIS_CONTIG_ALN_MAX_EVENT_SIZE`
+**default**: `#!python 500`
 
-**default**: `#!python 50`
+The amount (in pixels) by which to increase the drawing width upon failure to fit
+
+**schema definition**:
+```json
+{
+    "type": "integer"
+}
+```
 
-Relates to determining breakpoints when pairing contig alignments. for any given read in a putative pair the soft clipping is extended to include any events of greater than this size. the softclipping is added to the side of the alignment as indicated by the breakpoint we are assigning pairs to
-        
 
-## contig_aln_merge_inner_anchor
+## illustrate.exon_min_focus_size
 
 **type**: `#!python int`
 
-**environment variable**: `MAVIS_CONTIG_ALN_MERGE_INNER_ANCHOR`
+**default**: `#!python 10`
 
-**default**: `#!python 20`
+Minimum size of an exon for it to be granted a label or min exon width
 
-The minimum number of consecutive exact match base pairs to not merge events within a contig alignment
-        
+**schema definition**:
+```json
+{
+    "type": "integer"
+}
+```
 
-## contig_aln_merge_outer_anchor
 
-**type**: `#!python int`
+## illustrate.gene1_color
 
-**environment variable**: `MAVIS_CONTIG_ALN_MERGE_OUTER_ANCHOR`
+**type**: `#!python str`
 
-**default**: `#!python 15`
+**default**: `#!python '#657e91'`
 
-Minimum consecutively aligned exact matches to anchor an end for merging internal events
-        
+The color of genes near the first gene
 
-## contig_aln_min_anchor_size
+**schema definition**:
+```json
+{
+    "pattern": "^#[a-zA-Z0-9]{6}",
+    "type": "string"
+}
+```
 
-**type**: `#!python int`
 
-**environment variable**: `MAVIS_CONTIG_ALN_MIN_ANCHOR_SIZE`
+## illustrate.gene1_color_selected
 
-**default**: `#!python 50`
+**type**: `#!python str`
 
-The minimum number of aligned bases for a contig (m or =) in order to simplify. do not have to be consecutive
-        
+**default**: `#!python '#518dc5'`
 
-## contig_aln_min_extend_overlap
+The color of the first gene
 
-**type**: `#!python int`
+**schema definition**:
+```json
+{
+    "pattern": "^#[a-zA-Z0-9]{6}",
+    "type": "string"
+}
+```
 
-**environment variable**: `MAVIS_CONTIG_ALN_MIN_EXTEND_OVERLAP`
 
-**default**: `#!python 10`
+## illustrate.gene2_color
 
-Minimum number of bases the query coverage interval must be extended by in order to pair alignments as a single split alignment
-        
+**type**: `#!python str`
 
-## contig_aln_min_query_consumption
+**default**: `#!python '#325556'`
 
-**type**: `#!python float_fraction`
+The color of genes near the second gene
 
-**environment variable**: `MAVIS_CONTIG_ALN_MIN_QUERY_CONSUMPTION`
+**schema definition**:
+```json
+{
+    "pattern": "^#[a-zA-Z0-9]{6}",
+    "type": "string"
+}
+```
 
-**default**: `#!python 0.9`
 
-Minimum fraction of the original query sequence that must be used by the read(s) of the alignment
-        
+## illustrate.gene2_color_selected
 
-## contig_aln_min_score
+**type**: `#!python str`
 
-**type**: `#!python float_fraction`
+**default**: `#!python '#4c9677'`
 
-**environment variable**: `MAVIS_CONTIG_ALN_MIN_SCORE`
+The color of the second gene
 
-**default**: `#!python 0.9`
+**schema definition**:
+```json
+{
+    "pattern": "^#[a-zA-Z0-9]{6}",
+    "type": "string"
+}
+```
 
-Minimum score for a contig to be used as evidence in a call by contig
-        
 
-## contig_call_distance
+## illustrate.label_color
 
-**type**: `#!python int`
+**type**: `#!python str`
 
-**environment variable**: `MAVIS_CONTIG_CALL_DISTANCE`
+**default**: `#!python '#000000'`
 
-**default**: `#!python 10`
+The label color
 
-The maximum distance allowed between breakpoint pairs (called by contig) in order for them to pair
-        
+**schema definition**:
+```json
+{
+    "pattern": "^#[a-zA-Z0-9]{6}",
+    "type": "string"
+}
+```
+
+
+## illustrate.mask_fill
 
-## dgv_annotation
+**type**: `#!python str`
+
+**default**: `#!python '#ffffff'`
+
+Color of mask (for deleted region etc.)
+
+**schema definition**:
+```json
+{
+    "pattern": "^#[a-zA-Z0-9]{6}",
+    "type": "string"
+}
+```
+
+
+## illustrate.mask_opacity
+
+**type**: `#!python number`
 
-**type**: `#!python filepath`
+**default**: `#!python 0.7`
+
+Opacity of the mask layer
 
-**environment variable**: `MAVIS_DGV_ANNOTATION`
+**schema definition**:
+```json
+{
+    "maximum": 1,
+    "minimum": 0,
+    "type": "number"
+}
+```
 
-**default**: `#!python []`
 
-Path to the dgv reference processed to look like the cytoband file
-        
+## illustrate.max_drawing_retries
+
+**type**: `#!python int`
+
+**default**: `#!python 5`
+
+The maximum number of retries for attempting a drawing. Each iteration the width is extended. If it is still insufficient after this number, a gene-level only drawing will be output
 
-## domain_color
+**schema definition**:
+```json
+{
+    "type": "integer"
+}
+```
+
+
+## illustrate.novel_exon_color
 
 **type**: `#!python str`
 
-**environment variable**: `MAVIS_DOMAIN_COLOR`
+**default**: `#!python '#5D3F6A'`
 
-**default**: `#!python '#ccccb3'`
+Novel exon fill color
+
+**schema definition**:
+```json
+{
+    "pattern": "^#[a-zA-Z0-9]{6}",
+    "type": "string"
+}
+```
 
-Domain fill color
-        
 
-## domain_mismatch_color
+## illustrate.scaffold_color
 
 **type**: `#!python str`
 
-**environment variable**: `MAVIS_DOMAIN_MISMATCH_COLOR`
+**default**: `#!python '#000000'`
 
-**default**: `#!python '#b2182b'`
+The color used for the gene/transcripts scaffolds
+
+**schema definition**:
+```json
+{
+    "pattern": "^#[a-zA-Z0-9]{6}",
+    "type": "string"
+}
+```
 
-Domain fill color on 0%% match
-        
 
-## domain_name_regex_filter
+## illustrate.splice_color
 
 **type**: `#!python str`
 
-**environment variable**: `MAVIS_DOMAIN_NAME_REGEX_FILTER`
+**default**: `#!python '#000000'`
 
-**default**: `#!python '^PF\\d+$'`
+Splicing lines color
 
-The regular expression used to select domains to be displayed (filtered by name)
-        
+**schema definition**:
+```json
+{
+    "pattern": "^#[a-zA-Z0-9]{6}",
+    "type": "string"
+}
+```
 
-## domain_scaffold_color
+
+## illustrate.width
+
+**type**: `#!python int`
+
+**default**: `#!python 1000`
+
+The drawing width in pixels
+
+**schema definition**:
+```json
+{
+    "type": "integer"
+}
+```
+
+
+## log
 
 **type**: `#!python str`
 
-**environment variable**: `MAVIS_DOMAIN_SCAFFOLD_COLOR`
+**default**: `#!python None`
 
-**default**: `#!python '#000000'`
 
-The color of the domain scaffold
-        
 
-## draw_fusions_only
+**schema definition**:
+```json
+{
+    "type": "string"
+}
+```
 
-**type**: `#!python cast_boolean`
 
-**environment variable**: `MAVIS_DRAW_FUSIONS_ONLY`
+## log_level
 
-**default**: `#!python True`
+**type**: `#!python str`
 
-Flag to indicate if events which do not produce a fusion transcript should produce illustrations
-        
+**default**: `#!python 'INFO'`
 
-## draw_non_synonymous_cdna_only
 
-**type**: `#!python cast_boolean`
 
-**environment variable**: `MAVIS_DRAW_NON_SYNONYMOUS_CDNA_ONLY`
+**schema definition**:
+```json
+{
+    "enum": [
+        "INFO",
+        "DEBUG"
+    ],
+    "type": "string"
+}
+```
 
-**default**: `#!python True`
 
-Flag to indicate if events which are synonymous at the cdna level should produce illustrations
-        
+## output_dir
 
-## drawing_width_iter_increase
+**type**: `#!python str`
+
+**default**: `#!python None`
+
+
+
+**schema definition**:
+```json
+{
+    "type": "string"
+}
+```
+
+
+## pairing.contig_call_distance
 
 **type**: `#!python int`
 
-**environment variable**: `MAVIS_DRAWING_WIDTH_ITER_INCREASE`
+**default**: `#!python 10`
 
-**default**: `#!python 500`
+The maximum distance allowed between breakpoint pairs (called by contig) in order for them to pair
+
+**schema definition**:
+```json
+{
+    "type": "integer"
+}
+```
 
-The amount (in  pixels) by which to increase the drawing width upon failure to fit
-        
 
-## exon_min_focus_size
+## pairing.flanking_call_distance
 
 **type**: `#!python int`
 
-**environment variable**: `MAVIS_EXON_MIN_FOCUS_SIZE`
+**default**: `#!python 50`
 
-**default**: `#!python 10`
+The maximum distance allowed between breakpoint pairs (called by flanking pairs) in order for them to pair
 
-Minimum size of an exon for it to be granted a label or min exon width
-        
+**schema definition**:
+```json
+{
+    "type": "integer"
+}
+```
 
-## fetch_min_bin_size
+
+## pairing.input_call_distance
 
 **type**: `#!python int`
 
-**environment variable**: `MAVIS_FETCH_MIN_BIN_SIZE`
+**default**: `#!python 20`
 
-**default**: `#!python 50`
+The maximum distance allowed between breakpoint pairs (called by input tools, not validated) in order for them to pair
 
-The minimum size of any bin for reading from a bam file. increasing this number will result in smaller bins being merged or less bins being created (depending on the fetch method)
-        
+**schema definition**:
+```json
+{
+    "type": "integer"
+}
+```
 
-## fetch_reads_bins
+
+## pairing.spanning_call_distance
 
 **type**: `#!python int`
 
-**environment variable**: `MAVIS_FETCH_READS_BINS`
+**default**: `#!python 20`
 
-**default**: `#!python 5`
+The maximum distance allowed between breakpoint pairs (called by spanning reads) in order for them to pair
+
+**schema definition**:
+```json
+{
+    "type": "integer"
+}
+```
 
-Number of bins to split an evidence window into to ensure more even sampling of high coverage regions
-        
 
-## fetch_reads_limit
+## pairing.split_call_distance
 
 **type**: `#!python int`
 
-**environment variable**: `MAVIS_FETCH_READS_LIMIT`
+**default**: `#!python 20`
 
-**default**: `#!python 3000`
+The maximum distance allowed between breakpoint pairs (called by split reads) in order for them to pair
 
-Maximum number of reads, cap, to loop over for any given evidence window
-        
+**schema definition**:
+```json
+{
+    "type": "integer"
+}
+```
 
-## filter_cdna_synon
 
-**type**: `#!python cast_boolean`
+## reference.aligner_reference
 
-**environment variable**: `MAVIS_FILTER_CDNA_SYNON`
+**type**: `#!python List[str]`
 
-**default**: `#!python True`
+**default**: `#!python None`
 
-Filter all annotations synonymous at the cdna level
-        
 
-## filter_min_complexity
 
-**type**: `#!python float_fraction`
+**schema definition**:
+```json
+{
+    "examples": [
+        "tests/data/mock_reference_genome.2bit"
+    ],
+    "items": {
+        "type": "string"
+    },
+    "maxItems": 1,
+    "minItems": 1,
+    "type": "array"
+}
+```
 
-**environment variable**: `MAVIS_FILTER_MIN_COMPLEXITY`
 
-**default**: `#!python 0.2`
+## reference.annotations
 
-Filter event calls based on call sequence complexity
-        
+**type**: `#!python List[str]`
 
-## filter_min_flanking_reads
+**default**: `#!python None`
 
-**type**: `#!python int`
 
-**environment variable**: `MAVIS_FILTER_MIN_FLANKING_READS`
 
-**default**: `#!python 10`
+**schema definition**:
+```json
+{
+    "examples": [
+        "tests/data/mock_annotations.json"
+    ],
+    "items": {
+        "type": "string"
+    },
+    "minItems": 1,
+    "type": "array"
+}
+```
 
-Minimum number of flanking pairs for a call by flanking pairs
-        
 
-## filter_min_linking_split_reads
+## reference.dgv_annotation
 
-**type**: `#!python int`
+**type**: `#!python List[str]`
 
-**environment variable**: `MAVIS_FILTER_MIN_LINKING_SPLIT_READS`
+**default**: `#!python None`
 
-**default**: `#!python 1`
 
-Minimum number of linking split reads for a call by split reads
-        
 
-## filter_min_remapped_reads
+**schema definition**:
+```json
+{
+    "examples": [
+        [
+            "tests/data/mock_dgv_annotation.txt"
+        ]
+    ],
+    "items": {
+        "type": "string"
+    },
+    "minItems": 1,
+    "type": "array"
+}
+```
 
-**type**: `#!python int`
 
-**environment variable**: `MAVIS_FILTER_MIN_REMAPPED_READS`
+## reference.masking
 
-**default**: `#!python 5`
+**type**: `#!python List[str]`
 
-Minimum number of remapped reads for a call by contig
-        
+**default**: `#!python None`
 
-## filter_min_spanning_reads
 
-**type**: `#!python int`
 
-**environment variable**: `MAVIS_FILTER_MIN_SPANNING_READS`
+**schema definition**:
+```json
+{
+    "examples": [
+        [
+            "tests/data/mock_masking.tab"
+        ]
+    ],
+    "items": {
+        "type": "string"
+    },
+    "minItems": 1,
+    "type": "array"
+}
+```
 
-**default**: `#!python 5`
 
-Minimum number of spanning reads for a call by spanning reads
-        
+## reference.reference_genome
 
-## filter_min_split_reads
+**type**: `#!python List[str]`
 
-**type**: `#!python int`
+**default**: `#!python None`
 
-**environment variable**: `MAVIS_FILTER_MIN_SPLIT_READS`
 
-**default**: `#!python 5`
 
-Minimum number of split reads for a call by split reads
-        
+**schema definition**:
+```json
+{
+    "examples": [
+        [
+            "tests/data/mock_reference_genome.fa"
+        ]
+    ],
+    "items": {
+        "type": "string"
+    },
+    "minItems": 1,
+    "type": "array"
+}
+```
+
+
+## reference.template_metadata
+
+**type**: `#!python List[str]`
+
+**default**: `#!python None`
+
+
 
-## filter_protein_synon
+**schema definition**:
+```json
+{
+    "examples": [
+        [
+            "tests/data/cytoBand.txt"
+        ]
+    ],
+    "items": {
+        "type": "string"
+    },
+    "minItems": 1,
+    "type": "array"
+}
+```
 
-**type**: `#!python cast_boolean`
 
-**environment variable**: `MAVIS_FILTER_PROTEIN_SYNON`
+## skip_stage.validate
+
+**type**: `#!python boolean`
 
 **default**: `#!python False`
 
-Filter all annotations synonymous at the protein level
-        
+skip the validation stage of the MAVIS pipeline
+
+**schema definition**:
+```json
+{
+    "type": "boolean"
+}
+```
+
 
-## filter_secondary_alignments
+## summary.filter_cdna_synon
 
-**type**: `#!python cast_boolean`
+**type**: `#!python boolean`
+
+**default**: `#!python True`
+
+Filter all annotations synonymous at the cdna level
 
-**environment variable**: `MAVIS_FILTER_SECONDARY_ALIGNMENTS`
+**schema definition**:
+```json
+{
+    "type": "boolean"
+}
+```
 
-**default**: `#!python True`
 
-Filter secondary alignments when gathering read evidence
-        
+## summary.filter_min_complexity
 
-## filter_trans_homopolymers
+**type**: `#!python number`
 
-**type**: `#!python cast_boolean`
+**default**: `#!python 0.2`
 
-**environment variable**: `MAVIS_FILTER_TRANS_HOMOPOLYMERS`
+Filter event calls based on call sequence complexity
 
-**default**: `#!python True`
+**schema definition**:
+```json
+{
+    "maximum": 1,
+    "minimum": 0,
+    "type": "number"
+}
+```
 
-Filter all single bp ins/del/dup events that are in a homopolymer region of at least 3 bps and are not paired to a genomic event
-        
 
-## flanking_call_distance
+## summary.filter_min_flanking_reads
 
 **type**: `#!python int`
 
-**environment variable**: `MAVIS_FLANKING_CALL_DISTANCE`
+**default**: `#!python 10`
 
-**default**: `#!python 50`
+Minimum number of flanking pairs for a call by flanking pairs
 
-The maximum distance allowed between breakpoint pairs (called by flanking pairs) in order for them to pair
-        
+**schema definition**:
+```json
+{
+    "type": "integer"
+}
+```
 
-## fuzzy_mismatch_number
 
-**type**: `#!python int`
+## summary.filter_min_linking_split_reads
 
-**environment variable**: `MAVIS_FUZZY_MISMATCH_NUMBER`
+**type**: `#!python int`
 
 **default**: `#!python 1`
 
-The number of events/mismatches allowed to be considered a fuzzy match
-        
+Minimum number of linking split reads for a call by split reads
 
-## gene1_color
+**schema definition**:
+```json
+{
+    "type": "integer"
+}
+```
 
-**type**: `#!python str`
 
-**environment variable**: `MAVIS_GENE1_COLOR`
+## summary.filter_min_remapped_reads
 
-**default**: `#!python '#657e91'`
+**type**: `#!python int`
 
-The color of genes near the first gene
-        
+**default**: `#!python 5`
 
-## gene1_color_selected
+Minimum number of remapped reads for a call by contig
 
-**type**: `#!python str`
+**schema definition**:
+```json
+{
+    "type": "integer"
+}
+```
 
-**environment variable**: `MAVIS_GENE1_COLOR_SELECTED`
 
-**default**: `#!python '#518dc5'`
+## summary.filter_min_spanning_reads
 
-The color of the first gene
-        
+**type**: `#!python int`
 
-## gene2_color
+**default**: `#!python 5`
 
-**type**: `#!python str`
+Minimum number of spanning reads for a call by spanning reads
 
-**environment variable**: `MAVIS_GENE2_COLOR`
+**schema definition**:
+```json
+{
+    "type": "integer"
+}
+```
 
-**default**: `#!python '#325556'`
 
-The color of genes near the second gene
-        
+## summary.filter_min_split_reads
 
-## gene2_color_selected
+**type**: `#!python int`
 
-**type**: `#!python str`
+**default**: `#!python 5`
 
-**environment variable**: `MAVIS_GENE2_COLOR_SELECTED`
+Minimum number of split reads for a call by split reads
 
-**default**: `#!python '#4c9677'`
+**schema definition**:
+```json
+{
+    "type": "integer"
+}
+```
 
-The color of the second gene
-        
 
-## import_env
+## summary.filter_protein_synon
 
-**type**: `#!python cast_boolean`
+**type**: `#!python boolean`
 
-**environment variable**: `MAVIS_IMPORT_ENV`
+**default**: `#!python False`
 
-**default**: `#!python True`
+Filter all annotations synonymous at the protein level
 
-Flag to import environment variables
-        
+**schema definition**:
+```json
+{
+    "type": "boolean"
+}
+```
 
-## input_call_distance
 
-**type**: `#!python int`
+## summary.filter_trans_homopolymers
 
-**environment variable**: `MAVIS_INPUT_CALL_DISTANCE`
+**type**: `#!python boolean`
 
-**default**: `#!python 20`
+**default**: `#!python True`
 
-The maximum distance allowed between breakpoint pairs (called by input tools, not validated) in order for them to pair
-        
+Filter all single bp ins/del/dup events that are in a homopolymer region of at least 3 bps and are not paired to a genomic event
 
-## label_color
+**schema definition**:
+```json
+{
+    "type": "boolean"
+}
+```
 
-**type**: `#!python str`
 
-**environment variable**: `MAVIS_LABEL_COLOR`
+## validate.aligner
 
-**default**: `#!python '#000000'`
+**type**: `#!python str`
 
-The label color
-        
+**default**: `#!python 'blat'`
 
-## limit_to_chr
+The aligner to use to map the contigs/reads back to the reference, e.g. blat or bwa mem
 
-**type**: `#!python str`
+**schema definition**:
+```json
+{
+    "enum": [
+        "bwa mem",
+        "blat"
+    ],
+    "type": "string"
+}
+```
 
-**environment variable**: `MAVIS_LIMIT_TO_CHR`
 
-**default**: `#!python ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', 'X', 'Y']`
+## validate.assembly_kmer_size
 
-A list of chromosome names to use. breakpointpairs on other chromosomes will be filteredout. for example '1 2 3 4' would filter out events/breakpoint pairs on any chromosomes but 1, 2, 3, and 4
-        
+**type**: `#!python number`
 
-## mail_type
+**default**: `#!python 0.74`
 
-**type**: `#!python mavis.schedule.constants.MAIL_TYPE`
+The percent of the read length to make kmers for assembly
 
-**environment variable**: `MAVIS_MAIL_TYPE`
+**schema definition**:
+```json
+{
+    "maximum": 1,
+    "minimum": 0,
+    "type": "number"
+}
+```
 
-**default**: `#!python 'NONE'`
 
-**accepted values**: `'BEGIN'`, `'END'`, `'FAIL'`, `'ALL'`, `'NONE'`
+## validate.assembly_max_paths
 
+**type**: `#!python int`
 
-When to notify the mail_user (if given)
-        
+**default**: `#!python 8`
 
-## mail_user
+The maximum number of paths to resolve. This is used to limit when there is a messy assembly graph to resolve. The assembly will pre-calculate the number of paths (or putative assemblies) and stop if it is greater than the given setting
 
-**type**: `#!python str`
+**schema definition**:
+```json
+{
+    "type": "integer"
+}
+```
 
-**environment variable**: `MAVIS_MAIL_USER`
 
-**default**: `#!python ''`
+## validate.assembly_min_edge_trim_weight
 
-User(s) to send notifications to
-        
+**type**: `#!python int`
 
-## mask_fill
+**default**: `#!python 3`
 
-**type**: `#!python str`
+This is used to simplify the De Bruijn graph before path finding. Edges with less than this frequency will be discarded if they are non-cutting, at a fork, or the end of a path
 
-**environment variable**: `MAVIS_MASK_FILL`
+**schema definition**:
+```json
+{
+    "type": "integer"
+}
+```
 
-**default**: `#!python '#ffffff'`
 
-Color of mask (for deleted region etc.)
-        
+## validate.assembly_min_exact_match_to_remap
 
-## mask_opacity
+**type**: `#!python int`
 
-**type**: `#!python float_fraction`
+**default**: `#!python 15`
 
-**environment variable**: `MAVIS_MASK_OPACITY`
+The minimum length of exact matches to initiate remapping a read to a contig
 
-**default**: `#!python 0.7`
+**schema definition**:
+```json
+{
+    "type": "integer"
+}
+```
 
-Opacity of the mask layer
-        
 
-## masking
+## validate.assembly_min_remap_coverage
+
+**type**: `#!python number`
 
-**type**: `#!python filepath`
+**default**: `#!python 0.9`
 
-**environment variable**: `MAVIS_MASKING`
+Minimum fraction of the contig sequence which the remapped sequences must align over
 
-**default**: `#!python []`
+**schema definition**:
+```json
+{
+    "maximum": 1,
+    "minimum": 0,
+    "type": "number"
+}
+```
 
-File containing regions for which input events overlapping them are dropped prior to validation
-        
 
-## max_drawing_retries
+## validate.assembly_min_remapped_seq
 
 **type**: `#!python int`
 
-**environment variable**: `MAVIS_MAX_DRAWING_RETRIES`
+**default**: `#!python 3`
 
-**default**: `#!python 5`
+The minimum input sequences that must remap for an assembled contig to be used
 
-The maximum number of retries for attempting a drawing. each iteration the width is extended. if it is still insufficient after this number a gene-level only drawing will be output
-        
+**schema definition**:
+```json
+{
+    "type": "integer"
+}
+```
 
-## max_files
 
-**type**: `#!python int`
+## validate.assembly_min_uniq
 
-**environment variable**: `MAVIS_MAX_FILES`
+**type**: `#!python number`
 
-**default**: `#!python 200`
+**default**: `#!python 0.1`
 
-The maximum number of files to output from clustering/splitting
-        
+Minimum percent unique required to keep separate assembled contigs. If contigs are more similar than this, the lower scoring (then shorter) contig is dropped
 
-## max_orf_cap
+**schema definition**:
+```json
+{
+    "maximum": 1,
+    "minimum": 0,
+    "type": "number"
+}
+```
 
-**type**: `#!python int`
 
-**environment variable**: `MAVIS_MAX_ORF_CAP`
+## validate.assembly_strand_concordance
 
-**default**: `#!python 3`
+**type**: `#!python number`
 
-The maximum number of orfs to return (best putative orfs will be retained)
-        
+**default**: `#!python 0.51`
 
-## max_proximity
+When the number of remapped reads from each strand are compared, the ratio must be above this number to decide on the strand
 
-**type**: `#!python int`
+**schema definition**:
+```json
+{
+    "maximum": 1,
+    "minimum": 0,
+    "type": "number"
+}
+```
 
-**environment variable**: `MAVIS_MAX_PROXIMITY`
 
-**default**: `#!python 5000`
+## validate.blat_limit_top_aln
 
-The maximum distance away from an annotation before the region in considered to be uninformative
-        
+**type**: `#!python int`
 
-## max_sc_preceeding_anchor
+**default**: `#!python 10`
 
-**type**: `#!python int`
+Number of results to return from blat (ranking based on score)
 
-**environment variable**: `MAVIS_MAX_SC_PRECEEDING_ANCHOR`
+**schema definition**:
+```json
+{
+    "type": "integer"
+}
+```
 
-**default**: `#!python 6`
 
-When remapping a softclipped read this determines the amount of softclipping allowed on the side opposite of where we expect it. for example for a softclipped read on a breakpoint with a left orientation this limits the amount of softclipping that is allowed on the right. if this is set to none then there is no limit on softclipping
-        
+## validate.blat_min_identity
 
-## memory_limit
+**type**: `#!python number`
 
-**type**: `#!python int`
+**default**: `#!python 0.9`
 
-**environment variable**: `MAVIS_MEMORY_LIMIT`
+The minimum percent identity match required for blat results when aligning contigs
 
-**default**: `#!python 16000`
+**schema definition**:
+```json
+{
+    "maximum": 1,
+    "minimum": 0,
+    "type": "number"
+}
+```
 
-The maximum number of megabytes (mb) any given job is allowed
-        
 
-## min_anchor_exact
+## validate.call_error
 
 **type**: `#!python int`
 
-**environment variable**: `MAVIS_MIN_ANCHOR_EXACT`
+**default**: `#!python 10`
 
-**default**: `#!python 6`
+Buffer zone for the evidence window
 
-Applies to re-aligning softclipped reads to the opposing breakpoint. the minimum number of consecutive exact matches to anchor a read to initiate targeted realignment
-        
+**schema definition**:
+```json
+{
+    "type": "integer"
+}
+```
 
-## min_anchor_fuzzy
 
-**type**: `#!python int`
+## validate.clean_aligner_files
 
-**environment variable**: `MAVIS_MIN_ANCHOR_FUZZY`
+**type**: `#!python boolean`
 
-**default**: `#!python 10`
+**default**: `#!python False`
 
-Applies to re-aligning softclipped reads to the opposing breakpoint. the minimum length of a fuzzy match to anchor a read to initiate targeted realignment
-        
+Remove the aligner output files after the validation stage is complete. not required for subsequent steps but can be useful in debugging and deep investigation of events
 
-## min_anchor_match
+**schema definition**:
+```json
+{
+    "type": "boolean"
+}
+```
 
-**type**: `#!python float_fraction`
 
-**environment variable**: `MAVIS_MIN_ANCHOR_MATCH`
+## validate.contig_aln_max_event_size
 
-**default**: `#!python 0.9`
+**type**: `#!python int`
 
-Minimum percent match for a read to be kept as evidence
-        
+**default**: `#!python 50`
 
-## min_call_complexity
+Relates to determining breakpoints when pairing contig alignments. for any given read in a putative pair the soft clipping is extended to include any events of greater than this size. the softclipping is added to the side of the alignment as indicated by the breakpoint we are assigning pairs to
 
-**type**: `#!python float_fraction`
+**schema definition**:
+```json
+{
+    "type": "integer"
+}
+```
 
-**environment variable**: `MAVIS_MIN_CALL_COMPLEXITY`
 
-**default**: `#!python 0.1`
+## validate.contig_aln_merge_inner_anchor
 
-The minimum complexity score for a call sequence. is an average for non-contig calls. filters low complexity contigs before alignment. see [contig_complexity](#contig_complexity)
-        
+**type**: `#!python int`
 
-## min_clusters_per_file
+**default**: `#!python 20`
 
-**type**: `#!python int`
+The minimum number of consecutive exact match base pairs to not merge events within a contig alignment
 
-**environment variable**: `MAVIS_MIN_CLUSTERS_PER_FILE`
+**schema definition**:
+```json
+{
+    "type": "integer"
+}
+```
 
-**default**: `#!python 50`
 
-The minimum number of breakpoint pairs to output to a file
-        
+## validate.contig_aln_merge_outer_anchor
 
-## min_domain_mapping_match
+**type**: `#!python int`
 
-**type**: `#!python float_fraction`
+**default**: `#!python 15`
 
-**environment variable**: `MAVIS_MIN_DOMAIN_MAPPING_MATCH`
+Minimum consecutively aligned exact matches to anchor an end for merging internal events
 
-**default**: `#!python 0.9`
+**schema definition**:
+```json
+{
+    "type": "integer"
+}
+```
 
-A number between 0 and 1 representing the minimum percent match a domain must map to the fusion transcript to be displayed
-        
 
-## min_double_aligned_to_estimate_insertion_size
+## validate.contig_aln_min_anchor_size
 
 **type**: `#!python int`
 
-**environment variable**: `MAVIS_MIN_DOUBLE_ALIGNED_TO_ESTIMATE_INSERTION_SIZE`
+**default**: `#!python 50`
 
-**default**: `#!python 2`
+The minimum number of aligned bases for a contig (M or =) in order to simplify. These do not have to be consecutive
 
-The minimum number of reads which map soft-clipped to both breakpoints to assume the size of the untemplated sequence between the breakpoints is at most the read length - 2 * min_softclipping
-        
+**schema definition**:
+```json
+{
+    "type": "integer"
+}
+```
 
-## min_flanking_pairs_resolution
 
-**type**: `#!python int`
+## validate.contig_aln_min_extend_overlap
 
-**environment variable**: `MAVIS_MIN_FLANKING_PAIRS_RESOLUTION`
+**type**: `#!python int`
 
 **default**: `#!python 10`
 
-The minimum number of flanking reads required to call a breakpoint by flanking evidence
-        
-
-## min_linking_split_reads
+Minimum number of bases the query coverage interval must be extended by in order to pair alignments as a single split alignment
 
-**type**: `#!python int`
+**schema definition**:
+```json
+{
+    "type": "integer"
+}
+```
 
-**environment variable**: `MAVIS_MIN_LINKING_SPLIT_READS`
 
-**default**: `#!python 2`
+## validate.contig_aln_min_query_consumption
 
-The minimum number of split reads which aligned to both breakpoints
-        
+**type**: `#!python number`
 
-## min_mapping_quality
+**default**: `#!python 0.9`
 
-**type**: `#!python int`
+Minimum fraction of the original query sequence that must be used by the read(s) of the alignment
 
-**environment variable**: `MAVIS_MIN_MAPPING_QUALITY`
+**schema definition**:
+```json
+{
+    "maximum": 1,
+    "minimum": 0,
+    "type": "number"
+}
+```
 
-**default**: `#!python 5`
 
-The minimum mapping quality of reads to be used as evidence
-        
+## validate.contig_aln_min_score
 
-## min_non_target_aligned_split_reads
+**type**: `#!python number`
 
-**type**: `#!python int`
+**default**: `#!python 0.9`
 
-**environment variable**: `MAVIS_MIN_NON_TARGET_ALIGNED_SPLIT_READS`
+Minimum score for a contig to be used as evidence in a call by contig
 
-**default**: `#!python 1`
+**schema definition**:
+```json
+{
+    "maximum": 1,
+    "minimum": 0,
+    "type": "number"
+}
+```
 
-The minimum number of split reads aligned to a breakpoint by the input bam and no forced by local alignment to the target region to call a breakpoint by split read evidence
-        
 
-## min_orf_size
+## validate.fetch_min_bin_size
 
 **type**: `#!python int`
 
-**environment variable**: `MAVIS_MIN_ORF_SIZE`
+**default**: `#!python 50`
+
+The minimum size of any bin for reading from a bam file. Increasing this number will result in smaller bins being merged or fewer bins being created (depending on the fetch method)
 
-**default**: `#!python 300`
+**schema definition**:
+```json
+{
+    "type": "integer"
+}
+```
 
-The minimum length (in base pairs) to retain a putative open reading frame (orf)
-        
 
-## min_sample_size_to_apply_percentage
+## validate.fetch_reads_bins
 
 **type**: `#!python int`
 
-**environment variable**: `MAVIS_MIN_SAMPLE_SIZE_TO_APPLY_PERCENTAGE`
+**default**: `#!python 5`
 
-**default**: `#!python 10`
+Number of bins to split an evidence window into to ensure more even sampling of high coverage regions
 
-Minimum number of aligned bases to compute a match percent. if there are less than this number of aligned bases (match or mismatch) the percent comparator is not used
-        
+**schema definition**:
+```json
+{
+    "type": "integer"
+}
+```
 
-## min_softclipping
+
+## validate.fetch_reads_limit
 
 **type**: `#!python int`
 
-**environment variable**: `MAVIS_MIN_SOFTCLIPPING`
+**default**: `#!python 3000`
 
-**default**: `#!python 6`
+Maximum number of reads, cap, to loop over for any given evidence window
 
-Minimum number of soft-clipped bases required for a read to be used as soft-clipped evidence
-        
+**schema definition**:
+```json
+{
+    "type": "integer"
+}
+```
 
-## min_spanning_reads_resolution
 
-**type**: `#!python int`
+## validate.filter_secondary_alignments
 
-**environment variable**: `MAVIS_MIN_SPANNING_READS_RESOLUTION`
+**type**: `#!python boolean`
 
-**default**: `#!python 5`
+**default**: `#!python True`
+
+Filter secondary alignments when gathering read evidence
+
+**schema definition**:
+```json
+{
+    "type": "boolean"
+}
+```
 
-Minimum number of spanning reads required to call an event by spanning evidence
-        
 
-## min_splits_reads_resolution
+## validate.fuzzy_mismatch_number
 
 **type**: `#!python int`
 
-**environment variable**: `MAVIS_MIN_SPLITS_READS_RESOLUTION`
+**default**: `#!python 1`
 
-**default**: `#!python 3`
+The number of events/mismatches allowed to be considered a fuzzy match
 
-Minimum number of split reads required to call a breakpoint by split reads
-        
+**schema definition**:
+```json
+{
+    "type": "integer"
+}
+```
 
-## novel_exon_color
 
-**type**: `#!python str`
+## validate.max_sc_preceeding_anchor
 
-**environment variable**: `MAVIS_NOVEL_EXON_COLOR`
+**type**: `#!python int`
 
-**default**: `#!python '#5D3F6A'`
+**default**: `#!python 6`
 
-Novel exon fill color
-        
+When remapping a softclipped read this determines the amount of softclipping allowed on the side opposite of where we expect it. for example for a softclipped read on a breakpoint with a left orientation this limits the amount of softclipping that is allowed on the right. if this is set to none then there is no limit on softclipping
 
-## outer_window_min_event_size
+**schema definition**:
+```json
+{
+    "type": "integer"
+}
+```
 
-**type**: `#!python int`
 
-**environment variable**: `MAVIS_OUTER_WINDOW_MIN_EVENT_SIZE`
+## validate.min_anchor_exact
 
-**default**: `#!python 125`
+**type**: `#!python int`
 
-The minimum size of an event in order for flanking read evidence to be collected
-        
+**default**: `#!python 6`
 
-## queue
+Applies to re-aligning softclipped reads to the opposing breakpoint. the minimum number of consecutive exact matches to anchor a read to initiate targeted realignment
 
-**type**: `#!python str`
+**schema definition**:
+```json
+{
+    "type": "integer"
+}
+```
 
-**environment variable**: `MAVIS_QUEUE`
 
-**default**: `#!python ''`
+## validate.min_anchor_fuzzy
 
-The queue jobs are to be submitted to
-        
+**type**: `#!python int`
 
-## reference_genome
+**default**: `#!python 10`
 
-**type**: `#!python filepath`
+Applies to re-aligning softclipped reads to the opposing breakpoint. the minimum length of a fuzzy match to anchor a read to initiate targeted realignment
 
-**environment variable**: `MAVIS_REFERENCE_GENOME`
+**schema definition**:
+```json
+{
+    "type": "integer"
+}
+```
 
-**default**: `#!python []`
 
-Path to the human reference genome fasta file
-        
+## validate.min_anchor_match
 
-## remote_head_ssh
+**type**: `#!python number`
 
-**type**: `#!python str`
+**default**: `#!python 0.9`
 
-**environment variable**: `MAVIS_REMOTE_HEAD_SSH`
+Minimum percent match for a read to be kept as evidence
 
-**default**: `#!python ''`
+**schema definition**:
+```json
+{
+    "maximum": 1,
+    "minimum": 0,
+    "type": "number"
+}
+```
 
-Ssh target for remote scheduler commands
-        
 
-## scaffold_color
+## validate.min_call_complexity
 
-**type**: `#!python str`
+**type**: `#!python number`
 
-**environment variable**: `MAVIS_SCAFFOLD_COLOR`
+**default**: `#!python 0.1`
 
-**default**: `#!python '#000000'`
+The minimum complexity score for a call sequence. is an average for non-contig calls. filters low complexity contigs before alignment. see [contig_complexity](#contig_complexity)
 
-The color used for the gene/transcripts scaffolds
-        
+**schema definition**:
+```json
+{
+    "maximum": 1,
+    "minimum": 0,
+    "type": "number"
+}
+```
 
-## scheduler
 
-**type**: `#!python mavis.schedule.constants.SCHEDULER`
+## validate.min_double_aligned_to_estimate_insertion_size
 
-**environment variable**: `MAVIS_SCHEDULER`
+**type**: `#!python int`
 
-**default**: `#!python 'SLURM'`
+**default**: `#!python 2`
 
-**accepted values**: `'SGE'`, `'SLURM'`, `'TORQUE'`, `'LOCAL'`
+The minimum number of reads which map soft-clipped to both breakpoints to assume the size of the untemplated sequence between the breakpoints is at most the read length - 2 * min_softclipping
 
+**schema definition**:
+```json
+{
+    "type": "integer"
+}
+```
 
-The scheduler being used
-        
 
-## spanning_call_distance
+## validate.min_flanking_pairs_resolution
 
 **type**: `#!python int`
 
-**environment variable**: `MAVIS_SPANNING_CALL_DISTANCE`
+**default**: `#!python 10`
 
-**default**: `#!python 20`
+The minimum number of flanking reads required to call a breakpoint by flanking evidence
 
-The maximum distance allowed between breakpoint pairs (called by spanning reads) in order for them to pair
-        
+**schema definition**:
+```json
+{
+    "type": "integer"
+}
+```
 
-## splice_color
 
-**type**: `#!python str`
+## validate.min_linking_split_reads
 
-**environment variable**: `MAVIS_SPLICE_COLOR`
+**type**: `#!python int`
 
-**default**: `#!python '#000000'`
+**default**: `#!python 2`
 
-Splicing lines color
-        
+The minimum number of split reads which aligned to both breakpoints
 
-## split_call_distance
+**schema definition**:
+```json
+{
+    "type": "integer"
+}
+```
 
-**type**: `#!python int`
 
-**environment variable**: `MAVIS_SPLIT_CALL_DISTANCE`
+## validate.min_mapping_quality
 
-**default**: `#!python 20`
+**type**: `#!python int`
 
-The maximum distance allowed between breakpoint pairs (called by split reads) in order for them to pair
-        
+**default**: `#!python 5`
 
-## stdev_count_abnormal
+The minimum mapping quality of reads to be used as evidence
 
-**type**: `#!python float`
+**schema definition**:
+```json
+{
+    "type": "integer"
+}
+```
 
-**environment variable**: `MAVIS_STDEV_COUNT_ABNORMAL`
 
-**default**: `#!python 3.0`
+## validate.min_non_target_aligned_split_reads
 
-The number of standard deviations away from the normal considered expected and therefore not qualifying as flanking reads
-        
+**type**: `#!python int`
 
-## strand_determining_read
+**default**: `#!python 1`
 
-**type**: `#!python int`
+The minimum number of split reads aligned to a breakpoint by the input bam and not forced by local alignment to the target region to call a breakpoint by split read evidence
 
-**environment variable**: `MAVIS_STRAND_DETERMINING_READ`
+**schema definition**:
+```json
+{
+    "type": "integer"
+}
+```
 
-**default**: `#!python 2`
 
-1 or 2. the read in the pair which determines if (assuming a stranded protocol) the first or second read in the pair matches the strand sequenced
-        
+## validate.min_sample_size_to_apply_percentage
 
-## template_metadata
+**type**: `#!python int`
 
-**type**: `#!python filepath`
+**default**: `#!python 10`
 
-**environment variable**: `MAVIS_TEMPLATE_METADATA`
+Minimum number of aligned bases to compute a match percent. if there are less than this number of aligned bases (match or mismatch) the percent comparator is not used
 
-**default**: `#!python []`
+**schema definition**:
+```json
+{
+    "type": "integer"
+}
+```
 
-File containing the cytoband template information. used for illustrations only
-        
 
-## time_limit
+## validate.min_softclipping
 
 **type**: `#!python int`
 
-**environment variable**: `MAVIS_TIME_LIMIT`
+**default**: `#!python 6`
+
+Minimum number of soft-clipped bases required for a read to be used as soft-clipped evidence
 
-**default**: `#!python 57600`
+**schema definition**:
+```json
+{
+    "type": "integer"
+}
+```
 
-The time in seconds any given jobs is allowed
-        
 
-## trans_fetch_reads_limit
+## validate.min_spanning_reads_resolution
 
 **type**: `#!python int`
 
-**environment variable**: `MAVIS_TRANS_FETCH_READS_LIMIT`
+**default**: `#!python 5`
+
+Minimum number of spanning reads required to call an event by spanning evidence
 
-**default**: `#!python 12000`
+**schema definition**:
+```json
+{
+    "type": "integer"
+}
+```
 
-Related to [fetch_reads_limit](#fetch_reads_limit). overrides fetch_reads_limit for transcriptome libraries when set. if this has a value of none then fetch_reads_limit will be used for transcriptome libraries instead
-        
 
-## trans_min_mapping_quality
+## validate.min_splits_reads_resolution
 
 **type**: `#!python int`
 
-**environment variable**: `MAVIS_TRANS_MIN_MAPPING_QUALITY`
+**default**: `#!python 3`
 
-**default**: `#!python 0`
+Minimum number of split reads required to call a breakpoint by split reads
 
-Related to [min_mapping_quality](#min_mapping_quality). overrides the min_mapping_quality if the library is a transcriptome and this is set to any number not none. if this value is none, min_mapping_quality is used for transcriptomes aswell as genomes
-        
+**schema definition**:
+```json
+{
+    "type": "integer"
+}
+```
 
-## trans_validation_memory
+
+## validate.outer_window_min_event_size
 
 **type**: `#!python int`
 
-**environment variable**: `MAVIS_TRANS_VALIDATION_MEMORY`
+**default**: `#!python 125`
 
-**default**: `#!python 18000`
+The minimum size of an event in order for flanking read evidence to be collected
 
-Default memory limit (mb) for the validation stage (for transcriptomes)
-        
+**schema definition**:
+```json
+{
+    "type": "integer"
+}
+```
 
-## uninformative_filter
 
-**type**: `#!python cast_boolean`
+## validate.stdev_count_abnormal
 
-**environment variable**: `MAVIS_UNINFORMATIVE_FILTER`
+**type**: `#!python number`
 
-**default**: `#!python False`
+**default**: `#!python 3`
 
-Flag that determines if breakpoint pairs which are not within max_proximity to any annotations are filtered out prior to clustering
-        
+The number of standard deviations away from the normal considered expected and therefore not qualifying as flanking reads
 
-## validation_memory
+**schema definition**:
+```json
+{
+    "type": "number"
+}
+```
 
-**type**: `#!python int`
 
-**environment variable**: `MAVIS_VALIDATION_MEMORY`
+## validate.trans_fetch_reads_limit
 
-**default**: `#!python 16000`
+**type**: `#!python Union[int, null]`
 
-Default memory limit (mb) for the validation stage
-        
+**default**: `#!python 12000`
 
-## width
+Related to [fetch_reads_limit](#fetch_reads_limit). overrides fetch_reads_limit for transcriptome libraries when set. if this has a value of none then fetch_reads_limit will be used for transcriptome libraries instead
 
-**type**: `#!python int`
+**schema definition**:
+```json
+{
+    "type": [
+        "integer",
+        "null"
+    ]
+}
+```
 
-**environment variable**: `MAVIS_WIDTH`
 
-**default**: `#!python 1000`
+## validate.trans_min_mapping_quality
 
-The drawing width in pixels
-        
+**type**: `#!python Union[int, null]`
+
+**default**: `#!python 0`
+
+Related to [min_mapping_quality](#min_mapping_quality). overrides the min_mapping_quality if the library is a transcriptome and this is set to any number not none. if this value is none, min_mapping_quality is used for transcriptomes aswell as genomes
+
+**schema definition**:
+```json
+{
+    "type": [
+        "integer",
+        "null"
+    ]
+}
+```
 
-## write_evidence_files
 
-**type**: `#!python cast_boolean`
+## validate.write_evidence_files
 
-**environment variable**: `MAVIS_WRITE_EVIDENCE_FILES`
+**type**: `#!python boolean`
 
 **default**: `#!python True`
 
 Write the intermediate bam and bed files containing the raw evidence collected and contigs aligned. not required for subsequent steps but can be useful in debugging and deep investigation of events
-        
+
+**schema definition**:
+```json
+{
+    "type": "boolean"
+}
+```
+
 
diff --git a/docs/hooks.py b/docs/hooks.py
index 30314742..dcb00448 100644
--- a/docs/hooks.py
+++ b/docs/hooks.py
@@ -1,71 +1,78 @@
+import json
 import os
 import re
+from textwrap import dedent
 
 from markdown_refdocs.main import extract_to_markdown
-from mavis.annotate.constants import DEFAULTS as ANNOTATION_DEFAULTS
-from mavis.cluster.constants import DEFAULTS as CLUSTER_DEFAULTS
-from mavis.config import REFERENCE_DEFAULTS
-from mavis.illustrate.constants import DEFAULTS as ILLUSTRATION_DEFAULTS
-from mavis.pairing.constants import DEFAULTS as PAIRING_DEFAULTS
-from mavis.summary.constants import DEFAULTS as SUMMARY_DEFAULTS
+from mavis.schemas import DEFAULTS
 from mavis.util import ENV_VAR_PREFIX
-from mavis.validate.constants import DEFAULTS as VALIDATION_DEFAULTS
 
 
-def generate_settings_doc():
-    dirname = os.path.dirname(os.path.abspath(__file__))
+def json_to_pytype(record):
+    input_type = record
+    try:
+        input_type = record['type']
+    except TypeError:
+        pass
+    types = {'string': 'str', 'integer': 'int', 'float': 'float'}
+
+    if input_type == 'array':
+        try:
+            sub_type = json_to_pytype(record['items']['type'])
+            return f'List[{sub_type}]'
+        except TypeError:
+            return 'List'
 
-    for (filepath, title, namespaces) in [
-        (
-            'configuration/settings.md',
-            'Configurable Settings',
-            [
-                REFERENCE_DEFAULTS,
-                SUMMARY_DEFAULTS,
-                PAIRING_DEFAULTS,
-                ANNOTATION_DEFAULTS,
-                VALIDATION_DEFAULTS,
-                CLUSTER_DEFAULTS,
-                ILLUSTRATION_DEFAULTS,
-            ],
-        ),
-    ]:
-        fname = os.path.join(dirname, filepath)
-        print('writing:', fname)
-        with open(fname, 'w') as fh:
-            fh.write(f'\n\n# {title}\n')
-            glossary = {}
-            for namespace in namespaces:
-                for term, value in namespace.items():
-                    typ = namespace.type(term).__name__
-                    # typ = CUSTOM_TYPES.get(typ, typ)
-                    desc = re.sub(r"\.?$", "", namespace.define(term, "")).capitalize()
-                    accepted = ''
-                    try:
-                        accepted = '\n\n**accepted values**: {}\n'.format(
-                            ', '.join(['`{}`'.format(repr(v)) for v in namespace.type(term).values()])
-                        )
-                    except AttributeError:
-                        pass
-                    defn = f'''## {term}
+    if isinstance(input_type, list):
+        # Union
+        types = ', '.join([json_to_pytype(t) for t in input_type])
+        return f'Union[{types}]'
+    return types.get(input_type, input_type)
 
-**type**: `#!python {typ}`
 
-**environment variable**: `{ENV_VAR_PREFIX}{term.upper()}`
+def generate_settings_doc(schema_file):
+    with open(schema_file, 'r') as fh:
+        schema = json.load(fh)
+    dirname = os.path.dirname(os.path.abspath(__file__))
+    filepath = 'configuration/settings.md'
+    title = 'Configurable Settings'
 
-**default**: `#!python {repr(value)}`{accepted}
+    fname = os.path.join(dirname, filepath)
+    print('writing:', fname)
+    with open(fname, 'w') as fh:
+        fh.write(f'\n\n# {title}\n')
+        glossary = {}
+        for term, defn in schema['properties'].items():
+            if term in ['libraries', 'convert']:
+                continue
+            typ = json_to_pytype(defn)
+            desc = defn.get('description', '')
+            default_value = defn.get('default')
+            schema_defn = json.dumps(
+                {k: v for k, v in defn.items() if k not in ['description', 'default']},
+                sort_keys=True,
+                indent='    ',
+            )
+            schema_defn = f'**schema definition**:\n```json\n{schema_defn}\n```\n'
 
-{desc}
-        '''
-                    glossary[term] = defn
-            for term, defn in sorted(glossary.items()):
-                fh.write(f'{defn}\n\n')
+            lines = [
+                f'## {term}',
+                f'**type**: `#!python {typ}`',
+                f'**default**: `#!python {repr(default_value)}`',
+                desc,
+                schema_defn,
+            ]
+            glossary[term] = '\n\n'.join(lines)
+        for term, defn in sorted(glossary.items()):
+            fh.write(f'{defn}\n\n')
 
 
 def build_package_docs(config):
-    generate_settings_doc()
+    schema_file = os.path.join(os.path.dirname(__file__), '../mavis/schemas/config.json')
+    generate_settings_doc(schema_file)
     package_dir = os.path.join(os.path.dirname(__file__), '../mavis')
     output_dir = os.path.join(os.path.dirname(__file__), 'package')
+
     extract_to_markdown(
         [package_dir],
         output_dir,
diff --git a/docs/outputs/columns.md b/docs/outputs/columns.md
index f2c8ba19..3dfd797f 100644
--- a/docs/outputs/columns.md
+++ b/docs/outputs/columns.md
@@ -34,7 +34,7 @@ decision from the annotation step
 
 ## event\_type
 
-**type**: `mavis.constants.SVTYPE`
+**type**: [`mavis.constants.SVTYPE`](/package/mavis/constants/#class-mavisconstantssvtype)
 
 The
 classification of the event
@@ -57,7 +57,7 @@ Gene for the current annotation at the first breakpoint
 
 ## gene1\_direction
 
-**type**: `mavis.constants.PRIME`
+**type**: [`mavis.constants.PRIME`](/package/mavis/constants/#class-mavisconstantsprime)
 
 The
 direction/prime of the gene
@@ -68,7 +68,7 @@ Gene for the current annotation at the second breakpoint
 
 ## gene2\_direction
 
-**type**: `mavis.constants.PRIME`
+**type**: [`mavis.constants.PRIME`](/package/mavis/constants/#class-mavisconstantsprime)
 
 The
 direction/prime of the gene. Has the following possible values
@@ -85,7 +85,7 @@ second breakpoint
 
 ## gene\_product\_type
 
-**type**: `mavis.constants.GENE_PRODUCT_TYPE`
+**type**: [`mavis.constants.GENE_PRODUCT_TYPE`](/package/mavis/constants/#class-mavisconstantsgene_product_type)
 
 Describes if the putative fusion product will be
 sense or anti-sense
@@ -105,7 +105,8 @@ Transcript for the current annotation at the second breakpoint
 
 ## fusion\_splicing\_pattern
 
-`mavis.constants.SPLICE_TYPE` -
+**type**: [`mavis.constants.SPLICE_TYPE`](/package/mavis/constants/#class-mavisconstantssplice_type)
+
 Type of splicing pattern used to create the fusion cDNA.
 
 ## fusion\_cdna\_coding\_start
@@ -205,14 +206,14 @@ End integer inclusive
 
 ## break1\_orientation
 
-**type**: `mavis.constants.ORIENT`
+**type**: [`mavis.constants.ORIENT`](/package/mavis/constants/#class-mavisconstantsorient)
 
 The side
 of the breakpoint wrt the positive/forward strand that is retained.
 
 ## break1\_strand
 
-**type**: `mavis.constants.STRAND`
+**type**: [`mavis.constants.STRAND`](/package/mavis/constants/#class-mavisconstantsstrand)
 
 The
 strand wrt to the reference positive/forward strand at this
@@ -246,14 +247,14 @@ End integer inclusive
 
 ## break2\_orientation
 
-**type**: `mavis.constants.ORIENT`
+**type**: [`mavis.constants.ORIENT`](/package/mavis/constants/#class-mavisconstantsorient)
 
 The side
 of the breakpoint wrt the positive/forward strand that is retained.
 
 ## break2\_strand
 
-**type**: `mavis.constants.STRAND`
+**type**: [`mavis.constants.STRAND`](/package/mavis/constants/#class-mavisconstantsstrand)
 
 The
 strand wrt to the reference positive/forward strand at this
@@ -283,7 +284,8 @@ protocol was strand specific or not. Expects a boolean
 
 ## protocol
 
-`mavis.constants.PROTOCOL` -
+**type**: [`mavis.constants.PROTOCOL`](/package/mavis/constants/#class-mavisconstantsprotocol)
+
 Specifies the type of library
 
 ## tools
@@ -404,7 +406,7 @@ event
 
 ## call\_method
 
-**type**: `mavis.constants.CALL_METHOD`
+**type**: [`mavis.constants.CALL_METHOD`](/package/mavis/constants/#class-mavisconstantscall_method)
 
 The
 method used to call the breakpoints
diff --git a/docs/tutorials/full.md b/docs/tutorials/full.md
index b42ae624..44b187bc 100644
--- a/docs/tutorials/full.md
+++ b/docs/tutorials/full.md
@@ -18,16 +18,16 @@ tar -xvzf tutorial_data.tar.gz
 
 The expected contents are
 
-| Path                               | Description                                                                                                         |
-| ---------------------------------- | ------------------------------------------------------------------------------------------------------------------- |
-| README                             | Information regarding the other files in the directory                                                              |
-| L1522785992\_expected\_events.tab  | The events that we expect to find, either experimentally validated or 'spiked' in                                 |
-| L1522785992\_normal.sorted.bam     | Paired normal library BAM file                                                                                      |
-| L1522785992\_normal.sorted.bam.bai | BAM index                                                                                                           |
-| L1522785992\_trans.sorted.bam      | Tumour transcriptome BAM file                                                                                       |
-| L1522785992\_trans.sorted.bam.bai  | BAM index file                                                                                                      |
-| L1522785992\_tumour.sorted.bam     | Tumour genome BAM file                                                                                              |
-| L1522785992\_tumour.sorted.bam.bai | BAM index file                                                                                                      |
+| Path                               | Description                                                                                                              |
+| ---------------------------------- | ------------------------------------------------------------------------------------------------------------------------ |
+| README                             | Information regarding the other files in the directory                                                                   |
+| L1522785992\_expected\_events.tab  | The events that we expect to find, either experimentally validated or 'spiked' in                                        |
+| L1522785992\_normal.sorted.bam     | Paired normal library BAM file                                                                                           |
+| L1522785992\_normal.sorted.bam.bai | BAM index                                                                                                                |
+| L1522785992\_trans.sorted.bam      | Tumour transcriptome BAM file                                                                                            |
+| L1522785992\_trans.sorted.bam.bai  | BAM index file                                                                                                           |
+| L1522785992\_tumour.sorted.bam     | Tumour genome BAM file                                                                                                   |
+| L1522785992\_tumour.sorted.bam.bai | BAM index file                                                                                                           |
 | breakdancer-1.4.5/                 | Contains the [BreakDancer](../../glossary/#breakdancer) output which was run on the tumour genome BAM file               |
 | breakseq-2.2/                      | Contains the [BreakSeq](../../glossary/#breakseq) output which was run on the tumour genome BAM file                     |
 | chimerascan-0.4.5/                 | Contains the [ChimeraScan](../../glossary/#chimerascan) output which was run on the tumour transcriptome BAM file        |
@@ -45,40 +45,12 @@ bash get_hg19_reference_files.sh
 source reference_inputs/hg19_env.sh
 ```
 
-## Generating the Config File
+## Creating the Config File
 
-The [config](../../background/citations/#pipeline-config) command
-does most of the work of creating the config for you but there are a few
-things you need to tell it
+Most settings can be left as defaults; however, you will need to fill out the `libraries` and
+`convert` sections to tell MAVIS how to convert your inputs and what libraries to expect.
 
-1.  **Where your bams are and what library they belong to**
-
-```text
---library L1522785992-normal genome normal False tutorial_data/L1522785992_normal.sorted.bam
---library L1522785992-tumour genome diseased False tutorial_data/L1522785992_tumour.sorted.bam
---library L1522785992-trans transcriptome diseased True tutorial_data/L1522785992_trans.sorted.bam
-```
-
-1.  **Where your SV caller output files (events) are**
-
-If they are raw tool output as in the current example you will need to
-use the convert argument to tell MAVIS the file type
-
-```text
---convert breakdancer tutorial_data/breakdancer-1.4.5/*txt breakdancer
---convert breakseq tutorial_data/breakseq-2.2/breakseq.vcf.gz breakseq
---convert chimerascan tutorial_data/chimerascan-0.4.5/chimeras.bedpe chimerascan
---convert defuse tutorial_data/defuse-0.6.2/results.classify.tsv defuse
---convert manta tutorial_data/manta-1.0.0/diploidSV.vcf.gz tutorial_data/manta-1.0.0/somaticSV.vcf manta
-```
-
-!!! note
-    For older versions of MAVIS the convert command may require the path to
-    the file(s) be quoted and the strandedness be specified (default is
-    False)
-
-
-3.  **Which events you should validate in which libraries**
+### Libraries Settings
 
 For this example, because we want to determine which events are
 germline/somatic we are going to pass all genome calls to both genomes.
@@ -86,142 +58,133 @@ We can use either full file paths (if the input is already in the
 standard format) or the alias from a conversion (the first argument
 given to the convert option)
 
-```text
---assign L1522785992-trans chimerascan defuse
---assign L1522785992-tumour breakdancer breakseq manta
---assign L1522785992-normal breakdancer breakseq manta
+```json
+{
+    "libraries": {
+        "L1522785992-normal": { // keyed by library name
+            "assign": [ // these are the names of the input files (or conversion aliases) to check for this library
+                "breakdancer",
+                "breakseq",
+                "manta"
+            ],
+            "bam_file": "tutorial_data/L1522785992_normal.sorted.bam",
+            "disease_status": "normal",
+            "protocol": "genome"
+        },
+        "L1522785992-trans": {
+            "assign": [
+                "chimerascan",
+                "defuse"
+            ],
+            "bam_file": "tutorial_data/L1522785992_trans.sorted.bam",
+            "disease_status": "diseased",
+            "protocol": "transcriptome",
+            "strand_specific": true
+        },
+        "L1522785992-tumour": {
+            "assign": [
+                "breakdancer",
+                "breakseq",
+                "manta"
+            ],
+            "bam_file": "tutorial_data/L1522785992_tumour.sorted.bam",
+            "disease_status": "diseased",
+            "protocol": "genome"
+        }
+    }
+}
 ```
 
-Putting this altogether with a name to call the config, we have the
-command to generate the pipeline config. You should expect this step
-with these inputs to take about \~5GB memory.
+### Convert Settings
 
-```bash
-mavis config \
-    --library L1522785992-normal genome normal False tutorial_data/L1522785992_normal.sorted.bam \
-    --library L1522785992-tumour genome diseased False tutorial_data/L1522785992_tumour.sorted.bam \
-    --library L1522785992-trans transcriptome diseased True tutorial_data/L1522785992_trans.sorted.bam \
-    --convert breakdancer tutorial_data/breakdancer-1.4.5/*txt breakdancer \
-    --convert breakseq tutorial_data/breakseq-2.2/breakseq.vcf.gz breakseq \
-    --convert chimerascan tutorial_data/chimerascan-0.4.5/chimeras.bedpe chimerascan \
-    --convert defuse tutorial_data/defuse-0.6.2/results.classify.tsv defuse \
-    --convert manta tutorial_data/manta-1.0.0/diploidSV.vcf.gz tutorial_data/manta-1.0.0/somaticSV.vcf manta \
-    --assign L1522785992-trans chimerascan defuse \
-    --assign L1522785992-tumour breakdancer breakseq manta  \
-    --assign L1522785992-normal breakdancer breakseq manta \
-    -w mavis.cfg
-```
-
-## Setting Up the Pipeline
-
-The next step is running the setup stage. This will perform conversion, clustering, and creating the
-submission scripts for the other stages.
+If your inputs are raw tool output, as in the current example, you will need to
+use the `convert` section to tell MAVIS the file type
 
-```bash
-mavis setup mavis.cfg -o output_dir/
+```json
+{
+    "convert": {
+        "breakdancer": {  // conversion alias/key
+            "assume_no_untemplated": true,
+            "file_type": "breakdancer",  // input/file type
+            "inputs": [
+                "tutorial_data/breakdancer-1.4.5/*txt"
+            ]
+        },
+        "breakseq": {
+            "assume_no_untemplated": true,
+            "file_type": "breakseq",
+            "inputs": [
+                "tutorial_data/breakseq-2.2/breakseq.vcf.gz"
+            ]
+        },
+        "chimerascan": {
+            "assume_no_untemplated": true,
+            "file_type": "chimerascan",
+            "inputs": [
+                "tutorial_data/chimerascan-0.4.5/chimeras.bedpe"
+            ]
+        },
+        "defuse": {
+            "assume_no_untemplated": true,
+            "file_type": "defuse",
+            "inputs": [
+                "tutorial_data/defuse-0.6.2/results.classify.tsv"
+            ]
+        },
+        "manta": {
+            "assume_no_untemplated": true,
+            "file_type": "manta",
+            "inputs": [
+                "tutorial_data/manta-1.0.0/diploidSV.vcf.gz",
+                "tutorial_data/manta-1.0.0/somaticSV.vcf"
+            ]
+        }
+    }
+}
 ```
 
-At this stage you should have something that looks like this. For
-simplicity not all files/directories have been shown.
-
-    output_dir/
-    |-- build.cfg
-    |-- converted_inputs
-    |   |-- breakdancer.tab
-    |   |-- breakseq.tab
-    |   |-- chimerascan.tab
-    |   |-- defuse.tab
-    |   `-- manta.tab
-    |-- L1522785992-normal_normal_genome
-    |   |-- annotate
-    |   |   |-- batch-aUmErftiY7eEWvENfSeJwc-1/
-    |   |   `-- submit.sh
-    |   |-- cluster
-    |   |   |-- batch-aUmErftiY7eEWvENfSeJwc-1.tab
-    |   |   |-- cluster_assignment.tab
-    |   |   |-- clusters.bed
-    |   |   |-- filtered_pairs.tab
-    |   |   `-- MAVIS-batch-aUmErftiY7eEWvENfSeJwc.COMPLETE
-    |   `-- validate
-    |       |-- batch-aUmErftiY7eEWvENfSeJwc-1/
-    |       `-- submit.sh
-    |-- pairing
-    |   `-- submit.sh
-    `-- summary
-        `-- submit.sh
-
-## Submitting Jobs to the Cluster
-
-The last step is simple, ssh to your head node of your
-[SLURM](../../glossary/#slurm) cluster (or run locally if you
-have configured [remote_head_ssh](../../configuration/settings/#remote_head_ssh) and
-run the schedule step. This will submit the jobs and create the
-dependency chain
-
-```bash
-ssh head_node
-mavis schedule -o output_dir --submit
+### Top-level Settings
+
+Finally, you will need to set the output directory and the reference files
+
+```json
+{
+  "output_dir": "output_dir_full",  // where to output files
+  "reference.aligner_reference": [
+      "reference_inputs/hg19.2bit"
+  ],
+  "reference.annotations": [
+      "reference_inputs/ensembl69_hg19_annotations.json"
+  ],
+  "reference.dgv_annotation": [
+      "reference_inputs/dgv_hg19_variants.tab"
+  ],
+  "reference.masking": [
+      "reference_inputs/hg19_masking.tab"
+  ],
+  "reference.reference_genome": [
+      "reference_inputs/hg19.fa"
+  ],
+  "reference.template_metadata": [
+      "reference_inputs/cytoBand.txt"
+  ]
+}
 ```
 
-The schedule step also acts as a built-in checker and can be run to
-check for errors or if the pipeline has completed.
+## Running the Workflow
 
-```bash
-mavis schedule -o output_dir
-```
-
-This should give you output something like below (times may vary) after
-your run completed correctly.
+You are now ready to run the workflow
 
-```text
-                      MAVIS: 2.0.0
-                      hostname: gphost08.bcgsc.ca
-[2018-06-02 19:47:56] arguments
-                        command = 'schedule'
-                        log = None
-                        log_level = 'INFO'
-                        output = 'output_dir/'
-                        resubmit = False
-                        submit = False
-[2018-06-02 19:48:01] validate
-                        MV_L1522785992-normal_batch-aUmErftiY7eEWvENfSeJwc (1701000) is COMPLETED
-                          200 tasks are COMPLETED
-                          run time: 609
-                        MV_L1522785992-tumour_batch-aUmErftiY7eEWvENfSeJwc (1701001) is COMPLETED
-                          200 tasks are COMPLETED
-                          run time: 669
-                        MV_L1522785992-trans_batch-aUmErftiY7eEWvENfSeJwc (1701002) is COMPLETED
-                          23 tasks are COMPLETED
-                          run time: 1307
-[2018-06-02 19:48:02] annotate
-                        MA_L1522785992-normal_batch-aUmErftiY7eEWvENfSeJwc (1701003) is COMPLETED
-                          200 tasks are COMPLETED
-                          run time: 622
-                        MA_L1522785992-tumour_batch-aUmErftiY7eEWvENfSeJwc (1701004) is COMPLETED
-                          200 tasks are COMPLETED
-                          run time: 573
-                        MA_L1522785992-trans_batch-aUmErftiY7eEWvENfSeJwc (1701005) is COMPLETED
-                          23 tasks are COMPLETED
-                          run time: 537
-[2018-06-02 19:48:07] pairing
-                        MP_batch-aUmErftiY7eEWvENfSeJwc (1701006) is COMPLETED
-                          run time: 466
-[2018-06-02 19:48:07] summary
-                        MS_batch-aUmErftiY7eEWvENfSeJwc (1701007) is COMPLETED
-                          run time: 465
-                      parallel run time: 3545
-                      rewriting: output_dir/build.cfg
-                      run time (hh/mm/ss): 0:00:11
-                      run time (s): 11
+```bash
+snakemake --jobs 100 --configfile=tests/full-tutorial.config.json
 ```
 
-The parallel run time reported corresponds to the sum of the slowest job
-for each stage and does not include any queue time etc.
-
 ## Analyzing the Output
 
 The best place to start with looking at the MAVIS output is the summary
 folder which contains the final results. For column name definitions see
 the [glossary](../../outputs/columns).
 
-    output_dir/summary/mavis_summary_all_L1522785992-normal_L1522785992-trans_L1522785992-tumour.tab
+```text
+output_dir_full/summary/mavis_summary_all_L1522785992-normal_L1522785992-trans_L1522785992-tumour.tab
+```

From dff468c4b98929349079e4667b751733157b5875 Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Tue, 20 Apr 2021 17:15:28 -0700
Subject: [PATCH 011/137] Fix linting

---
 mavis/util.py                         |  3 +--
 tests/end_to_end/test_help.py         |  1 -
 tests/snakemake/test_mini_workflow.py | 16 ++++++++++++----
 3 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/mavis/util.py b/mavis/util.py
index 8cf5c558..1b145d29 100644
--- a/mavis/util.py
+++ b/mavis/util.py
@@ -15,8 +15,7 @@
 from tab import tab
 
 from .breakpoint import Breakpoint, BreakpointPair
-from .constants import (COLUMNS, ORIENT, PROTOCOL, STRAND, SVTYPE,
-                        MavisNamespace, sort_columns)
+from .constants import COLUMNS, ORIENT, PROTOCOL, STRAND, SVTYPE, MavisNamespace, sort_columns
 from .error import InvalidRearrangement
 from .interval import Interval
 
diff --git a/tests/end_to_end/test_help.py b/tests/end_to_end/test_help.py
index 76823d7f..4ff3172a 100644
--- a/tests/end_to_end/test_help.py
+++ b/tests/end_to_end/test_help.py
@@ -28,7 +28,6 @@ def test_pipeline(self):
             else:
                 self.assertEqual(0, returncode)
 
-
     def test_cluster(self):
         with patch.object(sys, 'argv', ['mavis', SUBCOMMAND.CLUSTER, '-h']):
             try:
diff --git a/tests/snakemake/test_mini_workflow.py b/tests/snakemake/test_mini_workflow.py
index 37e81b56..1125cf3e 100644
--- a/tests/snakemake/test_mini_workflow.py
+++ b/tests/snakemake/test_mini_workflow.py
@@ -45,11 +45,19 @@ def test_workflow(output_dir):
             assert glob_exists(os.path.join(output_dir, 'summary', 'MAVIS.COMPLETE'))
             assert glob_exists(os.path.join(output_dir, 'pairing', 'MAVIS.COMPLETE'))
             assert glob_exists(os.path.join(output_dir, 'mock-A47933', 'cluster', 'MAVIS.COMPLETE'))
-            assert glob_exists(os.path.join(output_dir, 'mock-A47933', 'validate', '*', 'MAVIS.COMPLETE'))
-            assert glob_exists(os.path.join(output_dir, 'mock-A47933', 'annotate', '*', 'MAVIS.COMPLETE'))
+            assert glob_exists(
+                os.path.join(output_dir, 'mock-A47933', 'validate', '*', 'MAVIS.COMPLETE')
+            )
+            assert glob_exists(
+                os.path.join(output_dir, 'mock-A47933', 'annotate', '*', 'MAVIS.COMPLETE')
+            )
             assert glob_exists(os.path.join(output_dir, 'mock-A36971', 'cluster', 'MAVIS.COMPLETE'))
-            assert glob_exists(os.path.join(output_dir, 'mock-A36971', 'validate', '*', 'MAVIS.COMPLETE'))
-            assert glob_exists(os.path.join(output_dir, 'mock-A36971', 'annotate', '*', 'MAVIS.COMPLETE'))
+            assert glob_exists(
+                os.path.join(output_dir, 'mock-A36971', 'validate', '*', 'MAVIS.COMPLETE')
+            )
+            assert glob_exists(
+                os.path.join(output_dir, 'mock-A36971', 'annotate', '*', 'MAVIS.COMPLETE')
+            )
         except SystemExit as err:
             if err.code != 0:
                 raise err

From 1ae83ac57ab281ae0e6dca90200bfedbe884953a Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Tue, 20 Apr 2021 17:45:58 -0700
Subject: [PATCH 012/137] Add library/convert sections to config list

---
 .gitignore                     |    1 +
 docs/configuration/settings.md | 1798 --------------------------------
 docs/hooks.py                  |  114 +-
 3 files changed, 89 insertions(+), 1824 deletions(-)
 delete mode 100644 docs/configuration/settings.md

diff --git a/.gitignore b/.gitignore
index 26638751..0745a3b2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -32,6 +32,7 @@ junit
 /docs/package/mavis/*/*.md
 # don't ignore subpackage summary files
 !/docs/package/mavis/*/index.md
+docs/configuration/settings.md
 
 .snakemake
 output_dir*
diff --git a/docs/configuration/settings.md b/docs/configuration/settings.md
deleted file mode 100644
index 9b388424..00000000
--- a/docs/configuration/settings.md
+++ /dev/null
@@ -1,1798 +0,0 @@
-
-
-# Configurable Settings
-## annotate.annotation_filters
-
-**type**: `#!python List[str]`
-
-**default**: `#!python ['choose_more_annotated', 'choose_transcripts_by_priority']`
-
-A comma separated list of filters to apply to putative annotations
-
-**schema definition**:
-```json
-{
-    "items": {
-        "enum": [
-            "choose_more_annotated",
-            "choose_transcripts_by_priority"
-        ],
-        "type": "string"
-    },
-    "type": "array"
-}
-```
-
-
-## annotate.draw_fusions_only
-
-**type**: `#!python boolean`
-
-**default**: `#!python True`
-
-Flag to indicate if events which do not produce a fusion transcript should produce illustrations
-
-**schema definition**:
-```json
-{
-    "type": "boolean"
-}
-```
-
-
-## annotate.draw_non_synonymous_cdna_only
-
-**type**: `#!python boolean`
-
-**default**: `#!python True`
-
-Flag to indicate if events which are synonymous at the cdna level should produce illustrations
-
-**schema definition**:
-```json
-{
-    "type": "boolean"
-}
-```
-
-
-## annotate.max_orf_cap
-
-**type**: `#!python int`
-
-**default**: `#!python 3`
-
-The maximum number of orfs to return (best putative orfs will be retained)
-
-**schema definition**:
-```json
-{
-    "type": "integer"
-}
-```
-
-
-## annotate.min_domain_mapping_match
-
-**type**: `#!python number`
-
-**default**: `#!python 0.9`
-
-A number between 0 and 1 representing the minimum percent match a domain must map to the fusion transcript to be displayed
-
-**schema definition**:
-```json
-{
-    "maximum": 1,
-    "minimum": 0,
-    "type": "number"
-}
-```
-
-
-## annotate.min_orf_size
-
-**type**: `#!python int`
-
-**default**: `#!python 300`
-
-The minimum length (in base pairs) to retain a putative open reading frame (orf)
-
-**schema definition**:
-```json
-{
-    "type": "integer"
-}
-```
-
-
-## bam_stats.distribution_fraction
-
-**type**: `#!python number`
-
-**default**: `#!python 0.97`
-
-the proportion of the distribution to use in computing stdev
-
-**schema definition**:
-```json
-{
-    "maximum": 1,
-    "minimum": 0.01,
-    "type": "number"
-}
-```
-
-
-## bam_stats.sample_bin_size
-
-**type**: `#!python int`
-
-**default**: `#!python 1000`
-
-how large to make the sample bin (in bp)
-
-**schema definition**:
-```json
-{
-    "type": "integer"
-}
-```
-
-
-## bam_stats.sample_cap
-
-**type**: `#!python int`
-
-**default**: `#!python 1000`
-
-maximum number of reads to collect for any given sample region
-
-**schema definition**:
-```json
-{
-    "type": "integer"
-}
-```
-
-
-## bam_stats.sample_size
-
-**type**: `#!python int`
-
-**default**: `#!python 500`
-
-the number of genes/bins to compute stats over
-
-**schema definition**:
-```json
-{
-    "type": "integer"
-}
-```
-
-
-## cluster.cluster_initial_size_limit
-
-**type**: `#!python int`
-
-**default**: `#!python 25`
-
-The maximum cumulative size of both breakpoints for breakpoint pairs to be used in the initial clustering phase (combining based on overlap)
-
-**schema definition**:
-```json
-{
-    "type": "integer"
-}
-```
-
-
-## cluster.cluster_radius
-
-**type**: `#!python int`
-
-**default**: `#!python 100`
-
-Maximum distance allowed between paired breakpoint pairs
-
-**schema definition**:
-```json
-{
-    "type": "integer"
-}
-```
-
-
-## cluster.limit_to_chr
-
-**type**: `#!python Union[List, null]`
-
-**default**: `#!python ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', 'X', 'Y']`
-
-A list of chromosome names to use. breakpointpairs on other chromosomes will be filteredout. for example '1 2 3 4' would filter out events/breakpoint pairs on any chromosomes but 1, 2, 3, and 4
-
-**schema definition**:
-```json
-{
-    "items": {
-        "type": "string"
-    },
-    "type": [
-        "array",
-        "null"
-    ]
-}
-```
-
-
-## cluster.max_files
-
-**type**: `#!python int`
-
-**default**: `#!python 200`
-
-The maximum number of files to output from clustering/splitting
-
-**schema definition**:
-```json
-{
-    "minimum": 1,
-    "type": "integer"
-}
-```
-
-
-## cluster.max_proximity
-
-**type**: `#!python int`
-
-**default**: `#!python 5000`
-
-The maximum distance away from an annotation before the region in considered to be uninformative
-
-**schema definition**:
-```json
-{
-    "type": "integer"
-}
-```
-
-
-## cluster.min_clusters_per_file
-
-**type**: `#!python int`
-
-**default**: `#!python 50`
-
-The minimum number of breakpoint pairs to output to a file
-
-**schema definition**:
-```json
-{
-    "minimum": 1,
-    "type": "integer"
-}
-```
-
-
-## cluster.split_only
-
-**type**: `#!python boolean`
-
-**default**: `#!python False`
-
-just split the input files, do not merge input breakpoints into clusters
-
-**schema definition**:
-```json
-{
-    "type": "boolean"
-}
-```
-
-
-## cluster.uninformative_filter
-
-**type**: `#!python boolean`
-
-**default**: `#!python False`
-
-Flag that determines if breakpoint pairs which are not within max_proximity to any annotations are filtered out prior to clustering
-
-**schema definition**:
-```json
-{
-    "type": "boolean"
-}
-```
-
-
-## illustrate.breakpoint_color
-
-**type**: `#!python str`
-
-**default**: `#!python '#000000'`
-
-Breakpoint outline color
-
-**schema definition**:
-```json
-{
-    "type": "string"
-}
-```
-
-
-## illustrate.domain_color
-
-**type**: `#!python str`
-
-**default**: `#!python '#ccccb3'`
-
-Domain fill color
-
-**schema definition**:
-```json
-{
-    "type": "string"
-}
-```
-
-
-## illustrate.domain_mismatch_color
-
-**type**: `#!python str`
-
-**default**: `#!python '#b2182b'`
-
-Domain fill color on 0%% match
-
-**schema definition**:
-```json
-{
-    "type": "string"
-}
-```
-
-
-## illustrate.domain_name_regex_filter
-
-**type**: `#!python str`
-
-**default**: `#!python '^PF\\d+$'`
-
-The regular expression used to select domains to be displayed (filtered by name)
-
-**schema definition**:
-```json
-{
-    "type": "string"
-}
-```
-
-
-## illustrate.domain_scaffold_color
-
-**type**: `#!python str`
-
-**default**: `#!python '#000000'`
-
-The color of the domain scaffold
-
-**schema definition**:
-```json
-{
-    "type": "string"
-}
-```
-
-
-## illustrate.drawing_width_iter_increase
-
-**type**: `#!python int`
-
-**default**: `#!python 500`
-
-The amount (in  pixels) by which to increase the drawing width upon failure to fit
-
-**schema definition**:
-```json
-{
-    "type": "integer"
-}
-```
-
-
-## illustrate.exon_min_focus_size
-
-**type**: `#!python int`
-
-**default**: `#!python 10`
-
-Minimum size of an exon for it to be granted a label or min exon width
-
-**schema definition**:
-```json
-{
-    "type": "integer"
-}
-```
-
-
-## illustrate.gene1_color
-
-**type**: `#!python str`
-
-**default**: `#!python '#657e91'`
-
-The color of genes near the first gene
-
-**schema definition**:
-```json
-{
-    "pattern": "^#[a-zA-Z0-9]{6}",
-    "type": "string"
-}
-```
-
-
-## illustrate.gene1_color_selected
-
-**type**: `#!python str`
-
-**default**: `#!python '#518dc5'`
-
-The color of the first gene
-
-**schema definition**:
-```json
-{
-    "pattern": "^#[a-zA-Z0-9]{6}",
-    "type": "string"
-}
-```
-
-
-## illustrate.gene2_color
-
-**type**: `#!python str`
-
-**default**: `#!python '#325556'`
-
-The color of genes near the second gene
-
-**schema definition**:
-```json
-{
-    "pattern": "^#[a-zA-Z0-9]{6}",
-    "type": "string"
-}
-```
-
-
-## illustrate.gene2_color_selected
-
-**type**: `#!python str`
-
-**default**: `#!python '#4c9677'`
-
-The color of the second gene
-
-**schema definition**:
-```json
-{
-    "pattern": "^#[a-zA-Z0-9]{6}",
-    "type": "string"
-}
-```
-
-
-## illustrate.label_color
-
-**type**: `#!python str`
-
-**default**: `#!python '#000000'`
-
-The label color
-
-**schema definition**:
-```json
-{
-    "pattern": "^#[a-zA-Z0-9]{6}",
-    "type": "string"
-}
-```
-
-
-## illustrate.mask_fill
-
-**type**: `#!python str`
-
-**default**: `#!python '#ffffff'`
-
-Color of mask (for deleted region etc.)
-
-**schema definition**:
-```json
-{
-    "pattern": "^#[a-zA-Z0-9]{6}",
-    "type": "string"
-}
-```
-
-
-## illustrate.mask_opacity
-
-**type**: `#!python number`
-
-**default**: `#!python 0.7`
-
-Opacity of the mask layer
-
-**schema definition**:
-```json
-{
-    "maximum": 1,
-    "minimum": 0,
-    "type": "number"
-}
-```
-
-
-## illustrate.max_drawing_retries
-
-**type**: `#!python int`
-
-**default**: `#!python 5`
-
-The maximum number of retries for attempting a drawing. each iteration the width is extended. if it is still insufficient after this number a gene-level only drawing will be output
-
-**schema definition**:
-```json
-{
-    "type": "integer"
-}
-```
-
-
-## illustrate.novel_exon_color
-
-**type**: `#!python str`
-
-**default**: `#!python '#5D3F6A'`
-
-Novel exon fill color
-
-**schema definition**:
-```json
-{
-    "pattern": "^#[a-zA-Z0-9]{6}",
-    "type": "string"
-}
-```
-
-
-## illustrate.scaffold_color
-
-**type**: `#!python str`
-
-**default**: `#!python '#000000'`
-
-The color used for the gene/transcripts scaffolds
-
-**schema definition**:
-```json
-{
-    "pattern": "^#[a-zA-Z0-9]{6}",
-    "type": "string"
-}
-```
-
-
-## illustrate.splice_color
-
-**type**: `#!python str`
-
-**default**: `#!python '#000000'`
-
-Splicing lines color
-
-**schema definition**:
-```json
-{
-    "pattern": "^#[a-zA-Z0-9]{6}",
-    "type": "string"
-}
-```
-
-
-## illustrate.width
-
-**type**: `#!python int`
-
-**default**: `#!python 1000`
-
-The drawing width in pixels
-
-**schema definition**:
-```json
-{
-    "type": "integer"
-}
-```
-
-
-## log
-
-**type**: `#!python str`
-
-**default**: `#!python None`
-
-
-
-**schema definition**:
-```json
-{
-    "type": "string"
-}
-```
-
-
-## log_level
-
-**type**: `#!python str`
-
-**default**: `#!python 'INFO'`
-
-
-
-**schema definition**:
-```json
-{
-    "enum": [
-        "INFO",
-        "DEBUG"
-    ],
-    "type": "string"
-}
-```
-
-
-## output_dir
-
-**type**: `#!python str`
-
-**default**: `#!python None`
-
-
-
-**schema definition**:
-```json
-{
-    "type": "string"
-}
-```
-
-
-## pairing.contig_call_distance
-
-**type**: `#!python int`
-
-**default**: `#!python 10`
-
-The maximum distance allowed between breakpoint pairs (called by contig) in order for them to pair
-
-**schema definition**:
-```json
-{
-    "type": "integer"
-}
-```
-
-
-## pairing.flanking_call_distance
-
-**type**: `#!python int`
-
-**default**: `#!python 50`
-
-The maximum distance allowed between breakpoint pairs (called by flanking pairs) in order for them to pair
-
-**schema definition**:
-```json
-{
-    "type": "integer"
-}
-```
-
-
-## pairing.input_call_distance
-
-**type**: `#!python int`
-
-**default**: `#!python 20`
-
-The maximum distance allowed between breakpoint pairs (called by input tools, not validated) in order for them to pair
-
-**schema definition**:
-```json
-{
-    "type": "integer"
-}
-```
-
-
-## pairing.spanning_call_distance
-
-**type**: `#!python int`
-
-**default**: `#!python 20`
-
-The maximum distance allowed between breakpoint pairs (called by spanning reads) in order for them to pair
-
-**schema definition**:
-```json
-{
-    "type": "integer"
-}
-```
-
-
-## pairing.split_call_distance
-
-**type**: `#!python int`
-
-**default**: `#!python 20`
-
-The maximum distance allowed between breakpoint pairs (called by split reads) in order for them to pair
-
-**schema definition**:
-```json
-{
-    "type": "integer"
-}
-```
-
-
-## reference.aligner_reference
-
-**type**: `#!python List[str]`
-
-**default**: `#!python None`
-
-
-
-**schema definition**:
-```json
-{
-    "examples": [
-        "tests/data/mock_reference_genome.2bit"
-    ],
-    "items": {
-        "type": "string"
-    },
-    "maxItems": 1,
-    "minItems": 1,
-    "type": "array"
-}
-```
-
-
-## reference.annotations
-
-**type**: `#!python List[str]`
-
-**default**: `#!python None`
-
-
-
-**schema definition**:
-```json
-{
-    "examples": [
-        "tests/data/mock_annotations.json"
-    ],
-    "items": {
-        "type": "string"
-    },
-    "minItems": 1,
-    "type": "array"
-}
-```
-
-
-## reference.dgv_annotation
-
-**type**: `#!python List[str]`
-
-**default**: `#!python None`
-
-
-
-**schema definition**:
-```json
-{
-    "examples": [
-        [
-            "tests/data/mock_dgv_annotation.txt"
-        ]
-    ],
-    "items": {
-        "type": "string"
-    },
-    "minItems": 1,
-    "type": "array"
-}
-```
-
-
-## reference.masking
-
-**type**: `#!python List[str]`
-
-**default**: `#!python None`
-
-
-
-**schema definition**:
-```json
-{
-    "examples": [
-        [
-            "tests/data/mock_masking.tab"
-        ]
-    ],
-    "items": {
-        "type": "string"
-    },
-    "minItems": 1,
-    "type": "array"
-}
-```
-
-
-## reference.reference_genome
-
-**type**: `#!python List[str]`
-
-**default**: `#!python None`
-
-
-
-**schema definition**:
-```json
-{
-    "examples": [
-        [
-            "tests/data/mock_reference_genome.fa"
-        ]
-    ],
-    "items": {
-        "type": "string"
-    },
-    "minItems": 1,
-    "type": "array"
-}
-```
-
-
-## reference.template_metadata
-
-**type**: `#!python List[str]`
-
-**default**: `#!python None`
-
-
-
-**schema definition**:
-```json
-{
-    "examples": [
-        [
-            "tests/data/cytoBand.txt"
-        ]
-    ],
-    "items": {
-        "type": "string"
-    },
-    "minItems": 1,
-    "type": "array"
-}
-```
-
-
-## skip_stage.validate
-
-**type**: `#!python boolean`
-
-**default**: `#!python False`
-
-skip the validation stage of the MAVIS pipeline
-
-**schema definition**:
-```json
-{
-    "type": "boolean"
-}
-```
-
-
-## summary.filter_cdna_synon
-
-**type**: `#!python boolean`
-
-**default**: `#!python True`
-
-Filter all annotations synonymous at the cdna level
-
-**schema definition**:
-```json
-{
-    "type": "boolean"
-}
-```
-
-
-## summary.filter_min_complexity
-
-**type**: `#!python number`
-
-**default**: `#!python 0.2`
-
-Filter event calls based on call sequence complexity
-
-**schema definition**:
-```json
-{
-    "maximum": 1,
-    "minimum": 0,
-    "type": "number"
-}
-```
-
-
-## summary.filter_min_flanking_reads
-
-**type**: `#!python int`
-
-**default**: `#!python 10`
-
-Minimum number of flanking pairs for a call by flanking pairs
-
-**schema definition**:
-```json
-{
-    "type": "integer"
-}
-```
-
-
-## summary.filter_min_linking_split_reads
-
-**type**: `#!python int`
-
-**default**: `#!python 1`
-
-Minimum number of linking split reads for a call by split reads
-
-**schema definition**:
-```json
-{
-    "type": "integer"
-}
-```
-
-
-## summary.filter_min_remapped_reads
-
-**type**: `#!python int`
-
-**default**: `#!python 5`
-
-Minimum number of remapped reads for a call by contig
-
-**schema definition**:
-```json
-{
-    "type": "integer"
-}
-```
-
-
-## summary.filter_min_spanning_reads
-
-**type**: `#!python int`
-
-**default**: `#!python 5`
-
-Minimum number of spanning reads for a call by spanning reads
-
-**schema definition**:
-```json
-{
-    "type": "integer"
-}
-```
-
-
-## summary.filter_min_split_reads
-
-**type**: `#!python int`
-
-**default**: `#!python 5`
-
-Minimum number of split reads for a call by split reads
-
-**schema definition**:
-```json
-{
-    "type": "integer"
-}
-```
-
-
-## summary.filter_protein_synon
-
-**type**: `#!python boolean`
-
-**default**: `#!python False`
-
-Filter all annotations synonymous at the protein level
-
-**schema definition**:
-```json
-{
-    "type": "boolean"
-}
-```
-
-
-## summary.filter_trans_homopolymers
-
-**type**: `#!python boolean`
-
-**default**: `#!python True`
-
-Filter all single bp ins/del/dup events that are in a homopolymer region of at least 3 bps and are not paired to a genomic event
-
-**schema definition**:
-```json
-{
-    "type": "boolean"
-}
-```
-
-
-## validate.aligner
-
-**type**: `#!python str`
-
-**default**: `#!python 'blat'`
-
-The aligner to use to map the contigs/reads back to the reference e.g blat or bwa
-
-**schema definition**:
-```json
-{
-    "enum": [
-        "bwa mem",
-        "blat"
-    ],
-    "type": "string"
-}
-```
-
-
-## validate.assembly_kmer_size
-
-**type**: `#!python number`
-
-**default**: `#!python 0.74`
-
-The percent of the read length to make kmers for assembly
-
-**schema definition**:
-```json
-{
-    "maximum": 1,
-    "minimum": 0,
-    "type": "number"
-}
-```
-
-
-## validate.assembly_max_paths
-
-**type**: `#!python int`
-
-**default**: `#!python 8`
-
-The maximum number of paths to resolve. this is used to limit when there is a messy assembly graph to resolve. the assembly will pre-calculate the number of paths (or putative assemblies) and stop if it is greater than the given setting
-
-**schema definition**:
-```json
-{
-    "type": "integer"
-}
-```
-
-
-## validate.assembly_min_edge_trim_weight
-
-**type**: `#!python int`
-
-**default**: `#!python 3`
-
-This is used to simplify the debruijn graph before path finding. edges with less than this frequency will be discarded if they are non-cutting, at a fork, or the end of a path
-
-**schema definition**:
-```json
-{
-    "type": "integer"
-}
-```
-
-
-## validate.assembly_min_exact_match_to_remap
-
-**type**: `#!python int`
-
-**default**: `#!python 15`
-
-The minimum length of exact matches to initiate remapping a read to a contig
-
-**schema definition**:
-```json
-{
-    "type": "integer"
-}
-```
-
-
-## validate.assembly_min_remap_coverage
-
-**type**: `#!python number`
-
-**default**: `#!python 0.9`
-
-Minimum fraction of the contig sequence which the remapped sequences must align over
-
-**schema definition**:
-```json
-{
-    "maximum": 1,
-    "minimum": 0,
-    "type": "number"
-}
-```
-
-
-## validate.assembly_min_remapped_seq
-
-**type**: `#!python int`
-
-**default**: `#!python 3`
-
-The minimum input sequences that must remap for an assembled contig to be used
-
-**schema definition**:
-```json
-{
-    "type": "integer"
-}
-```
-
-
-## validate.assembly_min_uniq
-
-**type**: `#!python number`
-
-**default**: `#!python 0.1`
-
-Minimum percent uniq required to keep separate assembled contigs. if contigs are more similar then the lower scoring, then shorter, contig is dropped
-
-**schema definition**:
-```json
-{
-    "maximum": 1,
-    "minimum": 0,
-    "type": "number"
-}
-```
-
-
-## validate.assembly_strand_concordance
-
-**type**: `#!python number`
-
-**default**: `#!python 0.51`
-
-When the number of remapped reads from each strand are compared, the ratio must be above this number to decide on the strand
-
-**schema definition**:
-```json
-{
-    "maximum": 1,
-    "minimum": 0,
-    "type": "number"
-}
-```
-
-
-## validate.blat_limit_top_aln
-
-**type**: `#!python int`
-
-**default**: `#!python 10`
-
-Number of results to return from blat (ranking based on score)
-
-**schema definition**:
-```json
-{
-    "type": "integer"
-}
-```
-
-
-## validate.blat_min_identity
-
-**type**: `#!python number`
-
-**default**: `#!python 0.9`
-
-The minimum percent identity match required for blat results when aligning contigs
-
-**schema definition**:
-```json
-{
-    "maximum": 1,
-    "minimum": 0,
-    "type": "number"
-}
-```
-
-
-## validate.call_error
-
-**type**: `#!python int`
-
-**default**: `#!python 10`
-
-Buffer zone for the evidence window
-
-**schema definition**:
-```json
-{
-    "type": "integer"
-}
-```
-
-
-## validate.clean_aligner_files
-
-**type**: `#!python boolean`
-
-**default**: `#!python False`
-
-Remove the aligner output files after the validation stage is complete. not required for subsequent steps but can be useful in debugging and deep investigation of events
-
-**schema definition**:
-```json
-{
-    "type": "boolean"
-}
-```
-
-
-## validate.contig_aln_max_event_size
-
-**type**: `#!python int`
-
-**default**: `#!python 50`
-
-Relates to determining breakpoints when pairing contig alignments. for any given read in a putative pair the soft clipping is extended to include any events of greater than this size. the softclipping is added to the side of the alignment as indicated by the breakpoint we are assigning pairs to
-
-**schema definition**:
-```json
-{
-    "type": "integer"
-}
-```
-
-
-## validate.contig_aln_merge_inner_anchor
-
-**type**: `#!python int`
-
-**default**: `#!python 20`
-
-The minimum number of consecutive exact match base pairs to not merge events within a contig alignment
-
-**schema definition**:
-```json
-{
-    "type": "integer"
-}
-```
-
-
-## validate.contig_aln_merge_outer_anchor
-
-**type**: `#!python int`
-
-**default**: `#!python 15`
-
-Minimum consecutively aligned exact matches to anchor an end for merging internal events
-
-**schema definition**:
-```json
-{
-    "type": "integer"
-}
-```
-
-
-## validate.contig_aln_min_anchor_size
-
-**type**: `#!python int`
-
-**default**: `#!python 50`
-
-The minimum number of aligned bases for a contig (m or =) in order to simplify. do not have to be consecutive
-
-**schema definition**:
-```json
-{
-    "type": "integer"
-}
-```
-
-
-## validate.contig_aln_min_extend_overlap
-
-**type**: `#!python int`
-
-**default**: `#!python 10`
-
-Minimum number of bases the query coverage interval must be extended by in order to pair alignments as a single split alignment
-
-**schema definition**:
-```json
-{
-    "type": "integer"
-}
-```
-
-
-## validate.contig_aln_min_query_consumption
-
-**type**: `#!python number`
-
-**default**: `#!python 0.9`
-
-Minimum fraction of the original query sequence that must be used by the read(s) of the alignment
-
-**schema definition**:
-```json
-{
-    "maximum": 1,
-    "minimum": 0,
-    "type": "number"
-}
-```
-
-
-## validate.contig_aln_min_score
-
-**type**: `#!python number`
-
-**default**: `#!python 0.9`
-
-Minimum score for a contig to be used as evidence in a call by contig
-
-**schema definition**:
-```json
-{
-    "maximum": 1,
-    "minimum": 0,
-    "type": "number"
-}
-```
-
-
-## validate.fetch_min_bin_size
-
-**type**: `#!python int`
-
-**default**: `#!python 50`
-
-The minimum size of any bin for reading from a bam file. increasing this number will result in smaller bins being merged or less bins being created (depending on the fetch method)
-
-**schema definition**:
-```json
-{
-    "type": "integer"
-}
-```
-
-
-## validate.fetch_reads_bins
-
-**type**: `#!python int`
-
-**default**: `#!python 5`
-
-Number of bins to split an evidence window into to ensure more even sampling of high coverage regions
-
-**schema definition**:
-```json
-{
-    "type": "integer"
-}
-```
-
-
-## validate.fetch_reads_limit
-
-**type**: `#!python int`
-
-**default**: `#!python 3000`
-
-Maximum number of reads, cap, to loop over for any given evidence window
-
-**schema definition**:
-```json
-{
-    "type": "integer"
-}
-```
-
-
-## validate.filter_secondary_alignments
-
-**type**: `#!python boolean`
-
-**default**: `#!python True`
-
-Filter secondary alignments when gathering read evidence
-
-**schema definition**:
-```json
-{
-    "type": "boolean"
-}
-```
-
-
-## validate.fuzzy_mismatch_number
-
-**type**: `#!python int`
-
-**default**: `#!python 1`
-
-The number of events/mismatches allowed to be considered a fuzzy match
-
-**schema definition**:
-```json
-{
-    "type": "integer"
-}
-```
-
-
-## validate.max_sc_preceeding_anchor
-
-**type**: `#!python int`
-
-**default**: `#!python 6`
-
-When remapping a softclipped read this determines the amount of softclipping allowed on the side opposite of where we expect it. for example for a softclipped read on a breakpoint with a left orientation this limits the amount of softclipping that is allowed on the right. if this is set to none then there is no limit on softclipping
-
-**schema definition**:
-```json
-{
-    "type": "integer"
-}
-```
-
-
-## validate.min_anchor_exact
-
-**type**: `#!python int`
-
-**default**: `#!python 6`
-
-Applies to re-aligning softclipped reads to the opposing breakpoint. the minimum number of consecutive exact matches to anchor a read to initiate targeted realignment
-
-**schema definition**:
-```json
-{
-    "type": "integer"
-}
-```
-
-
-## validate.min_anchor_fuzzy
-
-**type**: `#!python int`
-
-**default**: `#!python 10`
-
-Applies to re-aligning softclipped reads to the opposing breakpoint. the minimum length of a fuzzy match to anchor a read to initiate targeted realignment
-
-**schema definition**:
-```json
-{
-    "type": "integer"
-}
-```
-
-
-## validate.min_anchor_match
-
-**type**: `#!python number`
-
-**default**: `#!python 0.9`
-
-Minimum percent match for a read to be kept as evidence
-
-**schema definition**:
-```json
-{
-    "maximum": 1,
-    "minimum": 0,
-    "type": "number"
-}
-```
-
-
-## validate.min_call_complexity
-
-**type**: `#!python number`
-
-**default**: `#!python 0.1`
-
-The minimum complexity score for a call sequence. is an average for non-contig calls. filters low complexity contigs before alignment. see [contig_complexity](#contig_complexity)
-
-**schema definition**:
-```json
-{
-    "maximum": 1,
-    "minimum": 0,
-    "type": "number"
-}
-```
-
-
-## validate.min_double_aligned_to_estimate_insertion_size
-
-**type**: `#!python int`
-
-**default**: `#!python 2`
-
-The minimum number of reads which map soft-clipped to both breakpoints to assume the size of the untemplated sequence between the breakpoints is at most the read length - 2 * min_softclipping
-
-**schema definition**:
-```json
-{
-    "type": "integer"
-}
-```
-
-
-## validate.min_flanking_pairs_resolution
-
-**type**: `#!python int`
-
-**default**: `#!python 10`
-
-The minimum number of flanking reads required to call a breakpoint by flanking evidence
-
-**schema definition**:
-```json
-{
-    "type": "integer"
-}
-```
-
-
-## validate.min_linking_split_reads
-
-**type**: `#!python int`
-
-**default**: `#!python 2`
-
-The minimum number of split reads which aligned to both breakpoints
-
-**schema definition**:
-```json
-{
-    "type": "integer"
-}
-```
-
-
-## validate.min_mapping_quality
-
-**type**: `#!python int`
-
-**default**: `#!python 5`
-
-The minimum mapping quality of reads to be used as evidence
-
-**schema definition**:
-```json
-{
-    "type": "integer"
-}
-```
-
-
-## validate.min_non_target_aligned_split_reads
-
-**type**: `#!python int`
-
-**default**: `#!python 1`
-
-The minimum number of split reads aligned to a breakpoint by the input bam and no forced by local alignment to the target region to call a breakpoint by split read evidence
-
-**schema definition**:
-```json
-{
-    "type": "integer"
-}
-```
-
-
-## validate.min_sample_size_to_apply_percentage
-
-**type**: `#!python int`
-
-**default**: `#!python 10`
-
-Minimum number of aligned bases to compute a match percent. if there are less than this number of aligned bases (match or mismatch) the percent comparator is not used
-
-**schema definition**:
-```json
-{
-    "type": "integer"
-}
-```
-
-
-## validate.min_softclipping
-
-**type**: `#!python int`
-
-**default**: `#!python 6`
-
-Minimum number of soft-clipped bases required for a read to be used as soft-clipped evidence
-
-**schema definition**:
-```json
-{
-    "type": "integer"
-}
-```
-
-
-## validate.min_spanning_reads_resolution
-
-**type**: `#!python int`
-
-**default**: `#!python 5`
-
-Minimum number of spanning reads required to call an event by spanning evidence
-
-**schema definition**:
-```json
-{
-    "type": "integer"
-}
-```
-
-
-## validate.min_splits_reads_resolution
-
-**type**: `#!python int`
-
-**default**: `#!python 3`
-
-Minimum number of split reads required to call a breakpoint by split reads
-
-**schema definition**:
-```json
-{
-    "type": "integer"
-}
-```
-
-
-## validate.outer_window_min_event_size
-
-**type**: `#!python int`
-
-**default**: `#!python 125`
-
-The minimum size of an event in order for flanking read evidence to be collected
-
-**schema definition**:
-```json
-{
-    "type": "integer"
-}
-```
-
-
-## validate.stdev_count_abnormal
-
-**type**: `#!python number`
-
-**default**: `#!python 3`
-
-The number of standard deviations away from the normal considered expected and therefore not qualifying as flanking reads
-
-**schema definition**:
-```json
-{
-    "type": "number"
-}
-```
-
-
-## validate.trans_fetch_reads_limit
-
-**type**: `#!python Union[int, null]`
-
-**default**: `#!python 12000`
-
-Related to [fetch_reads_limit](#fetch_reads_limit). overrides fetch_reads_limit for transcriptome libraries when set. if this has a value of none then fetch_reads_limit will be used for transcriptome libraries instead
-
-**schema definition**:
-```json
-{
-    "type": [
-        "integer",
-        "null"
-    ]
-}
-```
-
-
-## validate.trans_min_mapping_quality
-
-**type**: `#!python Union[int, null]`
-
-**default**: `#!python 0`
-
-Related to [min_mapping_quality](#min_mapping_quality). overrides the min_mapping_quality if the library is a transcriptome and this is set to any number not none. if this value is none, min_mapping_quality is used for transcriptomes aswell as genomes
-
-**schema definition**:
-```json
-{
-    "type": [
-        "integer",
-        "null"
-    ]
-}
-```
-
-
-## validate.write_evidence_files
-
-**type**: `#!python boolean`
-
-**default**: `#!python True`
-
-Write the intermediate bam and bed files containing the raw evidence collected and contigs aligned. not required for subsequent steps but can be useful in debugging and deep investigation of events
-
-**schema definition**:
-```json
-{
-    "type": "boolean"
-}
-```
-
-
diff --git a/docs/hooks.py b/docs/hooks.py
index dcb00448..baa09506 100644
--- a/docs/hooks.py
+++ b/docs/hooks.py
@@ -14,7 +14,13 @@ def json_to_pytype(record):
         input_type = record['type']
     except TypeError:
         pass
-    types = {'string': 'str', 'integer': 'int', 'float': 'float'}
+    types = {
+        'string': 'str',
+        'integer': 'int',
+        'float': 'float',
+        'boolean': 'bool',
+        'number': 'float',
+    }
 
     if input_type == 'array':
         try:
@@ -30,6 +36,37 @@ def json_to_pytype(record):
     return types.get(input_type, input_type)
 
 
+def list_properties(schema, skip_terms=tuple()):
+    """
+    Render every property of a JSON schema object as a markdown glossary entry
+
+    Properties named in skip_terms are omitted. Returns the list of rendered
+    entries sorted by property name.
+    """
+    entries = {}
+    for name, spec in schema['properties'].items():
+        if name not in skip_terms:
+            pytype = json_to_pytype(spec)
+            default = spec.get('default')
+            extras = {k: v for k, v in spec.items() if k not in ['description', 'default']}
+
+            # the raw schema is only included when more than one such key remains
+            block = ''
+            if len(extras) > 1:
+                dumped = json.dumps(extras, sort_keys=True, indent='    ')
+                block = f'**schema definition**:\n```json\n{dumped}\n```\n'
+
+            parts = [
+                f'### {name}',
+                f'**type**: `#!python {pytype}`',
+                f'**default**: `#!python {repr(default)}`' if default is not None else '',
+                spec.get('description', ''),
+                block,
+            ]
+            entries[name] = '\n\n'.join(parts)
+    return [entries[name] for name in sorted(entries)]
+
+
 def generate_settings_doc(schema_file):
     with open(schema_file, 'r') as fh:
         schema = json.load(fh)
@@ -38,33 +75,58 @@ def generate_settings_doc(schema_file):
     title = 'Configurable Settings'
 
     fname = os.path.join(dirname, filepath)
+
+    result = [f'\n\n# {title}\n']
+    result.append(
+        dedent(
+            '''\
+            ## Defining Samples/Libraries
+
+            The `libraries` property of the mavis config is required to run the snakemake
+            workflow. This is the section that defines what inputs to use, and what types of
+            samples are available.
+
+            ```json
+            {
+                "libraries": {
+                    "<LIBRARY_NAME>": { }  // mapping of library name to library settings
+                }
+            }
+            ```
+
+            The library specific settings are listed below
+            '''
+        )
+    )
+    result.extend(list_properties(schema['properties']['libraries']['additionalProperties']))
+    result.append(
+        dedent(
+            '''\
+            ## Defining Conversions
+
+            If the input to MAVIS is raw tool output and has not been pre-converted to the
+            standard tab delimited format expected by MAVIS then you will need to add
+            a section to the config to tell mavis how to perform the required conversions
+
+            ```json
+            {
+                "convert": {
+                    "<ALIAS>": { }  // mapping of alias to conversion settings
+                }
+            }
+            ```
+
+            The conversion specific settings are listed below
+            '''
+        )
+    )
+    result.extend(list_properties(schema['properties']['convert']['additionalProperties']))
+    result.append('\n## General Settings\n')
+    result.extend(list_properties(schema, ('libraries', 'convert')))
+
     print('writing:', fname)
     with open(fname, 'w') as fh:
-        fh.write(f'\n\n# {title}\n')
-        glossary = {}
-        for term, defn in schema['properties'].items():
-            if term in ['libraries', 'convert']:
-                continue
-            typ = json_to_pytype(defn)
-            desc = defn.get('description', '')
-            default_value = defn.get('default')
-            schema_defn = json.dumps(
-                {k: v for k, v in defn.items() if k not in ['description', 'default']},
-                sort_keys=True,
-                indent='    ',
-            )
-            schema_defn = f'**schema definition**:\n```json\n{schema_defn}\n```\n'
-
-            lines = [
-                f'## {term}',
-                f'**type**: `#!python {typ}`',
-                f'**default**: `#!python {repr(default_value)}`',
-                desc,
-                schema_defn,
-            ]
-            glossary[term] = '\n\n'.join(lines)
-        for term, defn in sorted(glossary.items()):
-            fh.write(f'{defn}\n\n')
+        fh.write('\n\n'.join(result) + '\n')
 
 
 def build_package_docs(config):

From 325b188f357328e6f2d1012e6f2840074d45da63 Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Tue, 20 Apr 2021 17:52:57 -0700
Subject: [PATCH 013/137] Add more descriptions

---
 docs/tutorials/mini.md    |  2 +-
 mavis/schemas/config.json | 44 ++++++++++++++++++++++++++-------------
 2 files changed, 30 insertions(+), 16 deletions(-)

diff --git a/docs/tutorials/mini.md b/docs/tutorials/mini.md
index bb7f00f2..657dac26 100644
--- a/docs/tutorials/mini.md
+++ b/docs/tutorials/mini.md
@@ -30,7 +30,7 @@ mavis/schemas/config.json
 Now you are ready to run MAVIS. This can be done in a single command using snakemake.
 
 ```bash
-snakemake -j 1 --configfig tests/mini-tutorial.config.json
+snakemake -j 1 --configfile=tests/mini-tutorial.config.json
 ```
 
 Which will run the mini tutorial version and output files into a folder called `output_dir` in the
diff --git a/mavis/schemas/config.json b/mavis/schemas/config.json
index 9769754c..a948cd77 100644
--- a/mavis/schemas/config.json
+++ b/mavis/schemas/config.json
@@ -171,7 +171,8 @@
                             "type": "string"
                         },
                         "minItems": 1,
-                        "type": "array"
+                        "type": "array",
+                        "description": "List of input files"
                     },
                     "strand_specific": {
                         "default": false,
@@ -189,12 +190,14 @@
         "illustrate.domain_color": {
             "default": "#ccccb3",
             "description": "Domain fill color",
-            "type": "string"
+            "type": "string",
+            "pattern": "^#[a-fA-F0-9]{6}$"
         },
         "illustrate.domain_mismatch_color": {
             "default": "#b2182b",
             "description": "Domain fill color on 0%% match",
-            "type": "string"
+            "type": "string",
+            "pattern": "^#[a-fA-F0-9]{6}$"
         },
         "illustrate.domain_name_regex_filter": {
             "default": "^PF\\d+$",
@@ -204,7 +207,8 @@
         "illustrate.domain_scaffold_color": {
             "default": "#000000",
             "description": "The color of the domain scaffold",
-            "type": "string"
+            "type": "string",
+            "pattern": "^#[a-fA-F0-9]{6}$"
         },
         "illustrate.drawing_width_iter_increase": {
             "default": 500,
@@ -290,7 +294,8 @@
         "illustrate.breakpoint_color": {
             "default": "#000000",
             "description": "Breakpoint outline color",
-            "type": "string"
+            "type": "string",
+            "pattern": "^#[a-fA-F0-9]{6}$"
         },
         "libraries": {
             "additionalProperties": {
@@ -301,15 +306,17 @@
                             "type": "string"
                         },
                         "minItems": 1,
-                        "type": "array"
+                        "type": "array",
+                        "description": "List of input files or conversion aliases that should be processed for this library"
                     },
                     "total_batches": {
                         "type": "integer",
                         "min": 1,
-                        "description": "The number of jobs to slit a library into for cluster/validate/annotate"
+                        "description": "The number of jobs to split a library into for cluster/validate/annotate. This will be set during initialization of the config if not given"
                     },
                     "bam_file": {
-                        "type": "string"
+                        "type": "string",
+                        "description": "Path to the bam file containing the sequencing reads for this library"
                     },
                     "disease_status": {
                         "enum": [
@@ -319,7 +326,8 @@
                         "type": "string"
                     },
                     "median_fragment_size": {
-                        "type": "integer"
+                        "type": "integer",
+                        "description": "The median fragment size in the paired-end read library. This will be computed from the bam during initialization of the config if not given"
                     },
                     "protocol": {
                         "enum": [
@@ -329,10 +337,12 @@
                         "type": "string"
                     },
                     "read_length": {
-                        "type": "integer"
+                        "type": "integer",
+                        "description": "The read length in the paired-end read library. This will be computed from the bam during initialization of the config if not given"
                     },
                     "stdev_fragment_size": {
-                        "type": "integer"
+                        "type": "integer",
+                        "description": "The standard deviation of fragment size in the paired-end read library. This will be computed from the bam during initialization of the config if not given"
                     },
                     "strand_determining_read": {
                         "default": 2,
@@ -366,7 +376,8 @@
             "type": "string"
         },
         "output_dir": {
-            "type": "string"
+            "type": "string",
+            "description": "path to the directory to output the MAVIS files to"
         },
         "pairing.contig_call_distance": {
             "default": 10,
@@ -402,7 +413,8 @@
             },
             "maxItems": 1,
             "minItems": 1,
-            "type": "array"
+            "type": "array",
+            "description": "The reference genome file used by the aligner"
         },
         "reference.annotations": {
             "examples": [
@@ -412,7 +424,8 @@
                 "type": "string"
             },
             "minItems": 1,
-            "type": "array"
+            "type": "array",
+            "description": "The reference file containing gene/transcript position information"
         },
         "reference.dgv_annotation": {
             "examples": [
@@ -436,7 +449,8 @@
                 "type": "string"
             },
             "minItems": 1,
-            "type": "array"
+            "type": "array",
+            "description": "A list of regions to ignore in validation. Generally these are centromeres and telomeres or known poor mapping areas"
         },
         "reference.reference_genome": {
             "examples": [

From edb5e8a1564f65145e7584da313c6e638e65a2c8 Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Tue, 20 Apr 2021 17:58:14 -0700
Subject: [PATCH 014/137] Remove commented out code

---
 mavis/breakpoint.py | 11 -----------
 1 file changed, 11 deletions(-)

diff --git a/mavis/breakpoint.py b/mavis/breakpoint.py
index dfdda6a5..10218017 100644
--- a/mavis/breakpoint.py
+++ b/mavis/breakpoint.py
@@ -87,17 +87,6 @@ class BreakpointPair:
     untemplated_seq: Optional[str]
     data: Dict
 
-    # def __getattr__(self, attr):
-    #     data = object.__getattribute__(self, 'data')
-    #     try:
-    #         return data[COLUMNS[attr]]
-    #     except (KeyError, AttributeError):
-    #         try:
-    #             return data[attr]
-    #         except KeyError:
-    #             pass
-    #     raise AttributeError(attr)
-
     def __getitem__(self, index):
         try:
             index = int(index)

From 5c318f767916449eb53fa656d78e2d83c9b8d686 Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Tue, 20 Apr 2021 18:16:26 -0700
Subject: [PATCH 015/137] Try splitting workflows

---
 .github/workflows/build.yml       | 31 ++++++------------
 .github/workflows/quick-tests.yml | 53 +++++++++++++++++++++++++++++++
 2 files changed, 62 insertions(+), 22 deletions(-)
 create mode 100644 .github/workflows/quick-tests.yml

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 37c862f0..411b1e07 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -3,7 +3,12 @@
 
 name: build
 
-on: [push, pull_request]
+on:
+  push:
+    branches:
+      - master
+      - develop
+  pull_request:
 
 jobs:
   build:
@@ -11,6 +16,7 @@ jobs:
     strategy:
       matrix:
         python-version: [3.6, 3.7, 3.8]
+    name: python-${{ matrix.python-version }}
     steps:
     - uses: actions/checkout@v2
     - name: Set up Python ${{ matrix.python-version }}
@@ -21,16 +27,6 @@ jobs:
       run: |
         python -m pip install --upgrade pip setuptools
         pip install .[test]
-    - name: Lint with flake8
-      run: |
-        pip install flake8
-        # stop the build if there are Python syntax errors or undefined names
-        flake8 mavis --count --select=E9,F63,F7,F82 --show-source --statistics
-    - name: Lint with black
-      run: |
-        pip install black
-        # stop the build if black needs to be run
-        black mavis -S -l 100 --check
     - name: install bwa
       run: |
         git clone https://github.com/lh3/bwa.git
@@ -42,21 +38,14 @@ jobs:
       run: |
         wget http://hgdownload.soe.ucsc.edu/admin/exe/linux.x86_64/blat/blat
         chmod a+x blat
-    - name: run short tests with pytest
-      run: |
-        export PATH=$PATH:$(pwd):$(pwd)/bwa
-        pytest tests -v \
-          --junitxml=junit/test-results-${{ matrix.python-version }}.xml \
-          --durations=10
-      env:
-        RUN_FULL: 0
-      if: github.event_name != 'pull_request'
     - name: set up .pth file
       run: |
         python tests/setup_subprocess_cov.py
     - name: run full tests with pytest
       run: |
         export PATH=$PATH:$(pwd):$(pwd)/bwa
+        export COVERAGE_PROCESS_START=$(pwd)/.coveragerc
+
         pytest tests -v \
           --junitxml=junit/test-results-${{ matrix.python-version }}.xml \
           --cov mavis \
@@ -66,7 +55,6 @@ jobs:
           --cov-branch
       env:
         RUN_FULL: 1
-      if: github.event_name == 'pull_request'
     - name: Upload pytest test results
       uses: actions/upload-artifact@master
       with:
@@ -83,4 +71,3 @@ jobs:
         env_vars: OS,PYTHON
         name: codecov-umbrella
         fail_ci_if_error: true
-      if: matrix.python-version == 3.7 && github.event_name == 'pull_request'
diff --git a/.github/workflows/quick-tests.yml b/.github/workflows/quick-tests.yml
new file mode 100644
index 00000000..ffadaebc
--- /dev/null
+++ b/.github/workflows/quick-tests.yml
@@ -0,0 +1,53 @@
+# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
+# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
+
+name: quick-tests
+
+on: [push]
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python-version: [3.6, 3.7, 3.8]
+    name: python-${{ matrix.python-version }} quick
+    steps:
+    - uses: actions/checkout@v2
+    - name: Set up Python ${{ matrix.python-version }}
+      uses: actions/setup-python@v2
+      with:
+        python-version: ${{ matrix.python-version }}
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip setuptools
+        pip install .[test]
+    - name: Lint with flake8
+      run: |
+        pip install flake8
+        # stop the build if there are Python syntax errors or undefined names
+        flake8 mavis --count --select=E9,F63,F7,F82 --show-source --statistics
+    - name: Lint with black
+      run: |
+        pip install black
+        # stop the build if black needs to be run
+        black mavis -S -l 100 --check
+    - name: install bwa
+      run: |
+        git clone https://github.com/lh3/bwa.git
+        cd bwa
+        git checkout v0.7.17
+        make
+        cd ..
+    - name: install blat
+      run: |
+        wget http://hgdownload.soe.ucsc.edu/admin/exe/linux.x86_64/blat/blat
+        chmod a+x blat
+    - name: run short tests with pytest
+      run: |
+        export PATH=$PATH:$(pwd):$(pwd)/bwa
+        pytest tests -v \
+          --junitxml=junit/test-results-${{ matrix.python-version }}.xml \
+          --durations=10
+      env:
+        RUN_FULL: 0

From a181d6a5b4adb1307be40c07137e9db8b36ad6ea Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Tue, 20 Apr 2021 18:29:11 -0700
Subject: [PATCH 016/137] Only report coverage once

---
 .github/workflows/build.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 411b1e07..855df03b 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -71,3 +71,4 @@ jobs:
         env_vars: OS,PYTHON
         name: codecov-umbrella
         fail_ci_if_error: true
+      if: matrix.python-version == 3.8

From be3772e572b169d60ec836e7843758a5896e113c Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Tue, 20 Apr 2021 21:42:05 -0700
Subject: [PATCH 017/137] Fix coverage issues

---
 .github/workflows/build.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 855df03b..e8107623 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -26,7 +26,7 @@ jobs:
     - name: Install dependencies
       run: |
         python -m pip install --upgrade pip setuptools
-        pip install .[test]
+        pip install -e .[test]  # need editable to make sure the coverage reports correctly
     - name: install bwa
       run: |
         git clone https://github.com/lh3/bwa.git

From 890cf05ca5d6b0646ea55a7b80174e445c5a184e Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Wed, 21 Apr 2021 12:06:28 -0700
Subject: [PATCH 018/137] Use src folder for code

---
 .github/workflows/quick-tests.yml                | 4 ++--
 MANIFEST.in                                      | 8 +-------
 Snakefile                                        | 2 +-
 docs/hooks.py                                    | 4 ++--
 setup.py                                         | 3 ++-
 {mavis => src/mavis}/__init__.py                 | 0
 {mavis => src/mavis}/align.py                    | 0
 {mavis => src/mavis}/annotate/__init__.py        | 0
 {mavis => src/mavis}/annotate/base.py            | 0
 {mavis => src/mavis}/annotate/constants.py       | 0
 {mavis => src/mavis}/annotate/file_io.py         | 0
 {mavis => src/mavis}/annotate/fusion.py          | 0
 {mavis => src/mavis}/annotate/genomic.py         | 0
 {mavis => src/mavis}/annotate/main.py            | 0
 {mavis => src/mavis}/annotate/protein.py         | 0
 {mavis => src/mavis}/annotate/splicing.py        | 0
 {mavis => src/mavis}/annotate/variant.py         | 0
 {mavis => src/mavis}/assemble.py                 | 0
 {mavis => src/mavis}/bam/__init__.py             | 0
 {mavis => src/mavis}/bam/cache.py                | 0
 {mavis => src/mavis}/bam/cigar.py                | 0
 {mavis => src/mavis}/bam/read.py                 | 0
 {mavis => src/mavis}/bam/stats.py                | 0
 {mavis => src/mavis}/blat.py                     | 0
 {mavis => src/mavis}/breakpoint.py               | 0
 {mavis => src/mavis}/cluster/__init__.py         | 0
 {mavis => src/mavis}/cluster/cluster.py          | 0
 {mavis => src/mavis}/cluster/main.py             | 0
 {mavis => src/mavis}/config.py                   | 0
 {mavis => src/mavis}/constants.py                | 0
 {mavis => src/mavis}/error.py                    | 0
 {mavis => src/mavis}/illustrate/__init__.py      | 0
 {mavis => src/mavis}/illustrate/constants.py     | 0
 {mavis => src/mavis}/illustrate/diagram.py       | 0
 {mavis => src/mavis}/illustrate/elements.py      | 0
 {mavis => src/mavis}/illustrate/scatter.py       | 0
 {mavis => src/mavis}/illustrate/util.py          | 0
 {mavis => src/mavis}/interval.py                 | 0
 {mavis => src/mavis}/main.py                     | 0
 {mavis => src/mavis}/overlay.py                  | 0
 {mavis => src/mavis}/pairing/__init__.py         | 0
 {mavis => src/mavis}/pairing/constants.py        | 0
 {mavis => src/mavis}/pairing/main.py             | 0
 {mavis => src/mavis}/pairing/pairing.py          | 0
 {mavis => src/mavis}/schemas/__init__.py         | 0
 {mavis => src/mavis}/schemas/config.json         | 0
 {mavis => src/mavis}/schemas/overlay.json        | 0
 {mavis => src/mavis}/summary/__init__.py         | 0
 {mavis => src/mavis}/summary/constants.py        | 0
 {mavis => src/mavis}/summary/main.py             | 0
 {mavis => src/mavis}/summary/summary.py          | 0
 {mavis => src/mavis}/tools/__init__.py           | 0
 {mavis => src/mavis}/tools/breakdancer.py        | 0
 {mavis => src/mavis}/tools/chimerascan.py        | 0
 {mavis => src/mavis}/tools/cnvnator.py           | 0
 {mavis => src/mavis}/tools/constants.py          | 0
 {mavis => src/mavis}/tools/starfusion.py         | 0
 {mavis => src/mavis}/tools/transabyss.py         | 0
 {mavis => src/mavis}/tools/vcf.py                | 0
 {mavis => src/mavis}/util.py                     | 0
 {mavis => src/mavis}/validate/__init__.py        | 0
 {mavis => src/mavis}/validate/base.py            | 0
 {mavis => src/mavis}/validate/call.py            | 0
 {mavis => src/mavis}/validate/constants.py       | 0
 {mavis => src/mavis}/validate/evidence.py        | 0
 {mavis => src/mavis}/validate/main.py            | 0
 {tab => src/tab}/__init__.py                     | 0
 {tab => src/tab}/tab.py                          | 0
 {tools => src/tools}/TSV.pm                      | 0
 src/tools/__init__.py                            | 0
 {tools => src/tools}/calculate_ref_alt_counts.py | 0
 {tools => src/tools}/find_repeats.py             | 0
 {tools => src/tools}/generate_ensembl_json.py    | 0
 {tools => src/tools}/get_hg19_reference_files.sh | 0
 74 files changed, 8 insertions(+), 13 deletions(-)
 rename {mavis => src/mavis}/__init__.py (100%)
 rename {mavis => src/mavis}/align.py (100%)
 rename {mavis => src/mavis}/annotate/__init__.py (100%)
 rename {mavis => src/mavis}/annotate/base.py (100%)
 rename {mavis => src/mavis}/annotate/constants.py (100%)
 rename {mavis => src/mavis}/annotate/file_io.py (100%)
 rename {mavis => src/mavis}/annotate/fusion.py (100%)
 rename {mavis => src/mavis}/annotate/genomic.py (100%)
 rename {mavis => src/mavis}/annotate/main.py (100%)
 rename {mavis => src/mavis}/annotate/protein.py (100%)
 rename {mavis => src/mavis}/annotate/splicing.py (100%)
 rename {mavis => src/mavis}/annotate/variant.py (100%)
 rename {mavis => src/mavis}/assemble.py (100%)
 rename {mavis => src/mavis}/bam/__init__.py (100%)
 rename {mavis => src/mavis}/bam/cache.py (100%)
 rename {mavis => src/mavis}/bam/cigar.py (100%)
 rename {mavis => src/mavis}/bam/read.py (100%)
 rename {mavis => src/mavis}/bam/stats.py (100%)
 rename {mavis => src/mavis}/blat.py (100%)
 rename {mavis => src/mavis}/breakpoint.py (100%)
 rename {mavis => src/mavis}/cluster/__init__.py (100%)
 rename {mavis => src/mavis}/cluster/cluster.py (100%)
 rename {mavis => src/mavis}/cluster/main.py (100%)
 rename {mavis => src/mavis}/config.py (100%)
 rename {mavis => src/mavis}/constants.py (100%)
 rename {mavis => src/mavis}/error.py (100%)
 rename {mavis => src/mavis}/illustrate/__init__.py (100%)
 rename {mavis => src/mavis}/illustrate/constants.py (100%)
 rename {mavis => src/mavis}/illustrate/diagram.py (100%)
 rename {mavis => src/mavis}/illustrate/elements.py (100%)
 rename {mavis => src/mavis}/illustrate/scatter.py (100%)
 rename {mavis => src/mavis}/illustrate/util.py (100%)
 rename {mavis => src/mavis}/interval.py (100%)
 rename {mavis => src/mavis}/main.py (100%)
 rename {mavis => src/mavis}/overlay.py (100%)
 rename {mavis => src/mavis}/pairing/__init__.py (100%)
 rename {mavis => src/mavis}/pairing/constants.py (100%)
 rename {mavis => src/mavis}/pairing/main.py (100%)
 rename {mavis => src/mavis}/pairing/pairing.py (100%)
 rename {mavis => src/mavis}/schemas/__init__.py (100%)
 rename {mavis => src/mavis}/schemas/config.json (100%)
 rename {mavis => src/mavis}/schemas/overlay.json (100%)
 rename {mavis => src/mavis}/summary/__init__.py (100%)
 rename {mavis => src/mavis}/summary/constants.py (100%)
 rename {mavis => src/mavis}/summary/main.py (100%)
 rename {mavis => src/mavis}/summary/summary.py (100%)
 rename {mavis => src/mavis}/tools/__init__.py (100%)
 rename {mavis => src/mavis}/tools/breakdancer.py (100%)
 rename {mavis => src/mavis}/tools/chimerascan.py (100%)
 rename {mavis => src/mavis}/tools/cnvnator.py (100%)
 rename {mavis => src/mavis}/tools/constants.py (100%)
 rename {mavis => src/mavis}/tools/starfusion.py (100%)
 rename {mavis => src/mavis}/tools/transabyss.py (100%)
 rename {mavis => src/mavis}/tools/vcf.py (100%)
 rename {mavis => src/mavis}/util.py (100%)
 rename {mavis => src/mavis}/validate/__init__.py (100%)
 rename {mavis => src/mavis}/validate/base.py (100%)
 rename {mavis => src/mavis}/validate/call.py (100%)
 rename {mavis => src/mavis}/validate/constants.py (100%)
 rename {mavis => src/mavis}/validate/evidence.py (100%)
 rename {mavis => src/mavis}/validate/main.py (100%)
 rename {tab => src/tab}/__init__.py (100%)
 rename {tab => src/tab}/tab.py (100%)
 rename {tools => src/tools}/TSV.pm (100%)
 create mode 100644 src/tools/__init__.py
 rename {tools => src/tools}/calculate_ref_alt_counts.py (100%)
 rename {tools => src/tools}/find_repeats.py (100%)
 rename {tools => src/tools}/generate_ensembl_json.py (100%)
 rename {tools => src/tools}/get_hg19_reference_files.sh (100%)

diff --git a/.github/workflows/quick-tests.yml b/.github/workflows/quick-tests.yml
index ffadaebc..e6f6a917 100644
--- a/.github/workflows/quick-tests.yml
+++ b/.github/workflows/quick-tests.yml
@@ -26,12 +26,12 @@ jobs:
       run: |
         pip install flake8
         # stop the build if there are Python syntax errors or undefined names
-        flake8 mavis --count --select=E9,F63,F7,F82 --show-source --statistics
+        flake8 src/mavis --count --select=E9,F63,F7,F82 --show-source --statistics
     - name: Lint with black
       run: |
         pip install black
         # stop the build if black needs to be run
-        black mavis -S -l 100 --check
+        black src/mavis -S -l 100 --check
     - name: install bwa
       run: |
         git clone https://github.com/lh3/bwa.git
diff --git a/MANIFEST.in b/MANIFEST.in
index 165d54e6..8b270b97 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,11 +1,5 @@
-recursive-include docs *
-recursive-include tests *.py 
-include tests/*/data/*
-recursive-include mavis *.py *.json
-recursive-include tools *.pl *.py *.pm
-recursive-include tab *.py
+recursive-include src *.py *.json
 include README.md
 include LICENSE.txt
-include mavis/config-schema.json
 prune docs/build
 prune docs/source/auto
diff --git a/Snakefile b/Snakefile
index 7710c0d9..351e0ab9 100644
--- a/Snakefile
+++ b/Snakefile
@@ -18,7 +18,7 @@ try:
     # TODO: replace with URL so that the user does not need a copy of the config schema
     validate(
         config,
-        os.path.join(os.getcwd(), 'mavis/schemas/config.json')
+        os.path.join(os.getcwd(), 'src/mavis/schemas/config.json')
     )
     for key in [
         "libraries",
diff --git a/docs/hooks.py b/docs/hooks.py
index baa09506..44931755 100644
--- a/docs/hooks.py
+++ b/docs/hooks.py
@@ -130,9 +130,9 @@ def generate_settings_doc(schema_file):
 
 
 def build_package_docs(config):
-    schema_file = os.path.join(os.path.dirname(__file__), '../mavis/schemas/config.json')
+    schema_file = os.path.join(os.path.dirname(__file__), '../src/mavis/schemas/config.json')
     generate_settings_doc(schema_file)
-    package_dir = os.path.join(os.path.dirname(__file__), '../mavis')
+    package_dir = os.path.join(os.path.dirname(__file__), '../src/mavis')
     output_dir = os.path.join(os.path.dirname(__file__), 'package')
 
     extract_to_markdown(
diff --git a/setup.py b/setup.py
index dd23fb29..53374dcb 100644
--- a/setup.py
+++ b/setup.py
@@ -101,7 +101,8 @@ def check_nonpython_dependencies():
     version='{}'.format(VERSION),
     url='https://github.com/bcgsc/mavis.git',
     download_url='https://github.com/bcgsc/mavis/archive/v{}.tar.gz'.format(VERSION),
-    packages=find_packages(exclude=['tests']),
+    package_dir={'': 'src'},
+    packages=find_packages(where='src'),
     description='A Structural Variant Post-Processing Package',
     long_description=parse_md_readme(),
     install_requires=INSTALL_REQS,
diff --git a/mavis/__init__.py b/src/mavis/__init__.py
similarity index 100%
rename from mavis/__init__.py
rename to src/mavis/__init__.py
diff --git a/mavis/align.py b/src/mavis/align.py
similarity index 100%
rename from mavis/align.py
rename to src/mavis/align.py
diff --git a/mavis/annotate/__init__.py b/src/mavis/annotate/__init__.py
similarity index 100%
rename from mavis/annotate/__init__.py
rename to src/mavis/annotate/__init__.py
diff --git a/mavis/annotate/base.py b/src/mavis/annotate/base.py
similarity index 100%
rename from mavis/annotate/base.py
rename to src/mavis/annotate/base.py
diff --git a/mavis/annotate/constants.py b/src/mavis/annotate/constants.py
similarity index 100%
rename from mavis/annotate/constants.py
rename to src/mavis/annotate/constants.py
diff --git a/mavis/annotate/file_io.py b/src/mavis/annotate/file_io.py
similarity index 100%
rename from mavis/annotate/file_io.py
rename to src/mavis/annotate/file_io.py
diff --git a/mavis/annotate/fusion.py b/src/mavis/annotate/fusion.py
similarity index 100%
rename from mavis/annotate/fusion.py
rename to src/mavis/annotate/fusion.py
diff --git a/mavis/annotate/genomic.py b/src/mavis/annotate/genomic.py
similarity index 100%
rename from mavis/annotate/genomic.py
rename to src/mavis/annotate/genomic.py
diff --git a/mavis/annotate/main.py b/src/mavis/annotate/main.py
similarity index 100%
rename from mavis/annotate/main.py
rename to src/mavis/annotate/main.py
diff --git a/mavis/annotate/protein.py b/src/mavis/annotate/protein.py
similarity index 100%
rename from mavis/annotate/protein.py
rename to src/mavis/annotate/protein.py
diff --git a/mavis/annotate/splicing.py b/src/mavis/annotate/splicing.py
similarity index 100%
rename from mavis/annotate/splicing.py
rename to src/mavis/annotate/splicing.py
diff --git a/mavis/annotate/variant.py b/src/mavis/annotate/variant.py
similarity index 100%
rename from mavis/annotate/variant.py
rename to src/mavis/annotate/variant.py
diff --git a/mavis/assemble.py b/src/mavis/assemble.py
similarity index 100%
rename from mavis/assemble.py
rename to src/mavis/assemble.py
diff --git a/mavis/bam/__init__.py b/src/mavis/bam/__init__.py
similarity index 100%
rename from mavis/bam/__init__.py
rename to src/mavis/bam/__init__.py
diff --git a/mavis/bam/cache.py b/src/mavis/bam/cache.py
similarity index 100%
rename from mavis/bam/cache.py
rename to src/mavis/bam/cache.py
diff --git a/mavis/bam/cigar.py b/src/mavis/bam/cigar.py
similarity index 100%
rename from mavis/bam/cigar.py
rename to src/mavis/bam/cigar.py
diff --git a/mavis/bam/read.py b/src/mavis/bam/read.py
similarity index 100%
rename from mavis/bam/read.py
rename to src/mavis/bam/read.py
diff --git a/mavis/bam/stats.py b/src/mavis/bam/stats.py
similarity index 100%
rename from mavis/bam/stats.py
rename to src/mavis/bam/stats.py
diff --git a/mavis/blat.py b/src/mavis/blat.py
similarity index 100%
rename from mavis/blat.py
rename to src/mavis/blat.py
diff --git a/mavis/breakpoint.py b/src/mavis/breakpoint.py
similarity index 100%
rename from mavis/breakpoint.py
rename to src/mavis/breakpoint.py
diff --git a/mavis/cluster/__init__.py b/src/mavis/cluster/__init__.py
similarity index 100%
rename from mavis/cluster/__init__.py
rename to src/mavis/cluster/__init__.py
diff --git a/mavis/cluster/cluster.py b/src/mavis/cluster/cluster.py
similarity index 100%
rename from mavis/cluster/cluster.py
rename to src/mavis/cluster/cluster.py
diff --git a/mavis/cluster/main.py b/src/mavis/cluster/main.py
similarity index 100%
rename from mavis/cluster/main.py
rename to src/mavis/cluster/main.py
diff --git a/mavis/config.py b/src/mavis/config.py
similarity index 100%
rename from mavis/config.py
rename to src/mavis/config.py
diff --git a/mavis/constants.py b/src/mavis/constants.py
similarity index 100%
rename from mavis/constants.py
rename to src/mavis/constants.py
diff --git a/mavis/error.py b/src/mavis/error.py
similarity index 100%
rename from mavis/error.py
rename to src/mavis/error.py
diff --git a/mavis/illustrate/__init__.py b/src/mavis/illustrate/__init__.py
similarity index 100%
rename from mavis/illustrate/__init__.py
rename to src/mavis/illustrate/__init__.py
diff --git a/mavis/illustrate/constants.py b/src/mavis/illustrate/constants.py
similarity index 100%
rename from mavis/illustrate/constants.py
rename to src/mavis/illustrate/constants.py
diff --git a/mavis/illustrate/diagram.py b/src/mavis/illustrate/diagram.py
similarity index 100%
rename from mavis/illustrate/diagram.py
rename to src/mavis/illustrate/diagram.py
diff --git a/mavis/illustrate/elements.py b/src/mavis/illustrate/elements.py
similarity index 100%
rename from mavis/illustrate/elements.py
rename to src/mavis/illustrate/elements.py
diff --git a/mavis/illustrate/scatter.py b/src/mavis/illustrate/scatter.py
similarity index 100%
rename from mavis/illustrate/scatter.py
rename to src/mavis/illustrate/scatter.py
diff --git a/mavis/illustrate/util.py b/src/mavis/illustrate/util.py
similarity index 100%
rename from mavis/illustrate/util.py
rename to src/mavis/illustrate/util.py
diff --git a/mavis/interval.py b/src/mavis/interval.py
similarity index 100%
rename from mavis/interval.py
rename to src/mavis/interval.py
diff --git a/mavis/main.py b/src/mavis/main.py
similarity index 100%
rename from mavis/main.py
rename to src/mavis/main.py
diff --git a/mavis/overlay.py b/src/mavis/overlay.py
similarity index 100%
rename from mavis/overlay.py
rename to src/mavis/overlay.py
diff --git a/mavis/pairing/__init__.py b/src/mavis/pairing/__init__.py
similarity index 100%
rename from mavis/pairing/__init__.py
rename to src/mavis/pairing/__init__.py
diff --git a/mavis/pairing/constants.py b/src/mavis/pairing/constants.py
similarity index 100%
rename from mavis/pairing/constants.py
rename to src/mavis/pairing/constants.py
diff --git a/mavis/pairing/main.py b/src/mavis/pairing/main.py
similarity index 100%
rename from mavis/pairing/main.py
rename to src/mavis/pairing/main.py
diff --git a/mavis/pairing/pairing.py b/src/mavis/pairing/pairing.py
similarity index 100%
rename from mavis/pairing/pairing.py
rename to src/mavis/pairing/pairing.py
diff --git a/mavis/schemas/__init__.py b/src/mavis/schemas/__init__.py
similarity index 100%
rename from mavis/schemas/__init__.py
rename to src/mavis/schemas/__init__.py
diff --git a/mavis/schemas/config.json b/src/mavis/schemas/config.json
similarity index 100%
rename from mavis/schemas/config.json
rename to src/mavis/schemas/config.json
diff --git a/mavis/schemas/overlay.json b/src/mavis/schemas/overlay.json
similarity index 100%
rename from mavis/schemas/overlay.json
rename to src/mavis/schemas/overlay.json
diff --git a/mavis/summary/__init__.py b/src/mavis/summary/__init__.py
similarity index 100%
rename from mavis/summary/__init__.py
rename to src/mavis/summary/__init__.py
diff --git a/mavis/summary/constants.py b/src/mavis/summary/constants.py
similarity index 100%
rename from mavis/summary/constants.py
rename to src/mavis/summary/constants.py
diff --git a/mavis/summary/main.py b/src/mavis/summary/main.py
similarity index 100%
rename from mavis/summary/main.py
rename to src/mavis/summary/main.py
diff --git a/mavis/summary/summary.py b/src/mavis/summary/summary.py
similarity index 100%
rename from mavis/summary/summary.py
rename to src/mavis/summary/summary.py
diff --git a/mavis/tools/__init__.py b/src/mavis/tools/__init__.py
similarity index 100%
rename from mavis/tools/__init__.py
rename to src/mavis/tools/__init__.py
diff --git a/mavis/tools/breakdancer.py b/src/mavis/tools/breakdancer.py
similarity index 100%
rename from mavis/tools/breakdancer.py
rename to src/mavis/tools/breakdancer.py
diff --git a/mavis/tools/chimerascan.py b/src/mavis/tools/chimerascan.py
similarity index 100%
rename from mavis/tools/chimerascan.py
rename to src/mavis/tools/chimerascan.py
diff --git a/mavis/tools/cnvnator.py b/src/mavis/tools/cnvnator.py
similarity index 100%
rename from mavis/tools/cnvnator.py
rename to src/mavis/tools/cnvnator.py
diff --git a/mavis/tools/constants.py b/src/mavis/tools/constants.py
similarity index 100%
rename from mavis/tools/constants.py
rename to src/mavis/tools/constants.py
diff --git a/mavis/tools/starfusion.py b/src/mavis/tools/starfusion.py
similarity index 100%
rename from mavis/tools/starfusion.py
rename to src/mavis/tools/starfusion.py
diff --git a/mavis/tools/transabyss.py b/src/mavis/tools/transabyss.py
similarity index 100%
rename from mavis/tools/transabyss.py
rename to src/mavis/tools/transabyss.py
diff --git a/mavis/tools/vcf.py b/src/mavis/tools/vcf.py
similarity index 100%
rename from mavis/tools/vcf.py
rename to src/mavis/tools/vcf.py
diff --git a/mavis/util.py b/src/mavis/util.py
similarity index 100%
rename from mavis/util.py
rename to src/mavis/util.py
diff --git a/mavis/validate/__init__.py b/src/mavis/validate/__init__.py
similarity index 100%
rename from mavis/validate/__init__.py
rename to src/mavis/validate/__init__.py
diff --git a/mavis/validate/base.py b/src/mavis/validate/base.py
similarity index 100%
rename from mavis/validate/base.py
rename to src/mavis/validate/base.py
diff --git a/mavis/validate/call.py b/src/mavis/validate/call.py
similarity index 100%
rename from mavis/validate/call.py
rename to src/mavis/validate/call.py
diff --git a/mavis/validate/constants.py b/src/mavis/validate/constants.py
similarity index 100%
rename from mavis/validate/constants.py
rename to src/mavis/validate/constants.py
diff --git a/mavis/validate/evidence.py b/src/mavis/validate/evidence.py
similarity index 100%
rename from mavis/validate/evidence.py
rename to src/mavis/validate/evidence.py
diff --git a/mavis/validate/main.py b/src/mavis/validate/main.py
similarity index 100%
rename from mavis/validate/main.py
rename to src/mavis/validate/main.py
diff --git a/tab/__init__.py b/src/tab/__init__.py
similarity index 100%
rename from tab/__init__.py
rename to src/tab/__init__.py
diff --git a/tab/tab.py b/src/tab/tab.py
similarity index 100%
rename from tab/tab.py
rename to src/tab/tab.py
diff --git a/tools/TSV.pm b/src/tools/TSV.pm
similarity index 100%
rename from tools/TSV.pm
rename to src/tools/TSV.pm
diff --git a/src/tools/__init__.py b/src/tools/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tools/calculate_ref_alt_counts.py b/src/tools/calculate_ref_alt_counts.py
similarity index 100%
rename from tools/calculate_ref_alt_counts.py
rename to src/tools/calculate_ref_alt_counts.py
diff --git a/tools/find_repeats.py b/src/tools/find_repeats.py
similarity index 100%
rename from tools/find_repeats.py
rename to src/tools/find_repeats.py
diff --git a/tools/generate_ensembl_json.py b/src/tools/generate_ensembl_json.py
similarity index 100%
rename from tools/generate_ensembl_json.py
rename to src/tools/generate_ensembl_json.py
diff --git a/tools/get_hg19_reference_files.sh b/src/tools/get_hg19_reference_files.sh
similarity index 100%
rename from tools/get_hg19_reference_files.sh
rename to src/tools/get_hg19_reference_files.sh

From 47f22bbc89787f5415acc3446c81895ab83df50d Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Wed, 21 Apr 2021 12:10:52 -0700
Subject: [PATCH 019/137] Add docker container

---
 .github/workflows/quick-tests.yml | 11 ++++++++++
 Dockerfile                        | 35 +++++++++++++++++++++++++++++++
 Snakefile                         |  2 +-
 3 files changed, 47 insertions(+), 1 deletion(-)
 create mode 100644 Dockerfile

diff --git a/.github/workflows/quick-tests.yml b/.github/workflows/quick-tests.yml
index e6f6a917..689ea8fb 100644
--- a/.github/workflows/quick-tests.yml
+++ b/.github/workflows/quick-tests.yml
@@ -51,3 +51,14 @@ jobs:
           --durations=10
       env:
         RUN_FULL: 0
+  docker:
+    runs-on: ubuntu-latest
+    name: docker build
+    steps:
+      - uses: actions/checkout@v2
+      - name: build the docker container
+        run: |
+          docker build --file Dockerfile --tag bcgsc/mavis .
+      - name: test the help menu
+        run: |
+          docker run bcgsc/mavis -h
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 00000000..3f2ef284
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,35 @@
+FROM python:3.7-slim-buster
+
+WORKDIR /app
+
+RUN apt-get update && \
+    apt-get upgrade -y && \
+    apt-get install -y git wget make gcc libz-dev
+
+# pysam dependencies
+RUN apt-get install -y libncurses5-dev zlib1g-dev libbz2-dev libncursesw5-dev liblzma-dev
+
+# install BWA
+RUN git clone https://github.com/lh3/bwa.git && \
+    cd bwa && \
+    git checkout v0.7.17 && \
+    make && \
+    cd .. && \
+    mv bwa/bwa /usr/local/bin
+
+# install blat
+RUN wget http://hgdownload.soe.ucsc.edu/admin/exe/linux.x86_64/blat/blat && \
+    chmod a+x blat && \
+    mv blat /usr/local/bin
+
+COPY setup.py setup.py
+COPY setup.cfg setup.cfg
+COPY src src
+COPY LICENSE.txt LICENSE.txt
+COPY README.md README.md
+
+# install python package
+RUN pip install -U setuptools pip wheel
+RUN pip install .
+RUN which mavis
+ENTRYPOINT [ "mavis" ]
diff --git a/Snakefile b/Snakefile
index 351e0ab9..23c0772e 100644
--- a/Snakefile
+++ b/Snakefile
@@ -6,7 +6,7 @@ import re
 import json
 import pandas as pd
 
-CONTAINER = 'creisle/mavis:latest'
+CONTAINER = 'bcgsc/mavis:latest'
 
 def output_dir(*paths):
     return os.path.join(config['output_dir'], *paths)

From b8e15d081e378d38481eaeeebca9e2dddaf96703 Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Wed, 21 Apr 2021 12:20:40 -0700
Subject: [PATCH 020/137] Add src prefix to data files

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 53374dcb..5aab6d08 100644
--- a/setup.py
+++ b/setup.py
@@ -126,7 +126,7 @@ def check_nonpython_dependencies():
         ]
     },
     include_package_data=True,
-    data_files=[('mavis', ['mavis/schemas/config.json', 'mavis/schemas/overlay.json'])],
+    data_files=[('mavis', ['src/mavis/schemas/config.json', 'src/mavis/schemas/overlay.json'])],
     project_urls={'mavis': 'http://mavis.bcgsc.ca'},
 )
 check_nonpython_dependencies()

From 5063aa5fffdbe8a6524192ec72af050f3dd2e718 Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Fri, 23 Apr 2021 13:33:32 -0700
Subject: [PATCH 021/137] Replace custom tab package with pandas

---
 docs/migrating.md                             |  21 +
 setup.py                                      |   2 +-
 src/mavis/align.py                            |  14 +-
 src/mavis/annotate/constants.py               |  23 -
 src/mavis/annotate/file_io.py                 | 213 ++---
 src/mavis/annotate/main.py                    |   2 -
 src/mavis/annotate/splicing.py                |   6 +-
 src/mavis/bam/stats.py                        |   2 +-
 src/mavis/blat.py                             |  41 +-
 src/mavis/cluster/main.py                     |   4 +-
 src/mavis/config.py                           |   7 +-
 src/mavis/constants.py                        |  84 +-
 src/mavis/main.py                             |   6 +-
 src/mavis/overlay.py                          |   4 +-
 src/mavis/pairing/main.py                     |  22 +-
 src/mavis/summary/main.py                     |  62 +-
 src/mavis/summary/summary.py                  |   1 +
 src/mavis/tools/__init__.py                   |  23 +-
 src/mavis/tools/breakdancer.py                |  43 +-
 src/mavis/util.py                             | 263 ++++--
 src/mavis/validate/main.py                    |  10 +-
 src/tab/__init__.py                           | 127 ---
 src/tab/tab.py                                | 401 ---------
 src/tools/calculate_ref_alt_counts.py         |   2 +-
 tests/data/annotations_subsample.tab          |   2 +-
 tests/data/clustering_input.tab               |   2 +-
 ...is_summary_all_mock-A36971_mock-A47933.tab |   2 +-
 tests/data/mock_masking.tab                   |   2 +-
 tests/data/mock_pairing_input.tab             |   2 +-
 .../data/mock_reference_annotations.full.tsv  |   2 +-
 tests/data/mock_reference_annotations.tsv     |   2 +-
 tests/data/mock_sv_events.tsv                 |   2 +-
 tests/data/mock_trans_sv_events.tsv           |   2 +-
 tests/data/pairing_annotations.tab            |   2 +-
 .../pairing_reference_annotations_file.tab    |   2 +-
 tests/end_to_end/test_convert.py              |   2 +-
 tests/integration/test_annotate.py            |  26 +-
 tests/integration/test_annotate_examples.py   |  13 +-
 tests/integration/test_bam.py                 |  15 +-
 tests/integration/test_splicing.py            |   4 +-
 tests/unit/test_tab.py                        | 287 ------
 tests/unit/test_tool.py                       | 837 +++++++++---------
 tests/unit/test_util.py                       | 635 ++++++-------
 43 files changed, 1286 insertions(+), 1938 deletions(-)
 create mode 100644 docs/migrating.md
 delete mode 100644 src/tab/__init__.py
 delete mode 100755 src/tab/tab.py
 delete mode 100644 tests/unit/test_tab.py

diff --git a/docs/migrating.md b/docs/migrating.md
new file mode 100644
index 00000000..213ee00c
--- /dev/null
+++ b/docs/migrating.md
@@ -0,0 +1,21 @@
+# Migrating
+
+## Migrating from v2 to v3
+
+There are major changes from v2 to v3 of MAVIS.
+
+### Tab File Headers
+
+Tab file headers no longer start with `#`. Any lines starting with a pound will be treated
+as comments. This will apply to mavis-style inputs as well as any tab delimited
+reference files
+
+### Configuration
+
+MAVIS no longer uses command line arguments, config files, and environment variables for
+configuration. Instead all configurable settings are controlled via a single input JSON
+config file
+
+### Scheduling
+
+MAVIS is now integrated with snakemake instead of handling its own scheduling
diff --git a/setup.py b/setup.py
index 5aab6d08..0245e812 100644
--- a/setup.py
+++ b/setup.py
@@ -115,7 +115,7 @@ def check_nonpython_dependencies():
     },
     tests_require=TEST_REQS,
     setup_requires=['pip>=9.0.0', 'setuptools>=36.0.0'],
-    python_requires='>=3.2',
+    python_requires='>=3.6',
     author='Caralyn Reisle',
     author_email='creisle@bcgsc.ca',
     test_suite='tests',
diff --git a/src/mavis/align.py b/src/mavis/align.py
index be81b28d..dffed765 100644
--- a/src/mavis/align.py
+++ b/src/mavis/align.py
@@ -13,16 +13,8 @@
 from .bam import cigar as _cigar
 from .bam import read as _read
 from .breakpoint import Breakpoint, BreakpointPair
-from .constants import (
-    CIGAR,
-    COLUMNS,
-    NA_MAPPING_QUALITY,
-    ORIENT,
-    STRAND,
-    SVTYPE,
-    MavisNamespace,
-    reverse_complement,
-)
+from .constants import (CIGAR, COLUMNS, NA_MAPPING_QUALITY, ORIENT, STRAND,
+                        SVTYPE, MavisNamespace, reverse_complement)
 from .error import InvalidRearrangement
 from .interval import Interval
 from .util import DEVNULL
@@ -91,7 +83,7 @@ def query_overlap_extension(self):
             return total_overlap
         return 0
 
-    def score(self, consec_bonus=10):
+    def score(self, consec_bonus=10) -> float:
         """
         scores events between 0 and 1 penalizing events interrupting the alignment. Counts a split
         alignment as a single event
diff --git a/src/mavis/annotate/constants.py b/src/mavis/annotate/constants.py
index 1a4e324d..62882a9b 100644
--- a/src/mavis/annotate/constants.py
+++ b/src/mavis/annotate/constants.py
@@ -1,33 +1,10 @@
 import re
 
-import tab
-
 from ..constants import MavisNamespace, float_fraction
 
 PASS_FILENAME = 'annotations.tab'
 
 
-class SPLICE_TYPE(MavisNamespace):
-    """
-    holds controlled vocabulary for allowed splice type classification values
-
-    Attributes:
-        RETAIN: an intron was retained
-        SKIP: an exon was skipped
-        NORMAL: no exons were skipped and no introns were retained. the normal/expected splicing pattern was followed
-        MULTI_RETAIN: multiple introns were retained
-        MULTI_SKIP: multiple exons were skipped
-        COMPLEX: some combination of exon skipping and intron retention
-    """
-
-    RETAIN: str = 'retained intron'
-    SKIP: str = 'skipped exon'
-    NORMAL: str = 'normal'
-    MULTI_RETAIN: str = 'retained multiple introns'
-    MULTI_SKIP: str = 'skipped multiple exons'
-    COMPLEX: str = 'complex'
-
-
 class SPLICE_SITE_TYPE(MavisNamespace):
     DONOR: int = 3
     ACCEPTOR: int = 5
diff --git a/src/mavis/annotate/file_io.py b/src/mavis/annotate/file_io.py
index ed4f45cb..b41489c7 100644
--- a/src/mavis/annotate/file_io.py
+++ b/src/mavis/annotate/file_io.py
@@ -5,19 +5,21 @@
 import os
 import re
 import warnings
+from typing import Callable, Dict, List, Optional, Tuple
 
-import tab
+import pandas as pd
 from Bio import SeqIO
+from Bio.SeqRecord import SeqRecord
 
 from ..constants import CODON_SIZE, GIEMSA_STAIN, START_AA, STOP_AA, STRAND, translate
 from ..interval import Interval
-from ..util import DEVNULL, LOG, filepath
+from ..util import DEVNULL, LOG, cast_boolean, filepath
 from .base import BioInterval, ReferenceName
 from .genomic import Exon, Gene, PreTranscript, Template, Transcript
 from .protein import Domain, Translation
 
 
-def load_masking_regions(*filepaths):
+def load_masking_regions(*filepaths: str) -> Dict[str, List[BioInterval]]:
     """
     reads a file of regions. The expect input format for the file is tab-delimited and
     the header should contain the following columns
@@ -35,23 +37,20 @@ def load_masking_regions(*filepaths):
         chr20   25600000    27500000    centromere
 
     Args:
-        filepath (str): path to the input tab-delimited file
+        filepath: path to the input tab-delimited file
     Returns:
-        Dict[str,List[BioInterval]]: a dictionary keyed by chromosome name with values of lists of regions on the chromosome
-
-    Example:
-        >>> m = load_masking_regions('filename')
-        >>> m['1']
-        [BioInterval(), BioInterval(), ...]
+        a dictionary keyed by chromosome name with values of lists of regions on the chromosome
     """
-    regions = {}
+    regions: Dict[str, List[BioInterval]] = {}
     for filepath in filepaths:
-        _, rows = tab.read_file(
-            filepath,
-            require=['chr', 'start', 'end', 'name'],
-            cast={'start': int, 'end': int, 'chr': ReferenceName},
+        df = pd.read_csv(
+            filepath, sep='\t', dtype={'chr': str, 'start': int, 'end': int, 'name': str}
         )
-        for row in rows:
+        for col in ['chr', 'start', 'end', 'name']:
+            if col not in df:
+                raise KeyError(f'missing required column ({col})')
+        df['chr'] = df['chr'].apply(lambda c: ReferenceName(c))
+        for row in df.to_dict('records'):
             mask_region = BioInterval(
                 reference_object=row['chr'], start=row['start'], end=row['end'], name=row['name']
             )
@@ -59,38 +58,32 @@ def load_masking_regions(*filepaths):
     return regions
 
 
-def load_reference_genes(*pos, **kwargs):
-    """
-    *Deprecated* Use :func:`load_annotations` instead
-    """
-    warnings.warn('this function has been replaced by load_annotations', DeprecationWarning)
-    return load_annotations(*pos, **kwargs)
-
-
-def load_annotations(*filepaths, warn=DEVNULL, reference_genome=None, best_transcripts_only=False):
+def load_annotations(
+    *filepaths: str,
+    warn: Callable = DEVNULL,
+    reference_genome: Optional[Dict[str, SeqRecord]] = None,
+    best_transcripts_only: bool = False,
+) -> Dict[str, List[Gene]]:
     """
     loads gene models from an input file. Expects a tabbed or json file.
 
     Args:
-        filepath (str): path to the input file
-        verbose (bool): output extra information to stdout
-        reference_genome (Dict[str,Bio.SeqRecord]): dict of reference sequence by
-            template/chr name
-        filetype (str): json or tab/tsv. only required if the file type can't be interpolated from the path extension
+        filepath: path to the input file
+        reference_genome: dict of reference sequence by template/chr name
 
     Returns:
-        Dict[str,List[mavis.annotate.genomic.Gene]]: lists of genes keyed by chromosome name
+        lists of genes keyed by chromosome name
     """
-    total_annotations = {}
+    total_annotations: Dict[str, List[Gene]] = {}
 
     for filename in filepaths:
         data = None
 
-        if filename.endswith('.tab') or filename.endswith('.tsv'):
-            data = convert_tab_to_json(filename, warn)
-        else:
+        if filename.endswith('.json'):
             with open(filename) as fh:
                 data = json.load(fh)
+        else:
+            data = convert_tab_to_json(filename, warn)
 
         current_annotations = parse_annotations_json(
             data,
@@ -105,11 +98,17 @@ def load_annotations(*filepaths, warn=DEVNULL, reference_genome=None, best_trans
     return total_annotations
 
 
-def parse_annotations_json(data, reference_genome=None, best_transcripts_only=False, warn=DEVNULL):
+def parse_annotations_json(
+    data,
+    reference_genome: Optional[Dict[str, SeqRecord]] = None,
+    best_transcripts_only=False,
+    warn=DEVNULL,
+) -> Dict[str, List[Gene]]:
     """
     parses a json of annotation information into annotation objects
     """
-    genes_by_chr = {}
+    genes_by_chr: Dict[str, List[Gene]] = {}
+
     for gene_dict in data['genes']:
         if gene_dict['strand'] in ['1', '+', 1]:
             gene_dict['strand'] = STRAND.POS
@@ -131,7 +130,7 @@ def parse_annotations_json(data, reference_genome=None, best_transcripts_only=Fa
 
         has_best = False
         for transcript in gene_dict['transcripts']:
-            transcript['is_best_transcript'] = tab.cast_boolean(transcript['is_best_transcript'])
+            transcript['is_best_transcript'] = cast_boolean(transcript['is_best_transcript'])
             transcript.setdefault('exons', [])
             exons = [Exon(strand=gene.strand, **ex) for ex in transcript['exons']]
             if not exons:
@@ -206,7 +205,7 @@ def parse_annotations_json(data, reference_genome=None, best_transcripts_only=Fa
     return genes_by_chr
 
 
-def convert_tab_to_json(filepath, warn=DEVNULL):
+def convert_tab_to_json(filepath: str, warn: Callable = DEVNULL) -> Dict:
     """
     given a file in the std input format (see below) reads and return a list of genes (and sub-objects)
 
@@ -236,17 +235,12 @@ def convert_tab_to_json(filepath, warn=DEVNULL):
     Returns:
         Dict[str,List[Gene]]: a dictionary keyed by chromosome name with values of list of genes on the chromosome
 
-    Example:
-        >>> ref = load_reference_genes('filename')
-        >>> ref['1']
-        [Gene(), Gene(), ....]
-
     Warning:
         does not load translations unless then start with 'M', end with '*' and have a length of multiple 3
     """
 
     def parse_exon_list(row):
-        if not row:
+        if pd.isnull(row):
             return []
         exons = []
         for temp in re.split('[; ]', row):
@@ -258,7 +252,7 @@ def parse_exon_list(row):
         return exons
 
     def parse_domain_list(row):
-        if not row:
+        if pd.isnull(row):
             return []
         domains = []
         for domain in row.split(';'):
@@ -272,38 +266,41 @@ def parse_domain_list(row):
                 warn('error in domain:', domain, row, repr(err))
         return domains
 
-    def nullable_int(row):
-        try:
-            row = int(row)
-        except ValueError:
-            row = tab.cast_null(row)
-        return row
-
-    _, rows = tab.read_file(
+    df = pd.read_csv(
         filepath,
-        require=['ensembl_gene_id', 'chr', 'ensembl_transcript_id'],
-        add_default={
-            'cdna_coding_start': 'null',
-            'cdna_coding_end': 'null',
-            'AA_domain_ranges': '',
-            'genomic_exon_ranges': '',
-            'hugo_names': '',
-            'transcript_genomic_start': 'null',
-            'transcript_genomic_end': 'null',
-            'best_ensembl_transcript_id': 'null',
-        },
-        cast={
-            'genomic_exon_ranges': parse_exon_list,
-            'AA_domain_ranges': parse_domain_list,
-            'cdna_coding_end': nullable_int,
-            'cdna_coding_start': nullable_int,
-            'transcript_genomic_end': nullable_int,
-            'transcript_genomic_start': nullable_int,
+        dtype={
+            'ensembl_gene_id': str,
+            'ensembl_transcript_id': str,
+            'chr': str,
+            'cdna_coding_start': pd.Int64Dtype(),
+            'cdna_coding_end': pd.Int64Dtype(),
+            'AA_domain_ranges': str,
+            'genomic_exon_ranges': str,
+            'hugo_names': str,
+            'transcript_genomic_start': pd.Int64Dtype(),
+            'transcript_genomic_end': pd.Int64Dtype(),
+            'best_ensembl_transcript_id': str,
             'gene_start': int,
             'gene_end': int,
         },
+        sep='\t',
+        comment='#',
     )
+
+    for col in ['ensembl_gene_id', 'chr', 'ensembl_transcript_id', 'gene_start', 'gene_end']:
+        if col not in df:
+            raise KeyError(f'missing required column: {col}')
+
+    for col, parser in [
+        ('genomic_exon_ranges', parse_exon_list),
+        ('AA_domain_ranges', parse_domain_list),
+    ]:
+        if col in df:
+            df[col] = df[col].apply(parser)
+
     genes = {}
+    rows = df.where(df.notnull(), None).to_dict('records')
+
     for row in rows:
         gene = {
             'chr': row['chr'],
@@ -311,23 +308,26 @@ def nullable_int(row):
             'end': row['gene_end'],
             'name': row['ensembl_gene_id'],
             'strand': row['strand'],
-            'aliases': row['hugo_names'].split(';') if row['hugo_names'] else [],
+            'aliases': row['hugo_names'].split(';') if row.get('hugo_names') else [],
             'transcripts': [],
         }
         if gene['name'] not in genes:
             genes[gene['name']] = gene
         else:
             gene = genes[gene['name']]
-
+        is_best_transcript = (
+            row.get('best_ensembl_transcript_id', row['ensembl_transcript_id'])
+            == row['ensembl_transcript_id']
+        )
         transcript = {
-            'is_best_transcript': row['best_ensembl_transcript_id'] == row['ensembl_transcript_id'],
+            'is_best_transcript': is_best_transcript,
             'name': row['ensembl_transcript_id'],
-            'exons': row['genomic_exon_ranges'],
-            'domains': row['AA_domain_ranges'],
-            'start': row['transcript_genomic_start'],
-            'end': row['transcript_genomic_end'],
-            'cdna_coding_start': row['cdna_coding_start'],
-            'cdna_coding_end': row['cdna_coding_end'],
+            'exons': row.get('genomic_exon_ranges', []),
+            'domains': row.get('AA_domain_ranges', []),
+            'start': row.get('transcript_genomic_start'),
+            'end': row.get('transcript_genomic_end'),
+            'cdna_coding_start': row.get('cdna_coding_start'),
+            'cdna_coding_end': row.get('cdna_coding_end'),
             'aliases': [],
         }
         gene['transcripts'].append(transcript)
@@ -335,13 +335,13 @@ def nullable_int(row):
     return {'genes': genes.values()}
 
 
-def load_reference_genome(*filepaths):
+def load_reference_genome(*filepaths: str) -> Dict[str, SeqRecord]:
     """
     Args:
-        filepaths (List[str]): the paths to the files containing the input fasta genomes
+        filepaths: the paths to the files containing the input fasta genomes
 
     Returns:
-        Dict[str,Bio.SeqRecord]: a dictionary representing the sequences in the fasta file
+        a dictionary representing the sequences in the fasta file
     """
     reference_genome = {}
     for filename in filepaths:
@@ -376,7 +376,7 @@ def load_reference_genome(*filepaths):
     return reference_genome
 
 
-def load_templates(*filepaths):
+def load_templates(*filepaths: str) -> Dict[str, Template]:
     """
     primarily useful if template drawings are required and is not necessary otherwise
     assumes the input file is 0-indexed with [start,end) style. Columns are expected in
@@ -395,26 +395,30 @@ def load_templates(*filepaths):
         chr1    0   2300000 p36.33  gneg
         chr1    2300000 5400000 p36.32  gpos25
 
-    Args:
-        filename (str): the path to the file with the cytoband template information
-
     Returns:
-        List[Template]: list of the templates loaded
-
+        templates loaded
     """
     header = ['name', 'start', 'end', 'band_name', 'giemsa_stain']
-    templates = {}
+    templates: Dict[str, Template] = {}
 
     for filename in filepaths:
-        header, rows = tab.read_file(
+        df = pd.read_csv(
             filename,
-            header=header,
-            cast={'start': int, 'end': int},
-            in_={'giemsa_stain': GIEMSA_STAIN.values()},
+            sep='\t',
+            dtype={
+                'start': int,
+                'end': int,
+                'name': str,
+                'band_name': str,
+                'giemsa_stain': str,
+            },
+            names=header,
+            comment='#',
         )
+        df['giemsa_stain'].apply(lambda v: GIEMSA_STAIN.enforce(v))
 
-        bands_by_template = {}
-        for row in rows:
+        bands_by_template: Dict[str, List[BioInterval]] = {}
+        for row in df.to_dict('records'):
             band = BioInterval(None, row['start'] + 1, row['end'], name=row['band_name'], data=row)
             bands_by_template.setdefault(row['name'], []).append(band)
 
@@ -427,10 +431,10 @@ def load_templates(*filepaths):
 
 
 class ReferenceFile:
+    # store loaded file to avoid re-loading
+    CACHE = {}  # type: ignore
 
-    CACHE = {}  # store loaded file to avoid re-loading
-
-    LOAD_FUNCTIONS = {
+    LOAD_FUNCTIONS: Dict[str, Optional[Callable]] = {
         'annotations': load_annotations,
         'reference_genome': load_reference_genome,
         'masking': load_masking_regions,
@@ -440,7 +444,14 @@ class ReferenceFile:
     }
     """dict: Mapping of file types (based on ENV name) to load functions"""
 
-    def __init__(self, file_type, *filepaths, eager_load=False, assert_exists=False, **opt):
+    def __init__(
+        self,
+        file_type: str,
+        *filepaths: str,
+        eager_load: bool = False,
+        assert_exists: bool = False,
+        **opt,
+    ):
         """
         Args:
             *filepaths (str): list of paths to load
diff --git a/src/mavis/annotate/main.py b/src/mavis/annotate/main.py
index 6103ea44..f1a9456e 100644
--- a/src/mavis/annotate/main.py
+++ b/src/mavis/annotate/main.py
@@ -138,13 +138,11 @@ def main(
     # test that the sequence makes sense for a random transcript
     bpps = read_inputs(
         inputs,
-        in_={COLUMNS.protocol: PROTOCOL.values()},
         add_default={
             COLUMNS.protocol: config['libraries'][library]['protocol'],
             COLUMNS.library: library,
             COLUMNS.stranded: False,
         },
-        require=[COLUMNS.protocol, COLUMNS.library],
         expand_strand=False,
         expand_orient=True,
         expand_svtype=True,
diff --git a/src/mavis/annotate/splicing.py b/src/mavis/annotate/splicing.py
index 910d8c0e..ae9d4ef4 100644
--- a/src/mavis/annotate/splicing.py
+++ b/src/mavis/annotate/splicing.py
@@ -1,9 +1,9 @@
 import itertools
 
-from .base import BioInterval
-from .constants import ACCEPTOR_SEQ, DONOR_SEQ, SPLICE_SITE_RADIUS, SPLICE_SITE_TYPE, SPLICE_TYPE
-from ..constants import reverse_complement, STRAND
+from ..constants import SPLICE_TYPE, STRAND, reverse_complement
 from ..interval import Interval
+from .base import BioInterval
+from .constants import ACCEPTOR_SEQ, DONOR_SEQ, SPLICE_SITE_RADIUS, SPLICE_SITE_TYPE
 
 
 class SplicingPattern(list):
diff --git a/src/mavis/bam/stats.py b/src/mavis/bam/stats.py
index e7ba151e..e161227c 100644
--- a/src/mavis/bam/stats.py
+++ b/src/mavis/bam/stats.py
@@ -117,7 +117,7 @@ def compute_transcriptome_bam_stats(
 
     Args:
         bam_file_handle (BamCache): the input bam file handle
-        annotations (object): see :func:`mavis.annotate.load_reference_genes`
+        annotations (object): see :func:`mavis.annotate.load_annotations`
         sample_size (int): the number of genes to compute stats over
         log (Callable): outputs logging information
         min_mapping_quality (int): the minimum mapping quality for a read to be used
diff --git a/src/mavis/blat.py b/src/mavis/blat.py
index 12c9a7b6..c8cd325f 100644
--- a/src/mavis/blat.py
+++ b/src/mavis/blat.py
@@ -13,7 +13,7 @@
 import math
 import re
 
-import tab
+import pandas as pd
 
 from .align import query_coverage_interval
 from .bam import cigar as _cigar
@@ -24,11 +24,11 @@
     DNA_ALPHABET,
     NA_MAPPING_QUALITY,
     PYSAM_READ_FLAGS,
-    reverse_complement,
     STRAND,
+    reverse_complement,
 )
-from .util import LOG
 from .interval import Interval
+from .util import LOG
 
 
 class Blat:
@@ -107,7 +107,7 @@ def percent_identity(row, is_protein=False, is_mrna=True):
 
     @staticmethod
     def read_pslx(filename, seqid_to_sequence_mapping, is_protein=False, verbose=True):
-        pslx_header = [
+        header = [
             'match',
             'mismatch',
             'repmatch',
@@ -139,10 +139,11 @@ def split_csv_trailing_seq(x):
         def split_csv_trailing_ints(x):
             return [int(s) for s in re.sub(',$', '', x).split(',')]
 
-        header, rows = tab.read_file(
+        df = pd.read_csv(
             filename,
-            header=pslx_header,
-            cast={
+            sep='\t',
+            names=header,
+            dtype={
                 'match': int,
                 'mismatch': int,
                 'repmatch': int,
@@ -158,18 +159,23 @@ def split_csv_trailing_ints(x):
                 'tstart': int,
                 'tend': int,
                 'block_count': int,
-                'tname': lambda x: re.sub('^chr', '', x),
-                'block_sizes': split_csv_trailing_ints,
-                'qstarts': split_csv_trailing_ints,
-                'tstarts': split_csv_trailing_ints,
-                'qseqs': split_csv_trailing_seq,
-                'tseqs': split_csv_trailing_seq,
+                'tname': str,
+                'block_sizes': str,
+                'qstarts': str,
+                'tstarts': str,
+                'qseqs': str,
+                'tseqs': str,
             },
-            validate={'strand': r'^[\+-]$'},
         )
 
+        for col in ['block_sizes', 'qstarts', 'tstarts']:
+            df[col] = df[col].apply(split_csv_trailing_ints)
+        for col in ['qseqs', 'tseqs']:
+            df[col] = df[col].apply(split_csv_trailing_seq)
+        df['strand'].apply(lambda x: STRAND.enforce(x))
+
         final_rows = []
-        for row in rows:
+        for row in df.to_dict('records'):
             try:
                 row['score'] = Blat.score(row, is_protein=is_protein)
                 row['percent_ident'] = Blat.percent_identity(row, is_protein=is_protein)
@@ -366,10 +372,7 @@ def process_blat_output(
     if is_protein:
         raise NotImplementedError('currently does not support aligning protein sequences')
 
-    try:
-        _, rows = Blat.read_pslx(aligner_output_file, query_id_mapping, is_protein=is_protein)
-    except tab.tab.EmptyFileError:
-        rows = []
+    _, rows = Blat.read_pslx(aligner_output_file, query_id_mapping, is_protein=is_protein)
 
     # split the rows by query id
     rows_by_query = {}
diff --git a/src/mavis/cluster/main.py b/src/mavis/cluster/main.py
index 3ea459fd..ae07c755 100644
--- a/src/mavis/cluster/main.py
+++ b/src/mavis/cluster/main.py
@@ -76,7 +76,7 @@ def main(
         output: path to the output directory
         library: the library to look for in each of the input files
         masking (ReferenceFile): see :func:`mavis.annotate.file_io.load_masking_regions`
-        annotations (ReferenceFile): see :func:`mavis.annotate.file_io.load_reference_genes`
+        annotations (ReferenceFile): see :func:`mavis.annotate.file_io.load_annotations`
     """
     masking = ReferenceFile.load_from_config(config, 'masking', eager_load=True)
     annotations = ReferenceFile.load_from_config(config, 'annotations')
@@ -95,7 +95,7 @@ def main(
     # load the input files
     breakpoint_pairs = read_inputs(
         inputs,
-        cast={
+        apply={
             COLUMNS.tools: lambda x: set(x.split(';'))
             if x
             else set()
diff --git a/src/mavis/config.py b/src/mavis/config.py
index 6ff74392..480d3f72 100644
--- a/src/mavis/config.py
+++ b/src/mavis/config.py
@@ -4,15 +4,14 @@
 from typing import Dict, Optional
 
 import snakemake
-import tab
 from snakemake.exceptions import WorkflowError
 from snakemake.utils import validate as snakemake_validate
 
 from .annotate.file_io import ReferenceFile
 from .bam import stats
 from .bam.cache import BamCache
-from .constants import PROTOCOL, SUBCOMMAND, float_fraction
-from .util import bash_expands, filepath
+from .constants import INTEGER_COLUMNS, PROTOCOL, SUBCOMMAND, float_fraction
+from .util import bash_expands, cast_boolean, filepath
 
 
 def calculate_bam_stats(config: Dict, library_name: str) -> Dict:
@@ -206,7 +205,7 @@ def get_metavar(arg_type):
         >>> get_metavar(bool)
         '{True,False}'
     """
-    if arg_type in [bool, tab.cast_boolean]:
+    if arg_type in [bool, cast_boolean]:
         return '{True,False}'
     elif arg_type in [float_fraction, float]:
         return 'FLOAT'
diff --git a/src/mavis/constants.py b/src/mavis/constants.py
index da2a8d40..89e75e64 100644
--- a/src/mavis/constants.py
+++ b/src/mavis/constants.py
@@ -10,7 +10,6 @@
 from Bio.Alphabet.IUPAC import ambiguous_dna
 from Bio.Data.IUPACData import ambiguous_dna_values
 from Bio.Seq import Seq
-from tab import cast_boolean, cast_null
 
 PROGNAME: str = 'mavis'
 EXIT_OK: int = 0
@@ -123,6 +122,27 @@ def float_fraction(num):
     return num
 
 
+class SPLICE_TYPE(MavisNamespace):
+    """
+    Controlled vocabulary of the allowed splice type classification values
+
+    Attributes:
+        RETAIN: an intron was retained
+        SKIP: an exon was skipped
+        NORMAL: no exons were skipped and no introns were retained; the normal/expected splicing pattern was followed
+        MULTI_RETAIN: multiple introns were retained
+        MULTI_SKIP: multiple exons were skipped
+        COMPLEX: some combination of exon skipping and intron retention
+    """
+
+    RETAIN: str = 'retained intron'
+    SKIP: str = 'skipped exon'
+    NORMAL: str = 'normal'
+    MULTI_RETAIN: str = 'retained multiple introns'
+    MULTI_SKIP: str = 'skipped multiple exons'
+    COMPLEX: str = 'complex'
+
+
 COMPLETE_STAMP: str = 'MAVIS.COMPLETE'
 """Filename for all complete stamp files"""
 
@@ -515,14 +535,12 @@ class COLUMNS(MavisNamespace):
     call_method: str = 'call_method'
     break1_ewindow: str = 'break1_ewindow'
     break1_ewindow_count: str = 'break1_ewindow_count'
-    break1_ewindow_practical_coverage: str = 'break1_ewindow_practical_coverage'
     break1_homologous_seq: str = 'break1_homologous_seq'
     break1_split_read_names: str = 'break1_split_read_names'
     break1_split_reads: str = 'break1_split_reads'
     break1_split_reads_forced: str = 'break1_split_reads_forced'
     break2_ewindow: str = 'break2_ewindow'
     break2_ewindow_count: str = 'break2_ewindow_count'
-    break2_ewindow_practical_coverage: str = 'break2_ewindow_practical_coverage'
     break2_homologous_seq: str = 'break2_homologous_seq'
     break2_split_read_names: str = 'break2_split_read_names'
     break2_split_reads: str = 'break2_split_reads'
@@ -576,3 +594,63 @@ def sort_columns(input_columns):
     temp = sorted([c for c in input_columns if c in order], key=lambda x: order[x])
     temp = temp + sorted([c for c in input_columns if c not in order])
     return temp
+
+
+INTEGER_COLUMNS = {
+    COLUMNS.break1_position_end,
+    COLUMNS.break1_position_start,
+    COLUMNS.break2_position_end,
+    COLUMNS.break2_position_start,
+}
+
+FLOAT_COLUMNS = {
+    COLUMNS.break1_ewindow_count,
+    COLUMNS.break1_split_reads_forced,
+    COLUMNS.break1_split_reads,
+    COLUMNS.break2_ewindow_count,
+    COLUMNS.break2_split_reads_forced,
+    COLUMNS.break2_split_reads,
+    COLUMNS.cluster_size,
+    COLUMNS.contig_alignment_query_consumption,
+    COLUMNS.contig_alignment_rank,
+    COLUMNS.contig_alignment_score,
+    COLUMNS.contig_break1_read_depth,
+    COLUMNS.contig_break2_read_depth,
+    COLUMNS.contig_build_score,
+    COLUMNS.contig_read_depth,
+    COLUMNS.contig_remap_score,
+    COLUMNS.contig_remapped_reads,
+    COLUMNS.contigs_assembled,
+    COLUMNS.flanking_pairs_compatible,
+    COLUMNS.flanking_pairs,
+    COLUMNS.linking_split_reads,
+    COLUMNS.raw_break1_half_mapped_reads,
+    COLUMNS.raw_break1_split_reads,
+    COLUMNS.raw_break2_half_mapped_reads,
+    COLUMNS.raw_break2_split_reads,
+    COLUMNS.raw_flanking_pairs,
+    COLUMNS.raw_spanning_reads,
+    COLUMNS.repeat_count,
+    COLUMNS.spanning_reads,
+}
+
+BOOLEAN_COLUMNS = {COLUMNS.opposing_strands, COLUMNS.stranded, COLUMNS.supplementary_call}
+
+# NOTE(review): presumably the columns that hold ';'-delimited lists in summary output - confirm
+SUMMARY_LIST_COLUMNS = {
+    COLUMNS.annotation_figure,
+    COLUMNS.annotation_id,
+    COLUMNS.break1_split_reads,
+    COLUMNS.break2_split_reads,
+    COLUMNS.call_method,
+    COLUMNS.contig_alignment_score,
+    COLUMNS.contig_remapped_reads,
+    COLUMNS.contig_seq,
+    COLUMNS.event_type,
+    COLUMNS.flanking_pairs,
+    COLUMNS.pairing,
+    COLUMNS.product_id,
+    COLUMNS.spanning_reads,
+    COLUMNS.tools,
+    COLUMNS.tracking_id,
+}
diff --git a/src/mavis/main.py b/src/mavis/main.py
index 1d86fc17..5b69fff3 100644
--- a/src/mavis/main.py
+++ b/src/mavis/main.py
@@ -8,8 +8,6 @@
 import time
 from typing import Dict
 
-import tab
-
 from . import __version__
 from . import config as _config
 from . import util as _util
@@ -91,10 +89,10 @@ def create_parser(argv):
         help='Indicates the input file type to be parsed',
     )
     optional[SUBCOMMAND.CONVERT].add_argument(
-        '--strand_specific', type=tab.cast_boolean, default=False
+        '--strand_specific', type=_util.cast_boolean, default=False
     )
     optional[SUBCOMMAND.CONVERT].add_argument(
-        '--assume_no_untemplated', type=tab.cast_boolean, default=True
+        '--assume_no_untemplated', type=_util.cast_boolean, default=True
     )
     for command in [SUBCOMMAND.CONVERT, SUBCOMMAND.SETUP]:
         required[command].add_argument(
diff --git a/src/mavis/overlay.py b/src/mavis/overlay.py
index 868a90fa..9543ff84 100644
--- a/src/mavis/overlay.py
+++ b/src/mavis/overlay.py
@@ -1,8 +1,6 @@
 import os
 from typing import Dict, List, Tuple, Union
 
-import tab
-
 from . import annotate as _annotate
 from . import util as _util
 from .annotate.file_io import ReferenceFile
@@ -59,7 +57,7 @@ def check_overlay_args(args, parser):
                 'argument --read_depth_plots: ymax must be an integer: {}'.format(plot[ymax])
             )
         try:
-            plot[stranded] = tab.cast_boolean(plot[stranded])
+            plot[stranded] = _util.cast_boolean(plot[stranded])
         except TypeError:
             parser.error(
                 'argument --read_depth_plots: stranded must be an boolean: {}'.format(
diff --git a/src/mavis/pairing/main.py b/src/mavis/pairing/main.py
index 1e332002..850d0173 100644
--- a/src/mavis/pairing/main.py
+++ b/src/mavis/pairing/main.py
@@ -3,10 +3,11 @@
 import time
 from typing import Dict, List, Set, Tuple
 
-from ..annotate.constants import SPLICE_TYPE
+import pandas as pd
+
 from ..annotate.file_io import ReferenceFile
 from ..breakpoint import BreakpointPair
-from ..constants import CALL_METHOD, COLUMNS, PROTOCOL, SVTYPE
+from ..constants import CALL_METHOD, COLUMNS, PROTOCOL, SPLICE_TYPE, SVTYPE
 from ..util import LOG, generate_complete_stamp, output_tabbed_file, read_inputs
 from .pairing import inferred_equivalent, pair_by_distance, product_key
 
@@ -36,23 +37,18 @@ def main(
     bpps.extend(
         read_inputs(
             inputs,
-            require=[
+            required_columns=[
                 COLUMNS.annotation_id,
                 COLUMNS.library,
                 COLUMNS.fusion_cdna_coding_start,
                 COLUMNS.fusion_cdna_coding_end,
                 COLUMNS.fusion_sequence_fasta_id,
             ],
-            in_={
-                COLUMNS.protocol: PROTOCOL.values(),
-                COLUMNS.event_type: SVTYPE.values(),
-                COLUMNS.fusion_splicing_pattern: SPLICE_TYPE.values() + [None, 'None'],
-            },
-            add_default={
-                COLUMNS.fusion_cdna_coding_start: None,
-                COLUMNS.fusion_cdna_coding_end: None,
-                COLUMNS.fusion_sequence_fasta_id: None,
-                COLUMNS.fusion_splicing_pattern: None,
+            apply={
+                COLUMNS.event_type: lambda x: SVTYPE.enforce(x),
+                COLUMNS.fusion_splicing_pattern: lambda x: SPLICE_TYPE.enforce(x)
+                if not pd.isnull(x)
+                else x,
             },
             expand_strand=False,
             expand_orient=False,
diff --git a/src/mavis/summary/main.py b/src/mavis/summary/main.py
index c34cb5e8..94f79ddb 100644
--- a/src/mavis/summary/main.py
+++ b/src/mavis/summary/main.py
@@ -4,11 +4,11 @@
 from functools import partial
 from typing import Dict, List, Tuple
 
-import tab
+import pandas as pd
 
 from ..annotate.file_io import ReferenceFile
 from ..breakpoint import BreakpointPair
-from ..constants import CALL_METHOD, COLUMNS, PROTOCOL, SVTYPE
+from ..constants import CALL_METHOD, COLUMNS, PROTOCOL, SPLICE_TYPE, SVTYPE
 from ..util import LOG, generate_complete_stamp, output_tabbed_file, read_inputs, soft_cast
 from .constants import HOMOPOLYMER_MIN_LENGTH
 from .summary import (
@@ -21,13 +21,6 @@
 )
 
 
-def soft_cast_null(value):
-    try:
-        return tab.cast_null(value)
-    except TypeError:
-        return value
-
-
 def main(inputs: List[str], output: str, config: Dict, start_time=int(time.time())):
     annotations = ReferenceFile.load_from_config(config, 'annotations', eager_load=True)
     dgv_annotation = ReferenceFile.load_from_config(config, 'dgv_annotation')
@@ -45,7 +38,7 @@ def main(inputs: List[str], output: str, config: Dict, start_time=int(time.time(
     bpps.extend(
         read_inputs(
             inputs,
-            require=[
+            required_columns=[
                 COLUMNS.event_type,
                 COLUMNS.product_id,
                 COLUMNS.fusion_cdna_coding_end,
@@ -69,50 +62,17 @@ def main(inputs: List[str], output: str, config: Dict, start_time=int(time.time(
                 COLUMNS.disease_status,
             ],
             add_default={
-                **{
-                    k: None
-                    for k in [
-                        COLUMNS.contig_remapped_reads,
-                        COLUMNS.contig_seq,
-                        COLUMNS.break1_split_reads,
-                        COLUMNS.break1_split_reads_forced,
-                        COLUMNS.break2_split_reads,
-                        COLUMNS.break2_split_reads_forced,
-                        COLUMNS.linking_split_reads,
-                        COLUMNS.flanking_pairs,
-                        COLUMNS.contigs_assembled,
-                        COLUMNS.contig_alignment_score,
-                        COLUMNS.contig_remap_score,
-                        COLUMNS.spanning_reads,
-                        COLUMNS.annotation_figure,
-                        COLUMNS.gene1_aliases,
-                        COLUMNS.gene2_aliases,
-                        COLUMNS.protein_synon,
-                        COLUMNS.cdna_synon,
-                        COLUMNS.net_size,
-                        COLUMNS.tracking_id,
-                        COLUMNS.assumed_untemplated,
-                        'dgv',
-                        'summary_pairing',
-                    ]
-                },
                 COLUMNS.call_method: CALL_METHOD.INPUT,
             },
+            apply={
+                COLUMNS.event_type: lambda x: SVTYPE.enforce(x),
+                COLUMNS.fusion_splicing_pattern: lambda x: SPLICE_TYPE.enforce(x)
+                if not pd.isnull(x)
+                else x,
+            },
             expand_strand=False,
             expand_orient=False,
             expand_svtype=False,
-            cast={
-                COLUMNS.break1_split_reads: partial(soft_cast, cast_type=int),
-                COLUMNS.break2_split_reads: partial(soft_cast, cast_type=int),
-                COLUMNS.contig_remapped_reads: partial(soft_cast, cast_type=int),
-                COLUMNS.spanning_reads: partial(soft_cast, cast_type=int),
-                COLUMNS.break1_split_reads_forced: partial(soft_cast, cast_type=int),
-                COLUMNS.break2_split_reads_forced: partial(soft_cast, cast_type=int),
-                COLUMNS.flanking_pairs: partial(soft_cast, cast_type=int),
-                COLUMNS.linking_split_reads: partial(soft_cast, cast_type=int),
-                COLUMNS.protein_synon: soft_cast_null,
-                COLUMNS.cdna_synon: soft_cast_null,
-            },
         )
     )
     # load all transcripts
@@ -335,12 +295,12 @@ def main(inputs: List[str], output: str, config: Dict, start_time=int(time.time(
             row.data.setdefault(COLUMNS.library, lib)
             # filter pairing ids based on what is still kept?
             paired_libraries = set()
-            for product_id in row.pairing.split(';'):
+            for product_id in (row.pairing or '').split(';'):
                 for lib in bpps_by_library:
                     if product_id.startswith(lib):
                         paired_libraries.add(lib)
             inferred_paired_libraries = set()
-            for product_id in row.inferred_pairing.split(';'):
+            for product_id in (row.inferred_pairing or '').split(';'):
                 for lib in bpps_by_library:
                     if product_id.startswith(lib):
                         inferred_paired_libraries.add(lib)
diff --git a/src/mavis/summary/summary.py b/src/mavis/summary/summary.py
index 67ef7420..5287d7d8 100644
--- a/src/mavis/summary/summary.py
+++ b/src/mavis/summary/summary.py
@@ -297,6 +297,7 @@ def filter_by_evidence(
             linking_split_reads = bpp.column('linking_split_reads')
             if bpp.event_type == SVTYPE.INS:
                 linking_split_reads += bpp.column('flanking_pairs')
+
             if any(
                 [
                     bpp.column('break1_split_reads') + bpp.column('break1_split_reads_forced')
diff --git a/src/mavis/tools/__init__.py b/src/mavis/tools/__init__.py
index 307f4af5..8dfc2db0 100644
--- a/src/mavis/tools/__init__.py
+++ b/src/mavis/tools/__init__.py
@@ -1,7 +1,7 @@
 import itertools
 from typing import Callable, Dict, List
 
-import tab
+import pandas as pd
 from shortuuid import uuid
 
 from ..breakpoint import Breakpoint, BreakpointPair
@@ -253,9 +253,9 @@ def _convert_tool_output(
             input_file, expand_orient=True, expand_svtype=True, add_default={'stranded': stranded}
         )
     elif file_type == SUPPORTED_TOOL.CNVNATOR:
-        _, rows = tab.read_file(
+        df = pd.read_csv(
             input_file,
-            header=[
+            names=[
                 'event_type',
                 'coordinates',
                 'size',
@@ -266,7 +266,20 @@ def _convert_tool_output(
                 'e-val4',
                 'q0',
             ],
+            dtype={
+                'event_type': str,
+                'coordinates': str,
+                'size': pd.Int64Dtype(),
+                'normalized_RD': float,
+                'e-val1': float,
+                'e-val2': float,
+                'e-val3': float,
+                'e-val4': float,
+                'q0': float,
+            },
+            sep='\t',
         )
+        rows = df.where(df.notnull(), None).to_dict('records')
     elif file_type in [
         SUPPORTED_TOOL.DELLY,
         SUPPORTED_TOOL.MANTA,
@@ -279,7 +292,9 @@ def _convert_tool_output(
     elif file_type == SUPPORTED_TOOL.BREAKDANCER:
         rows = _convert_breakdancer_file(input_file)
     else:
-        _, rows = tab.read_file(input_file)
+        df = pd.read_csv(input_file, sep='\t', dtype=str, comment=None)
+        df.columns = [c[1:] if c.startswith('#') else c for c in df.columns]
+        rows = df.where(df.notnull(), None).to_dict('records')
     if rows:
         log('found', len(rows), 'rows')
         for row in rows:
diff --git a/src/mavis/tools/breakdancer.py b/src/mavis/tools/breakdancer.py
index 2f27ee85..e1f8361e 100644
--- a/src/mavis/tools/breakdancer.py
+++ b/src/mavis/tools/breakdancer.py
@@ -1,24 +1,41 @@
 import re
-
-import tab
 from argparse import Namespace
 
+import pandas as pd
+
 
 def convert_file(input_file):
     bam_to_lib = {}
+
+    # scan leading '#' lines for bam -> library names; the last '#' line is the header
     with open(input_file, 'r') as fh:
         # comments in breakdancer are marked with a single # so they need to be discarded before reading
         lines = fh.readlines()
-        header = 0
-        while header < len(lines) and lines[header].startswith('#'):
-            metadata_match = re.match(r'^#(\S+)\t.*\tlibrary:(\S+)\t.*', lines[header])
+        line_index = 0
+        while line_index < len(lines) and lines[line_index].startswith('#'):
+            metadata_match = re.match(r'^#(\S+)\t.*\tlibrary:(\S+)\t.*', lines[line_index])
             if metadata_match:
                 bam_to_lib[metadata_match.group(1)] = metadata_match.group(2)
-            header += 1
-        lines = lines[header - 1 :]
-        input_file = Namespace(readlines=lambda: lines)
-    header, rows = tab.read_file(input_file, allow_short=True, require=['num_Reads_lib'])
-    for row in rows:
-        for bam, lib in bam_to_lib.items():
-            row['num_Reads_lib'] = row['num_Reads_lib'].replace(bam, lib)
-    return rows
+            line_index += 1
+        header = [c.strip() for c in re.sub(r'^#', '', lines[line_index - 1]).split('\t')]
+    # parse the data rows; the '#' metadata/header lines are skipped via comment='#'
+    df = pd.read_csv(
+        input_file,
+        names=header,
+        sep='\t',
+        comment='#',
+        dtype={
+            'num_Reads_lib': str,
+            'Pos1': int,
+            'Pos2': int,
+            'Chr1': str,
+            'Chr2': str,
+            'Type': str,
+        },
+    )
+    if 'num_Reads_lib' not in df:
+        raise KeyError('missing required column: num_Reads_lib')
+    # regex=False: bam paths are literal text, as with the plain str.replace this replaces
+    for bam, lib in bam_to_lib.items():
+        df['num_Reads_lib'] = df['num_Reads_lib'].str.replace(bam, lib, regex=False)
+    return df.to_dict('records')
diff --git a/src/mavis/util.py b/src/mavis/util.py
index 1b145d29..4a22984a 100644
--- a/src/mavis/util.py
+++ b/src/mavis/util.py
@@ -9,13 +9,25 @@
 from datetime import datetime
 from functools import partial
 from glob import glob
+from typing import Any, Callable, Dict, List, Optional, Set
 
+import pandas as pd
 from braceexpand import braceexpand
 from shortuuid import uuid
-from tab import tab
 
 from .breakpoint import Breakpoint, BreakpointPair
-from .constants import COLUMNS, ORIENT, PROTOCOL, STRAND, SVTYPE, MavisNamespace, sort_columns
+from .constants import (
+    COLUMNS,
+    FLOAT_COLUMNS,
+    INTEGER_COLUMNS,
+    ORIENT,
+    PROTOCOL,
+    STRAND,
+    SUMMARY_LIST_COLUMNS,
+    SVTYPE,
+    MavisNamespace,
+    sort_columns,
+)
 from .error import InvalidRearrangement
 from .interval import Interval
 
@@ -84,6 +96,22 @@ def __call__(self, item):
             return self.callback_func(item)
 
 
+def cast_null(input_value):
+    # only a case-insensitive 'none'/'null' counts as null; anything else is an error
+    if str(input_value).lower() not in ('none', 'null'):
+        raise TypeError('casting to null/None failed', input_value)
+    return None
+
+
+def cast_boolean(input_value):
+    """Map a case-insensitive true/false spelling to a bool; TypeError otherwise."""
+    token = str(input_value).lower()
+    truthy, falsy = ('t', 'true', '1', 'y', 'yes', '+'), ('f', 'false', '0', 'n', 'no', '-')
+    if token in truthy or token in falsy:
+        return token in truthy
+    raise TypeError('casting to boolean failed', input_value)
+
+
 def cast(value, cast_func):
     """
     cast a value to a given type
@@ -93,7 +121,7 @@ def cast(value, cast_func):
         1
     """
     if cast_func == bool:
-        value = tab.cast_boolean(value)
+        value = cast_boolean(value)
     else:
         value = cast_func(value)
     return value
@@ -113,7 +141,7 @@ def soft_cast(value, cast_type):
         return cast(value, cast_type)
     except (TypeError, ValueError):
         pass
-    return tab.cast_null(value)
+    return cast_null(value)
 
 
 def get_env_variable(arg, default, cast_type=None):
@@ -228,23 +256,21 @@ def filter_on_overlap(bpps, regions_by_reference_name):
     return passed, failed
 
 
-def read_inputs(inputs, **kwargs):
+def read_inputs(inputs, required_columns=(), **kwargs):
     bpps = []
-    kwargs.setdefault('require', [])
-    kwargs['require'] = list(set(kwargs['require'] + [COLUMNS.protocol]))
-    kwargs.setdefault('in_', {})
-    kwargs['in_'][COLUMNS.protocol] = PROTOCOL.values()
+    # the protocol column is always required, on top of whatever the caller asks for
     for finput in bash_expands(*inputs):
-        try:
-            LOG('loading:', finput)
-            bpps.extend(read_bpp_from_input_file(finput, **kwargs))
-        except tab.EmptyFileError:
-            LOG('ignoring empty file:', finput)
+        LOG('loading:', finput)
+        bpps.extend(
+            read_bpp_from_input_file(
+                finput, required_columns=[COLUMNS.protocol, *required_columns], **kwargs
+            )
+        )
     LOG('loaded', len(bpps), 'breakpoint pairs')
     return bpps
 
 
-def output_tabbed_file(bpps, filename, header=None):
+def output_tabbed_file(bpps: List[BreakpointPair], filename: str, header=None):
     if header is None:
         custom_header = False
         header = set()
@@ -258,12 +284,10 @@ def output_tabbed_file(bpps, filename, header=None):
         if not custom_header:
             header.update(row.keys())
     header = sort_columns(header)
-
-    with open(filename, 'w') as fh:
-        LOG('writing:', filename)
-        fh.write('#' + '\t'.join(header) + '\n')
-        for row in rows:
-            fh.write('\t'.join([str(row.get(c, None)) for c in header]) + '\n')
+    LOG('writing:', filename)
+    df = pd.DataFrame.from_records(rows, columns=header)
+    df = df.fillna('None')
+    df.to_csv(filename, columns=header, index=False, sep='\t')
 
 
 def write_bed_file(filename, bed_rows):
@@ -351,7 +375,9 @@ def filter_uninformative(annotations_by_chr, breakpoint_pairs, max_proximity=500
     return result, filtered
 
 
-def unique_exists(pattern, allow_none=False, get_newest=False):
+def unique_exists(
+    pattern: str, allow_none: bool = False, get_newest: bool = False
+) -> Optional[str]:
     result = bash_expands(pattern)
     if len(result) == 1:
         return result[0]
@@ -366,75 +392,135 @@ def unique_exists(pattern, allow_none=False, get_newest=False):
 
 
 def read_bpp_from_input_file(
-    filename, expand_orient=False, expand_strand=False, expand_svtype=False, **kwargs
-):
+    filename: str,
+    expand_orient: bool = False,
+    expand_strand: bool = False,
+    expand_svtype: bool = False,
+    integer_columns: Set[str] = INTEGER_COLUMNS,
+    float_columns: Set[str] = FLOAT_COLUMNS,
+    required_columns: Set[str] = set(),
+    add_default: Dict[str, Any] = {},
+    summary: bool = False,
+    apply: Dict[str, Callable] = {},
+    overwrite: Dict[str, Any] = {},
+) -> List[BreakpointPair]:
     """
     reads a file using the tab module. Each row is converted to a breakpoint pair and
     other column data is stored in the data attribute
 
     Args:
-        filename (str): path to the input file
-        expand_ns (bool): expand not specified orient/strand settings to all specific version
-            (for strand this is only applied if the bam itself is stranded)
-        explicit_strand (bool): used to stop unstranded breakpoint pairs from losing input strand information
-    Returns:
-        List[BreakpointPair]: a list of pairs
-
-    Example:
-        >>> read_bpp_from_input_file('filename')
-        [BreakpointPair(), BreakpointPair(), ...]
-
-    One can also validate other expected columns that will go in the data attribute using the usual arguments
-    to the tab.read_file function
+        filename: path to the input file
+        expand_orient: expand not specified orientation settings to all specific versions
+        expand_strand: expand not specified strand settings to all specific versions (only applied when the row is stranded)
+        summary: the input is post-summary so some float/int columns have been merged and delimited with semi-colons
+        overwrite: set column values for all breakpoints, if the column exists overwrite its current value
 
-    Example:
-        >>> read_bpp_from_input_file('filename', cast={'index': int})
-        [BreakpointPair(), BreakpointPair(), ...]
+    Returns:
+        a list of pairs
     """
 
     def soft_null_cast(value):
         try:
-            tab.cast_null(value)
+            cast_null(value)
         except TypeError:
             return value
 
-    kwargs['require'] = set() if 'require' not in kwargs else set(kwargs['require'])
-    kwargs['require'].update({COLUMNS.break1_chromosome, COLUMNS.break2_chromosome})
-    kwargs.setdefault('cast', {}).update(
-        {
-            COLUMNS.break1_position_start: int,
-            COLUMNS.break1_position_end: int,
-            COLUMNS.break2_position_start: int,
-            COLUMNS.break2_position_end: int,
-            COLUMNS.opposing_strands: lambda x: None if x == '?' else soft_cast(x, cast_type=bool),
-            COLUMNS.stranded: tab.cast_boolean,
-            COLUMNS.untemplated_seq: soft_null_cast,
-            COLUMNS.break1_chromosome: lambda x: re.sub('^chr', '', x),
-            COLUMNS.break2_chromosome: lambda x: re.sub('^chr', '', x),
-            COLUMNS.tracking_id: lambda x: x if x else str(uuid()),
-        }
-    )
-    kwargs.setdefault('add_default', {}).update(
-        {
-            COLUMNS.untemplated_seq: None,
-            COLUMNS.break1_orientation: ORIENT.NS,
-            COLUMNS.break1_strand: STRAND.NS,
-            COLUMNS.break2_orientation: ORIENT.NS,
-            COLUMNS.break2_strand: STRAND.NS,
-            COLUMNS.opposing_strands: None,
-            COLUMNS.tracking_id: '',
-        }
-    )
-    kwargs.setdefault('in_', {}).update(
-        {
-            COLUMNS.break1_orientation: ORIENT.values(),
-            COLUMNS.break1_strand: STRAND.values(),
-            COLUMNS.break2_orientation: ORIENT.values(),
-            COLUMNS.break2_strand: STRAND.values(),
-        }
-    )
-    _, rows = tab.read_file(filename, suppress_index=True, **kwargs)
-    restricted = [
+    if summary:
+        integer_columns = integer_columns - SUMMARY_LIST_COLUMNS
+        float_columns = float_columns - SUMMARY_LIST_COLUMNS
+
+    try:
+        df = pd.read_csv(
+            filename,
+            dtype={
+                **{col: pd.Int64Dtype() for col in integer_columns},
+                **{col: float for col in float_columns},
+                **{
+                    col: str
+                    for col in COLUMNS.keys()
+                    if col not in (float_columns | integer_columns)
+                },
+            },
+            sep='\t',
+            comment='#',
+            na_values=['None', 'none', 'N/A', 'n/a', 'null', 'NULL', 'Null', 'nan', '<NA>', 'NaN'],
+        )
+    except pd.errors.EmptyDataError:
+        return []
+
+    for col in required_columns:  # columns created below by add_default/overwrite also count
+        if col not in df and col not in add_default and col not in overwrite:
+            raise KeyError(f'missing required column: {col}')
+
+    # run the custom functions
+    for col, func in apply.items():
+        df[col] = df[col].apply(func)
+
+    if COLUMNS.opposing_strands in df:
+        df[COLUMNS.opposing_strands] = df[COLUMNS.opposing_strands].apply(
+            lambda x: None if x == '?' else soft_cast(x, cast_type=bool)
+        )
+    else:
+        df[COLUMNS.opposing_strands] = None
+
+    if COLUMNS.stranded in df:
+        df[COLUMNS.stranded] = df[COLUMNS.stranded].apply(cast_boolean)
+    else:
+        df[COLUMNS.stranded] = None
+
+    if COLUMNS.untemplated_seq in df:
+        df[COLUMNS.untemplated_seq] = df[COLUMNS.untemplated_seq].apply(soft_null_cast)
+    else:
+        df[COLUMNS.untemplated_seq] = None
+
+    for col in [COLUMNS.break1_chromosome, COLUMNS.break2_chromosome]:
+        df[col] = df[col].apply(lambda v: re.sub(r'^chr', '', v))
+
+    if COLUMNS.tracking_id not in df:
+        df[COLUMNS.tracking_id] = ''
+    else:  # one fresh id per missing row; fillna(str(uuid())) would share a single id
+        df[COLUMNS.tracking_id] = df[COLUMNS.tracking_id].apply(lambda t: str(uuid()) if pd.isnull(t) else t)
+
+    # add default values
+    for col, default_value in add_default.items():
+        if col in df:
+            df[col] = df[col].fillna(default_value)
+        else:
+            df[col] = default_value
+
+    # set overwriting defaults
+    for col, value in overwrite.items():
+        df[col] = value
+
+    # enforce controlled vocabulary
+    for vocab, cols in [
+        (ORIENT, [COLUMNS.break1_orientation, COLUMNS.break2_orientation]),
+        (STRAND, [COLUMNS.break1_strand, COLUMNS.break2_strand]),
+        (PROTOCOL, [COLUMNS.protocol]),
+    ]:
+        for col in cols:
+            if col in df:
+                df[col].apply(lambda c: vocab.enforce(c))
+            elif hasattr(vocab, 'NS'):
+                df[col] = vocab.NS  # type: ignore
+
+    def validate_pipeline_id(value):
+        if not re.match(r'^([A-Za-z0-9-]+|)(;[A-Za-z0-9-]+)*$', value):
+            raise AssertionError(
+                'All mavis pipeline step ids must satisfy the regex:',
+                '^([A-Za-z0-9-]+|)(;[A-Za-z0-9-]+)*$',
+                value,
+            )
+
+    for col in [COLUMNS.cluster_id, COLUMNS.annotation_id, COLUMNS.validation_id]:
+        if col in df:
+            try:  # null ids are NaN after parsing; re.match would raise TypeError on them
+                df[col].dropna().apply(validate_pipeline_id)
+            except AssertionError as err:
+                raise AssertionError(f'error in column ({col}): {err}') from err
+
+    rows = df.where(df.notnull(), None).to_dict('records')
+    non_data_columns = {
         COLUMNS.break1_chromosome,
         COLUMNS.break1_position_start,
         COLUMNS.break1_position_end,
@@ -448,24 +534,17 @@ def soft_null_cast(value):
         COLUMNS.stranded,
         COLUMNS.opposing_strands,
         COLUMNS.untemplated_seq,
-    ]
-    pairs = []
+    }
+    pairs: List[BreakpointPair] = []
+
     for line_index, row in enumerate(rows):
         row['line_no'] = line_index + 1
+
         if '_index' in row:
             del row['_index']
         for attr, val in row.items():
             row[attr] = soft_null_cast(val)
-        for attr in row:
-            if attr in [COLUMNS.cluster_id, COLUMNS.annotation_id, COLUMNS.validation_id]:
-                if not re.match('^([A-Za-z0-9-]+|)(;[A-Za-z0-9-]+)*$', row[attr]):
-                    raise AssertionError(
-                        'error in column',
-                        attr,
-                        'All mavis pipeline step ids must satisfy the regex:',
-                        '^([A-Za-z0-9-]+|)(;[A-Za-z0-9-]+)*$',
-                        row[attr],
-                    )
+
         stranded = row[COLUMNS.stranded]
 
         strand1 = row[COLUMNS.break1_strand] if stranded else STRAND.NS
@@ -474,7 +553,7 @@ def soft_null_cast(value):
         temp = []
         expand_strand = stranded and expand_strand
         event_type = [None]
-        if row.get(COLUMNS.event_type, None) not in [None, 'None']:
+        if not pd.isnull(row.get(COLUMNS.event_type)):
             try:
                 event_type = row[COLUMNS.event_type].split(';')
                 for putative_event_type in event_type:
@@ -509,7 +588,7 @@ def soft_null_cast(value):
                     orient=orient2,
                 )
 
-                data = {k: v for k, v in row.items() if k not in restricted}
+                data = {k: v for k, v in row.items() if k not in non_data_columns}
                 bpp = BreakpointPair(
                     break1,
                     break2,
diff --git a/src/mavis/validate/main.py b/src/mavis/validate/main.py
index 1136ff81..51d1e48f 100644
--- a/src/mavis/validate/main.py
+++ b/src/mavis/validate/main.py
@@ -48,7 +48,7 @@ def main(
         stdev_fragment_size (int): the standard deviation in fragment size
         read_length (int): read length
         reference_genome (mavis.annotate.file_io.ReferenceFile): see :func:`mavis.annotate.file_io.load_reference_genome`
-        annotations (mavis.annotate.file_io.ReferenceFile): see :func:`mavis.annotate.file_io.load_reference_genes`
+        annotations (mavis.annotate.file_io.ReferenceFile): see :func:`mavis.annotate.file_io.load_annotations`
         masking (mavis.annotate.file_io.ReferenceFile): see :func:`mavis.annotate.file_io.load_masking_regions`
         aligner_reference (mavis.annotate.file_io.ReferenceFile): path to the aligner reference file (e.g 2bit file for blat)
     """
@@ -86,11 +86,13 @@ def main(
 
     bpps = read_inputs(
         inputs,
-        add_default={COLUMNS.cluster_id: None, COLUMNS.stranded: False},
-        add={COLUMNS.protocol: config['libraries'][library]['protocol'], COLUMNS.library: library},
+        add_default={COLUMNS.cluster_id: str(uuid()), COLUMNS.stranded: False},
+        overwrite={
+            COLUMNS.protocol: config['libraries'][library]['protocol'],
+            COLUMNS.library: library,
+        },
         expand_strand=False,
         expand_orient=True,
-        cast={COLUMNS.cluster_id: lambda x: str(uuid()) if not x else x},
     )
     evidence_clusters = []
     for bpp in bpps:
diff --git a/src/tab/__init__.py b/src/tab/__init__.py
deleted file mode 100644
index fc7a7938..00000000
--- a/src/tab/__init__.py
+++ /dev/null
@@ -1,127 +0,0 @@
-"""
-# About
-
-The tab module is a python module used for easy transformation of tab files
-It is fairly basic and does not support quoting text or escaping delimiters
-
-## Order of Operations
-
-There are a number of different transformations which can be applied to the rows. The order in which they are applied
-is as follows
-
-1. add
-2. add_default
-3. require
-4. validate
-5. rename
-6. split
-7. combine
-8. cast
-9. in_
-10. drop
-11. simplify
-
-### add
-
-this adds a new column with a default value. If the column name already exists in the input header. The existing column
-is overwritten and replaced with the default value
-
-### add_default
-
-this adds a new column with a default value. If the column name already exists in the input header the existing value
-is retained instead
-
-### require
-
-checks a list of column names to ensure they exist
-
-### validate
-
-checks a given column name against a regular expression
-
-### rename
-
-renames an input column to one or more new column names
-
-### split
-
-based on named capture groups in a regular expression, splits an existing column into one or more new columns
-
-### combine
-
-based on python template strings. Combines the values of 1 or more columns into a new column
-
-### cast
-
-applies any cast function to the column value
-
-### in_
-
-check the column value for membership of a specified object
-
-### drop
-
-deletes columns with a given name
-
-### simplify
-
-drops any input (not new) column names not specified in the require option
-
-
-# Use-Cases
-
-1. reading a tab file with no transformations
-
-```
->>> header, rows = tab.read_file(filename, suppress_index=True)
-```
-
-2. reading a tab file and getting the line numbers
-
-```
->>> header, rows = tab.read_file(filename)
->>> for row in rows:
->>>    print('row number:', row['_index'])
-'row number:' 1
-```
-
-3. split a column with an expected pattern into multiple columns
-
-```
->>> header, rows = tab.read_file(filename, split={'colname': r'^(?P<chr>\w+):(?P<pos>\d+)$'})
->>> print(header)
-['colname', 'chr', 'pos']
-```
-
-4. drop specific unwanted columns
-
-```
->>> header, rows = tab.read_file(filename, drop=['colname'])
-```
-
-5. drop all but specific columns
-
-```
->>> header, rows = tab.read_file(filename, require=['colname'], simplify=True)
-```
-
-6. add a column with a default value
-
-```
->>> header, rows = tab.read_file(filename, add={'colname': 'default value'})
-```
-
-7. combine columns into a new column
-
-```
->>> header, rows = tab.read_file(filename, combine={'new_colname': '{colname1}_{colname2}'})
-```
-
-8. cast a column to a specific type
-
-```
->>> header, rows = tab.read_file(filename, cast={'colname': int})
->>> header, rows = tab.read_file(filename, cast={'colname': tab.cast_boolean})
-```
-"""
-from .tab import FileTransform, cast_boolean, cast_null, read_file, VERBOSE
diff --git a/src/tab/tab.py b/src/tab/tab.py
deleted file mode 100755
index ce28b703..00000000
--- a/src/tab/tab.py
+++ /dev/null
@@ -1,401 +0,0 @@
-#!/usr/bin/env python3
-"""
-Order of transform operations
-
-1. add
-2. add_default
-3. require
-4. validate
-5. rename
-6. split
-7. combine
-8. cast
-9. in_
-10. drop
-11. simplify
-"""
-
-from __future__ import division
-
-import re
-import string
-import warnings
-
-
-VERBOSE = False  # Output extra logging information (useful in debugging)
-
-
-def cast_boolean(input_value):
-    value = str(input_value).lower()
-    if value in ['t', 'true', '1', 'y', 'yes', '+']:
-        return True
-    elif value in ['f', 'false', '0', 'n', 'no', '-']:
-        return False
-    raise TypeError('casting to boolean failed', input_value)
-
-
-def cast_null(input_value):
-    value = str(input_value).lower()
-    if value in ['none', 'null']:
-        return None
-    raise TypeError('casting to null/None failed', input_value)
-
-
-def null(input_value):
-    warnings.warn('null is deprecated in favor of cast_null', DeprecationWarning, stacklevel=2)
-    return cast_null(input_value)
-
-
-class EmptyFileError(Exception):
-    pass
-
-
-class FileTransform:
-    """
-    Holds a set of operations which define the transform_line function.
-    Generally a single FileTransform object is required per file as lines are expected to have the same format
-    """
-
-    def __init__(self, header, **kwargs):
-        """
-        Args:
-            header (List[str]): the header from the file as a list of column names (in-order)
-            require (List[str]): list of columns that must be in the input header
-            rename (Dict[str,List[str]]): mapping of old to new column(s)
-            drop (List[str]): list of columns in the old input header to drop
-            add_default (Dict[str]): mapping of new column names to default values (if the column does not exist already)
-            cast (Dict[str,func]): mapping of new/final columns to the type to cast them to
-            split (Dict[str,str]):
-                a dictionary mapping original column names to regex groups to create as the new column names
-            combine (Dict[str,str]):
-                a dictionary of the final column name to the format string. The field names in the format
-                string must correspond to existing column names
-            simplify (bool): drop all columns not created or retained
-            validate (Dict[str,str]): mapping of old columns to regex they must satisfy
-
-        Returns:
-            FileTransform: an object with the validated rules for transforming lines in an input file
-        """
-        self.input = header[:]
-        self.require = kwargs.pop('require', [])
-        self.rename = kwargs.pop('rename', {})
-        self.drop = kwargs.pop('drop', [])
-        self.add = kwargs.pop('add', {})
-        self.add_default = kwargs.pop('add_default', {})
-        self.split = kwargs.pop('split', {})
-        self.combine = kwargs.pop('combine', {})
-        self.validate = kwargs.pop('validate', {})
-        self.cast = kwargs.pop('cast', {})
-        self.simplify = kwargs.pop('simplify', False)
-        self.in_ = kwargs.pop('in_', {})
-        self.header = []  # holds the new header after the transform
-
-        if kwargs:
-            raise TypeError('invalid argument(s)', list(kwargs.keys()))
-
-        current_columns = set(header)
-        cant_simplify = set()  # columns that are restricted against being dropped in simplify
-
-        if VERBOSE:
-            print('input header:', header)
-
-        # check that the header columns are unique
-        if len(set(header)) != len(header):
-            raise KeyError(
-                'duplicate input col: column names in input header must be unique', header
-            )
-
-        for col in self.add:
-            current_columns.add(col)
-            cant_simplify.add(col)
-        # add_default: add_default new columns with default values if not already present
-        for col in self.add_default:
-            current_columns.add(col)
-            cant_simplify.add(col)
-
-        # 1. require: check that the required columns exist in the input header
-        for col in self.require:
-            if col not in current_columns:
-                raise KeyError(
-                    'cannot require: column not found in the input header', col, current_columns
-                )
-            cant_simplify.add(col)
-
-        # 2. validate: check that the input column matches the expected pattern
-        for col, regex in self.validate.items():
-            if col not in current_columns:
-                raise KeyError(
-                    'cannot validate: column not found in the input header', col, current_columns
-                )
-            cant_simplify.add(col)
-
-        # 4. rename: rename a column to one or more new column names
-        for col, new_names in self.rename.items():
-            if col not in current_columns:
-                raise KeyError(
-                    'cannot rename column. column not found in header', col, current_columns
-                )
-            for new_name in new_names:
-                if new_name in current_columns:
-                    raise KeyError('duplicate column name', new_name, current_columns)
-                current_columns.add(new_name)
-                cant_simplify.add(new_name)
-
-        # 5. split: split a column into a set of new columns
-        for col, regex in self.split.items():
-            robj = re.compile(regex)
-            new_columns = robj.groupindex.keys()
-            if col not in current_columns:
-                raise KeyError(
-                    'cannot split column. column not found in header', col, current_columns
-                )
-            for new_col in new_columns:
-                if new_col in current_columns:
-                    raise KeyError('duplicate column name', new_col, current_columns)
-                current_columns.add(new_col)
-                cant_simplify.add(new_col)
-
-        # 6. combine:
-        for ncol, format_string in self.combine.items():
-            old_column_names = [t[1] for t in list(string.Formatter().parse(format_string))]
-            if ncol in current_columns:
-                raise KeyError('duplicate column name', ncol, current_columns)
-            current_columns.add(ncol)
-            cant_simplify.add(ncol)
-            for col in old_column_names:
-                if col not in current_columns:
-                    raise KeyError(
-                        'cannot combine column. column not found in header', col, current_columns
-                    )
-
-        # 7. cast: apply some callable
-        for col, func in self.cast.items():
-            if col not in current_columns:
-                raise KeyError(
-                    'cannot cast column. column not found in header', col, current_columns
-                )
-            if not callable(func):
-                raise TypeError('function applied to column must be callable', col, func)
-            cant_simplify.add(col)
-
-        # 8. in_: check for satisfying some controlled vocab
-        for col, item in self.in_.items():
-            if col not in current_columns:
-                raise KeyError(
-                    'cannot check membership column. column not found in header',
-                    col,
-                    current_columns,
-                )
-            if None in item:
-                pass
-            cant_simplify.add(col)
-
-        # 9. drop: drop any columns from the original input IF EXIST
-        for col in self.drop:
-            if col in self.require:
-                raise AssertionError('cannot both drop and retain a column', col)
-            current_columns.discard(
-                col
-            )  # 8. simplify: drop any columns that are not new, added, or retained
-
-        if self.simplify:
-            for col in list(current_columns):
-                if col not in cant_simplify:
-                    current_columns.discard(col)
-
-        # retain the original input order except for new columns
-        order = {}
-        for col in current_columns:
-            order[col] = len(header)
-        for i, col in enumerate(header):
-            if col in current_columns:
-                order[col] = i
-
-        self.header = [m for m, n in sorted(order.items(), key=lambda x: (x[1], x[0]))]
-
-        if VERBOSE:
-            print('output header:', self.header)
-
-    def transform_line(self, line, allow_short=False):
-        """
-        transforms the input line into a hash of the new/final column names with the transform rules applied
-
-        Args:
-            line (List[str]): list of values for a row with the same input header as the transform
-        Raises:
-            exception exceptions occur if validation, split or combine fails
-
-        Returns:
-            Dict[str]: the hash representation of the new row
-        """
-        if any(
-            [
-                not allow_short and len(self.input) != len(line),
-                allow_short and len(self.input) < len(line),
-            ]
-        ):
-            raise AssertionError(
-                'length of input list {0} does not match length of the expected header {1}: '.format(
-                    len(line), len(self.input)
-                )
-                + re.sub('\n', '\\n', '\\t'.join(line)),
-                self.input,
-            )
-
-        row = {}
-        cant_simplify = set()
-
-        for i in range(0, len(self.input)):
-            row[self.input[i]] = line[i] if i < len(line) else None
-
-        for col, default in self.add.items():
-            row[col] = default
-            cant_simplify.add(col)
-
-        # add_default: add new columns with default values if not already present
-        for col, default in self.add_default.items():
-            row.setdefault(col, default)
-            cant_simplify.add(col)
-
-        # 1. require: check that the required columns exist in the input header
-        cant_simplify.update(self.require)
-
-        # 2. validate: check that the input column matches the expected pattern
-        for col, regex in self.validate.items():
-            cant_simplify.add(col)
-            if not re.match(regex, row[col]):
-                raise UserWarning('validation failed', col, regex, row[col])
-
-        # 4. rename: rename a column to one or more new column names
-        for col, new_names in self.rename.items():
-            for new_name in new_names:
-                row[new_name] = row[col]
-                cant_simplify.add(new_name)
-
-        # 5. split: split a column into a set of new columns
-        for col, regex in self.split.items():
-            robj = re.compile(regex)
-            new_columns = robj.groupindex.keys()
-            match = robj.match(row[col])
-            if not match:
-                raise UserWarning('split of column failed', col, regex, row[col])
-            for new_col in new_columns:
-                row[new_col] = match.group(new_col)
-                cant_simplify.add(new_col)
-
-        # 6. combine:
-        for ncol, format_string in self.combine.items():
-            old_column_names = [t[1] for t in list(string.Formatter().parse(format_string))]
-            cant_simplify.add(ncol)
-            substitutions = {}
-            for col in old_column_names:
-                substitutions[col] = row[col]
-            row[ncol] = format_string.format(**substitutions)
-
-        # 7. cast: apply some callable
-        for col, func in self.cast.items():
-            try:
-                row[col] = func(row[col])
-            except Exception as err:
-                raise type(err)('error in casting column: {}. {}'.format(col, str(err)))
-            cant_simplify.add(col)
-
-        # 8. in_: check for satisfying some controlled vocab
-        for col, item in self.in_.items():
-            if row[col] not in item:
-                raise KeyError('failed in_ check', col, row[col], item)
-            cant_simplify.add(col)
-
-        # 9. drop: drop any columns from the original input IF EXIST
-        for col in self.drop:
-            row.pop(col, None)
-
-        # 10. simplify: drop any columns that are not new, added, or retained
-        if self.simplify:
-            for col in list(row):
-                if col not in cant_simplify:
-                    row.pop(col, None)
-
-        return row
-
-
-def read_file(
-    inputfile,
-    delimiter='\t',
-    header=None,
-    strict=True,
-    suppress_index=False,
-    allow_short=False,
-    **kwargs
-):
-    """
-    Args:
-        inputfile (str): the path to the inputfile
-        header (List[str]): for non-headered files
-        delimiter (str): the delimiter (what to split on)
-        strict (bool): if false will ignore lines that fail transform
-        suppress_index (bool): do not create an index
-    Returns:
-        Tuple[List[str], Dict[str]]: header and the row dictionaries
-    """
-    if VERBOSE:
-        print("read_file(", inputfile, ", ", kwargs, ")")
-
-    new_header = None
-    is_file_handle = True if hasattr(inputfile, 'readlines') else False
-    index = '_index'
-    objects = []
-    line_count = 0
-
-    fh = inputfile if is_file_handle else open(inputfile, 'r')
-
-    # first grab the header and skip comments
-    lines = fh.readlines()
-    if not lines:
-        raise EmptyFileError('empty file has no lines to read')
-    current_line_index = 0
-    line = re.sub(r'[\r\n]*$', '', lines[current_line_index])
-    while current_line_index < len(lines):
-        if not re.match(r'^\s*##', lines[current_line_index]):  # skip comment lines
-            break
-        current_line_index += 1
-
-    # first line is the header unless a header was input
-    if not header:
-        if current_line_index >= len(lines):
-            raise EmptyFileError('no lines beyond comments to read as header')
-        line = re.sub(r'(^#)|([\r\n\s]*$)', '', lines[current_line_index])  # clean the header
-        current_line_index += 1
-        header = line.split(delimiter) if line else []
-    if not header:
-        raise EmptyFileError('header is empty', inputfile)
-    # create the file transform object
-    transform = FileTransform(header, **kwargs)
-    new_header = transform.header
-
-    if not suppress_index and index in new_header:
-        raise AttributeError(
-            'column name {0} is reserved and cannot be used as an input'.format(repr(index))
-        )
-
-    # now go through the lines in the file
-    while current_line_index < len(lines):
-        line_count += 1
-        line = re.sub(r'[\r\n]*$', '', lines[current_line_index])  # clean the line
-        try:
-            row = line.split(delimiter)
-            row = transform.transform_line(row, allow_short=allow_short)
-            if not suppress_index:
-                row[index] = current_line_index
-            objects.append(row)
-        except Exception as error:  # General b/c will be re-raised unless strict mode is off
-            if strict:
-                print('error at line', current_line_index)
-                raise type(error)('{0} happens at line {1}'.format(error, current_line_index))
-            elif VERBOSE:
-                print('[ERROR]', str(error))
-        current_line_index += 1
-
-    if not is_file_handle:
-        fh.close()
-    return (new_header, objects)
diff --git a/src/tools/calculate_ref_alt_counts.py b/src/tools/calculate_ref_alt_counts.py
index d873daba..cbb3be43 100644
--- a/src/tools/calculate_ref_alt_counts.py
+++ b/src/tools/calculate_ref_alt_counts.py
@@ -190,7 +190,7 @@ def calculate_all_counts(self, input_files, output_file):
         processed_bpps = {}
         filtered_events = []
 
-        bpps = read_inputs(input_files, add_default={'stranded': False})
+        bpps = read_inputs(input_files, add_default={'stranded': False}, summary=True)
 
         for bpp in bpps:
             # only use precise bpps that are within a certain event size
diff --git a/tests/data/annotations_subsample.tab b/tests/data/annotations_subsample.tab
index 00922483..8a56bb78 100644
--- a/tests/data/annotations_subsample.tab
+++ b/tests/data/annotations_subsample.tab
@@ -2,7 +2,7 @@
 ## input file for picking best transcript: ens69_best_transcript.txt
 ## Ensembl Api version 69
 ## generated at: Thu Aug  4 16:38:01 2016
-#ensembl_gene_id	hugo_names	chr	strand	gene_start	gene_end	best_ensembl_transcript_id	ensembl_transcript_id	refseq_equivalents	transcript_genomic_start	transcript_genomic_end	cdna_coding_start	cdna_coding_end	genomic_exon_ranges	AA_domain_ranges
+ensembl_gene_id	hugo_names	chr	strand	gene_start	gene_end	best_ensembl_transcript_id	ensembl_transcript_id	refseq_equivalents	transcript_genomic_start	transcript_genomic_end	cdna_coding_start	cdna_coding_end	genomic_exon_ranges	AA_domain_ranges
 ENSG00000259662		15	1	63889592	63893885	ENST00000539570	ENST00000539570	NP_976307.2;NM_203373.2	63889592	63893885	1	744	63889592-63889944;63893495-63893885	SSF81383:9-49
 ENSG00000258865	DIO3	14	1	102027834	102028748	ENST00000510508	ENST00000510508	NP_001353.4;NM_001362.3	102027834	102028748	1	915	102027834-102028748	PF00837:38-293;SSF52833:125-198
 ENSG00000255738	GAGE4	X	1	49364778	49370618	ENST00000381700	ENST00000381700	NP_001035753.1;NM_001040663.2	49364778	49370618	1	354	49364778-49364861;49365327-49365447;49368271-49368396;49370596-49370618	PF05831:1-116
diff --git a/tests/data/clustering_input.tab b/tests/data/clustering_input.tab
index 6b60397f..9123da3e 100644
--- a/tests/data/clustering_input.tab
+++ b/tests/data/clustering_input.tab
@@ -1,3 +1,3 @@
-#tracking_id	event_type	break1_chromosome	break1_position_start	break1_position_end	break1_orientation	break1_strand	break1_seq	break2_chromosome	break2_position_start	break2_position_end	break2_orientation	break2_strand	break2_seq	opposing_strands	stranded	tools	protocol
+tracking_id	event_type	break1_chromosome	break1_position_start	break1_position_end	break1_orientation	break1_strand	break1_seq	break2_chromosome	break2_position_start	break2_position_end	break2_orientation	break2_strand	break2_seq	opposing_strands	stranded	tools	protocol
 manta-MantaDEL:175574:0:0:0:0:0	deletion	15	67333523	67333619	L	?	None	15	67333581	67333581	R	?	None	False	False	manta	genome
 strelka-TyeSomZhWTRakEu6ZJ7up6	deletion	15	67333623	67333623	L	?	None	15	67333625	67333625	R	?	None	False	False	strelka	genome
diff --git a/tests/data/mavis_summary_all_mock-A36971_mock-A47933.tab b/tests/data/mavis_summary_all_mock-A36971_mock-A47933.tab
index 81ad70b4..4ac60e71 100644
--- a/tests/data/mavis_summary_all_mock-A36971_mock-A47933.tab
+++ b/tests/data/mavis_summary_all_mock-A36971_mock-A47933.tab
@@ -1,4 +1,4 @@
-#tracking_id	library	annotation_id	product_id	event_type	gene1	gene1_direction	gene2	gene2_direction	gene1_aliases	gene2_aliases	gene_product_type	transcript1	transcript2	fusion_splicing_pattern	fusion_cdna_coding_start	fusion_cdna_coding_end	fusion_mapped_domains	fusion_protein_hgvs	annotation_figure	genes_encompassed	break1_chromosome	break1_position_start	break1_position_end	break1_orientation	exon_last_5prime	exon_first_3prime	break1_strand	break2_chromosome	break2_position_start	break2_position_end	break2_orientation	break2_strand	protocol	tools	call_method	break1_homologous_seq	break1_split_reads	break2_homologous_seq	break2_split_reads	contig_alignment_score	contig_remapped_reads	contig_seq	spanning_reads	flanking_pairs	linking_split_reads	untemplated_seq	cdna_synon	protein_synon	supplementary_call	net_size	assumed_untemplated	dgv	mock-A36971_diseased_genome	mock-A47933_diseased_transcriptome
+tracking_id	library	annotation_id	product_id	event_type	gene1	gene1_direction	gene2	gene2_direction	gene1_aliases	gene2_aliases	gene_product_type	transcript1	transcript2	fusion_splicing_pattern	fusion_cdna_coding_start	fusion_cdna_coding_end	fusion_mapped_domains	fusion_protein_hgvs	annotation_figure	genes_encompassed	break1_chromosome	break1_position_start	break1_position_end	break1_orientation	exon_last_5prime	exon_first_3prime	break1_strand	break2_chromosome	break2_position_start	break2_position_end	break2_orientation	break2_strand	protocol	tools	call_method	break1_homologous_seq	break1_split_reads	break2_homologous_seq	break2_split_reads	contig_alignment_score	contig_remapped_reads	contig_seq	spanning_reads	flanking_pairs	linking_split_reads	untemplated_seq	cdna_synon	protein_synon	supplementary_call	net_size	assumed_untemplated	dgv	mock-A36971_diseased_genome	mock-A47933_diseased_transcriptome
 SeYJmwZMWGeS8ciCzwNJeb;eqGMKJx6w8M6QU7kood8N7	mock-A36971	NwT4iWqEPs27pjwfVpEw4j-v1-a1	mock-A36971_genome_NwT4iWqEPs27pjwfVpEw4j-v1-a1_normal_547_1098	inverted translocation	ENSG00000128891	5	ENSG00000122565	3	C15orf57	CBX3	sense	ENST00000358005	ENST00000337620	normal	547	1098	[{"name": "PR00504", "sequences": ["PEEFVVEKV", "VVNGKVEYFLKWKGF", "TDADNTWEPEENL"], "regions": [{"start": 27, "end": 35}, {"start": 40, "end": 54}, {"start": 55, "end": 67}], "mapping_quality": 100.0, "matches": 37}, {"name": "PF01393", "sequences": ["RGLDPERIIGATDSSGELMFLMKWKDSDEADLVLAKEANMKCPQIVIAFYEERLTWHS"], "regions": [{"start": 119, "end": 176}], "mapping_quality": 100.0, "matches": 58}, {"name": "SSF54160", "sequences": ["QNGKSKKVEEAEPEEFVVEKVLDRRVVNGKVEYFLKWKGFTDADNTWEPEENLDCPELIEAFLNS", "KKRDAADKPRGFARGLDPERIIGATDSSGELMFLMKWKDSDEADLVLAKEANMKCPQIVIAFYEERLTWH"], "regions": [{"start": 15, "end": 79}, {"start": 106, "end": 175}], "mapping_quality": 100.0, "matches": 135}, {"name": "PF00385", "sequences": ["FVVEKVLDRRVVNGKVEYFLKWKGFTDADNTWEPEENLDCPELIEAFLN"], "regions": [{"start": 30, "end": 78}], "mapping_quality": 100.0, "matches": 49}, {"name": "SM00298", "sequences": ["EFVVEKVLDRRVVNGKVEYFLKWKGFTDADNTWEPEENLDCPELIEAFLNSQK", "GLDPERIIGATDSSGELMFLMKWKDSDEADLVLAKEANMKCPQIVIAFYEERL"], "regions": [{"start": 29, "end": 81}, {"start": 120, "end": 172}], "mapping_quality": 100.0, "matches": 106}, {"name": "SM00300", "sequences": ["RGFARGLDPERIIGATDSSGELMFLMKWKDSDEADLVLAKEANMKCPQIVIAFYEERLTWHSC"], "regions": [{"start": 115, "end": 177}], "mapping_quality": 100.0, "matches": 63}, {"name": "PS50013", "sequences": ["FVVEKVLDRRVVNGKVEYFLKWKGFTDADNTWEPEENLDCPELIEAFLNSQKAGKEKDG", "LDPERIIGATDSSGELMFLMKWKDSDEADLVLAKEANMKCPQIVIAFYEERLTWHSCPE"], "regions": [{"start": 30, "end": 88}, {"start": 121, "end": 179}], "mapping_quality": 100.0, "matches": 118}]	None	
/var/tmp/tmp3cvjw9j4/mock-A36971_diseased_genome/annotate/batch-oyiw4PkCc96hn7kVpVWxEX-1/drawings/mavis_NwT4iWqEPs27pjwfVpEw4j-v1-a1-chrgene1_chrgene5-b-C15orf57_b-CBX3.svg		gene1	33299	33299	R	2	2	?	gene5	584	584	R	?	genome	convert_ta.py_v0.0.1	contig		8		9	0.9954107388710418	9	GCTATTATTCACCGCCTCCGAGCTGCTCCGGGTCGCGGGTCTGCAGCGTCTCCGGCCCTCCGCGCCTACAGCTCAAGCCACATCCGAAGTCAGGAAATATTTTTAAAATAAAATGGCTAACAAGAGGCAGAATGAATCTTATGTCAATATGCTCCCATTCTCAACAATCAATCTATTTATGTAAGTTTTTCAAACTCCAGCATCAG	0	9	10		None	ENST00000337620	False	0-0	False	None	Not Applicable	not expressed
 DTquBbWJJLsogM4dfvDqP7;erEiWboaSGxXgQ29fuZWz8	mock-A36971	5BBNKTYvzaZHNjq8CaTcAH-v1-a1	mock-A36971_genome_5BBNKTYvzaZHNjq8CaTcAH-v1-a1_None_None_None	deletion	None	None	None	None	None	None	None	reference11:6001_6001+	reference11:6005_6005+	None	None	None	None	None	/var/tmp/tmp3cvjw9j4/mock-A36971_diseased_genome/annotate/batch-oyiw4PkCc96hn7kVpVWxEX-14/drawings/mavis_5BBNKTYvzaZHNjq8CaTcAH-v1-a1-chrreference11_chrreference11-NA_NA.svg		reference11	6001	6001	L	None	None	?	reference11	6005	6005	R	?	genome	convert_ta.py_v0.0.1	contig	AT	1		0	0.9951667472208796	9	CACGCCCTGCTAGGAGTTCACGCTTTAGTTGGGGAAAATATACAATAAGCAAGCCAGTTTTTAAAATGAGAACTGCAATTAGAGTTAAATGCTACAAAGACAAACTCACAGGAAGATGGGATGTAGAATAAGGCTCTCAGAATAGTAAGAGAAACTATTGCTTCTTACGATGTTTGTCTTTCTTTGTAT	16	2	0		None	None	False	-3--3	False	None	Not Applicable	not expressed
 AeY2qdWDTvBkEFgXa92ave;D9XznZsQErYgPTEMYeqMjK	mock-A36971	tM2bMsSPiz47LLNw7ED7R6-v1-a1	mock-A36971_genome_tM2bMsSPiz47LLNw7ED7R6-v1-a1_None_None_None	deletion	None	None	None	None	None	None	None	reference11:10026_10026+	reference11:10067_10067+	None	None	None	None	None	/var/tmp/tmp3cvjw9j4/mock-A36971_diseased_genome/annotate/batch-oyiw4PkCc96hn7kVpVWxEX-1/drawings/mavis_tM2bMsSPiz47LLNw7ED7R6-v1-a1-chrreference11_chrreference11-NA_NA.svg		reference11	10026	10026	L	None	None	?	reference11	10067	10067	R	?	genome	convert_ta.py_v0.0.1	contig	C	0		0	0.9819121447028424	51	GGGGGGGGGGTCCGGGGGGGGCTGTGGTTCTTATGGCTGCTCCCAGTCCAGCTGCTGCAAGCCCTGCTGCTGCTCCTCAGGCTGTGGGTCATCCTGCTGCCAGTCCAGCTGCTGTAAGCCCTACTGCTGTCAGTCCAGCTGCTGTAAGCCCTGTAGCTGCTTCTCAGGCTGTGGATCATCCTGCTGCCAATCCAGCTGCTACAAGCCCTGCTGCTGCCAGTCCAGC	0	3	0	CTACTGCTGT	None	None	False	-30--30	False	None	Not Applicable	not expressed
diff --git a/tests/data/mock_masking.tab b/tests/data/mock_masking.tab
index 1d2748ee..dc68745f 100644
--- a/tests/data/mock_masking.tab
+++ b/tests/data/mock_masking.tab
@@ -1 +1 @@
-#chr	start	end	name
+chr	start	end	name
diff --git a/tests/data/mock_pairing_input.tab b/tests/data/mock_pairing_input.tab
index 8e18ac1e..cdddb859 100644
--- a/tests/data/mock_pairing_input.tab
+++ b/tests/data/mock_pairing_input.tab
@@ -1,4 +1,4 @@
-#library	cluster_id	validation_id	annotation_id	event_type	transcript1	transcript2	fusion_cdna_coding_start	fusion_cdna_coding_end	fusion_sequence_fasta_id	fusion_sequence_fasta_file	break1_chromosome	break1_position_start	break1_position_end	break1_orientation	break1_strand	break2_chromosome	break2_position_start	break2_position_end	break2_orientation	break2_strand	opposing_strands	stranded	protocol	break1_call_method	break2_call_method	untemplated_seq	fusion_splicing_pattern
+library	cluster_id	validation_id	annotation_id	event_type	transcript1	transcript2	fusion_cdna_coding_start	fusion_cdna_coding_end	fusion_sequence_fasta_id	fusion_sequence_fasta_file	break1_chromosome	break1_position_start	break1_position_end	break1_orientation	break1_strand	break2_chromosome	break2_position_start	break2_position_end	break2_orientation	break2_strand	opposing_strands	stranded	protocol	break1_call_method	break2_call_method	untemplated_seq	fusion_splicing_pattern
 genome1	1	1	1	deletion	ENST00000367080	ENST00000367080	None	None	None	None	gene3	10008	10008	L	+	gene3	18900	18900	R	+	False	True	genome	split reads	split reads	None	None
 genome2	1	1	1	deletion	ENST00000367080	ENST00000367080	None	None	None	None	gene3	10000	10000	L	+	gene3	18900	18900	R	+	False	True	genome	split reads	split reads	None	None
 transcriptome1	1	1	1	deletion	ENST00000367080	ENST00000367080	None	None	None	None	gene3	5347	5347	L	+	gene3	19969	19969	R	+	False	True	transcriptome	split reads	split reads	None	None
diff --git a/tests/data/mock_reference_annotations.full.tsv b/tests/data/mock_reference_annotations.full.tsv
index d86736d4..7ead95e3 100644
--- a/tests/data/mock_reference_annotations.full.tsv
+++ b/tests/data/mock_reference_annotations.full.tsv
@@ -1,4 +1,4 @@
-#ensembl_gene_id	hugo_names	chr	strand	gene_start	gene_end	best_ensembl_transcript_id	ensembl_transcript_id	refseq_equivalents	transcript_genomic_start	transcript_genomic_end	cdna_coding_start	cdna_coding_end	genomic_exon_ranges	AA_domain_ranges
+ensembl_gene_id	hugo_names	chr	strand	gene_start	gene_end	best_ensembl_transcript_id	ensembl_transcript_id	refseq_equivalents	transcript_genomic_start	transcript_genomic_end	cdna_coding_start	cdna_coding_end	genomic_exon_ranges	AA_domain_ranges
 ENSG00000186354	C9orf47	fakereference9	1	1	5278	ENST00000375851	ENST00000375851	NP_001135885.1;NM_001142413.1	1	5278	134	685	1-322;608-833;990-5278	
 ENSG00000186354	C9orf47	fakereference9	1	1	5278	ENST00000375851	ENST00000375850		59	1202	76	783	59-322;608-1202	
 ENSG00000186354	C9orf47	fakereference9	1	1	5278	ENST00000375851	ENST00000334490	NP_001001938.1;NM_001001938.3	66	5278	69	677	66-379;608-833;990-5278	
diff --git a/tests/data/mock_reference_annotations.tsv b/tests/data/mock_reference_annotations.tsv
index 10874ea2..14391a69 100644
--- a/tests/data/mock_reference_annotations.tsv
+++ b/tests/data/mock_reference_annotations.tsv
@@ -1,4 +1,4 @@
-#ensembl_gene_id	chr	strand	gene_start	gene_end	ensembl_transcript_id	transcript_genomic_start	transcript_genomic_end
+ensembl_gene_id	chr	strand	gene_start	gene_end	ensembl_transcript_id	transcript_genomic_start	transcript_genomic_end
 GENE-A	fake	+	100	200	TRANSCRIPT-A	100	200
 GENE-B	fake	-	250	350	TRANSCRIPT-B	250	350
 GENE-C	fake	+	300	400	TRANSCRIPT-C	300	400
diff --git a/tests/data/mock_sv_events.tsv b/tests/data/mock_sv_events.tsv
index bd997b86..baba0191 100644
--- a/tests/data/mock_sv_events.tsv
+++ b/tests/data/mock_sv_events.tsv
@@ -1,5 +1,5 @@
 ## False	reference9	2000	2000	reference9	2001	2001	L	R	+	+	insertion	genome	convert_ta.py_v0.0.1	mock-A36971	9:66466004
-#stranded	break1_chromosome	break1_position_start	break1_position_end	break2_chromosome	break2_position_start	break2_position_end	break1_orientation	break2_orientation	break1_strand	break2_strand	event_type	protocol	tools	library	comment
+stranded	break1_chromosome	break1_position_start	break1_position_end	break2_chromosome	break2_position_start	break2_position_end	break1_orientation	break2_orientation	break1_strand	break2_strand	event_type	protocol	tools	library	comment
 False	reference7	5000	5000	reference7	11000	11000	R	L	-	-	duplication	genome	convert_ta.py_v0.0.1	mock-A36971	7:104485067|7:104612302
 False	reference20	2000	2000	reference20	6000	6000	L	R	+	+	deletion	genome	convert_ta.py_v0.0.1	mock-A36971	20:13160730|20:13164100
 False	reference10	520	520	reference19	964	964	R	L	+	+	translocation	genome	convert_ta.py_v0.0.1	mock-A36971	10:7059511|19:17396811
diff --git a/tests/data/mock_trans_sv_events.tsv b/tests/data/mock_trans_sv_events.tsv
index 25087d31..6f1d64a7 100644
--- a/tests/data/mock_trans_sv_events.tsv
+++ b/tests/data/mock_trans_sv_events.tsv
@@ -1,5 +1,5 @@
 ## False	reference9	2000	2000	reference9	2001	2001	L	R	+	+	insertion	genome	convert_ta.py_v0.0.1	mock-A36971	9:66466004
-#stranded	break1_chromosome	break1_position_start	break1_position_end	break2_chromosome	break2_position_start	break2_position_end	break1_orientation	break2_orientation	break1_strand	break2_strand	event_type	protocol	tools	library	comment
+stranded	break1_chromosome	break1_position_start	break1_position_end	break2_chromosome	break2_position_start	break2_position_end	break1_orientation	break2_orientation	break1_strand	break2_strand	event_type	protocol	tools	library	comment
 False	gene3	27175	27175	gene3	27176	27176	R	L	+	+	duplication	transcriptome	convert_ta.py_v0.0.1	mock-A47933	1:207249992
 True	gene1	34090	34090	gene5	608	608	R	R	-	+	inverted translocation	transcriptome	convert_ta.py_v0.0.1	mock-A47933	15:40854971|7:26241389
 False	gene2	22979	22979	gene2	23783	23783	R	L	+	+	duplication	transcriptome	convert_ta.py_v0.0.1	mock-A47933	15:41623873|15:41625248#this one is pretty low qual
diff --git a/tests/data/pairing_annotations.tab b/tests/data/pairing_annotations.tab
index f661e501..a208d593 100644
--- a/tests/data/pairing_annotations.tab
+++ b/tests/data/pairing_annotations.tab
@@ -1,4 +1,4 @@
-#library	cluster_id	cluster_size	validation_id	annotation_id	event_type	gene1	gene1_direction	gene2	gene2_direction	gene_product_type	transcript1	transcript2	fusion_splicing_pattern	fusion_cdna_coding_start	fusion_cdna_coding_end	fusion_mapped_domains	fusion_sequence_fasta_id	fusion_sequence_fasta_file	annotation_figure	annotation_figure_legend	genes_encompassed	genes_overlapping_break1	genes_overlapping_break2	genes_proximal_to_break1	genes_proximal_to_break2	break1_chromosome	break1_position_start	break1_position_end	break1_orientation	break1_strand	break1_seq	break2_chromosome	break2_position_start	break2_position_end	break2_orientation	break2_strand	break2_seq	opposing_strands	stranded	protocol	tools	call_method	break1_ewindow	break1_ewindow_count	break1_homologous_seq	break1_split_read_names	break1_split_reads	break1_split_reads_forced	break2_ewindow	break2_ewindow_count	break2_homologous_seq	break2_split_read_names	break2_split_reads	break2_split_reads_forced	contig_alignment_score	contig_alignment_query_coverage	contig_build_score	contig_remap_score	contig_remapped_read_names	contig_remapped_reads	contig_seq	contig_strand_specific	contigs_aligned	contigs_assembled	spanning_reads	spanning_read_names	flanking_median_fragment_size	flanking_pairs	flanking_pairs_read_names	flanking_stdev_fragment_size	linking_split_read_names	linking_split_reads	raw_break1_half_mapped_reads	raw_break1_split_reads	raw_break2_half_mapped_reads	raw_break2_split_reads	raw_flanking_pairs	raw_spanning_reads	untemplated_seq
+library	cluster_id	cluster_size	validation_id	annotation_id	event_type	gene1	gene1_direction	gene2	gene2_direction	gene_product_type	transcript1	transcript2	fusion_splicing_pattern	fusion_cdna_coding_start	fusion_cdna_coding_end	fusion_mapped_domains	fusion_sequence_fasta_id	fusion_sequence_fasta_file	annotation_figure	annotation_figure_legend	genes_encompassed	genes_overlapping_break1	genes_overlapping_break2	genes_proximal_to_break1	genes_proximal_to_break2	break1_chromosome	break1_position_start	break1_position_end	break1_orientation	break1_strand	break1_seq	break2_chromosome	break2_position_start	break2_position_end	break2_orientation	break2_strand	break2_seq	opposing_strands	stranded	protocol	tools	call_method	break1_ewindow	break1_ewindow_count	break1_homologous_seq	break1_split_read_names	break1_split_reads	break1_split_reads_forced	break2_ewindow	break2_ewindow_count	break2_homologous_seq	break2_split_read_names	break2_split_reads	break2_split_reads_forced	contig_alignment_score	contig_alignment_query_coverage	contig_build_score	contig_remap_score	contig_remapped_read_names	contig_remapped_reads	contig_seq	contig_strand_specific	contigs_aligned	contigs_assembled	spanning_reads	spanning_read_names	flanking_median_fragment_size	flanking_pairs	flanking_pairs_read_names	flanking_stdev_fragment_size	linking_split_read_names	linking_split_reads	raw_break1_half_mapped_reads	raw_break1_split_reads	raw_break2_half_mapped_reads	raw_break2_split_reads	raw_flanking_pairs	raw_spanning_reads	untemplated_seq
 A36971	cluster-batch20170407r590869-449	1	validation-batch20170407r289453-199	annotation-batch20170410r499868-2221	inverted translocation	ENSG00000182463	5	ENSG00000146282	3	sense	ENST00000371497	ENST00000369536	normal	504	884	[]	annotation-batch20170410r499868-2221_normal	/projects/trans_scratch/validations/workspace/creisle/MAV89/output/A36971_genome/annotation/annotations.fusion-cdna.fa	/projects/trans_scratch/validations/workspace/creisle/MAV89/output/A36971_genome/annotation/drawings/mavis_20_6.b-TSHZ2_b-RARS2.annotation-batch20170410r499868-2221.svg	/projects/trans_scratch/validations/workspace/creisle/MAV89/output/A36971_genome/annotation/drawings/mavis_20_6.b-TSHZ2_b-RARS2.annotation-batch20170410r499868-2221.legend.json						20	52021562	52021562	L	+	None	6	88232570	88232570	L	-	None	True	False	genome	DELLY_v0.6.1;convert_ta.py_v0.0.1	contig	52020680-52021832	1051	T		0	0	88216583-88217735	738			0	0	0	seq31	1025	39	HISEQX1_11:3:1109:17107:4684;HISEQX1_11:3:1115:28057:65529;HISEQX1_11:3:1118:19827:12068;HISEQX1_11:3:1118:26808:54366;HISEQX1_11:3:1118:26880:54242;HISEQX1_11:3:1203:24657:33551;HISEQX1_11:3:1211:14915:49162;HISEQX1_11:3:1224:29985:17500;HISEQX1_11:3:2109:10429:26097;HISEQX1_11:3:2110:30969:47193;HISEQX1_11:3:2119:30005:71682;HISEQX1_11:3:2119:31010:72086;HISEQX1_11:3:2123:23399:43185;HISEQX1_11:3:2203:6928:43062;HISEQX1_11:3:2206:18852:13826;HISEQX1_11:3:2213:28696:54330;HISEQX1_11:3:2213:9699:72121;HISEQX1_11:3:2216:11576:10890;HISEQX1_11:3:2223:4888:12420;HISEQX1_11:4:1116:7669:4192;HISEQX1_11:4:1205:5355:29332;HISEQX1_11:4:1216:24342:59727;HISEQX1_11:4:1223:29873:24409;HISEQX1_11:4:2102:9628:27415;HISEQX1_11:4:2115:26230:35397;HISEQX1_11:4:2202:27072:18116;HISEQX1_11:4:2206:4391:19223;HISEQX1_11:4:2207:24424:30509;HISEQX1_11:4:2211:20709:31283;HISEQX1_11:4:2211:21004:20612;HISEQX1_11:4:2213:18355:30439;HISEQX1_11:4:2213:19136:30070;HISEQX1_11:4:2217:11647:32531;HISEQX1_11:4:2219:26707:61398;HISEQX1_11:4:2219:28118:62083	43	
CCCAACTGGATAATAAATTATAACAATTCTATTATCTGACTGCTTCTGTTCTTCCACGCACTCTTCGACATCCAATTTAAAACTTAAAGTTGGCCGGGCATGGCAGTTCATCCCTGTAATCTAGCATTTTGGGAGGCCGATGTGGGTGGATCACCTGAGGCCAGAAGTTCGAAACCAGCCTGGCCACCAGGGCGAAAACCTGTCTCTACAAAAATACAAAAATTAGCCGTATATGTGCATTTTTCTGGAGTTGAAGGTCCATAGATTTTTTCAGATACTTCAAAGGAGTACATGATACCCCTCCCCAACAAAAGTCCCCTATCTCTGGATTTATGCTTAAAATGAATGCATATTTTACAAAGCCA	False	1	1	0		0	16	HISEQX1_11:3:1109:17107:4684;HISEQX1_11:3:1118:26808:54366;HISEQX1_11:3:1118:26880:54242;HISEQX1_11:3:1203:24657:33551;HISEQX1_11:3:1211:14915:49162;HISEQX1_11:3:1224:29985:17500;HISEQX1_11:3:2119:30005:71682;HISEQX1_11:3:2119:31010:72086;HISEQX1_11:3:2123:23399:43185;HISEQX1_11:3:2213:9699:72121;HISEQX1_11:3:2216:11576:10890;HISEQX1_11:4:2102:9628:27415;HISEQX1_11:4:2206:4391:19223;HISEQX1_11:4:2211:20709:31283;HISEQX1_11:4:2211:21004:20612;HISEQX1_11:4:2217:11647:32531	0		0	1	27	5	22	17	0	
 A36971	cluster-batch20170407r590869-449	1	validation-batch20170407r289453-199	annotation-batch20170410r499868-2221	inverted translocation	ENSG00000182463	5	ENSG00000146282	3	sense	ENST00000371497	ENST00000369536	normal	888	3992	"[{""name"": ""PS50157"", ""sequences"": [""LKCMFCGDSFDSLQDLSVHMIKTKHYQKVP"", ""FYCSDCASQFRTPSTYISHLESHLGFQM"", ""FKCKLCCRTFVSKHAVKLHLSKTHSKSPE""], ""regions"": [{""start"": 275, ""end"": 304}, {""start"": 926, ""end"": 953}, {""start"": 994, ""end"": 1022}], ""mapping_quality"": 100.0, ""matches"": 87}, {""name"": ""SM00389"", ""sequences"": [""KRKGRQSNWNPQHLLILQAQFASSLFQTSEGKYLLSDLGPQERMQISKFTGLSMTTISHWLANVKYQLRKTGGTK""], ""regions"": [{""start"": 840, ""end"": 914}], ""mapping_quality"": 100.0, ""matches"": 75}, {""name"": ""SSF46689"", ""sequences"": [""VRRFEDVSSEVSTLHKRKGRQSNWNPQHLLILQAQFASSLFQTSEGKYLLSDLGPQERMQISKFTGLSMTTISHWLANVKYQLRK""], ""regions"": [{""start"": 825, ""end"": 909}], ""mapping_quality"": 100.0, ""matches"": 85}, {""name"": ""SSF57667"", ""sequences"": [""TVFTGASRFRCRQCSAAYDTLVELTVHMNETGHYQDD"", ""KVLKCMFCGDSFDSLQDLSVHMIKTKHYQKVPLKEPVPTISSKMVTPAKKRVFDVNRPCSPDSTTGSFADSFSSQKNANLQLSSNNRYGYQNGASYTWQFEACKSQILKCMECGSSHDTLQQLTTHMM"", ""PIFYCSDCASQFRTPSTYISHLESHLGFQMKDMTRLSVDQQSKVEQEISRVSSAQRSPETIAAEEDTDSKFKCKLCCRTFVSKHAVKLHLSKTH""], ""regions"": [{""start"": 207, ""end"": 243}, {""start"": 273, ""end"": 400}, {""start"": 924, ""end"": 1017}], ""mapping_quality"": 100.0, ""matches"": 259}, {""name"": ""SM00355"", ""sequences"": [""FRCRQCSAAYDTLVELTVHMNETGH"", ""LKCMFCGDSFDSLQDLSVHMIKTKH"", ""LKCMECGSSHDTLQQLTTHMMVTGH"", ""FYCSDCASQFRTPSTYISHLESH"", ""FKCKLCCRTFVSKHAVKLHLSKTH""], ""regions"": [{""start"": 215, ""end"": 239}, {""start"": 275, ""end"": 299}, {""start"": 380, ""end"": 404}, {""start"": 926, ""end"": 948}, {""start"": 994, ""end"": 1017}], ""mapping_quality"": 100.0, ""matches"": 122}]"	annotation-batch20170410r499868-2221_normal	
/projects/trans_scratch/validations/workspace/creisle/MAV89/output/A36971_genome/annotation/annotations.fusion-cdna.fa	/projects/trans_scratch/validations/workspace/creisle/MAV89/output/A36971_genome/annotation/drawings/mavis_20_6.b-TSHZ2_b-RARS2.annotation-batch20170410r499868-2221.svg	/projects/trans_scratch/validations/workspace/creisle/MAV89/output/A36971_genome/annotation/drawings/mavis_20_6.b-TSHZ2_b-RARS2.annotation-batch20170410r499868-2221.legend.json						20	52021562	52021562	L	+	None	6	88232570	88232570	L	-	None	True	False	genome	DELLY_v0.6.1;convert_ta.py_v0.0.1	contig	52020680-52021832	1051	T		0	0	88216583-88217735	738			0	0	0	seq31	1025	39	HISEQX1_11:3:1109:17107:4684;HISEQX1_11:3:1115:28057:65529;HISEQX1_11:3:1118:19827:12068;HISEQX1_11:3:1118:26808:54366;HISEQX1_11:3:1118:26880:54242;HISEQX1_11:3:1203:24657:33551;HISEQX1_11:3:1211:14915:49162;HISEQX1_11:3:1224:29985:17500;HISEQX1_11:3:2109:10429:26097;HISEQX1_11:3:2110:30969:47193;HISEQX1_11:3:2119:30005:71682;HISEQX1_11:3:2119:31010:72086;HISEQX1_11:3:2123:23399:43185;HISEQX1_11:3:2203:6928:43062;HISEQX1_11:3:2206:18852:13826;HISEQX1_11:3:2213:28696:54330;HISEQX1_11:3:2213:9699:72121;HISEQX1_11:3:2216:11576:10890;HISEQX1_11:3:2223:4888:12420;HISEQX1_11:4:1116:7669:4192;HISEQX1_11:4:1205:5355:29332;HISEQX1_11:4:1216:24342:59727;HISEQX1_11:4:1223:29873:24409;HISEQX1_11:4:2102:9628:27415;HISEQX1_11:4:2115:26230:35397;HISEQX1_11:4:2202:27072:18116;HISEQX1_11:4:2206:4391:19223;HISEQX1_11:4:2207:24424:30509;HISEQX1_11:4:2211:20709:31283;HISEQX1_11:4:2211:21004:20612;HISEQX1_11:4:2213:18355:30439;HISEQX1_11:4:2213:19136:30070;HISEQX1_11:4:2217:11647:32531;HISEQX1_11:4:2219:26707:61398;HISEQX1_11:4:2219:28118:62083	43	
CCCAACTGGATAATAAATTATAACAATTCTATTATCTGACTGCTTCTGTTCTTCCACGCACTCTTCGACATCCAATTTAAAACTTAAAGTTGGCCGGGCATGGCAGTTCATCCCTGTAATCTAGCATTTTGGGAGGCCGATGTGGGTGGATCACCTGAGGCCAGAAGTTCGAAACCAGCCTGGCCACCAGGGCGAAAACCTGTCTCTACAAAAATACAAAAATTAGCCGTATATGTGCATTTTTCTGGAGTTGAAGGTCCATAGATTTTTTCAGATACTTCAAAGGAGTACATGATACCCCTCCCCAACAAAAGTCCCCTATCTCTGGATTTATGCTTAAAATGAATGCATATTTTACAAAGCCA	False	1	1	0		0	16	HISEQX1_11:3:1109:17107:4684;HISEQX1_11:3:1118:26808:54366;HISEQX1_11:3:1118:26880:54242;HISEQX1_11:3:1203:24657:33551;HISEQX1_11:3:1211:14915:49162;HISEQX1_11:3:1224:29985:17500;HISEQX1_11:3:2119:30005:71682;HISEQX1_11:3:2119:31010:72086;HISEQX1_11:3:2123:23399:43185;HISEQX1_11:3:2213:9699:72121;HISEQX1_11:3:2216:11576:10890;HISEQX1_11:4:2102:9628:27415;HISEQX1_11:4:2206:4391:19223;HISEQX1_11:4:2211:20709:31283;HISEQX1_11:4:2211:21004:20612;HISEQX1_11:4:2217:11647:32531	0		0	1	27	5	22	17	0	
 A36971	cluster-batch20170407r590869-449	1	validation-batch20170407r289453-199	annotation-batch20170410r499868-2221	inverted translocation	ENSG00000182463	5	ENSG00000146282	3	sense	ENST00000371497	ENST00000369536	normal	4026	4763	"[{""name"": ""PF05746"", ""sequences"": [""LQYTHARLHSLEETFGCGYLNDFNTACLQEPQSVSILQHLLRFDEVLYKSSQDFQPRHIVSYLLTLSHLAAVAHKTLQIKDSPPEVAGARLHLFKAVRSVLANGMKLLGITPVCRM""], ""regions"": [{""start"": 130, ""end"": 245}], ""mapping_quality"": 100.0, ""matches"": 116}, {""name"": ""SSF47323"", ""sequences"": [""DTGVFLQYTHARLHSLEETFGCGYLNDFNTACLQEPQSVSILQHLLRFDEVLYKSSQDFQPRHIVSYLLTLSHLAAVAHKTLQIKDSPPEVAGARLHLFKAVRSVLANGMKLLGITPVCRM""], ""regions"": [{""start"": 125, ""end"": 245}], ""mapping_quality"": 100.0, ""matches"": 121}, {""name"": ""SM00836"", ""sequences"": [""LQYTHARLHSLEETFGCGYLNDFNTACLQEPQSVSILQHLLRFDEVLYKSSQDFQPRHIVSYLLTLSHLAAVAHKTLQIKDSPPEVAGARLHLFKAVRSVLANGMKLLGITPVCRM""], ""regions"": [{""start"": 130, ""end"": 245}], ""mapping_quality"": 100.0, ""matches"": 116}]"	annotation-batch20170410r499868-2221_normal	/projects/trans_scratch/validations/workspace/creisle/MAV89/output/A36971_genome/annotation/annotations.fusion-cdna.fa	/projects/trans_scratch/validations/workspace/creisle/MAV89/output/A36971_genome/annotation/drawings/mavis_20_6.b-TSHZ2_b-RARS2.annotation-batch20170410r499868-2221.svg	/projects/trans_scratch/validations/workspace/creisle/MAV89/output/A36971_genome/annotation/drawings/mavis_20_6.b-TSHZ2_b-RARS2.annotation-batch20170410r499868-2221.legend.json						20	52021562	52021562	L	+	None	6	88232570	88232570	L	-	None	True	False	genome	DELLY_v0.6.1;convert_ta.py_v0.0.1	contig	52020680-52021832	1051	T		0	0	88216583-88217735	738			0	0	0	seq31	1025	39	
HISEQX1_11:3:1109:17107:4684;HISEQX1_11:3:1115:28057:65529;HISEQX1_11:3:1118:19827:12068;HISEQX1_11:3:1118:26808:54366;HISEQX1_11:3:1118:26880:54242;HISEQX1_11:3:1203:24657:33551;HISEQX1_11:3:1211:14915:49162;HISEQX1_11:3:1224:29985:17500;HISEQX1_11:3:2109:10429:26097;HISEQX1_11:3:2110:30969:47193;HISEQX1_11:3:2119:30005:71682;HISEQX1_11:3:2119:31010:72086;HISEQX1_11:3:2123:23399:43185;HISEQX1_11:3:2203:6928:43062;HISEQX1_11:3:2206:18852:13826;HISEQX1_11:3:2213:28696:54330;HISEQX1_11:3:2213:9699:72121;HISEQX1_11:3:2216:11576:10890;HISEQX1_11:3:2223:4888:12420;HISEQX1_11:4:1116:7669:4192;HISEQX1_11:4:1205:5355:29332;HISEQX1_11:4:1216:24342:59727;HISEQX1_11:4:1223:29873:24409;HISEQX1_11:4:2102:9628:27415;HISEQX1_11:4:2115:26230:35397;HISEQX1_11:4:2202:27072:18116;HISEQX1_11:4:2206:4391:19223;HISEQX1_11:4:2207:24424:30509;HISEQX1_11:4:2211:20709:31283;HISEQX1_11:4:2211:21004:20612;HISEQX1_11:4:2213:18355:30439;HISEQX1_11:4:2213:19136:30070;HISEQX1_11:4:2217:11647:32531;HISEQX1_11:4:2219:26707:61398;HISEQX1_11:4:2219:28118:62083	43	CCCAACTGGATAATAAATTATAACAATTCTATTATCTGACTGCTTCTGTTCTTCCACGCACTCTTCGACATCCAATTTAAAACTTAAAGTTGGCCGGGCATGGCAGTTCATCCCTGTAATCTAGCATTTTGGGAGGCCGATGTGGGTGGATCACCTGAGGCCAGAAGTTCGAAACCAGCCTGGCCACCAGGGCGAAAACCTGTCTCTACAAAAATACAAAAATTAGCCGTATATGTGCATTTTTCTGGAGTTGAAGGTCCATAGATTTTTTCAGATACTTCAAAGGAGTACATGATACCCCTCCCCAACAAAAGTCCCCTATCTCTGGATTTATGCTTAAAATGAATGCATATTTTACAAAGCCA	False	1	1	0		0	16	HISEQX1_11:3:1109:17107:4684;HISEQX1_11:3:1118:26808:54366;HISEQX1_11:3:1118:26880:54242;HISEQX1_11:3:1203:24657:33551;HISEQX1_11:3:1211:14915:49162;HISEQX1_11:3:1224:29985:17500;HISEQX1_11:3:2119:30005:71682;HISEQX1_11:3:2119:31010:72086;HISEQX1_11:3:2123:23399:43185;HISEQX1_11:3:2213:9699:72121;HISEQX1_11:3:2216:11576:10890;HISEQX1_11:4:2102:9628:27415;HISEQX1_11:4:2206:4391:19223;HISEQX1_11:4:2211:20709:31283;HISEQX1_11:4:2211:21004:20612;HISEQX1_11:4:2217:11647:32531	0		0	1	27	5	22	17	0	
diff --git a/tests/data/pairing_reference_annotations_file.tab b/tests/data/pairing_reference_annotations_file.tab
index df444af8..108d204e 100644
--- a/tests/data/pairing_reference_annotations_file.tab
+++ b/tests/data/pairing_reference_annotations_file.tab
@@ -2,4 +2,4 @@
 ## input file for picking best transcript: ens69_best_transcript.txt
 ## Ensembl Api version 69
 ## generated at: Thu Aug  4 16:38:01 2016
-#ensembl_gene_id	hugo_names	chr	strand	gene_start	gene_end	best_ensembl_transcript_id	ensembl_transcript_id	refseq_equivalents	transcript_genomic_start	transcript_genomic_end	cdna_coding_start	cdna_coding_end	genomic_exon_ranges	AA_domain_ranges
+ensembl_gene_id	hugo_names	chr	strand	gene_start	gene_end	best_ensembl_transcript_id	ensembl_transcript_id	refseq_equivalents	transcript_genomic_start	transcript_genomic_end	cdna_coding_start	cdna_coding_end	genomic_exon_ranges	AA_domain_ranges
diff --git a/tests/end_to_end/test_convert.py b/tests/end_to_end/test_convert.py
index 2b110802..a1d33be2 100644
--- a/tests/end_to_end/test_convert.py
+++ b/tests/end_to_end/test_convert.py
@@ -93,7 +93,7 @@ def test_manta(self):
         self.assertEqual(17396810, bpp.break2.end)
         self.assertEqual(ORIENT.LEFT, bpp.break2.orient)
         somatic_event = result['manta-MantaDEL:20644:0:2:0:0:0'][0]
-        self.assertEqual('True', somatic_event.data.get('SOMATIC', False))
+        self.assertEqual(True, somatic_event.data.get('SOMATIC', False))
 
     def test_pindel(self):
         self.run_main(get_data('pindel_events.vcf'), SUPPORTED_TOOL.PINDEL, False)
diff --git a/tests/integration/test_annotate.py b/tests/integration/test_annotate.py
index c2086654..9532665c 100644
--- a/tests/integration/test_annotate.py
+++ b/tests/integration/test_annotate.py
@@ -2,27 +2,25 @@
 import unittest
 
 from mavis.annotate.base import BioInterval, ReferenceName
-from mavis.annotate.file_io import load_reference_genes, load_reference_genome
-from mavis.annotate.genomic import Exon, Gene, Template, Transcript, PreTranscript
-from mavis.annotate.protein import calculate_orf, Domain, DomainRegion, translate, Translation
+from mavis.annotate.file_io import load_annotations, load_reference_genome
+from mavis.annotate.fusion import FusionTranscript, determine_prime
+from mavis.annotate.genomic import Exon, Gene, PreTranscript, Template, Transcript
+from mavis.annotate.protein import Domain, DomainRegion, Translation, calculate_orf, translate
 from mavis.annotate.variant import (
+    Annotation,
     _gather_annotations,
     _gather_breakpoint_annotations,
     annotate_events,
-    Annotation,
     flatten_fusion_transcript,
     overlapping_transcripts,
 )
-from mavis.annotate.fusion import determine_prime, FusionTranscript
-from mavis.annotate.constants import SPLICE_TYPE
 from mavis.breakpoint import Breakpoint, BreakpointPair
-from mavis.constants import ORIENT, PRIME, PROTOCOL, reverse_complement, STRAND, SVTYPE
+from mavis.constants import ORIENT, PRIME, PROTOCOL, SPLICE_TYPE, STRAND, SVTYPE, reverse_complement
 from mavis.error import NotSpecifiedError
 from mavis.interval import Interval
 
-from . import MockLongString, MockObject, get_example_genes
 from ..util import get_data
-
+from . import MockLongString, MockObject, get_example_genes
 
 REFERENCE_ANNOTATIONS = None
 REFERENCE_GENOME = None
@@ -33,7 +31,7 @@
 def setUpModule():
     global REFERENCE_ANNOTATIONS, REFERENCE_GENOME, REF_CHR, EXAMPLE_GENES
     EXAMPLE_GENES = get_example_genes()
-    REFERENCE_ANNOTATIONS = load_reference_genes(get_data('mock_reference_annotations.tsv'))
+    REFERENCE_ANNOTATIONS = load_annotations(get_data('mock_reference_annotations.tsv'))
     count = sum([len(genes) for genes in REFERENCE_ANNOTATIONS.values()])
     print('loaded annotations', count)
     assert count >= 6  # make sure this is the file we expect
@@ -1461,13 +1459,13 @@ def test_reference_name_dict(self):
         self.assertEqual(1, len(d))
 
     def test_loading_json_annotations(self):
-        annotations = load_reference_genes(get_data('mock_reference_annotations.json'))
+        annotations = load_annotations(get_data('mock_reference_annotations.json'))
         self.assertEqual(1, len(annotations.keys()))
         self.assertEqual(1, len(list(annotations.values())[0]))
 
     def test_loading_annotations_not_found(self):
         with self.assertRaises(FileNotFoundError):
-            load_reference_genes('file.other')
+            load_annotations('file.other')
 
     def test_determine_prime(self):
         tneg = PreTranscript(exons=[(3, 4)], strand=STRAND.NEG)
@@ -1558,9 +1556,7 @@ def test_calculate_orf_nested(self):
 
 class TestAnnotateEvents(unittest.TestCase):
     def test_annotate_events(self):
-        reference_annotations = load_reference_genes(
-            get_data('mock_reference_annotations.full.tsv')
-        )
+        reference_annotations = load_annotations(get_data('mock_reference_annotations.full.tsv'))
         b1 = Breakpoint('fakereference9', 658, orient=ORIENT.RIGHT, strand=STRAND.POS)
         b2 = Breakpoint('fakereference9', 10237, orient=ORIENT.RIGHT, strand=STRAND.NEG)
         bpp = BreakpointPair(
diff --git a/tests/integration/test_annotate_examples.py b/tests/integration/test_annotate_examples.py
index 3a57d5a5..1e28f845 100644
--- a/tests/integration/test_annotate_examples.py
+++ b/tests/integration/test_annotate_examples.py
@@ -1,19 +1,18 @@
 import os
 import unittest
 
+from mavis.annotate.fusion import FusionTranscript
 from mavis.annotate.variant import (
-    annotate_events,
     Annotation,
-    flatten_fusion_transcript,
-    call_protein_indel,
     IndelCall,
+    annotate_events,
+    call_protein_indel,
+    flatten_fusion_transcript,
 )
-from mavis.annotate.fusion import FusionTranscript
-from mavis.annotate.constants import SPLICE_TYPE
 from mavis.breakpoint import Breakpoint, BreakpointPair
-from mavis.constants import ORIENT, PROTOCOL, STRAND, SVTYPE
+from mavis.constants import ORIENT, PROTOCOL, SPLICE_TYPE, STRAND, SVTYPE
 
-from . import get_example_genes, MockObject, MockLongString
+from . import MockLongString, MockObject, get_example_genes
 
 
 def get_best(gene):
diff --git a/tests/integration/test_bam.py b/tests/integration/test_bam.py
index f29453df..0712bc57 100644
--- a/tests/integration/test_bam.py
+++ b/tests/integration/test_bam.py
@@ -1,10 +1,11 @@
 import logging
 import os
 import unittest
-from unittest import mock
 import warnings
+from unittest import mock
 
-from mavis.annotate.file_io import load_reference_genes, load_reference_genome
+import timeout_decorator
+from mavis.annotate.file_io import load_annotations, load_reference_genome
 from mavis.bam import cigar as _cigar
 from mavis.bam import read as _read
 from mavis.bam.cache import BamCache
@@ -14,22 +15,20 @@
     read_pair_type,
     sequenced_strand,
 )
-from mavis.bam.stats import compute_genome_bam_stats, compute_transcriptome_bam_stats, Histogram
+from mavis.bam.stats import Histogram, compute_genome_bam_stats, compute_transcriptome_bam_stats
 from mavis.constants import (
     CIGAR,
     DNA_ALPHABET,
+    NA_MAPPING_QUALITY,
     ORIENT,
     READ_PAIR_TYPE,
     STRAND,
     SVTYPE,
-    NA_MAPPING_QUALITY,
 )
 from mavis.interval import Interval
-import timeout_decorator
 
-from . import MockRead, MockBamFileHandle
 from ..util import get_data
-
+from . import MockBamFileHandle, MockRead
 
 REFERENCE_GENOME = None
 
@@ -463,7 +462,7 @@ def test_genome_bam_stats(self):
 
     def test_trans_bam_stats(self):
         bamfh = BamCache(get_data('mock_trans_reads_for_events.sorted.bam'))
-        annotations = load_reference_genes(get_data('mock_annotations.json'))
+        annotations = load_annotations(get_data('mock_annotations.json'))
         stats = compute_transcriptome_bam_stats(
             bamfh,
             annotations,
diff --git a/tests/integration/test_splicing.py b/tests/integration/test_splicing.py
index 55503882..a80adb11 100644
--- a/tests/integration/test_splicing.py
+++ b/tests/integration/test_splicing.py
@@ -1,13 +1,13 @@
 import os
 import unittest
 
-from mavis.annotate.constants import SPLICE_SITE_RADIUS, SPLICE_TYPE
+from mavis.annotate.constants import SPLICE_SITE_RADIUS
 from mavis.annotate.file_io import load_annotations, load_reference_genome
 from mavis.annotate.genomic import Exon, PreTranscript
 from mavis.annotate.splicing import predict_splice_sites
 from mavis.annotate.variant import annotate_events
 from mavis.breakpoint import Breakpoint, BreakpointPair
-from mavis.constants import PROTOCOL, reverse_complement, STRAND, SVTYPE
+from mavis.constants import PROTOCOL, SPLICE_TYPE, STRAND, SVTYPE, reverse_complement
 from mavis.interval import Interval
 
 from . import DATA_DIR, MockLongString, MockObject, get_example_genes
diff --git a/tests/unit/test_tab.py b/tests/unit/test_tab.py
deleted file mode 100644
index b510c635..00000000
--- a/tests/unit/test_tab.py
+++ /dev/null
@@ -1,287 +0,0 @@
-import unittest
-from tab import FileTransform, cast_boolean, cast_null
-
-
-class MockFileTransform:
-    def __init__(self, h, **kwargs):
-        self.input = h
-        self.require = kwargs.pop('require', [])
-        self.rename = kwargs.pop('rename', {})
-        self.drop = kwargs.pop('drop', [])
-        self.add = kwargs.pop('add', {})
-        self.add_default = kwargs.pop('add_default', {})
-        self.split = kwargs.pop('split', {})
-        self.combine = kwargs.pop('combine', {})
-        self.validate = kwargs.pop('validate', {})
-        self.cast = kwargs.pop('cast', {})
-        self.simplify = kwargs.pop('simplify', False)
-        self.in_ = kwargs.pop('in_', {})
-
-    def transform_line(self, *pos, **kwargs):
-        return FileTransform.transform_line(self, *pos, **kwargs)
-
-
-class TestCast(unittest.TestCase):
-    def test_cast_boolean_true(self):
-        self.assertEqual(True, cast_boolean('+'))
-        self.assertEqual(True, cast_boolean('T'))
-        self.assertEqual(True, cast_boolean('true'))
-        self.assertEqual(True, cast_boolean('y'))
-        self.assertEqual(True, cast_boolean(1))
-
-    def test_cast_boolean_false(self):
-        self.assertEqual(False, cast_boolean('-'))
-        self.assertEqual(False, cast_boolean('f'))
-        self.assertEqual(False, cast_boolean('false'))
-        self.assertEqual(False, cast_boolean('n'))
-        self.assertEqual(False, cast_boolean(0))
-
-    def test_cast_boolean_error(self):
-        with self.assertRaises(TypeError):
-            cast_boolean(2)
-
-    def test_cast_null_ok(self):
-        self.assertEqual(None, cast_null('none'))
-        self.assertEqual(None, cast_null(None))
-
-    def test_cast_null_error(self):
-        with self.assertRaises(TypeError):
-            cast_null('f')
-
-
-class TestFileTransform(unittest.TestCase):
-    def test_simplify(self):
-        h = ['a', 'b', 'c']
-        ft = FileTransform(header=h)
-        self.assertEqual(h, ft.input)
-        self.assertEqual(h, ft.header)
-        ft = FileTransform(h, simplify=True)
-        self.assertEqual(h, ft.input)
-        self.assertEqual([], ft.header)
-
-    def test_require_simplify(self):
-        h = ['a', 'b', 'c']
-        ft = FileTransform(header=h, require=['a'], simplify=True)
-        self.assertEqual(h, ft.input)
-        self.assertEqual(['a'], ft.header)
-
-    def test_require_error(self):
-        h = ['a', 'b', 'c']
-        with self.assertRaises(KeyError):
-            FileTransform(header=h, require=['k'])
-
-    def test_rename(self):
-        h = ['a', 'b', 'c']
-        ft = FileTransform(h, rename={'a': ['k', 'm']})
-        self.assertEqual(h, ft.input)
-        self.assertEqual(['a', 'b', 'c', 'k', 'm'], ft.header)
-
-    def test_rename_error(self):
-        h = ['a', 'b', 'c']
-        with self.assertRaises(KeyError):
-            FileTransform(header=h, rename={'k': ['t']})
-
-    def test_cast_error(self):
-        h = ['b', 'c']
-        with self.assertRaises(KeyError):
-            FileTransform(h, cast={'a': int})
-
-    def test_add(self):
-        h = ['a', 'b', 'c']
-        ft = FileTransform(h, add_default={'k': 1})
-        self.assertEqual(h, ft.input)
-        self.assertEqual(['a', 'b', 'c', 'k'], ft.header)
-
-    def test_require__in(self):
-        h = ['a', 'b', 'c']
-        ft = FileTransform(h, require=['c'], in_={'a': []}, simplify=True)
-        self.assertEqual(h, ft.input)
-        self.assertEqual(['a', 'c'], ft.header)
-
-    def test_combine(self):
-        h = ['a', 'b', 'c']
-        ft = FileTransform(h, combine={'k': '{a}{b}{c}'})
-        self.assertEqual(ft.input, h)
-        self.assertEqual(ft.header, h + ['k'])
-
-    def test_combine_error_name_conflict(self):
-        h = ['a', 'b', 'c']
-        with self.assertRaises(KeyError):
-            FileTransform(h, combine={'b': '{a}{b}{c}'})
-
-    def test_combine_keyerror(self):
-        h = ['a', 'b', 'c']
-        with self.assertRaises(KeyError):
-            FileTransform(h, combine={'k': '{m}{b}{c}'})
-
-    def test_duplicate_input_column(self):
-        with self.assertRaises(KeyError):
-            FileTransform(['a', 'a'])
-
-    def test_validate_missing_column(self):
-        with self.assertRaises(KeyError):
-            FileTransform(['a', 'b'], validate={'c': ''})
-
-    def test_drop_and_require_error(self):
-        with self.assertRaises(AssertionError):
-            FileTransform(['a'], require=['a'], drop=['a'])
-
-    def test_membership_of_missing_column_error(self):
-        with self.assertRaises(KeyError):
-            FileTransform(['a'], in_={'x': []})
-
-    def test_membership_bad_object(self):
-        with self.assertRaises(TypeError):
-            FileTransform(['a'], in_={'a': 1})
-
-    def test_cast_noncallable_error(self):
-        FileTransform(['a'], cast={'a': int})
-        with self.assertRaises(TypeError):
-            FileTransform(['a'], cast={'a': 1})
-
-    def test_split_missing_column_error(self):
-        FileTransform(['a'], split={'a': r'^(?P<thing>\w+)'})
-        with self.assertRaises(KeyError):
-            FileTransform(['a'], split={'x': r'^(?P<thing>\w+)'})
-
-    def test_split_duplicate_column_error(self):
-        FileTransform(['a', 'b'], split={'a': r'^(?P<thing>\w+)'})
-        with self.assertRaises(KeyError):
-            FileTransform(['a', 'b'], require=['b'], split={'a': r'^(?P<b>\w+)'})
-
-    def test_add(self):
-        ft = FileTransform(['a', 'b'], add={'c': 1})
-        self.assertEqual(['a', 'b', 'c'], ft.header)
-
-    def test_add_default(self):
-        ft = FileTransform(['a', 'b'], add_default={'c': 1})
-        self.assertEqual(['a', 'b', 'c'], ft.header)
-        ft = FileTransform(['a', 'b'], add_default={'b': 1})
-        self.assertEqual(['a', 'b'], ft.header)
-
-    def test_require(self):
-        ft = FileTransform(['a', 'b'], require=['a'])
-        self.assertEqual(['a', 'b'], ft.header)
-
-        ft = FileTransform(['a', 'b'], require=['a'], simplify=True)
-        self.assertEqual(['a'], ft.header)
-
-    def test_invalid_option(self):
-        with self.assertRaises(TypeError):
-            FileTransform(['a', 'b'], require=['a'], blargh=1)
-
-
-class TestTransformLine(unittest.TestCase):
-    def test_add(self):
-        h = ['a', 'b', 'c']
-        ft = MockFileTransform(h, add={'a': 'blargh'})
-        row = ft.transform_line(['1', '2', '3'])
-        self.assertEqual('blargh', row['a'])
-
-        h = ['a', 'b', 'c']
-        ft = MockFileTransform(h, add={'x': 'blargh'})
-        row = ft.transform_line(['1', '2', '3'])
-        self.assertEqual('blargh', row['x'])
-
-    def test_combine(self):
-        h = ['a', 'b', 'c']
-        ft = MockFileTransform(h, combine={'k': '{a}{b}{c}'})
-        row = ft.transform_line(['1', '2', '3'])
-        self.assertEqual('123', row['k'])
-
-    def test_combine_then_cast(self):
-        h = ['a', 'b', 'c']
-        ft = MockFileTransform(h, combine={'k': '{a}{b}{c}'}, cast={'k': int})
-        row = ft.transform_line(['1', '2', '3'])
-        self.assertEqual(123, row['k'])
-
-    def test_cast_to_cast_boolean(self):
-        h = ['a', 'b', 'c']
-        ft = MockFileTransform(h, cast={'a': cast_boolean, 'b': cast_boolean})
-        row = ft.transform_line(['1', '0', '3'])
-        self.assertEqual(True, row['a'])
-        self.assertEqual(False, row['b'])
-
-    def test_split_combine_cast(self):
-        h = ['a', 'b', 'c']
-        ft = MockFileTransform(
-            h,
-            split={'a': r'^(?P<a1>\d+)_(?P<a2>\d+)$'},
-            combine={'k': '{a1}{b}{c}'},
-            cast={'k': int},
-        )
-        row = ft.transform_line(['1_10', '2', '3'])
-        self.assertEqual(123, row['k'])
-
-    def test_add_default(self):
-        h = ['a', 'b', 'c']
-        ft = MockFileTransform(h, add_default={'k': 1})
-        line = ['1', '2', '3']
-        row = ft.transform_line(line)
-        self.assertEqual(1, row['k'])
-        self.assertEqual('1', row['a'])
-        self.assertEqual('2', row['b'])
-        self.assertEqual('3', row['c'])
-
-    def test_add_default_override_default(self):
-        h = ['a', 'b', 'c']
-        ft = MockFileTransform(h, add_default={'a': 8}, in_={'a': ['1']})
-        line = ['1', '2', '3']
-        row = ft.transform_line(line)
-        self.assertEqual('1', row['a'])
-        self.assertEqual('2', row['b'])
-        self.assertEqual('3', row['c'])
-
-    def test_validate(self):
-        h = ['a']
-        ft = MockFileTransform(h, validate={'a': r'^[t]+$'})
-        line = ['ttttt']
-        row = ft.transform_line(line)
-        self.assertEqual('ttttt', row['a'])
-
-    def test_rename(self):
-        h = ['a']
-        ft = MockFileTransform(h, rename={'a': ['b', 'c']})
-        line = ['ttttt']
-        row = ft.transform_line(line)
-        self.assertEqual('ttttt', row['a'])
-        self.assertEqual('ttttt', row['b'])
-        self.assertEqual('ttttt', row['c'])
-
-    def test_length_mismatch_error(self):
-        h = ['a', 'b']
-        ft = MockFileTransform(h)
-        line = ['ttttt']
-        with self.assertRaises(AssertionError):
-            ft.transform_line(line)
-
-    def test_rename_drop_original(self):
-        h = ['a']
-        ft = MockFileTransform(h, rename={'a': ['b', 'c']}, drop=['a'])
-        line = ['ttttt']
-        row = ft.transform_line(line)
-        self.assertTrue('a' not in row)
-        self.assertEqual('ttttt', row['b'])
-        self.assertEqual('ttttt', row['c'])
-
-        ft = MockFileTransform(h, rename={'a': ['b', 'c']}, simplify=True)
-        row = ft.transform_line(line)
-        self.assertTrue('a' not in row)
-        self.assertEqual('ttttt', row['b'])
-        self.assertEqual('ttttt', row['c'])
-
-    def test_split(self):
-        h = ['a']
-        ft = MockFileTransform(h, split={'a': r'^(?P<a1>\d+)[_]+(?P<a2>\d+)$'})
-        row = ft.transform_line(['1_2'])
-        self.assertEqual('1', row['a1'])
-        self.assertEqual('2', row['a2'])
-        row = ft.transform_line(['1__2'])
-        self.assertEqual('1', row['a1'])
-        self.assertEqual('2', row['a2'])
-        with self.assertRaises(UserWarning):
-            ft.transform_line(['_1__4'])
-
-
-if __name__ == '__main__':
-    unittest.main()
diff --git a/tests/unit/test_tool.py b/tests/unit/test_tool.py
index fffaa36a..72c0d2e9 100644
--- a/tests/unit/test_tool.py
+++ b/tests/unit/test_tool.py
@@ -1,5 +1,6 @@
 import unittest
 
+import pytest
 from mavis.constants import COLUMNS, ORIENT, STRAND, SVTYPE
 from mavis.tools import SUPPORTED_TOOL, _convert_tool_row, _parse_transabyss
 from mavis.tools.vcf import convert_record as _parse_vcf_record
@@ -8,7 +9,7 @@
 from .mock import Mock
 
 
-class TestDelly(unittest.TestCase):
+class TestDelly:
     def test_convert_insertion(self):
         row = Mock(
             chrom='1',
@@ -25,28 +26,28 @@ def test_convert_insertion(self):
             alts=[],
         )
         bpp_list = _convert_tool_row(_parse_vcf_record(row)[0], SUPPORTED_TOOL.DELLY, False)
-        self.assertEqual(1, len(bpp_list))
-        bpp = bpp_list[0]
-        self.assertEqual('1', bpp.break1.chr)
-        self.assertEqual(247760043 - 10, bpp.break1.start)
-        self.assertEqual(247760043 + 10, bpp.break1.end)
-        self.assertEqual(ORIENT.LEFT, bpp.break1.orient)
-        self.assertEqual(STRAND.NS, bpp.break1.strand)
-        self.assertEqual(247760044 - 10, bpp.break2.start)
-        self.assertEqual(247760044 + 10, bpp.break2.end)
-        self.assertEqual(ORIENT.RIGHT, bpp.break2.orient)
-        self.assertEqual(STRAND.NS, bpp.break2.strand)
-        self.assertEqual('1', bpp.break2.chr)
-        self.assertEqual(SVTYPE.INS, bpp.event_type)
-        self.assertEqual(None, bpp.untemplated_seq)
+        assert len(bpp_list) == 1
+        bpp = bpp_list[0]
+        assert bpp.break1.chr == '1'
+        assert bpp.break1.start == 247760043 - 10
+        assert bpp.break1.end == 247760043 + 10
+        assert bpp.break1.orient == ORIENT.LEFT
+        assert bpp.break1.strand == STRAND.NS
+        assert bpp.break2.start == 247760044 - 10
+        assert bpp.break2.end == 247760044 + 10
+        assert bpp.break2.orient == ORIENT.RIGHT
+        assert bpp.break2.strand == STRAND.NS
+        assert bpp.break2.chr == '1'
+        assert bpp.event_type == SVTYPE.INS
+        assert bpp.untemplated_seq == None
 
         bpp_list = _convert_tool_row(
             _parse_vcf_record(row)[0], SUPPORTED_TOOL.DELLY, False, assume_no_untemplated=True
         )
-        self.assertEqual(1, len(bpp_list))
+        assert len(bpp_list) == 1
         bpp = bpp_list[0]
-        self.assertEqual(None, bpp.untemplated_seq)
-        self.assertNotEqual('', bpp.untemplated_seq)
+        assert bpp.untemplated_seq == None
+        assert bpp.untemplated_seq != ''
 
     def test_convert_convert_translocation(self):
         row = Mock(
@@ -66,43 +67,43 @@ def test_convert_convert_translocation(self):
         bpp_list = _convert_tool_row(_parse_vcf_record(row)[0], SUPPORTED_TOOL.DELLY, False)
         for b in bpp_list:
             print(b)
-        self.assertEqual(1, len(bpp_list))
+        assert len(bpp_list) == 1
         row.info['CT'] = 'NtoN'
         bpp_list = _convert_tool_row(_parse_vcf_record(row)[0], SUPPORTED_TOOL.DELLY, False)
         for b in bpp_list:
             print(b)
-        self.assertEqual(4, len(bpp_list))
+        assert len(bpp_list) == 4
 
 
-class TestCnvNator(unittest.TestCase):
+class TestCnvNator:
     def test_convert_deletion(self):
         row = {'event_type': 'deletion', 'coordinates': '1:1-10000'}
         bpp_list = _convert_tool_row(row, SUPPORTED_TOOL.CNVNATOR, False)
-        self.assertEqual(1, len(bpp_list))
+        assert len(bpp_list) == 1
         bpp = bpp_list[0]
-        self.assertEqual(1, bpp.break1.start)
-        self.assertEqual(1, bpp.break1.end)
-        self.assertEqual(10000, bpp.break2.start)
-        self.assertEqual(10000, bpp.break2.start)
-        self.assertEqual(SVTYPE.DEL, bpp.event_type)
-        self.assertEqual('1', bpp.break1.chr)
-        self.assertEqual('1', bpp.break2.chr)
+        assert bpp.break1.start == 1
+        assert bpp.break1.end == 1
+        assert bpp.break2.start == 10000
+        assert bpp.break2.start == 10000
+        assert bpp.event_type == SVTYPE.DEL
+        assert bpp.break1.chr == '1'
+        assert bpp.break2.chr == '1'
 
     def test_convert_duplication(self):
         row = {'event_type': 'duplication', 'coordinates': '1:1-10000'}
         bpp_list = _convert_tool_row(row, SUPPORTED_TOOL.CNVNATOR, False)
-        self.assertEqual(1, len(bpp_list))
+        assert len(bpp_list) == 1
         bpp = bpp_list[0]
-        self.assertEqual(1, bpp.break1.start)
-        self.assertEqual(1, bpp.break1.end)
-        self.assertEqual(10000, bpp.break2.start)
-        self.assertEqual(10000, bpp.break2.start)
-        self.assertEqual(SVTYPE.DUP, bpp.event_type)
-        self.assertEqual('1', bpp.break1.chr)
-        self.assertEqual('1', bpp.break2.chr)
+        assert bpp.break1.start == 1
+        assert bpp.break1.end == 1
+        assert bpp.break2.start == 10000
+        assert bpp.break2.start == 10000
+        assert bpp.event_type == SVTYPE.DUP
+        assert bpp.break1.chr == '1'
+        assert bpp.break2.chr == '1'
 
 
-class TestStarFusion(unittest.TestCase):
+class TestStarFusion:
     def test_convert_standard_event(self):
         row = {
             'FusionName': 'GAS6--RASA3',
@@ -111,14 +112,14 @@ def test_convert_standard_event(self):
         }
         bpp_list = _convert_tool_row(row, SUPPORTED_TOOL.STARFUSION, True)
 
-        self.assertEqual(2, len(bpp_list))
+        assert len(bpp_list) == 2
         bpp = bpp_list[0]
-        self.assertEqual('chr13', bpp.break1.chr)
-        self.assertEqual('chr13', bpp.break2.chr)
-        self.assertEqual(114529969, bpp.break1.start)
-        self.assertEqual(114751269, bpp.break2.start)
-        self.assertEqual(False, bpp.opposing_strands)
-        self.assertEqual(True, bpp.stranded)
+        assert bpp.break1.chr == 'chr13'
+        assert bpp.break2.chr == 'chr13'
+        assert bpp.break1.start == 114529969
+        assert bpp.break2.start == 114751269
+        assert bpp.opposing_strands == False
+        assert bpp.stranded == True
 
     def test_convert_translocation(self):
         row = {
@@ -128,22 +129,22 @@ def test_convert_translocation(self):
         }
         bpp_list = _convert_tool_row(row, SUPPORTED_TOOL.STARFUSION, True)
 
-        self.assertEqual(2, len(bpp_list))
+        assert len(bpp_list) == 2
         bpp = bpp_list[0]
-        self.assertEqual('chr17', bpp.break1.chr)
-        self.assertEqual('chr20', bpp.break2.chr)
-        self.assertEqual(59445688, bpp.break1.start)
-        self.assertEqual(49411710, bpp.break2.start)
-        self.assertEqual(False, bpp.opposing_strands)
-        self.assertEqual(True, bpp.stranded)
+        assert bpp.break1.chr == 'chr17'
+        assert bpp.break2.chr == 'chr20'
+        assert bpp.break1.start == 59445688
+        assert bpp.break2.start == 49411710
+        assert bpp.opposing_strands == False
+        assert bpp.stranded == True
 
     def test_malformed(self):
         row = {'FusionName': 'BCAS4--BCAS3', 'LeftBreakpoint': '', 'RightBreakpoint': None}
-        with self.assertRaises(AssertionError):
+        with pytest.raises(AssertionError):
             _convert_tool_row(row, SUPPORTED_TOOL.STARFUSION, False)
 
 
-class TestTransAbyss(unittest.TestCase):
+class TestTransAbyss:
     def test_convert_stranded_indel_insertion(self):
         row = {
             'chr': '1',
@@ -155,16 +156,16 @@ def test_convert_stranded_indel_insertion(self):
             'id': 1,
         }
         bpp_list = _convert_tool_row(row, SUPPORTED_TOOL.TA, True)
-        self.assertEqual(2, len(bpp_list))
+        assert len(bpp_list) == 2
         bpp = bpp_list[0]
-        self.assertEqual('1', bpp.break1.chr)
-        self.assertEqual('1', bpp.break2.chr)
-        self.assertEqual(10015, bpp.break1.start)
-        self.assertEqual(10016, bpp.break2.start)
-        self.assertEqual(SVTYPE.INS, bpp.event_type)
-        self.assertEqual(False, bpp.opposing_strands)
-        self.assertEqual(True, bpp.stranded)
-        self.assertEqual('AAT', bpp.untemplated_seq)
+        assert bpp.break1.chr == '1'
+        assert bpp.break2.chr == '1'
+        assert bpp.break1.start == 10015
+        assert bpp.break2.start == 10016
+        assert bpp.event_type == SVTYPE.INS
+        assert bpp.opposing_strands == False
+        assert bpp.stranded == True
+        assert bpp.untemplated_seq == 'AAT'
 
     def test_convert_indel_deletion(self):
         row = {
@@ -182,9 +183,9 @@ def test_convert_indel_deletion(self):
         print(_convert_tool_row)
         for bpp in bpp_list:
             print(bpp)
-        self.assertEqual(2, len(bpp_list))
+        assert len(bpp_list) == 2
         bpp = bpp_list[0]
-        self.assertEqual('', bpp.untemplated_seq)
+        assert bpp.untemplated_seq == ''
 
     def test_convert_indel_unstranded_insertion(self):
         row = {
@@ -199,15 +200,15 @@ def test_convert_indel_unstranded_insertion(self):
         }
         bpp_list = _convert_tool_row(row, SUPPORTED_TOOL.TA, False)
         print([str(b) for b in bpp_list])
-        self.assertEqual(1, len(bpp_list))
+        assert len(bpp_list) == 1
 
         bpp = bpp_list[0]
-        self.assertEqual(SVTYPE.INS, bpp.event_type)
-        self.assertEqual(STRAND.NS, bpp.break1.strand)
-        self.assertEqual(STRAND.NS, bpp.break2.strand)
-        self.assertEqual(False, bpp.stranded)
-        self.assertEqual(False, bpp.opposing_strands)
-        self.assertEqual('TT', bpp.untemplated_seq)
+        assert bpp.event_type == SVTYPE.INS
+        assert bpp.break1.strand == STRAND.NS
+        assert bpp.break2.strand == STRAND.NS
+        assert bpp.stranded == False
+        assert bpp.opposing_strands == False
+        assert bpp.untemplated_seq == 'TT'
 
     def test_convert_indel_duplication(self):
         row = {
@@ -222,15 +223,15 @@ def test_convert_indel_duplication(self):
         }
         bpp_list = _convert_tool_row(row, SUPPORTED_TOOL.TA, False)
         print([str(b) for b in bpp_list])
-        self.assertEqual(1, len(bpp_list))
+        assert len(bpp_list) == 1
 
         bpp = bpp_list[0]
-        self.assertEqual(SVTYPE.DUP, bpp.event_type)
-        self.assertEqual(STRAND.NS, bpp.break1.strand)
-        self.assertEqual(STRAND.NS, bpp.break2.strand)
-        self.assertEqual(False, bpp.stranded)
-        self.assertEqual(False, bpp.opposing_strands)
-        self.assertEqual('', bpp.untemplated_seq)
+        assert bpp.event_type == SVTYPE.DUP
+        assert bpp.break1.strand == STRAND.NS
+        assert bpp.break2.strand == STRAND.NS
+        assert bpp.stranded == False
+        assert bpp.opposing_strands == False
+        assert bpp.untemplated_seq == ''
 
     def test_convert_translocation(self):
         raise unittest.SkipTest('TODO')
@@ -246,7 +247,7 @@ def test_convert_stranded_translocation(self):
             'id': 1,
         }
         bpp_list = _convert_tool_row(row, SUPPORTED_TOOL.TA, True)
-        self.assertEqual(2, len(bpp_list))
+        assert len(bpp_list) == 2
 
     def test_parse_stranded_translocation(self):
         row = {
@@ -260,10 +261,10 @@ def test_parse_stranded_translocation(self):
         }
         std = _parse_transabyss(row)
         print(std)
-        self.assertTrue('event_type' not in std)
+        assert 'event_type' not in std
 
 
-class TestManta(unittest.TestCase):
+class TestManta:
     def test_convert_deletion(self):
         row = Mock(
             chrom='21',
@@ -274,16 +275,16 @@ def test_convert_deletion(self):
             alts=[],
         )
         bpp_list = _convert_tool_row(_parse_vcf_record(row)[0], SUPPORTED_TOOL.MANTA, False)
-        self.assertEqual(1, len(bpp_list))
-        bpp = bpp_list[0]
-        self.assertEqual('21', bpp.break1.chr)
-        self.assertEqual(9412306, bpp.break1.start)
-        self.assertEqual(9412310, bpp.break1.end)
-        self.assertEqual(9412400, bpp.break2.start)
-        self.assertEqual(9412404, bpp.break2.end)
-        self.assertEqual('21', bpp.break2.chr)
+        assert len(bpp_list) == 1
+        bpp = bpp_list[0]
+        assert bpp.break1.chr == '21'
+        assert bpp.break1.start == 9412306
+        assert bpp.break1.end == 9412310
+        assert bpp.break2.start == 9412400
+        assert bpp.break2.end == 9412404
+        assert bpp.break2.chr == '21'
         print(bpp, bpp.data['tracking_id'])
-        self.assertEqual('manta-MantaDEL:20644:0:2:0:0:0', bpp.data['tracking_id'])
+        assert bpp.data['tracking_id'] == 'manta-MantaDEL:20644:0:2:0:0:0'
 
     def test_convert_duplication(self):
         row = Mock(
@@ -295,11 +296,11 @@ def test_convert_duplication(self):
             alts=[],
         )
         bpp_list = _convert_tool_row(_parse_vcf_record(row)[0], SUPPORTED_TOOL.MANTA, False)
-        self.assertEqual(1, len(bpp_list))
+        assert len(bpp_list) == 1
         bpp = bpp_list[0]
-        self.assertEqual('1', bpp.break1.chr)
-        self.assertEqual('1', bpp.break2.chr)
-        self.assertEqual('manta-MantaDUP:TANDEM:22477:0:1:0:9:0', bpp.data['tracking_id'])
+        assert bpp.break1.chr == '1'
+        assert bpp.break2.chr == '1'
+        assert bpp.data['tracking_id'] == 'manta-MantaDUP:TANDEM:22477:0:1:0:9:0'
 
     def test_non_trans_bnd(self):
         row = Mock(
@@ -319,16 +320,16 @@ def test_non_trans_bnd(self):
         )
         vcf_list = _parse_vcf_record(row)
         bpp_list = _convert_tool_row(vcf_list[0], SUPPORTED_TOOL.MANTA, False)
-        self.assertEqual(1, len(bpp_list))
+        assert len(bpp_list) == 1
         bpp = bpp_list[0]
-        self.assertEqual('1', bpp.break1.chr)
-        self.assertEqual('1', bpp.break2.chr)
-        self.assertEqual(17051724, bpp.break1.start)
-        self.assertEqual(234912188, bpp.break2.start)
-        self.assertEqual('R', bpp.break1.orient)
-        self.assertEqual('R', bpp.break2.orient)
-        self.assertEqual('manta-MantaBND:207:0:1:0:0:0:0', bpp.data['tracking_id'])
-        self.assertEqual(1, len(bpp_list))
+        assert bpp.break1.chr == '1'
+        assert bpp.break2.chr == '1'
+        assert bpp.break1.start == 17051724
+        assert bpp.break2.start == 234912188
+        assert bpp.break1.orient == 'R'
+        assert bpp.break2.orient == 'R'
+        assert bpp.data['tracking_id'] == 'manta-MantaBND:207:0:1:0:0:0:0'
+        assert len(bpp_list) == 1
 
     def test_non_trans_bnd_from_mate(self):
         row = Mock(
@@ -348,19 +349,19 @@ def test_non_trans_bnd_from_mate(self):
         )
         vcf_list = _parse_vcf_record(row)
         bpp_list = _convert_tool_row(vcf_list[0], SUPPORTED_TOOL.MANTA, False)
-        self.assertEqual(1, len(bpp_list))
+        assert len(bpp_list) == 1
         bpp = bpp_list[0]
-        self.assertEqual('1', bpp.break1.chr)
-        self.assertEqual('1', bpp.break2.chr)
-        self.assertEqual(17051724, bpp.break1.start)
-        self.assertEqual(234912188, bpp.break2.start)
-        self.assertEqual('R', bpp.break1.orient)
-        self.assertEqual('R', bpp.break2.orient)
-        self.assertEqual('manta-MantaBND:207:0:1:0:0:0:1', bpp.data['tracking_id'])
-        self.assertEqual(1, len(bpp_list))
+        assert bpp.break1.chr == '1'
+        assert bpp.break2.chr == '1'
+        assert bpp.break1.start == 17051724
+        assert bpp.break2.start == 234912188
+        assert bpp.break1.orient == 'R'
+        assert bpp.break2.orient == 'R'
+        assert bpp.data['tracking_id'] == 'manta-MantaBND:207:0:1:0:0:0:1'
+        assert len(bpp_list) == 1
 
 
-class TestDefuse(unittest.TestCase):
+class TestDefuse:
     def test_convert_inverted_translocation(self):
         row = {
             'gene_chromosome1': 'X',
@@ -372,18 +373,18 @@ def test_convert_inverted_translocation(self):
             'cluster_id': 1,
         }
         bpp_list = _convert_tool_row(row, SUPPORTED_TOOL.DEFUSE, False)
-        self.assertEqual(1, len(bpp_list))
-        bpp = bpp_list[0]
-        self.assertEqual('3', bpp.break1.chr)
-        self.assertEqual('X', bpp.break2.chr)
-        self.assertEqual(50294136, bpp.break1.start)
-        self.assertEqual(153063989, bpp.break2.start)
-        self.assertEqual(None, bpp.event_type)
-        self.assertEqual(False, bpp.opposing_strands)
-        self.assertEqual(ORIENT.RIGHT, bpp.break1.orient)
-        self.assertEqual(ORIENT.LEFT, bpp.break2.orient)
-        self.assertEqual(False, bpp.stranded)
-        self.assertEqual('defuse-1', bpp.data['tracking_id'])
+        assert len(bpp_list) == 1
+        bpp = bpp_list[0]
+        assert bpp.break1.chr == '3'
+        assert bpp.break2.chr == 'X'
+        assert bpp.break1.start == 50294136
+        assert bpp.break2.start == 153063989
+        assert bpp.event_type == None
+        assert bpp.opposing_strands == False
+        assert bpp.break1.orient == ORIENT.RIGHT
+        assert bpp.break2.orient == ORIENT.LEFT
+        assert bpp.stranded == False
+        assert bpp.data['tracking_id'] == 'defuse-1'
 
     def test_convert_translocation(self):
         row = {
@@ -396,18 +397,18 @@ def test_convert_translocation(self):
             'cluster_id': 1,
         }
         bpp_list = _convert_tool_row(row, SUPPORTED_TOOL.DEFUSE, False)
-        self.assertEqual(1, len(bpp_list))
-        bpp = bpp_list[0]
-        self.assertEqual('3', bpp.break1.chr)
-        self.assertEqual('X', bpp.break2.chr)
-        self.assertEqual(50294136, bpp.break1.start)
-        self.assertEqual(153063989, bpp.break2.start)
-        self.assertEqual(None, bpp.event_type)
-        self.assertEqual(True, bpp.opposing_strands)
-        self.assertEqual(ORIENT.LEFT, bpp.break1.orient)
-        self.assertEqual(ORIENT.LEFT, bpp.break2.orient)
-        self.assertEqual(False, bpp.stranded)
-        self.assertEqual('defuse-1', bpp.data['tracking_id'])
+        assert len(bpp_list) == 1
+        bpp = bpp_list[0]
+        assert bpp.break1.chr == '3'
+        assert bpp.break2.chr == 'X'
+        assert bpp.break1.start == 50294136
+        assert bpp.break2.start == 153063989
+        assert bpp.event_type == None
+        assert bpp.opposing_strands == True
+        assert bpp.break1.orient == ORIENT.LEFT
+        assert bpp.break2.orient == ORIENT.LEFT
+        assert bpp.stranded == False
+        assert bpp.data['tracking_id'] == 'defuse-1'
 
     def test_convert_indel(self):
         row = {
@@ -420,18 +421,18 @@ def test_convert_indel(self):
             'cluster_id': 1,
         }
         bpp_list = _convert_tool_row(row, SUPPORTED_TOOL.DEFUSE, False)
-        self.assertEqual(1, len(bpp_list))
-        bpp = bpp_list[0]
-        self.assertEqual('1', bpp.break1.chr)
-        self.assertEqual('1', bpp.break2.chr)
-        self.assertEqual(1663681, bpp.break1.start)
-        self.assertEqual(151732089, bpp.break2.start)
-        self.assertEqual(None, bpp.event_type)
-        self.assertEqual(False, bpp.opposing_strands)
-        self.assertEqual(ORIENT.LEFT, bpp.break1.orient)
-        self.assertEqual(ORIENT.RIGHT, bpp.break2.orient)
-        self.assertEqual(False, bpp.stranded)
-        self.assertEqual('defuse-1', bpp.data['tracking_id'])
+        assert len(bpp_list) == 1
+        bpp = bpp_list[0]
+        assert bpp.break1.chr == '1'
+        assert bpp.break2.chr == '1'
+        assert bpp.break1.start == 1663681
+        assert bpp.break2.start == 151732089
+        assert bpp.event_type == None
+        assert bpp.opposing_strands == False
+        assert bpp.break1.orient == ORIENT.LEFT
+        assert bpp.break2.orient == ORIENT.RIGHT
+        assert bpp.stranded == False
+        assert bpp.data['tracking_id'] == 'defuse-1'
 
     def test_convert_inversion(self):
         row = {
@@ -444,21 +445,21 @@ def test_convert_inversion(self):
             'cluster_id': 1,
         }
         bpp_list = _convert_tool_row(row, SUPPORTED_TOOL.DEFUSE, False)
-        self.assertEqual(1, len(bpp_list))
+        assert len(bpp_list) == 1
         bpp = bpp_list[0]
-        self.assertEqual('1', bpp.break1.chr)
-        self.assertEqual('1', bpp.break2.chr)
-        self.assertEqual(144898348, bpp.break1.start)
-        self.assertEqual(235294748, bpp.break2.start)
-        self.assertEqual(None, bpp.event_type)
-        self.assertEqual(True, bpp.opposing_strands)
-        self.assertEqual(ORIENT.LEFT, bpp.break1.orient)
-        self.assertEqual(ORIENT.LEFT, bpp.break2.orient)
-        self.assertEqual(False, bpp.stranded)
-        self.assertEqual('defuse-1', bpp.data['tracking_id'])
+        assert bpp.break1.chr == '1'
+        assert bpp.break2.chr == '1'
+        assert bpp.break1.start == 144898348
+        assert bpp.break2.start == 235294748
+        assert bpp.event_type == None
+        assert bpp.opposing_strands == True
+        assert bpp.break1.orient == ORIENT.LEFT
+        assert bpp.break2.orient == ORIENT.LEFT
+        assert bpp.stranded == False
+        assert bpp.data['tracking_id'] == 'defuse-1'
 
 
-class TestChimerascan(unittest.TestCase):
+class TestChimerascan:
     def test_convert_pos_pos(self):
         row = {
             'chrom5p': 'chr3',
@@ -472,17 +473,17 @@ def test_convert_pos_pos(self):
             'chimera_cluster_id': 'CLUSTER30',
         }
         bpp_list = _convert_tool_row(row, SUPPORTED_TOOL.CHIMERASCAN, False)
-        self.assertEqual(1, len(bpp_list))
+        assert len(bpp_list) == 1
         bpp = bpp_list[0]
-        self.assertEqual('3', bpp.break1.chr)
-        self.assertEqual('3', bpp.break2.chr)
+        assert bpp.break1.chr == '3'
+        assert bpp.break2.chr == '3'
         print(bpp)
-        self.assertEqual(int(row['end5p']), bpp.break1.start)
-        self.assertEqual(int(row['start3p']), bpp.break2.start)
-        self.assertEqual(False, bpp.opposing_strands)
-        self.assertEqual(ORIENT.LEFT, bpp.break1.orient)
-        self.assertEqual(ORIENT.RIGHT, bpp.break2.orient)
-        self.assertEqual(False, bpp.stranded)
+        assert bpp.break1.start == int(row['end5p'])
+        assert bpp.break2.start == int(row['start3p'])
+        assert bpp.opposing_strands == False
+        assert bpp.break1.orient == ORIENT.LEFT
+        assert bpp.break2.orient == ORIENT.RIGHT
+        assert bpp.stranded == False
 
     def test_convert_pos_neg(self):
         row = {
@@ -497,17 +498,17 @@ def test_convert_pos_neg(self):
             'chimera_cluster_id': 'CLUSTER30',
         }
         bpp_list = _convert_tool_row(row, SUPPORTED_TOOL.CHIMERASCAN, False)
-        self.assertEqual(1, len(bpp_list))
+        assert len(bpp_list) == 1
         bpp = bpp_list[0]
-        self.assertEqual('3', bpp.break1.chr)
-        self.assertEqual('3', bpp.break2.chr)
+        assert bpp.break1.chr == '3'
+        assert bpp.break2.chr == '3'
         print(bpp)
-        self.assertEqual(int(row['end5p']), bpp.break1.start)
-        self.assertEqual(int(row['end3p']), bpp.break2.start)
-        self.assertEqual(True, bpp.opposing_strands)
-        self.assertEqual(ORIENT.LEFT, bpp.break1.orient)
-        self.assertEqual(ORIENT.LEFT, bpp.break2.orient)
-        self.assertEqual(False, bpp.stranded)
+        assert bpp.break1.start == int(row['end5p'])
+        assert bpp.break2.start == int(row['end3p'])
+        assert bpp.opposing_strands == True
+        assert bpp.break1.orient == ORIENT.LEFT
+        assert bpp.break2.orient == ORIENT.LEFT
+        assert bpp.stranded == False
 
     def test_convert_neg_pos(self):
         row = {
@@ -522,17 +523,17 @@ def test_convert_neg_pos(self):
             'chimera_cluster_id': 'CLUSTER30',
         }
         bpp_list = _convert_tool_row(row, SUPPORTED_TOOL.CHIMERASCAN, False)
-        self.assertEqual(1, len(bpp_list))
+        assert len(bpp_list) == 1
         bpp = bpp_list[0]
-        self.assertEqual('3', bpp.break1.chr)
-        self.assertEqual('3', bpp.break2.chr)
+        assert bpp.break1.chr == '3'
+        assert bpp.break2.chr == '3'
         print(bpp)
-        self.assertEqual(int(row['start5p']), bpp.break1.start)
-        self.assertEqual(int(row['start3p']), bpp.break2.start)
-        self.assertEqual(True, bpp.opposing_strands)
-        self.assertEqual(ORIENT.RIGHT, bpp.break1.orient)
-        self.assertEqual(ORIENT.RIGHT, bpp.break2.orient)
-        self.assertEqual(False, bpp.stranded)
+        assert bpp.break1.start == int(row['start5p'])
+        assert bpp.break2.start == int(row['start3p'])
+        assert bpp.opposing_strands == True
+        assert bpp.break1.orient == ORIENT.RIGHT
+        assert bpp.break2.orient == ORIENT.RIGHT
+        assert bpp.stranded == False
 
     def test_convert_neg_neg(self):
         row = {
@@ -547,162 +548,162 @@ def test_convert_neg_neg(self):
             'chimera_cluster_id': 'CLUSTER30',
         }
         bpp_list = _convert_tool_row(row, SUPPORTED_TOOL.CHIMERASCAN, False)
-        self.assertEqual(1, len(bpp_list))
+        assert len(bpp_list) == 1
         bpp = bpp_list[0]
-        self.assertEqual('3', bpp.break1.chr)
-        self.assertEqual('3', bpp.break2.chr)
+        assert bpp.break1.chr == '3'
+        assert bpp.break2.chr == '3'
         print(bpp)
-        self.assertEqual(int(row['start5p']), bpp.break1.start)
-        self.assertEqual(int(row['end3p']), bpp.break2.start)
-        self.assertEqual(False, bpp.opposing_strands)
-        self.assertEqual(ORIENT.RIGHT, bpp.break1.orient)
-        self.assertEqual(ORIENT.LEFT, bpp.break2.orient)
-        self.assertEqual(False, bpp.stranded)
+        assert bpp.break1.start == int(row['start5p'])
+        assert bpp.break2.start == int(row['end3p'])
+        assert bpp.opposing_strands == False
+        assert bpp.break1.orient == ORIENT.RIGHT
+        assert bpp.break2.orient == ORIENT.LEFT
+        assert bpp.stranded == False
 
 
-class TestPindel(unittest.TestCase):
+class TestPindel:
     def test_convert_deletion(self):
         row = Mock(chrom='21', pos=9412306, info={'SVTYPE': 'DEL'}, stop=9412400, id=None, alts=[])
         bpp_list = _convert_tool_row(_parse_vcf_record(row)[0], SUPPORTED_TOOL.PINDEL, False)
-        self.assertEqual(1, len(bpp_list))
-        bpp = bpp_list[0]
-        self.assertEqual('21', bpp.break1.chr)
-        self.assertEqual('21', bpp.break2.chr)
-        self.assertEqual(SVTYPE.DEL, bpp.event_type)
-        self.assertEqual(row.pos, bpp.break1.start)
-        self.assertEqual(row.pos, bpp.break1.end)
-        self.assertEqual(row.stop, bpp.break2.start)
-        self.assertEqual(row.stop, bpp.break2.end)
-        self.assertEqual(ORIENT.LEFT, bpp.break1.orient)
-        self.assertEqual(STRAND.NS, bpp.break1.strand)
-        self.assertEqual(ORIENT.RIGHT, bpp.break2.orient)
-        self.assertEqual(STRAND.NS, bpp.break2.strand)
-        self.assertEqual(False, bpp.stranded)
-        self.assertEqual(False, bpp.opposing_strands)
+        assert len(bpp_list) == 1
+        bpp = bpp_list[0]
+        assert bpp.break1.chr == '21'
+        assert bpp.break2.chr == '21'
+        assert bpp.event_type == SVTYPE.DEL
+        assert bpp.break1.start == row.pos
+        assert bpp.break1.end == row.pos
+        assert bpp.break2.start == row.stop
+        assert bpp.break2.end == row.stop
+        assert bpp.break1.orient == ORIENT.LEFT
+        assert bpp.break1.strand == STRAND.NS
+        assert bpp.break2.orient == ORIENT.RIGHT
+        assert bpp.break2.strand == STRAND.NS
+        assert bpp.stranded == False
+        assert bpp.opposing_strands == False
 
     def test_convert_insertion(self):
         row = Mock(chrom='21', pos=9412306, info={'SVTYPE': 'INS'}, stop=9412400, id=None, alts=[])
         bpp_list = _convert_tool_row(_parse_vcf_record(row)[0], SUPPORTED_TOOL.PINDEL, False)
-        self.assertEqual(1, len(bpp_list))
-        bpp = bpp_list[0]
-        self.assertEqual('21', bpp.break1.chr)
-        self.assertEqual('21', bpp.break2.chr)
-        self.assertEqual(SVTYPE.INS, bpp.event_type)
-        self.assertEqual(row.pos, bpp.break1.start)
-        self.assertEqual(row.pos, bpp.break1.end)
-        self.assertEqual(row.stop, bpp.break2.start)
-        self.assertEqual(row.stop, bpp.break2.end)
-        self.assertEqual(ORIENT.LEFT, bpp.break1.orient)
-        self.assertEqual(STRAND.NS, bpp.break1.strand)
-        self.assertEqual(ORIENT.RIGHT, bpp.break2.orient)
-        self.assertEqual(STRAND.NS, bpp.break2.strand)
-        self.assertEqual(False, bpp.stranded)
-        self.assertEqual(False, bpp.opposing_strands)
+        assert len(bpp_list) == 1
+        bpp = bpp_list[0]
+        assert bpp.break1.chr == '21'
+        assert bpp.break2.chr == '21'
+        assert bpp.event_type == SVTYPE.INS
+        assert bpp.break1.start == row.pos
+        assert bpp.break1.end == row.pos
+        assert bpp.break2.start == row.stop
+        assert bpp.break2.end == row.stop
+        assert bpp.break1.orient == ORIENT.LEFT
+        assert bpp.break1.strand == STRAND.NS
+        assert bpp.break2.orient == ORIENT.RIGHT
+        assert bpp.break2.strand == STRAND.NS
+        assert bpp.stranded == False
+        assert bpp.opposing_strands == False
 
     def test_convert_inversion(self):
         row = Mock(chrom='21', pos=9412306, info={'SVTYPE': 'INV'}, stop=9412400, id=None, alts=[])
         bpp_list = _convert_tool_row(_parse_vcf_record(row)[0], SUPPORTED_TOOL.PINDEL, False)
-        self.assertEqual(2, len(bpp_list))
+        assert len(bpp_list) == 2
         bpp = sorted(bpp_list, key=lambda x: x.break1)[0]
-        self.assertEqual('21', bpp.break1.chr)
-        self.assertEqual('21', bpp.break2.chr)
-        self.assertEqual(SVTYPE.INV, bpp.event_type)
-        self.assertEqual(row.pos, bpp.break1.start)
-        self.assertEqual(row.pos, bpp.break1.end)
-        self.assertEqual(row.stop, bpp.break2.start)
-        self.assertEqual(row.stop, bpp.break2.end)
-        self.assertEqual(ORIENT.LEFT, bpp.break1.orient)
-        self.assertEqual(STRAND.NS, bpp.break1.strand)
-        self.assertEqual(ORIENT.LEFT, bpp.break2.orient)
-        self.assertEqual(STRAND.NS, bpp.break2.strand)
-        self.assertEqual(False, bpp.stranded)
-        self.assertEqual(True, bpp.opposing_strands)
-
-
-class TestParseBndAlt(unittest.TestCase):
+        assert bpp.break1.chr == '21'
+        assert bpp.break2.chr == '21'
+        assert bpp.event_type == SVTYPE.INV
+        assert bpp.break1.start == row.pos
+        assert bpp.break1.end == row.pos
+        assert bpp.break2.start == row.stop
+        assert bpp.break2.end == row.stop
+        assert bpp.break1.orient == ORIENT.LEFT
+        assert bpp.break1.strand == STRAND.NS
+        assert bpp.break2.orient == ORIENT.LEFT
+        assert bpp.break2.strand == STRAND.NS
+        assert bpp.stranded == False
+        assert bpp.opposing_strands == True
+
+
+class TestParseBndAlt:
     def test_right(self):
         # '[4:190898243[AGGT'
         chrom, pos, orient1, orient2, ref, seq = _parse_bnd_alt('[4:190898243[A')
-        self.assertEqual('4', chrom)
-        self.assertEqual(190898243, pos)
-        self.assertEqual(ORIENT.RIGHT, orient1)
-        self.assertEqual(ORIENT.RIGHT, orient2)
-        self.assertEqual('', seq)
-        self.assertEqual('A', ref)
+        assert chrom == '4'
+        assert pos == 190898243
+        assert orient1 == ORIENT.RIGHT
+        assert orient2 == ORIENT.RIGHT
+        assert seq == ''
+        assert ref == 'A'
 
     def test_right_untemp_seq(self):
         chrom, pos, orient1, orient2, ref, seq = _parse_bnd_alt('[5:190898243[AGGT')
-        self.assertEqual('5', chrom)
-        self.assertEqual(190898243, pos)
-        self.assertEqual(ORIENT.RIGHT, orient1)
-        self.assertEqual(ORIENT.RIGHT, orient2)
-        self.assertEqual('AGG', seq)
-        self.assertEqual('T', ref)
+        assert chrom == '5'
+        assert pos == 190898243
+        assert orient1 == ORIENT.RIGHT
+        assert orient2 == ORIENT.RIGHT
+        assert seq == 'AGG'
+        assert ref == 'T'
 
         chrom, pos, orient1, orient2, ref, seq = _parse_bnd_alt('CAGTNNNCA[5:190898243[')
-        self.assertEqual('5', chrom)
-        self.assertEqual(190898243, pos)
-        self.assertEqual(ORIENT.LEFT, orient1)
-        self.assertEqual(ORIENT.RIGHT, orient2)
-        self.assertEqual('AGTNNNCA', seq)
-        self.assertEqual('C', ref)
+        assert chrom == '5'
+        assert pos == 190898243
+        assert orient1 == ORIENT.LEFT
+        assert orient2 == ORIENT.RIGHT
+        assert seq == 'AGTNNNCA'
+        assert ref == 'C'
 
         chrom, pos, orient1, orient2, ref, seq = _parse_bnd_alt('CTG[21:47575965[')
-        self.assertEqual('21', chrom)
-        self.assertEqual(47575965, pos)
-        self.assertEqual(ORIENT.LEFT, orient1)
-        self.assertEqual(ORIENT.RIGHT, orient2)
-        self.assertEqual('TG', seq)
-        self.assertEqual('C', ref)
+        assert chrom == '21'
+        assert pos == 47575965
+        assert orient1 == ORIENT.LEFT
+        assert orient2 == ORIENT.RIGHT
+        assert seq == 'TG'
+        assert ref == 'C'
 
     def test_left(self):
         chrom, pos, orient1, orient2, ref, seq = _parse_bnd_alt('G]10:198982]')
-        self.assertEqual('10', chrom)
-        self.assertEqual(198982, pos)
-        self.assertEqual(ORIENT.LEFT, orient1)
-        self.assertEqual(ORIENT.LEFT, orient2)
-        self.assertEqual('', seq)
-        self.assertEqual('G', ref)
+        assert chrom == '10'
+        assert pos == 198982
+        assert orient1 == ORIENT.LEFT
+        assert orient2 == ORIENT.LEFT
+        assert seq == ''
+        assert ref == 'G'
 
         chrom, pos, orient1, orient2, ref, seq = _parse_bnd_alt(']10:198982]G')
-        self.assertEqual('10', chrom)
-        self.assertEqual(198982, pos)
-        self.assertEqual(ORIENT.LEFT, orient2)
-        self.assertEqual('', seq)
-        self.assertEqual('G', ref)
+        assert chrom == '10'
+        assert pos == 198982
+        assert orient2 == ORIENT.LEFT
+        assert seq == ''
+        assert ref == 'G'
 
     def test_alternate_chrom(self):
         chrom, pos, orient1, orient2, ref, seq = _parse_bnd_alt('G]GL000.01:198982]')
-        self.assertEqual('GL000.01', chrom)
-        self.assertEqual(198982, pos)
-        self.assertEqual(ORIENT.LEFT, orient2)
-        self.assertEqual('', seq)
-        self.assertEqual('G', ref)
+        assert chrom == 'GL000.01'
+        assert pos == 198982
+        assert orient2 == ORIENT.LEFT
+        assert seq == ''
+        assert ref == 'G'
 
     def test_left_untemp_seq(self):
         chrom, pos, orient1, orient2, ref, seq = _parse_bnd_alt(']11:123456]AGTNNNCAT')
-        self.assertEqual('11', chrom)
-        self.assertEqual(123456, pos)
-        self.assertEqual(ORIENT.LEFT, orient2)
-        self.assertEqual('AGTNNNCA', seq)
-        self.assertEqual('T', ref)
+        assert chrom == '11'
+        assert pos == 123456
+        assert orient2 == ORIENT.LEFT
+        assert seq == 'AGTNNNCA'
+        assert ref == 'T'
 
         chrom, pos, orient1, orient2, ref, seq = _parse_bnd_alt(']8:1682443]TGC')
-        self.assertEqual('8', chrom)
-        self.assertEqual(1682443, pos)
-        self.assertEqual(ORIENT.LEFT, orient2)
-        self.assertEqual('TG', seq)
-        self.assertEqual('C', ref)
+        assert chrom == '8'
+        assert pos == 1682443
+        assert orient2 == ORIENT.LEFT
+        assert seq == 'TG'
+        assert ref == 'C'
 
         chrom, pos, orient1, orient2, ref, seq = _parse_bnd_alt('AAGTG]11:66289601]')
-        self.assertEqual('11', chrom)
-        self.assertEqual(66289601, pos)
-        self.assertEqual(ORIENT.LEFT, orient2)
-        self.assertEqual('AGTG', seq)
-        self.assertEqual('A', ref)
+        assert chrom == '11'
+        assert pos == 66289601
+        assert orient2 == ORIENT.LEFT
+        assert seq == 'AGTG'
+        assert ref == 'A'
 
 
-class TestBreakDancer(unittest.TestCase):
+class TestBreakDancer:
     def test_itx(self):
         row = {
             'Chr1': '1',
@@ -717,15 +718,15 @@ def test_itx(self):
             'num_Reads': '43',
         }
         bpps = _convert_tool_row(row, SUPPORTED_TOOL.BREAKDANCER, False, True)
-        self.assertEqual(1, len(bpps))
-        self.assertEqual(SVTYPE.DUP, bpps[0].event_type)
-        self.assertEqual(10001, bpps[0].break1.start)
-        self.assertEqual(10001, bpps[0].break1.end)
-        self.assertEqual(ORIENT.RIGHT, bpps[0].break1.orient)
-        self.assertEqual(10546, bpps[0].break2.start)
-        self.assertEqual(10546, bpps[0].break2.end)
-        self.assertEqual(ORIENT.LEFT, bpps[0].break2.orient)
-        self.assertEqual(False, bpps[0].opposing_strands)
+        assert len(bpps) == 1
+        assert bpps[0].event_type == SVTYPE.DUP
+        assert bpps[0].break1.start == 10001
+        assert bpps[0].break1.end == 10001
+        assert bpps[0].break1.orient == ORIENT.RIGHT
+        assert bpps[0].break2.start == 10546
+        assert bpps[0].break2.end == 10546
+        assert bpps[0].break2.orient == ORIENT.LEFT
+        assert bpps[0].opposing_strands == False
 
     def test_deletion(self):
         row = {
@@ -741,15 +742,15 @@ def test_deletion(self):
             'num_Reads': '67',
         }
         bpps = _convert_tool_row(row, SUPPORTED_TOOL.BREAKDANCER, False, True)
-        self.assertEqual(1, len(bpps))
-        self.assertEqual(SVTYPE.DEL, bpps[0].event_type)
-        self.assertEqual(869445, bpps[0].break1.start)
-        self.assertEqual(869445, bpps[0].break1.end)
-        self.assertEqual(ORIENT.LEFT, bpps[0].break1.orient)
-        self.assertEqual(870225, bpps[0].break2.start)
-        self.assertEqual(870225, bpps[0].break2.end)
-        self.assertEqual(ORIENT.RIGHT, bpps[0].break2.orient)
-        self.assertEqual(False, bpps[0].opposing_strands)
+        assert len(bpps) == 1
+        assert bpps[0].event_type == SVTYPE.DEL
+        assert bpps[0].break1.start == 869445
+        assert bpps[0].break1.end == 869445
+        assert bpps[0].break1.orient == ORIENT.LEFT
+        assert bpps[0].break2.start == 870225
+        assert bpps[0].break2.end == 870225
+        assert bpps[0].break2.orient == ORIENT.RIGHT
+        assert bpps[0].opposing_strands == False
 
     def test_inversion(self):
         row = {
@@ -765,24 +766,24 @@ def test_inversion(self):
             'num_Reads': '2',
         }
         bpps = _convert_tool_row(row, SUPPORTED_TOOL.BREAKDANCER, False, True)
-        self.assertEqual(2, len(bpps))
-        self.assertEqual(SVTYPE.INV, bpps[0].event_type)
-        self.assertEqual(13143396, bpps[0].break1.start)
-        self.assertEqual(13143396, bpps[0].break1.end)
-        self.assertEqual(ORIENT.LEFT, bpps[0].break1.orient)
-        self.assertEqual(13218683, bpps[0].break2.start)
-        self.assertEqual(13218683, bpps[0].break2.end)
-        self.assertEqual(ORIENT.LEFT, bpps[0].break2.orient)
-        self.assertEqual(True, bpps[0].opposing_strands)
-
-        self.assertEqual(SVTYPE.INV, bpps[1].event_type)
-        self.assertEqual(13143396, bpps[1].break1.start)
-        self.assertEqual(13143396, bpps[1].break1.end)
-        self.assertEqual(ORIENT.RIGHT, bpps[1].break1.orient)
-        self.assertEqual(13218683, bpps[1].break2.start)
-        self.assertEqual(13218683, bpps[1].break2.end)
-        self.assertEqual(ORIENT.RIGHT, bpps[1].break2.orient)
-        self.assertEqual(True, bpps[1].opposing_strands)
+        assert len(bpps) == 2
+        assert bpps[0].event_type == SVTYPE.INV
+        assert bpps[0].break1.start == 13143396
+        assert bpps[0].break1.end == 13143396
+        assert bpps[0].break1.orient == ORIENT.LEFT
+        assert bpps[0].break2.start == 13218683
+        assert bpps[0].break2.end == 13218683
+        assert bpps[0].break2.orient == ORIENT.LEFT
+        assert bpps[0].opposing_strands == True
+
+        assert bpps[1].event_type == SVTYPE.INV
+        assert bpps[1].break1.start == 13143396
+        assert bpps[1].break1.end == 13143396
+        assert bpps[1].break1.orient == ORIENT.RIGHT
+        assert bpps[1].break2.start == 13218683
+        assert bpps[1].break2.end == 13218683
+        assert bpps[1].break2.orient == ORIENT.RIGHT
+        assert bpps[1].opposing_strands == True
 
     def test_insertion(self):
         row = {
@@ -798,30 +799,30 @@ def test_insertion(self):
             'num_Reads': '3',
         }
         bpps = _convert_tool_row(row, SUPPORTED_TOOL.BREAKDANCER, False, True)
-        self.assertEqual(1, len(bpps))
-        self.assertEqual(SVTYPE.INS, bpps[0].event_type)
-        self.assertEqual(20216146, bpps[0].break1.start)
-        self.assertEqual(20216146, bpps[0].break1.end)
-        self.assertEqual(ORIENT.LEFT, bpps[0].break1.orient)
-        self.assertEqual(20218060, bpps[0].break2.start)
-        self.assertEqual(20218060, bpps[0].break2.end)
-        self.assertEqual(ORIENT.RIGHT, bpps[0].break2.orient)
-        self.assertEqual(False, bpps[0].opposing_strands)
-
-
-class TestStrelka(unittest.TestCase):
+        assert len(bpps) == 1
+        assert bpps[0].event_type == SVTYPE.INS
+        assert bpps[0].break1.start == 20216146
+        assert bpps[0].break1.end == 20216146
+        assert bpps[0].break1.orient == ORIENT.LEFT
+        assert bpps[0].break2.start == 20218060
+        assert bpps[0].break2.end == 20218060
+        assert bpps[0].break2.orient == ORIENT.RIGHT
+        assert bpps[0].opposing_strands == False
+
+
+class TestStrelka:
     def testInsertion(self):
         event = Mock(
             chrom='1', pos=724986, id=None, info={}, ref='G', stop=724986, alts=('GGAATT',)
         )
         bpp_list = _convert_tool_row(_parse_vcf_record(event)[0], SUPPORTED_TOOL.STRELKA, False)
-        self.assertEqual(1, len(bpp_list))
+        assert len(bpp_list) == 1
         bpp = bpp_list[0]
-        self.assertEqual(724986, bpp.break1.start)
-        self.assertEqual(724986, bpp.break1.end)
-        self.assertEqual(724986, bpp.break2.start)
-        self.assertEqual(724986, bpp.break2.end)
-        self.assertEqual(SVTYPE.INS, bpp.event_type)
+        assert bpp.break1.start == 724986
+        assert bpp.break1.end == 724986
+        assert bpp.break2.start == 724986
+        assert bpp.break2.end == 724986
+        assert bpp.event_type == SVTYPE.INS
 
     def testDeletion(self):
         event = Mock(
@@ -834,13 +835,13 @@ def testDeletion(self):
             alts=('G',),
         )
         bpp_list = _convert_tool_row(_parse_vcf_record(event)[0], SUPPORTED_TOOL.STRELKA, False)
-        self.assertEqual(1, len(bpp_list))
+        assert len(bpp_list) == 1
         bpp = bpp_list[0]
-        self.assertEqual(1265353, bpp.break1.start)
-        self.assertEqual(1265353, bpp.break1.end)
-        self.assertEqual(1265366, bpp.break2.start)
-        self.assertEqual(1265366, bpp.break2.end)
-        self.assertEqual(SVTYPE.DEL, bpp.event_type)
+        assert bpp.break1.start == 1265353
+        assert bpp.break1.end == 1265353
+        assert bpp.break2.start == 1265366
+        assert bpp.break2.end == 1265366
+        assert bpp.event_type == SVTYPE.DEL
 
     def testMalformated(self):
         event = Mock(
@@ -852,68 +853,82 @@ def testMalformated(self):
             alts=('CTTTTAAATGTAACATGACATAATATATTTCCTAAATAATTTAAAATAATC.',),
             stop=53678660,
         )
-        with self.assertRaises(NotImplementedError):
+        with pytest.raises(NotImplementedError):
             _convert_tool_row(_parse_vcf_record(event)[0], SUPPORTED_TOOL.STRELKA, False)
 
 
-class TestVCF(unittest.TestCase):
-    def setUp(self):
-        self.tra = Mock(
-            chrom='2',
-            pos=21673582,
-            id=None,
-            info={'SVTYPE': 'TRA', 'CT': '5to5', 'CHR2': '3'},
-            stop=58921502,
-            alts=[],
-        )
+@pytest.fixture
+def vcf_translocation():
+    return Mock(
+        chrom='2',
+        pos=21673582,
+        id=None,
+        info={'SVTYPE': 'TRA', 'CT': '5to5', 'CHR2': '3'},
+        stop=58921502,
+        alts=[],
+    )
 
-    def test_no_ci(self):
-        bpp_list = _convert_tool_row(_parse_vcf_record(self.tra)[0], SUPPORTED_TOOL.VCF, False)
-        self.assertEqual(1, len(bpp_list))
+
+class TestVCF:
+    def test_no_ci(self, vcf_translocation):
+        bpp_list = _convert_tool_row(
+            _parse_vcf_record(vcf_translocation)[0], SUPPORTED_TOOL.VCF, False
+        )
+        assert len(bpp_list) == 1
         bpp = bpp_list[0]
-        self.assertEqual(21673582, bpp.break1.start)
-        self.assertEqual(21673582, bpp.break1.end)
-        self.assertEqual(58921502, bpp.break2.start)
-        self.assertEqual(58921502, bpp.break2.end)
+        assert bpp.break1.start == 21673582
+        assert bpp.break1.end == 21673582
+        assert bpp.break2.start == 58921502
+        assert bpp.break2.end == 58921502
 
-    def test_ci(self):
-        self.tra.info.update({'CIEND': [-700, 700], 'CIPOS': [-700, 700]})
-        bpp_list = _convert_tool_row(_parse_vcf_record(self.tra)[0], SUPPORTED_TOOL.VCF, False)
-        self.assertEqual(1, len(bpp_list))
+    def test_ci(self, vcf_translocation):
+        vcf_translocation.info.update({'CIEND': [-700, 700], 'CIPOS': [-700, 700]})
+        bpp_list = _convert_tool_row(
+            _parse_vcf_record(vcf_translocation)[0], SUPPORTED_TOOL.VCF, False
+        )
+        assert len(bpp_list) == 1
         bpp = bpp_list[0]
         print(bpp)
-        self.assertEqual(21673582 - 700, bpp.break1.start)
-        self.assertEqual(21673582 + 700, bpp.break1.end)
-        self.assertEqual(58921502 - 700, bpp.break2.start)
-        self.assertEqual(58921502 + 700, bpp.break2.end)
+        assert bpp.break1.start == 21673582 - 700
+        assert bpp.break1.end == 21673582 + 700
+        assert bpp.break2.start == 58921502 - 700
+        assert bpp.break2.end == 58921502 + 700
 
-    def test_precise_flag_ignores_ci(self):
-        self.tra.info.update({'CIEND': [-700, 700], 'CIPOS': [-700, 700], 'PRECISE': True})
-        bpp_list = _convert_tool_row(_parse_vcf_record(self.tra)[0], SUPPORTED_TOOL.VCF, False)
-        self.assertEqual(1, len(bpp_list))
+    def test_precise_flag_ignores_ci(self, vcf_translocation):
+        vcf_translocation.info.update({'CIEND': [-700, 700], 'CIPOS': [-700, 700], 'PRECISE': True})
+        bpp_list = _convert_tool_row(
+            _parse_vcf_record(vcf_translocation)[0], SUPPORTED_TOOL.VCF, False
+        )
+        assert len(bpp_list) == 1
         bpp = bpp_list[0]
-        self.assertEqual(21673582, bpp.break1.start)
-        self.assertEqual(21673582, bpp.break1.end)
-        self.assertEqual(58921502, bpp.break2.start)
-        self.assertEqual(58921502, bpp.break2.end)
+        assert bpp.break1.start == 21673582
+        assert bpp.break1.end == 21673582
+        assert bpp.break2.start == 58921502
+        assert bpp.break2.end == 58921502
 
-    def test_no_id(self):
-        bpp_list = _convert_tool_row(_parse_vcf_record(self.tra)[0], SUPPORTED_TOOL.VCF, False)
-        self.assertEqual(1, len(bpp_list))
+    def test_no_id(self, vcf_translocation):
+        bpp_list = _convert_tool_row(
+            _parse_vcf_record(vcf_translocation)[0], SUPPORTED_TOOL.VCF, False
+        )
+        assert len(bpp_list) == 1
         bpp = bpp_list[0]
-        self.assertTrue(bpp.data[COLUMNS.tracking_id])
+        assert bpp.data[COLUMNS.tracking_id]
 
-    def test_N_id(self):
-        self.tra.id = 'N'
-        bpp_list = _convert_tool_row(_parse_vcf_record(self.tra)[0], SUPPORTED_TOOL.VCF, False)
-        self.assertEqual(1, len(bpp_list))
+    def test_N_id(self, vcf_translocation):
+        vcf_translocation.id = 'N'
+        bpp_list = _convert_tool_row(
+            _parse_vcf_record(vcf_translocation)[0], SUPPORTED_TOOL.VCF, False
+        )
+        assert len(bpp_list) == 1
         bpp = bpp_list[0]
-        self.assertTrue(bpp.data[COLUMNS.tracking_id])
-        self.assertNotEqual('N', bpp.data[COLUMNS.tracking_id])
+        assert bpp.data[COLUMNS.tracking_id]
+        assert bpp.data[COLUMNS.tracking_id] != 'N'
 
-    def test_id_given(self):
-        self.tra.id = 'thing-1'
-        bpp_list = _convert_tool_row(_parse_vcf_record(self.tra)[0], SUPPORTED_TOOL.VCF, False)
-        self.assertEqual(1, len(bpp_list))
+    def test_id_given(self, vcf_translocation):
+        vcf_translocation.id = 'thing-1'
+        bpp_list = _convert_tool_row(
+            _parse_vcf_record(vcf_translocation)[0], SUPPORTED_TOOL.VCF, False
+        )
+        assert len(bpp_list) == 1
         bpp = bpp_list[0]
-        self.assertEqual('vcf-thing-1', bpp.data[COLUMNS.tracking_id])
+        assert bpp.data[COLUMNS.tracking_id] == 'vcf-thing-1'
diff --git a/tests/unit/test_util.py b/tests/unit/test_util.py
index 62b6910e..e163349e 100644
--- a/tests/unit/test_util.py
+++ b/tests/unit/test_util.py
@@ -1,6 +1,6 @@
 import os
-import unittest
 
+import pytest
 from mavis.constants import COLUMNS, ORIENT, STRAND
 from mavis.error import NotSpecifiedError
 from mavis.util import (
@@ -14,351 +14,360 @@
 from .mock import Mock
 
 
-class MockFileHandle(Mock):
-    def __init__(self, lines):
-        Mock.__init__(self, lines=lines)
-
-    def readlines(self):
-        return self.lines
-
-
-class TestGetConnectedComponents(unittest.TestCase):
+class TestGetConnectedComponents:
     def test_no_nodes(self):
-        self.assertEqual([], get_connected_components({}))
+        assert get_connected_components({}) == []
 
     def test_no_connections(self):
         graph = {1: {}, 2: {}, 3: {}}
         components = get_connected_components(graph)
-        self.assertEqual(3, len(components))
+        assert len(components) == 3
 
     def test_fully_connected(self):
         graph = {1: {2, 3, 1}, 2: {1, 2, 2}, 3: {3, 2}}
         components = get_connected_components(graph)
-        self.assertEqual(1, len(components))
-        self.assertEqual([{1, 2, 3}], components)
+        assert len(components) == 1
+        assert components == [{1, 2, 3}]
 
     def test_multiple_components(self):
         graph = {1: {2}, 2: {3}, 3: {4}, 6: {7, 8}}
         components = get_connected_components(graph)
-        self.assertEqual(2, len(components))
-        self.assertEqual({1, 2, 3, 4}, components[0])
-        self.assertEqual({6, 7, 8}, components[1])
+        assert len(components) == 2
+        assert components[0] == {1, 2, 3, 4}
+        assert components[1] == {6, 7, 8}
 
 
-class TestCast(unittest.TestCase):
+class TestCast:
     def test_float(self):
-        self.assertEqual(type(1.0), type(cast('1', float)))
-        self.assertNotEqual(type(1.0), type(cast('1', int)))
+        assert type(cast('1', float)) == type(1.0)
+        assert type(cast('1', int)) != type(1.0)
 
     def test_boolean(self):
-        self.assertEqual(type(False), type(cast('f', bool)))
-        self.assertEqual(type(False), type(cast('false', bool)))
-        self.assertFalse(cast('f', bool))
-        self.assertFalse(cast('false', bool))
-        self.assertFalse(cast('0', bool))
-        self.assertFalse(cast('F', bool))
-
-
-class TestGetEnvVariable(unittest.TestCase):
-    def setUp(self):
-        if 'MAVIS_TEST_ENV' in os.environ:
-            del os.environ['MAVIS_TEST_ENV']
-
-    def test_not_set(self):
-        self.assertEqual(1, get_env_variable('test_env', 1))
-
-    def test_needs_casting(self):
-        os.environ['MAVIS_TEST_ENV'] = '15'
-        self.assertEqual(15, get_env_variable('test_env', 1))
-
-
-class TestReadBreakpointPairsFromFile(unittest.TestCase):
-    def build_filehandle(self, row):
-        header = [c for c in row]
-        line = [row[c] for c in header]
-        lines = ['\t'.join(header), '\t'.join([str(v) for v in line])]
-        return MockFileHandle(lines)
-
-    def test_break1_strand_ns(self):
-        fh = self.build_filehandle(
-            {
-                COLUMNS.break1_chromosome: '1',
-                COLUMNS.break1_position_start: 1,
-                COLUMNS.break1_position_end: 1,
-                COLUMNS.break1_strand: STRAND.NS,
-                COLUMNS.break1_orientation: ORIENT.LEFT,
-                COLUMNS.break2_chromosome: '1',
-                COLUMNS.break2_position_start: 10,
-                COLUMNS.break2_position_end: 10,
-                COLUMNS.break2_strand: STRAND.POS,
-                COLUMNS.break2_orientation: ORIENT.RIGHT,
-                COLUMNS.stranded: True,
-                COLUMNS.opposing_strands: False,
-            }
+        assert type(cast('f', bool)) == type(False)
+        assert type(cast('false', bool)) == type(False)
+        assert not cast('f', bool)
+        assert not cast('false', bool)
+        assert not cast('0', bool)
+        assert not cast('F', bool)
+
+
+def mock_file_content(row):
+    header = [c for c in row]
+    line = [row[c] for c in header]
+    lines = ['\t'.join(header), '\t'.join([str(v) for v in line])]
+    return '\n'.join(lines)
+
+
+class TestReadBreakpointPairsFromFile:
+    def test_break1_strand_ns(self, tmp_path):
+        input_file = tmp_path / "inputs.tsv"
+
+        input_file.write_text(
+            mock_file_content(
+                {
+                    COLUMNS.break1_chromosome: '1',
+                    COLUMNS.break1_position_start: 1,
+                    COLUMNS.break1_position_end: 1,
+                    COLUMNS.break1_strand: STRAND.NS,
+                    COLUMNS.break1_orientation: ORIENT.LEFT,
+                    COLUMNS.break2_chromosome: '1',
+                    COLUMNS.break2_position_start: 10,
+                    COLUMNS.break2_position_end: 10,
+                    COLUMNS.break2_strand: STRAND.POS,
+                    COLUMNS.break2_orientation: ORIENT.RIGHT,
+                    COLUMNS.stranded: True,
+                    COLUMNS.opposing_strands: False,
+                }
+            )
         )
-        with self.assertRaises(NotSpecifiedError):
-            bpps = read_bpp_from_input_file(fh, expand_strand=False, expand_orient=True)
+        with pytest.raises(NotSpecifiedError):
+            bpps = read_bpp_from_input_file(input_file, expand_strand=False, expand_orient=True)
             for b in bpps:
                 print(b)
 
-    def test_stranded_no_expand_error(self):
-        fh = self.build_filehandle(
-            {
-                COLUMNS.break1_chromosome: '1',
-                COLUMNS.break1_position_start: 1,
-                COLUMNS.break1_position_end: 1,
-                COLUMNS.break1_strand: STRAND.NS,
-                COLUMNS.break1_orientation: ORIENT.LEFT,
-                COLUMNS.break2_chromosome: '1',
-                COLUMNS.break2_position_start: 10,
-                COLUMNS.break2_position_end: 10,
-                COLUMNS.break2_strand: STRAND.POS,
-                COLUMNS.break2_orientation: ORIENT.RIGHT,
-                COLUMNS.stranded: True,
-                COLUMNS.opposing_strands: False,
-            }
+    def test_stranded_no_expand_error(self, tmp_path):
+        input_file = tmp_path / "inputs.tsv"
+        input_file.write_text(
+            mock_file_content(
+                {
+                    COLUMNS.break1_chromosome: '1',
+                    COLUMNS.break1_position_start: 1,
+                    COLUMNS.break1_position_end: 1,
+                    COLUMNS.break1_strand: STRAND.NS,
+                    COLUMNS.break1_orientation: ORIENT.LEFT,
+                    COLUMNS.break2_chromosome: '1',
+                    COLUMNS.break2_position_start: 10,
+                    COLUMNS.break2_position_end: 10,
+                    COLUMNS.break2_strand: STRAND.POS,
+                    COLUMNS.break2_orientation: ORIENT.RIGHT,
+                    COLUMNS.stranded: True,
+                    COLUMNS.opposing_strands: False,
+                }
+            )
         )
-        bpps = read_bpp_from_input_file(fh, expand_strand=True, expand_orient=True)
-        self.assertEqual(1, len(bpps))
-        self.assertEqual(STRAND.POS, bpps[0].break1.strand)
-        self.assertEqual(STRAND.POS, bpps[0].break2.strand)
-
-    def test_break2_strand_ns(self):
-        fh = self.build_filehandle(
-            {
-                COLUMNS.break1_chromosome: '1',
-                COLUMNS.break1_position_start: 1,
-                COLUMNS.break1_position_end: 1,
-                COLUMNS.break1_strand: STRAND.POS,
-                COLUMNS.break1_orientation: ORIENT.LEFT,
-                COLUMNS.break2_chromosome: '1',
-                COLUMNS.break2_position_start: 10,
-                COLUMNS.break2_position_end: 10,
-                COLUMNS.break2_strand: STRAND.NS,
-                COLUMNS.break2_orientation: ORIENT.RIGHT,
-                COLUMNS.stranded: True,
-                COLUMNS.opposing_strands: False,
-            }
+        bpps = read_bpp_from_input_file(input_file, expand_strand=True, expand_orient=True)
+        assert len(bpps) == 1
+        assert bpps[0].break1.strand == STRAND.POS
+        assert bpps[0].break2.strand == STRAND.POS
+
+    def test_break2_strand_ns(self, tmp_path):
+        input_file = tmp_path / "inputs.tsv"
+        input_file.write_text(
+            mock_file_content(
+                {
+                    COLUMNS.break1_chromosome: '1',
+                    COLUMNS.break1_position_start: 1,
+                    COLUMNS.break1_position_end: 1,
+                    COLUMNS.break1_strand: STRAND.POS,
+                    COLUMNS.break1_orientation: ORIENT.LEFT,
+                    COLUMNS.break2_chromosome: '1',
+                    COLUMNS.break2_position_start: 10,
+                    COLUMNS.break2_position_end: 10,
+                    COLUMNS.break2_strand: STRAND.NS,
+                    COLUMNS.break2_orientation: ORIENT.RIGHT,
+                    COLUMNS.stranded: True,
+                    COLUMNS.opposing_strands: False,
+                }
+            )
         )
 
-        with self.assertRaises(NotSpecifiedError) as err:
+        with pytest.raises(NotSpecifiedError) as err:
             print(err)
-            bpps = read_bpp_from_input_file(fh, expand_strand=False, expand_orient=False)
-
-        bpps = read_bpp_from_input_file(fh, expand_strand=True, expand_orient=True)
-        self.assertEqual(1, len(bpps))
-        self.assertEqual(STRAND.POS, bpps[0].break2.strand)
-
-    def test_stranded_expand_strands_and_orient(self):
-        fh = self.build_filehandle(
-            {
-                COLUMNS.break1_chromosome: '1',
-                COLUMNS.break1_position_start: 1,
-                COLUMNS.break1_position_end: 1,
-                COLUMNS.break1_strand: STRAND.NS,
-                COLUMNS.break1_orientation: ORIENT.LEFT,
-                COLUMNS.break2_chromosome: '1',
-                COLUMNS.break2_position_start: 10,
-                COLUMNS.break2_position_end: 10,
-                COLUMNS.break2_strand: STRAND.NS,
-                COLUMNS.break2_orientation: ORIENT.RIGHT,
-                COLUMNS.stranded: True,
-                COLUMNS.opposing_strands: False,
-            }
+            bpps = read_bpp_from_input_file(input_file, expand_strand=False, expand_orient=False)
+
+        bpps = read_bpp_from_input_file(input_file, expand_strand=True, expand_orient=True)
+        assert len(bpps) == 1
+        assert bpps[0].break2.strand == STRAND.POS
+
+    def test_stranded_expand_strands_and_orient(self, tmp_path):
+        input_file = tmp_path / "inputs.tsv"
+        input_file.write_text(
+            mock_file_content(
+                {
+                    COLUMNS.break1_chromosome: '1',
+                    COLUMNS.break1_position_start: 1,
+                    COLUMNS.break1_position_end: 1,
+                    COLUMNS.break1_strand: STRAND.NS,
+                    COLUMNS.break1_orientation: ORIENT.LEFT,
+                    COLUMNS.break2_chromosome: '1',
+                    COLUMNS.break2_position_start: 10,
+                    COLUMNS.break2_position_end: 10,
+                    COLUMNS.break2_strand: STRAND.NS,
+                    COLUMNS.break2_orientation: ORIENT.RIGHT,
+                    COLUMNS.stranded: True,
+                    COLUMNS.opposing_strands: False,
+                }
+            )
         )
-        bpps = read_bpp_from_input_file(fh, expand_strand=True, expand_orient=False)
-        self.assertEqual(2, len(bpps))
-        self.assertEqual(STRAND.POS, bpps[0].break1.strand)
-        self.assertEqual(STRAND.POS, bpps[0].break2.strand)
-        self.assertEqual(STRAND.NEG, bpps[1].break1.strand)
-        self.assertEqual(STRAND.NEG, bpps[1].break2.strand)
-
-    def test_expand_strands_and_orient(self):
-        fh = self.build_filehandle(
-            {
-                COLUMNS.break1_chromosome: '1',
-                COLUMNS.break1_position_start: 1,
-                COLUMNS.break1_position_end: 1,
-                COLUMNS.break1_strand: STRAND.NS,
-                COLUMNS.break1_orientation: ORIENT.LEFT,
-                COLUMNS.break2_chromosome: '1',
-                COLUMNS.break2_position_start: 10,
-                COLUMNS.break2_position_end: 10,
-                COLUMNS.break2_strand: STRAND.NS,
-                COLUMNS.break2_orientation: ORIENT.RIGHT,
-                COLUMNS.stranded: False,
-                COLUMNS.opposing_strands: False,
-            }
+        bpps = read_bpp_from_input_file(input_file, expand_strand=True, expand_orient=False)
+        assert len(bpps) == 2
+        assert bpps[0].break1.strand == STRAND.POS
+        assert bpps[0].break2.strand == STRAND.POS
+        assert bpps[1].break1.strand == STRAND.NEG
+        assert bpps[1].break2.strand == STRAND.NEG
+
+    def test_expand_strands_and_orient(self, tmp_path):
+        input_file = tmp_path / "inputs.tsv"
+        input_file.write_text(
+            mock_file_content(
+                {
+                    COLUMNS.break1_chromosome: '1',
+                    COLUMNS.break1_position_start: 1,
+                    COLUMNS.break1_position_end: 1,
+                    COLUMNS.break1_strand: STRAND.NS,
+                    COLUMNS.break1_orientation: ORIENT.LEFT,
+                    COLUMNS.break2_chromosome: '1',
+                    COLUMNS.break2_position_start: 10,
+                    COLUMNS.break2_position_end: 10,
+                    COLUMNS.break2_strand: STRAND.NS,
+                    COLUMNS.break2_orientation: ORIENT.RIGHT,
+                    COLUMNS.stranded: False,
+                    COLUMNS.opposing_strands: False,
+                }
+            )
         )
-        bpps = read_bpp_from_input_file(fh, expand_strand=True, expand_orient=False)
-        self.assertEqual(1, len(bpps))
-        self.assertEqual(STRAND.NS, bpps[0].break1.strand)
-        self.assertEqual(STRAND.NS, bpps[0].break2.strand)
-
-    def test_stranded_expand_strands_not_orient(self):
-        fh = self.build_filehandle(
-            {
-                COLUMNS.break1_chromosome: '1',
-                COLUMNS.break1_position_start: 1,
-                COLUMNS.break1_position_end: 1,
-                COLUMNS.break1_strand: STRAND.NS,
-                COLUMNS.break1_orientation: ORIENT.LEFT,
-                COLUMNS.break2_chromosome: '1',
-                COLUMNS.break2_position_start: 10,
-                COLUMNS.break2_position_end: 10,
-                COLUMNS.break2_strand: STRAND.NS,
-                COLUMNS.break2_orientation: ORIENT.RIGHT,
-                COLUMNS.stranded: True,
-                COLUMNS.opposing_strands: False,
-            }
+        bpps = read_bpp_from_input_file(input_file, expand_strand=True, expand_orient=False)
+        assert len(bpps) == 1
+        assert bpps[0].break1.strand == STRAND.NS
+        assert bpps[0].break2.strand == STRAND.NS
+
+    def test_stranded_expand_strands_not_orient(self, tmp_path):
+        input_file = tmp_path / "inputs.tsv"
+        input_file.write_text(
+            mock_file_content(
+                {
+                    COLUMNS.break1_chromosome: '1',
+                    COLUMNS.break1_position_start: 1,
+                    COLUMNS.break1_position_end: 1,
+                    COLUMNS.break1_strand: STRAND.NS,
+                    COLUMNS.break1_orientation: ORIENT.LEFT,
+                    COLUMNS.break2_chromosome: '1',
+                    COLUMNS.break2_position_start: 10,
+                    COLUMNS.break2_position_end: 10,
+                    COLUMNS.break2_strand: STRAND.NS,
+                    COLUMNS.break2_orientation: ORIENT.RIGHT,
+                    COLUMNS.stranded: True,
+                    COLUMNS.opposing_strands: False,
+                }
+            )
         )
-        bpps = read_bpp_from_input_file(fh, expand_strand=True, expand_orient=True)
-        self.assertEqual(2, len(bpps))
-        self.assertEqual(STRAND.POS, bpps[0].break1.strand)
-        self.assertEqual(STRAND.POS, bpps[0].break2.strand)
-        self.assertEqual(STRAND.NEG, bpps[1].break1.strand)
-        self.assertEqual(STRAND.NEG, bpps[1].break2.strand)
-
-    def test_expand_orient_not_strand(self):
-        fh = self.build_filehandle(
-            {
-                COLUMNS.break1_chromosome: '1',
-                COLUMNS.break1_position_start: 1,
-                COLUMNS.break1_position_end: 1,
-                COLUMNS.break1_strand: STRAND.NS,
-                COLUMNS.break1_orientation: ORIENT.LEFT,
-                COLUMNS.break2_chromosome: '1',
-                COLUMNS.break2_position_start: 10,
-                COLUMNS.break2_position_end: 10,
-                COLUMNS.break2_strand: STRAND.NS,
-                COLUMNS.break2_orientation: ORIENT.RIGHT,
-                COLUMNS.stranded: False,
-                COLUMNS.opposing_strands: False,
-            }
+        bpps = read_bpp_from_input_file(input_file, expand_strand=True, expand_orient=True)
+        assert len(bpps) == 2
+        assert bpps[0].break1.strand == STRAND.POS
+        assert bpps[0].break2.strand == STRAND.POS
+        assert bpps[1].break1.strand == STRAND.NEG
+        assert bpps[1].break2.strand == STRAND.NEG
+
+    def test_expand_orient_not_strand(self, tmp_path):
+        input_file = tmp_path / "inputs.tsv"
+        input_file.write_text(
+            mock_file_content(
+                {
+                    COLUMNS.break1_chromosome: '1',
+                    COLUMNS.break1_position_start: 1,
+                    COLUMNS.break1_position_end: 1,
+                    COLUMNS.break1_strand: STRAND.NS,
+                    COLUMNS.break1_orientation: ORIENT.LEFT,
+                    COLUMNS.break2_chromosome: '1',
+                    COLUMNS.break2_position_start: 10,
+                    COLUMNS.break2_position_end: 10,
+                    COLUMNS.break2_strand: STRAND.NS,
+                    COLUMNS.break2_orientation: ORIENT.RIGHT,
+                    COLUMNS.stranded: False,
+                    COLUMNS.opposing_strands: False,
+                }
+            )
         )
-        bpps = read_bpp_from_input_file(fh, expand_strand=False, expand_orient=True)
-        self.assertEqual(1, len(bpps))
-        self.assertEqual(STRAND.NS, bpps[0].break1.strand)
-        self.assertEqual(STRAND.NS, bpps[0].break2.strand)
-
-    def test_break1_orient_ns(self):
-        fh = self.build_filehandle(
-            {
-                COLUMNS.break1_chromosome: '1',
-                COLUMNS.break1_position_start: 1,
-                COLUMNS.break1_position_end: 1,
-                COLUMNS.break1_strand: STRAND.POS,
-                COLUMNS.break1_orientation: ORIENT.NS,
-                COLUMNS.break2_chromosome: '1',
-                COLUMNS.break2_position_start: 10,
-                COLUMNS.break2_position_end: 10,
-                COLUMNS.break2_strand: STRAND.POS,
-                COLUMNS.break2_orientation: ORIENT.RIGHT,
-                COLUMNS.stranded: False,
-                COLUMNS.opposing_strands: False,
-            }
+        bpps = read_bpp_from_input_file(input_file, expand_strand=False, expand_orient=True)
+        assert len(bpps) == 1
+        assert bpps[0].break1.strand == STRAND.NS
+        assert bpps[0].break2.strand == STRAND.NS
+
+    def test_break1_orient_ns(self, tmp_path):
+        input_file = tmp_path / "inputs.tsv"
+        input_file.write_text(
+            mock_file_content(
+                {
+                    COLUMNS.break1_chromosome: '1',
+                    COLUMNS.break1_position_start: 1,
+                    COLUMNS.break1_position_end: 1,
+                    COLUMNS.break1_strand: STRAND.POS,
+                    COLUMNS.break1_orientation: ORIENT.NS,
+                    COLUMNS.break2_chromosome: '1',
+                    COLUMNS.break2_position_start: 10,
+                    COLUMNS.break2_position_end: 10,
+                    COLUMNS.break2_strand: STRAND.POS,
+                    COLUMNS.break2_orientation: ORIENT.RIGHT,
+                    COLUMNS.stranded: False,
+                    COLUMNS.opposing_strands: False,
+                }
+            )
         )
-        bpps = read_bpp_from_input_file(fh, expand_strand=False, expand_orient=True)
-        self.assertEqual(1, len(bpps))
-        self.assertEqual(ORIENT.LEFT, bpps[0].break1.orient)
-
-    def test_break2_orient_ns(self):
-        fh = self.build_filehandle(
-            {
-                COLUMNS.break1_chromosome: '1',
-                COLUMNS.break1_position_start: 1,
-                COLUMNS.break1_position_end: 1,
-                COLUMNS.break1_strand: STRAND.POS,
-                COLUMNS.break1_orientation: ORIENT.NS,
-                COLUMNS.break2_chromosome: '1',
-                COLUMNS.break2_position_start: 10,
-                COLUMNS.break2_position_end: 10,
-                COLUMNS.break2_strand: STRAND.POS,
-                COLUMNS.break2_orientation: ORIENT.RIGHT,
-                COLUMNS.stranded: False,
-                COLUMNS.opposing_strands: False,
-            }
+        bpps = read_bpp_from_input_file(input_file, expand_strand=False, expand_orient=True)
+        assert len(bpps) == 1
+        assert bpps[0].break1.orient == ORIENT.LEFT
+
+    @pytest.mark.skip(reason='TODO')
+    def test_break2_orient_ns(self, tmp_path):
+        input_file = tmp_path / "inputs.tsv"
+        input_file.write_text(
+            mock_file_content(
+                {
+                    COLUMNS.break1_chromosome: '1',
+                    COLUMNS.break1_position_start: 1,
+                    COLUMNS.break1_position_end: 1,
+                    COLUMNS.break1_strand: STRAND.POS,
+                    COLUMNS.break1_orientation: ORIENT.NS,
+                    COLUMNS.break2_chromosome: '1',
+                    COLUMNS.break2_position_start: 10,
+                    COLUMNS.break2_position_end: 10,
+                    COLUMNS.break2_strand: STRAND.POS,
+                    COLUMNS.break2_orientation: ORIENT.RIGHT,
+                    COLUMNS.stranded: False,
+                    COLUMNS.opposing_strands: False,
+                }
+            )
         )
-        bpps = read_bpp_from_input_file(fh, expand_strand=False, expand_orient=True)
-        self.assertEqual(1, len(bpps))
-        self.assertEqual(ORIENT.LEFT, bpps[0].break1.orient)
-        raise unittest.SkipTest('TODO')
-
-    def test_both_break_orient_ns(self):
-        raise unittest.SkipTest('TODO')
-
-    def test_base_case(self):
-        fh = self.build_filehandle(
-            {
-                COLUMNS.break1_chromosome: '1',
-                COLUMNS.break1_position_start: 1,
-                COLUMNS.break1_position_end: 1,
-                COLUMNS.break1_strand: STRAND.POS,
-                COLUMNS.break1_orientation: ORIENT.RIGHT,
-                COLUMNS.break2_chromosome: '1',
-                COLUMNS.break2_position_start: 10,
-                COLUMNS.break2_position_end: 10,
-                COLUMNS.break2_strand: STRAND.NEG,
-                COLUMNS.break2_orientation: ORIENT.RIGHT,
-                COLUMNS.stranded: True,
-                COLUMNS.opposing_strands: True,
-            }
+        bpps = read_bpp_from_input_file(input_file, expand_strand=False, expand_orient=True)
+        assert len(bpps) == 1
+        assert bpps[0].break1.orient == ORIENT.LEFT
+
+    @pytest.mark.skip(reason='TODO')
+    def test_both_break_orient_ns(self, tmp_path):
+        input_file = tmp_path / "inputs.tsv"
+
+    def test_base_case(self, tmp_path):
+        input_file = tmp_path / "inputs.tsv"
+        input_file.write_text(
+            mock_file_content(
+                {
+                    COLUMNS.break1_chromosome: '1',
+                    COLUMNS.break1_position_start: 1,
+                    COLUMNS.break1_position_end: 1,
+                    COLUMNS.break1_strand: STRAND.POS,
+                    COLUMNS.break1_orientation: ORIENT.RIGHT,
+                    COLUMNS.break2_chromosome: '1',
+                    COLUMNS.break2_position_start: 10,
+                    COLUMNS.break2_position_end: 10,
+                    COLUMNS.break2_strand: STRAND.NEG,
+                    COLUMNS.break2_orientation: ORIENT.RIGHT,
+                    COLUMNS.stranded: True,
+                    COLUMNS.opposing_strands: True,
+                }
+            )
         )
-        bpps = read_bpp_from_input_file(fh, expand_strand=False, expand_orient=False)
-        self.assertEqual(1, len(bpps))
-        self.assertEqual(ORIENT.RIGHT, bpps[0].break1.orient)
-        self.assertEqual(True, bpps[0].opposing_strands)
-
-    def test_unstranded_with_strand_calls(self):
-        fh = self.build_filehandle(
-            {
-                COLUMNS.break1_chromosome: '1',
-                COLUMNS.break1_position_start: 1,
-                COLUMNS.break1_position_end: 1,
-                COLUMNS.break1_strand: STRAND.POS,
-                COLUMNS.break1_orientation: ORIENT.RIGHT,
-                COLUMNS.break2_chromosome: '1',
-                COLUMNS.break2_position_start: 10,
-                COLUMNS.break2_position_end: 10,
-                COLUMNS.break2_strand: STRAND.NEG,
-                COLUMNS.break2_orientation: ORIENT.RIGHT,
-                COLUMNS.stranded: False,
-                COLUMNS.opposing_strands: True,
-            }
+        bpps = read_bpp_from_input_file(input_file, expand_strand=False, expand_orient=False)
+        assert len(bpps) == 1
+        assert bpps[0].break1.orient == ORIENT.RIGHT
+        assert bpps[0].opposing_strands == True
+
+    def test_unstranded_with_strand_calls(self, tmp_path):
+        input_file = tmp_path / "inputs.tsv"
+        input_file.write_text(
+            mock_file_content(
+                {
+                    COLUMNS.break1_chromosome: '1',
+                    COLUMNS.break1_position_start: 1,
+                    COLUMNS.break1_position_end: 1,
+                    COLUMNS.break1_strand: STRAND.POS,
+                    COLUMNS.break1_orientation: ORIENT.RIGHT,
+                    COLUMNS.break2_chromosome: '1',
+                    COLUMNS.break2_position_start: 10,
+                    COLUMNS.break2_position_end: 10,
+                    COLUMNS.break2_strand: STRAND.NEG,
+                    COLUMNS.break2_orientation: ORIENT.RIGHT,
+                    COLUMNS.stranded: False,
+                    COLUMNS.opposing_strands: True,
+                }
+            )
         )
-        bpps = read_bpp_from_input_file(fh, expand_strand=False, expand_orient=False)
-        self.assertEqual(1, len(bpps))
-        self.assertEqual(STRAND.NS, bpps[0].break1.strand)
-        self.assertEqual(STRAND.NS, bpps[0].break2.strand)
-
-        bpps = read_bpp_from_input_file(fh, expand_strand=False, expand_orient=True)
-        self.assertEqual(1, len(bpps))
-        self.assertEqual(STRAND.NS, bpps[0].break1.strand)
-        self.assertEqual(STRAND.NS, bpps[0].break2.strand)
-
-        fh = self.build_filehandle(
-            {
-                COLUMNS.break1_chromosome: '1',
-                COLUMNS.break1_position_start: 1,
-                COLUMNS.break1_position_end: 1,
-                COLUMNS.break1_strand: STRAND.POS,
-                COLUMNS.break1_orientation: ORIENT.RIGHT,
-                COLUMNS.break2_chromosome: '1',
-                COLUMNS.break2_position_start: 10,
-                COLUMNS.break2_position_end: 10,
-                COLUMNS.break2_strand: STRAND.NEG,
-                COLUMNS.break2_orientation: ORIENT.RIGHT,
-                COLUMNS.stranded: True,
-                COLUMNS.opposing_strands: True,
-            }
+        bpps = read_bpp_from_input_file(input_file, expand_strand=False, expand_orient=False)
+        assert len(bpps) == 1
+        assert bpps[0].break1.strand == STRAND.NS
+        assert bpps[0].break2.strand == STRAND.NS
+
+        input_file = tmp_path / "inputs2.tsv"
+
+        input_file.write_text(
+            mock_file_content(
+                {
+                    COLUMNS.break1_chromosome: '1',
+                    COLUMNS.break1_position_start: 1,
+                    COLUMNS.break1_position_end: 1,
+                    COLUMNS.break1_strand: STRAND.POS,
+                    COLUMNS.break1_orientation: ORIENT.RIGHT,
+                    COLUMNS.break2_chromosome: '1',
+                    COLUMNS.break2_position_start: 10,
+                    COLUMNS.break2_position_end: 10,
+                    COLUMNS.break2_strand: STRAND.NEG,
+                    COLUMNS.break2_orientation: ORIENT.RIGHT,
+                    COLUMNS.stranded: True,
+                    COLUMNS.opposing_strands: True,
+                }
+            )
         )
 
-        bpps = read_bpp_from_input_file(fh, expand_strand=True, expand_orient=False)
-        self.assertEqual(1, len(bpps))
-        self.assertEqual(STRAND.POS, bpps[0].break1.strand)
-        self.assertEqual(STRAND.NEG, bpps[0].break2.strand)
-
-        bpps = read_bpp_from_input_file(fh, expand_strand=True, expand_orient=True)
-        self.assertEqual(1, len(bpps))
-        self.assertEqual(STRAND.POS, bpps[0].break1.strand)
-        self.assertEqual(STRAND.NEG, bpps[0].break2.strand)
+        bpps = read_bpp_from_input_file(input_file, expand_strand=True, expand_orient=False)
+        assert len(bpps) == 1
+        assert bpps[0].break1.strand == STRAND.POS
+        assert bpps[0].break2.strand == STRAND.NEG

From 95d2a335f2230ea6f6c473de335687360c399593 Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Fri, 23 Apr 2021 13:38:11 -0700
Subject: [PATCH 022/137] Fix import sort

---
 src/mavis/align.py | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/src/mavis/align.py b/src/mavis/align.py
index dffed765..20984ba6 100644
--- a/src/mavis/align.py
+++ b/src/mavis/align.py
@@ -13,8 +13,16 @@
 from .bam import cigar as _cigar
 from .bam import read as _read
 from .breakpoint import Breakpoint, BreakpointPair
-from .constants import (CIGAR, COLUMNS, NA_MAPPING_QUALITY, ORIENT, STRAND,
-                        SVTYPE, MavisNamespace, reverse_complement)
+from .constants import (
+    CIGAR,
+    COLUMNS,
+    NA_MAPPING_QUALITY,
+    ORIENT,
+    STRAND,
+    SVTYPE,
+    MavisNamespace,
+    reverse_complement,
+)
 from .error import InvalidRearrangement
 from .interval import Interval
 from .util import DEVNULL

From 6a12c3b650f8f1efdc329b4ea7ab61b3cb745230 Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@users.noreply.github.com>
Date: Fri, 23 Apr 2021 14:51:47 -0700
Subject: [PATCH 023/137] Fix typo

---
 docs/migrating.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/migrating.md b/docs/migrating.md
index 213ee00c..d56e17c3 100644
--- a/docs/migrating.md
+++ b/docs/migrating.md
@@ -12,7 +12,7 @@ reference files
 
 ### Configuration
 
-MAVIS no longer users command line arguments, config files, and environment variables for
+MAVIS no longer uses command line arguments, config files, and environment variables for
 configuration. Instead all configurable settings are controlled via a single input JSON
 config file
 

From 7c9442971b0278e2585540b9750216a9630c954c Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Fri, 23 Apr 2021 15:09:00 -0700
Subject: [PATCH 024/137] Remove pound from examples in docs as well

---
 .github/CONTRIBUTING.md  |  7 +------
 docs/inputs/reference.md | 34 +++++++++++++++++++---------------
 2 files changed, 20 insertions(+), 21 deletions(-)

diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md
index 48817547..d38244d4 100644
--- a/.github/CONTRIBUTING.md
+++ b/.github/CONTRIBUTING.md
@@ -1,4 +1,3 @@
-
 ## Getting Started
 
 If you are new to the project a good way to get started is by adding to the documentation, or adding unit tests where
@@ -47,7 +46,6 @@ mkdocs build
 The contents of the user manual can then be viewed by opening the build-docs/index.html
 in any available web browser (i.e. google-chrome, firefox, etc.)
 
-
 ## Deploy to PyPi
 
 Install deployment dependencies
@@ -68,13 +66,11 @@ Use twine to upload
 twine upload -r pypi dist/*
 ```
 
-
 ## Reporting a Bug
 
 Please make sure to search through the issues before reporting a bug to ensure there isn't
 already an open issue.
 
-
 ## Conventions
 
 ### Linting
@@ -82,7 +78,7 @@ already an open issue.
 Use [black](https://github.com/psf/black) with strings off and line length 100
 
 ```bash
-black mavis -S -l 100
+black src/mavis -S -l 100
 ```
 
 ### Docstrings
@@ -112,7 +108,6 @@ def some_function(some_arg: List[str]) -> None:
 
 any column name which may appear in any of the intermediate or final output files must be defined in `mavis.constants.COLUMNS` as well as added to the [columns glossary](../outputs/columns)
 
-
 ### Tests
 
 - all new code must have unit tests in the tests subdirectory
diff --git a/docs/inputs/reference.md b/docs/inputs/reference.md
index cc31d56e..5eff1cbb 100644
--- a/docs/inputs/reference.md
+++ b/docs/inputs/reference.md
@@ -21,7 +21,6 @@ not available,
 | [DGV annotations](../../inputs/reference/#dgv-database-of-genomic-variants) (text/tabbed)     | `MAVIS_DGV_ANNOTATION`    | [![](../images/get_app-24px.svg) GRCh37/Hg19](http://www.bcgsc.ca/downloads/mavis/dgv_hg19_variants.tab)<br>[![](../images/get_app-24px.svg) GRCh38](http://www.bcgsc.ca/downloads/mavis/dgv_hg38_variants.tab)                                               |
 | [aligner reference](../../inputs/reference/#aligner-reference)                                | `MAVIS_ALIGNER_REFERENCE` | [![](../images/get_app-24px.svg) GRCh37/Hg19 2bit (blat)](http://hgdownload.cse.ucsc.edu/goldenPath/hg19/bigZips/hg19.2bit)<br>[![](../images/get_app-24px.svg) GRCh38 2bit (blat)](http://hgdownload.cse.ucsc.edu/goldenPath/hg38/bigZips/hg38.2bit)         |
 
-
 If the environment variables above are set they will be used as the
 default values when any step of the pipeline script is called (including
 generating the template config file)
@@ -38,11 +37,13 @@ chromosomes. This is only used during visualization.
 
 The structure of the file should look something like this
 
-    chr1    0       2300000 p36.33  gneg
-    chr1    2300000 5400000 p36.32  gpos25
-    chr1    5400000 7200000 p36.31  gneg
-    chr1    7200000 9200000 p36.23  gpos25
-    chr1    9200000 12700000        p36.22  gneg
+```text
+chr1    0       2300000 p36.33  gneg
+chr1    2300000 5400000 p36.32  gpos25
+chr1    5400000 7200000 p36.31  gneg
+chr1    7200000 9200000 p36.23  gpos25
+chr1    9200000 12700000        p36.22  gneg
+```
 
 ## Masking File
 
@@ -52,9 +53,11 @@ known false positives, bad mapping, centromeres, telomeres etc. An
 example of the expected format is shown below. The file should have four
 columns: chr, start, end and name.
 
-    #chr    start   end     name
-    chr1    0       2300000 centromere
-    chr1    9200000 12700000        telomere
+```text
+chr    start   end     name
+chr1    0       2300000 centromere
+chr1    9200000 12700000        telomere
+```
 
 The pre-built masking files in the downloads table above are telomere
 regions, centromere regions (based on the cytoband file), and nspan
@@ -81,7 +84,6 @@ the ensembl annotations file including non-coding transcripts below.
 
 [![](../images/get_app-24px.svg) GRCh37/Hg19 + Ensembl69 (includes non-coding genes)](http://www.bcgsc.ca/downloads/mavis/ensembl69_hg19_annotations_with_ncrna.json)
 
-
 !!! warning
     the `mavis.annotate.file_io.load_reference_genes`{.interpreted-text
     role="func"} will only load valid translations. If the cds sequence in
@@ -98,7 +100,7 @@ be seen below
     {
         "name": string,
         "start": int,
-        "end": int
+        "end": int,
         "aliases": [string, string, ...],
         "transcripts": [
             {
@@ -180,10 +182,12 @@ awk '{print $2"\t"$3"\t"$4"\t"$1} GRCh37_hg19_variants_2016-05-15.txt > dgv_hg19
 Note in hg19 the column is called "name" and in hg38 the column is
 called "variantaccession". An example is shown below
 
-    #chr     start   end     name
-    1       1       2300000 nsv482937
-    1       10001   22118   dgv1n82
-    1       10001   127330  nsv7879
+```text
+chr     start   end     name
+1       1       2300000 nsv482937
+1       10001   22118   dgv1n82
+1       10001   127330  nsv7879
+```
 
 ## Aligner Reference
 

From 01a2eda28c66567cb85456d95a6ff1d3fcbd0488 Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Mon, 26 Apr 2021 15:27:36 -0700
Subject: [PATCH 025/137] Convert to pytest syntax

---
 tests/end_to_end/test_convert.py            |   61 +-
 tests/end_to_end/test_help.py               |   50 +-
 tests/end_to_end/test_overlay.py            |    1 -
 tests/end_to_end/test_ref_alt_count.py      |   92 +-
 tests/integration/test_align.py             |  386 +++--
 tests/integration/test_annotate.py          | 1599 +++++++++++--------
 tests/integration/test_annotate_examples.py |  160 +-
 tests/integration/test_annotate_fileio.py   |   32 +-
 tests/integration/test_args.py              |    2 -
 tests/integration/test_assemble.py          |  138 +-
 tests/integration/test_bam.py               |  511 +++---
 tests/integration/test_bam_cigar.py         |  237 ++-
 tests/integration/test_blat.py              |  151 +-
 tests/integration/test_breakpoint.py        |  118 +-
 tests/integration/test_cluster.py           |   68 +-
 tests/integration/test_illustrate.py        |  192 ++-
 tests/integration/test_pairing.py           |  403 ++---
 tests/integration/test_splicing.py          |  609 ++++---
 tests/integration/test_validate.py          |  328 ++--
 tests/integration/test_validate_call.py     |  699 ++++----
 tests/integration/test_validate_evidence.py |  805 +++++-----
 tests/unit/test_annotate.py                 |  268 ++--
 tests/unit/test_assemble.py                 |  100 +-
 tests/unit/test_bam.py                      |  126 +-
 tests/unit/test_blat.py                     |   15 +-
 tests/unit/test_breakpoint.py               |  169 +-
 tests/unit/test_call_indels.py              |  164 +-
 tests/unit/test_cluster.py                  |   13 +-
 tests/unit/test_constants.py                |   63 +-
 tests/unit/test_illustrate.py               |    7 +-
 tests/unit/test_interval.py                 |  278 ++--
 tests/unit/test_summary.py                  |  268 ++--
 tests/unit/test_tool.py                     |   86 +-
 tests/unit/test_util.py                     |   28 +-
 tests/unit/test_validate.py                 |   33 +-
 tests/util.py                               |   13 +
 36 files changed, 4236 insertions(+), 4037 deletions(-)

diff --git a/tests/end_to_end/test_convert.py b/tests/end_to_end/test_convert.py
index a1d33be2..514fae52 100644
--- a/tests/end_to_end/test_convert.py
+++ b/tests/end_to_end/test_convert.py
@@ -1,4 +1,3 @@
-import glob
 import os
 import shutil
 import sys
@@ -23,7 +22,7 @@ def setUpModule():
     print('output dir', TEMP_OUTPUT)
 
 
-class TestConvert(unittest.TestCase):
+class TestConvert:
     def run_main(self, inputfile, file_type, strand_specific=False):
         outputfile = os.path.join(TEMP_OUTPUT, file_type + '.tab')
         args = [
@@ -41,7 +40,7 @@ def run_main(self, inputfile, file_type, strand_specific=False):
         with patch.object(sys, 'argv', args):
             main()
             print('output', outputfile)
-            self.assertTrue(unique_exists(outputfile))
+            assert unique_exists(outputfile)
         result = {}
         for pair in read_bpp_from_input_file(outputfile):
             result.setdefault(pair.data['tracking_id'], []).append(pair)
@@ -56,44 +55,44 @@ def test_defuse(self):
     def test_delly(self):
         result = self.run_main(get_data('delly_events.vcf'), SUPPORTED_TOOL.DELLY, False)
         # test the contents were converted successfully
-        self.assertEqual(1, len(result['delly-DUP00000424']))
+        assert len(result['delly-DUP00000424']) == 1
         bpp = result['delly-DUP00000424'][0]
         print(bpp.data)
         print(bpp)
-        self.assertEqual(SVTYPE.DUP, bpp.event_type)
-        self.assertEqual('1', bpp.break1.chr)
-        self.assertEqual('1', bpp.break2.chr)
-        self.assertEqual(224646569, bpp.break1.start)
-        self.assertEqual(224646569, bpp.break1.end)
-        self.assertEqual(224800120, bpp.break2.start)
-        self.assertEqual(224800120, bpp.break2.end)
-        self.assertEqual(1, len(result['delly-TRA00020624']))
+        assert bpp.event_type == SVTYPE.DUP
+        assert bpp.break1.chr == '1'
+        assert bpp.break2.chr == '1'
+        assert bpp.break1.start == 224646569
+        assert bpp.break1.end == 224646569
+        assert bpp.break2.start == 224800120
+        assert bpp.break2.end == 224800120
+        assert len(result['delly-TRA00020624']) == 1
         bpp = result['delly-TRA00020624'][0]
-        self.assertEqual(SVTYPE.TRANS, bpp.event_type)
-        self.assertEqual('10', bpp.break1.chr)
-        self.assertEqual('19', bpp.break2.chr)
-        self.assertEqual(7059510 - 670, bpp.break1.start)
-        self.assertEqual(7059510 + 670, bpp.break1.end)
-        self.assertEqual(17396810 - 670, bpp.break2.start)
-        self.assertEqual(17396810 + 670, bpp.break2.end)
-        self.assertEqual(len(result), 31)
+        assert bpp.event_type == SVTYPE.TRANS
+        assert bpp.break1.chr == '10'
+        assert bpp.break2.chr == '19'
+        assert bpp.break1.start == 7059510 - 670
+        assert bpp.break1.end == 7059510 + 670
+        assert bpp.break2.start == 17396810 - 670
+        assert bpp.break2.end == 17396810 + 670
+        assert 31 == len(result)
 
     def test_manta(self):
         result = self.run_main(get_data('manta_events.vcf'), SUPPORTED_TOOL.MANTA, False)
         # ensure weird bnd type is converted correctly
         bnd_id = 'manta-MantaBND:173633:0:1:0:0:0:0'
-        self.assertEqual(1, len(result[bnd_id]))
+        assert len(result[bnd_id]) == 1
         bpp = result[bnd_id][0]
-        self.assertEqual(SVTYPE.TRANS, bpp.event_type)
-        self.assertEqual('10', bpp.break1.chr)
-        self.assertEqual('19', bpp.break2.chr)
-        self.assertEqual(7059511 - 0, bpp.break1.start)
-        self.assertEqual(7059511 + 1, bpp.break1.end)
-        self.assertEqual(17396810, bpp.break2.start)
-        self.assertEqual(17396810, bpp.break2.end)
-        self.assertEqual(ORIENT.LEFT, bpp.break2.orient)
+        assert bpp.event_type == SVTYPE.TRANS
+        assert bpp.break1.chr == '10'
+        assert bpp.break2.chr == '19'
+        assert bpp.break1.start == 7059511 - 0
+        assert bpp.break1.end == 7059511 + 1
+        assert bpp.break2.start == 17396810
+        assert bpp.break2.end == 17396810
+        assert bpp.break2.orient == ORIENT.LEFT
         somatic_event = result['manta-MantaDEL:20644:0:2:0:0:0'][0]
-        self.assertEqual(True, somatic_event.data.get('SOMATIC', False))
+        assert somatic_event.data.get('SOMATIC', False) is True
 
     def test_pindel(self):
         self.run_main(get_data('pindel_events.vcf'), SUPPORTED_TOOL.PINDEL, False)
@@ -107,7 +106,7 @@ def test_vcf(self):
         print(results.keys())
         record = results['vcf-460818'][0]
         print(record, record.data)
-        self.assertEqual('Pathogenic', record.data['CLNSIG'])
+        assert record.data['CLNSIG'] == 'Pathogenic'
 
     def test_breakseq2(self):
         self.run_main(get_data('breakseq.vcf'), SUPPORTED_TOOL.BREAKSEQ, False)
diff --git a/tests/end_to_end/test_help.py b/tests/end_to_end/test_help.py
index 4ff3172a..6d3cdd24 100644
--- a/tests/end_to_end/test_help.py
+++ b/tests/end_to_end/test_help.py
@@ -1,110 +1,106 @@
-import os
-import subprocess
 import sys
-import unittest
 from unittest.mock import patch
 
-
 from mavis.constants import SUBCOMMAND
 from mavis.main import main
 
 
-class TestHelpMenu(unittest.TestCase):
+class TestHelpMenu:
     def test_main(self):
         with patch.object(sys, 'argv', ['mavis', '-h']):
             try:
                 returncode = main()
             except SystemExit as err:
-                self.assertEqual(0, err.code)
+                assert err.code == 0
             else:
-                self.assertEqual(0, returncode)
+                assert returncode == 0
 
     def test_pipeline(self):
         with patch.object(sys, 'argv', ['mavis', SUBCOMMAND.SETUP, '-h']):
             try:
                 returncode = main()
             except SystemExit as err:
-                self.assertEqual(0, err.code)
+                assert err.code == 0
             else:
-                self.assertEqual(0, returncode)
+                assert returncode == 0
 
     def test_cluster(self):
         with patch.object(sys, 'argv', ['mavis', SUBCOMMAND.CLUSTER, '-h']):
             try:
                 returncode = main()
             except SystemExit as err:
-                self.assertEqual(0, err.code)
+                assert err.code == 0
             else:
-                self.assertEqual(0, returncode)
+                assert returncode == 0
 
     def test_validate(self):
         with patch.object(sys, 'argv', ['mavis', SUBCOMMAND.VALIDATE, '-h']):
             try:
                 returncode = main()
             except SystemExit as err:
-                self.assertEqual(0, err.code)
+                assert err.code == 0
             else:
-                self.assertEqual(0, returncode)
+                assert returncode == 0
 
     def test_annotate(self):
         with patch.object(sys, 'argv', ['mavis', SUBCOMMAND.ANNOTATE, '-h']):
             try:
                 returncode = main()
             except SystemExit as err:
-                self.assertEqual(0, err.code)
+                assert err.code == 0
             else:
-                self.assertEqual(0, returncode)
+                assert returncode == 0
 
     def test_pairing(self):
         with patch.object(sys, 'argv', ['mavis', SUBCOMMAND.PAIR, '-h']):
             try:
                 returncode = main()
             except SystemExit as err:
-                self.assertEqual(0, err.code)
+                assert err.code == 0
             else:
-                self.assertEqual(0, returncode)
+                assert returncode == 0
 
     def test_summary(self):
         with patch.object(sys, 'argv', ['mavis', SUBCOMMAND.SUMMARY, '-h']):
             try:
                 returncode = main()
             except SystemExit as err:
-                self.assertEqual(0, err.code)
+                assert err.code == 0
             else:
-                self.assertEqual(0, returncode)
+                assert returncode == 0
 
     def test_convert(self):
         with patch.object(sys, 'argv', ['mavis', SUBCOMMAND.CONVERT, '-h']):
             try:
                 returncode = main()
             except SystemExit as err:
-                self.assertEqual(0, err.code)
+                assert err.code == 0
             else:
-                self.assertEqual(0, returncode)
+                assert returncode == 0
 
     def test_overlay(self):
         with patch.object(sys, 'argv', ['mavis', SUBCOMMAND.OVERLAY, '-h']):
             try:
                 returncode = main()
             except SystemExit as err:
-                self.assertEqual(0, err.code)
+                assert err.code == 0
             else:
-                self.assertEqual(0, returncode)
+                assert returncode == 0
 
     def test_bad_option(self):
         with patch.object(sys, 'argv', ['mavis', SUBCOMMAND.SETUP, '--blargh']):
             try:
                 returncode = main()
             except SystemExit as err:
-                self.assertNotEqual(0, err.code)
+                assert err.code != 0
             else:
-                self.assertNotEqual(0, returncode)
+                assert returncode != 0
 
     def test_ref_alt_count(self):
         with patch.object(sys, 'argv', ['calculate_ref_alt_counts', '-h']):
             try:
                 returncode = main()
             except SystemExit as err:
-                self.assertEqual(0, err.code)
+                assert err.code == 0
             else:
-                self.assertEqual(0, returncode)
+                assert returncode == 0
diff --git a/tests/end_to_end/test_overlay.py b/tests/end_to_end/test_overlay.py
index db664c55..5950701d 100644
--- a/tests/end_to_end/test_overlay.py
+++ b/tests/end_to_end/test_overlay.py
@@ -1,7 +1,6 @@
 import json
 import os
 import shutil
-import subprocess
 import sys
 import tempfile
 from unittest.mock import patch
diff --git a/tests/end_to_end/test_ref_alt_count.py b/tests/end_to_end/test_ref_alt_count.py
index 4c30fb81..c1afb816 100644
--- a/tests/end_to_end/test_ref_alt_count.py
+++ b/tests/end_to_end/test_ref_alt_count.py
@@ -1,8 +1,8 @@
 import os
 import shutil
 import tempfile
-import unittest
 
+import pytest
 from mavis.annotate.file_io import load_reference_genome
 from mavis.breakpoint import Breakpoint, BreakpointPair
 from mavis.constants import ORIENT, SVTYPE
@@ -31,68 +31,60 @@ def print_file_tree(dirname):
             print('{}{}'.format(subindent, f))
 
 
-class TestFullCalculator(unittest.TestCase):
-    def setUp(self):
-        # create the temp output directory to store file outputs
-        self.temp_output = tempfile.mkdtemp()
-        print('output dir', self.temp_output)
+@pytest.fixture
+def calculator():
+    return RefAltCalculator(
+        [("TEST", get_data('mock_reads_for_events.sorted.bam'))],
+        REFERENCE_GENOME,
+        max_event_size=100,
+        buffer=20,
+    )
 
-        self.calculator = RefAltCalculator(
-            [("TEST", get_data('mock_reads_for_events.sorted.bam'))],
-            REFERENCE_GENOME,
-            max_event_size=100,
-            buffer=20,
-        )
-
-    def test_calculate_all_counts(self):
-        self.calculator.calculate_all_counts(
-            [get_data("mavis_summary_all_mock-A36971_mock-A47933.tab")],
-            os.path.join(self.temp_output, "ref_alt_output.tab"),
-        )
-        self.assertTrue(glob_exists(self.temp_output, "ref_alt_output.tab"))
 
-    def tearDown(self):
-        # remove the temp directory and outputs
-        print_file_tree(self.temp_output)
-        shutil.rmtree(self.temp_output)
+@pytest.fixture
+def temp_output():
+    d = tempfile.mkdtemp()
+    yield d
+    shutil.rmtree(d)
 
 
-class TestRefAltCalulator(unittest.TestCase):
-    def setUp(self):
-        self.calculator = RefAltCalculator(
-            [("TEST", get_data('mock_reads_for_events.sorted.bam'))],
-            REFERENCE_GENOME,
-            max_event_size=100,
-            buffer=20,
+class TestFullCalculator:
+    def test_calculate_all_counts(self, calculator, temp_output):
+        calculator.calculate_all_counts(
+            [get_data("mavis_summary_all_mock-A36971_mock-A47933.tab")],
+            os.path.join(temp_output, "ref_alt_output.tab"),
         )
+        assert glob_exists(temp_output, "ref_alt_output.tab")
+
 
-    def test_calculate_count(self):
+class TestRefAltCalulator:
+    def test_calculate_count(self, calculator):
         ev1 = BreakpointPair(
             Breakpoint('reference11', 5999, orient=ORIENT.LEFT),
             Breakpoint('reference11', 6003, orient=ORIENT.RIGHT),
             opposing_strands=False,
             event_type=SVTYPE.DEL,
         )
-        bpp = self.calculator.calculate_ref_counts(ev1)
+        bpp = calculator.calculate_ref_counts(ev1)
         print(bpp.data)
-        self.assertEqual(27, bpp.data["TEST_ref_count"])
-        self.assertEqual(14, bpp.data["TEST_alt_count"])
-        self.assertEqual(188, bpp.data['TEST_ignored_count'])
+        assert bpp.data["TEST_ref_count"] == 27
+        assert bpp.data["TEST_alt_count"] == 14
+        assert bpp.data['TEST_ignored_count'] == 188
 
-    def test_calculate_count2(self):
+    def test_calculate_count2(self, calculator):
         ev1 = BreakpointPair(
             Breakpoint('reference11', 9999, orient=ORIENT.LEFT),
             Breakpoint('reference11', 10030, orient=ORIENT.RIGHT),
             opposing_strands=False,
             event_type=SVTYPE.DEL,
         )
-        bpp = self.calculator.calculate_ref_counts(ev1)
+        bpp = calculator.calculate_ref_counts(ev1)
         print(bpp.data)
-        self.assertEqual(0, bpp.data["TEST_ref_count"])
-        self.assertEqual(63, bpp.data["TEST_alt_count"])
-        self.assertEqual(197, bpp.data['TEST_ignored_count'])
+        assert bpp.data["TEST_ref_count"] == 0
+        assert bpp.data["TEST_alt_count"] == 63
+        assert bpp.data['TEST_ignored_count'] == 197
 
-    def test_calculate_count3(self):
+    def test_calculate_count3(self, calculator):
         ev1 = BreakpointPair(
             Breakpoint('reference1', 2002, orient=ORIENT.LEFT),
             Breakpoint('reference1', 2003, orient=ORIENT.RIGHT),
@@ -100,21 +92,21 @@ def test_calculate_count3(self):
             event_type=SVTYPE.INS,
             untemplated_seq='TT',
         )
-        bpp = self.calculator.calculate_ref_counts(ev1)
+        bpp = calculator.calculate_ref_counts(ev1)
         print(bpp.data)
-        self.assertEqual(0, bpp.data["TEST_ref_count"])
-        self.assertEqual(23, bpp.data["TEST_alt_count"])
-        self.assertEqual(145, bpp.data['TEST_ignored_count'])
+        assert bpp.data["TEST_ref_count"] == 0
+        assert bpp.data["TEST_alt_count"] == 23
+        assert bpp.data['TEST_ignored_count'] == 145
 
-    def test_calculate_count4(self):
+    def test_calculate_count4(self, calculator):
         ev1 = BreakpointPair(
             Breakpoint('reference11', 1999, orient=ORIENT.LEFT),
             Breakpoint('reference11', 2001, orient=ORIENT.RIGHT),
             opposing_strands=False,
             event_type=SVTYPE.DEL,
         )
-        bpp = self.calculator.calculate_ref_counts(ev1)
+        bpp = calculator.calculate_ref_counts(ev1)
         print(bpp.data)
-        self.assertEqual(0, bpp.data["TEST_ref_count"])
-        self.assertEqual(50, bpp.data["TEST_alt_count"])
-        self.assertEqual(191, bpp.data['TEST_ignored_count'])
+        assert bpp.data["TEST_ref_count"] == 0
+        assert bpp.data["TEST_alt_count"] == 50
+        assert bpp.data['TEST_ignored_count'] == 191
diff --git a/tests/integration/test_align.py b/tests/integration/test_align.py
index 45c9cb1a..4effd774 100644
--- a/tests/integration/test_align.py
+++ b/tests/integration/test_align.py
@@ -1,21 +1,21 @@
 import shutil
-import unittest
 from unittest import mock
 
 import mavis.bam.cigar as _cigar
+import pytest
 from mavis import align
 from mavis.annotate.file_io import load_reference_genome
 from mavis.assemble import Contig
 from mavis.bam.cache import BamCache
 from mavis.bam.read import SamRead
 from mavis.breakpoint import Breakpoint, BreakpointPair
-from mavis.constants import CIGAR, ORIENT, STRAND, SVTYPE, reverse_complement
+from mavis.constants import CIGAR, ORIENT, STRAND, reverse_complement
 from mavis.interval import Interval
 from mavis.schemas import DEFAULTS
 from mavis.validate.evidence import GenomeEvidence
 
 from ..util import get_data
-from . import MockBamFileHandle, MockLongString, MockObject, MockRead
+from . import MockLongString, MockObject, MockRead
 
 REFERENCE_GENOME = None
 
@@ -32,7 +32,7 @@ def setUpModule():
     BAM_CACHE = BamCache(get_data('mini_mock_reads_for_events.sorted.bam'))
 
 
-class TestCallReadEvents(unittest.TestCase):
+class TestCallReadEvents:
     def test_hardclipping(self):
         read = SamRead(reference_name='15')
         read.reference_start = 71491944
@@ -46,16 +46,13 @@ def test_hardclipping(self):
             untemplated_seq='',
         )
         events = align.call_read_events(read, is_stranded=True)
-        self.assertEqual(1, len(events))
-        self.assertEqual(expected_bpp.break1, events[0].break1)
-        self.assertEqual(expected_bpp.break2, events[0].break2)
+        assert len(events) == 1
+        assert events[0].break1 == expected_bpp.break1
+        assert events[0].break2 == expected_bpp.break2
 
 
-class TestAlign(unittest.TestCase):
-    def setUp(self):
-        self.cache = BamCache(MockBamFileHandle({'Y': 23, 'fake': 0, 'reference3': 3}))
-
-    @unittest.skipIf(not shutil.which('blat'), 'missing the blat command')
+class TestAlign:
+    @pytest.mark.skipif(not shutil.which('blat'), reason='missing the blat command')
     def test_blat_contigs(self):
         ev = GenomeEvidence(
             Breakpoint('reference3', 1114, orient=ORIENT.RIGHT),
@@ -92,16 +89,16 @@ def test_blat_contigs(self):
         align.select_contig_alignments(ev, seq)
         print(ev.contigs[0].alignments)
         alignment = list(ev.contigs[0].alignments)[0]
-        self.assertEqual(1, alignment.read1.reference_id)
-        self.assertEqual(1, alignment.read2.reference_id)
-        self.assertEqual(Interval(125, 244), align.query_coverage_interval(alignment.read1))
-        self.assertEqual(Interval(117, 244), align.query_coverage_interval(alignment.read2))
-        self.assertEqual(1114, alignment.read1.reference_start)
-        self.assertEqual(2187, alignment.read2.reference_start)
-        self.assertEqual([(CIGAR.S, 125), (CIGAR.EQ, 120)], alignment.read1.cigar)
-        self.assertEqual([(CIGAR.S, 117), (CIGAR.EQ, 128)], alignment.read2.cigar)
-
-    @unittest.skipIf(not shutil.which('bwa'), 'missing the command')
+        assert alignment.read1.reference_id == 1
+        assert alignment.read2.reference_id == 1
+        assert align.query_coverage_interval(alignment.read1) == Interval(125, 244)
+        assert align.query_coverage_interval(alignment.read2) == Interval(117, 244)
+        assert alignment.read1.reference_start == 1114
+        assert alignment.read2.reference_start == 2187
+        assert alignment.read1.cigar == [(CIGAR.S, 125), (CIGAR.EQ, 120)]
+        assert alignment.read2.cigar == [(CIGAR.S, 117), (CIGAR.EQ, 128)]
+
+    @pytest.mark.skipif(not shutil.which('bwa'), reason='missing the command')
     def test_bwa_contigs(self):
         ev = GenomeEvidence(
             Breakpoint('reference3', 1114, orient=ORIENT.RIGHT),
@@ -139,21 +136,19 @@ def test_bwa_contigs(self):
         align.select_contig_alignments(ev, seq)
         print(ev.contigs[0].alignments)
         alignment = list(ev.contigs[0].alignments)[0]
-        self.assertEqual(
-            reverse_complement(alignment.read1.query_sequence), alignment.read2.query_sequence
-        )
-        self.assertEqual('reference3', alignment.read1.reference_name)
-        self.assertEqual('reference3', alignment.read2.reference_name)
-        self.assertEqual(1, alignment.read1.reference_id)
-        self.assertEqual(1, alignment.read2.reference_id)
-        self.assertEqual(Interval(125, 244), align.query_coverage_interval(alignment.read1))
-        self.assertEqual(Interval(117, 244), align.query_coverage_interval(alignment.read2))
-        self.assertEqual(1114, alignment.read1.reference_start)
-        self.assertEqual(2187, alignment.read2.reference_start)
-        self.assertEqual([(CIGAR.S, 125), (CIGAR.EQ, 120)], alignment.read1.cigar)
-        self.assertEqual([(CIGAR.S, 117), (CIGAR.EQ, 128)], alignment.read2.cigar)
-
-    @unittest.skipIf(not shutil.which('blat'), 'missing the blat command')
+        assert alignment.read2.query_sequence == reverse_complement(alignment.read1.query_sequence)
+        assert alignment.read1.reference_name == 'reference3'
+        assert alignment.read2.reference_name == 'reference3'
+        assert alignment.read1.reference_id == 1
+        assert alignment.read2.reference_id == 1
+        assert align.query_coverage_interval(alignment.read1) == Interval(125, 244)
+        assert align.query_coverage_interval(alignment.read2) == Interval(117, 244)
+        assert alignment.read1.reference_start == 1114
+        assert alignment.read2.reference_start == 2187
+        assert alignment.read1.cigar == [(CIGAR.S, 125), (CIGAR.EQ, 120)]
+        assert alignment.read2.cigar == [(CIGAR.S, 117), (CIGAR.EQ, 128)]
+
+    @pytest.mark.skipif(not shutil.which('blat'), reason='missing the blat command')
     def test_blat_contigs_deletion(self):
         ev = GenomeEvidence(
             Breakpoint('fake', 1714, orient=ORIENT.LEFT),
@@ -188,20 +183,16 @@ def test_blat_contigs_deletion(self):
         print('alignments:')
         for aln in alignments:
             print(aln, repr(aln.read1), repr(aln.read2))
-        self.assertEqual(1, len(alignments))
+        assert len(alignments) == 1
         alignment = alignments[0]
-        self.assertTrue(alignment.read2 is None)
-        self.assertEqual(0, alignment.read1.reference_id)
-        self.assertTrue(not alignment.read1.is_reverse)
-        self.assertEqual(Interval(0, 175), align.query_coverage_interval(alignment.read1))
-        self.assertEqual(1612, alignment.read1.reference_start)
-        self.assertEqual([(CIGAR.EQ, 102), (CIGAR.D, 1253), (CIGAR.EQ, 74)], alignment.read1.cigar)
-
-    @unittest.skipIf(not shutil.which('blat'), 'missing the blat command')
-    def test_blat_contigs_inversion(self):
-        raise unittest.SkipTest('TODO')
-
-    @unittest.skipIf(not shutil.which('blat'), 'missing the blat command')
+        assert alignment.read2 is None
+        assert alignment.read1.reference_id == 0
+        assert not alignment.read1.is_reverse
+        assert align.query_coverage_interval(alignment.read1) == Interval(0, 175)
+        assert alignment.read1.reference_start == 1612
+        assert alignment.read1.cigar == [(CIGAR.EQ, 102), (CIGAR.D, 1253), (CIGAR.EQ, 74)]
+
+    @pytest.mark.skipif(not shutil.which('blat'), reason='missing the blat command')
     def test_blat_contigs_deletion_revcomp(self):
         ev = GenomeEvidence(
             Breakpoint('fake', 1714, orient=ORIENT.LEFT),
@@ -231,33 +222,32 @@ def test_blat_contigs_deletion_revcomp(self):
         print('alignments:', ev.contigs[0].alignments)
         alignment = list(ev.contigs[0].alignments)[0]
         print(alignment)
-        self.assertTrue(alignment.read2 is None)
-        self.assertEqual(0, alignment.read1.reference_id)
-        self.assertTrue(alignment.read1.is_reverse)
-        self.assertEqual(seq, alignment.read1.query_sequence)
-        self.assertEqual(Interval(0, 175), align.query_coverage_interval(alignment.read1))
-        self.assertEqual(1612, alignment.read1.reference_start)
-        self.assertEqual([(CIGAR.EQ, 102), (CIGAR.D, 1253), (CIGAR.EQ, 74)], alignment.read1.cigar)
-
-
-class TestBreakpointContigRemappedDepth(unittest.TestCase):
-    def setUp(self):
-        self.contig = Contig(' ' * 60, None)
-        self.contig.add_mapped_sequence(MockObject(reference_start=0, reference_end=10))
-        self.contig.add_mapped_sequence(MockObject(reference_start=0, reference_end=20))
-        self.contig.add_mapped_sequence(MockObject(reference_start=50, reference_end=60))
+        assert alignment.read2 is None
+        assert alignment.read1.reference_id == 0
+        assert alignment.read1.is_reverse
+        assert alignment.read1.query_sequence == seq
+        assert align.query_coverage_interval(alignment.read1) == Interval(0, 175)
+        assert alignment.read1.reference_start == 1612
+        assert alignment.read1.cigar == [(CIGAR.EQ, 102), (CIGAR.D, 1253), (CIGAR.EQ, 74)]
+
 
+class TestBreakpointContigRemappedDepth:
     def test_break_left_deletion(self):
+        contig = Contig(' ' * 60, None)
+        contig.add_mapped_sequence(MockObject(reference_start=0, reference_end=10))
+        contig.add_mapped_sequence(MockObject(reference_start=0, reference_end=20))
+        contig.add_mapped_sequence(MockObject(reference_start=50, reference_end=60))
+
         b = Breakpoint('10', 1030, 1030, orient=ORIENT.LEFT)
         read = MockRead(
             cigar=_cigar.convert_string_to_cigar('35M10D5I20M'),
             reference_start=999,
             reference_name='10',
         )
-        align.SplitAlignment.breakpoint_contig_remapped_depth(b, self.contig, read)
+        align.SplitAlignment.breakpoint_contig_remapped_depth(b, contig, read)
 
 
-class TestSplitEvents(unittest.TestCase):
+class TestSplitEvents:
     def test_read_with_exons(self):
         contig = MockRead(
             query_sequence='CTTGAAGGAAACTGAATTCAAAAAGATCAAAGTGCTGGGCTCCGGTGCGTTCGGCACGGTGTATAAGGGACTCTGGATCCCAGAAGGTGAGAAAGTTAAAATTCCCGTCGCTATCAAGACATCTCCGAAAGCCAACAAGGAAATCCTCGATGAAGCCTACGTGATGGCCAGCGTGGACAACCCCCACGTGTGCCGCCTGCTGGGCATCTGCCTCACCTCCACCGTGCAGCTCATCATGCAGCTCATGCCCTTCGGCTGCCTCCTGGACTATGTCCGGGAACACAAAGACAATATTGGCTCCCAGTACCTGCTCAACTGGTGTGTGCAGATCGCAAAGGGCATGAACTACTTGGAGGACCGTCGCTTGGTGCACCGCGACCTGGCAGCCAGGAACGTACTGGTGAAAACACCGCAGCATGTCAAGATCACAGATTTTGGGCTGGCCAAACTGCTGGGTGCGGAAGAGAAAGAATACCATGCAGAAGGAGGCAAAGTGCCTATCAAGTGGATGGCATTGGAATCAATTTTACACAGAATCTATACCCACCAGAGTGATGTCTGGAGCTACGGGGTGACCGTTTGGGAGTTGATGACCTTTGGATCCAA',
@@ -268,10 +258,10 @@ def test_read_with_exons(self):
             reference_id=6,
             reference_start=55241669,
         )
-        self.assertEqual(6, len(align.call_read_events(contig)))
+        assert len(align.call_read_events(contig)) == 6
 
 
-class TestCallBreakpointPair(unittest.TestCase):
+class TestCallBreakpointPair:
     def test_single_one_event(self):
         r = MockRead(
             reference_id=0,
@@ -281,14 +271,14 @@ def test_single_one_event(self):
             query_sequence='ACTGAATCGTGGGTAGCTGCTAG',
         )
         bpps = align.call_read_events(r)
-        self.assertEqual(1, len(bpps))
+        assert len(bpps) == 1
         bpp = bpps[0]
-        self.assertEqual(False, bpp.opposing_strands)
-        self.assertEqual(10, bpp.break1.start)
-        self.assertEqual(10, bpp.break1.end)
-        self.assertEqual(18, bpp.break2.start)
-        self.assertEqual(18, bpp.break2.end)
-        self.assertEqual('GGG', bpp.untemplated_seq)
+        assert bpp.opposing_strands is False
+        assert bpp.break1.start == 10
+        assert bpp.break1.end == 10
+        assert bpp.break2.start == 18
+        assert bpp.break2.end == 18
+        assert bpp.untemplated_seq == 'GGG'
 
     def test_ins_and_del(self):
         r = MockRead(
@@ -300,20 +290,20 @@ def test_ins_and_del(self):
         )
         # only report the major del event for now
         bpps = align.call_read_events(r)
-        self.assertEqual(2, len(bpps))
+        assert len(bpps) == 2
         bpp = bpps[0]
-        self.assertEqual(False, bpp.opposing_strands)
-        self.assertEqual(10, bpp.break1.start)
-        self.assertEqual(10, bpp.break1.end)
-        self.assertEqual(11, bpp.break2.start)
-        self.assertEqual(11, bpp.break2.end)
-        self.assertEqual('GGG', bpp.untemplated_seq)
+        assert bpp.opposing_strands is False
+        assert bpp.break1.start == 10
+        assert bpp.break1.end == 10
+        assert bpp.break2.start == 11
+        assert bpp.break2.end == 11
+        assert bpp.untemplated_seq == 'GGG'
         bpp = bpps[1]
-        self.assertEqual(False, bpp.opposing_strands)
-        self.assertEqual(15, bpp.break1.start)
-        self.assertEqual(15, bpp.break1.end)
-        self.assertEqual(23, bpp.break2.start)
-        self.assertEqual(23, bpp.break2.end)
+        assert bpp.opposing_strands is False
+        assert bpp.break1.start == 15
+        assert bpp.break1.end == 15
+        assert bpp.break2.start == 23
+        assert bpp.break2.end == 23
 
     def test_single_insertion(self):
         r = MockRead(
@@ -324,12 +314,12 @@ def test_single_insertion(self):
             query_sequence='ACTGAATCGTGGGTAGCTGCTAG',
         )
         bpp = align.call_read_events(r)[0]
-        self.assertEqual(False, bpp.opposing_strands)
-        self.assertEqual(10, bpp.break1.start)
-        self.assertEqual(10, bpp.break1.end)
-        self.assertEqual(11, bpp.break2.start)
-        self.assertEqual(11, bpp.break2.end)
-        self.assertEqual('GGGTAGCT', bpp.untemplated_seq)
+        assert bpp.opposing_strands is False
+        assert bpp.break1.start == 10
+        assert bpp.break1.end == 10
+        assert bpp.break2.start == 11
+        assert bpp.break2.end == 11
+        assert bpp.untemplated_seq == 'GGGTAGCT'
 
     def test_single_duplication(self):
         r = MockRead(
@@ -341,9 +331,9 @@ def test_single_duplication(self):
             'GACAGACTCTAGTAGTGTC',
         )
         bpp = align.call_read_events(r)[0]
-        self.assertEqual(27220, bpp.break1.start)
-        self.assertEqual(27316, bpp.break2.start)
-        self.assertEqual('AGACTT', bpp.untemplated_seq)
+        assert bpp.break1.start == 27220
+        assert bpp.break2.start == 27316
+        assert bpp.untemplated_seq == 'AGACTT'
 
     def test_single_duplication_with_leading_untemp(self):
         r = MockRead(
@@ -360,11 +350,9 @@ def test_single_duplication_with_leading_untemp(self):
             is_reverse=False,
         )
         bpp = align.call_read_events(r)[0]
-        self.assertEqual(
-            'AGGTTCCATGGGCTCCGTAGGTTCCATGGGCTCCGTAGGTTCCATCGGCTCCGT', bpp.untemplated_seq
-        )
-        self.assertEqual(ORIENT.LEFT, bpp.break1.orient)
-        self.assertEqual(ORIENT.RIGHT, bpp.break2.orient)
+        assert bpp.untemplated_seq == 'AGGTTCCATGGGCTCCGTAGGTTCCATGGGCTCCGTAGGTTCCATCGGCTCCGT'
+        assert bpp.break1.orient == ORIENT.LEFT
+        assert bpp.break2.orient == ORIENT.RIGHT
 
     def test_single_duplication_with_no_untemp(self):
         r = MockRead(
@@ -381,11 +369,11 @@ def test_single_duplication_with_no_untemp(self):
         )
         # repeat: GATTTTGCTGTTGTTTTTGTTC
         bpp = align.convert_to_duplication(align.call_read_events(r)[0], REFERENCE_GENOME)
-        self.assertEqual('', bpp.untemplated_seq)
-        self.assertEqual(ORIENT.RIGHT, bpp.break1.orient)
-        self.assertEqual(ORIENT.LEFT, bpp.break2.orient)
-        self.assertEqual(bpp.break2.start, 1548)
-        self.assertEqual(bpp.break1.start, 1527)
+        assert bpp.untemplated_seq == ''
+        assert bpp.break1.orient == ORIENT.RIGHT
+        assert bpp.break2.orient == ORIENT.LEFT
+        assert 1548 == bpp.break2.start
+        assert 1527 == bpp.break1.start
 
     def test_single_duplication_with_trailing_untemp(self):
         r = MockRead(
@@ -411,11 +399,11 @@ def test_single_duplication_with_trailing_untemp(self):
         print(bpp)
         bpp = align.convert_to_duplication(bpp, REFERENCE_GENOME)
         print(bpp)
-        self.assertEqual('GTCAA', bpp.untemplated_seq)
-        self.assertEqual(ORIENT.RIGHT, bpp.break1.orient)
-        self.assertEqual(ORIENT.LEFT, bpp.break2.orient)
-        self.assertEqual(bpp.break2.start, 1548)
-        self.assertEqual(bpp.break1.start, 1527)
+        assert bpp.untemplated_seq == 'GTCAA'
+        assert bpp.break1.orient == ORIENT.RIGHT
+        assert bpp.break2.orient == ORIENT.LEFT
+        assert 1548 == bpp.break2.start
+        assert 1527 == bpp.break1.start
 
     def test_read_pair_indel(self):
         # seq AAATTTCCCGGGAATTCCGGATCGATCGAT 1-30     1-?
@@ -441,15 +429,15 @@ def test_read_pair_indel(self):
             is_reverse=False,
         )
         bpp = align.call_paired_read_event(r1, r2, is_stranded=True)
-        self.assertEqual(STRAND.POS, bpp.break1.strand)
-        self.assertEqual(STRAND.POS, bpp.break2.strand)
-        self.assertEqual(ORIENT.LEFT, bpp.break1.orient)
-        self.assertEqual(ORIENT.RIGHT, bpp.break2.orient)
-        self.assertEqual('GGGAATTCCGGA', bpp.untemplated_seq)
-        self.assertEqual(9, bpp.break1.start)
-        self.assertEqual(100, bpp.break2.start)
-        self.assertEqual('AAATTTCCC', bpp.break1.seq)
-        self.assertEqual('TCGATCGAT', bpp.break2.seq)
+        assert bpp.break1.strand == STRAND.POS
+        assert bpp.break2.strand == STRAND.POS
+        assert bpp.break1.orient == ORIENT.LEFT
+        assert bpp.break2.orient == ORIENT.RIGHT
+        assert bpp.untemplated_seq == 'GGGAATTCCGGA'
+        assert bpp.break1.start == 9
+        assert bpp.break2.start == 100
+        assert bpp.break1.seq == 'AAATTTCCC'
+        assert bpp.break2.seq == 'TCGATCGAT'
 
     def test_read_pair_deletion(self):
         # seq AAATTTCCCGGGAATTCCGGATCGATCGAT
@@ -474,13 +462,13 @@ def test_read_pair_deletion(self):
             is_reverse=False,
         )
         bpp = align.call_paired_read_event(r1, r2, is_stranded=True)
-        self.assertEqual(STRAND.POS, bpp.break1.strand)
-        self.assertEqual(STRAND.POS, bpp.break2.strand)
-        self.assertEqual(ORIENT.LEFT, bpp.break1.orient)
-        self.assertEqual(ORIENT.RIGHT, bpp.break2.orient)
-        self.assertEqual('', bpp.untemplated_seq)
-        self.assertEqual(21, bpp.break1.start)
-        self.assertEqual(100, bpp.break2.start)
+        assert bpp.break1.strand == STRAND.POS
+        assert bpp.break2.strand == STRAND.POS
+        assert bpp.break1.orient == ORIENT.LEFT
+        assert bpp.break2.orient == ORIENT.RIGHT
+        assert bpp.untemplated_seq == ''
+        assert bpp.break1.start == 21
+        assert bpp.break2.start == 100
 
     def test_read_pair_translocation(self):
         # seq AAATTTCCCGGGAATTCCGGATCGATCGAT
@@ -505,13 +493,13 @@ def test_read_pair_translocation(self):
             is_reverse=False,
         )
         bpp = align.call_paired_read_event(r1, r2, is_stranded=True)
-        self.assertEqual(STRAND.POS, bpp.break1.strand)
-        self.assertEqual(STRAND.POS, bpp.break2.strand)
-        self.assertEqual(ORIENT.RIGHT, bpp.break1.orient)
-        self.assertEqual(ORIENT.LEFT, bpp.break2.orient)
-        self.assertEqual('1', bpp.break1.chr)
-        self.assertEqual('2', bpp.break2.chr)
-        self.assertEqual('', bpp.untemplated_seq)
+        assert bpp.break1.strand == STRAND.POS
+        assert bpp.break2.strand == STRAND.POS
+        assert bpp.break1.orient == ORIENT.RIGHT
+        assert bpp.break2.orient == ORIENT.LEFT
+        assert bpp.break1.chr == '1'
+        assert bpp.break2.chr == '2'
+        assert bpp.untemplated_seq == ''
 
     def test_read_pair_deletion_overlapping_query_coverage(self):
         # seq AAATTTCCCGGGAATTCCGGATCGATCGAT
@@ -536,17 +524,17 @@ def test_read_pair_deletion_overlapping_query_coverage(self):
             query_sequence=seq,
             is_reverse=False,
         )
-        self.assertEqual(21, r1.reference_end)
+        assert r1.reference_end == 21
         bpp = align.call_paired_read_event(r1, r2, is_stranded=True)
-        self.assertEqual(STRAND.POS, bpp.break1.strand)
-        self.assertEqual(STRAND.POS, bpp.break2.strand)
-        self.assertEqual(ORIENT.LEFT, bpp.break1.orient)
-        self.assertEqual(ORIENT.RIGHT, bpp.break2.orient)
-        self.assertEqual('', bpp.untemplated_seq)
-        self.assertEqual(21, bpp.break1.start)
-        self.assertEqual(103, bpp.break2.start)
-        self.assertEqual('AAATTTCCCGGGAATTCCGGA', bpp.break1.seq)
-        self.assertEqual('TCGATCGAT', bpp.break2.seq)
+        assert bpp.break1.strand == STRAND.POS
+        assert bpp.break2.strand == STRAND.POS
+        assert bpp.break1.orient == ORIENT.LEFT
+        assert bpp.break2.orient == ORIENT.RIGHT
+        assert bpp.untemplated_seq == ''
+        assert bpp.break1.start == 21
+        assert bpp.break2.start == 103
+        assert bpp.break1.seq == 'AAATTTCCCGGGAATTCCGGA'
+        assert bpp.break2.seq == 'TCGATCGAT'
 
     def test_read_pair_inversion_overlapping_query_coverage(self):
         # seq AAATTTCCCGGGAATTCCGGATCGATCGAT
@@ -573,15 +561,15 @@ def test_read_pair_inversion_overlapping_query_coverage(self):
             is_reverse=True,
         )
         bpp = align.call_paired_read_event(r1, r2, is_stranded=True)
-        self.assertEqual(STRAND.POS, bpp.break1.strand)
-        self.assertEqual(STRAND.NEG, bpp.break2.strand)
-        self.assertEqual(ORIENT.LEFT, bpp.break1.orient)
-        self.assertEqual(ORIENT.LEFT, bpp.break2.orient)
-        self.assertEqual('', bpp.untemplated_seq)
-        self.assertEqual(21, bpp.break1.start)
-        self.assertEqual(108, bpp.break2.start)
-        self.assertEqual('AAATTTCCCGGGAATTCCGGA', bpp.break1.seq)
-        self.assertEqual(reverse_complement('TCGATCGAT'), bpp.break2.seq)
+        assert bpp.break1.strand == STRAND.POS
+        assert bpp.break2.strand == STRAND.NEG
+        assert bpp.break1.orient == ORIENT.LEFT
+        assert bpp.break2.orient == ORIENT.LEFT
+        assert bpp.untemplated_seq == ''
+        assert bpp.break1.start == 21
+        assert bpp.break2.start == 108
+        assert bpp.break1.seq == 'AAATTTCCCGGGAATTCCGGA'
+        assert bpp.break2.seq == reverse_complement('TCGATCGAT')
 
     def test_read_pair_large_inversion_overlapping_query_coverage(self):
         s = 'CTGAGCATGAAAGCCCTGTAAACACAGAATTTGGATTCTTTCCTGTTTGGTTCCTGGTCGTGAGTGGCAGGTGCCATCATGTTTCATTCTGCCTGAGAGCAGTCTACCTAAATATATAGCTCTGCTCACAGTTTCCCTGCAATGCATAATTAAAATAGCACTATGCAGTTGCTTACACTTCAGATAATGGCTTCCTACATATTGTTGGTTATGAAATTTCAGGGTTTTCATTTCTGTATGTTAAT'
@@ -601,24 +589,22 @@ def test_read_pair_large_inversion_overlapping_query_coverage(self):
             is_reverse=True,
         )
         bpp = align.call_paired_read_event(read1, read2, is_stranded=True)
-        self.assertEqual(STRAND.POS, bpp.break1.strand)
-        self.assertEqual(STRAND.NEG, bpp.break2.strand)
-        self.assertEqual(ORIENT.RIGHT, bpp.break1.orient)
-        self.assertEqual(ORIENT.RIGHT, bpp.break2.orient)
-        self.assertEqual('', bpp.untemplated_seq)
-        self.assertEqual(1115, bpp.break1.start)
-        self.assertEqual(2188 + 3, bpp.break2.start)
+        assert bpp.break1.strand == STRAND.POS
+        assert bpp.break2.strand == STRAND.NEG
+        assert bpp.break1.orient == ORIENT.RIGHT
+        assert bpp.break2.orient == ORIENT.RIGHT
+        assert bpp.untemplated_seq == ''
+        assert bpp.break1.start == 1115
+        assert bpp.break2.start == 2188 + 3
         print(bpp.break1.seq)
         print(bpp.break2.seq)
-        self.assertEqual(
-            'TCACAGTTTCCCTGCAATGCATAATTAAAATAGCACTATGCAGTTGCTTACACTTCAGATAATGGCTTCCTACATATTGTTGGTTATGAAATTTCAG'
-            'GGTTTTCATTTCTGTATGTTAAT',
-            bpp.break1.seq,
+        assert (
+            bpp.break1.seq
+            == 'TCACAGTTTCCCTGCAATGCATAATTAAAATAGCACTATGCAGTTGCTTACACTTCAGATAATGGCTTCCTACATATTGTTGGTTATGAAATTTCAGGGTTTTCATTTCTGTATGTTAAT'
         )
-        self.assertEqual(
-            'GCAGAGCTATATATTTAGGTAGACTGCTCTCAGGCAGAATGAAACATGATGGCACCTGCCACTCACGACCAGGAACCAAACAGGAAAGAATCCA'
-            'AATTCTGTGTTTACAGGGCTTTCATGCTCAG',
-            bpp.break2.seq,
+        assert (
+            bpp.break2.seq
+            == 'GCAGAGCTATATATTTAGGTAGACTGCTCTCAGGCAGAATGAAACATGATGGCACCTGCCACTCACGACCAGGAACCAAACAGGAAAGAATCCAAATTCTGTGTTTACAGGGCTTTCATGCTCAG'
         )
 
     def test_read_pair_inversion_gap_in_query_coverage(self):
@@ -646,18 +632,18 @@ def test_read_pair_inversion_gap_in_query_coverage(self):
             is_reverse=True,
         )
         bpp = align.call_paired_read_event(r1, r2, is_stranded=True)
-        self.assertEqual(STRAND.POS, bpp.break1.strand)
-        self.assertEqual(STRAND.NEG, bpp.break2.strand)
-        self.assertEqual(ORIENT.LEFT, bpp.break1.orient)
-        self.assertEqual(ORIENT.LEFT, bpp.break2.orient)
-        self.assertEqual('CC', bpp.untemplated_seq)
-        self.assertEqual(16, bpp.break1.start)
-        self.assertEqual(111, bpp.break2.start)
-        self.assertEqual('AAATTTCCCGGGAATT', bpp.break1.seq)
-        self.assertEqual(reverse_complement('GGATCGATCGAT'), bpp.break2.seq)
-
-
-class TestConvertToDuplication(unittest.TestCase):
+        assert bpp.break1.strand == STRAND.POS
+        assert bpp.break2.strand == STRAND.NEG
+        assert bpp.break1.orient == ORIENT.LEFT
+        assert bpp.break2.orient == ORIENT.LEFT
+        assert bpp.untemplated_seq == 'CC'
+        assert bpp.break1.start == 16
+        assert bpp.break2.start == 111
+        assert bpp.break1.seq == 'AAATTTCCCGGGAATT'
+        assert bpp.break2.seq == reverse_complement('GGATCGATCGAT')
+
+
+class TestConvertToDuplication:
     def test_insertion_to_duplication(self):
         # BPP(Breakpoint(3:60204611L), Breakpoint(3:60204612R), opposing=False, seq='CATACATACATACATACATACATACATACATA')
         # insertion contig [seq2] contig_alignment_score: 0.99, contig_alignment_mq: Interval(255, 255)
@@ -681,13 +667,13 @@ def test_insertion_to_duplication(self):
         setattr(bpp, 'read2', None)
         event = align.convert_to_duplication(bpp, reference_genome)
         print(event)
-        self.assertEqual(ORIENT.RIGHT, event.break1.orient)
-        self.assertEqual(60204588, event.break1.start)
-        self.assertEqual(ORIENT.LEFT, event.break2.orient)
-        self.assertEqual(60204611, event.break2.start)
+        assert event.break1.orient == ORIENT.RIGHT
+        assert event.break1.start == 60204588
+        assert event.break2.orient == ORIENT.LEFT
+        assert event.break2.start == 60204611
         # CATACATACATACATACATACATACATACATA
         # ........................********
-        self.assertEqual('CATACATA', event.untemplated_seq)
+        assert event.untemplated_seq == 'CATACATA'
 
     def test_single_bp_insertion(self):
         bpp = BreakpointPair(
@@ -704,14 +690,14 @@ def test_single_bp_insertion(self):
         setattr(bpp, 'read2', None)
         event = align.convert_to_duplication(bpp, reference_genome)
         print(event)
-        self.assertEqual(ORIENT.RIGHT, event.break1.orient)
-        self.assertEqual(121, event.break1.start)
-        self.assertEqual(ORIENT.LEFT, event.break2.orient)
-        self.assertEqual(121, event.break2.start)
-        self.assertEqual('', event.untemplated_seq)
+        assert event.break1.orient == ORIENT.RIGHT
+        assert event.break1.start == 121
+        assert event.break2.orient == ORIENT.LEFT
+        assert event.break2.start == 121
+        assert event.untemplated_seq == ''
 
 
-class TestSelectContigAlignments(unittest.TestCase):
+class TestSelectContigAlignments:
     def test_inversion_and_deletion(self):
         s = 'CTGAGCATGAAAGCCCTGTAAACACAGAATTTGGATTCTTTCCTGTTTGGTTCCTGGTCGTGAGTGGCAGGTGCCATCATGTTTCATTCTGCCTGAGAGCAGTCTACCTAAATATATAGCTCTGCTCACAGTTTCCCTGCAATGCATAATTAAAATAGCACTATGCAGTTGCTTACACTTCAGATAATGGCTTCCTACATATTGTTGGTTATGAAATTTCAGGGTTTTCATTTCTGTATGTTAAT'
         evidence = MockObject(
@@ -756,34 +742,30 @@ def test_inversion_and_deletion(self):
         raw_alignments = {s: [read1, read2]}
         align.select_contig_alignments(evidence, raw_alignments)
         alignments = list(evidence.contigs[0].alignments)
-        self.assertEqual(2, len(alignments))
+        assert len(alignments) == 2
 
 
-class TestGetAlignerVersion(unittest.TestCase):
+class TestGetAlignerVersion:
     def test_get_blat_36x2(self):
         content = 'blat - Standalone BLAT v. 36x2 fast sequence search command line tool\n'
         with mock.patch('subprocess.getoutput', mock.Mock(return_value=content)):
-            self.assertEqual('36x2', align.get_aligner_version(align.SUPPORTED_ALIGNER.BLAT))
+            assert align.get_aligner_version(align.SUPPORTED_ALIGNER.BLAT) == '36x2'
 
     def test_get_blat_36(self):
         content = "blat - Standalone BLAT v. 36 fast sequence search command line tool"
         with mock.patch('subprocess.getoutput', mock.Mock(return_value=content)):
-            self.assertEqual('36', align.get_aligner_version(align.SUPPORTED_ALIGNER.BLAT))
+            assert align.get_aligner_version(align.SUPPORTED_ALIGNER.BLAT) == '36'
 
     def test_get_bwa_0_7_15(self):
         content = (
             "\nProgram: bwa (alignment via Burrows-Wheeler transformation)\nVersion: 0.7.15-r1140"
         )
         with mock.patch('subprocess.getoutput', mock.Mock(return_value=content)):
-            self.assertEqual(
-                '0.7.15-r1140', align.get_aligner_version(align.SUPPORTED_ALIGNER.BWA_MEM)
-            )
+            assert align.get_aligner_version(align.SUPPORTED_ALIGNER.BWA_MEM) == '0.7.15-r1140'
 
     def test_get_bwa_0_7_12(self):
         content = (
             "\nProgram: bwa (alignment via Burrows-Wheeler transformation)\nVersion: 0.7.12-r1039"
         )
         with mock.patch('subprocess.getoutput', mock.Mock(return_value=content)):
-            self.assertEqual(
-                '0.7.12-r1039', align.get_aligner_version(align.SUPPORTED_ALIGNER.BWA_MEM)
-            )
+            assert align.get_aligner_version(align.SUPPORTED_ALIGNER.BWA_MEM) == '0.7.12-r1039'
diff --git a/tests/integration/test_annotate.py b/tests/integration/test_annotate.py
index 9532665c..bf816b70 100644
--- a/tests/integration/test_annotate.py
+++ b/tests/integration/test_annotate.py
@@ -1,6 +1,7 @@
-import os
+import argparse
 import unittest
 
+import pytest
 from mavis.annotate.base import BioInterval, ReferenceName
 from mavis.annotate.file_io import load_annotations, load_reference_genome
 from mavis.annotate.fusion import FusionTranscript, determine_prime
@@ -11,16 +12,15 @@
     _gather_annotations,
     _gather_breakpoint_annotations,
     annotate_events,
-    flatten_fusion_transcript,
     overlapping_transcripts,
 )
 from mavis.breakpoint import Breakpoint, BreakpointPair
-from mavis.constants import ORIENT, PRIME, PROTOCOL, SPLICE_TYPE, STRAND, SVTYPE, reverse_complement
+from mavis.constants import ORIENT, PRIME, PROTOCOL, STRAND, SVTYPE, reverse_complement
 from mavis.error import NotSpecifiedError
 from mavis.interval import Interval
 
 from ..util import get_data
-from . import MockLongString, MockObject, get_example_genes
+from . import MockObject, get_example_genes
 
 REFERENCE_ANNOTATIONS = None
 REFERENCE_GENOME = None
@@ -40,129 +40,156 @@ def setUpModule():
     print('loaded the reference genome', get_data('mock_reference_genome.fa'))
 
 
-class TestTemplate(unittest.TestCase):
+class TestTemplate:
     def test_template_hashing(self):
         t = Template('1', 1, 10)
         d = {'1': 1, '2': 2, 1: '5'}
-        self.assertEqual('1', t.name)
-        self.assertEqual(1, d[t.name])
-        self.assertEqual(1, d[t])
-
-
-class TestFusionTranscript(unittest.TestCase):
-    def setUp(self):
-        self.x = Interval(100, 199)  # C
-        self.y = Interval(500, 599)  # G
-        self.z = Interval(1200, 1299)  # T
-        self.w = Interval(1500, 1599)  # C
-        self.s = Interval(1700, 1799)  # G
-        # introns: 99, 300, 600, 200, 100, ...
-        reference_sequence = 'A' * 99 + 'C' * 100 + 'A' * 300 + 'G' * 100
-        reference_sequence += 'A' * 600 + 'T' * 100 + 'A' * 200 + 'C' * 100
-        reference_sequence += 'A' * 100 + 'G' * 100 + 'A' * 200 + 'T' * 100
-
-        self.a = Interval(2000, 2099)  # T
-        self.b = Interval(2600, 2699)  # C
-        self.c = Interval(3000, 3099)  # G
-        self.d = Interval(3300, 3399)  # T
-        reference_sequence += 'A' * 500 + 'C' * 100 + 'A' * 300 + 'G' * 100
-        reference_sequence += 'A' * 200 + 'T' * 100 + 'A' * 200
-        self.reference_sequence = reference_sequence
-
-        self.b1 = Interval(600, 699)  # A
-        self.b2 = Interval(800, 899)  # G
-        self.b3 = Interval(1100, 1199)  # T
-        self.b4 = Interval(1400, 1499)  # A
-        self.b5 = Interval(1700, 1799)  # G
-        self.b6 = Interval(2100, 2199)  # A
-        alternate_sequence = 'C' * 599 + 'A' * 100 + 'C' * 100 + 'G' * 100
-        alternate_sequence += 'C' * 200 + 'T' * 100 + 'C' * 200 + 'A' * 100
-        alternate_sequence += 'C' * 200 + 'G' * 100 + 'C' * 300 + 'A' * 100
-        alternate_sequence += 'C' * 200
-        self.alternate_sequence = alternate_sequence
-
-    def test__pull_exons_left_pos_intronic(self):
+        assert t.name == '1'
+        assert d[t.name] == 1
+        assert d[t] == 1
+
+
+@pytest.fixture
+def intervals():
+    n = argparse.Namespace()
+    n.x = Interval(100, 199)  # C
+    n.y = Interval(500, 599)  # G
+    n.z = Interval(1200, 1299)  # T
+    n.w = Interval(1500, 1599)  # C
+    n.s = Interval(1700, 1799)  # G
+    # introns: 99, 300, 600, 200, 100, ...
+    reference_sequence = 'A' * 99 + 'C' * 100 + 'A' * 300 + 'G' * 100
+    reference_sequence += 'A' * 600 + 'T' * 100 + 'A' * 200 + 'C' * 100
+    reference_sequence += 'A' * 100 + 'G' * 100 + 'A' * 200 + 'T' * 100
+
+    n.a = Interval(2000, 2099)  # T
+    n.b = Interval(2600, 2699)  # C
+    n.c = Interval(3000, 3099)  # G
+    n.d = Interval(3300, 3399)  # T
+    reference_sequence += 'A' * 500 + 'C' * 100 + 'A' * 300 + 'G' * 100
+    reference_sequence += 'A' * 200 + 'T' * 100 + 'A' * 200
+    n.reference_sequence = reference_sequence
+
+    n.b1 = Interval(600, 699)  # A
+    n.b2 = Interval(800, 899)  # G
+    n.b3 = Interval(1100, 1199)  # T
+    n.b4 = Interval(1400, 1499)  # A
+    n.b5 = Interval(1700, 1799)  # G
+    n.b6 = Interval(2100, 2199)  # A
+    alternate_sequence = 'C' * 599 + 'A' * 100 + 'C' * 100 + 'G' * 100
+    alternate_sequence += 'C' * 200 + 'T' * 100 + 'C' * 200 + 'A' * 100
+    alternate_sequence += 'C' * 200 + 'G' * 100 + 'C' * 300 + 'A' * 100
+    alternate_sequence += 'C' * 200
+    n.alternate_sequence = alternate_sequence
+    return n
+
+
+class TestFusionTranscript:
+    def test__pull_exons_left_pos_intronic(self, intervals):
         # 100-199, 500-599, 1200-1299, 1500-1599, 1700-1799
-        t = PreTranscript(exons=[self.x, self.y, self.z, self.w, self.s], strand=STRAND.POS)
+        t = PreTranscript(
+            exons=[intervals.x, intervals.y, intervals.z, intervals.w, intervals.s],
+            strand=STRAND.POS,
+        )
         b = Breakpoint(REF_CHR, 700, orient=ORIENT.LEFT)
-        seq, new_exons = FusionTranscript._pull_exons(t, b, self.reference_sequence)
-        expt = 'C' * len(self.x) + 'A' * (499 - 200 + 1) + 'G' * len(self.y) + 'A' * (700 - 600 + 1)
-        self.assertEqual(expt, seq)
-        self.assertEqual(2, len(new_exons))
+        seq, new_exons = FusionTranscript._pull_exons(t, b, intervals.reference_sequence)
+        expt = (
+            'C' * len(intervals.x)
+            + 'A' * (499 - 200 + 1)
+            + 'G' * len(intervals.y)
+            + 'A' * (700 - 600 + 1)
+        )
+        assert seq == expt
+        assert len(new_exons) == 2
         e = new_exons[0][0]
-        self.assertEqual(1, e.start)
-        self.assertEqual(100, e.end)
-        self.assertEqual(True, e.start_splice_site.intact)
-        self.assertEqual(True, e.end_splice_site.intact)
+        assert e.start == 1
+        assert e.end == 100
+        assert e.start_splice_site.intact is True
+        assert e.end_splice_site.intact is True
 
-    def test__pull_exons_left_pos_intronic_splice(self):
-        t = PreTranscript(exons=[self.x, self.y, self.z, self.w, self.s], strand=STRAND.POS)
+    def test__pull_exons_left_pos_intronic_splice(self, intervals):
+        t = PreTranscript(
+            exons=[intervals.x, intervals.y, intervals.z, intervals.w, intervals.s],
+            strand=STRAND.POS,
+        )
         b = Breakpoint(REF_CHR, 201, orient=ORIENT.LEFT)
-        seq, new_exons = FusionTranscript._pull_exons(t, b, self.reference_sequence)
+        seq, new_exons = FusionTranscript._pull_exons(t, b, intervals.reference_sequence)
         expt = 'C' * 100 + 'A' * 2
-        self.assertEqual(expt, seq)
-        self.assertEqual(1, len(new_exons))
+        assert seq == expt
+        assert len(new_exons) == 1
         e = new_exons[0][0]
-        self.assertEqual(1, e.start)
-        self.assertEqual(100, e.end)
-        self.assertEqual(True, e.start_splice_site.intact)
-        self.assertEqual(False, e.end_splice_site.intact)
+        assert e.start == 1
+        assert e.end == 100
+        assert e.start_splice_site.intact is True
+        assert e.end_splice_site.intact is False
 
-    def test__pull_exons_left_pos_exonic(self):
-        t = PreTranscript(exons=[self.x, self.y, self.z, self.w, self.s], strand=STRAND.POS)
+    def test__pull_exons_left_pos_exonic(self, intervals):
+        t = PreTranscript(
+            exons=[intervals.x, intervals.y, intervals.z, intervals.w, intervals.s],
+            strand=STRAND.POS,
+        )
         print('transcriptt exons:', t.exons)
         b = Breakpoint(REF_CHR, 199, orient=ORIENT.LEFT)
-        seq, new_exons = FusionTranscript._pull_exons(t, b, self.reference_sequence)
+        seq, new_exons = FusionTranscript._pull_exons(t, b, intervals.reference_sequence)
         expt = 'C' * 100
-        self.assertEqual(expt, seq)
-        self.assertEqual(1, len(new_exons))
+        assert seq == expt
+        assert len(new_exons) == 1
         e = new_exons[0][0]
-        self.assertEqual(1, e.start)
-        self.assertEqual(100, e.end)
-        self.assertEqual(True, e.start_splice_site.intact)
-        self.assertEqual(False, e.end_splice_site.intact)
+        assert e.start == 1
+        assert e.end == 100
+        assert e.start_splice_site.intact is True
+        assert e.end_splice_site.intact is False
 
-    def test__pull_exons_left_pos_exonic_splice(self):
+    def test__pull_exons_left_pos_exonic_splice(self, intervals):
         # 100-199, 500-599, 1200-1299, 1500-1599, 1700-1799
-        t = PreTranscript(exons=[self.x, self.y, self.z, self.w, self.s], strand=STRAND.POS)
+        t = PreTranscript(
+            exons=[intervals.x, intervals.y, intervals.z, intervals.w, intervals.s],
+            strand=STRAND.POS,
+        )
         b = Breakpoint(REF_CHR, 101, orient=ORIENT.LEFT)
-        seq, new_exons = FusionTranscript._pull_exons(t, b, self.reference_sequence)
+        seq, new_exons = FusionTranscript._pull_exons(t, b, intervals.reference_sequence)
         expt = 'C' * 2
-        self.assertEqual(expt, seq)
-        self.assertEqual(1, len(new_exons))
+        assert seq == expt
+        assert len(new_exons) == 1
         e = new_exons[0][0]
-        self.assertEqual(1, e.start)
-        self.assertEqual(2, e.end)
-        self.assertEqual(False, e.start_splice_site.intact)
-        self.assertEqual(False, e.end_splice_site.intact)
+        assert e.start == 1
+        assert e.end == 2
+        assert e.start_splice_site.intact is False
+        assert e.end_splice_site.intact is False
 
-    def test__pull_exons_right_pos_intronic(self):
+    def test__pull_exons_right_pos_intronic(self, intervals):
         # x:100-199, y:500-599, z:1200-1299, w:1500-1599, s:1700-1799
-        t = PreTranscript(exons=[self.x, self.y, self.z, self.w, self.s], strand=STRAND.POS)
+        t = PreTranscript(
+            exons=[intervals.x, intervals.y, intervals.z, intervals.w, intervals.s],
+            strand=STRAND.POS,
+        )
         b = Breakpoint(REF_CHR, 1600, orient=ORIENT.RIGHT)
-        seq, new_exons = FusionTranscript._pull_exons(t, b, self.reference_sequence)
-        expt = 'A' * (1699 - 1600 + 1) + 'G' * len(self.s)
-        self.assertEqual(expt, seq)
-        self.assertEqual(1, len(new_exons))
+        seq, new_exons = FusionTranscript._pull_exons(t, b, intervals.reference_sequence)
+        expt = 'A' * (1699 - 1600 + 1) + 'G' * len(intervals.s)
+        assert seq == expt
+        assert len(new_exons) == 1
 
         b = Breakpoint(REF_CHR, 300, orient=ORIENT.RIGHT)
-        seq, new_exons = FusionTranscript._pull_exons(t, b, self.reference_sequence)
+        seq, new_exons = FusionTranscript._pull_exons(t, b, intervals.reference_sequence)
         expt = 'A' * (499 - 300 + 1) + 'G' * 100 + 'A' * (1199 - 600 + 1) + 'T' * 100
         expt += 'A' * (1499 - 1300 + 1) + 'C' * 100 + 'A' * (1699 - 1600 + 1) + 'G' * 100
 
-        self.assertEqual(expt, seq)
-        self.assertEqual(4, len(new_exons))
+        assert seq == expt
+        assert len(new_exons) == 4
         e = new_exons[0][0]
-        self.assertEqual(201, e.start)
-        self.assertEqual(300, e.end)
-        self.assertEqual(True, e.start_splice_site.intact)
-        self.assertEqual(True, e.end_splice_site.intact)
+        assert e.start == 201
+        assert e.end == 300
+        assert e.start_splice_site.intact is True
+        assert e.end_splice_site.intact is True
 
-    def test__pull_exons_right_pos_intronic_splice(self):
+    def test__pull_exons_right_pos_intronic_splice(self, intervals):
         # x:100-199, y:500-599, z:1200-1299, w:1500-1599, s:1700-1799
-        t = PreTranscript(exons=[self.x, self.y, self.z, self.w, self.s], strand=STRAND.POS)
+        t = PreTranscript(
+            exons=[intervals.x, intervals.y, intervals.z, intervals.w, intervals.s],
+            strand=STRAND.POS,
+        )
         b = Breakpoint(REF_CHR, 1198, orient=ORIENT.RIGHT)
-        seq, new_exons = FusionTranscript._pull_exons(t, b, self.reference_sequence)
+        seq, new_exons = FusionTranscript._pull_exons(t, b, intervals.reference_sequence)
         expt = (
             'AA'
             + 'T' * 100
@@ -171,64 +198,76 @@ def test__pull_exons_right_pos_intronic_splice(self):
             + 'A' * (1699 - 1600 + 1)
             + 'G' * 100
         )
-        self.assertEqual(expt, seq)
-        self.assertEqual(3, len(new_exons))
+        assert seq == expt
+        assert len(new_exons) == 3
         e = new_exons[0][0]
-        self.assertEqual(False, e.start_splice_site.intact)
-        self.assertEqual(True, e.end_splice_site.intact)
+        assert e.start_splice_site.intact is False
+        assert e.end_splice_site.intact is True
 
-    def test__pull_exons_right_pos_exonic(self):
+    def test__pull_exons_right_pos_exonic(self, intervals):
         # x:100-199, y:500-599, z:1200-1299, w:1500-1599, s:1700-1799
-        t = PreTranscript(exons=[self.x, self.y, self.z, self.w, self.s], strand=STRAND.POS)
+        t = PreTranscript(
+            exons=[intervals.x, intervals.y, intervals.z, intervals.w, intervals.s],
+            strand=STRAND.POS,
+        )
         b = Breakpoint(REF_CHR, 1201, orient=ORIENT.RIGHT)
-        seq, new_exons = FusionTranscript._pull_exons(t, b, self.reference_sequence)
+        seq, new_exons = FusionTranscript._pull_exons(t, b, intervals.reference_sequence)
         expt = 'T' * 99 + 'A' * (1499 - 1300 + 1) + 'C' * 100 + 'A' * (1699 - 1600 + 1) + 'G' * 100
-        self.assertEqual(expt, seq)
-        self.assertEqual(3, len(new_exons))
+        assert seq == expt
+        assert len(new_exons) == 3
         e = new_exons[0][0]
-        self.assertEqual(False, e.start_splice_site.intact)
-        self.assertEqual(True, e.end_splice_site.intact)
+        assert e.start_splice_site.intact is False
+        assert e.end_splice_site.intact is True
 
-    def test__pull_exons_right_pos_exonic_splice(self):
+    def test__pull_exons_right_pos_exonic_splice(self, intervals):
         # x:100-199, y:500-599, z:1200-1299, w:1500-1599, s:1700-1799
-        t = PreTranscript(exons=[self.x, self.y, self.z, self.w, self.s], strand=STRAND.POS)
+        t = PreTranscript(
+            exons=[intervals.x, intervals.y, intervals.z, intervals.w, intervals.s],
+            strand=STRAND.POS,
+        )
         b = Breakpoint(REF_CHR, 1298, orient=ORIENT.RIGHT)
-        seq, new_exons = FusionTranscript._pull_exons(t, b, self.reference_sequence)
+        seq, new_exons = FusionTranscript._pull_exons(t, b, intervals.reference_sequence)
         expt = 'TT' + 'A' * (1499 - 1300 + 1) + 'C' * 100 + 'A' * (1699 - 1600 + 1) + 'G' * 100
-        self.assertEqual(expt, seq)
-        self.assertEqual(3, len(new_exons))
+        assert seq == expt
+        assert len(new_exons) == 3
         e = new_exons[0][0]
-        self.assertEqual(False, e.start_splice_site.intact)
-        self.assertEqual(False, e.end_splice_site.intact)
+        assert e.start_splice_site.intact is False
+        assert e.end_splice_site.intact is False
 
-    def test__pull_exons_right_neg_intronic(self):
+    def test__pull_exons_right_neg_intronic(self, intervals):
         # x:100-199, y:500-599, z:1200-1299, w:1500-1599, s:1700-1799
-        t = PreTranscript(exons=[self.x, self.y, self.z, self.w, self.s], strand=STRAND.NEG)
+        t = PreTranscript(
+            exons=[intervals.x, intervals.y, intervals.z, intervals.w, intervals.s],
+            strand=STRAND.NEG,
+        )
         b = Breakpoint(REF_CHR, 700, orient=ORIENT.RIGHT)
-        seq, new_exons = FusionTranscript._pull_exons(t, b, self.reference_sequence)
+        seq, new_exons = FusionTranscript._pull_exons(t, b, intervals.reference_sequence)
         expt = 'A' * (1199 - 700 + 1) + 'T' * 100 + 'A' * (1499 - 1300 + 1) + 'C' * 100
         expt += 'A' * (1699 - 1600 + 1) + 'G' * 100
         expt = reverse_complement(expt)
-        self.assertEqual(expt, seq)
-        self.assertEqual(3, len(new_exons))
+        assert seq == expt
+        assert len(new_exons) == 3
         e = new_exons[0][0]
-        self.assertEqual(True, e.start_splice_site.intact)
-        self.assertEqual(True, e.end_splice_site.intact)
-        self.assertEqual(1, e.start)
-        self.assertEqual(100, e.end)
-        self.assertEqual('C' * 100, seq[e.start - 1 : e.end])
+        assert e.start_splice_site.intact is True
+        assert e.end_splice_site.intact is True
+        assert e.start == 1
+        assert e.end == 100
+        assert seq[e.start - 1 : e.end] == 'C' * 100
         e = new_exons[1][0]
-        self.assertEqual(True, e.start_splice_site.intact)
-        self.assertEqual(True, e.end_splice_site.intact)
-        self.assertEqual(201, e.start)
-        self.assertEqual(300, e.end)
-        self.assertEqual('G' * 100, seq[e.start - 1 : e.end])
+        assert e.start_splice_site.intact is True
+        assert e.end_splice_site.intact is True
+        assert e.start == 201
+        assert e.end == 300
+        assert seq[e.start - 1 : e.end] == 'G' * 100
 
-    def test__pull_exons_right_neg_intronic_splice(self):
+    def test__pull_exons_right_neg_intronic_splice(self, intervals):
         # x:100-199, y:500-599, z:1200-1299, w:1500-1599, s:1700-1799
-        t = PreTranscript(exons=[self.x, self.y, self.z, self.w, self.s], strand=STRAND.NEG)
+        t = PreTranscript(
+            exons=[intervals.x, intervals.y, intervals.z, intervals.w, intervals.s],
+            strand=STRAND.NEG,
+        )
         b = Breakpoint(REF_CHR, 1198, orient=ORIENT.RIGHT)
-        seq, new_exons = FusionTranscript._pull_exons(t, b, self.reference_sequence)
+        seq, new_exons = FusionTranscript._pull_exons(t, b, intervals.reference_sequence)
         expt = (
             'AA'
             + 'T' * 100
@@ -238,63 +277,66 @@ def test__pull_exons_right_neg_intronic_splice(self):
             + 'G' * 100
         )
         expt = reverse_complement(expt)
-        self.assertEqual(expt, seq)
-        self.assertEqual(3, len(new_exons))
+        assert seq == expt
+        assert len(new_exons) == 3
         e = new_exons[0][0]
-        self.assertEqual(True, e.start_splice_site.intact)
-        self.assertEqual(True, e.end_splice_site.intact)
-        self.assertEqual(1, e.start)
-        self.assertEqual(100, e.end)
-        self.assertEqual('C' * 100, seq[e.start - 1 : e.end])
+        assert e.start_splice_site.intact is True
+        assert e.end_splice_site.intact is True
+        assert e.start == 1
+        assert e.end == 100
+        assert seq[e.start - 1 : e.end] == 'C' * 100
         e = new_exons[1][0]
-        self.assertEqual(True, e.start_splice_site.intact)
-        self.assertEqual(True, e.end_splice_site.intact)
-        self.assertEqual(201, e.start)
-        self.assertEqual(300, e.end)
-        self.assertEqual('G' * 100, seq[e.start - 1 : e.end])
+        assert e.start_splice_site.intact is True
+        assert e.end_splice_site.intact is True
+        assert e.start == 201
+        assert e.end == 300
+        assert seq[e.start - 1 : e.end] == 'G' * 100
         e = new_exons[2][0]
-        self.assertEqual(True, e.start_splice_site.intact)
-        self.assertEqual(False, e.end_splice_site.intact)
-        self.assertEqual(501, e.start)
-        self.assertEqual(600, e.end)
-        self.assertEqual('A' * 100, seq[e.start - 1 : e.end])
+        assert e.start_splice_site.intact is True
+        assert e.end_splice_site.intact is False
+        assert e.start == 501
+        assert e.end == 600
+        assert seq[e.start - 1 : e.end] == 'A' * 100
 
-    def test_build_single_transcript_indel(self):
+    def test_build_single_transcript_indel(self, intervals):
         # x:100-199, y:500-599, z:1200-1299, w:1500-1599, s:1700-1799
         #   CCCCCCC    GGGGGGG    TTTTTTTTT    CCCCCCCCC    GGGGGGGGG
-        t = PreTranscript(exons=[self.x, self.y, self.z, self.w, self.s], strand=STRAND.POS)
+        t = PreTranscript(
+            exons=[intervals.x, intervals.y, intervals.z, intervals.w, intervals.s],
+            strand=STRAND.POS,
+        )
         b1 = Breakpoint(REF_CHR, 599, orient=ORIENT.LEFT)
         b2 = Breakpoint(REF_CHR, 1200, orient=ORIENT.RIGHT)
         bpp = BreakpointPair(b1, b2, opposing_strands=False, untemplated_seq='ATCGATCG')
-        ref = {REF_CHR: MockObject(seq=self.reference_sequence)}
+        ref = {REF_CHR: MockObject(seq=intervals.reference_sequence)}
         ann = Annotation(
             bpp, transcript1=t, transcript2=t, event_type=SVTYPE.DEL, protocol=PROTOCOL.GENOME
         )
         ft = FusionTranscript.build(ann, ref)
 
         expt = (
-            'C' * len(self.x)
+            'C' * len(intervals.x)
             + 'A' * (499 - 200 + 1)
-            + 'G' * len(self.y)
+            + 'G' * len(intervals.y)
             + 'ATCGATCG'
-            + 'T' * len(self.z)
+            + 'T' * len(intervals.z)
         )
         expt += (
             'A' * (1499 - 1300 + 1)
-            + 'C' * len(self.w)
+            + 'C' * len(intervals.w)
             + 'A' * (1699 - 1600 + 1)
-            + 'G' * len(self.s)
+            + 'G' * len(intervals.s)
         )
 
-        self.assertEqual(expt, ft.seq)
-        self.assertEqual(5, len(ft.exons))
+        assert ft.seq == expt
+        assert len(ft.exons) == 5
 
         for i, ex in enumerate(t.exons):
             n = ft.exons[i]
-            self.assertEqual(ex, ft.exon_mapping[n.position])
+            assert ft.exon_mapping[n.position] == ex
 
-        self.assertEqual(1, ft.exons[0].start)
-        self.assertEqual(100, ft.exons[0].end)
+        assert ft.exons[0].start == 1
+        assert ft.exons[0].end == 100
 
         splice_pattern = [(True, True), (True, False), (False, True), (True, True), (True, True)]
         char_pattern = [x * 100 for x in ['C', 'G', 'T', 'C', 'G']]
@@ -302,60 +344,72 @@ def test_build_single_transcript_indel(self):
         for i in range(0, len(splice_pattern)):
             s, t = splice_pattern[i]
             ex = ft.exons[i]
-            self.assertEqual(s, ex.start_splice_site.intact)
-            self.assertEqual(t, ex.end_splice_site.intact)
-            self.assertEqual(char_pattern[i], ft.seq[ex.start - 1 : ex.end])
+            assert ex.start_splice_site.intact == s
+            assert ex.end_splice_site.intact == t
+            assert ft.seq[ex.start - 1 : ex.end] == char_pattern[i]
 
-    def test_build_single_transcript_inversion(self):
+    def test_build_single_transcript_inversion(self, intervals):
         # x:100-199, y:500-599, z:1200-1299, w:1500-1599, s:1700-1799
         #   CCCCCCC    GGGGGGG    TTTTTTTTT    CCCCCCCCC    GGGGGGGGG
-        t = PreTranscript(exons=[self.x, self.y, self.z, self.w, self.s], strand=STRAND.POS)
+        t = PreTranscript(
+            exons=[intervals.x, intervals.y, intervals.z, intervals.w, intervals.s],
+            strand=STRAND.POS,
+        )
         b1 = Breakpoint(REF_CHR, 1199, orient=ORIENT.LEFT)
         b2 = Breakpoint(REF_CHR, 1299, orient=ORIENT.LEFT)
         bpp = BreakpointPair(b1, b2, opposing_strands=True, untemplated_seq='ATCGTC')
-        ref = {REF_CHR: MockObject(seq=self.reference_sequence)}
+        ref = {REF_CHR: MockObject(seq=intervals.reference_sequence)}
         ann = Annotation(
             bpp, transcript1=t, transcript2=t, event_type=SVTYPE.INV, protocol=PROTOCOL.GENOME
         )
         ft = FusionTranscript.build(ann, ref)
         expt = (
-            'C' * len(self.x) + 'A' * (499 - 200 + 1) + 'G' * len(self.y) + 'A' * (1199 - 600 + 1)
+            'C' * len(intervals.x)
+            + 'A' * (499 - 200 + 1)
+            + 'G' * len(intervals.y)
+            + 'A' * (1199 - 600 + 1)
         )
-        expt += 'ATCGTC' + 'A' * len(self.z)
+        expt += 'ATCGTC' + 'A' * len(intervals.z)
         expt += (
             'A' * (1499 - 1300 + 1)
-            + 'C' * len(self.w)
+            + 'C' * len(intervals.w)
             + 'A' * (1699 - 1600 + 1)
-            + 'G' * len(self.s)
+            + 'G' * len(intervals.s)
         )
         exons = [(1, 100), (401, 500), (1407, 1506), (1607, 1706)]
         for i in range(len(exons)):
-            self.assertEqual(exons[i][0], ft.exons[i].start)
-            self.assertEqual(exons[i][1], ft.exons[i].end)
-        self.assertEqual(expt, ft.seq)
-        self.assertEqual(4, len(ft.exons))
+            assert ft.exons[i].start == exons[i][0]
+            assert ft.exons[i].end == exons[i][1]
+        assert ft.seq == expt
+        assert len(ft.exons) == 4
 
-    def test_build_single_transcript_inversion_transcriptome(self):
+    def test_build_single_transcript_inversion_transcriptome(self, intervals):
         # x:100-199, y:500-599, z:1200-1299, w:1500-1599, s:1700-1799
         #   CCCCCCC    GGGGGGG    TTTTTTTTT    CCCCCCCCC    GGGGGGGGG
-        t = PreTranscript(exons=[self.x, self.y, self.z, self.w, self.s], strand=STRAND.POS)
+        t = PreTranscript(
+            exons=[intervals.x, intervals.y, intervals.z, intervals.w, intervals.s],
+            strand=STRAND.POS,
+        )
         b1 = Breakpoint(REF_CHR, 1199, orient=ORIENT.LEFT)
         b2 = Breakpoint(REF_CHR, 1299, orient=ORIENT.LEFT)
         bpp = BreakpointPair(b1, b2, opposing_strands=True, untemplated_seq='ATCGTC')
-        ref = {REF_CHR: MockObject(seq=self.reference_sequence)}
+        ref = {REF_CHR: MockObject(seq=intervals.reference_sequence)}
         ann = Annotation(
             bpp, transcript1=t, transcript2=t, event_type=SVTYPE.INV, protocol=PROTOCOL.TRANS
         )
         ft = FusionTranscript.build(ann, ref)
         expt = (
-            'C' * len(self.x) + 'A' * (499 - 200 + 1) + 'G' * len(self.y) + 'A' * (1199 - 600 + 1)
+            'C' * len(intervals.x)
+            + 'A' * (499 - 200 + 1)
+            + 'G' * len(intervals.y)
+            + 'A' * (1199 - 600 + 1)
         )
-        expt += 'ATCGTC' + 'A' * len(self.z)
+        expt += 'ATCGTC' + 'A' * len(intervals.z)
         expt += (
             'A' * (1499 - 1300 + 1)
-            + 'C' * len(self.w)
+            + 'C' * len(intervals.w)
             + 'A' * (1699 - 1600 + 1)
-            + 'G' * len(self.s)
+            + 'G' * len(intervals.s)
         )
         exons = [
             Exon(1, 100, strand=STRAND.POS),
@@ -366,106 +420,119 @@ def test_build_single_transcript_inversion_transcriptome(self):
         ]
         print(ft.exons)
         for i in range(len(exons)):
-            self.assertEqual(exons[i].start, ft.exons[i].start)
-            self.assertEqual(exons[i].end, ft.exons[i].end)
-            self.assertEqual(
-                exons[i].start_splice_site.intact, ft.exons[i].start_splice_site.intact
-            )
-            self.assertEqual(exons[i].end_splice_site.intact, ft.exons[i].end_splice_site.intact)
-        self.assertEqual(expt, ft.seq)
-        self.assertEqual(5, len(ft.exons))
-
-    def test_build_single_transcript_inversion_neg(self):
+            assert ft.exons[i].start == exons[i].start
+            assert ft.exons[i].end == exons[i].end
+            assert ft.exons[i].start_splice_site.intact == exons[i].start_splice_site.intact
+            assert ft.exons[i].end_splice_site.intact == exons[i].end_splice_site.intact
+        assert ft.seq == expt
+        assert len(ft.exons) == 5
+
+    def test_build_single_transcript_inversion_neg(self, intervals):
         # x:100-199, y:500-599, z:1200-1299, w:1500-1599, s:1700-1799
         #   CCCCCCC    GGGGGGG    TTTTTTTTT    CCCCCCCCC    GGGGGGGGG
-        t = PreTranscript(exons=[self.x, self.y, self.z, self.w, self.s], strand=STRAND.NEG)
+        t = PreTranscript(
+            exons=[intervals.x, intervals.y, intervals.z, intervals.w, intervals.s],
+            strand=STRAND.NEG,
+        )
         b1 = Breakpoint(REF_CHR, 1300, orient=ORIENT.RIGHT)
         b2 = Breakpoint(REF_CHR, 1200, orient=ORIENT.RIGHT)
         bpp = BreakpointPair(b1, b2, opposing_strands=True, untemplated_seq='ATCGTC')
-        ref = {REF_CHR: MockObject(seq=self.reference_sequence)}
+        ref = {REF_CHR: MockObject(seq=intervals.reference_sequence)}
         ann = Annotation(
             bpp, transcript1=t, transcript2=t, event_type=SVTYPE.INV, protocol=PROTOCOL.GENOME
         )
         ft = FusionTranscript.build(ann, ref)
 
         expt = (
-            'C' * len(self.s)
+            'C' * len(intervals.s)
             + 'T' * (1699 - 1600 + 1)
-            + 'G' * len(self.w)
+            + 'G' * len(intervals.w)
             + 'T' * (1499 - 1300 + 1)
         )
-        expt += 'T' * len(self.z) + 'GACGAT' + 'T' * (1199 - 600 + 1) + 'C' * len(self.y)
-        expt += 'T' * (499 - 200 + 1) + 'G' * len(self.x)
+        expt += 'T' * len(intervals.z) + 'GACGAT' + 'T' * (1199 - 600 + 1) + 'C' * len(intervals.y)
+        expt += 'T' * (499 - 200 + 1) + 'G' * len(intervals.x)
 
         exons = [(1, 100), (201, 300), (1207, 1306), (1607, 1706)]
 
         for i in range(len(exons)):
-            self.assertEqual(exons[i][0], ft.exons[i].start)
-            self.assertEqual(exons[i][1], ft.exons[i].end)
-        self.assertEqual(expt, ft.seq)
-        self.assertEqual(4, len(ft.exons))
+            assert ft.exons[i].start == exons[i][0]
+            assert ft.exons[i].end == exons[i][1]
+        assert ft.seq == expt
+        assert len(ft.exons) == 4
 
-    def test_build_single_transcript_duplication_pos(self):
+    def test_build_single_transcript_duplication_pos(self, intervals):
         # x:100-199, y:500-599, z:1200-1299, w:1500-1599, s:1700-1799
         #   CCCCCCC    GGGGGGG    TTTTTTTTT    CCCCCCCCC    GGGGGGGGG
-        t = PreTranscript(exons=[self.x, self.y, self.z, self.w, self.s], strand=STRAND.POS)
+        t = PreTranscript(
+            exons=[intervals.x, intervals.y, intervals.z, intervals.w, intervals.s],
+            strand=STRAND.POS,
+        )
         b1 = Breakpoint(REF_CHR, 1200, orient=ORIENT.RIGHT)
         b2 = Breakpoint(REF_CHR, 1299, orient=ORIENT.LEFT)
         bpp = BreakpointPair(b1, b2, opposing_strands=False, untemplated_seq='ATCGATCG')
-        ref = {REF_CHR: MockObject(seq=self.reference_sequence)}
+        ref = {REF_CHR: MockObject(seq=intervals.reference_sequence)}
         ann = Annotation(
             bpp, transcript1=t, transcript2=t, event_type=SVTYPE.DUP, protocol=PROTOCOL.GENOME
         )
         ft = FusionTranscript.build(ann, ref)
-        self.assertEqual(STRAND.POS, ft.get_strand())
+        assert ft.get_strand() == STRAND.POS
 
         expt = (
-            'C' * len(self.x) + 'A' * (499 - 200 + 1) + 'G' * len(self.y) + 'A' * (1199 - 600 + 1)
+            'C' * len(intervals.x)
+            + 'A' * (499 - 200 + 1)
+            + 'G' * len(intervals.y)
+            + 'A' * (1199 - 600 + 1)
         )
-        expt += 'T' * len(self.z) + 'ATCGATCG' + 'T' * len(self.z)
+        expt += 'T' * len(intervals.z) + 'ATCGATCG' + 'T' * len(intervals.z)
         expt += (
             'A' * (1499 - 1300 + 1)
-            + 'C' * len(self.w)
+            + 'C' * len(intervals.w)
             + 'A' * (1699 - 1600 + 1)
-            + 'G' * len(self.s)
+            + 'G' * len(intervals.s)
         )
-        self.assertEqual(expt, ft.seq)
+        assert ft.seq == expt
         exons = [(1, 100), (401, 500), (1101, 1200), (1209, 1308), (1509, 1608), (1709, 1808)]
         for i in range(len(exons)):
-            self.assertEqual(exons[i][0], ft.exons[i].start)
-            self.assertEqual(exons[i][1], ft.exons[i].end)
+            assert ft.exons[i].start == exons[i][0]
+            assert ft.exons[i].end == exons[i][1]
 
-        self.assertEqual(6, len(ft.exons))
-        self.assertTrue(ft.exons[2].start_splice_site.intact)
-        self.assertTrue(ft.exons[3].end_splice_site.intact)
-        self.assertFalse(ft.exons[2].end_splice_site.intact)
-        self.assertFalse(ft.exons[3].start_splice_site.intact)
+        assert len(ft.exons) == 6
+        assert ft.exons[2].start_splice_site.intact
+        assert ft.exons[3].end_splice_site.intact
+        assert not ft.exons[2].end_splice_site.intact
+        assert not ft.exons[3].start_splice_site.intact
 
-    def test_build_single_transcript_duplication_pos_transcriptome(self):
+    def test_build_single_transcript_duplication_pos_transcriptome(self, intervals):
         # x:100-199, y:500-599, z:1200-1299, w:1500-1599, s:1700-1799
         #   CCCCCCC    GGGGGGG    TTTTTTTTT    CCCCCCCCC    GGGGGGGGG
-        t = PreTranscript(exons=[self.x, self.y, self.z, self.w, self.s], strand=STRAND.POS)
+        t = PreTranscript(
+            exons=[intervals.x, intervals.y, intervals.z, intervals.w, intervals.s],
+            strand=STRAND.POS,
+        )
         b1 = Breakpoint(REF_CHR, 1200, orient=ORIENT.RIGHT)
         b2 = Breakpoint(REF_CHR, 1299, orient=ORIENT.LEFT)
         bpp = BreakpointPair(b1, b2, opposing_strands=False, untemplated_seq='ATCGATCG')
-        ref = {REF_CHR: MockObject(seq=self.reference_sequence)}
+        ref = {REF_CHR: MockObject(seq=intervals.reference_sequence)}
         ann = Annotation(
             bpp, transcript1=t, transcript2=t, event_type=SVTYPE.DUP, protocol=PROTOCOL.TRANS
         )
         ft = FusionTranscript.build(ann, ref)
-        self.assertEqual(STRAND.POS, ft.get_strand())
+        assert ft.get_strand() == STRAND.POS
 
         expt = (
-            'C' * len(self.x) + 'A' * (499 - 200 + 1) + 'G' * len(self.y) + 'A' * (1199 - 600 + 1)
+            'C' * len(intervals.x)
+            + 'A' * (499 - 200 + 1)
+            + 'G' * len(intervals.y)
+            + 'A' * (1199 - 600 + 1)
         )
-        expt += 'T' * len(self.z) + 'ATCGATCG' + 'T' * len(self.z)
+        expt += 'T' * len(intervals.z) + 'ATCGATCG' + 'T' * len(intervals.z)
         expt += (
             'A' * (1499 - 1300 + 1)
-            + 'C' * len(self.w)
+            + 'C' * len(intervals.w)
             + 'A' * (1699 - 1600 + 1)
-            + 'G' * len(self.s)
+            + 'G' * len(intervals.s)
         )
-        self.assertEqual(expt, ft.seq)
+        assert ft.seq == expt
         exons = [
             Exon(1, 100, strand=STRAND.POS),
             Exon(401, 500, strand=STRAND.POS),
@@ -477,241 +544,290 @@ def test_build_single_transcript_duplication_pos_transcriptome(self):
         ]
         print(ft.exons)
         for i in range(len(exons)):
-            self.assertEqual(exons[i].start, ft.exons[i].start)
-            self.assertEqual(exons[i].end, ft.exons[i].end)
-            self.assertEqual(
-                exons[i].start_splice_site.intact, ft.exons[i].start_splice_site.intact
-            )
-            self.assertEqual(exons[i].end_splice_site.intact, ft.exons[i].end_splice_site.intact)
+            assert ft.exons[i].start == exons[i].start
+            assert ft.exons[i].end == exons[i].end
+            assert ft.exons[i].start_splice_site.intact == exons[i].start_splice_site.intact
+            assert ft.exons[i].end_splice_site.intact == exons[i].end_splice_site.intact
 
-        self.assertEqual(7, len(ft.exons))
+        assert len(ft.exons) == 7
 
-    def test_build_single_transcript_duplication_neg(self):
+    def test_build_single_transcript_duplication_neg(self, intervals):
         # x:100-199, y:500-599, z:1200-1299, w:1500-1599, s:1700-1799
         #   CCCCCCC    GGGGGGG    TTTTTTTTT    CCCCCCCCC    GGGGGGGGG
-        t = PreTranscript(exons=[self.x, self.y, self.z, self.w, self.s], strand=STRAND.NEG)
+        t = PreTranscript(
+            exons=[intervals.x, intervals.y, intervals.z, intervals.w, intervals.s],
+            strand=STRAND.NEG,
+        )
         b1 = Breakpoint(REF_CHR, 1200, orient=ORIENT.RIGHT)
         b2 = Breakpoint(REF_CHR, 1299, orient=ORIENT.LEFT)
         bpp = BreakpointPair(b1, b2, opposing_strands=False, untemplated_seq='ATCGATCG')
-        ref = {REF_CHR: MockObject(seq=self.reference_sequence)}
+        ref = {REF_CHR: MockObject(seq=intervals.reference_sequence)}
         ann = Annotation(
             bpp, transcript1=t, transcript2=t, event_type=SVTYPE.DUP, protocol=PROTOCOL.GENOME
         )
         ft = FusionTranscript.build(ann, ref)
 
         expt = (
-            'C' * len(self.x) + 'A' * (499 - 200 + 1) + 'G' * len(self.y) + 'A' * (1199 - 600 + 1)
+            'C' * len(intervals.x)
+            + 'A' * (499 - 200 + 1)
+            + 'G' * len(intervals.y)
+            + 'A' * (1199 - 600 + 1)
         )
-        expt += 'T' * len(self.z) + 'ATCGATCG' + 'T' * len(self.z)
+        expt += 'T' * len(intervals.z) + 'ATCGATCG' + 'T' * len(intervals.z)
         expt += (
             'A' * (1499 - 1300 + 1)
-            + 'C' * len(self.w)
+            + 'C' * len(intervals.w)
             + 'A' * (1699 - 1600 + 1)
-            + 'G' * len(self.s)
+            + 'G' * len(intervals.s)
         )
         expt = reverse_complement(expt)
-        self.assertEqual(expt, ft.seq)
+        assert ft.seq == expt
 
         exons = [(1, 100), (201, 300), (501, 600), (609, 708), (1309, 1408), (1709, 1808)]
 
         for i in range(len(exons)):
-            self.assertEqual(exons[i][0], ft.exons[i].start)
-            self.assertEqual(exons[i][1], ft.exons[i].end)
-
-        self.assertEqual(6, len(ft.exons))
-        self.assertTrue(ft.exons[2].start_splice_site.intact)
-        self.assertTrue(ft.exons[3].end_splice_site.intact)
-        self.assertFalse(ft.exons[2].end_splice_site.intact)
-        self.assertFalse(ft.exons[3].start_splice_site.intact)
-        self.assertEqual(3, ft.exon_number(ft.exons[2]))
-        self.assertEqual(3, ft.exon_number(ft.exons[3]))
-
-    def test_build_two_transcript_inversion_5prime_pos(self):
+            assert ft.exons[i].start == exons[i][0]
+            assert ft.exons[i].end == exons[i][1]
+
+        assert len(ft.exons) == 6
+        assert ft.exons[2].start_splice_site.intact
+        assert ft.exons[3].end_splice_site.intact
+        assert not ft.exons[2].end_splice_site.intact
+        assert not ft.exons[3].start_splice_site.intact
+        assert ft.exon_number(ft.exons[2]) == 3
+        assert ft.exon_number(ft.exons[3]) == 3
+
+    def test_build_two_transcript_inversion_5prime_pos(self, intervals):
         # x:100-199, y:500-599, z:1200-1299, w:1500-1599, s:1700-1799
         #   CCCCCCC    GGGGGGG    TTTTTTTTT    CCCCCCCCC    GGGGGGGGG
         # a:2000-2099, b:2600-2699, c:3000-3099, d:3300-3399
         #   TTTTTTTTT    CCCCCCCCC    GGGGGGGGG    TTTTTTTTT
-        t1 = PreTranscript(exons=[self.x, self.y, self.z, self.w, self.s], strand=STRAND.POS)
-        t2 = PreTranscript(exons=[self.a, self.b, self.c, self.d], strand=STRAND.NEG)
+        t1 = PreTranscript(
+            exons=[intervals.x, intervals.y, intervals.z, intervals.w, intervals.s],
+            strand=STRAND.POS,
+        )
+        t2 = PreTranscript(
+            exons=[intervals.a, intervals.b, intervals.c, intervals.d], strand=STRAND.NEG
+        )
         b1 = Breakpoint(REF_CHR, 1199, orient=ORIENT.LEFT)
         b2 = Breakpoint(REF_CHR, 2699, orient=ORIENT.LEFT)
         bpp = BreakpointPair(b1, b2, opposing_strands=True, untemplated_seq='ATCGACTC')
-        ref = {REF_CHR: MockObject(seq=self.reference_sequence)}
+        ref = {REF_CHR: MockObject(seq=intervals.reference_sequence)}
         ann = Annotation(
             bpp, transcript1=t1, transcript2=t2, event_type=SVTYPE.INV, protocol=PROTOCOL.GENOME
         )
         ft = FusionTranscript.build(ann, ref)
         expt = (
-            'C' * len(self.x) + 'A' * (499 - 200 + 1) + 'G' * len(self.y) + 'A' * (1199 - 600 + 1)
-        )
-        expt += 'ATCGACTC' + 'G' * len(self.b) + 'T' * (2599 - 2100 + 1) + 'A' * len(self.a)
-        self.assertEqual(expt, ft.seq)
-        self.assertEqual(4, len(ft.exons))
-        self.assertTrue(ft.exons[3].end_splice_site.intact)
-        self.assertFalse(ft.exons[2].start_splice_site.intact)
-        self.assertTrue(ft.exons[2].end_splice_site.intact)
-        self.assertEqual(2, ft.exon_number(ft.exons[1]))
-        self.assertEqual(3, ft.exon_number(ft.exons[2]))
-
-    def test_build_two_transcript_inversion_5prime_neg(self):
+            'C' * len(intervals.x)
+            + 'A' * (499 - 200 + 1)
+            + 'G' * len(intervals.y)
+            + 'A' * (1199 - 600 + 1)
+        )
+        expt += (
+            'ATCGACTC' + 'G' * len(intervals.b) + 'T' * (2599 - 2100 + 1) + 'A' * len(intervals.a)
+        )
+        assert ft.seq == expt
+        assert len(ft.exons) == 4
+        assert ft.exons[3].end_splice_site.intact
+        assert not ft.exons[2].start_splice_site.intact
+        assert ft.exons[2].end_splice_site.intact
+        assert ft.exon_number(ft.exons[1]) == 2
+        assert ft.exon_number(ft.exons[2]) == 3
+
+    def test_build_two_transcript_inversion_5prime_neg(self, intervals):
         # x:100-199, y:500-599, z:1200-1299, w:1500-1599, s:1700-1799
         #   CCCCCCC    GGGGGGG    TTTTTTTTT    CCCCCCCCC    GGGGGGGGG
         # a:2000-2099, b:2600-2699, c:3000-3099, d:3300-3399
         #   TTTTTTTTT    CCCCCCCCC    GGGGGGGGG    TTTTTTTTT
-        t1 = PreTranscript(exons=[self.x, self.y, self.z, self.w, self.s], strand=STRAND.NEG)
-        t2 = PreTranscript(exons=[self.a, self.b, self.c, self.d], strand=STRAND.POS)
+        t1 = PreTranscript(
+            exons=[intervals.x, intervals.y, intervals.z, intervals.w, intervals.s],
+            strand=STRAND.NEG,
+        )
+        t2 = PreTranscript(
+            exons=[intervals.a, intervals.b, intervals.c, intervals.d], strand=STRAND.POS
+        )
         b1 = Breakpoint(REF_CHR, 1199, orient=ORIENT.LEFT)
         b2 = Breakpoint(REF_CHR, 2699, orient=ORIENT.LEFT)
         bpp = BreakpointPair(b1, b2, opposing_strands=True, untemplated_seq='ATCGACTC')
-        ref = {REF_CHR: MockObject(seq=self.reference_sequence)}
+        ref = {REF_CHR: MockObject(seq=intervals.reference_sequence)}
         ann = Annotation(
             bpp, transcript1=t1, transcript2=t2, event_type=SVTYPE.INV, protocol=PROTOCOL.GENOME
         )
         ft = FusionTranscript.build(ann, ref)
-        expt = 'T' * len(self.a) + 'A' * (2599 - 2100 + 1) + 'C' * len(self.b) + 'ATCGACTC'
+        expt = (
+            'T' * len(intervals.a) + 'A' * (2599 - 2100 + 1) + 'C' * len(intervals.b) + 'ATCGACTC'
+        )
         expt += (
-            'T' * (1199 - 600 + 1) + 'C' * len(self.y) + 'T' * (499 - 200 + 1) + 'G' * len(self.x)
+            'T' * (1199 - 600 + 1)
+            + 'C' * len(intervals.y)
+            + 'T' * (499 - 200 + 1)
+            + 'G' * len(intervals.x)
         )
 
-        self.assertEqual(4, len(ft.exons))
-        self.assertEqual(2, ft.exon_number(ft.exons[1]))
-        self.assertEqual(4, ft.exon_number(ft.exons[2]))
-        self.assertEqual(expt, ft.seq)
+        assert len(ft.exons) == 4
+        assert ft.exon_number(ft.exons[1]) == 2
+        assert ft.exon_number(ft.exons[2]) == 4
+        assert ft.seq == expt
 
-    def test_build_two_transcript_duplication_pos(self):
+    def test_build_two_transcript_duplication_pos(self, intervals):
         # x:100-199, y:500-599, z:1200-1299, w:1500-1599, s:1700-1799
         #   CCCCCCC    GGGGGGG    TTTTTTTTT    CCCCCCCCC    GGGGGGGGG
         # a:2000-2099, b:2600-2699, c:3000-3099, d:3300-3399
         #   TTTTTTTTT    CCCCCCCCC    GGGGGGGGG    TTTTTTTTT
-        t1 = PreTranscript(exons=[self.x, self.y, self.z, self.w, self.s], strand=STRAND.POS)
-        t2 = PreTranscript(exons=[self.a, self.b, self.c, self.d], strand=STRAND.POS)
+        t1 = PreTranscript(
+            exons=[intervals.x, intervals.y, intervals.z, intervals.w, intervals.s],
+            strand=STRAND.POS,
+        )
+        t2 = PreTranscript(
+            exons=[intervals.a, intervals.b, intervals.c, intervals.d], strand=STRAND.POS
+        )
         b1 = Breakpoint(REF_CHR, 1200, orient=ORIENT.RIGHT)
         b2 = Breakpoint(REF_CHR, 2699, orient=ORIENT.LEFT)
         bpp = BreakpointPair(b1, b2, opposing_strands=False, untemplated_seq='ATCGAC')
-        ref = {REF_CHR: MockObject(seq=self.reference_sequence)}
+        ref = {REF_CHR: MockObject(seq=intervals.reference_sequence)}
         ann = Annotation(
             bpp, transcript1=t1, transcript2=t2, event_type=SVTYPE.DUP, protocol=PROTOCOL.GENOME
         )
         ft = FusionTranscript.build(ann, ref)
-        expt = 'T' * len(self.a) + 'A' * (2599 - 2100 + 1) + 'C' * len(self.b) + 'ATCGAC'
-        expt += 'T' * len(self.z) + 'A' * (1499 - 1300 + 1) + 'C' * len(self.w)
-        expt += 'A' * (1699 - 1600 + 1) + 'G' * len(self.s)
+        expt = 'T' * len(intervals.a) + 'A' * (2599 - 2100 + 1) + 'C' * len(intervals.b) + 'ATCGAC'
+        expt += 'T' * len(intervals.z) + 'A' * (1499 - 1300 + 1) + 'C' * len(intervals.w)
+        expt += 'A' * (1699 - 1600 + 1) + 'G' * len(intervals.s)
 
-        self.assertEqual(5, len(ft.exons))
-        self.assertEqual(2, ft.exon_number(ft.exons[1]))
-        self.assertEqual(3, ft.exon_number(ft.exons[2]))
-        self.assertEqual(expt, ft.seq)
+        assert len(ft.exons) == 5
+        assert ft.exon_number(ft.exons[1]) == 2
+        assert ft.exon_number(ft.exons[2]) == 3
+        assert ft.seq == expt
 
-    def test_build_two_transcript_duplication_neg(self):
+    def test_build_two_transcript_duplication_neg(self, intervals):
         # x:100-199, y:500-599, z:1200-1299, w:1500-1599, s:1700-1799
         #   CCCCCCC    GGGGGGG    TTTTTTTTT    CCCCCCCCC    GGGGGGGGG
         # a:2000-2099, b:2600-2699, c:3000-3099, d:3300-3399
         #   TTTTTTTTT    CCCCCCCCC    GGGGGGGGG    TTTTTTTTT
-        t1 = PreTranscript(exons=[self.x, self.y, self.z, self.w, self.s], strand=STRAND.NEG)
-        t2 = PreTranscript(exons=[self.a, self.b, self.c, self.d], strand=STRAND.NEG)
+        t1 = PreTranscript(
+            exons=[intervals.x, intervals.y, intervals.z, intervals.w, intervals.s],
+            strand=STRAND.NEG,
+        )
+        t2 = PreTranscript(
+            exons=[intervals.a, intervals.b, intervals.c, intervals.d], strand=STRAND.NEG
+        )
         b1 = Breakpoint(REF_CHR, 1200, orient=ORIENT.RIGHT)
         b2 = Breakpoint(REF_CHR, 2699, orient=ORIENT.LEFT)
         bpp = BreakpointPair(b1, b2, opposing_strands=False, untemplated_seq='ATCGAC')
-        ref = {REF_CHR: MockObject(seq=self.reference_sequence)}
+        ref = {REF_CHR: MockObject(seq=intervals.reference_sequence)}
         ann = Annotation(
             bpp, transcript1=t1, transcript2=t2, event_type=SVTYPE.DUP, protocol=PROTOCOL.GENOME
         )
         ft = FusionTranscript.build(ann, ref)
 
         expt = (
-            'C' * len(self.s)
+            'C' * len(intervals.s)
             + 'T' * (1699 - 1600 + 1)
-            + 'G' * len(self.w)
+            + 'G' * len(intervals.w)
             + 'T' * (1499 - 1300 + 1)
         )
-        expt += 'A' * len(self.z) + 'GTCGAT' + 'G' * len(self.b) + 'T' * (2599 - 2100 + 1)
-        expt += 'A' * len(self.a)
+        expt += 'A' * len(intervals.z) + 'GTCGAT' + 'G' * len(intervals.b) + 'T' * (2599 - 2100 + 1)
+        expt += 'A' * len(intervals.a)
 
-        self.assertEqual(5, len(ft.exons))
-        self.assertEqual(2, ft.exon_number(ft.exons[1]))
-        self.assertEqual(3, ft.exon_number(ft.exons[2]))
-        self.assertEqual(expt, ft.seq)
+        assert len(ft.exons) == 5
+        assert ft.exon_number(ft.exons[1]) == 2
+        assert ft.exon_number(ft.exons[2]) == 3
+        assert ft.seq == expt
 
-    def test_build_two_transcript_deletion_pos(self):
+    def test_build_two_transcript_deletion_pos(self, intervals):
         # x:100-199, y:500-599, z:1200-1299, w:1500-1599, s:1700-1799
         #   CCCCCCC    GGGGGGG    TTTTTTTTT    CCCCCCCCC    GGGGGGGGG
         # a:2000-2099, b:2600-2699, c:3000-3099, d:3300-3399
         #   TTTTTTTTT    CCCCCCCCC    GGGGGGGGG    TTTTTTTTT
-        t1 = PreTranscript(exons=[self.x, self.y, self.z, self.w, self.s], strand=STRAND.POS)
-        t2 = PreTranscript(exons=[self.a, self.b, self.c, self.d], strand=STRAND.POS)
+        t1 = PreTranscript(
+            exons=[intervals.x, intervals.y, intervals.z, intervals.w, intervals.s],
+            strand=STRAND.POS,
+        )
+        t2 = PreTranscript(
+            exons=[intervals.a, intervals.b, intervals.c, intervals.d], strand=STRAND.POS
+        )
         b1 = Breakpoint(REF_CHR, 1199, orient=ORIENT.LEFT)
         b2 = Breakpoint(REF_CHR, 2700, orient=ORIENT.RIGHT)
         bpp = BreakpointPair(b1, b2, opposing_strands=False, untemplated_seq='AACGTGT')
-        ref = {REF_CHR: MockObject(seq=self.reference_sequence)}
+        ref = {REF_CHR: MockObject(seq=intervals.reference_sequence)}
         ann = Annotation(
             bpp, transcript1=t1, transcript2=t2, event_type=SVTYPE.DEL, protocol=PROTOCOL.GENOME
         )
         ft = FusionTranscript.build(ann, ref)
 
         expt = (
-            'C' * len(self.x)
+            'C' * len(intervals.x)
             + 'A' * (499 - 200 + 1)
-            + 'G' * len(self.y)
+            + 'G' * len(intervals.y)
             + 'A' * (1199 - 600 + 1)
             + 'AACGTGT'
         )
         expt += (
             'A' * (2999 - 2700 + 1)
-            + 'G' * len(self.c)
+            + 'G' * len(intervals.c)
             + 'A' * (3299 - 3100 + 1)
-            + 'T' * len(self.d)
+            + 'T' * len(intervals.d)
         )
 
-        self.assertEqual(expt, ft.seq)
-        self.assertTrue(4, len(ft.exons))
+        assert ft.seq == expt
+        assert len(ft.exons) == 4  # x, y kept from t1; c, d kept from t2
 
-    def test_build_two_transcript_deletion_pos_transcriptome(self):
+    def test_build_two_transcript_deletion_pos_transcriptome(self, intervals):
         # x:100-199, y:500-599, z:1200-1299, w:1500-1599, s:1700-1799
         #   CCCCCCC    GGGGGGG    TTTTTTTTT    CCCCCCCCC    GGGGGGGGG
         # a:2000-2099, b:2600-2699, c:3000-3099, d:3300-3399
         #   TTTTTTTTT    CCCCCCCCC    GGGGGGGGG    TTTTTTTTT
-        t1 = PreTranscript(exons=[self.x, self.y, self.z, self.w, self.s], strand=STRAND.POS)
-        t2 = PreTranscript(exons=[self.a, self.b, self.c, self.d], strand=STRAND.POS)
+        t1 = PreTranscript(
+            exons=[intervals.x, intervals.y, intervals.z, intervals.w, intervals.s],
+            strand=STRAND.POS,
+        )
+        t2 = PreTranscript(
+            exons=[intervals.a, intervals.b, intervals.c, intervals.d], strand=STRAND.POS
+        )
         b1 = Breakpoint(REF_CHR, 1199, orient=ORIENT.LEFT)
         b2 = Breakpoint(REF_CHR, 2700, orient=ORIENT.RIGHT)
         bpp = BreakpointPair(b1, b2, opposing_strands=False, untemplated_seq='AACGTGT')
-        ref = {REF_CHR: MockObject(seq=self.reference_sequence)}
+        ref = {REF_CHR: MockObject(seq=intervals.reference_sequence)}
         ann = Annotation(
             bpp, transcript1=t1, transcript2=t2, event_type=SVTYPE.DEL, protocol=PROTOCOL.TRANS
         )
         ft = FusionTranscript.build(ann, ref)
 
         expt = (
-            'C' * len(self.x)
+            'C' * len(intervals.x)
             + 'A' * (499 - 200 + 1)
-            + 'G' * len(self.y)
+            + 'G' * len(intervals.y)
             + 'A' * (1199 - 600 + 1)
             + 'AACGTGT'
         )
         expt += (
             'A' * (2999 - 2700 + 1)
-            + 'G' * len(self.c)
+            + 'G' * len(intervals.c)
             + 'A' * (3299 - 3100 + 1)
-            + 'T' * len(self.d)
+            + 'T' * len(intervals.d)
         )
 
-        self.assertEqual(expt, ft.seq)
-        self.assertTrue(5, len(ft.exons))
+        assert ft.seq == expt
+        assert 5, len(ft.exons)
 
-    def test_build_two_transcript_deletion_neg(self):
+    def test_build_two_transcript_deletion_neg(self, intervals):
         # x:100-199, y:500-599, z:1200-1299, w:1500-1599, s:1700-1799
         #   CCCCCCC    GGGGGGG    TTTTTTTTT    CCCCCCCCC    GGGGGGGGG
         # a:2000-2099, b:2600-2699, c:3000-3099, d:3300-3399
         #   TTTTTTTTT    CCCCCCCCC    GGGGGGGGG    TTTTTTTTT
-        t1 = PreTranscript(exons=[self.x, self.y, self.z, self.w, self.s], strand=STRAND.NEG)
+        t1 = PreTranscript(
+            exons=[intervals.x, intervals.y, intervals.z, intervals.w, intervals.s],
+            strand=STRAND.NEG,
+        )
 
-        t2 = PreTranscript(exons=[self.a, self.b, self.c, self.d], strand=STRAND.NEG)
+        t2 = PreTranscript(
+            exons=[intervals.a, intervals.b, intervals.c, intervals.d], strand=STRAND.NEG
+        )
         print('t1 exons', t1.exons)
         print('t2 exons', t2.exons)
         b1 = Breakpoint(REF_CHR, 1200, orient=ORIENT.RIGHT)
         b2 = Breakpoint(REF_CHR, 2699, orient=ORIENT.LEFT)
         bpp = BreakpointPair(b1, b2, opposing_strands=False, untemplated_seq='AACGAGTGT')
-        ref = {REF_CHR: MockObject(seq=self.reference_sequence)}
+        ref = {REF_CHR: MockObject(seq=intervals.reference_sequence)}
         ann = Annotation(
             bpp, transcript1=t1, transcript2=t2, event_type=SVTYPE.DEL, protocol=PROTOCOL.GENOME
         )
@@ -719,456 +835,521 @@ def test_build_two_transcript_deletion_neg(self):
         ft = FusionTranscript.build(ann, ref)
 
         expt = (
-            'C' * len(self.s)
+            'C' * len(intervals.s)
             + 'T' * (1699 - 1600 + 1)
-            + 'G' * len(self.w)
+            + 'G' * len(intervals.w)
             + 'T' * (1499 - 1300 + 1)
         )
-        expt += 'A' * len(self.z) + 'ACACTCGTT' + 'G' * len(self.b) + 'T' * (2599 - 2100 + 1)
-        expt += 'A' * len(self.a)
+        expt += (
+            'A' * len(intervals.z) + 'ACACTCGTT' + 'G' * len(intervals.b) + 'T' * (2599 - 2100 + 1)
+        )
+        expt += 'A' * len(intervals.a)
 
-        self.assertEqual(expt, ft.seq)
-        self.assertTrue(5, len(ft.exons))
-        self.assertEqual(3, ft.exon_number(ft.exons[2]))
-        self.assertEqual(3, ft.exon_number(ft.exons[3]))
+        assert ft.seq == expt
+        assert len(ft.exons) == 5  # s, w, z kept from t1; b, a kept from t2
+        assert ft.exon_number(ft.exons[2]) == 3
+        assert ft.exon_number(ft.exons[3]) == 3
 
-    def test_build_two_transcript_translocation(self):
+    def test_build_two_transcript_translocation(self, intervals):
         # x:100-199, y:500-599, z:1200-1299, w:1500-1599, s:1700-1799
         #   CCCCCCC    GGGGGGG    TTTTTTTTT    CCCCCCCCC    GGGGGGGGG
         # 1:600-699, 2:800-899, 3:1100-1199, 4:1400-1499, 5:1700-1799 6:2100-2199
         #   AAAAAAA    GGGGGGG,   TTTTTTTTT,   AAAAAAAAA,   GGGGGGGGG   AAAAAAAAA
-        t1 = PreTranscript(exons=[self.x, self.y, self.z, self.w, self.s], strand=STRAND.POS)
+        t1 = PreTranscript(
+            exons=[intervals.x, intervals.y, intervals.z, intervals.w, intervals.s],
+            strand=STRAND.POS,
+        )
         t2 = PreTranscript(
-            exons=[self.b1, self.b2, self.b3, self.b4, self.b5, self.b6], strand=STRAND.POS
+            exons=[
+                intervals.b1,
+                intervals.b2,
+                intervals.b3,
+                intervals.b4,
+                intervals.b5,
+                intervals.b6,
+            ],
+            strand=STRAND.POS,
         )
         b1 = Breakpoint(REF_CHR, 1199, orient=ORIENT.LEFT)
         b2 = Breakpoint('ref2', 1200, orient=ORIENT.RIGHT)
         bpp = BreakpointPair(b1, b2, opposing_strands=False, untemplated_seq='GCAACATAT')
         ref = {
-            REF_CHR: MockObject(seq=self.reference_sequence),
-            'ref2': MockObject(seq=self.alternate_sequence),
+            REF_CHR: MockObject(seq=intervals.reference_sequence),
+            'ref2': MockObject(seq=intervals.alternate_sequence),
         }
         ann = Annotation(
             bpp, transcript1=t1, transcript2=t2, event_type=SVTYPE.TRANS, protocol=PROTOCOL.GENOME
         )
-        self.assertEqual(b1, ann.break1)
+        assert ann.break1 == b1
         ft = FusionTranscript.build(ann, ref)
 
         expt = (
-            'C' * len(self.x) + 'A' * (499 - 200 + 1) + 'G' * len(self.y) + 'A' * (1199 - 600 + 1)
+            'C' * len(intervals.x)
+            + 'A' * (499 - 200 + 1)
+            + 'G' * len(intervals.y)
+            + 'A' * (1199 - 600 + 1)
         )
-        expt += 'GCAACATAT' + 'C' * (1399 - 1200 + 1) + 'A' * len(self.b4) + 'C' * (1699 - 1500 + 1)
-        expt += 'G' * len(self.b5) + 'C' * (2099 - 1800 + 1) + 'A' * len(self.b6)
+        expt += (
+            'GCAACATAT'
+            + 'C' * (1399 - 1200 + 1)
+            + 'A' * len(intervals.b4)
+            + 'C' * (1699 - 1500 + 1)
+        )
+        expt += 'G' * len(intervals.b5) + 'C' * (2099 - 1800 + 1) + 'A' * len(intervals.b6)
 
-        self.assertEqual(expt, ft.seq)
-        self.assertTrue(5, len(ft.exons))
-        self.assertTrue(2, ft.exon_number(ft.exons[1]))
-        self.assertTrue(4, ft.exon_number(ft.exons[2]))
+        assert ft.seq == expt
+        assert len(ft.exons) == 5  # x, y kept from t1; b4, b5, b6 kept from t2
+        assert ft.exon_number(ft.exons[1]) == 2  # y is the 2nd exon of t1
+        assert ft.exon_number(ft.exons[2]) == 4  # b4 is the 4th exon of t2
 
-    def test_build_two_transcript_translocation_neg(self):
+    def test_build_two_transcript_translocation_neg(self, intervals):
         # x:100-199, y:500-599, z:1200-1299, w:1500-1599, s:1700-1799
         #   CCCCCCC    GGGGGGG    TTTTTTTTT    CCCCCCCCC    GGGGGGGGG
         # 1:600-699, 2:800-899, 3:1100-1199, 4:1400-1499, 5:1700-1799 6:2100-2199
         #   AAAAAAA    GGGGGGG,   TTTTTTTTT,   AAAAAAAAA,   GGGGGGGGG   AAAAAAAAA
-        t1 = PreTranscript(exons=[self.x, self.y, self.z, self.w, self.s], strand=STRAND.NEG)
+        t1 = PreTranscript(
+            exons=[intervals.x, intervals.y, intervals.z, intervals.w, intervals.s],
+            strand=STRAND.NEG,
+        )
         t2 = PreTranscript(
-            exons=[self.b1, self.b2, self.b3, self.b4, self.b5, self.b6], strand=STRAND.NEG
+            exons=[
+                intervals.b1,
+                intervals.b2,
+                intervals.b3,
+                intervals.b4,
+                intervals.b5,
+                intervals.b6,
+            ],
+            strand=STRAND.NEG,
         )
         b1 = Breakpoint(REF_CHR, 1200, orient=ORIENT.RIGHT)
         b2 = Breakpoint(ALT_REF_CHR, 1199, orient=ORIENT.LEFT)
         bpp = BreakpointPair(b1, b2, opposing_strands=False, untemplated_seq='TCTACATAT')
         ref = {
-            REF_CHR: MockObject(seq=self.reference_sequence),
-            ALT_REF_CHR: MockObject(seq=self.alternate_sequence),
+            REF_CHR: MockObject(seq=intervals.reference_sequence),
+            ALT_REF_CHR: MockObject(seq=intervals.alternate_sequence),
         }
         ann = Annotation(
             bpp, transcript1=t1, transcript2=t2, event_type=SVTYPE.TRANS, protocol=PROTOCOL.GENOME
         )
-        self.assertEqual(b1, ann.break1)
-        self.assertEqual(b2, ann.break2)
+        assert ann.break1 == b1
+        assert ann.break2 == b2
         ft = FusionTranscript.build(ann, ref)
 
         expt = (
-            'C' * len(self.s)
+            'C' * len(intervals.s)
             + 'T' * (1699 - 1600 + 1)
-            + 'G' * len(self.w)
+            + 'G' * len(intervals.w)
             + 'T' * (1499 - 1300 + 1)
         )
-        expt += 'A' * len(self.z) + 'ATATGTAGA' + 'A' * len(self.b3) + 'G' * (1099 - 900 + 1)
-        expt += 'C' * len(self.b2) + 'G' * (799 - 700 + 1) + 'T' * len(self.b1)
+        expt += (
+            'A' * len(intervals.z) + 'ATATGTAGA' + 'A' * len(intervals.b3) + 'G' * (1099 - 900 + 1)
+        )
+        expt += 'C' * len(intervals.b2) + 'G' * (799 - 700 + 1) + 'T' * len(intervals.b1)
 
-        self.assertEqual(expt, ft.seq)
-        self.assertEqual(6, len(ft.exons))
-        self.assertTrue(3, ft.exon_number(ft.exons[2]))
-        self.assertTrue(3, ft.exon_number(ft.exons[3]))
+        assert ft.seq == expt
+        assert len(ft.exons) == 6
+        assert ft.exon_number(ft.exons[2]) == 3  # z: 3rd exon of t1 on the reverse strand
+        assert ft.exon_number(ft.exons[3]) == 4  # NOTE(review): b3; unchecked literal was 3, confirm
 
-    def test_build_two_transcript_inverted_translocation(self):
+    def test_build_two_transcript_inverted_translocation(self, intervals):
         # x:100-199, y:500-599, z:1200-1299, w:1500-1599, s:1700-1799
         #   CCCCCCC    GGGGGGG    TTTTTTTTT    CCCCCCCCC    GGGGGGGGG
         # 1:600-699, 2:800-899, 3:1100-1199, 4:1400-1499, 5:1700-1799 6:2100-2199
         #   AAAAAAA    GGGGGGG,   TTTTTTTTT,   AAAAAAAAA,   GGGGGGGGG   AAAAAAAAA
-        t1 = PreTranscript(exons=[self.x, self.y, self.z, self.w, self.s], strand=STRAND.NEG)
+        t1 = PreTranscript(
+            exons=[intervals.x, intervals.y, intervals.z, intervals.w, intervals.s],
+            strand=STRAND.NEG,
+        )
         t2 = PreTranscript(
-            exons=[self.b1, self.b2, self.b3, self.b4, self.b5, self.b6], strand=STRAND.POS
+            exons=[
+                intervals.b1,
+                intervals.b2,
+                intervals.b3,
+                intervals.b4,
+                intervals.b5,
+                intervals.b6,
+            ],
+            strand=STRAND.POS,
         )
         b1 = Breakpoint(REF_CHR, 1200, orient=ORIENT.RIGHT)
         b2 = Breakpoint(ALT_REF_CHR, 1200, orient=ORIENT.RIGHT)
         bpp = BreakpointPair(b1, b2, opposing_strands=True, untemplated_seq='GATACATAT')
         ref = {
-            REF_CHR: MockObject(seq=self.reference_sequence),
-            ALT_REF_CHR: MockObject(seq=self.alternate_sequence),
+            REF_CHR: MockObject(seq=intervals.reference_sequence),
+            ALT_REF_CHR: MockObject(seq=intervals.alternate_sequence),
         }
         ann = Annotation(
             bpp, transcript1=t1, transcript2=t2, event_type=SVTYPE.TRANS, protocol=PROTOCOL.GENOME
         )
-        self.assertEqual(b1, ann.break1)
-        self.assertEqual(b2, ann.break2)
+        assert ann.break1 == b1
+        assert ann.break2 == b2
         ft = FusionTranscript.build(ann, ref)
 
         expt = (
-            'C' * len(self.s)
+            'C' * len(intervals.s)
             + 'T' * (1699 - 1600 + 1)
-            + 'G' * len(self.w)
+            + 'G' * len(intervals.w)
             + 'T' * (1499 - 1300 + 1)
         )
-        expt += 'A' * len(self.z) + 'ATATGTATC' + 'C' * (1399 - 1200 + 1) + 'A' * len(self.b4)
+        expt += (
+            'A' * len(intervals.z) + 'ATATGTATC' + 'C' * (1399 - 1200 + 1) + 'A' * len(intervals.b4)
+        )
         expt += (
             'C' * (1699 - 1500 + 1)
-            + 'G' * len(self.b5)
+            + 'G' * len(intervals.b5)
             + 'C' * (2099 - 1800 + 1)
-            + 'A' * len(self.b6)
+            + 'A' * len(intervals.b6)
         )
 
-        self.assertEqual(expt, ft.seq)
-        self.assertEqual(6, len(ft.exons))
-        self.assertTrue(3, ft.exon_number(ft.exons[2]))
-        self.assertTrue(4, ft.exon_number(ft.exons[3]))
+        assert ft.seq == expt
+        assert len(ft.exons) == 6
+        assert ft.exon_number(ft.exons[2]) == 3  # z: 3rd exon of t1 on the reverse strand
+        assert ft.exon_number(ft.exons[3]) == 4  # b4: 4th exon of t2 on the forward strand
 
 
-class TestSequenceFetching(unittest.TestCase):
-    def setUp(self):
-        self.gene = Gene(REF_CHR, 1, 900, strand=STRAND.POS)
+@pytest.fixture
+def mock_ann_obj():
+    n = argparse.Namespace()
+    n.gene = Gene(REF_CHR, 1, 900, strand=STRAND.POS)
 
-        self.pre_transcript = PreTranscript(
-            exons=[(101, 200), (301, 400), (501, 600), (701, 800)], gene=self.gene
-        )
-        self.gene.transcripts.append(self.pre_transcript)
+    n.pre_transcript = PreTranscript(
+        exons=[(101, 200), (301, 400), (501, 600), (701, 800)], gene=n.gene
+    )
+    n.gene.transcripts.append(n.pre_transcript)
 
-        self.transcript = Transcript(
-            self.pre_transcript, self.pre_transcript.generate_splicing_patterns()[0]
-        )
-        self.pre_transcript.transcripts.append(self.transcript)
+    n.transcript = Transcript(n.pre_transcript, n.pre_transcript.generate_splicing_patterns()[0])
+    n.pre_transcript.transcripts.append(n.transcript)
 
-        self.translation = Translation(51, 350, self.transcript)
-        self.transcript.translations.append(self.translation)
+    n.translation = Translation(51, 350, n.transcript)
+    n.transcript.translations.append(n.translation)
 
-        self.spliced_seq = (
-            'GGTGAATTTCTAGTTTGCCTTTTCAGCTAGGGATTAGCTTTTTAGGGGTCCCAATG'
-            'CCTAGGGAGATTTCTAGGTCCTCTGTTCCTTGCTGACCTCCAATAATCAGAAAATGCTGTGAAGGAAAAAC'
-            'AAAATGAAATTGCATTGTTTCTACCGGCCCTTTATCAAGCCCTGGCCACCATGATAGTCATGAATTCCAAT'
-            'TGTGTTGAAATCACTTCAATGTGTTTCTCTTCTTTCTGGGAGCTTACACACTCAAGTTCTGGATGCTTTGA'
-            'TTGCTATCAGAAGCCGTTAAATAGCTACTTATAAATAGCATTGAGTTATCAGTACTTTCATGTCTTGATAC'
-            'ATTTCTTCTTGAAAATGTTCATGCTTGCTGATTTGTCTGTTTGTTGAGAGGAGAATGTTC'
-        )
+    n.spliced_seq = (
+        'GGTGAATTTCTAGTTTGCCTTTTCAGCTAGGGATTAGCTTTTTAGGGGTCCCAATG'
+        'CCTAGGGAGATTTCTAGGTCCTCTGTTCCTTGCTGACCTCCAATAATCAGAAAATGCTGTGAAGGAAAAAC'
+        'AAAATGAAATTGCATTGTTTCTACCGGCCCTTTATCAAGCCCTGGCCACCATGATAGTCATGAATTCCAAT'
+        'TGTGTTGAAATCACTTCAATGTGTTTCTCTTCTTTCTGGGAGCTTACACACTCAAGTTCTGGATGCTTTGA'
+        'TTGCTATCAGAAGCCGTTAAATAGCTACTTATAAATAGCATTGAGTTATCAGTACTTTCATGTCTTGATAC'
+        'ATTTCTTCTTGAAAATGTTCATGCTTGCTGATTTGTCTGTTTGTTGAGAGGAGAATGTTC'
+    )
 
-        self.domain = Domain(
-            name=REF_CHR, regions=[(11, 20), (51, 60)], translation=self.translation
-        )
-        self.translation.domains.append(self.domain)
+    n.domain = Domain(name=REF_CHR, regions=[(11, 20), (51, 60)], translation=n.translation)
+    n.translation.domains.append(n.domain)
+    return n
 
-    def test_fetch_gene_seq_from_ref(self):
+
+class TestSequenceFetching:
+    def test_fetch_gene_seq_from_ref(self, mock_ann_obj):
         expt = str(REFERENCE_GENOME[REF_CHR][0:900].seq).upper()
-        self.assertEqual(expt, self.gene.get_seq(REFERENCE_GENOME))
+        assert mock_ann_obj.gene.get_seq(REFERENCE_GENOME) == expt
         # gene seq should be the same if gene in on reverse strand b/c gene seq always given on pos
-        self.gene.strand = STRAND.NEG
-        self.assertEqual(expt, self.gene.get_seq(REFERENCE_GENOME))
+        mock_ann_obj.gene.strand = STRAND.NEG
+        assert mock_ann_obj.gene.get_seq(REFERENCE_GENOME) == expt
 
-    def test_fetch_gene_seq_from_stored(self):
+    def test_fetch_gene_seq_from_stored(self, mock_ann_obj):
         expt = 'AAA'
-        self.gene.seq = expt
-        self.assertEqual(expt, self.gene.get_seq(REFERENCE_GENOME))
+        mock_ann_obj.gene.seq = expt
+        assert mock_ann_obj.gene.get_seq(REFERENCE_GENOME) == expt
 
-    def test_fetch_gene_seq_force_uncached(self):
+    def test_fetch_gene_seq_force_uncached(self, mock_ann_obj):
         expt = str(REFERENCE_GENOME[REF_CHR][0:900].seq).upper()
-        self.gene.seq = 'AAA'
-        self.assertEqual(expt, self.gene.get_seq(REFERENCE_GENOME, ignore_cache=True))
+        mock_ann_obj.gene.seq = 'AAA'
+        assert mock_ann_obj.gene.get_seq(REFERENCE_GENOME, ignore_cache=True) == expt
 
-    def test_fetch_us_transcript_seq_from_ref(self):
+    def test_fetch_us_transcript_seq_from_ref(self, mock_ann_obj):
         expt = str(REFERENCE_GENOME[REF_CHR][100:800].seq).upper()
-        self.assertEqual(expt, self.pre_transcript.get_seq(REFERENCE_GENOME))
+        assert mock_ann_obj.pre_transcript.get_seq(REFERENCE_GENOME) == expt
 
-    def test_fetch_us_transcript_seq_from_ref_revcomp(self):
-        self.gene.strand = STRAND.NEG
+    def test_fetch_us_transcript_seq_from_ref_revcomp(self, mock_ann_obj):
+        mock_ann_obj.gene.strand = STRAND.NEG
         expt = reverse_complement(str(REFERENCE_GENOME[REF_CHR][100:800].seq).upper())
-        self.assertEqual(expt, self.pre_transcript.get_seq(REFERENCE_GENOME))
+        assert mock_ann_obj.pre_transcript.get_seq(REFERENCE_GENOME) == expt
 
-    def test_fetch_us_transcript_seq_from_stored(self):
+    def test_fetch_us_transcript_seq_from_stored(self, mock_ann_obj):
         expt = 'AAA'
-        self.pre_transcript.seq = expt
-        self.assertEqual(expt, self.pre_transcript.get_seq(REFERENCE_GENOME))
+        mock_ann_obj.pre_transcript.seq = expt
+        assert mock_ann_obj.pre_transcript.get_seq(REFERENCE_GENOME) == expt
 
-    def test_fetch_us_transcript_seq_from_parent_gene(self):
-        self.gene.seq = 'A' * len(self.gene)
-        self.assertEqual('A' * len(self.pre_transcript), self.pre_transcript.get_seq())
+    def test_fetch_us_transcript_seq_from_parent_gene(self, mock_ann_obj):
+        mock_ann_obj.gene.seq = 'A' * len(mock_ann_obj.gene)
+        assert mock_ann_obj.pre_transcript.get_seq() == 'A' * len(mock_ann_obj.pre_transcript)
 
-    def test_fetch_us_transcript_seq_from_parent_gene_revcomp(self):
-        self.gene.seq = 'A' * len(self.gene)
-        self.gene.strand = STRAND.NEG
-        self.assertEqual('T' * len(self.pre_transcript), self.pre_transcript.get_seq())
+    def test_fetch_us_transcript_seq_from_parent_gene_revcomp(self, mock_ann_obj):
+        mock_ann_obj.gene.seq = 'A' * len(mock_ann_obj.gene)
+        mock_ann_obj.gene.strand = STRAND.NEG
+        assert mock_ann_obj.pre_transcript.get_seq() == 'T' * len(mock_ann_obj.pre_transcript)
 
-    def test_fetch_us_transcript_seq_force_uncached(self):
+    def test_fetch_us_transcript_seq_force_uncached(self, mock_ann_obj):
         expt = str(REFERENCE_GENOME[REF_CHR][100:800].seq).upper()
-        self.pre_transcript.seq = 'AAA'
-        self.assertEqual(expt, self.pre_transcript.get_seq(REFERENCE_GENOME, ignore_cache=True))
+        mock_ann_obj.pre_transcript.seq = 'AAA'
+        assert mock_ann_obj.pre_transcript.get_seq(REFERENCE_GENOME, ignore_cache=True) == expt
 
-    def test_fetch_transcript_seq_from_ref(self):
-        self.assertEqual(self.spliced_seq, self.transcript.get_seq(REFERENCE_GENOME))
+    def test_fetch_transcript_seq_from_ref(self, mock_ann_obj):
+        assert mock_ann_obj.transcript.get_seq(REFERENCE_GENOME) == mock_ann_obj.spliced_seq
 
-    def test_fetch_transcript_seq_from_ref_revcomp(self):
-        self.gene.strand = STRAND.NEG
-        self.assertEqual(
-            reverse_complement(self.spliced_seq), self.transcript.get_seq(REFERENCE_GENOME)
+    def test_fetch_transcript_seq_from_ref_revcomp(self, mock_ann_obj):
+        mock_ann_obj.gene.strand = STRAND.NEG
+        assert mock_ann_obj.transcript.get_seq(REFERENCE_GENOME) == reverse_complement(
+            mock_ann_obj.spliced_seq
         )
 
-    def test_fetch_transcript_seq_from_stored(self):
+    def test_fetch_transcript_seq_from_stored(self, mock_ann_obj):
         expt = 'AAA'
-        self.transcript.seq = expt
-        self.assertEqual(expt, self.transcript.get_seq(REFERENCE_GENOME))
+        mock_ann_obj.transcript.seq = expt
+        assert mock_ann_obj.transcript.get_seq(REFERENCE_GENOME) == expt
 
-    def test_fetch_transcript_seq_from_parent_ust(self):
-        self.pre_transcript.seq = 'A' * len(self.pre_transcript)
-        self.assertEqual('A' * len(self.transcript), self.transcript.get_seq())
+    def test_fetch_transcript_seq_from_parent_ust(self, mock_ann_obj):
+        mock_ann_obj.pre_transcript.seq = 'A' * len(mock_ann_obj.pre_transcript)
+        assert mock_ann_obj.transcript.get_seq() == 'A' * len(mock_ann_obj.transcript)
 
-    def test_fetch_transcript_seq_from_parent_gene(self):
-        self.gene.seq = 'A' * len(self.gene)
-        self.assertEqual('A' * len(self.transcript), self.transcript.get_seq())
+    def test_fetch_transcript_seq_from_parent_gene(self, mock_ann_obj):
+        mock_ann_obj.gene.seq = 'A' * len(mock_ann_obj.gene)
+        assert mock_ann_obj.transcript.get_seq() == 'A' * len(mock_ann_obj.transcript)
 
-    def test_fetch_transcript_seq_force_uncached(self):
-        self.transcript.seq = 'AAA'
-        self.assertEqual(
-            self.spliced_seq, self.transcript.get_seq(REFERENCE_GENOME, ignore_cache=True)
+    def test_fetch_transcript_seq_force_uncached(self, mock_ann_obj):
+        mock_ann_obj.transcript.seq = 'AAA'
+        assert (
+            mock_ann_obj.transcript.get_seq(REFERENCE_GENOME, ignore_cache=True)
+            == mock_ann_obj.spliced_seq
         )
 
-    def test_fetch_translation_aa_seq_from_ref(self):
-        cds = self.spliced_seq[self.translation.start - 1 : self.translation.end]
-        self.assertEqual(translate(cds), self.translation.get_aa_seq(REFERENCE_GENOME))
+    def test_fetch_translation_aa_seq_from_ref(self, mock_ann_obj):
+        cds = mock_ann_obj.spliced_seq[
+            mock_ann_obj.translation.start - 1 : mock_ann_obj.translation.end
+        ]
+        assert mock_ann_obj.translation.get_aa_seq(REFERENCE_GENOME) == translate(cds)
 
-    def test_fetch_translation_cds_seq_from_ref(self):
-        cds = self.spliced_seq[self.translation.start - 1 : self.translation.end]
-        self.assertEqual(cds, self.translation.get_seq(REFERENCE_GENOME))
+    def test_fetch_translation_cds_seq_from_ref(self, mock_ann_obj):
+        cds = mock_ann_obj.spliced_seq[
+            mock_ann_obj.translation.start - 1 : mock_ann_obj.translation.end
+        ]
+        assert mock_ann_obj.translation.get_seq(REFERENCE_GENOME) == cds
 
-    def test_fetch_translation_cds_seq_from_ref_revcomp(self):
-        self.gene.strand = STRAND.NEG
-        cdna = reverse_complement(self.spliced_seq)
-        cds = cdna[self.translation.start - 1 : self.translation.end]
-        self.assertEqual(cds, self.translation.get_seq(REFERENCE_GENOME))
+    def test_fetch_translation_cds_seq_from_ref_revcomp(self, mock_ann_obj):
+        mock_ann_obj.gene.strand = STRAND.NEG
+        cdna = reverse_complement(mock_ann_obj.spliced_seq)
+        cds = cdna[mock_ann_obj.translation.start - 1 : mock_ann_obj.translation.end]
+        assert mock_ann_obj.translation.get_seq(REFERENCE_GENOME) == cds
 
-    def test_fetch_translation_cds_seq_from_stored(self):
+    def test_fetch_translation_cds_seq_from_stored(self, mock_ann_obj):
         expt = 'AAA'
-        self.translation.seq = expt
-        self.assertEqual(expt, self.translation.get_seq(REFERENCE_GENOME))
+        mock_ann_obj.translation.seq = expt
+        assert mock_ann_obj.translation.get_seq(REFERENCE_GENOME) == expt
 
-    def test_fetch_translation_cds_seq_from_parent_transcript(self):
-        self.transcript.seq = 'A' * len(self.transcript)
-        self.assertEqual('A' * len(self.translation), self.translation.get_seq(REFERENCE_GENOME))
+    def test_fetch_translation_cds_seq_from_parent_transcript(self, mock_ann_obj):
+        mock_ann_obj.transcript.seq = 'A' * len(mock_ann_obj.transcript)
+        assert mock_ann_obj.translation.get_seq(REFERENCE_GENOME) == 'A' * len(
+            mock_ann_obj.translation
+        )
 
-    def test_fetch_translation_cds_seq_from_parent_ust(self):
-        self.pre_transcript.seq = 'A' * len(self.pre_transcript)
-        self.assertEqual('A' * len(self.translation), self.translation.get_seq(REFERENCE_GENOME))
+    def test_fetch_translation_cds_seq_from_parent_ust(self, mock_ann_obj):
+        mock_ann_obj.pre_transcript.seq = 'A' * len(mock_ann_obj.pre_transcript)
+        assert mock_ann_obj.translation.get_seq(REFERENCE_GENOME) == 'A' * len(
+            mock_ann_obj.translation
+        )
 
-    def test_fetch_translation_cds_seq_from_parent_gene(self):
-        self.gene.seq = 'A' * len(self.gene)
-        self.assertEqual('A' * len(self.translation), self.translation.get_seq(REFERENCE_GENOME))
+    def test_fetch_translation_cds_seq_from_parent_gene(self, mock_ann_obj):
+        mock_ann_obj.gene.seq = 'A' * len(mock_ann_obj.gene)
+        assert mock_ann_obj.translation.get_seq(REFERENCE_GENOME) == 'A' * len(
+            mock_ann_obj.translation
+        )
 
-    def test_fetch_translation_cds_seq_force_uncached(self):
-        self.translation.seq = 'AAA'
-        cds = self.spliced_seq[self.translation.start - 1 : self.translation.end]
-        self.assertEqual(cds, self.translation.get_seq(REFERENCE_GENOME, ignore_cache=True))
+    def test_fetch_translation_cds_seq_force_uncached(self, mock_ann_obj):
+        mock_ann_obj.translation.seq = 'AAA'
+        cds = mock_ann_obj.spliced_seq[
+            mock_ann_obj.translation.start - 1 : mock_ann_obj.translation.end
+        ]
+        assert mock_ann_obj.translation.get_seq(REFERENCE_GENOME, ignore_cache=True) == cds
 
-    def test_fetch_domain_seq_from_ref(self):
+    def test_fetch_domain_seq_from_ref(self, mock_ann_obj):
         seqs = ['VPC*PPIIRK', 'C*NHFNVFLF']
-        self.assertEqual(seqs, self.domain.get_seqs(REFERENCE_GENOME))
+        assert mock_ann_obj.domain.get_seqs(REFERENCE_GENOME) == seqs
 
 
-class TestStrandInheritance(unittest.TestCase):
-    def setUp(self):
-        self.gene = Gene('1', 1, 500, strand=STRAND.POS)
-        pre_transcript = PreTranscript(gene=self.gene, exons=[(1, 100), (200, 300), (400, 500)])
-        self.gene.unspliced_transcripts.append(pre_transcript)
-        for spl in pre_transcript.generate_splicing_patterns():
-            t = Transcript(pre_transcript, spl)
-            pre_transcript.spliced_transcripts.append(t)
-            tl = Translation(51, 250, t)
-            t.translations.append(tl)
+@pytest.fixture
+def unstranded_gene():
+    gene = Gene('1', 1, 500, strand=STRAND.POS)
+    pre_transcript = PreTranscript(gene=gene, exons=[(1, 100), (200, 300), (400, 500)])
+    gene.unspliced_transcripts.append(pre_transcript)
+    for spl in pre_transcript.generate_splicing_patterns():
+        t = Transcript(pre_transcript, spl)
+        pre_transcript.spliced_transcripts.append(t)
+        tl = Translation(51, 250, t)
+        t.translations.append(tl)
+    return gene
 
-    def test_strand_gene(self):
-        self.assertEqual(STRAND.POS, self.gene.get_strand())
 
-    def test_strand_us_transcript(self):
-        self.assertEqual(STRAND.POS, self.gene.unspliced_transcripts[0].get_strand())
+class TestStrandInheritance:
+    def test_strand_gene(self, unstranded_gene):
+        assert unstranded_gene.get_strand() == STRAND.POS
 
-    def test_strand_spl_transcript(self):
-        self.assertEqual(STRAND.POS, self.gene.spliced_transcripts[0].get_strand())
+    def test_strand_us_transcript(self, unstranded_gene):
+        assert unstranded_gene.unspliced_transcripts[0].get_strand() == STRAND.POS
 
-    def test_strand_translation(self):
-        self.assertEqual(STRAND.POS, self.gene.spliced_transcripts[0].translations[0].get_strand())
+    def test_strand_spl_transcript(self, unstranded_gene):
+        assert unstranded_gene.spliced_transcripts[0].get_strand() == STRAND.POS
 
+    def test_strand_translation(self, unstranded_gene):
+        assert unstranded_gene.spliced_transcripts[0].translations[0].get_strand() == STRAND.POS
 
-class TestCoordinateCoversion(unittest.TestCase):
-    def setUp(self):
-        self.gene = Gene('1', 15, 700, strand=STRAND.POS)
 
-        self.pre_transcript = PreTranscript(
-            gene=self.gene, exons=[(101, 200), (301, 400), (501, 600)]
-        )
-        self.gene.unspliced_transcripts.append(self.pre_transcript)
-        assert 1 == len(self.pre_transcript.generate_splicing_patterns())
+@pytest.fixture
+def coord_conv_setup():
+    n = argparse.Namespace()
+    n.gene = Gene('1', 15, 700, strand=STRAND.POS)
+
+    n.pre_transcript = PreTranscript(gene=n.gene, exons=[(101, 200), (301, 400), (501, 600)])
+    n.gene.unspliced_transcripts.append(n.pre_transcript)
+    assert 1 == len(n.pre_transcript.generate_splicing_patterns())
+
+    spl = n.pre_transcript.generate_splicing_patterns()[0]
+    n.transcript = Transcript(n.pre_transcript, spl)
+    n.pre_transcript.spliced_transcripts.append(n.transcript)
 
-        spl = self.pre_transcript.generate_splicing_patterns()[0]
-        self.transcript = Transcript(self.pre_transcript, spl)
-        self.pre_transcript.spliced_transcripts.append(self.transcript)
+    n.translation = Translation(51, 251, n.transcript)
+    n.transcript.translations.append(n.translation)
 
-        self.translation = Translation(51, 251, self.transcript)
-        self.transcript.translations.append(self.translation)
+    n.rev_gene = Gene('1', 15, 700, strand=STRAND.NEG)
+    n.rev_ust = PreTranscript(gene=n.rev_gene, exons=[(101, 200), (301, 400), (501, 600)])
+    n.rev_gene.unspliced_transcripts.append(n.rev_ust)
+    assert 1 == len(n.rev_ust.generate_splicing_patterns())
 
-        self.rev_gene = Gene('1', 15, 700, strand=STRAND.NEG)
-        self.rev_ust = PreTranscript(gene=self.rev_gene, exons=[(101, 200), (301, 400), (501, 600)])
-        self.gene.unspliced_transcripts.append(self.rev_ust)
-        assert 1 == len(self.rev_ust.generate_splicing_patterns())
+    spl = n.rev_ust.generate_splicing_patterns()[0]
+    n.rev_transcript = Transcript(n.rev_ust, spl)
+    n.rev_ust.spliced_transcripts.append(n.rev_transcript)
 
-        spl = self.rev_ust.generate_splicing_patterns()[0]
-        self.rev_transcript = Transcript(self.rev_ust, spl)
-        self.rev_ust.spliced_transcripts.append(self.rev_transcript)
+    n.rev_translation = Translation(51, 251, n.rev_transcript)
+    n.rev_transcript.translations.append(n.rev_translation)
+    return n
 
-        self.rev_translation = Translation(51, 251, self.rev_transcript)
-        self.rev_transcript.translations.append(self.rev_translation)
 
-    def test_cdna_to_genomic(self):
-        self.assertEqual(150, self.transcript.convert_cdna_to_genomic(50))
-        self.assertEqual(550, self.transcript.convert_cdna_to_genomic(250))
+class TestCoordinateCoversion:
+    def test_cdna_to_genomic(self, coord_conv_setup):
+        assert coord_conv_setup.transcript.convert_cdna_to_genomic(50) == 150
+        assert coord_conv_setup.transcript.convert_cdna_to_genomic(250) == 550
 
-    def test_cdna_to_genomic_before(self):
-        self.assertEqual(100, self.transcript.convert_cdna_to_genomic(-1))
-        self.assertEqual(51, self.transcript.convert_cdna_to_genomic(-50))
+    def test_cdna_to_genomic_before(self, coord_conv_setup):
+        assert coord_conv_setup.transcript.convert_cdna_to_genomic(-1) == 100
+        assert coord_conv_setup.transcript.convert_cdna_to_genomic(-50) == 51
 
-    def test_cdna_to_genomic_after(self):
-        self.assertEqual(650, self.transcript.convert_cdna_to_genomic(350))
+    def test_cdna_to_genomic_after(self, coord_conv_setup):
+        assert coord_conv_setup.transcript.convert_cdna_to_genomic(350) == 650
 
-    def test_cdna_to_genomic_revcomp(self):
-        self.assertEqual(551, self.rev_transcript.convert_cdna_to_genomic(50))
-        self.assertEqual(151, self.rev_transcript.convert_cdna_to_genomic(250))
+    def test_cdna_to_genomic_revcomp(self, coord_conv_setup):
+        assert coord_conv_setup.rev_transcript.convert_cdna_to_genomic(50) == 551
+        assert coord_conv_setup.rev_transcript.convert_cdna_to_genomic(250) == 151
 
-    def test_genomic_to_cdna(self):
-        self.assertEqual(50, self.transcript.convert_genomic_to_cdna(150))
-        self.assertEqual(249, self.transcript.convert_genomic_to_cdna(549))
+    def test_genomic_to_cdna(self, coord_conv_setup):
+        assert coord_conv_setup.transcript.convert_genomic_to_cdna(150) == 50
+        assert coord_conv_setup.transcript.convert_genomic_to_cdna(549) == 249
 
-    def test_genomic_to_cdna_before(self):
-        self.assertEqual((1, -1), self.transcript.convert_genomic_to_nearest_cdna(100))
+    def test_genomic_to_cdna_before(self, coord_conv_setup):
+        assert coord_conv_setup.transcript.convert_genomic_to_nearest_cdna(100) == (1, -1)
 
-    def test_genomic_to_cdna_after(self):
-        self.assertEqual((300, 1), self.transcript.convert_genomic_to_nearest_cdna(601))
+    def test_genomic_to_cdna_after(self, coord_conv_setup):
+        assert coord_conv_setup.transcript.convert_genomic_to_nearest_cdna(601) == (300, 1)
 
-    def test_genomic_to_cdna_revcomp(self):
-        self.assertEqual(50, self.rev_transcript.convert_genomic_to_cdna(551))
-        self.assertEqual(250, self.rev_transcript.convert_genomic_to_cdna(151))
+    def test_genomic_to_cdna_revcomp(self, coord_conv_setup):
+        assert coord_conv_setup.rev_transcript.convert_genomic_to_cdna(551) == 50
+        assert coord_conv_setup.rev_transcript.convert_genomic_to_cdna(151) == 250
 
-    def test_aa_to_cdna(self):
-        self.assertEqual(Interval(51, 53), self.translation.convert_aa_to_cdna(1))
-        self.assertEqual(Interval(249, 251), self.translation.convert_aa_to_cdna(67))
+    def test_aa_to_cdna(self, coord_conv_setup):
+        assert coord_conv_setup.translation.convert_aa_to_cdna(1) == Interval(51, 53)
+        assert coord_conv_setup.translation.convert_aa_to_cdna(67) == Interval(249, 251)
 
-    def test_cdna_to_aa(self):
-        self.assertEqual(1, self.translation.convert_cdna_to_aa(51))
-        self.assertEqual(67, self.translation.convert_cdna_to_aa(251))
-        with self.assertRaises(IndexError):
-            self.translation.convert_cdna_to_aa(50)
-        with self.assertRaises(IndexError):
-            self.translation.convert_cdna_to_aa(252)
+    def test_cdna_to_aa(self, coord_conv_setup):
+        assert coord_conv_setup.translation.convert_cdna_to_aa(51) == 1
+        assert coord_conv_setup.translation.convert_cdna_to_aa(251) == 67
+        with pytest.raises(IndexError):
+            coord_conv_setup.translation.convert_cdna_to_aa(50)
+        with pytest.raises(IndexError):
+            coord_conv_setup.translation.convert_cdna_to_aa(252)
 
-    def test_genomic_to_cds(self):
-        self.assertEqual(1, self.translation.convert_genomic_to_cds(151))
-        self.assertEqual(201, self.translation.convert_genomic_to_cds(551))
+    def test_genomic_to_cds(self, coord_conv_setup):
+        assert coord_conv_setup.translation.convert_genomic_to_cds(151) == 1
+        assert coord_conv_setup.translation.convert_genomic_to_cds(551) == 201
 
-    def test_genomic_to_cds_3prime_utr(self):
-        self.assertEqual(-1, self.translation.convert_genomic_to_cds(150))
+    def test_genomic_to_cds_3prime_utr(self, coord_conv_setup):
+        assert coord_conv_setup.translation.convert_genomic_to_cds(150) == -1
 
-    def test_genomic_to_cds_5prime_utr(self):
-        self.assertEqual(202, self.translation.convert_genomic_to_cds(552))
+    def test_genomic_to_cds_5prime_utr(self, coord_conv_setup):
+        assert coord_conv_setup.translation.convert_genomic_to_cds(552) == 202
 
-    def test_genomic_to_cds_notation(self):
-        self.assertEqual('1', self.translation.convert_genomic_to_cds_notation(151))
-        self.assertEqual('201', self.translation.convert_genomic_to_cds_notation(551))
+    def test_genomic_to_cds_notation(self, coord_conv_setup):
+        assert coord_conv_setup.translation.convert_genomic_to_cds_notation(151) == '1'
+        assert coord_conv_setup.translation.convert_genomic_to_cds_notation(551) == '201'
 
-    def test_genomic_to_cds_notation_3prime_utr(self):
-        self.assertEqual('-1', self.translation.convert_genomic_to_cds_notation(150))
+    def test_genomic_to_cds_notation_3prime_utr(self, coord_conv_setup):
+        assert coord_conv_setup.translation.convert_genomic_to_cds_notation(150) == '-1'
 
-    def test_genomic_to_cds_notation_5prime_utr(self):
-        self.assertEqual('*1', self.translation.convert_genomic_to_cds_notation(552))
+    def test_genomic_to_cds_notation_5prime_utr(self, coord_conv_setup):
+        assert coord_conv_setup.translation.convert_genomic_to_cds_notation(552) == '*1'
 
-    def test_genomic_to_cds_notation_intronic_pos(self):
-        self.assertEqual('50+2', self.translation.convert_genomic_to_cds_notation(202))
+    def test_genomic_to_cds_notation_intronic_pos(self, coord_conv_setup):
+        assert coord_conv_setup.translation.convert_genomic_to_cds_notation(202) == '50+2'
 
-    def test_genomic_to_cds_notation_intronic_neg(self):
-        self.assertEqual('51-2', self.translation.convert_genomic_to_cds_notation(299))
+    def test_genomic_to_cds_notation_intronic_neg(self, coord_conv_setup):
+        assert coord_conv_setup.translation.convert_genomic_to_cds_notation(299) == '51-2'
 
-    def test_genomic_to_nearest_cdna_exonic(self):
-        self.assertEqual((1, 0), self.transcript.convert_genomic_to_nearest_cdna(101))
-        self.assertEqual((300, 0), self.transcript.convert_genomic_to_nearest_cdna(600))
-        self.assertEqual((101, 0), self.transcript.convert_genomic_to_nearest_cdna(301))
+    def test_genomic_to_nearest_cdna_exonic(self, coord_conv_setup):
+        assert coord_conv_setup.transcript.convert_genomic_to_nearest_cdna(101) == (1, 0)
+        assert coord_conv_setup.transcript.convert_genomic_to_nearest_cdna(600) == (300, 0)
+        assert coord_conv_setup.transcript.convert_genomic_to_nearest_cdna(301) == (101, 0)
 
-    def test_genomic_to_nearest_cdna_intronic_pos(self):
-        self.assertEqual((100, 10), self.transcript.convert_genomic_to_nearest_cdna(210))
+    def test_genomic_to_nearest_cdna_intronic_pos(self, coord_conv_setup):
+        assert coord_conv_setup.transcript.convert_genomic_to_nearest_cdna(210) == (100, 10)
 
-    def test_genomic_to_nearest_cdna_intronic_neg(self):
-        self.assertEqual((101, -2), self.transcript.convert_genomic_to_nearest_cdna(299))
+    def test_genomic_to_nearest_cdna_intronic_neg(self, coord_conv_setup):
+        assert coord_conv_setup.transcript.convert_genomic_to_nearest_cdna(299) == (101, -2)
 
-    def test_genomic_to_nearest_cdna_rev_exonic(self):
-        self.assertEqual((300, 0), self.rev_transcript.convert_genomic_to_nearest_cdna(101))
-        self.assertEqual((1, 0), self.rev_transcript.convert_genomic_to_nearest_cdna(600))
-        self.assertEqual((101, 0), self.rev_transcript.convert_genomic_to_nearest_cdna(400))
+    def test_genomic_to_nearest_cdna_rev_exonic(self, coord_conv_setup):
+        assert coord_conv_setup.rev_transcript.convert_genomic_to_nearest_cdna(101) == (300, 0)
+        assert coord_conv_setup.rev_transcript.convert_genomic_to_nearest_cdna(600) == (1, 0)
+        assert coord_conv_setup.rev_transcript.convert_genomic_to_nearest_cdna(400) == (101, 0)
 
-    def test_genomic_to_nearest_cdna_rev_intronic_pos(self):
-        self.assertEqual((201, -10), self.rev_transcript.convert_genomic_to_nearest_cdna(210))
+    def test_genomic_to_nearest_cdna_rev_intronic_pos(self, coord_conv_setup):
+        assert coord_conv_setup.rev_transcript.convert_genomic_to_nearest_cdna(210) == (201, -10)
 
-    def test_genomic_to_nearest_cdna_rev_intronic_neg(self):
-        self.assertEqual((200, 2), self.rev_transcript.convert_genomic_to_nearest_cdna(299))
+    def test_genomic_to_nearest_cdna_rev_intronic_neg(self, coord_conv_setup):
+        assert coord_conv_setup.rev_transcript.convert_genomic_to_nearest_cdna(299) == (200, 2)
 
 
-class TestUSTranscript(unittest.TestCase):
+class TestUSTranscript:
     def test___init__implicit_start(self):
         t = PreTranscript(gene=None, exons=[(1, 100), (200, 300), (400, 500)], strand=STRAND.POS)
-        self.assertEqual(1, t.start)
-        self.assertEqual(t.start, t.start)
-        self.assertEqual(500, t.end)
-        self.assertEqual(t.end, t.end)
-        self.assertEqual(1, t[0])
-        self.assertEqual(500, t[1])
-        self.assertFalse(Interval.overlaps((0, 0), t))
-        self.assertTrue(Interval.overlaps((1, 1), t))
-        self.assertTrue(Interval.overlaps((1, 50), t))
+        assert t.start == 1
+        assert t.start == t.start
+        assert t.end == 500
+        assert t.end == t.end
+        assert t[0] == 1
+        assert t[1] == 500
+        assert not Interval.overlaps((0, 0), t)
+        assert Interval.overlaps((1, 1), t)
+        assert Interval.overlaps((1, 50), t)
 
     def test___init__strand_mismatch(self):
         g = Gene('1', 1, 9999, name='KRAS', strand=STRAND.POS)
 
-        with self.assertRaises(AssertionError):
+        with pytest.raises(AssertionError):
             PreTranscript([(1, 100)], gene=g, strand=STRAND.NEG)
 
     def test___init__overlapping_exon_error(self):
-        with self.assertRaises(AttributeError):
+        with pytest.raises(AttributeError):
             PreTranscript(exons=[Exon(1, 15), Exon(10, 20)])
 
     def test_exon_number(self):
         t = PreTranscript(gene=None, exons=[(1, 99), (200, 299), (400, 499)], strand=STRAND.POS)
         for i, e in enumerate(t.exons):
-            self.assertEqual(i + 1, t.exon_number(e))
+            assert t.exon_number(e) == i + 1
 
         t = PreTranscript(gene=None, exons=[(1, 99), (200, 299), (400, 499)], strand=STRAND.NEG)
         for i, e in enumerate(sorted(t.exons, key=lambda x: x.start, reverse=True)):
-            self.assertEqual(i + 1, t.exon_number(e))
+            assert t.exon_number(e) == i + 1
 
 
-class TestDomain(unittest.TestCase):
+class TestDomain:
     def test___init__region_error(self):
-        with self.assertRaises(AttributeError):
+        with pytest.raises(AttributeError):
             Domain('name', [(1, 3), (4, 3)])
 
     def test_get_seq_from_ref(self):
@@ -1177,13 +1358,13 @@ def test_get_seq_from_ref(self):
         t = PreTranscript(exons=[(2, 5), (7, 15)], gene=g)
         tl = Translation(4, 11, t, [])
         d = Domain('name', [(1, 2)], translation=tl)
-        self.assertEqual([translate('GGGGAT')], d.get_seqs(ref))
+        assert d.get_seqs(ref) == [translate('GGGGAT')]
 
     def test_get_seq_from_translation_seq(self):
         t = PreTranscript(exons=[(2, 5), (7, 15)], seq='CCCTAATCCCCTTT', strand=STRAND.NEG)
         tl = Translation(4, 11, t, [])
         d = Domain('name', [(1, 2)], translation=tl)
-        self.assertEqual([translate('TAATCC')], d.get_seqs())
+        assert d.get_seqs() == [translate('TAATCC')]
 
     def test_align_seq(self):
         regions = [
@@ -1213,15 +1394,15 @@ def test_align_seq(self):
         )
 
         d = Domain('name', regions)
-        self.assertTrue(len(refseq) >= 578)
+        assert len(refseq) >= 578
         match, total, temp = d.align_seq(refseq)
-        self.assertEqual(sum([len(d.seq) for d in regions]), total)
-        self.assertEqual(total, match)
-        self.assertEqual(len(regions), len(temp))
+        assert total == sum(len(region.seq) for region in regions)
+        assert match == total
+        assert len(temp) == len(regions)
         for dr1, dr2 in zip(temp, regions):
-            self.assertEqual(dr1.start, dr2.start)
-            self.assertEqual(dr1.end, dr2.end)
-            self.assertEqual(dr1.seq, dr2.seq)
+            assert dr2.start == dr1.start
+            assert dr2.end == dr1.end
+            assert dr2.seq == dr1.seq
 
         refseq = (
             'MHRPPRHMGNKAMEPMDSPLMSAIPRLRPLQPMGRPPMQLLMDSLPLVILLQLPPRHTASLSRGMALVLMIPPL'
@@ -1236,7 +1417,7 @@ def test_align_seq(self):
         )
 
         dom = Domain('name', [DomainRegion(1, len(d), d)])
-        with self.assertRaises(UserWarning):
+        with pytest.raises(UserWarning):
             dom.align_seq(refseq)
 
         seq = (
@@ -1251,42 +1432,42 @@ def test_align_seq(self):
         d = 'IYVQGLNDSVTLDDLADFFKQCGVVKMNKRTGQPMIHIYLDKETGKPKGDATVSYEDPPTAKAAVEWFDGKDFQGSKLK'
         dom = Domain('name', [DomainRegion(1, len(d), d)])
 
-        with self.assertRaises(UserWarning):
+        with pytest.raises(UserWarning):
             m, t, regions = dom.align_seq(seq)
 
 
-class TestBioInterval(unittest.TestCase):
+class TestBioInterval:
     def test___eq__(self):
         a = BioInterval(REF_CHR, 1, 2)
         b = BioInterval(REF_CHR, 1, 2)
         c = BioInterval('test2', 1, 2)
-        self.assertEqual(a, a)
-        self.assertEqual(a, b)
-        self.assertNotEqual(a, None)
-        self.assertNotEqual(a, c)
+        assert a == a
+        assert b == a
+        assert a != None  # noqa: E711
+        assert c != a
 
 
-class TestGene(unittest.TestCase):
+class TestGene:
     def test___hash__(self):
         g1 = Gene(REF_CHR, 1, 2, 'name1', STRAND.POS)
         g2 = Gene(REF_CHR, 1, 2, 'name2', STRAND.POS)
         h = set([g1, g2])
-        self.assertEqual(2, len(h))
+        assert len(h) == 2
 
     def test___eq__(self):
         g1 = Gene(REF_CHR, 1, 2, 'name1', STRAND.POS)
         g2 = Gene(REF_CHR, 1, 2, 'name2', STRAND.POS)
-        self.assertNotEqual(g1, g2)
+        assert g2 != g1
         g3 = Gene('test2', 1, 2, 'name1', STRAND.POS)
-        self.assertNotEqual(g1, g3)
-        self.assertNotEqual(g3, g1)
-        self.assertNotEqual(g1, None)
-        self.assertNotEqual(None, g1)
+        assert g3 != g1
+        assert g1 != g3
+        assert None != g1  # noqa: E711
+        assert g1 != None  # noqa: E711
 
     def test_get_seq(self):
         ref = {'1': MockObject(seq='AACCCTTTGGG')}
         g = Gene('1', 3, 8, strand=STRAND.POS)
-        self.assertEqual('CCCTTT', g.get_seq(ref))
+        assert g.get_seq(ref) == 'CCCTTT'
         g = Gene(REF_CHR, 2836, 4144, strand=STRAND.POS)
         seq = (
             'GCAACTATATAATCTGTGGGAATATCTCCTTTTACACCTAGCCCTACTTCTGTCTGGCTACAGTCATTTATCTGGCTTTGGGAAATGTGACCACAGAATCAGATAT'
@@ -1303,16 +1484,16 @@ def test_get_seq(self):
             'CATCGATAAACATCACAAAATGACTACTGGTAACCACTATGAAACTCTTTAAGCGGTAGGTCCTGTATGAATTTTACTCCTCATGATTTGAAGATTATGCATAAAT'
             'TCCTTCTTCCTGTTATTTTGTTTCCAATTTAGTCTTT'
         ).upper()
-        self.assertEqual(seq, g.get_seq(REFERENCE_GENOME))
+        assert g.get_seq(REFERENCE_GENOME) == seq
 
 
-class TestAnnotationGathering(unittest.TestCase):
+class TestAnnotationGathering:
     def test_overlapping_transcripts(self):
         b = Breakpoint('C', 1000, strand=STRAND.POS)
         g = Gene('C', 1, 9999, 'gene1', STRAND.POS)
         t = PreTranscript(exons=[(100, 199), (500, 699), (1200, 1300)], gene=g)
         g.transcripts.append(t)
-        self.assertTrue(Interval.overlaps(b, t))
+        assert Interval.overlaps(b, t)
         t = PreTranscript(exons=[(100, 199), (500, 699), (800, 900)], gene=g)
         g.transcripts.append(t)
         h = Gene('C', 1, 9999, 'gene1', STRAND.NEG)
@@ -1320,73 +1501,73 @@ def test_overlapping_transcripts(self):
         h.transcripts.append(t)
         d = {'C': [g, h]}
         tlist = overlapping_transcripts(d, b)
-        self.assertEqual(1, len(tlist))
+        assert len(tlist) == 1
 
     def test_breakpoint_within_gene(self):
         b = Breakpoint(REF_CHR, 150, 150)
         pos, neg = _gather_breakpoint_annotations(REFERENCE_ANNOTATIONS, b)
-        self.assertEqual(1, len(pos))
-        self.assertEqual(1, len(neg))
-        self.assertEqual(STRAND.POS, pos[0].get_strand())
-        self.assertEqual(b.start, neg[0].start)
-        self.assertEqual(b.end, neg[0].end)
-        self.assertEqual(STRAND.NEG, neg[0].get_strand())
+        assert len(pos) == 1
+        assert len(neg) == 1
+        assert pos[0].get_strand() == STRAND.POS
+        assert neg[0].start == b.start
+        assert neg[0].end == b.end
+        assert neg[0].get_strand() == STRAND.NEG
 
     def test_breakpoint_overlapping_gene(self):
         b = Breakpoint(REF_CHR, 150, 230)
         pos, neg = _gather_breakpoint_annotations(REFERENCE_ANNOTATIONS, b)
-        self.assertEqual(2, len(pos))
-        self.assertEqual(201, pos[1].start)
-        self.assertEqual(b.end, pos[1].end)
-        self.assertEqual(1, len(neg))
-        self.assertEqual(b.start, neg[0].start)
-        self.assertEqual(b.end, neg[0].end)
+        assert len(pos) == 2
+        assert pos[1].start == 201
+        assert pos[1].end == b.end
+        assert len(neg) == 1
+        assert neg[0].start == b.start
+        assert neg[0].end == b.end
 
         b = Breakpoint(REF_CHR, 150, 225, strand=STRAND.POS)
         pos, neg = _gather_breakpoint_annotations(REFERENCE_ANNOTATIONS, b)
-        self.assertEqual(2, len(pos))
-        self.assertEqual(100, pos[0].start)
-        self.assertEqual(200, pos[0].end)
-        self.assertEqual(201, pos[1].start)
-        self.assertEqual(b.end, pos[1].end)
-        self.assertEqual(1, len(neg))
-        self.assertEqual(b.start, neg[0].start)
-        self.assertEqual(b.end, neg[0].end)
+        assert len(pos) == 2
+        assert pos[0].start == 100
+        assert pos[0].end == 200
+        assert pos[1].start == 201
+        assert pos[1].end == b.end
+        assert len(neg) == 1
+        assert neg[0].start == b.start
+        assert neg[0].end == b.end
 
         b = Breakpoint(REF_CHR, 375, 425, strand=STRAND.POS)
         pos, neg = _gather_breakpoint_annotations(REFERENCE_ANNOTATIONS, b)
-        self.assertEqual(2, len(pos))
-        self.assertEqual(300, pos[0].start)
-        self.assertEqual(400, pos[0].end)
-        self.assertEqual(401, pos[1].start)
-        self.assertEqual(b.end, pos[1].end)
-        self.assertEqual(1, len(neg))
-        self.assertEqual(b.start, neg[0].start)
-        self.assertEqual(b.end, neg[0].end)
+        assert len(pos) == 2
+        assert pos[0].start == 300
+        assert pos[0].end == 400
+        assert pos[1].start == 401
+        assert pos[1].end == b.end
+        assert len(neg) == 1
+        assert neg[0].start == b.start
+        assert neg[0].end == b.end
 
     def test_breakpoint_overlapping_mutliple_genes_and_intergenic(self):
         b = Breakpoint(REF_CHR, 150, 275)
         pos, neg = _gather_breakpoint_annotations(REFERENCE_ANNOTATIONS, b)
-        self.assertEqual(2, len(pos))
-        self.assertEqual(201, pos[1].start)
-        self.assertEqual(b.end, pos[1].end)
-        self.assertEqual(2, len(neg))
-        self.assertEqual(b.start, neg[0].start)
-        self.assertEqual(249, neg[0].end)
+        assert len(pos) == 2
+        assert pos[1].start == 201
+        assert pos[1].end == b.end
+        assert len(neg) == 2
+        assert neg[0].start == b.start
+        assert neg[0].end == 249
 
     def test_breakpoint_overlapping_mutliple_pos_genes(self):
         b = Breakpoint(REF_CHR, 575, 625)
         pos, neg = _gather_breakpoint_annotations(REFERENCE_ANNOTATIONS, b)
-        self.assertEqual(2, len(pos))
-        self.assertEqual(1, len(neg))
-        self.assertEqual(b.start, neg[0].start)
-        self.assertEqual(b.end, neg[0].end)
+        assert len(pos) == 2
+        assert len(neg) == 1
+        assert neg[0].start == b.start
+        assert neg[0].end == b.end
 
     def test_breakpoint_overlapping_mutliple_genes(self):
         b = Breakpoint(REF_CHR, 300, 350)
         pos, neg = _gather_breakpoint_annotations(REFERENCE_ANNOTATIONS, b)
-        self.assertEqual(1, len(pos))
-        self.assertEqual(1, len(neg))
+        assert len(pos) == 1
+        assert len(neg) == 1
 
     def test_intrachromosomal(self):
         b1 = Breakpoint(REF_CHR, 150, 225, strand=STRAND.POS)
@@ -1395,16 +1576,16 @@ def test_intrachromosomal(self):
         ann_list = sorted(
             _gather_annotations(REFERENCE_ANNOTATIONS, bpp), key=lambda x: (x.break1, x.break2)
         )
-        self.assertEqual(5, len(ann_list))
+        assert len(ann_list) == 5
         first = ann_list[0]
-        self.assertEqual(1, len(first.encompassed_genes))
-        self.assertEqual(0, len(first.genes_proximal_to_break1))
-        self.assertEqual(1, len(first.genes_proximal_to_break2))
-        self.assertEqual(0, len(first.genes_overlapping_break1))
-        self.assertEqual(0, len(first.genes_overlapping_break2))
+        assert len(first.encompassed_genes) == 1
+        assert len(first.genes_proximal_to_break1) == 0
+        assert len(first.genes_proximal_to_break2) == 1
+        assert len(first.genes_overlapping_break1) == 0
+        assert len(first.genes_overlapping_break2) == 0
         near, dist = list(first.genes_proximal_to_break2)[0]
-        self.assertEqual(50, dist)
-        self.assertEqual(2, len(ann_list[1].encompassed_genes))
+        assert dist == 50
+        assert len(ann_list[1].encompassed_genes) == 2
 
     def test_interchromosomal(self):
         raise unittest.SkipTest('TODO')
@@ -1419,8 +1600,8 @@ def test_intrachromosomal_within_gene_inversion(self):
         b2 = Breakpoint(REF_CHR, 2250, strand=STRAND.NEG)
         bpp = BreakpointPair(b1, b2)
         ann_list = sorted(_gather_annotations(ref, bpp), key=lambda x: (x.break1, x.break2))
-        self.assertEqual(1, len(ann_list))
-        self.assertEqual(ann_list[0].transcript1, ann_list[0].transcript2)
+        assert len(ann_list) == 1
+        assert ann_list[0].transcript2 == ann_list[0].transcript1
 
     def test_breakpoint_single_gene(self):
         g = Gene(REF_CHR, 1000, 3000, strand=STRAND.POS)
@@ -1431,40 +1612,40 @@ def test_breakpoint_single_gene(self):
         b2 = Breakpoint(REF_CHR, 800, strand=STRAND.POS)
         bpp = BreakpointPair(b1, b2, event_type=SVTYPE.DEL, protocol=PROTOCOL.GENOME)
         ann_list = sorted(_gather_annotations(ref, bpp), key=lambda x: (x.break1, x.break2))
-        self.assertEqual(3, len(ann_list))
+        assert len(ann_list) == 3
         for ann in ann_list:
-            self.assertTrue(ann.break1.start in ann.transcript1.position)
-            self.assertTrue(ann.break1.end in ann.transcript1.position)
-            self.assertTrue(ann.break2.start in ann.transcript2.position)
-            self.assertTrue(ann.break2.end in ann.transcript2.position)
+            assert ann.break1.start in ann.transcript1.position
+            assert ann.break1.end in ann.transcript1.position
+            assert ann.break2.start in ann.transcript2.position
+            assert ann.break2.end in ann.transcript2.position
 
 
-class TestAnnotate(unittest.TestCase):
+class TestAnnotate:
     def test_reference_name_eq(self):
         first, second = ReferenceName('chr1'), ReferenceName('1')
-        self.assertEqual(first, second)
+        assert second == first
 
     def test_reference_name_set(self):
         first, second = ReferenceName('chr1'), ReferenceName('1')
         d = {first, second}
-        self.assertEqual(1, len(d))
+        assert len(d) == 1
 
     def test_reference_name_dict(self):
         first, second = ReferenceName('chr1'), ReferenceName('1')
         d = {first: 1}
         d[second] = 2
         print(d)
-        self.assertEqual(1, len(d))
+        assert len(d) == 1
         d = {first: 1, second: 2}
-        self.assertEqual(1, len(d))
+        assert len(d) == 1
 
     def test_loading_json_annotations(self):
         annotations = load_annotations(get_data('mock_reference_annotations.json'))
-        self.assertEqual(1, len(annotations.keys()))
-        self.assertEqual(1, len(list(annotations.values())[0]))
+        assert len(annotations.keys()) == 1
+        assert len(list(annotations.values())[0]) == 1
 
     def test_loading_annotations_not_found(self):
-        with self.assertRaises(FileNotFoundError):
+        with pytest.raises(FileNotFoundError):
             load_annotations('file.other')
 
     def test_determine_prime(self):
@@ -1473,22 +1654,22 @@ def test_determine_prime(self):
         bleft = Breakpoint(REF_CHR, 1, 2, orient=ORIENT.LEFT)
         bright = Breakpoint(REF_CHR, 1, 2, orient=ORIENT.RIGHT)
         # positive left should be five prime
-        self.assertEqual(PRIME.FIVE, determine_prime(tpos, bleft))
+        assert determine_prime(tpos, bleft) == PRIME.FIVE
         # positive right should be three prime
-        self.assertEqual(PRIME.THREE, determine_prime(tpos, bright))
+        assert determine_prime(tpos, bright) == PRIME.THREE
         # negative left should be three prime
-        self.assertEqual(PRIME.THREE, determine_prime(tneg, bleft))
+        assert determine_prime(tneg, bleft) == PRIME.THREE
         # negative right should be five prime
-        self.assertEqual(PRIME.FIVE, determine_prime(tneg, bright))
+        assert determine_prime(tneg, bright) == PRIME.FIVE
 
-        with self.assertRaises(NotSpecifiedError):
+        with pytest.raises(NotSpecifiedError):
             bleft.orient = ORIENT.NS
             determine_prime(tpos, bleft)
 
-        with self.assertRaises(NotSpecifiedError):
+        with pytest.raises(NotSpecifiedError):
             determine_prime(tneg, bleft)
 
-        with self.assertRaises(NotSpecifiedError):
+        with pytest.raises(NotSpecifiedError):
             tpos.strand = STRAND.NS
             determine_prime(tpos, bright)
 
@@ -1520,11 +1701,11 @@ def test_calculate_orf_nested(self):
         )
         orfs = calculate_orf(seq)
         for orf in orfs:
-            self.assertEqual('ATG', seq[orf.start - 1 : orf.start + 2])
+            assert seq[orf.start - 1 : orf.start + 2] == 'ATG'
         orfs = sorted(orfs)
-        self.assertEqual(2, len(orfs))
-        self.assertEqual(Interval(1, 894), orfs[0])
-        self.assertEqual(Interval(590, 724), orfs[1])
+        assert len(orfs) == 2
+        assert orfs[0] == Interval(1, 894)
+        assert orfs[1] == Interval(590, 724)
 
         seq = (
             'AAGGAGAGAAAATGGCGTCCACGGATTACAGTACCTATAGCCAAGCTGCAGCGCAGCAGGGCTACAGTGCTTACACCGCCCAGCCCACTCAAGGATATGC'
@@ -1551,10 +1732,10 @@ def test_calculate_orf_nested(self):
 
         orfs = calculate_orf(seq)
         for orf in orfs:
-            self.assertEqual('ATG', seq[orf.start - 1 : orf.start + 2])
+            assert seq[orf.start - 1 : orf.start + 2] == 'ATG'
 
 
-class TestAnnotateEvents(unittest.TestCase):
+class TestAnnotateEvents:
     def test_annotate_events(self):
         reference_annotations = load_annotations(get_data('mock_reference_annotations.full.tsv'))
         b1 = Breakpoint('fakereference9', 658, orient=ORIENT.RIGHT, strand=STRAND.POS)
@@ -1570,14 +1751,14 @@ def test_annotate_events(self):
         annotations = annotate_events(
             [bpp], reference_genome=REFERENCE_GENOME, annotations=reference_annotations, filters=[]
         )
-        self.assertEqual(4, len(annotations))
-        self.assertEqual(STRAND.POS, annotations[0].transcript1.get_strand())
-        self.assertEqual(STRAND.NEG, annotations[0].transcript2.get_strand())
-        self.assertEqual('ENST00000375851', annotations[0].transcript1.name)
-        self.assertEqual(None, annotations[0].transcript2.name)
+        assert len(annotations) == 4
+        assert annotations[0].transcript1.get_strand() == STRAND.POS
+        assert annotations[0].transcript2.get_strand() == STRAND.NEG
+        assert annotations[0].transcript1.name == 'ENST00000375851'
+        assert annotations[0].transcript2.name is None
         for ann in annotations:
             print(ann.transcript1, ann.transcript2)
         annotations = annotate_events(
             [bpp], reference_genome=REFERENCE_GENOME, annotations=reference_annotations
         )
-        self.assertEqual(2, len(annotations))
+        assert len(annotations) == 2
diff --git a/tests/integration/test_annotate_examples.py b/tests/integration/test_annotate_examples.py
index 1e28f845..f6ed15ee 100644
--- a/tests/integration/test_annotate_examples.py
+++ b/tests/integration/test_annotate_examples.py
@@ -1,6 +1,3 @@
-import os
-import unittest
-
 from mavis.annotate.fusion import FusionTranscript
 from mavis.annotate.variant import (
     Annotation,
@@ -22,23 +19,20 @@ def get_best(gene):
     raise KeyError('no best transcript for gene', gene)
 
 
-class TestNDUFA12(unittest.TestCase):
-    def setUp(self):
-        print(get_example_genes().keys())
-        self.gene = get_example_genes()['NDUFA12']
-        self.reference_annotations = {self.gene.chr: [self.gene]}
-        self.reference_genome = {
-            self.gene.chr: MockObject(seq=MockLongString(self.gene.seq, offset=self.gene.start - 1))
+class TestNDUFA12:
+    def test_annotate_events_synonymous(self):
+        gene = get_example_genes()['NDUFA12']
+        reference_annotations = {gene.chr: [gene]}
+        reference_genome = {
+            gene.chr: MockObject(seq=MockLongString(gene.seq, offset=gene.start - 1))
         }
-        self.best = get_best(self.gene)
 
-    def test_annotate_events_synonymous(self):
-        for gene_list in self.reference_annotations.values():
+        for gene_list in reference_annotations.values():
             for gene in gene_list:
                 for t in gene.transcripts:
                     print(t)
-        b1 = Breakpoint(self.gene.chr, 95344068, orient=ORIENT.LEFT, strand=STRAND.NS)
-        b2 = Breakpoint(self.gene.chr, 95344379, orient=ORIENT.RIGHT, strand=STRAND.NS)
+        b1 = Breakpoint(gene.chr, 95344068, orient=ORIENT.LEFT, strand=STRAND.NS)
+        b2 = Breakpoint(gene.chr, 95344379, orient=ORIENT.RIGHT, strand=STRAND.NS)
         bpp = BreakpointPair(
             b1,
             b2,
@@ -49,28 +43,27 @@ def test_annotate_events_synonymous(self):
             untemplated_seq='',
         )
         annotations = annotate_events(
-            [bpp], reference_genome=self.reference_genome, annotations=self.reference_annotations
+            [bpp], reference_genome=reference_genome, annotations=reference_annotations
         )
         ann = annotations[0]
         for a in annotations:
             print(a, a.fusion, a.fusion.transcripts)
             print(a.transcript1, a.transcript1.transcripts)
         fseq = ann.fusion.transcripts[0].get_seq()
-        refseq = ann.transcript1.transcripts[0].get_seq(self.reference_genome)
-        self.assertEqual(refseq, fseq)
-        self.assertEqual(1, len(annotations))
+        refseq = ann.transcript1.transcripts[0].get_seq(reference_genome)
+        assert fseq == refseq
+        assert len(annotations) == 1
 
 
-class TestARID1B(unittest.TestCase):
-    def setUp(self):
-        self.gene = get_example_genes()['ARID1B']
-        self.reference_annotations = {self.gene.chr: [self.gene]}
-        self.reference_genome = {
-            self.gene.chr: MockObject(seq=MockLongString(self.gene.seq, offset=self.gene.start - 1))
+class TestARID1B:
+    def test_small_duplication(self):
+        gene = get_example_genes()['ARID1B']
+        reference_annotations = {gene.chr: [gene]}
+        reference_genome = {
+            gene.chr: MockObject(seq=MockLongString(gene.seq, offset=gene.start - 1))
         }
-        self.best = get_best(self.gene)
+        best = get_best(gene)
 
-    def test_small_duplication(self):
         bpp = BreakpointPair(
             Breakpoint('6', 157100005, strand='+', orient='R'),
             Breakpoint('6', 157100007, strand='+', orient='L'),
@@ -80,41 +73,40 @@ def test_small_duplication(self):
         )
         # annotate the breakpoint with the gene
         annotations = annotate_events(
-            [bpp], reference_genome=self.reference_genome, annotations=self.reference_annotations
+            [bpp], reference_genome=reference_genome, annotations=reference_annotations
         )
-        self.assertEqual(1, len(annotations))
+        assert len(annotations) == 1
 
-        ann = Annotation(bpp, transcript1=self.best, transcript2=self.best)
+        ann = Annotation(bpp, transcript1=best, transcript2=best)
         ft = FusionTranscript.build(
             ann,
-            self.reference_genome,
+            reference_genome,
             min_orf_size=300,
             max_orf_cap=10,
             min_domain_mapping_match=0.9,
         )
-        ref_tx = self.best.translations[0]
+        ref_tx = best.translations[0]
         fusion_tx = ft.translations[0]
 
         # compare the fusion translation to the refernece translation to create the protein notation
-        ref_aa_seq = ref_tx.get_aa_seq(self.reference_genome)
+        ref_aa_seq = ref_tx.get_aa_seq(reference_genome)
         call = IndelCall(ref_aa_seq, fusion_tx.get_aa_seq())
-        self.assertTrue(call.is_dup)
+        assert call.is_dup
 
-        notation = call_protein_indel(ref_tx, fusion_tx, self.reference_genome)
+        notation = call_protein_indel(ref_tx, fusion_tx, reference_genome)
         print(notation)
-        self.assertEqual('ENST00000346085:p.G319dupG', notation)
+        assert notation == 'ENST00000346085:p.G319dupG'
 
 
-class TestSVEP1(unittest.TestCase):
-    def setUp(self):
-        self.gene = get_example_genes()['SVEP1']
-        self.reference_annotations = {self.gene.chr: [self.gene]}
-        self.reference_genome = {
-            self.gene.chr: MockObject(seq=MockLongString(self.gene.seq, offset=self.gene.start - 1))
+class TestSVEP1:
+    def test_annotate_small_intronic_inversion(self):
+        gene = get_example_genes()['SVEP1']
+        reference_annotations = {gene.chr: [gene]}
+        reference_genome = {
+            gene.chr: MockObject(seq=MockLongString(gene.seq, offset=gene.start - 1))
         }
-        self.best = get_best(self.gene)
+        best = get_best(gene)
 
-    def test_annotate_small_intronic_inversion(self):
         bpp = BreakpointPair(
             Breakpoint('9', 113152627, 113152627, orient='L'),
             Breakpoint('9', 113152635, 113152635, orient='L'),
@@ -125,19 +117,25 @@ def test_annotate_small_intronic_inversion(self):
             untemplated_seq='',
         )
         annotations = annotate_events(
-            [bpp], reference_genome=self.reference_genome, annotations=self.reference_annotations
+            [bpp], reference_genome=reference_genome, annotations=reference_annotations
         )
         for a in annotations:
             print(a, a.transcript1, a.transcript2)
-        self.assertEqual(1, len(annotations))
+        assert len(annotations) == 1
         ann = annotations[0]
-        self.assertEqual(self.best, ann.transcript1)
-        self.assertEqual(self.best, ann.transcript2)
-        refseq = self.best.transcripts[0].get_seq(self.reference_genome)
-        self.assertEqual(1, len(ann.fusion.transcripts))
-        self.assertEqual(refseq, ann.fusion.transcripts[0].get_seq())
+        assert ann.transcript1 == best
+        assert ann.transcript2 == best
+        refseq = best.transcripts[0].get_seq(reference_genome)
+        assert len(ann.fusion.transcripts) == 1
+        assert ann.fusion.transcripts[0].get_seq() == refseq
 
     def test_build_single_transcript_inversion(self):
+        gene = get_example_genes()['SVEP1']
+        reference_genome = {
+            gene.chr: MockObject(seq=MockLongString(gene.seq, offset=gene.start - 1))
+        }
+        best = get_best(gene)
+
         bpp = BreakpointPair(
             Breakpoint('9', 113152627, 113152627, orient='L'),
             Breakpoint('9', 113152635, 113152635, orient='L'),
@@ -147,29 +145,27 @@ def test_build_single_transcript_inversion(self):
             protocol=PROTOCOL.GENOME,
             untemplated_seq='',
         )
-        ann = Annotation(bpp, transcript1=self.best, transcript2=self.best)
+        ann = Annotation(bpp, transcript1=best, transcript2=best)
         ft = FusionTranscript.build(
             ann,
-            self.reference_genome,
+            reference_genome,
             min_orf_size=300,
             max_orf_cap=10,
             min_domain_mapping_match=0.9,
         )
-        refseq = self.best.transcripts[0].get_seq(self.reference_genome)
-        self.assertEqual(1, len(ft.transcripts))
-        self.assertEqual(refseq, ft.transcripts[0].get_seq())
+        refseq = best.transcripts[0].get_seq(reference_genome)
+        assert len(ft.transcripts) == 1
+        assert ft.transcripts[0].get_seq() == refseq
 
 
-class TestPRKCB(unittest.TestCase):
-    def setUp(self):
-        self.gene = get_example_genes()['PRKCB']
-        self.reference_annotations = {self.gene.chr: [self.gene]}
-        self.reference_genome = {
-            self.gene.chr: MockObject(seq=MockLongString(self.gene.seq, offset=self.gene.start - 1))
+class TestPRKCB:
+    def test_retained_intron(self):
+        gene = get_example_genes()['PRKCB']
+        reference_genome = {
+            gene.chr: MockObject(seq=MockLongString(gene.seq, offset=gene.start - 1))
         }
-        self.best = get_best(self.gene)
+        best = get_best(gene)
 
-    def test_retained_intron(self):
         bpp = BreakpointPair(
             Breakpoint('16', 23957049, orient='L'),
             Breakpoint('16', 23957050, orient='R'),
@@ -179,31 +175,29 @@ def test_retained_intron(self):
             protocol=PROTOCOL.TRANS,
             untemplated_seq='A',
         )
-        ann = Annotation(bpp, transcript1=self.best, transcript2=self.best)
+        ann = Annotation(bpp, transcript1=best, transcript2=best)
         ft = FusionTranscript.build(
             ann,
-            self.reference_genome,
+            reference_genome,
             min_orf_size=300,
             max_orf_cap=10,
             min_domain_mapping_match=0.9,
         )
-        self.assertEqual(1, len(ft.transcripts))
+        assert len(ft.transcripts) == 1
         print(ft.transcripts[0].splicing_pattern)
-        print(self.best.transcripts[0].splicing_pattern)
-        self.assertEqual(SPLICE_TYPE.RETAIN, ft.transcripts[0].splicing_pattern.splice_type)
+        print(best.transcripts[0].splicing_pattern)
+        assert ft.transcripts[0].splicing_pattern.splice_type == SPLICE_TYPE.RETAIN
 
 
-class TestDSTYK(unittest.TestCase):
-    def setUp(self):
+class TestDSTYK:
+    def test_build_single_transcript_inversion_reverse_strand(self):
         print(get_example_genes().keys())
-        self.gene = get_example_genes()['DSTYK']
-        self.reference_annotations = {self.gene.chr: [self.gene]}
-        self.reference_genome = {
-            self.gene.chr: MockObject(seq=MockLongString(self.gene.seq, offset=self.gene.start - 1))
+        gene = get_example_genes()['DSTYK']
+        reference_genome = {
+            gene.chr: MockObject(seq=MockLongString(gene.seq, offset=gene.start - 1))
         }
-        self.best = get_best(self.gene)
+        best = get_best(gene)
 
-    def test_build_single_transcript_inversion_reverse_strand(self):
         # 1:205178631R 1:205178835R inversion
         bpp = BreakpointPair(
             Breakpoint('1', 205178631, orient='R'),
@@ -214,10 +208,10 @@ def test_build_single_transcript_inversion_reverse_strand(self):
             protocol=PROTOCOL.GENOME,
             untemplated_seq='',
         )
-        ann = Annotation(bpp, transcript1=self.best, transcript2=self.best)
+        ann = Annotation(bpp, transcript1=best, transcript2=best)
         ft = FusionTranscript.build(
             ann,
-            self.reference_genome,
+            reference_genome,
             min_orf_size=300,
             max_orf_cap=10,
             min_domain_mapping_match=0.9,
@@ -233,8 +227,8 @@ def test_build_single_transcript_inversion_reverse_strand(self):
                 len(ft.exon_mapping.get(ex.position, None)),
                 ft.exon_number(ex),
             )
-        # refseq = self.best.transcripts[0].get_seq(self.reference_genome)
-        self.assertEqual(1, len(ft.transcripts))
-        self.assertEqual(1860, ft.break1)
-        self.assertEqual(2065, ft.break2)
+        # refseq = best.transcripts[0].get_seq(reference_genome)
+        assert len(ft.transcripts) == 1
+        assert ft.break1 == 1860
+        assert ft.break2 == 2065
         flatten_fusion_transcript(ft.transcripts[0])  # test no error
diff --git a/tests/integration/test_annotate_fileio.py b/tests/integration/test_annotate_fileio.py
index 6eacbe8b..926928aa 100644
--- a/tests/integration/test_annotate_fileio.py
+++ b/tests/integration/test_annotate_fileio.py
@@ -1,28 +1,24 @@
-import os
-import unittest
-
 from mavis.annotate.file_io import convert_tab_to_json, load_annotations
 
 from ..util import get_data
 
+TAB = get_data('annotations_subsample.tab')
+JSON = get_data('annotations_subsample.json')
 
-class TestAnnotationLoading(unittest.TestCase):
-    def setUp(self):
-        self.tab = get_data('annotations_subsample.tab')
-        self.json = get_data('annotations_subsample.json')
 
+class TestAnnotationLoading:
     def test_convert_tab_to_json(self):
-        json = convert_tab_to_json(self.tab, warn=print)
-        self.assertEqual(32, len(json['genes']))
+        json = convert_tab_to_json(TAB, warn=print)
+        assert len(json['genes']) == 32
 
     def test_tab_equivalent_to_json(self):
-        tab_result = load_annotations(self.tab, warn=print)
-        json_result = load_annotations(self.json, warn=print)
-        self.assertEqual(sorted(tab_result.keys()), sorted(json_result.keys()))
+        tab_result = load_annotations(TAB, warn=print)
+        json_result = load_annotations(JSON, warn=print)
+        assert sorted(json_result.keys()) == sorted(tab_result.keys())
 
     def test_load_tab(self):
-        result = load_annotations(self.tab, warn=print)
-        self.assertEqual(12, len(result.keys()))
+        result = load_annotations(TAB, warn=print)
+        assert len(result.keys()) == 12
         domains = []
         for gene in result['12']:
             for t in gene.spliced_transcripts:
@@ -35,10 +31,10 @@ def test_load_tab(self):
                 break
         for d in domains:
             print(d.name, d.regions)
-        self.assertEqual(2, len(domains))
+        assert len(domains) == 2
         result = load_annotations(get_data('mock_reference_annotations.tsv'), warn=print)
-        self.assertEqual(1, len(result.keys()))
+        assert len(result.keys()) == 1
 
     def test_load_json(self):
-        result = load_annotations(self.json, warn=print)
-        self.assertEqual(12, len(result.keys()))
+        result = load_annotations(JSON, warn=print)
+        assert len(result.keys()) == 12
diff --git a/tests/integration/test_args.py b/tests/integration/test_args.py
index 0dea660f..db14bc55 100644
--- a/tests/integration/test_args.py
+++ b/tests/integration/test_args.py
@@ -1,6 +1,4 @@
-import argparse
 import json
-import os
 import sys
 import tempfile
 from unittest.mock import patch
diff --git a/tests/integration/test_assemble.py b/tests/integration/test_assemble.py
index 9903f8ff..8cae4394 100644
--- a/tests/integration/test_assemble.py
+++ b/tests/integration/test_assemble.py
@@ -1,7 +1,6 @@
-import os
 import time
-import unittest
 
+import pytest
 import timeout_decorator
 from mavis.assemble import Contig, assemble, filter_contigs
 from mavis.constants import reverse_complement
@@ -9,11 +8,11 @@
 from mavis.schemas import DEFAULTS
 from mavis.util import LOG
 
-from ..util import get_data
-from . import RUN_FULL, MockObject
+from ..util import get_data, long_running_test
+from . import MockObject
 
 
-class TestFilterContigs(unittest.TestCase):
+class TestFilterContigs:
     @timeout_decorator.timeout(30)
     def test_large_set(self):
         contigs = []
@@ -27,61 +26,66 @@ def test_large_set(self):
         print()
         for c in filtered:
             print(c.seq)
-        self.assertEqual(3, len(filtered))  # figure out amount later. need to optimize timing
+        assert len(filtered) == 3  # figure out amount later. need to optimize timing
 
 
-class TestContigRemap(unittest.TestCase):
-    def setUp(self):
-        self.contig = Contig(' ' * 60, None)
-        self.contig.add_mapped_sequence(MockObject(reference_start=0, reference_end=10))
-        self.contig.add_mapped_sequence(MockObject(reference_start=0, reference_end=20))
-        self.contig.add_mapped_sequence(MockObject(reference_start=50, reference_end=60))
+@pytest.fixture
+def contig():
+    contig = Contig(' ' * 60, None)
+    contig.add_mapped_sequence(MockObject(reference_start=0, reference_end=10))
+    contig.add_mapped_sequence(MockObject(reference_start=0, reference_end=20))
+    contig.add_mapped_sequence(MockObject(reference_start=50, reference_end=60))
+    return contig
 
-    def test_depth_even_coverage(self):
-        covg = self.contig.remap_depth(Interval(1, 10))
-        self.assertEqual(2, covg)
 
-    def test_depth_mixed_coverage(self):
-        covg = self.contig.remap_depth(Interval(1, 20))
-        self.assertEqual(1.5, covg)
+class TestContigRemap:
+    def test_depth_even_coverage(self, contig):
+        covg = contig.remap_depth(Interval(1, 10))
+        assert covg == 2
 
-    def test_depth_no_coverage(self):
-        covg = self.contig.remap_depth(Interval(21, 49))
-        self.assertEqual(0, covg)
+    def test_depth_mixed_coverage(self, contig):
+        covg = contig.remap_depth(Interval(1, 20))
+        assert covg == 1.5
 
-    def test_depth_whole_contig_coverage(self):
-        self.assertAlmostEqual(40 / 60, self.contig.remap_depth())
+    def test_depth_no_coverage(self, contig):
+        covg = contig.remap_depth(Interval(21, 49))
+        assert covg == 0
 
-    def test_depth_weighted_read(self):
-        self.contig.add_mapped_sequence(MockObject(reference_start=0, reference_end=10), 5)
-        self.assertAlmostEqual(42 / 60, self.contig.remap_depth())
+    def test_depth_whole_contig_coverage(self, contig):
+        assert pytest.approx(contig.remap_depth()) == 40 / 60
 
-    def test_depth_bad_query_range(self):
-        with self.assertRaises(ValueError):
-            self.contig.remap_depth(Interval(0, 10))
-        with self.assertRaises(ValueError):
-            self.contig.remap_depth(Interval(1, len(self.contig.seq) + 1))
+    def test_depth_weighted_read(self, contig):
+        contig.add_mapped_sequence(MockObject(reference_start=0, reference_end=10), 5)
+        assert pytest.approx(contig.remap_depth()) == 42 / 60
 
-    def test_coverage(self):
-        self.assertEqual(0.5, self.contig.remap_coverage())
+    def test_depth_bad_query_range(self, contig):
+        with pytest.raises(ValueError):
+            contig.remap_depth(Interval(0, 10))
+        with pytest.raises(ValueError):
+            contig.remap_depth(Interval(1, len(contig.seq) + 1))
 
+    def test_coverage(self, contig):
+        assert contig.remap_coverage() == 0.5
 
-class TestAssemble(unittest.TestCase):
-    @classmethod
-    def setUpClass(cls):
-        # load files here so they do not count towar timeout checking
-        sequences = set()
-        with open(get_data('long_filter_assembly.txt'), 'r') as fh:
-            sequences.update([s.strip() for s in fh.readlines() if s])
-        cls.long_filter_seq = sequences
-        sequences = set()
-        with open(get_data('large_assembly.txt'), 'r') as fh:
-            sequences.update([line.strip() for line in fh.readlines()])
-        cls.large_assembly_seq = sequences
 
-    def setUp(self):
-        self.log = lambda *x, **k: print(x, k)
+@pytest.fixture
+def long_filter_seq():
+    # load files here so they do not count toward timeout checking
+    sequences = set()
+    with open(get_data('long_filter_assembly.txt'), 'r') as fh:
+        sequences.update([s.strip() for s in fh.readlines() if s])
+    return sequences
 
+
+@pytest.fixture
+def large_assembly_seq():
+    sequences = set()
+    with open(get_data('large_assembly.txt'), 'r') as fh:
+        sequences.update([line.strip() for line in fh.readlines()])
+    return sequences
+
+
+class TestAssemble:
     def test1(self):
         sequences = [
             'TCTTTTTCTTTCTTTCTTTCTTTCTTTCTATTCTATCTTCTTCCTGACTCTTCCTAGCTTAGTCTTACTGACAAGCATGTTACCTTCTTTTTATTTTTGTTTTTAAACCACATTGATCGTAAATCGCCGTGCTTGGTGCTTAATGTACTT',
@@ -177,11 +181,10 @@ def test1(self):
             remap_min_exact_match=6,
             assembly_max_paths=20,
             assembly_min_uniq=0.01,
-            log=self.log,
         )
         for contig in assembly:
             print(contig.seq)
-        self.assertTrue(assembly)
+        assert assembly
 
     def test_assembly_low_center(self):
         sequences = {
@@ -245,11 +248,10 @@ def test_assembly_low_center(self):
             remap_min_exact_match=6,
             assembly_max_paths=20,
             assembly_min_uniq=0.01,
-            log=self.log,
         )
         for assembly in assemblies:
             print(assembly.seq)
-        self.assertEqual(2, len(assemblies))
+        assert len(assemblies) == 2
 
     def test_low_evidence(self):
         seqs = [
@@ -280,11 +282,10 @@ def test_low_evidence(self):
             remap_min_exact_match=6,
             assembly_max_paths=20,
             assembly_min_uniq=0.01,
-            log=self.log,
         )
         for assembly in assemblies:
             print(assembly.seq, assembly.remap_score())
-        self.assertEqual(2, len(assemblies))
+        assert len(assemblies) == 2
 
     def test_multiple_events(self):
         sequences = {
@@ -341,28 +342,23 @@ def test_multiple_events(self):
             remap_min_exact_match=DEFAULTS['validate.assembly_min_exact_match_to_remap'],
             assembly_max_paths=DEFAULTS['validate.assembly_max_paths'],
             assembly_min_uniq=0.01,
-            log=self.log,
         )
         print('assemblies', len(assemblies))
         for assembly in assemblies:
             print(assembly.seq, assembly.remap_score())
             print(reverse_complement(assembly.seq))
         expected = 'ACCAGGTCTTCGATATATAAAAACCCTAGGTCGGCCGGTCGGCCGTGTTAGTGAGACACACACACACACATGTATACCCGTGCGCGCCCGCGGGAGAGAGAGAGAGAGAGATATATATATAGCAGACCAGGAGAGCGAGAGCGAGAGAGATATAGAGAGATCGCGCGCGAGAGAGATAGGAGACC'
-        self.assertEqual(expected, assemblies[0].seq)
-        self.assertEqual(1, len(assemblies))
+        assert assemblies[0].seq == expected
+        assert len(assemblies) == 1
 
     @timeout_decorator.timeout(300)
-    @unittest.skipIf(
-        not RUN_FULL,
-        'slower tests will not be run unless the environment variable RUN_FULL is given',
-    )
-    def test_large_assembly(self):
+    @long_running_test
+    def test_large_assembly(self, large_assembly_seq):
         # simply testing that this will complete before the timeout
-        sequences = self.large_assembly_seq
         kmer_size = 150 * DEFAULTS['validate.assembly_kmer_size']
         print('read inputs')
         contigs = assemble(
-            sequences,
+            large_assembly_seq,
             kmer_size,
             min_edge_trim_weight=DEFAULTS['validate.assembly_min_edge_trim_weight'],
             assembly_max_paths=DEFAULTS['validate.assembly_max_paths'],
@@ -374,7 +370,7 @@ def test_large_assembly(self):
         for contig in contigs:
             print(len(contig.seq), contig.remap_score())
             print(contig.seq)
-        self.assertTrue(len(contigs))
+        assert len(contigs)
 
     def test_assemble_short_contig(self):
         sequences = {
@@ -626,16 +622,12 @@ def test_assemble_short_contig(self):
         print('target', target)
         for contig in contigs:
             print(len(contig.seq), contig.remap_score(), contig.seq)
-        self.assertTrue({target, reverse_complement(target)} & {c.seq for c in contigs})
+        assert {target, reverse_complement(target)} & {c.seq for c in contigs}
 
     @timeout_decorator.timeout(120)
-    @unittest.skipIf(
-        not RUN_FULL,
-        'slower tests will not be run unless the environment variable RUN_FULL is given',
-    )
-    def test_long_filter_bug(self):
-        sequences = self.long_filter_seq
-        contigs = assemble(sequences, 111, 3, 8, 0.1, 0.1, log=LOG)
+    @long_running_test
+    def test_long_filter_bug(self, long_filter_seq):
+        contigs = assemble(long_filter_seq, 111, 3, 8, 0.1, 0.1, log=LOG)
         for c in contigs:
             print(c.seq, c.remap_score())
-        self.assertTrue(len(contigs))
+        assert len(contigs)
diff --git a/tests/integration/test_bam.py b/tests/integration/test_bam.py
index 0712bc57..7f8b87f7 100644
--- a/tests/integration/test_bam.py
+++ b/tests/integration/test_bam.py
@@ -1,9 +1,9 @@
+import argparse
 import logging
-import os
-import unittest
 import warnings
 from unittest import mock
 
+import pytest
 import timeout_decorator
 from mavis.annotate.file_io import load_annotations, load_reference_genome
 from mavis.bam import cigar as _cigar
@@ -16,15 +16,7 @@
     sequenced_strand,
 )
 from mavis.bam.stats import Histogram, compute_genome_bam_stats, compute_transcriptome_bam_stats
-from mavis.constants import (
-    CIGAR,
-    DNA_ALPHABET,
-    NA_MAPPING_QUALITY,
-    ORIENT,
-    READ_PAIR_TYPE,
-    STRAND,
-    SVTYPE,
-)
+from mavis.constants import CIGAR, DNA_ALPHABET, ORIENT, READ_PAIR_TYPE, STRAND, SVTYPE
 from mavis.interval import Interval
 
 from ..util import get_data
@@ -44,23 +36,23 @@ def setUpModule():
         raise AssertionError('fake genome file does not have the expected contents')
 
 
-class TestBamCache(unittest.TestCase):
+class TestBamCache:
     def test___init__(self):
         fh = MockBamFileHandle()
         b = BamCache(fh)
-        self.assertEqual(fh, b.fh)
+        assert b.fh == fh
 
     def test_add_read(self):
         fh = MockBamFileHandle()
         b = BamCache(fh)
         r = mock.MagicMock(query_name='name', query_sequence='')
         b.add_read(r)
-        self.assertEqual(1, len(b.cache.values()))
+        assert len(b.cache.values()) == 1
         b.add_read(r)
-        self.assertEqual(1, len(b.cache.values()))
+        assert len(b.cache.values()) == 1
         r.reference_start = 0
         b.add_read(r)
-        self.assertEqual(1, len(b.cache.values()))
+        assert len(b.cache.values()) == 1
 
     @mock.patch('mavis.util.LOG')
     def test_add_invalid_read(self, log_patcher):
@@ -69,7 +61,7 @@ def test_add_invalid_read(self, log_patcher):
         )
         cache = BamCache(MockBamFileHandle())
         cache.add_read(bad_read)
-        self.assertEqual(0, len(cache.cache))
+        assert len(cache.cache) == 0
         log_patcher.assert_called_with('ignoring invalid read', 'BAD_READ', level=logging.DEBUG)
 
     @mock.patch('mavis.util.LOG')
@@ -81,7 +73,7 @@ def test_fetch_invalid_read(self, log_patcher):
         fh.configure_mock(**{'fetch.return_value': [bad_read]})
         cache = BamCache(fh)
         cache.fetch('chr', 1, 10)
-        self.assertEqual(0, len(cache.cache))
+        assert len(cache.cache) == 0
         log_patcher.assert_called_with('ignoring invalid read', 'BAD_READ', level=logging.DEBUG)
 
     @mock.patch('mavis.util.LOG')
@@ -93,85 +85,88 @@ def test_bin_fetch_invalid_read(self, log_patcher):
         fh.configure_mock(**{'fetch.return_value': [bad_read]})
         cache = BamCache(fh)
         cache.fetch_from_bins('chr', 1, 10)
-        self.assertEqual(0, len(cache.cache))
+        assert len(cache.cache) == 0
         log_patcher.assert_called_with('ignoring invalid read', 'BAD_READ', level=logging.DEBUG)
 
     def test_reference_id(self):
         fh = MockBamFileHandle({'1': 0})
         b = BamCache(fh)
-        self.assertEqual(0, b.reference_id('1'))
-        with self.assertRaises(KeyError):
+        assert b.reference_id('1') == 0
+        with pytest.raises(KeyError):
             b.reference_id('2')
 
     def test_get_read_reference_name(self):
         fh = MockBamFileHandle({'1': 0})
         b = BamCache(fh)
         r = MockRead('name', 0)
-        self.assertEqual('1', b.get_read_reference_name(r))
+        assert b.get_read_reference_name(r) == '1'
 
     def test_generate_fetch_bins_single(self):
-        self.assertEqual([(1, 100)], BamCache._generate_fetch_bins(1, 100, 1, 1))
+        assert BamCache._generate_fetch_bins(1, 100, 1, 1) == [(1, 100)]
 
     def test_generate_fetch_bins_multi(self):
-        self.assertEqual([(1, 50), (51, 100)], BamCache._generate_fetch_bins(1, 100, 2, 1))
-        self.assertEqual(
-            [(1, 20), (21, 40), (41, 60), (61, 80), (81, 100)],
-            BamCache._generate_fetch_bins(1, 100, 5, 1),
-        )
+        assert BamCache._generate_fetch_bins(1, 100, 2, 1) == [(1, 50), (51, 100)]
+        assert BamCache._generate_fetch_bins(1, 100, 5, 1) == [
+            (1, 20),
+            (21, 40),
+            (41, 60),
+            (61, 80),
+            (81, 100),
+        ]
 
     def test_generate_fetch_bins_large_min_size(self):
-        self.assertEqual([(1, 50), (51, 100)], BamCache._generate_fetch_bins(1, 100, 5, 50))
+        assert BamCache._generate_fetch_bins(1, 100, 5, 50) == [(1, 50), (51, 100)]
 
     def test_fetch_single_read(self):
         b = BamCache(get_data('mini_mock_reads_for_events.sorted.bam'))
         s = b.fetch_from_bins('reference3', 1382, 1383, read_limit=1, sample_bins=1)
-        self.assertEqual(1, len(s))
+        assert len(s) == 1
         r = list(s)[0]
-        self.assertEqual('HISEQX1_11:4:2122:14275:37717:split', r.qname)
+        assert r.qname == 'HISEQX1_11:4:2122:14275:37717:split'
         b.close()
 
     def test_get_mate(self):
         # dependant on fetch working
         b = BamCache(get_data('mini_mock_reads_for_events.sorted.bam'))
         s = b.fetch_from_bins('reference3', 1382, 1383, read_limit=1, sample_bins=1)
-        self.assertEqual(1, len(s))
+        assert len(s) == 1
         r = list(s)[0]
-        self.assertEqual('HISEQX1_11:4:2122:14275:37717:split', r.qname)
+        assert r.qname == 'HISEQX1_11:4:2122:14275:37717:split'
         o = b.get_mate(r, allow_file_access=True)
-        self.assertEqual(1, len(o))
-        self.assertEqual('HISEQX1_11:4:2122:14275:37717:split', o[0].qname)
+        assert len(o) == 1
+        assert o[0].qname == 'HISEQX1_11:4:2122:14275:37717:split'
 
 
-class TestModule(unittest.TestCase):
+class TestModule:
     """
     test class for functions in the validate namespace
     that are not associated with a class
     """
 
     def test_alphabet_matching(self):
-        self.assertTrue(DNA_ALPHABET.match('N', 'A'))
-        self.assertTrue(DNA_ALPHABET.match('A', 'N'))
+        assert DNA_ALPHABET.match('N', 'A')
+        assert DNA_ALPHABET.match('A', 'N')
 
     def test_breakpoint_pos(self):
         # ==========+++++++++>
         r = MockRead(reference_start=10, cigar=[(CIGAR.M, 10), (CIGAR.S, 10)])
-        self.assertEqual(19, _read.breakpoint_pos(r))
+        assert _read.breakpoint_pos(r) == 19
 
-        with self.assertRaises(AttributeError):
+        with pytest.raises(AttributeError):
             breakpoint_pos(r, ORIENT.RIGHT)
 
-        self.assertEqual(19, _read.breakpoint_pos(r, ORIENT.LEFT))
+        assert _read.breakpoint_pos(r, ORIENT.LEFT) == 19
 
         # ++++++++++=========>
         r = MockRead(reference_start=10, cigar=[(CIGAR.S, 10), (CIGAR.M, 10)])
-        self.assertEqual(10, _read.breakpoint_pos(r))
+        assert _read.breakpoint_pos(r) == 10
 
-        with self.assertRaises(AttributeError):
+        with pytest.raises(AttributeError):
             breakpoint_pos(r, ORIENT.LEFT)
 
-        self.assertEqual(10, _read.breakpoint_pos(r, ORIENT.RIGHT))
+        assert _read.breakpoint_pos(r, ORIENT.RIGHT) == 10
 
-        with self.assertRaises(AttributeError):
+        with pytest.raises(AttributeError):
             r = MockRead(reference_start=10, cigar=[(CIGAR.X, 10), (CIGAR.M, 10)])
             _read.breakpoint_pos(r, ORIENT.LEFT)
 
@@ -188,7 +183,7 @@ def test_nsb_align(self):
         # GATTCTTTCCTGTTTGGTTCCTGGTCGTGAGTGGCAGGTGCCATCATGTTTCATTCTGCCTGAGAGCAGTCTACCTAAATATATAGCTCTGCTCACAGTTTCCCTGCAATGCATAATTAAAATAGCACTATGCAGTTGCTTACACTTCAGATAATGGCTTCCTACATATTGTTG
 
 
-class TestNsbAlign(unittest.TestCase):
+class TestNsbAlign:
     def test_length_seq_le_ref(self):
         ref = (
             'GATTCTTTCCTGTTTGGTTCCTGGTCGTGAGTGGCAGGTGCCATCATGTTTCATTCTGCCTGAGAGCAGTCTACCTAAATATATAGCTCTGCTCACAG'
@@ -199,9 +194,9 @@ def test_length_seq_le_ref(self):
             'TGCTCTCAGGCAGAATGAAACATGATGGCACCTGCCACTCACGACCAGGAAC'
         )
         alignment = _read.nsb_align(ref, seq)
-        self.assertEqual(1, len(alignment))
+        assert len(alignment) == 1
         alignment = _read.nsb_align(ref, seq, min_consecutive_match=20)
-        self.assertEqual(0, len(alignment))
+        assert len(alignment) == 0
 
     def test_length_ref_le_seq(self):
         pass
@@ -217,17 +212,17 @@ def test_long_ref_seq(self):
             'CGCAGCTACTCAGGAGATCGGAAG'
         )
         alignment = _read.nsb_align(ref, seq, min_consecutive_match=6)
-        self.assertEqual(1, len(alignment))
+        assert len(alignment) == 1
 
     def test_left_softclipping(self):
         ref = 'TAAGCTTCTTCCTTTTTCTATGCCACCTACATAGGCATTTTGCATGGTCAGATTGGAATTTACATAATGCATACATGCAAAGAAATATATAGAAGCCAGATATATAAGGTAGTACATTGGCAGGCTTCATATATATAGACTCCCCCATATTGTCTATATGCTAAAAAAGTATTTTAAATCCTTAAATTTTATTTTTGTTCTCTGCATTTGAAATCTTTATCAACTAGGTCATGAAAATAGCCAGTCGGTTCTCCTTTTGGTCTATTAGAATAAAATCTGGACTGCAACTGAGAAGCAGAAGGTAATGTCAGAATGTAT'
         seq = 'GCTAAAAAAGTATTTTAAATCCTTAAATGTTATTTTTGTTCTC'
         alignments = _read.nsb_align(ref, seq, min_consecutive_match=6)
-        self.assertEqual(1, len(alignments))
+        assert len(alignments) == 1
         print(alignments)
         seq = 'CTTATAAAGCTGGAGTATCTGCTGAGAGCATCAGGAATTGACATCTAGGATAATGAGAGAAGGCTGATCATGGACAACATATAGCCTTTCTAGTAGATGCAGCTGAGGCTAAAAAAGTATTTTAAATCCTTAAATGTTATTTTTGTTCTC'
         alignments = _read.nsb_align(ref, seq, min_consecutive_match=6, min_overlap_percent=0.5)
-        self.assertEqual(1, len(alignments))
+        assert len(alignments) == 1
 
     def test_min_overlap(self):
         ref = 'ATTACATTAAAGATTCAAACTCCTAGAGTTTTTTTGATTTTTAGTATGATCTTTAGATAAAAAAAAAGGAAGAAAAAGAAAAAAAAACAGAGTCTATTAAGGCATCTTCTATGGTCAGATATATCTATTTTTTTCTTTCTTTTTTTTACTTTCATTAAGTGCCACTAAAAAATTAGGTTCAATTAAACTTTATTAATCTCTTCTGAGTTTTGAT'
@@ -240,193 +235,201 @@ def test_min_overlap(self):
             min_overlap_percent=(len(seq) - 15) / len(seq),
         )
         print(alignments)
-        self.assertEqual(0, len(alignments))
-
-
-class TestReadPairStrand(unittest.TestCase):
-    def setUp(self):
-        self.read1_pos_neg = MockRead(is_reverse=False, is_read1=True, mate_is_reverse=True)
-        assert not self.read1_pos_neg.is_read2
-        self.read1_neg_pos = MockRead(is_reverse=True, is_read1=True, mate_is_reverse=False)
-        self.read1_pos_pos = MockRead(is_reverse=False, is_read1=True, mate_is_reverse=False)
-        self.read1_neg_neg = MockRead(is_reverse=True, is_read1=True, mate_is_reverse=True)
-
-        self.read2_pos_neg = MockRead(is_reverse=True, is_read1=False, mate_is_reverse=True)
-        assert self.read2_pos_neg.is_read2
-        self.read2_neg_pos = MockRead(is_reverse=False, is_read1=False, mate_is_reverse=False)
-        self.read2_pos_pos = MockRead(is_reverse=False, is_read1=False, mate_is_reverse=False)
-        self.read2_neg_neg = MockRead(is_reverse=True, is_read1=False, mate_is_reverse=True)
-
-        self.unpaired_pos = MockRead(is_reverse=False, is_paired=False)
-        self.unpaired_neg = MockRead(is_reverse=True, is_paired=False)
-
-    def test_read_pair_strand_det1_read1(self):
-        self.assertEqual(
-            STRAND.POS, sequenced_strand(self.read1_pos_neg, strand_determining_read=1)
-        )
-        self.assertEqual(
-            STRAND.NEG, sequenced_strand(self.read1_neg_pos, strand_determining_read=1)
-        )
-        self.assertEqual(
-            STRAND.POS, sequenced_strand(self.read1_pos_pos, strand_determining_read=1)
-        )
-        self.assertEqual(
-            STRAND.NEG, sequenced_strand(self.read1_neg_neg, strand_determining_read=1)
-        )
+        assert len(alignments) == 0
 
-    def test_read_pair_strand_det1_read2(self):
-        self.assertEqual(
-            STRAND.POS, sequenced_strand(self.read2_pos_neg, strand_determining_read=1)
-        )
-        self.assertEqual(
-            STRAND.NEG, sequenced_strand(self.read2_neg_pos, strand_determining_read=1)
-        )
-        self.assertEqual(
-            STRAND.NEG, sequenced_strand(self.read2_pos_pos, strand_determining_read=1)
-        )
-        self.assertEqual(
-            STRAND.POS, sequenced_strand(self.read2_neg_neg, strand_determining_read=1)
-        )
 
-    def test_read_pair_strand_det2_read2(self):
-        self.assertEqual(
-            STRAND.NEG, sequenced_strand(self.read2_pos_neg, strand_determining_read=2)
-        )
-        self.assertEqual(
-            STRAND.POS, sequenced_strand(self.read2_neg_pos, strand_determining_read=2)
-        )
-        self.assertEqual(
-            STRAND.POS, sequenced_strand(self.read2_pos_pos, strand_determining_read=2)
-        )
-        self.assertEqual(
-            STRAND.NEG, sequenced_strand(self.read2_neg_neg, strand_determining_read=2)
-        )
+@pytest.fixture
+def stranded_reads():
+    n = argparse.Namespace()
+    n.read1_pos_neg = MockRead(is_reverse=False, is_read1=True, mate_is_reverse=True)
+    assert not n.read1_pos_neg.is_read2
+    n.read1_neg_pos = MockRead(is_reverse=True, is_read1=True, mate_is_reverse=False)
+    n.read1_pos_pos = MockRead(is_reverse=False, is_read1=True, mate_is_reverse=False)
+    n.read1_neg_neg = MockRead(is_reverse=True, is_read1=True, mate_is_reverse=True)
 
-    def test_read_pair_strand_det2_read1(self):
-        self.assertEqual(
-            STRAND.NEG, sequenced_strand(self.read1_pos_neg, strand_determining_read=2)
-        )
-        self.assertEqual(
-            STRAND.POS, sequenced_strand(self.read1_neg_pos, strand_determining_read=2)
-        )
-        self.assertEqual(
-            STRAND.NEG, sequenced_strand(self.read1_pos_pos, strand_determining_read=2)
-        )
-        self.assertEqual(
-            STRAND.POS, sequenced_strand(self.read1_neg_neg, strand_determining_read=2)
-        )
+    n.read2_pos_neg = MockRead(is_reverse=True, is_read1=False, mate_is_reverse=True)
+    assert n.read2_pos_neg.is_read2
+    n.read2_neg_pos = MockRead(is_reverse=False, is_read1=False, mate_is_reverse=False)
+    n.read2_pos_pos = MockRead(is_reverse=False, is_read1=False, mate_is_reverse=False)
+    n.read2_neg_neg = MockRead(is_reverse=True, is_read1=False, mate_is_reverse=True)
 
-    def test_read_pair_strand_unpaired(self):
-        with self.assertRaises(ValueError):
-            sequenced_strand(self.unpaired_pos)
-        with self.assertRaises(ValueError):
-            sequenced_strand(self.unpaired_neg)
-
-    def test_read_pair_strand_det_error(self):
-        with self.assertRaises(ValueError):
-            sequenced_strand(self.read1_pos_neg, strand_determining_read=3)
-
-
-class TestReadPairType(unittest.TestCase):
-    def setUp(self):
-        self.LR = MockRead(
-            reference_id=0,
-            next_reference_id=0,
-            reference_start=1,
-            next_reference_start=2,
-            is_reverse=False,
-            mate_is_reverse=True,
-        )
-        self.LL = MockRead(
-            reference_id=0,
-            next_reference_id=0,
-            reference_start=1,
-            next_reference_start=2,
-            is_reverse=False,
-            mate_is_reverse=False,
-        )
-        self.RR = MockRead(
-            reference_id=0,
-            next_reference_id=0,
-            reference_start=1,
-            next_reference_start=2,
-            is_reverse=True,
-            mate_is_reverse=True,
+    n.unpaired_pos = MockRead(is_reverse=False, is_paired=False)
+    n.unpaired_neg = MockRead(is_reverse=True, is_paired=False)
+    return n
+
+
+class TestReadPairStrand:
+    def test_read_pair_strand_det1_read1(self, stranded_reads):
+        assert (
+            sequenced_strand(stranded_reads.read1_pos_neg, strand_determining_read=1) == STRAND.POS
+        )
+        assert (
+            sequenced_strand(stranded_reads.read1_neg_pos, strand_determining_read=1) == STRAND.NEG
+        )
+        assert (
+            sequenced_strand(stranded_reads.read1_pos_pos, strand_determining_read=1) == STRAND.POS
+        )
+        assert (
+            sequenced_strand(stranded_reads.read1_neg_neg, strand_determining_read=1) == STRAND.NEG
+        )
+
+    def test_read_pair_strand_det1_read2(self, stranded_reads):
+        assert (
+            sequenced_strand(stranded_reads.read2_pos_neg, strand_determining_read=1) == STRAND.POS
         )
-        self.RL = MockRead(
-            reference_id=0,
-            next_reference_id=0,
-            reference_start=1,
-            next_reference_start=2,
-            is_reverse=True,
-            mate_is_reverse=False,
-        )
-
-    def test_read_pair_type_LR(self):
-        self.assertEqual(READ_PAIR_TYPE.LR, read_pair_type(self.LR))
-
-    def test_read_pair_type_LL(self):
-        self.assertEqual(READ_PAIR_TYPE.LL, read_pair_type(self.LL))
-
-    def test_read_pair_type_RR(self):
-        self.assertEqual(READ_PAIR_TYPE.RR, read_pair_type(self.RR))
-
-    def test_read_pair_type_RL(self):
-        self.assertEqual(READ_PAIR_TYPE.RL, read_pair_type(self.RL))
-
-    def test_orientation_supports_type_deletion(self):
-        self.assertTrue(orientation_supports_type(self.LR, SVTYPE.DEL))
-        self.assertFalse(orientation_supports_type(self.RL, SVTYPE.DEL))
-        self.assertFalse(orientation_supports_type(self.LL, SVTYPE.DEL))
-        self.assertFalse(orientation_supports_type(self.RR, SVTYPE.DEL))
-
-    def test_orientation_supports_type_insertion(self):
-        self.assertTrue(orientation_supports_type(self.LR, SVTYPE.INS))
-        self.assertFalse(orientation_supports_type(self.RL, SVTYPE.INS))
-        self.assertFalse(orientation_supports_type(self.LL, SVTYPE.INS))
-        self.assertFalse(orientation_supports_type(self.RR, SVTYPE.INS))
-
-    def test_orientation_supports_type_inversion(self):
-        self.assertFalse(orientation_supports_type(self.LR, SVTYPE.INV))
-        self.assertFalse(orientation_supports_type(self.RL, SVTYPE.INV))
-        self.assertTrue(orientation_supports_type(self.LL, SVTYPE.INV))
-        self.assertTrue(orientation_supports_type(self.RR, SVTYPE.INV))
-
-    def test_orientation_supports_type_translocation_inversion(self):
-        self.assertFalse(orientation_supports_type(self.LR, SVTYPE.ITRANS))
-        self.assertFalse(orientation_supports_type(self.RL, SVTYPE.ITRANS))
-        self.assertTrue(orientation_supports_type(self.LL, SVTYPE.ITRANS))
-        self.assertTrue(orientation_supports_type(self.RR, SVTYPE.ITRANS))
-
-    def test_orientation_supports_type_trans_duplication(self):
-        self.assertFalse(orientation_supports_type(self.LR, SVTYPE.DUP))
-        self.assertTrue(orientation_supports_type(self.RL, SVTYPE.DUP))
-        self.assertFalse(orientation_supports_type(self.LL, SVTYPE.DUP))
-        self.assertFalse(orientation_supports_type(self.RR, SVTYPE.DUP))
-
-    def test_orientation_supports_type_translocation(self):
-        self.assertTrue(orientation_supports_type(self.LR, SVTYPE.TRANS))
-        self.assertTrue(orientation_supports_type(self.RL, SVTYPE.TRANS))
-        self.assertFalse(orientation_supports_type(self.LL, SVTYPE.TRANS))
-        self.assertFalse(orientation_supports_type(self.RR, SVTYPE.TRANS))
-
-
-class TestHistogram(unittest.TestCase):
+        assert (
+            sequenced_strand(stranded_reads.read2_neg_pos, strand_determining_read=1) == STRAND.NEG
+        )
+        assert (
+            sequenced_strand(stranded_reads.read2_pos_pos, strand_determining_read=1) == STRAND.NEG
+        )
+        assert (
+            sequenced_strand(stranded_reads.read2_neg_neg, strand_determining_read=1) == STRAND.POS
+        )
+
+    def test_read_pair_strand_det2_read2(self, stranded_reads):
+        assert (
+            sequenced_strand(stranded_reads.read2_pos_neg, strand_determining_read=2) == STRAND.NEG
+        )
+        assert (
+            sequenced_strand(stranded_reads.read2_neg_pos, strand_determining_read=2) == STRAND.POS
+        )
+        assert (
+            sequenced_strand(stranded_reads.read2_pos_pos, strand_determining_read=2) == STRAND.POS
+        )
+        assert (
+            sequenced_strand(stranded_reads.read2_neg_neg, strand_determining_read=2) == STRAND.NEG
+        )
+
+    def test_read_pair_strand_det2_read1(self, stranded_reads):
+        assert (
+            sequenced_strand(stranded_reads.read1_pos_neg, strand_determining_read=2) == STRAND.NEG
+        )
+        assert (
+            sequenced_strand(stranded_reads.read1_neg_pos, strand_determining_read=2) == STRAND.POS
+        )
+        assert (
+            sequenced_strand(stranded_reads.read1_pos_pos, strand_determining_read=2) == STRAND.NEG
+        )
+        assert (
+            sequenced_strand(stranded_reads.read1_neg_neg, strand_determining_read=2) == STRAND.POS
+        )
+
+    def test_read_pair_strand_unpaired(self, stranded_reads):
+        with pytest.raises(ValueError):
+            sequenced_strand(stranded_reads.unpaired_pos)
+        with pytest.raises(ValueError):
+            sequenced_strand(stranded_reads.unpaired_neg)
+
+    def test_read_pair_strand_det_error(self, stranded_reads):
+        with pytest.raises(ValueError):
+            sequenced_strand(stranded_reads.read1_pos_neg, strand_determining_read=3)
+
+
+@pytest.fixture
+def read_pairs():
+    """
+    mock reads for each combination of read and mate strand flags
+
+    The attribute names match the READ_PAIR_TYPE which the tests using this
+    fixture expect read_pair_type to report for that read
+
+    Every read has reference_id=0, next_reference_id=0, reference_start=1 and
+    next_reference_start=2; only is_reverse/mate_is_reverse differ:
+
+    - LR: is_reverse=False, mate_is_reverse=True
+    - LL: is_reverse=False, mate_is_reverse=False
+    - RR: is_reverse=True, mate_is_reverse=True
+    - RL: is_reverse=True, mate_is_reverse=False
+    """
+
+    def mock_read(is_reverse, mate_is_reverse):
+        # positions and references are shared so only the strand flags vary
+        return MockRead(
+            reference_id=0,
+            next_reference_id=0,
+            reference_start=1,
+            next_reference_start=2,
+            is_reverse=is_reverse,
+            mate_is_reverse=mate_is_reverse,
+        )
+
+    # plain attribute container so tests can refer to the reads by name
+    pairs = argparse.Namespace()
+    pairs.LR = mock_read(is_reverse=False, mate_is_reverse=True)
+    pairs.LL = mock_read(is_reverse=False, mate_is_reverse=False)
+    pairs.RR = mock_read(is_reverse=True, mate_is_reverse=True)
+    pairs.RL = mock_read(is_reverse=True, mate_is_reverse=False)
+
+    return pairs
+
+
+class TestReadPairType:
+    """read pair classification and SV-type orientation support for the read_pairs mocks"""
+
+    def test_read_pair_type_LR(self, read_pairs):
+        assert read_pair_type(read_pairs.LR) == READ_PAIR_TYPE.LR
+
+    def test_read_pair_type_LL(self, read_pairs):
+        assert read_pair_type(read_pairs.LL) == READ_PAIR_TYPE.LL
+
+    def test_read_pair_type_RR(self, read_pairs):
+        assert read_pair_type(read_pairs.RR) == READ_PAIR_TYPE.RR
+
+    def test_read_pair_type_RL(self, read_pairs):
+        assert read_pair_type(read_pairs.RL) == READ_PAIR_TYPE.RL
+
+    def test_orientation_supports_type_deletion(self, read_pairs):
+        assert orientation_supports_type(read_pairs.LR, SVTYPE.DEL)
+        for unsupported in (read_pairs.RL, read_pairs.LL, read_pairs.RR):
+            assert not orientation_supports_type(unsupported, SVTYPE.DEL)
+
+    def test_orientation_supports_type_insertion(self, read_pairs):
+        assert orientation_supports_type(read_pairs.LR, SVTYPE.INS)
+        for unsupported in (read_pairs.RL, read_pairs.LL, read_pairs.RR):
+            assert not orientation_supports_type(unsupported, SVTYPE.INS)
+
+    def test_orientation_supports_type_inversion(self, read_pairs):
+        for unsupported in (read_pairs.LR, read_pairs.RL):
+            assert not orientation_supports_type(unsupported, SVTYPE.INV)
+        for supported in (read_pairs.LL, read_pairs.RR):
+            assert orientation_supports_type(supported, SVTYPE.INV)
+
+    def test_orientation_supports_type_translocation_inversion(self, read_pairs):
+        for unsupported in (read_pairs.LR, read_pairs.RL):
+            assert not orientation_supports_type(unsupported, SVTYPE.ITRANS)
+        for supported in (read_pairs.LL, read_pairs.RR):
+            assert orientation_supports_type(supported, SVTYPE.ITRANS)
+
+    def test_orientation_supports_type_trans_duplication(self, read_pairs):
+        assert not orientation_supports_type(read_pairs.LR, SVTYPE.DUP)
+        assert orientation_supports_type(read_pairs.RL, SVTYPE.DUP)
+        for unsupported in (read_pairs.LL, read_pairs.RR):
+            assert not orientation_supports_type(unsupported, SVTYPE.DUP)
+
+    def test_orientation_supports_type_translocation(self, read_pairs):
+        for supported in (read_pairs.LR, read_pairs.RL):
+            assert orientation_supports_type(supported, SVTYPE.TRANS)
+        for unsupported in (read_pairs.LL, read_pairs.RR):
+            assert not orientation_supports_type(unsupported, SVTYPE.TRANS)
+
+
+class TestHistogram:
     def test_add(self):
         h = Histogram()
         h.add(1)
         h.add(1)
-        self.assertEqual(2, h[1])
+        assert h[1] == 2
         h.add(1, 4)
-        self.assertEqual(6, h[1])
+        assert h[1] == 6
 
     def test_median(self):
         h = Histogram()
         for i in range(1, 11):
             h.add(i)
-        self.assertEqual(5.5, h.median())
+        assert h.median() == 5.5
         h.add(11)
-        self.assertEqual(6, h.median())
+        assert h.median() == 6
 
     def test_distib_stderr(self):
         h = Histogram()
@@ -435,9 +438,9 @@ def test_distib_stderr(self):
         for i in range(4, 8):
             h.add(i)
         m = h.median()
-        self.assertEqual(5, m)
+        assert m == 5
         err = h.distribution_stderr(m, 1)
-        self.assertEqual(116 / 15, err)
+        assert err == 116 / 15
 
     def test_add_operator(self):
         x = Histogram()
@@ -445,19 +448,19 @@ def test_add_operator(self):
         x.add(1)
         y.add(1, 4)
         z = x + y
-        self.assertEqual(1, x[1])
-        self.assertEqual(4, y[1])
-        self.assertEqual(5, z[1])
+        assert x[1] == 1
+        assert y[1] == 4
+        assert z[1] == 5
 
 
-class TestBamStats(unittest.TestCase):
+class TestBamStats:
     def test_genome_bam_stats(self):
         bamfh = BamCache(get_data('mock_reads_for_events.sorted.bam'))
         stats = compute_genome_bam_stats(
             bamfh, 1000, 100, min_mapping_quality=1, sample_cap=10000, distribution_fraction=0.99
         )
-        self.assertGreaterEqual(50, abs(stats.median_fragment_size - 420))
-        self.assertEqual(150, stats.read_length)
+        assert 50 >= abs(stats.median_fragment_size - 420)
+        assert stats.read_length == 150
         bamfh.close()
 
     def test_trans_bam_stats(self):
@@ -472,35 +475,37 @@ def test_trans_bam_stats(self):
             sample_cap=10000,
             distribution_fraction=0.99,
         )
-        self.assertTrue(abs(stats.median_fragment_size - 185) < 5)
-        self.assertEqual(75, stats.read_length)
-        self.assertTrue(stats.stdev_fragment_size < 50)
+        assert abs(stats.median_fragment_size - 185) < 5
+        assert stats.read_length == 75
+        assert stats.stdev_fragment_size < 50
         bamfh.close()
 
 
-class TestMapRefRangeToQueryRange(unittest.TestCase):
-    def setUp(self):
-        self.contig_read = MockRead(
-            cigar=_cigar.convert_string_to_cigar('275M18I12041D278M'),
-            reference_start=89700025,
-            reference_name='10',
-        )
+@pytest.fixture
+def contig_read():
+    """
+    mock contig read on reference '10' with the CIGAR 275M18I12041D278M
+    """
+    contig_cigar = _cigar.convert_string_to_cigar('275M18I12041D278M')
+    return MockRead(cigar=contig_cigar, reference_start=89700025, reference_name='10')
+
 
-    def test_full_aligned_portion(self):
+class TestMapRefRangeToQueryRange:
+    def test_full_aligned_portion(self, contig_read):
         ref_range = Interval(89700026, 89712619)
-        qrange = _read.map_ref_range_to_query_range(self.contig_read, ref_range)
-        self.assertEqual(571, len(qrange))
-        self.assertEqual(1, qrange.start)
-        self.assertEqual(571, qrange.end)
+        qrange = _read.map_ref_range_to_query_range(contig_read, ref_range)
+        assert len(qrange) == 571
+        assert qrange.start == 1
+        assert qrange.end == 571
 
-    def test_multiple_events(self):
+    def test_multiple_events(self, contig_read):
         ref_range = Interval(89700067, 89712347)
-        qrange = _read.map_ref_range_to_query_range(self.contig_read, ref_range)
-        self.assertEqual(len(ref_range) - 12041 + 18, len(qrange))
+        qrange = _read.map_ref_range_to_query_range(contig_read, ref_range)
+        assert len(qrange) == len(ref_range) - 12041 + 18
 
-    def test_no_events(self):
+    def test_no_events(self, contig_read):
         ref_range = Interval(89700031, 89700040)
-        qrange = _read.map_ref_range_to_query_range(self.contig_read, ref_range)
-        self.assertEqual(10, len(qrange))
-        self.assertEqual(6, qrange.start)
-        self.assertEqual(15, qrange.end)
+        qrange = _read.map_ref_range_to_query_range(contig_read, ref_range)
+        assert len(qrange) == 10
+        assert qrange.start == 6
+        assert qrange.end == 15
diff --git a/tests/integration/test_bam_cigar.py b/tests/integration/test_bam_cigar.py
index 6a71e54e..1d0c49d8 100644
--- a/tests/integration/test_bam_cigar.py
+++ b/tests/integration/test_bam_cigar.py
@@ -1,7 +1,9 @@
-import unittest
 import warnings
 
+import pytest
+import timeout_decorator
 from mavis.annotate.file_io import load_reference_genome
+from mavis.bam import read as _read
 from mavis.bam.cigar import (
     alignment_matches,
     compute,
@@ -16,14 +18,11 @@
     recompute_cigar_mismatch,
     score,
 )
-from mavis.constants import CIGAR
 from mavis.bam.read import SamRead
-from mavis.bam import read as _read
-import timeout_decorator
+from mavis.constants import CIGAR
 
-from . import MockRead, MockObject
 from ..util import get_data
-
+from . import MockObject, MockRead
 
 REFERENCE_GENOME = None
 
@@ -39,14 +38,14 @@ def setUpModule():
         raise AssertionError('fake genome file does not have the expected contents')
 
 
-class TestRecomputeCigarMismatch(unittest.TestCase):
+class TestRecomputeCigarMismatch:
     def test_simple(self):
         r = MockRead(
             reference_start=1456,
             query_sequence='CCCAAACAAC' 'TATAAATTTT' 'GTAATACCTA' 'GAACAATATA' 'AATAT',
             cigar=[(CIGAR.M, 45)],
         )
-        self.assertEqual([(CIGAR.EQ, 45)], recompute_cigar_mismatch(r, REFERENCE_GENOME['fake']))
+        assert recompute_cigar_mismatch(r, REFERENCE_GENOME['fake']) == [(CIGAR.EQ, 45)]
 
     def test_hardclipping(self):
         r = MockRead(
@@ -54,8 +53,8 @@ def test_hardclipping(self):
             query_sequence='CCCAAACAAC' 'TATAAATTTT' 'GTAATACCTA' 'GAACAATATA' 'AATAT',
             cigar=[(CIGAR.H, 20), (CIGAR.M, 45)],
         )
-        self.assertEqual(
-            [(CIGAR.H, 20), (CIGAR.EQ, 45)], recompute_cigar_mismatch(r, REFERENCE_GENOME['fake'])
+        assert [(CIGAR.H, 20), (CIGAR.EQ, 45)] == recompute_cigar_mismatch(
+            r, REFERENCE_GENOME['fake']
         )
 
     def test_with_events(self):
@@ -64,10 +63,13 @@ def test_with_events(self):
             query_sequence='TATA' 'CCCAAACAAC' 'TATAAATTTT' 'GTAATACCTA' 'GAACAATATA' 'AATAT',
             cigar=[(CIGAR.S, 4), (CIGAR.M, 10), (CIGAR.D, 10), (CIGAR.I, 10), (CIGAR.M, 25)],
         )
-        self.assertEqual(
-            [(CIGAR.S, 4), (CIGAR.EQ, 10), (CIGAR.D, 10), (CIGAR.I, 10), (CIGAR.EQ, 25)],
-            recompute_cigar_mismatch(r, REFERENCE_GENOME['fake']),
-        )
+        assert [
+            (CIGAR.S, 4),
+            (CIGAR.EQ, 10),
+            (CIGAR.D, 10),
+            (CIGAR.I, 10),
+            (CIGAR.EQ, 25),
+        ] == recompute_cigar_mismatch(r, REFERENCE_GENOME['fake'])
 
     def test_mismatch_to_mismatch(self):
         r = MockRead(
@@ -75,10 +77,13 @@ def test_mismatch_to_mismatch(self):
             query_sequence='CAGC' 'CCCAAACAAC' 'TATAAATTTT' 'GTAATACCTA' 'GAACAATATA' 'AATAT',
             cigar=[(CIGAR.X, 4), (CIGAR.M, 10), (CIGAR.D, 10), (CIGAR.I, 10), (CIGAR.M, 25)],
         )
-        self.assertEqual(
-            [(CIGAR.X, 4), (CIGAR.EQ, 10), (CIGAR.D, 10), (CIGAR.I, 10), (CIGAR.EQ, 25)],
-            recompute_cigar_mismatch(r, REFERENCE_GENOME['fake']),
-        )
+        assert [
+            (CIGAR.X, 4),
+            (CIGAR.EQ, 10),
+            (CIGAR.D, 10),
+            (CIGAR.I, 10),
+            (CIGAR.EQ, 25),
+        ] == recompute_cigar_mismatch(r, REFERENCE_GENOME['fake'])
 
     def test_m_to_mismatch(self):
         r = MockRead(
@@ -86,38 +91,39 @@ def test_m_to_mismatch(self):
             query_sequence='CAGC' 'CCCAAACAAC' 'TATAAATTTT' 'GTAATACCTA' 'GAACAATATA' 'AATAT',
             cigar=[(CIGAR.M, 14), (CIGAR.D, 10), (CIGAR.I, 10), (CIGAR.M, 25)],
         )
-        self.assertEqual(
-            [(CIGAR.X, 4), (CIGAR.EQ, 10), (CIGAR.D, 10), (CIGAR.I, 10), (CIGAR.EQ, 25)],
-            recompute_cigar_mismatch(r, REFERENCE_GENOME['fake']),
-        )
+        assert [
+            (CIGAR.X, 4),
+            (CIGAR.EQ, 10),
+            (CIGAR.D, 10),
+            (CIGAR.I, 10),
+            (CIGAR.EQ, 25),
+        ] == recompute_cigar_mismatch(r, REFERENCE_GENOME['fake'])
 
 
-class TestExtendSoftclipping(unittest.TestCase):
+class TestExtendSoftclipping:
     def test_softclipped_right(self):
         c = convert_string_to_cigar('70=2X1=8X4=1X1=4X1=6X1=4X1=4X2=5X3=3X1=4X1=3X1=14X1=1X2=1S')
         cnew, prefix = extend_softclipping(c, 6)
-        self.assertEqual(0, prefix)
-        self.assertEqual(convert_string_to_cigar('70=80S'), cnew)
+        assert prefix == 0
+        assert cnew == convert_string_to_cigar('70=80S')
 
 
-class TestCigarTools(unittest.TestCase):
+class TestCigarTools:
     def test_alignment_matches(self):
         c = [(CIGAR.M, 10), (CIGAR.EQ, 10), (CIGAR.X, 10)]
-        self.assertEqual(30, alignment_matches(c))
+        assert alignment_matches(c) == 30
 
     def test_join(self):
         c = [(CIGAR.M, 10), (CIGAR.X, 10), (CIGAR.X, 10)]
-        self.assertEqual([(CIGAR.M, 10), (CIGAR.X, 20)], join(c))
+        assert join(c) == [(CIGAR.M, 10), (CIGAR.X, 20)]
         k = [(CIGAR.X, 10), (CIGAR.M, 10), (CIGAR.X, 10)]
-        self.assertEqual([(CIGAR.M, 10), (CIGAR.X, 30), (CIGAR.M, 10), (CIGAR.X, 10)], join(c, k))
+        assert join(c, k) == [(CIGAR.M, 10), (CIGAR.X, 30), (CIGAR.M, 10), (CIGAR.X, 10)]
         k = [(4, 1), (4, 2), (7, 5), (8, 7), (7, 2), (8, 5), (7, 28), (8, 1), (7, 99)]
-        self.assertEqual(
-            [(4, 3), (7, 5), (8, 7), (7, 2), (8, 5), (7, 28), (8, 1), (7, 99)], join(k)
-        )
+        assert [(4, 3), (7, 5), (8, 7), (7, 2), (8, 5), (7, 28), (8, 1), (7, 99)] == join(k)
 
     def test_join_hardclipping(self):
         c = [(CIGAR.H, 10), (CIGAR.M, 10), (CIGAR.X, 10), (CIGAR.X, 10)]
-        self.assertEqual([(CIGAR.H, 10), (CIGAR.M, 10), (CIGAR.X, 20)], join(c))
+        assert join(c) == [(CIGAR.H, 10), (CIGAR.M, 10), (CIGAR.X, 20)]
 
     def test_longest_fuzzy_match(self):
         c = [
@@ -128,10 +134,10 @@ def test_longest_fuzzy_match(self):
             (CIGAR.I, 3),
             (CIGAR.EQ, 5),
         ]
-        self.assertEqual(15, longest_fuzzy_match(c, 1))
-        self.assertEqual(10, longest_fuzzy_match(c, 0))
-        self.assertEqual(16, longest_fuzzy_match(c, 2))
-        self.assertEqual(16, longest_fuzzy_match(c, 4))
+        assert longest_fuzzy_match(c, 1) == 15
+        assert longest_fuzzy_match(c, 0) == 10
+        assert longest_fuzzy_match(c, 2) == 16
+        assert longest_fuzzy_match(c, 4) == 16
 
     def test_score(self):
         c = [
@@ -142,10 +148,10 @@ def test_score(self):
             (CIGAR.I, 3),
             (CIGAR.EQ, 5),
         ]
-        self.assertEqual(22, score(c))
+        assert score(c) == 22
 
     def test_score_error(self):
-        with self.assertRaises(AssertionError):
+        with pytest.raises(AssertionError):
             c = [(CIGAR.S, 10), (CIGAR.EQ, 1), (CIGAR.X, 4), (99, 10), (CIGAR.I, 3), (CIGAR.EQ, 5)]
             score(c)
 
@@ -158,56 +164,50 @@ def test_match_percent(self):
             (CIGAR.I, 3),
             (CIGAR.EQ, 5),
         ]
-        self.assertEqual(0.8, match_percent(c))
-        with self.assertRaises(AttributeError):
+        assert match_percent(c) == 0.8
+        with pytest.raises(AttributeError):
             match_percent([(CIGAR.M, 100)])
-        with self.assertRaises(AttributeError):
+        with pytest.raises(AttributeError):
             match_percent([(CIGAR.S, 100)])
 
     def test_compute(self):
         # GTGAGTAAATTCAACATCGTTTTT
         # aacttagAATTCAAC---------
-        self.assertEqual(
-            ([(CIGAR.S, 7), (CIGAR.EQ, 8)], 7),
-            compute('GTGAGTAAATTCAACATCGTTTTT', 'AACTTAGAATTCAAC---------'),
+        assert ([(CIGAR.S, 7), (CIGAR.EQ, 8)], 7) == compute(
+            'GTGAGTAAATTCAACATCGTTTTT', 'AACTTAGAATTCAAC---------'
         )
-        self.assertEqual(
-            ([(CIGAR.S, 5), (CIGAR.EQ, 8)], 7),
-            compute('GTGAGTAAATTCAACATCGTTTTT', '--CTTAGAATTCAAC---------'),
+        assert ([(CIGAR.S, 5), (CIGAR.EQ, 8)], 7) == compute(
+            'GTGAGTAAATTCAACATCGTTTTT', '--CTTAGAATTCAAC---------'
         )
-        self.assertEqual(
-            ([(CIGAR.S, 5), (CIGAR.EQ, 8)], 7),
-            compute('GTGAGTAAATTCAACATCGTTTTT', '--CTTAGAATTCAAC---------', False),
+        assert ([(CIGAR.S, 5), (CIGAR.EQ, 8)], 7) == compute(
+            'GTGAGTAAATTCAACATCGTTTTT', '--CTTAGAATTCAAC---------', False
         )
 
-        self.assertEqual(
-            ([(CIGAR.S, 5), (CIGAR.EQ, 5), (CIGAR.I, 2), (CIGAR.EQ, 1)], 7),
-            compute('GTGAGTAAATTC--CATCGTTTTT', '--CTTAGAATTCAAC---------', False),
+        assert ([(CIGAR.S, 5), (CIGAR.EQ, 5), (CIGAR.I, 2), (CIGAR.EQ, 1)], 7) == compute(
+            'GTGAGTAAATTC--CATCGTTTTT', '--CTTAGAATTCAAC---------', False
         )
 
-        with self.assertRaises(AttributeError):
+        with pytest.raises(AttributeError):
             compute('CCTG', 'CCG')
 
-        self.assertEqual(
-            ([(CIGAR.EQ, 2), (CIGAR.X, 2)], 0),
-            compute('CCTG', 'CCGT', min_exact_to_stop_softclipping=10),
+        assert ([(CIGAR.EQ, 2), (CIGAR.X, 2)], 0) == compute(
+            'CCTG', 'CCGT', min_exact_to_stop_softclipping=10
         )
 
-        self.assertEqual(
-            ([(CIGAR.S, 5), (CIGAR.EQ, 8)], 5),
-            compute('--GAGTAAATTCAACATCGTTTTT', '--CTTAGAATTCAAC---------', False),
+        assert ([(CIGAR.S, 5), (CIGAR.EQ, 8)], 5) == compute(
+            '--GAGTAAATTCAACATCGTTTTT', '--CTTAGAATTCAAC---------', False
         )
 
     def test_convert_for_igv(self):
         c = [(CIGAR.M, 10), (CIGAR.EQ, 10), (CIGAR.X, 10)]
-        self.assertEqual([(CIGAR.M, 30)], convert_for_igv(c))
+        assert convert_for_igv(c) == [(CIGAR.M, 30)]
 
 
-class TestHgvsStandardizeCigars(unittest.TestCase):
+class TestHgvsStandardizeCigars:
     def no_change_aligned(self):
         ref = 'AAATTTGGGCCCAATT'
         read = MockRead('name', '1', 1, cigar=[(CIGAR.M, 10)], query_sequence='AAATTTGGGC')
-        self.assertEqual([(CIGAR.M, 10)], hgvs_standardize_cigar(read, ref))
+        assert hgvs_standardize_cigar(read, ref) == [(CIGAR.M, 10)]
 
     def no_change_proper_indel(self):
         ref = 'ATAGGC' 'ATCTACGAG' 'ATCGCTACG'
@@ -218,9 +218,8 @@ def no_change_proper_indel(self):
             query_sequence='ATCTAC' 'CCC' 'ATCG',
             cigar=[(CIGAR.EQ, 6), (CIGAR.I, 3), (CIGAR.D, 3), (CIGAR.EQ, 4)],
         )
-        self.assertEqual(
-            [(CIGAR.EQ, 6), (CIGAR.I, 3), (CIGAR.D, 3), (CIGAR.EQ, 4)],
-            hgvs_standardize_cigar(read, ref),
+        assert [(CIGAR.EQ, 6), (CIGAR.I, 3), (CIGAR.D, 3), (CIGAR.EQ, 4)] == hgvs_standardize_cigar(
+            read, ref
         )
 
     def ins_after_deletion(self):
@@ -232,9 +231,8 @@ def ins_after_deletion(self):
             query_sequence='ATCTAC' 'CCC' 'ATCG',
             cigar=[(CIGAR.EQ, 6), (CIGAR.D, 3), (CIGAR.I, 3), (CIGAR.EQ, 4)],
         )
-        self.assertEqual(
-            [(CIGAR.EQ, 6), (CIGAR.I, 3), (CIGAR.D, 3), (CIGAR.EQ, 4)],
-            hgvs_standardize_cigar(read, ref),
+        assert [(CIGAR.EQ, 6), (CIGAR.I, 3), (CIGAR.D, 3), (CIGAR.EQ, 4)] == hgvs_standardize_cigar(
+            read, ref
         )
 
     def test_insertion_in_repeat(self):
@@ -246,9 +244,7 @@ def test_insertion_in_repeat(self):
             query_sequence='ATCT' 'ACGA' 'ACGA' 'GATC',
             cigar=[(CIGAR.EQ, 4), (CIGAR.I, 4), (CIGAR.EQ, 8)],
         )
-        self.assertEqual(
-            [(CIGAR.EQ, 8), (CIGAR.I, 4), (CIGAR.EQ, 4)], hgvs_standardize_cigar(read, ref)
-        )
+        assert [(CIGAR.EQ, 8), (CIGAR.I, 4), (CIGAR.EQ, 4)] == hgvs_standardize_cigar(read, ref)
 
     def test_deletion_in_repeat(self):
         ref = 'ATAGGC' 'ATCT' 'ACGA' 'ACGA' 'ACGA' 'GATCGCTACG'
@@ -259,9 +255,7 @@ def test_deletion_in_repeat(self):
             query_sequence='ATCT' 'ACGA' 'ACGA' 'GATC',
             cigar=[(CIGAR.EQ, 4), (CIGAR.D, 4), (CIGAR.EQ, 12)],
         )
-        self.assertEqual(
-            [(CIGAR.EQ, 12), (CIGAR.D, 4), (CIGAR.EQ, 4)], hgvs_standardize_cigar(read, ref)
-        )
+        assert [(CIGAR.EQ, 12), (CIGAR.D, 4), (CIGAR.EQ, 4)] == hgvs_standardize_cigar(read, ref)
 
     def test_bubble_sort_indel_sections(self):
         rseq = 'ATAGGC' 'ATCT' 'GG' 'GA' 'GCGA' 'GATCGCTACG'
@@ -280,9 +274,8 @@ def test_bubble_sort_indel_sections(self):
                 (CIGAR.EQ, 8),
             ],
         )
-        self.assertEqual(
-            [(CIGAR.EQ, 4), (CIGAR.I, 5), (CIGAR.D, 4), (CIGAR.EQ, 8)],
-            hgvs_standardize_cigar(read, rseq),
+        assert [(CIGAR.EQ, 4), (CIGAR.I, 5), (CIGAR.D, 4), (CIGAR.EQ, 8)] == hgvs_standardize_cigar(
+            read, rseq
         )
 
     def test_bubble_sort_indel_sections_drop_mismatch(self):
@@ -304,9 +297,8 @@ def test_bubble_sort_indel_sections_drop_mismatch(self):
                 (CIGAR.EQ, 8),
             ],
         )
-        self.assertEqual(
-            [(CIGAR.EQ, 4), (CIGAR.I, 5), (CIGAR.D, 8), (CIGAR.EQ, 9)],
-            hgvs_standardize_cigar(read, rseq),
+        assert [(CIGAR.EQ, 4), (CIGAR.I, 5), (CIGAR.D, 8), (CIGAR.EQ, 9)] == hgvs_standardize_cigar(
+            read, rseq
         )
 
     def test_bubble_sort_indel_sections_drop_mismatch_with_hardclipping(self):
@@ -337,10 +329,13 @@ def test_bubble_sort_indel_sections_drop_mismatch_with_hardclipping(self):
         print(SamRead.deletion_sequences(read, {'1': MockObject(seq=ref)}))
         print(SamRead.insertion_sequences(read))
         print(read.query_sequence, len(read.query_sequence))
-        self.assertEqual(
-            [(CIGAR.H, 10), (CIGAR.EQ, 4), (CIGAR.I, 6), (CIGAR.D, 5), (CIGAR.EQ, 6)],
-            hgvs_standardize_cigar(read, ref),
-        )
+        assert [
+            (CIGAR.H, 10),
+            (CIGAR.EQ, 4),
+            (CIGAR.I, 6),
+            (CIGAR.D, 5),
+            (CIGAR.EQ, 6),
+        ] == hgvs_standardize_cigar(read, ref)
 
     def test_homopolymer_even_odd(self):
         ref = 'ATCGAGAT' + 'A' * 15 + 'TCGAGAT'
@@ -351,8 +346,8 @@ def test_homopolymer_even_odd(self):
             query_sequence='ATCGAGATA' + 'A' * 12 + 'TCGAGAT',
             cigar=[(CIGAR.EQ, 8), (CIGAR.D, 2), (CIGAR.EQ, 20)],
         )
-        self.assertEqual(
-            [(CIGAR.EQ, 9 + 12), (CIGAR.D, 2), (CIGAR.EQ, 7)], hgvs_standardize_cigar(read, ref)
+        assert [(CIGAR.EQ, 9 + 12), (CIGAR.D, 2), (CIGAR.EQ, 7)] == hgvs_standardize_cigar(
+            read, ref
         )
         ref = (
             'CCCCGGCTCATGTCTGGTTTTGTTTTCCGGGGGCGGGGGGGCTCCCTGGGGATGATGGTGATTTTTTTTTTTTTTTAATCCTCAACTAGGAGAGAAAA'
@@ -370,9 +365,8 @@ def test_homopolymer_even_odd(self):
             ),
             cigar=[(CIGAR.EQ, 61), (CIGAR.I, 2), (CIGAR.EQ, 87)],
         )
-        self.assertEqual(
-            [(CIGAR.EQ, 61 + 15), (CIGAR.I, 2), (CIGAR.EQ, 87 - 15)],
-            hgvs_standardize_cigar(read, ref),
+        assert [(CIGAR.EQ, 61 + 15), (CIGAR.I, 2), (CIGAR.EQ, 87 - 15)] == hgvs_standardize_cigar(
+            read, ref
         )
 
         ref = (
@@ -391,10 +385,12 @@ def test_homopolymer_even_odd(self):
             ),
             cigar=[(CIGAR.S, 2), (CIGAR.EQ, 96), (CIGAR.I, 2), (CIGAR.EQ, 50)],
         )
-        self.assertEqual(
-            [(CIGAR.S, 2), (CIGAR.EQ, 96 + 15), (CIGAR.I, 2), (CIGAR.EQ, 50 - 15)],
-            hgvs_standardize_cigar(read, ref),
-        )
+        assert [
+            (CIGAR.S, 2),
+            (CIGAR.EQ, 96 + 15),
+            (CIGAR.I, 2),
+            (CIGAR.EQ, 50 - 15),
+        ] == hgvs_standardize_cigar(read, ref)
 
     def test_even_deletion_in_repeat(self):
         rseq = (
@@ -421,7 +417,7 @@ def test_even_deletion_in_repeat(self):
         print(SamRead.deletion_sequences(read, reference_genome))
         read.cigar = new_cigar
         print(SamRead.deletion_sequences(read, reference_genome))
-        self.assertEqual(exp, new_cigar)
+        assert new_cigar == exp
 
     def test_odd_deletion_in_repeat(self):
         rseq = (
@@ -446,7 +442,7 @@ def test_odd_deletion_in_repeat(self):
         print(SamRead.deletion_sequences(read, reference_genome))
         read.cigar = new_cigar
         print(SamRead.deletion_sequences(read, reference_genome))
-        self.assertEqual(exp, new_cigar)
+        assert new_cigar == exp
 
     def test_unecessary_indel(self):
         rseq = 'qwertyuiopasdfghjklzxcvbnm'
@@ -458,10 +454,9 @@ def test_unecessary_indel(self):
             cigar=convert_string_to_cigar('13=1I1D12='),
             query_sequence=qseq,
         )
-        reference_genome = {'1': MockObject(seq=rseq)}
         exp = convert_string_to_cigar('26=')
         new_cigar = hgvs_standardize_cigar(read, rseq)
-        self.assertEqual(exp, new_cigar)
+        assert new_cigar == exp
 
     def test_unecessary_indel2(self):
         rseq = 'qwertyuiopasdfghjklzxcvbnm'
@@ -473,10 +468,9 @@ def test_unecessary_indel2(self):
             cigar=convert_string_to_cigar('13=2I1D12='),
             query_sequence=qseq,
         )
-        reference_genome = {'1': MockObject(seq=rseq)}
         exp = convert_string_to_cigar('14=1I12=')
         new_cigar = hgvs_standardize_cigar(read, rseq)
-        self.assertEqual(exp, new_cigar)
+        assert new_cigar == exp
 
     def test_unecessary_indel_end_match(self):
         rseq = 'qwertyuiopasdfghjklzxcvbnm'
@@ -488,10 +482,9 @@ def test_unecessary_indel_end_match(self):
             cigar=convert_string_to_cigar('14=5I2D10='),
             query_sequence=qseq,
         )
-        reference_genome = {'1': MockObject(seq=rseq)}
         exp = convert_string_to_cigar('14=3I12=')
         new_cigar = hgvs_standardize_cigar(read, rseq)
-        self.assertEqual(exp, new_cigar)
+        assert new_cigar == exp
 
     def test_unecessary_indel_end_match2(self):
         rseq = 'GGGTGCAGTGGCTTACACCT' 'GTAATCCAAACACCTTGGGAGCCGCCCCCTGAG' 'CCTCCAGGCCCGGGACAGA'
@@ -503,10 +496,9 @@ def test_unecessary_indel_end_match2(self):
             cigar=convert_string_to_cigar('20=5I33D19='),
             query_sequence=qseq,
         )
-        reference_genome = {'1': MockObject(seq=rseq)}
         exp = convert_string_to_cigar('20=4I32D20=')
         new_cigar = hgvs_standardize_cigar(read, rseq)
-        self.assertEqual(exp, new_cigar)
+        assert new_cigar == exp
 
     def test_even_insertion_in_repeat(self):
         rseq = (
@@ -527,11 +519,10 @@ def test_even_insertion_in_repeat(self):
             cigar=convert_string_to_cigar('4S13=2I66='),
             query_sequence=qseq,
         )
-        reference_genome = {'1': MockObject(seq=rseq)}
         exp = convert_string_to_cigar('4S26=2I53=')
         new_cigar = hgvs_standardize_cigar(read, rseq)
         read.cigar = new_cigar
-        self.assertEqual(exp, new_cigar)
+        assert new_cigar == exp
 
     def test_deletion_repeat(self):
         qseq = (
@@ -581,7 +572,7 @@ def test_deletion_repeat(self):
         print(SamRead.deletion_sequences(read, REFERENCE_GENOME))
         read.cigar = std_cigar
         print(SamRead.deletion_sequences(read, REFERENCE_GENOME))
-        self.assertEqual(expected_cigar, std_cigar)
+        assert std_cigar == expected_cigar
 
     @timeout_decorator.timeout(1)
     def test_complex(self):
@@ -651,7 +642,7 @@ def test_complex(self):
         std_cigar = hgvs_standardize_cigar(read, rseq)
         print(new_cigar)
         print(std_cigar)
-        self.assertEqual(new_cigar, std_cigar)
+        assert std_cigar == new_cigar
 
     def test_deletion_partial_repeat(self):
         qseq = 'ATCTTAGCCAGGT' 'AGTTACATACATATC'
@@ -663,7 +654,7 @@ def test_deletion_partial_repeat(self):
             query_sequence=qseq,
             cigar=convert_string_to_cigar('13=6D15='),
         )
-        self.assertEqual(convert_string_to_cigar('15=6D13='), hgvs_standardize_cigar(read, rseq))
+        assert convert_string_to_cigar('15=6D13=') == hgvs_standardize_cigar(read, rseq)
 
     def test_indel_repeat(self):
         qseq = 'ATCTTAGCCAGGT' 'C' 'AGTTACATACATATC'
@@ -677,7 +668,7 @@ def test_indel_repeat(self):
             query_sequence=qseq,
             cigar=convert_string_to_cigar('13=1I6D15='),
         )
-        self.assertEqual(convert_string_to_cigar('13=1I6D15='), hgvs_standardize_cigar(read, rseq))
+        assert convert_string_to_cigar('13=1I6D15=') == hgvs_standardize_cigar(read, rseq)
 
     def test_shift_complex_indel(self):
         refseq = 'ATATATCTATTTTTTTCTTTCTTTTTTTTACTTTCATTAAGTGCCACTAAAAAATTAGGTTCAATTAAACTTTATTAATCTCTTCTGAGTTTTGATTGAGTATATATATATATATACCCAGTTTCAAGCAGGTATCTGCCTTTAAAGATAAGAGACCTCCTAAATGCTTTCTTTTATTAGTTGCCCTGTTTCAGATTCAGCTTTGTATCTATATCACCTGTTAATATGTGTGGACTCACAGAAATGATCATTGAGGGAATGCACCCTGTTTGGGTGTAAGTAGCTCAGGGAAAAAATCCTAG'
@@ -690,14 +681,14 @@ def test_shift_complex_indel(self):
         )
         print(_read.convert_cigar_to_string(read.cigar))
         read.cigar = recompute_cigar_mismatch(read, refseq)
-        self.assertEqual(convert_string_to_cigar('44=18I63=1X17=1X6='), read.cigar)
+        assert read.cigar == convert_string_to_cigar('44=18I63=1X17=1X6=')
         print(_read.convert_cigar_to_string(read.cigar))
         read.cigar = hgvs_standardize_cigar(read, refseq)
         print(_read.convert_cigar_to_string(read.cigar))
-        self.assertEqual(convert_string_to_cigar('45=18I62=1X17=1X6='), read.cigar)
+        assert read.cigar == convert_string_to_cigar('45=18I62=1X17=1X6=')
 
 
-class TestMergeInternalEvents(unittest.TestCase):
+class TestMergeInternalEvents:
     def test_small_exact_match(self):
         cigar = convert_string_to_cigar('283M17506D5M21275D596M17506D5M21275D313M')
         # [(0, 283), (2, 17506), (0, 5), (2, 21275), (0, 596), (2, 17506), (0, 5), (2, 21275), (0, 313)]
@@ -711,10 +702,10 @@ def test_small_exact_match(self):
             (CIGAR.D, 17506 + 21275 + 5),
             (CIGAR.M, 313),
         ]
-        self.assertEqual(exp, new_cigar)
+        assert new_cigar == exp
 
 
-class TestConvertStringToCigar(unittest.TestCase):
+class TestConvertStringToCigar:
     def test(self):
         string = '283M' '17506D' '5M' '21275D' '596M' '17506D' '5M' '21275D' '313M'
         exp = [
@@ -728,13 +719,10 @@ def test(self):
             (CIGAR.D, 21275),
             (CIGAR.M, 313),
         ]
-        self.assertEqual(exp, convert_string_to_cigar(string))
-
+        assert convert_string_to_cigar(string) == exp
 
-class TestGetSequences(unittest.TestCase):
-    def setUp(self):
-        self.reference_genome = {'1': MockObject(seq='abcdefghijklmnopqrstuvwxyz')}
 
+class TestGetSequences:
     def test_deletions(self):
         exp = ['cde', 'nopq']
         read = MockRead(
@@ -743,7 +731,10 @@ def test_deletions(self):
             query_sequence='',
             cigar=convert_string_to_cigar('2=3D8=4D9='),
         )
-        self.assertEqual(exp, SamRead.deletion_sequences(read, self.reference_genome))
+        assert (
+            SamRead.deletion_sequences(read, {'1': MockObject(seq='abcdefghijklmnopqrstuvwxyz')})
+            == exp
+        )
 
     def test_insertions(self):
         exp = ['kkk', 'kkkk']
@@ -753,4 +744,4 @@ def test_insertions(self):
             query_sequence='abcdekkkfghijklmnopqkkkkrstuvwxyz',
             cigar=convert_string_to_cigar('5=3I12=4I9='),
         )
-        self.assertEqual(exp, SamRead.insertion_sequences(read))
+        assert SamRead.insertion_sequences(read) == exp
diff --git a/tests/integration/test_blat.py b/tests/integration/test_blat.py
index 69ad1d68..10fe8320 100644
--- a/tests/integration/test_blat.py
+++ b/tests/integration/test_blat.py
@@ -1,6 +1,5 @@
-import shutil
-import unittest
-
+import mavis.bam.cigar as _cigar
+import pytest
 from Bio import SeqIO
 from mavis.align import query_coverage_interval
 from mavis.annotate.file_io import load_reference_genome
@@ -8,11 +7,9 @@
 from mavis.blat import Blat
 from mavis.constants import CIGAR, reverse_complement
 from mavis.interval import Interval
-import mavis.bam.cigar as _cigar
 
-from . import MockBamFileHandle, MockObject, MockLongString
 from ..util import get_data
-
+from . import MockBamFileHandle, MockLongString, MockObject
 
 REFERENCE_GENOME = None
 
@@ -29,16 +26,18 @@ def setUpModule():
     BAM_CACHE = BamCache(get_data('mini_mock_reads_for_events.sorted.bam'))
 
 
-class TestBlat(unittest.TestCase):
-    def setUp(self):
-        self.cache = BamCache(MockBamFileHandle({'Y': 23, 'fake': 0, 'reference3': 3, '14': 13}))
+@pytest.fixture
+def cache():
+    return BamCache(MockBamFileHandle({'Y': 23, 'fake': 0, 'reference3': 3, '14': 13}))
+
 
+class TestBlat:
     def test_read_pslx(self):
         mapping = {}
         for record in SeqIO.parse(get_data('blat_input.fa'), 'fasta'):
             mapping[record.id] = record.seq
         header, rows = Blat.read_pslx(get_data('blat_output.pslx'), mapping)
-        self.assertEqual(11067, len(rows))
+        assert len(rows) == 11067
         expect_pslx_header = [
             'match',
             'mismatch',
@@ -64,9 +63,9 @@ def test_read_pslx(self):
             'qseqs',
             'tseqs',
         ]
-        self.assertEqual(expect_pslx_header, header)
+        assert header == expect_pslx_header
 
-    def test_pslx_row_to_pysam_single_block(self):
+    def test_pslx_row_to_pysam_single_block(self, cache):
         pslx_row = {
             'score': 20,
             'tseqs': ['AATACCAAATACATGATATA'],
@@ -82,11 +81,11 @@ def test_pslx_row_to_pysam_single_block(self):
             'qseq_full': 'AGCCTCCCAAGTAGCTGGGACTACAGGCGCCCGCCACTACGCCCGGCTAATTTTTTGTATTTTTAGTAGAGACGGGGTTTCACCGTTTT'
             'AGCCAGGATGGTCTCGATCTCCTGACCTCATGATCCGCCCGCCTCGGC',
         }
-        read = Blat.pslx_row_to_pysam(pslx_row, self.cache, None)
-        self.assertEqual(23, read.reference_id)
-        self.assertEqual(Interval(93, 112), query_coverage_interval(read))
+        read = Blat.pslx_row_to_pysam(pslx_row, cache, None)
+        assert read.reference_id == 23
+        assert query_coverage_interval(read) == Interval(93, 112)
 
-    def test_pslx_row_to_pysam_full_reverse(self):
+    def test_pslx_row_to_pysam_full_reverse(self, cache):
         pslx_row = {
             'match': 128,
             'mismatch': 0,
@@ -114,13 +113,13 @@ def test_pslx_row_to_pysam_full_reverse(self):
             'percent_ident': 100.0,
             'qseq_full': 'CTGAGCATGAAAGCCCTGTAAACACAGAATTTGGATTCTTTCCTGTTTGGTTCCTGGTCGTGAGTGGCAGGTGCCATCATGTTTCATTCTGCCTGAGAGCAGTCTACCTAAATATATAGCTCTGCTCACAGTTTCCCTGCAATGCATAATTAAAATAGCACTATGCAGTTGCTTACACTTCAGATAATGGCTTCCTACATATTGTTGGTTATGAAATTTCAGGGTTTTCATTTCTGTATGTTAAT',
         }
-        read = Blat.pslx_row_to_pysam(pslx_row, self.cache, None)
-        self.assertEqual(3, read.reference_id)
-        self.assertEqual([(CIGAR.S, 117), (CIGAR.M, 128)], read.cigar)
-        self.assertEqual(2187, read.reference_start)
-        self.assertEqual(Interval(117, 244), query_coverage_interval(read))
+        read = Blat.pslx_row_to_pysam(pslx_row, cache, None)
+        assert read.reference_id == 3
+        assert read.cigar == [(CIGAR.S, 117), (CIGAR.M, 128)]
+        assert read.reference_start == 2187
+        assert query_coverage_interval(read) == Interval(117, 244)
 
-    def test_pslx_row_to_pysam_simple(self):
+    def test_pslx_row_to_pysam_simple(self, cache):
         pslx_row = {
             'tstarts': [950],
             'block_sizes': [53],
@@ -131,14 +130,14 @@ def test_pslx_row_to_pysam_simple(self):
             'score': 0,
             'qseq_full': 'ATCTAATAACTTGATCAATA' 'TCTGTGATTATATTTTCATT' 'GCCTTCCAATTTT',
         }
-        read = Blat.pslx_row_to_pysam(pslx_row, self.cache, None)
-        self.assertEqual(0, read.reference_id)
-        self.assertEqual(Interval(0, 52), query_coverage_interval(read))
-        self.assertEqual(950, read.reference_start)
-        self.assertEqual(1003, read.reference_end)
-        self.assertEqual([(CIGAR.M, 53)], read.cigar)
+        read = Blat.pslx_row_to_pysam(pslx_row, cache, None)
+        assert read.reference_id == 0
+        assert query_coverage_interval(read) == Interval(0, 52)
+        assert read.reference_start == 950
+        assert read.reference_end == 1003
+        assert read.cigar == [(CIGAR.M, 53)]
 
-    def test_pslx_row_to_pysam_simple_with_reference(self):
+    def test_pslx_row_to_pysam_simple_with_reference(self, cache):
         pslx_row = {
             'tstarts': [950],
             'block_sizes': [53],
@@ -149,14 +148,14 @@ def test_pslx_row_to_pysam_simple_with_reference(self):
             'score': 0,
             'qseq_full': 'ATCTAATAACTTGATCAATA' 'TCTGTGATTATATTTTCATT' 'GCCTTCCAATTTT',
         }
-        read = Blat.pslx_row_to_pysam(pslx_row, self.cache, REFERENCE_GENOME)
-        self.assertEqual(0, read.reference_id)
-        self.assertEqual(Interval(0, 52), query_coverage_interval(read))
-        self.assertEqual(950, read.reference_start)
-        self.assertEqual(1003, read.reference_end)
-        self.assertEqual([(CIGAR.EQ, 53)], read.cigar)
+        read = Blat.pslx_row_to_pysam(pslx_row, cache, REFERENCE_GENOME)
+        assert read.reference_id == 0
+        assert query_coverage_interval(read) == Interval(0, 52)
+        assert read.reference_start == 950
+        assert read.reference_end == 1003
+        assert read.cigar == [(CIGAR.EQ, 53)]
 
-    def test_pslx_row_to_pysam_gapped_alignment(self):
+    def test_pslx_row_to_pysam_gapped_alignment(self, cache):
         pslx_row = {
             'block_count': 1,
             'tstarts': [950, 7233],
@@ -175,13 +174,13 @@ def test_pslx_row_to_pysam_gapped_alignment(self):
             'ATACTTCATGTTGCCATGTT',
             'score': 1,
         }
-        read = Blat.pslx_row_to_pysam(pslx_row, self.cache, None)
-        self.assertEqual(0, read.reference_id)
-        self.assertEqual(Interval(0, 146), query_coverage_interval(read))
-        self.assertEqual(950, read.reference_start)
-        self.assertEqual([(CIGAR.M, 47), (CIGAR.D, 6236), (CIGAR.M, 100)], read.cigar)
+        read = Blat.pslx_row_to_pysam(pslx_row, cache, None)
+        assert read.reference_id == 0
+        assert query_coverage_interval(read) == Interval(0, 146)
+        assert read.reference_start == 950
+        assert read.cigar == [(CIGAR.M, 47), (CIGAR.D, 6236), (CIGAR.M, 100)]
 
-    def test_pslx_row_to_pysam_gapped_alignment_with_reference(self):
+    def test_pslx_row_to_pysam_gapped_alignment_with_reference(self, cache):
         pslx_row = {
             'block_count': 1,
             'tstarts': [950, 7233],
@@ -200,13 +199,13 @@ def test_pslx_row_to_pysam_gapped_alignment_with_reference(self):
             'ATACTTCATGTTGCCATGTT',
             'score': 1,
         }
-        read = Blat.pslx_row_to_pysam(pslx_row, self.cache, REFERENCE_GENOME)
-        self.assertEqual(0, read.reference_id)
-        self.assertEqual(Interval(0, 146), query_coverage_interval(read))
-        self.assertEqual(950, read.reference_start)
-        self.assertEqual([(CIGAR.EQ, 53), (CIGAR.D, 6236), (CIGAR.EQ, 94)], read.cigar)
+        read = Blat.pslx_row_to_pysam(pslx_row, cache, REFERENCE_GENOME)
+        assert read.reference_id == 0
+        assert query_coverage_interval(read) == Interval(0, 146)
+        assert read.reference_start == 950
+        assert read.cigar == [(CIGAR.EQ, 53), (CIGAR.D, 6236), (CIGAR.EQ, 94)]
 
-    def test_pslx_row_to_pysam_revcomp_deletion(self):
+    def test_pslx_row_to_pysam_revcomp_deletion(self, cache):
         pslx_row = {
             'block_count': 2,
             'tstarts': [2205, 2281],
@@ -226,17 +225,15 @@ def test_pslx_row_to_pysam_revcomp_deletion(self):
                 'CCAAATTCTGTGTTTACAGGGCTTTCATGCTCAG',
             ],
         }
-        read = Blat.pslx_row_to_pysam(pslx_row, self.cache, REFERENCE_GENOME)
-        self.assertEqual(3, read.reference_id)
-        self.assertEqual(Interval(0, 83), query_coverage_interval(read))
-        self.assertEqual(2205, read.reference_start)
-        self.assertEqual([(CIGAR.EQ, 51), (CIGAR.D, 26), (CIGAR.EQ, 33)], read.cigar)
-        self.assertEqual(
-            'TAGGTAGACTGCTCTCAGGCAGAATGAAACATGATGGCACCTGCCACTCA', read.query_sequence[0:50]
-        )
-        self.assertEqual('CCAAATTCTGTGTTTACAGGGCTTTCATGCTCAG', read.query_sequence[50:])
+        read = Blat.pslx_row_to_pysam(pslx_row, cache, REFERENCE_GENOME)
+        assert read.reference_id == 3
+        assert query_coverage_interval(read) == Interval(0, 83)
+        assert read.reference_start == 2205
+        assert read.cigar == [(CIGAR.EQ, 51), (CIGAR.D, 26), (CIGAR.EQ, 33)]
+        assert read.query_sequence[0:50] == 'TAGGTAGACTGCTCTCAGGCAGAATGAAACATGATGGCACCTGCCACTCA'
+        assert read.query_sequence[50:] == 'CCAAATTCTGTGTTTACAGGGCTTTCATGCTCAG'
 
-    def test_pslx_row_to_pysam_inversion(self):
+    def test_pslx_row_to_pysam_inversion(self, cache):
         s = 'CTGAGCATGAAAGCCCTGTAAACACAGAATTTGGATTCTTTCCTGTTTGGTTCCTGGTCGTGAGTGGCAGGTGCCATCATGTTTCATTCTGCCTGAGAGCAGTCTACCTAAATATATAGCTCTGCTCACAGTTTCCCTGCAATGCATAATTAAAATAGCACTATGCAGTTGCTTACACTTCAGATAATGGCTTCCTACATATTGTTGGTTATGAAATTTCAGGGTTTTCATTTCTGTATGTTAAT'
         # first part of the inversion
         pslx_row = {
@@ -258,11 +255,11 @@ def test_pslx_row_to_pysam_inversion(self):
                 'TTTTCATTTCTGTATGTTAAT'
             ],
         }
-        read1 = Blat.pslx_row_to_pysam(pslx_row, self.cache, REFERENCE_GENOME)
-        self.assertEqual(3, read1.reference_id)
-        self.assertEqual(Interval(125, 244), query_coverage_interval(read1))
-        self.assertEqual(1114, read1.reference_start)
-        self.assertEqual([(CIGAR.S, 125), (CIGAR.EQ, 120)], read1.cigar)
+        read1 = Blat.pslx_row_to_pysam(pslx_row, cache, REFERENCE_GENOME)
+        assert read1.reference_id == 3
+        assert query_coverage_interval(read1) == Interval(125, 244)
+        assert read1.reference_start == 1114
+        assert read1.cigar == [(CIGAR.S, 125), (CIGAR.EQ, 120)]
 
         # second part of the inversion
         pslx_row = {
@@ -284,15 +281,15 @@ def test_pslx_row_to_pysam_inversion(self):
                 'TCTGTGTTTACAGGGCTTTCATGCTCAG'
             ],
         }
-        read2 = Blat.pslx_row_to_pysam(pslx_row, self.cache, REFERENCE_GENOME)
-        self.assertEqual(3, read2.reference_id)
-        self.assertEqual(2187, read2.reference_start)
-        self.assertEqual([(CIGAR.S, 117), (CIGAR.EQ, 128)], read2.cigar)
-        self.assertEqual(Interval(117, 244), query_coverage_interval(read2))
-        self.assertEqual(read1.query_sequence, reverse_complement(read2.query_sequence))
+        read2 = Blat.pslx_row_to_pysam(pslx_row, cache, REFERENCE_GENOME)
+        assert read2.reference_id == 3
+        assert read2.reference_start == 2187
+        assert read2.cigar == [(CIGAR.S, 117), (CIGAR.EQ, 128)]
+        assert query_coverage_interval(read2) == Interval(117, 244)
+        assert reverse_complement(read2.query_sequence) == read1.query_sequence
         # test that this is selected for duplication or insertion evidence
 
-    def test_pslx_row_to_pysam_duplication(self):
+    def test_pslx_row_to_pysam_duplication(self, cache):
         reference = {
             '14': MockObject(
                 seq=MockLongString(
@@ -312,12 +309,8 @@ def test_pslx_row_to_pysam_duplication(self):
             'qseq_full': 'AAGAAGGGTAACCTTAAAAAATACATTTCCCACTCCAGAAAATACTCATATGTGGCCTGTTAGCAGCACAAGAAGGGTGAAAGCAATGCCCATTCCTGCCTCCCTCCCCCTGCTCACCTCCACGTCCCTGTTTGCCCCTTTACTCATATGTGGCCTGTTAGCAGCACAAGAAGGGTGAAAGCAATGCCCATTCCTGCCTCCCTCCCCCTGCTCACCTCCACGTCCCTGTTTGCCCCTTTGTAGGTGAAGTGAGTATATTCAGCGTCTTC',
             'score': 1,
         }
-        read2 = Blat.pslx_row_to_pysam(pslx_row, self.cache, reference)
-        self.assertEqual(13, read2.reference_id)
-        self.assertEqual(73014606, read2.reference_start)
-        self.assertEqual(
-            [(CIGAR.M, 141), (CIGAR.I, 98), (CIGAR.M, 30)], _cigar.convert_for_igv(read2.cigar)
-        )
-        self.assertEqual(
-            Interval(0, len(pslx_row['qseq_full']) - 1), query_coverage_interval(read2)
-        )
+        read2 = Blat.pslx_row_to_pysam(pslx_row, cache, reference)
+        assert read2.reference_id == 13
+        assert read2.reference_start == 73014606
+        assert _cigar.convert_for_igv(read2.cigar) == [(CIGAR.M, 141), (CIGAR.I, 98), (CIGAR.M, 30)]
+        assert query_coverage_interval(read2) == Interval(0, len(pslx_row['qseq_full']) - 1)
diff --git a/tests/integration/test_breakpoint.py b/tests/integration/test_breakpoint.py
index 659cf486..f6e0b3bb 100644
--- a/tests/integration/test_breakpoint.py
+++ b/tests/integration/test_breakpoint.py
@@ -1,14 +1,14 @@
-import unittest
 from functools import partial
 
+import pytest
 from mavis.annotate.file_io import load_reference_genome
 from mavis.breakpoint import Breakpoint, BreakpointPair
-from mavis.constants import CIGAR, ORIENT, STRAND, reverse_complement
+from mavis.constants import ORIENT, STRAND
 from mavis.interval import Interval
 from mavis.validate.evidence import TranscriptomeEvidence
 
 from ..util import get_data
-from . import MockObject, MockRead, get_example_genes
+from . import MockObject, get_example_genes
 
 REFERENCE_GENOME = None
 REF_CHR = 'fake'
@@ -24,55 +24,61 @@ def setUpModule():
         raise AssertionError('fake genome file does not have the expected contents')
 
 
-class TestNetSizeTransEGFR(unittest.TestCase):
-    def setUp(self):
-        self.evidence = MockObject(
-            annotations={},
-            read_length=100,
-            max_expected_fragment_size=550,
-            call_error=11,
-            overlapping_transcripts=set(get_example_genes()['EGFR'].transcripts),
-        )
-        setattr(
-            self.evidence, '_select_transcripts', lambda *pos: self.evidence.overlapping_transcripts
-        )
-        setattr(self.evidence, 'distance', partial(TranscriptomeEvidence.distance, self.evidence))
+@pytest.fixture
+def egfr_evidence():
+    evidence = MockObject(
+        annotations={},
+        read_length=100,
+        max_expected_fragment_size=550,
+        call_error=11,
+        overlapping_transcripts=set(get_example_genes()['EGFR'].transcripts),
+    )
+    setattr(evidence, '_select_transcripts', lambda *pos: evidence.overlapping_transcripts)
+    setattr(evidence, 'distance', partial(TranscriptomeEvidence.distance, evidence))
+    return evidence
 
-    def egfr_distance(self, pos1, pos2):
-        return TranscriptomeEvidence.distance(self.evidence, pos1, pos2)
 
-    def test_deletion_in_exon(self):
+class TestNetSizeTransEGFR:
+    def test_deletion_in_exon(self, egfr_evidence):
         bpp = BreakpointPair(
             Breakpoint('7', 55238890, orient=ORIENT.LEFT),
             Breakpoint('7', 55238899, orient=ORIENT.RIGHT),
             untemplated_seq='',
         )
-        self.assertEqual(Interval(-8), bpp.net_size(self.egfr_distance))
+        assert bpp.net_size(
+            lambda p1, p2: TranscriptomeEvidence.distance(egfr_evidence, p1, p2)
+        ) == Interval(-8)
 
         bpp = BreakpointPair(
             Breakpoint('7', 55238890, orient=ORIENT.LEFT),
             Breakpoint('7', 55238899, orient=ORIENT.RIGHT),
             untemplated_seq='GTAC',
         )
-        self.assertEqual(Interval(-4), bpp.net_size(self.egfr_distance))
+        assert bpp.net_size(
+            lambda p1, p2: TranscriptomeEvidence.distance(egfr_evidence, p1, p2)
+        ) == Interval(-4)
 
-    def test_deletion_across_intron(self):
+    def test_deletion_across_intron(self, egfr_evidence):
         # 55240539_55240621  55323947_55324313
         bpp = BreakpointPair(
             Breakpoint('7', 55240610, orient=ORIENT.LEFT),
             Breakpoint('7', 55323950, orient=ORIENT.RIGHT),
             untemplated_seq='GTAC',
         )
-        self.assertEqual(Interval(-10), bpp.net_size(self.egfr_distance))
+        assert bpp.net_size(
+            lambda p1, p2: TranscriptomeEvidence.distance(egfr_evidence, p1, p2)
+        ) == Interval(-10)
         # 55210998_55211181 55218987_55219055
         bpp = BreakpointPair(
             Breakpoint('7', 55211180, orient=ORIENT.LEFT),
             Breakpoint('7', 55218990, orient=ORIENT.RIGHT),
             untemplated_seq='',
         )
-        self.assertEqual(Interval(-4 + -135, -4), bpp.net_size(self.egfr_distance))
+        assert bpp.net_size(
+            lambda p1, p2: TranscriptomeEvidence.distance(egfr_evidence, p1, p2)
+        ) == Interval(-4 + -135, -4)
 
-    def test_insertion_at_exon_start_mixed(self):
+    def test_insertion_at_exon_start_mixed(self, egfr_evidence):
         # EXON 15: 55232973-55233130
         # EXON 16: 55238868-55238906
         # EXON 17: 55240676-55240817
@@ -81,55 +87,67 @@ def test_insertion_at_exon_start_mixed(self):
             Breakpoint('7', 55238868, orient=ORIENT.RIGHT),
             untemplated_seq='TTATCG',
         )
-        self.assertEqual(Interval(6), bpp.net_size(self.egfr_distance))
+        assert bpp.net_size(
+            lambda p1, p2: TranscriptomeEvidence.distance(egfr_evidence, p1, p2)
+        ) == Interval(6)
 
-    def test_insertion_at_exon_start(self):
+    def test_insertion_at_exon_start(self, egfr_evidence):
         # 55238868_55238906
         bpp = BreakpointPair(
             Breakpoint('7', 55233130, orient=ORIENT.LEFT),
             Breakpoint('7', 55238868, orient=ORIENT.RIGHT),
             untemplated_seq='TTATCG',
         )
-        self.assertEqual(Interval(6), bpp.net_size(self.egfr_distance))
+        assert bpp.net_size(
+            lambda p1, p2: TranscriptomeEvidence.distance(egfr_evidence, p1, p2)
+        ) == Interval(6)
 
-    def test_insertion_at_exon_end_mixed(self):
+    def test_insertion_at_exon_end_mixed(self, egfr_evidence):
         # 55238868_55238906
         bpp = BreakpointPair(
             Breakpoint('7', 55238905, orient=ORIENT.LEFT),
             Breakpoint('7', 55238906, orient=ORIENT.RIGHT),
             untemplated_seq='TTATCG',
         )
-        self.assertEqual(Interval(6), bpp.net_size(self.egfr_distance))
+        assert bpp.net_size(
+            lambda p1, p2: TranscriptomeEvidence.distance(egfr_evidence, p1, p2)
+        ) == Interval(6)
 
-    def test_insertion_at_exon_end(self):
+    def test_insertion_at_exon_end(self, egfr_evidence):
         # 55238868_55238906
         bpp = BreakpointPair(
             Breakpoint('7', 55238906, orient=ORIENT.LEFT),
             Breakpoint('7', 55240676, orient=ORIENT.RIGHT),
             untemplated_seq='TTATCG',
         )
-        self.assertEqual(Interval(6), bpp.net_size(self.egfr_distance))
+        assert bpp.net_size(
+            lambda p1, p2: TranscriptomeEvidence.distance(egfr_evidence, p1, p2)
+        ) == Interval(6)
 
-    def test_insertion_in_intron(self):
+    def test_insertion_in_intron(self, egfr_evidence):
         # 55238868_55238906
         bpp = BreakpointPair(
             Breakpoint('7', 5523750, orient=ORIENT.LEFT),
             Breakpoint('7', 5523751, orient=ORIENT.RIGHT),
             untemplated_seq='TTATCG',
         )
-        self.assertEqual(Interval(6), bpp.net_size(self.egfr_distance))
+        assert bpp.net_size(
+            lambda p1, p2: TranscriptomeEvidence.distance(egfr_evidence, p1, p2)
+        ) == Interval(6)
 
-    def test_indel_in_intron(self):
+    def test_indel_in_intron(self, egfr_evidence):
         # 55238868_55238906
         bpp = BreakpointPair(
             Breakpoint('7', 5523700, orient=ORIENT.LEFT),
             Breakpoint('7', 5523751, orient=ORIENT.RIGHT),
             untemplated_seq='TTATCG',
         )
-        self.assertEqual(Interval(-44), bpp.net_size(self.egfr_distance))
+        assert bpp.net_size(
+            lambda p1, p2: TranscriptomeEvidence.distance(egfr_evidence, p1, p2)
+        ) == Interval(-44)
 
 
-class TestLt(unittest.TestCase):
+class TestLt:
     def test_break1(self):
         bpp1 = BreakpointPair(
             Breakpoint('1', 1, 10, orient=ORIENT.LEFT),
@@ -141,7 +159,7 @@ def test_break1(self):
             Breakpoint('2', 1, orient=ORIENT.LEFT),
             untemplated_seq='',
         )
-        self.assertTrue(bpp2 < bpp1)
+        assert bpp2 < bpp1
 
     def test_useq(self):
         bpp1 = BreakpointPair(
@@ -154,7 +172,7 @@ def test_useq(self):
             Breakpoint('2', 1, orient=ORIENT.LEFT),
             untemplated_seq=None,
         )
-        self.assertTrue(bpp2 > bpp1)
+        assert bpp2 > bpp1
 
     def test_break2(self):
         bpp1 = BreakpointPair(
@@ -167,20 +185,20 @@ def test_break2(self):
             Breakpoint('2', 1, orient=ORIENT.LEFT),
             untemplated_seq=None,
         )
-        self.assertTrue(bpp2 < bpp1)
+        assert bpp2 < bpp1
 
 
-class TestBreakpointSequenceHomology(unittest.TestCase):
+class TestBreakpointSequenceHomology:
     def test_left_pos_right_pos(self):
         b1 = Breakpoint(REF_CHR, 157, strand=STRAND.POS, orient=ORIENT.LEFT)
         b2 = Breakpoint(REF_CHR, 1788, strand=STRAND.POS, orient=ORIENT.RIGHT)
         bpp = BreakpointPair(b1, b2)
-        self.assertEqual(('CAATGC', ''), bpp.breakpoint_sequence_homology(REFERENCE_GENOME))
+        assert bpp.breakpoint_sequence_homology(REFERENCE_GENOME) == ('CAATGC', '')
 
         b1 = Breakpoint(REF_CHR, 589, strand=STRAND.POS, orient=ORIENT.LEFT)
         b2 = Breakpoint(REF_CHR, 704, strand=STRAND.POS, orient=ORIENT.RIGHT)
         bpp = BreakpointPair(b1, b2)
-        self.assertEqual(('TTAA', 'ATAGC'), bpp.breakpoint_sequence_homology(REFERENCE_GENOME))
+        assert bpp.breakpoint_sequence_homology(REFERENCE_GENOME) == ('TTAA', 'ATAGC')
 
     def test_left_pos_left_neg(self):
         # CCC|AAA ------------ TTT|GGG
@@ -189,7 +207,7 @@ def test_left_pos_left_neg(self):
         b1 = Breakpoint(REF_CHR, 1459, strand=STRAND.POS, orient=ORIENT.LEFT)
         b2 = Breakpoint(REF_CHR, 2914, strand=STRAND.NEG, orient=ORIENT.LEFT)
         bpp = BreakpointPair(b1, b2)
-        self.assertEqual(('CCC', 'TTT'), bpp.breakpoint_sequence_homology(REFERENCE_GENOME))
+        assert bpp.breakpoint_sequence_homology(REFERENCE_GENOME) == ('CCC', 'TTT')
 
     def test_left_neg_left_pos(self):
         # CCC|AAA ------------ TTT|GGG
@@ -198,7 +216,7 @@ def test_left_neg_left_pos(self):
         b1 = Breakpoint(REF_CHR, 1459, strand=STRAND.NEG, orient=ORIENT.LEFT)
         b2 = Breakpoint(REF_CHR, 2914, strand=STRAND.POS, orient=ORIENT.LEFT)
         bpp = BreakpointPair(b1, b2)
-        self.assertEqual(('CCC', 'TTT'), bpp.breakpoint_sequence_homology(REFERENCE_GENOME))
+        assert bpp.breakpoint_sequence_homology(REFERENCE_GENOME) == ('CCC', 'TTT')
 
     def test_right_pos_right_neg(self):
         # CCC|AAA ------------ TTT|GGG
@@ -207,7 +225,7 @@ def test_right_pos_right_neg(self):
         b1 = Breakpoint(REF_CHR, 1460, strand=STRAND.POS, orient=ORIENT.RIGHT)
         b2 = Breakpoint(REF_CHR, 2915, strand=STRAND.NEG, orient=ORIENT.RIGHT)
         bpp = BreakpointPair(b1, b2)
-        self.assertEqual(('AAA', 'GGG'), bpp.breakpoint_sequence_homology(REFERENCE_GENOME))
+        assert bpp.breakpoint_sequence_homology(REFERENCE_GENOME) == ('AAA', 'GGG')
 
     def test_right_neg_right_pos(self):
         # CCC|AAA ------------ TTT|GGG
@@ -216,14 +234,14 @@ def test_right_neg_right_pos(self):
         b1 = Breakpoint(REF_CHR, 1460, strand=STRAND.NEG, orient=ORIENT.RIGHT)
         b2 = Breakpoint(REF_CHR, 2915, strand=STRAND.POS, orient=ORIENT.RIGHT)
         bpp = BreakpointPair(b1, b2)
-        self.assertEqual(('AAA', 'GGG'), bpp.breakpoint_sequence_homology(REFERENCE_GENOME))
+        assert bpp.breakpoint_sequence_homology(REFERENCE_GENOME) == ('AAA', 'GGG')
 
     def test_close_del(self):
         # ....TT|TT....
         b1 = Breakpoint(REF_CHR, 1001, strand=STRAND.POS, orient=ORIENT.LEFT)
         b2 = Breakpoint(REF_CHR, 1002, strand=STRAND.POS, orient=ORIENT.RIGHT)
         bpp = BreakpointPair(b1, b2)
-        self.assertEqual(('', ''), bpp.breakpoint_sequence_homology(REFERENCE_GENOME))
+        assert bpp.breakpoint_sequence_homology(REFERENCE_GENOME) == ('', '')
 
     def test_close_dup(self):
         # ....GATACATTTCTTCTTGAAAA...
@@ -234,11 +252,11 @@ def test_close_dup(self):
         b1 = Breakpoint(REF_CHR, 745, strand=STRAND.POS, orient=ORIENT.RIGHT)
         b2 = Breakpoint(REF_CHR, 747, strand=STRAND.POS, orient=ORIENT.LEFT)
         bpp = BreakpointPair(b1, b2)
-        self.assertEqual(('CT', 'TT'), bpp.breakpoint_sequence_homology(REFERENCE_GENOME))
+        assert bpp.breakpoint_sequence_homology(REFERENCE_GENOME) == ('CT', 'TT')
 
     def test_non_specific_error(self):
         b1 = Breakpoint(REF_CHR, 740, 745, strand=STRAND.POS, orient=ORIENT.RIGHT)
         b2 = Breakpoint(REF_CHR, 747, strand=STRAND.POS, orient=ORIENT.LEFT)
         bpp = BreakpointPair(b1, b2)
-        with self.assertRaises(AttributeError):
+        with pytest.raises(AttributeError):
             bpp.breakpoint_sequence_homology(REFERENCE_GENOME)
diff --git a/tests/integration/test_cluster.py b/tests/integration/test_cluster.py
index cfffbf59..3434c62b 100644
--- a/tests/integration/test_cluster.py
+++ b/tests/integration/test_cluster.py
@@ -15,7 +15,7 @@
 REF_CHR = 'fake'
 
 
-class TestFullClustering(unittest.TestCase):
+class TestFullClustering:
     def test_mocked_events(self):
         # none of the 24 events in the mocked file should cluster together
         # if we change the mock file we may need to update this function
@@ -26,7 +26,7 @@ def test_mocked_events(self):
             if bpp.data[COLUMNS.protocol] == PROTOCOL.GENOME:
                 bpps.append(bpp)
                 print(bpp)
-        self.assertEqual(28, len(bpps))
+        assert len(bpps) == 28
         clusters = merge_breakpoint_pairs(bpps, 10, 10)
 
         for cluster, input_pairs in sorted(
@@ -35,8 +35,8 @@ def test_mocked_events(self):
             print(cluster)
             for ip in input_pairs:
                 print('\t', ip)
-            self.assertEqual(1, len(input_pairs))
-        self.assertEqual(len(bpps), len(clusters))
+            assert len(input_pairs) == 1
+        assert len(clusters) == len(bpps)
 
     def test_clustering_events(self):
         # this file contains 2 events that should be clustered and produce a valid bpp
@@ -47,10 +47,10 @@ def test_clustering_events(self):
             if bpp.data[COLUMNS.protocol] == PROTOCOL.GENOME:
                 bpps.append(bpp)
                 print(bpp)
-        self.assertEqual(2, len(bpps))
+        assert len(bpps) == 2
         clusters = merge_breakpoint_pairs(bpps, 200, 25)
 
-        self.assertEqual(1, len(clusters))
+        assert len(clusters) == 1
 
         for cluster, input_pairs in sorted(
             clusters.items(), key=lambda x: (x[1][0].break1.chr, x[1][0].break2.chr)
@@ -60,17 +60,17 @@ def test_clustering_events(self):
                 print('\t', ip)
             print(cluster.flatten())
             # BPP(Breakpoint(15:67333604L), Breakpoint(15:67333606R), opposing=False)
-            self.assertEqual('L', cluster.break1.orient)
-            self.assertEqual('R', cluster.break2.orient)
-            self.assertEqual('15', cluster.break1.chr)
-            self.assertEqual('15', cluster.break2.chr)
-            self.assertEqual(67333604, cluster.break1.start)
-            self.assertEqual(67333606, cluster.break2.start)
-            self.assertEqual(67333604, cluster.break1.end)
-            self.assertEqual(67333606, cluster.break2.end)
+            assert cluster.break1.orient == 'L'
+            assert cluster.break2.orient == 'R'
+            assert cluster.break1.chr == '15'
+            assert cluster.break2.chr == '15'
+            assert cluster.break1.start == 67333604
+            assert cluster.break2.start == 67333606
+            assert cluster.break1.end == 67333604
+            assert cluster.break2.end == 67333606
 
 
-class TestMergeBreakpointPairs(unittest.TestCase):
+class TestMergeBreakpointPairs:
     def test_order_is_retained(self):
         # BPP(Breakpoint(1:1925143-1925155R), Breakpoint(1:1925144L), opposing=False)
         # >>  BPP(Breakpoint(1:1925144L), Breakpoint(1:1925144-1925158R), opposing=False)
@@ -93,10 +93,10 @@ def test_order_is_retained(self):
         for merge, inputs in mapping.items():
             print(merge)
             print(inputs)
-        self.assertEqual(1, len(mapping))
+        assert len(mapping) == 1
         merge = list(mapping)[0]
-        self.assertEqual('L', merge.break1.orient)
-        self.assertEqual('R', merge.break2.orient)
+        assert merge.break1.orient == 'L'
+        assert merge.break2.orient == 'R'
 
     def test_merging_identical_large_inputs(self):
         b1 = BreakpointPair(
@@ -110,17 +110,17 @@ def test_merging_identical_large_inputs(self):
             opposing_strands=False,
         )
         mapping = merge_breakpoint_pairs([b1, b2], 100, 25, verbose=True)
-        self.assertEqual(1, len(mapping))
+        assert len(mapping) == 1
         merge = list(mapping)[0]
-        self.assertEqual(2, len(mapping[merge]))
-        self.assertEqual('L', merge.break1.orient)
-        self.assertEqual('R', merge.break2.orient)
-        self.assertEqual('11', merge.break1.chr)
-        self.assertEqual('11', merge.break2.chr)
-        self.assertEqual(12856838, merge.break1.start)
-        self.assertEqual(12856840, merge.break2.start)  # putative indel will be shifted
-        self.assertEqual(12897006, merge.break1.end)
-        self.assertEqual(12897006, merge.break2.end)
+        assert len(mapping[merge]) == 2
+        assert merge.break1.orient == 'L'
+        assert merge.break2.orient == 'R'
+        assert merge.break1.chr == '11'
+        assert merge.break2.chr == '11'
+        assert merge.break1.start == 12856838
+        assert merge.break2.start == 12856840  # putative indel will be shifted
+        assert merge.break1.end == 12897006
+        assert merge.break2.end == 12897006
 
     def test_events_separate(self):
         bpps = [
@@ -150,28 +150,28 @@ def test_events_separate(self):
             ),
         ]
         mapping = merge_breakpoint_pairs(bpps, 100, 25, verbose=True)
-        self.assertEqual(2, len(mapping))
+        assert len(mapping) == 2
 
 
-class TestMergeIntervals(unittest.TestCase):
+class TestMergeIntervals:
     def test_merge_even_length(self):
         i1 = Interval(1001, 1002)
         result = merge_integer_intervals(i1, i1, weight_adjustment=25)
-        self.assertEqual(i1, result)
+        assert result == i1
 
     def test_merge_odd_length(self):
         i1 = Interval(1001, 1003)
         result = merge_integer_intervals(i1, i1, weight_adjustment=25)
-        self.assertEqual(i1, result)
+        assert result == i1
 
     def test_merge_large_length(self):
         i1 = Interval(1001, 5003)
         result = merge_integer_intervals(i1, i1, weight_adjustment=25)
-        self.assertEqual(i1, result)
+        assert result == i1
 
         i1 = Interval(12856838, 12897006)
         result = merge_integer_intervals(i1, i1, weight_adjustment=25)
-        self.assertEqual(i1, result)
+        assert result == i1
 
 
 if __name__ == '__main__':
diff --git a/tests/integration/test_illustrate.py b/tests/integration/test_illustrate.py
index ce377207..0ce2bdae 100644
--- a/tests/integration/test_illustrate.py
+++ b/tests/integration/test_illustrate.py
@@ -1,7 +1,6 @@
-import os
 import random
-import unittest
 
+import pytest
 from mavis.annotate import fusion, genomic, protein, variant
 from mavis.annotate.base import BioInterval
 from mavis.annotate.file_io import load_templates
@@ -33,10 +32,12 @@ def setUpModule():
     TEMPLATE_METADATA = load_templates(get_data('cytoBand.txt'))
 
 
-class TestDraw(unittest.TestCase):
-    def setUp(self):
-        self.canvas = Drawing(height=100, width=1000)
+@pytest.fixture
+def canvas():
+    return Drawing(height=100, width=1000)
 
+
+class TestDraw:
     def test_generate_interval_mapping_outside_range_error(self):
         temp = [
             Interval(48556470, 48556646),
@@ -58,14 +59,14 @@ def test_generate_interval_mapping_outside_range_error(self):
         Interval.convert_pos(mapping, st)
         Interval.convert_pos(mapping, end)
 
-    def test_generate_gene_mapping_err(self):
+    def test_generate_gene_mapping_err(self, canvas):
         #  _generate_interval_mapping [genomic.IntergenicRegion(11:77361962_77361962+)] 1181.39453125 5 30 None 77356962 77366962)
         ir = genomic.IntergenicRegion('11', 5000, 5000, STRAND.POS)
         tgt_width = 1000
         d = DiagramSettings(domain_name_regex_filter=r'.*')
         d.gene_min_buffer = 10
         # (self, canvas, gene, width, height, fill, label='', reference_genome=None)
-        draw_genes(d, self.canvas, [ir], tgt_width, [])
+        draw_genes(d, canvas, [ir], tgt_width, [])
 
         # _generate_interval_mapping ['Interval(29684391, 29684391)', 'Interval(29663998, 29696515)'] 1181.39453125 5 60 None 29662998 29697515
         # def generate_interval_mapping(cls, input_intervals, target_width, ratio, min_width, buffer_length=None, start=None, end=None, min_inter_width=None)
@@ -77,12 +78,12 @@ def test_split_intervals_into_tracks(self):
         # ------======--------
         # -----===============
         t = split_intervals_into_tracks([(1, 3), (3, 7), (2, 2), (4, 5), (3, 10)])
-        self.assertEqual(3, len(t))
-        self.assertEqual([(1, 3), (4, 5)], t[0])
-        self.assertEqual([(2, 2), (3, 7)], t[1])
-        self.assertEqual([(3, 10)], t[2])
+        assert len(t) == 3
+        assert t[0] == [(1, 3), (4, 5)]
+        assert t[1] == [(2, 2), (3, 7)]
+        assert t[2] == [(3, 10)]
 
-    def test_draw_genes(self):
+    def test_draw_genes(self, canvas):
 
         x = genomic.Gene('1', 1000, 2000, strand=STRAND.POS)
         y = genomic.Gene('1', 5000, 7000, strand=STRAND.NEG)
@@ -92,7 +93,7 @@ def test_draw_genes(self):
         breakpoints = [Breakpoint('1', 1100, 1200, orient=ORIENT.RIGHT)]
         g = draw_genes(
             d,
-            self.canvas,
+            canvas,
             [x, y, z],
             500,
             breakpoints,
@@ -100,24 +101,24 @@ def test_draw_genes(self):
         )
 
         # test the class structure
-        self.assertEqual(6, len(g.elements))
-        self.assertEqual('scaffold', g.elements[0].attribs.get('class', ''))
+        assert len(g.elements) == 6
+        assert g.elements[0].attribs.get('class', '') == 'scaffold'
         for i in range(1, 4):
-            self.assertEqual('gene', g.elements[i].attribs.get('class', ''))
-        self.assertEqual('mask', g.elements[4].attribs.get('class', ''))
-        self.assertEqual('breakpoint', g.elements[5].attribs.get('class', ''))
-        self.assertEqual(
-            d.track_height * 2 + d.padding + d.breakpoint_bottom_margin + d.breakpoint_top_margin,
-            g.height,
+            assert g.elements[i].attribs.get('class', '') == 'gene'
+        assert g.elements[4].attribs.get('class', '') == 'mask'
+        assert g.elements[5].attribs.get('class', '') == 'breakpoint'
+        assert (
+            g.height
+            == d.track_height * 2 + d.padding + d.breakpoint_bottom_margin + d.breakpoint_top_margin
         )
-        self.canvas.add(g)
-        self.assertEqual(len(g.labels), 4)
-        self.assertEqual(x, g.labels['G1'])
-        self.assertEqual(z, g.labels['G2'])
-        self.assertEqual(y, g.labels['G3'])
-        self.assertEqual(breakpoints[0], g.labels['B1'])
-
-    def test_draw_ustranscript(self):
+        canvas.add(g)
+        assert 4 == len(g.labels)
+        assert g.labels['G1'] == x
+        assert g.labels['G2'] == z
+        assert g.labels['G3'] == y
+        assert g.labels['B1'] == breakpoints[0]
+
+    def test_draw_ustranscript(self, canvas):
         d = DiagramSettings(domain_name_regex_filter=r'.*')
         # domains = [protein.Domain()]
         d1 = protein.Domain('first', [(55, 61), (71, 73)])
@@ -132,43 +133,38 @@ def test_draw_ustranscript(self):
             domains=[d2, d1],
         )
         b = Breakpoint('1', 350, 410, orient=ORIENT.LEFT)
-        g = draw_ustranscript(
-            d, self.canvas, t, 500, colors={t.exons[1]: '#FFFF00'}, breakpoints=[b]
-        )
-        self.canvas.add(g)
-        # self.canvas.saveas('test_draw_ustranscript.svg')
-        self.assertEqual(2, len(self.canvas.elements))
-        self.assertEqual(3, len(g.elements))
+        g = draw_ustranscript(d, canvas, t, 500, colors={t.exons[1]: '#FFFF00'}, breakpoints=[b])
+        canvas.add(g)
+        # canvas.saveas('test_draw_ustranscript.svg')
+        assert len(canvas.elements) == 2
+        assert len(g.elements) == 3
         for el, cls in zip(g.elements[0].elements, ['splicing', 'exon_track', 'protein']):
-            self.assertEqual(cls, el.attribs.get('class', ''))
+            assert el.attribs.get('class', '') == cls
 
         for el, cls in zip(
             g.elements[0].elements[1].elements, ['scaffold', 'exon', 'exon', 'exon']
         ):
-            self.assertEqual(cls, el.attribs.get('class', ''))
+            assert el.attribs.get('class', '') == cls
 
         for el, cls in zip(g.elements[0].elements[2].elements, ['translation', 'domain', 'domain']):
-            self.assertEqual(cls, el.attribs.get('class', ''))
-
-        self.assertEqual(
-            sum(
-                [
-                    d.track_height,
-                    d.splice_height,
-                    2 * d.padding,
-                    d.domain_track_height * 2,
-                    d.translation_track_height,
-                    d.padding,
-                    d.breakpoint_top_margin,
-                    d.breakpoint_bottom_margin,
-                ]
-            ),
-            g.height,
-        )
-        self.assertEqual(d1.name, g.labels['D1'])
-        self.assertEqual(d2.name, g.labels['D2'])
-
-    def test_draw_consec_exons(self):
+            assert el.attribs.get('class', '') == cls
+
+        assert g.height == sum(
+            [
+                d.track_height,
+                d.splice_height,
+                2 * d.padding,
+                d.domain_track_height * 2,
+                d.translation_track_height,
+                d.padding,
+                d.breakpoint_top_margin,
+                d.breakpoint_bottom_margin,
+            ]
+        )
+        assert g.labels['D1'] == d1.name
+        assert g.labels['D2'] == d2.name
+
+    def test_draw_consec_exons(self, canvas):
         d = DiagramSettings(domain_name_regex_filter=r'.*')
         # domains = [protein.Domain()]
         t = build_transcript(
@@ -180,32 +176,30 @@ def test_draw_consec_exons(self):
             domains=[],
         )
         b = Breakpoint('1', 350, 410, orient=ORIENT.LEFT)
-        g = draw_ustranscript(
-            d, self.canvas, t, 500, colors={t.exons[1]: '#FFFF00'}, breakpoints=[b]
-        )
-        self.canvas.add(g)
+        g = draw_ustranscript(d, canvas, t, 500, colors={t.exons[1]: '#FFFF00'}, breakpoints=[b])
+        canvas.add(g)
         if OUTPUT_SVG:
-            self.canvas.saveas('test_draw_consec_exons.svg')
+            canvas.saveas('test_draw_consec_exons.svg')
 
-        # self.canvas.saveas('test_draw_ustranscript.svg')
-        self.assertEqual(2, len(self.canvas.elements))
-        self.assertEqual(3, len(g.elements))
+        # canvas.saveas('test_draw_ustranscript.svg')
+        assert len(canvas.elements) == 2
+        assert len(g.elements) == 3
         # check that only 2 splicing marks were created
-        self.assertEqual(2, len(g.elements[0].elements[0].elements))
+        assert len(g.elements[0].elements[0].elements) == 2
         # get the second exon
         ex2 = g.elements[0].elements[1].elements[2].elements[0]
         print(ex2)
-        self.assertAlmostEqual(120.7783426339, ex2.attribs.get('width'))
+        assert pytest.approx(ex2.attribs.get('width')) == 120.7783426339
         # get the third exon
         ex3 = g.elements[0].elements[1].elements[3].elements[0]
         print(ex3)
-        self.assertAlmostEqual(96.52494419642852, ex3.attribs.get('width'))
+        assert pytest.approx(ex3.attribs.get('width')) == 96.52494419642852
 
     def test_dynamic_label_color(self):
-        self.assertEqual(HEX_WHITE, dynamic_label_color(HEX_BLACK))
-        self.assertEqual(HEX_BLACK, dynamic_label_color(HEX_WHITE))
+        assert dynamic_label_color(HEX_BLACK) == HEX_WHITE
+        assert dynamic_label_color(HEX_WHITE) == HEX_BLACK
 
-    def test_draw_legend(self):
+    def test_draw_legend(self, canvas):
         d = DiagramSettings(domain_name_regex_filter=r'.*')
         swatches = [
             ('#000000', 'black'),
@@ -214,19 +208,20 @@ def test_draw_legend(self):
             ('#00FF00', 'green'),
             ('#FFFF00', 'yellow'),
         ]
-        g = draw_legend(d, self.canvas, swatches)
-        self.canvas.add(g)
+        g = draw_legend(d, canvas, swatches)
+        canvas.add(g)
 
-        self.assertEqual('legend', g.attribs.get('class', ''))
-        self.assertEqual(
-            d.legend_swatch_size * len(swatches) + d.padding * (len(swatches) - 1 + 2), g.height
+        assert g.attribs.get('class', '') == 'legend'
+        assert g.height == d.legend_swatch_size * len(swatches) + d.padding * (
+            len(swatches) - 1 + 2
         )
-        self.assertEqual(6, len(g.elements))
-        self.assertEqual(
-            6 * d.legend_font_size * d.font_width_height_ratio
+
+        assert len(g.elements) == 6
+        assert (
+            g.width
+            == 6 * d.legend_font_size * d.font_width_height_ratio
             + d.padding * 3
-            + d.legend_swatch_size,
-            g.width,
+            + d.legend_swatch_size
         )
 
     def test_draw_layout_single_transcript(self):
@@ -247,7 +242,7 @@ def test_draw_layout_single_transcript(self):
         ft = variant.FusionTranscript.build(ann, reference_genome)
         ann.fusion = ft
         canvas, legend = draw_sv_summary_diagram(d, ann)
-        self.assertEqual(4, len(canvas.elements))  # defs counts as element
+        assert len(canvas.elements) == 4  # defs counts as element
         expected_height = (
             d.top_margin
             + d.bottom_margin
@@ -269,7 +264,7 @@ def test_draw_layout_single_transcript(self):
         )
         if OUTPUT_SVG:
             canvas.saveas('test_draw_layout_single_transcript.svg')
-        self.assertEqual(expected_height, canvas.attribs['height'])
+        assert canvas.attribs['height'] == expected_height
 
     def test_draw_layout_single_genomic(self):
         d = DiagramSettings(domain_name_regex_filter=r'.*')
@@ -305,12 +300,12 @@ def test_draw_layout_single_genomic(self):
 
         ft = variant.FusionTranscript.build(ann, reference_genome)
         ann.fusion = ft
-        self.assertEqual(t1.exons[0], ft.exon_mapping[ft.exons[0].position])
-        self.assertEqual(t2.exons[2], ft.exon_mapping[ft.exons[1].position])
-        self.assertEqual(t2.exons[3], ft.exon_mapping[ft.exons[2].position])
+        assert ft.exon_mapping[ft.exons[0].position] == t1.exons[0]
+        assert ft.exon_mapping[ft.exons[1].position] == t2.exons[2]
+        assert ft.exon_mapping[ft.exons[2].position] == t2.exons[3]
 
         canvas, legend = draw_sv_summary_diagram(d, ann)
-        self.assertEqual(5, len(canvas.elements))  # defs counts as element
+        assert len(canvas.elements) == 5  # defs counts as element
 
         expected_height = (
             d.top_margin
@@ -331,7 +326,7 @@ def test_draw_layout_single_genomic(self):
             + d.track_height
             + d.splice_height
         )
-        self.assertEqual(expected_height, canvas.attribs['height'])
+        assert canvas.attribs['height'] == expected_height
         if OUTPUT_SVG:
             canvas.saveas('test_draw_layout_single_genomic.svg')
 
@@ -378,7 +373,7 @@ def test_draw_layout_translocation(self):
         ft = variant.FusionTranscript.build(ann, reference_genome)
         ann.fusion = ft
         canvas, legend = draw_sv_summary_diagram(d, ann)
-        self.assertEqual(6, len(canvas.elements))  # defs counts as element
+        assert len(canvas.elements) == 6  # defs counts as element
         expected_height = (
             d.top_margin
             + d.bottom_margin
@@ -399,7 +394,7 @@ def test_draw_layout_translocation(self):
             + d.track_height
             + d.splice_height
         )
-        self.assertEqual(expected_height, canvas.attribs['height'])
+        assert canvas.attribs['height'] == expected_height
 
     def test_draw_template(self):
         # def draw_template(self, canvas, template, target_width, height, labels=None, colors=None):
@@ -417,12 +412,13 @@ def test_draw_template(self):
         canvas = Drawing(size=(1000, 50))
 
         g = draw_template(d, canvas, TEMPLATE_METADATA['1'], 1000)
-        self.assertEqual(
-            d.breakpoint_top_margin + d.breakpoint_bottom_margin + d.template_track_height, g.height
+        assert (
+            g.height
+            == d.breakpoint_top_margin + d.breakpoint_bottom_margin + d.template_track_height
         )
         canvas.add(g)
         canvas.attribs['height'] = g.height
-        self.assertEqual(2, len(canvas.elements))
+        assert len(canvas.elements) == 2
 
     def test_draw_translocation_with_template(self):
         d = DiagramSettings(domain_name_regex_filter=r'.*')
@@ -473,7 +469,7 @@ def test_draw_translocation_with_template(self):
         )
         if OUTPUT_SVG:
             canvas.saveas('test_draw_translocation_with_template.svg')
-        self.assertEqual(8, len(canvas.elements))  # defs counts as element
+        assert len(canvas.elements) == 8  # defs counts as element
         expected_height = (
             d.top_margin
             + d.bottom_margin
@@ -497,7 +493,7 @@ def test_draw_translocation_with_template(self):
             + d.splice_height
             + d.template_track_height
         )
-        self.assertAlmostEqual(expected_height, canvas.attribs['height'])
+        assert pytest.approx(canvas.attribs['height']) == expected_height
 
     def test_draw_overlay(self):
         gene = genomic.Gene('12', 25357723, 25403870, strand=STRAND.NEG, name='KRAS')
@@ -553,7 +549,7 @@ def test_draw_overlay(self):
 
         d.gene_min_buffer = 0
         canvas = draw_multi_transcript_overlay(d, gene, vmarkers=[marker], plots=[s, s])
-        self.assertEqual(2, len(canvas.elements))  # defs counts as element
+        assert len(canvas.elements) == 2  # defs counts as element
         if OUTPUT_SVG:
             canvas.saveas('test_draw_overlay.svg')
 
diff --git a/tests/integration/test_pairing.py b/tests/integration/test_pairing.py
index 3a0064f8..4a3a77e1 100644
--- a/tests/integration/test_pairing.py
+++ b/tests/integration/test_pairing.py
@@ -1,101 +1,130 @@
 import unittest
 
+import pytest
 from mavis.annotate.genomic import PreTranscript
 from mavis.breakpoint import Breakpoint, BreakpointPair
 from mavis.constants import CALL_METHOD, COLUMNS, ORIENT, PROTOCOL, STRAND, SVTYPE
 from mavis.pairing import pairing
 
 
-class TestPairing(unittest.TestCase):
-    def setUp(self):
-        self.gev1 = BreakpointPair(
-            Breakpoint('1', 1),
-            Breakpoint('1', 10),
-            opposing_strands=True,
-            **{
-                COLUMNS.event_type: SVTYPE.DEL,
-                COLUMNS.call_method: CALL_METHOD.CONTIG,
-                COLUMNS.fusion_sequence_fasta_id: None,
-                COLUMNS.protocol: PROTOCOL.GENOME,
-            },
-        )
-        self.gev2 = BreakpointPair(
-            Breakpoint('1', 1),
-            Breakpoint('1', 10),
-            opposing_strands=True,
-            **{
-                COLUMNS.event_type: SVTYPE.DEL,
-                COLUMNS.call_method: CALL_METHOD.CONTIG,
-                COLUMNS.fusion_sequence_fasta_id: None,
-                COLUMNS.protocol: PROTOCOL.GENOME,
-            },
-        )
-
-        self.ust1 = PreTranscript(
-            exons=[(1, 100), (301, 400), (501, 600)], strand=STRAND.POS, name='t1'
-        )
-        self.ust2 = PreTranscript(
-            exons=[(1001, 1100), (1301, 1400), (1501, 1600)], strand=STRAND.POS, name='t2'
-        )
-        self.distances = {CALL_METHOD.CONTIG: 0, CALL_METHOD.FLANK: 0, CALL_METHOD.SPLIT: 10}
-        self.TRANSCRIPTS = {self.ust1.name: self.ust1, self.ust2.name: self.ust2}
-
-    def test_genome_protocol_diff_chrom(self):
-        self.gev2.break1.chr = '2'
-        self.assertFalse(pairing.equivalent(self.gev1, self.gev2, self.TRANSCRIPTS))
-
-    def test_genome_protocol_diff_orient(self):
-        self.gev2.break1.orient = ORIENT.LEFT
-        self.gev1.break1.orient = ORIENT.RIGHT
-        self.assertFalse(pairing.equivalent(self.gev1, self.gev2, self.TRANSCRIPTS))
-
-    def test_genome_protocol_diff_strand(self):
-        self.gev2.break1.strand = STRAND.POS
-        self.gev1.break1.strand = STRAND.NEG
-        self.assertFalse(pairing.equivalent(self.gev1, self.gev2, self.TRANSCRIPTS))
-
-    def test_genome_protocol_diff_event_type(self):
-        self.gev2.data[COLUMNS.event_type] = SVTYPE.DEL
-        self.gev1.data[COLUMNS.event_type] = SVTYPE.INS
-        self.assertFalse(pairing.equivalent(self.gev1, self.gev2, self.TRANSCRIPTS))
-
-    def test_genome_protocol_ns_orient(self):
-        self.gev2.break1.orient = ORIENT.LEFT
-        self.gev1.break2.orient = ORIENT.RIGHT
-        self.assertTrue(pairing.equivalent(self.gev1, self.gev2, self.TRANSCRIPTS))
-
-    def test_genome_protocol_by_contig(self):
-        self.gev1.call_method = CALL_METHOD.CONTIG
-        self.gev2.call_method = CALL_METHOD.CONTIG
-        self.distances[CALL_METHOD.CONTIG] = 0
-        self.distances[CALL_METHOD.SPLIT] = 10
-        self.assertTrue(pairing.equivalent(self.gev1, self.gev2, distances=self.distances))
-
-        self.gev1.break1.start = 2
-        self.gev1.break1.end = 20
-        self.assertFalse(pairing.equivalent(self.gev1, self.gev2, distances=self.distances))
-
-    def test_genome_protocol_by_split(self):
-        self.gev1.call_method = CALL_METHOD.SPLIT
-        self.gev2.call_method = CALL_METHOD.SPLIT
-        self.assertTrue(pairing.equivalent(self.gev1, self.gev2, distances=self.distances))
-        self.distances[CALL_METHOD.FLANK] = 100
-        self.distances[CALL_METHOD.SPLIT] = 10
-        self.gev1.break1.start = 11
-        self.gev1.break1.end = 20
-        self.assertFalse(pairing.equivalent(self.gev1, self.gev2, distances=self.distances))
-
-    def test_genome_protocol_by_flanking(self):
-        self.gev1.call_method = CALL_METHOD.FLANK
-        self.gev2.call_method = CALL_METHOD.FLANK
-        self.assertTrue(pairing.equivalent(self.gev1, self.gev2, distances=self.distances))
-        self.distances[CALL_METHOD.FLANK] = 10
-        self.distances[CALL_METHOD.SPLIT] = 100
-        self.gev1.break1.start = 11
-        self.gev1.break1.end = 20
-        self.assertFalse(pairing.equivalent(self.gev1, self.gev2, distances=self.distances))
-
-    def test_mixed_protocol_fusions_same_sequence(self):
+@pytest.fixture
+def genomic_event1():
+    return BreakpointPair(
+        Breakpoint('1', 1),
+        Breakpoint('1', 10),
+        opposing_strands=True,
+        **{
+            COLUMNS.event_type: SVTYPE.DEL,
+            COLUMNS.call_method: CALL_METHOD.CONTIG,
+            COLUMNS.fusion_sequence_fasta_id: None,
+            COLUMNS.protocol: PROTOCOL.GENOME,
+        },
+    )
+
+
+@pytest.fixture
+def genomic_event2():
+    return BreakpointPair(
+        Breakpoint('1', 1),
+        Breakpoint('1', 10),
+        opposing_strands=True,
+        **{
+            COLUMNS.event_type: SVTYPE.DEL,
+            COLUMNS.call_method: CALL_METHOD.CONTIG,
+            COLUMNS.fusion_sequence_fasta_id: None,
+            COLUMNS.protocol: PROTOCOL.GENOME,
+        },
+    )
+
+
+@pytest.fixture
+def unspliced_transcript1():
+    return PreTranscript(exons=[(1, 100), (301, 400), (501, 600)], strand=STRAND.POS, name='t1')
+
+
+@pytest.fixture
+def unspliced_transcript2():
+    return PreTranscript(
+        exons=[(1001, 1100), (1301, 1400), (1501, 1600)], strand=STRAND.POS, name='t2'
+    )
+
+
+@pytest.fixture
+def transcripts(unspliced_transcript1, unspliced_transcript2):
+    return {
+        unspliced_transcript1.name: unspliced_transcript1,
+        unspliced_transcript2.name: unspliced_transcript2,
+    }
+
+
+@pytest.fixture
+def distances():
+    return {CALL_METHOD.CONTIG: 0, CALL_METHOD.FLANK: 0, CALL_METHOD.SPLIT: 10}
+
+
+class TestPairing:
+    def test_genome_protocol_diff_chrom(self, genomic_event1, genomic_event2, transcripts):
+        genomic_event2.break1.chr = '2'
+        assert not pairing.equivalent(genomic_event1, genomic_event2, transcripts)
+
+    def test_genome_protocol_diff_orient(self, genomic_event1, genomic_event2, transcripts):
+        genomic_event2.break1.orient = ORIENT.LEFT
+        genomic_event1.break1.orient = ORIENT.RIGHT
+        assert not pairing.equivalent(genomic_event1, genomic_event2, transcripts)
+
+    def test_genome_protocol_diff_strand(self, genomic_event1, genomic_event2, transcripts):
+        genomic_event2.break1.strand = STRAND.POS
+        genomic_event1.break1.strand = STRAND.NEG
+        assert not pairing.equivalent(genomic_event1, genomic_event2, transcripts)
+
+    def test_genome_protocol_diff_event_type(self, genomic_event1, genomic_event2, transcripts):
+        genomic_event2.data[COLUMNS.event_type] = SVTYPE.DEL
+        genomic_event1.data[COLUMNS.event_type] = SVTYPE.INS
+        assert not pairing.equivalent(genomic_event1, genomic_event2, transcripts)
+
+    def test_genome_protocol_ns_orient(self, genomic_event1, genomic_event2, transcripts):
+        genomic_event2.break1.orient = ORIENT.LEFT
+        genomic_event1.break2.orient = ORIENT.RIGHT
+        assert pairing.equivalent(genomic_event1, genomic_event2, transcripts)
+
+    def test_genome_protocol_by_contig(
+        self, genomic_event1, genomic_event2, transcripts, distances
+    ):
+        genomic_event1.call_method = CALL_METHOD.CONTIG
+        genomic_event2.call_method = CALL_METHOD.CONTIG
+        distances[CALL_METHOD.CONTIG] = 0
+        distances[CALL_METHOD.SPLIT] = 10
+        assert pairing.equivalent(genomic_event1, genomic_event2, distances=distances)
+
+        genomic_event1.break1.start = 2
+        genomic_event1.break1.end = 20
+        assert not pairing.equivalent(genomic_event1, genomic_event2, distances=distances)
+
+    def test_genome_protocol_by_split(self, genomic_event1, genomic_event2, transcripts, distances):
+        genomic_event1.call_method = CALL_METHOD.SPLIT
+        genomic_event2.call_method = CALL_METHOD.SPLIT
+        assert pairing.equivalent(genomic_event1, genomic_event2, distances=distances)
+        distances[CALL_METHOD.FLANK] = 100
+        distances[CALL_METHOD.SPLIT] = 10
+        genomic_event1.break1.start = 11
+        genomic_event1.break1.end = 20
+        assert not pairing.equivalent(genomic_event1, genomic_event2, distances=distances)
+
+    def test_genome_protocol_by_flanking(
+        self, genomic_event1, genomic_event2, transcripts, distances
+    ):
+        genomic_event1.call_method = CALL_METHOD.FLANK
+        genomic_event2.call_method = CALL_METHOD.FLANK
+        assert pairing.equivalent(genomic_event1, genomic_event2, distances=distances)
+        distances[CALL_METHOD.FLANK] = 10
+        distances[CALL_METHOD.SPLIT] = 100
+        genomic_event1.break1.start = 11
+        genomic_event1.break1.end = 20
+        assert not pairing.equivalent(genomic_event1, genomic_event2, distances=distances)
+
+    def test_mixed_protocol_fusions_same_sequence(
+        self, genomic_event1, genomic_event2, transcripts
+    ):
         genome_ev = BreakpointPair(
             Breakpoint('1', 1),
             Breakpoint('1', 10),
@@ -126,12 +155,14 @@ def test_mixed_protocol_fusions_same_sequence(self):
                 COLUMNS.fusion_cdna_coding_end: 10,
             },
         )
-        self.assertFalse(pairing.equivalent(genome_ev, trans_ev, self.TRANSCRIPTS))
+        assert not pairing.equivalent(genome_ev, trans_ev, transcripts)
         genome_ev.data[COLUMNS.fusion_sequence_fasta_id] = 'a'
         trans_ev.data[COLUMNS.fusion_sequence_fasta_id] = 'a'
-        self.assertTrue(pairing.inferred_equivalent(genome_ev, trans_ev, self.TRANSCRIPTS))
+        assert pairing.inferred_equivalent(genome_ev, trans_ev, transcripts)
 
-    def test_mixed_protocol_fusions_same_sequence_diff_translation(self):
+    def test_mixed_protocol_fusions_same_sequence_diff_translation(
+        self, genomic_event1, genomic_event2, transcripts
+    ):
         genome_ev = BreakpointPair(
             Breakpoint('1', 1),
             Breakpoint('1', 10),
@@ -162,9 +193,11 @@ def test_mixed_protocol_fusions_same_sequence_diff_translation(self):
                 COLUMNS.fusion_cdna_coding_end: 50,
             },
         )
-        self.assertFalse(pairing.inferred_equivalent(genome_ev, trans_ev, self.TRANSCRIPTS))
+        assert not pairing.inferred_equivalent(genome_ev, trans_ev, transcripts)
 
-    def test_mixed_protocol_fusions_different_sequence(self):
+    def test_mixed_protocol_fusions_different_sequence(
+        self, genomic_event1, genomic_event2, transcripts
+    ):
         genome_ev = BreakpointPair(
             Breakpoint('1', 1),
             Breakpoint('1', 10),
@@ -195,9 +228,11 @@ def test_mixed_protocol_fusions_different_sequence(self):
                 COLUMNS.fusion_cdna_coding_end: 10,
             },
         )
-        self.assertFalse(pairing.inferred_equivalent(genome_ev, trans_ev, self.TRANSCRIPTS))
+        assert not pairing.inferred_equivalent(genome_ev, trans_ev, transcripts)
 
-    def test_mixed_protocol_one_predicted_one_match(self):
+    def test_mixed_protocol_one_predicted_one_match(
+        self, genomic_event1, genomic_event2, transcripts, unspliced_transcript1
+    ):
         genome_ev = BreakpointPair(
             Breakpoint('1', 350, orient=ORIENT.LEFT),
             Breakpoint('1', 400, orient=ORIENT.RIGHT),
@@ -207,7 +242,7 @@ def test_mixed_protocol_one_predicted_one_match(self):
                 COLUMNS.call_method: CALL_METHOD.CONTIG,
                 COLUMNS.fusion_sequence_fasta_id: None,
                 COLUMNS.protocol: PROTOCOL.GENOME,
-                COLUMNS.transcript1: self.ust1.name,
+                COLUMNS.transcript1: unspliced_transcript1.name,
                 COLUMNS.transcript2: None,
             },
         )
@@ -220,21 +255,23 @@ def test_mixed_protocol_one_predicted_one_match(self):
                 COLUMNS.call_method: CALL_METHOD.CONTIG,
                 COLUMNS.fusion_sequence_fasta_id: None,
                 COLUMNS.protocol: PROTOCOL.TRANS,
-                COLUMNS.transcript1: self.ust1.name,
+                COLUMNS.transcript1: unspliced_transcript1.name,
                 COLUMNS.transcript2: None,
             },
         )
-        self.assertTrue(pairing.equivalent(genome_ev, trans_ev, self.TRANSCRIPTS))
-        self.assertTrue(pairing.equivalent(trans_ev, genome_ev, self.TRANSCRIPTS))
+        assert pairing.equivalent(genome_ev, trans_ev, transcripts)
+        assert pairing.equivalent(trans_ev, genome_ev, transcripts)
 
-        genome_ev.data[COLUMNS.transcript2] = self.ust1.name
+        genome_ev.data[COLUMNS.transcript2] = unspliced_transcript1.name
         genome_ev.data[COLUMNS.transcript1] = None
-        trans_ev.data[COLUMNS.transcript2] = self.ust1.name
+        trans_ev.data[COLUMNS.transcript2] = unspliced_transcript1.name
         trans_ev.data[COLUMNS.transcript1] = None
-        self.assertTrue(pairing.inferred_equivalent(genome_ev, trans_ev, self.TRANSCRIPTS))
-        self.assertTrue(pairing.inferred_equivalent(trans_ev, genome_ev, self.TRANSCRIPTS))
+        assert pairing.inferred_equivalent(genome_ev, trans_ev, transcripts)
+        assert pairing.inferred_equivalent(trans_ev, genome_ev, transcripts)
 
-    def test_mixed_protocol_one_predicted_one_mismatch(self):
+    def test_mixed_protocol_one_predicted_one_mismatch(
+        self, genomic_event1, genomic_event2, transcripts, unspliced_transcript1
+    ):
         genome_ev = BreakpointPair(
             Breakpoint('1', 350, orient=ORIENT.LEFT),
             Breakpoint('1', 400, orient=ORIENT.RIGHT),
@@ -244,7 +281,7 @@ def test_mixed_protocol_one_predicted_one_mismatch(self):
                 COLUMNS.call_method: CALL_METHOD.CONTIG,
                 COLUMNS.fusion_sequence_fasta_id: None,
                 COLUMNS.protocol: PROTOCOL.GENOME,
-                COLUMNS.transcript1: self.ust1.name,
+                COLUMNS.transcript1: unspliced_transcript1.name,
                 COLUMNS.transcript2: None,
             },
         )
@@ -257,19 +294,19 @@ def test_mixed_protocol_one_predicted_one_mismatch(self):
                 COLUMNS.call_method: CALL_METHOD.CONTIG,
                 COLUMNS.fusion_sequence_fasta_id: None,
                 COLUMNS.protocol: PROTOCOL.TRANS,
-                COLUMNS.transcript1: self.ust1.name,
+                COLUMNS.transcript1: unspliced_transcript1.name,
                 COLUMNS.transcript2: None,
             },
         )
-        self.assertTrue(pairing.equivalent(genome_ev, trans_ev, self.TRANSCRIPTS))
-        self.assertTrue(pairing.equivalent(trans_ev, genome_ev, self.TRANSCRIPTS))
+        assert pairing.equivalent(genome_ev, trans_ev, transcripts)
+        assert pairing.equivalent(trans_ev, genome_ev, transcripts)
 
-        genome_ev.data[COLUMNS.transcript2] = self.ust1.name
+        genome_ev.data[COLUMNS.transcript2] = unspliced_transcript1.name
         genome_ev.data[COLUMNS.transcript1] = None
-        trans_ev.data[COLUMNS.transcript2] = self.ust1.name
+        trans_ev.data[COLUMNS.transcript2] = unspliced_transcript1.name
         trans_ev.data[COLUMNS.transcript1] = None
-        self.assertTrue(pairing.inferred_equivalent(genome_ev, trans_ev, self.TRANSCRIPTS))
-        self.assertTrue(pairing.inferred_equivalent(trans_ev, genome_ev, self.TRANSCRIPTS))
+        assert pairing.inferred_equivalent(genome_ev, trans_ev, transcripts)
+        assert pairing.inferred_equivalent(trans_ev, genome_ev, transcripts)
 
     def test_mixed_protocol_both_predicted(self):
 
@@ -288,101 +325,107 @@ def test_transcriptome_protocol(self):
         raise unittest.SkipTest('TODO')
 
 
-class TestBreakpointPrediction(unittest.TestCase):
-    def setUp(self):
-        self.pre_transcript = PreTranscript([(101, 200), (301, 400), (501, 600)], strand=STRAND.POS)
-        self.n_ust = PreTranscript([(101, 200), (301, 400), (501, 600)], strand=STRAND.NEG)
+@pytest.fixture
+def positive_transcript():
+    return PreTranscript([(101, 200), (301, 400), (501, 600)], strand=STRAND.POS)
+
+
+@pytest.fixture
+def negative_transcript():
+    return PreTranscript([(101, 200), (301, 400), (501, 600)], strand=STRAND.NEG)
+
 
-    def test_exonic_five_prime(self):
+class TestBreakpointPrediction:
+    def test_exonic_five_prime(self, positive_transcript):
         b = Breakpoint('1', 350, orient=ORIENT.LEFT)
-        breaks = pairing.predict_transcriptome_breakpoint(b, self.pre_transcript)
-        self.assertEqual(2, len(breaks))
-        self.assertEqual(200, breaks[0].start)
-        self.assertEqual(b, breaks[1])
+        breaks = pairing.predict_transcriptome_breakpoint(b, positive_transcript)
+        assert len(breaks) == 2
+        assert breaks[0].start == 200
+        assert breaks[1] == b
 
-    def test_exonic_five_prime_first_exon(self):
+    def test_exonic_five_prime_first_exon(self, positive_transcript):
         b = Breakpoint('1', 150, orient=ORIENT.LEFT)
-        breaks = pairing.predict_transcriptome_breakpoint(b, self.pre_transcript)
-        self.assertEqual(1, len(breaks))
-        self.assertEqual(b, breaks[0])
+        breaks = pairing.predict_transcriptome_breakpoint(b, positive_transcript)
+        assert len(breaks) == 1
+        assert breaks[0] == b
 
-    def test_exonic_three_prime(self):
+    def test_exonic_three_prime(self, positive_transcript):
         b = Breakpoint('1', 350, orient=ORIENT.RIGHT)
-        breaks = pairing.predict_transcriptome_breakpoint(b, self.pre_transcript)
-        self.assertEqual(2, len(breaks))
-        self.assertEqual(501, breaks[1].start)
-        self.assertEqual(b, breaks[0])
+        breaks = pairing.predict_transcriptome_breakpoint(b, positive_transcript)
+        assert len(breaks) == 2
+        assert breaks[1].start == 501
+        assert breaks[0] == b
 
-    def test_exonic_three_prime_last_exon(self):
+    def test_exonic_three_prime_last_exon(self, positive_transcript):
         b = Breakpoint('1', 550, orient=ORIENT.RIGHT)
-        breaks = pairing.predict_transcriptome_breakpoint(b, self.pre_transcript)
-        self.assertEqual(1, len(breaks))
-        self.assertEqual(b, breaks[0])
+        breaks = pairing.predict_transcriptome_breakpoint(b, positive_transcript)
+        assert len(breaks) == 1
+        assert breaks[0] == b
 
-    def test_intronic_five_prime(self):
+    def test_intronic_five_prime(self, positive_transcript):
         b = Breakpoint('1', 450, orient=ORIENT.LEFT)
-        breaks = pairing.predict_transcriptome_breakpoint(b, self.pre_transcript)
-        self.assertEqual(1, len(breaks))
-        self.assertEqual(400, breaks[0].start)
+        breaks = pairing.predict_transcriptome_breakpoint(b, positive_transcript)
+        assert len(breaks) == 1
+        assert breaks[0].start == 400
 
-    def test_intronic_three_prime(self):
+    def test_intronic_three_prime(self, positive_transcript):
         b = Breakpoint('1', 250, orient=ORIENT.RIGHT)
-        breaks = pairing.predict_transcriptome_breakpoint(b, self.pre_transcript)
-        self.assertEqual(1, len(breaks))
-        self.assertEqual(301, breaks[0].start)
+        breaks = pairing.predict_transcriptome_breakpoint(b, positive_transcript)
+        assert len(breaks) == 1
+        assert breaks[0].start == 301
 
-    def test_outside_transcript(self):
+    def test_outside_transcript(self, positive_transcript):
         b = Breakpoint('1', 100, orient=ORIENT.RIGHT)
-        with self.assertRaises(AssertionError):
-            pairing.predict_transcriptome_breakpoint(b, self.pre_transcript)
+        with pytest.raises(AssertionError):
+            pairing.predict_transcriptome_breakpoint(b, positive_transcript)
 
     # for neg transcripts
-    def test_exonic_three_prime_neg(self):
+    def test_exonic_three_prime_neg(self, negative_transcript):
         b = Breakpoint('1', 350, orient=ORIENT.LEFT, strand=STRAND.NEG)
-        breaks = pairing.predict_transcriptome_breakpoint(b, self.n_ust)
-        self.assertEqual(2, len(breaks))
-        self.assertEqual(200, breaks[0].start)
-        self.assertEqual(b, breaks[1])
+        breaks = pairing.predict_transcriptome_breakpoint(b, negative_transcript)
+        assert len(breaks) == 2
+        assert breaks[0].start == 200
+        assert breaks[1] == b
 
-    def test_intronic_three_prime_neg(self):
+    def test_intronic_three_prime_neg(self, negative_transcript):
         b = Breakpoint('1', 450, orient=ORIENT.LEFT, strand=STRAND.NEG)
-        breaks = pairing.predict_transcriptome_breakpoint(b, self.n_ust)
-        self.assertEqual(1, len(breaks))
-        self.assertEqual(400, breaks[0].start)
+        breaks = pairing.predict_transcriptome_breakpoint(b, negative_transcript)
+        assert len(breaks) == 1
+        assert breaks[0].start == 400
 
-    def test_exonic_five_prime_neg_first_exon(self):
+    def test_exonic_five_prime_neg_first_exon(self, negative_transcript):
         b = Breakpoint('1', 150, orient=ORIENT.LEFT)
-        breaks = pairing.predict_transcriptome_breakpoint(b, self.n_ust)
-        self.assertEqual(1, len(breaks))
-        self.assertEqual(b, breaks[0])
+        breaks = pairing.predict_transcriptome_breakpoint(b, negative_transcript)
+        assert len(breaks) == 1
+        assert breaks[0] == b
 
-    def test_exonic_three_prime_neg_first_exon(self):
+    def test_exonic_three_prime_neg_first_exon(self, negative_transcript):
         b = Breakpoint('1', 150, orient=ORIENT.LEFT)
-        breaks = pairing.predict_transcriptome_breakpoint(b, self.n_ust)
-        self.assertEqual(1, len(breaks))
-        self.assertEqual(b, breaks[0])
+        breaks = pairing.predict_transcriptome_breakpoint(b, negative_transcript)
+        assert len(breaks) == 1
+        assert breaks[0] == b
 
-    def test_exonic_five_prime_neg(self):
+    def test_exonic_five_prime_neg(self, negative_transcript):
         b = Breakpoint('1', 350, orient=ORIENT.RIGHT)
-        breaks = pairing.predict_transcriptome_breakpoint(b, self.n_ust)
-        self.assertEqual(2, len(breaks))
-        self.assertEqual(501, breaks[1].start)
-        self.assertEqual(b, breaks[0])
+        breaks = pairing.predict_transcriptome_breakpoint(b, negative_transcript)
+        assert len(breaks) == 2
+        assert breaks[1].start == 501
+        assert breaks[0] == b
 
-    def test_exonic_five_prime_neg_last_exon(self):
+    def test_exonic_five_prime_neg_last_exon(self, negative_transcript):
         b = Breakpoint('1', 550, orient=ORIENT.RIGHT)
-        breaks = pairing.predict_transcriptome_breakpoint(b, self.n_ust)
-        self.assertEqual(1, len(breaks))
-        self.assertEqual(b, breaks[0])
+        breaks = pairing.predict_transcriptome_breakpoint(b, negative_transcript)
+        assert len(breaks) == 1
+        assert breaks[0] == b
 
-    def test_intronic_five_prime_neg(self):
+    def test_intronic_five_prime_neg(self, negative_transcript):
         b = Breakpoint('1', 250, orient=ORIENT.RIGHT)
-        breaks = pairing.predict_transcriptome_breakpoint(b, self.n_ust)
-        self.assertEqual(1, len(breaks))
-        self.assertEqual(301, breaks[0].start)
+        breaks = pairing.predict_transcriptome_breakpoint(b, negative_transcript)
+        assert len(breaks) == 1
+        assert breaks[0].start == 301
 
 
-class TestEquivalent(unittest.TestCase):
+class TestEquivalent:
     def test_useq_uncertainty(self):
         event1 = BreakpointPair(
             Breakpoint('1', 157540650, orient='L'),
@@ -397,7 +440,7 @@ def test_useq_uncertainty(self):
             event_type='deletion',
             call_method='spanning reads',
         )
-        self.assertTrue(pairing.equivalent(event1, event2))
+        assert pairing.equivalent(event1, event2)
 
     def test_useq_uncertainty2(self):
         event1 = BreakpointPair(
@@ -414,4 +457,4 @@ def test_useq_uncertainty2(self):
             call_method='contig',
             untemplated_seq='TTTTTTTTT',
         )
-        self.assertTrue(pairing.equivalent(event1, event2))
+        assert pairing.equivalent(event1, event2)
diff --git a/tests/integration/test_splicing.py b/tests/integration/test_splicing.py
index a80adb11..471c2bf1 100644
--- a/tests/integration/test_splicing.py
+++ b/tests/integration/test_splicing.py
@@ -1,8 +1,7 @@
-import os
-import unittest
+import argparse
 
+import pytest
 from mavis.annotate.constants import SPLICE_SITE_RADIUS
-from mavis.annotate.file_io import load_annotations, load_reference_genome
 from mavis.annotate.genomic import Exon, PreTranscript
 from mavis.annotate.splicing import predict_splice_sites
 from mavis.annotate.variant import annotate_events
@@ -10,7 +9,7 @@
 from mavis.constants import PROTOCOL, SPLICE_TYPE, STRAND, SVTYPE, reverse_complement
 from mavis.interval import Interval
 
-from . import DATA_DIR, MockLongString, MockObject, get_example_genes
+from . import MockLongString, MockObject, get_example_genes
 
 EXAMPLE_GENES = None
 
@@ -20,340 +19,316 @@ def setUpModule():
     EXAMPLE_GENES = get_example_genes()
 
 
-class TestSplicingPatterns(unittest.TestCase):
-    def setUp(self):
-        self.setup_by_strand(STRAND.POS)
-
-    def setup_by_strand(self, strand):
-        self.ex1 = Exon(100, 199, strand=strand)  # C
-        self.ex2 = Exon(500, 599, strand=strand)  # G
-        self.ex3 = Exon(1200, 1299, strand=strand)  # T
-        self.ex4 = Exon(1500, 1599, strand=strand)  # C
-        self.ex5 = Exon(1700, 1799, strand=strand)  # G
-        self.ex6 = Exon(2000, 2099, strand=strand)  # C
-        # introns: 99, 300, 600, 200, 100, ...
-        reference_sequence = 'a' * 99 + 'C' * 100 + 'a' * 300 + 'G' * 100
-        reference_sequence += 'a' * 600 + 'T' * 100 + 'a' * 200 + 'C' * 100
-        reference_sequence += 'a' * 100 + 'G' * 100 + 'a' * 200 + 'C' * 100
-        self.reference_sequence = reference_sequence
-        self.pre_transcript = PreTranscript(
-            exons=[self.ex1, self.ex2, self.ex3, self.ex4, self.ex5, self.ex6], strand=strand
-        )
-
+@pytest.fixture
+def neg_splicing_pattern():
+    n = argparse.Namespace()
+    n.ex1 = Exon(100, 199, strand=STRAND.NEG)  # C
+    n.ex2 = Exon(500, 599, strand=STRAND.NEG)  # G
+    n.ex3 = Exon(1200, 1299, strand=STRAND.NEG)  # T
+    n.ex4 = Exon(1500, 1599, strand=STRAND.NEG)  # C
+    n.ex5 = Exon(1700, 1799, strand=STRAND.NEG)  # G
+    n.ex6 = Exon(2000, 2099, strand=STRAND.NEG)  # C
+    # introns: 99, 300, 600, 200, 100, ...
+    reference_sequence = 'a' * 99 + 'C' * 100 + 'a' * 300 + 'G' * 100
+    reference_sequence += 'a' * 600 + 'T' * 100 + 'a' * 200 + 'C' * 100
+    reference_sequence += 'a' * 100 + 'G' * 100 + 'a' * 200 + 'C' * 100
+    n.reference_sequence = reference_sequence
+    n.pre_transcript = PreTranscript(
+        exons=[n.ex1, n.ex2, n.ex3, n.ex4, n.ex5, n.ex6], strand=STRAND.NEG
+    )
+    return n
+
+
+@pytest.fixture
+def pos_splicing_pattern():
+    n = argparse.Namespace()
+    n.ex1 = Exon(100, 199, strand=STRAND.POS)  # C
+    n.ex2 = Exon(500, 599, strand=STRAND.POS)  # G
+    n.ex3 = Exon(1200, 1299, strand=STRAND.POS)  # T
+    n.ex4 = Exon(1500, 1599, strand=STRAND.POS)  # C
+    n.ex5 = Exon(1700, 1799, strand=STRAND.POS)  # G
+    n.ex6 = Exon(2000, 2099, strand=STRAND.POS)  # C
+    # introns: 99, 300, 600, 200, 100, ...
+    reference_sequence = 'a' * 99 + 'C' * 100 + 'a' * 300 + 'G' * 100
+    reference_sequence += 'a' * 600 + 'T' * 100 + 'a' * 200 + 'C' * 100
+    reference_sequence += 'a' * 100 + 'G' * 100 + 'a' * 200 + 'C' * 100
+    n.reference_sequence = reference_sequence
+    n.pre_transcript = PreTranscript(
+        exons=[n.ex1, n.ex2, n.ex3, n.ex4, n.ex5, n.ex6], strand=STRAND.POS
+    )
+    return n
+
+
+class TestSplicingPatterns:
     def test_single_exon(self):
         t = PreTranscript([(3, 4)], strand=STRAND.POS)
         patt = t.generate_splicing_patterns()
-        self.assertEqual(1, len(patt))
-        self.assertEqual(0, len(patt[0]))
-        self.assertEqual(SPLICE_TYPE.NORMAL, patt[0].splice_type)
-
-    def test_normal_pattern_pos(self):
-        patt = self.pre_transcript.generate_splicing_patterns()
-        self.assertEqual(1, len(patt))
-        self.assertEqual(
-            [
-                self.ex1.end,
-                self.ex2.start,
-                self.ex2.end,
-                self.ex3.start,
-                self.ex3.end,
-                self.ex4.start,
-                self.ex4.end,
-                self.ex5.start,
-                self.ex5.end,
-                self.ex6.start,
-            ],
-            [s.pos for s in patt[0]],
-        )
-        self.assertEqual(SPLICE_TYPE.NORMAL, patt[0].splice_type)
-
-    def test_normal_pattern_neg(self):
-        self.setup_by_strand(STRAND.NEG)
-        self.assertTrue(self.pre_transcript.is_reverse)
-        patt = self.pre_transcript.generate_splicing_patterns()
-        self.assertEqual(1, len(patt))
-        self.assertEqual(
-            [
-                self.ex1.end,
-                self.ex2.start,
-                self.ex2.end,
-                self.ex3.start,
-                self.ex3.end,
-                self.ex4.start,
-                self.ex4.end,
-                self.ex5.start,
-                self.ex5.end,
-                self.ex6.start,
-            ],
-            sorted([s.pos for s in patt[0]]),
-        )
-        self.assertEqual(SPLICE_TYPE.NORMAL, patt[0].splice_type)
-
-    def test_abrogate_a_pos(self):
-        self.ex2.start_splice_site.intact = False
-        patt = self.pre_transcript.generate_splicing_patterns()
-        self.assertEqual(2, len(patt))
-
-        self.assertEqual(
-            [
-                self.ex1.end,
-                self.ex3.start,
-                self.ex3.end,
-                self.ex4.start,
-                self.ex4.end,
-                self.ex5.start,
-                self.ex5.end,
-                self.ex6.start,
-            ],
-            [s.pos for s in patt[0]],
-        )
-        self.assertEqual(SPLICE_TYPE.SKIP, patt[0].splice_type)
-
-        self.assertEqual(
-            [
-                self.ex2.end,
-                self.ex3.start,
-                self.ex3.end,
-                self.ex4.start,
-                self.ex4.end,
-                self.ex5.start,
-                self.ex5.end,
-                self.ex6.start,
-            ],
-            [s.pos for s in patt[1]],
-        )
-        self.assertEqual(SPLICE_TYPE.RETAIN, patt[1].splice_type)
-
-    def test_abrogate_a_neg(self):
-        self.setup_by_strand(STRAND.NEG)
-        self.ex2.start_splice_site.intact = False
-        patt = sorted(self.pre_transcript.generate_splicing_patterns())
-        self.assertEqual(2, len(patt))
-        self.assertEqual(
-            [
-                self.ex1.end,
-                self.ex3.start,
-                self.ex3.end,
-                self.ex4.start,
-                self.ex4.end,
-                self.ex5.start,
-                self.ex5.end,
-                self.ex6.start,
-            ],
-            sorted([s.pos for s in patt[0]]),
-        )
-        self.assertEqual(SPLICE_TYPE.SKIP, patt[0].splice_type)
-        self.assertEqual(
-            [
-                self.ex2.end,
-                self.ex3.start,
-                self.ex3.end,
-                self.ex4.start,
-                self.ex4.end,
-                self.ex5.start,
-                self.ex5.end,
-                self.ex6.start,
-            ],
-            sorted([s.pos for s in patt[1]]),
-        )
-        self.assertEqual(SPLICE_TYPE.RETAIN, patt[1].splice_type)
-
-    def test_abrogate_a_last_exon(self):
-        self.ex6.start_splice_site.intact = False
-        patt = self.pre_transcript.generate_splicing_patterns()
-        self.assertEqual(1, len(patt))
-        self.assertEqual(
-            [
-                self.ex1.end,
-                self.ex2.start,
-                self.ex2.end,
-                self.ex3.start,
-                self.ex3.end,
-                self.ex4.start,
-                self.ex4.end,
-                self.ex5.start,
-            ],
-            [s.pos for s in patt[0]],
-        )
-        self.assertEqual(SPLICE_TYPE.RETAIN, patt[0].splice_type)
-
-    def test_abrogate_d_first_exon(self):
-        self.ex1.end_splice_site.intact = False
-        patt = self.pre_transcript.generate_splicing_patterns()
-        self.assertEqual(1, len(patt))
-        self.assertEqual(
-            [
-                self.ex2.end,
-                self.ex3.start,
-                self.ex3.end,
-                self.ex4.start,
-                self.ex4.end,
-                self.ex5.start,
-                self.ex5.end,
-                self.ex6.start,
-            ],
-            [s.pos for s in patt[0]],
-        )
-        self.assertEqual(SPLICE_TYPE.RETAIN, patt[0].splice_type)
-
-    def test_abrogate_ad(self):
-        self.ex2.start_splice_site.intact = False
-        patt = self.pre_transcript.generate_splicing_patterns()
-        self.assertEqual(2, len(patt))
-        self.assertEqual(
-            [
-                self.ex1.end,
-                self.ex3.start,
-                self.ex3.end,
-                self.ex4.start,
-                self.ex4.end,
-                self.ex5.start,
-                self.ex5.end,
-                self.ex6.start,
-            ],
-            [s.pos for s in patt[0]],
-        )
-        self.assertEqual(SPLICE_TYPE.SKIP, patt[0].splice_type)
-
-        self.assertEqual(
-            [
-                self.ex2.end,
-                self.ex3.start,
-                self.ex3.end,
-                self.ex4.start,
-                self.ex4.end,
-                self.ex5.start,
-                self.ex5.end,
-                self.ex6.start,
-            ],
-            [s.pos for s in patt[1]],
-        )
-        self.assertEqual(SPLICE_TYPE.RETAIN, patt[1].splice_type)
-
-    def test_abrogate_da(self):
-        self.ex2.end_splice_site.intact = False
-        self.ex3.start_splice_site.intact = False
-        patt = self.pre_transcript.generate_splicing_patterns()
-        self.assertEqual(1, len(patt))
-        self.assertEqual(
-            [
-                self.ex1.end,
-                self.ex2.start,
-                self.ex3.end,
-                self.ex4.start,
-                self.ex4.end,
-                self.ex5.start,
-                self.ex5.end,
-                self.ex6.start,
-            ],
-            [s.pos for s in patt[0]],
-        )
-        self.assertEqual(SPLICE_TYPE.RETAIN, patt[0].splice_type)
-
-    def test_multiple_exons_or_multiple_introns_abrogate_ada(self):
-        self.ex2.start_splice_site.intact = False
-        self.ex2.end_splice_site.intact = False
-        self.ex3.start_splice_site.intact = False
-        patt = self.pre_transcript.generate_splicing_patterns()
-        self.assertEqual(2, len(patt))
-
-        self.assertEqual(
-            [
-                self.ex1.end,
-                self.ex4.start,
-                self.ex4.end,
-                self.ex5.start,
-                self.ex5.end,
-                self.ex6.start,
-            ],
-            [s.pos for s in patt[0]],
-        )
-        self.assertEqual(SPLICE_TYPE.MULTI_SKIP, patt[0].splice_type)
-
-        self.assertEqual(
-            [
-                self.ex3.end,
-                self.ex4.start,
-                self.ex4.end,
-                self.ex5.start,
-                self.ex5.end,
-                self.ex6.start,
-            ],
-            [s.pos for s in patt[1]],
-        )
-        self.assertEqual(SPLICE_TYPE.MULTI_RETAIN, patt[1].splice_type)
-
-    def test_multiple_exons_or_multiple_introns_abrogate_dad(self):
-        self.ex2.end_splice_site.intact = False
-        self.ex3.start_splice_site.intact = False
-        self.ex3.end_splice_site.intact = False
-        patt = self.pre_transcript.generate_splicing_patterns()
-        self.assertEqual(2, len(patt))
-
-        self.assertEqual(
-            [
-                self.ex1.end,
-                self.ex2.start,
-                self.ex4.end,
-                self.ex5.start,
-                self.ex5.end,
-                self.ex6.start,
-            ],
-            [s.pos for s in patt[0]],
-        )
-        self.assertEqual(SPLICE_TYPE.MULTI_RETAIN, patt[0].splice_type)
-
-        self.assertEqual(
-            [
-                self.ex1.end,
-                self.ex4.start,
-                self.ex4.end,
-                self.ex5.start,
-                self.ex5.end,
-                self.ex6.start,
-            ],
-            [s.pos for s in patt[1]],
-        )
-        self.assertEqual(SPLICE_TYPE.MULTI_SKIP, patt[1].splice_type)
-
-    def test_complex(self):
-        self.ex2.end_splice_site.intact = False
-        self.ex4.end_splice_site.intact = False
-        patt = self.pre_transcript.generate_splicing_patterns()
-        self.assertEqual(4, len(patt))
-        self.assertTrue(SPLICE_TYPE.COMPLEX in [p.splice_type for p in patt])
-
-
-class TestExonSpliceSites(unittest.TestCase):
+        assert len(patt) == 1
+        assert len(patt[0]) == 0
+        assert patt[0].splice_type == SPLICE_TYPE.NORMAL
+
+    def test_normal_pattern_pos(self, pos_splicing_pattern):
+        patt = pos_splicing_pattern.pre_transcript.generate_splicing_patterns()
+        assert len(patt) == 1
+        assert [s.pos for s in patt[0]] == [
+            pos_splicing_pattern.ex1.end,
+            pos_splicing_pattern.ex2.start,
+            pos_splicing_pattern.ex2.end,
+            pos_splicing_pattern.ex3.start,
+            pos_splicing_pattern.ex3.end,
+            pos_splicing_pattern.ex4.start,
+            pos_splicing_pattern.ex4.end,
+            pos_splicing_pattern.ex5.start,
+            pos_splicing_pattern.ex5.end,
+            pos_splicing_pattern.ex6.start,
+        ]
+        assert patt[0].splice_type == SPLICE_TYPE.NORMAL
+
+    def test_normal_pattern_neg(self, neg_splicing_pattern):
+        assert neg_splicing_pattern.pre_transcript.is_reverse
+        patt = neg_splicing_pattern.pre_transcript.generate_splicing_patterns()
+        assert len(patt) == 1
+        assert sorted([s.pos for s in patt[0]]) == [
+            neg_splicing_pattern.ex1.end,
+            neg_splicing_pattern.ex2.start,
+            neg_splicing_pattern.ex2.end,
+            neg_splicing_pattern.ex3.start,
+            neg_splicing_pattern.ex3.end,
+            neg_splicing_pattern.ex4.start,
+            neg_splicing_pattern.ex4.end,
+            neg_splicing_pattern.ex5.start,
+            neg_splicing_pattern.ex5.end,
+            neg_splicing_pattern.ex6.start,
+        ]
+        assert patt[0].splice_type == SPLICE_TYPE.NORMAL
+
+    def test_abrogate_a_pos(self, pos_splicing_pattern):
+        pos_splicing_pattern.ex2.start_splice_site.intact = False
+        patt = pos_splicing_pattern.pre_transcript.generate_splicing_patterns()
+        assert len(patt) == 2
+
+        assert [s.pos for s in patt[0]] == [
+            pos_splicing_pattern.ex1.end,
+            pos_splicing_pattern.ex3.start,
+            pos_splicing_pattern.ex3.end,
+            pos_splicing_pattern.ex4.start,
+            pos_splicing_pattern.ex4.end,
+            pos_splicing_pattern.ex5.start,
+            pos_splicing_pattern.ex5.end,
+            pos_splicing_pattern.ex6.start,
+        ]
+        assert patt[0].splice_type == SPLICE_TYPE.SKIP
+
+        assert [s.pos for s in patt[1]] == [
+            pos_splicing_pattern.ex2.end,
+            pos_splicing_pattern.ex3.start,
+            pos_splicing_pattern.ex3.end,
+            pos_splicing_pattern.ex4.start,
+            pos_splicing_pattern.ex4.end,
+            pos_splicing_pattern.ex5.start,
+            pos_splicing_pattern.ex5.end,
+            pos_splicing_pattern.ex6.start,
+        ]
+        assert patt[1].splice_type == SPLICE_TYPE.RETAIN
+
+    def test_abrogate_a_neg(self, neg_splicing_pattern):
+        neg_splicing_pattern.ex2.start_splice_site.intact = False
+        patt = sorted(neg_splicing_pattern.pre_transcript.generate_splicing_patterns())
+        assert len(patt) == 2
+        assert sorted([s.pos for s in patt[0]]) == [
+            neg_splicing_pattern.ex1.end,
+            neg_splicing_pattern.ex3.start,
+            neg_splicing_pattern.ex3.end,
+            neg_splicing_pattern.ex4.start,
+            neg_splicing_pattern.ex4.end,
+            neg_splicing_pattern.ex5.start,
+            neg_splicing_pattern.ex5.end,
+            neg_splicing_pattern.ex6.start,
+        ]
+        assert patt[0].splice_type == SPLICE_TYPE.SKIP
+        assert sorted([s.pos for s in patt[1]]) == [
+            neg_splicing_pattern.ex2.end,
+            neg_splicing_pattern.ex3.start,
+            neg_splicing_pattern.ex3.end,
+            neg_splicing_pattern.ex4.start,
+            neg_splicing_pattern.ex4.end,
+            neg_splicing_pattern.ex5.start,
+            neg_splicing_pattern.ex5.end,
+            neg_splicing_pattern.ex6.start,
+        ]
+        assert patt[1].splice_type == SPLICE_TYPE.RETAIN
+
+    def test_abrogate_a_last_exon(self, pos_splicing_pattern):
+        pos_splicing_pattern.ex6.start_splice_site.intact = False
+        patt = pos_splicing_pattern.pre_transcript.generate_splicing_patterns()
+        assert len(patt) == 1
+        assert [s.pos for s in patt[0]] == [
+            pos_splicing_pattern.ex1.end,
+            pos_splicing_pattern.ex2.start,
+            pos_splicing_pattern.ex2.end,
+            pos_splicing_pattern.ex3.start,
+            pos_splicing_pattern.ex3.end,
+            pos_splicing_pattern.ex4.start,
+            pos_splicing_pattern.ex4.end,
+            pos_splicing_pattern.ex5.start,
+        ]
+        assert patt[0].splice_type == SPLICE_TYPE.RETAIN
+
+    def test_abrogate_d_first_exon(self, pos_splicing_pattern):
+        pos_splicing_pattern.ex1.end_splice_site.intact = False
+        patt = pos_splicing_pattern.pre_transcript.generate_splicing_patterns()
+        assert len(patt) == 1
+        assert [s.pos for s in patt[0]] == [
+            pos_splicing_pattern.ex2.end,
+            pos_splicing_pattern.ex3.start,
+            pos_splicing_pattern.ex3.end,
+            pos_splicing_pattern.ex4.start,
+            pos_splicing_pattern.ex4.end,
+            pos_splicing_pattern.ex5.start,
+            pos_splicing_pattern.ex5.end,
+            pos_splicing_pattern.ex6.start,
+        ]
+        assert patt[0].splice_type == SPLICE_TYPE.RETAIN
+
+    def test_abrogate_ad(self, pos_splicing_pattern):
+        pos_splicing_pattern.ex2.start_splice_site.intact = False
+        patt = pos_splicing_pattern.pre_transcript.generate_splicing_patterns()
+        assert len(patt) == 2
+        assert [s.pos for s in patt[0]] == [
+            pos_splicing_pattern.ex1.end,
+            pos_splicing_pattern.ex3.start,
+            pos_splicing_pattern.ex3.end,
+            pos_splicing_pattern.ex4.start,
+            pos_splicing_pattern.ex4.end,
+            pos_splicing_pattern.ex5.start,
+            pos_splicing_pattern.ex5.end,
+            pos_splicing_pattern.ex6.start,
+        ]
+        assert patt[0].splice_type == SPLICE_TYPE.SKIP
+
+        assert [s.pos for s in patt[1]] == [
+            pos_splicing_pattern.ex2.end,
+            pos_splicing_pattern.ex3.start,
+            pos_splicing_pattern.ex3.end,
+            pos_splicing_pattern.ex4.start,
+            pos_splicing_pattern.ex4.end,
+            pos_splicing_pattern.ex5.start,
+            pos_splicing_pattern.ex5.end,
+            pos_splicing_pattern.ex6.start,
+        ]
+        assert patt[1].splice_type == SPLICE_TYPE.RETAIN
+
+    def test_abrogate_da(self, pos_splicing_pattern):
+        pos_splicing_pattern.ex2.end_splice_site.intact = False
+        pos_splicing_pattern.ex3.start_splice_site.intact = False
+        patt = pos_splicing_pattern.pre_transcript.generate_splicing_patterns()
+        assert len(patt) == 1
+        assert [s.pos for s in patt[0]] == [
+            pos_splicing_pattern.ex1.end,
+            pos_splicing_pattern.ex2.start,
+            pos_splicing_pattern.ex3.end,
+            pos_splicing_pattern.ex4.start,
+            pos_splicing_pattern.ex4.end,
+            pos_splicing_pattern.ex5.start,
+            pos_splicing_pattern.ex5.end,
+            pos_splicing_pattern.ex6.start,
+        ]
+        assert patt[0].splice_type == SPLICE_TYPE.RETAIN
+
+    def test_multiple_exons_or_multiple_introns_abrogate_ada(self, pos_splicing_pattern):
+        pos_splicing_pattern.ex2.start_splice_site.intact = False
+        pos_splicing_pattern.ex2.end_splice_site.intact = False
+        pos_splicing_pattern.ex3.start_splice_site.intact = False
+        patt = pos_splicing_pattern.pre_transcript.generate_splicing_patterns()
+        assert len(patt) == 2
+
+        assert [s.pos for s in patt[0]] == [
+            pos_splicing_pattern.ex1.end,
+            pos_splicing_pattern.ex4.start,
+            pos_splicing_pattern.ex4.end,
+            pos_splicing_pattern.ex5.start,
+            pos_splicing_pattern.ex5.end,
+            pos_splicing_pattern.ex6.start,
+        ]
+        assert patt[0].splice_type == SPLICE_TYPE.MULTI_SKIP
+
+        assert [s.pos for s in patt[1]] == [
+            pos_splicing_pattern.ex3.end,
+            pos_splicing_pattern.ex4.start,
+            pos_splicing_pattern.ex4.end,
+            pos_splicing_pattern.ex5.start,
+            pos_splicing_pattern.ex5.end,
+            pos_splicing_pattern.ex6.start,
+        ]
+        assert patt[1].splice_type == SPLICE_TYPE.MULTI_RETAIN
+
+    def test_multiple_exons_or_multiple_introns_abrogate_dad(self, pos_splicing_pattern):
+        pos_splicing_pattern.ex2.end_splice_site.intact = False
+        pos_splicing_pattern.ex3.start_splice_site.intact = False
+        pos_splicing_pattern.ex3.end_splice_site.intact = False
+        patt = pos_splicing_pattern.pre_transcript.generate_splicing_patterns()
+        assert len(patt) == 2
+
+        assert [s.pos for s in patt[0]] == [
+            pos_splicing_pattern.ex1.end,
+            pos_splicing_pattern.ex2.start,
+            pos_splicing_pattern.ex4.end,
+            pos_splicing_pattern.ex5.start,
+            pos_splicing_pattern.ex5.end,
+            pos_splicing_pattern.ex6.start,
+        ]
+        assert patt[0].splice_type == SPLICE_TYPE.MULTI_RETAIN
+
+        assert [s.pos for s in patt[1]] == [
+            pos_splicing_pattern.ex1.end,
+            pos_splicing_pattern.ex4.start,
+            pos_splicing_pattern.ex4.end,
+            pos_splicing_pattern.ex5.start,
+            pos_splicing_pattern.ex5.end,
+            pos_splicing_pattern.ex6.start,
+        ]
+        assert patt[1].splice_type == SPLICE_TYPE.MULTI_SKIP
+
+    def test_complex(self, pos_splicing_pattern):
+        pos_splicing_pattern.ex2.end_splice_site.intact = False
+        pos_splicing_pattern.ex4.end_splice_site.intact = False
+        patt = pos_splicing_pattern.pre_transcript.generate_splicing_patterns()
+        assert len(patt) == 4
+        assert SPLICE_TYPE.COMPLEX in [p.splice_type for p in patt]
+
+
+class TestExonSpliceSites:
     def test_end_splice_site(self):
         e = Exon(100, 199, strand=STRAND.POS)
-        self.assertEqual(2, SPLICE_SITE_RADIUS)
-        self.assertEqual(Interval(198, 201), e.end_splice_site)
+        assert SPLICE_SITE_RADIUS == 2
+        print(e.end_splice_site)
+        assert Interval(198, 201) == e.end_splice_site
 
     def test_start_splice_site(self):
         e = Exon(100, 199, strand=STRAND.POS)
-        self.assertEqual(2, SPLICE_SITE_RADIUS)
-        self.assertEqual(Interval(98, 101), e.start_splice_site)
+        assert SPLICE_SITE_RADIUS == 2
+        print(e.start_splice_site)
+        assert Interval(98, 101) == e.start_splice_site
 
 
-class TestPredictSpliceSites(unittest.TestCase):
+class TestPredictSpliceSites:
     def test_gimap4(self):
         gimap4 = EXAMPLE_GENES['GIMAP4']
         donors = predict_splice_sites(gimap4.seq)
         for d in donors:
             print(d)
-        self.assertEqual(5, len(donors))
+        assert len(donors) == 5
 
     def test_gimap4_reverse(self):
         gimap4 = EXAMPLE_GENES['GIMAP4']
         gimap4_seq = reverse_complement(gimap4.seq)
         donors = predict_splice_sites(gimap4_seq, True)
         for d in donors:
-            self.assertEqual(d.seq, gimap4_seq[d.start - 1 : d.end])
-        self.assertEqual(5, len(donors))
+            assert gimap4_seq[d.start - 1 : d.end] == d.seq
+        assert len(donors) == 5
 
+    @pytest.mark.skip(reason='TODO: dependent functionality not yet implemented')
     def test_fusion_with_novel_splice_site(self):
-        raise unittest.SkipTest('TODO: dependent functionality not yet implemented')
         bpp = BreakpointPair(
             Breakpoint('7', 150268089, 150268089, 'L', '+'),
             Breakpoint('8', 79715940, 79715940, 'L', '-'),
@@ -368,7 +343,7 @@ def test_fusion_with_novel_splice_site(self):
             il7.chr: MockObject(seq=MockLongString(il7.seq, offset=il7.start - 1)),
         }
         annotations = annotate_events([bpp], {gimap4.chr: [gimap4], il7.chr: [il7]}, ref_genome)
-        self.assertEqual(1, len(annotations))
+        assert len(annotations) == 1
         ann = annotations[0]
         print(ann, ann.transcript1, ann.transcript2)
         print(ann.fusion)
@@ -378,4 +353,4 @@ def test_fusion_with_novel_splice_site(self):
         )
         for ex in ann.fusion.transcripts[0].exons:
             print(ex, len(ex))
-        self.assertTrue(False)
+        assert False
diff --git a/tests/integration/test_validate.py b/tests/integration/test_validate.py
index 3458aa68..8f0d8471 100644
--- a/tests/integration/test_validate.py
+++ b/tests/integration/test_validate.py
@@ -1,5 +1,4 @@
-import unittest
-
+import pytest
 from mavis.annotate.file_io import load_reference_genome
 from mavis.bam import cigar as _cigar
 from mavis.bam.cache import BamCache
@@ -10,8 +9,8 @@
 from mavis.validate.base import Evidence
 from mavis.validate.evidence import GenomeEvidence
 
-from ..util import get_data
-from . import RUN_FULL, MockLongString, MockObject, MockRead, mock_read_pair
+from ..util import get_data, long_running_test
+from . import MockLongString, MockObject, MockRead, mock_read_pair
 
 REFERENCE_GENOME = None
 
@@ -42,10 +41,8 @@ def setUpModule():
     # add a check to determine if it is the expected bam file
 
 
-@unittest.skipIf(
-    not RUN_FULL, 'slower tests will not be run unless the environment variable RUN_FULL is given'
-)
-class TestFullEvidenceGathering(unittest.TestCase):
+@long_running_test
+class TestFullEvidenceGathering:
     # need to make the assertions more specific by checking the actual names of the reads found in each bin
     # rather than just the counts.
     def genome_evidence(self, break1, break2, opposing_strands):
@@ -130,9 +127,9 @@ def test_load_evidence_translocation(self):
         )
         ev1.load_evidence()
         print(len(ev1.split_reads[0]), len(ev1.flanking_pairs))
-        self.assertEqual(14, self.count_original_reads(ev1.split_reads[0]))
-        self.assertEqual(20, self.count_original_reads(ev1.split_reads[1]))
-        self.assertEqual(21, len(ev1.flanking_pairs))
+        assert self.count_original_reads(ev1.split_reads[0]) == 14
+        assert self.count_original_reads(ev1.split_reads[1]) == 20
+        assert len(ev1.flanking_pairs) == 21
 
         # second example
         ev1 = self.genome_evidence(
@@ -142,10 +139,10 @@ def test_load_evidence_translocation(self):
         )
         ev1.load_evidence()
         print(len(ev1.split_reads[0]), len(ev1.flanking_pairs))
-        self.assertEqual(21, self.count_original_reads(ev1.split_reads[0]))
+        assert self.count_original_reads(ev1.split_reads[0]) == 21
         # one of the reads that appears to look good in the bam is too low quality % match
-        self.assertEqual(40, self.count_original_reads(ev1.split_reads[1]))
-        self.assertEqual(57, len(ev1.flanking_pairs))
+        assert self.count_original_reads(ev1.split_reads[1]) == 40
+        assert len(ev1.flanking_pairs) == 57
 
     def test_load_evidence_inversion(self):
         # first example
@@ -157,9 +154,9 @@ def test_load_evidence_inversion(self):
 
         ev1.load_evidence()
         print(len(ev1.split_reads[0]), len(ev1.flanking_pairs))
-        self.assertEqual(54, self.count_original_reads(ev1.split_reads[0]))
-        self.assertEqual(20, self.count_original_reads(ev1.split_reads[1]))
-        self.assertEqual(104, len(ev1.flanking_pairs))
+        assert self.count_original_reads(ev1.split_reads[0]) == 54
+        assert self.count_original_reads(ev1.split_reads[1]) == 20
+        assert len(ev1.flanking_pairs) == 104
 
         # second example
         ev1 = self.genome_evidence(
@@ -169,9 +166,9 @@ def test_load_evidence_inversion(self):
         )
         ev1.load_evidence()
         print(len(ev1.split_reads[0]), len(ev1.flanking_pairs))
-        self.assertEqual(15, self.count_original_reads(ev1.split_reads[1]))
-        self.assertEqual(27, self.count_original_reads(ev1.split_reads[0]))
-        self.assertEqual(52, len(ev1.flanking_pairs))
+        assert self.count_original_reads(ev1.split_reads[1]) == 15
+        assert self.count_original_reads(ev1.split_reads[0]) == 27
+        assert len(ev1.flanking_pairs) == 52
 
     def test_load_evidence_duplication(self):
         ev1 = self.genome_evidence(
@@ -182,9 +179,9 @@ def test_load_evidence_duplication(self):
         ev1.load_evidence()
         self.print_evidence(ev1)
         print(len(ev1.split_reads[0]), len(ev1.flanking_pairs))
-        self.assertEqual(35, self.count_original_reads(ev1.split_reads[0]))
-        self.assertEqual(11, self.count_original_reads(ev1.split_reads[1]))
-        self.assertEqual(64, len(ev1.flanking_pairs))
+        assert self.count_original_reads(ev1.split_reads[0]) == 35
+        assert self.count_original_reads(ev1.split_reads[1]) == 11
+        assert len(ev1.flanking_pairs) == 64
 
     def test_load_evidence_deletion1(self):
         # first example
@@ -196,9 +193,9 @@ def test_load_evidence_deletion1(self):
         ev1.load_evidence()
         self.print_evidence(ev1)
         print(len(ev1.split_reads[0]), len(ev1.flanking_pairs))
-        self.assertEqual(49, len(ev1.flanking_pairs))
-        self.assertEqual(22, self.count_original_reads(ev1.split_reads[0]))
-        self.assertEqual(14, self.count_original_reads(ev1.split_reads[1]))
+        assert len(ev1.flanking_pairs) == 49
+        assert self.count_original_reads(ev1.split_reads[0]) == 22
+        assert self.count_original_reads(ev1.split_reads[1]) == 14
 
     def test_load_evidence_deletion2(self):
         # second example
@@ -210,9 +207,9 @@ def test_load_evidence_deletion2(self):
         ev1.load_evidence()
         self.print_evidence(ev1)
         print(len(ev1.split_reads[0]), len(ev1.flanking_pairs))
-        self.assertEqual(4, self.count_original_reads(ev1.split_reads[0]))
-        self.assertEqual(10, self.count_original_reads(ev1.split_reads[1]))
-        self.assertEqual(27, len(ev1.flanking_pairs))
+        assert self.count_original_reads(ev1.split_reads[0]) == 4
+        assert self.count_original_reads(ev1.split_reads[1]) == 10
+        assert len(ev1.flanking_pairs) == 27
 
     def test_load_evidence_deletion3(self):
         # third example
@@ -224,9 +221,9 @@ def test_load_evidence_deletion3(self):
         ev1.load_evidence()
         self.print_evidence(ev1)
         print(len(ev1.split_reads[0]), len(ev1.flanking_pairs))
-        self.assertEqual(8, self.count_original_reads(ev1.split_reads[0]))
-        self.assertEqual(9, self.count_original_reads(ev1.split_reads[1]))
-        self.assertEqual(26, len(ev1.flanking_pairs))
+        assert self.count_original_reads(ev1.split_reads[0]) == 8
+        assert self.count_original_reads(ev1.split_reads[1]) == 9
+        assert len(ev1.flanking_pairs) == 26
 
     def test_load_evidence_deletion4(self):
         # forth example
@@ -238,9 +235,9 @@ def test_load_evidence_deletion4(self):
         ev1.load_evidence()
         self.print_evidence(ev1)
         print(len(ev1.split_reads[0]), len(ev1.flanking_pairs))
-        self.assertEqual(20, self.count_original_reads(ev1.split_reads[0]))
-        self.assertEqual(18, self.count_original_reads(ev1.split_reads[1]))
-        self.assertEqual(40, len(ev1.flanking_pairs))
+        assert self.count_original_reads(ev1.split_reads[0]) == 20
+        assert self.count_original_reads(ev1.split_reads[1]) == 18
+        assert len(ev1.flanking_pairs) == 40
 
     def test_load_evidence_small_deletion1(self):
         # first example
@@ -255,10 +252,10 @@ def test_load_evidence_small_deletion1(self):
         print(len(ev1.split_reads[0]), len(ev1.flanking_pairs), len(ev1.spanning_reads))
         print(len(ev1.spanning_reads))
 
-        self.assertEqual(5, self.count_original_reads(ev1.split_reads[0]))
-        self.assertEqual(3, self.count_original_reads(ev1.split_reads[1]))
-        self.assertEqual(20, len(ev1.spanning_reads))
-        self.assertEqual(6, len(ev1.flanking_pairs))
+        assert self.count_original_reads(ev1.split_reads[0]) == 5
+        assert self.count_original_reads(ev1.split_reads[1]) == 3
+        assert len(ev1.spanning_reads) == 20
+        assert len(ev1.flanking_pairs) == 6
 
     def test_load_evidence_small_deletion2(self):
         # second example
@@ -275,10 +272,10 @@ def test_load_evidence_small_deletion2(self):
         for read, mate in ev1.flanking_pairs:
             print(read.query_name)
 
-        self.assertEqual(27, self.count_original_reads(ev1.split_reads[0]))
-        self.assertEqual(52, self.count_original_reads(ev1.split_reads[1]))
-        self.assertEqual(19, len(ev1.spanning_reads))
-        self.assertEqual(7, len(ev1.flanking_pairs))
+        assert self.count_original_reads(ev1.split_reads[0]) == 27
+        assert self.count_original_reads(ev1.split_reads[1]) == 52
+        assert len(ev1.spanning_reads) == 19
+        assert len(ev1.flanking_pairs) == 7
 
     def test_load_evidence_small_deletion_test1(self):
         ev1 = self.genome_evidence(
@@ -294,10 +291,10 @@ def test_load_evidence_small_deletion_test1(self):
         for read, mate in ev1.flanking_pairs:
             print(read.query_name)
 
-        self.assertEqual(18, self.count_original_reads(ev1.split_reads[0]))
-        self.assertEqual(16, self.count_original_reads(ev1.split_reads[1]))
-        self.assertEqual(0, len(ev1.spanning_reads))
-        self.assertEqual(22, len(ev1.flanking_pairs))
+        assert self.count_original_reads(ev1.split_reads[0]) == 18
+        assert self.count_original_reads(ev1.split_reads[1]) == 16
+        assert len(ev1.spanning_reads) == 0
+        assert len(ev1.flanking_pairs) == 22
 
     def test_load_evidence_small_deletion_test2(self):
         ev1 = self.genome_evidence(
@@ -307,10 +304,10 @@ def test_load_evidence_small_deletion_test2(self):
         )
         ev1.load_evidence()
         self.print_evidence(ev1)
-        self.assertEqual(20, self.count_original_reads(ev1.split_reads[0]))
-        self.assertEqual(18, self.count_original_reads(ev1.split_reads[1]))
-        self.assertEqual(0, len(ev1.spanning_reads))
-        self.assertEqual(40, len(set(ev1.flanking_pairs)))
+        assert self.count_original_reads(ev1.split_reads[0]) == 20
+        assert self.count_original_reads(ev1.split_reads[1]) == 18
+        assert len(ev1.spanning_reads) == 0
+        assert len(set(ev1.flanking_pairs)) == 40
 
     def test_load_evidence_small_deletion_test3(self):
         ev1 = self.genome_evidence(
@@ -326,10 +323,10 @@ def test_load_evidence_small_deletion_test3(self):
         for read in sorted(ev1.spanning_reads, key=lambda x: x.query_name):
             print(read.query_name)
 
-        self.assertEqual(27, self.count_original_reads(ev1.split_reads[0]))
-        self.assertEqual(5, self.count_original_reads(ev1.split_reads[1]))
-        self.assertEqual(0, len(ev1.spanning_reads))
-        self.assertEqual(53, len(ev1.flanking_pairs))
+        assert self.count_original_reads(ev1.split_reads[0]) == 27
+        assert self.count_original_reads(ev1.split_reads[1]) == 5
+        assert len(ev1.spanning_reads) == 0
+        assert len(ev1.flanking_pairs) == 53
 
     def test_load_evidence_small_deletion_test4(self):
         ev1 = self.genome_evidence(
@@ -345,10 +342,10 @@ def test_load_evidence_small_deletion_test4(self):
         print(self.count_original_reads(ev1.split_reads[0]))
         print(self.count_original_reads(ev1.split_reads[1]))
 
-        self.assertEqual(33, self.count_original_reads(ev1.split_reads[0]))
-        self.assertEqual(6, self.count_original_reads(ev1.split_reads[1]))
-        self.assertEqual(0, len(ev1.spanning_reads))
-        self.assertEqual(77, len(ev1.flanking_pairs))
+        assert self.count_original_reads(ev1.split_reads[0]) == 33
+        assert self.count_original_reads(ev1.split_reads[1]) == 6
+        assert len(ev1.spanning_reads) == 0
+        assert len(ev1.flanking_pairs) == 77
 
     def test_load_evidence_small_deletion_test5(self):
         ev1 = self.genome_evidence(
@@ -364,10 +361,10 @@ def test_load_evidence_small_deletion_test5(self):
         print(self.count_original_reads(ev1.split_reads[0]))
         print(self.count_original_reads(ev1.split_reads[1]))
 
-        self.assertEqual(19, self.count_original_reads(ev1.split_reads[0]))
-        self.assertEqual(11, self.count_original_reads(ev1.split_reads[1]))
-        self.assertEqual(0, len(ev1.spanning_reads))
-        self.assertEqual(48, len(ev1.flanking_pairs))
+        assert self.count_original_reads(ev1.split_reads[0]) == 19
+        assert self.count_original_reads(ev1.split_reads[1]) == 11
+        assert len(ev1.spanning_reads) == 0
+        assert len(ev1.flanking_pairs) == 48
 
     def test_load_evidence_small_deletion_test6(self):
         ev1 = self.genome_evidence(
@@ -382,9 +379,9 @@ def test_load_evidence_small_deletion_test6(self):
         print(self.count_original_reads(ev1.split_reads[0]))
         print(self.count_original_reads(ev1.split_reads[1]))
 
-        self.assertEqual(18, self.count_original_reads(ev1.split_reads[0]))
-        self.assertEqual(13, self.count_original_reads(ev1.split_reads[1]))
-        self.assertEqual(53, len(ev1.flanking_pairs))
+        assert self.count_original_reads(ev1.split_reads[0]) == 18
+        assert self.count_original_reads(ev1.split_reads[1]) == 13
+        assert len(ev1.flanking_pairs) == 53
 
     def test_load_evidence_small_deletion_test7(self):
         ev1 = self.genome_evidence(
@@ -400,9 +397,9 @@ def test_load_evidence_small_deletion_test7(self):
         print(self.count_original_reads(ev1.split_reads[0]))
         print(self.count_original_reads(ev1.split_reads[1]))
 
-        self.assertEqual(39, self.count_original_reads(ev1.split_reads[0]))
-        self.assertEqual(13, self.count_original_reads(ev1.split_reads[1]))
-        self.assertEqual(49, len(ev1.flanking_pairs))
+        assert self.count_original_reads(ev1.split_reads[0]) == 39
+        assert self.count_original_reads(ev1.split_reads[1]) == 13
+        assert len(ev1.flanking_pairs) == 49
 
     def test_load_evidence_small_deletion_test8(self):
         ev1 = self.genome_evidence(
@@ -418,11 +415,11 @@ def test_load_evidence_small_deletion_test8(self):
         print(self.count_original_reads(ev1.split_reads[0]))
         print(self.count_original_reads(ev1.split_reads[1]))
 
-        self.assertEqual(59, self.count_original_reads(ev1.split_reads[0]))
-        self.assertEqual(8, self.count_original_reads(ev1.split_reads[1]))
-        self.assertEqual(59, len(ev1.flanking_pairs))
+        assert self.count_original_reads(ev1.split_reads[0]) == 59
+        assert self.count_original_reads(ev1.split_reads[1]) == 8
+        assert len(ev1.flanking_pairs) == 59
 
-    @unittest.skip('skip because too complex')
+    @pytest.mark.skip(reason='skip because too complex')
     def test_load_evidence_complex_deletion(self):
         ev1 = self.genome_evidence(
             Breakpoint('reference12', 6001, orient=ORIENT.LEFT),
@@ -440,12 +437,12 @@ def test_load_evidence_complex_deletion(self):
         for read in sorted(ev1.spanning_reads, key=lambda x: x.query_name):
             print(read.query_name)
 
-        self.assertEqual(76, self.count_original_reads(ev1.split_reads[0]))
-        self.assertEqual(83, self.count_original_reads(ev1.split_reads[1]))
-        self.assertEqual(1, len(ev1.spanning_reads))
-        self.assertEqual(2, len(ev1.flanking_pairs))
+        assert self.count_original_reads(ev1.split_reads[0]) == 76
+        assert self.count_original_reads(ev1.split_reads[1]) == 83
+        assert len(ev1.spanning_reads) == 1
+        assert len(ev1.flanking_pairs) == 2
 
-    @unittest.skip('skip because high coverage')
+    @pytest.mark.skip(reason='skip because high coverage')
     def test_load_evidence_small_insertion(self):
         ev1 = self.genome_evidence(
             Breakpoint('reference1', 2000, orient=ORIENT.LEFT),
@@ -460,12 +457,12 @@ def test_load_evidence_small_insertion(self):
         for read in sorted(ev1.spanning_reads, key=lambda x: x.query_name):
             print(read.query_name)
 
-        self.assertEqual(17, self.count_original_reads(ev1.split_reads[0]))
-        self.assertEqual(17, self.count_original_reads(ev1.split_reads[1]))
-        self.assertEqual(48, len(ev1.spanning_reads))
-        self.assertEqual(4, len(ev1.flanking_pairs))
+        assert self.count_original_reads(ev1.split_reads[0]) == 17
+        assert self.count_original_reads(ev1.split_reads[1]) == 17
+        assert len(ev1.spanning_reads) == 48
+        assert len(ev1.flanking_pairs) == 4
 
-    @unittest.skip('skip because too high coverage')
+    @pytest.mark.skip(reason='skip because too high coverage')
     def test_load_evidence_small_insertion_high_coverage(self):
         ev1 = self.genome_evidence(
             Breakpoint('reference9', 2000, orient=ORIENT.LEFT),
@@ -480,10 +477,10 @@ def test_load_evidence_small_insertion_high_coverage(self):
         for read in sorted(ev1.spanning_reads, key=lambda x: x.query_name):
             print(read.query_name)
 
-        self.assertEqual(37, self.count_original_reads(ev1.split_reads[0]))
-        self.assertEqual(52, self.count_original_reads(ev1.split_reads[1]))
-        self.assertEqual(37, len(ev1.spanning_reads))
-        self.assertEqual(9, len(ev1.flanking_pairs))
+        assert self.count_original_reads(ev1.split_reads[0]) == 37
+        assert self.count_original_reads(ev1.split_reads[1]) == 52
+        assert len(ev1.spanning_reads) == 37
+        assert len(ev1.flanking_pairs) == 9
 
         ev1 = self.genome_evidence(
             Breakpoint('reference16', 2000, orient=ORIENT.LEFT),
@@ -498,10 +495,10 @@ def test_load_evidence_small_insertion_high_coverage(self):
         for read in sorted(ev1.spanning_reads, key=lambda x: x.query_name):
             print(read.query_name)
 
-        self.assertEqual(27, self.count_original_reads(ev1.split_reads[0]))
-        self.assertEqual(52, self.count_original_reads(ev1.split_reads[1]))
-        self.assertEqual(19, len(ev1.spanning_reads))
-        self.assertEqual(9, len(ev1.flanking_pairs))
+        assert self.count_original_reads(ev1.split_reads[0]) == 27
+        assert self.count_original_reads(ev1.split_reads[1]) == 52
+        assert len(ev1.spanning_reads) == 19
+        assert len(ev1.flanking_pairs) == 9
 
     def test_load_evidence_small_duplication(self):
         ev1 = self.genome_evidence(
@@ -517,10 +514,10 @@ def test_load_evidence_small_duplication(self):
         for read in sorted(ev1.spanning_reads, key=lambda x: x.query_name):
             print(read.query_name)
 
-        self.assertEqual(29, self.count_original_reads(ev1.split_reads[0]))
-        self.assertEqual(51, self.count_original_reads(ev1.split_reads[1]))
-        self.assertEqual(0, len(ev1.spanning_reads))
-        self.assertEqual(0, len(ev1.flanking_pairs))
+        assert self.count_original_reads(ev1.split_reads[0]) == 29
+        assert self.count_original_reads(ev1.split_reads[1]) == 51
+        assert len(ev1.spanning_reads) == 0
+        assert len(ev1.flanking_pairs) == 0
 
         # Example 2
         ev1 = self.genome_evidence(
@@ -536,10 +533,10 @@ def test_load_evidence_small_duplication(self):
         for read in sorted(ev1.spanning_reads, key=lambda x: x.query_name):
             print(read.query_name)
 
-        self.assertEqual(25, self.count_original_reads(ev1.split_reads[0]))
-        self.assertEqual(56, self.count_original_reads(ev1.split_reads[1]))
-        self.assertEqual(3, len(ev1.spanning_reads))
-        self.assertEqual(0, len(ev1.flanking_pairs))
+        assert self.count_original_reads(ev1.split_reads[0]) == 25
+        assert self.count_original_reads(ev1.split_reads[1]) == 56
+        assert len(ev1.spanning_reads) == 3
+        assert len(ev1.flanking_pairs) == 0
 
     def test_load_evidence_low_qual_deletion(self):
         ev1 = self.genome_evidence(
@@ -551,31 +548,32 @@ def test_load_evidence_low_qual_deletion(self):
         self.print_evidence(ev1)
         print(len(ev1.spanning_reads))
         print(len(ev1.split_reads[0]), len(ev1.flanking_pairs))
-        self.assertEqual(0, len(ev1.split_reads[0]))
-        self.assertEqual(0, len(ev1.split_reads[1]))
-        self.assertEqual(0, len(ev1.flanking_pairs))
-
-
-class TestEvidenceGathering(unittest.TestCase):
-    def setUp(self):
-        # test loading of evidence for event found on reference3 1114 2187
-        self.ev1 = GenomeEvidence(
-            Breakpoint('reference3', 1114, orient=ORIENT.RIGHT),
-            Breakpoint('reference3', 2187, orient=ORIENT.RIGHT),
-            BAM_CACHE,
-            REFERENCE_GENOME,
-            opposing_strands=True,
-            read_length=125,
-            stdev_fragment_size=100,
-            median_fragment_size=380,
-            config={
-                'validate.stdev_count_abnormal': 3,
-                'validate.min_flanking_pairs_resolution': 3,
-                'validate.assembly_min_edge_trim_weight': 3,
-            },
-        )
-
-    def test_collect_split_read(self):
+        assert len(ev1.split_reads[0]) == 0
+        assert len(ev1.split_reads[1]) == 0
+        assert len(ev1.flanking_pairs) == 0
+
+
+@pytest.fixture
+def ev_gathering_setup():
+    return GenomeEvidence(
+        Breakpoint('reference3', 1114, orient=ORIENT.RIGHT),
+        Breakpoint('reference3', 2187, orient=ORIENT.RIGHT),
+        BAM_CACHE,
+        REFERENCE_GENOME,
+        opposing_strands=True,
+        read_length=125,
+        stdev_fragment_size=100,
+        median_fragment_size=380,
+        config={
+            'validate.stdev_count_abnormal': 3,
+            'validate.min_flanking_pairs_resolution': 3,
+            'validate.assembly_min_edge_trim_weight': 3,
+        },
+    )
+
+
+class TestEvidenceGathering:
+    def test_collect_split_read(self, ev_gathering_setup):
         ev1_sr = MockRead(
             query_name='HISEQX1_11:3:1105:15351:25130:split',
             reference_id=1,
@@ -589,10 +587,10 @@ def test_collect_split_read(self):
             next_reference_id=1,
             next_reference_start=2341,
         )
-        self.ev1.collect_split_read(ev1_sr, True)
-        self.assertEqual(ev1_sr, list(self.ev1.split_reads[0])[0])
+        ev_gathering_setup.collect_split_read(ev1_sr, True)
+        assert list(ev_gathering_setup.split_reads[0])[0] == ev1_sr
 
-    def test_collect_split_read_failure(self):
+    def test_collect_split_read_failure(self, ev_gathering_setup):
         # wrong cigar string
         ev1_sr = MockRead(
             query_name='HISEQX1_11:4:1203:3062:55280:split',
@@ -607,10 +605,10 @@ def test_collect_split_read_failure(self):
             next_reference_id=1,
             next_reference_start=2550,
         )
-        self.assertFalse(self.ev1.collect_split_read(ev1_sr, True))
+        assert not ev_gathering_setup.collect_split_read(ev1_sr, True)
 
-    def test_collect_flanking_pair(self):
-        self.ev1.collect_flanking_pair(
+    def test_collect_flanking_pair(self, ev_gathering_setup):
+        ev_gathering_setup.collect_flanking_pair(
             MockRead(
                 reference_id=1,
                 reference_start=2214,
@@ -631,47 +629,45 @@ def test_collect_flanking_pair(self):
                 is_read1=False,
             ),
         )
-        self.assertEqual(1, len(self.ev1.flanking_pairs))
+        assert len(ev_gathering_setup.flanking_pairs) == 1
 
-    def test_collect_flanking_pair_not_overlapping_evidence_window(self):
+    def test_collect_flanking_pair_not_overlapping_evidence_window(self, ev_gathering_setup):
         # first read in pair does not overlap the first evidence window
         # therefore this should return False and not add to the flanking_pairs
         pair = mock_read_pair(
             MockRead(reference_id=1, reference_start=1903, reference_end=2053, is_reverse=True),
             MockRead(reference_id=1, reference_start=2052, reference_end=2053, is_reverse=True),
         )
-        self.assertFalse(self.ev1.collect_flanking_pair(*pair))
-        self.assertEqual(0, len(self.ev1.flanking_pairs))
+        assert not ev_gathering_setup.collect_flanking_pair(*pair)
+        assert len(ev_gathering_setup.flanking_pairs) == 0
 
-    #    @unittest.skip("demonstrating skipping")
-    def test_load_evidence(self):
-        print(self.ev1)
-        self.ev1.load_evidence()
-        print(self.ev1.spanning_reads)
-        self.assertEqual(
-            2,
+    def test_load_evidence(self, ev_gathering_setup):
+        print(ev_gathering_setup)
+        ev_gathering_setup.load_evidence()
+        print(ev_gathering_setup.spanning_reads)
+        assert (
             len(
                 [
                     r
-                    for r in self.ev1.split_reads[0]
+                    for r in ev_gathering_setup.split_reads[0]
                     if not r.has_tag(PYSAM_READ_FLAGS.TARGETED_ALIGNMENT)
                 ]
-            ),
+            )
+            == 2
         )
-        self.assertEqual(7, len(self.ev1.flanking_pairs))
-        self.assertEqual(
-            2,
+        assert len(ev_gathering_setup.flanking_pairs) == 7
+        assert (
             len(
                 [
                     r
-                    for r in self.ev1.split_reads[1]
+                    for r in ev_gathering_setup.split_reads[1]
                     if not r.has_tag(PYSAM_READ_FLAGS.TARGETED_ALIGNMENT)
                 ]
-            ),
+            )
+            == 2
         )
 
-    #    @unittest.skip("demonstrating skipping")
-    def test_assemble_split_reads(self):
+    def test_assemble_split_reads(self, ev_gathering_setup):
         sr1 = MockRead(
             query_name='HISEQX1_11:3:1105:15351:25130:split',
             query_sequence='TCGTGAGTGGCAGGTGCCATCGTGTTTCATTCTGCCTGAGAGCAGTCTACCTAAATATATAGCTCTGCTCACAGTTTCCCTGCAATGCATAATTAAAATAGCACTATGCAGTTGCTTACACTTCAGATAATGGCTTCCTACATATTGTTG',
@@ -709,23 +705,23 @@ def test_assemble_split_reads(self):
             query_sequence='CTGAGAGCAGTCTACCTAAATATATAGCTCTGCTCACAGTTTCCCTGCAATGCATAATTAAAATAGCACTATGCAGTTGCTTACACTTCAGATAATGGCTTCCTACATATTGTTGGTTATGAAATTTCAGGGTTTTCATTTCTGTATGTT',
             flag=113,
         )
-        self.ev1.split_reads = (
+        ev_gathering_setup.split_reads = (
             {sr1},
             {sr1, sr3, sr7, sr9, sr12, sr15, sr19, sr24},
         )  # subset needed to make a contig
-        #        self.ev1.split_reads=([],[sr1,sr3,sr5,sr6,sr7,sr8,sr9,sr10,sr11,sr12,sr13,sr14,sr15,sr16,sr17,sr18,sr19,sr20,sr21,sr22,sr23,sr24]) #full set of reads produces different contig from subset.
+        #        ev_gathering_setup.split_reads=([],[sr1,sr3,sr5,sr6,sr7,sr8,sr9,sr10,sr11,sr12,sr13,sr14,sr15,sr16,sr17,sr18,sr19,sr20,sr21,sr22,sr23,sr24]) #full set of reads produces different contig from subset.
         # full contig with more read support should be
         # CTGAGCATGAAAGCCCTGTAAACACAGAATTTGGATTCTTTCCTGTTTGGTTCCTGGTCGTGAGTGGCAGGTGCCATCATGTTTCATTCTGCCTGAGAGCAGTCTACCTAAATATATAGCTCTGCTCACAGTTTCCCTGCAATGCATAATTAAAATAGCACTATGCAGTTGCTTACACTTCAGATAATGGCTTCCTACATATTGTTGGTTATGAAATTTCAGGGTTTTCATTTCTGTATGTTAAT
-        self.ev1.half_mapped = (set(), {sr2})
-        self.ev1.assemble_contig()
-        print(self.ev1.contigs)
+        ev_gathering_setup.half_mapped = (set(), {sr2})
+        ev_gathering_setup.assemble_contig()
+        print(ev_gathering_setup.contigs)
         exp = 'CAACAATATGTAGGAAGCCATTATCTGAAGTGTAAGCAACTGCATAGTGCTATTTTAATTATGCATTGCAGGGAAACTGTGAGCAGAGCTATATATTTAGGTAGACTGCTCTCAGGCAGAATGAAACATGATGGCACCTGCCACTCACGACCAGGAACCAAACAGGAAAGAATC'
-        self.assertEqual(exp, self.ev1.contigs[0].seq)
+        assert ev_gathering_setup.contigs[0].seq == exp
 
 
-class TestStandardizeRead(unittest.TestCase):
-    def setUp(self):
-        self.mock_evidence = MockObject(
+class TestStandardizeRead:
+    def test_bwa_mem(self):
+        mock_evidence = MockObject(
             reference_genome={
                 '1': MockObject(
                     seq=MockLongString(
@@ -749,8 +745,6 @@ def setUp(self):
                 **DEFAULTS,
             },
         )
-
-    def test_bwa_mem(self):
         # SamRead(1:224646710-224646924, 183=12D19=, TCAGCTCTCT...) TCAGCTCTCTTAGGGCACACCCTCCAAGGTGCCTAAATGCCATCCCAGGATTGGTTCCAGTGTCTATTATCTGTTTGACTCCAAATGGCCAAACACCTGACTTCCTCTCTGGTAGCCTGGCTTTTATCTTCTAGGACATCCAGGGCCCCTCTCTTTGCCTTCCCCTCTTTCTTCCTTCTACTGCTTCAGCAGACATCATGTG
         # std SamRead(1:224646710-224646924, 183=12D19=, TCAGCTCTCT...) TCAGCTCTCTTAGGGCACACCCTCCAAGGTGCCTAAATGCCATCCCAGGATTGGTTCCAGTGTCTATTATCTGTTTGACTCCAAATGGCCAAACACCTGACTTCCTCTCTGGTAGCCTGGCTTTTATCTTCTAGGACATCCAGGGCCCCTCTCTTTGCCTTCCCCTCTTTCTTCCTTCTACTGCTTCAGCAGACATCATGTG
         # > BPP(Breakpoint(1:224646893L-), Breakpoint(1:224646906R-), opposing=False, seq='')
@@ -762,17 +756,11 @@ def test_bwa_mem(self):
         read.cigar = _cigar.join(_cigar.convert_string_to_cigar('183=12D19='))
         read.query_name = 'name'
         read.mapping_quality = NA_MAPPING_QUALITY
-        std_read = Evidence.standardize_read(self.mock_evidence, read)
-        print(SamRead.__repr__(read))
-        print(SamRead.__repr__(std_read))
-        self.assertEqual(_cigar.convert_string_to_cigar('186=12D16='), std_read.cigar)
-        self.assertEqual(read.reference_start, std_read.reference_start)
+        std_read = Evidence.standardize_read(mock_evidence, read)
+        assert std_read.cigar == _cigar.convert_string_to_cigar('186=12D16=')
+        assert std_read.reference_start == read.reference_start
 
 
 class MockEvidence:
     def __init__(self, ref=None):
         self.HUMAN_REFERENCE_GENOME = ref
-
-
-if __name__ == '__main__':
-    unittest.main()
diff --git a/tests/integration/test_validate_call.py b/tests/integration/test_validate_call.py
index 5be1962a..23d53e37 100644
--- a/tests/integration/test_validate_call.py
+++ b/tests/integration/test_validate_call.py
@@ -1,6 +1,6 @@
-import unittest
 from unittest import mock
 
+import pytest
 from mavis.align import call_paired_read_event, select_contig_alignments
 from mavis.annotate.file_io import load_reference_genome
 from mavis.annotate.genomic import PreTranscript, Transcript
@@ -15,7 +15,7 @@
 from mavis.validate.base import Evidence
 from mavis.validate.evidence import GenomeEvidence, TranscriptomeEvidence
 
-from ..util import get_data
+from ..util import get_data, todo
 from . import MockBamFileHandle, MockLongString, MockRead, get_example_genes, mock_read_pair
 
 REFERENCE_GENOME = None
@@ -52,7 +52,7 @@ def tearDownModule():
     mock.patch.stopall()
 
 
-class TestCallByContig(unittest.TestCase):
+class TestCallByContig:
     def test_EGFR_small_del_transcriptome(self):
         gene = get_example_genes()['EGFR']
         reference_annotations = {gene.chr: [gene]}
@@ -93,35 +93,14 @@ def test_EGFR_small_del_transcriptome(self):
         for ev in events:
             print(ev)
             print(evidence.distance(ev.break1.start, ev.break2.start))
-        self.assertEqual(1, len(events))
-        self.assertEqual(Breakpoint('7', 55242465, orient='L', strand='+'), events[0].break1)
-        self.assertEqual(Breakpoint('7', 55242481, orient='R', strand='+'), events[0].break2)
+        assert len(events) == 1
+        assert events[0].break1 == Breakpoint('7', 55242465, orient='L', strand='+')
+        assert events[0].break2 == Breakpoint('7', 55242481, orient='R', strand='+')
         print(events[0].contig_alignment.score())
-        self.assertTrue(events[0].contig_alignment.score() > 0.99)
+        assert events[0].contig_alignment.score() > 0.99
 
 
-class TestEventCall(unittest.TestCase):
-    def setUp(self):
-        self.ev1 = GenomeEvidence(
-            Breakpoint('reference3', 1114, orient=ORIENT.RIGHT),
-            Breakpoint('reference3', 2187, orient=ORIENT.RIGHT),
-            BAM_CACHE,
-            REFERENCE_GENOME,
-            opposing_strands=True,
-            read_length=125,
-            stdev_fragment_size=100,
-            median_fragment_size=380,
-            stdev_count_abnormal=3,
-            min_flanking_pairs_resolution=3,
-        )
-        self.ev = call.EventCall(
-            Breakpoint('reference3', 1114, orient=ORIENT.RIGHT),
-            Breakpoint('reference3', 2187, orient=ORIENT.RIGHT),
-            source_evidence=self.ev1,
-            event_type=SVTYPE.INV,
-            call_method=CALL_METHOD.SPLIT,
-        )
-
+class TestEventCall:
     def test_bad_deletion(self):
         evidence = GenomeEvidence(
             Breakpoint('reference3', 16, orient='L'),
@@ -132,7 +111,7 @@ def test_bad_deletion(self):
             stdev_fragment_size=100,
             median_fragment_size=380,
         )
-        with self.assertRaises(ValueError):
+        with pytest.raises(ValueError):
             call.EventCall(
                 Breakpoint('reference3', 43, orient='L'),
                 Breakpoint('reference3', 44, orient='R'),
@@ -142,12 +121,49 @@ def test_bad_deletion(self):
             )
 
     def test_flanking_support_empty(self):
-        self.assertEqual(0, len(self.ev.flanking_pairs))
+
+        ev = call.EventCall(
+            Breakpoint('reference3', 1114, orient=ORIENT.RIGHT),
+            Breakpoint('reference3', 2187, orient=ORIENT.RIGHT),
+            source_evidence=GenomeEvidence(
+                Breakpoint('reference3', 1114, orient=ORIENT.RIGHT),
+                Breakpoint('reference3', 2187, orient=ORIENT.RIGHT),
+                BAM_CACHE,
+                REFERENCE_GENOME,
+                opposing_strands=True,
+                read_length=125,
+                stdev_fragment_size=100,
+                median_fragment_size=380,
+                stdev_count_abnormal=3,
+                min_flanking_pairs_resolution=3,
+            ),
+            event_type=SVTYPE.INV,
+            call_method=CALL_METHOD.SPLIT,
+        )
+        assert len(ev.flanking_pairs) == 0
 
     def test_flanking_support(self):
         # 1114 ++
         # 2187 ++
-        self.ev.flanking_pairs.add(
+        ev = call.EventCall(
+            Breakpoint('reference3', 1114, orient=ORIENT.RIGHT),
+            Breakpoint('reference3', 2187, orient=ORIENT.RIGHT),
+            source_evidence=GenomeEvidence(
+                Breakpoint('reference3', 1114, orient=ORIENT.RIGHT),
+                Breakpoint('reference3', 2187, orient=ORIENT.RIGHT),
+                BAM_CACHE,
+                REFERENCE_GENOME,
+                opposing_strands=True,
+                read_length=125,
+                stdev_fragment_size=100,
+                median_fragment_size=380,
+                stdev_count_abnormal=3,
+                min_flanking_pairs_resolution=3,
+            ),
+            event_type=SVTYPE.INV,
+            call_method=CALL_METHOD.SPLIT,
+        )
+        ev.flanking_pairs.add(
             mock_read_pair(
                 MockRead(
                     query_name='test1',
@@ -160,7 +176,7 @@ def test_flanking_support(self):
                 MockRead(reference_id=3, reference_start=2200, reference_end=2250, is_reverse=True),
             )
         )
-        self.ev.flanking_pairs.add(
+        ev.flanking_pairs.add(
             mock_read_pair(
                 MockRead(
                     query_name='test2',
@@ -173,36 +189,53 @@ def test_flanking_support(self):
                 MockRead(reference_id=3, reference_start=2200, reference_end=2250, is_reverse=True),
             )
         )
-        median, stdev = self.ev.flanking_metrics()
-        self.assertEqual(2, len(self.ev.flanking_pairs))
-        self.assertEqual(530, median)
-        self.assertEqual(30, stdev)
+        median, stdev = ev.flanking_metrics()
+        assert len(ev.flanking_pairs) == 2
+        assert median == 530
+        assert stdev == 30
 
     def test_split_read_support_empty(self):
-        self.assertEqual(0, len(self.ev.break1_split_reads) + len(self.ev.break2_split_reads))
+        ev = call.EventCall(
+            Breakpoint('reference3', 1114, orient=ORIENT.RIGHT),
+            Breakpoint('reference3', 2187, orient=ORIENT.RIGHT),
+            source_evidence=GenomeEvidence(
+                Breakpoint('reference3', 1114, orient=ORIENT.RIGHT),
+                Breakpoint('reference3', 2187, orient=ORIENT.RIGHT),
+                BAM_CACHE,
+                REFERENCE_GENOME,
+                opposing_strands=True,
+                read_length=125,
+                stdev_fragment_size=100,
+                median_fragment_size=380,
+                stdev_count_abnormal=3,
+                min_flanking_pairs_resolution=3,
+            ),
+            event_type=SVTYPE.INV,
+            call_method=CALL_METHOD.SPLIT,
+        )
+        assert len(ev.break1_split_reads) + len(ev.break2_split_reads) == 0
 
+    @todo
     def test_call_by_split_delins_del_only(self):
-        raise unittest.SkipTest('TODO')
+        pass
 
+    @todo
     def test_call_by_split_delins_both(self):
-        raise unittest.SkipTest('TODO')
+        pass
 
+    @todo
     def test_call_by_split_delins_ins_only(self):
         # not implemented yet??
-        raise unittest.SkipTest('TODO')
-
+        pass
 
-class TestPullFlankingSupport(unittest.TestCase):
-    def setUp(self):
-        self.bam_cache = BamCache(MockBamFileHandle({'1': 0, '2': 1}))
-        self.REFERENCE_GENOME = None
 
+class TestPullFlankingSupport:
     def build_genome_evidence(self, b1, b2, opposing_strands=False):
         evidence = GenomeEvidence(
             b1,
             b2,
-            self.bam_cache,
-            self.REFERENCE_GENOME,
+            BamCache(MockBamFileHandle({'1': 0, '2': 1})),
+            None,
             opposing_strands=opposing_strands,
             read_length=100,
             median_fragment_size=500,
@@ -230,7 +263,7 @@ def test_deletion(self):
         )
 
         event.add_flanking_support(flanking_pairs)
-        self.assertEqual(1, len(event.flanking_pairs))
+        assert len(event.flanking_pairs) == 1
 
         # now test one where the read pair type is right but the positioning of the reads doesn't
         # support the current call
@@ -241,7 +274,7 @@ def test_deletion(self):
             )
         )
         event.add_flanking_support(flanking_pairs)
-        self.assertEqual(1, len(event.flanking_pairs))
+        assert len(event.flanking_pairs) == 1
 
     def test_small_deletion_flanking_for_larger_deletion(self):
         evidence = self.build_genome_evidence(
@@ -262,7 +295,7 @@ def test_small_deletion_flanking_for_larger_deletion(self):
         )
 
         event.add_flanking_support(flanking_pairs)
-        self.assertEqual(0, len(event.flanking_pairs))
+        assert len(event.flanking_pairs) == 0
 
     def test_insertion(self):
         evidence = self.build_genome_evidence(
@@ -283,7 +316,7 @@ def test_insertion(self):
             CALL_METHOD.SPLIT,
         )
         event.add_flanking_support(flanking_pairs)
-        self.assertEqual(1, len(event.flanking_pairs))
+        assert len(event.flanking_pairs) == 1
 
     def test_inversion(self):
         evidence = self.build_genome_evidence(
@@ -306,7 +339,7 @@ def test_inversion(self):
         )
 
         event.add_flanking_support(flanking_pairs)
-        self.assertEqual(1, len(event.flanking_pairs))
+        assert len(event.flanking_pairs) == 1
 
         # test read that is the right type but the positioning does not support the current call
         flanking_pairs.append(
@@ -316,7 +349,7 @@ def test_inversion(self):
             )
         )
         event.add_flanking_support(flanking_pairs)
-        self.assertEqual(1, len(event.flanking_pairs))
+        assert len(event.flanking_pairs) == 1
 
     def test_inverted_translocation(self):
         evidence = self.build_genome_evidence(
@@ -338,7 +371,7 @@ def test_inverted_translocation(self):
             CALL_METHOD.SPLIT,
         )
         event.add_flanking_support(flanking_pairs)
-        self.assertEqual(1, len(event.flanking_pairs))
+        assert len(event.flanking_pairs) == 1
 
     def test_translocation_rl(self):
         b1 = Breakpoint('11', 128675261, orient=ORIENT.RIGHT, strand=STRAND.POS)
@@ -372,7 +405,7 @@ def test_translocation_rl(self):
             ),
         ]
         event.add_flanking_support(flanking_pairs)
-        self.assertEqual(len(flanking_pairs), len(event.flanking_pairs))
+        assert len(event.flanking_pairs) == len(flanking_pairs)
 
     def test_translocation_rl_filter_nonsupporting(self):
         evidence = self.build_genome_evidence(
@@ -393,7 +426,7 @@ def test_translocation_rl_filter_nonsupporting(self):
         )
 
         event.add_flanking_support(flanking_pairs)
-        self.assertEqual(1, len(event.flanking_pairs))
+        assert len(event.flanking_pairs) == 1
 
         # test read that is the right type but the positioning does not support the current call
         # the mate is on the wrong chromosome (not sure if this would actually be added as flanking support)
@@ -404,7 +437,7 @@ def test_translocation_rl_filter_nonsupporting(self):
             )
         )
         event.add_flanking_support(flanking_pairs)
-        self.assertEqual(1, len(event.flanking_pairs))
+        assert len(event.flanking_pairs) == 1
 
     def test_duplication(self):
         evidence = self.build_genome_evidence(
@@ -427,23 +460,20 @@ def test_duplication(self):
         )
 
         event.add_flanking_support(flanking_pairs)
-        self.assertEqual(1, len(event.flanking_pairs))
+        assert len(event.flanking_pairs) == 1
 
+    @todo
     def test_outside_call_range(self):
-        raise unittest.SkipTest('TODO')
-
+        pass
 
-class TestEvidenceConsumption(unittest.TestCase):
-    def setUp(self):
-        self.bam_cache = BamCache(MockBamFileHandle({'1': 0, '2': 1}))
-        self.REFERENCE_GENOME = None
 
+class TestEvidenceConsumption:
     def build_genome_evidence(self, b1, b2, opposing_strands=False):
         evidence = GenomeEvidence(
             b1,
             b2,
-            self.bam_cache,
-            self.REFERENCE_GENOME,
+            BamCache(MockBamFileHandle({'1': 0, '2': 1})),
+            None,
             opposing_strands=opposing_strands,
             read_length=100,
             median_fragment_size=200,
@@ -551,25 +581,25 @@ def test_call_all_methods(self):
         events = call.call_events(evidence)
         for ev in events:
             print(ev, ev.event_type, ev.call_method)
-        self.assertEqual(4, len(events))
-        self.assertEqual('contig', events[0].call_method)
-        self.assertEqual(100, events[0].break1.start)
-        self.assertEqual(481, events[0].break2.start)
-        self.assertEqual('deletion', events[0].event_type)
-        self.assertEqual('split reads', events[1].call_method)
-        self.assertEqual(120, events[1].break1.start)
-        self.assertEqual(501, events[1].break2.start)
-        self.assertEqual('deletion', events[1].event_type)
-        self.assertEqual('flanking reads', events[2].call_method)
-        self.assertEqual(90, events[2].break1.start)
-        self.assertEqual(299, events[2].break1.end)
-        self.assertEqual(591, events[2].break2.start)
-        self.assertEqual(806, events[2].break2.end)
-        self.assertEqual('deletion', events[2].event_type)
-        self.assertEqual('split reads', events[3].call_method)
-        self.assertEqual(120, events[3].break1.start)
-        self.assertEqual(501, events[3].break2.start)
-        self.assertEqual('insertion', events[3].event_type)
+        assert len(events) == 4
+        assert events[0].call_method == 'contig'
+        assert events[0].break1.start == 100
+        assert events[0].break2.start == 481
+        assert events[0].event_type == 'deletion'
+        assert events[1].call_method == 'split reads'
+        assert events[1].break1.start == 120
+        assert events[1].break2.start == 501
+        assert events[1].event_type == 'deletion'
+        assert events[2].call_method == 'flanking reads'
+        assert events[2].break1.start == 90
+        assert events[2].break1.end == 299
+        assert events[2].break2.start == 591
+        assert events[2].break2.end == 806
+        assert events[2].event_type == 'deletion'
+        assert events[3].call_method == 'split reads'
+        assert events[3].break1.start == 120
+        assert events[3].break2.start == 501
+        assert events[3].event_type == 'insertion'
 
     def test_call_contig_only(self):
         # event should only be 100L+, 501R+ deletion
@@ -660,10 +690,10 @@ def test_call_contig_only(self):
         events = call.call_events(evidence)
         for ev in events:
             print(ev, ev.event_type, ev.call_method)
-        self.assertEqual(1, len(events))
-        self.assertEqual(100, events[0].break1.start)
-        self.assertEqual(501, events[0].break2.start)
-        self.assertEqual('contig', events[0].call_method)
+        assert len(events) == 1
+        assert events[0].break1.start == 100
+        assert events[0].break2.start == 501
+        assert events[0].call_method == 'contig'
 
     def test_call_contig_and_split(self):
         # contig breakpoint is 100L 501R, split reads is 120L 521R
@@ -748,17 +778,17 @@ def test_call_contig_and_split(self):
         events = call.call_events(evidence)
         for ev in events:
             print(ev, ev.event_type, ev.call_method)
-        self.assertEqual(3, len(events))
-        self.assertEqual(100, events[0].break1.start)
-        self.assertEqual(501, events[0].break2.start)
-        self.assertEqual('contig', events[0].call_method)
-        self.assertEqual('split reads', events[1].call_method)
-        self.assertEqual(120, events[1].break1.start)
-        self.assertEqual(521, events[1].break2.start)
-        self.assertEqual('insertion', events[2].event_type)
-        self.assertEqual('split reads', events[2].call_method)
-        self.assertEqual(120, events[2].break1.start)
-        self.assertEqual(521, events[2].break2.start)
+        assert len(events) == 3
+        assert events[0].break1.start == 100
+        assert events[0].break2.start == 501
+        assert events[0].call_method == 'contig'
+        assert events[1].call_method == 'split reads'
+        assert events[1].break1.start == 120
+        assert events[1].break2.start == 521
+        assert events[2].event_type == 'insertion'
+        assert events[2].call_method == 'split reads'
+        assert events[2].break1.start == 120
+        assert events[2].break2.start == 521
 
     def test_call_split_only(self):
         evidence = self.build_genome_evidence(
@@ -795,14 +825,14 @@ def test_call_split_only(self):
         events = call.call_events(evidence)
         for ev in events:
             print(ev, ev.event_type, ev.call_method)
-        self.assertEqual(2, len(events))
-        self.assertEqual(170, events[0].break1.start)
-        self.assertEqual(871, events[0].break2.start)
-        self.assertEqual('split reads', events[0].call_method)
-        self.assertEqual('split reads', events[1].call_method)
-        self.assertEqual(170, events[1].break1.start)
-        self.assertEqual(871, events[1].break2.start)
-        self.assertEqual('insertion', events[1].event_type)
+        assert len(events) == 2
+        assert events[0].break1.start == 170
+        assert events[0].break2.start == 871
+        assert events[0].call_method == 'split reads'
+        assert events[1].call_method == 'split reads'
+        assert events[1].break1.start == 170
+        assert events[1].break2.start == 871
+        assert events[1].event_type == 'insertion'
 
     def test_call_flanking_only(self):
         evidence = self.build_genome_evidence(
@@ -833,70 +863,76 @@ def test_call_flanking_only(self):
         events = call.call_events(evidence)
         for ev in events:
             print(ev, ev.event_type, ev.call_method)
-        self.assertEqual(1, len(events))
-        self.assertEqual(140, events[0].break1.start)
-        self.assertEqual(292, events[0].break1.end)
-        self.assertEqual('flanking reads', events[0].call_method)
-        self.assertEqual(656, events[0].break2.start)
-        self.assertEqual(886, events[0].break2.end)
-
-
-class TestCallBySupportingReads(unittest.TestCase):
-    def setUp(self):
-        self.ev = GenomeEvidence(
-            Breakpoint('fake', 50, 150, orient=ORIENT.RIGHT),
-            Breakpoint('fake', 450, 550, orient=ORIENT.RIGHT),
-            BamCache(MockBamFileHandle()),
-            None,
-            opposing_strands=True,
-            read_length=40,
-            stdev_fragment_size=25,
-            median_fragment_size=100,
-            config={
-                'validate.stdev_count_abnormal': 2,
-                'validate.min_splits_reads_resolution': 1,
-                'validate.min_flanking_pairs_resolution': 1,
-                'validate.min_linking_split_reads': 1,
-                'validate.min_spanning_reads_resolution': 3,
-                'validate. min_call_complexity': 0,
-            },
-        )
-        self.dup = GenomeEvidence(
-            Breakpoint('fake', 50, orient=ORIENT.RIGHT),
-            Breakpoint('fake', 90, orient=ORIENT.LEFT),
-            BamCache(MockBamFileHandle()),
-            None,
-            opposing_strands=False,
-            read_length=40,
-            stdev_fragment_size=25,
-            median_fragment_size=100,
-            config={
-                'validate.stdev_count_abnormal': 2,
-                'validate.min_splits_reads_resolution': 1,
-                'validate.min_flanking_pairs_resolution': 1,
-                'validate.min_linking_split_reads': 1,
-                'validate.min_spanning_reads_resolution': 3,
-                'validate. min_call_complexity': 0,
-            },
-        )
-
-    def test_empty(self):
-        with self.assertRaises(AssertionError):
-            bpp = call._call_by_flanking_pairs(self.ev, SVTYPE.INV)[0]
-
-    def test_call_no_duplication_by_split_reads(self):
-        self.dup.split_reads[0].add(
+        assert len(events) == 1
+        assert events[0].break1.start == 140
+        assert events[0].break1.end == 292
+        assert events[0].call_method == 'flanking reads'
+        assert events[0].break2.start == 656
+        assert events[0].break2.end == 886
+
+
+@pytest.fixture
+def duplication_ev():  # per-test same-strand evidence (was self.dup in setUp)
+    return GenomeEvidence(
+        Breakpoint('fake', 50, orient=ORIENT.RIGHT),
+        Breakpoint('fake', 90, orient=ORIENT.LEFT),
+        BamCache(MockBamFileHandle()),
+        None,
+        opposing_strands=False,
+        read_length=40,
+        stdev_fragment_size=25,
+        median_fragment_size=100,
+        config={
+            'validate.stdev_count_abnormal': 2,
+            'validate.min_splits_reads_resolution': 1,
+            'validate.min_flanking_pairs_resolution': 1,
+            'validate.min_linking_split_reads': 1,
+            'validate.min_spanning_reads_resolution': 3,
+            'validate.min_call_complexity': 0,  # no space after the dot, or the key is never read
+        },
+    )
+
+
+@pytest.fixture
+def inversion_evidence():  # per-test opposing-strand evidence (was self.ev in setUp)
+    return GenomeEvidence(
+        Breakpoint('fake', 50, 150, orient=ORIENT.RIGHT),
+        Breakpoint('fake', 450, 550, orient=ORIENT.RIGHT),
+        BamCache(MockBamFileHandle()),
+        None,
+        opposing_strands=True,
+        read_length=40,
+        stdev_fragment_size=25,
+        median_fragment_size=100,
+        config={
+            'validate.stdev_count_abnormal': 2,
+            'validate.min_splits_reads_resolution': 1,
+            'validate.min_flanking_pairs_resolution': 1,
+            'validate.min_linking_split_reads': 1,
+            'validate.min_spanning_reads_resolution': 3,
+            'validate.min_call_complexity': 0,  # no space after the dot, or the key is never read
+        },
+    )
+
+
+class TestCallBySupportingReads:
+    def test_empty(self, inversion_evidence):
+        with pytest.raises(AssertionError):
+            call._call_by_flanking_pairs(inversion_evidence, SVTYPE.INV)[0]
+
+    def test_call_no_duplication_by_split_reads(self, duplication_ev, inversion_evidence):
+        duplication_ev.split_reads[0].add(
             MockRead(query_name='t1', reference_start=30, cigar=[(CIGAR.EQ, 20), (CIGAR.S, 20)])
         )
-        self.dup.split_reads[1].add(
+        duplication_ev.split_reads[1].add(
             MockRead(query_name='t1', reference_start=90, cigar=[(CIGAR.S, 20), (CIGAR.EQ, 20)])
         )
 
-        bpps = call._call_by_split_reads(self.ev, SVTYPE.DUP)
-        self.assertEqual(0, len(bpps))
+        bpps = call._call_by_split_reads(inversion_evidence, SVTYPE.DUP)
+        assert len(bpps) == 0
 
-    def test_by_split_read(self):
-        self.ev.split_reads[0].add(
+    def test_by_split_read(self, inversion_evidence):
+        inversion_evidence.split_reads[0].add(
             MockRead(
                 query_name='t1',
                 reference_start=100,
@@ -904,7 +940,7 @@ def test_by_split_read(self):
                 query_sequence='A' * 40,
             )
         )
-        self.ev.split_reads[1].add(
+        inversion_evidence.split_reads[1].add(
             MockRead(
                 query_name='t1',
                 reference_start=500,
@@ -912,7 +948,7 @@ def test_by_split_read(self):
                 query_sequence='G' * 40,
             )
         )
-        self.ev.split_reads[0].add(
+        inversion_evidence.split_reads[0].add(
             MockRead(
                 query_name='t2',
                 reference_start=100,
@@ -920,7 +956,7 @@ def test_by_split_read(self):
                 query_sequence='C' * 40,
             )
         )
-        self.ev.split_reads[1].add(
+        inversion_evidence.split_reads[1].add(
             MockRead(
                 query_name='t2',
                 reference_start=500,
@@ -929,17 +965,17 @@ def test_by_split_read(self):
             )
         )
 
-        events = call._call_by_split_reads(self.ev, SVTYPE.INV)
-        self.assertEqual(1, len(events))
+        events = call._call_by_split_reads(inversion_evidence, SVTYPE.INV)
+        assert len(events) == 1
         event = events[0]
-        self.assertEqual(4, len(event.support()))
-        self.assertEqual(101, event.break1.start)
-        self.assertEqual(101, event.break1.end)
-        self.assertEqual(501, event.break2.start)
-        self.assertEqual(501, event.break2.end)
-
-    def test_call_by_split_read_low_resolution(self):
-        self.ev.split_reads[0].add(
+        assert len(event.support()) == 4
+        assert event.break1.start == 101
+        assert event.break1.end == 101
+        assert event.break2.start == 501
+        assert event.break2.end == 501
+
+    def test_call_by_split_read_low_resolution(self, inversion_evidence):
+        inversion_evidence.split_reads[0].add(
             MockRead(
                 query_name='t1',
                 reference_start=100,
@@ -947,7 +983,7 @@ def test_call_by_split_read_low_resolution(self):
                 query_sequence='A' * 40,
             )
         )
-        self.ev.split_reads[1].add(
+        inversion_evidence.split_reads[1].add(
             MockRead(
                 query_name='t1',
                 reference_start=500,
@@ -956,17 +992,17 @@ def test_call_by_split_read_low_resolution(self):
             )
         )
 
-        bpp = call._call_by_split_reads(self.ev, SVTYPE.INV)
-        self.assertEqual(1, len(bpp))
+        bpp = call._call_by_split_reads(inversion_evidence, SVTYPE.INV)
+        assert len(bpp) == 1
         bpp = bpp[0]
 
-        self.assertEqual(101, bpp.break1.start)
-        self.assertEqual(101, bpp.break1.end)
-        self.assertEqual(501, bpp.break2.start)
-        self.assertEqual(501, bpp.break2.end)
+        assert bpp.break1.start == 101
+        assert bpp.break1.end == 101
+        assert bpp.break2.start == 501
+        assert bpp.break2.end == 501
 
-    def test_call_by_split_read_resolve_untemp(self):
-        self.ev.split_reads[0].add(
+    def test_call_by_split_read_resolve_untemp(self, inversion_evidence):
+        inversion_evidence.split_reads[0].add(
             MockRead(
                 query_name='t1',
                 reference_start=100,
@@ -974,7 +1010,7 @@ def test_call_by_split_read_resolve_untemp(self):
                 query_sequence='TCGGCTCCCGTACTTGTGTATAAGGGGCTTCTGATGTTAT',
             )
         )
-        self.ev.split_reads[1].add(
+        inversion_evidence.split_reads[1].add(
             MockRead(
                 query_name='t1',
                 reference_start=500,
@@ -984,16 +1020,16 @@ def test_call_by_split_read_resolve_untemp(self):
             )
         )
 
-        event = call._call_by_split_reads(self.ev, SVTYPE.INV)[0]
+        event = call._call_by_split_reads(inversion_evidence, SVTYPE.INV)[0]
 
-        self.assertEqual(101, event.break1.start)
-        self.assertEqual(101, event.break1.end)
-        self.assertEqual(501, event.break2.start)
-        self.assertEqual(501, event.break2.end)
-        self.assertEqual('', event.untemplated_seq)
+        assert event.break1.start == 101
+        assert event.break1.end == 101
+        assert event.break2.start == 501
+        assert event.break2.end == 501
+        assert event.untemplated_seq == ''
 
-    def test_call_by_split_read_resolve_untemp_exists(self):
-        self.ev.split_reads[0].add(
+    def test_call_by_split_read_resolve_untemp_exists(self, inversion_evidence):
+        inversion_evidence.split_reads[0].add(
             MockRead(
                 query_name='t1',
                 reference_start=100,
@@ -1001,7 +1037,7 @@ def test_call_by_split_read_resolve_untemp_exists(self):
                 query_sequence='TCGGCTCCCGTACTTGTGTATAAGGGGCTTCTGATGTTAT',
             )
         )
-        self.ev.split_reads[1].add(
+        inversion_evidence.split_reads[1].add(
             MockRead(
                 query_name='t1',
                 reference_start=500,
@@ -1011,16 +1047,16 @@ def test_call_by_split_read_resolve_untemp_exists(self):
             )
         )
 
-        event = call._call_by_split_reads(self.ev, SVTYPE.INV)[0]
+        event = call._call_by_split_reads(inversion_evidence, SVTYPE.INV)[0]
 
-        self.assertEqual(101, event.break1.start)
-        self.assertEqual(101, event.break1.end)
-        self.assertEqual(501, event.break2.start)
-        self.assertEqual(501, event.break2.end)
-        self.assertEqual('TA', event.untemplated_seq)
+        assert event.break1.start == 101
+        assert event.break1.end == 101
+        assert event.break2.start == 501
+        assert event.break2.end == 501
+        assert event.untemplated_seq == 'TA'
 
-    def test_call_by_split_read_shift_overlap(self):
-        self.ev.split_reads[0].add(
+    def test_call_by_split_read_shift_overlap(self, inversion_evidence):
+        inversion_evidence.split_reads[0].add(
             MockRead(
                 query_name='t1',
                 reference_start=100,
@@ -1028,7 +1064,7 @@ def test_call_by_split_read_shift_overlap(self):
                 query_sequence='TCGGCTCCCGTACTTGTGTATAAGGGGCTTCTGATGTTAT',
             )
         )
-        self.ev.split_reads[1].add(
+        inversion_evidence.split_reads[1].add(
             MockRead(
                 query_name='t1',
                 reference_start=500,
@@ -1038,37 +1074,37 @@ def test_call_by_split_read_shift_overlap(self):
             )
         )
 
-        event = call._call_by_split_reads(self.ev, SVTYPE.INV)[0]
+        event = call._call_by_split_reads(inversion_evidence, SVTYPE.INV)[0]
 
-        self.assertEqual(101, event.break1.start)
-        self.assertEqual(101, event.break1.end)
-        self.assertEqual(503, event.break2.start)
-        self.assertEqual(503, event.break2.end)
-        self.assertEqual('', event.untemplated_seq)
+        assert event.break1.start == 101
+        assert event.break1.end == 101
+        assert event.break2.start == 503
+        assert event.break2.end == 503
+        assert event.untemplated_seq == ''
 
-    def test_both_by_flanking_pairs(self):
-        self.ev.flanking_pairs.add(
+    def test_both_by_flanking_pairs(self, inversion_evidence):
+        inversion_evidence.flanking_pairs.add(
             mock_read_pair(
                 MockRead(query_name='t1', reference_id=0, reference_start=150, reference_end=150),
                 MockRead(query_name='t1', reference_id=0, reference_start=500, reference_end=520),
             )
         )
-        self.ev.flanking_pairs.add(
+        inversion_evidence.flanking_pairs.add(
             mock_read_pair(
                 MockRead(query_name='t2', reference_id=0, reference_start=120, reference_end=140),
                 MockRead(query_name='t2', reference_id=0, reference_start=520, reference_end=520),
             )
         )
-        bpp = call._call_by_flanking_pairs(self.ev, SVTYPE.INV)
+        bpp = call._call_by_flanking_pairs(inversion_evidence, SVTYPE.INV)
         # 120-149  ..... 500-519
         # max frag = 150 - 80 = 70
-        self.assertEqual(42, bpp.break1.start)
-        self.assertEqual(120, bpp.break1.end)
-        self.assertEqual(412, bpp.break2.start)  # 70 - 21 = 49
-        self.assertEqual(500, bpp.break2.end)
+        assert bpp.break1.start == 42
+        assert bpp.break1.end == 120
+        assert bpp.break2.start == 412  # 70 - 21 = 49
+        assert bpp.break2.end == 500
 
-    def test_by_split_reads_multiple_calls(self):
-        self.ev.split_reads[0].add(
+    def test_by_split_reads_multiple_calls(self, inversion_evidence):
+        inversion_evidence.split_reads[0].add(
             MockRead(
                 query_name='t1',
                 reference_start=100,
@@ -1076,7 +1112,7 @@ def test_by_split_reads_multiple_calls(self):
                 query_sequence='A' * 40,
             )
         )
-        self.ev.split_reads[1].add(
+        inversion_evidence.split_reads[1].add(
             MockRead(
                 query_name='t1',
                 reference_start=500,
@@ -1084,7 +1120,7 @@ def test_by_split_reads_multiple_calls(self):
                 query_sequence='T' * 40,
             )
         )
-        self.ev.split_reads[0].add(
+        inversion_evidence.split_reads[0].add(
             MockRead(
                 query_name='t2',
                 reference_start=110,
@@ -1092,7 +1128,7 @@ def test_by_split_reads_multiple_calls(self):
                 query_sequence='T' * 40,
             )
         )
-        self.ev.split_reads[1].add(
+        inversion_evidence.split_reads[1].add(
             MockRead(
                 query_name='t2',
                 reference_start=520,
@@ -1101,8 +1137,8 @@ def test_by_split_reads_multiple_calls(self):
             )
         )
 
-        evs = call._call_by_split_reads(self.ev, SVTYPE.INV)
-        self.assertEqual(2, len(evs))
+        evs = call._call_by_split_reads(inversion_evidence, SVTYPE.INV)
+        assert len(evs) == 2
 
     def test_call_by_split_reads_consume_flanking(self):
         evidence = GenomeEvidence(
@@ -1182,36 +1218,38 @@ def test_call_by_split_reads_consume_flanking(self):
         events = call._call_by_split_reads(evidence, event_type=SVTYPE.INV)
         for ev in events:
             print(ev, ev.event_type, ev.call_method)
-        self.assertEqual(1, len(events))
+        assert len(events) == 1
         event = events[0]
-        self.assertEqual(1, len(event.flanking_pairs))
-        self.assertEqual(2, len(event.break1_split_reads))
-        self.assertEqual(2, len(event.break2_split_reads))
+        assert len(event.flanking_pairs) == 1
+        assert len(event.break1_split_reads) == 2
+        assert len(event.break2_split_reads) == 2
         b1 = set([read.query_name for read in event.break1_split_reads])
         b2 = set([read.query_name for read in event.break2_split_reads])
-        self.assertEqual(1, len(b1 & b2))
-
-
-class TestCallByFlankingReadsGenome(unittest.TestCase):
-    def setUp(self):
-        self.ev_LR = GenomeEvidence(
-            Breakpoint('fake', 100, orient=ORIENT.LEFT),
-            Breakpoint('fake', 200, orient=ORIENT.RIGHT),
-            BamCache(MockBamFileHandle()),
-            None,
-            opposing_strands=False,
-            read_length=25,
-            stdev_fragment_size=25,
-            median_fragment_size=100,
-            config={
-                'validate.stdev_count_abnormal': 2,
-                'validate.min_flanking_pairs_resolution': 1,
-                'validate.min_call_complexity': 0,
-            },
-        )
-
+        assert len(b1 & b2) == 1
+
+
+@pytest.fixture
+def left_right_ev():  # per-test L/R same-strand evidence (was self.ev_LR in setUp)
+    return GenomeEvidence(
+        Breakpoint('fake', 100, orient=ORIENT.LEFT),
+        Breakpoint('fake', 200, orient=ORIENT.RIGHT),
+        BamCache(MockBamFileHandle()),
+        None,
+        opposing_strands=False,
+        read_length=25,
+        stdev_fragment_size=25,
+        median_fragment_size=100,
+        config={
+            'validate.stdev_count_abnormal': 2,
+            'validate.min_flanking_pairs_resolution': 1,
+            'validate.min_call_complexity': 0,
+        },
+    )
+
+
+class TestCallByFlankingReadsGenome:
     def test_call_coverage_too_large(self):
-        with self.assertRaises(AssertionError):
+        with pytest.raises(AssertionError):
             call._call_interval_by_flanking_coverage(
                 Interval(1901459, 1902200),
                 ORIENT.RIGHT,
@@ -1221,13 +1259,13 @@ def test_call_coverage_too_large(self):
                 Evidence.traverse,
             )
 
-    def test_intrachromosomal_lr(self):
+    def test_intrachromosomal_lr(self, left_right_ev):
         # --LLL-100------------500-RRR-------
         # max fragment size: 100 + 2 * 25 = 150
         # max distance = 150 - read_length = 125
         # coverage ranges: 20->80 (61)   600->675 (76)
-        self.assertEqual(150, self.ev_LR.max_expected_fragment_size)
-        self.ev_LR.flanking_pairs.add(
+        assert left_right_ev.max_expected_fragment_size == 150
+        left_right_ev.flanking_pairs.add(
             mock_read_pair(
                 MockRead(
                     reference_start=19,
@@ -1244,7 +1282,7 @@ def test_intrachromosomal_lr(self):
                 ),
             )
         )
-        self.ev_LR.flanking_pairs.add(
+        left_right_ev.flanking_pairs.add(
             mock_read_pair(
                 MockRead(
                     reference_start=39,
@@ -1262,7 +1300,7 @@ def test_intrachromosomal_lr(self):
             )
         )
         # add a pair that will be ignored
-        self.ev_LR.flanking_pairs.add(
+        left_right_ev.flanking_pairs.add(
             mock_read_pair(
                 MockRead(
                     reference_start=39,
@@ -1279,18 +1317,18 @@ def test_intrachromosomal_lr(self):
                 ),
             )
         )
-        bpp = call._call_by_flanking_pairs(self.ev_LR, SVTYPE.DEL)
+        bpp = call._call_by_flanking_pairs(left_right_ev, SVTYPE.DEL)
         print(bpp, bpp.flanking_pairs)
-        self.assertEqual(80, bpp.break1.start)
-        self.assertEqual(80 + 125 - 45, bpp.break1.end)
-        self.assertEqual(600 - 125 + 75, bpp.break2.start)
-        self.assertEqual(600, bpp.break2.end)
+        assert bpp.break1.start == 80
+        assert bpp.break1.end == 80 + 125 - 45
+        assert bpp.break2.start == 600 - 125 + 75
+        assert bpp.break2.end == 600
 
-    def test_intrachromosomal_lr_coverage_overlaps_range(self):
+    def test_intrachromosomal_lr_coverage_overlaps_range(self, left_right_ev):
         # this test is for ensuring that if a theoretical window calculated for the
         # first breakpoint overlaps the actual coverage for the second breakpoint (or the reverse)
         # that we adjust the theoretical window accordingly
-        self.ev_LR.flanking_pairs.add(
+        left_right_ev.flanking_pairs.add(
             mock_read_pair(
                 MockRead(
                     reference_start=21,
@@ -1307,7 +1345,7 @@ def test_intrachromosomal_lr_coverage_overlaps_range(self):
                 ),
             )
         )
-        self.ev_LR.flanking_pairs.add(
+        left_right_ev.flanking_pairs.add(
             mock_read_pair(
                 MockRead(
                     reference_start=41,
@@ -1325,7 +1363,7 @@ def test_intrachromosomal_lr_coverage_overlaps_range(self):
             )
         )
         # pair to skip
-        self.ev_LR.flanking_pairs.add(
+        left_right_ev.flanking_pairs.add(
             mock_read_pair(
                 MockRead(
                     reference_start=39,
@@ -1342,14 +1380,14 @@ def test_intrachromosomal_lr_coverage_overlaps_range(self):
                 ),
             )
         )
-        break1, break2 = call._call_by_flanking_pairs(self.ev_LR, SVTYPE.INS)
-        self.assertEqual(80, break1.start)
-        self.assertEqual(80, break1.end)  # 119
-        self.assertEqual(81, break2.start)
-        self.assertEqual(81, break2.end)
+        break1, break2 = call._call_by_flanking_pairs(left_right_ev, SVTYPE.INS)
+        assert break1.start == 80
+        assert break1.end == 80  # 119
+        assert break2.start == 81
+        assert break2.end == 81
 
-    def test_intrachromosomal_flanking_coverage_overlap_error(self):
-        self.ev_LR.flanking_pairs.add(
+    def test_intrachromosomal_flanking_coverage_overlap_error(self, left_right_ev):
+        left_right_ev.flanking_pairs.add(
             mock_read_pair(
                 MockRead(
                     reference_start=19,
@@ -1365,7 +1403,7 @@ def test_intrachromosomal_flanking_coverage_overlap_error(self):
                 ),
             )
         )
-        self.ev_LR.flanking_pairs.add(
+        left_right_ev.flanking_pairs.add(
             mock_read_pair(
                 MockRead(
                     reference_start=620,
@@ -1381,11 +1419,11 @@ def test_intrachromosomal_flanking_coverage_overlap_error(self):
                 ),
             )
         )
-        with self.assertRaises(AssertionError):
-            call._call_by_flanking_pairs(self.ev_LR, SVTYPE.DEL)
+        with pytest.raises(AssertionError):
+            call._call_by_flanking_pairs(left_right_ev, SVTYPE.DEL)
 
-    def test_coverage_larger_than_max_expected_variance_error(self):
-        self.ev_LR.flanking_pairs.add(
+    def test_coverage_larger_than_max_expected_variance_error(self, left_right_ev):
+        left_right_ev.flanking_pairs.add(
             mock_read_pair(
                 MockRead(
                     reference_start=19,
@@ -1401,7 +1439,7 @@ def test_coverage_larger_than_max_expected_variance_error(self):
                 ),
             )
         )
-        self.ev_LR.flanking_pairs.add(
+        left_right_ev.flanking_pairs.add(
             mock_read_pair(
                 MockRead(
                     reference_start=301,
@@ -1417,10 +1455,10 @@ def test_coverage_larger_than_max_expected_variance_error(self):
                 ),
             )
         )
-        with self.assertRaises(AssertionError):
-            call._call_by_flanking_pairs(self.ev_LR, SVTYPE.DEL)
+        with pytest.raises(AssertionError):
+            call._call_by_flanking_pairs(left_right_ev, SVTYPE.DEL)
 
-    def test_close_to_zero(self):
+    def test_close_to_zero(self, left_right_ev):
         # this test is for ensuring that if a theoretical window calculated for the
         # first breakpoint overlaps the actual coverage for the second breakpoint (or the reverse)
         # that we adjust the theoretical window accordingly
@@ -1472,12 +1510,12 @@ def test_close_to_zero(self):
         )
         break1, break2 = call._call_by_flanking_pairs(ev, SVTYPE.INV)
 
-        self.assertEqual(1, break1.start)
-        self.assertEqual(20, break1.end)
-        self.assertEqual(65, break2.start)
-        self.assertEqual(150, break2.end)
+        assert break1.start == 1
+        assert break1.end == 20
+        assert break2.start == 65
+        assert break2.end == 150
 
-    def test_call_with_overlapping_coverage_intervals(self):
+    def test_call_with_overlapping_coverage_intervals(self, left_right_ev):
         evidence = GenomeEvidence(
             Breakpoint('1', 76185710, 76186159, orient=ORIENT.RIGHT),
             Breakpoint('1', 76186430, 76186879, orient=ORIENT.LEFT),
@@ -1505,11 +1543,11 @@ def test_call_with_overlapping_coverage_intervals(self):
                 ),
             )
         )
-        with self.assertRaises(AssertionError):
-            bpp = call._call_by_flanking_pairs(evidence, SVTYPE.DUP)
+        with pytest.raises(AssertionError):
+            call._call_by_flanking_pairs(evidence, SVTYPE.DUP)
 
 
-class TestCallByFlankingReadsTranscriptome(unittest.TestCase):
+class TestCallByFlankingReadsTranscriptome:
     def build_transcriptome_evidence(self, b1, b2, opposing_strands=False):
         return TranscriptomeEvidence(
             {},  # fake the annotations
@@ -1531,17 +1569,20 @@ def build_transcriptome_evidence(self, b1, b2, opposing_strands=False):
             },
         )
 
+    @todo
     def test_call_translocation(self):
         # transcriptome test will use exonic coordinates for the associated transcripts
-        raise unittest.SkipTest('TODO')
+        pass
 
+    @todo
     def test_call_inversion(self):
         # transcriptome test will use exonic coordinates for the associated transcripts
-        raise unittest.SkipTest('TODO')
+        pass
 
+    @todo
     def test_call_inversion_overlapping_breakpoint_calls(self):
         # transcriptome test will use exonic coordinates for the associated transcripts
-        raise unittest.SkipTest('TODO')
+        pass
 
     def test_call_deletion(self):
         # transcriptome test will use exonic coordinates for the associated transcripts
@@ -1562,32 +1603,32 @@ def test_call_deletion(self):
         )
         print(read_pair_type(pair[0]))
         # following help in debugging the mockup
-        self.assertFalse(pair[0].is_reverse)
-        self.assertFalse(pair[0].is_read1)
-        self.assertTrue(pair[0].is_read2)
-        self.assertTrue(pair[1].is_reverse)
-        self.assertTrue(pair[1].is_read1)
-        self.assertFalse(pair[1].is_read2)
-        self.assertEqual(STRAND.POS, sequenced_strand(pair[0], 2))
-        self.assertEqual(STRAND.POS, evidence.decide_sequenced_strand([pair[0]]))
-        self.assertEqual(STRAND.POS, sequenced_strand(pair[1], 2))
-        self.assertEqual(STRAND.POS, evidence.decide_sequenced_strand([pair[1]]))
+        assert not pair[0].is_reverse
+        assert not pair[0].is_read1
+        assert pair[0].is_read2
+        assert pair[1].is_reverse
+        assert pair[1].is_read1
+        assert not pair[1].is_read2
+        assert sequenced_strand(pair[0], 2) == STRAND.POS
+        assert evidence.decide_sequenced_strand([pair[0]]) == STRAND.POS
+        assert sequenced_strand(pair[1], 2) == STRAND.POS
+        assert evidence.decide_sequenced_strand([pair[1]]) == STRAND.POS
         print(evidence.max_expected_fragment_size, evidence.read_length)
         evidence.flanking_pairs.add(pair)
         breakpoint1, breakpoint2 = call._call_by_flanking_pairs(evidence, SVTYPE.DEL)
         print(breakpoint1, breakpoint2)
-        self.assertEqual(Breakpoint('1', 1051, 1351, 'L', '+'), breakpoint1)
-        self.assertEqual(Breakpoint('1', 2000, 2300, 'R', '+'), breakpoint2)
+        assert breakpoint1 == Breakpoint('1', 1051, 1351, 'L', '+')
+        assert breakpoint2 == Breakpoint('1', 2000, 2300, 'R', '+')
 
         # now add the transcript and call again
         evidence.overlapping_transcripts.add(pre_transcript)
         breakpoint1, breakpoint2 = call._call_by_flanking_pairs(evidence, SVTYPE.DEL)
         print(breakpoint1, breakpoint2)
-        self.assertEqual(Breakpoint('1', 1051, 2051, 'L', '+'), breakpoint1)
-        self.assertEqual(Breakpoint('1', 1600, 2300, 'R', '+'), breakpoint2)
+        assert breakpoint1 == Breakpoint('1', 1051, 2051, 'L', '+')
+        assert breakpoint2 == Breakpoint('1', 1600, 2300, 'R', '+')
 
 
-class TestCallBySpanningReads(unittest.TestCase):
+class TestCallBySpanningReads:
     def test_deletion(self):
         # ATCGATCTAGATCTAGGATAGTTCTAGCAGTCATAGCTAT
         ev = GenomeEvidence(
@@ -1623,8 +1664,8 @@ def test_deletion(self):
         ]
         ev.spanning_reads = set(spanning_reads)
         calls = call._call_by_spanning_reads(ev, set())
-        self.assertEqual(1, len(calls))
-        self.assertEqual(2, len(calls[0].support()))
+        assert len(calls) == 1
+        assert len(calls[0].support()) == 2
 
     def test_insertion(self):
         pass
@@ -1639,7 +1680,7 @@ def test_duplication(self):
         pass
 
 
-class TestCharacterizeRepeatRegion(unittest.TestCase):
+class TestCharacterizeRepeatRegion:
     def test_bad_deletion_call(self):
         reference_genome = {
             '19': mock.Mock(
@@ -1662,7 +1703,7 @@ def test_bad_deletion_call(self):
             untemplated_seq='',
             event_type=SVTYPE.DEL,
         )
-        self.assertEqual((0, ''), call.EventCall.characterize_repeat_region(bpp, reference_genome))
+        assert call.EventCall.characterize_repeat_region(bpp, reference_genome) == (0, '')
 
     def test_homopolymer_insertion(self):
         bpp = BreakpointPair(
@@ -1679,7 +1720,7 @@ def test_homopolymer_insertion(self):
             'upto and including the first breakpoint',
             reference_genome['1'].seq[bpp.break1.start - 10 : bpp.break1.start],
         )
-        self.assertEqual((4, 'T'), call.EventCall.characterize_repeat_region(bpp, reference_genome))
+        assert call.EventCall.characterize_repeat_region(bpp, reference_genome) == (4, 'T')
 
     def test_homopolymer_deletion(self):
         bpp = BreakpointPair(
@@ -1696,7 +1737,7 @@ def test_homopolymer_deletion(self):
             'upto and including the first breakpoint',
             reference_genome['1'].seq[bpp.break1.start - 10 : bpp.break1.start],
         )
-        self.assertEqual((4, 'T'), call.EventCall.characterize_repeat_region(bpp, reference_genome))
+        assert call.EventCall.characterize_repeat_region(bpp, reference_genome) == (4, 'T')
 
     def test_homopolymer_duplication(self):
         bpp = BreakpointPair(
@@ -1713,7 +1754,7 @@ def test_homopolymer_duplication(self):
             'upto and including the first breakpoint',
             reference_genome['1'].seq[bpp.break1.start - 10 : bpp.break1.start],
         )
-        self.assertEqual((4, 'T'), call.EventCall.characterize_repeat_region(bpp, reference_genome))
+        assert call.EventCall.characterize_repeat_region(bpp, reference_genome) == (4, 'T')
 
     def test_repeat_duplication(self):
         bpp = BreakpointPair(
@@ -1732,9 +1773,7 @@ def test_repeat_duplication(self):
             'upto and including the first breakpoint',
             reference_genome['1'].seq[bpp.break1.start - 10 : bpp.break1.start],
         )
-        self.assertEqual(
-            (2, 'TAG'), call.EventCall.characterize_repeat_region(bpp, reference_genome)
-        )
+        assert call.EventCall.characterize_repeat_region(bpp, reference_genome) == (2, 'TAG')
 
     def test_repeat_insertion(self):
         bpp = BreakpointPair(
@@ -1753,9 +1792,7 @@ def test_repeat_insertion(self):
             'upto and including the first breakpoint',
             reference_genome['1'].seq[bpp.break1.start - 10 : bpp.break1.start],
         )
-        self.assertEqual(
-            (3, 'TAG'), call.EventCall.characterize_repeat_region(bpp, reference_genome)
-        )
+        assert call.EventCall.characterize_repeat_region(bpp, reference_genome) == (3, 'TAG')
 
     def test_repeat_deletion(self):
         bpp = BreakpointPair(
@@ -1774,9 +1811,7 @@ def test_repeat_deletion(self):
             'upto and including the second breakpoint',
             reference_genome['1'].seq[bpp.break2.start - 10 : bpp.break2.start],
         )
-        self.assertEqual(
-            (3, 'TAG'), call.EventCall.characterize_repeat_region(bpp, reference_genome)
-        )
+        assert call.EventCall.characterize_repeat_region(bpp, reference_genome) == (3, 'TAG')
 
     def test_norepeat_insertion(self):
         bpp = BreakpointPair(
@@ -1795,9 +1830,7 @@ def test_norepeat_insertion(self):
             'upto and including the first breakpoint',
             reference_genome['1'].seq[bpp.break1.start - 10 : bpp.break1.start],
         )
-        self.assertEqual(
-            (0, 'TTG'), call.EventCall.characterize_repeat_region(bpp, reference_genome)
-        )
+        assert call.EventCall.characterize_repeat_region(bpp, reference_genome) == (0, 'TTG')
 
     def test_invalid_event_type(self):
         bpp = BreakpointPair(
@@ -1806,9 +1839,5 @@ def test_invalid_event_type(self):
             untemplated_seq='TTG',
             event_type=SVTYPE.INV,
         )
-        with self.assertRaises(ValueError):
+        with pytest.raises(ValueError):
             call.EventCall.characterize_repeat_region(bpp, None)
-
-
-if __name__ == '__main__':
-    unittest.main()
diff --git a/tests/integration/test_validate_evidence.py b/tests/integration/test_validate_evidence.py
index d4b747af..9a53cf82 100644
--- a/tests/integration/test_validate_evidence.py
+++ b/tests/integration/test_validate_evidence.py
@@ -1,12 +1,13 @@
-import unittest
+import argparse
 from functools import partial
 
+import pytest
 from mavis.annotate.genomic import Gene, PreTranscript, Transcript
 from mavis.bam import cigar as _cigar
 from mavis.bam.cache import BamCache
 from mavis.bam.read import SamRead
 from mavis.breakpoint import Breakpoint, BreakpointPair
-from mavis.constants import CIGAR, ORIENT, STRAND
+from mavis.constants import ORIENT, STRAND
 from mavis.interval import Interval
 from mavis.schemas import DEFAULTS
 from mavis.validate.base import Evidence
@@ -17,69 +18,73 @@
 REFERENCE_GENOME = None
 
 
-class TestDistance(unittest.TestCase):
-    def setUp(self):
-        self.transcript = PreTranscript(
-            [(1001, 1100), (1501, 1600), (2001, 2100), (2201, 2300)], strand='+'
-        )
-        for patt in self.transcript.generate_splicing_patterns():
-            self.transcript.transcripts.append(Transcript(self.transcript, patt))
-        self.trans_evidence = MockObject(
-            annotations={},
-            read_length=100,
-            max_expected_fragment_size=550,
-            call_error=11,
-            overlapping_transcripts={self.transcript},
-        )
-        setattr(
-            self.trans_evidence,
-            '_select_transcripts',
-            lambda *pos: self.trans_evidence.overlapping_transcripts,
-        )
-        setattr(
-            self.trans_evidence,
-            'distance',
-            partial(TranscriptomeEvidence.distance, self.trans_evidence),
-        )
-
-    def test_exonic(self):
-        self.assertEqual(Interval(149), self.trans_evidence.distance(1001, 1550))
-
-    def test_intergenic_exonic(self):
-        dist = self.trans_evidence.distance(101, 1550)
-        self.assertEqual(Interval(1049, 1049), dist)
-
-    def test_intergenic_intergenic(self):
-        dist = self.trans_evidence.distance(101, 300)
-        self.assertEqual(Interval(199), dist)
-
-    def test_aligned_intronic(self):
-        dist = self.trans_evidence.distance(1102, 1499)
-        self.assertEqual(Interval(5), dist)
-
-    def test_indel_at_exon_boundary(self):
-        self.assertEqual(Interval(2), self.trans_evidence.distance(1101, 1501))
-
-    def test_no_annotations(self):
-        dist = self.trans_evidence.distance(101, 300, [])
-        self.assertEqual(Interval(199), dist)
-
-    def test_intergenic_intronic(self):
-        dist = self.trans_evidence.distance(101, 1400)
-        self.assertEqual(Interval(1101), dist)
-
-    def test_empty_intron(self):
+@pytest.fixture
+def distance_setup():
+    n = argparse.Namespace()
+    n.transcript = PreTranscript(
+        [(1001, 1100), (1501, 1600), (2001, 2100), (2201, 2300)], strand='+'
+    )
+    for patt in n.transcript.generate_splicing_patterns():
+        n.transcript.transcripts.append(Transcript(n.transcript, patt))
+    n.trans_evidence = MockObject(
+        annotations={},
+        read_length=100,
+        max_expected_fragment_size=550,
+        call_error=11,
+        overlapping_transcripts={n.transcript},
+    )
+    setattr(
+        n.trans_evidence,
+        '_select_transcripts',
+        lambda *pos: n.trans_evidence.overlapping_transcripts,
+    )
+    setattr(
+        n.trans_evidence,
+        'distance',
+        partial(TranscriptomeEvidence.distance, n.trans_evidence),
+    )
+    return n
+
+
+class TestDistance:
+    def test_exonic(self, distance_setup):
+        assert distance_setup.trans_evidence.distance(1001, 1550) == Interval(149)
+
+    def test_intergenic_exonic(self, distance_setup):
+        dist = distance_setup.trans_evidence.distance(101, 1550)
+        assert dist == Interval(1049, 1049)
+
+    def test_intergenic_intergenic(self, distance_setup):
+        dist = distance_setup.trans_evidence.distance(101, 300)
+        assert dist == Interval(199)
+
+    def test_aligned_intronic(self, distance_setup):
+        dist = distance_setup.trans_evidence.distance(1102, 1499)
+        assert dist == Interval(5)
+
+    def test_indel_at_exon_boundary(self, distance_setup):
+        assert distance_setup.trans_evidence.distance(1101, 1501) == Interval(2)
+
+    def test_no_annotations(self, distance_setup):
+        dist = distance_setup.trans_evidence.distance(101, 300, [])
+        assert dist == Interval(199)
+
+    def test_intergenic_intronic(self, distance_setup):
+        dist = distance_setup.trans_evidence.distance(101, 1400)
+        assert dist == Interval(1101)
+
+    def test_empty_intron(self, distance_setup):
         t2 = PreTranscript([(1001, 1100), (1501, 1600), (2001, 2200), (2201, 2300)], strand='+')
         for patt in t2.generate_splicing_patterns():
             t2.transcripts.append(Transcript(t2, patt))
         print(t2)
-        print(self.trans_evidence.overlapping_transcripts)
-        self.trans_evidence.overlapping_transcripts.add(t2)
-        dist = self.trans_evidence.distance(1001, 2301)
-        self.assertEqual(Interval(400, 400), dist)
+        print(distance_setup.trans_evidence.overlapping_transcripts)
+        distance_setup.trans_evidence.overlapping_transcripts.add(t2)
+        dist = distance_setup.trans_evidence.distance(1001, 2301)
+        assert dist == Interval(400, 400)
 
 
-class TestTransStandardize(unittest.TestCase):
+class TestTransStandardize:
     def test_shift_overaligned(self):
         # qwertyuiopas---kkkkk------dfghjklzxcvbnm
         # ..........      ................
@@ -106,7 +111,7 @@ def test_shift_overaligned(self):
         )
         evidence.overlapping_transcripts.add(transcript)
         new_read = evidence.standardize_read(read)
-        self.assertEqual(_cigar.convert_string_to_cigar('12=7N14='), new_read.cigar)
+        assert new_read.cigar == _cigar.convert_string_to_cigar('12=7N14=')
 
     def test_shift_overaligned_left(self):
         # qwertyuiopasdf---kkkkkdf------ghjklzxcvbnm
@@ -134,7 +139,7 @@ def test_shift_overaligned_left(self):
         )
         evidence.overlapping_transcripts.add(transcript)
         new_read = evidence.standardize_read(read)
-        self.assertEqual(_cigar.convert_string_to_cigar('14=7N12='), new_read.cigar)
+        assert new_read.cigar == _cigar.convert_string_to_cigar('14=7N12=')
 
     def test_shift_no_transcripts(self):
         read = SamRead(
@@ -154,296 +159,317 @@ def test_shift_no_transcripts(self):
             median_fragment_size=220,
         )
         new_cigar = evidence.exon_boundary_shift_cigar(read)
-        self.assertEqual(_cigar.convert_string_to_cigar('14=7D18='), new_cigar)
-
-
-class TestComputeFragmentSizes(unittest.TestCase):
-    def setUp(self):
-        b1 = Breakpoint('1', 1051, 1051, 'L')
-        b2 = Breakpoint('1', 1551, 1551, 'R')
-        self.read_length = 50
-        self.trans_ev = TranscriptomeEvidence(
-            {},  # fake the annotations
-            b1,
-            b2,
-            None,
-            None,  # bam_cache and reference_genome
-            opposing_strands=False,
-            read_length=self.read_length,
-            stdev_fragment_size=100,
-            median_fragment_size=100,
-            config={'validate.stdev_count_abnormal': 1},
-        )
-        self.genomic_ev = GenomeEvidence(
-            b1,
-            b2,
-            None,
-            None,  # bam_cache and reference_genome
-            opposing_strands=False,
-            read_length=self.read_length,
-            stdev_fragment_size=100,
-            median_fragment_size=100,
-            config={'validate.stdev_count_abnormal': 1},
-        )
-
-    def test_genomic_vs_trans_no_annotations(self):
+        assert new_cigar == _cigar.convert_string_to_cigar('14=7D18=')
+
+
+@pytest.fixture
+def read_length():
+    return 50
+
+
+@pytest.fixture
+def trans_evidence(read_length):
+    return TranscriptomeEvidence(
+        {},  # fake the annotations
+        Breakpoint('1', 1051, 1051, 'L'),
+        Breakpoint('1', 1551, 1551, 'R'),
+        None,
+        None,  # bam_cache and reference_genome
+        opposing_strands=False,
+        read_length=read_length,
+        stdev_fragment_size=100,
+        median_fragment_size=100,
+        config={'validate.stdev_count_abnormal': 1},
+    )
+
+
+@pytest.fixture
+def genomic_evidence(read_length):
+    return GenomeEvidence(
+        Breakpoint('1', 1051, 1051, 'L'),
+        Breakpoint('1', 1551, 1551, 'R'),
+        None,
+        None,  # bam_cache and reference_genome
+        opposing_strands=False,
+        read_length=read_length,
+        stdev_fragment_size=100,
+        median_fragment_size=100,
+        config={'validate.stdev_count_abnormal': 1},
+    )
+
+
+class TestComputeFragmentSizes:
+    def test_genomic_vs_trans_no_annotations(self, genomic_evidence, read_length, trans_evidence):
         # should be identical
         read, mate = mock_read_pair(
-            MockRead('name', '1', 1051 - self.read_length + 1, 1051, is_reverse=False),
-            MockRead('name', '1', 2300, 2300 + self.read_length - 1, is_reverse=True),
-        )
-        self.assertEqual(
-            self.trans_ev.compute_fragment_size(read, mate),
-            self.genomic_ev.compute_fragment_size(read, mate),
+            MockRead('name', '1', 1051 - read_length + 1, 1051, is_reverse=False),
+            MockRead('name', '1', 2300, 2300 + read_length - 1, is_reverse=True),
         )
+        assert genomic_evidence.compute_fragment_size(
+            read, mate
+        ) == trans_evidence.compute_fragment_size(read, mate)
 
-    def test_reverse_reads(self):
+    def test_reverse_reads(self, genomic_evidence, trans_evidence):
         read, mate = mock_read_pair(
             MockRead('name', '1', 1001, 1100, is_reverse=False),
             MockRead('name', '1', 2201, 2301, is_reverse=True),
         )
-        self.assertEqual(Interval(1300), self.genomic_ev.compute_fragment_size(read, mate))
-        self.assertEqual(Interval(1300), self.genomic_ev.compute_fragment_size(mate, read))
-        self.assertEqual(Interval(1300), self.trans_ev.compute_fragment_size(read, mate))
-        self.assertEqual(Interval(1300), self.trans_ev.compute_fragment_size(mate, read))
-
-
-class TestTraverse(unittest.TestCase):
-    def setUp(self):
-        self.transcript = PreTranscript(
-            [(1001, 1100), (1301, 1400), (1701, 1800)], strand=STRAND.POS
-        )
-        for patt in self.transcript.generate_splicing_patterns():
-            self.transcript.transcripts.append(Transcript(self.transcript, patt))
-
-        self.trans_evidence = MockObject(
-            annotations={},
-            read_length=100,
-            max_expected_fragment_size=550,
-            call_error=11,
-            overlapping_transcripts={self.transcript},
-        )
-        setattr(
-            self.trans_evidence,
-            '_select_transcripts',
-            lambda *pos: self.trans_evidence.overlapping_transcripts,
-        )
-        setattr(
-            self.trans_evidence,
-            'traverse',
-            partial(TranscriptomeEvidence.traverse, self.trans_evidence),
-        )
-
-    def test_left_before_transcript(self):
+        assert genomic_evidence.compute_fragment_size(read, mate) == Interval(1300)
+        assert genomic_evidence.compute_fragment_size(mate, read) == Interval(1300)
+        assert trans_evidence.compute_fragment_size(read, mate) == Interval(1300)
+        assert trans_evidence.compute_fragment_size(mate, read) == Interval(1300)
+
+
+@pytest.fixture
+def traverse_setup():
+    n = argparse.Namespace()
+    n.transcript = PreTranscript([(1001, 1100), (1301, 1400), (1701, 1800)], strand=STRAND.POS)
+    for patt in n.transcript.generate_splicing_patterns():
+        n.transcript.transcripts.append(Transcript(n.transcript, patt))
+
+    n.trans_evidence = MockObject(
+        annotations={},
+        read_length=100,
+        max_expected_fragment_size=550,
+        call_error=11,
+        overlapping_transcripts={n.transcript},
+    )
+    setattr(
+        n.trans_evidence,
+        '_select_transcripts',
+        lambda *pos: n.trans_evidence.overlapping_transcripts,
+    )
+    setattr(
+        n.trans_evidence,
+        'traverse',
+        partial(TranscriptomeEvidence.traverse, n.trans_evidence),
+    )
+    return n
+
+
+class TestTraverse:
+    def test_left_before_transcript(self, traverse_setup):
         exp_pos = Evidence.traverse(900, 500 - 1, ORIENT.LEFT)
-        self.assertEqual(exp_pos, self.trans_evidence.traverse(900, 500 - 1, ORIENT.LEFT))
+        assert traverse_setup.trans_evidence.traverse(900, 500 - 1, ORIENT.LEFT) == exp_pos
 
-    def test_left_after_transcript(self):
+    def test_left_after_transcript(self, traverse_setup):
         exp_pos = Evidence.traverse(2200, 100, ORIENT.LEFT)
-        self.assertEqual(exp_pos, self.trans_evidence.traverse(2200, 100, ORIENT.LEFT))
-
-    def test_left_at_end(self):
-        gpos = self.trans_evidence.traverse(1900, 500, ORIENT.LEFT)
-        self.assertEqual(Interval(900), gpos)
-
-    def test_left_within_transcript_exonic(self):
-        gpos = self.trans_evidence.traverse(1750, 200 - 1, ORIENT.LEFT)
-        self.assertEqual(Interval(1051), gpos)
-
-    def test_left_within_exon(self):
-        gpos = self.trans_evidence.traverse(1750, 20 - 1, ORIENT.LEFT)
-        self.assertEqual(1731, gpos.start)
-        self.assertEqual(1731, gpos.end)
-
-    def test_left_within_transcript_intronic(self):
-        gpos = self.trans_evidence.traverse(1600, 150 - 1, ORIENT.LEFT)
-        self.assertEqual(Interval(1451), gpos)
-
-    def test_right_before_transcript(self):
-        gpos = self.trans_evidence.traverse(500, 100 - 1, ORIENT.RIGHT)
-        self.assertEqual(Interval(599), gpos)
-
-    def test_right_before_transcript2(self):
-        gpos = self.trans_evidence.traverse(901, 500 - 1, ORIENT.RIGHT)
-        self.assertEqual(Interval(1900), gpos)
-
-    def test_right_after_transcript(self):
-        gpos = self.trans_evidence.traverse(2201, 100 - 1, ORIENT.RIGHT)
-        self.assertEqual(Interval(2300), gpos)
-
-    def test_right_within_transcript(self):
-        gpos = self.trans_evidence.traverse(1351, 100 - 1, ORIENT.RIGHT)
-        self.assertEqual(Interval(1750), gpos)
-
-    def test_right_within_exon(self):
-        gpos = self.trans_evidence.traverse(1351, 10 - 1, ORIENT.RIGHT)
-        self.assertEqual(Interval(1360), gpos)
-
-
-class TestTraverseTransRev(unittest.TestCase):
-    def setUp(self):
-        self.transcript = PreTranscript(
-            [(1001, 1100), (1301, 1400), (1701, 1800)], strand=STRAND.NEG
-        )
-        for patt in self.transcript.generate_splicing_patterns():
-            self.transcript.transcripts.append(Transcript(self.transcript, patt))
-
-        self.trans_evidence = MockObject(
-            annotations={},
-            read_length=100,
-            max_expected_fragment_size=550,
-            call_error=11,
-            overlapping_transcripts={self.transcript},
-        )
-        setattr(
-            self.trans_evidence,
-            '_select_transcripts',
-            lambda *pos: self.trans_evidence.overlapping_transcripts,
-        )
-        setattr(
-            self.trans_evidence,
-            'traverse',
-            partial(TranscriptomeEvidence.traverse, self.trans_evidence),
-        )
-
-    def test_left_before_transcript(self):
-        gpos = self.trans_evidence.traverse(900, 500 - 1, ORIENT.LEFT)
-        self.assertEqual(Interval(401), gpos)
-        self.assertEqual(gpos, GenomeEvidence.traverse(900, 500 - 1, ORIENT.LEFT))
-
-    def test_left_after_transcript(self):
-        gpos = self.trans_evidence.traverse(2200, 100, ORIENT.LEFT)
-        self.assertEqual(gpos, GenomeEvidence.traverse(2200, 100, ORIENT.LEFT))
-        self.assertEqual(Interval(2100), gpos)
-
-    def test_left_after_transcript2(self):
-        gpos = self.trans_evidence.traverse(1900, 500 - 1, ORIENT.LEFT)
-        self.assertEqual(Interval(901), gpos)
-
-    def test_left_within_transcript_exonic(self):
-        gpos = self.trans_evidence.traverse(1750, 200 - 1, ORIENT.LEFT)
-        self.assertEqual(Interval(1051), gpos)
-
-    def test_left_within_exon(self):
-        gpos = self.trans_evidence.traverse(1750, 20 - 1, ORIENT.LEFT)
-        self.assertEqual(1731, gpos.start)
-        self.assertEqual(1731, gpos.end)
-
-    def test_left_within_transcript_intronic(self):
-        gpos = self.trans_evidence.traverse(1600, 150 - 1, ORIENT.LEFT)
-        self.assertEqual(Interval(1451), gpos)
-
-    def test_right_before_transcript(self):
-        gpos = self.trans_evidence.traverse(500, 100 - 1, ORIENT.RIGHT)
-        self.assertEqual(Interval(599), gpos)
-
-    def test_right_before_transcript2(self):
-        gpos = self.trans_evidence.traverse(901, 500 - 1, ORIENT.RIGHT)
-        self.assertEqual(Interval(1900), gpos)
-
-    def test_right_after_transcript(self):
-        gpos = self.trans_evidence.traverse(2201, 100 - 1, ORIENT.RIGHT)
-        self.assertEqual(Interval(2300), gpos)
-
-    def test_right_within_transcript(self):
-        gpos = self.trans_evidence.traverse(1351, 100 - 1, ORIENT.RIGHT)
-        self.assertEqual(Interval(1750), gpos)
-
-    def test_right_within_exon(self):
-        gpos = self.trans_evidence.traverse(1351, 10 - 1, ORIENT.RIGHT)
-        self.assertEqual(Interval(1360), gpos)
-
-
-class TestTranscriptomeEvidenceWindow(unittest.TestCase):
-    def setUp(self):
-        gene = Gene('1', 1, 9999, name='KRAS', strand=STRAND.POS)
-        self.pre_transcript = PreTranscript(
-            gene=gene, exons=[(1001, 1100), (1401, 1500), (1701, 1750), (3001, 4000)]
-        )
-        gene.unspliced_transcripts.append(self.pre_transcript)
-        for spl in self.pre_transcript.generate_splicing_patterns():
-            self.pre_transcript.transcripts.append(Transcript(self.pre_transcript, spl))
-        self.annotations = {gene.chr: [gene]}
-        self.genome_evidence = MockObject(
-            annotations={},
-            read_length=100,
-            max_expected_fragment_size=550,
-            config={**DEFAULTS, 'validate.call_error': 11},
-        )
-        self.trans_evidence = MockObject(
-            annotations={},
-            read_length=100,
-            max_expected_fragment_size=550,
-            overlapping_transcripts={self.pre_transcript},
-            config={**DEFAULTS, 'validate.call_error': 11},
-        )
-        setattr(
-            self.trans_evidence,
-            '_select_transcripts',
-            lambda *pos: self.trans_evidence.overlapping_transcripts,
-        )
-        setattr(
-            self.trans_evidence,
-            'traverse',
-            partial(TranscriptomeEvidence.traverse, self.trans_evidence),
-        )
-
-    def transcriptome_window(self, breakpoint, transcripts=None):
-        if transcripts:
-            self.trans_evidence.overlapping_transcripts.update(transcripts)
-        return TranscriptomeEvidence.generate_window(self.trans_evidence, breakpoint)
-
-    def genome_window(self, breakpoint):
-        return GenomeEvidence.generate_window(self.genome_evidence, breakpoint)
-
-    def test_before_start(self):
+        assert traverse_setup.trans_evidence.traverse(2200, 100, ORIENT.LEFT) == exp_pos
+
+    def test_left_at_end(self, traverse_setup):
+        gpos = traverse_setup.trans_evidence.traverse(1900, 500, ORIENT.LEFT)
+        assert gpos == Interval(900)
+
+    def test_left_within_transcript_exonic(self, traverse_setup):
+        gpos = traverse_setup.trans_evidence.traverse(1750, 200 - 1, ORIENT.LEFT)
+        assert gpos == Interval(1051)
+
+    def test_left_within_exon(self, traverse_setup):
+        gpos = traverse_setup.trans_evidence.traverse(1750, 20 - 1, ORIENT.LEFT)
+        assert gpos.start == 1731
+        assert gpos.end == 1731
+
+    def test_left_within_transcript_intronic(self, traverse_setup):
+        gpos = traverse_setup.trans_evidence.traverse(1600, 150 - 1, ORIENT.LEFT)
+        assert gpos == Interval(1451)
+
+    def test_right_before_transcript(self, traverse_setup):
+        gpos = traverse_setup.trans_evidence.traverse(500, 100 - 1, ORIENT.RIGHT)
+        assert gpos == Interval(599)
+
+    def test_right_before_transcript2(self, traverse_setup):
+        gpos = traverse_setup.trans_evidence.traverse(901, 500 - 1, ORIENT.RIGHT)
+        assert gpos == Interval(1900)
+
+    def test_right_after_transcript(self, traverse_setup):
+        gpos = traverse_setup.trans_evidence.traverse(2201, 100 - 1, ORIENT.RIGHT)
+        assert gpos == Interval(2300)
+
+    def test_right_within_transcript(self, traverse_setup):
+        gpos = traverse_setup.trans_evidence.traverse(1351, 100 - 1, ORIENT.RIGHT)
+        assert gpos == Interval(1750)
+
+    def test_right_within_exon(self, traverse_setup):
+        gpos = traverse_setup.trans_evidence.traverse(1351, 10 - 1, ORIENT.RIGHT)
+        assert gpos == Interval(1360)
+
+
+@pytest.fixture
+def tranverse_trans_rev_setup():
+    n = argparse.Namespace()
+    n.transcript = PreTranscript([(1001, 1100), (1301, 1400), (1701, 1800)], strand=STRAND.NEG)
+    for patt in n.transcript.generate_splicing_patterns():
+        n.transcript.transcripts.append(Transcript(n.transcript, patt))
+
+    n.trans_evidence = MockObject(
+        annotations={},
+        read_length=100,
+        max_expected_fragment_size=550,
+        call_error=11,
+        overlapping_transcripts={n.transcript},
+    )
+    setattr(
+        n.trans_evidence,
+        '_select_transcripts',
+        lambda *pos: n.trans_evidence.overlapping_transcripts,
+    )
+    setattr(
+        n.trans_evidence,
+        'traverse',
+        partial(TranscriptomeEvidence.traverse, n.trans_evidence),
+    )
+    return n
+
+
+class TestTraverseTransRev:
+    def test_left_before_transcript(self, tranverse_trans_rev_setup):
+        gpos = tranverse_trans_rev_setup.trans_evidence.traverse(900, 500 - 1, ORIENT.LEFT)
+        assert gpos == Interval(401)
+        assert GenomeEvidence.traverse(900, 500 - 1, ORIENT.LEFT) == gpos
+
+    def test_left_after_transcript(self, tranverse_trans_rev_setup):
+        gpos = tranverse_trans_rev_setup.trans_evidence.traverse(2200, 100, ORIENT.LEFT)
+        assert GenomeEvidence.traverse(2200, 100, ORIENT.LEFT) == gpos
+        assert gpos == Interval(2100)
+
+    def test_left_after_transcript2(self, tranverse_trans_rev_setup):
+        gpos = tranverse_trans_rev_setup.trans_evidence.traverse(1900, 500 - 1, ORIENT.LEFT)
+        assert gpos == Interval(901)
+
+    def test_left_within_transcript_exonic(self, tranverse_trans_rev_setup):
+        gpos = tranverse_trans_rev_setup.trans_evidence.traverse(1750, 200 - 1, ORIENT.LEFT)
+        assert gpos == Interval(1051)
+
+    def test_left_within_exon(self, tranverse_trans_rev_setup):
+        gpos = tranverse_trans_rev_setup.trans_evidence.traverse(1750, 20 - 1, ORIENT.LEFT)
+        assert gpos.start == 1731
+        assert gpos.end == 1731
+
+    def test_left_within_transcript_intronic(self, tranverse_trans_rev_setup):
+        gpos = tranverse_trans_rev_setup.trans_evidence.traverse(1600, 150 - 1, ORIENT.LEFT)
+        assert gpos == Interval(1451)
+
+    def test_right_before_transcript(self, tranverse_trans_rev_setup):
+        gpos = tranverse_trans_rev_setup.trans_evidence.traverse(500, 100 - 1, ORIENT.RIGHT)
+        assert gpos == Interval(599)
+
+    def test_right_before_transcript2(self, tranverse_trans_rev_setup):
+        gpos = tranverse_trans_rev_setup.trans_evidence.traverse(901, 500 - 1, ORIENT.RIGHT)
+        assert gpos == Interval(1900)
+
+    def test_right_after_transcript(self, tranverse_trans_rev_setup):
+        gpos = tranverse_trans_rev_setup.trans_evidence.traverse(2201, 100 - 1, ORIENT.RIGHT)
+        assert gpos == Interval(2300)
+
+    def test_right_within_transcript(self, tranverse_trans_rev_setup):
+        gpos = tranverse_trans_rev_setup.trans_evidence.traverse(1351, 100 - 1, ORIENT.RIGHT)
+        assert gpos == Interval(1750)
+
+    def test_right_within_exon(self, tranverse_trans_rev_setup):
+        gpos = tranverse_trans_rev_setup.trans_evidence.traverse(1351, 10 - 1, ORIENT.RIGHT)
+        assert gpos == Interval(1360)
+
+
+@pytest.fixture
+def trans_window_setup():
+    n = argparse.Namespace()
+    gene = Gene('1', 1, 9999, name='KRAS', strand=STRAND.POS)
+    n.pre_transcript = PreTranscript(
+        gene=gene, exons=[(1001, 1100), (1401, 1500), (1701, 1750), (3001, 4000)]
+    )
+    gene.unspliced_transcripts.append(n.pre_transcript)
+    for spl in n.pre_transcript.generate_splicing_patterns():
+        n.pre_transcript.transcripts.append(Transcript(n.pre_transcript, spl))
+    n.annotations = {gene.chr: [gene]}
+    n.genome_evidence = MockObject(
+        annotations={},
+        read_length=100,
+        max_expected_fragment_size=550,
+        config={**DEFAULTS, 'validate.call_error': 11},
+    )
+    n.trans_evidence = MockObject(
+        annotations={},
+        read_length=100,
+        max_expected_fragment_size=550,
+        overlapping_transcripts={n.pre_transcript},
+        config={**DEFAULTS, 'validate.call_error': 11},
+    )
+    setattr(
+        n.trans_evidence,
+        '_select_transcripts',
+        lambda *pos: n.trans_evidence.overlapping_transcripts,
+    )
+    setattr(
+        n.trans_evidence,
+        'traverse',
+        partial(TranscriptomeEvidence.traverse, n.trans_evidence),
+    )
+    return n
+
+
+def transcriptome_window(ev, breakpoint, transcripts=None):
+    if transcripts:
+        ev.overlapping_transcripts.update(transcripts)
+    return TranscriptomeEvidence.generate_window(ev, breakpoint)
+
+
+class TestTranscriptomeEvidenceWindow:
+    def test_before_start(self, trans_window_setup):
         b = Breakpoint(chr='1', start=100, orient=ORIENT.RIGHT)
-        self.assertEqual(self.genome_window(b), self.transcriptome_window(b))
+        assert transcriptome_window(
+            trans_window_setup.trans_evidence, b
+        ) == GenomeEvidence.generate_window(trans_window_setup.genome_evidence, b)
 
         b = Breakpoint(chr='1', start=500, orient=ORIENT.RIGHT)
-        self.assertEqual(self.genome_window(b), self.transcriptome_window(b))
+        assert transcriptome_window(
+            trans_window_setup.trans_evidence, b
+        ) == GenomeEvidence.generate_window(trans_window_setup.genome_evidence, b)
 
-    def test_after_end(self):
+    def test_after_end(self, trans_window_setup):
         b = Breakpoint(chr='1', start=6000, orient=ORIENT.RIGHT)
-        self.assertEqual(self.genome_window(b), self.transcriptome_window(b))
+        assert transcriptome_window(
+            trans_window_setup.trans_evidence, b
+        ) == GenomeEvidence.generate_window(trans_window_setup.genome_evidence, b)
 
-    def test_exonic_long_exon(self):
+    def test_exonic_long_exon(self, trans_window_setup):
         b = Breakpoint(chr='1', start=3200, orient=ORIENT.RIGHT)
-        self.assertEqual(self.genome_window(b), self.transcriptome_window(b))
+        assert transcriptome_window(
+            trans_window_setup.trans_evidence, b
+        ) == GenomeEvidence.generate_window(trans_window_setup.genome_evidence, b)
 
-    def test_intronic_long_exon(self):
+    def test_intronic_long_exon(self, trans_window_setup):
         b = Breakpoint(chr='1', start=2970, orient=ORIENT.RIGHT)
-        self.assertEqual(self.genome_window(b), self.transcriptome_window(b))
+        assert transcriptome_window(
+            trans_window_setup.trans_evidence, b
+        ) == GenomeEvidence.generate_window(trans_window_setup.genome_evidence, b)
 
-    def test_intronic_long_intron(self):
+    def test_intronic_long_intron(self, trans_window_setup):
         b = Breakpoint(chr='1', start=1800, orient=ORIENT.RIGHT)
-        print(self.genome_window(b))
-        self.assertEqual(Interval(1490, 2360), self.transcriptome_window(b))
+        assert transcriptome_window(trans_window_setup.trans_evidence, b) == Interval(1490, 2360)
 
-    def test_intronic_short_exon_right(self):
+    def test_intronic_short_exon_right(self, trans_window_setup):
         b = Breakpoint(chr='1', start=1690, orient=ORIENT.RIGHT)
-        print(self.genome_window(b))
-        self.assertEqual(Interval(1580, 3500), self.transcriptome_window(b))
+        assert transcriptome_window(trans_window_setup.trans_evidence, b) == Interval(1580, 3500)
 
-    def test_intronic_short_exon_left(self):
+    def test_intronic_short_exon_left(self, trans_window_setup):
         b = Breakpoint(chr='1', start=2200, orient=ORIENT.LEFT)
-        self.assertEqual(Interval(1440, 2310), self.transcriptome_window(b))
+        assert transcriptome_window(trans_window_setup.trans_evidence, b) == Interval(1440, 2310)
 
-    def test_multiple_transcripts(self):
+    def test_multiple_transcripts(self, trans_window_setup):
         #  [(1001, 1100), (1401, 1500), (1701, 1750), (3001, 4000)])
         b = Breakpoint(chr='1', start=1150, orient=ORIENT.RIGHT)
-        gene = self.annotations['1'][0]
+        gene = trans_window_setup.annotations['1'][0]
         t2 = PreTranscript(gene=gene, exons=[(1001, 1100), (1200, 1300), (2100, 2200)])
         for patt in t2.generate_splicing_patterns():
             t2.transcripts.append(Transcript(t2, patt))
         gene.transcripts.append(t2)
         # 989 - 2561
         # 989 - 3411
-        self.assertEqual(
-            Interval(1040, 3160), self.transcriptome_window(b, [self.pre_transcript, t2])
-        )
+        assert transcriptome_window(
+            trans_window_setup.trans_evidence, b, [trans_window_setup.pre_transcript, t2]
+        ) == Interval(1040, 3160)
 
-    def test_many_small_exons(self):
+    def test_many_small_exons(self, trans_window_setup):
         g = Gene('fake', 17271277, 17279592, strand='+')
         pre_transcript = PreTranscript(
             gene=g,
@@ -463,48 +489,45 @@ def test_many_small_exons(self):
         for patt in pre_transcript.generate_splicing_patterns():
             pre_transcript.transcripts.append(Transcript(pre_transcript, patt))
         b = Breakpoint(chr='fake', start=17279591, orient=ORIENT.LEFT)
-        self.assertEqual(
-            Interval(17277321, 17279701), self.transcriptome_window(b, [pre_transcript])
-        )
+        assert transcriptome_window(
+            trans_window_setup.trans_evidence, b, [pre_transcript]
+        ) == Interval(17277321, 17279701)
 
 
-class TestNetSizeTrans(unittest.TestCase):
-    def setUp(self):
-        self.transcript = PreTranscript(
-            [(1001, 1100), (1301, 1400), (1701, 1800)], strand=STRAND.POS
-        )
-        for patt in self.transcript.generate_splicing_patterns():
-            self.transcript.transcripts.append(Transcript(self.transcript, patt))
-        self.trans_evidence = MockObject(
+class TestNetSizeTrans:
+    def test_net_zero(self):
+        transcript = PreTranscript([(1001, 1100), (1301, 1400), (1701, 1800)], strand=STRAND.POS)
+        for patt in transcript.generate_splicing_patterns():
+            transcript.transcripts.append(Transcript(transcript, patt))
+        trans_evidence = MockObject(
             annotations={},
             read_length=100,
             max_expected_fragment_size=550,
             call_error=11,
-            overlapping_transcripts={self.transcript},
+            overlapping_transcripts={transcript},
         )
         setattr(
-            self.trans_evidence,
+            trans_evidence,
             '_select_transcripts',
-            lambda *pos: self.trans_evidence.overlapping_transcripts,
+            lambda *pos: trans_evidence.overlapping_transcripts,
         )
         setattr(
-            self.trans_evidence,
+            trans_evidence,
             'distance',
-            partial(TranscriptomeEvidence.distance, self.trans_evidence),
+            partial(TranscriptomeEvidence.distance, trans_evidence),
         )
 
-    def test_net_zero(self):
         bpp = BreakpointPair(
             Breakpoint('1', 1099, orient=ORIENT.LEFT),
             Breakpoint('1', 1302, orient=ORIENT.RIGHT),
             untemplated_seq='TT',
         )
-        dist = partial(TranscriptomeEvidence.distance, self.trans_evidence)
-        self.assertEqual(Interval(-200), bpp.net_size())
-        self.assertEqual(Interval(0), bpp.net_size(dist))
+        dist = partial(TranscriptomeEvidence.distance, trans_evidence)
+        assert bpp.net_size() == Interval(-200)
+        assert bpp.net_size(dist) == Interval(0)
 
 
-class TestGenomeEvidenceWindow(unittest.TestCase):
+class TestGenomeEvidenceWindow:
     def test_orient_ns(self):
         bpp = Breakpoint(chr='1', start=1000, end=1000, orient=ORIENT.NS)
         window = GenomeEvidence.generate_window(
@@ -515,9 +538,9 @@ def test_orient_ns(self):
             ),
             bpp,
         )
-        self.assertEqual(440, window.start)
-        self.assertEqual(1560, window.end)
-        self.assertEqual(1121, len(window))
+        assert window.start == 440
+        assert window.end == 1560
+        assert len(window) == 1121
 
     def test_orient_left(self):
         bpp = Breakpoint(chr='1', start=1000, end=1000, orient=ORIENT.LEFT)
@@ -529,9 +552,9 @@ def test_orient_left(self):
             ),
             bpp,
         )
-        self.assertEqual(440, window.start)
-        self.assertEqual(1110, window.end)
-        self.assertEqual(671, len(window))
+        assert window.start == 440
+        assert window.end == 1110
+        assert len(window) == 671
 
     def test_orient_right(self):
         bpp = Breakpoint(chr='1', start=1000, end=1000, orient=ORIENT.RIGHT)
@@ -543,9 +566,9 @@ def test_orient_right(self):
             ),
             bpp,
         )
-        self.assertEqual(890, window.start)
-        self.assertEqual(1560, window.end)
-        self.assertEqual(671, len(window))
+        assert window.start == 890
+        assert window.end == 1560
+        assert len(window) == 671
 
     def test_window_accessors(self):
         ge = GenomeEvidence(
@@ -559,87 +582,89 @@ def test_window_accessors(self):
             median_fragment_size=100,
             config={'validate.stdev_count_abnormal': 1, 'validate.call_error': 0},
         )
-        self.assertEqual(901, ge.outer_window1.start)
-        self.assertEqual(1649, ge.outer_window1.end)
-        self.assertEqual(6600, ge.outer_window2.end)
-        self.assertEqual(5852, ge.outer_window2.start)
-
-        self.assertEqual(1351, ge.inner_window1.start)
-        self.assertEqual(1649, ge.inner_window1.end)
-        self.assertEqual(6150, ge.inner_window2.end)
-        self.assertEqual(5852, ge.inner_window2.start)
-
-
-class TestGenomeEvidenceAddReads(unittest.TestCase):
-    def setUp(self):
-        self.ge = GenomeEvidence(
-            Breakpoint('1', 1500, orient=ORIENT.LEFT),
-            Breakpoint('1', 6001, orient=ORIENT.RIGHT),
-            BamCache(MockBamFileHandle({'1': 0})),
-            None,  # reference_genome
-            opposing_strands=False,
-            read_length=150,
-            stdev_fragment_size=500,
-            median_fragment_size=100,
-            config={'validate.stdev_count_abnormal': 1, 'validate.call_error': 0},
-        )
-        # outer windows (901, 1649)  (5852, 6600)
-        # inner windows (1351, 1649)  (5852, 6150)
-
-    def test_collect_flanking_pair_error_unmapped_read(self):
+        assert ge.outer_window1.start == 901
+        assert ge.outer_window1.end == 1649
+        assert ge.outer_window2.end == 6600
+        assert ge.outer_window2.start == 5852
+
+        assert ge.inner_window1.start == 1351
+        assert ge.inner_window1.end == 1649
+        assert ge.inner_window2.end == 6150
+        assert ge.inner_window2.start == 5852
+
+
+@pytest.fixture
+def flanking_ge(read_length):
+    return GenomeEvidence(
+        Breakpoint('1', 1500, orient=ORIENT.LEFT),
+        Breakpoint('1', 6001, orient=ORIENT.RIGHT),
+        BamCache(MockBamFileHandle({'1': 0})),
+        None,  # reference_genome
+        opposing_strands=False,
+        read_length=150,
+        stdev_fragment_size=500,
+        median_fragment_size=100,
+        config={'validate.stdev_count_abnormal': 1, 'validate.call_error': 0},
+    )
+    # outer windows (901, 1649)  (5852, 6600)
+    # inner windows (1351, 1649)  (5852, 6150)
+
+
+class TestGenomeEvidenceAddReads:
+    def test_collect_flanking_pair_error_unmapped_read(self, flanking_ge):
         read, mate = mock_read_pair(
             MockRead('test', 0, 900, 1000, is_reverse=False),
             MockRead('test', 0, 6000, 6099, is_reverse=True),
         )
         read.is_unmapped = True
-        with self.assertRaises(ValueError):
-            self.ge.collect_flanking_pair(read, mate)
+        with pytest.raises(ValueError):
+            flanking_ge.collect_flanking_pair(read, mate)
 
-    def test_collect_flanking_pair_error_mate_unmapped(self):
+    def test_collect_flanking_pair_error_mate_unmapped(self, flanking_ge):
         read, mate = mock_read_pair(
             MockRead('test', 0, 900, 1000, is_reverse=False),
             MockRead('test', 0, 6000, 6099, is_reverse=True),
         )
         mate.is_unmapped = True
-        with self.assertRaises(ValueError):
-            self.ge.collect_flanking_pair(read, mate)
+        with pytest.raises(ValueError):
+            flanking_ge.collect_flanking_pair(read, mate)
 
-    def test_collect_flanking_pair_error_query_names_dont_match(self):
+    def test_collect_flanking_pair_error_query_names_dont_match(self, flanking_ge):
         read, mate = mock_read_pair(
             MockRead('test1', 0, 900, 1000, is_reverse=False),
             MockRead('test', 0, 6000, 6099, is_reverse=True),
         )
-        with self.assertRaises(ValueError):
-            self.ge.collect_flanking_pair(read, mate)
+        with pytest.raises(ValueError):
+            flanking_ge.collect_flanking_pair(read, mate)
 
-    def test_collect_flanking_pair_error_template_lengths_dont_match(self):
+    def test_collect_flanking_pair_error_template_lengths_dont_match(self, flanking_ge):
         read, mate = mock_read_pair(
             MockRead('test', 0, 900, 1000, is_reverse=False, template_length=50),
             MockRead('test', 0, 6000, 6099, is_reverse=True),
         )
         mate.template_length = 55
-        with self.assertRaises(ValueError):
-            self.ge.collect_flanking_pair(read, mate)
+        with pytest.raises(ValueError):
+            flanking_ge.collect_flanking_pair(read, mate)
 
-    def test_collect_flanking_pair_read_low_mq(self):
+    def test_collect_flanking_pair_read_low_mq(self, flanking_ge):
         read, mate = mock_read_pair(
             MockRead('test', 0, 900, 1000, is_reverse=False),
             MockRead('test', 0, 6000, 6099, is_reverse=True),
         )
         read.mapping_quality = 0
-        self.assertFalse(self.ge.collect_flanking_pair(read, mate))
+        assert not flanking_ge.collect_flanking_pair(read, mate)
 
-    def test_collect_flanking_pair_mate_low_mq(self):
+    def test_collect_flanking_pair_mate_low_mq(self, flanking_ge):
         read, mate = mock_read_pair(
             MockRead('test', 0, 900, 1000, is_reverse=False),
             MockRead('test', 0, 6000, 6099, is_reverse=True),
         )
         mate.mapping_quality = 0
-        self.assertFalse(self.ge.collect_flanking_pair(read, mate))
+        assert not flanking_ge.collect_flanking_pair(read, mate)
 
-    def test_collect_flanking_pair_interchromosomal(self):
+    def test_collect_flanking_pair_interchromosomal(self, flanking_ge):
         read, mate = mock_read_pair(
             MockRead('test', 1, 900, 1000, is_reverse=False),
             MockRead('test', 0, 6000, 6099, is_reverse=True),
         )
-        self.assertFalse(self.ge.collect_flanking_pair(read, mate))
+        assert not flanking_ge.collect_flanking_pair(read, mate)
diff --git a/tests/unit/test_annotate.py b/tests/unit/test_annotate.py
index 0a6bea71..339d86ed 100644
--- a/tests/unit/test_annotate.py
+++ b/tests/unit/test_annotate.py
@@ -1,18 +1,16 @@
 import itertools
 import os
-import unittest
 
+import pytest
+import timeout_decorator
 from mavis.annotate.base import ReferenceName
-from mavis.annotate.protein import calculate_orf, Domain, DomainRegion
+from mavis.annotate.protein import Domain, DomainRegion, calculate_orf
 from mavis.annotate.variant import IndelCall
-import timeout_decorator
-
-from .mock import Mock, MockFunction
 
 DATA_DIR = os.path.join(os.path.dirname(__file__), 'data')
 
 
-class TestDomainAlignSeq(unittest.TestCase):
+class TestDomainAlignSeq:
     def test_large_combinations_finishes_with_error(self):
         input_seq = (
             'MADDEDYEEVVEYYTEEVVYEEVPGETITKIYETTTTRTSDYEQSETSKPALAQPALAQPASAKPVERRKVIRKKVDPSK'
@@ -273,100 +271,98 @@ def test_large_combinations_finishes_with_error(self):
             regions.append(DomainRegion(p, p + len(seq) - 1, seq=seq))
             p += len(seq)
         d = Domain('name', regions=regions)
-        with self.assertRaises(UserWarning):
+        with pytest.raises(UserWarning):
             d.align_seq(input_seq)
 
 
-class TestCalculateORF(unittest.TestCase):
-    def setUp(self):
-        # load the sequence
-        with open(os.path.join(DATA_DIR, 'calc_orf_test_sequence.fa'), 'r') as fh:
-            self.seq = fh.readlines()[0].strip()
-
+class TestCalculateORF:
     @timeout_decorator.timeout(20)
     def test_very_long(self):
-        calculate_orf(self.seq, 300)
+        # load the sequence
+        with open(os.path.join(DATA_DIR, 'calc_orf_test_sequence.fa'), 'r') as fh:
+            seq = fh.readlines()[0].strip()
+        calculate_orf(seq, 300)
 
 
-class TestReferenceName(unittest.TestCase):
+class TestReferenceName:
     def test_naked_vs_naked_str(self):
-        self.assertEqual('1', ReferenceName('1'))
-        self.assertNotEqual('2', ReferenceName('1'))
-        self.assertTrue(ReferenceName('1') == '1')
-        self.assertTrue(ReferenceName('1') != '2')
+        assert ReferenceName('1') == '1'
+        assert ReferenceName('1') != '2'
+        assert ReferenceName('1') == '1'
+        assert ReferenceName('1') != '2'
 
     def test_naked_vs_prefixed_str(self):
-        self.assertEqual('chr1', ReferenceName('1'))
-        self.assertNotEqual('chr2', ReferenceName('1'))
-        self.assertTrue(ReferenceName('1') == 'chr1')
-        self.assertTrue(ReferenceName('1') != 'chr2')
+        assert ReferenceName('1') == 'chr1'
+        assert ReferenceName('1') != 'chr2'
+        assert ReferenceName('1') == 'chr1'
+        assert ReferenceName('1') != 'chr2'
 
     def test_prefixed_vs_prefixed_str(self):
-        self.assertEqual('chr1', ReferenceName('chr1'))
-        self.assertNotEqual('chr2', ReferenceName('chr1'))
-        self.assertTrue(ReferenceName('chr1') == 'chr1')
-        self.assertTrue(ReferenceName('chr1') != 'chr2')
+        assert ReferenceName('chr1') == 'chr1'
+        assert ReferenceName('chr1') != 'chr2'
+        assert ReferenceName('chr1') == 'chr1'
+        assert ReferenceName('chr1') != 'chr2'
 
     def test_prefixed_vs_naked_str(self):
-        self.assertEqual('1', ReferenceName('chr1'))
-        self.assertNotEqual('2', ReferenceName('chr1'))
-        self.assertTrue(ReferenceName('chr1') == '1')
+        assert ReferenceName('chr1') == '1'
+        assert ReferenceName('chr1') != '2'
+        assert ReferenceName('chr1') == '1'
 
     def test_obj_comparison(self):
         r = ReferenceName('1')
         rprefix = ReferenceName('chr1')
         r2 = ReferenceName('2')
         r2prefix = ReferenceName('chr2')
-        self.assertEqual(r, rprefix)
-        self.assertEqual(rprefix, r)
-        self.assertEqual(rprefix, ReferenceName('chr1'))
-        self.assertEqual(r, ReferenceName('1'))
-        self.assertNotEqual(r2, rprefix)
-        self.assertNotEqual(r2prefix, rprefix)
-        self.assertNotEqual(r2, r)
-        self.assertNotEqual(r2prefix, r)
-        self.assertTrue(r == rprefix)
-        self.assertTrue(r != r2prefix)
-        self.assertFalse(r != rprefix)
+        assert rprefix == r
+        assert r == rprefix
+        assert ReferenceName('chr1') == rprefix
+        assert ReferenceName('1') == r
+        assert rprefix != r2
+        assert rprefix != r2prefix
+        assert r != r2
+        assert r != r2prefix
+        assert r == rprefix
+        assert r != r2prefix
+        assert not r != rprefix
 
     def test_lt(self):
         r = ReferenceName('1')
         rprefix = ReferenceName('chr1')
         r2 = ReferenceName('2')
         r2prefix = ReferenceName('chr2')
-        self.assertTrue(r <= rprefix)
-        self.assertFalse(r < rprefix)
-        self.assertFalse(rprefix < r)
-        self.assertTrue(rprefix <= r)
+        assert r <= rprefix
+        assert not r < rprefix
+        assert not rprefix < r
+        assert rprefix <= r
         for chr1, chr2 in itertools.product([r, rprefix], [r2, r2prefix]):
-            self.assertTrue(chr1 < chr2)
-            self.assertTrue(chr1 <= chr2)
+            assert chr1 < chr2
+            assert chr1 <= chr2
 
     def test_alpha_sort(self):
-        self.assertTrue(ReferenceName('10') < ReferenceName('3'))
-        self.assertTrue(ReferenceName('10') < ReferenceName('chr3'))
-        self.assertTrue(ReferenceName('chr10') < ReferenceName('3'))
-        self.assertTrue(ReferenceName('chr10') < ReferenceName('chr3'))
+        assert ReferenceName('10') < ReferenceName('3')
+        assert ReferenceName('10') < ReferenceName('chr3')
+        assert ReferenceName('chr10') < ReferenceName('3')
+        assert ReferenceName('chr10') < ReferenceName('chr3')
 
     def test_gt(self):
         r = ReferenceName('1')
         rprefix = ReferenceName('chr1')
         r2 = ReferenceName('2')
         r2prefix = ReferenceName('chr2')
-        self.assertTrue(rprefix >= r)
-        self.assertTrue(r >= rprefix)
-        self.assertFalse(r > rprefix)
-        self.assertFalse(rprefix > r)
+        assert rprefix >= r
+        assert r >= rprefix
+        assert not r > rprefix
+        assert not rprefix > r
         for chr1, chr2 in itertools.product([r, rprefix], [r2, r2prefix]):
-            self.assertTrue(chr2 > chr1)
-            self.assertTrue(chr2 >= chr1)
+            assert chr2 > chr1
+            assert chr2 >= chr1
 
     def test_hash(self):
-        self.assertTrue(ReferenceName('3') in {ReferenceName('3')})
-        self.assertTrue(ReferenceName('3') in {ReferenceName('chr3')})
+        assert ReferenceName('3') in {ReferenceName('3')}
+        assert ReferenceName('3') in {ReferenceName('chr3')}
 
 
-class TestIndelCall(unittest.TestCase):
+class TestIndelCall:
     def test_duplication_in_repeat(self):
         ref = 'ASFHGHGSFSFSLLLLLL' 'FLLLLSFSLMVPWSFKW'
         mut = 'ASFHGHGSFSFSLLLLLLL' 'FLLLLSFSLMVPWSFKW'
@@ -374,11 +370,11 @@ def test_duplication_in_repeat(self):
         call = IndelCall(ref, mut)
         print(call)
 
-        self.assertEqual(18, call.nterm_aligned)
-        self.assertEqual(len(ref) - 13 + 1, call.cterm_aligned)
-        self.assertTrue(call.is_dup)
+        assert call.nterm_aligned == 18
+        assert call.cterm_aligned == len(ref) - 13 + 1
+        assert call.is_dup
 
-        self.assertEqual('p.L18dupL', call.hgvs_protein_notation())
+        assert call.hgvs_protein_notation() == 'p.L18dupL'
 
     def test_nterminal_extension(self):
 
@@ -387,13 +383,13 @@ def test_nterminal_extension(self):
 
         call = IndelCall(ref, mut)
         print(call)
-        self.assertFalse(call.nterm_aligned)
-        self.assertEqual(len(call.ref_seq) - 1 + 1, call.cterm_aligned)
-        self.assertFalse(call.is_dup)
-        self.assertEqual('MAF', call.ins_seq)
-        self.assertEqual('', call.del_seq)
+        assert not call.nterm_aligned
+        assert call.cterm_aligned == len(call.ref_seq) - 1 + 1
+        assert not call.is_dup
+        assert call.ins_seq == 'MAF'
+        assert call.del_seq == ''
 
-        self.assertEqual('p.M1ext-3', call.hgvs_protein_notation())
+        assert call.hgvs_protein_notation() == 'p.M1ext-3'
 
     def test_nterminal_deletion(self):
         ref = 'MABCDEFGH'
@@ -401,13 +397,13 @@ def test_nterminal_deletion(self):
 
         call = IndelCall(ref, mut)
         print(call)
-        self.assertFalse(call.nterm_aligned)
-        self.assertEqual(len(call.ref_seq) - 4 + 1, call.cterm_aligned)
-        self.assertFalse(call.is_dup)
-        self.assertEqual('', call.ins_seq)
-        self.assertEqual('MAB', call.del_seq)
+        assert not call.nterm_aligned
+        assert call.cterm_aligned == len(call.ref_seq) - 4 + 1
+        assert not call.is_dup
+        assert call.ins_seq == ''
+        assert call.del_seq == 'MAB'
 
-        self.assertEqual('p.M1_B3delMAB', call.hgvs_protein_notation())
+        assert call.hgvs_protein_notation() == 'p.M1_B3delMAB'
 
     def test_cterminal_deletion(self):
         ref = 'MABCDEFGH'
@@ -415,13 +411,13 @@ def test_cterminal_deletion(self):
 
         call = IndelCall(ref, mut)
         print(call)
-        self.assertEqual(6, call.nterm_aligned)
-        self.assertFalse(call.cterm_aligned)
-        self.assertFalse(call.is_dup)
-        self.assertEqual('', call.ins_seq)
-        self.assertEqual('FGH', call.del_seq)
+        assert call.nterm_aligned == 6
+        assert not call.cterm_aligned
+        assert not call.is_dup
+        assert call.ins_seq == ''
+        assert call.del_seq == 'FGH'
 
-        self.assertEqual('p.F7_H9delFGH', call.hgvs_protein_notation())
+        assert call.hgvs_protein_notation() == 'p.F7_H9delFGH'
 
     def test_cterminal_extension(self):
         ref = 'MABCDEFGH'
@@ -429,13 +425,13 @@ def test_cterminal_extension(self):
 
         call = IndelCall(ref, mut)
         print(call)
-        self.assertEqual(9, call.nterm_aligned)
-        self.assertFalse(call.cterm_aligned)
-        self.assertFalse(call.is_dup)
-        self.assertEqual('IJK', call.ins_seq)
-        self.assertEqual('', call.del_seq)
+        assert call.nterm_aligned == 9
+        assert not call.cterm_aligned
+        assert not call.is_dup
+        assert call.ins_seq == 'IJK'
+        assert call.del_seq == ''
 
-        self.assertEqual('p.H9ext3', call.hgvs_protein_notation())
+        assert call.hgvs_protein_notation() == 'p.H9ext3'
 
     def test_cterminal_stop_extension(self):
         ref = 'MABCDEFGH*'
@@ -443,13 +439,13 @@ def test_cterminal_stop_extension(self):
 
         call = IndelCall(ref, mut)
         print(call)
-        self.assertEqual(9, call.nterm_aligned)
-        self.assertFalse(call.cterm_aligned)
-        self.assertFalse(call.is_dup)
-        self.assertEqual('IJK', call.ins_seq)
-        self.assertEqual('', call.del_seq)
+        assert call.nterm_aligned == 9
+        assert not call.cterm_aligned
+        assert not call.is_dup
+        assert call.ins_seq == 'IJK'
+        assert call.del_seq == ''
 
-        self.assertEqual('p.*10ext*3', call.hgvs_protein_notation())
+        assert call.hgvs_protein_notation() == 'p.*10ext*3'
 
     def test_cterminal_no_orf_ext(self):
         ref = 'MABCDEFGH'
@@ -457,13 +453,13 @@ def test_cterminal_no_orf_ext(self):
 
         call = IndelCall(ref, mut)
         print(call)
-        self.assertEqual(9, call.nterm_aligned)
-        self.assertFalse(call.cterm_aligned)
-        self.assertFalse(call.is_dup)
-        self.assertEqual('IJK*', call.ins_seq)
-        self.assertEqual('', call.del_seq)
+        assert call.nterm_aligned == 9
+        assert not call.cterm_aligned
+        assert not call.is_dup
+        assert call.ins_seq == 'IJK*'
+        assert call.del_seq == ''
 
-        self.assertEqual('p.H9ext*4', call.hgvs_protein_notation())
+        assert call.hgvs_protein_notation() == 'p.H9ext*4'
 
     def test_single_aa_insertion(self):
         ref = 'MABCDEFGH'
@@ -471,13 +467,13 @@ def test_single_aa_insertion(self):
 
         call = IndelCall(ref, mut)
         print(call)
-        self.assertEqual(4, call.nterm_aligned)
-        self.assertEqual(len(call.ref_seq) - 5 + 1, call.cterm_aligned)
-        self.assertFalse(call.is_dup)
-        self.assertEqual('K', call.ins_seq)
-        self.assertEqual('', call.del_seq)
+        assert call.nterm_aligned == 4
+        assert call.cterm_aligned == len(call.ref_seq) - 5 + 1
+        assert not call.is_dup
+        assert call.ins_seq == 'K'
+        assert call.del_seq == ''
 
-        self.assertEqual('p.C4_D5insK', call.hgvs_protein_notation())
+        assert call.hgvs_protein_notation() == 'p.C4_D5insK'
 
     def test_insertion(self):
         ref = 'MABCDEFGH'
@@ -485,13 +481,13 @@ def test_insertion(self):
 
         call = IndelCall(ref, mut)
         print(call)
-        self.assertEqual(4, call.nterm_aligned)
-        self.assertEqual(len(call.ref_seq) - 5 + 1, call.cterm_aligned)
-        self.assertFalse(call.is_dup)
-        self.assertEqual('KA', call.ins_seq)
-        self.assertEqual('', call.del_seq)
+        assert call.nterm_aligned == 4
+        assert call.cterm_aligned == len(call.ref_seq) - 5 + 1
+        assert not call.is_dup
+        assert call.ins_seq == 'KA'
+        assert call.del_seq == ''
 
-        self.assertEqual('p.C4_D5insKA', call.hgvs_protein_notation())
+        assert call.hgvs_protein_notation() == 'p.C4_D5insKA'
 
     def test_single_aa_deletion(self):
         ref = 'MABCDEFGH'
@@ -499,13 +495,13 @@ def test_single_aa_deletion(self):
 
         call = IndelCall(ref, mut)
         print(call)
-        self.assertEqual(4, call.nterm_aligned)
-        self.assertEqual(len(call.ref_seq) - 6 + 1, call.cterm_aligned)
-        self.assertFalse(call.is_dup)
-        self.assertEqual('', call.ins_seq)
-        self.assertEqual('D', call.del_seq)
+        assert call.nterm_aligned == 4
+        assert call.cterm_aligned == len(call.ref_seq) - 6 + 1
+        assert not call.is_dup
+        assert call.ins_seq == ''
+        assert call.del_seq == 'D'
 
-        self.assertEqual('p.D5delD', call.hgvs_protein_notation())
+        assert call.hgvs_protein_notation() == 'p.D5delD'
 
     def test_deletion(self):
         ref = 'MABCDEFGH'
@@ -513,13 +509,13 @@ def test_deletion(self):
 
         call = IndelCall(ref, mut)
         print(call)
-        self.assertEqual(4, call.nterm_aligned)
-        self.assertEqual(len(call.ref_seq) - 7 + 1, call.cterm_aligned)
-        self.assertFalse(call.is_dup)
-        self.assertEqual('', call.ins_seq)
-        self.assertEqual('DE', call.del_seq)
+        assert call.nterm_aligned == 4
+        assert call.cterm_aligned == len(call.ref_seq) - 7 + 1
+        assert not call.is_dup
+        assert call.ins_seq == ''
+        assert call.del_seq == 'DE'
 
-        self.assertEqual('p.D5_E6delDE', call.hgvs_protein_notation())
+        assert call.hgvs_protein_notation() == 'p.D5_E6delDE'
 
     def test_deletion_in_repeat(self):
         ref = 'MABCDEEEEEEFGH'
@@ -527,13 +523,13 @@ def test_deletion_in_repeat(self):
 
         call = IndelCall(ref, mut)
         print(call)
-        self.assertEqual(9, call.nterm_aligned)
-        self.assertEqual(len(call.ref_seq) - 8 + 1, call.cterm_aligned)
-        self.assertFalse(call.is_dup)
-        self.assertEqual('', call.ins_seq)
-        self.assertEqual('EE', call.del_seq)
+        assert call.nterm_aligned == 9
+        assert call.cterm_aligned == len(call.ref_seq) - 8 + 1
+        assert not call.is_dup
+        assert call.ins_seq == ''
+        assert call.del_seq == 'EE'
 
-        self.assertEqual('p.E10_E11delEE', call.hgvs_protein_notation())
+        assert call.hgvs_protein_notation() == 'p.E10_E11delEE'
 
     def test_insertion_in_repeat(self):
         ref = 'MABCDEEEEFGH'
@@ -541,10 +537,10 @@ def test_insertion_in_repeat(self):
 
         call = IndelCall(ref, mut)
         print(call)
-        self.assertEqual(9, call.nterm_aligned)
-        self.assertEqual(len(call.ref_seq) - 6 + 1, call.cterm_aligned)
-        self.assertTrue(call.is_dup)
-        self.assertEqual('EE', call.ins_seq)
-        self.assertEqual('', call.del_seq)
+        assert call.nterm_aligned == 9
+        assert call.cterm_aligned == len(call.ref_seq) - 6 + 1
+        assert call.is_dup
+        assert call.ins_seq == 'EE'
+        assert call.del_seq == ''
 
-        self.assertEqual('p.E8_E9dupEE', call.hgvs_protein_notation())
+        assert call.hgvs_protein_notation() == 'p.E8_E9dupEE'
diff --git a/tests/unit/test_assemble.py b/tests/unit/test_assemble.py
index 3aa18592..fbd5d0cb 100644
--- a/tests/unit/test_assemble.py
+++ b/tests/unit/test_assemble.py
@@ -1,81 +1,82 @@
 import itertools
-import random
 import os
-import unittest
-import pytest
+import random
 
-from mavis.assemble import assemble, Contig, DeBruijnGraph, filter_contigs, kmers
+import pytest
+from mavis.assemble import Contig, DeBruijnGraph, assemble, filter_contigs, kmers
 from mavis.constants import DNA_ALPHABET
 
+from ..util import long_running_test
+
 DATA_DIR = os.path.join(os.path.dirname(__file__), 'data')
 
 
-class TestModule(unittest.TestCase):
+class TestModule:
     """
     test class for functions in the validate namespace
     that are not associated with a class
     """
 
     def test_alphabet_matching(self):
-        self.assertTrue(DNA_ALPHABET.match('N', 'A'))
-        self.assertTrue(DNA_ALPHABET.match('A', 'N'))
+        assert DNA_ALPHABET.match('N', 'A')
+        assert DNA_ALPHABET.match('A', 'N')
 
     def test_kmers(self):
         k = kmers('ABCDEFG', 2)
-        self.assertEqual(['AB', 'BC', 'CD', 'DE', 'EF', 'FG'], k)
+        assert k == ['AB', 'BC', 'CD', 'DE', 'EF', 'FG']
         k = kmers('ABCDEFG', 3)
-        self.assertEqual(['ABC', 'BCD', 'CDE', 'DEF', 'EFG'], k)
+        assert k == ['ABC', 'BCD', 'CDE', 'DEF', 'EFG']
 
     def test_assemble(self):
         sequences = ['ABCD', 'BCDE', 'CDEF', 'ABCDE', 'DEFG']
         c = assemble(sequences, 3, min_edge_trim_weight=1, remap_min_exact_match=1)
-        self.assertEqual(1, len(c))
-        self.assertEqual('ABCDEFG', c[0].seq)
-        self.assertEqual(5, c[0].remap_score())
+        assert len(c) == 1
+        assert c[0].seq == 'ABCDEFG'
+        assert c[0].remap_score() == 5
 
     def test_assemble_empty_list(self):
-        self.assertEqual([], assemble([], 1))
+        assert assemble([], 1) == []
 
     def test_repeat_region_assembly(self):
         rep = 'ABCDEF'
         seqs = kmers(rep + rep, len(rep))
         contigs = assemble(seqs, len(rep) - 1, remap_min_exact_match=1)
-        self.assertEqual(0, len(contigs))
+        assert len(contigs) == 0
 
 
-class TestFilterContigs(unittest.TestCase):
+class TestFilterContigs:
     def test_drop_reverse_complement(self):
         c1 = Contig('atcgatcgatcgatcgatcgatcgatatagggcatcagc', 1)
         c2 = Contig('gctgatgccctatatcgatcgatcgatcgatcgatcgat', 1)
         result = filter_contigs([c2, c1], 0.10)
-        self.assertEqual(1, len(result))
-        self.assertEqual(c1.seq, result[0].seq)
+        assert len(result) == 1
+        assert result[0].seq == c1.seq
 
     def test_drop_alt_allele_alphabetically(self):
         c1 = Contig('atcgatcgatcgatcgatcgatcgatatagggcatcagc', 1)
         c2 = Contig('atcgatcgatcgatcgatctatcgatatagggcatcagc', 1)
         result = filter_contigs([c2, c1], 0.10)
-        self.assertEqual(1, len(result))
-        self.assertEqual(c1.seq, result[0].seq)
+        assert len(result) == 1
+        assert result[0].seq == c1.seq
 
     def test_drop_alt_allele_by_score(self):
         c1 = Contig('atcgatcgatcgatcgatcgatcgatatagggcatcagc', 2)
         c2 = Contig('atcgatcgatcgatcgatctatcgatatagggcatcagc', 1)
         result = filter_contigs([c2, c1], 0.10)
-        self.assertEqual(1, len(result))
-        self.assertEqual(c1.seq, result[0].seq)
+        assert len(result) == 1
+        assert result[0].seq == c1.seq
 
     def test_retain_disimilar(self):
         c1 = Contig('atcgatcgatcgatcgatcgatcgatatagggcatcagc', 2)
         c2 = Contig('atcgadatcgatcgatcgatctgtdstcgatatagggca', 1)
         result = filter_contigs([c2, c1], 0.10)
-        self.assertEqual(2, len(result))
+        assert len(result) == 2
 
     def test_retain_disimilar_different_lengths(self):
         c1 = Contig('atcgatcgatcgatcgatcgatcgatatagggcatcagc', 2)
         c2 = Contig('atcgatcgatcgatcgatcgatcccgtgatatagggcatcagc', 1)
         result = filter_contigs([c2, c1], 0.10)
-        self.assertEqual(2, len(result))
+        assert len(result) == 2
 
     def test_drop_similar_different_lengths(self):
         c1 = Contig(
@@ -87,12 +88,12 @@ def test_drop_similar_different_lengths(self):
             1,
         )
         result = filter_contigs([c2, c1], 0.10)
-        self.assertEqual(1, len(result))
-        self.assertEqual(c1.seq, result[0].seq)
+        assert len(result) == 1
+        assert result[0].seq == c1.seq
 
 
-class TestDeBruijnGraph(unittest.TestCase):
-    @pytest.mark.skipif(os.environ.get('RUN_FULL', '0') != '1', reason='running short tests only')
+class TestDeBruijnGraph:
+    @long_running_test
     def test_trim_tails_by_freq_forks(self):
         g = DeBruijnGraph()
         for s, t in itertools.combinations([1, 2, 3, 4, 5, 6], 2):
@@ -104,7 +105,7 @@ def test_trim_tails_by_freq_forks(self):
         g.add_edge(8, 7)
         g.add_edge(9, 8)
         g.trim_tails_by_freq(2)
-        self.assertEqual([1, 2, 3, 4, 5, 6], sorted(g.nodes()))
+        assert sorted(g.nodes()) == [1, 2, 3, 4, 5, 6]
 
         g = DeBruijnGraph()
         for s, t in itertools.combinations([1, 2, 3, 4, 5, 6], 2):
@@ -116,7 +117,7 @@ def test_trim_tails_by_freq_forks(self):
         g.add_edge(8, 7)
         g.add_edge(9, 8)
         g.trim_tails_by_freq(2)
-        self.assertEqual([1, 2, 3, 4, 5, 6, 7, 8], sorted(g.nodes()))
+        assert sorted(g.nodes()) == [1, 2, 3, 4, 5, 6, 7, 8]
 
         g = DeBruijnGraph()
         for s, t in itertools.combinations([1, 2, 3, 4, 5, 6], 2):
@@ -127,16 +128,16 @@ def test_trim_tails_by_freq_forks(self):
         g.add_edge(7, 8)
         g.add_edge(9, 8)
         g.trim_tails_by_freq(2)
-        self.assertEqual([1, 2, 3, 4, 5, 6], sorted(g.nodes()))
+        assert sorted(g.nodes()) == [1, 2, 3, 4, 5, 6]
 
     def test_add_edge(self):
         g = DeBruijnGraph()
         g.add_edge(1, 2)
-        self.assertEqual(1, g.get_edge_freq(1, 2))
+        assert g.get_edge_freq(1, 2) == 1
         g.add_edge(1, 2)
-        self.assertEqual(2, g.get_edge_freq(1, 2))
+        assert g.get_edge_freq(1, 2) == 2
         g.add_edge(1, 2, 5)
-        self.assertEqual(7, g.get_edge_freq(1, 2))
+        assert g.get_edge_freq(1, 2) == 7
 
     def test_trim_noncutting_paths_by_freq_degree_stop(self):
         g = DeBruijnGraph()
@@ -150,7 +151,7 @@ def test_trim_noncutting_paths_by_freq_degree_stop(self):
         for edge in g.edges():
             print(edge)
         g.trim_noncutting_paths_by_freq(3)
-        self.assertEqual(list(range(1, 9)) + path1[1:-1], g.nodes())
+        assert g.nodes() == list(range(1, 9)) + path1[1:-1]
 
         # add an equal weight path to force namesorting
         path2 = [5, 13, 14, 15, 16, 1]
@@ -158,14 +159,14 @@ def test_trim_noncutting_paths_by_freq_degree_stop(self):
             g.add_edge(s, t)
 
         g.trim_noncutting_paths_by_freq(3)
-        self.assertEqual(list(range(1, 9)) + path2[1:-1], g.nodes())
+        assert g.nodes() == list(range(1, 9)) + path2[1:-1]
 
         # add back the original path with a higher (but still low) weight
         for s, t in zip(path1, path1[1:]):
             g.add_edge(s, t, freq=2)
 
         g.trim_noncutting_paths_by_freq(3)
-        self.assertEqual(list(range(1, 9)) + path1[1:-1], g.nodes())
+        assert g.nodes() == list(range(1, 9)) + path1[1:-1]
 
         # add the second path with 1 high weight edge
         path2 = [5, 13, 14, 15, 16, 1]
@@ -174,28 +175,31 @@ def test_trim_noncutting_paths_by_freq_degree_stop(self):
         g.add_edge(14, 15, freq=6)
 
         g.trim_noncutting_paths_by_freq(3)
-        self.assertEqual(list(range(1, 9)) + path2[1:-1], g.nodes())
+        assert g.nodes() == list(range(1, 9)) + path2[1:-1]
+
 
+@pytest.fixture
+def assembly_sequences():
+    # load the sequences
+    with open(os.path.join(DATA_DIR, 'test_assembly_sequences.txt')) as fh:
+        seq = [i.strip() for i in fh.readlines()]
+    return seq
 
-class TestFullAssemly(unittest.TestCase):
-    def setUp(self):
-        # load the sequences
-        with open(os.path.join(DATA_DIR, 'test_assembly_sequences.txt')) as fh:
-            self.seq = [i.strip() for i in fh.readlines()]
 
-    @pytest.mark.skipif(os.environ.get('RUN_FULL', '0') != '1', reason='running short tests only')
-    def test_deterministic_assembly(self):
+class TestFullAssemly:
+    @long_running_test
+    def test_deterministic_assembly(self, assembly_sequences):
         contig_sequences = set()
         for i in range(20):
-            random.shuffle(self.seq)
+            random.shuffle(assembly_sequences)
             contigs = assemble(
-                self.seq,
+                assembly_sequences,
                 111,
                 min_edge_trim_weight=3,
                 assembly_max_paths=8,
                 assembly_min_uniq=0.1,
                 min_complexity=0.1,
             )
-            self.assertEqual(1, len(contigs))
+            assert len(contigs) == 1
             contig_sequences.add(contigs[0].seq)
-        self.assertEqual(1, len(contig_sequences))
+        assert len(contig_sequences) == 1
diff --git a/tests/unit/test_bam.py b/tests/unit/test_bam.py
index c4b0005e..bbcb0ab7 100644
--- a/tests/unit/test_bam.py
+++ b/tests/unit/test_bam.py
@@ -1,5 +1,4 @@
-import unittest
-
+import pytest
 from mavis.bam import cigar as _cigar
 from mavis.bam import read as _read
 from mavis.constants import CIGAR, ORIENT
@@ -7,7 +6,7 @@
 from .mock import Mock, MockFunction
 
 
-class TestPileUp(unittest.TestCase):
+class TestPileUp:
     def mock_read(self, positions, **kwargs):
         return Mock(get_reference_positions=MockFunction(positions), **kwargs)
 
@@ -15,7 +14,7 @@ def test_sparse_coverage(self):
         reads = [self.mock_read(range(0, 5)), self.mock_read(range(20, 25))]
         pileup = _read.pileup(reads)
         expected = [(r, 1) for r in range(1, 6)] + [(r, 1) for r in range(21, 26)]
-        self.assertEqual(expected, pileup)
+        assert pileup == expected
 
     def test_dense_coverage(self):
         reads = [
@@ -28,7 +27,7 @@ def test_dense_coverage(self):
         ]
         pileup = _read.pileup(reads)
         expected = list(zip(range(1, 9), [2, 4, 5, 6, 6, 4, 3, 2]))
-        self.assertEqual(expected, pileup)
+        assert pileup == expected
 
     def test_filter_reads(self):
         reads = [
@@ -41,14 +40,14 @@ def test_filter_reads(self):
         ]
         pileup = _read.pileup(reads, filter_func=lambda x: True if x.mapping_quality < 1 else False)
         expected = list(zip(range(2, 9), [1, 1, 2, 2, 2, 2, 2]))
-        self.assertEqual(expected, pileup)
+        assert pileup == expected
 
 
-class TestConvertEventsToSoftclipping(unittest.TestCase):
+class TestConvertEventsToSoftclipping:
     def test_left_large_deletion(self):
         read = Mock(cigar=[(CIGAR.EQ, 10), (CIGAR.D, 10), (CIGAR.EQ, 40)], query_sequence='A' * 50)
         converted = _read.convert_events_to_softclipping(read, ORIENT.LEFT, 5, 5)
-        self.assertEqual([(CIGAR.EQ, 10), (CIGAR.S, 40)], converted.cigar)
+        assert converted.cigar == [(CIGAR.EQ, 10), (CIGAR.S, 40)]
 
     def test_left_anchor_after_event(self):
         read = Mock(
@@ -56,14 +55,12 @@ def test_left_anchor_after_event(self):
             query_sequence='A' * 50,
         )
         converted = _read.convert_events_to_softclipping(read, ORIENT.LEFT, 5, 5)
-        self.assertEqual(
-            [(CIGAR.EQ, 4), (CIGAR.D, 10), (CIGAR.EQ, 40), (CIGAR.S, 6)], converted.cigar
-        )
+        assert converted.cigar == [(CIGAR.EQ, 4), (CIGAR.D, 10), (CIGAR.EQ, 40), (CIGAR.S, 6)]
 
     def test_left_all_mismatch_error(self):
         read = Mock(cigar=[(CIGAR.X, 10), (CIGAR.D, 10), (CIGAR.X, 40)], query_sequence='A' * 50)
         converted = _read.convert_events_to_softclipping(read, ORIENT.LEFT, 5, 5)
-        self.assertEqual(read, converted)
+        assert converted == read
 
     def test_left_combined_small_events(self):
         read = Mock(
@@ -71,7 +68,7 @@ def test_left_combined_small_events(self):
             query_sequence='A' * 50,
         )
         converted = _read.convert_events_to_softclipping(read, ORIENT.LEFT, 10, 10)
-        self.assertEqual([(CIGAR.EQ, 10), (CIGAR.S, 40)], converted.cigar)
+        assert converted.cigar == [(CIGAR.EQ, 10), (CIGAR.S, 40)]
 
     def test_right_large_deletion(self):
         read = Mock(
@@ -80,8 +77,8 @@ def test_right_large_deletion(self):
             reference_start=100,
         )
         converted = _read.convert_events_to_softclipping(read, ORIENT.RIGHT, 5, 5)
-        self.assertEqual([(CIGAR.S, 10), (CIGAR.EQ, 40)], converted.cigar)
-        self.assertEqual(read.reference_start + 20, converted.reference_start)
+        assert converted.cigar == [(CIGAR.S, 10), (CIGAR.EQ, 40)]
+        assert converted.reference_start == read.reference_start + 20
 
     def test_right_anchor_after_event(self):
         read = Mock(
@@ -90,10 +87,8 @@ def test_right_anchor_after_event(self):
             reference_start=100,
         )
         converted = _read.convert_events_to_softclipping(read, ORIENT.RIGHT, 5, 5)
-        self.assertEqual(
-            [(CIGAR.S, 6), (CIGAR.EQ, 40), (CIGAR.D, 10), (CIGAR.EQ, 4)], converted.cigar
-        )
-        self.assertEqual(read.reference_start + 16, converted.reference_start)
+        assert converted.cigar == [(CIGAR.S, 6), (CIGAR.EQ, 40), (CIGAR.D, 10), (CIGAR.EQ, 4)]
+        assert converted.reference_start == read.reference_start + 16
 
     def test_complex_alignment(self):
         cigar = [
@@ -112,15 +107,15 @@ def test_complex_alignment(self):
         ]
         read = Mock(cigar=cigar, query_sequence='A' * 365, reference_start=88217410)
 
-        with self.assertRaises(NotImplementedError):
+        with pytest.raises(NotImplementedError):
             _read.convert_events_to_softclipping(read, ORIENT.LEFT, 50, 50)
 
         read.cigar = [(CIGAR.EQ if x == CIGAR.M else x, y) for x, y in read.cigar]
         converted = _read.convert_events_to_softclipping(read, ORIENT.LEFT, 50, 50)
-        self.assertEqual([(CIGAR.EQ, 137), (CIGAR.S, 365 - 137)], converted.cigar)
+        assert converted.cigar == [(CIGAR.EQ, 137), (CIGAR.S, 365 - 137)]
 
         converted = _read.convert_events_to_softclipping(read, ORIENT.RIGHT, 50, 100)
-        self.assertEqual(read.cigar, converted.cigar)
+        assert converted.cigar == read.cigar
 
     def test_multiple_events(self):
         cigar = [
@@ -139,7 +134,7 @@ def test_multiple_events(self):
         read = Mock(cigar=cigar, query_sequence=('N' * qlen), reference_start=1000)
         converted = _read.convert_events_to_softclipping(read, ORIENT.RIGHT, 50, 50)
         exp = [(CIGAR.S, 59), (CIGAR.EQ, 28), (CIGAR.D, 2), (CIGAR.EQ, 27), (CIGAR.S, 77)]
-        self.assertEqual(exp, converted.cigar)
+        assert converted.cigar == exp
 
     def test_multiple_left_with_ins(self):
         cigar = [
@@ -180,82 +175,82 @@ def test_multiple_left_with_ins(self):
         qlen = sum([v for c, v in cigar if c in _cigar.QUERY_ALIGNED_STATES])
         read = Mock(cigar=cigar, query_sequence=('N' * qlen), reference_start=1000)
         converted = _read.convert_events_to_softclipping(read, ORIENT.LEFT, 50, 50)
-        self.assertEqual(exp, converted.cigar)
+        assert converted.cigar == exp
 
 
-class TestMergeIndels(unittest.TestCase):
+class TestMergeIndels:
     def test_no_events(self):
         c = [(CIGAR.EQ, 1)]
-        self.assertEqual(c, _cigar.merge_indels(c))
+        assert _cigar.merge_indels(c) == c
 
         c = [(CIGAR.EQ, 1), (CIGAR.X, 3), (CIGAR.EQ, 10)]
-        self.assertEqual(c, _cigar.merge_indels(c))
+        assert _cigar.merge_indels(c) == c
 
     def test_del_before_ins(self):
         c = [(CIGAR.EQ, 1), (CIGAR.D, 1), (CIGAR.I, 2), (CIGAR.EQ, 2)]
         exp = [(CIGAR.EQ, 1), (CIGAR.I, 2), (CIGAR.D, 1), (CIGAR.EQ, 2)]
-        self.assertEqual(exp, _cigar.merge_indels(c))
+        assert _cigar.merge_indels(c) == exp
 
     def test_ins_before_del(self):
         exp = [(CIGAR.EQ, 1), (CIGAR.I, 2), (CIGAR.D, 1), (CIGAR.EQ, 2)]
-        self.assertEqual(exp, _cigar.merge_indels(exp))
+        assert _cigar.merge_indels(exp) == exp
 
     def test_mixed(self):
         c = [(CIGAR.EQ, 1), (CIGAR.I, 2), (CIGAR.D, 1), (CIGAR.I, 2), (CIGAR.D, 1), (CIGAR.EQ, 2)]
         exp = [(CIGAR.EQ, 1), (CIGAR.I, 4), (CIGAR.D, 2), (CIGAR.EQ, 2)]
-        self.assertEqual(exp, _cigar.merge_indels(c))
+        assert _cigar.merge_indels(c) == exp
 
 
-class TestMergeInternalEvents(unittest.TestCase):
+class TestMergeInternalEvents:
     def test_mismatch_and_deletion(self):
         c = [(CIGAR.EQ, 10), (CIGAR.X, 2), (CIGAR.EQ, 5), (CIGAR.D, 2), (CIGAR.EQ, 10)]
         exp = [(CIGAR.EQ, 10), (CIGAR.I, 7), (CIGAR.D, 9), (CIGAR.EQ, 10)]
 
-        self.assertEqual(c, _cigar.merge_internal_events(c, 5))
-        self.assertEqual(exp, _cigar.merge_internal_events(c, 6))
+        assert _cigar.merge_internal_events(c, 5) == c
+        assert _cigar.merge_internal_events(c, 6) == exp
 
     def test_mismatch_and_insertion(self):
         c = [(CIGAR.EQ, 10), (CIGAR.X, 2), (CIGAR.EQ, 5), (CIGAR.I, 2), (CIGAR.EQ, 10)]
         exp = [(CIGAR.EQ, 10), (CIGAR.I, 9), (CIGAR.D, 7), (CIGAR.EQ, 10)]
 
-        self.assertEqual(c, _cigar.merge_internal_events(c, 5))
-        self.assertEqual(exp, _cigar.merge_internal_events(c, 6))
+        assert _cigar.merge_internal_events(c, 5) == c
+        assert _cigar.merge_internal_events(c, 6) == exp
 
     def test_insertions(self):
         c = [(CIGAR.EQ, 10), (CIGAR.I, 2), (CIGAR.EQ, 5), (CIGAR.I, 2), (CIGAR.EQ, 10)]
         exp = [(CIGAR.EQ, 10), (CIGAR.I, 9), (CIGAR.D, 5), (CIGAR.EQ, 10)]
 
-        self.assertEqual(c, _cigar.merge_internal_events(c, 5))
-        self.assertEqual(exp, _cigar.merge_internal_events(c, 6))
+        assert _cigar.merge_internal_events(c, 5) == c
+        assert _cigar.merge_internal_events(c, 6) == exp
 
     def test_deletions(self):
         c = [(CIGAR.EQ, 10), (CIGAR.D, 2), (CIGAR.EQ, 5), (CIGAR.D, 2), (CIGAR.EQ, 10)]
         exp = [(CIGAR.EQ, 10), (CIGAR.I, 5), (CIGAR.D, 9), (CIGAR.EQ, 10)]
 
-        self.assertEqual(c, _cigar.merge_internal_events(c, 5))
-        self.assertEqual(exp, _cigar.merge_internal_events(c, 6))
+        assert _cigar.merge_internal_events(c, 5) == c
+        assert _cigar.merge_internal_events(c, 6) == exp
 
     def test_insertion_and_deletion(self):
         c = [(CIGAR.EQ, 10), (CIGAR.I, 2), (CIGAR.EQ, 5), (CIGAR.D, 2), (CIGAR.EQ, 10)]
         exp = [(CIGAR.EQ, 10), (CIGAR.I, 7), (CIGAR.D, 7), (CIGAR.EQ, 10)]
 
-        self.assertEqual(c, _cigar.merge_internal_events(c, 5))
-        self.assertEqual(exp, _cigar.merge_internal_events(c, 6))
+        assert _cigar.merge_internal_events(c, 5) == c
+        assert _cigar.merge_internal_events(c, 6) == exp
 
     def test_no_internal_events(self):
         c = [(CIGAR.EQ, 10), (CIGAR.EQ, 10)]
         exp = [(CIGAR.EQ, 20)]
 
-        self.assertEqual(exp, _cigar.merge_internal_events(c, 10))
+        assert _cigar.merge_internal_events(c, 10) == exp
 
         c = [(CIGAR.X, 10), (CIGAR.EQ, 10)]
 
-        self.assertEqual(c, _cigar.merge_internal_events(c, 10))
+        assert _cigar.merge_internal_events(c, 10) == c
 
     def test_single_internal_event(self):
         c = [(CIGAR.EQ, 10), (CIGAR.X, 5), (CIGAR.EQ, 10)]
 
-        self.assertEqual(c, _cigar.merge_internal_events(c, 10))
+        assert _cigar.merge_internal_events(c, 10) == c
 
     def test_long_suffix_and_prefix(self):
         c = [
@@ -301,55 +296,52 @@ def test_long_suffix_and_prefix(self):
         actual = _cigar.merge_internal_events(c, 20, 15)
         print(c)
         print(actual)
-        self.assertEqual(exp, actual)
+        assert actual == exp
 
     def test_mismatch_only(self):
         exp = _cigar.convert_string_to_cigar('39=1X16=1X71=22S')
-        self.assertEqual(exp, _cigar.merge_internal_events(exp, 20, 15))
+        assert _cigar.merge_internal_events(exp, 20, 15) == exp
 
 
-class TestExtendSoftclipping(unittest.TestCase):
+class TestExtendSoftclipping:
     def test_simple(self):
-        self.assertEqual(
-            ([(CIGAR.S, 10), (CIGAR.M, 10)], 0),
-            _cigar.extend_softclipping([(CIGAR.S, 10), (CIGAR.M, 10)], 1),
+        assert _cigar.extend_softclipping([(CIGAR.S, 10), (CIGAR.M, 10)], 1) == (
+            [(CIGAR.S, 10), (CIGAR.M, 10)],
+            0,
         )
 
     def test_deletions(self):
-        self.assertEqual(
-            ([(CIGAR.S, 10), (CIGAR.M, 10)], 1),
-            _cigar.extend_softclipping([(CIGAR.I, 10), (CIGAR.D, 1), (CIGAR.M, 10)], 1),
+        assert _cigar.extend_softclipping([(CIGAR.I, 10), (CIGAR.D, 1), (CIGAR.M, 10)], 1) == (
+            [(CIGAR.S, 10), (CIGAR.M, 10)],
+            1,
         )
 
     def test_mismatch(self):
-        with self.assertRaises(AttributeError):
+        with pytest.raises(AttributeError):
             _cigar.extend_softclipping([(CIGAR.X, 10), (CIGAR.M, 20), (CIGAR.X, 10)], 30)
 
     def test_insert(self):
-        self.assertEqual(
-            ([(CIGAR.S, 17), (CIGAR.M, 10), (CIGAR.S, 5)], 2),
-            _cigar.extend_softclipping(
-                [(CIGAR.S, 10), (CIGAR.M, 2), (CIGAR.I, 5), (CIGAR.M, 10), (CIGAR.I, 5)], 5
-            ),
-        )
+        assert _cigar.extend_softclipping(
+            [(CIGAR.S, 10), (CIGAR.M, 2), (CIGAR.I, 5), (CIGAR.M, 10), (CIGAR.I, 5)], 5
+        ) == ([(CIGAR.S, 17), (CIGAR.M, 10), (CIGAR.S, 5)], 2)
 
     def test_hardclipping(self):
         c = [(CIGAR.H, 10), (CIGAR.EQ, 10)]
         cnew, prefix = _cigar.extend_softclipping(c, 1)
-        self.assertEqual(0, prefix)
-        self.assertEqual(c, cnew)
+        assert prefix == 0
+        assert cnew == c
 
     def test_hardclipping_right(self):
         c = [(CIGAR.EQ, 30), (CIGAR.H, 120)]
         cnew, prefix = _cigar.extend_softclipping(c, 6)
-        self.assertEqual(0, prefix)
-        self.assertEqual(c, cnew)
+        assert prefix == 0
+        assert cnew == c
 
 
-class TestSequenceComplexity(unittest.TestCase):
+class TestSequenceComplexity:
     def test_low_at(self):
         seq = 'TATATATAAATATATATTTATATATACATTATTTATATATAAATATATATTTATACATTATTTATATATAAATATATATTTATATATACATTATGTATATATAAAT'
-        self.assertEqual(0.04, round(_read.sequence_complexity(seq), 2))
+        assert round(_read.sequence_complexity(seq), 2) == 0.04
 
     def test_empty(self):
-        self.assertEqual(0, _read.sequence_complexity(''))
+        assert _read.sequence_complexity('') == 0
diff --git a/tests/unit/test_blat.py b/tests/unit/test_blat.py
index 749b0611..488f14d7 100644
--- a/tests/unit/test_blat.py
+++ b/tests/unit/test_blat.py
@@ -1,12 +1,11 @@
-import unittest
-
+import pytest
 from mavis.blat import Blat
 from mavis.constants import CIGAR, reverse_complement
 
 from .mock import Mock, MockFunction, MockLongString
 
 
-class TestConvertPslxToPysam(unittest.TestCase):
+class TestConvertPslxToPysam:
     def test_simple(self):
         row = {
             'match': 142,
@@ -50,10 +49,10 @@ def test_simple(self):
         }
         cache = Mock(reference_id=MockFunction(16))
         read = Blat.pslx_row_to_pysam(row, cache, refseq)
-        self.assertEqual(16, read.reference_id)
-        self.assertEqual('17', read.reference_name)
-        self.assertEqual(row['qseq_full'], reverse_complement(read.query_sequence))
-        self.assertEqual([(CIGAR.S, 62), (CIGAR.EQ, 142)], read.cigar)
+        assert read.reference_id == 16
+        assert read.reference_name == '17'
+        assert reverse_complement(read.query_sequence) == row['qseq_full']
+        assert read.cigar == [(CIGAR.S, 62), (CIGAR.EQ, 142)]
 
     def test_overlapping_blat_blocks_error(self):
         row = {
@@ -72,5 +71,5 @@ def test_overlapping_blat_blocks_error(self):
             ),
         }
         cache = Mock(reference_id=MockFunction(6))
-        with self.assertRaises(AssertionError):
+        with pytest.raises(AssertionError):
             Blat.pslx_row_to_pysam(row, cache, None)
diff --git a/tests/unit/test_breakpoint.py b/tests/unit/test_breakpoint.py
index 56bdb9da..9eeb8347 100644
--- a/tests/unit/test_breakpoint.py
+++ b/tests/unit/test_breakpoint.py
@@ -1,17 +1,16 @@
-import unittest
 from unittest.mock import Mock
 
+import pytest
 from mavis.breakpoint import Breakpoint, BreakpointPair
-from mavis.constants import COLUMNS, ORIENT, STRAND, SVTYPE
+from mavis.constants import ORIENT, STRAND, SVTYPE
 from mavis.error import InvalidRearrangement, NotSpecifiedError
 from mavis.interval import Interval
-from mavis.util import read_bpp_from_input_file
 
 
-class TestBreakpoint(unittest.TestCase):
+class TestBreakpoint:
     def test___eq__(self):
-        self.assertNotEqual(Breakpoint('1', 1), None)
-        self.assertEqual(Breakpoint('1', 1), Breakpoint('1', 1))
+        assert Breakpoint('1', 1) != None  # noqa: E711
+        assert Breakpoint('1', 1) == Breakpoint('1', 1)
 
     def test___hash__(self):
         b = Breakpoint('1', 1, 2)
@@ -22,44 +21,44 @@ def test___hash__(self):
         temp.add(b)
         temp.add(c)
         temp.add(d)
-        self.assertEqual(2, len(temp))
+        assert len(temp) == 2
 
         temp = dict()
         temp[b] = None
         temp[c] = None
         temp[d] = None
-        self.assertEqual(2, len(temp.keys()))
+        assert len(temp.keys()) == 2
 
     def test___len__(self):
-        with self.assertRaises(AttributeError):
+        with pytest.raises(AttributeError):
             Breakpoint('11', 87042760, 87041922, orient=ORIENT.LEFT, strand=STRAND.NS)
 
     def test_inherited_interval_methods(self):
         b = Breakpoint('1', 1, 10)
-        self.assertEqual(1, b[0])
-        self.assertEqual(10, b[1])
-        self.assertEqual(10, len(b))
+        assert b[0] == 1
+        assert b[1] == 10
+        assert len(b) == 10
 
     def test_breakpoint_constructor(self):
         b = Breakpoint('1', 10, 50)
-        self.assertEqual(10, b[0])
-        self.assertEqual(50, b[1])
-        self.assertTrue(Interval.overlaps((1, 10), b))
-        self.assertTrue(Interval.overlaps((50, 55), b))
-        self.assertFalse(Interval.overlaps((1, 9), b))
+        assert b[0] == 10
+        assert b[1] == 50
+        assert Interval.overlaps((1, 10), b)
+        assert Interval.overlaps((50, 55), b)
+        assert not Interval.overlaps((1, 9), b)
 
 
-class TestBreakpointPair(unittest.TestCase):
+class TestBreakpointPair:
     def test___eq__(self):
         b = BreakpointPair(Breakpoint('1', 1), Breakpoint('1', 3), opposing_strands=True)
         c = BreakpointPair(Breakpoint('1', 1), Breakpoint('1', 3), opposing_strands=True)
-        self.assertFalse(b is c)
-        self.assertEqual(b, c)
+        assert b is not c
+        assert c == b
         d = BreakpointPair(
             Breakpoint('1', 1), Breakpoint('1', 3), opposing_strands=True, untemplated_seq=''
         )
-        self.assertNotEqual(b, d)
-        self.assertNotEqual(b, None)
+        assert d != b
+        assert None != b  # noqa: E711
 
     def test___hash__(self):
         b = BreakpointPair(Breakpoint('1', 1), Breakpoint('1', 3), opposing_strands=True)
@@ -67,31 +66,31 @@ def test___hash__(self):
         d = BreakpointPair(
             Breakpoint('1', 1), Breakpoint('1', 3), opposing_strands=True, untemplated_seq=''
         )
-        self.assertFalse(b is c)
+        assert b is not c
         temp = dict()
         temp[b] = None
         temp[d] = None
         temp[c] = None
-        self.assertEqual(2, len(temp.keys()))
+        assert len(temp.keys()) == 2
 
         temp = set()
         temp.add(b)
         temp.add(c)
         temp.add(d)
-        self.assertEqual(2, len(temp))
+        assert len(temp) == 2
 
     def test___init__swap_break_order(self):
         b1 = Breakpoint('1', 1)
         b2 = Breakpoint('1', 50)
         bpp = BreakpointPair(b1, b2, opposing_strands=True)
-        self.assertEqual(bpp.break1, b1)
-        self.assertEqual(bpp.break2, b2)
+        assert b1 == bpp.break1
+        assert b2 == bpp.break2
         bpp = BreakpointPair(b2, b1, opposing_strands=True)
-        self.assertEqual(bpp.break1, b1)
-        self.assertEqual(bpp.break2, b2)
+        assert b1 == bpp.break1
+        assert b2 == bpp.break2
 
     def test___init__opstrand_conflict(self):
-        with self.assertRaises(AssertionError):
+        with pytest.raises(AssertionError):
             BreakpointPair(
                 Breakpoint('1', 1, strand=STRAND.POS),
                 Breakpoint('1', 2, strand=STRAND.POS),
@@ -100,16 +99,16 @@ def test___init__opstrand_conflict(self):
 
     def test___init__opstrand_indv_not_specified(self):
         bpp = BreakpointPair(Breakpoint('test', 1), Breakpoint('test', 10), opposing_strands=True)
-        self.assertTrue(bpp.opposing_strands)
+        assert bpp.opposing_strands
         bpp = BreakpointPair(Breakpoint('test', 1), Breakpoint('test', 10), opposing_strands=False)
-        self.assertFalse(bpp.opposing_strands)
+        assert not bpp.opposing_strands
 
     def test___init__opstrand_not_specified(self):
-        with self.assertRaises(NotSpecifiedError):
+        with pytest.raises(NotSpecifiedError):
             BreakpointPair(Breakpoint('1', 1), Breakpoint('1', 2))
 
     def test___init__stranded(self):
-        with self.assertRaises(NotSpecifiedError):
+        with pytest.raises(NotSpecifiedError):
             BreakpointPair(
                 Breakpoint('1', 1), Breakpoint('1', 2), stranded=True, opposing_strands=True
             )
@@ -118,25 +117,25 @@ def test___get_item__(self):
         bp1 = Breakpoint(1, 1, 2, ORIENT.LEFT)
         bp2 = Breakpoint(2, 1, 2, ORIENT.LEFT)
         bpp = BreakpointPair(bp1, bp2, opposing_strands=True)
-        self.assertEqual(bpp[0], bp1)
-        self.assertEqual(bpp[1], bp2)
-        with self.assertRaises(IndexError):
+        assert bp1 == bpp[0]
+        assert bp2 == bpp[1]
+        with pytest.raises(IndexError):
             bpp['?']
-        with self.assertRaises(IndexError):
+        with pytest.raises(IndexError):
             bpp[2]
 
     def test_interchromosomal(self):
         bp1 = Breakpoint(1, 1, 2, ORIENT.LEFT)
         bp2 = Breakpoint(2, 1, 2, ORIENT.LEFT)
         bpp = BreakpointPair(bp1, bp2, opposing_strands=True)
-        self.assertTrue(bpp.interchromosomal)
+        assert bpp.interchromosomal
         bp1 = Breakpoint(1, 1, 2, ORIENT.LEFT)
         bp2 = Breakpoint(1, 7, 8, ORIENT.LEFT)
         bpp = BreakpointPair(bp1, bp2, opposing_strands=True)
-        self.assertFalse(bpp.interchromosomal)
+        assert not bpp.interchromosomal
 
     def test___init__invalid_intra_rprp(self):
-        with self.assertRaises(InvalidRearrangement):
+        with pytest.raises(InvalidRearrangement):
             BreakpointPair(
                 Breakpoint(1, 1, 2, strand=STRAND.POS, orient=ORIENT.RIGHT),
                 Breakpoint(1, 10, 11, strand=STRAND.POS, orient=ORIENT.RIGHT),
@@ -144,7 +143,7 @@ def test___init__invalid_intra_rprp(self):
             )
 
     def test___init__invalid_intra_rnrn(self):
-        with self.assertRaises(InvalidRearrangement):
+        with pytest.raises(InvalidRearrangement):
             BreakpointPair(
                 Breakpoint(1, 1, 2, strand=STRAND.NEG, orient=ORIENT.RIGHT),
                 Breakpoint(1, 10, 11, strand=STRAND.NEG, orient=ORIENT.RIGHT),
@@ -152,7 +151,7 @@ def test___init__invalid_intra_rnrn(self):
             )
 
     def test___init__invalid_intra_rpln(self):
-        with self.assertRaises(InvalidRearrangement):
+        with pytest.raises(InvalidRearrangement):
             BreakpointPair(
                 Breakpoint(1, 1, 2, strand=STRAND.POS, orient=ORIENT.RIGHT),
                 Breakpoint(1, 10, 11, strand=STRAND.NEG, orient=ORIENT.LEFT),
@@ -160,7 +159,7 @@ def test___init__invalid_intra_rpln(self):
             )
 
     def test___init__invalid_intra_lprn(self):
-        with self.assertRaises(InvalidRearrangement):
+        with pytest.raises(InvalidRearrangement):
             BreakpointPair(
                 Breakpoint(1, 1, 2, strand=STRAND.POS, orient=ORIENT.LEFT),
                 Breakpoint(1, 10, 11, strand=STRAND.NEG, orient=ORIENT.RIGHT),
@@ -168,7 +167,7 @@ def test___init__invalid_intra_lprn(self):
             )
 
     def test___init__invalid_intra_rnlp(self):
-        with self.assertRaises(InvalidRearrangement):
+        with pytest.raises(InvalidRearrangement):
             BreakpointPair(
                 Breakpoint(1, 1, 2, strand=STRAND.NEG, orient=ORIENT.RIGHT),
                 Breakpoint(1, 10, 11, strand=STRAND.POS, orient=ORIENT.LEFT),
@@ -176,7 +175,7 @@ def test___init__invalid_intra_rnlp(self):
             )
 
     def test___init__invalid_intra_lnrp(self):
-        with self.assertRaises(InvalidRearrangement):
+        with pytest.raises(InvalidRearrangement):
             BreakpointPair(
                 Breakpoint(1, 1, 2, strand=STRAND.NEG, orient=ORIENT.LEFT),
                 Breakpoint(1, 10, 11, strand=STRAND.POS, orient=ORIENT.RIGHT),
@@ -184,7 +183,7 @@ def test___init__invalid_intra_lnrp(self):
             )
 
     def test___init__invalid_inter_rl_opp(self):
-        with self.assertRaises(InvalidRearrangement):
+        with pytest.raises(InvalidRearrangement):
             BreakpointPair(
                 Breakpoint(1, 1, 2, ORIENT.RIGHT),
                 Breakpoint(2, 1, 2, ORIENT.LEFT),
@@ -192,7 +191,7 @@ def test___init__invalid_inter_rl_opp(self):
             )
 
     def test___init__invalid_inter_lr_opp(self):
-        with self.assertRaises(InvalidRearrangement):
+        with pytest.raises(InvalidRearrangement):
             BreakpointPair(
                 Breakpoint(1, 1, 2, ORIENT.LEFT),
                 Breakpoint(2, 1, 2, ORIENT.RIGHT),
@@ -200,7 +199,7 @@ def test___init__invalid_inter_lr_opp(self):
             )
 
 
-class TestClassifyBreakpointPair(unittest.TestCase):
+class TestClassifyBreakpointPair:
     def test_inverted_translocation(self):
         b = BreakpointPair(
             Breakpoint(1, 1, 2, ORIENT.LEFT),
@@ -222,116 +221,116 @@ def test_inversion(self):
             Breakpoint(1, 1, 2, strand=STRAND.POS, orient=ORIENT.RIGHT),
             Breakpoint(1, 10, 11, strand=STRAND.NEG, orient=ORIENT.RIGHT),
         )
-        self.assertEqual({SVTYPE.INV}, BreakpointPair.classify(b))
+        assert BreakpointPair.classify(b) == {SVTYPE.INV}
 
         b = BreakpointPair(
             Breakpoint(1, 1, 2, strand=STRAND.NEG, orient=ORIENT.RIGHT),
             Breakpoint(1, 10, 11, strand=STRAND.POS, orient=ORIENT.RIGHT),
         )
-        self.assertEqual({SVTYPE.INV}, BreakpointPair.classify(b))
+        assert BreakpointPair.classify(b) == {SVTYPE.INV}
 
         b = BreakpointPair(
             Breakpoint(1, 1, 2, strand=STRAND.POS, orient=ORIENT.RIGHT),
             Breakpoint(1, 10, 11, strand=STRAND.NEG, orient=ORIENT.NS),
         )
-        self.assertEqual({SVTYPE.INV}, BreakpointPair.classify(b))
+        assert BreakpointPair.classify(b) == {SVTYPE.INV}
 
         b = BreakpointPair(
             Breakpoint(1, 1, 2, strand=STRAND.NEG, orient=ORIENT.RIGHT),
             Breakpoint(1, 10, 11, strand=STRAND.POS, orient=ORIENT.NS),
         )
-        self.assertEqual({SVTYPE.INV}, BreakpointPair.classify(b))
+        assert BreakpointPair.classify(b) == {SVTYPE.INV}
 
         b = BreakpointPair(
             Breakpoint(1, 1, 2, strand=STRAND.POS, orient=ORIENT.LEFT),
             Breakpoint(1, 10, 11, strand=STRAND.NEG, orient=ORIENT.LEFT),
         )
-        self.assertEqual({SVTYPE.INV}, BreakpointPair.classify(b))
+        assert BreakpointPair.classify(b) == {SVTYPE.INV}
 
         b = BreakpointPair(
             Breakpoint(1, 1, 2, strand=STRAND.NEG, orient=ORIENT.LEFT),
             Breakpoint(1, 10, 11, strand=STRAND.POS, orient=ORIENT.LEFT),
         )
-        self.assertEqual({SVTYPE.INV}, BreakpointPair.classify(b))
+        assert BreakpointPair.classify(b) == {SVTYPE.INV}
 
     def test_duplication(self):
         b = BreakpointPair(
             Breakpoint(1, 1, 2, strand=STRAND.POS, orient=ORIENT.RIGHT),
             Breakpoint(1, 10, 11, strand=STRAND.POS, orient=ORIENT.LEFT),
         )
-        self.assertEqual({SVTYPE.DUP}, BreakpointPair.classify(b))
+        assert BreakpointPair.classify(b) == {SVTYPE.DUP}
 
         b = BreakpointPair(
             Breakpoint(1, 1, 2, strand=STRAND.POS, orient=ORIENT.RIGHT),
             Breakpoint(1, 10, 11, strand=STRAND.POS, orient=ORIENT.LEFT),
         )
-        self.assertEqual({SVTYPE.DUP}, BreakpointPair.classify(b))
+        assert BreakpointPair.classify(b) == {SVTYPE.DUP}
 
         b = BreakpointPair(
             Breakpoint(1, 1, 2, strand=STRAND.NEG, orient=ORIENT.RIGHT),
             Breakpoint(1, 10, 11, strand=STRAND.NEG, orient=ORIENT.LEFT),
         )
-        self.assertEqual({SVTYPE.DUP}, BreakpointPair.classify(b))
+        assert BreakpointPair.classify(b) == {SVTYPE.DUP}
 
         b = BreakpointPair(
             Breakpoint(1, 1, 2, strand=STRAND.POS, orient=ORIENT.RIGHT),
             Breakpoint(1, 10, 11, strand=STRAND.POS, orient=ORIENT.NS),
         )
-        self.assertEqual({SVTYPE.DUP}, BreakpointPair.classify(b))
+        assert BreakpointPair.classify(b) == {SVTYPE.DUP}
 
         b = BreakpointPair(
             Breakpoint(1, 1, 2, strand=STRAND.NEG, orient=ORIENT.RIGHT),
             Breakpoint(1, 10, 11, strand=STRAND.NEG, orient=ORIENT.NS),
         )
-        self.assertEqual({SVTYPE.DUP}, BreakpointPair.classify(b))
+        assert BreakpointPair.classify(b) == {SVTYPE.DUP}
 
     def test_deletion_or_insertion(self):
         b = BreakpointPair(
             Breakpoint(1, 1, 2, strand=STRAND.POS, orient=ORIENT.LEFT),
             Breakpoint(1, 10, 11, strand=STRAND.POS, orient=ORIENT.RIGHT),
         )
-        self.assertEqual(sorted([SVTYPE.DEL, SVTYPE.INS]), sorted(BreakpointPair.classify(b)))
+        assert sorted(BreakpointPair.classify(b)) == sorted([SVTYPE.DEL, SVTYPE.INS])
 
         b = BreakpointPair(
             Breakpoint(1, 1, 2, strand=STRAND.POS, orient=ORIENT.LEFT),
             Breakpoint(1, 10, 11, strand=STRAND.NS, orient=ORIENT.RIGHT),
             opposing_strands=False,
         )
-        self.assertEqual(sorted([SVTYPE.DEL, SVTYPE.INS]), sorted(BreakpointPair.classify(b)))
+        assert sorted(BreakpointPair.classify(b)) == sorted([SVTYPE.DEL, SVTYPE.INS])
 
         b = BreakpointPair(
             Breakpoint(1, 1, 2, strand=STRAND.NEG, orient=ORIENT.LEFT),
             Breakpoint(1, 10, 11, strand=STRAND.NEG, orient=ORIENT.RIGHT),
         )
-        self.assertEqual(sorted([SVTYPE.DEL, SVTYPE.INS]), sorted(BreakpointPair.classify(b)))
+        assert sorted(BreakpointPair.classify(b)) == sorted([SVTYPE.DEL, SVTYPE.INS])
 
         b = BreakpointPair(
             Breakpoint(1, 1, 2, strand=STRAND.NEG, orient=ORIENT.LEFT),
             Breakpoint(1, 10, 11, strand=STRAND.NS, orient=ORIENT.RIGHT),
             opposing_strands=False,
         )
-        self.assertEqual(sorted([SVTYPE.DEL, SVTYPE.INS]), sorted(BreakpointPair.classify(b)))
+        assert sorted(BreakpointPair.classify(b)) == sorted([SVTYPE.DEL, SVTYPE.INS])
 
         b = BreakpointPair(
             Breakpoint(1, 1, 2, strand=STRAND.NS, orient=ORIENT.LEFT),
             Breakpoint(1, 10, 11, strand=STRAND.POS, orient=ORIENT.RIGHT),
             opposing_strands=False,
         )
-        self.assertEqual(sorted([SVTYPE.DEL, SVTYPE.INS]), sorted(BreakpointPair.classify(b)))
+        assert sorted(BreakpointPair.classify(b)) == sorted([SVTYPE.DEL, SVTYPE.INS])
 
         b = BreakpointPair(
             Breakpoint(1, 1, 2, strand=STRAND.NS, orient=ORIENT.LEFT),
             Breakpoint(1, 10, 11, strand=STRAND.NEG, orient=ORIENT.RIGHT),
             opposing_strands=False,
         )
-        self.assertEqual(sorted([SVTYPE.DEL, SVTYPE.INS]), sorted(BreakpointPair.classify(b)))
+        assert sorted(BreakpointPair.classify(b)) == sorted([SVTYPE.DEL, SVTYPE.INS])
 
         b = BreakpointPair(
             Breakpoint(1, 1, 2, strand=STRAND.NS, orient=ORIENT.LEFT),
             Breakpoint(1, 10, 11, strand=STRAND.NS, orient=ORIENT.RIGHT),
             opposing_strands=False,
         )
-        self.assertEqual(sorted([SVTYPE.DEL, SVTYPE.INS]), sorted(BreakpointPair.classify(b)))
+        assert sorted(BreakpointPair.classify(b)) == sorted([SVTYPE.DEL, SVTYPE.INS])
 
     def test_insertion(self):
         b = BreakpointPair(
@@ -339,7 +338,7 @@ def test_insertion(self):
             Breakpoint(1, 2, 2, strand=STRAND.NS, orient=ORIENT.RIGHT),
             opposing_strands=False,
         )
-        self.assertEqual(sorted([SVTYPE.INS]), sorted(BreakpointPair.classify(b)))
+        assert sorted(BreakpointPair.classify(b)) == sorted([SVTYPE.INS])
 
     def test_no_type(self):
         b = BreakpointPair(
@@ -348,7 +347,7 @@ def test_no_type(self):
             opposing_strands=False,
             untemplated_seq='',
         )
-        self.assertEqual(set(), BreakpointPair.classify(b))
+        assert BreakpointPair.classify(b) == set()
 
     def test_deletion(self):
         b = BreakpointPair(
@@ -357,7 +356,7 @@ def test_deletion(self):
             opposing_strands=False,
             untemplated_seq='',
         )
-        self.assertEqual(sorted([SVTYPE.DEL]), sorted(BreakpointPair.classify(b)))
+        assert sorted(BreakpointPair.classify(b)) == sorted([SVTYPE.DEL])
 
     def test_deletion_with_useq(self):
         bpp = BreakpointPair(
@@ -366,30 +365,30 @@ def test_deletion_with_useq(self):
             opposing=False,
             untemplated_seq='CCCT',
         )
-        self.assertEqual(sorted([SVTYPE.DEL, SVTYPE.INS]), sorted(BreakpointPair.classify(bpp)))
+        assert sorted(BreakpointPair.classify(bpp)) == sorted([SVTYPE.DEL, SVTYPE.INS])
 
         def distance(x, y):
             return Interval(abs(x - y))
 
         net_size = BreakpointPair.net_size(bpp, distance)
-        self.assertEqual(Interval(-71), net_size)
-        self.assertEqual(sorted([SVTYPE.DEL]), sorted(BreakpointPair.classify(bpp, distance)))
+        assert net_size == Interval(-71)
+        assert sorted(BreakpointPair.classify(bpp, distance)) == sorted([SVTYPE.DEL])
 
     def test_deletion_no_distance_error(self):
         bpp = BreakpointPair(
             Breakpoint('1', 7039, orient='L'), Breakpoint('1', 7040, orient='R'), opposing=False
         )
-        self.assertEqual(sorted([SVTYPE.INS]), sorted(BreakpointPair.classify(bpp)))
+        assert sorted(BreakpointPair.classify(bpp)) == sorted([SVTYPE.INS])
 
 
-class TestNetSize(unittest.TestCase):
+class TestNetSize:
     def test_indel(self):
         bpp = BreakpointPair(
             Breakpoint('1', 13, orient=ORIENT.RIGHT),
             Breakpoint('1', 10, orient=ORIENT.LEFT),
             untemplated_seq='TTT',
         )
-        self.assertEqual(Interval(1), bpp.net_size())
+        assert bpp.net_size() == Interval(1)
 
     def test_large_indel(self):
         bpp = BreakpointPair(
@@ -397,7 +396,7 @@ def test_large_indel(self):
             Breakpoint('1', 101, orient=ORIENT.RIGHT),
             untemplated_seq='TTT',
         )
-        self.assertEqual(Interval(-87), bpp.net_size())
+        assert bpp.net_size() == Interval(-87)
 
     def test_insertion(self):
         bpp = BreakpointPair(
@@ -405,14 +404,14 @@ def test_insertion(self):
             Breakpoint('1', 10, orient=ORIENT.LEFT),
             untemplated_seq='T',
         )
-        self.assertEqual(Interval(1), bpp.net_size())
+        assert bpp.net_size() == Interval(1)
 
         bpp = BreakpointPair(
             Breakpoint('1', 11, orient=ORIENT.RIGHT),
             Breakpoint('1', 10, orient=ORIENT.LEFT),
             untemplated_seq='TT',
         )
-        self.assertEqual(Interval(2), bpp.net_size())
+        assert bpp.net_size() == Interval(2)
 
     def test_duplication_with_insertion(self):
         bpp = BreakpointPair(
@@ -420,7 +419,7 @@ def test_duplication_with_insertion(self):
             Breakpoint('1', 15, orient=ORIENT.LEFT),
             untemplated_seq='TTT',
         )
-        self.assertEqual(Interval(9), bpp.net_size())
+        assert bpp.net_size() == Interval(9)
 
     def test_deletion(self):
         bpp = BreakpointPair(
@@ -428,7 +427,7 @@ def test_deletion(self):
             Breakpoint('1', 15, orient=ORIENT.RIGHT),
             untemplated_seq='',
         )
-        self.assertEqual(Interval(-4), bpp.net_size())
+        assert bpp.net_size() == Interval(-4)
 
     def test_inversion(self):
         bpp = BreakpointPair(
@@ -436,7 +435,7 @@ def test_inversion(self):
             Breakpoint('1', 15, orient=ORIENT.LEFT),
             untemplated_seq='',
         )
-        self.assertEqual(Interval(0), bpp.net_size())
+        assert bpp.net_size() == Interval(0)
 
     def test_inversion_insertion(self):
         bpp = BreakpointPair(
@@ -444,10 +443,10 @@ def test_inversion_insertion(self):
             Breakpoint('1', 15, orient=ORIENT.LEFT),
             untemplated_seq='TT',
         )
-        self.assertEqual(Interval(2), bpp.net_size())
+        assert bpp.net_size() == Interval(2)
 
 
-class TestUntemplatedShift(unittest.TestCase):
+class TestUntemplatedShift:
     def test_indel(self):
         ref = {
             '1': Mock(
@@ -461,4 +460,4 @@ def test_indel(self):
         )
         result = bpp.untemplated_shift(ref)
         print(result)
-        self.assertEqual((0, 1), result)
+        assert result == (0, 1)
diff --git a/tests/unit/test_call_indels.py b/tests/unit/test_call_indels.py
index f897113d..840947df 100644
--- a/tests/unit/test_call_indels.py
+++ b/tests/unit/test_call_indels.py
@@ -1,134 +1,133 @@
-import unittest
-
+import pytest
 from mavis.annotate.variant import IndelCall, call_protein_indel
 
 from .mock import Mock, MockFunction
 
 
-class TestIndelCall(unittest.TestCase):
+class TestIndelCall:
     def test_deletion(self):
         refseq = 'asdfghjkl'
         mutseq = 'asdfkl'
         indel = IndelCall(refseq, mutseq)
-        self.assertEqual(4, indel.nterm_aligned)
-        self.assertEqual(len(indel.ref_seq) - 8 + 1, indel.cterm_aligned)
-        self.assertEqual('ghj', indel.del_seq)
-        self.assertEqual('', indel.ins_seq)
-        self.assertFalse(indel.is_dup)
+        assert indel.nterm_aligned == 4
+        assert indel.cterm_aligned == len(indel.ref_seq) - 8 + 1
+        assert indel.del_seq == 'ghj'
+        assert indel.ins_seq == ''
+        assert not indel.is_dup
 
     def test_insertion(self):
         refseq = 'asdfghjkl'
         mutseq = 'asdfmmmghjkl'
         indel = IndelCall(refseq, mutseq)
-        self.assertEqual(4, indel.nterm_aligned)
-        self.assertEqual(len(indel.ref_seq) - 5 + 1, indel.cterm_aligned)
-        self.assertEqual('', indel.del_seq)
-        self.assertEqual('mmm', indel.ins_seq)
-        self.assertFalse(indel.is_dup)
+        assert indel.nterm_aligned == 4
+        assert indel.cterm_aligned == len(indel.ref_seq) - 5 + 1
+        assert indel.del_seq == ''
+        assert indel.ins_seq == 'mmm'
+        assert not indel.is_dup
 
     def test_dup(self):
         refseq = 'asdfghjkl'
         mutseq = 'asdfsdfghjkl'
         indel = IndelCall(refseq, mutseq)
         print(indel)
-        self.assertEqual(4, indel.nterm_aligned)
-        self.assertEqual(len(indel.ref_seq) - 2 + 1, indel.cterm_aligned)
-        self.assertEqual('', indel.del_seq)
-        self.assertEqual('sdf', indel.ins_seq)
-        self.assertTrue(indel.is_dup)
+        assert indel.nterm_aligned == 4
+        assert indel.cterm_aligned == len(indel.ref_seq) - 2 + 1
+        assert indel.del_seq == ''
+        assert indel.ins_seq == 'sdf'
+        assert indel.is_dup
 
     def test_delins(self):
         refseq = 'asdfghjkl'
         mutseq = 'asdfmmmkl'
         indel = IndelCall(refseq, mutseq)
-        self.assertEqual(4, indel.nterm_aligned)
-        self.assertEqual(len(indel.ref_seq) - 8 + 1, indel.cterm_aligned)
-        self.assertEqual('ghj', indel.del_seq)
-        self.assertEqual('mmm', indel.ins_seq)
-        self.assertFalse(indel.is_dup)
+        assert indel.nterm_aligned == 4
+        assert indel.cterm_aligned == len(indel.ref_seq) - 8 + 1
+        assert indel.del_seq == 'ghj'
+        assert indel.ins_seq == 'mmm'
+        assert not indel.is_dup
 
     def test_delete_start(self):
         refseq = 'asdfghjkl'
         mutseq = 'fghjkl'
         indel = IndelCall(refseq, mutseq)
-        self.assertEqual(0, indel.nterm_aligned)
-        self.assertEqual(6, indel.cterm_aligned)
-        self.assertEqual('asd', indel.del_seq)
-        self.assertEqual('', indel.ins_seq)
-        self.assertFalse(indel.is_dup)
+        assert indel.nterm_aligned == 0
+        assert indel.cterm_aligned == 6
+        assert indel.del_seq == 'asd'
+        assert indel.ins_seq == ''
+        assert not indel.is_dup
 
     def test_delete_start_repetition(self):
         refseq = 'asdafghjkl'
         mutseq = 'afghjkl'
         indel = IndelCall(refseq, mutseq)
-        self.assertEqual(0, indel.nterm_aligned)
-        self.assertEqual(7, indel.cterm_aligned)
-        self.assertEqual('asd', indel.del_seq)
-        self.assertEqual('', indel.ins_seq)
-        self.assertFalse(indel.is_dup)
+        assert indel.nterm_aligned == 0
+        assert indel.cterm_aligned == 7
+        assert indel.del_seq == 'asd'
+        assert indel.ins_seq == ''
+        assert not indel.is_dup
 
     def test_delete_end(self):
         refseq = 'asdfghjkl'
         mutseq = 'asdfgh'
         indel = IndelCall(refseq, mutseq)
-        self.assertEqual(6, indel.nterm_aligned)
-        self.assertEqual(0, indel.cterm_aligned)
-        self.assertEqual('jkl', indel.del_seq)
-        self.assertEqual('', indel.ins_seq)
-        self.assertFalse(indel.is_dup)
+        assert indel.nterm_aligned == 6
+        assert indel.cterm_aligned == 0
+        assert indel.del_seq == 'jkl'
+        assert indel.ins_seq == ''
+        assert not indel.is_dup
 
     def test_ins_start(self):
         refseq = 'asdfghjkl'
         mutseq = 'mmasdfghjkl'
         indel = IndelCall(refseq, mutseq)
-        self.assertEqual(0, indel.nterm_aligned)
-        self.assertEqual(9, indel.cterm_aligned)
-        self.assertEqual('', indel.del_seq)
-        self.assertEqual('mm', indel.ins_seq)
-        self.assertFalse(indel.is_dup)
+        assert indel.nterm_aligned == 0
+        assert indel.cterm_aligned == 9
+        assert indel.del_seq == ''
+        assert indel.ins_seq == 'mm'
+        assert not indel.is_dup
 
     def test_ins_end(self):
         refseq = 'asdfghjkl'
         mutseq = 'asdfghjklmmm'
         indel = IndelCall(refseq, mutseq)
-        self.assertEqual(9, indel.nterm_aligned)
-        self.assertEqual(0, indel.cterm_aligned)
-        self.assertEqual('', indel.del_seq)
-        self.assertEqual('mmm', indel.ins_seq)
-        self.assertFalse(indel.is_dup)
+        assert indel.nterm_aligned == 9
+        assert indel.cterm_aligned == 0
+        assert indel.del_seq == ''
+        assert indel.ins_seq == 'mmm'
+        assert not indel.is_dup
 
     def test_delins_start(self):
         refseq = 'asdfghjkl'
         mutseq = 'mmfghjkl'
         indel = IndelCall(refseq, mutseq)
-        self.assertEqual(0, indel.nterm_aligned)
-        self.assertEqual(6, indel.cterm_aligned)
-        self.assertEqual('asd', indel.del_seq)
-        self.assertEqual('mm', indel.ins_seq)
-        self.assertFalse(indel.is_dup)
+        assert indel.nterm_aligned == 0
+        assert indel.cterm_aligned == 6
+        assert indel.del_seq == 'asd'
+        assert indel.ins_seq == 'mm'
+        assert not indel.is_dup
 
     def test_delins_end(self):
         refseq = 'asdfghjkl'
         mutseq = 'asdfghjmmm'
         indel = IndelCall(refseq, mutseq)
-        self.assertEqual(7, indel.nterm_aligned)
-        self.assertEqual(0, indel.cterm_aligned)
-        self.assertEqual('kl', indel.del_seq)
-        self.assertEqual('mmm', indel.ins_seq)
-        self.assertFalse(indel.is_dup)
+        assert indel.nterm_aligned == 7
+        assert indel.cterm_aligned == 0
+        assert indel.del_seq == 'kl'
+        assert indel.ins_seq == 'mmm'
+        assert not indel.is_dup
 
 
-class TestHgvsProteinNotation(unittest.TestCase):
+class TestHgvsProteinNotation:
     def test_homopolymer(self):
         indel = IndelCall('ASDFGHJKKLQWERTYUIOP', 'ASDFGHJKKKKLQWERTYUIOP').hgvs_protein_notation()
-        self.assertEqual('p.K8_K9dupKK', indel)
+        assert indel == 'p.K8_K9dupKK'
 
     def test_dup(self):
         indel = IndelCall('ASDFGHJKL', 'ASDFSDFGHJKL').hgvs_protein_notation()
-        self.assertEqual('p.S2_F4dupSDF', indel)
+        assert indel == 'p.S2_F4dupSDF'
 
 
-class TestCallProteinIndel(unittest.TestCase):
+class TestCallProteinIndel:
     def test_large_start_deletion(self):
         ref_translation = Mock(
             get_aa_seq=MockFunction(
@@ -159,44 +158,43 @@ def test_large_start_deletion(self):
             )
         )
         notation = call_protein_indel(ref_translation, mut_translation)
-        self.assertEqual(
+        assert notation == (
             'ref:p.M1_Y227del'
             'MGLKAAQKTLFPLRSIDDVVRLFAAELGREEPDLVLLSLVLGFVEHFLAVNRVIPTNVPE'
             'LTFQPSPAPDPPGGLTYFPVADLSIIAALYARFTAQIRGAVDLSLYPREGGVSSRELVKK'
             'VSDVIWNSLSRSYFKDRAHIQSLFSFITGTKLDSSGVAFAVVGACQALGLRDVHLALSED'
-            'HAWVVFGPNGEQTAEVTWHGKGNEDRRGQTVNAGVAERSWLYLKGSY',
-            notation,
+            'HAWVVFGPNGEQTAEVTWHGKGNEDRRGQTVNAGVAERSWLYLKGSY'
         )
 
     def test_deletion_rep_at_breaks(self):
         ref_translation = Mock(get_aa_seq=MockFunction('ABCDEFKJFEDAGFLKJ'), name='ref')
         mut_translation = Mock(get_aa_seq=MockFunction('ABCDE' 'AGFLKJ'))
         notation = call_protein_indel(ref_translation, mut_translation)
-        self.assertEqual('ref:p.F6_D11delFKJFED', notation)
+        assert notation == 'ref:p.F6_D11delFKJFED'
 
     def test_insertion(self):
         ref_translation = Mock(get_aa_seq=MockFunction('ASDFGHJKLQWERTYUIOP'), name='ref')
         mut_translation = Mock(get_aa_seq=MockFunction('ASDFGHJKIIILQWERTYUIOP'))
         notation = call_protein_indel(ref_translation, mut_translation)
-        self.assertEqual('ref:p.K8_L9insIII', notation)
+        assert notation == 'ref:p.K8_L9insIII'
 
     def test_deletion(self):
         ref_translation = Mock(get_aa_seq=MockFunction('ASDFGHJKLQWERTYUIOP'), name='ref')
         mut_translation = Mock(get_aa_seq=MockFunction('ASDFGHJQWERTYUIOP'))
         notation = call_protein_indel(ref_translation, mut_translation)
-        self.assertEqual('ref:p.K8_L9delKL', notation)
+        assert notation == 'ref:p.K8_L9delKL'
 
     def test_synonymous(self):
         ref_translation = Mock(get_aa_seq=MockFunction('ASDFGHJKLQWERTYUIOP'), name='ref')
         mut_translation = Mock(get_aa_seq=MockFunction('ASDFGHJKLQWERTYUIOP'))
         notation = call_protein_indel(ref_translation, mut_translation)
-        self.assertEqual(None, notation)
+        assert notation is None
 
     def test_delins(self):
         ref_translation = Mock(get_aa_seq=MockFunction('ASDFGHJKLQWERTYUIOP'), name='ref')
         mut_translation = Mock(get_aa_seq=MockFunction('ASDFGHJIIIQWERTYUIOP'))
         notation = call_protein_indel(ref_translation, mut_translation)
-        self.assertEqual('ref:p.K8_L9delKLinsIII', notation)
+        assert notation == 'ref:p.K8_L9delKLinsIII'
 
     def test_transcript_name(self):
         ref_translation = Mock(
@@ -206,77 +204,77 @@ def test_transcript_name(self):
         )
         mut_translation = Mock(get_aa_seq=MockFunction('ASDFGHJIIIQWERTYUIOP'))
         notation = call_protein_indel(ref_translation, mut_translation)
-        self.assertEqual('reft:p.K8_L9delKLinsIII', notation)
+        assert notation == 'reft:p.K8_L9delKLinsIII'
 
     def test_delete_start(self):
         ref_translation = Mock(get_aa_seq=MockFunction('ASDFGHJKLQWERTYUIOP'), name='ref')
         mut_translation = Mock(get_aa_seq=MockFunction('FGHJKLQWERTYUIOP'))
         notation = call_protein_indel(ref_translation, mut_translation)
-        self.assertEqual('ref:p.A1_D3delASD', notation)
+        assert notation == 'ref:p.A1_D3delASD'
 
     def test_delete_single_aa_start(self):
         ref_translation = Mock(get_aa_seq=MockFunction('ASDFGHJKLQWERTYUIOP'), name='ref')
         mut_translation = Mock(get_aa_seq=MockFunction('SDFGHJKLQWERTYUIOP'))
         notation = call_protein_indel(ref_translation, mut_translation)
-        self.assertEqual('ref:p.A1delA', notation)
+        assert notation == 'ref:p.A1delA'
 
     def test_delete_end(self):
         ref_translation = Mock(get_aa_seq=MockFunction('ASDFGHJKLQWERTYUIOP'), name='ref')
         mut_translation = Mock(get_aa_seq=MockFunction('ASDFGHJKLQWERTYU'))
         notation = call_protein_indel(ref_translation, mut_translation)
-        self.assertEqual('ref:p.I17_P19delIOP', notation)
+        assert notation == 'ref:p.I17_P19delIOP'
 
     def test_delete_single_aa_end(self):
         ref_translation = Mock(get_aa_seq=MockFunction('ASDFGHJKLQWERTYUIOP'), name='ref')
         mut_translation = Mock(get_aa_seq=MockFunction('ASDFGHJKLQWERTYUIO'))
         notation = call_protein_indel(ref_translation, mut_translation)
-        self.assertEqual('ref:p.P19delP', notation)
+        assert notation == 'ref:p.P19delP'
 
     def test_ins_start(self):
         ref_translation = Mock(get_aa_seq=MockFunction('ASDFGHJKLQWERTYUIOP'), name='ref')
         mut_translation = Mock(get_aa_seq=MockFunction('IIASDFGHJKLQWERTYUIOP'))
         notation = call_protein_indel(ref_translation, mut_translation)
-        self.assertEqual('ref:p.A1ext-2', notation)
+        assert notation == 'ref:p.A1ext-2'
 
     def test_ins_end(self):
         ref_translation = Mock(get_aa_seq=MockFunction('ASDFGHJKLQWERTYUIOP'), name='ref')
         mut_translation = Mock(get_aa_seq=MockFunction('ASDFGHJKLQWERTYUIOPII'))
         notation = call_protein_indel(ref_translation, mut_translation)
-        self.assertEqual('ref:p.P19ext2', notation)
+        assert notation == 'ref:p.P19ext2'
 
     def test_no_reference_obj(self):
         ref_translation = Mock(
             get_aa_seq=MockFunction('ASDFGHJKLQWERTYUIOP'), name=None, reference_object='thing'
         )
         mut_translation = Mock(get_aa_seq=MockFunction('ASDFGHJIIIQWERTYUIOP'))
-        with self.assertRaises(AttributeError):
+        with pytest.raises(AttributeError):
             call_protein_indel(ref_translation, mut_translation)
 
     def test_fs(self):
         ref_translation = Mock(get_aa_seq=MockFunction('ASDFGHJKL'), name='ref')
         mut_translation = Mock(get_aa_seq=MockFunction('ASDFGHJMMM'))
         notation = call_protein_indel(ref_translation, mut_translation)
-        self.assertEqual('ref:p.K8Mfs', notation)
+        assert notation == 'ref:p.K8Mfs'
 
         ref_translation = Mock(get_aa_seq=MockFunction('ASDFGHJKL'), name='ref')
         mut_translation = Mock(get_aa_seq=MockFunction('ASDFGHJCMMEF'))
         notation = call_protein_indel(ref_translation, mut_translation)
-        self.assertEqual('ref:p.K8Cfs', notation)
+        assert notation == 'ref:p.K8Cfs'
 
     def test_fs_with_stops(self):
         ref_translation = Mock(get_aa_seq=MockFunction('ASDFGHJKLT*'), name='ref')
         mut_translation = Mock(get_aa_seq=MockFunction('ASDFGHJMMMHGFTTSBF*TUHG*'))
         notation = call_protein_indel(ref_translation, mut_translation)
-        self.assertEqual('ref:p.K8Mfs*12', notation)
+        assert notation == 'ref:p.K8Mfs*12'
 
     def test_fs_immeadiate_stop(self):
         ref_translation = Mock(get_aa_seq=MockFunction('ASDFGHJKLT*'), name='ref')
         mut_translation = Mock(get_aa_seq=MockFunction('ASDFGHJMMMHGFTTSBF*'))
         notation = call_protein_indel(ref_translation, mut_translation)
-        self.assertEqual('ref:p.K8Mfs*12', notation)
+        assert notation == 'ref:p.K8Mfs*12'
 
     def test_delete_start_with_rep(self):
         ref_translation = Mock(get_aa_seq=MockFunction('ASDAFGHJKL'), name='ref')
         mut_translation = Mock(get_aa_seq=MockFunction('AFGHJKL'))
         notation = call_protein_indel(ref_translation, mut_translation)
-        self.assertEqual('ref:p.A1_D3delASD', notation)
+        assert notation == 'ref:p.A1_D3delASD'
diff --git a/tests/unit/test_cluster.py b/tests/unit/test_cluster.py
index a7cb6c2d..43172ea9 100644
--- a/tests/unit/test_cluster.py
+++ b/tests/unit/test_cluster.py
@@ -1,29 +1,30 @@
 import unittest
 
+import pytest
 from mavis.cluster.cluster import merge_integer_intervals
 from mavis.interval import Interval
 
 
-class TestMergeIntegerIntervals(unittest.TestCase):
+class TestMergeIntegerIntervals:
     def test_varying_lengths(self):
         m = merge_integer_intervals((1, 2), (1, 9), (2, 10), weight_adjustment=0)
-        self.assertEqual(Interval(1, 4), m)
+        assert m == Interval(1, 4)
 
     def test_same_length(self):
         m = merge_integer_intervals((1, 1), (10, 10))
-        self.assertEqual(Interval(6), m)
+        assert m == Interval(6)
 
     def test_empty_list_error(self):
-        with self.assertRaises(AttributeError):
+        with pytest.raises(AttributeError):
             merge_integer_intervals()
 
     def test_identical_even_length(self):
         m = merge_integer_intervals((1, 2), (1, 2), (1, 2))
-        self.assertEqual(Interval(1, 2), m)
+        assert m == Interval(1, 2)
 
     def test_identical_odd_length(self):
         m = merge_integer_intervals((1, 3), (1, 3), (1, 3))
-        self.assertEqual(Interval(1, 3), m)
+        assert m == Interval(1, 3)
 
 
 if __name__ == '__main__':
diff --git a/tests/unit/test_constants.py b/tests/unit/test_constants.py
index b69571db..c602c7ee 100644
--- a/tests/unit/test_constants.py
+++ b/tests/unit/test_constants.py
@@ -1,54 +1,47 @@
-import unittest
+from mavis.constants import COLUMNS, ORIENT, STRAND, reverse_complement, sort_columns, translate
 
-from mavis.constants import (
-    COLUMNS,
-    ORIENT,
-    STRAND,
-    MavisNamespace,
-    reverse_complement,
-    sort_columns,
-    translate,
-)
 
-
-class TestConstants(unittest.TestCase):
+class TestConstants:
     def test_strand_compare(self):
-        self.assertTrue(STRAND.compare(STRAND.NS, STRAND.POS))
-        self.assertTrue(STRAND.compare(STRAND.NS, STRAND.NEG))
-        self.assertTrue(STRAND.compare(STRAND.POS, STRAND.POS))
-        self.assertTrue(STRAND.compare(STRAND.NEG, STRAND.NEG))
-        self.assertFalse(STRAND.compare(STRAND.POS, STRAND.NEG))
-        self.assertFalse(STRAND.compare(STRAND.NEG, STRAND.POS))
+        assert STRAND.compare(STRAND.NS, STRAND.POS)
+        assert STRAND.compare(STRAND.NS, STRAND.NEG)
+        assert STRAND.compare(STRAND.POS, STRAND.POS)
+        assert STRAND.compare(STRAND.NEG, STRAND.NEG)
+        assert not STRAND.compare(STRAND.POS, STRAND.NEG)
+        assert not STRAND.compare(STRAND.NEG, STRAND.POS)
 
     def test_orient_compare(self):
-        self.assertTrue(ORIENT.compare(ORIENT.NS, ORIENT.RIGHT))
-        self.assertTrue(ORIENT.compare(ORIENT.NS, ORIENT.LEFT))
-        self.assertTrue(ORIENT.compare(ORIENT.RIGHT, ORIENT.RIGHT))
-        self.assertTrue(ORIENT.compare(ORIENT.LEFT, ORIENT.LEFT))
-        self.assertFalse(ORIENT.compare(ORIENT.RIGHT, ORIENT.LEFT))
-        self.assertFalse(ORIENT.compare(ORIENT.LEFT, ORIENT.RIGHT))
+        assert ORIENT.compare(ORIENT.NS, ORIENT.RIGHT)
+        assert ORIENT.compare(ORIENT.NS, ORIENT.LEFT)
+        assert ORIENT.compare(ORIENT.RIGHT, ORIENT.RIGHT)
+        assert ORIENT.compare(ORIENT.LEFT, ORIENT.LEFT)
+        assert not ORIENT.compare(ORIENT.RIGHT, ORIENT.LEFT)
+        assert not ORIENT.compare(ORIENT.LEFT, ORIENT.RIGHT)
 
     def test_reverse_complement(self):
-        self.assertEqual('ATCG', reverse_complement('CGAT'))
-        self.assertEqual('', reverse_complement(''))
+        assert reverse_complement('CGAT') == 'ATCG'
+        assert reverse_complement('') == ''
 
     def test_translate(self):
         seq = 'ATG' 'AAT' 'TCT' 'GGA' 'TGA'
         translated_seq = translate(seq, 0)
-        self.assertEqual('MNSG*', translated_seq)  # ATG AAT TCT GGA TGA
+        assert translated_seq == 'MNSG*'  # ATG AAT TCT GGA TGA
         translated_seq = translate(seq, 1)
-        self.assertEqual('*ILD', translated_seq)  # A TGA ATT CTG GAT GA
+        assert translated_seq == '*ILD'  # A TGA ATT CTG GAT GA
         translated_seq = translate(seq, 2)
-        self.assertEqual('EFWM', translated_seq)  # AT GAA TTC TGG ATG A
+        assert translated_seq == 'EFWM'  # AT GAA TTC TGG ATG A
 
     def test_sort_columns(self):
         temp = ['NEW', 'NEW2', COLUMNS.break1_seq, COLUMNS.break2_seq, COLUMNS.break1_chromosome]
-        self.assertEqual(
-            [COLUMNS.break1_chromosome, COLUMNS.break1_seq, COLUMNS.break2_seq, 'NEW', 'NEW2'],
-            sort_columns(temp),
-        )
+        assert sort_columns(temp) == [
+            COLUMNS.break1_chromosome,
+            COLUMNS.break1_seq,
+            COLUMNS.break2_seq,
+            'NEW',
+            'NEW2',
+        ]
 
     def test_column_matches_column_name(self):
-        self.assertEqual(COLUMNS.library, COLUMNS.library)
+        assert COLUMNS.library == COLUMNS.library
         s = set([COLUMNS.library, COLUMNS.library])
-        self.assertEqual(1, len(s))
+        assert len(s) == 1
diff --git a/tests/unit/test_illustrate.py b/tests/unit/test_illustrate.py
index 9968d292..db5f77f2 100644
--- a/tests/unit/test_illustrate.py
+++ b/tests/unit/test_illustrate.py
@@ -1,9 +1,8 @@
-import unittest
 from mavis.illustrate.util import generate_interval_mapping
 from mavis.interval import Interval
 
 
-class TestGenerateIntervalMapping(unittest.TestCase):
+class TestGenerateIntervalMapping:
     def test_single_bp_window(self):
         regions = [
             Interval(4222347, 4222347),
@@ -20,7 +19,7 @@ def test_single_bp_window(self):
         mapping = generate_interval_mapping(
             regions, target, ratio, min_width, buffer_, start, end, min_inter
         )
-        self.assertEqual(7, len(mapping.keys()))
+        assert len(mapping.keys()) == 7
 
     def test_no_input_intervals(self):
         target = 911.9921875
@@ -33,4 +32,4 @@ def test_no_input_intervals(self):
         mapping = generate_interval_mapping(
             [], target, ratio, min_width, buffer_, start, end, min_inter
         )
-        self.assertEqual(1, len(mapping.keys()))
+        assert len(mapping.keys()) == 1
diff --git a/tests/unit/test_interval.py b/tests/unit/test_interval.py
index ae6d0ff0..1b130201 100644
--- a/tests/unit/test_interval.py
+++ b/tests/unit/test_interval.py
@@ -1,175 +1,173 @@
-import unittest
+import pytest
 from mavis.interval import Interval, IntervalMapping
 
 
-class TestInterval(unittest.TestCase):
+class TestInterval:
     def test___init__error(self):
-        with self.assertRaises(AttributeError):
+        with pytest.raises(AttributeError):
             Interval(4, 3)
-        with self.assertRaises(AttributeError):
+        with pytest.raises(AttributeError):
             Interval(3, 4, 0)
 
     def test___contains__(self):
-        self.assertTrue(Interval(1, 2) in Interval(1, 7))
-        self.assertFalse(Interval(1, 7) in Interval(1, 2))
-        self.assertTrue(Interval(1.0, 2) in Interval(1.0, 7))
-        self.assertFalse(Interval(1, 7) in Interval(1, 2))
-        self.assertTrue(1 in Interval(1, 7))
-        self.assertFalse(0 in Interval(1, 7))
+        assert Interval(1, 2) in Interval(1, 7)
+        assert Interval(1, 7) not in Interval(1, 2)
+        assert Interval(1.0, 2) in Interval(1.0, 7)
+        assert Interval(1, 7) not in Interval(1, 2)
+        assert 1 in Interval(1, 7)
+        assert 0 not in Interval(1, 7)
 
     def test_eq(self):
-        self.assertEqual(Interval(1, 2), Interval(1, 2))
-        self.assertEqual(Interval(1, 2), Interval(1, 2))
+        assert Interval(1, 2) == Interval(1, 2)
+        assert Interval(1, 2) == Interval(1, 2)
 
     def test_ne(self):
-        self.assertNotEqual(Interval(1, 2), Interval(1, 3))
-        self.assertNotEqual(Interval(1, 2), Interval(1, 3))
+        assert Interval(1, 2) != Interval(1, 3)
+        assert Interval(1, 2) != Interval(1, 3)
 
     def test___get_item__(self):
         temp = Interval(1, 2, 3)
-        self.assertEqual(1, temp[0])
-        self.assertEqual(2, temp[1])
-        with self.assertRaises(IndexError):
+        assert temp[0] == 1
+        assert temp[1] == 2
+        with pytest.raises(IndexError):
             temp[3]
-        with self.assertRaises(IndexError):
+        with pytest.raises(IndexError):
             temp[-1]
-        with self.assertRaises(IndexError):
+        with pytest.raises(IndexError):
             temp['1b']
 
     def test___gt__(self):
-        self.assertTrue(Interval(10) > Interval(1))
-        self.assertFalse(Interval(1) > Interval(10))
-        self.assertTrue(Interval(10) > Interval(1))
-        self.assertFalse(Interval(1) > Interval(1.01))
+        assert Interval(10) > Interval(1)
+        assert not Interval(1) > Interval(10)
+        assert Interval(10) > Interval(1)
+        assert not Interval(1) > Interval(1.01)
 
     def test_overlaps(self):
         left = Interval(-4, 1)
         middle = Interval(0, 10)
         right = Interval(5, 12)
-        self.assertFalse(Interval.overlaps(left, right))
-        self.assertFalse(Interval.overlaps(right, left))
-        self.assertTrue(Interval.overlaps(left, middle))
-        self.assertTrue(Interval.overlaps(right, middle))
-        self.assertTrue(Interval.overlaps(middle, left))
-        self.assertTrue(Interval.overlaps(middle, right))
-        self.assertTrue(Interval.overlaps((1, 2), (2, 5)))
+        assert not Interval.overlaps(left, right)
+        assert not Interval.overlaps(right, left)
+        assert Interval.overlaps(left, middle)
+        assert Interval.overlaps(right, middle)
+        assert Interval.overlaps(middle, left)
+        assert Interval.overlaps(middle, right)
+        assert Interval.overlaps((1, 2), (2, 5))
         left = Interval(1148432, 1149343)
         right = Interval(1149493, 1150024)
-        self.assertFalse(Interval.overlaps(left, right))
+        assert not Interval.overlaps(left, right)
 
         left = Interval(-4, 0.1)
         middle = Interval(0, 10)
         right = Interval(0.11, 12)
-        self.assertFalse(Interval.overlaps(left, right))
-        self.assertFalse(Interval.overlaps(right, left))
-        self.assertTrue(Interval.overlaps(left, middle))
-        self.assertTrue(Interval.overlaps(right, middle))
-        self.assertTrue(Interval.overlaps(middle, left))
-        self.assertTrue(Interval.overlaps(middle, right))
+        assert not Interval.overlaps(left, right)
+        assert not Interval.overlaps(right, left)
+        assert Interval.overlaps(left, middle)
+        assert Interval.overlaps(right, middle)
+        assert Interval.overlaps(middle, left)
+        assert Interval.overlaps(middle, right)
 
     def test___len__(self):
-        self.assertEqual(5, len(Interval(1, 5)))
-        with self.assertRaises(TypeError):
+        assert len(Interval(1, 5)) == 5
+        with pytest.raises(TypeError):
             len(Interval(1, 5.0))
-        self.assertEqual(4.0, Interval(1, 5.0).length())
+        assert Interval(1, 5.0).length() == 4.0
 
     def test___lt__(self):
-        self.assertTrue(Interval(1) < Interval(10))
-        self.assertFalse(Interval(10) < Interval(1))
+        assert Interval(1) < Interval(10)
+        assert not Interval(10) < Interval(1)
 
     def test___and__(self):
-        self.assertEqual(None, Interval(1, 1) & Interval(2))
+        assert Interval(1, 1) & Interval(2) is None
 
     def test___sub__(self):
         # x in y
-        self.assertEqual([Interval(0, 4), Interval(7, 10)], Interval(0, 10) - Interval(5, 6))
+        assert Interval(0, 10) - Interval(5, 6) == [Interval(0, 4), Interval(7, 10)]
         # x overlaps the start of y
-        self.assertEqual([Interval(7, 10)], Interval(0, 10) - Interval(-1, 6))
+        assert Interval(0, 10) - Interval(-1, 6) == [Interval(7, 10)]
         # x overlaps the end of y
-        self.assertEqual([Interval(0, 4)], Interval(0, 10) - Interval(5, 11))
+        assert Interval(0, 10) - Interval(5, 11) == [Interval(0, 4)]
         # x overlaps all of y
-        self.assertEqual([], Interval(0, 10) - Interval(-1, 11))
+        assert Interval(0, 10) - Interval(-1, 11) == []
         # x does not overlap y
-        self.assertEqual([Interval(0, 10)], Interval(0, 10) - Interval(11, 15))
+        assert Interval(0, 10) - Interval(11, 15) == [Interval(0, 10)]
 
     def test___xor__(self):
         # x in y
-        self.assertEqual([], Interval(0, 10) ^ Interval(0, 10))
+        assert Interval(0, 10) ^ Interval(0, 10) == []
         # x overlaps the start of y
-        self.assertEqual([Interval(7, 10), Interval(-1, -1)], Interval(0, 10) ^ Interval(-1, 6))
+        assert Interval(0, 10) ^ Interval(-1, 6) == [Interval(7, 10), Interval(-1, -1)]
         # x overlaps the end of y
-        self.assertEqual([Interval(0, 4), Interval(11, 11)], Interval(0, 10) ^ Interval(5, 11))
+        assert Interval(0, 10) ^ Interval(5, 11) == [Interval(0, 4), Interval(11, 11)]
         # x overlaps all of y
-        self.assertEqual([Interval(-1, -1), Interval(11, 11)], Interval(0, 10) ^ Interval(-1, 11))
+        assert Interval(0, 10) ^ Interval(-1, 11) == [Interval(-1, -1), Interval(11, 11)]
         # x does not overlap y
-        self.assertEqual([Interval(0, 10), Interval(11, 15)], Interval(0, 10) ^ Interval(11, 15))
+        assert Interval(0, 10) ^ Interval(11, 15) == [Interval(0, 10), Interval(11, 15)]
 
     def test_center(self):
-        self.assertEqual(3, Interval(1, 5).center)
-        self.assertEqual(3.5, Interval(2, 5).center)
+        assert Interval(1, 5).center == 3
+        assert Interval(2, 5).center == 3.5
 
     def test_position_in_range(self):
         pos = (12, 12)
-        self.assertEqual((2, False), Interval.position_in_range([(1, 2), (3, 6), (7, 15)], pos))
-        self.assertEqual(
-            (3, True), Interval.position_in_range([(1, 2), (3, 6), (7, 10), (14, 16)], pos)
-        )
-        self.assertEqual((3, False), Interval.position_in_range([(1, 2), (3, 6), (7, 10)], pos))
-        self.assertEqual((0, True), Interval.position_in_range([(15, 16), (17, 19)], pos))
+        assert Interval.position_in_range([(1, 2), (3, 6), (7, 15)], pos) == (2, False)
+        assert Interval.position_in_range([(1, 2), (3, 6), (7, 10), (14, 16)], pos) == (3, True)
+        assert Interval.position_in_range([(1, 2), (3, 6), (7, 10)], pos) == (3, False)
+        assert Interval.position_in_range([(15, 16), (17, 19)], pos) == (0, True)
 
-        with self.assertRaises(AttributeError):
+        with pytest.raises(AttributeError):
             Interval.position_in_range([], 1)
 
     def test_convert_pos(self):
         mapping = {(1, 10): (101, 110), (21, 30): (201, 210), (41, 50): (301, 310)}
 
-        self.assertEqual(105, Interval.convert_pos(mapping, 5))
-        self.assertEqual(101, Interval.convert_pos(mapping, 1))
-        self.assertEqual(310, Interval.convert_pos(mapping, 50))
+        assert Interval.convert_pos(mapping, 5) == 105
+        assert Interval.convert_pos(mapping, 1) == 101
+        assert Interval.convert_pos(mapping, 50) == 310
 
-        with self.assertRaises(IndexError):
+        with pytest.raises(IndexError):
             Interval.convert_pos(mapping, 15)
 
-        with self.assertRaises(IndexError):
+        with pytest.raises(IndexError):
             Interval.convert_pos(mapping, 0)
 
-        with self.assertRaises(IndexError):
+        with pytest.raises(IndexError):
             Interval.convert_pos(mapping, 80)
 
     def test_convert_pos_forward_to_reverse(self):
         mapping = {(41, 50): (101, 110), (21, 30): (201, 210), (1, 10): (301, 310)}
 
-        self.assertEqual(306, Interval.convert_pos(mapping, 5))
-        self.assertEqual(110, Interval.convert_pos(mapping, 41))
-        self.assertEqual(210, Interval.convert_pos(mapping, 21))
-        self.assertEqual(310, Interval.convert_pos(mapping, 1))
-        self.assertEqual(309, Interval.convert_pos(mapping, 2))
+        assert Interval.convert_pos(mapping, 5) == 306
+        assert Interval.convert_pos(mapping, 41) == 110
+        assert Interval.convert_pos(mapping, 21) == 210
+        assert Interval.convert_pos(mapping, 1) == 310
+        assert Interval.convert_pos(mapping, 2) == 309
 
-        with self.assertRaises(IndexError):
+        with pytest.raises(IndexError):
             Interval.convert_pos(mapping, 15)
 
-        with self.assertRaises(IndexError):
+        with pytest.raises(IndexError):
             Interval.convert_pos(mapping, 51)
 
-        with self.assertRaises(IndexError):
+        with pytest.raises(IndexError):
             Interval.convert_pos(mapping, 0)
 
-        with self.assertRaises(IndexError):
+        with pytest.raises(IndexError):
             Interval.convert_pos(mapping, 31)
 
     def test_convert_pos_input_errors(self):
         # test input errors
-        with self.assertRaises(AttributeError):  # unequal length
+        with pytest.raises(AttributeError):  # unequal length
             Interval.convert_pos({(1, 10): (4, 5)}, 3)
 
-        with self.assertRaises(AttributeError):  # overlapping ranges
+        with pytest.raises(AttributeError):  # overlapping ranges
             Interval.convert_pos({(1, 10): (11, 20), (5, 14): (21, 30)}, 6)
 
-        with self.assertRaises(AttributeError):  # range not increasing or decreasing
+        with pytest.raises(AttributeError):  # range not increasing or decreasing
             mapping = {(1, 2): (1, 2), (3, 4): (4, 5), (5, 6): (3, 3)}
             Interval.convert_pos(mapping, 10)
 
-        with self.assertRaises(AttributeError):  # range not increasing or decreasing
+        with pytest.raises(AttributeError):  # range not increasing or decreasing
             mapping = {(1, 2): (4, 5), (3, 4): (1, 2), (5, 6): (3, 3)}
             Interval.convert_pos(mapping, 10)
 
@@ -179,7 +177,7 @@ def test_convert_pos_one_to_one(self):
             s = x * 10 + 1
             mapping[Interval(s, s + 9)] = Interval(s, s + 9)
         for pos in range(1, 101):
-            self.assertEqual(pos, Interval.convert_pos(mapping, pos))
+            assert Interval.convert_pos(mapping, pos) == pos
 
     def test_convert_pos_ratioed_intervals(self):
         mapping = {
@@ -189,59 +187,59 @@ def test_convert_pos_ratioed_intervals(self):
             (601.0, 900): (52, 57.0),
             (901.0, 1100): (58.0, 100),
         }
-        self.assertEqual(Interval(1), Interval.convert_ratioed_pos(mapping, 1))
-        self.assertEqual(Interval(20), Interval.convert_ratioed_pos(mapping, 100))
-        self.assertEqual(Interval(100, 100), Interval.convert_ratioed_pos(mapping, 1100))
+        assert Interval.convert_ratioed_pos(mapping, 1) == Interval(1)
+        assert Interval.convert_ratioed_pos(mapping, 100) == Interval(20)
+        assert Interval.convert_ratioed_pos(mapping, 1100) == Interval(100, 100)
 
         mapping = {(1, 100): (1, 1), (101, 500): (21, 30)}
-        self.assertEqual(Interval(1, 1), Interval.convert_ratioed_pos(mapping, 1))
-        self.assertEqual(Interval(1, 1), Interval.convert_ratioed_pos(mapping, 100))
+        assert Interval.convert_ratioed_pos(mapping, 1) == Interval(1, 1)
+        assert Interval.convert_ratioed_pos(mapping, 100) == Interval(1, 1)
 
         mapping = {(1, 100.0): (20.0, 30), (100.1, 500): (1.0, 1.0)}
-        self.assertEqual(Interval(1, 1), Interval.convert_ratioed_pos(mapping, 101))
-        self.assertEqual(Interval(1, 1), Interval.convert_ratioed_pos(mapping, 500))
-        self.assertEqual(Interval(25, 25), Interval.convert_ratioed_pos(mapping, 50))
+        assert Interval.convert_ratioed_pos(mapping, 101) == Interval(1, 1)
+        assert Interval.convert_ratioed_pos(mapping, 500) == Interval(1, 1)
+        assert Interval.convert_ratioed_pos(mapping, 50) == Interval(25, 25)
 
     def test_union(self):
         interval_list = [Interval(1, 10), Interval(5, 7), Interval(7)]
-        self.assertEqual(Interval(1, 10), Interval.union(*interval_list))
+        assert Interval.union(*interval_list) == Interval(1, 10)
         m = interval_list + [Interval(11)]
-        self.assertEqual(Interval(1, 11), Interval.union(*m))
+        assert Interval.union(*m) == Interval(1, 11)
 
-        with self.assertRaises(AttributeError):
+        with pytest.raises(AttributeError):
             Interval.union()
 
     def test_intersection(self):
         interval_list = [Interval(1, 10), Interval(5, 7), Interval(7)]
-        self.assertEqual(Interval(7), Interval.intersection(*interval_list))
+        assert Interval.intersection(*interval_list) == Interval(7)
         interval_list.append(Interval(11))
-        self.assertEqual(None, Interval.intersection(*interval_list))
+        assert Interval.intersection(*interval_list) is None
 
-        with self.assertRaises(AttributeError):
+        with pytest.raises(AttributeError):
             Interval.intersection()
 
     def test_dist(self):
         x = Interval(1, 4)
         y = Interval(-1, 0)
         z = Interval(0, 3)
-        self.assertEqual(1, Interval.dist(x, y))
-        self.assertEqual(-1, Interval.dist(y, x))
-        self.assertEqual(0, Interval.dist(x, z))
-        self.assertEqual(0, Interval.dist(z, x))
-        self.assertEqual(0, Interval.dist(y, z))
-        self.assertEqual(0, Interval.dist(z, y))
-        self.assertEqual(-6, Interval.dist((1, 4), (10, 12)))
+        assert Interval.dist(x, y) == 1
+        assert Interval.dist(y, x) == -1
+        assert Interval.dist(x, z) == 0
+        assert Interval.dist(z, x) == 0
+        assert Interval.dist(y, z) == 0
+        assert Interval.dist(z, y) == 0
+        assert Interval.dist((1, 4), (10, 12)) == -6
 
     def test_min_nonoverlapping(self):
         r = Interval.min_nonoverlapping(Interval(1, 2), Interval(4, 7), Interval(8, 9))
-        self.assertEqual(3, len(r))
+        assert len(r) == 3
         r = Interval.min_nonoverlapping(Interval(1, 5), Interval(4, 7), Interval(8, 9))
-        self.assertEqual(2, len(r))
+        assert len(r) == 2
         r = Interval.min_nonoverlapping(Interval(1, 5), Interval(4, 7), Interval(7, 9))
-        self.assertEqual([Interval(1, 9)], r)
+        assert r == [Interval(1, 9)]
         r = Interval.min_nonoverlapping((1, 2), (2, 4))
-        self.assertEqual([Interval(1, 4)], r)
-        self.assertEqual([], Interval.min_nonoverlapping())
+        assert r == [Interval(1, 4)]
+        assert Interval.min_nonoverlapping() == []
 
     def test_split_overlapping_no_weight(self):
         input_intervals = [Interval(1, 10), Interval(2, 11), Interval(4, 5), Interval(4, 8)]
@@ -257,7 +255,7 @@ def test_split_overlapping_no_weight(self):
         result = Interval.split_overlap(*input_intervals)
         result = sorted(result)
         print('found', result)
-        self.assertEqual(exp, result)
+        assert result == exp
 
     def test_split_overlapping_weighted(self):
         input_intervals = [Interval(1, 10), Interval(2, 11), Interval(4, 5), Interval(4, 8)]
@@ -271,12 +269,12 @@ def test_split_overlapping_weighted(self):
             Interval(10, 11): 4,
         }
         result = Interval.split_overlap(*input_intervals, weight_mapping=weights)
-        self.assertEqual(sorted(exp), sorted(result))
+        assert sorted(result) == sorted(exp)
         for itvl in exp:
-            self.assertEqual(exp[itvl], result[itvl])
+            assert result[itvl] == exp[itvl]
 
 
-class TestIntervalMapping(unittest.TestCase):
+class TestIntervalMapping:
     def test_convert_pos_ratioed(self):
         mapping = IntervalMapping(
             {
@@ -287,37 +285,37 @@ def test_convert_pos_ratioed(self):
                 (901.0, 1100): (58.0, 100),
             }
         )
-        self.assertEqual(1, mapping.convert_pos(1))
-        self.assertEqual(1, mapping.convert_ratioed_pos(1).start)
-        self.assertAlmostEqual(1.191919191919, mapping.convert_ratioed_pos(1).end)
-        self.assertEqual(20, mapping.convert_pos(100))
-        self.assertEqual(20, mapping.convert_ratioed_pos(100).start)
-        self.assertEqual(100, mapping.convert_pos(1100))
-        self.assertEqual(100, mapping.convert_ratioed_pos(1100).start)
+        assert mapping.convert_pos(1) == 1
+        assert mapping.convert_ratioed_pos(1).start == 1
+        assert pytest.approx(mapping.convert_ratioed_pos(1).end) == 1.191919191919
+        assert mapping.convert_pos(100) == 20
+        assert mapping.convert_ratioed_pos(100).start == 20
+        assert mapping.convert_pos(1100) == 100
+        assert mapping.convert_ratioed_pos(1100).start == 100
 
         mapping = IntervalMapping({(1, 100): (1, 1.0), (101, 500): (21.0, 30)})
-        self.assertEqual(1, mapping.convert_pos(1))
-        self.assertEqual(1, mapping.convert_pos(100))
+        assert mapping.convert_pos(1) == 1
+        assert mapping.convert_pos(100) == 1
 
         mapping = IntervalMapping({(1, 100.0): (20.0, 30), (100.1, 500): (1.0, 1.0)})
-        self.assertEqual(1, mapping.convert_pos(101))
-        self.assertEqual(1, mapping.convert_pos(500))
-        self.assertEqual(25, mapping.convert_pos(50))
+        assert mapping.convert_pos(101) == 1
+        assert mapping.convert_pos(500) == 1
+        assert mapping.convert_pos(50) == 25
 
     def test_convert_pos(self):
         mapping = IntervalMapping({(1, 10): (101, 110), (21, 30): (201, 210), (41, 50): (301, 310)})
 
-        self.assertEqual(105, mapping.convert_pos(5))
-        self.assertEqual(101, mapping.convert_pos(1))
-        self.assertEqual(310, mapping.convert_pos(50))
+        assert mapping.convert_pos(5) == 105
+        assert mapping.convert_pos(1) == 101
+        assert mapping.convert_pos(50) == 310
 
-        with self.assertRaises(IndexError):
+        with pytest.raises(IndexError):
             mapping.convert_pos(15)
 
-        with self.assertRaises(IndexError):
+        with pytest.raises(IndexError):
             mapping.convert_pos(0)
 
-        with self.assertRaises(IndexError):
+        with pytest.raises(IndexError):
             mapping.convert_pos(80)
 
     def test_convert_pos_forward_to_reverse(self):
@@ -326,22 +324,22 @@ def test_convert_pos_forward_to_reverse(self):
             opposing=[(41, 50), (21, 30), (1, 10)],
         )
 
-        self.assertEqual(306, mapping.convert_pos(5))
-        self.assertEqual(110, mapping.convert_pos(41))
-        self.assertEqual(210, mapping.convert_pos(21))
-        self.assertEqual(310, mapping.convert_pos(1))
-        self.assertEqual(309, mapping.convert_pos(2))
+        assert mapping.convert_pos(5) == 306
+        assert mapping.convert_pos(41) == 110
+        assert mapping.convert_pos(21) == 210
+        assert mapping.convert_pos(1) == 310
+        assert mapping.convert_pos(2) == 309
 
-        with self.assertRaises(IndexError):
+        with pytest.raises(IndexError):
             mapping.convert_pos(15)
 
-        with self.assertRaises(IndexError):
+        with pytest.raises(IndexError):
             mapping.convert_pos(51)
 
-        with self.assertRaises(IndexError):
+        with pytest.raises(IndexError):
             mapping.convert_pos(0)
 
-        with self.assertRaises(IndexError):
+        with pytest.raises(IndexError):
             mapping.convert_pos(31)
 
     def test_convert_pos_one_to_one(self):
@@ -351,4 +349,4 @@ def test_convert_pos_one_to_one(self):
             mapping[Interval(s, s + 9)] = Interval(s, s + 9)
         mapping = IntervalMapping(mapping)
         for pos in range(1, 101):
-            self.assertEqual(pos, mapping.convert_pos(pos))
+            assert mapping.convert_pos(pos) == pos
diff --git a/tests/unit/test_summary.py b/tests/unit/test_summary.py
index f2a81ef8..3e2a9efc 100644
--- a/tests/unit/test_summary.py
+++ b/tests/unit/test_summary.py
@@ -1,146 +1,176 @@
-import unittest
-
+import pytest
 from mavis.breakpoint import Breakpoint, BreakpointPair
 from mavis.constants import CALL_METHOD, COLUMNS, PROTOCOL, STRAND, SVTYPE
 from mavis.summary.summary import filter_by_annotations
 
-
-class TestFilterByAnnotations(unittest.TestCase):
-    def setUp(self):
-        self.gev1 = BreakpointPair(
-            Breakpoint('1', 1),
-            Breakpoint('1', 10),
-            opposing_strands=True,
-            **{
-                COLUMNS.event_type: SVTYPE.DEL,
-                COLUMNS.call_method: CALL_METHOD.CONTIG,
-                COLUMNS.fusion_sequence_fasta_id: None,
-                COLUMNS.protocol: PROTOCOL.GENOME,
-                COLUMNS.fusion_cdna_coding_end: None,
-                COLUMNS.fusion_cdna_coding_start: None,
-            }
-        )
-        self.gev2 = BreakpointPair(
-            Breakpoint('1', 1),
-            Breakpoint('1', 100),
-            opposing_strands=True,
-            **{
-                COLUMNS.event_type: SVTYPE.DEL,
-                COLUMNS.call_method: CALL_METHOD.CONTIG,
-                COLUMNS.fusion_sequence_fasta_id: None,
-                COLUMNS.protocol: PROTOCOL.GENOME,
-                COLUMNS.fusion_cdna_coding_start: None,
-                COLUMNS.fusion_cdna_coding_end: None,
-            }
-        )
-        self.best_transcripts = {'ABCA': True, 'ABCD': True}
-
-    def test_filter_by_annotations_two_best_transcripts(self):
-        self.gev1.data[COLUMNS.gene1] = 'ABC'
-        self.gev1.data[COLUMNS.gene2] = 'ABC'
-        self.gev1.data[COLUMNS.transcript1] = 'ABCA'
-        self.gev1.data[COLUMNS.transcript2] = 'ABCA'
-        self.gev2.data[COLUMNS.gene1] = 'ABC'
-        self.gev2.data[COLUMNS.gene2] = 'ABC'
-        self.gev2.data[COLUMNS.transcript1] = 'ABCD'
-        self.gev2.data[COLUMNS.transcript2] = 'ABCD'
-        result, removed = filter_by_annotations([self.gev1, self.gev2], self.best_transcripts)
+from ..util import todo
+
+
+@pytest.fixture
+def genomic_event1():
+    return BreakpointPair(
+        Breakpoint('1', 1),
+        Breakpoint('1', 10),
+        opposing_strands=True,
+        **{
+            COLUMNS.event_type: SVTYPE.DEL,
+            COLUMNS.call_method: CALL_METHOD.CONTIG,
+            COLUMNS.fusion_sequence_fasta_id: None,
+            COLUMNS.protocol: PROTOCOL.GENOME,
+            COLUMNS.fusion_cdna_coding_end: None,
+            COLUMNS.fusion_cdna_coding_start: None,
+        }
+    )
+
+
+@pytest.fixture
+def genomic_event2():
+    return BreakpointPair(
+        Breakpoint('1', 1),
+        Breakpoint('1', 100),
+        opposing_strands=True,
+        **{
+            COLUMNS.event_type: SVTYPE.DEL,
+            COLUMNS.call_method: CALL_METHOD.CONTIG,
+            COLUMNS.fusion_sequence_fasta_id: None,
+            COLUMNS.protocol: PROTOCOL.GENOME,
+            COLUMNS.fusion_cdna_coding_start: None,
+            COLUMNS.fusion_cdna_coding_end: None,
+        }
+    )
+
+
+@pytest.fixture
+def best_transcripts():
+    return {'ABCA': True, 'ABCD': True}
+
+
+class TestFilterByAnnotations:
+    def test_filter_by_annotations_two_best_transcripts(
+        self, genomic_event1, genomic_event2, best_transcripts
+    ):
+        genomic_event1.data[COLUMNS.gene1] = 'ABC'
+        genomic_event1.data[COLUMNS.gene2] = 'ABC'
+        genomic_event1.data[COLUMNS.transcript1] = 'ABCA'
+        genomic_event1.data[COLUMNS.transcript2] = 'ABCA'
+        genomic_event2.data[COLUMNS.gene1] = 'ABC'
+        genomic_event2.data[COLUMNS.gene2] = 'ABC'
+        genomic_event2.data[COLUMNS.transcript1] = 'ABCD'
+        genomic_event2.data[COLUMNS.transcript2] = 'ABCD'
+        result, removed = filter_by_annotations([genomic_event1, genomic_event2], best_transcripts)
         bpp = result[0]
         print(bpp.data)
-        self.assertEqual(self.gev1, bpp)
-        self.assertEqual('ABCA', bpp.data[COLUMNS.transcript1])
-
-    def test_filter_by_annotations_two_transcripts(self):
-        self.gev1.data[COLUMNS.gene1] = 'XYZ'
-        self.gev1.data[COLUMNS.gene2] = 'XYS'
-        self.gev1.data[COLUMNS.transcript1] = 'XYZB'
-        self.gev1.data[COLUMNS.transcript2] = 'XYSZ'
-        self.gev2.data[COLUMNS.gene1] = 'XYZ'
-        self.gev2.data[COLUMNS.gene2] = 'XYS'
-        self.gev2.data[COLUMNS.transcript1] = 'XYZA'
-        self.gev2.data[COLUMNS.transcript2] = 'XYSB'
-        bpps, removed = filter_by_annotations([self.gev1, self.gev2], self.best_transcripts)
+        assert bpp == genomic_event1
+        assert bpp.data[COLUMNS.transcript1] == 'ABCA'
+
+    def test_filter_by_annotations_two_transcripts(
+        self, genomic_event1, genomic_event2, best_transcripts
+    ):
+        genomic_event1.data[COLUMNS.gene1] = 'XYZ'
+        genomic_event1.data[COLUMNS.gene2] = 'XYS'
+        genomic_event1.data[COLUMNS.transcript1] = 'XYZB'
+        genomic_event1.data[COLUMNS.transcript2] = 'XYSZ'
+        genomic_event2.data[COLUMNS.gene1] = 'XYZ'
+        genomic_event2.data[COLUMNS.gene2] = 'XYS'
+        genomic_event2.data[COLUMNS.transcript1] = 'XYZA'
+        genomic_event2.data[COLUMNS.transcript2] = 'XYSB'
+        bpps, removed = filter_by_annotations([genomic_event1, genomic_event2], best_transcripts)
         print(bpps)
         bpp = bpps[0]
         print(bpp, bpp.data)
-        self.assertEqual(self.gev2, bpp)
-        self.assertEqual('XYZA', bpp.data[COLUMNS.transcript1])
-
-    def test_filter_by_annotations_two_fusion_cdna(self):
-        self.gev1.data[COLUMNS.gene1] = 'XYZ'
-        self.gev1.data[COLUMNS.gene2] = 'XYS'
-        self.gev1.data[COLUMNS.transcript1] = 'XYZB'
-        self.gev1.data[COLUMNS.transcript2] = 'XYSZ'
-        self.gev2.data[COLUMNS.gene1] = 'XYZ'
-        self.gev2.data[COLUMNS.gene2] = 'XYS'
-        self.gev2.data[COLUMNS.transcript1] = 'XYZB'
-        self.gev2.data[COLUMNS.transcript2] = 'XYSZ'
-        self.gev1.data[COLUMNS.fusion_cdna_coding_start] = 1
-        self.gev1.data[COLUMNS.fusion_cdna_coding_end] = 20
-        self.gev2.data[COLUMNS.fusion_cdna_coding_start] = 1
-        self.gev2.data[COLUMNS.fusion_cdna_coding_end] = 40
-        result, removed = filter_by_annotations([self.gev1, self.gev2], self.best_transcripts)
+        assert bpp == genomic_event2
+        assert bpp.data[COLUMNS.transcript1] == 'XYZA'
+
+    def test_filter_by_annotations_two_fusion_cdna(
+        self, genomic_event1, genomic_event2, best_transcripts
+    ):
+        genomic_event1.data[COLUMNS.gene1] = 'XYZ'
+        genomic_event1.data[COLUMNS.gene2] = 'XYS'
+        genomic_event1.data[COLUMNS.transcript1] = 'XYZB'
+        genomic_event1.data[COLUMNS.transcript2] = 'XYSZ'
+        genomic_event2.data[COLUMNS.gene1] = 'XYZ'
+        genomic_event2.data[COLUMNS.gene2] = 'XYS'
+        genomic_event2.data[COLUMNS.transcript1] = 'XYZB'
+        genomic_event2.data[COLUMNS.transcript2] = 'XYSZ'
+        genomic_event1.data[COLUMNS.fusion_cdna_coding_start] = 1
+        genomic_event1.data[COLUMNS.fusion_cdna_coding_end] = 20
+        genomic_event2.data[COLUMNS.fusion_cdna_coding_start] = 1
+        genomic_event2.data[COLUMNS.fusion_cdna_coding_end] = 40
+        result, removed = filter_by_annotations([genomic_event1, genomic_event2], best_transcripts)
         bpp = result[0]
-        self.assertEqual(self.gev2, bpp)
-
-    def test_filter_by_annotations_one_transcript(self):
-        self.gev1.data[COLUMNS.gene1] = None
-        self.gev1.data[COLUMNS.gene2] = 'XYS'
-        self.gev1.data[COLUMNS.transcript1] = None
-        self.gev1.data[COLUMNS.transcript2] = 'XYSZ'
-        self.gev2.data[COLUMNS.gene1] = 'XYZ'
-        self.gev2.data[COLUMNS.gene2] = 'XYS'
-        self.gev2.data[COLUMNS.transcript1] = 'XYZA'
-        self.gev2.data[COLUMNS.transcript2] = 'XYSB'
-        result, removed = filter_by_annotations([self.gev1, self.gev2], self.best_transcripts)
+        assert bpp == genomic_event2
+
+    def test_filter_by_annotations_one_transcript(
+        self, genomic_event1, genomic_event2, best_transcripts
+    ):
+        genomic_event1.data[COLUMNS.gene1] = None
+        genomic_event1.data[COLUMNS.gene2] = 'XYS'
+        genomic_event1.data[COLUMNS.transcript1] = None
+        genomic_event1.data[COLUMNS.transcript2] = 'XYSZ'
+        genomic_event2.data[COLUMNS.gene1] = 'XYZ'
+        genomic_event2.data[COLUMNS.gene2] = 'XYS'
+        genomic_event2.data[COLUMNS.transcript1] = 'XYZA'
+        genomic_event2.data[COLUMNS.transcript2] = 'XYSB'
+        result, removed = filter_by_annotations([genomic_event1, genomic_event2], best_transcripts)
         bpp = result[0]
-        self.assertEqual(self.gev2, bpp)
-
-    def test_filter_by_annotations_one_best_transcripts(self):
-        self.gev1.data[COLUMNS.gene1] = 'XYZ'
-        self.gev1.data[COLUMNS.gene2] = 'ABC'
-        self.gev1.data[COLUMNS.transcript1] = 'XYZB'
-        self.gev1.data[COLUMNS.transcript2] = 'ABCA'
-        self.gev2.data[COLUMNS.gene1] = 'XYZ'
-        self.gev2.data[COLUMNS.gene2] = 'ABC'
-        self.gev2.data[COLUMNS.transcript1] = 'XYZA'
-        self.gev2.data[COLUMNS.transcript2] = 'ABCB'
-        result, removed = filter_by_annotations([self.gev1, self.gev2], self.best_transcripts)
+        assert bpp == genomic_event2
+
+    def test_filter_by_annotations_one_best_transcripts(
+        self, genomic_event1, genomic_event2, best_transcripts
+    ):
+        genomic_event1.data[COLUMNS.gene1] = 'XYZ'
+        genomic_event1.data[COLUMNS.gene2] = 'ABC'
+        genomic_event1.data[COLUMNS.transcript1] = 'XYZB'
+        genomic_event1.data[COLUMNS.transcript2] = 'ABCA'
+        genomic_event2.data[COLUMNS.gene1] = 'XYZ'
+        genomic_event2.data[COLUMNS.gene2] = 'ABC'
+        genomic_event2.data[COLUMNS.transcript1] = 'XYZA'
+        genomic_event2.data[COLUMNS.transcript2] = 'ABCB'
+        result, removed = filter_by_annotations([genomic_event1, genomic_event2], best_transcripts)
         bpp = result[0]
-        self.assertEqual(self.gev1, bpp)
-        self.assertEqual('XYZB', bpp.data[COLUMNS.transcript1])
-
-    def test_filter_by_annotations_no_transcripts(self):
-        self.gev1.data[COLUMNS.gene1] = None
-        self.gev1.data[COLUMNS.gene2] = None
-        self.gev1.data[COLUMNS.transcript1] = None
-        self.gev1.data[COLUMNS.transcript2] = None
-        self.gev2.data[COLUMNS.gene1] = None
-        self.gev2.data[COLUMNS.gene2] = None
-        self.gev2.data[COLUMNS.transcript1] = None
-        self.gev2.data[COLUMNS.transcript2] = None
-        self.gev1.break1.strand = STRAND.POS
-        result, removed = filter_by_annotations([self.gev1, self.gev2], self.best_transcripts)
+        assert bpp == genomic_event1
+        assert bpp.data[COLUMNS.transcript1] == 'XYZB'
+
+    def test_filter_by_annotations_no_transcripts(
+        self, genomic_event1, genomic_event2, best_transcripts
+    ):
+        genomic_event1.data[COLUMNS.gene1] = None
+        genomic_event1.data[COLUMNS.gene2] = None
+        genomic_event1.data[COLUMNS.transcript1] = None
+        genomic_event1.data[COLUMNS.transcript2] = None
+        genomic_event2.data[COLUMNS.gene1] = None
+        genomic_event2.data[COLUMNS.gene2] = None
+        genomic_event2.data[COLUMNS.transcript1] = None
+        genomic_event2.data[COLUMNS.transcript2] = None
+        genomic_event1.break1.strand = STRAND.POS
+        result, removed = filter_by_annotations([genomic_event1, genomic_event2], best_transcripts)
         bpp = result[0]
-        self.assertEqual(None, bpp.data[COLUMNS.transcript1])
+        assert bpp.data[COLUMNS.transcript1] is None
 
+    @todo
     def test_combine_events(self):
-        raise unittest.SkipTest('TODO')
+        pass
 
+    @todo
     def test_filtering_events_contigs(self):
-        raise unittest.SkipTest('TODO')
+        pass
 
+    @todo
     def test_filtering_events_none(self):
-        raise unittest.SkipTest('TODO')
+        pass
 
+    @todo
     def test_filtering_events_flanking(self):
-        raise unittest.SkipTest('TODO')
+        pass
 
+    @todo
     def test_filtering_events_spanning(self):
-        raise unittest.SkipTest('TODO')
+        pass
 
+    @todo
     def test_filtering_events_split(self):
-        raise unittest.SkipTest('TODO')
+        pass
 
+    @todo
     def test_get_pairing_state(self):
-        raise unittest.SkipTest('TODO')
+        pass
diff --git a/tests/unit/test_tool.py b/tests/unit/test_tool.py
index 72c0d2e9..96531588 100644
--- a/tests/unit/test_tool.py
+++ b/tests/unit/test_tool.py
@@ -39,14 +39,14 @@ def test_convert_insertion(self):
         assert bpp.break2.strand == STRAND.NS
         assert bpp.break2.chr == '1'
         assert bpp.event_type == SVTYPE.INS
-        assert bpp.untemplated_seq == None
+        assert bpp.untemplated_seq is None
 
         bpp_list = _convert_tool_row(
             _parse_vcf_record(row)[0], SUPPORTED_TOOL.DELLY, False, assume_no_untemplated=True
         )
         assert len(bpp_list) == 1
         bpp = bpp_list[0]
-        assert bpp.untemplated_seq == None
+        assert bpp.untemplated_seq is None
         assert bpp.untemplated_seq != ''
 
     def test_convert_convert_translocation(self):
@@ -118,8 +118,8 @@ def test_convert_standard_event(self):
         assert bpp.break2.chr == 'chr13'
         assert bpp.break1.start == 114529969
         assert bpp.break2.start == 114751269
-        assert bpp.opposing_strands == False
-        assert bpp.stranded == True
+        assert bpp.opposing_strands is False
+        assert bpp.stranded is True
 
     def test_convert_translocation(self):
         row = {
@@ -135,8 +135,8 @@ def test_convert_translocation(self):
         assert bpp.break2.chr == 'chr20'
         assert bpp.break1.start == 59445688
         assert bpp.break2.start == 49411710
-        assert bpp.opposing_strands == False
-        assert bpp.stranded == True
+        assert bpp.opposing_strands is False
+        assert bpp.stranded is True
 
     def test_malformed(self):
         row = {'FusionName': 'BCAS4--BCAS3', 'LeftBreakpoint': '', 'RightBreakpoint': None}
@@ -163,8 +163,8 @@ def test_convert_stranded_indel_insertion(self):
         assert bpp.break1.start == 10015
         assert bpp.break2.start == 10016
         assert bpp.event_type == SVTYPE.INS
-        assert bpp.opposing_strands == False
-        assert bpp.stranded == True
+        assert bpp.opposing_strands is False
+        assert bpp.stranded is True
         assert bpp.untemplated_seq == 'AAT'
 
     def test_convert_indel_deletion(self):
@@ -206,8 +206,8 @@ def test_convert_indel_unstranded_insertion(self):
         assert bpp.event_type == SVTYPE.INS
         assert bpp.break1.strand == STRAND.NS
         assert bpp.break2.strand == STRAND.NS
-        assert bpp.stranded == False
-        assert bpp.opposing_strands == False
+        assert bpp.stranded is False
+        assert bpp.opposing_strands is False
         assert bpp.untemplated_seq == 'TT'
 
     def test_convert_indel_duplication(self):
@@ -229,8 +229,8 @@ def test_convert_indel_duplication(self):
         assert bpp.event_type == SVTYPE.DUP
         assert bpp.break1.strand == STRAND.NS
         assert bpp.break2.strand == STRAND.NS
-        assert bpp.stranded == False
-        assert bpp.opposing_strands == False
+        assert bpp.stranded is False
+        assert bpp.opposing_strands is False
         assert bpp.untemplated_seq == ''
 
     def test_convert_translocation(self):
@@ -379,11 +379,11 @@ def test_convert_inverted_translocation(self):
         assert bpp.break2.chr == 'X'
         assert bpp.break1.start == 50294136
         assert bpp.break2.start == 153063989
-        assert bpp.event_type == None
-        assert bpp.opposing_strands == False
+        assert bpp.event_type is None
+        assert bpp.opposing_strands is False
         assert bpp.break1.orient == ORIENT.RIGHT
         assert bpp.break2.orient == ORIENT.LEFT
-        assert bpp.stranded == False
+        assert bpp.stranded is False
         assert bpp.data['tracking_id'] == 'defuse-1'
 
     def test_convert_translocation(self):
@@ -403,11 +403,11 @@ def test_convert_translocation(self):
         assert bpp.break2.chr == 'X'
         assert bpp.break1.start == 50294136
         assert bpp.break2.start == 153063989
-        assert bpp.event_type == None
-        assert bpp.opposing_strands == True
+        assert bpp.event_type is None
+        assert bpp.opposing_strands is True
         assert bpp.break1.orient == ORIENT.LEFT
         assert bpp.break2.orient == ORIENT.LEFT
-        assert bpp.stranded == False
+        assert bpp.stranded is False
         assert bpp.data['tracking_id'] == 'defuse-1'
 
     def test_convert_indel(self):
@@ -427,11 +427,11 @@ def test_convert_indel(self):
         assert bpp.break2.chr == '1'
         assert bpp.break1.start == 1663681
         assert bpp.break2.start == 151732089
-        assert bpp.event_type == None
-        assert bpp.opposing_strands == False
+        assert bpp.event_type is None
+        assert bpp.opposing_strands is False
         assert bpp.break1.orient == ORIENT.LEFT
         assert bpp.break2.orient == ORIENT.RIGHT
-        assert bpp.stranded == False
+        assert bpp.stranded is False
         assert bpp.data['tracking_id'] == 'defuse-1'
 
     def test_convert_inversion(self):
@@ -451,11 +451,11 @@ def test_convert_inversion(self):
         assert bpp.break2.chr == '1'
         assert bpp.break1.start == 144898348
         assert bpp.break2.start == 235294748
-        assert bpp.event_type == None
-        assert bpp.opposing_strands == True
+        assert bpp.event_type is None
+        assert bpp.opposing_strands is True
         assert bpp.break1.orient == ORIENT.LEFT
         assert bpp.break2.orient == ORIENT.LEFT
-        assert bpp.stranded == False
+        assert bpp.stranded is False
         assert bpp.data['tracking_id'] == 'defuse-1'
 
 
@@ -480,10 +480,10 @@ def test_convert_pos_pos(self):
         print(bpp)
         assert bpp.break1.start == int(row['end5p'])
         assert bpp.break2.start == int(row['start3p'])
-        assert bpp.opposing_strands == False
+        assert bpp.opposing_strands is False
         assert bpp.break1.orient == ORIENT.LEFT
         assert bpp.break2.orient == ORIENT.RIGHT
-        assert bpp.stranded == False
+        assert bpp.stranded is False
 
     def test_convert_pos_neg(self):
         row = {
@@ -505,10 +505,10 @@ def test_convert_pos_neg(self):
         print(bpp)
         assert bpp.break1.start == int(row['end5p'])
         assert bpp.break2.start == int(row['end3p'])
-        assert bpp.opposing_strands == True
+        assert bpp.opposing_strands is True
         assert bpp.break1.orient == ORIENT.LEFT
         assert bpp.break2.orient == ORIENT.LEFT
-        assert bpp.stranded == False
+        assert bpp.stranded is False
 
     def test_convert_neg_pos(self):
         row = {
@@ -530,10 +530,10 @@ def test_convert_neg_pos(self):
         print(bpp)
         assert bpp.break1.start == int(row['start5p'])
         assert bpp.break2.start == int(row['start3p'])
-        assert bpp.opposing_strands == True
+        assert bpp.opposing_strands is True
         assert bpp.break1.orient == ORIENT.RIGHT
         assert bpp.break2.orient == ORIENT.RIGHT
-        assert bpp.stranded == False
+        assert bpp.stranded is False
 
     def test_convert_neg_neg(self):
         row = {
@@ -555,10 +555,10 @@ def test_convert_neg_neg(self):
         print(bpp)
         assert bpp.break1.start == int(row['start5p'])
         assert bpp.break2.start == int(row['end3p'])
-        assert bpp.opposing_strands == False
+        assert bpp.opposing_strands is False
         assert bpp.break1.orient == ORIENT.RIGHT
         assert bpp.break2.orient == ORIENT.LEFT
-        assert bpp.stranded == False
+        assert bpp.stranded is False
 
 
 class TestPindel:
@@ -578,8 +578,8 @@ def test_convert_deletion(self):
         assert bpp.break1.strand == STRAND.NS
         assert bpp.break2.orient == ORIENT.RIGHT
         assert bpp.break2.strand == STRAND.NS
-        assert bpp.stranded == False
-        assert bpp.opposing_strands == False
+        assert bpp.stranded is False
+        assert bpp.opposing_strands is False
 
     def test_convert_insertion(self):
         row = Mock(chrom='21', pos=9412306, info={'SVTYPE': 'INS'}, stop=9412400, id=None, alts=[])
@@ -597,8 +597,8 @@ def test_convert_insertion(self):
         assert bpp.break1.strand == STRAND.NS
         assert bpp.break2.orient == ORIENT.RIGHT
         assert bpp.break2.strand == STRAND.NS
-        assert bpp.stranded == False
-        assert bpp.opposing_strands == False
+        assert bpp.stranded is False
+        assert bpp.opposing_strands is False
 
     def test_convert_inversion(self):
         row = Mock(chrom='21', pos=9412306, info={'SVTYPE': 'INV'}, stop=9412400, id=None, alts=[])
@@ -616,8 +616,8 @@ def test_convert_inversion(self):
         assert bpp.break1.strand == STRAND.NS
         assert bpp.break2.orient == ORIENT.LEFT
         assert bpp.break2.strand == STRAND.NS
-        assert bpp.stranded == False
-        assert bpp.opposing_strands == True
+        assert bpp.stranded is False
+        assert bpp.opposing_strands is True
 
 
 class TestParseBndAlt:
@@ -726,7 +726,7 @@ def test_itx(self):
         assert bpps[0].break2.start == 10546
         assert bpps[0].break2.end == 10546
         assert bpps[0].break2.orient == ORIENT.LEFT
-        assert bpps[0].opposing_strands == False
+        assert bpps[0].opposing_strands is False
 
     def test_deletion(self):
         row = {
@@ -750,7 +750,7 @@ def test_deletion(self):
         assert bpps[0].break2.start == 870225
         assert bpps[0].break2.end == 870225
         assert bpps[0].break2.orient == ORIENT.RIGHT
-        assert bpps[0].opposing_strands == False
+        assert bpps[0].opposing_strands is False
 
     def test_inversion(self):
         row = {
@@ -774,7 +774,7 @@ def test_inversion(self):
         assert bpps[0].break2.start == 13218683
         assert bpps[0].break2.end == 13218683
         assert bpps[0].break2.orient == ORIENT.LEFT
-        assert bpps[0].opposing_strands == True
+        assert bpps[0].opposing_strands is True
 
         assert bpps[1].event_type == SVTYPE.INV
         assert bpps[1].break1.start == 13143396
@@ -783,7 +783,7 @@ def test_inversion(self):
         assert bpps[1].break2.start == 13218683
         assert bpps[1].break2.end == 13218683
         assert bpps[1].break2.orient == ORIENT.RIGHT
-        assert bpps[1].opposing_strands == True
+        assert bpps[1].opposing_strands is True
 
     def test_insertion(self):
         row = {
@@ -807,7 +807,7 @@ def test_insertion(self):
         assert bpps[0].break2.start == 20218060
         assert bpps[0].break2.end == 20218060
         assert bpps[0].break2.orient == ORIENT.RIGHT
-        assert bpps[0].opposing_strands == False
+        assert bpps[0].opposing_strands is False
 
 
 class TestStrelka:
diff --git a/tests/unit/test_util.py b/tests/unit/test_util.py
index e163349e..4d2ccffa 100644
--- a/tests/unit/test_util.py
+++ b/tests/unit/test_util.py
@@ -1,17 +1,9 @@
-import os
-
 import pytest
 from mavis.constants import COLUMNS, ORIENT, STRAND
 from mavis.error import NotSpecifiedError
-from mavis.util import (
-    ENV_VAR_PREFIX,
-    cast,
-    get_connected_components,
-    get_env_variable,
-    read_bpp_from_input_file,
-)
+from mavis.util import cast, get_connected_components, read_bpp_from_input_file
 
-from .mock import Mock
+from ..util import todo
 
 
 class TestGetConnectedComponents:
@@ -39,12 +31,12 @@ def test_multiple_components(self):
 
 class TestCast:
     def test_float(self):
-        assert type(cast('1', float)) == type(1.0)
-        assert type(cast('1', int)) != type(1.0)
+        assert type(cast('1', float)) == type(1.0)  # noqa: E721
+        assert type(cast('1', int)) != type(1.0)  # noqa: E721
 
     def test_boolean(self):
-        assert type(cast('f', bool)) == type(False)
-        assert type(cast('false', bool)) == type(False)
+        assert type(cast('f', bool)) == type(False)  # noqa: E721
+        assert type(cast('false', bool)) == type(False)  # noqa: E721
         assert not cast('f', bool)
         assert not cast('false', bool)
         assert not cast('0', bool)
@@ -267,7 +259,7 @@ def test_break1_orient_ns(self, tmp_path):
         assert len(bpps) == 1
         assert bpps[0].break1.orient == ORIENT.LEFT
 
-    @pytest.mark.skip(reason='TODO')
+    @todo
     def test_break2_orient_ns(self, tmp_path):
         input_file = tmp_path / "inputs.tsv"
         input_file.write_text(
@@ -292,10 +284,6 @@ def test_break2_orient_ns(self, tmp_path):
         assert len(bpps) == 1
         assert bpps[0].break1.orient == ORIENT.LEFT
 
-    @pytest.mark.skip(reason='TODO')
-    def test_both_break_orient_ns(self, tmp_path):
-        input_file = tmp_path / "inputs.tsv"
-
     def test_base_case(self, tmp_path):
         input_file = tmp_path / "inputs.tsv"
         input_file.write_text(
@@ -319,7 +307,7 @@ def test_base_case(self, tmp_path):
         bpps = read_bpp_from_input_file(input_file, expand_strand=False, expand_orient=False)
         assert len(bpps) == 1
         assert bpps[0].break1.orient == ORIENT.RIGHT
-        assert bpps[0].opposing_strands == True
+        assert bpps[0].opposing_strands is True
 
     def test_unstranded_with_strand_calls(self, tmp_path):
         input_file = tmp_path / "inputs.tsv"
diff --git a/tests/unit/test_validate.py b/tests/unit/test_validate.py
index dc2de156..560ca909 100644
--- a/tests/unit/test_validate.py
+++ b/tests/unit/test_validate.py
@@ -1,15 +1,12 @@
-import unittest
-
 from mavis.constants import ORIENT
-from mavis.validate.call import _call_interval_by_flanking_coverage
-from mavis.validate.evidence import GenomeEvidence
-from mavis.validate.base import Evidence
 from mavis.interval import Interval
+from mavis.validate.base import Evidence
+from mavis.validate.call import _call_interval_by_flanking_coverage
 
 from .mock import Mock
 
 
-class CallIntervalByFlankingCoverage(unittest.TestCase):
+class TestCallIntervalByFlankingCoverage:
     def test_invalid_input_attr(self):
         pass
 
@@ -22,8 +19,8 @@ def test_left(self):
             distance=Evidence.distance,
             traverse=Evidence.traverse,
         )
-        self.assertEqual(110, i.start)
-        self.assertEqual(180, i.end)
+        assert i.start == 110
+        assert i.end == 180
 
         i = _call_interval_by_flanking_coverage(
             Mock(start=20, end=80),
@@ -33,8 +30,8 @@ def test_left(self):
             distance=Evidence.distance,
             traverse=Evidence.traverse,
         )
-        self.assertEqual(80, i.start)
-        self.assertEqual(209, i.end)
+        assert i.start == 80
+        assert i.end == 209
 
     def test_right(self):
         i = _call_interval_by_flanking_coverage(
@@ -45,8 +42,8 @@ def test_right(self):
             distance=Evidence.distance,
             traverse=Evidence.traverse,
         )
-        self.assertEqual(101, i.end)
-        self.assertEqual(31, i.start)
+        assert i.end == 101
+        assert i.start == 31
 
         i = _call_interval_by_flanking_coverage(
             Mock(start=150, end=200),
@@ -56,16 +53,16 @@ def test_right(self):
             distance=Evidence.distance,
             traverse=Evidence.traverse,
         )
-        self.assertEqual(11, i.start)
-        self.assertEqual(150, i.end)
+        assert i.start == 11
+        assert i.end == 150
 
 
-class TestDistanceAndTraverse(unittest.TestCase):
+class TestDistanceAndTraverse:
     def test_distance(self):
-        self.assertEqual(Interval(10), Evidence.distance(1, 11))
+        assert Evidence.distance(1, 11) == Interval(10)
 
     def test_traverse_right(self):
-        self.assertEqual(Interval(11), Evidence.traverse(1, 10, ORIENT.RIGHT))
+        assert Evidence.traverse(1, 10, ORIENT.RIGHT) == Interval(11)
 
     def test_traverse_left(self):
-        self.assertEqual(Interval(10), Evidence.traverse(20, 10, ORIENT.LEFT))
+        assert Evidence.traverse(20, 10, ORIENT.LEFT) == Interval(10)
diff --git a/tests/util.py b/tests/util.py
index 55db2d11..7cc9eaf9 100644
--- a/tests/util.py
+++ b/tests/util.py
@@ -1,9 +1,22 @@
 import glob
 import os
+import shutil
+
+import pytest
 
 DATA_DIR = os.path.join(os.path.dirname(__file__), 'data')
 
 
+long_running_test = pytest.mark.skipif(
+    os.environ.get('RUN_FULL') != '1',
+    reason='Only running FAST tests subset',
+)
+
+bwa_only = pytest.mark.skipif(not shutil.which('bwa'), reason='missing the command')
+blat_only = pytest.mark.skipif(not shutil.which('blat'), reason='missing the command')
+todo = pytest.mark.skip(reason='TODO')
+
+
 def package_relative_file(*paths):
     return os.path.abspath(os.path.join(os.path.dirname(__file__), '..', *paths))
 

From e7d9279220d64f826d837e5e7eafd82d3eb0f44d Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Mon, 26 Apr 2021 15:33:43 -0700
Subject: [PATCH 026/137] Fix linting errors

---
 src/tools/find_repeats.py          | 44 +++++++++++++++++++++++-------
 src/tools/generate_ensembl_json.py |  8 +++---
 2 files changed, 38 insertions(+), 14 deletions(-)

diff --git a/src/tools/find_repeats.py b/src/tools/find_repeats.py
index d840a037..e96337ce 100644
--- a/src/tools/find_repeats.py
+++ b/src/tools/find_repeats.py
@@ -15,17 +15,29 @@ def parse_arguments():
     """
     parser = argparse.ArgumentParser()
     parser.add_argument(
-        '-o', '--output',
-        help='path to the output file', required=True, metavar='FILEPATH'
+        '-o', '--output', help='path to the output file', required=True, metavar='FILEPATH'
     )
     parser.add_argument(
-        '-n', '--input', required=True, metavar='FILEPATH',
-        help='Path to the Input reference genome fasta file'
+        '-n',
+        '--input',
+        required=True,
+        metavar='FILEPATH',
+        help='Path to the Input reference genome fasta file',
     )
     parser.add_argument(
-        '--min_length', default=20, type=int, help='Minimum total length of the repeat region to find', metavar='INT')
+        '--min_length',
+        default=20,
+        type=int,
+        help='Minimum total length of the repeat region to find',
+        metavar='INT',
+    )
     parser.add_argument(
-        '--repeat_seq', default='N', type=str, help='Repeat sequence to look for. Case insensitive', nargs='+')
+        '--repeat_seq',
+        default='N',
+        type=str,
+        help='Repeat sequence to look for. Case insensitive',
+        nargs='+',
+    )
     args = parser.parse_args()
     if args.min_length < 2:
         parser.error('argument --min_length: cannot specify a shorter repeat than 2 bases')
@@ -43,7 +55,7 @@ def main():
         os.path.basename(__file__),
         'input: {}'.format(args.input),
         'min_length: {}'.format(args.min_length),
-        'repeat_seq: {}'.format(', '.join(args.repeat_seq))
+        'repeat_seq: {}'.format(', '.join(args.repeat_seq)),
     ]
     log('writing:', args.output)
     with open(args.output, 'w') as fh:
@@ -61,21 +73,33 @@ def main():
                 visited.add(seq)
             spans = []
             for repseq in repeat_sequences:
-                log('finding {}_repeat (min_length: {}), for chr{} (length: {})'.format(repseq, args.min_length, chrom, len(seq)))
+                log(
+                    'finding {}_repeat (min_length: {}), for chr{} (length: {})'.format(
+                        repseq, args.min_length, chrom, len(seq)
+                    )
+                )
                 index = 0
                 while index < len(seq):
                     next_n = seq.find(repseq, index)
                     if next_n < 0:
                         break
                     index = next_n
-                    while index + len(repseq) <= len(seq) and seq[index:index + len(repseq)] == repseq:
+                    while (
+                        index + len(repseq) <= len(seq)
+                        and seq[index : index + len(repseq)] == repseq
+                    ):
                         index += len(repseq)
                     span = BioInterval(chrom, next_n + 1, index, name='repeat_{}'.format(repseq))
                     if len(span) >= args.min_length and len(span) >= 2 * len(repseq):
                         spans.append(span)
             log('found', len(spans), 'spans', time_stamp=False)
             for span in spans:
-                fh.write('{}\t{}\t{}\t{}\n'.format(span.reference_object, span.start, span.end, span.name))
+                fh.write(
+                    '{}\t{}\t{}\t{}\n'.format(
+                        span.reference_object, span.start, span.end, span.name
+                    )
+                )
+
 
 if __name__ == '__main__':
     main()
diff --git a/src/tools/generate_ensembl_json.py b/src/tools/generate_ensembl_json.py
index 2e554f07..9c44d2f1 100755
--- a/src/tools/generate_ensembl_json.py
+++ b/src/tools/generate_ensembl_json.py
@@ -438,10 +438,10 @@ def choose_best_transcripts(self):
         """
         Select a canonical transcript for each human gene using Ensembl rules.
 
-        For human, the canonical transcript for a gene is set according to the following hierarchy: 
-        - 1. Longest CCDS translation with no stop codons. 
-        - 2. If no (1), choose the longest Ensembl/Havana merged translation with no stop codons. 
-        - 3. If no (2), choose the longest translation with no stop codons. 
+        For human, the canonical transcript for a gene is set according to the following hierarchy:
+        - 1. Longest CCDS translation with no stop codons.
+        - 2. If no (1), choose the longest Ensembl/Havana merged translation with no stop codons.
+        - 3. If no (2), choose the longest translation with no stop codons.
         - 4. If no translation, choose the longest non-protein-coding transcript.
 
         See: http://uswest.ensembl.org/Help/Glossary?id=346

From d5b4b140695d937a4f8e564e9605bbb956e60bf1 Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Mon, 26 Apr 2021 15:36:16 -0700
Subject: [PATCH 027/137] use aligner decorators

---
 tests/integration/test_align.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/tests/integration/test_align.py b/tests/integration/test_align.py
index 4effd774..0075a32d 100644
--- a/tests/integration/test_align.py
+++ b/tests/integration/test_align.py
@@ -14,7 +14,7 @@
 from mavis.schemas import DEFAULTS
 from mavis.validate.evidence import GenomeEvidence
 
-from ..util import get_data
+from ..util import blat_only, bwa_only, get_data
 from . import MockLongString, MockObject, MockRead
 
 REFERENCE_GENOME = None
@@ -52,7 +52,7 @@ def test_hardclipping(self):
 
 
 class TestAlign:
-    @pytest.mark.skipif(not shutil.which('blat'), reason='missing the blat command')
+    @blat_only
     def test_blat_contigs(self):
         ev = GenomeEvidence(
             Breakpoint('reference3', 1114, orient=ORIENT.RIGHT),
@@ -98,7 +98,7 @@ def test_blat_contigs(self):
         assert alignment.read1.cigar == [(CIGAR.S, 125), (CIGAR.EQ, 120)]
         assert alignment.read2.cigar == [(CIGAR.S, 117), (CIGAR.EQ, 128)]
 
-    @pytest.mark.skipif(not shutil.which('bwa'), reason='missing the command')
+    @bwa_only
     def test_bwa_contigs(self):
         ev = GenomeEvidence(
             Breakpoint('reference3', 1114, orient=ORIENT.RIGHT),
@@ -148,7 +148,7 @@ def test_bwa_contigs(self):
         assert alignment.read1.cigar == [(CIGAR.S, 125), (CIGAR.EQ, 120)]
         assert alignment.read2.cigar == [(CIGAR.S, 117), (CIGAR.EQ, 128)]
 
-    @pytest.mark.skipif(not shutil.which('blat'), reason='missing the blat command')
+    @blat_only
     def test_blat_contigs_deletion(self):
         ev = GenomeEvidence(
             Breakpoint('fake', 1714, orient=ORIENT.LEFT),
@@ -192,7 +192,7 @@ def test_blat_contigs_deletion(self):
         assert alignment.read1.reference_start == 1612
         assert alignment.read1.cigar == [(CIGAR.EQ, 102), (CIGAR.D, 1253), (CIGAR.EQ, 74)]
 
-    @pytest.mark.skipif(not shutil.which('blat'), reason='missing the blat command')
+    @blat_only
     def test_blat_contigs_deletion_revcomp(self):
         ev = GenomeEvidence(
             Breakpoint('fake', 1714, orient=ORIENT.LEFT),

From cf1bf9fe665523908610c45daffa112f0280fafa Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Mon, 26 Apr 2021 16:23:15 -0700
Subject: [PATCH 028/137] Update black

---
 setup.py                         | 2 +-
 src/mavis/annotate/genomic.py    | 6 +++---
 src/mavis/assemble.py            | 2 +-
 src/mavis/blat.py                | 2 +-
 src/mavis/illustrate/elements.py | 2 +-
 5 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/setup.py b/setup.py
index 0245e812..ecbc4691 100644
--- a/setup.py
+++ b/setup.py
@@ -109,7 +109,7 @@ def check_nonpython_dependencies():
     extras_require={
         'docs': DOC_REQS,
         'test': TEST_REQS,
-        'dev': ['black', 'flake8'] + DOC_REQS + TEST_REQS + DEPLOY_REQS,
+        'dev': ['black==20.8b1', 'flake8'] + DOC_REQS + TEST_REQS + DEPLOY_REQS,
         'deploy': DEPLOY_REQS,
         'tools': ['pyensembl', 'simplejson'],
     },
diff --git a/src/mavis/annotate/genomic.py b/src/mavis/annotate/genomic.py
index 43246960..9891cebe 100644
--- a/src/mavis/annotate/genomic.py
+++ b/src/mavis/annotate/genomic.py
@@ -64,7 +64,7 @@ def to_dict(self):
 
 
 class Gene(BioInterval):
-    """"""
+    """ """
 
     def __init__(self, chr, start, end, name=None, strand=STRAND.NS, aliases=None, seq=None):
         """
@@ -161,7 +161,7 @@ def to_dict(self):
 
 
 class Exon(BioInterval):
-    """"""
+    """ """
 
     def __init__(
         self,
@@ -275,7 +275,7 @@ def __repr__(self):
 
 
 class PreTranscript(BioInterval):
-    """"""
+    """ """
 
     def __init__(
         self,
diff --git a/src/mavis/assemble.py b/src/mavis/assemble.py
index 3e16e429..f7a9d301 100644
--- a/src/mavis/assemble.py
+++ b/src/mavis/assemble.py
@@ -12,7 +12,7 @@
 
 
 class Contig:
-    """"""
+    """ """
 
     def __init__(self, sequence, score):
         self.seq = sequence
diff --git a/src/mavis/blat.py b/src/mavis/blat.py
index c8cd325f..6efd0fb2 100644
--- a/src/mavis/blat.py
+++ b/src/mavis/blat.py
@@ -32,7 +32,7 @@
 
 
 class Blat:
-    """"""
+    """ """
 
     @staticmethod
     def millibad(row, is_protein=False, is_mrna=True):
diff --git a/src/mavis/illustrate/elements.py b/src/mavis/illustrate/elements.py
index aa9e90e9..cb79549b 100644
--- a/src/mavis/illustrate/elements.py
+++ b/src/mavis/illustrate/elements.py
@@ -93,7 +93,7 @@ def draw_exon_track(
     genomic_max=None,
     translation=None,
 ):
-    """"""
+    """ """
     colors = {} if colors is None else colors
     main_group = canvas.g(class_='exon_track')
 

From 0244f170c58e70975037a72eada38e4160b82cb6 Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Mon, 26 Apr 2021 17:48:36 -0700
Subject: [PATCH 029/137] Add singularity comment to docs

---
 docs/configuration/performance.md | 5 ++---
 docs/configuration/pipeline.md    | 8 ++++++++
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/docs/configuration/performance.md b/docs/configuration/performance.md
index e666c10a..a5cb9e7c 100644
--- a/docs/configuration/performance.md
+++ b/docs/configuration/performance.md
@@ -12,7 +12,7 @@ cpu requirements depending on what the user is trying to analyze. See
 
 ## Validation Resources
 
-![](../images/colo829_tumour_validation_resource_req.png)
+![validation resources](../images/colo829_tumour_validation_resource_req.png)
 
 Resource Requirements (MAVIS 1.8.0) for each validation job of the
 COLO829 tumour genome. The BAM file for the tumour genome is 127GB.
@@ -21,7 +21,6 @@ structural variant validations per job. The effect of number of events
 validated on both memory and time is plotted
 above.
 
-
 ## Annotation Resources
 
 Similar trends were observed for the annotation step (see below) with
@@ -29,7 +28,7 @@ regards to time elapsed. However the memory requirements remained more
 constant which is expected since, unlike validation, anntotation does
 not read more data in for more events.
 
-![](../images/colo829_tumour_annotation_resource_req.png)
+![annotation resources](../images/colo829_tumour_annotation_resource_req.png)
 
 Resource Requirements (MAVIS 1.8.0) for each annotation job of the
 COLO829 tumour genome. The events which passed validation (see above)
diff --git a/docs/configuration/pipeline.md b/docs/configuration/pipeline.md
index d8073a1c..2aab3368 100644
--- a/docs/configuration/pipeline.md
+++ b/docs/configuration/pipeline.md
@@ -23,3 +23,11 @@ snakemake -j <MAX JOBS> --configfile <YOUR CONFIG> --profile <YOUR PROFILE NAME>
 ```
 
 This will submit a series of jobs with dependencies.
+
+To use the mavis docker container through singularity, instead of installing mavis via pip, add the
+[`--use-singularity`](https://snakemake.readthedocs.io/en/stable/snakefiles/deployment.html#running-jobs-in-containers)
+flag.
+
+```bash
+snakemake -j <MAX JOBS> --configfile <YOUR CONFIG> --profile <YOUR PROFILE NAME> --use-singularity`
+```

From 7f1b806c89b0683abde0eb10918692a473004cef Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Tue, 27 Apr 2021 14:26:07 -0700
Subject: [PATCH 030/137] Use config package

---
 README.md                             |  91 +++--------------
 Snakefile                             |  18 +---
 docs/configuration/pipeline.md        |   6 +-
 docs/install.md                       | 142 ++++++++++++++++++++++++++
 docs/tutorials/full.md                |  11 +-
 docs/tutorials/mini.md                |   9 +-
 setup.py                              |   2 +-
 src/mavis/config.py                   |  92 ++---------------
 src/mavis/constants.py                |  99 +-----------------
 src/mavis/main.py                     |   7 +-
 src/tools/get_hg19_reference_files.sh |  16 +--
 src/tools/get_hg38_reference_files.sh |  21 ++++
 12 files changed, 218 insertions(+), 296 deletions(-)
 create mode 100644 docs/install.md
 create mode 100644 src/tools/get_hg38_reference_files.sh

diff --git a/README.md b/README.md
index 47956ae1..b6e6ef45 100644
--- a/README.md
+++ b/README.md
@@ -4,10 +4,8 @@
     </object><br>
 </object>
 
-
 ![PyPi](https://img.shields.io/pypi/v/mavis.svg) ![build](https://github.com/bcgsc/mavis/workflows/build/badge.svg?branch=master) [![codecov](https://codecov.io/gh/bcgsc/mavis/branch/master/graph/badge.svg)](https://codecov.io/gh/bcgsc/mavis) ![ReadTheDocs](https://readthedocs.org/projects/pip/badge/)
 
-
 ## About
 
 [MAVIS](http://mavis.bcgsc.ca) is python command-line tool for the post-processing of structural variant calls.
@@ -39,87 +37,28 @@ Common problems and questions are addressed on the [wiki](https://github.com/bcg
 If you have a question or issue that is not answered there (or already a github issue) please submit
 a github issue to our [github page](https://github.com/bcgsc/mavis/issues) or contact us by email at [mavis@bcgsc.ca](mailto:mavis@bcgsc.ca)
 
-## Install Instructions
-
-There are 3 major steps to setting up and installing [MAVIS](http://mavis.bcgsc.ca). If you are a developer contributing to mavis, please see the [instructions for developers page](https://mavis.readthedocs.io/en/latest/development) instead
-
-### 1. Install Aligner
-
-In addition to the python package dependencies, [MAVIS](http://mavis.bcgsc.ca) also requires an aligner to be installed.
-Currently the only aligners supported are [blat](https://mavis.readthedocs.io/en/latest/glossary/#blat) and [bwa mem](https://mavis.readthedocs.io/en/latest/glossary/#bwa).
-For MAVIS to run successfully the aligner must be installed and accessible on the path.
-If you have a non-standard install you may find it useful to edit the PATH environment variable. For example
-
-``` bash
-export PATH=/path/to/directory/containing/blat/binary:$PATH
-```
-
-[blat](http://mavis.bcgsc.ca/docs/latest/glossary.html#term-blat) is the default aligner. To configure MAVIS to use [bwa mem](http://mavis.bcgsc.ca/docs/latest/glossary.html#term-bwa) as a default instead, use the
-[MAVIS environment variables](https://mavis.readthedocs.io/en/latest/configuration/settings/). Make sure to specify BOTH of the variables below to change the default aligner.
-
-``` bash
-export MAVIS_ALIGNER='bwa mem'
-export MAVIS_ALIGNER_REFERENCE=/path/to/mem/fasta/ref/file
-```
-
-After this has been installed MAVIS itself can be installed through [pip](https://pypi.org/project/mavis/)
-
-### 2. Install MAVIS
-
-#### Install using pip
-
-The easiest way to install [MAVIS](http://mavis.bcgsc.ca) is through the python package manager, pip. If you do not have python3 installed it can be found [here](https://www.python.org/downloads)
-
-Ensuring you have a recent version of pip and setuptools will improve the install experience. Older versions of pip and setuptools may have issues with obtaining some of the mavis python dependencies
-
-``` bash
-pip install --upgrade pip setuptools
-```
-
-or (for Anaconda users)
-
-``` bash
-conda update pip setuptools
-```
-
-If this is not a clean/new python install it may be useful to set up mavis in a [virtual python environment](https://docs.python.org/3/tutorial/venv.html)
-
-Then install mavis itself
-
-``` bash
-pip install mavis
-```
-
-This will install mavis and its python dependencies.
+## Getting Started
 
-#### Install using Buildout
+The simplest way to use MAVIS is via Singularity. The MAVIS docker container used
+by singularity will take care of installing the aligner as well.
 
-Alternatively you can use the [bootstrap/buildout](http://www.buildout.org/en/latest/) to install mavis into bin/mavis
-
-``` bash
-git clone https://github.com/bcgsc/mavis.git
-cd mavis
-pip install zc.buildout
-python bootstrap.py
-bin/buildout
+```bash
+pip install -U setuptools pip
+pip install mavis_config  # also installs snakemake
 ```
 
-This will install mavis and its python dependencies into eggs inside the cloned mavis directory which can be used by simply running bin/mavis
+Now you can run MAVIS via Snakemake as follows:
 
-### 3. Build or Download Reference Files
-
-After [MAVIS](http://mavis.bcgsc.ca) is installed the [reference files](https://mavis.readthedocs.io/en/latest/inputs/reference) must be generated (or downloaded) before it can be run. A simple bash script to download the hg19 reference files and generate a MAVIS environment file is provided under mavis/tools for convenience.
-
-``` bash
-cd /path/to/where/you/want/to/put/the/files
-wget https://raw.githubusercontent.com/bcgsc/mavis/master/tools/get_hg19_reference_files.sh
-bash get_hg19_reference_files.sh
-source reference_inputs/hg19_env.sh
+```bash
+snakemake \
+    -j <MAX JOBS> \
+    --configfile <YOUR CONFIG> \
+    --use-singularity \
+    -s Snakefile
 ```
 
-Once the above 3 steps are complete [MAVIS](http://mavis.bcgsc.ca) is ready to be run.
-See the MAVIS [tutorial](https://mavis.readthedocs.io/en/latest/tutorials/mini) to learn about running MAVIS.
-
+For other installation options which do not use docker/singularity see the comprehensive install
+instructions in the [user manual](https://mavis.readthedocs.io/en/latest/install)
 
 ## Citation
 
diff --git a/Snakefile b/Snakefile
index 23c0772e..5501fe9f 100644
--- a/Snakefile
+++ b/Snakefile
@@ -1,10 +1,11 @@
-from snakemake.utils import validate
 from snakemake.exceptions import WorkflowError
 import os
 from typing import List, Dict
 import re
 import json
 import pandas as pd
+from mavis_config import validate_config
+from mavis_config.constants import SUBCOMMAND
 
 CONTAINER = 'bcgsc/mavis:latest'
 
@@ -15,18 +16,7 @@ INITIALIZED_CONFIG = output_dir('config.json')
 
 
 try:
-    # TODO: replace with URL so that the user does not need a copy of the config schema
-    validate(
-        config,
-        os.path.join(os.getcwd(), 'src/mavis/schemas/config.json')
-    )
-    for key in [
-        "libraries",
-        "reference.annotations",
-        "output_dir"
-    ]:
-        if key not in config:
-            raise ValueError(f'missing required property: {key}')
+    validate_config(config, stage=SUBCOMMAND.SETUP)
 except Exception as err:
     short_msg = ' '.join(str(err).split('\n')[:2]) # these can get super long
     raise WorkflowError(short_msg)
@@ -85,7 +75,7 @@ rule copy_config:
         with open(output_dir('config.raw.json'), 'w') as fh:
             fh.write(json.dumps(config, sort_keys=True, indent='  '))
 
-
+# adds the bam stats and default settings
 rule init_config:
     input: rules.copy_config.output
     output: INITIALIZED_CONFIG
diff --git a/docs/configuration/pipeline.md b/docs/configuration/pipeline.md
index 2aab3368..a79e16f2 100644
--- a/docs/configuration/pipeline.md
+++ b/docs/configuration/pipeline.md
@@ -13,13 +13,13 @@ far-left.
 The most common use case is running the pipeline through snakemake
 
 ```bash
-snakemake -j <MAX JOBS> --configfile <YOUR CONFIG>
+snakemake -j <MAX JOBS> --configfile <YOUR CONFIG> -s Snakefile
 ```
 
 If you are submitting to a cluster, use the [snakemake profiles](https://snakemake.readthedocs.io/en/stable/executing/cli.html#profiles)
 
 ```bash
-snakemake -j <MAX JOBS> --configfile <YOUR CONFIG> --profile <YOUR PROFILE NAME>
+snakemake -j <MAX JOBS> --configfile <YOUR CONFIG> --profile <YOUR PROFILE NAME> -s Snakefile
 ```
 
 This will submit a series of jobs with dependencies.
@@ -29,5 +29,5 @@ To use the mavis docker container through singularity, instead of installing mav
 flag.
 
 ```bash
-snakemake -j <MAX JOBS> --configfile <YOUR CONFIG> --profile <YOUR PROFILE NAME> --use-singularity`
+snakemake -j <MAX JOBS> --configfile <YOUR CONFIG> --profile <YOUR PROFILE NAME> --use-singularity -s Snakefile
 ```
diff --git a/docs/install.md b/docs/install.md
new file mode 100644
index 00000000..b3468f7a
--- /dev/null
+++ b/docs/install.md
@@ -0,0 +1,142 @@
+# Install Instructions
+
+Once the install steps are complete [MAVIS](http://mavis.bcgsc.ca) is ready to be run.
+See the MAVIS [tutorial](https://mavis.readthedocs.io/en/latest/tutorials/mini) to learn about running MAVIS.
+
+For either install option you will want to install the main Snakefile. It is best to use a tag to
+specify the version of interest but you can download the latest version from the master branch
+
+```bash
+wget https://raw.githubusercontent.com/bcgsc/mavis/master/Snakefile -O Snakefile
+```
+
+## Install for Docker/Singularity
+
+The simplest way to use MAVIS is via Singularity. The MAVIS docker container used
+by singularity will take care of installing the aligner as well.
+
+```bash
+pip install -U setuptools pip
+pip install mavis_config  # also installs snakemake
+```
+
+Now you will run mavis via Snakemake as follows
+
+```bash
+snakemake \
+    -j <MAX JOBS> \
+    --configfile <YOUR CONFIG> \
+    --use-singularity \
+    -s Snakefile
+```
+
+## Install (Python Only)
+
+MAVIS can also be run with just python. However you will need to install the aligner(s) required
+by MAVIS separately and ensure they are available on the default PATH variable when MAVIS is run
+
+### 1. Install Aligner
+
+In addition to the python package dependencies, [MAVIS](http://mavis.bcgsc.ca) also requires an aligner to be installed.
+Currently the only aligners supported are [blat](https://mavis.readthedocs.io/en/latest/glossary/#blat) and [bwa mem](https://mavis.readthedocs.io/en/latest/glossary/#bwa).
+For MAVIS to run successfully the aligner must be installed and accessible on the path.
+If you have a non-standard install you may find it useful to edit the PATH environment variable. For example
+
+``` bash
+export PATH=/path/to/directory/containing/blat/binary:$PATH
+```
+
+[blat](http://mavis.bcgsc.ca/docs/latest/glossary.html#term-blat) is the default aligner. To configure MAVIS to use [bwa mem](http://mavis.bcgsc.ca/docs/latest/glossary.html#term-bwa) it must be specified
+in the [config](https://mavis.readthedocs.io/en/latest/configuration/settings/) JSON file.
+
+After this has been installed MAVIS itself can be installed through [pip](https://pypi.org/project/mavis/)
+
+### 2. Install MAVIS
+
+#### Install using pip
+
+The easiest way to install [MAVIS](http://mavis.bcgsc.ca) is through the python package manager, pip. If you do not have python3 installed it can be found [here](https://www.python.org/downloads)
+
+Ensuring you have a recent version of pip and setuptools will improve the install experience. Older versions of pip and setuptools may have issues with obtaining some of the mavis python dependencies
+
+``` bash
+pip install --upgrade pip setuptools
+```
+
+or (for Anaconda users)
+
+``` bash
+conda update pip setuptools
+```
+
+If this is not a clean/new python install it may be useful to set up mavis in a [virtual python environment](https://docs.python.org/3/tutorial/venv.html)
+
+Then install mavis itself
+
+``` bash
+pip install mavis
+```
+
+This will install mavis and its python dependencies.
+
+#### Install using Buildout
+
+Alternatively you can use the [bootstrap/buildout](http://www.buildout.org/en/latest/) to install mavis into bin/mavis
+
+``` bash
+git clone https://github.com/bcgsc/mavis.git
+cd mavis
+pip install zc.buildout
+python bootstrap.py
+bin/buildout
+```
+
+This will install mavis and its python dependencies into eggs inside the cloned mavis directory which can be used by simply running bin/mavis
+
+Finally you will need to Build/Download the necessary reference files
+
+## Build or Download Reference Files
+
+After [MAVIS](http://mavis.bcgsc.ca) is installed the [reference files](https://mavis.readthedocs.io/en/latest/inputs/reference) must be generated (or downloaded) before it can be run. A simple bash script to download the hg19 reference files is provided under mavis/tools for convenience.
+
+### Download Hg19 Files
+
+``` bash
+cd /path/to/where/you/want/to/put/the/files
+wget https://raw.githubusercontent.com/bcgsc/mavis/master/src/tools/get_hg19_reference_files.sh
+bash get_hg19_reference_files.sh
+```
+
+You should now see the reference files in the current directory
+
+```text
+.
+|-- cytoBand.txt
+|-- dgv_hg19_variants.tab
+|-- ensembl69_hg19_annotations.json
+|-- get_hg19_reference_files.sh
+|-- hg19.2bit
+|-- hg19.fa
+`-- hg19_masking.tab
+```
+
+### Download Hg38 Files
+
+``` bash
+cd /path/to/where/you/want/to/put/the/files
+wget https://raw.githubusercontent.com/bcgsc/mavis/master/src/tools/get_hg38_reference_files.sh
+bash get_hg38_reference_files.sh
+```
+
+You should now see the reference files in the current directory
+
+```text
+.
+|-- cytoBand.txt
+|-- dgv_hg38_variants.tab
+|-- ensembl79_hg38_annotations.json
+|-- get_hg38_reference_files.sh
+|-- GCA_000001405.15_GRCh38_no_alt_analysis_set.fna
+|-- GRCh38_masking.tab
+`-- hg38.2bit
+```
diff --git a/docs/tutorials/full.md b/docs/tutorials/full.md
index 44b187bc..ebcfd290 100644
--- a/docs/tutorials/full.md
+++ b/docs/tutorials/full.md
@@ -36,13 +36,11 @@ The expected contents are
 
 ## Downloading the Reference Inputs
 
-Run the following to download the hg19 reference files and set up the
-environment variables for configuring MAVIS
+Run the following to download the hg19 reference files
 
 ```bash
 wget https://raw.githubusercontent.com/bcgsc/mavis/master/tools/get_hg19_reference_files.sh
 bash get_hg19_reference_files.sh
-source reference_inputs/hg19_env.sh
 ```
 
 ## Creating the Config File
@@ -173,6 +171,13 @@ Finally you will need to set output directory and the reference files
 
 ## Running the Workflow
 
+In order to run the snakemake file you will need to have the config validation module
+`mavis_config` installed which has minimal dependencies.
+
+```bash
+pip install mavis_config
+```
+
 You are now ready to run the workflow
 
 ```bash
diff --git a/docs/tutorials/mini.md b/docs/tutorials/mini.md
index 657dac26..27b5f51f 100644
--- a/docs/tutorials/mini.md
+++ b/docs/tutorials/mini.md
@@ -16,21 +16,22 @@ installed
 git clone https://github.com/bcgsc/mavis.git
 git checkout <VERSION_TAG>
 mv mavis/tests .
+mv mavis/Snakefile .
 rm -r mavis
 ```
 
 Now you should have a folder called `tests` in your current directory. Since this is a trivial
 example, it can easily be run locally. However in order to run the snakemake file you will need
-to have a copy of the config schema definition file which is included in MAVIS by default.
+to have the config validation module `mavis_config` installed which has minimal dependencies.
 
-```text
-mavis/schemas/config.json
+```bash
+pip install mavis_config
 ```
 
 Now you are ready to run MAVIS. This can be done in a single command using snakemake.
 
 ```bash
-snakemake -j 1 --configfile=tests/mini-tutorial.config.json
+snakemake -j 1 --configfile=tests/mini-tutorial.config.json -s Snakefile
 ```
 
 Which will run the mini tutorial version and output files into a folder called `output_dir` in the
diff --git a/setup.py b/setup.py
index ecbc4691..cf458d52 100644
--- a/setup.py
+++ b/setup.py
@@ -90,7 +90,7 @@ def check_nonpython_dependencies():
     'pyvcf==0.6.8',
     'shortuuid>=0.5.0',
     'svgwrite',
-    'snakemake>=6.1.1, <7',
+    'mavis_config==1.0.0',
 ]
 
 DEPLOY_REQS = ['twine', 'm2r', 'wheel']
diff --git a/src/mavis/config.py b/src/mavis/config.py
index 480d3f72..38fcb500 100644
--- a/src/mavis/config.py
+++ b/src/mavis/config.py
@@ -110,91 +110,21 @@ def __call__(self, parser, namespace, values, option_string=None):
         setattr(namespace, self.dest, items)
 
 
-def validate_config(config: Dict, bam_stats: Optional[bool] = False, stage: str = '') -> None:
+def add_bamstats_to_config(config: Dict):
     """
     Check that the input JSON config conforms to the expected schema as well
     as the other relevant checks such as file exsts
     """
-    schema = 'config' if stage != SUBCOMMAND.OVERLAY else 'overlay'
-
-    try:
-        snakemake_validate(
-            config,
-            os.path.join(os.path.dirname(__file__), f'schemas/{schema}.json'),
-            set_default=True,
-        )
-    except Exception as err:
-        short_msg = '. '.join(
-            [line for line in str(err).split('\n') if line.strip()][:3]
-        )  # these can get super long
-        raise WorkflowError(short_msg)
-
-    required = []
-    if (
-        stage not in {SUBCOMMAND.CONVERT}
-        or stage == SUBCOMMAND.CLUSTER
-        and not config['cluster.uninformative_filter']
-    ):
-        required.append('reference.annotations')
-
-    if stage == SUBCOMMAND.VALIDATE:
-        required.extend(['reference.aligner_reference', 'reference.reference_genome'])
-
-    for req in required:
-        if req not in config:
-            raise WorkflowError(f'missing required property: {req}')
-
-    if schema == 'config':
-        conversion_dir = os.path.join(config['output_dir'], 'converted_outputs')
-        # check all assignments are conversions aliases or existing files
-        for libname, library in config['libraries'].items():
-            assignments = []
-            for i, assignment in enumerate(library['assign']):
-                if assignment in config.get('convert', {}):
-                    # replace the alias with the expected output path
-                    converted_output = os.path.join(conversion_dir, f'{assignment}.tab')
-                    assignments.append(converted_output)
-                elif (
-                    not os.path.exists(assignment) and os.path.dirname(assignment) != conversion_dir
-                ):
-                    raise FileNotFoundError(f'cannot find the expected input file {assignment}')
-                else:
-                    assignments.append(assignment)
-            library['assign'] = assignments
-
-            if not config['skip_stage.validate'] and stage in {
-                SUBCOMMAND.VALIDATE,
-                SUBCOMMAND.SETUP,
-            }:
-                if not library.get('bam_file', None) or not os.path.exists(library['bam_file']):
-                    raise FileNotFoundError(
-                        f'missing bam file for library ({libname}), it is a required input when the validate stage is not skipped'
-                    )
-                # calculate the bam_stats if the have not been given
-                missing_stats = any(
-                    [
-                        col not in library
-                        for col in ['median_fragment_size', 'read_length', 'stdev_fragment_size']
-                    ]
-                )
-                if missing_stats and bam_stats:
-                    library.update(calculate_bam_stats(config, libname))
-
-        # expand and check the input files exist for any conversions
-        for conversion in config.get('convert', {}).values():
-            expanded = []
-            for input_file in conversion['inputs']:
-                expanded.extend(bash_expands(input_file))
-            conversion['inputs'] = expanded
-
-    # make sure all the reference files specified exist and overload with environment variables where applicable
-    for ref_type in list(config.keys()):
-        if not ref_type.startswith('reference.'):
-            continue
-        expanded = []
-        for input_file in config[ref_type]:
-            expanded.extend(bash_expands(input_file))
-        config[ref_type] = expanded
+    # fill in the bam stats for any library that is missing them
+    for libname, library in config['libraries'].items():
+        # calculate the bam_stats if they have not been given
+        if any(
+            [
+                col not in library
+                for col in ['median_fragment_size', 'read_length', 'stdev_fragment_size']
+            ]
+        ):
+            library.update(calculate_bam_stats(config, libname))
 
 
 def get_metavar(arg_type):
diff --git a/src/mavis/constants.py b/src/mavis/constants.py
index 89e75e64..fc27c03d 100644
--- a/src/mavis/constants.py
+++ b/src/mavis/constants.py
@@ -10,6 +10,7 @@
 from Bio.Alphabet.IUPAC import ambiguous_dna
 from Bio.Data.IUPACData import ambiguous_dna_values
 from Bio.Seq import Seq
+from mavis_config.constants import SUBCOMMAND, MavisNamespace
 
 PROGNAME: str = 'mavis'
 EXIT_OK: int = 0
@@ -17,89 +18,6 @@
 EXIT_INCOMPLETE: int = 2
 
 
-class EnumType(type):
-    def __contains__(cls, member):
-        return member in cls.values()
-
-    def __getitem__(cls, item):
-        for k, v in cls.items():
-            if k == item:
-                return v
-        raise KeyError(item)
-
-    def __iter__(cls):
-        """
-        Returns members in definition order.
-        """
-        return cls.values()
-
-
-class MavisNamespace(metaclass=EnumType):
-    @classmethod
-    def items(cls):
-        return [(k, v) for k, v in cls.__dict__.items() if not k.startswith('_')]
-
-    @classmethod
-    def to_dict(cls):
-        return dict(cls.items())
-
-    @classmethod
-    def keys(cls):
-        return [k for k, v in cls.items()]
-
-    @classmethod
-    def values(cls):
-        return [v for k, v in cls.items()]
-
-    @classmethod
-    def enforce(cls, value):
-        """
-        checks that the current namespace has a given value
-
-        Returns:
-            the input value
-
-        Raises:
-            KeyError: the value did not exist
-
-        Example:
-            >>> nspace.enforce(1)
-            1
-            >>> nspace.enforce(3)
-            Traceback (most recent call last):
-            ....
-        """
-        if value not in cls.values():
-            raise KeyError('value {0} is not a valid member of '.format(repr(value)), cls.values())
-        return value
-
-    @classmethod
-    def reverse(cls, value):
-        """
-        for a given value, return the associated key
-
-        Args:
-            value: the value to get the key/attribute name for
-
-        Raises:
-            KeyError: the value is not unique
-            KeyError: the value is not assigned
-
-        Example:
-            >>> nspace.reverse(1)
-            'thing'
-        """
-        result = []
-        for key in cls.keys():
-            if cls[key] == value:
-                result.append(key)
-        if len(result) > 1:
-            raise KeyError('could not reverse, the mapping is not unique', value, result)
-        elif not result:
-            raise KeyError('input value is not assigned to a key', value)
-        return result[0]
-
-
 def float_fraction(num):
     """
     cast input to a float
@@ -147,21 +65,6 @@ class SPLICE_TYPE(MavisNamespace):
 """Filename for all complete stamp files"""
 
 
-class SUBCOMMAND(MavisNamespace):
-    """
-    holds controlled vocabulary for allowed pipeline stage values
-    """
-
-    ANNOTATE: str = 'annotate'
-    VALIDATE: str = 'validate'
-    CLUSTER: str = 'cluster'
-    PAIR: str = 'pairing'
-    SUMMARY: str = 'summary'
-    CONVERT: str = 'convert'
-    OVERLAY: str = 'overlay'
-    SETUP: str = 'setup'
-
-
 CODON_SIZE: int = 3
 """the number of bases making up a codon"""
 
diff --git a/src/mavis/main.py b/src/mavis/main.py
index 5b69fff3..abedaf82 100644
--- a/src/mavis/main.py
+++ b/src/mavis/main.py
@@ -8,6 +8,8 @@
 import time
 from typing import Dict
 
+from mavis_config import validate_config
+
 from . import __version__
 from . import config as _config
 from . import util as _util
@@ -194,9 +196,8 @@ def main(argv=None):
         if args.command != SUBCOMMAND.CONVERT:
             with open(args.config, 'r') as fh:
                 config = json.load(fh)
-                _config.validate_config(
+                validate_config(
                     config,
-                    args.command in {SUBCOMMAND.SETUP, SUBCOMMAND.VALIDATE},
                     args.command,
                 )
     except AttributeError as err:
@@ -263,6 +264,8 @@ def main(argv=None):
                 args.assume_no_untemplated,
             )
         elif command == SUBCOMMAND.SETUP:
+            # add bam stats to the config if missing
+            _config.add_bamstats_to_config(config)
             _util.LOG(f'writing: {args.outputfile}')
             with open(args.outputfile, 'w') as fh:
                 fh.write(json.dumps(config, sort_keys=True, indent='  '))
diff --git a/src/tools/get_hg19_reference_files.sh b/src/tools/get_hg19_reference_files.sh
index 0f4f91c4..3fb40f46 100644
--- a/src/tools/get_hg19_reference_files.sh
+++ b/src/tools/get_hg19_reference_files.sh
@@ -1,14 +1,9 @@
-mkdir reference_inputs
-cd reference_inputs
-
-ENV_FILE=hg19_env.sh
+set -euo pipefail
 
 echo "downloading the reference genome file"
 wget http://hgdownload.cse.ucsc.edu/goldenPath/hg19/bigZips/chromFa.tar.gz
 tar -xvzf chromFa.tar.gz
 
-CWD=$( pwd )
-
 # concatenate the chromosome fa files into a single file
 for fname in chr{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,X,Y}.fa
 do
@@ -17,27 +12,20 @@ done
 
 # Clean up the non concatenated and alt chromosome files
 rm -f chr*.fa
-echo export MAVIS_REFERENCE_GENOME="${CWD}/hg19.fa" >> $ENV_FILE
+rm -f chromFa.tar.gz
 
 echo "downloading the gene annotations file"
 wget http://www.bcgsc.ca/downloads/mavis/ensembl69_hg19_annotations.json
-echo export MAVIS_ANNOTATIONS="${CWD}/ensembl69_hg19_annotations.json" >> $ENV_FILE
 
 echo "downloading the masking file"
 wget http://www.bcgsc.ca/downloads/mavis/hg19_masking.tab
-echo export MAVIS_MASKING="${CWD}/hg19_masking.tab" >> $ENV_FILE
 
 echo "downloading the dgv annotation file"
 wget http://www.bcgsc.ca/downloads/mavis/dgv_hg19_variants.tab
-echo export MAVIS_DGV_ANNOTATION="${CWD}/dgv_hg19_variants.tab" >> $ENV_FILE
 
 echo "downloading the aligner reference file"
 wget http://hgdownload.cse.ucsc.edu/goldenPath/hg19/bigZips/hg19.2bit
-echo export MAVIS_ALIGNER_REFERENCE="${CWD}/hg19.2bit" >> $ENV_FILE
 
 echo "downloading the template metadata file"
 wget http://hgdownload.cse.ucsc.edu/goldenPath/hg19/database/cytoBand.txt.gz
 gunzip cytoBand.txt.gz
-echo export MAVIS_TEMPLATE_METADATA="${CWD}/cytoBand.txt" >> $ENV_FILE
-
-echo "Source $CWD/$ENV_FILE prior to running MAVIS to set MAVIS default arguments"
diff --git a/src/tools/get_hg38_reference_files.sh b/src/tools/get_hg38_reference_files.sh
new file mode 100644
index 00000000..97c1face
--- /dev/null
+++ b/src/tools/get_hg38_reference_files.sh
@@ -0,0 +1,21 @@
+set -euo pipefail
+
+echo "downloading the reference genome (no alt) file"
+wget ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/001/405/GCA_000001405.15_GRCh38/seqs_for_alignment_pipelines.ucsc_ids/GCA_000001405.15_GRCh38_no_alt_analysis_set.fna.gz
+gunzip GCA_000001405.15_GRCh38_no_alt_analysis_set.fna.gz
+
+echo "downloading the gene annotations file"
+wget http://www.bcgsc.ca/downloads/mavis/ensembl79_hg38_annotations.json
+
+echo "downloading the masking file"
+wget http://www.bcgsc.ca/downloads/mavis/GRCh38_masking.tab
+
+echo "downloading the dgv annotation file"
+wget http://www.bcgsc.ca/downloads/mavis/dgv_hg38_variants.tab
+
+echo "downloading the aligner reference file"
+wget http://hgdownload.cse.ucsc.edu/goldenPath/hg38/bigZips/hg38.2bit
+
+echo "downloading the template metadata file"
+wget http://hgdownload.cse.ucsc.edu/goldenPath/hg38/database/cytoBand.txt.gz
+gunzip cytoBand.txt.gz

From 078846fa31502da476b65075780572bb817b7667 Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Tue, 27 Apr 2021 14:43:26 -0700
Subject: [PATCH 031/137] Add docker publish job to tag workflow

---
 .github/workflows/publish.yml | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
index 35bf9a9d..3b19260f 100644
--- a/.github/workflows/publish.yml
+++ b/.github/workflows/publish.yml
@@ -8,10 +8,8 @@ on:
     types: [created]
 
 jobs:
-  deploy:
-
+  pypi:
     runs-on: ubuntu-latest
-
     steps:
     - uses: actions/checkout@v2
     - name: Set up Python
@@ -30,3 +28,15 @@ jobs:
         python setup.py sdist bdist_wheel install
         twine check dist/*
         twine upload dist/*
+  docker:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v2
+      - run: docker login -u $DOCKER_USER -p $DOCKER_PASSWORD
+        env:
+          DOCKER_USER: ${{ secrets.DOCKER_USER }}
+          DOCKER_PASSWORD: ${{ secrets.DOCKER_PASSWORD }}
+      - run: |
+          docker build --file Dockerfile --tag bcgsc/mavis:latest --tag bcgsc/mavis:${{ github.event.release.tag_name }} .
+      - run: docker push bcgsc/mavis:latest
+      - run: docker push bcgsc/mavis:${{ github.event.release.tag_name }}

From d5d131ba1aaf7fefce648479b00e831326bb3252 Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Tue, 27 Apr 2021 15:30:26 -0700
Subject: [PATCH 032/137] Add tests for running docker image with singularity

---
 .github/workflows/build.yml                 | 31 +++++++++++
 .github/workflows/quick-tests.yml           | 13 +----
 Dockerfile                                  |  4 ++
 README.md                                   |  2 +-
 Snakefile                                   | 61 ++++++++++++++++++---
 docs/install.md                             |  2 +-
 docs/tutorials/mini.md                      |  2 +-
 tests/integration/test_annotate_examples.py |  3 +
 tests/snakemake/test_mini_workflow.py       |  3 +-
 9 files changed, 97 insertions(+), 24 deletions(-)

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index e8107623..a328130c 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -72,3 +72,34 @@ jobs:
         name: codecov-umbrella
         fail_ci_if_error: true
       if: matrix.python-version == 3.8
+  docker:
+    runs-on: ubuntu-latest
+    name: docker build
+    steps:
+      - uses: actions/checkout@v2
+      - name: build the docker container
+        run: |
+          docker build --file Dockerfile --tag bcgsc/mavis:latest .
+      - name: test the help menu
+        run: |
+          docker run bcgsc/mavis -h
+      - name: Set up Python 3.7
+        uses: actions/setup-python@v2
+        with:
+          python-version: 3.7
+      - name: Install workflow dependencies
+        run: |
+          python -m pip install --upgrade pip setuptools wheel
+          pip install mavis_config pandas
+      - uses: eWaterCycle/setup-singularity@v6
+        with:
+          singularity-version: 3.6.4
+      - name: docker2singularity
+        run:
+          docker run --mount type=bind,source=/var/run/docker.sock,target=/var/run/docker.sock --mount type=bind,source="$(pwd)",target=/output --privileged -t --rm singularityware/docker2singularity bcgsc/mavis:latest
+      - name: Run analysis with snakemake & singularity
+        run: |
+          # get the SIMG filename
+          export SNAKEMAKE_CONTAINER=$(ls *mavis*.simg)
+          snakemake -j 2 --configfile tests/mini-tutorial.config.json --use-singularity
+        if: always()
diff --git a/.github/workflows/quick-tests.yml b/.github/workflows/quick-tests.yml
index 689ea8fb..c74ae1be 100644
--- a/.github/workflows/quick-tests.yml
+++ b/.github/workflows/quick-tests.yml
@@ -20,7 +20,7 @@ jobs:
         python-version: ${{ matrix.python-version }}
     - name: Install dependencies
       run: |
-        python -m pip install --upgrade pip setuptools
+        python -m pip install --upgrade pip setuptools wheel
         pip install .[test]
     - name: Lint with flake8
       run: |
@@ -51,14 +51,3 @@ jobs:
           --durations=10
       env:
         RUN_FULL: 0
-  docker:
-    runs-on: ubuntu-latest
-    name: docker build
-    steps:
-      - uses: actions/checkout@v2
-      - name: build the docker container
-        run: |
-          docker build --file Dockerfile --tag bcgsc/mavis .
-      - name: test the help menu
-        run: |
-          docker run bcgsc/mavis -h
diff --git a/Dockerfile b/Dockerfile
index 3f2ef284..8b93e0c0 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -17,6 +17,10 @@ RUN git clone https://github.com/lh3/bwa.git && \
     cd .. && \
     mv bwa/bwa /usr/local/bin
 
+
+# install blat dependencies
+RUN apt-get install -y libcurl4
+
 # install blat
 RUN wget http://hgdownload.soe.ucsc.edu/admin/exe/linux.x86_64/blat/blat && \
     chmod a+x blat && \
diff --git a/README.md b/README.md
index b6e6ef45..c3f18975 100644
--- a/README.md
+++ b/README.md
@@ -44,7 +44,7 @@ by singularity will take care of installing the aligner as well.
 
 ```bash
 pip install -U setuptools pip
-pip install mavis_config  # also installs snakemake
+pip install mavis_config pandas  # also installs snakemake
 ```
 
 Now you will run mavis via Snakemake as follows
diff --git a/Snakefile b/Snakefile
index 5501fe9f..b9104212 100644
--- a/Snakefile
+++ b/Snakefile
@@ -3,16 +3,19 @@ import os
 from typing import List, Dict
 import re
 import json
-import pandas as pd
 from mavis_config import validate_config
 from mavis_config.constants import SUBCOMMAND
 
-CONTAINER = 'bcgsc/mavis:latest'
+# env variable mainly for CI/CD
+CONTAINER = os.environ.get('SNAKEMAKE_CONTAINER', 'docker://bcgsc/mavis:latest')
+MAX_TIME = 57600
+DEFAULT_MEMORY_MB = 16000
 
 def output_dir(*paths):
     return os.path.join(config['output_dir'], *paths)
 
 INITIALIZED_CONFIG = output_dir('config.json')
+LOG_DIR = output_dir('logs')
 
 
 try:
@@ -27,6 +30,8 @@ CLUSTER_OUTPUT = output_dir('{library}/cluster/batch-{job_id}.tab')
 
 # create the cluster inputs and guess the cluster sizes
 def count_total_rows(filenames):
+    import pandas as pd
+
     row_count = 0
     for filename in filenames:
         df = pd.read_csv(filename, sep='\t').drop_duplicates()
@@ -71,6 +76,11 @@ rule all:
 
 rule copy_config:
     output: output_dir('config.raw.json')
+    resources:
+        time_limit=MAX_TIME,
+        mem_mb=4000,
+        cpus=1,
+        log_dir=LOG_DIR
     run:
         with open(output_dir('config.raw.json'), 'w') as fh:
             fh.write(json.dumps(config, sort_keys=True, indent='  '))
@@ -80,19 +90,29 @@ rule init_config:
     input: rules.copy_config.output
     output: INITIALIZED_CONFIG
     container: CONTAINER
+    resources:
+        time_limit=MAX_TIME,
+        mem_mb=DEFAULT_MEMORY_MB,
+        cpus=1,
+        log_dir=LOG_DIR
     shell: 'mavis setup --config {input} --outputfile {output}'
 
 
 rule convert:
     output: output_dir('converted_outputs/{alias}.tab')
     input: rules.init_config.output
-    log: output_dir('converted_outputs/snakemake.{alias}.log.txt')
+    log: os.path.join(LOG_DIR, 'convert.snakemake.{alias}.log.txt')
     params:
         file_type=lambda w: config['convert'][w.alias]['file_type'],
         strand_specific=lambda w: config['convert'][w.alias]['strand_specific'],
         assume_no_untemplated=lambda w: config['convert'][w.alias]['assume_no_untemplated'],
         input_files=lambda w: config['convert'][w.alias]['inputs']
     container: CONTAINER
+    resources:
+        time_limit=MAX_TIME,
+        mem_mb=DEFAULT_MEMORY_MB,
+        cpus=1,
+        log_dir=LOG_DIR
     shell:
         'mavis convert --file_type {params.file_type}'
             + ' --strand_specific {params.strand_specific}'
@@ -118,8 +138,13 @@ rule cluster:
     input: files=get_cluster_inputs,
         config=rules.init_config.output
     output: directory(output_dir('{library}/cluster'))
-    log: output_dir('snakemake.cluster.{library}.log.txt')
+    log: os.path.join(LOG_DIR, 'snakemake.cluster.{library}.log.txt')
     container: CONTAINER
+    resources:
+        time_limit=MAX_TIME,
+        mem_mb=DEFAULT_MEMORY_MB,
+        cpus=1,
+        log_dir=LOG_DIR
     shell:
         'mavis cluster --config {input.config}'
             + ' --library {wildcards.library}'
@@ -135,8 +160,13 @@ if not config['skip_stage.validate']:
             dirname=lambda w: output_dir(f'{w.library}/validate/batch-{w.job_id}'),
             inputfile=lambda w: expand(CLUSTER_OUTPUT, library=[w.library], job_id=[w.job_id])
         output: VALIDATE_OUTPUT
-        log: output_dir('{library}/validate/snakemake.batch-{job_id}.log.txt')
+        log: os.path.join(LOG_DIR, '{library}.validate.snakemake.batch-{job_id}.log.txt')
         container: CONTAINER
+        resources:
+            time_limit=MAX_TIME,
+            mem_mb=18000,
+            cpus=2,
+            log_dir=LOG_DIR
         shell:
             'mavis validate --config {rules.init_config.output}'
                 + ' --library {wildcards.library}'
@@ -149,8 +179,13 @@ rule annotate:
     input: rules.validate.output if not config['skip_stage.validate'] else rules.cluster.output
     output: stamp=output_dir('{library}/annotate/batch-{job_id}/MAVIS.COMPLETE'),
         result=output_dir('{library}/annotate/batch-{job_id}/annotations.tab')
-    log: output_dir('{library}/annotate/snakemake.batch-{job_id}.log.txt')
+    log: os.path.join(LOG_DIR, '{library}.annotate.snakemake.batch-{job_id}.log.txt')
     container: CONTAINER
+    resources:
+        time_limit=MAX_TIME,
+        mem_mb=DEFAULT_MEMORY_MB,
+        cpus=2,
+        log_dir=LOG_DIR
     shell:
         'mavis annotate --config {rules.init_config.output}'
             + ' --library {wildcards.library}'
@@ -165,8 +200,13 @@ rule pairing:
         result=output_dir('pairing/mavis_paired.tab')
     params:
         dirname=output_dir('pairing')
-    log: output_dir('snakemake.pairing.log.txt')
+    log: os.path.join(LOG_DIR, 'snakemake.pairing.log.txt')
     container: CONTAINER
+    resources:
+        time_limit=MAX_TIME,
+        mem_mb=DEFAULT_MEMORY_MB,
+        cpus=1,
+        log_dir=LOG_DIR
     shell:
         'mavis pairing --config {rules.init_config.output}'
             + ' --inputs {input}'
@@ -179,8 +219,13 @@ rule summary:
     output: output_dir('summary/MAVIS.COMPLETE')
     params:
         dirname=output_dir('summary')
-    log: output_dir('snakemake.summary.log.txt')
+    log: os.path.join(LOG_DIR, 'snakemake.summary.log.txt')
     container: CONTAINER
+    resources:
+        time_limit=MAX_TIME,
+        mem_mb=DEFAULT_MEMORY_MB,
+        cpus=1,
+        log_dir=LOG_DIR
     shell:
         'mavis summary --config {rules.init_config.output}'
             + ' --inputs {input}'
diff --git a/docs/install.md b/docs/install.md
index b3468f7a..badd817b 100644
--- a/docs/install.md
+++ b/docs/install.md
@@ -17,7 +17,7 @@ by singularity will take care of installing the aligner as well.
 
 ```bash
 pip install -U setuptools pip
-pip install mavis_config  # also installs snakemake
+pip install mavis_config pandas  # also installs snakemake
 ```
 
 Now you will run mavis via Snakemake as follows
diff --git a/docs/tutorials/mini.md b/docs/tutorials/mini.md
index 27b5f51f..37a8a6ec 100644
--- a/docs/tutorials/mini.md
+++ b/docs/tutorials/mini.md
@@ -25,7 +25,7 @@ example, it can easily be run locally. However in order to run the snakemake fil
 to have the config validation module `mavis_config` installed which has minimal dependencies.
 
 ```bash
-pip install mavis_config
+pip install mavis_config pandas
 ```
 
 Now you are ready to run MAVIS. This can be done in a single command using snakemake.
diff --git a/tests/integration/test_annotate_examples.py b/tests/integration/test_annotate_examples.py
index f6ed15ee..faf0297b 100644
--- a/tests/integration/test_annotate_examples.py
+++ b/tests/integration/test_annotate_examples.py
@@ -9,6 +9,7 @@
 from mavis.breakpoint import Breakpoint, BreakpointPair
 from mavis.constants import ORIENT, PROTOCOL, SPLICE_TYPE, STRAND, SVTYPE
 
+from ..util import long_running_test
 from . import MockLongString, MockObject, get_example_genes
 
 
@@ -99,6 +100,7 @@ def test_small_duplication(self):
 
 
 class TestSVEP1:
+    @long_running_test
     def test_annotate_small_intronic_inversion(self):
         gene = get_example_genes()['SVEP1']
         reference_annotations = {gene.chr: [gene]}
@@ -129,6 +131,7 @@ def test_annotate_small_intronic_inversion(self):
         assert len(ann.fusion.transcripts) == 1
         assert ann.fusion.transcripts[0].get_seq() == refseq
 
+    @long_running_test
     def test_build_single_transcript_inversion(self):
         gene = get_example_genes()['SVEP1']
         reference_genome = {
diff --git a/tests/snakemake/test_mini_workflow.py b/tests/snakemake/test_mini_workflow.py
index 1125cf3e..2751caac 100644
--- a/tests/snakemake/test_mini_workflow.py
+++ b/tests/snakemake/test_mini_workflow.py
@@ -9,7 +9,7 @@
 
 from snakemake import main as snakemake_main
 
-from ..util import glob_exists, package_relative_file
+from ..util import glob_exists, long_running_test, package_relative_file
 
 
 @pytest.fixture
@@ -27,6 +27,7 @@ def output_dir():
     shutil.rmtree(temp_output)
 
 
+@long_running_test
 def test_workflow(output_dir):
     argv = [
         'snakemake',

From 43153b5a23ccad395c287a8f8d2e103fcc2e7f9b Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Fri, 30 Apr 2021 09:38:07 -0700
Subject: [PATCH 033/137] Use config pkg helpers

---
 Snakefile         | 56 ++++++++++++++++++++++-------------------------
 src/mavis/util.py |  6 ++---
 2 files changed, 29 insertions(+), 33 deletions(-)

diff --git a/Snakefile b/Snakefile
index b9104212..51504c5a 100644
--- a/Snakefile
+++ b/Snakefile
@@ -1,9 +1,13 @@
 from snakemake.exceptions import WorkflowError
 import os
-from typing import List, Dict
-import re
 import json
-from mavis_config import validate_config
+from mavis_config import (
+    count_total_rows,
+    get_library_inputs,
+    get_singularity_bindings,
+    guess_total_batches,
+    validate_config,
+)
 from mavis_config.constants import SUBCOMMAND
 
 # env variable mainly for CI/CD
@@ -11,12 +15,21 @@ CONTAINER = os.environ.get('SNAKEMAKE_CONTAINER', 'docker://bcgsc/mavis:latest')
 MAX_TIME = 57600
 DEFAULT_MEMORY_MB = 16000
 
+
+if 'output_dir' not in config:
+    raise WorkflowError('output_dir is a required property of the configfile')
+
+
 def output_dir(*paths):
     return os.path.join(config['output_dir'], *paths)
 
+
 INITIALIZED_CONFIG = output_dir('config.json')
 LOG_DIR = output_dir('logs')
 
+# external schedulers will not create the log dir if it does not already exist
+if not os.path.exists(LOG_DIR):
+    os.makedirs(LOG_DIR, exist_ok=True)
 
 try:
     validate_config(config, stage=SUBCOMMAND.SETUP)
@@ -24,42 +37,23 @@ except Exception as err:
     short_msg = ' '.join(str(err).split('\n')[:2]) # these can get super long
     raise WorkflowError(short_msg)
 
+# ADD bindings for singularity
+print(workflow.singularity_args)
+workflow.singularity_args = f'-B {",".join(get_singularity_bindings(config))}'
+
 libraries = sorted(list(config['libraries']))
 VALIDATE_OUTPUT = output_dir('{library}/validate/batch-{job_id}/validation-passed.tab')
 CLUSTER_OUTPUT = output_dir('{library}/cluster/batch-{job_id}.tab')
 
-# create the cluster inputs and guess the cluster sizes
-def count_total_rows(filenames):
-    import pandas as pd
-
-    row_count = 0
-    for filename in filenames:
-        df = pd.read_csv(filename, sep='\t').drop_duplicates()
-        row_count += df.shape[0]
-    return row_count
-
 
 for library in libraries:
-    lib_config = config['libraries'][library]
-    if 'total_batches' in lib_config:
+    if 'total_batches' in config['libraries'][library]:
         continue
-    inputs = []
-    for assignment in lib_config['assign']:
-        if assignment in config['convert']:
-            inputs.extend(config['convert'][assignment]['inputs'])
-        else:
-            inputs.append(assignment)
 
     # if not input by user, estimate the clusters based on the input files
-    max_files = config['cluster.max_files']
-    min_rows = config['cluster.min_clusters_per_file']
-    total_rows = count_total_rows(inputs)
-
-    if round(total_rows / max_files) >= min_rows:
-        # use max number of jobs
-        lib_config['total_batches'] = max_files
-    else:
-        lib_config['total_batches'] = total_rows // min_rows
+    config['libraries'][library]['total_batches'] = guess_total_batches(
+        config, get_library_inputs(config, library)
+    )
 
 
 libs_args = []
@@ -81,6 +75,7 @@ rule copy_config:
         mem_mb=4000,
         cpus=1,
         log_dir=LOG_DIR
+    log: os.path.join(LOG_DIR, 'copy_config.snakemake.log.txt')
     run:
         with open(output_dir('config.raw.json'), 'w') as fh:
             fh.write(json.dumps(config, sort_keys=True, indent='  '))
@@ -90,6 +85,7 @@ rule init_config:
     input: rules.copy_config.output
     output: INITIALIZED_CONFIG
     container: CONTAINER
+    log: os.path.join(LOG_DIR, 'init_config.snakemake.log.txt')
     resources:
         time_limit=MAX_TIME,
         mem_mb=DEFAULT_MEMORY_MB,
diff --git a/src/mavis/util.py b/src/mavis/util.py
index 4a22984a..8a3f2d41 100644
--- a/src/mavis/util.py
+++ b/src/mavis/util.py
@@ -282,7 +282,7 @@ def output_tabbed_file(bpps: List[BreakpointPair], filename: str, header=None):
             row = row.flatten()
         rows.append(row)
         if not custom_header:
-            header.update(row.keys())
+            header.update(row.keys())  # type: ignore
     header = sort_columns(header)
     LOG('writing:', filename)
     df = pd.DataFrame.from_records(rows, columns=header)
@@ -449,7 +449,7 @@ def soft_null_cast(value):
         return []
 
     for col in required_columns:
-        if col not in df:
+        if col not in df and col not in add_default:
             raise KeyError(f'missing required column: {col}')
 
     # run the custom functions
@@ -500,7 +500,7 @@ def soft_null_cast(value):
     ]:
         for col in cols:
             if col in df:
-                df[col].apply(lambda c: vocab.enforce(c))
+                df[col].apply(lambda c: vocab.enforce(c))  # type: ignore
             elif hasattr(vocab, 'NS'):
                 df[col] = vocab.NS  # type: ignore
 

From 135c977185790f2b92050e83c2269519981bd210 Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Fri, 30 Apr 2021 09:38:34 -0700
Subject: [PATCH 034/137] Make dir for reference inputs

---
 docs/tutorials/full.md | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/docs/tutorials/full.md b/docs/tutorials/full.md
index ebcfd290..dc1828a9 100644
--- a/docs/tutorials/full.md
+++ b/docs/tutorials/full.md
@@ -39,8 +39,11 @@ The expected contents are
 Run the following to download the hg19 reference files
 
 ```bash
-wget https://raw.githubusercontent.com/bcgsc/mavis/master/tools/get_hg19_reference_files.sh
+wget https://raw.githubusercontent.com/bcgsc/mavis/master/src/tools/get_hg19_reference_files.sh
+mkdir reference_inputs
+cd reference_inputs
 bash get_hg19_reference_files.sh
+cd ..
 ```
 
 ## Creating the Config File

From f1383ecd3612b91e5534b326eb3f2a5606d8b1b4 Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Fri, 30 Apr 2021 10:53:42 -0700
Subject: [PATCH 035/137] Lint md file

---
 docs/background/theory.md     | 98 +++++++++--------------------------
 docs/inputs/standard.md       | 72 ++++++++++++-------------
 docs/outputs/illustrations.md | 12 ++---
 3 files changed, 61 insertions(+), 121 deletions(-)

diff --git a/docs/background/theory.md b/docs/background/theory.md
index ac4ab2a7..d63210a4 100644
--- a/docs/background/theory.md
+++ b/docs/background/theory.md
@@ -6,26 +6,20 @@ In MAVIS structural variants (SVs) are defined by a pair of breakpoints
 
 And a breakpoint is defined by
 
-1.  chromosome
-2.  base-pair range (start, end). This has a length of 1 for exact calls
-    and more for uncertain/non-specific calls
-3.  [orientation](../../glossary/#orientation). This is Left or Right
-    with respect to the positive/forward strand. This defines which
-    portion of the genome is 'retained'
-4.  [strand](../../glossary/#strand). (only applicable to
-    stranded transcriptome libraries)
+1. chromosome
+2. base-pair range (start, end). This has a length of 1 for exact calls and more for uncertain/non-specific calls
+3. [orientation](../../glossary/#orientation). This is Left or Right with respect to the positive/forward strand. This defines which portion of the genome is 'retained'
+4. [strand](../../glossary/#strand). (only applicable to stranded transcriptome libraries)
 
 So then a breakpoint pair is any two intervals on the reference genome
 which are adjacent in the mutant genome
 
-
-
 ## Evidence
 
 There are many ways that single reads or paired-end reads can act as
 support for an SV call.
 
-![](../images/read_evidence.svg)
+![read evidence](../images/read_evidence.svg)
 
 In the figure above the red rectangle represents a deletion structural
 variant. The arrows are types of single or paired-end reads supporting
@@ -58,7 +52,7 @@ For a deletion, we expect the flanking reads to be in the normal
 orientation but that the fragment size should be abnormal (for large
 deletions).
 
-![](../images/read_pairs_deletion.svg)
+![deletion supporting read pairs](../images/read_pairs_deletion.svg)
 
 Flanking read pair evidence for a deletion event. the read pairs will
 have a larger than expected fragment size when mapped to the reference
@@ -71,10 +65,9 @@ on the positive strand and the second read in the pair would be on the
 negative/reverse
 strand.
 
-
 #### Insertion
 
-![](../images/read_pairs_insertion.svg)
+![insertion supporting read pairs](../images/read_pairs_insertion.svg)
 
 Flanking read pair evidence for an insertion event. The read pairs
 will have a smaller than expected fragment size when mapped to the
@@ -87,10 +80,9 @@ on the positive strand and the second read in the pair would be on the
 negative/reverse
 strand.
 
-
 #### Duplication
 
-![](../images/read_pairs_duplication.svg)
+![duplication supporting read pairs](../images/read_pairs_duplication.svg)
 
 Flanking read pair evidence for a tandem duplication event. The read
 pairs will have an abnormal orientation but still the same strands as
@@ -99,57 +91,48 @@ strand and have a right orientation. (B2) The second breakpoint will be
 on the positive strand and have a left
 orientation.
 
-
 #### Inversion
 
-![](../images/read_pairs_inversion_LL.svg)
+![inversion supporting read pairs](../images/read_pairs_inversion_LL.svg)
 
 Flanking read pair evidence for an inversion. Both breakpoints have
 left orientation.
 
-
-![](../images/read_pairs_inversion_RR.svg)
+![inversion supporting read pairs](../images/read_pairs_inversion_RR.svg)
 
 Flanking read pair evidence for an inversion. Both breakpoints have
 right orientation.
 
-
 #### Translocation
 
-![](../images/read_pairs_translocation_LR.svg)
+![translocation supporting read pairs](../images/read_pairs_translocation_LR.svg)
 
 Flanking read pair evidence for a translocation. (B1) the first
 breakpoint with a left orientation. (B2) the second breakpoint with a
 right
 orientation.
 
-
-![](../images/read_pairs_translocation_RL.svg)
+![translocation supporting read pairs](../images/read_pairs_translocation_RL.svg)
 
 Flanking read pair evidence for a translocation. (B1) the first
 breakpoint with a right orientation. (B2) the second breakpoint with a
 left
 orientation.
 
-
 #### Inverted Translocation
 
-![](../images/read_pairs_translocated_inversion_LL.svg)
+![inverted translocation supporting read pairs](../images/read_pairs_translocated_inversion_LL.svg)
 
 Flanking read pair evidence for an inverted translocation. Both
 breakpoints have left
 orientation.
 
-
-![](../images/read_pairs_translocated_inversion_RR.svg)
+![inverted translocation supporting read pairs](../images/read_pairs_translocated_inversion_RR.svg)
 
 Flanking read pair evidence for an inverted translocation. Both
 breakpoints have right
 orientation.
 
-
-
-
 ### Compatible Flanking Pairs
 
 For insertion and duplication events compatible flanking pairs are
@@ -158,7 +141,7 @@ be used as compatible flanking evidence for an insertion (in the same
 region) and similarly flanking pairs which support an insertion may be
 compatible flanking evidence for a duplication
 
-![](../images/compatible_flanking_pairs.svg)
+![compatible flanking pairs](../images/compatible_flanking_pairs.svg)
 
 The event depicted above may be called as either a duplication or an
 insertion (depending on the input call). If the even were called as a
@@ -167,30 +150,24 @@ reads in blue would be given as compatible flanking support. If the
 event were called as an insertion the reverse would
 apply.
 
-
-
-
 ### Calculating the Evidence Window
 
-![](../images/read_pair_definitions.svg)
+![read pair definition terms](../images/read_pair_definitions.svg)
 
 Basic Terms used in describing read pairs are shown above: fragment
 size: the distance between the pair; read length: the length of the
 read; fragment size: the combined length of both reads and the fragment
 size
 
-
 We make some base assumptions with regards to paired-end read data:
 
 !!! note
     the distribution of fragment sizes approximately follows a normal
     distribution
 
-
 !!! note
     the most common fragment size is the unmutated 'normal' fragment
 
-
 With the above assumptions we take the median fragment size to be the
 expected normal.
 
@@ -217,7 +194,7 @@ stdev = math.sqrt(sum(X) / len(X))
 This gives us an idea of when to judge an fragment size as abnormal and
 where we expect our normal read pairs fragment sizes to fall.
 
-![](../images/fragment_sizes_histogram.svg)
+![read pair fragment size histogram](../images/fragment_sizes_histogram.svg)
 
 Distribution of fragment sizes (absolute values) of proper read pairs.
 The black curve representings the fit for a normal distribution using
@@ -227,14 +204,13 @@ thick vertical black line is the median and the thin black lines are
 standard deviations away from the
 median.
 
-
 As we can see from the diagram above, removing the outliers reproduces
 the observed distribution better than using all data points
 
 We use this in two ways
 
-1.  to find flanking evidence supporting deletions and insertions
-2.  to estimate the window size for where we will need to read from the
+1. to find flanking evidence supporting deletions and insertions
+2. to estimate the window size for where we will need to read from the
     bam when looking for evidence for a given event
 
 The
@@ -250,8 +226,6 @@ complicated and we must take into account the possible annotations when
 calculating the evidence window. see
 `mavis.validate.evidence.TranscriptomeEvidence._generate_window` for more
 
-
-
 ### Calling Breakpoints by Flanking Evidence
 
 Breakpoints are called by contig, split-read, or flanking pairs
@@ -273,8 +247,6 @@ outline, no fill) demonstrates the read length used to narrow the right
 side bound of the [estimated breakpoint
 interval.
 
-
-
 ### Determining Flanking support
 
 ![flanking support](../images/flanking_pairs_fragment_sizes_deletion.svg)
@@ -290,7 +262,6 @@ The shaded portion of the graph represents the range in fragment sizes
 we expect for flanking pairs supporting the deletion
 event.
 
-
 ## Classifying Events
 
 The following decision tree is used in classifying events based on their
@@ -317,8 +288,6 @@ reverse complement are assembled into contigs using a
 [DeBruijn graph](../../glossary/#debruijn-graph). For strand specific
 events, we then attempt to resolve the sequence strand of the contig.
 
-
-
 ## Annotating Events
 
 We make the following assumptions when determining the annotations for
@@ -328,15 +297,12 @@ each event
     If both breakpoints are in the same gene, they must also be in the same
     transcript
 
-
 !!! note
     If the breakpoint intervals overlap we do not annotate encompassed genes
 
-
 !!! note
     Encompassed and 'nearest' genes are reported without respect to strand
 
-
 There are specific questions we want annotation to answer. We collect
 gene level annotations which describes things like what gene is near the
 breakpoint (useful in the case of a potential promoter swap); what genes
@@ -357,8 +323,6 @@ computed. This is translated to a putative amino acid sequence from
 which protein metrics such as the possible ORFs and domain sequences can
 be computed.
 
-
-
 ## Predicting Splicing Patterns
 
 After the events have been called and an annotation has been attached,
@@ -392,28 +356,17 @@ is paired with the 2nd donor
 site
 
 More complex examples are drawn below. There are five classifications
-(`mavis.constants.SPLICE_TYPE`) for the
+([`mavis.constants.SPLICE_TYPE`](../../package/mavis/constants/#class-mavisconstantssplice_type)) for the
 different splicing patterns:
 
-1.  Retained intron
-    (`mavis.constants.SPLICE_TYPE.RETAIN`{.interpreted-text
-    role="class"})
-2.  Skipped exon (`mavis.constants.SPLICE_TYPE.SKIP`{.interpreted-text
-    role="attr"})
-3.  Multiple retained introns
-    (`mavis.constants.SPLICE_TYPE.MULTI_RETAIN`{.interpreted-text
-    role="attr"})
-4.  Multiple skipped exons
-    (`mavis.constants.SPLICE_TYPE.MULTI_SKIP`{.interpreted-text
-    role="attr"})
-5.  Some combination of retained introns and skipped exons
-    (`mavis.constants.SPLICE_TYPE.COMPLEX`{.interpreted-text
-    role="attr"})
+1. Retained intron
+2. Skipped exon
+3. Multiple retained introns
+4. Multiple skipped exons
+5. Some combination of retained introns and skipped exons
 
 ![Splicing scenarios](../images/splicing_model.svg)
 
-
-
 ## Pairing Similar Events
 
 After breakpoints have been called and annotated we often need to see if
@@ -430,7 +383,6 @@ rise to the following basic cases.
     breakpoint, or it is the same as the nearest retained donor/acceptor to
     the breakpoint.
 
-
 ![exonic splicing](../images/breakpoint_prediction_exonic.svg)
 
 (A-D) The breakpoint lands in an exon and the five prime portion of
diff --git a/docs/inputs/standard.md b/docs/inputs/standard.md
index cd3d7e92..373a7cb1 100644
--- a/docs/inputs/standard.md
+++ b/docs/inputs/standard.md
@@ -1,40 +1,41 @@
 # MAVIS standard input file format
 
-
 These requirements pertain to the columns of input files from the
 various tools you want to merge. The input files should be tab-delimited
 text files. Comments at the top of may be included. Comments should
-begin with two hash marks. They will be ignored when the file is read
-
+begin with hash marks. They will be ignored when the file is read
 
-    ## This is a comment
+```text
+## This is a comment
+```
 
 The header row contains the column names and is the first row following
-the comments (or the first row if no comments are included). Optionally
-the header row may (or may not) begin with a hash which will be stripped
-out on read
+the comments (or the first row if no comments are included).
 
-    ## This is a comment
-    ## this is another comment
-    # this is the header row
+```text
+## This is a comment
+## this is another comment
+# this is also a comment
+This    Is  The Header
+```
 
 A simple input file might look as follows
 
-    ## File created at: 2018-01-02
-    ## Generated by: MAVIS v1.0.0
-    #break1_chromosome  break1_position_start   break1_position_end break2_chromosome break2_position_start break2_position_end
-    X   1234    1234    X   77965   77965
+```text
+## File created at: 2018-01-02
+## Generated by: MAVIS v1.0.0
+break1_chromosome  break1_position_start   break1_position_end break2_chromosome break2_position_start break2_position_end
+X   1234    1234    X   77965   77965
+```
 
 ## Required Columns
 
--   [break1_chromosome](../../outputs/columns/#break1_chromosome)
--   [break1_position_start](../../outputs/columns/#break1_position_start)
--   [break1_position_end](../../outputs/columns/#break1_position_end) (can be the
-    same as break1\_position\_start)
--   [break2_chromosome](../../outputs/columns/#break2_chromosome)
--   [break2_position_start](../../outputs/columns/#break2_position_start)
--   [break2_position_end](../../outputs/columns/#break2_position_end) (can be the
-    same as break2\_position\_start)
+- [break1_chromosome](../../outputs/columns/#break1_chromosome)
+- [break1_position_start](../../outputs/columns/#break1_position_start)
+- [break1_position_end](../../outputs/columns/#break1_position_end) (can be the same as break1\_position\_start)
+- [break2_chromosome](../../outputs/columns/#break2_chromosome)
+- [break2_position_start](../../outputs/columns/#break2_position_start)
+- [break2_position_end](../../outputs/columns/#break2_position_end) (can be the same as break2\_position\_start)
 
 ## Optional Columns
 
@@ -42,24 +43,15 @@ Optional Columns that are not given as input will be added with default
 (or command line parameter options) during the clustering stage of MAVIS
 as some are required for subsequent pipeline steps
 
--   [break1_strand](../../outputs/columns/#break1_strand) (defaults to
-    not-specified during clustering)
--   [break1_orientation](../../outputs/columns/#break1_orientation) (expanded to all
-    possible values during clustering)
--   [break2_strand](../../outputs/columns/#break2_strand) (defaults to
-    not-specified during clustering)
--   [break2_orientation](../../outputs/columns/#break2_orientation) (expanded to all
-    possible values during clustering)
--   [opposing_strands](../../outputs/columns/#opposing_strands) (expanded to all
-    possible values during clustering)
--   [stranded](../../outputs/columns/#stranded) (defaults to False during
-    clustering)
--   [library](../../outputs/columns/#library) (defaults to command line
-    library parameter during clustering)
--   [protocol](../../outputs/columns/#protocol) (defaults to command line
-    protocol parameter during clustering)
--   [tools](../../outputs/columns/#tools) (defaults to an empty string
-    during clustering)
+- [break1_strand](../../outputs/columns/#break1_strand) (defaults to not-specified during clustering)
+- [break1_orientation](../../outputs/columns/#break1_orientation) (expanded to all possible values during clustering)
+- [break2_strand](../../outputs/columns/#break2_strand) (defaults to not-specified during clustering)
+- [break2_orientation](../../outputs/columns/#break2_orientation) (expanded to all possible values during clustering)
+- [opposing_strands](../../outputs/columns/#opposing_strands) (expanded to all possible values during clustering)
+- [stranded](../../outputs/columns/#stranded) (defaults to False during clustering)
+- [library](../../outputs/columns/#library) (defaults to command line library parameter during clustering)
+- [protocol](../../outputs/columns/#protocol) (defaults to command line protocol parameter during clustering)
+- [tools](../../outputs/columns/#tools) (defaults to an empty string during clustering)
 
 ## Summary by Pipeline Step
 
diff --git a/docs/outputs/illustrations.md b/docs/outputs/illustrations.md
index f419db26..da7fb6a3 100644
--- a/docs/outputs/illustrations.md
+++ b/docs/outputs/illustrations.md
@@ -5,33 +5,29 @@
 These are diagrams produced during the annotate step. These represent
 the putative fusion events of a single breakpoint pair.
 
-![](../images/GIMAP4_IL7_fusion.svg)
+![fusion diagram](../images/GIMAP4_IL7_fusion.svg)
 
 Fusion from transcriptome data. Intronic breakpoints here indicate
 retained intron sequence and a novel exon is
 predicted.
 
-
 If the [draw_fusions_only](../../configuration/settings/#draw_fusions_only flag is set to
 False then all events will produce a diagram, even anti-sense fusions
 
-![](../images/UBE2V2_GIMAP4_disruptive_fusion.svg)
+![disruptive fusion diagram](../images/UBE2V2_GIMAP4_disruptive_fusion.svg)
 
 Disruptive Anti-sense
 Fusion
 
-
 ## Transcript Overlays
 
 MAVIS supports generating diagrams of all transcripts for a given gene.
 These can be overlaid with markers and bam\_file pileup data. This is
 particularly useful for visualizing splice site mutations.
 
-![](../images/ENSG00000139687_RB1_overlay.png)
-
-RB1 splice site mutation results in skipping of exon
-9
+![overlay diagram](../images/ENSG00000139687_RB1_overlay.png)
 
+RB1 splice site mutation results in skipping of exon 9
 
 The above diagram was generated using the overlay command
 

From d5130336e89d7bc5572a1371d098588ac7b03018 Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Fri, 30 Apr 2021 11:18:36 -0700
Subject: [PATCH 036/137] Use relative links

---
 docs/outputs/columns.md | 28 +++++++++++-----------------
 1 file changed, 11 insertions(+), 17 deletions(-)

diff --git a/docs/outputs/columns.md b/docs/outputs/columns.md
index 3dfd797f..14cdcdd2 100644
--- a/docs/outputs/columns.md
+++ b/docs/outputs/columns.md
@@ -3,7 +3,6 @@
 List of column names and their definitions. The types indicated here are
 the expected types in a row for a given column name.
 
-
 ## library
 
 Identifier for the library/source
@@ -34,7 +33,7 @@ decision from the annotation step
 
 ## event\_type
 
-**type**: [`mavis.constants.SVTYPE`](/package/mavis/constants/#class-mavisconstantssvtype)
+**type**: [`mavis.constants.SVTYPE`](../package/mavis/constants/#class-mavisconstantssvtype)
 
 The
 classification of the event
@@ -57,7 +56,7 @@ Gene for the current annotation at the first breakpoint
 
 ## gene1\_direction
 
-**type**: [`mavis.constants.PRIME`](/package/mavis/constants/#class-mavisconstantsprime)
+**type**: [`mavis.constants.PRIME`](../package/mavis/constants/#class-mavisconstantsprime)
 
 The
 direction/prime of the gene
@@ -68,7 +67,7 @@ Gene for the current annotation at the second breakpoint
 
 ## gene2\_direction
 
-**type**: [`mavis.constants.PRIME`](/package/mavis/constants/#class-mavisconstantsprime)
+**type**: [`mavis.constants.PRIME`](../package/mavis/constants/#class-mavisconstantsprime)
 
 The
 direction/prime of the gene. Has the following possible values
@@ -85,16 +84,11 @@ second breakpoint
 
 ## gene\_product\_type
 
-**type**: [`mavis.constants.GENE_PRODUCT_TYPE`](/package/mavis/constants/#class-mavisconstantsgene_product_type)
+**type**: [`mavis.constants.GENE_PRODUCT_TYPE`](../package/mavis/constants/#class-mavisconstantsgene_product_type)
 
 Describes if the putative fusion product will be
 sense or anti-sense
 
-## fusion\_cdna\_coding\_end
-
-Position wrt the 5' end of the fusion transcript where coding ends
-last base of the stop codon
-
 ## transcript1
 
 Transcript for the current annotation at the first breakpoint
@@ -105,7 +99,7 @@ Transcript for the current annotation at the second breakpoint
 
 ## fusion\_splicing\_pattern
 
-**type**: [`mavis.constants.SPLICE_TYPE`](/package/mavis/constants/#class-mavisconstantsslice_type)
+**type**: [`mavis.constants.SPLICE_TYPE`](../package/mavis/constants/#class-mavisconstantssplice_type)
 
 Type of splicing pattern used to create the fusion cDNA.
 
@@ -206,14 +200,14 @@ End integer inclusive
 
 ## break1\_orientation
 
-**type**: [`mavis.constants.ORIENT`](/package/mavis/constants/#class-mavisconstantsorient)
+**type**: [`mavis.constants.ORIENT`](../package/mavis/constants/#class-mavisconstantsorient)
 
 The side
 of the breakpoint wrt the positive/forward strand that is retained.
 
 ## break1\_strand
 
-**type**: [`mavis.constants.STRAND`](/package/mavis/constants/#class-mavisconstantsstrand)
+**type**: [`mavis.constants.STRAND`](../package/mavis/constants/#class-mavisconstantsstrand)
 
 The
 strand wrt to the reference positive/forward strand at this
@@ -247,14 +241,14 @@ End integer inclusive
 
 ## break2\_orientation
 
-**type**: [`mavis.constants.ORIENT`](/package/mavis/constants/#class-mavisconstantsorient)
+**type**: [`mavis.constants.ORIENT`](../package/mavis/constants/#class-mavisconstantsorient)
 
 The side
 of the breakpoint wrt the positive/forward strand that is retained.
 
 ## break2\_strand
 
-**type**: [`mavis.constants.STRAND`](/package/mavis/constants/#class-mavisconstantsstrand)
+**type**: [`mavis.constants.STRAND`](../package/mavis/constants/#class-mavisconstantsstrand)
 
 The
 strand wrt to the reference positive/forward strand at this
@@ -284,7 +278,7 @@ protocol was strand specific or not. Expects a boolean
 
 ## protocol
 
-**type**: [`mavis.constants.PROTOCOL`](/package/mavis/constants/#class-mavisconstantsprotocol)
+**type**: [`mavis.constants.PROTOCOL`](../package/mavis/constants/#class-mavisconstantsprotocol)
 
 Specifies the type of library
 
@@ -406,7 +400,7 @@ event
 
 ## call\_method
 
-**type**: [`mavis.constants.CALL_METHOD`](/package/mavis/constants/#class-mavisconstantscall_method)
+**type**: [`mavis.constants.CALL_METHOD`](../package/mavis/constants/#class-mavisconstantscall_method)
 
 The
 method used to call the breakpoints

From 06230daf7bf0249cf30fba8ff8dbc2f645586fba Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Fri, 30 Apr 2021 12:54:45 -0700
Subject: [PATCH 037/137] Use new version of mavis_config

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index cf458d52..7b2c3a03 100644
--- a/setup.py
+++ b/setup.py
@@ -90,7 +90,7 @@ def check_nonpython_dependencies():
     'pyvcf==0.6.8',
     'shortuuid>=0.5.0',
     'svgwrite',
-    'mavis_config==1.0.0',
+    'mavis_config>=1.1.0, <2.0.0',
 ]
 
 DEPLOY_REQS = ['twine', 'm2r', 'wheel']

From b3d5256bae389d0f221c35fe980134cf357afb22 Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Sat, 1 May 2021 13:46:45 -0700
Subject: [PATCH 038/137] Remove unused functions

---
 src/mavis/config.py              |  3 +-
 src/mavis/util.py                | 57 +-------------------------------
 tests/end_to_end/test_convert.py |  6 ++--
 3 files changed, 6 insertions(+), 60 deletions(-)

diff --git a/src/mavis/config.py b/src/mavis/config.py
index 38fcb500..a0b4341a 100644
--- a/src/mavis/config.py
+++ b/src/mavis/config.py
@@ -4,6 +4,7 @@
 from typing import Dict, Optional
 
 import snakemake
+from mavis_config import bash_expands
 from snakemake.exceptions import WorkflowError
 from snakemake.utils import validate as snakemake_validate
 
@@ -11,7 +12,7 @@
 from .bam import stats
 from .bam.cache import BamCache
 from .constants import INTEGER_COLUMNS, PROTOCOL, SUBCOMMAND, float_fraction
-from .util import bash_expands, cast_boolean, filepath
+from .util import cast_boolean, filepath
 
 
 def calculate_bam_stats(config: Dict, library_name: str) -> Dict:
diff --git a/src/mavis/util.py b/src/mavis/util.py
index 8a3f2d41..7f43be00 100644
--- a/src/mavis/util.py
+++ b/src/mavis/util.py
@@ -12,7 +12,7 @@
 from typing import Any, Callable, Dict, List, Optional, Set
 
 import pandas as pd
-from braceexpand import braceexpand
+from mavis_config import bash_expands
 from shortuuid import uuid
 
 from .breakpoint import Breakpoint, BreakpointPair
@@ -144,45 +144,6 @@ def soft_cast(value, cast_type):
     return cast_null(value)
 
 
-def get_env_variable(arg, default, cast_type=None):
-    """
-    Args:
-        arg (str): the argument/variable name
-    Returns:
-        the setting from the environment variable if given, otherwise the default value
-    """
-    if cast_type is None:
-        cast_type = type(default)
-    name = ENV_VAR_PREFIX + str(arg).upper()
-    result = os.environ.get(name, None)
-    if result is not None:
-        return cast(result, cast_type)
-    return default
-
-
-def bash_expands(*expressions):
-    """
-    expand a file glob expression, allowing bash-style brackets.
-
-    Returns:
-        list: a list of files
-
-    Example:
-        >>> bash_expands('./{test,doc}/*py')
-        [...]
-    """
-    result = []
-    for expression in expressions:
-        eresult = []
-        for name in braceexpand(expression):
-            for fname in glob(name):
-                eresult.append(fname)
-        if not eresult:
-            raise FileNotFoundError('The expression does not match any files', expression)
-        result.extend(eresult)
-    return [os.path.abspath(f) for f in result]
-
-
 def log_arguments(args):
     """
     output the arguments to the console
@@ -375,22 +336,6 @@ def filter_uninformative(annotations_by_chr, breakpoint_pairs, max_proximity=500
     return result, filtered
 
 
-def unique_exists(
-    pattern: str, allow_none: bool = False, get_newest: bool = False
-) -> Optional[str]:
-    result = bash_expands(pattern)
-    if len(result) == 1:
-        return result[0]
-    elif result:
-        if get_newest:
-            return max(result, key=lambda x: os.stat(x).st_mtime)
-        raise OSError('duplicate results:', result)
-    elif allow_none:
-        return None
-    else:
-        raise OSError('no result found', pattern)
-
-
 def read_bpp_from_input_file(
     filename: str,
     expand_orient: bool = False,
diff --git a/tests/end_to_end/test_convert.py b/tests/end_to_end/test_convert.py
index 514fae52..95b45309 100644
--- a/tests/end_to_end/test_convert.py
+++ b/tests/end_to_end/test_convert.py
@@ -8,9 +8,9 @@
 from mavis.constants import ORIENT, SUBCOMMAND, SVTYPE
 from mavis.main import main
 from mavis.tools import SUPPORTED_TOOL
-from mavis.util import read_bpp_from_input_file, unique_exists
+from mavis.util import read_bpp_from_input_file
 
-from ..util import get_data
+from ..util import get_data, glob_exists
 
 TEMP_OUTPUT = None
 
@@ -40,7 +40,7 @@ def run_main(self, inputfile, file_type, strand_specific=False):
         with patch.object(sys, 'argv', args):
             main()
             print('output', outputfile)
-            assert unique_exists(outputfile)
+            assert glob_exists(outputfile, n=1)
         result = {}
         for pair in read_bpp_from_input_file(outputfile):
             result.setdefault(pair.data['tracking_id'], []).append(pair)

From 899d8cd1469798cdbf9ab675f045a17c6ca47771 Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Sat, 1 May 2021 13:49:22 -0700
Subject: [PATCH 039/137] BugFix update data column access

---
 src/mavis/summary/summary.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mavis/summary/summary.py b/src/mavis/summary/summary.py
index 5287d7d8..f30089b4 100644
--- a/src/mavis/summary/summary.py
+++ b/src/mavis/summary/summary.py
@@ -290,7 +290,7 @@ def filter_by_evidence(
                 removed.append(bpp)
                 continue
         elif bpp.column('call_method') == CALL_METHOD.SPAN:
-            if bpp.spanning_reads < filter_min_spanning_reads:
+            if bpp.column('spanning_reads') < filter_min_spanning_reads:
                 removed.append(bpp)
                 continue
         elif bpp.column('call_method') == CALL_METHOD.SPLIT:

From 21507f0708bc72ca9a5feaa9550551e0d66d34e3 Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Sat, 1 May 2021 13:49:47 -0700
Subject: [PATCH 040/137] Test bwa and blat in mini workflow

---
 tests/snakemake/test_mini_workflow.py | 26 +++++++++++++++++++++++++-
 1 file changed, 25 insertions(+), 1 deletion(-)

diff --git a/tests/snakemake/test_mini_workflow.py b/tests/snakemake/test_mini_workflow.py
index 2751caac..963f339c 100644
--- a/tests/snakemake/test_mini_workflow.py
+++ b/tests/snakemake/test_mini_workflow.py
@@ -13,7 +13,7 @@
 
 
 @pytest.fixture
-def output_dir():
+def blat_output_dir():
     temp_output = tempfile.mkdtemp()
 
     os.makedirs(os.path.join(temp_output, 'mavis/schemas'))
@@ -21,13 +21,37 @@ def output_dir():
     with open(package_relative_file('tests/mini-tutorial.config.json'), 'r') as fh:
         config = json.load(fh)
     config['output_dir'] = os.path.join(temp_output, 'output_dir')
+    config['validate.aligner'] = 'blat'
     with open(os.path.join(temp_output, 'mini-tutorial.config.json'), 'w') as fh:
         fh.write(json.dumps(config))
     yield temp_output
     shutil.rmtree(temp_output)
 
 
+@pytest.fixture
+def bwa_output_dir():
+    temp_output = tempfile.mkdtemp()
+
+    os.makedirs(os.path.join(temp_output, 'mavis/schemas'))
+
+    with open(package_relative_file('tests/mini-tutorial.config.json'), 'r') as fh:
+        config = json.load(fh)
+    config['output_dir'] = os.path.join(temp_output, 'output_dir')
+    config['validate.aligner'] = 'bwa mem'
+    config['reference.aligner_reference'] = config['reference.reference_genome']
+    with open(os.path.join(temp_output, 'mini-tutorial.config.json'), 'w') as fh:
+        fh.write(json.dumps(config))
+    yield temp_output
+    shutil.rmtree(temp_output)
+
+
+@pytest.fixture
+def output_dir(request):
+    return request.getfixturevalue(request.param)
+
+
 @long_running_test
+@pytest.mark.parametrize('output_dir', ['blat_output_dir', 'bwa_output_dir'], indirect=True)
 def test_workflow(output_dir):
     argv = [
         'snakemake',

From c054eafa1b75c80f2392bca4d0c853d86dae8475 Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Sat, 1 May 2021 13:59:20 -0700
Subject: [PATCH 041/137] Remove pandas from install instructions

---
 README.md              | 2 +-
 docs/install.md        | 2 +-
 docs/tutorials/mini.md | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index c3f18975..b6e6ef45 100644
--- a/README.md
+++ b/README.md
@@ -44,7 +44,7 @@ by singularity will take care of installing the aligner as well.
 
 ```bash
 pip install -U setuptools pip
-pip install mavis_config pandas  # also installs snakemake
+pip install mavis_config  # also installs snakemake
 ```
 
 Now you will run mavis via Snakemake as follows
diff --git a/docs/install.md b/docs/install.md
index badd817b..b3468f7a 100644
--- a/docs/install.md
+++ b/docs/install.md
@@ -17,7 +17,7 @@ by singularity will take care of installing the aligner as well.
 
 ```bash
 pip install -U setuptools pip
-pip install mavis_config pandas  # also installs snakemake
+pip install mavis_config  # also installs snakemake
 ```
 
 Now you will run mavis via Snakemake as follows
diff --git a/docs/tutorials/mini.md b/docs/tutorials/mini.md
index 37a8a6ec..27b5f51f 100644
--- a/docs/tutorials/mini.md
+++ b/docs/tutorials/mini.md
@@ -25,7 +25,7 @@ example, it can easily be run locally. However in order to run the snakemake fil
 to have the config validation module `mavis_config` installed which has minimal dependencies.
 
 ```bash
-pip install mavis_config pandas
+pip install mavis_config
 ```
 
 Now you are ready to run MAVIS. This can be done in a single command using snakemake.

From 027c4039b21bf6d2c663c0f185d1a3ed62c8212c Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Sat, 1 May 2021 14:00:34 -0700
Subject: [PATCH 042/137] Remove leftover print

---
 Snakefile | 1 -
 1 file changed, 1 deletion(-)

diff --git a/Snakefile b/Snakefile
index 51504c5a..113dc2fb 100644
--- a/Snakefile
+++ b/Snakefile
@@ -38,7 +38,6 @@ except Exception as err:
     raise WorkflowError(short_msg)
 
 # ADD bindings for singularity
-print(workflow.singularity_args)
 workflow.singularity_args = f'-B {",".join(get_singularity_bindings(config))}'
 
 libraries = sorted(list(config['libraries']))

From dd018541fab52f5c3a50d74420a52b422e6c75af Mon Sep 17 00:00:00 2001
From: Caleb Choo <cchoo@bcgsc.ca>
Date: Wed, 11 Aug 2021 11:47:18 -0700
Subject: [PATCH 043/137] #254 fix nan values confusing downstream None checks

---
 src/mavis/util.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/mavis/util.py b/src/mavis/util.py
index 7f43be00..abd55c34 100644
--- a/src/mavis/util.py
+++ b/src/mavis/util.py
@@ -390,6 +390,7 @@ def soft_null_cast(value):
             comment='#',
             na_values=['None', 'none', 'N/A', 'n/a', 'null', 'NULL', 'Null', 'nan', '<NA>', 'NaN'],
         )
+        df = df.where(pd.notnull(df), None)
     except pd.errors.EmptyDataError:
         return []
 

From 94b1d1463867f8ad814cf09ac93b8aca4ace5589 Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@users.noreply.github.com>
Date: Mon, 20 Dec 2021 13:03:16 -0800
Subject: [PATCH 044/137] Create LICENSE

---
 LICENSE | 674 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 674 insertions(+)
 create mode 100644 LICENSE

diff --git a/LICENSE b/LICENSE
new file mode 100644
index 00000000..f288702d
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,674 @@
+                    GNU GENERAL PUBLIC LICENSE
+                       Version 3, 29 June 2007
+
+ Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+                            Preamble
+
+  The GNU General Public License is a free, copyleft license for
+software and other kinds of works.
+
+  The licenses for most software and other practical works are designed
+to take away your freedom to share and change the works.  By contrast,
+the GNU General Public License is intended to guarantee your freedom to
+share and change all versions of a program--to make sure it remains free
+software for all its users.  We, the Free Software Foundation, use the
+GNU General Public License for most of our software; it applies also to
+any other work released this way by its authors.  You can apply it to
+your programs, too.
+
+  When we speak of free software, we are referring to freedom, not
+price.  Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+them if you wish), that you receive source code or can get it if you
+want it, that you can change the software or use pieces of it in new
+free programs, and that you know you can do these things.
+
+  To protect your rights, we need to prevent others from denying you
+these rights or asking you to surrender the rights.  Therefore, you have
+certain responsibilities if you distribute copies of the software, or if
+you modify it: responsibilities to respect the freedom of others.
+
+  For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must pass on to the recipients the same
+freedoms that you received.  You must make sure that they, too, receive
+or can get the source code.  And you must show them these terms so they
+know their rights.
+
+  Developers that use the GNU GPL protect your rights with two steps:
+(1) assert copyright on the software, and (2) offer you this License
+giving you legal permission to copy, distribute and/or modify it.
+
+  For the developers' and authors' protection, the GPL clearly explains
+that there is no warranty for this free software.  For both users' and
+authors' sake, the GPL requires that modified versions be marked as
+changed, so that their problems will not be attributed erroneously to
+authors of previous versions.
+
+  Some devices are designed to deny users access to install or run
+modified versions of the software inside them, although the manufacturer
+can do so.  This is fundamentally incompatible with the aim of
+protecting users' freedom to change the software.  The systematic
+pattern of such abuse occurs in the area of products for individuals to
+use, which is precisely where it is most unacceptable.  Therefore, we
+have designed this version of the GPL to prohibit the practice for those
+products.  If such problems arise substantially in other domains, we
+stand ready to extend this provision to those domains in future versions
+of the GPL, as needed to protect the freedom of users.
+
+  Finally, every program is threatened constantly by software patents.
+States should not allow patents to restrict development and use of
+software on general-purpose computers, but in those that do, we wish to
+avoid the special danger that patents applied to a free program could
+make it effectively proprietary.  To prevent this, the GPL assures that
+patents cannot be used to render the program non-free.
+
+  The precise terms and conditions for copying, distribution and
+modification follow.
+
+                       TERMS AND CONDITIONS
+
+  0. Definitions.
+
+  "This License" refers to version 3 of the GNU General Public License.
+
+  "Copyright" also means copyright-like laws that apply to other kinds of
+works, such as semiconductor masks.
+
+  "The Program" refers to any copyrightable work licensed under this
+License.  Each licensee is addressed as "you".  "Licensees" and
+"recipients" may be individuals or organizations.
+
+  To "modify" a work means to copy from or adapt all or part of the work
+in a fashion requiring copyright permission, other than the making of an
+exact copy.  The resulting work is called a "modified version" of the
+earlier work or a work "based on" the earlier work.
+
+  A "covered work" means either the unmodified Program or a work based
+on the Program.
+
+  To "propagate" a work means to do anything with it that, without
+permission, would make you directly or secondarily liable for
+infringement under applicable copyright law, except executing it on a
+computer or modifying a private copy.  Propagation includes copying,
+distribution (with or without modification), making available to the
+public, and in some countries other activities as well.
+
+  To "convey" a work means any kind of propagation that enables other
+parties to make or receive copies.  Mere interaction with a user through
+a computer network, with no transfer of a copy, is not conveying.
+
+  An interactive user interface displays "Appropriate Legal Notices"
+to the extent that it includes a convenient and prominently visible
+feature that (1) displays an appropriate copyright notice, and (2)
+tells the user that there is no warranty for the work (except to the
+extent that warranties are provided), that licensees may convey the
+work under this License, and how to view a copy of this License.  If
+the interface presents a list of user commands or options, such as a
+menu, a prominent item in the list meets this criterion.
+
+  1. Source Code.
+
+  The "source code" for a work means the preferred form of the work
+for making modifications to it.  "Object code" means any non-source
+form of a work.
+
+  A "Standard Interface" means an interface that either is an official
+standard defined by a recognized standards body, or, in the case of
+interfaces specified for a particular programming language, one that
+is widely used among developers working in that language.
+
+  The "System Libraries" of an executable work include anything, other
+than the work as a whole, that (a) is included in the normal form of
+packaging a Major Component, but which is not part of that Major
+Component, and (b) serves only to enable use of the work with that
+Major Component, or to implement a Standard Interface for which an
+implementation is available to the public in source code form.  A
+"Major Component", in this context, means a major essential component
+(kernel, window system, and so on) of the specific operating system
+(if any) on which the executable work runs, or a compiler used to
+produce the work, or an object code interpreter used to run it.
+
+  The "Corresponding Source" for a work in object code form means all
+the source code needed to generate, install, and (for an executable
+work) run the object code and to modify the work, including scripts to
+control those activities.  However, it does not include the work's
+System Libraries, or general-purpose tools or generally available free
+programs which are used unmodified in performing those activities but
+which are not part of the work.  For example, Corresponding Source
+includes interface definition files associated with source files for
+the work, and the source code for shared libraries and dynamically
+linked subprograms that the work is specifically designed to require,
+such as by intimate data communication or control flow between those
+subprograms and other parts of the work.
+
+  The Corresponding Source need not include anything that users
+can regenerate automatically from other parts of the Corresponding
+Source.
+
+  The Corresponding Source for a work in source code form is that
+same work.
+
+  2. Basic Permissions.
+
+  All rights granted under this License are granted for the term of
+copyright on the Program, and are irrevocable provided the stated
+conditions are met.  This License explicitly affirms your unlimited
+permission to run the unmodified Program.  The output from running a
+covered work is covered by this License only if the output, given its
+content, constitutes a covered work.  This License acknowledges your
+rights of fair use or other equivalent, as provided by copyright law.
+
+  You may make, run and propagate covered works that you do not
+convey, without conditions so long as your license otherwise remains
+in force.  You may convey covered works to others for the sole purpose
+of having them make modifications exclusively for you, or provide you
+with facilities for running those works, provided that you comply with
+the terms of this License in conveying all material for which you do
+not control copyright.  Those thus making or running the covered works
+for you must do so exclusively on your behalf, under your direction
+and control, on terms that prohibit them from making any copies of
+your copyrighted material outside their relationship with you.
+
+  Conveying under any other circumstances is permitted solely under
+the conditions stated below.  Sublicensing is not allowed; section 10
+makes it unnecessary.
+
+  3. Protecting Users' Legal Rights From Anti-Circumvention Law.
+
+  No covered work shall be deemed part of an effective technological
+measure under any applicable law fulfilling obligations under article
+11 of the WIPO copyright treaty adopted on 20 December 1996, or
+similar laws prohibiting or restricting circumvention of such
+measures.
+
+  When you convey a covered work, you waive any legal power to forbid
+circumvention of technological measures to the extent such circumvention
+is effected by exercising rights under this License with respect to
+the covered work, and you disclaim any intention to limit operation or
+modification of the work as a means of enforcing, against the work's
+users, your or third parties' legal rights to forbid circumvention of
+technological measures.
+
+  4. Conveying Verbatim Copies.
+
+  You may convey verbatim copies of the Program's source code as you
+receive it, in any medium, provided that you conspicuously and
+appropriately publish on each copy an appropriate copyright notice;
+keep intact all notices stating that this License and any
+non-permissive terms added in accord with section 7 apply to the code;
+keep intact all notices of the absence of any warranty; and give all
+recipients a copy of this License along with the Program.
+
+  You may charge any price or no price for each copy that you convey,
+and you may offer support or warranty protection for a fee.
+
+  5. Conveying Modified Source Versions.
+
+  You may convey a work based on the Program, or the modifications to
+produce it from the Program, in the form of source code under the
+terms of section 4, provided that you also meet all of these conditions:
+
+    a) The work must carry prominent notices stating that you modified
+    it, and giving a relevant date.
+
+    b) The work must carry prominent notices stating that it is
+    released under this License and any conditions added under section
+    7.  This requirement modifies the requirement in section 4 to
+    "keep intact all notices".
+
+    c) You must license the entire work, as a whole, under this
+    License to anyone who comes into possession of a copy.  This
+    License will therefore apply, along with any applicable section 7
+    additional terms, to the whole of the work, and all its parts,
+    regardless of how they are packaged.  This License gives no
+    permission to license the work in any other way, but it does not
+    invalidate such permission if you have separately received it.
+
+    d) If the work has interactive user interfaces, each must display
+    Appropriate Legal Notices; however, if the Program has interactive
+    interfaces that do not display Appropriate Legal Notices, your
+    work need not make them do so.
+
+  A compilation of a covered work with other separate and independent
+works, which are not by their nature extensions of the covered work,
+and which are not combined with it such as to form a larger program,
+in or on a volume of a storage or distribution medium, is called an
+"aggregate" if the compilation and its resulting copyright are not
+used to limit the access or legal rights of the compilation's users
+beyond what the individual works permit.  Inclusion of a covered work
+in an aggregate does not cause this License to apply to the other
+parts of the aggregate.
+
+  6. Conveying Non-Source Forms.
+
+  You may convey a covered work in object code form under the terms
+of sections 4 and 5, provided that you also convey the
+machine-readable Corresponding Source under the terms of this License,
+in one of these ways:
+
+    a) Convey the object code in, or embodied in, a physical product
+    (including a physical distribution medium), accompanied by the
+    Corresponding Source fixed on a durable physical medium
+    customarily used for software interchange.
+
+    b) Convey the object code in, or embodied in, a physical product
+    (including a physical distribution medium), accompanied by a
+    written offer, valid for at least three years and valid for as
+    long as you offer spare parts or customer support for that product
+    model, to give anyone who possesses the object code either (1) a
+    copy of the Corresponding Source for all the software in the
+    product that is covered by this License, on a durable physical
+    medium customarily used for software interchange, for a price no
+    more than your reasonable cost of physically performing this
+    conveying of source, or (2) access to copy the
+    Corresponding Source from a network server at no charge.
+
+    c) Convey individual copies of the object code with a copy of the
+    written offer to provide the Corresponding Source.  This
+    alternative is allowed only occasionally and noncommercially, and
+    only if you received the object code with such an offer, in accord
+    with subsection 6b.
+
+    d) Convey the object code by offering access from a designated
+    place (gratis or for a charge), and offer equivalent access to the
+    Corresponding Source in the same way through the same place at no
+    further charge.  You need not require recipients to copy the
+    Corresponding Source along with the object code.  If the place to
+    copy the object code is a network server, the Corresponding Source
+    may be on a different server (operated by you or a third party)
+    that supports equivalent copying facilities, provided you maintain
+    clear directions next to the object code saying where to find the
+    Corresponding Source.  Regardless of what server hosts the
+    Corresponding Source, you remain obligated to ensure that it is
+    available for as long as needed to satisfy these requirements.
+
+    e) Convey the object code using peer-to-peer transmission, provided
+    you inform other peers where the object code and Corresponding
+    Source of the work are being offered to the general public at no
+    charge under subsection 6d.
+
+  A separable portion of the object code, whose source code is excluded
+from the Corresponding Source as a System Library, need not be
+included in conveying the object code work.
+
+  A "User Product" is either (1) a "consumer product", which means any
+tangible personal property which is normally used for personal, family,
+or household purposes, or (2) anything designed or sold for incorporation
+into a dwelling.  In determining whether a product is a consumer product,
+doubtful cases shall be resolved in favor of coverage.  For a particular
+product received by a particular user, "normally used" refers to a
+typical or common use of that class of product, regardless of the status
+of the particular user or of the way in which the particular user
+actually uses, or expects or is expected to use, the product.  A product
+is a consumer product regardless of whether the product has substantial
+commercial, industrial or non-consumer uses, unless such uses represent
+the only significant mode of use of the product.
+
+  "Installation Information" for a User Product means any methods,
+procedures, authorization keys, or other information required to install
+and execute modified versions of a covered work in that User Product from
+a modified version of its Corresponding Source.  The information must
+suffice to ensure that the continued functioning of the modified object
+code is in no case prevented or interfered with solely because
+modification has been made.
+
+  If you convey an object code work under this section in, or with, or
+specifically for use in, a User Product, and the conveying occurs as
+part of a transaction in which the right of possession and use of the
+User Product is transferred to the recipient in perpetuity or for a
+fixed term (regardless of how the transaction is characterized), the
+Corresponding Source conveyed under this section must be accompanied
+by the Installation Information.  But this requirement does not apply
+if neither you nor any third party retains the ability to install
+modified object code on the User Product (for example, the work has
+been installed in ROM).
+
+  The requirement to provide Installation Information does not include a
+requirement to continue to provide support service, warranty, or updates
+for a work that has been modified or installed by the recipient, or for
+the User Product in which it has been modified or installed.  Access to a
+network may be denied when the modification itself materially and
+adversely affects the operation of the network or violates the rules and
+protocols for communication across the network.
+
+  Corresponding Source conveyed, and Installation Information provided,
+in accord with this section must be in a format that is publicly
+documented (and with an implementation available to the public in
+source code form), and must require no special password or key for
+unpacking, reading or copying.
+
+  7. Additional Terms.
+
+  "Additional permissions" are terms that supplement the terms of this
+License by making exceptions from one or more of its conditions.
+Additional permissions that are applicable to the entire Program shall
+be treated as though they were included in this License, to the extent
+that they are valid under applicable law.  If additional permissions
+apply only to part of the Program, that part may be used separately
+under those permissions, but the entire Program remains governed by
+this License without regard to the additional permissions.
+
+  When you convey a copy of a covered work, you may at your option
+remove any additional permissions from that copy, or from any part of
+it.  (Additional permissions may be written to require their own
+removal in certain cases when you modify the work.)  You may place
+additional permissions on material, added by you to a covered work,
+for which you have or can give appropriate copyright permission.
+
+  Notwithstanding any other provision of this License, for material you
+add to a covered work, you may (if authorized by the copyright holders of
+that material) supplement the terms of this License with terms:
+
+    a) Disclaiming warranty or limiting liability differently from the
+    terms of sections 15 and 16 of this License; or
+
+    b) Requiring preservation of specified reasonable legal notices or
+    author attributions in that material or in the Appropriate Legal
+    Notices displayed by works containing it; or
+
+    c) Prohibiting misrepresentation of the origin of that material, or
+    requiring that modified versions of such material be marked in
+    reasonable ways as different from the original version; or
+
+    d) Limiting the use for publicity purposes of names of licensors or
+    authors of the material; or
+
+    e) Declining to grant rights under trademark law for use of some
+    trade names, trademarks, or service marks; or
+
+    f) Requiring indemnification of licensors and authors of that
+    material by anyone who conveys the material (or modified versions of
+    it) with contractual assumptions of liability to the recipient, for
+    any liability that these contractual assumptions directly impose on
+    those licensors and authors.
+
+  All other non-permissive additional terms are considered "further
+restrictions" within the meaning of section 10.  If the Program as you
+received it, or any part of it, contains a notice stating that it is
+governed by this License along with a term that is a further
+restriction, you may remove that term.  If a license document contains
+a further restriction but permits relicensing or conveying under this
+License, you may add to a covered work material governed by the terms
+of that license document, provided that the further restriction does
+not survive such relicensing or conveying.
+
+  If you add terms to a covered work in accord with this section, you
+must place, in the relevant source files, a statement of the
+additional terms that apply to those files, or a notice indicating
+where to find the applicable terms.
+
+  Additional terms, permissive or non-permissive, may be stated in the
+form of a separately written license, or stated as exceptions;
+the above requirements apply either way.
+
+  8. Termination.
+
+  You may not propagate or modify a covered work except as expressly
+provided under this License.  Any attempt otherwise to propagate or
+modify it is void, and will automatically terminate your rights under
+this License (including any patent licenses granted under the third
+paragraph of section 11).
+
+  However, if you cease all violation of this License, then your
+license from a particular copyright holder is reinstated (a)
+provisionally, unless and until the copyright holder explicitly and
+finally terminates your license, and (b) permanently, if the copyright
+holder fails to notify you of the violation by some reasonable means
+prior to 60 days after the cessation.
+
+  Moreover, your license from a particular copyright holder is
+reinstated permanently if the copyright holder notifies you of the
+violation by some reasonable means, this is the first time you have
+received notice of violation of this License (for any work) from that
+copyright holder, and you cure the violation prior to 30 days after
+your receipt of the notice.
+
+  Termination of your rights under this section does not terminate the
+licenses of parties who have received copies or rights from you under
+this License.  If your rights have been terminated and not permanently
+reinstated, you do not qualify to receive new licenses for the same
+material under section 10.
+
+  9. Acceptance Not Required for Having Copies.
+
+  You are not required to accept this License in order to receive or
+run a copy of the Program.  Ancillary propagation of a covered work
+occurring solely as a consequence of using peer-to-peer transmission
+to receive a copy likewise does not require acceptance.  However,
+nothing other than this License grants you permission to propagate or
+modify any covered work.  These actions infringe copyright if you do
+not accept this License.  Therefore, by modifying or propagating a
+covered work, you indicate your acceptance of this License to do so.
+
+  10. Automatic Licensing of Downstream Recipients.
+
+  Each time you convey a covered work, the recipient automatically
+receives a license from the original licensors, to run, modify and
+propagate that work, subject to this License.  You are not responsible
+for enforcing compliance by third parties with this License.
+
+  An "entity transaction" is a transaction transferring control of an
+organization, or substantially all assets of one, or subdividing an
+organization, or merging organizations.  If propagation of a covered
+work results from an entity transaction, each party to that
+transaction who receives a copy of the work also receives whatever
+licenses to the work the party's predecessor in interest had or could
+give under the previous paragraph, plus a right to possession of the
+Corresponding Source of the work from the predecessor in interest, if
+the predecessor has it or can get it with reasonable efforts.
+
+  You may not impose any further restrictions on the exercise of the
+rights granted or affirmed under this License.  For example, you may
+not impose a license fee, royalty, or other charge for exercise of
+rights granted under this License, and you may not initiate litigation
+(including a cross-claim or counterclaim in a lawsuit) alleging that
+any patent claim is infringed by making, using, selling, offering for
+sale, or importing the Program or any portion of it.
+
+  11. Patents.
+
+  A "contributor" is a copyright holder who authorizes use under this
+License of the Program or a work on which the Program is based.  The
+work thus licensed is called the contributor's "contributor version".
+
+  A contributor's "essential patent claims" are all patent claims
+owned or controlled by the contributor, whether already acquired or
+hereafter acquired, that would be infringed by some manner, permitted
+by this License, of making, using, or selling its contributor version,
+but do not include claims that would be infringed only as a
+consequence of further modification of the contributor version.  For
+purposes of this definition, "control" includes the right to grant
+patent sublicenses in a manner consistent with the requirements of
+this License.
+
+  Each contributor grants you a non-exclusive, worldwide, royalty-free
+patent license under the contributor's essential patent claims, to
+make, use, sell, offer for sale, import and otherwise run, modify and
+propagate the contents of its contributor version.
+
+  In the following three paragraphs, a "patent license" is any express
+agreement or commitment, however denominated, not to enforce a patent
+(such as an express permission to practice a patent or covenant not to
+sue for patent infringement).  To "grant" such a patent license to a
+party means to make such an agreement or commitment not to enforce a
+patent against the party.
+
+  If you convey a covered work, knowingly relying on a patent license,
+and the Corresponding Source of the work is not available for anyone
+to copy, free of charge and under the terms of this License, through a
+publicly available network server or other readily accessible means,
+then you must either (1) cause the Corresponding Source to be so
+available, or (2) arrange to deprive yourself of the benefit of the
+patent license for this particular work, or (3) arrange, in a manner
+consistent with the requirements of this License, to extend the patent
+license to downstream recipients.  "Knowingly relying" means you have
+actual knowledge that, but for the patent license, your conveying the
+covered work in a country, or your recipient's use of the covered work
+in a country, would infringe one or more identifiable patents in that
+country that you have reason to believe are valid.
+
+  If, pursuant to or in connection with a single transaction or
+arrangement, you convey, or propagate by procuring conveyance of, a
+covered work, and grant a patent license to some of the parties
+receiving the covered work authorizing them to use, propagate, modify
+or convey a specific copy of the covered work, then the patent license
+you grant is automatically extended to all recipients of the covered
+work and works based on it.
+
+  A patent license is "discriminatory" if it does not include within
+the scope of its coverage, prohibits the exercise of, or is
+conditioned on the non-exercise of one or more of the rights that are
+specifically granted under this License.  You may not convey a covered
+work if you are a party to an arrangement with a third party that is
+in the business of distributing software, under which you make payment
+to the third party based on the extent of your activity of conveying
+the work, and under which the third party grants, to any of the
+parties who would receive the covered work from you, a discriminatory
+patent license (a) in connection with copies of the covered work
+conveyed by you (or copies made from those copies), or (b) primarily
+for and in connection with specific products or compilations that
+contain the covered work, unless you entered into that arrangement,
+or that patent license was granted, prior to 28 March 2007.
+
+  Nothing in this License shall be construed as excluding or limiting
+any implied license or other defenses to infringement that may
+otherwise be available to you under applicable patent law.
+
+  12. No Surrender of Others' Freedom.
+
+  If conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot convey a
+covered work so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you may
+not convey it at all.  For example, if you agree to terms that obligate you
+to collect a royalty for further conveying from those to whom you convey
+the Program, the only way you could satisfy both those terms and this
+License would be to refrain entirely from conveying the Program.
+
+  13. Use with the GNU Affero General Public License.
+
+  Notwithstanding any other provision of this License, you have
+permission to link or combine any covered work with a work licensed
+under version 3 of the GNU Affero General Public License into a single
+combined work, and to convey the resulting work.  The terms of this
+License will continue to apply to the part which is the covered work,
+but the special requirements of the GNU Affero General Public License,
+section 13, concerning interaction through a network will apply to the
+combination as such.
+
+  14. Revised Versions of this License.
+
+  The Free Software Foundation may publish revised and/or new versions of
+the GNU General Public License from time to time.  Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+  Each version is given a distinguishing version number.  If the
+Program specifies that a certain numbered version of the GNU General
+Public License "or any later version" applies to it, you have the
+option of following the terms and conditions either of that numbered
+version or of any later version published by the Free Software
+Foundation.  If the Program does not specify a version number of the
+GNU General Public License, you may choose any version ever published
+by the Free Software Foundation.
+
+  If the Program specifies that a proxy can decide which future
+versions of the GNU General Public License can be used, that proxy's
+public statement of acceptance of a version permanently authorizes you
+to choose that version for the Program.
+
+  Later license versions may give you additional or different
+permissions.  However, no additional obligations are imposed on any
+author or copyright holder as a result of your choosing to follow a
+later version.
+
+  15. Disclaimer of Warranty.
+
+  THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
+APPLICABLE LAW.  EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
+HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
+OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
+IS WITH YOU.  SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
+ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+  16. Limitation of Liability.
+
+  IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
+THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
+GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
+USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
+DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
+PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
+EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGES.
+
+  17. Interpretation of Sections 15 and 16.
+
+  If the disclaimer of warranty and limitation of liability provided
+above cannot be given local legal effect according to their terms,
+reviewing courts shall apply local law that most closely approximates
+an absolute waiver of all civil liability in connection with the
+Program, unless a warranty or assumption of liability accompanies a
+copy of the Program in return for a fee.
+
+                     END OF TERMS AND CONDITIONS
+
+            How to Apply These Terms to Your New Programs
+
+  If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+  To do so, attach the following notices to the program.  It is safest
+to attach them to the start of each source file to most effectively
+state the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+    <one line to give the program's name and a brief idea of what it does.>
+    Copyright (C) <year>  <name of author>
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+Also add information on how to contact you by electronic and paper mail.
+
+  If the program does terminal interaction, make it output a short
+notice like this when it starts in an interactive mode:
+
+    <program>  Copyright (C) <year>  <name of author>
+    This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+    This is free software, and you are welcome to redistribute it
+    under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License.  Of course, your program's commands
+might be different; for a GUI interface, you would use an "about box".
+
+  You should also get your employer (if you work as a programmer) or school,
+if any, to sign a "copyright disclaimer" for the program, if necessary.
+For more information on this, and how to apply and follow the GNU GPL, see
+<https://www.gnu.org/licenses/>.
+
+  The GNU General Public License does not permit incorporating your program
+into proprietary programs.  If your program is a subroutine library, you
+may consider it more useful to permit linking proprietary applications with
+the library.  If this is what you want to do, use the GNU Lesser General
+Public License instead of this License.  But first, please read
+<https://www.gnu.org/licenses/why-not-lgpl.html>.

From 579de590c27426867001bdc4aa272d917b1dd72c Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@users.noreply.github.com>
Date: Mon, 20 Dec 2021 13:03:51 -0800
Subject: [PATCH 045/137] Remove Old License

---
 LICENSE.txt | 230 ----------------------------------------------------
 1 file changed, 230 deletions(-)
 delete mode 100644 LICENSE.txt

diff --git a/LICENSE.txt b/LICENSE.txt
deleted file mode 100644
index 20586469..00000000
--- a/LICENSE.txt
+++ /dev/null
@@ -1,230 +0,0 @@
-MAVIS
-Copyright 2017 Canada's Michael Smith Genome Sciences Centre
-
-BC CANCER AGENCY SOFTWARE LICENSE AGREEMENT (ACADEMIC USE)
-CAREFULLY READ THE FOLLOWING TERMS AND CONDITIONS. This License
-Agreement (the "Agreement") is a legal contract between you, your
-employer, educational institution or organization (collectively, "You")
-and the British Columbia Cancer Agency ("BCCA") with respect to the
-license of the software, including all associated documentation
-(collectively, the "Product").
-
-BCCA is willing to license the Product to You only if You accept the
-terms and conditions of this Agreement. By clicking on the "I ACCEPT"
-button, or by copying, downloading, accessing or otherwise using the
-Product, You automatically agree to be bound by the terms of this
-Agreement. IF YOU DO NOT WISH TO BE BOUND BY THE TERMS OF THIS
-AGREEMENT, DO NOT COPY, DOWNLOAD, ACCESS OR OTHERWISE USE THE
-PRODUCT.
-
-1. AUTHORITY: In the event that You are an educational institution or
-organization, Your representative who is clicking the "I ACCEPT"
-button, or otherwise copying, downloading, accessing or using the
-Product hereby, in their personal capacity, represents and warrants
-that they possess the legal authority to enter into this Agreement
-on Your behalf and to bind You to the terms of this Agreement.
-
-2. LICENSE TO USE: BCCA hereby grants to You a personal, non-exclusive,
-non-transferable, limited license to use the Product solely for
-internal, non-commercial use for non-profit research or educational
-purposes only on the terms and conditions contained in this Agreement.
-The Product may be installed at a single site at Your premises only. A
-copy of the Product installed on a single common machine or cluster of
-machines may be shared for internal use by Qualified Users only. In
-order to be a "Qualified User", an individual must be a student,
-researcher, professor, instructor or staff member of a non-profit
-educational institution or organization who uses the Product solely for
-non-profit research or educational purposes.
-
-3. RESTRICTIONS: You acknowledge and agree that You shall not, and
-shall not authorize any third party to:
-(a) make copies of the Product, except as provided in Section 2 and
-except for a single backup copy, and any such copy together with the
-original must be kept in Your possession or control;
-(b) modify, adapt, decompile, disassemble, translate into another
-computer language, create derivative works of, or otherwise reverse
-engineer the Product, or disclose any trade secrets relating to the
-Product, except as permitted in Section 5;
-(c) license, sublicense, distribute, sell, lease, transfer, assign,
-trade, rent or publish the Product or any part thereof and/or copies
-thereof, to any third party;
-(d) use the Product to process any data other than Your own;
-(e) use the Product or any part thereof for any commercial or
-for-profit purpose or any other purpose other than as permitted in
-Section 2; or
-(f) use, without its express permission, the name of BCCA.
-
-4. INTELLECTUAL PROPERTY RIGHTS: Subject to Section 5 below, all
-patents, copyrights, trade secrets, service marks, trademarks and
-other proprietary rights in or related to the Product and any
-improvements, modifications and enhancements thereof are and will
-remain the exclusive property of BCCA or its licensors. You agree
-that You will not, either during or after the termination of this
-Agreement, contest or challenge the title to or the intellectual
-property rights of BCCA or its licensors in the Product or any
-portion thereof.
-
-5. OWNERSHIP OF IMPROVEMENTS: In the event that the Product, in the
-form provided to You, includes source code (the "Source Code"),
-You are entitled to make improvements, modifications and
-enhancements to the Source Code (collectively, "Improvements")
-which Improvements are to be used by You for non-profit research
-and educational purposes only and You shall be the owner of those
-Improvements that You directly make and of all intellectual
-property rights to such Improvements, subject to the foregoing
-limits on Your use and distribution of such Improvements. You
-hereby grant to BCCA a perpetual, non-exclusive, worldwide,
-fully-paid, irrevocable license to use such Improvements for any
-purposes whatsoever, and to sublicense such Improvements including
-the right for third parties to sublicense the same, in perpetuity
-to the extent such rights are not limited in duration under
-applicable law, without identifying or seeking Your
-consent. Notwithstanding the foregoing, You acknowledge that BCCA
-and its licensors will retain or own all rights in and to any
-pre-existing code or other technology, content and data that may be
-incorporated in the Improvements. For greater certainty, this
-Section applies solely to the Source Code and shall not give You
-any rights with respect to the object code or any other portion or
-format of the Product which use, for greater certainty, is limited
-as set forth in this Agreement including as set out in Section 3(b)
-above. You acknowledge and agree that you will provide copies of
-Improvements to BCCA in such format as reasonably requested by BCCA
-at any time upon the request of BCCA.
-
-6. CONFIDENTIALITY: You acknowledge that the Product is and
-incorporates confidential and proprietary information developed,
-acquired by or licensed to BCCA. You will take all reasonable
-precautions necessary to safeguard the confidentiality of the
-Product, and will not disclose any information about the Product to
-any other person without BCCA's prior written consent. You will
-not allow the removal or defacement of any confidential or
-proprietary notice placed on the Product. You acknowledge that any
-breach of this Section 6 will cause irreparable harm to BCCA and
-its licensors.
-
-7. NO WARRANTIES: THIS PRODUCT IS PROVIDED TO YOU BY BCCA IN ORDER TO
-ALLOW YOU TO OBTAIN ACCESS TO LEADING ACADEMIC RESEARCH. THE PRODUCT
-IS PROVIDED TO YOU ON AN "AS IS" BASIS WITHOUT WARRANTY OF ANY
-KIND. NO WARRANTY, REPRESENTATION OR CONDITION EITHER EXPRESS OR
-IMPLIED, INCLUDING WITHOUT LIMITATION, ANY IMPLIED WARRANTY OR
-CONDITION OF MERCHANTABILITY, NON-INFRINGEMENT, PERFORMANCE,
-DURABILITY OR FITNESS FOR A PARTICULAR PURPOSE OR USE SHALL
-APPLY. BCCA DOES NOT WARRANT THAT THE PRODUCT WILL OPERATE ON A
-CONTINUOUS OR TROUBLE FREE BASIS.
-
-8. LIMITATION OF LIABILITY: TO THE MAXIMUM EXTENT PERMITTED BY
-APPLICABLE LAW, IN NO EVENT SHALL THE AGGREGATE LIABILITY OF BCCA TO
-YOU EXCEED THE AMOUNT YOU HAVE PAID TO ACQUIRE THE PRODUCT ("MAXIMUM
-AMOUNT") AND WHERE YOU HAVE NOT PAID ANY AMOUNT FOR THE PRODUCT THEN
-THE MAXIMUM AMOUNT SHALL BE DEEMED TO BE CDN$100.00. IN NO EVENT SHALL
-BCCA BE LIABLE FOR ANY INDIRECT, INCIDENTAL, CONSEQUENTIAL, OR SPECIAL
-DAMAGES, INCLUDING WITHOUT LIMITATION ANY DAMAGES FOR LOST PROFITS OR
-SAVINGS, REGARDLESS OF WHETHER THEY HAVE BEEN ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE. EXCEPT TO THE EXTENT THAT THE LAWS OF A
-COMPETENT JURISDICTION REQUIRE LIABILITIES BEYOND AND DESPITE THESE
-LIMITATIONS, EXCLUSIONS AND DISCLAIMERS, THESE LIMITATIONS, EXCLUSIONS
-AND DISCLAIMERS SHALL APPLY WHETHER AN ACTION, CLAIM OR DEMAND ARISES
-FROM A BREACH OF WARRANTY OR CONDITION, BREACH OF CONTRACT,
-NEGLIGENCE, STRICT LIABILITY OR ANY OTHER KIND OF CIVIL OR STATUTORY
-LIABILITY CONNECTED WITH OR ARISING FROM THIS AGREEMENT. YOU AGREE
-THAT THE FOREGOING DISCLAIMER OF WARRANTIES AND LIMITATION OF
-LIABILITY ARE FAIR IN LIGHT OF THE NATURE OF THE RIGHTS GRANTED HEREIN
-AND THE AMOUNT OF FEES PAID BY YOU IN RESPECT OF THE PRODUCT.
-
-9. INDEMNITY: You will indemnify, defend and hold harmless BCCA, its
-board of directors, staff and agents from and against any and all
-liability, loss, damage, action, claim or expense (including
-attorney's fees and costs at trial and appellate levels) in
-connection with any claim, suit, action, demand or judgement
-(collectively, "Claim") arising out of, connected with, resulting
-from, or sustained as a result of Your use of the Product or the
-downloading of the Product, including without limitation, any Claim
-relating to infringement of BCCA's intellectual property rights or
-the intellectual property rights of any third party.
-
-10. SUPPORT AND MAINTENANCE: You acknowledge and agree that, unless
-and to the extent expressly agreed by BCCA in a separate written
-document, the Product is provided to You without any support or
-maintenance from BCCA and, for greater certainty, BCCA shall have
-no obligation to issue any update or upgrade to any Product.
-
-11. TERM: This Agreement is effective until terminated. You may
-terminate this Agreement at any time by ceasing use of the Product
-and destroying or deleting any copies of the Product. This
-Agreement will terminate immediately without notice from BCCA if
-You fail to comply with any provision of this Agreement. BCCA may
-terminate this Agreement at any time upon notice to you where BCCA
-determines, in its sole discretion, that any continued use of the
-Product could infringe the rights of any third parties. Upon
-termination of this Agreement, and in any event upon BCCA
-delivering You notice of termination, You shall immediately purge
-all Products from Your computer system(s), return to BCCA all
-copies of the Product that are in Your possession or control, and
-cease any further development of any Improvements. On any
-termination of this Agreement Sections 1, 4, 6, 7, 8, 9, 13 and 14
-shall survive such termination.
-
-12. GOVERNMENT END USERS: Where any of the Product is used, duplicated
-or disclosed by or to the United States government or a government
-contractor or sub contractor, it is provided with RESTRICTED
-RIGHTS as defined in Title 48 CFR 52.227-19 and is subject to the
-following: Title 48 CFR 2.101, 52.227-19, 227.7201 through
-227.7202-4, FAR 52.227-14, and FAR 52.227-19(c)(1-2) and (6/87),
-and where applicable, the customary software license, as described
-in Title 48 CFR 227-7202 with respect to commercial software and
-commercial software documentation including DFAR 252.227-7013,
-DFAR 252,227-7014, DFAR 252.227-7015 and DFAR 252.7018, all as
-applicable.
-
-13. USE OF THE DOWNLOAD SERVICE: You acknowledge and agree that you
-will be responsible for all costs, charges and taxes (where
-applicable) arising out of Your use of the Product and the
-downloading of the Product. You acknowledge that You are
-responsible for supplying any hardware or software necessary to
-use the Product pursuant to this Agreement.
-
-14. GENERAL PROVISIONS:
-(a) This Agreement will be governed by the laws of the Province of
-British Columbia, and the laws of Canada applicable therein, excluding
-any rules of private international law that lead to the application of
-the laws of any other jurisdiction. The United Nations Convention on
-Contracts for the International Sale of Goods (1980) does not apply to
-this Agreement. The courts of the Province of British Columbia shall
-have non-exclusive jurisdiction to hear any matter arising in
-connection with this Agreement.
-(b) USE OF THE PRODUCT IS PROHIBITED IN ANY JURISDICTION WHICH DOES
-NOT GIVE EFFECT TO THE TERMS OF THIS AGREEMENT.
-(c) You agree that no joint venture, partnership, employment,
-consulting or agency relationship exists between You and BCCA as a
-result of this Agreement or Your use of the Product.
-(d) You hereby consent to Your contact information and any other
-personally identifiable information that You provide to us being
-disclosed to and maintained and used by us and our business partners
-for the purposes of (i) managing and developing our respective
-businesses and operations; (ii) marketing products and services to You
-and your staff; and (iii) developing new and enhancing existing
-products. You further agree that we may provide this information to
-other persons as required to satisfy any legal requirements and to any
-person that acquires some or all of the assets of BCCA. Where any of
-the personally identifiable information that You provide to us is in
-respect of individuals other than Yourself (such as Your staff) then
-You represent and warrant to use that You have obtained all necessary
-consents and authorizations from such individuals in order to comply
-with this provision. Please see the BCCA website for further
-information regarding personally identifiable information.
-(e) This Agreement is the entire Agreement between You and BCCA
-relating to this subject matter. You will not contest the validity of
-this Agreement merely because it is in electronic form. No
-modification of this Agreement will be binding, unless in writing and
-accepted by an authorized representative of each party.
-(f) The provisions of this Agreement are severable in that if any
-provision in the Agreement is determined to be invalid or
-unenforceable under any controlling body of law, that will not affect
-the validity or enforceability of the remaining provisions of the
-Agreement.
-(g) You agree to print out or download a copy of this Agreement and
-retain it for Your records.
-(h) You consent to the use of the English language in this Agreement.
-(i) You may not assign this Agreement or any of Your rights or
-obligations hereunder without BCCA's prior written consent. BCCA, at
-its sole discretion may assign this Agreement without notice to You.

From e0e0e6e9d446483c2388f7714517eb2dffbb2183 Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Tue, 28 Dec 2021 12:57:08 -0800
Subject: [PATCH 046/137] Fix name of license in manifest

---
 MANIFEST.in | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/MANIFEST.in b/MANIFEST.in
index 8b270b97..2ab6e3c1 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,5 +1,5 @@
 recursive-include src *.py *.json
 include README.md
-include LICENSE.txt
+include LICENSE
 prune docs/build
 prune docs/source/auto

From dea842eaf404c5c249edb04a7d41f31492f61e19 Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Tue, 28 Dec 2021 11:17:47 -0800
Subject: [PATCH 047/137] Install missing openssl library

---
 .github/workflows/build.yml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index a328130c..b3cbc8bd 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -19,6 +19,8 @@ jobs:
     name: python-${{ matrix.python-version }}
     steps:
     - uses: actions/checkout@v2
+    - name: install machine dependencies
+      run: sudo apt-get install -y libcurl4-openssl-dev
     - name: Set up Python ${{ matrix.python-version }}
       uses: actions/setup-python@v2
       with:

From f143720a61014fd39de81f778f7a17020d926c23 Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Tue, 28 Dec 2021 11:11:12 -0800
Subject: [PATCH 048/137] Remove unused dep pyvcf

---
 setup.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/setup.py b/setup.py
index 7b2c3a03..7dcb3ae7 100644
--- a/setup.py
+++ b/setup.py
@@ -87,7 +87,6 @@ def check_nonpython_dependencies():
     'numpy>=1.13.1',
     'pandas>=1.1, <2',
     'pysam>=0.9, <=0.15.2',
-    'pyvcf==0.6.8',
     'shortuuid>=0.5.0',
     'svgwrite',
     'mavis_config>=1.1.0, <2.0.0',

From 6af3454075be56172653f640635b38891daf1ab4 Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Tue, 28 Dec 2021 16:02:05 -0800
Subject: [PATCH 049/137] use pandas instead of pysam to read vcfs

resolves: #265
---
 .gitignore                   |   1 +
 setup.py                     |   2 +-
 src/mavis/tools/vcf.py       | 143 +++++++++++++++++++++++++++++++----
 tests/data/manta_events.vcf  |   4 +-
 tests/unit/test_tools_vcf.py |   9 +++
 5 files changed, 142 insertions(+), 17 deletions(-)
 create mode 100644 tests/unit/test_tools_vcf.py

diff --git a/.gitignore b/.gitignore
index 0745a3b2..1f4c4214 100644
--- a/.gitignore
+++ b/.gitignore
@@ -15,6 +15,7 @@ junit
 .tox
 *eggs/
 .mypy_cache
+.snakemake
 
 # aligners
 blat
diff --git a/setup.py b/setup.py
index 7dcb3ae7..4afe997a 100644
--- a/setup.py
+++ b/setup.py
@@ -86,7 +86,7 @@ def check_nonpython_dependencies():
     'networkx==1.11.0',
     'numpy>=1.13.1',
     'pandas>=1.1, <2',
-    'pysam>=0.9, <=0.15.2',
+    'pysam',
     'shortuuid>=0.5.0',
     'svgwrite',
     'mavis_config>=1.1.0, <2.0.0',
diff --git a/src/mavis/tools/vcf.py b/src/mavis/tools/vcf.py
index 4ffa1e2b..77a2e22a 100644
--- a/src/mavis/tools/vcf.py
+++ b/src/mavis/tools/vcf.py
@@ -1,12 +1,56 @@
+import logging
 import re
-from typing import Dict, List, Tuple
+from dataclasses import dataclass
+from typing import Dict, List, Optional, Tuple
 
+import pandas as pd
 from pysam import VariantFile
+from typing_extensions import TypedDict
 
 from ..constants import COLUMNS, ORIENT, SVTYPE
 from ..util import DEVNULL
 from .constants import SUPPORTED_TOOL
 
+PANDAS_DEFAULT_NA_VALUES = [
+    '-1.#IND',
+    '1.#QNAN',
+    '1.#IND',
+    '-1.#QNAN',
+    '#N/A',
+    'N/A',
+    'NA',
+    '#NA',
+    'NULL',
+    'NaN',
+    '-NaN',
+    'nan',
+    '-nan',
+]
+
+
+class VcfInfoType(TypedDict, total=False):
+    SVTYPE: str
+    CHR2: str
+    CIPOS: Tuple[int, int]
+    CIEND: Tuple[int, int]
+    CT: str
+    END: Optional[int]
+    PRECISE: bool
+
+
+@dataclass
+class VcfRecordType:
+    id: str
+    pos: int
+    chrom: str
+    alts: List[Optional[str]]
+    info: VcfInfoType
+    ref: str
+
+    @property
+    def stop(self) -> Optional[int]:
+        return self.info.get('END', self.pos)
+
 
 def parse_bnd_alt(alt: str) -> Tuple[str, int, str, str, str, str]:
     """
@@ -88,6 +132,7 @@ def convert_record(record, record_mapping={}, log=DEVNULL) -> List[Dict]:
             - duplication: 5to3
     """
     records = []
+
     for alt in record.alts if record.alts else [None]:
         info = {}
         for key in record.info.keys():
@@ -106,7 +151,7 @@ def convert_record(record, record_mapping={}, log=DEVNULL) -> List[Dict]:
         if record.id and record.id != 'N':  # to account for NovoBreak N in the ID field
             std_row['id'] = record.id
 
-        if info.get('SVTYPE', None) == 'BND':
+        if info.get('SVTYPE') == 'BND':
             chr2, end, orient1, orient2, ref, alt = parse_bnd_alt(alt)
             std_row[COLUMNS.break1_orientation] = orient1
             std_row[COLUMNS.break2_orientation] = orient2
@@ -172,6 +217,82 @@ def convert_record(record, record_mapping={}, log=DEVNULL) -> List[Dict]:
     return records
 
 
+def convert_pandas_rows_to_variants(df):
+    def parse_info(info_field):
+        info = {}
+        for pair in info_field.split(';'):
+            if '=' in pair:
+                key, value = pair.split('=', 1)
+                info[key] = value
+            else:
+                info[pair] = True
+
+        # convert info types
+        for key in info:
+            if key in {'CIPOS', 'CIEND'}:
+                ci_start, ci_end = info[key].split(',')
+                info[key] = (int(ci_start), int(ci_end))
+            elif key == 'END':
+                info[key] = int(info[key])
+
+        return info
+
+    df['info'] = df['INFO'].apply(parse_info)
+    df['alts'] = df['ALT'].apply(lambda a: a.split(','))
+
+    rows = []
+    for _, row in df.iterrows():
+
+        rows.append(
+            VcfRecordType(
+                id=row['ID'],
+                pos=row['POS'],
+                info=VcfInfoType(row['info']),
+                chrom=row['CHROM'],
+                ref=row['REF'],
+                alts=row['alts'],
+            )
+        )
+    return rows
+
+
+def pandas_vcf(input_file) -> Tuple[List[str], pd.DataFrame]:
+    """
+    Read a standard vcf file into a pandas dataframe
+    """
+    # read the comment/header information
+    header_lines = []
+    with open(input_file, 'r') as fh:
+        line = '##'
+        while line.startswith('##'):
+            header_lines.append(line)
+            line = fh.readline().strip()
+        header_lines = header_lines[1:]
+    # read the data
+    df = pd.read_csv(
+        input_file,
+        sep='\t',
+        skiprows=len(header_lines),
+        dtype={
+            'CHROM': str,
+            'POS': int,
+            'ID': str,
+            'INFO': str,
+            'FORMAT': str,
+            'REF': str,
+            'ALT': str,
+        },
+        na_values=PANDAS_DEFAULT_NA_VALUES + ['.'],
+    )
+    df = df.rename(columns={df.columns[0]: df.columns[0].replace('#', '')})
+    required_columns = ['CHROM', 'INFO', 'POS', 'REF', 'ALT', 'ID']
+    for col in required_columns:
+        if col not in df.columns:
+            raise KeyError(f'Missing required column: {col}')
+    # convert the format fields using the header
+    return header_lines, df
+
+
 def convert_file(input_file: str, file_type: str, log):
     """process a VCF file
 
@@ -183,18 +304,12 @@ def convert_file(input_file: str, file_type: str, log):
         err: [description]
     """
     rows = []
-    vfile = VariantFile(input_file)
-    try:
-        vfile.header.info.add('END', number=1, type='Integer', description='End of the interval')
-    except ValueError:
-        pass
 
-    for vcf_record in vfile.fetch():
+    _, data = pandas_vcf(input_file)
+
+    for variant_record in convert_pandas_rows_to_variants(data):
         try:
-            rows.extend(convert_record(vcf_record, log=log))
-        except Exception as err:
-            if file_type != SUPPORTED_TOOL.STRELKA:
-                raise err
-            else:
-                log('Ignoring', vcf_record)
+            rows.extend(convert_record(variant_record, log=log))
+        except NotImplementedError as err:
+            logging.warning(str(err))
     return rows
diff --git a/tests/data/manta_events.vcf b/tests/data/manta_events.vcf
index b1f5200f..ac384904 100644
--- a/tests/data/manta_events.vcf
+++ b/tests/data/manta_events.vcf
@@ -133,5 +133,5 @@
 7	126098487	MantaINV:4:28281:28286:0:0:0	T	<INV>	.	PASS	END=126167443;SVTYPE=INV;SVLEN=68956;CIPOS=0,3;CIEND=-3,0;HOMLEN=3;HOMSEQ=ATG;INV5;SOMATIC;SOMATICSCORE=120	PR:SR	42,0:48,0	95,45:104,36
 9	28031861	MantaINV:162252:2:3:0:0:0	A	<INV>	.	PASS	END=28034467;SVTYPE=INV;SVLEN=2606;CIPOS=0,1;CIEND=-1,0;HOMLEN=1;HOMSEQ=C;SVINSLEN=11;SVINSSEQ=TTTTCGGAATT;INV5;SOMATIC;SOMATICSCORE=104	PR:SR	45,0:42,0	41,19:26,19
 X	31196943	MantaDEL:290420:0:1:0:0:0	A	<DEL>	.	PASS	END=31216210;SVTYPE=DEL;SVLEN=-19267;SVINSLEN=8;SVINSSEQ=ATGTAGTG;SOMATIC;SOMATICSCORE=124	PR:SR	35,0:25,0	43,32:32,31
-1    17051724   MantaBND:207:0:1:0:0:0:0    C   [1:234912188[GCCCCATC   36  PASS    SVTYPE=BND;MATEID=MantaBND:207:0:1:0:0:0:1;SVINSLEN=7;SVINSSEQ=GCCCCAT;BND_DEPTH=5;MATE_BND_DEPTH=4 GT:FT:GQ:PL:PR:SR 0/1:PASS:30:86,0,28:1,2:3,1   .   .   .
-1    234912188  MantaBND:207:0:1:0:0:0:1    A   [1:17051724[ATGGGGCA    36  PASS    SVTYPE=BND;MATEID=MantaBND:207:0:1:0:0:0:0;SVINSLEN=7;SVINSSEQ=ATGGGGC;BND_DEPTH=4;MATE_BND_DEPTH=5 GT:FT:GQ:PL:PR:SR 0/1:PASS:30:86,0,28:1,2:3,1   .   .   .
+1	17051724	MantaBND:207:0:1:0:0:0:0	C	[1:234912188[GCCCCATC	36	PASS	SVTYPE=BND;MATEID=MantaBND:207:0:1:0:0:0:1;SVINSLEN=7;SVINSSEQ=GCCCCAT;BND_DEPTH=5;MATE_BND_DEPTH=4 GT:FT:GQ:PL:PR:SR 0/1:PASS:30:86,0,28:1,2:3,1	.	.	.
+1	234912188	MantaBND:207:0:1:0:0:0:1	A	[1:17051724[ATGGGGCA	36	PASS	SVTYPE=BND;MATEID=MantaBND:207:0:1:0:0:0:0;SVINSLEN=7;SVINSSEQ=ATGGGGC;BND_DEPTH=4;MATE_BND_DEPTH=5 GT:FT:GQ:PL:PR:SR 0/1:PASS:30:86,0,28:1,2:3,1	.	.	.
diff --git a/tests/unit/test_tools_vcf.py b/tests/unit/test_tools_vcf.py
new file mode 100644
index 00000000..cffe9ade
--- /dev/null
+++ b/tests/unit/test_tools_vcf.py
@@ -0,0 +1,9 @@
+from mavis.tools.vcf import pandas_vcf
+
+from ..util import get_data
+
+
+def test_read_vcf():
+    header, df = pandas_vcf(get_data('delly_events.vcf'))
+    assert len(header) == 63
+    assert df.shape[0] == 31

From 034363ce84cfaabd9f8be46993f6b66e9586bac7 Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Tue, 28 Dec 2021 16:16:06 -0800
Subject: [PATCH 050/137] Drop support for 3.6, add 3.9 and 3.10

---
 .github/workflows/build.yml | 4 ++--
 setup.py                    | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index b3cbc8bd..23aa2a5a 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -15,7 +15,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: [3.6, 3.7, 3.8]
+        python-version: [3.7, 3.8, 3.9, 3.10]
     name: python-${{ matrix.python-version }}
     steps:
     - uses: actions/checkout@v2
@@ -92,7 +92,7 @@ jobs:
       - name: Install workflow dependencies
         run: |
           python -m pip install --upgrade pip setuptools wheel
-          pip install mavis_config pandas
+          pip install mavis_config pandas snakemake
       - uses: eWaterCycle/setup-singularity@v6
         with:
           singularity-version: 3.6.4
diff --git a/setup.py b/setup.py
index 4afe997a..015bb674 100644
--- a/setup.py
+++ b/setup.py
@@ -114,7 +114,7 @@ def check_nonpython_dependencies():
     },
     tests_require=TEST_REQS,
     setup_requires=['pip>=9.0.0', 'setuptools>=36.0.0'],
-    python_requires='>=3.6',
+    python_requires='>=3.7',
     author='Caralyn Reisle',
     author_email='creisle@bcgsc.ca',
     test_suite='tests',

From 53535cfe46b7ac9683dd5954fb7e4bc78d7b01e3 Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Tue, 28 Dec 2021 16:21:36 -0800
Subject: [PATCH 051/137] Fix pointer to old license file

---
 Dockerfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Dockerfile b/Dockerfile
index 8b93e0c0..d3fbd1f4 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -29,7 +29,7 @@ RUN wget http://hgdownload.soe.ucsc.edu/admin/exe/linux.x86_64/blat/blat && \
 COPY setup.py setup.py
 COPY setup.cfg setup.cfg
 COPY src src
-COPY LICENSE.txt LICENSE.txt
+COPY LICENSE LICENSE
 COPY README.md README.md
 
 # install python package

From ecefdd850877a6349f609eef0c1d8226b4746a65 Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Tue, 28 Dec 2021 16:22:21 -0800
Subject: [PATCH 052/137] Quote python versions

---
 .github/workflows/build.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 23aa2a5a..b5809403 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -15,7 +15,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: [3.7, 3.8, 3.9, 3.10]
+        python-version: ["3.7", "3.8", "3.9", "3.10"]
     name: python-${{ matrix.python-version }}
     steps:
     - uses: actions/checkout@v2

From ed02f49831b35a20b3ccfc8bd6689e7d2fd7a82c Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Tue, 28 Dec 2021 16:23:51 -0800
Subject: [PATCH 053/137] Also update quick tests

---
 .github/workflows/quick-tests.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/quick-tests.yml b/.github/workflows/quick-tests.yml
index c74ae1be..a1d333b4 100644
--- a/.github/workflows/quick-tests.yml
+++ b/.github/workflows/quick-tests.yml
@@ -10,7 +10,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: [3.6, 3.7, 3.8]
+        python-version: ["3.7", "3.8", "3.9", "3.10"]
     name: python-${{ matrix.python-version }} quick
     steps:
     - uses: actions/checkout@v2

From 9b3c72c889594cd0cf3b6062b528792badaa7494 Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Tue, 28 Dec 2021 16:41:39 -0800
Subject: [PATCH 054/137] Only support 3.7/3.8 for now

---
 .github/workflows/build.yml       | 2 +-
 .github/workflows/quick-tests.yml | 2 +-
 setup.cfg                         | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index b5809403..17b952f1 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -15,7 +15,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ["3.7", "3.8", "3.9", "3.10"]
+        python-version: ["3.7", "3.8"]
     name: python-${{ matrix.python-version }}
     steps:
     - uses: actions/checkout@v2
diff --git a/.github/workflows/quick-tests.yml b/.github/workflows/quick-tests.yml
index a1d333b4..178eab7b 100644
--- a/.github/workflows/quick-tests.yml
+++ b/.github/workflows/quick-tests.yml
@@ -10,7 +10,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ["3.7", "3.8", "3.9", "3.10"]
+        python-version: ["3.7", "3.8"]
     name: python-${{ matrix.python-version }} quick
     steps:
     - uses: actions/checkout@v2
diff --git a/setup.cfg b/setup.cfg
index 1df04ed1..afe52b99 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -10,7 +10,7 @@ process-timeout=600
 
 [metadata]
 description-file = README.md
-license_file = LICENSE.txt
+license_file = LICENSE
 
 [bdist_wheel]
 universal = 1

From 16c3f3bff759c2a8cf0a2ba1573cfa4fe2d62ec0 Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Tue, 28 Dec 2021 21:49:05 -0800
Subject: [PATCH 055/137] Upgrade networkx to v2

---
 setup.py                    |  2 +-
 src/mavis/assemble.py       | 33 +++++++++++++++++++++------------
 tests/unit/test_assemble.py | 19 +++++++++++--------
 3 files changed, 33 insertions(+), 21 deletions(-)

diff --git a/setup.py b/setup.py
index 015bb674..fdeeaf77 100644
--- a/setup.py
+++ b/setup.py
@@ -83,7 +83,7 @@ def check_nonpython_dependencies():
     'biopython>=1.70, <1.78',
     'braceexpand==0.1.2',
     'colour',
-    'networkx==1.11.0',
+    'networkx>=2.5,<3',
     'numpy>=1.13.1',
     'pandas>=1.1, <2',
     'pysam',
diff --git a/src/mavis/assemble.py b/src/mavis/assemble.py
index f7a9d301..0370ea01 100644
--- a/src/mavis/assemble.py
+++ b/src/mavis/assemble.py
@@ -66,6 +66,15 @@ class DeBruijnGraph(nx.DiGraph):
     enforces edge weights
     """
 
+    def get_out_edges(self, *args, **kwargs):
+        return list(self.out_edges(*args, **kwargs))
+
+    def get_in_edges(self, *args, **kwargs):
+        return list(self.in_edges(*args, **kwargs))
+
+    def get_nodes(self, *args, **kwargs):
+        return list(self.nodes(*args, **kwargs))
+
     def get_edge_freq(self, n1, n2):
         """
         returns the freq from the data attribute for a specified edge
@@ -85,7 +94,7 @@ def add_edge(self, n1, n2, freq=1):
         nx.DiGraph.add_edge(self, n1, n2, freq=freq)
 
     def all_edges(self, *nodes, data=False):
-        return self.in_edges(*nodes, data=data) + self.out_edges(*nodes, data=data)
+        return self.get_in_edges(*nodes, data=data) + self.get_out_edges(*nodes, data=data)
 
     def trim_tails_by_freq(self, min_weight):
         """
@@ -95,7 +104,7 @@ def trim_tails_by_freq(self, min_weight):
             min_weight (int): the minimum weight for an edge to be retained
         """
         ends = sorted(
-            [n for n in self.nodes() if self.out_degree(n) == 0 or self.in_degree(n) == 0]
+            [n for n in self.get_nodes() if self.out_degree(n) == 0 or self.in_degree(n) == 0]
         )
         visited = set()
 
@@ -126,16 +135,16 @@ def trim_forks_by_freq(self, min_weight):
         for all nodes in the graph, if the node has an out-degree > 1 and one of the outgoing
         edges has freq < min_weight. then that outgoing edge is deleted
         """
-        nodes = [n for n in self.nodes() if self.degree(n) > 2]
+        nodes = [n for n in self.get_nodes() if self.degree(n) > 2]
         for node in sorted(nodes):
             if self.out_degree(node) > 1:
-                outgoing_edges = self.out_edges(node, data=True)
+                outgoing_edges = self.get_out_edges(node, data=True)
                 best = max([e[2]['freq'] for e in outgoing_edges])
                 for src, tgt, data in outgoing_edges:
                     if data['freq'] < min_weight and data['freq'] != best:
                         self.remove_edge(src, tgt)
             if self.in_degree(node) > 1:
-                ingoing_edges = self.in_edges(node, data=True)
+                ingoing_edges = self.get_in_edges(node, data=True)
                 best = max([e[2]['freq'] for e in ingoing_edges])
                 for src, tgt, data in ingoing_edges:
                     if data['freq'] < min_weight and data['freq'] != best:
@@ -157,7 +166,7 @@ def trim_noncutting_paths_by_freq(self, min_weight):
             else:
                 path = []
                 while self.in_degree(src) == 1 and self.out_degree(src) == 1:
-                    s, t, data = self.in_edges(src, data=True)[0]
+                    s, t, data = self.get_in_edges(src, data=True)[0]
                     if data['freq'] >= min_weight or s in path:
                         break
                     path.insert(0, src)
@@ -165,7 +174,7 @@ def trim_noncutting_paths_by_freq(self, min_weight):
                 path.insert(0, src)
 
                 while self.in_degree(tgt) == 1 and self.out_degree(tgt) == 1:
-                    s, t, data = self.out_edges(tgt, data=True)[0]
+                    s, t, data = self.get_out_edges(tgt, data=True)[0]
                     if data['freq'] >= min_weight or t in path:
                         break
                     path.append(tgt)
@@ -193,7 +202,7 @@ def get_sinks(self, subgraph=None):
         """
         nodeset = set()
         if subgraph is None:
-            subgraph = self.nodes()
+            subgraph = self.get_nodes()
         for node in subgraph:
             if self.out_degree(node) == 0:
                 nodeset.add(node)
@@ -205,7 +214,7 @@ def get_sources(self, subgraph=None):
         """
         nodeset = set()
         if subgraph is None:
-            subgraph = self.nodes()
+            subgraph = self.get_nodes()
         for node in subgraph:
             if self.in_degree(node) == 0:
                 nodeset.add(node)
@@ -227,7 +236,7 @@ def digraph_connected_components(graph, subgraph=None):
         List[List]: returns a list of compnents which are lists of node names
     """
     if subgraph is None:
-        subgraph = set(graph.nodes())
+        subgraph = set(graph.get_nodes())
     g = nx.Graph()
     for src, tgt in graph.edges():
         if src in subgraph and tgt in subgraph:
@@ -387,7 +396,7 @@ def assemble(
         for kmer in kmers_list:
             assembly.add_edge(kmer[:-1], kmer[1:])
     # use the ab min edge weight to remove all low weight edges first
-    nodes = list(assembly.nodes())
+    nodes = assembly.get_nodes()
     for n in nodes:
         if assembly.in_degree(n) == 0 and assembly.out_degree(n) == 0:
             assembly.remove_node(n)
@@ -396,7 +405,7 @@ def assemble(
         subgraph = assembly.subgraph(component)
         if not nx.is_directed_acyclic_graph(subgraph):
             log('dropping cyclic component', time_stamp=False)
-            for node in subgraph.nodes():
+            for node in subgraph.get_nodes():
                 assembly.remove_node(node)
     # initial data cleaning
     assembly.trim_forks_by_freq(min_edge_trim_weight)
diff --git a/tests/unit/test_assemble.py b/tests/unit/test_assemble.py
index fbd5d0cb..73b3c6bf 100644
--- a/tests/unit/test_assemble.py
+++ b/tests/unit/test_assemble.py
@@ -105,7 +105,7 @@ def test_trim_tails_by_freq_forks(self):
         g.add_edge(8, 7)
         g.add_edge(9, 8)
         g.trim_tails_by_freq(2)
-        assert sorted(g.nodes()) == [1, 2, 3, 4, 5, 6]
+        assert sorted(g.get_nodes()) == [1, 2, 3, 4, 5, 6]
 
         g = DeBruijnGraph()
         for s, t in itertools.combinations([1, 2, 3, 4, 5, 6], 2):
@@ -117,7 +117,7 @@ def test_trim_tails_by_freq_forks(self):
         g.add_edge(8, 7)
         g.add_edge(9, 8)
         g.trim_tails_by_freq(2)
-        assert sorted(g.nodes()) == [1, 2, 3, 4, 5, 6, 7, 8]
+        assert sorted(g.get_nodes()) == [1, 2, 3, 4, 5, 6, 7, 8]
 
         g = DeBruijnGraph()
         for s, t in itertools.combinations([1, 2, 3, 4, 5, 6], 2):
@@ -128,7 +128,7 @@ def test_trim_tails_by_freq_forks(self):
         g.add_edge(7, 8)
         g.add_edge(9, 8)
         g.trim_tails_by_freq(2)
-        assert sorted(g.nodes()) == [1, 2, 3, 4, 5, 6]
+        assert sorted(g.get_nodes()) == [1, 2, 3, 4, 5, 6]
 
     def test_add_edge(self):
         g = DeBruijnGraph()
@@ -151,22 +151,25 @@ def test_trim_noncutting_paths_by_freq_degree_stop(self):
         for edge in g.edges():
             print(edge)
         g.trim_noncutting_paths_by_freq(3)
-        assert g.nodes() == list(range(1, 9)) + path1[1:-1]
+        print('g.nodes', g.nodes)
+        assert g.get_nodes() == list(range(1, 9)) + path1[1:-1]
+        print('g.nodes', g.nodes)
 
         # add an equal weight path to force namesorting
         path2 = [5, 13, 14, 15, 16, 1]
         for s, t in zip(path2, path2[1:]):
             g.add_edge(s, t)
-
+        print('g.nodes', g.nodes)
         g.trim_noncutting_paths_by_freq(3)
-        assert g.nodes() == list(range(1, 9)) + path2[1:-1]
+        print('g.nodes', g.nodes)
+        assert g.get_nodes() == list(range(1, 9)) + path2[1:-1]
 
         # add back the original path with a higher (but still low) weight
         for s, t in zip(path1, path1[1:]):
             g.add_edge(s, t, freq=2)
 
         g.trim_noncutting_paths_by_freq(3)
-        assert g.nodes() == list(range(1, 9)) + path1[1:-1]
+        assert g.get_nodes() == list(range(1, 9)) + path1[1:-1]
 
         # add the second path with 1 high weight edge
         path2 = [5, 13, 14, 15, 16, 1]
@@ -175,7 +178,7 @@ def test_trim_noncutting_paths_by_freq_degree_stop(self):
         g.add_edge(14, 15, freq=6)
 
         g.trim_noncutting_paths_by_freq(3)
-        assert g.nodes() == list(range(1, 9)) + path2[1:-1]
+        assert g.get_nodes() == list(range(1, 9)) + path2[1:-1]
 
 
 @pytest.fixture

From af634de4aafb36e2edeb83855da0ccb91fb8ec81 Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Tue, 28 Dec 2021 21:50:52 -0800
Subject: [PATCH 056/137] Add python 3.9/3.10 to workflows

---
 .github/workflows/build.yml       | 2 +-
 .github/workflows/quick-tests.yml | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 17b952f1..b5809403 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -15,7 +15,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ["3.7", "3.8"]
+        python-version: ["3.7", "3.8", "3.9", "3.10"]
     name: python-${{ matrix.python-version }}
     steps:
     - uses: actions/checkout@v2
diff --git a/.github/workflows/quick-tests.yml b/.github/workflows/quick-tests.yml
index 178eab7b..a1d333b4 100644
--- a/.github/workflows/quick-tests.yml
+++ b/.github/workflows/quick-tests.yml
@@ -10,7 +10,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ["3.7", "3.8"]
+        python-version: ["3.7", "3.8", "3.9", "3.10"]
     name: python-${{ matrix.python-version }} quick
     steps:
     - uses: actions/checkout@v2

From 21366907cb1706941f77f70a599afde2a3c1f628 Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Tue, 28 Dec 2021 21:51:08 -0800
Subject: [PATCH 057/137] Do not include docs/tests in dist

---
 MANIFEST.in | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/MANIFEST.in b/MANIFEST.in
index 2ab6e3c1..c1af92d1 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,5 +1,5 @@
 recursive-include src *.py *.json
 include README.md
 include LICENSE
-prune docs/build
-prune docs/source/auto
+prune docs
+prune tests

From a9415e28d99fa95b70afbb4a4011123db091849b Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Tue, 28 Dec 2021 21:52:23 -0800
Subject: [PATCH 058/137] Remove m2r dependency

---
 setup.py | 34 ++++++----------------------------
 1 file changed, 6 insertions(+), 28 deletions(-)

diff --git a/setup.py b/setup.py
index fdeeaf77..6cb7600d 100644
--- a/setup.py
+++ b/setup.py
@@ -1,37 +1,14 @@
 import os
 import re
+from pathlib import Path
 
 from setuptools import find_packages, setup
 
+this_directory = Path(__file__).parent
+long_description = (this_directory / "README.md").read_text()
 VERSION = '2.2.8'
 
 
-def parse_md_readme():
-    """
-    pypi won't render markdown. After conversion to rst it will still not render unless raw directives are removed
-    """
-    try:
-        from m2r import parse_from_file
-
-        rst_lines = parse_from_file('README.md').split('\n')
-        long_description = [
-            '.. image:: http://mavis.bcgsc.ca/docs/latest/_static/acronym.svg\n\n|\n'
-        ]  # backup since pip can't handle raw directives
-        i = 0
-        while i < len(rst_lines):
-            if re.match(r'^..\s+raw::.*', rst_lines[i]):
-                i += 1
-                while re.match(r'^(\s\s+|\t|$).*', rst_lines[i]):
-                    i += 1
-            else:
-                long_description.append(re.sub('>`_ ', '>`__ ', rst_lines[i]))  # anonymous links
-                i += 1
-        long_description = '\n'.join(long_description)
-    except (ImportError, OSError):
-        long_description = ''
-    return long_description
-
-
 def check_nonpython_dependencies():
     """
     check that the non-python dependencies have been installed.
@@ -92,7 +69,7 @@ def check_nonpython_dependencies():
     'mavis_config>=1.1.0, <2.0.0',
 ]
 
-DEPLOY_REQS = ['twine', 'm2r', 'wheel']
+DEPLOY_REQS = ['twine', 'wheel']
 
 
 setup(
@@ -103,7 +80,8 @@ def check_nonpython_dependencies():
     package_dir={'': 'src'},
     packages=find_packages(where='src'),
     description='A Structural Variant Post-Processing Package',
-    long_description=parse_md_readme(),
+    long_description=long_description,
+    long_description_content_type='text/markdown',
     install_requires=INSTALL_REQS,
     extras_require={
         'docs': DOC_REQS,

From dd6f0e1b59c6944311e7f6f7be0557c95a9a6d19 Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Tue, 28 Dec 2021 21:58:07 -0800
Subject: [PATCH 059/137] Fix mapping import for 3.10

---
 src/mavis/schemas/__init__.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/src/mavis/schemas/__init__.py b/src/mavis/schemas/__init__.py
index a0568bac..f41bda5c 100644
--- a/src/mavis/schemas/__init__.py
+++ b/src/mavis/schemas/__init__.py
@@ -1,10 +1,14 @@
-import collections
+try:
+    from collections import Mapping
+except ImportError:
+    from collections.abc import Mapping
+
 import os
 
 from snakemake.utils import validate as snakemake_validate
 
 
-class ImmutableDict(collections.Mapping):
+class ImmutableDict(Mapping):
     def __init__(self, data):
         self._data = data
 

From b53ff6476a833104df104b8f950bf81b80957041 Mon Sep 17 00:00:00 2001
From: Jeremy Fan <jfan@bcgsc.ca>
Date: Tue, 28 Dec 2021 14:52:44 -0800
Subject: [PATCH 060/137] Move package metadata to setup.cfg and add pyproject.toml

---
 pyproject.toml |  1 +
 setup.cfg      | 95 +++++++++++++++++++++++++++++++++++++++++++-------
 setup.py       | 83 ++-----------------------------------------
 3 files changed, 85 insertions(+), 94 deletions(-)
 create mode 100644 pyproject.toml

diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 00000000..9e5a9848
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1 @@
+build-backend = "setuptools.build_meta"
diff --git a/setup.cfg b/setup.cfg
index afe52b99..2459069c 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,23 +1,92 @@
-[nosetests]
-with-coverage=1
-cover-package=mavis,tab
-cover-html=1
-cover-html-dir=coverage
-cover-inclusive=1
-cover-erase=1
-processes=2
-process-timeout=600
-
 [metadata]
-description-file = README.md
+name = mavis
+version = 2.2.10
+url = https://github.com/bcgsc/mavis.git
+download_url = https://github.com/bcgsc/mavis/archive/v2.2.10.tar.gz
+description = A Structural Variant Post-Processing Package
+author_email = creisle@bcgsc.ca
+author = Caralyn Reisle
+maintainer_email = creisle@bcgsc.ca
+maintainer = Caralyn Reisle
+long_description = file: README.md, LICENSE
+long_description_content_type = text/markdown
 license_file = LICENSE
+project_urls = mavis = http://mavis.bcgsc.ca
 
 [bdist_wheel]
 universal = 1
 
 [pycodestyle]
-ignore = E501,W503,E203
+ignore = E501
+    W503
+    E203
 statistics = True
 
 [flake8]
-ignore = E501,W503,E203
+ignore = E501
+    W503
+    E203
+
+[options]
+packages = find:
+python_requires = >=3.2
+dependency_links = []
+include_package_data = True
+install_requires =
+    Distance>=0.1.3
+    Shapely>=1.6.4.post1
+    biopython>=1.70, <1.78
+    braceexpand==0.1.2
+    colour
+    networkx>=2.5,<3
+    numpy>=1.13.1
+    pysam
+    shortuuid>=0.5.0
+    svgwrite
+setup_requires =
+    pip>=9.0.0
+    setuptools>=36.0.0
+
+[options.packages.find]
+exclude =
+    tests
+
+[options.extras_require]
+doc =
+    mkdocs==1.1.2
+    markdown-refdocs
+    mkdocs-material==5.4.0
+    markdown-include
+    mkdocs-simple-hooks==0.1.2
+test =
+    timeout-decorator>=0.3.3
+    coverage>=4.2
+    pycodestyle>=2.3.1
+    pytest
+    pytest-cov
+dev =
+    black
+    flake8
+    twine
+    wheel
+    timeout-decorator>=0.3.3
+    coverage>=4.2
+    pycodestyle>=2.3.1
+    pytest
+    pytest-cov
+    mkdocs==1.1.2
+    markdown-refdocs
+    mkdocs-material==5.4.0
+    markdown-include
+    mkdocs-simple-hooks==0.1.2
+deploy =
+    twine
+    wheel
+tools =
+    pyensembl
+    simplejson
+
+[options.entry_points]
+console_scripts =
+    mavis = mavis.main:main
+    calculate_ref_alt_counts = tools.calculate_ref_alt_counts:main
diff --git a/setup.py b/setup.py
index 6cb7600d..853de05b 100644
--- a/setup.py
+++ b/setup.py
@@ -1,12 +1,7 @@
 import os
 import re
-from pathlib import Path
 
-from setuptools import find_packages, setup
-
-this_directory = Path(__file__).parent
-long_description = (this_directory / "README.md").read_text()
-VERSION = '2.2.8'
+from setuptools import setup
 
 
 def check_nonpython_dependencies():
@@ -31,79 +26,5 @@ def check_nonpython_dependencies():
         print('Found: aligner at', pth)
 
 
-# HSTLIB is a dependency for pysam.
-# The cram file libraries fail for some OS versions and mavis does not use cram files so we disable these options
-os.environ['HTSLIB_CONFIGURE_OPTIONS'] = '--disable-lzma --disable-bz2 --disable-libcurl'
-
-
-TEST_REQS = [
-    'timeout-decorator>=0.3.3',
-    'coverage>=4.2',
-    'pycodestyle>=2.3.1',
-    'pytest',
-    'pytest-cov',
-]
-
-
-DOC_REQS = [
-    'mkdocs==1.1.2',
-    'markdown_refdocs',
-    'mkdocs-material==5.4.0',
-    'markdown-include',
-    'mkdocs-simple-hooks==0.1.2',
-]
-
-
-INSTALL_REQS = [
-    'Distance>=0.1.3',
-    'Shapely>=1.6.4.post1',
-    'biopython>=1.70, <1.78',
-    'braceexpand==0.1.2',
-    'colour',
-    'networkx>=2.5,<3',
-    'numpy>=1.13.1',
-    'pandas>=1.1, <2',
-    'pysam',
-    'shortuuid>=0.5.0',
-    'svgwrite',
-    'mavis_config>=1.1.0, <2.0.0',
-]
-
-DEPLOY_REQS = ['twine', 'wheel']
-
-
-setup(
-    name='mavis',
-    version='{}'.format(VERSION),
-    url='https://github.com/bcgsc/mavis.git',
-    download_url='https://github.com/bcgsc/mavis/archive/v{}.tar.gz'.format(VERSION),
-    package_dir={'': 'src'},
-    packages=find_packages(where='src'),
-    description='A Structural Variant Post-Processing Package',
-    long_description=long_description,
-    long_description_content_type='text/markdown',
-    install_requires=INSTALL_REQS,
-    extras_require={
-        'docs': DOC_REQS,
-        'test': TEST_REQS,
-        'dev': ['black==20.8b1', 'flake8'] + DOC_REQS + TEST_REQS + DEPLOY_REQS,
-        'deploy': DEPLOY_REQS,
-        'tools': ['pyensembl', 'simplejson'],
-    },
-    tests_require=TEST_REQS,
-    setup_requires=['pip>=9.0.0', 'setuptools>=36.0.0'],
-    python_requires='>=3.7',
-    author='Caralyn Reisle',
-    author_email='creisle@bcgsc.ca',
-    test_suite='tests',
-    entry_points={
-        'console_scripts': [
-            'mavis = mavis.main:main',
-            'calculate_ref_alt_counts = tools.calculate_ref_alt_counts:main',
-        ]
-    },
-    include_package_data=True,
-    data_files=[('mavis', ['src/mavis/schemas/config.json', 'src/mavis/schemas/overlay.json'])],
-    project_urls={'mavis': 'http://mavis.bcgsc.ca'},
-)
+setup()
 check_nonpython_dependencies()

From a646440e548775d86efa6756def4c1448a4a5d14 Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Tue, 28 Dec 2021 22:33:57 -0800
Subject: [PATCH 061/137] Swap maintainer to mavis email alias

---
 setup.cfg | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/setup.cfg b/setup.cfg
index 2459069c..c8f751e8 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -6,9 +6,9 @@ download_url = https://github.com/bcgsc/mavis/archive/v2.2.10.tar.gz
 description = A Structural Variant Post-Processing Package
 author_email = creisle@bcgsc.ca
 author = Caralyn Reisle
-maintainer_email = creisle@bcgsc.ca
-maintainer = Caralyn Reisle
-long_description = file: README.md, LICENSE
+maintainer_email = mavis@bcgsc.ca
+maintainer = mavis
+long_description = file: README.md
 long_description_content_type = text/markdown
 license_file = LICENSE
 project_urls = mavis = http://mavis.bcgsc.ca

From 0970918a4770c9765ae11cb4f58425bc3290a62f Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Tue, 28 Dec 2021 22:42:41 -0800
Subject: [PATCH 062/137] Specify src dir for install

---
 setup.cfg | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/setup.cfg b/setup.cfg
index c8f751e8..8b1a07da 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -29,7 +29,9 @@ ignore = E501
 
 [options]
 packages = find:
-python_requires = >=3.2
+package_dir =
+     = src
+python_requires = >=3.7
 dependency_links = []
 include_package_data = True
 install_requires =
@@ -48,8 +50,8 @@ setup_requires =
     setuptools>=36.0.0
 
 [options.packages.find]
-exclude =
-    tests
+exclude = tests
+where = src
 
 [options.extras_require]
 doc =

From 2cf3065ab69b99004cc3590f084c3223706e952f Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Tue, 28 Dec 2021 22:47:03 -0800
Subject: [PATCH 063/137] Add v3 dependencies

---
 setup.cfg | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/setup.cfg b/setup.cfg
index 8b1a07da..b5cfe3a3 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -35,14 +35,16 @@ python_requires = >=3.7
 dependency_links = []
 include_package_data = True
 install_requires =
-    Distance>=0.1.3
-    Shapely>=1.6.4.post1
     biopython>=1.70, <1.78
     braceexpand==0.1.2
     colour
+    Distance>=0.1.3
+    mavis_config>=1.1.0, <2.0.0
     networkx>=2.5,<3
     numpy>=1.13.1
+    pandas>=1.1, <2
     pysam
+    Shapely>=1.6.4.post1
     shortuuid>=0.5.0
     svgwrite
 setup_requires =

From f16b17492371557bedd725d93e5ad4e139cac0b7 Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Tue, 4 Jan 2022 11:34:57 -0800
Subject: [PATCH 064/137] Add typing extensions as dependency

---
 setup.cfg              | 1 +
 src/mavis/tools/vcf.py | 7 ++++++-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/setup.cfg b/setup.cfg
index b5cfe3a3..a34fc7ef 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -47,6 +47,7 @@ install_requires =
     Shapely>=1.6.4.post1
     shortuuid>=0.5.0
     svgwrite
+    typing_extensions>=4
 setup_requires =
     pip>=9.0.0
     setuptools>=36.0.0
diff --git a/src/mavis/tools/vcf.py b/src/mavis/tools/vcf.py
index 77a2e22a..f87aa764 100644
--- a/src/mavis/tools/vcf.py
+++ b/src/mavis/tools/vcf.py
@@ -5,7 +5,12 @@
 
 import pandas as pd
 from pysam import VariantFile
-from typing_extensions import TypedDict
+
+try:
+    # TypedDict added to typing package directly in later versions
+    from typing import TypedDict
+except ImportError:
+    from typing_extensions import TypedDict
 
 from ..constants import COLUMNS, ORIENT, SVTYPE
 from ..util import DEVNULL

From 967bdb481b49001ba62a1d7473e45b1debd8dd91 Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Tue, 4 Jan 2022 11:50:34 -0800
Subject: [PATCH 065/137] Try being more explicit about json in manifest

---
 MANIFEST.in | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MANIFEST.in b/MANIFEST.in
index c1af92d1..691ef59e 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,4 +1,5 @@
 recursive-include src *.py *.json
+include src/mavis/schemas/*.json
 include README.md
 include LICENSE
 prune docs

From c3363cc55072035bf3754279f4571e136647b3a0 Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Tue, 4 Jan 2022 12:16:14 -0800
Subject: [PATCH 066/137] Copy manifest and pyproject.toml to docker container

---
 Dockerfile  | 2 ++
 MANIFEST.in | 1 -
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/Dockerfile b/Dockerfile
index d3fbd1f4..b62ea761 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -28,6 +28,8 @@ RUN wget http://hgdownload.soe.ucsc.edu/admin/exe/linux.x86_64/blat/blat && \
 
 COPY setup.py setup.py
 COPY setup.cfg setup.cfg
+COPY MANIFEST.in MANIFEST.in
+COPY pyproject.toml pyproject.toml
 COPY src src
 COPY LICENSE LICENSE
 COPY README.md README.md
diff --git a/MANIFEST.in b/MANIFEST.in
index 691ef59e..c1af92d1 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,5 +1,4 @@
 recursive-include src *.py *.json
-include src/mavis/schemas/*.json
 include README.md
 include LICENSE
 prune docs

From 86b046fe9f81e4a5c2ed5e2621093e0f36276ed4 Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Tue, 4 Jan 2022 12:42:14 -0800
Subject: [PATCH 067/137] Create editable copy of subgraph

In networkx v2 subgraphs are frozen and must be copied to be edited
---
 src/mavis/assemble.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mavis/assemble.py b/src/mavis/assemble.py
index 0370ea01..0683dbad 100644
--- a/src/mavis/assemble.py
+++ b/src/mavis/assemble.py
@@ -418,7 +418,7 @@ def assemble(
         # pull the path scores
         path_scores.update(
             pull_contigs_from_component(
-                assembly.subgraph(component),
+                assembly.subgraph(component).copy(),
                 component,
                 min_edge_trim_weight=min_edge_trim_weight,
                 assembly_max_paths=assembly_max_paths,

From ca06372fe0d88b257981632b34fb52af4c54420d Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Tue, 4 Jan 2022 13:04:10 -0800
Subject: [PATCH 068/137] Increase assembly timeout to account for new graph
 copy requirement

---
 tests/integration/test_assemble.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/integration/test_assemble.py b/tests/integration/test_assemble.py
index 8cae4394..6930b685 100644
--- a/tests/integration/test_assemble.py
+++ b/tests/integration/test_assemble.py
@@ -351,7 +351,7 @@ def test_multiple_events(self):
         assert assemblies[0].seq == expected
         assert len(assemblies) == 1
 
-    @timeout_decorator.timeout(300)
+    @timeout_decorator.timeout(600)
     @long_running_test
     def test_large_assembly(self, large_assembly_seq):
         # simply testing that this will complete before the timeout

From 94b7630c2d7629dab482fc1495342fbafec6672b Mon Sep 17 00:00:00 2001
From: Jeremy Fan <jfan@bcgsc.ca>
Date: Tue, 4 Jan 2022 23:41:47 -0800
Subject: [PATCH 069/137] cleaned up vcf.py and added unit tests for sniffles
 and cuteSV

---
 src/mavis/tools/vcf.py           |   1 -
 tests/data/cuteSV.vcf            | 326 ++++++++++++++++++++++++++++
 tests/data/sniffles.vcf          | 355 +++++++++++++++++++++++++++++++
 tests/end_to_end/test_convert.py |  14 ++
 4 files changed, 695 insertions(+), 1 deletion(-)
 create mode 100644 tests/data/cuteSV.vcf
 create mode 100644 tests/data/sniffles.vcf

diff --git a/src/mavis/tools/vcf.py b/src/mavis/tools/vcf.py
index f87aa764..eea0fadf 100644
--- a/src/mavis/tools/vcf.py
+++ b/src/mavis/tools/vcf.py
@@ -4,7 +4,6 @@
 from typing import Dict, List, Optional, Tuple
 
 import pandas as pd
-from pysam import VariantFile
 
 try:
     # TypedDict added to typing package directly in later versions
diff --git a/tests/data/cuteSV.vcf b/tests/data/cuteSV.vcf
new file mode 100644
index 00000000..d95fb3f4
--- /dev/null
+++ b/tests/data/cuteSV.vcf
@@ -0,0 +1,326 @@
+##fileformat=VCFv4.2
+##FILTER=<ID=PASS,Description="All filters passed">
+##source=cuteSV-1.0.11
+##fileDate=2021-06-18 18:52:00 5-PDT
+##contig=<ID=chr1,length=248956422>
+##contig=<ID=chr2,length=242193529>
+##contig=<ID=chr3,length=198295559>
+##contig=<ID=chr4,length=190214555>
+##contig=<ID=chr5,length=181538259>
+##contig=<ID=chr6,length=170805979>
+##contig=<ID=chr7,length=159345973>
+##contig=<ID=chr8,length=145138636>
+##contig=<ID=chr9,length=138394717>
+##contig=<ID=chr10,length=133797422>
+##contig=<ID=chr11,length=135086622>
+##contig=<ID=chr12,length=133275309>
+##contig=<ID=chr13,length=114364328>
+##contig=<ID=chr14,length=107043718>
+##contig=<ID=chr15,length=101991189>
+##contig=<ID=chr16,length=90338345>
+##contig=<ID=chr17,length=83257441>
+##contig=<ID=chr18,length=80373285>
+##contig=<ID=chr19,length=58617616>
+##contig=<ID=chr20,length=64444167>
+##contig=<ID=chr21,length=46709983>
+##contig=<ID=chr22,length=50818468>
+##contig=<ID=chrX,length=156040895>
+##contig=<ID=chrY,length=57227415>
+##contig=<ID=chrM,length=16569>
+##contig=<ID=chr1_KI270706v1_random,length=175055>
+##contig=<ID=chr1_KI270707v1_random,length=32032>
+##contig=<ID=chr1_KI270708v1_random,length=127682>
+##contig=<ID=chr1_KI270709v1_random,length=66860>
+##contig=<ID=chr1_KI270710v1_random,length=40176>
+##contig=<ID=chr1_KI270711v1_random,length=42210>
+##contig=<ID=chr1_KI270712v1_random,length=176043>
+##contig=<ID=chr1_KI270713v1_random,length=40745>
+##contig=<ID=chr1_KI270714v1_random,length=41717>
+##contig=<ID=chr2_KI270715v1_random,length=161471>
+##contig=<ID=chr2_KI270716v1_random,length=153799>
+##contig=<ID=chr3_GL000221v1_random,length=155397>
+##contig=<ID=chr4_GL000008v2_random,length=209709>
+##contig=<ID=chr5_GL000208v1_random,length=92689>
+##contig=<ID=chr9_KI270717v1_random,length=40062>
+##contig=<ID=chr9_KI270718v1_random,length=38054>
+##contig=<ID=chr9_KI270719v1_random,length=176845>
+##contig=<ID=chr9_KI270720v1_random,length=39050>
+##contig=<ID=chr11_KI270721v1_random,length=100316>
+##contig=<ID=chr14_GL000009v2_random,length=201709>
+##contig=<ID=chr14_GL000225v1_random,length=211173>
+##contig=<ID=chr14_KI270722v1_random,length=194050>
+##contig=<ID=chr14_GL000194v1_random,length=191469>
+##contig=<ID=chr14_KI270723v1_random,length=38115>
+##contig=<ID=chr14_KI270724v1_random,length=39555>
+##contig=<ID=chr14_KI270725v1_random,length=172810>
+##contig=<ID=chr14_KI270726v1_random,length=43739>
+##contig=<ID=chr15_KI270727v1_random,length=448248>
+##contig=<ID=chr16_KI270728v1_random,length=1872759>
+##contig=<ID=chr17_GL000205v2_random,length=185591>
+##contig=<ID=chr17_KI270729v1_random,length=280839>
+##contig=<ID=chr17_KI270730v1_random,length=112551>
+##contig=<ID=chr22_KI270731v1_random,length=150754>
+##contig=<ID=chr22_KI270732v1_random,length=41543>
+##contig=<ID=chr22_KI270733v1_random,length=179772>
+##contig=<ID=chr22_KI270734v1_random,length=165050>
+##contig=<ID=chr22_KI270735v1_random,length=42811>
+##contig=<ID=chr22_KI270736v1_random,length=181920>
+##contig=<ID=chr22_KI270737v1_random,length=103838>
+##contig=<ID=chr22_KI270738v1_random,length=99375>
+##contig=<ID=chr22_KI270739v1_random,length=73985>
+##contig=<ID=chrY_KI270740v1_random,length=37240>
+##contig=<ID=chrUn_KI270302v1,length=2274>
+##contig=<ID=chrUn_KI270304v1,length=2165>
+##contig=<ID=chrUn_KI270303v1,length=1942>
+##contig=<ID=chrUn_KI270305v1,length=1472>
+##contig=<ID=chrUn_KI270322v1,length=21476>
+##contig=<ID=chrUn_KI270320v1,length=4416>
+##contig=<ID=chrUn_KI270310v1,length=1201>
+##contig=<ID=chrUn_KI270316v1,length=1444>
+##contig=<ID=chrUn_KI270315v1,length=2276>
+##contig=<ID=chrUn_KI270312v1,length=998>
+##contig=<ID=chrUn_KI270311v1,length=12399>
+##contig=<ID=chrUn_KI270317v1,length=37690>
+##contig=<ID=chrUn_KI270412v1,length=1179>
+##contig=<ID=chrUn_KI270411v1,length=2646>
+##contig=<ID=chrUn_KI270414v1,length=2489>
+##contig=<ID=chrUn_KI270419v1,length=1029>
+##contig=<ID=chrUn_KI270418v1,length=2145>
+##contig=<ID=chrUn_KI270420v1,length=2321>
+##contig=<ID=chrUn_KI270424v1,length=2140>
+##contig=<ID=chrUn_KI270417v1,length=2043>
+##contig=<ID=chrUn_KI270422v1,length=1445>
+##contig=<ID=chrUn_KI270423v1,length=981>
+##contig=<ID=chrUn_KI270425v1,length=1884>
+##contig=<ID=chrUn_KI270429v1,length=1361>
+##contig=<ID=chrUn_KI270442v1,length=392061>
+##contig=<ID=chrUn_KI270466v1,length=1233>
+##contig=<ID=chrUn_KI270465v1,length=1774>
+##contig=<ID=chrUn_KI270467v1,length=3920>
+##contig=<ID=chrUn_KI270435v1,length=92983>
+##contig=<ID=chrUn_KI270438v1,length=112505>
+##contig=<ID=chrUn_KI270468v1,length=4055>
+##contig=<ID=chrUn_KI270510v1,length=2415>
+##contig=<ID=chrUn_KI270509v1,length=2318>
+##contig=<ID=chrUn_KI270518v1,length=2186>
+##contig=<ID=chrUn_KI270508v1,length=1951>
+##contig=<ID=chrUn_KI270516v1,length=1300>
+##contig=<ID=chrUn_KI270512v1,length=22689>
+##contig=<ID=chrUn_KI270519v1,length=138126>
+##contig=<ID=chrUn_KI270522v1,length=5674>
+##contig=<ID=chrUn_KI270511v1,length=8127>
+##contig=<ID=chrUn_KI270515v1,length=6361>
+##contig=<ID=chrUn_KI270507v1,length=5353>
+##contig=<ID=chrUn_KI270517v1,length=3253>
+##contig=<ID=chrUn_KI270529v1,length=1899>
+##contig=<ID=chrUn_KI270528v1,length=2983>
+##contig=<ID=chrUn_KI270530v1,length=2168>
+##contig=<ID=chrUn_KI270539v1,length=993>
+##contig=<ID=chrUn_KI270538v1,length=91309>
+##contig=<ID=chrUn_KI270544v1,length=1202>
+##contig=<ID=chrUn_KI270548v1,length=1599>
+##contig=<ID=chrUn_KI270583v1,length=1400>
+##contig=<ID=chrUn_KI270587v1,length=2969>
+##contig=<ID=chrUn_KI270580v1,length=1553>
+##contig=<ID=chrUn_KI270581v1,length=7046>
+##contig=<ID=chrUn_KI270579v1,length=31033>
+##contig=<ID=chrUn_KI270589v1,length=44474>
+##contig=<ID=chrUn_KI270590v1,length=4685>
+##contig=<ID=chrUn_KI270584v1,length=4513>
+##contig=<ID=chrUn_KI270582v1,length=6504>
+##contig=<ID=chrUn_KI270588v1,length=6158>
+##contig=<ID=chrUn_KI270593v1,length=3041>
+##contig=<ID=chrUn_KI270591v1,length=5796>
+##contig=<ID=chrUn_KI270330v1,length=1652>
+##contig=<ID=chrUn_KI270329v1,length=1040>
+##contig=<ID=chrUn_KI270334v1,length=1368>
+##contig=<ID=chrUn_KI270333v1,length=2699>
+##contig=<ID=chrUn_KI270335v1,length=1048>
+##contig=<ID=chrUn_KI270338v1,length=1428>
+##contig=<ID=chrUn_KI270340v1,length=1428>
+##contig=<ID=chrUn_KI270336v1,length=1026>
+##contig=<ID=chrUn_KI270337v1,length=1121>
+##contig=<ID=chrUn_KI270363v1,length=1803>
+##contig=<ID=chrUn_KI270364v1,length=2855>
+##contig=<ID=chrUn_KI270362v1,length=3530>
+##contig=<ID=chrUn_KI270366v1,length=8320>
+##contig=<ID=chrUn_KI270378v1,length=1048>
+##contig=<ID=chrUn_KI270379v1,length=1045>
+##contig=<ID=chrUn_KI270389v1,length=1298>
+##contig=<ID=chrUn_KI270390v1,length=2387>
+##contig=<ID=chrUn_KI270387v1,length=1537>
+##contig=<ID=chrUn_KI270395v1,length=1143>
+##contig=<ID=chrUn_KI270396v1,length=1880>
+##contig=<ID=chrUn_KI270388v1,length=1216>
+##contig=<ID=chrUn_KI270394v1,length=970>
+##contig=<ID=chrUn_KI270386v1,length=1788>
+##contig=<ID=chrUn_KI270391v1,length=1484>
+##contig=<ID=chrUn_KI270383v1,length=1750>
+##contig=<ID=chrUn_KI270393v1,length=1308>
+##contig=<ID=chrUn_KI270384v1,length=1658>
+##contig=<ID=chrUn_KI270392v1,length=971>
+##contig=<ID=chrUn_KI270381v1,length=1930>
+##contig=<ID=chrUn_KI270385v1,length=990>
+##contig=<ID=chrUn_KI270382v1,length=4215>
+##contig=<ID=chrUn_KI270376v1,length=1136>
+##contig=<ID=chrUn_KI270374v1,length=2656>
+##contig=<ID=chrUn_KI270372v1,length=1650>
+##contig=<ID=chrUn_KI270373v1,length=1451>
+##contig=<ID=chrUn_KI270375v1,length=2378>
+##contig=<ID=chrUn_KI270371v1,length=2805>
+##contig=<ID=chrUn_KI270448v1,length=7992>
+##contig=<ID=chrUn_KI270521v1,length=7642>
+##contig=<ID=chrUn_GL000195v1,length=182896>
+##contig=<ID=chrUn_GL000219v1,length=179198>
+##contig=<ID=chrUn_GL000220v1,length=161802>
+##contig=<ID=chrUn_GL000224v1,length=179693>
+##contig=<ID=chrUn_KI270741v1,length=157432>
+##contig=<ID=chrUn_GL000226v1,length=15008>
+##contig=<ID=chrUn_GL000213v1,length=164239>
+##contig=<ID=chrUn_KI270743v1,length=210658>
+##contig=<ID=chrUn_KI270744v1,length=168472>
+##contig=<ID=chrUn_KI270745v1,length=41891>
+##contig=<ID=chrUn_KI270746v1,length=66486>
+##contig=<ID=chrUn_KI270747v1,length=198735>
+##contig=<ID=chrUn_KI270748v1,length=93321>
+##contig=<ID=chrUn_KI270749v1,length=158759>
+##contig=<ID=chrUn_KI270750v1,length=148850>
+##contig=<ID=chrUn_KI270751v1,length=150742>
+##contig=<ID=chrUn_KI270752v1,length=27745>
+##contig=<ID=chrUn_KI270753v1,length=62944>
+##contig=<ID=chrUn_KI270754v1,length=40191>
+##contig=<ID=chrUn_KI270755v1,length=36723>
+##contig=<ID=chrUn_KI270756v1,length=79590>
+##contig=<ID=chrUn_KI270757v1,length=71251>
+##contig=<ID=chrUn_GL000214v1,length=137718>
+##contig=<ID=chrUn_KI270742v1,length=186739>
+##contig=<ID=chrUn_GL000216v2,length=176608>
+##contig=<ID=chrUn_GL000218v1,length=161147>
+##contig=<ID=chrEBV,length=171823>
+##contig=<ID=NC_001416.1,length=48502>
+##ALT=<ID=INS,Description="Insertion of novel sequence relative to the reference">
+##ALT=<ID=DEL,Description="Deletion relative to the reference">
+##ALT=<ID=DUP,Description="Region of elevated copy number relative to the reference">
+##ALT=<ID=INV,Description="Inversion of reference sequence">
+##ALT=<ID=BND,Description="Breakend of translocation">
+##INFO=<ID=PRECISE,Number=0,Type=Flag,Description="Precise structural variant">
+##INFO=<ID=IMPRECISE,Number=0,Type=Flag,Description="Imprecise structural variant">
+##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">
+##INFO=<ID=SVLEN,Number=1,Type=Integer,Description="Difference in length between REF and ALT alleles">
+##INFO=<ID=CHR2,Number=1,Type=String,Description="Chromosome for END coordinate in case of a translocation">
+##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the variant described in this record">
+##INFO=<ID=CIPOS,Number=2,Type=Integer,Description="Confidence interval around POS for imprecise variants">
+##INFO=<ID=CILEN,Number=2,Type=Integer,Description="Confidence interval around inserted/deleted material between breakends">
+##INFO=<ID=RE,Number=1,Type=Integer,Description="Number of read support this record">
+##INFO=<ID=STRAND,Number=A,Type=String,Description="Strand orientation of the adjacency in BEDPE format (DEL:+-, DUP:-+, INV:++/--)">
+##INFO=<ID=RNAMES,Number=.,Type=String,Description="Supporting read names of SVs (comma separated)">
+##FILTER=<ID=q5,Description="Quality below 5">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=DR,Number=1,Type=Integer,Description="# High-quality reference reads">
+##FORMAT=<ID=DV,Number=1,Type=Integer,Description="# High-quality variant reads">
+##FORMAT=<ID=PL,Number=G,Type=Integer,Description="# Phred-scaled genotype likelihoods rounded to the closest integer">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="# Genotype quality">
+##CommandLine="cuteSV --max_cluster_bias_INS 100 --diff_ratio_merging_INS 0.3 --max_cluster_bias_DEL 100 --diff_ratio_merging_DEL 0.3 -t 72 -s 5 --max_split_parts 30 --report_readid --genotype /projects/jfan_prj/jfan_prj/Nanopore_Testing/2021_nanopore_sv_testing/scratch/depth_testing/POG/COLO829/minimap2_bam/F24721_merged_sorted.bam /projects/jfan_prj/jfan_prj/Nanopore_Testing/2021_nanopore_sv_testing/scratch/depth_testing/POG/COLO829/hg38_no_alt_phage_lambda.fa F24721_merged_sorted.bam_5_read.vcf F24721_merged_sorted.bam"
+##bcftools_viewVersion=1.11+htslib-1.11
+##bcftools_viewCommand=view --regions chr1 F24721_merged_sorted.bam_5_read.vcf.gz; Date=Tue Jan  4 22:11:46 2022
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	NULL
+chr1	10011	cuteSV.BND.0	N	[chr17:41490824[N	0.1	q5	IMPRECISE;SVTYPE=BND;RE=6;RNAMES=ee2ad47c-5065-4825-9697-1aab02c409eb,debd3e06-6b28-4e12-b5a5-2064270d20e7,533ec7a2-4008-4148-87c7-5c5e9a6a7b05,0195fcbc-037a-4026-917c-768b3effe0b1,0163af83-26fa-455b-a6ee-b1070203087a,d8d2eb1f-41b7-4d71-89bd-a78ed9de2a66	GT:DR:DV:PL:GQ	0/0:24:6:0,19,172:19
+chr1	10027	cuteSV.BND.1	N	[chr3:198172735[N	0	q5	IMPRECISE;SVTYPE=BND;RE=7;RNAMES=4968b938-ac4d-4e30-90a5-5838c58fad34,1553213b-5917-497f-be48-78dee4b1c26b,d60daf48-6cf9-488d-b9b0-3a4a18cc0340,568571dd-4342-4a25-bdc9-552213bf3765,77368f76-6a14-4bf7-a9fe-f0dede9c19ff,86dfbc36-4f4d-47e0-b9b8-6b38bc96dc72,1f0a2c70-15ac-4553-ae9e-bb7277d2f987	GT:DR:DV:PL:GQ	0/0:28:7:0,23,200:22
+chr1	10468	cuteSV.BND.2	N	N]chr17:41490879]	0.1	q5	IMPRECISE;SVTYPE=BND;RE=6;RNAMES=620955cc-9f3e-4849-b2a6-9f20c2550a73,d84b9ae7-9ed2-4e60-8d4f-1e31d4a2e362,88393d96-032b-4781-95e8-900c7c8e2d3c,efb016bc-90bb-49bc-82a1-28bd881f602f,15a3c0f0-28aa-4638-aa6e-bec6b81e1188,12b2ee2b-f19d-4bd5-9b63-fe8c4310b3ee	GT:DR:DV:PL:GQ	0/0:24:6:0,19,172:19
+chr1	10469	cuteSV.BND.3	N	N]chrX:156030800]	0.1	q5	IMPRECISE;SVTYPE=BND;RE=6;RNAMES=1cde1b1c-7d45-48da-a8e0-78edcae385d1,1553213b-5917-497f-be48-78dee4b1c26b,78301a41-17b9-4c18-92e0-1a2b3dbf2e38,f9aa8b4b-db03-4e80-aef8-fead40f28940,99568c1f-8dec-4d4b-a9e0-404c677aeef5,c2820162-0910-4105-90eb-4abb80bf1b5a	GT:DR:DV:PL:GQ	0/0:24:6:0,19,172:19
+chr1	35143	cuteSV.BND.4	N	N[chr20:60000[	0.2	q5	IMPRECISE;SVTYPE=BND;RE=6;RNAMES=3723fb60-862e-4a3e-804b-744b3f741267,4f2f6804-bf08-4527-8a19-18dba1b89cf1,b2b644f7-d646-4f3b-b262-bbe8880eb088,74860d07-8b65-4643-8881-d64b4ef8b31b,5f6c3321-121c-471b-be42-36559a25227f,fb776225-90b5-4063-9528-0a8623d3297e	GT:DR:DV:PL:GQ	0/0:22:6:0,14,153:14
+chr1	136878	cuteSV.INS.5	G	GAGTGTGGAGGGCCGGTGTGAGTAAGGCTCACGCTGACCTCTGTCCGCGAGGGCCGGTGCAGACAAGGGGCCCGGCTGACCTCTCTCAGCTGGGAGGGCCAGCAGAAAGCAAGGCTCACACTGACCTCTCTCAGCATGGAGAGGGCCTGGTGTGAGA	190.8	PASS	PRECISE;SVTYPE=INS;SVLEN=156;END=136878;CIPOS=-72,72;CILEN=-33,33;RE=20;RNAMES=25796517-ff72-4482-b6c0-2aa7de0174ce,c58bf71a-cc0c-4268-8d89-a2ca5fdb4d9c,f2228c6c-d17a-4262-b2c0-4971299a4e24,0985e9ba-ea9b-40ab-b4da-d1ed9544fb8e,a3395cec-d8bf-486b-9599-7547f716cd59,f8b67d43-8045-461f-ada1-969d92564c23,4b73faff-93bd-4af7-adcd-d96abc1a3b28,3642879a-10de-45c3-9d93-02a5063d3a8c,99251e1c-7d12-4db5-841f-1bf324dfcc15,9ee38079-241a-4368-805f-ba5dc55ceb68,91a0c677-b499-48db-a6f7-081686ec0420,b22476b1-d308-4bb6-8d73-49fafbcf0ee2,37f73b02-d397-489b-ab42-f27037974ec4,8890ae37-c248-4c21-9763-5d6dcf8f7774,97ee061a-3254-45ac-b6e5-3e786ec8450f,81ab4c6a-f5fd-4793-bb3c-fffff54b38ac,580f9c90-35f2-4c5a-b2e8-6e829ff63b20,82344b3a-4a3e-4eaf-852f-23d5706d97b2,9c8a04c0-2b21-4943-a19d-e806a598087a,0ae4e7a8-5a98-4cf8-8220-a546796f6814	GT:DR:DV:PL:GQ	1/1:0:20:191,51,0:51
+chr1	180090	cuteSV.BND.5	N	[chr3:198172703[N	0	q5	IMPRECISE;SVTYPE=BND;RE=5;RNAMES=9d16f233-21de-4067-aff5-4002f8b66bfe,3e609e79-bf43-495f-abc5-589e153a386c,30da9670-d745-42cd-8a23-4ab34b593e92,5231d16f-9701-43a6-89da-319ee2ddf6e0,0053b59d-06cc-4ee6-ba75-80d244838eef	GT:DR:DV:PL:GQ	0/0:25:5:0,29,191:28
+chr1	180785	cuteSV.INS.6	A	ACCACCCTCCCTTTCCACTGCCCTAACCCACCTCACCCTCTGCCCTCACCCTCACCTCACCC	49.2	PASS	PRECISE;SVTYPE=INS;SVLEN=61;END=180785;CIPOS=-71,71;CILEN=-17,17;RE=7;RNAMES=e3eb0785-89bc-47fe-b010-5507ded6f073,445e45f8-b550-4995-83c5-4101693ca335,97f41f36-3bef-40ad-b76d-86ebb2d4501d,c1433f2c-0190-405a-b4f0-de89b0361a12,80ad0bf2-d144-4b4e-9e93-6dd5e31da974,9d16f233-21de-4067-aff5-4002f8b66bfe,0053b59d-06cc-4ee6-ba75-80d244838eef	GT:DR:DV:PL:GQ	1/1:2:7:49,5,1:5
+chr1	181215	cuteSV.DEL.2	AGGCGCAGAGAGGCGCGCCTCGCCGGCGCAGGCGCAGAGAGGCGCGCCGGGCCGGCGCAGGCGCAGAGAGGCGCGCCGCGCCGGCGCAGGCGCAGAGAGGCGCGCCGCGCCGGCGCAGGCGCAGAGACACATGCTAGCGCGTCCAGGGGAGGAGGCGTGGCACAGGCGCAGAGACACATGCTAGCGCGCCCAGGGGAGGAGGCGTGGCGCAGGCGCAGAGAGGCGCGCCGTGCTGCCGC	A	0	q5	IMPRECISE;SVTYPE=DEL;SVLEN=-238;END=181453;CIPOS=-4,4;CILEN=-1,1;RE=11;RNAMES=c1433f2c-0190-405a-b4f0-de89b0361a12,dc30defc-03f2-4e71-80f0-4cb7ce0c9ded,607fca6a-e4b6-4c1b-b14a-eaa480324b1a,803b38ee-2f2f-472c-b1b3-d1ddd8348fe6,bd08e14e-77b6-4bf5-9f69-8293094292b0,0cc7342e-adaa-4409-a1ec-1a1995a4284a,2be11552-cfea-4e6e-ad37-6b2d629ae1de,f64b79a5-cd0f-42cb-8d25-2c3425a0f91d,6f23d6fc-a187-466e-8bcb-ce6a6bf5eea1,592065f2-2bf2-4b38-b6d3-5cea446e07c0,f6549ae3-8ffe-4eda-be96-82ff76067a4c;STRAND=+-	GT:DR:DV:PL:GQ	0/0:41:11:0,28,286:27
+chr1	181229	cuteSV.INS.7	G	GCGCAGGGCGCGCGCGCGCGCAGCGCAGAGAGCGCGCGGCGCAGCACCGGCGCAGCGCCGCGCAGCGCAGCGCGCAGAGGGCGCGCCCCCGCGCGCGCA	161.2	PASS	PRECISE;SVTYPE=INS;SVLEN=98;END=181229;CIPOS=-18,18;CILEN=-12,12;RE=30;RNAMES=1cee1d17-283c-4da6-8235-f37bf60b4c07,3e609e79-bf43-495f-abc5-589e153a386c,445e45f8-b550-4995-83c5-4101693ca335,9d16f233-21de-4067-aff5-4002f8b66bfe,cb388ae1-da56-4217-92f9-ff5c1d0f38f3,598e10ed-9e0f-4b91-9321-cf49d264894a,f2447712-6638-4688-8634-6456e3381ab1,b8c1d59c-7bf5-4771-a26d-18ff414db526,153bb516-869b-4774-922a-ac7a3bfb3819,8635611c-116a-4462-a83d-2ed4adf3d48b,6977b0a3-c47e-4c52-becf-86060ca26a56,71bacfe7-4813-4186-90ed-c5c3c11ceac9,2400200b-b619-4e31-a31f-d5ca55fe43b0,a3eefdd5-15ef-496c-80d1-1c648cb3a460,333b6845-d254-4729-85c6-deff57311b89,5231d16f-9701-43a6-89da-319ee2ddf6e0,50ccacdc-8edc-4e6d-b0aa-f5d572119e74,64ccea5c-2f30-4678-bc7d-d4c38e8b78c7,30da9670-d745-42cd-8a23-4ab34b593e92,34ee1f1f-8477-4f30-b5b6-b8ba0d69fd0e,7bfe3eda-c76e-4d8a-8087-decae5495eb5,01e20e44-6a4f-46cd-9e47-21e4672bd6a1,ccee1775-5c00-4eac-981c-a36ea051336e,b899737e-2777-4b81-a079-f006d634cf81,eea77227-1562-4bb0-9ee6-b1c56c82d8cb,055a3afd-77bc-4577-a665-378d54328dee,4e248870-b369-48c0-9e31-1da436de798d,ce435c54-3e21-464c-9522-4a19bf9ed9ed,97f41f36-3bef-40ad-b76d-86ebb2d4501d,e3eb0785-89bc-47fe-b010-5507ded6f073	GT:DR:DV:PL:GQ	0/1:19:30:161,0,56:56
+chr1	257667	cuteSV.BND.6	N	[chr5:181462058[N	0	q5	IMPRECISE;SVTYPE=BND;RE=5;RNAMES=09cdbd93-aa21-4979-8110-75c36774feb8,5204edda-4746-4ea2-8928-dbcf3c36a0ea,15190a47-b61a-4cbe-9183-342e97bed9f7,4ce7a619-865d-4cf2-9b05-6b8b27acdf9e,9992b757-2b9c-4e98-a0ec-14e3605ab7a8	GT:DR:DV:PL:GQ	0/0:25:5:0,29,191:28
+chr1	350807	cuteSV.INS.8	A	AACTCACTGAAGGTGGAGGGAAAATGGTGTTGACCTAAG	32	PASS	PRECISE;SVTYPE=INS;SVLEN=38;END=350807;CIPOS=0,0;CILEN=-1,1;RE=6;RNAMES=018ed570-e6ef-4162-8426-bccc94c4e150,b2f4c3c1-f080-4a1f-a536-3c22212f9dcd,da720310-699e-4889-ab3b-a3a01122d079,31af45fc-5610-4010-8c97-337ba5b2f823,a25c7fbb-74e3-4c57-985c-159e0e233a27,522ac18e-d321-4dde-b480-3704593b22d2	GT:DR:DV:PL:GQ	0/1:4:6:32,0,13:12
+chr1	368928	cuteSV.DEL.3	CAGCTCACGGTGTGGAAACTGCGACACTCACGTGGGTGCCATCTCAGCAGCTCACGGTGTAGAAACTGCGACACTCCCATGGGTGCCATCTCAGCAGCTCACGGTGTGGAAACTGCGACACTCACACGGGTGCCATCTCAGCAGCTCACGGTGTGGAAACTGCGACACTCACACGGGTGCCATCTCAGCAGCTCACGGTGTAGAAACTGCGACACTCCCATGGGTGCCATCTCAGCAGCTCACGGTGTGGAAACTGCGACACTCACACGGGTGCCATCTCAGCAGCTCACGGTGTGGAAACTGCGACACTCACACG	C	5.6	PASS	PRECISE;SVTYPE=DEL;SVLEN=-315;END=369243;CIPOS=-55,55;CILEN=-41,41;RE=9;RNAMES=f770c333-5b0a-4b62-af96-3f7ab5daa662,d826c547-1c7c-4d2f-a379-6b6ee1c5425a,31af45fc-5610-4010-8c97-337ba5b2f823,b2f4c3c1-f080-4a1f-a536-3c22212f9dcd,bcad7e7f-53cc-4b01-a0ff-82552cf16d8f,522ac18e-d321-4dde-b480-3704593b22d2,11b4adf7-f440-4ffb-848d-3bb844f9c5dc,a25c7fbb-74e3-4c57-985c-159e0e233a27,da720310-699e-4889-ab3b-a3a01122d079;STRAND=+-	GT:DR:DV:PL:GQ	0/1:23:9:6,1,139:5
+chr1	372668	cuteSV.DEL.4	TAACCGACCACCTTAGGGTCCATTCTGATCTGTATATATGTATAATATATATTATATATGGACCTCAGGGTCCATTCTGATCTGCATATATGTATAATATATATTATATATGGTCCTCAGGGTCCATTCTGATCTGTATATATGTATCATGTAAACATGAGTTCCTGCTGGCATATCTGTCT	T	7.6	PASS	PRECISE;SVTYPE=DEL;SVLEN=-181;END=372849;CIPOS=-41,41;CILEN=-2,2;RE=9;RNAMES=018ed570-e6ef-4162-8426-bccc94c4e150,da720310-699e-4889-ab3b-a3a01122d079,d826c547-1c7c-4d2f-a379-6b6ee1c5425a,31af45fc-5610-4010-8c97-337ba5b2f823,a25c7fbb-74e3-4c57-985c-159e0e233a27,6433ec8d-025c-403f-8d9f-6ed36291d563,522ac18e-d321-4dde-b480-3704593b22d2,f770c333-5b0a-4b62-af96-3f7ab5daa662,b2f4c3c1-f080-4a1f-a536-3c22212f9dcd;STRAND=+-	GT:DR:DV:PL:GQ	0/1:22:9:8,1,132:7
+chr1	374047	cuteSV.INS.9	C	CCCCCCTCTCCTTTCTCCTCTCCATCCCCCCTCTCCATCTCCTCTCCTTTCTCCTCTCTCGCCCCCTCTCCTTTCTCCCTCTCTATCCCCCTCTCCTTTCTCCCTCTCTCCCCCTCTCCTTTCTCCTCTCCATCCCCTCTCCATCCCCCTCTCCATCTCCTCTCCTTTCTCCTCTCTAGCCCCTCTCCTTTCTCTCTCCTCCCCCTCTC	24	PASS	PRECISE;SVTYPE=INS;SVLEN=208;END=374047;CIPOS=-33,33;CILEN=-12,12;RE=10;RNAMES=2025839f-8c5c-44ad-939d-e173d15a60ba,1ca42b59-8622-43bf-ab89-8dcdba70a3ef,da720310-699e-4889-ab3b-a3a01122d079,018ed570-e6ef-4162-8426-bccc94c4e150,f770c333-5b0a-4b62-af96-3f7ab5daa662,31af45fc-5610-4010-8c97-337ba5b2f823,d826c547-1c7c-4d2f-a379-6b6ee1c5425a,522ac18e-d321-4dde-b480-3704593b22d2,b2f4c3c1-f080-4a1f-a536-3c22212f9dcd,a25c7fbb-74e3-4c57-985c-159e0e233a27	GT:DR:DV:PL:GQ	0/1:18:10:24,0,100:23
+chr1	598067	cuteSV.INS.10	A	ACAGCAGCTCATGGTGGAAACTGCGACACTCACACGGGTGCCATCTCAGCAGCTCACGGTGGAACTGCGACACTCACACGGGTGCCATCTCAGCAGCTCACGGTGGAAACTGCGACACTCACGTCGTGGTGCCGTCTCAGCAGCTCACGGTGTGGAAACTGCGAAGACCTCAGCGGGTGCCGTCTCCGCCCCCCAGCAGCTCACGGTGTGGAAACTGCGACACTCACGCGGGCAGGTGCCGTTCCCTCAGCAGCTCACGGAAACTGCGACACCACGCGGGTGCCGGCCTCAGCAGCTCACGGTGGAA	2.6	q5	IMPRECISE;SVTYPE=INS;SVLEN=306;END=598067;CIPOS=-187,187;CILEN=-44,44;RE=5;RNAMES=5f2941d5-b81b-43e0-8fbf-40f994c61e82,e5eafbed-6121-4116-87c6-5fcea74eca7c,4d65b4bf-fd57-442c-b4a2-60854df8bb12,6e49ca59-0cc7-4d01-ba69-dd67194f9cb0,0c83858c-eba8-4369-aab3-62ee10f058a2	GT:DR:DV:PL:GQ	0/0:14:5:3,3,89:3
+chr1	606602	cuteSV.DEL.5	GTCAGAGCTGTCCTGGGTCAGAGCTGCCCATGG	G	28.4	PASS	PRECISE;SVTYPE=DEL;SVLEN=-32;END=606634;CIPOS=-2,2;CILEN=0,0;RE=11;RNAMES=e3961b9e-e239-4d5e-853d-074ce238ba5a,9caef6b7-8947-4df9-8c86-78bdf7b4c050,1a51cfde-0cea-4573-a6ec-ddc8c363913f,3e4d6ee4-a0ec-4624-a4f9-1bc096415618,60af2d8e-6533-4761-a57f-9f5bab88f59b,d7b0fc8f-8fd3-4e73-97fb-de5d4ddd60c6,db625de5-0805-43b2-b461-e64674fec855,dba99022-afad-445d-a6f2-8cba1ad2d85a,dd79d67c-2b2b-468c-a9f4-ffc9e3713644,e6343866-bdcb-4e35-9928-d6a90089192b,83f5c1b9-f820-43f8-809d-453c6d2028ee;STRAND=+-	GT:DR:DV:PL:GQ	0/1:19:11:28,0,105:28
+chr1	609585	cuteSV.DEL.6	CTGTGGCCAGCAGGCGGCGCTGCAGGAGAGGAGATGCCCAGGCCTGGCGGCACACGCGGGTTCT	C	23.3	PASS	PRECISE;SVTYPE=DEL;SVLEN=-63;END=609648;CIPOS=-19,19;CILEN=-1,1;RE=11;RNAMES=874f43d4-264a-45ef-8668-1550210369a2,dba99022-afad-445d-a6f2-8cba1ad2d85a,9caef6b7-8947-4df9-8c86-78bdf7b4c050,0b573944-2eb4-467a-b944-dddce54e418b,ddd71e0f-7905-4685-b7c0-05d7f216243f,e6343866-bdcb-4e35-9928-d6a90089192b,1a51cfde-0cea-4573-a6ec-ddc8c363913f,e3961b9e-e239-4d5e-853d-074ce238ba5a,60af2d8e-6533-4761-a57f-9f5bab88f59b,dc1e925e-5165-455b-a29f-5dda1560ad67,dd79d67c-2b2b-468c-a9f4-ffc9e3713644;STRAND=+-	GT:DR:DV:PL:GQ	0/1:21:11:23,0,119:23
+chr1	609935	cuteSV.DEL.7	CGGTGCTGCAGGAGAGGAGATGCCCAGGCCTGGCGGCCGGCGCACGCGGGTTCTCTGTGGCCAGC	C	1.1	q5	IMPRECISE;SVTYPE=DEL;SVLEN=-64;END=609999;CIPOS=-60,60;CILEN=-1,1;RE=8;RNAMES=0b573944-2eb4-467a-b944-dddce54e418b,db625de5-0805-43b2-b461-e64674fec855,83f5c1b9-f820-43f8-809d-453c6d2028ee,9caef6b7-8947-4df9-8c86-78bdf7b4c050,ddd71e0f-7905-4685-b7c0-05d7f216243f,dd79d67c-2b2b-468c-a9f4-ffc9e3713644,1a51cfde-0cea-4573-a6ec-ddc8c363913f,d7b0fc8f-8fd3-4e73-97fb-de5d4ddd60c6;STRAND=+-	GT:DR:DV:PL:GQ	0/0:24:8:1,6,154:6
+chr1	610369	cuteSV.DEL.8	AGGCGGCCGGCGCACGCGGGTTCTCTGTGGCCAGCAGGCGGTGCTGCAGGAGAGGAGATGCCCAGGCCTGGCGGCCGGCGCACGCGGGTTCTCTGTGGCCAGCAGGCGGCGCTGCAGGAGAGGAGATGCCC	A	0.1	q5	IMPRECISE;SVTYPE=DEL;SVLEN=-130;END=610499;CIPOS=-30,30;CILEN=-3,3;RE=5;RNAMES=83f5c1b9-f820-43f8-809d-453c6d2028ee,874f43d4-264a-45ef-8668-1550210369a2,60af2d8e-6533-4761-a57f-9f5bab88f59b,e3961b9e-e239-4d5e-853d-074ce238ba5a,dba99022-afad-445d-a6f2-8cba1ad2d85a;STRAND=+-	GT:DR:DV:PL:GQ	0/0:20:5:0,16,143:16
+chr1	610490	cuteSV.DEL.9	GGAGATGCCCAGGCCAGGCGGCCGGCGCACGCGGGTTCTCTGTGGCCAGCAGGCGGTGCTGCAGGAG	G	0.8	q5	IMPRECISE;SVTYPE=DEL;SVLEN=-66;END=610556;CIPOS=-57,57;CILEN=-2,2;RE=7;RNAMES=0b573944-2eb4-467a-b944-dddce54e418b,9caef6b7-8947-4df9-8c86-78bdf7b4c050,3e4d6ee4-a0ec-4624-a4f9-1bc096415618,1a51cfde-0cea-4573-a6ec-ddc8c363913f,58c69596-3118-4b49-91eb-0b5ad9cc07b0,ddd71e0f-7905-4685-b7c0-05d7f216243f,dd79d67c-2b2b-468c-a9f4-ffc9e3713644;STRAND=+-	GT:DR:DV:PL:GQ	0/0:22:7:1,8,144:7
+chr1	610569	cuteSV.INS.11	G	GTTGGCCTGTGAGGTTCTCTGTGGCCAGCAGGCGGCGCTGCAGGGAGGGTACCCAGGCCTGGCGGC	2.9	q5	IMPRECISE;SVTYPE=INS;SVLEN=65;END=610569;CIPOS=-70,70;CILEN=-3,3;RE=8;RNAMES=569f47cd-bf9a-4cda-bd5b-4b855b1a191f,a1b31d74-bce7-4754-84d7-d3e0c9ba4cc1,a90309df-44df-4a58-bd90-a6b6a40d319e,18f659db-5ad3-4c14-99b4-8bc6eb6371ff,bcd37f7e-7f75-4374-8969-c5c5b6847ef1,037d7275-443d-4952-a66f-08e5b32a531e,3d21c734-2938-4a1e-a06c-95f413f7729e,b534f964-ed10-4a02-98c2-5562bf4f6853	GT:DR:DV:PL:GQ	0/0:22:8:3,3,136:3
+chr1	611307	cuteSV.DEL.10	GTGGGTGTGACAGGGTGTGTTCTGTGTGAGAACATGTGTGTAGTGTCCACATGTCCTCTGTGCGTGAGTCCCTGTGTGTGATGTTGTGTTCTCGGTGTGAGTTCATGGGTGTGATGGGGTGTGTGCTGTGTGAGAACGTGTGTGTAGTGTCCACATGTCCTCTGTGCGTGAGTCCCTGTGTGTGATGTTGTGTTCTTGGTGTGAGTTCATGGGTGTGACGGGGTGTGCTGTGTGAGAACGTGTGTGTAGTGTTCACATGTCCTCTGTGCGTGAGTCCCCGTGTGTGATGTTGTGTTCTCGGTGTGAGTTCATGGGTGTGACGGGGTGTGTGCTGTGTGAGAACGTGTGTGTAGTGTCCACATGTCCTCTGTGCGTGAGTCCCCGTGTGTGATGTTGTGTTCTCGGTGTGAGTTCATGGGTGTGACGGGGTGTGTGCTGTGTGAGAACGTGTGTGTAGTGTCCACATGTCCTCTGTGCGTGAGTCCCTGTGTGTGATGTTGTGTTCTCGGTGTGAGTTCATGGGTGTGACGGGGTGTGTGCTGTGTGAGAACGTGTGTGTAGTGTCCACATGTCCTCTGTGCGTGAGTCCCCGTGTGTGATGTTGTGTTCTCGGTGTGAGTTCATGGGTGTGACGGGGTGTGTGCTGTGTGAGAACGTGTGTGTAGTGTCCACATGTCCTCTGTGCGTGAGTCCCCGTGTGTGATGTTGTGTTCTCGGTGTGAGTTC	G	1.8	q5	IMPRECISE;SVTYPE=DEL;SVLEN=-725;END=612032;CIPOS=-1,1;CILEN=-1,1;RE=8;RNAMES=dba99022-afad-445d-a6f2-8cba1ad2d85a,db625de5-0805-43b2-b461-e64674fec855,ddd71e0f-7905-4685-b7c0-05d7f216243f,3e4d6ee4-a0ec-4624-a4f9-1bc096415618,874f43d4-264a-45ef-8668-1550210369a2,dc1e925e-5165-455b-a29f-5dda1560ad67,1a51cfde-0cea-4573-a6ec-ddc8c363913f,d7b0fc8f-8fd3-4e73-97fb-de5d4ddd60c6;STRAND=+-	GT:DR:DV:PL:GQ	0/0:23:8:2,5,145:4
+chr1	626790	cuteSV.INS.12	T	TGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTG	0.1	q5	IMPRECISE;SVTYPE=INS;SVLEN=33;END=626790;CIPOS=-13,13;CILEN=-1,1;RE=5;RNAMES=03da607d-ec0b-4c16-af37-c7bffd0fde4f,701d440d-d52b-477c-a132-204fc867a873,26f3608e-9c4c-4bf9-bebb-bab47529ad41,5bc4ad69-db16-48c0-9c91-9d61c80e47ea,7fb7bf07-820d-4347-97b9-46e8d81505a0	GT:DR:DV:PL:GQ	0/0:21:5:0,19,153:18
+chr1	744867	cuteSV.INS.13	G	GATATATATATATATATATATATATATATATATATA	0.9	q5	IMPRECISE;SVTYPE=INS;SVLEN=35;END=744867;CIPOS=0,0;CILEN=-3,3;RE=6;RNAMES=428523af-d83b-4029-9ba6-02b2e053dad8,69c14cf9-ae64-4646-9a67-871e6ae789c8,9e8e62e7-3bef-4120-b1ed-db57e7644483,c9073453-c215-4531-a6f5-bd03a20d3303,9d7cd991-e98d-4774-af05-9da0fc07f75d,ce2a8020-a552-477f-9df5-b7d97f0a1078	GT:DR:DV:PL:GQ	0/0:19:6:1,7,125:7
+chr1	814585	cuteSV.INS.14	A	AAAAAGATGTGAGAACCTATTTTCAGATGTATTTCCCTTCTAAATATCTAACACAACACACAGAAGAGAAAGTCAAGTCAATTTTACATATAAGTT	132	PASS	PRECISE;SVTYPE=INS;SVLEN=95;END=814585;CIPOS=-12,12;CILEN=-2,2;RE=24;RNAMES=f8d73f18-e9e3-4117-84b3-25f6cbcccc07,71e7628e-be56-4af8-8cde-d459504c90b8,20f3552c-6685-4f0d-a9f3-b2bced8bdfbf,dea6d8a4-d4a4-46ff-bc8a-29f398b22dd1,748b49cc-0b65-4897-becf-ebe5de749e4c,68668099-f1bd-40fe-ba1b-375c60d26882,27e89a23-a2bd-4eb7-8f63-8be09957d3c5,fc6dd300-5ee5-4cd2-93fa-837c6a4be50a,7a47a58a-763b-4bdb-b38e-9a5b15ca84d9,0f47bd21-3cf1-45e1-a4dd-b3568472f922,35cc5f0c-2f47-44bd-bbc2-748fffaa3548,5a1ac598-afc9-4de2-b58e-6c7766d58dd3,1fc46ec8-a4bc-4c5a-a723-1dc6666ff1e2,3234576d-19b1-47a8-8f6f-33c9ec8a3154,c9aba122-a878-4883-8df2-ba537038e718,249f7119-afa9-400d-802f-b3dedec8dbb7,3112a6c0-ca14-42f1-b167-31cd65936400,c6859633-f656-4ffb-a43c-310e3568d902,2f5c7650-f34e-4dd6-914e-82f56b9da016,7661c62d-827c-49a7-8929-e47f1a2f72bd,d4ccaa89-de65-476c-9724-ff94f9eba74f,6d04f2bb-637c-4d61-b504-201d582a7d9b,22ed4c8d-bcf3-44b1-a3ac-6e1670a1ac02,1ddd235d-02fa-4705-80f9-544ffbf76a46	GT:DR:DV:PL:GQ	0/1:14:24:132,0,37:36
+chr1	820896	cuteSV.INS.15	T	TCTACACTGCCTGGGCAGTAGTTCACGCAATCTCCCCTACCTGCCTCCTCCTTGAACCAGCCCTATCTATACTACTTGCCTGTCCAGCAGATCCACTCTATCTACACGACCTGCCTGTCCAACAGATCCACCCTGTCTACACTACCTGCTTGTCCAGCAGGTCCACCCTGTCTATACTACCTGCCTGGCCAGTAGATCCACACTATCTACACTGCCTGCCACAGCCAAGATCCACCCTGTCTAC	69	PASS	PRECISE;SVTYPE=INS;SVLEN=243;END=820896;CIPOS=-10,10;CILEN=-2,2;RE=19;RNAMES=7d52fe89-e304-42a2-b483-33b0ff06ccbc,0a0295eb-c248-4bed-8d45-06bfa3a76406,5af3910e-b9f4-451a-8710-917ca5fa90b8,cc4589e4-36c0-4e71-8151-b61449bba14a,2bc25ca5-c598-4437-b822-36903e39cd43,c0757315-cc54-4e20-abfc-0292cd3e588b,60f7349e-a821-4ce7-8442-5771681f63b5,428aa746-97d1-4764-95e3-6dd4e6867aee,ed57ee60-11d6-47ac-a344-c795f4f4c269,8783cda6-9447-4cf3-8da0-23207f536755,1a85a944-e29e-4900-94ac-0db738749b1f,236d703b-56fc-4715-9fd3-7d0bad6fb350,bb9b2ee4-a208-479f-87a1-82390847b380,80431cc1-baa5-4287-9918-5269169e50e9,6ba9a389-ec6c-4aba-9462-72e2aaa04ecb,e46d6475-fcbb-4b6e-b8c4-faf7026366dd,13748a33-386b-4d95-af9c-8e12fcd16ec2,e741df71-7c61-4b64-b810-fed29fcf9029,11e05e53-15f3-4fee-a1f5-0169b9a4e390	GT:DR:DV:PL:GQ	0/1:25:19:69,0,126:68
+chr1	820911	cuteSV.DEL.11	CCCTGTCTACACTACCTGCTTGTCCAGCAGGTCCAC	C	70.9	PASS	PRECISE;SVTYPE=DEL;SVLEN=-35;END=820946;CIPOS=-3,3;CILEN=-1,1;RE=20;RNAMES=dea6d8a4-d4a4-46ff-bc8a-29f398b22dd1,c6859633-f656-4ffb-a43c-310e3568d902,1ddd235d-02fa-4705-80f9-544ffbf76a46,a0d6875e-4ed6-4fd8-8546-73e206e1b988,d4ccaa89-de65-476c-9724-ff94f9eba74f,748b49cc-0b65-4897-becf-ebe5de749e4c,3234576d-19b1-47a8-8f6f-33c9ec8a3154,0f47bd21-3cf1-45e1-a4dd-b3568472f922,249f7119-afa9-400d-802f-b3dedec8dbb7,35cc5f0c-2f47-44bd-bbc2-748fffaa3548,6baa71b3-614c-48cc-80e1-5d35913a2176,7661c62d-827c-49a7-8929-e47f1a2f72bd,9b38ae98-7dcb-45de-94d1-552edb57b5e1,68668099-f1bd-40fe-ba1b-375c60d26882,755c1eb0-0cc3-40a7-9333-9d5a36d75142,5a1ac598-afc9-4de2-b58e-6c7766d58dd3,cf4de677-11bd-42f4-b872-918466e13387,57ca2209-38dd-410f-b078-90fefd7ff16c,d00795b8-c323-4b84-ad84-2633e778c395,9b0c7ca8-7b5f-4cbe-8acc-210aee11213f;STRAND=+-	GT:DR:DV:PL:GQ	0/1:27:20:71,0,138:70
+chr1	822420	cuteSV.DEL.12	TACTACCTCCCTGGCCAGCAGATCCACCCTGTCTA	T	68.3	PASS	PRECISE;SVTYPE=DEL;SVLEN=-34;END=822454;CIPOS=-8,8;CILEN=0,0;RE=20;RNAMES=3191037b-4588-4ca9-b10d-30f72b22dae0,0a0295eb-c248-4bed-8d45-06bfa3a76406,11e05e53-15f3-4fee-a1f5-0169b9a4e390,cc4589e4-36c0-4e71-8151-b61449bba14a,d3859d16-3b84-42c1-b62d-880e59fe5668,0568638e-ff8a-4c0c-b359-651d5856a156,60f7349e-a821-4ce7-8442-5771681f63b5,428aa746-97d1-4764-95e3-6dd4e6867aee,acd0961e-11d1-44e6-9521-38bf4df5a748,ed57ee60-11d6-47ac-a344-c795f4f4c269,13748a33-386b-4d95-af9c-8e12fcd16ec2,1a85a944-e29e-4900-94ac-0db738749b1f,236d703b-56fc-4715-9fd3-7d0bad6fb350,2bc25ca5-c598-4437-b822-36903e39cd43,6ba9a389-ec6c-4aba-9462-72e2aaa04ecb,80431cc1-baa5-4287-9918-5269169e50e9,8783cda6-9447-4cf3-8da0-23207f536755,e741df71-7c61-4b64-b810-fed29fcf9029,069cec55-5212-49f9-a7bf-4f1538651d94,c0757315-cc54-4e20-abfc-0292cd3e588b;STRAND=+-	GT:DR:DV:PL:GQ	0/1:28:20:68,0,145:68
+chr1	839484	cuteSV.DEL.13	ACACCTGGACAAACACACCTGGACACACACACCTAGACAC	A	64.5	PASS	PRECISE;SVTYPE=DEL;SVLEN=-39;END=839523;CIPOS=-9,9;CILEN=-1,1;RE=18;RNAMES=236d703b-56fc-4715-9fd3-7d0bad6fb350,e9c8a5e0-ae6a-43fc-aa8d-443d0299178a,d3859d16-3b84-42c1-b62d-880e59fe5668,75f1ae85-0230-43d5-ab85-1b3c037a9f9e,13748a33-386b-4d95-af9c-8e12fcd16ec2,5c3ba9cd-3b23-4144-a829-69e31a658eea,d731a5a5-f6ba-484e-a949-a306b48b24a0,4fec7be0-fba7-4412-8a60-2a107ec4be60,b4514bb9-2852-4a77-a4c7-4c3ba323d1de,ce40ecf4-62b7-4557-b021-c1c3930f1bec,319e2bdd-20b7-4b43-a442-8b53e9cf9f45,e88eb53c-dac6-4220-adfc-2ddf08b01e06,f660919f-7fd0-4f1b-85e4-8a84fb7ada46,251ef908-eb88-4b1a-a3b4-7ced2e9c18d4,cff27005-11f2-4e9b-a8d4-fc4b8ab6a17c,c4e74269-fc7e-45a6-bcca-9ed3f426b382,428aa746-97d1-4764-95e3-6dd4e6867aee,b0a25212-295d-4995-9664-74983d034114;STRAND=+-	GT:DR:DV:PL:GQ	0/1:24:18:65,0,122:64
+chr1	853529	cuteSV.DEL.14	ATGACCGCCGTGTGGTAAACTGATGAACCCCGACCCTGATGAACGTGAG	A	58.9	PASS	PRECISE;SVTYPE=DEL;SVLEN=-48;END=853577;CIPOS=-19,19;CILEN=-1,1;RE=15;RNAMES=b0a25212-295d-4995-9664-74983d034114,c4e74269-fc7e-45a6-bcca-9ed3f426b382,c78f3f96-0eca-4165-8039-a726409e315f,b1b40cdb-900d-4845-9898-6170d90cd3b2,4a58eaf6-68f1-4fee-a0c4-c7c6566fa5cc,eb6cac52-99f7-4d03-8f98-daccd8fca025,605f1e39-e4c5-4793-8bfa-a49d3a06c231,e741df71-7c61-4b64-b810-fed29fcf9029,72c69633-5575-4780-837f-327eb7e539e3,536a63a2-dfd4-433e-9d9e-5d9b7c94d3f4,251ef908-eb88-4b1a-a3b4-7ced2e9c18d4,f475a1f3-22bf-4b8f-b041-b81e6efb0211,83af1174-2b47-4abd-9718-293c2bbeb232,57e8980a-683c-41fa-b8be-6bf98f105d33,9db3952b-26fb-4232-9af8-58a98f45f237;STRAND=+-	GT:DR:DV:PL:GQ	0/1:18:15:59,0,88:58
+chr1	860179	cuteSV.DEL.15	CTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT	C	0.4	q5	IMPRECISE;SVTYPE=DEL;SVLEN=-30;END=860209;CIPOS=0,0;CILEN=0,0;RE=7;RNAMES=22085938-8eeb-4b08-8c96-7afe9d261e81,7d61b37a-ec51-4e39-ab16-9262a0dc0754,9c386053-d761-4960-89e6-cd465103bc5d,bb30818e-d2cd-4f4f-a1af-e47bab969faa,8df62003-1bf3-49fa-9b11-c29769761226,cea5332f-05e1-4540-9026-c7895ec31961,f709d9a2-d943-493c-90e3-86b947f99927;STRAND=+-	GT:DR:DV:PL:GQ	0/0:23:7:0,10,153:10
+chr1	866742	cuteSV.INS.16	G	GCCTGAGCCCTCACGTGGTCCTCCCCTGTGACTCCTGAGTCTCACATGGTCCTCCCCTGCACTCACATCCCTGACATCCTTCCCGTGCCTCACGTGGTCCTCCCCGTGAATTCCACA	85	PASS	PRECISE;SVTYPE=INS;SVLEN=116;END=866742;CIPOS=-51,51;CILEN=-11,11;RE=18;RNAMES=9b371dff-6873-48fe-b8da-a7064d536115,f475a1f3-22bf-4b8f-b041-b81e6efb0211,a991d6ca-a60b-4607-8762-3ab46be04d0b,8df62003-1bf3-49fa-9b11-c29769761226,28b79b1a-aeec-4ef1-8702-0a96a47277ad,ef6676f0-38c6-4daf-b0ec-af5991af3cde,8ca5b19a-40d7-4c67-a443-2a4c4ad1fd2c,dfdd1f76-41a0-4067-b78a-2ad10449afb2,22d84ee0-219b-458b-8911-886548b6efc7,e741df71-7c61-4b64-b810-fed29fcf9029,83d113a6-1c88-4ed4-9bcd-c0d7486282ee,eb6cac52-99f7-4d03-8f98-daccd8fca025,94a20cbf-3758-4b42-8c6a-da7d7500fd11,4bbda435-5485-4814-be91-34dc058cdc5a,33e89011-37df-42be-a64a-bf3a398c2c39,b8484e0f-d788-42c1-a228-10b46af1b795,b1b40cdb-900d-4845-9898-6170d90cd3b2,530b293c-b9e8-492f-a8af-c8b3529bf79d	GT:DR:DV:PL:GQ	0/1:16:18:85,0,66:65
+chr1	872746	cuteSV.INS.17	T	TTCTCAGGTTTGACTCTGACAATTCCCTAACAGGGAAGCTGCTGTCCTATAACTCTGGGGGAGGGGTTTCATTTGCTCCCTGGCAGGTTGGCTTCAGTCTCAGGTTTGACTCCTACCTGACTTAATTCCTAACAGAGGCTGCTGTCCTGTGACTCTCTGGAGAAGGGGGTTTCATTTTACTCCACCTGCAGTAGGGTCTGTTAGCCCAGGGGGAGGGGGTTTCATTTTGTACCTGCAGCCAGGGTTAGCCCATCTCAGGTTGGCCTCCTGTTAGTAATTCCTAACAGGGAGGGGAAGCTGCTGTCCTGTGACTCTGGGAGAGGCTTCACTGACTCCTGTAACTCTGGGAGGGGTTCATTTTCTCCACCCCTCCTGGCCGAGGTTAGCCCATCTCAGGTTTGACTCCTGACTTAATTCCT	62.8	PASS	PRECISE;SVTYPE=INS;SVLEN=418;END=872746;CIPOS=-94,94;CILEN=-4,4;RE=13;RNAMES=dc2fa176-2e22-4efc-9310-d5e581286c22,b531fd8f-94a3-401d-b47a-cc8d15a8d5d7,fa9da229-e6a6-4179-b397-2d141edd48aa,3b3ec0bb-28b0-47d0-bfb6-9aaf46c33103,99b88d32-1ef1-40c4-b00a-5705363b8477,1ab37623-f1cc-461e-a660-23d807f676b7,22085938-8eeb-4b08-8c96-7afe9d261e81,f9ed5ade-463c-4908-b7de-c9e6d9435c36,d402008b-79c3-4fae-b528-5b6355c2a300,ab958f04-03ad-4eeb-b923-3bd9126b4605,bf7f96a4-1992-465e-afa5-a0be4c87825a,ccecf5ea-f571-442c-95c3-84df5139fa08,7d61b37a-ec51-4e39-ab16-9262a0dc0754	GT:DR:DV:PL:GQ	0/1:11:13:63,0,44:43
+chr1	875831	cuteSV.DUP.1	C	<DUP>	27.1	PASS	PRECISE;SVTYPE=DUP;SVLEN=509;END=876340;RE=13;STRAND=-+;RNAMES=bf7f96a4-1992-465e-afa5-a0be4c87825a,e9278a48-f13d-452e-b116-b334f56aea9f,2e7f44ed-26c9-4d08-8b97-14cc1bce418d,8a996251-bd12-464d-8702-52627014ab5c,54c716dc-2974-49bb-9da1-7517688122b1,157884a7-c3c2-4e4a-949a-53de16111ee0,0f3c8ebf-04b2-45c9-943e-6aa19617fd25,594f75ad-7c93-4a51-a735-43f49eeaa22f,fa9da229-e6a6-4179-b397-2d141edd48aa,f804818a-f647-4493-942e-29808c2368a9,30a10600-8e17-45f1-ba0a-45befaf2a980,8d5882ef-92c9-4618-835c-e2746bf2976d,833d5e0e-a83a-4d01-95b5-1e1327f581e0	GT:DR:DV:PL:GQ	0/1:25:13:27,0,142:27
+chr1	875973	cuteSV.INS.18	C	CAGTGTCCTGTCGTCGAAGGGGACAATCCTTAATTCAGTCCTCAGTTTGGACTCTACCTCCCGATTGGAGCGACGTCCACCTCGTTTACTTTGGGGGGGGAGGGGTCTCAATGTCCTGTCGAAGGACAATCTTAATCTCAGTCCTCAGTTTGGACTCTACCCGATTGTCTGGAGTGACGTCCATCTTGTTTACTTTGGGGAGGGGGCCCTCCCAGTGTCCTGGAACCGAAGGGGACAATCCTTAATTCAGTCCTCAGTTTGGACTCTACCCGATGAATGAGCGACGTCCACCTCGTTTACTTTGGAGGGGGTCTCAATGTCCTGTCGTCGAAGGGGACAATCCTTAATTCAGTCCTCAGTTTGGACTCTACCCGATTGTCTGGAGTGACGTCCACCTCGTTTACTTTGGGGGAAGAGGTCTCAGTGGTCCTGTCGTCGAAGGGGACAATCCTTAATTCAGTCCTCAGTTTGGACTCTGTCCGATTGGAGTGACCCACCTCGTTTACTTTGGGGGAGGGGTCTCAATATCCTGTCGGTGTCGAAGGGGACAATCCTTAATTCAGTCCTCAGTTTGACTCTGCCTCGATTGTCTGGAGTGACGTCCACCTTGGTAACTTTGGGGGAGGGGTCTCAATGTCCTGTGTAAAATCCAGAATCTGAAGTTATAAATTTTATAAACAGAGACCTTAAACTTTTATAGAAGACATGGAACAAATGACCTAATTTCCGTGTTAGTGAATAAGTAGACTAATCATTAGTTTCAAATTTTAAATAAATTATGAACTACAATGAAACAAAATATATTTACACATAAAGATGCTATCAAAATCACAGTAGATTATAATAAAAGTTACAACATTAAAAATAAATATACTTAGAAACACAATATGTAGAATTACAGTGACCATAATTTTGATATACACGTTTATCGAACTTTATCGTGGATTGTGTAGTACCTAAGATACATTTATAAGTGGTGAGATAAAGTAGTGTAAAATTGGTTAAGTATGGTAAGAAGTAAACGATTGTAAAATAAACAGGAGTAAGTTTAAACACAACCACAGGACAGATGTATAATTATTTTCTCTCCTCCATCCGGGTCTGAAGTAGAGCTAATTTATGATTCTGTAAAAAGATCAAGATAAATATGTGTTGTAATGTAAACAGAGTGTGATATTTATACTCATAAGTCAATTCATGTCGTTTTTTTTTTTTTTTTTTTTTTTTAGAATTGGAAGGATGTTGTCTACAACCCTATAAGTGTCTAACAGGTGTTGAAGAAAGAGGGTTGTAATAAAAATAAGAATGTAGAGTTGACGGTACTTTTGGATCTTCGTACTTTATGATGTTTAACTTTGTTTAAGGGACGATATGTCTTTAACATTTCATTTGGTTCTTCTTTCTCTTATATACGTATGAGTACGTTGTTCTTTATTTTTGTCTAATCTGATTACTTTTGAGAAGTAGTCTTCATTCAGGACCGGTCCGAGGATTTCGAAGTGATCACTTAAGATAATTTGTGAATTACTTATTGGTTAGGAAGTGTTTGTGAACGTTTCGTCTCTTCGTCCTTTTGGAGTTAAGTAAAACTCCGGTCATAATGGTCTATTGTTTAGTCTGTTTTCGTAGTGTTCTTTTTGATTTACACGTCTGATAATAGGAGTACTTACTTCTGTCTTTAGAGGTTTTTGAGATCATTTGACCTTCTTCGTTGTGTATTTTTACAATCTATTCGGACCGTACCACCACGTGTGGACATCACGGTCGATGAATCCTCCGACTCCACCGTCCTAGTGAACTCGGGTCCTCAAACTCCAATATCACTCGACACTGACACGGTGAGGTGGGGTTGGACCCACTGTCTCACTTCTGGAGTAGAGATTATTTTTAATTTCTTTTAATCTTTTTTCAATCTGTTATATTGGTTCACCTTAATAGGATGGTTATGTTTAGATTTTTAGTCACATTATG
TGGTATAATTATCTTATTTCCTTTCTGGTACTACTAAAGTTACCTGCAGACTGTTACAGTTACTGTGAGTAAGGACCATTTAGAAGGTCTTTAGATCGTTATCTTTATTGAAGGACTTGGAGGATTTCCTGTAGGT	95.5	PASS	PRECISE;SVTYPE=INS;SVLEN=2106;END=875973;CIPOS=-48,48;CILEN=-105,105;RE=11;RNAMES=b531fd8f-94a3-401d-b47a-cc8d15a8d5d7,4bbda435-5485-4814-be91-34dc058cdc5a,9148c8b3-594a-4abe-afe8-9c5f32d76173,4ba6382a-2fce-4dae-80ab-4c1dedd1cec7,3b3ec0bb-28b0-47d0-bfb6-9aaf46c33103,d9720cb5-2fa8-4d15-9221-76bebb8ee477,1ab37623-f1cc-461e-a660-23d807f676b7,f2c202a6-bd7d-4934-9879-7d393feb9dff,8b08274d-9781-4d6f-99a3-717a424483e9,21e9cbd1-d15b-4c76-a89d-aa015299da43,b1b40cdb-900d-4845-9898-6170d90cd3b2	GT:DR:DV:PL:GQ	1/1:1:11:95,21,0:21
+chr1	875973	cuteSV.DUP.2	C	<DUP>	0.1	q5	IMPRECISE;SVTYPE=DUP;SVLEN=455;END=876428;RE=7;STRAND=-+;RNAMES=e9278a48-f13d-452e-b116-b334f56aea9f,57180a95-ccfb-41fc-a26e-95f054ea5da9,8a996251-bd12-464d-8702-52627014ab5c,0f3c8ebf-04b2-45c9-943e-6aa19617fd25,7d61b37a-ec51-4e39-ab16-9262a0dc0754,8b08274d-9781-4d6f-99a3-717a424483e9,e1aef287-8fbf-4811-af38-59948ea55545	GT:DR:DV:PL:GQ	0/0:25:7:0,15,172:15
+chr1	876165	cuteSV.INS.19	C	CCCCCACACCTCCCTGCTCCCCCCACACTCCCTCATACTCCCCATACCACCCCAACCTCCCACACTCACCCACTCCCCATACTCCCCAACCTCCCCCATACTCCCCACATTCCCCCATACTCCCCTCATACTCCCCCAAACTCCCCCATACTCCTC	102	PASS	PRECISE;SVTYPE=INS;SVLEN=155;END=876165;CIPOS=-80,80;CILEN=-75,75;RE=17;RNAMES=22085938-8eeb-4b08-8c96-7afe9d261e81,81e82995-0ffd-447b-af0c-f31a8a115fb1,83d9c279-cb7c-4b06-9b0c-59442e2354da,54c716dc-2974-49bb-9da1-7517688122b1,fa9da229-e6a6-4179-b397-2d141edd48aa,c833ef7f-3c94-419c-96cc-bf4e38a240c2,bf7f96a4-1992-465e-afa5-a0be4c87825a,b0446234-8b99-44e0-b576-1a72d3ef0700,2417740b-2584-4ca1-8f2d-7f6c68abd8f9,99a38a81-7109-4bb4-ad5d-a20a4eee12ef,e1aef287-8fbf-4811-af38-59948ea55545,dfdd1f76-41a0-4067-b78a-2ad10449afb2,b8484e0f-d788-42c1-a228-10b46af1b795,8d5882ef-92c9-4618-835c-e2746bf2976d,0f3c8ebf-04b2-45c9-943e-6aa19617fd25,30a10600-8e17-45f1-ba0a-45befaf2a980,594f75ad-7c93-4a51-a735-43f49eeaa22f	GT:DR:DV:PL:GQ	0/1:7:17:102,1,7:6
+chr1	882645	cuteSV.DEL.16	TAATATATTAGCTATTCTAGACTTTATGCATTTATGTAAAGTTTTCTTTGTTGCACTTTAAGTTCTGTGATACATGGGCAGAGCATG	T	256.4	PASS	PRECISE;SVTYPE=DEL;SVLEN=-86;END=882731;CIPOS=0,0;CILEN=0,0;RE=48;RNAMES=97e774b1-1177-4bdf-9c45-4dbd0f3d8a73,73728239-8ff1-4cd4-ae56-e31e059a758b,056a9e24-0a77-44d1-ab83-a80a75d9d788,6a9a2e7c-07fb-4cf7-8efc-22bea120a93b,046a1725-d02e-438e-9da5-d41fb1ce84b2,0e3cf0c5-c78d-45c7-a6bc-c4233623f360,ae6d687f-f2ff-483b-be58-dd45b9ba58bf,05c54a42-e1a3-4e16-93b9-9ca92d0319dd,08fa5454-6e34-4304-9787-9ed403a4ef53,0c5c0fcf-8c53-415b-9bc9-2ed8a9e4299d,1412da0d-d8b3-4d26-82bd-25b190e8a6a4,16169411-77a4-427a-8aef-7b60074206bd,21421e6f-ff09-4fa3-bc1e-1b17dea7e828,3bd7dc1d-4849-4e40-b64d-3842fc25cb9b,3d6d04de-603a-40d7-8b40-50a320ddd891,3fb08fdb-7f14-4f34-8aff-de9e75d4dd0e,42e33868-1a79-47a6-868e-7d5d389c5d56,46dfd63c-e140-494a-841b-5bd7f64189db,4eeb823d-93f5-4cb9-b4dd-56fbd2852eab,505d5fa2-a923-4c2b-988e-bd1a3019246e,56c7af10-8d0c-4d10-934e-ce832e20bec5,5d0b99df-5c30-4ea4-8385-0bec80e55059,5fbb17d4-0ab0-465e-b3e1-7dc60e96e11f,6169a3b9-e904-46e3-b8bf-b8c315d5c5e5,716637f2-babf-4370-9472-f8f6fa9d744e,7186749e-c3f5-4c2a-b234-3a6837d50fd6,768ccb5b-6be9-41ea-bc3f-279b2e7a8fb0,7d284a1c-633b-40cb-91e5-162f95fa3d66,80b82853-6e69-45ee-9ca4-e5ad9e8f03a1,90498d40-6caf-43e8-a74d-f1736a1de6c5,9f6cdfba-a21d-4f16-a5e4-14986b2af751,a2397e41-8500-4e12-9aba-12b8497ca4f1,a5e72e50-4e49-4227-b7ff-ee22a24a02f9,a78069fd-c1ba-4e9b-84ac-f8589d7589cc,b3cb0ab9-8a9d-48bf-9136-9eea67132892,b55c1a9a-8db3-465f-9be8-8469cbc48312,bad14ecc-2338-4c54-afcf-9cd59bd90c2a,d6291e72-fb98-406f-80f2-f01885a638c0,db1f015c-ba4f-4d13-a0da-f774b99ce01a,dde71730-62fd-47aa-b397-6f37ace09318,e1de4680-a48f-4701-b35a-9e90e6fd3da1,e2af033c-70d2-4d88-8b6c-e67e2eadb7b5,e530ef5b-f4f8-4800-aae0-eb330134cdaf,ee96a347-de27-4e4f-8152-7cb3d0422d8f,f5434dca-3eba-40df-99f4-1aa2af2c2e9d,ae1e190c-1af4-4119-80d6-6195876f9e64,c085c408-f90b-403c-889c-556a8cf52725,93c9ba0c-2139-4366-8af9-816d2b650eb3;STRAND=+-	
GT:DR:DV:PL:GQ	0/1:31:48:256,0,94:94
+chr1	883241	cuteSV.BND.7	N	N]chr20:29351526]	0.2	q5	IMPRECISE;SVTYPE=BND;RE=19;RNAMES=7d284a1c-633b-40cb-91e5-162f95fa3d66,56c7af10-8d0c-4d10-934e-ce832e20bec5,ee96a347-de27-4e4f-8152-7cb3d0422d8f,505d5fa2-a923-4c2b-988e-bd1a3019246e,73728239-8ff1-4cd4-ae56-e31e059a758b,3fb08fdb-7f14-4f34-8aff-de9e75d4dd0e,a2397e41-8500-4e12-9aba-12b8497ca4f1,46dfd63c-e140-494a-841b-5bd7f64189db,046a1725-d02e-438e-9da5-d41fb1ce84b2,768ccb5b-6be9-41ea-bc3f-279b2e7a8fb0,b3cb0ab9-8a9d-48bf-9136-9eea67132892,5fbb17d4-0ab0-465e-b3e1-7dc60e96e11f,3d6d04de-603a-40d7-8b40-50a320ddd891,4eeb823d-93f5-4cb9-b4dd-56fbd2852eab,1412da0d-d8b3-4d26-82bd-25b190e8a6a4,6169a3b9-e904-46e3-b8bf-b8c315d5c5e5,056a9e24-0a77-44d1-ab83-a80a75d9d788,9f6cdfba-a21d-4f16-a5e4-14986b2af751,db1f015c-ba4f-4d13-a0da-f774b99ce01a	GT:DR:DV:PL:GQ	0/0:57:19:0,13,363:12
+chr1	883245	cuteSV.BND.8	N	N]chr20:29789175]	0	q5	IMPRECISE;SVTYPE=BND;RE=14;RNAMES=7186749e-c3f5-4c2a-b234-3a6837d50fd6,e2af033c-70d2-4d88-8b6c-e67e2eadb7b5,16169411-77a4-427a-8aef-7b60074206bd,0c5c0fcf-8c53-415b-9bc9-2ed8a9e4299d,e530ef5b-f4f8-4800-aae0-eb330134cdaf,c085c408-f90b-403c-889c-556a8cf52725,3bd7dc1d-4849-4e40-b64d-3842fc25cb9b,97e774b1-1177-4bdf-9c45-4dbd0f3d8a73,80b82853-6e69-45ee-9ca4-e5ad9e8f03a1,42e33868-1a79-47a6-868e-7d5d389c5d56,a5e72e50-4e49-4227-b7ff-ee22a24a02f9,21421e6f-ff09-4fa3-bc1e-1b17dea7e828,085aed94-1490-4a98-8912-0e3be7f68801,bad14ecc-2338-4c54-afcf-9cd59bd90c2a	GT:DR:DV:PL:GQ	0/0:56:14:0,45,401:45
+chr1	886237	cuteSV.INS.20	G	GGCCCTTTGGCAGAGCACAGGTGCTGTGCTG	67.2	PASS	PRECISE;SVTYPE=INS;SVLEN=30;END=886237;CIPOS=-6,6;CILEN=0,0;RE=14;RNAMES=1b5284d7-0579-4bb8-aa3e-7bc85c40e956,2952f193-7c51-43af-bbc5-c080d80edcd7,833d5e0e-a83a-4d01-95b5-1e1327f581e0,ba1cb437-af9c-47a5-bb4f-7a83b124974c,e25b8c47-f70a-4568-b64b-53d6c821faee,9148c8b3-594a-4abe-afe8-9c5f32d76173,4ba6382a-2fce-4dae-80ab-4c1dedd1cec7,6ebac132-23ad-4bfc-82b9-71ab65b52b87,f8441ce1-4097-40ea-8559-2b09b0b98b05,157884a7-c3c2-4e4a-949a-53de16111ee0,d2c1f991-7bd6-47d9-a6ac-6379b6adcfc4,a095f8e9-9b24-4020-a392-c40aba2f48b5,8b08274d-9781-4d6f-99a3-717a424483e9,1f790562-7d96-4180-b322-9cb561596e5c	GT:DR:DV:PL:GQ	0/1:12:14:67,0,48:48
+chr1	893790	cuteSV.DEL.17	AAAAAAAAAAAAAATATATATATATATATATATATA	A	229	PASS	PRECISE;SVTYPE=DEL;SVLEN=-35;END=893825;CIPOS=0,0;CILEN=0,0;RE=25;RNAMES=f8441ce1-4097-40ea-8559-2b09b0b98b05,0bad51cf-85e9-451f-b2b3-1d92289ca318,989f153a-61c4-4600-9c1a-91a4804fe4c1,21e1dfd6-61cf-4be2-9067-82184c38457f,b0168428-ba4f-4c4c-ac1b-7988f4060d1f,c74e5c73-20e4-4e8c-8030-36adb8223044,157884a7-c3c2-4e4a-949a-53de16111ee0,1f790562-7d96-4180-b322-9cb561596e5c,25e64d5c-2313-4e64-8051-3bd8e6003005,1c0aa9ad-f2ba-43e2-86dd-40af939404f4,061f0900-f931-48aa-8864-d586f6d790ee,833d5e0e-a83a-4d01-95b5-1e1327f581e0,b531fd8f-94a3-401d-b47a-cc8d15a8d5d7,3d7a1e2c-1452-47e1-963c-a3cfc6d63b46,46183fcd-acab-4c99-8b0a-fe46ba072d9d,5ad8c07f-30be-4a25-b4bd-17c37679d9e7,6ebac132-23ad-4bfc-82b9-71ab65b52b87,c753e6fa-20f8-4c7e-aa67-9efcb87a27b8,d960c198-fbca-4240-aaec-0524e03219cc,2e5a5c36-81a3-4ef7-a4f6-c7ce89c15fcb,5c1c717f-82b0-498a-a57e-c3a7b5fc52f3,668be2a3-b472-4cf2-ab60-b6e01a18a8bb,a095f8e9-9b24-4020-a392-c40aba2f48b5,a7a20a3f-47f4-4d97-9f5f-68670b283b42,ade5f657-3781-475d-a952-a3a26cef57b7;STRAND=+-	GT:DR:DV:PL:GQ	1/1:1:25:229,57,0:56
+chr1	907847	cuteSV.INS.21	C	CCTGCCCGGTCCTTCTGACCAGCCGAGAGAGTA	45.6	PASS	PRECISE;SVTYPE=INS;SVLEN=32;END=907847;CIPOS=-7,7;CILEN=0,0;RE=12;RNAMES=6442676e-05bd-4d7e-9322-8a10a68af75b,989f153a-61c4-4600-9c1a-91a4804fe4c1,d9abe60f-2349-4c46-8fa8-4ed1d0681b41,cf34ac10-b4ba-4f7e-8eba-7dd1ebad8f3b,197ed372-bfef-4fe2-b0e0-df8b05eba7f3,3a6e4814-546e-4834-a6d9-14ababfe6a69,7c2d5b60-57ab-49b7-9d19-375dbf0cde78,bc69941c-5feb-4e57-aae0-0ee530b36dd9,013e7d51-f095-4b11-8a4e-f077dcc74e71,5ebeaf63-db07-4100-8081-1a1f63e74c8c,729b3db5-7276-4df7-ae39-9f9439c321ad,f86dbc79-ada1-4764-8777-6f2820cffbeb	GT:DR:DV:PL:GQ	0/1:15:12:46,0,74:45
+chr1	909163	cuteSV.DEL.18	GGCGCACCTTTGCTGGTATATGCGGGGGTCGGGGTCAGGCCCCCGGGCGCACCGTTGCTGGTATATGCGGGGGTCGGGGTCAGGCCCCCGGGCGTTGCTGGTATATGCGGGGGTCGGGGTCAGGCCCCCGGGCGCACCGTTGCTGGTATATGCGGTGGTCGGGGTCAGGCCCCCGGGCGCACCGTTGCTGGTATATGCGGGGGTCGGGGTCAGGCCCCCGGGCGCACCGTTGCTGGTATATGCGGTGGTCGGGGTCAGGCCCCCGGGCGCATCTTTGCTGGTATATGCGGTGG	G	324.4	PASS	PRECISE;SVTYPE=DEL;SVLEN=-292;END=909455;CIPOS=-53,53;CILEN=-26,26;RE=35;RNAMES=de9147d3-cb61-4585-a00d-6bcc0e53e7a2,e19d0ab7-8c9a-4cdf-9fa5-e3b64259bd21,1c0aa9ad-f2ba-43e2-86dd-40af939404f4,48ee0195-0418-4d3e-983e-94295d21d48a,e89b5093-4ee5-4f94-ab68-72b0fb142efa,f5da1070-e436-4e16-9076-0c28a37a8434,32160072-5834-4d3a-a92a-b787ff89d148,6442676e-05bd-4d7e-9322-8a10a68af75b,d34147d4-42e8-4c08-969c-c4a22a1c4b9e,c0470469-f331-492e-a916-713baf1a9e52,8794daf4-3a1e-43ca-80d2-223fe23c7bff,87cfd6de-af07-43af-afc0-3b7956141015,fb3c12ce-a395-4c12-8aad-b83a4db3068c,013e7d51-f095-4b11-8a4e-f077dcc74e71,157884a7-c3c2-4e4a-949a-53de16111ee0,a7a20a3f-47f4-4d97-9f5f-68670b283b42,be47fddc-a6e6-4e11-abdc-ec58059ac9aa,455e63af-8997-471c-a9f4-7538acdc6d3c,ef017d95-eb39-451e-a7a0-433e7c631511,331538b4-6108-49ad-ab42-3a04840bd1d5,8cb61c3c-7e5e-4310-b3ba-d6790a174aa0,45e5bce7-5948-4f24-9979-f624acf818b3,b531fd8f-94a3-401d-b47a-cc8d15a8d5d7,d9abe60f-2349-4c46-8fa8-4ed1d0681b41,3ff85788-7299-4769-b0e2-660d65429540,6097bf31-8314-4ec8-92d1-de170682d868,7c2d5b60-57ab-49b7-9d19-375dbf0cde78,197ed372-bfef-4fe2-b0e0-df8b05eba7f3,cedaf996-5a21-4bdd-bc96-6e12a05959d0,cf34ac10-b4ba-4f7e-8eba-7dd1ebad8f3b,bc69941c-5feb-4e57-aae0-0ee530b36dd9,5ebeaf63-db07-4100-8081-1a1f63e74c8c,dfddfd57-ec74-49a0-8c87-056496f06344,729b3db5-7276-4df7-ae39-9f9439c321ad,f86dbc79-ada1-4764-8777-6f2820cffbeb;STRAND=+-	GT:DR:DV:PL:GQ	1/1:1:35:324,82,0:82
+chr1	934050	cuteSV.DEL.19	GCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTCCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTCCGTTACAGGTGGGCAGGGGAGGCGGCTCCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGGGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTCCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGGGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCTGCTCCGTTACAGGTGGGCGGGGGAGGCTGCTCCGTTACAGGTGGGCGGGGGGGGCGGCTGCGTTACAGGTGGGCGGGGGGGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTCCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTCCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGG	G	162.3	PASS	PRECISE;SVTYPE=DEL;SVLEN=-829;END=934879;CIPOS=-14,14;CILEN=-6,6;RE=22;RNAMES=5e011927-ab9d-474f-91fc-f7ae39f94bc4,cb6fea29-8a74-4f19-97b0-047cd9ff1988,058a2a35-66c8-4e50-a04e-50abc2fee9e0,5439c77a-2a93-4cd8-9b3e-8d0055b43ebe,bdcdd759-18fb-49b0-b78d-c60f959d1e10,c2a57bcb-3194-4d93-8cf9-12c18544cc73,d84b2f57-3f94-4570-a9b9-d441844951d0,039e6e21-87a5-4c62-a83c-a7a6bc777e0e,5092d6a4-17e7-4b3d-ac3e-c22350294e33,6563cf52-9022-416c-b76a-ad4d604ad796,ef738199-2538-4ca3-bd44-4738dae9b64a,25d56d8f-95a3-467c-8971-704b6bd8b014,2ba057e6-2c4d-4d23-b853-edd58ffbbafd,8e0002eb-fff3-45d2-9a67-ae5bfeb455db,8909429c-8eae-4329-99f6-8ae048f53f8e,90759a7f-47ad-4027-a56d-575eb480bb51,f5da1070-e436-4e16-9076-0c28a37a8434,4225f521-f571-483b-8c4e-d899a90db58d,b1f681f8-03d5-4d20-8872-bbb605e83812,8ec4e4c1-9f5a-4bb8-89ab-920aa61ec69d,ed2eb115-3859-4e15-967e-5628f3e55e88,fbd996c8-c74d-43eb-bff2-121a8752eede;STRAND=+-	GT:DR:DV:PL:GQ	1/1:5:22:162,21,0:21
+chr1	936287	cuteSV.DEL.20	TAGGGCTCCTGGACGGAGGGGGTCCCCGGTCCCGCCTCC	T	257.6	PASS	PRECISE;SVTYPE=DEL;SVLEN=-38;END=936325;CIPOS=-8,8;CILEN=0,0;RE=28;RNAMES=cb6fea29-8a74-4f19-97b0-047cd9ff1988,a2c53f94-331e-4946-9670-fe7adad6b2b2,ef738199-2538-4ca3-bd44-4738dae9b64a,d84b2f57-3f94-4570-a9b9-d441844951d0,7fdc4490-b48a-4769-94bf-c0c67f477a2f,7ef9e428-6312-4642-a023-f3363c296d5a,8ec4e4c1-9f5a-4bb8-89ab-920aa61ec69d,5e011927-ab9d-474f-91fc-f7ae39f94bc4,b1f681f8-03d5-4d20-8872-bbb605e83812,8e0002eb-fff3-45d2-9a67-ae5bfeb455db,b2d0e51e-2f9d-40ef-910b-15553399360a,25d56d8f-95a3-467c-8971-704b6bd8b014,2ba057e6-2c4d-4d23-b853-edd58ffbbafd,8909429c-8eae-4329-99f6-8ae048f53f8e,f5da1070-e436-4e16-9076-0c28a37a8434,039e6e21-87a5-4c62-a83c-a7a6bc777e0e,2474461a-5501-41d4-8c2f-14c8ea360ca1,6563cf52-9022-416c-b76a-ad4d604ad796,790a1392-a4ed-431a-b852-6b2da93ed0c2,f7500359-a393-40b1-8b08-76a85d7de289,5092d6a4-17e7-4b3d-ac3e-c22350294e33,058a2a35-66c8-4e50-a04e-50abc2fee9e0,90759a7f-47ad-4027-a56d-575eb480bb51,bdcdd759-18fb-49b0-b78d-c60f959d1e10,fbd996c8-c74d-43eb-bff2-121a8752eede,4225f521-f571-483b-8c4e-d899a90db58d,ed2eb115-3859-4e15-967e-5628f3e55e88,5439c77a-2a93-4cd8-9b3e-8d0055b43ebe;STRAND=+-	GT:DR:DV:PL:GQ	1/1:1:28:258,64,0:64
+chr1	948693	cuteSV.INS.22	T	TACCCTGGTCCCCCTGGTCCCTTTGGCCCTGCCTGGCTGG	257.6	PASS	PRECISE;SVTYPE=INS;SVLEN=39;END=948693;CIPOS=-9,9;CILEN=-1,1;RE=28;RNAMES=da3d6919-68dc-40dc-9594-f00ad1e2ae52,c1613a03-ea98-4685-b3b9-c020a4c15210,eb762c90-9fcc-4d4c-9e98-debcc6e8261a,8f9eff79-5fa6-41a4-9961-d085eab8d911,6eb90826-9a88-48dd-966a-dd531f44fec5,cb6fea29-8a74-4f19-97b0-047cd9ff1988,4fd75c31-c8cb-4398-b105-5960c3eed53f,1638b6ea-9a32-4f75-bff1-31814803dfc1,2b2cea73-821f-4c3f-a7b0-0ea1e71c4031,35f7d250-ae9a-42da-984a-dacd59aefd87,2474461a-5501-41d4-8c2f-14c8ea360ca1,8a089335-5e21-404e-ac6e-8416b50619df,a58862dd-d98a-43f2-8f92-4a2cc3e44909,59158445-57ca-4f69-838e-b4e69bff3a5c,b39d85b2-e86f-4d3d-b4b9-dc64a04be2ca,23c8c9a1-9837-4401-9b74-d8d0e4d7a170,c4dabb82-0380-460c-9b0a-f94d85750d44,5092d6a4-17e7-4b3d-ac3e-c22350294e33,5623a23e-35c9-4fac-a114-e443f2c8ea83,5cbfd059-3131-4f6e-92ab-5a1c2438b814,c994f939-43ef-4f34-be99-93e3eed4d729,6cb90369-a4f1-4d89-a067-bbb639c431bf,f5da1070-e436-4e16-9076-0c28a37a8434,6ea88abc-babb-4aa0-8e58-1decb03e297f,e75216cb-9060-4279-94f4-739954cd39a6,bdcdd759-18fb-49b0-b78d-c60f959d1e10,c8e8bba4-4b8d-428a-a4b1-4e3c33b4d7ee,d28ba1cb-a79c-4766-995b-25cc0c43384b	GT:DR:DV:PL:GQ	1/1:1:28:258,64,0:64
+chr1	964651	cuteSV.DEL.21	TGCTGCCGGGAGGGGGGCGCGGGTCCGCAGTGGGGATGTGCTGCCGGGAGGGGGGCGCGGGTCCGCAGTGGGGAT	T	229	PASS	PRECISE;SVTYPE=DEL;SVLEN=-74;END=964725;CIPOS=-13,13;CILEN=-1,1;RE=24;RNAMES=17e1c98b-f7f1-4b30-a986-81cfbd8f4050,6518475e-ac67-4cf0-a321-471e3a59a87c,ef7fe0c2-9437-4f96-8ea1-88c04ab8147b,68892a2e-c39b-49e2-a491-00cd8f97811c,1d6b0d43-1deb-4376-8159-b695b0485276,c65f4965-ccfe-4d55-be58-b7c37f5cd837,c4d8bf72-de2a-4830-94c0-be8914a95fcf,acaeaf54-d697-48db-9593-cc9cbe17c947,e5c003c4-8c18-4466-b90f-064be3b489a7,65b0344c-fe44-4b34-b994-74450789fb6b,71dc3e8c-1f43-4431-8ee4-ce08314cbd2a,76751550-cc73-4a3c-9ef1-b2065c3b0e30,e4f38bc4-58ac-44db-aee0-dfd64ec30bda,8b821158-c7ae-41f5-a960-eac7fa52b8ae,e6cb51d8-b370-46fa-be4f-6e33f868005b,20afc371-dd60-46ae-b59a-eeb04cff123c,dc13e96d-675b-4a42-b876-179e591ec574,4bb3065b-6927-4086-aae6-68b2e3fba092,1ba4584e-575b-4136-a254-85e5e1fd19d5,49a01f1e-6091-470d-a45b-24c271e07986,c8e6c9f0-34cf-45a9-917a-bb30503fe891,ab9c0652-0fba-4be1-9b36-43ed10bb3d50,703ccb79-8208-4993-9803-7ab17d11da85,c72430c7-3181-4aa3-9f51-4cd251652aac;STRAND=+-	GT:DR:DV:PL:GQ	1/1:0:24:229,61,0:61
+chr1	976689	cuteSV.DUP.3	C	<DUP>	0.1	q5	IMPRECISE;SVTYPE=DUP;SVLEN=83;END=976772;RE=6;STRAND=-+;RNAMES=44bbad3a-ec7c-4ad4-9202-70f500f1369b,b641bc48-fbae-40e1-97a6-a8e99aee46ad,c399f294-a2ce-4e1b-8f51-9868513601dc,3dbe75af-7974-428e-8e64-4d2a1e18fa0e,b85c5def-3018-4c59-930b-e3ec0752d39b,f1a145aa-6901-4e7a-a6d7-a3e5a9938f03	GT:DR:DV:PL:GQ	0/0:23:6:0,17,162:16
+chr1	977237	cuteSV.INS.23	A	AACAACCCCAGGAACCACCTACCTCCCCGCAACCCCGGGAACCGCCCTCCCCTCCCCCGCAACCCGGAAGGGACACCTCCCACTCCCCCCCCACCCAGCCCAGGAACCTCCCCTCCCCCCATAACCCAGGGCATCACCTCCCTCCACCCCGCGCCCGGGGAACCGCCCCTCCCCTCCCCCGACCAACCCCCGGGAACCTGCTCCCACTCCCCGCAACCAGGGGGCACCTGCACTCCCCCATGGCAGGGCCGCCTCCACTCCCCCACCCAGCAGGAACCGCCCTCCACTCCCCGCAACCCCAGGAACCCTCCTCCCTC	257.6	PASS	PRECISE;SVTYPE=INS;SVLEN=316;END=977237;CIPOS=-137,137;CILEN=-53,53;RE=28;RNAMES=edc9239e-9688-445a-88e5-423ca07dbe77,5f598d57-f68e-4ecd-b9cc-f9195a998554,d89d2427-3a22-4aab-b99f-2ab3097cfb84,e525aa7b-51be-45f5-9303-b4f8d5c37446,3dbe75af-7974-428e-8e64-4d2a1e18fa0e,44bbad3a-ec7c-4ad4-9202-70f500f1369b,9996ec4f-5be1-4652-bbba-41e823c07ca9,e8f80089-d8a9-4b7e-8c2d-b077fe2c5d61,7f13876d-4fcb-4799-a07d-f152551dfe16,703ccb79-8208-4993-9803-7ab17d11da85,b641bc48-fbae-40e1-97a6-a8e99aee46ad,2b0f7c30-ac53-43d3-bc0e-72dc495e3140,c399f294-a2ce-4e1b-8f51-9868513601dc,e16da602-240a-4f23-a489-553fa60982a3,71dc3e8c-1f43-4431-8ee4-ce08314cbd2a,a28b6e79-70d8-42c6-a115-862aaa959b74,755fcc77-3e17-40a7-be45-2d233eb9dcef,f1a145aa-6901-4e7a-a6d7-a3e5a9938f03,b5ee6a22-b578-4a47-86d9-2a806e7b82fe,22871708-eb90-45cb-8849-95dd335f6da7,3f6fd9c9-8f6f-4598-9e66-d1afbdaa4917,09eeaf48-ecac-4d6b-80df-c4bcbcf72936,6d6f1e38-c400-4faa-b2f2-534d498df5a0,5de6d963-11d3-42b2-835b-88723c55b1b3,b824fb18-679e-4312-ba10-b2a7b665bbca,f0b732cc-4a83-4934-9437-7b7874d37988,2d4ca48a-0ac2-4283-95c3-d49ad086e788,24605ded-9f4a-411a-85ae-4e352d32991a	GT:DR:DV:PL:GQ	1/1:1:28:258,64,0:64
+chr1	988837	cuteSV.INS.24	C	CGTTTTTGGAGTTCTGGGTTGATTGTTTCTGGAGTTCAGGGTTGGC	248.1	PASS	PRECISE;SVTYPE=INS;SVLEN=45;END=988837;CIPOS=-7,7;CILEN=-1,1;RE=27;RNAMES=33c8c639-8316-4e93-8a3e-0f4b0c1e4dbb,2d4ca48a-0ac2-4283-95c3-d49ad086e788,741fcfa9-cbc3-4032-9299-2d29d7173087,bbffc87d-e0bc-4623-9165-7976befc5ed8,c09ec4b6-5a8d-4292-bf93-9655137ffb41,d522b452-a778-4b87-bc69-f9c7e28a67f5,fb659d07-1b25-4168-96b8-1eb2519fe0e5,58ee7776-a955-4752-b7da-83e96a10779d,a59d11ba-9ecb-41fc-a6ff-c70e764aacce,71dc3e8c-1f43-4431-8ee4-ce08314cbd2a,0526a88a-c43d-4306-87d1-f8044a4d7250,12920a0b-9f0c-4dff-8033-4bc19fed9b8a,7f13876d-4fcb-4799-a07d-f152551dfe16,205538dd-b6d5-443d-a8f4-d78dfa42fce1,e75e13bb-1f83-4cfe-8799-440edbb3df4e,551ed2f0-44cf-491d-b155-9445d2d9996c,22bfcad0-231a-4d9f-9122-46ef80736dce,9996ec4f-5be1-4652-bbba-41e823c07ca9,5ab823ac-f22f-4ec1-8265-30b737d5b4e7,5688faaa-ee3e-4c00-ac67-60e68aaa1731,57466553-b5b7-4875-b069-b34ed1b5f236,e79c9c43-465b-43fa-ace2-2aac17618d6e,b2ea2333-8b7b-4892-9ede-39b8c68bbb73,81baf47c-0987-4293-94a6-9030b31203d7,bcab629a-fdad-4e0e-93a2-ccffabcff548,01412e3f-65aa-457c-b3b7-c873f69e1c18,b641bc48-fbae-40e1-97a6-a8e99aee46ad	GT:DR:DV:PL:GQ	1/1:1:27:248,62,0:61
+chr1	996558	cuteSV.INS.25	A	AGGGAGGGCAGAAAGGACCCCCCGCTGGAGGGGGCACCTCACATCTGGGGCCACAGGATGCAGGGTGGGGGAGGATGCCAGAAAGGACCCCCTGCTGGAGGGGGACCCCCCCGCTGGGAGGGGGCACCCCACATCTGGGGGCCATATGCAGGG	353.1	PASS	PRECISE;SVTYPE=INS;SVLEN=152;END=996558;CIPOS=-56,56;CILEN=-8,8;RE=37;RNAMES=db3a6405-9d82-4cbd-83e6-c0a4ce8ee914,33c8c639-8316-4e93-8a3e-0f4b0c1e4dbb,2d4ca48a-0ac2-4283-95c3-d49ad086e788,8d1b989b-10de-4784-95ef-e3943ebf6309,9e8c7cfd-c151-4a9c-b408-139491f081bb,63b9d932-5152-44b0-96ad-ecbb02ab915e,0526a88a-c43d-4306-87d1-f8044a4d7250,a43d0a23-98d1-4fed-8466-70240231420d,bb4a3545-df5e-4f76-a5cc-d0f7d3c3a921,603147e9-a00a-4f82-8e5d-8b203c6dcac7,d30ce71e-09f5-4f48-adfe-909e7ae658f4,a738069e-5b3a-4636-bed4-0e547fe1a8c4,634bae15-9dca-4fc1-9efb-fd7d940f3a37,01412e3f-65aa-457c-b3b7-c873f69e1c18,815ed88e-2e7c-479d-a3a3-2d7694f08791,57466553-b5b7-4875-b069-b34ed1b5f236,fd39c92e-d02b-4fdd-9f70-4491eee84dcd,586d0898-7c61-4e45-899b-46e169314896,205538dd-b6d5-443d-a8f4-d78dfa42fce1,c09ec4b6-5a8d-4292-bf93-9655137ffb41,fd5ba769-5143-40f6-be53-029ea699fd42,9996ec4f-5be1-4652-bbba-41e823c07ca9,36848b46-b6ad-4a7d-91df-8ff23667ff2b,551ed2f0-44cf-491d-b155-9445d2d9996c,f76e33a2-c83f-420f-9f54-cb2f270559d4,e26c3e7f-c16a-4730-987d-937c2b58025f,7fb8eda5-c691-42ac-951e-ca7e11588193,a59d11ba-9ecb-41fc-a6ff-c70e764aacce,f030743e-41da-4f0d-9e2e-753f8d1c41af,16b51bbe-d01c-40a9-a113-e457f9042688,10b28999-0e46-490c-901a-563958d0ea48,4d2326aa-2e82-4b75-993a-b6ce6dc5cb79,bbffc87d-e0bc-4623-9165-7976befc5ed8,7f13876d-4fcb-4799-a07d-f152551dfe16,5688faaa-ee3e-4c00-ac67-60e68aaa1731,3d1f73fa-a716-4e04-91b0-913c39df6718,e40ecb0a-6e7a-4f69-9220-6edb49e65f06	GT:DR:DV:PL:GQ	1/1:0:37:353,94,0:94
+chr1	998766	cuteSV.INS.26	G	GGGGAGGGCTGAGCGGAGGGGAGGGCGCGAGCTGGA	71.7	PASS	PRECISE;SVTYPE=INS;SVLEN=35;END=998766;CIPOS=-7,7;CILEN=-1,1;RE=15;RNAMES=205538dd-b6d5-443d-a8f4-d78dfa42fce1,10b28999-0e46-490c-901a-563958d0ea48,03736baf-21ff-4edb-8c3a-564a3da1dcbb,57466553-b5b7-4875-b069-b34ed1b5f236,a738069e-5b3a-4636-bed4-0e547fe1a8c4,603147e9-a00a-4f82-8e5d-8b203c6dcac7,5688faaa-ee3e-4c00-ac67-60e68aaa1731,0526a88a-c43d-4306-87d1-f8044a4d7250,9e8c7cfd-c151-4a9c-b408-139491f081bb,db31bc21-d577-4b10-86d1-af75ceebb355,328b5933-11f6-4319-98a8-ace09cd88f21,634bae15-9dca-4fc1-9efb-fd7d940f3a37,8d1b989b-10de-4784-95ef-e3943ebf6309,33c8c639-8316-4e93-8a3e-0f4b0c1e4dbb,16b51bbe-d01c-40a9-a113-e457f9042688	GT:DR:DV:PL:GQ	0/1:13:15:72,0,53:52
+chr1	1028941	cuteSV.INS.27	A	AGCAGTGCGCAGGCCAGGGCGCCCACACCCACGCCACTCCGGGAAGAACCAGGCCCCAGCCCCTCGTGGGCCAGGGGCGCCACAGCCACGCCACCCTTTCGAAAGACCGGGCCCCAGCC	1.5	q5	IMPRECISE;SVTYPE=INS;SVLEN=118;END=1028941;CIPOS=-49,49;CILEN=-7,7;RE=6;RNAMES=ecb9a83c-48c1-426e-8ae0-3744d8c9c7c8,8b003e80-f945-4cbc-92d9-e6067d1f4457,99289c42-3c75-4773-8fa8-6dd7d3ea7446,83a65fbd-8ad8-4492-a486-0664dd4bfc6d,6d40d9c0-b42f-4c94-914c-a20ed46d3bda,ef99ad9e-a3e7-4140-b150-d7a743cefb11	GT:DR:DV:PL:GQ	0/0:18:6:1,5,116:5
+chr1	1029413	cuteSV.INS.28	G	GTATGCAGGCGGAGGTGGGAGGGGACATCTGAG	21.4	PASS	PRECISE;SVTYPE=INS;SVLEN=32;END=1029413;CIPOS=-14,14;CILEN=-1,1;RE=10;RNAMES=25fbbfe6-3bf9-406f-8204-afe00f41e043,765cb5f9-b894-4fe1-bac5-f0b1b9e80d57,44b6e03c-aea8-40a0-b92f-4a941f052200,94fc27c8-7cf0-40c8-9d15-0e18d842c864,95b7b8bc-573e-4eb1-83cd-ea4f10073d20,531f08c5-12d5-4330-beea-e5701486f243,f516c7b2-bfb5-43a4-841e-b29972690940,99289c42-3c75-4773-8fa8-6dd7d3ea7446,a5432370-c974-46f6-ab19-70e62d322628,8623c422-77b8-49d3-8172-a0e3653d29dc	GT:DR:DV:PL:GQ	0/1:19:10:21,0,107:21
+chr1	1030853	cuteSV.DEL.22	TGTGTGTGTGCAGTGCATGGTGCTGTGTGAGATCAGCAT	T	219.5	PASS	PRECISE;SVTYPE=DEL;SVLEN=-38;END=1030891;CIPOS=-18,18;CILEN=-1,1;RE=28;RNAMES=765cb5f9-b894-4fe1-bac5-f0b1b9e80d57,b6d1f0c3-2990-4ad7-986b-39de4ad3688c,f398b356-cdb7-4c64-8adc-ddd6dae15588,04c8133e-b70e-4755-9b44-7f0ade6a391c,2c763c62-5ac4-4d4d-83e9-79b89a0bfd29,531f08c5-12d5-4330-beea-e5701486f243,6d40d9c0-b42f-4c94-914c-a20ed46d3bda,f516c7b2-bfb5-43a4-841e-b29972690940,37066cab-3888-4261-9767-b874f4df3ade,8e343519-ee17-4d21-8f5a-c9a5b2a7a76c,b494b509-dc88-474c-80bf-12fa0e142343,ecb9a83c-48c1-426e-8ae0-3744d8c9c7c8,0b63842d-c3eb-4ed0-94ba-89576acad33f,25fbbfe6-3bf9-406f-8204-afe00f41e043,56f95b02-edd0-47cb-bab2-fdd45cba961e,5d8225c7-5225-4493-9ef6-d19161771345,5ffce705-8fca-4e75-b044-93d45f1ee9d9,3ee13d15-bd0b-4146-9605-38e698a91c49,97c4f3a9-d9ac-4ecb-9d09-24c865004f62,99289c42-3c75-4773-8fa8-6dd7d3ea7446,83a65fbd-8ad8-4492-a486-0664dd4bfc6d,ebf6d94a-7c83-4085-8bba-a3741da16f8e,d26a8d3b-e125-4b20-88b7-f9eceab15ddc,44b6e03c-aea8-40a0-b92f-4a941f052200,8b003e80-f945-4cbc-92d9-e6067d1f4457,4d408313-3977-4014-8e6c-8db62eda4673,a5432370-c974-46f6-ab19-70e62d322628,a6f31b98-32bb-410f-a717-120e935e8002;STRAND=+-	GT:DR:DV:PL:GQ	1/1:5:28:219,37,0:36
+chr1	1041775	cuteSV.DEL.23	CTCCGGCCAGTGCCAGGGTCGAGGTGGGCGGCTCCCCCGGGGGAGGG	C	181.3	PASS	PRECISE;SVTYPE=DEL;SVLEN=-46;END=1041821;CIPOS=-14,14;CILEN=-1,1;RE=22;RNAMES=a64a03dd-185a-4818-8bee-7fcdb0319756,4b17d8e6-9217-4e2a-9faa-9e3f74f0c362,99289c42-3c75-4773-8fa8-6dd7d3ea7446,ba90f173-0dc5-4466-9200-a2a2297397ab,cf6dbca0-6268-43ee-9fcf-e9629260af48,65d2b66b-d923-40be-9fec-4fbbeea91c78,6f2a05fa-6b69-4e4a-ae27-3eabdf8970d9,51c54121-152d-4fd6-a888-847105530576,2c763c62-5ac4-4d4d-83e9-79b89a0bfd29,d26a8d3b-e125-4b20-88b7-f9eceab15ddc,3cb621ef-1675-4fed-96a7-65a09493297f,59c98262-5546-4735-aa25-67dc3c50d9f8,77bb4e80-06a6-4d19-b8b9-a286c2a7f2f6,b3cfd68b-0a60-4513-b081-af5f432a6fb0,8354b887-a636-4b9f-b963-24715918b6a1,6d40d9c0-b42f-4c94-914c-a20ed46d3bda,d5d77fef-ed0e-4f4b-a783-f38dce9f86ac,54547fe9-b935-4472-a5cf-83945666a995,e65fd9dc-95c3-45a8-9730-1cfb8ba8b300,d5ec49d3-bd52-427e-b561-07a09e66f612,5ffce705-8fca-4e75-b044-93d45f1ee9d9,e03bcc49-ddec-4177-8eea-8f4ed9cbed20;STRAND=+-	GT:DR:DV:PL:GQ	1/1:3:22:181,35,0:35
+chr1	1068795	cuteSV.INS.29	G	GTCCACAGCAGGGCAGGCCCGCCAGTGCCGCATGATGCCCCAGGTGGCGGGCAGGGCCTGGGCTGTGGGTGCAG	324.4	PASS	PRECISE;SVTYPE=INS;SVLEN=73;END=1068795;CIPOS=-12,12;CILEN=-1,1;RE=34;RNAMES=edb11067-c404-4807-ab1c-cbc750ccebef,6ebded49-5c28-4243-b8eb-26e45f3ee5ff,c634c64c-2fd8-4a0c-a8db-ef436dfc17bf,616f68f1-5dfc-4f82-ad0b-2ee28ffc17a1,54547fe9-b935-4472-a5cf-83945666a995,32d2f0f7-c1cf-466d-8726-0e6caa00c35a,6255065a-9f26-4a69-97e5-8ebce1d1f382,87850298-2c1d-4a7c-9d6b-f0e096cfa397,bef325ad-0c6e-49ee-8cd3-191c11197e67,a64c2fc6-2da8-423e-9201-1533f81e5eda,a8ff5a7e-a988-4af5-9557-5e7807c00289,3050e181-e4df-4883-8daf-cc2453d15d9c,61c46a1a-9e73-4299-92ac-6d94dd6b23a0,a89a1947-ec8b-46df-8319-aa19f25e73b1,1a14603e-0571-4ce0-8f26-89b0c7074a73,ac1de16b-6802-4fe2-9672-3e548e18b9c2,bdcaebd1-c85b-41ff-b9f8-eff7af676070,7929258b-7dc7-4c71-9e22-7a9439666a19,6c973e59-e33a-4619-b7d2-6334d50c629a,82438b9f-6bb7-466e-9d6d-2c1d669b24ac,6278fc73-6dfe-45a7-a469-22aebe240a26,c5cbc961-a06a-4355-a28b-80dad6325418,0aef4258-425f-4230-a17d-f9a12bc13358,550ccb26-086e-4b4b-b92b-fff688fc873d,4ee3b15d-ca09-4598-89da-1b09e9d4d7fd,d0b569a4-5f75-4966-b266-eeb4008f5ed9,037ee0d1-b4ed-4e93-b41a-2e680fda7009,8716c5bf-5ea1-46d9-b434-901e8c1a50e9,e92f2f51-6f24-49ef-a43b-14f9e5abf9f2,bf8f78d4-dab1-4044-a6b2-f1af5d391de1,c3114829-11c5-49da-ab1a-cf227e963e2e,b9012afd-987c-4ad4-b1a5-aa7c3fe6bc4a,d227156d-8748-4569-b546-1f67b04ffcd4,99289c42-3c75-4773-8fa8-6dd7d3ea7446	GT:DR:DV:PL:GQ	1/1:0:34:324,87,0:86
+chr1	1076281	cuteSV.DEL.24	TGGGGGGAGGCTGAGGCTACGGGGACTCCGTGGGGGGAGGCTGAGGCTACGGGGACTCCGTGCTGGGAGGCTGAGGCTATGGGGACTCCGTCGGGGGAGGCTGAGTCTATGGGGACTCCGTGGGGGGAGGCTGAGGCTATGGGGACTCCGTGCCGGGAGGCTGAGGCTATGGGGACTCCGTGGGGCAGGCTGAGGCTATGGTGACTCCGTGCAGGGCTGTGAGGCTACGGGGACTCCGTGGGGGGTGGCTGAGGCTACGGGGACTCCGTGGGGGGAGGCTGAGGCTACGGGGACTCCGTGGGGGGAGGCTGAGGCTACGGGGACTCCGTGGGGGGAGGCTGAGGCTACGGGGACTCCGTGGGGGGAGGCTGAGGCTACGGGGACTCCGTGGGGG	T	152.7	PASS	PRECISE;SVTYPE=DEL;SVLEN=-393;END=1076674;CIPOS=-34,34;CILEN=-61,61;RE=18;RNAMES=4999d51d-7728-4398-a04f-90d5d1f14259,6300ef61-230c-4fd9-a137-56e522b02d83,bf8f78d4-dab1-4044-a6b2-f1af5d391de1,a89a1947-ec8b-46df-8319-aa19f25e73b1,54547fe9-b935-4472-a5cf-83945666a995,e463ee38-1282-45ce-8dfe-721cd844f72c,6c9cc18f-75a9-4fa7-b7c9-f8c6500e9fab,c3114829-11c5-49da-ab1a-cf227e963e2e,aaf82e32-f7f4-4122-9309-08e94f165e1b,268013e1-c660-424b-ba46-ecda97faef65,ac1de16b-6802-4fe2-9672-3e548e18b9c2,87850298-2c1d-4a7c-9d6b-f0e096cfa397,cd7bbea6-6627-4ec4-8449-58831ae76422,17bdd25a-2e86-4cc9-bd83-360694e61966,bef325ad-0c6e-49ee-8cd3-191c11197e67,c5cbc961-a06a-4355-a28b-80dad6325418,6c973e59-e33a-4619-b7d2-6334d50c629a,037ee0d1-b4ed-4e93-b41a-2e680fda7009;STRAND=+-	GT:DR:DV:PL:GQ	1/1:2:18:153,32,0:31
+chr1	1078872	cuteSV.DEL.25	AGGCTATGGGGACTCCGTGGGGGGAGGCTGAGGCTATGGGGACTCCGTGCGGGGAGGCTGAGGCTATGGGGACT	A	3.6	q5	PRECISE;SVTYPE=DEL;SVLEN=-73;END=1078945;CIPOS=-48,48;CILEN=-16,16;RE=6;RNAMES=bf8f78d4-dab1-4044-a6b2-f1af5d391de1,f3451d58-eea4-4590-bdf2-31223f246593,87850298-2c1d-4a7c-9d6b-f0e096cfa397,6c9cc18f-75a9-4fa7-b7c9-f8c6500e9fab,5220f2c0-179e-4605-9f57-cf23c1449820,8cf3c101-6a61-4820-b2ee-91166a6dfa97;STRAND=+-	GT:DR:DV:PL:GQ	0/1:16:6:4,2,99:3
+chr1	1080864	cuteSV.INS.30	C	CCCCCACTCCCGGTCCCTGTCTCCTTCCCTCCGCCCCCACCTCGGTCCCTGCTTTCCTCCGCCCCCACCTCGGTCCCCTTGTCTCCTTCCCTCCGCCCCACTCGGTCCCTGTCTCCTTCCCTCTCCGCCCCCACCTCGGTCCCTGTCTCCTTCCTCCGCCCCCACCTCATCCTGTCTCCTTCCTCTCCGCCCCCACCTCGGTCCTTTTCGTCTCTCCTTCCCTCCGCTCCCCACCTCGGTCCCTGTCTCCTTCCTCCGCCCACCTCGGTCCCTGTCTCCTTCCTCCGCCCCCACCTCGGTCCCCTGTCTCTTCCT	219.5	PASS	PRECISE;SVTYPE=INS;SVLEN=314;END=1080864;CIPOS=-53,53;CILEN=-45,45;RE=23;RNAMES=af0b84e0-4359-450b-9d92-62eadaa7d2b8,7134ddb6-573e-4a8f-b7d5-8790178f9fa1,f3451d58-eea4-4590-bdf2-31223f246593,6afc9f30-36b8-4d0b-8ed7-92f955a1db08,6ebded49-5c28-4243-b8eb-26e45f3ee5ff,c31fb5b7-a8ef-4c62-b90e-450f936bae4a,e463ee38-1282-45ce-8dfe-721cd844f72c,6c9cc18f-75a9-4fa7-b7c9-f8c6500e9fab,5220f2c0-179e-4605-9f57-cf23c1449820,aaf82e32-f7f4-4122-9309-08e94f165e1b,a8ff5a7e-a988-4af5-9557-5e7807c00289,8012b87e-4103-49cb-979e-9e0fbcba6769,09e46a61-482f-4f8c-bd98-f51d32cb239d,cd6bc3de-11f8-4b66-b39c-86719785ed87,c3114829-11c5-49da-ab1a-cf227e963e2e,6cd7ddfb-f119-404a-ab96-926853e2c497,17bdd25a-2e86-4cc9-bd83-360694e61966,5c7b6a09-da87-4a62-a166-1af09aa3f5a4,de5e48ae-831f-4790-9744-345b96efddf4,6f3d4f11-2168-4edf-8922-eac14761fb72,8cf3c101-6a61-4820-b2ee-91166a6dfa97,e819ddd5-c668-4730-bfb4-4f082d30cab7,cd7bbea6-6627-4ec4-8449-58831ae76422	GT:DR:DV:PL:GQ	1/1:0:23:219,59,0:58
+chr1	1139113	cuteSV.DEL.26	GAAGGTGGGGGTGTCAACGTCGAACCGGGGGACCTGGGTCCTGGGGAGTTTCCTGGGGTCAGAAGGTAGGGGTGTCAATGTCGAACCGGGGGACCTGGGTCCTGGGGAGCTTCCTGGGGT	G	111	PASS	PRECISE;SVTYPE=DEL;SVLEN=-119;END=1139232;CIPOS=-12,12;CILEN=-1,1;RE=21;RNAMES=a4e2c6c8-24f5-415e-9d2d-6267b6650bb1,5a46ef88-dc8e-441d-b4f8-9dccbb2834c4,2f5dceab-4822-4348-955f-096a60b31b8e,ba3206c6-3e43-48e2-972c-8491a3188ea1,c4179c44-4791-4cb3-b520-e1fe81ab3748,2b838716-d558-4c11-bd39-1f8859acf4e9,454b9f78-d1c3-424e-bc78-923da02f01ae,5c697790-beff-40e7-9f60-2ea6819adc81,21b1ff19-a677-4984-ad6e-a9a1ebf7c1ed,57114dc7-8b5b-4199-9c74-878b96c79fa6,dbf0367c-5f33-47c0-9508-6d142504fd68,84e227f7-009c-4870-994c-82bf93a66718,c5642ff8-7392-45b5-98fd-0c64e782221d,80d5e617-b844-443e-b5bd-f221374e76cf,b4951356-ca81-42c9-9826-ac0e0f14cdc8,b57825a6-97f9-4fc5-bfab-a0961dffa9ad,ee0d9263-d299-4379-bf92-ccb062c13ae9,b6d1b7f1-19b8-4776-b6df-6fb949e0e0db,70c17be1-8aa7-4a06-bea1-b58fa5f6c4c5,c81c027c-07df-4de8-93ca-b163ddb87fed,cacdc6be-7a4f-49c8-ad8b-c8bbe2c80b38;STRAND=+-	GT:DR:DV:PL:GQ	0/1:14:21:111,0,44:44
+chr1	1140208	cuteSV.DEL.27	TCAGAAGGTGGGGGTGTCAACGTCGAACCGGGGGGCCTGGGTCCTGGGGAGCTTCCTGGGG	T	74.8	PASS	PRECISE;SVTYPE=DEL;SVLEN=-60;END=1140268;CIPOS=-12,12;CILEN=-1,1;RE=18;RNAMES=ba3206c6-3e43-48e2-972c-8491a3188ea1,21b1ff19-a677-4984-ad6e-a9a1ebf7c1ed,c81c027c-07df-4de8-93ca-b163ddb87fed,454b9f78-d1c3-424e-bc78-923da02f01ae,5c697790-beff-40e7-9f60-2ea6819adc81,a4e2c6c8-24f5-415e-9d2d-6267b6650bb1,57114dc7-8b5b-4199-9c74-878b96c79fa6,84e227f7-009c-4870-994c-82bf93a66718,b6d1b7f1-19b8-4776-b6df-6fb949e0e0db,dbf0367c-5f33-47c0-9508-6d142504fd68,80d5e617-b844-443e-b5bd-f221374e76cf,c4179c44-4791-4cb3-b520-e1fe81ab3748,cacdc6be-7a4f-49c8-ad8b-c8bbe2c80b38,ee0d9263-d299-4379-bf92-ccb062c13ae9,5a46ef88-dc8e-441d-b4f8-9dccbb2834c4,70c17be1-8aa7-4a06-bea1-b58fa5f6c4c5,2b838716-d558-4c11-bd39-1f8859acf4e9,c5642ff8-7392-45b5-98fd-0c64e782221d;STRAND=+-	GT:DR:DV:PL:GQ	0/1:20:18:75,0,94:74
+chr1	1140221	cuteSV.INS.31	G	GGTCAGCATCGAACCGGGGACCTGGGTCCTGGGGAGCTTCCCTGGGGTCAGAAGGTGGGGGTGTCAACATCGAACCGGGGACCTGGGTCCTGGGGAGCTTCCTGGGGTCAGAAGGTGGG	0.4	q5	IMPRECISE;SVTYPE=INS;SVLEN=118;END=1140221;CIPOS=-90,90;CILEN=-46,46;RE=7;RNAMES=552a6e53-6d8f-4b59-9a27-92f32bc5d55a,63dff088-63aa-495d-9a83-7ead3bac9ad6,a96b4308-b4a8-4862-ab51-4d246ffa4856,5d272fd2-da4d-451d-8ca4-885acaad84e7,13395d40-9538-461e-ac5c-ea462c9e55fe,26953a70-aec9-4b3e-8c56-f0de5a9c3101,aa7d5ab5-720b-4edd-8c13-1f643b0e0c87	GT:DR:DV:PL:GQ	0/0:23:7:0,10,153:10
+chr1	1140336	cuteSV.INS.32	G	GGGGGACCCGTCCTGGGAGCTTCCTGGGGTCAGAAGGTGGGGGTGTCAACATCGAACCGGGGGACCTGGGTCCTGGGGAGCTTCCTGGGGTCAGAAGGTGGGGTGTCAGCATTCGAACCGGGGACCTGGGTCCTGGGGAGCCTGGGGTCAGAAGCCGTAGGGGTGTCAGCATCGAACCGGGGGACCTGGGTCATGGGAGCTTCCTGGGGTCAGAAGTGGGGTGTCAACGTCGAACCGGGGGCCTGGGTCCTGGGGCCTTCCTGGGGTCAGAAGGTAGGGGTGCCGAATCCGGGGACCTGGGTCCTGGGAGCTCTGGGGTCAGAAGGTGGGGTGTCAACGTCGAACCGGGGGACCTGGGTCCTGGGGAGCTTCCTGGGGTCAGAAGGTGGGGGGGTCAACGTCGAACCGGGGGACCTGGGTCCTGGGGAGCTCCTGGGTTCAGAAGGTGGGGGTGTCAGCACGAACCGGGGGACCTGGGTCCTGGGAGCTTCCCTGGGGTCAGAAGGTGGGGGTGTCGTTATCGAACCGGGGGACCTGTCCTGGGAGCTTCCTGGGGTCAGAAGGTGGGGGTGTCAACATCGAACCG	0.5	q5	IMPRECISE;SVTYPE=INS;SVLEN=585;END=1140336;CIPOS=-110,110;CILEN=-40,40;RE=6;RNAMES=68113a8c-4d8d-4600-ac9f-1da02a5a8fe9,7594e3bc-88ee-4af4-816b-a1f7cf6c6d8a,3a3f9b2d-0ad2-4c86-bb32-06b8dadb0fa3,d23f3ab5-8a68-4d47-80bf-2ab949052219,3e352bf2-2d6d-4536-b218-cab154bdfb7b,a83cb84f-f8de-497f-bc4b-243637a2f5e5	GT:DR:DV:PL:GQ	0/0:20:6:1,10,134:9
+chr1	1141403	cuteSV.INS.33	C	CCCCATCCCCGCCCCGTCCACAACCCCATCCTTACCTCTATCCCCACCCACATCCTTA	108.5	PASS	PRECISE;SVTYPE=INS;SVLEN=57;END=1141403;CIPOS=-13,13;CILEN=-2,2;RE=21;RNAMES=70c17be1-8aa7-4a06-bea1-b58fa5f6c4c5,21b1ff19-a677-4984-ad6e-a9a1ebf7c1ed,57114dc7-8b5b-4199-9c74-878b96c79fa6,c81c027c-07df-4de8-93ca-b163ddb87fed,5c697790-beff-40e7-9f60-2ea6819adc81,ee0d9263-d299-4379-bf92-ccb062c13ae9,454b9f78-d1c3-424e-bc78-923da02f01ae,2b838716-d558-4c11-bd39-1f8859acf4e9,ba3206c6-3e43-48e2-972c-8491a3188ea1,c5642ff8-7392-45b5-98fd-0c64e782221d,b731b06b-1457-48a3-91fe-a228dcc63878,5a46ef88-dc8e-441d-b4f8-9dccbb2834c4,c4179c44-4791-4cb3-b520-e1fe81ab3748,b4951356-ca81-42c9-9826-ac0e0f14cdc8,a4e2c6c8-24f5-415e-9d2d-6267b6650bb1,84e227f7-009c-4870-994c-82bf93a66718,17ced63c-a1cd-4f0a-8bec-4aa7d1c613cb,80d5e617-b844-443e-b5bd-f221374e76cf,b6d1b7f1-19b8-4776-b6df-6fb949e0e0db,cacdc6be-7a4f-49c8-ad8b-c8bbe2c80b38,0ed75539-2679-496f-aeed-96a6daada7ed	GT:DR:DV:PL:GQ	0/1:15:21:108,0,51:51
+chr1	1168040	cuteSV.DEL.28	GGGTGAGGGCGGAGGGCCGAGCGGGGCCAGCAGA	G	56.3	PASS	PRECISE;SVTYPE=DEL;SVLEN=-33;END=1168073;CIPOS=-12,12;CILEN=0,0;RE=15;RNAMES=0ceaad89-27a9-4a33-a481-fe5621a4bfad,3c221b00-3ecd-4eea-86ec-7a5e0d35424a,5c697790-beff-40e7-9f60-2ea6819adc81,3f0bcb38-bcc3-4ae4-a09b-3b9ef5ae387d,7e11412a-2876-4661-a708-479a9e787bc6,53f4dbfa-7c0b-4162-8d31-76f1e8bf91cc,f874e077-0684-412f-a39c-cc807bdbd0aa,88d73175-6061-400b-b9d4-94b05d743d8d,b30d1d9d-050a-4860-8d21-fbb3c551a66f,489c9583-a48b-417e-bbc8-63a31dfa62c5,34f64651-ce28-4440-8ac3-fa57ddd39181,360ce83e-dcd5-42da-9e0f-12d9a805fef8,d3da996c-3053-4a28-abc1-00ecfe8c81ee,6f4e3272-6e0f-4161-a5d1-91687eaaf225,7d7d2399-1fa6-4b55-9224-91734b7627e4;STRAND=+-	GT:DR:DV:PL:GQ	0/1:19:15:56,0,95:56
+chr1	1202080	cuteSV.INS.34	C	CCCAGTACCCCAGCCCGAGCCCAGTACCCAGCCTCCAGCCCAGTACCCAGCGACCCAGTA	76.1	PASS	PRECISE;SVTYPE=INS;SVLEN=59;END=1202080;CIPOS=-87,87;CILEN=-9,9;RE=16;RNAMES=ced88eab-a646-4014-8aeb-c06ca21b5705,5add0203-d6e5-47a6-b90e-54d02d78d164,a4f3c7c6-0a66-4857-9665-11ae06c59a93,59590150-a13f-454c-8f37-279758d415fa,e4c629fb-92bb-41f6-a405-35c447ac2edb,1f107017-da31-4485-9ad0-f3131da1a2de,3acb4a98-1d7c-4ba0-a464-d7c716eff5aa,50f71109-b11a-4485-8ee5-0baff6cbb7cb,1371d4e0-de73-428d-a0b1-980f0172eed1,b0062959-3629-493f-926a-3c85b33f8ca7,408cfac5-0094-4446-a5d6-3dcb62642b1c,36bb353d-1fdc-40e9-bfcc-a7910a981285,d0649435-0e93-4a6f-a275-a83678f6b1c7,9a3e6750-b61c-4e3b-862d-d81b66d85836,d792ab01-3364-4f58-afdf-1f38aaac168b,5e1e811e-35f6-4628-b23a-480dbb46fdfc	GT:DR:DV:PL:GQ	0/1:14:16:76,0,57:56
+chr1	1202304	cuteSV.INS.35	C	CGAGCTCCGGTACCCAGCCCCGAGCTTCCAGTGCTCCAGCTCCCGAGCCCAGTACCCAGTCTCCAGCCCAGTACCCAGCCCCGAGCCCAGTACCCAGCCTCCAGCCCAGTACCCATCCGAGCCCTGTACCCAGCCCCGAGTGCAGTACCCAGCCT	8.1	PASS	PRECISE;SVTYPE=INS;SVLEN=154;END=1202304;CIPOS=-115,115;CILEN=-12,12;RE=8;RNAMES=ccf6e32b-ba2c-4dec-a585-8b1d12802c3e,e44d31f6-e6b9-4427-87b5-73fe396c6bf8,46e7c4a0-de0a-4bb5-a9c7-3205b7ea5b62,a952409f-0adf-4d89-9164-1410df306be7,8c4bb810-3b30-46a1-a980-29dd77349014,72815391-ed34-4f91-83da-5c39287f281b,bd0adee6-8e6e-4457-babe-4698cd7b3958,8afecac0-f47b-4de8-97c7-d4460518f153	GT:DR:DV:PL:GQ	0/1:19:8:8,1,113:8
+chr1	1202446	cuteSV.INS.36	C	CTGAGCCCAGTACCCAGCCCCCAGCCCAGTACACTCCAGCCCCGAGCCCAGTACCCAGCCTCCAGCCCAGTACCCAGCCCCGAGCCCAGTACCCAGCCTCCAGCCCAGTACCCAGCCCCGAGCCCAGTACCCAGCCCCGAGCCCAGTACCCAGCCCCCGAGCCCAGTACCCAGTCTCCAGCCCAGTACTCCAGCTCCCGAGCCCAGTACCCAGCCTCCAGCCCAGTACCCATCCCCCGAGCCCAGTACCCAGCCCCGAGCGCAGTACCCAGC	0.1	q5	IMPRECISE;SVTYPE=INS;SVLEN=271;END=1202446;CIPOS=-111,111;CILEN=-18,18;RE=5;RNAMES=b3beae97-47d9-48d6-a6b3-977c0a15b299,9c07ac5e-c8b6-46cc-9b83-81cd59589aca,2a300411-fa05-4afd-831a-3549a73f954f,ffd5cb31-03a8-46e9-bde4-29e85a43b70f,064ee272-8481-41a8-8b39-13d1ff904462	GT:DR:DV:PL:GQ	0/0:20:5:0,16,143:16
+chr1	1212611	cuteSV.INS.37	C	CCCTCTGCCCCCCTCAAGCCCCTCCCAGCCTAGC	15.2	PASS	PRECISE;SVTYPE=INS;SVLEN=33;END=1212611;CIPOS=-6,6;CILEN=-1,1;RE=8;RNAMES=a61f1fbe-b0a4-456d-8358-7d54b86c228d,1ef7159e-e106-46c9-81a1-f37295f1b6ee,cc29de15-7bd7-46f5-8fd5-69288395f465,064ee272-8481-41a8-8b39-13d1ff904462,8c4bb810-3b30-46a1-a980-29dd77349014,4b846ddd-587b-41f6-b62c-8c9450e1d5ca,7600aefb-e592-4790-a089-3937ec206e38,0db3db16-3b86-4131-8c6d-3fdb246e89ab	GT:DR:DV:PL:GQ	0/1:16:8:15,0,92:15
+chr1	1226332	cuteSV.DEL.29	TCAACCCTGTACGGTCAGGAGGAAACATGGCACCTCCCCTCTGGGGGCTCTTTCCAGAAACCC	T	51.2	PASS	PRECISE;SVTYPE=DEL;SVLEN=-62;END=1226394;CIPOS=-4,4;CILEN=0,0;RE=15;RNAMES=00730119-6bf3-4259-b731-b4788ef5b314,96717086-fe48-49ab-a513-1910742d076a,d0e8fc4c-db47-4700-b095-c457edcbfdb7,a61f1fbe-b0a4-456d-8358-7d54b86c228d,26456d45-de06-4be1-9295-a4838e2521a6,4005744b-cbe1-4af1-b72b-4c92e4c46f69,9b47cf78-0455-4c56-a614-c2a96cabc3f4,cc29de15-7bd7-46f5-8fd5-69288395f465,9f25ff8c-0f0c-4102-9755-3eaa49ad6cc8,cfe81581-5685-41b4-b2e3-6cc759dde6b6,54056604-4d7e-472b-a0d1-e0b629bfb695,e4f8a90d-15f2-4e84-a954-296c3499000c,37fc26b7-bc9c-44c4-b02f-2bba900b5be9,dceef018-aba6-4d50-b9a3-784e280b362e,b2704107-e0ea-41b2-9395-da38d79b046c;STRAND=+-	GT:DR:DV:PL:GQ	0/1:21:15:51,0,108:51
+chr1	1227298	cuteSV.DEL.30	GAAGGCGAGCTCGTGGCCAGGCCCTGCGGGAAGGCGAGCTCGTGGCCAGGCCCGGCGGGAAGGCGAGCTCGTGGCCAGGCCCGGCGGGAAGGCGAGCTCGTGGCCAGGCCCGGCGGGAAGGCGAGCTCGTGGCCAGGCCCTGCGGGAAGGCGAGCTCGTGGCCAGGCCCTGCGG	G	58.2	PASS	PRECISE;SVTYPE=DEL;SVLEN=-173;END=1227471;CIPOS=-4,4;CILEN=-1,1;RE=16;RNAMES=d0e8fc4c-db47-4700-b095-c457edcbfdb7,00730119-6bf3-4259-b731-b4788ef5b314,96717086-fe48-49ab-a513-1910742d076a,9f25ff8c-0f0c-4102-9755-3eaa49ad6cc8,54056604-4d7e-472b-a0d1-e0b629bfb695,dceef018-aba6-4d50-b9a3-784e280b362e,cfe81581-5685-41b4-b2e3-6cc759dde6b6,26456d45-de06-4be1-9295-a4838e2521a6,39428341-3093-4633-b634-f5ce3af9d111,4005744b-cbe1-4af1-b72b-4c92e4c46f69,b2704107-e0ea-41b2-9395-da38d79b046c,cc29de15-7bd7-46f5-8fd5-69288395f465,384fd9e4-08ed-465a-8ab2-9bb997cc6cf2,e4f8a90d-15f2-4e84-a954-296c3499000c,a61f1fbe-b0a4-456d-8358-7d54b86c228d,9b47cf78-0455-4c56-a614-c2a96cabc3f4;STRAND=+-	GT:DR:DV:PL:GQ	0/1:21:16:58,0,106:58
+chr1	1240674	cuteSV.INS.38	C	CCCCATTCCACCCCGACAGGTCCCTGCCCCAGCCCCGCCGCCC	362.6	PASS	PRECISE;SVTYPE=INS;SVLEN=42;END=1240674;CIPOS=-6,6;CILEN=-1,1;RE=39;RNAMES=e27f81b3-f37c-46a9-927d-1704051865f4,e95f67b5-2b6c-4d2b-8fdf-bb3bcf80380f,21a10068-b408-49c2-8591-bc56c13f8bc8,d8f83f59-c3ca-442b-8234-c87f5fd5b9d8,e8912883-f9ee-481e-9033-7ac822537778,115402c3-2363-4260-8185-088cb2efe813,d0e8fc4c-db47-4700-b095-c457edcbfdb7,6c6f5904-5902-49e4-aecc-9f7107aaaf32,00c871e6-9240-44ea-afd7-76260fb1ff8f,3ab10396-251a-4ab9-83e1-5fed03fd0fb4,48364c74-7e0e-449b-92e1-89c06dba0835,24fe21a5-7ddc-40db-8d2b-3536b5a4ea72,47d0af8c-17a1-436f-8839-ade7ab6de78a,cc60c1e0-54ff-4699-a10a-f7783be37055,9b47cf78-0455-4c56-a614-c2a96cabc3f4,3eba1c5e-e793-46d3-aacb-82377666e35e,08fd6d74-57bf-4e04-bd81-f2de084960a4,f09a7634-58be-4251-98d0-2d4f88afd783,421d61bb-197c-46a0-abc1-0ffc0e08958b,7f548ef8-bf8f-4a61-b9ce-43038c6ed4f1,00730119-6bf3-4259-b731-b4788ef5b314,cc29de15-7bd7-46f5-8fd5-69288395f465,95c3ccf4-0adf-43e3-9054-45c562be67f2,1fcf688b-b67c-4193-93d9-73465de7ffc7,5f57573e-7b14-4b1a-899c-87843755ef38,0de9cf66-122b-45e4-81a2-0e5f79607368,72780d85-8ccf-4898-b2b2-11cf986d0491,0ddf623b-87fe-4f8e-a9d6-bdb595c3afc4,8e84b06f-693a-4208-add6-ca0bebaf3b40,a32985bb-648d-4ee1-9572-7d4e2160a1f1,d22b9311-0646-440d-b6e4-59b9d2385746,8f02f628-554d-47ef-8855-2d0e61e87721,6aa87ec7-edab-4aed-8c84-d1ce656ee421,440b5c72-9c1a-42d2-93b7-ed47bf9c4ceb,e9628e82-8506-4ac7-b680-5640081d3a40,ba940b8e-7984-468d-9a9c-d9543dfd4501,21b2165e-219c-44c9-b583-eaaa6f9cfa4d,1cac9230-18ec-4c42-8b75-5b8edc0a46e0,f80b54d1-313c-4b3c-821e-2486f5ac1987	GT:DR:DV:PL:GQ	1/1:1:39:363,93,0:92
+chr1	1245147	cuteSV.INS.39	T	TCCACCTTCATCCCATTCTTCCCCCACTATCTCCCTCCTCCCCCACCTCTGCCCTCCTCCCTTCCCCCTCCCTCTGCTCCTCTCCTTCCCCCTTCTCCCCCGACCCTTCCCCACTCATCTCCCTCCTCCCACCTCCTCTCCTCCCTCCCTCTCCCCCACTCCTCCCCCTCCTCCCCCACTCTCCCCCACTGCTCTCCCTCTCCCCCACTCCCCTCCCCCACTCCTCTCCCCCTCCTTCTCCACTCTCCTCCCCCTCCCACCCCTCCCCCACTCCCCAACCCTCCTCCCCCTCTTCCCTCCTCCCCACTCCTCATCTCCCTCCTCCCCACTCCTCCCTCTCCCTCTTCCCCTGCCTCACTCCTCCCCCACCTTCCCCTCTTGCTCCTCTCTTCCCCACTCCCTCCCCCACCCTCTCCCTCCTCCCCGCTCCCCTCTCC	219.5	PASS	PRECISE;SVTYPE=INS;SVLEN=436;END=1245147;CIPOS=-12,12;CILEN=-8,8;RE=24;RNAMES=8f02f628-554d-47ef-8855-2d0e61e87721,a7eccd01-b383-4757-9497-19a633fc484a,fba85188-6836-422d-b3dd-be62d3ee3746,21b2165e-219c-44c9-b583-eaaa6f9cfa4d,d22b9311-0646-440d-b6e4-59b9d2385746,701b1d5e-6f8b-4582-83b3-bebdcd378d01,7f548ef8-bf8f-4a61-b9ce-43038c6ed4f1,1fcf688b-b67c-4193-93d9-73465de7ffc7,e9628e82-8506-4ac7-b680-5640081d3a40,6aa87ec7-edab-4aed-8c84-d1ce656ee421,cc29de15-7bd7-46f5-8fd5-69288395f465,00730119-6bf3-4259-b731-b4788ef5b314,3ab10396-251a-4ab9-83e1-5fed03fd0fb4,e7e1f47d-0cd9-4d0b-9259-5cb8d10e0754,aecdc501-f8cf-4c74-be34-7fbd6c71d3a8,5f57573e-7b14-4b1a-899c-87843755ef38,440b5c72-9c1a-42d2-93b7-ed47bf9c4ceb,d171ef72-b23e-432d-a798-df3db714a490,9b47cf78-0455-4c56-a614-c2a96cabc3f4,8e84b06f-693a-4208-add6-ca0bebaf3b40,3feb7f84-5f44-4b84-9010-37250a11d37e,115402c3-2363-4260-8185-088cb2efe813,6c6f5904-5902-49e4-aecc-9f7107aaaf32,95c3ccf4-0adf-43e3-9054-45c562be67f2	GT:DR:DV:PL:GQ	1/1:1:24:219,54,0:54
+chr1	1248059	cuteSV.DEL.31	CTGGATCTCCAACTCTGACCTACAGGCAGGAAAGTGGGCAGCCCTGGGAGGCTGGACTGAGGGAGGCTGGACTTCCCACTCAGGCCTACACGCAGGAAAATGGGCAGCCCTGGGAGGCTGGACCGAGGGAGGCTGGGCCTCCCACTCCACCCTACAGGCCAGGACACGGGCAGCCCTGGGAGGCTAGACCGAGGGAGGCTGGGCCTCCCATCTACCCTACAGGCCGGGACACAGGCAGCCCTGGGAGGCTGTACCGAGGGAGGC	C	3.2	q5	PRECISE;SVTYPE=DEL;SVLEN=-263;END=1248322;CIPOS=-2,2;CILEN=-1,1;RE=7;RNAMES=cc29de15-7bd7-46f5-8fd5-69288395f465,a7eccd01-b383-4757-9497-19a633fc484a,bf5b0c80-4f39-4320-b98a-1e7ceb6af60c,701b1d5e-6f8b-4582-83b3-bebdcd378d01,95c3ccf4-0adf-43e3-9054-45c562be67f2,e9628e82-8506-4ac7-b680-5640081d3a40,9b47cf78-0455-4c56-a614-c2a96cabc3f4;STRAND=+-	GT:DR:DV:PL:GQ	0/1:19:7:3,3,118:3
+chr1	1248987	cuteSV.DEL.32	CCTCCCACTCCGCCCTACAGGCCGGGACACGGGCAGCCCTGGGAGGCTGGAGCGAGGGAGGCTGGGCCTCCCACTCCGCCCTACAGGCCGGGACACGGGCAGCCCTGGGAGGCTGGACCGAGGGAGGCTGGGCCTCCCACTCCGCCCTACAGGCCGGGACACGGGCAGCCCTGGGAGGCTGGACCGAGGGAGGCTGGGCCTCCCACTCCGCCCTACAGGCCGGGACACGGGCAGCCCTGGGAGGCTGGACCGAGGGAGGCTGGGCCTCCCACTCCGCCCTACAGGCCGGGACACGGGCAGCCCTGGGAGGCTGGACCGAGGGAGGCT	C	0.1	q5	IMPRECISE;SVTYPE=DEL;SVLEN=-326;END=1249313;CIPOS=-54,54;CILEN=-1,1;RE=5;RNAMES=cc29de15-7bd7-46f5-8fd5-69288395f465,701b1d5e-6f8b-4582-83b3-bebdcd378d01,9b47cf78-0455-4c56-a614-c2a96cabc3f4,e9628e82-8506-4ac7-b680-5640081d3a40,a7eccd01-b383-4757-9497-19a633fc484a;STRAND=+-	GT:DR:DV:PL:GQ	0/0:20:5:0,16,143:16
+chr1	1249297	cuteSV.INS.40	C	CCACAGGCCTCCCACACTCCGCCCTACAGGCCGGGACACGGGCAGCCCTGGGAGGCTGGACCGAGGGAGGCTGGGCCTCCCACTCCGCCCCACAGGCCGGGACACGGGGGCAGCCCTGGGAAGTTCCGAGGGAGGTCTGGGCCTCCCACTCCGCCTACAGGCCGGGACACGGGCAGCCCTGGGAGGCTGGACCGGACCGTAGACTCCACT	109.7	PASS	PRECISE;SVTYPE=INS;SVLEN=209;END=1249297;CIPOS=-67,67;CILEN=-28,28;RE=18;RNAMES=ba65d04d-c548-41af-a1ae-2f92555cdec9,fba85188-6836-422d-b3dd-be62d3ee3746,21b2165e-219c-44c9-b583-eaaa6f9cfa4d,93793f8f-88f0-4d1c-bbc7-f4ff305b38db,9889e0f9-b111-466b-b4a7-cc49d18e799d,1fcf688b-b67c-4193-93d9-73465de7ffc7,202ac4cc-be0f-46d4-b6a1-6b44d0f40ca9,50a99aef-78bd-4c3d-a301-ce79ad502d13,e75b832f-5543-4dfb-93fa-4c7f935c6920,36a68d4a-51c0-45f9-bbb7-21e8d6595a74,5fcf5f40-970f-456e-a6c8-6d2e644cc05d,abc80d9a-83a8-491f-bda0-ef94c8b186fc,e7e1f47d-0cd9-4d0b-9259-5cb8d10e0754,8f02f628-554d-47ef-8855-2d0e61e87721,7f548ef8-bf8f-4a61-b9ce-43038c6ed4f1,645bdabf-aba6-4f5c-9a5a-aac5300ec7f2,8108df60-0733-4ded-ab77-efdf8bbf4d25,d171ef72-b23e-432d-a798-df3db714a490	GT:DR:DV:PL:GQ	0/1:7:18:110,2,5:4
+chr1	1284150	cuteSV.INS.41	G	GTGGGGTGAGGGGTGGTTGGGGCTGGGAGCGAGGGTGGGGCTAAGGCTGGGTGAGGGGGTGGGGTGGGGGTTGGGTGAGGGGGTGGGGTGGGGGGTTGGAGTGAGGGGGTGTTGGTGAGGGGTTGGGGTTGGGTGAGGGGGTGGGGCTAGCAGGGAGGGGGTGGGTGGGGGTTGAGTGAGGGGGTGGGGGTTGGTGAGGGTGGGGTTGGGTGAGGGGGTGGGTGGTGGCTGAGTGAGGGTGGGGGGAAGAAGAGGGGTGGGGGTTGGGTGAGGGGGTGGGGGTGGGTTGTGGGGTGGGAGGGGGGGTGGGGGCTGGGTGAGGGGTGGGCTCGGGGGGGTTGAGTGAGGGTGAGGGGTGGGGTGGGGGTTGAGGAGGGGTTGGGGTGTTGG	334	PASS	PRECISE;SVTYPE=INS;SVLEN=389;END=1284150;CIPOS=-11,11;CILEN=-42,42;RE=35;RNAMES=b0abcfbb-61fd-4728-81a1-43fa323e396b,f28c127d-baf6-42e2-8aca-5e4956bf6f3b,d9c4eb81-e133-4197-8e8e-e206e57bd810,fdf0a32a-0337-491d-a75e-34098f30517a,d1cbe0e6-eca8-4c73-a51f-46ab546d1e8e,f173546d-38f4-44f6-97b6-91d0e45a2d14,95404675-b417-4a52-a407-b2ce2852ad4b,33f38ec0-cd95-4f95-832e-80af657c2ecf,b9c5bedc-f363-4539-bc47-7b0f412a4bbd,e694bcb5-46e8-43d0-b0d6-3fd9647ffe7e,45b3e9c7-9541-4afe-b291-ae5bc5e5bfb1,3133cf21-e3fe-42a8-a1a9-abfe57860c7e,34b9f7e2-6ab9-4c30-8f04-825c28b41bc3,57b4f996-d753-4cff-a038-96e61ad4c4a1,da1bdeb0-11ca-41f7-9977-64a5f9b6f43d,fe9586e4-a814-4585-b5e9-0a70f38dd05e,48f59671-4881-42ed-a9a3-aed4c41a8a27,bd8c4b8e-a603-47e0-ae1a-bd0c4f7c768c,1e8fccd3-10da-4fbb-abd3-696fc95a7449,4e830313-41e9-4df0-aa80-fd044e1941f6,cba077c7-f898-4622-9da4-5b16316d0245,0338319c-9648-432b-a5e8-d100191dbd20,7453e3f8-6ef3-4524-bec9-f9eac86f3421,06524921-db14-4b52-9dce-ed08e1523255,1660464c-9193-48b8-9078-8c477520a960,174f1aff-b0b3-407d-939e-eba9860b4ba1,45316b56-a0a4-4f50-bcbf-99c85507665f,91688ed7-ae4f-48d4-9550-45a4903f9d70,b8227d89-a8f7-4fb5-a3e2-42174694c6b6,ba8acd78-c80f-49ff-93fc-1a99ffa7e077,c1019b1c-b966-4bb8-b129-a172a18b289f,87a3d636-a864-4fe2-b799-df74b0cbef67,ddc30ebd-ceb6-4337-94f9-058444486c91,723260c2-5676-4623-acd8-87e2d5ff2445,b437894e-54e9-47cd-ab32-9813a0cc594f	GT:DR:DV:PL:GQ	1/1:0:35:334,89,0:89
+chr1	1289207	cuteSV.INS.42	C	CTGTTCGTCCCCAGAGTCTCGGTTCCGTCCTGGTGTCTCTGTTCATTCCCCGTGTTCTGTCTGTCCCGTGTTCTGTTCGTCTCCAGAAACCCTGTTCATTCCCCGTGGTTCTCTGTCTGGCCCCGTGTCTCTGTTCGTCTCGTTGTCCCTGCTCCGCCCCGTGTCTCTGTTCCCAGCCCCCGTGGTGCCCTTGTCTCTGTTCGGGTCCTGTCCCTGTTCGTCTCCTCTCGAAGTCTCTGCTCCGCGTCCTGTGTCTCTGCCTGTCCCTAGGCTCCTGTTCGTCCCGTGTCTCTGCCCCTGCTGTCTCGTGGTTCTTGCCCCGTCCTTGTTCTGTCTACCCCTCTTGTCTGTTCCTGGCCCTGTCTCCTGTTCAGTCTTGGCAGGTCCCTGTTCGGTCTCTGTGTTCCTGTTCGTCTATCCCCTG	229	PASS	PRECISE;SVTYPE=INS;SVLEN=423;END=1289207;CIPOS=-234,234;CILEN=-85,85;RE=26;RNAMES=3e1eb897-72e7-4ae0-a0d2-2627466a830c,2aa3c5b0-51b8-4dde-b323-6756439be524,5330950b-0332-411a-9811-65f6cd72fbd7,57d01459-e734-4df0-b3f6-a19c8816ad89,1660464c-9193-48b8-9078-8c477520a960,fb75b63c-b8bc-4e80-9b67-bc3421d23077,fdf0a32a-0337-491d-a75e-34098f30517a,4e830313-41e9-4df0-aa80-fd044e1941f6,7daa0ceb-1da3-4a2f-9d3e-292ad11d4e0d,91688ed7-ae4f-48d4-9550-45a4903f9d70,320fb35f-589e-4586-a6f4-658a187b6466,1e8fccd3-10da-4fbb-abd3-696fc95a7449,89301ea0-b509-45e9-a23d-9d482685b2d9,7453e3f8-6ef3-4524-bec9-f9eac86f3421,cba077c7-f898-4622-9da4-5b16316d0245,b8227d89-a8f7-4fb5-a3e2-42174694c6b6,45242ace-d289-4d59-b664-05f2cc0fe66e,48f59671-4881-42ed-a9a3-aed4c41a8a27,4240aded-2e15-41cf-8e86-5b4eb4c25db8,e34f1c89-c242-4a46-b19b-50deaba9e44f,7559d883-c5e9-4b07-86c4-daa5b48a64f0,ba8acd78-c80f-49ff-93fc-1a99ffa7e077,b9c5bedc-f363-4539-bc47-7b0f412a4bbd,ddc30ebd-ceb6-4337-94f9-058444486c91,b437894e-54e9-47cd-ab32-9813a0cc594f,da1bdeb0-11ca-41f7-9977-64a5f9b6f43d	GT:DR:DV:PL:GQ	1/1:2:26:229,52,0:52
+chr1	1324165	cuteSV.INS.43	G	GGCTGAGGGGCTGGGGGGCTGGGAGGCTGAGAGGCTGGGGAGCTGGGAGCTGGGGGGCTGG	324.4	PASS	PRECISE;SVTYPE=INS;SVLEN=60;END=1324165;CIPOS=-10,10;CILEN=-3,3;RE=35;RNAMES=b607649d-c7ea-4932-9d33-12dcae50810e,e0e831cb-c095-4d3a-9a06-b2dd44d616ea,f5cceb4b-93c9-4b51-8096-7e0cd64ae466,b2acb702-b045-418b-82e9-84fb23740ee1,5add4e04-e468-4944-9803-ca71ad5e067c,e5961070-c2ef-42e2-bba4-5a4b29a04fad,b9ba05fc-bdd4-4b2f-92fb-90aefa2c680c,d5be04cd-8350-47d4-9024-0bbf5ae19de9,69ab5885-79b5-4bfc-9a75-be19a85673d9,8769fc7d-1fd4-4f70-8eb0-8d9a33b81acb,516db773-824b-4b59-8e23-aa5f399b028e,33722165-439f-455f-b763-83b7ef59275b,92f48072-887e-4dcf-9305-f8cea44945b2,419aed38-9d49-4493-859c-0983e2f06856,f6392148-86ea-4ac4-aa20-64bc81271553,40f48fb3-c138-4c40-935c-59936303dbab,c4febaca-032a-45e4-9d7b-ddffa3049f24,7dec19f4-8014-4062-8905-a0391b78a028,9cc9d9bd-3d4e-4c32-a10d-96dc5fb4374a,abdbcf55-98e2-4ad0-a689-b98c0af04706,9af4109b-0bbb-49ea-af89-7b40b62ee955,e8736cdd-fdb3-49c7-ad15-153cf640722a,09708d95-dd04-43d5-b47a-5e6601726993,7b5578be-5d71-42df-b818-22f669246aaa,d48b9361-395a-4463-819b-deb1427cd07d,57d01459-e734-4df0-b3f6-a19c8816ad89,ed17aad3-7a33-4220-b67b-4bf5a6e44ddb,08e6fb86-3192-40c2-b7fa-d2ccd498e453,091c6d44-03dc-4651-8218-818c1d23a93d,85cef6cd-d396-416f-9e45-d02e88061185,aa90c795-27e9-4b82-bb90-5963d30a64fa,1fda58af-a114-4202-8c53-90149c2d2ed3,d1068b23-9f55-40f9-8b26-0027b0d88fe3,dbc135c4-8e41-41ea-9db0-a1299ef02dbe,218d7b4b-3b50-409f-8fe2-a82e03ddc201	GT:DR:DV:PL:GQ	1/1:1:35:324,82,0:82
+chr1	1339914	cuteSV.INS.44	A	AGTCCCCAGCAGCCCCACAGACCCACCGCAGCCGCATG	219.5	PASS	PRECISE;SVTYPE=INS;SVLEN=37;END=1339914;CIPOS=-10,10;CILEN=-1,1;RE=24;RNAMES=e0e831cb-c095-4d3a-9a06-b2dd44d616ea,15dc9de7-45aa-4fe9-8890-d2cca94af899,d61fe4e6-eeee-4a3c-a2a6-8318ada41fd1,091c6d44-03dc-4651-8218-818c1d23a93d,eb6dedc7-def4-412d-8942-3e67e3a089af,0601fb29-eaa7-4c91-b112-db4c34a23f91,b7aea0b0-9f3f-4bda-ac4f-45ddfa7e3eff,3ba7d7a0-df2a-4871-b569-45db3fd2521b,b2acb702-b045-418b-82e9-84fb23740ee1,08fbcfb4-62e2-40d6-b323-b8fea89ab44a,376261d2-c01e-43b3-9924-bbbe58c5aa63,b9ba05fc-bdd4-4b2f-92fb-90aefa2c680c,e4ba9d23-c3be-4d82-88fc-8f59596bf8e7,fb9c1378-c2b8-4d1e-be76-8ff0321d4554,bc09aba3-dd95-4a94-99c9-5ad7c6912ba9,9a1b7bc6-5e6e-4f8e-8083-2604ef3b1d1f,7d1bde11-580f-44a6-9ce0-279b9d606688,db20f2bc-8216-4d64-a548-cb476cdbaf82,3422bb03-1d59-4f56-bcf8-52524a1a3f4b,a7b6b556-1844-4003-ac40-86a454b26de7,ac227aea-fadc-407b-aa52-ff70fbf3c32e,237b09c3-d628-4f57-98ce-8bd2378ba2ad,d38f14a1-a826-46af-ba7e-d9954408e6f3,681524fc-d8d4-4fae-b24b-fa515d8ce749	GT:DR:DV:PL:GQ	1/1:1:24:219,54,0:54
+chr1	1350113	cuteSV.DEL.33	GGCCTGGACGGGGCAGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGACGGGGCAGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGACGGGGCAGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGACGGGGCAGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGACGGGGCAGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGACGGGGCAGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGACGGGGCTGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGACCGGGCAGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGACCGGGCAGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGACGGGGCAGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGACGGGGCAGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGACGGGGCAGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGACGGGGCAGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGACGGGGCAGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGACGGGGCAGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGACGGGGCAGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGACGGGGCAGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGACCGGGCAGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGACGGGGCAGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGACGGGGCAGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGACGGGGCAGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGACGGGGCAGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGACCGGGCAGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGACCGGGCAGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGACGGGGCAGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGACGGGGCAGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGACGGGGCTGGAGCGACGGGGGGAGTGAGGA	G	248.1	PASS	
PRECISE;SVTYPE=DEL;SVLEN=-1076;END=1351189;CIPOS=-29,29;CILEN=-1,1;RE=28;RNAMES=c759666a-78d1-4e4e-85e9-49f14c92cb02,b2c9e453-3cae-45b5-823f-a06accb28bda,d600c2be-262b-467c-b9fd-72d4d65e3e7a,bf77a3ce-4a60-47a5-9bb5-2c13067c86e0,befb0e46-69bf-44a0-9a06-1316fa491269,237b09c3-d628-4f57-98ce-8bd2378ba2ad,0950a236-9718-441c-9710-ab2aba6ee96f,e4ba9d23-c3be-4d82-88fc-8f59596bf8e7,ba555f0d-01d6-4b77-bb53-fb5602de2be3,e7a09d9c-3f36-45eb-b50b-f42d8b8a35e9,be8eadd1-4343-42de-9b4e-d998c4b14ca1,b3af5871-92fa-4fd6-849c-bf7a8ac51a0e,e28879d9-907b-47f0-82db-ebb2f3f31103,bc443e1f-204d-461b-8e69-e0b21369bd10,fdad49e6-a6ad-4be1-9f95-1a3dc66aad68,bcd87627-7572-45e7-bf8f-9261520b21b6,d9caad9d-b11e-4c3b-8c1b-44bf7b369001,fa1decc9-64be-4c96-9390-bb11efe8f144,84021b5e-e408-44fd-83ab-eed7da232e97,fc27f63f-4a8f-4cab-89d7-f5156f1944b7,55da8ccf-279d-4963-b3e3-8bbe90b36620,8a106af9-82b5-46d8-b907-35be6dedfcda,2bdca843-6153-45b0-8766-782ee3635507,ed58bbcf-2808-4104-b38f-0f6a6f8cb40c,b7aea0b0-9f3f-4bda-ac4f-45ddfa7e3eff,93745b8a-6e5d-4f25-ae47-f8c35baaafd4,9a1b7bc6-5e6e-4f8e-8083-2604ef3b1d1f,d61fe4e6-eeee-4a3c-a2a6-8318ada41fd1;STRAND=+-	GT:DR:DV:PL:GQ	1/1:2:28:248,57,0:57
+chr1	1366923	cuteSV.DEL.34	GAGTTGGTGTGAATTGAATTGTGTGAATGAGTGGATTGGTGAGTGAATTGGTGAGTTGAATTGGTGTGTGTAGTGGATGAGTGTGGATGAATGTGAATTGGCGAGTATGGATGTGTGAATTGGTGAGTGTGAATGTG	G	16.4	PASS	PRECISE;SVTYPE=DEL;SVLEN=-136;END=1367059;CIPOS=-8,8;CILEN=-16,16;RE=10;RNAMES=aa88905e-5130-4723-9afd-4f5c3d70dd16,53ccdbe6-ac6d-4e16-a1e8-b277be973a9e,67772cfc-e5e8-4d09-9bfa-0819a87da557,cb74559a-face-4bc5-a20f-85224a2b7cff,79c6d5aa-e931-424a-b086-4a5f560efe83,39131799-9d54-4644-8077-ec715475d10c,4e414224-9269-42df-b33d-2feba304ad4e,4cbf911c-b176-4222-9802-7ba2334f570a,9d38c592-3987-4463-9fc5-b77d2f97c536,18988950-fe75-4b3c-a50d-8fff5a9e362e;STRAND=+-	GT:DR:DV:PL:GQ	0/1:21:10:16,0,121:16
+chr1	1382232	cuteSV.DUP.4	G	<DUP>	0.1	q5	IMPRECISE;SVTYPE=DUP;SVLEN=671;END=1382903;RE=6;STRAND=-+;RNAMES=9d38c592-3987-4463-9fc5-b77d2f97c536,4f00962c-12f2-4ff5-b18b-d08a4118c893,9d57d1ac-8ef1-42cb-9f8a-1952ebd70438,f0a7a3af-0ea1-47a8-a52d-90b077dd5311,a5a3a1b3-0284-49ed-a518-dc0ab1b32e10,0697a1f6-c749-4ec4-a0bb-1320fe1bc916	GT:DR:DV:PL:GQ	0/0:23:6:0,17,162:16
+chr1	1382606	cuteSV.INS.45	G	GTAACAACTAGAGGCTCACCCTTCCCAACAATCCAGTAACAATCCAGAGGTCACCACCCTTCCCAACAATCCAGTAACAATCTAGAGGTCACCACCCTTCCCAACAATCCAGTAACAACAATCCAGAGGCCACCACCCCTTCCCAACAATCCAGCTTCCCAACAACTAGTAACAATCCAGAGGTCACCACCCCTTCCCAACAATACAGTAACAATCCAGAGGCCACCACCCCTTCCCCAACAATCCACTAACAATCCAGAGGTCACCACCTCTTCCTAACAATACAGTAACAATCCAGAGGCCACCACCCCTTCCCAACAATCCAGTAACAATCCAGAGGTCACCACCCTTCCCAACAATCCAGTAACAATCCAGAGGTCACCACCCTTCCCAACAATCCGCAACAATCCAGAGGTCACCACCCCCTTCCCCCAACAATACAGTAACAATCCAGAGGCACCACCCCCTTCCCAACAATCCACTACCAATCCAGAGGTCACCACCCTTCCCAACAATACAGTAACAATCCAGAGGCCACCTCACCCCCTTCCCAACAATCCAGTAACAATCCAGAGGTCACCACCCTTTCCAACAACTTCACTACCAATCCAGAGGTCACCACCCTTTCCCAACAATCCACTAACAATCCAGAGGTCACCACCCCTTCCCCAACAATCCAGTAACAATCCAGAGGTACCACCCCTTCCCAATAATCCAGTAACAATCCAGAGGTCACCACCTTCCAACAATCCACTAACAATCCAGAGGTCACTCACCCCCTTCCCCAACAATCCACTAACAATCCAGGAGTCGCCACCCCTTCCCAACAATCCAGTAACAATCCAGAGGTACCACCCCCTTCCCAACAATCCAGTAACAATCCAGAGGTCACCAATTCCTTCCCAACAATCCAGTAACAATCCAGAGGTTACCACCCTTCCCAACAATCCACATCAATCCAGAGGCCACCACCCTTCCCAACAATCCGGCAAGGACCCAGAGGCCACCACCCCCTTCCCAACAATCCAGTAACAATCCAGAGGTCACCACCCTTCCCCAAAATCCAGTAACATCCAGAGGTCACCACCCCTTCCCCAACAATCCAGTAACAATCCAGAGGCCACCACCCCTTCCCCAACAATCCAGTAACAATCCAGAGGTACCACCCCTTCCCAACAATCCAGTAACAATCGAGGCCACCACCCCTTCCTTAACAATCCAGTAACAATCCAGAGGACACCACCCTTCCCAACAATCCACTAGCAATCCAGAGGCCACCACCCTTCCCAACAATCTGGCAACGACCCAGAGGCCACCACCCCTTCCCAACAAATCCAGTAACAATCCAGAGGTCACCACCCCCTTTCCCAACAATCCAGTAACAATCCAGAGGTCA	20.9	PASS	PRECISE;SVTYPE=INS;SVLEN=1395;END=1382606;CIPOS=-41,41;CILEN=-5,5;RE=7;RNAMES=77b12407-8c9b-4d88-a023-aac52eb7d8a0,f0eab809-506a-46ee-b4e8-962ea3bda1ff,d936d231-dd4d-4254-ac7a-c92933cd3268,3a92fd10-7806-4547-8570-259f291e70c4,33c9bbab-aba2-437b-90a1-6f991ec4c38c,67559dcc-d09b-4387-a2ec-c9075b98d81d,df6fb7c2-1567-443f-af3b-29e57c98eb0d	GT:DR:DV:PL:GQ	0/1:11:7:21,0,59:20
+chr1	1382641	cuteSV.INS.46	C	CAGGAGTCACCACCTTCCCAACAATCCAAGTAACAATCCAGAGGTTACCACCTTCCCAACAATCCACTAACAGTCCAGAGCCACCACCCCTTCCCAACAATCTGACCAAGGACAGAGGCCACCACCCCTTTAACAATCCAGTAACAATCCCCAACAATCCAGTAACAATCCAGGGTACCACCCTTCCCAACAATCCAGTAACAATCAGGAGCCACCACCCCTTCCCAACAATCAGTAACAGTCAGAGGACACCACCCTTCCCAACAATCCACACTAGCAATCCAGAGGCCACCACCCTTCCCCAACAATCTGGCAACGACCCAGAGCCTACCCCTTCCCAACAATCCAGTAACAATCCAGAGGTCACACCCCTTCCCAACAATCCAGTAACAATCAAGGGGATCCCACCCCTTCCCAACAATCTGGTAACAATCCAGAGGTCACCACCCTTCCCAACAATCAAAGTA	134.9	PASS	PRECISE;SVTYPE=INS;SVLEN=466;END=1382641;CIPOS=-39,39;CILEN=-132,132;RE=21;RNAMES=f0a7a3af-0ea1-47a8-a52d-90b077dd5311,468ee76e-3ed4-4648-bfe1-d98510c064cb,fafc5c55-1207-4ec9-ad82-c384b1ca16fa,0697a1f6-c749-4ec4-a0bb-1320fe1bc916,a9944842-b9cf-42b4-83d7-345c82404ac7,a5a3a1b3-0284-49ed-a518-dc0ab1b32e10,2f0183eb-547a-46f9-a17a-2d1cdd9e5757,9d57d1ac-8ef1-42cb-9f8a-1952ebd70438,9d38c592-3987-4463-9fc5-b77d2f97c536,922f26c5-3206-4434-b317-1afbdb2a1a7f,4f00962c-12f2-4ff5-b18b-d08a4118c893,358e9370-bc32-41ba-8770-34330ce6995e,271f004d-0105-4403-98f2-d1e3bcbfb1ed,3bce85ca-852f-4a5d-aafd-33a558214eb1,a6b702f4-af87-46b0-8258-dd0080dba5de,9fd33fe0-7f99-4554-b196-b8ec37fa5ebd,a36c1728-9d07-43a0-8e83-8e5bb1e27014,06ae3af0-e88c-4c02-92cc-d45624993d5f,36491f59-cbff-4115-b4f6-653d792fcc66,f123340d-ccd5-4643-9cda-aeea14c2f41e,c104db4f-c8b0-44f8-9748-972c777861bb	GT:DR:DV:PL:GQ	1/1:7:21:135,6,1:5
+chr1	1427436	cuteSV.INS.47	C	CTCCTCCATCATCCGCCCGCTCCCCTCTCACCTCCCCTTCCCCTCCATCCCACCCTGCCCAGCCCCCTCCCCTCCATCACCTGCCCTGCCCCTCCCTCCATCCATCCCGCCCCGCTCCCCTCTCCACCCCTCCCCCTTCCCCTGCATCACACCCTGCCCAGCCCCCACCCCTCCATCACCCTGCCCTGCCCCCCTCCCCTCATCACCCTGCCCTGGCCCCCTCCCCTCCATCACCCTGCCCAGCCCCCCTCCCCCTCCATCACCCTGCCCTGCCCCCTCCCCTCCATCACCCTGCCCAGCCCCTCCCTCCATCACCCTGCCCTGCCCCTTCACCTCCATCACCTGCCCTGCCCCTTCCCCTCCATCACCCTGCCCTGCCCCTCCCTCCATCACCCTGCCCTGCTCCCCTCTTCTCCCCCTTCCCTCCATCATCCCGCCAGCTCCCCTTTCCCACCCCTCCCCTCCCCTCCATCACCCTGCCCAGCCCCCTCCTCATCACCCTGCCCTGCCCCCCCTCCCCTCCATCACCCTGCCCTGCCCCCTCCCCTCCATCCATCCTTCCCGCTCCCTCTCCCACCCCTCCCCTTCCCCTCCATCACCCTGCCCTGCCCCCACCCCTCCATCACCCTACCCCTGCCCCCACCCCATCACCCTGCCCTGCCCCCTTCCCTCCATCATCCCGCCCGCTCCTCTCCCACCTCCACTTCCCTGCATCACCCTGCCCACACTGCCCCTTCCCCTCCATCACCCTGCCCTGCCCCCTCCCCTCCATCACCCTGCCCTGCCCCCTCCCTCCTCACCCTGCCCTGCCCCCACCCCTCCATCATCCCGCCCGCTCCTACTCACCTCCCCTTCCCCTGCATCACCCTGCCCTGCCCCTTCCCTCCATCCCCCTGCCCTGCCCCCTCCCCTCCATCACCCTGCCTGCCTCCCTCCATCACCCTGCCCTGCCCCCCCCTCCATTGTCCCGCCCGCTCCCACTCTCCACCCCTCCCCTTCCCCTGCATCACCCTGCCCTGCCCCTTCCCCTCCATCACCTGCCCGACCCCCTCCCCCTCCATCACCCTGCCCT	28.4	PASS	PRECISE;SVTYPE=INS;SVLEN=1071;END=1427436;CIPOS=-23,23;CILEN=-16,16;RE=11;RNAMES=9029b9c6-09de-484d-83ff-7a20e6b313c8,2934cba4-0b4b-4aee-b14b-0d0e3546b541,54e98c4b-5a5f-4113-9304-7a5c0aea7c63,d2031098-3567-4d27-be20-c8e182f7c4b4,68845987-0249-4222-b2eb-04cb83059a27,733b07d2-662e-4b95-b959-2b5d0a115483,12932aa3-c4fb-4966-905c-488f025cb743,fde476d8-2aac-42ed-9c46-752e5d14207f,3c70ee06-5212-40a3-8f97-e23cf4bf063d,74e3c21f-dad1-4e75-9848-a6a85d837b12,f99f1f38-6f26-485c-8aad-b3bad4a2596f	GT:DR:DV:PL:GQ	0/1:19:11:28,0,105:28
+chr1	1427458	cuteSV.INS.48	T	TCCCTTCCCTCCATCACCCTGCCCAGCCCCCTCCCCTCCATCACCCTGCCCTGCCCCCTCCCCTCCATCATCCCGCCCGCTCCCTCTCCACCCCTCCTTCCCCTGCATCACCCTGCCCAGCCCCCACCCCTCCATCTTCCCTGCCCTGCCCCTCCCCTCCATCCTGCCCTGCCCCCTCCCCTCCATCACCCTGCCCAGCCCCTCCTCCATCACCCTGCCCTGCCCCCTCCCCTCCATCACCCTGCCCTGCCCCCACCCCATCACCCTGCCCTGCCCCCTTCCCTCCATCATCCCGCCCGCTCCCCTCTCCACCCCTCCCCTTCCCCTGCATCACCCTGCCCTGCCCCTTCCCCTCCATCACCCTGCCCTGCCCCCTCCCCTCCCTCCATCACCCTGCCCTGCCCCCTCCCCTCCATCACCCTGCCTGCCCCCACCTCCATCATCCCGCCCGCTCCCCTCTCCACCCCTCCCTTCCCTGCATCACCCTGCCTGCCCCTTCCCCTCCATCACCTGCCCTGCCCCCTCCCCTCCATCACCCTGCCCTGCCCCTCCCCTCCATCACCCTGCCCTGCCCCCACCCCTCCATCATCCCGCCGCTCCCCTCTCCACCCCTCCCTTCCCCTGCATCACCCTGCCCT	111.7	PASS	PRECISE;SVTYPE=INS;SVLEN=637;END=1427458;CIPOS=-24,24;CILEN=-32,32;RE=20;RNAMES=f5204bf0-46df-4baf-964b-8c839f046200,2329bd50-c50b-40fb-8da8-8a3b33bd241e,a569e6c2-b324-4efd-9a08-13076cbbcbd5,c226539f-904d-474c-a18e-49b18f7cb378,ff84c73b-00ed-4477-949c-4902317bb4f0,102cfb6a-f6b2-4542-b618-563ef2e3ed6d,05911855-6bbc-4bb3-979c-479c8273eeaf,ed59a62c-2964-4d7a-ad2f-4e7b142d8463,b472de9d-b5fe-4b3d-8923-13fe8969ad76,d8e1b071-4b75-4442-9426-407a431e0b92,68a7b331-b649-402a-ac0e-b65b85655d04,77268fa6-8721-44f0-b8e3-ecf9fb06b924,623f61f6-f014-46d0-b4de-08151f38ff0d,640d7d27-bb3a-494b-838d-44ee7c85f12c,69c90976-a291-4c14-b803-a85f27f6fb6a,cf41e38b-2b70-40b9-9323-03a1f73be7c6,cedb80be-37e8-47fe-ad60-645973a12955,ea545945-4684-404b-aee5-f96da073730d,59ec017f-83ff-4284-a55f-d4fdec1caedd,31ccfe43-1c0d-4756-a1bb-acc9e62b69f6	GT:DR:DV:PL:GQ	0/1:11:20:112,0,26:25
+chr1	1428035	cuteSV.INS.49	G	GGAGAGGGGAGGAGGGAAGAGGGAGGGGAGGGGGTAGGGAGGGGAGAGAGGGGAGGGAGGGGGAGAGAGGGAGGGAGGGGAGAGAGGTGGGAGGAGGAGAGAGGGGGAGTGGGAGGAGGAAGAGGGAGGGGGGAGGGAAGAGAGGGGACGGGAGGGGAAGAGAGGGAGGGGAGGGGAGGGGGGAAGAGAGGGCAGGGGAGGGTGGGAGAGGAGGGCATGGAGGGGTGGGAGAGAGGGGAGGGAGGGAGAGGAGAGGGGAGGGGGAGAGAGGGGAGGGAGGGGGAGAGAGGGAGGGAGGGAGGGGAAGAGGGGGAGGGGAGGGGAAGAGGAGGAGGGTGGAGAGAGGCAGGAGAGAGGGCAGTGGGGAGGG	324.4	PASS	PRECISE;SVTYPE=INS;SVLEN=369;END=1428035;CIPOS=-17,17;CILEN=-22,22;RE=34;RNAMES=733b07d2-662e-4b95-b959-2b5d0a115483,f5204bf0-46df-4baf-964b-8c839f046200,d8e1b071-4b75-4442-9426-407a431e0b92,54e98c4b-5a5f-4113-9304-7a5c0aea7c63,2329bd50-c50b-40fb-8da8-8a3b33bd241e,68a7b331-b649-402a-ac0e-b65b85655d04,12932aa3-c4fb-4966-905c-488f025cb743,9029b9c6-09de-484d-83ff-7a20e6b313c8,d2031098-3567-4d27-be20-c8e182f7c4b4,f99f1f38-6f26-485c-8aad-b3bad4a2596f,c226539f-904d-474c-a18e-49b18f7cb378,623f61f6-f014-46d0-b4de-08151f38ff0d,a569e6c2-b324-4efd-9a08-13076cbbcbd5,05911855-6bbc-4bb3-979c-479c8273eeaf,77268fa6-8721-44f0-b8e3-ecf9fb06b924,69c90976-a291-4c14-b803-a85f27f6fb6a,59716984-39a5-40aa-af8c-7859e47e8dd6,cf41e38b-2b70-40b9-9323-03a1f73be7c6,2934cba4-0b4b-4aee-b14b-0d0e3546b541,fde476d8-2aac-42ed-9c46-752e5d14207f,b472de9d-b5fe-4b3d-8923-13fe8969ad76,3a22e315-09d8-4146-8de1-6a077f83e794,68845987-0249-4222-b2eb-04cb83059a27,74e3c21f-dad1-4e75-9848-a6a85d837b12,3c70ee06-5212-40a3-8f97-e23cf4bf063d,ff84c73b-00ed-4477-949c-4902317bb4f0,ea545945-4684-404b-aee5-f96da073730d,cedb80be-37e8-47fe-ad60-645973a12955,ed59a62c-2964-4d7a-ad2f-4e7b142d8463,59ec017f-83ff-4284-a55f-d4fdec1caedd,640d7d27-bb3a-494b-838d-44ee7c85f12c,91dab2f4-f212-4619-be08-03d8bc49e2d9,31ccfe43-1c0d-4756-a1bb-acc9e62b69f6,8e2d8882-2af9-4c06-b864-5cb41183c2e5	GT:DR:DV:PL:GQ	1/1:0:34:324,87,0:86
+chr1	1442880	cuteSV.INS.50	G	GTTTCAGCAGATTTTGGGCCAAACCAAGTCAAT	190.9	PASS	PRECISE;SVTYPE=INS;SVLEN=32;END=1442880;CIPOS=-11,11;CILEN=0,0;RE=27;RNAMES=9131a259-bac1-4478-91c8-74315ef48c01,a1ecbbe0-44ce-4b79-b89d-d41f41d6e980,a79e0f63-7054-4a36-8a7f-910b6bd38fc6,cedb80be-37e8-47fe-ad60-645973a12955,a569e6c2-b324-4efd-9a08-13076cbbcbd5,0451a570-a9c6-46b0-b66c-f07a921085d2,b472de9d-b5fe-4b3d-8923-13fe8969ad76,8037c6cd-9a78-4bb3-bc0f-876835c66397,9e1af1c2-ae7e-4af6-80c8-f04db6b8da09,56cf6573-38f4-40a4-975a-9eafb54e4772,3644ce7c-04b1-4adc-8da0-9bcfc52fea97,81c77fd1-7f7a-45b5-bbe9-d4ca5259b06b,12932aa3-c4fb-4966-905c-488f025cb743,640d7d27-bb3a-494b-838d-44ee7c85f12c,ea545945-4684-404b-aee5-f96da073730d,5c443079-8bd5-490c-999e-a89b4a7c2e4a,4d0d4c92-bd33-4cbf-b887-24500de6aa7c,6df546c9-ca86-42a4-b5d4-76f5ecb1a301,59ec017f-83ff-4284-a55f-d4fdec1caedd,88227abe-84b7-4e0c-9df5-b1f9c19a7db7,98c04ccb-fed1-4d09-b2ab-2237950eade2,07f7818c-3008-485b-a72a-bf23f8a9bd85,b75c54f4-1363-419e-aaec-ccd650302bab,299d3562-7f7c-453f-89f6-92cc818e2539,76fd3da7-0cfb-4d60-972d-4902f7c7579b,3114c99a-222d-4864-a130-dad4fa8ee72a,59716984-39a5-40aa-af8c-7859e47e8dd6	GT:DR:DV:PL:GQ	1/1:7:27:191,20,0:20
+chr1	1443673	cuteSV.DEL.35	CCTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT	C	132	PASS	PRECISE;SVTYPE=DEL;SVLEN=-33;END=1443706;CIPOS=0,0;CILEN=-1,1;RE=24;RNAMES=59716984-39a5-40aa-af8c-7859e47e8dd6,76fd3da7-0cfb-4d60-972d-4902f7c7579b,74e3c21f-dad1-4e75-9848-a6a85d837b12,a79e0f63-7054-4a36-8a7f-910b6bd38fc6,1f5d20d9-283f-4121-b344-08e677666733,c8f9bea9-6094-4db4-a62e-87eb8ca3b85a,ea545945-4684-404b-aee5-f96da073730d,81c77fd1-7f7a-45b5-bbe9-d4ca5259b06b,5adb0f11-712e-43de-8296-47af85507bfc,12932aa3-c4fb-4966-905c-488f025cb743,4d0d4c92-bd33-4cbf-b887-24500de6aa7c,59ec017f-83ff-4284-a55f-d4fdec1caedd,5a0fc793-fc3b-4878-bb46-e770c6bf95ae,b472de9d-b5fe-4b3d-8923-13fe8969ad76,a188eb2e-3338-4a2a-a859-c22662ded131,a569e6c2-b324-4efd-9a08-13076cbbcbd5,07f7818c-3008-485b-a72a-bf23f8a9bd85,3644ce7c-04b1-4adc-8da0-9bcfc52fea97,a0d28591-5dfb-4680-9140-21843e82d50b,cedb80be-37e8-47fe-ad60-645973a12955,299d3562-7f7c-453f-89f6-92cc818e2539,9e1af1c2-ae7e-4af6-80c8-f04db6b8da09,c3895732-a3e2-4839-a5d3-409942045275,b75c54f4-1363-419e-aaec-ccd650302bab;STRAND=+-	GT:DR:DV:PL:GQ	0/1:14:24:132,0,37:36
+chr1	1469102	cuteSV.DEL.36	AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA	A	0.7	q5	IMPRECISE;SVTYPE=DEL;SVLEN=-34;END=1469136;CIPOS=-6,6;CILEN=-1,1;RE=8;RNAMES=92dc4377-d417-453f-9af9-12426f178cf8,574e03e0-4c27-464c-95e2-7e359ad18011,a9c7f5b9-d9b4-416c-8249-e95da7419fcc,7c25be75-6172-4f7f-9c0b-b80389b1f8d9,857e3690-3e8e-4943-bbb1-28cd73e53bf8,d9b425de-6457-4cd7-9a17-8a9690806f29,05606178-17ba-4773-8989-dc90172265ae,bbd9dd0a-252d-4d40-b27a-5bd9eec4f1b3;STRAND=+-	GT:DR:DV:PL:GQ	0/0:25:8:1,9,163:8
+chr1	1477901	cuteSV.INS.51	C	CGCCACCACGCCTGGCTAATGTTGTATTTGTAGAGACGGGGTTTCTCCATGTTGGTCGGGCTGGTCTCGAACTCCCGACCTCAGGTGATCCACCGCCTGTCTCTCAAATCAGTGCTGGGATTACAGGCATGT	305.4	PASS	PRECISE;SVTYPE=INS;SVLEN=131;END=1477901;CIPOS=-15,15;CILEN=-1,1;RE=32;RNAMES=92dc4377-d417-453f-9af9-12426f178cf8,6d230858-7403-4a6e-8fe6-561a53d8bb08,9304e5e6-4053-44dc-bd61-14dbbbacfe80,55db679c-a93c-4d4b-b081-6be6ddf8d32d,0c4d0a70-ff5e-4891-99b9-05ed59a23eca,4d0846d2-c77a-4f04-ab9d-68eb548cc199,7724e954-c223-4614-8ee1-452e727afe21,7c25be75-6172-4f7f-9c0b-b80389b1f8d9,b73d7779-9769-42f0-b383-896a2cf597d8,b996975f-7ea0-452e-9a01-15d57bacd531,6e95792e-cae3-4246-9227-5e4c3c8c457e,93c9a19c-23df-4132-8455-678153d0eecd,6a20ecdf-0172-4385-bf2c-7fc05a077871,21e92a97-e96a-45b9-ab20-149fd0adae33,f2d3f807-f651-402a-a686-ad37157d5956,6bf99bdb-e2f4-4d9e-a74d-277aa8418d5f,a370a155-fb2e-479c-a49b-0efa0e7a96b7,c50b11cd-e5f8-4448-bd61-e85b50c30308,18de7f98-d2e4-4e3c-b9a5-4100435aed4a,65b0e4a5-f9ec-4ae5-bc34-c79385df8859,e06bbc72-2c56-480f-b3ba-289a320fa5ac,a9c7f5b9-d9b4-416c-8249-e95da7419fcc,576d01a5-9b52-4982-9e22-4953786abad1,55845941-6f72-4a68-a4e5-b51f39132aed,aa6d1629-603c-4ff4-8b60-6e5e3310bbb1,a37daeca-ebbe-49df-b89d-f662704b024c,d638acf6-2965-4be2-818a-f4b169c90606,80fc4697-1048-4150-a38a-6032b39aca24,96e41d89-8690-4e83-888a-3d096b4cf675,d9b1ec8b-d331-4b36-8367-c14aee545173,301af76c-5c8c-4f8f-a7de-60de6dce6843,37af4c00-18d0-4eee-911f-aa036824902a	GT:DR:DV:PL:GQ	1/1:0:32:305,82,0:81
+chr1	1497112	cuteSV.INS.52	C	CGCCTGGGCGCAGCGGTCCCATGGGAGAGCAGACCCTCG	78.6	PASS	PRECISE;SVTYPE=INS;SVLEN=38;END=1497112;CIPOS=-8,8;CILEN=-1,1;RE=16;RNAMES=c920d579-a175-4d4e-98b8-5483456734fc,0d2b73fa-6365-4899-8a27-0b4f8e133d21,a370a155-fb2e-479c-a49b-0efa0e7a96b7,757d110c-2087-4f31-99f9-984675f8afc2,f2da6824-3384-45bb-a431-9637afa6833a,de71b88d-4247-40b5-8f20-18ad4ba91948,727733a3-f662-4da2-b1b6-6bb591a498e1,e002b396-4646-469c-b2f5-31418b4a55c5,7994bd08-7dbf-4d35-9313-ad00aede507d,6cdcaa04-0dad-4d56-a1c1-16fca9d411fb,5c5434d0-7e94-4961-86bf-5ed0be05a901,b20f81d6-0e81-482a-b814-9f2895aad339,96e41d89-8690-4e83-888a-3d096b4cf675,729d0ffb-0e5a-444a-9e5c-25b39d7500b1,ae45098f-0cbd-4ff4-8622-d1ad73658aa0,df37dd1c-d52d-40b9-b817-b885543054d4	GT:DR:DV:PL:GQ	0/1:13:16:79,0,50:50
+chr1	1540143	cuteSV.DEL.37	CGGGGAGGGGAGCGCAGGCCGGGGAGGGGAGCGCAGGCC	C	171.8	PASS	PRECISE;SVTYPE=DEL;SVLEN=-38;END=1540181;CIPOS=-6,6;CILEN=0,0;RE=24;RNAMES=d211c770-b648-412e-826b-1c8f6b8c7d33,5fe8d78f-4365-4398-a6fd-c2f2375b038c,1fcf68f1-2ebd-432b-aad6-0baaa1e1fc25,2d213b39-1d81-4c16-b70a-600d7f0e1329,3e6d8ba0-6225-42ff-8c4f-b71ca8b33fcf,bf39c1d7-160d-462f-95d2-c8eb809c430b,c940e79b-9607-442d-aaa3-efdaa61625b8,056a7033-c4e8-4b0a-a9e1-63acfa17c278,ee817bac-930e-4d3c-a14b-b1b2646e6bf5,edc22f9f-3ca3-42a8-9fb8-607cac4d842b,222aed17-ae32-4e79-8f2e-6a047c6f0175,a38b178e-45de-49a5-894a-bcd312ffd3e0,e06d8d91-cb08-43d9-8502-0beb8815aff6,e55fb060-f89b-40c2-9ad3-237a85b372b1,30c45a20-8403-4f8e-9b9c-71840c473bd9,6e9c3ff9-386f-44dc-b9d3-5df3baf75fc7,69af0fc2-5e0e-4ebf-b788-3843b85e1017,c2b3bda1-e3f7-40ff-b1e9-22da462fff0c,3a7272a0-2ebd-48ec-81c7-449f116f9638,e7d62aaf-f433-4d15-a8b5-f99f4aa67a17,133c6253-34ea-4e09-af98-2727ae8c2a7b,73b74963-da1a-4249-aeba-9c309d098918,98d935e6-1078-4aa3-bdb0-b2c6b9bda83d,5e8f62d8-cace-44cf-a70e-de5ddbdddcfc;STRAND=+-	GT:DR:DV:PL:GQ	1/1:6:24:172,19,0:19
+chr1	1554172	cuteSV.DEL.38	CTAAGGGGTCCCCACGAAGCTGAGCACGAGGCGGATCCGGACCA	C	57	PASS	PRECISE;SVTYPE=DEL;SVLEN=-43;END=1554215;CIPOS=-12,12;CILEN=-1,1;RE=14;RNAMES=222aed17-ae32-4e79-8f2e-6a047c6f0175,6e9c3ff9-386f-44dc-b9d3-5df3baf75fc7,c2b3bda1-e3f7-40ff-b1e9-22da462fff0c,69af0fc2-5e0e-4ebf-b788-3843b85e1017,41e3acae-1239-4690-aaa5-104c76c52dbb,df9ed2d4-72ac-4afe-bcba-c3391664b31a,c5abd0d0-0f77-4306-b8e2-693cb97de6ca,81f0b908-59ab-41a6-a075-a4bd92c97b16,c755a07f-1e64-43bb-9430-dba434f89b26,d211c770-b648-412e-826b-1c8f6b8c7d33,8b4ebcb8-8c9e-4280-9157-ffb0e9018584,f7ebc27f-ae67-453c-b618-2cfa8fa6f3f6,133c6253-34ea-4e09-af98-2727ae8c2a7b,f493efa3-011f-47ea-a3ae-a13a3ec516e9;STRAND=+-	GT:DR:DV:PL:GQ	0/1:16:14:57,0,76:56
+chr1	1594855	cuteSV.INS.53	A	AAGCAGGGTGGGGAGAGACAGACACAGAGAGAGAGCAGAACGGGAAGGAAGAGACAGAGAGAGGCAGACAGAGAGAGAGAGAGAGACAGACAGACAGACACAGAAGAGCAGAACAGGGAGAGACAGAGAGAGTGAGACAGAACCCGGAGACAGAGGAGGCAGACACACAGAGAGAGAGAGAGAGAGAGACAGACAGACACAGAGGCAGAACAGGGAGAGACAGAGAGACAGAGAGAGAGAGAGTGAGACAGAGACAGGGAATTGAGAGGCAGACAGAGAGAGACAGACAGACAGACAGACACAGAGAGAACCAGGGCAAGGGACAGACAGAGAGAGAGAGACAGACAGAGAGCAGAACAGGGAGAGACAAAGAGACAGAGAGAGAAGACACAGAGAGAGAGAGACAGAGAGAGGCAGACAGAGACAAGAGAGACAGACAGACACAGAGCAGAACAGGGAAGACAGAGAGAGAGAGAGACAGAGAGAGGCAAGACAGAGAGAGAGAGAGACAGACACAGAGAGAGCAGAACAGGGAGAAACAGAGACAGAGAGCGAGAGAGACAGAGACAGAGAGAGAGGCAGACAGAGACAGAGAGAGAGAGAGACAGACAGACACAGAGAGAGCAGAGCAGGGAGGAGAGACAGAGAGAGAGAAAGAGACAAAGACAGAGACAGAGACAGAGAGGCAGAGACAGAGACACAGAGAGAGCAGAACAGGGAGAGACAGCAGACAGAGAGAGAGAGAGAGACAGAGAGAGGCAGACAGAGACAGAGAGAGAGACAGACAGACACAGAGAGAGCAGAACAGGGAGAGACAGAGACAGAGAGAAACAGAGACAGAGACAGAGACAGAGAGGCAGACAGAGAGAGAGACAGACACAGAGAGAGCAGAACAGGGAGAGACAGAGAGACAGAGAAGGTAGAGACAGAGACAGAGACAGAGGCAGACAGAGAGAGACAGACAGACAGACAGACAGACACAGAAGAGAGCAGAACAGGGACAGACAGAGAGAGAGAGAGAGAGACAGAGACAGAGACAGAAGGCAGAGAGAGACACAGAGAGAGCAGAACAGGGAGAGACAGAGAGACAGAGAGAGAGAGAGACAGAGAGGAGCCAGACAGAGACAGAGACAGACAGACAGACACAGAGAGAGCAGAACGGGGAGAGACAGAGAGAGAGAGAGAGAGAGAGACAGAGAGAGAGAGCAGAACAGGGAGAAACAGAGAGACAGAGAGCGAG	1	q5	IMPRECISE;SVTYPE=INS;SVLEN=1239;END=1594855;CIPOS=-13,13;CILEN=-45,45;RE=5;RNAMES=592727b7-8b4e-41a3-835b-3e0ff258c59c,bb65e009-c11a-467f-8d23-d620c0605d7b,2ef6b649-4918-488d-9a44-500e03fbebaf,57e2bd9a-3426-42c1-ad31-09f88e493158,64d8dedb-1adb-469e-9cee-aba7b1e24411	GT:DR:DV:PL:GQ	0/0:16:5:1,7,106:6
+chr1	1594953	cuteSV.DUP.5	A	<DUP>	0.4	q5	IMPRECISE;SVTYPE=DUP;SVLEN=45;END=1594998;RE=8;STRAND=-+;RNAMES=feb42a4c-bff2-47e6-bc2b-32908590ae4a,e0aa3efa-33ee-4d1e-ab4c-e75b4cc6c632,8c7063be-6b73-4d7b-8f08-3e861f2ca50d,46de2f85-a73a-4539-8cc8-08ca1c95653b,3b0211e3-7775-44ad-b4a3-e42a1b9745c8,bba10337-5aad-46a2-883d-8f081186d727,0124b41f-8dd8-426e-94c4-efca467e11eb,68023d5d-2b80-4da6-8dea-6c6e3fb0f7e5	GT:DR:DV:PL:GQ	0/0:26:8:0,11,172:10
diff --git a/tests/data/sniffles.vcf b/tests/data/sniffles.vcf
new file mode 100644
index 00000000..700df87a
--- /dev/null
+++ b/tests/data/sniffles.vcf
@@ -0,0 +1,355 @@
+##fileformat=VCFv4.1
+##FILTER=<ID=PASS,Description="All filters passed">
+##source=Sniffles
+##fileDate=20210607:16:05 PMef_minus
+##contig=<ID=chr1,length=248956422>
+##contig=<ID=chr2,length=242193529>
+##contig=<ID=chr3,length=198295559>
+##contig=<ID=chr4,length=190214555>
+##contig=<ID=chr5,length=181538259>
+##contig=<ID=chr6,length=170805979>
+##contig=<ID=chr7,length=159345973>
+##contig=<ID=chr8,length=145138636>
+##contig=<ID=chr9,length=138394717>
+##contig=<ID=chr10,length=133797422>
+##contig=<ID=chr11,length=135086622>
+##contig=<ID=chr12,length=133275309>
+##contig=<ID=chr13,length=114364328>
+##contig=<ID=chr14,length=107043718>
+##contig=<ID=chr15,length=101991189>
+##contig=<ID=chr16,length=90338345>
+##contig=<ID=chr17,length=83257441>
+##contig=<ID=chr18,length=80373285>
+##contig=<ID=chr19,length=58617616>
+##contig=<ID=chr20,length=64444167>
+##contig=<ID=chr21,length=46709983>
+##contig=<ID=chr22,length=50818468>
+##contig=<ID=chrX,length=156040895>
+##contig=<ID=chrY,length=57227415>
+##contig=<ID=chrM,length=16569>
+##contig=<ID=chr1_KI270706v1_random,length=175055>
+##contig=<ID=chr1_KI270707v1_random,length=32032>
+##contig=<ID=chr1_KI270708v1_random,length=127682>
+##contig=<ID=chr1_KI270709v1_random,length=66860>
+##contig=<ID=chr1_KI270710v1_random,length=40176>
+##contig=<ID=chr1_KI270711v1_random,length=42210>
+##contig=<ID=chr1_KI270712v1_random,length=176043>
+##contig=<ID=chr1_KI270713v1_random,length=40745>
+##contig=<ID=chr1_KI270714v1_random,length=41717>
+##contig=<ID=chr2_KI270715v1_random,length=161471>
+##contig=<ID=chr2_KI270716v1_random,length=153799>
+##contig=<ID=chr3_GL000221v1_random,length=155397>
+##contig=<ID=chr4_GL000008v2_random,length=209709>
+##contig=<ID=chr5_GL000208v1_random,length=92689>
+##contig=<ID=chr9_KI270717v1_random,length=40062>
+##contig=<ID=chr9_KI270718v1_random,length=38054>
+##contig=<ID=chr9_KI270719v1_random,length=176845>
+##contig=<ID=chr9_KI270720v1_random,length=39050>
+##contig=<ID=chr11_KI270721v1_random,length=100316>
+##contig=<ID=chr14_GL000009v2_random,length=201709>
+##contig=<ID=chr14_GL000225v1_random,length=211173>
+##contig=<ID=chr14_KI270722v1_random,length=194050>
+##contig=<ID=chr14_GL000194v1_random,length=191469>
+##contig=<ID=chr14_KI270723v1_random,length=38115>
+##contig=<ID=chr14_KI270724v1_random,length=39555>
+##contig=<ID=chr14_KI270725v1_random,length=172810>
+##contig=<ID=chr14_KI270726v1_random,length=43739>
+##contig=<ID=chr15_KI270727v1_random,length=448248>
+##contig=<ID=chr16_KI270728v1_random,length=1872759>
+##contig=<ID=chr17_GL000205v2_random,length=185591>
+##contig=<ID=chr17_KI270729v1_random,length=280839>
+##contig=<ID=chr17_KI270730v1_random,length=112551>
+##contig=<ID=chr22_KI270731v1_random,length=150754>
+##contig=<ID=chr22_KI270732v1_random,length=41543>
+##contig=<ID=chr22_KI270733v1_random,length=179772>
+##contig=<ID=chr22_KI270734v1_random,length=165050>
+##contig=<ID=chr22_KI270735v1_random,length=42811>
+##contig=<ID=chr22_KI270736v1_random,length=181920>
+##contig=<ID=chr22_KI270737v1_random,length=103838>
+##contig=<ID=chr22_KI270738v1_random,length=99375>
+##contig=<ID=chr22_KI270739v1_random,length=73985>
+##contig=<ID=chrY_KI270740v1_random,length=37240>
+##contig=<ID=chrUn_KI270302v1,length=2274>
+##contig=<ID=chrUn_KI270304v1,length=2165>
+##contig=<ID=chrUn_KI270303v1,length=1942>
+##contig=<ID=chrUn_KI270305v1,length=1472>
+##contig=<ID=chrUn_KI270322v1,length=21476>
+##contig=<ID=chrUn_KI270320v1,length=4416>
+##contig=<ID=chrUn_KI270310v1,length=1201>
+##contig=<ID=chrUn_KI270316v1,length=1444>
+##contig=<ID=chrUn_KI270315v1,length=2276>
+##contig=<ID=chrUn_KI270312v1,length=998>
+##contig=<ID=chrUn_KI270311v1,length=12399>
+##contig=<ID=chrUn_KI270317v1,length=37690>
+##contig=<ID=chrUn_KI270412v1,length=1179>
+##contig=<ID=chrUn_KI270411v1,length=2646>
+##contig=<ID=chrUn_KI270414v1,length=2489>
+##contig=<ID=chrUn_KI270419v1,length=1029>
+##contig=<ID=chrUn_KI270418v1,length=2145>
+##contig=<ID=chrUn_KI270420v1,length=2321>
+##contig=<ID=chrUn_KI270424v1,length=2140>
+##contig=<ID=chrUn_KI270417v1,length=2043>
+##contig=<ID=chrUn_KI270422v1,length=1445>
+##contig=<ID=chrUn_KI270423v1,length=981>
+##contig=<ID=chrUn_KI270425v1,length=1884>
+##contig=<ID=chrUn_KI270429v1,length=1361>
+##contig=<ID=chrUn_KI270442v1,length=392061>
+##contig=<ID=chrUn_KI270466v1,length=1233>
+##contig=<ID=chrUn_KI270465v1,length=1774>
+##contig=<ID=chrUn_KI270467v1,length=3920>
+##contig=<ID=chrUn_KI270435v1,length=92983>
+##contig=<ID=chrUn_KI270438v1,length=112505>
+##contig=<ID=chrUn_KI270468v1,length=4055>
+##contig=<ID=chrUn_KI270510v1,length=2415>
+##contig=<ID=chrUn_KI270509v1,length=2318>
+##contig=<ID=chrUn_KI270518v1,length=2186>
+##contig=<ID=chrUn_KI270508v1,length=1951>
+##contig=<ID=chrUn_KI270516v1,length=1300>
+##contig=<ID=chrUn_KI270512v1,length=22689>
+##contig=<ID=chrUn_KI270519v1,length=138126>
+##contig=<ID=chrUn_KI270522v1,length=5674>
+##contig=<ID=chrUn_KI270511v1,length=8127>
+##contig=<ID=chrUn_KI270515v1,length=6361>
+##contig=<ID=chrUn_KI270507v1,length=5353>
+##contig=<ID=chrUn_KI270517v1,length=3253>
+##contig=<ID=chrUn_KI270529v1,length=1899>
+##contig=<ID=chrUn_KI270528v1,length=2983>
+##contig=<ID=chrUn_KI270530v1,length=2168>
+##contig=<ID=chrUn_KI270539v1,length=993>
+##contig=<ID=chrUn_KI270538v1,length=91309>
+##contig=<ID=chrUn_KI270544v1,length=1202>
+##contig=<ID=chrUn_KI270548v1,length=1599>
+##contig=<ID=chrUn_KI270583v1,length=1400>
+##contig=<ID=chrUn_KI270587v1,length=2969>
+##contig=<ID=chrUn_KI270580v1,length=1553>
+##contig=<ID=chrUn_KI270581v1,length=7046>
+##contig=<ID=chrUn_KI270579v1,length=31033>
+##contig=<ID=chrUn_KI270589v1,length=44474>
+##contig=<ID=chrUn_KI270590v1,length=4685>
+##contig=<ID=chrUn_KI270584v1,length=4513>
+##contig=<ID=chrUn_KI270582v1,length=6504>
+##contig=<ID=chrUn_KI270588v1,length=6158>
+##contig=<ID=chrUn_KI270593v1,length=3041>
+##contig=<ID=chrUn_KI270591v1,length=5796>
+##contig=<ID=chrUn_KI270330v1,length=1652>
+##contig=<ID=chrUn_KI270329v1,length=1040>
+##contig=<ID=chrUn_KI270334v1,length=1368>
+##contig=<ID=chrUn_KI270333v1,length=2699>
+##contig=<ID=chrUn_KI270335v1,length=1048>
+##contig=<ID=chrUn_KI270338v1,length=1428>
+##contig=<ID=chrUn_KI270340v1,length=1428>
+##contig=<ID=chrUn_KI270336v1,length=1026>
+##contig=<ID=chrUn_KI270337v1,length=1121>
+##contig=<ID=chrUn_KI270363v1,length=1803>
+##contig=<ID=chrUn_KI270364v1,length=2855>
+##contig=<ID=chrUn_KI270362v1,length=3530>
+##contig=<ID=chrUn_KI270366v1,length=8320>
+##contig=<ID=chrUn_KI270378v1,length=1048>
+##contig=<ID=chrUn_KI270379v1,length=1045>
+##contig=<ID=chrUn_KI270389v1,length=1298>
+##contig=<ID=chrUn_KI270390v1,length=2387>
+##contig=<ID=chrUn_KI270387v1,length=1537>
+##contig=<ID=chrUn_KI270395v1,length=1143>
+##contig=<ID=chrUn_KI270396v1,length=1880>
+##contig=<ID=chrUn_KI270388v1,length=1216>
+##contig=<ID=chrUn_KI270394v1,length=970>
+##contig=<ID=chrUn_KI270386v1,length=1788>
+##contig=<ID=chrUn_KI270391v1,length=1484>
+##contig=<ID=chrUn_KI270383v1,length=1750>
+##contig=<ID=chrUn_KI270393v1,length=1308>
+##contig=<ID=chrUn_KI270384v1,length=1658>
+##contig=<ID=chrUn_KI270392v1,length=971>
+##contig=<ID=chrUn_KI270381v1,length=1930>
+##contig=<ID=chrUn_KI270385v1,length=990>
+##contig=<ID=chrUn_KI270382v1,length=4215>
+##contig=<ID=chrUn_KI270376v1,length=1136>
+##contig=<ID=chrUn_KI270374v1,length=2656>
+##contig=<ID=chrUn_KI270372v1,length=1650>
+##contig=<ID=chrUn_KI270373v1,length=1451>
+##contig=<ID=chrUn_KI270375v1,length=2378>
+##contig=<ID=chrUn_KI270371v1,length=2805>
+##contig=<ID=chrUn_KI270448v1,length=7992>
+##contig=<ID=chrUn_KI270521v1,length=7642>
+##contig=<ID=chrUn_GL000195v1,length=182896>
+##contig=<ID=chrUn_GL000219v1,length=179198>
+##contig=<ID=chrUn_GL000220v1,length=161802>
+##contig=<ID=chrUn_GL000224v1,length=179693>
+##contig=<ID=chrUn_KI270741v1,length=157432>
+##contig=<ID=chrUn_GL000226v1,length=15008>
+##contig=<ID=chrUn_GL000213v1,length=164239>
+##contig=<ID=chrUn_KI270743v1,length=210658>
+##contig=<ID=chrUn_KI270744v1,length=168472>
+##contig=<ID=chrUn_KI270745v1,length=41891>
+##contig=<ID=chrUn_KI270746v1,length=66486>
+##contig=<ID=chrUn_KI270747v1,length=198735>
+##contig=<ID=chrUn_KI270748v1,length=93321>
+##contig=<ID=chrUn_KI270749v1,length=158759>
+##contig=<ID=chrUn_KI270750v1,length=148850>
+##contig=<ID=chrUn_KI270751v1,length=150742>
+##contig=<ID=chrUn_KI270752v1,length=27745>
+##contig=<ID=chrUn_KI270753v1,length=62944>
+##contig=<ID=chrUn_KI270754v1,length=40191>
+##contig=<ID=chrUn_KI270755v1,length=36723>
+##contig=<ID=chrUn_KI270756v1,length=79590>
+##contig=<ID=chrUn_KI270757v1,length=71251>
+##contig=<ID=chrUn_GL000214v1,length=137718>
+##contig=<ID=chrUn_KI270742v1,length=186739>
+##contig=<ID=chrUn_GL000216v2,length=176608>
+##contig=<ID=chrUn_GL000218v1,length=161147>
+##contig=<ID=chrEBV,length=171823>
+##contig=<ID=NC_001416.1,length=48502>
+##ALT=<ID=DEL,Description="Deletion">
+##ALT=<ID=DUP,Description="Duplication">
+##ALT=<ID=INV,Description="Inversion">
+##ALT=<ID=INVDUP,Description="InvertedDUP with unknown boundaries">
+##ALT=<ID=TRA,Description="Translocation">
+##ALT=<ID=INS,Description="Insertion">
+##FILTER=<ID=UNRESOLVED,Description="An insertion that is longer than the read and thus we cannot predict the full size.">
+##INFO=<ID=CHR2,Number=1,Type=String,Description="Chromosome for END coordinate in case of a translocation">
+##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the structural variant">
+##INFO=<ID=MAPQ,Number=1,Type=Integer,Description="Median mapping quality of paired-ends">
+##INFO=<ID=RE,Number=1,Type=Integer,Description="read support">
+##INFO=<ID=IMPRECISE,Number=0,Type=Flag,Description="Imprecise structural variation">
+##INFO=<ID=PRECISE,Number=0,Type=Flag,Description="Precise structural variation">
+##INFO=<ID=SVLEN,Number=1,Type=Integer,Description="Length of the SV">
+##INFO=<ID=SVMETHOD,Number=1,Type=String,Description="Type of approach used to detect SV">
+##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">
+##INFO=<ID=SEQ,Number=1,Type=String,Description="Extracted sequence from the best representative read.">
+##INFO=<ID=STRANDS2,Number=4,Type=Integer,Description="alt reads first + ,alt reads first -,alt reads second + ,alt reads second -.">
+##INFO=<ID=REF_strand,Number=.,Type=Integer,Description="plus strand ref, minus strand ref.">
+##INFO=<ID=Strandbias_pval,Number=A,Type=Float,Description="P-value for fisher exact test for strand bias.">
+##INFO=<ID=STD_quant_start,Number=A,Type=Float,Description="STD of the start breakpoints across the reads.">
+##INFO=<ID=STD_quant_stop,Number=A,Type=Float,Description="STD of the stop breakpoints across the reads.">
+##INFO=<ID=Kurtosis_quant_start,Number=A,Type=Float,Description="Kurtosis value of the start breakpoints across the reads.">
+##INFO=<ID=Kurtosis_quant_stop,Number=A,Type=Float,Description="Kurtosis value of the stop breakpoints across the reads.">
+##INFO=<ID=SUPTYPE,Number=.,Type=String,Description="Type by which the variant is supported.(SR,AL,NR)">
+##INFO=<ID=STRANDS,Number=A,Type=String,Description="Strand orientation of the adjacency in BEDPE format (DEL:+-, DUP:-+, INV:++/--)">
+##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency.">
+##INFO=<ID=ZMW,Number=A,Type=Integer,Description="Number of ZMWs (Pacbio) supporting SV.">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=DR,Number=1,Type=Integer,Description="# high-quality reference reads">
+##FORMAT=<ID=DV,Number=1,Type=Integer,Description="# high-quality variant reads">
+##contig=<ID=NC_001416.1STRANDBIAS>
+##bcftools_viewVersion=1.11+htslib-1.11
+##bcftools_viewCommand=view --regions chr1 F24721_merged_sorted.bam_5_read_sorted.vcf.gz; Date=Tue Jan  4 22:45:21 2022
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	/projects/jfan_prj/jfan_prj/Nanopore_Testing/2021_nanopore_sv_testing/scratch/depth_testing/POG/COLO829/minimap2_bam/F24721_merged_sorted.bam
+chr1	10006	35777	N	]chr3:198172735]N	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr3;END=198172735;STD_quant_start=32.4628;STD_quant_stop=44.8237;Kurtosis_quant_start=2.29519;Kurtosis_quant_stop=-0.995353;SVTYPE=BND;SUPTYPE=SR;SVLEN=1;STRANDS=--;STRANDS2=0,6,6,0;RE=6;REF_strand=72,102;Strandbias_pval=0.0824618;AF=0.0344828	GT:DR:DV	0/0:168:6
+chr1	10030	36832	N	]chr17:41490827]N	.	STRANDBIAS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr17;END=41490827;STD_quant_start=48.6107;STD_quant_stop=4.67516;Kurtosis_quant_start=0.545103;Kurtosis_quant_stop=1.53121;SVTYPE=BND;SUPTYPE=SR;SVLEN=1;STRANDS=--;STRANDS2=0,7,7,0;RE=7;REF_strand=24,9;Strandbias_pval=0.000613617;AF=0.212121	GT:DR:DV	0/0:26:7
+chr1	10312	35780	N	]chrX:449436]N	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chrX;END=449436;STD_quant_start=117.156;STD_quant_stop=68.302;Kurtosis_quant_start=-1.29786;Kurtosis_quant_stop=-0.029231;SVTYPE=BND;SUPTYPE=SR;SVLEN=1;STRANDS=-+;STRANDS2=0,6,0,6;RE=6;REF_strand=45,128;Strandbias_pval=0.33926;AF=0.0346821	GT:DR:DV	0/0:167:6
+chr1	10466	35781	N	N[chrX:156030800[	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chrX;END=156030800;STD_quant_start=81.1924;STD_quant_stop=134.17;Kurtosis_quant_start=1.34083;Kurtosis_quant_stop=1.99911;SVTYPE=BND;SUPTYPE=SR;SVLEN=1;STRANDS=++;STRANDS2=5,0,0,5;RE=5;REF_strand=7,8;Strandbias_pval=0.0546956;AF=0.333333	GT:DR:DV	0/1:10:5
+chr1	10467	35779	N	N[chr3:10002[	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr3;END=10002;STD_quant_start=106.244;STD_quant_stop=161.729;Kurtosis_quant_start=0.552508;Kurtosis_quant_stop=2.99076;SVTYPE=BND;SUPTYPE=SR;SVLEN=1;STRANDS=+-;STRANDS2=6,0,6,0;RE=6;REF_strand=4,5;Strandbias_pval=0.043956;AF=0.666667	GT:DR:DV	0/1:3:6
+chr1	10467	35782	N	N[chr3:198174376[	.	STRANDBIAS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr3;END=198174376;STD_quant_start=17.5865;STD_quant_stop=297.518;Kurtosis_quant_start=0.324147;Kurtosis_quant_stop=0.886959;SVTYPE=BND;SUPTYPE=SR;SVLEN=1;STRANDS=++;STRANDS2=7,0,0,7;RE=7;REF_strand=57,112;Strandbias_pval=0.000675389;AF=0.0414201	GT:DR:DV	0/0:162:7
+chr1	10468	35778	N	N[chr17:41490879[	.	STRANDBIAS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr17;END=41490879;STD_quant_start=41.208;STD_quant_stop=1.92354;Kurtosis_quant_start=3.02235;Kurtosis_quant_stop=0.961601;SVTYPE=BND;SUPTYPE=SR;SVLEN=1;STRANDS=++;STRANDS2=11,0,0,11;RE=11;REF_strand=4,6;Strandbias_pval=0.00386997;AF=1	GT:DR:DV	1/1:0:11
+chr1	35143	35783	N	N[chr20:60001[	.	PASS	PRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr20;END=60001;STD_quant_start=0;STD_quant_stop=0.632456;Kurtosis_quant_start=nan;Kurtosis_quant_stop=-0.5;SVTYPE=BND;SUPTYPE=SR;SVLEN=1;STRANDS=+-;STRANDS2=3,2,3,2;RE=5;REF_strand=0,3;Strandbias_pval=0.196429;AF=1	GT:DR:DV	1/1:0:5
+chr1	136637	0	N	GTGTCGGCTGACCCTCTGTCCGCGTGGAGGCCGGTGGGGTGTGGAGGC	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=136638;STD_quant_start=20.4524;STD_quant_stop=21.8541;Kurtosis_quant_start=-1.28738;Kurtosis_quant_stop=-1.27278;SVTYPE=INS;SUPTYPE=AL;SVLEN=46;STRANDS=+-;STRANDS2=10,8,10,8;RE=18;REF_strand=20,20;Strandbias_pval=0.780391;AF=0.45	GT:DR:DV	0/1:22:18
+chr1	136956	1	N	TGACCTCTCTCAGTGTGGGAGGGGGCCGGTGTGAGGCAAGGGGCTCACGCGCGGCCTCTGTCCGCGTGGGAGGGGCCGGTGTGAGACAAGGGGCTCAGGCTGACCTCTCAGCGTGGGAGGGGCCGGTGTGAGGCAAAGGGCTCGGGCTGACCTCTCTCAGCGTGGGAGGGCCAGTGTGAGGCAGGGCTCACATGACCTCTCAGCATGGGAGGGGCCGGTGTGAGACAAGGGCTCGGG	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=136989;STD_quant_start=36.1248;STD_quant_stop=37.4166;Kurtosis_quant_start=0.21249;Kurtosis_quant_stop=1.96205;SVTYPE=INS;SUPTYPE=AL;SVLEN=195;STRANDS=+-;STRANDS2=6,8,6,8;RE=14;REF_strand=18,20;Strandbias_pval=1;AF=0.368421	GT:DR:DV	0/1:24:14
+chr1	180694	35784	N	]chrX:449444]N	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chrX;END=449444;STD_quant_start=296.777;STD_quant_stop=10.3923;Kurtosis_quant_start=0.018679;Kurtosis_quant_stop=0;SVTYPE=BND;SUPTYPE=SR;SVLEN=1;STRANDS=-+;STRANDS2=0,6,0,6;RE=6;REF_strand=26,30;Strandbias_pval=0.0354297;AF=0.107143	GT:DR:DV	0/0:50:6
+chr1	181262	2	N	CCGGCAGGCGCAGAGAGGCGCGGGCCGGGGTCGGGCGCAGGCGCAGAGAGCGCGGCCGGCGCAGAGGCGCAGAGAGGGCGCAGCA	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=181262;STD_quant_start=28.5441;STD_quant_stop=24.1125;Kurtosis_quant_start=2.54835;Kurtosis_quant_stop=1.686;SVTYPE=INS;SUPTYPE=AL;SVLEN=93;STRANDS=+-;STRANDS2=19,15,19,15;RE=34;REF_strand=56,46;Strandbias_pval=1;AF=0.333333	GT:DR:DV	0/1:68:34
+chr1	257667	35785	N	]chr5:181462060]N	.	PASS	PRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr5;END=181462060;STD_quant_start=0.894427;STD_quant_stop=0.447214;Kurtosis_quant_start=2;Kurtosis_quant_stop=2;SVTYPE=BND;SUPTYPE=SR;SVLEN=1;STRANDS=--;STRANDS2=0,5,5,0;RE=5;REF_strand=19,18;Strandbias_pval=0.0532252;AF=0.135135	GT:DR:DV	0/0:32:5
+chr1	350806	3	N	ACTCACTGAAGGTGGAGGGAAAATGGTGACCTAAGTC	.	PASS	PRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=350807;STD_quant_start=1.22475;STD_quant_stop=2.82843;Kurtosis_quant_start=3;Kurtosis_quant_stop=-0.65625;SVTYPE=INS;SUPTYPE=AL;SVLEN=37;STRANDS=+-;STRANDS2=4,2,4,2;RE=6;REF_strand=10,4;Strandbias_pval=1;AF=0.428571	GT:DR:DV	0/1:8:6
+chr1	368936	4	CCATCTCAGCAGCTCACGGTGTGGAAACTGCGACACTCACACGGGTGCCATCTCAGCAGCTCACGGTGTGGAAACTGCGACACTCACGTGGGTGCCATCTCAGCAGCTCACGGTGTAGAAACTGCGACACTCCCATGGGTGCCATCTCAGCAGCTCACGGTGTGGAAACTGCGACACTCACACGGGTGCCATCTCAGCAGCTCACGGTGTGGAAACTGCGACACTCACACGGGTGCCATCTCAGCAGCTCACGGTGTAGAAACTGCGACACTCCCATGGGTGCCATCTCAGCAGCTCACGGTGTGGAAACTGCGACACTCACACGGGTGCCATCTCAGCAGCTCACGGTGTGGAAACTGCGACACTCACACGGGT	N	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=369307;STD_quant_start=139.183;STD_quant_stop=119.892;Kurtosis_quant_start=-0.086052;Kurtosis_quant_stop=-0.157727;SVTYPE=DEL;SUPTYPE=AL;SVLEN=-371;STRANDS=+-;STRANDS2=7,5,7,5;RE=12;REF_strand=16,20;Strandbias_pval=0.511217;AF=0.333333	GT:DR:DV	0/1:24:12
+chr1	372679	5	CTTAGGGTCCATTCTGATCTGTATATATGTATAATATATATTATATATGGACCTCAGGGTCCATTCTGATCTGCATATATGTATAATATATATTATATATGGTCCTCAGGGTCCATTCTGATCTGTATATATGTATCATGTAAACATGAGTTCCTGCTGGCATATCTGTCTATAACCGACCA	N	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=372858;STD_quant_start=74.4439;STD_quant_stop=102.876;Kurtosis_quant_start=-0.974097;Kurtosis_quant_stop=1.36116;SVTYPE=DEL;SUPTYPE=AL;SVLEN=-179;STRANDS=+-;STRANDS2=6,4,6,4;RE=10;REF_strand=14,12;Strandbias_pval=1;AF=0.384615	GT:DR:DV	0/1:16:10
+chr1	374100	6	N	CCCCCTCTCCTTTCTCCTCTCCATCCCCCCTCTCCATCTCCTCTCCTTTCTCCTCTCTCGCCCCCTCTCCTTTCTCCCTCTCTATCCCCCTCTCCTTTCTCCCTCTCTCCCCCTCTCCTTTCTCCTCTCCATCCCCTCTCCATCCCCCTCTCCATCTCCTCTCCTTTCTCCTCTCTAGCCCCTCTCCTTTCTCTCTCCTCCCCCTCTCCTTTCTCCCTC	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=374100;STD_quant_start=57.1456;STD_quant_stop=79.9085;Kurtosis_quant_start=0.684282;Kurtosis_quant_stop=-0.413029;SVTYPE=INS;SUPTYPE=AL;SVLEN=227;STRANDS=+-;STRANDS2=4,4,4,4;RE=8;REF_strand=14,10;Strandbias_pval=0.703493;AF=0.333333	GT:DR:DV	0/1:16:8
+chr1	606600	7	GGTCAGAGCTGTCCTGGGTCAGAGCTGCCCAT	N	.	PASS	PRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=606632;STD_quant_start=2.98329;STD_quant_stop=2.54951;Kurtosis_quant_start=2.61341;Kurtosis_quant_stop=3.87685;SVTYPE=DEL;SUPTYPE=AL;SVLEN=-32;STRANDS=+-;STRANDS2=7,4,7,4;RE=11;REF_strand=28,28;Strandbias_pval=0.516721;AF=0.196429	GT:DR:DV	0/0:45:11
+chr1	609583	8	GTGGCCAGCAGGCGGCGCTGCAGGAGAGGAGATGCCCAGGCCTGGCGGCACACGCGGGTTC	N	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=609647;STD_quant_start=21.6956;STD_quant_stop=18.4174;Kurtosis_quant_start=-0.340189;Kurtosis_quant_stop=0.435423;SVTYPE=DEL;SUPTYPE=AL;SVLEN=-64;STRANDS=+-;STRANDS2=9,4,9,4;RE=13;REF_strand=30,34;Strandbias_pval=0.223523;AF=0.203125	GT:DR:DV	0/0:51:13
+chr1	611309	9	TGTGGGTGTGACAGGGTGTGTTCTGTGTGAGAACATGTGTGTAGTGTCCACATGTCCTCTGTGCGTGAGTCCCTGTGTGTGATGTTGTGTTCTCGGTGTGAGTTCATGGGTGTGATGGGGTGTGTGCTGTGTGAGAACGTGTGTGTAGTGTCCACATGTCCTCTGTGCGTGAGTCCCTGTGTGTGATGTTGTGTTCTTGGTGTGAGTTCATGGGTGTGACGGGGTGTGCTGTGTGAGAACGTGTGTGTAGTGTTCACATGTCCTCTGTGCGTGAGTCCCCGTGTGTGATGTTGTGTTCTCGGTGTGAGTTCATGGGTGTGACGGGGTGTGTGCTGTGTGAGAACGTGTGTGTAGTGTCCACATGTCCTCTGTGCGTGAGTCCCCGTGTGTGATGTTGTGTTCTCGGTGTGAGTTCATGGGTGTGACGGGGTGTGTGCTGTGTGAGAACGTGTGTGTAGTGTCCACATGTCCTCTGTGCGTGAGTCCCTGTGTGTGATGTTGTGTTCTCGGTGTGAGTTCATGGGTGTGACGGGGTGTGTGCTGTGTGAGAACGTGTGTGTAGTGTCCACATGTCCTCTGTGCGTGAGTCCCCGTGTGTGATGTTGTGTTCTCGGTGTGAGTTCATGGGTGTGACGGGGTGTGTGCTGTGTGAGAACGTGTGTGTAGTGTCCACATGTCCTCTGTGCGTGAGTCCCCGTGTGTGATGTTGTGTTCTCGGTGTGAGTTCA	N	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=612033;STD_quant_start=78.5303;STD_quant_stop=59.8415;Kurtosis_quant_start=-0.363;Kurtosis_quant_stop=0.0992;SVTYPE=DEL;SUPTYPE=AL;SVLEN=-724;STRANDS=+-;STRANDS2=7,6,7,6;RE=13;REF_strand=29,33;Strandbias_pval=0.763359;AF=0.209677	GT:DR:DV	0/0:49:13
+chr1	744867	10	N	TATATATATATATATATATATATATATATATA	.	PASS	PRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=744867;STD_quant_start=1.34164;STD_quant_stop=4.07431;Kurtosis_quant_start=2;Kurtosis_quant_stop=-0.953985;SVTYPE=INS;SUPTYPE=AL;SVLEN=35;STRANDS=+-;STRANDS2=4,1,4,1;RE=5;REF_strand=24,16;Strandbias_pval=0.635332;AF=0.125	GT:DR:DV	0/0:35:5
+chr1	814584	11	N	AAAAAAAGATGTGAAACCTATTTTCAGAATTAACATTTCCTTCCTAAATATCTAACACAACACTGAAGGAGAAAGTCCAGTCAATTTTATGTAGTT	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=814585;STD_quant_start=17.8792;STD_quant_stop=15.8719;Kurtosis_quant_start=-1.85801;Kurtosis_quant_stop=-1.90018;SVTYPE=INS;SUPTYPE=AL;SVLEN=96;STRANDS=+-;STRANDS2=13,11,13,11;RE=24;REF_strand=40,44;Strandbias_pval=0.64659;AF=0.285714	GT:DR:DV	0/0:60:24
+chr1	820880	12	N	TCTACACTACCTGCCTGGCCAGCAGATCCACCCTGTCTACACTACCTGCCTGGGCAGTAGTTCCACGCAATCTCCCTTACCTGCCTCTCCAGCAGACCCGCCCTATCTATACTACTTGCCTGTCCAGCAGATCCACTTCCCATTCACACGACCTGCCTGTCCAGCAGATCCACCCTGTCTACACTACCTTCCTGCTTGTCCAGCAGGTCCACCCTGTCTATACTACCTGCCTGGCCAGTAGATCCACACACTA	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=820881;STD_quant_start=6.70075;STD_quant_stop=12.2963;Kurtosis_quant_start=5.29053;Kurtosis_quant_stop=5.17296;SVTYPE=INS;SUPTYPE=AL;SVLEN=245;STRANDS=+-;STRANDS2=11,9,11,9;RE=20;REF_strand=54,40;Strandbias_pval=1;AF=0.212766	GT:DR:DV	0/0:74:20
+chr1	820906	13	TCCACCCTGTCTACACTACCTGCTTGTCCAGCAGG	N	.	PASS	PRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=820941;STD_quant_start=2.56905;STD_quant_stop=2.70185;Kurtosis_quant_start=-1.38237;Kurtosis_quant_stop=-1.08812;SVTYPE=DEL;SUPTYPE=AL;SVLEN=-35;STRANDS=+-;STRANDS2=11,9,11,9;RE=20;REF_strand=54,40;Strandbias_pval=1;AF=0.212766	GT:DR:DV	0/0:74:20
+chr1	822428	14	CCTGGCCAGCAGATCCACCCTGTCTATACTACCTG	N	.	PASS	PRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=822463;STD_quant_start=2.98329;STD_quant_stop=2.91548;Kurtosis_quant_start=-1.2983;Kurtosis_quant_stop=-1.31531;SVTYPE=DEL;SUPTYPE=AL;SVLEN=-35;STRANDS=+-;STRANDS2=11,9,11,9;RE=20;REF_strand=54,42;Strandbias_pval=1;AF=0.208333	GT:DR:DV	0/0:76:20
+chr1	839479	15	ACACACACCTGGACAAACACACCTGGACACACACACCTAG	N	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=839519;STD_quant_start=11.7558;STD_quant_stop=10.8904;Kurtosis_quant_start=-1.43253;Kurtosis_quant_stop=-1.20587;SVTYPE=DEL;SUPTYPE=AL;SVLEN=-40;STRANDS=+-;STRANDS2=8,7,8,7;RE=15;REF_strand=42,42;Strandbias_pval=1;AF=0.178571	GT:DR:DV	0/0:69:15
+chr1	853534	16	GCCGTGTGGTAAACTGATGAACCCCGACCCTGATGAACGTGAGATG	N	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=853581;STD_quant_start=21.0143;STD_quant_stop=21.4033;Kurtosis_quant_start=-1.50523;Kurtosis_quant_stop=-1.48919;SVTYPE=DEL;SUPTYPE=AL;SVLEN=-47;STRANDS=+-;STRANDS2=7,8,7,8;RE=15;REF_strand=30,36;Strandbias_pval=1;AF=0.227273	GT:DR:DV	0/0:51:15
+chr1	866801	17	N	CGCTCCTGGCCGTCTCCGAGCCCTCCACATGTCTCCTGCCTCATCCCTGACGTCCTCCCAGGCCCTCGTGGTCACTCCCCCTGCACTC	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=866839;STD_quant_start=45.3707;STD_quant_stop=37.0162;Kurtosis_quant_start=-2.21189;Kurtosis_quant_stop=-1.85872;SVTYPE=INS;SUPTYPE=AL;SVLEN=49;STRANDS=+-;STRANDS2=6,12,6,12;RE=18;REF_strand=32,44;Strandbias_pval=0.598063;AF=0.236842	GT:DR:DV	0/0:58:18
+chr1	872837	18	N	GGGGAGGTTTCATTTGCTCCACCTGCAGCGAGTAAGTAGCCCATCTCAGGTTTGACTCCTGACTTAATTCCTAACAGGGGAAGCCAAGGTCCTGTGACCCTCCCGGGGGAGGGGTTTCATTTGTTCTACCTGCAGTGAGGTCTGTTAGCCCATCTCAGGTTTGACTCCTGACTCTAATTCTAACAGGAAGCTGTCCTGTAACTCTGGGGAGGGGGGGGTTTCATTTGCTCCACCTGCAGCGAGGTTAGCCCTCCATCTCAGGTTTGACTCCTGACTTAATTCCTAACAGGGGAAGCTGCTGTCCTGTGACTCTGGGAGAAGGGGTTTCATTTGCTCCACCTGCAGTGAGGTCTGCTAGCCCATCTCAGGTTTGACTCTGACTTAATTCCTAAACAGGGGAAGCTGCTGTCCTGTAACTT	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=872837;STD_quant_start=87.716;STD_quant_stop=108.899;Kurtosis_quant_start=-1.21956;Kurtosis_quant_stop=-1.1229;SVTYPE=INS;SUPTYPE=AL;SVLEN=416;STRANDS=+-;STRANDS2=7,7,7,7;RE=14;REF_strand=28,30;Strandbias_pval=1;AF=0.241379	GT:DR:DV	0/0:44:14
+chr1	876112	19	N	CCCCATACTCCTCCCCCATACTCCCCCATACCCCCCCACACTCCCCCCATACTCCTCCCCCATACTCCCCCATACTCCCCCACACTCCCCCATACTCCTCCCCCATACTCCCCTATACTCCCCACACTCCCCCCAAACTCCCCCCATACTCCTCCCCCATACTCCCCATACTCCCCCACACTCCCCCACACTCCCCCATACTCCCCCACACTGTTCCCCCCATACCTCCCCCATACTCCCCCACACTCCCCCACACTCCCCCACGCTCCTCCCCCACACCCTCCCACACTCCCCCACACTCCCCTACTGCCTTCCCCCACACTCCCCCACACTCCTCCCCATACTCCCCCACACTCCCTCATACTCCCCATACTACCCCAACCTCCCCCATACTCCCCCATACTCCCCACACACTCCCCCCACACTCCCCCCAAACTCCCCCATACTCCTCCCCCAGTACTCCCCCATACTCCCACACTCCCACACTCCCCCACACTCCCCCCATACTCCCCCACACTCCCCCACACTCACTCCACACTCCCCATACTCCCCAAATCTCCCCCATACTCCCACATTCCCCCACACTCCCCACACTCCCCCATACTCCCCCACACTCCCCACACTCACCCACACCCCCCCATACTCCCCAACCTCCCCCATACTCCCCACATTCCCCCATACTCCCCCATACTCCTCCCCCATACTCCCCCCATACTCCCCCACACTCCCCACACTCCCCCATACTCCCCCACACTCCCCATACTCCCCCTGCATCCTCCCCATACTCCCCACATTCCCCCATACTCCCCATACTCCCCACACTCCCCCACACTCCCCCATACTCCCCCTCACACTCCCCCCATACTCCCCAACCTCCCAAACTCCCCCACATTCCCCCATACTCCCCATACTCCCCCAAACTCCCCATACTCCTCCCCTCAATACTCCCCATACTCCCCCATACTGCCCAACCTCCCCATACCCCCCACACTCCCCCCATACTCCCCCCACACCCCCCCCATACTCCCCCACACTCCCCTGCAACTCCCCTTATACTCCTCCCCCATACTCCCCATACTCCCCCCACACTCCCCAAACTCCCCATACTCCTCCCCATACTCCCCATACTCCCCCACACTCCCCCATACTCCTCCCCCATACTCCCCATACCCCCACACTCCCCCATACTCTCCCCATACTCCCCATACTCCCCACACTCCCCCAAACTCCCCCATACTCCTCCCCCATACTCCCCATACTCCCCCACACTCCCCCACACTCCCCCATACTCCCCACACTCCCCCATACTCCCCCAACCTCCCCATACTCCCCCACATTCCCCTATTACTCCCCATACTCCCCAAACTCCCCACATTCCCCCATACTCCCCCATACTCCCCAAACTCCCCCATACTCCTCCCCCACACTCCCCATACTCCCCCATACTCGCCCAACCTCCCCATACTCCCCCACTCCCCCATACTCCCCCACAGTCCCCCACACTCCCCCACACACTCCCCAACCTCCCCCATACTCCCCATACTCGCCCACACTCGCCCACACCCCCCCATACTCCCCACACTCCCCCACACTCCCCCACACCCCCCATACTCCCCCATACTCCCCATACTCCCCCACACCCCCACACT	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=876112;STD_quant_start=160.964;STD_quant_stop=281.694;Kurtosis_quant_start=-1.48637;Kurtosis_quant_stop=-0.804806;SVTYPE=INS;SUPTYPE=AL,SR;SVLEN=1649;STRANDS=+-;STRANDS2=7,6,7,6;RE=13;REF_strand=48,38;Strandbias_pval=1;AF=0.151163	GT:DR:DV	0/0:73:13
+chr1	876433	35786	N	N[chr4:189980733[	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr4;END=189980733;STD_quant_start=373.501;STD_quant_stop=193.312;Kurtosis_quant_start=-0.601023;Kurtosis_quant_stop=-0.696578;SVTYPE=BND;SUPTYPE=SR;SVLEN=1;STRANDS=++;STRANDS2=4,3,3,4;RE=7;REF_strand=15,15;Strandbias_pval=1;AF=0.233333	GT:DR:DV	0/0:23:7
+chr1	878423	35787	N	]chr3:198124405]N	.	PASS	PRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr3;END=198124405;STD_quant_start=0.632456;STD_quant_stop=0.316228;Kurtosis_quant_start=4.00716;Kurtosis_quant_stop=2.14525;SVTYPE=BND;SUPTYPE=SR;SVLEN=1;STRANDS=-+;STRANDS2=12,5,12,5;RE=17;REF_strand=34,32;Strandbias_pval=0.182341;AF=0.257576	GT:DR:DV	0/0:49:17
+chr1	878423	36833	N	]chr3:198124405]N	.	PASS	PRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr3;END=198124405;STD_quant_start=2.72029;STD_quant_stop=0.632456;Kurtosis_quant_start=1.9394;Kurtosis_quant_stop=6.69527;SVTYPE=BND;SUPTYPE=SR;SVLEN=1;STRANDS=-+;STRANDS2=3,8,3,8;RE=11;REF_strand=34,32;Strandbias_pval=0.19555;AF=0.166667	GT:DR:DV	0/0:55:11
+chr1	882645	20	ATATATTAGCTATTCTAGACTTTATGCATTTATGTAAAGTTTTCTTTGTTGCACTTTAAGTTCTGTGATACATGGGCAGAGCATGC	N	.	PASS	PRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=882732;STD_quant_start=2.70801;STD_quant_stop=2.51661;Kurtosis_quant_start=0.409091;Kurtosis_quant_stop=3.73961;SVTYPE=DEL;SUPTYPE=AL;SVLEN=-87;STRANDS=+-;STRANDS2=8,1,8,1;RE=9;REF_strand=77,90;Strandbias_pval=0.0153702;AF=0.0538922	GT:DR:DV	0/0:158:9
+chr1	883246	35788	N	N[chr20:29351529[	.	STRANDBIAS	PRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr20;END=29351529;STD_quant_start=6.0208;STD_quant_stop=8.59506;Kurtosis_quant_start=4.94502;Kurtosis_quant_stop=2.53006;SVTYPE=BND;SUPTYPE=SR;SVLEN=1;STRANDS=++;STRANDS2=8,0,0,8;RE=8;REF_strand=31,35;Strandbias_pval=0.00564375;AF=0.121212	GT:DR:DV	0/0:58:8
+chr1	883246	35789	N	N[chr20:29789177[	.	STRANDBIAS	PRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr20;END=29789177;STD_quant_start=1.73205;STD_quant_stop=2.05481;Kurtosis_quant_start=4.74074;Kurtosis_quant_stop=5.09003;SVTYPE=BND;SUPTYPE=SR;SVLEN=1;STRANDS=++;STRANDS2=9,0,0,9;RE=9;REF_strand=30,34;Strandbias_pval=0.00272312;AF=0.140625	GT:DR:DV	0/0:55:9
+chr1	886250	21	N	TGTGCTGGCCCTTTGGCAGAGCAGGTGTGGGT	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=886250;STD_quant_start=14.922;STD_quant_stop=15.3351;Kurtosis_quant_start=-0.359429;Kurtosis_quant_stop=-0.424765;SVTYPE=INS;SUPTYPE=AL;SVLEN=32;STRANDS=+-;STRANDS2=4,2,4,2;RE=6;REF_strand=20,38;Strandbias_pval=0.186216;AF=0.103448	GT:DR:DV	0/0:52:6
+chr1	893790	22	AAAAAAAAAAAAATATATATATATATATATATATAT	N	.	PASS	PRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=893826;STD_quant_start=0.738549;STD_quant_stop=0;Kurtosis_quant_start=-1.74362;Kurtosis_quant_stop=-1.42857;SVTYPE=DEL;SUPTYPE=AL;SVLEN=-36;STRANDS=+-;STRANDS2=13,10,13,10;RE=23;REF_strand=28,24;Strandbias_pval=1;AF=0.442308	GT:DR:DV	0/1:29:23
+chr1	907836	23	N	CTGCCCGGTCCTTCTGACCAGCCGAGAGAGTA	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=907836;STD_quant_start=11.7346;STD_quant_stop=12.1491;Kurtosis_quant_start=-0.460251;Kurtosis_quant_stop=-0.470373;SVTYPE=INS;SUPTYPE=AL;SVLEN=32;STRANDS=+-;STRANDS2=6,6,6,6;RE=12;REF_strand=34,32;Strandbias_pval=1;AF=0.181818	GT:DR:DV	0/0:54:12
+chr1	909140	24	TTTGCTGGTATATGCGGGGGTCGGGGTCAGGCCCCCGGGCGCACCGTTGCTGGTATATGCGGGGGTCGGGGTCAGGCCCCCGGGCGTTGCTGGTATATGCGGGGGTCGGGGTCAGGCCCCCGGGCGCACCGTTGCTGGTATATGCGGTGGTCGGGGTCAGGCCCCCGGGCGCACCGTTGCTGGTATATGCGGGGGTCGGGGTCAGGCCCCCGGGCGCACCGTTGCTGGTATATGCGGTGGTCGGGGTCAGGCCCCCGGGCGCATCTTTGCTGGTATATGCGGTGGTCGGGGTCAGGCCCCCCGGGCGCACT	N	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=909494;STD_quant_start=64.8764;STD_quant_stop=64.2294;Kurtosis_quant_start=-2.03504;Kurtosis_quant_stop=-1.70278;SVTYPE=DEL;SUPTYPE=AL;SVLEN=-354;STRANDS=+-;STRANDS2=20,14,20,14;RE=34;REF_strand=39,33;Strandbias_pval=0.680851;AF=0.472222	GT:DR:DV	0/1:38:34
+chr1	934067	25	GGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTCCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTCCGTTACAGGTGGGCAGGGGAGGCGGCTCCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGGGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTCCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGGGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCTGCTCCGTTACAGGTGGGCGGGGGAGGCTGCTCCGTTACAGGTGGGCGGGGGGGGCGGCTGCGTTACAGGTGGGCGGGGGGGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTCCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTCCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAG	N	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=934880;STD_quant_start=19.0606;STD_quant_stop=20.0499;Kurtosis_quant_start=1.46688;Kurtosis_quant_stop=-1.88984;SVTYPE=DEL;SUPTYPE=AL;SVLEN=-813;STRANDS=+-;STRANDS2=12,14,12,14;RE=26;REF_strand=24,33;Strandbias_pval=0.812937;AF=0.45614	GT:DR:DV	0/1:31:26
+chr1	936289	26	AGGGCTCCTGGACGGAGGGGGTCCCCGGTCCCGCCTCCTA	N	.	PASS	PRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=936328;STD_quant_start=5.46316;STD_quant_stop=5.2915;Kurtosis_quant_start=0.217921;Kurtosis_quant_stop=0.801437;SVTYPE=DEL;SUPTYPE=AL;SVLEN=-39;STRANDS=+-;STRANDS2=11,15,11,15;RE=26;REF_strand=24,34;Strandbias_pval=1;AF=0.448276	GT:DR:DV	0/1:32:26
+chr1	948662	27	N	CCTGGCTGTCCTTGGTCCCCTGGTCCCTTGGCCCTGCA	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=948696;STD_quant_start=12.3786;STD_quant_stop=16.4784;Kurtosis_quant_start=-2.07742;Kurtosis_quant_stop=-2.33448;SVTYPE=INS;SUPTYPE=AL;SVLEN=37;STRANDS=+-;STRANDS2=8,19,8,19;RE=27;REF_strand=18,40;Strandbias_pval=1;AF=0.465517	GT:DR:DV	0/1:31:27
+chr1	964642	28	CAGTGGGGATGTGCTGCCGGGAGGGGGGCGCGGGTCCGCAGTGGGGATGTGCTGCCGGGAGGGGGGCGCGGGTCCGCA	N	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=964717;STD_quant_start=14.3717;STD_quant_stop=16.1442;Kurtosis_quant_start=-1.6698;Kurtosis_quant_stop=-1.78719;SVTYPE=DEL;SUPTYPE=AL;SVLEN=-75;STRANDS=+-;STRANDS2=12,10,12,10;RE=22;REF_strand=26,22;Strandbias_pval=1;AF=0.458333	GT:DR:DV	0/1:26:22
+chr1	976811	29	N	CAACCCCGGGAACCGCCTCCCACTCCCCCCACCAACCCCCGGGAACCGCCTCCCACTTCTCCCGCAACCCCGGGAACTGCCTCCCACTCCCTTCTGCAACCCCCGGGAACCGCTCCCACTCCCCGA	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=976916;STD_quant_start=53.1169;STD_quant_stop=72.0569;Kurtosis_quant_start=-0.975517;Kurtosis_quant_stop=-0.734689;SVTYPE=INS;SUPTYPE=AL,SR;SVLEN=87;STRANDS=+-;STRANDS2=9,10,9,10;RE=15;REF_strand=36,36;Strandbias_pval=1;AF=0.208333	GT:DR:DV	0/0:57:15
+chr1	977334	30	N	CGCTCCCCACTCCCCCGCAACTTCGGGAACCGCCTCCCCACTCCCCCACCAACCCCTGAACCA	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=977334;STD_quant_start=59.633;STD_quant_stop=54.4259;Kurtosis_quant_start=-1.27956;Kurtosis_quant_stop=-1.57241;SVTYPE=INS;SUPTYPE=AL;SVLEN=131;STRANDS=+-;STRANDS2=11,13,11,13;RE=22;REF_strand=38,36;Strandbias_pval=0.814668;AF=0.297297	GT:DR:DV	0/0:52:22
+chr1	977541	31	N	CCCCGGAACCGCTCCCACCGCGCGCAACCCCTGAACCGCCTCCCACTCCCCACCAACCCTGGAACCGCCTCCACTCCCCTCTTACCGTT	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=977584;STD_quant_start=48.8615;STD_quant_stop=40.2961;Kurtosis_quant_start=-0.497493;Kurtosis_quant_stop=-0.185665;SVTYPE=INS;SUPTYPE=AL;SVLEN=67;STRANDS=+-;STRANDS2=3,6,3,6;RE=9;REF_strand=36,38;Strandbias_pval=0.490453;AF=0.121622	GT:DR:DV	0/0:65:9
+chr1	977848	32	N	ACCAACCGGGGAGCCGCCTCCCCTCCCCCCACCCGCCCCGAGCCGCCTGCCCCCGCCACCAACCCCGGGAACCACCTCCCACTCCCCGCCCAACCCCGGGAACCGCCCCTCCCCTCCCCACG	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=977882;STD_quant_start=61.4687;STD_quant_stop=66.9395;Kurtosis_quant_start=-1.68422;Kurtosis_quant_stop=-1.71073;SVTYPE=INS;SUPTYPE=AL;SVLEN=86;STRANDS=+-;STRANDS2=9,10,9,10;RE=19;REF_strand=34,38;Strandbias_pval=1;AF=0.263889	GT:DR:DV	0/0:53:19
+chr1	988831	33	N	AGTTCTGGAGTTGATTGTTTCTCAGAGGTTCAGGGTTGAGTGTTC	.	PASS	PRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=988831;STD_quant_start=6.13314;STD_quant_stop=6.34277;Kurtosis_quant_start=-1.27849;Kurtosis_quant_stop=-1.31557;SVTYPE=INS;SUPTYPE=AL;SVLEN=46;STRANDS=+-;STRANDS2=11,16,11,16;RE=27;REF_strand=22,36;Strandbias_pval=0.815445;AF=0.465517	GT:DR:DV	0/1:31:27
+chr1	996353	34	N	GCACCTACATCTGGGGCCACAGGATGCAGGGTGGGGAGGGCAAGGCCTCTGCGGA	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=996353;STD_quant_start=25.8341;STD_quant_stop=26.3869;Kurtosis_quant_start=-1.7722;Kurtosis_quant_stop=-1.00787;SVTYPE=INS;SUPTYPE=AL;SVLEN=64;STRANDS=+-;STRANDS2=12,18,12,18;RE=30;REF_strand=30,52;Strandbias_pval=0.826462;AF=0.365854	GT:DR:DV	0/1:52:30
+chr1	998765	35	N	GGGGAGGGCGCTGAGCCGAGGGGGAGGGCTGAGCGGGAG	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=998770;STD_quant_start=11.4935;STD_quant_stop=9.94485;Kurtosis_quant_start=-1.80857;Kurtosis_quant_stop=-1.45908;SVTYPE=INS;SUPTYPE=AL;SVLEN=34;STRANDS=+-;STRANDS2=4,10,4,10;RE=14;REF_strand=26,46;Strandbias_pval=0.762111;AF=0.194444	GT:DR:DV	0/0:58:14
+chr1	1030890	36	TGTGTGTGTGTGCAGTGCATGGTGCTGTGAGATCAGCG	N	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1030928;STD_quant_start=15.6993;STD_quant_stop=15.5285;Kurtosis_quant_start=0.12083;Kurtosis_quant_stop=-0.088691;SVTYPE=DEL;SUPTYPE=AL;SVLEN=-38;STRANDS=+-;STRANDS2=18,13,18,13;RE=31;REF_strand=39,26;Strandbias_pval=1;AF=0.476923	GT:DR:DV	0/1:34:31
+chr1	1041778	37	GGCCAGTGCCAGGGTCGAGGTGGGCGGCTCCCCCGGGGGAGGGCTG	N	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1041824;STD_quant_start=15.7567;STD_quant_stop=16.687;Kurtosis_quant_start=-0.850302;Kurtosis_quant_stop=-1.83988;SVTYPE=DEL;SUPTYPE=AL;SVLEN=-46;STRANDS=+-;STRANDS2=11,12,11,12;RE=23;REF_strand=23,25;Strandbias_pval=1;AF=0.479167	GT:DR:DV	0/1:25:23
+chr1	1068748	38	N	AAGGCCACGCGGGCTGTGCAGATGCAGGTGCGGCGGGGCGGGCCACGCGGGCTGTGAAGGTGCAGGT	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1068809;STD_quant_start=28.0891;STD_quant_stop=12.2066;Kurtosis_quant_start=-2.18788;Kurtosis_quant_stop=-1.80804;SVTYPE=INS;SUPTYPE=AL;SVLEN=75;STRANDS=+-;STRANDS2=19,14,19,14;RE=33;REF_strand=42,28;Strandbias_pval=0.832838;AF=0.471429	GT:DR:DV	0/1:37:33
+chr1	1076283	39	GCTACGGGGACTCCGTGGGGGGAGGCTGAGGCTACGGGGACTCCGTGCTGGGAGGCTGAGGCTATGGGGACTCCGTCGGGGGAGGCTGAGTCTATGGGGACTCCGTGGGGGGAGGCTGAGGCTATGGGGACTCCGTGCCGGGAGGCTGAGGCTATGGGGACTCCGTGGGGCAGGCTGAGGCTATGGTGACTCCGTGCAGGGCTGTGAGGCTACGGGGACTCCGTGGGGGGTGGCTGAGGCTACGGGGACTCCGTGGGGGGAGGCTGAGGCTACGGGGACTCCGTGGGGGGAGGCTGAGGCTACGGGGACTCCGTGGGGGGAGGCTGAGGCTACGGGGACTCCGTGGGGGGAGGCTGAGGCTACGGGGACTCCGTGGGGGGAGGCTGAGGCTACGGGGACTCCGTGGGGGGAGGCTGA	N	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1076735;STD_quant_start=48.1819;STD_quant_stop=70.5606;Kurtosis_quant_start=0.175533;Kurtosis_quant_stop=-1.43403;SVTYPE=DEL;SUPTYPE=AL,SR;SVLEN=-452;STRANDS=+-;STRANDS2=7,11,7,11;RE=18;REF_strand=18,23;Strandbias_pval=0.780972;AF=0.439024	GT:DR:DV	0/1:23:18
+chr1	1076341	40	GCTGGGAGGCTGAGGCTATGGGGACTCCGTCGGGGGAGGCTGAGTCTATGGGGACTCCGTGGGGGGAGGCTGAGGCTATGGGGACTCCGTGCCGGGAGGCTGAGGCTATGGGGACTCCGTGGGGCAGGCTGAGGCTATGGTGACTCCGTGCAGGGCTGTGAGGCTACGGGGACTCCGTGGGGGGTGGCTGAGGCTACGGGGACTCCGTGGGGGGAGGCTGAGGCTACGGGGACTCCGTGGGGGGAGGCTGAGGCTACGGGGACTCCGTGGGGGGAGGCTGAGGCTACGGGGACTCCGTGGGGGGAGGCTGAGGCTACGGGGACTCCGTGGGGGGAGGCTGAGGCTACGGGGACTCCGTGGGGGGAGGCTGAGGCTACGGGGACTCCGTGGGGGGAGGCTGAGGCTACGGGGACTCCGTGGGGGGAGGCTGAGGCTACGGGGACTCCGTGGGGGGAGGCTGAGGCTACGGGGACTCCGTGGGGGGAGGCTGAGGCTACGGGGACTCCGTGGGGGGAGGCTGAGGCTACGGGGACTCCGTGGGGGGAGGCTGAGGCTACGGGGACTCCGTGGGGGGAGGCTGAGGCTACGGGGACTCCGTGGGGGGAGGCTGAGGCTACGGGGACTCCGTGGGGGGAGGCTGAGGCTACGGGGACTCCGTGGGGGGAGGCTGAGGCTACGGGGACTCCGTGGGGGGAGGCTGAGGCTACGGGGACTCCGTGGGGGGAGGCTGAGGCTACGGGGACTCCGTGCCGGGAGGCTGAGGCTACGGGGACTCCGTGCGGGGAGGCTGAGTCTACGGGGACTCCGTGAGGGGTGGCTGAGTCTATGGGGACTCCGTGCGGGGAGGCTGAGTCTATGGGGACTCCGTGCGGGGTGGCTGAGGCTATGGGGACTCCGTGCCGGGAGGCTGAGGCTATGGGGACTCCGTGCCGGGAGGCTGAGTCTATGGGGACTCCGTGCCGGGAGGCTGAGTCTATGGGGACTCTGTGCCGGGAGGCTGAGGCTACGGGGACTCCGTGCCGGGAGGCTGAGGCTACGGGGACTCCGTGGGGGGAGGCTGAGGCTACGGGGACTCCGTGGGGGGAGGCTGAGGCTACGGGGACTCCGTGGGGGGAGGCTGAGGCTATGGGGACTCCGTGGGGGGAGGCTGAGGCTATGGGGACTCCGTGGGGGGAGGCTGAGTCTATGGGGACTCCGTGCCGAGAGGCTGAGTCTATGGGGACTCCGTGCCGGGAGGCTGAGTCTATGGGGACTCCGTGCCGGGAGGCTGAGTCTATGGGGACTCCGTTGGGGGAGGCTGAGGCTATGGGGACTCCGTTGGGGGAGGCTGAGGCTATGGGGACTCCGTTGGGGGAGGCTGAGGCTATGGGGACTCCGTGCGGGGAGGCTGAGGCTACGGGGACTCCGTGGGGGGAGGCTGAGGCTACGGGGACTCCGTGG	N	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1077781;STD_quant_start=341.572;STD_quant_stop=478.388;Kurtosis_quant_start=-1.1971;Kurtosis_quant_stop=-0.402974;SVTYPE=DEL;SUPTYPE=AL,SR;SVLEN=-1440;STRANDS=+-;STRANDS2=3,2,3,2;RE=5;REF_strand=19,23;Strandbias_pval=0.653637;AF=0.119048	GT:DR:DV	0/0:37:5
+chr1	1080919	41	N	CTGTCCTTCTCACTTCCTGCCTCGGTCTCTGTCTCCTTCCCTCCGCCCTACCTCGGTCCTATCATCCTTCCTCGCCTACCTCAGGTCCCTGTCTCCTTCCCTCCATACACACTCGGTCCCTGTCTCTCTTCCCTCCGCCTGGTCCCTGTCTCCTTCCCTCCTTCCCCCCACCTCCGGTCCTGTCTCCTTCCCTCCCTTCCGCCTCAGTCTGTCTCACTTCCCTCCGCCCACCTCAGTCCCCTGTCTCCTTCCCTCCCACACTTCCTCTGGTCCTGTCTCCTTCCCTCAAGCCCCCTCAGT	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1080919;STD_quant_start=32.45;STD_quant_stop=47.856;Kurtosis_quant_start=-1.30067;Kurtosis_quant_stop=-1.55852;SVTYPE=INS;SUPTYPE=AL;SVLEN=289;STRANDS=+-;STRANDS2=12,9,12,9;RE=21;REF_strand=26,20;Strandbias_pval=1;AF=0.456522	GT:DR:DV	0/1:25:21
+chr1	1139106	42	GGGTCAGAAGGTGGGGGTGTCAACGTCGAACCGGGGGACCTGGGTCCTGGGGAGTTTCCTGGGGTCAGAAGGTAGGGGTGTCAATGTCGAACCGGGGGACCTGGGTCCTGGGGAGCTTCCT	N	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1139227;STD_quant_start=15.678;STD_quant_stop=18.4038;Kurtosis_quant_start=-2.1908;Kurtosis_quant_stop=-2.23094;SVTYPE=DEL;SUPTYPE=AL;SVLEN=-121;STRANDS=+-;STRANDS2=15,6,15,6;RE=21;REF_strand=41,31;Strandbias_pval=0.312886;AF=0.291667	GT:DR:DV	0/0:51:21
+chr1	1140200	43	AGGTGGGGGTGTCAACGTCGAACCGGGGGGCCTGGGTCCTGGGGAGCTTCCTGGGGTC	N	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1140259;STD_quant_start=17.0822;STD_quant_stop=14.6151;Kurtosis_quant_start=0.610656;Kurtosis_quant_stop=0.583042;SVTYPE=DEL;SUPTYPE=AL;SVLEN=-59;STRANDS=+-;STRANDS2=14,6,14,6;RE=20;REF_strand=44,36;Strandbias_pval=0.312112;AF=0.25	GT:DR:DV	0/0:60:20
+chr1	1140410	44	N	CGTCCGAACCGGGGGGACCTGGGTCCTGGGAGCTTCCTGGGTTCAGAAGGTGGGGGTGTCAGCATCGAACCGGGGGACCTGAGTCCTGGGAGCTTCCTGGGGTCAGAAGGTGGGGGTGTCAGCATCGAACCGGGGGACCTGGGTCCTGGGGGCTTCCTGGGGTCAGAAGGTGGGGGTGTCAACATCGAACCGGGGGGCCCTGGGAGTCCTGGGAGCTTCTGGGGTCAGAAGGTGGGAGTGTCCAGCATCGAACCGGGGGGACCTGGGTCCTGGGGAGCTTCCTGGGGTCAGAAGGTAGGGGTGTCAGCATCGAACCGGGGGACCTGGGTCATGGGGGCTTCCTGGGGTCAGAAGGTGGGGGTGTCAACGTCGAACCGGGGGGCCTGGGTCCTGGGAGCTTCCTGGGGTCAGAAGGTAGGGGTGTCAACGTCAGACAGGGGACCTGGGTCCTGGGAGCTTCCTGGGGTCAGAAGGTGGGGGTGTCAACGCGTCGAACCGGGGGGACCTGGGTCCTGGGGAGCTTCCTGGGGTCAGAAGGTGGGGGTGTCCAG	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1140410;STD_quant_start=232.849;STD_quant_stop=281.385;Kurtosis_quant_start=1.04118;Kurtosis_quant_stop=0.939709;SVTYPE=INS;SUPTYPE=AL;SVLEN=536;STRANDS=+-;STRANDS2=4,7,4,7;RE=11;REF_strand=42,40;Strandbias_pval=0.522879;AF=0.134146	GT:DR:DV	0/0:71:11
+chr1	1141388	45	N	TCATCCTCTGTCCACAACCCCATCCTTACCTCTATCCCCCACCTTACATCTCATTCCTCTATCCCT	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1141394;STD_quant_start=10.6724;STD_quant_stop=14.1704;Kurtosis_quant_start=-1.44054;Kurtosis_quant_stop=-1.65538;SVTYPE=INS;SUPTYPE=AL;SVLEN=55;STRANDS=+-;STRANDS2=14,7,14,7;RE=21;REF_strand=44,40;Strandbias_pval=0.327428;AF=0.25	GT:DR:DV	0/0:63:21
+chr1	1168031	46	CGGGGCCAGCAGACGGGTGAGGGCGGAGGGCCGA	N	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1168063;STD_quant_start=14.2864;STD_quant_stop=13.7514;Kurtosis_quant_start=-1.22974;Kurtosis_quant_stop=-1.1581;SVTYPE=DEL;SUPTYPE=AL;SVLEN=-32;STRANDS=+-;STRANDS2=7,8,7,8;RE=15;REF_strand=38,32;Strandbias_pval=0.776548;AF=0.214286	GT:DR:DV	0/0:55:15
+chr1	1212606	47	N	CAGCCCTCCTCCCAGCCCCTGGCTCCCTCTGCCCCCTCA	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1212613;STD_quant_start=10.1207;STD_quant_stop=8.7014;Kurtosis_quant_start=-0.032446;Kurtosis_quant_stop=-1.09448;SVTYPE=INS;SUPTYPE=AL;SVLEN=32;STRANDS=+-;STRANDS2=2,5,2,5;RE=7;REF_strand=30,28;Strandbias_pval=0.42665;AF=0.12069	GT:DR:DV	0/0:51:7
+chr1	1226331	48	CCCTCAACCCTGTACGGTCAGGAGGAAACATGGCACCTCCCCTCTGGGGGCTCTTTCCAGAAAC	N	.	PASS	PRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1226395;STD_quant_start=5.74456;STD_quant_stop=5.1672;Kurtosis_quant_start=-1.50778;Kurtosis_quant_stop=-1.62886;SVTYPE=DEL;SUPTYPE=AL;SVLEN=-64;STRANDS=+-;STRANDS2=8,7,8,7;RE=15;REF_strand=34,39;Strandbias_pval=0.77815;AF=0.205479	GT:DR:DV	0/0:58:15
+chr1	1227293	49	GCGGGAAGGCGAGCTCGTGGCCAGGCCCTGCGGGAAGGCGAGCTCGTGGCCAGGCCCGGCGGGAAGGCGAGCTCGTGGCCAGGCCCGGCGGGAAGGCGAGCTCGTGGCCAGGCCCGGCGGGAAGGCGAGCTCGTGGCCAGGCCCTGCGGGAAGGCGAGCTCGTGGCCAGGCCCT	N	.	PASS	PRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1227467;STD_quant_start=2.14476;STD_quant_stop=1.34164;Kurtosis_quant_start=0.1517;Kurtosis_quant_stop=0.426462;SVTYPE=DEL;SUPTYPE=AL;SVLEN=-174;STRANDS=+-;STRANDS2=8,8,8,8;RE=16;REF_strand=32,42;Strandbias_pval=0.782406;AF=0.216216	GT:DR:DV	0/0:58:16
+chr1	1240679	50	N	CCGCCCCCATTCACCCCGGCCGTGGTCCCTACCGCAGCCCCA	.	PASS	PRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1240683;STD_quant_start=7.86398;STD_quant_stop=8.59008;Kurtosis_quant_start=-1.97851;Kurtosis_quant_stop=-1.85397;SVTYPE=INS;SUPTYPE=AL;SVLEN=40;STRANDS=+-;STRANDS2=16,22,16,22;RE=38;REF_strand=34,48;Strandbias_pval=1;AF=0.463415	GT:DR:DV	0/1:44:38
+chr1	1245159	51	N	CTCTGCCCTCCTCCCACCTTCCCCCTCCTCCCCCCACTCCCTCTCCCCTCTTCCCCCGACTCCCTTCCCCTACTCATCTCCTCCTCACCCACTCCTCTCCCCCTCCTCTCCCACTCCTCCCCCTCCTCCCCCCCACTCCTCCCCCCACTGCACTCTCCCCTCTTCCCCCACTCCTCCCCACTCCTCTCCCCTCCTTCTCACCTCCTCTCCCCTCCTCCTCCTCCTGTCCCTCCTCCCCCTCTTCCCCCTCCTCCCCATATACCCTCCTCCTCCTCTCCCTCTTCCTCCCACTCCCCCCACTCCTCCCCACTCCTCTCCCCTCTTGCCCCTCCTCCCTACCACTCCTTCCTCCTCTCCTCTCTTCCCCCCACTCCCTCCCCCACTCCTCTCCTCCTCCACCTCCTCTCCCCTCCTCCCCCACTCCTCTCT	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1245159;STD_quant_start=3.43996;STD_quant_stop=14.8436;Kurtosis_quant_start=4.9032;Kurtosis_quant_stop=3.59937;SVTYPE=INS;SUPTYPE=AL;SVLEN=439;STRANDS=+-;STRANDS2=12,13,12,13;RE=25;REF_strand=28,36;Strandbias_pval=0.813884;AF=0.390625	GT:DR:DV	0/1:39:25
+chr1	1248060	52	GATCTCCAACTCTGACCTACAGGCAGGAAAGTGGGCAGCCCTGGGAGGCTGGACTGAGGGAGGCTGGACTTCCCACTCAGGCCTACACGCAGGAAAATGGGCAGCCCTGGGAGGCTGGACCGAGGGAGGCTGGGCCTCCCACTCCACCCTACAGGCCAGGACACGGGCAGCCCTGGGAGGCTAGACCGAGGGAGGCTGGGCCTCCCATCTACCCTACAGGCCGGGACACAGGCAGCCCTGGGAGGCTGTACCGAGGGA	N	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1248319;STD_quant_start=52.6064;STD_quant_stop=21.0815;Kurtosis_quant_start=3.96322;Kurtosis_quant_stop=3.62028;SVTYPE=DEL;SUPTYPE=AL;SVLEN=-259;STRANDS=+-;STRANDS2=2,5,2,5;RE=7;REF_strand=23,28;Strandbias_pval=0.686983;AF=0.137255	GT:DR:DV	0/0:44:7
+chr1	1249348	53	N	CGCTCACACCCTACAGGCCGGGACACGGGCAGCCCTGGGAGGCTGGACCGGGGCTGGGCCTCCCCTCCGCCCTACAGGCCGGGACACGGGCAGCCCTGGGGCTGGACCAGGGGAGGCGCCAGGCCTCCCACTCGCCTACAGGCCGGGACACGGGCAGCCCTGGGAGGCTGGACCCCGAGGGAGGCTGGAGCCTC	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1249395;STD_quant_start=37.7478;STD_quant_stop=65.9212;Kurtosis_quant_start=-1.75534;Kurtosis_quant_stop=-1.77327;SVTYPE=INS;SUPTYPE=AL;SVLEN=124;STRANDS=+-;STRANDS2=8,10,8,10;RE=18;REF_strand=26,28;Strandbias_pval=1;AF=0.333333	GT:DR:DV	0/1:36:18
+chr1	1249588	54	N	TGGGATCGAGAGCTGGCTCCCACCGCCTCCCAGGCCTGGACACTGCAGCCCTGGGAGGT	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1249625;STD_quant_start=47.571;STD_quant_stop=45.3122;Kurtosis_quant_start=-1.76881;Kurtosis_quant_stop=-0.375917;SVTYPE=INS;SUPTYPE=AL;SVLEN=61;STRANDS=+-;STRANDS2=6,6,6,6;RE=12;REF_strand=26,28;Strandbias_pval=1;AF=0.222222	GT:DR:DV	0/0:42:12
+chr1	1284183	55	N	TGAGGGGGTGGGGTGGGGGTTGAGTGAGGGGGTGGGGGGGTTGGGTGAGGGGGGTGGGGGGTTGGGTGAGGGGGTGGGGGGCTGGTGAGGGGGTGGGGTTGGGTGAGGGGGTGGGCTCGGGGGGGGTTGAGTGAGGGGGTGGGGTGGGGGGTTGGGTGAGGGGGGTGGGGTGGGGGTTGAGGAGGGGGTGGGGTGTTATGAGGGGTTGGGGGTTGGGTGAGGGGGGGTGGGGGTTGCGAGGGGGTGGGGGGTGGGGGGGTTGAGTGAGGGGTT	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1284201;STD_quant_start=9.78895;STD_quant_stop=64.8333;Kurtosis_quant_start=-1.28536;Kurtosis_quant_stop=-1.87274;SVTYPE=INS;SUPTYPE=AL;SVLEN=282;STRANDS=+-;STRANDS2=18,16,18,16;RE=34;REF_strand=49,32;Strandbias_pval=0.535485;AF=0.419753	GT:DR:DV	0/1:47:34
+chr1	1288944	56	N	CGTGTCCCTGCTCCGGGCCCCGTGTCTCTGTTCACTGGCCCCCGTGTCTCTGCTCCTCGTCCCGTGTCCCTTGCTCCGCCCTGTGTCCCTGCTCCGTCCCGTGTCTCTGCTCCGTCCCCCGTGTTCTGCTCCGTCCTGTGTCTCTTGCTCCGGCCCCCGCGGTCTCTGCT	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1289049;STD_quant_start=87.6482;STD_quant_stop=60.9024;Kurtosis_quant_start=-0.741898;Kurtosis_quant_stop=-0.516487;SVTYPE=INS;SUPTYPE=AL;SVLEN=64;STRANDS=+-;STRANDS2=6,5,6,5;RE=11;REF_strand=28,34;Strandbias_pval=0.74488;AF=0.177419	GT:DR:DV	0/0:51:11
+chr1	1289357	57	N	TGCTCCGTCCGTGTCTCTGCTCCGTCCCGTGTCTCTGCTCCGTCTCCCCGTGTCTCTGCCCCGTCCCGTGTCTACTCCGTCCCGGTCTCTGCTCCGTCCCCCGTGTCTACTCCGTCCCCCGTGTCTCTGCTCCGTCCGTGTCTCTGCTCCGTCCCGTGTCTCTGCTCGTCCCCGTGTCTCTGCTCCGTCCGTGTCTCTGCTCAATCCCCCGTGTCTCTGCCCCGTCCCGATGTCTCTGCTCCGTCGATGTCTCTATGAGCTCTCCCGTGTCTGCTCCGTCCGTGTCTCTGCTCCGTCCGATGT	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1289476;STD_quant_start=159.932;STD_quant_stop=97.4515;Kurtosis_quant_start=-1.84527;Kurtosis_quant_stop=-1.54002;SVTYPE=INS;SUPTYPE=AL;SVLEN=162;STRANDS=+-;STRANDS2=8,9,8,9;RE=16;REF_strand=28,32;Strandbias_pval=1;AF=0.266667	GT:DR:DV	0/0:44:16
+chr1	1289780	58	N	CCCCGTGTCTCTGCTCCGTCCGTGTCTACTCCGTCCCGATGTCTCTGCCACGTCCCCGTGTCTCTGCCCCGTCCCCGTGTCTCTGCCCCGTC	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1289823;STD_quant_start=26.7955;STD_quant_stop=30.8675;Kurtosis_quant_start=-0.210759;Kurtosis_quant_stop=-0.202563;SVTYPE=INS;SUPTYPE=AL;SVLEN=91;STRANDS=+-;STRANDS2=8,7,8,7;RE=15;REF_strand=28,30;Strandbias_pval=0.778387;AF=0.258621	GT:DR:DV	0/0:43:15
+chr1	1290106	59	N	GTGTCTCTGTCTGGCCCCCCGTGTCTCTGCTCCAGCCCCGTGCCCTGCTCCTCATT	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1290106;STD_quant_start=38.9923;STD_quant_stop=30.4023;Kurtosis_quant_start=0.491131;Kurtosis_quant_stop=-0.169138;SVTYPE=INS;SUPTYPE=AL;SVLEN=103;STRANDS=+-;STRANDS2=8,9,8,9;RE=16;REF_strand=30,34;Strandbias_pval=1;AF=0.25	GT:DR:DV	0/0:48:16
+chr1	1324173	60	N	GGGCTCAGGGGCTGGGGGCTGCTGGGCTGAGGCTGGGGAGACTGGA	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1324173;STD_quant_start=5.02933;STD_quant_stop=13.0384;Kurtosis_quant_start=-1.45664;Kurtosis_quant_stop=-1.54076;SVTYPE=INS;SUPTYPE=AL;SVLEN=66;STRANDS=+-;STRANDS2=19,16,19,16;RE=35;REF_strand=38,36;Strandbias_pval=0.838779;AF=0.472973	GT:DR:DV	0/1:39:35
+chr1	1350109	61	GGGGGCCTGGACGGGGCAGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGACGGGGCAGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGACGGGGCAGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGACGGGGCAGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGACGGGGCAGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGACGGGGCAGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGACGGGGCTGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGACCGGGCAGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGACCGGGCAGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGACGGGGCAGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGACGGGGCAGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGACGGGGCAGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGACGGGGCAGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGACGGGGCAGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGACGGGGCAGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGACGGGGCAGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGACGGGGCAGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGACCGGGCAGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGACGGGGCAGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGACGGGGCAGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGACGGGGCAGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGACGGGGCAGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGACCGGGCAGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGACCGGGCAGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGACGGGGCAGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGACGGGGCAGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGACGGGGCTGGAGCGACGGGGGGAGTGAG	N	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1351186;STD_quant_start=20.5878;STD_quant_stop=20.3663;Kurtosis_quant_start=-0.011268;Kurtosis_quant_stop=-1.64329;SVTYPE=DEL;SUPTYPE=AL;SVLEN=-1077;STRANDS=+-;STRANDS2=15,13,15,13;RE=28;REF_strand=32,29;Strandbias_pval=1;AF=0.459016	GT:DR:DV	0/1:33:28
+chr1	1366913	62	TGAATTGGTGAGTTGGTGTGAATTGAATTGTGTGAATGAGTGGATTGGTGAGTGAATTGGTGAGTTGAATTGGTGTGTGTAGTGGATGAGTGTGGATGAATGTGAATTGGCGAGTATGGA	N	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1367033;STD_quant_start=14.0961;STD_quant_stop=39.5335;Kurtosis_quant_start=0.90213;Kurtosis_quant_stop=3.80352;SVTYPE=DEL;SUPTYPE=AL;SVLEN=-120;STRANDS=+-;STRANDS2=7,3,7,3;RE=10;REF_strand=36,30;Strandbias_pval=0.499306;AF=0.151515	GT:DR:DV	0/0:56:10
+chr1	1382683	63	N	CAACAATCCAGTAACAATCCAGAGGTCACCACCCTTCCCAACAATCCAGTAATCCAGAGGTTACCACCCTTCCCAACAATCCACTAACAATCCAGAGGCCACCACCCTTCCCAGCAATCGGCAAGGACCCAGAGGCCACCACCCCTTCCCAACAATCCAGTAACAATCCAGAGGGTCACCACCCCTTCCCAAAATCAGTAACCAGGGAGTCCACCACCCCTTCCCAACAATCCAGTAACAATCCAGAGGCCACCACCCCTTCCCACAACAATCCAGTAACAATCCAGAGGTACCACCCTTCCCAACAATCCAGTAACAATCGACCACCACCCTTCCCAACAATCCAGTAACAATCCAGAGGACACCACCCTTCCCAGCAATCCACTAGCAATCCAGAGGCCACCACCCCTTCCCAACAATCTGGCTTAGCGACCAGAGAGCCACCACCCCTTCCCAACAATCAGTAACAATCCAGGAGTCACCACCGCTT	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1382683;STD_quant_start=27.5276;STD_quant_stop=144.602;Kurtosis_quant_start=-2.03439;Kurtosis_quant_stop=-2.16418;SVTYPE=INS;SUPTYPE=AL;SVLEN=494;STRANDS=+-;STRANDS2=11,15,11,15;RE=26;REF_strand=24,34;Strandbias_pval=1;AF=0.448276	GT:DR:DV	0/1:32:26
+chr1	1427516	64	N	TCCCAAGTCTCGGCCTCCCTCTCCACCCCTCCCCTTTCCCCTGCATCACCCCGCCCAGCCCCCACCCCTCCATCACCCTGCTCCCGCCCCCTCCCCTCCATCCTGCCCCCCTCCCCCTCCATCACCCTGCCCAGCCCCCTCCCCTCCATCACTCCCAAGCCCTGCCCCCTTCCATCACCCTGCCCTGCCCCCACCCCATCACCCTGCCCTGCCCCCTTCCCCTCCATCATCCCGCCCGCTCCCCTCTCCACCCCTCCCTCTCCCCTGCATCACTCCCTGCCCTGCCCCTTTCCCCCTCCATCACCCCAGCCTCTGCCCCTCCCCTCCCCTCCATCACCCTGCCCTGCCCTCCTCTCTCCATCACTCCCTGTCTCTGCCCCCACCCCTCCATCATTCTGCCCTGCTCCTCTCCACCTCCCCCTTCCCCTGCATCACCCAGCCTTCTAAGTTCCTTCCTCCATCAATTCTGCCTCTGCCCCTCCCCTCCATCACTCCTGGCACTCTGCCCTCCCTCCATCACTCCTGCTCCTGCCCCCCACCCCTCCATCATCTTCACACTTTCCACCCCTCCCTTCCCTTCCCCTGCATCATCTGCACTCCTGCCTCTTCCCCCTCCATCACCCTGCCCAGCCCCCTCCCCTCCACTGCTTGCGCCCTCCTCCATCACACCCCGGCCCTGCCCCCAGCTCCG	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1427892;STD_quant_start=48.4345;STD_quant_stop=77.4325;Kurtosis_quant_start=-1.98608;Kurtosis_quant_stop=-1.067;SVTYPE=INS;SUPTYPE=AL;SVLEN=439;STRANDS=+-;STRANDS2=12,9,12,9;RE=21;REF_strand=38,32;Strandbias_pval=1;AF=0.3	GT:DR:DV	0/0:49:21
+chr1	1428085	65	N	AGGAGGGAGGGGGAGGAGGGGAGGAAGAAGAAGGAGGAAGAGGAAGGAGGAAAAGAGGAGGAGGAAAGAGAGAGGAAGAAAGGAGGGGAGGAGAAAGAGGAGGGGACAGGAGGGAAGGAGGAGAGAAAGAGGAAAAGAAAGGAGGGAGGGAAGGAGAGAGGAGAGGAAGAGAGAGACAGGGAAGGGAAAGAAAAACAGGGAGGGGAAGGAGGAGGAAGAGGAGGGAAGGAAGAAGAGGAGGAGAGGGAGGGAAGAGAGGAGGGAAAGAGGGAGGAGGAAGAGGGGGCAGGGGAGGAAGAAGAGAACA	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1428085;STD_quant_start=33.6102;STD_quant_stop=42.9676;Kurtosis_quant_start=4.12818;Kurtosis_quant_stop=-1.69999;SVTYPE=INS;SUPTYPE=AL;SVLEN=222;STRANDS=+-;STRANDS2=16,13,16,13;RE=29;REF_strand=42,34;Strandbias_pval=1;AF=0.381579	GT:DR:DV	0/1:47:29
+chr1	1442871	66	N	TTTCTATGGTAATGGTGATAAACCAAGTCAA	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1442871;STD_quant_start=13.6345;STD_quant_stop=12.775;Kurtosis_quant_start=-1.56517;Kurtosis_quant_stop=-1.35954;SVTYPE=INS;SUPTYPE=AL;SVLEN=31;STRANDS=+-;STRANDS2=9,10,9,10;RE=19;REF_strand=32,38;Strandbias_pval=1;AF=0.271429	GT:DR:DV	0/0:51:19
+chr1	1443674	67	TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT	N	.	PASS	PRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1443708;STD_quant_start=0;STD_quant_stop=1.04881;Kurtosis_quant_start=-0.5;Kurtosis_quant_stop=-1.79438;SVTYPE=DEL;SUPTYPE=AL;SVLEN=-34;STRANDS=+-;STRANDS2=6,14,6,14;RE=20;REF_strand=34,42;Strandbias_pval=0.310419;AF=0.263158	GT:DR:DV	0/0:56:20
+chr1	1469099	68	TAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA	N	.	STRANDBIAS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1469134;STD_quant_start=10.2652;STD_quant_stop=9.18559;Kurtosis_quant_start=3.94733;Kurtosis_quant_stop=2.94515;SVTYPE=DEL;SUPTYPE=AL;SVLEN=-35;STRANDS=+-;STRANDS2=0,8,0,8;RE=8;REF_strand=42,38;Strandbias_pval=0.00589579;AF=0.1	GT:DR:DV	0/0:72:8
+chr1	1477855	69	N	CACCACGCCCGGCTAATGTTGTATTTTTAGTAGAGACGGGTTTCTCCCATGGTCAGGCTGGTCTCTAACTCCCGACCTCAGGTGATCCACCCGCCTCGGCCTCTCAACCAGTTGGGATTACAGGCATGT	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1477883;STD_quant_start=11.94;STD_quant_stop=21.8689;Kurtosis_quant_start=-1.766;Kurtosis_quant_stop=-1.90683;SVTYPE=INS;SUPTYPE=AL;SVLEN=131;STRANDS=+-;STRANDS2=17,15,17,15;RE=32;REF_strand=36,38;Strandbias_pval=0.832673;AF=0.432432	GT:DR:DV	0/1:42:32
+chr1	1497123	70	N	CCTCGGCCTGGGCACGAACGGTCCCATCGAGAGCAGA	.	PASS	PRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1497123;STD_quant_start=3.84708;STD_quant_stop=4.3589;Kurtosis_quant_start=-1.05282;Kurtosis_quant_stop=-1.08734;SVTYPE=INS;SUPTYPE=AL;SVLEN=40;STRANDS=+-;STRANDS2=7,8,7,8;RE=15;REF_strand=34,36;Strandbias_pval=1;AF=0.214286	GT:DR:DV	0/0:55:15
+chr1	1554173	71	CTAAGGGGTCCCCACGAAGCTGAGCACGAGGCGGATCCGGAC	N	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1554213;STD_quant_start=9.73653;STD_quant_stop=10.0846;Kurtosis_quant_start=0.855249;Kurtosis_quant_stop=1.94672;SVTYPE=DEL;SUPTYPE=AL;SVLEN=-40;STRANDS=+-;STRANDS2=6,7,6,7;RE=13;REF_strand=34,28;Strandbias_pval=0.760968;AF=0.209677	GT:DR:DV	0/0:49:13
+chr1	1595833	72	GAGCAGAACAGGGAGAGACAGAGAGAGAGAGACAGAGAGAGGCAGACAGAGACAGAGAGAGAGACAGACAC	N	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1595902;STD_quant_start=33.7313;STD_quant_stop=33.2971;Kurtosis_quant_start=2.73708;Kurtosis_quant_stop=2.38236;SVTYPE=DEL;SUPTYPE=AL;SVLEN=-69;STRANDS=+-;STRANDS2=5,5,5,5;RE=10;REF_strand=36,42;Strandbias_pval=1;AF=0.128205	GT:DR:DV	0/0:68:10
+chr1	1595853	73	N	ACAGAGAGACAGAGAGAGAAACAGAGAGACAGAGACAGAGAGGCAGACAGAGAGAGACAGACAGAGAGCAGAACAGGGAGAGACAAAAGAGACAGAGAGAGAGAGACAC	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1595863;STD_quant_start=39.2785;STD_quant_stop=49.7011;Kurtosis_quant_start=-0.536228;Kurtosis_quant_stop=-0.678589;SVTYPE=INS;SUPTYPE=AL;SVLEN=77;STRANDS=+-;STRANDS2=3,2,3,2;RE=5;REF_strand=36,42;Strandbias_pval=0.661994;AF=0.0641026	GT:DR:DV	0/0:73:5
+chr1	1605690	74	N	GGCTGGGCTGGTCAGGTGTAGGCTGGGCTGGTCAGGCGTGGAGTGGGCTGGTCAGGCGTGGGGTGGGGTGGGCTGGTCAGGTGTGGGCTGGGCCTGGTCAGGTGTGAGGTGGGGTGGTGGGGGTGAGGGGGTTGTCTGGTCAGGTGTGGAGTGGGCTGGTCAGGTGTGGGCTGGGCTGGTCCAGACAGGGTCGGCTGGTCAGGTGTGGGCTGGGCTGGGCTGGTCAGGTGTGGGGT	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1605690;STD_quant_start=28.6112;STD_quant_stop=43.4385;Kurtosis_quant_start=-0.191101;Kurtosis_quant_stop=-1.21501;SVTYPE=INS;SUPTYPE=AL;SVLEN=226;STRANDS=+-;STRANDS2=7,7,7,7;RE=14;REF_strand=38,40;Strandbias_pval=1;AF=0.179487	GT:DR:DV	0/0:64:14
+chr1	1666975	75	CACGCCTGTAATCCCAGCTACTTGGGAGGCTGAGGCAGGAGAATCACTTCAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCAAACCAGAGAAATCCAGCTCTGGGTGACAGAGCAAGACTCTGTTTCGGGAAAAATAAAATACATAGGCAGGGCGCGGTGGCT	N	.	PASS	PRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1667142;STD_quant_start=0;STD_quant_stop=0;Kurtosis_quant_start=11.8809;Kurtosis_quant_stop=8.99409;SVTYPE=DEL;SUPTYPE=AL;SVLEN=-167;STRANDS=+-;STRANDS2=18,14,18,14;RE=32;REF_strand=38,28;Strandbias_pval=1;AF=0.484848	GT:DR:DV	0/1:34:32
+chr1	1681989	76	AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA	N	.	PASS	PRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1682020;STD_quant_start=0;STD_quant_stop=1.30384;Kurtosis_quant_start=6.9449;Kurtosis_quant_stop=-0.962407;SVTYPE=DEL;SUPTYPE=AL;SVLEN=-31;STRANDS=+-;STRANDS2=10,8,10,8;RE=18;REF_strand=34,26;Strandbias_pval=1;AF=0.3	GT:DR:DV	0/0:42:18
+chr1	1717605	77	GCTTTCAGCTAGAGTTTGCTCTCTCTGGTTTTCGGTCTGTGACACACGCAT	N	.	PASS	PRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1717656;STD_quant_start=8.22935;STD_quant_stop=8.90381;Kurtosis_quant_start=-1.60307;Kurtosis_quant_stop=-1.75287;SVTYPE=DEL;SUPTYPE=AL;SVLEN=-51;STRANDS=+-;STRANDS2=18,19,18,19;RE=37;REF_strand=40,39;Strandbias_pval=1;AF=0.468354	GT:DR:DV	0/1:42:37
+chr1	1749606	78	N	GTCCATGCATATTTTTCTGTGTGATGTGTCTGTGTGTGTGTCTCAGTGGT	.	PASS	PRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1749616;STD_quant_start=6.64118;STD_quant_stop=6.18572;Kurtosis_quant_start=-1.82765;Kurtosis_quant_stop=-1.82115;SVTYPE=INS;SUPTYPE=AL;SVLEN=48;STRANDS=+-;STRANDS2=19,19,19,19;RE=38;REF_strand=42,38;Strandbias_pval=0.84535;AF=0.475	GT:DR:DV	0/1:42:38
+chr1	1766411	79	AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA	N	.	PASS	PRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1766446;STD_quant_start=2.64575;STD_quant_stop=6.72681;Kurtosis_quant_start=1;Kurtosis_quant_stop=2.21022;SVTYPE=DEL;SUPTYPE=AL;SVLEN=-35;STRANDS=+-;STRANDS2=2,6,2,6;RE=8;REF_strand=42,40;Strandbias_pval=0.267342;AF=0.097561	GT:DR:DV	0/0:74:8
+chr1	1845825	80	ACACACACACACACACACACACACACACACAC	N	.	PASS	PRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1845857;STD_quant_start=4.42396;STD_quant_stop=5.59336;Kurtosis_quant_start=0.08546;Kurtosis_quant_stop=1.51911;SVTYPE=DEL;SUPTYPE=AL;SVLEN=-32;STRANDS=+-;STRANDS2=4,3,4,3;RE=7;REF_strand=26,26;Strandbias_pval=1;AF=0.134615	GT:DR:DV	0/0:45:7
+chr1	1924230	81	N	CCCCCAGCCTGCAGCCCACCCCCCCATCTCACCGCCTAGCCCCCATCTCACCAGCTGCCCCCTCCCCGACACACGCCCACCCCCTTATCTCACCAACCA	.	PASS	PRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1924231;STD_quant_start=0.948683;STD_quant_stop=2.72029;Kurtosis_quant_start=0.969632;Kurtosis_quant_stop=-0.273855;SVTYPE=INS;SUPTYPE=AL;SVLEN=96;STRANDS=+-;STRANDS2=7,7,7,7;RE=14;REF_strand=36,28;Strandbias_pval=0.770084;AF=0.21875	GT:DR:DV	0/0:50:14
+chr1	1929385	82	N	AGGGGACAGGTCTGGGGGGGAGGCAGGAGAGAGGGTGAGGGGGAGGCAGGAGTGGGGGAGGGAGGGGAGAGGGTAGGGAGGGAGGAGAGGGTAGGGGGAGGGAGGGAGAGAGGAGGAGGGGAGAGGGTGGGAGGGAGAGAGGAGGAGAAGGGAGGGGACATGGGGAGGGGAGAGGAAAGAGGAGGGAGGGAGAGGGGAGGGAGGGAGCGGGTGAGGGGAGGGAAAGGAGGGAAATGGTATGGGAGGGGAGGGAGGGGAGAGGGTGAGGGGGAGGGAGCAGAGGGAAAGGGTGGGGGAGGGAAGGAAGGGAGAGGGTGGGGGAGGGTAGGGAGGGAGGGAGAGAGAGGGTAGGGGGAGGGGGAGAGAGGGTGAGGAGGGGGAGGGTAGGGGAGGGAAGGAGGGGAGACGGTGAGGGAGGGAGGAGAGGGTAGGGGGGAGGGAGGAAGAGGAGGGGTAGGGAGGGAGGGAGAGGAGAGGGAGGAGGGGAGGAGGGGGAGAGAGGGGTAGGGAGGGAGGGGAGGGAGGGAAGAGGGTAGGAGGGAGGGAGAGGAGAGGGAGGGAGGGAGGGGAGGAGGGAGGGTGGGAGGAGGGAGAGGGTTAGGGGAGGGAGGGAGAGGGAGGGGGAGAGGGTAGGGAGGAGAGGAGGAGAGGGTAGAGGAGGGAGGAGGGGAGAGGGGAGGGGAGGGAGGGAGAAGAGGAGGGAGAGGGTAGGGAGGGAGGGAGAGGAGAGGGGGAGAGGGAGGAGGAGGAGGAGAGGGTAGGGAGGAGGGGAGGAGGGGAGGGGTAGGGAGGGAGGGAGAGGAGGGAGGGAGGGAGGGGGAGGAGGGGGAGAAAGTTAGGGAGGGAGGGAGAGGAGAGGGGGAGGGAGGGAGGGGGAGGAGGAGAGGGGTAGGGAGGGAGGAAGGGAGGGAGGGAGGAGGGCAGGAGGGGAAATTGGGAGGGAGGGGCAGGAGGGAGAGGGTAGGGAGGGAGGGCAGGAGGGAGAGGGTAGGGAGGGAGGGAGGAAGGGAGGGAGGTAGGGAGGAGGAGGAGAGGGTAGGGAGGGAGGAGGAGGGGA	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1929385;STD_quant_start=0;STD_quant_stop=12.4023;Kurtosis_quant_start=0.385661;Kurtosis_quant_stop=-0.139538;SVTYPE=INS;SUPTYPE=AL;SVLEN=1062;STRANDS=+-;STRANDS2=14,9,14,9;RE=23;REF_strand=46,34;Strandbias_pval=0.814909;AF=0.2875	GT:DR:DV	0/0:57:23
+chr1	1934289	83	N	TACACAGGTGTACATTAGATTATTAGGTTGTGAAT	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1934289;STD_quant_start=26.898;STD_quant_stop=24.4172;Kurtosis_quant_start=-1.47698;Kurtosis_quant_stop=-1.25573;SVTYPE=INS;SUPTYPE=AL;SVLEN=98;STRANDS=+-;STRANDS2=10,4,10,4;RE=14;REF_strand=34,20;Strandbias_pval=0.755487;AF=0.259259	GT:DR:DV	0/0:40:14
+chr1	1949003	84	N	CTTCCCTTCCCCTTCCTTCCTTCTCTCCCTCTCCCTCCTCCTCTTCCCTCCTTTCCTTCCTTCCTTTCCCTTTCCTCCTTCCTCTCCCTCCCCTCCTTTCCCCTTTTCATTCCCTCTTCCCT	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1949044;STD_quant_start=33.9013;STD_quant_stop=7.46324;Kurtosis_quant_start=-2.16185;Kurtosis_quant_stop=-0.295908;SVTYPE=INS;SUPTYPE=AL;SVLEN=111;STRANDS=+-;STRANDS2=10,9,10,9;RE=19;REF_strand=30,48;Strandbias_pval=0.304283;AF=0.24359	GT:DR:DV	0/0:59:19
+chr1	1968925	85	CCCTCCTGGGGGCTCCGGTCCTGCCCAGCAGCCCCAGGTGAGACAGCGCCTGGCGGCCCCTCCCTAGCT	N	.	PASS	PRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1968994;STD_quant_start=2.28035;STD_quant_stop=2.25832;Kurtosis_quant_start=0.046742;Kurtosis_quant_stop=0.304863;SVTYPE=DEL;SUPTYPE=AL;SVLEN=-69;STRANDS=+-;STRANDS2=7,10,7,10;RE=17;REF_strand=30,42;Strandbias_pval=1;AF=0.236111	GT:DR:DV	0/0:55:17
+chr1	1979021	86	AGGCTGCACAGAACACGTGTGTCGTGCTGAGCTGGGCGTGGGAAGGCGTCATGTGACGAGGCTGCACAGAACATGCGTGTGGTACTGAGCTGGGCGTGGGAAGGTGTCACGTGACAAGGCTGCACAGAACATGTGTGTGGTACTGAGCTGGGCGTGGGAAGGCATCATGTGACA	N	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1979168;STD_quant_start=12.3369;STD_quant_stop=9.86577;Kurtosis_quant_start=3.47666;Kurtosis_quant_stop=3.76987;SVTYPE=DEL;SUPTYPE=AL;SVLEN=-147;STRANDS=+-;STRANDS2=16,15,16,15;RE=31;REF_strand=34,35;Strandbias_pval=1;AF=0.449275	GT:DR:DV	0/1:38:31
+chr1	1980059	87	CTCTTACCGCGTGGGGAGGACGGGTGAACGAGAGTGTATCTAAGCCACCGGCACAGATCGCAGTGGGCGCCCTCTTACCGCGTGGGGAGGACGGGTGAACGAGAGACTGTATCTAAGCCACCGGCACAGATCGCAGTGGGCGCCCTCTTACCGCGTGGGGAGGACGGGTGAACGAGAGACTGTATCTAAGCCACCGGCACAGATCGCAGTGGGCGCCCT	N	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1980291;STD_quant_start=9.01234;STD_quant_stop=11.0454;Kurtosis_quant_start=-1.20742;Kurtosis_quant_stop=-0.902165;SVTYPE=DEL;SUPTYPE=AL;SVLEN=-232;STRANDS=+-;STRANDS2=18,18,18,18;RE=36;REF_strand=35,37;Strandbias_pval=1;AF=0.5	GT:DR:DV	0/1:36:36
+chr1	1981556	88	N	CACGCAGGACACACAGCCGCGACGCACACCGGCACGCAGGACACCCAGCCACGGTCACACGCGGGGCACGCAGGACACCCAGCCGCGGTCACATGC	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1981581;STD_quant_start=21.5465;STD_quant_stop=28.327;Kurtosis_quant_start=-1.84901;Kurtosis_quant_stop=-1.66641;SVTYPE=INS;SUPTYPE=AL;SVLEN=34;STRANDS=+-;STRANDS2=10,14,10,14;RE=24;REF_strand=34,40;Strandbias_pval=0.814965;AF=0.324324	GT:DR:DV	0/1:50:24
+chr1	1982045	89	N	CGGGGACACGCAGGACACCCAGGACACCCAGCCGCGGACAGACACGGGGGCACACAGGACACCCAGCTCGTGGACAGACA	.	PASS	PRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1982046;STD_quant_start=5.75698;STD_quant_stop=3.31663;Kurtosis_quant_start=-0.365062;Kurtosis_quant_stop=-0.55588;SVTYPE=INS;SUPTYPE=AL;SVLEN=79;STRANDS=+-;STRANDS2=15,14,15,14;RE=29;REF_strand=36,42;Strandbias_pval=0.666552;AF=0.371795	GT:DR:DV	0/1:49:29
+chr1	1982220	90	N	AGATAGACACGGGACACGGACACCCCAGCCGTGACAGACACGGTGACAACACAGACACCCAGCCATGGACAGACACGGGCCACGAGGACACCCAGCCACGGACAGGGACATCGATGGCTTTATGACACTCCAGCCGGTAA	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1982220;STD_quant_start=30.0322;STD_quant_stop=21.4957;Kurtosis_quant_start=-1.80704;Kurtosis_quant_stop=-1.88498;SVTYPE=INS;SUPTYPE=AL;SVLEN=206;STRANDS=+-;STRANDS2=12,19,12,19;RE=31;REF_strand=34,46;Strandbias_pval=0.830787;AF=0.3875	GT:DR:DV	0/1:49:31
+chr1	1993705	91	N	GGGCACAGTGGCTCATGCCTGTAATCCCAGCAACATGGGAGCCTGAGGTGGGAGGCTCTCTTGACAGGAGTTTGAGACCAGCCTGGGCAACATAGCAGACCCCCCACCCCGCCATTTCTAGGAAAAAAAAAAAAAAAGTGGCC	.	PASS	PRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1993712;STD_quant_start=0;STD_quant_stop=2.51396;Kurtosis_quant_start=11.9398;Kurtosis_quant_stop=0.982105;SVTYPE=INS;SUPTYPE=AL;SVLEN=141;STRANDS=+-;STRANDS2=23,27,23,27;RE=50;REF_strand=48,58;Strandbias_pval=1;AF=0.471698	GT:DR:DV	0/1:56:50
+chr1	2019222	92	N	GGGGCGGGGGAGGAGAGGGGGGAGGGAGGGGGACCGGGTAGGGTGGGGGGGGGAGGGGAACGGGGAGGGGGCAGGCAGGCGCGGGGTGGGGGGAGGGGAGGGGGAGGGGAGAAGACGGGCAGCGGGAGGGGCGGGGGGAGGGGATGGGGGCGGGGGAGGAGGGCGGCGGGGGAGGGGATGGGGGCGGGGGAGGGGATGGGCGGGGGGAGGGGGA	.	PASS	PRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=2019227;STD_quant_start=3.28633;STD_quant_stop=4.04969;Kurtosis_quant_start=-0.713007;Kurtosis_quant_stop=0.942016;SVTYPE=INS;SUPTYPE=AL;SVLEN=211;STRANDS=+-;STRANDS2=23,17,23,17;RE=40;REF_strand=46,34;Strandbias_pval=1;AF=0.5	GT:DR:DV	0/1:40:40
+chr1	2106812	93	N	CCCTCTGGTGGGCGTAGGACCTGTCACCGTGTCACCAGGCCAGGTAACTCTCAGCAGG	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=2106813;STD_quant_start=19.0342;STD_quant_stop=19.9875;Kurtosis_quant_start=-1.45192;Kurtosis_quant_stop=-1.30053;SVTYPE=INS;SUPTYPE=AL;SVLEN=55;STRANDS=+-;STRANDS2=3,12,3,12;RE=15;REF_strand=40,54;Strandbias_pval=0.153747;AF=0.159574	GT:DR:DV	0/0:79:15
+chr1	2110063	94	CCCAACCAAGAGGATCCCAGAGGTGAGACACAGAACGGCCAGGGCTGAATCCGGGGCCCTCCCTGGGGGCAGCCAAGGACCTAAAACCAATGGG	N	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=2110160;STD_quant_start=19.0866;STD_quant_stop=18.9882;Kurtosis_quant_start=-1.18991;Kurtosis_quant_stop=-1.28399;SVTYPE=DEL;SUPTYPE=AL;SVLEN=-97;STRANDS=+-;STRANDS2=20,21,20,21;RE=41;REF_strand=38,45;Strandbias_pval=0.84876;AF=0.493976	GT:DR:DV	0/1:42:41
+chr1	2121520	95	N	GGTCATGAGGTGGTAGTTAAGTTATGGTAGTTAG	.	PASS	PRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=2121520;STD_quant_start=0;STD_quant_stop=0.8044;Kurtosis_quant_start=2.24316;Kurtosis_quant_stop=2.47427;SVTYPE=INS;SUPTYPE=AL;SVLEN=33;STRANDS=+-;STRANDS2=21,15,21,15;RE=35;REF_strand=60,50;Strandbias_pval=0.704801;AF=0.318182	GT:DR:DV	0/1:75:35
+chr1	2122244	96	N	GTTAGGGTCACGGCGGTGGTTAGGTCGTGGTGGGAGTTAGGGTCACGGTGGTAGTTAGGGTCATGGTGGTAGTTAGGATCATGGCTGTAGTTAGCGTCATGGTGGTAGTTAGGGTCACGGCTATAGTTGGGGTCATGGTGGTAGTTAGGGTCATGGTGGTAGTTATTTAGGGTCACGGCTGTAGTTAGCGTCATGGTGGTGGTTAGGTCATGGTGGTAGTTAGGGGTCACGGCTGTAGTTAGGGTCATGGTGGTGGTTAGGTCACTTGCTGTAGTTAGGGTCATGGTGGTAGTTAGGTCATGGTGGTAGTTAGCGTCATGGTGGTGGTTAGGTCATGGTAGTTAGGGTCACTGCCA	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=2122256;STD_quant_start=72.1976;STD_quant_stop=164.724;Kurtosis_quant_start=0.262966;Kurtosis_quant_stop=-1.09184;SVTYPE=INS;SUPTYPE=AL;SVLEN=340;STRANDS=+-;STRANDS2=0,6,0,6;RE=6;REF_strand=56,48;Strandbias_pval=0.0120583;AF=0.0576923	GT:DR:DV	0/0:98:6
+chr1	2123322	97	N	TAATTGGGATCATGACCATGTGATTGGGGTCATGGTGTTAGTTAAGGTCATGACTGT	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=2123322;STD_quant_start=21.4499;STD_quant_stop=25.9846;Kurtosis_quant_start=-0.532609;Kurtosis_quant_stop=-1.17357;SVTYPE=INS;SUPTYPE=AL;SVLEN=88;STRANDS=+-;STRANDS2=9,11,9,11;RE=20;REF_strand=54,52;Strandbias_pval=0.80797;AF=0.188679	GT:DR:DV	0/0:86:20
+chr1	2123768	98	N	GGCTGTGGTTAGGGTCATGGTGGTAGTTAGGATCATGGCTGTAGTTAGGTCATGGTGGTAGGTCTGGTCACGGCTAGTTGGGGTCATGGTGGTAGTTAGATCATGGCTGTAGTTAGGGTCAT	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=2123768;STD_quant_start=41.4871;STD_quant_stop=39.3296;Kurtosis_quant_start=-1.74964;Kurtosis_quant_stop=-0.19162;SVTYPE=INS;SUPTYPE=AL;SVLEN=112;STRANDS=+-;STRANDS2=11,11,11,11;RE=22;REF_strand=52,50;Strandbias_pval=1;AF=0.215686	GT:DR:DV	0/0:80:22
+chr1	2124290	100	N	GGGTCATGGTGGTAGTTAGGATCATGGCTGTAGTTGGGGTCATGGTGGTAGTTAGGGTCACGGCTATAGTTAGGGTCATGGTGGTAGTTATTGGTCTGTGATAGTTAGCATCATGGTGGTAGTTAGGGTCATGGTGGTAGTTAGGGTCATGGTGGTAGTTAGGGTCATGGTGGTAGTTGGGGTCATAGCTGTAGTTAGGGTCATAGTGGTAGTTGGGGTCACGGCTATAGTTG	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=2124353;STD_quant_start=62.996;STD_quant_stop=56.9131;Kurtosis_quant_start=-1.3131;Kurtosis_quant_stop=-0.191076;SVTYPE=INS;SUPTYPE=AL;SVLEN=110;STRANDS=+-;STRANDS2=6,7,6,7;RE=13;REF_strand=55,51;Strandbias_pval=0.773897;AF=0.122642	GT:DR:DV	0/0:93:13
+chr1	2124333	99	N	AAGGGTCATGGTGGTAATTAGGATCATGTAGCTGTAGTTAGGGTCATGGTGGTAGTTAGGGTCTGGCTATAGTTGGGGTCATGGTGGTAGTTAGGGTCACAGCGATAGTTAGCATCATGGTGGTAGTTAGGGTCATGGTGGTAGATTGGGGTCATGGTGGTAGTTAGGGTCATGGTGGTAGTTAGGGTCATAGCTGTAGTTAGGGTCTGTGGTGGTAGTTGGGGTCCGCGGCTATAGTTGGGGTCCATGGTGGTAGTTAAGGTCACGGCTGTGATTAGCGTCATGGTGGTACGTT	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=2124347;STD_quant_start=28.9361;STD_quant_stop=52.7655;Kurtosis_quant_start=0.874818;Kurtosis_quant_stop=0.127135;SVTYPE=INS;SUPTYPE=AL;SVLEN=295;STRANDS=+-;STRANDS2=10,9,10,9;RE=19;REF_strand=54,52;Strandbias_pval=1;AF=0.179245	GT:DR:DV	0/0:87:19
+chr1	2142340	101	CTTTCAATCCAGGGTCCACACATCCAGCAGCCGAAGCGCCCTCCTTTCAATCCAGGGTCCAGGCATCTAGCAGCCGAAGCGCCT	N	.	PASS	PRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=2142424;STD_quant_start=9.44235;STD_quant_stop=8.28442;Kurtosis_quant_start=7.28739;Kurtosis_quant_stop=4.86915;SVTYPE=DEL;SUPTYPE=AL;SVLEN=-84;STRANDS=+-;STRANDS2=23,15,23,15;RE=38;REF_strand=48,32;Strandbias_pval=1;AF=0.475	GT:DR:DV	0/1:42:38
+chr1	2280758	102	N	GCCTCGGGAGAGTGACAGGCGGCGGCGGCGACACCAGAGAGCGGACGAGAGGACAGGCGGCGGCGGCGATCTTTCAGAGAGCGGGATTTTCCCGAGAGGGACAGAGAAGGCGGCGGAGATTGTCTTCAGAGAGAGGAT	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=2280758;STD_quant_start=32.8507;STD_quant_stop=82.7345;Kurtosis_quant_start=-1.7272;Kurtosis_quant_stop=-1.34469;SVTYPE=INS;SUPTYPE=AL;SVLEN=205;STRANDS=+-;STRANDS2=2,4,2,4;RE=6;REF_strand=32,54;Strandbias_pval=1;AF=0.0697674	GT:DR:DV	0/0:80:6
+chr1	2280945	103	N	CGGTGCGGAGAGATCTTCAGAGAGAGGACGCCTGAGAAGACAGT	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=2281014;STD_quant_start=60.2633;STD_quant_stop=47.2864;Kurtosis_quant_start=-0.84573;Kurtosis_quant_stop=-0.496898;SVTYPE=INS;SUPTYPE=AL;SVLEN=51;STRANDS=+-;STRANDS2=1,5,1,5;RE=6;REF_strand=34,54;Strandbias_pval=0.40609;AF=0.0681818	GT:DR:DV	0/0:82:6
+chr1	2281986	104	GAGAGGACGCCCGAGAAGACAGGCGGTGGCGGAGATCTTCAG	N	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=2282028;STD_quant_start=21.4103;STD_quant_stop=21.0879;Kurtosis_quant_start=-1.23914;Kurtosis_quant_stop=-0.042349;SVTYPE=DEL;SUPTYPE=AL;SVLEN=-42;STRANDS=+-;STRANDS2=9,11,9,11;RE=20;REF_strand=35,54;Strandbias_pval=0.801434;AF=0.224719	GT:DR:DV	0/0:69:20
diff --git a/tests/end_to_end/test_convert.py b/tests/end_to_end/test_convert.py
index 95b45309..e0b29e8e 100644
--- a/tests/end_to_end/test_convert.py
+++ b/tests/end_to_end/test_convert.py
@@ -108,6 +108,20 @@ def test_vcf(self):
         print(record, record.data)
         assert record.data['CLNSIG'] == 'Pathogenic'
 
+    def test_sniffle(self):
+        results = self.run_main(get_data('sniffles.vcf'), SUPPORTED_TOOL.VCF, False)
+        print(results.keys())
+        record = results['vcf-35777'][0]
+        print(record, record.data)
+        assert record.data['event_type'] == 'translocation'
+    
+    def test_cuteSV(self):
+        results = self.run_main(get_data('cuteSV.vcf'), SUPPORTED_TOOL.VCF, False)
+        print(results.keys())
+        record = results['vcf-cuteSV.BND.0'][0]
+        print(record, record.data)
+        assert record.data['event_type'] == 'inverted translocation'
+    
     def test_breakseq2(self):
         self.run_main(get_data('breakseq.vcf'), SUPPORTED_TOOL.BREAKSEQ, False)
 

From 78a7677c9f5d6578c7173df79a5c1bba961be697 Mon Sep 17 00:00:00 2001
From: Jeremy Fan <jfan@bcgsc.ca>
Date: Tue, 11 Jan 2022 01:04:51 -0800
Subject: [PATCH 070/137] added docker support for lr dependencies and edited
 documentation

---
 .github/CONTRIBUTING.md       |  9 +++++++--
 Dockerfile                    | 14 ++++++++++++++
 docs/configuration/general.md |  2 +-
 docs/tutorials/mini.md        |  2 +-
 4 files changed, 23 insertions(+), 4 deletions(-)

diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md
index d38244d4..e895f9f4 100644
--- a/.github/CONTRIBUTING.md
+++ b/.github/CONTRIBUTING.md
@@ -43,8 +43,13 @@ markdown_refdocs mavis -o docs/package --link
 mkdocs build
 ```
 
-The contents of the user manual can then be viewed by opening the build-docs/index.html
-in any available web browser (i.e. google-chrome, firefox, etc.)
+The contents of the user manual can then be viewed by opening the build-docs/index.html in any available web browser 
+(i.e. google-chrome, firefox, etc.). Future development to build the Markdown files into HTML and start a development 
+server to browse the documentation can be done using: 
+
+```bash
+mkdocs serve
+```
 
 ## Deploy to PyPi
 
diff --git a/Dockerfile b/Dockerfile
index b62ea761..2d0dae78 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -17,6 +17,13 @@ RUN git clone https://github.com/lh3/bwa.git && \
     cd .. && \
     mv bwa/bwa /usr/local/bin
 
+# install minimap2 (make builds the `minimap2` executable; `minimap2.1` is only its man page)
+RUN git clone https://github.com/lh3/minimap2.git && \
+    cd minimap2 && \
+    git checkout v2.24 && \
+    make && \
+    cd .. && \
+    mv minimap2/minimap2 /usr/local/bin
 
 # install blat dependencies
 RUN apt-get install -y libcurl4
@@ -26,6 +33,13 @@ RUN wget http://hgdownload.soe.ucsc.edu/admin/exe/linux.x86_64/blat/blat && \
     chmod a+x blat && \
     mv blat /usr/local/bin
 
+# install wtdbg2
+RUN git clone https://github.com/ruanjue/wtdbg2.git && \
+    cd wtdbg2 && \
+    make && \
+    cd .. && \
+    mv wtdbg2/wtdbg2 /usr/local/bin
+
 COPY setup.py setup.py
 COPY setup.cfg setup.cfg
 COPY MANIFEST.in MANIFEST.in
diff --git a/docs/configuration/general.md b/docs/configuration/general.md
index e22f5339..176aef2e 100644
--- a/docs/configuration/general.md
+++ b/docs/configuration/general.md
@@ -8,7 +8,7 @@ The pipeline can be run in steps or it can be configured using a JSON
 configuration file and setup in a single step. Scripts will be generated
 to run all steps following clustering.
 
-The config schema is found in the mavis package under `mavis/schemas/config.json`
+The config schema is found in the mavis package under `src/mavis/schemas/config.json`
 
 Top level settings follow the pattern `<section>.<setting>`. The convert and library
 sections are nested objects.
diff --git a/docs/tutorials/mini.md b/docs/tutorials/mini.md
index 27b5f51f..d4475985 100644
--- a/docs/tutorials/mini.md
+++ b/docs/tutorials/mini.md
@@ -3,7 +3,7 @@
 This tutorial is based on the data included in the tests folder of
 MAVIS. The data files are very small and this tutorial is really only
 intended for testing a MAVIS install. The data here is simulated and
-results are not representitive of the typical events you would see
+results are not representative of the typical events you would see
 reported from MAVIS. For a more complete tutorial with actual fusion
 gene examples, please see the [full tutorial](../../tutorials/full/).
 

From 0ab2e8f9c55e5104041a9d15fc9596a49ac4fe30 Mon Sep 17 00:00:00 2001
From: Jeremy Fan <jfan@bcgsc.ca>
Date: Tue, 11 Jan 2022 14:25:33 -0800
Subject: [PATCH 071/137] revert contributing message

---
 .github/CONTRIBUTING.md | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md
index e895f9f4..a7f71e24 100644
--- a/.github/CONTRIBUTING.md
+++ b/.github/CONTRIBUTING.md
@@ -43,14 +43,6 @@ markdown_refdocs mavis -o docs/package --link
 mkdocs build
 ```
 
-The contents of the user manual can then be viewed by opening the build-docs/index.html in any available web browser 
-(i.e. google-chrome, firefox, etc.). Future development to build the Markdown files into HTML and start a development 
-server to browse the documentation can be done using: 
-
-```bash
-mkdocs serve
-```
-
 ## Deploy to PyPi
 
 Install deployment dependencies

From 14d4a911457f5fd181065ae0c5a921621c8d8b09 Mon Sep 17 00:00:00 2001
From: Jeremy Fan <jfan@bcgsc.ca>
Date: Tue, 11 Jan 2022 15:30:35 -0800
Subject: [PATCH 072/137] revert contribution.md file

---
 .github/CONTRIBUTING.md | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md
index a7f71e24..d38244d4 100644
--- a/.github/CONTRIBUTING.md
+++ b/.github/CONTRIBUTING.md
@@ -43,6 +43,9 @@ markdown_refdocs mavis -o docs/package --link
 mkdocs build
 ```
 
+The contents of the user manual can then be viewed by opening the build-docs/index.html
+in any available web browser (i.e. google-chrome, firefox, etc.)
+
 ## Deploy to PyPi
 
 Install deployment dependencies

From 7e07e2c3e99f4e6816ac8c7765d49c2d5757d9ee Mon Sep 17 00:00:00 2001
From: Jeremy Fan <jfan@bcgsc.ca>
Date: Thu, 20 Jan 2022 14:35:34 -0800
Subject: [PATCH 073/137] added handling of uncertain calls by sniffle

---
 src/mavis/tools/vcf.py  | 212 ++++++++++++++++++++---------------
 tests/data/sniffles.vcf | 242 +++++++++++++++++++---------------------
 2 files changed, 237 insertions(+), 217 deletions(-)

diff --git a/src/mavis/tools/vcf.py b/src/mavis/tools/vcf.py
index eea0fadf..ae1410e5 100644
--- a/src/mavis/tools/vcf.py
+++ b/src/mavis/tools/vcf.py
@@ -2,6 +2,7 @@
 import re
 from dataclasses import dataclass
 from typing import Dict, List, Optional, Tuple
+from copy import deepcopy
 
 import pandas as pd
 
@@ -16,19 +17,19 @@
 from .constants import SUPPORTED_TOOL
 
 PANDAS_DEFAULT_NA_VALUES = [
-    '-1.#IND',
-    '1.#QNAN',
-    '1.#IND',
-    '-1.#QNAN',
-    '#N/A',
-    'N/A',
-    'NA',
-    '#NA',
-    'NULL',
-    'NaN',
-    '-NaN',
-    'nan',
-    '-nan',
+    "-1.#IND",
+    "1.#QNAN",
+    "1.#IND",
+    "-1.#QNAN",
+    "#N/A",
+    "N/A",
+    "NA",
+    "#NA",
+    "NULL",
+    "NaN",
+    "-NaN",
+    "nan",
+    "-nan",
 ]
 
 
@@ -53,7 +54,7 @@ class VcfRecordType:
 
     @property
     def stop(self) -> Optional[int]:
-        return self.info.get('END', self.pos)
+        return self.info.get("END", self.pos)
 
 
 def parse_bnd_alt(alt: str) -> Tuple[str, int, str, str, str, str]:
@@ -75,51 +76,51 @@ def parse_bnd_alt(alt: str) -> Tuple[str, int, str, str, str, str]:
     | ru]p]        | LL      |
     """
     # ru[p[
-    match = re.match(r'^(?P<ref>\w)(?P<useq>\w*)\[(?P<chr>[^:]+):(?P<pos>\d+)\[$', alt)
+    match = re.match(r"^(?P<ref>\w)(?P<useq>\w*)\[(?P<chr>[^:]+):(?P<pos>\d+)\[$", alt)
     if match:
         return (
-            match.group('chr'),
-            int(match.group('pos')),
+            match.group("chr"),
+            int(match.group("pos")),
             ORIENT.LEFT,
             ORIENT.RIGHT,
-            match.group('ref'),
-            match.group('useq'),
+            match.group("ref"),
+            match.group("useq"),
         )
     # [p[ur
-    match = re.match(r'^\[(?P<chr>[^:]+):(?P<pos>\d+)\[(?P<useq>\w*)(?P<ref>\w)$', alt)
+    match = re.match(r"^\[(?P<chr>[^:]+):(?P<pos>\d+)\[(?P<useq>\w*)(?P<ref>\w)$", alt)
     if match:
         return (
-            match.group('chr'),
-            int(match.group('pos')),
+            match.group("chr"),
+            int(match.group("pos")),
             ORIENT.RIGHT,
             ORIENT.RIGHT,
-            match.group('ref'),
-            match.group('useq'),
+            match.group("ref"),
+            match.group("useq"),
         )
     # ]p]ur
-    match = re.match(r'^\](?P<chr>[^:]+):(?P<pos>\d+)\](?P<useq>\w*)(?P<ref>\w)$', alt)
+    match = re.match(r"^\](?P<chr>[^:]+):(?P<pos>\d+)\](?P<useq>\w*)(?P<ref>\w)$", alt)
     if match:
         return (
-            match.group('chr'),
-            int(match.group('pos')),
+            match.group("chr"),
+            int(match.group("pos")),
             ORIENT.RIGHT,
             ORIENT.LEFT,
-            match.group('ref'),
-            match.group('useq'),
+            match.group("ref"),
+            match.group("useq"),
         )
     # ru]p]
-    match = re.match(r'^(?P<ref>\w)(?P<useq>\w*)\](?P<chr>[^:]+):(?P<pos>\d+)\]$', alt)
+    match = re.match(r"^(?P<ref>\w)(?P<useq>\w*)\](?P<chr>[^:]+):(?P<pos>\d+)\]$", alt)
     if match:
         return (
-            match.group('chr'),
-            int(match.group('pos')),
+            match.group("chr"),
+            int(match.group("pos")),
             ORIENT.LEFT,
             ORIENT.LEFT,
-            match.group('ref'),
-            match.group('useq'),
+            match.group("ref"),
+            match.group("useq"),
         )
     else:
-        raise NotImplementedError('alt specification in unexpected format', alt)
+        raise NotImplementedError("alt specification in unexpected format", alt)
 
 
 def convert_record(record, record_mapping={}, log=DEVNULL) -> List[Dict]:
@@ -143,7 +144,7 @@ def convert_record(record, record_mapping={}, log=DEVNULL) -> List[Dict]:
             try:
                 value = record.info[key]
             except UnicodeDecodeError as err:
-                log('Ignoring invalid INFO field {} with error: {}'.format(key, err))
+                log("Ignoring invalid INFO field {} with error: {}".format(key, err))
             else:
                 try:
                     value = value[0] if len(value) == 1 else value
@@ -152,27 +153,27 @@ def convert_record(record, record_mapping={}, log=DEVNULL) -> List[Dict]:
             info[key] = value
 
         std_row = {}
-        if record.id and record.id != 'N':  # to account for NovoBreak N in the ID field
-            std_row['id'] = record.id
+        if record.id and record.id != "N":  # to account for NovoBreak N in the ID field
+            std_row["id"] = record.id
 
-        if info.get('SVTYPE') == 'BND':
+        if info.get("SVTYPE") == "BND":
             chr2, end, orient1, orient2, ref, alt = parse_bnd_alt(alt)
             std_row[COLUMNS.break1_orientation] = orient1
             std_row[COLUMNS.break2_orientation] = orient2
             std_row[COLUMNS.untemplated_seq] = alt
             if record.ref != ref:
                 raise AssertionError(
-                    'Expected the ref specification in the vcf record to match the sequence '
-                    'in the alt string: {} vs {}'.format(record.ref, ref)
+                    "Expected the ref specification in the vcf record to match the sequence "
+                    "in the alt string: {} vs {}".format(record.ref, ref)
                 )
         else:
-            chr2 = info.get('CHR2', record.chrom)
+            chr2 = info.get("CHR2", record.chrom)
             end = record.stop
             if (
                 alt
                 and record.ref
-                and re.match(r'^[A-Z]+$', alt)
-                and re.match(r'^[A-Z]+', record.ref)
+                and re.match(r"^[A-Z]+$", alt)
+                and re.match(r"^[A-Z]+", record.ref)
             ):
                 std_row[COLUMNS.untemplated_seq] = alt[1:]
                 size = len(alt) - len(record.ref)
@@ -180,9 +181,11 @@ def convert_record(record, record_mapping={}, log=DEVNULL) -> List[Dict]:
                     std_row[COLUMNS.event_type] = SVTYPE.INS
                 elif size < 0:
                     std_row[COLUMNS.event_type] = SVTYPE.DEL
-        std_row.update({COLUMNS.break1_chromosome: record.chrom, COLUMNS.break2_chromosome: chr2})
+        std_row.update(
+            {COLUMNS.break1_chromosome: record.chrom, COLUMNS.break2_chromosome: chr2}
+        )
         if info.get(
-            'PRECISE', False
+            "PRECISE", False
         ):  # DELLY CI only apply when split reads were not used to refine the breakpoint which is then flagged
             std_row.update(
                 {
@@ -196,65 +199,100 @@ def convert_record(record, record_mapping={}, log=DEVNULL) -> List[Dict]:
             std_row.update(
                 {
                     COLUMNS.break1_position_start: max(
-                        1, record.pos + info.get('CIPOS', (0, 0))[0]
+                        1, record.pos + info.get("CIPOS", (0, 0))[0]
+                    ),
+                    COLUMNS.break1_position_end: record.pos
+                    + info.get("CIPOS", (0, 0))[1],
+                    COLUMNS.break2_position_start: max(
+                        1, end + info.get("CIEND", (0, 0))[0]
                     ),
-                    COLUMNS.break1_position_end: record.pos + info.get('CIPOS', (0, 0))[1],
-                    COLUMNS.break2_position_start: max(1, end + info.get('CIEND', (0, 0))[0]),
-                    COLUMNS.break2_position_end: end + info.get('CIEND', (0, 0))[1],
+                    COLUMNS.break2_position_end: end + info.get("CIEND", (0, 0))[1],
                 }
             )
 
-        if 'SVTYPE' in info:
-            std_row[COLUMNS.event_type] = info['SVTYPE']
+        std_row2 = {}
+
+        if "SVTYPE" in info:
+            if info["SVTYPE"] in dir(SVTYPE):
+                std_row[COLUMNS.event_type] = info["SVTYPE"]
+            elif "/" in info["SVTYPE"]:
+                std_row2 = deepcopy(std_row)
+                std_row[COLUMNS.event_type] = info["SVTYPE"].split("/")[0]
+                std_row2[COLUMNS.event_type] = info["SVTYPE"].split("/")[1]
 
         try:
-            orient1, orient2 = info['CT'].split('to')
-            connection_type = {'3': ORIENT.LEFT, '5': ORIENT.RIGHT, 'N': ORIENT.NS}
+            orient1, orient2 = info["CT"].split("to")
+            connection_type = {"3": ORIENT.LEFT, "5": ORIENT.RIGHT, "N": ORIENT.NS}
             std_row[COLUMNS.break1_orientation] = connection_type[orient1]
             std_row[COLUMNS.break2_orientation] = connection_type[orient2]
+            if bool(std_row2):
+                std_row2[COLUMNS.break1_orientation] = connection_type[orient1]
+                std_row2[COLUMNS.break2_orientation] = connection_type[orient2]
         except KeyError:
             pass
-        std_row.update(
-            {k: v for k, v in info.items() if k not in {'CHR2', 'SVTYPE', 'CIPOS', 'CIEND', 'CT'}}
-        )
-        records.append(std_row)
+        if bool(std_row2):
+            std_row2.update(
+                {
+                    k: v
+                    for k, v in info.items()
+                    if k not in {"CHR2", "SVTYPE", "CIPOS", "CIEND", "CT"}
+                }
+            )
+            std_row.update(
+                {
+                    k: v
+                    for k, v in info.items()
+                    if k not in {"CHR2", "SVTYPE", "CIPOS", "CIEND", "CT"}
+                }
+            )
+            records.append(std_row)
+            records.append(std_row2)
+        else:
+            std_row.update(
+                {
+                    k: v
+                    for k, v in info.items()
+                    if k not in {"CHR2", "SVTYPE", "CIPOS", "CIEND", "CT"}
+                }
+            )
+            records.append(std_row)
     return records
 
 
 def convert_pandas_rows_to_variants(df):
     def parse_info(info_field):
         info = {}
-        for pair in info_field.split(';'):
-            if '=' in pair:
-                key, value = pair.split('=', 1)
+        for pair in info_field.split(";"):
+            if "=" in pair:
+                key, value = pair.split("=", 1)
                 info[key] = value
             else:
                 info[pair] = True
 
         # convert info types
         for key in info:
-            if key in {'CIPOS', 'CIEND'}:
-                ci_start, ci_end = info[key].split(',')
+            if key in {"CIPOS", "CIEND"}:
+                ci_start, ci_end = info[key].split(",")
                 info[key] = (int(ci_start), int(ci_end))
-            elif key == 'END':
+            elif key == "END":
                 info[key] = int(info[key])
 
         return info
 
-    df['info'] = df['INFO'].apply(parse_info)
-    df['alts'] = df['ALT'].apply(lambda a: a.split(','))
+    df["info"] = df["INFO"].apply(parse_info)
+    df["alts"] = df["ALT"].apply(lambda a: a.split(","))
 
     rows = []
     for _, row in df.iterrows():
 
         rows.append(
             VcfRecordType(
-                id=row['ID'],
-                pos=row['POS'],
-                info=VcfInfoType(row['info']),
-                chrom=row['CHROM'],
-                ref=row['REF'],
-                alts=row['alts'],
+                id=row["ID"],
+                pos=row["POS"],
+                info=VcfInfoType(row["info"]),
+                chrom=row["CHROM"],
+                ref=row["REF"],
+                alts=row["alts"],
             )
         )
     return rows
@@ -266,33 +304,33 @@ def pandas_vcf(input_file) -> Tuple[List[str], pd.DataFrame]:
     """
     # read the comment/header information
     header_lines = []
-    with open(input_file, 'r') as fh:
-        line = '##'
-        while line.startswith('##'):
+    with open(input_file, "r") as fh:
+        line = "##"
+        while line.startswith("##"):
             header_lines.append(line)
             line = fh.readline().strip()
         header_lines = header_lines[1:]
     # read the data
     df = pd.read_csv(
         input_file,
-        sep='\t',
+        sep="\t",
         skiprows=len(header_lines),
         dtype={
-            'CHROM': str,
-            'POS': int,
-            'ID': str,
-            'INFO': str,
-            'FORMAT': str,
-            'REF': str,
-            'ALT': str,
+            "CHROM": str,
+            "POS": int,
+            "ID": str,
+            "INFO": str,
+            "FORMAT": str,
+            "REF": str,
+            "ALT": str,
         },
-        na_values=PANDAS_DEFAULT_NA_VALUES + ['.'],
+        na_values=PANDAS_DEFAULT_NA_VALUES + ["."],
     )
-    df = df.rename(columns={df.columns[0]: df.columns[0].replace('#', '')})
-    required_columns = ['CHROM', 'INFO', 'POS', 'REF', 'ALT', 'ID']
+    df = df.rename(columns={df.columns[0]: df.columns[0].replace("#", "")})
+    required_columns = ["CHROM", "INFO", "POS", "REF", "ALT", "ID"]
     for col in required_columns:
         if col not in df.columns:
-            raise KeyError(f'Missing required column: {col}')
+            raise KeyError(f"Missing required column: {col}")
     # convert the format fields using the header
     return header_lines, df
 
diff --git a/tests/data/sniffles.vcf b/tests/data/sniffles.vcf
index 700df87a..dd631018 100644
--- a/tests/data/sniffles.vcf
+++ b/tests/data/sniffles.vcf
@@ -1,7 +1,7 @@
 ##fileformat=VCFv4.1
 ##FILTER=<ID=PASS,Description="All filters passed">
 ##source=Sniffles
-##fileDate=20210607:16:05 PMef_minus
+##fileDate=20201105
 ##contig=<ID=chr1,length=248956422>
 ##contig=<ID=chr2,length=242193529>
 ##contig=<ID=chr3,length=198295559>
@@ -197,159 +197,141 @@
 ##contig=<ID=chrUn_GL000216v2,length=176608>
 ##contig=<ID=chrUn_GL000218v1,length=161147>
 ##contig=<ID=chrEBV,length=171823>
-##contig=<ID=NC_001416.1,length=48502>
 ##ALT=<ID=DEL,Description="Deletion">
 ##ALT=<ID=DUP,Description="Duplication">
 ##ALT=<ID=INV,Description="Inversion">
 ##ALT=<ID=INVDUP,Description="InvertedDUP with unknown boundaries">
 ##ALT=<ID=TRA,Description="Translocation">
 ##ALT=<ID=INS,Description="Insertion">
-##FILTER=<ID=UNRESOLVED,Description="An insertion that is longer than the read and thus we cannot predict the full size.">
 ##INFO=<ID=CHR2,Number=1,Type=String,Description="Chromosome for END coordinate in case of a translocation">
 ##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the structural variant">
 ##INFO=<ID=MAPQ,Number=1,Type=Integer,Description="Median mapping quality of paired-ends">
 ##INFO=<ID=RE,Number=1,Type=Integer,Description="read support">
 ##INFO=<ID=IMPRECISE,Number=0,Type=Flag,Description="Imprecise structural variation">
 ##INFO=<ID=PRECISE,Number=0,Type=Flag,Description="Precise structural variation">
+##INFO=<ID=UNRESOLVED,Number=0,Type=Flag,Description="An insertion that is longer than the read and thus we cannot predict the full size.">
 ##INFO=<ID=SVLEN,Number=1,Type=Integer,Description="Length of the SV">
+##INFO=<ID=REF_strand,Number=2,Type=Integer,Description="Length of the SV">
 ##INFO=<ID=SVMETHOD,Number=1,Type=String,Description="Type of approach used to detect SV">
 ##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">
+##INFO=<ID=RNAMES,Number=.,Type=String,Description="Names of reads supporting SVs (comma separated)">
 ##INFO=<ID=SEQ,Number=1,Type=String,Description="Extracted sequence from the best representative read.">
-##INFO=<ID=STRANDS2,Number=4,Type=Integer,Description="alt reads first + ,alt reads first -,alt reads second + ,alt reads second -.">
-##INFO=<ID=REF_strand,Number=.,Type=Integer,Description="plus strand ref, minus strand ref.">
-##INFO=<ID=Strandbias_pval,Number=A,Type=Float,Description="P-value for fisher exact test for strand bias.">
 ##INFO=<ID=STD_quant_start,Number=A,Type=Float,Description="STD of the start breakpoints across the reads.">
 ##INFO=<ID=STD_quant_stop,Number=A,Type=Float,Description="STD of the stop breakpoints across the reads.">
 ##INFO=<ID=Kurtosis_quant_start,Number=A,Type=Float,Description="Kurtosis value of the start breakpoints across the reads.">
 ##INFO=<ID=Kurtosis_quant_stop,Number=A,Type=Float,Description="Kurtosis value of the stop breakpoints across the reads.">
-##INFO=<ID=SUPTYPE,Number=.,Type=String,Description="Type by which the variant is supported.(SR,AL,NR)">
+##INFO=<ID=SUPTYPE,Number=A,Type=String,Description="Type by which the variant is supported.(SR,ALN,NR)">
 ##INFO=<ID=STRANDS,Number=A,Type=String,Description="Strand orientation of the adjacency in BEDPE format (DEL:+-, DUP:-+, INV:++/--)">
 ##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency.">
 ##INFO=<ID=ZMW,Number=A,Type=Integer,Description="Number of ZMWs (Pacbio) supporting SV.">
 ##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
 ##FORMAT=<ID=DR,Number=1,Type=Integer,Description="# high-quality reference reads">
 ##FORMAT=<ID=DV,Number=1,Type=Integer,Description="# high-quality variant reads">
-##contig=<ID=NC_001416.1STRANDBIAS>
 ##bcftools_viewVersion=1.11+htslib-1.11
-##bcftools_viewCommand=view --regions chr1 F24721_merged_sorted.bam_5_read_sorted.vcf.gz; Date=Tue Jan  4 22:45:21 2022
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	/projects/jfan_prj/jfan_prj/Nanopore_Testing/2021_nanopore_sv_testing/scratch/depth_testing/POG/COLO829/minimap2_bam/F24721_merged_sorted.bam
-chr1	10006	35777	N	]chr3:198172735]N	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr3;END=198172735;STD_quant_start=32.4628;STD_quant_stop=44.8237;Kurtosis_quant_start=2.29519;Kurtosis_quant_stop=-0.995353;SVTYPE=BND;SUPTYPE=SR;SVLEN=1;STRANDS=--;STRANDS2=0,6,6,0;RE=6;REF_strand=72,102;Strandbias_pval=0.0824618;AF=0.0344828	GT:DR:DV	0/0:168:6
-chr1	10030	36832	N	]chr17:41490827]N	.	STRANDBIAS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr17;END=41490827;STD_quant_start=48.6107;STD_quant_stop=4.67516;Kurtosis_quant_start=0.545103;Kurtosis_quant_stop=1.53121;SVTYPE=BND;SUPTYPE=SR;SVLEN=1;STRANDS=--;STRANDS2=0,7,7,0;RE=7;REF_strand=24,9;Strandbias_pval=0.000613617;AF=0.212121	GT:DR:DV	0/0:26:7
-chr1	10312	35780	N	]chrX:449436]N	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chrX;END=449436;STD_quant_start=117.156;STD_quant_stop=68.302;Kurtosis_quant_start=-1.29786;Kurtosis_quant_stop=-0.029231;SVTYPE=BND;SUPTYPE=SR;SVLEN=1;STRANDS=-+;STRANDS2=0,6,0,6;RE=6;REF_strand=45,128;Strandbias_pval=0.33926;AF=0.0346821	GT:DR:DV	0/0:167:6
-chr1	10466	35781	N	N[chrX:156030800[	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chrX;END=156030800;STD_quant_start=81.1924;STD_quant_stop=134.17;Kurtosis_quant_start=1.34083;Kurtosis_quant_stop=1.99911;SVTYPE=BND;SUPTYPE=SR;SVLEN=1;STRANDS=++;STRANDS2=5,0,0,5;RE=5;REF_strand=7,8;Strandbias_pval=0.0546956;AF=0.333333	GT:DR:DV	0/1:10:5
-chr1	10467	35779	N	N[chr3:10002[	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr3;END=10002;STD_quant_start=106.244;STD_quant_stop=161.729;Kurtosis_quant_start=0.552508;Kurtosis_quant_stop=2.99076;SVTYPE=BND;SUPTYPE=SR;SVLEN=1;STRANDS=+-;STRANDS2=6,0,6,0;RE=6;REF_strand=4,5;Strandbias_pval=0.043956;AF=0.666667	GT:DR:DV	0/1:3:6
-chr1	10467	35782	N	N[chr3:198174376[	.	STRANDBIAS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr3;END=198174376;STD_quant_start=17.5865;STD_quant_stop=297.518;Kurtosis_quant_start=0.324147;Kurtosis_quant_stop=0.886959;SVTYPE=BND;SUPTYPE=SR;SVLEN=1;STRANDS=++;STRANDS2=7,0,0,7;RE=7;REF_strand=57,112;Strandbias_pval=0.000675389;AF=0.0414201	GT:DR:DV	0/0:162:7
-chr1	10468	35778	N	N[chr17:41490879[	.	STRANDBIAS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr17;END=41490879;STD_quant_start=41.208;STD_quant_stop=1.92354;Kurtosis_quant_start=3.02235;Kurtosis_quant_stop=0.961601;SVTYPE=BND;SUPTYPE=SR;SVLEN=1;STRANDS=++;STRANDS2=11,0,0,11;RE=11;REF_strand=4,6;Strandbias_pval=0.00386997;AF=1	GT:DR:DV	1/1:0:11
-chr1	35143	35783	N	N[chr20:60001[	.	PASS	PRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr20;END=60001;STD_quant_start=0;STD_quant_stop=0.632456;Kurtosis_quant_start=nan;Kurtosis_quant_stop=-0.5;SVTYPE=BND;SUPTYPE=SR;SVLEN=1;STRANDS=+-;STRANDS2=3,2,3,2;RE=5;REF_strand=0,3;Strandbias_pval=0.196429;AF=1	GT:DR:DV	1/1:0:5
-chr1	136637	0	N	GTGTCGGCTGACCCTCTGTCCGCGTGGAGGCCGGTGGGGTGTGGAGGC	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=136638;STD_quant_start=20.4524;STD_quant_stop=21.8541;Kurtosis_quant_start=-1.28738;Kurtosis_quant_stop=-1.27278;SVTYPE=INS;SUPTYPE=AL;SVLEN=46;STRANDS=+-;STRANDS2=10,8,10,8;RE=18;REF_strand=20,20;Strandbias_pval=0.780391;AF=0.45	GT:DR:DV	0/1:22:18
-chr1	136956	1	N	TGACCTCTCTCAGTGTGGGAGGGGGCCGGTGTGAGGCAAGGGGCTCACGCGCGGCCTCTGTCCGCGTGGGAGGGGCCGGTGTGAGACAAGGGGCTCAGGCTGACCTCTCAGCGTGGGAGGGGCCGGTGTGAGGCAAAGGGCTCGGGCTGACCTCTCTCAGCGTGGGAGGGCCAGTGTGAGGCAGGGCTCACATGACCTCTCAGCATGGGAGGGGCCGGTGTGAGACAAGGGCTCGGG	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=136989;STD_quant_start=36.1248;STD_quant_stop=37.4166;Kurtosis_quant_start=0.21249;Kurtosis_quant_stop=1.96205;SVTYPE=INS;SUPTYPE=AL;SVLEN=195;STRANDS=+-;STRANDS2=6,8,6,8;RE=14;REF_strand=18,20;Strandbias_pval=1;AF=0.368421	GT:DR:DV	0/1:24:14
-chr1	180694	35784	N	]chrX:449444]N	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chrX;END=449444;STD_quant_start=296.777;STD_quant_stop=10.3923;Kurtosis_quant_start=0.018679;Kurtosis_quant_stop=0;SVTYPE=BND;SUPTYPE=SR;SVLEN=1;STRANDS=-+;STRANDS2=0,6,0,6;RE=6;REF_strand=26,30;Strandbias_pval=0.0354297;AF=0.107143	GT:DR:DV	0/0:50:6
-chr1	181262	2	N	CCGGCAGGCGCAGAGAGGCGCGGGCCGGGGTCGGGCGCAGGCGCAGAGAGCGCGGCCGGCGCAGAGGCGCAGAGAGGGCGCAGCA	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=181262;STD_quant_start=28.5441;STD_quant_stop=24.1125;Kurtosis_quant_start=2.54835;Kurtosis_quant_stop=1.686;SVTYPE=INS;SUPTYPE=AL;SVLEN=93;STRANDS=+-;STRANDS2=19,15,19,15;RE=34;REF_strand=56,46;Strandbias_pval=1;AF=0.333333	GT:DR:DV	0/1:68:34
-chr1	257667	35785	N	]chr5:181462060]N	.	PASS	PRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr5;END=181462060;STD_quant_start=0.894427;STD_quant_stop=0.447214;Kurtosis_quant_start=2;Kurtosis_quant_stop=2;SVTYPE=BND;SUPTYPE=SR;SVLEN=1;STRANDS=--;STRANDS2=0,5,5,0;RE=5;REF_strand=19,18;Strandbias_pval=0.0532252;AF=0.135135	GT:DR:DV	0/0:32:5
-chr1	350806	3	N	ACTCACTGAAGGTGGAGGGAAAATGGTGACCTAAGTC	.	PASS	PRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=350807;STD_quant_start=1.22475;STD_quant_stop=2.82843;Kurtosis_quant_start=3;Kurtosis_quant_stop=-0.65625;SVTYPE=INS;SUPTYPE=AL;SVLEN=37;STRANDS=+-;STRANDS2=4,2,4,2;RE=6;REF_strand=10,4;Strandbias_pval=1;AF=0.428571	GT:DR:DV	0/1:8:6
-chr1	368936	4	CCATCTCAGCAGCTCACGGTGTGGAAACTGCGACACTCACACGGGTGCCATCTCAGCAGCTCACGGTGTGGAAACTGCGACACTCACGTGGGTGCCATCTCAGCAGCTCACGGTGTAGAAACTGCGACACTCCCATGGGTGCCATCTCAGCAGCTCACGGTGTGGAAACTGCGACACTCACACGGGTGCCATCTCAGCAGCTCACGGTGTGGAAACTGCGACACTCACACGGGTGCCATCTCAGCAGCTCACGGTGTAGAAACTGCGACACTCCCATGGGTGCCATCTCAGCAGCTCACGGTGTGGAAACTGCGACACTCACACGGGTGCCATCTCAGCAGCTCACGGTGTGGAAACTGCGACACTCACACGGGT	N	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=369307;STD_quant_start=139.183;STD_quant_stop=119.892;Kurtosis_quant_start=-0.086052;Kurtosis_quant_stop=-0.157727;SVTYPE=DEL;SUPTYPE=AL;SVLEN=-371;STRANDS=+-;STRANDS2=7,5,7,5;RE=12;REF_strand=16,20;Strandbias_pval=0.511217;AF=0.333333	GT:DR:DV	0/1:24:12
-chr1	372679	5	CTTAGGGTCCATTCTGATCTGTATATATGTATAATATATATTATATATGGACCTCAGGGTCCATTCTGATCTGCATATATGTATAATATATATTATATATGGTCCTCAGGGTCCATTCTGATCTGTATATATGTATCATGTAAACATGAGTTCCTGCTGGCATATCTGTCTATAACCGACCA	N	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=372858;STD_quant_start=74.4439;STD_quant_stop=102.876;Kurtosis_quant_start=-0.974097;Kurtosis_quant_stop=1.36116;SVTYPE=DEL;SUPTYPE=AL;SVLEN=-179;STRANDS=+-;STRANDS2=6,4,6,4;RE=10;REF_strand=14,12;Strandbias_pval=1;AF=0.384615	GT:DR:DV	0/1:16:10
-chr1	374100	6	N	CCCCCTCTCCTTTCTCCTCTCCATCCCCCCTCTCCATCTCCTCTCCTTTCTCCTCTCTCGCCCCCTCTCCTTTCTCCCTCTCTATCCCCCTCTCCTTTCTCCCTCTCTCCCCCTCTCCTTTCTCCTCTCCATCCCCTCTCCATCCCCCTCTCCATCTCCTCTCCTTTCTCCTCTCTAGCCCCTCTCCTTTCTCTCTCCTCCCCCTCTCCTTTCTCCCTC	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=374100;STD_quant_start=57.1456;STD_quant_stop=79.9085;Kurtosis_quant_start=0.684282;Kurtosis_quant_stop=-0.413029;SVTYPE=INS;SUPTYPE=AL;SVLEN=227;STRANDS=+-;STRANDS2=4,4,4,4;RE=8;REF_strand=14,10;Strandbias_pval=0.703493;AF=0.333333	GT:DR:DV	0/1:16:8
-chr1	606600	7	GGTCAGAGCTGTCCTGGGTCAGAGCTGCCCAT	N	.	PASS	PRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=606632;STD_quant_start=2.98329;STD_quant_stop=2.54951;Kurtosis_quant_start=2.61341;Kurtosis_quant_stop=3.87685;SVTYPE=DEL;SUPTYPE=AL;SVLEN=-32;STRANDS=+-;STRANDS2=7,4,7,4;RE=11;REF_strand=28,28;Strandbias_pval=0.516721;AF=0.196429	GT:DR:DV	0/0:45:11
-chr1	609583	8	GTGGCCAGCAGGCGGCGCTGCAGGAGAGGAGATGCCCAGGCCTGGCGGCACACGCGGGTTC	N	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=609647;STD_quant_start=21.6956;STD_quant_stop=18.4174;Kurtosis_quant_start=-0.340189;Kurtosis_quant_stop=0.435423;SVTYPE=DEL;SUPTYPE=AL;SVLEN=-64;STRANDS=+-;STRANDS2=9,4,9,4;RE=13;REF_strand=30,34;Strandbias_pval=0.223523;AF=0.203125	GT:DR:DV	0/0:51:13
-chr1	611309	9	TGTGGGTGTGACAGGGTGTGTTCTGTGTGAGAACATGTGTGTAGTGTCCACATGTCCTCTGTGCGTGAGTCCCTGTGTGTGATGTTGTGTTCTCGGTGTGAGTTCATGGGTGTGATGGGGTGTGTGCTGTGTGAGAACGTGTGTGTAGTGTCCACATGTCCTCTGTGCGTGAGTCCCTGTGTGTGATGTTGTGTTCTTGGTGTGAGTTCATGGGTGTGACGGGGTGTGCTGTGTGAGAACGTGTGTGTAGTGTTCACATGTCCTCTGTGCGTGAGTCCCCGTGTGTGATGTTGTGTTCTCGGTGTGAGTTCATGGGTGTGACGGGGTGTGTGCTGTGTGAGAACGTGTGTGTAGTGTCCACATGTCCTCTGTGCGTGAGTCCCCGTGTGTGATGTTGTGTTCTCGGTGTGAGTTCATGGGTGTGACGGGGTGTGTGCTGTGTGAGAACGTGTGTGTAGTGTCCACATGTCCTCTGTGCGTGAGTCCCTGTGTGTGATGTTGTGTTCTCGGTGTGAGTTCATGGGTGTGACGGGGTGTGTGCTGTGTGAGAACGTGTGTGTAGTGTCCACATGTCCTCTGTGCGTGAGTCCCCGTGTGTGATGTTGTGTTCTCGGTGTGAGTTCATGGGTGTGACGGGGTGTGTGCTGTGTGAGAACGTGTGTGTAGTGTCCACATGTCCTCTGTGCGTGAGTCCCCGTGTGTGATGTTGTGTTCTCGGTGTGAGTTCA	N	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=612033;STD_quant_start=78.5303;STD_quant_stop=59.8415;Kurtosis_quant_start=-0.363;Kurtosis_quant_stop=0.0992;SVTYPE=DEL;SUPTYPE=AL;SVLEN=-724;STRANDS=+-;STRANDS2=7,6,7,6;RE=13;REF_strand=29,33;Strandbias_pval=0.763359;AF=0.209677	GT:DR:DV	0/0:49:13
-chr1	744867	10	N	TATATATATATATATATATATATATATATATA	.	PASS	PRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=744867;STD_quant_start=1.34164;STD_quant_stop=4.07431;Kurtosis_quant_start=2;Kurtosis_quant_stop=-0.953985;SVTYPE=INS;SUPTYPE=AL;SVLEN=35;STRANDS=+-;STRANDS2=4,1,4,1;RE=5;REF_strand=24,16;Strandbias_pval=0.635332;AF=0.125	GT:DR:DV	0/0:35:5
-chr1	814584	11	N	AAAAAAAGATGTGAAACCTATTTTCAGAATTAACATTTCCTTCCTAAATATCTAACACAACACTGAAGGAGAAAGTCCAGTCAATTTTATGTAGTT	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=814585;STD_quant_start=17.8792;STD_quant_stop=15.8719;Kurtosis_quant_start=-1.85801;Kurtosis_quant_stop=-1.90018;SVTYPE=INS;SUPTYPE=AL;SVLEN=96;STRANDS=+-;STRANDS2=13,11,13,11;RE=24;REF_strand=40,44;Strandbias_pval=0.64659;AF=0.285714	GT:DR:DV	0/0:60:24
-chr1	820880	12	N	TCTACACTACCTGCCTGGCCAGCAGATCCACCCTGTCTACACTACCTGCCTGGGCAGTAGTTCCACGCAATCTCCCTTACCTGCCTCTCCAGCAGACCCGCCCTATCTATACTACTTGCCTGTCCAGCAGATCCACTTCCCATTCACACGACCTGCCTGTCCAGCAGATCCACCCTGTCTACACTACCTTCCTGCTTGTCCAGCAGGTCCACCCTGTCTATACTACCTGCCTGGCCAGTAGATCCACACACTA	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=820881;STD_quant_start=6.70075;STD_quant_stop=12.2963;Kurtosis_quant_start=5.29053;Kurtosis_quant_stop=5.17296;SVTYPE=INS;SUPTYPE=AL;SVLEN=245;STRANDS=+-;STRANDS2=11,9,11,9;RE=20;REF_strand=54,40;Strandbias_pval=1;AF=0.212766	GT:DR:DV	0/0:74:20
-chr1	820906	13	TCCACCCTGTCTACACTACCTGCTTGTCCAGCAGG	N	.	PASS	PRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=820941;STD_quant_start=2.56905;STD_quant_stop=2.70185;Kurtosis_quant_start=-1.38237;Kurtosis_quant_stop=-1.08812;SVTYPE=DEL;SUPTYPE=AL;SVLEN=-35;STRANDS=+-;STRANDS2=11,9,11,9;RE=20;REF_strand=54,40;Strandbias_pval=1;AF=0.212766	GT:DR:DV	0/0:74:20
-chr1	822428	14	CCTGGCCAGCAGATCCACCCTGTCTATACTACCTG	N	.	PASS	PRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=822463;STD_quant_start=2.98329;STD_quant_stop=2.91548;Kurtosis_quant_start=-1.2983;Kurtosis_quant_stop=-1.31531;SVTYPE=DEL;SUPTYPE=AL;SVLEN=-35;STRANDS=+-;STRANDS2=11,9,11,9;RE=20;REF_strand=54,42;Strandbias_pval=1;AF=0.208333	GT:DR:DV	0/0:76:20
-chr1	839479	15	ACACACACCTGGACAAACACACCTGGACACACACACCTAG	N	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=839519;STD_quant_start=11.7558;STD_quant_stop=10.8904;Kurtosis_quant_start=-1.43253;Kurtosis_quant_stop=-1.20587;SVTYPE=DEL;SUPTYPE=AL;SVLEN=-40;STRANDS=+-;STRANDS2=8,7,8,7;RE=15;REF_strand=42,42;Strandbias_pval=1;AF=0.178571	GT:DR:DV	0/0:69:15
-chr1	853534	16	GCCGTGTGGTAAACTGATGAACCCCGACCCTGATGAACGTGAGATG	N	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=853581;STD_quant_start=21.0143;STD_quant_stop=21.4033;Kurtosis_quant_start=-1.50523;Kurtosis_quant_stop=-1.48919;SVTYPE=DEL;SUPTYPE=AL;SVLEN=-47;STRANDS=+-;STRANDS2=7,8,7,8;RE=15;REF_strand=30,36;Strandbias_pval=1;AF=0.227273	GT:DR:DV	0/0:51:15
-chr1	866801	17	N	CGCTCCTGGCCGTCTCCGAGCCCTCCACATGTCTCCTGCCTCATCCCTGACGTCCTCCCAGGCCCTCGTGGTCACTCCCCCTGCACTC	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=866839;STD_quant_start=45.3707;STD_quant_stop=37.0162;Kurtosis_quant_start=-2.21189;Kurtosis_quant_stop=-1.85872;SVTYPE=INS;SUPTYPE=AL;SVLEN=49;STRANDS=+-;STRANDS2=6,12,6,12;RE=18;REF_strand=32,44;Strandbias_pval=0.598063;AF=0.236842	GT:DR:DV	0/0:58:18
-chr1	872837	18	N	GGGGAGGTTTCATTTGCTCCACCTGCAGCGAGTAAGTAGCCCATCTCAGGTTTGACTCCTGACTTAATTCCTAACAGGGGAAGCCAAGGTCCTGTGACCCTCCCGGGGGAGGGGTTTCATTTGTTCTACCTGCAGTGAGGTCTGTTAGCCCATCTCAGGTTTGACTCCTGACTCTAATTCTAACAGGAAGCTGTCCTGTAACTCTGGGGAGGGGGGGGTTTCATTTGCTCCACCTGCAGCGAGGTTAGCCCTCCATCTCAGGTTTGACTCCTGACTTAATTCCTAACAGGGGAAGCTGCTGTCCTGTGACTCTGGGAGAAGGGGTTTCATTTGCTCCACCTGCAGTGAGGTCTGCTAGCCCATCTCAGGTTTGACTCTGACTTAATTCCTAAACAGGGGAAGCTGCTGTCCTGTAACTT	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=872837;STD_quant_start=87.716;STD_quant_stop=108.899;Kurtosis_quant_start=-1.21956;Kurtosis_quant_stop=-1.1229;SVTYPE=INS;SUPTYPE=AL;SVLEN=416;STRANDS=+-;STRANDS2=7,7,7,7;RE=14;REF_strand=28,30;Strandbias_pval=1;AF=0.241379	GT:DR:DV	0/0:44:14
-chr1	876112	19	N	CCCCATACTCCTCCCCCATACTCCCCCATACCCCCCCACACTCCCCCCATACTCCTCCCCCATACTCCCCCATACTCCCCCACACTCCCCCATACTCCTCCCCCATACTCCCCTATACTCCCCACACTCCCCCCAAACTCCCCCCATACTCCTCCCCCATACTCCCCATACTCCCCCACACTCCCCCACACTCCCCCATACTCCCCCACACTGTTCCCCCCATACCTCCCCCATACTCCCCCACACTCCCCCACACTCCCCCACGCTCCTCCCCCACACCCTCCCACACTCCCCCACACTCCCCTACTGCCTTCCCCCACACTCCCCCACACTCCTCCCCATACTCCCCCACACTCCCTCATACTCCCCATACTACCCCAACCTCCCCCATACTCCCCCATACTCCCCACACACTCCCCCCACACTCCCCCCAAACTCCCCCATACTCCTCCCCCAGTACTCCCCCATACTCCCACACTCCCACACTCCCCCACACTCCCCCCATACTCCCCCACACTCCCCCACACTCACTCCACACTCCCCATACTCCCCAAATCTCCCCCATACTCCCACATTCCCCCACACTCCCCACACTCCCCCATACTCCCCCACACTCCCCACACTCACCCACACCCCCCCATACTCCCCAACCTCCCCCATACTCCCCACATTCCCCCATACTCCCCCATACTCCTCCCCCATACTCCCCCCATACTCCCCCACACTCCCCACACTCCCCCATACTCCCCCACACTCCCCATACTCCCCCTGCATCCTCCCCATACTCCCCACATTCCCCCATACTCCCCATACTCCCCACACTCCCCCACACTCCCCCATACTCCCCCTCACACTCCCCCCATACTCCCCAACCTCCCAAACTCCCCCACATTCCCCCATACTCCCCATACTCCCCCAAACTCCCCATACTCCTCCCCTCAATACTCCCCATACTCCCCCATACTGCCCAACCTCCCCATACCCCCCACACTCCCCCCATACTCCCCCCACACCCCCCCCATACTCCCCCACACTCCCCTGCAACTCCCCTTATACTCCTCCCCCATACTCCCCATACTCCCCCCACACTCCCCAAACTCCCCATACTCCTCCCCATACTCCCCATACTCCCCCACACTCCCCCATACTCCTCCCCCATACTCCCCATACCCCCACACTCCCCCATACTCTCCCCATACTCCCCATACTCCCCACACTCCCCCAAACTCCCCCATACTCCTCCCCCATACTCCCCATACTCCCCCACACTCCCCCACACTCCCCCATACTCCCCACACTCCCCCATACTCCCCCAACCTCCCCATACTCCCCCACATTCCCCTATTACTCCCCATACTCCCCAAACTCCCCACATTCCCCCATACTCCCCCATACTCCCCAAACTCCCCCATACTCCTCCCCCACACTCCCCATACTCCCCCATACTCGCCCAACCTCCCCATACTCCCCCACTCCCCCATACTCCCCCACAGTCCCCCACACTCCCCCACACACTCCCCAACCTCCCCCATACTCCCCATACTCGCCCACACTCGCCCACACCCCCCCATACTCCCCACACTCCCCCACACTCCCCCACACCCCCCATACTCCCCCATACTCCCCATACTCCCCCACACCCCCACACT	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=876112;STD_quant_start=160.964;STD_quant_stop=281.694;Kurtosis_quant_start=-1.48637;Kurtosis_quant_stop=-0.804806;SVTYPE=INS;SUPTYPE=AL,SR;SVLEN=1649;STRANDS=+-;STRANDS2=7,6,7,6;RE=13;REF_strand=48,38;Strandbias_pval=1;AF=0.151163	GT:DR:DV	0/0:73:13
-chr1	876433	35786	N	N[chr4:189980733[	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr4;END=189980733;STD_quant_start=373.501;STD_quant_stop=193.312;Kurtosis_quant_start=-0.601023;Kurtosis_quant_stop=-0.696578;SVTYPE=BND;SUPTYPE=SR;SVLEN=1;STRANDS=++;STRANDS2=4,3,3,4;RE=7;REF_strand=15,15;Strandbias_pval=1;AF=0.233333	GT:DR:DV	0/0:23:7
-chr1	878423	35787	N	]chr3:198124405]N	.	PASS	PRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr3;END=198124405;STD_quant_start=0.632456;STD_quant_stop=0.316228;Kurtosis_quant_start=4.00716;Kurtosis_quant_stop=2.14525;SVTYPE=BND;SUPTYPE=SR;SVLEN=1;STRANDS=-+;STRANDS2=12,5,12,5;RE=17;REF_strand=34,32;Strandbias_pval=0.182341;AF=0.257576	GT:DR:DV	0/0:49:17
-chr1	878423	36833	N	]chr3:198124405]N	.	PASS	PRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr3;END=198124405;STD_quant_start=2.72029;STD_quant_stop=0.632456;Kurtosis_quant_start=1.9394;Kurtosis_quant_stop=6.69527;SVTYPE=BND;SUPTYPE=SR;SVLEN=1;STRANDS=-+;STRANDS2=3,8,3,8;RE=11;REF_strand=34,32;Strandbias_pval=0.19555;AF=0.166667	GT:DR:DV	0/0:55:11
-chr1	882645	20	ATATATTAGCTATTCTAGACTTTATGCATTTATGTAAAGTTTTCTTTGTTGCACTTTAAGTTCTGTGATACATGGGCAGAGCATGC	N	.	PASS	PRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=882732;STD_quant_start=2.70801;STD_quant_stop=2.51661;Kurtosis_quant_start=0.409091;Kurtosis_quant_stop=3.73961;SVTYPE=DEL;SUPTYPE=AL;SVLEN=-87;STRANDS=+-;STRANDS2=8,1,8,1;RE=9;REF_strand=77,90;Strandbias_pval=0.0153702;AF=0.0538922	GT:DR:DV	0/0:158:9
-chr1	883246	35788	N	N[chr20:29351529[	.	STRANDBIAS	PRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr20;END=29351529;STD_quant_start=6.0208;STD_quant_stop=8.59506;Kurtosis_quant_start=4.94502;Kurtosis_quant_stop=2.53006;SVTYPE=BND;SUPTYPE=SR;SVLEN=1;STRANDS=++;STRANDS2=8,0,0,8;RE=8;REF_strand=31,35;Strandbias_pval=0.00564375;AF=0.121212	GT:DR:DV	0/0:58:8
-chr1	883246	35789	N	N[chr20:29789177[	.	STRANDBIAS	PRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr20;END=29789177;STD_quant_start=1.73205;STD_quant_stop=2.05481;Kurtosis_quant_start=4.74074;Kurtosis_quant_stop=5.09003;SVTYPE=BND;SUPTYPE=SR;SVLEN=1;STRANDS=++;STRANDS2=9,0,0,9;RE=9;REF_strand=30,34;Strandbias_pval=0.00272312;AF=0.140625	GT:DR:DV	0/0:55:9
-chr1	886250	21	N	TGTGCTGGCCCTTTGGCAGAGCAGGTGTGGGT	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=886250;STD_quant_start=14.922;STD_quant_stop=15.3351;Kurtosis_quant_start=-0.359429;Kurtosis_quant_stop=-0.424765;SVTYPE=INS;SUPTYPE=AL;SVLEN=32;STRANDS=+-;STRANDS2=4,2,4,2;RE=6;REF_strand=20,38;Strandbias_pval=0.186216;AF=0.103448	GT:DR:DV	0/0:52:6
-chr1	893790	22	AAAAAAAAAAAAATATATATATATATATATATATAT	N	.	PASS	PRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=893826;STD_quant_start=0.738549;STD_quant_stop=0;Kurtosis_quant_start=-1.74362;Kurtosis_quant_stop=-1.42857;SVTYPE=DEL;SUPTYPE=AL;SVLEN=-36;STRANDS=+-;STRANDS2=13,10,13,10;RE=23;REF_strand=28,24;Strandbias_pval=1;AF=0.442308	GT:DR:DV	0/1:29:23
-chr1	907836	23	N	CTGCCCGGTCCTTCTGACCAGCCGAGAGAGTA	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=907836;STD_quant_start=11.7346;STD_quant_stop=12.1491;Kurtosis_quant_start=-0.460251;Kurtosis_quant_stop=-0.470373;SVTYPE=INS;SUPTYPE=AL;SVLEN=32;STRANDS=+-;STRANDS2=6,6,6,6;RE=12;REF_strand=34,32;Strandbias_pval=1;AF=0.181818	GT:DR:DV	0/0:54:12
-chr1	909140	24	TTTGCTGGTATATGCGGGGGTCGGGGTCAGGCCCCCGGGCGCACCGTTGCTGGTATATGCGGGGGTCGGGGTCAGGCCCCCGGGCGTTGCTGGTATATGCGGGGGTCGGGGTCAGGCCCCCGGGCGCACCGTTGCTGGTATATGCGGTGGTCGGGGTCAGGCCCCCGGGCGCACCGTTGCTGGTATATGCGGGGGTCGGGGTCAGGCCCCCGGGCGCACCGTTGCTGGTATATGCGGTGGTCGGGGTCAGGCCCCCGGGCGCATCTTTGCTGGTATATGCGGTGGTCGGGGTCAGGCCCCCCGGGCGCACT	N	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=909494;STD_quant_start=64.8764;STD_quant_stop=64.2294;Kurtosis_quant_start=-2.03504;Kurtosis_quant_stop=-1.70278;SVTYPE=DEL;SUPTYPE=AL;SVLEN=-354;STRANDS=+-;STRANDS2=20,14,20,14;RE=34;REF_strand=39,33;Strandbias_pval=0.680851;AF=0.472222	GT:DR:DV	0/1:38:34
-chr1	934067	25	GGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTCCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTCCGTTACAGGTGGGCAGGGGAGGCGGCTCCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGGGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTCCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGGGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCTGCTCCGTTACAGGTGGGCGGGGGAGGCTGCTCCGTTACAGGTGGGCGGGGGGGGCGGCTGCGTTACAGGTGGGCGGGGGGGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTCCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTCCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAG	N	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=934880;STD_quant_start=19.0606;STD_quant_stop=20.0499;Kurtosis_quant_start=1.46688;Kurtosis_quant_stop=-1.88984;SVTYPE=DEL;SUPTYPE=AL;SVLEN=-813;STRANDS=+-;STRANDS2=12,14,12,14;RE=26;REF_strand=24,33;Strandbias_pval=0.812937;AF=0.45614	GT:DR:DV	0/1:31:26
-chr1	936289	26	AGGGCTCCTGGACGGAGGGGGTCCCCGGTCCCGCCTCCTA	N	.	PASS	PRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=936328;STD_quant_start=5.46316;STD_quant_stop=5.2915;Kurtosis_quant_start=0.217921;Kurtosis_quant_stop=0.801437;SVTYPE=DEL;SUPTYPE=AL;SVLEN=-39;STRANDS=+-;STRANDS2=11,15,11,15;RE=26;REF_strand=24,34;Strandbias_pval=1;AF=0.448276	GT:DR:DV	0/1:32:26
-chr1	948662	27	N	CCTGGCTGTCCTTGGTCCCCTGGTCCCTTGGCCCTGCA	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=948696;STD_quant_start=12.3786;STD_quant_stop=16.4784;Kurtosis_quant_start=-2.07742;Kurtosis_quant_stop=-2.33448;SVTYPE=INS;SUPTYPE=AL;SVLEN=37;STRANDS=+-;STRANDS2=8,19,8,19;RE=27;REF_strand=18,40;Strandbias_pval=1;AF=0.465517	GT:DR:DV	0/1:31:27
-chr1	964642	28	CAGTGGGGATGTGCTGCCGGGAGGGGGGCGCGGGTCCGCAGTGGGGATGTGCTGCCGGGAGGGGGGCGCGGGTCCGCA	N	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=964717;STD_quant_start=14.3717;STD_quant_stop=16.1442;Kurtosis_quant_start=-1.6698;Kurtosis_quant_stop=-1.78719;SVTYPE=DEL;SUPTYPE=AL;SVLEN=-75;STRANDS=+-;STRANDS2=12,10,12,10;RE=22;REF_strand=26,22;Strandbias_pval=1;AF=0.458333	GT:DR:DV	0/1:26:22
-chr1	976811	29	N	CAACCCCGGGAACCGCCTCCCACTCCCCCCACCAACCCCCGGGAACCGCCTCCCACTTCTCCCGCAACCCCGGGAACTGCCTCCCACTCCCTTCTGCAACCCCCGGGAACCGCTCCCACTCCCCGA	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=976916;STD_quant_start=53.1169;STD_quant_stop=72.0569;Kurtosis_quant_start=-0.975517;Kurtosis_quant_stop=-0.734689;SVTYPE=INS;SUPTYPE=AL,SR;SVLEN=87;STRANDS=+-;STRANDS2=9,10,9,10;RE=15;REF_strand=36,36;Strandbias_pval=1;AF=0.208333	GT:DR:DV	0/0:57:15
-chr1	977334	30	N	CGCTCCCCACTCCCCCGCAACTTCGGGAACCGCCTCCCCACTCCCCCACCAACCCCTGAACCA	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=977334;STD_quant_start=59.633;STD_quant_stop=54.4259;Kurtosis_quant_start=-1.27956;Kurtosis_quant_stop=-1.57241;SVTYPE=INS;SUPTYPE=AL;SVLEN=131;STRANDS=+-;STRANDS2=11,13,11,13;RE=22;REF_strand=38,36;Strandbias_pval=0.814668;AF=0.297297	GT:DR:DV	0/0:52:22
-chr1	977541	31	N	CCCCGGAACCGCTCCCACCGCGCGCAACCCCTGAACCGCCTCCCACTCCCCACCAACCCTGGAACCGCCTCCACTCCCCTCTTACCGTT	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=977584;STD_quant_start=48.8615;STD_quant_stop=40.2961;Kurtosis_quant_start=-0.497493;Kurtosis_quant_stop=-0.185665;SVTYPE=INS;SUPTYPE=AL;SVLEN=67;STRANDS=+-;STRANDS2=3,6,3,6;RE=9;REF_strand=36,38;Strandbias_pval=0.490453;AF=0.121622	GT:DR:DV	0/0:65:9
-chr1	977848	32	N	ACCAACCGGGGAGCCGCCTCCCCTCCCCCCACCCGCCCCGAGCCGCCTGCCCCCGCCACCAACCCCGGGAACCACCTCCCACTCCCCGCCCAACCCCGGGAACCGCCCCTCCCCTCCCCACG	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=977882;STD_quant_start=61.4687;STD_quant_stop=66.9395;Kurtosis_quant_start=-1.68422;Kurtosis_quant_stop=-1.71073;SVTYPE=INS;SUPTYPE=AL;SVLEN=86;STRANDS=+-;STRANDS2=9,10,9,10;RE=19;REF_strand=34,38;Strandbias_pval=1;AF=0.263889	GT:DR:DV	0/0:53:19
-chr1	988831	33	N	AGTTCTGGAGTTGATTGTTTCTCAGAGGTTCAGGGTTGAGTGTTC	.	PASS	PRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=988831;STD_quant_start=6.13314;STD_quant_stop=6.34277;Kurtosis_quant_start=-1.27849;Kurtosis_quant_stop=-1.31557;SVTYPE=INS;SUPTYPE=AL;SVLEN=46;STRANDS=+-;STRANDS2=11,16,11,16;RE=27;REF_strand=22,36;Strandbias_pval=0.815445;AF=0.465517	GT:DR:DV	0/1:31:27
-chr1	996353	34	N	GCACCTACATCTGGGGCCACAGGATGCAGGGTGGGGAGGGCAAGGCCTCTGCGGA	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=996353;STD_quant_start=25.8341;STD_quant_stop=26.3869;Kurtosis_quant_start=-1.7722;Kurtosis_quant_stop=-1.00787;SVTYPE=INS;SUPTYPE=AL;SVLEN=64;STRANDS=+-;STRANDS2=12,18,12,18;RE=30;REF_strand=30,52;Strandbias_pval=0.826462;AF=0.365854	GT:DR:DV	0/1:52:30
-chr1	998765	35	N	GGGGAGGGCGCTGAGCCGAGGGGGAGGGCTGAGCGGGAG	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=998770;STD_quant_start=11.4935;STD_quant_stop=9.94485;Kurtosis_quant_start=-1.80857;Kurtosis_quant_stop=-1.45908;SVTYPE=INS;SUPTYPE=AL;SVLEN=34;STRANDS=+-;STRANDS2=4,10,4,10;RE=14;REF_strand=26,46;Strandbias_pval=0.762111;AF=0.194444	GT:DR:DV	0/0:58:14
-chr1	1030890	36	TGTGTGTGTGTGCAGTGCATGGTGCTGTGAGATCAGCG	N	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1030928;STD_quant_start=15.6993;STD_quant_stop=15.5285;Kurtosis_quant_start=0.12083;Kurtosis_quant_stop=-0.088691;SVTYPE=DEL;SUPTYPE=AL;SVLEN=-38;STRANDS=+-;STRANDS2=18,13,18,13;RE=31;REF_strand=39,26;Strandbias_pval=1;AF=0.476923	GT:DR:DV	0/1:34:31
-chr1	1041778	37	GGCCAGTGCCAGGGTCGAGGTGGGCGGCTCCCCCGGGGGAGGGCTG	N	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1041824;STD_quant_start=15.7567;STD_quant_stop=16.687;Kurtosis_quant_start=-0.850302;Kurtosis_quant_stop=-1.83988;SVTYPE=DEL;SUPTYPE=AL;SVLEN=-46;STRANDS=+-;STRANDS2=11,12,11,12;RE=23;REF_strand=23,25;Strandbias_pval=1;AF=0.479167	GT:DR:DV	0/1:25:23
-chr1	1068748	38	N	AAGGCCACGCGGGCTGTGCAGATGCAGGTGCGGCGGGGCGGGCCACGCGGGCTGTGAAGGTGCAGGT	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1068809;STD_quant_start=28.0891;STD_quant_stop=12.2066;Kurtosis_quant_start=-2.18788;Kurtosis_quant_stop=-1.80804;SVTYPE=INS;SUPTYPE=AL;SVLEN=75;STRANDS=+-;STRANDS2=19,14,19,14;RE=33;REF_strand=42,28;Strandbias_pval=0.832838;AF=0.471429	GT:DR:DV	0/1:37:33
-chr1	1076283	39	GCTACGGGGACTCCGTGGGGGGAGGCTGAGGCTACGGGGACTCCGTGCTGGGAGGCTGAGGCTATGGGGACTCCGTCGGGGGAGGCTGAGTCTATGGGGACTCCGTGGGGGGAGGCTGAGGCTATGGGGACTCCGTGCCGGGAGGCTGAGGCTATGGGGACTCCGTGGGGCAGGCTGAGGCTATGGTGACTCCGTGCAGGGCTGTGAGGCTACGGGGACTCCGTGGGGGGTGGCTGAGGCTACGGGGACTCCGTGGGGGGAGGCTGAGGCTACGGGGACTCCGTGGGGGGAGGCTGAGGCTACGGGGACTCCGTGGGGGGAGGCTGAGGCTACGGGGACTCCGTGGGGGGAGGCTGAGGCTACGGGGACTCCGTGGGGGGAGGCTGAGGCTACGGGGACTCCGTGGGGGGAGGCTGA	N	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1076735;STD_quant_start=48.1819;STD_quant_stop=70.5606;Kurtosis_quant_start=0.175533;Kurtosis_quant_stop=-1.43403;SVTYPE=DEL;SUPTYPE=AL,SR;SVLEN=-452;STRANDS=+-;STRANDS2=7,11,7,11;RE=18;REF_strand=18,23;Strandbias_pval=0.780972;AF=0.439024	GT:DR:DV	0/1:23:18
-chr1	1076341	40	GCTGGGAGGCTGAGGCTATGGGGACTCCGTCGGGGGAGGCTGAGTCTATGGGGACTCCGTGGGGGGAGGCTGAGGCTATGGGGACTCCGTGCCGGGAGGCTGAGGCTATGGGGACTCCGTGGGGCAGGCTGAGGCTATGGTGACTCCGTGCAGGGCTGTGAGGCTACGGGGACTCCGTGGGGGGTGGCTGAGGCTACGGGGACTCCGTGGGGGGAGGCTGAGGCTACGGGGACTCCGTGGGGGGAGGCTGAGGCTACGGGGACTCCGTGGGGGGAGGCTGAGGCTACGGGGACTCCGTGGGGGGAGGCTGAGGCTACGGGGACTCCGTGGGGGGAGGCTGAGGCTACGGGGACTCCGTGGGGGGAGGCTGAGGCTACGGGGACTCCGTGGGGGGAGGCTGAGGCTACGGGGACTCCGTGGGGGGAGGCTGAGGCTACGGGGACTCCGTGGGGGGAGGCTGAGGCTACGGGGACTCCGTGGGGGGAGGCTGAGGCTACGGGGACTCCGTGGGGGGAGGCTGAGGCTACGGGGACTCCGTGGGGGGAGGCTGAGGCTACGGGGACTCCGTGGGGGGAGGCTGAGGCTACGGGGACTCCGTGGGGGGAGGCTGAGGCTACGGGGACTCCGTGGGGGGAGGCTGAGGCTACGGGGACTCCGTGGGGGGAGGCTGAGGCTACGGGGACTCCGTGGGGGGAGGCTGAGGCTACGGGGACTCCGTGGGGGGAGGCTGAGGCTACGGGGACTCCGTGCCGGGAGGCTGAGGCTACGGGGACTCCGTGCGGGGAGGCTGAGTCTACGGGGACTCCGTGAGGGGTGGCTGAGTCTATGGGGACTCCGTGCGGGGAGGCTGAGTCTATGGGGACTCCGTGCGGGGTGGCTGAGGCTATGGGGACTCCGTGCCGGGAGGCTGAGGCTATGGGGACTCCGTGCCGGGAGGCTGAGTCTATGGGGACTCCGTGCCGGGAGGCTGAGTCTATGGGGACTCTGTGCCGGGAGGCTGAGGCTACGGGGACTCCGTGCCGGGAGGCTGAGGCTACGGGGACTCCGTGGGGGGAGGCTGAGGCTACGGGGACTCCGTGGGGGGAGGCTGAGGCTACGGGGACTCCGTGGGGGGAGGCTGAGGCTATGGGGACTCCGTGGGGGGAGGCTGAGGCTATGGGGACTCCGTGGGGGGAGGCTGAGTCTATGGGGACTCCGTGCCGAGAGGCTGAGTCTATGGGGACTCCGTGCCGGGAGGCTGAGTCTATGGGGACTCCGTGCCGGGAGGCTGAGTCTATGGGGACTCCGTTGGGGGAGGCTGAGGCTATGGGGACTCCGTTGGGGGAGGCTGAGGCTATGGGGACTCCGTTGGGGGAGGCTGAGGCTATGGGGACTCCGTGCGGGGAGGCTGAGGCTACGGGGACTCCGTGGGGGGAGGCTGAGGCTACGGGGACTCCGTGG	N	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1077781;STD_quant_start=341.572;STD_quant_stop=478.388;Kurtosis_quant_start=-1.1971;Kurtosis_quant_stop=-0.402974;SVTYPE=DEL;SUPTYPE=AL,SR;SVLEN=-1440;STRANDS=+-;STRANDS2=3,2,3,2;RE=5;REF_strand=19,23;Strandbias_pval=0.653637;AF=0.119048	GT:DR:DV	0/0:37:5
-chr1	1080919	41	N	CTGTCCTTCTCACTTCCTGCCTCGGTCTCTGTCTCCTTCCCTCCGCCCTACCTCGGTCCTATCATCCTTCCTCGCCTACCTCAGGTCCCTGTCTCCTTCCCTCCATACACACTCGGTCCCTGTCTCTCTTCCCTCCGCCTGGTCCCTGTCTCCTTCCCTCCTTCCCCCCACCTCCGGTCCTGTCTCCTTCCCTCCCTTCCGCCTCAGTCTGTCTCACTTCCCTCCGCCCACCTCAGTCCCCTGTCTCCTTCCCTCCCACACTTCCTCTGGTCCTGTCTCCTTCCCTCAAGCCCCCTCAGT	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1080919;STD_quant_start=32.45;STD_quant_stop=47.856;Kurtosis_quant_start=-1.30067;Kurtosis_quant_stop=-1.55852;SVTYPE=INS;SUPTYPE=AL;SVLEN=289;STRANDS=+-;STRANDS2=12,9,12,9;RE=21;REF_strand=26,20;Strandbias_pval=1;AF=0.456522	GT:DR:DV	0/1:25:21
-chr1	1139106	42	GGGTCAGAAGGTGGGGGTGTCAACGTCGAACCGGGGGACCTGGGTCCTGGGGAGTTTCCTGGGGTCAGAAGGTAGGGGTGTCAATGTCGAACCGGGGGACCTGGGTCCTGGGGAGCTTCCT	N	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1139227;STD_quant_start=15.678;STD_quant_stop=18.4038;Kurtosis_quant_start=-2.1908;Kurtosis_quant_stop=-2.23094;SVTYPE=DEL;SUPTYPE=AL;SVLEN=-121;STRANDS=+-;STRANDS2=15,6,15,6;RE=21;REF_strand=41,31;Strandbias_pval=0.312886;AF=0.291667	GT:DR:DV	0/0:51:21
-chr1	1140200	43	AGGTGGGGGTGTCAACGTCGAACCGGGGGGCCTGGGTCCTGGGGAGCTTCCTGGGGTC	N	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1140259;STD_quant_start=17.0822;STD_quant_stop=14.6151;Kurtosis_quant_start=0.610656;Kurtosis_quant_stop=0.583042;SVTYPE=DEL;SUPTYPE=AL;SVLEN=-59;STRANDS=+-;STRANDS2=14,6,14,6;RE=20;REF_strand=44,36;Strandbias_pval=0.312112;AF=0.25	GT:DR:DV	0/0:60:20
-chr1	1140410	44	N	CGTCCGAACCGGGGGGACCTGGGTCCTGGGAGCTTCCTGGGTTCAGAAGGTGGGGGTGTCAGCATCGAACCGGGGGACCTGAGTCCTGGGAGCTTCCTGGGGTCAGAAGGTGGGGGTGTCAGCATCGAACCGGGGGACCTGGGTCCTGGGGGCTTCCTGGGGTCAGAAGGTGGGGGTGTCAACATCGAACCGGGGGGCCCTGGGAGTCCTGGGAGCTTCTGGGGTCAGAAGGTGGGAGTGTCCAGCATCGAACCGGGGGGACCTGGGTCCTGGGGAGCTTCCTGGGGTCAGAAGGTAGGGGTGTCAGCATCGAACCGGGGGACCTGGGTCATGGGGGCTTCCTGGGGTCAGAAGGTGGGGGTGTCAACGTCGAACCGGGGGGCCTGGGTCCTGGGAGCTTCCTGGGGTCAGAAGGTAGGGGTGTCAACGTCAGACAGGGGACCTGGGTCCTGGGAGCTTCCTGGGGTCAGAAGGTGGGGGTGTCAACGCGTCGAACCGGGGGGACCTGGGTCCTGGGGAGCTTCCTGGGGTCAGAAGGTGGGGGTGTCCAG	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1140410;STD_quant_start=232.849;STD_quant_stop=281.385;Kurtosis_quant_start=1.04118;Kurtosis_quant_stop=0.939709;SVTYPE=INS;SUPTYPE=AL;SVLEN=536;STRANDS=+-;STRANDS2=4,7,4,7;RE=11;REF_strand=42,40;Strandbias_pval=0.522879;AF=0.134146	GT:DR:DV	0/0:71:11
-chr1	1141388	45	N	TCATCCTCTGTCCACAACCCCATCCTTACCTCTATCCCCCACCTTACATCTCATTCCTCTATCCCT	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1141394;STD_quant_start=10.6724;STD_quant_stop=14.1704;Kurtosis_quant_start=-1.44054;Kurtosis_quant_stop=-1.65538;SVTYPE=INS;SUPTYPE=AL;SVLEN=55;STRANDS=+-;STRANDS2=14,7,14,7;RE=21;REF_strand=44,40;Strandbias_pval=0.327428;AF=0.25	GT:DR:DV	0/0:63:21
-chr1	1168031	46	CGGGGCCAGCAGACGGGTGAGGGCGGAGGGCCGA	N	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1168063;STD_quant_start=14.2864;STD_quant_stop=13.7514;Kurtosis_quant_start=-1.22974;Kurtosis_quant_stop=-1.1581;SVTYPE=DEL;SUPTYPE=AL;SVLEN=-32;STRANDS=+-;STRANDS2=7,8,7,8;RE=15;REF_strand=38,32;Strandbias_pval=0.776548;AF=0.214286	GT:DR:DV	0/0:55:15
-chr1	1212606	47	N	CAGCCCTCCTCCCAGCCCCTGGCTCCCTCTGCCCCCTCA	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1212613;STD_quant_start=10.1207;STD_quant_stop=8.7014;Kurtosis_quant_start=-0.032446;Kurtosis_quant_stop=-1.09448;SVTYPE=INS;SUPTYPE=AL;SVLEN=32;STRANDS=+-;STRANDS2=2,5,2,5;RE=7;REF_strand=30,28;Strandbias_pval=0.42665;AF=0.12069	GT:DR:DV	0/0:51:7
-chr1	1226331	48	CCCTCAACCCTGTACGGTCAGGAGGAAACATGGCACCTCCCCTCTGGGGGCTCTTTCCAGAAAC	N	.	PASS	PRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1226395;STD_quant_start=5.74456;STD_quant_stop=5.1672;Kurtosis_quant_start=-1.50778;Kurtosis_quant_stop=-1.62886;SVTYPE=DEL;SUPTYPE=AL;SVLEN=-64;STRANDS=+-;STRANDS2=8,7,8,7;RE=15;REF_strand=34,39;Strandbias_pval=0.77815;AF=0.205479	GT:DR:DV	0/0:58:15
-chr1	1227293	49	GCGGGAAGGCGAGCTCGTGGCCAGGCCCTGCGGGAAGGCGAGCTCGTGGCCAGGCCCGGCGGGAAGGCGAGCTCGTGGCCAGGCCCGGCGGGAAGGCGAGCTCGTGGCCAGGCCCGGCGGGAAGGCGAGCTCGTGGCCAGGCCCTGCGGGAAGGCGAGCTCGTGGCCAGGCCCT	N	.	PASS	PRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1227467;STD_quant_start=2.14476;STD_quant_stop=1.34164;Kurtosis_quant_start=0.1517;Kurtosis_quant_stop=0.426462;SVTYPE=DEL;SUPTYPE=AL;SVLEN=-174;STRANDS=+-;STRANDS2=8,8,8,8;RE=16;REF_strand=32,42;Strandbias_pval=0.782406;AF=0.216216	GT:DR:DV	0/0:58:16
-chr1	1240679	50	N	CCGCCCCCATTCACCCCGGCCGTGGTCCCTACCGCAGCCCCA	.	PASS	PRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1240683;STD_quant_start=7.86398;STD_quant_stop=8.59008;Kurtosis_quant_start=-1.97851;Kurtosis_quant_stop=-1.85397;SVTYPE=INS;SUPTYPE=AL;SVLEN=40;STRANDS=+-;STRANDS2=16,22,16,22;RE=38;REF_strand=34,48;Strandbias_pval=1;AF=0.463415	GT:DR:DV	0/1:44:38
-chr1	1245159	51	N	CTCTGCCCTCCTCCCACCTTCCCCCTCCTCCCCCCACTCCCTCTCCCCTCTTCCCCCGACTCCCTTCCCCTACTCATCTCCTCCTCACCCACTCCTCTCCCCCTCCTCTCCCACTCCTCCCCCTCCTCCCCCCCACTCCTCCCCCCACTGCACTCTCCCCTCTTCCCCCACTCCTCCCCACTCCTCTCCCCTCCTTCTCACCTCCTCTCCCCTCCTCCTCCTCCTGTCCCTCCTCCCCCTCTTCCCCCTCCTCCCCATATACCCTCCTCCTCCTCTCCCTCTTCCTCCCACTCCCCCCACTCCTCCCCACTCCTCTCCCCTCTTGCCCCTCCTCCCTACCACTCCTTCCTCCTCTCCTCTCTTCCCCCCACTCCCTCCCCCACTCCTCTCCTCCTCCACCTCCTCTCCCCTCCTCCCCCACTCCTCTCT	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1245159;STD_quant_start=3.43996;STD_quant_stop=14.8436;Kurtosis_quant_start=4.9032;Kurtosis_quant_stop=3.59937;SVTYPE=INS;SUPTYPE=AL;SVLEN=439;STRANDS=+-;STRANDS2=12,13,12,13;RE=25;REF_strand=28,36;Strandbias_pval=0.813884;AF=0.390625	GT:DR:DV	0/1:39:25
-chr1	1248060	52	GATCTCCAACTCTGACCTACAGGCAGGAAAGTGGGCAGCCCTGGGAGGCTGGACTGAGGGAGGCTGGACTTCCCACTCAGGCCTACACGCAGGAAAATGGGCAGCCCTGGGAGGCTGGACCGAGGGAGGCTGGGCCTCCCACTCCACCCTACAGGCCAGGACACGGGCAGCCCTGGGAGGCTAGACCGAGGGAGGCTGGGCCTCCCATCTACCCTACAGGCCGGGACACAGGCAGCCCTGGGAGGCTGTACCGAGGGA	N	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1248319;STD_quant_start=52.6064;STD_quant_stop=21.0815;Kurtosis_quant_start=3.96322;Kurtosis_quant_stop=3.62028;SVTYPE=DEL;SUPTYPE=AL;SVLEN=-259;STRANDS=+-;STRANDS2=2,5,2,5;RE=7;REF_strand=23,28;Strandbias_pval=0.686983;AF=0.137255	GT:DR:DV	0/0:44:7
-chr1	1249348	53	N	CGCTCACACCCTACAGGCCGGGACACGGGCAGCCCTGGGAGGCTGGACCGGGGCTGGGCCTCCCCTCCGCCCTACAGGCCGGGACACGGGCAGCCCTGGGGCTGGACCAGGGGAGGCGCCAGGCCTCCCACTCGCCTACAGGCCGGGACACGGGCAGCCCTGGGAGGCTGGACCCCGAGGGAGGCTGGAGCCTC	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1249395;STD_quant_start=37.7478;STD_quant_stop=65.9212;Kurtosis_quant_start=-1.75534;Kurtosis_quant_stop=-1.77327;SVTYPE=INS;SUPTYPE=AL;SVLEN=124;STRANDS=+-;STRANDS2=8,10,8,10;RE=18;REF_strand=26,28;Strandbias_pval=1;AF=0.333333	GT:DR:DV	0/1:36:18
-chr1	1249588	54	N	TGGGATCGAGAGCTGGCTCCCACCGCCTCCCAGGCCTGGACACTGCAGCCCTGGGAGGT	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1249625;STD_quant_start=47.571;STD_quant_stop=45.3122;Kurtosis_quant_start=-1.76881;Kurtosis_quant_stop=-0.375917;SVTYPE=INS;SUPTYPE=AL;SVLEN=61;STRANDS=+-;STRANDS2=6,6,6,6;RE=12;REF_strand=26,28;Strandbias_pval=1;AF=0.222222	GT:DR:DV	0/0:42:12
-chr1	1284183	55	N	TGAGGGGGTGGGGTGGGGGTTGAGTGAGGGGGTGGGGGGGTTGGGTGAGGGGGGTGGGGGGTTGGGTGAGGGGGTGGGGGGCTGGTGAGGGGGTGGGGTTGGGTGAGGGGGTGGGCTCGGGGGGGGTTGAGTGAGGGGGTGGGGTGGGGGGTTGGGTGAGGGGGGTGGGGTGGGGGTTGAGGAGGGGGTGGGGTGTTATGAGGGGTTGGGGGTTGGGTGAGGGGGGGTGGGGGTTGCGAGGGGGTGGGGGGTGGGGGGGTTGAGTGAGGGGTT	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1284201;STD_quant_start=9.78895;STD_quant_stop=64.8333;Kurtosis_quant_start=-1.28536;Kurtosis_quant_stop=-1.87274;SVTYPE=INS;SUPTYPE=AL;SVLEN=282;STRANDS=+-;STRANDS2=18,16,18,16;RE=34;REF_strand=49,32;Strandbias_pval=0.535485;AF=0.419753	GT:DR:DV	0/1:47:34
-chr1	1288944	56	N	CGTGTCCCTGCTCCGGGCCCCGTGTCTCTGTTCACTGGCCCCCGTGTCTCTGCTCCTCGTCCCGTGTCCCTTGCTCCGCCCTGTGTCCCTGCTCCGTCCCGTGTCTCTGCTCCGTCCCCCGTGTTCTGCTCCGTCCTGTGTCTCTTGCTCCGGCCCCCGCGGTCTCTGCT	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1289049;STD_quant_start=87.6482;STD_quant_stop=60.9024;Kurtosis_quant_start=-0.741898;Kurtosis_quant_stop=-0.516487;SVTYPE=INS;SUPTYPE=AL;SVLEN=64;STRANDS=+-;STRANDS2=6,5,6,5;RE=11;REF_strand=28,34;Strandbias_pval=0.74488;AF=0.177419	GT:DR:DV	0/0:51:11
-chr1	1289357	57	N	TGCTCCGTCCGTGTCTCTGCTCCGTCCCGTGTCTCTGCTCCGTCTCCCCGTGTCTCTGCCCCGTCCCGTGTCTACTCCGTCCCGGTCTCTGCTCCGTCCCCCGTGTCTACTCCGTCCCCCGTGTCTCTGCTCCGTCCGTGTCTCTGCTCCGTCCCGTGTCTCTGCTCGTCCCCGTGTCTCTGCTCCGTCCGTGTCTCTGCTCAATCCCCCGTGTCTCTGCCCCGTCCCGATGTCTCTGCTCCGTCGATGTCTCTATGAGCTCTCCCGTGTCTGCTCCGTCCGTGTCTCTGCTCCGTCCGATGT	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1289476;STD_quant_start=159.932;STD_quant_stop=97.4515;Kurtosis_quant_start=-1.84527;Kurtosis_quant_stop=-1.54002;SVTYPE=INS;SUPTYPE=AL;SVLEN=162;STRANDS=+-;STRANDS2=8,9,8,9;RE=16;REF_strand=28,32;Strandbias_pval=1;AF=0.266667	GT:DR:DV	0/0:44:16
-chr1	1289780	58	N	CCCCGTGTCTCTGCTCCGTCCGTGTCTACTCCGTCCCGATGTCTCTGCCACGTCCCCGTGTCTCTGCCCCGTCCCCGTGTCTCTGCCCCGTC	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1289823;STD_quant_start=26.7955;STD_quant_stop=30.8675;Kurtosis_quant_start=-0.210759;Kurtosis_quant_stop=-0.202563;SVTYPE=INS;SUPTYPE=AL;SVLEN=91;STRANDS=+-;STRANDS2=8,7,8,7;RE=15;REF_strand=28,30;Strandbias_pval=0.778387;AF=0.258621	GT:DR:DV	0/0:43:15
-chr1	1290106	59	N	GTGTCTCTGTCTGGCCCCCCGTGTCTCTGCTCCAGCCCCGTGCCCTGCTCCTCATT	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1290106;STD_quant_start=38.9923;STD_quant_stop=30.4023;Kurtosis_quant_start=0.491131;Kurtosis_quant_stop=-0.169138;SVTYPE=INS;SUPTYPE=AL;SVLEN=103;STRANDS=+-;STRANDS2=8,9,8,9;RE=16;REF_strand=30,34;Strandbias_pval=1;AF=0.25	GT:DR:DV	0/0:48:16
-chr1	1324173	60	N	GGGCTCAGGGGCTGGGGGCTGCTGGGCTGAGGCTGGGGAGACTGGA	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1324173;STD_quant_start=5.02933;STD_quant_stop=13.0384;Kurtosis_quant_start=-1.45664;Kurtosis_quant_stop=-1.54076;SVTYPE=INS;SUPTYPE=AL;SVLEN=66;STRANDS=+-;STRANDS2=19,16,19,16;RE=35;REF_strand=38,36;Strandbias_pval=0.838779;AF=0.472973	GT:DR:DV	0/1:39:35
-chr1	1350109	61	GGGGGCCTGGACGGGGCAGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGACGGGGCAGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGACGGGGCAGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGACGGGGCAGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGACGGGGCAGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGACGGGGCAGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGACGGGGCTGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGACCGGGCAGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGACCGGGCAGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGACGGGGCAGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGACGGGGCAGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGACGGGGCAGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGACGGGGCAGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGACGGGGCAGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGACGGGGCAGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGACGGGGCAGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGACGGGGCAGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGACCGGGCAGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGACGGGGCAGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGACGGGGCAGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGACGGGGCAGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGACGGGGCAGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGACCGGGCAGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGACCGGGCAGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGACGGGGCAGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGACGGGGCAGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGACGGGGCTGGAGCGACGGGGGGAGTGAG	N	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1351186;STD_quant_start=20.5878;STD_quant_stop=20.3663;Kurtosis_quant_start=-0.011268;Kurtosis_quant_stop=-1.64329;SVTYPE=DEL;SUPTYPE=AL;SVLEN=-1077;STRANDS=+-;STRANDS2=15,13,15,13;RE=28;REF_strand=32,29;Strandbias_pval=1;AF=0.459016	GT:DR:DV	0/1:33:28
-chr1	1366913	62	TGAATTGGTGAGTTGGTGTGAATTGAATTGTGTGAATGAGTGGATTGGTGAGTGAATTGGTGAGTTGAATTGGTGTGTGTAGTGGATGAGTGTGGATGAATGTGAATTGGCGAGTATGGA	N	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1367033;STD_quant_start=14.0961;STD_quant_stop=39.5335;Kurtosis_quant_start=0.90213;Kurtosis_quant_stop=3.80352;SVTYPE=DEL;SUPTYPE=AL;SVLEN=-120;STRANDS=+-;STRANDS2=7,3,7,3;RE=10;REF_strand=36,30;Strandbias_pval=0.499306;AF=0.151515	GT:DR:DV	0/0:56:10
-chr1	1382683	63	N	CAACAATCCAGTAACAATCCAGAGGTCACCACCCTTCCCAACAATCCAGTAATCCAGAGGTTACCACCCTTCCCAACAATCCACTAACAATCCAGAGGCCACCACCCTTCCCAGCAATCGGCAAGGACCCAGAGGCCACCACCCCTTCCCAACAATCCAGTAACAATCCAGAGGGTCACCACCCCTTCCCAAAATCAGTAACCAGGGAGTCCACCACCCCTTCCCAACAATCCAGTAACAATCCAGAGGCCACCACCCCTTCCCACAACAATCCAGTAACAATCCAGAGGTACCACCCTTCCCAACAATCCAGTAACAATCGACCACCACCCTTCCCAACAATCCAGTAACAATCCAGAGGACACCACCCTTCCCAGCAATCCACTAGCAATCCAGAGGCCACCACCCCTTCCCAACAATCTGGCTTAGCGACCAGAGAGCCACCACCCCTTCCCAACAATCAGTAACAATCCAGGAGTCACCACCGCTT	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1382683;STD_quant_start=27.5276;STD_quant_stop=144.602;Kurtosis_quant_start=-2.03439;Kurtosis_quant_stop=-2.16418;SVTYPE=INS;SUPTYPE=AL;SVLEN=494;STRANDS=+-;STRANDS2=11,15,11,15;RE=26;REF_strand=24,34;Strandbias_pval=1;AF=0.448276	GT:DR:DV	0/1:32:26
-chr1	1427516	64	N	TCCCAAGTCTCGGCCTCCCTCTCCACCCCTCCCCTTTCCCCTGCATCACCCCGCCCAGCCCCCACCCCTCCATCACCCTGCTCCCGCCCCCTCCCCTCCATCCTGCCCCCCTCCCCCTCCATCACCCTGCCCAGCCCCCTCCCCTCCATCACTCCCAAGCCCTGCCCCCTTCCATCACCCTGCCCTGCCCCCACCCCATCACCCTGCCCTGCCCCCTTCCCCTCCATCATCCCGCCCGCTCCCCTCTCCACCCCTCCCTCTCCCCTGCATCACTCCCTGCCCTGCCCCTTTCCCCCTCCATCACCCCAGCCTCTGCCCCTCCCCTCCCCTCCATCACCCTGCCCTGCCCTCCTCTCTCCATCACTCCCTGTCTCTGCCCCCACCCCTCCATCATTCTGCCCTGCTCCTCTCCACCTCCCCCTTCCCCTGCATCACCCAGCCTTCTAAGTTCCTTCCTCCATCAATTCTGCCTCTGCCCCTCCCCTCCATCACTCCTGGCACTCTGCCCTCCCTCCATCACTCCTGCTCCTGCCCCCCACCCCTCCATCATCTTCACACTTTCCACCCCTCCCTTCCCTTCCCCTGCATCATCTGCACTCCTGCCTCTTCCCCCTCCATCACCCTGCCCAGCCCCCTCCCCTCCACTGCTTGCGCCCTCCTCCATCACACCCCGGCCCTGCCCCCAGCTCCG	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1427892;STD_quant_start=48.4345;STD_quant_stop=77.4325;Kurtosis_quant_start=-1.98608;Kurtosis_quant_stop=-1.067;SVTYPE=INS;SUPTYPE=AL;SVLEN=439;STRANDS=+-;STRANDS2=12,9,12,9;RE=21;REF_strand=38,32;Strandbias_pval=1;AF=0.3	GT:DR:DV	0/0:49:21
-chr1	1428085	65	N	AGGAGGGAGGGGGAGGAGGGGAGGAAGAAGAAGGAGGAAGAGGAAGGAGGAAAAGAGGAGGAGGAAAGAGAGAGGAAGAAAGGAGGGGAGGAGAAAGAGGAGGGGACAGGAGGGAAGGAGGAGAGAAAGAGGAAAAGAAAGGAGGGAGGGAAGGAGAGAGGAGAGGAAGAGAGAGACAGGGAAGGGAAAGAAAAACAGGGAGGGGAAGGAGGAGGAAGAGGAGGGAAGGAAGAAGAGGAGGAGAGGGAGGGAAGAGAGGAGGGAAAGAGGGAGGAGGAAGAGGGGGCAGGGGAGGAAGAAGAGAACA	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1428085;STD_quant_start=33.6102;STD_quant_stop=42.9676;Kurtosis_quant_start=4.12818;Kurtosis_quant_stop=-1.69999;SVTYPE=INS;SUPTYPE=AL;SVLEN=222;STRANDS=+-;STRANDS2=16,13,16,13;RE=29;REF_strand=42,34;Strandbias_pval=1;AF=0.381579	GT:DR:DV	0/1:47:29
-chr1	1442871	66	N	TTTCTATGGTAATGGTGATAAACCAAGTCAA	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1442871;STD_quant_start=13.6345;STD_quant_stop=12.775;Kurtosis_quant_start=-1.56517;Kurtosis_quant_stop=-1.35954;SVTYPE=INS;SUPTYPE=AL;SVLEN=31;STRANDS=+-;STRANDS2=9,10,9,10;RE=19;REF_strand=32,38;Strandbias_pval=1;AF=0.271429	GT:DR:DV	0/0:51:19
-chr1	1443674	67	TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT	N	.	PASS	PRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1443708;STD_quant_start=0;STD_quant_stop=1.04881;Kurtosis_quant_start=-0.5;Kurtosis_quant_stop=-1.79438;SVTYPE=DEL;SUPTYPE=AL;SVLEN=-34;STRANDS=+-;STRANDS2=6,14,6,14;RE=20;REF_strand=34,42;Strandbias_pval=0.310419;AF=0.263158	GT:DR:DV	0/0:56:20
-chr1	1469099	68	TAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA	N	.	STRANDBIAS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1469134;STD_quant_start=10.2652;STD_quant_stop=9.18559;Kurtosis_quant_start=3.94733;Kurtosis_quant_stop=2.94515;SVTYPE=DEL;SUPTYPE=AL;SVLEN=-35;STRANDS=+-;STRANDS2=0,8,0,8;RE=8;REF_strand=42,38;Strandbias_pval=0.00589579;AF=0.1	GT:DR:DV	0/0:72:8
-chr1	1477855	69	N	CACCACGCCCGGCTAATGTTGTATTTTTAGTAGAGACGGGTTTCTCCCATGGTCAGGCTGGTCTCTAACTCCCGACCTCAGGTGATCCACCCGCCTCGGCCTCTCAACCAGTTGGGATTACAGGCATGT	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1477883;STD_quant_start=11.94;STD_quant_stop=21.8689;Kurtosis_quant_start=-1.766;Kurtosis_quant_stop=-1.90683;SVTYPE=INS;SUPTYPE=AL;SVLEN=131;STRANDS=+-;STRANDS2=17,15,17,15;RE=32;REF_strand=36,38;Strandbias_pval=0.832673;AF=0.432432	GT:DR:DV	0/1:42:32
-chr1	1497123	70	N	CCTCGGCCTGGGCACGAACGGTCCCATCGAGAGCAGA	.	PASS	PRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1497123;STD_quant_start=3.84708;STD_quant_stop=4.3589;Kurtosis_quant_start=-1.05282;Kurtosis_quant_stop=-1.08734;SVTYPE=INS;SUPTYPE=AL;SVLEN=40;STRANDS=+-;STRANDS2=7,8,7,8;RE=15;REF_strand=34,36;Strandbias_pval=1;AF=0.214286	GT:DR:DV	0/0:55:15
-chr1	1554173	71	CTAAGGGGTCCCCACGAAGCTGAGCACGAGGCGGATCCGGAC	N	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1554213;STD_quant_start=9.73653;STD_quant_stop=10.0846;Kurtosis_quant_start=0.855249;Kurtosis_quant_stop=1.94672;SVTYPE=DEL;SUPTYPE=AL;SVLEN=-40;STRANDS=+-;STRANDS2=6,7,6,7;RE=13;REF_strand=34,28;Strandbias_pval=0.760968;AF=0.209677	GT:DR:DV	0/0:49:13
-chr1	1595833	72	GAGCAGAACAGGGAGAGACAGAGAGAGAGAGACAGAGAGAGGCAGACAGAGACAGAGAGAGAGACAGACAC	N	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1595902;STD_quant_start=33.7313;STD_quant_stop=33.2971;Kurtosis_quant_start=2.73708;Kurtosis_quant_stop=2.38236;SVTYPE=DEL;SUPTYPE=AL;SVLEN=-69;STRANDS=+-;STRANDS2=5,5,5,5;RE=10;REF_strand=36,42;Strandbias_pval=1;AF=0.128205	GT:DR:DV	0/0:68:10
-chr1	1595853	73	N	ACAGAGAGACAGAGAGAGAAACAGAGAGACAGAGACAGAGAGGCAGACAGAGAGAGACAGACAGAGAGCAGAACAGGGAGAGACAAAAGAGACAGAGAGAGAGAGACAC	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1595863;STD_quant_start=39.2785;STD_quant_stop=49.7011;Kurtosis_quant_start=-0.536228;Kurtosis_quant_stop=-0.678589;SVTYPE=INS;SUPTYPE=AL;SVLEN=77;STRANDS=+-;STRANDS2=3,2,3,2;RE=5;REF_strand=36,42;Strandbias_pval=0.661994;AF=0.0641026	GT:DR:DV	0/0:73:5
-chr1	1605690	74	N	GGCTGGGCTGGTCAGGTGTAGGCTGGGCTGGTCAGGCGTGGAGTGGGCTGGTCAGGCGTGGGGTGGGGTGGGCTGGTCAGGTGTGGGCTGGGCCTGGTCAGGTGTGAGGTGGGGTGGTGGGGGTGAGGGGGTTGTCTGGTCAGGTGTGGAGTGGGCTGGTCAGGTGTGGGCTGGGCTGGTCCAGACAGGGTCGGCTGGTCAGGTGTGGGCTGGGCTGGGCTGGTCAGGTGTGGGGT	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1605690;STD_quant_start=28.6112;STD_quant_stop=43.4385;Kurtosis_quant_start=-0.191101;Kurtosis_quant_stop=-1.21501;SVTYPE=INS;SUPTYPE=AL;SVLEN=226;STRANDS=+-;STRANDS2=7,7,7,7;RE=14;REF_strand=38,40;Strandbias_pval=1;AF=0.179487	GT:DR:DV	0/0:64:14
-chr1	1666975	75	CACGCCTGTAATCCCAGCTACTTGGGAGGCTGAGGCAGGAGAATCACTTCAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCAAACCAGAGAAATCCAGCTCTGGGTGACAGAGCAAGACTCTGTTTCGGGAAAAATAAAATACATAGGCAGGGCGCGGTGGCT	N	.	PASS	PRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1667142;STD_quant_start=0;STD_quant_stop=0;Kurtosis_quant_start=11.8809;Kurtosis_quant_stop=8.99409;SVTYPE=DEL;SUPTYPE=AL;SVLEN=-167;STRANDS=+-;STRANDS2=18,14,18,14;RE=32;REF_strand=38,28;Strandbias_pval=1;AF=0.484848	GT:DR:DV	0/1:34:32
-chr1	1681989	76	AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA	N	.	PASS	PRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1682020;STD_quant_start=0;STD_quant_stop=1.30384;Kurtosis_quant_start=6.9449;Kurtosis_quant_stop=-0.962407;SVTYPE=DEL;SUPTYPE=AL;SVLEN=-31;STRANDS=+-;STRANDS2=10,8,10,8;RE=18;REF_strand=34,26;Strandbias_pval=1;AF=0.3	GT:DR:DV	0/0:42:18
-chr1	1717605	77	GCTTTCAGCTAGAGTTTGCTCTCTCTGGTTTTCGGTCTGTGACACACGCAT	N	.	PASS	PRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1717656;STD_quant_start=8.22935;STD_quant_stop=8.90381;Kurtosis_quant_start=-1.60307;Kurtosis_quant_stop=-1.75287;SVTYPE=DEL;SUPTYPE=AL;SVLEN=-51;STRANDS=+-;STRANDS2=18,19,18,19;RE=37;REF_strand=40,39;Strandbias_pval=1;AF=0.468354	GT:DR:DV	0/1:42:37
-chr1	1749606	78	N	GTCCATGCATATTTTTCTGTGTGATGTGTCTGTGTGTGTGTCTCAGTGGT	.	PASS	PRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1749616;STD_quant_start=6.64118;STD_quant_stop=6.18572;Kurtosis_quant_start=-1.82765;Kurtosis_quant_stop=-1.82115;SVTYPE=INS;SUPTYPE=AL;SVLEN=48;STRANDS=+-;STRANDS2=19,19,19,19;RE=38;REF_strand=42,38;Strandbias_pval=0.84535;AF=0.475	GT:DR:DV	0/1:42:38
-chr1	1766411	79	AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA	N	.	PASS	PRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1766446;STD_quant_start=2.64575;STD_quant_stop=6.72681;Kurtosis_quant_start=1;Kurtosis_quant_stop=2.21022;SVTYPE=DEL;SUPTYPE=AL;SVLEN=-35;STRANDS=+-;STRANDS2=2,6,2,6;RE=8;REF_strand=42,40;Strandbias_pval=0.267342;AF=0.097561	GT:DR:DV	0/0:74:8
-chr1	1845825	80	ACACACACACACACACACACACACACACACAC	N	.	PASS	PRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1845857;STD_quant_start=4.42396;STD_quant_stop=5.59336;Kurtosis_quant_start=0.08546;Kurtosis_quant_stop=1.51911;SVTYPE=DEL;SUPTYPE=AL;SVLEN=-32;STRANDS=+-;STRANDS2=4,3,4,3;RE=7;REF_strand=26,26;Strandbias_pval=1;AF=0.134615	GT:DR:DV	0/0:45:7
-chr1	1924230	81	N	CCCCCAGCCTGCAGCCCACCCCCCCATCTCACCGCCTAGCCCCCATCTCACCAGCTGCCCCCTCCCCGACACACGCCCACCCCCTTATCTCACCAACCA	.	PASS	PRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1924231;STD_quant_start=0.948683;STD_quant_stop=2.72029;Kurtosis_quant_start=0.969632;Kurtosis_quant_stop=-0.273855;SVTYPE=INS;SUPTYPE=AL;SVLEN=96;STRANDS=+-;STRANDS2=7,7,7,7;RE=14;REF_strand=36,28;Strandbias_pval=0.770084;AF=0.21875	GT:DR:DV	0/0:50:14
-chr1	1929385	82	N	AGGGGACAGGTCTGGGGGGGAGGCAGGAGAGAGGGTGAGGGGGAGGCAGGAGTGGGGGAGGGAGGGGAGAGGGTAGGGAGGGAGGAGAGGGTAGGGGGAGGGAGGGAGAGAGGAGGAGGGGAGAGGGTGGGAGGGAGAGAGGAGGAGAAGGGAGGGGACATGGGGAGGGGAGAGGAAAGAGGAGGGAGGGAGAGGGGAGGGAGGGAGCGGGTGAGGGGAGGGAAAGGAGGGAAATGGTATGGGAGGGGAGGGAGGGGAGAGGGTGAGGGGGAGGGAGCAGAGGGAAAGGGTGGGGGAGGGAAGGAAGGGAGAGGGTGGGGGAGGGTAGGGAGGGAGGGAGAGAGAGGGTAGGGGGAGGGGGAGAGAGGGTGAGGAGGGGGAGGGTAGGGGAGGGAAGGAGGGGAGACGGTGAGGGAGGGAGGAGAGGGTAGGGGGGAGGGAGGAAGAGGAGGGGTAGGGAGGGAGGGAGAGGAGAGGGAGGAGGGGAGGAGGGGGAGAGAGGGGTAGGGAGGGAGGGGAGGGAGGGAAGAGGGTAGGAGGGAGGGAGAGGAGAGGGAGGGAGGGAGGGGAGGAGGGAGGGTGGGAGGAGGGAGAGGGTTAGGGGAGGGAGGGAGAGGGAGGGGGAGAGGGTAGGGAGGAGAGGAGGAGAGGGTAGAGGAGGGAGGAGGGGAGAGGGGAGGGGAGGGAGGGAGAAGAGGAGGGAGAGGGTAGGGAGGGAGGGAGAGGAGAGGGGGAGAGGGAGGAGGAGGAGGAGAGGGTAGGGAGGAGGGGAGGAGGGGAGGGGTAGGGAGGGAGGGAGAGGAGGGAGGGAGGGAGGGGGAGGAGGGGGAGAAAGTTAGGGAGGGAGGGAGAGGAGAGGGGGAGGGAGGGAGGGGGAGGAGGAGAGGGGTAGGGAGGGAGGAAGGGAGGGAGGGAGGAGGGCAGGAGGGGAAATTGGGAGGGAGGGGCAGGAGGGAGAGGGTAGGGAGGGAGGGCAGGAGGGAGAGGGTAGGGAGGGAGGGAGGAAGGGAGGGAGGTAGGGAGGAGGAGGAGAGGGTAGGGAGGGAGGAGGAGGGGA	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1929385;STD_quant_start=0;STD_quant_stop=12.4023;Kurtosis_quant_start=0.385661;Kurtosis_quant_stop=-0.139538;SVTYPE=INS;SUPTYPE=AL;SVLEN=1062;STRANDS=+-;STRANDS2=14,9,14,9;RE=23;REF_strand=46,34;Strandbias_pval=0.814909;AF=0.2875	GT:DR:DV	0/0:57:23
-chr1	1934289	83	N	TACACAGGTGTACATTAGATTATTAGGTTGTGAAT	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1934289;STD_quant_start=26.898;STD_quant_stop=24.4172;Kurtosis_quant_start=-1.47698;Kurtosis_quant_stop=-1.25573;SVTYPE=INS;SUPTYPE=AL;SVLEN=98;STRANDS=+-;STRANDS2=10,4,10,4;RE=14;REF_strand=34,20;Strandbias_pval=0.755487;AF=0.259259	GT:DR:DV	0/0:40:14
-chr1	1949003	84	N	CTTCCCTTCCCCTTCCTTCCTTCTCTCCCTCTCCCTCCTCCTCTTCCCTCCTTTCCTTCCTTCCTTTCCCTTTCCTCCTTCCTCTCCCTCCCCTCCTTTCCCCTTTTCATTCCCTCTTCCCT	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1949044;STD_quant_start=33.9013;STD_quant_stop=7.46324;Kurtosis_quant_start=-2.16185;Kurtosis_quant_stop=-0.295908;SVTYPE=INS;SUPTYPE=AL;SVLEN=111;STRANDS=+-;STRANDS2=10,9,10,9;RE=19;REF_strand=30,48;Strandbias_pval=0.304283;AF=0.24359	GT:DR:DV	0/0:59:19
-chr1	1968925	85	CCCTCCTGGGGGCTCCGGTCCTGCCCAGCAGCCCCAGGTGAGACAGCGCCTGGCGGCCCCTCCCTAGCT	N	.	PASS	PRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1968994;STD_quant_start=2.28035;STD_quant_stop=2.25832;Kurtosis_quant_start=0.046742;Kurtosis_quant_stop=0.304863;SVTYPE=DEL;SUPTYPE=AL;SVLEN=-69;STRANDS=+-;STRANDS2=7,10,7,10;RE=17;REF_strand=30,42;Strandbias_pval=1;AF=0.236111	GT:DR:DV	0/0:55:17
-chr1	1979021	86	AGGCTGCACAGAACACGTGTGTCGTGCTGAGCTGGGCGTGGGAAGGCGTCATGTGACGAGGCTGCACAGAACATGCGTGTGGTACTGAGCTGGGCGTGGGAAGGTGTCACGTGACAAGGCTGCACAGAACATGTGTGTGGTACTGAGCTGGGCGTGGGAAGGCATCATGTGACA	N	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1979168;STD_quant_start=12.3369;STD_quant_stop=9.86577;Kurtosis_quant_start=3.47666;Kurtosis_quant_stop=3.76987;SVTYPE=DEL;SUPTYPE=AL;SVLEN=-147;STRANDS=+-;STRANDS2=16,15,16,15;RE=31;REF_strand=34,35;Strandbias_pval=1;AF=0.449275	GT:DR:DV	0/1:38:31
-chr1	1980059	87	CTCTTACCGCGTGGGGAGGACGGGTGAACGAGAGTGTATCTAAGCCACCGGCACAGATCGCAGTGGGCGCCCTCTTACCGCGTGGGGAGGACGGGTGAACGAGAGACTGTATCTAAGCCACCGGCACAGATCGCAGTGGGCGCCCTCTTACCGCGTGGGGAGGACGGGTGAACGAGAGACTGTATCTAAGCCACCGGCACAGATCGCAGTGGGCGCCCT	N	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1980291;STD_quant_start=9.01234;STD_quant_stop=11.0454;Kurtosis_quant_start=-1.20742;Kurtosis_quant_stop=-0.902165;SVTYPE=DEL;SUPTYPE=AL;SVLEN=-232;STRANDS=+-;STRANDS2=18,18,18,18;RE=36;REF_strand=35,37;Strandbias_pval=1;AF=0.5	GT:DR:DV	0/1:36:36
-chr1	1981556	88	N	CACGCAGGACACACAGCCGCGACGCACACCGGCACGCAGGACACCCAGCCACGGTCACACGCGGGGCACGCAGGACACCCAGCCGCGGTCACATGC	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1981581;STD_quant_start=21.5465;STD_quant_stop=28.327;Kurtosis_quant_start=-1.84901;Kurtosis_quant_stop=-1.66641;SVTYPE=INS;SUPTYPE=AL;SVLEN=34;STRANDS=+-;STRANDS2=10,14,10,14;RE=24;REF_strand=34,40;Strandbias_pval=0.814965;AF=0.324324	GT:DR:DV	0/1:50:24
-chr1	1982045	89	N	CGGGGACACGCAGGACACCCAGGACACCCAGCCGCGGACAGACACGGGGGCACACAGGACACCCAGCTCGTGGACAGACA	.	PASS	PRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1982046;STD_quant_start=5.75698;STD_quant_stop=3.31663;Kurtosis_quant_start=-0.365062;Kurtosis_quant_stop=-0.55588;SVTYPE=INS;SUPTYPE=AL;SVLEN=79;STRANDS=+-;STRANDS2=15,14,15,14;RE=29;REF_strand=36,42;Strandbias_pval=0.666552;AF=0.371795	GT:DR:DV	0/1:49:29
-chr1	1982220	90	N	AGATAGACACGGGACACGGACACCCCAGCCGTGACAGACACGGTGACAACACAGACACCCAGCCATGGACAGACACGGGCCACGAGGACACCCAGCCACGGACAGGGACATCGATGGCTTTATGACACTCCAGCCGGTAA	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1982220;STD_quant_start=30.0322;STD_quant_stop=21.4957;Kurtosis_quant_start=-1.80704;Kurtosis_quant_stop=-1.88498;SVTYPE=INS;SUPTYPE=AL;SVLEN=206;STRANDS=+-;STRANDS2=12,19,12,19;RE=31;REF_strand=34,46;Strandbias_pval=0.830787;AF=0.3875	GT:DR:DV	0/1:49:31
-chr1	1993705	91	N	GGGCACAGTGGCTCATGCCTGTAATCCCAGCAACATGGGAGCCTGAGGTGGGAGGCTCTCTTGACAGGAGTTTGAGACCAGCCTGGGCAACATAGCAGACCCCCCACCCCGCCATTTCTAGGAAAAAAAAAAAAAAAGTGGCC	.	PASS	PRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=1993712;STD_quant_start=0;STD_quant_stop=2.51396;Kurtosis_quant_start=11.9398;Kurtosis_quant_stop=0.982105;SVTYPE=INS;SUPTYPE=AL;SVLEN=141;STRANDS=+-;STRANDS2=23,27,23,27;RE=50;REF_strand=48,58;Strandbias_pval=1;AF=0.471698	GT:DR:DV	0/1:56:50
-chr1	2019222	92	N	GGGGCGGGGGAGGAGAGGGGGGAGGGAGGGGGACCGGGTAGGGTGGGGGGGGGAGGGGAACGGGGAGGGGGCAGGCAGGCGCGGGGTGGGGGGAGGGGAGGGGGAGGGGAGAAGACGGGCAGCGGGAGGGGCGGGGGGAGGGGATGGGGGCGGGGGAGGAGGGCGGCGGGGGAGGGGATGGGGGCGGGGGAGGGGATGGGCGGGGGGAGGGGGA	.	PASS	PRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=2019227;STD_quant_start=3.28633;STD_quant_stop=4.04969;Kurtosis_quant_start=-0.713007;Kurtosis_quant_stop=0.942016;SVTYPE=INS;SUPTYPE=AL;SVLEN=211;STRANDS=+-;STRANDS2=23,17,23,17;RE=40;REF_strand=46,34;Strandbias_pval=1;AF=0.5	GT:DR:DV	0/1:40:40
-chr1	2106812	93	N	CCCTCTGGTGGGCGTAGGACCTGTCACCGTGTCACCAGGCCAGGTAACTCTCAGCAGG	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=2106813;STD_quant_start=19.0342;STD_quant_stop=19.9875;Kurtosis_quant_start=-1.45192;Kurtosis_quant_stop=-1.30053;SVTYPE=INS;SUPTYPE=AL;SVLEN=55;STRANDS=+-;STRANDS2=3,12,3,12;RE=15;REF_strand=40,54;Strandbias_pval=0.153747;AF=0.159574	GT:DR:DV	0/0:79:15
-chr1	2110063	94	CCCAACCAAGAGGATCCCAGAGGTGAGACACAGAACGGCCAGGGCTGAATCCGGGGCCCTCCCTGGGGGCAGCCAAGGACCTAAAACCAATGGG	N	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=2110160;STD_quant_start=19.0866;STD_quant_stop=18.9882;Kurtosis_quant_start=-1.18991;Kurtosis_quant_stop=-1.28399;SVTYPE=DEL;SUPTYPE=AL;SVLEN=-97;STRANDS=+-;STRANDS2=20,21,20,21;RE=41;REF_strand=38,45;Strandbias_pval=0.84876;AF=0.493976	GT:DR:DV	0/1:42:41
-chr1	2121520	95	N	GGTCATGAGGTGGTAGTTAAGTTATGGTAGTTAG	.	PASS	PRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=2121520;STD_quant_start=0;STD_quant_stop=0.8044;Kurtosis_quant_start=2.24316;Kurtosis_quant_stop=2.47427;SVTYPE=INS;SUPTYPE=AL;SVLEN=33;STRANDS=+-;STRANDS2=21,15,21,15;RE=35;REF_strand=60,50;Strandbias_pval=0.704801;AF=0.318182	GT:DR:DV	0/1:75:35
-chr1	2122244	96	N	GTTAGGGTCACGGCGGTGGTTAGGTCGTGGTGGGAGTTAGGGTCACGGTGGTAGTTAGGGTCATGGTGGTAGTTAGGATCATGGCTGTAGTTAGCGTCATGGTGGTAGTTAGGGTCACGGCTATAGTTGGGGTCATGGTGGTAGTTAGGGTCATGGTGGTAGTTATTTAGGGTCACGGCTGTAGTTAGCGTCATGGTGGTGGTTAGGTCATGGTGGTAGTTAGGGGTCACGGCTGTAGTTAGGGTCATGGTGGTGGTTAGGTCACTTGCTGTAGTTAGGGTCATGGTGGTAGTTAGGTCATGGTGGTAGTTAGCGTCATGGTGGTGGTTAGGTCATGGTAGTTAGGGTCACTGCCA	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=2122256;STD_quant_start=72.1976;STD_quant_stop=164.724;Kurtosis_quant_start=0.262966;Kurtosis_quant_stop=-1.09184;SVTYPE=INS;SUPTYPE=AL;SVLEN=340;STRANDS=+-;STRANDS2=0,6,0,6;RE=6;REF_strand=56,48;Strandbias_pval=0.0120583;AF=0.0576923	GT:DR:DV	0/0:98:6
-chr1	2123322	97	N	TAATTGGGATCATGACCATGTGATTGGGGTCATGGTGTTAGTTAAGGTCATGACTGT	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=2123322;STD_quant_start=21.4499;STD_quant_stop=25.9846;Kurtosis_quant_start=-0.532609;Kurtosis_quant_stop=-1.17357;SVTYPE=INS;SUPTYPE=AL;SVLEN=88;STRANDS=+-;STRANDS2=9,11,9,11;RE=20;REF_strand=54,52;Strandbias_pval=0.80797;AF=0.188679	GT:DR:DV	0/0:86:20
-chr1	2123768	98	N	GGCTGTGGTTAGGGTCATGGTGGTAGTTAGGATCATGGCTGTAGTTAGGTCATGGTGGTAGGTCTGGTCACGGCTAGTTGGGGTCATGGTGGTAGTTAGATCATGGCTGTAGTTAGGGTCAT	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=2123768;STD_quant_start=41.4871;STD_quant_stop=39.3296;Kurtosis_quant_start=-1.74964;Kurtosis_quant_stop=-0.19162;SVTYPE=INS;SUPTYPE=AL;SVLEN=112;STRANDS=+-;STRANDS2=11,11,11,11;RE=22;REF_strand=52,50;Strandbias_pval=1;AF=0.215686	GT:DR:DV	0/0:80:22
-chr1	2124290	100	N	GGGTCATGGTGGTAGTTAGGATCATGGCTGTAGTTGGGGTCATGGTGGTAGTTAGGGTCACGGCTATAGTTAGGGTCATGGTGGTAGTTATTGGTCTGTGATAGTTAGCATCATGGTGGTAGTTAGGGTCATGGTGGTAGTTAGGGTCATGGTGGTAGTTAGGGTCATGGTGGTAGTTGGGGTCATAGCTGTAGTTAGGGTCATAGTGGTAGTTGGGGTCACGGCTATAGTTG	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=2124353;STD_quant_start=62.996;STD_quant_stop=56.9131;Kurtosis_quant_start=-1.3131;Kurtosis_quant_stop=-0.191076;SVTYPE=INS;SUPTYPE=AL;SVLEN=110;STRANDS=+-;STRANDS2=6,7,6,7;RE=13;REF_strand=55,51;Strandbias_pval=0.773897;AF=0.122642	GT:DR:DV	0/0:93:13
-chr1	2124333	99	N	AAGGGTCATGGTGGTAATTAGGATCATGTAGCTGTAGTTAGGGTCATGGTGGTAGTTAGGGTCTGGCTATAGTTGGGGTCATGGTGGTAGTTAGGGTCACAGCGATAGTTAGCATCATGGTGGTAGTTAGGGTCATGGTGGTAGATTGGGGTCATGGTGGTAGTTAGGGTCATGGTGGTAGTTAGGGTCATAGCTGTAGTTAGGGTCTGTGGTGGTAGTTGGGGTCCGCGGCTATAGTTGGGGTCCATGGTGGTAGTTAAGGTCACGGCTGTGATTAGCGTCATGGTGGTACGTT	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=2124347;STD_quant_start=28.9361;STD_quant_stop=52.7655;Kurtosis_quant_start=0.874818;Kurtosis_quant_stop=0.127135;SVTYPE=INS;SUPTYPE=AL;SVLEN=295;STRANDS=+-;STRANDS2=10,9,10,9;RE=19;REF_strand=54,52;Strandbias_pval=1;AF=0.179245	GT:DR:DV	0/0:87:19
-chr1	2142340	101	CTTTCAATCCAGGGTCCACACATCCAGCAGCCGAAGCGCCCTCCTTTCAATCCAGGGTCCAGGCATCTAGCAGCCGAAGCGCCT	N	.	PASS	PRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=2142424;STD_quant_start=9.44235;STD_quant_stop=8.28442;Kurtosis_quant_start=7.28739;Kurtosis_quant_stop=4.86915;SVTYPE=DEL;SUPTYPE=AL;SVLEN=-84;STRANDS=+-;STRANDS2=23,15,23,15;RE=38;REF_strand=48,32;Strandbias_pval=1;AF=0.475	GT:DR:DV	0/1:42:38
-chr1	2280758	102	N	GCCTCGGGAGAGTGACAGGCGGCGGCGGCGACACCAGAGAGCGGACGAGAGGACAGGCGGCGGCGGCGATCTTTCAGAGAGCGGGATTTTCCCGAGAGGGACAGAGAAGGCGGCGGAGATTGTCTTCAGAGAGAGGAT	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=2280758;STD_quant_start=32.8507;STD_quant_stop=82.7345;Kurtosis_quant_start=-1.7272;Kurtosis_quant_stop=-1.34469;SVTYPE=INS;SUPTYPE=AL;SVLEN=205;STRANDS=+-;STRANDS2=2,4,2,4;RE=6;REF_strand=32,54;Strandbias_pval=1;AF=0.0697674	GT:DR:DV	0/0:80:6
-chr1	2280945	103	N	CGGTGCGGAGAGATCTTCAGAGAGAGGACGCCTGAGAAGACAGT	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=2281014;STD_quant_start=60.2633;STD_quant_stop=47.2864;Kurtosis_quant_start=-0.84573;Kurtosis_quant_stop=-0.496898;SVTYPE=INS;SUPTYPE=AL;SVLEN=51;STRANDS=+-;STRANDS2=1,5,1,5;RE=6;REF_strand=34,54;Strandbias_pval=0.40609;AF=0.0681818	GT:DR:DV	0/0:82:6
-chr1	2281986	104	GAGAGGACGCCCGAGAAGACAGGCGGTGGCGGAGATCTTCAG	N	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.12;CHR2=chr1;END=2282028;STD_quant_start=21.4103;STD_quant_stop=21.0879;Kurtosis_quant_start=-1.23914;Kurtosis_quant_stop=-0.042349;SVTYPE=DEL;SUPTYPE=AL;SVLEN=-42;STRANDS=+-;STRANDS2=9,11,9,11;RE=20;REF_strand=35,54;Strandbias_pval=0.801434;AF=0.224719	GT:DR:DV	0/0:69:20
+##bcftools_viewCommand=view sniffles.vcf.gz; Date=Mon Jan 17 15:03:26 2022
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	F00209
+chr1	10175	30259	N	]chrX:449442]N	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.11;STD_quant_start=137.151;STD_quant_stop=51.4701;Kurtosis_quant_start=0.180157;Kurtosis_quant_stop=2.98567;SVTYPE=BND;RNAMES=36b45e58-0eba-46a5-9d01-819279aa26ae,66c0071b-0063-42fd-84db-bda11d270a1b,6dd212c3-59c6-48bc-ad46-e83521f1d4a7,8a407a84-39ff-438e-bbb3-939b2de7f56b,9c86fe43-fc57-45d2-a21e-be9c2b2a7ab2,c7599105-7f14-4b32-bcff-2718b1c27e14;SUPTYPE=SR;SVLEN=0;STRANDS=-+;RE=6;REF_strand=0,0;AF=1	GT:DR:DV	1/1:0:6
+chr1	66231	0	AATATATATTATATTATATAATATATAATATAAATATAATA	N	.	PASS	PRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr1;END=66272;STD_quant_start=4.70106;STD_quant_stop=2.68328;Kurtosis_quant_start=1.94037;Kurtosis_quant_stop=2.86338;SVTYPE=DEL;RNAMES=18949eae-a81e-4a30-ab2f-714a2a651b3e,303df7a0-ffff-44da-9783-1266c2aa8e43,42f0045c-6ce4-4454-89b4-dcc55a60aa20,4620fc8c-a235-44f0-ab2f-132e3b23b1a3,46cd3de9-56ec-4a5c-af40-c772fda8c97e,4e2cad12-2931-4e21-8219-62ee48f4a71b,7d47d56b-7cea-4c3c-8499-e4e37d87d4c3,8b613887-454e-4e1c-98c2-e6c7ba7f5a82,96142aba-b9ef-4816-b99f-26d55fc7611e,be826b7d-8200-4805-abde-9b98bd68ae52,dbf59bf4-e594-449d-ab9d-822448f148b2,f6ec6a8f-ce44-4312-bf9e-c1d3ab70bad3;SUPTYPE=AL;SVLEN=-41;STRANDS=+-;RE=12;REF_strand=5,4;AF=0.571429	GT:DR:DV	0/1:9:12
+chr1	136971	1_0	N	GTGTGGGAGGGGGCCGGTGTGAGGGCAAGGGGCTCACGCTGACCTCTGTCCGCGTGGGAGGGGCAGTGTGGACGGGGGCTCGGGCTGACCTCTCCCAGCGTGGGAGGGGCCAGTGTGAGGCAAGGAGCTCCACACTGACCTCTCAGCATCGGGAGGGGGCCGGTGTGAGACAGGGGCTCGGGCTGACCTCTCTCCCG	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr1;END=137157;STD_quant_start=38.086;STD_quant_stop=40.5911;Kurtosis_quant_start=-1.74556;Kurtosis_quant_stop=-0.884647;SVTYPE=INS;RNAMES=232c6c93-a600-4ff3-913b-ea599f590441,2b160313-a262-4a4c-846c-6840b04830f3,2e0372f6-d634-4380-ba6a-a4ab5ad20d2b,2ec2281b-d0fe-48c1-b5b1-f8e1232d806f,3c3847a1-26c1-490e-8b36-d060b856de07,400b2a8f-a57b-4456-880f-4ee3770e9900,492047a3-a057-4612-ad54-6fcabf3edd04,51603ab9-4512-4957-ab67-388025b73ff7,548de1e6-386a-4a05-b589-b9c43db86d94,744e152e-272a-4871-98d7-51d9427da8ce,75c3d587-3f58-432e-99c3-427cb21d4f83,81b6dafe-1d61-45cf-95e7-1acdaa42e707,91ff0042-3aa3-418e-afc5-0ece769cc09f,9c5b3e92-dbf6-4d9f-9844-7cc97539e1c2,aa0055c1-7f35-44ce-a928-03dcbe35097a,b1d2f26e-496a-4f13-85aa-6add0867ac95,e5e48f9e-34f6-4dc3-8f7d-fe1a50492e82,e89599e5-28ba-4eb4-9916-beb01fddebf7,eb609540-ec5a-4cc2-b9c9-d8161df13388,ee1e714f-1506-494a-b627-03732f6d0fb6,f89425df-020b-4daa-96a7-526e8903e0b4,faf824ea-0cf9-4d2e-ba2c-b3e3b26c40f1,fc2651cd-6016-4c60-afdf-f37c1291e57f;SUPTYPE=AL;SVLEN=94;STRANDS=+-;RE=23;REF_strand=12,14;AF=0.469388	GT:DR:DV	0/1:26:23
+chr1	139403	1_1	GCAGGAGCTGGGCCTGGAGAGGCTGCAAAGAA	N	.	PASS	PRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr1;END=139435;STD_quant_start=0;STD_quant_stop=0;Kurtosis_quant_start=0.630869;Kurtosis_quant_stop=0.700919;SVTYPE=DEL;RNAMES=19379760-7221-41d8-b5db-64b06e0aaa71,23253008-8cc6-41cc-a562-482426273785,232c6c93-a600-4ff3-913b-ea599f590441,2b160313-a262-4a4c-846c-6840b04830f3,2ec2281b-d0fe-48c1-b5b1-f8e1232d806f,301da2b9-8ef6-48ce-85e3-05416b6cff9f,31427bc1-7882-452e-9d65-eb196ccdcbde,39875412-abe8-4c09-908c-d7358b9d1244,3c3847a1-26c1-490e-8b36-d060b856de07,400b2a8f-a57b-4456-880f-4ee3770e9900,492047a3-a057-4612-ad54-6fcabf3edd04,695b38ca-2d0d-40ee-ade7-94575dfeab24,69e3bbfc-05d9-41ca-97b8-e4100dc134bd,6a895396-2469-45a2-9b53-21325d361d45,744e152e-272a-4871-98d7-51d9427da8ce,8291b073-eb34-4cba-b548-3d7b8fac78f9,91ff0042-3aa3-418e-afc5-0ece769cc09f,9c5b3e92-dbf6-4d9f-9844-7cc97539e1c2,a3c44e09-58a0-4db2-97b2-9d26c8ab1af3,b1d2f26e-496a-4f13-85aa-6add0867ac95,b94fafca-6aa6-4d59-a6a7-e281d3abf987,eb609540-ec5a-4cc2-b9c9-d8161df13388,ee1e714f-1506-494a-b627-03732f6d0fb6,faf824ea-0cf9-4d2e-ba2c-b3e3b26c40f1,fc2651cd-6016-4c60-afdf-f37c1291e57f;SUPTYPE=AL;SVLEN=-32;STRANDS=+-;RE=25;REF_strand=2,6;AF=0.757576	GT:DR:DV	0/1:8:25
+chr1	372612	3	TGATCTGTATATATGTATCATGTAAACATGAGTTCCTGCTGGCATATCTGTCTATAACCGACCACCTTAGGGTCCATTCTGATCTGTATATATGTATAATATATATTATATATGGACCTCAGGGTCCATTCTGATCTGCATATATGTATAATATATATTATATATGGTCCTCAGGGT	N	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr1;END=372770;STD_quant_start=85.6061;STD_quant_stop=72.4707;Kurtosis_quant_start=-1.37847;Kurtosis_quant_stop=0.624284;SVTYPE=DEL;RNAMES=0ea301f0-f842-48c6-99d9-789e5306ca31,51be7816-260b-4aa6-bb11-e7bf4970584b,9e9106b8-8807-402f-9b32-f6ec842256e8,c9381473-3100-4d39-9a7c-17f0952c8329,fa9a877c-5ca5-4c7d-a1ec-7ac05eb89dd0;SUPTYPE=AL;SVLEN=-158;STRANDS=+-;RE=5;REF_strand=3,1;AF=0.555556	GT:DR:DV	0/1:4:5
+chr1	610585	4_0	N	TGGGTTCTCTGTGGCCGAACGGGCGCGCGGTGATGCAGGAGATGCCCAGACCTGGCGGCTAAAGGCG	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr1;END=610745;STD_quant_start=38.4851;STD_quant_stop=42.1248;Kurtosis_quant_start=-1.68346;Kurtosis_quant_stop=-1.60045;SVTYPE=INS;RNAMES=19f22868-9df9-4dfb-b17f-1f9d4fa49c17,1b5a6250-2329-4d71-8055-5ddd95e62e84,2347a361-390f-4514-bfad-70bb6f85a06e,2769b543-d184-47fb-89d1-668ec575fbeb,2e264729-925c-443a-a171-14f917a224cd,725e215c-6117-42ae-b103-11dd0144b9e4,7aada54b-a736-4131-9d5a-e1196802f17a,83fb6dbf-7f76-460e-824e-c08e23603e92,8535d941-7b29-4387-ad34-baad65338467,90159235-0960-44f7-bc14-f5f4c0f05d03,91de4a73-81b8-4943-b471-8cd9a85075d0,944217ac-85b1-4b0a-9e4b-134a37d2dab4,95778194-63c1-42cb-9c62-29860bebdce9,9909d57d-46b5-4956-a28f-aacc34e97f1c,caa57e28-a0f5-44a0-80e6-9ca10e61de9e,dbf5d80b-fa42-4b4c-944b-ca97ebdf7747,ee509fc4-32f4-4d20-8598-446cd9eff2ea;SUPTYPE=AL;SVLEN=70;STRANDS=+-;RE=17;REF_strand=11,13;AF=0.414634	GT:DR:DV	0/1:24:17
+chr1	611533	4_1	GCTGTGTGAGAACGTGTGTGTAGTGTTCACATGTCCTCTGTGCGTGAGTCCCCGTGTGTGATGTTGTGTTCTCGGTGTGAGTTCATGGGTGTGACG	N	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr1;END=611630;STD_quant_start=13.3454;STD_quant_stop=14.8257;Kurtosis_quant_start=0.822308;Kurtosis_quant_stop=0.534113;SVTYPE=DEL;RNAMES=1b5a6250-2329-4d71-8055-5ddd95e62e84,2e264729-925c-443a-a171-14f917a224cd,32296472-3493-443e-82c0-a123bf1c6203,7aada54b-a736-4131-9d5a-e1196802f17a,83fb6dbf-7f76-460e-824e-c08e23603e92,8535d941-7b29-4387-ad34-baad65338467,91de4a73-81b8-4943-b471-8cd9a85075d0,9909d57d-46b5-4956-a28f-aacc34e97f1c,b4157b20-84b8-49a5-a02c-e64fc3b473da,caa57e28-a0f5-44a0-80e6-9ca10e61de9e,fad6f4a8-5974-4453-b8ec-44dee69ca8ad,fea67814-cd2a-4ca0-887a-0423f749162d;SUPTYPE=AL;SVLEN=-97;STRANDS=+-;RE=12;REF_strand=1,1;AF=0.857143	GT:DR:DV	1/1:2:12
+chr1	744866	6	N	TGTATATAAGTATATATATATATATATATATATATATATATATATATATATATATA	.	PASS	PRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr1;END=744916;STD_quant_start=1.34164;STD_quant_stop=6.40312;Kurtosis_quant_start=2;Kurtosis_quant_stop=-0.304343;SVTYPE=INS;RNAMES=31380806-bb1a-44fc-af33-32dafb8b1bd2,60ed3a7f-df66-44b8-9a3d-85ae047cb2d9,6527face-7c09-4095-b4a5-395c81bf0af6,bde72b50-3594-4ce7-9152-524b2041a889,cb820290-c6fb-4b50-9d74-dae21678da93;SUPTYPE=AL;SVLEN=50;STRANDS=+-;RE=5;REF_strand=2,7;AF=0.357143	GT:DR:DV	0/1:9:5
+chr1	820879	7_0	N	TTCACTCACCCTGCCTGGCCAGCAGATCCACCCTGTCTACACTCACCTGCCTGGGCAGTAGTTCCACGTGAATCTCCCCTACCTGCCTCTCCAGCAGACCCGCCCCATCTATACTACTTGCCTGTCCAGCAGATCCACTCTATCTGCTACCCACCTGCCTGTCCAGCAGGATCCACCCAAGTCTACCTGCCTCCTGCTTCTTGTCCAGCAGGTCCACCCTGTCTATACTACCTGCCTGGCCAGTAGATCCACACTA	.	PASS	PRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr1;END=821129;STD_quant_start=7.30121;STD_quant_stop=4.91466;Kurtosis_quant_start=3.29404;Kurtosis_quant_stop=-1.30228;SVTYPE=INS;RNAMES=08277f66-4246-4196-a8c6-0e935cefd23b,12a4e196-c480-4f95-add7-01dc344ddc95,173ce38d-faa4-4d49-a590-8f7d91cc381e,30b916a3-789e-4e6b-836f-56b482e17b85,31f1731a-474f-4059-b0af-f35ca10dc0bb,33fb62e8-4b9e-48a5-b3cc-fe874bae84bb,34d557b0-c085-4370-bacb-708eca60fc92,3cf47072-e6cd-4818-b8f9-a563494f585e,5b8f73ed-d36d-4762-8a40-120a7850ad64,5b9c493f-d9b7-4ef0-af6a-da1dbccd4de4,744c7b3c-b4d5-4f77-8eca-3dc88f3ab8a9,840c98a5-9354-4c10-9f1c-bada9822e1a2,889ca98e-fa18-4759-8d64-992080ed5292,9c636e2f-414e-495b-a746-c65d424324ff,9eabda16-e5f3-4f38-8a40-b3e55b54b4ac,ab64bd0c-2bd9-4504-a3c5-d6b5d24de147,bbf9711d-8100-438c-b863-be499e145ac6,bf87f7b4-2063-4be9-a2da-d50684789239,d2431d93-a300-43be-9fe3-53e37023fa04,d2579512-6af0-4071-94bb-e6a335f02a4b,d2a2d42b-9520-42fb-aa83-08120d9c4990,da10e67a-f6bc-43c0-836e-21f5cbd4f89c,da5f3bed-ce32-4afe-977f-dec998afcd20,da95c1a1-06a7-4e80-89ba-8836eb320f83,fd27d273-d4e5-4079-941a-64bd4dd859da,ffa7d785-a137-48b2-bb17-ad92db6d6c42;SUPTYPE=AL;SVLEN=244;STRANDS=+-;RE=26;REF_strand=13,12;AF=0.509804	GT:DR:DV	0/1:25:26
+chr1	822427	7_1	CCCTGGCCAGCAGATCCACCCTGTCTATACTACCTG	N	.	PASS	PRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr1;END=822462;STD_quant_start=7.58837;STD_quant_stop=7.27438;Kurtosis_quant_start=-1.25525;Kurtosis_quant_stop=2.28728;SVTYPE=DEL;RNAMES=08277f66-4246-4196-a8c6-0e935cefd23b,12a4e196-c480-4f95-add7-01dc344ddc95,173ce38d-faa4-4d49-a590-8f7d91cc381e,30b916a3-789e-4e6b-836f-56b482e17b85,31f1731a-474f-4059-b0af-f35ca10dc0bb,33fb62e8-4b9e-48a5-b3cc-fe874bae84bb,34d557b0-c085-4370-bacb-708eca60fc92,3cf47072-e6cd-4818-b8f9-a563494f585e,5b8f73ed-d36d-4762-8a40-120a7850ad64,5b9c493f-d9b7-4ef0-af6a-da1dbccd4de4,6638dfba-bd16-4489-840a-8961dccca88f,744c7b3c-b4d5-4f77-8eca-3dc88f3ab8a9,9c636e2f-414e-495b-a746-c65d424324ff,9eabda16-e5f3-4f38-8a40-b3e55b54b4ac,a39a01e8-9332-42c6-ac8f-d50f08bfa762,ab64bd0c-2bd9-4504-a3c5-d6b5d24de147,bbf9711d-8100-438c-b863-be499e145ac6,bf87f7b4-2063-4be9-a2da-d50684789239,d2579512-6af0-4071-94bb-e6a335f02a4b,da10e67a-f6bc-43c0-836e-21f5cbd4f89c,da5f3bed-ce32-4afe-977f-dec998afcd20,da95c1a1-06a7-4e80-89ba-8836eb320f83,dc757ea7-bb53-4189-862a-762bfcc45215,fd27d273-d4e5-4079-941a-64bd4dd859da;SUPTYPE=AL;SVLEN=-35;STRANDS=+-;RE=24;REF_strand=0,1;AF=0.96	GT:DR:DV	1/1:1:24
+chr1	839472	9	CTAGACACACACACCTGGACAAACACACCTGGACACACACA	N	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr1;END=839513;STD_quant_start=18.0924;STD_quant_stop=18.9209;Kurtosis_quant_start=-0.921195;Kurtosis_quant_stop=-0.652794;SVTYPE=DEL;RNAMES=06082ac4-343f-46cb-8dbb-16044a695d40,173ce38d-faa4-4d49-a590-8f7d91cc381e,290bc1f7-bba6-401e-914e-e3e08dac6ca6,33633824-c73a-43b1-8755-e413b56b11cd,59fe81ea-0f96-4d4f-aec8-6d99e1b74249,935fc28f-4368-4a5d-b152-03e3a54580c9,ac29ef04-2573-42e2-9a2c-22c376c8956b,d2ce726d-3d70-4406-b906-4c83146f54da,db7670c0-06cf-4e31-a4bb-12cbc91ff9ff;SUPTYPE=AL;SVLEN=-41;STRANDS=+-;RE=9;REF_strand=2,11;AF=0.409091	GT:DR:DV	0/1:13:9
+chr1	876055	10_0	N	CCCACACTCCCCACACTCCCATACCCCCACACTCCCCCACACTCACCCACACCCCCCCCATACTCCCCAACTCTCCCCATACTCCCCACATTCCCCATACTCCCCCATACTCCCAAACTCCCCCATACTCCCCTATACTCCCCCCATACTCCCCACACTCCCCCATACCCTCCCCCATACTCCTCCCCCATACTCCCATATTCCCCCCATACTCCCCCATACTCCCCCCAAACTCCCCCATACTCCCTCCCCCACACTCCCCCATACTCCCCCCACACTCCCCCACACTCCCTGCAAACTCCCCCATACTCCCCCATACCCCCACACTTTCACACTTCACACTCCCCACACTCCCCCAAACTCCCCCATACTCCTCCCCCATACTCCCCATACCCCCACACTCCCCCACACTCCCCCACACTCCCCATACTCCCTATACTCTTCCCCATACTCCCCCATACTCCCCCACACCCCCCCAAACTCCCCCCATACCTCCTCCCCATACTCCCTCACACCCCCACAATCCCCACACACTCCCCCACACTCCCCCACACTCCCCCATACTCCCCACACTCCCCCACACTCCCCCATACTCCCCCACACTTCCATACCCCCAACCTCCCCATACTCCCCCACATTCCCCATACTCCCCATACTCCCCTAAACTCCCCCATACTCCTCCTCCCCCCACACTCCCCACACTCCCCCCACACTCCCCCAAACTCCCCCATACTCCTCCCCCCATACTCCCCATACTCCCCCACACTCCCTTCATACTCCCCCAACCTCCCCATACTCCCCACATTCCCCATACTCTCATACTCCCCAAAACTCCCCATACTCTCCCCCATACTCCCCATACTCCCCACACTCCCCCACACTCTTATACTCTCTGCACTCCCCATACCCCTAACCTCCCCCCATACTCCCCACATTCCCCCTATACTCCCCCATACTCCCCCAAACTCCCCATACTCCTCCCCTATACTCCCCATACTCCCCCACACTCCCACACTCCCCCCATACTTCCCCACACACTCCCCCATACTCCCCCAACCTCCCCATACTCCCCACATTCCCCCATACTCCCCATACTCCCTAAACTCTCTATACTCTTCCCCATACTCCCCATACTCCCCCACACTCCCCCCACACTCCCCCATACTCCCCCAAACTCCCCCCATACCCTCCTCCCCATACTCCCCCATACTCCCCCACACTCCCCCCAAACTCCCCATACTCCTCCCTCACTCCCCCATACTCCCCAACCTCCCCCATACTCCCCCACATTCCCCCATACTCCCCCATACTCCCCCAAACTCCCCCATACTCCCTCCCCATACTCCCCACACTCCCCCACACTCCCCCAAACTCCCCATACCCTCCCCCATACTCCCCACACTCCCCACACTCCCCCTACACTCCCCCATACTCCCCCACACTCCCCACACTCCCCCATACTCCCACACATTCCCCCACACTCCCCAACTCCCATACTCCCCCACATTCCCCAGTTACCCCCCATACTCCCCAAACTCCCCCATACTCCTCCCCCCACACTCCCCCATACTCCCCATACTCGCCCAACCTCCCCCATACTCCCCACACTCCCCCATACTCCCCACAGTCCCCCACACTCCCCCACACTCCCCCCAACCTCCCCCATACCTCCCCATACTCGCCCACACTCGCCCACACCCCCCCCCATACTCCCCCACACTCCCCCATACTCCCCACACCCCCATACTCCCCATACTCCCCCATACT	.	
PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr1;END=877822;STD_quant_start=61.8558;STD_quant_stop=515.837;Kurtosis_quant_start=0.589259;Kurtosis_quant_stop=-0.256238;SVTYPE=INS;RNAMES=24d22080-dc3e-40a2-9ca5-671548228dc4,2ba3fe9c-4032-49da-af3f-d8ec59a55560,53e073b5-1dac-49fb-b5d1-3564047008e6,5f5806db-7c48-4e2a-a314-0a7dcb1ca284,7a8adb88-1a90-4871-ae73-a4211a91e352,b3933a23-f008-40a9-b8fd-16142ec02196,dee43754-3fd9-4b9b-8734-ffac87bb82ae;SUPTYPE=AL,SR;SVLEN=1767;STRANDS=+-;RE=7;REF_strand=5,11;AF=0.304348	GT:DR:DV	0/1:16:7
+chr1	876234	10_1	N	CCCACACTCGCCACACTCCCCCACACTCTCACAGCCGCCCACACTCCCCCCACACTCCCCACACTCGCCCACACACTCCCTCACGCTCCCCTATACTCGCCACGCCTCCCCATACGTCACGCTCCCCCACGCCTCCCCCACACTCGCCCACGCTCCCACACTCGCCCACGCCTCCCCACGCTCCCCTGCCACCCCGCCACGCTTCCCCACGCGCCCACACTCCCCACGCTCCCCATACCCGCCCACGCTCCCCCACGCGCCCCACGCTCCCCACGCCTCCCCACACTCTCGCCCACACTCCCCACACTCCCGCCCACACTTCTGACACTCA	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr1;END=876574;STD_quant_start=111.344;STD_quant_stop=186.734;Kurtosis_quant_start=-0.221175;Kurtosis_quant_stop=2.38407;SVTYPE=INS;RNAMES=0bc44752-5742-4579-adba-8631316c72cb,0dc02ef8-8f74-4162-a5e4-efe931a57738,297b488b-933a-419b-ad51-1ccbc4d4a005,2ba3fe9c-4032-49da-af3f-d8ec59a55560,3b2e373b-12df-4457-9d80-cdf85a4783d2,53e073b5-1dac-49fb-b5d1-3564047008e6,7a8adb88-1a90-4871-ae73-a4211a91e352,88b4b3e8-f5a5-4dc6-b806-09e01d74d202,b3933a23-f008-40a9-b8fd-16142ec02196,dee43754-3fd9-4b9b-8734-ffac87bb82ae,f2061e65-523d-4a76-8217-2ec92f003d78;SUPTYPE=AL;SVLEN=305;STRANDS=+-;RE=6;REF_strand=0,0;AF=1	GT:DR:DV	1/1:0:6
+chr1	876435	30260	N	N[chr4:189981026[	.	PASS	PRECISE;SVMETHOD=Snifflesv1.0.11;STD_quant_start=8.33667;STD_quant_stop=4.22295;Kurtosis_quant_start=-0.21474;Kurtosis_quant_stop=2.25059;SVTYPE=BND;RNAMES=2b803f71-4a7c-40e0-b7fd-7cb14781ccd7,3aad2dc7-a939-4dda-aef3-79752a09e031,41fe3fce-daef-4b91-a138-0a043ec01c1e,a1686d65-694d-419a-87c5-b59aea8ead01,a2d1106c-5908-4a52-bfc4-2d2c9f82f0ab,f7ddba20-5e7d-4bb6-89c5-13113bd7f9e1;SUPTYPE=SR;SVLEN=0;STRANDS=++;RE=6;REF_strand=0,0;AF=1	GT:DR:DV	1/1:0:6
+chr1	878422	12_1	N	]chr3:198124404]N	.	PASS	PRECISE;SVMETHOD=Snifflesv1.0.11;STD_quant_start=2.30217;STD_quant_stop=7.16938;Kurtosis_quant_start=1.44065;Kurtosis_quant_stop=-0.509718;SVTYPE=BND;RNAMES=0933c3d1-d309-47b4-a379-0c40a5d41472,12bbb6c8-1b3a-4f9c-b6d4-583c5de2853b,4eb474a5-759c-4751-bf00-2632e7513a00,5842611b-34f3-4833-90ea-2105b399f00b,74b0cf2b-41fa-41e1-a1b8-ed795e5b69f4,96694515-ad94-44d0-976e-224b88435a94,972a72b9-c511-4f41-86fe-585ad5fd0801,b369b2b2-bad2-4494-9e32-7eecff663621,b6befb03-ee20-4bbc-9848-323e1aa2cb3f,c467bfb0-2ef6-4e90-ad38-b6d6e20f114a,c8929532-4d6d-4b90-83af-495c63b7e4a7,f6684964-716f-4b3b-9235-568583100244;SUPTYPE=SR;SVLEN=0;STRANDS=-+;RE=12;REF_strand=0,0;AF=1	GT:DR:DV	1/1:0:12
+chr1	882644	12_0	AATATATTAGCTATTCTAGACTTTATGCATTTATGTAAAGTTTTCTTTGTTGCACTTTAAGTTCTGTGATACATGGGCAGAGCATGC	N	.	PASS	PRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr1;END=882731;STD_quant_start=1.06904;STD_quant_stop=1.06904;Kurtosis_quant_start=0.5;Kurtosis_quant_stop=0.5;SVTYPE=DEL;RNAMES=0933c3d1-d309-47b4-a379-0c40a5d41472,3a5b25c9-aade-4476-8014-39b5642704d8,74b0cf2b-41fa-41e1-a1b8-ed795e5b69f4,b6befb03-ee20-4bbc-9848-323e1aa2cb3f,bb8d3472-1ce4-4b38-8b80-4b84b1701d43,bcb08d1d-3e42-4615-a634-ef3d9480e50f,f6684964-716f-4b3b-9235-568583100244;SUPTYPE=AL;SVLEN=-87;STRANDS=+-;RE=7;REF_strand=5,5;AF=0.411765	GT:DR:DV	0/1:10:7
+chr1	893788	13_0	AAAAAAAAAAAAAAAATATATATATATATATATATAT	N	.	PASS	PRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr1;END=893823;STD_quant_start=1.14017;STD_quant_stop=1.41421;Kurtosis_quant_start=-1.64583;Kurtosis_quant_stop=-0.318339;SVTYPE=DEL;RNAMES=00d5acc9-d809-4589-950a-60e1d649dfc7,109e8760-9901-47ff-8a78-d1fedbe48b93,3b534e86-089c-4fd1-b9f3-8d1163c5a507,489cc112-f6ce-4256-96f0-ca3cf41eb59c,53e073b5-1dac-49fb-b5d1-3564047008e6,5c54e581-16b7-46ba-84f6-e11aa4b8d9e0,6b1e6aec-dd79-4ef4-8043-90eb43c7fa2c,9147c917-4393-4322-977b-e6338fd29061,9be9485e-ad2f-41d0-8ac4-f57e3ac93aed,b2e8130d-86be-4002-a5bc-7235556660cc,b56a92bb-ce39-498d-922f-2802050d7204,c818c252-86b6-4110-b421-8c279ec8c231,e1167d89-45c7-47bc-8539-370607a5358c,ebac6587-35c3-4616-a81b-bd1da9a477d7,fefb4062-9f96-4f27-8b0b-162ccea44e01;SUPTYPE=AL;SVLEN=-35;STRANDS=+-;RE=15;REF_strand=10,5;AF=0.5	GT:DR:DV	0/1:15:15
+chr1	904485	14_0	CCGAACGCGGCCGCCTCCTCCTCCGAACGTGGCCTCCTCCGAACGCGGCCGCCTCCTCCTCCGAACGCGGCCGCCTCCTCCTCCGAACGTGGCCTCCTC	N	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr1;END=904584;STD_quant_start=51.4369;STD_quant_stop=33.1021;Kurtosis_quant_start=4.67957;Kurtosis_quant_stop=4.14782;SVTYPE=DEL;RNAMES=19a83ded-da76-4fd1-9d6c-4572155cc51b,3b5bfdad-0924-428c-8284-18d99627090f,3ba8bc4f-69e0-4d8d-b69f-9e2289b06688,415d65ee-1c0b-461c-b310-a38b48e5692a,610ff93f-9e06-4e31-9bf4-70a050355a1a,df47dc10-1b84-4197-970b-c094542544bf,e1167d89-45c7-47bc-8539-370607a5358c,f83b72b2-ba10-4b95-8511-0b082a408426;SUPTYPE=AL;SVLEN=-99;STRANDS=+-;RE=8;REF_strand=4,12;AF=0.333333	GT:DR:DV	0/1:16:8
+chr1	909237	14_1	CCCCGGGCGCACCGTTGCTGGTATATGCGGGGGTCGGGGTCAGGCCCCCGGGCGTTGCTGGTATATGCGGGGGTCGGGGTCAGGCCCCCGGGCGCACCGTTGCTGGTATATGCGGTGGTCGGGGTCAGGCCCCCGGGCGCACCGTTGCTGGTATATGCGGGGGTCGGGGTCAGGCCCCCGGGCGCACCGTTGCTGGTATATGCGGTGGTCGGGGTCAGGCCCCCGGGCGCATCTTTGCTGGTATATGCGGTGGTCGGGGTCAGGCCCCCCGGGCGCACTGTTGCTGGTATATGCGGTGGTCGGGGT	N	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr1;END=909509;STD_quant_start=51.2513;STD_quant_stop=11.9122;Kurtosis_quant_start=-2.0209;Kurtosis_quant_stop=-0.565726;SVTYPE=DEL;RNAMES=19a83ded-da76-4fd1-9d6c-4572155cc51b,3433165a-feff-48f7-91ee-0279d610ddc7,360ed506-7a48-4421-8af0-b64753d62481,3b5bfdad-0924-428c-8284-18d99627090f,3e451da8-35d7-4d31-8657-457c259b5530,5e0336fb-9a37-4ffa-b3c6-2bf76dd82963,610ff93f-9e06-4e31-9bf4-70a050355a1a,6e076f45-b13c-4c3e-a6fd-f5480d2226e5,78b39159-73d3-4d3d-96b9-9deb44c8eb61,78d028b3-4e8e-4f27-9dd1-7ef337811157,7d4c1e6b-566f-40f5-bf7a-424b7c727093,80c8f62a-aa31-4749-b42e-231f2fcf38f8,8c8772c1-9835-4ddf-981d-9deffa314b75,9147c917-4393-4322-977b-e6338fd29061,a19a1b07-5d05-445e-80ee-f7e497c9ecea,aa594e54-c5b8-40a5-833c-58875d3e5999,cfd6cb35-a661-4ea7-95cb-3aa38bfcaa7f,d9016bbe-ab64-4d23-b7a1-776e7e517976,df47dc10-1b84-4197-970b-c094542544bf,f83b72b2-ba10-4b95-8511-0b082a408426;SUPTYPE=AL;SVLEN=-272;STRANDS=+-;RE=20;REF_strand=0,5;AF=0.8	GT:DR:DV	0/1:5:20
+chr1	934091	16_0	GGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTCCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTCCGTTACAGGTGGGCAGGGGAGGCGGCTCCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGGGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTCCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGGGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCTGCTCCGTTACAGGTGGGCGGGGGAGGCTGCTCCGTTACAGGTGGGCGGGGGGGGCGGCTGCGTTACAGGTGGGCGGGGGGGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTCCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCAGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTCCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAGGCGGCTGCGTTACAGGTGGGCGGGGGAG	N	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr1;END=934910;STD_quant_start=19.3391;STD_quant_stop=21.5105;Kurtosis_quant_start=-0.515907;Kurtosis_quant_stop=-1.15124;SVTYPE=DEL;RNAMES=124b3bf2-e03c-4634-846b-2d530a2dbcb2,1781f8b9-81a6-42d3-ac68-1800883e491e,209cc8c7-9960-4a36-a996-4008bd485e46,242e4f3f-18e4-496c-ba24-6a9ff3a24125,2bebd884-0751-49b7-94e3-bd32bf865963,40bbb350-5375-47cf-944c-61b5f8c9d041,46ca9b09-71f1-4292-af26-cd48f1404028,5977409f-df40-4fe5-b999-a88b4b73634d,61c20644-d612-4241-abba-a6d54e3c1de4,662ce493-1ac9-4aee-b8b4-c43eb1018c81,7026857a-757b-4e15-bbe7-cb3eaed77a8a,77b9f439-e950-4fc8-892b-a63bd3e71bfa,83d4916f-5cb8-4b1b-babb-ecfac63be3b7,9528f7c1-6606-4854-920b-0fabdc84e9cd,96557e7b-5546-4ed2-a367-922cd2900539,b099ef7d-46d6-45d4-890d-91d1ca8501f0,ba913561-f678-4664-abe9-4a2f1d9bce07,bedf1d2f-09c6-4b54-b06b-9dec7430b3f9,d59e7196-e5d4-44da-9cb9-695dee19261b;SUPTYPE=AL,SR;SVLEN=-819;STRANDS=+-;RE=19;REF_strand=12,14;AF=0.422222	GT:DR:DV	0/1:26:19
+chr1	936270	16_1	CCCGGTCCCGCCTCCTAGGGCTCCTGGACGGAGGGGGT	N	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr1;END=936308;STD_quant_start=11.3181;STD_quant_stop=10.8674;Kurtosis_quant_start=0.18586;Kurtosis_quant_stop=0.128029;SVTYPE=DEL;RNAMES=124b3bf2-e03c-4634-846b-2d530a2dbcb2,209cc8c7-9960-4a36-a996-4008bd485e46,242e4f3f-18e4-496c-ba24-6a9ff3a24125,2bebd884-0751-49b7-94e3-bd32bf865963,46ca9b09-71f1-4292-af26-cd48f1404028,47d773c5-943d-4289-b877-2815dc141bad,5977409f-df40-4fe5-b999-a88b4b73634d,61c20644-d612-4241-abba-a6d54e3c1de4,7026857a-757b-4e15-bbe7-cb3eaed77a8a,77b9f439-e950-4fc8-892b-a63bd3e71bfa,83d4916f-5cb8-4b1b-babb-ecfac63be3b7,b099ef7d-46d6-45d4-890d-91d1ca8501f0,ba913561-f678-4664-abe9-4a2f1d9bce07,bedf1d2f-09c6-4b54-b06b-9dec7430b3f9,d59e7196-e5d4-44da-9cb9-695dee19261b;SUPTYPE=AL;SVLEN=-38;STRANDS=+-;RE=15;REF_strand=1,2;AF=0.833333	GT:DR:DV	1/1:3:15
+chr1	964676	18_0	CAGTGGGGATGTGCTGCCGGGAGGGGGGCGCGGGTCCGCAGTGGGGATGTGCTGCCGGGAGGGGGGCGCGGGTCC	N	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr1;END=964770;STD_quant_start=21.6036;STD_quant_stop=23.8911;Kurtosis_quant_start=-2.11167;Kurtosis_quant_stop=-2.08637;SVTYPE=DEL;RNAMES=007f21ba-17b4-46a3-aa4d-081ef24edbbd,0afc74bd-f468-4913-9597-1315c6a8533e,10d02ba9-b64d-4956-bf6f-825cef849b25,13506373-57b1-47eb-b006-787867b085ee,16824a47-9cc6-40fa-b0ed-6e1f55c2cecd,1b9f382f-b4f4-42f8-95ab-b1a1f10f50fb,290e4239-4819-430c-a311-bfd0563349c8,2b549ec4-b7ad-4e8c-b457-c6996e0c759c,39573df9-cf23-40a7-a704-d5ab2dfd5703,430f1b34-3d72-4b36-a77c-cec897e28ca7,4d0451c5-1810-48b4-97af-926032a90afa,52d3f37a-a63a-4756-92da-ac05f339f986,584a8bca-cf13-4c4f-b503-b9e62504fc1c,6af59770-110b-4424-a602-ea3ccb6a7878,788d7471-2b7e-413d-9723-0dfe554f8af8,7b3747dd-59c6-4178-a12e-b55b321d5167,7d3ab225-0534-4dad-a10c-b1e7fb2d238e,88bd6216-b874-4349-9782-8c6da9c98893,9f727bd1-9793-40c0-b319-e35c7200e56e,acc0886f-ee67-4fc8-bb0b-a2c590c1d054,b75e9abf-252a-4b0e-ae59-739b9f66d901,b99a4d9f-e569-413a-86dd-4a943b0d33cd,be9533d0-ad23-4627-9ebe-a1e6d7022a73,bf9ba184-af83-4258-9d40-7057111a23fe,d491d22b-dd30-4ada-8a29-1a8835ce9218,d8373e97-7940-456a-91ee-19616db9be98,d9ae820d-7a7f-4f8f-be5a-19cf08f5577c,daa2cc74-ea58-4f46-8483-176dd9419d26,feb36428-bbec-40fc-a31f-7efee21eff79;SUPTYPE=AL;SVLEN=-94;STRANDS=+-;RE=29;REF_strand=13,11;AF=0.54717	GT:DR:DV	0/1:24:29
+chr1	977229	18_1	GGGAACCGCCTCCCACCACCCCGCCAACCCCGGGAACCGCCTGCCCCCACCGACCAACCCCGGGAACCGCCTCCCACTCCCCCCGCAACCCCG	N	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr1;END=977322;STD_quant_start=11.2101;STD_quant_stop=4.83046;Kurtosis_quant_start=2.61025;Kurtosis_quant_stop=1.51714;SVTYPE=DEL;RNAMES=2b549ec4-b7ad-4e8c-b457-c6996e0c759c,43704193-64c6-43d5-973d-7101b47356bc,7f53067c-50d7-454b-a1e4-7908d7c99b0e,b99a4d9f-e569-413a-86dd-4a943b0d33cd,d11dbfd0-a514-48d5-9f05-966223c8a579,d6cc2ee8-8aba-4960-986f-05f699117e8c;SUPTYPE=AL;SVLEN=-93;STRANDS=+-;RE=6;REF_strand=4,7;AF=0.352941	GT:DR:DV	0/1:11:6
+chr1	988819	20_0	N	CAGGTGTTGGAGTTCTGGGTTGATTGTTTCTGGAGTTCAGGGTT	.	PASS	PRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr1;END=988865;STD_quant_start=2.62679;STD_quant_stop=1.61245;Kurtosis_quant_start=-0.549025;Kurtosis_quant_stop=-0.683669;SVTYPE=INS;RNAMES=029e506b-adf5-4deb-a0c6-edbdb42a09f7,12e733b1-6ea2-402b-aebc-2084047b6a62,1893e96e-75ee-4722-a8ef-3dfcb414a8a7,1d3f51af-4cba-46b6-bad7-5f426a10f1ea,42604927-b04c-47d6-939f-2621d9edaf1f,4f3c8ec8-67a1-4ecc-a764-6dc570e2af24,4f6b1562-1987-432b-a6bf-f0efaaecd4db,5353a5d1-4fe7-404f-b4de-eb80ad893f58,638ec215-8161-4df5-92ab-d643aa423e67,7f3be9df-7ed3-4d06-a42e-d1a476654736,92509d04-fc7b-4c4a-8766-a4241623bd40,b62c6c2b-8459-46b5-b0c2-4e178cf260ca,c088d984-e900-4ba1-8cce-080d4928e682,c5b2df39-0e38-478a-9210-6b8b609a12ed,d7955e6b-2206-481a-a262-d8989c555700,ddd90a85-78d0-422a-af35-ae50309bc66c,e99082ef-93e5-40ad-8455-94a80916b1c3,ec37fbf0-be64-41e1-982f-d23b2a266bf2;SUPTYPE=AL;SVLEN=46;STRANDS=+-;RE=18;REF_strand=10,8;AF=0.5	GT:DR:DV	0/1:18:18
+chr1	996295	20_1	N	GGGGCCACAGGATGCGGGGTGGGGAGGGTGAAGAGCCCCCGCGGGAAGGGGGCACCCCACATCTGG	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr1;END=996360;STD_quant_start=26.7245;STD_quant_stop=37.4793;Kurtosis_quant_start=0.970638;Kurtosis_quant_stop=0.790126;SVTYPE=INS;RNAMES=0915ef75-f739-4301-96af-ebceac688c4c,1893e96e-75ee-4722-a8ef-3dfcb414a8a7,28d1e658-9bae-4858-a742-061855b9278a,6333df9d-145c-4aa8-9028-f3f9ccc06b81,b62c6c2b-8459-46b5-b0c2-4e178cf260ca,d6f79315-e46b-497e-b827-767ac6cf1545,db2806e1-917f-45ee-a8b8-25636c4a5378,e99082ef-93e5-40ad-8455-94a80916b1c3,f07b32d9-c7f7-4680-a840-f666b6cf7729,fe68c184-2a97-4d11-9114-1df0a66767bf;SUPTYPE=AL;SVLEN=63;STRANDS=+-;RE=10;REF_strand=8,7;AF=0.4	GT:DR:DV	0/1:15:10
+chr1	998781	20_2	N	GGGCGCGGAGCCAGAGGGGAGGGGCGCGGGCGGAG	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr1;END=998815;STD_quant_start=11.393;STD_quant_stop=13.914;Kurtosis_quant_start=-1.1487;Kurtosis_quant_stop=-1.85367;SVTYPE=INS;RNAMES=0915ef75-f739-4301-96af-ebceac688c4c,0eac73c2-c883-4445-b0cc-009930fc72a1,1893e96e-75ee-4722-a8ef-3dfcb414a8a7,399e5085-05e9-4c7b-9914-335284af39eb,444b7e2a-3c4a-43ac-8b7b-e6c31efde0a2,6333df9d-145c-4aa8-9028-f3f9ccc06b81,8dcd62ff-861a-4c89-ad36-6a8517077d79,b14e68a3-8227-437a-803e-b29758052bc7,b4eb0a05-9685-4301-8b20-242f525b1ab9,b62c6c2b-8459-46b5-b0c2-4e178cf260ca,cd30eb7e-a992-468f-af66-0557742c3fea,e99082ef-93e5-40ad-8455-94a80916b1c3,f07b32d9-c7f7-4680-a840-f666b6cf7729,fe68c184-2a97-4d11-9114-1df0a66767bf;SUPTYPE=AL;SVLEN=49;STRANDS=+-;RE=14;REF_strand=5,3;AF=0.636364	GT:DR:DV	0/1:8:14
+chr1	1029388	23_0	N	CAGTGTCTGTACGCGGGCAGGTGGGGGGGACATC	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr1;END=1029422;STD_quant_start=17.6716;STD_quant_stop=19.5631;Kurtosis_quant_start=-0.450147;Kurtosis_quant_stop=0.145801;SVTYPE=INS;RNAMES=0dbbc1b4-43d9-4326-bd7b-cc802c9a2b4c,10e9b032-4bcc-4068-b5b1-1724a0b5795a,138eebd4-774a-4f46-8938-81e94e0af82d,2479e4b6-a85b-443b-9e27-cd05ed5e955e,2ab3f0c1-cb90-4d95-b552-11036cc90f15,4e2eef40-74dc-4389-90aa-056e20d4c94a,a34f86bd-7e61-443f-a57a-2024d132a2d5;SUPTYPE=AL;SVLEN=31;STRANDS=+-;RE=7;REF_strand=22,13;AF=0.166667	GT:DR:DV	0/0:35:7
+chr1	1030889	23_1	TGTGTGTGTGTGCAGTGCATGGTGCTGTGAGATCAGCG	N	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr1;END=1030927;STD_quant_start=21.095;STD_quant_stop=18.0305;Kurtosis_quant_start=0.568655;Kurtosis_quant_stop=1.76382;SVTYPE=DEL;RNAMES=025d71ad-b816-465c-85c7-852cc50e8f01,067562c3-ce9e-4e07-8b38-b04c2694a162,0dbbc1b4-43d9-4326-bd7b-cc802c9a2b4c,0f0ef869-42bb-4cf8-a626-d91c8bb04fe1,10e9b032-4bcc-4068-b5b1-1724a0b5795a,2479e4b6-a85b-443b-9e27-cd05ed5e955e,33077168-17c4-49b2-9e40-4908688eab22,4e2eef40-74dc-4389-90aa-056e20d4c94a,5ac7bfbe-f94b-4ccc-ad13-05785a0fa5d4,97876ae0-e67d-4a5d-ad62-dcfb2b317ec2,a34f86bd-7e61-443f-a57a-2024d132a2d5,b1f44b33-f202-460b-ab97-16997ba51e0b,de31c769-d318-481c-82fa-9bacc0ec7a3a;SUPTYPE=AL;SVLEN=-38;STRANDS=+-;RE=13;REF_strand=3,5;AF=0.619048	GT:DR:DV	0/1:8:13
+chr1	1041780	23_2	TGCCAGGGTCGAGGTGGGCGGCTCCCCCGGGGGAGGGCTGCGGC	N	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr1;END=1041825;STD_quant_start=12.9264;STD_quant_stop=14.7741;Kurtosis_quant_start=-1.54283;Kurtosis_quant_stop=-1.637;SVTYPE=DEL;RNAMES=07160a2f-abd0-4716-a7ae-ec72c443f87c,0dbbc1b4-43d9-4326-bd7b-cc802c9a2b4c,10e9b032-4bcc-4068-b5b1-1724a0b5795a,1b91e1fa-f320-4299-b4e9-4efcd5b3d8e1,23b5e5aa-f1f1-499a-a8a7-fef5b4df5432,35a03981-f54c-4e50-a133-1dfa1a201bbb,52e9d7df-a0e8-4e5b-9c63-93205fb3c801,59745b2a-056c-4e54-a844-c9ee0ce88256,664b6dd2-e6c1-4a97-b0ea-8e8d344b3f76,751f8a9c-2b84-4175-8133-ea28806910cb,796b71e9-2116-47b5-ac6a-50ee08cb9331,79ad12ce-dd22-4a58-b850-2357f13acfee,97876ae0-e67d-4a5d-ad62-dcfb2b317ec2,97d20bbd-53e0-4018-acf0-db05251300ec,9ff5026e-72e2-497d-8630-e3bc7293038b,b1f44b33-f202-460b-ab97-16997ba51e0b,ba433a82-16c3-438d-b25f-703eeebfe478,bd2f2b5c-c504-41e4-9525-b435c4564f47,c80ed0a5-8989-4fbd-9d7b-50e259ae7b10,e04f050f-0ccf-4a65-80aa-72be947daae1,ee88b583-c87d-43c4-8c0f-0d476bacec62,fae31778-33ce-4698-8e4f-4446a886e78a;SUPTYPE=AL;SVLEN=-45;STRANDS=+-;RE=22;REF_strand=19,9;AF=0.44	GT:DR:DV	0/1:28:22
+chr1	1068767	26_0	N	TGCGGCAGGGCGGGGCCACGCGGGCTGTGAAGGTGCAGGTGCGGCGGGGCGGGGCCACGCGGGCTGTGCAGATCTA	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr1;END=1068844;STD_quant_start=30.133;STD_quant_stop=23.4825;Kurtosis_quant_start=-0.543543;Kurtosis_quant_stop=0.420942;SVTYPE=INS;RNAMES=3613b9c9-bc84-4fe5-b334-fbe6cfefb0b3,4d613eaa-c75b-427b-bcfa-d2a36e9752e1,67c7025c-1529-4b62-8002-5226234cfe92,a85f6095-9dc2-4f1e-bf02-4a687064798b,e127f38a-6eea-4e8f-8f9c-ace633135257,e21dedd9-7626-4e45-bf24-cf0e7656d95d,eef7a5b9-dd4d-4a41-9cf1-6ba86ffd8ccf;SUPTYPE=AL;SVLEN=76;STRANDS=+-;RE=7;REF_strand=7,10;AF=0.291667	GT:DR:DV	0/0:17:7
+chr1	1076286	26_1	GGGGGGAGGCTGAGGCTACGGGGACTCCGTGGGGGGAGGCTGAGGCTACGGGGACTCCGTGCTGGGAGGCTGAGGCTATGGGGACTCCGTCGGGGGAGGCTGAGTCTATGGGGACTCCGTGGGGGGAGGCTGAGGCTATGGGGACTCCGTGCCGGGAGGCTGAGGCTATGGGGACTCCGTGGGGCAGGCTGAGGCTATGGTGACTCCGTGCAGGGCTGTGAGGCTACGGGGACTCCGTGGGGGGTGGCTGAGGCTACGGGGACTCCGTGGGGGGAGGCTGAGGCTACGGGGACTCCGTGGGGGGAGGCTGAGGCTACGGGGACTCCGTGGGGGGAGGCTGAGGCTACGGGGACTCCGTGGGGGGAGGCTGAGGCTACGGGGACTCCGTGGGGGGAGGCTGAGGCTACGGGGACTCCGTGGGGGGAGGCTGAGGCTACGGGGACTCCGTGGGGGGAGGCTGAGGCTACGGGGACTCCGTGG	N	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr1;END=1076752;STD_quant_start=268.831;STD_quant_stop=235.392;Kurtosis_quant_start=4.8416;Kurtosis_quant_stop=0.720321;SVTYPE=DEL;RNAMES=4d613eaa-c75b-427b-bcfa-d2a36e9752e1,505f2694-d3a3-4eea-a0d6-30d0458e700c,67c7025c-1529-4b62-8002-5226234cfe92,69e95039-a35c-430e-a899-7f82938a75e9,705d9f28-3f68-4b5d-8324-0dc43b348b30,a5dc9ee6-2b37-434f-b073-36d2c59b4036,a85f6095-9dc2-4f1e-bf02-4a687064798b,b0bfdf33-8a60-40d5-9119-96ed2e3158e9,e21dedd9-7626-4e45-bf24-cf0e7656d95d,eef7a5b9-dd4d-4a41-9cf1-6ba86ffd8ccf;SUPTYPE=AL;SVLEN=-466;STRANDS=+-;RE=10;REF_strand=9,6;AF=0.4	GT:DR:DV	0/1:15:10
+chr1	1077148	28	GAGGGGTGGCTGAGTCTATGGGGACTCCGTGCGGGGAGGCTGAGTCTATGGGGACTCCGTGCGGGGTGGCTGAGGCTATGGGGACTCCGTGCCGGGAGGCTGAGGCTATGGGGACTCCGTGCCGGGAGGCTGAGTCTATGGGGACTCCGTGCCGGGAGGCTGAGTCTATGGGGACTCTGTGCCGGGAGGCTGAGGCTACGGGGACTCCG	N	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr1;END=1077357;STD_quant_start=102.569;STD_quant_stop=91.527;Kurtosis_quant_start=-0.624606;Kurtosis_quant_stop=1.477;SVTYPE=DEL;RNAMES=212133cc-17f8-4547-b7c0-71bf328b78b6,6e23a346-ccd3-4e0a-83da-19ac7bf5d569,705d9f28-3f68-4b5d-8324-0dc43b348b30,76c26f65-ea8f-4f9c-8c9d-74c46b6f68ef,93978c11-26de-45b7-b3be-f04dba46a851;SUPTYPE=AL;SVLEN=-209;STRANDS=+-;RE=5;REF_strand=0,0;AF=1	GT:DR:DV	1/1:0:5
+chr1	1080846	26_2	N	CCCCCTCGTCCCTATCTCCTTCCCTCCCGCCCCACCTCGGTCCCTGTCTCCTTCCCTCCCGCCCCCACCTCGGTCCCATCTCCTTCCCTCCGCCCCCACCTCGGTCTGTCCTTCCCTCCGCCCCCACCTCGGGTCCCCTGTCTCCTTCCTCCGCCCCCCACCTCGGTCCCTGTCTCTCTTCCCTCCGCCCCCACCTCGGTCCCTGTCTCCTTCCCTCCGCCCCCACCTCGGTCCCTGTCTCCTTCCCTCCGCC	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr1;END=1081030;STD_quant_start=89.569;STD_quant_stop=98.2558;Kurtosis_quant_start=-1.80857;Kurtosis_quant_stop=-1.09275;SVTYPE=INS;RNAMES=440b87b8-c6ec-442a-af75-dfa89e00edf8,4424b8d4-4051-48fa-be56-d6919c8a0dc7,4d613eaa-c75b-427b-bcfa-d2a36e9752e1,65a7caae-cab8-4f0c-875f-28cbf39e1a0e,76c26f65-ea8f-4f9c-8c9d-74c46b6f68ef,86084906-62ee-4dd3-b4a4-47006f5cbd0c,9d9069f8-b384-4f99-8dad-c399744fca4d,a5dc9ee6-2b37-434f-b073-36d2c59b4036,a6cacd3b-9403-486b-87fc-b66025e3de6b,a745741f-6c0e-48c7-96ae-398a2f5a2dd1,a85f6095-9dc2-4f1e-bf02-4a687064798b,c7c22436-4967-4980-857e-d4105fd50e88,e21dedd9-7626-4e45-bf24-cf0e7656d95d;SUPTYPE=AL;SVLEN=83;STRANDS=+-;RE=13;REF_strand=7,6;AF=0.5	GT:DR:DV	0/1:13:13
+chr1	1139864	30_0	GCTTCCTGAGGTCAGAAGGTGGGGGTGTCAGCATCGAACCGGGGGACCTGGGTCCTGGGGAGCTTCCTGGGGTCAGAAGGTGGGGGTGTCAGCATCGAACCGGGGGACCTGGGTCCTGGGGAGCTTCCTGGGTTCAGAAGGTGGGGGTGTCAGCATCGAACCGGGGGACCTGGGTCCTGGGGAGCTTCCTGGGGTCAGAAGGTGGGGGTGTCAGCATCGAACCGGGGGACCTGGGTCCTGGGGAGCTTCCTGGGGTCAGAAGGTGGGGGTGTCAGCATCGAACCGGGGGACCTGGGTCATGGGGAGCTTCCTGGGGTCAGAAGGTGGGGGTGTCAACGTCGAACCGGGGGGCCTGGGTCCTGGGG	N	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr1;END=1140256;STD_quant_start=134.584;STD_quant_stop=128.357;Kurtosis_quant_start=-1.78526;Kurtosis_quant_stop=-1.77303;SVTYPE=DEL;RNAMES=03579faf-d5cd-44d0-bb48-de23461ec62a,0e6102ec-395f-44b3-a0af-91bb5151dafb,20ee4b65-a9e4-4e77-b144-17ee965f41d1,2be9761a-6073-4af9-a552-d3d45d98e6bb,30402e66-d8d8-49c6-a718-a6c263f7a348,426ef750-0308-48cf-927e-8386cb02b5ee,867072c4-7e47-47d9-bf1f-b1056ac2ff08,95eb0ff6-823f-408e-924b-e401abeb5176,96a778dd-8bff-43f5-a535-f23595d8a1e4,97ccf349-50cd-40d3-aad9-abe54542a246,a48ccc5b-fe15-4ff0-84d5-8e72fc9270c2,acdc8e01-1f35-4f4c-99cc-dab2db17fe7b,af81c555-08ce-4b39-970c-731d4345afae,b03b7b95-fa87-4c5b-95af-169634b315ae,ca39052e-b26a-4d0b-bb09-e1b0f886fff7,e162dc0b-3b10-4a23-ad43-04c1b770f511,edb790fc-a9a8-41e5-9330-733d40b6dd4a,f8620b5e-2fe8-41db-bf86-f25be5003c44;SUPTYPE=AL;SVLEN=-392;STRANDS=+-;RE=18;REF_strand=13,11;AF=0.428571	GT:DR:DV	0/1:24:18
+chr1	1140472	30_1	GGTCCTGGGGAGCTTCCTGGGTTCAGAAGGTGGGGGTGTCAGCATCGAACCGGGGAACCTGGGTCCTGGGGAGCTTCCTGGGGTCAGAAGGTGGGGGTGTCAACGTCGAACCGGGGGACCT	N	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr1;END=1140613;STD_quant_start=49.8849;STD_quant_stop=25.1515;Kurtosis_quant_start=-1.2216;Kurtosis_quant_stop=-0.707103;SVTYPE=DEL;RNAMES=0e6102ec-395f-44b3-a0af-91bb5151dafb,1744c00c-5a62-44aa-8a0f-6686d8fd0c2c,20ee4b65-a9e4-4e77-b144-17ee965f41d1,27bec66e-3470-4d6a-a1a0-bdf2e9f76aa7,30402e66-d8d8-49c6-a718-a6c263f7a348,426ef750-0308-48cf-927e-8386cb02b5ee,43236ba2-f7bc-4926-9518-f80bf9379dec,69ac7be4-859a-490c-aa1d-710e74c87805,867072c4-7e47-47d9-bf1f-b1056ac2ff08,95eb0ff6-823f-408e-924b-e401abeb5176,96a778dd-8bff-43f5-a535-f23595d8a1e4,a9f6b127-756a-46c4-b05b-36fe658bcdc2,ca39052e-b26a-4d0b-bb09-e1b0f886fff7,dce3bf36-c24f-4f92-8a66-2a28546aa380,e162dc0b-3b10-4a23-ad43-04c1b770f511,edb790fc-a9a8-41e5-9330-733d40b6dd4a;SUPTYPE=AL;SVLEN=-141;STRANDS=+-;RE=16;REF_strand=0,0;AF=1	GT:DR:DV	1/1:0:16
+chr1	1141363	30_2	N	CATCCACCTTAAAAATCCACAACCCCATCCTTACCTCTATCCCCACCACATCCTTACCA	.	PASS	PRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr1;END=1141434;STD_quant_start=5.99444;STD_quant_stop=7.48777;Kurtosis_quant_start=-0.908378;Kurtosis_quant_stop=-1.01514;SVTYPE=INS;RNAMES=03579faf-d5cd-44d0-bb48-de23461ec62a,059316d4-869c-4aa5-b5cb-89219c0e8cf7,0e6102ec-395f-44b3-a0af-91bb5151dafb,1744c00c-5a62-44aa-8a0f-6686d8fd0c2c,20ee4b65-a9e4-4e77-b144-17ee965f41d1,240ad04f-3a6d-4091-a021-14b0d5c5852e,27bec66e-3470-4d6a-a1a0-bdf2e9f76aa7,2be9761a-6073-4af9-a552-d3d45d98e6bb,2e886ee0-ba8e-44d2-870b-aeb6fe4d42dc,30402e66-d8d8-49c6-a718-a6c263f7a348,34f6a8b7-dee3-471e-8d7c-1d82c2af6fce,3efd9d8b-152e-4717-ac25-67a387888f95,426ef750-0308-48cf-927e-8386cb02b5ee,43236ba2-f7bc-4926-9518-f80bf9379dec,5814c0bd-ae36-442f-baab-cda55d24b2bb,69ac7be4-859a-490c-aa1d-710e74c87805,867072c4-7e47-47d9-bf1f-b1056ac2ff08,93ce268e-dc9c-4025-b06e-67ab7d5bca98,95eb0ff6-823f-408e-924b-e401abeb5176,96a778dd-8bff-43f5-a535-f23595d8a1e4,97ccf349-50cd-40d3-aad9-abe54542a246,a48ccc5b-fe15-4ff0-84d5-8e72fc9270c2,a9f6b127-756a-46c4-b05b-36fe658bcdc2,acdc8e01-1f35-4f4c-99cc-dab2db17fe7b,af81c555-08ce-4b39-970c-731d4345afae,b03b7b95-fa87-4c5b-95af-169634b315ae,ca39052e-b26a-4d0b-bb09-e1b0f886fff7,dce3bf36-c24f-4f92-8a66-2a28546aa380,e162dc0b-3b10-4a23-ad43-04c1b770f511,edb790fc-a9a8-41e5-9330-733d40b6dd4a;SUPTYPE=AL;SVLEN=57;STRANDS=+-;RE=30;REF_strand=3,1;AF=0.882353	GT:DR:DV	1/1:4:30
+chr1	1157295	33	GCCCACCCATCCCGCCCCCAGCCCACCCATCCCATCCCC	N	.	PASS	PRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr1;END=1157334;STD_quant_start=3.24037;STD_quant_stop=2.46982;Kurtosis_quant_start=0.152647;Kurtosis_quant_stop=1.82248;SVTYPE=DEL;RNAMES=1cb1934d-1fd3-494b-bf4b-ae8f9cf41d90,6738bc4d-79ff-4d32-8fe0-faeae3af727e,7c2f604f-d8aa-473c-81e9-9a90eb796bbe,8778275f-b6a5-436b-8397-8d2d7e021575,8aa2352e-5d62-40fe-9317-aa7c2f53bf41,8ef36b02-ba48-419c-beed-613ee47ba1cf,a395395c-4e15-486a-b806-e344f3f55174,c4687b3b-680d-4be8-b9f9-d5265f5a0964,dcc3d03e-894a-4ee9-addb-ea3bf13ae874,eb25737e-1e54-43ae-9847-00c581fb18cd,fe3e2ffe-81a9-4348-92c5-960666a6a0a2;SUPTYPE=AL;SVLEN=-39;STRANDS=+-;RE=11;REF_strand=11,13;AF=0.314286	GT:DR:DV	0/1:24:11
+chr1	1184798	34_0	N	GGAGCTAGCCTCCAGGCAACTGTGCCCCCAGTTCATGCAGAAGCTCCTCTTAGTTAGGGGGATGTTCCCTCTTGGGGATCCCTCATGAGGACAGGTCTCCTGGACAGCTTCCGGGAGCCAGTCTCCAGGGCAACCGTGCCCCCAGTTCATGCAGAAGTTTCTAGGTTAGGGGATGTTCTCTTGGGGGACCTCTGCAGGAGGACGAGCCCTCCGACAGTCTGGGAGCCAGTCCCAGGCACCGTGTGCCCCCAGTTCATGCAGAAGCCTTCTAGGTTAGGGGGATGTTCTTCTTGGGACCCCCGTGAGGACAGGCCTCCGGACAGCCC	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr1;END=1185052;STD_quant_start=69.5507;STD_quant_stop=31.3783;Kurtosis_quant_start=-1.85297;Kurtosis_quant_stop=-0.741594;SVTYPE=INS;RNAMES=277e1ae1-0168-457f-927c-4cdd7bb99461,29a280f3-67bb-467d-88bc-3b8ab020891c,3136fe80-1a33-41be-b56c-78a92eae8d90,35bfe6d5-d6f0-463d-aac8-a977a63158e2,454a61df-2858-4bc0-bac1-85e3b73fe7e0,49ac4bf5-faae-4ba9-af5b-de6fc0f24626,51bba82b-43e2-4996-831c-beb240efcd26,60f02903-b561-4003-9ff2-04dcae4ba733,62a0f631-8725-4dbf-a7dc-119d44e4b0cf,8102c2a1-7b8e-40a3-ad9a-87abf4ea1dda,85b18239-9e8e-4ab5-9642-ce22386937be,8a38435f-7873-41f0-8b55-e81b11347e5e,8b2e4a49-5031-41c4-88ed-fa12579cc05a,b002a3cb-36f2-46b4-bfbb-4d4ba9f178f0,bd6b4530-715c-461c-a245-5e6bec500fd4,ee0f5472-a5f9-4c75-b6c5-ea35f92b1742;SUPTYPE=AL;SVLEN=309;STRANDS=+-;RE=15;REF_strand=4,12;AF=0.483871	GT:DR:DV	0/1:16:15
+chr1	1192679	34_1	N	CAGCAACACTCCAGTTGGTAAGGAGGTAGACACTCCAGTTGATTTGGTAGACACCCAGTTGGTATGAGTGCAGACACTCCGTTCAGACG	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr1;END=1192770;STD_quant_start=34.6569;STD_quant_stop=33.5604;Kurtosis_quant_start=-1.06469;Kurtosis_quant_stop=-1.07101;SVTYPE=INS;RNAMES=11919ebe-07ef-4563-a58e-7e8852e40c62,29a280f3-67bb-467d-88bc-3b8ab020891c,430451ae-8c86-45ec-9e02-204b85126a03,4bc42b23-3c32-4cf4-a56d-76d7ee9d460a,51bba82b-43e2-4996-831c-beb240efcd26,62a0f631-8725-4dbf-a7dc-119d44e4b0cf,8102c2a1-7b8e-40a3-ad9a-87abf4ea1dda,85b18239-9e8e-4ab5-9642-ce22386937be,8a38435f-7873-41f0-8b55-e81b11347e5e,90410340-c9ec-4f3c-9f05-83aeaa50aea4,9ae6c2a6-0912-44e3-bc02-2c4088899257,9c49056c-60c3-4694-a9f2-1d24903ebaf6,a8000c57-5c8f-41b9-88ce-7a7c8e029e58,a861fb94-46de-474e-bd54-9b9b6967ee64,aace44f8-b9b9-440c-86d9-846e5972c063,bd6b4530-715c-461c-a245-5e6bec500fd4,c89afb67-abb2-41ab-bede-4ec6864e51af,fc640450-f292-4db5-a8cc-cda19822e131;SUPTYPE=AL;SVLEN=95;STRANDS=+-;RE=18;REF_strand=3,11;AF=0.5625	GT:DR:DV	0/1:14:18
+chr1	1195863	35_1	N	ACTCCTGAGCTCAAGCGATCCTCCTGCCTCAGCCTCCCAAAGTGCTGGGACTACAGGTGTGAGCCACGTGCCCGACTAACTTTGTGTATTTCTAGTAGAGATGGGGTCTCACCATGTTGGCCAGGCTGGTCTCAAACTCCTGAGCTCAAGCGATCCTCCTGCCTCAGCCTCCCAAAGTGCTGGGACTACAGGTGTGAGCCATGCGCCCGACCAATTTGTGTATTTTTAGTAGAGATGGGGTCTCACCATGTTGGCCAGGCTGGTCTCAA	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr1;END=1196134;STD_quant_start=10.8904;STD_quant_stop=9.59166;Kurtosis_quant_start=0.14018;Kurtosis_quant_stop=0.571278;SVTYPE=INS;RNAMES=00a96eba-04ea-4c0c-be77-943a5ddcbb3e,08988e9c-750f-4f68-aafe-2e5dc576b27e,11919ebe-07ef-4563-a58e-7e8852e40c62,309ad33e-4564-479c-beb3-51ed8d00faf8,430451ae-8c86-45ec-9e02-204b85126a03,4bc42b23-3c32-4cf4-a56d-76d7ee9d460a,50b989c6-4808-4fc5-9c7a-9d4356562d73,7bf4a6f9-c730-4436-909d-0e84b3b3d360,8102c2a1-7b8e-40a3-ad9a-87abf4ea1dda,9ae6c2a6-0912-44e3-bc02-2c4088899257,9c49056c-60c3-4694-a9f2-1d24903ebaf6,a8000c57-5c8f-41b9-88ce-7a7c8e029e58,aace44f8-b9b9-440c-86d9-846e5972c063,b4b6d8a2-0f6b-4d4d-bdbc-5a480dce1460,bfcefdd8-6767-449a-98d3-18de2c092438,fc640450-f292-4db5-a8cc-cda19822e131;SUPTYPE=AL;SVLEN=267;STRANDS=+-;RE=16;REF_strand=2,2;AF=0.8	GT:DR:DV	0/1:4:16
+chr1	1202531	36_1	N	AGCCCAGTACAGCCAGGCCAGTAACCCAGTCTCCAGCCCAGTACCCAACCCCCGAGGCCCAGTACCCATCCCGGGCCCAGTACGGCCAG	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr1;END=1202642;STD_quant_start=40.7689;STD_quant_stop=41.1278;Kurtosis_quant_start=-1.10253;Kurtosis_quant_stop=-1.13284;SVTYPE=INS;RNAMES=1adb49a2-289d-45e5-bc0b-3673e037af90,2df4f476-58f5-4eb4-a57f-71f911ddb69f,3b1716ef-9033-4f29-aeee-9b124b922b17,6d83fc2c-dd1e-442b-9850-57e3d6995045,7bf4a6f9-c730-4436-909d-0e84b3b3d360,95b9accd-2c2f-431f-99e3-eb6495dd7f8e,a8000c57-5c8f-41b9-88ce-7a7c8e029e58,ac886eb9-d238-4e1e-b71d-afc052b0e108,b4b6d8a2-0f6b-4d4d-bdbc-5a480dce1460,babe9803-f544-4f38-8e57-73141b7fc495,c7db4fb1-544c-42d8-b7aa-6bf08e24ea24,c9ad1d7d-d31f-4863-b739-176e8ecdb97d,cc9ffd44-267e-45e9-b1e2-632c01537746,f4f62c5e-60a8-4ba5-af8e-bc8262002045,f7d86242-2de8-4d7d-a21d-a9dfb95cf47f;SUPTYPE=AL;SVLEN=135;STRANDS=+-;RE=15;REF_strand=5,5;AF=0.6	GT:DR:DV	0/1:10:15
+chr1	1212606	37_1	N	CTGTGTCTCCTCCCAGCCCCTGGCCCTCTGCTCCCTG	.	PASS	PRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr1;END=1212643;STD_quant_start=9.40744;STD_quant_stop=8.43801;Kurtosis_quant_start=-0.556909;Kurtosis_quant_stop=-1.21037;SVTYPE=INS;RNAMES=04062431-13ec-4e44-ab66-5a1b0f33c5fe,26efab64-d240-43c4-8a69-2dddbb7442b0,2d0c7e92-46b6-4b6a-b2f3-33de1bda9d4a,333dc1ea-8db5-4688-baa6-4a7269baa8a4,8ee6f2fb-956b-4569-959a-3a774dc2119b,994d504b-d7de-46f8-887e-f979cce5b46e,a84ea522-82cc-4147-8fc1-d1cda00b2e15,c7db4fb1-544c-42d8-b7aa-6bf08e24ea24,dbc43eb6-558c-4a1d-923d-50df791325e7,f7d86242-2de8-4d7d-a21d-a9dfb95cf47f;SUPTYPE=AL;SVLEN=32;STRANDS=+-;RE=10;REF_strand=5,6;AF=0.47619	GT:DR:DV	0/1:11:10
+chr1	1225741	39_0	CACACACTCCACATGCCACAGACACGGGCCA	N	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr1;END=1225772;STD_quant_start=12.8776;STD_quant_stop=12.5366;Kurtosis_quant_start=-0.436071;Kurtosis_quant_stop=-0.820559;SVTYPE=DEL;RNAMES=1b9aba61-1a2b-43f2-9eff-6ff778de4a59,39425bb9-5854-41f9-b741-082aa63d0cf4,45562285-7d3c-45a4-87fc-36ca744e483b,a2cec193-29f2-4d60-8aa9-0467e37c5f4f,a6bd9111-655a-4da8-a72d-7f469d9b22e6,b756ca77-feee-479e-b35a-628f46eafc0f;SUPTYPE=AL;SVLEN=-31;STRANDS=+-;RE=6;REF_strand=9,8;AF=0.26087	GT:DR:DV	0/0:17:6
+chr1	1226338	39_1	GTACGGTCAGGAGGAAACATGGCACCTCCCCTCTGGGGGCTCTTTCCAGAAACCCTCAACCC	N	.	PASS	PRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr1;END=1226400;STD_quant_start=3.34664;STD_quant_stop=2.89827;Kurtosis_quant_start=-0.781543;Kurtosis_quant_stop=-0.761724;SVTYPE=DEL;RNAMES=008e889f-e959-4777-95e1-f0f53a4334dd,00be4721-5fce-43ac-8334-ba4546a49991,080d6acc-03a7-4cba-a86d-742a26c144de,0fa1f5d5-11c3-4de1-98cc-f9d60c32c056,1b9aba61-1a2b-43f2-9eff-6ff778de4a59,39425bb9-5854-41f9-b741-082aa63d0cf4,857efaff-3674-4ef8-89fe-f9638f609115,93c5ef6d-b54c-4808-acf2-05c00acdc5cc,93cfcbe8-1573-4033-a833-27cbdd8ec529,994d504b-d7de-46f8-887e-f979cce5b46e,a2cec193-29f2-4d60-8aa9-0467e37c5f4f,a6bd9111-655a-4da8-a72d-7f469d9b22e6,b756ca77-feee-479e-b35a-628f46eafc0f,c6c2851d-1031-45d1-95aa-486eb563efa5,d5fa6b3d-485a-4e4f-8efc-1f6c74c6802a,eff9bc97-f12b-43ec-86a8-2aabe4538a97;SUPTYPE=AL;SVLEN=-62;STRANDS=+-;RE=16;REF_strand=0,1;AF=0.941176	GT:DR:DV	1/1:1:16
+chr1	1227295	39_2	GGAAGGCGAGCTCGTGGCCAGGCCCTGCGGGAAGGCGAGCTCGTGGCCAGGCCCGGCGGGAAGGCGAGCTCGTGGCCAGGCCCGGCGGGAAGGCGAGCTCGTGGCCAGGCCCGGCGGGAAGGCGAGCTCGTGGCCAGGCCCTGCGGGAAGGCGAGCTCGTGGCCAGGCCCT	N	.	PASS	PRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr1;END=1227466;STD_quant_start=3.72827;STD_quant_stop=2.32379;Kurtosis_quant_start=-0.966443;Kurtosis_quant_stop=-0.297101;SVTYPE=DEL;RNAMES=008e889f-e959-4777-95e1-f0f53a4334dd,00be4721-5fce-43ac-8334-ba4546a49991,080d6acc-03a7-4cba-a86d-742a26c144de,1b9aba61-1a2b-43f2-9eff-6ff778de4a59,39425bb9-5854-41f9-b741-082aa63d0cf4,4c94aa0d-60a3-40ca-b22e-cc7a3bef0bfe,857efaff-3674-4ef8-89fe-f9638f609115,93c5ef6d-b54c-4808-acf2-05c00acdc5cc,93cfcbe8-1573-4033-a833-27cbdd8ec529,994d504b-d7de-46f8-887e-f979cce5b46e,a2cec193-29f2-4d60-8aa9-0467e37c5f4f,a6bd9111-655a-4da8-a72d-7f469d9b22e6,b756ca77-feee-479e-b35a-628f46eafc0f,c6c2851d-1031-45d1-95aa-486eb563efa5,d5fa6b3d-485a-4e4f-8efc-1f6c74c6802a,e38a0a27-360a-443d-a1f9-623653437f99,e3c28301-b82b-445e-8e8f-e452cb05525d,f66db954-420d-4eb9-8ff8-ad2bd3709180,f98168e1-e2ea-4e02-9b61-8bc06c077c08;SUPTYPE=AL;SVLEN=-171;STRANDS=+-;RE=19;REF_strand=1,4;AF=0.791667	GT:DR:DV	0/1:5:19
+chr1	1240679	41_1	N	CAGCCCTTCGCCTCGCCCCCATTCACCCCGGCCGTGGTCCCCGCCG	.	PASS	PRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr1;END=1240721;STD_quant_start=9.65919;STD_quant_stop=8.89382;Kurtosis_quant_start=-1.4823;Kurtosis_quant_stop=-1.52921;SVTYPE=INS;RNAMES=09729ccf-3196-4cae-9ebe-4faaa862e66e,0d7665dd-6576-42b7-a8c1-27bd68a9d49c,194e0f94-996f-4a42-ab07-2facd258dd40,1a7dab12-4454-4bed-b41b-2d6946aac30b,2f2a2f48-af67-4fbc-ba75-94f0aa75156c,8be9430e-48c3-4bcb-877c-45fd3777346b,a4a727eb-971e-46da-8003-6ed814f3532f,a9d6e62e-975c-4494-b03f-2f3297fb5338,bf321fd3-95ca-4cff-abe6-9a298c318f0f,cf376f94-eacf-4013-89ca-707dab183b9e,d600852f-8fa2-40ea-9339-07a7cd033a84,e38a0a27-360a-443d-a1f9-623653437f99,ebf79d47-5397-4b0c-8100-c1d91f9ebfbe,f98168e1-e2ea-4e02-9b61-8bc06c077c08,f9f30360-e8ec-480e-9803-34b7ded4ae8b;SUPTYPE=AL;SVLEN=41;STRANDS=+-;RE=15;REF_strand=4,8;AF=0.555556	GT:DR:DV	0/1:12:15
+chr1	1245142	42_1	N	CCCACCTCCCCCACTCATCTCCCTCTCCCCACTCCTCTGCCCTCCCTCCCTTCCCCCTCCTCCCCCACTCCCTTCCCTCTTCCCCCGACTCCTTCCCCCTACTCATCTCCCTCCTCCCCCACTCCCTCTCCCTCCTCTCCCACTCCTCCCCCCTCCTCCCCACTCCTCCCCACTGCTCTCCCTCTTCCCCCCCCACTCCTCCCCACTCCTCCCTCCTTCTCCACTCCTCTCCCCTCCCACTCCCCTCCCCCACTCCTGTCCCCTCCCTCCCCCCCCTCTTCCCCCTCCCTCCCCCACTCATCTCCCTCCTCCCACTCCCTCTCTCCCCTTTCCTCCCACTCCCCCCACTCCTCCCCCACTCCTCTCCCCTCTTGCTCCACTCCTCCCCCCCACTCCTTCCCCACTCTCCTCTCTCTCTCCCCCCACTCCTCC	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr1;END=1245551;STD_quant_start=23.8328;STD_quant_stop=45.7821;Kurtosis_quant_start=-0.017098;Kurtosis_quant_stop=-0.158047;SVTYPE=INS;RNAMES=07c11880-546b-4465-a6c4-0b239950bb6d,09729ccf-3196-4cae-9ebe-4faaa862e66e,1a7dab12-4454-4bed-b41b-2d6946aac30b,2f2a2f48-af67-4fbc-ba75-94f0aa75156c,317e4f04-e8e2-47a8-9c05-ef94c9a7ebd4,66736e57-ef09-42ad-8f74-beb60ddd937c,9a22573e-7dbd-417a-a38d-eeaf6301029b,a9d6e62e-975c-4494-b03f-2f3297fb5338,afe6a776-b0e5-4d00-9289-bb67aafd12c1,d15d462f-e582-4923-ac5d-a8d5295903a6,eb6918ae-e91b-4b8e-99ca-2aeef6556276,f9f30360-e8ec-480e-9803-34b7ded4ae8b,fbc2109b-27cd-451e-90a1-19586aaf4a1a;SUPTYPE=AL;SVLEN=420;STRANDS=+-;RE=13;REF_strand=3,9;AF=0.52	GT:DR:DV	0/1:12:13
+chr1	1248881	42_2	ACGGGCAGCCCTGGGAGGCTGGAGCGAGGGAGGCTGGGCCTCCCACTCCGCCCTACAGGCCGGGACACGGGCAGCCCTGGGAGGCTGGAGCGAGGGAGGCTGGGCCTCCCACTCCGCCCTACAGGCCGGGACACGGGCAGCCCTGGGAGGCTGGAGCGAGGGAGGCTGGGCCTCCCACTCCGCCCTACAGGCCGGGACACGGGCAGCCCTGGGAGGCTGGACCGAGGGAGGCTGGGCCTCCCACTCCGCCCTACAGGCCGGGACACGGGCAGCCCTGGGAGGCTGGACCGAGGGAGGCTGGGCCTCCCACTCCGCCCTACAGGCCGGGAC	N	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr1;END=1249211;STD_quant_start=22.8053;STD_quant_stop=21.8403;Kurtosis_quant_start=-1.69581;Kurtosis_quant_stop=-1.68758;SVTYPE=DEL;RNAMES=037003f8-b2b7-4e67-ab0a-e7b297be8a5d,09729ccf-3196-4cae-9ebe-4faaa862e66e,1a7dab12-4454-4bed-b41b-2d6946aac30b,2016fd8d-7da2-46f5-96f9-2a2b3c15f2fa,2f2a2f48-af67-4fbc-ba75-94f0aa75156c,317e4f04-e8e2-47a8-9c05-ef94c9a7ebd4,4cdbb972-6e35-4c1a-81b5-47a0e61cf0c1,66736e57-ef09-42ad-8f74-beb60ddd937c,83ddf732-bfb5-4e42-8042-160d0881c90e,901358de-fdfe-451c-81ab-16ae8872aa65,9a22573e-7dbd-417a-a38d-eeaf6301029b,9f9ef125-570b-4a75-b83a-beb47feef6d6,a9118b80-3582-4c37-8439-e4ee6e2820ba,a9d6e62e-975c-4494-b03f-2f3297fb5338,ac195749-76e0-41fd-a703-e392ee7a4426,afe6a776-b0e5-4d00-9289-bb67aafd12c1,b166d473-f790-4559-b720-9ad3025ad640,bc8c5a51-d98e-4f23-b1c8-1a311a65a568,c221a396-2145-44d3-9355-789c193d1c96,cf376f94-eacf-4013-89ca-707dab183b9e,d15d462f-e582-4923-ac5d-a8d5295903a6,d6107ee4-f1a4-4cba-b91e-68f4dd2812a4,e9fd2d30-9f04-4e0c-99ce-a984301a6235,eb6918ae-e91b-4b8e-99ca-2aeef6556276,f5588eb1-c65b-4885-8ac8-32b6c5b5c475;SUPTYPE=AL;SVLEN=-330;STRANDS=+-;RE=25;REF_strand=4,4;AF=0.757576	GT:DR:DV	0/1:8:25
+chr1	1284190	45	N	GGGGTGTTGGTGAGGGGTTGGGGTTGGGTGAGGGGGTGGGGTGGGGTTGGGTGAGGGGGTGGGGGTTGGGTGAGGGGGGTGGGGTCGGGGTTGGAGTGAGGTGGGCGCCACAGGCAAAGCCAGCAGGGTGGGGGCTGGGTGAGGGTGGGGCAAGGGCAGGGGCTGGGGCTAAGTGAGGGGGTGGGGTTGGGGTGAGGGGGTGTGGGGGCCGGTGAGGGGGTGGGGGGTTGGAGGAGGGGGTGGGGTGTTGGGTGAGGGGTTGGGGTTGGGTGAGGGGGTGGGGGGTTGGGTGAGGGGGTGGGGTGTTGGGTGAGGGGTTGGGGTTGGGTGAGGGGGGTGGGGGGTTGGTGAGGGGGTGGGGGTGGGGGTTGGAGGAGGGGGCTGGGGGCTGGGGTGAGGGGGTGTTGGGTGAGGGGTTGGGGTTGCGCAGGTGTGTGTGTGGCTGGGGGTGAGGGGGTGGGGGTGGGGTTGGAGGAGGGGAAGGTTGGGGGGTTGGGTGAGGGGGTGGGGGTGGGTGTTGGGTGAGGGGTTGGGGGGTTGGGGTGAGGGGGTGGGGCTGGGGGTGAGGGTGGCAGGGGGCTGGGGGAGGGGG	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr1;END=1284799;STD_quant_start=22.926;STD_quant_stop=86.9787;Kurtosis_quant_start=-0.693689;Kurtosis_quant_stop=-1.20235;SVTYPE=INS;RNAMES=0c6ea7a6-172e-42cc-897a-13d95d62bc45,3b2ce036-07ef-4213-949c-d0377d2950a5,42a6610e-7aeb-416d-bbef-1cef0d386717,47282d13-ea62-41a6-a0bd-92cedb34cb3f,8cd6c891-3e8b-4f83-a190-abd1f0321ad2,98eba6a7-8652-4240-a7bb-79a06683518a,bc037eb7-a08b-4888-9ec8-b5b5c886615e,c8ae8560-ca59-4cf5-846e-be8a2746f03b,cfb0bf08-d8bd-4f88-b3ce-22ee1fb0567b,d41ae278-2dde-4c65-81d5-6728386c7ea1,d905968a-b77b-4b8c-9ddc-7ec356b5148b,e02988fa-b749-4ba1-98c7-69cde5026997,ef9795fd-1558-4c0f-9a07-3ffc80f58234,f090615a-b5ec-4a2f-96ab-094ac8219fee;SUPTYPE=AL;SVLEN=599;STRANDS=+-;RE=14;REF_strand=4,10;AF=0.5	GT:DR:DV	0/1:14:14
+chr1	1324159	46	N	TATCGAGCCCGTGGCCAAATGAGGCTTGAGGCAAATTTCAAAAACACTTGCCCCAT	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr1;END=1324218;STD_quant_start=18.1576;STD_quant_stop=31.3401;Kurtosis_quant_start=0.83085;Kurtosis_quant_stop=-0.226595;SVTYPE=INS;RNAMES=0554bafb-13c8-4360-afde-ed31ba1dc2bf,1283b68b-cce2-4adf-897e-bb538321787a,1aaa67ca-6c19-476f-83b6-067ccdf62a30,30d14799-fb8b-4caf-bc0c-f49a1ab3e01d,3fd47162-bcf1-4282-9e93-c2b97119ea48,801df1d1-72f7-41ea-88bc-23f3beabe09f,80961a72-f599-48fd-89f8-bcda39a7b522,ae7e2c7d-9539-4ef8-87c2-149fcbcd929c,b63cfa8a-2ef1-4598-9677-9dfbddf4961d,bde6cca3-0f44-4abc-b9b1-3dc3b39093e6,c46748b8-a101-4214-8b60-8dcdd901052d,d4ff9d85-36b6-41b0-ab88-a56a12e0fcfb,e7bb299d-38db-4c6f-b60b-3ceb9efe73e3,f6b6760c-e9eb-4aff-8e43-dedb9698d437;SUPTYPE=AL;SVLEN=55;STRANDS=+-;RE=14;REF_strand=5,8;AF=0.518519	GT:DR:DV	0/1:13:14
+chr1	1339902	47_0	N	CCCCCAGCAGCCCCCACAGACTCCACCCACAGCCGCATGTCCCCCAGCAGCCCCCGCAGACCCACCCGCAGCCGCATGTC	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr1;END=1339971;STD_quant_start=11.7516;STD_quant_stop=11.9164;Kurtosis_quant_start=-0.816288;Kurtosis_quant_stop=-1.33387;SVTYPE=INS;RNAMES=04da80bc-81b5-4cdc-8ffe-7f1bec4f6472,10156bf4-1468-45e1-80e5-c9d29b6c9ce5,262a5294-1b83-4afb-a9a8-4a00e9d3cca3,298e0884-c687-4f03-85ea-722fcd4e4c8a,4819a1ff-f986-49b2-bcdb-227fca169690,500dac03-7a9b-4739-8335-a0b096e79808,66fb9e46-bfd0-43ba-9836-481feb3ccb0c,6daf23b8-9c71-4508-a570-9c76ec1e6415,7ad61263-7c14-4778-9ac4-e5a8bcb01870,80961a72-f599-48fd-89f8-bcda39a7b522,850cec68-5eb5-41e0-b940-dbfc8ae45fbc,a50d14a7-5e42-40b5-9c92-a5e88265cf14,c380b278-00d8-4313-9249-ef02a0ecfd0b,cdca1690-e8b9-4e03-b9b3-df4333ff650b,d366963c-1b8d-4554-9e4a-2986f8337291,d90af666-03d9-44e4-bd39-13cc4b1cd95c,deeabf69-eddb-4515-9169-048cc1ab5e8a,fb253266-0c10-43f1-92f5-11c1601107f5;SUPTYPE=AL;SVLEN=75;STRANDS=+-;RE=18;REF_strand=5,10;AF=0.545455	GT:DR:DV	0/1:15:18
+chr1	1350096	47_1	GGGGAGTGAGGAGGGGGCCTGGACGGGGCAGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGACGGGGCAGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGACGGGGCAGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGACGGGGCAGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGACGGGGCAGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGACGGGGCAGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGACGGGGCTGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGACCGGGCAGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGACCGGGCAGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGACGGGGCAGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGACGGGGCAGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGACGGGGCAGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGACGGGGCAGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGACGGGGCAGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGACGGGGCAGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGACGGGGCAGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGACGGGGCAGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGACCGGGCAGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGACGGGGCAGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGACGGGGCAGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGACGGGGCAGGAGCGACGGGGGGAGTGAGGAGGGGGCCTGGACGGGGCAGGAGCGACGGGGGGAGT	N	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr1;END=1351176;STD_quant_start=21.5198;STD_quant_stop=28.674;Kurtosis_quant_start=1.78891;Kurtosis_quant_stop=2.40552;SVTYPE=DEL;RNAMES=2d5ac90d-887b-406e-8d4f-3942594b3bf3,32e9e238-02d6-487c-a9be-8bf9bd99270a,3c99af7e-584a-448b-81dd-0c39c927200d,3d4d0474-244a-4618-8f28-e383b161918a,3e429703-76cf-4f53-aa9d-03325d98056c,3f950823-a8a6-433d-b805-d3bceb1a6e0f,45d3a422-62d7-4592-8cc4-3aff359a06ef,500dac03-7a9b-4739-8335-a0b096e79808,550a7bb2-4116-43ff-806a-24fbbe31653c,61090925-ea4d-4b09-850b-cf04c0566601,63c74d33-7e97-45f9-a1d6-a589aa2f74da,850cec68-5eb5-41e0-b940-dbfc8ae45fbc,a50d14a7-5e42-40b5-9c92-a5e88265cf14,d50cb4a7-ea94-44a4-bc0d-34c1e54743ea,d781296c-fbd6-4b69-a189-19ceb599cf93,ef7451df-782a-4d52-b4bd-8cdb75b048f7,f9882952-a02e-4100-ba78-54f16d02075a;SUPTYPE=AL,SR;SVLEN=-1080;STRANDS=+-;RE=17;REF_strand=6,8;AF=0.548387	GT:DR:DV	0/1:14:17
+chr1	1382491	49_0	N	CACCCCTTCCCAACAATACAGTAACAATCCAGAGGCCACCACCCCTTCCCAATCCAGTAACAATCCGGT	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr1;END=1382627;STD_quant_start=54.9363;STD_quant_stop=78.4895;Kurtosis_quant_start=0.239206;Kurtosis_quant_stop=-1.35672;SVTYPE=INS;RNAMES=031dab0a-580f-4be2-a665-8873f4fe00b5,18ed9029-212b-4761-935b-e24e3f2335a0,2425f835-9cd7-4965-891a-445d5c82100e,47b4bc54-5717-42c7-8a3a-74c006870b50,51645d41-cfe8-4d0c-8cf4-3370c85e8d8f,5fa055e8-9694-47fb-bfa3-026a05270ceb,7ae4b36e-e16f-48be-b025-b65c61c10f22,ace3a3d8-cfe5-4fa4-a6fa-87bab1cc8974,c9a408b4-e1d5-4ea4-89e2-1be3c480b771,d2c003a3-5b0f-4938-8c0b-30c582fd859b,eb9380e9-db69-4c1c-b09e-498537795866,f470deff-8d76-4561-819f-6e0a15bffd71;SUPTYPE=AL;SVLEN=112;STRANDS=+-;RE=11;REF_strand=12,8;AF=0.354839	GT:DR:DV	0/1:20:11
+chr1	1382671	49_1	N	AACAATCCACTAACAATCCAGAGGTCACCACCTTTGTGGCCAGTAACAATCCAGAGGCCACCACCCCTTTAACAATCCAGTGATCCAGGTCACACCCTTTTCCAACAATCCACTAATCCAGAGGTCACCACCCCAACAATCCTAACAATCCAGAGGTCACCACCCCTTCCCAACAATCCAGTAACAATCCAGAGGTACCACCCCTTCAACAATCCAGTAACAGTCCAGAAGTGCCACGCTGCAACAATCACTAAACAATGAAGGTCACCACCTTAACAATCACTAACAATCAGAGGTCACCACCCCTTCCCAACAATCCAGTAACAATCCAGGTGCCACCCCCTTCCCAACAATCCAGTAACAATCCAGAGGTCACCACCAGCACCAACAATCCACTAACAATCCAGAGGTCACCACCCCTTCCCAACAATCCAGTAACAATCCCGAAGTCACCGCACCACTTCAACGACCAGTAACAATCCAGGGTTACCACCCTTC	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr1;END=1383328;STD_quant_start=48.5716;STD_quant_stop=219.294;Kurtosis_quant_start=-0.873336;Kurtosis_quant_stop=-0.73496;SVTYPE=INS;RNAMES=031dab0a-580f-4be2-a665-8873f4fe00b5,1fa65f63-cc9c-47d5-ae79-edc5ad0ea2a5,47b4bc54-5717-42c7-8a3a-74c006870b50,51645d41-cfe8-4d0c-8cf4-3370c85e8d8f,7005bf86-94d0-477c-9c5b-6621cac52dcc,7511864c-ceb2-41e0-990a-47e3a512333c,7ae4b36e-e16f-48be-b025-b65c61c10f22,9da165d9-cd92-4f66-838c-3ce69f156e84,ace3a3d8-cfe5-4fa4-a6fa-87bab1cc8974,ba9672e5-1031-4619-95e7-b569294cdb48,f470deff-8d76-4561-819f-6e0a15bffd71;SUPTYPE=AL;SVLEN=653;STRANDS=+-;RE=11;REF_strand=0,0;AF=1	GT:DR:DV	1/1:0:11
+chr1	1382773	49_1	N	GCTAACAATCCAGTAACAATCCAGAGGCCACCACCCTTCCCAACAATCCAGTAACAATCCAGAGGTACCACCCCCTTCCCAACAATCCAGTAACAATCAAGGCCACCACCCCTTCCCAACAATCCAGTAACAATCCAAGAGGACACCACCCTTCCCAACAATCCACTAGCAATCCAGAGGCCACCACCCCGCTTCCCAGCAATCTGACAACGACCCAGAGGCCACCACCCCTTCCCCAACAATCAGTAACAATCCAGAGGTCTTACCCTTCCCAACAATCCAGTAACAATCCCGAGGTGCACCACCCCTTCCCAACAATCCAGTAACAATCCAAGAGGTCACCACCCCTTCCCAACAATCCAGTAACAATCCGGTCACACCCCTTTTCAACAATCTCCTGGTAACAATCCGATT	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr1;END=1383281;STD_quant_start=93.8962;STD_quant_stop=287.586;Kurtosis_quant_start=-0.319993;Kurtosis_quant_stop=1.42519;SVTYPE=INS;RNAMES=18ed9029-212b-4761-935b-e24e3f2335a0,5fa055e8-9694-47fb-bfa3-026a05270ceb,8785c74a-f492-4719-9121-079a6417e94b,9da165d9-cd92-4f66-838c-3ce69f156e84,a439ed56-3edf-44d9-bf00-d97b318e76cb,ab32879e-eb33-4a1a-8487-f33420ef77a6,b225fd26-a014-46f0-9d67-69bc928cc6bf,b527a53b-3e90-4329-8a95-741663307174,d2c003a3-5b0f-4938-8c0b-30c582fd859b,eb9380e9-db69-4c1c-b09e-498537795866;SUPTYPE=AL;SVLEN=576;STRANDS=+-;RE=10;REF_strand=1,0;AF=0.909091	GT:DR:DV	1/1:1:10
+chr1	1477854	52	N	TTTTAGTAGAGACGGGGTTTCTCCATGTTGGTCAGGCTGGTCTCTAACTCCCGACCTCAGGTGATCCACCCGCCTCGGCCTCTCAAGCTGTTGGGATTACAGGCATGTGCCACCACGCCTGGCTAATGTTGTAT	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr1;END=1478020;STD_quant_start=12.7243;STD_quant_stop=24.7809;Kurtosis_quant_start=-1.6989;Kurtosis_quant_stop=-1.97912;SVTYPE=INS;RNAMES=07b60109-cdef-4cd2-abca-2ea1363e0c91,0e05fbc5-5ee7-4ad1-942d-b164cadb94d3,2924c668-d524-45ac-bead-4d285f81b619,2f7309fb-cddd-4875-a0f3-f840e3e40213,59110e1d-3156-4eac-a45f-715787273ff4,67a300aa-1927-4adc-9ee6-ed3534ccfba1,6a36cbf8-7d4b-4691-b122-131c5af687af,6bded483-378a-458d-9f76-78e4f172d4e1,6decd45c-0d98-4323-be81-152e5e0fd97f,88dbeec9-ccb8-4466-b504-1276b26e437a,972d07da-7206-475c-bb67-d9ec6eadf48f,a04994e3-af48-4d81-9393-3b909f1eb329,a205b141-4498-48c2-8dd6-4a9a0d41de87,aabe76bd-9eb5-4a00-a7ce-80624ed3249b,b0c224b4-f321-4396-a5d6-b4532e6055a0,bdd1fd54-de1f-48db-bc04-0f44fa780b44,d1c4824a-20f7-4c6a-94fc-8631e56a0e5f,d2809436-8fce-4737-93f9-c4d6365fd29b,dd8aa318-13f5-47f6-b942-a15cfa44ed9f,ee7926cb-fdb1-44f8-9088-8c9564bd9901,f0da4292-174c-4644-b7a7-99656f6c0b0a,ffbc26e5-d2c2-47de-84db-a2dc513802ba;SUPTYPE=AL;SVLEN=133;STRANDS=+-;RE=22;REF_strand=12,10;AF=0.5	GT:DR:DV	0/1:22:22
+chr1	1546874	53	N	GCCGGGCACGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCATCTGAGGTCAGGAGTTTGAGACCAGCCTGGCCAACATGGAGAAACCCTGTCTCTACC	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr1;END=1546993;STD_quant_start=10.127;STD_quant_stop=14.476;Kurtosis_quant_start=4.51638;Kurtosis_quant_stop=4.89892;SVTYPE=INS;RNAMES=09299259-d83c-4aa5-82bd-f56be5ce9644,405fa48d-a656-4ed0-8b6f-62eb733f543c,42fe0d36-e9c9-40ab-9ba9-230f92cf317b,6a51b1c9-4ed2-4fb0-8938-3715299a7bae,7077c5e7-6290-494b-b083-95ab4d5ff81d,764610e4-2563-4d90-b950-d39c00d405f7,9af8a08f-f027-4782-975c-709f3691fd59,a3444d11-9c79-4a51-a077-576c8b8cc347,ea688935-8eef-4635-9532-06a1ac559daa;SUPTYPE=AL;SVLEN=119;STRANDS=+-;RE=9;REF_strand=9,7;AF=0.36	GT:DR:DV	0/1:16:9
+chr1	1565675	54	N	CCTGTGTGGTGCAGGGCAGAGAACAGGACGTCGCATGGGCCCGACGGTGCTGGCTCCATGGGAACCGAGACCCAACACTCAAAGGAGACA	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr1;END=1565765;STD_quant_start=30.6359;STD_quant_stop=29.5296;Kurtosis_quant_start=-1.01354;Kurtosis_quant_stop=-1.11149;SVTYPE=INS;RNAMES=25cac45a-d879-4485-95f3-033b3d563fcc,6986875f-0079-43f4-b9e3-cd3aff393a0c,ac287bd2-b6ac-47b8-81ad-5a57790ee05b,c94b73ad-6aa2-4f25-adae-67029234b312,caf29d05-d9a4-4ffe-b87a-52f61e34df88,cf636c5d-4d04-4716-8915-4374376612e6,db5aff64-6919-4022-a47f-193efee55adb,e79af9a6-2b0f-42ae-b7dd-afb3ae25f7bd,ed481cd5-296a-4124-bb1d-4eede5da8b48;SUPTYPE=AL;SVLEN=98;STRANDS=+-;RE=9;REF_strand=11,7;AF=0.333333	GT:DR:DV	0/1:18:9
+chr1	1605539	55	AGGTGGGGTGGTGAGGTGAGGGGTTGTCTGGTCAGGTGT	N	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr1;END=1605578;STD_quant_start=20.1742;STD_quant_stop=6.72947;Kurtosis_quant_start=3.80595;Kurtosis_quant_stop=1.69901;SVTYPE=DEL;RNAMES=25a79ac4-2527-42c8-a92f-4413cf2d79a7,4532170c-09d3-4a4d-bb82-6bf9a0285295,4625b6d8-c84a-451d-b1c0-8b69ffe7d14e,58febb71-3f6d-4ae4-ab92-a274f854d1ab,69944d07-10fc-4685-ae49-0b1abf934484,b5d1648f-e2b7-429c-a313-5097fa483adf,c9971d2d-63f1-495c-bd4f-b6b89a24ec55;SUPTYPE=AL;SVLEN=-39;STRANDS=+-;RE=7;REF_strand=15,7;AF=0.241379	GT:DR:DV	0/0:22:7
+chr1	1651424	56	CCCCTCTGAACGGTCTGTGACACACGCATGCTTTCAGCTAGAGTACTCTATAGCC	N	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr1;END=1651479;STD_quant_start=14.9963;STD_quant_stop=13.3083;Kurtosis_quant_start=1.5721;Kurtosis_quant_stop=1.07756;SVTYPE=DEL;RNAMES=3e560e4f-c1b7-4a35-9414-eb142c3b7371,5fa2de07-06ec-45b5-8fea-d8e40c4e87c4,80ba1e79-0b04-4fe7-a303-b445b8542da8,8323da44-1b1e-44a6-bc51-e8f2ef737de0,86f27843-e02f-42ab-92cf-d2bf6aa778af,997bd44b-d3a6-463b-8b23-f666f2fe9020,aabdff84-36d9-49c2-b616-d4735837347f,c25206e5-b872-4b86-8f9f-65694bb54114,ce9f1e56-86b8-4126-8e10-aab2b1eb7714;SUPTYPE=AL;SVLEN=-55;STRANDS=+-;RE=9;REF_strand=10,11;AF=0.3	GT:DR:DV	0/0:21:9
+chr1	1666974	57	CACGCCTGTAATCCCAGCTACTTGGGAGGCTGAGGCAGGAGAATCACTTCAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCAAACCAGAGAAATCCAGCTCTGGGTGACAGAGCAAGACTCTGTTTCGGGAAAAATAAAATACATAGGCAGGGCGCGGTGGCT	N	.	PASS	PRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr1;END=1667141;STD_quant_start=0;STD_quant_stop=0;Kurtosis_quant_start=-0.819463;Kurtosis_quant_stop=0.270047;SVTYPE=DEL;RNAMES=060d6b86-2e66-4bbc-9a7c-9415526fa59b,072bb3a8-d200-4713-93c2-402b748b423e,075bc3cd-b1fc-4aa1-ba75-68943deea3c6,10b7c597-ef63-402c-9097-f3376630e3c4,14c39ed9-2b60-45b4-a156-ac1b0d908e95,2a1516d4-c826-4977-b218-2e1cd8604862,31afceef-4ee2-4387-9ac2-fd2e68aa0aca,35a63615-be4e-4c08-aa01-82f619f97040,3fbb21ab-76a1-4150-8514-b6a0469f9b15,4edd050e-a954-4530-a877-d6f971f06b68,51a68670-f0ec-4dcb-8724-f4cb2aa00601,615e378f-c130-4a00-9bf0-abd8e121548f,667e1d2b-597e-4aab-8a12-afcff720acf5,80ba1e79-0b04-4fe7-a303-b445b8542da8,8442a8ac-c73a-4f46-b2f5-30b585ad4616,8464a7a3-7aac-4df0-879c-fbd179ec8f2d,93501dc1-8719-427c-a534-5213d8388a95,9e14698f-da46-4339-bb7e-f2e110546fd4,b4a6b25c-ca71-4120-9dcb-5d60e5a23d1a,c51126bb-e48b-40ce-96a6-1a4301d59c71,d9d4337e-9156-4143-9353-42d114ccb8f7,e8098d58-79d8-4078-aa30-0d00a54368d8;SUPTYPE=AL;SVLEN=-167;STRANDS=+-;RE=22;REF_strand=7,12;AF=0.536585	GT:DR:DV	0/1:19:22
+chr1	1717623	58_0	TCTCTCTGGTTTTCGGTCTGTGACACACGCATGCTTTCAGCTAGAGTTTG	N	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr1;END=1717673;STD_quant_start=11.201;STD_quant_stop=11.5226;Kurtosis_quant_start=-2.1147;Kurtosis_quant_stop=-2.02344;SVTYPE=DEL;RNAMES=06f856d1-5b17-42aa-9d6b-f21c66498038,0c812675-f9c3-4cc2-b4bf-5e1522b413f7,15f0fc0a-5a10-4b82-a5dc-d24da2596044,2dedee60-2b41-417c-ba5e-0a4643d1e88e,2ecf0dc8-2605-42cd-a772-c9daa1ef189d,35dbb28b-1fab-4d4e-a6e4-224dcd7d5ece,44af996b-4fd6-40fa-a745-74450571919f,4a860044-14a8-4664-9846-b4c79cc1d378,5c1563fd-0a6f-45b2-90cb-1766cc4005aa,5e2b3871-32ff-43b5-9d9c-347908496c13,630856f7-d053-4cf3-a4f4-1ee922a7d71b,6f5bbe6e-5829-4055-b877-9bb70b07481f,87115b33-5081-4912-8489-633290dcac77,8d802d4a-8303-46ad-a184-e69ef9423609,95eb2d56-d6f7-4d0c-bb3a-55fc40088a7b,988e5d2a-032a-47a9-ac76-91ca9bce9a5e,9bb4d606-0331-4508-bbd8-9539a1bfc5af,a7cb795d-1d7d-4660-9999-e9287d3e3793,a89a4a91-6936-448e-bce6-8fbf748a97b8,ac3c2093-1fae-425c-a1f1-746d9562a403,b49ad046-fa38-4191-86db-753a9a202f44,c7ea695f-a0d3-43d0-9a12-3f7ae05af65b,c92a3f69-30ad-4d7a-8a7a-865c2b8817ad,cacb7c9c-efc3-4873-8d2a-2bcf4d08d93e,dcc35244-21cb-4b79-9826-5846971fdd0a,e20d17db-5b1c-447b-a1cc-322875c886f4,fb6a3418-9ee8-46cf-886e-8e50e7743a19;SUPTYPE=AL;SVLEN=-50;STRANDS=+-;RE=27;REF_strand=10,17;AF=0.5	GT:DR:DV	0/1:27:27
+chr1	1719634	58_1	N	TTTTTTTTTTTTTTTTTTTTTAGACTCGCTCTGTCACCC	.	PASS	PRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr1;END=1719670;STD_quant_start=0;STD_quant_stop=3.06594;Kurtosis_quant_start=nan;Kurtosis_quant_stop=-1.18244;SVTYPE=INS;RNAMES=0c812675-f9c3-4cc2-b4bf-5e1522b413f7,15f0fc0a-5a10-4b82-a5dc-d24da2596044,2ecf0dc8-2605-42cd-a772-c9daa1ef189d,a89a4a91-6936-448e-bce6-8fbf748a97b8,c92a3f69-30ad-4d7a-8a7a-865c2b8817ad;SUPTYPE=AL;SVLEN=36;STRANDS=+-;RE=5;REF_strand=2,1;AF=0.625	GT:DR:DV	0/1:3:5
+chr1	1749608	60	N	CGTACATATTTTTTCTGTGTGATACGTGTGTGTGTGTGTCCTCAGTAATT	.	PASS	PRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr1;END=1749658;STD_quant_start=5.15752;STD_quant_stop=4.69042;Kurtosis_quant_start=-1.52873;Kurtosis_quant_stop=-1.51005;SVTYPE=INS;RNAMES=069e1174-a90c-4a8a-b9be-dabb89b4f617,0d26ef2f-f950-4838-b0cd-2653c1ba9321,1dce2fd9-4045-47b9-ba8b-06dd52f2cabe,507e6a6b-87e9-4a0d-a837-9f91679a8d0e,50d64c3e-602f-4f8e-afdc-75c12855505f,76ad039b-f625-4578-ac35-3f536b735d06,794c41e1-4459-4017-9203-d77cc61b11ba,8a853631-8dd9-488a-92dd-714e0379f76f,8b335610-8201-44d1-9ae4-8744df3d006e,8bd5465a-56cc-4113-90e6-02441057a170,94d7beb9-6b85-49f6-aea2-250625cb0a0a,a642d74f-8330-43bf-b3f5-47b511d7cefb,d28a37eb-cbd4-4e36-a6a9-db5e5304af79,d412e8ee-9386-43f2-81c9-fd71136a8258,d74db67f-e377-4c86-b559-29839d3cc68e,e1435545-3a30-4d63-add9-f8192bf2c8b1,e2550f3f-e93c-4725-bf15-9e889c3c44be,f4197f06-21c9-4510-ae09-9a76159e43ab,fbc25fb5-5bbe-41d0-8819-3f71b5b42d6b;SUPTYPE=AL;SVLEN=48;STRANDS=+-;RE=19;REF_strand=6,10;AF=0.542857	GT:DR:DV	0/1:16:19
+chr1	1929384	61_0	N	GAGGGGACAGGTCTGGGGAAGGCTAGGAGAGAGAGGTGAGGGGGGAGGCAGGGGAGATGTTGAGGGGGAGGGAGGGGAGAGGGTGGGGGAGGGAGGGGGGAGAGAGGGGGTAAGGGGGAGGAGGGAGGGGGAGAGGGGTAGGGAGGAGGAGAGGAGGAGGGAGAGGGTAGGGAGGGAGAGGAGGAAGAAGGGGAGGCGCTTGGGGAGGGAGGGAAGGAAGAGGGAGGGAGGGGAGAGGGAGGAGGGAGCAGGTGGGGGAGGGAAGGAGGGGGAAATGGTATGGGGGAGGGAGGGAGGGGGAGAGAGGGTAGGGAGAGAAGGGGGAGCAAGAGGGAAAGGGTAGGGGGAGGAAGGAAGGGAGAGGGTAGGGGAGGTAGGGAGGAGGAGGGTAGGGGAGGGGAGAGGGGTAGGGGAGGGAGAGGGCAGGGGGAGGGAAGGGGAGGGAGACGGTAGAGGGAGGGAGGGAGGAGAGGGAGTAGGGGAGGAGAGGAGGGGAGAGGGTAGGAGGGGAAGGGGGAGGGAGGAGAGGAGGGGAGGGAGGGGGAGGGAGGGGAGAGGGTAGGGAGGGAGGGGGAGGAGGGAAGAGGGTAGGGAGGGAGGGAGAGGAGAGGGGAGGAGGGAGGGAGGGAGGGAGAGGGTGGGGAGGAGGGAGGAGGTTAGGGAGGGAGGGAGAGGAGGGGAGAGGGTAGGGGGAGGGAAGGAGGGAGAGGGTAGGGGAGGGAGGAGGGGGAGAGGGGAGGGGGGAGGGAGGAAGAGGAGGGAGAGGGTAGGGGAGGAGGGAGAGGAGAGGGAGGGGGAGGGGAGGAGGGAGAGGGTAGGGAGGGAGGGAGGAGGAGGGGAGGGAGTGGGGAGGGAGGGAGAGGAGAGGGGAGGGAGGAGGGGGAGGAGGGAGAGGGTAGGGAGGGAGGGAGAGGAGGAGGGAGGGGGAGGGGAGGAGGAGAGGGTAGGGAGGGAGGGAGAGAGGAGGGAGGGAGGGGGCAGGAGGGAGAGGGGTAGGGAGGAGGCAGGAGGGGAGAGGGTAGGGAGGGAGGGCAGGAGGGAGAGAGGGTAGGGAGGGAGGGGAGGAAGGAGAGAGGTAGGGAGGGAGGGAGGGGGAGAGGGTAGGGAGGGAGGGAGAGAGGAGGGGA	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr1;END=1930417;STD_quant_start=26.8721;STD_quant_stop=60.9526;Kurtosis_quant_start=-0.068928;Kurtosis_quant_stop=-0.877909;SVTYPE=INS;RNAMES=1f7ba560-1978-466a-8153-158e01dfe601,5a22fc3b-c4e9-4a52-9230-fcf16cee6cc1,8b6629f2-2a9c-4eaf-976d-d798aea4837e,90fb6a6c-97f6-487a-b61f-ae5f107333fe,94f3e08b-29e7-463f-9ea1-6a4d37f1b66a,a26dd940-0f20-4de8-966d-a3965fe00286,aa773c47-82ee-4b6b-80f6-38ea90908a12,ad7c3f25-2dba-44cf-ba68-3129cdf177cf,ce88f9c2-7d06-4543-aff6-4318dcd0117d;SUPTYPE=AL;SVLEN=1008;STRANDS=+-;RE=9;REF_strand=4,5;AF=0.5	GT:DR:DV	0/1:9:9
+chr1	1935132	61_1	GTACACACGTGTGTATGTGTGTTGCTGTGGGTACACACGTGTGTACGTGGGTGTTAGGCTGTGGGTACACACGTGTGTACGTGGGTGTTAGGTTGTGGGTACACAGGTGTGTACGTGGGTGTTAGGTTGTAGGTACACACGTGTGTACGTGGGTGTTAGGTTGTAGGTACACACGTGTGTACGTGGGTGTTAGGTTGTGGGTACACACGTGTACGTGGGTGTTGTAGGTACACACGTGTGTACGTGGGTGTTAGGCTGTAAGTACACACGTGTGTATGTGGGTGTTAGGTTGTAGGTACACAGGTGTATACGTGGGTGTTAGGTTGTAGGTACACAGGTGTGTACGTGGGTGTTAGGTTGTAGGTACACACGTGTGTATGTGGGTGTTAGGTTATAGGTACACACGTCTGTATGTGTGGGTGTTAGGTTGTAGGTACACACGTGTGTACGTGGGTGTTAGGTTGTAGGTACACAGGTGTGTGCGCGCTAGGTTGTAGGTACACATGTGTGCACGTGGGTTAGGTTGTAGGTACACACGTGTGTACCTGTTAGGTTGTAGGTATACACGTGTGTACGTGTGTGTGTT	N	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr1;END=1935578;STD_quant_start=153.559;STD_quant_stop=132.515;Kurtosis_quant_start=-1.19934;Kurtosis_quant_stop=1.97897;SVTYPE=DEL;RNAMES=1f79460c-a490-4e87-8ed1-bd0e2311fd96,35ca5bc9-cb85-4600-a1cc-cc7ad8ea0900,46bb71d9-c744-41b2-9e5b-280327785e81,5a22fc3b-c4e9-4a52-9230-fcf16cee6cc1,ad7c3f25-2dba-44cf-ba68-3129cdf177cf;SUPTYPE=AL;SVLEN=-446;STRANDS=+-;RE=5;REF_strand=12,5;AF=0.227273	GT:DR:DV	0/0:17:5
+chr1	1937841	61_2	CACCAGGTCCACCTCTGGACACAGGTCCACC	N	.	PASS	PRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr1;END=1937872;STD_quant_start=1.78885;STD_quant_stop=0.894427;Kurtosis_quant_start=2;Kurtosis_quant_stop=2;SVTYPE=DEL;RNAMES=17cf1867-d8d0-4d6c-a0a4-7cd27f98d21c,35ca5bc9-cb85-4600-a1cc-cc7ad8ea0900,46bb71d9-c744-41b2-9e5b-280327785e81,5a22fc3b-c4e9-4a52-9230-fcf16cee6cc1,ad7c3f25-2dba-44cf-ba68-3129cdf177cf;SUPTYPE=AL;SVLEN=-31;STRANDS=+-;RE=5;REF_strand=1,0;AF=0.833333	GT:DR:DV	1/1:1:5
+chr1	1948947	64	N	CCTCCTTCCTTCCTCTTTCCTTCCTTCCTTCCCTCCCCTTACTCCTTCTTCCTTCCTTCCCCTTCCTTCTTCCTTCTCTCCCTCCCTCCCTTCCCT	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr1;END=1949050;STD_quant_start=5.56776;STD_quant_stop=18.5661;Kurtosis_quant_start=-0.180711;Kurtosis_quant_stop=-0.853763;SVTYPE=INS;RNAMES=055ba478-5d2e-4d99-b186-a0135c719653,0ba34aee-4104-4f2c-a266-ee9e94ffd88a,16677c20-3c6c-4b74-91e8-9a8eaff0cc41,17cf1867-d8d0-4d6c-a0a4-7cd27f98d21c,2193c379-056a-4f6b-ad63-500a7d6b0f81,276d02c1-43c7-4839-b05e-e369b5027db1,4050aa75-6cc0-4e4a-9a4d-6ef9c7a531b0,4d69d239-a49f-4385-9509-38d2c650dd91,5b314be2-40de-453b-b564-55e0f23e0f9d,60487873-d91c-416a-b799-b3cfa6ece402,870a857f-39a7-49e9-9ca2-3e5167e0080c,8c86180c-2b7d-4542-abb2-60c8669ba5f3,9fc8b03e-d153-4578-b4ef-72542bb4681f,eb2e28c0-57be-443a-b9fd-aaca2ce7331a,f371bad6-cc9f-4532-96cc-ca566fb95edb,f4df7d94-09a6-4fef-8265-b6b7f695023d,f597f2f9-4eb8-4ff5-b998-0775bdaebcf8;SUPTYPE=AL;SVLEN=103;STRANDS=+-;RE=17;REF_strand=10,6;AF=0.515152	GT:DR:DV	0/1:16:17
+chr1	1968925	65	CCCTCCTGGGGGCTCCGGTCCTGCCCAGCAGCCCCAGGTGAGACAGCGCCTGGCGGCCCCTCCCTAGCTCC	N	.	PASS	PRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr1;END=1968995;STD_quant_start=6.21289;STD_quant_stop=6.26099;Kurtosis_quant_start=2.1732;Kurtosis_quant_stop=3.01628;SVTYPE=DEL;RNAMES=0054b1d2-e5b3-4425-8021-7bfc27793f2e,03eab717-c88a-49b3-b03d-3fccf189e51b,114859cf-340f-46a6-82ae-597f42d2c3a2,46b53123-ab33-4ba3-be43-aa960b5282e6,5032e8bb-0e9f-4ef3-8f74-853166bdf8d1,58437404-2131-42f8-a4a7-3c238e0dce06,7e0e439a-bd8d-45b6-92af-6d04069896de,ba78be33-e248-406d-9ea9-65de8c94cb9b,c9c5a4c8-212d-49de-a138-0b77a80bf5df,f664d4be-60e1-44b4-b2ba-08356f03b7c5,f6fff9c2-22f7-4909-82d3-ef14a9823885;SUPTYPE=AL;SVLEN=-70;STRANDS=+-;RE=11;REF_strand=13,12;AF=0.305556	GT:DR:DV	0/1:25:11
+chr1	1980245	66_0	ATCTAAGCCACCGGCACAGATCGCAGTGGGCGCCCTCTTACCGCGTGGGGAGGACGGGTGAACGAGAGACTGTATCTAAGCCACCGGCACAGATCGCAGTGGGCGCCCTCTTACCGCGTGGGGAGGACGGGTGAACGAGAGACTGTATCTAAGCCACCGGCACAGATCGCAGTGGGCGCCCTCTTACCGTGTGGGGAGGACGGGTGAACGA	N	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr1;END=1980454;STD_quant_start=101.743;STD_quant_stop=92.693;Kurtosis_quant_start=-0.884687;Kurtosis_quant_stop=-0.52696;SVTYPE=DEL;RNAMES=052bdc46-518f-41db-ad42-0684dc228ca4,32319478-b72c-45fa-bc4c-fe51c11649c7,706e2df9-de59-4d99-b258-713deb89d841,7e0e439a-bd8d-45b6-92af-6d04069896de,8525da23-273d-43ea-b798-67f153a33bbc,95c7442c-1458-4fd5-b134-66f19fe6c45b,a6ea1aaf-dc00-4e26-aa3d-f0edd224e3a7,b522295d-622c-4c09-92dd-f125fde27113,d7383ef9-e484-418b-b935-184212da47b4,d8ac8009-6fb7-4c0f-be91-f5ae65921ef2,e2dd6517-ff29-49ac-a274-9768838cc651;SUPTYPE=AL;SVLEN=-209;STRANDS=+-;RE=11;REF_strand=8,10;AF=0.37931	GT:DR:DV	0/1:18:11
+chr1	1982226	66_1	N	CGCAGGACACCCAACCACGGACAGACACGGGGGCACGCAGGACACCCAGCCGCGGACAGACACGGGGCACGCGGAACACCCAGCCGTGGACAGACCACCAGAGAGCACACAGGACACCCAGCCGTGGACAGACACGGGAACAGGACACCCAGCCGTGGACAGACACGGTGACACACAGGACACCCAGCCATGGACAGACACGGGGACACGCAGGACACAGCCACGGACAGACACGGGGAC	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr1;END=1982427;STD_quant_start=111.252;STD_quant_stop=95.5136;Kurtosis_quant_start=-0.937548;Kurtosis_quant_stop=-1.0182;SVTYPE=INS;RNAMES=052bdc46-518f-41db-ad42-0684dc228ca4,528aac50-b822-4bd5-9cbd-4b8382236279,8525da23-273d-43ea-b798-67f153a33bbc,90a3c1c7-3727-4019-8995-80b749f304c5,b522295d-622c-4c09-92dd-f125fde27113,c99f1e8e-64f8-46b0-8baa-52ad4bea8869,d0b22d0c-2bd5-4ae3-b65c-807960446b61;SUPTYPE=AL;SVLEN=206;STRANDS=+-;RE=7;REF_strand=5,3;AF=0.466667	GT:DR:DV	0/1:8:7
+chr1	1993704	68	N	GGGCACAGTGGCTCATGCCTGTAATCCCAGCAACATGGGAGCCTGAGGTGGGAGGCTCTCTTGAGGCCAGGAGTTTGAGACCAGCCTGAGCAACATAGTGAGACCCCACCGCCATTTCTAGGAAAAAAAAAAAGTGGCC	.	PASS	PRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr1;END=1993848;STD_quant_start=0;STD_quant_stop=2.30217;Kurtosis_quant_start=-0.137648;Kurtosis_quant_stop=0.586411;SVTYPE=INS;RNAMES=12612e6e-777e-43af-8849-4b074b2f00ac,3199a42c-620f-4a1d-a406-892e05fe365d,3a9ec060-4a54-44ef-bd05-acc590e6927c,48257349-9599-4e4d-b1c8-81428bed719b,516476f1-e11d-4860-b20c-6bcd57126832,5f26394a-a245-43ed-9364-e07fef58a86e,69f19577-181a-43f0-8698-b2a254a4af7e,751e7a48-0915-4aba-bbee-fc4169db51af,7fd8a496-1821-4f89-89e5-aa459208dee7,83936f75-3e7b-4cd8-8444-fb2758e79309,a6ea1aaf-dc00-4e26-aa3d-f0edd224e3a7,a78f0b5f-91c6-49b6-a65d-14ba7899fcf0,c99f1e8e-64f8-46b0-8baa-52ad4bea8869,ce1e0c2c-7782-4be5-9d87-0f852599231c,d0eb4f07-4b62-43d9-986f-fead3a9c4de5,d923e257-cf61-40a0-bfde-487fc0c5aca4,e01bb1b6-6bb4-456c-a445-0038552a3166,e5283a39-f252-4bee-8378-2587557d50c8,ea7096fc-b05a-4d68-9cd7-0b0707b652dc,f7aaf47b-c90c-4458-b7b3-234f6f20fc43,ff61f792-d1df-48a7-b53b-3c122cbdfbb0;SUPTYPE=AL;SVLEN=144;STRANDS=+-;RE=21;REF_strand=10,8;AF=0.538462	GT:DR:DV	0/1:18:21
+chr1	2019228	69_0	N	GGGAGGAGAGGGGGAGAGGAGGGGACCTGGGTAGGGTGGGGGGAGGGGAACGGGGAGGGGAGCGGCGGGCGCGGGGTGGGGTGGGAGGGGGGAGGGGGAGGGGAGAAGACGGGCAGCGGGAGGGCAGAGGATGGGGGCGGGGGGGGAGGGGAGGGGGCGGCGGAGGATGGGGCGGGGGAGTGGGATGGGCGGGGGAGGGGGAGGGGGGC	.	PASS	PRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr1;END=2019444;STD_quant_start=5.89654;STD_quant_stop=7.31174;Kurtosis_quant_start=-1.49088;Kurtosis_quant_stop=-0.213696;SVTYPE=INS;RNAMES=084b4c10-4abe-45a3-bb41-50a843f349b7,141e97fe-ac49-4c29-a3d5-ed11490e5e0c,189bdbba-7c5c-425c-9986-48faee8994b8,1b320452-023c-43d2-80aa-3fd843eca48f,1b523e3c-4545-43ce-a386-de80099285aa,2ba0273c-2942-412d-9e6c-64000dbe5a24,2d01c13b-fe6d-4d94-9e85-f1a8b75a97a0,3745f5b0-c4b2-4f98-ad05-1fd16961d44f,3d6ee7dc-5dae-4bea-ac7b-c51be12b4e35,3fee962e-0292-42b9-97d4-c9f60cb80cb0,5117c8e0-9e16-4765-8cc1-83c57b053121,59eff853-5383-4fcd-a740-7dd1cb819f3d,6a95fb4f-660a-4aea-a830-b0a3a3e25876,6ec61bb5-9fd5-4f87-bf26-ab3662797e0c,71ffaa82-5556-4b11-a020-d9cfc9377ca8,7688b7ce-31d9-40e6-b262-b707e4beb0a9,7d2f3cce-628d-4c6c-89e8-32c85eff6029,8323db15-fe57-4d04-a384-12eb8e286127,844e781b-8f42-42f9-b667-183919bd5497,85db8648-9b21-4396-96cd-64d8210ec0b6,88158c15-e1b0-4217-9414-260e07ea041c,91e77f06-2ce4-48a1-a4aa-6e99b50183e3,c98072a1-b91c-415c-bf05-4caa96f7fb2d,cfb3d8e5-eeca-4519-986e-d8d2e1629fcf,df0b69a9-893f-40aa-8210-bafad1da5208,eac11fa2-4f1c-4773-ba53-46f54d556aff;SUPTYPE=AL;SVLEN=209;STRANDS=+-;RE=26;REF_strand=11,12;AF=0.530612	GT:DR:DV	0/1:23:26
+chr1	2031615	69_1	CTGCAGGACCCAGGTGTCTGGGGCGCTACCATCCT	N	.	PASS	PRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr1;END=2031650;STD_quant_start=0;STD_quant_stop=1;Kurtosis_quant_start=3.55176;Kurtosis_quant_stop=1.96435;SVTYPE=DEL;RNAMES=12039f9a-b3a1-4bb5-889f-27118e7212a0,189d9ec3-67c5-4156-aef7-84f137e1c188,1b320452-023c-43d2-80aa-3fd843eca48f,29ba43e9-795c-4344-a62e-19c797d939fa,3745f5b0-c4b2-4f98-ad05-1fd16961d44f,3f364712-fe9c-4e2f-adc9-07f69555c1e4,4946e6a8-20a0-4177-931d-6b5edbb7310f,6d810f14-3521-4f01-a7c8-08501e86ea6b,93dd4c46-9379-4e94-9814-b69d63b9480d,b933e615-1c59-4dd9-89ee-9805edf3a17f,e52271ee-8199-4c0d-af4e-95d436da9609,f7fff377-de1b-49cd-8605-1b8a832b0185;SUPTYPE=AL;SVLEN=-35;STRANDS=+-;RE=12;REF_strand=5,13;AF=0.4	GT:DR:DV	0/1:18:12
+chr1	2106498	71_0	GGTAACTCTCAGCAAGCCCCTCTGGTGGGCGAGGACCTCCACACGTGTCACCAG	N	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr1;END=2106559;STD_quant_start=18.0638;STD_quant_stop=24.1309;Kurtosis_quant_start=-1.94975;Kurtosis_quant_stop=-2.12239;SVTYPE=DEL;RNAMES=28805ff2-2a6a-44c7-ba7c-39b95ea9cfbf,2af6fbbd-f264-4229-87a0-a713a86bb300,373d65fa-058a-4e65-9c7d-c54373d59b09,3a963710-a757-49c5-8b1e-ff2706ed2b16,3ba8c7a4-62e2-412e-8b3d-05a8d0b1d74d,3f3ee43c-cad2-4769-a361-3d1b65c2e547,6205add9-ede8-46ea-9983-c9bca75d01f0,6604bd06-aa45-437f-8639-5bace412a763,6cbb710b-746d-4605-8f55-5118404f191b,701b0d67-ee07-4077-bdc2-32678e6df494,7116e163-0218-469b-a925-7c3770efd791,7a8fe2c5-d3c9-4e7b-b667-18ca3a3634c0,90931cd4-5f1c-41a0-bc14-9f16138a7d1d,937a7161-5462-466f-b1d9-c91ad0a29917,947202e3-b846-49d1-b795-98bab687979f,c69112e7-fdaf-431a-942d-6e6e7d219b3f,c791f368-5fb0-4f42-9385-585f75bb6a91,fd995180-e743-4bb9-8ac6-0f8dd3bd6060;SUPTYPE=AL;SVLEN=-61;STRANDS=+-;RE=18;REF_strand=11,20;AF=0.367347	GT:DR:DV	0/1:31:18
+chr1	2110096	71_1	GAGACACAGAACGGCCAGGGCTGAATCCGGGGCCCTCCCTGGGGGCAGCCAAGGACCTAAAACCAATGGGTCCCAACCAAGAGGATCCCAGAG	N	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr1;END=2110188;STD_quant_start=28.137;STD_quant_stop=29.053;Kurtosis_quant_start=-2.1532;Kurtosis_quant_stop=-2.15889;SVTYPE=DEL;RNAMES=1b6d873e-ffd5-48ae-ad64-cbfc2fe5f991,2af6fbbd-f264-4229-87a0-a713a86bb300,2f93bb83-61c2-410e-9504-299571c80478,3a963710-a757-49c5-8b1e-ff2706ed2b16,3ba8c7a4-62e2-412e-8b3d-05a8d0b1d74d,6604bd06-aa45-437f-8639-5bace412a763,6cbb710b-746d-4605-8f55-5118404f191b,701b0d67-ee07-4077-bdc2-32678e6df494,7116e163-0218-469b-a925-7c3770efd791,74f450b4-f44e-401c-ae14-25ed8682996e,7a8fe2c5-d3c9-4e7b-b667-18ca3a3634c0,90556fe1-6f15-43ad-b541-18b71bfd6a30,92c42e9b-0738-4473-a13d-3c950c13a88f,947202e3-b846-49d1-b795-98bab687979f,9e8b5bf4-1212-4b8c-955d-956dbb5f8573,b4b91919-6fad-4984-ac92-d36a6e55e84c,c69112e7-fdaf-431a-942d-6e6e7d219b3f,c791f368-5fb0-4f42-9385-585f75bb6a91,cb91c0ac-5c4e-461b-8b51-ae7a6d9ba2c0,cd3b3d48-2407-4570-bd64-4c74d85c1113,d92913bc-455d-4f37-8076-fb2c944a43e4,db136f46-27ff-4915-88c0-de0b3fcc6559,dee55e5f-5ad4-4ba1-8f33-4da8a0ca888c,f2baf3f5-0375-4789-a4a0-421c72aaedfe,faff0bd7-0f1e-4a4f-b19a-683c00ffe049,fd995180-e743-4bb9-8ac6-0f8dd3bd6060;SUPTYPE=AL;SVLEN=-92;STRANDS=+-;RE=26;REF_strand=6,1;AF=0.787879	GT:DR:DV	0/1:7:26
+chr1	2121519	73	N	GGTCATGGTGGTAGTTAGGGTTATGGTAGTTAG	.	PASS	PRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr1;END=2121553;STD_quant_start=8.15037;STD_quant_stop=7.96421;Kurtosis_quant_start=2.31242;Kurtosis_quant_stop=0.704955;SVTYPE=INS;RNAMES=150f2d25-b265-4af7-8135-23aa9741506d,1a6336dd-1444-4c9f-a01d-b44e5ec055d3,4b3f8741-f139-4956-ba84-952af879c943,81f0780e-3130-42ed-b489-a3ca347b4e5c,9d20c94f-6d1e-4624-9b25-3d5949d7e53c,df559026-6706-483c-922a-d706d4e0bfbd,ecc5327a-3d59-4d19-8cb7-0e24ab84ebbd;SUPTYPE=AL;SVLEN=34;STRANDS=+-;RE=7;REF_strand=10,6;AF=0.304348	GT:DR:DV	0/1:16:7
+chr1	2122420	74_0	GTGGTAGGGTCGTGGTGGTTAGGGTCGTGGCGGTGGTTAGGGTCGTGGCGGTGGTTAGGGTT	N	.	PASS	PRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr1;END=2122481;STD_quant_start=2.34521;STD_quant_stop=7.3598;Kurtosis_quant_start=0.553719;Kurtosis_quant_stop=1.81801;SVTYPE=DEL;RNAMES=0650f152-57e2-432b-9f22-f61cef3fdadf,6679a3cc-d5ab-43a8-9476-6a63a9592057,670102b2-c1bb-4323-835a-edf30c6e4457,6be13089-e486-4759-b1d3-99fb4da3e5ce,bee0bcc7-3494-4ac8-9d57-3707ea5045a2,ee18ca1c-9244-4440-b623-79f76165755a;SUPTYPE=AL;SVLEN=-61;STRANDS=+-;RE=6;REF_strand=2,0;AF=0.75	GT:DR:DV	0/1:2:6
+chr1	2123427	74_1	GTGGTTAGGGTCGTGGCGGTGGTTAGGGTTGTGGTGGTTAGGGTTGTGGTGGTTAGGGTTGTGGTGGTTAGGGTCGTGGCGGTGGTTAGGGTCGTGGCGGTGGTTAGGGTT	N	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr1;END=2123528;STD_quant_start=35.106;STD_quant_stop=31.7715;Kurtosis_quant_start=2.91495;Kurtosis_quant_stop=-1.20498;SVTYPE=DEL;RNAMES=0650f152-57e2-432b-9f22-f61cef3fdadf,670102b2-c1bb-4323-835a-edf30c6e4457,73fb19cb-0ca2-4185-bfe2-5c1511d92a8a,adf37c10-0ebd-4b66-a691-4d78e9ce3887,bee0bcc7-3494-4ac8-9d57-3707ea5045a2,ee18ca1c-9244-4440-b623-79f76165755a,eedb21ba-7cb6-45a8-a524-2ce1dca4f9e1;SUPTYPE=AL;SVLEN=-101;STRANDS=+-;RE=7;REF_strand=0,0;AF=1	GT:DR:DV	1/1:0:7
+chr1	2142340	76	TTTCAATCCAGGGTCCACACATCCAGCAGCCGAAGCGCCCTCCTTTCAATCCAGGGTCCAGGCATCTAGCAGCCGAAGCGCCT	N	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr1;END=2142424;STD_quant_start=21.468;STD_quant_stop=22.4611;Kurtosis_quant_start=-0.568143;Kurtosis_quant_stop=-0.726049;SVTYPE=DEL;RNAMES=171ee54f-94db-40e5-abd3-64c25c3bd654,1ec9394b-a56b-4ecc-9a95-a189e11ab936,414c9229-dbb4-4f8d-b352-3395870549ce,8b1a60f1-2b05-40cf-96cb-5c25d3609202,a82e313f-0107-45b6-bff9-499aa5741572,bdc72979-282b-40f1-8822-46206a303af5,d601a903-b81b-423b-8b40-931b45c4812e,d7aed26e-dee7-4026-ba04-ad90cbf1ab97;SUPTYPE=AL;SVLEN=-84;STRANDS=+-;RE=8;REF_strand=12,10;AF=0.266667	GT:DR:DV	0/0:22:8
+chr1	2147720	77	N	TGTCCACTGACCTCTCCATTCTCCGTCTGTTGTCCACTGACCTCTCCGCTCATCCACTCCATCTATTGTCCACTGACCCCTCTTCATCTATCCATTCATTGTCCACTGACCTCTCATCTATCCATCCATCTGGTCAGTCCACTGATCTCTCTCCATCTATCCATCCACTTC	.	PASS	PRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr1;END=2147883;STD_quant_start=2.04939;STD_quant_stop=2.91548;Kurtosis_quant_start=3.27179;Kurtosis_quant_stop=3.80751;SVTYPE=INS;RNAMES=3b6c8737-901b-4b87-bb72-eb7092dfef29,3dc2a634-4c43-496c-8ec4-37141b9f49e1,50520c4d-5117-476f-91d6-88a96210bd00,5bb38fd6-be27-4945-8879-e7fa91fb4ed2,61131946-4d87-43e0-ba2a-437d65cb5d46,6f92ee94-91e0-447b-bdd8-cf1b4bb2cb22,7d2495f0-d0d8-4f23-ab87-8966f0fc9a76,89ca80fa-ec8f-409d-b1df-6035496e9aa0,955dbf85-2ba3-4862-81af-eeba6ded1571,bc432383-d7f9-46e6-91e4-c7ac27d0e9bb,e4d10a9a-fd46-4b43-96d4-260144261a04,fa90e368-9f17-4bc3-baba-6cdb0f1720df;SUPTYPE=AL;SVLEN=162;STRANDS=+-;RE=12;REF_strand=6,6;AF=0.5	GT:DR:DV	0/1:12:12
+chr1	2212064	78	N	GCTAATTTGTTTTTTTTTGTATTTTTAGGAGGCGACAGGGTTTCACCGTGTTAGCCAGGTTGATCTCGATCTCCTGACCTCGTGATCCGCCCGCCTCAGCCTCCCAAAGTGCTGGGATTACAGCGTGAAGAGCCACCGGTGCCCGGCCAGTTTTTTATATTTTTTTATTAGAGACGGGGTTTCATCATGTTGGCCAGGCTGGTCTCAAACTCCTGACCTCAGGTGATCCACCTGCCTCGGCCTCCCAAAGTGCCGGGGATTACAGGCGTGAGCCACCGTGCCTGGCCGAGAGAGCAAGCTGCTGCTTAGTTTTTTTTTGAGACGGAGTCTTGTGTCGCCAGGCTGGAGTGCAGTAGTGTGATCTCGGCTCACTGCAAGCTCCACCTCCCGGGTTCACACCATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGACTACAGGCGCCCGCCACCACGCCCG	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr1;END=2212522;STD_quant_start=74.6405;STD_quant_stop=66.4379;Kurtosis_quant_start=-1.46472;Kurtosis_quant_stop=-1.44655;SVTYPE=INS;RNAMES=0676b04a-7c8d-42cf-b577-88e7c5e59366,186fb406-a7c2-427a-a852-c832ed397d77,1c28123c-97c3-4b56-be80-95f423024035,392588b0-1f81-4dfb-95e2-516600f4d921,3b3d82bf-b95b-42dc-85b6-6e0ec7909512,487beba9-384f-4d39-9206-67d9ff555f3b,4ff2e1df-681d-41eb-804f-d62b1e1243f7,65813d37-b46f-41e7-8a62-74fd40ef4802,83bbdbb2-2f4e-409c-a225-7ac6c3a60112,91830ab4-3efb-4ef9-b092-74588a1f1977,a5bb802e-de21-4019-a251-702f17b31459,b01eb484-c448-4b3f-9e9d-57fdbc83c1b3,bd07653a-6a66-4532-9d78-64ec38c0e59c,e03f35b8-06d0-430d-928e-6fb4c40ba032,e508e912-2b3a-4490-8895-b31a09536944,e6819841-704e-4239-8e88-3185449188dd,f21c9471-ca40-4c86-83e4-d79827d0c0b2;SUPTYPE=AL;SVLEN=457;STRANDS=+-;RE=17;REF_strand=13,8;AF=0.447368	GT:DR:DV	0/1:21:17
+chr1	2280686	79	AGAGAGGACGCCCGAGAAGACAGGCGGCGGCGGCGATCTTCAGAGAGAGAGATGCCCGAGAAGACAGGCGGTGGCGGAGATCTTCAGAGAGAGGACGCCCGAGAAGACAGGCGGTGGCGGAGATCTTCAGAGAGAGGACGCCCGAGAAGACAGGCGGCGGCGGCGATCTTCAGAGAGAGGACGCCCGAGAAGACAGGCGGCGGCGGCGATCTTCAGAGAGAGGACGCCCGAGAAGACAGGCGGCGGCGGCGATCTTC	N	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr1;END=2280929;STD_quant_start=20.1122;STD_quant_stop=50.9735;Kurtosis_quant_start=-1.35663;Kurtosis_quant_stop=1.0857;SVTYPE=DEL;RNAMES=0201190b-ffdf-4da2-b1d2-878efae5276d,065e39f9-6c32-425a-a3f9-3c41cdf17cfb,54aa1c8a-ce0b-440f-a73b-74137b246f3f,7569fcc1-6df6-4e2e-9a3e-7b3475257f88,85469682-a260-411a-9da2-40949100da1a,a8546972-7f28-4aa2-b19c-b104b07c3f26,abcf1b8d-cac3-4621-b0ff-8187389417c8,b314ebbf-e428-4f84-ae07-a53631ded366,c334c780-b78a-478d-beaa-12a683730973,e2507dc8-5255-4d67-90d5-ed5828f6e81f,f57ea356-9ee6-484a-9685-3caf5029b3fa;SUPTYPE=AL;SVLEN=-243;STRANDS=+-;RE=11;REF_strand=10,12;AF=0.333333	GT:DR:DV	0/1:22:11
+chr1	2393034	80_0	N	GGACGCTGTGGCAGAGGACTTCATCCCATGTTTCAAAGTGCCCT	.	PASS	PRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr1;END=2393076;STD_quant_start=0;STD_quant_stop=1.07417;Kurtosis_quant_start=-1.11224;Kurtosis_quant_stop=-1.09293;SVTYPE=INS;RNAMES=00980201-0c96-4489-90dc-fe1b1a16ba8c,06ce59b7-1c95-4cd6-afc0-5cb2d44c3da0,0eee7d18-36f4-4b41-88b8-413a26a2e92b,12233d0a-ba0e-4f6b-8f20-231b4c0df705,276b7eca-4c55-4ea4-b706-e2ea0ef806bc,37f7640b-5f00-4496-b03c-176b2fee9f81,4f196bd2-aaf9-4c99-b954-f48454f1377e,699db0e6-24b9-4257-83dc-2c92e96479c3,75751984-314d-402e-90cb-d2064a9e9a6c,7670c0d3-c90a-43cb-9761-3dc4e3c0c86e,7b099be2-d750-4c08-a6fb-25185120d10e,82e87691-2a14-4c37-b073-9e706d600638,8d2e0d04-baaf-4485-aced-48bda5916742,990bf247-679d-4047-a113-7dd3aa4fcf65,9a56d0b2-8fbf-4179-8647-05454c9be994,b2881ddb-8ae7-479b-8896-35c01a48e844,bac07239-5e7f-4fc6-9362-01d8a6b6791f,bbc701f9-17eb-4353-81c9-ff5ee961b554,c0c21a75-e2b2-43de-aaf8-af53e92c26b1,c4c9fe9e-5f08-4239-8531-3cbd08912e5b,cd7c8196-ac48-4caa-9414-32a3b37a7d25,d43881ac-62d7-4668-bb72-b6e5d50fd8d1,d48718e0-864b-46d5-93f8-8fde7e786b8e,d8b68c1f-fe56-4915-bf2b-670ccac3f03c,ea7de415-8965-4b45-aa66-864bba6cecf9,f78c1fa7-d9ef-4f7e-b39c-4c95bc54f740;SUPTYPE=AL;SVLEN=42;STRANDS=+-;RE=26;REF_strand=14,11;AF=0.509804	GT:DR:DV	0/1:25:26
+chr1	2401344	80_1	N	CTCCTCCCTCCTCCCTCCCTCCCTCCCTCCTCCTCCTCCCTCTCCCCTCCTCCTTCCCTCCCTCTCCTCCCTCCTTCCTCCCTCCTCCTCCTCCCTCCTCCTCCCTCCTCCCTTCTTCCTCCCTCCTCCTCCTCCCTTCTTCCTCCCTCCTCCCTCCCTCCCTCCCTCCCCCTCCCTCCTCCCCTCTCCTCCCCCCTCCCTCCTCCTTCCTCCTCCTCCCTCCCTCCTCCCTTCCTCCCT	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr1;END=2401601;STD_quant_start=7.79957;STD_quant_stop=12.6853;Kurtosis_quant_start=-1.73392;Kurtosis_quant_stop=-0.358473;SVTYPE=INS;RNAMES=0122932c-f48b-4b83-bece-1983fa357a82,01bd0d80-e250-4be3-996c-e6bbdfad82f8,06ce59b7-1c95-4cd6-afc0-5cb2d44c3da0,084d817e-9526-4e38-8a85-954a55703197,17d48ee0-5f98-4ddf-b6a2-82b201fd9140,21c67059-ba8f-4d02-856a-26f28b81f0a6,25f27c4c-cea6-4fcb-baf9-a9cbc2cbf798,67e266bf-3cf2-482f-b9f6-1f7e0edbfb20,7a7bd87e-f893-41c4-84b5-0d0cf345a57c,9a56d0b2-8fbf-4179-8647-05454c9be994,a7d2160f-22a6-493a-b788-050768eb9bb4,a97bbe0a-c62b-41ab-970e-ec673cca054a,b10646da-43c5-4aa8-9cf4-48d6dabf1fac,b167dc86-fc0d-4b6e-9a82-fd7250b88b9d,c0c21a75-e2b2-43de-aaf8-af53e92c26b1,cd37a7be-b9ba-4348-a01c-e1fb286af589,cd7c8196-ac48-4caa-9414-32a3b37a7d25,cf8fb7a6-6ee4-4679-9589-2c546ab0eaf8,d79040ee-f236-4142-9d86-632cf2ec11fb,d8b68c1f-fe56-4915-bf2b-670ccac3f03c,dbd74465-d543-4b90-885d-5585f6c5c733,ea7de415-8965-4b45-aa66-864bba6cecf9,ee10783d-ee86-4b3d-b3fe-70c862bc79f5,f3edbd0a-a726-46cc-bac8-bbfe128fbe42;SUPTYPE=AL;SVLEN=257;STRANDS=+-;RE=24;REF_strand=9,6;AF=0.615385	GT:DR:DV	0/1:15:24
+chr1	2435943	83_1	N	]chr8:2191727]N	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.11;STD_quant_start=361.135;STD_quant_stop=375.138;Kurtosis_quant_start=-1.00232;Kurtosis_quant_stop=-1.41029;SVTYPE=BND;RNAMES=1034f436-2911-4e62-8bb4-897d96384930,20c61a5f-55ca-4466-8f0c-5acfa8d937ec,2c5731ed-a065-4428-a87f-11560422cb7a,4d806a60-f552-4c1d-a41b-4d92c9a7f44b,7096ab27-21cd-4a75-9b3b-61942414d57f,8f0f8a9c-51ac-46b9-8c10-6fe63afbc58d,99b9a2b6-169c-4b13-b450-fcb969c61e37,afcf9b05-eaa9-4bdd-a231-8b97fb693a94,b2c03c3c-381a-487e-a13b-90230e1a2fbf,c885fe57-3116-4b4d-a3e7-ca92b07181be,d457a04c-02bb-4cb5-84b4-f3b2b6e4f78e,d67e1e93-5385-4ea9-a0c0-d1c773b8903a;SUPTYPE=SR;SVLEN=0;STRANDS=-+;RE=12;REF_strand=0,0;AF=1	GT:DR:DV	1/1:0:12
+chr1	2436031	82_0	N	TCTTCCCTCCTCCCCCTCCTCCTTCTCCTCTCCCCTTATC	.	PASS	PRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr1;END=2436072;STD_quant_start=2.58844;STD_quant_stop=3.4785;Kurtosis_quant_start=3.2681;Kurtosis_quant_stop=1.23088;SVTYPE=INS;RNAMES=15db541e-fff8-459c-a500-6bf0abef1181,45d1807a-8638-48e3-b1ab-c13ec31f67d4,8501a903-194a-4b39-90b9-5dbb8894a965,89d5ae55-5d6f-48f9-87bf-a47000cd43bb,917960b5-a32c-4b3b-aaf3-3030d0ae4adf,c518ca32-4881-4ff2-a436-6aaf65d6dcd7,d29c808b-64dd-431c-af93-f0b792fdac5f,db4d5b64-7cc8-433b-8c7e-1fcdc1d2f749,edf624ea-c4fb-4000-963f-03379b300d79,ee26af85-3d3f-4c14-84a5-694053a729cf,f0e08ed7-b7cb-4090-b5e0-e2d11c3aec32,f2021b13-8bd1-4ecd-b83b-26b1f3834c67;SUPTYPE=AL;SVLEN=42;STRANDS=+-;RE=12;REF_strand=9,10;AF=0.387097	GT:DR:DV	0/1:19:12
+chr1	2436235	83_0	N	<INV>	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr1;END=51906855;STD_quant_start=248.038;STD_quant_stop=29.0723;Kurtosis_quant_start=-1.13146;Kurtosis_quant_stop=-0.792692;SVTYPE=INV;RNAMES=20c61a5f-55ca-4466-8f0c-5acfa8d937ec,63ca8f3a-dccd-4544-bcb6-165078b143c0,8f0f8a9c-51ac-46b9-8c10-6fe63afbc58d,b4a16a83-7eb8-41d9-92b6-c3343ed532a5,e00aaa66-34bb-44d9-bd63-b8f65c2725e1;SUPTYPE=SR;SVLEN=49470620;STRANDS=--;RE=5;REF_strand=1,2;AF=0.625	GT:DR:DV	0/1:3:5
+chr1	2436453	30263	N	N[chr21:44407143[	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.11;STD_quant_start=260.661;STD_quant_stop=33.892;Kurtosis_quant_start=0.156456;Kurtosis_quant_stop=1.93083;SVTYPE=BND;RNAMES=38ae4519-daf6-4f97-891a-f0fa8e3ad89b,54b53a6d-e860-41ba-ab89-bc5ea855609b,6023543c-a3b7-4668-83d4-5874646245d2,99b9a2b6-169c-4b13-b450-fcb969c61e37,b2e8082e-4084-46f8-b792-8d697b657d64,df600a7e-047d-4f6a-8bc2-53036240ad2a;SUPTYPE=SR;SVLEN=0;STRANDS=+-;RE=6;REF_strand=0,1;AF=0.857143	GT:DR:DV	1/1:1:6
+chr1	2440693	82_1	N	CCCGAAACGCCCGCGGGATCTTCGTTGCTGCGACCAGGATCCTCTCCACATGTCTGTCGCTGGCCTTGCCCGGCCCGCCGGGGATCTTGCATGCTGCGACCCAGGGATCCTCTCCATGTCTGTCGCTGGCCTTGCCCGGCCGCCGGGGATCTTGCATGCTGCGACCAGTGATCCTCTCTCCATGTCTGTCGCTGGCCTTGCCCGGCCCGCCCGGGAGATCTTGCATTGCTGCGACCAGGGATCACCTCTCTCCATGTCTGTCGCTGGCCTTGCCCGGCCGCCCGAGGGATCTTGCATACTGCGACCAGTGATCCTCTCTCCATGTCTGTCGCTGGCCTTGCCCGGCCCGCCGGGGATCTTGCCCATGCTCGACCAGTGATCCCTCTCTCCATGTCTGTCGCTGGCCTTGCCCGGCCCGCCCGAGATCTTGGGGCTGCTGCGACCAGGATCCTCTCCATATCCTGTCGCTGGCCTTGCCCGGCCCGCCGAGGGATCTTGCATTGCTGCGACCAGGATCCTCTCTCCATGTCTGTCGCCAGCCGCCATGACTTCGCCCGGGGATCTTGCATGCACTGCGACCAGTGATCCTCTCTCCATGTCTGTCGCTGGCCTTGCCCGGCCCGCCCCGGGGATCTTGCATTGCTCGGGCCAGGGATCTCTCCATGTCTGTCGCTGGCCTTGCCGACCGCCGAGGATCTTTGCATTGCTGCGACCAGGGATCCCTCTCTCCATGTCTGTCGCTGGCCTTGCCCCGGCCCGCCGGGGATCTTGCATTGCTCGACCAGGGATCCTCTCTCCATGTCTGTCGCTGGCCTGCCCGGCCCCGCCCGAGGATCTTGCATTGCTGCGCGACCAGGGATCCTCTCTCCATGTCTGTCGCTGGCCTTGCCCGGCCCGCCCGGGGGATCTTGCATGCTGCGACCCAGTGATCCTCTCTCCATGTCTGTCGCTGGCCTTGCCCGGCCGCCGGGGATCTTGCATTGCTCGACCAGGGATCCTCTCTCCATGTCTGTCGCTGGCCTTGCCCGGCCCGCCCGGGGATCTTGCATTGCCGACCAGGGATCCTCTCTCCTTGTGTCGCTGGCCTT	.	
PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr1;END=2441617;STD_quant_start=35.0071;STD_quant_stop=184.207;Kurtosis_quant_start=1.35393;Kurtosis_quant_stop=-1.47296;SVTYPE=INS;RNAMES=10a01cbd-3204-44bd-8e18-aa1f7102a596,11cc45ed-33df-4995-84b5-02be2a3c198a,2e000069-23ca-4c66-85fb-500ffa84bc1e,32508232-0783-466c-8c97-23f11117dccf,375a5306-1f06-4084-a995-4f8695e83ec6,449d6781-7c8d-4f3f-aed2-7a122c648ad6,45d1807a-8638-48e3-b1ab-c13ec31f67d4,471288e6-2af7-4c44-8ced-0b2e14a0af09,4fe8ce9a-fd28-4e0b-90f4-5a90d99f2466,6b456c6e-3fe1-4b74-a35d-1c240b6ce279,6e0f3747-6214-415b-a677-635aa90110b8,747fe45c-dfcc-4b42-b4b8-e396c7bcf318,79c8b378-0cf0-47a2-ab2b-c4f512f9ff0a,8501a903-194a-4b39-90b9-5dbb8894a965,a41e48ed-36b3-4af2-87c6-9c114d8067f2,ae4210ea-eb59-418a-8709-462b3ab10a77,bdb15ecc-872a-4aef-9611-a439385e8ed3,dac45185-8624-4672-97a4-5e015bc689ca,edf624ea-c4fb-4000-963f-03379b300d79,ee26af85-3d3f-4c14-84a5-694053a729cf,f2021b13-8bd1-4ecd-b83b-26b1f3834c67;SUPTYPE=AL,SR;SVLEN=963;STRANDS=+-;RE=15;REF_strand=3,6;AF=0.625	GT:DR:DV	0/1:9:15
+chr1	2459450	85	TCCTTCCTGGTGGTTCTCCTTCCTGGTGGTCCTCCTTCCTGGTGGTCCTCCTTCCTGGTGGTCCTCCTTCCTGGTGGTCCTCCTTCCTGGTGGTCCTCCTTCCTGGTGGTCCTCCTTGCCGGTGGTCCTCCTTGCCGGTGGTCTTCCTTTCCGGTGGTCCTCCTTGCCTGTGGTCCTCCTTGCCTGTGGTCT	N	.	PASS	PRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr1;END=2459642;STD_quant_start=0.547723;STD_quant_stop=0;Kurtosis_quant_start=0.482804;Kurtosis_quant_stop=5.95281;SVTYPE=DEL;RNAMES=034ea11e-3da4-453a-80aa-6efab59d5110,0d288516-a039-451f-9f69-20031569d572,0d96f8b9-1d1e-4ece-a418-703cf14d00b6,0fec5033-ebb8-4832-bdc7-66bdb1ef1f57,1a37d739-733c-4be8-9dad-2083e6928e5c,1c1de513-464e-4024-a10d-29956a286cb1,551711d9-55f3-4933-9066-4df1346da909,761838d0-4eaf-4837-a45e-c92313e20f53,768d7eca-6f56-49b2-af49-be31afd1e13e,798d8388-3fbf-4607-8a25-97853325b1c5,8b4ad02d-6cbb-4aad-928d-7d7d1a34be5e,940c4e93-c468-49db-bc93-a17a034f1d7b,9e6f7ab9-acd0-4376-8175-df4a33abc5eb,9ecd2a2e-9ad5-43fb-aa89-98e139d419a3,9f2908f7-2773-402f-8f27-79dfab5594cf,a913fa6f-c3b4-488e-856d-1e47dce746ff,c14638ff-820c-42d0-aa9f-7cb8feb3238e,c71f623c-221a-49bb-b463-a8057d0a7f29,ce283681-ed7e-4685-ac1b-a1d5308e6055,e28ed1db-4be8-4a5e-9eab-f220168fe4f7;SUPTYPE=AL;SVLEN=-192;STRANDS=+-;RE=20;REF_strand=11,7;AF=0.526316	GT:DR:DV	0/1:18:20
+chr1	2522791	86	N	TATAGTGACTTAACGGAGGGCACTGTATGCTATAGTGACTTAACGGAGGGCACCGTGTGTGTTATAGTGACTTAACGGAGGGCACCGTATGGTGCTATAGTGACTTAACTGAGGGCACTGTGTGTGTTATAGTGACTTAACGGAGGGGCACCGTGTGTGTTATAGTGACTTAACGGAGGGCACCGTATGGTGCTATAGTGACTTAACGGAGGGCACCGTATGGTGCTATGTGACTTAACGGAGGGGACCGTGTGGTGCTATAGTGACTTAACGGAGGGCATTGTGTGTGCCAGTGACTTAACGGAGGGCACCCCGTACGGTGCTATAGTGACTTAACGGAGGGCACTGTGTGTGCTAAAGTGACTAACGGAGGGGACCGTGTGGTGTTATAGTGACTTAACGGAGGGCACCGGATGGTGCCAGGTAGTGACTTAACGGAAGGGACTGTGGTGTTATAGTGACTTAACGGAGGGCACTGATGGTGCTATAGTGACTTAACGAGGGGACCGTGTGGTGTTATAGTGACTTAACGGAGGGCACCGTGTGGTGT	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr1;END=2523320;STD_quant_start=46.8466;STD_quant_stop=106.999;Kurtosis_quant_start=0.908045;Kurtosis_quant_stop=-0.365621;SVTYPE=INS;RNAMES=2dd6b87a-9fbd-4327-9b00-f19a7b42aff9,a61d5f51-b87b-4a54-8b91-4faed7bafa4c,b0ce83fd-b979-40ba-819c-9619b2e49608,d2840c79-4ff9-4db4-84bd-eaf4be8bef66,d880e068-2bc0-4d31-810c-c47e1af97a5a,f1f4d10c-8e26-4889-b569-f3289b79f3b4;SUPTYPE=AL;SVLEN=550;STRANDS=+-;RE=5;REF_strand=7,6;AF=0.277778	GT:DR:DV	0/0:13:5
+chr1	2592355	87_0	CCCCTCCCCTGCTGTGCTGGCACCCCCTCCCCTGCCGCGCTGATGCCCCCTCCCCTGATGCACTGGCGCCCCCTCCCCTGCCATGCTGACGCCCCCTCCCCTGCCGTGCTGGCGCCCCCTCCCCTGCCGCGCTGACGCCCCCTCCCCTGCCGCGCTGACGCCCCCTCCCCTGCCGCGCTGATGCCCCCTCCCCTGCCGTGCTGGCGCCCCCT	N	.	PASS	PRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr1;END=2592568;STD_quant_start=2;STD_quant_stop=2.52678;Kurtosis_quant_start=-0.815348;Kurtosis_quant_stop=-0.886441;SVTYPE=DEL;RNAMES=075ea038-cc1f-46e9-81ea-8fc94439f2c2,0a6d50c0-fd3a-442f-a61b-35e7ddc4dedf,15659d1b-e42b-496e-bc0b-0a57abbb5e38,1d97238d-4d17-4db8-88ab-03c183ae0934,2e6d3d64-84f0-458e-a973-7fbe937ed23c,34e8529a-be85-4285-873d-921539591f34,42778f38-cc84-482c-8f5c-d5a1d7b77879,47a610f2-3a2a-4ea7-a878-625bbdd9f062,4deec02b-bcd9-47b1-a876-b3e61131452f,505aee8b-2f38-48ea-a2de-98cb355f35f3,58fd450b-dd5c-428d-8134-d604331bdab1,6fd0bb2e-9c38-4a19-9e15-3ede11c3883a,73f809f6-8ac0-4838-9667-e52087f81be8,7d212f5c-b0a1-49da-8aab-af4cdd659e1e,7ecd2a68-f165-41f2-af24-f64ead2e5b71,862b9f9b-5094-45f8-9005-65d41c8d9c35,8e3220de-e682-4c27-974c-509712b22ad6,91c549fd-0458-499c-ad46-9416fb3a1a10,a4ae79d5-09d3-48de-a91b-84b92555c4c8,b14fcb62-2759-4596-9fe6-7ecbb82f2138,cbbc3bdd-ae78-4a61-9866-f97800766dd1,e492c156-9ec3-49ef-870f-3b755678869b,eb03f7dc-4330-4729-85ef-e125a9e4ad5b,fc132c78-0c57-4046-ab0d-8f8d74ab914a,fcb46bff-2c24-406f-81ea-b4a009ed61f7,ff4023b0-f820-4d8c-a517-c04455f9b1d0;SUPTYPE=AL;SVLEN=-213;STRANDS=+-;RE=26;REF_strand=13,10;AF=0.530612	GT:DR:DV	0/1:23:26
+chr1	2602149	87_1	N	GCCACGTCAGGACCAGCCTCCCTCAGGTAGAAGTCAGGTTCGTCTTCTTGGAGTCAGAGGCCACTCAGCAATCTAGA	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr1;END=2602230;STD_quant_start=13.3791;STD_quant_stop=12.1169;Kurtosis_quant_start=-2.12793;Kurtosis_quant_stop=-2.03683;SVTYPE=INS;RNAMES=080bd3e6-be18-4eae-b582-b2c931bce677,1499a1be-5ee2-457a-8b64-dac5f7a79a47,284431c2-f271-49c8-84b1-fc48b113b70b,34e8529a-be85-4285-873d-921539591f34,3e58d6a5-20b8-49b7-a02e-bd73c598775a,453c0bb2-2128-4ecc-9c6b-ce569db89834,48447aec-1b08-4409-86ea-8cd17bc45a23,4f69d8c2-229a-4c57-8007-e7c999b63942,6a25ad15-ad1e-4e5c-884c-845a87c6fbdc,72165ea7-31cf-4f4b-a099-145d9d7b1266,7bbc658c-4c6e-4dc6-a2a1-fbb0b436919d,7ecd2a68-f165-41f2-af24-f64ead2e5b71,7fb8622d-2c5f-492f-85bd-6f74f57119f1,82472465-ed07-4feb-989a-bb3cdea3150c,91c549fd-0458-499c-ad46-9416fb3a1a10,91dc1e9d-faf0-4bac-8f7e-86b955cd99f5,93e9e469-b3ca-43b3-bc0f-212e0f33afd3,995b8daf-cc06-4bd6-82c4-693e270ab7ea,e0e7fa04-7c3a-4748-b129-dac09bb9619f,f0783eb8-fd5d-42e3-9d2d-e6a2d8c7254f,fc132c78-0c57-4046-ab0d-8f8d74ab914a,fe0d04d3-3b08-4618-8579-c51c98a1ccaa;SUPTYPE=AL;SVLEN=80;STRANDS=+-;RE=22;REF_strand=6,11;AF=0.564103	GT:DR:DV	0/1:17:22
+chr1	2652083	89_0	N	CCCCAGGTGAGCATCCGACAGCCTGGAGCAGCACCCCACGCCCAGGTGAGCAATGACAGCCTGGAACAGCACCCACCCCCAGGAGAGCATCTGACAGCCTGGAGCAGAACCCACCCACAGGCGAGCATCTGACAGCCTGGGTCGGCAC	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr1;END=2652221;STD_quant_start=21.3049;STD_quant_stop=12.182;Kurtosis_quant_start=-1.88012;Kurtosis_quant_stop=-1.16379;SVTYPE=INS;RNAMES=02ba50c5-26f8-45f3-8fe7-9a7f61fac7f6,079fd425-df3d-468a-b552-2e579dc0522b,1914c345-2c26-4520-ac2c-99d33848176d,1f650b27-ee75-4812-bd48-f4c326820dde,26678881-769b-4da2-ab6b-58551722824b,2fc0e095-e459-4e17-9a9f-18a199c50908,30044b95-f04e-4a4b-b10e-f58df1806af9,3a1870b8-919f-41da-9acc-bc039af2a1ba,4c13c934-2614-4c44-bfbd-9a7289e40062,516909d0-ab9f-48da-9ec4-418f4c436131,5bf5cef3-7d7d-4139-8e47-9bf31dd216af,5d9b20fd-1f6f-41df-8350-a29246e536dd,6e2b1fc4-bd27-482b-9f5b-9777177143a6,70f92b03-38dd-47c1-a7e8-9fc4d81fa318,7448038e-ce47-4302-8073-a80274bfa16a,986509c9-3855-4ee3-a667-9bc970c66867,9a19d03f-fb7e-4770-ad1f-e64cbba6aefe,9a1fea37-5dec-4c49-a7c1-f808c97041a0,a56c5898-8f78-4731-a928-1c882472b4ec,b52e0fd0-0627-4176-aaa2-64fb7935a108,edcb1ac9-c75c-4c02-b9a2-411765f99e37;SUPTYPE=AL;SVLEN=37;STRANDS=+-;RE=21;REF_strand=17,19;AF=0.368421	GT:DR:DV	0/1:36:21
+chr1	2653604	91_0	TACACCCACAAGTGAGCATCTGACAGCCTGGAGCAGCATCCACACCCCCAGGCGAGCATCTGACAGCCTGGAACAGCACCCA	N	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr1;END=2653716;STD_quant_start=53.9565;STD_quant_stop=32.0765;Kurtosis_quant_start=-1.7751;Kurtosis_quant_stop=-0.966136;SVTYPE=DEL;RNAMES=0158ae9b-1cea-44ac-a523-2ffbdd24d482,05eb3583-4eea-45e9-bfcd-772055aaedeb,126d5e77-444b-41e6-af80-cc9f4aa8c66a,24b0a6b5-ee2c-466c-bdb4-4f69e8cfb6eb,2c750e0a-d09c-4052-9e22-b7846b9901df,54548ce1-5153-4ab5-9132-09917e430a74,6dccd774-2617-4c36-927c-19552cbe2840,6e29f3fb-6791-4396-84de-60f79bc784f7,77574f58-be4a-45e0-ae50-65b8e2f3e1fa,8a1925ec-84a4-493d-aee2-ffa8f53e1dd9,95be938e-9621-403e-b941-41fcc109e84a,984a1b40-0e0e-47b1-8859-0d807d1c2cf3,abde884c-3320-4e31-842d-e148f7459810,b4c9e4bf-5762-4ccd-80fa-fb08a6a0010e,d92d3960-4f96-4a2b-b1cc-ba4658976990;SUPTYPE=AL;SVLEN=-112;STRANDS=+-;RE=15;REF_strand=12,21;AF=0.3125	GT:DR:DV	0/1:33:15
+chr1	2653644	89_1	N	CCCCAGGTGAGCATCTGGCCAGCCTGAACAACACTCCTG	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr1;END=2653708;STD_quant_start=6.84349;STD_quant_stop=14.8661;Kurtosis_quant_start=-0.608737;Kurtosis_quant_stop=-1.58209;SVTYPE=INS;RNAMES=00db15b2-57f3-4b04-99ec-a767cb355dd6,02ba50c5-26f8-45f3-8fe7-9a7f61fac7f6,067adc23-3733-463b-bf30-3506ccb80286,0b952212-2a8f-4c3f-9242-feae635ad72f,0c9a0a71-e6f8-4fca-8488-c1815cb723ad,0f08527b-7745-4b44-adc4-838aa079ac29,4a582a55-20bd-4a02-b343-9ba8a8186c96,4ec9953b-3be5-493b-9e01-3ddb75ecddc7,51ec221c-3b15-42f2-b08f-c4b701f3b538,56e0025b-6e20-4c5d-9fef-17ed8bd0e3d5,5c75377d-1fb1-48be-ab51-ec39995d0b92,5d9b20fd-1f6f-41df-8350-a29246e536dd,70423a2f-e8b0-42f7-9ab4-c0e1beb13a3e,7175d4aa-1063-4242-af5f-5789c7f7de4d,76885d77-2eda-4866-9c3a-011e2198d357,7e647479-6d90-4de4-bc04-8f6df2119d0e,932bb34e-fc09-48b8-8c7a-85de34b5071b,94087aa5-facd-4548-81e4-ce9eb1a404b8,9dbf2ebd-4b68-40e0-aea3-73db2c89d7ea,9dd105c2-6f3b-4383-8a6f-30ba7bec1207,a8888d71-bd7f-422c-8c01-7b9d2469aada,abde884c-3320-4e31-842d-e148f7459810,c4bfb544-5450-4e71-979a-f8ed12864a0f,c86e0a6c-26f3-41de-b6ed-d29c3f370e1a,d8bc8df3-fbcb-4efe-a9e7-b310aa7113ef;SUPTYPE=AL;SVLEN=81;STRANDS=+-;RE=24;REF_strand=13,21;AF=0.413793	GT:DR:DV	0/1:34:24
+chr1	2655003	91_1	CTCTCACAACCCCAGGTGAGCATCTGACAGCCCGGAACA	N	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr1;END=2655057;STD_quant_start=27.4481;STD_quant_stop=21.7117;Kurtosis_quant_start=4.01557;Kurtosis_quant_stop=4.40304;SVTYPE=DEL;RNAMES=0f08527b-7745-4b44-adc4-838aa079ac29,43110448-5a2a-4374-affb-b5131e88b80f,54548ce1-5153-4ab5-9132-09917e430a74,6e29f3fb-6791-4396-84de-60f79bc784f7,6f07594a-7079-46ad-816c-932b2ecb0f98,85efba47-b448-4ed2-b080-afc1cab0d598,a8888d71-bd7f-422c-8c01-7b9d2469aada,abde884c-3320-4e31-842d-e148f7459810,c9f1ed31-fd3e-4516-a789-374b480d1624,dbe924d2-b7be-4baa-a70c-f38f48bd2d6d,e7b6b62b-35e7-4e51-9d37-a930f65ca207;SUPTYPE=AL;SVLEN=-54;STRANDS=+-;RE=11;REF_strand=12,14;AF=0.297297	GT:DR:DV	0/0:26:11
+chr1	2655093	89_2	CCCCAGGTGAGCATCCAACAGCCTGGAACAGCACCGACACCCCCAGGTGAGCATCCGACAGCCTGGAGCAGCACCCACACCCCCAGGTGAGCATCTGATATCCTGGAACAGCACCCACACCCCCAGGTGAGCATCTGACAGGCTGGAGCAGCACGCACACCCCCAGTGAGCATCTGACAGCCTGCAACAGCTCTCACAACCCCAGGTGAGCATCTGACAGCCCGGAACAGCACGCTGCACCCCCAAGTGAGCACCTGACAGCCTGGAGCAGCAACCACA	N	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr1;END=2655359;STD_quant_start=101.621;STD_quant_stop=124.593;Kurtosis_quant_start=-0.743602;Kurtosis_quant_stop=0.797034;SVTYPE=DEL;RNAMES=00db15b2-57f3-4b04-99ec-a767cb355dd6,02ba50c5-26f8-45f3-8fe7-9a7f61fac7f6,0b952212-2a8f-4c3f-9242-feae635ad72f,126d5e77-444b-41e6-af80-cc9f4aa8c66a,5d9b20fd-1f6f-41df-8350-a29246e536dd,70423a2f-e8b0-42f7-9ab4-c0e1beb13a3e,8a1925ec-84a4-493d-aee2-ffa8f53e1dd9,c86e0a6c-26f3-41de-b6ed-d29c3f370e1a;SUPTYPE=AL;SVLEN=-266;STRANDS=+-;RE=8;REF_strand=3,2;AF=0.615385	GT:DR:DV	0/1:5:8
+chr1	2655316	91_1	ACAGCACCCACACCCCCAGGTGAGCATCTGACAGGCTGGAGCAGCACGCACACCCCCAGTGAGCATCTGACAGCCTGCAACAGCTCTCACAACCCCAGGTGAGCATCTGACAGCCCGGAACAGCACGCTGCACCCCCAAGTGAGCACCTGACAGCCTGGAGCAGCAACCACACCCCCAGGTGAGCATCCAACAGCCTGGAACAGCACCGACACCCCCAGGTGAGCATCCGACAGCCTGGAGCAGCACCCACACCCCCAGGTGAGCATCTGATATCCTG	N	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr1;END=2655505;STD_quant_start=107.58;STD_quant_stop=66.298;Kurtosis_quant_start=-2.19968;Kurtosis_quant_stop=-2.1278;SVTYPE=DEL;RNAMES=00db15b2-57f3-4b04-99ec-a767cb355dd6,0158ae9b-1cea-44ac-a523-2ffbdd24d482,03e5cf49-71c2-4755-a458-465a48094234,0d2d5305-6e5f-4727-b1bc-07bef37ffe11,32cfc35d-2be4-4f41-9d31-21d2e0267fe4,3477c04c-40eb-4421-843a-d4322a4ec6d9,3ca46174-1e88-4b14-9c48-4402e09bfb27,4a582a55-20bd-4a02-b343-9ba8a8186c96,4ec9953b-3be5-493b-9e01-3ddb75ecddc7,51ec221c-3b15-42f2-b08f-c4b701f3b538,58092463-80c2-408c-aa63-6b00486f44bc,7175d4aa-1063-4242-af5f-5789c7f7de4d,74573bdf-6135-449c-a83a-52763f7a8385,77574f58-be4a-45e0-ae50-65b8e2f3e1fa,7bfdc99d-734d-460a-bdd1-204a51c1f608,7e647479-6d90-4de4-bc04-8f6df2119d0e,85efba47-b448-4ed2-b080-afc1cab0d598,9772b2f4-57f6-4aaa-981d-2f3a9e50d6c9,9dbf2ebd-4b68-40e0-aea3-73db2c89d7ea,abde884c-3320-4e31-842d-e148f7459810,ae486c6b-8ef6-4185-b8e9-e7f26a83cf26,b122e05c-3996-413d-a12f-d4547c943fe5,c9f1ed31-fd3e-4516-a789-374b480d1624,dbe924d2-b7be-4baa-a70c-f38f48bd2d6d,e4a66895-140f-476b-a91e-d94d0ba95342,e7b6b62b-35e7-4e51-9d37-a930f65ca207,ede912f4-5167-4638-902c-67a102d17350,ff58ec71-8a66-4246-9f93-79a1d9af860b;SUPTYPE=AL;SVLEN=-189;STRANDS=+-;RE=28;REF_strand=3,1;AF=0.875	GT:DR:DV	1/1:4:28
+chr1	2657218	89_1	TGATGGTCTGGAGCAGCACCCACAACCACAGGTGAACATCAGAGAGTCTGGAGCAGCGCCCACAACCCCAGGCGAGCATCTGACAGCCTGGAGCCGTGCCCAAACACCCAGGTGAGCATCTGACAGCATGGAGCAGCACCCATAGCCCAAGGTGAGCATCTGACAACTTGGAGCAGCACCCACACCCCGAGGTGAGCATCTGACCTCCCGGAGCAGTACCAGTACCCCCAGGCGAGCATCTGAACTCATGGAGCAGCACCCACGCCCCCAGGCGAGCATCTGACCGAACGGAGCAGCACCCACAACCCCAGGCGAGCATCTGACAGCATGAAACAGCACCCAGAACTCCAGGTGAGCATCTGACAGCCCGCAGTAGCACCCACAAGCACAAGTGAGAAT	N	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr1;END=2657716;STD_quant_start=100.28;STD_quant_stop=99.8248;Kurtosis_quant_start=-1.44056;Kurtosis_quant_stop=-1.42879;SVTYPE=DEL;RNAMES=0158ae9b-1cea-44ac-a523-2ffbdd24d482,0b952212-2a8f-4c3f-9242-feae635ad72f,126d5e77-444b-41e6-af80-cc9f4aa8c66a,1f650b27-ee75-4812-bd48-f4c326820dde,26678881-769b-4da2-ab6b-58551722824b,2fc0e095-e459-4e17-9a9f-18a199c50908,45751196-7376-4e6d-8073-de7dbd962408,516909d0-ab9f-48da-9ec4-418f4c436131,51ec221c-3b15-42f2-b08f-c4b701f3b538,5bf5cef3-7d7d-4139-8e47-9bf31dd216af,6e2b1fc4-bd27-482b-9f5b-9777177143a6,70423a2f-e8b0-42f7-9ab4-c0e1beb13a3e,8a1925ec-84a4-493d-aee2-ffa8f53e1dd9,91629993-2017-44a6-86a4-238d1fd17249,9a19d03f-fb7e-4770-ad1f-e64cbba6aefe,c86e0a6c-26f3-41de-b6ed-d29c3f370e1a,edcb1ac9-c75c-4c02-b9a2-411765f99e37;SUPTYPE=AL,SR;SVLEN=-498;STRANDS=+-;RE=17;REF_strand=13,12;AF=0.404762	GT:DR:DV	0/1:25:17
+chr1	2657257	89_1	CCCTGCACACCCAGGTGAGCATCCGACAGCCTGGAGCAGCACCCACACCCCCAGTTGAGCATCTGATGGTCTGGAGCAGCACCCACAACCACAGGTGAACATCAGAGAGTCTGGAGCAGCGCCCACAACCCCAGGCGAGCATCTGACAGCCTGGAGCCGTGCCCAAACACCCAGGTGAGCATCTGACAGCATGGAGCAGCACCCATAGCCCAAGGTGAGCATCTGACAACTTGGAGCAGCACCCACACCCCGAGGTGAGCATCTGACCTCCCGGAGCAGTACCAGTACCCCCAGGCGAGCATCTGAACTCATGGAGCAGCACCCACGCCCCCAGGCGAGCATCTGACCGAACGGAGCAGC	N	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr1;END=2657477;STD_quant_start=107.679;STD_quant_stop=106.262;Kurtosis_quant_start=-1.52544;Kurtosis_quant_stop=0.301427;SVTYPE=DEL;RNAMES=112c6a5f-ad13-406d-90b5-5b98716845e5,1914c345-2c26-4520-ac2c-99d33848176d,5bf5cef3-7d7d-4139-8e47-9bf31dd216af,5d9b20fd-1f6f-41df-8350-a29246e536dd,7cadd1ca-76fd-4e10-9536-dcc4a660b1a2,91629993-2017-44a6-86a4-238d1fd17249;SUPTYPE=AL;SVLEN=-220;STRANDS=+-;RE=6;REF_strand=2,6;AF=0.428571	GT:DR:DV	0/1:8:6
+chr1	125029102	1150_3	N	<DEL/INV>	.	PASS	PRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr1;END=125029168;STD_quant_start=0;STD_quant_stop=0;Kurtosis_quant_start=1.95204;Kurtosis_quant_stop=4.40106;SVTYPE=DEL/INV;RNAMES=0615a1e2-43d8-4ee1-aad8-b3bc30654032,07df46cf-fab9-4b7a-9c61-63d6fb79063d,0c62bf53-a1fa-4cd8-9f65-9b6d896437f6,0c775ec9-8cd4-46fa-b432-d8fadbe12e8c,13f6e3a3-7e12-4fe1-96e8-6fb510c5e51e,25140c43-b936-4ec7-88ac-5f35ee57eb89,425d50b0-d4c0-4128-befb-3cb9a20d0395,42ec5104-09bf-4324-9579-9099acbf7650,61e7dbc6-c78a-4047-aaee-6660f758bf93,711bb46e-6427-4960-9abf-a11838e69701,76aadab8-9dfe-46bc-a906-eb70126c5841,77b0f627-1ffc-43ff-8f07-a3839b73e859,78e435d7-34fa-4d52-9f3f-189868c74142,7bff020f-5745-4030-a649-c2ca270932f4,801d64f6-1205-4a00-a1c0-eeff98c29be7,928578aa-6777-4ba7-a150-7b2eaa900249,993a19f7-ab8d-4636-aa9f-56566d3ab328,9dc57ac2-909f-467e-a15f-26041dee67d0,a5fdd9d5-ed51-4036-88e5-6009ce233bc6;SUPTYPE=NR;SVLEN=-66;STRANDS=+-;RE=19;REF_strand=0,5;AF=0.791667	GT:DR:DV	0/1:5:19
+chr10	125502113	16341_1	N	<INVDUP>	.	PASS	PRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr10;END=125508658;STD_quant_start=0;STD_quant_stop=3.1305;Kurtosis_quant_start=nan;Kurtosis_quant_stop=2;SVTYPE=INVDUP;RNAMES=11f6d8be-ef3b-44bd-bc04-b7a4c6619129,235ef779-5cc1-4999-91cd-25c6bbcfbb08,3058d7cf-0c2c-493e-8bd7-97adb6e8c721,40788bbd-835b-4706-8abe-a76b9672804f,4ed139db-33bf-4ed2-b709-840d06a92d5f,929a407e-1103-4f89-81fa-20b902c08c6e,a8b62faf-9df4-497e-a1df-f4d6af7a92e0,c34b12b2-7aee-4c9a-89a2-81d8f9d34a8b;SUPTYPE=SR;SVLEN=6545;STRANDS=++;RE=5;REF_strand=6,4;AF=0.333333	GT:DR:DV	0/1:10:5
+chr11	3653753	16746_1	N	CAACCCTACCTCTGTCTCTCCTCACCGCTATTCTCCCATCAGACATCAGTCTTGTAAATTCCAATTCCTACCTCTGTCTATCCTCACTGCCATTCTCCCACTGACATCAAGTCTTGTAAATCTCCACCTCCTACCTCTGTCTATCCTCACTGCCATTCTCCCATCAGACATCAAGTTCTGTAAATTCCACCTCCTACCTCTCTGTCTATCCTCACTGCCATTCTCCCATCTGGACATCAAGTCTTGTAAATT	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr11;END=3654825;STD_quant_start=139.219;STD_quant_stop=2.84445;Kurtosis_quant_start=-1.96058;Kurtosis_quant_stop=-0.064951;SVTYPE=DUP/INS;RNAMES=00b67655-ed56-4780-b3fe-be29d59e1859,028e509d-93df-4dc8-82e9-a09514d7bc3d,02cce278-7597-4217-8253-5f28a462e699,06eb2443-613b-404c-950a-d4af531205bd,0fcbfcc2-2a80-4da8-b592-d7862d4e5f32,0ffca8ba-4f7f-4475-b7f1-b57ec5c83ed8,130292a2-4dff-4c25-b571-b1ed9fb82f6e,14038c62-2b05-446f-8d58-b34a3f784d23,16732163-f7e5-4bc2-bfc8-a38f908e5ba8,190c6094-4e7b-4f20-91a1-30e6b0d189db,19b242ce-893d-4b98-ac2d-70cf8e216d61,206d22d5-959a-4d55-9d85-eca31de42f0b,21cc7629-0ee7-44b6-9e17-664918ab0ac2,26fd2c01-04d2-40f7-b350-aeea96752a72,2c67fc96-be15-4e19-bb73-662a104fdd1a,329783a5-e55a-4276-8c13-1f8bdbff7700,342c2503-a98b-4a20-9dd9-8190bdd743fa,34c23995-bcaa-4dcd-aefa-3c96d4032492,4f12c658-1e88-44a9-9689-18bc76d12047,5da809c9-cf2c-4562-a703-3d1b12927220,6145c5c6-c4ed-4b30-987f-e653337a0a18,744b6c64-1a96-4dcb-9216-8be6bdcfe7f3,84e6ec27-5a6b-463d-8681-045651b2af07,8eedae6b-ec01-4367-bd47-2081f9df8f33,8f6ff282-26eb-4eb5-89bd-df9010689ba6,9c8fa8a6-da33-4599-b835-24d0220c6139,a2494f89-4dba-472a-9b20-c61d0a0512af,ad3b03b1-1cf7-4a54-a6df-eb7563ddbbea,b2b77f8b-0659-4996-8280-078e8b9463bb,b7819371-05b3-4eac-a229-54a49a852133,bafd9ab5-3cc7-4c21-b48f-186d1a8e5351,bff30357-4e2d-46a7-927e-707223885e25,c2d8bcd3-a488-4709-8d33-f9c000c54d51,c4e7702f-8831-4236-b6a8-6723a3d668f8,ca4a99ee-181d-488a-9eea-e0ef7e9b765e,ca6d0c9c-bc64-4888-b660-18ca49b597b9,ccf84af9-06f8-4bb1-b844-e4512907b8bd,d00d6c06-b2e2-407d-b294-d585efdb53ad,da8571f5-d34e-4e04-a7a8-b2696a4661e0,e1fd56ef-d7ee-4703-8fad-a90383cb4156,ebf37b99-cfbc-4168-a497-a8453d0e698a,f8008b3f-d0
e6-474b-82c1-bb28a53b9e01,f8c3fa80-204b-4d1f-b782-358c648e48bd,fa2f213f-f63d-4828-a601-bfdfae84b8e9;SUPTYPE=AL,SR;SVLEN=61;STRANDS=+-;RE=22;REF_strand=1,0;AF=0.956522	GT:DR:DV	1/1:1:22

From 302d8544ded6d21ea121961c48693699409fb88d Mon Sep 17 00:00:00 2001
From: Jeremy Fan <jfan@bcgsc.ca>
Date: Thu, 20 Jan 2022 14:49:16 -0800
Subject: [PATCH 074/137] reformat file

---
 src/mavis/tools/vcf.py | 11 +++--------
 1 file changed, 3 insertions(+), 8 deletions(-)

diff --git a/src/mavis/tools/vcf.py b/src/mavis/tools/vcf.py
index ae1410e5..c25fe27d 100644
--- a/src/mavis/tools/vcf.py
+++ b/src/mavis/tools/vcf.py
@@ -181,9 +181,7 @@ def convert_record(record, record_mapping={}, log=DEVNULL) -> List[Dict]:
                     std_row[COLUMNS.event_type] = SVTYPE.INS
                 elif size < 0:
                     std_row[COLUMNS.event_type] = SVTYPE.DEL
-        std_row.update(
-            {COLUMNS.break1_chromosome: record.chrom, COLUMNS.break2_chromosome: chr2}
-        )
+        std_row.update({COLUMNS.break1_chromosome: record.chrom, COLUMNS.break2_chromosome: chr2})
         if info.get(
             "PRECISE", False
         ):  # DELLY CI only apply when split reads were not used to refine the breakpoint which is then flagged
@@ -201,11 +199,8 @@ def convert_record(record, record_mapping={}, log=DEVNULL) -> List[Dict]:
                     COLUMNS.break1_position_start: max(
                         1, record.pos + info.get("CIPOS", (0, 0))[0]
                     ),
-                    COLUMNS.break1_position_end: record.pos
-                    + info.get("CIPOS", (0, 0))[1],
-                    COLUMNS.break2_position_start: max(
-                        1, end + info.get("CIEND", (0, 0))[0]
-                    ),
+                    COLUMNS.break1_position_end: record.pos + info.get("CIPOS", (0, 0))[1],
+                    COLUMNS.break2_position_start: max(1, end + info.get("CIEND", (0, 0))[0]),
                     COLUMNS.break2_position_end: end + info.get("CIEND", (0, 0))[1],
                 }
             )

From 4b42f693cd282544a458c373b6e16587082efc56 Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Thu, 20 Jan 2022 16:44:49 -0800
Subject: [PATCH 075/137] Remove leftover unused perl file

---
 src/tools/TSV.pm | 202 -----------------------------------------------
 1 file changed, 202 deletions(-)
 delete mode 100644 src/tools/TSV.pm

diff --git a/src/tools/TSV.pm b/src/tools/TSV.pm
deleted file mode 100644
index ddc48bb8..00000000
--- a/src/tools/TSV.pm
+++ /dev/null
@@ -1,202 +0,0 @@
-package TSV;
-
-#** @file
-# Main file for processing tsv formatted files
-# given some list of required column names, goes through the rows and builds a
-# hash for each row by column names
-# note the hash will only store information for column names that we pass in
-# via the required columns list
-#*
-
-use strict;
-use warnings;
-use POSIX qw(strftime);
-
-my $_warnings_off = 0;
-
-sub import
-{
-    my $class = shift;
-    my $_warnings_off = shift if $#_ >= 0;
-}
-
-sub _build_header_hash
-{
-    #** @function private _build_header_hash($required_column_names)
-    # @param $required_column_names [required] the list of expected column names
-    # @return a reference to a hash of the required column names
-    #*
-    my $required_column_names = shift;
-    my $header_index_hash = {};
-    for my $col (@$required_column_names)
-    {
-        $header_index_hash->{$col} = -1; #default
-    }
-    return $header_index_hash;
-}
-
-sub generate_header_comments
-{
-    #** @function header_comments($inputfile, $outputfile)
-    # @param $program the program used to generate the results
-    # @param $version the version of the above program
-    # @param %args a hash representing the parameters that the above program was run with
-    # @return the string that will be put at the top of the output file
-    #*
-    my $program = shift;
-    my $version = shift;
-    my %args = @_;
-    my $time = strftime("%Y-%m-%d %H:%M:%S", localtime);
-    my @result = (
-        "## Generated by $program version $version at $time",
-        "## Running Parameters: ",
-    );
-    while((my $option, my $setting) = each %args)
-    {
-        push(@result, sprintf(
-                "##\t%s\t%s", $option,
-                defined $setting? $setting : 'undef'
-            )
-        );
-    }
-    push(@result, "##");
-    return join("\n", @result) . "\n";
-}
-
-sub _parse_input_line
-{
-    #** @function private parse_input_line($header_index, $line)
-    # builds a hash of a row from the input file using expected column identifiers
-    # @param $header_index the hash of column names with their index positions in a line
-    # @param $line the row we are parsing
-    # @retval {} the row hash
-    #*
-    my ($header_index, $line) = @_;
-
-    my @fields = split("\t", $line, -1);
-    if(scalar @fields < scalar keys %$header_index)
-    {
-        my $err = ("[ERROR] in row $line\n"
-            . "[ERROR] found "
-            . scalar @fields 
-            . " but expected " 
-            . (scalar keys %{$header_index} ) 
-            . " fields\n"
-            . "Error reading the input file. The number of fields"
-            . " in the input row is less than the number of required"
-            . " columns. Please check that the input file is"
-            . " tab-delimited and has the correct number of fields");
-        die $err;
-    }
-
-    my $record = {};
-    while((my $column_name, my $index) = each %$header_index)
-    {
-        $record->{$column_name} = $fields[$index];
-    }
-    return $record;
-}
-
-sub _parse_header_line
-{
-    #** @function private undef parse_header_line($header_index_ref, $header)
-    # fills the header_index hash with the column names (keys) and their positions (values) in the line
-    # @warning throws an exception if it is passed a column header (in the hash) that is not found
-    # @param $header_index_ref reference to the hash indices by required column names
-    # @param $header header string from the input file
-    # @return none
-    #*
-    my ($header_index_ref, $header) = @_;
-    
-    my @column_names = split("\t", $header, -1);
-    my $counter = 0;
-    my %dup_counter = ();
-    while(my $col = shift @column_names) # store the positions of the column names that we are looking for
-    {
-        if(exists $dup_counter{$col})
-        {
-            die "[ERROR] duplicate column names $col in header\n";
-        }
-        $dup_counter{$col} = undef;
-        $header_index_ref->{$col} = $counter;
-        $counter++;
-    }
-    while((my $column, my $index_position) = each %$header_index_ref) # check to ensure we have valid index positions for each of the required columns
-    {
-        if($index_position < 0)
-        {
-            die "[ERROR] in parsing the header of the inputfile. Did not find the required column $column";
-        }
-    }
-}
-
-sub parse_input
-{
-    #** @function public () parse_input($filename, $req_columns)
-    # reads a tab-delimited file
-    # creates an array of the rows (excluding comments and the header)
-    # each row is turned into a hash (by header column names)
-    # @param $filename the input file
-    # @param $req_columns an array of expected column names
-    # @retval () an array of the input file rows
-    #*
-    my ($filename, $req_columns) = @_;
-    my $header_index = _build_header_hash($req_columns);
-    die unless defined $header_index;
-    open(my $fh, "<", $filename)
-        or die "Could not open inputfile $filename\n";
-    my $line;
-
-    while($line = <$fh>)
-    {
-        last if(not ($line =~ m/^##/)); # skip comments, defined by double hash line
-    }
-    # the next line is the header
-    chomp($line);
-    $line =~ s/^#//; # remove the header starting hash if present
-
-    _parse_header_line($header_index, $line);
-    
-    my @header = sort { $header_index->{$a} <=> $header_index->{$b} } keys(%$header_index);
-    my @catalog = ();
-    while($line = <$fh>)
-    {
-        chomp($line); # remove leading and trailing whitespace
-        next if $line eq "";
-        my $record = _parse_input_line($header_index, $line);
-        die if !defined $record;
-        push(@catalog, $record);
-    }
-    return (\@header, \@catalog);
-}
-
-
-sub string_line
-{
-    #** @method public static $ string_line($header, $line, $delim)
-    # @param $header [required] (type: array ref)
-    # @param $line [required] (type: hash ref)
-    # @param $delim [optional] (type: string)
-    # puts a line back together in the same order as the input header given some
-    # delimiter
-    # @return (type: string) the string of the input line
-    #*
-    my $header = shift;
-    my $line = shift;
-    my $delim = shift;
-    $delim = ! defined $delim ? "\t" : $delim;
-    
-    my @new_line = ();
-    for my $col (@$header)
-    {
-        if (! exists $line->{$col})
-        {
-            die "[ERROR] column '$col' not in row";
-        }
-        push(@new_line, $line->{$col});
-    }
-    return join($delim, @new_line);
-}
-
-
-1; # this makes the module usable from another perl script

From 2793abac36237ebfe7af777f0a7341da7fe6df33 Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Thu, 20 Jan 2022 16:45:15 -0800
Subject: [PATCH 076/137] Fix inputs for skip validate version of config

---
 Snakefile | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/Snakefile b/Snakefile
index 113dc2fb..4c8233f9 100644
--- a/Snakefile
+++ b/Snakefile
@@ -170,10 +170,18 @@ if not config['skip_stage.validate']:
                 + ' &> {log}'
 
 
+def get_annotate_input_file(wildcards):
+    if not config['skip_stage.validate']:
+        return expand(rules.validate.output, library=[wildcards.library], job_id=[wildcards.job_id])
+    return expand(CLUSTER_OUTPUT, library=[wildcards.library], job_id=[wildcards.job_id])
+
+
 rule annotate:
     input: rules.validate.output if not config['skip_stage.validate'] else rules.cluster.output
     output: stamp=output_dir('{library}/annotate/batch-{job_id}/MAVIS.COMPLETE'),
         result=output_dir('{library}/annotate/batch-{job_id}/annotations.tab')
+    params:
+        inputfile=get_annotate_input_file
     log: os.path.join(LOG_DIR, '{library}.annotate.snakemake.batch-{job_id}.log.txt')
     container: CONTAINER
     resources:
@@ -184,7 +192,7 @@ rule annotate:
     shell:
         'mavis annotate --config {rules.init_config.output}'
             + ' --library {wildcards.library}'
-            + ' --inputs {input}'
+            + ' --inputs {params.inputfile}'
             + ' --output ' + output_dir('{wildcards.library}/annotate/batch-{wildcards.job_id}')
             + ' &> {log}'
 

From 4a8b1d0aafbd219d7fd4b751152a81f3652d0937 Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Thu, 20 Jan 2022 16:45:52 -0800
Subject: [PATCH 077/137] Only run the bam stats collection if validation is
 not skipped

---
 src/mavis/main.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/mavis/main.py b/src/mavis/main.py
index abedaf82..ea2f93aa 100644
--- a/src/mavis/main.py
+++ b/src/mavis/main.py
@@ -265,7 +265,8 @@ def main(argv=None):
             )
         elif command == SUBCOMMAND.SETUP:
             # add bam stats to the config if missing
-            _config.add_bamstats_to_config(config)
+            if not config.get('skip_stage.validate'):
+                _config.add_bamstats_to_config(config)
             _util.LOG(f'writing: {args.outputfile}')
             with open(args.outputfile, 'w') as fh:
                 fh.write(json.dumps(config, sort_keys=True, indent='  '))

From 574172e71ca5b8ece99325d5c7f18780f2a0ad22 Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Thu, 20 Jan 2022 16:46:09 -0800
Subject: [PATCH 078/137] Apply custom functions after adding default values

---
 src/mavis/util.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/mavis/util.py b/src/mavis/util.py
index abd55c34..446520d3 100644
--- a/src/mavis/util.py
+++ b/src/mavis/util.py
@@ -398,10 +398,6 @@ def soft_null_cast(value):
         if col not in df and col not in add_default:
             raise KeyError(f'missing required column: {col}')
 
-    # run the custom functions
-    for col, func in apply.items():
-        df[col] = df[col].apply(func)
-
     if COLUMNS.opposing_strands in df:
         df[COLUMNS.opposing_strands] = df[COLUMNS.opposing_strands].apply(
             lambda x: None if x == '?' else soft_cast(x, cast_type=bool)
@@ -434,6 +430,10 @@ def soft_null_cast(value):
         else:
             df[col] = default_value
 
+    # run the custom functions
+    for col, func in apply.items():
+        df[col] = df[col].apply(func)
+
     # set overwriting defaults
     for col, value in overwrite.items():
         df[col] = value

From a9abb205c77f8580e3f274b95ed81611bb00da2f Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Thu, 20 Jan 2022 18:27:28 -0800
Subject: [PATCH 079/137] Add test for annotation-only workflow

---
 tests/mini-tutorial.annotate_only.config.json | 52 ++++++++++
 tests/snakemake/test_mini_workflow.py         | 98 ++++++++++++++++---
 2 files changed, 134 insertions(+), 16 deletions(-)
 create mode 100644 tests/mini-tutorial.annotate_only.config.json

diff --git a/tests/mini-tutorial.annotate_only.config.json b/tests/mini-tutorial.annotate_only.config.json
new file mode 100644
index 00000000..b270c7dc
--- /dev/null
+++ b/tests/mini-tutorial.annotate_only.config.json
@@ -0,0 +1,52 @@
+{
+    "annotate.draw_fusions_only": false,
+    "convert": {
+        "mock_converted": {
+            "inputs": [
+                "tests/data/mock_sv_events.tsv"
+            ],
+            "file_type": "mavis",
+            "assume_no_untemplated": true
+        }
+    },
+    "skip_stage.validate": true,
+    "cluster.uninformative_filter": true,
+    "cluster.limit_to_chr": null,
+    "cluster.min_clusters_per_file": 5,
+    "libraries": {
+        "mock-A47933": {
+            "assign": [
+                "tests/data/mock_trans_sv_events.tsv"
+            ],
+            "bam_file": "tests/data/mock_trans_reads_for_events.sorted.bam",
+            "disease_status": "diseased",
+            "protocol": "transcriptome",
+            "strand_specific": true
+        },
+        "mock-A36971": {
+            "assign": [
+                "mock_converted"
+            ],
+            "bam_file": "tests/data/mock_reads_for_events.sorted.bam",
+            "disease_status": "diseased",
+            "protocol": "genome",
+            "strand_specific": false
+        }
+    },
+    "output_dir": "output_dir",
+    "reference.annotations": [
+        "tests/data/mock_annotations.json"
+    ],
+    "reference.dgv_annotation": [
+        "tests/data/mock_dgv_annotation.txt"
+    ],
+    "reference.masking": [
+        "tests/data/mock_masking.tab"
+    ],
+    "reference.reference_genome": [
+        "tests/data/mock_reference_genome.fa"
+    ],
+    "reference.template_metadata": [
+        "tests/data/cytoBand.txt"
+    ]
+}
diff --git a/tests/snakemake/test_mini_workflow.py b/tests/snakemake/test_mini_workflow.py
index 963f339c..af332343 100644
--- a/tests/snakemake/test_mini_workflow.py
+++ b/tests/snakemake/test_mini_workflow.py
@@ -12,6 +12,16 @@
 from ..util import glob_exists, long_running_test, package_relative_file
 
 
+def list_files(startpath):
+    for root, dirs, files in os.walk(startpath):
+        level = root.replace(startpath, '').count(os.sep)
+        indent = ' ' * 4 * (level)
+        print('{}{}/'.format(indent, os.path.basename(root)))
+        subindent = ' ' * 4 * (level + 1)
+        for f in files:
+            print('{}{}'.format(subindent, f))
+
+
 @pytest.fixture
 def blat_output_dir():
     temp_output = tempfile.mkdtemp()
@@ -45,6 +55,21 @@ def bwa_output_dir():
     shutil.rmtree(temp_output)
 
 
+@pytest.fixture
+def annotate_only_output_dir():
+    temp_output = tempfile.mkdtemp()
+
+    os.makedirs(os.path.join(temp_output, 'mavis/schemas'))
+
+    with open(package_relative_file('tests/mini-tutorial.annotate_only.config.json'), 'r') as fh:
+        config = json.load(fh)
+    config['output_dir'] = os.path.join(temp_output, 'output_dir')
+    with open(os.path.join(temp_output, 'mini-tutorial.config.json'), 'w') as fh:
+        fh.write(json.dumps(config))
+    yield temp_output
+    shutil.rmtree(temp_output)
+
+
 @pytest.fixture
 def output_dir(request):
     return request.getfixturevalue(request.param)
@@ -67,22 +92,63 @@ def test_workflow(output_dir):
     with patch.object(sys, 'argv', argv):
         try:
             snakemake_main()
-            assert glob_exists(os.path.join(output_dir, 'summary', 'MAVIS.COMPLETE'))
-            assert glob_exists(os.path.join(output_dir, 'pairing', 'MAVIS.COMPLETE'))
-            assert glob_exists(os.path.join(output_dir, 'mock-A47933', 'cluster', 'MAVIS.COMPLETE'))
-            assert glob_exists(
-                os.path.join(output_dir, 'mock-A47933', 'validate', '*', 'MAVIS.COMPLETE')
-            )
-            assert glob_exists(
-                os.path.join(output_dir, 'mock-A47933', 'annotate', '*', 'MAVIS.COMPLETE')
-            )
-            assert glob_exists(os.path.join(output_dir, 'mock-A36971', 'cluster', 'MAVIS.COMPLETE'))
-            assert glob_exists(
-                os.path.join(output_dir, 'mock-A36971', 'validate', '*', 'MAVIS.COMPLETE')
-            )
-            assert glob_exists(
-                os.path.join(output_dir, 'mock-A36971', 'annotate', '*', 'MAVIS.COMPLETE')
-            )
+
+        except SystemExit as err:
+            if err.code != 0:
+                raise err
+
+    list_files(output_dir)
+    for expected_file in [
+        os.path.join('summary', 'MAVIS.COMPLETE'),
+        os.path.join('pairing', 'MAVIS.COMPLETE'),
+        os.path.join('mock-A47933', 'cluster', 'MAVIS.COMPLETE'),
+        os.path.join('mock-A47933', 'annotate', 'batch-*', 'MAVIS.COMPLETE'),
+        os.path.join('mock-A36971', 'cluster', 'MAVIS.COMPLETE'),
+        os.path.join('mock-A36971', 'annotate', 'batch-*', 'MAVIS.COMPLETE'),
+        os.path.join('mock-A47933', 'validate', 'batch-*', 'MAVIS.COMPLETE'),
+        os.path.join('mock-A36971', 'validate', 'batch-*', 'MAVIS.COMPLETE'),
+    ]:
+        if not glob_exists(os.path.join(output_dir, 'output_dir', expected_file)):
+            raise AssertionError(f'{expected_file} does not exist')
+
+
+@long_running_test
+@pytest.mark.parametrize('output_dir', ['annotate_only_output_dir'], indirect=True)
+def test_no_validate_worflow(output_dir):
+    argv = [
+        'snakemake',
+        '-s',
+        package_relative_file('Snakefile'),
+        '-j',
+        '1',
+        '--configfile',
+        os.path.join(output_dir, 'mini-tutorial.config.json'),
+        '-d',
+        package_relative_file(),
+    ]
+    with patch.object(sys, 'argv', argv):
+        try:
+            snakemake_main()
+
         except SystemExit as err:
             if err.code != 0:
                 raise err
+
+    list_files(output_dir)
+    for expected_file in [
+        os.path.join('summary', 'MAVIS.COMPLETE'),
+        os.path.join('pairing', 'MAVIS.COMPLETE'),
+        os.path.join('mock-A47933', 'cluster', 'MAVIS.COMPLETE'),
+        os.path.join('mock-A47933', 'annotate', 'batch-*', 'MAVIS.COMPLETE'),
+        os.path.join('mock-A36971', 'cluster', 'MAVIS.COMPLETE'),
+        os.path.join('mock-A36971', 'annotate', 'batch-*', 'MAVIS.COMPLETE'),
+    ]:
+        if not glob_exists(os.path.join(output_dir, 'output_dir', expected_file)):
+            raise AssertionError(f'{expected_file} does not exist')
+
+    for unexpected_file in [
+        os.path.join('mock-A47933', 'validate', 'batch-*', 'MAVIS.COMPLETE'),
+        os.path.join('mock-A36971', 'validate', 'batch-*', 'MAVIS.COMPLETE'),
+    ]:
+        if glob_exists(os.path.join(output_dir, 'output_dir', unexpected_file)):
+            raise AssertionError(f'{unexpected_file} exists')

From 837a28d79750dd22c3b39155635a49ebdedb9af8 Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Thu, 20 Jan 2022 18:30:02 -0800
Subject: [PATCH 080/137] Remove leftover debugging code

---
 tests/snakemake/test_mini_workflow.py | 12 ------------
 1 file changed, 12 deletions(-)

diff --git a/tests/snakemake/test_mini_workflow.py b/tests/snakemake/test_mini_workflow.py
index af332343..2b559234 100644
--- a/tests/snakemake/test_mini_workflow.py
+++ b/tests/snakemake/test_mini_workflow.py
@@ -12,16 +12,6 @@
 from ..util import glob_exists, long_running_test, package_relative_file
 
 
-def list_files(startpath):
-    for root, dirs, files in os.walk(startpath):
-        level = root.replace(startpath, '').count(os.sep)
-        indent = ' ' * 4 * (level)
-        print('{}{}/'.format(indent, os.path.basename(root)))
-        subindent = ' ' * 4 * (level + 1)
-        for f in files:
-            print('{}{}'.format(subindent, f))
-
-
 @pytest.fixture
 def blat_output_dir():
     temp_output = tempfile.mkdtemp()
@@ -97,7 +87,6 @@ def test_workflow(output_dir):
             if err.code != 0:
                 raise err
 
-    list_files(output_dir)
     for expected_file in [
         os.path.join('summary', 'MAVIS.COMPLETE'),
         os.path.join('pairing', 'MAVIS.COMPLETE'),
@@ -134,7 +123,6 @@ def test_no_validate_worflow(output_dir):
             if err.code != 0:
                 raise err
 
-    list_files(output_dir)
     for expected_file in [
         os.path.join('summary', 'MAVIS.COMPLETE'),
         os.path.join('pairing', 'MAVIS.COMPLETE'),

From 3d8602376416535c16c60e7b35651312558edbaf Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Thu, 20 Jan 2022 21:09:59 -0800
Subject: [PATCH 081/137] Add annotation only example to docs

---
 docs/tutorials/annotation.md | 98 ++++++++++++++++++++++++++++++++++++
 1 file changed, 98 insertions(+)
 create mode 100644 docs/tutorials/annotation.md

diff --git a/docs/tutorials/annotation.md b/docs/tutorials/annotation.md
new file mode 100644
index 00000000..adfaf17d
--- /dev/null
+++ b/docs/tutorials/annotation.md
@@ -0,0 +1,98 @@
+# Annotation Only
+
+Sometimes you have a set of variants and would simply like to run the annotate step of MAVIS to visualize and annotate them.
+
+First, create a basic config that tells MAVIS where to find the reference files you want to use and provides some minimal information about the library/sample you want to process.
+
+Here is an example config where the user has created a minimal input file in the MAVIS standard input file format. The convert step is run on this file to fill in any unknowns (e.g. the SV type, if left blank).
+
+```json
+{
+    "libraries": {
+        "my_library": {
+            "assign": ["my_converted_file"],
+            "disease_status": "normal",
+            "protocol": "genome"
+        }
+    },
+    "convert": {
+        "my_converted_file": {
+            "inputs": ["/path/to/file/structural_variants.txt"],
+            "file_type": "mavis"
+        }
+    },
+    "cluster.split_only": true,
+    "skip_stage.validate": true,
+    "output_dir": "my_output_dir",
+    "reference.annotations": "/path/to/mavis/reference_files/ensembl79_hg38_annotations.json",
+    "reference.template_metadata": "/path/to/mavis/reference_files/hg38_cytoBand.txt",
+    "reference.reference_genome": "/path/to/hg38_no_alt/genome/hg38_no_alt.fa",
+    "reference.masking": "/path/to/mavis/reference_files/masking_hg38.adjusted.tab",
+    "reference.dgv_annotation": "/path/to/mavis/reference_files/dgv_hg38_annotations.tab"
+}
+```
+
+Another example is given in the MAVIS tests folder under `tests/mini-tutorial.annotate_only.config.json`, which looks like this:
+
+```json
+{
+    "annotate.draw_fusions_only": false,
+    "convert": {
+        "mock_converted": {
+            "inputs": [
+                "tests/data/mock_sv_events.tsv"
+            ],
+            "file_type": "mavis",
+            "assume_no_untemplated": true
+        }
+    },
+    "skip_stage.validate": true,
+    "cluster.uninformative_filter": true,
+    "cluster.limit_to_chr": null,
+    "cluster.min_clusters_per_file": 5,
+    "libraries": {
+        "mock-A47933": {
+            "assign": [
+                "tests/data/mock_trans_sv_events.tsv"
+            ],
+            "bam_file": "tests/data/mock_trans_reads_for_events.sorted.bam",
+            "disease_status": "diseased",
+            "protocol": "transcriptome",
+            "strand_specific": true
+        },
+        "mock-A36971": {
+            "assign": [
+                "mock_converted"
+            ],
+            "bam_file": "tests/data/mock_reads_for_events.sorted.bam",
+            "disease_status": "diseased",
+            "protocol": "genome",
+            "strand_specific": false
+        }
+    },
+    "output_dir": "output_dir",
+    "reference.annotations": [
+        "tests/data/mock_annotations.json"
+    ],
+    "reference.dgv_annotation": [
+        "tests/data/mock_dgv_annotation.txt"
+    ],
+    "reference.masking": [
+        "tests/data/mock_masking.tab"
+    ],
+    "reference.reference_genome": [
+        "tests/data/mock_reference_genome.fa"
+    ],
+    "reference.template_metadata": [
+        "tests/data/cytoBand.txt"
+    ]
+}
+```
+
+Either of these configurations can be run with the following command, changing only the `--configfile` argument:
+
+```bash
+snakemake -j 1 \
+    --configfile tests/mini-tutorial.annotate_only.config.json \
+    -s Snakefile
+```

From aa0a861f646a80e15105f645abed4523acd64d32 Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Thu, 20 Jan 2022 22:28:21 -0800
Subject: [PATCH 082/137] Use mavis_config instead of local schemas

---
 setup.cfg                                   |   1 +
 src/mavis/annotate/main.py                  |   5 +-
 src/mavis/illustrate/constants.py           |   2 +-
 src/mavis/pairing/constants.py              |   3 +-
 src/mavis/schemas/__init__.py               |  35 -
 src/mavis/schemas/config.json               | 795 --------------------
 src/mavis/schemas/overlay.json              | 142 ----
 src/mavis/validate/base.py                  |   2 +-
 src/mavis/validate/evidence.py              |   2 -
 src/mavis/validate/main.py                  |   1 -
 tests/integration/test_align.py             |   3 +-
 tests/integration/test_assemble.py          |   2 +-
 tests/integration/test_validate.py          |   2 +-
 tests/integration/test_validate_evidence.py |   2 +-
 14 files changed, 12 insertions(+), 985 deletions(-)
 delete mode 100644 src/mavis/schemas/__init__.py
 delete mode 100644 src/mavis/schemas/config.json
 delete mode 100644 src/mavis/schemas/overlay.json

diff --git a/setup.cfg b/setup.cfg
index a34fc7ef..b75ac619 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -48,6 +48,7 @@ install_requires =
     shortuuid>=0.5.0
     svgwrite
     typing_extensions>=4
+    pyfaidx>=0.6.3.1
 setup_requires =
     pip>=9.0.0
     setuptools>=36.0.0
diff --git a/src/mavis/annotate/main.py b/src/mavis/annotate/main.py
index f1a9456e..e1df8e68 100644
--- a/src/mavis/annotate/main.py
+++ b/src/mavis/annotate/main.py
@@ -4,11 +4,12 @@
 import time
 from typing import Dict, List
 
-from ..constants import COLUMNS, PRIME, PROTOCOL, sort_columns
+from mavis_config import get_by_prefix
+
+from ..constants import COLUMNS, PRIME, sort_columns
 from ..error import DrawingFitError, NotSpecifiedError
 from ..illustrate.constants import DiagramSettings
 from ..illustrate.diagram import draw_sv_summary_diagram
-from ..schemas import DEFAULTS, get_by_prefix
 from ..util import LOG, generate_complete_stamp, mkdirp, read_inputs
 from .constants import PASS_FILENAME
 from .file_io import ReferenceFile
diff --git a/src/mavis/illustrate/constants.py b/src/mavis/illustrate/constants.py
index 670e5364..7dcf5e09 100644
--- a/src/mavis/illustrate/constants.py
+++ b/src/mavis/illustrate/constants.py
@@ -1,7 +1,7 @@
 from colour import Color
+from mavis_config import DEFAULTS, get_by_prefix
 
 from ..constants import GIEMSA_STAIN
-from ..schemas import DEFAULTS, get_by_prefix
 
 
 class DiagramSettings:
diff --git a/src/mavis/pairing/constants.py b/src/mavis/pairing/constants.py
index 69077f4d..cb55e123 100644
--- a/src/mavis/pairing/constants.py
+++ b/src/mavis/pairing/constants.py
@@ -1,7 +1,8 @@
 from typing import Dict
 
+from mavis_config import DEFAULTS
+
 from ..constants import CALL_METHOD, MavisNamespace
-from ..schemas import DEFAULTS
 
 PAIRING_DISTANCES: Dict[str, int] = {
     CALL_METHOD.FLANK: DEFAULTS['pairing.flanking_call_distance'],
diff --git a/src/mavis/schemas/__init__.py b/src/mavis/schemas/__init__.py
deleted file mode 100644
index f41bda5c..00000000
--- a/src/mavis/schemas/__init__.py
+++ /dev/null
@@ -1,35 +0,0 @@
-try:
-    from collections import Mapping
-except ImportError:
-    from collections.abc import Mapping
-
-import os
-
-from snakemake.utils import validate as snakemake_validate
-
-
-class ImmutableDict(Mapping):
-    def __init__(self, data):
-        self._data = data
-
-    def __getitem__(self, key):
-        return self._data[key]
-
-    def __len__(self):
-        return len(self._data)
-
-    def __iter__(self):
-        return iter(self._data)
-
-
-def get_by_prefix(config, prefix):
-    return {k.replace(prefix, ''): v for k, v in config.items() if k.startswith(prefix)}
-
-
-DEFAULTS = {}
-snakemake_validate(
-    DEFAULTS,
-    os.path.join(os.path.dirname(__file__), 'config.json'),
-    set_default=True,
-)
-DEFAULTS = ImmutableDict(DEFAULTS)
diff --git a/src/mavis/schemas/config.json b/src/mavis/schemas/config.json
deleted file mode 100644
index a948cd77..00000000
--- a/src/mavis/schemas/config.json
+++ /dev/null
@@ -1,795 +0,0 @@
-{
-    "$schema": "http://json-schema.org/draft-07/schema#",
-    "additionalProperties": false,
-    "properties": {
-        "annotate.annotation_filters": {
-            "default": [
-                "choose_more_annotated",
-                "choose_transcripts_by_priority"
-            ],
-            "description": "A comma separated list of filters to apply to putative annotations",
-            "items": {
-                "enum": [
-                    "choose_more_annotated",
-                    "choose_transcripts_by_priority"
-                ],
-                "type": "string"
-            },
-            "type": "array"
-        },
-        "annotate.draw_fusions_only": {
-            "default": true,
-            "description": "Flag to indicate if events which do not produce a fusion transcript should produce illustrations",
-            "type": "boolean"
-        },
-        "annotate.draw_non_synonymous_cdna_only": {
-            "default": true,
-            "description": "Flag to indicate if events which are synonymous at the cdna level should produce illustrations",
-            "type": "boolean"
-        },
-        "annotate.max_orf_cap": {
-            "default": 3,
-            "description": "The maximum number of orfs to return (best putative orfs will be retained)",
-            "type": "integer"
-        },
-        "annotate.min_domain_mapping_match": {
-            "default": 0.9,
-            "description": "A number between 0 and 1 representing the minimum percent match a domain must map to the fusion transcript to be displayed",
-            "maximum": 1,
-            "minimum": 0,
-            "type": "number"
-        },
-        "annotate.min_orf_size": {
-            "default": 300,
-            "description": "The minimum length (in base pairs) to retain a putative open reading frame (orf)",
-            "type": "integer"
-        },
-        "bam_stats.distribution_fraction": {
-            "default": 0.97,
-            "description": "the proportion of the distribution to use in computing stdev",
-            "maximum": 1,
-            "minimum": 0.01,
-            "type": "number"
-        },
-        "bam_stats.sample_bin_size": {
-            "default": 1000,
-            "description": "how large to make the sample bin (in bp)",
-            "type": "integer"
-        },
-        "bam_stats.sample_cap": {
-            "default": 1000,
-            "description": "maximum number of reads to collect for any given sample region",
-            "type": "integer"
-        },
-        "bam_stats.sample_size": {
-            "default": 500,
-            "description": "the number of genes/bins to compute stats over",
-            "type": "integer"
-        },
-        "cluster.cluster_initial_size_limit": {
-            "default": 25,
-            "description": "The maximum cumulative size of both breakpoints for breakpoint pairs to be used in the initial clustering phase (combining based on overlap)",
-            "type": "integer"
-        },
-        "cluster.cluster_radius": {
-            "default": 100,
-            "description": "Maximum distance allowed between paired breakpoint pairs",
-            "type": "integer"
-        },
-        "cluster.limit_to_chr": {
-            "default": [
-                "1",
-                "2",
-                "3",
-                "4",
-                "5",
-                "6",
-                "7",
-                "8",
-                "9",
-                "10",
-                "11",
-                "12",
-                "13",
-                "14",
-                "15",
-                "16",
-                "17",
-                "18",
-                "19",
-                "20",
-                "21",
-                "22",
-                "X",
-                "Y"
-            ],
-            "description": "A list of chromosome names to use. breakpointpairs on other chromosomes will be filteredout. for example '1 2 3 4' would filter out events/breakpoint pairs on any chromosomes but 1, 2, 3, and 4",
-            "items": {
-                "type": "string"
-            },
-            "type": [
-                "array",
-                "null"
-            ]
-        },
-        "cluster.max_files": {
-            "default": 200,
-            "description": "The maximum number of files to output from clustering/splitting",
-            "minimum": 1,
-            "type": "integer"
-        },
-        "cluster.max_proximity": {
-            "default": 5000,
-            "description": "The maximum distance away from an annotation before the region in considered to be uninformative",
-            "type": "integer"
-        },
-        "cluster.min_clusters_per_file": {
-            "default": 50,
-            "description": "The minimum number of breakpoint pairs to output to a file",
-            "minimum": 1,
-            "type": "integer"
-        },
-        "cluster.split_only": {
-            "default": false,
-            "description": "just split the input files, do not merge input breakpoints into clusters",
-            "type": "boolean"
-        },
-        "cluster.uninformative_filter": {
-            "default": false,
-            "description": "Flag that determines if breakpoint pairs which are not within max_proximity to any annotations are filtered out prior to clustering",
-            "type": "boolean"
-        },
-        "convert": {
-            "additionalProperties": {
-                "properties": {
-                    "assume_no_untemplated": {
-                        "default": false,
-                        "description": "Assume the lack of untemplated information means that there IS not untemplated sequence expected at the breakpoints",
-                        "type": "boolean"
-                    },
-                    "file_type": {
-                        "description": "the tool the file is input from or 'mavis' for standard mavis-style tab files",
-                        "enum": [
-                            "manta",
-                            "delly",
-                            "transabyss",
-                            "pindel",
-                            "chimerascan",
-                            "mavis",
-                            "defuse",
-                            "breakdancer",
-                            "vcf",
-                            "breakseq",
-                            "cnvnator",
-                            "strelka",
-                            "starfusion"
-                        ],
-                        "type": "string"
-                    },
-                    "inputs": {
-                        "items": {
-                            "type": "string"
-                        },
-                        "minItems": 1,
-                        "type": "array",
-                        "description": "List of input files"
-                    },
-                    "strand_specific": {
-                        "default": false,
-                        "type": "boolean"
-                    }
-                },
-                "required": [
-                    "inputs",
-                    "file_type"
-                ],
-                "type": "object"
-            },
-            "type": "object"
-        },
-        "illustrate.domain_color": {
-            "default": "#ccccb3",
-            "description": "Domain fill color",
-            "type": "string",
-            "pattern": "^#[a-zA-Z0-9]{6}"
-        },
-        "illustrate.domain_mismatch_color": {
-            "default": "#b2182b",
-            "description": "Domain fill color on 0%% match",
-            "type": "string",
-            "pattern": "^#[a-zA-Z0-9]{6}"
-        },
-        "illustrate.domain_name_regex_filter": {
-            "default": "^PF\\d+$",
-            "description": "The regular expression used to select domains to be displayed (filtered by name)",
-            "type": "string"
-        },
-        "illustrate.domain_scaffold_color": {
-            "default": "#000000",
-            "description": "The color of the domain scaffold",
-            "type": "string",
-            "pattern": "^#[a-zA-Z0-9]{6}"
-        },
-        "illustrate.drawing_width_iter_increase": {
-            "default": 500,
-            "description": "The amount (in  pixels) by which to increase the drawing width upon failure to fit",
-            "type": "integer"
-        },
-        "illustrate.exon_min_focus_size": {
-            "default": 10,
-            "description": "Minimum size of an exon for it to be granted a label or min exon width",
-            "type": "integer"
-        },
-        "illustrate.gene1_color": {
-            "default": "#657e91",
-            "description": "The color of genes near the first gene",
-            "pattern": "^#[a-zA-Z0-9]{6}",
-            "type": "string"
-        },
-        "illustrate.gene1_color_selected": {
-            "default": "#518dc5",
-            "description": "The color of the first gene",
-            "pattern": "^#[a-zA-Z0-9]{6}",
-            "type": "string"
-        },
-        "illustrate.gene2_color": {
-            "default": "#325556",
-            "description": "The color of genes near the second gene",
-            "pattern": "^#[a-zA-Z0-9]{6}",
-            "type": "string"
-        },
-        "illustrate.gene2_color_selected": {
-            "default": "#4c9677",
-            "description": "The color of the second gene",
-            "pattern": "^#[a-zA-Z0-9]{6}",
-            "type": "string"
-        },
-        "illustrate.label_color": {
-            "default": "#000000",
-            "description": "The label color",
-            "pattern": "^#[a-zA-Z0-9]{6}",
-            "type": "string"
-        },
-        "illustrate.mask_fill": {
-            "default": "#ffffff",
-            "description": "Color of mask (for deleted region etc.)",
-            "pattern": "^#[a-zA-Z0-9]{6}",
-            "type": "string"
-        },
-        "illustrate.mask_opacity": {
-            "default": 0.7,
-            "description": "Opacity of the mask layer",
-            "maximum": 1,
-            "minimum": 0,
-            "type": "number"
-        },
-        "illustrate.max_drawing_retries": {
-            "default": 5,
-            "description": "The maximum number of retries for attempting a drawing. each iteration the width is extended. if it is still insufficient after this number a gene-level only drawing will be output",
-            "type": "integer"
-        },
-        "illustrate.novel_exon_color": {
-            "default": "#5D3F6A",
-            "description": "Novel exon fill color",
-            "pattern": "^#[a-zA-Z0-9]{6}",
-            "type": "string"
-        },
-        "illustrate.scaffold_color": {
-            "default": "#000000",
-            "description": "The color used for the gene/transcripts scaffolds",
-            "pattern": "^#[a-zA-Z0-9]{6}",
-            "type": "string"
-        },
-        "illustrate.splice_color": {
-            "default": "#000000",
-            "description": "Splicing lines color",
-            "pattern": "^#[a-zA-Z0-9]{6}",
-            "type": "string"
-        },
-        "illustrate.width": {
-            "default": 1000,
-            "description": "The drawing width in pixels",
-            "type": "integer"
-        },
-        "illustrate.breakpoint_color": {
-            "default": "#000000",
-            "description": "Breakpoint outline color",
-            "type": "string",
-            "pattern": "^#[a-zA-Z0-9]{6}"
-        },
-        "libraries": {
-            "additionalProperties": {
-                "additionalProperties": false,
-                "properties": {
-                    "assign": {
-                        "items": {
-                            "type": "string"
-                        },
-                        "minItems": 1,
-                        "type": "array",
-                        "description": "List of input files or conversion aliases that should be processed for this library"
-                    },
-                    "total_batches": {
-                        "type": "integer",
-                        "min": 1,
-                        "description": "The number of jobs to slit a library into for cluster/validate/annotate. This will be set during initialization of the config if not given"
-                    },
-                    "bam_file": {
-                        "type": "string",
-                        "description": "Path to the bam file containing the sequencing reads for this library"
-                    },
-                    "disease_status": {
-                        "enum": [
-                            "diseased",
-                            "normal"
-                        ],
-                        "type": "string"
-                    },
-                    "median_fragment_size": {
-                        "type": "integer",
-                        "description": "The median fragment size in the paired-end read library. This will be computed from the bam during initialization of the config if not given"
-                    },
-                    "protocol": {
-                        "enum": [
-                            "genome",
-                            "transcriptome"
-                        ],
-                        "type": "string"
-                    },
-                    "read_length": {
-                        "type": "integer",
-                        "description": "The read length in the paired-end read library. This will be computed from the bam during initialization of the config if not given"
-                    },
-                    "stdev_fragment_size": {
-                        "type": "integer",
-                        "description": "The standard deviation of fragment size in the paired-end read library. This will be computed from the bam during initialization of the config if not given"
-                    },
-                    "strand_determining_read": {
-                        "default": 2,
-                        "description": "1 or 2. the read in the pair which determines if (assuming a stranded protocol) the first or second read in the pair matches the strand sequenced",
-                        "type": "integer"
-                    },
-                    "strand_specific": {
-                        "default": false,
-                        "type": "boolean"
-                    }
-                },
-                "required": [
-                    "disease_status",
-                    "protocol",
-                    "assign"
-                ],
-                "type": "object"
-            },
-            "minProperties": 1,
-            "type": "object"
-        },
-        "log": {
-            "type": "string"
-        },
-        "log_level": {
-            "default": "INFO",
-            "enum": [
-                "INFO",
-                "DEBUG"
-            ],
-            "type": "string"
-        },
-        "output_dir": {
-            "type": "string",
-            "description": "path to the directory to output the MAVIS files to"
-        },
-        "pairing.contig_call_distance": {
-            "default": 10,
-            "description": "The maximum distance allowed between breakpoint pairs (called by contig) in order for them to pair",
-            "type": "integer"
-        },
-        "pairing.flanking_call_distance": {
-            "default": 50,
-            "description": "The maximum distance allowed between breakpoint pairs (called by flanking pairs) in order for them to pair",
-            "type": "integer"
-        },
-        "pairing.input_call_distance": {
-            "default": 20,
-            "description": "The maximum distance allowed between breakpoint pairs (called by input tools, not validated) in order for them to pair",
-            "type": "integer"
-        },
-        "pairing.spanning_call_distance": {
-            "default": 20,
-            "description": "The maximum distance allowed between breakpoint pairs (called by spanning reads) in order for them to pair",
-            "type": "integer"
-        },
-        "pairing.split_call_distance": {
-            "default": 20,
-            "description": "The maximum distance allowed between breakpoint pairs (called by split reads) in order for them to pair",
-            "type": "integer"
-        },
-        "reference.aligner_reference": {
-            "examples": [
-                "tests/data/mock_reference_genome.2bit"
-            ],
-            "items": {
-                "type": "string"
-            },
-            "maxItems": 1,
-            "minItems": 1,
-            "type": "array",
-            "description": "The reference genome file used by the aligner"
-        },
-        "reference.annotations": {
-            "examples": [
-                "tests/data/mock_annotations.json"
-            ],
-            "items": {
-                "type": "string"
-            },
-            "minItems": 1,
-            "type": "array",
-            "description": "The reference file containing gene/transcript position information"
-        },
-        "reference.dgv_annotation": {
-            "examples": [
-                [
-                    "tests/data/mock_dgv_annotation.txt"
-                ]
-            ],
-            "items": {
-                "type": "string"
-            },
-            "minItems": 1,
-            "type": "array"
-        },
-        "reference.masking": {
-            "examples": [
-                [
-                    "tests/data/mock_masking.tab"
-                ]
-            ],
-            "items": {
-                "type": "string"
-            },
-            "minItems": 1,
-            "type": "array",
-            "description": "A list of regions to ignore in validation. Generally these are centromeres and telomeres or known poor mapping areas"
-        },
-        "reference.reference_genome": {
-            "examples": [
-                [
-                    "tests/data/mock_reference_genome.fa"
-                ]
-            ],
-            "items": {
-                "type": "string"
-            },
-            "minItems": 1,
-            "type": "array"
-        },
-        "reference.template_metadata": {
-            "examples": [
-                [
-                    "tests/data/cytoBand.txt"
-                ]
-            ],
-            "items": {
-                "type": "string"
-            },
-            "minItems": 1,
-            "type": "array"
-        },
-        "skip_stage.validate": {
-            "default": false,
-            "description": "skip the validation stage of the MAVIS pipeline",
-            "type": "boolean"
-        },
-        "summary.filter_cdna_synon": {
-            "default": true,
-            "description": "Filter all annotations synonymous at the cdna level",
-            "type": "boolean"
-        },
-        "summary.filter_min_complexity": {
-            "default": 0.2,
-            "description": "Filter event calls based on call sequence complexity",
-            "maximum": 1,
-            "minimum": 0,
-            "type": "number"
-        },
-        "summary.filter_min_flanking_reads": {
-            "default": 10,
-            "description": "Minimum number of flanking pairs for a call by flanking pairs",
-            "type": "integer"
-        },
-        "summary.filter_min_linking_split_reads": {
-            "default": 1,
-            "description": "Minimum number of linking split reads for a call by split reads",
-            "type": "integer"
-        },
-        "summary.filter_min_remapped_reads": {
-            "default": 5,
-            "description": "Minimum number of remapped reads for a call by contig",
-            "type": "integer"
-        },
-        "summary.filter_min_spanning_reads": {
-            "default": 5,
-            "description": "Minimum number of spanning reads for a call by spanning reads",
-            "type": "integer"
-        },
-        "summary.filter_min_split_reads": {
-            "default": 5,
-            "description": "Minimum number of split reads for a call by split reads",
-            "type": "integer"
-        },
-        "summary.filter_protein_synon": {
-            "default": false,
-            "description": "Filter all annotations synonymous at the protein level",
-            "type": "boolean"
-        },
-        "summary.filter_trans_homopolymers": {
-            "default": true,
-            "description": "Filter all single bp ins/del/dup events that are in a homopolymer region of at least 3 bps and are not paired to a genomic event",
-            "type": "boolean"
-        },
-        "validate.aligner": {
-            "default": "blat",
-            "description": "The aligner to use to map the contigs/reads back to the reference e.g blat or bwa",
-            "enum": [
-                "bwa mem",
-                "blat"
-            ],
-            "type": "string"
-        },
-        "validate.assembly_kmer_size": {
-            "default": 0.74,
-            "description": "The percent of the read length to make kmers for assembly",
-            "maximum": 1,
-            "minimum": 0,
-            "type": "number"
-        },
-        "validate.assembly_max_paths": {
-            "default": 8,
-            "description": "The maximum number of paths to resolve. this is used to limit when there is a messy assembly graph to resolve. the assembly will pre-calculate the number of paths (or putative assemblies) and stop if it is greater than the given setting",
-            "type": "integer"
-        },
-        "validate.assembly_min_edge_trim_weight": {
-            "default": 3,
-            "description": "This is used to simplify the debruijn graph before path finding. edges with less than this frequency will be discarded if they are non-cutting, at a fork, or the end of a path",
-            "type": "integer"
-        },
-        "validate.assembly_min_exact_match_to_remap": {
-            "default": 15,
-            "description": "The minimum length of exact matches to initiate remapping a read to a contig",
-            "type": "integer"
-        },
-        "validate.assembly_min_remap_coverage": {
-            "default": 0.9,
-            "description": "Minimum fraction of the contig sequence which the remapped sequences must align over",
-            "maximum": 1,
-            "minimum": 0,
-            "type": "number"
-        },
-        "validate.assembly_min_remapped_seq": {
-            "default": 3,
-            "description": "The minimum input sequences that must remap for an assembled contig to be used",
-            "type": "integer"
-        },
-        "validate.assembly_min_uniq": {
-            "default": 0.1,
-            "description": "Minimum percent uniq required to keep separate assembled contigs. if contigs are more similar then the lower scoring, then shorter, contig is dropped",
-            "maximum": 1,
-            "minimum": 0,
-            "type": "number"
-        },
-        "validate.assembly_strand_concordance": {
-            "default": 0.51,
-            "description": "When the number of remapped reads from each strand are compared, the ratio must be above this number to decide on the strand",
-            "maximum": 1,
-            "minimum": 0,
-            "type": "number"
-        },
-        "validate.blat_limit_top_aln": {
-            "default": 10,
-            "description": "Number of results to return from blat (ranking based on score)",
-            "type": "integer"
-        },
-        "validate.blat_min_identity": {
-            "default": 0.9,
-            "description": "The minimum percent identity match required for blat results when aligning contigs",
-            "maximum": 1,
-            "minimum": 0,
-            "type": "number"
-        },
-        "validate.call_error": {
-            "default": 10,
-            "description": "Buffer zone for the evidence window",
-            "type": "integer"
-        },
-        "validate.clean_aligner_files": {
-            "default": false,
-            "description": "Remove the aligner output files after the validation stage is complete. not required for subsequent steps but can be useful in debugging and deep investigation of events",
-            "type": "boolean"
-        },
-        "validate.contig_aln_max_event_size": {
-            "default": 50,
-            "description": "Relates to determining breakpoints when pairing contig alignments. for any given read in a putative pair the soft clipping is extended to include any events of greater than this size. the softclipping is added to the side of the alignment as indicated by the breakpoint we are assigning pairs to",
-            "type": "integer"
-        },
-        "validate.contig_aln_merge_inner_anchor": {
-            "default": 20,
-            "description": "The minimum number of consecutive exact match base pairs to not merge events within a contig alignment",
-            "type": "integer"
-        },
-        "validate.contig_aln_merge_outer_anchor": {
-            "default": 15,
-            "description": "Minimum consecutively aligned exact matches to anchor an end for merging internal events",
-            "type": "integer"
-        },
-        "validate.contig_aln_min_anchor_size": {
-            "default": 50,
-            "description": "The minimum number of aligned bases for a contig (m or =) in order to simplify. do not have to be consecutive",
-            "type": "integer"
-        },
-        "validate.contig_aln_min_extend_overlap": {
-            "default": 10,
-            "description": "Minimum number of bases the query coverage interval must be extended by in order to pair alignments as a single split alignment",
-            "type": "integer"
-        },
-        "validate.contig_aln_min_query_consumption": {
-            "default": 0.9,
-            "description": "Minimum fraction of the original query sequence that must be used by the read(s) of the alignment",
-            "maximum": 1,
-            "minimum": 0,
-            "type": "number"
-        },
-        "validate.contig_aln_min_score": {
-            "default": 0.9,
-            "description": "Minimum score for a contig to be used as evidence in a call by contig",
-            "maximum": 1,
-            "minimum": 0,
-            "type": "number"
-        },
-        "validate.fetch_min_bin_size": {
-            "default": 50,
-            "description": "The minimum size of any bin for reading from a bam file. increasing this number will result in smaller bins being merged or less bins being created (depending on the fetch method)",
-            "type": "integer"
-        },
-        "validate.fetch_reads_bins": {
-            "default": 5,
-            "description": "Number of bins to split an evidence window into to ensure more even sampling of high coverage regions",
-            "type": "integer"
-        },
-        "validate.fetch_reads_limit": {
-            "default": 3000,
-            "description": "Maximum number of reads, cap, to loop over for any given evidence window",
-            "type": "integer"
-        },
-        "validate.filter_secondary_alignments": {
-            "default": true,
-            "description": "Filter secondary alignments when gathering read evidence",
-            "type": "boolean"
-        },
-        "validate.fuzzy_mismatch_number": {
-            "default": 1,
-            "description": "The number of events/mismatches allowed to be considered a fuzzy match",
-            "type": "integer"
-        },
-        "validate.max_sc_preceeding_anchor": {
-            "default": 6,
-            "description": "When remapping a softclipped read this determines the amount of softclipping allowed on the side opposite of where we expect it. for example for a softclipped read on a breakpoint with a left orientation this limits the amount of softclipping that is allowed on the right. if this is set to none then there is no limit on softclipping",
-            "type": "integer"
-        },
-        "validate.min_anchor_exact": {
-            "default": 6,
-            "description": "Applies to re-aligning softclipped reads to the opposing breakpoint. the minimum number of consecutive exact matches to anchor a read to initiate targeted realignment",
-            "type": "integer"
-        },
-        "validate.min_anchor_fuzzy": {
-            "default": 10,
-            "description": "Applies to re-aligning softclipped reads to the opposing breakpoint. the minimum length of a fuzzy match to anchor a read to initiate targeted realignment",
-            "type": "integer"
-        },
-        "validate.min_anchor_match": {
-            "default": 0.9,
-            "description": "Minimum percent match for a read to be kept as evidence",
-            "maximum": 1,
-            "minimum": 0,
-            "type": "number"
-        },
-        "validate.min_call_complexity": {
-            "default": 0.1,
-            "description": "The minimum complexity score for a call sequence. is an average for non-contig calls. filters low complexity contigs before alignment. see [contig_complexity](#contig_complexity)",
-            "maximum": 1,
-            "minimum": 0,
-            "type": "number"
-        },
-        "validate.min_double_aligned_to_estimate_insertion_size": {
-            "default": 2,
-            "description": "The minimum number of reads which map soft-clipped to both breakpoints to assume the size of the untemplated sequence between the breakpoints is at most the read length - 2 * min_softclipping",
-            "type": "integer"
-        },
-        "validate.min_flanking_pairs_resolution": {
-            "default": 10,
-            "description": "The minimum number of flanking reads required to call a breakpoint by flanking evidence",
-            "type": "integer"
-        },
-        "validate.min_linking_split_reads": {
-            "default": 2,
-            "description": "The minimum number of split reads which aligned to both breakpoints",
-            "type": "integer"
-        },
-        "validate.min_mapping_quality": {
-            "default": 5,
-            "description": "The minimum mapping quality of reads to be used as evidence",
-            "type": "integer"
-        },
-        "validate.min_non_target_aligned_split_reads": {
-            "default": 1,
-            "description": "The minimum number of split reads aligned to a breakpoint by the input bam and no forced by local alignment to the target region to call a breakpoint by split read evidence",
-            "type": "integer"
-        },
-        "validate.min_sample_size_to_apply_percentage": {
-            "default": 10,
-            "description": "Minimum number of aligned bases to compute a match percent. if there are less than this number of aligned bases (match or mismatch) the percent comparator is not used",
-            "type": "integer"
-        },
-        "validate.min_softclipping": {
-            "default": 6,
-            "description": "Minimum number of soft-clipped bases required for a read to be used as soft-clipped evidence",
-            "type": "integer"
-        },
-        "validate.min_spanning_reads_resolution": {
-            "default": 5,
-            "description": "Minimum number of spanning reads required to call an event by spanning evidence",
-            "type": "integer"
-        },
-        "validate.min_splits_reads_resolution": {
-            "default": 3,
-            "description": "Minimum number of split reads required to call a breakpoint by split reads",
-            "type": "integer"
-        },
-        "validate.outer_window_min_event_size": {
-            "default": 125,
-            "description": "The minimum size of an event in order for flanking read evidence to be collected",
-            "type": "integer"
-        },
-        "validate.stdev_count_abnormal": {
-            "default": 3,
-            "description": "The number of standard deviations away from the normal considered expected and therefore not qualifying as flanking reads",
-            "type": "number"
-        },
-        "validate.trans_fetch_reads_limit": {
-            "default": 12000,
-            "description": "Related to [fetch_reads_limit](#fetch_reads_limit). overrides fetch_reads_limit for transcriptome libraries when set. if this has a value of none then fetch_reads_limit will be used for transcriptome libraries instead",
-            "type": [
-                "integer",
-                "null"
-            ]
-        },
-        "validate.trans_min_mapping_quality": {
-            "default": 0,
-            "description": "Related to [min_mapping_quality](#min_mapping_quality). overrides the min_mapping_quality if the library is a transcriptome and this is set to any number not none. if this value is none, min_mapping_quality is used for transcriptomes aswell as genomes",
-            "type": [
-                "integer",
-                "null"
-            ]
-        },
-        "validate.write_evidence_files": {
-            "default": true,
-            "description": "Write the intermediate bam and bed files containing the raw evidence collected and contigs aligned. not required for subsequent steps but can be useful in debugging and deep investigation of events",
-            "type": "boolean"
-        }
-    },
-    "anyOf": [
-        {
-            "not": {
-                "properties": {
-                    "skip_stage.validate": {
-                        "const": true
-                    }
-                },
-                "required": [
-                    "reference.aligner_reference"
-                ]
-            }
-        }
-    ],
-    "type": "object"
-}
diff --git a/src/mavis/schemas/overlay.json b/src/mavis/schemas/overlay.json
deleted file mode 100644
index 3fe89cf5..00000000
--- a/src/mavis/schemas/overlay.json
+++ /dev/null
@@ -1,142 +0,0 @@
-{
-    "$schema": "http://json-schema.org/draft-07/schema#",
-    "additionalProperties": false,
-    "properties": {
-        "illustrate.breakpoint_color": {
-            "default": "#000000",
-            "description": "Breakpoint outline color",
-            "type": "string"
-        },
-        "illustrate.domain_color": {
-            "default": "#ccccb3",
-            "description": "Domain fill color",
-            "type": "string"
-        },
-        "illustrate.domain_mismatch_color": {
-            "default": "#b2182b",
-            "description": "Domain fill color on 0%% match",
-            "type": "string"
-        },
-        "illustrate.domain_name_regex_filter": {
-            "default": "^PF\\d+$",
-            "description": "The regular expression used to select domains to be displayed (filtered by name)",
-            "type": "string"
-        },
-        "illustrate.domain_scaffold_color": {
-            "default": "#000000",
-            "description": "The color of the domain scaffold",
-            "type": "string"
-        },
-        "illustrate.drawing_width_iter_increase": {
-            "default": 500,
-            "description": "The amount (in  pixels) by which to increase the drawing width upon failure to fit",
-            "type": "integer"
-        },
-        "illustrate.exon_min_focus_size": {
-            "default": 10,
-            "description": "Minimum size of an exon for it to be granted a label or min exon width",
-            "type": "integer"
-        },
-        "illustrate.gene1_color": {
-            "default": "#657e91",
-            "description": "The color of genes near the first gene",
-            "pattern": "^#[a-zA-Z0-9]{6}",
-            "type": "string"
-        },
-        "illustrate.gene1_color_selected": {
-            "default": "#518dc5",
-            "description": "The color of the first gene",
-            "pattern": "^#[a-zA-Z0-9]{6}",
-            "type": "string"
-        },
-        "illustrate.gene2_color": {
-            "default": "#325556",
-            "description": "The color of genes near the second gene",
-            "pattern": "^#[a-zA-Z0-9]{6}",
-            "type": "string"
-        },
-        "illustrate.gene2_color_selected": {
-            "default": "#4c9677",
-            "description": "The color of the second gene",
-            "pattern": "^#[a-zA-Z0-9]{6}",
-            "type": "string"
-        },
-        "illustrate.label_color": {
-            "default": "#000000",
-            "description": "The label color",
-            "pattern": "^#[a-zA-Z0-9]{6}",
-            "type": "string"
-        },
-        "illustrate.mask_fill": {
-            "default": "#ffffff",
-            "description": "Color of mask (for deleted region etc.)",
-            "pattern": "^#[a-zA-Z0-9]{6}",
-            "type": "string"
-        },
-        "illustrate.mask_opacity": {
-            "default": 0.7,
-            "description": "Opacity of the mask layer",
-            "maximum": 1,
-            "minimum": 0,
-            "type": "number"
-        },
-        "illustrate.max_drawing_retries": {
-            "default": 5,
-            "description": "The maximum number of retries for attempting a drawing. each iteration the width is extended. if it is still insufficient after this number a gene-level only drawing will be output",
-            "type": "integer"
-        },
-        "illustrate.novel_exon_color": {
-            "default": "#5D3F6A",
-            "description": "Novel exon fill color",
-            "pattern": "^#[a-zA-Z0-9]{6}",
-            "type": "string"
-        },
-        "illustrate.scaffold_color": {
-            "default": "#000000",
-            "description": "The color used for the gene/transcripts scaffolds",
-            "pattern": "^#[a-zA-Z0-9]{6}",
-            "type": "string"
-        },
-        "illustrate.splice_color": {
-            "default": "#000000",
-            "description": "Splicing lines color",
-            "pattern": "^#[a-zA-Z0-9]{6}",
-            "type": "string"
-        },
-        "illustrate.width": {
-            "default": 1000,
-            "description": "The drawing width in pixels",
-            "type": "integer"
-        },
-        "log": {
-            "type": "string"
-        },
-        "log_level": {
-            "default": "INFO",
-            "enum": [
-                "INFO",
-                "DEBUG"
-            ],
-            "type": "string"
-        },
-        "reference.annotations": {
-            "examples": [
-                "tests/data/mock_annotations.json"
-            ],
-            "items": {
-                "type": "string"
-            },
-            "minItems": 1,
-            "type": "array"
-        },
-        "validate.min_mapping_quality": {
-            "default": 5,
-            "description": "The minimum mapping quality of reads to be used as evidence",
-            "type": "integer"
-        }
-    },
-    "required": [
-        "reference.annotations"
-    ],
-    "type": "object"
-}
diff --git a/src/mavis/validate/base.py b/src/mavis/validate/base.py
index e6767d3a..1225d69c 100644
--- a/src/mavis/validate/base.py
+++ b/src/mavis/validate/base.py
@@ -4,6 +4,7 @@
 from typing import Dict, List, Optional, Set, Tuple
 
 import pysam
+from mavis_config import DEFAULTS
 
 from ..assemble import assemble
 from ..bam import cigar as _cigar
@@ -23,7 +24,6 @@
 )
 from ..error import NotSpecifiedError
 from ..interval import Interval
-from ..schemas import DEFAULTS
 from ..util import DEVNULL
 
 
diff --git a/src/mavis/validate/evidence.py b/src/mavis/validate/evidence.py
index a689170c..19f2fbdb 100644
--- a/src/mavis/validate/evidence.py
+++ b/src/mavis/validate/evidence.py
@@ -3,13 +3,11 @@
 
 import pysam
 
-from ..align import SplitAlignment, call_read_events
 from ..annotate.variant import overlapping_transcripts
 from ..bam import cigar as _cigar
 from ..breakpoint import Breakpoint
 from ..constants import CIGAR, COLUMNS, ORIENT, PROTOCOL, STRAND, SVTYPE
 from ..interval import Interval
-from ..schemas import DEFAULTS
 from .base import Evidence
 
 
diff --git a/src/mavis/validate/main.py b/src/mavis/validate/main.py
index 51d1e48f..4bec5fcd 100644
--- a/src/mavis/validate/main.py
+++ b/src/mavis/validate/main.py
@@ -16,7 +16,6 @@
 from ..bam.cache import BamCache
 from ..breakpoint import BreakpointPair
 from ..constants import CALL_METHOD, COLUMNS, PROTOCOL
-from ..schemas import get_by_prefix
 from ..util import (
     LOG,
     filter_on_overlap,
diff --git a/tests/integration/test_align.py b/tests/integration/test_align.py
index 0075a32d..0b3f556e 100644
--- a/tests/integration/test_align.py
+++ b/tests/integration/test_align.py
@@ -2,7 +2,6 @@
 from unittest import mock
 
 import mavis.bam.cigar as _cigar
-import pytest
 from mavis import align
 from mavis.annotate.file_io import load_reference_genome
 from mavis.assemble import Contig
@@ -11,8 +10,8 @@
 from mavis.breakpoint import Breakpoint, BreakpointPair
 from mavis.constants import CIGAR, ORIENT, STRAND, reverse_complement
 from mavis.interval import Interval
-from mavis.schemas import DEFAULTS
 from mavis.validate.evidence import GenomeEvidence
+from mavis_config import DEFAULTS
 
 from ..util import blat_only, bwa_only, get_data
 from . import MockLongString, MockObject, MockRead
diff --git a/tests/integration/test_assemble.py b/tests/integration/test_assemble.py
index 6930b685..b91b1c4d 100644
--- a/tests/integration/test_assemble.py
+++ b/tests/integration/test_assemble.py
@@ -5,8 +5,8 @@
 from mavis.assemble import Contig, assemble, filter_contigs
 from mavis.constants import reverse_complement
 from mavis.interval import Interval
-from mavis.schemas import DEFAULTS
 from mavis.util import LOG
+from mavis_config import DEFAULTS
 
 from ..util import get_data, long_running_test
 from . import MockObject
diff --git a/tests/integration/test_validate.py b/tests/integration/test_validate.py
index 8f0d8471..e29a063f 100644
--- a/tests/integration/test_validate.py
+++ b/tests/integration/test_validate.py
@@ -5,9 +5,9 @@
 from mavis.bam.read import SamRead
 from mavis.breakpoint import Breakpoint
 from mavis.constants import NA_MAPPING_QUALITY, ORIENT, PYSAM_READ_FLAGS
-from mavis.schemas import DEFAULTS
 from mavis.validate.base import Evidence
 from mavis.validate.evidence import GenomeEvidence
+from mavis_config import DEFAULTS
 
 from ..util import get_data, long_running_test
 from . import MockLongString, MockObject, MockRead, mock_read_pair
diff --git a/tests/integration/test_validate_evidence.py b/tests/integration/test_validate_evidence.py
index 9a53cf82..1cab995a 100644
--- a/tests/integration/test_validate_evidence.py
+++ b/tests/integration/test_validate_evidence.py
@@ -9,9 +9,9 @@
 from mavis.breakpoint import Breakpoint, BreakpointPair
 from mavis.constants import ORIENT, STRAND
 from mavis.interval import Interval
-from mavis.schemas import DEFAULTS
 from mavis.validate.base import Evidence
 from mavis.validate.evidence import GenomeEvidence, TranscriptomeEvidence
+from mavis_config import DEFAULTS
 
 from . import MockBamFileHandle, MockObject, MockRead, mock_read_pair
 

From 154cf87f273823981df653a7c5509e3a1f541775 Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Thu, 20 Jan 2022 22:30:09 -0800
Subject: [PATCH 083/137] Remove pyfaidx till later

---
 setup.cfg | 1 -
 1 file changed, 1 deletion(-)

diff --git a/setup.cfg b/setup.cfg
index b75ac619..a34fc7ef 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -48,7 +48,6 @@ install_requires =
     shortuuid>=0.5.0
     svgwrite
     typing_extensions>=4
-    pyfaidx^=0.6.3.1
 setup_requires =
     pip>=9.0.0
     setuptools>=36.0.0

From 4dbeed0d65ae964eb6bd6a3a48cbf6e0e1c12afe Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Thu, 20 Jan 2022 23:53:45 -0800
Subject: [PATCH 084/137] Remove support for tab-delimited annotation files

- use json-schema to validate annotations input file
---
 .gitignore                                    |   1 +
 src/mavis/annotate/annotations_schema.json    | 186 +++++++++++++++++
 src/mavis/annotate/file_io.py                 | 165 ++-------------
 src/tools/migrate_mavis_annotations_2to3.py   | 190 ++++++++++++++++++
 tests/data/annotations_subsample.json         |   2 +-
 tests/data/example_genes.json                 |   2 +
 tests/data/mock_annotations.json              |   2 +-
 .../data/mock_reference_annotations.full.json |   1 +
 .../data/mock_reference_annotations.full.tsv  |   6 -
 tests/data/mock_reference_annotations.json    |   2 +-
 tests/data/mock_reference_annotations.tsv     |   7 -
 tests/data/mock_reference_annotations2.json   |   1 +
 tests/integration/test_annotate.py            |   4 +-
 tests/integration/test_annotate_fileio.py     |  31 +--
 14 files changed, 404 insertions(+), 196 deletions(-)
 create mode 100644 src/mavis/annotate/annotations_schema.json
 create mode 100644 src/tools/migrate_mavis_annotations_2to3.py
 create mode 100644 tests/data/mock_reference_annotations.full.json
 delete mode 100644 tests/data/mock_reference_annotations.full.tsv
 delete mode 100644 tests/data/mock_reference_annotations.tsv
 create mode 100644 tests/data/mock_reference_annotations2.json

diff --git a/.gitignore b/.gitignore
index 1f4c4214..c3cad29a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -41,3 +41,4 @@ bin
 dag*
 tutorial_data
 reference_inputs
+tmp
diff --git a/src/mavis/annotate/annotations_schema.json b/src/mavis/annotate/annotations_schema.json
new file mode 100644
index 00000000..04d0cc50
--- /dev/null
+++ b/src/mavis/annotate/annotations_schema.json
@@ -0,0 +1,186 @@
+{
+    "$schema": "http://json-schema.org/draft-07/schema#",
+    "additionalProperties": false,
+    "properties": {
+        "best_transcript_file": {
+            "type": "string"
+        },
+        "ensembl_version": {
+            "type": "integer"
+        },
+        "generation_time": {
+            "type": "string"
+        },
+        "genes": {
+            "items": {
+                "properties": {
+                    "aliases": {
+                        "default": [
+                        ],
+                        "items": {
+                            "minLength": 1,
+                            "type": "string"
+                        },
+                        "type": "array"
+                    },
+                    "chr": {
+                        "minLength": 1,
+                        "type": "string"
+                    },
+                    "end": {
+                        "minimum": 1,
+                        "type": "integer"
+                    },
+                    "name": {
+                        "minLength": 1,
+                        "type": "string"
+                    },
+                    "start": {
+                        "minimum": 1,
+                        "type": "integer"
+                    },
+                    "strand": {
+                        "enum": [
+                            "+",
+                            "-"
+                        ],
+                        "type": "string"
+                    },
+                    "transcripts": {
+                        "default": [
+                        ],
+                        "items": {
+                            "properties": {
+                                "aliases": {
+                                    "default": [
+                                    ],
+                                    "items": {
+                                        "minLength": 1,
+                                        "type": "string"
+                                    },
+                                    "type": "array"
+                                },
+                                "cdna_coding_end": {
+                                    "minimum": 1,
+                                    "type": [
+                                        "integer",
+                                        "null"
+                                    ],
+                                    "default": null
+                                },
+                                "cdna_coding_start": {
+                                    "minimum": 1,
+                                    "type": [
+                                        "integer",
+                                        "null"
+                                    ],
+                                    "default": null
+                                },
+                                "domains": {
+                                    "default": [
+                                    ],
+                                    "items": {
+                                        "properties": {
+                                            "name": {
+                                                "minLength": 1,
+                                                "type": "string"
+                                            },
+                                            "regions": {
+                                                "minItems": 1,
+                                                "properties": {
+                                                    "end": {
+                                                        "minimum": 1,
+                                                        "type": "integer"
+                                                    },
+                                                    "start": {
+                                                        "minimum": 1,
+                                                        "type": "integer"
+                                                    }
+                                                },
+                                                "type": "array"
+                                            }
+                                        },
+                                        "required": [
+                                            "name",
+                                            "regions"
+                                        ],
+                                        "type": "object"
+                                    },
+                                    "type": "array"
+                                },
+                                "end": {
+                                    "minimum": 1,
+                                    "type": "integer"
+                                },
+                                "exons": {
+                                    "default": [
+                                    ],
+                                    "items": {
+                                        "properties": {
+                                            "end": {
+                                                "minimum": 1,
+                                                "type": "integer"
+                                            },
+                                            "start": {
+                                                "minimum": 1,
+                                                "type": "integer"
+                                            }
+                                        },
+                                        "required": [
+                                            "start",
+                                            "end"
+                                        ],
+                                        "type": "object"
+                                    },
+                                    "type": "array"
+                                },
+                                "is_best_transcript": {
+                                    "default": false,
+                                    "type": "boolean"
+                                },
+                                "name": {
+                                    "minLength": 1,
+                                    "type": "string"
+                                },
+                                "start": {
+                                    "minimum": 1,
+                                    "type": "integer"
+                                }
+                            },
+                            "required": [
+                                "start",
+                                "end",
+                                "name"
+                            ],
+                            "type": "object"
+                        },
+                        "type": "array"
+                    }
+                },
+                "required": [
+                    "chr",
+                    "start",
+                    "end",
+                    "name",
+                    "strand"
+                ],
+                "type": "object"
+            },
+            "minItems": 1,
+            "type": "array"
+        },
+        "hugo_mapping_file": {
+            "type": "string"
+        },
+        "script": {
+            "type": "string"
+        },
+        "script_version": {
+            "type": "string"
+        }
+    },
+    "required": [
+        "genes"
+    ],
+    "type": "object"
+}
diff --git a/src/mavis/annotate/file_io.py b/src/mavis/annotate/file_io.py
index b41489c7..6b1a16f2 100644
--- a/src/mavis/annotate/file_io.py
+++ b/src/mavis/annotate/file_io.py
@@ -4,16 +4,16 @@
 import json
 import os
 import re
-import warnings
-from typing import Callable, Dict, List, Optional, Tuple
+from typing import Callable, Dict, List, Optional
 
 import pandas as pd
 from Bio import SeqIO
 from Bio.SeqRecord import SeqRecord
+from snakemake.utils import validate as snakemake_validate
 
-from ..constants import CODON_SIZE, GIEMSA_STAIN, START_AA, STOP_AA, STRAND, translate
+from ..constants import CODON_SIZE, GIEMSA_STAIN, START_AA, STOP_AA, translate
 from ..interval import Interval
-from ..util import DEVNULL, LOG, cast_boolean, filepath
+from ..util import DEVNULL, LOG
 from .base import BioInterval, ReferenceName
 from .genomic import Exon, Gene, PreTranscript, Template, Transcript
 from .protein import Domain, Translation
@@ -79,11 +79,8 @@ def load_annotations(
     for filename in filepaths:
         data = None
 
-        if filename.endswith('.json'):
-            with open(filename) as fh:
-                data = json.load(fh)
-        else:
-            data = convert_tab_to_json(filename, warn)
+        with open(filename) as fh:
+            data = json.load(fh)
 
         current_annotations = parse_annotations_json(
             data,
@@ -107,17 +104,20 @@ def parse_annotations_json(
     """
     parses a json of annotation information into annotation objects
     """
+    try:
+        snakemake_validate(
+            data,
+            os.path.join(os.path.dirname(__file__), 'annotations_schema.json'),
+        )
+    except Exception as err:
+        short_msg = '. '.join(
+            [line for line in str(err).split('\n') if line.strip()][:3]
+        )  # these can get super long
+        raise AssertionError(short_msg)
+
     genes_by_chr: Dict[str, List[Gene]] = {}
 
     for gene_dict in data['genes']:
-        if gene_dict['strand'] in ['1', '+', 1]:
-            gene_dict['strand'] = STRAND.POS
-        elif gene_dict['strand'] in ['-1', '-', -1]:
-            gene_dict['strand'] = STRAND.NEG
-        else:
-            raise AssertionError(
-                'input has unexpected form. strand must be 1 or -1 but found', gene_dict['strand']
-            )
 
         gene = Gene(
             chr=gene_dict['chr'],
@@ -130,7 +130,6 @@ def parse_annotations_json(
 
         has_best = False
         for transcript in gene_dict['transcripts']:
-            transcript['is_best_transcript'] = cast_boolean(transcript['is_best_transcript'])
             transcript.setdefault('exons', [])
             exons = [Exon(strand=gene.strand, **ex) for ex in transcript['exons']]
             if not exons:
@@ -205,136 +204,6 @@ def parse_annotations_json(
     return genes_by_chr
 
 
-def convert_tab_to_json(filepath: str, warn: Callable = DEVNULL) -> Dict:
-    """
-    given a file in the std input format (see below) reads and return a list of genes (and sub-objects)
-
-    +-----------------------+---------------------------+-----------------------------------------------------------+
-    | column name           | example                   | description                                               |
-    +=======================+===========================+===========================================================+
-    | ensembl_transcript_id | ENST000001                |                                                           |
-    +-----------------------+---------------------------+-----------------------------------------------------------+
-    | ensembl_gene_id       | ENSG000001                |                                                           |
-    +-----------------------+---------------------------+-----------------------------------------------------------+
-    | strand                | -1                        | positive or negative 1                                    |
-    +-----------------------+---------------------------+-----------------------------------------------------------+
-    | cdna_coding_start     | 44                        | where translation begins relative to the start of the cdna|
-    +-----------------------+---------------------------+-----------------------------------------------------------+
-    | cdna_coding_end       | 150                       | where translation terminates                              |
-    +-----------------------+---------------------------+-----------------------------------------------------------+
-    | genomic_exon_ranges   | 100-201;334-412;779-830   | semi-colon demitited exon start/ends                      |
-    +-----------------------+---------------------------+-----------------------------------------------------------+
-    | AA_domain_ranges      | DBD:220-251,260-271       | semi-colon delimited list of domains                      |
-    +-----------------------+---------------------------+-----------------------------------------------------------+
-    | hugo_names            | KRAS                      | hugo gene name                                            |
-    +-----------------------+---------------------------+-----------------------------------------------------------+
-
-    Args:
-        filepath (str): path to the input tab-delimited file
-
-    Returns:
-        Dict[str,List[Gene]]: a dictionary keyed by chromosome name with values of list of genes on the chromosome
-
-    Warning:
-        does not load translations unless then start with 'M', end with '*' and have a length of multiple 3
-    """
-
-    def parse_exon_list(row):
-        if pd.isnull(row):
-            return []
-        exons = []
-        for temp in re.split('[; ]', row):
-            try:
-                start, end = temp.split('-')
-                exons.append({'start': int(start), 'end': int(end)})
-            except Exception as err:
-                warn('exon error:', repr(temp), repr(err))
-        return exons
-
-    def parse_domain_list(row):
-        if pd.isnull(row):
-            return []
-        domains = []
-        for domain in row.split(';'):
-            try:
-                name, temp = domain.rsplit(':')
-                temp = temp.split(',')
-                temp = [x.split('-') for x in temp]
-                regions = [{'start': int(x), 'end': int(y)} for x, y in temp]
-                domains.append({'name': name, 'regions': regions})
-            except Exception as err:
-                warn('error in domain:', domain, row, repr(err))
-        return domains
-
-    df = pd.read_csv(
-        filepath,
-        dtype={
-            'ensembl_gene_id': str,
-            'ensembl_transcript_id': str,
-            'chr': str,
-            'cdna_coding_start': pd.Int64Dtype(),
-            'cdna_coding_end': pd.Int64Dtype(),
-            'AA_domain_ranges': str,
-            'genomic_exon_ranges': str,
-            'hugo_names': str,
-            'transcript_genomic_start': pd.Int64Dtype(),
-            'transcript_genomic_end': pd.Int64Dtype(),
-            'best_ensembl_transcript_id': str,
-            'gene_start': int,
-            'gene_end': int,
-        },
-        sep='\t',
-        comment='#',
-    )
-
-    for col in ['ensembl_gene_id', 'chr', 'ensembl_transcript_id', 'gene_start', 'gene_end']:
-        if col not in df:
-            raise KeyError(f'missing required column: {col}')
-
-    for col, parser in [
-        ('genomic_exon_ranges', parse_exon_list),
-        ('AA_domain_ranges', parse_domain_list),
-    ]:
-        if col in df:
-            df[col] = df[col].apply(parser)
-
-    genes = {}
-    rows = df.where(df.notnull(), None).to_dict('records')
-
-    for row in rows:
-        gene = {
-            'chr': row['chr'],
-            'start': row['gene_start'],
-            'end': row['gene_end'],
-            'name': row['ensembl_gene_id'],
-            'strand': row['strand'],
-            'aliases': row['hugo_names'].split(';') if row.get('hugo_names') else [],
-            'transcripts': [],
-        }
-        if gene['name'] not in genes:
-            genes[gene['name']] = gene
-        else:
-            gene = genes[gene['name']]
-        is_best_transcript = (
-            row.get('best_ensembl_transcript_id', row['ensembl_transcript_id'])
-            == row['ensembl_transcript_id']
-        )
-        transcript = {
-            'is_best_transcript': is_best_transcript,
-            'name': row['ensembl_transcript_id'],
-            'exons': row.get('genomic_exon_ranges', []),
-            'domains': row.get('AA_domain_ranges', []),
-            'start': row.get('transcript_genomic_start'),
-            'end': row.get('transcript_genomic_end'),
-            'cdna_coding_start': row.get('cdna_coding_start'),
-            'cdna_coding_end': row.get('cdna_coding_end'),
-            'aliases': [],
-        }
-        gene['transcripts'].append(transcript)
-
-    return {'genes': genes.values()}
-
-
 def load_reference_genome(*filepaths: str) -> Dict[str, SeqRecord]:
     """
     Args:
diff --git a/src/tools/migrate_mavis_annotations_2to3.py b/src/tools/migrate_mavis_annotations_2to3.py
new file mode 100644
index 00000000..0fe9d39d
--- /dev/null
+++ b/src/tools/migrate_mavis_annotations_2to3.py
@@ -0,0 +1,190 @@
+import argparse
+import json
+import logging
+import re
+from typing import Dict
+
+import pandas as pd
+
+
+def convert_tab_to_json(filepath: str) -> Dict:
+    """
+    read a file in the std input format (see below) and return the genes (and sub-objects) it describes
+
+    +-----------------------+---------------------------+-----------------------------------------------------------+
+    | column name           | example                   | description                                               |
+    +=======================+===========================+===========================================================+
+    | ensembl_transcript_id | ENST000001                |                                                           |
+    +-----------------------+---------------------------+-----------------------------------------------------------+
+    | ensembl_gene_id       | ENSG000001                |                                                           |
+    +-----------------------+---------------------------+-----------------------------------------------------------+
+    | strand                | -1                        | positive or negative 1                                    |
+    +-----------------------+---------------------------+-----------------------------------------------------------+
+    | cdna_coding_start     | 44                        | where translation begins relative to the start of the cdna|
+    +-----------------------+---------------------------+-----------------------------------------------------------+
+    | cdna_coding_end       | 150                       | where translation terminates                              |
+    +-----------------------+---------------------------+-----------------------------------------------------------+
+    | genomic_exon_ranges   | 100-201;334-412;779-830   | semi-colon delimited exon start/ends                      |
+    +-----------------------+---------------------------+-----------------------------------------------------------+
+    | AA_domain_ranges      | DBD:220-251,260-271       | semi-colon delimited list of domains                      |
+    +-----------------------+---------------------------+-----------------------------------------------------------+
+    | hugo_names            | KRAS                      | hugo gene name                                            |
+    +-----------------------+---------------------------+-----------------------------------------------------------+
+
+    Args:
+        filepath (str): path to the input tab-delimited file
+
+    Returns:
+        Dict[str,List[Dict]]: a dictionary with the single key 'genes' holding the list of gene objects
+
+    Raises:
+        KeyError: if a required gene/transcript identifier or position column is missing
+    """
+
+    def parse_exon_list(row):
+        if pd.isnull(row):
+            return []
+        exons = []
+        for temp in re.split('[; ]', row):
+            try:
+                start, end = temp.split('-')
+                exons.append({'start': int(start), 'end': int(end)})
+            except Exception as err:
+                logging.warning(f'exon error: {repr(temp)}, {repr(err)}')
+        return exons
+
+    def parse_domain_list(row):
+        if pd.isnull(row):
+            return []
+        domains = []
+        for domain in row.split(';'):
+            try:
+                # split on the last ':' only, so that domain names may themselves contain ':'
+                name, temp = domain.rsplit(':', 1)
+                temp = [x.split('-') for x in temp.split(',')]
+                regions = [{'start': int(x), 'end': int(y)} for x, y in temp]
+                domains.append({'name': name, 'regions': regions})
+            except Exception as err:
+                logging.warning(f'error in domain: {domain}, {row}, {repr(err)}')
+        return domains
+
+    df = pd.read_csv(
+        filepath,
+        dtype={
+            'ensembl_gene_id': str,
+            'ensembl_transcript_id': str,
+            'chr': str,
+            'cdna_coding_start': pd.Int64Dtype(),
+            'cdna_coding_end': pd.Int64Dtype(),
+            'AA_domain_ranges': str,
+            'genomic_exon_ranges': str,
+            'hugo_names': str,
+            'transcript_genomic_start': pd.Int64Dtype(),
+            'transcript_genomic_end': pd.Int64Dtype(),
+            'best_ensembl_transcript_id': str,
+            'gene_start': int,
+            'gene_end': int,
+        },
+        sep='\t',
+        comment='#',
+    )
+
+    for col in ['ensembl_gene_id', 'chr', 'ensembl_transcript_id', 'gene_start', 'gene_end']:
+        if col not in df:
+            raise KeyError(f'missing required column: {col}')
+
+    for col, parser in [
+        ('genomic_exon_ranges', parse_exon_list),
+        ('AA_domain_ranges', parse_domain_list),
+    ]:
+        if col in df:
+            df[col] = df[col].apply(parser)
+
+    genes = {}
+    rows = df.where(df.notnull(), None).to_dict('records')
+
+    for row in rows:
+        gene = {
+            'chr': row['chr'],
+            'start': int(row['gene_start']),
+            'end': int(row['gene_end']),
+            'name': row['ensembl_gene_id'],
+            'strand': row['strand'],
+            'aliases': row['hugo_names'].split(';') if row.get('hugo_names') else [],
+            'transcripts': [],
+        }
+        # normalize the various strand encodings to '+' / '-'; unrecognized values are left as-is
+        if gene['strand'] in {'true', '1', '+', '+1', 'True', 1, True}:
+            gene['strand'] = '+'
+        elif gene['strand'] in {'false', '-1', '-', 'False', -1, False}:
+            gene['strand'] = '-'
+        # rows of the same gene share one gene object; the first row seen defines its fields
+        gene = genes.setdefault(gene['name'], gene)
+        is_best_transcript = (
+            row.get('best_ensembl_transcript_id', row['ensembl_transcript_id'])
+            == row['ensembl_transcript_id']
+        )
+        transcript = {
+            'is_best_transcript': is_best_transcript,
+            'name': row['ensembl_transcript_id'],
+            'exons': row.get('genomic_exon_ranges', []),
+            'domains': row.get('AA_domain_ranges', []),
+            'start': row.get('transcript_genomic_start'),
+            'end': row.get('transcript_genomic_end'),
+            'cdna_coding_start': row.get('cdna_coding_start'),
+            'cdna_coding_end': row.get('cdna_coding_end'),
+            'aliases': [],
+        }
+        # missing values may surface as pd.NA rather than None, which int() cannot convert
+        for int_value in ['start', 'end', 'cdna_coding_start', 'cdna_coding_end']:
+            value = transcript.get(int_value)
+            transcript[int_value] = None if pd.isnull(value) else int(value)
+        gene['transcripts'].append(transcript)
+
+    return {'genes': list(genes.values())}
+
+
+if __name__ == '__main__':
+    # command line entry point: read the tab-delimited annotations and write them in the chosen format
+    logging.basicConfig(**{'format': '{message}', 'style': '{', 'level': logging.INFO})
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        'input', help='path to the tab-delimated mavis v2 style reference annotations file'
+    )
+    parser.add_argument('output', help='path to the JSON output file')
+    # must be declared here: the writer below branches on args.output_format
+    parser.add_argument(
+        '--output_format',
+        choices=['json', 'jsonl', 'tab'],
+        default='json',
+        help='format to write: a single JSON document, one gene per line, or a flat tab file',
+    )
+    args = parser.parse_args()
+    annotations = convert_tab_to_json(args.input)
+
+    logging.info(f'writing: {args.output}')
+    if args.output_format == 'jsonl':
+        with open(args.output, 'w') as fh:
+            for gene in annotations['genes']:
+                fh.write(json.dumps(gene, sort_keys=True) + '\n')
+    elif args.output_format == 'json':
+        with open(args.output, 'w') as fh:
+            fh.write(json.dumps(annotations, sort_keys=True))
+    else:
+        # flatten to one row per transcript; transcript fields get a 'transcript.' prefix
+        transcripts = []
+        for gene in annotations['genes']:
+            meta = {**gene}
+            del meta['transcripts']
+            if gene['transcripts']:
+                for transcript in gene['transcripts']:
+                    transcripts.append(
+                        {**meta, **{f'transcript.{k}': v for k, v in transcript.items()}}
+                    )
+            else:
+                transcripts.append(meta)
+        df = pd.json_normalize(transcripts, max_level=1)
+        json_cols = ['aliases', 'transcript.aliases', 'transcript.exons', 'transcript.domains']
+        for col in json_cols:
+            df[col] = df[col].apply(json.dumps)
+        df.to_csv(args.output, index=False, sep='\t')
diff --git a/tests/data/annotations_subsample.json b/tests/data/annotations_subsample.json
index 7c362018..d4ee0a56 100644
--- a/tests/data/annotations_subsample.json
+++ b/tests/data/annotations_subsample.json
@@ -1 +1 @@
-{"genes": [{"chr": "15", "start": 63889592, "end": 63893885, "name": "ENSG00000259662", "strand": "1", "aliases": [], "transcripts": [{"is_best_transcript": true, "name": "ENST00000539570", "exons": [{"start": 63889592, "end": 63889944}, {"start": 63893495, "end": 63893885}], "domains": [{"name": "SSF81383", "regions": [{"start": 9, "end": 49}]}], "start": 63889592, "end": 63893885, "cdna_coding_start": 1, "cdna_coding_end": 744, "aliases": []}]}, {"chr": "14", "start": 102027834, "end": 102028748, "name": "ENSG00000258865", "strand": "1", "aliases": ["DIO3"], "transcripts": [{"is_best_transcript": true, "name": "ENST00000510508", "exons": [{"start": 102027834, "end": 102028748}], "domains": [{"name": "PF00837", "regions": [{"start": 38, "end": 293}]}, {"name": "SSF52833", "regions": [{"start": 125, "end": 198}]}], "start": 102027834, "end": 102028748, "cdna_coding_start": 1, "cdna_coding_end": 915, "aliases": []}]}, {"chr": "X", "start": 49364778, "end": 49370618, "name": "ENSG00000255738", "strand": "1", "aliases": ["GAGE4"], "transcripts": [{"is_best_transcript": true, "name": "ENST00000381700", "exons": [{"start": 49364778, "end": 49364861}, {"start": 49365327, "end": 49365447}, {"start": 49368271, "end": 49368396}, {"start": 49370596, "end": 49370618}], "domains": [{"name": "PF05831", "regions": [{"start": 1, "end": 116}]}], "start": 49364778, "end": 49370618, "cdna_coding_start": 1, "cdna_coding_end": 354, "aliases": []}]}, {"chr": "10", "start": 89621708, "end": 89622244, "name": "ENSG00000227268", "strand": "-1", "aliases": ["KLLN"], "transcripts": [{"is_best_transcript": true, "name": "ENST00000445946", "exons": [{"start": 89621708, "end": 89622244}], "domains": [], "start": 89621708, "end": 89622244, "cdna_coding_start": 1, "cdna_coding_end": 537, "aliases": []}]}, {"chr": "19", "start": 50193095, "end": 50193750, "name": "ENSG00000224420", "strand": "1", "aliases": ["ADM5"], "transcripts": [{"is_best_transcript": true, "name": "ENST00000420022", 
"exons": [{"start": 50193095, "end": 50193168}, {"start": 50193363, "end": 50193750}], "domains": [], "start": 50193095, "end": 50193750, "cdna_coding_start": 1, "cdna_coding_end": 462, "aliases": []}]}, {"chr": "4", "start": 69056959, "end": 69083631, "name": "ENSG00000226894", "strand": "-1", "aliases": ["TMPRSS11BNL"], "transcripts": [{"is_best_transcript": true, "name": "ENST00000432593", "exons": [{"start": 69083624, "end": 69083631}, {"start": 69078080, "end": 69078195}, {"start": 69057125, "end": 69057242}, {"start": 69056959, "end": 69057034}], "domains": [{"name": "SSF82671", "regions": [{"start": 35, "end": 87}]}], "start": 69056959, "end": 69083631, "cdna_coding_start": 1, "cdna_coding_end": 318, "aliases": []}]}, {"chr": "1", "start": 179833916, "end": 179834311, "name": "ENSG00000258664", "strand": "-1", "aliases": [], "transcripts": [{"is_best_transcript": true, "name": "ENST00000553856", "exons": [{"start": 179833916, "end": 179834311}], "domains": [], "start": 179833916, "end": 179834311, "cdna_coding_start": 1, "cdna_coding_end": 396, "aliases": []}]}, {"chr": "19", "start": 8959608, "end": 9091814, "name": "ENSG00000181143", "strand": "-1", "aliases": ["MUC16"], "transcripts": [{"is_best_transcript": true, "name": "ENST00000397910", "exons": [{"start": 9082340, "end": 9091814}, {"start": 9080451, "end": 9080555}, {"start": 9056173, "end": 9077865}, {"start": 9054235, "end": 9054348}, {"start": 9045564, "end": 9050243}, {"start": 9043426, "end": 9043461}, {"start": 9038380, "end": 9038415}, {"start": 9038077, "end": 9038136}, {"start": 9033610, "end": 9033737}, {"start": 9033231, "end": 9033298}, {"start": 9028224, "end": 9028396}, {"start": 9027540, "end": 9027575}, {"start": 9027216, "end": 9027281}, {"start": 9026191, "end": 9026315}, {"start": 9025591, "end": 9025658}, {"start": 9024826, "end": 9024998}, {"start": 9024461, "end": 9024496}, {"start": 9024134, "end": 9024199}, {"start": 9021054, "end": 9021184}, {"start": 9020765, "end": 
9020832}, {"start": 9019985, "end": 9020157}, {"start": 9019600, "end": 9019635}, {"start": 9019275, "end": 9019340}, {"start": 9018437, "end": 9018561}, {"start": 9018133, "end": 9018200}, {"start": 9017346, "end": 9017518}, {"start": 9016981, "end": 9017016}, {"start": 9016660, "end": 9016722}, {"start": 9015621, "end": 9015745}, {"start": 9015318, "end": 9015385}, {"start": 9014532, "end": 9014704}, {"start": 9014169, "end": 9014204}, {"start": 9013845, "end": 9013910}, {"start": 9012774, "end": 9012898}, {"start": 9012468, "end": 9012535}, {"start": 9011322, "end": 9011494}, {"start": 9010971, "end": 9011006}, {"start": 9010648, "end": 9010713}, {"start": 9009588, "end": 9009712}, {"start": 9009267, "end": 9009334}, {"start": 9008173, "end": 9008345}, {"start": 9007809, "end": 9007844}, {"start": 9007487, "end": 9007552}, {"start": 9006642, "end": 9006766}, {"start": 9006344, "end": 9006411}, {"start": 9005559, "end": 9005731}, {"start": 9005194, "end": 9005229}, {"start": 9004869, "end": 9004934}, {"start": 9003566, "end": 9003690}, {"start": 9003293, "end": 9003360}, {"start": 9002501, "end": 9002673}, {"start": 9002153, "end": 9002188}, {"start": 9001831, "end": 9001896}, {"start": 9000442, "end": 9000566}, {"start": 9000147, "end": 9000214}, {"start": 8999392, "end": 8999564}, {"start": 8999025, "end": 8999060}, {"start": 8998698, "end": 8998763}, {"start": 8997412, "end": 8997536}, {"start": 8997118, "end": 8997185}, {"start": 8996321, "end": 8996493}, {"start": 8995954, "end": 8995989}, {"start": 8995635, "end": 8995700}, {"start": 8994417, "end": 8994538}, {"start": 8994142, "end": 8994209}, {"start": 8993373, "end": 8993545}, {"start": 8993007, "end": 8993042}, {"start": 8987210, "end": 8987334}, {"start": 8987045, "end": 8987112}, {"start": 8982157, "end": 8982329}, {"start": 8979217, "end": 8979252}, {"start": 8977640, "end": 8977690}, {"start": 8976739, "end": 8976860}, {"start": 8976581, "end": 8976648}, {"start": 8976260, "end": 8976432}, {"start": 
8973972, "end": 8974102}, {"start": 8973549, "end": 8973616}, {"start": 8971676, "end": 8971824}, {"start": 8969276, "end": 8969427}, {"start": 8968880, "end": 8968947}, {"start": 8966650, "end": 8966816}, {"start": 8962354, "end": 8962395}, {"start": 8961952, "end": 8962031}, {"start": 8959608, "end": 8959706}], "domains": [{"name": "PS50324", "regions": [{"start": 466, "end": 934}, {"start": 1638, "end": 3054}, {"start": 3899, "end": 4383}, {"start": 5333, "end": 5532}, {"start": 5907, "end": 5999}, {"start": 7083, "end": 10394}]}, {"name": "SSF48726", "regions": [{"start": 3813, "end": 11805}]}, {"name": "SSF82671", "regions": [{"start": 12377, "end": 12509}, {"start": 13312, "end": 13444}, {"start": 13468, "end": 13600}, {"start": 13000, "end": 13132}, {"start": 12688, "end": 12820}, {"start": 13624, "end": 13756}, {"start": 12533, "end": 12665}, {"start": 13156, "end": 13288}, {"start": 12844, "end": 12976}, {"start": 12219, "end": 12351}, {"start": 13780, "end": 13911}, {"start": 12063, "end": 12195}, {"start": 14064, "end": 14195}, {"start": 13913, "end": 14045}, {"start": 14310, "end": 14440}, {"start": 14202, "end": 14308}]}, {"name": "SM00200", "regions": [{"start": 12068, "end": 12199}, {"start": 12226, "end": 12349}, {"start": 12384, "end": 12513}, {"start": 12851, "end": 12982}, {"start": 13007, "end": 13138}, {"start": 13471, "end": 13604}, {"start": 14314, "end": 14444}]}, {"name": "PS50024", "regions": [{"start": 12068, "end": 12133}, {"start": 13007, "end": 13070}, {"start": 13472, "end": 13538}, {"start": 13628, "end": 13694}, {"start": 14314, "end": 14380}]}, {"name": "PS50325", "regions": [{"start": 14, "end": 12083}]}, {"name": "PF01390", "regions": [{"start": 12074, "end": 12175}, {"start": 12231, "end": 12330}, {"start": 12388, "end": 12489}, {"start": 12544, "end": 12646}, {"start": 12699, "end": 12801}, {"start": 12855, "end": 12952}, {"start": 13011, "end": 13113}, {"start": 13167, "end": 13271}, {"start": 13324, "end": 13427}, {"start": 
13479, "end": 13583}, {"start": 13635, "end": 13739}, {"start": 13793, "end": 13894}, {"start": 13922, "end": 14030}, {"start": 14076, "end": 14175}, {"start": 14202, "end": 14271}, {"start": 14319, "end": 14423}]}], "start": 8959608, "end": 9091814, "cdna_coding_start": 1, "cdna_coding_end": 43524, "aliases": []}]}, {"chr": "3", "start": 157815816, "end": 157823813, "name": "ENSG00000258518", "strand": "-1", "aliases": [], "transcripts": [{"is_best_transcript": true, "name": "ENST00000483851", "exons": [{"start": 157823468, "end": 157823813}, {"start": 157820467, "end": 157820675}, {"start": 157818043, "end": 157818100}, {"start": 157817649, "end": 157817737}, {"start": 157815816, "end": 157816073}], "domains": [{"name": "PS50071", "regions": [{"start": 138, "end": 198}]}, {"name": "PF00046", "regions": [{"start": 141, "end": 197}]}, {"name": "PS50310", "regions": [{"start": 244, "end": 316}]}, {"name": "PS50316", "regions": [{"start": 247, "end": 259}]}, {"name": "SSF46689", "regions": [{"start": 131, "end": 209}]}, {"name": "SM00389", "regions": [{"start": 140, "end": 202}]}, {"name": "PF03826", "regions": [{"start": 298, "end": 316}]}, {"name": "PS50315", "regions": [{"start": 60, "end": 90}]}, {"name": "PS50803", "regions": [{"start": 301, "end": 314}]}, {"name": "PR00031", "regions": [{"start": 169, "end": 178}, {"start": 178, "end": 194}]}], "start": 157815816, "end": 157823813, "cdna_coding_start": 1, "cdna_coding_end": 960, "aliases": []}]}, {"chr": "10", "start": 225953, "end": 295049, "name": "ENSG00000259741", "strand": "1", "aliases": [], "transcripts": [{"is_best_transcript": true, "name": "ENST00000558098", "exons": [{"start": 225953, "end": 226068}, {"start": 255829, "end": 255988}, {"start": 267135, "end": 267296}, {"start": 282778, "end": 282855}, {"start": 283525, "end": 283617}, {"start": 285378, "end": 285465}, {"start": 285996, "end": 286051}, {"start": 286833, "end": 286910}, {"start": 287961, "end": 288079}, {"start": 292706, "end": 292913}, 
{"start": 293338, "end": 293406}, {"start": 294276, "end": 294548}, {"start": 294843, "end": 295049}], "domains": [{"name": "PS50812", "regions": [{"start": 280, "end": 331}]}, {"name": "PS50016", "regions": [{"start": 100, "end": 148}]}, {"name": "SM00297", "regions": [{"start": 151, "end": 257}]}, {"name": "PF00855", "regions": [{"start": 278, "end": 342}]}, {"name": "SSF57903", "regions": [{"start": 86, "end": 151}]}, {"name": "SSF47370", "regions": [{"start": 130, "end": 268}]}, {"name": "SM00293", "regions": [{"start": 278, "end": 329}]}, {"name": "SM00249", "regions": [{"start": 102, "end": 146}]}, {"name": "PS50014", "regions": [{"start": 186, "end": 238}]}, {"name": "PF00439", "regions": [{"start": 182, "end": 242}]}, {"name": "SSF63748", "regions": [{"start": 270, "end": 396}]}], "start": 225953, "end": 295049, "cdna_coding_start": 1, "cdna_coding_end": 1707, "aliases": []}]}, {"chr": "11", "start": 117160282, "end": 117166263, "name": "ENSG00000265969", "strand": "-1", "aliases": [], "transcripts": [{"is_best_transcript": true, "name": "ENST00000292095", "exons": [{"start": 117166214, "end": 117166263}, {"start": 117165847, "end": 117166063}, {"start": 117164587, "end": 117164724}, {"start": 117163770, "end": 117163904}, {"start": 117162428, "end": 117162529}, {"start": 117161616, "end": 117161765}, {"start": 117161204, "end": 117161375}, {"start": 117160282, "end": 117160523}], "domains": [{"name": "SSF50630", "regions": [{"start": 18, "end": 346}]}, {"name": "PF00026", "regions": [{"start": 17, "end": 316}]}, {"name": "PR01816", "regions": [{"start": 15, "end": 25}, {"start": 112, "end": 132}, {"start": 260, "end": 274}, {"start": 315, "end": 327}, {"start": 334, "end": 353}, {"start": 355, "end": 375}]}, {"name": "PR00792", "regions": [{"start": 133, "end": 146}, {"start": 186, "end": 197}, {"start": 292, "end": 307}]}, {"name": "PR01815", "regions": [{"start": 47, "end": 70}, {"start": 168, "end": 187}, {"start": 218, "end": 241}, {"start": 256, 
"end": 270}, {"start": 316, "end": 339}, {"start": 352, "end": 373}]}], "start": 117160282, "end": 117166263, "cdna_coding_start": 1, "cdna_coding_end": 1206, "aliases": []}]}, {"chr": "14", "start": 55034638, "end": 55255662, "name": "ENSG00000262355", "strand": "1", "aliases": ["SAMD4A"], "transcripts": [{"is_best_transcript": true, "name": "ENST00000305831", "exons": [{"start": 55034638, "end": 55034830}, {"start": 55168780, "end": 55169298}, {"start": 55215533, "end": 55215642}, {"start": 55218169, "end": 55218255}, {"start": 55226879, "end": 55227212}, {"start": 55231173, "end": 55231258}, {"start": 55236822, "end": 55236940}, {"start": 55241652, "end": 55241853}, {"start": 55243132, "end": 55243258}, {"start": 55251255, "end": 55251338}, {"start": 55255634, "end": 55255662}], "domains": [{"name": "PF07647", "regions": [{"start": 234, "end": 292}]}, {"name": "SSF47769", "regions": [{"start": 232, "end": 293}]}, {"name": "PF00536", "regions": [{"start": 235, "end": 292}]}, {"name": "SM00454", "regions": [{"start": 231, "end": 294}]}], "start": 55034638, "end": 55255662, "cdna_coding_start": 1, "cdna_coding_end": 1890, "aliases": []}]}, {"chr": "MT", "start": 3307, "end": 4262, "name": "ENSG00000198888", "strand": "1", "aliases": ["MT-ND1"], "transcripts": [{"is_best_transcript": true, "name": "ENST00000361390", "exons": [{"start": 3307, "end": 4262}], "domains": [{"name": "PF00146", "regions": [{"start": 2, "end": 308}]}], "start": 3307, "end": 4262, "cdna_coding_start": 1, "cdna_coding_end": 956, "aliases": []}]}, {"chr": "MT", "start": 4470, "end": 5511, "name": "ENSG00000198763", "strand": "1", "aliases": ["MT-ND2"], "transcripts": [{"is_best_transcript": true, "name": "ENST00000361453", "exons": [{"start": 4470, "end": 5511}], "domains": [{"name": "PF06444", "regions": [{"start": 290, "end": 345}]}, {"name": "PR01436", "regions": [{"start": 159, "end": 172}, {"start": 183, "end": 196}, {"start": 202, "end": 220}, {"start": 242, "end": 254}, {"start": 274, 
"end": 293}]}, {"name": "PF00361", "regions": [{"start": 23, "end": 268}]}], "start": 4470, "end": 5511, "cdna_coding_start": 1, "cdna_coding_end": 1042, "aliases": []}]}, {"chr": "MT", "start": 5904, "end": 7445, "name": "ENSG00000198804", "strand": "1", "aliases": ["MT-CO1"], "transcripts": [{"is_best_transcript": true, "name": "ENST00000361624", "exons": [{"start": 5904, "end": 7445}], "domains": [{"name": "PR01165", "regions": [{"start": 5, "end": 30}, {"start": 52, "end": 75}, {"start": 80, "end": 104}, {"start": 122, "end": 134}, {"start": 154, "end": 172}, {"start": 183, "end": 202}, {"start": 234, "end": 255}, {"start": 281, "end": 296}, {"start": 305, "end": 326}, {"start": 340, "end": 358}, {"start": 368, "end": 387}, {"start": 418, "end": 439}]}, {"name": "PS50855", "regions": [{"start": 1, "end": 511}]}, {"name": "SSF81442", "regions": [{"start": 1, "end": 513}]}, {"name": "PF00115", "regions": [{"start": 12, "end": 460}]}], "start": 5904, "end": 7445, "cdna_coding_start": 1, "cdna_coding_end": 1542, "aliases": []}]}, {"chr": "MT", "start": 7586, "end": 8269, "name": "ENSG00000198712", "strand": "1", "aliases": ["MT-CO2"], "transcripts": [{"is_best_transcript": true, "name": "ENST00000361739", "exons": [{"start": 7586, "end": 8269}], "domains": [{"name": "SSF81464", "regions": [{"start": 1, "end": 90}]}, {"name": "PF00116", "regions": [{"start": 95, "end": 213}]}, {"name": "TIGR02866", "regions": [{"start": 13, "end": 215}]}, {"name": "PS50999", "regions": [{"start": 1, "end": 91}]}, {"name": "PS50857", "regions": [{"start": 92, "end": 225}]}, {"name": "SSF49503", "regions": [{"start": 91, "end": 220}]}, {"name": "PF02790", "regions": [{"start": 1, "end": 83}]}, {"name": "PR01166", "regions": [{"start": 57, "end": 69}, {"start": 69, "end": 89}, {"start": 91, "end": 110}, {"start": 134, "end": 155}, {"start": 158, "end": 178}, {"start": 178, "end": 195}, {"start": 196, "end": 213}]}], "start": 7586, "end": 8269, "cdna_coding_start": 1, "cdna_coding_end": 
684, "aliases": []}]}, {"chr": "MT", "start": 8366, "end": 8572, "name": "ENSG00000228253", "strand": "1", "aliases": ["MT-ATP8"], "transcripts": [{"is_best_transcript": true, "name": "ENST00000361851", "exons": [{"start": 8366, "end": 8572}], "domains": [{"name": "PF00895", "regions": [{"start": 1, "end": 56}]}], "start": 8366, "end": 8572, "cdna_coding_start": 1, "cdna_coding_end": 207, "aliases": []}]}, {"chr": "MT", "start": 8527, "end": 9207, "name": "ENSG00000198899", "strand": "1", "aliases": ["MT-ATP6"], "transcripts": [{"is_best_transcript": true, "name": "ENST00000361899", "exons": [{"start": 8527, "end": 9207}], "domains": [{"name": "SSF81336", "regions": [{"start": 62, "end": 223}]}, {"name": "PF00119", "regions": [{"start": 21, "end": 224}]}, {"name": "TIGR01131", "regions": [{"start": 9, "end": 225}]}, {"name": "PR00123", "regions": [{"start": 71, "end": 87}, {"start": 132, "end": 147}, {"start": 152, "end": 174}, {"start": 209, "end": 224}]}], "start": 8527, "end": 9207, "cdna_coding_start": 1, "cdna_coding_end": 681, "aliases": []}]}, {"chr": "MT", "start": 9207, "end": 9990, "name": "ENSG00000198938", "strand": "1", "aliases": ["MT-CO3"], "transcripts": [{"is_best_transcript": true, "name": "ENST00000362079", "exons": [{"start": 9207, "end": 9990}], "domains": [{"name": "PF00510", "regions": [{"start": 6, "end": 261}]}, {"name": "SSF81452", "regions": [{"start": 1, "end": 260}]}, {"name": "PS50253", "regions": [{"start": 4, "end": 261}]}], "start": 9207, "end": 9990, "cdna_coding_start": 1, "cdna_coding_end": 784, "aliases": []}]}, {"chr": "MT", "start": 10059, "end": 10404, "name": "ENSG00000198840", "strand": "1", "aliases": ["MT-ND3"], "transcripts": [{"is_best_transcript": true, "name": "ENST00000361227", "exons": [{"start": 10059, "end": 10404}], "domains": [{"name": "PF00507", "regions": [{"start": 13, "end": 113}]}], "start": 10059, "end": 10404, "cdna_coding_start": 1, "cdna_coding_end": 346, "aliases": []}]}, {"chr": "MT", "start": 10470, 
"end": 10766, "name": "ENSG00000212907", "strand": "1", "aliases": ["MT-ND4L"], "transcripts": [{"is_best_transcript": true, "name": "ENST00000361335", "exons": [{"start": 10470, "end": 10766}], "domains": [{"name": "PF00420", "regions": [{"start": 4, "end": 98}]}], "start": 10470, "end": 10766, "cdna_coding_start": 1, "cdna_coding_end": 297, "aliases": []}]}, {"chr": "MT", "start": 10760, "end": 12137, "name": "ENSG00000198886", "strand": "1", "aliases": ["MT-ND4"], "transcripts": [{"is_best_transcript": true, "name": "ENST00000361381", "exons": [{"start": 10760, "end": 12137}], "domains": [{"name": "TIGR01972", "regions": [{"start": 59, "end": 456}]}, {"name": "PR01437", "regions": [{"start": 118, "end": 137}, {"start": 149, "end": 173}, {"start": 221, "end": 245}, {"start": 308, "end": 327}, {"start": 360, "end": 386}]}, {"name": "PF00361", "regions": [{"start": 112, "end": 383}]}, {"name": "PF01059", "regions": [{"start": 1, "end": 109}]}], "start": 10760, "end": 12137, "cdna_coding_start": 1, "cdna_coding_end": 1378, "aliases": []}]}, {"chr": "MT", "start": 12337, "end": 14148, "name": "ENSG00000198786", "strand": "1", "aliases": ["MT-ND5"], "transcripts": [{"is_best_transcript": true, "name": "ENST00000361567", "exons": [{"start": 12337, "end": 14148}], "domains": [{"name": "PF00662", "regions": [{"start": 62, "end": 123}]}, {"name": "TIGR01974", "regions": [{"start": 8, "end": 501}]}, {"name": "PR01434", "regions": [{"start": 83, "end": 108}, {"start": 111, "end": 131}, {"start": 158, "end": 179}, {"start": 219, "end": 240}, {"start": 241, "end": 267}, {"start": 309, "end": 321}, {"start": 412, "end": 431}]}, {"name": "PF00361", "regions": [{"start": 134, "end": 397}]}, {"name": "PF06455", "regions": [{"start": 422, "end": 602}]}], "start": 12337, "end": 14148, "cdna_coding_start": 1, "cdna_coding_end": 1812, "aliases": []}]}, {"chr": "MT", "start": 14149, "end": 14673, "name": "ENSG00000198695", "strand": "-1", "aliases": ["MT-ND6"], "transcripts": 
[{"is_best_transcript": true, "name": "ENST00000361681", "exons": [{"start": 14149, "end": 14673}], "domains": [{"name": "PF00499", "regions": [{"start": 14, "end": 171}]}], "start": 14149, "end": 14673, "cdna_coding_start": 1, "cdna_coding_end": 525, "aliases": []}]}, {"chr": "MT", "start": 14747, "end": 15887, "name": "ENSG00000198727", "strand": "1", "aliases": ["MT-CYB"], "transcripts": [{"is_best_transcript": true, "name": "ENST00000361789", "exons": [{"start": 14747, "end": 15887}], "domains": [{"name": "PF00032", "regions": [{"start": 259, "end": 359}]}, {"name": "PS51003", "regions": [{"start": 210, "end": 380}]}, {"name": "PS51002", "regions": [{"start": 1, "end": 209}]}, {"name": "PF00033", "regions": [{"start": 24, "end": 200}]}, {"name": "SSF81342", "regions": [{"start": 1, "end": 260}]}, {"name": "SSF81648", "regions": [{"start": 261, "end": 379}]}], "start": 14747, "end": 15887, "cdna_coding_start": 1, "cdna_coding_end": 1141, "aliases": []}]}, {"chr": "HG418_PATCH", "start": 143738824, "end": 143763329, "name": "ENSG00000262023", "strand": "-1", "aliases": [], "transcripts": [{"is_best_transcript": false, "name": "ENST00000585837", "exons": [{"start": 143751293, "end": 143751331}, {"start": 143745839, "end": 143747881}, {"start": 143740183, "end": 143740305}, {"start": 143738824, "end": 143739441}], "domains": [{"name": "SM00674", "regions": [{"start": 83, "end": 149}]}, {"name": "PF03221", "regions": [{"start": 87, "end": 149}]}, {"name": "PF03184", "regions": [{"start": 179, "end": 382}]}, {"name": "SSF46689", "regions": [{"start": 79, "end": 144}, {"start": 13, "end": 76}]}, {"name": "PS50960", "regions": [{"start": 11, "end": 62}]}, {"name": "PF04218", "regions": [{"start": 14, "end": 66}]}], "start": 143738824, "end": 143751331, "cdna_coding_start": 502, "cdna_coding_end": 2172, "aliases": []}, {"is_best_transcript": true, "name": "ENST00000571961", "exons": [{"start": 143751293, "end": 143751355}, {"start": 143745839, "end": 143747881}, 
{"start": 143739726, "end": 143740305}], "domains": [{"name": "PF03221", "regions": [{"start": 87, "end": 149}]}, {"name": "SM00674", "regions": [{"start": 83, "end": 149}]}, {"name": "PF04218", "regions": [{"start": 14, "end": 66}]}, {"name": "PS50960", "regions": [{"start": 11, "end": 62}]}, {"name": "SSF46689", "regions": [{"start": 79, "end": 144}, {"start": 13, "end": 76}]}, {"name": "PF03184", "regions": [{"start": 179, "end": 382}]}], "start": 143739726, "end": 143751355, "cdna_coding_start": 526, "cdna_coding_end": 2196, "aliases": []}, {"is_best_transcript": false, "name": "ENST00000592696", "exons": [{"start": 143747818, "end": 143747881}, {"start": 143739743, "end": 143747688}], "domains": [{"name": "PF03221", "regions": [{"start": 87, "end": 149}]}, {"name": "SM00674", "regions": [{"start": 83, "end": 149}]}, {"name": "PF04218", "regions": [{"start": 14, "end": 66}]}, {"name": "PS50960", "regions": [{"start": 11, "end": 62}]}, {"name": "SSF46689", "regions": [{"start": 79, "end": 144}, {"start": 13, "end": 76}]}, {"name": "PF03184", "regions": [{"start": 179, "end": 382}]}], "start": 143739743, "end": 143747881, "cdna_coding_start": 334, "cdna_coding_end": 2040, "aliases": []}, {"is_best_transcript": false, "name": "ENST00000585713", "exons": [{"start": 143751302, "end": 143751349}, {"start": 143747343, "end": 143747881}], "domains": [], "start": 143747343, "end": 143751349, "cdna_coding_start": 511, "cdna_coding_end": 587, "aliases": []}]}, {"chr": "HG418_PATCH", "start": 143751669, "end": 143764085, "name": "ENSG00000262150", "strand": "1", "aliases": [], "transcripts": [{"is_best_transcript": true, "name": "ENST00000571412", "exons": [{"start": 143761817, "end": 143761924}, {"start": 143762688, "end": 143762795}, {"start": 143763282, "end": 143764085}], "domains": [{"name": "PF00021", "regions": [{"start": 14, "end": 84}]}, {"name": "SM00134", "regions": [{"start": 12, "end": 98}]}, {"name": "SSF57302", "regions": [{"start": 10, "end": 84}]}], 
"start": 143761817, "end": 143764085, "cdna_coding_start": 84, "cdna_coding_end": 428, "aliases": []}, {"is_best_transcript": false, "name": "ENST00000575167", "exons": [{"start": 143761818, "end": 143761924}, {"start": 143762688, "end": 143763131}], "domains": [{"name": "PF00021", "regions": [{"start": 14, "end": 52}]}], "start": 143761818, "end": 143763131, "cdna_coding_start": 83, "cdna_coding_end": 247, "aliases": []}]}, {"chr": "HG418_PATCH", "start": 143781472, "end": 143786488, "name": "ENSG00000262378", "strand": "1", "aliases": [], "transcripts": [{"is_best_transcript": true, "name": "ENST00000572300", "exons": [{"start": 143781472, "end": 143781991}, {"start": 143782961, "end": 143783074}, {"start": 143784452, "end": 143786488}], "domains": [{"name": "SSF57302", "regions": [{"start": 42, "end": 122}]}], "start": 143781472, "end": 143786488, "cdna_coding_start": 418, "cdna_coding_end": 915, "aliases": []}, {"is_best_transcript": false, "name": "ENST00000571924", "exons": [{"start": 143781474, "end": 143781991}, {"start": 143782961, "end": 143783074}, {"start": 143783861, "end": 143784083}], "domains": [], "start": 143781474, "end": 143784083, "cdna_coding_start": 416, "cdna_coding_end": 718, "aliases": []}, {"is_best_transcript": false, "name": "ENST00000574968", "exons": [{"start": 143781475, "end": 143781991}, {"start": 143782961, "end": 143783074}, {"start": 143784484, "end": 143784815}], "domains": [], "start": 143781475, "end": 143784815, "cdna_coding_start": 415, "cdna_coding_end": 732, "aliases": []}, {"is_best_transcript": false, "name": "ENST00000570580", "exons": [{"start": 143781833, "end": 143781991}, {"start": 143782961, "end": 143785525}], "domains": [], "start": 143781833, "end": 143785525, "cdna_coding_start": 57, "cdna_coding_end": 314, "aliases": []}]}, {"chr": "HG418_PATCH", "start": 143808564, "end": 143818288, "name": "ENSG00000263194", "strand": "1", "aliases": [], "transcripts": [{"is_best_transcript": true, "name": 
"ENST00000575946", "exons": [{"start": 143808564, "end": 143809220}, {"start": 143816687, "end": 143818288}], "domains": [{"name": "SSF54637", "regions": [{"start": 44, "end": 189}]}], "start": 143808564, "end": 143818288, "cdna_coding_start": 145, "cdna_coding_end": 771, "aliases": []}, {"is_best_transcript": false, "name": "ENST00000573810", "exons": [{"start": 143809084, "end": 143809220}, {"start": 143816687, "end": 143817029}, {"start": 143818218, "end": 143818288}], "domains": [{"name": "SSF54637", "regions": [{"start": 2, "end": 64}]}], "start": 143809084, "end": 143818288, "cdna_coding_start": 1, "cdna_coding_end": 251, "aliases": []}]}, {"chr": "12", "start": 175931, "end": 287626, "name": "ENSG00000120645", "strand": "1", "aliases": ["IQSEC3"], "transcripts": [{"is_best_transcript": false, "name": "ENST00000538872", "exons": [{"start": 175931, "end": 176602}, {"start": 208312, "end": 208380}, {"start": 234799, "end": 235078}, {"start": 247433, "end": 248520}, {"start": 250290, "end": 250451}, {"start": 266191, "end": 266313}, {"start": 266694, "end": 266860}, {"start": 271092, "end": 271231}, {"start": 272660, "end": 272785}, {"start": 274600, "end": 274699}, {"start": 274895, "end": 275056}, {"start": 278179, "end": 278271}, {"start": 280278, "end": 280327}, {"start": 283765, "end": 287626}], "domains": [{"name": "SM00222", "regions": [{"start": 648, "end": 839}]}, {"name": "SSF50729", "regions": [{"start": 845, "end": 983}]}, {"name": "PS50096", "regions": [{"start": 315, "end": 344}]}, {"name": "PS50099", "regions": [{"start": 1061, "end": 1165}]}, {"name": "SSF48425", "regions": [{"start": 647, "end": 844}]}, {"name": "PS50190", "regions": [{"start": 644, "end": 837}]}, {"name": "PF01369", "regions": [{"start": 651, "end": 839}]}], "start": 175931, "end": 287626, "cdna_coding_start": 119, "cdna_coding_end": 3667, "aliases": []}, {"is_best_transcript": true, "name": "ENST00000382841", "exons": [{"start": 186542, "end": 186878}, {"start": 208312, "end": 
208380}, {"start": 247433, "end": 248520}, {"start": 250290, "end": 250451}, {"start": 266191, "end": 266313}, {"start": 266694, "end": 266860}, {"start": 271092, "end": 271231}, {"start": 272660, "end": 272785}, {"start": 274600, "end": 274699}, {"start": 274895, "end": 275056}, {"start": 278179, "end": 278271}, {"start": 280278, "end": 280327}, {"start": 280413, "end": 280496}], "domains": [{"name": "PS50190", "regions": [{"start": 341, "end": 534}]}, {"name": "PF01369", "regions": [{"start": 348, "end": 536}]}, {"name": "SM00222", "regions": [{"start": 345, "end": 536}]}, {"name": "SSF50729", "regions": [{"start": 542, "end": 680}]}, {"name": "SSF48425", "regions": [{"start": 344, "end": 541}]}, {"name": "PS50096", "regions": [{"start": 12, "end": 41}]}], "start": 186542, "end": 280496, "cdna_coding_start": 413, "cdna_coding_end": 2692, "aliases": []}, {"is_best_transcript": false, "name": "ENST00000326261", "exons": [{"start": 176049, "end": 176602}, {"start": 208312, "end": 208380}, {"start": 234799, "end": 235078}, {"start": 247433, "end": 248520}, {"start": 250290, "end": 250451}, {"start": 266191, "end": 266313}, {"start": 266694, "end": 266860}, {"start": 271092, "end": 271231}, {"start": 272660, "end": 272785}, {"start": 274600, "end": 274699}, {"start": 274895, "end": 275056}, {"start": 278179, "end": 278271}, {"start": 280278, "end": 280327}, {"start": 283765, "end": 286390}, {"start": 286498, "end": 287620}], "domains": [{"name": "PF01369", "regions": [{"start": 651, "end": 839}]}, {"name": "PS50190", "regions": [{"start": 644, "end": 837}]}, {"name": "SSF48425", "regions": [{"start": 647, "end": 844}]}, {"name": "PS50096", "regions": [{"start": 315, "end": 344}]}, {"name": "PS50099", "regions": [{"start": 1061, "end": 1165}]}, {"name": "SM00222", "regions": [{"start": 648, "end": 839}]}, {"name": "SSF50729", "regions": [{"start": 845, "end": 983}]}], "start": 176049, "end": 287620, "cdna_coding_start": 1, "cdna_coding_end": 3549, "aliases": []}]}, 
{"chr": "12", "start": 299243, "end": 323736, "name": "ENSG00000111181", "strand": "-1", "aliases": ["SLC6A12"], "transcripts": [{"is_best_transcript": true, "name": "ENST00000359674", "exons": [{"start": 322677, "end": 322863}, {"start": 321192, "end": 321276}, {"start": 318939, "end": 319209}, {"start": 313730, "end": 313864}, {"start": 311906, "end": 312046}, {"start": 310928, "end": 311015}, {"start": 309817, "end": 309949}, {"start": 307963, "end": 308097}, {"start": 307066, "end": 307169}, {"start": 306543, "end": 306667}, {"start": 305936, "end": 306048}, {"start": 305290, "end": 305427}, {"start": 304391, "end": 304493}, {"start": 302443, "end": 302543}, {"start": 301644, "end": 301814}, {"start": 299243, "end": 300377}], "domains": [{"name": "PR00176", "regions": [{"start": 44, "end": 65}, {"start": 73, "end": 92}, {"start": 117, "end": 143}, {"start": 238, "end": 255}, {"start": 320, "end": 340}, {"start": 374, "end": 393}, {"start": 458, "end": 478}, {"start": 498, "end": 518}]}, {"name": "PR01198", "regions": [{"start": 4, "end": 17}, {"start": 565, "end": 582}, {"start": 599, "end": 611}]}, {"name": "PF00209", "regions": [{"start": 36, "end": 560}]}, {"name": "PS50267", "regions": [{"start": 35, "end": 563}]}], "start": 299243, "end": 322863, "cdna_coding_start": 330, "cdna_coding_end": 2174, "aliases": []}, {"is_best_transcript": false, "name": "ENST00000424061", "exons": [{"start": 322456, "end": 322504}, {"start": 321192, "end": 321276}, {"start": 319371, "end": 319734}, {"start": 318939, "end": 319209}, {"start": 313730, "end": 313864}, {"start": 311906, "end": 312046}, {"start": 310928, "end": 311015}, {"start": 309817, "end": 309949}, {"start": 307963, "end": 308097}, {"start": 307066, "end": 307169}, {"start": 306543, "end": 306667}, {"start": 305936, "end": 306048}, {"start": 305290, "end": 305427}, {"start": 304391, "end": 304493}, {"start": 302443, "end": 302543}, {"start": 301644, "end": 301814}, {"start": 299302, "end": 300377}], "domains": 
[{"name": "PR00176", "regions": [{"start": 44, "end": 65}, {"start": 73, "end": 92}, {"start": 117, "end": 143}, {"start": 238, "end": 255}, {"start": 320, "end": 340}, {"start": 374, "end": 393}, {"start": 458, "end": 478}, {"start": 498, "end": 518}]}, {"name": "PR01198", "regions": [{"start": 4, "end": 17}, {"start": 565, "end": 582}, {"start": 599, "end": 611}]}, {"name": "PF00209", "regions": [{"start": 36, "end": 560}]}, {"name": "PS50267", "regions": [{"start": 35, "end": 563}]}], "start": 299302, "end": 322504, "cdna_coding_start": 556, "cdna_coding_end": 2400, "aliases": []}, {"is_best_transcript": false, "name": "ENST00000536824", "exons": [{"start": 323088, "end": 323286}, {"start": 318939, "end": 319209}, {"start": 313730, "end": 313864}, {"start": 311906, "end": 312046}, {"start": 310928, "end": 311015}, {"start": 309817, "end": 309949}, {"start": 307963, "end": 308097}, {"start": 307066, "end": 307169}, {"start": 306543, "end": 306667}, {"start": 305936, "end": 306048}, {"start": 305290, "end": 305427}, {"start": 304391, "end": 304493}, {"start": 302443, "end": 302543}, {"start": 301644, "end": 301814}, {"start": 300234, "end": 300377}], "domains": [{"name": "PS50267", "regions": [{"start": 35, "end": 563}]}, {"name": "PR00176", "regions": [{"start": 44, "end": 65}, {"start": 73, "end": 92}, {"start": 117, "end": 143}, {"start": 238, "end": 255}, {"start": 320, "end": 340}, {"start": 374, "end": 393}, {"start": 458, "end": 478}, {"start": 498, "end": 518}]}, {"name": "PR01198", "regions": [{"start": 4, "end": 17}, {"start": 565, "end": 582}, {"start": 599, "end": 611}]}, {"name": "PF00209", "regions": [{"start": 36, "end": 560}]}], "start": 300234, "end": 323286, "cdna_coding_start": 257, "cdna_coding_end": 2101, "aliases": []}, {"is_best_transcript": false, "name": "ENST00000537793", "exons": [{"start": 320301, "end": 320382}, {"start": 318939, "end": 319209}, {"start": 313730, "end": 313864}, {"start": 311958, "end": 312046}], "domains": [{"name": 
"PR00176", "regions": [{"start": 44, "end": 65}, {"start": 73, "end": 92}, {"start": 117, "end": 143}]}, {"name": "PF00209", "regions": [{"start": 36, "end": 146}]}, {"name": "PS50267", "regions": [{"start": 35, "end": 146}]}], "start": 311958, "end": 320382, "cdna_coding_start": 140, "cdna_coding_end": 577, "aliases": []}, {"is_best_transcript": false, "name": "ENST00000535347", "exons": [{"start": 323088, "end": 323256}, {"start": 321192, "end": 321276}, {"start": 319057, "end": 319209}, {"start": 313730, "end": 313864}, {"start": 312035, "end": 312046}], "domains": [], "start": 312035, "end": 323256, "cdna_coding_start": 312, "cdna_coding_end": 554, "aliases": []}]}]}
+{"genes": [{"chr": "15", "start": 63889592, "end": 63893885, "name": "ENSG00000259662", "strand": "+", "aliases": [], "transcripts": [{"is_best_transcript": true, "name": "ENST00000539570", "exons": [{"start": 63889592, "end": 63889944}, {"start": 63893495, "end": 63893885}], "domains": [{"name": "SSF81383", "regions": [{"start": 9, "end": 49}]}], "start": 63889592, "end": 63893885, "cdna_coding_start": 1, "cdna_coding_end": 744, "aliases": []}]}, {"chr": "14", "start": 102027834, "end": 102028748, "name": "ENSG00000258865", "strand": "+", "aliases": ["DIO3"], "transcripts": [{"is_best_transcript": true, "name": "ENST00000510508", "exons": [{"start": 102027834, "end": 102028748}], "domains": [{"name": "PF00837", "regions": [{"start": 38, "end": 293}]}, {"name": "SSF52833", "regions": [{"start": 125, "end": 198}]}], "start": 102027834, "end": 102028748, "cdna_coding_start": 1, "cdna_coding_end": 915, "aliases": []}]}, {"chr": "X", "start": 49364778, "end": 49370618, "name": "ENSG00000255738", "strand": "+", "aliases": ["GAGE4"], "transcripts": [{"is_best_transcript": true, "name": "ENST00000381700", "exons": [{"start": 49364778, "end": 49364861}, {"start": 49365327, "end": 49365447}, {"start": 49368271, "end": 49368396}, {"start": 49370596, "end": 49370618}], "domains": [{"name": "PF05831", "regions": [{"start": 1, "end": 116}]}], "start": 49364778, "end": 49370618, "cdna_coding_start": 1, "cdna_coding_end": 354, "aliases": []}]}, {"chr": "10", "start": 89621708, "end": 89622244, "name": "ENSG00000227268", "strand": "-", "aliases": ["KLLN"], "transcripts": [{"is_best_transcript": true, "name": "ENST00000445946", "exons": [{"start": 89621708, "end": 89622244}], "domains": [], "start": 89621708, "end": 89622244, "cdna_coding_start": 1, "cdna_coding_end": 537, "aliases": []}]}, {"chr": "19", "start": 50193095, "end": 50193750, "name": "ENSG00000224420", "strand": "+", "aliases": ["ADM5"], "transcripts": [{"is_best_transcript": true, "name": "ENST00000420022", "exons": 
[{"start": 50193095, "end": 50193168}, {"start": 50193363, "end": 50193750}], "domains": [], "start": 50193095, "end": 50193750, "cdna_coding_start": 1, "cdna_coding_end": 462, "aliases": []}]}, {"chr": "4", "start": 69056959, "end": 69083631, "name": "ENSG00000226894", "strand": "-", "aliases": ["TMPRSS11BNL"], "transcripts": [{"is_best_transcript": true, "name": "ENST00000432593", "exons": [{"start": 69083624, "end": 69083631}, {"start": 69078080, "end": 69078195}, {"start": 69057125, "end": 69057242}, {"start": 69056959, "end": 69057034}], "domains": [{"name": "SSF82671", "regions": [{"start": 35, "end": 87}]}], "start": 69056959, "end": 69083631, "cdna_coding_start": 1, "cdna_coding_end": 318, "aliases": []}]}, {"chr": "1", "start": 179833916, "end": 179834311, "name": "ENSG00000258664", "strand": "-", "aliases": [], "transcripts": [{"is_best_transcript": true, "name": "ENST00000553856", "exons": [{"start": 179833916, "end": 179834311}], "domains": [], "start": 179833916, "end": 179834311, "cdna_coding_start": 1, "cdna_coding_end": 396, "aliases": []}]}, {"chr": "19", "start": 8959608, "end": 9091814, "name": "ENSG00000181143", "strand": "-", "aliases": ["MUC16"], "transcripts": [{"is_best_transcript": true, "name": "ENST00000397910", "exons": [{"start": 9082340, "end": 9091814}, {"start": 9080451, "end": 9080555}, {"start": 9056173, "end": 9077865}, {"start": 9054235, "end": 9054348}, {"start": 9045564, "end": 9050243}, {"start": 9043426, "end": 9043461}, {"start": 9038380, "end": 9038415}, {"start": 9038077, "end": 9038136}, {"start": 9033610, "end": 9033737}, {"start": 9033231, "end": 9033298}, {"start": 9028224, "end": 9028396}, {"start": 9027540, "end": 9027575}, {"start": 9027216, "end": 9027281}, {"start": 9026191, "end": 9026315}, {"start": 9025591, "end": 9025658}, {"start": 9024826, "end": 9024998}, {"start": 9024461, "end": 9024496}, {"start": 9024134, "end": 9024199}, {"start": 9021054, "end": 9021184}, {"start": 9020765, "end": 9020832}, {"start": 
9019985, "end": 9020157}, {"start": 9019600, "end": 9019635}, {"start": 9019275, "end": 9019340}, {"start": 9018437, "end": 9018561}, {"start": 9018133, "end": 9018200}, {"start": 9017346, "end": 9017518}, {"start": 9016981, "end": 9017016}, {"start": 9016660, "end": 9016722}, {"start": 9015621, "end": 9015745}, {"start": 9015318, "end": 9015385}, {"start": 9014532, "end": 9014704}, {"start": 9014169, "end": 9014204}, {"start": 9013845, "end": 9013910}, {"start": 9012774, "end": 9012898}, {"start": 9012468, "end": 9012535}, {"start": 9011322, "end": 9011494}, {"start": 9010971, "end": 9011006}, {"start": 9010648, "end": 9010713}, {"start": 9009588, "end": 9009712}, {"start": 9009267, "end": 9009334}, {"start": 9008173, "end": 9008345}, {"start": 9007809, "end": 9007844}, {"start": 9007487, "end": 9007552}, {"start": 9006642, "end": 9006766}, {"start": 9006344, "end": 9006411}, {"start": 9005559, "end": 9005731}, {"start": 9005194, "end": 9005229}, {"start": 9004869, "end": 9004934}, {"start": 9003566, "end": 9003690}, {"start": 9003293, "end": 9003360}, {"start": 9002501, "end": 9002673}, {"start": 9002153, "end": 9002188}, {"start": 9001831, "end": 9001896}, {"start": 9000442, "end": 9000566}, {"start": 9000147, "end": 9000214}, {"start": 8999392, "end": 8999564}, {"start": 8999025, "end": 8999060}, {"start": 8998698, "end": 8998763}, {"start": 8997412, "end": 8997536}, {"start": 8997118, "end": 8997185}, {"start": 8996321, "end": 8996493}, {"start": 8995954, "end": 8995989}, {"start": 8995635, "end": 8995700}, {"start": 8994417, "end": 8994538}, {"start": 8994142, "end": 8994209}, {"start": 8993373, "end": 8993545}, {"start": 8993007, "end": 8993042}, {"start": 8987210, "end": 8987334}, {"start": 8987045, "end": 8987112}, {"start": 8982157, "end": 8982329}, {"start": 8979217, "end": 8979252}, {"start": 8977640, "end": 8977690}, {"start": 8976739, "end": 8976860}, {"start": 8976581, "end": 8976648}, {"start": 8976260, "end": 8976432}, {"start": 8973972, "end": 
8974102}, {"start": 8973549, "end": 8973616}, {"start": 8971676, "end": 8971824}, {"start": 8969276, "end": 8969427}, {"start": 8968880, "end": 8968947}, {"start": 8966650, "end": 8966816}, {"start": 8962354, "end": 8962395}, {"start": 8961952, "end": 8962031}, {"start": 8959608, "end": 8959706}], "domains": [{"name": "PS50324", "regions": [{"start": 466, "end": 934}, {"start": 1638, "end": 3054}, {"start": 3899, "end": 4383}, {"start": 5333, "end": 5532}, {"start": 5907, "end": 5999}, {"start": 7083, "end": 10394}]}, {"name": "SSF48726", "regions": [{"start": 3813, "end": 11805}]}, {"name": "SSF82671", "regions": [{"start": 12377, "end": 12509}, {"start": 13312, "end": 13444}, {"start": 13468, "end": 13600}, {"start": 13000, "end": 13132}, {"start": 12688, "end": 12820}, {"start": 13624, "end": 13756}, {"start": 12533, "end": 12665}, {"start": 13156, "end": 13288}, {"start": 12844, "end": 12976}, {"start": 12219, "end": 12351}, {"start": 13780, "end": 13911}, {"start": 12063, "end": 12195}, {"start": 14064, "end": 14195}, {"start": 13913, "end": 14045}, {"start": 14310, "end": 14440}, {"start": 14202, "end": 14308}]}, {"name": "SM00200", "regions": [{"start": 12068, "end": 12199}, {"start": 12226, "end": 12349}, {"start": 12384, "end": 12513}, {"start": 12851, "end": 12982}, {"start": 13007, "end": 13138}, {"start": 13471, "end": 13604}, {"start": 14314, "end": 14444}]}, {"name": "PS50024", "regions": [{"start": 12068, "end": 12133}, {"start": 13007, "end": 13070}, {"start": 13472, "end": 13538}, {"start": 13628, "end": 13694}, {"start": 14314, "end": 14380}]}, {"name": "PS50325", "regions": [{"start": 14, "end": 12083}]}, {"name": "PF01390", "regions": [{"start": 12074, "end": 12175}, {"start": 12231, "end": 12330}, {"start": 12388, "end": 12489}, {"start": 12544, "end": 12646}, {"start": 12699, "end": 12801}, {"start": 12855, "end": 12952}, {"start": 13011, "end": 13113}, {"start": 13167, "end": 13271}, {"start": 13324, "end": 13427}, {"start": 13479, "end": 
13583}, {"start": 13635, "end": 13739}, {"start": 13793, "end": 13894}, {"start": 13922, "end": 14030}, {"start": 14076, "end": 14175}, {"start": 14202, "end": 14271}, {"start": 14319, "end": 14423}]}], "start": 8959608, "end": 9091814, "cdna_coding_start": 1, "cdna_coding_end": 43524, "aliases": []}]}, {"chr": "3", "start": 157815816, "end": 157823813, "name": "ENSG00000258518", "strand": "-", "aliases": [], "transcripts": [{"is_best_transcript": true, "name": "ENST00000483851", "exons": [{"start": 157823468, "end": 157823813}, {"start": 157820467, "end": 157820675}, {"start": 157818043, "end": 157818100}, {"start": 157817649, "end": 157817737}, {"start": 157815816, "end": 157816073}], "domains": [{"name": "PS50071", "regions": [{"start": 138, "end": 198}]}, {"name": "PF00046", "regions": [{"start": 141, "end": 197}]}, {"name": "PS50310", "regions": [{"start": 244, "end": 316}]}, {"name": "PS50316", "regions": [{"start": 247, "end": 259}]}, {"name": "SSF46689", "regions": [{"start": 131, "end": 209}]}, {"name": "SM00389", "regions": [{"start": 140, "end": 202}]}, {"name": "PF03826", "regions": [{"start": 298, "end": 316}]}, {"name": "PS50315", "regions": [{"start": 60, "end": 90}]}, {"name": "PS50803", "regions": [{"start": 301, "end": 314}]}, {"name": "PR00031", "regions": [{"start": 169, "end": 178}, {"start": 178, "end": 194}]}], "start": 157815816, "end": 157823813, "cdna_coding_start": 1, "cdna_coding_end": 960, "aliases": []}]}, {"chr": "10", "start": 225953, "end": 295049, "name": "ENSG00000259741", "strand": "+", "aliases": [], "transcripts": [{"is_best_transcript": true, "name": "ENST00000558098", "exons": [{"start": 225953, "end": 226068}, {"start": 255829, "end": 255988}, {"start": 267135, "end": 267296}, {"start": 282778, "end": 282855}, {"start": 283525, "end": 283617}, {"start": 285378, "end": 285465}, {"start": 285996, "end": 286051}, {"start": 286833, "end": 286910}, {"start": 287961, "end": 288079}, {"start": 292706, "end": 292913}, {"start": 
293338, "end": 293406}, {"start": 294276, "end": 294548}, {"start": 294843, "end": 295049}], "domains": [{"name": "PS50812", "regions": [{"start": 280, "end": 331}]}, {"name": "PS50016", "regions": [{"start": 100, "end": 148}]}, {"name": "SM00297", "regions": [{"start": 151, "end": 257}]}, {"name": "PF00855", "regions": [{"start": 278, "end": 342}]}, {"name": "SSF57903", "regions": [{"start": 86, "end": 151}]}, {"name": "SSF47370", "regions": [{"start": 130, "end": 268}]}, {"name": "SM00293", "regions": [{"start": 278, "end": 329}]}, {"name": "SM00249", "regions": [{"start": 102, "end": 146}]}, {"name": "PS50014", "regions": [{"start": 186, "end": 238}]}, {"name": "PF00439", "regions": [{"start": 182, "end": 242}]}, {"name": "SSF63748", "regions": [{"start": 270, "end": 396}]}], "start": 225953, "end": 295049, "cdna_coding_start": 1, "cdna_coding_end": 1707, "aliases": []}]}, {"chr": "11", "start": 117160282, "end": 117166263, "name": "ENSG00000265969", "strand": "-", "aliases": [], "transcripts": [{"is_best_transcript": true, "name": "ENST00000292095", "exons": [{"start": 117166214, "end": 117166263}, {"start": 117165847, "end": 117166063}, {"start": 117164587, "end": 117164724}, {"start": 117163770, "end": 117163904}, {"start": 117162428, "end": 117162529}, {"start": 117161616, "end": 117161765}, {"start": 117161204, "end": 117161375}, {"start": 117160282, "end": 117160523}], "domains": [{"name": "SSF50630", "regions": [{"start": 18, "end": 346}]}, {"name": "PF00026", "regions": [{"start": 17, "end": 316}]}, {"name": "PR01816", "regions": [{"start": 15, "end": 25}, {"start": 112, "end": 132}, {"start": 260, "end": 274}, {"start": 315, "end": 327}, {"start": 334, "end": 353}, {"start": 355, "end": 375}]}, {"name": "PR00792", "regions": [{"start": 133, "end": 146}, {"start": 186, "end": 197}, {"start": 292, "end": 307}]}, {"name": "PR01815", "regions": [{"start": 47, "end": 70}, {"start": 168, "end": 187}, {"start": 218, "end": 241}, {"start": 256, "end": 270}, 
{"start": 316, "end": 339}, {"start": 352, "end": 373}]}], "start": 117160282, "end": 117166263, "cdna_coding_start": 1, "cdna_coding_end": 1206, "aliases": []}]}, {"chr": "14", "start": 55034638, "end": 55255662, "name": "ENSG00000262355", "strand": "+", "aliases": ["SAMD4A"], "transcripts": [{"is_best_transcript": true, "name": "ENST00000305831", "exons": [{"start": 55034638, "end": 55034830}, {"start": 55168780, "end": 55169298}, {"start": 55215533, "end": 55215642}, {"start": 55218169, "end": 55218255}, {"start": 55226879, "end": 55227212}, {"start": 55231173, "end": 55231258}, {"start": 55236822, "end": 55236940}, {"start": 55241652, "end": 55241853}, {"start": 55243132, "end": 55243258}, {"start": 55251255, "end": 55251338}, {"start": 55255634, "end": 55255662}], "domains": [{"name": "PF07647", "regions": [{"start": 234, "end": 292}]}, {"name": "SSF47769", "regions": [{"start": 232, "end": 293}]}, {"name": "PF00536", "regions": [{"start": 235, "end": 292}]}, {"name": "SM00454", "regions": [{"start": 231, "end": 294}]}], "start": 55034638, "end": 55255662, "cdna_coding_start": 1, "cdna_coding_end": 1890, "aliases": []}]}, {"chr": "MT", "start": 3307, "end": 4262, "name": "ENSG00000198888", "strand": "+", "aliases": ["MT-ND1"], "transcripts": [{"is_best_transcript": true, "name": "ENST00000361390", "exons": [{"start": 3307, "end": 4262}], "domains": [{"name": "PF00146", "regions": [{"start": 2, "end": 308}]}], "start": 3307, "end": 4262, "cdna_coding_start": 1, "cdna_coding_end": 956, "aliases": []}]}, {"chr": "MT", "start": 4470, "end": 5511, "name": "ENSG00000198763", "strand": "+", "aliases": ["MT-ND2"], "transcripts": [{"is_best_transcript": true, "name": "ENST00000361453", "exons": [{"start": 4470, "end": 5511}], "domains": [{"name": "PF06444", "regions": [{"start": 290, "end": 345}]}, {"name": "PR01436", "regions": [{"start": 159, "end": 172}, {"start": 183, "end": 196}, {"start": 202, "end": 220}, {"start": 242, "end": 254}, {"start": 274, "end": 293}]}, 
{"name": "PF00361", "regions": [{"start": 23, "end": 268}]}], "start": 4470, "end": 5511, "cdna_coding_start": 1, "cdna_coding_end": 1042, "aliases": []}]}, {"chr": "MT", "start": 5904, "end": 7445, "name": "ENSG00000198804", "strand": "+", "aliases": ["MT-CO1"], "transcripts": [{"is_best_transcript": true, "name": "ENST00000361624", "exons": [{"start": 5904, "end": 7445}], "domains": [{"name": "PR01165", "regions": [{"start": 5, "end": 30}, {"start": 52, "end": 75}, {"start": 80, "end": 104}, {"start": 122, "end": 134}, {"start": 154, "end": 172}, {"start": 183, "end": 202}, {"start": 234, "end": 255}, {"start": 281, "end": 296}, {"start": 305, "end": 326}, {"start": 340, "end": 358}, {"start": 368, "end": 387}, {"start": 418, "end": 439}]}, {"name": "PS50855", "regions": [{"start": 1, "end": 511}]}, {"name": "SSF81442", "regions": [{"start": 1, "end": 513}]}, {"name": "PF00115", "regions": [{"start": 12, "end": 460}]}], "start": 5904, "end": 7445, "cdna_coding_start": 1, "cdna_coding_end": 1542, "aliases": []}]}, {"chr": "MT", "start": 7586, "end": 8269, "name": "ENSG00000198712", "strand": "+", "aliases": ["MT-CO2"], "transcripts": [{"is_best_transcript": true, "name": "ENST00000361739", "exons": [{"start": 7586, "end": 8269}], "domains": [{"name": "SSF81464", "regions": [{"start": 1, "end": 90}]}, {"name": "PF00116", "regions": [{"start": 95, "end": 213}]}, {"name": "TIGR02866", "regions": [{"start": 13, "end": 215}]}, {"name": "PS50999", "regions": [{"start": 1, "end": 91}]}, {"name": "PS50857", "regions": [{"start": 92, "end": 225}]}, {"name": "SSF49503", "regions": [{"start": 91, "end": 220}]}, {"name": "PF02790", "regions": [{"start": 1, "end": 83}]}, {"name": "PR01166", "regions": [{"start": 57, "end": 69}, {"start": 69, "end": 89}, {"start": 91, "end": 110}, {"start": 134, "end": 155}, {"start": 158, "end": 178}, {"start": 178, "end": 195}, {"start": 196, "end": 213}]}], "start": 7586, "end": 8269, "cdna_coding_start": 1, "cdna_coding_end": 684, 
"aliases": []}]}, {"chr": "MT", "start": 8366, "end": 8572, "name": "ENSG00000228253", "strand": "+", "aliases": ["MT-ATP8"], "transcripts": [{"is_best_transcript": true, "name": "ENST00000361851", "exons": [{"start": 8366, "end": 8572}], "domains": [{"name": "PF00895", "regions": [{"start": 1, "end": 56}]}], "start": 8366, "end": 8572, "cdna_coding_start": 1, "cdna_coding_end": 207, "aliases": []}]}, {"chr": "MT", "start": 8527, "end": 9207, "name": "ENSG00000198899", "strand": "+", "aliases": ["MT-ATP6"], "transcripts": [{"is_best_transcript": true, "name": "ENST00000361899", "exons": [{"start": 8527, "end": 9207}], "domains": [{"name": "SSF81336", "regions": [{"start": 62, "end": 223}]}, {"name": "PF00119", "regions": [{"start": 21, "end": 224}]}, {"name": "TIGR01131", "regions": [{"start": 9, "end": 225}]}, {"name": "PR00123", "regions": [{"start": 71, "end": 87}, {"start": 132, "end": 147}, {"start": 152, "end": 174}, {"start": 209, "end": 224}]}], "start": 8527, "end": 9207, "cdna_coding_start": 1, "cdna_coding_end": 681, "aliases": []}]}, {"chr": "MT", "start": 9207, "end": 9990, "name": "ENSG00000198938", "strand": "+", "aliases": ["MT-CO3"], "transcripts": [{"is_best_transcript": true, "name": "ENST00000362079", "exons": [{"start": 9207, "end": 9990}], "domains": [{"name": "PF00510", "regions": [{"start": 6, "end": 261}]}, {"name": "SSF81452", "regions": [{"start": 1, "end": 260}]}, {"name": "PS50253", "regions": [{"start": 4, "end": 261}]}], "start": 9207, "end": 9990, "cdna_coding_start": 1, "cdna_coding_end": 784, "aliases": []}]}, {"chr": "MT", "start": 10059, "end": 10404, "name": "ENSG00000198840", "strand": "+", "aliases": ["MT-ND3"], "transcripts": [{"is_best_transcript": true, "name": "ENST00000361227", "exons": [{"start": 10059, "end": 10404}], "domains": [{"name": "PF00507", "regions": [{"start": 13, "end": 113}]}], "start": 10059, "end": 10404, "cdna_coding_start": 1, "cdna_coding_end": 346, "aliases": []}]}, {"chr": "MT", "start": 10470, 
"end": 10766, "name": "ENSG00000212907", "strand": "+", "aliases": ["MT-ND4L"], "transcripts": [{"is_best_transcript": true, "name": "ENST00000361335", "exons": [{"start": 10470, "end": 10766}], "domains": [{"name": "PF00420", "regions": [{"start": 4, "end": 98}]}], "start": 10470, "end": 10766, "cdna_coding_start": 1, "cdna_coding_end": 297, "aliases": []}]}, {"chr": "MT", "start": 10760, "end": 12137, "name": "ENSG00000198886", "strand": "+", "aliases": ["MT-ND4"], "transcripts": [{"is_best_transcript": true, "name": "ENST00000361381", "exons": [{"start": 10760, "end": 12137}], "domains": [{"name": "TIGR01972", "regions": [{"start": 59, "end": 456}]}, {"name": "PR01437", "regions": [{"start": 118, "end": 137}, {"start": 149, "end": 173}, {"start": 221, "end": 245}, {"start": 308, "end": 327}, {"start": 360, "end": 386}]}, {"name": "PF00361", "regions": [{"start": 112, "end": 383}]}, {"name": "PF01059", "regions": [{"start": 1, "end": 109}]}], "start": 10760, "end": 12137, "cdna_coding_start": 1, "cdna_coding_end": 1378, "aliases": []}]}, {"chr": "MT", "start": 12337, "end": 14148, "name": "ENSG00000198786", "strand": "+", "aliases": ["MT-ND5"], "transcripts": [{"is_best_transcript": true, "name": "ENST00000361567", "exons": [{"start": 12337, "end": 14148}], "domains": [{"name": "PF00662", "regions": [{"start": 62, "end": 123}]}, {"name": "TIGR01974", "regions": [{"start": 8, "end": 501}]}, {"name": "PR01434", "regions": [{"start": 83, "end": 108}, {"start": 111, "end": 131}, {"start": 158, "end": 179}, {"start": 219, "end": 240}, {"start": 241, "end": 267}, {"start": 309, "end": 321}, {"start": 412, "end": 431}]}, {"name": "PF00361", "regions": [{"start": 134, "end": 397}]}, {"name": "PF06455", "regions": [{"start": 422, "end": 602}]}], "start": 12337, "end": 14148, "cdna_coding_start": 1, "cdna_coding_end": 1812, "aliases": []}]}, {"chr": "MT", "start": 14149, "end": 14673, "name": "ENSG00000198695", "strand": "-", "aliases": ["MT-ND6"], "transcripts": 
[{"is_best_transcript": true, "name": "ENST00000361681", "exons": [{"start": 14149, "end": 14673}], "domains": [{"name": "PF00499", "regions": [{"start": 14, "end": 171}]}], "start": 14149, "end": 14673, "cdna_coding_start": 1, "cdna_coding_end": 525, "aliases": []}]}, {"chr": "MT", "start": 14747, "end": 15887, "name": "ENSG00000198727", "strand": "+", "aliases": ["MT-CYB"], "transcripts": [{"is_best_transcript": true, "name": "ENST00000361789", "exons": [{"start": 14747, "end": 15887}], "domains": [{"name": "PF00032", "regions": [{"start": 259, "end": 359}]}, {"name": "PS51003", "regions": [{"start": 210, "end": 380}]}, {"name": "PS51002", "regions": [{"start": 1, "end": 209}]}, {"name": "PF00033", "regions": [{"start": 24, "end": 200}]}, {"name": "SSF81342", "regions": [{"start": 1, "end": 260}]}, {"name": "SSF81648", "regions": [{"start": 261, "end": 379}]}], "start": 14747, "end": 15887, "cdna_coding_start": 1, "cdna_coding_end": 1141, "aliases": []}]}, {"chr": "HG418_PATCH", "start": 143738824, "end": 143763329, "name": "ENSG00000262023", "strand": "-", "aliases": [], "transcripts": [{"is_best_transcript": false, "name": "ENST00000585837", "exons": [{"start": 143751293, "end": 143751331}, {"start": 143745839, "end": 143747881}, {"start": 143740183, "end": 143740305}, {"start": 143738824, "end": 143739441}], "domains": [{"name": "SM00674", "regions": [{"start": 83, "end": 149}]}, {"name": "PF03221", "regions": [{"start": 87, "end": 149}]}, {"name": "PF03184", "regions": [{"start": 179, "end": 382}]}, {"name": "SSF46689", "regions": [{"start": 79, "end": 144}, {"start": 13, "end": 76}]}, {"name": "PS50960", "regions": [{"start": 11, "end": 62}]}, {"name": "PF04218", "regions": [{"start": 14, "end": 66}]}], "start": 143738824, "end": 143751331, "cdna_coding_start": 502, "cdna_coding_end": 2172, "aliases": []}, {"is_best_transcript": true, "name": "ENST00000571961", "exons": [{"start": 143751293, "end": 143751355}, {"start": 143745839, "end": 143747881}, 
{"start": 143739726, "end": 143740305}], "domains": [{"name": "PF03221", "regions": [{"start": 87, "end": 149}]}, {"name": "SM00674", "regions": [{"start": 83, "end": 149}]}, {"name": "PF04218", "regions": [{"start": 14, "end": 66}]}, {"name": "PS50960", "regions": [{"start": 11, "end": 62}]}, {"name": "SSF46689", "regions": [{"start": 79, "end": 144}, {"start": 13, "end": 76}]}, {"name": "PF03184", "regions": [{"start": 179, "end": 382}]}], "start": 143739726, "end": 143751355, "cdna_coding_start": 526, "cdna_coding_end": 2196, "aliases": []}, {"is_best_transcript": false, "name": "ENST00000592696", "exons": [{"start": 143747818, "end": 143747881}, {"start": 143739743, "end": 143747688}], "domains": [{"name": "PF03221", "regions": [{"start": 87, "end": 149}]}, {"name": "SM00674", "regions": [{"start": 83, "end": 149}]}, {"name": "PF04218", "regions": [{"start": 14, "end": 66}]}, {"name": "PS50960", "regions": [{"start": 11, "end": 62}]}, {"name": "SSF46689", "regions": [{"start": 79, "end": 144}, {"start": 13, "end": 76}]}, {"name": "PF03184", "regions": [{"start": 179, "end": 382}]}], "start": 143739743, "end": 143747881, "cdna_coding_start": 334, "cdna_coding_end": 2040, "aliases": []}, {"is_best_transcript": false, "name": "ENST00000585713", "exons": [{"start": 143751302, "end": 143751349}, {"start": 143747343, "end": 143747881}], "domains": [], "start": 143747343, "end": 143751349, "cdna_coding_start": 511, "cdna_coding_end": 587, "aliases": []}]}, {"chr": "HG418_PATCH", "start": 143751669, "end": 143764085, "name": "ENSG00000262150", "strand": "+", "aliases": [], "transcripts": [{"is_best_transcript": true, "name": "ENST00000571412", "exons": [{"start": 143761817, "end": 143761924}, {"start": 143762688, "end": 143762795}, {"start": 143763282, "end": 143764085}], "domains": [{"name": "PF00021", "regions": [{"start": 14, "end": 84}]}, {"name": "SM00134", "regions": [{"start": 12, "end": 98}]}, {"name": "SSF57302", "regions": [{"start": 10, "end": 84}]}], 
"start": 143761817, "end": 143764085, "cdna_coding_start": 84, "cdna_coding_end": 428, "aliases": []}, {"is_best_transcript": false, "name": "ENST00000575167", "exons": [{"start": 143761818, "end": 143761924}, {"start": 143762688, "end": 143763131}], "domains": [{"name": "PF00021", "regions": [{"start": 14, "end": 52}]}], "start": 143761818, "end": 143763131, "cdna_coding_start": 83, "cdna_coding_end": 247, "aliases": []}]}, {"chr": "HG418_PATCH", "start": 143781472, "end": 143786488, "name": "ENSG00000262378", "strand": "+", "aliases": [], "transcripts": [{"is_best_transcript": true, "name": "ENST00000572300", "exons": [{"start": 143781472, "end": 143781991}, {"start": 143782961, "end": 143783074}, {"start": 143784452, "end": 143786488}], "domains": [{"name": "SSF57302", "regions": [{"start": 42, "end": 122}]}], "start": 143781472, "end": 143786488, "cdna_coding_start": 418, "cdna_coding_end": 915, "aliases": []}, {"is_best_transcript": false, "name": "ENST00000571924", "exons": [{"start": 143781474, "end": 143781991}, {"start": 143782961, "end": 143783074}, {"start": 143783861, "end": 143784083}], "domains": [], "start": 143781474, "end": 143784083, "cdna_coding_start": 416, "cdna_coding_end": 718, "aliases": []}, {"is_best_transcript": false, "name": "ENST00000574968", "exons": [{"start": 143781475, "end": 143781991}, {"start": 143782961, "end": 143783074}, {"start": 143784484, "end": 143784815}], "domains": [], "start": 143781475, "end": 143784815, "cdna_coding_start": 415, "cdna_coding_end": 732, "aliases": []}, {"is_best_transcript": false, "name": "ENST00000570580", "exons": [{"start": 143781833, "end": 143781991}, {"start": 143782961, "end": 143785525}], "domains": [], "start": 143781833, "end": 143785525, "cdna_coding_start": 57, "cdna_coding_end": 314, "aliases": []}]}, {"chr": "HG418_PATCH", "start": 143808564, "end": 143818288, "name": "ENSG00000263194", "strand": "+", "aliases": [], "transcripts": [{"is_best_transcript": true, "name": 
"ENST00000575946", "exons": [{"start": 143808564, "end": 143809220}, {"start": 143816687, "end": 143818288}], "domains": [{"name": "SSF54637", "regions": [{"start": 44, "end": 189}]}], "start": 143808564, "end": 143818288, "cdna_coding_start": 145, "cdna_coding_end": 771, "aliases": []}, {"is_best_transcript": false, "name": "ENST00000573810", "exons": [{"start": 143809084, "end": 143809220}, {"start": 143816687, "end": 143817029}, {"start": 143818218, "end": 143818288}], "domains": [{"name": "SSF54637", "regions": [{"start": 2, "end": 64}]}], "start": 143809084, "end": 143818288, "cdna_coding_start": 1, "cdna_coding_end": 251, "aliases": []}]}, {"chr": "12", "start": 175931, "end": 287626, "name": "ENSG00000120645", "strand": "+", "aliases": ["IQSEC3"], "transcripts": [{"is_best_transcript": false, "name": "ENST00000538872", "exons": [{"start": 175931, "end": 176602}, {"start": 208312, "end": 208380}, {"start": 234799, "end": 235078}, {"start": 247433, "end": 248520}, {"start": 250290, "end": 250451}, {"start": 266191, "end": 266313}, {"start": 266694, "end": 266860}, {"start": 271092, "end": 271231}, {"start": 272660, "end": 272785}, {"start": 274600, "end": 274699}, {"start": 274895, "end": 275056}, {"start": 278179, "end": 278271}, {"start": 280278, "end": 280327}, {"start": 283765, "end": 287626}], "domains": [{"name": "SM00222", "regions": [{"start": 648, "end": 839}]}, {"name": "SSF50729", "regions": [{"start": 845, "end": 983}]}, {"name": "PS50096", "regions": [{"start": 315, "end": 344}]}, {"name": "PS50099", "regions": [{"start": 1061, "end": 1165}]}, {"name": "SSF48425", "regions": [{"start": 647, "end": 844}]}, {"name": "PS50190", "regions": [{"start": 644, "end": 837}]}, {"name": "PF01369", "regions": [{"start": 651, "end": 839}]}], "start": 175931, "end": 287626, "cdna_coding_start": 119, "cdna_coding_end": 3667, "aliases": []}, {"is_best_transcript": true, "name": "ENST00000382841", "exons": [{"start": 186542, "end": 186878}, {"start": 208312, "end": 
208380}, {"start": 247433, "end": 248520}, {"start": 250290, "end": 250451}, {"start": 266191, "end": 266313}, {"start": 266694, "end": 266860}, {"start": 271092, "end": 271231}, {"start": 272660, "end": 272785}, {"start": 274600, "end": 274699}, {"start": 274895, "end": 275056}, {"start": 278179, "end": 278271}, {"start": 280278, "end": 280327}, {"start": 280413, "end": 280496}], "domains": [{"name": "PS50190", "regions": [{"start": 341, "end": 534}]}, {"name": "PF01369", "regions": [{"start": 348, "end": 536}]}, {"name": "SM00222", "regions": [{"start": 345, "end": 536}]}, {"name": "SSF50729", "regions": [{"start": 542, "end": 680}]}, {"name": "SSF48425", "regions": [{"start": 344, "end": 541}]}, {"name": "PS50096", "regions": [{"start": 12, "end": 41}]}], "start": 186542, "end": 280496, "cdna_coding_start": 413, "cdna_coding_end": 2692, "aliases": []}, {"is_best_transcript": false, "name": "ENST00000326261", "exons": [{"start": 176049, "end": 176602}, {"start": 208312, "end": 208380}, {"start": 234799, "end": 235078}, {"start": 247433, "end": 248520}, {"start": 250290, "end": 250451}, {"start": 266191, "end": 266313}, {"start": 266694, "end": 266860}, {"start": 271092, "end": 271231}, {"start": 272660, "end": 272785}, {"start": 274600, "end": 274699}, {"start": 274895, "end": 275056}, {"start": 278179, "end": 278271}, {"start": 280278, "end": 280327}, {"start": 283765, "end": 286390}, {"start": 286498, "end": 287620}], "domains": [{"name": "PF01369", "regions": [{"start": 651, "end": 839}]}, {"name": "PS50190", "regions": [{"start": 644, "end": 837}]}, {"name": "SSF48425", "regions": [{"start": 647, "end": 844}]}, {"name": "PS50096", "regions": [{"start": 315, "end": 344}]}, {"name": "PS50099", "regions": [{"start": 1061, "end": 1165}]}, {"name": "SM00222", "regions": [{"start": 648, "end": 839}]}, {"name": "SSF50729", "regions": [{"start": 845, "end": 983}]}], "start": 176049, "end": 287620, "cdna_coding_start": 1, "cdna_coding_end": 3549, "aliases": []}]}, 
{"chr": "12", "start": 299243, "end": 323736, "name": "ENSG00000111181", "strand": "-", "aliases": ["SLC6A12"], "transcripts": [{"is_best_transcript": true, "name": "ENST00000359674", "exons": [{"start": 322677, "end": 322863}, {"start": 321192, "end": 321276}, {"start": 318939, "end": 319209}, {"start": 313730, "end": 313864}, {"start": 311906, "end": 312046}, {"start": 310928, "end": 311015}, {"start": 309817, "end": 309949}, {"start": 307963, "end": 308097}, {"start": 307066, "end": 307169}, {"start": 306543, "end": 306667}, {"start": 305936, "end": 306048}, {"start": 305290, "end": 305427}, {"start": 304391, "end": 304493}, {"start": 302443, "end": 302543}, {"start": 301644, "end": 301814}, {"start": 299243, "end": 300377}], "domains": [{"name": "PR00176", "regions": [{"start": 44, "end": 65}, {"start": 73, "end": 92}, {"start": 117, "end": 143}, {"start": 238, "end": 255}, {"start": 320, "end": 340}, {"start": 374, "end": 393}, {"start": 458, "end": 478}, {"start": 498, "end": 518}]}, {"name": "PR01198", "regions": [{"start": 4, "end": 17}, {"start": 565, "end": 582}, {"start": 599, "end": 611}]}, {"name": "PF00209", "regions": [{"start": 36, "end": 560}]}, {"name": "PS50267", "regions": [{"start": 35, "end": 563}]}], "start": 299243, "end": 322863, "cdna_coding_start": 330, "cdna_coding_end": 2174, "aliases": []}, {"is_best_transcript": false, "name": "ENST00000424061", "exons": [{"start": 322456, "end": 322504}, {"start": 321192, "end": 321276}, {"start": 319371, "end": 319734}, {"start": 318939, "end": 319209}, {"start": 313730, "end": 313864}, {"start": 311906, "end": 312046}, {"start": 310928, "end": 311015}, {"start": 309817, "end": 309949}, {"start": 307963, "end": 308097}, {"start": 307066, "end": 307169}, {"start": 306543, "end": 306667}, {"start": 305936, "end": 306048}, {"start": 305290, "end": 305427}, {"start": 304391, "end": 304493}, {"start": 302443, "end": 302543}, {"start": 301644, "end": 301814}, {"start": 299302, "end": 300377}], "domains": 
[{"name": "PR00176", "regions": [{"start": 44, "end": 65}, {"start": 73, "end": 92}, {"start": 117, "end": 143}, {"start": 238, "end": 255}, {"start": 320, "end": 340}, {"start": 374, "end": 393}, {"start": 458, "end": 478}, {"start": 498, "end": 518}]}, {"name": "PR01198", "regions": [{"start": 4, "end": 17}, {"start": 565, "end": 582}, {"start": 599, "end": 611}]}, {"name": "PF00209", "regions": [{"start": 36, "end": 560}]}, {"name": "PS50267", "regions": [{"start": 35, "end": 563}]}], "start": 299302, "end": 322504, "cdna_coding_start": 556, "cdna_coding_end": 2400, "aliases": []}, {"is_best_transcript": false, "name": "ENST00000536824", "exons": [{"start": 323088, "end": 323286}, {"start": 318939, "end": 319209}, {"start": 313730, "end": 313864}, {"start": 311906, "end": 312046}, {"start": 310928, "end": 311015}, {"start": 309817, "end": 309949}, {"start": 307963, "end": 308097}, {"start": 307066, "end": 307169}, {"start": 306543, "end": 306667}, {"start": 305936, "end": 306048}, {"start": 305290, "end": 305427}, {"start": 304391, "end": 304493}, {"start": 302443, "end": 302543}, {"start": 301644, "end": 301814}, {"start": 300234, "end": 300377}], "domains": [{"name": "PS50267", "regions": [{"start": 35, "end": 563}]}, {"name": "PR00176", "regions": [{"start": 44, "end": 65}, {"start": 73, "end": 92}, {"start": 117, "end": 143}, {"start": 238, "end": 255}, {"start": 320, "end": 340}, {"start": 374, "end": 393}, {"start": 458, "end": 478}, {"start": 498, "end": 518}]}, {"name": "PR01198", "regions": [{"start": 4, "end": 17}, {"start": 565, "end": 582}, {"start": 599, "end": 611}]}, {"name": "PF00209", "regions": [{"start": 36, "end": 560}]}], "start": 300234, "end": 323286, "cdna_coding_start": 257, "cdna_coding_end": 2101, "aliases": []}, {"is_best_transcript": false, "name": "ENST00000537793", "exons": [{"start": 320301, "end": 320382}, {"start": 318939, "end": 319209}, {"start": 313730, "end": 313864}, {"start": 311958, "end": 312046}], "domains": [{"name": 
"PR00176", "regions": [{"start": 44, "end": 65}, {"start": 73, "end": 92}, {"start": 117, "end": 143}]}, {"name": "PF00209", "regions": [{"start": 36, "end": 146}]}, {"name": "PS50267", "regions": [{"start": 35, "end": 146}]}], "start": 311958, "end": 320382, "cdna_coding_start": 140, "cdna_coding_end": 577, "aliases": []}, {"is_best_transcript": false, "name": "ENST00000535347", "exons": [{"start": 323088, "end": 323256}, {"start": 321192, "end": 321276}, {"start": 319057, "end": 319209}, {"start": 313730, "end": 313864}, {"start": 312035, "end": 312046}], "domains": [], "start": 312035, "end": 323256, "cdna_coding_start": 312, "cdna_coding_end": 554, "aliases": []}]}]}
diff --git a/tests/data/example_genes.json b/tests/data/example_genes.json
index 51e5d477..f1a6cf8e 100644
--- a/tests/data/example_genes.json
+++ b/tests/data/example_genes.json
@@ -7991,6 +7991,8 @@
             "transcripts": [{
                 "name": "ENST00000346085",
                 "is_best_transcript": true,
+                "start": 157099063,
+                "end": 157529495,
                 "cdna_coding_end": 6751,
                 "cdna_coding_start": 2,
                 "exons": [
diff --git a/tests/data/mock_annotations.json b/tests/data/mock_annotations.json
index 4d4bda28..7e331c50 100644
--- a/tests/data/mock_annotations.json
+++ b/tests/data/mock_annotations.json
@@ -1 +1 @@
-{"ensembl_version": 69, "genes": [{"name": "ENSG00000128891", "start": "1", "strand": "-1", "end": "36375", "transcripts": [{"is_best_transcript": "false", "exons": [{"end": "36329", "name": "ENSE00002560148", "start": "36294"}, {"end": "34345", "start": "34090", "name": "ENSE00002761197"}, {"end": "28690", "name": "ENSE00002865842", "start": "28534"}, {"name": "ENSE00002539417", "start": "1", "end": "108"}], "start": "1", "name": "ENST00000560305", "end": "36329", "cdna_coding_start": 49, "aliases": [], "domains": [], "cdna_coding_end": 498}, {"start": "2649", "exons": [{"end": "34345", "name": "ENSE00002761197", "start": "34090"}, {"start": "28534", "name": "ENSE00002865842", "end": "28690"}, {"name": "ENSE00002540983", "start": "24913", "end": "25445"}, {"name": "ENSE00002563185", "start": "2649", "end": "3620"}], "name": "ENST00000559153", "is_best_transcript": "false", "domains": [], "cdna_coding_end": 570, "end": "34345", "cdna_coding_start": 13, "aliases": []}, {"is_best_transcript": "false", "exons": [{"end": "36310", "name": "ENSE00002537918", "start": "36109"}, {"start": "34090", "name": "ENSE00002761197", "end": "34345"}, {"end": "28690", "start": "28534", "name": "ENSE00002865842"}, {"start": "2659", "name": "ENSE00002565021", "end": "3620"}], "start": "2659", "name": "ENST00000561011", "end": "36310", "cdna_coding_start": 215, "aliases": [], "domains": [], "cdna_coding_end": 619}, {"is_best_transcript": "false", "exons": [{"end": "36307", "name": "ENSE00002562556", "start": "36294"}, {"end": "34345", "name": "ENSE00002761197", "start": "34090"}, {"name": "ENSE00002865842", "start": "28534", "end": "28690"}, {"end": "25445", "name": "ENSE00002555814", "start": "24755"}, {"name": "ENSE00002547244", "start": "5661", "end": "10673"}], "start": "5661", "name": "ENST00000559291", "cdna_coding_start": 27, "end": "36307", "aliases": [], "cdna_coding_end": 584, "domains": []}, {"is_best_transcript": "false", "name": "ENST00000559911", "start": "6218", "exons": 
[{"end": "36329", "name": "ENSE00002560148", "start": "36294"}, {"end": "34345", "start": "34090", "name": "ENSE00002761197"}, {"start": "28534", "name": "ENSE00002865842", "end": "28690"}, {"name": "ENSE00002551447", "start": "10557", "end": "10673"}, {"name": "ENSE00002544927", "start": "6218", "end": "6328"}], "aliases": [], "end": "36329", "cdna_coding_start": 49, "domains": [], "cdna_coding_end": 660}, {"exons": [{"name": "ENSE00002560148", "start": "36294", "end": "36329"}, {"name": "ENSE00002761197", "start": "34090", "end": "34345"}, {"name": "ENSE00002865842", "start": "28534", "end": "28690"}, {"start": "10547", "name": "ENSE00002549191", "end": "10673"}], "start": "10547", "name": "ENST00000558113", "is_best_transcript": "false", "domains": [], "cdna_coding_end": 570, "aliases": [], "end": "36329", "cdna_coding_start": 49}, {"cdna_coding_end": 832, "domains": [], "aliases": ["NP_443081.1", "NM_052849.2"], "cdna_coding_start": 275, "end": "36370", "start": "24417", "exons": [{"name": "ENSE00001528957", "start": "36109", "end": "36370"}, {"start": "34090", "name": "ENSE00002761197", "end": "34345"}, {"start": "28534", "name": "ENSE00002865842", "end": "28690"}, {"end": "25445", "start": "24417", "name": "ENSE00001933860"}], "name": "ENST00000358005", "is_best_transcript": "true"}, {"name": "ENST00000416810", "start": "24422", "exons": [{"end": "36375", "start": "36294", "name": "ENSE00002565648"}, {"end": "34345", "start": "34090", "name": "ENSE00002761197"}, {"start": "28534", "name": "ENSE00002865842", "end": "28690"}, {"end": "25445", "start": "24422", "name": "ENSE00001427910"}], "is_best_transcript": "false", "domains": [], "cdna_coding_end": 652, "aliases": ["NP_001074261.1", "NM_001080792.1"], "end": "36375", "cdna_coding_start": 95}, {"name": "ENST00000558750", "start": "25260", "exons": [{"start": "36084", "name": "ENSE00001759032", "end": "36298"}, {"name": "ENSE00002815778", "start": "34090", "end": "34345"}, {"name": "ENSE00002865842", "start": 
"28534", "end": "28690"}, {"start": "25260", "name": "ENSE00002570697", "end": "25445"}], "is_best_transcript": "false", "domains": [], "cdna_coding_end": 785, "aliases": ["NP_001074260.1", "NM_001080791.1"], "end": "36298", "cdna_coding_start": 201}, {"cdna_coding_start": 98, "end": "36337", "aliases": [], "cdna_coding_end": 349, "domains": [], "is_best_transcript": "false", "exons": [{"end": "36337", "start": "36294", "name": "ENSE00002551707"}, {"end": "28690", "start": "28534", "name": "ENSE00002785665"}, {"end": "25445", "start": "25298", "name": "ENSE00002565968"}], "start": "25298", "name": "ENST00000558918"}, {"cdna_coding_end": 557, "domains": [], "aliases": [], "cdna_coding_start": 47, "end": "36332", "start": "25336", "exons": [{"end": "36332", "start": "36294", "name": "ENSE00002549749"}, {"end": "34340", "name": "ENSE00002571615", "start": "34090"}, {"end": "28690", "name": "ENSE00002865842", "start": "28534"}, {"name": "ENSE00002556846", "start": "25336", "end": "25445"}], "name": "ENST00000559103", "is_best_transcript": "false"}, {"cdna_coding_end": 484, "domains": [], "aliases": [], "cdna_coding_start": 80, "end": "36360", "name": "ENST00000558871", "start": "27939", "exons": [{"end": "36360", "name": "ENSE00002572819", "start": "36294"}, {"start": "34090", "name": "ENSE00002761197", "end": "34345"}, {"end": "28690", "start": "27939", "name": "ENSE00002536768"}], "is_best_transcript": "false"}], "aliases": ["C15orf57"], "chr": "gene1"}, {"name": "ENSG00000104147", "start": "1", "transcripts": [{"cdna_coding_end": 750, "domains": [], "cdna_coding_start": 61, "end": "23354", "aliases": ["NP_009211.1", "NM_007280.1"], "exons": [{"end": "23354", "name": "ENSE00000930990", "start": "22973"}, {"end": "22714", "start": "22648", "name": "ENSE00000930989"}, {"start": "10391", "name": "ENSE00000942405", "end": "10513"}, {"start": "4006", "name": "ENSE00000942406", "end": "4087"}, {"end": "582", "name": "ENSE00000884003", "start": "1"}], "start": "1", "name": 
"ENST00000220514", "is_best_transcript": "true"}, {"is_best_transcript": "false", "name": "ENST00000560640", "start": "495", "exons": [{"end": "23220", "start": "22973", "name": "ENSE00002541746"}, {"name": "ENSE00000930989", "start": "22648", "end": "22714"}, {"end": "4087", "name": "ENSE00000942406", "start": "4006"}, {"start": "495", "name": "ENSE00002548921", "end": "582"}], "aliases": [], "end": "23220", "cdna_coding_start": 1, "domains": [], "cdna_coding_end": 485}], "end": "23354", "strand": "-1", "chr": "gene2", "aliases": ["OIP5"]}, {"chr": "gene2", "aliases": ["NUSAP1"], "transcripts": [{"aliases": ["NP_001230072.1", "NM_001243143.1"], "end": "71783", "cdna_coding_start": 265, "domains": [], "cdna_coding_end": 1545, "is_best_transcript": "false", "start": "23427", "exons": [{"end": "23783", "start": "23427", "name": "ENSE00002541618"}, {"end": "33187", "start": "33119", "name": "ENSE00002921169"}, {"end": "39974", "start": "39831", "name": "ENSE00002741972"}, {"end": "41862", "name": "ENSE00002865492", "start": "41766"}, {"end": "46873", "name": "ENSE00002852607", "start": "46772"}, {"name": "ENSE00002899380", "start": "48882", "end": "48991"}, {"end": "56322", "name": "ENSE00002874525", "start": "56135"}, {"end": "62417", "start": "62260", "name": "ENSE00002766779"}, {"name": "ENSE00002819484", "start": "66445", "end": "66561"}, {"end": "68037", "name": "ENSE00002924785", "start": "67929"}, {"end": "71783", "name": "ENSE00002542797", "start": "70826"}], "name": "ENST00000260359"}, {"end": "41819", "cdna_coding_start": 104, "aliases": [], "domains": [], "cdna_coding_end": 334, "is_best_transcript": "false", "start": "23588", "name": "ENST00000559046", "exons": [{"end": "23783", "name": "ENSE00002574418", "start": "23588"}, {"end": "33187", "start": "33119", "name": "ENSE00002921169"}, {"end": "36335", "start": "36219", "name": "ENSE00002559031"}, {"end": "39974", "name": "ENSE00002847051", "start": "39831"}, {"name": "ENSE00002564510", "start": "41766", 
"end": "41819"}]}, {"aliases": ["NP_001230073.1"], "end": "70996", "cdna_coding_start": 88, "domains": [], "cdna_coding_end": 1224, "is_best_transcript": "false", "start": "23604", "exons": [{"end": "23783", "start": "23604", "name": "ENSE00002312096"}, {"name": "ENSE00002741972", "start": "39831", "end": "39974"}, {"start": "41721", "name": "ENSE00002920477", "end": "41862"}, {"end": "46873", "start": "46772", "name": "ENSE00002852607"}, {"name": "ENSE00002899380", "start": "48882", "end": "48991"}, {"end": "56322", "start": "56138", "name": "ENSE00001668934"}, {"name": "ENSE00002766779", "start": "62260", "end": "62417"}, {"start": "67929", "name": "ENSE00002924785", "end": "68037"}, {"end": "70996", "start": "70826", "name": "ENSE00002254025"}], "name": "ENST00000450592"}, {"cdna_coding_start": 88, "end": "71135", "aliases": ["NP_057443.2", "NM_016359.4"], "cdna_coding_end": 1413, "domains": [], "is_best_transcript": "false", "start": "23604", "exons": [{"start": "23604", "name": "ENSE00002312096", "end": "23783"}, {"end": "33187", "start": "33119", "name": "ENSE00002921169"}, {"name": "ENSE00002741972", "start": "39831", "end": "39974"}, {"end": "41862", "start": "41721", "name": "ENSE00002920477"}, {"name": "ENSE00002852607", "start": "46772", "end": "46873"}, {"end": "48991", "name": "ENSE00002899380", "start": "48882"}, {"end": "56322", "start": "56135", "name": "ENSE00002874525"}, {"name": "ENSE00002766779", "start": "62260", "end": "62417"}, {"start": "66445", "name": "ENSE00002819484", "end": "66561"}, {"start": "67929", "name": "ENSE00002924785", "end": "68037"}, {"name": "ENSE00002563931", "start": "70826", "end": "71135"}], "name": "ENST00000559596"}, {"exons": [{"end": "23783", "start": "23604", "name": "ENSE00002312096"}, {"start": "33119", "name": "ENSE00002921169", "end": "33187"}, {"name": "ENSE00002741972", "start": "39831", "end": "39974"}, {"end": "41862", "name": "ENSE00002920477", "start": "41721"}, {"name": "ENSE00002852607", "start": 
"46772", "end": "46873"}, {"start": "48882", "name": "ENSE00002899380", "end": "48991"}, {"end": "56322", "name": "ENSE00001668934", "start": "56138"}, {"end": "62417", "start": "62260", "name": "ENSE00002766779"}, {"start": "66445", "name": "ENSE00002819484", "end": "66561"}, {"start": "67929", "name": "ENSE00002924785", "end": "68037"}, {"end": "71783", "start": "70826", "name": "ENSE00002542797"}], "start": "23604", "name": "ENST00000414849", "is_best_transcript": "true", "domains": [], "cdna_coding_end": 1410, "aliases": ["NP_001230071.1", "NP_060924.4", "NM_001243142.1", "NM_018454.7"], "end": "71783", "cdna_coding_start": 88}, {"start": "23619", "name": "ENST00000560747", "exons": [{"end": "23783", "start": "23619", "name": "ENSE00002560678"}, {"start": "33119", "name": "ENSE00002921169", "end": "33187"}, {"end": "39974", "name": "ENSE00002741972", "start": "39831"}, {"end": "41862", "start": "41724", "name": "ENSE00002569544"}, {"end": "46873", "name": "ENSE00002852607", "start": "46772"}, {"end": "48991", "name": "ENSE00002899380", "start": "48882"}, {"end": "56322", "start": "56138", "name": "ENSE00001668934"}, {"start": "62260", "name": "ENSE00002766779", "end": "62417"}, {"end": "66561", "start": "66445", "name": "ENSE00002819484"}, {"name": "ENSE00002924785", "start": "67929", "end": "68037"}, {"end": "71783", "start": "70826", "name": "ENSE00002542797"}], "is_best_transcript": "false", "domains": [], "cdna_coding_end": 1392, "aliases": [], "end": "71783", "cdna_coding_start": 73}, {"is_best_transcript": "false", "exons": [{"end": "23783", "name": "ENSE00002572945", "start": "23646"}, {"end": "33187", "name": "ENSE00002921169", "start": "33119"}, {"start": "39831", "name": "ENSE00002741972", "end": "39974"}, {"end": "41862", "start": "41724", "name": "ENSE00002569544"}, {"end": "46873", "name": "ENSE00002852607", "start": "46772"}, {"start": "48882", "name": "ENSE00002899380", "end": "48991"}, {"end": "56322", "name": "ENSE00002874525", "start": 
"56135"}, {"start": "62260", "name": "ENSE00002766779", "end": "62417"}, {"name": "ENSE00002819484", "start": "66445", "end": "66561"}, {"start": "67929", "name": "ENSE00002924785", "end": "68037"}, {"start": "70826", "name": "ENSE00002542797", "end": "71783"}], "start": "23646", "name": "ENST00000560177", "end": "71783", "cdna_coding_start": 46, "aliases": [], "domains": [], "cdna_coding_end": 1368}, {"name": "ENST00000557840", "start": "33129", "exons": [{"end": "33187", "name": "ENSE00002562906", "start": "33129"}, {"end": "56322", "start": "56164", "name": "ENSE00002552359"}, {"name": "ENSE00002874978", "start": "62260", "end": "62417"}, {"end": "66484", "start": "66445", "name": "ENSE00002572000"}], "is_best_transcript": "false", "domains": [], "cdna_coding_end": 206, "aliases": [], "end": "66484", "cdna_coding_start": 1}, {"aliases": [], "end": "68005", "cdna_coding_start": 1, "domains": [], "cdna_coding_end": 435, "is_best_transcript": "false", "start": "41835", "exons": [{"end": "41862", "name": "ENSE00002545491", "start": "41835"}, {"start": "46772", "name": "ENSE00002852607", "end": "46873"}, {"name": "ENSE00002899380", "start": "48882", "end": "48991"}, {"start": "56138", "name": "ENSE00001668934", "end": "56322"}, {"name": "ENSE00002538934", "start": "56549", "end": "56596"}, {"end": "62417", "start": "62260", "name": "ENSE00002874978"}, {"end": "68005", "start": "67929", "name": "ENSE00002570378"}], "name": "ENST00000560898"}, {"is_best_transcript": "false", "start": "23461", "exons": [{"end": "23783", "start": "23461", "name": "ENSE00001206443"}, {"end": "33187", "start": "33119", "name": "ENSE00002921169"}, {"start": "39831", "name": "ENSE00002741972", "end": "39974"}, {"end": "41862", "name": "ENSE00002920477", "start": "41721"}, {"name": "ENSE00002852607", "start": "46772", "end": "46873"}, {"end": "48991", "start": "48882", "name": "ENSE00002899380"}, {"start": "56135", "name": "ENSE00002874525", "end": "56322"}, {"end": "62417", "start": "62260", 
"name": "ENSE00002766779"}, {"start": "67929", "name": "ENSE00002924785", "end": "68037"}, {"start": "70826", "name": "ENSE00000931000", "end": "71780"}], "name": "ENST00000450318", "aliases": [], "end": "71780", "cdna_coding_start": 231, "domains": [], "cdna_coding_end": 1439}], "end": "71783", "strand": "1", "start": "23427", "name": "ENSG00000137804"}, {"aliases": ["PFKFB2"], "chr": "gene3", "end": "31569", "strand": "1", "transcripts": [{"start": "3805", "exons": [{"end": "3911", "name": "ENSE00001872821", "start": "3805"}, {"name": "ENSE00002901181", "start": "5246", "end": "5347"}, {"start": "12498", "name": "ENSE00002934688", "end": "12623"}, {"end": "13261", "start": "13165", "name": "ENSE00002915251"}, {"name": "ENSE00002265243", "start": "13688", "end": "13754"}, {"start": "13892", "name": "ENSE00002223873", "end": "13966"}, {"name": "ENSE00002234935", "start": "14318", "end": "14374"}, {"start": "15581", "name": "ENSE00002233337", "end": "15705"}, {"end": "18251", "start": "18044", "name": "ENSE00000842580"}, {"name": "ENSE00000842581", "start": "18708", "end": "18854"}, {"start": "19969", "name": "ENSE00002876346", "end": "20073"}, {"end": "20954", "start": "20825", "name": "ENSE00001595535"}, {"name": "ENSE00002889740", "start": "21733", "end": "21795"}, {"name": "ENSE00002760808", "start": "22054", "end": "22118"}, {"start": "22749", "name": "ENSE00001185299", "end": "28368"}], "name": "ENST00000367080", "is_best_transcript": "true", "domains": [{"regions": [{"start": "40", "end": "193"}], "desc": "Chromatin_KTI12", "name": "PF08433"}, {"name": "SSF53254", "desc": "", "regions": [{"end": "469", "start": "251"}]}, {"desc": "Bifunct_6PFK/fruc_bisP_Ptase", "name": "PIRSF000709", "regions": [{"start": "1", "end": "488"}]}, {"regions": [{"end": "250", "start": "34"}], "name": "SSF52540", "desc": ""}, {"regions": [{"end": "249", "start": "27"}], "name": "PF01591", "desc": "6Phosfructo_kin"}, {"desc": "6Pfruct_kin", "name": "PR00991", "regions": [{"start": 
"124", "end": "138"}, {"start": "150", "end": "164"}, {"end": "190", "start": "176"}, {"start": "230", "end": "251"}, {"start": "252", "end": "274"}, {"start": "329", "end": "345"}]}, {"regions": [{"end": "398", "start": "251"}], "desc": "His_Pase_superF_clade-1", "name": "PF00300"}, {"desc": "His_Pase_superF_clade-1", "name": "SM00855", "regions": [{"start": "251", "end": "398"}]}], "cdna_coding_end": 1642, "end": "28368", "cdna_coding_start": 125, "aliases": ["NP_006203.2", "NM_006212.2"]}, {"aliases": ["NP_001018063.1", "NM_001018053.1"], "end": "31569", "cdna_coding_start": 74, "domains": [{"name": "SSF52540", "desc": "", "regions": [{"end": "250", "start": "34"}]}, {"regions": [{"start": "251", "end": "398"}], "name": "SM00855", "desc": "His_Pase_superF_clade-1"}, {"regions": [{"start": "124", "end": "138"}, {"start": "150", "end": "164"}, {"start": "176", "end": "190"}, {"start": "230", "end": "251"}, {"start": "252", "end": "274"}, {"start": "329", "end": "345"}], "desc": "6Pfruct_kin", "name": "PR00991"}, {"desc": "His_Pase_superF_clade-1", "name": "PF00300", "regions": [{"start": "251", "end": "398"}]}, {"regions": [{"end": "249", "start": "27"}], "name": "PF01591", "desc": "6Phosfructo_kin"}, {"name": "PIRSF000709", "desc": "Bifunct_6PFK/fruc_bisP_Ptase", "regions": [{"end": "468", "start": "1"}]}, {"name": "SSF53254", "desc": "", "regions": [{"end": "469", "start": "251"}]}, {"name": "PF08433", "desc": "Chromatin_KTI12", "regions": [{"start": "40", "end": "193"}]}], "cdna_coding_end": 1489, "is_best_transcript": "false", "start": "3856", "name": "ENST00000367079", "exons": [{"start": "3856", "name": "ENSE00001818425", "end": "3911"}, {"end": "5347", "name": "ENSE00002901181", "start": "5246"}, {"end": "12623", "start": "12498", "name": "ENSE00002934688"}, {"end": "13261", "name": "ENSE00002915251", "start": "13165"}, {"end": "13754", "start": "13688", "name": "ENSE00002265243"}, {"end": "13966", "name": "ENSE00002223873", "start": "13892"}, {"end": 
"14374", "name": "ENSE00002234935", "start": "14318"}, {"start": "15581", "name": "ENSE00002233337", "end": "15705"}, {"name": "ENSE00000842580", "start": "18044", "end": "18251"}, {"name": "ENSE00000842581", "start": "18708", "end": "18854"}, {"end": "20073", "start": "19969", "name": "ENSE00002876346"}, {"name": "ENSE00001595535", "start": "20825", "end": "20954"}, {"end": "21795", "start": "21733", "name": "ENSE00002889740"}, {"name": "ENSE00002760808", "start": "22054", "end": "22118"}, {"start": "29499", "name": "ENSE00001443434", "end": "31569"}]}, {"aliases": [], "end": "22950", "cdna_coding_start": 227, "domains": [{"regions": [{"end": "160", "start": "7"}], "name": "PF08433", "desc": "Chromatin_KTI12"}, {"regions": [{"start": "1", "end": "455"}], "desc": "Bifunct_6PFK/fruc_bisP_Ptase", "name": "PIRSF000709"}, {"regions": [{"end": "436", "start": "218"}], "desc": "", "name": "SSF53254"}, {"regions": [{"end": "105", "start": "91"}, {"end": "131", "start": "117"}, {"start": "143", "end": "157"}, {"start": "197", "end": "218"}, {"end": "241", "start": "219"}, {"start": "296", "end": "312"}], "name": "PR00991", "desc": "6Pfruct_kin"}, {"name": "PF01591", "desc": "6Phosfructo_kin", "regions": [{"end": "216", "start": "2"}]}, {"name": "PF00300", "desc": "His_Pase_superF_clade-1", "regions": [{"start": "218", "end": "365"}]}, {"name": "SM00855", "desc": "His_Pase_superF_clade-1", "regions": [{"start": "218", "end": "365"}]}, {"regions": [{"start": "1", "end": "217"}], "name": "SSF52540", "desc": ""}], "cdna_coding_end": 1645, "is_best_transcript": "false", "start": "5245", "exons": [{"start": "5245", "name": "ENSE00002308738", "end": "5347"}, {"name": "ENSE00002226272", "start": "11367", "end": "11475"}, {"end": "12623", "name": "ENSE00002905114", "start": "12498"}, {"end": "13261", "start": "13165", "name": "ENSE00002915251"}, {"name": "ENSE00002265243", "start": "13688", "end": "13754"}, {"end": "13966", "start": "13892", "name": "ENSE00002223873"}, {"end": 
"14374", "name": "ENSE00002234935", "start": "14318"}, {"end": "15705", "start": "15581", "name": "ENSE00002233337"}, {"end": "18251", "start": "18044", "name": "ENSE00000842580"}, {"end": "18854", "start": "18708", "name": "ENSE00000842581"}, {"end": "20073", "name": "ENSE00002876346", "start": "19969"}, {"name": "ENSE00001595535", "start": "20825", "end": "20954"}, {"name": "ENSE00002889740", "start": "21733", "end": "21795"}, {"name": "ENSE00002760808", "start": "22054", "end": "22118"}, {"name": "ENSE00002308130", "start": "22749", "end": "22950"}], "name": "ENST00000545806"}, {"exons": [{"end": "162", "start": "1", "name": "ENSE00002231494"}, {"name": "ENSE00002892386", "start": "5246", "end": "5347"}, {"name": "ENSE00002797095", "start": "12498", "end": "12623"}, {"name": "ENSE00002308344", "start": "13092", "end": "13261"}, {"name": "ENSE00002265243", "start": "13688", "end": "13754"}, {"end": "13966", "name": "ENSE00002223873", "start": "13892"}, {"start": "14318", "name": "ENSE00002234935", "end": "14374"}, {"start": "15581", "name": "ENSE00002233337", "end": "15705"}, {"end": "18251", "start": "18044", "name": "ENSE00000842580"}, {"end": "18854", "start": "18708", "name": "ENSE00000842581"}, {"end": "20073", "start": "19969", "name": "ENSE00002876346"}, {"name": "ENSE00001595535", "start": "20825", "end": "20954"}, {"end": "21795", "start": "21733", "name": "ENSE00002889740"}, {"start": "22054", "name": "ENSE00002760808", "end": "22118"}, {"end": "29679", "start": "29499", "name": "ENSE00001791500"}], "start": "1", "name": "ENST00000411990", "is_best_transcript": "false", "cdna_coding_end": 1668, "domains": [{"desc": "Bifunct_6PFK/fruc_bisP_Ptase", "name": "PIRSF000709", "regions": [{"start": "1", "end": "370"}]}, {"desc": "", "name": "SSF53254", "regions": [{"end": "371", "start": "153"}]}, {"desc": "", "name": "SSF52540", "regions": [{"start": "4", "end": "152"}]}, {"desc": "6Pfruct_kin", "name": "PR00991", "regions": [{"start": "26", "end": "40"}, 
{"start": "52", "end": "66"}, {"start": "78", "end": "92"}, {"start": "132", "end": "153"}, {"end": "176", "start": "154"}, {"start": "231", "end": "247"}]}, {"name": "PF01591", "desc": "6Phosfructo_kin", "regions": [{"end": "151", "start": "1"}]}, {"regions": [{"end": "300", "start": "153"}], "name": "PF00300", "desc": "His_Pase_superF_clade-1"}, {"regions": [{"start": "153", "end": "300"}], "name": "SM00855", "desc": "His_Pase_superF_clade-1"}], "aliases": [], "cdna_coding_start": 547, "end": "29679"}, {"cdna_coding_start": 84, "end": "29931", "aliases": [], "cdna_coding_end": 878, "domains": [{"regions": [{"start": "1", "end": "64"}], "name": "SSF52540", "desc": ""}, {"desc": "", "name": "SSF53254", "regions": [{"end": "262", "start": "65"}]}, {"desc": "Bifunct_6PFK/fruc_bisP_Ptase", "name": "PIRSF000709", "regions": [{"start": "1", "end": "261"}]}, {"regions": [{"start": "65", "end": "212"}], "name": "SM00855", "desc": "His_Pase_superF_clade-1"}, {"name": "PF01591", "desc": "6Phosfructo_kin", "regions": [{"end": "63", "start": "1"}]}, {"desc": "6Pfruct_kin", "name": "PR00991", "regions": [{"start": "44", "end": "65"}, {"start": "66", "end": "88"}, {"start": "143", "end": "159"}]}, {"regions": [{"end": "212", "start": "65"}], "name": "PF00300", "desc": "His_Pase_superF_clade-1"}], "is_best_transcript": "false", "start": "15549", "name": "ENST00000541914", "exons": [{"name": "ENSE00002294124", "start": "15549", "end": "15705"}, {"name": "ENSE00000842580", "start": "18044", "end": "18251"}, {"start": "18708", "name": "ENSE00000842581", "end": "18854"}, {"end": "20073", "start": "19969", "name": "ENSE00002876346"}, {"start": "20825", "name": "ENSE00001595535", "end": "20954"}, {"name": "ENSE00002760808", "start": "22054", "end": "22118"}, {"name": "ENSE00002284395", "start": "29499", "end": "29931"}]}], "start": "1", "name": "ENSG00000123836"}, {"start": "1", "name": "ENSG00000187416", "chr": "gene4", "aliases": ["LHFPL3"], "transcripts": [{"domains": [{"regions": 
[{"start": "22", "end": "199"}], "desc": "Lipome_HGMIC_fus_partner-like", "name": "PF10242"}], "cdna_coding_end": 835, "aliases": [], "end": "578576", "cdna_coding_start": 167, "exons": [{"end": "569", "name": "ENSE00001713788", "start": "1"}, {"end": "408255", "start": "408019", "name": "ENSE00001349915"}, {"end": "578576", "start": "577531", "name": "ENSE00001723245"}], "start": "1", "name": "ENST00000424859", "is_best_transcript": "false"}, {"start": "45", "name": "ENST00000401970", "exons": [{"end": "569", "name": "ENSE00001554382", "start": "45"}, {"end": "408255", "start": "408019", "name": "ENSE00001349915"}, {"end": "516773", "name": "ENSE00001746769", "start": "516726"}, {"end": "578098", "start": "577531", "name": "ENSE00001593689"}], "is_best_transcript": "false", "domains": [{"desc": "Lipome_HGMIC_fus_partner-like", "name": "PF10242", "regions": [{"end": "199", "start": "22"}]}], "cdna_coding_end": 779, "aliases": [], "end": "578098", "cdna_coding_start": 123}, {"end": "579898", "cdna_coding_start": 125, "aliases": ["NP_945351.1", "NM_199000.2"], "domains": [{"desc": "Lipome_HGMIC_fus_partner-like", "name": "PF10242", "regions": [{"end": "213", "start": "36"}]}, {"regions": [{"end": "22", "start": "4"}], "desc": "", "name": "PS50310"}], "cdna_coding_end": 835, "is_best_transcript": "true", "start": "1", "name": "ENST00000535008", "exons": [{"start": "1", "name": "ENSE00002286305", "end": "573"}, {"end": "141399", "name": "ENSE00002499039", "start": "141389"}, {"name": "ENSE00002324321", "start": "294348", "end": "294368"}, {"end": "408255", "name": "ENSE00002278044", "start": "408055"}, {"name": "ENSE00002230996", "start": "577531", "end": "579898"}]}, {"aliases": [], "cdna_coding_start": 81, "end": "578099", "cdna_coding_end": 779, "domains": [{"name": "PS50310", "desc": "", "regions": [{"start": "4", "end": "22"}]}, {"desc": "Lipome_HGMIC_fus_partner-like", "name": "PF10242", "regions": [{"end": "213", "start": "36"}]}], "is_best_transcript": "false", 
"name": "ENST00000543266", "start": "45", "exons": [{"end": "573", "name": "ENSE00002312340", "start": "45"}, {"name": "ENSE00002499039", "start": "141389", "end": "141399"}, {"start": "294348", "name": "ENSE00002324321", "end": "294368"}, {"end": "408255", "start": "408055", "name": "ENSE00002278044"}, {"end": "516773", "start": "516726", "name": "ENSE00001746769"}, {"end": "578099", "start": "577531", "name": "ENSE00002270291"}]}], "strand": "1", "end": "579898"}, {"start": "1", "name": "ENSG00000122565", "strand": "1", "end": "12195", "transcripts": [{"cdna_coding_start": 429, "end": "12195", "aliases": ["NP_009207.2", "NM_007276.4"], "cdna_coding_end": 980, "domains": [{"name": "PR00504", "desc": "Chromo_dom_subgr", "regions": [{"end": "35", "start": "27"}, {"end": "54", "start": "40"}, {"end": "67", "start": "55"}]}, {"name": "PF01393", "desc": "Chromo_shadow_dom", "regions": [{"start": "119", "end": "176"}]}, {"regions": [{"end": "175", "start": "106"}, {"start": "15", "end": "79"}], "name": "SSF54160", "desc": "Chromodomain-like"}, {"regions": [{"end": "78", "start": "30"}], "name": "PF00385", "desc": "Chromo_domain"}, {"regions": [{"start": "29", "end": "81"}, {"start": "120", "end": "172"}], "name": "SM00298", "desc": "Chromo_domain/shadow"}, {"regions": [{"end": "177", "start": "115"}], "name": "SM00300", "desc": "Chromo_shadow_dom"}, {"desc": "Chromo_domain/shadow", "name": "PS50013", "regions": [{"end": "88", "start": "30"}, {"end": "179", "start": "121"}]}], "is_best_transcript": "true", "exons": [{"end": "400", "start": "1", "name": "ENSE00001189828"}, {"end": "1861", "name": "ENSE00001433792", "start": "1810"}, {"start": "5207", "name": "ENSE00002858776", "end": "5349"}, {"start": "7232", "name": "ENSE00002882596", "end": "7394"}, {"end": "10595", "start": "10501", "name": "ENSE00002768648"}, {"start": "10921", "name": "ENSE00002778428", "end": "12195"}], "start": "1", "name": "ENST00000337620"}, {"start": "532", "name": "ENST00000396386", "exons": 
[{"start": "532", "name": "ENSE00001524790", "end": "665"}, {"start": "1810", "name": "ENSE00001433792", "end": "1861"}, {"end": "5349", "name": "ENSE00002858776", "start": "5207"}, {"name": "ENSE00002882596", "start": "7232", "end": "7394"}, {"start": "10501", "name": "ENSE00002768648", "end": "10595"}, {"name": "ENSE00001553667", "start": "10921", "end": "12190"}], "is_best_transcript": "false", "cdna_coding_end": 714, "domains": [{"regions": [{"start": "115", "end": "177"}], "name": "SM00300", "desc": "Chromo_shadow_dom"}, {"regions": [{"end": "88", "start": "30"}, {"end": "179", "start": "121"}], "desc": "Chromo_domain/shadow", "name": "PS50013"}, {"name": "SM00298", "desc": "Chromo_domain/shadow", "regions": [{"start": "29", "end": "81"}, {"start": "120", "end": "172"}]}, {"desc": "Chromo_dom_subgr", "name": "PR00504", "regions": [{"end": "35", "start": "27"}, {"end": "54", "start": "40"}, {"start": "55", "end": "67"}]}, {"regions": [{"start": "106", "end": "175"}, {"start": "15", "end": "79"}], "name": "SSF54160", "desc": "Chromodomain-like"}, {"name": "PF01393", "desc": "Chromo_shadow_dom", "regions": [{"start": "119", "end": "176"}]}, {"regions": [{"end": "78", "start": "30"}], "name": "PF00385", "desc": "Chromo_domain"}], "cdna_coding_start": 163, "end": "12190", "aliases": ["NP_057671.2", "NM_016587.3"]}, {"cdna_coding_end": 1068, "domains": [{"name": "PF00385", "desc": "Chromo_domain", "regions": [{"end": "62", "start": "30"}]}, {"name": "SSF54160", "desc": "Chromodomain-like", "regions": [{"start": "22", "end": "62"}]}, {"desc": "Chromo_dom_subgr", "name": "PR00504", "regions": [{"end": "35", "start": "27"}, {"start": "40", "end": "54"}, {"start": "55", "end": "62"}]}, {"regions": [{"start": "30", "end": "62"}], "name": "PS50013", "desc": "Chromo_domain/shadow"}], "cdna_coding_start": 882, "end": "7251", "aliases": [], "start": "596", "name": "ENST00000456948", "exons": [{"end": "1448", "start": "596", "name": "ENSE00001696375"}, {"start": "1810", 
"name": "ENSE00001433792", "end": "1861"}, {"end": "5349", "name": "ENSE00002858776", "start": "5207"}, {"start": "7232", "name": "ENSE00001778411", "end": "7251"}], "is_best_transcript": "false"}, {"is_best_transcript": "false", "start": "604", "name": "ENST00000409747", "exons": [{"end": "665", "start": "604", "name": "ENSE00001588865"}, {"end": "1861", "start": "1810", "name": "ENSE00001433792"}, {"start": "5207", "name": "ENSE00002858776", "end": "5349"}, {"name": "ENSE00001577751", "start": "7305", "end": "7394"}, {"start": "10501", "name": "ENSE00002765595", "end": "10595"}, {"start": "10921", "name": "ENSE00002880768", "end": "12195"}], "cdna_coding_start": 91, "end": "12195", "aliases": [], "cdna_coding_end": 396, "domains": [{"regions": [{"start": "15", "end": "56"}], "desc": "Chromodomain-like", "name": "SSF54160"}, {"regions": [{"end": "57", "start": "30"}], "desc": "Chromo_domain", "name": "PF00385"}, {"desc": "Chromo_domain/shadow", "name": "PS50013", "regions": [{"start": "30", "end": "91"}]}]}], "aliases": ["CBX3"], "chr": "gene5"}, {"name": "ENSG00000171862", "start": "1", "end": "108818", "strand": "1", "transcripts": [{"aliases": ["NP_000305.3", "NM_000314.4"], "cdna_coding_start": 1358, "end": "108818", "cdna_coding_end": 2569, "domains": [{"desc": "C2_Ca/lipid-bd_dom_CaLB", "name": "SSF49562", "regions": [{"start": "188", "end": "351"}]}, {"regions": [{"end": "159", "start": "80"}], "name": "PF00782", "desc": "Dual-sp_phosphatase_cat-dom"}, {"desc": "Bifunc_PIno_P3_Pase/Pase_PTEN", "name": "PIRSF038025", "regions": [{"start": "1", "end": "403"}]}, {"regions": [{"start": "59", "end": "181"}], "name": "PF00102", "desc": "Tyr_Pase_rcpt/non-rcpt"}, {"desc": "Tensin_phosphatase_C2-dom", "name": "PF10409", "regions": [{"end": "349", "start": "188"}]}, {"regions": [{"end": "350", "start": "190"}], "desc": "Tensin_phosphatase_C2-dom", "name": "PS51182"}, {"name": "SM00404", "desc": "Tyr_Pase_cat", "regions": [{"end": "183", "start": "23"}]}, {"regions": 
[{"end": "187", "start": "14"}], "desc": "", "name": "SSF52799"}, {"regions": [{"start": "102", "end": "173"}], "desc": "Tyr/Dual-specificity_Pase", "name": "PS50056"}, {"regions": [{"end": "185", "start": "14"}], "name": "PS51181", "desc": "Phosphatase_tensin-typ"}], "is_best_transcript": "true", "start": "1", "name": "ENST00000371953", "exons": [{"end": "1436", "start": "1", "name": "ENSE00001456562"}, {"end": "30997", "name": "ENSE00001156351", "start": "30913"}, {"end": "62445", "start": "62401", "name": "ENSE00001156344"}, {"end": "67977", "name": "ENSE00002779611", "start": "67934"}, {"start": "69901", "name": "ENSE00001156330", "end": "70139"}, {"end": "89147", "start": "89006", "name": "ENSE00001156327"}, {"end": "94907", "start": "94741", "name": "ENSE00002737042"}, {"start": "97782", "name": "ENSE00001156315", "end": "98006"}, {"end": "108818", "name": "ENSE00001456541", "start": "102175"}]}], "aliases": ["PTEN"], "chr": "gene6"}], "best_transcript_file": "/home/creisle/svn/ensembl_flatfiles/ens69_best_transcript.txt", "script": "generate_ensembl_json_temp.pl", "hugo_mapping_file": "/projects/tumour_char/analysis_scripts/databases/processed_files/drug_target_tables/compiled_gene_drug_pathway.v1_2_4.tsv", "generation_time": "Tue Feb 28 11:58:00 2017", "script_version": "2.1.4"}
\ No newline at end of file
+{"ensembl_version": 69, "genes": [{"name": "ENSG00000128891", "start": 1, "strand": "-", "end": 36375, "transcripts": [{"is_best_transcript": false, "exons": [{"end": 36329, "name": "ENSE00002560148", "start": 36294}, {"end": 34345, "start": 34090, "name": "ENSE00002761197"}, {"end": 28690, "name": "ENSE00002865842", "start": 28534}, {"name": "ENSE00002539417", "start": 1, "end": 108}], "start": 1, "name": "ENST00000560305", "end": 36329, "cdna_coding_start": 49, "aliases": [], "domains": [], "cdna_coding_end": 498}, {"start": 2649, "exons": [{"end": 34345, "name": "ENSE00002761197", "start": 34090}, {"start": 28534, "name": "ENSE00002865842", "end": 28690}, {"name": "ENSE00002540983", "start": 24913, "end": 25445}, {"name": "ENSE00002563185", "start": 2649, "end": 3620}], "name": "ENST00000559153", "is_best_transcript": false, "domains": [], "cdna_coding_end": 570, "end": 34345, "cdna_coding_start": 13, "aliases": []}, {"is_best_transcript": false, "exons": [{"end": 36310, "name": "ENSE00002537918", "start": 36109}, {"start": 34090, "name": "ENSE00002761197", "end": 34345}, {"end": 28690, "start": 28534, "name": "ENSE00002865842"}, {"start": 2659, "name": "ENSE00002565021", "end": 3620}], "start": 2659, "name": "ENST00000561011", "end": 36310, "cdna_coding_start": 215, "aliases": [], "domains": [], "cdna_coding_end": 619}, {"is_best_transcript": false, "exons": [{"end": 36307, "name": "ENSE00002562556", "start": 36294}, {"end": 34345, "name": "ENSE00002761197", "start": 34090}, {"name": "ENSE00002865842", "start": 28534, "end": 28690}, {"end": 25445, "name": "ENSE00002555814", "start": 24755}, {"name": "ENSE00002547244", "start": 5661, "end": 10673}], "start": 5661, "name": "ENST00000559291", "cdna_coding_start": 27, "end": 36307, "aliases": [], "cdna_coding_end": 584, "domains": []}, {"is_best_transcript": false, "name": "ENST00000559911", "start": 6218, "exons": [{"end": 36329, "name": "ENSE00002560148", "start": 36294}, {"end": 34345, "start": 34090, "name": 
"ENSE00002761197"}, {"start": 28534, "name": "ENSE00002865842", "end": 28690}, {"name": "ENSE00002551447", "start": 10557, "end": 10673}, {"name": "ENSE00002544927", "start": 6218, "end": 6328}], "aliases": [], "end": 36329, "cdna_coding_start": 49, "domains": [], "cdna_coding_end": 660}, {"exons": [{"name": "ENSE00002560148", "start": 36294, "end": 36329}, {"name": "ENSE00002761197", "start": 34090, "end": 34345}, {"name": "ENSE00002865842", "start": 28534, "end": 28690}, {"start": 10547, "name": "ENSE00002549191", "end": 10673}], "start": 10547, "name": "ENST00000558113", "is_best_transcript": false, "domains": [], "cdna_coding_end": 570, "aliases": [], "end": 36329, "cdna_coding_start": 49}, {"cdna_coding_end": 832, "domains": [], "aliases": ["NP_443081.1", "NM_052849.2"], "cdna_coding_start": 275, "end": 36370, "start": 24417, "exons": [{"name": "ENSE00001528957", "start": 36109, "end": 36370}, {"start": 34090, "name": "ENSE00002761197", "end": 34345}, {"start": 28534, "name": "ENSE00002865842", "end": 28690}, {"end": 25445, "start": 24417, "name": "ENSE00001933860"}], "name": "ENST00000358005", "is_best_transcript": true}, {"name": "ENST00000416810", "start": 24422, "exons": [{"end": 36375, "start": 36294, "name": "ENSE00002565648"}, {"end": 34345, "start": 34090, "name": "ENSE00002761197"}, {"start": 28534, "name": "ENSE00002865842", "end": 28690}, {"end": 25445, "start": 24422, "name": "ENSE00001427910"}], "is_best_transcript": false, "domains": [], "cdna_coding_end": 652, "aliases": ["NP_001074261.1", "NM_001080792.1"], "end": 36375, "cdna_coding_start": 95}, {"name": "ENST00000558750", "start": 25260, "exons": [{"start": 36084, "name": "ENSE00001759032", "end": 36298}, {"name": "ENSE00002815778", "start": 34090, "end": 34345}, {"name": "ENSE00002865842", "start": 28534, "end": 28690}, {"start": 25260, "name": "ENSE00002570697", "end": 25445}], "is_best_transcript": false, "domains": [], "cdna_coding_end": 785, "aliases": ["NP_001074260.1", 
"NM_001080791.1"], "end": 36298, "cdna_coding_start": 201}, {"cdna_coding_start": 98, "end": 36337, "aliases": [], "cdna_coding_end": 349, "domains": [], "is_best_transcript": false, "exons": [{"end": 36337, "start": 36294, "name": "ENSE00002551707"}, {"end": 28690, "start": 28534, "name": "ENSE00002785665"}, {"end": 25445, "start": 25298, "name": "ENSE00002565968"}], "start": 25298, "name": "ENST00000558918"}, {"cdna_coding_end": 557, "domains": [], "aliases": [], "cdna_coding_start": 47, "end": 36332, "start": 25336, "exons": [{"end": 36332, "start": 36294, "name": "ENSE00002549749"}, {"end": 34340, "name": "ENSE00002571615", "start": 34090}, {"end": 28690, "name": "ENSE00002865842", "start": 28534}, {"name": "ENSE00002556846", "start": 25336, "end": 25445}], "name": "ENST00000559103", "is_best_transcript": false}, {"cdna_coding_end": 484, "domains": [], "aliases": [], "cdna_coding_start": 80, "end": 36360, "name": "ENST00000558871", "start": 27939, "exons": [{"end": 36360, "name": "ENSE00002572819", "start": 36294}, {"start": 34090, "name": "ENSE00002761197", "end": 34345}, {"end": 28690, "start": 27939, "name": "ENSE00002536768"}], "is_best_transcript": false}], "aliases": ["C15orf57"], "chr": "gene1"}, {"name": "ENSG00000104147", "start": 1, "transcripts": [{"cdna_coding_end": 750, "domains": [], "cdna_coding_start": 61, "end": 23354, "aliases": ["NP_009211.1", "NM_007280.1"], "exons": [{"end": 23354, "name": "ENSE00000930990", "start": 22973}, {"end": 22714, "start": 22648, "name": "ENSE00000930989"}, {"start": 10391, "name": "ENSE00000942405", "end": 10513}, {"start": 4006, "name": "ENSE00000942406", "end": 4087}, {"end": 582, "name": "ENSE00000884003", "start": 1}], "start": 1, "name": "ENST00000220514", "is_best_transcript": true}, {"is_best_transcript": false, "name": "ENST00000560640", "start": 495, "exons": [{"end": 23220, "start": 22973, "name": "ENSE00002541746"}, {"name": "ENSE00000930989", "start": 22648, "end": 22714}, {"end": 4087, "name": 
"ENSE00000942406", "start": 4006}, {"start": 495, "name": "ENSE00002548921", "end": 582}], "aliases": [], "end": 23220, "cdna_coding_start": 1, "domains": [], "cdna_coding_end": 485}], "end": 23354, "strand": "-", "chr": "gene2", "aliases": ["OIP5"]}, {"chr": "gene2", "aliases": ["NUSAP1"], "transcripts": [{"aliases": ["NP_001230072.1", "NM_001243143.1"], "end": 71783, "cdna_coding_start": 265, "domains": [], "cdna_coding_end": 1545, "is_best_transcript": false, "start": 23427, "exons": [{"end": 23783, "start": 23427, "name": "ENSE00002541618"}, {"end": 33187, "start": 33119, "name": "ENSE00002921169"}, {"end": 39974, "start": 39831, "name": "ENSE00002741972"}, {"end": 41862, "name": "ENSE00002865492", "start": 41766}, {"end": 46873, "name": "ENSE00002852607", "start": 46772}, {"name": "ENSE00002899380", "start": 48882, "end": 48991}, {"end": 56322, "name": "ENSE00002874525", "start": 56135}, {"end": 62417, "start": 62260, "name": "ENSE00002766779"}, {"name": "ENSE00002819484", "start": 66445, "end": 66561}, {"end": 68037, "name": "ENSE00002924785", "start": 67929}, {"end": 71783, "name": "ENSE00002542797", "start": 70826}], "name": "ENST00000260359"}, {"end": 41819, "cdna_coding_start": 104, "aliases": [], "domains": [], "cdna_coding_end": 334, "is_best_transcript": false, "start": 23588, "name": "ENST00000559046", "exons": [{"end": 23783, "name": "ENSE00002574418", "start": 23588}, {"end": 33187, "start": 33119, "name": "ENSE00002921169"}, {"end": 36335, "start": 36219, "name": "ENSE00002559031"}, {"end": 39974, "name": "ENSE00002847051", "start": 39831}, {"name": "ENSE00002564510", "start": 41766, "end": 41819}]}, {"aliases": ["NP_001230073.1"], "end": 70996, "cdna_coding_start": 88, "domains": [], "cdna_coding_end": 1224, "is_best_transcript": false, "start": 23604, "exons": [{"end": 23783, "start": 23604, "name": "ENSE00002312096"}, {"name": "ENSE00002741972", "start": 39831, "end": 39974}, {"start": 41721, "name": "ENSE00002920477", "end": 41862}, {"end": 
46873, "start": 46772, "name": "ENSE00002852607"}, {"name": "ENSE00002899380", "start": 48882, "end": 48991}, {"end": 56322, "start": 56138, "name": "ENSE00001668934"}, {"name": "ENSE00002766779", "start": 62260, "end": 62417}, {"start": 67929, "name": "ENSE00002924785", "end": 68037}, {"end": 70996, "start": 70826, "name": "ENSE00002254025"}], "name": "ENST00000450592"}, {"cdna_coding_start": 88, "end": 71135, "aliases": ["NP_057443.2", "NM_016359.4"], "cdna_coding_end": 1413, "domains": [], "is_best_transcript": false, "start": 23604, "exons": [{"start": 23604, "name": "ENSE00002312096", "end": 23783}, {"end": 33187, "start": 33119, "name": "ENSE00002921169"}, {"name": "ENSE00002741972", "start": 39831, "end": 39974}, {"end": 41862, "start": 41721, "name": "ENSE00002920477"}, {"name": "ENSE00002852607", "start": 46772, "end": 46873}, {"end": 48991, "name": "ENSE00002899380", "start": 48882}, {"end": 56322, "start": 56135, "name": "ENSE00002874525"}, {"name": "ENSE00002766779", "start": 62260, "end": 62417}, {"start": 66445, "name": "ENSE00002819484", "end": 66561}, {"start": 67929, "name": "ENSE00002924785", "end": 68037}, {"name": "ENSE00002563931", "start": 70826, "end": 71135}], "name": "ENST00000559596"}, {"exons": [{"end": 23783, "start": 23604, "name": "ENSE00002312096"}, {"start": 33119, "name": "ENSE00002921169", "end": 33187}, {"name": "ENSE00002741972", "start": 39831, "end": 39974}, {"end": 41862, "name": "ENSE00002920477", "start": 41721}, {"name": "ENSE00002852607", "start": 46772, "end": 46873}, {"start": 48882, "name": "ENSE00002899380", "end": 48991}, {"end": 56322, "name": "ENSE00001668934", "start": 56138}, {"end": 62417, "start": 62260, "name": "ENSE00002766779"}, {"start": 66445, "name": "ENSE00002819484", "end": 66561}, {"start": 67929, "name": "ENSE00002924785", "end": 68037}, {"end": 71783, "start": 70826, "name": "ENSE00002542797"}], "start": 23604, "name": "ENST00000414849", "is_best_transcript": true, "domains": [], "cdna_coding_end": 
1410, "aliases": ["NP_001230071.1", "NP_060924.4", "NM_001243142.1", "NM_018454.7"], "end": 71783, "cdna_coding_start": 88}, {"start": 23619, "name": "ENST00000560747", "exons": [{"end": 23783, "start": 23619, "name": "ENSE00002560678"}, {"start": 33119, "name": "ENSE00002921169", "end": 33187}, {"end": 39974, "name": "ENSE00002741972", "start": 39831}, {"end": 41862, "start": 41724, "name": "ENSE00002569544"}, {"end": 46873, "name": "ENSE00002852607", "start": 46772}, {"end": 48991, "name": "ENSE00002899380", "start": 48882}, {"end": 56322, "start": 56138, "name": "ENSE00001668934"}, {"start": 62260, "name": "ENSE00002766779", "end": 62417}, {"end": 66561, "start": 66445, "name": "ENSE00002819484"}, {"name": "ENSE00002924785", "start": 67929, "end": 68037}, {"end": 71783, "start": 70826, "name": "ENSE00002542797"}], "is_best_transcript": false, "domains": [], "cdna_coding_end": 1392, "aliases": [], "end": 71783, "cdna_coding_start": 73}, {"is_best_transcript": false, "exons": [{"end": 23783, "name": "ENSE00002572945", "start": 23646}, {"end": 33187, "name": "ENSE00002921169", "start": 33119}, {"start": 39831, "name": "ENSE00002741972", "end": 39974}, {"end": 41862, "start": 41724, "name": "ENSE00002569544"}, {"end": 46873, "name": "ENSE00002852607", "start": 46772}, {"start": 48882, "name": "ENSE00002899380", "end": 48991}, {"end": 56322, "name": "ENSE00002874525", "start": 56135}, {"start": 62260, "name": "ENSE00002766779", "end": 62417}, {"name": "ENSE00002819484", "start": 66445, "end": 66561}, {"start": 67929, "name": "ENSE00002924785", "end": 68037}, {"start": 70826, "name": "ENSE00002542797", "end": 71783}], "start": 23646, "name": "ENST00000560177", "end": 71783, "cdna_coding_start": 46, "aliases": [], "domains": [], "cdna_coding_end": 1368}, {"name": "ENST00000557840", "start": 33129, "exons": [{"end": 33187, "name": "ENSE00002562906", "start": 33129}, {"end": 56322, "start": 56164, "name": "ENSE00002552359"}, {"name": "ENSE00002874978", "start": 62260, 
"end": 62417}, {"end": 66484, "start": 66445, "name": "ENSE00002572000"}], "is_best_transcript": false, "domains": [], "cdna_coding_end": 206, "aliases": [], "end": 66484, "cdna_coding_start": 1}, {"aliases": [], "end": 68005, "cdna_coding_start": 1, "domains": [], "cdna_coding_end": 435, "is_best_transcript": false, "start": 41835, "exons": [{"end": 41862, "name": "ENSE00002545491", "start": 41835}, {"start": 46772, "name": "ENSE00002852607", "end": 46873}, {"name": "ENSE00002899380", "start": 48882, "end": 48991}, {"start": 56138, "name": "ENSE00001668934", "end": 56322}, {"name": "ENSE00002538934", "start": 56549, "end": 56596}, {"end": 62417, "start": 62260, "name": "ENSE00002874978"}, {"end": 68005, "start": 67929, "name": "ENSE00002570378"}], "name": "ENST00000560898"}, {"is_best_transcript": false, "start": 23461, "exons": [{"end": 23783, "start": 23461, "name": "ENSE00001206443"}, {"end": 33187, "start": 33119, "name": "ENSE00002921169"}, {"start": 39831, "name": "ENSE00002741972", "end": 39974}, {"end": 41862, "name": "ENSE00002920477", "start": 41721}, {"name": "ENSE00002852607", "start": 46772, "end": 46873}, {"end": 48991, "start": 48882, "name": "ENSE00002899380"}, {"start": 56135, "name": "ENSE00002874525", "end": 56322}, {"end": 62417, "start": 62260, "name": "ENSE00002766779"}, {"start": 67929, "name": "ENSE00002924785", "end": 68037}, {"start": 70826, "name": "ENSE00000931000", "end": 71780}], "name": "ENST00000450318", "aliases": [], "end": 71780, "cdna_coding_start": 231, "domains": [], "cdna_coding_end": 1439}], "end": 71783, "strand": "+", "start": 23427, "name": "ENSG00000137804"}, {"aliases": ["PFKFB2"], "chr": "gene3", "end": 31569, "strand": "+", "transcripts": [{"start": 3805, "exons": [{"end": 3911, "name": "ENSE00001872821", "start": 3805}, {"name": "ENSE00002901181", "start": 5246, "end": 5347}, {"start": 12498, "name": "ENSE00002934688", "end": 12623}, {"end": 13261, "start": 13165, "name": "ENSE00002915251"}, {"name": 
"ENSE00002265243", "start": 13688, "end": 13754}, {"start": 13892, "name": "ENSE00002223873", "end": 13966}, {"name": "ENSE00002234935", "start": 14318, "end": 14374}, {"start": 15581, "name": "ENSE00002233337", "end": 15705}, {"end": 18251, "start": 18044, "name": "ENSE00000842580"}, {"name": "ENSE00000842581", "start": 18708, "end": 18854}, {"start": 19969, "name": "ENSE00002876346", "end": 20073}, {"end": 20954, "start": 20825, "name": "ENSE00001595535"}, {"name": "ENSE00002889740", "start": 21733, "end": 21795}, {"name": "ENSE00002760808", "start": 22054, "end": 22118}, {"start": 22749, "name": "ENSE00001185299", "end": 28368}], "name": "ENST00000367080", "is_best_transcript": true, "domains": [{"regions": [{"start": 40, "end": 193}], "desc": "Chromatin_KTI12", "name": "PF08433"}, {"name": "SSF53254", "desc": "", "regions": [{"end": 469, "start": 251}]}, {"desc": "Bifunct_6PFK/fruc_bisP_Ptase", "name": "PIRSF000709", "regions": [{"start": 1, "end": 488}]}, {"regions": [{"end": 250, "start": 34}], "name": "SSF52540", "desc": ""}, {"regions": [{"end": 249, "start": 27}], "name": "PF01591", "desc": "6Phosfructo_kin"}, {"desc": "6Pfruct_kin", "name": "PR00991", "regions": [{"start": 124, "end": 138}, {"start": 150, "end": 164}, {"end": 190, "start": 176}, {"start": 230, "end": 251}, {"start": 252, "end": 274}, {"start": 329, "end": 345}]}, {"regions": [{"end": 398, "start": 251}], "desc": "His_Pase_superF_clade-1", "name": "PF00300"}, {"desc": "His_Pase_superF_clade-1", "name": "SM00855", "regions": [{"start": 251, "end": 398}]}], "cdna_coding_end": 1642, "end": 28368, "cdna_coding_start": 125, "aliases": ["NP_006203.2", "NM_006212.2"]}, {"aliases": ["NP_001018063.1", "NM_001018053.1"], "end": 31569, "cdna_coding_start": 74, "domains": [{"name": "SSF52540", "desc": "", "regions": [{"end": 250, "start": 34}]}, {"regions": [{"start": 251, "end": 398}], "name": "SM00855", "desc": "His_Pase_superF_clade-1"}, {"regions": [{"start": 124, "end": 138}, {"start": 150, 
"end": 164}, {"start": 176, "end": 190}, {"start": 230, "end": 251}, {"start": 252, "end": 274}, {"start": 329, "end": 345}], "desc": "6Pfruct_kin", "name": "PR00991"}, {"desc": "His_Pase_superF_clade-1", "name": "PF00300", "regions": [{"start": 251, "end": 398}]}, {"regions": [{"end": 249, "start": 27}], "name": "PF01591", "desc": "6Phosfructo_kin"}, {"name": "PIRSF000709", "desc": "Bifunct_6PFK/fruc_bisP_Ptase", "regions": [{"end": 468, "start": 1}]}, {"name": "SSF53254", "desc": "", "regions": [{"end": 469, "start": 251}]}, {"name": "PF08433", "desc": "Chromatin_KTI12", "regions": [{"start": 40, "end": 193}]}], "cdna_coding_end": 1489, "is_best_transcript": false, "start": 3856, "name": "ENST00000367079", "exons": [{"start": 3856, "name": "ENSE00001818425", "end": 3911}, {"end": 5347, "name": "ENSE00002901181", "start": 5246}, {"end": 12623, "start": 12498, "name": "ENSE00002934688"}, {"end": 13261, "name": "ENSE00002915251", "start": 13165}, {"end": 13754, "start": 13688, "name": "ENSE00002265243"}, {"end": 13966, "name": "ENSE00002223873", "start": 13892}, {"end": 14374, "name": "ENSE00002234935", "start": 14318}, {"start": 15581, "name": "ENSE00002233337", "end": 15705}, {"name": "ENSE00000842580", "start": 18044, "end": 18251}, {"name": "ENSE00000842581", "start": 18708, "end": 18854}, {"end": 20073, "start": 19969, "name": "ENSE00002876346"}, {"name": "ENSE00001595535", "start": 20825, "end": 20954}, {"end": 21795, "start": 21733, "name": "ENSE00002889740"}, {"name": "ENSE00002760808", "start": 22054, "end": 22118}, {"start": 29499, "name": "ENSE00001443434", "end": 31569}]}, {"aliases": [], "end": 22950, "cdna_coding_start": 227, "domains": [{"regions": [{"end": 160, "start": 7}], "name": "PF08433", "desc": "Chromatin_KTI12"}, {"regions": [{"start": 1, "end": 455}], "desc": "Bifunct_6PFK/fruc_bisP_Ptase", "name": "PIRSF000709"}, {"regions": [{"end": 436, "start": 218}], "desc": "", "name": "SSF53254"}, {"regions": [{"end": 105, "start": 91}, {"end": 131, 
"start": 117}, {"start": 143, "end": 157}, {"start": 197, "end": 218}, {"end": 241, "start": 219}, {"start": 296, "end": 312}], "name": "PR00991", "desc": "6Pfruct_kin"}, {"name": "PF01591", "desc": "6Phosfructo_kin", "regions": [{"end": 216, "start": 2}]}, {"name": "PF00300", "desc": "His_Pase_superF_clade-1", "regions": [{"start": 218, "end": 365}]}, {"name": "SM00855", "desc": "His_Pase_superF_clade-1", "regions": [{"start": 218, "end": 365}]}, {"regions": [{"start": 1, "end": 217}], "name": "SSF52540", "desc": ""}], "cdna_coding_end": 1645, "is_best_transcript": false, "start": 5245, "exons": [{"start": 5245, "name": "ENSE00002308738", "end": 5347}, {"name": "ENSE00002226272", "start": 11367, "end": 11475}, {"end": 12623, "name": "ENSE00002905114", "start": 12498}, {"end": 13261, "start": 13165, "name": "ENSE00002915251"}, {"name": "ENSE00002265243", "start": 13688, "end": 13754}, {"end": 13966, "start": 13892, "name": "ENSE00002223873"}, {"end": 14374, "name": "ENSE00002234935", "start": 14318}, {"end": 15705, "start": 15581, "name": "ENSE00002233337"}, {"end": 18251, "start": 18044, "name": "ENSE00000842580"}, {"end": 18854, "start": 18708, "name": "ENSE00000842581"}, {"end": 20073, "name": "ENSE00002876346", "start": 19969}, {"name": "ENSE00001595535", "start": 20825, "end": 20954}, {"name": "ENSE00002889740", "start": 21733, "end": 21795}, {"name": "ENSE00002760808", "start": 22054, "end": 22118}, {"name": "ENSE00002308130", "start": 22749, "end": 22950}], "name": "ENST00000545806"}, {"exons": [{"end": 162, "start": 1, "name": "ENSE00002231494"}, {"name": "ENSE00002892386", "start": 5246, "end": 5347}, {"name": "ENSE00002797095", "start": 12498, "end": 12623}, {"name": "ENSE00002308344", "start": 13092, "end": 13261}, {"name": "ENSE00002265243", "start": 13688, "end": 13754}, {"end": 13966, "name": "ENSE00002223873", "start": 13892}, {"start": 14318, "name": "ENSE00002234935", "end": 14374}, {"start": 15581, "name": "ENSE00002233337", "end": 15705}, {"end": 
18251, "start": 18044, "name": "ENSE00000842580"}, {"end": 18854, "start": 18708, "name": "ENSE00000842581"}, {"end": 20073, "start": 19969, "name": "ENSE00002876346"}, {"name": "ENSE00001595535", "start": 20825, "end": 20954}, {"end": 21795, "start": 21733, "name": "ENSE00002889740"}, {"start": 22054, "name": "ENSE00002760808", "end": 22118}, {"end": 29679, "start": 29499, "name": "ENSE00001791500"}], "start": 1, "name": "ENST00000411990", "is_best_transcript": false, "cdna_coding_end": 1668, "domains": [{"desc": "Bifunct_6PFK/fruc_bisP_Ptase", "name": "PIRSF000709", "regions": [{"start": 1, "end": 370}]}, {"desc": "", "name": "SSF53254", "regions": [{"end": 371, "start": 153}]}, {"desc": "", "name": "SSF52540", "regions": [{"start": 4, "end": 152}]}, {"desc": "6Pfruct_kin", "name": "PR00991", "regions": [{"start": 26, "end": 40}, {"start": 52, "end": 66}, {"start": 78, "end": 92}, {"start": 132, "end": 153}, {"end": 176, "start": 154}, {"start": 231, "end": 247}]}, {"name": "PF01591", "desc": "6Phosfructo_kin", "regions": [{"end": 151, "start": 1}]}, {"regions": [{"end": 300, "start": 153}], "name": "PF00300", "desc": "His_Pase_superF_clade-1"}, {"regions": [{"start": 153, "end": 300}], "name": "SM00855", "desc": "His_Pase_superF_clade-1"}], "aliases": [], "cdna_coding_start": 547, "end": 29679}, {"cdna_coding_start": 84, "end": 29931, "aliases": [], "cdna_coding_end": 878, "domains": [{"regions": [{"start": 1, "end": 64}], "name": "SSF52540", "desc": ""}, {"desc": "", "name": "SSF53254", "regions": [{"end": 262, "start": 65}]}, {"desc": "Bifunct_6PFK/fruc_bisP_Ptase", "name": "PIRSF000709", "regions": [{"start": 1, "end": 261}]}, {"regions": [{"start": 65, "end": 212}], "name": "SM00855", "desc": "His_Pase_superF_clade-1"}, {"name": "PF01591", "desc": "6Phosfructo_kin", "regions": [{"end": 63, "start": 1}]}, {"desc": "6Pfruct_kin", "name": "PR00991", "regions": [{"start": 44, "end": 65}, {"start": 66, "end": 88}, {"start": 143, "end": 159}]}, {"regions": 
[{"end": 212, "start": 65}], "name": "PF00300", "desc": "His_Pase_superF_clade-1"}], "is_best_transcript": false, "start": 15549, "name": "ENST00000541914", "exons": [{"name": "ENSE00002294124", "start": 15549, "end": 15705}, {"name": "ENSE00000842580", "start": 18044, "end": 18251}, {"start": 18708, "name": "ENSE00000842581", "end": 18854}, {"end": 20073, "start": 19969, "name": "ENSE00002876346"}, {"start": 20825, "name": "ENSE00001595535", "end": 20954}, {"name": "ENSE00002760808", "start": 22054, "end": 22118}, {"name": "ENSE00002284395", "start": 29499, "end": 29931}]}], "start": 1, "name": "ENSG00000123836"}, {"start": 1, "name": "ENSG00000187416", "chr": "gene4", "aliases": ["LHFPL3"], "transcripts": [{"domains": [{"regions": [{"start": 22, "end": 199}], "desc": "Lipome_HGMIC_fus_partner-like", "name": "PF10242"}], "cdna_coding_end": 835, "aliases": [], "end": 578576, "cdna_coding_start": 167, "exons": [{"end": 569, "name": "ENSE00001713788", "start": 1}, {"end": 408255, "start": 408019, "name": "ENSE00001349915"}, {"end": 578576, "start": 577531, "name": "ENSE00001723245"}], "start": 1, "name": "ENST00000424859", "is_best_transcript": false}, {"start": 45, "name": "ENST00000401970", "exons": [{"end": 569, "name": "ENSE00001554382", "start": 45}, {"end": 408255, "start": 408019, "name": "ENSE00001349915"}, {"end": 516773, "name": "ENSE00001746769", "start": 516726}, {"end": 578098, "start": 577531, "name": "ENSE00001593689"}], "is_best_transcript": false, "domains": [{"desc": "Lipome_HGMIC_fus_partner-like", "name": "PF10242", "regions": [{"end": 199, "start": 22}]}], "cdna_coding_end": 779, "aliases": [], "end": 578098, "cdna_coding_start": 123}, {"end": 579898, "cdna_coding_start": 125, "aliases": ["NP_945351.1", "NM_199000.2"], "domains": [{"desc": "Lipome_HGMIC_fus_partner-like", "name": "PF10242", "regions": [{"end": 213, "start": 36}]}, {"regions": [{"end": 22, "start": 4}], "desc": "", "name": "PS50310"}], "cdna_coding_end": 835, "is_best_transcript": 
true, "start": 1, "name": "ENST00000535008", "exons": [{"start": 1, "name": "ENSE00002286305", "end": 573}, {"end": 141399, "name": "ENSE00002499039", "start": 141389}, {"name": "ENSE00002324321", "start": 294348, "end": 294368}, {"end": 408255, "name": "ENSE00002278044", "start": 408055}, {"name": "ENSE00002230996", "start": 577531, "end": 579898}]}, {"aliases": [], "cdna_coding_start": 81, "end": 578099, "cdna_coding_end": 779, "domains": [{"name": "PS50310", "desc": "", "regions": [{"start": 4, "end": 22}]}, {"desc": "Lipome_HGMIC_fus_partner-like", "name": "PF10242", "regions": [{"end": 213, "start": 36}]}], "is_best_transcript": false, "name": "ENST00000543266", "start": 45, "exons": [{"end": 573, "name": "ENSE00002312340", "start": 45}, {"name": "ENSE00002499039", "start": 141389, "end": 141399}, {"start": 294348, "name": "ENSE00002324321", "end": 294368}, {"end": 408255, "start": 408055, "name": "ENSE00002278044"}, {"end": 516773, "start": 516726, "name": "ENSE00001746769"}, {"end": 578099, "start": 577531, "name": "ENSE00002270291"}]}], "strand": "+", "end": 579898}, {"start": 1, "name": "ENSG00000122565", "strand": "+", "end": 12195, "transcripts": [{"cdna_coding_start": 429, "end": 12195, "aliases": ["NP_009207.2", "NM_007276.4"], "cdna_coding_end": 980, "domains": [{"name": "PR00504", "desc": "Chromo_dom_subgr", "regions": [{"end": 35, "start": 27}, {"end": 54, "start": 40}, {"end": 67, "start": 55}]}, {"name": "PF01393", "desc": "Chromo_shadow_dom", "regions": [{"start": 119, "end": 176}]}, {"regions": [{"end": 175, "start": 106}, {"start": 15, "end": 79}], "name": "SSF54160", "desc": "Chromodomain-like"}, {"regions": [{"end": 78, "start": 30}], "name": "PF00385", "desc": "Chromo_domain"}, {"regions": [{"start": 29, "end": 81}, {"start": 120, "end": 172}], "name": "SM00298", "desc": "Chromo_domain/shadow"}, {"regions": [{"end": 177, "start": 115}], "name": "SM00300", "desc": "Chromo_shadow_dom"}, {"desc": "Chromo_domain/shadow", "name": "PS50013", 
"regions": [{"end": 88, "start": 30}, {"end": 179, "start": 121}]}], "is_best_transcript": true, "exons": [{"end": 400, "start": 1, "name": "ENSE00001189828"}, {"end": 1861, "name": "ENSE00001433792", "start": 1810}, {"start": 5207, "name": "ENSE00002858776", "end": 5349}, {"start": 7232, "name": "ENSE00002882596", "end": 7394}, {"end": 10595, "start": 10501, "name": "ENSE00002768648"}, {"start": 10921, "name": "ENSE00002778428", "end": 12195}], "start": 1, "name": "ENST00000337620"}, {"start": 532, "name": "ENST00000396386", "exons": [{"start": 532, "name": "ENSE00001524790", "end": 665}, {"start": 1810, "name": "ENSE00001433792", "end": 1861}, {"end": 5349, "name": "ENSE00002858776", "start": 5207}, {"name": "ENSE00002882596", "start": 7232, "end": 7394}, {"start": 10501, "name": "ENSE00002768648", "end": 10595}, {"name": "ENSE00001553667", "start": 10921, "end": 12190}], "is_best_transcript": false, "cdna_coding_end": 714, "domains": [{"regions": [{"start": 115, "end": 177}], "name": "SM00300", "desc": "Chromo_shadow_dom"}, {"regions": [{"end": 88, "start": 30}, {"end": 179, "start": 121}], "desc": "Chromo_domain/shadow", "name": "PS50013"}, {"name": "SM00298", "desc": "Chromo_domain/shadow", "regions": [{"start": 29, "end": 81}, {"start": 120, "end": 172}]}, {"desc": "Chromo_dom_subgr", "name": "PR00504", "regions": [{"end": 35, "start": 27}, {"end": 54, "start": 40}, {"start": 55, "end": 67}]}, {"regions": [{"start": 106, "end": 175}, {"start": 15, "end": 79}], "name": "SSF54160", "desc": "Chromodomain-like"}, {"name": "PF01393", "desc": "Chromo_shadow_dom", "regions": [{"start": 119, "end": 176}]}, {"regions": [{"end": 78, "start": 30}], "name": "PF00385", "desc": "Chromo_domain"}], "cdna_coding_start": 163, "end": 12190, "aliases": ["NP_057671.2", "NM_016587.3"]}, {"cdna_coding_end": 1068, "domains": [{"name": "PF00385", "desc": "Chromo_domain", "regions": [{"end": 62, "start": 30}]}, {"name": "SSF54160", "desc": "Chromodomain-like", "regions": [{"start": 
22, "end": 62}]}, {"desc": "Chromo_dom_subgr", "name": "PR00504", "regions": [{"end": 35, "start": 27}, {"start": 40, "end": 54}, {"start": 55, "end": 62}]}, {"regions": [{"start": 30, "end": 62}], "name": "PS50013", "desc": "Chromo_domain/shadow"}], "cdna_coding_start": 882, "end": 7251, "aliases": [], "start": 596, "name": "ENST00000456948", "exons": [{"end": 1448, "start": 596, "name": "ENSE00001696375"}, {"start": 1810, "name": "ENSE00001433792", "end": 1861}, {"end": 5349, "name": "ENSE00002858776", "start": 5207}, {"start": 7232, "name": "ENSE00001778411", "end": 7251}], "is_best_transcript": false}, {"is_best_transcript": false, "start": 604, "name": "ENST00000409747", "exons": [{"end": 665, "start": 604, "name": "ENSE00001588865"}, {"end": 1861, "start": 1810, "name": "ENSE00001433792"}, {"start": 5207, "name": "ENSE00002858776", "end": 5349}, {"name": "ENSE00001577751", "start": 7305, "end": 7394}, {"start": 10501, "name": "ENSE00002765595", "end": 10595}, {"start": 10921, "name": "ENSE00002880768", "end": 12195}], "cdna_coding_start": 91, "end": 12195, "aliases": [], "cdna_coding_end": 396, "domains": [{"regions": [{"start": 15, "end": 56}], "desc": "Chromodomain-like", "name": "SSF54160"}, {"regions": [{"end": 57, "start": 30}], "desc": "Chromo_domain", "name": "PF00385"}, {"desc": "Chromo_domain/shadow", "name": "PS50013", "regions": [{"start": 30, "end": 91}]}]}], "aliases": ["CBX3"], "chr": "gene5"}, {"name": "ENSG00000171862", "start": 1, "end": 108818, "strand": "+", "transcripts": [{"aliases": ["NP_000305.3", "NM_000314.4"], "cdna_coding_start": 1358, "end": 108818, "cdna_coding_end": 2569, "domains": [{"desc": "C2_Ca/lipid-bd_dom_CaLB", "name": "SSF49562", "regions": [{"start": 188, "end": 351}]}, {"regions": [{"end": 159, "start": 80}], "name": "PF00782", "desc": "Dual-sp_phosphatase_cat-dom"}, {"desc": "Bifunc_PIno_P3_Pase/Pase_PTEN", "name": "PIRSF038025", "regions": [{"start": 1, "end": 403}]}, {"regions": [{"start": 59, "end": 181}], "name": 
"PF00102", "desc": "Tyr_Pase_rcpt/non-rcpt"}, {"desc": "Tensin_phosphatase_C2-dom", "name": "PF10409", "regions": [{"end": 349, "start": 188}]}, {"regions": [{"end": 350, "start": 190}], "desc": "Tensin_phosphatase_C2-dom", "name": "PS51182"}, {"name": "SM00404", "desc": "Tyr_Pase_cat", "regions": [{"end": 183, "start": 23}]}, {"regions": [{"end": 187, "start": 14}], "desc": "", "name": "SSF52799"}, {"regions": [{"start": 102, "end": 173}], "desc": "Tyr/Dual-specificity_Pase", "name": "PS50056"}, {"regions": [{"end": 185, "start": 14}], "name": "PS51181", "desc": "Phosphatase_tensin-typ"}], "is_best_transcript": true, "start": 1, "name": "ENST00000371953", "exons": [{"end": 1436, "start": 1, "name": "ENSE00001456562"}, {"end": 30997, "name": "ENSE00001156351", "start": 30913}, {"end": 62445, "start": 62401, "name": "ENSE00001156344"}, {"end": 67977, "name": "ENSE00002779611", "start": 67934}, {"start": 69901, "name": "ENSE00001156330", "end": 70139}, {"end": 89147, "start": 89006, "name": "ENSE00001156327"}, {"end": 94907, "start": 94741, "name": "ENSE00002737042"}, {"start": 97782, "name": "ENSE00001156315", "end": 98006}, {"end": 108818, "name": "ENSE00001456541", "start": 102175}]}], "aliases": ["PTEN"], "chr": "gene6"}], "best_transcript_file": "/home/creisle/svn/ensembl_flatfiles/ens69_best_transcript.txt", "script": "generate_ensembl_json_temp.pl", "hugo_mapping_file": "/projects/tumour_char/analysis_scripts/databases/processed_files/drug_target_tables/compiled_gene_drug_pathway.v1_2_4.tsv", "generation_time": "Tue Feb 28 11:58:00 2017", "script_version": "2.1.4"}
diff --git a/tests/data/mock_reference_annotations.full.json b/tests/data/mock_reference_annotations.full.json
new file mode 100644
index 00000000..836273b0
--- /dev/null
+++ b/tests/data/mock_reference_annotations.full.json
@@ -0,0 +1 @@
+{"genes": [{"aliases": ["C9orf47"], "chr": "fakereference9", "end": 5278, "name": "ENSG00000186354", "start": 1, "strand": "+", "transcripts": [{"aliases": [], "cdna_coding_end": 685, "cdna_coding_start": 134, "domains": [], "end": 5278, "exons": [{"end": 322, "start": 1}, {"end": 833, "start": 608}, {"end": 5278, "start": 990}], "is_best_transcript": true, "name": "ENST00000375851", "start": 1}, {"aliases": [], "cdna_coding_end": 783, "cdna_coding_start": 76, "domains": [], "end": 1202, "exons": [{"end": 322, "start": 59}, {"end": 1202, "start": 608}], "is_best_transcript": false, "name": "ENST00000375850", "start": 59}, {"aliases": [], "cdna_coding_end": 677, "cdna_coding_start": 69, "domains": [], "end": 5278, "exons": [{"end": 379, "start": 66}, {"end": 833, "start": 608}, {"end": 5278, "start": 990}], "is_best_transcript": false, "name": "ENST00000334490", "start": 66}]}, {"aliases": ["S1PR3"], "chr": "fakereference9", "end": 14148, "name": "ENSG00000213694", "start": 585, "strand": "+", "transcripts": [{"aliases": [], "cdna_coding_end": 1533, "cdna_coding_start": 397, "domains": [{"name": "PR00362", "regions": [{"end": 62, "start": 49}, {"end": 200, "start": 185}, {"end": 308, "start": 298}]}, {"name": "PR00642", "regions": [{"end": 75, "start": 63}, {"end": 112, "start": 102}, {"end": 155, "start": 139}, {"end": 345, "start": 329}]}, {"name": "PS50262", "regions": [{"end": 298, "start": 56}]}, {"name": "PF10320", "regions": [{"end": 312, "start": 55}]}, {"name": "SSF81321", "regions": [{"end": 340, "start": 1}]}, {"name": "PR00237", "regions": [{"end": 65, "start": 41}, {"end": 95, "start": 74}, {"end": 140, "start": 118}, {"end": 174, "start": 153}, {"end": 219, "start": 196}, {"end": 265, "start": 241}, {"end": 306, "start": 280}]}, {"name": "PR01523", "regions": [{"end": 25, "start": 13}, {"end": 101, "start": 92}, {"end": 123, "start": 112}, {"end": 204, "start": 194}, {"end": 224, "start": 215}, {"end": 283, "start": 272}, {"end": 311, "start": 301}]}, 
{"name": "PF00001", "regions": [{"end": 298, "start": 56}]}, {"name": "PR01524", "regions": [{"end": 40, "start": 24}, {"end": 155, "start": 139}, {"end": 233, "start": 223}, {"end": 323, "start": 314}, {"end": 340, "start": 326}]}], "end": 14148, "exons": [{"end": 833, "start": 585}, {"end": 14148, "start": 10192}], "is_best_transcript": false, "name": "ENST00000358157", "start": 585}, {"aliases": [], "cdna_coding_end": 5832, "cdna_coding_start": 4696, "domains": [{"name": "PF10320", "regions": [{"end": 312, "start": 55}]}, {"name": "PR00362", "regions": [{"end": 62, "start": 49}, {"end": 200, "start": 185}, {"end": 308, "start": 298}]}, {"name": "PS50262", "regions": [{"end": 298, "start": 56}]}, {"name": "PR00642", "regions": [{"end": 75, "start": 63}, {"end": 112, "start": 102}, {"end": 155, "start": 139}, {"end": 345, "start": 329}]}, {"name": "PR00237", "regions": [{"end": 65, "start": 41}, {"end": 95, "start": 74}, {"end": 140, "start": 118}, {"end": 174, "start": 153}, {"end": 219, "start": 196}, {"end": 265, "start": 241}, {"end": 306, "start": 280}]}, {"name": "PR01523", "regions": [{"end": 25, "start": 13}, {"end": 101, "start": 92}, {"end": 123, "start": 112}, {"end": 204, "start": 194}, {"end": 224, "start": 215}, {"end": 283, "start": 272}, {"end": 311, "start": 301}]}, {"name": "PR01524", "regions": [{"end": 40, "start": 24}, {"end": 155, "start": 139}, {"end": 233, "start": 223}, {"end": 323, "start": 314}, {"end": 340, "start": 326}]}, {"name": "PF00001", "regions": [{"end": 298, "start": 56}]}, {"name": "SSF81321", "regions": [{"end": 340, "start": 1}]}], "end": 14148, "exons": [{"end": 14148, "start": 5644}], "is_best_transcript": true, "name": "ENST00000375846", "start": 5644}]}]}
\ No newline at end of file
diff --git a/tests/data/mock_reference_annotations.full.tsv b/tests/data/mock_reference_annotations.full.tsv
deleted file mode 100644
index 7ead95e3..00000000
--- a/tests/data/mock_reference_annotations.full.tsv
+++ /dev/null
@@ -1,6 +0,0 @@
-ensembl_gene_id	hugo_names	chr	strand	gene_start	gene_end	best_ensembl_transcript_id	ensembl_transcript_id	refseq_equivalents	transcript_genomic_start	transcript_genomic_end	cdna_coding_start	cdna_coding_end	genomic_exon_ranges	AA_domain_ranges
-ENSG00000186354	C9orf47	fakereference9	1	1	5278	ENST00000375851	ENST00000375851	NP_001135885.1;NM_001142413.1	1	5278	134	685	1-322;608-833;990-5278	
-ENSG00000186354	C9orf47	fakereference9	1	1	5278	ENST00000375851	ENST00000375850		59	1202	76	783	59-322;608-1202	
-ENSG00000186354	C9orf47	fakereference9	1	1	5278	ENST00000375851	ENST00000334490	NP_001001938.1;NM_001001938.3	66	5278	69	677	66-379;608-833;990-5278	
-ENSG00000213694	S1PR3	fakereference9	1	585	14148	ENST00000375846	ENST00000358157	NP_005217.2;NM_005226.3	585	14148	397	1533	585-833;10192-14148	PR00362:49-62,185-200,298-308;PR00642:63-75,102-112,139-155,329-345;PS50262:56-298;PF10320:55-312;SSF81321:1-340;PR00237:41-65,74-95,118-140,153-174,196-219,241-265,280-306;PR01523:13-25,92-101,112-123,194-204,215-224,272-283,301-311;PF00001:56-298;PR01524:24-40,139-155,223-233,314-323,326-340
-ENSG00000213694	S1PR3	fakereference9	1	585	14148	ENST00000375846	ENST00000375846		5644	14148	4696	5832	5644-14148	PF10320:55-312;PR00362:49-62,185-200,298-308;PS50262:56-298;PR00642:63-75,102-112,139-155,329-345;PR00237:41-65,74-95,118-140,153-174,196-219,241-265,280-306;PR01523:13-25,92-101,112-123,194-204,215-224,272-283,301-311;PR01524:24-40,139-155,223-233,314-323,326-340;PF00001:56-298;SSF81321:1-340
diff --git a/tests/data/mock_reference_annotations.json b/tests/data/mock_reference_annotations.json
index 28b06e57..44c96d4e 100644
--- a/tests/data/mock_reference_annotations.json
+++ b/tests/data/mock_reference_annotations.json
@@ -4,7 +4,7 @@
             "chr": "fake",
             "start": 1,
             "end": 1000,
-            "strand": "1",
+            "strand": "+",
             "name": "ENSG0001",
             "aliases": [],
             "transcripts": [
diff --git a/tests/data/mock_reference_annotations.tsv b/tests/data/mock_reference_annotations.tsv
deleted file mode 100644
index 14391a69..00000000
--- a/tests/data/mock_reference_annotations.tsv
+++ /dev/null
@@ -1,7 +0,0 @@
-ensembl_gene_id	chr	strand	gene_start	gene_end	ensembl_transcript_id	transcript_genomic_start	transcript_genomic_end
-GENE-A	fake	+	100	200	TRANSCRIPT-A	100	200
-GENE-B	fake	-	250	350	TRANSCRIPT-B	250	350
-GENE-C	fake	+	300	400	TRANSCRIPT-C	300	400
-GENE-D	fake	-	450	550	TRANSCRIPT-D	450	550
-GENE-E	fake	+	500	600	TRANSCRIPT-E	500	600
-GENE-F	fake	+	550	650	TRANSCRIPT-E	550	650
diff --git a/tests/data/mock_reference_annotations2.json b/tests/data/mock_reference_annotations2.json
new file mode 100644
index 00000000..f1ef1c50
--- /dev/null
+++ b/tests/data/mock_reference_annotations2.json
@@ -0,0 +1 @@
+{"genes": [{"aliases": [], "chr": "fake", "end": 200, "name": "GENE-A", "start": 100, "strand": "+", "transcripts": [{"aliases": [], "cdna_coding_end": null, "cdna_coding_start": null, "domains": [], "end": 200, "exons": [], "is_best_transcript": true, "name": "TRANSCRIPT-A", "start": 100}]}, {"aliases": [], "chr": "fake", "end": 350, "name": "GENE-B", "start": 250, "strand": "-", "transcripts": [{"aliases": [], "cdna_coding_end": null, "cdna_coding_start": null, "domains": [], "end": 350, "exons": [], "is_best_transcript": true, "name": "TRANSCRIPT-B", "start": 250}]}, {"aliases": [], "chr": "fake", "end": 400, "name": "GENE-C", "start": 300, "strand": "+", "transcripts": [{"aliases": [], "cdna_coding_end": null, "cdna_coding_start": null, "domains": [], "end": 400, "exons": [], "is_best_transcript": true, "name": "TRANSCRIPT-C", "start": 300}]}, {"aliases": [], "chr": "fake", "end": 550, "name": "GENE-D", "start": 450, "strand": "-", "transcripts": [{"aliases": [], "cdna_coding_end": null, "cdna_coding_start": null, "domains": [], "end": 550, "exons": [], "is_best_transcript": true, "name": "TRANSCRIPT-D", "start": 450}]}, {"aliases": [], "chr": "fake", "end": 600, "name": "GENE-E", "start": 500, "strand": "+", "transcripts": [{"aliases": [], "cdna_coding_end": null, "cdna_coding_start": null, "domains": [], "end": 600, "exons": [], "is_best_transcript": true, "name": "TRANSCRIPT-E", "start": 500}]}, {"aliases": [], "chr": "fake", "end": 650, "name": "GENE-F", "start": 550, "strand": "+", "transcripts": [{"aliases": [], "cdna_coding_end": null, "cdna_coding_start": null, "domains": [], "end": 650, "exons": [], "is_best_transcript": true, "name": "TRANSCRIPT-E", "start": 550}]}]}
\ No newline at end of file
diff --git a/tests/integration/test_annotate.py b/tests/integration/test_annotate.py
index bf816b70..d6e5e2ed 100644
--- a/tests/integration/test_annotate.py
+++ b/tests/integration/test_annotate.py
@@ -31,7 +31,7 @@
 def setUpModule():
     global REFERENCE_ANNOTATIONS, REFERENCE_GENOME, REF_CHR, EXAMPLE_GENES
     EXAMPLE_GENES = get_example_genes()
-    REFERENCE_ANNOTATIONS = load_annotations(get_data('mock_reference_annotations.tsv'))
+    REFERENCE_ANNOTATIONS = load_annotations(get_data('mock_reference_annotations2.json'))
     count = sum([len(genes) for genes in REFERENCE_ANNOTATIONS.values()])
     print('loaded annotations', count)
     assert count >= 6  # make sure this is the file we expect
@@ -1737,7 +1737,7 @@ def test_calculate_orf_nested(self):
 
 class TestAnnotateEvents:
     def test_annotate_events(self):
-        reference_annotations = load_annotations(get_data('mock_reference_annotations.full.tsv'))
+        reference_annotations = load_annotations(get_data('mock_reference_annotations.full.json'))
         b1 = Breakpoint('fakereference9', 658, orient=ORIENT.RIGHT, strand=STRAND.POS)
         b2 = Breakpoint('fakereference9', 10237, orient=ORIENT.RIGHT, strand=STRAND.NEG)
         bpp = BreakpointPair(
diff --git a/tests/integration/test_annotate_fileio.py b/tests/integration/test_annotate_fileio.py
index 926928aa..7b1a09de 100644
--- a/tests/integration/test_annotate_fileio.py
+++ b/tests/integration/test_annotate_fileio.py
@@ -1,40 +1,11 @@
-from mavis.annotate.file_io import convert_tab_to_json, load_annotations
+from mavis.annotate.file_io import load_annotations
 
 from ..util import get_data
 
-TAB = get_data('annotations_subsample.tab')
 JSON = get_data('annotations_subsample.json')
 
 
 class TestAnnotationLoading:
-    def test_convert_tab_to_json(self):
-        json = convert_tab_to_json(TAB, warn=print)
-        assert len(json['genes']) == 32
-
-    def test_tab_equivalent_to_json(self):
-        tab_result = load_annotations(TAB, warn=print)
-        json_result = load_annotations(JSON, warn=print)
-        assert sorted(json_result.keys()) == sorted(tab_result.keys())
-
-    def test_load_tab(self):
-        result = load_annotations(TAB, warn=print)
-        assert len(result.keys()) == 12
-        domains = []
-        for gene in result['12']:
-            for t in gene.spliced_transcripts:
-                print(t)
-                if t.unspliced_transcript.name == 'ENST00000550458':
-                    tl = t.translations[0]
-                    domains = tl.domains
-                    break
-            if domains:
-                break
-        for d in domains:
-            print(d.name, d.regions)
-        assert len(domains) == 2
-        result = load_annotations(get_data('mock_reference_annotations.tsv'), warn=print)
-        assert len(result.keys()) == 1
-
     def test_load_json(self):
         result = load_annotations(JSON, warn=print)
         assert len(result.keys()) == 12

From 2c9bca0a6c1a8a05437bcd1ddaf6e442339cffb9 Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Thu, 20 Jan 2022 23:59:47 -0800
Subject: [PATCH 085/137] Add conversion script to migration guide

---
 docs/migrating.md | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/docs/migrating.md b/docs/migrating.md
index d56e17c3..91fb0d4f 100644
--- a/docs/migrating.md
+++ b/docs/migrating.md
@@ -19,3 +19,13 @@ config file
 ### Scheduling
 
 MAVIS is now integrated with snakemake instead of handling its own scheduling
+
+## Reference Annotation Files
+
+MAVIS no longer supports the previously deprecated tab-delimited format of the annotations file. If you are still using these files in your project, we have provided a script in the tools directory to automatically convert them to the newer JSON format.
+
+```bash
+python src/tools/migrate_mavis_annotations_to_jsonl.py \
+    /path/to/tab/file.tab \
+    /path/to/new/json/file.json
+```

From 02c108f779341f8604e8c1586c78e3c57692fe33 Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Sat, 22 Jan 2022 12:16:21 -0800
Subject: [PATCH 086/137] Add some schema failure unit tests for
 annotations.json

---
 tests/end_to_end/test_convert.py   |  4 ++--
 tests/unit/test_annotate_fileio.py | 22 ++++++++++++++++++++++
 2 files changed, 24 insertions(+), 2 deletions(-)
 create mode 100644 tests/unit/test_annotate_fileio.py

diff --git a/tests/end_to_end/test_convert.py b/tests/end_to_end/test_convert.py
index e0b29e8e..671e4d10 100644
--- a/tests/end_to_end/test_convert.py
+++ b/tests/end_to_end/test_convert.py
@@ -114,14 +114,14 @@ def test_sniffle(self):
         record = results['vcf-35777'][0]
         print(record, record.data)
         assert record.data['event_type'] == 'translocation'
-    
+
     def test_cuteSV(self):
         results = self.run_main(get_data('cuteSV.vcf'), SUPPORTED_TOOL.VCF, False)
         print(results.keys())
         record = results['vcf-cuteSV.BND.0'][0]
         print(record, record.data)
         assert record.data['event_type'] == 'inverted translocation'
-    
+
     def test_breakseq2(self):
         self.run_main(get_data('breakseq.vcf'), SUPPORTED_TOOL.BREAKSEQ, False)
 
diff --git a/tests/unit/test_annotate_fileio.py b/tests/unit/test_annotate_fileio.py
new file mode 100644
index 00000000..43823519
--- /dev/null
+++ b/tests/unit/test_annotate_fileio.py
@@ -0,0 +1,22 @@
+import json
+
+import pytest
+from mavis.annotate.file_io import load_annotations
+
+
+@pytest.mark.parametrize(
+    'annotations,error_message_include',
+    [
+        [{'genes': []}, "schema['properties']['genes']"],
+        [
+            {'genes': [{'start': '1'}]},
+            "schema['properties']['genes']['items']['properties']['start']",
+        ],
+    ],
+)
+def test_min_genes_error(annotations, error_message_include, tmp_path):
+    filename = tmp_path / "annotations.json"
+    filename.write_text(json.dumps(annotations))
+    with pytest.raises(AssertionError) as exc:
+        load_annotations(str(filename))
+    assert error_message_include in str(exc.value)

From 7e9b9091583182878fff157eeed146ba6d360f07 Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Wed, 26 Jan 2022 12:43:40 -0800
Subject: [PATCH 087/137] Standardize logging

---
 src/mavis/align.py                        |  35 ++---
 src/mavis/annotate/file_io.py             |  20 ++-
 src/mavis/annotate/main.py                |  41 +++---
 src/mavis/annotate/variant.py             |  11 +-
 src/mavis/assemble.py                     |  31 ++---
 src/mavis/bam/cache.py                    |  18 ++-
 src/mavis/bam/stats.py                    |  12 +-
 src/mavis/blat.py                         |  11 +-
 src/mavis/cluster/cluster.py              |   8 +-
 src/mavis/cluster/main.py                 |  39 +++---
 src/mavis/config.py                       |  10 +-
 src/mavis/constants.py                    |   3 +-
 src/mavis/illustrate/diagram.py           |  14 +-
 src/mavis/illustrate/scatter.py           |  13 +-
 src/mavis/interval.py                     |   2 +-
 src/mavis/main.py                         |  14 +-
 src/mavis/overlay.py                      |  11 +-
 src/mavis/pairing/main.py                 |  16 +--
 src/mavis/pairing/pairing.py              |  18 +--
 src/mavis/summary/main.py                 |  12 +-
 src/mavis/tools/__init__.py               |  20 ++-
 src/mavis/tools/vcf.py                    |  12 +-
 src/mavis/util.py                         |  98 ++++----------
 src/mavis/validate/base.py                |  29 ++--
 src/mavis/validate/main.py                | 158 ++++++++--------------
 src/tools/calculate_ref_alt_counts.py     |  19 ++-
 tests/integration/test_annotate_fileio.py |   2 +-
 tests/integration/test_args.py            |  20 +--
 tests/integration/test_assemble.py        |   5 +-
 tests/integration/test_bam.py             |  12 +-
 tests/snakemake/test_mini_workflow.py     |  16 +++
 31 files changed, 280 insertions(+), 450 deletions(-)

diff --git a/src/mavis/align.py b/src/mavis/align.py
index 20984ba6..df74feb7 100644
--- a/src/mavis/align.py
+++ b/src/mavis/align.py
@@ -5,27 +5,15 @@
 import os
 import re
 import subprocess
-import warnings
-from copy import copy
 
 import pysam
 
 from .bam import cigar as _cigar
 from .bam import read as _read
 from .breakpoint import Breakpoint, BreakpointPair
-from .constants import (
-    CIGAR,
-    COLUMNS,
-    NA_MAPPING_QUALITY,
-    ORIENT,
-    STRAND,
-    SVTYPE,
-    MavisNamespace,
-    reverse_complement,
-)
-from .error import InvalidRearrangement
+from .constants import CIGAR, ORIENT, STRAND, SVTYPE, MavisNamespace, reverse_complement
 from .interval import Interval
-from .util import DEVNULL
+from .util import logger
 
 
 class SUPPORTED_ALIGNER(MavisNamespace):
@@ -234,7 +222,7 @@ def convert_to_duplication(alignment, reference_genome):
                 opposing_strands=alignment.opposing_strands,
                 read1=alignment.read1,
                 read2=alignment.read2,
-                **alignment.data
+                **alignment.data,
             )
             return result
     return alignment
@@ -405,8 +393,7 @@ def align_sequences(
     blat_limit_top_aln=25,
     blat_min_identity=0.7,
     clean_files=True,
-    log=DEVNULL,
-    **kwargs
+    **kwargs,
 ):
     """
     calls the alignment tool and parses the return output for a set of sequences
@@ -428,7 +415,7 @@ def align_sequences(
         if not sequences:
             return []
 
-        log('will use', aligner, 'to align', len(sequences), 'unique sequences', time_stamp=False)
+        logger.debug(f'will use {aligner} to align {len(sequences)} unique sequences')
 
         # call the aligner using subprocess
         if aligner == SUPPORTED_ALIGNER.BLAT:
@@ -454,7 +441,7 @@ def align_sequences(
                     blat_options,
                 ]
             )
-            log('writing aligner logging to:', aligner_output_log, time_stamp=False)
+            logger.debug(f'writing aligner logging to: {aligner_output_log}')
             with open(aligner_output_log, 'w') as log_fh:
                 log_fh.write('>>> {}\n'.format(command))
                 subprocess.check_call(command, shell=True, stdout=log_fh, stderr=log_fh)
@@ -471,7 +458,7 @@ def align_sequences(
             command = '{} -Y {} {} {}'.format(
                 aligner, align_options, aligner_reference, aligner_fa_input_file
             )
-            log('writing aligner logging to:', aligner_output_log, time_stamp=False)
+            logger.debug(f'writing aligner logging to: {aligner_output_log}')
             with open(aligner_output_log, 'w') as log_fh, open(
                 aligner_output_file, 'w'
             ) as aligner_output_fh:
@@ -489,10 +476,8 @@ def align_sequences(
                     try:
                         read.reference_id = input_bam_cache.reference_id(read.reference_name)
                     except KeyError:
-                        log(
-                            'dropping alignment (unknown reference)',
-                            read.reference_name,
-                            time_stamp=False,
+                        logger.warning(
+                            f'dropping alignment (unknown reference): {read.reference_name}'
                         )
                     else:
                         if read.is_paired:
@@ -523,7 +508,7 @@ def align_sequences(
                     try:
                         os.remove(outputfile)
                     except OSError as err:
-                        warnings.warn(repr(err))
+                        logger.warning(repr(err))
 
 
 def select_contig_alignments(evidence, reads_by_query):
diff --git a/src/mavis/annotate/file_io.py b/src/mavis/annotate/file_io.py
index 6b1a16f2..9e6c92c2 100644
--- a/src/mavis/annotate/file_io.py
+++ b/src/mavis/annotate/file_io.py
@@ -13,7 +13,7 @@
 
 from ..constants import CODON_SIZE, GIEMSA_STAIN, START_AA, STOP_AA, translate
 from ..interval import Interval
-from ..util import DEVNULL, LOG
+from ..util import logger
 from .base import BioInterval, ReferenceName
 from .genomic import Exon, Gene, PreTranscript, Template, Transcript
 from .protein import Domain, Translation
@@ -60,7 +60,6 @@ def load_masking_regions(*filepaths: str) -> Dict[str, List[BioInterval]]:
 
 def load_annotations(
     *filepaths: str,
-    warn: Callable = DEVNULL,
     reference_genome: Optional[Dict[str, SeqRecord]] = None,
     best_transcripts_only: bool = False,
 ) -> Dict[str, List[Gene]]:
@@ -86,7 +85,6 @@ def load_annotations(
             data,
             reference_genome=reference_genome,
             best_transcripts_only=best_transcripts_only,
-            warn=warn,
         )
 
         for chrom in current_annotations:
@@ -99,7 +97,6 @@ def parse_annotations_json(
     data,
     reference_genome: Optional[Dict[str, SeqRecord]] = None,
     best_transcripts_only=False,
-    warn=DEVNULL,
 ) -> Dict[str, List[Gene]]:
     """
     parses a json of annotation information into annotation objects
@@ -159,7 +156,9 @@ def parse_annotations_json(
                 tx_length = transcript['cdna_coding_end'] - transcript['cdna_coding_start'] + 1
                 # check that the translation makes sense before including it
                 if tx_length % CODON_SIZE != 0:
-                    warn('Ignoring translation. The translated region is not a multiple of three')
+                    logger.warning(
+                        'Ignoring translation. The translated region is not a multiple of three'
+                    )
                     continue
                 tx_length = tx_length // CODON_SIZE
                 domains = []
@@ -180,7 +179,7 @@ def parse_annotations_json(
                             )
                         )
                     except AssertionError as err:
-                        warn(repr(err))
+                        logger.warning(repr(err))
                 translation = Translation(
                     transcript['cdna_coding_start'],
                     transcript['cdna_coding_end'],
@@ -193,9 +192,8 @@ def parse_annotations_json(
                     met = seq[translation.start - 1 : translation.start + 2]
                     stop = seq[translation.end - CODON_SIZE : translation.end]
                     if translate(met) != START_AA or translate(stop) != STOP_AA:
-                        warn(
-                            'Sequence error. The sequence computed from the reference does look like '
-                            'a valid translation'
+                        logger.warning(
+                            'Sequence error. The sequence computed from the reference does look like a valid translation'
                         )
                         continue
                 spl_tx.translations.append(translation)
@@ -375,12 +373,12 @@ def load(self, ignore_cache=False, verbose=True):
             return self
         if self.key in ReferenceFile.CACHE and not ignore_cache:
             if verbose:
-                LOG('cached content:', self.name)
+                logger.info(f'cached content: {self.name}')
             self.content = ReferenceFile.CACHE[self.key].content
             return self
         self.files_exist()
         try:
-            LOG('loading:', self.name, time_stamp=True)
+            logger.info(f'loading: {self.name}')
             self.content = self.loader(*self.name, **self.opt)
             ReferenceFile.CACHE[self.key] = self
         except Exception as err:
diff --git a/src/mavis/annotate/main.py b/src/mavis/annotate/main.py
index e1df8e68..9b08f34e 100644
--- a/src/mavis/annotate/main.py
+++ b/src/mavis/annotate/main.py
@@ -10,7 +10,7 @@
 from ..error import DrawingFitError, NotSpecifiedError
 from ..illustrate.constants import DiagramSettings
 from ..illustrate.diagram import draw_sv_summary_diagram
-from ..util import LOG, generate_complete_stamp, mkdirp, read_inputs
+from ..util import generate_complete_stamp, logger, mkdirp, read_inputs
 from .constants import PASS_FILENAME
 from .file_io import ReferenceFile
 from .fusion import determine_prime
@@ -54,12 +54,8 @@ def draw(drawing_config, ann, reference_genome, template_metadata, drawings_dire
     )
 
     for i, (curr_width, other_settings) in enumerate(drawing_attempts):
-        LOG(
-            'drawing attempt:',
-            i + 1,
-            str(curr_width) + 'px',
-            other_settings if other_settings else '',
-            time_stamp=False,
+        logger.info(
+            f'drawing attempt: {i + 1} {curr_width}px {other_settings if other_settings else ""}'
         )
         try:
             drawing_config.width = curr_width
@@ -68,7 +64,7 @@ def draw(drawing_config, ann, reference_genome, template_metadata, drawings_dire
                 ann,
                 reference_genome=reference_genome,
                 templates=template_metadata,
-                **other_settings
+                **other_settings,
             )
 
             gene_aliases1 = 'NA'
@@ -99,10 +95,10 @@ def draw(drawing_config, ann, reference_genome, template_metadata, drawings_dire
 
             drawing = os.path.join(drawings_directory, name + '.svg')
             legend = os.path.join(drawings_directory, name + '.legend.json')
-            LOG('generating svg:', drawing, time_stamp=False)
+            logger.info(f'generating svg: {drawing}')
             canvas.saveas(drawing)
 
-            LOG('generating legend:', legend, time_stamp=False)
+            logger.info(f'generating legend: {legend}')
             with open(legend, 'w') as fh:
                 json.dump(legend_json, fh)
             break
@@ -118,7 +114,7 @@ def main(
     library: str,
     config: Dict,
     start_time=int(time.time()),
-    **kwargs
+    **kwargs,
 ):
     """
     Args:
@@ -148,7 +144,7 @@ def main(
         expand_orient=True,
         expand_svtype=True,
     )
-    LOG('read {} breakpoint pairs'.format(len(bpps)))
+    logger.info(f'read {len(bpps)} breakpoint pairs')
 
     annotations.load()
     reference_genome.load()
@@ -161,7 +157,6 @@ def main(
         min_domain_mapping_match=config['annotate.min_domain_mapping_match'],
         max_proximity=config['cluster.max_proximity'],
         max_orf_cap=config['annotate.max_orf_cap'],
-        log=LOG,
         filters=annotation_filters,
     )
 
@@ -187,9 +182,9 @@ def main(
         COLUMNS.protein_synon,
     }
     header = None
-    LOG('opening for write:', tabbed_output_file)
+    logger.info(f'opening for write: {tabbed_output_file}')
     tabbed_fh = open(tabbed_output_file, 'w')
-    LOG('opening for write:', fa_output_file)
+    logger.info(f'opening for write: {fa_output_file}')
     fasta_fh = open(fa_output_file, 'w')
 
     try:
@@ -201,14 +196,10 @@ def main(
                 header_req.update(ann_row.keys())
                 header = sort_columns(header_req)
                 tabbed_fh.write('\t'.join([str(c) for c in header]) + '\n')
-            LOG(
-                '({} of {}) current annotation'.format(i + 1, total),
-                ann.annotation_id,
-                ann.transcript1,
-                ann.transcript2,
-                ann.event_type,
+            logger.info(
+                f'({i + 1} of {total}) current annotation {ann.annotation_id} {ann.transcript1} {ann.transcript2} {ann.event_type}'
             )
-            LOG(ann, time_stamp=False)
+            logger.info(str(ann))
             # get the reference sequences for either transcript
             ref_cdna_seq = {}
             ref_protein_seq = {}
@@ -289,9 +280,9 @@ def main(
                 rows = [ann_row]
             for row in rows:
                 tabbed_fh.write('\t'.join([str(row.get(k, None)) for k in header]) + '\n')
-        generate_complete_stamp(output, LOG, start_time=start_time)
+        generate_complete_stamp(output, start_time=start_time)
     finally:
-        LOG('closing:', tabbed_output_file)
+        logger.info(f'closing: {tabbed_output_file}')
         tabbed_fh.close()
-        LOG('closing:', fa_output_file)
+        logger.info(f'closing: {fa_output_file}')
         fasta_fh.close()
diff --git a/src/mavis/annotate/variant.py b/src/mavis/annotate/variant.py
index 0a81f34b..7eaf2149 100644
--- a/src/mavis/annotate/variant.py
+++ b/src/mavis/annotate/variant.py
@@ -8,7 +8,7 @@
 from ..constants import COLUMNS, GENE_PRODUCT_TYPE, PROTOCOL, STOP_AA, STRAND, SVTYPE
 from ..error import NotSpecifiedError
 from ..interval import Interval
-from ..util import DEVNULL
+from ..util import logger
 from .fusion import FusionTranscript, determine_prime
 from .genomic import Gene, IntergenicRegion, PreTranscript, Transcript
 
@@ -62,7 +62,7 @@ def __init__(
             stranded=bpp.stranded,
             untemplated_seq=bpp.untemplated_seq,
             **bpp.data,
-            **kwargs
+            **kwargs,
         )
 
         # match transcript to breakpoint if reveresed
@@ -850,7 +850,6 @@ def annotate_events(
     min_orf_size: int = 200,
     min_domain_mapping_match: float = 0.95,
     max_orf_cap: int = 3,
-    log: Callable = DEVNULL,
     filters: List[Callable] = None,
 ) -> List[Annotation]:
     """
@@ -873,7 +872,7 @@ def annotate_events(
     results = []
     total = len(bpps)
     for i, bpp in enumerate(bpps):
-        log('({} of {}) gathering annotations for'.format(i + 1, total), bpp)
+        logger.info(f'({i + 1} of {total}) gathering annotations for {repr(bpp)}')
         bpp.data[COLUMNS.validation_id] = bpp.data.get(COLUMNS.validation_id, str(uuid()))
         ann_list = _gather_annotations(annotations, bpp, proximity=max_proximity)
         for f in filters:
@@ -904,6 +903,6 @@ def annotate_events(
             except NotImplementedError:
                 pass  # anti-sense fusions will throw this error
             except KeyError as e:
-                log('warning. could not build fusion product', repr(e))
-        log('generated', len(ann_list), 'annotations', time_stamp=False)
+                logger.warning(f'warning. could not build fusion product: {repr(e)}')
+        logger.info(f'generated {len(ann_list)} annotations')
     return results
diff --git a/src/mavis/assemble.py b/src/mavis/assemble.py
index 0683dbad..c5654924 100644
--- a/src/mavis/assemble.py
+++ b/src/mavis/assemble.py
@@ -1,5 +1,4 @@
 import itertools
-import warnings
 
 import distance
 import networkx as nx
@@ -8,7 +7,7 @@
 from .bam.read import calculate_alignment_score, nsb_align, sequence_complexity
 from .constants import reverse_complement
 from .interval import Interval
-from .util import DEVNULL
+from .util import logger
 
 
 class Contig:
@@ -247,9 +246,7 @@ def digraph_connected_components(graph, subgraph=None):
     return nx.connected_components(g)
 
 
-def pull_contigs_from_component(
-    assembly, component, min_edge_trim_weight, assembly_max_paths, log=DEVNULL
-):
+def pull_contigs_from_component(assembly, component, min_edge_trim_weight, assembly_max_paths):
     """
     builds contigs from the a connected component of the assembly DeBruijn graph
 
@@ -258,7 +255,6 @@ def pull_contigs_from_component(
         component (list):  list of nodes which make up the connected component
         min_edge_trim_weight (int): the minimum weight to not remove a non cutting edge/path
         assembly_max_paths (int): the maximum number of paths allowed before the graph is further simplified
-        log (Callable): the log function
 
     Returns:
         Dict[str,int]: the paths/contigs and their scores
@@ -287,12 +283,8 @@ def pull_contigs_from_component(
 
             if w > edge_weights[-1]:
                 continue
-            log(
-                'reducing estimated paths. Current estimate is {}+ from'.format(paths_est),
-                len(component),
-                'nodes',
-                'filter increase',
-                w,
+            logger.debug(
+                f'reducing estimated paths. Current estimate is {paths_est}+ from {len(component)} nodes filter increase {w}',
             )
             assembly.trim_forks_by_freq(w)
             assembly.trim_noncutting_paths_by_freq(w)
@@ -353,8 +345,7 @@ def assemble(
     assembly_max_paths=20,
     assembly_min_uniq=0.01,
     min_complexity=0,
-    log=lambda *pos, **kwargs: None,
-    **kwargs
+    **kwargs,
 ):
     """
     for a set of sequences creates a DeBruijnGraph
@@ -372,7 +363,6 @@ def assemble(
         min_contig_length: Minimum length of contigs assemble to attempt remapping reads to. Shorter contigs will be ignored
         remap_min_exact_match: see [assembly_min_exact_match_to_remap](/configuration/settings/#assembly_min_exact_match_to_remap)
         assembly_max_paths: see [assembly_max_paths](/configuration/settings/#assembly_max_paths)
-        log (Callable): the log function
 
     Returns:
         List[Contig]: a list of putative contigs
@@ -404,7 +394,7 @@ def assemble(
     for component in digraph_connected_components(assembly):
         subgraph = assembly.subgraph(component)
         if not nx.is_directed_acyclic_graph(subgraph):
-            log('dropping cyclic component', time_stamp=False)
+            logger.debug('dropping cyclic component')
             for node in subgraph.get_nodes():
                 assembly.remove_node(node)
     # initial data cleaning
@@ -422,12 +412,11 @@ def assemble(
                 component,
                 min_edge_trim_weight=min_edge_trim_weight,
                 assembly_max_paths=assembly_max_paths,
-                log=log,
             )
         )
 
     # now map the contigs to the possible input sequences
-    log('filtering contigs by size and complexity', len(path_scores), time_stamp=False)
+    logger.debug(f'filtering contigs by size and complexity {len(path_scores)}')
     contigs = []
     for seq, score in list(path_scores.items()):
         contig = Contig(seq, score)
@@ -435,10 +424,10 @@ def assemble(
             not min_complexity or contig.complexity() >= min_complexity
         ):
             contigs.append(contig)
-    log('filtering similar contigs', len(contigs))
+    logger.debug(f'filtering similar contigs {len(contigs)}')
     # remap the input reads
     contigs = filter_contigs(contigs, assembly_min_uniq)
-    log('remapping reads to {} contigs'.format(len(contigs)))
+    logger.debug(f'remapping reads to {len(contigs)} contigs')
 
     for input_seq in sequences:
         maps_to = {}  # contig, score
@@ -470,7 +459,7 @@ def assemble(
             assert len(best_alignments) >= 1
             for contig, read in best_alignments:
                 contig.add_mapped_sequence(read, len(best_alignments))
-    log('assemblies complete')
+    logger.debug('assemblies complete')
     return contigs
 
 
diff --git a/src/mavis/bam/cache.py b/src/mavis/bam/cache.py
index dcd0c0f4..866d4003 100644
--- a/src/mavis/bam/cache.py
+++ b/src/mavis/bam/cache.py
@@ -1,14 +1,12 @@
 import atexit
-import logging
 import re
-import warnings
 
 import pysam
 
-from .read import SamRead
+from .. import util as _util
 from ..annotate.base import ReferenceName
 from ..interval import Interval
-from .. import util as _util
+from .read import SamRead
 
 
 class BamCache:
@@ -50,7 +48,7 @@ def add_read(self, read):
             read (pysam.AlignedSegment): the read to add to the cache
         """
         if not read.is_unmapped and read.reference_start == read.reference_end:
-            _util.LOG('ignoring invalid read', read.query_name, level=logging.DEBUG)
+            _util.logger.debug(f'ignoring invalid read: {read.query_name}')
             return
         if not isinstance(read, SamRead):
             read = SamRead.copy(read)
@@ -168,7 +166,7 @@ def fetch(
             if stop_on_cached_read and self.has_read(read):
                 break
             if not read.is_unmapped and read.reference_start == read.reference_end:
-                _util.LOG('ignoring invalid read', read.query_name, level=logging.DEBUG)
+                _util.logger.debug(f'ignoring invalid read {read.query_name}')
                 continue
             read = SamRead.copy(read)
             if not filter_if(read):
@@ -231,7 +229,7 @@ def fetch_from_bins(
                 if bin_limit is not None and count >= running_surplus:
                     break
                 if not read.is_unmapped and read.reference_start == read.reference_end:
-                    _util.LOG('ignoring invalid read', read.query_name, level=logging.DEBUG)
+                    _util.logger.debug(f'ignoring invalid read {read.query_name}')
                     continue
                 read = SamRead.copy(read)
                 if not filter_if(read):
@@ -273,10 +271,10 @@ def get_mate(self, read, primary_only=True, allow_file_access=False):
             if not allow_file_access or read.mate_is_unmapped:
                 raise KeyError('mate is not found in the cache')
             else:
-                warnings.warn(
-                    'looking for uncached mate of {0}. This requires file access and'
+                _util.logger.warning(
+                    f'looking for uncached mate of {read.query_name}. This requires file access and'
                     ' requests may be slow. This should also not be using in a loop iterating using the file pointer '
-                    ' as it will change the file pointer position'.format(read.query_name)
+                    ' as it will change the file pointer position'
                 )
                 m = self.fh.mate(read)
                 m = SamRead.copy(m)
diff --git a/src/mavis/bam/stats.py b/src/mavis/bam/stats.py
index e161227c..7901c044 100644
--- a/src/mavis/bam/stats.py
+++ b/src/mavis/bam/stats.py
@@ -1,12 +1,12 @@
 #!/projects/tumour_char/analysis_scripts/python/centos06/anaconda3_v2.3.0/bin/python
 import math
+import os
 import statistics as stats
 import warnings
 
-import os
-
-from .read import sequenced_strand
 from ..constants import STRAND
+from ..util import logger
+from .read import sequenced_strand
 
 os.environ["OMP_NUM_THREADS"] = "4"  # export OMP_NUM_THREADS=4
 os.environ["OPENBLAS_NUM_THREADS"] = "4"  # export OPENBLAS_NUM_THREADS=4
@@ -140,10 +140,8 @@ def compute_transcriptome_bam_stats(
         randoms = [int(n * len(total_annotations)) for n in np.random.rand(sample_size)]
         genes = [total_annotations[r] for r in randoms]
     else:
-        warnings.warn(
-            'insufficient annotations to match requested sample size. requested {}, but only {} annotations'.format(
-                sample_size, len(total_annotations)
-            )
+        logger.warning(
+            f'insufficient annotations to match requested sample size. requested {sample_size}, but only {len(total_annotations)} annotations'
         )
 
     fragment_hist = Histogram()
diff --git a/src/mavis/blat.py b/src/mavis/blat.py
index 6efd0fb2..de95d5cb 100644
--- a/src/mavis/blat.py
+++ b/src/mavis/blat.py
@@ -9,7 +9,6 @@
 -- http://wiki.bits.vib.be/index.php/Blat
 
 """
-import logging
 import math
 import re
 
@@ -28,7 +27,7 @@
     reverse_complement,
 )
 from .interval import Interval
-from .util import LOG
+from .util import logger
 
 
 class Blat:
@@ -202,7 +201,7 @@ def split_csv_trailing_ints(x):
                         )
                 final_rows.append(row)
             except AssertionError as err:
-                LOG(type(err), ':', str(err), level=logging.DEBUG)
+                logger.debug(f'{type(err)}:{err}')
         return header, final_rows
 
     @staticmethod
@@ -391,11 +390,9 @@ def process_blat_output(
             try:
                 read = Blat.pslx_row_to_pysam(row, input_bam_cache, reference_genome)
             except KeyError as err:
-                LOG(
-                    'warning: reference template name not recognized', str(err), level=logging.DEBUG
-                )
+                logger.debug(f'warning: reference template name not recognized: {err}')
             except AssertionError as err:
-                LOG('warning: invalid blat alignment', repr(err), level=logging.DEBUG)
+                logger.warning(f'invalid blat alignment {repr(err)}')
             else:
                 reads.append((row, read))
 
diff --git a/src/mavis/cluster/cluster.py b/src/mavis/cluster/cluster.py
index 71fa39bd..36941d57 100644
--- a/src/mavis/cluster/cluster.py
+++ b/src/mavis/cluster/cluster.py
@@ -1,13 +1,13 @@
 from __future__ import division
 
+import itertools
 from collections import namedtuple
 from copy import copy
-import itertools
 
 from ..breakpoint import Breakpoint, BreakpointPair
 from ..constants import ORIENT, STRAND
 from ..interval import Interval
-from ..util import LOG
+from ..util import logger
 
 
 class BreakpointPairGroupKey(
@@ -285,7 +285,7 @@ def pair_center_distance(pair1, pair2):
     for group_key in sorted(set(list(groups) + list(phase2_groups))):
         count = len(groups.get(group_key, [])) + len(phase2_groups.get(group_key, []))
         if verbose:
-            LOG(group_key, 'pairs:', count)
+            logger.info(f'{group_key} pairs: {count}')
         nodes = merge_by_union(
             groups.get(group_key, []),
             group_key,
@@ -375,7 +375,7 @@ def pair_center_distance(pair1, pair2):
                 )
                 nodes.setdefault(new_bpp, []).append(pair)
         if verbose:
-            LOG('merged', count, 'down to', len(nodes))
+            logger.info(f'merged {count} down to {len(nodes)}')
         for node, pairs in nodes.items():
             if node in mapping:
                 raise KeyError('duplicate merge node', str(node), node, pair_key(node))
diff --git a/src/mavis/cluster/main.py b/src/mavis/cluster/main.py
index ae07c755..aec884ba 100644
--- a/src/mavis/cluster/main.py
+++ b/src/mavis/cluster/main.py
@@ -9,10 +9,10 @@
 from ..breakpoint import BreakpointPair
 from ..constants import COLUMNS, SUBCOMMAND
 from ..util import (
-    LOG,
     filter_on_overlap,
     filter_uninformative,
     generate_complete_stamp,
+    logger,
     mkdirp,
     output_tabbed_file,
     read_inputs,
@@ -27,12 +27,11 @@ def split_clusters(
     clusters: List[BreakpointPair],
     outputdir: str,
     total_batches: int,
-    min_clusters_per_file: int = 0,
     write_bed_summary: bool = True,
 ):
     """
     For a set of clusters creates a bed file representation of all clusters.
-    Also splits the clusters evenly into multiple files based on the user parameters (min_clusters_per_file, max_files)
+    Also splits the clusters evenly into multiple files based on the user parameters (total_batches)
 
     Returns:
         list: of output file names (not including the bed file)
@@ -119,7 +118,7 @@ def main(
     other_chr = set()
     unfiltered_breakpoint_pairs = []
     filtered_pairs = []
-    LOG('filtering by library and chr name')
+    logger.info('filtering by library and chr name')
     for bpp in breakpoint_pairs:
         if bpp.library is None:
             bpp.library = library
@@ -140,41 +139,38 @@ def main(
         other_chr -= set(config[f'{SECTION}.limit_to_chr'])
     breakpoint_pairs = unfiltered_breakpoint_pairs
     if other_libs:
-        LOG(
-            'warning: ignoring breakpoints found for other libraries:',
-            sorted([lib for lib in other_libs]),
+        logger.warning(
+            f'warning: ignoring breakpoints found for other libraries: {sorted(other_libs)}',
+        )
     if other_chr:
-        LOG('warning: filtered events on chromosomes', other_chr)
+        logger.warning(f'warning: filtered events on chromosomes {other_chr}')
     # filter by masking file
     breakpoint_pairs, masked_pairs = filter_on_overlap(breakpoint_pairs, masking.content)
     for bpp in masked_pairs:
         filtered_pairs.append(bpp)
     # filter by informative
     if config[f'{SECTION}.uninformative_filter']:
-        LOG('filtering from', len(breakpoint_pairs), 'breakpoint pairs using informative filter')
+        logger.info(
+            f'filtering from {len(breakpoint_pairs)} breakpoint pairs using informative filter'
+        )
         pass_clusters, uninformative_clusters = filter_uninformative(
             annotations.content, breakpoint_pairs, max_proximity=config[f'{SECTION}.max_proximity']
         )
-        LOG(
-            'filtered from',
-            len(breakpoint_pairs),
-            'down to',
-            len(pass_clusters),
-            '(removed {})'.format(len(uninformative_clusters)),
+        logger.info(
+            f'filtered from {len(breakpoint_pairs)} down to {len(pass_clusters)} (removed {len(uninformative_clusters)})'
+        )
         breakpoint_pairs = pass_clusters
         for bpp in uninformative_clusters:
             bpp.data[COLUMNS.filter_comment] = 'Uninformative'
             filtered_pairs.append(bpp)
     else:
-        LOG('did not apply uninformative filter')
+        logger.info('did not apply uninformative filter')
 
     mkdirp(output)
     output_tabbed_file(filtered_pairs, filtered_output)
 
     if not config[f'{SECTION}.split_only']:
-        LOG('computing clusters')
+        logger.info('computing clusters')
         clusters = merge_breakpoint_pairs(
             breakpoint_pairs,
             cluster_radius=config[f'{SECTION}.cluster_radius'],
@@ -211,9 +207,9 @@ def main(
                 common_data = set(common_data)
                 if len(common_data) == 1:
                     cluster.data[item] = list(common_data)[0]
-        LOG('computed', len(clusters), 'clusters', time_stamp=False)
-        LOG('cluster input pairs distribution', sorted(hist.items()), time_stamp=False)
-        LOG('cluster intervals lengths', sorted(length_hist.items()), time_stamp=False)
+        logger.info(f'computed {len(clusters)} clusters')
+        logger.info(f'cluster input pairs distribution {sorted(hist.items())}')
+        logger.info(f'cluster intervals lengths {sorted(length_hist.items())}')
         # map input pairs to cluster ids
         # now create the mapping from the original input files to the cluster(s)
 
@@ -234,9 +230,8 @@ def main(
         breakpoint_pairs,
         output,
         total_batches=lib_config['total_batches'],
-        min_clusters_per_file=config[f'{SECTION}.min_clusters_per_file'],
         write_bed_summary=True,
     )
 
-    generate_complete_stamp(output, LOG, start_time=start_time)
+    generate_complete_stamp(output, start_time=start_time)
     return output_files
diff --git a/src/mavis/config.py b/src/mavis/config.py
index a0b4341a..f043263e 100644
--- a/src/mavis/config.py
+++ b/src/mavis/config.py
@@ -1,17 +1,11 @@
 import argparse
-import os
 from copy import copy as _copy
-from typing import Dict, Optional
-
-import snakemake
-from mavis_config import bash_expands
-from snakemake.exceptions import WorkflowError
-from snakemake.utils import validate as snakemake_validate
+from typing import Dict
 
 from .annotate.file_io import ReferenceFile
 from .bam import stats
 from .bam.cache import BamCache
-from .constants import INTEGER_COLUMNS, PROTOCOL, SUBCOMMAND, float_fraction
+from .constants import PROTOCOL, float_fraction
 from .util import cast_boolean, filepath
 
 
diff --git a/src/mavis/constants.py b/src/mavis/constants.py
index fc27c03d..35a4731b 100644
--- a/src/mavis/constants.py
+++ b/src/mavis/constants.py
@@ -2,7 +2,6 @@
 module responsible for small utility functions and constants used throughout the structural_variant package
 """
 import argparse
-import os
 import re
 from typing import List
 
@@ -10,7 +9,7 @@
 from Bio.Alphabet.IUPAC import ambiguous_dna
 from Bio.Data.IUPACData import ambiguous_dna_values
 from Bio.Seq import Seq
-from mavis_config.constants import SUBCOMMAND, MavisNamespace
+from mavis_config.constants import MavisNamespace
 
 PROGNAME: str = 'mavis'
 EXIT_OK: int = 0
diff --git a/src/mavis/illustrate/diagram.py b/src/mavis/illustrate/diagram.py
index c9f757fd..f9e6fabc 100644
--- a/src/mavis/illustrate/diagram.py
+++ b/src/mavis/illustrate/diagram.py
@@ -4,13 +4,11 @@
 """
 from svgwrite import Drawing
 
-from .elements import draw_exon_track, draw_genes, draw_template, draw_ustranscript, draw_vmarker
-from .scatter import draw_scatter
-from .util import generate_interval_mapping, LabelMapping
-
 from ..annotate.genomic import IntergenicRegion
 from ..interval import Interval
-from ..util import DEVNULL
+from .elements import draw_exon_track, draw_genes, draw_template, draw_ustranscript, draw_vmarker
+from .scatter import draw_scatter
+from .util import LabelMapping, generate_interval_mapping
 
 # draw gene level view
 # draw gene box
@@ -382,9 +380,7 @@ def draw_sv_summary_diagram(
     return canvas, legend
 
 
-def draw_multi_transcript_overlay(
-    config, gene, vmarkers=None, window_buffer=0, plots=None, log=DEVNULL
-):
+def draw_multi_transcript_overlay(config, gene, vmarkers=None, window_buffer=0, plots=None):
     vmarkers = [] if vmarkers is None else vmarkers
     plots = [] if plots is None else plots
 
@@ -434,7 +430,7 @@ def draw_multi_transcript_overlay(
 
     for plot in plots:
         if plot.points:
-            plot_group = draw_scatter(config, canvas, plot, mapping, log=log)
+            plot_group = draw_scatter(config, canvas, plot, mapping)
             main_group.add(plot_group)
             plot_group.translate(x, y)
             y += plot.height + config.padding * 2
diff --git a/src/mavis/illustrate/scatter.py b/src/mavis/illustrate/scatter.py
index 976a6a85..fd78a66f 100644
--- a/src/mavis/illustrate/scatter.py
+++ b/src/mavis/illustrate/scatter.py
@@ -2,7 +2,7 @@
 
 from ..bam.read import pileup, sequenced_strand
 from ..interval import Interval
-from ..util import DEVNULL, LOG
+from ..util import logger
 
 
 def bam_to_scatter(
@@ -40,7 +40,7 @@ def bam_to_scatter(
     if not axis_name:
         axis_name = os.path.basename(bam_file)
     # one plot per bam
-    LOG('reading:', bam_file)
+    logger.info(f'reading: {bam_file}')
     plot = None
     samfile = pysam.AlignmentFile(bam_file, 'rb')
 
@@ -63,7 +63,7 @@ def read_filter(read):
         except ValueError:  # chrom not in bam
             pass
 
-        LOG('scatter plot {} has {} points'.format(axis_name, len(points)))
+        logger.info(f'scatter plot {axis_name} has {len(points)} points')
         plot = ScatterPlot(
             points,
             axis_name,
@@ -123,7 +123,7 @@ def __init__(
         self.density = density
 
 
-def draw_scatter(ds, canvas, plot, xmapping, log=DEVNULL):
+def draw_scatter(ds, canvas, plot, xmapping):
     """
     given a xmapping, draw the scatter plot svg group
 
@@ -163,10 +163,7 @@ def draw_scatter(ds, canvas, plot, xmapping, log=DEVNULL):
             )
         except IndexError:
             pass
-    log(
-        'drew {} of {} points (density={})'.format(len(circles), len(plot.points), plot.density),
-        time_stamp=False,
-    )
+    logger.info(f'drew {len(circles)} of {len(plot.points)} points (density={plot.density})')
 
     for x_px, y_px, color in px_points:
         if x_px.length() > ds.scatter_marker_radius:
diff --git a/src/mavis/interval.py b/src/mavis/interval.py
index c78e3aa8..3c73caf9 100644
--- a/src/mavis/interval.py
+++ b/src/mavis/interval.py
@@ -1,4 +1,4 @@
-from typing import Callable, Optional
+from typing import Optional
 
 
 class Interval:
diff --git a/src/mavis/main.py b/src/mavis/main.py
index ea2f93aa..d417d7d5 100644
--- a/src/mavis/main.py
+++ b/src/mavis/main.py
@@ -31,7 +31,6 @@ def convert_main(inputs, outputfile, file_type, strand_specific=False, assume_no
         inputs,
         file_type,
         strand_specific,
-        _util.LOG,
         True,
         assume_no_untemplated=assume_no_untemplated,
     )
@@ -186,8 +185,8 @@ def main(argv=None):
         log_conf['filename'] = args.log
     logging.basicConfig(**log_conf)
 
-    _util.LOG('MAVIS: {}'.format(__version__))
-    _util.LOG('hostname:', platform.node(), time_stamp=False)
+    _util.logger.info(f'MAVIS: {__version__}')
+    _util.logger.info(f'hostname: {platform.node()}')
     _util.log_arguments(args)
 
     config: Dict = dict()
@@ -267,7 +266,7 @@ def main(argv=None):
             # add bam stats to the config if missing
             if not config.get('skip_stage.validate'):
                 _config.add_bamstats_to_config(config)
-            _util.LOG(f'writing: {args.outputfile}')
+            _util.logger.info(f'writing: {args.outputfile}')
             with open(args.outputfile, 'w') as fh:
                 fh.write(json.dumps(config, sort_keys=True, indent='  '))
         else:
@@ -284,11 +283,10 @@ def main(argv=None):
         hours = duration - duration % 3600
         minutes = duration - hours - (duration - hours) % 60
         seconds = duration - hours - minutes
-        _util.LOG(
-            'run time (hh/mm/ss): {}:{:02d}:{:02d}'.format(hours // 3600, minutes // 60, seconds),
-            time_stamp=False,
+        _util.logger.info(
+            'run time (hh/mm/ss): {}:{:02d}:{:02d}'.format(hours // 3600, minutes // 60, seconds)
         )
-        _util.LOG('run time (s): {}'.format(duration), time_stamp=False)
+        _util.logger.info(f'run time (s): {duration}')
     except Exception as err:
         raise err
     finally:
diff --git a/src/mavis/overlay.py b/src/mavis/overlay.py
index 9543ff84..fccd5e34 100644
--- a/src/mavis/overlay.py
+++ b/src/mavis/overlay.py
@@ -92,10 +92,8 @@ def main(
         for gene in annotations.content[chrom]:
             if gene_name in gene.aliases or gene_name == gene.name:
                 gene_to_draw = gene
-                _util.LOG(
-                    'Found target gene: {}(aka. {}) {}:{}-{}'.format(
-                        gene.name, gene.aliases, gene.chr, gene.start, gene.end
-                    )
+                _util.logger.info(
+                    f'Found target gene: {gene.name}(aka. {gene.aliases}) {gene.chr}:{gene.start}-{gene.end}'
                 )
                 break
     if gene_to_draw is None:
@@ -141,17 +139,16 @@ def main(
                 vmarkers=vmarkers,
                 plots=plots,
                 window_buffer=buffer_length,
-                log=_util.LOG,
             )
             break
         except DrawingFitError as err:
             if attempts > max_drawing_retries:
                 raise err
-            _util.LOG('Drawing fit: extending window', drawing_width_iter_increase)
+            _util.logger.info(f'Drawing fit: extending window {drawing_width_iter_increase}')
             settings.width += drawing_width_iter_increase
             attempts += 1
 
     svg_output_file = os.path.join(output, '{}_{}_overlay.svg'.format(gene_to_draw.name, gene_name))
-    _util.LOG('writing:', svg_output_file)
+    _util.logger.info(f'writing: {svg_output_file}')
 
     canvas.saveas(svg_output_file)
diff --git a/src/mavis/pairing/main.py b/src/mavis/pairing/main.py
index 850d0173..8b92586c 100644
--- a/src/mavis/pairing/main.py
+++ b/src/mavis/pairing/main.py
@@ -8,7 +8,7 @@
 from ..annotate.file_io import ReferenceFile
 from ..breakpoint import BreakpointPair
 from ..constants import CALL_METHOD, COLUMNS, PROTOCOL, SPLICE_TYPE, SVTYPE
-from ..util import LOG, generate_complete_stamp, output_tabbed_file, read_inputs
+from ..util import generate_complete_stamp, logger, output_tabbed_file, read_inputs
 from .pairing import inferred_equivalent, pair_by_distance, product_key
 
 
@@ -55,7 +55,7 @@ def main(
             expand_svtype=False,
         )
     )
-    LOG('read {} breakpoint pairs'.format(len(bpps)))
+    logger.info(f'read {len(bpps)} breakpoint pairs')
 
     # load all transcripts
     reference_transcripts = dict()
@@ -99,20 +99,16 @@ def main(
 
     distance_pairings: Dict[str, Set[str]] = {}
     product_pairings: Dict[str, Set[str]] = {}
-    LOG('computing distance based pairings')
+    logger.info('computing distance based pairings')
     # pairwise comparison of breakpoints between all libraries
     for set_num, (category, calls) in enumerate(
         sorted(calls_by_cat.items(), key=lambda x: (len(x[1]), x[0]), reverse=True)
     ):
-        LOG(
-            'comparing set {} of {} with {} items'.format(
-                set_num + 1, len(calls_by_cat), len(calls)
-            )
-        )
+        logger.info(f'comparing set {set_num + 1} of {len(calls_by_cat)} with {len(calls)} items')
         for node, adj_list in pair_by_distance(calls, distances, against_self=False).items():
             distance_pairings.setdefault(node, set()).update(adj_list)
 
-    LOG('computing inferred (by product) pairings')
+    logger.info('computing inferred (by product) pairings')
     for calls in calls_by_ann.values():
         calls_by_lib: Dict[str, List[BreakpointPair]] = {}
         for call in calls:
@@ -140,4 +136,4 @@ def main(
 
     fname = os.path.join(output, 'mavis_paired.tab')
     output_tabbed_file(bpps, fname)
-    generate_complete_stamp(output, LOG)
+    generate_complete_stamp(output)
diff --git a/src/mavis/pairing/pairing.py b/src/mavis/pairing/pairing.py
index f1fb52b5..9d5d87f2 100644
--- a/src/mavis/pairing/pairing.py
+++ b/src/mavis/pairing/pairing.py
@@ -1,11 +1,11 @@
-from typing import Callable, Dict, List, Optional, Set
+from typing import Dict, List, Optional, Set
 
 from ..annotate.variant import determine_prime
 from ..breakpoint import Breakpoint, BreakpointPair
 from ..constants import CALL_METHOD, COLUMNS, ORIENT, PRIME, PROTOCOL, STRAND
 from ..error import NotSpecifiedError
 from ..interval import Interval
-from ..util import DEVNULL
+from ..util import logger
 from .constants import PAIRING_DISTANCES
 
 
@@ -183,7 +183,7 @@ def equivalent(event1: BreakpointPair, event2: BreakpointPair, distances=None) -
 
 
 def pair_by_distance(
-    calls: List[BreakpointPair], distances, log: Callable = DEVNULL, against_self: bool = False
+    calls: List[BreakpointPair], distances, against_self: bool = False
 ) -> Dict[str, Set[str]]:
     """
     for a set of input calls, pair by distance
@@ -195,14 +195,8 @@ def pair_by_distance(
     max_distance = max(distances.values())
     max_useq = max([len(c.untemplated_seq) if c.untemplated_seq else 0 for c in calls] + [0])
     max_distance += max_useq * 2
-    log(
-        'lowest_resolution',
-        lowest_resolution,
-        'max_distance',
-        max_distance,
-        'possible comparisons',
-        len(break1_sorted) * len(break1_sorted),
-        time_stamp=False,
+    logger.debug(
+        f'lowest_resolution:{lowest_resolution} max_distance:{max_distance} possible comparisons:{len(break1_sorted) * len(break1_sorted)}'
     )
 
     comparisons = 0
@@ -241,7 +235,7 @@ def pair_by_distance(
             if equivalent(current, other, distances=distances):
                 distance_pairings.setdefault(product_key(current), set()).add(product_key(other))
                 distance_pairings.setdefault(product_key(other), set()).add(product_key(current))
-    log('computed {} comparisons'.format(comparisons), time_stamp=False)
+    logger.debug(f'computed {comparisons} comparisons')
     return distance_pairings
 
 
diff --git a/src/mavis/summary/main.py b/src/mavis/summary/main.py
index 94f79ddb..309da1b8 100644
--- a/src/mavis/summary/main.py
+++ b/src/mavis/summary/main.py
@@ -9,7 +9,7 @@
 from ..annotate.file_io import ReferenceFile
 from ..breakpoint import BreakpointPair
 from ..constants import CALL_METHOD, COLUMNS, PROTOCOL, SPLICE_TYPE, SVTYPE
-from ..util import LOG, generate_complete_stamp, output_tabbed_file, read_inputs, soft_cast
+from ..util import generate_complete_stamp, logger, output_tabbed_file, read_inputs, soft_cast
 from .constants import HOMOPOLYMER_MIN_LENGTH
 from .summary import (
     annotate_dgv,
@@ -282,12 +282,12 @@ def main(inputs: List[str], output: str, config: Dict, start_time=int(time.time(
 
     rows = []
     for lib in bpps_by_library:
-        LOG('annotating dgv for', lib)
+        logger.info(f'annotating dgv for {lib}')
         if not dgv_annotation.is_empty():
             annotate_dgv(
                 bpps_by_library[lib], dgv_annotation.content, distance=10
             )  # TODO make distance a parameter
-        LOG('adding pairing states for', lib)
+        logger.info(f'adding pairing states for {lib}')
         for row in bpps_by_library[lib]:
             # in case no pairing was done, add default (applicable to single library summaries)
             row.data.setdefault(COLUMNS.inferred_pairing, '')
@@ -312,7 +312,7 @@ def main(inputs: List[str], output: str, config: Dict, start_time=int(time.time(
                         other_protocol=other_protocol,
                         other_disease_state=other_disease_state,
                         is_matched=other_lib in paired_libraries,
-                        inferred_is_matched=other_lib in inferred_paired_libraries
+                        inferred_is_matched=other_lib in inferred_paired_libraries,
                     )
                 else:
                     pairing_state = 'Not Applicable'
@@ -324,7 +324,7 @@ def main(inputs: List[str], output: str, config: Dict, start_time=int(time.time(
         output, 'mavis_summary_all_{}.tab'.format('_'.join(sorted(list(libraries.keys()))))
     )
     output_tabbed_file(rows, fname, header=output_columns)
-    LOG('wrote {} structural variants to {}'.format(len(rows), fname))
+    logger.info(f'wrote {len(rows)} structural variants to {fname}')
     output_tabbed_file(filtered_pairs, os.path.join(output, 'filtered_pairs.tab'))
     # output by library non-synon protein-product
     for lib in bpps_by_library:
@@ -344,4 +344,4 @@ def main(inputs: List[str], output: str, config: Dict, start_time=int(time.time(
             ):
                 lib_rows.append(row)
         output_tabbed_file(lib_rows, filename, header=output_columns)
-    generate_complete_stamp(output, LOG)
+    generate_complete_stamp(output)
diff --git a/src/mavis/tools/__init__.py b/src/mavis/tools/__init__.py
index 8dfc2db0..a1b814bd 100644
--- a/src/mavis/tools/__init__.py
+++ b/src/mavis/tools/__init__.py
@@ -7,7 +7,7 @@
 from ..breakpoint import Breakpoint, BreakpointPair
 from ..constants import COLUMNS, ORIENT, STRAND, SVTYPE
 from ..error import InvalidRearrangement
-from ..util import DEVNULL, read_bpp_from_input_file
+from ..util import logger, read_bpp_from_input_file
 from .breakdancer import convert_file as _convert_breakdancer_file
 from .chimerascan import convert_row as _parse_chimerascan
 from .cnvnator import convert_row as _parse_cnvnator
@@ -21,7 +21,6 @@ def convert_tool_output(
     fnames: List[str],
     file_type: str = SUPPORTED_TOOL.MAVIS,
     stranded: bool = False,
-    log: Callable = DEVNULL,
     collapse: bool = True,
     assume_no_untemplated: bool = True,
 ) -> List[BreakpointPair]:
@@ -32,14 +31,14 @@ def convert_tool_output(
     for fname in fnames:
         result.extend(
             _convert_tool_output(
-                fname, file_type, stranded, log, assume_no_untemplated=assume_no_untemplated
+                fname, file_type, stranded, assume_no_untemplated=assume_no_untemplated
             )
         )
     if collapse:
         collapse_mapping: Dict[BreakpointPair, List[BreakpointPair]] = {}
         for bpp in result:
             collapse_mapping.setdefault(bpp, []).append(bpp)
-        log('collapsed', len(result), 'to', len(collapse_mapping), 'calls')
+        logger.debug(f'collapsed {len(result)} to {len(collapse_mapping)} calls')
         result = []
         temp_sets = set()
         for bpp, bpp_list in collapse_mapping.items():
@@ -217,7 +216,7 @@ def _convert_tool_row(
                 untemplated_seq=untemplated_seq,
                 event_type=event_type,
                 stranded=stranded,
-                **{COLUMNS.tools: file_type, COLUMNS.tracking_id: std_row[COLUMNS.tracking_id]}
+                **{COLUMNS.tools: file_type, COLUMNS.tracking_id: std_row[COLUMNS.tracking_id]},
             )
 
             for col, value in std_row.items():
@@ -242,10 +241,9 @@ def _convert_tool_output(
     input_file: str,
     file_type: str = SUPPORTED_TOOL.MAVIS,
     stranded: bool = False,
-    log: Callable = DEVNULL,
     assume_no_untemplated: bool = True,
 ) -> List[BreakpointPair]:
-    log('reading:', input_file)
+    logger.info(f'reading: {input_file}')
     result = []
     rows = None
     if file_type == SUPPORTED_TOOL.MAVIS:
@@ -288,7 +286,7 @@ def _convert_tool_output(
         SUPPORTED_TOOL.BREAKSEQ,
         SUPPORTED_TOOL.STRELKA,
     ]:
-        rows = read_vcf(input_file, file_type, log)
+        rows = read_vcf(input_file)
     elif file_type == SUPPORTED_TOOL.BREAKDANCER:
         rows = _convert_breakdancer_file(input_file)
     else:
@@ -296,16 +294,16 @@ def _convert_tool_output(
         df.columns = [c[1:] if c.startswith('#') else c for c in df.columns]
         rows = df.where(df.notnull(), None).to_dict('records')
     if rows:
-        log('found', len(rows), 'rows')
+        logger.info(f'found {len(rows)} rows')
         for row in rows:
             try:
                 std_rows = _convert_tool_row(
                     row, file_type, stranded, assume_no_untemplated=assume_no_untemplated
                 )
             except Exception as err:
-                log('Error in converting row', row)
+                logger.error(f'Error in converting row {row}')
                 raise err
             else:
                 result.extend(std_rows)
-    log('generated', len(result), 'breakpoint pairs')
+    logger.info(f'generated {len(result)} breakpoint pairs')
     return result
diff --git a/src/mavis/tools/vcf.py b/src/mavis/tools/vcf.py
index eea0fadf..7717d743 100644
--- a/src/mavis/tools/vcf.py
+++ b/src/mavis/tools/vcf.py
@@ -12,8 +12,7 @@
     from typing_extensions import TypedDict
 
 from ..constants import COLUMNS, ORIENT, SVTYPE
-from ..util import DEVNULL
-from .constants import SUPPORTED_TOOL
+from ..util import logger
 
 PANDAS_DEFAULT_NA_VALUES = [
     '-1.#IND',
@@ -122,7 +121,7 @@ def parse_bnd_alt(alt: str) -> Tuple[str, int, str, str, str, str]:
         raise NotImplementedError('alt specification in unexpected format', alt)
 
 
-def convert_record(record, record_mapping={}, log=DEVNULL) -> List[Dict]:
+def convert_record(record, record_mapping={}) -> List[Dict]:
     """
     converts a vcf record
 
@@ -143,7 +142,7 @@ def convert_record(record, record_mapping={}, log=DEVNULL) -> List[Dict]:
             try:
                 value = record.info[key]
             except UnicodeDecodeError as err:
-                log('Ignoring invalid INFO field {} with error: {}'.format(key, err))
+                logger.warning(f'Ignoring invalid INFO field {key} with error: {err}')
             else:
                 try:
                     value = value[0] if len(value) == 1 else value
@@ -297,12 +296,11 @@ def pandas_vcf(input_file) -> Tuple[List[str], pd.DataFrame]:
     return header_lines, df
 
 
-def convert_file(input_file: str, file_type: str, log):
+def convert_file(input_file: str):
     """process a VCF file
 
     Args:
         input_file: the input file name
-        file_type: the input type
 
     Raises:
         err: [description]
@@ -313,7 +311,7 @@ def convert_file(input_file: str, file_type: str, log):
 
     for variant_record in convert_pandas_rows_to_variants(data):
         try:
-            rows.extend(convert_record(variant_record, log=log))
+            rows.extend(convert_record(variant_record))
         except NotImplementedError as err:
             logging.warning(str(err))
     return rows
diff --git a/src/mavis/util.py b/src/mavis/util.py
index 446520d3..cfb269da 100644
--- a/src/mavis/util.py
+++ b/src/mavis/util.py
@@ -3,13 +3,8 @@
 import logging
 import os
 import re
-import sys
 import time
-from argparse import Namespace
-from datetime import datetime
-from functools import partial
-from glob import glob
-from typing import Any, Callable, Dict, List, Optional, Set
+from typing import Any, Callable, Dict, List, Set
 
 import pandas as pd
 from mavis_config import bash_expands
@@ -25,7 +20,6 @@
     STRAND,
     SUMMARY_LIST_COLUMNS,
     SVTYPE,
-    MavisNamespace,
     sort_columns,
 )
 from .error import InvalidRearrangement
@@ -33,43 +27,7 @@
 
 ENV_VAR_PREFIX = 'MAVIS_'
 
-
-class Log:
-    """
-    wrapper aroung the builtin logging to make it more readable
-    """
-
-    def __init__(self, indent_str='  ', indent_level=0, level=logging.INFO):
-        self.indent_str = indent_str
-        self.indent_level = indent_level
-        self.level = level
-
-    def __call__(self, *pos, time_stamp=False, level=None, indent_level=0, **kwargs):
-        if level is None and self.level is None:
-            return
-        elif self.level is not None:
-            level = self.level
-
-        stamp = datetime.now().strftime('[%Y-%m-%d %H:%M:%S]') if time_stamp else ' ' * 21
-        indent_prefix = self.indent_str * (self.indent_level + indent_level)
-        message = '{} {}{}'.format(stamp, indent_prefix, ' '.join([str(p) for p in pos]))
-        logging.log(level, message, **kwargs)
-
-    def indent(self):
-        return Log(self.indent_str, self.indent_level + 1, self.level)
-
-    def dedent(self):
-        return Log(self.indent_str, max(0, self.indent_level - 1), self.level)
-
-    def __enter__(self):
-        return self
-
-    def __exit__(self, *pos):
-        pass
-
-
-LOG = Log()
-DEVNULL = Log(level=None)
+logger = logging.getLogger('mavis')
 
 
 def filepath(path):
@@ -151,30 +109,30 @@ def log_arguments(args):
     Args:
         args (Namespace): the namespace to print arguments for
     """
-    LOG('arguments', time_stamp=True)
-    with LOG.indent() as log:
-        for arg, val in sorted(args.__dict__.items()):
-            if isinstance(val, list):
-                if len(val) <= 1:
-                    log(arg, '= {}'.format(val))
-                    continue
-                log(arg, '= [')
-                for v in val:
-                    log(repr(v), indent_level=1)
-                log(']')
-            elif (
-                any([isinstance(val, typ) for typ in [str, int, float, bool, tuple]]) or val is None
-            ):
-                log(arg, '=', repr(val))
-            else:
-                log(arg, '=', object.__repr__(val))
+    logger.info('arguments')
+
+    indent = ' '
+
+    for arg, val in sorted(args.__dict__.items()):
+        if isinstance(val, list):
+            if len(val) <= 1:
+                logger.info(f'{indent}{arg} = {val}')
+                continue
+            logger.info(f'{indent}{arg} = [')
+            for v in val:
+                logger.info(f'{indent * 2}{repr(v)}')
+            logger.info(f'{indent}]')
+        elif any([isinstance(val, typ) for typ in [str, int, float, bool, tuple]]) or val is None:
+            logger.info(f'{indent}{arg} = {repr(val)}')
+        else:
+            logger.info(f'{indent}{arg} = {object.__repr__(val)}')
 
 
 def mkdirp(dirname):
     """
     Make a directory or path of directories. Suppresses the error that is normally raised when the directory already exists
     """
-    LOG("creating output directory: '{}'".format(dirname))
+    logger.info(f"creating output directory: '{dirname}'")
     try:
         os.makedirs(dirname)
     except OSError as exc:  # Python >2.5: http://stackoverflow.com/questions/600268/mkdir-p-functionality-in-python
@@ -193,7 +151,7 @@ def filter_on_overlap(bpps, regions_by_reference_name):
         bpps (List[mavis.breakpoint.BreakpointPair]): list of breakpoint pairs to be filtered
         regions_by_reference_name (Dict[str,List[mavis.annotate.base.BioInterval]]): regions to filter against
     """
-    LOG('filtering from', len(bpps), 'using overlaps with regions filter')
+    logger.info(f'filtering from {len(bpps)} using overlaps with regions filter')
     failed = []
     passed = []
     for bpp in bpps:
@@ -213,7 +171,7 @@ def filter_on_overlap(bpps, regions_by_reference_name):
             failed.append(bpp)
         else:
             passed.append(bpp)
-    LOG('filtered from', len(bpps), 'down to', len(passed), '(removed {})'.format(len(failed)))
+    logger.info(f'filtered from {len(bpps)} down to {len(passed)} (removed {len(failed)})')
     return passed, failed
 
 
@@ -221,13 +179,13 @@ def read_inputs(inputs, required_columns=[], **kwargs):
     bpps = []
 
     for finput in bash_expands(*inputs):
-        LOG('loading:', finput)
+        logger.info(f'loading: {finput}')
         bpps.extend(
             read_bpp_from_input_file(
                 finput, required_columns=[COLUMNS.protocol, *required_columns], **kwargs
             )
         )
-    LOG('loaded', len(bpps), 'breakpoint pairs')
+    logger.info(f'loaded {len(bpps)} breakpoint pairs')
     return bpps
 
 
@@ -245,14 +203,14 @@ def output_tabbed_file(bpps: List[BreakpointPair], filename: str, header=None):
         if not custom_header:
             header.update(row.keys())  # type: ignore
     header = sort_columns(header)
-    LOG('writing:', filename)
+    logger.info(f'writing: {filename}')
     df = pd.DataFrame.from_records(rows, columns=header)
     df = df.fillna('None')
     df.to_csv(filename, columns=header, index=False, sep='\t')
 
 
 def write_bed_file(filename, bed_rows):
-    LOG('writing:', filename)
+    logger.info(f'writing: {filename}')
     with open(filename, 'w') as fh:
         for bed in bed_rows:
             fh.write('\t'.join([str(c) for c in bed]) + '\n')
@@ -279,7 +237,7 @@ def get_connected_components(adj_matrix):
     return components
 
 
-def generate_complete_stamp(output_dir, log=DEVNULL, prefix='MAVIS.', start_time=None):
+def generate_complete_stamp(output_dir, prefix='MAVIS.', start_time=None):
     """
     writes a complete stamp, optionally including the run time if start_time is given
 
@@ -297,7 +255,7 @@ def generate_complete_stamp(output_dir, log=DEVNULL, prefix='MAVIS.', start_time
         'some_output_dir/MAVIS.COMPLETE'
     """
     stamp = os.path.join(output_dir, str(prefix) + 'COMPLETE')
-    log('complete:', stamp)
+    logger.info(f'complete: {stamp}')
     with open(stamp, 'w') as fh:
         if start_time is not None:
             duration = int(time.time()) - start_time
diff --git a/src/mavis/validate/base.py b/src/mavis/validate/base.py
index 1225d69c..6866930b 100644
--- a/src/mavis/validate/base.py
+++ b/src/mavis/validate/base.py
@@ -24,7 +24,7 @@
 )
 from ..error import NotSpecifiedError
 from ..interval import Interval
-from ..util import DEVNULL
+from ..util import logger
 
 
 class Evidence(BreakpointPair):
@@ -836,7 +836,7 @@ def decide_sequenced_strand(self, reads: Set[pysam.AlignedSegment]):
                 strand_calls,
             )
 
-    def assemble_contig(self, log=DEVNULL):
+    def assemble_contig(self):
         """
         uses the split reads and the partners of the half mapped reads to create a contig
         representing the sequence across the breakpoints
@@ -875,7 +875,7 @@ def assemble_contig(self, log=DEVNULL):
             rqs_comp = reverse_complement(mate.query_sequence)
             assembly_sequences.setdefault(rqs_comp, set()).add(mate)
 
-        log('assembly size of {} sequences'.format(len(assembly_sequences) // 2))
+        logger.info(f'assembly size of {len(assembly_sequences) // 2} sequences')
 
         kmer_size = self.read_length * self.config['validate.assembly_kmer_size']
         remap_min_overlap = max(
@@ -888,7 +888,6 @@ def assemble_contig(self, log=DEVNULL):
             min_edge_trim_weight=self.config['validate.assembly_min_edge_trim_weight'],
             assembly_max_paths=self.config['validate.assembly_max_paths'],
             min_contig_length=self.read_length,
-            log=log,
             remap_min_overlap=remap_min_overlap,
             remap_min_exact_match=self.config['validate.assembly_min_exact_match_to_remap'],
             assembly_min_uniq=self.config['validate.assembly_min_uniq'],
@@ -919,11 +918,8 @@ def assemble_contig(self, log=DEVNULL):
                 not self.interchromosomal and len(self.break1 | self.break2) < self.read_length
             ):
                 filtered_contigs.append(ctg)
-        log(
-            'filtered contigs from {} to {} based on remapped reads from both breakpoints'.format(
-                len(contigs), len(filtered_contigs)
-            ),
-            time_stamp=False,
+        logger.info(
+            f'filtered contigs from {len(contigs)} to {len(filtered_contigs)} based on remapped reads from both breakpoints'
         )
         contigs = filtered_contigs
 
@@ -985,7 +981,7 @@ def assemble_contig(self, log=DEVNULL):
             list(filtered_contigs.values()), key=lambda x: (x.remap_score() * -1, x.seq)
         )
 
-    def load_evidence(self, log=DEVNULL):
+    def load_evidence(self):
         """
         open the associated bam file and read and store the evidence
         does some preliminary read-quality filtering
@@ -1117,7 +1113,7 @@ def filter_if_true(read):
                 mates = self.bam_cache.get_mate(flanking_read, allow_file_access=False)
                 for mate in mates:
                     if mate.is_unmapped:
-                        log('ignoring unmapped mate', mate.query_name, level=logging.DEBUG)
+                        logger.debug(f'ignoring unmapped mate {mate.query_name}')
                         continue
                     self.collect_flanking_pair(flanking_read, mate)
             except KeyError:
@@ -1163,7 +1159,7 @@ def filter_if_true(read):
                     mates = self.bam_cache.get_mate(flanking_read, allow_file_access=False)
                     for mate in mates:
                         if mate.is_unmapped:
-                            log('ignoring unmapped mate', mate.query_name, level=logging.DEBUG)
+                            logger.debug(f'ignoring unmapped mate {mate.query_name}')
                             continue
                         try:
                             self.collect_compatible_flanking_pair(
@@ -1175,11 +1171,8 @@ def filter_if_true(read):
                     pass
 
         # now collect the half mapped reads
-        log(
-            'collected',
-            len(half_mapped_partners1 | half_mapped_partners2),
-            'putative half mapped reads',
-            time_stamp=False,
+        logger.info(
+            f'collected {len(half_mapped_partners1 | half_mapped_partners2)} putative half mapped reads',
         )
         mates_found = 0
         for read in half_mapped_partners1 | half_mapped_partners2:
@@ -1191,7 +1184,7 @@ def filter_if_true(read):
                     self.collect_half_mapped(read, mate)
             except KeyError:
                 pass
-        log(mates_found, 'half-mapped mates found')
+        logger.info(f'{mates_found} half-mapped mates found')
 
     def copy(self):
         raise NotImplementedError('not appropriate for copy of evidence')
diff --git a/src/mavis/validate/main.py b/src/mavis/validate/main.py
index 4bec5fcd..ab8a48df 100644
--- a/src/mavis/validate/main.py
+++ b/src/mavis/validate/main.py
@@ -17,9 +17,9 @@
 from ..breakpoint import BreakpointPair
 from ..constants import CALL_METHOD, COLUMNS, PROTOCOL
 from ..util import (
-    LOG,
     filter_on_overlap,
     generate_complete_stamp,
+    logger,
     mkdirp,
     output_tabbed_file,
     read_inputs,
@@ -109,13 +109,11 @@ def main(
                     read_length=config['libraries'][library]['read_length'],
                     median_fragment_size=config['libraries'][library]['median_fragment_size'],
                     config=config,
-                    **bpp.data
+                    **bpp.data,
                 )
                 evidence_clusters.append(evidence)
             except ValueError as err:
-                warnings.warn(
-                    'Dropping breakpoint pair ({}) as bad input {}'.format(str(bpp), str(err))
-                )
+                logger.warning(f'Dropping breakpoint pair ({bpp}) as bad input {err}')
         elif bpp.data[COLUMNS.protocol] == PROTOCOL.TRANS:
             try:
                 evidence = TranscriptomeEvidence(
@@ -132,11 +130,11 @@ def main(
                     median_fragment_size=config['libraries'][library]['median_fragment_size'],
                     strand_determining_read=config['libraries'][library]['strand_determining_read'],
                     config=config,
-                    **bpp.data
+                    **bpp.data,
                 )
                 evidence_clusters.append(evidence)
             except ValueError as err:
-                warnings.warn('Dropping ({}) as bad input {}'.format(str(bpp), str(err)))
+                logger.warning(f'Dropping ({bpp}) as bad input {err}')
         else:
             raise ValueError('protocol error', bpp.data[COLUMNS.protocol])
 
@@ -158,66 +156,40 @@ def main(
     )
     contig_sequences = {}
     for i, evidence in enumerate(evidence_clusters):
-        LOG()
-        LOG(
-            '({} of {})'.format(i + 1, len(evidence_clusters)),
-            'gathered evidence for:',
-            evidence.cluster_id,
-            ''
-            if COLUMNS.tracking_id not in evidence.data
-            else '(tracking_id: {})'.format(evidence.tracking_id),
-            time_stamp=True,
-        )
-        LOG(evidence, time_stamp=False)
-        LOG('possible event type(s):', BreakpointPair.classify(evidence), time_stamp=False)
-        LOG(
-            'outer window regions:  {}:{}-{}  {}:{}-{}'.format(
-                evidence.break1.chr,
-                evidence.outer_window1[0],
-                evidence.outer_window1[1],
-                evidence.break2.chr,
-                evidence.outer_window2[0],
-                evidence.outer_window2[1],
+        logger.info(
+            f'({i + 1} of {len(evidence_clusters)}) gathered evidence for: {evidence.cluster_id}'
+            + (
+                ''
+                if COLUMNS.tracking_id not in evidence.data
+                else f' (tracking_id: {evidence.tracking_id})'
             ),
-            time_stamp=False,
         )
-        LOG(
-            'inner window regions:  {}:{}-{}  {}:{}-{}'.format(
-                evidence.break1.chr,
-                evidence.inner_window1[0],
-                evidence.inner_window1[1],
-                evidence.break2.chr,
-                evidence.inner_window2[0],
-                evidence.inner_window2[1],
-            ),
-            time_stamp=False,
+        logger.info(repr(evidence))
+        logger.info(f'possible event type(s): {BreakpointPair.classify(evidence)}')
+        logger.info(
+            f'outer window regions: {evidence.break1.chr}:{evidence.outer_window1[0]}-{evidence.outer_window1[1]}  {evidence.break2.chr}:{evidence.outer_window2[0]}-{evidence.outer_window2[1]}'
         )
-        evidence.load_evidence(log=LOG)
-        LOG(
-            'flanking pairs: {};'.format(len(evidence.flanking_pairs)),
-            'split reads: {}, {};'.format(*[len(a) for a in evidence.split_reads]),
-            'half-mapped reads: {}, {};'.format(*[len(a) for a in evidence.half_mapped]),
-            'spanning-reads: {};'.format(len(evidence.spanning_reads)),
-            'compatible flanking pairs:',
-            len(evidence.compatible_flanking_pairs),
-            time_stamp=False,
+        logger.info(
+            f'inner window regions: {evidence.break1.chr}:{evidence.inner_window1[0]}-{evidence.inner_window1[1]}  {evidence.break2.chr}:{evidence.inner_window2[0]}-{evidence.inner_window2[1]}'
         )
-        evidence.assemble_contig(log=LOG)
-        LOG('assembled {} contigs'.format(len(evidence.contigs)), time_stamp=False)
+        evidence.load_evidence()
+        logger.info(
+            f'flanking pairs: {len(evidence.flanking_pairs)}'
+            + '; split reads: {}, {}'.format(*[len(a) for a in evidence.split_reads])
+            + '; half-mapped reads: {}, {}'.format(*[len(a) for a in evidence.half_mapped])
+            + f'; spanning-reads: {len(evidence.spanning_reads)}; compatible flanking pairs: {len(evidence.compatible_flanking_pairs)}',
+        )
+        evidence.assemble_contig()
+        logger.info(f'assembled {len(evidence.contigs)} contigs')
         for contig in evidence.contigs:
             name = 'seq-{}'.format(hashlib.md5(contig.seq.encode('utf-8')).hexdigest())
-            LOG(
-                '>',
-                name,
-                '(size={}; reads={:.0f}; coverage={:.2f})'.format(
-                    len(contig.seq), contig.remap_score(), contig.remap_coverage()
-                ),
-                time_stamp=False,
+            logger.info(
+                f'> {name} (size={len(contig.seq)}; reads={contig.remap_score():.0f}; coverage={contig.remap_coverage():.2f})'
             )
-            LOG(contig.seq[:140], time_stamp=False)
+            logger.info(contig.seq[:140])
             contig_sequences[name] = contig.seq
 
-    LOG('will output:', contig_aligner_fa, contig_aligner_output)
+    logger.info(f'will output: {contig_aligner_fa} {contig_aligner_output}')
     raw_contig_alignments = align_sequences(
         contig_sequences,
         input_bam_cache,
@@ -230,11 +202,10 @@ def main(
         aligner_output_log=contig_aligner_log,
         blat_min_identity=config['validate.blat_min_identity'],
         blat_limit_top_aln=config['validate.blat_limit_top_aln'],
-        log=LOG,
     )
     for evidence in evidence_clusters:
         select_contig_alignments(evidence, raw_contig_alignments)
-    LOG('alignment complete', time_stamp=True)
+    logger.info('alignment complete')
     event_calls = []
     total_pass = 0
     write_bed_file(
@@ -243,25 +214,17 @@ def main(
     )
     validation_counts = {}
     for index, evidence in enumerate(evidence_clusters):
-        LOG()
-        LOG(
-            '({} of {}) calling events for: {} {} (tracking_id: {})'.format(
-                index + 1,
-                len(evidence_clusters),
-                evidence.cluster_id,
-                evidence.putative_event_types(),
-                evidence.tracking_id,
-            ),
-            time_stamp=True,
+        logger.info(
+            f'({index + 1} of {len(evidence_clusters)}) calling events for: {evidence.cluster_id} {evidence.putative_event_types()} (tracking_id: {evidence.tracking_id})'
         )
-        LOG('source:', evidence)
+        logger.info(f'source: {evidence}')
         calls = []
         failure_comment = None
         try:
             calls = call_events(evidence)
             event_calls.extend(calls)
         except UserWarning as err:
-            LOG('warning: error in calling events', repr(err))
+            logger.warning(f'error in calling events {repr(err)}')
             failure_comment = str(err)
 
         if not calls:
@@ -273,34 +236,26 @@ def main(
         else:
             total_pass += 1
 
-        LOG('called {} event(s)'.format(len(calls)), time_stamp=True)
+        logger.info(f'called {len(calls)} event(s)')
         for call in calls:
-            LOG(call)
+            logger.info(call)
             if call.call_method == CALL_METHOD.CONTIG:
-                LOG(
-                    '\t{} {} [{}] contig_alignment_score: {}, contig_alignment_mq: {} contig_alignment_rank: {}'.format(
-                        call.event_type,
-                        call.call_method,
-                        call.contig_alignment.query_name,
-                        round(call.contig_alignment.score(), 2),
-                        tuple(call.contig_alignment.mapping_quality()),
-                        tuple(call.contig_alignment.alignment_rank()),
-                    )
+                logger.info(
+                    f'{call.event_type} {call.call_method} [{call.contig_alignment.query_name}] contig_alignment_score: {round(call.contig_alignment.score(), 2)}, contig_alignment_mq: {tuple(call.contig_alignment.mapping_quality())} contig_alignment_rank: {tuple(call.contig_alignment.alignment_rank())}'
                 )
-                LOG('\talignment:', call.contig_alignment.alignment_id())
+                logger.info(f'alignment: {call.contig_alignment.alignment_id()}')
             elif call.contig_alignment:
-                LOG(
-                    '\t{} {} alignment:'.format(call.event_type, call.call_method),
-                    call.contig_alignment.alignment_id(),
+                logger.info(
+                    f'{call.event_type} {call.call_method} alignment: {call.contig_alignment.alignment_id()}'
                 )
             else:
-                LOG('\t{} {}'.format(call.event_type, call.call_method), time_stamp=False)
+                logger.info(f'{call.event_type} {call.call_method}')
             validation_counts[call.cluster_id] = validation_counts.get(call.cluster_id, 0) + 1
             call.data[COLUMNS.validation_id] = '{}-v{}'.format(
                 call.cluster_id, validation_counts[call.cluster_id]
             )
-            LOG(
-                '\tremapped reads: {}; spanning reads: {}; split reads: [{} ({}), {} ({}), {}]'
+            logger.info(
+                'remapped reads: {}; spanning reads: {}; split reads: [{} ({}), {} ({}), {}]'
                 ', flanking pairs: {}{}'.format(
                     0 if not call.contig else len(call.contig.input_reads),
                     len(call.spanning_reads),
@@ -327,11 +282,8 @@ def main(
         call.data.update(
             {COLUMNS.break1_homologous_seq: b1_homseq, COLUMNS.break2_homologous_seq: b2_homseq}
         )
-    LOG(
-        '{} putative calls resulted in {} events with 1 or more event call'.format(
-            len(evidence_clusters), total_pass
-        ),
-        time_stamp=True,
+    logger.info(
+        f'{len(evidence_clusters)} putative calls resulted in {total_pass} events with 1 or more event call'
     )
     output_tabbed_file(event_calls, passed_output_file)
     output_tabbed_file(filtered_evidence_clusters, failed_output_file)
@@ -342,7 +294,7 @@ def main(
 
     if config['validate.write_evidence_files']:
         with pysam.AlignmentFile(contig_bam, 'wb', template=input_bam_cache.fh) as fh:
-            LOG('writing:', contig_bam, time_stamp=True)
+            logger.info(f'writing: {contig_bam}')
             for evidence in evidence_clusters:
                 for contig in evidence.contigs:
                     for aln in contig.alignments:
@@ -354,7 +306,7 @@ def main(
 
         # write the evidence
         with pysam.AlignmentFile(raw_evidence_bam, 'wb', template=input_bam_cache.fh) as fh:
-            LOG('writing:', raw_evidence_bam, time_stamp=True)
+            logger.info(f'writing: {raw_evidence_bam}')
             reads = set()
             for evidence in evidence_clusters:
                 reads.update(evidence.supporting_reads())
@@ -363,23 +315,23 @@ def main(
                 fh.write(read)
         # now sort the contig bam
         sort = re.sub(r'.bam$', '.sorted.bam', contig_bam)
-        LOG('sorting the bam file:', contig_bam, time_stamp=True)
+        logger.info(f'sorting the bam file: {contig_bam}')
         pysam.sort('-o', sort, contig_bam)
         contig_bam = sort
-        LOG('indexing the sorted bam:', contig_bam)
+        logger.info(f'indexing the sorted bam: {contig_bam}')
         pysam.index(contig_bam)
 
         # then sort the evidence bam file
         sort = re.sub(r'.bam$', '.sorted.bam', raw_evidence_bam)
-        LOG('sorting the bam file:', raw_evidence_bam, time_stamp=True)
+        logger.info(f'sorting the bam file: {raw_evidence_bam}')
         pysam.sort('-o', sort, raw_evidence_bam)
         raw_evidence_bam = sort
-        LOG('indexing the sorted bam:', raw_evidence_bam)
+        logger.info(f'indexing the sorted bam: {raw_evidence_bam}')
         pysam.index(raw_evidence_bam)
 
         # write the igv batch file
         with open(igv_batch_file, 'w') as fh:
-            LOG('writing:', igv_batch_file, time_stamp=True)
+            logger.info(f'writing: {igv_batch_file}')
 
             fh.write('load {} name="{}"\n'.format(passed_bed_file, 'passed events'))
             fh.write('load {} name="{}"\n'.format(contig_bam, 'aligned contigs'))
@@ -392,4 +344,4 @@ def main(
                     config['libraries'][library]['protocol'],
                 )
             )
-        generate_complete_stamp(output, LOG, start_time=start_time)
+        generate_complete_stamp(output, start_time=start_time)
diff --git a/src/tools/calculate_ref_alt_counts.py b/src/tools/calculate_ref_alt_counts.py
index cbb3be43..70992f7a 100644
--- a/src/tools/calculate_ref_alt_counts.py
+++ b/src/tools/calculate_ref_alt_counts.py
@@ -9,8 +9,7 @@
 import pysam
 from mavis.annotate.file_io import load_reference_genome
 from mavis.constants import SVTYPE
-from mavis.util import LOG as log
-from mavis.util import output_tabbed_file, read_inputs
+from mavis.util import logger, output_tabbed_file, read_inputs
 from mavis.validate.call import EventCall
 
 
@@ -122,7 +121,7 @@ class RefAltCalculator:
 
     def __init__(self, input_bams, reference_genome, max_event_size=6, buffer=1):
         if isinstance(reference_genome, str):
-            log('loading:', reference_genome, time_stamp=True)
+            logger.info(f'loading: {reference_genome}')
             self.reference_genome = load_reference_genome(reference_genome)
         else:
             self.reference_genome = reference_genome
@@ -154,19 +153,17 @@ def calculate_ref_counts(self, bpp):
             raise ValueError("Cannot determine ref and alt count for non precise breakpoint pairs")
 
         if bpp not in self.bpp_cache:
-            log("processing {}".format(bpp))
+            logger.info(f'processing {bpp}')
             data = dict()
             for name, read_length, bam in self.input_bams:
                 ref, alt, ign, mul, ref_sequence, alt_sequence = calculate_ref_count(
                     bpp, read_length, self.reference_genome, bam, self.buffer
                 )
-                log(bpp, name)
-                log(
-                    'Calculated counts: Ref: {}, Alt: {}, Mul: {}, Ignored: {} '.format(
-                        len(ref), len(alt), len(mul), len(ign)
-                    )
+                logger.info(f'{bpp} {name}')
+                logger.info(
+                    f'Calculated counts: Ref: {len(ref)}, Alt: {len(alt)}, Mul: {len(mul)}, Ignored: {len(ign)}'
                 )
-                log('Ref_probe: {}, Alt_probe: {}'.format(ref_sequence, alt_sequence))
+                logger.info(f'Ref_probe: {ref_sequence}, Alt_probe: {alt_sequence}')
                 info = {
                     '{}_ref_count'.format(name): len(ref),
                     '{}_alt_count'.format(name): len(alt),
@@ -201,7 +198,7 @@ def calculate_all_counts(self, input_files, output_file):
                 filtered_events.append(bpp)
                 continue
 
-        log('filtered {} events'.format(len(filtered_events)))
+        logger.info(f'filtered {len(filtered_events)} events')
 
         output_tabbed_file(processed_bpps.values(), output_file)
         return processed_bpps, filtered_events
diff --git a/tests/integration/test_annotate_fileio.py b/tests/integration/test_annotate_fileio.py
index 7b1a09de..53572a15 100644
--- a/tests/integration/test_annotate_fileio.py
+++ b/tests/integration/test_annotate_fileio.py
@@ -7,5 +7,5 @@
 
 class TestAnnotationLoading:
     def test_load_json(self):
-        result = load_annotations(JSON, warn=print)
+        result = load_annotations(JSON)
         assert len(result.keys()) == 12
diff --git a/tests/integration/test_args.py b/tests/integration/test_args.py
index db14bc55..492509f3 100644
--- a/tests/integration/test_args.py
+++ b/tests/integration/test_args.py
@@ -1,7 +1,7 @@
 import json
 import sys
 import tempfile
-from unittest.mock import patch
+from unittest.mock import Mock, patch
 
 import pytest
 from mavis import util
@@ -65,7 +65,7 @@ def test_trans_multiple_annotations_no_masking(self, configpath, output_dir):
             '--config',
             str(configpath),
         ]
-        with patch.object(cluster_main, 'main', util.DEVNULL):
+        with patch.object(cluster_main, 'main', Mock()):
             with patch.object(sys, 'argv', args):
                 mavis_main()
 
@@ -102,7 +102,7 @@ def test_trans_multiple_annotations_with_masking(self, configpath, output_dir):
             '--config',
             str(configpath),
         ]
-        with patch.object(cluster_main, 'main', util.DEVNULL):
+        with patch.object(cluster_main, 'main', Mock()):
             with patch.object(sys, 'argv', args):
                 mavis_main()
 
@@ -123,7 +123,7 @@ def test_error_missing_annotations_translib_uninform(self, configpath, output_di
             )
         )
         args = ['mavis', 'cluster', '--library', 'translib', '--output', output_dir]
-        with patch.object(cluster_main, 'main', util.DEVNULL):
+        with patch.object(cluster_main, 'main', Mock()):
             with patch.object(sys, 'argv', args):
                 expect_error(self, mavis_main)
 
@@ -163,7 +163,7 @@ def test_error_missing_annotations_translib(self, configpath, output_dir):
             '--config',
             str(configpath),
         ]
-        with patch.object(validate_main, 'main', util.DEVNULL):
+        with patch.object(validate_main, 'main', Mock()):
             with patch.object(sys, 'argv', args):
                 expect_error(self, mavis_main)
 
@@ -208,7 +208,7 @@ def test_ok_multi_ref_genome(self, configpath, output_dir):
             '--config',
             str(configpath),
         ]
-        with patch.object(validate_main, 'main', util.DEVNULL):
+        with patch.object(validate_main, 'main', Mock()):
             with patch.object(sys, 'argv', args):
                 mavis_main()
 
@@ -256,7 +256,7 @@ def test_error_multi_aligner_ref(self, configpath, output_dir):
             '--config',
             str(configpath),
         ]
-        with patch.object(validate_main, 'main', util.DEVNULL):
+        with patch.object(validate_main, 'main', Mock()):
             with patch.object(sys, 'argv', args):
                 expect_error(self, mavis_main)
 
@@ -300,7 +300,7 @@ def test_error_missing_aligner_ref(self, configpath, output_dir):
             '--config',
             str(configpath),
         ]
-        with patch.object(validate_main, 'main', util.DEVNULL):
+        with patch.object(validate_main, 'main', Mock()):
             with patch.object(sys, 'argv', args):
                 expect_error(self, mavis_main)
 
@@ -344,7 +344,7 @@ def test_error_missing_reference_genome(self, configpath, output_dir):
             '--config',
             str(configpath),
         ]
-        with patch.object(validate_main, 'main', util.DEVNULL):
+        with patch.object(validate_main, 'main', Mock()):
             with patch.object(sys, 'argv', args):
                 expect_error(self, mavis_main)
 
@@ -391,6 +391,6 @@ def test_error_bad_aligner_ref(self, configpath, output_dir):
             '--config',
             str(configpath),
         ]
-        with patch.object(validate_main, 'main', util.DEVNULL):
+        with patch.object(validate_main, 'main', Mock()):
             with patch.object(sys, 'argv', args):
                 expect_error(self, mavis_main)
diff --git a/tests/integration/test_assemble.py b/tests/integration/test_assemble.py
index b91b1c4d..07e22b64 100644
--- a/tests/integration/test_assemble.py
+++ b/tests/integration/test_assemble.py
@@ -5,7 +5,6 @@
 from mavis.assemble import Contig, assemble, filter_contigs
 from mavis.constants import reverse_complement
 from mavis.interval import Interval
-from mavis.util import LOG
 from mavis_config import DEFAULTS
 
 from ..util import get_data, long_running_test
@@ -363,7 +362,6 @@ def test_large_assembly(self, large_assembly_seq):
             min_edge_trim_weight=DEFAULTS['validate.assembly_min_edge_trim_weight'],
             assembly_max_paths=DEFAULTS['validate.assembly_max_paths'],
             min_contig_length=150,
-            log=LOG,
             remap_min_exact_match=30,
             assembly_min_uniq=DEFAULTS['validate.assembly_min_uniq'],
         )
@@ -615,7 +613,6 @@ def test_assemble_short_contig(self):
             assembly_min_uniq=0.1,
             min_contig_length=125,
             remap_min_exact_match=15,
-            log=LOG,
         )
 
         target = 'GGGCACGGCTGCAGCGTCGCGGTGCATCAAGCTTGCTATGGCATTGTTCAAGTACCCACTGGACCGTGGTTTTGCAGGAAATGTGAATCTCAGGAGAGAGCAGCCAGAGTGATACAGTTTATGTAACTTGATGGAAGAA'
@@ -627,7 +624,7 @@ def test_assemble_short_contig(self):
     @timeout_decorator.timeout(120)
     @long_running_test
     def test_long_filter_bug(self, long_filter_seq):
-        contigs = assemble(long_filter_seq, 111, 3, 8, 0.1, 0.1, log=LOG)
+        contigs = assemble(long_filter_seq, 111, 3, 8, 0.1, 0.1)
         for c in contigs:
             print(c.seq, c.remap_score())
         assert len(contigs)
diff --git a/tests/integration/test_bam.py b/tests/integration/test_bam.py
index 7f8b87f7..9ccbc09d 100644
--- a/tests/integration/test_bam.py
+++ b/tests/integration/test_bam.py
@@ -54,7 +54,7 @@ def test_add_read(self):
         b.add_read(r)
         assert len(b.cache.values()) == 1
 
-    @mock.patch('mavis.util.LOG')
+    @mock.patch('mavis.util.logger')
     def test_add_invalid_read(self, log_patcher):
         bad_read = mock.Mock(
             is_unmapped=False, reference_start=0, reference_end=0, query_name='BAD_READ'
@@ -62,9 +62,9 @@ def test_add_invalid_read(self, log_patcher):
         cache = BamCache(MockBamFileHandle())
         cache.add_read(bad_read)
         assert len(cache.cache) == 0
-        log_patcher.assert_called_with('ignoring invalid read', 'BAD_READ', level=logging.DEBUG)
+        log_patcher.method_calls[0].assert_called_with('ignoring invalid read: BAD_READ')
 
-    @mock.patch('mavis.util.LOG')
+    @mock.patch('mavis.util.logger')
     def test_fetch_invalid_read(self, log_patcher):
         bad_read = mock.Mock(
             is_unmapped=False, reference_start=0, reference_end=0, query_name='BAD_READ'
@@ -74,9 +74,9 @@ def test_fetch_invalid_read(self, log_patcher):
         cache = BamCache(fh)
         cache.fetch('chr', 1, 10)
         assert len(cache.cache) == 0
-        log_patcher.assert_called_with('ignoring invalid read', 'BAD_READ', level=logging.DEBUG)
+        log_patcher.method_calls[0].assert_called_with('ignoring invalid read: BAD_READ')
 
-    @mock.patch('mavis.util.LOG')
+    @mock.patch('mavis.util.logger')
     def test_bin_fetch_invalid_read(self, log_patcher):
         bad_read = mock.Mock(
             is_unmapped=False, reference_start=0, reference_end=0, query_name='BAD_READ'
@@ -86,7 +86,7 @@ def test_bin_fetch_invalid_read(self, log_patcher):
         cache = BamCache(fh)
         cache.fetch_from_bins('chr', 1, 10)
         assert len(cache.cache) == 0
-        log_patcher.assert_called_with('ignoring invalid read', 'BAD_READ', level=logging.DEBUG)
+        log_patcher.method_calls[0].assert_called_with('ignoring invalid read: BAD_READ')
 
     def test_reference_id(self):
         fh = MockBamFileHandle({'1': 0})
diff --git a/tests/snakemake/test_mini_workflow.py b/tests/snakemake/test_mini_workflow.py
index 2b559234..4d725a67 100644
--- a/tests/snakemake/test_mini_workflow.py
+++ b/tests/snakemake/test_mini_workflow.py
@@ -1,3 +1,4 @@
+import glob
 import json
 import os
 import shutil
@@ -12,6 +13,19 @@
 from ..util import glob_exists, long_running_test, package_relative_file
 
 
+def tail_logfiles(dirname, n_lines=10):
+    """
+    Prints the tail of txt files in this dir. This is useful for debugging snakemake tests since
+    the logs are deleted with the temp dir when the test fails
+    """
+    for filename in glob.glob(os.path.join(dirname, '*.log.txt')):
+        with open(filename, 'r') as fh:
+            lines = fh.readlines()
+            start_line = max([0, len(lines) - n_lines])
+            print(f'TAIL: {filename}')
+            print('\n'.join(lines[start_line:]))
+
+
 @pytest.fixture
 def blat_output_dir():
     temp_output = tempfile.mkdtemp()
@@ -85,6 +99,7 @@ def test_workflow(output_dir):
 
         except SystemExit as err:
             if err.code != 0:
+                tail_logfiles(os.path.join(output_dir, 'output_dir', 'logs'))
                 raise err
 
     for expected_file in [
@@ -121,6 +136,7 @@ def test_no_validate_worflow(output_dir):
 
         except SystemExit as err:
             if err.code != 0:
+                tail_logfiles(os.path.join(output_dir, 'output_dir', 'logs'))
                 raise err
 
     for expected_file in [

From 9a93374be3e00271c32c6080e39ebeca530f4421 Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Wed, 26 Jan 2022 12:47:55 -0800
Subject: [PATCH 088/137] Fix linting errors

---
 src/mavis/annotate/constants.py  |  2 +-
 src/mavis/annotate/genomic.py    |  7 +++----
 src/mavis/bam/read.py            | 19 +++++++++----------
 src/mavis/bam/stats.py           |  1 -
 src/mavis/breakpoint.py          |  2 +-
 src/mavis/illustrate/elements.py | 13 ++++++-------
 src/mavis/overlay.py             |  2 +-
 src/mavis/pairing/constants.py   |  2 +-
 src/mavis/pairing/main.py        |  2 +-
 src/mavis/summary/main.py        |  3 +--
 src/mavis/tools/__init__.py      |  2 +-
 src/mavis/tools/breakdancer.py   |  3 +--
 src/mavis/validate/base.py       |  2 --
 src/mavis/validate/evidence.py   |  1 -
 src/mavis/validate/main.py       |  1 -
 15 files changed, 26 insertions(+), 36 deletions(-)

diff --git a/src/mavis/annotate/constants.py b/src/mavis/annotate/constants.py
index 62882a9b..3f80c171 100644
--- a/src/mavis/annotate/constants.py
+++ b/src/mavis/annotate/constants.py
@@ -1,6 +1,6 @@
 import re
 
-from ..constants import MavisNamespace, float_fraction
+from ..constants import MavisNamespace
 
 PASS_FILENAME = 'annotations.tab'
 
diff --git a/src/mavis/annotate/genomic.py b/src/mavis/annotate/genomic.py
index 9891cebe..a6d18439 100644
--- a/src/mavis/annotate/genomic.py
+++ b/src/mavis/annotate/genomic.py
@@ -1,12 +1,11 @@
-from copy import copy
 import itertools
 
+from ..constants import ORIENT, STRAND, reverse_complement
+from ..error import NotSpecifiedError
+from ..interval import Interval
 from .base import BioInterval, ReferenceName
 from .constants import SPLICE_SITE_TYPE
 from .splicing import SpliceSite, SplicingPattern
-from ..constants import ORIENT, reverse_complement, STRAND
-from ..error import NotSpecifiedError
-from ..interval import Interval
 
 
 class Template(BioInterval):
diff --git a/src/mavis/bam/read.py b/src/mavis/bam/read.py
index 753c8414..f41f31cb 100644
--- a/src/mavis/bam/read.py
+++ b/src/mavis/bam/read.py
@@ -1,28 +1,27 @@
-from copy import copy
 import itertools
 import re
-import subprocess
+from copy import copy
 
 import pysam
 from Bio.Data import IUPACData as iupac
 
-from . import cigar as _cigar
-from .cigar import (
-    EVENT_STATES,
-    QUERY_ALIGNED_STATES,
-    REFERENCE_ALIGNED_STATES,
-    convert_cigar_to_string,
-)
 from ..constants import (
     CIGAR,
     DNA_ALPHABET,
+    NA_MAPPING_QUALITY,
     ORIENT,
     READ_PAIR_TYPE,
     STRAND,
     SVTYPE,
-    NA_MAPPING_QUALITY,
 )
 from ..interval import Interval
+from . import cigar as _cigar
+from .cigar import (
+    EVENT_STATES,
+    QUERY_ALIGNED_STATES,
+    REFERENCE_ALIGNED_STATES,
+    convert_cigar_to_string,
+)
 
 
 class SamRead(pysam.AlignedSegment):
diff --git a/src/mavis/bam/stats.py b/src/mavis/bam/stats.py
index 7901c044..ea6d1603 100644
--- a/src/mavis/bam/stats.py
+++ b/src/mavis/bam/stats.py
@@ -2,7 +2,6 @@
 import math
 import os
 import statistics as stats
-import warnings
 
 from ..constants import STRAND
 from ..util import logger
diff --git a/src/mavis/breakpoint.py b/src/mavis/breakpoint.py
index 10218017..81518527 100644
--- a/src/mavis/breakpoint.py
+++ b/src/mavis/breakpoint.py
@@ -3,7 +3,7 @@
 from copy import copy as _copy
 from typing import Callable, Dict, List, Optional, Set, Tuple
 
-from .constants import CIGAR, COLUMNS, DNA_ALPHABET, ORIENT, STRAND, SVTYPE, reverse_complement
+from .constants import COLUMNS, DNA_ALPHABET, ORIENT, STRAND, SVTYPE, reverse_complement
 from .error import InvalidRearrangement, NotSpecifiedError
 from .interval import Interval
 
diff --git a/src/mavis/illustrate/elements.py b/src/mavis/illustrate/elements.py
index cb79549b..a5f1e762 100644
--- a/src/mavis/illustrate/elements.py
+++ b/src/mavis/illustrate/elements.py
@@ -2,20 +2,19 @@
 This is the primary module responsible for generating svg visualizations
 
 """
-import itertools
 import re
 
+from ..annotate.variant import FusionTranscript
+from ..constants import CODON_SIZE, GIEMSA_STAIN, ORIENT, STRAND
+from ..error import DrawingFitError, NotSpecifiedError
+from ..interval import Interval
 from .util import (
+    LabelMapping,
+    Tag,
     dynamic_label_color,
     generate_interval_mapping,
-    LabelMapping,
     split_intervals_into_tracks,
-    Tag,
 )
-from ..annotate.variant import FusionTranscript
-from ..constants import CODON_SIZE, GIEMSA_STAIN, ORIENT, STRAND
-from ..error import DrawingFitError, NotSpecifiedError
-from ..interval import Interval
 
 # draw gene level view
 # draw gene box
diff --git a/src/mavis/overlay.py b/src/mavis/overlay.py
index fccd5e34..71f6c239 100644
--- a/src/mavis/overlay.py
+++ b/src/mavis/overlay.py
@@ -1,5 +1,5 @@
 import os
-from typing import Dict, List, Tuple, Union
+from typing import Dict, List, Tuple
 
 from . import annotate as _annotate
 from . import util as _util
diff --git a/src/mavis/pairing/constants.py b/src/mavis/pairing/constants.py
index cb55e123..ea0a63a4 100644
--- a/src/mavis/pairing/constants.py
+++ b/src/mavis/pairing/constants.py
@@ -2,7 +2,7 @@
 
 from mavis_config import DEFAULTS
 
-from ..constants import CALL_METHOD, MavisNamespace
+from ..constants import CALL_METHOD
 
 PAIRING_DISTANCES: Dict[str, int] = {
     CALL_METHOD.FLANK: DEFAULTS['pairing.flanking_call_distance'],
diff --git a/src/mavis/pairing/main.py b/src/mavis/pairing/main.py
index 8b92586c..48809ba9 100644
--- a/src/mavis/pairing/main.py
+++ b/src/mavis/pairing/main.py
@@ -7,7 +7,7 @@
 
 from ..annotate.file_io import ReferenceFile
 from ..breakpoint import BreakpointPair
-from ..constants import CALL_METHOD, COLUMNS, PROTOCOL, SPLICE_TYPE, SVTYPE
+from ..constants import CALL_METHOD, COLUMNS, SPLICE_TYPE, SVTYPE
 from ..util import generate_complete_stamp, logger, output_tabbed_file, read_inputs
 from .pairing import inferred_equivalent, pair_by_distance, product_key
 
diff --git a/src/mavis/summary/main.py b/src/mavis/summary/main.py
index 309da1b8..73c9240d 100644
--- a/src/mavis/summary/main.py
+++ b/src/mavis/summary/main.py
@@ -1,7 +1,6 @@
 import os
 import re
 import time
-from functools import partial
 from typing import Dict, List, Tuple
 
 import pandas as pd
@@ -9,7 +8,7 @@
 from ..annotate.file_io import ReferenceFile
 from ..breakpoint import BreakpointPair
 from ..constants import CALL_METHOD, COLUMNS, PROTOCOL, SPLICE_TYPE, SVTYPE
-from ..util import generate_complete_stamp, logger, output_tabbed_file, read_inputs, soft_cast
+from ..util import generate_complete_stamp, logger, output_tabbed_file, read_inputs
 from .constants import HOMOPOLYMER_MIN_LENGTH
 from .summary import (
     annotate_dgv,
diff --git a/src/mavis/tools/__init__.py b/src/mavis/tools/__init__.py
index a1b814bd..8649b31f 100644
--- a/src/mavis/tools/__init__.py
+++ b/src/mavis/tools/__init__.py
@@ -1,5 +1,5 @@
 import itertools
-from typing import Callable, Dict, List
+from typing import Dict, List
 
 import pandas as pd
 from shortuuid import uuid
diff --git a/src/mavis/tools/breakdancer.py b/src/mavis/tools/breakdancer.py
index e1f8361e..4f8d2562 100644
--- a/src/mavis/tools/breakdancer.py
+++ b/src/mavis/tools/breakdancer.py
@@ -1,5 +1,4 @@
 import re
-from argparse import Namespace
 
 import pandas as pd
 
@@ -34,7 +33,7 @@ def convert_file(input_file):
         },
     )
     if 'num_Reads_lib' not in df:
-        raise KeyError(f'missing required column: num_Reads_lib')
+        raise KeyError('missing required column: num_Reads_lib')
 
     for bam, lib in bam_to_lib.items():
         df['num_Reads_lib'] = df['num_Reads_lib'].str.replace(bam, lib)
diff --git a/src/mavis/validate/base.py b/src/mavis/validate/base.py
index 6866930b..e73d4846 100644
--- a/src/mavis/validate/base.py
+++ b/src/mavis/validate/base.py
@@ -1,5 +1,4 @@
 import itertools
-import logging
 from abc import abstractmethod
 from typing import Dict, List, Optional, Set, Tuple
 
@@ -16,7 +15,6 @@
     COLUMNS,
     NA_MAPPING_QUALITY,
     ORIENT,
-    PROTOCOL,
     PYSAM_READ_FLAGS,
     STRAND,
     SVTYPE,
diff --git a/src/mavis/validate/evidence.py b/src/mavis/validate/evidence.py
index 19f2fbdb..610c4206 100644
--- a/src/mavis/validate/evidence.py
+++ b/src/mavis/validate/evidence.py
@@ -241,7 +241,6 @@ def distance(self, start: int, end: int, strand: str = STRAND.NS, chrom: Optiona
         mixed = []
         inter = []
         transcripts = self._select_transcripts(chrom, strand)
-        genomic_distance = Evidence.distance(start, end).end
         # try to calculate assuming the positions are exonic
         for transcript in itertools.chain.from_iterable([t.transcripts for t in transcripts]):
             if not transcript.reference_object.position & Interval(start, end):
diff --git a/src/mavis/validate/main.py b/src/mavis/validate/main.py
index ab8a48df..325dbf3c 100644
--- a/src/mavis/validate/main.py
+++ b/src/mavis/validate/main.py
@@ -3,7 +3,6 @@
 import os
 import re
 import time
-import warnings
 from typing import Dict, List
 
 import pysam

From 31119fcabeb512e7587440413a4d8f7a3ef23cef Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Wed, 26 Jan 2022 12:53:28 -0800
Subject: [PATCH 089/137] Import subcommand from mavis_config

---
 src/mavis/cluster/main.py        | 3 ++-
 src/mavis/main.py                | 2 +-
 tests/end_to_end/test_convert.py | 3 ++-
 tests/end_to_end/test_help.py    | 2 +-
 tests/end_to_end/test_overlay.py | 2 +-
 5 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/src/mavis/cluster/main.py b/src/mavis/cluster/main.py
index aec884ba..17567f52 100644
--- a/src/mavis/cluster/main.py
+++ b/src/mavis/cluster/main.py
@@ -3,11 +3,12 @@
 import time
 from typing import Dict, List
 
+from mavis_config.constants import SUBCOMMAND
 from shortuuid import uuid
 
 from ..annotate.file_io import ReferenceFile
 from ..breakpoint import BreakpointPair
-from ..constants import COLUMNS, SUBCOMMAND
+from ..constants import COLUMNS
 from ..util import (
     filter_on_overlap,
     filter_uninformative,
diff --git a/src/mavis/main.py b/src/mavis/main.py
index d417d7d5..0bf4eb82 100644
--- a/src/mavis/main.py
+++ b/src/mavis/main.py
@@ -9,6 +9,7 @@
 from typing import Dict
 
 from mavis_config import validate_config
+from mavis_config.constants import SUBCOMMAND
 
 from . import __version__
 from . import config as _config
@@ -16,7 +17,6 @@
 from .align import get_aligner_version
 from .annotate import main as annotate_main
 from .cluster import main as cluster_main
-from .constants import SUBCOMMAND
 from .overlay import check_overlay_args
 from .overlay import main as overlay_main
 from .pairing import main as pairing_main
diff --git a/tests/end_to_end/test_convert.py b/tests/end_to_end/test_convert.py
index 671e4d10..00f8ea4a 100644
--- a/tests/end_to_end/test_convert.py
+++ b/tests/end_to_end/test_convert.py
@@ -5,10 +5,11 @@
 import unittest
 from unittest.mock import patch
 
-from mavis.constants import ORIENT, SUBCOMMAND, SVTYPE
+from mavis.constants import ORIENT, SVTYPE
 from mavis.main import main
 from mavis.tools import SUPPORTED_TOOL
 from mavis.util import read_bpp_from_input_file
+from mavis_config.constants import SUBCOMMAND
 
 from ..util import get_data, glob_exists
 
diff --git a/tests/end_to_end/test_help.py b/tests/end_to_end/test_help.py
index 6d3cdd24..d73fa2fc 100644
--- a/tests/end_to_end/test_help.py
+++ b/tests/end_to_end/test_help.py
@@ -1,8 +1,8 @@
 import sys
 from unittest.mock import patch
 
-from mavis.constants import SUBCOMMAND
 from mavis.main import main
+from mavis_config.constants import SUBCOMMAND
 
 
 class TestHelpMenu:
diff --git a/tests/end_to_end/test_overlay.py b/tests/end_to_end/test_overlay.py
index 5950701d..b0584de8 100644
--- a/tests/end_to_end/test_overlay.py
+++ b/tests/end_to_end/test_overlay.py
@@ -6,8 +6,8 @@
 from unittest.mock import patch
 
 import pytest
-from mavis.constants import SUBCOMMAND
 from mavis.main import main
+from mavis_config.constants import SUBCOMMAND
 
 from ..util import get_data, glob_exists
 

From fefc0958e26008fb9282f6bdf6ad1f44d364e11f Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Wed, 26 Jan 2022 13:14:33 -0800
Subject: [PATCH 090/137] Add time and level to log format

---
 src/mavis/main.py          | 6 +++++-
 src/mavis/pairing/main.py  | 2 +-
 src/mavis/summary/main.py  | 2 +-
 src/mavis/util.py          | 4 ++--
 src/mavis/validate/main.py | 2 +-
 5 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/src/mavis/main.py b/src/mavis/main.py
index 0bf4eb82..5c7c8ecd 100644
--- a/src/mavis/main.py
+++ b/src/mavis/main.py
@@ -176,7 +176,11 @@ def main(argv=None):
     if args.command == SUBCOMMAND.OVERLAY:
         args = check_overlay_args(args, parser)
 
-    log_conf = {'format': '{message}', 'style': '{', 'level': args.log_level}
+    log_conf = {
+        'format': '{asctime} [{levelname}] {message}',
+        'style': '{',
+        'level': args.log_level,
+    }
 
     original_logging_handlers = logging.root.handlers[:]
     for handler in logging.root.handlers:
diff --git a/src/mavis/pairing/main.py b/src/mavis/pairing/main.py
index 48809ba9..270e88d9 100644
--- a/src/mavis/pairing/main.py
+++ b/src/mavis/pairing/main.py
@@ -136,4 +136,4 @@ def main(
 
     fname = os.path.join(output, 'mavis_paired.tab')
     output_tabbed_file(bpps, fname)
-    generate_complete_stamp(output)
+    generate_complete_stamp(output, start_time=start_time)
diff --git a/src/mavis/summary/main.py b/src/mavis/summary/main.py
index 73c9240d..93f49726 100644
--- a/src/mavis/summary/main.py
+++ b/src/mavis/summary/main.py
@@ -343,4 +343,4 @@ def main(inputs: List[str], output: str, config: Dict, start_time=int(time.time(
             ):
                 lib_rows.append(row)
         output_tabbed_file(lib_rows, filename, header=output_columns)
-    generate_complete_stamp(output)
+    generate_complete_stamp(output, start_time=start_time)
diff --git a/src/mavis/util.py b/src/mavis/util.py
index cfb269da..7009cb48 100644
--- a/src/mavis/util.py
+++ b/src/mavis/util.py
@@ -179,7 +179,7 @@ def read_inputs(inputs, required_columns=[], **kwargs):
     bpps = []
 
     for finput in bash_expands(*inputs):
-        logger.info('loading: {finput}')
+        logger.info(f'loading: {finput}')
         bpps.extend(
             read_bpp_from_input_file(
                 finput, required_columns=[COLUMNS.protocol, *required_columns], **kwargs
@@ -255,7 +255,7 @@ def generate_complete_stamp(output_dir, prefix='MAVIS.', start_time=None):
         'some_output_dir/MAVIS.COMPLETE'
     """
     stamp = os.path.join(output_dir, str(prefix) + 'COMPLETE')
-    logger.info('complete: {stamp}')
+    logger.info(f'complete: {stamp}')
     with open(stamp, 'w') as fh:
         if start_time is not None:
             duration = int(time.time()) - start_time
diff --git a/src/mavis/validate/main.py b/src/mavis/validate/main.py
index 325dbf3c..9cbc1383 100644
--- a/src/mavis/validate/main.py
+++ b/src/mavis/validate/main.py
@@ -163,7 +163,7 @@ def main(
                 else f' (tracking_id: {evidence.tracking_id})'
             ),
         )
-        logger.info(repr(evidence))
+        logger.info(str(evidence))
         logger.info(f'possible event type(s): {BreakpointPair.classify(evidence)}')
         logger.info(
             f'outer window regions: {evidence.break1.chr}:{evidence.outer_window1[0]}-{evidence.outer_window1[1]}  {evidence.break2.chr}:{evidence.outer_window2[0]}-{evidence.outer_window2[1]}'

From b1b890ac9ac4cca73696072f3a718fc5ff5328e2 Mon Sep 17 00:00:00 2001
From: Jeremy Fan <jfan@bcgsc.ca>
Date: Wed, 26 Jan 2022 13:27:38 -0800
Subject: [PATCH 091/137] addressed Sniffles support

---
 src/mavis/interval.py        |   2 +
 src/mavis/tools/constants.py |   4 +
 src/mavis/tools/vcf.py       | 203 +++++++++++++++--------------------
 tests/unit/test_tools_vcf.py |  30 +++++-
 4 files changed, 120 insertions(+), 119 deletions(-)

diff --git a/src/mavis/interval.py b/src/mavis/interval.py
index c78e3aa8..7d0d5d96 100644
--- a/src/mavis/interval.py
+++ b/src/mavis/interval.py
@@ -30,6 +30,8 @@ def __init__(self, start: int, end: Optional[int] = None, freq: int = 1, number_
 
         self.start = self.number_type(self.start)
         self.end = self.number_type(self.end)
+        if self.start == 0 and self.end == 1: 
+            self.start = 1
         if self.start > self.end:
             raise AttributeError('interval start > end is not allowed', self.start, self.end)
         self.freq = int(freq)
diff --git a/src/mavis/tools/constants.py b/src/mavis/tools/constants.py
index d2412dfa..821d6a6a 100644
--- a/src/mavis/tools/constants.py
+++ b/src/mavis/tools/constants.py
@@ -51,6 +51,10 @@ class SUPPORTED_TOOL(MavisNamespace):
         'dup': [SVTYPE.DUP],
         'ITD': [SVTYPE.DUP],
         'IDP': [SVTYPE.INS],
+        'DEL/INV': [SVTYPE.DEL, SVTYPE.INV],
+        'DUP/INS': [SVTYPE.DUP, SVTYPE.INS],
+        'INVDUP': [SVTYPE.INV, SVTYPE.DUP, SVTYPE.INS],
+        'INV/INVDUP': [SVTYPE.INV, SVTYPE.DUP, SVTYPE.INS],
     }
 )
 
diff --git a/src/mavis/tools/vcf.py b/src/mavis/tools/vcf.py
index c25fe27d..04dc7bfb 100644
--- a/src/mavis/tools/vcf.py
+++ b/src/mavis/tools/vcf.py
@@ -2,7 +2,6 @@
 import re
 from dataclasses import dataclass
 from typing import Dict, List, Optional, Tuple
-from copy import deepcopy
 
 import pandas as pd
 
@@ -17,19 +16,19 @@
 from .constants import SUPPORTED_TOOL
 
 PANDAS_DEFAULT_NA_VALUES = [
-    "-1.#IND",
-    "1.#QNAN",
-    "1.#IND",
-    "-1.#QNAN",
-    "#N/A",
-    "N/A",
-    "NA",
-    "#NA",
-    "NULL",
-    "NaN",
-    "-NaN",
-    "nan",
-    "-nan",
+    '-1.#IND',
+    '1.#QNAN',
+    '1.#IND',
+    '-1.#QNAN',
+    '#N/A',
+    'N/A',
+    'NA',
+    '#NA',
+    'NULL',
+    'NaN',
+    '-NaN',
+    'nan',
+    '-nan',
 ]
 
 
@@ -54,7 +53,7 @@ class VcfRecordType:
 
     @property
     def stop(self) -> Optional[int]:
-        return self.info.get("END", self.pos)
+        return self.info.get('END', self.pos)
 
 
 def parse_bnd_alt(alt: str) -> Tuple[str, int, str, str, str, str]:
@@ -76,51 +75,51 @@ def parse_bnd_alt(alt: str) -> Tuple[str, int, str, str, str, str]:
     | ru]p]        | LL      |
     """
     # ru[p[
-    match = re.match(r"^(?P<ref>\w)(?P<useq>\w*)\[(?P<chr>[^:]+):(?P<pos>\d+)\[$", alt)
+    match = re.match(r'^(?P<ref>\w)(?P<useq>\w*)\[(?P<chr>[^:]+):(?P<pos>\d+)\[$', alt)
     if match:
         return (
-            match.group("chr"),
-            int(match.group("pos")),
+            match.group('chr'),
+            int(match.group('pos')),
             ORIENT.LEFT,
             ORIENT.RIGHT,
-            match.group("ref"),
-            match.group("useq"),
+            match.group('ref'),
+            match.group('useq'),
         )
     # [p[ur
-    match = re.match(r"^\[(?P<chr>[^:]+):(?P<pos>\d+)\[(?P<useq>\w*)(?P<ref>\w)$", alt)
+    match = re.match(r'^\[(?P<chr>[^:]+):(?P<pos>\d+)\[(?P<useq>\w*)(?P<ref>\w)$', alt)
     if match:
         return (
-            match.group("chr"),
-            int(match.group("pos")),
+            match.group('chr'),
+            int(match.group('pos')),
             ORIENT.RIGHT,
             ORIENT.RIGHT,
-            match.group("ref"),
-            match.group("useq"),
+            match.group('ref'),
+            match.group('useq'),
         )
     # ]p]ur
-    match = re.match(r"^\](?P<chr>[^:]+):(?P<pos>\d+)\](?P<useq>\w*)(?P<ref>\w)$", alt)
+    match = re.match(r'^\](?P<chr>[^:]+):(?P<pos>\d+)\](?P<useq>\w*)(?P<ref>\w)$', alt)
     if match:
         return (
-            match.group("chr"),
-            int(match.group("pos")),
+            match.group('chr'),
+            int(match.group('pos')),
             ORIENT.RIGHT,
             ORIENT.LEFT,
-            match.group("ref"),
-            match.group("useq"),
+            match.group('ref'),
+            match.group('useq'),
         )
     # ru]p]
-    match = re.match(r"^(?P<ref>\w)(?P<useq>\w*)\](?P<chr>[^:]+):(?P<pos>\d+)\]$", alt)
+    match = re.match(r'^(?P<ref>\w)(?P<useq>\w*)\](?P<chr>[^:]+):(?P<pos>\d+)\]$', alt)
     if match:
         return (
-            match.group("chr"),
-            int(match.group("pos")),
+            match.group('chr'),
+            int(match.group('pos')),
             ORIENT.LEFT,
             ORIENT.LEFT,
-            match.group("ref"),
-            match.group("useq"),
+            match.group('ref'),
+            match.group('useq'),
         )
     else:
-        raise NotImplementedError("alt specification in unexpected format", alt)
+        raise NotImplementedError('alt specification in unexpected format', alt)
 
 
 def convert_record(record, record_mapping={}, log=DEVNULL) -> List[Dict]:
@@ -144,7 +143,7 @@ def convert_record(record, record_mapping={}, log=DEVNULL) -> List[Dict]:
             try:
                 value = record.info[key]
             except UnicodeDecodeError as err:
-                log("Ignoring invalid INFO field {} with error: {}".format(key, err))
+                log('Ignoring invalid INFO field {} with error: {}'.format(key, err))
             else:
                 try:
                     value = value[0] if len(value) == 1 else value
@@ -153,27 +152,27 @@ def convert_record(record, record_mapping={}, log=DEVNULL) -> List[Dict]:
             info[key] = value
 
         std_row = {}
-        if record.id and record.id != "N":  # to account for NovoBreak N in the ID field
-            std_row["id"] = record.id
+        if record.id and record.id != 'N':  # to account for NovoBreak N in the ID field
+            std_row['id'] = record.id
 
-        if info.get("SVTYPE") == "BND":
+        if info.get('SVTYPE') == 'BND':
             chr2, end, orient1, orient2, ref, alt = parse_bnd_alt(alt)
             std_row[COLUMNS.break1_orientation] = orient1
             std_row[COLUMNS.break2_orientation] = orient2
             std_row[COLUMNS.untemplated_seq] = alt
             if record.ref != ref:
                 raise AssertionError(
-                    "Expected the ref specification in the vcf record to match the sequence "
-                    "in the alt string: {} vs {}".format(record.ref, ref)
+                    'Expected the ref specification in the vcf record to match the sequence '
+                    'in the alt string: {} vs {}'.format(record.ref, ref)
                 )
         else:
-            chr2 = info.get("CHR2", record.chrom)
+            chr2 = info.get('CHR2', record.chrom)
             end = record.stop
             if (
                 alt
                 and record.ref
-                and re.match(r"^[A-Z]+$", alt)
-                and re.match(r"^[A-Z]+", record.ref)
+                and re.match(r'^[A-Z]+$', alt)
+                and re.match(r'^[A-Z]+', record.ref)
             ):
                 std_row[COLUMNS.untemplated_seq] = alt[1:]
                 size = len(alt) - len(record.ref)
@@ -183,7 +182,7 @@ def convert_record(record, record_mapping={}, log=DEVNULL) -> List[Dict]:
                     std_row[COLUMNS.event_type] = SVTYPE.DEL
         std_row.update({COLUMNS.break1_chromosome: record.chrom, COLUMNS.break2_chromosome: chr2})
         if info.get(
-            "PRECISE", False
+            'PRECISE', False
         ):  # DELLY CI only apply when split reads were not used to refine the breakpoint which is then flagged
             std_row.update(
                 {
@@ -197,97 +196,65 @@ def convert_record(record, record_mapping={}, log=DEVNULL) -> List[Dict]:
             std_row.update(
                 {
                     COLUMNS.break1_position_start: max(
-                        1, record.pos + info.get("CIPOS", (0, 0))[0]
+                        1, record.pos + info.get('CIPOS', (0, 0))[0]
                     ),
-                    COLUMNS.break1_position_end: record.pos + info.get("CIPOS", (0, 0))[1],
-                    COLUMNS.break2_position_start: max(1, end + info.get("CIEND", (0, 0))[0]),
-                    COLUMNS.break2_position_end: end + info.get("CIEND", (0, 0))[1],
+                    COLUMNS.break1_position_end: record.pos + info.get('CIPOS', (0, 0))[1],
+                    COLUMNS.break2_position_start: max(1, end + info.get('CIEND', (0, 0))[0]),
+                    COLUMNS.break2_position_end: end + info.get('CIEND', (0, 0))[1],
                 }
             )
 
-        std_row2 = {}
-
-        if "SVTYPE" in info:
-            if info["SVTYPE"] in dir(SVTYPE):
-                std_row[COLUMNS.event_type] = info["SVTYPE"]
-            elif "/" in info["SVTYPE"]:
-                std_row2 = deepcopy(std_row)
-                std_row[COLUMNS.event_type] = info["SVTYPE"].split("/")[0]
-                std_row2[COLUMNS.event_type] = info["SVTYPE"].split("/")[1]
+        if 'SVTYPE' in info:
+            std_row[COLUMNS.event_type] = info['SVTYPE']
 
         try:
-            orient1, orient2 = info["CT"].split("to")
-            connection_type = {"3": ORIENT.LEFT, "5": ORIENT.RIGHT, "N": ORIENT.NS}
+            orient1, orient2 = info['CT'].split('to')
+            connection_type = {'3': ORIENT.LEFT, '5': ORIENT.RIGHT, 'N': ORIENT.NS}
             std_row[COLUMNS.break1_orientation] = connection_type[orient1]
             std_row[COLUMNS.break2_orientation] = connection_type[orient2]
-            if bool(std_row2):
-                std_row2[COLUMNS.break1_orientation] = connection_type[orient1]
-                std_row2[COLUMNS.break2_orientation] = connection_type[orient2]
         except KeyError:
             pass
-        if bool(std_row2):
-            std_row2.update(
-                {
-                    k: v
-                    for k, v in info.items()
-                    if k not in {"CHR2", "SVTYPE", "CIPOS", "CIEND", "CT"}
-                }
-            )
-            std_row.update(
-                {
-                    k: v
-                    for k, v in info.items()
-                    if k not in {"CHR2", "SVTYPE", "CIPOS", "CIEND", "CT"}
-                }
-            )
-            records.append(std_row)
-            records.append(std_row2)
-        else:
-            std_row.update(
-                {
-                    k: v
-                    for k, v in info.items()
-                    if k not in {"CHR2", "SVTYPE", "CIPOS", "CIEND", "CT"}
-                }
-            )
-            records.append(std_row)
+        std_row.update(
+            {k: v for k, v in info.items() if k not in {'CHR2', 'SVTYPE', 'CIPOS', 'CIEND', 'CT'}}
+        )
+        records.append(std_row)
     return records
 
 
 def convert_pandas_rows_to_variants(df):
     def parse_info(info_field):
         info = {}
-        for pair in info_field.split(";"):
-            if "=" in pair:
-                key, value = pair.split("=", 1)
+        for pair in info_field.split(';'):
+            if '=' in pair:
+                key, value = pair.split('=', 1)
                 info[key] = value
             else:
                 info[pair] = True
 
         # convert info types
         for key in info:
-            if key in {"CIPOS", "CIEND"}:
-                ci_start, ci_end = info[key].split(",")
+            if key in {'CIPOS', 'CIEND'}:
+                ci_start, ci_end = info[key].split(',')
                 info[key] = (int(ci_start), int(ci_end))
-            elif key == "END":
+            elif key == 'END':
                 info[key] = int(info[key])
 
         return info
 
-    df["info"] = df["INFO"].apply(parse_info)
-    df["alts"] = df["ALT"].apply(lambda a: a.split(","))
+    df['info'] = df['INFO'].apply(parse_info)
+    df['alts'] = df['ALT'].apply(lambda a: a.split(','))
 
     rows = []
     for _, row in df.iterrows():
 
         rows.append(
             VcfRecordType(
-                id=row["ID"],
-                pos=row["POS"],
-                info=VcfInfoType(row["info"]),
-                chrom=row["CHROM"],
-                ref=row["REF"],
-                alts=row["alts"],
+                id=row['ID'],
+                pos=row['POS'],
+                info=VcfInfoType(row['info']),
+                chrom=row['CHROM'],
+                ref=row['REF'],
+                alts=row['alts'],
             )
         )
     return rows
@@ -299,9 +266,9 @@ def pandas_vcf(input_file) -> Tuple[List[str], pd.DataFrame]:
     """
     # read the comment/header information
     header_lines = []
-    with open(input_file, "r") as fh:
-        line = "##"
-        while line.startswith("##"):
+    with open(input_file, 'r') as fh:
+        line = '##'
+        while line.startswith('##'):
             header_lines.append(line)
             line = fh.readline().strip()
         header_lines = header_lines[1:]
@@ -311,21 +278,21 @@ def pandas_vcf(input_file) -> Tuple[List[str], pd.DataFrame]:
         sep="\t",
         skiprows=len(header_lines),
         dtype={
-            "CHROM": str,
-            "POS": int,
-            "ID": str,
-            "INFO": str,
-            "FORMAT": str,
-            "REF": str,
-            "ALT": str,
+            'CHROM': str,
+            'POS': int,
+            'ID': str,
+            'INFO': str,
+            'FORMAT': str,
+            'REF': str,
+            'ALT': str,
         },
         na_values=PANDAS_DEFAULT_NA_VALUES + ["."],
     )
-    df = df.rename(columns={df.columns[0]: df.columns[0].replace("#", "")})
-    required_columns = ["CHROM", "INFO", "POS", "REF", "ALT", "ID"]
+    df = df.rename(columns={df.columns[0]: df.columns[0].replace('#', '')})
+    required_columns = ['CHROM', 'INFO', 'POS', 'REF', 'ALT', 'ID']
     for col in required_columns:
         if col not in df.columns:
-            raise KeyError(f"Missing required column: {col}")
+            raise KeyError(f'Missing required column: {col}')
     # convert the format fields using the header
     return header_lines, df
 
@@ -349,4 +316,4 @@ def convert_file(input_file: str, file_type: str, log):
             rows.extend(convert_record(variant_record, log=log))
         except NotImplementedError as err:
             logging.warning(str(err))
-    return rows
+    return rows
\ No newline at end of file
diff --git a/tests/unit/test_tools_vcf.py b/tests/unit/test_tools_vcf.py
index cffe9ade..8af3067f 100644
--- a/tests/unit/test_tools_vcf.py
+++ b/tests/unit/test_tools_vcf.py
@@ -1,4 +1,5 @@
-from mavis.tools.vcf import pandas_vcf
+from mavis.tools import SUPPORTED_TOOL, _convert_tool_row
+from mavis.tools.vcf import VcfInfoType, VcfRecordType, convert_record, pandas_vcf
 
 from ..util import get_data
 
@@ -7,3 +8,30 @@ def test_read_vcf():
     header, df = pandas_vcf(get_data('delly_events.vcf'))
     assert len(header) == 63
     assert df.shape[0] == 31
+
+
+def test_convert_record():
+    variant = VcfRecordType(
+        9000,
+        12000,
+        'chr14_KI270722v1_random',
+        alts=['N[chr17_GL000205v2_random:0['],
+        ref='N',
+        info=VcfInfoType(
+            IMPRECISE=True,
+            SVMETHOD="Snifflesv1.0.11",
+            SVTYPE="BND",
+            SUPTYPE="SR",
+            SVLEN="0",
+            STRANDS="+-",
+            RE="5",
+            REF_strand="0,0",
+            AF="1",
+        ),
+    )
+    records = convert_record(variant)
+    assert len(records) == 1
+    record = records[0]
+    assert record.get('break2_position_start') == 1
+    assert record.get('break2_position_end') == 1
+    assert record.get('break2_chromosome') == 'chr17_GL000205v2_random'

From fda62136eb5ff1cffbcd161f345ee23e72134602 Mon Sep 17 00:00:00 2001
From: Jeremy Fan <jfan@bcgsc.ca>
Date: Wed, 26 Jan 2022 13:31:43 -0800
Subject: [PATCH 092/137] re-lint files

---
 src/mavis/interval.py  | 2 +-
 src/mavis/tools/vcf.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/mavis/interval.py b/src/mavis/interval.py
index 7d0d5d96..03c788f9 100644
--- a/src/mavis/interval.py
+++ b/src/mavis/interval.py
@@ -30,7 +30,7 @@ def __init__(self, start: int, end: Optional[int] = None, freq: int = 1, number_
 
         self.start = self.number_type(self.start)
         self.end = self.number_type(self.end)
-        if self.start == 0 and self.end == 1: 
+        if self.start == 0 and self.end == 1:
             self.start = 1
         if self.start > self.end:
             raise AttributeError('interval start > end is not allowed', self.start, self.end)
diff --git a/src/mavis/tools/vcf.py b/src/mavis/tools/vcf.py
index 04dc7bfb..92a6c987 100644
--- a/src/mavis/tools/vcf.py
+++ b/src/mavis/tools/vcf.py
@@ -316,4 +316,4 @@ def convert_file(input_file: str, file_type: str, log):
             rows.extend(convert_record(variant_record, log=log))
         except NotImplementedError as err:
             logging.warning(str(err))
-    return rows
\ No newline at end of file
+    return rows

From a1849d52c29a2c62e3dea33786b4ab8c492efae0 Mon Sep 17 00:00:00 2001
From: Jeremy Fan <jfan@bcgsc.ca>
Date: Wed, 26 Jan 2022 14:09:58 -0800
Subject: [PATCH 093/137] changed unit test as sample Sniffles file changed

---
 tests/end_to_end/test_convert.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/end_to_end/test_convert.py b/tests/end_to_end/test_convert.py
index e0b29e8e..ff3870a6 100644
--- a/tests/end_to_end/test_convert.py
+++ b/tests/end_to_end/test_convert.py
@@ -111,7 +111,7 @@ def test_vcf(self):
     def test_sniffle(self):
         results = self.run_main(get_data('sniffles.vcf'), SUPPORTED_TOOL.VCF, False)
         print(results.keys())
-        record = results['vcf-35777'][0]
+        record = results['vcf-30259'][0]
         print(record, record.data)
         assert record.data['event_type'] == 'translocation'
     

From 6c5076a2ffb9f82a5f777c4ed2073aff90bdeb85 Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Fri, 28 Jan 2022 15:32:26 -0800
Subject: [PATCH 094/137] Add more type annotations

---
 docs/hooks.py                    |   8 +-
 src/mavis/align.py               |  22 +--
 src/mavis/annotate/base.py       |   3 +-
 src/mavis/annotate/file_io.py    |   8 +-
 src/mavis/annotate/fusion.py     |  18 ++-
 src/mavis/annotate/genomic.py    | 249 +++++++++++++++++--------------
 src/mavis/annotate/protein.py    | 148 ++++++++++--------
 src/mavis/annotate/splicing.py   |  29 ++--
 src/mavis/annotate/variant.py    |  82 ++++++----
 src/mavis/assemble.py            |  58 +++----
 src/mavis/bam/cache.py           | 127 ++++++++--------
 src/mavis/bam/cigar.py           |  72 +++++----
 src/mavis/bam/read.py            |  92 ++++++------
 src/mavis/blat.py                |  46 +++---
 src/mavis/breakpoint.py          |   5 +-
 src/mavis/cluster/cluster.py     |  29 ++--
 src/mavis/cluster/main.py        |   4 +-
 src/mavis/illustrate/diagram.py  |  59 +++++---
 src/mavis/illustrate/elements.py |  20 ++-
 src/mavis/interval.py            |  74 ++++-----
 src/mavis/tools/vcf.py           |   8 +-
 src/mavis/types.py               |  11 ++
 22 files changed, 662 insertions(+), 510 deletions(-)
 create mode 100644 src/mavis/types.py

diff --git a/docs/hooks.py b/docs/hooks.py
index 44931755..b76386d5 100644
--- a/docs/hooks.py
+++ b/docs/hooks.py
@@ -1,11 +1,9 @@
 import json
 import os
-import re
 from textwrap import dedent
 
+import pkg_resources
 from markdown_refdocs.main import extract_to_markdown
-from mavis.schemas import DEFAULTS
-from mavis.util import ENV_VAR_PREFIX
 
 
 def json_to_pytype(record):
@@ -130,7 +128,7 @@ def generate_settings_doc(schema_file):
 
 
 def build_package_docs(config):
-    schema_file = os.path.join(os.path.dirname(__file__), '../src/mavis/schemas/config.json')
+    schema_file = pkg_resources.resource_filename('mavis_config', 'config.json')
     generate_settings_doc(schema_file)
     package_dir = os.path.join(os.path.dirname(__file__), '../src/mavis')
     output_dir = os.path.join(os.path.dirname(__file__), 'package')
@@ -142,5 +140,5 @@ def build_package_docs(config):
         hide_private=True,
         hide_undoc=True,
         hide_undoc_args=True,
-        namespace_headers=True,
+        namespace_headers=False,
     )
diff --git a/src/mavis/align.py b/src/mavis/align.py
index df74feb7..2d6b6898 100644
--- a/src/mavis/align.py
+++ b/src/mavis/align.py
@@ -5,6 +5,7 @@
 import os
 import re
 import subprocess
+from typing import Dict
 
 import pysam
 
@@ -13,6 +14,7 @@
 from .breakpoint import Breakpoint, BreakpointPair
 from .constants import CIGAR, ORIENT, STRAND, SVTYPE, MavisNamespace, reverse_complement
 from .interval import Interval
+from .types import ReferenceGenome
 from .util import logger
 
 
@@ -140,7 +142,7 @@ def breakpoint_contig_remapped_depth(breakpoint, contig, read):
         return contig.remap_depth(qrange)
 
 
-def get_aligner_version(aligner):
+def get_aligner_version(aligner: str) -> str:
     """
     executes a subprocess to try and run the aligner without arguments and parse the version number from the output
 
@@ -167,10 +169,10 @@ def get_aligner_version(aligner):
         raise NotImplementedError(aligner)
 
 
-def query_coverage_interval(read):
+def query_coverage_interval(read: pysam.AlignedSegment) -> Interval:
     """
     Returns:
-        mavis.interval.Interval: The portion of the original query sequence that is aligned by this read
+        The portion of the original query sequence that is aligned by this read
     """
     seq = read.query_sequence
     st = 0
@@ -182,7 +184,7 @@ def query_coverage_interval(read):
     return Interval(st, end)
 
 
-def convert_to_duplication(alignment, reference_genome):
+def convert_to_duplication(alignment, reference_genome: ReferenceGenome):
     """
     Given a breakpoint call, tests if the untemplated sequences matches the preceding
     reference sequence. If it does this is annotated as a duplication and the new
@@ -382,11 +384,11 @@ def call_paired_read_event(read1, read2, is_stranded=False):
 
 
 def align_sequences(
-    sequences,
+    sequences: Dict[str, str],
     input_bam_cache,
-    reference_genome,
-    aligner,
-    aligner_reference,
+    reference_genome: ReferenceGenome,
+    aligner: str,
+    aligner_reference: str,
     aligner_output_file='aligner_out.temp',
     aligner_fa_input_file='aligner_in.fa',
     aligner_output_log='aligner_out.log',
@@ -399,11 +401,11 @@ def align_sequences(
     calls the alignment tool and parses the return output for a set of sequences
 
     Args:
-        sequences (Dict[str,str]): dictionary of sequences by name
+        sequences: dictionary of sequences by name
         input_bam_cache (BamCache): bam cache to be used as a template for reading the alignments
         reference_genome: the reference genome
         aligner (SUPPORTED_ALIGNER): the name of the aligner to be used
-        aligner_reference (str): path to the aligner reference file
+        aligner_reference: path to the aligner reference file
     """
     try:
         # write the input sequences to a fasta file
diff --git a/src/mavis/annotate/base.py b/src/mavis/annotate/base.py
index bf0dea8f..950444fa 100644
--- a/src/mavis/annotate/base.py
+++ b/src/mavis/annotate/base.py
@@ -3,6 +3,7 @@
 
 from ..constants import STRAND
 from ..interval import Interval
+from ..types import ReferenceGenome
 
 
 class ReferenceName(str):
@@ -133,7 +134,7 @@ def __lt__(self, other):
     def __hash__(self):
         return hash(self.key())
 
-    def get_seq(self, reference_genome=None, ignore_cache=False):
+    def get_seq(self, reference_genome: Optional[ReferenceGenome] = None, ignore_cache=False):
         """
         get the sequence for the current annotation object
 
diff --git a/src/mavis/annotate/file_io.py b/src/mavis/annotate/file_io.py
index 9e6c92c2..4b6ba264 100644
--- a/src/mavis/annotate/file_io.py
+++ b/src/mavis/annotate/file_io.py
@@ -8,11 +8,11 @@
 
 import pandas as pd
 from Bio import SeqIO
-from Bio.SeqRecord import SeqRecord
 from snakemake.utils import validate as snakemake_validate
 
 from ..constants import CODON_SIZE, GIEMSA_STAIN, START_AA, STOP_AA, translate
 from ..interval import Interval
+from ..types import ReferenceGenome
 from ..util import logger
 from .base import BioInterval, ReferenceName
 from .genomic import Exon, Gene, PreTranscript, Template, Transcript
@@ -60,7 +60,7 @@ def load_masking_regions(*filepaths: str) -> Dict[str, List[BioInterval]]:
 
 def load_annotations(
     *filepaths: str,
-    reference_genome: Optional[Dict[str, SeqRecord]] = None,
+    reference_genome: Optional[ReferenceGenome] = None,
     best_transcripts_only: bool = False,
 ) -> Dict[str, List[Gene]]:
     """
@@ -95,7 +95,7 @@ def load_annotations(
 
 def parse_annotations_json(
     data,
-    reference_genome: Optional[Dict[str, SeqRecord]] = None,
+    reference_genome: Optional[ReferenceGenome] = None,
     best_transcripts_only=False,
 ) -> Dict[str, List[Gene]]:
     """
@@ -202,7 +202,7 @@ def parse_annotations_json(
     return genes_by_chr
 
 
-def load_reference_genome(*filepaths: str) -> Dict[str, SeqRecord]:
+def load_reference_genome(*filepaths: str) -> ReferenceGenome:
     """
     Args:
         filepaths: the paths to the files containing the input fasta genomes
diff --git a/src/mavis/annotate/fusion.py b/src/mavis/annotate/fusion.py
index e0bf0026..b02e3259 100644
--- a/src/mavis/annotate/fusion.py
+++ b/src/mavis/annotate/fusion.py
@@ -1,9 +1,10 @@
-from .genomic import Exon, Transcript, PreTranscript
-from .protein import calculate_orf, Domain, Translation
 from ..breakpoint import Breakpoint
-from ..constants import ORIENT, PRIME, PROTOCOL, reverse_complement, STRAND, SVTYPE
+from ..constants import ORIENT, PRIME, PROTOCOL, STRAND, SVTYPE, reverse_complement
 from ..error import NotSpecifiedError
 from ..interval import Interval, IntervalMapping
+from ..types import ReferenceGenome
+from .genomic import Exon, PreTranscript, Transcript
+from .protein import Domain, Translation, calculate_orf
 
 
 def determine_prime(transcript, breakpoint):
@@ -87,7 +88,12 @@ def map_region_to_genome(self, chr, interval_on_fusion, genome_interval, flipped
 
     @classmethod
     def _build_single_gene_inversion(
-        cls, ann, reference_genome, min_orf_size, max_orf_cap, min_domain_mapping_match
+        cls,
+        ann,
+        reference_genome: ReferenceGenome,
+        min_orf_size,
+        max_orf_cap,
+        min_domain_mapping_match,
     ):
         """
         builds a fusion transcript for a single gene inversion. Note that this is an incomplete
@@ -283,7 +289,7 @@ def _build_single_gene_duplication(
     def build(
         cls,
         ann,
-        reference_genome,
+        reference_genome: ReferenceGenome,
         min_orf_size=None,
         max_orf_cap=None,
         min_domain_mapping_match=None,
@@ -291,7 +297,7 @@ def build(
         """
         Args:
             ann (Annotation): the annotation object we want to build a FusionTranscript for
-            reference_genome (Dict[str,Bio.SeqRecord]): dict of reference sequence
+            reference_genome: dict of reference sequence
                 by template/chr name
 
         Returns:
diff --git a/src/mavis/annotate/genomic.py b/src/mavis/annotate/genomic.py
index a6d18439..14ca0a7b 100644
--- a/src/mavis/annotate/genomic.py
+++ b/src/mavis/annotate/genomic.py
@@ -1,10 +1,13 @@
 import itertools
+from typing import Dict, List, Optional, Tuple
 
 from ..constants import ORIENT, STRAND, reverse_complement
 from ..error import NotSpecifiedError
 from ..interval import Interval
+from ..types import ReferenceGenome
 from .base import BioInterval, ReferenceName
 from .constants import SPLICE_SITE_TYPE
+from .protein import Translation
 from .splicing import SpliceSite, SplicingPattern
 
 
@@ -29,12 +32,12 @@ def __hash__(self):
 
 
 class IntergenicRegion(BioInterval):
-    def __init__(self, chr, start, end, strand):
+    def __init__(self, chr: str, start: int, end: int, strand: str):
         """
         Args:
-            chr (str): the reference object/chromosome for this region
-            start (int): the start of the IntergenicRegion
-            end (int): the end of the IntergenicRegion
+            chr: the reference object/chromosome for this region
+            start: the start of the IntergenicRegion
+            end: the end of the IntergenicRegion
             strand (STRAND): the strand the region is defined on
 
         Example:
@@ -65,14 +68,23 @@ def to_dict(self):
 class Gene(BioInterval):
     """ """
 
-    def __init__(self, chr, start, end, name=None, strand=STRAND.NS, aliases=None, seq=None):
+    def __init__(
+        self,
+        chr: str,
+        start: int,
+        end: int,
+        name: Optional[str] = None,
+        strand: str = STRAND.NS,
+        aliases: Optional[List[str]] = None,
+        seq: Optional[str] = None,
+    ):
         """
         Args:
-            chr (str): the chromosome
-            name (str): the gene name/id i.e. ENSG0001
+            chr: the chromosome
+            name: the gene name/id i.e. ENSG0001
             strand (STRAND): the genomic strand '+' or '-'
-            aliases (List[str]): a list of aliases. For example the hugo name could go here
-            seq (str): genomic seq of the gene
+            aliases: a list of aliases. For example the hugo name could go here
+            seq: genomic seq of the gene
         Example:
             >>> Gene('X', 1, 1000, 'ENG0001', '+', ['KRAS'])
         """
@@ -102,13 +114,13 @@ def sort_key(t):
         raise ValueError('input transcript is not associated with this gene', transcript)
 
     @property
-    def transcripts(self):
-        """List[PreTranscript] list of unspliced transcripts"""
+    def transcripts(self) -> List['PreTranscript']:
+        """list of unspliced transcripts"""
         return self.unspliced_transcripts
 
     @property
-    def translations(self):
-        """List[mavis.annotate.protein.Translation] list of translations"""
+    def translations(self) -> List[Translation]:
+        """list of translations"""
         translations = []
         for pre_transcript in self.unspliced_transcripts:
             for tx in pre_transcript.transcripts:
@@ -125,14 +137,13 @@ def key(self):
         """see :func:`structural_variant.annotate.base.BioInterval.key`"""
         return BioInterval.key(self), self.strand
 
-    def get_seq(self, reference_genome, ignore_cache=False):
+    def get_seq(self, reference_genome: ReferenceGenome, ignore_cache: bool = False) -> str:
         """
         gene sequence is always given wrt to the positive forward strand regardless of gene strand
 
         Args:
-            reference_genome (Dict[str,Bio.SeqRecord]): dict of reference sequence by
-                template/chr name
-            ignore_cache (bool): if True then stored sequences will be ignored and the function will attempt to retrieve the sequence using the positions and the input reference_genome
+            reference_genome: dict of reference sequence by template/chr name
+            ignore_cache: if True then stored sequences will be ignored and the function will attempt to retrieve the sequence using the positions and the input reference_genome
 
         Returns:
             str: the sequence of the gene
@@ -145,8 +156,8 @@ def get_seq(self, reference_genome, ignore_cache=False):
             return str(reference_genome[self.chr].seq[self.start - 1 : self.end]).upper()
 
     @property
-    def spliced_transcripts(self):
-        """List[Transcript]: list of transcripts"""
+    def spliced_transcripts(self) -> List['Transcript']:
+        """list of transcripts"""
         spl = []
         for t in self.unspliced_transcripts:
             spl.extend(t.spliced_transcripts)
@@ -164,23 +175,23 @@ class Exon(BioInterval):
 
     def __init__(
         self,
-        start,
-        end,
-        transcript=None,
-        name=None,
-        intact_start_splice=True,
-        intact_end_splice=True,
-        seq=None,
-        strand=None,
+        start: int,
+        end: int,
+        transcript: Optional['PreTranscript'] = None,
+        name: Optional[str] = None,
+        intact_start_splice: bool = True,
+        intact_end_splice: bool = True,
+        seq: Optional[str] = None,
+        strand: Optional[str] = None,
     ):
         """
         Args:
-            start (int): the genomic start position
-            end (int): the genomic end position
-            name (str): the name of the exon
-            transcript (PreTranscript): the 'parent' transcript this exon belongs to
-            intact_start_splice (bool): if the starting splice site has been abrogated
-            intact_end_splice (bool): if the end splice site has been abrogated
+            start: the genomic start position
+            end: the genomic end position
+            name: the name of the exon
+            transcript: the 'parent' transcript this exon belongs to
+            intact_start_splice: if the starting splice site has been abrogated
+            intact_end_splice: if the end splice site has been abrogated
         Raises:
             AttributeError: if the exon start > the exon end
         Example:
@@ -233,32 +244,32 @@ def transcript(self):
         return self.reference_object
 
     @property
-    def donor_splice_site(self):
-        """mavis.interval.Interval: the genomic range describing the splice site"""
+    def donor_splice_site(self) -> Interval:
+        """the genomic range describing the splice site"""
         if self.is_reverse:
             return self.start_splice_site
         else:
             return self.end_splice_site
 
     @property
-    def acceptor_splice_site(self):
-        """mavis.interval.Interval: the genomic range describing the splice site"""
+    def acceptor_splice_site(self) -> Interval:
+        """the genomic range describing the splice site"""
         if self.is_reverse:
             return self.end_splice_site
         else:
             return self.start_splice_site
 
     @property
-    def donor(self):
-        """`int`: returns the genomic exonic position of the donor splice site"""
+    def donor(self) -> int:
+        """returns the genomic exonic position of the donor splice site"""
         if self.is_reverse:
             return self.start
         else:
             return self.end
 
     @property
-    def acceptor(self):
-        """`int`: returns the genomic exonic position of the acceptor splice site"""
+    def acceptor(self) -> int:
+        """returns the genomic exonic position of the acceptor splice site"""
         if self.is_reverse:
             return self.end
         else:
@@ -278,24 +289,24 @@ class PreTranscript(BioInterval):
 
     def __init__(
         self,
-        exons,
-        gene=None,
-        name=None,
-        strand=None,
-        spliced_transcripts=None,
-        seq=None,
-        is_best_transcript=False,
+        exons: List[Exon],
+        gene: Optional[Gene] = None,
+        name: Optional[str] = None,
+        strand: Optional[str] = None,
+        spliced_transcripts: Optional[List['Transcript']] = None,
+        seq: Optional[str] = None,
+        is_best_transcript: bool = False,
     ):
         """creates a new transcript object
 
         Args:
-            exons (List[Exon]): list of Exon that make up the transcript
-            genomic_start (int): genomic start position of the transcript
-            genomic_end (int): genomic end position of the transcript
-            gene (Gene): the gene this transcript belongs to
-            name (str): name of the transcript
+            exons: list of Exon that make up the transcript
+            genomic_start: genomic start position of the transcript
+            genomic_end: genomic end position of the transcript
+            gene: the gene this transcript belongs to
+            name: name of the transcript
             strand (STRAND): strand the transcript is on, defaults to the strand of the Gene if not specified
-            seq (str): unspliced cDNA seq
+            seq: unspliced cDNA seq
         """
         # cannot use mutable default args in the function decl
         self.exons = exons
@@ -332,12 +343,12 @@ def __init__(
         except AttributeError:
             pass
 
-    def generate_splicing_patterns(self):
+    def generate_splicing_patterns(self) -> List[SplicingPattern]:
         """
         returns a list of splice sites to be connected as a splicing pattern
 
         Returns:
-            List[SplicingPattern]: List of positions to be spliced together
+            List of positions to be spliced together
 
         Note:
             see [theory - predicting splicing patterns](/background/theory/#predicting-splicing-patterns)
@@ -355,10 +366,12 @@ def gene(self):
         """Gene: the gene this transcript belongs to"""
         return self.reference_object
 
-    def _genomic_to_cdna_mapping(self, splicing_pattern):
+    def _genomic_to_cdna_mapping(
+        self, splicing_pattern: SplicingPattern
+    ) -> Dict[Interval, Interval]:
         """
         Args:
-            splicing_pattern (SplicingPattern): list of genomic splice sites 3'5' repeating
+            splicing_pattern: list of genomic splice sites 3'5' repeating
         """
         mapping = {}
         length = 1
@@ -377,7 +390,9 @@ def _genomic_to_cdna_mapping(self, splicing_pattern):
             length += len(exon)
         return mapping
 
-    def _cdna_to_genomic_mapping(self, splicing_pattern):
+    def _cdna_to_genomic_mapping(
+        self, splicing_pattern: SplicingPattern
+    ) -> Dict[Interval, Interval]:
         """
         Args:
             splicing_pattern (SplicingPattern): list of genomic splice sites 3'5' repeating
@@ -385,18 +400,17 @@ def _cdna_to_genomic_mapping(self, splicing_pattern):
         mapping = {v: k for k, v in self._genomic_to_cdna_mapping(splicing_pattern).items()}
         return mapping
 
-    def convert_genomic_to_cdna(self, pos, splicing_pattern):
+    def convert_genomic_to_cdna(self, pos: int, splicing_pattern: SplicingPattern) -> int:
         """
         Args:
-            pos (int): the genomic position to be converted
-            splicing_pattern (SplicingPattern): list of genomic splice sites 3'5' repeating
+            pos: the genomic position to be converted
+            splicing_pattern: list of genomic splice sites 3'5' repeating
 
         Returns:
-            int: the cdna equivalent
+            the cdna equivalent
 
         Raises:
-            mavis.error.IndexError: when a genomic position not present in the
-                cdna is attempted to be converted
+            mavis.error.IndexError: when a genomic position not present in the cdna is attempted to be converted
         """
         cdna_pos, shift = self.convert_genomic_to_nearest_cdna(pos, splicing_pattern)
         if shift != 0:
@@ -404,17 +418,17 @@ def convert_genomic_to_cdna(self, pos, splicing_pattern):
         return cdna_pos
 
     def convert_genomic_to_nearest_cdna(
-        self, pos, splicing_pattern, stick_direction=None, allow_outside=True
-    ):
+        self, pos: int, splicing_pattern: SplicingPattern, stick_direction=None, allow_outside=True
+    ) -> Tuple[int, int]:
         """
         converts a genomic position to its cdna equivalent or (if intronic) the nearest cdna and shift
 
         Args:
-            pos (int): the genomic position
-            splicing_pattern (SplicingPattern): the splicing pattern
+            pos: the genomic position
+            splicing_pattern: the splicing pattern
 
         Returns:
-            Tuple[int,int]: the exonic cdna position and the intronic shift
+            the exonic cdna position and the intronic shift
 
         """
         mapping = self._genomic_to_cdna_mapping(splicing_pattern)
@@ -459,11 +473,11 @@ def convert_genomic_to_nearest_cdna(
                 raise NotImplementedError('Unexpected error', self.exons, pos)
         raise IndexError('position does not fall within the current transcript', pos, mapping)
 
-    def convert_cdna_to_genomic(self, pos, splicing_pattern):
+    def convert_cdna_to_genomic(self, pos: int, splicing_pattern: SplicingPattern):
         """
         Args:
-            pos (int): cdna position
-            splicing_pattern (SplicingPattern): list of genomic splice sites 3'5' repeating
+            pos: cdna position
+            splicing_pattern: list of genomic splice sites 3'5' repeating
 
         Returns:
             int: the genomic equivalent
@@ -484,15 +498,15 @@ def convert_cdna_to_genomic(self, pos, splicing_pattern):
             mapping, pos, True if self.get_strand() == STRAND.NEG else False
         )
 
-    def exon_number(self, exon):
+    def exon_number(self, exon: Exon) -> int:
         """
         exon numbering is based on the direction of translation
 
         Args:
-            exon (Exon): the exon to be numbered
+            exon: the exon to be numbered
 
         Returns:
-            int: the exon number (1 based)
+            the exon number (1 based)
 
         Raises:
             AttributeError: if the strand is not given or the exon does not belong to the transcript
@@ -508,15 +522,16 @@ def exon_number(self, exon):
                 raise NotSpecifiedError('strand must be pos or neg to calculate the exon number')
         raise AttributeError('can only calculate phase on associated exons')
 
-    def get_seq(self, reference_genome=None, ignore_cache=False):
+    def get_seq(
+        self, reference_genome: Optional[ReferenceGenome] = None, ignore_cache: bool = False
+    ) -> str:
         """
         Args:
-            reference_genome (Dict[str,Bio.SeqRecord]): dict of reference sequence
-                by template/chr name
-            ignore_cache (bool): if True then stored sequences will be ignored and the function will attempt to retrieve the sequence using the positions and the input reference_genome
+            reference_genome: dict of reference sequence by template/chr name
+            ignore_cache: if True then stored sequences will be ignored and the function will attempt to retrieve the sequence using the positions and the input reference_genome
 
         Returns:
-            str: the sequence of the transcript including introns (but relative to strand)
+            the sequence of the transcript including introns (but relative to strand)
         """
         if self.seq and not ignore_cache:
             return self.seq
@@ -535,16 +550,20 @@ def get_seq(self, reference_genome=None, ignore_cache=False):
             ).upper()
         return str(reference_genome[self.gene.chr].seq[self.start - 1 : self.end]).upper()
 
-    def get_cdna_seq(self, splicing_pattern, reference_genome=None, ignore_cache=False):
+    def get_cdna_seq(
+        self,
+        splicing_pattern: SplicingPattern,
+        reference_genome: Optional[ReferenceGenome] = None,
+        ignore_cache: bool = False,
+    ) -> str:
         """
         Args:
-            splicing_pattern (SplicingPattern): the list of splicing positions
-            reference_genome (Dict[str,Bio.SeqRecord]): dict of reference sequence
-                by template/chr name
-            ignore_cache (bool): if True then stored sequences will be ignored and the function will attempt to retrieve the sequence using the positions and the input reference_genome
+            splicing_pattern: the list of splicing positions
+            reference_genome: dict of reference sequence by template/chr name
+            ignore_cache: if True then stored sequences will be ignored and the function will attempt to retrieve the sequence using the positions and the input reference_genome
 
         Returns:
-            str: the spliced cDNA sequence
+            the spliced cDNA sequence
         """
         temp = sorted([self.start] + [s.pos for s in splicing_pattern] + [self.end])
         cdna_start = min(temp)
@@ -560,8 +579,8 @@ def get_cdna_seq(self, splicing_pattern, reference_genome=None, ignore_cache=Fal
         return spliced_seq if self.get_strand() == STRAND.POS else reverse_complement(spliced_seq)
 
     @property
-    def translations(self):
-        """List[mavis.annotate.protein.Translation]: list of translations associated with this transcript"""
+    def translations(self) -> List[Translation]:
+        """list of translations associated with this transcript"""
         translations = []
         for spl_tx in self.spliced_transcripts:
             for translation in spl_tx.translations:
@@ -569,28 +588,35 @@ def translations(self):
         return translations
 
     @property
-    def transcripts(self):
-        """List[Transcript]: list of spliced transcripts"""
+    def transcripts(self) -> List['Transcript']:
+        """list of spliced transcripts"""
         return self.spliced_transcripts
 
 
 class Transcript(BioInterval):
-    def __init__(self, pre_transcript, splicing_patt, seq=None, translations=None):
+    reference_object: PreTranscript
+
+    def __init__(
+        self,
+        pre_transcript: PreTranscript,
+        splicing_patt: SplicingPattern,
+        seq: Optional[str] = None,
+        translations: Optional[List[Translation]] = None,
+    ):
         """
         splicing pattern is given in genomic coordinates
 
         Args:
-            pre_transcript (PreTranscript): the unspliced transcript
-            splicing_patt (List[int]): the list of splicing positions
-            seq (str): the cdna sequence
-            translations (List[mavis.annotate.protein.Translation]):
-             the list of translations of this transcript
+            pre_transcript: the unspliced transcript
+            splicing_patt: the list of splicing positions
+            seq: the cdna sequence
+            translations: the list of translations of this transcript
         """
         pos = sorted([pre_transcript.start, pre_transcript.end] + [s.pos for s in splicing_patt])
         splicing_patt.sort()
         self.splicing_pattern = splicing_patt
         length = sum([t - s + 1 for s, t in zip(pos[::2], pos[1::2])])
-        BioInterval.__init__(self, pre_transcript, 1, length, seq=None)
+        BioInterval.__init__(self, pre_transcript, 1, length, seq=seq)
         self.exons = [Exon(s, t, self) for s, t in zip(pos[::2], pos[1::2])]
         self.translations = [] if translations is None else [tx for tx in translations]
 
@@ -606,13 +632,13 @@ def __init__(self, pre_transcript, splicing_patt, seq=None, translations=None):
         elif len(splicing_patt) % 2 != 0:
             raise AssertionError('splicing pattern must be a list of 3\'5\' splicing positions')
 
-    def convert_genomic_to_cdna(self, pos):
+    def convert_genomic_to_cdna(self, pos: int) -> int:
         """
         Args:
-            pos (int): the genomic position to be converted
+            pos: the genomic position to be converted
 
         Returns:
-            int: the cdna equivalent
+            the cdna equivalent
 
         Raises:
             IndexError: when a genomic position not present in the cdna is attempted to be converted
@@ -624,25 +650,26 @@ def convert_genomic_to_nearest_cdna(self, pos, **kwargs):
             pos, self.splicing_pattern, **kwargs
         )
 
-    def convert_cdna_to_genomic(self, pos):
+    def convert_cdna_to_genomic(self, pos: int) -> int:
         """
         Args:
-            pos (int): cdna position
+            pos: cdna position
 
         Returns:
-            int: the genomic equivalent
+            the genomic equivalent
         """
         return self.unspliced_transcript.convert_cdna_to_genomic(pos, self.splicing_pattern)
 
-    def get_seq(self, reference_genome=None, ignore_cache=False):
+    def get_seq(
+        self, reference_genome: Optional[ReferenceGenome] = None, ignore_cache: bool = False
+    ) -> str:
         """
         Args:
-            reference_genome (Dict[str,Bio.SeqRecord]): dict of reference sequence by
-                template/chr name
-            ignore_cache (bool): if True then stored sequences will be ignored and the function will attempt to retrieve the sequence using the positions and the input reference_genome
+            reference_genome: dict of reference sequence by template/chr name
+            ignore_cache: if True then stored sequences will be ignored and the function will attempt to retrieve the sequence using the positions and the input reference_genome
 
         Returns:
-            str: the sequence corresponding to the spliced cdna
+            the sequence corresponding to the spliced cdna
         """
         if self.seq and not ignore_cache:
             return self.seq
@@ -652,6 +679,6 @@ def get_seq(self, reference_genome=None, ignore_cache=False):
         return seq[self.start - 1 : self.end]
 
     @property
-    def unspliced_transcript(self):
-        """PreTranscript: the unspliced transcript this splice variant belongs to"""
+    def unspliced_transcript(self) -> PreTranscript:
+        """the unspliced transcript this splice variant belongs to"""
         return self.reference_object
diff --git a/src/mavis/annotate/protein.py b/src/mavis/annotate/protein.py
index acf5172d..50d31f33 100644
--- a/src/mavis/annotate/protein.py
+++ b/src/mavis/annotate/protein.py
@@ -1,20 +1,27 @@
 import itertools
+from typing import TYPE_CHECKING, List, Optional, Tuple, Union
 
-from .base import BioInterval
 from ..constants import CODON_SIZE, START_AA, STOP_AA, translate
 from ..error import NotSpecifiedError
 from ..interval import Interval
+from ..types import ReferenceGenome
+from .base import BioInterval
 
+if TYPE_CHECKING:
+    from .genomic import Transcript
 
-def calculate_orf(spliced_cdna_sequence, min_orf_size=None):
+
+def calculate_orf(
+    spliced_cdna_sequence: str, min_orf_size: Optional[Union[float, int]] = None
+) -> List[Interval]:
     """
     calculate all possible open reading frames given a spliced cdna sequence (no introns)
 
     Args:
-        spliced_cdna_sequence (str): the sequence
+        spliced_cdna_sequence: the sequence
 
     Returns:
-        List[Interval]: list of open reading frame positions on the input sequence
+        list of open reading frame positions on the input sequence
     """
     # do not revcomp
     assert START_AA != STOP_AA
@@ -48,16 +55,22 @@ def __init__(self, start, end, seq=None, domain=None, name=None):
 
 
 class Domain:
-    def __init__(self, name, regions, translation=None, data=None):
+    def __init__(
+        self,
+        name: str,
+        regions: List[DomainRegion],
+        translation: Optional['Translation'] = None,
+        data=None,
+    ):
         """
         Args:
-            name (str): the name of the domain i.e. PF00876
-            regions (List[DomainRegion]): the amino acid ranges that are part of the domain
-            transcript (Transcript): the 'parent' transcript this domain belongs to
+            name: the name of the domain i.e. PF00876
+            regions: the amino acid ranges that are part of the domain
+            translation: the 'parent' translation this domain belongs to
         Raises:
             AttributeError: if the end of any region is less than the start
         Example:
-            >>> Domain('DNA binding domain', [(1, 4), (10, 24)], transcript)
+            >>> Domain('DNA binding domain', [(1, 4), (10, 24)], translation)
         """
         self.reference_object = translation
         self.name = name
@@ -77,28 +90,27 @@ def __init__(self, name, regions, translation=None, data=None):
                 self.regions[i] = DomainRegion(curr[0], curr[1])
 
     @property
-    def translation(self):
-        """mavis.annotate.Translation: the Translation this domain belongs to"""
+    def translation(self) -> Optional['Translation']:
+        """the Translation this domain belongs to"""
         return self.reference_object
 
     def key(self):
         """Tuple: a tuple representing the items expected to be unique. for hashing and comparing"""
         return tuple([self.name, self.translation])
 
-    def score_region_mapping(self, reference_genome=None):
+    def score_region_mapping(
+        self, reference_genome: Optional[ReferenceGenome] = None
+    ) -> Tuple[int, int]:
         """
         compares the sequence in each DomainRegion to the sequence collected for that domain region from the
         translation object
 
         Args:
-            reference_genome (Dict[str,Bio.SeqRecord]): dict of reference sequence
-                by template/chr name
+            reference_genome: dict of reference sequence by template/chr name
 
         Returns:
-            tuple of int and int: tuple contains
-
-                - int: the number of matching amino acids
-                - int: the total number of amino acids
+            - int: the number of matching amino acids
+            - int: the total number of amino acids
         """
         if self.translation:
             aa_seq = self.translation.get_aa_seq(reference_genome)
@@ -116,17 +128,18 @@ def score_region_mapping(self, reference_genome=None):
         else:
             raise NotSpecifiedError('insufficient sequence information')
 
-    def get_seqs(self, reference_genome=None, ignore_cache=False):
+    def get_seqs(
+        self, reference_genome: Optional[ReferenceGenome] = None, ignore_cache: bool = False
+    ) -> List[str]:
         """
         returns the amino acid sequences for each of the domain regions associated with
         this domain in the order of the regions (sorted by start)
 
         Args:
-            reference_genome (Dict[str,Bio.SeqRecord]): dict of reference sequence
-                by template/chr name
+            reference_genome: dict of reference sequence by template/chr name
 
         Returns:
-            List[str]: list of amino acid sequences for each DomainRegion
+            list of amino acid sequences for each DomainRegion
 
         Raises:
             AttributeError: if there is not enough sequence information given to determine this
@@ -147,7 +160,12 @@ def get_seqs(self, reference_genome=None, ignore_cache=False):
                 raise NotSpecifiedError('insufficient sequence information')
         return [sequences[r] for r in self.regions]
 
-    def align_seq(self, input_sequence, reference_genome=None, min_region_match=0.5):
+    def align_seq(
+        self,
+        input_sequence: str,
+        reference_genome: Optional[ReferenceGenome] = None,
+        min_region_match: float = 0.5,
+    ) -> Tuple[int, int, List[DomainRegion]]:
         """
         align each region to the input sequence starting with the last one.
         then take the subset of sequence that remains to align the second last and so on
@@ -155,16 +173,14 @@ def align_seq(self, input_sequence, reference_genome=None, min_region_match=0.5)
         then raise an error
 
         Args:
-            input_sequence (str): the sequence to be aligned to
-            reference_genome (Dict[str,Bio.SeqRecord]): dict of reference sequence
-                by template/chr name
-            min_region_match (float): percent between 0 and 1. Each region must have a score len(seq) * min_region_match
+            input_sequence: the sequence to be aligned to
+            reference_genome: dict of reference sequence by template/chr name
+            min_region_match: percent between 0 and 1. Each region must have a score len(seq) * min_region_match
 
         Returns:
-            Tuple[int,int,List[DomainRegion]]:
-                - the number of matches
-                - the total number of amino acids to be aligned
-                - the list of domain regions on the new input sequence
+            - the number of matches
+            - the total number of amino acids to be aligned
+            - the list of domain regions on the new input sequence
 
         Raises:
             AttributeError: if sequence information is not available
@@ -234,7 +250,15 @@ def align_seq(self, input_sequence, reference_genome=None, min_region_match=0.5)
 
 
 class Translation(BioInterval):
-    def __init__(self, start, end, transcript=None, domains=None, seq=None, name=None):
+    def __init__(
+        self,
+        start: int,
+        end: int,
+        transcript: Optional['Transcript'] = None,
+        domains: Optional[List[Domain]] = None,
+        seq=None,
+        name=None,
+    ):
         """
         describes the splicing pattern and cds start and end with reference to a particular transcript
 
@@ -262,27 +286,27 @@ def __init__(self, start, end, transcript=None, domains=None, seq=None, name=Non
             domain.reference_object = self
 
     @property
-    def transcript(self):
-        """mavis.annotate.genomic.Transcript: the spliced transcript this translation belongs to"""
+    def transcript(self) -> 'Transcript':
+        """the spliced transcript this translation belongs to"""
         return self.reference_object
 
-    def convert_aa_to_cdna(self, pos):
+    def convert_aa_to_cdna(self, pos: int) -> Interval:
         """
         Args:
-            pos (int): the amino acid position
+            pos: the amino acid position
 
         Returns:
             Interval: the cdna equivalent (with CODON_SIZE uncertainty)
         """
         return Interval(self.start - 1 + (pos - 1) * 3 + 1, self.start - 1 + pos * 3)
 
-    def convert_cdna_to_aa(self, pos):
+    def convert_cdna_to_aa(self, pos: int) -> int:
         """
         Args:
-            pos (int): the cdna position
+            pos: the cdna position
 
         Returns:
-            int: the protein/amino-acid position
+            the protein/amino-acid position
 
         Raises:
             AttributeError: the cdna position is not translated
@@ -295,32 +319,31 @@ def convert_cdna_to_aa(self, pos):
             aa_pos += 1
         return aa_pos
 
-    def convert_genomic_to_cds(self, pos):
+    def convert_genomic_to_cds(self, pos: int) -> int:
         """
         converts a genomic position to its cds (coding sequence) equivalent
 
         Args:
-            pos (int): the genomic position
+            pos: the genomic position
 
         Returns:
-            int: the cds position (negative if before the initiation start site)
+            the cds position (negative if before the initiation start site)
         """
         cds, shift = self.convert_genomic_to_nearest_cds(pos)
         if shift != 0:
             raise IndexError('conversion failed. position is outside the exonic region')
         return cds
 
-    def convert_genomic_to_nearest_cds(self, pos):
+    def convert_genomic_to_nearest_cds(self, pos: int) -> Tuple[int, int]:
         """
         converts a genomic position to its cds equivalent or (if intronic) the nearest cds and shift
 
         Args:
-            pos (int): the genomic position
+            pos: the genomic position
 
         Returns:
-            tuple of int and int:
-                * *int* - the cds position
-                * *int* - the intronic shift
+            - the cds position
+            - the intronic shift
 
         """
         cds_pos, shift = self.transcript.convert_genomic_to_nearest_cdna(pos)
@@ -330,16 +353,16 @@ def convert_genomic_to_nearest_cds(self, pos):
             cds_pos -= self.start
         return cds_pos, shift
 
-    def convert_genomic_to_cds_notation(self, pos):
+    def convert_genomic_to_cds_notation(self, pos: int) -> str:
         """
         converts a genomic position to its cds (coding sequence) equivalent using
         `hgvs <http://www.hgvs.org/mutnomen/recs-DNA.html>`_ cds notation
 
         Args:
-            pos (int): the genomic position
+            pos: the genomic position
 
         Returns:
-            str: the cds position notation
+            the cds position notation
 
         Example:
             >>> tl = Translation(...)
@@ -366,14 +389,15 @@ def convert_genomic_to_cds_notation(self, pos):
             return '*{}{}'.format(cds_pos - len(self), offset_suffix)
         return '{}{}'.format(cds_pos, offset_suffix)
 
-    def get_cds_seq(self, reference_genome=None, ignore_cache=False):
+    def get_cds_seq(
+        self, reference_genome: Optional[ReferenceGenome] = None, ignore_cache: bool = False
+    ) -> str:
         """
         Args:
-            reference_genome (Dict[str,Bio.SeqRecord]): dict of reference sequence
-                by template/chr name
+            reference_genome: dict of reference sequence by template/chr name
 
         Returns:
-            str: the cds sequence
+            the cds sequence
 
         Raises:
             AttributeError: if the reference sequence has not been given and is not set
@@ -385,24 +409,26 @@ def get_cds_seq(self, reference_genome=None, ignore_cache=False):
             return seq[self.start - 1 : self.end]
         raise NotSpecifiedError('insufficient seq information')
 
-    def get_seq(self, reference_genome=None, ignore_cache=False):
+    def get_seq(
+        self, reference_genome: Optional[ReferenceGenome] = None, ignore_cache: bool = False
+    ):
         """
         wrapper for the sequence method
 
         Args:
-            reference_genome (Dict[str,Bio.SeqRecord]): dict of reference sequence
-                by template/chr name
+            reference_genome: dict of reference sequence by template/chr name
         """
         return self.get_cds_seq(reference_genome, ignore_cache)
 
-    def get_aa_seq(self, reference_genome=None, ignore_cache=False):
+    def get_aa_seq(
+        self, reference_genome: Optional[ReferenceGenome] = None, ignore_cache: bool = False
+    ) -> str:
         """
         Args:
-            reference_genome (Dict[str,Bio.SeqRecord]): dict of reference sequence
-                by template/chr name
+            reference_genome: dict of reference sequence by template/chr name
 
         Returns:
-            str: the amino acid sequence
+            the amino acid sequence
 
         Raises:
             AttributeError: if the reference sequence has not been given and is not set
diff --git a/src/mavis/annotate/splicing.py b/src/mavis/annotate/splicing.py
index ae9d4ef4..b2160ffd 100644
--- a/src/mavis/annotate/splicing.py
+++ b/src/mavis/annotate/splicing.py
@@ -1,4 +1,5 @@
 import itertools
+from typing import Iterable, List, Optional
 
 from ..constants import SPLICE_TYPE, STRAND, reverse_complement
 from ..interval import Interval
@@ -7,7 +8,7 @@
 
 
 class SplicingPattern(list):
-    def __init__(self, *args, splice_type=SPLICE_TYPE.NORMAL):
+    def __init__(self, *args, splice_type: str = SPLICE_TYPE.NORMAL):
         list.__init__(self, *args)
         self.splice_type = splice_type
 
@@ -24,7 +25,7 @@ def __str__(self):
         return '[{}]'.format(', '.join(temp))
 
     @classmethod
-    def classify(cls, pattern, original_sites):
+    def classify(cls, pattern: List[int], original_sites: Iterable[int]) -> str:
         # now need to decide the type for each set
         pattern = sorted(pattern)
         r_introns = []
@@ -80,12 +81,14 @@ def classify(cls, pattern, original_sites):
         return SPLICE_TYPE.COMPLEX
 
     @classmethod
-    def generate_patterns(cls, sites, is_reverse=False):
+    def generate_patterns(
+        cls, sites: Iterable['SpliceSite'], is_reverse=False
+    ) -> List['SplicingPattern']:
         """
         returns a list of splice sites to be connected as a splicing pattern
 
         Returns:
-            List[SplicingPattern]: List of positions to be spliced together
+            List of positions to be spliced together
 
         Note:
             see [theory - predicting splicing patterns](/background/theory/#predicting-splicing-patterns)
@@ -115,7 +118,15 @@ def generate_patterns(cls, sites, is_reverse=False):
 
 class SpliceSite(BioInterval):
     def __init__(
-        self, ref, pos, site_type, intact=True, start=None, end=None, strand=None, seq=None
+        self,
+        ref: BioInterval,
+        pos: int,
+        site_type: str,
+        intact: bool = True,
+        start: Optional[int] = None,
+        end: Optional[int] = None,
+        strand: Optional[str] = None,
+        seq: Optional[str] = None,
     ):
         if start is None or end is None:
             self.strand = strand if strand else ref.get_strand()
@@ -170,17 +181,17 @@ def __repr__(self):
         )
 
 
-def predict_splice_sites(input_sequence, is_reverse=False):
+def predict_splice_sites(input_sequence: str, is_reverse: bool = False) -> List[SpliceSite]:
     """
     looks for the expected splice site sequence patterns in the
     input strings and returns a list of putative splice sites
 
     Args:
-        input_sequence (str): input sequence with respect to the positive/forward strand
-        is_reverse (bool): True when the sequences is transcribed on the reverse strand
+        input_sequence: input sequence with respect to the positive/forward strand
+        is_reverse: True when the sequence is transcribed on the reverse strand
 
     Return:
-        List[SpliceSite]: list of putative splice sites
+        list of putative splice sites
     """
     if is_reverse:
         sequence = reverse_complement(input_sequence)
diff --git a/src/mavis/annotate/variant.py b/src/mavis/annotate/variant.py
index 7eaf2149..bfa9e3a0 100644
--- a/src/mavis/annotate/variant.py
+++ b/src/mavis/annotate/variant.py
@@ -8,9 +8,11 @@
 from ..constants import COLUMNS, GENE_PRODUCT_TYPE, PROTOCOL, STOP_AA, STRAND, SVTYPE
 from ..error import NotSpecifiedError
 from ..interval import Interval
+from ..types import ReferenceGenome
 from ..util import logger
 from .fusion import FusionTranscript, determine_prime
 from .genomic import Gene, IntergenicRegion, PreTranscript, Transcript
+from .protein import Translation
 
 
 class Annotation(BreakpointPair):
@@ -280,24 +282,33 @@ def flatten_fusion_translation(translation):
 
 
 class IndelCall:
-    def __init__(self, refseq, mutseq):
+    nterm_aligned: int
+    cterm_aligned: int
+    ref_seq: str
+    mut_seq: str
+    ins_seq: str
+    del_seq: str
+    is_dup: bool
+    terminates: bool
+
+    def __init__(self, refseq: str, mutseq: str):
         """
         Given two sequences, Assuming there exists a single difference between the two
         call an indel which accounts for the change
 
         Args:
-            refseq (str): The reference (amino acid) sequence
-            mutseq (str): The mutated (amino acid) sequence
+            refseq: The reference (amino acid) sequence
+            mutseq: The mutated (amino acid) sequence
 
         Attributes:
-            nterm_aligned (int): the number of characters aligned consecutively from the start of both strings
-            cterm_aligned (int): the number of characters aligned consecutively from the end of both strings
-            is_dup (bool): flag to indicate a duplication
-            ref_seq (str): the reference sequence
-            mut_seq (str): the mutated sequence
-            ins_seq (str): the inserted sequence
-            del_seq (str): the deleted sequence
-            terminates (bool): both sequences end in stop AAs
+            nterm_aligned: the number of characters aligned consecutively from the start of both strings
+            cterm_aligned: the number of characters aligned consecutively from the end of both strings
+            is_dup: flag to indicate a duplication
+            ref_seq: the reference sequence
+            mut_seq: the mutated sequence
+            ins_seq: the inserted sequence
+            del_seq: the deleted sequence
+            terminates: both sequences end in stop AAs
         """
         self.nterm_aligned = 0
         self.cterm_aligned = 0
@@ -379,7 +390,7 @@ def __init__(self, refseq, mutseq):
                 self.del_seq = self.ref_seq[self.nterm_aligned : 0 - self.cterm_aligned]
                 self.ins_seq = self.mut_seq[self.nterm_aligned : 0 - self.cterm_aligned]
 
-    def hgvs_protein_notation(self):
+    def hgvs_protein_notation(self) -> Optional[str]:
         """
         returns the HGVS protein notation for an indel call
         """
@@ -454,17 +465,21 @@ def __str__(self):
         )
 
 
-def call_protein_indel(ref_translation, fusion_translation, reference_genome=None):
+def call_protein_indel(
+    ref_translation: Translation,
+    fusion_translation: Translation,
+    reference_genome: Optional[ReferenceGenome] = None,
+) -> str:
     """
     compare the fusion protein/aa sequence to the reference protein/aa sequence and
     return an hgvs notation indel call
 
     Args:
-        ref_translation (Translation): the reference protein/translation
-        fusion_translation (Translation): the fusion protein/translation
+        ref_translation: the reference protein/translation
+        fusion_translation: the fusion protein/translation
         reference_genome: the reference genome object used to fetch the reference translation AA sequence
     Returns:
-        str: the [HGVS](/glossary/#HGVS) protein indel notation
+        the [HGVS](/glossary/#HGVS) protein indel notation
     """
     ref_aa_seq = ref_translation.get_aa_seq(reference_genome)
     call = IndelCall(ref_aa_seq, fusion_translation.get_aa_seq())
@@ -519,14 +534,14 @@ def flatten_fusion_transcript(spliced_fusion_transcript):
     return row
 
 
-def overlapping_transcripts(ref_ann, breakpoint):
+def overlapping_transcripts(ref_ann, breakpoint: Breakpoint) -> List[PreTranscript]:
     """
     Args:
         ref_ann (Dict[str,List[Gene]]): the reference list of genes split
             by chromosome
-        breakpoint (Breakpoint): the breakpoint in question
+        breakpoint: the breakpoint in question
     Returns:
-        List[PreTranscript]: a list of possible transcripts
+        a list of possible transcripts
     """
     putative_annotations = set()
     for gene in ref_ann.get(breakpoint.chr, []):
@@ -636,7 +651,9 @@ def _gather_breakpoint_annotations(
     )
 
 
-def _gather_annotations(ref: Dict[str, List[Gene]], bp: BreakpointPair, proximity=None):
+def _gather_annotations(
+    ref: Dict[str, List[Gene]], bp: BreakpointPair, proximity=None
+) -> List[Annotation]:
     """
     each annotation is defined by the annotations selected at the breakpoints
     the other annotations are given relative to this
@@ -647,7 +664,7 @@ def _gather_annotations(ref: Dict[str, List[Gene]], bp: BreakpointPair, proximit
         breakpoint_pairs: breakpoint pair we wish to annotate as events
 
     Returns:
-        List[Annotation]: The annotations
+        The annotations
     """
     annotations = dict()
     break1_pos, break1_neg = _gather_breakpoint_annotations(ref, bp.break1)
@@ -781,21 +798,21 @@ def choose_more_annotated(ann_list: List[Annotation]) -> List[Annotation]:
         return intergenic
 
 
-def choose_transcripts_by_priority(ann_list: List[Annotation]):
+def choose_transcripts_by_priority(ann_list: List[Annotation]) -> List[Annotation]:
     """
     for each set of annotations with the same combinations of genes, choose the
     annotation with the most "best_transcripts" or most "alphanumeric" choices
     of transcript. Throw an error if they are identical
 
     Args:
-        ann_list (List[Annotation]): input annotations
+        ann_list: input annotations
 
     Warning:
         input annotations are assumed to be the same event (the same validation_id)
         the logic used would not apply to different events
 
     Returns:
-        List[Annotation]: the filtered list
+        the filtered list
     """
     annotations_by_gene_combination: Dict[
         Tuple[Optional[Gene], Optional[Gene]], List[Annotation]
@@ -845,7 +862,7 @@ def choose_transcripts_by_priority(ann_list: List[Annotation]):
 def annotate_events(
     bpps: List[BreakpointPair],
     annotations: Dict[str, List[Gene]],
-    reference_genome: Dict[str, str],
+    reference_genome: ReferenceGenome,
     max_proximity: int = 5000,
     min_orf_size: int = 200,
     min_domain_mapping_match: float = 0.95,
@@ -854,18 +871,17 @@ def annotate_events(
 ) -> List[Annotation]:
     """
     Args:
-        bpps (List[mavis.breakpoint.BreakpointPair]): list of events
+        bpps: list of events
         annotations: reference annotations
-        reference_genome (Dict[string,string]): dictionary of reference sequences by name
-        max_proximity (int): see [max_proximity](/configuration/settings/#max_proximity)
-        min_orf_size (int): see [min_orf_size](/configuration/settings/#min_orf_size)
+        reference_genome: dictionary of reference sequences by name
+        max_proximity: see [max_proximity](/configuration/settings/#max_proximity)
+        min_orf_size: see [min_orf_size](/configuration/settings/#min_orf_size)
         min_domain_mapping_match (float): see [min_domain_mapping_match](/configuration/settings/#min_domain_mapping_match)
-        max_orf_cap (int): see [max_orf_cap](/configuration/settings/#max_orf_cap)
-        log (Callable): callable function to take in strings and time_stamp args
-        filters (List[callable]): list of functions taking in a list and returning a list for filtering
+        max_orf_cap: see [max_orf_cap](/configuration/settings/#max_orf_cap)
+        filters: list of functions taking in a list and returning a list for filtering
 
     Returns:
-        List[Annotation]: list of the putative annotations
+        list of the putative annotations
     """
     if filters is None:
         filters = [choose_more_annotated, choose_transcripts_by_priority]
diff --git a/src/mavis/assemble.py b/src/mavis/assemble.py
index c5654924..c87b4208 100644
--- a/src/mavis/assemble.py
+++ b/src/mavis/assemble.py
@@ -1,4 +1,5 @@
 import itertools
+from typing import List, Optional
 
 import distance
 import networkx as nx
@@ -27,7 +28,7 @@ def __hash__(self):
     def complexity(self):
         return sequence_complexity(self.seq)
 
-    def add_mapped_sequence(self, read, multimap=1):
+    def add_mapped_sequence(self, read, multimap: int = 1):
         self.remapped_sequences[read] = 1 / multimap
 
     def remap_score(self):
@@ -40,7 +41,7 @@ def remap_coverage(self):
         cov = sum([len(i) for i in itvls])
         return cov / len(self.seq)
 
-    def remap_depth(self, query_range=None):
+    def remap_depth(self, query_range: Optional[Interval] = None):
         """
         the average depth of remapped reads over a give range of the contig sequence
 
@@ -95,12 +96,12 @@ def add_edge(self, n1, n2, freq=1):
     def all_edges(self, *nodes, data=False):
         return self.get_in_edges(*nodes, data=data) + self.get_out_edges(*nodes, data=data)
 
-    def trim_tails_by_freq(self, min_weight):
+    def trim_tails_by_freq(self, min_weight: int):
         """
         for any paths where all edges are lower than the minimum weight trim
 
         Args:
-            min_weight (int): the minimum weight for an edge to be retained
+            min_weight: the minimum weight for an edge to be retained
         """
         ends = sorted(
             [n for n in self.get_nodes() if self.out_degree(n) == 0 or self.in_degree(n) == 0]
@@ -220,7 +221,7 @@ def get_sources(self, subgraph=None):
         return nodeset
 
 
-def digraph_connected_components(graph, subgraph=None):
+def digraph_connected_components(graph: nx.DiGraph, subgraph=None) -> List[List]:
     """
     the networkx module does not support deriving connected
     components from digraphs (only simple graphs)
@@ -229,10 +230,10 @@ def digraph_connected_components(graph, subgraph=None):
     in a simple graph and a digraph
 
     Args:
-        graph (networkx.DiGraph): the input graph to gather components from
+        graph: the input graph to gather components from
 
     Returns:
-        List[List]: returns a list of compnents which are lists of node names
+        returns a list of components which are lists of node names
     """
     if subgraph is None:
         subgraph = set(graph.get_nodes())
@@ -246,15 +247,17 @@ def digraph_connected_components(graph, subgraph=None):
     return nx.connected_components(g)
 
 
-def pull_contigs_from_component(assembly, component, min_edge_trim_weight, assembly_max_paths):
+def pull_contigs_from_component(
+    assembly: DeBruijnGraph, component: List, min_edge_trim_weight: int, assembly_max_paths: int
+):
     """
     builds contigs from the a connected component of the assembly DeBruijn graph
 
     Args:
-        assembly (DeBruijnGraph): the assembly graph
-        component (list):  list of nodes which make up the connected component
-        min_edge_trim_weight (int): the minimum weight to not remove a non cutting edge/path
-        assembly_max_paths (int): the maximum number of paths allowed before the graph is further simplified
+        assembly: the assembly graph
+        component:  list of nodes which make up the connected component
+        min_edge_trim_weight: the minimum weight to not remove a non cutting edge/path
+        assembly_max_paths: the maximum number of paths allowed before the graph is further simplified
 
     Returns:
         Dict[str,int]: the paths/contigs and their scores
@@ -304,7 +307,7 @@ def pull_contigs_from_component(assembly, component, min_edge_trim_weight, assem
     return path_scores
 
 
-def filter_contigs(contigs, assembly_min_uniq=0.01):
+def filter_contigs(contigs, assembly_min_uniq: float = 0.01):
     """
     given a list of contigs, removes similar contigs to leave the highest (of the similar) scoring contig only
     """
@@ -339,14 +342,15 @@ def filter_contigs(contigs, assembly_min_uniq=0.01):
 
 
 def assemble(
-    sequences,
-    kmer_size,
-    min_edge_trim_weight=3,
-    assembly_max_paths=20,
-    assembly_min_uniq=0.01,
-    min_complexity=0,
+    sequences: List[str],
+    kmer_size: float,
+    min_edge_trim_weight: int = 3,
+    assembly_max_paths: int = 20,
+    assembly_min_uniq: float = 0.01,
+    min_complexity: float = 0,
+    remap_min_exact_match: int = 6,
     **kwargs,
-):
+) -> List[Contig]:
     """
     for a set of sequences creates a DeBruijnGraph
     simplifies trailing and leading paths where edges fall
@@ -355,17 +359,18 @@ def assemble(
     drops any sequences too small to fit the kmer size
 
     Args:
-        sequences (List[str]): a list of strings/sequences to assemble
-        kmer_size: see [assembly_kmer_size](/configuration/settings/#assembly_kmer_size) the size of the kmer to use
-        min_edge_trim_weight: see [assembly_min_edge_trim_weight](/configuration/settings/#assembly_min_edge_trim_weight)
+        sequences: a list of strings/sequences to assemble
+        kmer_size: see [assembly_kmer_size](/configuration/settings/#validateassembly_kmer_size) the size of the kmer to use
+        min_edge_trim_weight: see [assembly_min_edge_trim_weight](/configuration/settings/#validateassembly_min_edge_trim_weight)
         remap_min_match: Minimum match percentage of the remapped read (based on the exact matches in the cigar)
         remap_min_overlap: defaults to the kmer size. Minimum amount of overlap between the contig and the remapped read
         min_contig_length: Minimum length of contigs assemble to attempt remapping reads to. Shorter contigs will be ignored
-        remap_min_exact_match: see [assembly_min_exact_match_to_remap](/configuration/settings/#assembly_min_exact_match_to_remap)
-        assembly_max_paths: see [assembly_max_paths](/configuration/settings/#assembly_max_paths)
+        remap_min_exact_match: see [assembly_min_exact_match_to_remap](/configuration/settings/#validateassembly_min_exact_match_to_remap)
+        assembly_max_paths: see [assembly_max_paths](/configuration/settings/#validateassembly_max_paths)
+        min_complexity: see [min_call_complexity](/configuration/settings/#validatemin_call_complexity)
 
     Returns:
-        List[Contig]: a list of putative contigs
+        a list of putative contigs
     """
     if not sequences:
         return []
@@ -373,7 +378,6 @@ def assemble(
     kmer_size = int(round(kmer_size, 0))
     min_contig_length = kwargs.pop('min_contig_length', min_seq + 1)
     remap_min_overlap = kwargs.pop('remap_min_overlap', kmer_size)
-    remap_min_exact_match = kwargs.pop('remap_min_exact_match', 6)
     remap_min_match = kwargs.pop('remap_min_match', 0.95)
 
     if kwargs:
diff --git a/src/mavis/bam/cache.py b/src/mavis/bam/cache.py
index 866d4003..aee2cc99 100644
--- a/src/mavis/bam/cache.py
+++ b/src/mavis/bam/cache.py
@@ -1,5 +1,6 @@
 import atexit
 import re
+from typing import Callable, Dict, List, Set, Union
 
 import pysam
 
@@ -15,24 +16,28 @@ class BamCache:
     the file if we've already read that section
     """
 
-    def __init__(self, bamfile, stranded=False):
+    fh: pysam.AlignmentFile
+    stranded: bool
+    cache: Dict
+
+    def __init__(self, bamfile: Union[pysam.AlignmentFile, str], stranded: bool = False):
         """
         Args:
-            bamfile (str): path to the input bam file
+            bamfile: path to the input bam file, or an already opened bam file object (anything with a `fetch` method)
         """
         self.cache = {}
         self.stranded = stranded
-        self.fh = bamfile
+
         if not hasattr(bamfile, 'fetch'):
             self.fh = pysam.AlignmentFile(bamfile, 'rb')
         else:
             try:
                 self.fh = bamfile.fh
             except AttributeError:
-                pass
+                self.fh = bamfile
         atexit.register(self.close)  # makes the file 'auto close' on normal python exit
 
-    def valid_chr(self, chrom):
+    def valid_chr(self, chrom: str) -> bool:
         """
         checks if a reference name exists in the bam file header
         """
@@ -42,10 +47,10 @@ def valid_chr(self, chrom):
         except KeyError:
             return False
 
-    def add_read(self, read):
+    def add_read(self, read: pysam.AlignedSegment):
         """
         Args:
-            read (pysam.AlignedSegment): the read to add to the cache
+            read: the read to add to the cache
         """
         if not read.is_unmapped and read.reference_start == read.reference_end:
             _util.logger.debug(f'ignoring invalid read: {read.query_name}')
@@ -56,7 +61,7 @@ def add_read(self, read):
         if read not in self.cache[read.query_name]:
             self.cache[read.query_name].add(read)
 
-    def has_read(self, read):
+    def has_read(self, read: pysam.AlignedSegment) -> bool:
         """
         checks if a read query name exists in the current cache
         """
@@ -66,12 +71,12 @@ def has_read(self, read):
             return True
         return False
 
-    def reference_id(self, chrom):
+    def reference_id(self, chrom: str) -> int:
         """
         Args:
-            chrom (str): the chromosome/reference name
+            chrom: the chromosome/reference name
         Returns:
-            int: the reference id corresponding to input chromosome name
+            the reference id corresponding to input chromosome name
         """
         tid = self.fh.get_tid(chrom)
         if tid == -1:
@@ -82,23 +87,25 @@ def reference_id(self, chrom):
             raise KeyError('invalid reference name not present in bam file', chrom)
         return tid
 
-    def get_read_reference_name(self, read):
+    def get_read_reference_name(self, read: pysam.AlignedSegment) -> str:
         """
         Args:
-            read (pysam.AlignedSegment): the read we want the chromosome name for
+            read: the read we want the chromosome name for
         Returns:
-            str: the name of the chromosome
+            the name of the chromosome
         """
         return ReferenceName(self.fh.get_reference_name(read.reference_id))
 
     @classmethod
-    def _generate_fetch_bins(cls, start, stop, sample_bins, min_bin_size):
+    def _generate_fetch_bins(
+        cls, start: int, stop: int, sample_bins: int, min_bin_size: int
+    ) -> List[Interval]:
         """
         Args:
-            start (int): the start if the area to fetch reads from
-            stop (int): the end of the region
-            sample_bins (int): the number of bins to split the region into
-            min_bin_size (int): the minimum bin size
+            start: the start of the area to fetch reads from
+            stop: the end of the region
+            sample_bins: the number of bins to split the region into
+            min_bin_size: the minimum bin size
         """
         assert min_bin_size > 0
         length = stop - start + 1
@@ -120,28 +127,28 @@ def _generate_fetch_bins(cls, start, stop, sample_bins, min_bin_size):
 
     def fetch(
         self,
-        input_chrom,
-        start,
-        stop,
-        limit=10000,
-        cache_if=lambda x: True,
-        filter_if=lambda x: False,
-        stop_on_cached_read=False,
-    ):
+        input_chrom: str,
+        start: int,
+        stop: int,
+        limit: int = 10000,
+        cache_if: Callable = lambda x: True,
+        filter_if: Callable = lambda x: False,
+        stop_on_cached_read: bool = False,
+    ) -> Set[pysam.AlignedSegment]:
         """
         Args:
-            input_chrom (str): chromosome name
-            start (int): start position
-            end (int): end position
-            limit (int): maximum number of reads to fetch
-            cache_if (Callable):  if returns True then the read is added to the cache
-            filter_if (Callable): if returns True then the read is not returned as part of the result
-            stop_on_cached_read (bool): stop reading at the first read found that is already in the cache
+            input_chrom: chromosome name
+            start: start position
+            stop: end position
+            limit: maximum number of reads to fetch
+            cache_if:  if returns True then the read is added to the cache
+            filter_if: if returns True then the read is not returned as part of the result
+            stop_on_cached_read: stop reading at the first read found that is already in the cache
         Note:
             the cache_if and filter_if functions must be any function that takes a read as input and returns a boolean
 
         Returns:
-            Set[pysam.AlignedSegment]: a set of reads which overlap the input region
+            a set of reads which overlap the input region
         """
         # try using the cache to avoid fetching regions more than once
         result = []
@@ -180,32 +187,32 @@ def fetch(
 
     def fetch_from_bins(
         self,
-        input_chrom,
-        start,
-        stop,
-        read_limit=10000,
-        cache=False,
-        sample_bins=3,
-        cache_if=lambda x: True,
-        min_bin_size=10,
-        filter_if=lambda x: False,
-    ):
+        input_chrom: str,
+        start: int,
+        stop: int,
+        read_limit: int = 10000,
+        cache: bool = False,
+        sample_bins: int = 3,
+        cache_if: Callable = lambda x: True,
+        min_bin_size: int = 10,
+        filter_if: Callable = lambda x: False,
+    ) -> Set[pysam.AlignedSegment]:
         """
         wrapper around the fetch method, returns a list to avoid errors with changing the file pointer
         position from within the loop. Also caches reads if requested and can return a limited read number
 
         Args:
-            chrom (str): the chromosome
-            start (int): the start position
-            stop (int): the end position
-            read_limit (int): the maximum number of reads to parse
-            cache (bool): flag to store reads
-            sample_bins (int): number of bins to split the region into
-            cache_if (Callable): function to check to against a read to determine if it should be cached
-            bin_gap_size (int): gap between the bins for the fetch area
+            input_chrom: the chromosome
+            start: the start position
+            stop: the end position
+            read_limit: the maximum number of reads to parse
+            cache: flag to store reads
+            sample_bins: number of bins to split the region into
+            cache_if: function to check against a read to determine if it should be cached
+            min_bin_size: the minimum bin size
 
         Returns:
-            Set[pysam.AlignedSegment]: set of reads gathered from the region
+            set of reads gathered from the region
         """
         # try using the cache to make grabbing mate pairs easier
         result = []
@@ -242,14 +249,16 @@ def fetch_from_bins(
             running_surplus -= count
         return set(result)
 
-    def get_mate(self, read, primary_only=True, allow_file_access=False):
+    def get_mate(
+        self, read: pysam.AlignedSegment, primary_only: bool = True, allow_file_access: bool = False
+    ) -> List[pysam.AlignedSegment]:
         """
         Args:
-            read (pysam.AlignedSegment): the read
-            primary_only (bool): ignore secondary alignments
-            allow_file_access (bool): determines if the bam can be accessed to try to find the mate
+            read: the read
+            primary_only: ignore secondary alignments
+            allow_file_access: determines if the bam can be accessed to try to find the mate
         Returns:
-            List[pysam.AlignedSegment]: list of mates of the input read
+            list of mates of the input read
         """
         # NOTE: will return all mate alignments that have been cached
         putative_mates = self.cache.get(read.query_name, set())
diff --git a/src/mavis/bam/cigar.py b/src/mavis/bam/cigar.py
index 7c003bce..2610bf7f 100644
--- a/src/mavis/bam/cigar.py
+++ b/src/mavis/bam/cigar.py
@@ -4,7 +4,12 @@
 CIGAR value (i.e. 1 for an insertion), and the second value is the frequency
 """
 import re
+from typing import Tuple
+
+import pysam
+
 from ..constants import CIGAR, DNA_ALPHABET, GAP
+from ..types import CigarTuples
 
 EVENT_STATES = {CIGAR.D, CIGAR.I, CIGAR.X}
 ALIGNED_STATES = {CIGAR.M, CIGAR.X, CIGAR.EQ}
@@ -13,27 +18,26 @@
 CLIPPING_STATE = {CIGAR.S, CIGAR.H}
 
 
-def recompute_cigar_mismatch(read, ref):
+def recompute_cigar_mismatch(read: pysam.AlignedSegment, ref: str) -> CigarTuples:
     """
     for cigar tuples where M is used, recompute to replace with X/= for increased
     utility and specificity
 
     Args:
-        read (pysam.AlignedSegment): the input read
-        ref (str): the reference sequence
+        read: the input read
+        ref: the reference sequence
 
     Returns:
-        List[Tuple[int,int]]: the cigar tuple
+        the cigar tuple
     """
-    result = []
-    offset = 0
+    result: CigarTuples = []
 
     ref_pos = read.reference_start
     seq_pos = 0
 
     for cigar_value, freq in read.cigar:
         if cigar_value in ALIGNED_STATES:
-            for offset in range(0, freq):
+            for _ in range(0, freq):
                 if DNA_ALPHABET.match(ref[ref_pos], read.query_sequence[seq_pos]):
                     if len(result) == 0 or result[-1][0] != CIGAR.EQ:
                         result.append((CIGAR.EQ, 1))
@@ -56,13 +60,13 @@ def recompute_cigar_mismatch(read, ref):
     return result
 
 
-def longest_fuzzy_match(cigar, max_fuzzy_interupt=1):
+def longest_fuzzy_match(cigar: CigarTuples, max_fuzzy_interupt: int = 1) -> int:
     """
     computes the longest sequence of exact matches allowing for 'x' event interrupts
 
     Args:
         cigar: cigar tuples
-        max_fuzzy_interupt (int): number of mismatches allowed
+        max_fuzzy_interupt: number of mismatches allowed
 
     """
     temp = join(cigar)
@@ -85,28 +89,28 @@ def longest_fuzzy_match(cigar, max_fuzzy_interupt=1):
     return longest_fuzzy_match
 
 
-def longest_exact_match(cigar):
+def longest_exact_match(cigar: CigarTuples) -> int:
     """
     returns the longest consecutive exact match
 
     Args:
-        cigar (List[Tuple[int,int]]): the cigar tuples
+        cigar: the cigar tuples
     """
     return longest_fuzzy_match(cigar, 0)
 
 
-def score(cigar, **kwargs):
+def score(cigar: CigarTuples, **kwargs) -> int:
     """scoring based on sw alignment properties with gap extension penalties
 
     Args:
-        cigar (List[Tuple[mavis.constants.CIGAR,int]]): list of cigar tuple values
+        cigar: list of cigar tuple values
         MISMATCH (int): mismatch penalty
         MATCH (int): match penalty
         GAP (int): initial gap penalty
         GAP_EXTEND (int): gap extension penalty
 
     Returns:
-        int: the score value
+        the score value
     """
 
     mismatch = kwargs.pop('MISMATCH', -1)
@@ -129,7 +133,7 @@ def score(cigar, **kwargs):
     return score
 
 
-def match_percent(cigar):
+def match_percent(cigar: CigarTuples) -> float:
     """
     calculates the percent of aligned bases (matches or mismatches) that are matches
     """
@@ -169,7 +173,9 @@ def join(*pos):
     return result
 
 
-def extend_softclipping(cigar, min_exact_to_stop_softclipping):
+def extend_softclipping(
+    cigar: CigarTuples, min_exact_to_stop_softclipping: int
+) -> Tuple[CigarTuples, int]:
     """
     given some input cigar, extends softclipping if there are mismatches/insertions/deletions
     close to the end of the aligned portion. The stopping point is defined by the
@@ -177,11 +183,11 @@ def extend_softclipping(cigar, min_exact_to_stop_softclipping):
     exact match aligned portion to signal stop
 
     Args:
-        original_cigar (List[Tuple[mavis.constants.CIGAR,int]]): the input cigar
-        min_exact_to_stop_softclipping (int): number of exact matches to terminate extension
+        cigar: the input cigar
+        min_exact_to_stop_softclipping: number of exact matches to terminate extension
 
     Returns:
-        Tuple[List[Tuple[mavis.constants.CIGAR,int]], int]: new cigar list and shift from the original start position
+        new cigar list and shift from the original start position
     """
     new_cigar = []
     anchors = [
@@ -215,7 +221,9 @@ def extend_softclipping(cigar, min_exact_to_stop_softclipping):
     return new_cigar, start_ref_aligned
 
 
-def compute(ref, alt, force_softclipping=True, min_exact_to_stop_softclipping=6):
+def compute(
+    ref: str, alt: str, force_softclipping: bool = True, min_exact_to_stop_softclipping: int = 6
+) -> Tuple[CigarTuples, int]:
     """
     given a ref and alt sequence compute the cigar string representing the alt
 
@@ -247,7 +255,7 @@ def compute(ref, alt, force_softclipping=True, min_exact_to_stop_softclipping=6)
         return cigar, 0
 
 
-def convert_for_igv(cigar):
+def convert_for_igv(cigar: CigarTuples) -> CigarTuples:
     """
     igv does not support the extended CIGAR values for match v mismatch
 
@@ -263,7 +271,7 @@ def convert_for_igv(cigar):
     return join(result)
 
 
-def alignment_matches(cigar):
+def alignment_matches(cigar: CigarTuples) -> int:
     """
     counts the number of aligned bases irrespective of match/mismatch
     this is equivalent to counting all CIGAR.M
@@ -275,7 +283,7 @@ def alignment_matches(cigar):
     return result
 
 
-def merge_indels(cigar):
+def merge_indels(cigar: CigarTuples) -> CigarTuples:
     """
     For a given cigar tuple, merges adjacent insertions/deletions
 
@@ -298,7 +306,7 @@ def merge_indels(cigar):
     return new_cigar
 
 
-def hgvs_standardize_cigar(read, reference_seq):
+def hgvs_standardize_cigar(read: pysam.AlignedSegment, reference_seq: str) -> CigarTuples:
     """
     extend alignments as long as matches are possible.
     call insertions before deletions
@@ -420,7 +428,7 @@ def hgvs_standardize_cigar(read, reference_seq):
     return join(cigar)
 
 
-def convert_string_to_cigar(string):
+def convert_string_to_cigar(string: str) -> CigarTuples:
     """
     Given a cigar string, converts it to the appropriate cigar tuple
 
@@ -436,11 +444,13 @@ def convert_string_to_cigar(string):
     return cigar
 
 
-def convert_cigar_to_string(cigar):
+def convert_cigar_to_string(cigar: CigarTuples) -> str:
     return ''.join(['{}{}'.format(f, CIGAR.reverse(s) if s != CIGAR.EQ else '=') for s, f in cigar])
 
 
-def merge_internal_events(cigar, inner_anchor=10, outer_anchor=10):
+def merge_internal_events(
+    cigar: CigarTuples, inner_anchor: int = 10, outer_anchor: int = 10
+) -> CigarTuples:
     """
     merges events (insertions, deletions, mismatches) within a cigar if they are
     between exact matches on either side (anchors) and separated by less exact
@@ -449,12 +459,12 @@ def merge_internal_events(cigar, inner_anchor=10, outer_anchor=10):
     does not merge two mismatches, must contain a deletion/insertion
 
     Args:
-        cigar (List): a list of tuples of cigar states and counts
-        inner_anchor (int): minimum number of consecutive exact matches separating events
-        outer_anchor (int): minimum consecutively aligned exact matches to anchor an end for merging
+        cigar: a list of tuples of cigar states and counts
+        inner_anchor: minimum number of consecutive exact matches separating events
+        outer_anchor: minimum consecutively aligned exact matches to anchor an end for merging
 
     Returns:
-        List: new list of cigar tuples with merged events
+        new list of cigar tuples with merged events
 
     Example:
         >>> merge_internal_events([(CIGAR.EQ, 10), (CIGAR.X, 1), (CIGAR.EQ, 2), (CIGAR.D, 1), (CIGAR.EQ, 10)])
diff --git a/src/mavis/bam/read.py b/src/mavis/bam/read.py
index f41f31cb..5a986178 100644
--- a/src/mavis/bam/read.py
+++ b/src/mavis/bam/read.py
@@ -1,6 +1,7 @@
 import itertools
 import re
 from copy import copy
+from typing import Callable, Iterable, List, Optional, Tuple
 
 import pysam
 from Bio.Data import IUPACData as iupac
@@ -158,16 +159,18 @@ def __hash__(self):
         return hash(self.key())
 
 
-def pileup(reads, filter_func=None):
+def pileup(
+    reads: Iterable[pysam.AlignedSegment], filter_func: Optional[Callable] = None
+) -> List[Tuple[int, int]]:
     """
     For a given set of reads generate a pileup of all reads (excluding those for which the filter_func returns True)
 
     Args:
-        reads (Iterable[pysam.AlignedSegment]): reads to pileup
-        filter_func (Callable): function which takes in a  read and returns True if it should be ignored and False otherwise
+        reads: reads to pileup
+        filter_func: function which takes in a read and returns True if it should be ignored and False otherwise
 
     Returns:
-        Iterable[Tuple[int,int]]: tuples of genomic position and read count at that position
+        tuples of genomic position and read count at that position
 
     Note:
         returns positions using 1-based indexing
@@ -181,13 +184,13 @@ def pileup(reads, filter_func=None):
     return sorted(hist.items())
 
 
-def map_ref_range_to_query_range(read, ref_range):
+def map_ref_range_to_query_range(read: pysam.AlignedSegment, ref_range: Interval) -> Interval:
     """
     Args:
-        ref_range (Interval): 1-based inclusive
-        read (pysam.AlignedSegment): read used for the mapping
+        ref_range: 1-based inclusive
+        read: read used for the mapping
     Returns:
-        Interval: 1-based inclusive range
+        1-based inclusive range
     """
     rpos = read.reference_start
     qpos = 0
@@ -213,17 +216,17 @@ def map_ref_range_to_query_range(read, ref_range):
     return Interval(qstart, qend)
 
 
-def breakpoint_pos(read, orient=ORIENT.NS):
+def breakpoint_pos(read: pysam.AlignedSegment, orient: str = ORIENT.NS) -> int:
     """
     assumes the breakpoint is the position following softclipping on the side with more
     softclipping (unless and orientation has been specified)
 
     Args:
-        read (pysam.AlignedSegment): the read object
-        orient (ORIENT): the orientation
+        read: the read object
+        orient: the orientation
 
     Returns:
-        int: the position of the breakpoint in the input read
+        the position of the breakpoint in the input read
     """
     typ, freq = read.cigar[0]
     end_typ, end_freq = read.cigar[-1]
@@ -266,15 +269,15 @@ def breakpoint_pos(read, orient=ORIENT.NS):
         return read.reference_end - 1
 
 
-def calculate_alignment_score(read, consec_bonus=1):
+def calculate_alignment_score(read: pysam.AlignedSegment, consec_bonus=1) -> float:
     """
     calculates a score for comparing alignments
 
     Args:
-        read (pysam.AlignedSegment): the input read
+        read: the input read
 
     Returns:
-        float: the score
+        the score
     """
     score = 0
     qlen = read.reference_end - read.reference_start
@@ -290,32 +293,27 @@ def calculate_alignment_score(read, consec_bonus=1):
 
 
 def nsb_align(
-    ref,
-    seq,
-    weight_of_score=0.5,
-    min_overlap_percent=1,
-    min_match=0,
+    ref: str,
+    seq: str,
+    min_overlap_percent: float = 1,
+    min_match: float = 0,
     min_consecutive_match=1,
-    scoring_function=calculate_alignment_score,
-):
+    scoring_function: Callable = calculate_alignment_score,
+) -> List[SamRead]:
     """
     given some reference string and a smaller sequence string computes the best non-space-breaking alignment
     i.e. an alignment that does not allow for indels (straight-match). Positions in the aligned segments are
     given relative to the length of the reference sequence (1-based)
 
     Args:
-        ref (str): the reference sequence
-        seq (str): the sequence being aligned
-        weight_of_score (float): when scoring alignments this determines the amount
-            of weight to place on the cigar match. Should be a number between 0 and 1
-        min_overlap_percent (float): the minimum amount of overlap of the input sequence to the reference
-            should be a number between 0 and 1
-        min_match (float): the minimum number of matches compared to total
-        scoring_function (Callable): any function that will take a read as input and return a float
-          used in comparing alignments to choose the best alignment
+        ref: the reference sequence
+        seq: the sequence being aligned
+        min_overlap_percent: the minimum amount of overlap of the input sequence to the reference should be a number between 0 and 1
+        min_match: the minimum number of matches compared to total
+        scoring_function: any function that will take a read as input and return a float used in comparing alignments to choose the best alignment
 
     Returns:
-        List[pysam.AlignedSegment]: list of aligned segments
+        list of aligned segments
 
     Note:
         using a higher min_match may improve performance as low quality alignments are rejected more quickly. However
@@ -399,13 +397,13 @@ def nsb_align(
     return filtered
 
 
-def sequenced_strand(read, strand_determining_read=2):
+def sequenced_strand(read: pysam.AlignedSegment, strand_determining_read: int = 2) -> str:
     """
     determines the strand that was sequenced
 
     Args:
-        read (pysam.AlignedSegment): the read being used to determine the strand
-        strand_determining_read (int): which read in the read pair is the same as the sequenced strand
+        read: the read being used to determine the strand
+        strand_determining_read: which read in the read pair is the same as the sequenced strand
 
     Returns:
         STRAND: the strand that was sequenced
@@ -435,13 +433,13 @@ def sequenced_strand(read, strand_determining_read=2):
     return strand
 
 
-def read_pair_type(read):
+def read_pair_type(read: pysam.AlignedSegment) -> str:
     # check if the read pair is in the expected orientation
     """
     assumptions based on illumina pairs: only 4 possible combinations
 
     Args:
-        read (pysam.AlignedSegment): the input read
+        read: the input read
 
     Returns:
         READ_PAIR_TYPE: the type of input read pair
@@ -474,18 +472,17 @@ def read_pair_type(read):
         raise NotImplementedError('unexpected orientation for pair')
 
 
-def orientation_supports_type(read, event_type):
+def orientation_supports_type(read: pysam.AlignedSegment, event_type: str) -> bool:
     """
     checks if the orientation is compatible with the type of event
 
     Args:
-        read (pysam.AlignedSegment): a read from the pair
-        event_type (SVTYPE): the type of event to check
+        read: a read from the pair
+        event_type: the type of event to check
 
     Returns:
-        bool:
-            - ``True`` - the read pair is in the correct orientation for this event type
-            - ``False`` - the read is not in the correct orientation
+        - ``True`` - the read pair is in the correct orientation for this event type
+        - ``False`` - the read is not in the correct orientation
     """
     if event_type == SVTYPE.DEL or event_type == SVTYPE.INS:
         if read_pair_type(read) != READ_PAIR_TYPE.LR:
@@ -504,7 +501,12 @@ def orientation_supports_type(read, event_type):
     return True
 
 
-def convert_events_to_softclipping(read, orientation, max_event_size, min_anchor_size=None):
+def convert_events_to_softclipping(
+    read: pysam.AlignedSegment,
+    orientation: str,
+    max_event_size: int,
+    min_anchor_size: Optional[int] = None,
+) -> pysam.AlignedSegment:
     """
     given an alignment, simplifies the alignment by grouping everything past the first anchor and including the
     first event considered too large and unaligning them turning them into softclipping
@@ -574,7 +576,7 @@ def convert_events_to_softclipping(read, orientation, max_event_size, min_anchor
     return read
 
 
-def sequence_complexity(seq):
+def sequence_complexity(seq: str) -> float:
     """
     basic measure of sequence complexity
     """
diff --git a/src/mavis/blat.py b/src/mavis/blat.py
index de95d5cb..f379df1b 100644
--- a/src/mavis/blat.py
+++ b/src/mavis/blat.py
@@ -11,11 +11,13 @@
 """
 import math
 import re
+from typing import Dict, List, Tuple
 
 import pandas as pd
 
 from .align import query_coverage_interval
 from .bam import cigar as _cigar
+from .bam.cache import BamCache
 from .bam.cigar import QUERY_ALIGNED_STATES
 from .bam.read import SamRead
 from .constants import (
@@ -27,6 +29,7 @@
     reverse_complement,
 )
 from .interval import Interval
+from .types import ReferenceGenome
 from .util import logger
 
 
@@ -34,11 +37,10 @@ class Blat:
     """ """
 
     @staticmethod
-    def millibad(row, is_protein=False, is_mrna=True):
+    def millibad(row: Dict, is_protein: bool = False, is_mrna: bool = True) -> float:
         """
         this function is used in calculating percent identity
-        direct translation of the perl code
-        # https://genome.ucsc.edu/FAQ/FAQblat.html#blat4
+        direct translation of the perl code (<https://genome.ucsc.edu/FAQ/FAQblat.html#blat4>)
         """
         size_mul = 1 if not is_protein else 3
         if is_protein and is_mrna:
@@ -76,7 +78,7 @@ def millibad(row, is_protein=False, is_mrna=True):
             return 0
 
     @staticmethod
-    def score(row, is_protein=False):
+    def score(row: Dict, is_protein: bool = False) -> int:
         """
         direct translation from ucsc guidelines on replicating the web blat score
         https://genome.ucsc.edu/FAQ/FAQblat.html#blat4
@@ -101,11 +103,16 @@ def score(row, is_protein=False):
         return score
 
     @staticmethod
-    def percent_identity(row, is_protein=False, is_mrna=True):
+    def percent_identity(row: Dict, is_protein: bool = False, is_mrna: bool = True) -> float:
         return 100 - int(Blat.millibad(row, is_protein, is_mrna)) * 0.1
 
     @staticmethod
-    def read_pslx(filename, seqid_to_sequence_mapping, is_protein=False, verbose=True):
+    def read_pslx(
+        filename: str,
+        seqid_to_sequence_mapping: Dict[str, str],
+        is_protein: bool = False,
+        verbose: bool = True,
+    ) -> Tuple[List[str], Dict]:
         header = [
             'match',
             'mismatch',
@@ -205,15 +212,16 @@ def split_csv_trailing_ints(x):
         return header, final_rows
 
     @staticmethod
-    def pslx_row_to_pysam(row, bam_cache, reference_genome):
+    def pslx_row_to_pysam(
+        row: Dict, bam_cache: BamCache, reference_genome: ReferenceGenome
+    ) -> SamRead:
         """
         given a 'row' from reading a pslx file. converts the row to a BlatAlignedSegment object
 
         Args:
-            row Dict[str]: a row object from the 'read_pslx' method
+            row: a row object from the 'read_pslx' method
             bam_cache (BamCache): the bam file/cache to use as a template for creating reference_id from chr name
-            reference_genome (Dict[str,Bio.SeqRecord]):
-              dict of reference sequence by template/chr name
+            reference_genome: reference sequence by template/chr name
 
         """
         chrom = bam_cache.reference_id(row['tname'])
@@ -356,15 +364,15 @@ def pslx_row_to_pysam(row, bam_cache, reference_genome):
 
 
 def process_blat_output(
-    input_bam_cache,
-    query_id_mapping,
-    reference_genome,
-    aligner_output_file='aligner_out.temp',
-    blat_min_percent_of_max_score=0.8,
-    blat_min_identity=0.7,
-    blat_limit_top_aln=25,
-    is_protein=False,
-):
+    input_bam_cache: BamCache,
+    query_id_mapping: Dict[str, str],
+    reference_genome: ReferenceGenome,
+    aligner_output_file: str = 'aligner_out.temp',
+    blat_min_percent_of_max_score: float = 0.8,
+    blat_min_identity: float = 0.7,
+    blat_limit_top_aln: int = 25,
+    is_protein: bool = False,
+) -> Dict[str, List[SamRead]]:
     """
     converts the blat output pslx (unheadered file) to bam reads
     """
diff --git a/src/mavis/breakpoint.py b/src/mavis/breakpoint.py
index 81518527..b28e4e4c 100644
--- a/src/mavis/breakpoint.py
+++ b/src/mavis/breakpoint.py
@@ -6,6 +6,7 @@
 from .constants import COLUMNS, DNA_ALPHABET, ORIENT, STRAND, SVTYPE, reverse_complement
 from .error import InvalidRearrangement, NotSpecifiedError
 from .interval import Interval
+from .types import ReferenceGenome
 
 
 class Breakpoint(Interval):
@@ -429,7 +430,7 @@ def is_putative_indel(self) -> bool:
             return False
         return True
 
-    def breakpoint_sequence_homology(self, reference_genome):
+    def breakpoint_sequence_homology(self, reference_genome: ReferenceGenome):
         """
         for a given set of breakpoints matches the sequence opposite the partner breakpoint
         this sequence comparison is done with reference to a reference genome and does not
@@ -446,7 +447,7 @@ def breakpoint_sequence_homology(self, reference_genome):
             -------TT-TT-------- second break homology
 
         Args:
-            reference_genome (Dict[str,Bio.SeqRecord]): dict of reference sequence by template/chr name
+            reference_genome: dict of reference sequence by template/chr name
 
         Returns:
             Tuple[str,str]: homologous sequence at the first breakpoint and second breakpoints
diff --git a/src/mavis/cluster/cluster.py b/src/mavis/cluster/cluster.py
index 36941d57..f2468cb1 100644
--- a/src/mavis/cluster/cluster.py
+++ b/src/mavis/cluster/cluster.py
@@ -1,8 +1,7 @@
-from __future__ import division
-
 import itertools
 from collections import namedtuple
 from copy import copy
+from typing import Dict, List
 
 from ..breakpoint import Breakpoint, BreakpointPair
 from ..constants import ORIENT, STRAND
@@ -67,7 +66,7 @@ def weighted_mean(values, weights=None):
     return sum(x * w for x, w in zip(values, weights)) / sum(weights)
 
 
-def merge_integer_intervals(*intervals, weight_adjustment=0):
+def merge_integer_intervals(*intervals, weight_adjustment: int = 0) -> Interval:
     """
     Merges a set of integer intervals into a single interval where the center is the
     weighted mean of the input intervals. The weight is inversely proportional to the
@@ -76,7 +75,7 @@ def merge_integer_intervals(*intervals, weight_adjustment=0):
     input intervals
 
     Args:
-        weight_adjustment (int): add to length to lower weighting differences between small intervals
+        weight_adjustment: add to length to lower weighting differences between small intervals
     """
     float_offset = 0.99999999
     intervals = list(intervals)
@@ -146,7 +145,12 @@ def all_pair_group_keys(pair, explicit_strand=False):
     return result
 
 
-def merge_by_union(input_pairs, group_key, weight_adjustment=10, cluster_radius=200):
+def merge_by_union(
+    input_pairs: List[BreakpointPair],
+    group_key: BreakpointPairGroupKey,
+    weight_adjustment: int = 10,
+    cluster_radius: int = 200,
+) -> List[BreakpointPair]:
     """
     for a given set of breakpoint pairs, merge the union of all pairs that are
     within the given distance (cluster_radius)
@@ -230,8 +234,11 @@ def merge_by_union(input_pairs, group_key, weight_adjustment=10, cluster_radius=
 
 
 def merge_breakpoint_pairs(
-    input_pairs, cluster_radius=200, cluster_initial_size_limit=25, verbose=False
-):
+    input_pairs: List[BreakpointPair],
+    cluster_radius: int = 200,
+    cluster_initial_size_limit: int = 25,
+    verbose: bool = False,
+) -> Dict[BreakpointPair, List[BreakpointPair]]:
     """
     two-step merging process
 
@@ -242,12 +249,12 @@ def merge_breakpoint_pairs(
         done in order of smallest total breakpoint interval size to largest
 
     Args:
-        input_pairs (List[BreakpointPair]): the pairs to be merged
-        cluster_radius (int) maximum distance allowed for a node to merge
-        cluster_initial_size_limit (int): maximum size of breakpoint intervals allowed in the first merging phase
+        input_pairs: the pairs to be merged
+        cluster_radius: maximum distance allowed for a node to merge
+        cluster_initial_size_limit: maximum size of breakpoint intervals allowed in the first merging phase
 
     Returns:
-        Dict[BreakpointPair,List[BreakpointPair]]: mapping of merged breakpoint pairs to the input pairs used in the merge
+        mapping of merged breakpoint pairs to the input pairs used in the merge
     """
 
     def pair_center_distance(pair1, pair2):
diff --git a/src/mavis/cluster/main.py b/src/mavis/cluster/main.py
index 17567f52..861a1aea 100644
--- a/src/mavis/cluster/main.py
+++ b/src/mavis/cluster/main.py
@@ -29,13 +29,13 @@ def split_clusters(
     outputdir: str,
     total_batches: int,
     write_bed_summary: bool = True,
-):
+) -> List[str]:
     """
     For a set of clusters creates a bed file representation of all clusters.
     Also splits the clusters evenly into multiple files based on the user parameters (max_files)
 
     Returns:
-        list: of output file names (not including the bed file)
+        list of output file names (not including the bed file)
     """
     if write_bed_summary:
         bedfile = os.path.join(outputdir, 'clusters.bed')
diff --git a/src/mavis/illustrate/diagram.py b/src/mavis/illustrate/diagram.py
index f9e6fabc..1c59e90b 100644
--- a/src/mavis/illustrate/diagram.py
+++ b/src/mavis/illustrate/diagram.py
@@ -2,12 +2,18 @@
 This is the primary module responsible for generating svg visualizations
 
 """
+from typing import Iterable, List, Optional
+
+from mavis.annotate.genomic import Gene, Template
+from mavis.annotate.variant import Annotation
+from mavis.types import ReferenceGenome
 from svgwrite import Drawing
 
 from ..annotate.genomic import IntergenicRegion
 from ..interval import Interval
+from .constants import DiagramSettings
 from .elements import draw_exon_track, draw_genes, draw_template, draw_ustranscript, draw_vmarker
-from .scatter import draw_scatter
+from .scatter import ScatterPlot, draw_scatter
 from .util import LabelMapping, generate_interval_mapping
 
 # draw gene level view
@@ -17,18 +23,18 @@
 
 
 def draw_sv_summary_diagram(
-    config,
-    ann,
-    reference_genome=None,
-    templates=None,
-    ignore_absent_templates=True,
-    user_friendly_labels=True,
-    template_display_label_prefix='',
-    draw_reference_transcripts=True,
-    draw_reference_genes=True,
-    draw_reference_templates=True,
-    draw_fusion_transcript=True,
-    stack_reference_transcripts=False,
+    config: DiagramSettings,
+    ann: Annotation,
+    reference_genome: ReferenceGenome = None,
+    templates: List[Template] = None,
+    ignore_absent_templates: bool = True,
+    user_friendly_labels: bool = True,
+    template_display_label_prefix: str = '',
+    draw_reference_transcripts: bool = True,
+    draw_reference_genes: bool = True,
+    draw_reference_templates: bool = True,
+    draw_fusion_transcript: bool = True,
+    stack_reference_transcripts: bool = False,
 ):
     """
     this is the main drawing function. It decides between layouts
@@ -43,17 +49,17 @@ def draw_sv_summary_diagram(
         - fusion transcript/translation
 
     Args:
-        ann (Annotation): the annotation object to be illustrated
-        reference_genome (Dict[str,str]): reference sequences
-        templates (List[Template]): list of templates, used in drawing the template-level view
-        ignore_absent_templates (bool):
+        ann: the annotation object to be illustrated
+        reference_genome: reference sequences
+        templates: list of templates, used in drawing the template-level view
+        ignore_absent_templates:
             if true then will not raise an error if the template information is not given but will
             not draw the template instead
-        show_template (bool): if false the template-level view is not drawn
-        user_friendly_labels (bool):
+        show_template: if false the template-level view is not drawn
+        user_friendly_labels:
             if True, genes are labelled by their aliases (where possible) and domains are labeled by their
             names (where possible)
-        template_display_label_prefix (str): the character to precede the template label
+        template_display_label_prefix: the character to precede the template label
     """
     if not any(
         [
@@ -380,7 +386,18 @@ def draw_sv_summary_diagram(
     return canvas, legend
 
 
-def draw_multi_transcript_overlay(config, gene, vmarkers=None, window_buffer=0, plots=None):
+def draw_multi_transcript_overlay(
+    config: DiagramSettings,
+    gene: Gene,
+    vmarkers: Iterable[Interval] = None,
+    window_buffer: int = 0,
+    plots: Optional[List[ScatterPlot]] = None,
+):
+    """
+    Args:
+        vmarkers: vertical line markers
+        plots: scatter plots to plot on top of the gene diagram
+    """
     vmarkers = [] if vmarkers is None else vmarkers
     plots = [] if plots is None else plots
 
diff --git a/src/mavis/illustrate/elements.py b/src/mavis/illustrate/elements.py
index a5f1e762..32995761 100644
--- a/src/mavis/illustrate/elements.py
+++ b/src/mavis/illustrate/elements.py
@@ -3,11 +3,13 @@
 
 """
 import re
+from typing import List, Tuple
 
 from ..annotate.variant import FusionTranscript
 from ..constants import CODON_SIZE, GIEMSA_STAIN, ORIENT, STRAND
 from ..error import DrawingFitError, NotSpecifiedError
-from ..interval import Interval
+from ..interval import Interval, IntervalMapping
+from .constants import DiagramSettings
 from .util import (
     LabelMapping,
     Tag,
@@ -22,7 +24,9 @@
 HEX_BLACK = '#000000'
 
 
-def draw_legend(config, canvas, swatches, border=True):
+def draw_legend(
+    config: DiagramSettings, canvas, swatches: List[Tuple[str, str]], border: bool = True
+):
     """
     generates an svg group object representing the legend
     """
@@ -83,13 +87,13 @@ def draw_legend(config, canvas, swatches, border=True):
 
 
 def draw_exon_track(
-    config,
+    config: DiagramSettings,
     canvas,
     transcript,
-    mapping,
+    mapping: IntervalMapping,
     colors=None,
-    genomic_min=None,
-    genomic_max=None,
+    genomic_min: int = None,
+    genomic_max: int = None,
     translation=None,
 ):
     """ """
@@ -173,12 +177,12 @@ def draw_exon_track(
 
 
 def draw_transcript_with_translation(
-    config,
+    config: DiagramSettings,
     canvas,
     translation,
     labels,
     colors,
-    mapping,
+    mapping: IntervalMapping,
     reference_genome=None,
     genomic_min=None,
     genomic_max=None,
diff --git a/src/mavis/interval.py b/src/mavis/interval.py
index 3c73caf9..754d4ba0 100644
--- a/src/mavis/interval.py
+++ b/src/mavis/interval.py
@@ -1,10 +1,11 @@
-from typing import Optional
+from typing import Dict, List, Optional, Tuple
 
 
 class Interval:
     start: int
     end: int
     freq: int = 1
+    forward_to_reverse: Optional[bool] = None
 
     def __init__(self, start: int, end: Optional[int] = None, freq: int = 1, number_type=None):
         """
@@ -133,7 +134,7 @@ def __len__(self):
         """
         return Interval.length(self)
 
-    def length(self):
+    def length(self) -> int:
         try:
             if self.number_type == float:
                 return self[1] - self[0]
@@ -166,7 +167,7 @@ def __repr__(self):
             return '{}({}, {}{})'.format(cls, self.start, self.end, number_type)
 
     @property
-    def center(self):
+    def center(self) -> float:
         """
         the middle of the interval
 
@@ -193,7 +194,7 @@ def __contains__(self, other):
         return False
 
     @classmethod
-    def dist(cls, first, other):
+    def dist(cls, first, other) -> int:
         """returns the minimum distance between intervals
 
         Example:
@@ -215,7 +216,7 @@ def __hash__(self):
         return hash((self[0], self[1], self.freq))
 
     @classmethod
-    def position_in_range(cls, segments, pos):
+    def position_in_range(cls, segments, pos) -> Tuple[int, bool]:
         if len(segments) == 0:
             raise AttributeError('cannot compute on an empty list')
 
@@ -245,7 +246,7 @@ def position_in_range(cls, segments, pos):
         return num, found_inbetween_segment
 
     @classmethod
-    def convert_pos(cls, mapping, pos, forward_to_reverse=None):
+    def convert_pos(cls, mapping: 'IntervalMapping', pos: int, forward_to_reverse=None) -> int:
         i = cls.convert_ratioed_pos(mapping, pos, forward_to_reverse)
         if i.forward_to_reverse:
             return i.end
@@ -253,15 +254,17 @@ def convert_pos(cls, mapping, pos, forward_to_reverse=None):
             return i.start
 
     @classmethod
-    def convert_ratioed_pos(cls, mapping, pos, forward_to_reverse=None):
+    def convert_ratioed_pos(
+        cls, mapping: 'IntervalMapping', pos: int, forward_to_reverse=None
+    ) -> 'Interval':
         """convert any given position given a mapping of intervals to another range
 
         Args:
-            mapping (Dict[Interval,Interval]): a mapping of a set of continuous intervals
-            pos (int): a position in the first coordinate system
+            mapping: a mapping of a set of continuous intervals
+            pos: a position in the first coordinate system
 
         Returns:
-            Interval: the position in the alternate coordinate system given the input mapping
+            the position in the alternate coordinate system given the input mapping
 
         Raises:
             AttributeError: if the input position is outside the set of input segments
@@ -301,31 +304,31 @@ def convert_ratioed_pos(cls, mapping, pos, forward_to_reverse=None):
                     elif not forward_to_reverse:
                         raise AttributeError('direction of mapped intervals is not consistent')
 
-        i, previous_flag = Interval.position_in_range(
+        index, previous_flag = Interval.position_in_range(
             input_intervals, (pos, pos)
         )  # get the input position
-        if i == len(input_intervals) or previous_flag:
+        if index == len(input_intervals) or previous_flag:
             raise IndexError(pos, 'is outside mapped range', mapping)
         else:
             # fell into a mapped region
-            curr = input_intervals[i]
+            curr = input_intervals[index]
             nexxt = mapping[curr]
             if curr[1] - curr[0] == 0:
-                i = Interval(nexxt[0], nexxt[1])
+                result = Interval(nexxt[0], nexxt[1])
             else:
                 ratio = (nexxt[1] - nexxt[0]) / (curr[1] - curr[0])
                 shift = round((pos - curr[0]) * ratio, 0)
                 shift2 = round((pos - curr[0]) * ratio + ratio, 0)
                 number_type = int if ratio == 1 else float
                 if forward_to_reverse:
-                    i = Interval(nexxt[1] - shift2, nexxt[1] - shift, number_type=number_type)
+                    result = Interval(nexxt[1] - shift2, nexxt[1] - shift, number_type=number_type)
                 else:
-                    i = Interval(nexxt[0] + shift, nexxt[0] + shift2, number_type=number_type)
-            setattr(i, 'forward_to_reverse', forward_to_reverse)
-            return i
+                    result = Interval(nexxt[0] + shift, nexxt[0] + shift2, number_type=number_type)
+            result.forward_to_reverse = forward_to_reverse
+            return result
 
     @classmethod
-    def union(cls, *intervals):
+    def union(cls, *intervals) -> 'Interval':
         """
         returns the union of the set of input intervals
 
@@ -338,7 +341,7 @@ def union(cls, *intervals):
         return Interval(min([i[0] for i in intervals]), max([i[1] for i in intervals]))
 
     @classmethod
-    def intersection(cls, *intervals):
+    def intersection(cls, *intervals) -> Optional['Interval']:
         """
         returns None if there is no intersection
 
@@ -357,7 +360,7 @@ def intersection(cls, *intervals):
         return Interval(low, high)
 
     @classmethod
-    def min_nonoverlapping(cls, *intervals):
+    def min_nonoverlapping(cls, *intervals: 'Interval') -> List['Interval']:
         """
         for a list of intervals, orders them and merges any overlap to return a list of non-overlapping intervals
         O(nlogn)
@@ -368,9 +371,9 @@ def min_nonoverlapping(cls, *intervals):
         """
         if len(intervals) == 0:
             return []
-        intervals = sorted(list(intervals), key=lambda x: (x[0], x[1]))
-        new_intervals = [Interval(intervals[0][0], intervals[0][1])]
-        for i in intervals[1:]:
+        sorted_intervals = sorted(list(intervals), key=lambda x: (x[0], x[1]))
+        new_intervals = [Interval(sorted_intervals[0][0], sorted_intervals[0][1])]
+        for i in sorted_intervals[1:]:
             if Interval.overlaps(new_intervals[-1], i):
                 new_intervals[-1] = new_intervals[-1] | i
             else:
@@ -463,26 +466,17 @@ def add(self, src_interval, tgt_interval, opposing_directions=True):
         self.mapping[src_interval] = tgt_interval
         self.opposing_directions[src_interval] = opposing_directions
 
-    def convert_ratioed_pos(self, pos):
-        """convert any given position given a mapping of intervals to another range
+    def convert_ratioed_pos(self, pos: int) -> Interval:
+        """convert any given position given a mapping of intervals to the mapped range
 
         Args:
-            pos (Interval): a position in the first coordinate system
+            pos: a position in the first coordinate system
 
         Returns:
-            the position in the alternate coordinate system given the input mapping
-            - int: if simplify is True
-            - Interval: if simplify is False
+            the Interval the position lands in in the new coordinate system
 
         Raises:
             IndexError: if the input position is not in any of the mapped intervals
-
-        Example:
-            >>> mapping = IntervalMapping(mapping={(1, 10): (101, 110), (11, 20): (555, 564)})
-            >>> mapping.convert_pos(5)
-            5
-            >>> mapping.convert_pos(15)
-            559
         """
         for src_interval, tgt_interval in self.mapping.items():
             if pos in src_interval:
@@ -499,16 +493,14 @@ def convert_ratioed_pos(self, pos):
                     return tgt_interval
         raise IndexError(pos, 'position not found in mapping', self.mapping.keys())
 
-    def convert_pos(self, pos):
+    def convert_pos(self, pos: int) -> int:
         """convert any given position given a mapping of intervals to another range
 
         Args:
-            pos (int): a position in the first coordinate system
+            pos: a position in the first coordinate system
 
         Returns:
             the position in the alternate coordinate system given the input mapping
-            - int: if simplify is True
-            - Interval: if simplify is False
 
         Raises:
             IndexError: if the input position is not in any of the mapped intervals
diff --git a/src/mavis/tools/vcf.py b/src/mavis/tools/vcf.py
index 7717d743..d0ae9c63 100644
--- a/src/mavis/tools/vcf.py
+++ b/src/mavis/tools/vcf.py
@@ -121,7 +121,7 @@ def parse_bnd_alt(alt: str) -> Tuple[str, int, str, str, str, str]:
         raise NotImplementedError('alt specification in unexpected format', alt)
 
 
-def convert_record(record, record_mapping={}) -> List[Dict]:
+def convert_record(record: VcfRecordType) -> List[Dict]:
     """
     converts a vcf record
 
@@ -220,7 +220,7 @@ def convert_record(record, record_mapping={}) -> List[Dict]:
     return records
 
 
-def convert_pandas_rows_to_variants(df):
+def convert_pandas_rows_to_variants(df: pd.DataFrame) -> List[VcfRecordType]:
     def parse_info(info_field):
         info = {}
         for pair in info_field.split(';'):
@@ -259,7 +259,7 @@ def parse_info(info_field):
     return rows
 
 
-def pandas_vcf(input_file) -> Tuple[List[str], pd.DataFrame]:
+def pandas_vcf(input_file: str) -> Tuple[List[str], pd.DataFrame]:
     """
     Read a standard vcf file into a pandas dataframe
     """
@@ -296,7 +296,7 @@ def pandas_vcf(input_file) -> Tuple[List[str], pd.DataFrame]:
     return header_lines, df
 
 
-def convert_file(input_file: str):
+def convert_file(input_file: str) -> List[Dict]:
     """process a VCF file
 
     Args:
diff --git a/src/mavis/types.py b/src/mavis/types.py
new file mode 100644
index 00000000..23e3de15
--- /dev/null
+++ b/src/mavis/types.py
@@ -0,0 +1,11 @@
+"""
+Helper classes for type hints
+"""
+
+from typing import Dict, List, Tuple
+
+from Bio.SeqRecord import SeqRecord
+
+ReferenceGenome = Dict[str, SeqRecord]
+
+CigarTuples = List[Tuple[int, int]]

From 2ac6cc6b332405e250fd62f81bcf13ead84ad06a Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Fri, 28 Jan 2022 15:40:25 -0800
Subject: [PATCH 095/137] Add more types

---
 MANIFEST.in                    | 1 +
 setup.cfg                      | 1 +
 src/mavis/align.py             | 4 ++--
 src/mavis/annotate/genomic.py  | 4 ++--
 src/mavis/annotate/splicing.py | 5 +++--
 src/mavis/py.typed             | 0
 src/mavis/validate/base.py     | 1 -
 7 files changed, 9 insertions(+), 7 deletions(-)
 create mode 100644 src/mavis/py.typed

diff --git a/MANIFEST.in b/MANIFEST.in
index c1af92d1..16491603 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,4 +1,5 @@
 recursive-include src *.py *.json
+include src/mavis/py.typed
 include README.md
 include LICENSE
 prune docs
diff --git a/setup.cfg b/setup.cfg
index a34fc7ef..d0a4934c 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -84,6 +84,7 @@ dev =
     mkdocs-material==5.4.0
     markdown-include
     mkdocs-simple-hooks==0.1.2
+    types-setuptools>=57.4.7, <58
 deploy =
     twine
     wheel
diff --git a/src/mavis/align.py b/src/mavis/align.py
index 2d6b6898..f4c70712 100644
--- a/src/mavis/align.py
+++ b/src/mavis/align.py
@@ -5,7 +5,7 @@
 import os
 import re
 import subprocess
-from typing import Dict
+from typing import Dict, List
 
 import pysam
 
@@ -470,7 +470,7 @@ def align_sequences(
             with pysam.AlignmentFile(
                 aligner_output_file, 'r', check_sq=bool(len(sequences))
             ) as samfile:
-                reads_by_query = {}
+                reads_by_query: Dict[str, List[_read.SamRead]] = {}
                 for read in samfile.fetch():
                     if read.is_unmapped:
                         continue
diff --git a/src/mavis/annotate/genomic.py b/src/mavis/annotate/genomic.py
index 14ca0a7b..22f6831d 100644
--- a/src/mavis/annotate/genomic.py
+++ b/src/mavis/annotate/genomic.py
@@ -244,7 +244,7 @@ def transcript(self):
         return self.reference_object
 
     @property
-    def donor_splice_site(self) -> Interval:
+    def donor_splice_site(self) -> SpliceSite:
         """the genomic range describing the splice site"""
         if self.is_reverse:
             return self.start_splice_site
@@ -252,7 +252,7 @@ def donor_splice_site(self) -> Interval:
             return self.end_splice_site
 
     @property
-    def acceptor_splice_site(self) -> Interval:
+    def acceptor_splice_site(self) -> SpliceSite:
         """the genomic range describing the splice site"""
         if self.is_reverse:
             return self.end_splice_site
diff --git a/src/mavis/annotate/splicing.py b/src/mavis/annotate/splicing.py
index b2160ffd..fe303592 100644
--- a/src/mavis/annotate/splicing.py
+++ b/src/mavis/annotate/splicing.py
@@ -4,7 +4,8 @@
 from ..constants import SPLICE_TYPE, STRAND, reverse_complement
 from ..interval import Interval
 from .base import BioInterval
-from .constants import ACCEPTOR_SEQ, DONOR_SEQ, SPLICE_SITE_RADIUS, SPLICE_SITE_TYPE
+from .constants import (ACCEPTOR_SEQ, DONOR_SEQ, SPLICE_SITE_RADIUS,
+                        SPLICE_SITE_TYPE)
 
 
 class SplicingPattern(list):
@@ -121,7 +122,7 @@ def __init__(
         self,
         ref: BioInterval,
         pos: int,
-        site_type: str,
+        site_type: int,
         intact: bool = True,
         start: Optional[int] = None,
         end: Optional[int] = None,
diff --git a/src/mavis/py.typed b/src/mavis/py.typed
new file mode 100644
index 00000000..e69de29b
diff --git a/src/mavis/validate/base.py b/src/mavis/validate/base.py
index e73d4846..65a70bc5 100644
--- a/src/mavis/validate/base.py
+++ b/src/mavis/validate/base.py
@@ -671,7 +671,6 @@ def collect_split_read(self, read: pysam.AlignedSegment, first_breakpoint: bool)
             w[0] - 1 : w[1]
         ]
 
-        putative_alignments = None
         # figure out how much of the read must match when remaped
         min_match_tgt = read.cigar[-1][1] if breakpoint.orient == ORIENT.LEFT else read.cigar[0][1]
         min_match_tgt = min(

From 841c36412aa3f85c23c777a72c0e03800c37c76f Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Fri, 28 Jan 2022 15:43:01 -0800
Subject: [PATCH 096/137] Fix linting error

---
 src/mavis/annotate/splicing.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/mavis/annotate/splicing.py b/src/mavis/annotate/splicing.py
index fe303592..9de8ce24 100644
--- a/src/mavis/annotate/splicing.py
+++ b/src/mavis/annotate/splicing.py
@@ -4,8 +4,7 @@
 from ..constants import SPLICE_TYPE, STRAND, reverse_complement
 from ..interval import Interval
 from .base import BioInterval
-from .constants import (ACCEPTOR_SEQ, DONOR_SEQ, SPLICE_SITE_RADIUS,
-                        SPLICE_SITE_TYPE)
+from .constants import ACCEPTOR_SEQ, DONOR_SEQ, SPLICE_SITE_RADIUS, SPLICE_SITE_TYPE
 
 
 class SplicingPattern(list):

From 43cd30920db60c5dd106b39b964a89cbacebe957 Mon Sep 17 00:00:00 2001
From: Jeremy Fan <jfan@bcgsc.ca>
Date: Fri, 28 Jan 2022 17:59:35 -0800
Subject: [PATCH 097/137] add quick test to vcf module

---
 src/mavis/interval.py        |  2 --
 src/mavis/tools/vcf.py       | 10 ++++++++++
 tests/data/sniffles.vcf      |  1 +
 tests/unit/test_tools_vcf.py |  6 +++---
 4 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/src/mavis/interval.py b/src/mavis/interval.py
index 03c788f9..c78e3aa8 100644
--- a/src/mavis/interval.py
+++ b/src/mavis/interval.py
@@ -30,8 +30,6 @@ def __init__(self, start: int, end: Optional[int] = None, freq: int = 1, number_
 
         self.start = self.number_type(self.start)
         self.end = self.number_type(self.end)
-        if self.start == 0 and self.end == 1:
-            self.start = 1
         if self.start > self.end:
             raise AttributeError('interval start > end is not allowed', self.start, self.end)
         self.freq = int(freq)
diff --git a/src/mavis/tools/vcf.py b/src/mavis/tools/vcf.py
index 92a6c987..9dd04c6f 100644
--- a/src/mavis/tools/vcf.py
+++ b/src/mavis/tools/vcf.py
@@ -204,6 +204,16 @@ def convert_record(record, record_mapping={}, log=DEVNULL) -> List[Dict]:
                 }
             )
 
+        '''
+        As per VCF 4.2 specifications (https://samtools.github.io/hts-specs/VCFv4.2.pdf): 
+        A start_position = 1, end_position = 0 linkage indicates connections to telomeres
+        Change 0 a 1 since coordinates are 1-based and we cannot start before the start of a sequence
+        '''
+
+        if std_row['break1_position_end'] == 0 and std_row['break1_position_start'] == 1:
+            std_row.update({'break1_position_end': 1})
+        elif std_row['break2_position_end'] == 0 and std_row['break2_position_start'] == 1:
+            std_row.update({'break2_position_end': 1})
         if 'SVTYPE' in info:
             std_row[COLUMNS.event_type] = info['SVTYPE']
 
diff --git a/tests/data/sniffles.vcf b/tests/data/sniffles.vcf
index dd631018..c9e475fc 100644
--- a/tests/data/sniffles.vcf
+++ b/tests/data/sniffles.vcf
@@ -335,3 +335,4 @@ chr1	2657257	89_1	CCCTGCACACCCAGGTGAGCATCCGACAGCCTGGAGCAGCACCCACACCCCCAGTTGAGCAT
 chr1	125029102	1150_3	N	<DEL/INV>	.	PASS	PRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr1;END=125029168;STD_quant_start=0;STD_quant_stop=0;Kurtosis_quant_start=1.95204;Kurtosis_quant_stop=4.40106;SVTYPE=DEL/INV;RNAMES=0615a1e2-43d8-4ee1-aad8-b3bc30654032,07df46cf-fab9-4b7a-9c61-63d6fb79063d,0c62bf53-a1fa-4cd8-9f65-9b6d896437f6,0c775ec9-8cd4-46fa-b432-d8fadbe12e8c,13f6e3a3-7e12-4fe1-96e8-6fb510c5e51e,25140c43-b936-4ec7-88ac-5f35ee57eb89,425d50b0-d4c0-4128-befb-3cb9a20d0395,42ec5104-09bf-4324-9579-9099acbf7650,61e7dbc6-c78a-4047-aaee-6660f758bf93,711bb46e-6427-4960-9abf-a11838e69701,76aadab8-9dfe-46bc-a906-eb70126c5841,77b0f627-1ffc-43ff-8f07-a3839b73e859,78e435d7-34fa-4d52-9f3f-189868c74142,7bff020f-5745-4030-a649-c2ca270932f4,801d64f6-1205-4a00-a1c0-eeff98c29be7,928578aa-6777-4ba7-a150-7b2eaa900249,993a19f7-ab8d-4636-aa9f-56566d3ab328,9dc57ac2-909f-467e-a15f-26041dee67d0,a5fdd9d5-ed51-4036-88e5-6009ce233bc6;SUPTYPE=NR;SVLEN=-66;STRANDS=+-;RE=19;REF_strand=0,5;AF=0.791667	GT:DR:DV	0/1:5:19
 chr10	125502113	16341_1	N	<INVDUP>	.	PASS	PRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr10;END=125508658;STD_quant_start=0;STD_quant_stop=3.1305;Kurtosis_quant_start=nan;Kurtosis_quant_stop=2;SVTYPE=INVDUP;RNAMES=11f6d8be-ef3b-44bd-bc04-b7a4c6619129,235ef779-5cc1-4999-91cd-25c6bbcfbb08,3058d7cf-0c2c-493e-8bd7-97adb6e8c721,40788bbd-835b-4706-8abe-a76b9672804f,4ed139db-33bf-4ed2-b709-840d06a92d5f,929a407e-1103-4f89-81fa-20b902c08c6e,a8b62faf-9df4-497e-a1df-f4d6af7a92e0,c34b12b2-7aee-4c9a-89a2-81d8f9d34a8b;SUPTYPE=SR;SVLEN=6545;STRANDS=++;RE=5;REF_strand=6,4;AF=0.333333	GT:DR:DV	0/1:10:5
 chr11	3653753	16746_1	N	CAACCCTACCTCTGTCTCTCCTCACCGCTATTCTCCCATCAGACATCAGTCTTGTAAATTCCAATTCCTACCTCTGTCTATCCTCACTGCCATTCTCCCACTGACATCAAGTCTTGTAAATCTCCACCTCCTACCTCTGTCTATCCTCACTGCCATTCTCCCATCAGACATCAAGTTCTGTAAATTCCACCTCCTACCTCTCTGTCTATCCTCACTGCCATTCTCCCATCTGGACATCAAGTCTTGTAAATT	.	PASS	IMPRECISE;SVMETHOD=Snifflesv1.0.11;CHR2=chr11;END=3654825;STD_quant_start=139.219;STD_quant_stop=2.84445;Kurtosis_quant_start=-1.96058;Kurtosis_quant_stop=-0.064951;SVTYPE=DUP/INS;RNAMES=00b67655-ed56-4780-b3fe-be29d59e1859,028e509d-93df-4dc8-82e9-a09514d7bc3d,02cce278-7597-4217-8253-5f28a462e699,06eb2443-613b-404c-950a-d4af531205bd,0fcbfcc2-2a80-4da8-b592-d7862d4e5f32,0ffca8ba-4f7f-4475-b7f1-b57ec5c83ed8,130292a2-4dff-4c25-b571-b1ed9fb82f6e,14038c62-2b05-446f-8d58-b34a3f784d23,16732163-f7e5-4bc2-bfc8-a38f908e5ba8,190c6094-4e7b-4f20-91a1-30e6b0d189db,19b242ce-893d-4b98-ac2d-70cf8e216d61,206d22d5-959a-4d55-9d85-eca31de42f0b,21cc7629-0ee7-44b6-9e17-664918ab0ac2,26fd2c01-04d2-40f7-b350-aeea96752a72,2c67fc96-be15-4e19-bb73-662a104fdd1a,329783a5-e55a-4276-8c13-1f8bdbff7700,342c2503-a98b-4a20-9dd9-8190bdd743fa,34c23995-bcaa-4dcd-aefa-3c96d4032492,4f12c658-1e88-44a9-9689-18bc76d12047,5da809c9-cf2c-4562-a703-3d1b12927220,6145c5c6-c4ed-4b30-987f-e653337a0a18,744b6c64-1a96-4dcb-9216-8be6bdcfe7f3,84e6ec27-5a6b-463d-8681-045651b2af07,8eedae6b-ec01-4367-bd47-2081f9df8f33,8f6ff282-26eb-4eb5-89bd-df9010689ba6,9c8fa8a6-da33-4599-b835-24d0220c6139,a2494f89-4dba-472a-9b20-c61d0a0512af,ad3b03b1-1cf7-4a54-a6df-eb7563ddbbea,b2b77f8b-0659-4996-8280-078e8b9463bb,b7819371-05b3-4eac-a229-54a49a852133,bafd9ab5-3cc7-4c21-b48f-186d1a8e5351,bff30357-4e2d-46a7-927e-707223885e25,c2d8bcd3-a488-4709-8d33-f9c000c54d51,c4e7702f-8831-4236-b6a8-6723a3d668f8,ca4a99ee-181d-488a-9eea-e0ef7e9b765e,ca6d0c9c-bc64-4888-b660-18ca49b597b9,ccf84af9-06f8-4bb1-b844-e4512907b8bd,d00d6c06-b2e2-407d-b294-d585efdb53ad,da8571f5-d34e-4e04-a7a8-b2696a4661e0,e1fd56ef-d7ee-4703-8fad-a90383cb4156,ebf37b99-cfbc-4168-a497-a8453d0e698a,f8008b3f-d0
e6-474b-82c1-bb28a53b9e01,f8c3fa80-204b-4d1f-b782-358c648e48bd,fa2f213f-f63d-4828-a601-bfdfae84b8e9;SUPTYPE=AL,SR;SVLEN=61;STRANDS=+-;RE=22;REF_strand=1,0;AF=0.956522	GT:DR:DV	1/1:1:22
+chr9    60528555        29582_2 N       N[chr17_GL000205v2_random:0[    .       PASS    PRECISE;SVMETHOD=Snifflesv1.0.11;STD_quant_start=1.61245;STD_quant_stop=0;Kurtosis_quant_start=6.96082;Kurtosis_quant_stop=7;SVTYPE=BND;RNAMES=0025a6d7-4f51-4eb7-90a6-03540aadf4e7,4839f229-9a4e-4c4c-8652-a5c6da4f9ae4,4d7d382d-6a1a-46cd-beb0-4b7028b78356,54cf27a5-d6b8-4f68-8f72-ae38badc6ea4,56756629-0e3f-4e9a-9b6d-34b2fa97837d,5d1be304-66fb-44a3-8873-a5e291ea8db2,69a5ce72-4ab8-48b4-97c6-8872a310c649,73179d48-8c79-4566-b55b-d69a557b88ab,84e94667-fbad-4f22-ba2c-54e7f96b2daf,8b536a68-ddb2-4b04-9ae3-87df2a939fd4,963e22ef-31dd-4f50-8e46-9d8c48417787,a1a46081-76e9-4e38-8cc3-160c59608548,c67f7f2a-a381-403a-848f-0d63bb1ddd2e,c9806188-0f3c-415b-92cb-92c1cae1a085,ef9cd5af-23cc-4ba8-8c3e-8518716b83c1;SUPTYPE=SR;SVLEN=0;STRANDS=+-;RE=15;REF_strand=0,0;AF=1       GT:DR:DV        1/1:0:15chr9	60528555	29582_2	N	N[chr17_GL000205v2_random:0[	.	PASS	PRECISE;SVMETHOD=Snifflesv1.0.11;STD_quant_start=1.61245;STD_quant_stop=0;Kurtosis_quant_start=6.96082;Kurtosis_quant_stop=7;SVTYPE=BND;RNAMES=0025a6d7-4f51-4eb7-90a6-03540aadf4e7,4839f229-9a4e-4c4c-8652-a5c6da4f9ae4,4d7d382d-6a1a-46cd-beb0-4b7028b78356,54cf27a5-d6b8-4f68-8f72-ae38badc6ea4,56756629-0e3f-4e9a-9b6d-34b2fa97837d,5d1be304-66fb-44a3-8873-a5e291ea8db2,69a5ce72-4ab8-48b4-97c6-8872a310c649,73179d48-8c79-4566-b55b-d69a557b88ab,84e94667-fbad-4f22-ba2c-54e7f96b2daf,8b536a68-ddb2-4b04-9ae3-87df2a939fd4,963e22ef-31dd-4f50-8e46-9d8c48417787,a1a46081-76e9-4e38-8cc3-160c59608548,c67f7f2a-a381-403a-848f-0d63bb1ddd2e,c9806188-0f3c-415b-92cb-92c1cae1a085,ef9cd5af-23cc-4ba8-8c3e-8518716b83c1;SUPTYPE=SR;SVLEN=0;STRANDS=+-;RE=15;REF_strand=0,0;AF=1	GT:DR:DV	1/1:0:15
\ No newline at end of file
diff --git a/tests/unit/test_tools_vcf.py b/tests/unit/test_tools_vcf.py
index 8af3067f..c196418a 100644
--- a/tests/unit/test_tools_vcf.py
+++ b/tests/unit/test_tools_vcf.py
@@ -5,9 +5,9 @@
 
 
 def test_read_vcf():
-    header, df = pandas_vcf(get_data('delly_events.vcf'))
-    assert len(header) == 63
-    assert df.shape[0] == 31
+    header, df = pandas_vcf(get_data('sniffles.vcf'))
+    assert len(header) == 231
+    assert df.shape[0] == 106
 
 
 def test_convert_record():

From db96aec920c48ff686163879eb8bf3c70680d40b Mon Sep 17 00:00:00 2001
From: Jeremy Fan <jfan@bcgsc.ca>
Date: Fri, 28 Jan 2022 18:50:49 -0800
Subject: [PATCH 098/137] changed test case to include bp1

---
 src/mavis/tools/vcf.py       | 6 +++---
 tests/unit/test_tools_vcf.py | 6 ++++--
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/src/mavis/tools/vcf.py b/src/mavis/tools/vcf.py
index d5eeba02..ee6995c5 100644
--- a/src/mavis/tools/vcf.py
+++ b/src/mavis/tools/vcf.py
@@ -211,7 +211,7 @@ def convert_record(record, record_mapping={}) -> List[Dict]:
 
         if std_row['break1_position_end'] == 0 and std_row['break1_position_start'] == 1:
             std_row.update({'break1_position_end': 1})
-        elif std_row['break2_position_end'] == 0 and std_row['break2_position_start'] == 1:
+        if std_row['break2_position_end'] == 0 and std_row['break2_position_start'] == 1:
             std_row.update({'break2_position_end': 1})
         if 'SVTYPE' in info:
             std_row[COLUMNS.event_type] = info['SVTYPE']
@@ -284,7 +284,7 @@ def pandas_vcf(input_file) -> Tuple[List[str], pd.DataFrame]:
     # read the data
     df = pd.read_csv(
         input_file,
-        sep="\t",
+        sep='\t',
         skiprows=len(header_lines),
         dtype={
             'CHROM': str,
@@ -295,7 +295,7 @@ def pandas_vcf(input_file) -> Tuple[List[str], pd.DataFrame]:
             'REF': str,
             'ALT': str,
         },
-        na_values=PANDAS_DEFAULT_NA_VALUES + ["."],
+        na_values=PANDAS_DEFAULT_NA_VALUES + ['.'],
     )
     df = df.rename(columns={df.columns[0]: df.columns[0].replace('#', '')})
     required_columns = ['CHROM', 'INFO', 'POS', 'REF', 'ALT', 'ID']
diff --git a/tests/unit/test_tools_vcf.py b/tests/unit/test_tools_vcf.py
index c196418a..122a640d 100644
--- a/tests/unit/test_tools_vcf.py
+++ b/tests/unit/test_tools_vcf.py
@@ -12,8 +12,8 @@ def test_read_vcf():
 
 def test_convert_record():
     variant = VcfRecordType(
-        9000,
-        12000,
+        1,
+        0,
         'chr14_KI270722v1_random',
         alts=['N[chr17_GL000205v2_random:0['],
         ref='N',
@@ -32,6 +32,8 @@ def test_convert_record():
     records = convert_record(variant)
     assert len(records) == 1
     record = records[0]
+    assert record.get('break1_position_start') == 1
+    assert record.get('break1_position_end') == 1
     assert record.get('break2_position_start') == 1
     assert record.get('break2_position_end') == 1
     assert record.get('break2_chromosome') == 'chr17_GL000205v2_random'

From ec1e54bfcd33ef17c837707a3a56d3ea7b79562f Mon Sep 17 00:00:00 2001
From: Jeremy Fan <jfan@bcgsc.ca>
Date: Sun, 30 Jan 2022 02:34:19 -0800
Subject: [PATCH 099/137] update hooks.py to import from config

---
 docs/hooks.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/hooks.py b/docs/hooks.py
index 44931755..02411323 100644
--- a/docs/hooks.py
+++ b/docs/hooks.py
@@ -4,7 +4,7 @@
 from textwrap import dedent
 
 from markdown_refdocs.main import extract_to_markdown
-from mavis.schemas import DEFAULTS
+from mavis_config import DEFAULTS
 from mavis.util import ENV_VAR_PREFIX
 
 

From 4e1c1e94c2c9dd873b8c82472e096b1ef40234e0 Mon Sep 17 00:00:00 2001
From: Jeremy Fan <jfan@bcgsc.ca>
Date: Mon, 31 Jan 2022 12:10:15 -0800
Subject: [PATCH 100/137] fixed checks

---
 src/mavis/tools/vcf.py | 16 ++--------------
 1 file changed, 2 insertions(+), 14 deletions(-)

diff --git a/src/mavis/tools/vcf.py b/src/mavis/tools/vcf.py
index ee6995c5..474fc5a1 100644
--- a/src/mavis/tools/vcf.py
+++ b/src/mavis/tools/vcf.py
@@ -156,6 +156,8 @@ def convert_record(record, record_mapping={}) -> List[Dict]:
 
         if info.get('SVTYPE') == 'BND':
             chr2, end, orient1, orient2, ref, alt = parse_bnd_alt(alt)
+            if end == 0:
+                end = 1  # telomeric BND alt syntax https://github.com/bcgsc/mavis/issues/294
             std_row[COLUMNS.break1_orientation] = orient1
             std_row[COLUMNS.break2_orientation] = orient2
             std_row[COLUMNS.untemplated_seq] = alt
@@ -202,20 +204,6 @@ def convert_record(record, record_mapping={}) -> List[Dict]:
                     COLUMNS.break2_position_end: end + info.get('CIEND', (0, 0))[1],
                 }
             )
-
-        '''
-        As per VCF 4.2 specifications (https://samtools.github.io/hts-specs/VCFv4.2.pdf): 
-        A start_position = 1, end_position = 0 linkage indicates connections to telomeres
-        Change 0 a 1 since coordinates are 1-based and we cannot start before the start of a sequence
-        '''
-
-        if std_row['break1_position_end'] == 0 and std_row['break1_position_start'] == 1:
-            std_row.update({'break1_position_end': 1})
-        if std_row['break2_position_end'] == 0 and std_row['break2_position_start'] == 1:
-            std_row.update({'break2_position_end': 1})
-        if 'SVTYPE' in info:
-            std_row[COLUMNS.event_type] = info['SVTYPE']
-
         try:
             orient1, orient2 = info['CT'].split('to')
             connection_type = {'3': ORIENT.LEFT, '5': ORIENT.RIGHT, 'N': ORIENT.NS}

From 4c3365fc93b660bf6edcca3b59bf98058d282c3e Mon Sep 17 00:00:00 2001
From: Jeremy Fan <jfan@bcgsc.ca>
Date: Mon, 31 Jan 2022 12:17:11 -0800
Subject: [PATCH 101/137] restore accidentally deleted lines

---
 src/mavis/tools/vcf.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/mavis/tools/vcf.py b/src/mavis/tools/vcf.py
index f8b6c58b..e52a0cde 100644
--- a/src/mavis/tools/vcf.py
+++ b/src/mavis/tools/vcf.py
@@ -204,6 +204,10 @@ def convert_record(record: VcfRecordType) -> List[Dict]:
                     COLUMNS.break2_position_end: end + info.get('CIEND', (0, 0))[1],
                 }
             )
+        
+        if 'SVTYPE' in info:
+            std_row[COLUMNS.event_type] = info['SVTYPE']
+
         try:
             orient1, orient2 = info['CT'].split('to')
             connection_type = {'3': ORIENT.LEFT, '5': ORIENT.RIGHT, 'N': ORIENT.NS}

From 107253499fa080228c67138d6b3d81c3770a978e Mon Sep 17 00:00:00 2001
From: Jeremy Fan <jfan@bcgsc.ca>
Date: Mon, 31 Jan 2022 12:22:40 -0800
Subject: [PATCH 102/137] re-lint vcf file

---
 src/mavis/tools/vcf.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mavis/tools/vcf.py b/src/mavis/tools/vcf.py
index e52a0cde..ff4c0cb0 100644
--- a/src/mavis/tools/vcf.py
+++ b/src/mavis/tools/vcf.py
@@ -204,7 +204,7 @@ def convert_record(record: VcfRecordType) -> List[Dict]:
                     COLUMNS.break2_position_end: end + info.get('CIEND', (0, 0))[1],
                 }
             )
-        
+
         if 'SVTYPE' in info:
             std_row[COLUMNS.event_type] = info['SVTYPE']
 

From debbf27171661dbb48d0c917811fb2b64fbcede9 Mon Sep 17 00:00:00 2001
From: Jeremy Fan <jfan@bcgsc.ca>
Date: Mon, 31 Jan 2022 14:43:18 -0800
Subject: [PATCH 103/137] change unit tests to accommodate changes

---
 tests/unit/test_tools_vcf.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/tests/unit/test_tools_vcf.py b/tests/unit/test_tools_vcf.py
index 122a640d..c4eac443 100644
--- a/tests/unit/test_tools_vcf.py
+++ b/tests/unit/test_tools_vcf.py
@@ -32,8 +32,5 @@ def test_convert_record():
     records = convert_record(variant)
     assert len(records) == 1
     record = records[0]
-    assert record.get('break1_position_start') == 1
-    assert record.get('break1_position_end') == 1
-    assert record.get('break2_position_start') == 1
     assert record.get('break2_position_end') == 1
     assert record.get('break2_chromosome') == 'chr17_GL000205v2_random'

From 847752cfc2f1191536b84b9ca62cdb9e34e2f1c4 Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Tue, 1 Feb 2022 13:56:18 -0800
Subject: [PATCH 104/137] Add more type annotations

---
 src/mavis/align.py               |   9 +-
 src/mavis/annotate/base.py       |  23 +++--
 src/mavis/annotate/file_io.py    |  14 +--
 src/mavis/annotate/fusion.py     |  48 ++++-----
 src/mavis/annotate/genomic.py    |   2 +-
 src/mavis/annotate/main.py       |   4 +-
 src/mavis/annotate/protein.py    |  12 +--
 src/mavis/annotate/variant.py    |  44 ++++----
 src/mavis/assemble.py            |   8 +-
 src/mavis/bam/cache.py           |   4 +-
 src/mavis/bam/stats.py           |  71 ++++++-------
 src/mavis/blat.py                |   2 +-
 src/mavis/breakpoint.py          |  20 ++--
 src/mavis/cluster/cluster.py     |   2 +-
 src/mavis/constants.py           |   8 +-
 src/mavis/illustrate/elements.py | 168 +++++++++++++++++++------------
 src/mavis/illustrate/scatter.py  |  61 ++++++-----
 src/mavis/interval.py            |  14 +--
 src/mavis/main.py                |   6 +-
 src/mavis/types.py               |   7 +-
 src/mavis/util.py                |  30 ++++--
 src/mavis/validate/evidence.py   |   4 +-
 22 files changed, 317 insertions(+), 244 deletions(-)

diff --git a/src/mavis/align.py b/src/mavis/align.py
index f4c70712..43e67be6 100644
--- a/src/mavis/align.py
+++ b/src/mavis/align.py
@@ -5,7 +5,7 @@
 import os
 import re
 import subprocess
-from typing import Dict, List
+from typing import TYPE_CHECKING, Dict, List
 
 import pysam
 
@@ -17,6 +17,9 @@
 from .types import ReferenceGenome
 from .util import logger
 
+if TYPE_CHECKING:
+    from .bam.cache import BamCache
+
 
 class SUPPORTED_ALIGNER(MavisNamespace):
     """
@@ -385,7 +388,7 @@ def call_paired_read_event(read1, read2, is_stranded=False):
 
 def align_sequences(
     sequences: Dict[str, str],
-    input_bam_cache,
+    input_bam_cache: BamCache,
     reference_genome: ReferenceGenome,
     aligner: str,
     aligner_reference: str,
@@ -402,7 +405,7 @@ def align_sequences(
 
     Args:
         sequences: dictionary of sequences by name
-        input_bam_cache (BamCache): bam cache to be used as a template for reading the alignments
+        input_bam_cache: bam cache to be used as a template for reading the alignments
         reference_genome: the reference genome
         aligner (SUPPORTED_ALIGNER): the name of the aligner to be used
         aligner_reference: path to the aligner reference file
diff --git a/src/mavis/annotate/base.py b/src/mavis/annotate/base.py
index 950444fa..23b7205a 100644
--- a/src/mavis/annotate/base.py
+++ b/src/mavis/annotate/base.py
@@ -59,15 +59,22 @@ class BioInterval:
     strand: Optional[str]
 
     def __init__(
-        self, reference_object, start, end=None, name=None, seq=None, data=None, strand=None
+        self,
+        reference_object,
+        start: int,
+        end: int = None,
+        name: Optional[str] = None,
+        seq: Optional[str] = None,
+        data: Optional[Dict] = None,
+        strand: Optional[str] = None,
     ):
         """
         Args:
             reference_object: the object this interval is on
-            start (int) start of the interval (inclusive)
-            end (int): end of the interval (inclusive)
+            start: start of the interval (inclusive)
+            end: end of the interval (inclusive)
             name: optional
-            seq (str): the seq relating to this interval
+            seq: the seq relating to this interval
 
         Example:
             >>> b = BioInterval('1', 12572784, 12578898, 'q22.2')
@@ -88,13 +95,13 @@ def __init__(
         self.strand = strand
 
     @property
-    def start(self):
-        """*int*: the start position"""
+    def start(self) -> int:
+        """the start position"""
         return self.position.start
 
     @property
-    def end(self):
-        """*int*: the end position"""
+    def end(self) -> int:
+        """the end position"""
         return self.position.end
 
     def __getitem__(self, index):
diff --git a/src/mavis/annotate/file_io.py b/src/mavis/annotate/file_io.py
index 4b6ba264..661b8d7c 100644
--- a/src/mavis/annotate/file_io.py
+++ b/src/mavis/annotate/file_io.py
@@ -12,7 +12,7 @@
 
 from ..constants import CODON_SIZE, GIEMSA_STAIN, START_AA, STOP_AA, translate
 from ..interval import Interval
-from ..types import ReferenceGenome
+from ..types import ReferenceAnnotations, ReferenceGenome
 from ..util import logger
 from .base import BioInterval, ReferenceName
 from .genomic import Exon, Gene, PreTranscript, Template, Transcript
@@ -97,7 +97,7 @@ def parse_annotations_json(
     data,
     reference_genome: Optional[ReferenceGenome] = None,
     best_transcripts_only=False,
-) -> Dict[str, List[Gene]]:
+) -> ReferenceAnnotations:
     """
     parses a json of annotation information into annotation objects
     """
@@ -112,7 +112,7 @@ def parse_annotations_json(
         )  # these can get super long
         raise AssertionError(short_msg)
 
-    genes_by_chr: Dict[str, List[Gene]] = {}
+    genes_by_chr: ReferenceAnnotations = {}
 
     for gene_dict in data['genes']:
 
@@ -321,10 +321,10 @@ def __init__(
     ):
         """
         Args:
-            *filepaths (str): list of paths to load
-            file_type (str): Type of file to load
-            eager_load (bool=False): load the files immeadiately
-            assert_exists (bool=False): check that all files exist
+            *filepaths: list of paths to load
+            file_type: Type of file to load
+            eager_load: load the files immeadiately
+            assert_exists: check that all files exist
             **opt: key word arguments to be passed to the load function and used as part of the file cache key
 
         Raises
diff --git a/src/mavis/annotate/fusion.py b/src/mavis/annotate/fusion.py
index b02e3259..2666e1d9 100644
--- a/src/mavis/annotate/fusion.py
+++ b/src/mavis/annotate/fusion.py
@@ -1,3 +1,5 @@
+from typing import TYPE_CHECKING
+
 from ..breakpoint import Breakpoint
 from ..constants import ORIENT, PRIME, PROTOCOL, STRAND, SVTYPE, reverse_complement
 from ..error import NotSpecifiedError
@@ -6,14 +8,17 @@
 from .genomic import Exon, PreTranscript, Transcript
 from .protein import Domain, Translation, calculate_orf
 
+if TYPE_CHECKING:
+    from .variant import Annotation
+
 
-def determine_prime(transcript, breakpoint):
+def determine_prime(transcript: Transcript, breakpoint: Breakpoint) -> int:
     """
     determine the side of the transcript 5' or 3' which is 'kept' given the breakpoint
 
     Args:
-        transcript (Transcript): the transcript
-        breakpoint (Breakpoint): the breakpoint
+        transcript: the transcript
+        breakpoint: the breakpoint
 
     Returns:
         PRIME: 5' or 3'
@@ -56,6 +61,9 @@ class FusionTranscript(PreTranscript):
     The preferred way to construct a FusionTranscript is through the build method.
     """
 
+    last_five_prime_exon: Exon
+    first_three_prime_exon: Exon
+
     def __init__(self):
         self.exon_mapping = {}
         self.exons = []
@@ -71,10 +79,10 @@ def __init__(self):
         self.break1 = None  # first breakpoint position in the fusion transcript
         self.break2 = None  # second breakpoint position in the fusion transcript
 
-    def exon_number(self, exon):
+    def exon_number(self, exon: Exon) -> int:
         """
         Args:
-            exon (Exon): the exon to be numbered
+            exon: the exon to be numbered
 
         Returns:
             int: the number of the exon in the original transcript (prior to fusion)
@@ -87,14 +95,7 @@ def map_region_to_genome(self, chr, interval_on_fusion, genome_interval, flipped
         self.mapping_to_chrs[Interval(interval_on_fusion[0], interval_on_fusion[1])] = chr
 
     @classmethod
-    def _build_single_gene_inversion(
-        cls,
-        ann,
-        reference_genome: ReferenceGenome,
-        min_orf_size,
-        max_orf_cap,
-        min_domain_mapping_match,
-    ):
+    def _build_single_gene_inversion(cls, ann, reference_genome: ReferenceGenome):
         """
         builds a fusion transcript for a single gene inversion. Note that this is an incomplete
         fusion transcript and still requires translations and domain information to be added
@@ -209,9 +210,7 @@ def _build_single_gene_inversion(
         return fusion_pre_transcript
 
     @classmethod
-    def _build_single_gene_duplication(
-        cls, ann, reference_genome, min_orf_size, max_orf_cap, min_domain_mapping_match
-    ):
+    def _build_single_gene_duplication(cls, ann, reference_genome):
         """
         builds a fusion transcript for a single gene duplication. Note that this is an incomplete
         fusion transcript and still requires translations and domain information to be added
@@ -288,20 +287,19 @@ def _build_single_gene_duplication(
     @classmethod
     def build(
         cls,
-        ann,
+        ann: 'Annotation',
         reference_genome: ReferenceGenome,
         min_orf_size=None,
         max_orf_cap=None,
         min_domain_mapping_match=None,
-    ):
+    ) -> 'FusionTranscript':
         """
         Args:
-            ann (Annotation): the annotation object we want to build a FusionTranscript for
-            reference_genome: dict of reference sequence
-                by template/chr name
+            ann: the annotation object we want to build a FusionTranscript for
+            reference_genome: dict of reference sequence by template/chr name
 
         Returns:
-            FusionTranscript: the newly built fusion transcript
+            the newly built fusion transcript
 
         """
         if not ann.transcript1 or not ann.transcript2:
@@ -489,15 +487,13 @@ def build(
                             pass
         return fusion_pre_transcript
 
-    def get_seq(self, reference_genome=None, ignore_cache=False):
+    def get_seq(self):
         return PreTranscript.get_seq(self)
 
-    def get_cdna_seq(self, splicing_pattern, reference_genome=None, ignore_cache=False):
+    def get_cdna_seq(self, splicing_pattern):
         """
         Args:
             splicing_pattern (List[int]): the list of splicing positions
-            reference_genome (Dict[str,Bio.SeqRecord]): dict of reference seq
-                by template/chr name
 
         Returns:
             str: the spliced cDNA seq
diff --git a/src/mavis/annotate/genomic.py b/src/mavis/annotate/genomic.py
index 22f6831d..700b045c 100644
--- a/src/mavis/annotate/genomic.py
+++ b/src/mavis/annotate/genomic.py
@@ -599,7 +599,7 @@ class Transcript(BioInterval):
     def __init__(
         self,
         pre_transcript: PreTranscript,
-        splicing_patt: List[int],
+        splicing_patt: SplicingPattern,
         seq: Optional[str] = None,
         translations: Optional[List[Translation]] = None,
     ):
diff --git a/src/mavis/annotate/main.py b/src/mavis/annotate/main.py
index 9b08f34e..ed426a04 100644
--- a/src/mavis/annotate/main.py
+++ b/src/mavis/annotate/main.py
@@ -118,8 +118,8 @@ def main(
 ):
     """
     Args:
-        inputs (List[str]): list of input files to read
-        output (str): path to the output directory
+        inputs: list of input files to read
+        output: path to the output directory
     """
     reference_genome = ReferenceFile.load_from_config(config, 'reference_genome')
     annotations = ReferenceFile.load_from_config(config, 'annotations')
diff --git a/src/mavis/annotate/protein.py b/src/mavis/annotate/protein.py
index 50d31f33..c7e711c9 100644
--- a/src/mavis/annotate/protein.py
+++ b/src/mavis/annotate/protein.py
@@ -263,11 +263,11 @@ def __init__(
         describes the splicing pattern and cds start and end with reference to a particular transcript
 
         Args:
-            start (int): start of the coding sequence (cds) relative to the start of the first exon in the transcript
-            end (int): end of the coding sequence (cds) relative to the start of the first exon in the transcript
-            transcript (Transcript): the transcript this is a Translation of
-            domains (List[Domain]): a list of the domains on this translation
-            sequence (str): the cds sequence
+            start: start of the coding sequence (cds) relative to the start of the first exon in the transcript
+            end: end of the coding sequence (cds) relative to the start of the first exon in the transcript
+            transcript: the transcript this is a Translation of
+            domains: a list of the domains on this translation
+            sequence: the cds sequence
         """
         domains = [] if domains is None else domains
         BioInterval.__init__(
@@ -334,7 +334,7 @@ def convert_genomic_to_cds(self, pos: int) -> int:
             raise IndexError('conversion failed. position is outside the exonic region')
         return cds
 
-    def convert_genomic_to_nearest_cds(self, pos: str) -> Tuple[int, int]:
+    def convert_genomic_to_nearest_cds(self, pos: int) -> Tuple[int, int]:
         """
         converts a genomic position to its cds equivalent or (if intronic) the nearest cds and shift
 
diff --git a/src/mavis/annotate/variant.py b/src/mavis/annotate/variant.py
index bfa9e3a0..aabb2488 100644
--- a/src/mavis/annotate/variant.py
+++ b/src/mavis/annotate/variant.py
@@ -8,7 +8,7 @@
 from ..constants import COLUMNS, GENE_PRODUCT_TYPE, PROTOCOL, STOP_AA, STRAND, SVTYPE
 from ..error import NotSpecifiedError
 from ..interval import Interval
-from ..types import ReferenceGenome
+from ..types import Annotations, ReferenceGenome
 from ..util import logger
 from .fusion import FusionTranscript, determine_prime
 from .genomic import Gene, IntergenicRegion, PreTranscript, Transcript
@@ -36,15 +36,20 @@ def validation_id(self) -> Optional[str]:
         return self.data.get(COLUMNS.validation_id)
 
     def __init__(
-        self, bpp: BreakpointPair, transcript1=None, transcript2=None, proximity=5000, **kwargs
+        self,
+        bpp: BreakpointPair,
+        transcript1: Optional[Transcript] = None,
+        transcript2: Optional[Transcript] = None,
+        proximity: int = 5000,
+        **kwargs,
     ):
         """
         Holds a breakpoint call and a set of transcripts, other information is gathered relative to these
 
         Args:
-            bpp (BreakpointPair): the breakpoint pair call. Will be adjusted and then stored based on the transcripts
-            transcript1 (Transcript): transcript at the first breakpoint
-            transcript2 (Transcript): Transcript at the second breakpoint
+            bpp: the breakpoint pair call. Will be adjusted and then stored based on the transcripts
+            transcript1: transcript at the first breakpoint
+            transcript2: Transcript at the second breakpoint
         """
         # narrow the breakpoint windows by the transcripts being used for annotation
         temp = bpp.break1 if transcript1 is None else bpp.break1 & transcript1
@@ -86,12 +91,12 @@ def __init__(
         self.proximity = proximity
         self.fusion = None
 
-    def add_gene(self, input_gene):
+    def add_gene(self, input_gene: Gene):
         """
         adds a input_gene to the current set of annotations. Checks which set it should be added to
 
         Args:
-            input_gene (input_gene): the input_gene being added
+            input_gene: the input_gene being added
         """
         if input_gene.chr not in [self.break1.chr, self.break2.chr]:
             raise AttributeError(
@@ -167,12 +172,12 @@ def add_gene(self, input_gene):
 
             self.genes_proximal_to_break2 = temp
 
-    def flatten(self):
+    def flatten(self) -> Dict:
         """
         generates a dictionary of the annotation information as strings
 
         Returns:
-            Dict[str,str]: dictionary of attribute names and values
+            dictionary of attribute names and values
         """
         row = BreakpointPair.flatten(self)
         row.update(
@@ -245,16 +250,16 @@ def flatten(self):
                 pass
         return row
 
-    def single_transcript(self):
+    def single_transcript(self) -> bool:
         return bool(self.transcript1 == self.transcript2 and self.transcript1)
 
 
-def flatten_fusion_translation(translation):
+def flatten_fusion_translation(translation: Translation) -> Dict:
     """
     for a given fusion product (translation) gather the information to be output to the tabbed files
 
     Args:
-        translation (Translation): the translation which is on the fusion transcript
+        translation: the translation which is on the fusion transcript
     Returns:
         dict: the dictionary of column names to values
     """
@@ -534,11 +539,12 @@ def flatten_fusion_transcript(spliced_fusion_transcript):
     return row
 
 
-def overlapping_transcripts(ref_ann, breakpoint: Breakpoint) -> List[PreTranscript]:
+def overlapping_transcripts(
+    ref_ann: ReferenceAnnotations, breakpoint: Breakpoint
+) -> List[PreTranscript]:
     """
     Args:
-        ref_ann (Dict[str,List[Gene]]): the reference list of genes split
-            by chromosome
+        ref_ann: the reference list of genes split by chromosome
         breakpoint: the breakpoint in question
     Returns:
         a list of possible transcripts
@@ -558,7 +564,7 @@ def overlapping_transcripts(ref_ann, breakpoint: Breakpoint) -> List[PreTranscri
 
 
 def _gather_breakpoint_annotations(
-    ref_ann: Dict[str, List[Gene]], breakpoint: Breakpoint
+    ref_ann: Annotations, breakpoint: Breakpoint
 ) -> Tuple[
     List[Union[PreTranscript, IntergenicRegion]], List[Union[PreTranscript, IntergenicRegion]]
 ]:
@@ -651,9 +657,7 @@ def _gather_breakpoint_annotations(
     )
 
 
-def _gather_annotations(
-    ref: Dict[str, List[Gene]], bp: BreakpointPair, proximity=None
-) -> List[Annotation]:
+def _gather_annotations(ref: Annotations, bp: BreakpointPair, proximity=None) -> List[Annotation]:
     """
     each annotation is defined by the annotations selected at the breakpoints
     the other annotations are given relative to this
@@ -861,7 +865,7 @@ def choose_transcripts_by_priority(ann_list: List[Annotation]) -> List[Annotatio
 
 def annotate_events(
     bpps: List[BreakpointPair],
-    annotations: Dict[str, List[Gene]],
+    annotations: Annotations,
     reference_genome: ReferenceGenome,
     max_proximity: int = 5000,
     min_orf_size: int = 200,
diff --git a/src/mavis/assemble.py b/src/mavis/assemble.py
index c87b4208..235078fa 100644
--- a/src/mavis/assemble.py
+++ b/src/mavis/assemble.py
@@ -467,16 +467,16 @@ def assemble(
     return contigs
 
 
-def kmers(s, size):
+def kmers(s: str, size: int) -> List[str]:
     """
     for a sequence, compute and return a list of all kmers of a specified size
 
     Args:
-        s (str): the input sequence
-        size (int): the size of the kmers
+        s: the input sequence
+        size: the size of the kmers
 
     Returns:
-        List[str]: the list of kmers
+        the list of kmers
 
     Example:
         >>> kmers('abcdef', 2)
diff --git a/src/mavis/bam/cache.py b/src/mavis/bam/cache.py
index aee2cc99..2724ed0a 100644
--- a/src/mavis/bam/cache.py
+++ b/src/mavis/bam/cache.py
@@ -225,7 +225,9 @@ def fetch_from_bins(
                 chrom = 'chr' + chrom
             if chrom not in self.fh.references:
                 raise KeyError('bam file does not contain the expected reference', input_chrom)
-        bins = self.__class__._generate_fetch_bins(start, stop, sample_bins, min_bin_size)
+        bins: List[Interval] = self.__class__._generate_fetch_bins(
+            start, stop, sample_bins, min_bin_size
+        )
         running_surplus = 0
         temp_cache = set()
         for fstart, fend in bins:
diff --git a/src/mavis/bam/stats.py b/src/mavis/bam/stats.py
index ea6d1603..5890870a 100644
--- a/src/mavis/bam/stats.py
+++ b/src/mavis/bam/stats.py
@@ -2,11 +2,16 @@
 import math
 import os
 import statistics as stats
+from typing import TYPE_CHECKING
 
 from ..constants import STRAND
+from ..types import ReferenceAnnotations
 from ..util import logger
 from .read import sequenced_strand
 
+if TYPE_CHECKING:
+    from .cache import BamCache
+
 os.environ["OMP_NUM_THREADS"] = "4"  # export OMP_NUM_THREADS=4
 os.environ["OPENBLAS_NUM_THREADS"] = "4"  # export OPENBLAS_NUM_THREADS=4
 os.environ["MKL_NUM_THREADS"] = "4"  # export MKL_NUM_THREADS=6
@@ -103,29 +108,28 @@ def __add__(self, other):
 
 
 def compute_transcriptome_bam_stats(
-    bam_cache,
-    annotations,
-    sample_size,
-    min_mapping_quality=1,
-    stranded=True,
-    sample_cap=10000,
-    distribution_fraction=0.97,
-):
+    bam_cache: 'BamCache',
+    annotations: ReferenceAnnotations,
+    sample_size: int,
+    min_mapping_quality: int = 1,
+    stranded: bool = True,
+    sample_cap: int = 10000,
+    distribution_fraction: float = 0.97,
+) -> BamStats:
     """
     computes various statistical measures relating the input bam file
 
     Args:
-        bam_file_handle (BamCache): the input bam file handle
-        annotations (object): see :func:`mavis.annotate.load_annotations`
-        sample_size (int): the number of genes to compute stats over
-        log (Callable): outputs logging information
-        min_mapping_quality (int): the minimum mapping quality for a read to be used
-        stranded (bool): if True then reads must match the gene strand
-        sample_cap (int): maximum number of reads to collect for any given sample region
-        distribution_fraction (float): the proportion of the distribution to use in computing stdev
+        bam_cache: the input bam file handle
+        annotations: see :func:`mavis.annotate.load_annotations`
+        sample_size: the number of genes to compute stats over
+        min_mapping_quality: the minimum mapping quality for a read to be used
+        stranded: if True then reads must match the gene strand
+        sample_cap: maximum number of reads to collect for any given sample region
+        distribution_fraction: the proportion of the distribution to use in computing stdev
 
     Returns:
-        BamStats: the fragment size median, stdev and the read length in a object
+        the fragment size median, stdev and the read length in an object
     """
     import numpy as np
 
@@ -151,7 +155,7 @@ def compute_transcriptome_bam_stats(
     read_lengths = []
     for gene in genes:
         for read in bam_cache.fetch(
-            gene.chr, gene.start, gene.end, cache_if=lambda x: False, limit=sample_cap
+            gene.chr, gene.start, gene.end, cache_if=lambda _: False, limit=sample_cap
         ):
             if any(
                 [
@@ -213,27 +217,26 @@ def compute_transcriptome_bam_stats(
 
 
 def compute_genome_bam_stats(
-    bam_file_handle,
-    sample_bin_size,
-    sample_size,
-    min_mapping_quality=1,
-    sample_cap=10000,
-    distribution_fraction=0.99,
-):
+    bam_file_handle: 'BamCache',
+    sample_bin_size: int,
+    sample_size: int,
+    min_mapping_quality: int = 1,
+    sample_cap: int = 10000,
+    distribution_fraction: float = 0.99,
+) -> BamStats:
     """
     computes various statistical measures relating the input bam file
 
     Args:
-        bam_file_handle (pysam.AlignmentFile): the input bam file handle
-        sample_bin_size (int): how large to make the sample bin (in bp)
-        sample_size (int): the number of genes to compute stats over
-        log (Callable): outputs logging information
-        min_mapping_quality (int): the minimum mapping quality for a read to be used
-        sample_cap (int): maximum number of reads to collect for any given sample region
-        distribution_fraction (float): the proportion of the distribution to use in computing stdev
+        bam_file_handle: the input bam file handle
+        sample_bin_size: how large to make the sample bin (in bp)
+        sample_size: the number of genes to compute stats over
+        min_mapping_quality: the minimum mapping quality for a read to be used
+        sample_cap: maximum number of reads to collect for any given sample region
+        distribution_fraction: the proportion of the distribution to use in computing stdev
 
     Returns:
-        BamStats: the fragment size median, stdev and the read length in a object
+        the fragment size median, stdev and the read length in an object
     """
     import numpy as np
 
@@ -254,7 +257,7 @@ def compute_genome_bam_stats(
     read_lengths = []
     for bin_chr, bin_start, bin_end in bins:
         for read in bam_file_handle.fetch(
-            bin_chr, bin_start, bin_end, limit=sample_cap, cache_if=lambda x: False
+            bin_chr, bin_start, bin_end, limit=sample_cap, cache_if=lambda _: False
         ):
             if any(
                 [
diff --git a/src/mavis/blat.py b/src/mavis/blat.py
index f379df1b..cbfeaace 100644
--- a/src/mavis/blat.py
+++ b/src/mavis/blat.py
@@ -220,7 +220,7 @@ def pslx_row_to_pysam(
 
         Args:
             row: a row object from the 'read_pslx' method
-            bam_cache (BamCache): the bam file/cache to use as a template for creating reference_id from chr name
+            bam_cache: the bam file/cache to use as a template for creating reference_id from chr name
             reference_genome: reference sequence by template/chr name
 
         """
diff --git a/src/mavis/breakpoint.py b/src/mavis/breakpoint.py
index b28e4e4c..ac3b188f 100644
--- a/src/mavis/breakpoint.py
+++ b/src/mavis/breakpoint.py
@@ -1,5 +1,3 @@
-from __future__ import division
-
 from copy import copy as _copy
 from typing import Callable, Dict, List, Optional, Set, Tuple
 
@@ -24,15 +22,23 @@ class for storing information about a SV breakpoint
     def key(self):
         return (self.chr, self.start, self.end, self.orient, self.strand)
 
-    def __init__(self, chr, start, end=None, orient=ORIENT.NS, strand=STRAND.NS, seq=None):
+    def __init__(
+        self,
+        chr: str,
+        start: int,
+        end: Optional[int] = None,
+        orient=ORIENT.NS,
+        strand=STRAND.NS,
+        seq: Optional[str] = None,
+    ):
         """
         Args:
-            chr (str): the chromosome
-            start (int): the genomic position of the breakpoint
-            end (int): if the breakpoint is uncertain (a range) then specify the end of the range here
+            chr: the chromosome
+            start: the genomic position of the breakpoint
+            end: if the breakpoint is uncertain (a range) then specify the end of the range here
             orient (ORIENT): the orientation (which side is retained at the break)
             strand (STRAND): the strand
-            seq (str): the seq
+            seq: the seq
 
         Examples:
             >>> Breakpoint('1', 1, 2)
diff --git a/src/mavis/cluster/cluster.py b/src/mavis/cluster/cluster.py
index f2468cb1..0627ccb2 100644
--- a/src/mavis/cluster/cluster.py
+++ b/src/mavis/cluster/cluster.py
@@ -150,7 +150,7 @@ def merge_by_union(
     group_key: BreakpointPairGroupKey,
     weight_adjustment: int = 10,
     cluster_radius: int = 200,
-) -> List[BreakpointPair]:
+) -> Dict[BreakpointPairGroupKey, List[BreakpointPair]]:
     """
     for a given set of breakpoint pairs, merge the union of all pairs that are
     within the given distance (cluster_radius)
diff --git a/src/mavis/constants.py b/src/mavis/constants.py
index 35a4731b..ba656d4e 100644
--- a/src/mavis/constants.py
+++ b/src/mavis/constants.py
@@ -68,12 +68,12 @@ class SPLICE_TYPE(MavisNamespace):
 """the number of bases making up a codon"""
 
 
-def reverse_complement(s):
+def reverse_complement(s: str) -> str:
     """
     wrapper for the Bio.Seq reverse_complement method
 
     Args:
-        s (str): the input DNA sequence
+        s: the input DNA sequence
 
     Returns:
         str: the reverse complement of the input sequence
@@ -88,8 +88,8 @@ def reverse_complement(s):
     input_string = str(s)
     if not re.match('^[A-Za-z]*$', input_string):
         raise ValueError('unexpected sequence format. cannot reverse complement', input_string)
-    input_string = Seq(input_string, DNA_ALPHABET)
-    return str(input_string.reverse_complement())
+    seq = Seq(input_string, DNA_ALPHABET)
+    return str(seq.reverse_complement())
 
 
 def translate(s: str, reading_frame: int = 0) -> str:
diff --git a/src/mavis/illustrate/elements.py b/src/mavis/illustrate/elements.py
index 32995761..ee435ca2 100644
--- a/src/mavis/illustrate/elements.py
+++ b/src/mavis/illustrate/elements.py
@@ -3,7 +3,9 @@
 
 """
 import re
-from typing import List, Tuple
+from typing import TYPE_CHECKING, Dict, List, Optional, Tuple
+
+import svgwrite
 
 from ..annotate.variant import FusionTranscript
 from ..constants import CODON_SIZE, GIEMSA_STAIN, ORIENT, STRAND
@@ -18,6 +20,10 @@
     split_intervals_into_tracks,
 )
 
+if TYPE_CHECKING:
+    from ..annotate.base import BioInterval
+    from ..annotate.genomic import Exon, Gene
+    from ..breakpoint import Breakpoint
 # draw gene level view
 # draw gene box
 HEX_WHITE = '#FFFFFF'
@@ -26,7 +32,7 @@
 
 def draw_legend(
     config: DiagramSettings, canvas, swatches: List[Tuple[str, str]], border: bool = True
-):
+) -> svgwrite.container.Group:
     """
     generates an svg group object representing the legend
     """
@@ -88,14 +94,14 @@ def draw_legend(
 
 def draw_exon_track(
     config: DiagramSettings,
-    canvas,
+    canvas: svgwrite.drawing.Drawing,
     transcript,
     mapping: IntervalMapping,
     colors=None,
     genomic_min: int = None,
     genomic_max: int = None,
     translation=None,
-):
+) -> svgwrite.container.Group:
     """ """
     colors = {} if colors is None else colors
     main_group = canvas.g(class_='exon_track')
@@ -178,7 +184,7 @@ def draw_exon_track(
 
 def draw_transcript_with_translation(
     config: DiagramSettings,
-    canvas,
+    canvas: svgwrite.drawing.Drawing,
     translation,
     labels,
     colors,
@@ -186,7 +192,7 @@ def draw_transcript_with_translation(
     reference_genome=None,
     genomic_min=None,
     genomic_max=None,
-):
+) -> svgwrite.container.Group:
     main_group = canvas.g()
     pre_transcript = translation.transcript.reference_object
     spl_tx = translation.transcript
@@ -444,17 +450,16 @@ def draw_transcript_with_translation(
 
 
 def draw_ustranscript(
-    config,
-    canvas,
+    config: DiagramSettings,
+    canvas: svgwrite.drawing.Drawing,
     pre_transcript,
-    target_width=None,
-    breakpoints=[],
+    target_width: Optional[int] = None,
+    breakpoints: List['Breakpoint'] = [],
     labels=LabelMapping(),
     colors={},
     mapping=None,
-    reference_genome=None,
     masks=None,
-):
+) -> svgwrite.container.Group:
     """
     builds an svg group representing the transcript. Exons are drawn in a track with the splicing
     information and domains are drawn in separate tracks below
@@ -462,17 +467,13 @@ def draw_ustranscript(
     if there are multiple splicing variants then multiple exon tracks are drawn
 
     Args:
-        canvas (svgwrite.drawing.Drawing): the main svgwrite object used to create new svg elements
-        target_width (int): the target width of the diagram
+        canvas: the main svgwrite object used to create new svg elements
+        target_width: the target width of the diagram
         pre_transcript (Transcript): the transcript being drawn
-        exon_color (str): the color being used for the fill of the exons
-        utr_color (str): the color for the fill of the UTR regions
-        abrogated_splice_sites (List[int]): list of positions to ignore as splice sites
-        breakpoints (List[Breakpoint]): the breakpoints to overlay
+        breakpoints: the breakpoints to overlay
 
     Return:
-        svgwrite.container.Group: the group element for the transcript diagram
-                Has the added parameters of labels, height, and mapping
+        the group element for the transcript diagram. Has the added parameters of labels, height, and mapping
     """
     if pre_transcript.get_strand() not in [STRAND.POS, STRAND.NEG]:
         raise NotSpecifiedError('strand must be positive or negative to draw the pre_transcript')
@@ -610,31 +611,29 @@ def draw_ustranscript(
 
 
 def draw_genes(
-    config,
-    canvas,
-    genes,
-    target_width,
-    breakpoints=None,
-    colors=None,
-    labels=None,
-    plots=None,
-    masks=None,
-):
+    config: DiagramSettings,
+    canvas: svgwrite.drawing.Drawing,
+    genes: List['Gene'],
+    target_width: int,
+    breakpoints: Optional[List[Breakpoint]] = None,
+    colors: Optional[Dict[str, 'Gene']] = None,
+    labels: Optional[LabelMapping] = None,
+    plots: Optional[List] = None,
+    masks: Optional[List[Interval]] = None,
+) -> svgwrite.container.Group:
     """
     draws the genes given in order of their start position trying to minimize
     the number of tracks required to avoid overlap
 
     Args:
-        canvas (svgwrite.drawing.Drawing): the main svgwrite object used to create new svg elements
-        target_width (int): the target width of the diagram
-        genes (List[Gene]): the list of genes to draw
-        breakpoints (List[Breakpoint]): the breakpoints to overlay
-        colors (Dict[str,Gene]): dictionary of the colors assigned to each Gene as
-         fill
+        canvas: the main svgwrite object used to create new svg elements
+        target_width: the target width of the diagram
+        genes: the list of genes to draw
+        breakpoints: the breakpoints to overlay
+        colors: dictionary of the colors assigned to each Gene as fill
 
     Return:
-        svgwrite.container.Group: the group element for the diagram.
-            Has the added parameters of labels, height, and mapping
+        the group element for the diagram. Has the added parameters of labels, height, and mapping
     """
     # mutable default argument parameters
     breakpoints = [] if breakpoints is None else breakpoints
@@ -752,15 +751,22 @@ def draw_genes(
     return main_group
 
 
-def draw_vmarker(config, canvas, marker, width, height, label='', color=None):
+def draw_vmarker(
+    config: DiagramSettings,
+    canvas: svgwrite.drawing.Drawing,
+    marker: 'BioInterval',
+    width: int,
+    height: int,
+    label='',
+    color=None,
+) -> svgwrite.container.Group:
     """
     Args:
-        canvas (svgwrite.drawing.Drawing): the main svgwrite object used to create new svg elements
-        breakpoint (Breakpoint): the breakpoint to draw
-        width (int): the pixel width
-        height (int): the pixel height
+        canvas: the main svgwrite object used to create new svg elements
+        width: the pixel width
+        height: the pixel height
     Return:
-        svgwrite.container.Group: the group element for the diagram
+        the group element for the diagram
     """
     color = config.marker_color if color is None else color
     g = canvas.g(class_='marker')
@@ -789,15 +795,22 @@ def draw_vmarker(config, canvas, marker, width, height, label='', color=None):
     return g
 
 
-def draw_breakpoint(config, canvas, breakpoint, width, height, label=''):
+def draw_breakpoint(
+    config: DiagramSettings,
+    canvas: svgwrite.drawing.Drawing,
+    breakpoint: Breakpoint,
+    width: int,
+    height: int,
+    label: str = '',
+) -> svgwrite.container.Group:
     """
     Args:
-        canvas (svgwrite.drawing.Drawing): the main svgwrite object used to create new svg elements
-        breakpoint (Breakpoint): the breakpoint to draw
-        width (int): the pixel width
-        height (int): the pixel height
+        canvas: the main svgwrite object used to create new svg elements
+        breakpoint: the breakpoint to draw
+        width: the pixel width
+        height: the pixel height
     Return:
-        svgwrite.container.Group: the group element for the diagram
+        the group element for the diagram
     """
     g = canvas.g(class_='breakpoint')
     y = config.padding + config.breakpoint_label_font_size / 2
@@ -841,19 +854,28 @@ def draw_breakpoint(config, canvas, breakpoint, width, height, label=''):
     return g
 
 
-def draw_exon(config, canvas, exon, width, height, fill, label='', translation=None):
+def draw_exon(
+    config: DiagramSettings,
+    canvas: svgwrite.drawing.Drawing,
+    exon: 'Exon',
+    width: int,
+    height: int,
+    fill: str,
+    label: str = '',
+    translation=None,
+) -> svgwrite.container.Group:
     """
     generates the svg object representing an exon
 
     Args:
-        canvas (svgwrite.drawing.Drawing): the main svgwrite object used to create new svg elements
+        canvas: the main svgwrite object used to create new svg elements
         exon (Exon): the exon to draw
-        width (int): the pixel width
-        height (int): the pixel height
-        fill (str): the fill color to use for the exon
+        width: the pixel width
+        height: the pixel height
+        fill: the fill color to use for the exon
 
     Return:
-        svgwrite.container.Group: the group element for the diagram
+        the group element for the diagram
 
     Todo:
         add markers for exons with abrogated splice sites
@@ -892,13 +914,19 @@ def draw_exon(config, canvas, exon, width, height, fill, label='', translation=N
 
 
 def draw_template(
-    config, canvas, template, target_width, labels=None, colors=None, breakpoints=None
-):
+    config: DiagramSettings,
+    canvas: svgwrite.drawing.Drawing,
+    template,
+    target_width,
+    labels=None,
+    colors=None,
+    breakpoints=None,
+) -> svgwrite.container.Group:
     """
     Creates the template/chromosome illustration
 
     Return:
-        svgwrite.container.Group: the group element for the diagram
+        the group element for the diagram
     """
 
     labels = LabelMapping() if labels is None else labels
@@ -1019,19 +1047,27 @@ def draw_template(
     return group
 
 
-def draw_gene(config, canvas, gene, width, height, fill, label='', reference_genome=None):
+def draw_gene(
+    config: DiagramSettings,
+    canvas: svgwrite.drawing.Drawing,
+    gene: 'Gene',
+    width: int,
+    height: int,
+    fill: str,
+    label: str = '',
+) -> svgwrite.container.Group:
     """
     generates the svg object representing a gene
 
     Args:
-        canvas (svgwrite.drawing.Drawing): the main svgwrite object used to create new svg elements
-        gene (Gene): the gene to draw
-        width (int): the pixel width
-        height (int): the pixel height
-        fill (str): the fill color to use for the gene
+        canvas: the main svgwrite object used to create new svg elements
+        gene: the gene to draw
+        width: the pixel width
+        height: the pixel height
+        fill: the fill color to use for the gene
 
     Return:
-        svgwrite.container.Group: the group element for the diagram
+        the group element for the diagram
     """
 
     group = canvas.g(class_='gene')
diff --git a/src/mavis/illustrate/scatter.py b/src/mavis/illustrate/scatter.py
index fd78a66f..b71b52ac 100644
--- a/src/mavis/illustrate/scatter.py
+++ b/src/mavis/illustrate/scatter.py
@@ -1,39 +1,42 @@
 import os
+from typing import Optional
+
+import svgwrite
 
 from ..bam.read import pileup, sequenced_strand
-from ..interval import Interval
+from ..interval import Interval, IntervalMapping
 from ..util import logger
+from .constants import DiagramSettings
 
 
 def bam_to_scatter(
-    bam_file,
-    chrom,
-    start,
-    end,
+    bam_file: str,
+    chrom: str,
+    start: int,
+    end: int,
     density,
-    strand=None,
-    axis_name=None,
-    ymax=None,
-    min_mapping_quality=0,
-    strand_determining_read=2,
-    ymax_color='#FF0000',
-):
+    strand: Optional[str] = None,
+    axis_name: Optional[str] = None,
+    ymax: Optional[int] = None,
+    min_mapping_quality: int = 0,
+    strand_determining_read: int = 2,
+    ymax_color: str = '#FF0000',
+) -> 'ScatterPlot':
     """
     pull data from a bam file to set up a scatter plot of the pileup
 
     Args:
-        bam_file (str): path to the bam file
-        chrom (str): chromosome name
-        start (int): genomic start position for the plot
-        end (int): genomic end position for the plot
-        bin_size (int): number of genomic positions to group together and average to reduce data
+        bam_file: path to the bam file
+        chrom: chromosome name
+        start: genomic start position for the plot
+        end: genomic end position for the plot
         strand (STRAND): expected strand
-        axis_name (str): axis name
-        ymax (int): maximum value to plot the y axis
-        min_mapping_quality (int): minimum mapping quality for reads to be considered in the plot
+        axis_name: axis name
+        ymax: maximum value to plot the y axis
+        min_mapping_quality: minimum mapping quality for reads to be considered in the plot
 
     Returns:
-        ScatterPlot: the scatter plot representing the bam pileup
+        the scatter plot representing the bam pileup
     """
     import pysam
 
@@ -123,16 +126,20 @@ def __init__(
         self.density = density
 
 
-def draw_scatter(ds, canvas, plot, xmapping):
+def draw_scatter(
+    ds: DiagramSettings,
+    canvas: svgwrite.drawing.Drawing,
+    plot: ScatterPlot,
+    xmapping: IntervalMapping,
+) -> svgwrite.container.Group:
     """
     given a xmapping, draw the scatter plot svg group
 
     Args:
-        ds (DiagramSettings): the settings/constants to use for building the svg
-        canvas (svgwrite.canvas): the svgwrite object used to create new svg elements
-        plot (ScatterPlot): the plot to be drawn
-        xmapping (Dict[Interval,Interval]):
-            dict used for conversion of coordinates in the xaxis to pixel positions
+        ds: the settings/constants to use for building the svg
+        canvas: the svgwrite object used to create new svg elements
+        plot: the plot to be drawn
+        xmapping: dict used for conversion of coordinates in the xaxis to pixel positions
     """
     from shapely.geometry import Point as sPoint
 
diff --git a/src/mavis/interval.py b/src/mavis/interval.py
index 754d4ba0..3fdd205d 100644
--- a/src/mavis/interval.py
+++ b/src/mavis/interval.py
@@ -1,4 +1,4 @@
-from typing import Dict, List, Optional, Tuple
+from typing import List, Optional, Tuple
 
 
 class Interval:
@@ -10,9 +10,9 @@ class Interval:
     def __init__(self, start: int, end: Optional[int] = None, freq: int = 1, number_type=None):
         """
         Args:
-            start (int): the start of the interval (inclusive)
-            end (int): the end of the interval (inclusive)
-            freq (int): the frequency or weight of the interval
+            start: the start of the interval (inclusive)
+            end: the end of the interval (inclusive)
+            freq: the frequency or weight of the interval
         """
         self.start = start
         self.end = end if end is not None else start
@@ -98,13 +98,13 @@ def __getitem__(self, index):
         raise IndexError('index input accessor is out of bounds: 1 or 2 only', index)
 
     @classmethod
-    def overlaps(cls, first, other):
+    def overlaps(cls, first: 'Interval', other: 'Interval') -> bool:
         """
         checks if two intervals have any portion of their given ranges in common
 
         Args:
-            first (Interval): an interval to be compared
-            other (Interval): an interval to be compared
+            first: an interval to be compared
+            other: an interval to be compared
 
         Example:
             >>> Interval.overlaps(Interval(1, 4), Interval(5, 7))
diff --git a/src/mavis/main.py b/src/mavis/main.py
index 5c7c8ecd..c7e94bbe 100644
--- a/src/mavis/main.py
+++ b/src/mavis/main.py
@@ -6,7 +6,7 @@
 import platform
 import sys
 import time
-from typing import Dict
+from typing import Dict, List, Optional
 
 from mavis_config import validate_config
 from mavis_config.constants import SUBCOMMAND
@@ -160,13 +160,13 @@ def create_parser(argv):
     return parser, parser.parse_args(argv)
 
 
-def main(argv=None):
+def main(argv: Optional[List[str]] = None):
     """
     sets up the parser and checks the validity of command line args
     loads reference files and redirects into subcommand main functions
 
     Args:
-        argv (list): List of arguments, defaults to command line arguments
+        argv: List of arguments, defaults to command line arguments
     """
     if argv is None:  # need to do at run time or patching will not behave as expected
         argv = sys.argv[1:]
diff --git a/src/mavis/types.py b/src/mavis/types.py
index 23e3de15..080c69ac 100644
--- a/src/mavis/types.py
+++ b/src/mavis/types.py
@@ -2,10 +2,13 @@
 Helper classes for type hints
 """
 
-from typing import Dict, List, Tuple
+from typing import TYPE_CHECKING, Dict, List, Tuple
 
 from Bio.SeqRecord import SeqRecord
 
-ReferenceGenome = Dict[str, SeqRecord]
+if TYPE_CHECKING:
+    from .annotate.genomic import Gene
 
+ReferenceGenome = Dict[str, SeqRecord]
+ReferenceAnnotations = Dict[str, List['Gene']]
 CigarTuples = List[Tuple[int, int]]
diff --git a/src/mavis/util.py b/src/mavis/util.py
index 7009cb48..d4d34508 100644
--- a/src/mavis/util.py
+++ b/src/mavis/util.py
@@ -4,7 +4,7 @@
 import os
 import re
 import time
-from typing import Any, Callable, Dict, List, Set
+from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Set
 
 import pandas as pd
 from mavis_config import bash_expands
@@ -25,6 +25,9 @@
 from .error import InvalidRearrangement
 from .interval import Interval
 
+if TYPE_CHECKING:
+    from mavis.annotate.base import BioInterval
+
 ENV_VAR_PREFIX = 'MAVIS_'
 
 logger = logging.getLogger('mavis')
@@ -143,13 +146,15 @@ def mkdirp(dirname):
     return dirname
 
 
-def filter_on_overlap(bpps, regions_by_reference_name):
+def filter_on_overlap(
+    bpps: List[BreakpointPair], regions_by_reference_name: Dict[str, List['BioInterval']]
+):
     """
     filter a set of breakpoint pairs based on overlap with a set of genomic regions
 
     Args:
-        bpps (List[mavis.breakpoint.BreakpointPair]): list of breakpoint pairs to be filtered
-        regions_by_reference_name (Dict[str,List[mavis.annotate.base.BioInterval]]): regions to filter against
+        bpps: list of breakpoint pairs to be filtered
+        regions_by_reference_name: regions to filter against
     """
     logger.info(f'filtering from {len(bpps)} using overlaps with regions filter')
     failed = []
@@ -175,7 +180,9 @@ def filter_on_overlap(bpps, regions_by_reference_name):
     return passed, failed
 
 
-def read_inputs(inputs, required_columns=[], **kwargs):
+def read_inputs(
+    inputs: List[str], required_columns: List[str] = [], **kwargs
+) -> List[BreakpointPair]:
     bpps = []
 
     for finput in bash_expands(*inputs):
@@ -237,18 +244,19 @@ def get_connected_components(adj_matrix):
     return components
 
 
-def generate_complete_stamp(output_dir, prefix='MAVIS.', start_time=None):
+def generate_complete_stamp(
+    output_dir: str, prefix: str = 'MAVIS.', start_time: Optional[int] = None
+) -> str:
     """
     writes a complete stamp, optionally including the run time if start_time is given
 
     Args:
-        output_dir (str): path to the output dir the stamp should be written in
-        log (Callable): function to print logging messages to
-        prefix (str): prefix for the stamp name
-        start_time (int): the start time
+        output_dir: path to the output dir the stamp should be written in
+        prefix: prefix for the stamp name
+        start_time: the start time
 
     Return:
-        str: path to the complete stamp
+        path to the complete stamp
 
     Example:
         >>> generate_complete_stamp('some_output_dir')
diff --git a/src/mavis/validate/evidence.py b/src/mavis/validate/evidence.py
index 610c4206..cc89725f 100644
--- a/src/mavis/validate/evidence.py
+++ b/src/mavis/validate/evidence.py
@@ -75,9 +75,7 @@ def __init__(self, *pos, **kwargs):
             self.compatible_window1 = self.generate_window(compt_break1)
             self.compatible_window2 = self.generate_window(compt_break2)
 
-    def compute_fragment_size(
-        self, read: pysam.AlignedSegment, mate: Optional[pysam.AlignedSegment] = None
-    ):
+    def compute_fragment_size(self, read: pysam.AlignedSegment):
         return Interval(abs(read.template_length))
 
 

From f7038ee4a3824a63906c3dab7387b50ec58fb6c1 Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Tue, 1 Feb 2022 14:07:34 -0800
Subject: [PATCH 105/137] Fix type checking conditional imports

---
 src/mavis/align.py               |  2 +-
 src/mavis/annotate/variant.py    | 10 ++++++----
 src/mavis/illustrate/elements.py |  3 ++-
 3 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/src/mavis/align.py b/src/mavis/align.py
index 43e67be6..223afddb 100644
--- a/src/mavis/align.py
+++ b/src/mavis/align.py
@@ -388,7 +388,7 @@ def call_paired_read_event(read1, read2, is_stranded=False):
 
 def align_sequences(
     sequences: Dict[str, str],
-    input_bam_cache: BamCache,
+    input_bam_cache: 'BamCache',
     reference_genome: ReferenceGenome,
     aligner: str,
     aligner_reference: str,
diff --git a/src/mavis/annotate/variant.py b/src/mavis/annotate/variant.py
index aabb2488..dd92d84d 100644
--- a/src/mavis/annotate/variant.py
+++ b/src/mavis/annotate/variant.py
@@ -8,7 +8,7 @@
 from ..constants import COLUMNS, GENE_PRODUCT_TYPE, PROTOCOL, STOP_AA, STRAND, SVTYPE
 from ..error import NotSpecifiedError
 from ..interval import Interval
-from ..types import Annotations, ReferenceGenome
+from ..types import ReferenceAnnotations, ReferenceGenome
 from ..util import logger
 from .fusion import FusionTranscript, determine_prime
 from .genomic import Gene, IntergenicRegion, PreTranscript, Transcript
@@ -564,7 +564,7 @@ def overlapping_transcripts(
 
 
 def _gather_breakpoint_annotations(
-    ref_ann: Annotations, breakpoint: Breakpoint
+    ref_ann: ReferenceAnnotations, breakpoint: Breakpoint
 ) -> Tuple[
     List[Union[PreTranscript, IntergenicRegion]], List[Union[PreTranscript, IntergenicRegion]]
 ]:
@@ -657,7 +657,9 @@ def _gather_breakpoint_annotations(
     )
 
 
-def _gather_annotations(ref: Annotations, bp: BreakpointPair, proximity=None) -> List[Annotation]:
+def _gather_annotations(
+    ref: ReferenceAnnotations, bp: BreakpointPair, proximity=None
+) -> List[Annotation]:
     """
     each annotation is defined by the annotations selected at the breakpoints
     the other annotations are given relative to this
@@ -865,7 +867,7 @@ def choose_transcripts_by_priority(ann_list: List[Annotation]) -> List[Annotatio
 
 def annotate_events(
     bpps: List[BreakpointPair],
-    annotations: Annotations,
+    annotations: ReferenceAnnotations,
     reference_genome: ReferenceGenome,
     max_proximity: int = 5000,
     min_orf_size: int = 200,
diff --git a/src/mavis/illustrate/elements.py b/src/mavis/illustrate/elements.py
index ee435ca2..07ac4233 100644
--- a/src/mavis/illustrate/elements.py
+++ b/src/mavis/illustrate/elements.py
@@ -8,6 +8,7 @@
 import svgwrite
 
 from ..annotate.variant import FusionTranscript
+from ..breakpoint import Breakpoint
 from ..constants import CODON_SIZE, GIEMSA_STAIN, ORIENT, STRAND
 from ..error import DrawingFitError, NotSpecifiedError
 from ..interval import Interval, IntervalMapping
@@ -23,7 +24,7 @@
 if TYPE_CHECKING:
     from ..annotate.base import BioInterval
     from ..annotate.genomic import Exon, Gene
-    from ..breakpoint import Breakpoint
+
 # draw gene level view
 # draw gene box
 HEX_WHITE = '#FFFFFF'

From 256794c4aff344a9183e6b00ebbfde967e147599 Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Tue, 1 Feb 2022 14:10:13 -0800
Subject: [PATCH 106/137] Do not change argument signatures

---
 src/mavis/annotate/fusion.py     | 22 ++++++++++++++++++----
 src/mavis/illustrate/elements.py |  1 +
 src/mavis/validate/evidence.py   |  2 +-
 3 files changed, 20 insertions(+), 5 deletions(-)

diff --git a/src/mavis/annotate/fusion.py b/src/mavis/annotate/fusion.py
index 2666e1d9..0430753b 100644
--- a/src/mavis/annotate/fusion.py
+++ b/src/mavis/annotate/fusion.py
@@ -95,7 +95,14 @@ def map_region_to_genome(self, chr, interval_on_fusion, genome_interval, flipped
         self.mapping_to_chrs[Interval(interval_on_fusion[0], interval_on_fusion[1])] = chr
 
     @classmethod
-    def _build_single_gene_inversion(cls, ann, reference_genome: ReferenceGenome):
+    def _build_single_gene_inversion(
+        cls,
+        ann,
+        reference_genome: ReferenceGenome,
+        min_orf_size,
+        max_orf_cap,
+        min_domain_mapping_match,
+    ):
         """
         builds a fusion transcript for a single gene inversion. Note that this is an incomplete
         fusion transcript and still requires translations and domain information to be added
@@ -210,7 +217,14 @@ def _build_single_gene_inversion(cls, ann, reference_genome: ReferenceGenome):
         return fusion_pre_transcript
 
     @classmethod
-    def _build_single_gene_duplication(cls, ann, reference_genome):
+    def _build_single_gene_duplication(
+        cls,
+        ann,
+        reference_genome,
+        min_orf_size,
+        max_orf_cap,
+        min_domain_mapping_match,
+    ):
         """
         builds a fusion transcript for a single gene duplication. Note that this is an incomplete
         fusion transcript and still requires translations and domain information to be added
@@ -487,10 +501,10 @@ def build(
                             pass
         return fusion_pre_transcript
 
-    def get_seq(self):
+    def get_seq(self, reference_genome=None, ignore_cache=False):
         return PreTranscript.get_seq(self)
 
-    def get_cdna_seq(self, splicing_pattern):
+    def get_cdna_seq(self, splicing_pattern, reference_genome=None, ignore_cache=False):
         """
         Args:
             splicing_pattern (List[int]): the list of splicing positions
diff --git a/src/mavis/illustrate/elements.py b/src/mavis/illustrate/elements.py
index 07ac4233..92a68e0b 100644
--- a/src/mavis/illustrate/elements.py
+++ b/src/mavis/illustrate/elements.py
@@ -460,6 +460,7 @@ def draw_ustranscript(
     colors={},
     mapping=None,
     masks=None,
+    reference_genome=None,
 ) -> svgwrite.container.Group:
     """
     builds an svg group representing the transcript. Exons are drawn in a track with the splicing
diff --git a/src/mavis/validate/evidence.py b/src/mavis/validate/evidence.py
index cc89725f..db013e46 100644
--- a/src/mavis/validate/evidence.py
+++ b/src/mavis/validate/evidence.py
@@ -75,7 +75,7 @@ def __init__(self, *pos, **kwargs):
             self.compatible_window1 = self.generate_window(compt_break1)
             self.compatible_window2 = self.generate_window(compt_break2)
 
-    def compute_fragment_size(self, read: pysam.AlignedSegment):
+    def compute_fragment_size(self, read: pysam.AlignedSegment, mate=None):
         return Interval(abs(read.template_length))
 
 

From 5d81602aa5e4534b0a78eb747e4eb9dfe98e1240 Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Tue, 1 Feb 2022 22:04:44 -0800
Subject: [PATCH 107/137] Add support for converting gtf/gff3 files

resolves: #302
---
 docs/inputs/reference.md                      |  20 +
 docs/migrating.md                             |   3 +-
 src/mavis/annotate/annotations_schema.json    |  14 +-
 src/tools/convert_annotations_format.py       | 470 ++++++++++++++++++
 src/tools/migrate_mavis_annotations_2to3.py   | 190 -------
 .../data/Homo_sapiens.GRCh38.105.chr.kras.gtf | 186 +++++++
 .../data/Homo_sapiens.GRCh38.105.kras.gff3    |  19 +
 .../tools/test_convert_annotations_format.py  |  22 +
 8 files changed, 728 insertions(+), 196 deletions(-)
 create mode 100644 src/tools/convert_annotations_format.py
 delete mode 100644 src/tools/migrate_mavis_annotations_2to3.py
 create mode 100644 tests/tools/data/Homo_sapiens.GRCh38.105.chr.kras.gtf
 create mode 100644 tests/tools/data/Homo_sapiens.GRCh38.105.kras.gff3
 create mode 100644 tests/tools/test_convert_annotations_format.py

diff --git a/docs/inputs/reference.md b/docs/inputs/reference.md
index 5eff1cbb..854e1058 100644
--- a/docs/inputs/reference.md
+++ b/docs/inputs/reference.md
@@ -163,6 +163,26 @@ python tools/generate_ensembl_json.py -s human -r 75 -o ensembl_human_v75.json
 
 This will produce the JSON file required as input by MAVIS
 
+### Conversion from Other Standard Formats
+
+If you have a GTF or GFF3 file, you can convert it to the MAVIS JSON format with the helper script provided in the tools folder.
+
+```bash
+python src/tools/convert_annotations_format.py \
+    /path/to/gtf/file \
+    --input_type gtf \
+    output_mavis_annotations.json
+```
+
+or similarly for the GFF3 format
+
+```bash
+python src/tools/convert_annotations_format.py \
+    /path/to/gff3/file \
+    --input_type gff3 \
+    output_mavis_annotations.json
+```
+
 
 ## DGV (Database of Genomic Variants)
 
diff --git a/docs/migrating.md b/docs/migrating.md
index 91fb0d4f..db9c76b2 100644
--- a/docs/migrating.md
+++ b/docs/migrating.md
@@ -25,7 +25,8 @@ MAVIS is now integrated with snakemake instead of handling its own scheduling
 MAVIS no longer supports the previously deprecated tab-delimited format of the annotations file. If you are still using these files in your project we have provided a script to automatically convert them to the newer format in the tools directory
 
 ```bash
-python src/tools/migrate_mavis_annotations_to_jsonl.py \
+python src/tools/convert_annotations_format.py \
     /path/to/tab/file.tab \
+    --input_type v2 \
     /path/to/new/json/file.json
 ```
diff --git a/src/mavis/annotate/annotations_schema.json b/src/mavis/annotate/annotations_schema.json
index 04d0cc50..83f1b501 100644
--- a/src/mavis/annotate/annotations_schema.json
+++ b/src/mavis/annotate/annotations_schema.json
@@ -1,6 +1,6 @@
 {
     "$schema": "http://json-schema.org/draft-07/schema#",
-    "additionalProperties": false,
+    "additionalProperties": true,
     "properties": {
         "best_transcript_file": {
             "type": "string"
@@ -13,6 +13,7 @@
         },
         "genes": {
             "items": {
+                "additionalProperties": true,
                 "properties": {
                     "aliases": {
                         "default": [
@@ -50,6 +51,7 @@
                         "default": [
                         ],
                         "items": {
+                            "additionalProperties": true,
                             "properties": {
                                 "aliases": {
                                     "default": [
@@ -61,25 +63,26 @@
                                     "type": "array"
                                 },
                                 "cdna_coding_end": {
+                                    "default": null,
                                     "minimum": 1,
                                     "type": [
                                         "integer",
                                         "null"
-                                    ],
-                                    "default": null
+                                    ]
                                 },
                                 "cdna_coding_start": {
+                                    "default": null,
                                     "minimum": 1,
                                     "type": [
                                         "integer",
                                         "null"
-                                    ],
-                                    "default": null
+                                    ]
                                 },
                                 "domains": {
                                     "default": [
                                     ],
                                     "items": {
+                                        "additionalProperties": true,
                                         "properties": {
                                             "name": {
                                                 "minLength": 1,
@@ -116,6 +119,7 @@
                                     "defualt": [
                                     ],
                                     "items": {
+                                        "additionalProperties": true,
                                         "properties": {
                                             "end": {
                                                 "minimum": 1,
diff --git a/src/tools/convert_annotations_format.py b/src/tools/convert_annotations_format.py
new file mode 100644
index 00000000..13dfb8e5
--- /dev/null
+++ b/src/tools/convert_annotations_format.py
@@ -0,0 +1,470 @@
+import argparse
+import json
+import logging
+import re
+from typing import Dict
+
+import pandas as pd
+import pkg_resources
+from snakemake.utils import validate as snakemake_validate
+
+PANDAS_DEFAULT_NA_VALUES = [
+    '-1.#IND',
+    '1.#QNAN',
+    '1.#IND',
+    '-1.#QNAN',
+    '#N/A',
+    'N/A',
+    'NA',
+    '#NA',
+    'NULL',
+    'NaN',
+    '-NaN',
+    'nan',
+    '-nan',
+]
+
+
+def convert_tab_to_json(filepath: str) -> Dict:
+    """
+    given a file in the std input format (see below) reads and returns the genes (and sub-objects)
+
+    +-----------------------+---------------------------+-----------------------------------------------------------+
+    | column name           | example                   | description                                               |
+    +=======================+===========================+===========================================================+
+    | ensembl_transcript_id | ENST000001                |                                                           |
+    +-----------------------+---------------------------+-----------------------------------------------------------+
+    | ensembl_gene_id       | ENSG000001                |                                                           |
+    +-----------------------+---------------------------+-----------------------------------------------------------+
+    | strand                | -1                        | positive or negative 1                                    |
+    +-----------------------+---------------------------+-----------------------------------------------------------+
+    | cdna_coding_start     | 44                        | where translation begins relative to the start of the cdna|
+    +-----------------------+---------------------------+-----------------------------------------------------------+
+    | cdna_coding_end       | 150                       | where translation terminates                              |
+    +-----------------------+---------------------------+-----------------------------------------------------------+
+    | genomic_exon_ranges   | 100-201;334-412;779-830   | semi-colon delimited exon start/ends                      |
+    +-----------------------+---------------------------+-----------------------------------------------------------+
+    | AA_domain_ranges      | DBD:220-251,260-271       | semi-colon delimited list of domains                      |
+    +-----------------------+---------------------------+-----------------------------------------------------------+
+    | hugo_names            | KRAS                      | hugo gene name                                            |
+    +-----------------------+---------------------------+-----------------------------------------------------------+
+
+    Args:
+        filepath (str): path to the input tab-delimited file
+
+    Returns:
+        Dict: a dictionary with a single 'genes' key holding the list of gene records (with nested transcripts)
+
+    Warning:
+        exon/domain entries that fail to parse are logged and dropped; a missing required column raises KeyError
+    """
+
+    def parse_exon_list(row):
+        if pd.isnull(row):
+            return []
+        exons = []
+        for temp in re.split('[; ]', row):
+            try:
+                start, end = temp.split('-')
+                exons.append({'start': int(start), 'end': int(end)})
+            except Exception as err:
+                logging.warning(f'exon error: {repr(temp)}, {repr(err)}')
+        return exons
+
+    def parse_domain_list(row):
+        if pd.isnull(row):
+            return []
+        domains = []
+        for domain in row.split(';'):
+            try:
+                # split on the last ':' only so that domain names may themselves contain ':'
+                name, ranges = domain.rsplit(':', 1)
+                pairs = [x.split('-') for x in ranges.split(',')]
+                regions = [{'start': int(x), 'end': int(y)} for x, y in pairs]
+                domains.append({'name': name, 'regions': regions})
+            except Exception as err:
+                logging.warning(f'error in domain: {domain}, {row}, {repr(err)}')
+        return domains
+
+    df = pd.read_csv(
+        filepath,
+        dtype={
+            'ensembl_gene_id': str,
+            'ensembl_transcript_id': str,
+            'chr': str,
+            'cdna_coding_start': pd.Int64Dtype(),
+            'cdna_coding_end': pd.Int64Dtype(),
+            'AA_domain_ranges': str,
+            'genomic_exon_ranges': str,
+            'hugo_names': str,
+            'transcript_genomic_start': pd.Int64Dtype(),
+            'transcript_genomic_end': pd.Int64Dtype(),
+            'best_ensembl_transcript_id': str,
+            'gene_start': int,
+            'gene_end': int,
+        },
+        sep='\t',
+        comment='#',
+    )
+
+    for col in ['ensembl_gene_id', 'chr', 'ensembl_transcript_id', 'gene_start', 'gene_end']:
+        if col not in df:
+            raise KeyError(f'missing required column: {col}')
+
+    for col, parser in [
+        ('genomic_exon_ranges', parse_exon_list),
+        ('AA_domain_ranges', parse_domain_list),
+    ]:
+        if col in df:
+            df[col] = df[col].apply(parser)
+
+    genes = {}
+    rows = df.where(df.notnull(), None).to_dict('records')  # nulls become None in the row dicts
+
+    for row in rows:
+        gene = {
+            'chr': row['chr'],
+            'start': int(row['gene_start']),
+            'end': int(row['gene_end']),
+            'name': row['ensembl_gene_id'],
+            'strand': row['strand'],
+            'aliases': row['hugo_names'].split(';') if row.get('hugo_names') else [],
+            'transcripts': [],
+        }
+        if gene['strand'] in {'true', '1', '+', '+1', 'True', 1, True}:
+            gene['strand'] = '+'
+        elif gene['strand'] in {'false', '-1', '-', 'False', -1, False}:
+            gene['strand'] = '-'
+        if gene['name'] not in genes:
+            genes[gene['name']] = gene
+        else:
+            gene = genes[gene['name']]  # reuse the record created by the first row for this gene
+        is_best_transcript = (
+            row.get('best_ensembl_transcript_id', row['ensembl_transcript_id'])
+            == row['ensembl_transcript_id']
+        )
+        transcript = {
+            'is_best_transcript': is_best_transcript,
+            'name': row['ensembl_transcript_id'],
+            'exons': row.get('genomic_exon_ranges', []),
+            'domains': row.get('AA_domain_ranges', []),
+            'start': row.get('transcript_genomic_start'),
+            'end': row.get('transcript_genomic_end'),
+            'cdna_coding_start': row.get('cdna_coding_start'),
+            'cdna_coding_end': row.get('cdna_coding_end'),
+            'aliases': [],
+        }
+        for int_value in ['start', 'end', 'cdna_coding_start', 'cdna_coding_end']:
+            if transcript.get(int_value) is not None:
+                transcript[int_value] = int(transcript[int_value])
+        gene['transcripts'].append(transcript)
+
+    return {'genes': list(genes.values())}
+
+
+def convert_pandas_gff_to_mavis(df) -> Dict:
+    df['parent_type'] = df.Parent.str.split(':').str[0]
+    genelike_features = {'gene', 'ncRNA_gene', 'biological_region', 'pseudogene'}
+
+    def pull_alias_terms(row):
+        aliases = []
+        if row['Name']:
+            aliases.append(row['Name'])
+        if row['Alias']:
+            aliases.extend(row['Alias'].split(','))
+        return aliases
+
+    genes_by_id = {}
+    for row in df[df.type.isin(genelike_features)].to_dict('records'):
+        genes_by_id[row['feature_id']] = {
+            'start': row['start'],
+            'end': row['end'],
+            'chr': row['seqid'],
+            'aliases': pull_alias_terms(row),
+            'strand': row['strand'],
+            'transcripts': [],
+            'name': row['feature_id'] + '.' + row['version'],
+        }
+    logging.info(f'loaded {len(genes_by_id)} genes')
+
+    transcripts_by_id = {}
+
+    for row in df[df.parent_type == 'gene'].to_dict('records'):
+        for parent in row['Parent'].split(','):
+            gene_id = parent.split(':')[1]
+            if gene_id not in genes_by_id:
+                raise KeyError(
+                    f'cannot find gene ({gene_id}) skipping transcript ({row["feature_id"]})'
+                )
+            feature_id = row['feature_id']
+            transcript = {
+                'name': feature_id + '.' + row['version'],
+                'start': row['start'],
+                'end': row['end'],
+                'aliases': pull_alias_terms(row),
+                'domains': [],
+                'exons': [],
+                'cdna_coding_start': None,
+                'cdna_coding_end': None,
+            }
+            genes_by_id[gene_id]['transcripts'].append(transcript)
+            transcripts_by_id[feature_id] = transcript
+
+    logging.info(f'loaded {len(transcripts_by_id)} transcripts')
+    # now cds
+    cds_count = 0
+    for row in df[df.type == 'CDS'].to_dict('records'):
+        for parent in row['Parent'].split(','):
+            transcript_id = parent.split(':')[1]
+            if transcript_id not in transcripts_by_id:
+                raise KeyError(
+                    f'failed to find parent transcript ({transcript_id}) skipping cds ({row["feature_id"]})'
+                )
+            transcripts_by_id[transcript_id].update(
+                {'cdna_coding_start': row['start'], 'cdna_coding_end': row['end']}
+            )
+            cds_count += 1
+    logging.info(f'loaded {cds_count} cds regions')
+    # exons
+    exons_count = 0
+    for row in df[df.type == 'exon'].to_dict('records'):
+        for parent in row['Parent'].split(','):
+            transcript_id = parent.split(':')[1]
+            if transcript_id not in transcripts_by_id:
+                raise KeyError(
+                    f'failed to find parent transcript ({transcript_id}) skipping exon ({row["feature_id"]})'
+                )
+            transcripts_by_id[transcript_id]['exons'].append(
+                {
+                    'start': row['start'],
+                    'end': row['end'],
+                    'name': row['feature_id'] + '.' + row['version'],
+                }
+            )
+            exons_count += 1
+
+    logging.info(f'loaded {exons_count} exons')
+
+    result = {'genes': list(genes_by_id.values())}
+    try:
+        snakemake_validate(
+            result, pkg_resources.resource_filename('mavis.annotate', 'annotations_schema.json')
+        )
+    except Exception as err:
+        short_msg = '. '.join(
+            [line for line in str(err).split('\n') if line.strip()][:3]
+        )  # these can get super long
+        raise AssertionError(short_msg)
+    return result
+
+
+def convert_gff3_to_mavis(filename: str, no_alt) -> Dict:
+    """
+    Convert an input gff3 file to the JSON format accepted by MAVIS
+
+    Args:
+        filename: path to the tab-delimited GFF3 file
+        no_alt: when truthy, drop rows whose seqid starts with GL or KI
+    """
+    df = pd.read_csv(
+        filename,
+        sep='\t',
+        dtype={
+            'seqid': str,
+            'source': str,
+            'type': str,
+            'start': int,
+            'end': int,
+            'score': str,
+            'strand': str,
+            'phase': str,
+            'attributes': str,
+        },
+        index_col=False,
+        header=None,
+        comment='#',
+        na_values=['.'] + PANDAS_DEFAULT_NA_VALUES,
+        names=['seqid', 'source', 'type', 'start', 'end', 'score', 'strand', 'phase', 'attributes'],
+    )
+    if no_alt:
+        df = df[~df.seqid.str.startswith('GL')]
+        df = df[~df.seqid.str.startswith('KI')]
+    df['row_index'] = df.index
+
+    # UTR rows are not needed for the conversion (the set previously listed five_prime_UTR twice)
+    skip_types = {'five_prime_UTR', 'three_prime_UTR'}
+    df = df[~df.type.isin(skip_types)]
+
+    attribute_columns = [
+        'ID',
+        'Name',
+        'Alias',
+        'Parent',
+        'Target',
+        'Gap',
+        'Derives_from',
+        'Note',
+        'DBxref',
+        'Ontology_term',
+        'rank',
+        'version',
+        'exon_id',
+    ]
+
+    def split_attributes(row):
+        # ignore empty fields (e.g. trailing ';') and split on the first '=' so values may contain '='
+        result = dict(a.split('=', 1) for a in row.attributes.split(';') if a.strip())
+        return [row.row_index] + [result.get(c, '') for c in attribute_columns]
+
+    prev_size = df.shape[0]
+    attrs_df = pd.DataFrame(
+        df.apply(split_attributes, axis=1).tolist(),
+        columns=['row_index'] + attribute_columns,
+    )
+    assert prev_size == attrs_df.shape[0]
+    df = df.merge(attrs_df, on=['row_index'])
+
+    assert prev_size == df.shape[0]
+
+    df['feature_id'] = df['ID'].apply(lambda ident: ident.split(':')[1] if ':' in ident else '')
+    df.loc[(df.feature_id == '') & (df.type == 'exon'), 'feature_id'] = df.exon_id
+    df = df[df.feature_id != '']  # rows without a usable identifier cannot be linked to parents
+    df['strand'] = df.strand.fillna('?')
+    return convert_pandas_gff_to_mavis(df)
+
+
+def convert_gff2_to_mavis(filename: str, no_alt) -> Dict:
+    """
+    Convert an input gff2/gtf file to the JSON format accepted by MAVIS
+
+    Args:
+        filename: path to the tab-delimited GFF2/GTF file
+        no_alt: when truthy, drop rows whose seqname starts with GL or KI
+    """
+    df = pd.read_csv(
+        filename,
+        sep='\t',
+        dtype={
+            'seqname': str,
+            'source': str,
+            'feature': str,
+            'start': int,
+            'end': int,
+            'score': str,
+            'strand': str,
+            'frame': str,
+            'attribute': str,
+        },
+        index_col=False,
+        header=None,
+        comment='#',
+        na_values=['.'] + PANDAS_DEFAULT_NA_VALUES,
+        names=[
+            'seqname',
+            'source',
+            'feature',
+            'start',
+            'end',
+            'score',
+            'strand',
+            'frame',
+            'attribute',
+        ],
+    ).rename(
+        columns={'feature': 'type', 'seqname': 'seqid', 'frame': 'phase', 'attribute': 'attributes'}
+    )  # match gff3 names
+    df['row_index'] = df.index
+
+    if no_alt:
+        df = df[~df.seqid.str.startswith('GL')]
+        df = df[~df.seqid.str.startswith('KI')]
+
+    skip_types = {'five_prime_utr', 'three_prime_utr'}  # previously listed five_prime_utr twice
+    df = df[~df.type.isin(skip_types)]
+
+    attribute_columns = [
+        'gene_id',
+        'gene_version',
+        'gene_name',
+        'transcript_id',
+        'transcript_version',
+        'transcript_name',
+        'exon_id',
+        'exon_version',
+    ]
+
+    def split_attributes(row):
+        result = {}
+        for attr in row.attributes.split(';'):
+            if not attr.strip():  # empty or whitespace-only field, e.g. after a trailing ';'
+                continue
+            m = re.match(r'^\s*([^"]+)\s+"(.*)"$', attr)
+            if not m:
+                raise KeyError(f'attributes do not follow expected pattern: {attr}')
+            result[m.group(1)] = m.group(2)
+        return [row.row_index] + [result.get(c, '') for c in attribute_columns]
+
+    prev_size = df.shape[0]
+    attrs_df = pd.DataFrame(
+        df.apply(split_attributes, axis=1).tolist(),
+        columns=['row_index'] + attribute_columns,
+    )
+    assert prev_size == attrs_df.shape[0]
+    df = df.merge(attrs_df, on=['row_index'])
+    assert prev_size == df.shape[0]
+
+    df['Alias'] = ''
+    df['feature_id'] = ''
+    df.loc[df.type == 'exon', 'feature_id'] = df.exon_id
+    df.loc[df.type == 'gene', 'feature_id'] = df.gene_id
+    df.loc[df.type == 'transcript', 'feature_id'] = df.transcript_id
+
+    df['Name'] = ''
+    df.loc[df.type == 'gene', 'Name'] = df.gene_name
+    df.loc[df.type == 'transcript', 'Name'] = df.transcript_name
+
+    df['Parent'] = ''
+    df.loc[(df.type == 'transcript') & (df.gene_id != ''), 'Parent'] = 'gene:' + df.gene_id
+    df.loc[(df.type == 'exon') & (df.transcript_id != ''), 'Parent'] = (
+        'transcript:' + df.transcript_id
+    )
+    df.loc[(df.type == 'CDS') & (df.transcript_id != ''), 'Parent'] = (
+        'transcript:' + df.transcript_id
+    )
+
+    df['version'] = ''
+    df.loc[df.type == 'transcript', 'version'] = df.transcript_version
+    df.loc[df.type == 'exon', 'version'] = df.exon_version
+    df.loc[df.type == 'gene', 'version'] = df.gene_version
+
+    df['strand'] = df.strand.fillna('?')
+    return convert_pandas_gff_to_mavis(df)
+
+
+if __name__ == '__main__':
+    logging.basicConfig(format='{message}', style='{', level=logging.INFO)
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        'input', help='path to the input annotations file (MAVIS v2 tab-delimited, GTF, or GFF3)'
+    )
+    parser.add_argument('--input_type', default='v2', choices=['v2', 'gff3', 'gtf'])
+    parser.add_argument('output', help='path to the JSON output file')
+    parser.add_argument(
+        '--keep_alt',
+        help='do not filter out chromosome/seqid names starting with GL or KI',
+        action='store_true',
+        default=False,
+    )
+
+    args = parser.parse_args()
+
+    if args.input_type == 'v2':  # --keep_alt is not passed to the v2 tab converter
+        annotations = convert_tab_to_json(args.input)
+    elif args.input_type == 'gtf':
+        annotations = convert_gff2_to_mavis(args.input, not args.keep_alt)
+    else:  # gff3: argparse `choices` rules out any other value
+        annotations = convert_gff3_to_mavis(args.input, not args.keep_alt)
+
+    logging.info(f'writing: {args.output}')
+    with open(args.output, 'w') as fh:
+        fh.write(json.dumps(annotations, sort_keys=True))
diff --git a/src/tools/migrate_mavis_annotations_2to3.py b/src/tools/migrate_mavis_annotations_2to3.py
deleted file mode 100644
index 0fe9d39d..00000000
--- a/src/tools/migrate_mavis_annotations_2to3.py
+++ /dev/null
@@ -1,190 +0,0 @@
-import argparse
-import json
-import logging
-import re
-from typing import Dict
-
-import pandas as pd
-
-
-def convert_tab_to_json(filepath: str) -> Dict:
-    """
-    given a file in the std input format (see below) reads and return a list of genes (and sub-objects)
-
-    +-----------------------+---------------------------+-----------------------------------------------------------+
-    | column name           | example                   | description                                               |
-    +=======================+===========================+===========================================================+
-    | ensembl_transcript_id | ENST000001                |                                                           |
-    +-----------------------+---------------------------+-----------------------------------------------------------+
-    | ensembl_gene_id       | ENSG000001                |                                                           |
-    +-----------------------+---------------------------+-----------------------------------------------------------+
-    | strand                | -1                        | positive or negative 1                                    |
-    +-----------------------+---------------------------+-----------------------------------------------------------+
-    | cdna_coding_start     | 44                        | where translation begins relative to the start of the cdna|
-    +-----------------------+---------------------------+-----------------------------------------------------------+
-    | cdna_coding_end       | 150                       | where translation terminates                              |
-    +-----------------------+---------------------------+-----------------------------------------------------------+
-    | genomic_exon_ranges   | 100-201;334-412;779-830   | semi-colon demitited exon start/ends                      |
-    +-----------------------+---------------------------+-----------------------------------------------------------+
-    | AA_domain_ranges      | DBD:220-251,260-271       | semi-colon delimited list of domains                      |
-    +-----------------------+---------------------------+-----------------------------------------------------------+
-    | hugo_names            | KRAS                      | hugo gene name                                            |
-    +-----------------------+---------------------------+-----------------------------------------------------------+
-
-    Args:
-        filepath (str): path to the input tab-delimited file
-
-    Returns:
-        Dict[str,List[Gene]]: a dictionary keyed by chromosome name with values of list of genes on the chromosome
-
-    Warning:
-        does not load translations unless then start with 'M', end with '*' and have a length of multiple 3
-    """
-
-    def parse_exon_list(row):
-        if pd.isnull(row):
-            return []
-        exons = []
-        for temp in re.split('[; ]', row):
-            try:
-                start, end = temp.split('-')
-                exons.append({'start': int(start), 'end': int(end)})
-            except Exception as err:
-                logging.warning(f'exon error: {repr(temp)}, {repr(err)}')
-        return exons
-
-    def parse_domain_list(row):
-        if pd.isnull(row):
-            return []
-        domains = []
-        for domain in row.split(';'):
-            try:
-                name, temp = domain.rsplit(':')
-                temp = temp.split(',')
-                temp = [x.split('-') for x in temp]
-                regions = [{'start': int(x), 'end': int(y)} for x, y in temp]
-                domains.append({'name': name, 'regions': regions})
-            except Exception as err:
-                logging.warning(f'error in domain: {domain}, {row}, {repr(err)}')
-        return domains
-
-    df = pd.read_csv(
-        filepath,
-        dtype={
-            'ensembl_gene_id': str,
-            'ensembl_transcript_id': str,
-            'chr': str,
-            'cdna_coding_start': pd.Int64Dtype(),
-            'cdna_coding_end': pd.Int64Dtype(),
-            'AA_domain_ranges': str,
-            'genomic_exon_ranges': str,
-            'hugo_names': str,
-            'transcript_genomic_start': pd.Int64Dtype(),
-            'transcript_genomic_end': pd.Int64Dtype(),
-            'best_ensembl_transcript_id': str,
-            'gene_start': int,
-            'gene_end': int,
-        },
-        sep='\t',
-        comment='#',
-    )
-
-    for col in ['ensembl_gene_id', 'chr', 'ensembl_transcript_id', 'gene_start', 'gene_end']:
-        if col not in df:
-            raise KeyError(f'missing required column: {col}')
-
-    for col, parser in [
-        ('genomic_exon_ranges', parse_exon_list),
-        ('AA_domain_ranges', parse_domain_list),
-    ]:
-        if col in df:
-            df[col] = df[col].apply(parser)
-
-    genes = {}
-    rows = df.where(df.notnull(), None).to_dict('records')
-
-    for row in rows:
-        gene = {
-            'chr': row['chr'],
-            'start': int(row['gene_start']),
-            'end': int(row['gene_end']),
-            'name': row['ensembl_gene_id'],
-            'strand': row['strand'],
-            'aliases': row['hugo_names'].split(';') if row.get('hugo_names') else [],
-            'transcripts': [],
-        }
-        if gene['strand'] in {'true', '1', '+', '+1', 'True', 1, True}:
-            gene['strand'] = '+'
-        elif gene['strand'] in {'false', '-1', '-', 'False', -1, False}:
-            gene['strand'] = '-'
-        if gene['name'] not in genes:
-            genes[gene['name']] = gene
-        else:
-            gene = genes[gene['name']]
-        is_best_transcript = (
-            row.get('best_ensembl_transcript_id', row['ensembl_transcript_id'])
-            == row['ensembl_transcript_id']
-        )
-        transcript = {
-            'is_best_transcript': is_best_transcript,
-            'name': row['ensembl_transcript_id'],
-            'exons': row.get('genomic_exon_ranges', []),
-            'domains': row.get('AA_domain_ranges', []),
-            'start': row.get('transcript_genomic_start'),
-            'end': row.get('transcript_genomic_end'),
-            'cdna_coding_start': row.get('cdna_coding_start'),
-            'cdna_coding_end': row.get('cdna_coding_end'),
-            'aliases': [],
-        }
-        for int_value in ['start', 'end', 'cdna_coding_start', 'cdna_coding_end']:
-            if transcript.get(int_value) is not None:
-                transcript[int_value] = int(transcript[int_value])
-        gene['transcripts'].append(transcript)
-
-    return {'genes': list(genes.values())}
-
-
-if __name__ == '__main__':
-    logging.basicConfig(**{'format': '{message}', 'style': '{', 'level': logging.INFO})
-    parser = argparse.ArgumentParser()
-    parser.add_argument(
-        'input', help='path to the tab-delimated mavis v2 style reference annotations file'
-    )
-    parser.add_argument('output', help='path to the JSON output file')
-
-    args = parser.parse_args()
-
-    annotations = convert_tab_to_json(args.input)
-
-    rows = []
-    logging.info(f'writing: {args.output}')
-    if args.output_format == 'jsonl':
-        with open(args.output, 'w') as fh:
-            for gene in annotations['genes']:
-                fh.write(json.dumps(gene, sort_keys=True) + '\n')
-    elif args.output_format == 'json':
-        with open(args.output, 'w') as fh:
-            fh.write(json.dumps(annotations, sort_keys=True))
-    else:
-        transcripts = []
-
-        for gene in annotations['genes']:
-            meta = {**gene}
-            del meta['transcripts']
-            if gene['transcripts']:
-                for transcript in gene['transcripts']:
-                    transcripts.append(
-                        {**meta, **{f'transcript.{k}': v for k, v in transcript.items()}}
-                    )
-            else:
-                transcripts.append(meta)
-        df = pd.json_normalize(transcripts, max_level=1)
-        json_cols = [
-            'aliases',
-            'transcript.aliases',
-            'transcript.exons',
-            'transcript.domains',
-        ]
-        for col in json_cols:
-            df[col] = df[col].apply(json.dumps)
-        df.to_csv(args.output, index=False, sep='\t')
diff --git a/tests/tools/data/Homo_sapiens.GRCh38.105.chr.kras.gtf b/tests/tools/data/Homo_sapiens.GRCh38.105.chr.kras.gtf
new file mode 100644
index 00000000..ce1f904f
--- /dev/null
+++ b/tests/tools/data/Homo_sapiens.GRCh38.105.chr.kras.gtf
@@ -0,0 +1,186 @@
+6	havana	gene	54770583	54771134	.	+	.	gene_id "ENSG00000220635"; gene_version "2"; gene_name "KRASP1"; gene_source "havana"; gene_biotype "processed_pseudogene";
+6	havana	transcript	54770583	54771134	.	+	.	gene_id "ENSG00000220635"; gene_version "2"; transcript_id "ENST00000407852"; transcript_version "2"; gene_name "KRASP1"; gene_source "havana"; gene_biotype "processed_pseudogene"; transcript_name "KRASP1-201"; transcript_source "havana"; transcript_biotype "processed_pseudogene"; tag "basic"; transcript_support_level "NA";
+6	havana	exon	54770583	54771134	.	+	.	gene_id "ENSG00000220635"; gene_version "2"; transcript_id "ENST00000407852"; transcript_version "2"; exon_number "1"; gene_name "KRASP1"; gene_source "havana"; gene_biotype "processed_pseudogene"; transcript_name "KRASP1-201"; transcript_source "havana"; transcript_biotype "processed_pseudogene"; exon_id "ENSE00001550689"; exon_version "2"; tag "basic"; transcript_support_level "NA";
+12	ensembl_havana	gene	25205246	25250936	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding";
+12	havana	transcript	25205246	25225773	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000690406"; transcript_version "1"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-211"; transcript_source "havana"; transcript_biotype "nonsense_mediated_decay"; tag "cds_start_NF"; tag "mRNA_start_NF";
+12	havana	exon	25225614	25225773	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000690406"; transcript_version "1"; exon_number "1"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-211"; transcript_source "havana"; transcript_biotype "nonsense_mediated_decay"; exon_id "ENSE00001644818"; exon_version "1"; tag "cds_start_NF"; tag "mRNA_start_NF";
+12	havana	CDS	25225614	25225773	.	-	1	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000690406"; transcript_version "1"; exon_number "1"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-211"; transcript_source "havana"; transcript_biotype "nonsense_mediated_decay"; protein_id "ENSP00000509798"; protein_version "1"; tag "cds_start_NF"; tag "mRNA_start_NF";
+12	havana	exon	25213114	25213206	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000690406"; transcript_version "1"; exon_number "2"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-211"; transcript_source "havana"; transcript_biotype "nonsense_mediated_decay"; exon_id "ENSE00003927570"; exon_version "1"; tag "cds_start_NF"; tag "mRNA_start_NF";
+12	havana	stop_codon	25213204	25213206	.	-	0	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000690406"; transcript_version "1"; exon_number "2"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-211"; transcript_source "havana"; transcript_biotype "nonsense_mediated_decay"; tag "cds_start_NF"; tag "mRNA_start_NF";
+12	havana	exon	25205246	25209911	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000690406"; transcript_version "1"; exon_number "3"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-211"; transcript_source "havana"; transcript_biotype "nonsense_mediated_decay"; exon_id "ENSE00002477035"; exon_version "3"; tag "cds_start_NF"; tag "mRNA_start_NF";
+12	havana	three_prime_utr	25213114	25213203	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000690406"; transcript_version "1"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-211"; transcript_source "havana"; transcript_biotype "nonsense_mediated_decay"; tag "cds_start_NF"; tag "mRNA_start_NF";
+12	havana	three_prime_utr	25205246	25209911	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000690406"; transcript_version "1"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-211"; transcript_source "havana"; transcript_biotype "nonsense_mediated_decay"; tag "cds_start_NF"; tag "mRNA_start_NF";
+12	ensembl_havana	transcript	25205246	25250929	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000256078"; transcript_version "10"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-201"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8703"; tag "basic"; transcript_support_level "1 (assigned to previous version 8)";
+12	ensembl_havana	exon	25250751	25250929	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000256078"; transcript_version "10"; exon_number "1"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-201"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8703"; exon_id "ENSE00003903543"; exon_version "1"; tag "basic"; transcript_support_level "1 (assigned to previous version 8)";
+12	ensembl_havana	exon	25245274	25245395	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000256078"; transcript_version "10"; exon_number "2"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-201"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8703"; exon_id "ENSE00000936617"; exon_version "1"; tag "basic"; transcript_support_level "1 (assigned to previous version 8)";
+12	ensembl_havana	CDS	25245274	25245384	.	-	0	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000256078"; transcript_version "10"; exon_number "2"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-201"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8703"; protein_id "ENSP00000256078"; protein_version "5"; tag "basic"; transcript_support_level "1 (assigned to previous version 8)";
+12	ensembl_havana	start_codon	25245382	25245384	.	-	0	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000256078"; transcript_version "10"; exon_number "2"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-201"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8703"; tag "basic"; transcript_support_level "1 (assigned to previous version 8)";
+12	ensembl_havana	exon	25227234	25227412	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000256078"; transcript_version "10"; exon_number "3"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-201"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8703"; exon_id "ENSE00001719809"; exon_version "1"; tag "basic"; transcript_support_level "1 (assigned to previous version 8)";
+12	ensembl_havana	CDS	25227234	25227412	.	-	0	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000256078"; transcript_version "10"; exon_number "3"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-201"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8703"; protein_id "ENSP00000256078"; protein_version "5"; tag "basic"; transcript_support_level "1 (assigned to previous version 8)";
+12	ensembl_havana	exon	25225614	25225773	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000256078"; transcript_version "10"; exon_number "4"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-201"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8703"; exon_id "ENSE00001644818"; exon_version "1"; tag "basic"; transcript_support_level "1 (assigned to previous version 8)";
+12	ensembl_havana	CDS	25225614	25225773	.	-	1	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000256078"; transcript_version "10"; exon_number "4"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-201"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8703"; protein_id "ENSP00000256078"; protein_version "5"; tag "basic"; transcript_support_level "1 (assigned to previous version 8)";
+12	ensembl_havana	exon	25215437	25215560	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000256078"; transcript_version "10"; exon_number "5"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-201"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8703"; exon_id "ENSE00001189807"; exon_version "5"; tag "basic"; transcript_support_level "1 (assigned to previous version 8)";
+12	ensembl_havana	CDS	25215444	25215560	.	-	0	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000256078"; transcript_version "10"; exon_number "5"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-201"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8703"; protein_id "ENSP00000256078"; protein_version "5"; tag "basic"; transcript_support_level "1 (assigned to previous version 8)";
+12	ensembl_havana	stop_codon	25215441	25215443	.	-	0	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000256078"; transcript_version "10"; exon_number "5"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-201"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8703"; tag "basic"; transcript_support_level "1 (assigned to previous version 8)";
+12	ensembl_havana	exon	25205246	25209911	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000256078"; transcript_version "10"; exon_number "6"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-201"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8703"; exon_id "ENSE00002477035"; exon_version "3"; tag "basic"; transcript_support_level "1 (assigned to previous version 8)";
+12	ensembl_havana	five_prime_utr	25250751	25250929	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000256078"; transcript_version "10"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-201"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8703"; tag "basic"; transcript_support_level "1 (assigned to previous version 8)";
+12	ensembl_havana	five_prime_utr	25245385	25245395	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000256078"; transcript_version "10"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-201"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8703"; tag "basic"; transcript_support_level "1 (assigned to previous version 8)";
+12	ensembl_havana	three_prime_utr	25215437	25215440	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000256078"; transcript_version "10"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-201"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8703"; tag "basic"; transcript_support_level "1 (assigned to previous version 8)";
+12	ensembl_havana	three_prime_utr	25205246	25209911	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000256078"; transcript_version "10"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-201"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8703"; tag "basic"; transcript_support_level "1 (assigned to previous version 8)";
+12	ensembl_havana	transcript	25205246	25250929	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000311936"; transcript_version "8"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-202"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8702"; tag "basic"; transcript_support_level "1 (assigned to previous version 7)";
+12	ensembl_havana	exon	25250751	25250929	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000311936"; transcript_version "8"; exon_number "1"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-202"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8702"; exon_id "ENSE00003903543"; exon_version "1"; tag "basic"; transcript_support_level "1 (assigned to previous version 7)";
+12	ensembl_havana	exon	25245274	25245395	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000311936"; transcript_version "8"; exon_number "2"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-202"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8702"; exon_id "ENSE00000936617"; exon_version "1"; tag "basic"; transcript_support_level "1 (assigned to previous version 7)";
+12	ensembl_havana	CDS	25245274	25245384	.	-	0	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000311936"; transcript_version "8"; exon_number "2"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-202"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8702"; protein_id "ENSP00000308495"; protein_version "3"; tag "basic"; transcript_support_level "1 (assigned to previous version 7)";
+12	ensembl_havana	start_codon	25245382	25245384	.	-	0	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000311936"; transcript_version "8"; exon_number "2"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-202"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8702"; tag "basic"; transcript_support_level "1 (assigned to previous version 7)";
+12	ensembl_havana	exon	25227234	25227412	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000311936"; transcript_version "8"; exon_number "3"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-202"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8702"; exon_id "ENSE00001719809"; exon_version "1"; tag "basic"; transcript_support_level "1 (assigned to previous version 7)";
+12	ensembl_havana	CDS	25227234	25227412	.	-	0	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000311936"; transcript_version "8"; exon_number "3"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-202"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8702"; protein_id "ENSP00000308495"; protein_version "3"; tag "basic"; transcript_support_level "1 (assigned to previous version 7)";
+12	ensembl_havana	exon	25225614	25225773	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000311936"; transcript_version "8"; exon_number "4"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-202"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8702"; exon_id "ENSE00001644818"; exon_version "1"; tag "basic"; transcript_support_level "1 (assigned to previous version 7)";
+12	ensembl_havana	CDS	25225614	25225773	.	-	1	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000311936"; transcript_version "8"; exon_number "4"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-202"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8702"; protein_id "ENSP00000308495"; protein_version "3"; tag "basic"; transcript_support_level "1 (assigned to previous version 7)";
+12	ensembl_havana	exon	25205246	25209911	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000311936"; transcript_version "8"; exon_number "5"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-202"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8702"; exon_id "ENSE00002456976"; exon_version "2"; tag "basic"; transcript_support_level "1 (assigned to previous version 7)";
+12	ensembl_havana	CDS	25209798	25209911	.	-	0	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000311936"; transcript_version "8"; exon_number "5"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-202"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8702"; protein_id "ENSP00000308495"; protein_version "3"; tag "basic"; transcript_support_level "1 (assigned to previous version 7)";
+12	ensembl_havana	stop_codon	25209795	25209797	.	-	0	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000311936"; transcript_version "8"; exon_number "5"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-202"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8702"; tag "basic"; transcript_support_level "1 (assigned to previous version 7)";
+12	ensembl_havana	five_prime_utr	25250751	25250929	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000311936"; transcript_version "8"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-202"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8702"; tag "basic"; transcript_support_level "1 (assigned to previous version 7)";
+12	ensembl_havana	five_prime_utr	25245385	25245395	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000311936"; transcript_version "8"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-202"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8702"; tag "basic"; transcript_support_level "1 (assigned to previous version 7)";
+12	ensembl_havana	three_prime_utr	25205246	25209794	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000311936"; transcript_version "8"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-202"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8702"; tag "basic"; transcript_support_level "1 (assigned to previous version 7)";
+12	havana	transcript	25205250	25250908	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000686877"; transcript_version "1"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-206"; transcript_source "havana"; transcript_biotype "nonsense_mediated_decay";
+12	havana	exon	25250751	25250908	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000686877"; transcript_version "1"; exon_number "1"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-206"; transcript_source "havana"; transcript_biotype "nonsense_mediated_decay"; exon_id "ENSE00003928105"; exon_version "1";
+12	havana	exon	25245274	25245395	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000686877"; transcript_version "1"; exon_number "2"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-206"; transcript_source "havana"; transcript_biotype "nonsense_mediated_decay"; exon_id "ENSE00000936617"; exon_version "1";
+12	havana	CDS	25245274	25245384	.	-	0	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000686877"; transcript_version "1"; exon_number "2"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-206"; transcript_source "havana"; transcript_biotype "nonsense_mediated_decay"; protein_id "ENSP00000510431"; protein_version "1";
+12	havana	start_codon	25245382	25245384	.	-	0	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000686877"; transcript_version "1"; exon_number "2"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-206"; transcript_source "havana"; transcript_biotype "nonsense_mediated_decay";
+12	havana	exon	25230483	25230621	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000686877"; transcript_version "1"; exon_number "3"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-206"; transcript_source "havana"; transcript_biotype "nonsense_mediated_decay"; exon_id "ENSE00003930732"; exon_version "1";
+12	havana	CDS	25230568	25230621	.	-	0	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000686877"; transcript_version "1"; exon_number "3"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-206"; transcript_source "havana"; transcript_biotype "nonsense_mediated_decay"; protein_id "ENSP00000510431"; protein_version "1";
+12	havana	stop_codon	25230565	25230567	.	-	0	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000686877"; transcript_version "1"; exon_number "3"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-206"; transcript_source "havana"; transcript_biotype "nonsense_mediated_decay";
+12	havana	exon	25227234	25227412	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000686877"; transcript_version "1"; exon_number "4"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-206"; transcript_source "havana"; transcript_biotype "nonsense_mediated_decay"; exon_id "ENSE00003930847"; exon_version "1";
+12	havana	exon	25225614	25225773	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000686877"; transcript_version "1"; exon_number "5"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-206"; transcript_source "havana"; transcript_biotype "nonsense_mediated_decay"; exon_id "ENSE00003937476"; exon_version "1";
+12	havana	exon	25205250	25209911	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000686877"; transcript_version "1"; exon_number "6"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-206"; transcript_source "havana"; transcript_biotype "nonsense_mediated_decay"; exon_id "ENSE00003934058"; exon_version "1";
+12	havana	five_prime_utr	25250751	25250908	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000686877"; transcript_version "1"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-206"; transcript_source "havana"; transcript_biotype "nonsense_mediated_decay";
+12	havana	five_prime_utr	25245385	25245395	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000686877"; transcript_version "1"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-206"; transcript_source "havana"; transcript_biotype "nonsense_mediated_decay";
+12	havana	three_prime_utr	25230483	25230564	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000686877"; transcript_version "1"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-206"; transcript_source "havana"; transcript_biotype "nonsense_mediated_decay";
+12	havana	three_prime_utr	25227234	25227412	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000686877"; transcript_version "1"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-206"; transcript_source "havana"; transcript_biotype "nonsense_mediated_decay";
+12	havana	three_prime_utr	25225614	25225773	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000686877"; transcript_version "1"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-206"; transcript_source "havana"; transcript_biotype "nonsense_mediated_decay";
+12	havana	three_prime_utr	25205250	25209911	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000686877"; transcript_version "1"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-206"; transcript_source "havana"; transcript_biotype "nonsense_mediated_decay";
+12	havana	transcript	25205258	25250935	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000685328"; transcript_version "1"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-205"; transcript_source "havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8702"; tag "basic";
+12	havana	exon	25250764	25250935	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000685328"; transcript_version "1"; exon_number "1"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-205"; transcript_source "havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8702"; exon_id "ENSE00003934964"; exon_version "1"; tag "basic";
+12	havana	exon	25245274	25245395	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000685328"; transcript_version "1"; exon_number "2"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-205"; transcript_source "havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8702"; exon_id "ENSE00000936617"; exon_version "1"; tag "basic";
+12	havana	CDS	25245274	25245384	.	-	0	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000685328"; transcript_version "1"; exon_number "2"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-205"; transcript_source "havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8702"; protein_id "ENSP00000508921"; protein_version "1"; tag "basic";
+12	havana	start_codon	25245382	25245384	.	-	0	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000685328"; transcript_version "1"; exon_number "2"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-205"; transcript_source "havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8702"; tag "basic";
+12	havana	exon	25227234	25227412	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000685328"; transcript_version "1"; exon_number "3"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-205"; transcript_source "havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8702"; exon_id "ENSE00001719809"; exon_version "1"; tag "basic";
+12	havana	CDS	25227234	25227412	.	-	0	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000685328"; transcript_version "1"; exon_number "3"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-205"; transcript_source "havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8702"; protein_id "ENSP00000508921"; protein_version "1"; tag "basic";
+12	havana	exon	25225614	25225773	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000685328"; transcript_version "1"; exon_number "4"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-205"; transcript_source "havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8702"; exon_id "ENSE00001644818"; exon_version "1"; tag "basic";
+12	havana	CDS	25225614	25225773	.	-	1	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000685328"; transcript_version "1"; exon_number "4"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-205"; transcript_source "havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8702"; protein_id "ENSP00000508921"; protein_version "1"; tag "basic";
+12	havana	exon	25205258	25209911	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000685328"; transcript_version "1"; exon_number "5"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-205"; transcript_source "havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8702"; exon_id "ENSE00003924510"; exon_version "1"; tag "basic";
+12	havana	CDS	25209798	25209911	.	-	0	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000685328"; transcript_version "1"; exon_number "5"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-205"; transcript_source "havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8702"; protein_id "ENSP00000508921"; protein_version "1"; tag "basic";
+12	havana	stop_codon	25209795	25209797	.	-	0	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000685328"; transcript_version "1"; exon_number "5"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-205"; transcript_source "havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8702"; tag "basic";
+12	havana	five_prime_utr	25250764	25250935	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000685328"; transcript_version "1"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-205"; transcript_source "havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8702"; tag "basic";
+12	havana	five_prime_utr	25245385	25245395	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000685328"; transcript_version "1"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-205"; transcript_source "havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8702"; tag "basic";
+12	havana	three_prime_utr	25205258	25209794	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000685328"; transcript_version "1"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-205"; transcript_source "havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8702"; tag "basic";
+12	havana	transcript	25205260	25250899	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000693229"; transcript_version "1"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-214"; transcript_source "havana"; transcript_biotype "protein_coding"; tag "basic";
+12	havana	exon	25250751	25250899	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000693229"; transcript_version "1"; exon_number "1"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-214"; transcript_source "havana"; transcript_biotype "protein_coding"; exon_id "ENSE00003938559"; exon_version "1"; tag "basic";
+12	havana	exon	25245274	25245395	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000693229"; transcript_version "1"; exon_number "2"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-214"; transcript_source "havana"; transcript_biotype "protein_coding"; exon_id "ENSE00000936617"; exon_version "1"; tag "basic";
+12	havana	CDS	25245274	25245384	.	-	0	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000693229"; transcript_version "1"; exon_number "2"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-214"; transcript_source "havana"; transcript_biotype "protein_coding"; protein_id "ENSP00000509223"; protein_version "1"; tag "basic";
+12	havana	start_codon	25245382	25245384	.	-	0	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000693229"; transcript_version "1"; exon_number "2"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-214"; transcript_source "havana"; transcript_biotype "protein_coding"; tag "basic";
+12	havana	exon	25227234	25227337	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000693229"; transcript_version "1"; exon_number "3"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-214"; transcript_source "havana"; transcript_biotype "protein_coding"; exon_id "ENSE00003923411"; exon_version "1"; tag "basic";
+12	havana	CDS	25227234	25227337	.	-	0	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000693229"; transcript_version "1"; exon_number "3"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-214"; transcript_source "havana"; transcript_biotype "protein_coding"; protein_id "ENSP00000509223"; protein_version "1"; tag "basic";
+12	havana	exon	25225614	25225773	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000693229"; transcript_version "1"; exon_number "4"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-214"; transcript_source "havana"; transcript_biotype "protein_coding"; exon_id "ENSE00001644818"; exon_version "1"; tag "basic";
+12	havana	CDS	25225614	25225773	.	-	1	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000693229"; transcript_version "1"; exon_number "4"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-214"; transcript_source "havana"; transcript_biotype "protein_coding"; protein_id "ENSP00000509223"; protein_version "1"; tag "basic";
+12	havana	exon	25205260	25209911	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000693229"; transcript_version "1"; exon_number "5"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-214"; transcript_source "havana"; transcript_biotype "protein_coding"; exon_id "ENSE00003927775"; exon_version "1"; tag "basic";
+12	havana	CDS	25209798	25209911	.	-	0	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000693229"; transcript_version "1"; exon_number "5"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-214"; transcript_source "havana"; transcript_biotype "protein_coding"; protein_id "ENSP00000509223"; protein_version "1"; tag "basic";
+12	havana	stop_codon	25209795	25209797	.	-	0	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000693229"; transcript_version "1"; exon_number "5"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-214"; transcript_source "havana"; transcript_biotype "protein_coding"; tag "basic";
+12	havana	five_prime_utr	25250751	25250899	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000693229"; transcript_version "1"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-214"; transcript_source "havana"; transcript_biotype "protein_coding"; tag "basic";
+12	havana	five_prime_utr	25245385	25245395	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000693229"; transcript_version "1"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-214"; transcript_source "havana"; transcript_biotype "protein_coding"; tag "basic";
+12	havana	three_prime_utr	25205260	25209794	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000693229"; transcript_version "1"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-214"; transcript_source "havana"; transcript_biotype "protein_coding"; tag "basic";
+12	havana	transcript	25205270	25250927	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000687356"; transcript_version "1"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-208"; transcript_source "havana"; transcript_biotype "nonsense_mediated_decay";
+12	havana	exon	25250751	25250927	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000687356"; transcript_version "1"; exon_number "1"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-208"; transcript_source "havana"; transcript_biotype "nonsense_mediated_decay"; exon_id "ENSE00003930705"; exon_version "1";
+12	havana	exon	25245274	25245395	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000687356"; transcript_version "1"; exon_number "2"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-208"; transcript_source "havana"; transcript_biotype "nonsense_mediated_decay"; exon_id "ENSE00000936617"; exon_version "1";
+12	havana	CDS	25245274	25245384	.	-	0	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000687356"; transcript_version "1"; exon_number "2"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-208"; transcript_source "havana"; transcript_biotype "nonsense_mediated_decay"; protein_id "ENSP00000510511"; protein_version "1";
+12	havana	start_codon	25245382	25245384	.	-	0	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000687356"; transcript_version "1"; exon_number "2"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-208"; transcript_source "havana"; transcript_biotype "nonsense_mediated_decay";
+12	havana	exon	25225614	25225773	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000687356"; transcript_version "1"; exon_number "3"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-208"; transcript_source "havana"; transcript_biotype "nonsense_mediated_decay"; exon_id "ENSE00003930939"; exon_version "1";
+12	havana	CDS	25225765	25225773	.	-	0	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000687356"; transcript_version "1"; exon_number "3"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-208"; transcript_source "havana"; transcript_biotype "nonsense_mediated_decay"; protein_id "ENSP00000510511"; protein_version "1";
+12	havana	stop_codon	25225762	25225764	.	-	0	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000687356"; transcript_version "1"; exon_number "3"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-208"; transcript_source "havana"; transcript_biotype "nonsense_mediated_decay";
+12	havana	exon	25205270	25209911	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000687356"; transcript_version "1"; exon_number "4"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-208"; transcript_source "havana"; transcript_biotype "nonsense_mediated_decay"; exon_id "ENSE00003933328"; exon_version "1";
+12	havana	five_prime_utr	25250751	25250927	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000687356"; transcript_version "1"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-208"; transcript_source "havana"; transcript_biotype "nonsense_mediated_decay";
+12	havana	five_prime_utr	25245385	25245395	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000687356"; transcript_version "1"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-208"; transcript_source "havana"; transcript_biotype "nonsense_mediated_decay";
+12	havana	three_prime_utr	25225614	25225761	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000687356"; transcript_version "1"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-208"; transcript_source "havana"; transcript_biotype "nonsense_mediated_decay";
+12	havana	three_prime_utr	25205270	25209911	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000687356"; transcript_version "1"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-208"; transcript_source "havana"; transcript_biotype "nonsense_mediated_decay";
+12	havana	transcript	25205343	25250917	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000692768"; transcript_version "1"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-213"; transcript_source "havana"; transcript_biotype "protein_coding"; tag "basic";
+12	havana	exon	25250751	25250917	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000692768"; transcript_version "1"; exon_number "1"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-213"; transcript_source "havana"; transcript_biotype "protein_coding"; exon_id "ENSE00003923448"; exon_version "1"; tag "basic";
+12	havana	exon	25227234	25227412	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000692768"; transcript_version "1"; exon_number "2"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-213"; transcript_source "havana"; transcript_biotype "protein_coding"; exon_id "ENSE00003923061"; exon_version "1"; tag "basic";
+12	havana	CDS	25227234	25227325	.	-	0	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000692768"; transcript_version "1"; exon_number "2"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-213"; transcript_source "havana"; transcript_biotype "protein_coding"; protein_id "ENSP00000510254"; protein_version "1"; tag "basic";
+12	havana	start_codon	25227323	25227325	.	-	0	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000692768"; transcript_version "1"; exon_number "2"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-213"; transcript_source "havana"; transcript_biotype "protein_coding"; tag "basic";
+12	havana	exon	25225614	25225773	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000692768"; transcript_version "1"; exon_number "3"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-213"; transcript_source "havana"; transcript_biotype "protein_coding"; exon_id "ENSE00001644818"; exon_version "1"; tag "basic";
+12	havana	CDS	25225614	25225773	.	-	1	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000692768"; transcript_version "1"; exon_number "3"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-213"; transcript_source "havana"; transcript_biotype "protein_coding"; protein_id "ENSP00000510254"; protein_version "1"; tag "basic";
+12	havana	exon	25205343	25209911	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000692768"; transcript_version "1"; exon_number "4"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-213"; transcript_source "havana"; transcript_biotype "protein_coding"; exon_id "ENSE00003925822"; exon_version "1"; tag "basic";
+12	havana	CDS	25209798	25209911	.	-	0	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000692768"; transcript_version "1"; exon_number "4"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-213"; transcript_source "havana"; transcript_biotype "protein_coding"; protein_id "ENSP00000510254"; protein_version "1"; tag "basic";
+12	havana	stop_codon	25209795	25209797	.	-	0	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000692768"; transcript_version "1"; exon_number "4"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-213"; transcript_source "havana"; transcript_biotype "protein_coding"; tag "basic";
+12	havana	five_prime_utr	25250751	25250917	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000692768"; transcript_version "1"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-213"; transcript_source "havana"; transcript_biotype "protein_coding"; tag "basic";
+12	havana	five_prime_utr	25227326	25227412	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000692768"; transcript_version "1"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-213"; transcript_source "havana"; transcript_biotype "protein_coding"; tag "basic";
+12	havana	three_prime_utr	25205343	25209794	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000692768"; transcript_version "1"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-213"; transcript_source "havana"; transcript_biotype "protein_coding"; tag "basic";
+12	havana	transcript	25206933	25250444	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000688940"; transcript_version "1"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-210"; transcript_source "havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8702"; tag "basic";
+12	havana	exon	25250255	25250444	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000688940"; transcript_version "1"; exon_number "1"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-210"; transcript_source "havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8702"; exon_id "ENSE00003932539"; exon_version "1"; tag "basic";
+12	havana	exon	25245274	25245395	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000688940"; transcript_version "1"; exon_number "2"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-210"; transcript_source "havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8702"; exon_id "ENSE00000936617"; exon_version "1"; tag "basic";
+12	havana	CDS	25245274	25245384	.	-	0	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000688940"; transcript_version "1"; exon_number "2"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-210"; transcript_source "havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8702"; protein_id "ENSP00000509238"; protein_version "1"; tag "basic";
+12	havana	start_codon	25245382	25245384	.	-	0	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000688940"; transcript_version "1"; exon_number "2"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-210"; transcript_source "havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8702"; tag "basic";
+12	havana	exon	25227234	25227412	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000688940"; transcript_version "1"; exon_number "3"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-210"; transcript_source "havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8702"; exon_id "ENSE00001719809"; exon_version "1"; tag "basic";
+12	havana	CDS	25227234	25227412	.	-	0	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000688940"; transcript_version "1"; exon_number "3"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-210"; transcript_source "havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8702"; protein_id "ENSP00000509238"; protein_version "1"; tag "basic";
+12	havana	exon	25225614	25225773	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000688940"; transcript_version "1"; exon_number "4"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-210"; transcript_source "havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8702"; exon_id "ENSE00001644818"; exon_version "1"; tag "basic";
+12	havana	CDS	25225614	25225773	.	-	1	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000688940"; transcript_version "1"; exon_number "4"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-210"; transcript_source "havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8702"; protein_id "ENSP00000509238"; protein_version "1"; tag "basic";
+12	havana	exon	25206933	25209911	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000688940"; transcript_version "1"; exon_number "5"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-210"; transcript_source "havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8702"; exon_id "ENSE00003930148"; exon_version "1"; tag "basic";
+12	havana	CDS	25209798	25209911	.	-	0	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000688940"; transcript_version "1"; exon_number "5"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-210"; transcript_source "havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8702"; protein_id "ENSP00000509238"; protein_version "1"; tag "basic";
+12	havana	stop_codon	25209795	25209797	.	-	0	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000688940"; transcript_version "1"; exon_number "5"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-210"; transcript_source "havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8702"; tag "basic";
+12	havana	five_prime_utr	25250255	25250444	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000688940"; transcript_version "1"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-210"; transcript_source "havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8702"; tag "basic";
+12	havana	five_prime_utr	25245385	25245395	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000688940"; transcript_version "1"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-210"; transcript_source "havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8702"; tag "basic";
+12	havana	three_prime_utr	25206933	25209794	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000688940"; transcript_version "1"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-210"; transcript_source "havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8702"; tag "basic";
+12	havana	transcript	25207948	25250929	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000690804"; transcript_version "1"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-212"; transcript_source "havana"; transcript_biotype "nonsense_mediated_decay";
+12	havana	exon	25250751	25250929	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000690804"; transcript_version "1"; exon_number "1"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-212"; transcript_source "havana"; transcript_biotype "nonsense_mediated_decay"; exon_id "ENSE00003903543"; exon_version "1";
+12	havana	exon	25245274	25245395	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000690804"; transcript_version "1"; exon_number "2"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-212"; transcript_source "havana"; transcript_biotype "nonsense_mediated_decay"; exon_id "ENSE00000936617"; exon_version "1";
+12	havana	CDS	25245274	25245384	.	-	0	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000690804"; transcript_version "1"; exon_number "2"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-212"; transcript_source "havana"; transcript_biotype "nonsense_mediated_decay"; protein_id "ENSP00000508568"; protein_version "1";
+12	havana	start_codon	25245382	25245384	.	-	0	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000690804"; transcript_version "1"; exon_number "2"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-212"; transcript_source "havana"; transcript_biotype "nonsense_mediated_decay";
+12	havana	exon	25228775	25228891	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000690804"; transcript_version "1"; exon_number "3"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-212"; transcript_source "havana"; transcript_biotype "nonsense_mediated_decay"; exon_id "ENSE00003925179"; exon_version "1";
+12	havana	CDS	25228850	25228891	.	-	0	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000690804"; transcript_version "1"; exon_number "3"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-212"; transcript_source "havana"; transcript_biotype "nonsense_mediated_decay"; protein_id "ENSP00000508568"; protein_version "1";
+12	havana	stop_codon	25228847	25228849	.	-	0	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000690804"; transcript_version "1"; exon_number "3"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-212"; transcript_source "havana"; transcript_biotype "nonsense_mediated_decay";
+12	havana	exon	25227234	25227412	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000690804"; transcript_version "1"; exon_number "4"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-212"; transcript_source "havana"; transcript_biotype "nonsense_mediated_decay"; exon_id "ENSE00003930847"; exon_version "1";
+12	havana	exon	25225614	25225773	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000690804"; transcript_version "1"; exon_number "5"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-212"; transcript_source "havana"; transcript_biotype "nonsense_mediated_decay"; exon_id "ENSE00003937476"; exon_version "1";
+12	havana	exon	25207948	25209911	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000690804"; transcript_version "1"; exon_number "6"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-212"; transcript_source "havana"; transcript_biotype "nonsense_mediated_decay"; exon_id "ENSE00003935620"; exon_version "1";
+12	havana	five_prime_utr	25250751	25250929	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000690804"; transcript_version "1"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-212"; transcript_source "havana"; transcript_biotype "nonsense_mediated_decay";
+12	havana	five_prime_utr	25245385	25245395	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000690804"; transcript_version "1"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-212"; transcript_source "havana"; transcript_biotype "nonsense_mediated_decay";
+12	havana	three_prime_utr	25228775	25228846	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000690804"; transcript_version "1"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-212"; transcript_source "havana"; transcript_biotype "nonsense_mediated_decay";
+12	havana	three_prime_utr	25227234	25227412	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000690804"; transcript_version "1"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-212"; transcript_source "havana"; transcript_biotype "nonsense_mediated_decay";
+12	havana	three_prime_utr	25225614	25225773	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000690804"; transcript_version "1"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-212"; transcript_source "havana"; transcript_biotype "nonsense_mediated_decay";
+12	havana	three_prime_utr	25207948	25209911	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000690804"; transcript_version "1"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-212"; transcript_source "havana"; transcript_biotype "nonsense_mediated_decay";
+12	havana	transcript	25209178	25250936	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000557334"; transcript_version "6"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-204"; transcript_source "havana"; transcript_biotype "protein_coding"; tag "basic"; transcript_support_level "5 (assigned to previous version 5)";
+12	havana	exon	25250751	25250936	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000557334"; transcript_version "6"; exon_number "1"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-204"; transcript_source "havana"; transcript_biotype "protein_coding"; exon_id "ENSE00002446502"; exon_version "1"; tag "basic"; transcript_support_level "5 (assigned to previous version 5)";
+12	havana	exon	25245274	25245395	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000557334"; transcript_version "6"; exon_number "2"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-204"; transcript_source "havana"; transcript_biotype "protein_coding"; exon_id "ENSE00000936617"; exon_version "1"; tag "basic"; transcript_support_level "5 (assigned to previous version 5)";
+12	havana	CDS	25245274	25245384	.	-	0	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000557334"; transcript_version "6"; exon_number "2"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-204"; transcript_source "havana"; transcript_biotype "protein_coding"; protein_id "ENSP00000452512"; protein_version "1"; tag "basic"; transcript_support_level "5 (assigned to previous version 5)";
+12	havana	start_codon	25245382	25245384	.	-	0	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000557334"; transcript_version "6"; exon_number "2"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-204"; transcript_source "havana"; transcript_biotype "protein_coding"; tag "basic"; transcript_support_level "5 (assigned to previous version 5)";
+12	havana	exon	25209178	25209911	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000557334"; transcript_version "6"; exon_number "3"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-204"; transcript_source "havana"; transcript_biotype "protein_coding"; exon_id "ENSE00002464674"; exon_version "2"; tag "basic"; transcript_support_level "5 (assigned to previous version 5)";
+12	havana	CDS	25209798	25209911	.	-	0	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000557334"; transcript_version "6"; exon_number "3"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-204"; transcript_source "havana"; transcript_biotype "protein_coding"; protein_id "ENSP00000452512"; protein_version "1"; tag "basic"; transcript_support_level "5 (assigned to previous version 5)";
+12	havana	stop_codon	25209795	25209797	.	-	0	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000557334"; transcript_version "6"; exon_number "3"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-204"; transcript_source "havana"; transcript_biotype "protein_coding"; tag "basic"; transcript_support_level "5 (assigned to previous version 5)";
+12	havana	five_prime_utr	25250751	25250936	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000557334"; transcript_version "6"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-204"; transcript_source "havana"; transcript_biotype "protein_coding"; tag "basic"; transcript_support_level "5 (assigned to previous version 5)";
+12	havana	five_prime_utr	25245385	25245395	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000557334"; transcript_version "6"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-204"; transcript_source "havana"; transcript_biotype "protein_coding"; tag "basic"; transcript_support_level "5 (assigned to previous version 5)";
+12	havana	three_prime_utr	25209178	25209794	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000557334"; transcript_version "6"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-204"; transcript_source "havana"; transcript_biotype "protein_coding"; tag "basic"; transcript_support_level "5 (assigned to previous version 5)";
+12	havana	transcript	25209673	25227997	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000688228"; transcript_version "1"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-209"; transcript_source "havana"; transcript_biotype "retained_intron";
+12	havana	exon	25227234	25227997	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000688228"; transcript_version "1"; exon_number "1"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-209"; transcript_source "havana"; transcript_biotype "retained_intron"; exon_id "ENSE00003935871"; exon_version "1";
+12	havana	exon	25225614	25225773	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000688228"; transcript_version "1"; exon_number "2"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-209"; transcript_source "havana"; transcript_biotype "retained_intron"; exon_id "ENSE00003937476"; exon_version "1";
+12	havana	exon	25209673	25209911	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000688228"; transcript_version "1"; exon_number "3"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-209"; transcript_source "havana"; transcript_biotype "retained_intron"; exon_id "ENSE00003925173"; exon_version "1";
+12	havana	transcript	25232558	25250929	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000686969"; transcript_version "1"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-207"; transcript_source "havana"; transcript_biotype "protein_coding"; tag "basic";
+12	havana	exon	25250764	25250929	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000686969"; transcript_version "1"; exon_number "1"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-207"; transcript_source "havana"; transcript_biotype "protein_coding"; exon_id "ENSE00002530521"; exon_version "1"; tag "basic";
+12	havana	exon	25245274	25245395	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000686969"; transcript_version "1"; exon_number "2"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-207"; transcript_source "havana"; transcript_biotype "protein_coding"; exon_id "ENSE00000936617"; exon_version "1"; tag "basic";
+12	havana	CDS	25245274	25245384	.	-	0	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000686969"; transcript_version "1"; exon_number "2"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-207"; transcript_source "havana"; transcript_biotype "protein_coding"; protein_id "ENSP00000510479"; protein_version "1"; tag "basic";
+12	havana	start_codon	25245382	25245384	.	-	0	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000686969"; transcript_version "1"; exon_number "2"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-207"; transcript_source "havana"; transcript_biotype "protein_coding"; tag "basic";
+12	havana	exon	25232558	25235226	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000686969"; transcript_version "1"; exon_number "3"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-207"; transcript_source "havana"; transcript_biotype "protein_coding"; exon_id "ENSE00003927408"; exon_version "1"; tag "basic";
+12	havana	CDS	25235209	25235226	.	-	0	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000686969"; transcript_version "1"; exon_number "3"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-207"; transcript_source "havana"; transcript_biotype "protein_coding"; protein_id "ENSP00000510479"; protein_version "1"; tag "basic";
+12	havana	stop_codon	25235206	25235208	.	-	0	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000686969"; transcript_version "1"; exon_number "3"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-207"; transcript_source "havana"; transcript_biotype "protein_coding"; tag "basic";
+12	havana	five_prime_utr	25250764	25250929	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000686969"; transcript_version "1"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-207"; transcript_source "havana"; transcript_biotype "protein_coding"; tag "basic";
+12	havana	five_prime_utr	25245385	25245395	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000686969"; transcript_version "1"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-207"; transcript_source "havana"; transcript_biotype "protein_coding"; tag "basic";
+12	havana	three_prime_utr	25232558	25235205	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000686969"; transcript_version "1"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-207"; transcript_source "havana"; transcript_biotype "protein_coding"; tag "basic";
+12	havana	transcript	25232591	25250929	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000556131"; transcript_version "2"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-203"; transcript_source "havana"; transcript_biotype "protein_coding"; tag "basic"; transcript_support_level "1 (assigned to previous version 1)";
+12	havana	exon	25250751	25250929	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000556131"; transcript_version "2"; exon_number "1"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-203"; transcript_source "havana"; transcript_biotype "protein_coding"; exon_id "ENSE00003903543"; exon_version "1"; tag "basic"; transcript_support_level "1 (assigned to previous version 1)";
+12	havana	exon	25245274	25245395	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000556131"; transcript_version "2"; exon_number "2"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-203"; transcript_source "havana"; transcript_biotype "protein_coding"; exon_id "ENSE00000936617"; exon_version "1"; tag "basic"; transcript_support_level "1 (assigned to previous version 1)";
+12	havana	CDS	25245274	25245384	.	-	0	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000556131"; transcript_version "2"; exon_number "2"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-203"; transcript_source "havana"; transcript_biotype "protein_coding"; protein_id "ENSP00000451856"; protein_version "1"; tag "basic"; transcript_support_level "1 (assigned to previous version 1)";
+12	havana	start_codon	25245382	25245384	.	-	0	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000556131"; transcript_version "2"; exon_number "2"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-203"; transcript_source "havana"; transcript_biotype "protein_coding"; tag "basic"; transcript_support_level "1 (assigned to previous version 1)";
+12	havana	exon	25232591	25235226	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000556131"; transcript_version "2"; exon_number "3"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-203"; transcript_source "havana"; transcript_biotype "protein_coding"; exon_id "ENSE00002478081"; exon_version "2"; tag "basic"; transcript_support_level "1 (assigned to previous version 1)";
+12	havana	CDS	25235209	25235226	.	-	0	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000556131"; transcript_version "2"; exon_number "3"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-203"; transcript_source "havana"; transcript_biotype "protein_coding"; protein_id "ENSP00000451856"; protein_version "1"; tag "basic"; transcript_support_level "1 (assigned to previous version 1)";
+12	havana	stop_codon	25235206	25235208	.	-	0	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000556131"; transcript_version "2"; exon_number "3"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-203"; transcript_source "havana"; transcript_biotype "protein_coding"; tag "basic"; transcript_support_level "1 (assigned to previous version 1)";
+12	havana	five_prime_utr	25250751	25250929	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000556131"; transcript_version "2"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-203"; transcript_source "havana"; transcript_biotype "protein_coding"; tag "basic"; transcript_support_level "1 (assigned to previous version 1)";
+12	havana	five_prime_utr	25245385	25245395	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000556131"; transcript_version "2"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-203"; transcript_source "havana"; transcript_biotype "protein_coding"; tag "basic"; transcript_support_level "1 (assigned to previous version 1)";
+12	havana	three_prime_utr	25232591	25235205	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000556131"; transcript_version "2"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-203"; transcript_source "havana"; transcript_biotype "protein_coding"; tag "basic"; transcript_support_level "1 (assigned to previous version 1)";
diff --git a/tests/tools/data/Homo_sapiens.GRCh38.105.kras.gff3 b/tests/tools/data/Homo_sapiens.GRCh38.105.kras.gff3
new file mode 100644
index 00000000..be16e852
--- /dev/null
+++ b/tests/tools/data/Homo_sapiens.GRCh38.105.kras.gff3
@@ -0,0 +1,19 @@
+12	ensembl_havana	gene	25205246	25250936	.	-	.	ID=gene:ENSG00000133703;Name=KRAS;biotype=protein_coding;description=KRAS proto-oncogene%2C GTPase [Source:HGNC Symbol%3BAcc:HGNC:6407];gene_id=ENSG00000133703;logic_name=ensembl_havana_gene_homo_sapiens;version=14
+12	havana	mRNA	25205246	25225773	.	-	.	ID=transcript:ENST00000690406;Parent=gene:ENSG00000133703;Name=KRAS-211;biotype=nonsense_mediated_decay;transcript_id=ENST00000690406;version=1
+12	ensembl_havana	mRNA	25205246	25250929	.	-	.	ID=transcript:ENST00000256078;Parent=gene:ENSG00000133703;Name=KRAS-201;biotype=protein_coding;ccdsid=CCDS8703.1;tag=basic;transcript_id=ENST00000256078;transcript_support_level=1 (assigned to previous version 8);version=10
+12	ensembl_havana	mRNA	25205246	25250929	.	-	.	ID=transcript:ENST00000311936;Parent=gene:ENSG00000133703;Name=KRAS-202;biotype=protein_coding;ccdsid=CCDS8702.1;tag=basic;transcript_id=ENST00000311936;transcript_support_level=1 (assigned to previous version 7);version=8
+12	havana	mRNA	25205250	25250908	.	-	.	ID=transcript:ENST00000686877;Parent=gene:ENSG00000133703;Name=KRAS-206;biotype=nonsense_mediated_decay;transcript_id=ENST00000686877;version=1
+12	havana	mRNA	25205258	25250935	.	-	.	ID=transcript:ENST00000685328;Parent=gene:ENSG00000133703;Name=KRAS-205;biotype=protein_coding;ccdsid=CCDS8702.1;tag=basic;transcript_id=ENST00000685328;version=1
+12	havana	mRNA	25205260	25250899	.	-	.	ID=transcript:ENST00000693229;Parent=gene:ENSG00000133703;Name=KRAS-214;biotype=protein_coding;tag=basic;transcript_id=ENST00000693229;version=1
+12	havana	mRNA	25205270	25250927	.	-	.	ID=transcript:ENST00000687356;Parent=gene:ENSG00000133703;Name=KRAS-208;biotype=nonsense_mediated_decay;transcript_id=ENST00000687356;version=1
+12	havana	mRNA	25205343	25250917	.	-	.	ID=transcript:ENST00000692768;Parent=gene:ENSG00000133703;Name=KRAS-213;biotype=protein_coding;tag=basic;transcript_id=ENST00000692768;version=1
+12	havana	mRNA	25206933	25250444	.	-	.	ID=transcript:ENST00000688940;Parent=gene:ENSG00000133703;Name=KRAS-210;biotype=protein_coding;ccdsid=CCDS8702.1;tag=basic;transcript_id=ENST00000688940;version=1
+12	havana	mRNA	25207948	25250929	.	-	.	ID=transcript:ENST00000690804;Parent=gene:ENSG00000133703;Name=KRAS-212;biotype=nonsense_mediated_decay;transcript_id=ENST00000690804;version=1
+12	havana	mRNA	25209178	25250936	.	-	.	ID=transcript:ENST00000557334;Parent=gene:ENSG00000133703;Name=KRAS-204;biotype=protein_coding;tag=basic;transcript_id=ENST00000557334;transcript_support_level=5 (assigned to previous version 5);version=6
+12	havana	lnc_RNA	25209673	25227997	.	-	.	ID=transcript:ENST00000688228;Parent=gene:ENSG00000133703;Name=KRAS-209;biotype=retained_intron;transcript_id=ENST00000688228;version=1
+12	havana	mRNA	25232558	25250929	.	-	.	ID=transcript:ENST00000686969;Parent=gene:ENSG00000133703;Name=KRAS-207;biotype=protein_coding;tag=basic;transcript_id=ENST00000686969;version=1
+12	havana	mRNA	25232591	25250929	.	-	.	ID=transcript:ENST00000556131;Parent=gene:ENSG00000133703;Name=KRAS-203;biotype=protein_coding;tag=basic;transcript_id=ENST00000556131;transcript_support_level=1 (assigned to previous version 1);version=2
+12	havana	ncRNA_gene	25210652	25211233	.	+	.	ID=gene:ENSG00000274987;biotype=lncRNA;description=novel transcript%2C antisense to KRAS;gene_id=ENSG00000274987;logic_name=havana_homo_sapiens;version=1
+12	havana	ncRNA_gene	25225103	25225665	.	+	.	ID=gene:ENSG00000275197;biotype=lncRNA;description=novel transcript%2C antisense to KRAS;gene_id=ENSG00000275197;logic_name=havana_homo_sapiens;version=1
+6	havana	pseudogene	54770583	54771134	.	+	.	ID=gene:ENSG00000220635;Name=KRASP1;biotype=processed_pseudogene;description=KRAS proto-oncogene%2C GTPase pseudogene 1 [Source:HGNC Symbol%3BAcc:HGNC:6406];gene_id=ENSG00000220635;logic_name=havana_homo_sapiens;version=2
+6	havana	pseudogenic_transcript	54770583	54771134	.	+	.	ID=transcript:ENST00000407852;Parent=gene:ENSG00000220635;Name=KRASP1-201;biotype=processed_pseudogene;tag=basic;transcript_id=ENST00000407852;transcript_support_level=NA;version=2
diff --git a/tests/tools/test_convert_annotations_format.py b/tests/tools/test_convert_annotations_format.py
new file mode 100644
index 00000000..0f837b30
--- /dev/null
+++ b/tests/tools/test_convert_annotations_format.py
@@ -0,0 +1,22 @@
+import os
+
+from tools.convert_annotations_format import convert_gff2_to_mavis, convert_gff3_to_mavis
+
+
+def test_load_gff3():
+    # NOTE(review): despite the name, this loads the .gtf fixture through the GFF2/GTF converter
+    gtf_path = os.path.join(os.path.dirname(__file__), 'data', 'Homo_sapiens.GRCh38.105.chr.kras.gtf')
+    data = convert_gff2_to_mavis(gtf_path, False)
+    genes = data['genes']
+    assert len(genes) == 2
+    transcripts = [transcript for gene in genes for transcript in gene['transcripts']]
+    assert len(transcripts) == 15
+    exons = sum(len(transcript['exons']) for transcript in transcripts)
+    assert exons == 62
+
+
+def test_load_gtf():  # NOTE(review): despite the name, this covers the .gff3 fixture and the GFF3 converter
+    gff3_path = os.path.join(os.path.dirname(__file__), 'data', 'Homo_sapiens.GRCh38.105.kras.gff3')
+    genes = convert_gff3_to_mavis(gff3_path, False)['genes']
+    assert len(genes) == 4
+    assert sum(len(gene['transcripts']) for gene in genes) == 15

From 0c0f0516b1cfa89f587cc2fde746d68ba41a10d2 Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Wed, 2 Feb 2022 10:18:52 -0800
Subject: [PATCH 108/137] Add example with semi colon in attribute

---
 src/tools/convert_annotations_format.py       | 21 ++++++++++++++-----
 .../data/Homo_sapiens.GRCh38.105.chr.kras.gtf |  2 +-
 2 files changed, 17 insertions(+), 6 deletions(-)

diff --git a/src/tools/convert_annotations_format.py b/src/tools/convert_annotations_format.py
index 13dfb8e5..fd3c2b3b 100644
--- a/src/tools/convert_annotations_format.py
+++ b/src/tools/convert_annotations_format.py
@@ -165,6 +165,7 @@ def parse_domain_list(row):
 def convert_pandas_gff_to_mavis(df) -> Dict:
     df['parent_type'] = df.Parent.str.split(':').str[0]
     genelike_features = {'gene', 'ncRNA_gene', 'biological_region', 'pseudogene'}
+    consumed = set()
 
     def pull_alias_terms(row):
         aliases = []
@@ -185,6 +186,7 @@ def pull_alias_terms(row):
             'transcripts': [],
             'name': row['feature_id'] + '.' + row['version'],
         }
+        consumed.add(row['row_index'])
     logging.info(f'loaded {len(genes_by_id)} genes')
 
     transcripts_by_id = {}
@@ -209,6 +211,7 @@ def pull_alias_terms(row):
             }
             genes_by_id[gene_id]['transcripts'].append(transcript)
             transcripts_by_id[feature_id] = transcript
+            consumed.add(row['row_index'])
 
     logging.info(f'loaded {len(transcripts_by_id)} transcripts')
     # now cds
@@ -218,12 +221,13 @@ def pull_alias_terms(row):
             transcript_id = parent.split(':')[1]
             if transcript_id not in transcripts_by_id:
                 raise KeyError(
-                    f'failed to find parent transcript ({transcript_id}) skipping cds ({row["feature_id"]})'
+                    f'failed to find parent transcript ({transcript_id}) skipping cds on line ({row["row_index"] + 1})'
                 )
             transcripts_by_id[transcript_id].update(
                 {'cdna_coding_start': row['start'], 'cdna_coding_end': row['end']}
             )
             cds_count += 1
+            consumed.add(row['row_index'])
     logging.info(f'loaded {cds_count} cds regions')
     # exons
     exons_count = 0
@@ -232,7 +236,7 @@ def pull_alias_terms(row):
             transcript_id = parent.split(':')[1]
             if transcript_id not in transcripts_by_id:
                 raise KeyError(
-                    f'failed to find parent transcript ({transcript_id}) skipping exon ({row["feature_id"]})'
+                    f'failed to find parent transcript ({transcript_id}) skipping exon ({row["feature_id"]}) on line {row["row_index"] + 1}'
                 )
             transcripts_by_id[transcript_id]['exons'].append(
                 {
@@ -242,9 +246,16 @@ def pull_alias_terms(row):
                 }
             )
             exons_count += 1
+            consumed.add(row['row_index'])
 
     logging.info(f'loaded {exons_count} exons')
 
+    ignored_df = df[~df.row_index.isin(consumed)]
+    if ignored_df.shape[0]:
+        logging.warning(
+            f'Ignored {ignored_df.shape[0]} rows that did not match the expected types: {ignored_df.type.unique()}'
+        )
+
     result = {'genes': list(genes_by_id.values())}
     try:
         snakemake_validate(
@@ -282,10 +293,10 @@ def convert_gff3_to_mavis(filename: str, no_alt) -> Dict:
         na_values=['.'] + PANDAS_DEFAULT_NA_VALUES,
         names=['seqid', 'source', 'type', 'start', 'end', 'score', 'strand', 'phase', 'attributes'],
     )
+    df['row_index'] = df.index
     if no_alt:
         df = df[~df.seqid.str.startswith('GL')]
         df = df[~df.seqid.str.startswith('KI')]
-    df['row_index'] = df.index
 
     skip_types = {
         'five_prime_UTR',
@@ -394,10 +405,10 @@ def convert_gff2_to_mavis(filename: str, no_alt) -> Dict:
 
     def split_attributes(row):
         result = {}
-        for attr in row.attributes.split(';'):
+        for attr in row.attributes.split('";'):
             if not attr:
                 continue
-            m = re.match(r'^\s*([^"]+)\s+"(.*)"$', attr)
+            m = re.match(r'^\s*([^"]+)\s+"(.*)"?$', attr)
             if not m:
                 raise KeyError(f'attributes do not follow expected pattern: {attr}')
             result[m.group(1)] = m.group(2)
diff --git a/tests/tools/data/Homo_sapiens.GRCh38.105.chr.kras.gtf b/tests/tools/data/Homo_sapiens.GRCh38.105.chr.kras.gtf
index ce1f904f..1246b11c 100644
--- a/tests/tools/data/Homo_sapiens.GRCh38.105.chr.kras.gtf
+++ b/tests/tools/data/Homo_sapiens.GRCh38.105.chr.kras.gtf
@@ -28,7 +28,7 @@
 12	ensembl_havana	three_prime_utr	25215437	25215440	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000256078"; transcript_version "10"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-201"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8703"; tag "basic"; transcript_support_level "1 (assigned to previous version 8)";
 12	ensembl_havana	three_prime_utr	25205246	25209911	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000256078"; transcript_version "10"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-201"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8703"; tag "basic"; transcript_support_level "1 (assigned to previous version 8)";
 12	ensembl_havana	transcript	25205246	25250929	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000311936"; transcript_version "8"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-202"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8702"; tag "basic"; transcript_support_level "1 (assigned to previous version 7)";
-12	ensembl_havana	exon	25250751	25250929	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000311936"; transcript_version "8"; exon_number "1"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-202"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8702"; exon_id "ENSE00003903543"; exon_version "1"; tag "basic"; transcript_support_level "1 (assigned to previous version 7)";
+12	ensembl_havana	exon	25250751	25250929	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000311936"; transcript_version "8"; exon_number "1"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-202"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8702"; exon_id "ENSE00003903543"; exon_version "1"; tag "basic"; transcript_support_level "1 (assigned to previous version 7)";note "some note with; a semi-colon"
 12	ensembl_havana	exon	25245274	25245395	.	-	.	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000311936"; transcript_version "8"; exon_number "2"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-202"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8702"; exon_id "ENSE00000936617"; exon_version "1"; tag "basic"; transcript_support_level "1 (assigned to previous version 7)";
 12	ensembl_havana	CDS	25245274	25245384	.	-	0	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000311936"; transcript_version "8"; exon_number "2"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-202"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8702"; protein_id "ENSP00000308495"; protein_version "3"; tag "basic"; transcript_support_level "1 (assigned to previous version 7)";
 12	ensembl_havana	start_codon	25245382	25245384	.	-	0	gene_id "ENSG00000133703"; gene_version "14"; transcript_id "ENST00000311936"; transcript_version "8"; exon_number "2"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-202"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8702"; tag "basic"; transcript_support_level "1 (assigned to previous version 7)";

From 2987227f5e3c5f5839d3ac86fd24bbf559b845eb Mon Sep 17 00:00:00 2001
From: Jeremy Fan <jfan@bcgsc.ca>
Date: Wed, 2 Feb 2022 15:04:35 -0800
Subject: [PATCH 109/137] revert old changes

---
 src/mavis/tools/vcf.py       |  7 ++++--
 tests/unit/test_tools_vcf.py | 43 ++++++++++++++++++++++++++++--------
 2 files changed, 39 insertions(+), 11 deletions(-)

diff --git a/src/mavis/tools/vcf.py b/src/mavis/tools/vcf.py
index ff4c0cb0..f756df43 100644
--- a/src/mavis/tools/vcf.py
+++ b/src/mavis/tools/vcf.py
@@ -156,8 +156,6 @@ def convert_record(record: VcfRecordType) -> List[Dict]:
 
         if info.get('SVTYPE') == 'BND':
             chr2, end, orient1, orient2, ref, alt = parse_bnd_alt(alt)
-            if end == 0:
-                end = 1  # telomeric BND alt syntax https://github.com/bcgsc/mavis/issues/294
             std_row[COLUMNS.break1_orientation] = orient1
             std_row[COLUMNS.break2_orientation] = orient2
             std_row[COLUMNS.untemplated_seq] = alt
@@ -204,6 +202,11 @@ def convert_record(record: VcfRecordType) -> List[Dict]:
                     COLUMNS.break2_position_end: end + info.get('CIEND', (0, 0))[1],
                 }
             )
+        if std_row['break1_position_end'] == 0 and std_row['break1_position_start'] == 1:
+            # addresses cases where pos = 0 and telomeric BND alt syntax https://github.com/bcgsc/mavis/issues/294
+            std_row.update({'break1_position_end': 1})
+        if std_row['break2_position_end'] == 0 and std_row['break2_position_start'] == 1:
+            std_row.update({'break2_position_end': 1})
 
         if 'SVTYPE' in info:
             std_row[COLUMNS.event_type] = info['SVTYPE']
diff --git a/tests/unit/test_tools_vcf.py b/tests/unit/test_tools_vcf.py
index c4eac443..2036e656 100644
--- a/tests/unit/test_tools_vcf.py
+++ b/tests/unit/test_tools_vcf.py
@@ -11,10 +11,10 @@ def test_read_vcf():
 
 
 def test_convert_record():
-    variant = VcfRecordType(
-        1,
-        0,
-        'chr14_KI270722v1_random',
+    variant_imprecise = VcfRecordType(
+        id='mock-BND-imprecise',
+        pos=0,
+        chrom='chr14_KI270722v1_random',
         alts=['N[chr17_GL000205v2_random:0['],
         ref='N',
         info=VcfInfoType(
@@ -29,8 +29,33 @@ def test_convert_record():
             AF="1",
         ),
     )
-    records = convert_record(variant)
-    assert len(records) == 1
-    record = records[0]
-    assert record.get('break2_position_end') == 1
-    assert record.get('break2_chromosome') == 'chr17_GL000205v2_random'
+    variant_precise = VcfRecordType(
+        id='mock-BND-precise',
+        pos=0,
+        chrom='chr14_KI270722v1_random',
+        alts=[']chrUn_GL000216v2:142821]N'],
+        ref='N',
+        info=VcfInfoType(
+            IMPRECISE=False,
+            SVMETHOD="Snifflesv1.0.11",
+            SVTYPE="BND",
+            SUPTYPE="SR",
+            SVLEN="0",
+            STRANDS="+-",
+            RE="5",
+            REF_strand="0,0",
+            AF="1",
+        ),
+    )
+    imprecise_records = convert_record(variant_imprecise)
+    assert len(imprecise_records) == 1
+    imprecise_records = imprecise_records[0]
+    assert imprecise_records.get('break1_position_end') == 1
+
+    precise_records = convert_record(variant_precise)
+    assert len(precise_records) == 1
+    precise_records = precise_records[0]
+    assert precise_records.get('break1_position_end') == 1
+
+    assert precise_records.get('break1_chromosome') == 'chr14_KI270722v1_random'
+    assert imprecise_records.get('break1_chromosome') == 'chr14_KI270722v1_random'

From 7c939e38c7db9739ab719f7d1b5751d44d9a6a5e Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Sat, 5 Feb 2022 20:55:13 -0800
Subject: [PATCH 110/137] Support transform of gff and gtf files

---
 src/tools/convert_annotations_format.py       |  628 +-
 tests/data/example_genes.json                 | 8038 +----------------
 .../data/Homo_sapiens.GRCh38.105.kras.gff3    |   19 -
 .../tools/data/Homo_sapiens.GRCh38.kras.gff3  |  163 +
 .../data/Homo_sapiens.GRCh38.kras.gff3.json   |    1 +
 ....kras.gtf => Homo_sapiens.GRCh38.kras.gtf} |    0
 .../data/Homo_sapiens.GRCh38.kras.gtf.json    |    1 +
 tests/tools/data/K02718.1.gff3                |   24 +
 tests/tools/data/K02718.1.gff3.json           |  243 +
 tests/tools/data/K02718.1.gtf                 |   32 +
 tests/tools/data/K02718.1.gtf.json            |  188 +
 tests/tools/data/example_genes.v2.json        | 7700 ++++++++++++++++
 tests/tools/data/example_genes.v3.json        |    1 +
 .../tools/test_convert_annotations_format.py  |   71 +-
 14 files changed, 8912 insertions(+), 8197 deletions(-)
 delete mode 100644 tests/tools/data/Homo_sapiens.GRCh38.105.kras.gff3
 create mode 100644 tests/tools/data/Homo_sapiens.GRCh38.kras.gff3
 create mode 100644 tests/tools/data/Homo_sapiens.GRCh38.kras.gff3.json
 rename tests/tools/data/{Homo_sapiens.GRCh38.105.chr.kras.gtf => Homo_sapiens.GRCh38.kras.gtf} (100%)
 create mode 100644 tests/tools/data/Homo_sapiens.GRCh38.kras.gtf.json
 create mode 100644 tests/tools/data/K02718.1.gff3
 create mode 100644 tests/tools/data/K02718.1.gff3.json
 create mode 100644 tests/tools/data/K02718.1.gtf
 create mode 100644 tests/tools/data/K02718.1.gtf.json
 create mode 100644 tests/tools/data/example_genes.v2.json
 create mode 100644 tests/tools/data/example_genes.v3.json

diff --git a/src/tools/convert_annotations_format.py b/src/tools/convert_annotations_format.py
index fd3c2b3b..28ca9dac 100644
--- a/src/tools/convert_annotations_format.py
+++ b/src/tools/convert_annotations_format.py
@@ -2,11 +2,14 @@
 import json
 import logging
 import re
-from typing import Dict
+import traceback
+from typing import Dict, Tuple
 
 import pandas as pd
-import pkg_resources
-from snakemake.utils import validate as snakemake_validate
+from mavis.annotate.file_io import parse_annotations_json
+
+# pd.set_option('display.width', 250)
+pd.options.display.width = 0
 
 PANDAS_DEFAULT_NA_VALUES = [
     '-1.#IND',
@@ -25,6 +28,85 @@
 ]
 
 
+GFF_GENELIKE_FEATURES = {  # strip_id_field maps each entry, used as an ID prefix, to the type 'gene'
+    'gene',
+    'ncRNA_gene',
+    'biological_region',
+    'pseudogene',
+    'enhancer',
+    'promoter',
+    'region',
+    'protein_binding_site',
+}
+GFF_RNALIKE_FEATURES = {  # strip_id_field maps each entry, used as an ID prefix, to the type 'transcript'
+    'rna',
+    'mRNA',
+    'lncRNA',
+    'transcript',
+    'lnc_RNA',
+    'pseudogenic_transcript',
+    'snRNA',
+    'miRNA',
+    'unconfirmed_transcript',
+    'ncRNA',
+    'snoRNA',
+    'scRNA',
+}
+GFF_ALL_FEATURES = GFF_GENELIKE_FEATURES | GFF_RNALIKE_FEATURES | {'CDS', 'exon'}  # both sets plus CDS and exon
+GFF_ID_DELIMITER = '_'  # NOTE(review): consumers are outside this view; presumably joins ID parts, confirm
+GFF_ATTRS = [  # NOTE(review): looks like the attribute keys the converters recognise; confirm against usage
+    'Alias',
+    'bound_moiety',
+    'DBxref',
+    'Derives_from',
+    'exon_id',
+    'exon_number',
+    'exon_version',
+    'function',
+    'Gap',
+    'gene_id',
+    'gene_name',
+    'gene_version',
+    'ID',
+    'Name',
+    'Note',
+    'old-name',
+    'Ontology_term',
+    'Parent',
+    'product',
+    'protein_id',
+    'protein_version',
+    'rank',
+    'standard_name',
+    'Target',
+    'transcript_id',
+    'transcript_name',
+    'transcript_version',
+    'version',
+]
+GFF_KEY_COLS = ['feature_id', 'type', 'seqid', 'strand']  # NOTE(review): presumably identifies one feature; confirm
+
+
+def agg_strings_unique(series):
+    """Join the distinct values of a pandas Series (missing values become '') into one ';'-delimited string"""
+    return ';'.join(series.fillna('').astype(str).unique())
+
+
+def strip_empty_fields(input_obj):
+    """Recursively drop dict entries whose value is '' or an empty list, to reduce the size of the output"""
+    if isinstance(input_obj, list):
+        return [strip_empty_fields(item) for item in input_obj]
+    if not isinstance(input_obj, dict):
+        return input_obj
+
+    def is_empty(value):
+        return value == '' or (isinstance(value, list) and not len(value))
+
+    return {
+        key: strip_empty_fields(value) for key, value in input_obj.items() if not is_empty(value)
+    }
+
+
 def convert_tab_to_json(filepath: str) -> Dict:
     """
     given a file in the std input format (see below) reads and return a list of genes (and sub-objects)
@@ -42,7 +124,7 @@ def convert_tab_to_json(filepath: str) -> Dict:
     +-----------------------+---------------------------+-----------------------------------------------------------+
     | cdna_coding_end       | 150                       | where translation terminates                              |
     +-----------------------+---------------------------+-----------------------------------------------------------+
-    | genomic_exon_ranges   | 100-201;334-412;779-830   | semi-colon demitited exon start/ends                      |
+    | genomic_exon_ranges   | 100-201;334-412;779-830   | semi-colon delimited exon start/ends                      |
     +-----------------------+---------------------------+-----------------------------------------------------------+
     | AA_domain_ranges      | DBD:220-251,260-271       | semi-colon delimited list of domains                      |
     +-----------------------+---------------------------+-----------------------------------------------------------+
@@ -162,117 +244,403 @@ def parse_domain_list(row):
     return {'genes': list(genes.values())}
 
 
+def strip_id_field(feature_id) -> Tuple[str, str]:
+    """
+    Split a leading type prefix (case-insensitive) plus one delimiter character off an ID; returns (type or '', rest)
+    """
+    prefix_map = {k.lower(): k for k in ['gene', 'transcript', 'cds', 'exon']}
+    prefix_map.update({k.lower(): 'gene' for k in GFF_GENELIKE_FEATURES})
+    prefix_map.update({k.lower(): 'transcript' for k in GFF_RNALIKE_FEATURES})
+    if isinstance(feature_id, str):  # null IDs (NaN/None) pass through untouched
+        for prefix in sorted(prefix_map, key=lambda p: (-len(p), p)):  # longest match wins
+            if feature_id.lower().startswith(prefix):
+                return prefix_map[prefix], feature_id[len(prefix) + 1 :]
+    return '', feature_id
+
+
+def parse_gff_id(row):
+    """Return the prefix-stripped ID attribute, else the ID column matching the row type"""
+    stripped_id = strip_id_field(row.ID if 'ID' in row else '')[1]
+    if stripped_id:
+        return stripped_id
+
+    # no usable ID attribute: fall back on the type-specific ID column when the row has it
+    if row.type == 'exon' and 'exon_id' in row:
+        return row.exon_id
+    if row.type == 'gene' and 'gene_id' in row:
+        return row.gene_id
+    if row.type == 'transcript' and 'transcript_id' in row:
+        return row.transcript_id
+    if row.type.lower() == 'cds' and 'protein_id' in row:
+        return row.protein_id
+    return stripped_id
+
+
+def pull_alias_terms(row):
+    """List the row's other names (';'-split name columns, ','-split Alias) minus blanks/its ID"""
+    aliases = []
+    for field, sep in [('Name', ';'), ('standard_name', ';'), ('old-name', ';'), ('Alias', ',')]:
+        if row[field] and not pd.isnull(row[field]):
+            aliases.extend(row[field].split(sep))
+    # splitting values such as 'a;;b' yields empty pieces, never report those as aliases
+    return [a for a in aliases if a and a != row.feature_id]
+
+
+class NumberedFeatureGenerator:
+    """Callable producing '<parent_id><prefix><n>' names that are not yet in a collection"""
+
+    def __init__(self):
+        self.counter = 0  # kept between calls and only ever incremented
+
+    def __call__(self, features, parent_id, prefix='-T'):
+        while f'{parent_id}{prefix}{self.counter}' in features:
+            self.counter += 1
+        return f'{parent_id}{prefix}{self.counter}'
+
+
+def split_col_into_rows(df, col, delimiter=',', new_col=None):
+    """
+    Split the string column `col` on `delimiter` and repeat the row once per resulting value, stored in `new_col` (default: `col`)
+    """
+    if not new_col:
+        new_col = col
+    new_df = df.copy().reset_index()
+    # one entry per split value; dropping the innermost index level realigns them with new_df rows
+    s = new_df[col].str.split(delimiter).apply(pd.Series, 1).stack()
+    s.index = s.index.droplevel(-1)
+    s.name = new_col
+    # when overwriting in place, remove the unsplit column so the merge does not keep two copies
+    if new_col == col:
+        new_df = new_df.drop(columns=[new_col])
+    return new_df.merge(s, left_index=True, right_index=True)
+
+
+def print_marker(df, links_df=None):  # debug helper: caller location, shapes, IDs per type
+    stack = traceback.extract_stack(limit=2)[0]  # the frame that called print_marker
+    print(f'{stack.filename}:{stack.lineno} {stack.name}')
+    print(df.shape, links_df.shape if links_df is not None else '')
+    print(df.groupby(['type']).agg({'feature_id': ['count', 'unique']}).reset_index())
+
+
+def fix_dangling_parent_reference(nodes_df, links_df):
+    """
+    Insert a pseudo element for any parents referenced by an element that do not already have their own line/definition
+    NOTE(review): rename() below has no `columns=`, so it relabels the index and not the columns - confirm intent
+    Returns a tuple of (nodes_df with the pseudo elements appended, links_df as it was passed in)
+    """
+    dangling_refs = links_df.rename(  # NOTE(review): positional mapper targets the index labels
+        {
+            'parent_id': 'feature_id',
+            'parent_type': 'type',
+            'feature_id': 'child_id',
+            'type': 'child_type',
+        }
+    ).merge(nodes_df[GFF_KEY_COLS], how='left', indicator=True)
+    dangling_refs = dangling_refs[dangling_refs._merge == 'left_only']  # rows without a node
+    # now join back to its children to create coordinates that are the interval covering all connected children
+    dangling_refs = dangling_refs.merge(
+        nodes_df[GFF_KEY_COLS + ['start', 'end', 'row_index']].rename(
+            columns={'feature_id': 'child_id', 'type': 'child_type'}
+        )
+    )
+    dangling_refs = (
+        dangling_refs.groupby(GFF_KEY_COLS)
+        .agg(
+            {
+                'start': 'min',
+                'end': 'max',
+                'row_index': agg_strings_unique,
+            }
+        )
+        .reset_index()
+    )
+    if dangling_refs.shape[0]:
+        logging.warning(f'Inserting {dangling_refs.shape[0]} missing parent element definitions')
+
+    return pd.concat([nodes_df, dangling_refs]).reset_index(drop=True), links_df
+
+
+def fix_orphan_elements(nodes_df, links_df):
+    """
+    Give every CDS/exon link with an empty parent_id a newly inserted 'mock' gene as its parent
+    Returns a tuple of (nodes_df with the mock genes appended, the re-parented copy of links_df)
+    """
+    links_df = links_df.copy()
+    # flag the parentless CDS/exon links with a temporary marker column
+    links_df['_orphan'] = False
+    links_df.loc[
+        (links_df.parent_id == '') & (links_df.type.isin({'CDS', 'exon'})), '_orphan'
+    ] = True
+    links_df.loc[links_df._orphan, 'parent_id'] = 'G' + GFF_ID_DELIMITER + links_df.feature_id
+    links_df.loc[links_df._orphan, 'parent_type'] = 'gene'
+    # pair each orphan with its node coordinates, then swap roles so the new parent is the feature
+    new_genes_df = (
+        links_df[links_df._orphan]
+        .merge(nodes_df[GFF_KEY_COLS + ['start', 'end']])
+        .rename(
+            columns={
+                'feature_id': 'child_id',
+                'type': 'child_type',
+                'parent_id': 'feature_id',
+                'parent_type': 'type',
+            }
+        )
+    )
+    new_genes_df = (
+        new_genes_df.groupby(GFF_KEY_COLS)
+        .agg({'start': 'min', 'end': 'max', 'row_index': agg_strings_unique})
+        .reset_index()
+    )
+    # the marker column is not part of the returned links
+    links_df = links_df.drop(columns=['_orphan'])
+    if new_genes_df.shape[0]:
+        logging.warning(
+            f'Inserting {new_genes_df.shape[0]} new genes to connect to orphan elements'
+        )
+    return pd.concat([nodes_df, new_genes_df]).reset_index(drop=True), links_df
+
+
+def insert_missing_transcripts(nodes_df, links_df):
+    """
+    For any non-transcript element (not only cds) whose parent is a gene, link it through a '<gene id>' + GFF_ID_DELIMITER + 'T' transcript instead
+    """
+    direct_links_df = links_df[(links_df.parent_type == 'gene') & (links_df.type != 'transcript')]
+    rest_links_df = links_df[(links_df.parent_type != 'gene') | (links_df.type == 'transcript')]
+    # new transcript -> gene links; NOTE(review): one row per direct child, so possibly repeated
+    src_transcript_df = direct_links_df.copy()
+    src_transcript_df['feature_id'] = src_transcript_df.parent_id + GFF_ID_DELIMITER + 'T'
+    src_transcript_df['type'] = 'transcript'
+    # the original children, re-pointed at the new transcript
+    tgt_transcript_df = direct_links_df.copy()
+    tgt_transcript_df['parent_id'] = tgt_transcript_df.parent_id + GFF_ID_DELIMITER + 'T'
+    tgt_transcript_df['parent_type'] = 'transcript'
+    # the direct child -> gene links themselves are left out of the new link table
+    links_df = pd.concat([rest_links_df, src_transcript_df, tgt_transcript_df]).reset_index(
+        drop=True
+    )
+
+    if direct_links_df.shape[0]:
+        logging.warning(
+            f'Inserting {direct_links_df.shape[0]} transcripts between lower element to gene connections'
+        )
+    # the new transcript IDs have no node rows yet, this call is what adds them
+    return fix_dangling_parent_reference(nodes_df, links_df)
+
+
+def validate_gff_coordinates(nodes_df, links_df):
+    """
+    Raise a ValueError when any linked child element extends outside of its parent element
+    """
+    coords_df = nodes_df[GFF_KEY_COLS + ['start', 'end']]
+    parent_coords_df = coords_df.rename(
+        columns={
+            'feature_id': 'parent_id',
+            'type': 'parent_type',
+            'start': 'parent_start',
+            'end': 'parent_end',
+        }
+    )
+    df = links_df.merge(coords_df).merge(parent_coords_df)
+    # a child must start at/after and end at/before its parent
+    errors = df[(df.parent_start > df.start) | (df.parent_end < df.end)]
+    if not errors.shape[0]:
+        return
+
+    for _, row in errors.iterrows():
+        logging.debug(
+            f'{row.feature_id} ({row.start}-{row.end}) is not within its parent element {row.parent_id} ({row.parent_start}-{row.parent_end})'
+        )
+    raise ValueError(f'{errors.shape[0]} entries with impossible coordinates')
+
+
 def convert_pandas_gff_to_mavis(df) -> Dict:
-    df['parent_type'] = df.Parent.str.split(':').str[0]
-    genelike_features = {'gene', 'ncRNA_gene', 'biological_region', 'pseudogene'}
-    consumed = set()
-
-    def pull_alias_terms(row):
-        aliases = []
-        if row['Name']:
-            aliases.append(row['Name'])
-        if row['Alias']:
-            aliases.extend(row['Alias'].split(','))
-        return aliases
+    df['error'] = ''
+    df.loc[~df.type.isin(GFF_ALL_FEATURES), 'error'] = 'unrecognized type ' + df.type
+    df = split_col_into_rows(df, 'Parent', ',')
+    # simplify the type
+    df['biotype'] = df.type.fillna('')
+
+    def simplify_type(t):
+        if t in GFF_GENELIKE_FEATURES:
+            return 'gene'
+        elif t in GFF_RNALIKE_FEATURES:
+            return 'transcript'
+        return t
+
+    df['type'] = df.type.apply(simplify_type).fillna('')
+    df['parent_type'] = (
+        df.Parent.apply(lambda x: strip_id_field(x)[0]).fillna('').apply(simplify_type)
+    )
+    df['parent_id'] = df.Parent.apply(lambda x: strip_id_field(x)[1]).fillna('')
+    df.loc[df.type == 'gene', 'parent_type'] = 'seq'
+    df.loc[df.type == 'gene', 'parent_id'] = df.seqid
+
+    if df[df.error != ''].shape[0]:
+        logging.warning(
+            f'dropping {df[df.error != ""].shape[0]} features that did not match an expected type: {df[df.error != ""].type.unique()}'
+        )
+    df = df[df.error == '']
+
+    if df[df.feature_id == ''].shape[0]:
+        logging.warning(f'dropping {df[df.feature_id == ""].shape[0]} rows for missing ID')
+    df = df[df.feature_id != '']
+    df['regions'] = df.start.astype(str) + '-' + df.end.astype(str)
+
+    # use the feature key to group elements that are discontinuous
+    links_df = (
+        df.sort_values(['seqid', 'start'])
+        .groupby(GFF_KEY_COLS + ['parent_type', 'parent_id'])
+        .agg({'row_index': agg_strings_unique})
+        .reset_index()
+    )
+    nodes_df = (
+        df.sort_values(['seqid', 'start'])
+        .groupby(GFF_KEY_COLS)
+        .agg(
+            {
+                'start': 'min',
+                'end': 'max',
+                'regions': agg_strings_unique,
+                'version': agg_strings_unique,
+                'Note': agg_strings_unique,
+                'Name': agg_strings_unique,
+                'Alias': agg_strings_unique,
+                'biotype': agg_strings_unique,
+                'exon_number': agg_strings_unique,
+                'row_index': agg_strings_unique,
+                'source': agg_strings_unique,
+                'standard_name': agg_strings_unique,
+                'old-name': agg_strings_unique,
+            }
+        )
+        .reset_index()
+    )
+    nodes_df, links_df = fix_dangling_parent_reference(nodes_df, links_df)
+    nodes_df, links_df = fix_orphan_elements(nodes_df, links_df)
+    nodes_df, links_df = insert_missing_transcripts(nodes_df, links_df)
+    validate_gff_coordinates(nodes_df, links_df)
+
+    df = nodes_df.merge(links_df, how='outer', on=GFF_KEY_COLS).fillna('')
+
+    def feature_key(row, parent=False):
+        if not parent:
+            return tuple([row[c] for c in ['feature_id', 'type', 'seqid', 'strand']])
+        else:
+            return tuple([row[c] for c in ['parent_id', 'parent_type', 'seqid', 'strand']])
 
     genes_by_id = {}
-    for row in df[df.type.isin(genelike_features)].to_dict('records'):
-        genes_by_id[row['feature_id']] = {
-            'start': row['start'],
-            'end': row['end'],
-            'chr': row['seqid'],
+    for _, row in df[df.type == 'gene'].iterrows():
+        genes_by_id[feature_key(row)] = {
+            'start': row.start,
+            'end': row.end,
+            'chr': row.seqid,
             'aliases': pull_alias_terms(row),
-            'strand': row['strand'],
+            'strand': row.strand,
             'transcripts': [],
-            'name': row['feature_id'] + '.' + row['version'],
+            'name': row.feature_id,
+            'version': row.version,
+            'biotype': row.biotype,
+            'note': row.Note,
         }
-        consumed.add(row['row_index'])
     logging.info(f'loaded {len(genes_by_id)} genes')
 
     transcripts_by_id = {}
+    df = df.fillna('')
 
-    for row in df[df.parent_type == 'gene'].to_dict('records'):
-        for parent in row['Parent'].split(','):
-            gene_id = parent.split(':')[1]
-            if gene_id not in genes_by_id:
-                raise KeyError(
-                    f'cannot find gene ({gene_id}) skipping transcript ({row["feature_id"]})'
-                )
-            feature_id = row['feature_id']
-            transcript = {
-                'name': feature_id + '.' + row['version'],
-                'start': row['start'],
-                'end': row['end'],
-                'aliases': pull_alias_terms(row),
-                'domains': [],
-                'exons': [],
-                'cdna_coding_start': None,
-                'cdna_coding_end': None,
-            }
-            genes_by_id[gene_id]['transcripts'].append(transcript)
-            transcripts_by_id[feature_id] = transcript
-            consumed.add(row['row_index'])
+    for _, row in df[df.type == 'transcript'].iterrows():
+        parent_key = feature_key(row, True)
+        if parent_key not in genes_by_id:
+            raise KeyError(
+                f'cannot find gene ({row.parent_id}) skipping feature ({row.feature_id}) on line ({row.row_index})'
+            )
+        feature_id = row.feature_id
+        transcript = {
+            'name': feature_id,
+            'start': row.start,
+            'end': row.end,
+            'aliases': pull_alias_terms(row),
+            'domains': [],
+            'exons': [],
+            'version': row.version,
+            'note': row.Note,
+            'biotype': row.biotype,
+        }
+        genes_by_id[parent_key]['transcripts'].append(transcript)
+        transcripts_by_id[feature_key(row)] = transcript
 
-    logging.info(f'loaded {len(transcripts_by_id)} transcripts')
     # now cds
-    cds_count = 0
-    for row in df[df.type == 'CDS'].to_dict('records'):
-        for parent in row['Parent'].split(','):
-            transcript_id = parent.split(':')[1]
-            if transcript_id not in transcripts_by_id:
-                raise KeyError(
-                    f'failed to find parent transcript ({transcript_id}) skipping cds on line ({row["row_index"] + 1})'
-                )
-            transcripts_by_id[transcript_id].update(
-                {'cdna_coding_start': row['start'], 'cdna_coding_end': row['end']}
+    cds_by_id = {}
+    for _, row in df[df.type == 'CDS'].iterrows():
+        parent_key = feature_key(row, True)
+        if parent_key not in transcripts_by_id:
+            print(row)
+            raise KeyError(
+                f'failed to find parent transcript ({row.parent_id}) skipping cds ({row.feature_id}) on line ({row.row_index})'
             )
-            cds_count += 1
-            consumed.add(row['row_index'])
-    logging.info(f'loaded {cds_count} cds regions')
+        parent = transcripts_by_id[parent_key]
+        parent.setdefault('translations', [])
+        cds = {
+            'start': row.start,
+            'end': row.end,
+            'name': row.feature_id,
+            'aliases': pull_alias_terms(row),
+            'version': row.version,
+            'note': row.Note,
+            'biotype': row.biotype,
+        }
+        parent['translations'].append(cds)
+        cds_by_id[feature_key(row)] = cds
+
+    logging.info(f'loaded {len(transcripts_by_id)} transcripts')
+    logging.info(f'loaded {len(cds_by_id)} cds regions')
     # exons
-    exons_count = 0
-    for row in df[df.type == 'exon'].to_dict('records'):
-        for parent in row['Parent'].split(','):
-            transcript_id = parent.split(':')[1]
-            if transcript_id not in transcripts_by_id:
-                raise KeyError(
-                    f'failed to find parent transcript ({transcript_id}) skipping exon ({row["feature_id"]}) on line {row["row_index"] + 1}'
-                )
-            transcripts_by_id[transcript_id]['exons'].append(
-                {
-                    'start': row['start'],
-                    'end': row['end'],
-                    'name': row['feature_id'] + '.' + row['version'],
-                }
+    exons_by_id = {}
+
+    for _, row in df[df.type == 'exon'].iterrows():
+        parent_key = feature_key(row, True)
+        if parent_key not in transcripts_by_id:
+            raise KeyError(
+                f'failed to find parent transcript ({row.parent_id}) skipping exon ({row["feature_id"]}) index={row["row_index"]}'
             )
-            exons_count += 1
-            consumed.add(row['row_index'])
+        exon = {
+            'start': row.start,
+            'end': row.end,
+            'name': row.feature_id,
+            'version': row.version,
+            'number': row.exon_number,
+        }
+        transcripts_by_id[parent_key]['exons'].append(exon)
+        exons_by_id[feature_key(row)] = exon
 
-    logging.info(f'loaded {exons_count} exons')
+    logging.info(f'loaded {len(exons_by_id)} exons')
 
-    ignored_df = df[~df.row_index.isin(consumed)]
+    ignored_df = df[~df.type.isin({'exon', 'CDS', 'transcript', 'gene'})]
     if ignored_df.shape[0]:
         logging.warning(
             f'Ignored {ignored_df.shape[0]} rows that did not match the expected types: {ignored_df.type.unique()}'
         )
 
-    result = {'genes': list(genes_by_id.values())}
+    result = strip_empty_fields({'genes': list(genes_by_id.values())})
+
     try:
-        snakemake_validate(
-            result, pkg_resources.resource_filename('mavis.annotate', 'annotations_schema.json')
-        )
+        parse_annotations_json(result)
     except Exception as err:
         short_msg = '. '.join(
             [line for line in str(err).split('\n') if line.strip()][:3]
         )  # these can get super long
+        with open('tmp_out.json', 'w') as fh:
+            fh.write(json.dumps(result, sort_keys=True, indent='  '))
         raise AssertionError(short_msg)
+    # re-strip (mavis adds defaults)
+    result = strip_empty_fields({'genes': list(genes_by_id.values())})
     return result
 
 
-def convert_gff3_to_mavis(filename: str, no_alt) -> Dict:
+def convert_gff3_to_mavis(filename: str, no_alt=False) -> Dict:
     """
     Convert an input gff3 file to the JSON format accepted by MAVIS
     """
+    logging.info(f'reading: {filename}')
     df = pd.read_csv(
         filename,
         sep='\t',
@@ -304,50 +672,36 @@ def convert_gff3_to_mavis(filename: str, no_alt) -> Dict:
     }
     df = df[~df.type.isin(skip_types)]
 
-    attribute_columns = [
-        'ID',
-        'Name',
-        'Alias',
-        'Parent',
-        'Target',
-        'Gap',
-        'Derives_from',
-        'Note',
-        'DBxref',
-        'Ontology_term',
-        'rank',
-        'version',
-        'exon_id',
-    ]
-
     def split_attributes(row):
         result = {}
         for attr in row.attributes.split(';'):
             name, value = attr.split('=')
             result[name] = value
-        return [row.row_index] + [result.get(c, '') for c in attribute_columns]
+        return [row.row_index] + [result.get(c, '') for c in GFF_ATTRS]
 
     prev_size = df.shape[0]
     attrs_df = pd.DataFrame(
         df.apply(split_attributes, axis=1).tolist(),
-        columns=['row_index'] + attribute_columns,
+        columns=['row_index'] + GFF_ATTRS,
     )
     assert prev_size == attrs_df.shape[0]
     df = df.merge(attrs_df, on=['row_index'])
+    df = df.drop(columns=['attributes'])
 
     assert prev_size == df.shape[0]
 
-    df['feature_id'] = df['ID'].apply(lambda id: id.split(':')[1] if ':' in id else '')
+    df['feature_id'] = df.apply(parse_gff_id, axis=1)
     df.loc[(df.feature_id == '') & (df.type == 'exon'), 'feature_id'] = df.exon_id
     df = df[df.feature_id != '']
-    df['strand'] = df.strand.fillna('?')
+    df['strand'] = df.strand.fillna('')
     return convert_pandas_gff_to_mavis(df)
 
 
-def convert_gff2_to_mavis(filename: str, no_alt) -> Dict:
+def convert_gff2_to_mavis(filename: str, no_alt=False) -> Dict:
     """
     Convert an input gff2/gtf file to the JSON format accepted by MAVIS
     """
+    logging.info(f'reading: {filename}')
     df = pd.read_csv(
         filename,
         sep='\t',
@@ -392,47 +746,34 @@ def convert_gff2_to_mavis(filename: str, no_alt) -> Dict:
     }
     df = df[~df.type.isin(skip_types)]
 
-    attribute_columns = [
-        'gene_id',
-        'gene_version',
-        'gene_name',
-        'transcript_id',
-        'transcript_version',
-        'transcript_name',
-        'exon_id',
-        'exon_version',
-    ]
-
     def split_attributes(row):
         result = {}
         for attr in row.attributes.split('";'):
-            if not attr:
+            if not attr.strip():
                 continue
             m = re.match(r'^\s*([^"]+)\s+"(.*)"?$', attr)
             if not m:
                 raise KeyError(f'attributes do not follow expected pattern: {attr}')
             result[m.group(1)] = m.group(2)
-        return [row.row_index] + [result.get(c, '') for c in attribute_columns]
+        return [row.row_index] + [result.get(c, '') for c in GFF_ATTRS]
 
     prev_size = df.shape[0]
     attrs_df = pd.DataFrame(
         df.apply(split_attributes, axis=1).tolist(),
-        columns=['row_index'] + attribute_columns,
+        columns=['row_index'] + GFF_ATTRS,
     )
     assert prev_size == attrs_df.shape[0]
     df = df.merge(attrs_df, on=['row_index'])
     assert prev_size == df.shape[0]
+    df = df.drop(columns=['attributes'])
 
     df['Alias'] = ''
-    df['feature_id'] = ''
-    df.loc[df.type == 'exon', 'feature_id'] = df.exon_id
-    df.loc[df.type == 'gene', 'feature_id'] = df.gene_id
-    df.loc[df.type == 'transcript', 'feature_id'] = df.transcript_id
+    df['feature_id'] = df.apply(parse_gff_id, axis=1)
 
     df['Name'] = ''
     df.loc[df.type == 'gene', 'Name'] = df.gene_name
     df.loc[df.type == 'transcript', 'Name'] = df.transcript_name
-    df['strand'] = df.strand.fillna('?')
+    df['strand'] = df.strand.fillna('')
 
     df['Parent'] = ''
     df.loc[(df.type == 'transcript') & (df.gene_id != ''), 'Parent'] = 'gene:' + df.gene_id
@@ -442,23 +783,53 @@ def split_attributes(row):
     df.loc[(df.type == 'CDS') & (df.transcript_id != ''), 'Parent'] = (
         'transcript:' + df.transcript_id
     )
+    df.loc[
+        (df.type == 'CDS') & df.Parent.str.startswith('transcript:unassigned_transcript_'), 'Parent'
+    ] = ''
+    df.loc[(df.type == 'CDS') & (df.Parent == '') & (df.gene_id != ''), 'Parent'] = (
+        'gene:' + df.gene_id
+    )
 
     df['version'] = ''
     df.loc[df.type == 'transcript', 'version'] = df.transcript_version
     df.loc[df.type == 'exon', 'version'] = df.exon_version
     df.loc[df.type == 'gene', 'version'] = df.gene_version
+    df.loc[df.type == 'CDS', 'version'] = df.protein_version
 
-    df['strand'] = df.strand.fillna('?')
+    df['strand'] = df.strand.fillna('')
     return convert_pandas_gff_to_mavis(df)
 
 
+def convert_mavis_json_2to3(filename):
+    """
+    Load a MAVIS v2 style annotations JSON file and return its content with the transcript-level
+    cdna_coding_start, cdna_coding_end and domains keys moved into a single 'translations' entry
+    """
+    logging.info(f'loading: {filename}')
+    with open(filename, 'r') as fh:
+        content = json.load(fh)
+
+    legacy_keys = ['cdna_coding_start', 'cdna_coding_end', 'domains']
+    for gene in content['genes']:
+        for transcript in gene.get('transcripts', []):
+            # transcripts where all of these are missing/empty get no translation
+            if any(transcript.get(k) for k in legacy_keys):
+                # move only what is present: a partially annotated transcript must not raise KeyError
+                transcript['translations'] = [
+                    {k: transcript.pop(k) for k in legacy_keys if k in transcript}
+                ]
+    # presumably validates the converted content (parse_annotations_json is defined elsewhere)
+    parse_annotations_json(content)
+    content = strip_empty_fields(content)
+    return content
+
+
 if __name__ == '__main__':
-    logging.basicConfig(format='{message}', style='{', level=logging.INFO)
     parser = argparse.ArgumentParser()
     parser.add_argument(
         'input', help='path to the tab-delimated mavis v2 style reference annotations file'
     )
-    parser.add_argument('--input_type', default='v2', choices=['v2', 'gff3', 'gtf'])
+    parser.add_argument('--input_type', default='v2-tab', choices=['v2-tab', 'v2-json', 'gff3', 'gtf'])
     parser.add_argument('output', help='path to the JSON output file')
     parser.add_argument(
         '--keep_alt',
@@ -466,11 +837,18 @@ def split_attributes(row):
         action='store_true',
         default=False,
     )
+    parser.add_argument(
+        '--log_level', choices=['INFO', 'DEBUG', 'WARNING', 'ERROR'], default='INFO'
+    )
 
     args = parser.parse_args()
 
-    if args.input_type == 'v2':
+    logging.basicConfig(format='{message}', style='{', level=logging.getLevelName(args.log_level))
+
+    if args.input_type == 'v2-tab':
         annotations = convert_tab_to_json(args.input)
+    elif args.input_type == 'v2-json':
+        annotations = convert_mavis_json_2to3(args.input)
     elif args.input_type == 'gtf':
         annotations = convert_gff2_to_mavis(args.input, not args.keep_alt)
     else:
diff --git a/tests/data/example_genes.json b/tests/data/example_genes.json
index f1a6cf8e..470ac202 100644
--- a/tests/data/example_genes.json
+++ b/tests/data/example_genes.json
@@ -1,8037 +1 @@
-{
-    "genes": [
-    	{
-            "aliases": [
-                "EGFR"
-            ],
-            "chr": "7",
-            "end": 55324313,
-            "name": "ENSG00000146648",
-            "start": 55086714,
-            "strand": "+",
-            "transcripts": [
-                {
-                    "cdna_coding_end": 3533,
-                    "cdna_coding_start": 258,
-                    "domains": [
-                        {
-                            "name": "PIRSF000619",
-                            "regions": [
-                                {
-                                    "end": 1090,
-                                    "start": 1
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PF07714",
-                            "regions": [
-                                {
-                                    "end": 920,
-                                    "start": 669
-                                }
-                            ]
-                        },
-                        {
-                            "name": "SSF52058",
-                            "regions": [
-                                {
-                                    "end": 191,
-                                    "start": 28
-                                },
-                                {
-                                    "end": 475,
-                                    "start": 283
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PF00757",
-                            "regions": [
-                                {
-                                    "end": 293,
-                                    "start": 141
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PS50011",
-                            "regions": [
-                                {
-                                    "end": 934,
-                                    "start": 667
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PS50311",
-                            "regions": [
-                                {
-                                    "end": 219,
-                                    "start": 145
-                                }
-                            ]
-                        },
-                        {
-                            "name": "SSF57184",
-                            "regions": [
-                                {
-                                    "end": 290,
-                                    "start": 142
-                                },
-                                {
-                                    "end": 593,
-                                    "start": 460
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PR00109",
-                            "regions": [
-                                {
-                                    "end": 758,
-                                    "start": 745
-                                },
-                                {
-                                    "end": 800,
-                                    "start": 782
-                                },
-                                {
-                                    "end": 841,
-                                    "start": 831
-                                },
-                                {
-                                    "end": 872,
-                                    "start": 850
-                                },
-                                {
-                                    "end": 916,
-                                    "start": 894
-                                }
-                            ]
-                        },
-                        {
-                            "name": "SSF56112",
-                            "regions": [
-                                {
-                                    "end": 975,
-                                    "start": 651
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PF01030",
-                            "regions": [
-                                {
-                                    "end": 141,
-                                    "start": 57
-                                },
-                                {
-                                    "end": 435,
-                                    "start": 316
-                                }
-                            ]
-                        },
-                        {
-                            "name": "SM00220",
-                            "regions": [
-                                {
-                                    "end": 924,
-                                    "start": 667
-                                }
-                            ]
-                        },
-                        {
-                            "name": "SM00261",
-                            "regions": [
-                                {
-                                    "end": 225,
-                                    "start": 183
-                                },
-                                {
-                                    "end": 502,
-                                    "start": 451
-                                },
-                                {
-                                    "end": 556,
-                                    "start": 507
-                                }
-                            ]
-                        },
-                        {
-                            "name": "SM00219",
-                            "regions": [
-                                {
-                                    "end": 923,
-                                    "start": 667
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PF00069",
-                            "regions": [
-                                {
-                                    "end": 919,
-                                    "start": 667
-                                }
-                            ]
-                        }
-                    ],
-                    "end": 55270769,
-                    "exons": [
-                        {
-                            "end": 55087058,
-                            "name": null,
-                            "start": 55086714
-                        },
-                        {
-                            "end": 55210130,
-                            "name": null,
-                            "start": 55209979
-                        },
-                        {
-                            "end": 55211181,
-                            "name": null,
-                            "start": 55210998
-                        },
-                        {
-                            "end": 55219055,
-                            "name": null,
-                            "start": 55218987
-                        },
-                        {
-                            "end": 55220357,
-                            "name": null,
-                            "start": 55220239
-                        },
-                        {
-                            "end": 55221845,
-                            "name": null,
-                            "start": 55221704
-                        },
-                        {
-                            "end": 55223639,
-                            "name": null,
-                            "start": 55223523
-                        },
-                        {
-                            "end": 55224352,
-                            "name": null,
-                            "start": 55224226
-                        },
-                        {
-                            "end": 55224525,
-                            "name": null,
-                            "start": 55224452
-                        },
-                        {
-                            "end": 55225446,
-                            "name": null,
-                            "start": 55225356
-                        },
-                        {
-                            "end": 55228031,
-                            "name": null,
-                            "start": 55227832
-                        },
-                        {
-                            "end": 55229324,
-                            "name": null,
-                            "start": 55229192
-                        },
-                        {
-                            "end": 55231516,
-                            "name": null,
-                            "start": 55231426
-                        },
-                        {
-                            "end": 55233130,
-                            "name": null,
-                            "start": 55232973
-                        },
-                        {
-                            "end": 55238906,
-                            "name": null,
-                            "start": 55238868
-                        },
-                        {
-                            "end": 55240817,
-                            "name": null,
-                            "start": 55240676
-                        },
-                        {
-                            "end": 55241736,
-                            "name": null,
-                            "start": 55241614
-                        },
-                        {
-                            "end": 55242513,
-                            "name": null,
-                            "start": 55242415
-                        },
-                        {
-                            "end": 55249171,
-                            "name": null,
-                            "start": 55248986
-                        },
-                        {
-                            "end": 55259567,
-                            "name": null,
-                            "start": 55259412
-                        },
-                        {
-                            "end": 55260534,
-                            "name": null,
-                            "start": 55260459
-                        },
-                        {
-                            "end": 55266556,
-                            "name": null,
-                            "start": 55266410
-                        },
-                        {
-                            "end": 55268106,
-                            "name": null,
-                            "start": 55268009
-                        },
-                        {
-                            "end": 55269048,
-                            "name": null,
-                            "start": 55268881
-                        },
-                        {
-                            "end": 55269475,
-                            "name": null,
-                            "start": 55269428
-                        },
-                        {
-                            "end": 55270769,
-                            "name": null,
-                            "start": 55270210
-                        }
-                    ],
-                    "is_best_transcript": false,
-                    "name": "ENST00000455089",
-                    "start": 55086714
-                },
-                {
-                    "cdna_coding_end": 2133,
-                    "cdna_coding_start": 247,
-                    "domains": [
-                        {
-                            "name": "PS50311",
-                            "regions": [
-                                {
-                                    "end": 264,
-                                    "start": 187
-                                }
-                            ]
-                        },
-                        {
-                            "name": "SSF57184",
-                            "regions": [
-                                {
-                                    "end": 339,
-                                    "start": 182
-                                },
-                                {
-                                    "end": 624,
-                                    "start": 505
-                                }
-                            ]
-                        },
-                        {
-                            "name": "SSF52058",
-                            "regions": [
-                                {
-                                    "end": 211,
-                                    "start": 29
-                                },
-                                {
-                                    "end": 520,
-                                    "start": 328
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PF00757",
-                            "regions": [
-                                {
-                                    "end": 338,
-                                    "start": 185
-                                }
-                            ]
-                        },
-                        {
-                            "name": "SM00261",
-                            "regions": [
-                                {
-                                    "end": 270,
-                                    "start": 228
-                                },
-                                {
-                                    "end": 547,
-                                    "start": 496
-                                },
-                                {
-                                    "end": 601,
-                                    "start": 552
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PF01030",
-                            "regions": [
-                                {
-                                    "end": 167,
-                                    "start": 57
-                                },
-                                {
-                                    "end": 480,
-                                    "start": 361
-                                }
-                            ]
-                        }
-                    ],
-                    "end": 55236328,
-                    "exons": [
-                        {
-                            "end": 55087058,
-                            "name": null,
-                            "start": 55086725
-                        },
-                        {
-                            "end": 55210130,
-                            "name": null,
-                            "start": 55209979
-                        },
-                        {
-                            "end": 55211181,
-                            "name": null,
-                            "start": 55210998
-                        },
-                        {
-                            "end": 55214433,
-                            "name": null,
-                            "start": 55214299
-                        },
-                        {
-                            "end": 55219055,
-                            "name": null,
-                            "start": 55218987
-                        },
-                        {
-                            "end": 55220357,
-                            "name": null,
-                            "start": 55220239
-                        },
-                        {
-                            "end": 55221845,
-                            "name": null,
-                            "start": 55221704
-                        },
-                        {
-                            "end": 55223639,
-                            "name": null,
-                            "start": 55223523
-                        },
-                        {
-                            "end": 55224352,
-                            "name": null,
-                            "start": 55224226
-                        },
-                        {
-                            "end": 55224525,
-                            "name": null,
-                            "start": 55224452
-                        },
-                        {
-                            "end": 55225446,
-                            "name": null,
-                            "start": 55225356
-                        },
-                        {
-                            "end": 55228031,
-                            "name": null,
-                            "start": 55227832
-                        },
-                        {
-                            "end": 55229324,
-                            "name": null,
-                            "start": 55229192
-                        },
-                        {
-                            "end": 55231516,
-                            "name": null,
-                            "start": 55231426
-                        },
-                        {
-                            "end": 55233130,
-                            "name": null,
-                            "start": 55232973
-                        },
-                        {
-                            "end": 55236328,
-                            "name": null,
-                            "start": 55236216
-                        }
-                    ],
-                    "is_best_transcript": false,
-                    "name": "ENST00000342916",
-                    "start": 55086725
-                },
-                {
-                    "cdna_coding_end": 2363,
-                    "cdna_coding_start": 246,
-                    "domains": [
-                        {
-                            "name": "SSF57184",
-                            "regions": [
-                                {
-                                    "end": 339,
-                                    "start": 182
-                                },
-                                {
-                                    "end": 624,
-                                    "start": 505
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PS50311",
-                            "regions": [
-                                {
-                                    "end": 264,
-                                    "start": 187
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PF00757",
-                            "regions": [
-                                {
-                                    "end": 338,
-                                    "start": 185
-                                }
-                            ]
-                        },
-                        {
-                            "name": "SSF52058",
-                            "regions": [
-                                {
-                                    "end": 211,
-                                    "start": 29
-                                },
-                                {
-                                    "end": 520,
-                                    "start": 328
-                                }
-                            ]
-                        },
-                        {
-                            "name": "SM00261",
-                            "regions": [
-                                {
-                                    "end": 270,
-                                    "start": 228
-                                },
-                                {
-                                    "end": 547,
-                                    "start": 496
-                                },
-                                {
-                                    "end": 601,
-                                    "start": 552
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PF01030",
-                            "regions": [
-                                {
-                                    "end": 167,
-                                    "start": 57
-                                },
-                                {
-                                    "end": 480,
-                                    "start": 361
-                                }
-                            ]
-                        }
-                    ],
-                    "end": 55238738,
-                    "exons": [
-                        {
-                            "end": 55087058,
-                            "name": null,
-                            "start": 55086726
-                        },
-                        {
-                            "end": 55210130,
-                            "name": null,
-                            "start": 55209979
-                        },
-                        {
-                            "end": 55211181,
-                            "name": null,
-                            "start": 55210998
-                        },
-                        {
-                            "end": 55214433,
-                            "name": null,
-                            "start": 55214299
-                        },
-                        {
-                            "end": 55219055,
-                            "name": null,
-                            "start": 55218987
-                        },
-                        {
-                            "end": 55220357,
-                            "name": null,
-                            "start": 55220239
-                        },
-                        {
-                            "end": 55221845,
-                            "name": null,
-                            "start": 55221704
-                        },
-                        {
-                            "end": 55223639,
-                            "name": null,
-                            "start": 55223523
-                        },
-                        {
-                            "end": 55224352,
-                            "name": null,
-                            "start": 55224226
-                        },
-                        {
-                            "end": 55224525,
-                            "name": null,
-                            "start": 55224452
-                        },
-                        {
-                            "end": 55225446,
-                            "name": null,
-                            "start": 55225356
-                        },
-                        {
-                            "end": 55228031,
-                            "name": null,
-                            "start": 55227832
-                        },
-                        {
-                            "end": 55229324,
-                            "name": null,
-                            "start": 55229192
-                        },
-                        {
-                            "end": 55231516,
-                            "name": null,
-                            "start": 55231426
-                        },
-                        {
-                            "end": 55233130,
-                            "name": null,
-                            "start": 55232973
-                        },
-                        {
-                            "end": 55238738,
-                            "name": null,
-                            "start": 55238000
-                        }
-                    ],
-                    "is_best_transcript": false,
-                    "name": "ENST00000344576",
-                    "start": 55086726
-                },
-                {
-                    "cdna_coding_end": 1462,
-                    "cdna_coding_start": 245,
-                    "domains": [
-                        {
-                            "name": "SSF57184",
-                            "regions": [
-                                {
-                                    "end": 339,
-                                    "start": 182
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PS50311",
-                            "regions": [
-                                {
-                                    "end": 264,
-                                    "start": 187
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PF00757",
-                            "regions": [
-                                {
-                                    "end": 338,
-                                    "start": 185
-                                }
-                            ]
-                        },
-                        {
-                            "name": "SSF52058",
-                            "regions": [
-                                {
-                                    "end": 211,
-                                    "start": 29
-                                },
-                                {
-                                    "end": 403,
-                                    "start": 328
-                                }
-                            ]
-                        },
-                        {
-                            "name": "SM00261",
-                            "regions": [
-                                {
-                                    "end": 270,
-                                    "start": 228
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PF01030",
-                            "regions": [
-                                {
-                                    "end": 167,
-                                    "start": 57
-                                }
-                            ]
-                        }
-                    ],
-                    "end": 55224644,
-                    "exons": [
-                        {
-                            "end": 55087058,
-                            "name": null,
-                            "start": 55086727
-                        },
-                        {
-                            "end": 55210130,
-                            "name": null,
-                            "start": 55209979
-                        },
-                        {
-                            "end": 55211181,
-                            "name": null,
-                            "start": 55210998
-                        },
-                        {
-                            "end": 55214433,
-                            "name": null,
-                            "start": 55214299
-                        },
-                        {
-                            "end": 55219055,
-                            "name": null,
-                            "start": 55218987
-                        },
-                        {
-                            "end": 55220357,
-                            "name": null,
-                            "start": 55220239
-                        },
-                        {
-                            "end": 55221845,
-                            "name": null,
-                            "start": 55221704
-                        },
-                        {
-                            "end": 55223639,
-                            "name": null,
-                            "start": 55223523
-                        },
-                        {
-                            "end": 55224352,
-                            "name": null,
-                            "start": 55224226
-                        },
-                        {
-                            "end": 55224644,
-                            "name": null,
-                            "start": 55224452
-                        }
-                    ],
-                    "is_best_transcript": false,
-                    "name": "ENST00000420316",
-                    "start": 55086727
-                },
-                {
-                    "cdna_coding_end": 3810,
-                    "cdna_coding_start": 178,
-                    "domains": [
-                        {
-                            "name": "SM00220",
-                            "regions": [
-                                {
-                                    "end": 969,
-                                    "start": 712
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PF01030",
-                            "regions": [
-                                {
-                                    "end": 167,
-                                    "start": 57
-                                },
-                                {
-                                    "end": 480,
-                                    "start": 361
-                                }
-                            ]
-                        },
-                        {
-                            "name": "SSF56112",
-                            "regions": [
-                                {
-                                    "end": 1020,
-                                    "start": 696
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PF00069",
-                            "regions": [
-                                {
-                                    "end": 964,
-                                    "start": 712
-                                }
-                            ]
-                        },
-                        {
-                            "name": "SM00219",
-                            "regions": [
-                                {
-                                    "end": 968,
-                                    "start": 712
-                                }
-                            ]
-                        },
-                        {
-                            "name": "SM00261",
-                            "regions": [
-                                {
-                                    "end": 270,
-                                    "start": 228
-                                },
-                                {
-                                    "end": 547,
-                                    "start": 496
-                                },
-                                {
-                                    "end": 601,
-                                    "start": 552
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PF00757",
-                            "regions": [
-                                {
-                                    "end": 338,
-                                    "start": 185
-                                }
-                            ]
-                        },
-                        {
-                            "name": "SSF52058",
-                            "regions": [
-                                {
-                                    "end": 211,
-                                    "start": 29
-                                },
-                                {
-                                    "end": 520,
-                                    "start": 328
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PF07714",
-                            "regions": [
-                                {
-                                    "end": 965,
-                                    "start": 714
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PIRSF000619",
-                            "regions": [
-                                {
-                                    "end": 1210,
-                                    "start": 1
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PR00109",
-                            "regions": [
-                                {
-                                    "end": 803,
-                                    "start": 790
-                                },
-                                {
-                                    "end": 845,
-                                    "start": 827
-                                },
-                                {
-                                    "end": 886,
-                                    "start": 876
-                                },
-                                {
-                                    "end": 917,
-                                    "start": 895
-                                },
-                                {
-                                    "end": 961,
-                                    "start": 939
-                                }
-                            ]
-                        },
-                        {
-                            "name": "SSF57184",
-                            "regions": [
-                                {
-                                    "end": 339,
-                                    "start": 182
-                                },
-                                {
-                                    "end": 638,
-                                    "start": 505
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PS50311",
-                            "regions": [
-                                {
-                                    "end": 264,
-                                    "start": 187
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PS50011",
-                            "regions": [
-                                {
-                                    "end": 979,
-                                    "start": 712
-                                }
-                            ]
-                        }
-                    ],
-                    "end": 55279321,
-                    "exons": [
-                        {
-                            "end": 55087058,
-                            "name": null,
-                            "start": 55086794
-                        },
-                        {
-                            "end": 55210130,
-                            "name": null,
-                            "start": 55209979
-                        },
-                        {
-                            "end": 55211181,
-                            "name": null,
-                            "start": 55210998
-                        },
-                        {
-                            "end": 55214433,
-                            "name": null,
-                            "start": 55214299
-                        },
-                        {
-                            "end": 55219055,
-                            "name": null,
-                            "start": 55218987
-                        },
-                        {
-                            "end": 55220357,
-                            "name": null,
-                            "start": 55220239
-                        },
-                        {
-                            "end": 55221845,
-                            "name": null,
-                            "start": 55221704
-                        },
-                        {
-                            "end": 55223639,
-                            "name": null,
-                            "start": 55223523
-                        },
-                        {
-                            "end": 55224352,
-                            "name": null,
-                            "start": 55224226
-                        },
-                        {
-                            "end": 55224525,
-                            "name": null,
-                            "start": 55224452
-                        },
-                        {
-                            "end": 55225446,
-                            "name": null,
-                            "start": 55225356
-                        },
-                        {
-                            "end": 55228031,
-                            "name": null,
-                            "start": 55227832
-                        },
-                        {
-                            "end": 55229324,
-                            "name": null,
-                            "start": 55229192
-                        },
-                        {
-                            "end": 55231516,
-                            "name": null,
-                            "start": 55231426
-                        },
-                        {
-                            "end": 55233130,
-                            "name": null,
-                            "start": 55232973
-                        },
-                        {
-                            "end": 55238906,
-                            "name": null,
-                            "start": 55238868
-                        },
-                        {
-                            "end": 55240817,
-                            "name": null,
-                            "start": 55240676
-                        },
-                        {
-                            "end": 55241736,
-                            "name": null,
-                            "start": 55241614
-                        },
-                        {
-                            "end": 55242513,
-                            "name": null,
-                            "start": 55242415
-                        },
-                        {
-                            "end": 55249171,
-                            "name": null,
-                            "start": 55248986
-                        },
-                        {
-                            "end": 55259567,
-                            "name": null,
-                            "start": 55259412
-                        },
-                        {
-                            "end": 55260534,
-                            "name": null,
-                            "start": 55260459
-                        },
-                        {
-                            "end": 55266556,
-                            "name": null,
-                            "start": 55266410
-                        },
-                        {
-                            "end": 55268106,
-                            "name": null,
-                            "start": 55268009
-                        },
-                        {
-                            "end": 55269048,
-                            "name": null,
-                            "start": 55268881
-                        },
-                        {
-                            "end": 55269475,
-                            "name": null,
-                            "start": 55269428
-                        },
-                        {
-                            "end": 55270318,
-                            "name": null,
-                            "start": 55270210
-                        },
-                        {
-                            "end": 55279321,
-                            "name": null,
-                            "start": 55272949
-                        }
-                    ],
-                    "is_best_transcript": true,
-                    "name": "ENST00000275493",
-                    "start": 55086794
-                },
-                {
-                    "cdna_coding_end": 2134,
-                    "cdna_coding_start": 161,
-                    "domains": [
-                        {
-                            "name": "PF01030",
-                            "regions": [
-                                {
-                                    "end": 167,
-                                    "start": 57
-                                },
-                                {
-                                    "end": 480,
-                                    "start": 361
-                                }
-                            ]
-                        },
-                        {
-                            "name": "SM00261",
-                            "regions": [
-                                {
-                                    "end": 270,
-                                    "start": 228
-                                },
-                                {
-                                    "end": 547,
-                                    "start": 496
-                                },
-                                {
-                                    "end": 601,
-                                    "start": 552
-                                },
-                                {
-                                    "end": 653,
-                                    "start": 614
-                                }
-                            ]
-                        },
-                        {
-                            "name": "SSF52058",
-                            "regions": [
-                                {
-                                    "end": 211,
-                                    "start": 29
-                                },
-                                {
-                                    "end": 520,
-                                    "start": 328
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PF00757",
-                            "regions": [
-                                {
-                                    "end": 338,
-                                    "start": 185
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PS50311",
-                            "regions": [
-                                {
-                                    "end": 264,
-                                    "start": 187
-                                }
-                            ]
-                        },
-                        {
-                            "name": "SSF57184",
-                            "regions": [
-                                {
-                                    "end": 339,
-                                    "start": 182
-                                },
-                                {
-                                    "end": 638,
-                                    "start": 505
-                                }
-                            ]
-                        }
-                    ],
-                    "end": 55324313,
-                    "exons": [
-                        {
-                            "end": 55087058,
-                            "name": null,
-                            "start": 55086811
-                        },
-                        {
-                            "end": 55210130,
-                            "name": null,
-                            "start": 55209979
-                        },
-                        {
-                            "end": 55211181,
-                            "name": null,
-                            "start": 55210998
-                        },
-                        {
-                            "end": 55214433,
-                            "name": null,
-                            "start": 55214299
-                        },
-                        {
-                            "end": 55219055,
-                            "name": null,
-                            "start": 55218987
-                        },
-                        {
-                            "end": 55220357,
-                            "name": null,
-                            "start": 55220239
-                        },
-                        {
-                            "end": 55221845,
-                            "name": null,
-                            "start": 55221704
-                        },
-                        {
-                            "end": 55223639,
-                            "name": null,
-                            "start": 55223523
-                        },
-                        {
-                            "end": 55224352,
-                            "name": null,
-                            "start": 55224226
-                        },
-                        {
-                            "end": 55224525,
-                            "name": null,
-                            "start": 55224452
-                        },
-                        {
-                            "end": 55225446,
-                            "name": null,
-                            "start": 55225356
-                        },
-                        {
-                            "end": 55228031,
-                            "name": null,
-                            "start": 55227832
-                        },
-                        {
-                            "end": 55229324,
-                            "name": null,
-                            "start": 55229192
-                        },
-                        {
-                            "end": 55231516,
-                            "name": null,
-                            "start": 55231426
-                        },
-                        {
-                            "end": 55233130,
-                            "name": null,
-                            "start": 55232973
-                        },
-                        {
-                            "end": 55238906,
-                            "name": null,
-                            "start": 55238868
-                        },
-                        {
-                            "end": 55240621,
-                            "name": null,
-                            "start": 55240539
-                        },
-                        {
-                            "end": 55324313,
-                            "name": null,
-                            "start": 55323947
-                        }
-                    ],
-                    "is_best_transcript": false,
-                    "name": "ENST00000442591",
-                    "start": 55086811
-                },
-                {
-                    "cdna_coding_end": 691,
-                    "cdna_coding_start": 308,
-                    "domains": [
-                        {
-                            "name": "SSF52058",
-                            "regions": [
-                                {
-                                    "end": 127,
-                                    "start": 1
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PF01030",
-                            "regions": [
-                                {
-                                    "end": 114,
-                                    "start": 4
-                                }
-                            ]
-                        }
-                    ],
-                    "end": 55214417,
-                    "exons": [
-                        {
-                            "end": 55177651,
-                            "name": null,
-                            "start": 55177416
-                        },
-                        {
-                            "end": 55210130,
-                            "name": null,
-                            "start": 55209979
-                        },
-                        {
-                            "end": 55211181,
-                            "name": null,
-                            "start": 55210998
-                        },
-                        {
-                            "end": 55214417,
-                            "name": null,
-                            "start": 55214299
-                        }
-                    ],
-                    "is_best_transcript": false,
-                    "name": "ENST00000450046",
-                    "start": 55177416
-                },
-                {
-                    "cdna_coding_end": 3657,
-                    "cdna_coding_start": 184,
-                    "domains": [
-                        {
-                            "name": "SM00261",
-                            "regions": [
-                                {
-                                    "end": 217,
-                                    "start": 175
-                                },
-                                {
-                                    "end": 494,
-                                    "start": 443
-                                },
-                                {
-                                    "end": 548,
-                                    "start": 499
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PF00069",
-                            "regions": [
-                                {
-                                    "end": 911,
-                                    "start": 659
-                                }
-                            ]
-                        },
-                        {
-                            "name": "SM00219",
-                            "regions": [
-                                {
-                                    "end": 915,
-                                    "start": 659
-                                }
-                            ]
-                        },
-                        {
-                            "name": "SSF56112",
-                            "regions": [
-                                {
-                                    "end": 967,
-                                    "start": 643
-                                }
-                            ]
-                        },
-                        {
-                            "name": "SM00220",
-                            "regions": [
-                                {
-                                    "end": 916,
-                                    "start": 659
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PF01030",
-                            "regions": [
-                                {
-                                    "end": 114,
-                                    "start": 4
-                                },
-                                {
-                                    "end": 427,
-                                    "start": 308
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PS50311",
-                            "regions": [
-                                {
-                                    "end": 211,
-                                    "start": 134
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PS50011",
-                            "regions": [
-                                {
-                                    "end": 926,
-                                    "start": 659
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PR00109",
-                            "regions": [
-                                {
-                                    "end": 750,
-                                    "start": 737
-                                },
-                                {
-                                    "end": 792,
-                                    "start": 774
-                                },
-                                {
-                                    "end": 833,
-                                    "start": 823
-                                },
-                                {
-                                    "end": 864,
-                                    "start": 842
-                                },
-                                {
-                                    "end": 908,
-                                    "start": 886
-                                }
-                            ]
-                        },
-                        {
-                            "name": "SSF57184",
-                            "regions": [
-                                {
-                                    "end": 286,
-                                    "start": 129
-                                },
-                                {
-                                    "end": 585,
-                                    "start": 452
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PIRSF000619",
-                            "regions": [
-                                {
-                                    "end": 1157,
-                                    "start": 1
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PF07714",
-                            "regions": [
-                                {
-                                    "end": 912,
-                                    "start": 661
-                                }
-                            ]
-                        },
-                        {
-                            "name": "SSF52058",
-                            "regions": [
-                                {
-                                    "end": 158,
-                                    "start": 1
-                                },
-                                {
-                                    "end": 467,
-                                    "start": 275
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PF00757",
-                            "regions": [
-                                {
-                                    "end": 285,
-                                    "start": 132
-                                }
-                            ]
-                        }
-                    ],
-                    "end": 55273591,
-                    "exons": [
-                        {
-                            "end": 55177651,
-                            "name": null,
-                            "start": 55177540
-                        },
-                        {
-                            "end": 55210130,
-                            "name": null,
-                            "start": 55209979
-                        },
-                        {
-                            "end": 55211181,
-                            "name": null,
-                            "start": 55210998
-                        },
-                        {
-                            "end": 55214433,
-                            "name": null,
-                            "start": 55214299
-                        },
-                        {
-                            "end": 55219055,
-                            "name": null,
-                            "start": 55218987
-                        },
-                        {
-                            "end": 55220357,
-                            "name": null,
-                            "start": 55220239
-                        },
-                        {
-                            "end": 55221845,
-                            "name": null,
-                            "start": 55221704
-                        },
-                        {
-                            "end": 55223639,
-                            "name": null,
-                            "start": 55223523
-                        },
-                        {
-                            "end": 55224352,
-                            "name": null,
-                            "start": 55224226
-                        },
-                        {
-                            "end": 55224525,
-                            "name": null,
-                            "start": 55224452
-                        },
-                        {
-                            "end": 55225446,
-                            "name": null,
-                            "start": 55225356
-                        },
-                        {
-                            "end": 55228031,
-                            "name": null,
-                            "start": 55227832
-                        },
-                        {
-                            "end": 55229324,
-                            "name": null,
-                            "start": 55229192
-                        },
-                        {
-                            "end": 55231516,
-                            "name": null,
-                            "start": 55231426
-                        },
-                        {
-                            "end": 55233130,
-                            "name": null,
-                            "start": 55232973
-                        },
-                        {
-                            "end": 55238906,
-                            "name": null,
-                            "start": 55238868
-                        },
-                        {
-                            "end": 55240817,
-                            "name": null,
-                            "start": 55240676
-                        },
-                        {
-                            "end": 55241736,
-                            "name": null,
-                            "start": 55241614
-                        },
-                        {
-                            "end": 55242513,
-                            "name": null,
-                            "start": 55242415
-                        },
-                        {
-                            "end": 55249171,
-                            "name": null,
-                            "start": 55248986
-                        },
-                        {
-                            "end": 55259567,
-                            "name": null,
-                            "start": 55259412
-                        },
-                        {
-                            "end": 55260534,
-                            "name": null,
-                            "start": 55260459
-                        },
-                        {
-                            "end": 55266556,
-                            "name": null,
-                            "start": 55266410
-                        },
-                        {
-                            "end": 55268106,
-                            "name": null,
-                            "start": 55268009
-                        },
-                        {
-                            "end": 55269048,
-                            "name": null,
-                            "start": 55268881
-                        },
-                        {
-                            "end": 55269475,
-                            "name": null,
-                            "start": 55269428
-                        },
-                        {
-                            "end": 55270318,
-                            "name": null,
-                            "start": 55270210
-                        },
-                        {
-                            "end": 55273591,
-                            "name": null,
-                            "start": 55272949
-                        }
-                    ],
-                    "is_best_transcript": false,
-                    "name": "ENST00000454757",
-                    "start": 55177540
-                }
-            ]
-        },
-        {
-            "aliases": [
-                "DSTYK"
-            ],
-            "chr": "1",
-            "end": 205180727,
-            "name": "ENSG00000133059",
-            "start": 205111632,
-            "strand": "-",
-            "transcripts": [
-                {
-                    "cdna_coding_end": 1831,
-                    "cdna_coding_start": 65,
-                    "domains": [
-                        {
-                            "name": "SM00220",
-                            "regions": [
-                                {
-                                    "end": 565,
-                                    "start": 337
-                                }
-                            ]
-                        },
-                        {
-                            "name": "SSF56112",
-                            "regions": [
-                                {
-                                    "end": 585,
-                                    "start": 452
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PF00069",
-                            "regions": [
-                                {
-                                    "end": 556,
-                                    "start": 451
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PF07714",
-                            "regions": [
-                                {
-                                    "end": 558,
-                                    "start": 471
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PS50011",
-                            "regions": [
-                                {
-                                    "end": 565,
-                                    "start": 312
-                                }
-                            ]
-                        }
-                    ],
-                    "end": 205180727,
-                    "exons": [
-                        {
-                            "end": 205116873,
-                            "name": null,
-                            "start": 205111632
-                        },
-                        {
-                            "end": 205117467,
-                            "name": null,
-                            "start": 205117333
-                        },
-                        {
-                            "end": 205119898,
-                            "name": null,
-                            "start": 205119808
-                        },
-                        {
-                            "end": 205133083,
-                            "name": null,
-                            "start": 205133055
-                        },
-                        {
-                            "end": 205138960,
-                            "name": null,
-                            "start": 205138291
-                        },
-                        {
-                            "end": 205156934,
-                            "name": null,
-                            "start": 205156546
-                        },
-                        {
-                            "end": 205180727,
-                            "name": null,
-                            "start": 205180399
-                        }
-                    ],
-                    "is_best_transcript": false,
-                    "name": "ENST00000367160",
-                    "start": 205111632
-                },
-                {
-                    "cdna_coding_end": 2686,
-                    "cdna_coding_start": 32,
-                    "domains": [
-                        {
-                            "name": "PF07714",
-                            "regions": [
-                                {
-                                    "end": 820,
-                                    "start": 654
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PS50011",
-                            "regions": [
-                                {
-                                    "end": 884,
-                                    "start": 652
-                                }
-                            ]
-                        },
-                        {
-                            "name": "SSF56112",
-                            "regions": [
-                                {
-                                    "end": 853,
-                                    "start": 627
-                                }
-                            ]
-                        },
-                        {
-                            "name": "SM00220",
-                            "regions": [
-                                {
-                                    "end": 861,
-                                    "start": 652
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PF00069",
-                            "regions": [
-                                {
-                                    "end": 824,
-                                    "start": 654
-                                }
-                            ]
-                        },
-                        {
-                            "name": "SM00219",
-                            "regions": [
-                                {
-                                    "end": 861,
-                                    "start": 652
-                                }
-                            ]
-                        }
-                    ],
-                    "end": 205180694,
-                    "exons": [
-                        {
-                            "end": 205116873,
-                            "name": null,
-                            "start": 205111633
-                        },
-                        {
-                            "end": 205119922,
-                            "name": null,
-                            "start": 205119808
-                        },
-                        {
-                            "end": 205126514,
-                            "name": null,
-                            "start": 205126401
-                        },
-                        {
-                            "end": 205128807,
-                            "name": null,
-                            "start": 205128675
-                        },
-                        {
-                            "end": 205129398,
-                            "name": null,
-                            "start": 205129242
-                        },
-                        {
-                            "end": 205130515,
-                            "name": null,
-                            "start": 205130386
-                        },
-                        {
-                            "end": 205131340,
-                            "name": null,
-                            "start": 205131164
-                        },
-                        {
-                            "end": 205132134,
-                            "name": null,
-                            "start": 205132051
-                        },
-                        {
-                            "end": 205133083,
-                            "name": null,
-                            "start": 205132851
-                        },
-                        {
-                            "end": 205138960,
-                            "name": null,
-                            "start": 205138291
-                        },
-                        {
-                            "end": 205156934,
-                            "name": null,
-                            "start": 205156546
-                        },
-                        {
-                            "end": 205180694,
-                            "name": null,
-                            "start": 205180399
-                        }
-                    ],
-                    "is_best_transcript": false,
-                    "name": "ENST00000367161",
-                    "start": 205111633
-                },
-                {
-                    "cdna_coding_end": 2821,
-                    "cdna_coding_start": 32,
-                    "domains": [
-                        {
-                            "name": "PF07714",
-                            "regions": [
-                                {
-                                    "end": 899,
-                                    "start": 654
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PS50011",
-                            "regions": [
-                                {
-                                    "end": 906,
-                                    "start": 652
-                                }
-                            ]
-                        },
-                        {
-                            "name": "SSF56112",
-                            "regions": [
-                                {
-                                    "end": 897,
-                                    "start": 638
-                                }
-                            ]
-                        },
-                        {
-                            "name": "SM00220",
-                            "regions": [
-                                {
-                                    "end": 906,
-                                    "start": 652
-                                }
-                            ]
-                        },
-                        {
-                            "name": "SM00219",
-                            "regions": [
-                                {
-                                    "end": 906,
-                                    "start": 652
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PF00069",
-                            "regions": [
-                                {
-                                    "end": 897,
-                                    "start": 654
-                                }
-                            ]
-                        }
-                    ],
-                    "end": 205180694,
-                    "exons": [
-                        {
-                            "end": 205116873,
-                            "name": null,
-                            "start": 205111633
-                        },
-                        {
-                            "end": 205117467,
-                            "name": null,
-                            "start": 205117333
-                        },
-                        {
-                            "end": 205119922,
-                            "name": null,
-                            "start": 205119808
-                        },
-                        {
-                            "end": 205126514,
-                            "name": null,
-                            "start": 205126401
-                        },
-                        {
-                            "end": 205128807,
-                            "name": null,
-                            "start": 205128675
-                        },
-                        {
-                            "end": 205129398,
-                            "name": null,
-                            "start": 205129242
-                        },
-                        {
-                            "end": 205130515,
-                            "name": null,
-                            "start": 205130386
-                        },
-                        {
-                            "end": 205131340,
-                            "name": null,
-                            "start": 205131164
-                        },
-                        {
-                            "end": 205132134,
-                            "name": null,
-                            "start": 205132051
-                        },
-                        {
-                            "end": 205133083,
-                            "name": null,
-                            "start": 205132851
-                        },
-                        {
-                            "end": 205138960,
-                            "name": null,
-                            "start": 205138291
-                        },
-                        {
-                            "end": 205156934,
-                            "name": null,
-                            "start": 205156546
-                        },
-                        {
-                            "end": 205180694,
-                            "name": null,
-                            "start": 205180399
-                        }
-                    ],
-                    "is_best_transcript": true,
-                    "name": "ENST00000367162",
-                    "start": 205111633
-                }
-            ]
-        },
-        {
-            "aliases": [
-                "NDUFA12"
-            ],
-            "chr": "12",
-            "end": 95397546,
-            "name": "ENSG00000184752",
-            "start": 95290831,
-            "strand": "-",
-            "transcripts": [
-                {
-                    "domains": [],
-                    "end": 95397436,
-                    "exons": [
-                        {
-                            "end": 95291086,
-                            "name": null,
-                            "start": 95290831
-                        },
-                        {
-                            "end": 95318582,
-                            "name": null,
-                            "start": 95318422
-                        },
-                        {
-                            "end": 95322039,
-                            "name": null,
-                            "start": 95321793
-                        },
-                        {
-                            "end": 95396597,
-                            "name": null,
-                            "start": 95396515
-                        },
-                        {
-                            "end": 95397436,
-                            "name": null,
-                            "start": 95397371
-                        }
-                    ],
-                    "is_best_transcript": false,
-                    "name": "ENST00000552205",
-                    "start": 95290831
-                },
-                {
-                    "cdna_coding_end": 188,
-                    "cdna_coding_start": 21,
-                    "domains": [],
-                    "end": 95397476,
-                    "exons": [
-                        {
-                            "end": 95365261,
-                            "name": null,
-                            "start": 95365108
-                        },
-                        {
-                            "end": 95396597,
-                            "name": null,
-                            "start": 95396582
-                        },
-                        {
-                            "end": 95397476,
-                            "name": null,
-                            "start": 95397371
-                        }
-                    ],
-                    "is_best_transcript": false,
-                    "name": "ENST00000547157",
-                    "start": 95365108
-                },
-                {
-                    "cdna_coding_end": 144,
-                    "cdna_coding_start": 1,
-                    "domains": [
-                        {
-                            "name": "PF05071",
-                            "regions": [
-                                {
-                                    "end": 33,
-                                    "start": 12
-                                }
-                            ]
-                        }
-                    ],
-                    "end": 95397384,
-                    "exons": [
-                        {
-                            "end": 95365396,
-                            "name": null,
-                            "start": 95365109
-                        },
-                        {
-                            "end": 95388033,
-                            "name": null,
-                            "start": 95387946
-                        },
-                        {
-                            "end": 95390752,
-                            "name": null,
-                            "start": 95390680
-                        },
-                        {
-                            "end": 95396597,
-                            "name": null,
-                            "start": 95396515
-                        },
-                        {
-                            "end": 95397384,
-                            "name": null,
-                            "start": 95397371
-                        }
-                    ],
-                    "is_best_transcript": false,
-                    "name": "ENST00000551991",
-                    "start": 95365109
-                },
-                {
-                    "cdna_coding_end": 528,
-                    "cdna_coding_start": 91,
-                    "domains": [
-                        {
-                            "name": "PF05071",
-                            "regions": [
-                                {
-                                    "end": 137,
-                                    "start": 36
-                                }
-                            ]
-                        }
-                    ],
-                    "end": 95397546,
-                    "exons": [
-                        {
-                            "end": 95365396,
-                            "name": null,
-                            "start": 95365109
-                        },
-                        {
-                            "end": 95388033,
-                            "name": null,
-                            "start": 95387946
-                        },
-                        {
-                            "end": 95396597,
-                            "name": null,
-                            "start": 95396515
-                        },
-                        {
-                            "end": 95397546,
-                            "name": null,
-                            "start": 95397371
-                        }
-                    ],
-                    "is_best_transcript": true,
-                    "name": "ENST00000327772",
-                    "start": 95365109
-                },
-                {
-                    "cdna_coding_end": 225,
-                    "cdna_coding_start": 34,
-                    "domains": [
-                        {
-                            "name": "PF05071",
-                            "regions": [
-                                {
-                                    "end": 53,
-                                    "start": 36
-                                }
-                            ]
-                        }
-                    ],
-                    "end": 95397489,
-                    "exons": [
-                        {
-                            "end": 95365396,
-                            "name": null,
-                            "start": 95365112
-                        },
-                        {
-                            "end": 95396597,
-                            "name": null,
-                            "start": 95396515
-                        },
-                        {
-                            "end": 95397489,
-                            "name": null,
-                            "start": 95397371
-                        }
-                    ],
-                    "is_best_transcript": false,
-                    "name": "ENST00000547986",
-                    "start": 95365112
-                },
-                {
-                    "cdna_coding_end": 368,
-                    "cdna_coding_start": 69,
-                    "domains": [
-                        {
-                            "name": "PF05071",
-                            "regions": [
-                                {
-                                    "end": 87,
-                                    "start": 36
-                                }
-                            ]
-                        }
-                    ],
-                    "end": 95397524,
-                    "exons": [
-                        {
-                            "end": 95365396,
-                            "name": null,
-                            "start": 95365254
-                        },
-                        {
-                            "end": 95366265,
-                            "name": null,
-                            "start": 95366171
-                        },
-                        {
-                            "end": 95388033,
-                            "name": null,
-                            "start": 95387946
-                        },
-                        {
-                            "end": 95396597,
-                            "name": null,
-                            "start": 95396515
-                        },
-                        {
-                            "end": 95397524,
-                            "name": null,
-                            "start": 95397371
-                        }
-                    ],
-                    "is_best_transcript": false,
-                    "name": "ENST00000546788",
-                    "start": 95365254
-                }
-            ]
-        },
-        {
-            "aliases": [
-                "FRMD6"
-            ],
-            "chr": "14",
-            "end": 52197445,
-            "name": "ENSG00000139926",
-            "start": 51955818,
-            "strand": "+",
-            "transcripts": [
-                {
-                    "cdna_coding_end": 2338,
-                    "cdna_coding_start": 494,
-                    "domains": [
-                        {
-                            "name": "PF09379",
-                            "regions": [
-                                {
-                                    "end": 109,
-                                    "start": 20
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PF09380",
-                            "regions": [
-                                {
-                                    "end": 322,
-                                    "start": 237
-                                }
-                            ]
-                        },
-                        {
-                            "name": "SSF50729",
-                            "regions": [
-                                {
-                                    "end": 375,
-                                    "start": 219
-                                }
-                            ]
-                        },
-                        {
-                            "name": "SM00295",
-                            "regions": [
-                                {
-                                    "end": 226,
-                                    "start": 12
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PS50057",
-                            "regions": [
-                                {
-                                    "end": 320,
-                                    "start": 16
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PF00373",
-                            "regions": [
-                                {
-                                    "end": 226,
-                                    "start": 115
-                                }
-                            ]
-                        },
-                        {
-                            "name": "SSF47031",
-                            "regions": [
-                                {
-                                    "end": 218,
-                                    "start": 110
-                                }
-                            ]
-                        },
-                        {
-                            "name": "SSF54236",
-                            "regions": [
-                                {
-                                    "end": 110,
-                                    "start": 14
-                                }
-                            ]
-                        }
-                    ],
-                    "end": 52197177,
-                    "exons": [
-                        {
-                            "end": 51956138,
-                            "name": null,
-                            "start": 51955855
-                        },
-                        {
-                            "end": 52037128,
-                            "name": null,
-                            "start": 52037066
-                        },
-                        {
-                            "end": 52156653,
-                            "name": null,
-                            "start": 52156409
-                        },
-                        {
-                            "end": 52164950,
-                            "name": null,
-                            "start": 52164860
-                        },
-                        {
-                            "end": 52167853,
-                            "name": null,
-                            "start": 52167774
-                        },
-                        {
-                            "end": 52169306,
-                            "name": null,
-                            "start": 52169230
-                        },
-                        {
-                            "end": 52171653,
-                            "name": null,
-                            "start": 52171467
-                        },
-                        {
-                            "end": 52174951,
-                            "name": null,
-                            "start": 52174796
-                        },
-                        {
-                            "end": 52178314,
-                            "name": null,
-                            "start": 52178249
-                        },
-                        {
-                            "end": 52179269,
-                            "name": null,
-                            "start": 52179201
-                        },
-                        {
-                            "end": 52182217,
-                            "name": null,
-                            "start": 52182043
-                        },
-                        {
-                            "end": 52187108,
-                            "name": null,
-                            "start": 52186773
-                        },
-                        {
-                            "end": 52188798,
-                            "name": null,
-                            "start": 52188667
-                        },
-                        {
-                            "end": 52192588,
-                            "name": null,
-                            "start": 52192497
-                        },
-                        {
-                            "end": 52197177,
-                            "name": null,
-                            "start": 52194463
-                        }
-                    ],
-                    "is_best_transcript": false,
-                    "name": "ENST00000356218",
-                    "start": 51955855
-                },
-                {
-                    "cdna_coding_end": 2130,
-                    "cdna_coding_start": 286,
-                    "domains": [
-                        {
-                            "name": "PF00373",
-                            "regions": [
-                                {
-                                    "end": 226,
-                                    "start": 115
-                                }
-                            ]
-                        },
-                        {
-                            "name": "SSF47031",
-                            "regions": [
-                                {
-                                    "end": 218,
-                                    "start": 110
-                                }
-                            ]
-                        },
-                        {
-                            "name": "SSF54236",
-                            "regions": [
-                                {
-                                    "end": 110,
-                                    "start": 14
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PS50057",
-                            "regions": [
-                                {
-                                    "end": 320,
-                                    "start": 16
-                                }
-                            ]
-                        },
-                        {
-                            "name": "SM00295",
-                            "regions": [
-                                {
-                                    "end": 226,
-                                    "start": 12
-                                }
-                            ]
-                        },
-                        {
-                            "name": "SSF50729",
-                            "regions": [
-                                {
-                                    "end": 375,
-                                    "start": 219
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PF09380",
-                            "regions": [
-                                {
-                                    "end": 322,
-                                    "start": 237
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PF09379",
-                            "regions": [
-                                {
-                                    "end": 109,
-                                    "start": 20
-                                }
-                            ]
-                        }
-                    ],
-                    "end": 52197445,
-                    "exons": [
-                        {
-                            "end": 52118714,
-                            "name": null,
-                            "start": 52118576
-                        },
-                        {
-                            "end": 52156653,
-                            "name": null,
-                            "start": 52156409
-                        },
-                        {
-                            "end": 52164950,
-                            "name": null,
-                            "start": 52164860
-                        },
-                        {
-                            "end": 52167853,
-                            "name": null,
-                            "start": 52167774
-                        },
-                        {
-                            "end": 52169306,
-                            "name": null,
-                            "start": 52169230
-                        },
-                        {
-                            "end": 52171653,
-                            "name": null,
-                            "start": 52171467
-                        },
-                        {
-                            "end": 52174951,
-                            "name": null,
-                            "start": 52174796
-                        },
-                        {
-                            "end": 52178314,
-                            "name": null,
-                            "start": 52178249
-                        },
-                        {
-                            "end": 52179269,
-                            "name": null,
-                            "start": 52179201
-                        },
-                        {
-                            "end": 52182217,
-                            "name": null,
-                            "start": 52182043
-                        },
-                        {
-                            "end": 52187108,
-                            "name": null,
-                            "start": 52186773
-                        },
-                        {
-                            "end": 52188798,
-                            "name": null,
-                            "start": 52188667
-                        },
-                        {
-                            "end": 52192588,
-                            "name": null,
-                            "start": 52192497
-                        },
-                        {
-                            "end": 52197445,
-                            "name": null,
-                            "start": 52194463
-                        }
-                    ],
-                    "is_best_transcript": true,
-                    "name": "ENST00000395718",
-                    "start": 52118576
-                },
-                {
-                    "cdna_coding_end": 2065,
-                    "cdna_coding_start": 197,
-                    "domains": [
-                        {
-                            "name": "PF09380",
-                            "regions": [
-                                {
-                                    "end": 330,
-                                    "start": 245
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PF09379",
-                            "regions": [
-                                {
-                                    "end": 117,
-                                    "start": 20
-                                }
-                            ]
-                        },
-                        {
-                            "name": "SSF47031",
-                            "regions": [
-                                {
-                                    "end": 226,
-                                    "start": 118
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PF00373",
-                            "regions": [
-                                {
-                                    "end": 234,
-                                    "start": 123
-                                }
-                            ]
-                        },
-                        {
-                            "name": "SSF54236",
-                            "regions": [
-                                {
-                                    "end": 118,
-                                    "start": 14
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PS50057",
-                            "regions": [
-                                {
-                                    "end": 328,
-                                    "start": 16
-                                }
-                            ]
-                        },
-                        {
-                            "name": "SM00295",
-                            "regions": [
-                                {
-                                    "end": 234,
-                                    "start": 12
-                                }
-                            ]
-                        },
-                        {
-                            "name": "SSF50729",
-                            "regions": [
-                                {
-                                    "end": 383,
-                                    "start": 227
-                                }
-                            ]
-                        }
-                    ],
-                    "end": 52195654,
-                    "exons": [
-                        {
-                            "end": 52118714,
-                            "name": null,
-                            "start": 52118665
-                        },
-                        {
-                            "end": 52156653,
-                            "name": null,
-                            "start": 52156409
-                        },
-                        {
-                            "end": 52164950,
-                            "name": null,
-                            "start": 52164860
-                        },
-                        {
-                            "end": 52167877,
-                            "name": null,
-                            "start": 52167774
-                        },
-                        {
-                            "end": 52169306,
-                            "name": null,
-                            "start": 52169230
-                        },
-                        {
-                            "end": 52171653,
-                            "name": null,
-                            "start": 52171467
-                        },
-                        {
-                            "end": 52174951,
-                            "name": null,
-                            "start": 52174796
-                        },
-                        {
-                            "end": 52178314,
-                            "name": null,
-                            "start": 52178249
-                        },
-                        {
-                            "end": 52179269,
-                            "name": null,
-                            "start": 52179201
-                        },
-                        {
-                            "end": 52182217,
-                            "name": null,
-                            "start": 52182043
-                        },
-                        {
-                            "end": 52187108,
-                            "name": null,
-                            "start": 52186773
-                        },
-                        {
-                            "end": 52188798,
-                            "name": null,
-                            "start": 52188667
-                        },
-                        {
-                            "end": 52192588,
-                            "name": null,
-                            "start": 52192497
-                        },
-                        {
-                            "end": 52195654,
-                            "name": null,
-                            "start": 52194463
-                        }
-                    ],
-                    "is_best_transcript": false,
-                    "name": "ENST00000344768",
-                    "start": 52118665
-                },
-                {
-                    "domains": [],
-                    "end": 52164945,
-                    "exons": [
-                        {
-                            "end": 52118935,
-                            "name": null,
-                            "start": 52118698
-                        },
-                        {
-                            "end": 52156653,
-                            "name": null,
-                            "start": 52156409
-                        },
-                        {
-                            "end": 52164945,
-                            "name": null,
-                            "start": 52164860
-                        }
-                    ],
-                    "is_best_transcript": false,
-                    "name": "ENST00000554778",
-                    "start": 52118698
-                },
-                {
-                    "domains": [],
-                    "end": 52174806,
-                    "exons": [
-                        {
-                            "end": 52164950,
-                            "name": null,
-                            "start": 52164706
-                        },
-                        {
-                            "end": 52167877,
-                            "name": null,
-                            "start": 52167774
-                        },
-                        {
-                            "end": 52169306,
-                            "name": null,
-                            "start": 52169230
-                        },
-                        {
-                            "end": 52171653,
-                            "name": null,
-                            "start": 52171467
-                        },
-                        {
-                            "end": 52174806,
-                            "name": null,
-                            "start": 52174796
-                        }
-                    ],
-                    "is_best_transcript": false,
-                    "name": "ENST00000555936",
-                    "start": 52164706
-                },
-                {
-                    "cdna_coding_end": 1775,
-                    "cdna_coding_start": 138,
-                    "domains": [
-                        {
-                            "name": "SSF50729",
-                            "regions": [
-                                {
-                                    "end": 306,
-                                    "start": 150
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PS50057",
-                            "regions": [
-                                {
-                                    "end": 251,
-                                    "start": 1
-                                }
-                            ]
-                        },
-                        {
-                            "name": "SSF54236",
-                            "regions": [
-                                {
-                                    "end": 41,
-                                    "start": 1
-                                }
-                            ]
-                        },
-                        {
-                            "name": "SSF47031",
-                            "regions": [
-                                {
-                                    "end": 149,
-                                    "start": 41
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PF00373",
-                            "regions": [
-                                {
-                                    "end": 157,
-                                    "start": 46
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PF09380",
-                            "regions": [
-                                {
-                                    "end": 253,
-                                    "start": 168
-                                }
-                            ]
-                        }
-                    ],
-                    "end": 52197148,
-                    "exons": [
-                        {
-                            "end": 52164950,
-                            "name": null,
-                            "start": 52164831
-                        },
-                        {
-                            "end": 52167853,
-                            "name": null,
-                            "start": 52167774
-                        },
-                        {
-                            "end": 52169306,
-                            "name": null,
-                            "start": 52169230
-                        },
-                        {
-                            "end": 52171653,
-                            "name": null,
-                            "start": 52171467
-                        },
-                        {
-                            "end": 52174951,
-                            "name": null,
-                            "start": 52174796
-                        },
-                        {
-                            "end": 52178314,
-                            "name": null,
-                            "start": 52178249
-                        },
-                        {
-                            "end": 52179269,
-                            "name": null,
-                            "start": 52179201
-                        },
-                        {
-                            "end": 52182217,
-                            "name": null,
-                            "start": 52182043
-                        },
-                        {
-                            "end": 52187108,
-                            "name": null,
-                            "start": 52186773
-                        },
-                        {
-                            "end": 52188798,
-                            "name": null,
-                            "start": 52188667
-                        },
-                        {
-                            "end": 52192588,
-                            "name": null,
-                            "start": 52192497
-                        },
-                        {
-                            "end": 52197148,
-                            "name": null,
-                            "start": 52194463
-                        }
-                    ],
-                    "is_best_transcript": false,
-                    "name": "ENST00000554167",
-                    "start": 52164831
-                },
-                {
-                    "cdna_coding_end": 390,
-                    "cdna_coding_start": 1,
-                    "domains": [
-                        {
-                            "name": "PS50057",
-                            "regions": [
-                                {
-                                    "end": 129,
-                                    "start": 1
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PF00373",
-                            "regions": [
-                                {
-                                    "end": 124,
-                                    "start": 13
-                                }
-                            ]
-                        },
-                        {
-                            "name": "SSF47031",
-                            "regions": [
-                                {
-                                    "end": 116,
-                                    "start": 8
-                                }
-                            ]
-                        }
-                    ],
-                    "end": 52175062,
-                    "exons": [
-                        {
-                            "end": 52169306,
-                            "name": null,
-                            "start": 52169266
-                        },
-                        {
-                            "end": 52171653,
-                            "name": null,
-                            "start": 52171467
-                        },
-                        {
-                            "end": 52175062,
-                            "name": null,
-                            "start": 52174796
-                        }
-                    ],
-                    "is_best_transcript": false,
-                    "name": "ENST00000557405",
-                    "start": 52169266
-                },
-                {
-                    "cdna_coding_end": 618,
-                    "cdna_coding_start": 1,
-                    "domains": [
-                        {
-                            "name": "PF09380",
-                            "regions": [
-                                {
-                                    "end": 60,
-                                    "start": 2
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PS50057",
-                            "regions": [
-                                {
-                                    "end": 58,
-                                    "start": 1
-                                }
-                            ]
-                        },
-                        {
-                            "name": "SSF50729",
-                            "regions": [
-                                {
-                                    "end": 113,
-                                    "start": 2
-                                }
-                            ]
-                        }
-                    ],
-                    "end": 52187243,
-                    "exons": [
-                        {
-                            "end": 52179269,
-                            "name": null,
-                            "start": 52179231
-                        },
-                        {
-                            "end": 52182217,
-                            "name": null,
-                            "start": 52182043
-                        },
-                        {
-                            "end": 52187243,
-                            "name": null,
-                            "start": 52186773
-                        }
-                    ],
-                    "is_best_transcript": false,
-                    "name": "ENST00000555197",
-                    "start": 52179231
-                },
-                {
-                    "cdna_coding_end": 573,
-                    "cdna_coding_start": 145,
-                    "domains": [],
-                    "end": 52192513,
-                    "exons": [
-                        {
-                            "end": 52184066,
-                            "name": null,
-                            "start": 52183973
-                        },
-                        {
-                            "end": 52187108,
-                            "name": null,
-                            "start": 52186773
-                        },
-                        {
-                            "end": 52188798,
-                            "name": null,
-                            "start": 52188673
-                        },
-                        {
-                            "end": 52192513,
-                            "name": null,
-                            "start": 52192497
-                        }
-                    ],
-                    "is_best_transcript": false,
-                    "name": "ENST00000555703",
-                    "start": 52183973
-                },
-                {
-                    "cdna_coding_end": 939,
-                    "cdna_coding_start": 145,
-                    "domains": [],
-                    "end": 52195487,
-                    "exons": [
-                        {
-                            "end": 52184066,
-                            "name": null,
-                            "start": 52183973
-                        },
-                        {
-                            "end": 52187108,
-                            "name": null,
-                            "start": 52186773
-                        },
-                        {
-                            "end": 52188798,
-                            "name": null,
-                            "start": 52188667
-                        },
-                        {
-                            "end": 52192588,
-                            "name": null,
-                            "start": 52192497
-                        },
-                        {
-                            "end": 52195487,
-                            "name": null,
-                            "start": 52194463
-                        }
-                    ],
-                    "is_best_transcript": false,
-                    "name": "ENST00000553556",
-                    "start": 52183973
-                }
-            ]
-        },
-        {
-            "aliases": [
-                "PRKCB"
-            ],
-            "chr": "16",
-            "end": 24231932,
-            "name": "ENSG00000166501",
-            "start": 23847322,
-            "strand": "+",
-            "transcripts": [
-                {
-                    "cdna_coding_end": 2191,
-                    "cdna_coding_start": 176,
-                    "domains": [
-                        {
-                            "name": "SM00239",
-                            "regions": [
-                                {
-                                    "end": 275,
-                                    "start": 172
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PF07714",
-                            "regions": [
-                                {
-                                    "end": 583,
-                                    "start": 344
-                                }
-                            ]
-                        },
-                        {
-                            "name": "SSF49562",
-                            "regions": [
-                                {
-                                    "end": 288,
-                                    "start": 157
-                                }
-                            ]
-                        },
-                        {
-                            "name": "SM00109",
-                            "regions": [
-                                {
-                                    "end": 86,
-                                    "start": 37
-                                },
-                                {
-                                    "end": 151,
-                                    "start": 102
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PS50011",
-                            "regions": [
-                                {
-                                    "end": 600,
-                                    "start": 342
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PR00008",
-                            "regions": [
-                                {
-                                    "end": 48,
-                                    "start": 34
-                                },
-                                {
-                                    "end": 59,
-                                    "start": 50
-                                },
-                                {
-                                    "end": 74,
-                                    "start": 63
-                                },
-                                {
-                                    "end": 152,
-                                    "start": 140
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PF00433",
-                            "regions": [
-                                {
-                                    "end": 666,
-                                    "start": 623
-                                }
-                            ]
-                        },
-                        {
-                            "name": "SM00220",
-                            "regions": [
-                                {
-                                    "end": 600,
-                                    "start": 342
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PF00168",
-                            "regions": [
-                                {
-                                    "end": 259,
-                                    "start": 175
-                                }
-                            ]
-                        },
-                        {
-                            "name": "SSF57889",
-                            "regions": [
-                                {
-                                    "end": 92,
-                                    "start": 6
-                                },
-                                {
-                                    "end": 157,
-                                    "start": 101
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PF00130",
-                            "regions": [
-                                {
-                                    "end": 87,
-                                    "start": 37
-                                },
-                                {
-                                    "end": 153,
-                                    "start": 102
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PS50081",
-                            "regions": [
-                                {
-                                    "end": 86,
-                                    "start": 36
-                                },
-                                {
-                                    "end": 151,
-                                    "start": 101
-                                }
-                            ]
-                        },
-                        {
-                            "name": "SSF56112",
-                            "regions": [
-                                {
-                                    "end": 627,
-                                    "start": 317
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PF00069",
-                            "regions": [
-                                {
-                                    "end": 586,
-                                    "start": 343
-                                }
-                            ]
-                        },
-                        {
-                            "name": "SM00219",
-                            "regions": [
-                                {
-                                    "end": 576,
-                                    "start": 342
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PR00360",
-                            "regions": [
-                                {
-                                    "end": 200,
-                                    "start": 188
-                                },
-                                {
-                                    "end": 230,
-                                    "start": 217
-                                },
-                                {
-                                    "end": 248,
-                                    "start": 240
-                                }
-                            ]
-                        },
-                        {
-                            "name": "SM00133",
-                            "regions": [
-                                {
-                                    "end": 664,
-                                    "start": 601
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PS50004",
-                            "regions": [
-                                {
-                                    "end": 260,
-                                    "start": 173
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PIRSF000550",
-                            "regions": [
-                                {
-                                    "end": 671,
-                                    "start": 1
-                                }
-                            ]
-                        }
-                    ],
-                    "end": 24231932,
-                    "exons": [
-                        {
-                            "end": 23847669,
-                            "name": null,
-                            "start": 23847322
-                        },
-                        {
-                            "end": 23848727,
-                            "name": null,
-                            "start": 23848696
-                        },
-                        {
-                            "end": 23999911,
-                            "name": null,
-                            "start": 23999829
-                        },
-                        {
-                            "end": 24043568,
-                            "name": null,
-                            "start": 24043457
-                        },
-                        {
-                            "end": 24046868,
-                            "name": null,
-                            "start": 24046740
-                        },
-                        {
-                            "end": 24104268,
-                            "name": null,
-                            "start": 24104112
-                        },
-                        {
-                            "end": 24105618,
-                            "name": null,
-                            "start": 24105484
-                        },
-                        {
-                            "end": 24124390,
-                            "name": null,
-                            "start": 24124294
-                        },
-                        {
-                            "end": 24135302,
-                            "name": null,
-                            "start": 24135156
-                        },
-                        {
-                            "end": 24166178,
-                            "name": null,
-                            "start": 24166005
-                        },
-                        {
-                            "end": 24183682,
-                            "name": null,
-                            "start": 24183591
-                        },
-                        {
-                            "end": 24185901,
-                            "name": null,
-                            "start": 24185839
-                        },
-                        {
-                            "end": 24192249,
-                            "name": null,
-                            "start": 24192111
-                        },
-                        {
-                            "end": 24196512,
-                            "name": null,
-                            "start": 24196432
-                        },
-                        {
-                            "end": 24196888,
-                            "name": null,
-                            "start": 24196781
-                        },
-                        {
-                            "end": 24202551,
-                            "name": null,
-                            "start": 24202411
-                        },
-                        {
-                            "end": 24231932,
-                            "name": null,
-                            "start": 24231282
-                        }
-                    ],
-                    "is_best_transcript": true,
-                    "name": "ENST00000321728",
-                    "start": 23847322
-                },
-                {
-                    "cdna_coding_end": 2174,
-                    "cdna_coding_start": 153,
-                    "domains": [
-                        {
-                            "name": "SM00133",
-                            "regions": [
-                                {
-                                    "end": 663,
-                                    "start": 601
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PS50004",
-                            "regions": [
-                                {
-                                    "end": 260,
-                                    "start": 173
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PIRSF000550",
-                            "regions": [
-                                {
-                                    "end": 672,
-                                    "start": 1
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PF00069",
-                            "regions": [
-                                {
-                                    "end": 586,
-                                    "start": 343
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PR00360",
-                            "regions": [
-                                {
-                                    "end": 200,
-                                    "start": 188
-                                },
-                                {
-                                    "end": 230,
-                                    "start": 217
-                                },
-                                {
-                                    "end": 248,
-                                    "start": 240
-                                }
-                            ]
-                        },
-                        {
-                            "name": "SM00219",
-                            "regions": [
-                                {
-                                    "end": 576,
-                                    "start": 342
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PS50081",
-                            "regions": [
-                                {
-                                    "end": 86,
-                                    "start": 36
-                                },
-                                {
-                                    "end": 151,
-                                    "start": 101
-                                }
-                            ]
-                        },
-                        {
-                            "name": "SSF56112",
-                            "regions": [
-                                {
-                                    "end": 627,
-                                    "start": 317
-                                }
-                            ]
-                        },
-                        {
-                            "name": "SM00220",
-                            "regions": [
-                                {
-                                    "end": 600,
-                                    "start": 342
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PF00433",
-                            "regions": [
-                                {
-                                    "end": 664,
-                                    "start": 627
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PF00130",
-                            "regions": [
-                                {
-                                    "end": 87,
-                                    "start": 37
-                                },
-                                {
-                                    "end": 153,
-                                    "start": 102
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PF00168",
-                            "regions": [
-                                {
-                                    "end": 259,
-                                    "start": 175
-                                }
-                            ]
-                        },
-                        {
-                            "name": "SSF57889",
-                            "regions": [
-                                {
-                                    "end": 92,
-                                    "start": 6
-                                },
-                                {
-                                    "end": 157,
-                                    "start": 101
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PR00008",
-                            "regions": [
-                                {
-                                    "end": 48,
-                                    "start": 34
-                                },
-                                {
-                                    "end": 59,
-                                    "start": 50
-                                },
-                                {
-                                    "end": 74,
-                                    "start": 63
-                                },
-                                {
-                                    "end": 152,
-                                    "start": 140
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PS50011",
-                            "regions": [
-                                {
-                                    "end": 600,
-                                    "start": 342
-                                }
-                            ]
-                        },
-                        {
-                            "name": "SM00109",
-                            "regions": [
-                                {
-                                    "end": 86,
-                                    "start": 37
-                                },
-                                {
-                                    "end": 151,
-                                    "start": 102
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PF07714",
-                            "regions": [
-                                {
-                                    "end": 583,
-                                    "start": 344
-                                }
-                            ]
-                        },
-                        {
-                            "name": "SSF49562",
-                            "regions": [
-                                {
-                                    "end": 288,
-                                    "start": 157
-                                }
-                            ]
-                        },
-                        {
-                            "name": "SM00239",
-                            "regions": [
-                                {
-                                    "end": 275,
-                                    "start": 172
-                                }
-                            ]
-                        }
-                    ],
-                    "end": 24231932,
-                    "exons": [
-                        {
-                            "end": 23847669,
-                            "name": null,
-                            "start": 23847345
-                        },
-                        {
-                            "end": 23848727,
-                            "name": null,
-                            "start": 23848696
-                        },
-                        {
-                            "end": 23999911,
-                            "name": null,
-                            "start": 23999829
-                        },
-                        {
-                            "end": 24043568,
-                            "name": null,
-                            "start": 24043457
-                        },
-                        {
-                            "end": 24046868,
-                            "name": null,
-                            "start": 24046740
-                        },
-                        {
-                            "end": 24104268,
-                            "name": null,
-                            "start": 24104112
-                        },
-                        {
-                            "end": 24105618,
-                            "name": null,
-                            "start": 24105484
-                        },
-                        {
-                            "end": 24124390,
-                            "name": null,
-                            "start": 24124294
-                        },
-                        {
-                            "end": 24135302,
-                            "name": null,
-                            "start": 24135156
-                        },
-                        {
-                            "end": 24166178,
-                            "name": null,
-                            "start": 24166005
-                        },
-                        {
-                            "end": 24183682,
-                            "name": null,
-                            "start": 24183591
-                        },
-                        {
-                            "end": 24185901,
-                            "name": null,
-                            "start": 24185839
-                        },
-                        {
-                            "end": 24192249,
-                            "name": null,
-                            "start": 24192111
-                        },
-                        {
-                            "end": 24196512,
-                            "name": null,
-                            "start": 24196432
-                        },
-                        {
-                            "end": 24196888,
-                            "name": null,
-                            "start": 24196781
-                        },
-                        {
-                            "end": 24202551,
-                            "name": null,
-                            "start": 24202411
-                        },
-                        {
-                            "end": 24231932,
-                            "name": null,
-                            "start": 24225979
-                        }
-                    ],
-                    "is_best_transcript": false,
-                    "name": "ENST00000303531",
-                    "start": 23847345
-                },
-                {
-                    "cdna_coding_end": 268,
-                    "cdna_coding_start": 95,
-                    "domains": [
-                        {
-                            "name": "PR00008",
-                            "regions": [
-                                {
-                                    "end": 48,
-                                    "start": 34
-                                },
-                                {
-                                    "end": 57,
-                                    "start": 50
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PS50081",
-                            "regions": [
-                                {
-                                    "end": 57,
-                                    "start": 36
-                                }
-                            ]
-                        },
-                        {
-                            "name": "SSF57889",
-                            "regions": [
-                                {
-                                    "end": 57,
-                                    "start": 6
-                                }
-                            ]
-                        }
-                    ],
-                    "end": 23880647,
-                    "exons": [
-                        {
-                            "end": 23847669,
-                            "name": null,
-                            "start": 23847403
-                        },
-                        {
-                            "end": 23880647,
-                            "name": null,
-                            "start": 23880435
-                        }
-                    ],
-                    "is_best_transcript": false,
-                    "name": "ENST00000498058",
-                    "start": 23847403
-                },
-                {
-                    "domains": [],
-                    "end": 24124386,
-                    "exons": [
-                        {
-                            "end": 23848727,
-                            "name": null,
-                            "start": 23848544
-                        },
-                        {
-                            "end": 24104268,
-                            "name": null,
-                            "start": 24104112
-                        },
-                        {
-                            "end": 24105618,
-                            "name": null,
-                            "start": 24105484
-                        },
-                        {
-                            "end": 24124386,
-                            "name": null,
-                            "start": 24124294
-                        }
-                    ],
-                    "is_best_transcript": false,
-                    "name": "ENST00000498739",
-                    "start": 23848544
-                },
-                {
-                    "domains": [],
-                    "end": 24192166,
-                    "exons": [
-                        {
-                            "end": 24163176,
-                            "name": null,
-                            "start": 24163006
-                        },
-                        {
-                            "end": 24166178,
-                            "name": null,
-                            "start": 24166005
-                        },
-                        {
-                            "end": 24183682,
-                            "name": null,
-                            "start": 24183591
-                        },
-                        {
-                            "end": 24185901,
-                            "name": null,
-                            "start": 24185839
-                        },
-                        {
-                            "end": 24192166,
-                            "name": null,
-                            "start": 24192111
-                        }
-                    ],
-                    "is_best_transcript": false,
-                    "name": "ENST00000472066",
-                    "start": 24163006
-                },
-                {
-                    "domains": [],
-                    "end": 24202909,
-                    "exons": [
-                        {
-                            "end": 24196888,
-                            "name": null,
-                            "start": 24196852
-                        },
-                        {
-                            "end": 24202909,
-                            "name": null,
-                            "start": 24202411
-                        }
-                    ],
-                    "is_best_transcript": false,
-                    "name": "ENST00000466124",
-                    "start": 24196852
-                }
-            ]
-        },
-        {
-            "aliases": [
-                "GIMAP4"
-            ],
-            "chr": "7",
-            "end": 150271041,
-            "name": "ENSG00000133574",
-            "start": 150264365,
-            "strand": "+",
-            "transcripts": [
-                {
-                    "cdna_coding_end": 1165,
-                    "cdna_coding_start": 176,
-                    "domains": [
-                        {
-                            "name": "PF04548",
-                            "regions": [
-                                {
-                                    "end": 238,
-                                    "start": 31
-                                }
-                            ]
-                        },
-                        {
-                            "name": "SSF52540",
-                            "regions": [
-                                {
-                                    "end": 288,
-                                    "start": 24
-                                }
-                            ]
-                        }
-                    ],
-                    "end": 150271041,
-                    "exons": [
-                        {
-                            "end": 150264525,
-                            "name": null,
-                            "start": 150264365
-                        },
-                        {
-                            "end": 150267047,
-                            "name": null,
-                            "start": 150266976
-                        },
-                        {
-                            "end": 150271041,
-                            "name": null,
-                            "start": 150269217
-                        }
-                    ],
-                    "is_best_transcript": true,
-                    "name": "ENST00000255945",
-                    "start": 150264365
-                },
-                {
-                    "cdna_coding_end": 1115,
-                    "cdna_coding_start": 84,
-                    "domains": [
-                        {
-                            "name": "PF04548",
-                            "regions": [
-                                {
-                                    "end": 252,
-                                    "start": 45
-                                }
-                            ]
-                        },
-                        {
-                            "name": "SSF52540",
-                            "regions": [
-                                {
-                                    "end": 302,
-                                    "start": 38
-                                }
-                            ]
-                        }
-                    ],
-                    "end": 150270602,
-                    "exons": [
-                        {
-                            "end": 150264525,
-                            "name": null,
-                            "start": 150264457
-                        },
-                        {
-                            "end": 150267089,
-                            "name": null,
-                            "start": 150266976
-                        },
-                        {
-                            "end": 150270602,
-                            "name": null,
-                            "start": 150269217
-                        }
-                    ],
-                    "is_best_transcript": false,
-                    "name": "ENST00000461940",
-                    "start": 150264457
-                },
-                {
-                    "cdna_coding_end": 552,
-                    "cdna_coding_start": 100,
-                    "domains": [
-                        {
-                            "name": "SSF52540",
-                            "regions": [
-                                {
-                                    "end": 151,
-                                    "start": 38
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PF04548",
-                            "regions": [
-                                {
-                                    "end": 151,
-                                    "start": 45
-                                }
-                            ]
-                        }
-                    ],
-                    "end": 150269569,
-                    "exons": [
-                        {
-                            "end": 150264608,
-                            "name": null,
-                            "start": 150264524
-                        },
-                        {
-                            "end": 150267089,
-                            "name": null,
-                            "start": 150266976
-                        },
-                        {
-                            "end": 150269569,
-                            "name": null,
-                            "start": 150269217
-                        }
-                    ],
-                    "is_best_transcript": false,
-                    "name": "ENST00000479232",
-                    "start": 150264524
-                }
-            ]
-        },
-        {
-            "aliases": [
-                "IL7"
-            ],
-            "chr": "8",
-            "end": 79717758,
-            "name": "ENSG00000104432",
-            "start": 79587978,
-            "strand": "-",
-            "transcripts": [
-                {
-                    "cdna_coding_end": 1135,
-                    "cdna_coding_start": 602,
-                    "domains": [
-                        {
-                            "name": "PIRSF001942",
-                            "regions": [
-                                {
-                                    "end": 177,
-                                    "start": 1
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PR00435",
-                            "regions": [
-                                {
-                                    "end": 25,
-                                    "start": 2
-                                },
-                                {
-                                    "end": 48,
-                                    "start": 26
-                                },
-                                {
-                                    "end": 77,
-                                    "start": 57
-                                },
-                                {
-                                    "end": 98,
-                                    "start": 78
-                                },
-                                {
-                                    "end": 118,
-                                    "start": 99
-                                },
-                                {
-                                    "end": 173,
-                                    "start": 151
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PF01415",
-                            "regions": [
-                                {
-                                    "end": 173,
-                                    "start": 28
-                                }
-                            ]
-                        },
-                        {
-                            "name": "SM00127",
-                            "regions": [
-                                {
-                                    "end": 173,
-                                    "start": 27
-                                }
-                            ]
-                        }
-                    ],
-                    "end": 79717758,
-                    "exons": [
-                        {
-                            "end": 79646067,
-                            "name": null,
-                            "start": 79645007
-                        },
-                        {
-                            "end": 79648762,
-                            "name": null,
-                            "start": 79648709
-                        },
-                        {
-                            "end": 79650870,
-                            "name": null,
-                            "start": 79650739
-                        },
-                        {
-                            "end": 79652317,
-                            "name": null,
-                            "start": 79652237
-                        },
-                        {
-                            "end": 79710443,
-                            "name": null,
-                            "start": 79710307
-                        },
-                        {
-                            "end": 79717758,
-                            "name": null,
-                            "start": 79717148
-                        }
-                    ],
-                    "is_best_transcript": true,
-                    "name": "ENST00000263851",
-                    "start": 79645007
-                },
-                {
-                    "cdna_coding_end": 758,
-                    "cdna_coding_start": 543,
-                    "domains": [
-                        {
-                            "name": "PR00435",
-                            "regions": [
-                                {
-                                    "end": 25,
-                                    "start": 2
-                                },
-                                {
-                                    "end": 48,
-                                    "start": 26
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PF01415",
-                            "regions": [
-                                {
-                                    "end": 54,
-                                    "start": 28
-                                }
-                            ]
-                        }
-                    ],
-                    "end": 79717699,
-                    "exons": [
-                        {
-                            "end": 79646063,
-                            "name": null,
-                            "start": 79645283
-                        },
-                        {
-                            "end": 79648762,
-                            "name": null,
-                            "start": 79648709
-                        },
-                        {
-                            "end": 79650870,
-                            "name": null,
-                            "start": 79650739
-                        },
-                        {
-                            "end": 79652317,
-                            "name": null,
-                            "start": 79652237
-                        },
-                        {
-                            "end": 79659331,
-                            "name": null,
-                            "start": 79659129
-                        },
-                        {
-                            "end": 79710443,
-                            "name": null,
-                            "start": 79710307
-                        },
-                        {
-                            "end": 79717699,
-                            "name": null,
-                            "start": 79717148
-                        }
-                    ],
-                    "is_best_transcript": false,
-                    "name": "ENST00000518982",
-                    "start": 79645283
-                },
-                {
-                    "cdna_coding_end": 408,
-                    "cdna_coding_start": 7,
-                    "domains": [
-                        {
-                            "name": "PF01415",
-                            "regions": [
-                                {
-                                    "end": 77,
-                                    "start": 28
-                                },
-                                {
-                                    "end": 129,
-                                    "start": 91
-                                }
-                            ]
-                        },
-                        {
-                            "name": "SM00127",
-                            "regions": [
-                                {
-                                    "end": 129,
-                                    "start": 27
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PR00435",
-                            "regions": [
-                                {
-                                    "end": 25,
-                                    "start": 2
-                                },
-                                {
-                                    "end": 48,
-                                    "start": 26
-                                },
-                                {
-                                    "end": 77,
-                                    "start": 57
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PIRSF001942",
-                            "regions": [
-                                {
-                                    "end": 133,
-                                    "start": 1
-                                }
-                            ]
-                        }
-                    ],
-                    "end": 79717163,
-                    "exons": [
-                        {
-                            "end": 79646067,
-                            "name": null,
-                            "start": 79645900
-                        },
-                        {
-                            "end": 79648762,
-                            "name": null,
-                            "start": 79648709
-                        },
-                        {
-                            "end": 79652317,
-                            "name": null,
-                            "start": 79652237
-                        },
-                        {
-                            "end": 79710443,
-                            "name": null,
-                            "start": 79710307
-                        },
-                        {
-                            "end": 79717163,
-                            "name": null,
-                            "start": 79717148
-                        }
-                    ],
-                    "is_best_transcript": false,
-                    "name": "ENST00000520269",
-                    "start": 79645900
-                },
-                {
-                    "cdna_coding_end": 120,
-                    "cdna_coding_start": 7,
-                    "domains": [
-                        {
-                            "name": "PR00435",
-                            "regions": [
-                                {
-                                    "end": 25,
-                                    "start": 2
-                                },
-                                {
-                                    "end": 37,
-                                    "start": 26
-                                }
-                            ]
-                        }
-                    ],
-                    "end": 79717163,
-                    "exons": [
-                        {
-                            "end": 79646067,
-                            "name": null,
-                            "start": 79645900
-                        },
-                        {
-                            "end": 79648762,
-                            "name": null,
-                            "start": 79648709
-                        },
-                        {
-                            "end": 79652317,
-                            "name": null,
-                            "start": 79652237
-                        },
-                        {
-                            "end": 79710443,
-                            "name": null,
-                            "start": 79710363
-                        },
-                        {
-                            "end": 79717163,
-                            "name": null,
-                            "start": 79717148
-                        }
-                    ],
-                    "is_best_transcript": false,
-                    "name": "ENST00000520215",
-                    "start": 79645900
-                },
-                {
-                    "cdna_coding_end": 643,
-                    "cdna_coding_start": 530,
-                    "domains": [
-                        {
-                            "name": "PR00435",
-                            "regions": [
-                                {
-                                    "end": 25,
-                                    "start": 2
-                                },
-                                {
-                                    "end": 37,
-                                    "start": 26
-                                }
-                            ]
-                        }
-                    ],
-                    "end": 79717686,
-                    "exons": [
-                        {
-                            "end": 79646067,
-                            "name": null,
-                            "start": 79645900
-                        },
-                        {
-                            "end": 79648762,
-                            "name": null,
-                            "start": 79648709
-                        },
-                        {
-                            "end": 79650870,
-                            "name": null,
-                            "start": 79650739
-                        },
-                        {
-                            "end": 79652317,
-                            "name": null,
-                            "start": 79652237
-                        },
-                        {
-                            "end": 79710443,
-                            "name": null,
-                            "start": 79710363
-                        },
-                        {
-                            "end": 79717686,
-                            "name": null,
-                            "start": 79717148
-                        }
-                    ],
-                    "is_best_transcript": false,
-                    "name": "ENST00000520317",
-                    "start": 79645900
-                },
-                {
-                    "cdna_coding_end": 195,
-                    "cdna_coding_start": 1,
-                    "domains": [
-                        {
-                            "name": "SM00127",
-                            "regions": [
-                                {
-                                    "end": 60,
-                                    "start": 1
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PF01415",
-                            "regions": [
-                                {
-                                    "end": 60,
-                                    "start": 1
-                                }
-                            ]
-                        }
-                    ],
-                    "end": 79652311,
-                    "exons": [
-                        {
-                            "end": 79646067,
-                            "name": null,
-                            "start": 79645948
-                        },
-                        {
-                            "end": 79652311,
-                            "name": null,
-                            "start": 79652237
-                        }
-                    ],
-                    "is_best_transcript": false,
-                    "name": "ENST00000541183",
-                    "start": 79645948
-                },
-                {
-                    "cdna_coding_end": 817,
-                    "cdna_coding_start": 602,
-                    "domains": [
-                        {
-                            "name": "PF01415",
-                            "regions": [
-                                {
-                                    "end": 54,
-                                    "start": 28
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PR00435",
-                            "regions": [
-                                {
-                                    "end": 25,
-                                    "start": 2
-                                },
-                                {
-                                    "end": 48,
-                                    "start": 26
-                                }
-                            ]
-                        }
-                    ],
-                    "end": 79717758,
-                    "exons": [
-                        {
-                            "end": 79659331,
-                            "name": null,
-                            "start": 79659263
-                        },
-                        {
-                            "end": 79710443,
-                            "name": null,
-                            "start": 79710307
-                        },
-                        {
-                            "end": 79717758,
-                            "name": null,
-                            "start": 79717148
-                        }
-                    ],
-                    "is_best_transcript": false,
-                    "name": "ENST00000379113",
-                    "start": 79659263
-                }
-            ]
-        },
-        {
-            "aliases": [
-                "SVEP1"
-            ],
-            "chr": "9",
-            "end": 113342160,
-            "name": "ENSG00000165124",
-            "start": 113127531,
-            "strand": "-",
-            "transcripts": [
-                {
-                    "cdna_coding_end": 11053,
-                    "cdna_coding_start": 338,
-                    "domains": [
-                        {
-                            "name": "SM00032",
-                            "regions": [
-                                {
-                                    "end": 433,
-                                    "start": 378
-                                },
-                                {
-                                    "end": 493,
-                                    "start": 438
-                                },
-                                {
-                                    "end": 559,
-                                    "start": 498
-                                },
-                                {
-                                    "end": 787,
-                                    "start": 727
-                                },
-                                {
-                                    "end": 1685,
-                                    "start": 1631
-                                },
-                                {
-                                    "end": 1743,
-                                    "start": 1690
-                                },
-                                {
-                                    "end": 1842,
-                                    "start": 1789
-                                },
-                                {
-                                    "end": 1900,
-                                    "start": 1847
-                                },
-                                {
-                                    "end": 1958,
-                                    "start": 1905
-                                },
-                                {
-                                    "end": 2016,
-                                    "start": 1963
-                                },
-                                {
-                                    "end": 2078,
-                                    "start": 2021
-                                },
-                                {
-                                    "end": 2141,
-                                    "start": 2083
-                                },
-                                {
-                                    "end": 2199,
-                                    "start": 2146
-                                },
-                                {
-                                    "end": 2259,
-                                    "start": 2204
-                                },
-                                {
-                                    "end": 2318,
-                                    "start": 2264
-                                },
-                                {
-                                    "end": 2376,
-                                    "start": 2323
-                                },
-                                {
-                                    "end": 2435,
-                                    "start": 2381
-                                },
-                                {
-                                    "end": 2493,
-                                    "start": 2440
-                                },
-                                {
-                                    "end": 2551,
-                                    "start": 2498
-                                },
-                                {
-                                    "end": 2608,
-                                    "start": 2556
-                                },
-                                {
-                                    "end": 2712,
-                                    "start": 2654
-                                },
-                                {
-                                    "end": 2770,
-                                    "start": 2717
-                                },
-                                {
-                                    "end": 2828,
-                                    "start": 2775
-                                },
-                                {
-                                    "end": 2886,
-                                    "start": 2833
-                                },
-                                {
-                                    "end": 2944,
-                                    "start": 2891
-                                },
-                                {
-                                    "end": 3002,
-                                    "start": 2949
-                                },
-                                {
-                                    "end": 3059,
-                                    "start": 3007
-                                },
-                                {
-                                    "end": 3117,
-                                    "start": 3064
-                                },
-                                {
-                                    "end": 3176,
-                                    "start": 3122
-                                },
-                                {
-                                    "end": 3236,
-                                    "start": 3181
-                                },
-                                {
-                                    "end": 3294,
-                                    "start": 3241
-                                },
-                                {
-                                    "end": 3352,
-                                    "start": 3299
-                                },
-                                {
-                                    "end": 3411,
-                                    "start": 3357
-                                },
-                                {
-                                    "end": 3468,
-                                    "start": 3416
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PF02494",
-                            "regions": [
-                                {
-                                    "end": 642,
-                                    "start": 561
-                                },
-                                {
-                                    "end": 721,
-                                    "start": 644
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PR00895",
-                            "regions": [
-                                {
-                                    "end": 1530,
-                                    "start": 1512
-                                },
-                                {
-                                    "end": 1558,
-                                    "start": 1539
-                                },
-                                {
-                                    "end": 1592,
-                                    "start": 1559
-                                }
-                            ]
-                        },
-                        {
-                            "name": "SSF57535",
-                            "regions": [
-                                {
-                                    "end": 433,
-                                    "start": 374
-                                },
-                                {
-                                    "end": 493,
-                                    "start": 434
-                                },
-                                {
-                                    "end": 560,
-                                    "start": 494
-                                },
-                                {
-                                    "end": 790,
-                                    "start": 727
-                                },
-                                {
-                                    "end": 1746,
-                                    "start": 1626
-                                },
-                                {
-                                    "end": 1842,
-                                    "start": 1785
-                                },
-                                {
-                                    "end": 1900,
-                                    "start": 1843
-                                },
-                                {
-                                    "end": 1958,
-                                    "start": 1901
-                                },
-                                {
-                                    "end": 2016,
-                                    "start": 1959
-                                },
-                                {
-                                    "end": 2078,
-                                    "start": 2017
-                                },
-                                {
-                                    "end": 2199,
-                                    "start": 2081
-                                },
-                                {
-                                    "end": 2318,
-                                    "start": 2202
-                                },
-                                {
-                                    "end": 2377,
-                                    "start": 2321
-                                },
-                                {
-                                    "end": 2437,
-                                    "start": 2379
-                                },
-                                {
-                                    "end": 2551,
-                                    "start": 2438
-                                },
-                                {
-                                    "end": 2616,
-                                    "start": 2552
-                                },
-                                {
-                                    "end": 2712,
-                                    "start": 2643
-                                },
-                                {
-                                    "end": 2828,
-                                    "start": 2715
-                                },
-                                {
-                                    "end": 2886,
-                                    "start": 2829
-                                },
-                                {
-                                    "end": 2944,
-                                    "start": 2887
-                                },
-                                {
-                                    "end": 3117,
-                                    "start": 2945
-                                },
-                                {
-                                    "end": 3176,
-                                    "start": 3118
-                                },
-                                {
-                                    "end": 3229,
-                                    "start": 3177
-                                },
-                                {
-                                    "end": 3475,
-                                    "start": 3239
-                                }
-                            ]
-                        },
-                        {
-                            "name": "SSF49899",
-                            "regions": [
-                                {
-                                    "end": 1632,
-                                    "start": 1421
-                                }
-                            ]
-                        },
-                        {
-                            "name": "SM00159",
-                            "regions": [
-                                {
-                                    "end": 1627,
-                                    "start": 1420
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PF00354",
-                            "regions": [
-                                {
-                                    "end": 1620,
-                                    "start": 1442
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PF07699",
-                            "regions": [
-                                {
-                                    "end": 360,
-                                    "start": 310
-                                },
-                                {
-                                    "end": 1052,
-                                    "start": 1005
-                                },
-                                {
-                                    "end": 1106,
-                                    "start": 1059
-                                },
-                                {
-                                    "end": 1160,
-                                    "start": 1113
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PS50311",
-                            "regions": [
-                                {
-                                    "end": 1409,
-                                    "start": 1197
-                                },
-                                {
-                                    "end": 3554,
-                                    "start": 3468
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PS50825",
-                            "regions": [
-                                {
-                                    "end": 642,
-                                    "start": 560
-                                },
-                                {
-                                    "end": 724,
-                                    "start": 643
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PF00092",
-                            "regions": [
-                                {
-                                    "end": 252,
-                                    "start": 84
-                                }
-                            ]
-                        },
-                        {
-                            "name": "SSF57196",
-                            "regions": [
-                                {
-                                    "end": 1267,
-                                    "start": 1189
-                                },
-                                {
-                                    "end": 1305,
-                                    "start": 1268
-                                },
-                                {
-                                    "end": 1342,
-                                    "start": 1306
-                                },
-                                {
-                                    "end": 1423,
-                                    "start": 1344
-                                },
-                                {
-                                    "end": 1786,
-                                    "start": 1735
-                                },
-                                {
-                                    "end": 3506,
-                                    "start": 3463
-                                },
-                                {
-                                    "end": 3535,
-                                    "start": 3507
-                                },
-                                {
-                                    "end": 3570,
-                                    "start": 3537
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PS50026",
-                            "regions": [
-                                {
-                                    "end": 1229,
-                                    "start": 1193
-                                },
-                                {
-                                    "end": 1267,
-                                    "start": 1231
-                                },
-                                {
-                                    "end": 1305,
-                                    "start": 1269
-                                },
-                                {
-                                    "end": 1343,
-                                    "start": 1307
-                                },
-                                {
-                                    "end": 1381,
-                                    "start": 1345
-                                },
-                                {
-                                    "end": 1419,
-                                    "start": 1383
-                                },
-                                {
-                                    "end": 1784,
-                                    "start": 1745
-                                },
-                                {
-                                    "end": 3532,
-                                    "start": 3500
-                                },
-                                {
-                                    "end": 3564,
-                                    "start": 3533
-                                }
-                            ]
-                        },
-                        {
-                            "name": "SM00181",
-                            "regions": [
-                                {
-                                    "end": 1229,
-                                    "start": 1196
-                                },
-                                {
-                                    "end": 1267,
-                                    "start": 1234
-                                },
-                                {
-                                    "end": 1305,
-                                    "start": 1272
-                                },
-                                {
-                                    "end": 1343,
-                                    "start": 1310
-                                },
-                                {
-                                    "end": 1381,
-                                    "start": 1348
-                                },
-                                {
-                                    "end": 1419,
-                                    "start": 1386
-                                },
-                                {
-                                    "end": 1784,
-                                    "start": 1748
-                                },
-                                {
-                                    "end": 3500,
-                                    "start": 3471
-                                },
-                                {
-                                    "end": 3532,
-                                    "start": 3503
-                                },
-                                {
-                                    "end": 3564,
-                                    "start": 3535
-                                }
-                            ]
-                        },
-                        {
-                            "name": "SM00179",
-                            "regions": [
-                                {
-                                    "end": 1229,
-                                    "start": 1196
-                                },
-                                {
-                                    "end": 1267,
-                                    "start": 1231
-                                },
-                                {
-                                    "end": 1305,
-                                    "start": 1269
-                                },
-                                {
-                                    "end": 1343,
-                                    "start": 1307
-                                },
-                                {
-                                    "end": 1381,
-                                    "start": 1345
-                                },
-                                {
-                                    "end": 1419,
-                                    "start": 1383
-                                },
-                                {
-                                    "end": 1784,
-                                    "start": 1745
-                                },
-                                {
-                                    "end": 3532,
-                                    "start": 3504
-                                }
-                            ]
-                        },
-                        {
-                            "name": "SSF57184",
-                            "regions": [
-                                {
-                                    "end": 440,
-                                    "start": 269
-                                },
-                                {
-                                    "end": 1144,
-                                    "start": 988
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PF07645",
-                            "regions": [
-                                {
-                                    "end": 1783,
-                                    "start": 1745
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PS50923",
-                            "regions": [
-                                {
-                                    "end": 435,
-                                    "start": 376
-                                },
-                                {
-                                    "end": 495,
-                                    "start": 436
-                                },
-                                {
-                                    "end": 561,
-                                    "start": 496
-                                },
-                                {
-                                    "end": 789,
-                                    "start": 725
-                                },
-                                {
-                                    "end": 1687,
-                                    "start": 1629
-                                },
-                                {
-                                    "end": 1745,
-                                    "start": 1688
-                                },
-                                {
-                                    "end": 1844,
-                                    "start": 1787
-                                },
-                                {
-                                    "end": 1902,
-                                    "start": 1845
-                                },
-                                {
-                                    "end": 1960,
-                                    "start": 1903
-                                },
-                                {
-                                    "end": 2018,
-                                    "start": 1961
-                                },
-                                {
-                                    "end": 2080,
-                                    "start": 2019
-                                },
-                                {
-                                    "end": 2143,
-                                    "start": 2081
-                                },
-                                {
-                                    "end": 2201,
-                                    "start": 2144
-                                },
-                                {
-                                    "end": 2261,
-                                    "start": 2202
-                                },
-                                {
-                                    "end": 2320,
-                                    "start": 2262
-                                },
-                                {
-                                    "end": 2378,
-                                    "start": 2321
-                                },
-                                {
-                                    "end": 2437,
-                                    "start": 2379
-                                },
-                                {
-                                    "end": 2495,
-                                    "start": 2438
-                                },
-                                {
-                                    "end": 2553,
-                                    "start": 2496
-                                },
-                                {
-                                    "end": 2610,
-                                    "start": 2554
-                                },
-                                {
-                                    "end": 2714,
-                                    "start": 2663
-                                },
-                                {
-                                    "end": 2772,
-                                    "start": 2715
-                                },
-                                {
-                                    "end": 2830,
-                                    "start": 2773
-                                },
-                                {
-                                    "end": 2888,
-                                    "start": 2831
-                                },
-                                {
-                                    "end": 2946,
-                                    "start": 2889
-                                },
-                                {
-                                    "end": 3004,
-                                    "start": 2947
-                                },
-                                {
-                                    "end": 3061,
-                                    "start": 3005
-                                },
-                                {
-                                    "end": 3119,
-                                    "start": 3062
-                                },
-                                {
-                                    "end": 3178,
-                                    "start": 3120
-                                },
-                                {
-                                    "end": 3238,
-                                    "start": 3179
-                                },
-                                {
-                                    "end": 3296,
-                                    "start": 3239
-                                },
-                                {
-                                    "end": 3354,
-                                    "start": 3297
-                                },
-                                {
-                                    "end": 3413,
-                                    "start": 3355
-                                },
-                                {
-                                    "end": 3470,
-                                    "start": 3414
-                                }
-                            ]
-                        },
-                        {
-                            "name": "SM00327",
-                            "regions": [
-                                {
-                                    "end": 260,
-                                    "start": 81
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PF00008",
-                            "regions": [
-                                {
-                                    "end": 1226,
-                                    "start": 1197
-                                },
-                                {
-                                    "end": 1265,
-                                    "start": 1235
-                                },
-                                {
-                                    "end": 1302,
-                                    "start": 1273
-                                },
-                                {
-                                    "end": 1379,
-                                    "start": 1349
-                                },
-                                {
-                                    "end": 1417,
-                                    "start": 1387
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PS50234",
-                            "regions": [
-                                {
-                                    "end": 264,
-                                    "start": 83
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PF07974",
-                            "regions": [
-                                {
-                                    "end": 1266,
-                                    "start": 1235
-                                },
-                                {
-                                    "end": 3499,
-                                    "start": 3475
-                                },
-                                {
-                                    "end": 3531,
-                                    "start": 3507
-                                },
-                                {
-                                    "end": 3563,
-                                    "start": 3536
-                                }
-                            ]
-                        },
-                        {
-                            "name": "SSF53300",
-                            "regions": [
-                                {
-                                    "end": 262,
-                                    "start": 79
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PF00084",
-                            "regions": [
-                                {
-                                    "end": 430,
-                                    "start": 378
-                                },
-                                {
-                                    "end": 493,
-                                    "start": 438
-                                },
-                                {
-                                    "end": 1685,
-                                    "start": 1628
-                                },
-                                {
-                                    "end": 1743,
-                                    "start": 1690
-                                },
-                                {
-                                    "end": 1842,
-                                    "start": 1789
-                                },
-                                {
-                                    "end": 1900,
-                                    "start": 1847
-                                },
-                                {
-                                    "end": 1958,
-                                    "start": 1905
-                                },
-                                {
-                                    "end": 2016,
-                                    "start": 1963
-                                },
-                                {
-                                    "end": 2078,
-                                    "start": 2021
-                                },
-                                {
-                                    "end": 2136,
-                                    "start": 2083
-                                },
-                                {
-                                    "end": 2199,
-                                    "start": 2146
-                                },
-                                {
-                                    "end": 2259,
-                                    "start": 2204
-                                },
-                                {
-                                    "end": 2318,
-                                    "start": 2264
-                                },
-                                {
-                                    "end": 2376,
-                                    "start": 2323
-                                },
-                                {
-                                    "end": 2435,
-                                    "start": 2381
-                                },
-                                {
-                                    "end": 2493,
-                                    "start": 2440
-                                },
-                                {
-                                    "end": 2551,
-                                    "start": 2498
-                                },
-                                {
-                                    "end": 2608,
-                                    "start": 2556
-                                },
-                                {
-                                    "end": 2712,
-                                    "start": 2667
-                                },
-                                {
-                                    "end": 2770,
-                                    "start": 2717
-                                },
-                                {
-                                    "end": 2828,
-                                    "start": 2775
-                                },
-                                {
-                                    "end": 2886,
-                                    "start": 2833
-                                },
-                                {
-                                    "end": 2944,
-                                    "start": 2891
-                                },
-                                {
-                                    "end": 3002,
-                                    "start": 2949
-                                },
-                                {
-                                    "end": 3059,
-                                    "start": 3007
-                                },
-                                {
-                                    "end": 3117,
-                                    "start": 3084
-                                },
-                                {
-                                    "end": 3172,
-                                    "start": 3122
-                                },
-                                {
-                                    "end": 3236,
-                                    "start": 3181
-                                },
-                                {
-                                    "end": 3290,
-                                    "start": 3241
-                                },
-                                {
-                                    "end": 3352,
-                                    "start": 3299
-                                },
-                                {
-                                    "end": 3411,
-                                    "start": 3357
-                                },
-                                {
-                                    "end": 3468,
-                                    "start": 3416
-                                }
-                            ]
-                        }
-                    ],
-                    "end": 113342160,
-                    "exons": [
-                        {
-                            "end": 113128840,
-                            "name": null,
-                            "start": 113127531
-                        },
-                        {
-                            "end": 113132296,
-                            "name": null,
-                            "start": 113132203
-                        },
-                        {
-                            "end": 113137743,
-                            "name": null,
-                            "start": 113137648
-                        },
-                        {
-                            "end": 113139646,
-                            "name": null,
-                            "start": 113139551
-                        },
-                        {
-                            "end": 113141797,
-                            "name": null,
-                            "start": 113141627
-                        },
-                        {
-                            "end": 113148354,
-                            "name": null,
-                            "start": 113148178
-                        },
-                        {
-                            "end": 113149738,
-                            "name": null,
-                            "start": 113149565
-                        },
-                        {
-                            "end": 113151867,
-                            "name": null,
-                            "start": 113151804
-                        },
-                        {
-                            "end": 113163289,
-                            "name": null,
-                            "start": 113163134
-                        },
-                        {
-                            "end": 113166832,
-                            "name": null,
-                            "start": 113166607
-                        },
-                        {
-                            "end": 113171231,
-                            "name": null,
-                            "start": 113168440
-                        },
-                        {
-                            "end": 113174015,
-                            "name": null,
-                            "start": 113173343
-                        },
-                        {
-                            "end": 113190038,
-                            "name": null,
-                            "start": 113189871
-                        },
-                        {
-                            "end": 113191614,
-                            "name": null,
-                            "start": 113191423
-                        },
-                        {
-                            "end": 113192284,
-                            "name": null,
-                            "start": 113192200
-                        },
-                        {
-                            "end": 113192730,
-                            "name": null,
-                            "start": 113192554
-                        },
-                        {
-                            "end": 113194314,
-                            "name": null,
-                            "start": 113194195
-                        },
-                        {
-                            "end": 113194915,
-                            "name": null,
-                            "start": 113194742
-                        },
-                        {
-                            "end": 113196786,
-                            "name": null,
-                            "start": 113196616
-                        },
-                        {
-                            "end": 113197644,
-                            "name": null,
-                            "start": 113197521
-                        },
-                        {
-                            "end": 113198784,
-                            "name": null,
-                            "start": 113198660
-                        },
-                        {
-                            "end": 113206000,
-                            "name": null,
-                            "start": 113205825
-                        },
-                        {
-                            "end": 113208318,
-                            "name": null,
-                            "start": 113208117
-                        },
-                        {
-                            "end": 113209337,
-                            "name": null,
-                            "start": 113209180
-                        },
-                        {
-                            "end": 113212540,
-                            "name": null,
-                            "start": 113212339
-                        },
-                        {
-                            "end": 113213682,
-                            "name": null,
-                            "start": 113213569
-                        },
-                        {
-                            "end": 113217983,
-                            "name": null,
-                            "start": 113217870
-                        },
-                        {
-                            "end": 113219632,
-                            "name": null,
-                            "start": 113219536
-                        },
-                        {
-                            "end": 113220842,
-                            "name": null,
-                            "start": 113220751
-                        },
-                        {
-                            "end": 113221393,
-                            "name": null,
-                            "start": 113221232
-                        },
-                        {
-                            "end": 113228306,
-                            "name": null,
-                            "start": 113228145
-                        },
-                        {
-                            "end": 113231381,
-                            "name": null,
-                            "start": 113231220
-                        },
-                        {
-                            "end": 113233877,
-                            "name": null,
-                            "start": 113233644
-                        },
-                        {
-                            "end": 113234603,
-                            "name": null,
-                            "start": 113234439
-                        },
-                        {
-                            "end": 113238595,
-                            "name": null,
-                            "start": 113238484
-                        },
-                        {
-                            "end": 113242036,
-                            "name": null,
-                            "start": 113241915
-                        },
-                        {
-                            "end": 113243716,
-                            "name": null,
-                            "start": 113243522
-                        },
-                        {
-                            "end": 113244772,
-                            "name": null,
-                            "start": 113244641
-                        },
-                        {
-                            "end": 113245973,
-                            "name": null,
-                            "start": 113245866
-                        },
-                        {
-                            "end": 113252059,
-                            "name": null,
-                            "start": 113251930
-                        },
-                        {
-                            "end": 113259213,
-                            "name": null,
-                            "start": 113259095
-                        },
-                        {
-                            "end": 113261518,
-                            "name": null,
-                            "start": 113261321
-                        },
-                        {
-                            "end": 113265497,
-                            "name": null,
-                            "start": 113265318
-                        },
-                        {
-                            "end": 113275385,
-                            "name": null,
-                            "start": 113275206
-                        },
-                        {
-                            "end": 113276386,
-                            "name": null,
-                            "start": 113276228
-                        },
-                        {
-                            "end": 113308571,
-                            "name": null,
-                            "start": 113308395
-                        },
-                        {
-                            "end": 113312384,
-                            "name": null,
-                            "start": 113312129
-                        },
-                        {
-                            "end": 113342160,
-                            "name": null,
-                            "start": 113341293
-                        }
-                    ],
-                    "is_best_transcript": true,
-                    "name": "ENST00000401783",
-                    "start": 113127531
-                },
-                {
-                    "cdna_coding_end": 4909,
-                    "cdna_coding_start": 416,
-                    "domains": [
-                        {
-                            "name": "PF00084",
-                            "regions": [
-                                {
-                                    "end": 62,
-                                    "start": 9
-                                },
-                                {
-                                    "end": 125,
-                                    "start": 72
-                                },
-                                {
-                                    "end": 185,
-                                    "start": 130
-                                },
-                                {
-                                    "end": 244,
-                                    "start": 190
-                                },
-                                {
-                                    "end": 302,
-                                    "start": 249
-                                },
-                                {
-                                    "end": 361,
-                                    "start": 307
-                                },
-                                {
-                                    "end": 419,
-                                    "start": 366
-                                },
-                                {
-                                    "end": 477,
-                                    "start": 424
-                                },
-                                {
-                                    "end": 534,
-                                    "start": 482
-                                },
-                                {
-                                    "end": 638,
-                                    "start": 593
-                                },
-                                {
-                                    "end": 696,
-                                    "start": 643
-                                },
-                                {
-                                    "end": 754,
-                                    "start": 701
-                                },
-                                {
-                                    "end": 812,
-                                    "start": 759
-                                },
-                                {
-                                    "end": 870,
-                                    "start": 817
-                                },
-                                {
-                                    "end": 928,
-                                    "start": 875
-                                },
-                                {
-                                    "end": 985,
-                                    "start": 933
-                                },
-                                {
-                                    "end": 1043,
-                                    "start": 1010
-                                },
-                                {
-                                    "end": 1098,
-                                    "start": 1048
-                                },
-                                {
-                                    "end": 1162,
-                                    "start": 1107
-                                },
-                                {
-                                    "end": 1216,
-                                    "start": 1167
-                                },
-                                {
-                                    "end": 1278,
-                                    "start": 1225
-                                },
-                                {
-                                    "end": 1337,
-                                    "start": 1283
-                                },
-                                {
-                                    "end": 1394,
-                                    "start": 1342
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PF07974",
-                            "regions": [
-                                {
-                                    "end": 1425,
-                                    "start": 1401
-                                },
-                                {
-                                    "end": 1457,
-                                    "start": 1433
-                                },
-                                {
-                                    "end": 1489,
-                                    "start": 1462
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PF00008",
-                            "regions": [
-                                {
-                                    "end": 1456,
-                                    "start": 1427
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PS50923",
-                            "regions": [
-                                {
-                                    "end": 69,
-                                    "start": 7
-                                },
-                                {
-                                    "end": 127,
-                                    "start": 70
-                                },
-                                {
-                                    "end": 187,
-                                    "start": 128
-                                },
-                                {
-                                    "end": 246,
-                                    "start": 188
-                                },
-                                {
-                                    "end": 304,
-                                    "start": 247
-                                },
-                                {
-                                    "end": 363,
-                                    "start": 305
-                                },
-                                {
-                                    "end": 421,
-                                    "start": 364
-                                },
-                                {
-                                    "end": 479,
-                                    "start": 422
-                                },
-                                {
-                                    "end": 536,
-                                    "start": 480
-                                },
-                                {
-                                    "end": 640,
-                                    "start": 589
-                                },
-                                {
-                                    "end": 698,
-                                    "start": 641
-                                },
-                                {
-                                    "end": 756,
-                                    "start": 699
-                                },
-                                {
-                                    "end": 814,
-                                    "start": 757
-                                },
-                                {
-                                    "end": 872,
-                                    "start": 815
-                                },
-                                {
-                                    "end": 930,
-                                    "start": 873
-                                },
-                                {
-                                    "end": 987,
-                                    "start": 931
-                                },
-                                {
-                                    "end": 1045,
-                                    "start": 988
-                                },
-                                {
-                                    "end": 1104,
-                                    "start": 1046
-                                },
-                                {
-                                    "end": 1164,
-                                    "start": 1105
-                                },
-                                {
-                                    "end": 1222,
-                                    "start": 1165
-                                },
-                                {
-                                    "end": 1280,
-                                    "start": 1223
-                                },
-                                {
-                                    "end": 1339,
-                                    "start": 1281
-                                },
-                                {
-                                    "end": 1396,
-                                    "start": 1340
-                                }
-                            ]
-                        },
-                        {
-                            "name": "SM00181",
-                            "regions": [
-                                {
-                                    "end": 1426,
-                                    "start": 1397
-                                },
-                                {
-                                    "end": 1458,
-                                    "start": 1429
-                                },
-                                {
-                                    "end": 1490,
-                                    "start": 1461
-                                }
-                            ]
-                        },
-                        {
-                            "name": "SSF57196",
-                            "regions": [
-                                {
-                                    "end": 1432,
-                                    "start": 1389
-                                },
-                                {
-                                    "end": 1461,
-                                    "start": 1433
-                                },
-                                {
-                                    "end": 1496,
-                                    "start": 1463
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PS50026",
-                            "regions": [
-                                {
-                                    "end": 1458,
-                                    "start": 1426
-                                },
-                                {
-                                    "end": 1490,
-                                    "start": 1459
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PS50311",
-                            "regions": [
-                                {
-                                    "end": 1480,
-                                    "start": 1394
-                                }
-                            ]
-                        },
-                        {
-                            "name": "SSF57535",
-                            "regions": [
-                                {
-                                    "end": 125,
-                                    "start": 7
-                                },
-                                {
-                                    "end": 244,
-                                    "start": 128
-                                },
-                                {
-                                    "end": 303,
-                                    "start": 247
-                                },
-                                {
-                                    "end": 363,
-                                    "start": 305
-                                },
-                                {
-                                    "end": 477,
-                                    "start": 364
-                                },
-                                {
-                                    "end": 542,
-                                    "start": 478
-                                },
-                                {
-                                    "end": 638,
-                                    "start": 569
-                                },
-                                {
-                                    "end": 754,
-                                    "start": 641
-                                },
-                                {
-                                    "end": 812,
-                                    "start": 755
-                                },
-                                {
-                                    "end": 870,
-                                    "start": 813
-                                },
-                                {
-                                    "end": 1043,
-                                    "start": 871
-                                },
-                                {
-                                    "end": 1102,
-                                    "start": 1044
-                                },
-                                {
-                                    "end": 1155,
-                                    "start": 1103
-                                },
-                                {
-                                    "end": 1401,
-                                    "start": 1165
-                                }
-                            ]
-                        },
-                        {
-                            "name": "SM00032",
-                            "regions": [
-                                {
-                                    "end": 67,
-                                    "start": 9
-                                },
-                                {
-                                    "end": 125,
-                                    "start": 72
-                                },
-                                {
-                                    "end": 185,
-                                    "start": 130
-                                },
-                                {
-                                    "end": 244,
-                                    "start": 190
-                                },
-                                {
-                                    "end": 302,
-                                    "start": 249
-                                },
-                                {
-                                    "end": 361,
-                                    "start": 307
-                                },
-                                {
-                                    "end": 419,
-                                    "start": 366
-                                },
-                                {
-                                    "end": 477,
-                                    "start": 424
-                                },
-                                {
-                                    "end": 534,
-                                    "start": 482
-                                },
-                                {
-                                    "end": 638,
-                                    "start": 580
-                                },
-                                {
-                                    "end": 696,
-                                    "start": 643
-                                },
-                                {
-                                    "end": 754,
-                                    "start": 701
-                                },
-                                {
-                                    "end": 812,
-                                    "start": 759
-                                },
-                                {
-                                    "end": 870,
-                                    "start": 817
-                                },
-                                {
-                                    "end": 928,
-                                    "start": 875
-                                },
-                                {
-                                    "end": 985,
-                                    "start": 933
-                                },
-                                {
-                                    "end": 1043,
-                                    "start": 990
-                                },
-                                {
-                                    "end": 1102,
-                                    "start": 1048
-                                },
-                                {
-                                    "end": 1162,
-                                    "start": 1107
-                                },
-                                {
-                                    "end": 1220,
-                                    "start": 1167
-                                },
-                                {
-                                    "end": 1278,
-                                    "start": 1225
-                                },
-                                {
-                                    "end": 1337,
-                                    "start": 1283
-                                },
-                                {
-                                    "end": 1394,
-                                    "start": 1342
-                                }
-                            ]
-                        }
-                    ],
-                    "end": 113190038,
-                    "exons": [
-                        {
-                            "end": 113128840,
-                            "name": null,
-                            "start": 113127536
-                        },
-                        {
-                            "end": 113132296,
-                            "name": null,
-                            "start": 113132203
-                        },
-                        {
-                            "end": 113137743,
-                            "name": null,
-                            "start": 113137648
-                        },
-                        {
-                            "end": 113139646,
-                            "name": null,
-                            "start": 113139551
-                        },
-                        {
-                            "end": 113141797,
-                            "name": null,
-                            "start": 113141627
-                        },
-                        {
-                            "end": 113148354,
-                            "name": null,
-                            "start": 113148178
-                        },
-                        {
-                            "end": 113149738,
-                            "name": null,
-                            "start": 113149565
-                        },
-                        {
-                            "end": 113151867,
-                            "name": null,
-                            "start": 113151804
-                        },
-                        {
-                            "end": 113163289,
-                            "name": null,
-                            "start": 113163134
-                        },
-                        {
-                            "end": 113166832,
-                            "name": null,
-                            "start": 113166607
-                        },
-                        {
-                            "end": 113171231,
-                            "name": null,
-                            "start": 113168440
-                        },
-                        {
-                            "end": 113174015,
-                            "name": null,
-                            "start": 113173343
-                        },
-                        {
-                            "end": 113190038,
-                            "name": null,
-                            "start": 113189871
-                        }
-                    ],
-                    "is_best_transcript": false,
-                    "name": "ENST00000297826",
-                    "start": 113127536
-                },
-                {
-                    "cdna_coding_end": 10911,
-                    "cdna_coding_start": 265,
-                    "domains": [
-                        {
-                            "name": "SSF57535",
-                            "regions": [
-                                {
-                                    "end": 410,
-                                    "start": 351
-                                },
-                                {
-                                    "end": 470,
-                                    "start": 411
-                                },
-                                {
-                                    "end": 537,
-                                    "start": 471
-                                },
-                                {
-                                    "end": 767,
-                                    "start": 704
-                                },
-                                {
-                                    "end": 1723,
-                                    "start": 1603
-                                },
-                                {
-                                    "end": 1819,
-                                    "start": 1762
-                                },
-                                {
-                                    "end": 1877,
-                                    "start": 1820
-                                },
-                                {
-                                    "end": 1935,
-                                    "start": 1878
-                                },
-                                {
-                                    "end": 1993,
-                                    "start": 1936
-                                },
-                                {
-                                    "end": 2055,
-                                    "start": 1994
-                                },
-                                {
-                                    "end": 2176,
-                                    "start": 2058
-                                },
-                                {
-                                    "end": 2295,
-                                    "start": 2179
-                                },
-                                {
-                                    "end": 2354,
-                                    "start": 2298
-                                },
-                                {
-                                    "end": 2414,
-                                    "start": 2356
-                                },
-                                {
-                                    "end": 2528,
-                                    "start": 2415
-                                },
-                                {
-                                    "end": 2593,
-                                    "start": 2529
-                                },
-                                {
-                                    "end": 2689,
-                                    "start": 2620
-                                },
-                                {
-                                    "end": 2805,
-                                    "start": 2692
-                                },
-                                {
-                                    "end": 2863,
-                                    "start": 2806
-                                },
-                                {
-                                    "end": 2921,
-                                    "start": 2864
-                                },
-                                {
-                                    "end": 3094,
-                                    "start": 2922
-                                },
-                                {
-                                    "end": 3153,
-                                    "start": 3095
-                                },
-                                {
-                                    "end": 3206,
-                                    "start": 3154
-                                },
-                                {
-                                    "end": 3452,
-                                    "start": 3216
-                                }
-                            ]
-                        },
-                        {
-                            "name": "SSF49899",
-                            "regions": [
-                                {
-                                    "end": 1609,
-                                    "start": 1398
-                                }
-                            ]
-                        },
-                        {
-                            "name": "SM00159",
-                            "regions": [
-                                {
-                                    "end": 1604,
-                                    "start": 1397
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PF00354",
-                            "regions": [
-                                {
-                                    "end": 1597,
-                                    "start": 1419
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PR00895",
-                            "regions": [
-                                {
-                                    "end": 1507,
-                                    "start": 1489
-                                },
-                                {
-                                    "end": 1535,
-                                    "start": 1516
-                                },
-                                {
-                                    "end": 1569,
-                                    "start": 1536
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PF02494",
-                            "regions": [
-                                {
-                                    "end": 619,
-                                    "start": 538
-                                },
-                                {
-                                    "end": 698,
-                                    "start": 621
-                                }
-                            ]
-                        },
-                        {
-                            "name": "SM00032",
-                            "regions": [
-                                {
-                                    "end": 410,
-                                    "start": 355
-                                },
-                                {
-                                    "end": 470,
-                                    "start": 415
-                                },
-                                {
-                                    "end": 536,
-                                    "start": 475
-                                },
-                                {
-                                    "end": 764,
-                                    "start": 704
-                                },
-                                {
-                                    "end": 1662,
-                                    "start": 1608
-                                },
-                                {
-                                    "end": 1720,
-                                    "start": 1667
-                                },
-                                {
-                                    "end": 1819,
-                                    "start": 1766
-                                },
-                                {
-                                    "end": 1877,
-                                    "start": 1824
-                                },
-                                {
-                                    "end": 1935,
-                                    "start": 1882
-                                },
-                                {
-                                    "end": 1993,
-                                    "start": 1940
-                                },
-                                {
-                                    "end": 2055,
-                                    "start": 1998
-                                },
-                                {
-                                    "end": 2118,
-                                    "start": 2060
-                                },
-                                {
-                                    "end": 2176,
-                                    "start": 2123
-                                },
-                                {
-                                    "end": 2236,
-                                    "start": 2181
-                                },
-                                {
-                                    "end": 2295,
-                                    "start": 2241
-                                },
-                                {
-                                    "end": 2353,
-                                    "start": 2300
-                                },
-                                {
-                                    "end": 2412,
-                                    "start": 2358
-                                },
-                                {
-                                    "end": 2470,
-                                    "start": 2417
-                                },
-                                {
-                                    "end": 2528,
-                                    "start": 2475
-                                },
-                                {
-                                    "end": 2585,
-                                    "start": 2533
-                                },
-                                {
-                                    "end": 2689,
-                                    "start": 2631
-                                },
-                                {
-                                    "end": 2747,
-                                    "start": 2694
-                                },
-                                {
-                                    "end": 2805,
-                                    "start": 2752
-                                },
-                                {
-                                    "end": 2863,
-                                    "start": 2810
-                                },
-                                {
-                                    "end": 2921,
-                                    "start": 2868
-                                },
-                                {
-                                    "end": 2979,
-                                    "start": 2926
-                                },
-                                {
-                                    "end": 3036,
-                                    "start": 2984
-                                },
-                                {
-                                    "end": 3094,
-                                    "start": 3041
-                                },
-                                {
-                                    "end": 3153,
-                                    "start": 3099
-                                },
-                                {
-                                    "end": 3213,
-                                    "start": 3158
-                                },
-                                {
-                                    "end": 3271,
-                                    "start": 3218
-                                },
-                                {
-                                    "end": 3329,
-                                    "start": 3276
-                                },
-                                {
-                                    "end": 3388,
-                                    "start": 3334
-                                },
-                                {
-                                    "end": 3445,
-                                    "start": 3393
-                                }
-                            ]
-                        },
-                        {
-                            "name": "SM00179",
-                            "regions": [
-                                {
-                                    "end": 1206,
-                                    "start": 1173
-                                },
-                                {
-                                    "end": 1244,
-                                    "start": 1208
-                                },
-                                {
-                                    "end": 1282,
-                                    "start": 1246
-                                },
-                                {
-                                    "end": 1320,
-                                    "start": 1284
-                                },
-                                {
-                                    "end": 1358,
-                                    "start": 1322
-                                },
-                                {
-                                    "end": 1396,
-                                    "start": 1360
-                                },
-                                {
-                                    "end": 1761,
-                                    "start": 1722
-                                },
-                                {
-                                    "end": 3509,
-                                    "start": 3481
-                                }
-                            ]
-                        },
-                        {
-                            "name": "SSF57184",
-                            "regions": [
-                                {
-                                    "end": 417,
-                                    "start": 246
-                                },
-                                {
-                                    "end": 1121,
-                                    "start": 965
-                                }
-                            ]
-                        },
-                        {
-                            "name": "SSF57196",
-                            "regions": [
-                                {
-                                    "end": 1244,
-                                    "start": 1166
-                                },
-                                {
-                                    "end": 1282,
-                                    "start": 1245
-                                },
-                                {
-                                    "end": 1319,
-                                    "start": 1283
-                                },
-                                {
-                                    "end": 1400,
-                                    "start": 1321
-                                },
-                                {
-                                    "end": 1763,
-                                    "start": 1712
-                                },
-                                {
-                                    "end": 3483,
-                                    "start": 3440
-                                },
-                                {
-                                    "end": 3512,
-                                    "start": 3484
-                                },
-                                {
-                                    "end": 3547,
-                                    "start": 3514
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PS50026",
-                            "regions": [
-                                {
-                                    "end": 1206,
-                                    "start": 1170
-                                },
-                                {
-                                    "end": 1244,
-                                    "start": 1208
-                                },
-                                {
-                                    "end": 1282,
-                                    "start": 1246
-                                },
-                                {
-                                    "end": 1320,
-                                    "start": 1284
-                                },
-                                {
-                                    "end": 1358,
-                                    "start": 1322
-                                },
-                                {
-                                    "end": 1396,
-                                    "start": 1360
-                                },
-                                {
-                                    "end": 1761,
-                                    "start": 1722
-                                },
-                                {
-                                    "end": 3509,
-                                    "start": 3477
-                                },
-                                {
-                                    "end": 3541,
-                                    "start": 3510
-                                }
-                            ]
-                        },
-                        {
-                            "name": "SM00181",
-                            "regions": [
-                                {
-                                    "end": 1206,
-                                    "start": 1173
-                                },
-                                {
-                                    "end": 1244,
-                                    "start": 1211
-                                },
-                                {
-                                    "end": 1282,
-                                    "start": 1249
-                                },
-                                {
-                                    "end": 1320,
-                                    "start": 1287
-                                },
-                                {
-                                    "end": 1358,
-                                    "start": 1325
-                                },
-                                {
-                                    "end": 1396,
-                                    "start": 1363
-                                },
-                                {
-                                    "end": 1761,
-                                    "start": 1725
-                                },
-                                {
-                                    "end": 3477,
-                                    "start": 3448
-                                },
-                                {
-                                    "end": 3509,
-                                    "start": 3480
-                                },
-                                {
-                                    "end": 3541,
-                                    "start": 3512
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PF00092",
-                            "regions": [
-                                {
-                                    "end": 229,
-                                    "start": 61
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PS50825",
-                            "regions": [
-                                {
-                                    "end": 619,
-                                    "start": 537
-                                },
-                                {
-                                    "end": 701,
-                                    "start": 620
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PS50311",
-                            "regions": [
-                                {
-                                    "end": 1386,
-                                    "start": 1174
-                                },
-                                {
-                                    "end": 3531,
-                                    "start": 3445
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PF07699",
-                            "regions": [
-                                {
-                                    "end": 337,
-                                    "start": 287
-                                },
-                                {
-                                    "end": 1029,
-                                    "start": 982
-                                },
-                                {
-                                    "end": 1083,
-                                    "start": 1036
-                                },
-                                {
-                                    "end": 1137,
-                                    "start": 1090
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PF00008",
-                            "regions": [
-                                {
-                                    "end": 1203,
-                                    "start": 1174
-                                },
-                                {
-                                    "end": 1242,
-                                    "start": 1212
-                                },
-                                {
-                                    "end": 1279,
-                                    "start": 1250
-                                },
-                                {
-                                    "end": 1356,
-                                    "start": 1326
-                                },
-                                {
-                                    "end": 1394,
-                                    "start": 1364
-                                }
-                            ]
-                        },
-                        {
-                            "name": "SM00327",
-                            "regions": [
-                                {
-                                    "end": 237,
-                                    "start": 58
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PS50923",
-                            "regions": [
-                                {
-                                    "end": 412,
-                                    "start": 353
-                                },
-                                {
-                                    "end": 472,
-                                    "start": 413
-                                },
-                                {
-                                    "end": 538,
-                                    "start": 473
-                                },
-                                {
-                                    "end": 766,
-                                    "start": 702
-                                },
-                                {
-                                    "end": 1664,
-                                    "start": 1606
-                                },
-                                {
-                                    "end": 1722,
-                                    "start": 1665
-                                },
-                                {
-                                    "end": 1821,
-                                    "start": 1764
-                                },
-                                {
-                                    "end": 1879,
-                                    "start": 1822
-                                },
-                                {
-                                    "end": 1937,
-                                    "start": 1880
-                                },
-                                {
-                                    "end": 1995,
-                                    "start": 1938
-                                },
-                                {
-                                    "end": 2057,
-                                    "start": 1996
-                                },
-                                {
-                                    "end": 2120,
-                                    "start": 2058
-                                },
-                                {
-                                    "end": 2178,
-                                    "start": 2121
-                                },
-                                {
-                                    "end": 2238,
-                                    "start": 2179
-                                },
-                                {
-                                    "end": 2297,
-                                    "start": 2239
-                                },
-                                {
-                                    "end": 2355,
-                                    "start": 2298
-                                },
-                                {
-                                    "end": 2414,
-                                    "start": 2356
-                                },
-                                {
-                                    "end": 2472,
-                                    "start": 2415
-                                },
-                                {
-                                    "end": 2530,
-                                    "start": 2473
-                                },
-                                {
-                                    "end": 2587,
-                                    "start": 2531
-                                },
-                                {
-                                    "end": 2691,
-                                    "start": 2640
-                                },
-                                {
-                                    "end": 2749,
-                                    "start": 2692
-                                },
-                                {
-                                    "end": 2807,
-                                    "start": 2750
-                                },
-                                {
-                                    "end": 2865,
-                                    "start": 2808
-                                },
-                                {
-                                    "end": 2923,
-                                    "start": 2866
-                                },
-                                {
-                                    "end": 2981,
-                                    "start": 2924
-                                },
-                                {
-                                    "end": 3038,
-                                    "start": 2982
-                                },
-                                {
-                                    "end": 3096,
-                                    "start": 3039
-                                },
-                                {
-                                    "end": 3155,
-                                    "start": 3097
-                                },
-                                {
-                                    "end": 3215,
-                                    "start": 3156
-                                },
-                                {
-                                    "end": 3273,
-                                    "start": 3216
-                                },
-                                {
-                                    "end": 3331,
-                                    "start": 3274
-                                },
-                                {
-                                    "end": 3390,
-                                    "start": 3332
-                                },
-                                {
-                                    "end": 3447,
-                                    "start": 3391
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PF07645",
-                            "regions": [
-                                {
-                                    "end": 1760,
-                                    "start": 1722
-                                }
-                            ]
-                        },
-                        {
-                            "name": "SSF53300",
-                            "regions": [
-                                {
-                                    "end": 239,
-                                    "start": 56
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PF00084",
-                            "regions": [
-                                {
-                                    "end": 407,
-                                    "start": 355
-                                },
-                                {
-                                    "end": 470,
-                                    "start": 415
-                                },
-                                {
-                                    "end": 1662,
-                                    "start": 1605
-                                },
-                                {
-                                    "end": 1720,
-                                    "start": 1667
-                                },
-                                {
-                                    "end": 1819,
-                                    "start": 1766
-                                },
-                                {
-                                    "end": 1877,
-                                    "start": 1824
-                                },
-                                {
-                                    "end": 1935,
-                                    "start": 1882
-                                },
-                                {
-                                    "end": 1993,
-                                    "start": 1940
-                                },
-                                {
-                                    "end": 2055,
-                                    "start": 1998
-                                },
-                                {
-                                    "end": 2113,
-                                    "start": 2060
-                                },
-                                {
-                                    "end": 2176,
-                                    "start": 2123
-                                },
-                                {
-                                    "end": 2236,
-                                    "start": 2181
-                                },
-                                {
-                                    "end": 2295,
-                                    "start": 2241
-                                },
-                                {
-                                    "end": 2353,
-                                    "start": 2300
-                                },
-                                {
-                                    "end": 2412,
-                                    "start": 2358
-                                },
-                                {
-                                    "end": 2470,
-                                    "start": 2417
-                                },
-                                {
-                                    "end": 2528,
-                                    "start": 2475
-                                },
-                                {
-                                    "end": 2585,
-                                    "start": 2533
-                                },
-                                {
-                                    "end": 2689,
-                                    "start": 2644
-                                },
-                                {
-                                    "end": 2747,
-                                    "start": 2694
-                                },
-                                {
-                                    "end": 2805,
-                                    "start": 2752
-                                },
-                                {
-                                    "end": 2863,
-                                    "start": 2810
-                                },
-                                {
-                                    "end": 2921,
-                                    "start": 2868
-                                },
-                                {
-                                    "end": 2979,
-                                    "start": 2926
-                                },
-                                {
-                                    "end": 3036,
-                                    "start": 2984
-                                },
-                                {
-                                    "end": 3094,
-                                    "start": 3061
-                                },
-                                {
-                                    "end": 3149,
-                                    "start": 3099
-                                },
-                                {
-                                    "end": 3213,
-                                    "start": 3158
-                                },
-                                {
-                                    "end": 3267,
-                                    "start": 3218
-                                },
-                                {
-                                    "end": 3329,
-                                    "start": 3276
-                                },
-                                {
-                                    "end": 3388,
-                                    "start": 3334
-                                },
-                                {
-                                    "end": 3445,
-                                    "start": 3393
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PF07974",
-                            "regions": [
-                                {
-                                    "end": 1243,
-                                    "start": 1212
-                                },
-                                {
-                                    "end": 3476,
-                                    "start": 3452
-                                },
-                                {
-                                    "end": 3508,
-                                    "start": 3484
-                                },
-                                {
-                                    "end": 3540,
-                                    "start": 3513
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PS50234",
-                            "regions": [
-                                {
-                                    "end": 241,
-                                    "start": 60
-                                }
-                            ]
-                        }
-                    ],
-                    "end": 113342018,
-                    "exons": [
-                        {
-                            "end": 113128840,
-                            "name": null,
-                            "start": 113127536
-                        },
-                        {
-                            "end": 113132296,
-                            "name": null,
-                            "start": 113132203
-                        },
-                        {
-                            "end": 113137743,
-                            "name": null,
-                            "start": 113137648
-                        },
-                        {
-                            "end": 113139646,
-                            "name": null,
-                            "start": 113139551
-                        },
-                        {
-                            "end": 113141797,
-                            "name": null,
-                            "start": 113141627
-                        },
-                        {
-                            "end": 113148354,
-                            "name": null,
-                            "start": 113148178
-                        },
-                        {
-                            "end": 113149738,
-                            "name": null,
-                            "start": 113149565
-                        },
-                        {
-                            "end": 113151867,
-                            "name": null,
-                            "start": 113151804
-                        },
-                        {
-                            "end": 113163289,
-                            "name": null,
-                            "start": 113163134
-                        },
-                        {
-                            "end": 113166832,
-                            "name": null,
-                            "start": 113166607
-                        },
-                        {
-                            "end": 113171231,
-                            "name": null,
-                            "start": 113168440
-                        },
-                        {
-                            "end": 113174015,
-                            "name": null,
-                            "start": 113173343
-                        },
-                        {
-                            "end": 113190038,
-                            "name": null,
-                            "start": 113189871
-                        },
-                        {
-                            "end": 113191614,
-                            "name": null,
-                            "start": 113191423
-                        },
-                        {
-                            "end": 113192284,
-                            "name": null,
-                            "start": 113192200
-                        },
-                        {
-                            "end": 113192730,
-                            "name": null,
-                            "start": 113192554
-                        },
-                        {
-                            "end": 113194314,
-                            "name": null,
-                            "start": 113194195
-                        },
-                        {
-                            "end": 113194915,
-                            "name": null,
-                            "start": 113194742
-                        },
-                        {
-                            "end": 113196786,
-                            "name": null,
-                            "start": 113196616
-                        },
-                        {
-                            "end": 113197644,
-                            "name": null,
-                            "start": 113197521
-                        },
-                        {
-                            "end": 113198784,
-                            "name": null,
-                            "start": 113198660
-                        },
-                        {
-                            "end": 113206000,
-                            "name": null,
-                            "start": 113205825
-                        },
-                        {
-                            "end": 113208318,
-                            "name": null,
-                            "start": 113208117
-                        },
-                        {
-                            "end": 113209337,
-                            "name": null,
-                            "start": 113209180
-                        },
-                        {
-                            "end": 113212540,
-                            "name": null,
-                            "start": 113212339
-                        },
-                        {
-                            "end": 113213682,
-                            "name": null,
-                            "start": 113213569
-                        },
-                        {
-                            "end": 113217983,
-                            "name": null,
-                            "start": 113217870
-                        },
-                        {
-                            "end": 113219632,
-                            "name": null,
-                            "start": 113219536
-                        },
-                        {
-                            "end": 113220842,
-                            "name": null,
-                            "start": 113220751
-                        },
-                        {
-                            "end": 113221393,
-                            "name": null,
-                            "start": 113221232
-                        },
-                        {
-                            "end": 113228306,
-                            "name": null,
-                            "start": 113228145
-                        },
-                        {
-                            "end": 113231381,
-                            "name": null,
-                            "start": 113231220
-                        },
-                        {
-                            "end": 113233877,
-                            "name": null,
-                            "start": 113233644
-                        },
-                        {
-                            "end": 113234603,
-                            "name": null,
-                            "start": 113234439
-                        },
-                        {
-                            "end": 113238595,
-                            "name": null,
-                            "start": 113238484
-                        },
-                        {
-                            "end": 113242036,
-                            "name": null,
-                            "start": 113241915
-                        },
-                        {
-                            "end": 113243716,
-                            "name": null,
-                            "start": 113243522
-                        },
-                        {
-                            "end": 113244772,
-                            "name": null,
-                            "start": 113244641
-                        },
-                        {
-                            "end": 113245973,
-                            "name": null,
-                            "start": 113245866
-                        },
-                        {
-                            "end": 113252059,
-                            "name": null,
-                            "start": 113251930
-                        },
-                        {
-                            "end": 113259213,
-                            "name": null,
-                            "start": 113259095
-                        },
-                        {
-                            "end": 113261518,
-                            "name": null,
-                            "start": 113261321
-                        },
-                        {
-                            "end": 113265497,
-                            "name": null,
-                            "start": 113265318
-                        },
-                        {
-                            "end": 113275385,
-                            "name": null,
-                            "start": 113275206
-                        },
-                        {
-                            "end": 113276386,
-                            "name": null,
-                            "start": 113276228
-                        },
-                        {
-                            "end": 113308571,
-                            "name": null,
-                            "start": 113308395
-                        },
-                        {
-                            "end": 113312384,
-                            "name": null,
-                            "start": 113312129
-                        },
-                        {
-                            "end": 113342018,
-                            "name": null,
-                            "start": 113341293
-                        }
-                    ],
-                    "is_best_transcript": false,
-                    "name": "ENST00000374469",
-                    "start": 113127536
-                },
-                {
-                    "cdna_coding_end": 4650,
-                    "cdna_coding_start": 1,
-                    "domains": [
-                        {
-                            "name": "PS50825",
-                            "regions": [
-                                {
-                                    "end": 642,
-                                    "start": 560
-                                },
-                                {
-                                    "end": 724,
-                                    "start": 643
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PF07699",
-                            "regions": [
-                                {
-                                    "end": 360,
-                                    "start": 310
-                                },
-                                {
-                                    "end": 1052,
-                                    "start": 1005
-                                },
-                                {
-                                    "end": 1106,
-                                    "start": 1059
-                                },
-                                {
-                                    "end": 1160,
-                                    "start": 1113
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PS50311",
-                            "regions": [
-                                {
-                                    "end": 1409,
-                                    "start": 1197
-                                }
-                            ]
-                        },
-                        {
-                            "name": "SM00181",
-                            "regions": [
-                                {
-                                    "end": 1229,
-                                    "start": 1196
-                                },
-                                {
-                                    "end": 1267,
-                                    "start": 1234
-                                },
-                                {
-                                    "end": 1305,
-                                    "start": 1272
-                                },
-                                {
-                                    "end": 1343,
-                                    "start": 1310
-                                },
-                                {
-                                    "end": 1381,
-                                    "start": 1348
-                                },
-                                {
-                                    "end": 1419,
-                                    "start": 1386
-                                }
-                            ]
-                        },
-                        {
-                            "name": "SSF57196",
-                            "regions": [
-                                {
-                                    "end": 1267,
-                                    "start": 1189
-                                },
-                                {
-                                    "end": 1305,
-                                    "start": 1268
-                                },
-                                {
-                                    "end": 1342,
-                                    "start": 1306
-                                },
-                                {
-                                    "end": 1423,
-                                    "start": 1344
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PS50026",
-                            "regions": [
-                                {
-                                    "end": 1229,
-                                    "start": 1193
-                                },
-                                {
-                                    "end": 1267,
-                                    "start": 1231
-                                },
-                                {
-                                    "end": 1305,
-                                    "start": 1269
-                                },
-                                {
-                                    "end": 1343,
-                                    "start": 1307
-                                },
-                                {
-                                    "end": 1381,
-                                    "start": 1345
-                                },
-                                {
-                                    "end": 1419,
-                                    "start": 1383
-                                }
-                            ]
-                        },
-                        {
-                            "name": "SSF57184",
-                            "regions": [
-                                {
-                                    "end": 440,
-                                    "start": 269
-                                },
-                                {
-                                    "end": 1144,
-                                    "start": 988
-                                }
-                            ]
-                        },
-                        {
-                            "name": "SM00179",
-                            "regions": [
-                                {
-                                    "end": 1229,
-                                    "start": 1196
-                                },
-                                {
-                                    "end": 1267,
-                                    "start": 1231
-                                },
-                                {
-                                    "end": 1305,
-                                    "start": 1269
-                                },
-                                {
-                                    "end": 1343,
-                                    "start": 1307
-                                },
-                                {
-                                    "end": 1381,
-                                    "start": 1345
-                                },
-                                {
-                                    "end": 1419,
-                                    "start": 1383
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PF00092",
-                            "regions": [
-                                {
-                                    "end": 252,
-                                    "start": 84
-                                }
-                            ]
-                        },
-                        {
-                            "name": "SM00032",
-                            "regions": [
-                                {
-                                    "end": 433,
-                                    "start": 378
-                                },
-                                {
-                                    "end": 493,
-                                    "start": 438
-                                },
-                                {
-                                    "end": 559,
-                                    "start": 498
-                                },
-                                {
-                                    "end": 787,
-                                    "start": 727
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PF02494",
-                            "regions": [
-                                {
-                                    "end": 642,
-                                    "start": 561
-                                },
-                                {
-                                    "end": 721,
-                                    "start": 644
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PR00010",
-                            "regions": [
-                                {
-                                    "end": 1318,
-                                    "start": 1307
-                                },
-                                {
-                                    "end": 1364,
-                                    "start": 1357
-                                },
-                                {
-                                    "end": 1413,
-                                    "start": 1403
-                                },
-                                {
-                                    "end": 1420,
-                                    "start": 1414
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PF00354",
-                            "regions": [
-                                {
-                                    "end": 1532,
-                                    "start": 1442
-                                }
-                            ]
-                        },
-                        {
-                            "name": "SSF57535",
-                            "regions": [
-                                {
-                                    "end": 433,
-                                    "start": 374
-                                },
-                                {
-                                    "end": 493,
-                                    "start": 434
-                                },
-                                {
-                                    "end": 560,
-                                    "start": 494
-                                },
-                                {
-                                    "end": 790,
-                                    "start": 727
-                                }
-                            ]
-                        },
-                        {
-                            "name": "SSF49899",
-                            "regions": [
-                                {
-                                    "end": 1547,
-                                    "start": 1421
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PS50234",
-                            "regions": [
-                                {
-                                    "end": 264,
-                                    "start": 83
-                                }
-                            ]
-                        },
-                        {
-                            "name": "SSF53300",
-                            "regions": [
-                                {
-                                    "end": 262,
-                                    "start": 79
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PF00084",
-                            "regions": [
-                                {
-                                    "end": 430,
-                                    "start": 378
-                                },
-                                {
-                                    "end": 493,
-                                    "start": 438
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PS50923",
-                            "regions": [
-                                {
-                                    "end": 435,
-                                    "start": 376
-                                },
-                                {
-                                    "end": 495,
-                                    "start": 436
-                                },
-                                {
-                                    "end": 561,
-                                    "start": 496
-                                },
-                                {
-                                    "end": 789,
-                                    "start": 725
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PF07645",
-                            "regions": [
-                                {
-                                    "end": 1262,
-                                    "start": 1231
-                                },
-                                {
-                                    "end": 1338,
-                                    "start": 1308
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PF00008",
-                            "regions": [
-                                {
-                                    "end": 1226,
-                                    "start": 1197
-                                },
-                                {
-                                    "end": 1265,
-                                    "start": 1235
-                                },
-                                {
-                                    "end": 1302,
-                                    "start": 1273
-                                },
-                                {
-                                    "end": 1337,
-                                    "start": 1311
-                                },
-                                {
-                                    "end": 1379,
-                                    "start": 1349
-                                },
-                                {
-                                    "end": 1417,
-                                    "start": 1387
-                                }
-                            ]
-                        },
-                        {
-                            "name": "SM00327",
-                            "regions": [
-                                {
-                                    "end": 260,
-                                    "start": 81
-                                }
-                            ]
-                        }
-                    ],
-                    "end": 113341823,
-                    "exons": [
-                        {
-                            "end": 113206000,
-                            "name": null,
-                            "start": 113204759
-                        },
-                        {
-                            "end": 113208318,
-                            "name": null,
-                            "start": 113208117
-                        },
-                        {
-                            "end": 113209337,
-                            "name": null,
-                            "start": 113209180
-                        },
-                        {
-                            "end": 113212540,
-                            "name": null,
-                            "start": 113212339
-                        },
-                        {
-                            "end": 113213682,
-                            "name": null,
-                            "start": 113213569
-                        },
-                        {
-                            "end": 113217983,
-                            "name": null,
-                            "start": 113217870
-                        },
-                        {
-                            "end": 113219632,
-                            "name": null,
-                            "start": 113219536
-                        },
-                        {
-                            "end": 113220399,
-                            "name": null,
-                            "start": 113220395
-                        },
-                        {
-                            "end": 113220842,
-                            "name": null,
-                            "start": 113220756
-                        },
-                        {
-                            "end": 113221393,
-                            "name": null,
-                            "start": 113221232
-                        },
-                        {
-                            "end": 113228306,
-                            "name": null,
-                            "start": 113228145
-                        },
-                        {
-                            "end": 113231381,
-                            "name": null,
-                            "start": 113231220
-                        },
-                        {
-                            "end": 113233877,
-                            "name": null,
-                            "start": 113233644
-                        },
-                        {
-                            "end": 113234603,
-                            "name": null,
-                            "start": 113234439
-                        },
-                        {
-                            "end": 113238595,
-                            "name": null,
-                            "start": 113238484
-                        },
-                        {
-                            "end": 113242036,
-                            "name": null,
-                            "start": 113241915
-                        },
-                        {
-                            "end": 113243716,
-                            "name": null,
-                            "start": 113243522
-                        },
-                        {
-                            "end": 113244772,
-                            "name": null,
-                            "start": 113244641
-                        },
-                        {
-                            "end": 113245973,
-                            "name": null,
-                            "start": 113245866
-                        },
-                        {
-                            "end": 113252059,
-                            "name": null,
-                            "start": 113251930
-                        },
-                        {
-                            "end": 113259213,
-                            "name": null,
-                            "start": 113259095
-                        },
-                        {
-                            "end": 113261518,
-                            "name": null,
-                            "start": 113261321
-                        },
-                        {
-                            "end": 113265497,
-                            "name": null,
-                            "start": 113265318
-                        },
-                        {
-                            "end": 113275385,
-                            "name": null,
-                            "start": 113275206
-                        },
-                        {
-                            "end": 113276386,
-                            "name": null,
-                            "start": 113276228
-                        },
-                        {
-                            "end": 113308571,
-                            "name": null,
-                            "start": 113308395
-                        },
-                        {
-                            "end": 113312384,
-                            "name": null,
-                            "start": 113312129
-                        },
-                        {
-                            "end": 113341823,
-                            "name": null,
-                            "start": 113341293
-                        }
-                    ],
-                    "is_best_transcript": false,
-                    "name": "ENST00000302728",
-                    "start": 113204759
-                },
-                {
-                    "cdna_coding_end": 2944,
-                    "cdna_coding_start": 407,
-                    "domains": [
-                        {
-                            "name": "PF02494",
-                            "regions": [
-                                {
-                                    "end": 619,
-                                    "start": 538
-                                },
-                                {
-                                    "end": 698,
-                                    "start": 621
-                                }
-                            ]
-                        },
-                        {
-                            "name": "SM00032",
-                            "regions": [
-                                {
-                                    "end": 410,
-                                    "start": 355
-                                },
-                                {
-                                    "end": 470,
-                                    "start": 415
-                                },
-                                {
-                                    "end": 536,
-                                    "start": 475
-                                },
-                                {
-                                    "end": 764,
-                                    "start": 704
-                                }
-                            ]
-                        },
-                        {
-                            "name": "SSF57535",
-                            "regions": [
-                                {
-                                    "end": 410,
-                                    "start": 351
-                                },
-                                {
-                                    "end": 470,
-                                    "start": 411
-                                },
-                                {
-                                    "end": 537,
-                                    "start": 471
-                                },
-                                {
-                                    "end": 767,
-                                    "start": 704
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PF07699",
-                            "regions": [
-                                {
-                                    "end": 337,
-                                    "start": 287
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PS50825",
-                            "regions": [
-                                {
-                                    "end": 619,
-                                    "start": 537
-                                },
-                                {
-                                    "end": 701,
-                                    "start": 620
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PF00092",
-                            "regions": [
-                                {
-                                    "end": 229,
-                                    "start": 61
-                                }
-                            ]
-                        },
-                        {
-                            "name": "SSF57184",
-                            "regions": [
-                                {
-                                    "end": 417,
-                                    "start": 246
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PS50923",
-                            "regions": [
-                                {
-                                    "end": 412,
-                                    "start": 353
-                                },
-                                {
-                                    "end": 472,
-                                    "start": 413
-                                },
-                                {
-                                    "end": 538,
-                                    "start": 473
-                                },
-                                {
-                                    "end": 766,
-                                    "start": 702
-                                }
-                            ]
-                        },
-                        {
-                            "name": "SM00327",
-                            "regions": [
-                                {
-                                    "end": 237,
-                                    "start": 58
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PS50234",
-                            "regions": [
-                                {
-                                    "end": 241,
-                                    "start": 60
-                                }
-                            ]
-                        },
-                        {
-                            "name": "SSF53300",
-                            "regions": [
-                                {
-                                    "end": 239,
-                                    "start": 56
-                                }
-                            ]
-                        },
-                        {
-                            "name": "PF00084",
-                            "regions": [
-                                {
-                                    "end": 407,
-                                    "start": 355
-                                },
-                                {
-                                    "end": 470,
-                                    "start": 415
-                                }
-                            ]
-                        }
-                    ],
-                    "end": 113342160,
-                    "exons": [
-                        {
-                            "end": 113238595,
-                            "name": null,
-                            "start": 113238163
-                        },
-                        {
-                            "end": 113242036,
-                            "name": null,
-                            "start": 113241915
-                        },
-                        {
-                            "end": 113243716,
-                            "name": null,
-                            "start": 113243522
-                        },
-                        {
-                            "end": 113244772,
-                            "name": null,
-                            "start": 113244641
-                        },
-                        {
-                            "end": 113245973,
-                            "name": null,
-                            "start": 113245866
-                        },
-                        {
-                            "end": 113252059,
-                            "name": null,
-                            "start": 113251930
-                        },
-                        {
-                            "end": 113259213,
-                            "name": null,
-                            "start": 113259095
-                        },
-                        {
-                            "end": 113261518,
-                            "name": null,
-                            "start": 113261321
-                        },
-                        {
-                            "end": 113265497,
-                            "name": null,
-                            "start": 113265318
-                        },
-                        {
-                            "end": 113275385,
-                            "name": null,
-                            "start": 113275206
-                        },
-                        {
-                            "end": 113276386,
-                            "name": null,
-                            "start": 113276228
-                        },
-                        {
-                            "end": 113308571,
-                            "name": null,
-                            "start": 113308395
-                        },
-                        {
-                            "end": 113312384,
-                            "name": null,
-                            "start": 113312129
-                        },
-                        {
-                            "end": 113342160,
-                            "name": null,
-                            "start": 113341293
-                        }
-                    ],
-                    "is_best_transcript": false,
-                    "name": "ENST00000374461",
-                    "start": 113238163
-                }
-            ]
-        },
-        {
-            "aliases": ["ARID1B"],
-            "chr": "6",
-            "strand": "+",
-            "name": "ENSG00000049618",
-            "end": 157530401,
-            "start": 157099063,
-            "transcripts": [{
-                "name": "ENST00000346085",
-                "is_best_transcript": true,
-                "start": 157099063,
-                "end": 157529495,
-                "cdna_coding_end": 6751,
-                "cdna_coding_start": 2,
-                "exons": [
-                    {"start": 157099063, "end": 157100605},
-                    {"start": 157150361, "end": 157150555},
-                    {"start": 157192748, "end": 157192786},
-                    {"start": 157222510, "end": 157222659},
-                    {"start": 157256600, "end": 157256710},
-                    {"start": 157405796, "end": 157406039},
-                    {"start": 157431606, "end": 157431695},
-                    {"start": 157454162, "end": 157454341},
-                    {"start": 157469758, "end": 157470085},
-                    {"start": 157488174, "end": 157488319},
-                    {"start": 157495142, "end": 157495251},
-                    {"start": 157502103, "end": 157502312},
-                    {"start": 157505365, "end": 157505569},
-                    {"start": 157510776, "end": 157510914},
-                    {"start": 157511172, "end": 157511344},
-                    {"start": 157517299, "end": 157517449},
-                    {"start": 157519945, "end": 157520041},
-                    {"start": 157521839, "end": 157522622},
-                    {"start": 157525000, "end": 157525130},
-                    {"start": 157527301, "end": 157529495}
-                ],
-                "domains": [
-                    {"name": "PF12031", "regions": [{"start": 1939, "end": 2195}]},
-                    {"name": "PS50324", "regions": [{"start": 35, "end": 57}, {"start": 697, "end": 784}]},
-                    {"name": "PF01388", "regions": [{"start": 1065, "end": 1153}]},
-                    {"name": "PS50099", "regions": [{"start": 715, "end": 820}, {"start": 1472, "end": 1610}]},
-                    {"name": "SSF48371", "regions": [{"start": 2075, "end": 2220}]},
-                    {"name": "PS50316", "regions": [{"start": 81, "end": 104}]},
-                    {"name": "PS50322", "regions": [{"start": 107, "end": 131}, {"start": 574, "end": 646}]},
-                    {"name": "PS51011", "regions": [{"start": 1066, "end": 1157}]},
-                    {"name": "PS50310", "regions": [{"start": 2, "end": 47}, {"start": 329, "end": 493}]},
-                    {"name": "PS50315", "regions": [{"start": 141, "end": 401}]},
-                    {"name": "SSF46774", "regions": [{"start": 1049, "end": 1168}]},
-                    {"name": "SM00501", "regions": [{"start": 1067, "end": 1158}]}
-                ]
-            }]
-        }
-    ]
-}
+{"genes": [{"aliases": ["EGFR"], "chr": "7", "end": 55324313, "name": "ENSG00000146648", "start": 55086714, "strand": "+", "transcripts": [{"end": 55270769, "exons": [{"end": 55087058, "start": 55086714}, {"end": 55210130, "start": 55209979}, {"end": 55211181, "start": 55210998}, {"end": 55219055, "start": 55218987}, {"end": 55220357, "start": 55220239}, {"end": 55221845, "start": 55221704}, {"end": 55223639, "start": 55223523}, {"end": 55224352, "start": 55224226}, {"end": 55224525, "start": 55224452}, {"end": 55225446, "start": 55225356}, {"end": 55228031, "start": 55227832}, {"end": 55229324, "start": 55229192}, {"end": 55231516, "start": 55231426}, {"end": 55233130, "start": 55232973}, {"end": 55238906, "start": 55238868}, {"end": 55240817, "start": 55240676}, {"end": 55241736, "start": 55241614}, {"end": 55242513, "start": 55242415}, {"end": 55249171, "start": 55248986}, {"end": 55259567, "start": 55259412}, {"end": 55260534, "start": 55260459}, {"end": 55266556, "start": 55266410}, {"end": 55268106, "start": 55268009}, {"end": 55269048, "start": 55268881}, {"end": 55269475, "start": 55269428}, {"end": 55270769, "start": 55270210}], "is_best_transcript": false, "name": "ENST00000455089", "start": 55086714, "translations": [{"cdna_coding_end": 3533, "cdna_coding_start": 258, "domains": [{"name": "PIRSF000619", "regions": [{"end": 1090, "start": 1}]}, {"name": "PF07714", "regions": [{"end": 920, "start": 669}]}, {"name": "SSF52058", "regions": [{"end": 191, "start": 28}, {"end": 475, "start": 283}]}, {"name": "PF00757", "regions": [{"end": 293, "start": 141}]}, {"name": "PS50011", "regions": [{"end": 934, "start": 667}]}, {"name": "PS50311", "regions": [{"end": 219, "start": 145}]}, {"name": "SSF57184", "regions": [{"end": 290, "start": 142}, {"end": 593, "start": 460}]}, {"name": "PR00109", "regions": [{"end": 758, "start": 745}, {"end": 800, "start": 782}, {"end": 841, "start": 831}, {"end": 872, "start": 850}, {"end": 916, "start": 894}]}, {"name": 
"SSF56112", "regions": [{"end": 975, "start": 651}]}, {"name": "PF01030", "regions": [{"end": 141, "start": 57}, {"end": 435, "start": 316}]}, {"name": "SM00220", "regions": [{"end": 924, "start": 667}]}, {"name": "SM00261", "regions": [{"end": 225, "start": 183}, {"end": 502, "start": 451}, {"end": 556, "start": 507}]}, {"name": "SM00219", "regions": [{"end": 923, "start": 667}]}, {"name": "PF00069", "regions": [{"end": 919, "start": 667}]}]}]}, {"end": 55236328, "exons": [{"end": 55087058, "start": 55086725}, {"end": 55210130, "start": 55209979}, {"end": 55211181, "start": 55210998}, {"end": 55214433, "start": 55214299}, {"end": 55219055, "start": 55218987}, {"end": 55220357, "start": 55220239}, {"end": 55221845, "start": 55221704}, {"end": 55223639, "start": 55223523}, {"end": 55224352, "start": 55224226}, {"end": 55224525, "start": 55224452}, {"end": 55225446, "start": 55225356}, {"end": 55228031, "start": 55227832}, {"end": 55229324, "start": 55229192}, {"end": 55231516, "start": 55231426}, {"end": 55233130, "start": 55232973}, {"end": 55236328, "start": 55236216}], "is_best_transcript": false, "name": "ENST00000342916", "start": 55086725, "translations": [{"cdna_coding_end": 2133, "cdna_coding_start": 247, "domains": [{"name": "PS50311", "regions": [{"end": 264, "start": 187}]}, {"name": "SSF57184", "regions": [{"end": 339, "start": 182}, {"end": 624, "start": 505}]}, {"name": "SSF52058", "regions": [{"end": 211, "start": 29}, {"end": 520, "start": 328}]}, {"name": "PF00757", "regions": [{"end": 338, "start": 185}]}, {"name": "SM00261", "regions": [{"end": 270, "start": 228}, {"end": 547, "start": 496}, {"end": 601, "start": 552}]}, {"name": "PF01030", "regions": [{"end": 167, "start": 57}, {"end": 480, "start": 361}]}]}]}, {"end": 55238738, "exons": [{"end": 55087058, "start": 55086726}, {"end": 55210130, "start": 55209979}, {"end": 55211181, "start": 55210998}, {"end": 55214433, "start": 55214299}, {"end": 55219055, "start": 55218987}, {"end": 55220357, 
"start": 55220239}, {"end": 55221845, "start": 55221704}, {"end": 55223639, "start": 55223523}, {"end": 55224352, "start": 55224226}, {"end": 55224525, "start": 55224452}, {"end": 55225446, "start": 55225356}, {"end": 55228031, "start": 55227832}, {"end": 55229324, "start": 55229192}, {"end": 55231516, "start": 55231426}, {"end": 55233130, "start": 55232973}, {"end": 55238738, "start": 55238000}], "is_best_transcript": false, "name": "ENST00000344576", "start": 55086726, "translations": [{"cdna_coding_end": 2363, "cdna_coding_start": 246, "domains": [{"name": "SSF57184", "regions": [{"end": 339, "start": 182}, {"end": 624, "start": 505}]}, {"name": "PS50311", "regions": [{"end": 264, "start": 187}]}, {"name": "PF00757", "regions": [{"end": 338, "start": 185}]}, {"name": "SSF52058", "regions": [{"end": 211, "start": 29}, {"end": 520, "start": 328}]}, {"name": "SM00261", "regions": [{"end": 270, "start": 228}, {"end": 547, "start": 496}, {"end": 601, "start": 552}]}, {"name": "PF01030", "regions": [{"end": 167, "start": 57}, {"end": 480, "start": 361}]}]}]}, {"end": 55224644, "exons": [{"end": 55087058, "start": 55086727}, {"end": 55210130, "start": 55209979}, {"end": 55211181, "start": 55210998}, {"end": 55214433, "start": 55214299}, {"end": 55219055, "start": 55218987}, {"end": 55220357, "start": 55220239}, {"end": 55221845, "start": 55221704}, {"end": 55223639, "start": 55223523}, {"end": 55224352, "start": 55224226}, {"end": 55224644, "start": 55224452}], "is_best_transcript": false, "name": "ENST00000420316", "start": 55086727, "translations": [{"cdna_coding_end": 1462, "cdna_coding_start": 245, "domains": [{"name": "SSF57184", "regions": [{"end": 339, "start": 182}]}, {"name": "PS50311", "regions": [{"end": 264, "start": 187}]}, {"name": "PF00757", "regions": [{"end": 338, "start": 185}]}, {"name": "SSF52058", "regions": [{"end": 211, "start": 29}, {"end": 403, "start": 328}]}, {"name": "SM00261", "regions": [{"end": 270, "start": 228}]}, {"name": "PF01030", 
"regions": [{"end": 167, "start": 57}]}]}]}, {"end": 55279321, "exons": [{"end": 55087058, "start": 55086794}, {"end": 55210130, "start": 55209979}, {"end": 55211181, "start": 55210998}, {"end": 55214433, "start": 55214299}, {"end": 55219055, "start": 55218987}, {"end": 55220357, "start": 55220239}, {"end": 55221845, "start": 55221704}, {"end": 55223639, "start": 55223523}, {"end": 55224352, "start": 55224226}, {"end": 55224525, "start": 55224452}, {"end": 55225446, "start": 55225356}, {"end": 55228031, "start": 55227832}, {"end": 55229324, "start": 55229192}, {"end": 55231516, "start": 55231426}, {"end": 55233130, "start": 55232973}, {"end": 55238906, "start": 55238868}, {"end": 55240817, "start": 55240676}, {"end": 55241736, "start": 55241614}, {"end": 55242513, "start": 55242415}, {"end": 55249171, "start": 55248986}, {"end": 55259567, "start": 55259412}, {"end": 55260534, "start": 55260459}, {"end": 55266556, "start": 55266410}, {"end": 55268106, "start": 55268009}, {"end": 55269048, "start": 55268881}, {"end": 55269475, "start": 55269428}, {"end": 55270318, "start": 55270210}, {"end": 55279321, "start": 55272949}], "is_best_transcript": true, "name": "ENST00000275493", "start": 55086794, "translations": [{"cdna_coding_end": 3810, "cdna_coding_start": 178, "domains": [{"name": "SM00220", "regions": [{"end": 969, "start": 712}]}, {"name": "PF01030", "regions": [{"end": 167, "start": 57}, {"end": 480, "start": 361}]}, {"name": "SSF56112", "regions": [{"end": 1020, "start": 696}]}, {"name": "PF00069", "regions": [{"end": 964, "start": 712}]}, {"name": "SM00219", "regions": [{"end": 968, "start": 712}]}, {"name": "SM00261", "regions": [{"end": 270, "start": 228}, {"end": 547, "start": 496}, {"end": 601, "start": 552}]}, {"name": "PF00757", "regions": [{"end": 338, "start": 185}]}, {"name": "SSF52058", "regions": [{"end": 211, "start": 29}, {"end": 520, "start": 328}]}, {"name": "PF07714", "regions": [{"end": 965, "start": 714}]}, {"name": "PIRSF000619", "regions": 
[{"end": 1210, "start": 1}]}, {"name": "PR00109", "regions": [{"end": 803, "start": 790}, {"end": 845, "start": 827}, {"end": 886, "start": 876}, {"end": 917, "start": 895}, {"end": 961, "start": 939}]}, {"name": "SSF57184", "regions": [{"end": 339, "start": 182}, {"end": 638, "start": 505}]}, {"name": "PS50311", "regions": [{"end": 264, "start": 187}]}, {"name": "PS50011", "regions": [{"end": 979, "start": 712}]}]}]}, {"end": 55324313, "exons": [{"end": 55087058, "start": 55086811}, {"end": 55210130, "start": 55209979}, {"end": 55211181, "start": 55210998}, {"end": 55214433, "start": 55214299}, {"end": 55219055, "start": 55218987}, {"end": 55220357, "start": 55220239}, {"end": 55221845, "start": 55221704}, {"end": 55223639, "start": 55223523}, {"end": 55224352, "start": 55224226}, {"end": 55224525, "start": 55224452}, {"end": 55225446, "start": 55225356}, {"end": 55228031, "start": 55227832}, {"end": 55229324, "start": 55229192}, {"end": 55231516, "start": 55231426}, {"end": 55233130, "start": 55232973}, {"end": 55238906, "start": 55238868}, {"end": 55240621, "start": 55240539}, {"end": 55324313, "start": 55323947}], "is_best_transcript": false, "name": "ENST00000442591", "start": 55086811, "translations": [{"cdna_coding_end": 2134, "cdna_coding_start": 161, "domains": [{"name": "PF01030", "regions": [{"end": 167, "start": 57}, {"end": 480, "start": 361}]}, {"name": "SM00261", "regions": [{"end": 270, "start": 228}, {"end": 547, "start": 496}, {"end": 601, "start": 552}, {"end": 653, "start": 614}]}, {"name": "SSF52058", "regions": [{"end": 211, "start": 29}, {"end": 520, "start": 328}]}, {"name": "PF00757", "regions": [{"end": 338, "start": 185}]}, {"name": "PS50311", "regions": [{"end": 264, "start": 187}]}, {"name": "SSF57184", "regions": [{"end": 339, "start": 182}, {"end": 638, "start": 505}]}]}]}, {"end": 55214417, "exons": [{"end": 55177651, "start": 55177416}, {"end": 55210130, "start": 55209979}, {"end": 55211181, "start": 55210998}, {"end": 55214417, 
"start": 55214299}], "is_best_transcript": false, "name": "ENST00000450046", "start": 55177416, "translations": [{"cdna_coding_end": 691, "cdna_coding_start": 308, "domains": [{"name": "SSF52058", "regions": [{"end": 127, "start": 1}]}, {"name": "PF01030", "regions": [{"end": 114, "start": 4}]}]}]}, {"end": 55273591, "exons": [{"end": 55177651, "start": 55177540}, {"end": 55210130, "start": 55209979}, {"end": 55211181, "start": 55210998}, {"end": 55214433, "start": 55214299}, {"end": 55219055, "start": 55218987}, {"end": 55220357, "start": 55220239}, {"end": 55221845, "start": 55221704}, {"end": 55223639, "start": 55223523}, {"end": 55224352, "start": 55224226}, {"end": 55224525, "start": 55224452}, {"end": 55225446, "start": 55225356}, {"end": 55228031, "start": 55227832}, {"end": 55229324, "start": 55229192}, {"end": 55231516, "start": 55231426}, {"end": 55233130, "start": 55232973}, {"end": 55238906, "start": 55238868}, {"end": 55240817, "start": 55240676}, {"end": 55241736, "start": 55241614}, {"end": 55242513, "start": 55242415}, {"end": 55249171, "start": 55248986}, {"end": 55259567, "start": 55259412}, {"end": 55260534, "start": 55260459}, {"end": 55266556, "start": 55266410}, {"end": 55268106, "start": 55268009}, {"end": 55269048, "start": 55268881}, {"end": 55269475, "start": 55269428}, {"end": 55270318, "start": 55270210}, {"end": 55273591, "start": 55272949}], "is_best_transcript": false, "name": "ENST00000454757", "start": 55177540, "translations": [{"cdna_coding_end": 3657, "cdna_coding_start": 184, "domains": [{"name": "SM00261", "regions": [{"end": 217, "start": 175}, {"end": 494, "start": 443}, {"end": 548, "start": 499}]}, {"name": "PF00069", "regions": [{"end": 911, "start": 659}]}, {"name": "SM00219", "regions": [{"end": 915, "start": 659}]}, {"name": "SSF56112", "regions": [{"end": 967, "start": 643}]}, {"name": "SM00220", "regions": [{"end": 916, "start": 659}]}, {"name": "PF01030", "regions": [{"end": 114, "start": 4}, {"end": 427, "start": 
308}]}, {"name": "PS50311", "regions": [{"end": 211, "start": 134}]}, {"name": "PS50011", "regions": [{"end": 926, "start": 659}]}, {"name": "PR00109", "regions": [{"end": 750, "start": 737}, {"end": 792, "start": 774}, {"end": 833, "start": 823}, {"end": 864, "start": 842}, {"end": 908, "start": 886}]}, {"name": "SSF57184", "regions": [{"end": 286, "start": 129}, {"end": 585, "start": 452}]}, {"name": "PIRSF000619", "regions": [{"end": 1157, "start": 1}]}, {"name": "PF07714", "regions": [{"end": 912, "start": 661}]}, {"name": "SSF52058", "regions": [{"end": 158, "start": 1}, {"end": 467, "start": 275}]}, {"name": "PF00757", "regions": [{"end": 285, "start": 132}]}]}]}]}, {"aliases": ["DSTYK"], "chr": "1", "end": 205180727, "name": "ENSG00000133059", "start": 205111632, "strand": "-", "transcripts": [{"end": 205180727, "exons": [{"end": 205116873, "start": 205111632}, {"end": 205117467, "start": 205117333}, {"end": 205119898, "start": 205119808}, {"end": 205133083, "start": 205133055}, {"end": 205138960, "start": 205138291}, {"end": 205156934, "start": 205156546}, {"end": 205180727, "start": 205180399}], "is_best_transcript": false, "name": "ENST00000367160", "start": 205111632, "translations": [{"cdna_coding_end": 1831, "cdna_coding_start": 65, "domains": [{"name": "SM00220", "regions": [{"end": 565, "start": 337}]}, {"name": "SSF56112", "regions": [{"end": 585, "start": 452}]}, {"name": "PF00069", "regions": [{"end": 556, "start": 451}]}, {"name": "PF07714", "regions": [{"end": 558, "start": 471}]}, {"name": "PS50011", "regions": [{"end": 565, "start": 312}]}]}]}, {"end": 205180694, "exons": [{"end": 205116873, "start": 205111633}, {"end": 205119922, "start": 205119808}, {"end": 205126514, "start": 205126401}, {"end": 205128807, "start": 205128675}, {"end": 205129398, "start": 205129242}, {"end": 205130515, "start": 205130386}, {"end": 205131340, "start": 205131164}, {"end": 205132134, "start": 205132051}, {"end": 205133083, "start": 205132851}, {"end": 
205138960, "start": 205138291}, {"end": 205156934, "start": 205156546}, {"end": 205180694, "start": 205180399}], "is_best_transcript": false, "name": "ENST00000367161", "start": 205111633, "translations": [{"cdna_coding_end": 2686, "cdna_coding_start": 32, "domains": [{"name": "PF07714", "regions": [{"end": 820, "start": 654}]}, {"name": "PS50011", "regions": [{"end": 884, "start": 652}]}, {"name": "SSF56112", "regions": [{"end": 853, "start": 627}]}, {"name": "SM00220", "regions": [{"end": 861, "start": 652}]}, {"name": "PF00069", "regions": [{"end": 824, "start": 654}]}, {"name": "SM00219", "regions": [{"end": 861, "start": 652}]}]}]}, {"end": 205180694, "exons": [{"end": 205116873, "start": 205111633}, {"end": 205117467, "start": 205117333}, {"end": 205119922, "start": 205119808}, {"end": 205126514, "start": 205126401}, {"end": 205128807, "start": 205128675}, {"end": 205129398, "start": 205129242}, {"end": 205130515, "start": 205130386}, {"end": 205131340, "start": 205131164}, {"end": 205132134, "start": 205132051}, {"end": 205133083, "start": 205132851}, {"end": 205138960, "start": 205138291}, {"end": 205156934, "start": 205156546}, {"end": 205180694, "start": 205180399}], "is_best_transcript": true, "name": "ENST00000367162", "start": 205111633, "translations": [{"cdna_coding_end": 2821, "cdna_coding_start": 32, "domains": [{"name": "PF07714", "regions": [{"end": 899, "start": 654}]}, {"name": "PS50011", "regions": [{"end": 906, "start": 652}]}, {"name": "SSF56112", "regions": [{"end": 897, "start": 638}]}, {"name": "SM00220", "regions": [{"end": 906, "start": 652}]}, {"name": "SM00219", "regions": [{"end": 906, "start": 652}]}, {"name": "PF00069", "regions": [{"end": 897, "start": 654}]}]}]}]}, {"aliases": ["NDUFA12"], "chr": "12", "end": 95397546, "name": "ENSG00000184752", "start": 95290831, "strand": "-", "transcripts": [{"end": 95397436, "exons": [{"end": 95291086, "start": 95290831}, {"end": 95318582, "start": 95318422}, {"end": 95322039, "start": 
95321793}, {"end": 95396597, "start": 95396515}, {"end": 95397436, "start": 95397371}], "is_best_transcript": false, "name": "ENST00000552205", "start": 95290831}, {"end": 95397476, "exons": [{"end": 95365261, "start": 95365108}, {"end": 95396597, "start": 95396582}, {"end": 95397476, "start": 95397371}], "is_best_transcript": false, "name": "ENST00000547157", "start": 95365108, "translations": [{"cdna_coding_end": 188, "cdna_coding_start": 21}]}, {"end": 95397384, "exons": [{"end": 95365396, "start": 95365109}, {"end": 95388033, "start": 95387946}, {"end": 95390752, "start": 95390680}, {"end": 95396597, "start": 95396515}, {"end": 95397384, "start": 95397371}], "is_best_transcript": false, "name": "ENST00000551991", "start": 95365109, "translations": [{"cdna_coding_end": 144, "cdna_coding_start": 1, "domains": [{"name": "PF05071", "regions": [{"end": 33, "start": 12}]}]}]}, {"end": 95397546, "exons": [{"end": 95365396, "start": 95365109}, {"end": 95388033, "start": 95387946}, {"end": 95396597, "start": 95396515}, {"end": 95397546, "start": 95397371}], "is_best_transcript": true, "name": "ENST00000327772", "start": 95365109, "translations": [{"cdna_coding_end": 528, "cdna_coding_start": 91, "domains": [{"name": "PF05071", "regions": [{"end": 137, "start": 36}]}]}]}, {"end": 95397489, "exons": [{"end": 95365396, "start": 95365112}, {"end": 95396597, "start": 95396515}, {"end": 95397489, "start": 95397371}], "is_best_transcript": false, "name": "ENST00000547986", "start": 95365112, "translations": [{"cdna_coding_end": 225, "cdna_coding_start": 34, "domains": [{"name": "PF05071", "regions": [{"end": 53, "start": 36}]}]}]}, {"end": 95397524, "exons": [{"end": 95365396, "start": 95365254}, {"end": 95366265, "start": 95366171}, {"end": 95388033, "start": 95387946}, {"end": 95396597, "start": 95396515}, {"end": 95397524, "start": 95397371}], "is_best_transcript": false, "name": "ENST00000546788", "start": 95365254, "translations": [{"cdna_coding_end": 368, 
"cdna_coding_start": 69, "domains": [{"name": "PF05071", "regions": [{"end": 87, "start": 36}]}]}]}]}, {"aliases": ["FRMD6"], "chr": "14", "end": 52197445, "name": "ENSG00000139926", "start": 51955818, "strand": "+", "transcripts": [{"end": 52197177, "exons": [{"end": 51956138, "start": 51955855}, {"end": 52037128, "start": 52037066}, {"end": 52156653, "start": 52156409}, {"end": 52164950, "start": 52164860}, {"end": 52167853, "start": 52167774}, {"end": 52169306, "start": 52169230}, {"end": 52171653, "start": 52171467}, {"end": 52174951, "start": 52174796}, {"end": 52178314, "start": 52178249}, {"end": 52179269, "start": 52179201}, {"end": 52182217, "start": 52182043}, {"end": 52187108, "start": 52186773}, {"end": 52188798, "start": 52188667}, {"end": 52192588, "start": 52192497}, {"end": 52197177, "start": 52194463}], "is_best_transcript": false, "name": "ENST00000356218", "start": 51955855, "translations": [{"cdna_coding_end": 2338, "cdna_coding_start": 494, "domains": [{"name": "PF09379", "regions": [{"end": 109, "start": 20}]}, {"name": "PF09380", "regions": [{"end": 322, "start": 237}]}, {"name": "SSF50729", "regions": [{"end": 375, "start": 219}]}, {"name": "SM00295", "regions": [{"end": 226, "start": 12}]}, {"name": "PS50057", "regions": [{"end": 320, "start": 16}]}, {"name": "PF00373", "regions": [{"end": 226, "start": 115}]}, {"name": "SSF47031", "regions": [{"end": 218, "start": 110}]}, {"name": "SSF54236", "regions": [{"end": 110, "start": 14}]}]}]}, {"end": 52197445, "exons": [{"end": 52118714, "start": 52118576}, {"end": 52156653, "start": 52156409}, {"end": 52164950, "start": 52164860}, {"end": 52167853, "start": 52167774}, {"end": 52169306, "start": 52169230}, {"end": 52171653, "start": 52171467}, {"end": 52174951, "start": 52174796}, {"end": 52178314, "start": 52178249}, {"end": 52179269, "start": 52179201}, {"end": 52182217, "start": 52182043}, {"end": 52187108, "start": 52186773}, {"end": 52188798, "start": 52188667}, {"end": 52192588, "start": 
52192497}, {"end": 52197445, "start": 52194463}], "is_best_transcript": true, "name": "ENST00000395718", "start": 52118576, "translations": [{"cdna_coding_end": 2130, "cdna_coding_start": 286, "domains": [{"name": "PF00373", "regions": [{"end": 226, "start": 115}]}, {"name": "SSF47031", "regions": [{"end": 218, "start": 110}]}, {"name": "SSF54236", "regions": [{"end": 110, "start": 14}]}, {"name": "PS50057", "regions": [{"end": 320, "start": 16}]}, {"name": "SM00295", "regions": [{"end": 226, "start": 12}]}, {"name": "SSF50729", "regions": [{"end": 375, "start": 219}]}, {"name": "PF09380", "regions": [{"end": 322, "start": 237}]}, {"name": "PF09379", "regions": [{"end": 109, "start": 20}]}]}]}, {"end": 52195654, "exons": [{"end": 52118714, "start": 52118665}, {"end": 52156653, "start": 52156409}, {"end": 52164950, "start": 52164860}, {"end": 52167877, "start": 52167774}, {"end": 52169306, "start": 52169230}, {"end": 52171653, "start": 52171467}, {"end": 52174951, "start": 52174796}, {"end": 52178314, "start": 52178249}, {"end": 52179269, "start": 52179201}, {"end": 52182217, "start": 52182043}, {"end": 52187108, "start": 52186773}, {"end": 52188798, "start": 52188667}, {"end": 52192588, "start": 52192497}, {"end": 52195654, "start": 52194463}], "is_best_transcript": false, "name": "ENST00000344768", "start": 52118665, "translations": [{"cdna_coding_end": 2065, "cdna_coding_start": 197, "domains": [{"name": "PF09380", "regions": [{"end": 330, "start": 245}]}, {"name": "PF09379", "regions": [{"end": 117, "start": 20}]}, {"name": "SSF47031", "regions": [{"end": 226, "start": 118}]}, {"name": "PF00373", "regions": [{"end": 234, "start": 123}]}, {"name": "SSF54236", "regions": [{"end": 118, "start": 14}]}, {"name": "PS50057", "regions": [{"end": 328, "start": 16}]}, {"name": "SM00295", "regions": [{"end": 234, "start": 12}]}, {"name": "SSF50729", "regions": [{"end": 383, "start": 227}]}]}]}, {"end": 52164945, "exons": [{"end": 52118935, "start": 52118698}, {"end": 
52156653, "start": 52156409}, {"end": 52164945, "start": 52164860}], "is_best_transcript": false, "name": "ENST00000554778", "start": 52118698}, {"end": 52174806, "exons": [{"end": 52164950, "start": 52164706}, {"end": 52167877, "start": 52167774}, {"end": 52169306, "start": 52169230}, {"end": 52171653, "start": 52171467}, {"end": 52174806, "start": 52174796}], "is_best_transcript": false, "name": "ENST00000555936", "start": 52164706}, {"end": 52197148, "exons": [{"end": 52164950, "start": 52164831}, {"end": 52167853, "start": 52167774}, {"end": 52169306, "start": 52169230}, {"end": 52171653, "start": 52171467}, {"end": 52174951, "start": 52174796}, {"end": 52178314, "start": 52178249}, {"end": 52179269, "start": 52179201}, {"end": 52182217, "start": 52182043}, {"end": 52187108, "start": 52186773}, {"end": 52188798, "start": 52188667}, {"end": 52192588, "start": 52192497}, {"end": 52197148, "start": 52194463}], "is_best_transcript": false, "name": "ENST00000554167", "start": 52164831, "translations": [{"cdna_coding_end": 1775, "cdna_coding_start": 138, "domains": [{"name": "SSF50729", "regions": [{"end": 306, "start": 150}]}, {"name": "PS50057", "regions": [{"end": 251, "start": 1}]}, {"name": "SSF54236", "regions": [{"end": 41, "start": 1}]}, {"name": "SSF47031", "regions": [{"end": 149, "start": 41}]}, {"name": "PF00373", "regions": [{"end": 157, "start": 46}]}, {"name": "PF09380", "regions": [{"end": 253, "start": 168}]}]}]}, {"end": 52175062, "exons": [{"end": 52169306, "start": 52169266}, {"end": 52171653, "start": 52171467}, {"end": 52175062, "start": 52174796}], "is_best_transcript": false, "name": "ENST00000557405", "start": 52169266, "translations": [{"cdna_coding_end": 390, "cdna_coding_start": 1, "domains": [{"name": "PS50057", "regions": [{"end": 129, "start": 1}]}, {"name": "PF00373", "regions": [{"end": 124, "start": 13}]}, {"name": "SSF47031", "regions": [{"end": 116, "start": 8}]}]}]}, {"end": 52187243, "exons": [{"end": 52179269, "start": 
52179231}, {"end": 52182217, "start": 52182043}, {"end": 52187243, "start": 52186773}], "is_best_transcript": false, "name": "ENST00000555197", "start": 52179231, "translations": [{"cdna_coding_end": 618, "cdna_coding_start": 1, "domains": [{"name": "PF09380", "regions": [{"end": 60, "start": 2}]}, {"name": "PS50057", "regions": [{"end": 58, "start": 1}]}, {"name": "SSF50729", "regions": [{"end": 113, "start": 2}]}]}]}, {"end": 52192513, "exons": [{"end": 52184066, "start": 52183973}, {"end": 52187108, "start": 52186773}, {"end": 52188798, "start": 52188673}, {"end": 52192513, "start": 52192497}], "is_best_transcript": false, "name": "ENST00000555703", "start": 52183973, "translations": [{"cdna_coding_end": 573, "cdna_coding_start": 145}]}, {"end": 52195487, "exons": [{"end": 52184066, "start": 52183973}, {"end": 52187108, "start": 52186773}, {"end": 52188798, "start": 52188667}, {"end": 52192588, "start": 52192497}, {"end": 52195487, "start": 52194463}], "is_best_transcript": false, "name": "ENST00000553556", "start": 52183973, "translations": [{"cdna_coding_end": 939, "cdna_coding_start": 145}]}]}, {"aliases": ["PRKCB"], "chr": "16", "end": 24231932, "name": "ENSG00000166501", "start": 23847322, "strand": "+", "transcripts": [{"end": 24231932, "exons": [{"end": 23847669, "start": 23847322}, {"end": 23848727, "start": 23848696}, {"end": 23999911, "start": 23999829}, {"end": 24043568, "start": 24043457}, {"end": 24046868, "start": 24046740}, {"end": 24104268, "start": 24104112}, {"end": 24105618, "start": 24105484}, {"end": 24124390, "start": 24124294}, {"end": 24135302, "start": 24135156}, {"end": 24166178, "start": 24166005}, {"end": 24183682, "start": 24183591}, {"end": 24185901, "start": 24185839}, {"end": 24192249, "start": 24192111}, {"end": 24196512, "start": 24196432}, {"end": 24196888, "start": 24196781}, {"end": 24202551, "start": 24202411}, {"end": 24231932, "start": 24231282}], "is_best_transcript": true, "name": "ENST00000321728", "start": 23847322, 
"translations": [{"cdna_coding_end": 2191, "cdna_coding_start": 176, "domains": [{"name": "SM00239", "regions": [{"end": 275, "start": 172}]}, {"name": "PF07714", "regions": [{"end": 583, "start": 344}]}, {"name": "SSF49562", "regions": [{"end": 288, "start": 157}]}, {"name": "SM00109", "regions": [{"end": 86, "start": 37}, {"end": 151, "start": 102}]}, {"name": "PS50011", "regions": [{"end": 600, "start": 342}]}, {"name": "PR00008", "regions": [{"end": 48, "start": 34}, {"end": 59, "start": 50}, {"end": 74, "start": 63}, {"end": 152, "start": 140}]}, {"name": "PF00433", "regions": [{"end": 666, "start": 623}]}, {"name": "SM00220", "regions": [{"end": 600, "start": 342}]}, {"name": "PF00168", "regions": [{"end": 259, "start": 175}]}, {"name": "SSF57889", "regions": [{"end": 92, "start": 6}, {"end": 157, "start": 101}]}, {"name": "PF00130", "regions": [{"end": 87, "start": 37}, {"end": 153, "start": 102}]}, {"name": "PS50081", "regions": [{"end": 86, "start": 36}, {"end": 151, "start": 101}]}, {"name": "SSF56112", "regions": [{"end": 627, "start": 317}]}, {"name": "PF00069", "regions": [{"end": 586, "start": 343}]}, {"name": "SM00219", "regions": [{"end": 576, "start": 342}]}, {"name": "PR00360", "regions": [{"end": 200, "start": 188}, {"end": 230, "start": 217}, {"end": 248, "start": 240}]}, {"name": "SM00133", "regions": [{"end": 664, "start": 601}]}, {"name": "PS50004", "regions": [{"end": 260, "start": 173}]}, {"name": "PIRSF000550", "regions": [{"end": 671, "start": 1}]}]}]}, {"end": 24231932, "exons": [{"end": 23847669, "start": 23847345}, {"end": 23848727, "start": 23848696}, {"end": 23999911, "start": 23999829}, {"end": 24043568, "start": 24043457}, {"end": 24046868, "start": 24046740}, {"end": 24104268, "start": 24104112}, {"end": 24105618, "start": 24105484}, {"end": 24124390, "start": 24124294}, {"end": 24135302, "start": 24135156}, {"end": 24166178, "start": 24166005}, {"end": 24183682, "start": 24183591}, {"end": 24185901, "start": 24185839}, {"end": 
24192249, "start": 24192111}, {"end": 24196512, "start": 24196432}, {"end": 24196888, "start": 24196781}, {"end": 24202551, "start": 24202411}, {"end": 24231932, "start": 24225979}], "is_best_transcript": false, "name": "ENST00000303531", "start": 23847345, "translations": [{"cdna_coding_end": 2174, "cdna_coding_start": 153, "domains": [{"name": "SM00133", "regions": [{"end": 663, "start": 601}]}, {"name": "PS50004", "regions": [{"end": 260, "start": 173}]}, {"name": "PIRSF000550", "regions": [{"end": 672, "start": 1}]}, {"name": "PF00069", "regions": [{"end": 586, "start": 343}]}, {"name": "PR00360", "regions": [{"end": 200, "start": 188}, {"end": 230, "start": 217}, {"end": 248, "start": 240}]}, {"name": "SM00219", "regions": [{"end": 576, "start": 342}]}, {"name": "PS50081", "regions": [{"end": 86, "start": 36}, {"end": 151, "start": 101}]}, {"name": "SSF56112", "regions": [{"end": 627, "start": 317}]}, {"name": "SM00220", "regions": [{"end": 600, "start": 342}]}, {"name": "PF00433", "regions": [{"end": 664, "start": 627}]}, {"name": "PF00130", "regions": [{"end": 87, "start": 37}, {"end": 153, "start": 102}]}, {"name": "PF00168", "regions": [{"end": 259, "start": 175}]}, {"name": "SSF57889", "regions": [{"end": 92, "start": 6}, {"end": 157, "start": 101}]}, {"name": "PR00008", "regions": [{"end": 48, "start": 34}, {"end": 59, "start": 50}, {"end": 74, "start": 63}, {"end": 152, "start": 140}]}, {"name": "PS50011", "regions": [{"end": 600, "start": 342}]}, {"name": "SM00109", "regions": [{"end": 86, "start": 37}, {"end": 151, "start": 102}]}, {"name": "PF07714", "regions": [{"end": 583, "start": 344}]}, {"name": "SSF49562", "regions": [{"end": 288, "start": 157}]}, {"name": "SM00239", "regions": [{"end": 275, "start": 172}]}]}]}, {"end": 23880647, "exons": [{"end": 23847669, "start": 23847403}, {"end": 23880647, "start": 23880435}], "is_best_transcript": false, "name": "ENST00000498058", "start": 23847403, "translations": [{"cdna_coding_end": 268, 
"cdna_coding_start": 95, "domains": [{"name": "PR00008", "regions": [{"end": 48, "start": 34}, {"end": 57, "start": 50}]}, {"name": "PS50081", "regions": [{"end": 57, "start": 36}]}, {"name": "SSF57889", "regions": [{"end": 57, "start": 6}]}]}]}, {"end": 24124386, "exons": [{"end": 23848727, "start": 23848544}, {"end": 24104268, "start": 24104112}, {"end": 24105618, "start": 24105484}, {"end": 24124386, "start": 24124294}], "is_best_transcript": false, "name": "ENST00000498739", "start": 23848544}, {"end": 24192166, "exons": [{"end": 24163176, "start": 24163006}, {"end": 24166178, "start": 24166005}, {"end": 24183682, "start": 24183591}, {"end": 24185901, "start": 24185839}, {"end": 24192166, "start": 24192111}], "is_best_transcript": false, "name": "ENST00000472066", "start": 24163006}, {"end": 24202909, "exons": [{"end": 24196888, "start": 24196852}, {"end": 24202909, "start": 24202411}], "is_best_transcript": false, "name": "ENST00000466124", "start": 24196852}]}, {"aliases": ["GIMAP4"], "chr": "7", "end": 150271041, "name": "ENSG00000133574", "start": 150264365, "strand": "+", "transcripts": [{"end": 150271041, "exons": [{"end": 150264525, "start": 150264365}, {"end": 150267047, "start": 150266976}, {"end": 150271041, "start": 150269217}], "is_best_transcript": true, "name": "ENST00000255945", "start": 150264365, "translations": [{"cdna_coding_end": 1165, "cdna_coding_start": 176, "domains": [{"name": "PF04548", "regions": [{"end": 238, "start": 31}]}, {"name": "SSF52540", "regions": [{"end": 288, "start": 24}]}]}]}, {"end": 150270602, "exons": [{"end": 150264525, "start": 150264457}, {"end": 150267089, "start": 150266976}, {"end": 150270602, "start": 150269217}], "is_best_transcript": false, "name": "ENST00000461940", "start": 150264457, "translations": [{"cdna_coding_end": 1115, "cdna_coding_start": 84, "domains": [{"name": "PF04548", "regions": [{"end": 252, "start": 45}]}, {"name": "SSF52540", "regions": [{"end": 302, "start": 38}]}]}]}, {"end": 150269569, 
"exons": [{"end": 150264608, "start": 150264524}, {"end": 150267089, "start": 150266976}, {"end": 150269569, "start": 150269217}], "is_best_transcript": false, "name": "ENST00000479232", "start": 150264524, "translations": [{"cdna_coding_end": 552, "cdna_coding_start": 100, "domains": [{"name": "SSF52540", "regions": [{"end": 151, "start": 38}]}, {"name": "PF04548", "regions": [{"end": 151, "start": 45}]}]}]}]}, {"aliases": ["IL7"], "chr": "8", "end": 79717758, "name": "ENSG00000104432", "start": 79587978, "strand": "-", "transcripts": [{"end": 79717758, "exons": [{"end": 79646067, "start": 79645007}, {"end": 79648762, "start": 79648709}, {"end": 79650870, "start": 79650739}, {"end": 79652317, "start": 79652237}, {"end": 79710443, "start": 79710307}, {"end": 79717758, "start": 79717148}], "is_best_transcript": true, "name": "ENST00000263851", "start": 79645007, "translations": [{"cdna_coding_end": 1135, "cdna_coding_start": 602, "domains": [{"name": "PIRSF001942", "regions": [{"end": 177, "start": 1}]}, {"name": "PR00435", "regions": [{"end": 25, "start": 2}, {"end": 48, "start": 26}, {"end": 77, "start": 57}, {"end": 98, "start": 78}, {"end": 118, "start": 99}, {"end": 173, "start": 151}]}, {"name": "PF01415", "regions": [{"end": 173, "start": 28}]}, {"name": "SM00127", "regions": [{"end": 173, "start": 27}]}]}]}, {"end": 79717699, "exons": [{"end": 79646063, "start": 79645283}, {"end": 79648762, "start": 79648709}, {"end": 79650870, "start": 79650739}, {"end": 79652317, "start": 79652237}, {"end": 79659331, "start": 79659129}, {"end": 79710443, "start": 79710307}, {"end": 79717699, "start": 79717148}], "is_best_transcript": false, "name": "ENST00000518982", "start": 79645283, "translations": [{"cdna_coding_end": 758, "cdna_coding_start": 543, "domains": [{"name": "PR00435", "regions": [{"end": 25, "start": 2}, {"end": 48, "start": 26}]}, {"name": "PF01415", "regions": [{"end": 54, "start": 28}]}]}]}, {"end": 79717163, "exons": [{"end": 79646067, "start": 
79645900}, {"end": 79648762, "start": 79648709}, {"end": 79652317, "start": 79652237}, {"end": 79710443, "start": 79710307}, {"end": 79717163, "start": 79717148}], "is_best_transcript": false, "name": "ENST00000520269", "start": 79645900, "translations": [{"cdna_coding_end": 408, "cdna_coding_start": 7, "domains": [{"name": "PF01415", "regions": [{"end": 77, "start": 28}, {"end": 129, "start": 91}]}, {"name": "SM00127", "regions": [{"end": 129, "start": 27}]}, {"name": "PR00435", "regions": [{"end": 25, "start": 2}, {"end": 48, "start": 26}, {"end": 77, "start": 57}]}, {"name": "PIRSF001942", "regions": [{"end": 133, "start": 1}]}]}]}, {"end": 79717163, "exons": [{"end": 79646067, "start": 79645900}, {"end": 79648762, "start": 79648709}, {"end": 79652317, "start": 79652237}, {"end": 79710443, "start": 79710363}, {"end": 79717163, "start": 79717148}], "is_best_transcript": false, "name": "ENST00000520215", "start": 79645900, "translations": [{"cdna_coding_end": 120, "cdna_coding_start": 7, "domains": [{"name": "PR00435", "regions": [{"end": 25, "start": 2}, {"end": 37, "start": 26}]}]}]}, {"end": 79717686, "exons": [{"end": 79646067, "start": 79645900}, {"end": 79648762, "start": 79648709}, {"end": 79650870, "start": 79650739}, {"end": 79652317, "start": 79652237}, {"end": 79710443, "start": 79710363}, {"end": 79717686, "start": 79717148}], "is_best_transcript": false, "name": "ENST00000520317", "start": 79645900, "translations": [{"cdna_coding_end": 643, "cdna_coding_start": 530, "domains": [{"name": "PR00435", "regions": [{"end": 25, "start": 2}, {"end": 37, "start": 26}]}]}]}, {"end": 79652311, "exons": [{"end": 79646067, "start": 79645948}, {"end": 79652311, "start": 79652237}], "is_best_transcript": false, "name": "ENST00000541183", "start": 79645948, "translations": [{"cdna_coding_end": 195, "cdna_coding_start": 1, "domains": [{"name": "SM00127", "regions": [{"end": 60, "start": 1}]}, {"name": "PF01415", "regions": [{"end": 60, "start": 1}]}]}]}, {"end": 
79717758, "exons": [{"end": 79659331, "start": 79659263}, {"end": 79710443, "start": 79710307}, {"end": 79717758, "start": 79717148}], "is_best_transcript": false, "name": "ENST00000379113", "start": 79659263, "translations": [{"cdna_coding_end": 817, "cdna_coding_start": 602, "domains": [{"name": "PF01415", "regions": [{"end": 54, "start": 28}]}, {"name": "PR00435", "regions": [{"end": 25, "start": 2}, {"end": 48, "start": 26}]}]}]}]}, {"aliases": ["SVEP1"], "chr": "9", "end": 113342160, "name": "ENSG00000165124", "start": 113127531, "strand": "-", "transcripts": [{"end": 113342160, "exons": [{"end": 113128840, "start": 113127531}, {"end": 113132296, "start": 113132203}, {"end": 113137743, "start": 113137648}, {"end": 113139646, "start": 113139551}, {"end": 113141797, "start": 113141627}, {"end": 113148354, "start": 113148178}, {"end": 113149738, "start": 113149565}, {"end": 113151867, "start": 113151804}, {"end": 113163289, "start": 113163134}, {"end": 113166832, "start": 113166607}, {"end": 113171231, "start": 113168440}, {"end": 113174015, "start": 113173343}, {"end": 113190038, "start": 113189871}, {"end": 113191614, "start": 113191423}, {"end": 113192284, "start": 113192200}, {"end": 113192730, "start": 113192554}, {"end": 113194314, "start": 113194195}, {"end": 113194915, "start": 113194742}, {"end": 113196786, "start": 113196616}, {"end": 113197644, "start": 113197521}, {"end": 113198784, "start": 113198660}, {"end": 113206000, "start": 113205825}, {"end": 113208318, "start": 113208117}, {"end": 113209337, "start": 113209180}, {"end": 113212540, "start": 113212339}, {"end": 113213682, "start": 113213569}, {"end": 113217983, "start": 113217870}, {"end": 113219632, "start": 113219536}, {"end": 113220842, "start": 113220751}, {"end": 113221393, "start": 113221232}, {"end": 113228306, "start": 113228145}, {"end": 113231381, "start": 113231220}, {"end": 113233877, "start": 113233644}, {"end": 113234603, "start": 113234439}, {"end": 113238595, "start": 
113238484}, {"end": 113242036, "start": 113241915}, {"end": 113243716, "start": 113243522}, {"end": 113244772, "start": 113244641}, {"end": 113245973, "start": 113245866}, {"end": 113252059, "start": 113251930}, {"end": 113259213, "start": 113259095}, {"end": 113261518, "start": 113261321}, {"end": 113265497, "start": 113265318}, {"end": 113275385, "start": 113275206}, {"end": 113276386, "start": 113276228}, {"end": 113308571, "start": 113308395}, {"end": 113312384, "start": 113312129}, {"end": 113342160, "start": 113341293}], "is_best_transcript": true, "name": "ENST00000401783", "start": 113127531, "translations": [{"cdna_coding_end": 11053, "cdna_coding_start": 338, "domains": [{"name": "SM00032", "regions": [{"end": 433, "start": 378}, {"end": 493, "start": 438}, {"end": 559, "start": 498}, {"end": 787, "start": 727}, {"end": 1685, "start": 1631}, {"end": 1743, "start": 1690}, {"end": 1842, "start": 1789}, {"end": 1900, "start": 1847}, {"end": 1958, "start": 1905}, {"end": 2016, "start": 1963}, {"end": 2078, "start": 2021}, {"end": 2141, "start": 2083}, {"end": 2199, "start": 2146}, {"end": 2259, "start": 2204}, {"end": 2318, "start": 2264}, {"end": 2376, "start": 2323}, {"end": 2435, "start": 2381}, {"end": 2493, "start": 2440}, {"end": 2551, "start": 2498}, {"end": 2608, "start": 2556}, {"end": 2712, "start": 2654}, {"end": 2770, "start": 2717}, {"end": 2828, "start": 2775}, {"end": 2886, "start": 2833}, {"end": 2944, "start": 2891}, {"end": 3002, "start": 2949}, {"end": 3059, "start": 3007}, {"end": 3117, "start": 3064}, {"end": 3176, "start": 3122}, {"end": 3236, "start": 3181}, {"end": 3294, "start": 3241}, {"end": 3352, "start": 3299}, {"end": 3411, "start": 3357}, {"end": 3468, "start": 3416}]}, {"name": "PF02494", "regions": [{"end": 642, "start": 561}, {"end": 721, "start": 644}]}, {"name": "PR00895", "regions": [{"end": 1530, "start": 1512}, {"end": 1558, "start": 1539}, {"end": 1592, "start": 1559}]}, {"name": "SSF57535", "regions": [{"end": 433, 
"start": 374}, {"end": 493, "start": 434}, {"end": 560, "start": 494}, {"end": 790, "start": 727}, {"end": 1746, "start": 1626}, {"end": 1842, "start": 1785}, {"end": 1900, "start": 1843}, {"end": 1958, "start": 1901}, {"end": 2016, "start": 1959}, {"end": 2078, "start": 2017}, {"end": 2199, "start": 2081}, {"end": 2318, "start": 2202}, {"end": 2377, "start": 2321}, {"end": 2437, "start": 2379}, {"end": 2551, "start": 2438}, {"end": 2616, "start": 2552}, {"end": 2712, "start": 2643}, {"end": 2828, "start": 2715}, {"end": 2886, "start": 2829}, {"end": 2944, "start": 2887}, {"end": 3117, "start": 2945}, {"end": 3176, "start": 3118}, {"end": 3229, "start": 3177}, {"end": 3475, "start": 3239}]}, {"name": "SSF49899", "regions": [{"end": 1632, "start": 1421}]}, {"name": "SM00159", "regions": [{"end": 1627, "start": 1420}]}, {"name": "PF00354", "regions": [{"end": 1620, "start": 1442}]}, {"name": "PF07699", "regions": [{"end": 360, "start": 310}, {"end": 1052, "start": 1005}, {"end": 1106, "start": 1059}, {"end": 1160, "start": 1113}]}, {"name": "PS50311", "regions": [{"end": 1409, "start": 1197}, {"end": 3554, "start": 3468}]}, {"name": "PS50825", "regions": [{"end": 642, "start": 560}, {"end": 724, "start": 643}]}, {"name": "PF00092", "regions": [{"end": 252, "start": 84}]}, {"name": "SSF57196", "regions": [{"end": 1267, "start": 1189}, {"end": 1305, "start": 1268}, {"end": 1342, "start": 1306}, {"end": 1423, "start": 1344}, {"end": 1786, "start": 1735}, {"end": 3506, "start": 3463}, {"end": 3535, "start": 3507}, {"end": 3570, "start": 3537}]}, {"name": "PS50026", "regions": [{"end": 1229, "start": 1193}, {"end": 1267, "start": 1231}, {"end": 1305, "start": 1269}, {"end": 1343, "start": 1307}, {"end": 1381, "start": 1345}, {"end": 1419, "start": 1383}, {"end": 1784, "start": 1745}, {"end": 3532, "start": 3500}, {"end": 3564, "start": 3533}]}, {"name": "SM00181", "regions": [{"end": 1229, "start": 1196}, {"end": 1267, "start": 1234}, {"end": 1305, "start": 1272}, {"end": 
1343, "start": 1310}, {"end": 1381, "start": 1348}, {"end": 1419, "start": 1386}, {"end": 1784, "start": 1748}, {"end": 3500, "start": 3471}, {"end": 3532, "start": 3503}, {"end": 3564, "start": 3535}]}, {"name": "SM00179", "regions": [{"end": 1229, "start": 1196}, {"end": 1267, "start": 1231}, {"end": 1305, "start": 1269}, {"end": 1343, "start": 1307}, {"end": 1381, "start": 1345}, {"end": 1419, "start": 1383}, {"end": 1784, "start": 1745}, {"end": 3532, "start": 3504}]}, {"name": "SSF57184", "regions": [{"end": 440, "start": 269}, {"end": 1144, "start": 988}]}, {"name": "PF07645", "regions": [{"end": 1783, "start": 1745}]}, {"name": "PS50923", "regions": [{"end": 435, "start": 376}, {"end": 495, "start": 436}, {"end": 561, "start": 496}, {"end": 789, "start": 725}, {"end": 1687, "start": 1629}, {"end": 1745, "start": 1688}, {"end": 1844, "start": 1787}, {"end": 1902, "start": 1845}, {"end": 1960, "start": 1903}, {"end": 2018, "start": 1961}, {"end": 2080, "start": 2019}, {"end": 2143, "start": 2081}, {"end": 2201, "start": 2144}, {"end": 2261, "start": 2202}, {"end": 2320, "start": 2262}, {"end": 2378, "start": 2321}, {"end": 2437, "start": 2379}, {"end": 2495, "start": 2438}, {"end": 2553, "start": 2496}, {"end": 2610, "start": 2554}, {"end": 2714, "start": 2663}, {"end": 2772, "start": 2715}, {"end": 2830, "start": 2773}, {"end": 2888, "start": 2831}, {"end": 2946, "start": 2889}, {"end": 3004, "start": 2947}, {"end": 3061, "start": 3005}, {"end": 3119, "start": 3062}, {"end": 3178, "start": 3120}, {"end": 3238, "start": 3179}, {"end": 3296, "start": 3239}, {"end": 3354, "start": 3297}, {"end": 3413, "start": 3355}, {"end": 3470, "start": 3414}]}, {"name": "SM00327", "regions": [{"end": 260, "start": 81}]}, {"name": "PF00008", "regions": [{"end": 1226, "start": 1197}, {"end": 1265, "start": 1235}, {"end": 1302, "start": 1273}, {"end": 1379, "start": 1349}, {"end": 1417, "start": 1387}]}, {"name": "PS50234", "regions": [{"end": 264, "start": 83}]}, {"name": 
"PF07974", "regions": [{"end": 1266, "start": 1235}, {"end": 3499, "start": 3475}, {"end": 3531, "start": 3507}, {"end": 3563, "start": 3536}]}, {"name": "SSF53300", "regions": [{"end": 262, "start": 79}]}, {"name": "PF00084", "regions": [{"end": 430, "start": 378}, {"end": 493, "start": 438}, {"end": 1685, "start": 1628}, {"end": 1743, "start": 1690}, {"end": 1842, "start": 1789}, {"end": 1900, "start": 1847}, {"end": 1958, "start": 1905}, {"end": 2016, "start": 1963}, {"end": 2078, "start": 2021}, {"end": 2136, "start": 2083}, {"end": 2199, "start": 2146}, {"end": 2259, "start": 2204}, {"end": 2318, "start": 2264}, {"end": 2376, "start": 2323}, {"end": 2435, "start": 2381}, {"end": 2493, "start": 2440}, {"end": 2551, "start": 2498}, {"end": 2608, "start": 2556}, {"end": 2712, "start": 2667}, {"end": 2770, "start": 2717}, {"end": 2828, "start": 2775}, {"end": 2886, "start": 2833}, {"end": 2944, "start": 2891}, {"end": 3002, "start": 2949}, {"end": 3059, "start": 3007}, {"end": 3117, "start": 3084}, {"end": 3172, "start": 3122}, {"end": 3236, "start": 3181}, {"end": 3290, "start": 3241}, {"end": 3352, "start": 3299}, {"end": 3411, "start": 3357}, {"end": 3468, "start": 3416}]}]}]}, {"end": 113190038, "exons": [{"end": 113128840, "start": 113127536}, {"end": 113132296, "start": 113132203}, {"end": 113137743, "start": 113137648}, {"end": 113139646, "start": 113139551}, {"end": 113141797, "start": 113141627}, {"end": 113148354, "start": 113148178}, {"end": 113149738, "start": 113149565}, {"end": 113151867, "start": 113151804}, {"end": 113163289, "start": 113163134}, {"end": 113166832, "start": 113166607}, {"end": 113171231, "start": 113168440}, {"end": 113174015, "start": 113173343}, {"end": 113190038, "start": 113189871}], "is_best_transcript": false, "name": "ENST00000297826", "start": 113127536, "translations": [{"cdna_coding_end": 4909, "cdna_coding_start": 416, "domains": [{"name": "PF00084", "regions": [{"end": 62, "start": 9}, {"end": 125, "start": 72}, {"end": 
185, "start": 130}, {"end": 244, "start": 190}, {"end": 302, "start": 249}, {"end": 361, "start": 307}, {"end": 419, "start": 366}, {"end": 477, "start": 424}, {"end": 534, "start": 482}, {"end": 638, "start": 593}, {"end": 696, "start": 643}, {"end": 754, "start": 701}, {"end": 812, "start": 759}, {"end": 870, "start": 817}, {"end": 928, "start": 875}, {"end": 985, "start": 933}, {"end": 1043, "start": 1010}, {"end": 1098, "start": 1048}, {"end": 1162, "start": 1107}, {"end": 1216, "start": 1167}, {"end": 1278, "start": 1225}, {"end": 1337, "start": 1283}, {"end": 1394, "start": 1342}]}, {"name": "PF07974", "regions": [{"end": 1425, "start": 1401}, {"end": 1457, "start": 1433}, {"end": 1489, "start": 1462}]}, {"name": "PF00008", "regions": [{"end": 1456, "start": 1427}]}, {"name": "PS50923", "regions": [{"end": 69, "start": 7}, {"end": 127, "start": 70}, {"end": 187, "start": 128}, {"end": 246, "start": 188}, {"end": 304, "start": 247}, {"end": 363, "start": 305}, {"end": 421, "start": 364}, {"end": 479, "start": 422}, {"end": 536, "start": 480}, {"end": 640, "start": 589}, {"end": 698, "start": 641}, {"end": 756, "start": 699}, {"end": 814, "start": 757}, {"end": 872, "start": 815}, {"end": 930, "start": 873}, {"end": 987, "start": 931}, {"end": 1045, "start": 988}, {"end": 1104, "start": 1046}, {"end": 1164, "start": 1105}, {"end": 1222, "start": 1165}, {"end": 1280, "start": 1223}, {"end": 1339, "start": 1281}, {"end": 1396, "start": 1340}]}, {"name": "SM00181", "regions": [{"end": 1426, "start": 1397}, {"end": 1458, "start": 1429}, {"end": 1490, "start": 1461}]}, {"name": "SSF57196", "regions": [{"end": 1432, "start": 1389}, {"end": 1461, "start": 1433}, {"end": 1496, "start": 1463}]}, {"name": "PS50026", "regions": [{"end": 1458, "start": 1426}, {"end": 1490, "start": 1459}]}, {"name": "PS50311", "regions": [{"end": 1480, "start": 1394}]}, {"name": "SSF57535", "regions": [{"end": 125, "start": 7}, {"end": 244, "start": 128}, {"end": 303, "start": 247}, 
{"end": 363, "start": 305}, {"end": 477, "start": 364}, {"end": 542, "start": 478}, {"end": 638, "start": 569}, {"end": 754, "start": 641}, {"end": 812, "start": 755}, {"end": 870, "start": 813}, {"end": 1043, "start": 871}, {"end": 1102, "start": 1044}, {"end": 1155, "start": 1103}, {"end": 1401, "start": 1165}]}, {"name": "SM00032", "regions": [{"end": 67, "start": 9}, {"end": 125, "start": 72}, {"end": 185, "start": 130}, {"end": 244, "start": 190}, {"end": 302, "start": 249}, {"end": 361, "start": 307}, {"end": 419, "start": 366}, {"end": 477, "start": 424}, {"end": 534, "start": 482}, {"end": 638, "start": 580}, {"end": 696, "start": 643}, {"end": 754, "start": 701}, {"end": 812, "start": 759}, {"end": 870, "start": 817}, {"end": 928, "start": 875}, {"end": 985, "start": 933}, {"end": 1043, "start": 990}, {"end": 1102, "start": 1048}, {"end": 1162, "start": 1107}, {"end": 1220, "start": 1167}, {"end": 1278, "start": 1225}, {"end": 1337, "start": 1283}, {"end": 1394, "start": 1342}]}]}]}, {"end": 113342018, "exons": [{"end": 113128840, "start": 113127536}, {"end": 113132296, "start": 113132203}, {"end": 113137743, "start": 113137648}, {"end": 113139646, "start": 113139551}, {"end": 113141797, "start": 113141627}, {"end": 113148354, "start": 113148178}, {"end": 113149738, "start": 113149565}, {"end": 113151867, "start": 113151804}, {"end": 113163289, "start": 113163134}, {"end": 113166832, "start": 113166607}, {"end": 113171231, "start": 113168440}, {"end": 113174015, "start": 113173343}, {"end": 113190038, "start": 113189871}, {"end": 113191614, "start": 113191423}, {"end": 113192284, "start": 113192200}, {"end": 113192730, "start": 113192554}, {"end": 113194314, "start": 113194195}, {"end": 113194915, "start": 113194742}, {"end": 113196786, "start": 113196616}, {"end": 113197644, "start": 113197521}, {"end": 113198784, "start": 113198660}, {"end": 113206000, "start": 113205825}, {"end": 113208318, "start": 113208117}, {"end": 113209337, "start": 113209180}, 
{"end": 113212540, "start": 113212339}, {"end": 113213682, "start": 113213569}, {"end": 113217983, "start": 113217870}, {"end": 113219632, "start": 113219536}, {"end": 113220842, "start": 113220751}, {"end": 113221393, "start": 113221232}, {"end": 113228306, "start": 113228145}, {"end": 113231381, "start": 113231220}, {"end": 113233877, "start": 113233644}, {"end": 113234603, "start": 113234439}, {"end": 113238595, "start": 113238484}, {"end": 113242036, "start": 113241915}, {"end": 113243716, "start": 113243522}, {"end": 113244772, "start": 113244641}, {"end": 113245973, "start": 113245866}, {"end": 113252059, "start": 113251930}, {"end": 113259213, "start": 113259095}, {"end": 113261518, "start": 113261321}, {"end": 113265497, "start": 113265318}, {"end": 113275385, "start": 113275206}, {"end": 113276386, "start": 113276228}, {"end": 113308571, "start": 113308395}, {"end": 113312384, "start": 113312129}, {"end": 113342018, "start": 113341293}], "is_best_transcript": false, "name": "ENST00000374469", "start": 113127536, "translations": [{"cdna_coding_end": 10911, "cdna_coding_start": 265, "domains": [{"name": "SSF57535", "regions": [{"end": 410, "start": 351}, {"end": 470, "start": 411}, {"end": 537, "start": 471}, {"end": 767, "start": 704}, {"end": 1723, "start": 1603}, {"end": 1819, "start": 1762}, {"end": 1877, "start": 1820}, {"end": 1935, "start": 1878}, {"end": 1993, "start": 1936}, {"end": 2055, "start": 1994}, {"end": 2176, "start": 2058}, {"end": 2295, "start": 2179}, {"end": 2354, "start": 2298}, {"end": 2414, "start": 2356}, {"end": 2528, "start": 2415}, {"end": 2593, "start": 2529}, {"end": 2689, "start": 2620}, {"end": 2805, "start": 2692}, {"end": 2863, "start": 2806}, {"end": 2921, "start": 2864}, {"end": 3094, "start": 2922}, {"end": 3153, "start": 3095}, {"end": 3206, "start": 3154}, {"end": 3452, "start": 3216}]}, {"name": "SSF49899", "regions": [{"end": 1609, "start": 1398}]}, {"name": "SM00159", "regions": [{"end": 1604, "start": 1397}]}, 
{"name": "PF00354", "regions": [{"end": 1597, "start": 1419}]}, {"name": "PR00895", "regions": [{"end": 1507, "start": 1489}, {"end": 1535, "start": 1516}, {"end": 1569, "start": 1536}]}, {"name": "PF02494", "regions": [{"end": 619, "start": 538}, {"end": 698, "start": 621}]}, {"name": "SM00032", "regions": [{"end": 410, "start": 355}, {"end": 470, "start": 415}, {"end": 536, "start": 475}, {"end": 764, "start": 704}, {"end": 1662, "start": 1608}, {"end": 1720, "start": 1667}, {"end": 1819, "start": 1766}, {"end": 1877, "start": 1824}, {"end": 1935, "start": 1882}, {"end": 1993, "start": 1940}, {"end": 2055, "start": 1998}, {"end": 2118, "start": 2060}, {"end": 2176, "start": 2123}, {"end": 2236, "start": 2181}, {"end": 2295, "start": 2241}, {"end": 2353, "start": 2300}, {"end": 2412, "start": 2358}, {"end": 2470, "start": 2417}, {"end": 2528, "start": 2475}, {"end": 2585, "start": 2533}, {"end": 2689, "start": 2631}, {"end": 2747, "start": 2694}, {"end": 2805, "start": 2752}, {"end": 2863, "start": 2810}, {"end": 2921, "start": 2868}, {"end": 2979, "start": 2926}, {"end": 3036, "start": 2984}, {"end": 3094, "start": 3041}, {"end": 3153, "start": 3099}, {"end": 3213, "start": 3158}, {"end": 3271, "start": 3218}, {"end": 3329, "start": 3276}, {"end": 3388, "start": 3334}, {"end": 3445, "start": 3393}]}, {"name": "SM00179", "regions": [{"end": 1206, "start": 1173}, {"end": 1244, "start": 1208}, {"end": 1282, "start": 1246}, {"end": 1320, "start": 1284}, {"end": 1358, "start": 1322}, {"end": 1396, "start": 1360}, {"end": 1761, "start": 1722}, {"end": 3509, "start": 3481}]}, {"name": "SSF57184", "regions": [{"end": 417, "start": 246}, {"end": 1121, "start": 965}]}, {"name": "SSF57196", "regions": [{"end": 1244, "start": 1166}, {"end": 1282, "start": 1245}, {"end": 1319, "start": 1283}, {"end": 1400, "start": 1321}, {"end": 1763, "start": 1712}, {"end": 3483, "start": 3440}, {"end": 3512, "start": 3484}, {"end": 3547, "start": 3514}]}, {"name": "PS50026", "regions": 
[{"end": 1206, "start": 1170}, {"end": 1244, "start": 1208}, {"end": 1282, "start": 1246}, {"end": 1320, "start": 1284}, {"end": 1358, "start": 1322}, {"end": 1396, "start": 1360}, {"end": 1761, "start": 1722}, {"end": 3509, "start": 3477}, {"end": 3541, "start": 3510}]}, {"name": "SM00181", "regions": [{"end": 1206, "start": 1173}, {"end": 1244, "start": 1211}, {"end": 1282, "start": 1249}, {"end": 1320, "start": 1287}, {"end": 1358, "start": 1325}, {"end": 1396, "start": 1363}, {"end": 1761, "start": 1725}, {"end": 3477, "start": 3448}, {"end": 3509, "start": 3480}, {"end": 3541, "start": 3512}]}, {"name": "PF00092", "regions": [{"end": 229, "start": 61}]}, {"name": "PS50825", "regions": [{"end": 619, "start": 537}, {"end": 701, "start": 620}]}, {"name": "PS50311", "regions": [{"end": 1386, "start": 1174}, {"end": 3531, "start": 3445}]}, {"name": "PF07699", "regions": [{"end": 337, "start": 287}, {"end": 1029, "start": 982}, {"end": 1083, "start": 1036}, {"end": 1137, "start": 1090}]}, {"name": "PF00008", "regions": [{"end": 1203, "start": 1174}, {"end": 1242, "start": 1212}, {"end": 1279, "start": 1250}, {"end": 1356, "start": 1326}, {"end": 1394, "start": 1364}]}, {"name": "SM00327", "regions": [{"end": 237, "start": 58}]}, {"name": "PS50923", "regions": [{"end": 412, "start": 353}, {"end": 472, "start": 413}, {"end": 538, "start": 473}, {"end": 766, "start": 702}, {"end": 1664, "start": 1606}, {"end": 1722, "start": 1665}, {"end": 1821, "start": 1764}, {"end": 1879, "start": 1822}, {"end": 1937, "start": 1880}, {"end": 1995, "start": 1938}, {"end": 2057, "start": 1996}, {"end": 2120, "start": 2058}, {"end": 2178, "start": 2121}, {"end": 2238, "start": 2179}, {"end": 2297, "start": 2239}, {"end": 2355, "start": 2298}, {"end": 2414, "start": 2356}, {"end": 2472, "start": 2415}, {"end": 2530, "start": 2473}, {"end": 2587, "start": 2531}, {"end": 2691, "start": 2640}, {"end": 2749, "start": 2692}, {"end": 2807, "start": 2750}, {"end": 2865, "start": 2808}, {"end": 
2923, "start": 2866}, {"end": 2981, "start": 2924}, {"end": 3038, "start": 2982}, {"end": 3096, "start": 3039}, {"end": 3155, "start": 3097}, {"end": 3215, "start": 3156}, {"end": 3273, "start": 3216}, {"end": 3331, "start": 3274}, {"end": 3390, "start": 3332}, {"end": 3447, "start": 3391}]}, {"name": "PF07645", "regions": [{"end": 1760, "start": 1722}]}, {"name": "SSF53300", "regions": [{"end": 239, "start": 56}]}, {"name": "PF00084", "regions": [{"end": 407, "start": 355}, {"end": 470, "start": 415}, {"end": 1662, "start": 1605}, {"end": 1720, "start": 1667}, {"end": 1819, "start": 1766}, {"end": 1877, "start": 1824}, {"end": 1935, "start": 1882}, {"end": 1993, "start": 1940}, {"end": 2055, "start": 1998}, {"end": 2113, "start": 2060}, {"end": 2176, "start": 2123}, {"end": 2236, "start": 2181}, {"end": 2295, "start": 2241}, {"end": 2353, "start": 2300}, {"end": 2412, "start": 2358}, {"end": 2470, "start": 2417}, {"end": 2528, "start": 2475}, {"end": 2585, "start": 2533}, {"end": 2689, "start": 2644}, {"end": 2747, "start": 2694}, {"end": 2805, "start": 2752}, {"end": 2863, "start": 2810}, {"end": 2921, "start": 2868}, {"end": 2979, "start": 2926}, {"end": 3036, "start": 2984}, {"end": 3094, "start": 3061}, {"end": 3149, "start": 3099}, {"end": 3213, "start": 3158}, {"end": 3267, "start": 3218}, {"end": 3329, "start": 3276}, {"end": 3388, "start": 3334}, {"end": 3445, "start": 3393}]}, {"name": "PF07974", "regions": [{"end": 1243, "start": 1212}, {"end": 3476, "start": 3452}, {"end": 3508, "start": 3484}, {"end": 3540, "start": 3513}]}, {"name": "PS50234", "regions": [{"end": 241, "start": 60}]}]}]}, {"end": 113341823, "exons": [{"end": 113206000, "start": 113204759}, {"end": 113208318, "start": 113208117}, {"end": 113209337, "start": 113209180}, {"end": 113212540, "start": 113212339}, {"end": 113213682, "start": 113213569}, {"end": 113217983, "start": 113217870}, {"end": 113219632, "start": 113219536}, {"end": 113220399, "start": 113220395}, {"end": 113220842, 
"start": 113220756}, {"end": 113221393, "start": 113221232}, {"end": 113228306, "start": 113228145}, {"end": 113231381, "start": 113231220}, {"end": 113233877, "start": 113233644}, {"end": 113234603, "start": 113234439}, {"end": 113238595, "start": 113238484}, {"end": 113242036, "start": 113241915}, {"end": 113243716, "start": 113243522}, {"end": 113244772, "start": 113244641}, {"end": 113245973, "start": 113245866}, {"end": 113252059, "start": 113251930}, {"end": 113259213, "start": 113259095}, {"end": 113261518, "start": 113261321}, {"end": 113265497, "start": 113265318}, {"end": 113275385, "start": 113275206}, {"end": 113276386, "start": 113276228}, {"end": 113308571, "start": 113308395}, {"end": 113312384, "start": 113312129}, {"end": 113341823, "start": 113341293}], "is_best_transcript": false, "name": "ENST00000302728", "start": 113204759, "translations": [{"cdna_coding_end": 4650, "cdna_coding_start": 1, "domains": [{"name": "PS50825", "regions": [{"end": 642, "start": 560}, {"end": 724, "start": 643}]}, {"name": "PF07699", "regions": [{"end": 360, "start": 310}, {"end": 1052, "start": 1005}, {"end": 1106, "start": 1059}, {"end": 1160, "start": 1113}]}, {"name": "PS50311", "regions": [{"end": 1409, "start": 1197}]}, {"name": "SM00181", "regions": [{"end": 1229, "start": 1196}, {"end": 1267, "start": 1234}, {"end": 1305, "start": 1272}, {"end": 1343, "start": 1310}, {"end": 1381, "start": 1348}, {"end": 1419, "start": 1386}]}, {"name": "SSF57196", "regions": [{"end": 1267, "start": 1189}, {"end": 1305, "start": 1268}, {"end": 1342, "start": 1306}, {"end": 1423, "start": 1344}]}, {"name": "PS50026", "regions": [{"end": 1229, "start": 1193}, {"end": 1267, "start": 1231}, {"end": 1305, "start": 1269}, {"end": 1343, "start": 1307}, {"end": 1381, "start": 1345}, {"end": 1419, "start": 1383}]}, {"name": "SSF57184", "regions": [{"end": 440, "start": 269}, {"end": 1144, "start": 988}]}, {"name": "SM00179", "regions": [{"end": 1229, "start": 1196}, {"end": 1267, 
"start": 1231}, {"end": 1305, "start": 1269}, {"end": 1343, "start": 1307}, {"end": 1381, "start": 1345}, {"end": 1419, "start": 1383}]}, {"name": "PF00092", "regions": [{"end": 252, "start": 84}]}, {"name": "SM00032", "regions": [{"end": 433, "start": 378}, {"end": 493, "start": 438}, {"end": 559, "start": 498}, {"end": 787, "start": 727}]}, {"name": "PF02494", "regions": [{"end": 642, "start": 561}, {"end": 721, "start": 644}]}, {"name": "PR00010", "regions": [{"end": 1318, "start": 1307}, {"end": 1364, "start": 1357}, {"end": 1413, "start": 1403}, {"end": 1420, "start": 1414}]}, {"name": "PF00354", "regions": [{"end": 1532, "start": 1442}]}, {"name": "SSF57535", "regions": [{"end": 433, "start": 374}, {"end": 493, "start": 434}, {"end": 560, "start": 494}, {"end": 790, "start": 727}]}, {"name": "SSF49899", "regions": [{"end": 1547, "start": 1421}]}, {"name": "PS50234", "regions": [{"end": 264, "start": 83}]}, {"name": "SSF53300", "regions": [{"end": 262, "start": 79}]}, {"name": "PF00084", "regions": [{"end": 430, "start": 378}, {"end": 493, "start": 438}]}, {"name": "PS50923", "regions": [{"end": 435, "start": 376}, {"end": 495, "start": 436}, {"end": 561, "start": 496}, {"end": 789, "start": 725}]}, {"name": "PF07645", "regions": [{"end": 1262, "start": 1231}, {"end": 1338, "start": 1308}]}, {"name": "PF00008", "regions": [{"end": 1226, "start": 1197}, {"end": 1265, "start": 1235}, {"end": 1302, "start": 1273}, {"end": 1337, "start": 1311}, {"end": 1379, "start": 1349}, {"end": 1417, "start": 1387}]}, {"name": "SM00327", "regions": [{"end": 260, "start": 81}]}]}]}, {"end": 113342160, "exons": [{"end": 113238595, "start": 113238163}, {"end": 113242036, "start": 113241915}, {"end": 113243716, "start": 113243522}, {"end": 113244772, "start": 113244641}, {"end": 113245973, "start": 113245866}, {"end": 113252059, "start": 113251930}, {"end": 113259213, "start": 113259095}, {"end": 113261518, "start": 113261321}, {"end": 113265497, "start": 113265318}, {"end": 
113275385, "start": 113275206}, {"end": 113276386, "start": 113276228}, {"end": 113308571, "start": 113308395}, {"end": 113312384, "start": 113312129}, {"end": 113342160, "start": 113341293}], "is_best_transcript": false, "name": "ENST00000374461", "start": 113238163, "translations": [{"cdna_coding_end": 2944, "cdna_coding_start": 407, "domains": [{"name": "PF02494", "regions": [{"end": 619, "start": 538}, {"end": 698, "start": 621}]}, {"name": "SM00032", "regions": [{"end": 410, "start": 355}, {"end": 470, "start": 415}, {"end": 536, "start": 475}, {"end": 764, "start": 704}]}, {"name": "SSF57535", "regions": [{"end": 410, "start": 351}, {"end": 470, "start": 411}, {"end": 537, "start": 471}, {"end": 767, "start": 704}]}, {"name": "PF07699", "regions": [{"end": 337, "start": 287}]}, {"name": "PS50825", "regions": [{"end": 619, "start": 537}, {"end": 701, "start": 620}]}, {"name": "PF00092", "regions": [{"end": 229, "start": 61}]}, {"name": "SSF57184", "regions": [{"end": 417, "start": 246}]}, {"name": "PS50923", "regions": [{"end": 412, "start": 353}, {"end": 472, "start": 413}, {"end": 538, "start": 473}, {"end": 766, "start": 702}]}, {"name": "SM00327", "regions": [{"end": 237, "start": 58}]}, {"name": "PS50234", "regions": [{"end": 241, "start": 60}]}, {"name": "SSF53300", "regions": [{"end": 239, "start": 56}]}, {"name": "PF00084", "regions": [{"end": 407, "start": 355}, {"end": 470, "start": 415}]}]}]}]}, {"aliases": ["ARID1B"], "chr": "6", "end": 157530401, "name": "ENSG00000049618", "start": 157099063, "strand": "+", "transcripts": [{"end": 157529495, "exons": [{"end": 157100605, "start": 157099063}, {"end": 157150555, "start": 157150361}, {"end": 157192786, "start": 157192748}, {"end": 157222659, "start": 157222510}, {"end": 157256710, "start": 157256600}, {"end": 157406039, "start": 157405796}, {"end": 157431695, "start": 157431606}, {"end": 157454341, "start": 157454162}, {"end": 157470085, "start": 157469758}, {"end": 157488319, "start": 157488174}, 
{"end": 157495251, "start": 157495142}, {"end": 157502312, "start": 157502103}, {"end": 157505569, "start": 157505365}, {"end": 157510914, "start": 157510776}, {"end": 157511344, "start": 157511172}, {"end": 157517449, "start": 157517299}, {"end": 157520041, "start": 157519945}, {"end": 157522622, "start": 157521839}, {"end": 157525130, "start": 157525000}, {"end": 157529495, "start": 157527301}], "is_best_transcript": true, "name": "ENST00000346085", "start": 157099063, "translations": [{"cdna_coding_end": 6751, "cdna_coding_start": 2, "domains": [{"name": "PF12031", "regions": [{"end": 2195, "start": 1939}]}, {"name": "PS50324", "regions": [{"end": 57, "start": 35}, {"end": 784, "start": 697}]}, {"name": "PF01388", "regions": [{"end": 1153, "start": 1065}]}, {"name": "PS50099", "regions": [{"end": 820, "start": 715}, {"end": 1610, "start": 1472}]}, {"name": "SSF48371", "regions": [{"end": 2220, "start": 2075}]}, {"name": "PS50316", "regions": [{"end": 104, "start": 81}]}, {"name": "PS50322", "regions": [{"end": 131, "start": 107}, {"end": 646, "start": 574}]}, {"name": "PS51011", "regions": [{"end": 1157, "start": 1066}]}, {"name": "PS50310", "regions": [{"end": 47, "start": 2}, {"end": 493, "start": 329}]}, {"name": "PS50315", "regions": [{"end": 401, "start": 141}]}, {"name": "SSF46774", "regions": [{"end": 1168, "start": 1049}]}, {"name": "SM00501", "regions": [{"end": 1158, "start": 1067}]}]}]}]}]}
\ No newline at end of file
diff --git a/tests/tools/data/Homo_sapiens.GRCh38.105.kras.gff3 b/tests/tools/data/Homo_sapiens.GRCh38.105.kras.gff3
deleted file mode 100644
index be16e852..00000000
--- a/tests/tools/data/Homo_sapiens.GRCh38.105.kras.gff3
+++ /dev/null
@@ -1,19 +0,0 @@
-12	ensembl_havana	gene	25205246	25250936	.	-	.	ID=gene:ENSG00000133703;Name=KRAS;biotype=protein_coding;description=KRAS proto-oncogene%2C GTPase [Source:HGNC Symbol%3BAcc:HGNC:6407];gene_id=ENSG00000133703;logic_name=ensembl_havana_gene_homo_sapiens;version=14
-12	havana	mRNA	25205246	25225773	.	-	.	ID=transcript:ENST00000690406;Parent=gene:ENSG00000133703;Name=KRAS-211;biotype=nonsense_mediated_decay;transcript_id=ENST00000690406;version=1
-12	ensembl_havana	mRNA	25205246	25250929	.	-	.	ID=transcript:ENST00000256078;Parent=gene:ENSG00000133703;Name=KRAS-201;biotype=protein_coding;ccdsid=CCDS8703.1;tag=basic;transcript_id=ENST00000256078;transcript_support_level=1 (assigned to previous version 8);version=10
-12	ensembl_havana	mRNA	25205246	25250929	.	-	.	ID=transcript:ENST00000311936;Parent=gene:ENSG00000133703;Name=KRAS-202;biotype=protein_coding;ccdsid=CCDS8702.1;tag=basic;transcript_id=ENST00000311936;transcript_support_level=1 (assigned to previous version 7);version=8
-12	havana	mRNA	25205250	25250908	.	-	.	ID=transcript:ENST00000686877;Parent=gene:ENSG00000133703;Name=KRAS-206;biotype=nonsense_mediated_decay;transcript_id=ENST00000686877;version=1
-12	havana	mRNA	25205258	25250935	.	-	.	ID=transcript:ENST00000685328;Parent=gene:ENSG00000133703;Name=KRAS-205;biotype=protein_coding;ccdsid=CCDS8702.1;tag=basic;transcript_id=ENST00000685328;version=1
-12	havana	mRNA	25205260	25250899	.	-	.	ID=transcript:ENST00000693229;Parent=gene:ENSG00000133703;Name=KRAS-214;biotype=protein_coding;tag=basic;transcript_id=ENST00000693229;version=1
-12	havana	mRNA	25205270	25250927	.	-	.	ID=transcript:ENST00000687356;Parent=gene:ENSG00000133703;Name=KRAS-208;biotype=nonsense_mediated_decay;transcript_id=ENST00000687356;version=1
-12	havana	mRNA	25205343	25250917	.	-	.	ID=transcript:ENST00000692768;Parent=gene:ENSG00000133703;Name=KRAS-213;biotype=protein_coding;tag=basic;transcript_id=ENST00000692768;version=1
-12	havana	mRNA	25206933	25250444	.	-	.	ID=transcript:ENST00000688940;Parent=gene:ENSG00000133703;Name=KRAS-210;biotype=protein_coding;ccdsid=CCDS8702.1;tag=basic;transcript_id=ENST00000688940;version=1
-12	havana	mRNA	25207948	25250929	.	-	.	ID=transcript:ENST00000690804;Parent=gene:ENSG00000133703;Name=KRAS-212;biotype=nonsense_mediated_decay;transcript_id=ENST00000690804;version=1
-12	havana	mRNA	25209178	25250936	.	-	.	ID=transcript:ENST00000557334;Parent=gene:ENSG00000133703;Name=KRAS-204;biotype=protein_coding;tag=basic;transcript_id=ENST00000557334;transcript_support_level=5 (assigned to previous version 5);version=6
-12	havana	lnc_RNA	25209673	25227997	.	-	.	ID=transcript:ENST00000688228;Parent=gene:ENSG00000133703;Name=KRAS-209;biotype=retained_intron;transcript_id=ENST00000688228;version=1
-12	havana	mRNA	25232558	25250929	.	-	.	ID=transcript:ENST00000686969;Parent=gene:ENSG00000133703;Name=KRAS-207;biotype=protein_coding;tag=basic;transcript_id=ENST00000686969;version=1
-12	havana	mRNA	25232591	25250929	.	-	.	ID=transcript:ENST00000556131;Parent=gene:ENSG00000133703;Name=KRAS-203;biotype=protein_coding;tag=basic;transcript_id=ENST00000556131;transcript_support_level=1 (assigned to previous version 1);version=2
-12	havana	ncRNA_gene	25210652	25211233	.	+	.	ID=gene:ENSG00000274987;biotype=lncRNA;description=novel transcript%2C antisense to KRAS;gene_id=ENSG00000274987;logic_name=havana_homo_sapiens;version=1
-12	havana	ncRNA_gene	25225103	25225665	.	+	.	ID=gene:ENSG00000275197;biotype=lncRNA;description=novel transcript%2C antisense to KRAS;gene_id=ENSG00000275197;logic_name=havana_homo_sapiens;version=1
-6	havana	pseudogene	54770583	54771134	.	+	.	ID=gene:ENSG00000220635;Name=KRASP1;biotype=processed_pseudogene;description=KRAS proto-oncogene%2C GTPase pseudogene 1 [Source:HGNC Symbol%3BAcc:HGNC:6406];gene_id=ENSG00000220635;logic_name=havana_homo_sapiens;version=2
-6	havana	pseudogenic_transcript	54770583	54771134	.	+	.	ID=transcript:ENST00000407852;Parent=gene:ENSG00000220635;Name=KRASP1-201;biotype=processed_pseudogene;tag=basic;transcript_id=ENST00000407852;transcript_support_level=NA;version=2
diff --git a/tests/tools/data/Homo_sapiens.GRCh38.kras.gff3 b/tests/tools/data/Homo_sapiens.GRCh38.kras.gff3
new file mode 100644
index 00000000..8ed7eb87
--- /dev/null
+++ b/tests/tools/data/Homo_sapiens.GRCh38.kras.gff3
@@ -0,0 +1,163 @@
+12	ensembl_havana	CDS	25209795	25209911	.	-	0	ID=CDS:ENSP00000308495;Parent=transcript:ENST00000311936;protein_id=ENSP00000308495
+12	ensembl_havana	CDS	25215441	25215560	.	-	0	ID=CDS:ENSP00000256078;Parent=transcript:ENST00000256078;protein_id=ENSP00000256078
+12	ensembl_havana	CDS	25225614	25225773	.	-	1	ID=CDS:ENSP00000256078;Parent=transcript:ENST00000256078;protein_id=ENSP00000256078
+12	ensembl_havana	CDS	25225614	25225773	.	-	1	ID=CDS:ENSP00000308495;Parent=transcript:ENST00000311936;protein_id=ENSP00000308495
+12	ensembl_havana	CDS	25227234	25227412	.	-	0	ID=CDS:ENSP00000256078;Parent=transcript:ENST00000256078;protein_id=ENSP00000256078
+12	ensembl_havana	CDS	25227234	25227412	.	-	0	ID=CDS:ENSP00000308495;Parent=transcript:ENST00000311936;protein_id=ENSP00000308495
+12	ensembl_havana	CDS	25245274	25245384	.	-	0	ID=CDS:ENSP00000256078;Parent=transcript:ENST00000256078;protein_id=ENSP00000256078
+12	ensembl_havana	CDS	25245274	25245384	.	-	0	ID=CDS:ENSP00000308495;Parent=transcript:ENST00000311936;protein_id=ENSP00000308495
+12	ensembl_havana	exon	25205246	25209911	.	-	.	Parent=transcript:ENST00000256078;Name=ENSE00002477035;constitutive=0;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=ENSE00002477035;rank=6;version=3
+12	ensembl_havana	exon	25205246	25209911	.	-	.	Parent=transcript:ENST00000311936;Name=ENSE00002456976;constitutive=0;ensembl_end_phase=-1;ensembl_phase=0;exon_id=ENSE00002456976;rank=5;version=2
+12	ensembl_havana	exon	25215437	25215560	.	-	.	Parent=transcript:ENST00000256078;Name=ENSE00001189807;constitutive=0;ensembl_end_phase=-1;ensembl_phase=0;exon_id=ENSE00001189807;rank=5;version=5
+12	ensembl_havana	exon	25225614	25225773	.	-	.	Parent=transcript:ENST00000256078;Name=ENSE00001644818;constitutive=0;ensembl_end_phase=0;ensembl_phase=2;exon_id=ENSE00001644818;rank=4;version=1
+12	ensembl_havana	exon	25225614	25225773	.	-	.	Parent=transcript:ENST00000311936;Name=ENSE00001644818;constitutive=0;ensembl_end_phase=0;ensembl_phase=2;exon_id=ENSE00001644818;rank=4;version=1
+12	ensembl_havana	exon	25227234	25227412	.	-	.	Parent=transcript:ENST00000256078;Name=ENSE00001719809;constitutive=0;ensembl_end_phase=2;ensembl_phase=0;exon_id=ENSE00001719809;rank=3;version=1
+12	ensembl_havana	exon	25227234	25227412	.	-	.	Parent=transcript:ENST00000311936;Name=ENSE00001719809;constitutive=0;ensembl_end_phase=2;ensembl_phase=0;exon_id=ENSE00001719809;rank=3;version=1
+12	ensembl_havana	exon	25245274	25245395	.	-	.	Parent=transcript:ENST00000256078;Name=ENSE00000936617;constitutive=0;ensembl_end_phase=0;ensembl_phase=-1;exon_id=ENSE00000936617;rank=2;version=1
+12	ensembl_havana	exon	25245274	25245395	.	-	.	Parent=transcript:ENST00000311936;Name=ENSE00000936617;constitutive=0;ensembl_end_phase=0;ensembl_phase=-1;exon_id=ENSE00000936617;rank=2;version=1
+12	ensembl_havana	exon	25250751	25250929	.	-	.	Parent=transcript:ENST00000256078;Name=ENSE00003903543;constitutive=0;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=ENSE00003903543;rank=1;version=1
+12	ensembl_havana	exon	25250751	25250929	.	-	.	Parent=transcript:ENST00000311936;Name=ENSE00003903543;constitutive=0;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=ENSE00003903543;rank=1;version=1
+12	ensembl_havana	five_prime_UTR	25245385	25245395	.	-	.	Parent=transcript:ENST00000256078
+12	ensembl_havana	five_prime_UTR	25245385	25245395	.	-	.	Parent=transcript:ENST00000311936
+12	ensembl_havana	five_prime_UTR	25250751	25250929	.	-	.	Parent=transcript:ENST00000256078
+12	ensembl_havana	five_prime_UTR	25250751	25250929	.	-	.	Parent=transcript:ENST00000311936
+12	ensembl_havana	gene	25205246	25250936	.	-	.	ID=gene:ENSG00000133703;Name=KRAS;biotype=protein_coding;description=KRAS proto-oncogene%2C GTPase [Source:HGNC Symbol%3BAcc:HGNC:6407];gene_id=ENSG00000133703;logic_name=ensembl_havana_gene_homo_sapiens;version=14
+12	ensembl_havana	mRNA	25205246	25250929	.	-	.	ID=transcript:ENST00000256078;Parent=gene:ENSG00000133703;Name=KRAS-201;biotype=protein_coding;ccdsid=CCDS8703.1;tag=basic;transcript_id=ENST00000256078;transcript_support_level=1 (assigned to previous version 8);version=10
+12	ensembl_havana	mRNA	25205246	25250929	.	-	.	ID=transcript:ENST00000311936;Parent=gene:ENSG00000133703;Name=KRAS-202;biotype=protein_coding;ccdsid=CCDS8702.1;tag=basic;transcript_id=ENST00000311936;transcript_support_level=1 (assigned to previous version 7);version=8
+12	ensembl_havana	three_prime_UTR	25205246	25209794	.	-	.	Parent=transcript:ENST00000311936
+12	ensembl_havana	three_prime_UTR	25205246	25209911	.	-	.	Parent=transcript:ENST00000256078
+12	ensembl_havana	three_prime_UTR	25215437	25215440	.	-	.	Parent=transcript:ENST00000256078
+12	havana	CDS	25209795	25209911	.	-	0	ID=CDS:ENSP00000452512;Parent=transcript:ENST00000557334;protein_id=ENSP00000452512
+12	havana	CDS	25209795	25209911	.	-	0	ID=CDS:ENSP00000508921;Parent=transcript:ENST00000685328;protein_id=ENSP00000508921
+12	havana	CDS	25209795	25209911	.	-	0	ID=CDS:ENSP00000509223;Parent=transcript:ENST00000693229;protein_id=ENSP00000509223
+12	havana	CDS	25209795	25209911	.	-	0	ID=CDS:ENSP00000509238;Parent=transcript:ENST00000688940;protein_id=ENSP00000509238
+12	havana	CDS	25209795	25209911	.	-	0	ID=CDS:ENSP00000510254;Parent=transcript:ENST00000692768;protein_id=ENSP00000510254
+12	havana	CDS	25213204	25213206	.	-	0	ID=CDS:ENSP00000509798;Parent=transcript:ENST00000690406;protein_id=ENSP00000509798
+12	havana	CDS	25225614	25225773	.	-	1	ID=CDS:ENSP00000508921;Parent=transcript:ENST00000685328;protein_id=ENSP00000508921
+12	havana	CDS	25225614	25225773	.	-	1	ID=CDS:ENSP00000509223;Parent=transcript:ENST00000693229;protein_id=ENSP00000509223
+12	havana	CDS	25225614	25225773	.	-	1	ID=CDS:ENSP00000509238;Parent=transcript:ENST00000688940;protein_id=ENSP00000509238
+12	havana	CDS	25225614	25225773	.	-	1	ID=CDS:ENSP00000509798;Parent=transcript:ENST00000690406;protein_id=ENSP00000509798
+12	havana	CDS	25225614	25225773	.	-	1	ID=CDS:ENSP00000510254;Parent=transcript:ENST00000692768;protein_id=ENSP00000510254
+12	havana	CDS	25225762	25225773	.	-	0	ID=CDS:ENSP00000510511;Parent=transcript:ENST00000687356;protein_id=ENSP00000510511
+12	havana	CDS	25227234	25227325	.	-	0	ID=CDS:ENSP00000510254;Parent=transcript:ENST00000692768;protein_id=ENSP00000510254
+12	havana	CDS	25227234	25227337	.	-	0	ID=CDS:ENSP00000509223;Parent=transcript:ENST00000693229;protein_id=ENSP00000509223
+12	havana	CDS	25227234	25227412	.	-	0	ID=CDS:ENSP00000508921;Parent=transcript:ENST00000685328;protein_id=ENSP00000508921
+12	havana	CDS	25227234	25227412	.	-	0	ID=CDS:ENSP00000509238;Parent=transcript:ENST00000688940;protein_id=ENSP00000509238
+12	havana	CDS	25228847	25228891	.	-	0	ID=CDS:ENSP00000508568;Parent=transcript:ENST00000690804;protein_id=ENSP00000508568
+12	havana	CDS	25230565	25230621	.	-	0	ID=CDS:ENSP00000510431;Parent=transcript:ENST00000686877;protein_id=ENSP00000510431
+12	havana	CDS	25235206	25235226	.	-	0	ID=CDS:ENSP00000451856;Parent=transcript:ENST00000556131;protein_id=ENSP00000451856
+12	havana	CDS	25235206	25235226	.	-	0	ID=CDS:ENSP00000510479;Parent=transcript:ENST00000686969;protein_id=ENSP00000510479
+12	havana	CDS	25245274	25245384	.	-	0	ID=CDS:ENSP00000451856;Parent=transcript:ENST00000556131;protein_id=ENSP00000451856
+12	havana	CDS	25245274	25245384	.	-	0	ID=CDS:ENSP00000452512;Parent=transcript:ENST00000557334;protein_id=ENSP00000452512
+12	havana	CDS	25245274	25245384	.	-	0	ID=CDS:ENSP00000508568;Parent=transcript:ENST00000690804;protein_id=ENSP00000508568
+12	havana	CDS	25245274	25245384	.	-	0	ID=CDS:ENSP00000508921;Parent=transcript:ENST00000685328;protein_id=ENSP00000508921
+12	havana	CDS	25245274	25245384	.	-	0	ID=CDS:ENSP00000509223;Parent=transcript:ENST00000693229;protein_id=ENSP00000509223
+12	havana	CDS	25245274	25245384	.	-	0	ID=CDS:ENSP00000509238;Parent=transcript:ENST00000688940;protein_id=ENSP00000509238
+12	havana	CDS	25245274	25245384	.	-	0	ID=CDS:ENSP00000510431;Parent=transcript:ENST00000686877;protein_id=ENSP00000510431
+12	havana	CDS	25245274	25245384	.	-	0	ID=CDS:ENSP00000510479;Parent=transcript:ENST00000686969;protein_id=ENSP00000510479
+12	havana	CDS	25245274	25245384	.	-	0	ID=CDS:ENSP00000510511;Parent=transcript:ENST00000687356;protein_id=ENSP00000510511
+12	havana	exon	25205246	25209911	.	-	.	Parent=transcript:ENST00000690406;Name=ENSE00002477035;constitutive=0;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=ENSE00002477035;rank=3;version=3
+12	havana	exon	25205250	25209911	.	-	.	Parent=transcript:ENST00000686877;Name=ENSE00003934058;constitutive=0;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=ENSE00003934058;rank=6;version=1
+12	havana	exon	25205258	25209911	.	-	.	Parent=transcript:ENST00000685328;Name=ENSE00003924510;constitutive=0;ensembl_end_phase=-1;ensembl_phase=0;exon_id=ENSE00003924510;rank=5;version=1
+12	havana	exon	25205260	25209911	.	-	.	Parent=transcript:ENST00000693229;Name=ENSE00003927775;constitutive=0;ensembl_end_phase=-1;ensembl_phase=0;exon_id=ENSE00003927775;rank=5;version=1
+12	havana	exon	25205270	25209911	.	-	.	Parent=transcript:ENST00000687356;Name=ENSE00003933328;constitutive=0;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=ENSE00003933328;rank=4;version=1
+12	havana	exon	25205343	25209911	.	-	.	Parent=transcript:ENST00000692768;Name=ENSE00003925822;constitutive=0;ensembl_end_phase=-1;ensembl_phase=0;exon_id=ENSE00003925822;rank=4;version=1
+12	havana	exon	25206933	25209911	.	-	.	Parent=transcript:ENST00000688940;Name=ENSE00003930148;constitutive=0;ensembl_end_phase=-1;ensembl_phase=0;exon_id=ENSE00003930148;rank=5;version=1
+12	havana	exon	25207948	25209911	.	-	.	Parent=transcript:ENST00000690804;Name=ENSE00003935620;constitutive=0;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=ENSE00003935620;rank=6;version=1
+12	havana	exon	25209178	25209911	.	-	.	Parent=transcript:ENST00000557334;Name=ENSE00002464674;constitutive=0;ensembl_end_phase=-1;ensembl_phase=0;exon_id=ENSE00002464674;rank=3;version=2
+12	havana	exon	25209673	25209911	.	-	.	Parent=transcript:ENST00000688228;Name=ENSE00003925173;constitutive=0;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=ENSE00003925173;rank=3;version=1
+12	havana	exon	25213114	25213206	.	-	.	Parent=transcript:ENST00000690406;Name=ENSE00003927570;constitutive=0;ensembl_end_phase=-1;ensembl_phase=0;exon_id=ENSE00003927570;rank=2;version=1
+12	havana	exon	25225614	25225773	.	-	.	Parent=transcript:ENST00000685328;Name=ENSE00001644818;constitutive=0;ensembl_end_phase=0;ensembl_phase=2;exon_id=ENSE00001644818;rank=4;version=1
+12	havana	exon	25225614	25225773	.	-	.	Parent=transcript:ENST00000686877;Name=ENSE00003937476;constitutive=0;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=ENSE00003937476;rank=5;version=1
+12	havana	exon	25225614	25225773	.	-	.	Parent=transcript:ENST00000687356;Name=ENSE00003930939;constitutive=0;ensembl_end_phase=-1;ensembl_phase=0;exon_id=ENSE00003930939;rank=3;version=1
+12	havana	exon	25225614	25225773	.	-	.	Parent=transcript:ENST00000688228;Name=ENSE00003937476;constitutive=0;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=ENSE00003937476;rank=2;version=1
+12	havana	exon	25225614	25225773	.	-	.	Parent=transcript:ENST00000688940;Name=ENSE00001644818;constitutive=0;ensembl_end_phase=0;ensembl_phase=2;exon_id=ENSE00001644818;rank=4;version=1
+12	havana	exon	25225614	25225773	.	-	.	Parent=transcript:ENST00000690406;Name=ENSE00001644818;constitutive=0;ensembl_end_phase=0;ensembl_phase=2;exon_id=ENSE00001644818;rank=1;version=1
+12	havana	exon	25225614	25225773	.	-	.	Parent=transcript:ENST00000690804;Name=ENSE00003937476;constitutive=0;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=ENSE00003937476;rank=5;version=1
+12	havana	exon	25225614	25225773	.	-	.	Parent=transcript:ENST00000692768;Name=ENSE00001644818;constitutive=0;ensembl_end_phase=0;ensembl_phase=2;exon_id=ENSE00001644818;rank=3;version=1
+12	havana	exon	25225614	25225773	.	-	.	Parent=transcript:ENST00000693229;Name=ENSE00001644818;constitutive=0;ensembl_end_phase=0;ensembl_phase=2;exon_id=ENSE00001644818;rank=4;version=1
+12	havana	exon	25227234	25227337	.	-	.	Parent=transcript:ENST00000693229;Name=ENSE00003923411;constitutive=0;ensembl_end_phase=2;ensembl_phase=0;exon_id=ENSE00003923411;rank=3;version=1
+12	havana	exon	25227234	25227412	.	-	.	Parent=transcript:ENST00000685328;Name=ENSE00001719809;constitutive=0;ensembl_end_phase=2;ensembl_phase=0;exon_id=ENSE00001719809;rank=3;version=1
+12	havana	exon	25227234	25227412	.	-	.	Parent=transcript:ENST00000686877;Name=ENSE00003930847;constitutive=0;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=ENSE00003930847;rank=4;version=1
+12	havana	exon	25227234	25227412	.	-	.	Parent=transcript:ENST00000688940;Name=ENSE00001719809;constitutive=0;ensembl_end_phase=2;ensembl_phase=0;exon_id=ENSE00001719809;rank=3;version=1
+12	havana	exon	25227234	25227412	.	-	.	Parent=transcript:ENST00000690804;Name=ENSE00003930847;constitutive=0;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=ENSE00003930847;rank=4;version=1
+12	havana	exon	25227234	25227412	.	-	.	Parent=transcript:ENST00000692768;Name=ENSE00003923061;constitutive=0;ensembl_end_phase=2;ensembl_phase=-1;exon_id=ENSE00003923061;rank=2;version=1
+12	havana	exon	25227234	25227997	.	-	.	Parent=transcript:ENST00000688228;Name=ENSE00003935871;constitutive=0;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=ENSE00003935871;rank=1;version=1
+12	havana	exon	25228775	25228891	.	-	.	Parent=transcript:ENST00000690804;Name=ENSE00003925179;constitutive=0;ensembl_end_phase=-1;ensembl_phase=0;exon_id=ENSE00003925179;rank=3;version=1
+12	havana	exon	25230483	25230621	.	-	.	Parent=transcript:ENST00000686877;Name=ENSE00003930732;constitutive=0;ensembl_end_phase=-1;ensembl_phase=0;exon_id=ENSE00003930732;rank=3;version=1
+12	havana	exon	25232558	25235226	.	-	.	Parent=transcript:ENST00000686969;Name=ENSE00003927408;constitutive=0;ensembl_end_phase=-1;ensembl_phase=0;exon_id=ENSE00003927408;rank=3;version=1
+12	havana	exon	25232591	25235226	.	-	.	Parent=transcript:ENST00000556131;Name=ENSE00002478081;constitutive=0;ensembl_end_phase=-1;ensembl_phase=0;exon_id=ENSE00002478081;rank=3;version=2
+12	havana	exon	25245274	25245395	.	-	.	Parent=transcript:ENST00000556131;Name=ENSE00000936617;constitutive=0;ensembl_end_phase=0;ensembl_phase=-1;exon_id=ENSE00000936617;rank=2;version=1
+12	havana	exon	25245274	25245395	.	-	.	Parent=transcript:ENST00000557334;Name=ENSE00000936617;constitutive=0;ensembl_end_phase=0;ensembl_phase=-1;exon_id=ENSE00000936617;rank=2;version=1
+12	havana	exon	25245274	25245395	.	-	.	Parent=transcript:ENST00000685328;Name=ENSE00000936617;constitutive=0;ensembl_end_phase=0;ensembl_phase=-1;exon_id=ENSE00000936617;rank=2;version=1
+12	havana	exon	25245274	25245395	.	-	.	Parent=transcript:ENST00000686877;Name=ENSE00000936617;constitutive=0;ensembl_end_phase=0;ensembl_phase=-1;exon_id=ENSE00000936617;rank=2;version=1
+12	havana	exon	25245274	25245395	.	-	.	Parent=transcript:ENST00000686969;Name=ENSE00000936617;constitutive=0;ensembl_end_phase=0;ensembl_phase=-1;exon_id=ENSE00000936617;rank=2;version=1
+12	havana	exon	25245274	25245395	.	-	.	Parent=transcript:ENST00000687356;Name=ENSE00000936617;constitutive=0;ensembl_end_phase=0;ensembl_phase=-1;exon_id=ENSE00000936617;rank=2;version=1
+12	havana	exon	25245274	25245395	.	-	.	Parent=transcript:ENST00000688940;Name=ENSE00000936617;constitutive=0;ensembl_end_phase=0;ensembl_phase=-1;exon_id=ENSE00000936617;rank=2;version=1
+12	havana	exon	25245274	25245395	.	-	.	Parent=transcript:ENST00000690804;Name=ENSE00000936617;constitutive=0;ensembl_end_phase=0;ensembl_phase=-1;exon_id=ENSE00000936617;rank=2;version=1
+12	havana	exon	25245274	25245395	.	-	.	Parent=transcript:ENST00000693229;Name=ENSE00000936617;constitutive=0;ensembl_end_phase=0;ensembl_phase=-1;exon_id=ENSE00000936617;rank=2;version=1
+12	havana	exon	25250255	25250444	.	-	.	Parent=transcript:ENST00000688940;Name=ENSE00003932539;constitutive=0;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=ENSE00003932539;rank=1;version=1
+12	havana	exon	25250751	25250899	.	-	.	Parent=transcript:ENST00000693229;Name=ENSE00003938559;constitutive=0;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=ENSE00003938559;rank=1;version=1
+12	havana	exon	25250751	25250908	.	-	.	Parent=transcript:ENST00000686877;Name=ENSE00003928105;constitutive=0;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=ENSE00003928105;rank=1;version=1
+12	havana	exon	25250751	25250917	.	-	.	Parent=transcript:ENST00000692768;Name=ENSE00003923448;constitutive=0;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=ENSE00003923448;rank=1;version=1
+12	havana	exon	25250751	25250927	.	-	.	Parent=transcript:ENST00000687356;Name=ENSE00003930705;constitutive=0;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=ENSE00003930705;rank=1;version=1
+12	havana	exon	25250751	25250929	.	-	.	Parent=transcript:ENST00000556131;Name=ENSE00003903543;constitutive=0;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=ENSE00003903543;rank=1;version=1
+12	havana	exon	25250751	25250929	.	-	.	Parent=transcript:ENST00000690804;Name=ENSE00003903543;constitutive=0;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=ENSE00003903543;rank=1;version=1
+12	havana	exon	25250751	25250936	.	-	.	Parent=transcript:ENST00000557334;Name=ENSE00002446502;constitutive=0;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=ENSE00002446502;rank=1;version=1
+12	havana	exon	25250764	25250929	.	-	.	Parent=transcript:ENST00000686969;Name=ENSE00002530521;constitutive=0;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=ENSE00002530521;rank=1;version=1
+12	havana	exon	25250764	25250935	.	-	.	Parent=transcript:ENST00000685328;Name=ENSE00003934964;constitutive=0;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=ENSE00003934964;rank=1;version=1
+12	havana	five_prime_UTR	25227326	25227412	.	-	.	Parent=transcript:ENST00000692768
+12	havana	five_prime_UTR	25245385	25245395	.	-	.	Parent=transcript:ENST00000556131
+12	havana	five_prime_UTR	25245385	25245395	.	-	.	Parent=transcript:ENST00000557334
+12	havana	five_prime_UTR	25245385	25245395	.	-	.	Parent=transcript:ENST00000685328
+12	havana	five_prime_UTR	25245385	25245395	.	-	.	Parent=transcript:ENST00000686877
+12	havana	five_prime_UTR	25245385	25245395	.	-	.	Parent=transcript:ENST00000686969
+12	havana	five_prime_UTR	25245385	25245395	.	-	.	Parent=transcript:ENST00000687356
+12	havana	five_prime_UTR	25245385	25245395	.	-	.	Parent=transcript:ENST00000688940
+12	havana	five_prime_UTR	25245385	25245395	.	-	.	Parent=transcript:ENST00000690804
+12	havana	five_prime_UTR	25245385	25245395	.	-	.	Parent=transcript:ENST00000693229
+12	havana	five_prime_UTR	25250255	25250444	.	-	.	Parent=transcript:ENST00000688940
+12	havana	five_prime_UTR	25250751	25250899	.	-	.	Parent=transcript:ENST00000693229
+12	havana	five_prime_UTR	25250751	25250908	.	-	.	Parent=transcript:ENST00000686877
+12	havana	five_prime_UTR	25250751	25250917	.	-	.	Parent=transcript:ENST00000692768
+12	havana	five_prime_UTR	25250751	25250927	.	-	.	Parent=transcript:ENST00000687356
+12	havana	five_prime_UTR	25250751	25250929	.	-	.	Parent=transcript:ENST00000556131
+12	havana	five_prime_UTR	25250751	25250929	.	-	.	Parent=transcript:ENST00000690804
+12	havana	five_prime_UTR	25250751	25250936	.	-	.	Parent=transcript:ENST00000557334
+12	havana	five_prime_UTR	25250764	25250929	.	-	.	Parent=transcript:ENST00000686969
+12	havana	five_prime_UTR	25250764	25250935	.	-	.	Parent=transcript:ENST00000685328
+12	havana	lnc_RNA	25209673	25227997	.	-	.	ID=transcript:ENST00000688228;Parent=gene:ENSG00000133703;Name=KRAS-209;biotype=retained_intron;transcript_id=ENST00000688228;version=1
+12	havana	lnc_RNA	25210652	25211233	.	+	.	ID=transcript:ENST00000612734;Parent=gene:ENSG00000274987;biotype=lncRNA;tag=basic;transcript_id=ENST00000612734;transcript_support_level=NA;version=1
+12	havana	lnc_RNA	25225103	25225665	.	+	.	ID=transcript:ENST00000620933;Parent=gene:ENSG00000275197;biotype=lncRNA;tag=basic;transcript_id=ENST00000620933;transcript_support_level=NA;version=1
+12	havana	mRNA	25205246	25225773	.	-	.	ID=transcript:ENST00000690406;Parent=gene:ENSG00000133703;Name=KRAS-211;biotype=nonsense_mediated_decay;transcript_id=ENST00000690406;version=1
+12	havana	mRNA	25205250	25250908	.	-	.	ID=transcript:ENST00000686877;Parent=gene:ENSG00000133703;Name=KRAS-206;biotype=nonsense_mediated_decay;transcript_id=ENST00000686877;version=1
+12	havana	mRNA	25205258	25250935	.	-	.	ID=transcript:ENST00000685328;Parent=gene:ENSG00000133703;Name=KRAS-205;biotype=protein_coding;ccdsid=CCDS8702.1;tag=basic;transcript_id=ENST00000685328;version=1
+12	havana	mRNA	25205260	25250899	.	-	.	ID=transcript:ENST00000693229;Parent=gene:ENSG00000133703;Name=KRAS-214;biotype=protein_coding;tag=basic;transcript_id=ENST00000693229;version=1
+12	havana	mRNA	25205270	25250927	.	-	.	ID=transcript:ENST00000687356;Parent=gene:ENSG00000133703;Name=KRAS-208;biotype=nonsense_mediated_decay;transcript_id=ENST00000687356;version=1
+12	havana	mRNA	25205343	25250917	.	-	.	ID=transcript:ENST00000692768;Parent=gene:ENSG00000133703;Name=KRAS-213;biotype=protein_coding;tag=basic;transcript_id=ENST00000692768;version=1
+12	havana	mRNA	25206933	25250444	.	-	.	ID=transcript:ENST00000688940;Parent=gene:ENSG00000133703;Name=KRAS-210;biotype=protein_coding;ccdsid=CCDS8702.1;tag=basic;transcript_id=ENST00000688940;version=1
+12	havana	mRNA	25207948	25250929	.	-	.	ID=transcript:ENST00000690804;Parent=gene:ENSG00000133703;Name=KRAS-212;biotype=nonsense_mediated_decay;transcript_id=ENST00000690804;version=1
+12	havana	mRNA	25209178	25250936	.	-	.	ID=transcript:ENST00000557334;Parent=gene:ENSG00000133703;Name=KRAS-204;biotype=protein_coding;tag=basic;transcript_id=ENST00000557334;transcript_support_level=5 (assigned to previous version 5);version=6
+12	havana	mRNA	25232558	25250929	.	-	.	ID=transcript:ENST00000686969;Parent=gene:ENSG00000133703;Name=KRAS-207;biotype=protein_coding;tag=basic;transcript_id=ENST00000686969;version=1
+12	havana	mRNA	25232591	25250929	.	-	.	ID=transcript:ENST00000556131;Parent=gene:ENSG00000133703;Name=KRAS-203;biotype=protein_coding;tag=basic;transcript_id=ENST00000556131;transcript_support_level=1 (assigned to previous version 1);version=2
+12	havana	ncRNA_gene	25210652	25211233	.	+	.	ID=gene:ENSG00000274987;biotype=lncRNA;description=novel transcript%2C antisense to KRAS;gene_id=ENSG00000274987;logic_name=havana_homo_sapiens;version=1
+12	havana	ncRNA_gene	25225103	25225665	.	+	.	ID=gene:ENSG00000275197;biotype=lncRNA;description=novel transcript%2C antisense to KRAS;gene_id=ENSG00000275197;logic_name=havana_homo_sapiens;version=1
+12	havana	three_prime_UTR	25205246	25209911	.	-	.	Parent=transcript:ENST00000690406
+12	havana	three_prime_UTR	25205250	25209911	.	-	.	Parent=transcript:ENST00000686877
+12	havana	three_prime_UTR	25205258	25209794	.	-	.	Parent=transcript:ENST00000685328
+12	havana	three_prime_UTR	25205260	25209794	.	-	.	Parent=transcript:ENST00000693229
+12	havana	three_prime_UTR	25205270	25209911	.	-	.	Parent=transcript:ENST00000687356
+12	havana	three_prime_UTR	25205343	25209794	.	-	.	Parent=transcript:ENST00000692768
+12	havana	three_prime_UTR	25206933	25209794	.	-	.	Parent=transcript:ENST00000688940
+12	havana	three_prime_UTR	25207948	25209911	.	-	.	Parent=transcript:ENST00000690804
+12	havana	three_prime_UTR	25209178	25209794	.	-	.	Parent=transcript:ENST00000557334
+12	havana	three_prime_UTR	25213114	25213203	.	-	.	Parent=transcript:ENST00000690406
+12	havana	three_prime_UTR	25225614	25225761	.	-	.	Parent=transcript:ENST00000687356
+12	havana	three_prime_UTR	25225614	25225773	.	-	.	Parent=transcript:ENST00000686877
+12	havana	three_prime_UTR	25225614	25225773	.	-	.	Parent=transcript:ENST00000690804
+12	havana	three_prime_UTR	25227234	25227412	.	-	.	Parent=transcript:ENST00000686877
+12	havana	three_prime_UTR	25227234	25227412	.	-	.	Parent=transcript:ENST00000690804
+12	havana	three_prime_UTR	25228775	25228846	.	-	.	Parent=transcript:ENST00000690804
+12	havana	three_prime_UTR	25230483	25230564	.	-	.	Parent=transcript:ENST00000686877
+12	havana	three_prime_UTR	25232558	25235205	.	-	.	Parent=transcript:ENST00000686969
+12	havana	three_prime_UTR	25232591	25235205	.	-	.	Parent=transcript:ENST00000556131
diff --git a/tests/tools/data/Homo_sapiens.GRCh38.kras.gff3.json b/tests/tools/data/Homo_sapiens.GRCh38.kras.gff3.json
new file mode 100644
index 00000000..eb35287b
--- /dev/null
+++ b/tests/tools/data/Homo_sapiens.GRCh38.kras.gff3.json
@@ -0,0 +1 @@
+{"genes": [{"aliases": ["KRAS"], "biotype": "gene", "chr": "12", "end": 25250936, "name": "ENSG00000133703", "start": 25205246, "strand": "-", "transcripts": [{"aliases": ["KRAS-201"], "biotype": "mRNA", "end": 25250929, "exons": [{"end": 25245395, "name": "ENSE00000936617", "start": 25245274, "version": "1"}, {"end": 25215560, "name": "ENSE00001189807", "start": 25215437, "version": "5"}, {"end": 25225773, "name": "ENSE00001644818", "start": 25225614, "version": "1"}, {"end": 25227412, "name": "ENSE00001719809", "start": 25227234, "version": "1"}, {"end": 25209911, "name": "ENSE00002477035", "start": 25205246, "version": "3"}, {"end": 25250929, "name": "ENSE00003903543", "start": 25250751, "version": "1"}], "name": "ENST00000256078", "start": 25205246, "translations": [{"biotype": "CDS", "end": 25245384, "name": "ENSP00000256078", "start": 25215441}], "version": "10"}, {"aliases": ["KRAS-202"], "biotype": "mRNA", "end": 25250929, "exons": [{"end": 25245395, "name": "ENSE00000936617", "start": 25245274, "version": "1"}, {"end": 25225773, "name": "ENSE00001644818", "start": 25225614, "version": "1"}, {"end": 25227412, "name": "ENSE00001719809", "start": 25227234, "version": "1"}, {"end": 25209911, "name": "ENSE00002456976", "start": 25205246, "version": "2"}, {"end": 25250929, "name": "ENSE00003903543", "start": 25250751, "version": "1"}], "name": "ENST00000311936", "start": 25205246, "translations": [{"biotype": "CDS", "end": 25245384, "name": "ENSP00000308495", "start": 25209795}], "version": "8"}, {"aliases": ["KRAS-203"], "biotype": "mRNA", "end": 25250929, "exons": [{"end": 25245395, "name": "ENSE00000936617", "start": 25245274, "version": "1"}, {"end": 25235226, "name": "ENSE00002478081", "start": 25232591, "version": "2"}, {"end": 25250929, "name": "ENSE00003903543", "start": 25250751, "version": "1"}], "name": "ENST00000556131", "start": 25232591, "translations": [{"biotype": "CDS", "end": 25245384, "name": "ENSP00000451856", "start": 25235206}], "version": 
"2"}, {"aliases": ["KRAS-204"], "biotype": "mRNA", "end": 25250936, "exons": [{"end": 25245395, "name": "ENSE00000936617", "start": 25245274, "version": "1"}, {"end": 25250936, "name": "ENSE00002446502", "start": 25250751, "version": "1"}, {"end": 25209911, "name": "ENSE00002464674", "start": 25209178, "version": "2"}], "name": "ENST00000557334", "start": 25209178, "translations": [{"biotype": "CDS", "end": 25245384, "name": "ENSP00000452512", "start": 25209795}], "version": "6"}, {"aliases": ["KRAS-205"], "biotype": "mRNA", "end": 25250935, "exons": [{"end": 25245395, "name": "ENSE00000936617", "start": 25245274, "version": "1"}, {"end": 25225773, "name": "ENSE00001644818", "start": 25225614, "version": "1"}, {"end": 25227412, "name": "ENSE00001719809", "start": 25227234, "version": "1"}, {"end": 25209911, "name": "ENSE00003924510", "start": 25205258, "version": "1"}, {"end": 25250935, "name": "ENSE00003934964", "start": 25250764, "version": "1"}], "name": "ENST00000685328", "start": 25205258, "translations": [{"biotype": "CDS", "end": 25245384, "name": "ENSP00000508921", "start": 25209795}], "version": "1"}, {"aliases": ["KRAS-206"], "biotype": "mRNA", "end": 25250908, "exons": [{"end": 25245395, "name": "ENSE00000936617", "start": 25245274, "version": "1"}, {"end": 25250908, "name": "ENSE00003928105", "start": 25250751, "version": "1"}, {"end": 25230621, "name": "ENSE00003930732", "start": 25230483, "version": "1"}, {"end": 25227412, "name": "ENSE00003930847", "start": 25227234, "version": "1"}, {"end": 25209911, "name": "ENSE00003934058", "start": 25205250, "version": "1"}, {"end": 25225773, "name": "ENSE00003937476", "start": 25225614, "version": "1"}], "name": "ENST00000686877", "start": 25205250, "translations": [{"biotype": "CDS", "end": 25245384, "name": "ENSP00000510431", "start": 25230565}], "version": "1"}, {"aliases": ["KRAS-207"], "biotype": "mRNA", "end": 25250929, "exons": [{"end": 25245395, "name": "ENSE00000936617", "start": 25245274, "version": 
"1"}, {"end": 25250929, "name": "ENSE00002530521", "start": 25250764, "version": "1"}, {"end": 25235226, "name": "ENSE00003927408", "start": 25232558, "version": "1"}], "name": "ENST00000686969", "start": 25232558, "translations": [{"biotype": "CDS", "end": 25245384, "name": "ENSP00000510479", "start": 25235206}], "version": "1"}, {"aliases": ["KRAS-208"], "biotype": "mRNA", "end": 25250927, "exons": [{"end": 25245395, "name": "ENSE00000936617", "start": 25245274, "version": "1"}, {"end": 25250927, "name": "ENSE00003930705", "start": 25250751, "version": "1"}, {"end": 25225773, "name": "ENSE00003930939", "start": 25225614, "version": "1"}, {"end": 25209911, "name": "ENSE00003933328", "start": 25205270, "version": "1"}], "name": "ENST00000687356", "start": 25205270, "translations": [{"biotype": "CDS", "end": 25245384, "name": "ENSP00000510511", "start": 25225762}], "version": "1"}, {"aliases": ["KRAS-209"], "biotype": "lnc_RNA", "end": 25227997, "exons": [{"end": 25209911, "name": "ENSE00003925173", "start": 25209673, "version": "1"}, {"end": 25227997, "name": "ENSE00003935871", "start": 25227234, "version": "1"}, {"end": 25225773, "name": "ENSE00003937476", "start": 25225614, "version": "1"}], "name": "ENST00000688228", "start": 25209673, "version": "1"}, {"aliases": ["KRAS-210"], "biotype": "mRNA", "end": 25250444, "exons": [{"end": 25245395, "name": "ENSE00000936617", "start": 25245274, "version": "1"}, {"end": 25225773, "name": "ENSE00001644818", "start": 25225614, "version": "1"}, {"end": 25227412, "name": "ENSE00001719809", "start": 25227234, "version": "1"}, {"end": 25209911, "name": "ENSE00003930148", "start": 25206933, "version": "1"}, {"end": 25250444, "name": "ENSE00003932539", "start": 25250255, "version": "1"}], "name": "ENST00000688940", "start": 25206933, "translations": [{"biotype": "CDS", "end": 25245384, "name": "ENSP00000509238", "start": 25209795}], "version": "1"}, {"aliases": ["KRAS-211"], "biotype": "mRNA", "end": 25225773, "exons": [{"end": 
25225773, "name": "ENSE00001644818", "start": 25225614, "version": "1"}, {"end": 25209911, "name": "ENSE00002477035", "start": 25205246, "version": "3"}, {"end": 25213206, "name": "ENSE00003927570", "start": 25213114, "version": "1"}], "name": "ENST00000690406", "start": 25205246, "translations": [{"biotype": "CDS", "end": 25225773, "name": "ENSP00000509798", "start": 25213204}], "version": "1"}, {"aliases": ["KRAS-212"], "biotype": "mRNA", "end": 25250929, "exons": [{"end": 25245395, "name": "ENSE00000936617", "start": 25245274, "version": "1"}, {"end": 25250929, "name": "ENSE00003903543", "start": 25250751, "version": "1"}, {"end": 25228891, "name": "ENSE00003925179", "start": 25228775, "version": "1"}, {"end": 25227412, "name": "ENSE00003930847", "start": 25227234, "version": "1"}, {"end": 25209911, "name": "ENSE00003935620", "start": 25207948, "version": "1"}, {"end": 25225773, "name": "ENSE00003937476", "start": 25225614, "version": "1"}], "name": "ENST00000690804", "start": 25207948, "translations": [{"biotype": "CDS", "end": 25245384, "name": "ENSP00000508568", "start": 25228847}], "version": "1"}, {"aliases": ["KRAS-213"], "biotype": "mRNA", "end": 25250917, "exons": [{"end": 25225773, "name": "ENSE00001644818", "start": 25225614, "version": "1"}, {"end": 25227412, "name": "ENSE00003923061", "start": 25227234, "version": "1"}, {"end": 25250917, "name": "ENSE00003923448", "start": 25250751, "version": "1"}, {"end": 25209911, "name": "ENSE00003925822", "start": 25205343, "version": "1"}], "name": "ENST00000692768", "start": 25205343, "translations": [{"biotype": "CDS", "end": 25227325, "name": "ENSP00000510254", "start": 25209795}], "version": "1"}, {"aliases": ["KRAS-214"], "biotype": "mRNA", "end": 25250899, "exons": [{"end": 25245395, "name": "ENSE00000936617", "start": 25245274, "version": "1"}, {"end": 25225773, "name": "ENSE00001644818", "start": 25225614, "version": "1"}, {"end": 25227337, "name": "ENSE00003923411", "start": 25227234, "version": "1"}, 
{"end": 25209911, "name": "ENSE00003927775", "start": 25205260, "version": "1"}, {"end": 25250899, "name": "ENSE00003938559", "start": 25250751, "version": "1"}], "name": "ENST00000693229", "start": 25205260, "translations": [{"biotype": "CDS", "end": 25245384, "name": "ENSP00000509223", "start": 25209795}], "version": "1"}], "version": "14"}, {"biotype": "ncRNA_gene", "chr": "12", "end": 25211233, "name": "ENSG00000274987", "start": 25210652, "strand": "+", "transcripts": [{"biotype": "lnc_RNA", "end": 25211233, "name": "ENST00000612734", "start": 25210652, "version": "1"}], "version": "1"}, {"biotype": "ncRNA_gene", "chr": "12", "end": 25225665, "name": "ENSG00000275197", "start": 25225103, "strand": "+", "transcripts": [{"biotype": "lnc_RNA", "end": 25225665, "name": "ENST00000620933", "start": 25225103, "version": "1"}], "version": "1"}]}
\ No newline at end of file
diff --git a/tests/tools/data/Homo_sapiens.GRCh38.105.chr.kras.gtf b/tests/tools/data/Homo_sapiens.GRCh38.kras.gtf
similarity index 100%
rename from tests/tools/data/Homo_sapiens.GRCh38.105.chr.kras.gtf
rename to tests/tools/data/Homo_sapiens.GRCh38.kras.gtf
diff --git a/tests/tools/data/Homo_sapiens.GRCh38.kras.gtf.json b/tests/tools/data/Homo_sapiens.GRCh38.kras.gtf.json
new file mode 100644
index 00000000..37fa8ad0
--- /dev/null
+++ b/tests/tools/data/Homo_sapiens.GRCh38.kras.gtf.json
@@ -0,0 +1 @@
+{"genes": [{"aliases": ["KRAS"], "biotype": "gene", "chr": "12", "end": 25250936, "name": "ENSG00000133703", "start": 25205246, "strand": "-", "transcripts": [{"aliases": ["KRAS-201"], "biotype": "transcript", "end": 25250929, "exons": [{"end": 25245395, "name": "ENSE00000936617", "number": "2", "start": 25245274, "version": "1"}, {"end": 25215560, "name": "ENSE00001189807", "number": "5", "start": 25215437, "version": "5"}, {"end": 25225773, "name": "ENSE00001644818", "number": "1;4;3", "start": 25225614, "version": "1"}, {"end": 25227412, "name": "ENSE00001719809", "number": "3", "start": 25227234, "version": "1"}, {"end": 25209911, "name": "ENSE00002477035", "number": "3;6", "start": 25205246, "version": "3"}, {"end": 25250929, "name": "ENSE00003903543", "number": "1", "start": 25250751, "version": "1"}], "name": "ENST00000256078", "start": 25205246, "translations": [{"biotype": "CDS", "end": 25245384, "name": "ENSP00000256078", "start": 25215444, "version": "5"}], "version": "10"}, {"aliases": ["KRAS-202"], "biotype": "transcript", "end": 25250929, "exons": [{"end": 25245395, "name": "ENSE00000936617", "number": "2", "start": 25245274, "version": "1"}, {"end": 25225773, "name": "ENSE00001644818", "number": "1;4;3", "start": 25225614, "version": "1"}, {"end": 25227412, "name": "ENSE00001719809", "number": "3", "start": 25227234, "version": "1"}, {"end": 25209911, "name": "ENSE00002456976", "number": "5", "start": 25205246, "version": "2"}, {"end": 25250929, "name": "ENSE00003903543", "number": "1", "start": 25250751, "version": "1"}], "name": "ENST00000311936", "start": 25205246, "translations": [{"biotype": "CDS", "end": 25245384, "name": "ENSP00000308495", "start": 25209798, "version": "3"}], "version": "8"}, {"aliases": ["KRAS-203"], "biotype": "transcript", "end": 25250929, "exons": [{"end": 25245395, "name": "ENSE00000936617", "number": "2", "start": 25245274, "version": "1"}, {"end": 25235226, "name": "ENSE00002478081", "number": "3", "start": 25232591, 
"version": "2"}, {"end": 25250929, "name": "ENSE00003903543", "number": "1", "start": 25250751, "version": "1"}], "name": "ENST00000556131", "start": 25232591, "translations": [{"biotype": "CDS", "end": 25245384, "name": "ENSP00000451856", "start": 25235209, "version": "1"}], "version": "2"}, {"aliases": ["KRAS-204"], "biotype": "transcript", "end": 25250936, "exons": [{"end": 25245395, "name": "ENSE00000936617", "number": "2", "start": 25245274, "version": "1"}, {"end": 25250936, "name": "ENSE00002446502", "number": "1", "start": 25250751, "version": "1"}, {"end": 25209911, "name": "ENSE00002464674", "number": "3", "start": 25209178, "version": "2"}], "name": "ENST00000557334", "start": 25209178, "translations": [{"biotype": "CDS", "end": 25245384, "name": "ENSP00000452512", "start": 25209798, "version": "1"}], "version": "6"}, {"aliases": ["KRAS-205"], "biotype": "transcript", "end": 25250935, "exons": [{"end": 25245395, "name": "ENSE00000936617", "number": "2", "start": 25245274, "version": "1"}, {"end": 25225773, "name": "ENSE00001644818", "number": "1;4;3", "start": 25225614, "version": "1"}, {"end": 25227412, "name": "ENSE00001719809", "number": "3", "start": 25227234, "version": "1"}, {"end": 25209911, "name": "ENSE00003924510", "number": "5", "start": 25205258, "version": "1"}, {"end": 25250935, "name": "ENSE00003934964", "number": "1", "start": 25250764, "version": "1"}], "name": "ENST00000685328", "start": 25205258, "translations": [{"biotype": "CDS", "end": 25245384, "name": "ENSP00000508921", "start": 25209798, "version": "1"}], "version": "1"}, {"aliases": ["KRAS-206"], "biotype": "transcript", "end": 25250908, "exons": [{"end": 25245395, "name": "ENSE00000936617", "number": "2", "start": 25245274, "version": "1"}, {"end": 25250908, "name": "ENSE00003928105", "number": "1", "start": 25250751, "version": "1"}, {"end": 25230621, "name": "ENSE00003930732", "number": "3", "start": 25230483, "version": "1"}, {"end": 25227412, "name": "ENSE00003930847", 
"number": "4", "start": 25227234, "version": "1"}, {"end": 25209911, "name": "ENSE00003934058", "number": "6", "start": 25205250, "version": "1"}, {"end": 25225773, "name": "ENSE00003937476", "number": "5;2", "start": 25225614, "version": "1"}], "name": "ENST00000686877", "start": 25205250, "translations": [{"biotype": "CDS", "end": 25245384, "name": "ENSP00000510431", "start": 25230568, "version": "1"}], "version": "1"}, {"aliases": ["KRAS-207"], "biotype": "transcript", "end": 25250929, "exons": [{"end": 25245395, "name": "ENSE00000936617", "number": "2", "start": 25245274, "version": "1"}, {"end": 25250929, "name": "ENSE00002530521", "number": "1", "start": 25250764, "version": "1"}, {"end": 25235226, "name": "ENSE00003927408", "number": "3", "start": 25232558, "version": "1"}], "name": "ENST00000686969", "start": 25232558, "translations": [{"biotype": "CDS", "end": 25245384, "name": "ENSP00000510479", "start": 25235209, "version": "1"}], "version": "1"}, {"aliases": ["KRAS-208"], "biotype": "transcript", "end": 25250927, "exons": [{"end": 25245395, "name": "ENSE00000936617", "number": "2", "start": 25245274, "version": "1"}, {"end": 25250927, "name": "ENSE00003930705", "number": "1", "start": 25250751, "version": "1"}, {"end": 25225773, "name": "ENSE00003930939", "number": "3", "start": 25225614, "version": "1"}, {"end": 25209911, "name": "ENSE00003933328", "number": "4", "start": 25205270, "version": "1"}], "name": "ENST00000687356", "start": 25205270, "translations": [{"biotype": "CDS", "end": 25245384, "name": "ENSP00000510511", "start": 25225765, "version": "1"}], "version": "1"}, {"aliases": ["KRAS-209"], "biotype": "transcript", "end": 25227997, "exons": [{"end": 25209911, "name": "ENSE00003925173", "number": "3", "start": 25209673, "version": "1"}, {"end": 25227997, "name": "ENSE00003935871", "number": "1", "start": 25227234, "version": "1"}, {"end": 25225773, "name": "ENSE00003937476", "number": "5;2", "start": 25225614, "version": "1"}], "name": 
"ENST00000688228", "start": 25209673, "version": "1"}, {"aliases": ["KRAS-210"], "biotype": "transcript", "end": 25250444, "exons": [{"end": 25245395, "name": "ENSE00000936617", "number": "2", "start": 25245274, "version": "1"}, {"end": 25225773, "name": "ENSE00001644818", "number": "1;4;3", "start": 25225614, "version": "1"}, {"end": 25227412, "name": "ENSE00001719809", "number": "3", "start": 25227234, "version": "1"}, {"end": 25209911, "name": "ENSE00003930148", "number": "5", "start": 25206933, "version": "1"}, {"end": 25250444, "name": "ENSE00003932539", "number": "1", "start": 25250255, "version": "1"}], "name": "ENST00000688940", "start": 25206933, "translations": [{"biotype": "CDS", "end": 25245384, "name": "ENSP00000509238", "start": 25209798, "version": "1"}], "version": "1"}, {"aliases": ["KRAS-211"], "biotype": "transcript", "end": 25225773, "exons": [{"end": 25225773, "name": "ENSE00001644818", "number": "1;4;3", "start": 25225614, "version": "1"}, {"end": 25209911, "name": "ENSE00002477035", "number": "3;6", "start": 25205246, "version": "3"}, {"end": 25213206, "name": "ENSE00003927570", "number": "2", "start": 25213114, "version": "1"}], "name": "ENST00000690406", "start": 25205246, "translations": [{"biotype": "CDS", "end": 25225773, "name": "ENSP00000509798", "start": 25225614, "version": "1"}], "version": "1"}, {"aliases": ["KRAS-212"], "biotype": "transcript", "end": 25250929, "exons": [{"end": 25245395, "name": "ENSE00000936617", "number": "2", "start": 25245274, "version": "1"}, {"end": 25250929, "name": "ENSE00003903543", "number": "1", "start": 25250751, "version": "1"}, {"end": 25228891, "name": "ENSE00003925179", "number": "3", "start": 25228775, "version": "1"}, {"end": 25227412, "name": "ENSE00003930847", "number": "4", "start": 25227234, "version": "1"}, {"end": 25209911, "name": "ENSE00003935620", "number": "6", "start": 25207948, "version": "1"}, {"end": 25225773, "name": "ENSE00003937476", "number": "5;2", "start": 25225614, 
"version": "1"}], "name": "ENST00000690804", "start": 25207948, "translations": [{"biotype": "CDS", "end": 25245384, "name": "ENSP00000508568", "start": 25228850, "version": "1"}], "version": "1"}, {"aliases": ["KRAS-213"], "biotype": "transcript", "end": 25250917, "exons": [{"end": 25225773, "name": "ENSE00001644818", "number": "1;4;3", "start": 25225614, "version": "1"}, {"end": 25227412, "name": "ENSE00003923061", "number": "2", "start": 25227234, "version": "1"}, {"end": 25250917, "name": "ENSE00003923448", "number": "1", "start": 25250751, "version": "1"}, {"end": 25209911, "name": "ENSE00003925822", "number": "4", "start": 25205343, "version": "1"}], "name": "ENST00000692768", "start": 25205343, "translations": [{"biotype": "CDS", "end": 25227325, "name": "ENSP00000510254", "start": 25209798, "version": "1"}], "version": "1"}, {"aliases": ["KRAS-214"], "biotype": "transcript", "end": 25250899, "exons": [{"end": 25245395, "name": "ENSE00000936617", "number": "2", "start": 25245274, "version": "1"}, {"end": 25225773, "name": "ENSE00001644818", "number": "1;4;3", "start": 25225614, "version": "1"}, {"end": 25227337, "name": "ENSE00003923411", "number": "3", "start": 25227234, "version": "1"}, {"end": 25209911, "name": "ENSE00003927775", "number": "5", "start": 25205260, "version": "1"}, {"end": 25250899, "name": "ENSE00003938559", "number": "1", "start": 25250751, "version": "1"}], "name": "ENST00000693229", "start": 25205260, "translations": [{"biotype": "CDS", "end": 25245384, "name": "ENSP00000509223", "start": 25209798, "version": "1"}], "version": "1"}], "version": "14"}, {"aliases": ["KRASP1"], "biotype": "gene", "chr": "6", "end": 54771134, "name": "ENSG00000220635", "start": 54770583, "strand": "+", "transcripts": [{"aliases": ["KRASP1-201"], "biotype": "transcript", "end": 54771134, "exons": [{"end": 54771134, "name": "ENSE00001550689", "number": "1", "start": 54770583, "version": "2"}], "name": "ENST00000407852", "start": 54770583, "version": "2"}], 
"version": "2"}]}
\ No newline at end of file
diff --git a/tests/tools/data/K02718.1.gff3 b/tests/tools/data/K02718.1.gff3
new file mode 100644
index 00000000..22645792
--- /dev/null
+++ b/tests/tools/data/K02718.1.gff3
@@ -0,0 +1,24 @@
+K02718.1	Genbank	CDS	1140	2813	.	+	0	ID=cds-AAA46936.1;Parent=gene-E1;Dbxref=NCBI_GP:AAA46936.1;Name=AAA46936.1;Note=E1 interrupted ORF from 859 to 2813%3B putative;gbkey=CDS;gene=E1;product=replication protein;protein_id=AAA46936.1
+K02718.1	Genbank	CDS	2755	3852	.	+	0	ID=cds-AAA46941.1;Parent=gene-E2;Dbxref=NCBI_GP:AAA46941.1;Name=AAA46941.1;Note=E2 ORF from 2725 to 3852%3B putative;gbkey=CDS;gene=E2;product=regulatory protein;protein_id=AAA46941.1
+K02718.1	Genbank	CDS	3332	3619	.	+	0	ID=cds-AAA46937.1;Parent=gene-E4;Dbxref=NCBI_GP:AAA46937.1;Name=AAA46937.1;gbkey=CDS;gene=E4;partial=true;product=AAA46937.1;protein_id=AAA46937.1;start_range=.,3332
+K02718.1	Genbank	CDS	3863	4099	.	+	0	ID=cds-AAA46938.1;Parent=gene-E5;Dbxref=NCBI_GP:AAA46938.1;Name=AAA46938.1;gbkey=CDS;gene=E5;partial=true;product=AAA46938.1;protein_id=AAA46938.1;start_range=.,3863
+K02718.1	Genbank	CDS	4235	5656	.	+	0	ID=cds-AAA46942.1;Parent=gene-L2;Dbxref=NCBI_GP:AAA46942.1;Name=AAA46942.1;Note=L2 ORF from 4133 to 5656%3B putative;gbkey=CDS;gene=L2;product=minor capsid protein;protein_id=AAA46942.1
+K02718.1	Genbank	CDS	5559	7154	.	+	0	ID=cds-AAA46943.1;Parent=gene-L1;Dbxref=NCBI_GP:AAA46943.1;Name=AAA46943.1;Note=L1 ORF from 5526 to 7154%3B putative;gbkey=CDS;gene=L1;product=major capsid protein;protein_id=AAA46943.1
+K02718.1	Genbank	CDS	562	858	.	+	0	ID=cds-AAA46940.1;Parent=gene-E7;Dbxref=NCBI_GP:AAA46940.1;Name=AAA46940.1;Note=E7 ORF from 544 to 858%3B putative;gbkey=CDS;gene=E7;product=transforming protein;protein_id=AAA46940.1
+K02718.1	Genbank	CDS	83	559	.	+	0	ID=cds-AAA46939.1;Parent=gene-E6;Dbxref=NCBI_GP:AAA46939.1;Name=AAA46939.1;Note=E6 ORF from 65 to 559%3B putative;gbkey=CDS;gene=E6;product=transforming protein;protein_id=AAA46939.1
+K02718.1	Genbank	CDS	865	1140	.	+	0	ID=cds-AAA46936.1;Parent=gene-E1;Dbxref=NCBI_GP:AAA46936.1;Name=AAA46936.1;Note=E1 interrupted ORF from 859 to 2813%3B putative;gbkey=CDS;gene=E1;product=replication protein;protein_id=AAA46936.1
+K02718.1	Genbank	gene	1140	2813	.	+	.	ID=gene-E1;Name=E1;gbkey=Gene;gene=E1;gene_biotype=protein_coding
+K02718.1	Genbank	gene	2755	3852	.	+	.	ID=gene-E2;Name=E2;gbkey=Gene;gene=E2;gene_biotype=protein_coding
+K02718.1	Genbank	gene	3332	3619	.	+	.	ID=gene-E4;Name=E4;gbkey=Gene;gene=E4;gene_biotype=protein_coding
+K02718.1	Genbank	gene	3863	4099	.	+	.	ID=gene-E5;Name=E5;gbkey=Gene;gene=E5;gene_biotype=protein_coding
+K02718.1	Genbank	gene	4235	5656	.	+	.	ID=gene-L2;Name=L2;gbkey=Gene;gene=L2;gene_biotype=protein_coding
+K02718.1	Genbank	gene	5559	7154	.	+	.	ID=gene-L1;Name=L1;gbkey=Gene;gene=L1;gene_biotype=protein_coding
+K02718.1	Genbank	gene	562	858	.	+	.	ID=gene-E7;Name=E7;gbkey=Gene;gene=E7;gene_biotype=protein_coding
+K02718.1	Genbank	gene	83	559	.	+	.	ID=gene-E6;Name=E6;gbkey=Gene;gene=E6;gene_biotype=protein_coding
+K02718.1	Genbank	gene	865	1140	.	+	.	ID=gene-E1;Name=E1;gbkey=Gene;gene=E1;gene_biotype=protein_coding
+K02718.1	Genbank	region	17	23	.	+	.	ID=id-K02718.1:17..23;gbkey=TATA_signal
+K02718.1	Genbank	region	1	7904	.	+	.	ID=K02718.1:1..7904;Dbxref=taxon:333760;Is_circular=true;gbkey=Src;mol_type=genomic DNA
+K02718.1	Genbank	region	4213	4218	.	+	.	ID=id-K02718.1:4213..4218;Note=putative;gbkey=polyA_signal
+K02718.1	Genbank	region	4289	4295	.	+	.	ID=id-L2;gbkey=TATA_signal;gene=L2
+K02718.1	Genbank	region	65	71	.	+	.	ID=id-K02718.1:65..71;gbkey=TATA_signal
+K02718.1	Genbank	region	7260	7265	.	+	.	ID=id-K02718.1:7260..7265;gbkey=polyA_signal
diff --git a/tests/tools/data/K02718.1.gff3.json b/tests/tools/data/K02718.1.gff3.json
new file mode 100644
index 00000000..f34a4ec3
--- /dev/null
+++ b/tests/tools/data/K02718.1.gff3.json
@@ -0,0 +1,243 @@
+{
+    "genes": [
+        {
+            "biotype": "region",
+            "chr": "K02718.1",
+            "end": 7904,
+            "name": "K02718.1:1..7904",
+            "start": 1,
+            "strand": "+"
+        },
+        {
+            "biotype": "region",
+            "chr": "K02718.1",
+            "end": 23,
+            "name": "id-K02718.1:17..23",
+            "start": 17,
+            "strand": "+"
+        },
+        {
+            "biotype": "region",
+            "chr": "K02718.1",
+            "end": 71,
+            "name": "id-K02718.1:65..71",
+            "start": 65,
+            "strand": "+"
+        },
+        {
+            "biotype": "region",
+            "chr": "K02718.1",
+            "end": 7265,
+            "name": "id-K02718.1:7260..7265",
+            "start": 7260,
+            "strand": "+"
+        },
+        {
+            "biotype": "region",
+            "chr": "K02718.1",
+            "end": 4295,
+            "name": "id-L2",
+            "start": 4289,
+            "strand": "+"
+        },
+        {
+            "biotype": "region",
+            "chr": "K02718.1",
+            "end": 4218,
+            "name": "id-K02718.1:4213..4218",
+            "start": 4213,
+            "strand": "+",
+            "note": "putative"
+        },
+        {
+            "biotype": "gene",
+            "chr": "K02718.1",
+            "end": 2813,
+            "name": "E1",
+            "start": 865,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 2813,
+                    "name": "E1_T",
+                    "start": 865,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 2813,
+                            "name": "AAA46936.1",
+                            "note": "E1 interrupted ORF from 859 to 2813%3B putative",
+                            "start": 865
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "K02718.1",
+            "end": 3852,
+            "name": "E2",
+            "start": 2755,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 3852,
+                    "name": "E2_T",
+                    "start": 2755,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 3852,
+                            "name": "AAA46941.1",
+                            "note": "E2 ORF from 2725 to 3852%3B putative",
+                            "start": 2755
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "K02718.1",
+            "end": 3619,
+            "name": "E4",
+            "start": 3332,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 3619,
+                    "name": "E4_T",
+                    "start": 3332,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 3619,
+                            "name": "AAA46937.1",
+                            "start": 3332
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "K02718.1",
+            "end": 4099,
+            "name": "E5",
+            "start": 3863,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 4099,
+                    "name": "E5_T",
+                    "start": 3863,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 4099,
+                            "name": "AAA46938.1",
+                            "start": 3863
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "K02718.1",
+            "end": 5656,
+            "name": "L2",
+            "start": 4235,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 5656,
+                    "name": "L2_T",
+                    "start": 4235,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 5656,
+                            "name": "AAA46942.1",
+                            "note": "L2 ORF from 4133 to 5656%3B putative",
+                            "start": 4235
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "K02718.1",
+            "end": 7154,
+            "name": "L1",
+            "start": 5559,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 7154,
+                    "name": "L1_T",
+                    "start": 5559,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 7154,
+                            "name": "AAA46943.1",
+                            "note": "L1 ORF from 5526 to 7154%3B putative",
+                            "start": 5559
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "K02718.1",
+            "end": 559,
+            "name": "E6",
+            "start": 83,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 559,
+                    "name": "E6_T",
+                    "start": 83,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 559,
+                            "name": "AAA46939.1",
+                            "note": "E6 ORF from 65 to 559%3B putative",
+                            "start": 83
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "K02718.1",
+            "end": 858,
+            "name": "E7",
+            "start": 562,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 858,
+                    "name": "E7_T",
+                    "start": 562,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 858,
+                            "name": "AAA46940.1",
+                            "note": "E7 ORF from 544 to 858%3B putative",
+                            "start": 562
+                        }
+                    ]
+                }
+            ]
+        }
+    ]
+}
diff --git a/tests/tools/data/K02718.1.gtf b/tests/tools/data/K02718.1.gtf
new file mode 100644
index 00000000..94cb884a
--- /dev/null
+++ b/tests/tools/data/K02718.1.gtf
@@ -0,0 +1,32 @@
+K02718.1	Genbank	gene	83	559	.	+	.	gene_id "E6"; transcript_id ""; gbkey "Gene"; gene "E6"; gene_biotype "protein_coding"; 
+K02718.1	Genbank	CDS	83	556	.	+	0	gene_id "E6"; transcript_id "unassigned_transcript_1"; gbkey "CDS"; gene "E6"; note "E6 ORF from 65 to 559; putative"; product "transforming protein"; protein_id "AAA46939.1"; exon_number "1"; 
+K02718.1	Genbank	start_codon	83	85	.	+	0	gene_id "E6"; transcript_id "unassigned_transcript_1"; gbkey "CDS"; gene "E6"; note "E6 ORF from 65 to 559; putative"; product "transforming protein"; protein_id "AAA46939.1"; exon_number "1"; 
+K02718.1	Genbank	stop_codon	557	559	.	+	0	gene_id "E6"; transcript_id "unassigned_transcript_1"; gbkey "CDS"; gene "E6"; note "E6 ORF from 65 to 559; putative"; product "transforming protein"; protein_id "AAA46939.1"; exon_number "1"; 
+K02718.1	Genbank	gene	562	858	.	+	.	gene_id "E7"; transcript_id ""; gbkey "Gene"; gene "E7"; gene_biotype "protein_coding"; 
+K02718.1	Genbank	CDS	562	855	.	+	0	gene_id "E7"; transcript_id "unassigned_transcript_2"; gbkey "CDS"; gene "E7"; note "E7 ORF from 544 to 858; putative"; product "transforming protein"; protein_id "AAA46940.1"; exon_number "1"; 
+K02718.1	Genbank	start_codon	562	564	.	+	0	gene_id "E7"; transcript_id "unassigned_transcript_2"; gbkey "CDS"; gene "E7"; note "E7 ORF from 544 to 858; putative"; product "transforming protein"; protein_id "AAA46940.1"; exon_number "1"; 
+K02718.1	Genbank	stop_codon	856	858	.	+	0	gene_id "E7"; transcript_id "unassigned_transcript_2"; gbkey "CDS"; gene "E7"; note "E7 ORF from 544 to 858; putative"; product "transforming protein"; protein_id "AAA46940.1"; exon_number "1"; 
+K02718.1	Genbank	gene	865	1140	.	+	.	gene_id "E1"; transcript_id ""; gbkey "Gene"; gene "E1"; gene_biotype "protein_coding"; part "1"; 
+K02718.1	Genbank	gene	1140	2813	.	+	.	gene_id "E1"; transcript_id ""; gbkey "Gene"; gene "E1"; gene_biotype "protein_coding"; part "2"; 
+K02718.1	Genbank	CDS	865	1140	.	+	0	gene_id "E1"; transcript_id "unassigned_transcript_3"; gbkey "CDS"; gene "E1"; note "E1 interrupted ORF from 859 to 2813; putative"; product "replication protein"; protein_id "AAA46936.1"; exon_number "1"; 
+K02718.1	Genbank	CDS	1140	2810	.	+	0	gene_id "E1"; transcript_id "unassigned_transcript_3"; gbkey "CDS"; gene "E1"; note "E1 interrupted ORF from 859 to 2813; putative"; product "replication protein"; protein_id "AAA46936.1"; exon_number "2"; 
+K02718.1	Genbank	start_codon	865	867	.	+	0	gene_id "E1"; transcript_id "unassigned_transcript_3"; gbkey "CDS"; gene "E1"; note "E1 interrupted ORF from 859 to 2813; putative"; product "replication protein"; protein_id "AAA46936.1"; exon_number "1"; 
+K02718.1	Genbank	stop_codon	2811	2813	.	+	0	gene_id "E1"; transcript_id "unassigned_transcript_3"; gbkey "CDS"; gene "E1"; note "E1 interrupted ORF from 859 to 2813; putative"; product "replication protein"; protein_id "AAA46936.1"; exon_number "2"; 
+K02718.1	Genbank	gene	2755	3852	.	+	.	gene_id "E2"; transcript_id ""; gbkey "Gene"; gene "E2"; gene_biotype "protein_coding"; 
+K02718.1	Genbank	CDS	2755	3849	.	+	0	gene_id "E2"; transcript_id "unassigned_transcript_4"; gbkey "CDS"; gene "E2"; note "E2 ORF from 2725 to 3852; putative"; product "regulatory protein"; protein_id "AAA46941.1"; exon_number "1"; 
+K02718.1	Genbank	start_codon	2755	2757	.	+	0	gene_id "E2"; transcript_id "unassigned_transcript_4"; gbkey "CDS"; gene "E2"; note "E2 ORF from 2725 to 3852; putative"; product "regulatory protein"; protein_id "AAA46941.1"; exon_number "1"; 
+K02718.1	Genbank	stop_codon	3850	3852	.	+	0	gene_id "E2"; transcript_id "unassigned_transcript_4"; gbkey "CDS"; gene "E2"; note "E2 ORF from 2725 to 3852; putative"; product "regulatory protein"; protein_id "AAA46941.1"; exon_number "1"; 
+K02718.1	Genbank	gene	3332	3619	.	+	.	gene_id "E4"; transcript_id ""; gbkey "Gene"; gene "E4"; gene_biotype "protein_coding"; 
+K02718.1	Genbank	CDS	3332	3616	.	+	0	gene_id "E4"; transcript_id "unassigned_transcript_5"; gbkey "CDS"; gene "E4"; partial "true"; product "AAA46937.1"; protein_id "AAA46937.1"; exon_number "1"; 
+K02718.1	Genbank	stop_codon	3617	3619	.	+	0	gene_id "E4"; transcript_id "unassigned_transcript_5"; gbkey "CDS"; gene "E4"; partial "true"; product "AAA46937.1"; protein_id "AAA46937.1"; exon_number "1"; 
+K02718.1	Genbank	gene	3863	4099	.	+	.	gene_id "E5"; transcript_id ""; gbkey "Gene"; gene "E5"; gene_biotype "protein_coding"; 
+K02718.1	Genbank	CDS	3863	4096	.	+	0	gene_id "E5"; transcript_id "unassigned_transcript_6"; gbkey "CDS"; gene "E5"; partial "true"; product "AAA46938.1"; protein_id "AAA46938.1"; exon_number "1"; 
+K02718.1	Genbank	stop_codon	4097	4099	.	+	0	gene_id "E5"; transcript_id "unassigned_transcript_6"; gbkey "CDS"; gene "E5"; partial "true"; product "AAA46938.1"; protein_id "AAA46938.1"; exon_number "1"; 
+K02718.1	Genbank	gene	4235	5656	.	+	.	gene_id "L2"; transcript_id ""; gbkey "Gene"; gene "L2"; gene_biotype "protein_coding"; 
+K02718.1	Genbank	CDS	4235	5653	.	+	0	gene_id "L2"; transcript_id "unassigned_transcript_7"; gbkey "CDS"; gene "L2"; note "L2 ORF from 4133 to 5656; putative"; product "minor capsid protein"; protein_id "AAA46942.1"; exon_number "1"; 
+K02718.1	Genbank	start_codon	4235	4237	.	+	0	gene_id "L2"; transcript_id "unassigned_transcript_7"; gbkey "CDS"; gene "L2"; note "L2 ORF from 4133 to 5656; putative"; product "minor capsid protein"; protein_id "AAA46942.1"; exon_number "1"; 
+K02718.1	Genbank	stop_codon	5654	5656	.	+	0	gene_id "L2"; transcript_id "unassigned_transcript_7"; gbkey "CDS"; gene "L2"; note "L2 ORF from 4133 to 5656; putative"; product "minor capsid protein"; protein_id "AAA46942.1"; exon_number "1"; 
+K02718.1	Genbank	gene	5559	7154	.	+	.	gene_id "L1"; transcript_id ""; gbkey "Gene"; gene "L1"; gene_biotype "protein_coding"; 
+K02718.1	Genbank	CDS	5559	7151	.	+	0	gene_id "L1"; transcript_id "unassigned_transcript_8"; gbkey "CDS"; gene "L1"; note "L1 ORF from 5526 to 7154; putative"; product "major capsid protein"; protein_id "AAA46943.1"; exon_number "1"; 
+K02718.1	Genbank	start_codon	5559	5561	.	+	0	gene_id "L1"; transcript_id "unassigned_transcript_8"; gbkey "CDS"; gene "L1"; note "L1 ORF from 5526 to 7154; putative"; product "major capsid protein"; protein_id "AAA46943.1"; exon_number "1"; 
+K02718.1	Genbank	stop_codon	7152	7154	.	+	0	gene_id "L1"; transcript_id "unassigned_transcript_8"; gbkey "CDS"; gene "L1"; note "L1 ORF from 5526 to 7154; putative"; product "major capsid protein"; protein_id "AAA46943.1"; exon_number "1"; 
diff --git a/tests/tools/data/K02718.1.gtf.json b/tests/tools/data/K02718.1.gtf.json
new file mode 100644
index 00000000..71866865
--- /dev/null
+++ b/tests/tools/data/K02718.1.gtf.json
@@ -0,0 +1,188 @@
+{
+    "genes": [
+        {
+            "biotype": "gene",
+            "chr": "K02718.1",
+            "end": 2813,
+            "name": "E1",
+            "start": 865,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 2810,
+                    "name": "E1_T",
+                    "start": 865,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 2810,
+                            "name": "AAA46936.1",
+                            "start": 865
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "K02718.1",
+            "end": 3852,
+            "name": "E2",
+            "start": 2755,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 3849,
+                    "name": "E2_T",
+                    "start": 2755,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 3849,
+                            "name": "AAA46941.1",
+                            "start": 2755
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "K02718.1",
+            "end": 3619,
+            "name": "E4",
+            "start": 3332,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 3616,
+                    "name": "E4_T",
+                    "start": 3332,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 3616,
+                            "name": "AAA46937.1",
+                            "start": 3332
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "K02718.1",
+            "end": 4099,
+            "name": "E5",
+            "start": 3863,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 4096,
+                    "name": "E5_T",
+                    "start": 3863,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 4096,
+                            "name": "AAA46938.1",
+                            "start": 3863
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "K02718.1",
+            "end": 559,
+            "name": "E6",
+            "start": 83,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 556,
+                    "name": "E6_T",
+                    "start": 83,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 556,
+                            "name": "AAA46939.1",
+                            "start": 83
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "K02718.1",
+            "end": 858,
+            "name": "E7",
+            "start": 562,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 855,
+                    "name": "E7_T",
+                    "start": 562,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 855,
+                            "name": "AAA46940.1",
+                            "start": 562
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "K02718.1",
+            "end": 7154,
+            "name": "L1",
+            "start": 5559,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 7151,
+                    "name": "L1_T",
+                    "start": 5559,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 7151,
+                            "name": "AAA46943.1",
+                            "start": 5559
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "K02718.1",
+            "end": 5656,
+            "name": "L2",
+            "start": 4235,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 5653,
+                    "name": "L2_T",
+                    "start": 4235,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 5653,
+                            "name": "AAA46942.1",
+                            "start": 4235
+                        }
+                    ]
+                }
+            ]
+        }
+    ]
+}
diff --git a/tests/tools/data/example_genes.v2.json b/tests/tools/data/example_genes.v2.json
new file mode 100644
index 00000000..f508d6d0
--- /dev/null
+++ b/tests/tools/data/example_genes.v2.json
@@ -0,0 +1,7700 @@
+{
+    "genes": [
+        {
+            "aliases": [
+                "EGFR"
+            ],
+            "chr": "7",
+            "end": 55324313,
+            "name": "ENSG00000146648",
+            "start": 55086714,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "cdna_coding_end": 3533,
+                    "cdna_coding_start": 258,
+                    "domains": [
+                        {
+                            "name": "PIRSF000619",
+                            "regions": [
+                                {
+                                    "end": 1090,
+                                    "start": 1
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PF07714",
+                            "regions": [
+                                {
+                                    "end": 920,
+                                    "start": 669
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SSF52058",
+                            "regions": [
+                                {
+                                    "end": 191,
+                                    "start": 28
+                                },
+                                {
+                                    "end": 475,
+                                    "start": 283
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PF00757",
+                            "regions": [
+                                {
+                                    "end": 293,
+                                    "start": 141
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PS50011",
+                            "regions": [
+                                {
+                                    "end": 934,
+                                    "start": 667
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PS50311",
+                            "regions": [
+                                {
+                                    "end": 219,
+                                    "start": 145
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SSF57184",
+                            "regions": [
+                                {
+                                    "end": 290,
+                                    "start": 142
+                                },
+                                {
+                                    "end": 593,
+                                    "start": 460
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PR00109",
+                            "regions": [
+                                {
+                                    "end": 758,
+                                    "start": 745
+                                },
+                                {
+                                    "end": 800,
+                                    "start": 782
+                                },
+                                {
+                                    "end": 841,
+                                    "start": 831
+                                },
+                                {
+                                    "end": 872,
+                                    "start": 850
+                                },
+                                {
+                                    "end": 916,
+                                    "start": 894
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SSF56112",
+                            "regions": [
+                                {
+                                    "end": 975,
+                                    "start": 651
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PF01030",
+                            "regions": [
+                                {
+                                    "end": 141,
+                                    "start": 57
+                                },
+                                {
+                                    "end": 435,
+                                    "start": 316
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SM00220",
+                            "regions": [
+                                {
+                                    "end": 924,
+                                    "start": 667
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SM00261",
+                            "regions": [
+                                {
+                                    "end": 225,
+                                    "start": 183
+                                },
+                                {
+                                    "end": 502,
+                                    "start": 451
+                                },
+                                {
+                                    "end": 556,
+                                    "start": 507
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SM00219",
+                            "regions": [
+                                {
+                                    "end": 923,
+                                    "start": 667
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PF00069",
+                            "regions": [
+                                {
+                                    "end": 919,
+                                    "start": 667
+                                }
+                            ]
+                        }
+                    ],
+                    "end": 55270769,
+                    "exons": [
+                        {
+                            "end": 55087058,
+                            "start": 55086714
+                        },
+                        {
+                            "end": 55210130,
+                            "start": 55209979
+                        },
+                        {
+                            "end": 55211181,
+                            "start": 55210998
+                        },
+                        {
+                            "end": 55219055,
+                            "start": 55218987
+                        },
+                        {
+                            "end": 55220357,
+                            "start": 55220239
+                        },
+                        {
+                            "end": 55221845,
+                            "start": 55221704
+                        },
+                        {
+                            "end": 55223639,
+                            "start": 55223523
+                        },
+                        {
+                            "end": 55224352,
+                            "start": 55224226
+                        },
+                        {
+                            "end": 55224525,
+                            "start": 55224452
+                        },
+                        {
+                            "end": 55225446,
+                            "start": 55225356
+                        },
+                        {
+                            "end": 55228031,
+                            "start": 55227832
+                        },
+                        {
+                            "end": 55229324,
+                            "start": 55229192
+                        },
+                        {
+                            "end": 55231516,
+                            "start": 55231426
+                        },
+                        {
+                            "end": 55233130,
+                            "start": 55232973
+                        },
+                        {
+                            "end": 55238906,
+                            "start": 55238868
+                        },
+                        {
+                            "end": 55240817,
+                            "start": 55240676
+                        },
+                        {
+                            "end": 55241736,
+                            "start": 55241614
+                        },
+                        {
+                            "end": 55242513,
+                            "start": 55242415
+                        },
+                        {
+                            "end": 55249171,
+                            "start": 55248986
+                        },
+                        {
+                            "end": 55259567,
+                            "start": 55259412
+                        },
+                        {
+                            "end": 55260534,
+                            "start": 55260459
+                        },
+                        {
+                            "end": 55266556,
+                            "start": 55266410
+                        },
+                        {
+                            "end": 55268106,
+                            "start": 55268009
+                        },
+                        {
+                            "end": 55269048,
+                            "start": 55268881
+                        },
+                        {
+                            "end": 55269475,
+                            "start": 55269428
+                        },
+                        {
+                            "end": 55270769,
+                            "start": 55270210
+                        }
+                    ],
+                    "is_best_transcript": false,
+                    "name": "ENST00000455089",
+                    "start": 55086714
+                },
+                {
+                    "cdna_coding_end": 2133,
+                    "cdna_coding_start": 247,
+                    "domains": [
+                        {
+                            "name": "PS50311",
+                            "regions": [
+                                {
+                                    "end": 264,
+                                    "start": 187
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SSF57184",
+                            "regions": [
+                                {
+                                    "end": 339,
+                                    "start": 182
+                                },
+                                {
+                                    "end": 624,
+                                    "start": 505
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SSF52058",
+                            "regions": [
+                                {
+                                    "end": 211,
+                                    "start": 29
+                                },
+                                {
+                                    "end": 520,
+                                    "start": 328
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PF00757",
+                            "regions": [
+                                {
+                                    "end": 338,
+                                    "start": 185
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SM00261",
+                            "regions": [
+                                {
+                                    "end": 270,
+                                    "start": 228
+                                },
+                                {
+                                    "end": 547,
+                                    "start": 496
+                                },
+                                {
+                                    "end": 601,
+                                    "start": 552
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PF01030",
+                            "regions": [
+                                {
+                                    "end": 167,
+                                    "start": 57
+                                },
+                                {
+                                    "end": 480,
+                                    "start": 361
+                                }
+                            ]
+                        }
+                    ],
+                    "end": 55236328,
+                    "exons": [
+                        {
+                            "end": 55087058,
+                            "start": 55086725
+                        },
+                        {
+                            "end": 55210130,
+                            "start": 55209979
+                        },
+                        {
+                            "end": 55211181,
+                            "start": 55210998
+                        },
+                        {
+                            "end": 55214433,
+                            "start": 55214299
+                        },
+                        {
+                            "end": 55219055,
+                            "start": 55218987
+                        },
+                        {
+                            "end": 55220357,
+                            "start": 55220239
+                        },
+                        {
+                            "end": 55221845,
+                            "start": 55221704
+                        },
+                        {
+                            "end": 55223639,
+                            "start": 55223523
+                        },
+                        {
+                            "end": 55224352,
+                            "start": 55224226
+                        },
+                        {
+                            "end": 55224525,
+                            "start": 55224452
+                        },
+                        {
+                            "end": 55225446,
+                            "start": 55225356
+                        },
+                        {
+                            "end": 55228031,
+                            "start": 55227832
+                        },
+                        {
+                            "end": 55229324,
+                            "start": 55229192
+                        },
+                        {
+                            "end": 55231516,
+                            "start": 55231426
+                        },
+                        {
+                            "end": 55233130,
+                            "start": 55232973
+                        },
+                        {
+                            "end": 55236328,
+                            "start": 55236216
+                        }
+                    ],
+                    "is_best_transcript": false,
+                    "name": "ENST00000342916",
+                    "start": 55086725
+                },
+                {
+                    "cdna_coding_end": 2363,
+                    "cdna_coding_start": 246,
+                    "domains": [
+                        {
+                            "name": "SSF57184",
+                            "regions": [
+                                {
+                                    "end": 339,
+                                    "start": 182
+                                },
+                                {
+                                    "end": 624,
+                                    "start": 505
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PS50311",
+                            "regions": [
+                                {
+                                    "end": 264,
+                                    "start": 187
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PF00757",
+                            "regions": [
+                                {
+                                    "end": 338,
+                                    "start": 185
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SSF52058",
+                            "regions": [
+                                {
+                                    "end": 211,
+                                    "start": 29
+                                },
+                                {
+                                    "end": 520,
+                                    "start": 328
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SM00261",
+                            "regions": [
+                                {
+                                    "end": 270,
+                                    "start": 228
+                                },
+                                {
+                                    "end": 547,
+                                    "start": 496
+                                },
+                                {
+                                    "end": 601,
+                                    "start": 552
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PF01030",
+                            "regions": [
+                                {
+                                    "end": 167,
+                                    "start": 57
+                                },
+                                {
+                                    "end": 480,
+                                    "start": 361
+                                }
+                            ]
+                        }
+                    ],
+                    "end": 55238738,
+                    "exons": [
+                        {
+                            "end": 55087058,
+                            "start": 55086726
+                        },
+                        {
+                            "end": 55210130,
+                            "start": 55209979
+                        },
+                        {
+                            "end": 55211181,
+                            "start": 55210998
+                        },
+                        {
+                            "end": 55214433,
+                            "start": 55214299
+                        },
+                        {
+                            "end": 55219055,
+                            "start": 55218987
+                        },
+                        {
+                            "end": 55220357,
+                            "start": 55220239
+                        },
+                        {
+                            "end": 55221845,
+                            "start": 55221704
+                        },
+                        {
+                            "end": 55223639,
+                            "start": 55223523
+                        },
+                        {
+                            "end": 55224352,
+                            "start": 55224226
+                        },
+                        {
+                            "end": 55224525,
+                            "start": 55224452
+                        },
+                        {
+                            "end": 55225446,
+                            "start": 55225356
+                        },
+                        {
+                            "end": 55228031,
+                            "start": 55227832
+                        },
+                        {
+                            "end": 55229324,
+                            "start": 55229192
+                        },
+                        {
+                            "end": 55231516,
+                            "start": 55231426
+                        },
+                        {
+                            "end": 55233130,
+                            "start": 55232973
+                        },
+                        {
+                            "end": 55238738,
+                            "start": 55238000
+                        }
+                    ],
+                    "is_best_transcript": false,
+                    "name": "ENST00000344576",
+                    "start": 55086726
+                },
+                {
+                    "cdna_coding_end": 1462,
+                    "cdna_coding_start": 245,
+                    "domains": [
+                        {
+                            "name": "SSF57184",
+                            "regions": [
+                                {
+                                    "end": 339,
+                                    "start": 182
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PS50311",
+                            "regions": [
+                                {
+                                    "end": 264,
+                                    "start": 187
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PF00757",
+                            "regions": [
+                                {
+                                    "end": 338,
+                                    "start": 185
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SSF52058",
+                            "regions": [
+                                {
+                                    "end": 211,
+                                    "start": 29
+                                },
+                                {
+                                    "end": 403,
+                                    "start": 328
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SM00261",
+                            "regions": [
+                                {
+                                    "end": 270,
+                                    "start": 228
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PF01030",
+                            "regions": [
+                                {
+                                    "end": 167,
+                                    "start": 57
+                                }
+                            ]
+                        }
+                    ],
+                    "end": 55224644,
+                    "exons": [
+                        {
+                            "end": 55087058,
+                            "start": 55086727
+                        },
+                        {
+                            "end": 55210130,
+                            "start": 55209979
+                        },
+                        {
+                            "end": 55211181,
+                            "start": 55210998
+                        },
+                        {
+                            "end": 55214433,
+                            "start": 55214299
+                        },
+                        {
+                            "end": 55219055,
+                            "start": 55218987
+                        },
+                        {
+                            "end": 55220357,
+                            "start": 55220239
+                        },
+                        {
+                            "end": 55221845,
+                            "start": 55221704
+                        },
+                        {
+                            "end": 55223639,
+                            "start": 55223523
+                        },
+                        {
+                            "end": 55224352,
+                            "start": 55224226
+                        },
+                        {
+                            "end": 55224644,
+                            "start": 55224452
+                        }
+                    ],
+                    "is_best_transcript": false,
+                    "name": "ENST00000420316",
+                    "start": 55086727
+                },
+                {
+                    "cdna_coding_end": 3810,
+                    "cdna_coding_start": 178,
+                    "domains": [
+                        {
+                            "name": "SM00220",
+                            "regions": [
+                                {
+                                    "end": 969,
+                                    "start": 712
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PF01030",
+                            "regions": [
+                                {
+                                    "end": 167,
+                                    "start": 57
+                                },
+                                {
+                                    "end": 480,
+                                    "start": 361
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SSF56112",
+                            "regions": [
+                                {
+                                    "end": 1020,
+                                    "start": 696
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PF00069",
+                            "regions": [
+                                {
+                                    "end": 964,
+                                    "start": 712
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SM00219",
+                            "regions": [
+                                {
+                                    "end": 968,
+                                    "start": 712
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SM00261",
+                            "regions": [
+                                {
+                                    "end": 270,
+                                    "start": 228
+                                },
+                                {
+                                    "end": 547,
+                                    "start": 496
+                                },
+                                {
+                                    "end": 601,
+                                    "start": 552
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PF00757",
+                            "regions": [
+                                {
+                                    "end": 338,
+                                    "start": 185
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SSF52058",
+                            "regions": [
+                                {
+                                    "end": 211,
+                                    "start": 29
+                                },
+                                {
+                                    "end": 520,
+                                    "start": 328
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PF07714",
+                            "regions": [
+                                {
+                                    "end": 965,
+                                    "start": 714
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PIRSF000619",
+                            "regions": [
+                                {
+                                    "end": 1210,
+                                    "start": 1
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PR00109",
+                            "regions": [
+                                {
+                                    "end": 803,
+                                    "start": 790
+                                },
+                                {
+                                    "end": 845,
+                                    "start": 827
+                                },
+                                {
+                                    "end": 886,
+                                    "start": 876
+                                },
+                                {
+                                    "end": 917,
+                                    "start": 895
+                                },
+                                {
+                                    "end": 961,
+                                    "start": 939
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SSF57184",
+                            "regions": [
+                                {
+                                    "end": 339,
+                                    "start": 182
+                                },
+                                {
+                                    "end": 638,
+                                    "start": 505
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PS50311",
+                            "regions": [
+                                {
+                                    "end": 264,
+                                    "start": 187
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PS50011",
+                            "regions": [
+                                {
+                                    "end": 979,
+                                    "start": 712
+                                }
+                            ]
+                        }
+                    ],
+                    "end": 55279321,
+                    "exons": [
+                        {
+                            "end": 55087058,
+                            "start": 55086794
+                        },
+                        {
+                            "end": 55210130,
+                            "start": 55209979
+                        },
+                        {
+                            "end": 55211181,
+                            "start": 55210998
+                        },
+                        {
+                            "end": 55214433,
+                            "start": 55214299
+                        },
+                        {
+                            "end": 55219055,
+                            "start": 55218987
+                        },
+                        {
+                            "end": 55220357,
+                            "start": 55220239
+                        },
+                        {
+                            "end": 55221845,
+                            "start": 55221704
+                        },
+                        {
+                            "end": 55223639,
+                            "start": 55223523
+                        },
+                        {
+                            "end": 55224352,
+                            "start": 55224226
+                        },
+                        {
+                            "end": 55224525,
+                            "start": 55224452
+                        },
+                        {
+                            "end": 55225446,
+                            "start": 55225356
+                        },
+                        {
+                            "end": 55228031,
+                            "start": 55227832
+                        },
+                        {
+                            "end": 55229324,
+                            "start": 55229192
+                        },
+                        {
+                            "end": 55231516,
+                            "start": 55231426
+                        },
+                        {
+                            "end": 55233130,
+                            "start": 55232973
+                        },
+                        {
+                            "end": 55238906,
+                            "start": 55238868
+                        },
+                        {
+                            "end": 55240817,
+                            "start": 55240676
+                        },
+                        {
+                            "end": 55241736,
+                            "start": 55241614
+                        },
+                        {
+                            "end": 55242513,
+                            "start": 55242415
+                        },
+                        {
+                            "end": 55249171,
+                            "start": 55248986
+                        },
+                        {
+                            "end": 55259567,
+                            "start": 55259412
+                        },
+                        {
+                            "end": 55260534,
+                            "start": 55260459
+                        },
+                        {
+                            "end": 55266556,
+                            "start": 55266410
+                        },
+                        {
+                            "end": 55268106,
+                            "start": 55268009
+                        },
+                        {
+                            "end": 55269048,
+                            "start": 55268881
+                        },
+                        {
+                            "end": 55269475,
+                            "start": 55269428
+                        },
+                        {
+                            "end": 55270318,
+                            "start": 55270210
+                        },
+                        {
+                            "end": 55279321,
+                            "start": 55272949
+                        }
+                    ],
+                    "is_best_transcript": true,
+                    "name": "ENST00000275493",
+                    "start": 55086794
+                },
+                {
+                    "cdna_coding_end": 2134,
+                    "cdna_coding_start": 161,
+                    "domains": [
+                        {
+                            "name": "PF01030",
+                            "regions": [
+                                {
+                                    "end": 167,
+                                    "start": 57
+                                },
+                                {
+                                    "end": 480,
+                                    "start": 361
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SM00261",
+                            "regions": [
+                                {
+                                    "end": 270,
+                                    "start": 228
+                                },
+                                {
+                                    "end": 547,
+                                    "start": 496
+                                },
+                                {
+                                    "end": 601,
+                                    "start": 552
+                                },
+                                {
+                                    "end": 653,
+                                    "start": 614
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SSF52058",
+                            "regions": [
+                                {
+                                    "end": 211,
+                                    "start": 29
+                                },
+                                {
+                                    "end": 520,
+                                    "start": 328
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PF00757",
+                            "regions": [
+                                {
+                                    "end": 338,
+                                    "start": 185
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PS50311",
+                            "regions": [
+                                {
+                                    "end": 264,
+                                    "start": 187
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SSF57184",
+                            "regions": [
+                                {
+                                    "end": 339,
+                                    "start": 182
+                                },
+                                {
+                                    "end": 638,
+                                    "start": 505
+                                }
+                            ]
+                        }
+                    ],
+                    "end": 55324313,
+                    "exons": [
+                        {
+                            "end": 55087058,
+                            "start": 55086811
+                        },
+                        {
+                            "end": 55210130,
+                            "start": 55209979
+                        },
+                        {
+                            "end": 55211181,
+                            "start": 55210998
+                        },
+                        {
+                            "end": 55214433,
+                            "start": 55214299
+                        },
+                        {
+                            "end": 55219055,
+                            "start": 55218987
+                        },
+                        {
+                            "end": 55220357,
+                            "start": 55220239
+                        },
+                        {
+                            "end": 55221845,
+                            "start": 55221704
+                        },
+                        {
+                            "end": 55223639,
+                            "start": 55223523
+                        },
+                        {
+                            "end": 55224352,
+                            "start": 55224226
+                        },
+                        {
+                            "end": 55224525,
+                            "start": 55224452
+                        },
+                        {
+                            "end": 55225446,
+                            "start": 55225356
+                        },
+                        {
+                            "end": 55228031,
+                            "start": 55227832
+                        },
+                        {
+                            "end": 55229324,
+                            "start": 55229192
+                        },
+                        {
+                            "end": 55231516,
+                            "start": 55231426
+                        },
+                        {
+                            "end": 55233130,
+                            "start": 55232973
+                        },
+                        {
+                            "end": 55238906,
+                            "start": 55238868
+                        },
+                        {
+                            "end": 55240621,
+                            "start": 55240539
+                        },
+                        {
+                            "end": 55324313,
+                            "start": 55323947
+                        }
+                    ],
+                    "is_best_transcript": false,
+                    "name": "ENST00000442591",
+                    "start": 55086811
+                },
+                {
+                    "cdna_coding_end": 691,
+                    "cdna_coding_start": 308,
+                    "domains": [
+                        {
+                            "name": "SSF52058",
+                            "regions": [
+                                {
+                                    "end": 127,
+                                    "start": 1
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PF01030",
+                            "regions": [
+                                {
+                                    "end": 114,
+                                    "start": 4
+                                }
+                            ]
+                        }
+                    ],
+                    "end": 55214417,
+                    "exons": [
+                        {
+                            "end": 55177651,
+                            "start": 55177416
+                        },
+                        {
+                            "end": 55210130,
+                            "start": 55209979
+                        },
+                        {
+                            "end": 55211181,
+                            "start": 55210998
+                        },
+                        {
+                            "end": 55214417,
+                            "start": 55214299
+                        }
+                    ],
+                    "is_best_transcript": false,
+                    "name": "ENST00000450046",
+                    "start": 55177416
+                },
+                {
+                    "cdna_coding_end": 3657,
+                    "cdna_coding_start": 184,
+                    "domains": [
+                        {
+                            "name": "SM00261",
+                            "regions": [
+                                {
+                                    "end": 217,
+                                    "start": 175
+                                },
+                                {
+                                    "end": 494,
+                                    "start": 443
+                                },
+                                {
+                                    "end": 548,
+                                    "start": 499
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PF00069",
+                            "regions": [
+                                {
+                                    "end": 911,
+                                    "start": 659
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SM00219",
+                            "regions": [
+                                {
+                                    "end": 915,
+                                    "start": 659
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SSF56112",
+                            "regions": [
+                                {
+                                    "end": 967,
+                                    "start": 643
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SM00220",
+                            "regions": [
+                                {
+                                    "end": 916,
+                                    "start": 659
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PF01030",
+                            "regions": [
+                                {
+                                    "end": 114,
+                                    "start": 4
+                                },
+                                {
+                                    "end": 427,
+                                    "start": 308
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PS50311",
+                            "regions": [
+                                {
+                                    "end": 211,
+                                    "start": 134
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PS50011",
+                            "regions": [
+                                {
+                                    "end": 926,
+                                    "start": 659
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PR00109",
+                            "regions": [
+                                {
+                                    "end": 750,
+                                    "start": 737
+                                },
+                                {
+                                    "end": 792,
+                                    "start": 774
+                                },
+                                {
+                                    "end": 833,
+                                    "start": 823
+                                },
+                                {
+                                    "end": 864,
+                                    "start": 842
+                                },
+                                {
+                                    "end": 908,
+                                    "start": 886
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SSF57184",
+                            "regions": [
+                                {
+                                    "end": 286,
+                                    "start": 129
+                                },
+                                {
+                                    "end": 585,
+                                    "start": 452
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PIRSF000619",
+                            "regions": [
+                                {
+                                    "end": 1157,
+                                    "start": 1
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PF07714",
+                            "regions": [
+                                {
+                                    "end": 912,
+                                    "start": 661
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SSF52058",
+                            "regions": [
+                                {
+                                    "end": 158,
+                                    "start": 1
+                                },
+                                {
+                                    "end": 467,
+                                    "start": 275
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PF00757",
+                            "regions": [
+                                {
+                                    "end": 285,
+                                    "start": 132
+                                }
+                            ]
+                        }
+                    ],
+                    "end": 55273591,
+                    "exons": [
+                        {
+                            "end": 55177651,
+                            "start": 55177540
+                        },
+                        {
+                            "end": 55210130,
+                            "start": 55209979
+                        },
+                        {
+                            "end": 55211181,
+                            "start": 55210998
+                        },
+                        {
+                            "end": 55214433,
+                            "start": 55214299
+                        },
+                        {
+                            "end": 55219055,
+                            "start": 55218987
+                        },
+                        {
+                            "end": 55220357,
+                            "start": 55220239
+                        },
+                        {
+                            "end": 55221845,
+                            "start": 55221704
+                        },
+                        {
+                            "end": 55223639,
+                            "start": 55223523
+                        },
+                        {
+                            "end": 55224352,
+                            "start": 55224226
+                        },
+                        {
+                            "end": 55224525,
+                            "start": 55224452
+                        },
+                        {
+                            "end": 55225446,
+                            "start": 55225356
+                        },
+                        {
+                            "end": 55228031,
+                            "start": 55227832
+                        },
+                        {
+                            "end": 55229324,
+                            "start": 55229192
+                        },
+                        {
+                            "end": 55231516,
+                            "start": 55231426
+                        },
+                        {
+                            "end": 55233130,
+                            "start": 55232973
+                        },
+                        {
+                            "end": 55238906,
+                            "start": 55238868
+                        },
+                        {
+                            "end": 55240817,
+                            "start": 55240676
+                        },
+                        {
+                            "end": 55241736,
+                            "start": 55241614
+                        },
+                        {
+                            "end": 55242513,
+                            "start": 55242415
+                        },
+                        {
+                            "end": 55249171,
+                            "start": 55248986
+                        },
+                        {
+                            "end": 55259567,
+                            "start": 55259412
+                        },
+                        {
+                            "end": 55260534,
+                            "start": 55260459
+                        },
+                        {
+                            "end": 55266556,
+                            "start": 55266410
+                        },
+                        {
+                            "end": 55268106,
+                            "start": 55268009
+                        },
+                        {
+                            "end": 55269048,
+                            "start": 55268881
+                        },
+                        {
+                            "end": 55269475,
+                            "start": 55269428
+                        },
+                        {
+                            "end": 55270318,
+                            "start": 55270210
+                        },
+                        {
+                            "end": 55273591,
+                            "start": 55272949
+                        }
+                    ],
+                    "is_best_transcript": false,
+                    "name": "ENST00000454757",
+                    "start": 55177540
+                }
+            ]
+        },
+        {
+            "aliases": [
+                "DSTYK"
+            ],
+            "chr": "1",
+            "end": 205180727,
+            "name": "ENSG00000133059",
+            "start": 205111632,
+            "strand": "-",
+            "transcripts": [
+                {
+                    "cdna_coding_end": 1831,
+                    "cdna_coding_start": 65,
+                    "domains": [
+                        {
+                            "name": "SM00220",
+                            "regions": [
+                                {
+                                    "end": 565,
+                                    "start": 337
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SSF56112",
+                            "regions": [
+                                {
+                                    "end": 585,
+                                    "start": 452
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PF00069",
+                            "regions": [
+                                {
+                                    "end": 556,
+                                    "start": 451
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PF07714",
+                            "regions": [
+                                {
+                                    "end": 558,
+                                    "start": 471
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PS50011",
+                            "regions": [
+                                {
+                                    "end": 565,
+                                    "start": 312
+                                }
+                            ]
+                        }
+                    ],
+                    "end": 205180727,
+                    "exons": [
+                        {
+                            "end": 205116873,
+                            "start": 205111632
+                        },
+                        {
+                            "end": 205117467,
+                            "start": 205117333
+                        },
+                        {
+                            "end": 205119898,
+                            "start": 205119808
+                        },
+                        {
+                            "end": 205133083,
+                            "start": 205133055
+                        },
+                        {
+                            "end": 205138960,
+                            "start": 205138291
+                        },
+                        {
+                            "end": 205156934,
+                            "start": 205156546
+                        },
+                        {
+                            "end": 205180727,
+                            "start": 205180399
+                        }
+                    ],
+                    "is_best_transcript": false,
+                    "name": "ENST00000367160",
+                    "start": 205111632
+                },
+                {
+                    "cdna_coding_end": 2686,
+                    "cdna_coding_start": 32,
+                    "domains": [
+                        {
+                            "name": "PF07714",
+                            "regions": [
+                                {
+                                    "end": 820,
+                                    "start": 654
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PS50011",
+                            "regions": [
+                                {
+                                    "end": 884,
+                                    "start": 652
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SSF56112",
+                            "regions": [
+                                {
+                                    "end": 853,
+                                    "start": 627
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SM00220",
+                            "regions": [
+                                {
+                                    "end": 861,
+                                    "start": 652
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PF00069",
+                            "regions": [
+                                {
+                                    "end": 824,
+                                    "start": 654
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SM00219",
+                            "regions": [
+                                {
+                                    "end": 861,
+                                    "start": 652
+                                }
+                            ]
+                        }
+                    ],
+                    "end": 205180694,
+                    "exons": [
+                        {
+                            "end": 205116873,
+                            "start": 205111633
+                        },
+                        {
+                            "end": 205119922,
+                            "start": 205119808
+                        },
+                        {
+                            "end": 205126514,
+                            "start": 205126401
+                        },
+                        {
+                            "end": 205128807,
+                            "start": 205128675
+                        },
+                        {
+                            "end": 205129398,
+                            "start": 205129242
+                        },
+                        {
+                            "end": 205130515,
+                            "start": 205130386
+                        },
+                        {
+                            "end": 205131340,
+                            "start": 205131164
+                        },
+                        {
+                            "end": 205132134,
+                            "start": 205132051
+                        },
+                        {
+                            "end": 205133083,
+                            "start": 205132851
+                        },
+                        {
+                            "end": 205138960,
+                            "start": 205138291
+                        },
+                        {
+                            "end": 205156934,
+                            "start": 205156546
+                        },
+                        {
+                            "end": 205180694,
+                            "start": 205180399
+                        }
+                    ],
+                    "is_best_transcript": false,
+                    "name": "ENST00000367161",
+                    "start": 205111633
+                },
+                {
+                    "cdna_coding_end": 2821,
+                    "cdna_coding_start": 32,
+                    "domains": [
+                        {
+                            "name": "PF07714",
+                            "regions": [
+                                {
+                                    "end": 899,
+                                    "start": 654
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PS50011",
+                            "regions": [
+                                {
+                                    "end": 906,
+                                    "start": 652
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SSF56112",
+                            "regions": [
+                                {
+                                    "end": 897,
+                                    "start": 638
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SM00220",
+                            "regions": [
+                                {
+                                    "end": 906,
+                                    "start": 652
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SM00219",
+                            "regions": [
+                                {
+                                    "end": 906,
+                                    "start": 652
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PF00069",
+                            "regions": [
+                                {
+                                    "end": 897,
+                                    "start": 654
+                                }
+                            ]
+                        }
+                    ],
+                    "end": 205180694,
+                    "exons": [
+                        {
+                            "end": 205116873,
+                            "start": 205111633
+                        },
+                        {
+                            "end": 205117467,
+                            "start": 205117333
+                        },
+                        {
+                            "end": 205119922,
+                            "start": 205119808
+                        },
+                        {
+                            "end": 205126514,
+                            "start": 205126401
+                        },
+                        {
+                            "end": 205128807,
+                            "start": 205128675
+                        },
+                        {
+                            "end": 205129398,
+                            "start": 205129242
+                        },
+                        {
+                            "end": 205130515,
+                            "start": 205130386
+                        },
+                        {
+                            "end": 205131340,
+                            "start": 205131164
+                        },
+                        {
+                            "end": 205132134,
+                            "start": 205132051
+                        },
+                        {
+                            "end": 205133083,
+                            "start": 205132851
+                        },
+                        {
+                            "end": 205138960,
+                            "start": 205138291
+                        },
+                        {
+                            "end": 205156934,
+                            "start": 205156546
+                        },
+                        {
+                            "end": 205180694,
+                            "start": 205180399
+                        }
+                    ],
+                    "is_best_transcript": true,
+                    "name": "ENST00000367162",
+                    "start": 205111633
+                }
+            ]
+        },
+        {
+            "aliases": [
+                "NDUFA12"
+            ],
+            "chr": "12",
+            "end": 95397546,
+            "name": "ENSG00000184752",
+            "start": 95290831,
+            "strand": "-",
+            "transcripts": [
+                {
+                    "domains": [
+                    ],
+                    "end": 95397436,
+                    "exons": [
+                        {
+                            "end": 95291086,
+                            "start": 95290831
+                        },
+                        {
+                            "end": 95318582,
+                            "start": 95318422
+                        },
+                        {
+                            "end": 95322039,
+                            "start": 95321793
+                        },
+                        {
+                            "end": 95396597,
+                            "start": 95396515
+                        },
+                        {
+                            "end": 95397436,
+                            "start": 95397371
+                        }
+                    ],
+                    "is_best_transcript": false,
+                    "name": "ENST00000552205",
+                    "start": 95290831
+                },
+                {
+                    "cdna_coding_end": 188,
+                    "cdna_coding_start": 21,
+                    "domains": [
+                    ],
+                    "end": 95397476,
+                    "exons": [
+                        {
+                            "end": 95365261,
+                            "start": 95365108
+                        },
+                        {
+                            "end": 95396597,
+                            "start": 95396582
+                        },
+                        {
+                            "end": 95397476,
+                            "start": 95397371
+                        }
+                    ],
+                    "is_best_transcript": false,
+                    "name": "ENST00000547157",
+                    "start": 95365108
+                },
+                {
+                    "cdna_coding_end": 144,
+                    "cdna_coding_start": 1,
+                    "domains": [
+                        {
+                            "name": "PF05071",
+                            "regions": [
+                                {
+                                    "end": 33,
+                                    "start": 12
+                                }
+                            ]
+                        }
+                    ],
+                    "end": 95397384,
+                    "exons": [
+                        {
+                            "end": 95365396,
+                            "start": 95365109
+                        },
+                        {
+                            "end": 95388033,
+                            "start": 95387946
+                        },
+                        {
+                            "end": 95390752,
+                            "start": 95390680
+                        },
+                        {
+                            "end": 95396597,
+                            "start": 95396515
+                        },
+                        {
+                            "end": 95397384,
+                            "start": 95397371
+                        }
+                    ],
+                    "is_best_transcript": false,
+                    "name": "ENST00000551991",
+                    "start": 95365109
+                },
+                {
+                    "cdna_coding_end": 528,
+                    "cdna_coding_start": 91,
+                    "domains": [
+                        {
+                            "name": "PF05071",
+                            "regions": [
+                                {
+                                    "end": 137,
+                                    "start": 36
+                                }
+                            ]
+                        }
+                    ],
+                    "end": 95397546,
+                    "exons": [
+                        {
+                            "end": 95365396,
+                            "start": 95365109
+                        },
+                        {
+                            "end": 95388033,
+                            "start": 95387946
+                        },
+                        {
+                            "end": 95396597,
+                            "start": 95396515
+                        },
+                        {
+                            "end": 95397546,
+                            "start": 95397371
+                        }
+                    ],
+                    "is_best_transcript": true,
+                    "name": "ENST00000327772",
+                    "start": 95365109
+                },
+                {
+                    "cdna_coding_end": 225,
+                    "cdna_coding_start": 34,
+                    "domains": [
+                        {
+                            "name": "PF05071",
+                            "regions": [
+                                {
+                                    "end": 53,
+                                    "start": 36
+                                }
+                            ]
+                        }
+                    ],
+                    "end": 95397489,
+                    "exons": [
+                        {
+                            "end": 95365396,
+                            "start": 95365112
+                        },
+                        {
+                            "end": 95396597,
+                            "start": 95396515
+                        },
+                        {
+                            "end": 95397489,
+                            "start": 95397371
+                        }
+                    ],
+                    "is_best_transcript": false,
+                    "name": "ENST00000547986",
+                    "start": 95365112
+                },
+                {
+                    "cdna_coding_end": 368,
+                    "cdna_coding_start": 69,
+                    "domains": [
+                        {
+                            "name": "PF05071",
+                            "regions": [
+                                {
+                                    "end": 87,
+                                    "start": 36
+                                }
+                            ]
+                        }
+                    ],
+                    "end": 95397524,
+                    "exons": [
+                        {
+                            "end": 95365396,
+                            "start": 95365254
+                        },
+                        {
+                            "end": 95366265,
+                            "start": 95366171
+                        },
+                        {
+                            "end": 95388033,
+                            "start": 95387946
+                        },
+                        {
+                            "end": 95396597,
+                            "start": 95396515
+                        },
+                        {
+                            "end": 95397524,
+                            "start": 95397371
+                        }
+                    ],
+                    "is_best_transcript": false,
+                    "name": "ENST00000546788",
+                    "start": 95365254
+                }
+            ]
+        },
+        {
+            "aliases": [
+                "FRMD6"
+            ],
+            "chr": "14",
+            "end": 52197445,
+            "name": "ENSG00000139926",
+            "start": 51955818,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "cdna_coding_end": 2338,
+                    "cdna_coding_start": 494,
+                    "domains": [
+                        {
+                            "name": "PF09379",
+                            "regions": [
+                                {
+                                    "end": 109,
+                                    "start": 20
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PF09380",
+                            "regions": [
+                                {
+                                    "end": 322,
+                                    "start": 237
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SSF50729",
+                            "regions": [
+                                {
+                                    "end": 375,
+                                    "start": 219
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SM00295",
+                            "regions": [
+                                {
+                                    "end": 226,
+                                    "start": 12
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PS50057",
+                            "regions": [
+                                {
+                                    "end": 320,
+                                    "start": 16
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PF00373",
+                            "regions": [
+                                {
+                                    "end": 226,
+                                    "start": 115
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SSF47031",
+                            "regions": [
+                                {
+                                    "end": 218,
+                                    "start": 110
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SSF54236",
+                            "regions": [
+                                {
+                                    "end": 110,
+                                    "start": 14
+                                }
+                            ]
+                        }
+                    ],
+                    "end": 52197177,
+                    "exons": [
+                        {
+                            "end": 51956138,
+                            "start": 51955855
+                        },
+                        {
+                            "end": 52037128,
+                            "start": 52037066
+                        },
+                        {
+                            "end": 52156653,
+                            "start": 52156409
+                        },
+                        {
+                            "end": 52164950,
+                            "start": 52164860
+                        },
+                        {
+                            "end": 52167853,
+                            "start": 52167774
+                        },
+                        {
+                            "end": 52169306,
+                            "start": 52169230
+                        },
+                        {
+                            "end": 52171653,
+                            "start": 52171467
+                        },
+                        {
+                            "end": 52174951,
+                            "start": 52174796
+                        },
+                        {
+                            "end": 52178314,
+                            "start": 52178249
+                        },
+                        {
+                            "end": 52179269,
+                            "start": 52179201
+                        },
+                        {
+                            "end": 52182217,
+                            "start": 52182043
+                        },
+                        {
+                            "end": 52187108,
+                            "start": 52186773
+                        },
+                        {
+                            "end": 52188798,
+                            "start": 52188667
+                        },
+                        {
+                            "end": 52192588,
+                            "start": 52192497
+                        },
+                        {
+                            "end": 52197177,
+                            "start": 52194463
+                        }
+                    ],
+                    "is_best_transcript": false,
+                    "name": "ENST00000356218",
+                    "start": 51955855
+                },
+                {
+                    "cdna_coding_end": 2130,
+                    "cdna_coding_start": 286,
+                    "domains": [
+                        {
+                            "name": "PF00373",
+                            "regions": [
+                                {
+                                    "end": 226,
+                                    "start": 115
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SSF47031",
+                            "regions": [
+                                {
+                                    "end": 218,
+                                    "start": 110
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SSF54236",
+                            "regions": [
+                                {
+                                    "end": 110,
+                                    "start": 14
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PS50057",
+                            "regions": [
+                                {
+                                    "end": 320,
+                                    "start": 16
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SM00295",
+                            "regions": [
+                                {
+                                    "end": 226,
+                                    "start": 12
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SSF50729",
+                            "regions": [
+                                {
+                                    "end": 375,
+                                    "start": 219
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PF09380",
+                            "regions": [
+                                {
+                                    "end": 322,
+                                    "start": 237
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PF09379",
+                            "regions": [
+                                {
+                                    "end": 109,
+                                    "start": 20
+                                }
+                            ]
+                        }
+                    ],
+                    "end": 52197445,
+                    "exons": [
+                        {
+                            "end": 52118714,
+                            "start": 52118576
+                        },
+                        {
+                            "end": 52156653,
+                            "start": 52156409
+                        },
+                        {
+                            "end": 52164950,
+                            "start": 52164860
+                        },
+                        {
+                            "end": 52167853,
+                            "start": 52167774
+                        },
+                        {
+                            "end": 52169306,
+                            "start": 52169230
+                        },
+                        {
+                            "end": 52171653,
+                            "start": 52171467
+                        },
+                        {
+                            "end": 52174951,
+                            "start": 52174796
+                        },
+                        {
+                            "end": 52178314,
+                            "start": 52178249
+                        },
+                        {
+                            "end": 52179269,
+                            "start": 52179201
+                        },
+                        {
+                            "end": 52182217,
+                            "start": 52182043
+                        },
+                        {
+                            "end": 52187108,
+                            "start": 52186773
+                        },
+                        {
+                            "end": 52188798,
+                            "start": 52188667
+                        },
+                        {
+                            "end": 52192588,
+                            "start": 52192497
+                        },
+                        {
+                            "end": 52197445,
+                            "start": 52194463
+                        }
+                    ],
+                    "is_best_transcript": true,
+                    "name": "ENST00000395718",
+                    "start": 52118576
+                },
+                {
+                    "cdna_coding_end": 2065,
+                    "cdna_coding_start": 197,
+                    "domains": [
+                        {
+                            "name": "PF09380",
+                            "regions": [
+                                {
+                                    "end": 330,
+                                    "start": 245
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PF09379",
+                            "regions": [
+                                {
+                                    "end": 117,
+                                    "start": 20
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SSF47031",
+                            "regions": [
+                                {
+                                    "end": 226,
+                                    "start": 118
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PF00373",
+                            "regions": [
+                                {
+                                    "end": 234,
+                                    "start": 123
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SSF54236",
+                            "regions": [
+                                {
+                                    "end": 118,
+                                    "start": 14
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PS50057",
+                            "regions": [
+                                {
+                                    "end": 328,
+                                    "start": 16
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SM00295",
+                            "regions": [
+                                {
+                                    "end": 234,
+                                    "start": 12
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SSF50729",
+                            "regions": [
+                                {
+                                    "end": 383,
+                                    "start": 227
+                                }
+                            ]
+                        }
+                    ],
+                    "end": 52195654,
+                    "exons": [
+                        {
+                            "end": 52118714,
+                            "start": 52118665
+                        },
+                        {
+                            "end": 52156653,
+                            "start": 52156409
+                        },
+                        {
+                            "end": 52164950,
+                            "start": 52164860
+                        },
+                        {
+                            "end": 52167877,
+                            "start": 52167774
+                        },
+                        {
+                            "end": 52169306,
+                            "start": 52169230
+                        },
+                        {
+                            "end": 52171653,
+                            "start": 52171467
+                        },
+                        {
+                            "end": 52174951,
+                            "start": 52174796
+                        },
+                        {
+                            "end": 52178314,
+                            "start": 52178249
+                        },
+                        {
+                            "end": 52179269,
+                            "start": 52179201
+                        },
+                        {
+                            "end": 52182217,
+                            "start": 52182043
+                        },
+                        {
+                            "end": 52187108,
+                            "start": 52186773
+                        },
+                        {
+                            "end": 52188798,
+                            "start": 52188667
+                        },
+                        {
+                            "end": 52192588,
+                            "start": 52192497
+                        },
+                        {
+                            "end": 52195654,
+                            "start": 52194463
+                        }
+                    ],
+                    "is_best_transcript": false,
+                    "name": "ENST00000344768",
+                    "start": 52118665
+                },
+                {
+                    "domains": [
+                    ],
+                    "end": 52164945,
+                    "exons": [
+                        {
+                            "end": 52118935,
+                            "start": 52118698
+                        },
+                        {
+                            "end": 52156653,
+                            "start": 52156409
+                        },
+                        {
+                            "end": 52164945,
+                            "start": 52164860
+                        }
+                    ],
+                    "is_best_transcript": false,
+                    "name": "ENST00000554778",
+                    "start": 52118698
+                },
+                {
+                    "domains": [
+                    ],
+                    "end": 52174806,
+                    "exons": [
+                        {
+                            "end": 52164950,
+                            "start": 52164706
+                        },
+                        {
+                            "end": 52167877,
+                            "start": 52167774
+                        },
+                        {
+                            "end": 52169306,
+                            "start": 52169230
+                        },
+                        {
+                            "end": 52171653,
+                            "start": 52171467
+                        },
+                        {
+                            "end": 52174806,
+                            "start": 52174796
+                        }
+                    ],
+                    "is_best_transcript": false,
+                    "name": "ENST00000555936",
+                    "start": 52164706
+                },
+                {
+                    "cdna_coding_end": 1775,
+                    "cdna_coding_start": 138,
+                    "domains": [
+                        {
+                            "name": "SSF50729",
+                            "regions": [
+                                {
+                                    "end": 306,
+                                    "start": 150
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PS50057",
+                            "regions": [
+                                {
+                                    "end": 251,
+                                    "start": 1
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SSF54236",
+                            "regions": [
+                                {
+                                    "end": 41,
+                                    "start": 1
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SSF47031",
+                            "regions": [
+                                {
+                                    "end": 149,
+                                    "start": 41
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PF00373",
+                            "regions": [
+                                {
+                                    "end": 157,
+                                    "start": 46
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PF09380",
+                            "regions": [
+                                {
+                                    "end": 253,
+                                    "start": 168
+                                }
+                            ]
+                        }
+                    ],
+                    "end": 52197148,
+                    "exons": [
+                        {
+                            "end": 52164950,
+                            "start": 52164831
+                        },
+                        {
+                            "end": 52167853,
+                            "start": 52167774
+                        },
+                        {
+                            "end": 52169306,
+                            "start": 52169230
+                        },
+                        {
+                            "end": 52171653,
+                            "start": 52171467
+                        },
+                        {
+                            "end": 52174951,
+                            "start": 52174796
+                        },
+                        {
+                            "end": 52178314,
+                            "start": 52178249
+                        },
+                        {
+                            "end": 52179269,
+                            "start": 52179201
+                        },
+                        {
+                            "end": 52182217,
+                            "start": 52182043
+                        },
+                        {
+                            "end": 52187108,
+                            "start": 52186773
+                        },
+                        {
+                            "end": 52188798,
+                            "start": 52188667
+                        },
+                        {
+                            "end": 52192588,
+                            "start": 52192497
+                        },
+                        {
+                            "end": 52197148,
+                            "start": 52194463
+                        }
+                    ],
+                    "is_best_transcript": false,
+                    "name": "ENST00000554167",
+                    "start": 52164831
+                },
+                {
+                    "cdna_coding_end": 390,
+                    "cdna_coding_start": 1,
+                    "domains": [
+                        {
+                            "name": "PS50057",
+                            "regions": [
+                                {
+                                    "end": 129,
+                                    "start": 1
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PF00373",
+                            "regions": [
+                                {
+                                    "end": 124,
+                                    "start": 13
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SSF47031",
+                            "regions": [
+                                {
+                                    "end": 116,
+                                    "start": 8
+                                }
+                            ]
+                        }
+                    ],
+                    "end": 52175062,
+                    "exons": [
+                        {
+                            "end": 52169306,
+                            "start": 52169266
+                        },
+                        {
+                            "end": 52171653,
+                            "start": 52171467
+                        },
+                        {
+                            "end": 52175062,
+                            "start": 52174796
+                        }
+                    ],
+                    "is_best_transcript": false,
+                    "name": "ENST00000557405",
+                    "start": 52169266
+                },
+                {
+                    "cdna_coding_end": 618,
+                    "cdna_coding_start": 1,
+                    "domains": [
+                        {
+                            "name": "PF09380",
+                            "regions": [
+                                {
+                                    "end": 60,
+                                    "start": 2
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PS50057",
+                            "regions": [
+                                {
+                                    "end": 58,
+                                    "start": 1
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SSF50729",
+                            "regions": [
+                                {
+                                    "end": 113,
+                                    "start": 2
+                                }
+                            ]
+                        }
+                    ],
+                    "end": 52187243,
+                    "exons": [
+                        {
+                            "end": 52179269,
+                            "start": 52179231
+                        },
+                        {
+                            "end": 52182217,
+                            "start": 52182043
+                        },
+                        {
+                            "end": 52187243,
+                            "start": 52186773
+                        }
+                    ],
+                    "is_best_transcript": false,
+                    "name": "ENST00000555197",
+                    "start": 52179231
+                },
+                {
+                    "cdna_coding_end": 573,
+                    "cdna_coding_start": 145,
+                    "domains": [
+                    ],
+                    "end": 52192513,
+                    "exons": [
+                        {
+                            "end": 52184066,
+                            "start": 52183973
+                        },
+                        {
+                            "end": 52187108,
+                            "start": 52186773
+                        },
+                        {
+                            "end": 52188798,
+                            "start": 52188673
+                        },
+                        {
+                            "end": 52192513,
+                            "start": 52192497
+                        }
+                    ],
+                    "is_best_transcript": false,
+                    "name": "ENST00000555703",
+                    "start": 52183973
+                },
+                {
+                    "cdna_coding_end": 939,
+                    "cdna_coding_start": 145,
+                    "domains": [
+                    ],
+                    "end": 52195487,
+                    "exons": [
+                        {
+                            "end": 52184066,
+                            "start": 52183973
+                        },
+                        {
+                            "end": 52187108,
+                            "start": 52186773
+                        },
+                        {
+                            "end": 52188798,
+                            "start": 52188667
+                        },
+                        {
+                            "end": 52192588,
+                            "start": 52192497
+                        },
+                        {
+                            "end": 52195487,
+                            "start": 52194463
+                        }
+                    ],
+                    "is_best_transcript": false,
+                    "name": "ENST00000553556",
+                    "start": 52183973
+                }
+            ]
+        },
+        {
+            "aliases": [
+                "PRKCB"
+            ],
+            "chr": "16",
+            "end": 24231932,
+            "name": "ENSG00000166501",
+            "start": 23847322,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "cdna_coding_end": 2191,
+                    "cdna_coding_start": 176,
+                    "domains": [
+                        {
+                            "name": "SM00239",
+                            "regions": [
+                                {
+                                    "end": 275,
+                                    "start": 172
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PF07714",
+                            "regions": [
+                                {
+                                    "end": 583,
+                                    "start": 344
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SSF49562",
+                            "regions": [
+                                {
+                                    "end": 288,
+                                    "start": 157
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SM00109",
+                            "regions": [
+                                {
+                                    "end": 86,
+                                    "start": 37
+                                },
+                                {
+                                    "end": 151,
+                                    "start": 102
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PS50011",
+                            "regions": [
+                                {
+                                    "end": 600,
+                                    "start": 342
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PR00008",
+                            "regions": [
+                                {
+                                    "end": 48,
+                                    "start": 34
+                                },
+                                {
+                                    "end": 59,
+                                    "start": 50
+                                },
+                                {
+                                    "end": 74,
+                                    "start": 63
+                                },
+                                {
+                                    "end": 152,
+                                    "start": 140
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PF00433",
+                            "regions": [
+                                {
+                                    "end": 666,
+                                    "start": 623
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SM00220",
+                            "regions": [
+                                {
+                                    "end": 600,
+                                    "start": 342
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PF00168",
+                            "regions": [
+                                {
+                                    "end": 259,
+                                    "start": 175
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SSF57889",
+                            "regions": [
+                                {
+                                    "end": 92,
+                                    "start": 6
+                                },
+                                {
+                                    "end": 157,
+                                    "start": 101
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PF00130",
+                            "regions": [
+                                {
+                                    "end": 87,
+                                    "start": 37
+                                },
+                                {
+                                    "end": 153,
+                                    "start": 102
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PS50081",
+                            "regions": [
+                                {
+                                    "end": 86,
+                                    "start": 36
+                                },
+                                {
+                                    "end": 151,
+                                    "start": 101
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SSF56112",
+                            "regions": [
+                                {
+                                    "end": 627,
+                                    "start": 317
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PF00069",
+                            "regions": [
+                                {
+                                    "end": 586,
+                                    "start": 343
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SM00219",
+                            "regions": [
+                                {
+                                    "end": 576,
+                                    "start": 342
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PR00360",
+                            "regions": [
+                                {
+                                    "end": 200,
+                                    "start": 188
+                                },
+                                {
+                                    "end": 230,
+                                    "start": 217
+                                },
+                                {
+                                    "end": 248,
+                                    "start": 240
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SM00133",
+                            "regions": [
+                                {
+                                    "end": 664,
+                                    "start": 601
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PS50004",
+                            "regions": [
+                                {
+                                    "end": 260,
+                                    "start": 173
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PIRSF000550",
+                            "regions": [
+                                {
+                                    "end": 671,
+                                    "start": 1
+                                }
+                            ]
+                        }
+                    ],
+                    "end": 24231932,
+                    "exons": [
+                        {
+                            "end": 23847669,
+                            "start": 23847322
+                        },
+                        {
+                            "end": 23848727,
+                            "start": 23848696
+                        },
+                        {
+                            "end": 23999911,
+                            "start": 23999829
+                        },
+                        {
+                            "end": 24043568,
+                            "start": 24043457
+                        },
+                        {
+                            "end": 24046868,
+                            "start": 24046740
+                        },
+                        {
+                            "end": 24104268,
+                            "start": 24104112
+                        },
+                        {
+                            "end": 24105618,
+                            "start": 24105484
+                        },
+                        {
+                            "end": 24124390,
+                            "start": 24124294
+                        },
+                        {
+                            "end": 24135302,
+                            "start": 24135156
+                        },
+                        {
+                            "end": 24166178,
+                            "start": 24166005
+                        },
+                        {
+                            "end": 24183682,
+                            "start": 24183591
+                        },
+                        {
+                            "end": 24185901,
+                            "start": 24185839
+                        },
+                        {
+                            "end": 24192249,
+                            "start": 24192111
+                        },
+                        {
+                            "end": 24196512,
+                            "start": 24196432
+                        },
+                        {
+                            "end": 24196888,
+                            "start": 24196781
+                        },
+                        {
+                            "end": 24202551,
+                            "start": 24202411
+                        },
+                        {
+                            "end": 24231932,
+                            "start": 24231282
+                        }
+                    ],
+                    "is_best_transcript": true,
+                    "name": "ENST00000321728",
+                    "start": 23847322
+                },
+                {
+                    "cdna_coding_end": 2174,
+                    "cdna_coding_start": 153,
+                    "domains": [
+                        {
+                            "name": "SM00133",
+                            "regions": [
+                                {
+                                    "end": 663,
+                                    "start": 601
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PS50004",
+                            "regions": [
+                                {
+                                    "end": 260,
+                                    "start": 173
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PIRSF000550",
+                            "regions": [
+                                {
+                                    "end": 672,
+                                    "start": 1
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PF00069",
+                            "regions": [
+                                {
+                                    "end": 586,
+                                    "start": 343
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PR00360",
+                            "regions": [
+                                {
+                                    "end": 200,
+                                    "start": 188
+                                },
+                                {
+                                    "end": 230,
+                                    "start": 217
+                                },
+                                {
+                                    "end": 248,
+                                    "start": 240
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SM00219",
+                            "regions": [
+                                {
+                                    "end": 576,
+                                    "start": 342
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PS50081",
+                            "regions": [
+                                {
+                                    "end": 86,
+                                    "start": 36
+                                },
+                                {
+                                    "end": 151,
+                                    "start": 101
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SSF56112",
+                            "regions": [
+                                {
+                                    "end": 627,
+                                    "start": 317
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SM00220",
+                            "regions": [
+                                {
+                                    "end": 600,
+                                    "start": 342
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PF00433",
+                            "regions": [
+                                {
+                                    "end": 664,
+                                    "start": 627
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PF00130",
+                            "regions": [
+                                {
+                                    "end": 87,
+                                    "start": 37
+                                },
+                                {
+                                    "end": 153,
+                                    "start": 102
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PF00168",
+                            "regions": [
+                                {
+                                    "end": 259,
+                                    "start": 175
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SSF57889",
+                            "regions": [
+                                {
+                                    "end": 92,
+                                    "start": 6
+                                },
+                                {
+                                    "end": 157,
+                                    "start": 101
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PR00008",
+                            "regions": [
+                                {
+                                    "end": 48,
+                                    "start": 34
+                                },
+                                {
+                                    "end": 59,
+                                    "start": 50
+                                },
+                                {
+                                    "end": 74,
+                                    "start": 63
+                                },
+                                {
+                                    "end": 152,
+                                    "start": 140
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PS50011",
+                            "regions": [
+                                {
+                                    "end": 600,
+                                    "start": 342
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SM00109",
+                            "regions": [
+                                {
+                                    "end": 86,
+                                    "start": 37
+                                },
+                                {
+                                    "end": 151,
+                                    "start": 102
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PF07714",
+                            "regions": [
+                                {
+                                    "end": 583,
+                                    "start": 344
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SSF49562",
+                            "regions": [
+                                {
+                                    "end": 288,
+                                    "start": 157
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SM00239",
+                            "regions": [
+                                {
+                                    "end": 275,
+                                    "start": 172
+                                }
+                            ]
+                        }
+                    ],
+                    "end": 24231932,
+                    "exons": [
+                        {
+                            "end": 23847669,
+                            "start": 23847345
+                        },
+                        {
+                            "end": 23848727,
+                            "start": 23848696
+                        },
+                        {
+                            "end": 23999911,
+                            "start": 23999829
+                        },
+                        {
+                            "end": 24043568,
+                            "start": 24043457
+                        },
+                        {
+                            "end": 24046868,
+                            "start": 24046740
+                        },
+                        {
+                            "end": 24104268,
+                            "start": 24104112
+                        },
+                        {
+                            "end": 24105618,
+                            "start": 24105484
+                        },
+                        {
+                            "end": 24124390,
+                            "start": 24124294
+                        },
+                        {
+                            "end": 24135302,
+                            "start": 24135156
+                        },
+                        {
+                            "end": 24166178,
+                            "start": 24166005
+                        },
+                        {
+                            "end": 24183682,
+                            "start": 24183591
+                        },
+                        {
+                            "end": 24185901,
+                            "start": 24185839
+                        },
+                        {
+                            "end": 24192249,
+                            "start": 24192111
+                        },
+                        {
+                            "end": 24196512,
+                            "start": 24196432
+                        },
+                        {
+                            "end": 24196888,
+                            "start": 24196781
+                        },
+                        {
+                            "end": 24202551,
+                            "start": 24202411
+                        },
+                        {
+                            "end": 24231932,
+                            "start": 24225979
+                        }
+                    ],
+                    "is_best_transcript": false,
+                    "name": "ENST00000303531",
+                    "start": 23847345
+                },
+                {
+                    "cdna_coding_end": 268,
+                    "cdna_coding_start": 95,
+                    "domains": [
+                        {
+                            "name": "PR00008",
+                            "regions": [
+                                {
+                                    "end": 48,
+                                    "start": 34
+                                },
+                                {
+                                    "end": 57,
+                                    "start": 50
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PS50081",
+                            "regions": [
+                                {
+                                    "end": 57,
+                                    "start": 36
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SSF57889",
+                            "regions": [
+                                {
+                                    "end": 57,
+                                    "start": 6
+                                }
+                            ]
+                        }
+                    ],
+                    "end": 23880647,
+                    "exons": [
+                        {
+                            "end": 23847669,
+                            "start": 23847403
+                        },
+                        {
+                            "end": 23880647,
+                            "start": 23880435
+                        }
+                    ],
+                    "is_best_transcript": false,
+                    "name": "ENST00000498058",
+                    "start": 23847403
+                },
+                {
+                    "domains": [
+                    ],
+                    "end": 24124386,
+                    "exons": [
+                        {
+                            "end": 23848727,
+                            "start": 23848544
+                        },
+                        {
+                            "end": 24104268,
+                            "start": 24104112
+                        },
+                        {
+                            "end": 24105618,
+                            "start": 24105484
+                        },
+                        {
+                            "end": 24124386,
+                            "start": 24124294
+                        }
+                    ],
+                    "is_best_transcript": false,
+                    "name": "ENST00000498739",
+                    "start": 23848544
+                },
+                {
+                    "domains": [
+                    ],
+                    "end": 24192166,
+                    "exons": [
+                        {
+                            "end": 24163176,
+                            "start": 24163006
+                        },
+                        {
+                            "end": 24166178,
+                            "start": 24166005
+                        },
+                        {
+                            "end": 24183682,
+                            "start": 24183591
+                        },
+                        {
+                            "end": 24185901,
+                            "start": 24185839
+                        },
+                        {
+                            "end": 24192166,
+                            "start": 24192111
+                        }
+                    ],
+                    "is_best_transcript": false,
+                    "name": "ENST00000472066",
+                    "start": 24163006
+                },
+                {
+                    "domains": [
+                    ],
+                    "end": 24202909,
+                    "exons": [
+                        {
+                            "end": 24196888,
+                            "start": 24196852
+                        },
+                        {
+                            "end": 24202909,
+                            "start": 24202411
+                        }
+                    ],
+                    "is_best_transcript": false,
+                    "name": "ENST00000466124",
+                    "start": 24196852
+                }
+            ]
+        },
+        {
+            "aliases": [
+                "GIMAP4"
+            ],
+            "chr": "7",
+            "end": 150271041,
+            "name": "ENSG00000133574",
+            "start": 150264365,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "cdna_coding_end": 1165,
+                    "cdna_coding_start": 176,
+                    "domains": [
+                        {
+                            "name": "PF04548",
+                            "regions": [
+                                {
+                                    "end": 238,
+                                    "start": 31
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SSF52540",
+                            "regions": [
+                                {
+                                    "end": 288,
+                                    "start": 24
+                                }
+                            ]
+                        }
+                    ],
+                    "end": 150271041,
+                    "exons": [
+                        {
+                            "end": 150264525,
+                            "start": 150264365
+                        },
+                        {
+                            "end": 150267047,
+                            "start": 150266976
+                        },
+                        {
+                            "end": 150271041,
+                            "start": 150269217
+                        }
+                    ],
+                    "is_best_transcript": true,
+                    "name": "ENST00000255945",
+                    "start": 150264365
+                },
+                {
+                    "cdna_coding_end": 1115,
+                    "cdna_coding_start": 84,
+                    "domains": [
+                        {
+                            "name": "PF04548",
+                            "regions": [
+                                {
+                                    "end": 252,
+                                    "start": 45
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SSF52540",
+                            "regions": [
+                                {
+                                    "end": 302,
+                                    "start": 38
+                                }
+                            ]
+                        }
+                    ],
+                    "end": 150270602,
+                    "exons": [
+                        {
+                            "end": 150264525,
+                            "start": 150264457
+                        },
+                        {
+                            "end": 150267089,
+                            "start": 150266976
+                        },
+                        {
+                            "end": 150270602,
+                            "start": 150269217
+                        }
+                    ],
+                    "is_best_transcript": false,
+                    "name": "ENST00000461940",
+                    "start": 150264457
+                },
+                {
+                    "cdna_coding_end": 552,
+                    "cdna_coding_start": 100,
+                    "domains": [
+                        {
+                            "name": "SSF52540",
+                            "regions": [
+                                {
+                                    "end": 151,
+                                    "start": 38
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PF04548",
+                            "regions": [
+                                {
+                                    "end": 151,
+                                    "start": 45
+                                }
+                            ]
+                        }
+                    ],
+                    "end": 150269569,
+                    "exons": [
+                        {
+                            "end": 150264608,
+                            "start": 150264524
+                        },
+                        {
+                            "end": 150267089,
+                            "start": 150266976
+                        },
+                        {
+                            "end": 150269569,
+                            "start": 150269217
+                        }
+                    ],
+                    "is_best_transcript": false,
+                    "name": "ENST00000479232",
+                    "start": 150264524
+                }
+            ]
+        },
+        {
+            "aliases": [
+                "IL7"
+            ],
+            "chr": "8",
+            "end": 79717758,
+            "name": "ENSG00000104432",
+            "start": 79587978,
+            "strand": "-",
+            "transcripts": [
+                {
+                    "cdna_coding_end": 1135,
+                    "cdna_coding_start": 602,
+                    "domains": [
+                        {
+                            "name": "PIRSF001942",
+                            "regions": [
+                                {
+                                    "end": 177,
+                                    "start": 1
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PR00435",
+                            "regions": [
+                                {
+                                    "end": 25,
+                                    "start": 2
+                                },
+                                {
+                                    "end": 48,
+                                    "start": 26
+                                },
+                                {
+                                    "end": 77,
+                                    "start": 57
+                                },
+                                {
+                                    "end": 98,
+                                    "start": 78
+                                },
+                                {
+                                    "end": 118,
+                                    "start": 99
+                                },
+                                {
+                                    "end": 173,
+                                    "start": 151
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PF01415",
+                            "regions": [
+                                {
+                                    "end": 173,
+                                    "start": 28
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SM00127",
+                            "regions": [
+                                {
+                                    "end": 173,
+                                    "start": 27
+                                }
+                            ]
+                        }
+                    ],
+                    "end": 79717758,
+                    "exons": [
+                        {
+                            "end": 79646067,
+                            "start": 79645007
+                        },
+                        {
+                            "end": 79648762,
+                            "start": 79648709
+                        },
+                        {
+                            "end": 79650870,
+                            "start": 79650739
+                        },
+                        {
+                            "end": 79652317,
+                            "start": 79652237
+                        },
+                        {
+                            "end": 79710443,
+                            "start": 79710307
+                        },
+                        {
+                            "end": 79717758,
+                            "start": 79717148
+                        }
+                    ],
+                    "is_best_transcript": true,
+                    "name": "ENST00000263851",
+                    "start": 79645007
+                },
+                {
+                    "cdna_coding_end": 758,
+                    "cdna_coding_start": 543,
+                    "domains": [
+                        {
+                            "name": "PR00435",
+                            "regions": [
+                                {
+                                    "end": 25,
+                                    "start": 2
+                                },
+                                {
+                                    "end": 48,
+                                    "start": 26
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PF01415",
+                            "regions": [
+                                {
+                                    "end": 54,
+                                    "start": 28
+                                }
+                            ]
+                        }
+                    ],
+                    "end": 79717699,
+                    "exons": [
+                        {
+                            "end": 79646063,
+                            "start": 79645283
+                        },
+                        {
+                            "end": 79648762,
+                            "start": 79648709
+                        },
+                        {
+                            "end": 79650870,
+                            "start": 79650739
+                        },
+                        {
+                            "end": 79652317,
+                            "start": 79652237
+                        },
+                        {
+                            "end": 79659331,
+                            "start": 79659129
+                        },
+                        {
+                            "end": 79710443,
+                            "start": 79710307
+                        },
+                        {
+                            "end": 79717699,
+                            "start": 79717148
+                        }
+                    ],
+                    "is_best_transcript": false,
+                    "name": "ENST00000518982",
+                    "start": 79645283
+                },
+                {
+                    "cdna_coding_end": 408,
+                    "cdna_coding_start": 7,
+                    "domains": [
+                        {
+                            "name": "PF01415",
+                            "regions": [
+                                {
+                                    "end": 77,
+                                    "start": 28
+                                },
+                                {
+                                    "end": 129,
+                                    "start": 91
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SM00127",
+                            "regions": [
+                                {
+                                    "end": 129,
+                                    "start": 27
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PR00435",
+                            "regions": [
+                                {
+                                    "end": 25,
+                                    "start": 2
+                                },
+                                {
+                                    "end": 48,
+                                    "start": 26
+                                },
+                                {
+                                    "end": 77,
+                                    "start": 57
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PIRSF001942",
+                            "regions": [
+                                {
+                                    "end": 133,
+                                    "start": 1
+                                }
+                            ]
+                        }
+                    ],
+                    "end": 79717163,
+                    "exons": [
+                        {
+                            "end": 79646067,
+                            "start": 79645900
+                        },
+                        {
+                            "end": 79648762,
+                            "start": 79648709
+                        },
+                        {
+                            "end": 79652317,
+                            "start": 79652237
+                        },
+                        {
+                            "end": 79710443,
+                            "start": 79710307
+                        },
+                        {
+                            "end": 79717163,
+                            "start": 79717148
+                        }
+                    ],
+                    "is_best_transcript": false,
+                    "name": "ENST00000520269",
+                    "start": 79645900
+                },
+                {
+                    "cdna_coding_end": 120,
+                    "cdna_coding_start": 7,
+                    "domains": [
+                        {
+                            "name": "PR00435",
+                            "regions": [
+                                {
+                                    "end": 25,
+                                    "start": 2
+                                },
+                                {
+                                    "end": 37,
+                                    "start": 26
+                                }
+                            ]
+                        }
+                    ],
+                    "end": 79717163,
+                    "exons": [
+                        {
+                            "end": 79646067,
+                            "start": 79645900
+                        },
+                        {
+                            "end": 79648762,
+                            "start": 79648709
+                        },
+                        {
+                            "end": 79652317,
+                            "start": 79652237
+                        },
+                        {
+                            "end": 79710443,
+                            "start": 79710363
+                        },
+                        {
+                            "end": 79717163,
+                            "start": 79717148
+                        }
+                    ],
+                    "is_best_transcript": false,
+                    "name": "ENST00000520215",
+                    "start": 79645900
+                },
+                {
+                    "cdna_coding_end": 643,
+                    "cdna_coding_start": 530,
+                    "domains": [
+                        {
+                            "name": "PR00435",
+                            "regions": [
+                                {
+                                    "end": 25,
+                                    "start": 2
+                                },
+                                {
+                                    "end": 37,
+                                    "start": 26
+                                }
+                            ]
+                        }
+                    ],
+                    "end": 79717686,
+                    "exons": [
+                        {
+                            "end": 79646067,
+                            "start": 79645900
+                        },
+                        {
+                            "end": 79648762,
+                            "start": 79648709
+                        },
+                        {
+                            "end": 79650870,
+                            "start": 79650739
+                        },
+                        {
+                            "end": 79652317,
+                            "start": 79652237
+                        },
+                        {
+                            "end": 79710443,
+                            "start": 79710363
+                        },
+                        {
+                            "end": 79717686,
+                            "start": 79717148
+                        }
+                    ],
+                    "is_best_transcript": false,
+                    "name": "ENST00000520317",
+                    "start": 79645900
+                },
+                {
+                    "cdna_coding_end": 195,
+                    "cdna_coding_start": 1,
+                    "domains": [
+                        {
+                            "name": "SM00127",
+                            "regions": [
+                                {
+                                    "end": 60,
+                                    "start": 1
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PF01415",
+                            "regions": [
+                                {
+                                    "end": 60,
+                                    "start": 1
+                                }
+                            ]
+                        }
+                    ],
+                    "end": 79652311,
+                    "exons": [
+                        {
+                            "end": 79646067,
+                            "start": 79645948
+                        },
+                        {
+                            "end": 79652311,
+                            "start": 79652237
+                        }
+                    ],
+                    "is_best_transcript": false,
+                    "name": "ENST00000541183",
+                    "start": 79645948
+                },
+                {
+                    "cdna_coding_end": 817,
+                    "cdna_coding_start": 602,
+                    "domains": [
+                        {
+                            "name": "PF01415",
+                            "regions": [
+                                {
+                                    "end": 54,
+                                    "start": 28
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PR00435",
+                            "regions": [
+                                {
+                                    "end": 25,
+                                    "start": 2
+                                },
+                                {
+                                    "end": 48,
+                                    "start": 26
+                                }
+                            ]
+                        }
+                    ],
+                    "end": 79717758,
+                    "exons": [
+                        {
+                            "end": 79659331,
+                            "start": 79659263
+                        },
+                        {
+                            "end": 79710443,
+                            "start": 79710307
+                        },
+                        {
+                            "end": 79717758,
+                            "start": 79717148
+                        }
+                    ],
+                    "is_best_transcript": false,
+                    "name": "ENST00000379113",
+                    "start": 79659263
+                }
+            ]
+        },
+        {
+            "aliases": [
+                "SVEP1"
+            ],
+            "chr": "9",
+            "end": 113342160,
+            "name": "ENSG00000165124",
+            "start": 113127531,
+            "strand": "-",
+            "transcripts": [
+                {
+                    "cdna_coding_end": 11053,
+                    "cdna_coding_start": 338,
+                    "domains": [
+                        {
+                            "name": "SM00032",
+                            "regions": [
+                                {
+                                    "end": 433,
+                                    "start": 378
+                                },
+                                {
+                                    "end": 493,
+                                    "start": 438
+                                },
+                                {
+                                    "end": 559,
+                                    "start": 498
+                                },
+                                {
+                                    "end": 787,
+                                    "start": 727
+                                },
+                                {
+                                    "end": 1685,
+                                    "start": 1631
+                                },
+                                {
+                                    "end": 1743,
+                                    "start": 1690
+                                },
+                                {
+                                    "end": 1842,
+                                    "start": 1789
+                                },
+                                {
+                                    "end": 1900,
+                                    "start": 1847
+                                },
+                                {
+                                    "end": 1958,
+                                    "start": 1905
+                                },
+                                {
+                                    "end": 2016,
+                                    "start": 1963
+                                },
+                                {
+                                    "end": 2078,
+                                    "start": 2021
+                                },
+                                {
+                                    "end": 2141,
+                                    "start": 2083
+                                },
+                                {
+                                    "end": 2199,
+                                    "start": 2146
+                                },
+                                {
+                                    "end": 2259,
+                                    "start": 2204
+                                },
+                                {
+                                    "end": 2318,
+                                    "start": 2264
+                                },
+                                {
+                                    "end": 2376,
+                                    "start": 2323
+                                },
+                                {
+                                    "end": 2435,
+                                    "start": 2381
+                                },
+                                {
+                                    "end": 2493,
+                                    "start": 2440
+                                },
+                                {
+                                    "end": 2551,
+                                    "start": 2498
+                                },
+                                {
+                                    "end": 2608,
+                                    "start": 2556
+                                },
+                                {
+                                    "end": 2712,
+                                    "start": 2654
+                                },
+                                {
+                                    "end": 2770,
+                                    "start": 2717
+                                },
+                                {
+                                    "end": 2828,
+                                    "start": 2775
+                                },
+                                {
+                                    "end": 2886,
+                                    "start": 2833
+                                },
+                                {
+                                    "end": 2944,
+                                    "start": 2891
+                                },
+                                {
+                                    "end": 3002,
+                                    "start": 2949
+                                },
+                                {
+                                    "end": 3059,
+                                    "start": 3007
+                                },
+                                {
+                                    "end": 3117,
+                                    "start": 3064
+                                },
+                                {
+                                    "end": 3176,
+                                    "start": 3122
+                                },
+                                {
+                                    "end": 3236,
+                                    "start": 3181
+                                },
+                                {
+                                    "end": 3294,
+                                    "start": 3241
+                                },
+                                {
+                                    "end": 3352,
+                                    "start": 3299
+                                },
+                                {
+                                    "end": 3411,
+                                    "start": 3357
+                                },
+                                {
+                                    "end": 3468,
+                                    "start": 3416
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PF02494",
+                            "regions": [
+                                {
+                                    "end": 642,
+                                    "start": 561
+                                },
+                                {
+                                    "end": 721,
+                                    "start": 644
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PR00895",
+                            "regions": [
+                                {
+                                    "end": 1530,
+                                    "start": 1512
+                                },
+                                {
+                                    "end": 1558,
+                                    "start": 1539
+                                },
+                                {
+                                    "end": 1592,
+                                    "start": 1559
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SSF57535",
+                            "regions": [
+                                {
+                                    "end": 433,
+                                    "start": 374
+                                },
+                                {
+                                    "end": 493,
+                                    "start": 434
+                                },
+                                {
+                                    "end": 560,
+                                    "start": 494
+                                },
+                                {
+                                    "end": 790,
+                                    "start": 727
+                                },
+                                {
+                                    "end": 1746,
+                                    "start": 1626
+                                },
+                                {
+                                    "end": 1842,
+                                    "start": 1785
+                                },
+                                {
+                                    "end": 1900,
+                                    "start": 1843
+                                },
+                                {
+                                    "end": 1958,
+                                    "start": 1901
+                                },
+                                {
+                                    "end": 2016,
+                                    "start": 1959
+                                },
+                                {
+                                    "end": 2078,
+                                    "start": 2017
+                                },
+                                {
+                                    "end": 2199,
+                                    "start": 2081
+                                },
+                                {
+                                    "end": 2318,
+                                    "start": 2202
+                                },
+                                {
+                                    "end": 2377,
+                                    "start": 2321
+                                },
+                                {
+                                    "end": 2437,
+                                    "start": 2379
+                                },
+                                {
+                                    "end": 2551,
+                                    "start": 2438
+                                },
+                                {
+                                    "end": 2616,
+                                    "start": 2552
+                                },
+                                {
+                                    "end": 2712,
+                                    "start": 2643
+                                },
+                                {
+                                    "end": 2828,
+                                    "start": 2715
+                                },
+                                {
+                                    "end": 2886,
+                                    "start": 2829
+                                },
+                                {
+                                    "end": 2944,
+                                    "start": 2887
+                                },
+                                {
+                                    "end": 3117,
+                                    "start": 2945
+                                },
+                                {
+                                    "end": 3176,
+                                    "start": 3118
+                                },
+                                {
+                                    "end": 3229,
+                                    "start": 3177
+                                },
+                                {
+                                    "end": 3475,
+                                    "start": 3239
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SSF49899",
+                            "regions": [
+                                {
+                                    "end": 1632,
+                                    "start": 1421
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SM00159",
+                            "regions": [
+                                {
+                                    "end": 1627,
+                                    "start": 1420
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PF00354",
+                            "regions": [
+                                {
+                                    "end": 1620,
+                                    "start": 1442
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PF07699",
+                            "regions": [
+                                {
+                                    "end": 360,
+                                    "start": 310
+                                },
+                                {
+                                    "end": 1052,
+                                    "start": 1005
+                                },
+                                {
+                                    "end": 1106,
+                                    "start": 1059
+                                },
+                                {
+                                    "end": 1160,
+                                    "start": 1113
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PS50311",
+                            "regions": [
+                                {
+                                    "end": 1409,
+                                    "start": 1197
+                                },
+                                {
+                                    "end": 3554,
+                                    "start": 3468
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PS50825",
+                            "regions": [
+                                {
+                                    "end": 642,
+                                    "start": 560
+                                },
+                                {
+                                    "end": 724,
+                                    "start": 643
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PF00092",
+                            "regions": [
+                                {
+                                    "end": 252,
+                                    "start": 84
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SSF57196",
+                            "regions": [
+                                {
+                                    "end": 1267,
+                                    "start": 1189
+                                },
+                                {
+                                    "end": 1305,
+                                    "start": 1268
+                                },
+                                {
+                                    "end": 1342,
+                                    "start": 1306
+                                },
+                                {
+                                    "end": 1423,
+                                    "start": 1344
+                                },
+                                {
+                                    "end": 1786,
+                                    "start": 1735
+                                },
+                                {
+                                    "end": 3506,
+                                    "start": 3463
+                                },
+                                {
+                                    "end": 3535,
+                                    "start": 3507
+                                },
+                                {
+                                    "end": 3570,
+                                    "start": 3537
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PS50026",
+                            "regions": [
+                                {
+                                    "end": 1229,
+                                    "start": 1193
+                                },
+                                {
+                                    "end": 1267,
+                                    "start": 1231
+                                },
+                                {
+                                    "end": 1305,
+                                    "start": 1269
+                                },
+                                {
+                                    "end": 1343,
+                                    "start": 1307
+                                },
+                                {
+                                    "end": 1381,
+                                    "start": 1345
+                                },
+                                {
+                                    "end": 1419,
+                                    "start": 1383
+                                },
+                                {
+                                    "end": 1784,
+                                    "start": 1745
+                                },
+                                {
+                                    "end": 3532,
+                                    "start": 3500
+                                },
+                                {
+                                    "end": 3564,
+                                    "start": 3533
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SM00181",
+                            "regions": [
+                                {
+                                    "end": 1229,
+                                    "start": 1196
+                                },
+                                {
+                                    "end": 1267,
+                                    "start": 1234
+                                },
+                                {
+                                    "end": 1305,
+                                    "start": 1272
+                                },
+                                {
+                                    "end": 1343,
+                                    "start": 1310
+                                },
+                                {
+                                    "end": 1381,
+                                    "start": 1348
+                                },
+                                {
+                                    "end": 1419,
+                                    "start": 1386
+                                },
+                                {
+                                    "end": 1784,
+                                    "start": 1748
+                                },
+                                {
+                                    "end": 3500,
+                                    "start": 3471
+                                },
+                                {
+                                    "end": 3532,
+                                    "start": 3503
+                                },
+                                {
+                                    "end": 3564,
+                                    "start": 3535
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SM00179",
+                            "regions": [
+                                {
+                                    "end": 1229,
+                                    "start": 1196
+                                },
+                                {
+                                    "end": 1267,
+                                    "start": 1231
+                                },
+                                {
+                                    "end": 1305,
+                                    "start": 1269
+                                },
+                                {
+                                    "end": 1343,
+                                    "start": 1307
+                                },
+                                {
+                                    "end": 1381,
+                                    "start": 1345
+                                },
+                                {
+                                    "end": 1419,
+                                    "start": 1383
+                                },
+                                {
+                                    "end": 1784,
+                                    "start": 1745
+                                },
+                                {
+                                    "end": 3532,
+                                    "start": 3504
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SSF57184",
+                            "regions": [
+                                {
+                                    "end": 440,
+                                    "start": 269
+                                },
+                                {
+                                    "end": 1144,
+                                    "start": 988
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PF07645",
+                            "regions": [
+                                {
+                                    "end": 1783,
+                                    "start": 1745
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PS50923",
+                            "regions": [
+                                {
+                                    "end": 435,
+                                    "start": 376
+                                },
+                                {
+                                    "end": 495,
+                                    "start": 436
+                                },
+                                {
+                                    "end": 561,
+                                    "start": 496
+                                },
+                                {
+                                    "end": 789,
+                                    "start": 725
+                                },
+                                {
+                                    "end": 1687,
+                                    "start": 1629
+                                },
+                                {
+                                    "end": 1745,
+                                    "start": 1688
+                                },
+                                {
+                                    "end": 1844,
+                                    "start": 1787
+                                },
+                                {
+                                    "end": 1902,
+                                    "start": 1845
+                                },
+                                {
+                                    "end": 1960,
+                                    "start": 1903
+                                },
+                                {
+                                    "end": 2018,
+                                    "start": 1961
+                                },
+                                {
+                                    "end": 2080,
+                                    "start": 2019
+                                },
+                                {
+                                    "end": 2143,
+                                    "start": 2081
+                                },
+                                {
+                                    "end": 2201,
+                                    "start": 2144
+                                },
+                                {
+                                    "end": 2261,
+                                    "start": 2202
+                                },
+                                {
+                                    "end": 2320,
+                                    "start": 2262
+                                },
+                                {
+                                    "end": 2378,
+                                    "start": 2321
+                                },
+                                {
+                                    "end": 2437,
+                                    "start": 2379
+                                },
+                                {
+                                    "end": 2495,
+                                    "start": 2438
+                                },
+                                {
+                                    "end": 2553,
+                                    "start": 2496
+                                },
+                                {
+                                    "end": 2610,
+                                    "start": 2554
+                                },
+                                {
+                                    "end": 2714,
+                                    "start": 2663
+                                },
+                                {
+                                    "end": 2772,
+                                    "start": 2715
+                                },
+                                {
+                                    "end": 2830,
+                                    "start": 2773
+                                },
+                                {
+                                    "end": 2888,
+                                    "start": 2831
+                                },
+                                {
+                                    "end": 2946,
+                                    "start": 2889
+                                },
+                                {
+                                    "end": 3004,
+                                    "start": 2947
+                                },
+                                {
+                                    "end": 3061,
+                                    "start": 3005
+                                },
+                                {
+                                    "end": 3119,
+                                    "start": 3062
+                                },
+                                {
+                                    "end": 3178,
+                                    "start": 3120
+                                },
+                                {
+                                    "end": 3238,
+                                    "start": 3179
+                                },
+                                {
+                                    "end": 3296,
+                                    "start": 3239
+                                },
+                                {
+                                    "end": 3354,
+                                    "start": 3297
+                                },
+                                {
+                                    "end": 3413,
+                                    "start": 3355
+                                },
+                                {
+                                    "end": 3470,
+                                    "start": 3414
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SM00327",
+                            "regions": [
+                                {
+                                    "end": 260,
+                                    "start": 81
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PF00008",
+                            "regions": [
+                                {
+                                    "end": 1226,
+                                    "start": 1197
+                                },
+                                {
+                                    "end": 1265,
+                                    "start": 1235
+                                },
+                                {
+                                    "end": 1302,
+                                    "start": 1273
+                                },
+                                {
+                                    "end": 1379,
+                                    "start": 1349
+                                },
+                                {
+                                    "end": 1417,
+                                    "start": 1387
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PS50234",
+                            "regions": [
+                                {
+                                    "end": 264,
+                                    "start": 83
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PF07974",
+                            "regions": [
+                                {
+                                    "end": 1266,
+                                    "start": 1235
+                                },
+                                {
+                                    "end": 3499,
+                                    "start": 3475
+                                },
+                                {
+                                    "end": 3531,
+                                    "start": 3507
+                                },
+                                {
+                                    "end": 3563,
+                                    "start": 3536
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SSF53300",
+                            "regions": [
+                                {
+                                    "end": 262,
+                                    "start": 79
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PF00084",
+                            "regions": [
+                                {
+                                    "end": 430,
+                                    "start": 378
+                                },
+                                {
+                                    "end": 493,
+                                    "start": 438
+                                },
+                                {
+                                    "end": 1685,
+                                    "start": 1628
+                                },
+                                {
+                                    "end": 1743,
+                                    "start": 1690
+                                },
+                                {
+                                    "end": 1842,
+                                    "start": 1789
+                                },
+                                {
+                                    "end": 1900,
+                                    "start": 1847
+                                },
+                                {
+                                    "end": 1958,
+                                    "start": 1905
+                                },
+                                {
+                                    "end": 2016,
+                                    "start": 1963
+                                },
+                                {
+                                    "end": 2078,
+                                    "start": 2021
+                                },
+                                {
+                                    "end": 2136,
+                                    "start": 2083
+                                },
+                                {
+                                    "end": 2199,
+                                    "start": 2146
+                                },
+                                {
+                                    "end": 2259,
+                                    "start": 2204
+                                },
+                                {
+                                    "end": 2318,
+                                    "start": 2264
+                                },
+                                {
+                                    "end": 2376,
+                                    "start": 2323
+                                },
+                                {
+                                    "end": 2435,
+                                    "start": 2381
+                                },
+                                {
+                                    "end": 2493,
+                                    "start": 2440
+                                },
+                                {
+                                    "end": 2551,
+                                    "start": 2498
+                                },
+                                {
+                                    "end": 2608,
+                                    "start": 2556
+                                },
+                                {
+                                    "end": 2712,
+                                    "start": 2667
+                                },
+                                {
+                                    "end": 2770,
+                                    "start": 2717
+                                },
+                                {
+                                    "end": 2828,
+                                    "start": 2775
+                                },
+                                {
+                                    "end": 2886,
+                                    "start": 2833
+                                },
+                                {
+                                    "end": 2944,
+                                    "start": 2891
+                                },
+                                {
+                                    "end": 3002,
+                                    "start": 2949
+                                },
+                                {
+                                    "end": 3059,
+                                    "start": 3007
+                                },
+                                {
+                                    "end": 3117,
+                                    "start": 3084
+                                },
+                                {
+                                    "end": 3172,
+                                    "start": 3122
+                                },
+                                {
+                                    "end": 3236,
+                                    "start": 3181
+                                },
+                                {
+                                    "end": 3290,
+                                    "start": 3241
+                                },
+                                {
+                                    "end": 3352,
+                                    "start": 3299
+                                },
+                                {
+                                    "end": 3411,
+                                    "start": 3357
+                                },
+                                {
+                                    "end": 3468,
+                                    "start": 3416
+                                }
+                            ]
+                        }
+                    ],
+                    "end": 113342160,
+                    "exons": [
+                        {
+                            "end": 113128840,
+                            "start": 113127531
+                        },
+                        {
+                            "end": 113132296,
+                            "start": 113132203
+                        },
+                        {
+                            "end": 113137743,
+                            "start": 113137648
+                        },
+                        {
+                            "end": 113139646,
+                            "start": 113139551
+                        },
+                        {
+                            "end": 113141797,
+                            "start": 113141627
+                        },
+                        {
+                            "end": 113148354,
+                            "start": 113148178
+                        },
+                        {
+                            "end": 113149738,
+                            "start": 113149565
+                        },
+                        {
+                            "end": 113151867,
+                            "start": 113151804
+                        },
+                        {
+                            "end": 113163289,
+                            "start": 113163134
+                        },
+                        {
+                            "end": 113166832,
+                            "start": 113166607
+                        },
+                        {
+                            "end": 113171231,
+                            "start": 113168440
+                        },
+                        {
+                            "end": 113174015,
+                            "start": 113173343
+                        },
+                        {
+                            "end": 113190038,
+                            "start": 113189871
+                        },
+                        {
+                            "end": 113191614,
+                            "start": 113191423
+                        },
+                        {
+                            "end": 113192284,
+                            "start": 113192200
+                        },
+                        {
+                            "end": 113192730,
+                            "start": 113192554
+                        },
+                        {
+                            "end": 113194314,
+                            "start": 113194195
+                        },
+                        {
+                            "end": 113194915,
+                            "start": 113194742
+                        },
+                        {
+                            "end": 113196786,
+                            "start": 113196616
+                        },
+                        {
+                            "end": 113197644,
+                            "start": 113197521
+                        },
+                        {
+                            "end": 113198784,
+                            "start": 113198660
+                        },
+                        {
+                            "end": 113206000,
+                            "start": 113205825
+                        },
+                        {
+                            "end": 113208318,
+                            "start": 113208117
+                        },
+                        {
+                            "end": 113209337,
+                            "start": 113209180
+                        },
+                        {
+                            "end": 113212540,
+                            "start": 113212339
+                        },
+                        {
+                            "end": 113213682,
+                            "start": 113213569
+                        },
+                        {
+                            "end": 113217983,
+                            "start": 113217870
+                        },
+                        {
+                            "end": 113219632,
+                            "start": 113219536
+                        },
+                        {
+                            "end": 113220842,
+                            "start": 113220751
+                        },
+                        {
+                            "end": 113221393,
+                            "start": 113221232
+                        },
+                        {
+                            "end": 113228306,
+                            "start": 113228145
+                        },
+                        {
+                            "end": 113231381,
+                            "start": 113231220
+                        },
+                        {
+                            "end": 113233877,
+                            "start": 113233644
+                        },
+                        {
+                            "end": 113234603,
+                            "start": 113234439
+                        },
+                        {
+                            "end": 113238595,
+                            "start": 113238484
+                        },
+                        {
+                            "end": 113242036,
+                            "start": 113241915
+                        },
+                        {
+                            "end": 113243716,
+                            "start": 113243522
+                        },
+                        {
+                            "end": 113244772,
+                            "start": 113244641
+                        },
+                        {
+                            "end": 113245973,
+                            "start": 113245866
+                        },
+                        {
+                            "end": 113252059,
+                            "start": 113251930
+                        },
+                        {
+                            "end": 113259213,
+                            "start": 113259095
+                        },
+                        {
+                            "end": 113261518,
+                            "start": 113261321
+                        },
+                        {
+                            "end": 113265497,
+                            "start": 113265318
+                        },
+                        {
+                            "end": 113275385,
+                            "start": 113275206
+                        },
+                        {
+                            "end": 113276386,
+                            "start": 113276228
+                        },
+                        {
+                            "end": 113308571,
+                            "start": 113308395
+                        },
+                        {
+                            "end": 113312384,
+                            "start": 113312129
+                        },
+                        {
+                            "end": 113342160,
+                            "start": 113341293
+                        }
+                    ],
+                    "is_best_transcript": true,
+                    "name": "ENST00000401783",
+                    "start": 113127531
+                },
+                {
+                    "cdna_coding_end": 4909,
+                    "cdna_coding_start": 416,
+                    "domains": [
+                        {
+                            "name": "PF00084",
+                            "regions": [
+                                {
+                                    "end": 62,
+                                    "start": 9
+                                },
+                                {
+                                    "end": 125,
+                                    "start": 72
+                                },
+                                {
+                                    "end": 185,
+                                    "start": 130
+                                },
+                                {
+                                    "end": 244,
+                                    "start": 190
+                                },
+                                {
+                                    "end": 302,
+                                    "start": 249
+                                },
+                                {
+                                    "end": 361,
+                                    "start": 307
+                                },
+                                {
+                                    "end": 419,
+                                    "start": 366
+                                },
+                                {
+                                    "end": 477,
+                                    "start": 424
+                                },
+                                {
+                                    "end": 534,
+                                    "start": 482
+                                },
+                                {
+                                    "end": 638,
+                                    "start": 593
+                                },
+                                {
+                                    "end": 696,
+                                    "start": 643
+                                },
+                                {
+                                    "end": 754,
+                                    "start": 701
+                                },
+                                {
+                                    "end": 812,
+                                    "start": 759
+                                },
+                                {
+                                    "end": 870,
+                                    "start": 817
+                                },
+                                {
+                                    "end": 928,
+                                    "start": 875
+                                },
+                                {
+                                    "end": 985,
+                                    "start": 933
+                                },
+                                {
+                                    "end": 1043,
+                                    "start": 1010
+                                },
+                                {
+                                    "end": 1098,
+                                    "start": 1048
+                                },
+                                {
+                                    "end": 1162,
+                                    "start": 1107
+                                },
+                                {
+                                    "end": 1216,
+                                    "start": 1167
+                                },
+                                {
+                                    "end": 1278,
+                                    "start": 1225
+                                },
+                                {
+                                    "end": 1337,
+                                    "start": 1283
+                                },
+                                {
+                                    "end": 1394,
+                                    "start": 1342
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PF07974",
+                            "regions": [
+                                {
+                                    "end": 1425,
+                                    "start": 1401
+                                },
+                                {
+                                    "end": 1457,
+                                    "start": 1433
+                                },
+                                {
+                                    "end": 1489,
+                                    "start": 1462
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PF00008",
+                            "regions": [
+                                {
+                                    "end": 1456,
+                                    "start": 1427
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PS50923",
+                            "regions": [
+                                {
+                                    "end": 69,
+                                    "start": 7
+                                },
+                                {
+                                    "end": 127,
+                                    "start": 70
+                                },
+                                {
+                                    "end": 187,
+                                    "start": 128
+                                },
+                                {
+                                    "end": 246,
+                                    "start": 188
+                                },
+                                {
+                                    "end": 304,
+                                    "start": 247
+                                },
+                                {
+                                    "end": 363,
+                                    "start": 305
+                                },
+                                {
+                                    "end": 421,
+                                    "start": 364
+                                },
+                                {
+                                    "end": 479,
+                                    "start": 422
+                                },
+                                {
+                                    "end": 536,
+                                    "start": 480
+                                },
+                                {
+                                    "end": 640,
+                                    "start": 589
+                                },
+                                {
+                                    "end": 698,
+                                    "start": 641
+                                },
+                                {
+                                    "end": 756,
+                                    "start": 699
+                                },
+                                {
+                                    "end": 814,
+                                    "start": 757
+                                },
+                                {
+                                    "end": 872,
+                                    "start": 815
+                                },
+                                {
+                                    "end": 930,
+                                    "start": 873
+                                },
+                                {
+                                    "end": 987,
+                                    "start": 931
+                                },
+                                {
+                                    "end": 1045,
+                                    "start": 988
+                                },
+                                {
+                                    "end": 1104,
+                                    "start": 1046
+                                },
+                                {
+                                    "end": 1164,
+                                    "start": 1105
+                                },
+                                {
+                                    "end": 1222,
+                                    "start": 1165
+                                },
+                                {
+                                    "end": 1280,
+                                    "start": 1223
+                                },
+                                {
+                                    "end": 1339,
+                                    "start": 1281
+                                },
+                                {
+                                    "end": 1396,
+                                    "start": 1340
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SM00181",
+                            "regions": [
+                                {
+                                    "end": 1426,
+                                    "start": 1397
+                                },
+                                {
+                                    "end": 1458,
+                                    "start": 1429
+                                },
+                                {
+                                    "end": 1490,
+                                    "start": 1461
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SSF57196",
+                            "regions": [
+                                {
+                                    "end": 1432,
+                                    "start": 1389
+                                },
+                                {
+                                    "end": 1461,
+                                    "start": 1433
+                                },
+                                {
+                                    "end": 1496,
+                                    "start": 1463
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PS50026",
+                            "regions": [
+                                {
+                                    "end": 1458,
+                                    "start": 1426
+                                },
+                                {
+                                    "end": 1490,
+                                    "start": 1459
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PS50311",
+                            "regions": [
+                                {
+                                    "end": 1480,
+                                    "start": 1394
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SSF57535",
+                            "regions": [
+                                {
+                                    "end": 125,
+                                    "start": 7
+                                },
+                                {
+                                    "end": 244,
+                                    "start": 128
+                                },
+                                {
+                                    "end": 303,
+                                    "start": 247
+                                },
+                                {
+                                    "end": 363,
+                                    "start": 305
+                                },
+                                {
+                                    "end": 477,
+                                    "start": 364
+                                },
+                                {
+                                    "end": 542,
+                                    "start": 478
+                                },
+                                {
+                                    "end": 638,
+                                    "start": 569
+                                },
+                                {
+                                    "end": 754,
+                                    "start": 641
+                                },
+                                {
+                                    "end": 812,
+                                    "start": 755
+                                },
+                                {
+                                    "end": 870,
+                                    "start": 813
+                                },
+                                {
+                                    "end": 1043,
+                                    "start": 871
+                                },
+                                {
+                                    "end": 1102,
+                                    "start": 1044
+                                },
+                                {
+                                    "end": 1155,
+                                    "start": 1103
+                                },
+                                {
+                                    "end": 1401,
+                                    "start": 1165
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SM00032",
+                            "regions": [
+                                {
+                                    "end": 67,
+                                    "start": 9
+                                },
+                                {
+                                    "end": 125,
+                                    "start": 72
+                                },
+                                {
+                                    "end": 185,
+                                    "start": 130
+                                },
+                                {
+                                    "end": 244,
+                                    "start": 190
+                                },
+                                {
+                                    "end": 302,
+                                    "start": 249
+                                },
+                                {
+                                    "end": 361,
+                                    "start": 307
+                                },
+                                {
+                                    "end": 419,
+                                    "start": 366
+                                },
+                                {
+                                    "end": 477,
+                                    "start": 424
+                                },
+                                {
+                                    "end": 534,
+                                    "start": 482
+                                },
+                                {
+                                    "end": 638,
+                                    "start": 580
+                                },
+                                {
+                                    "end": 696,
+                                    "start": 643
+                                },
+                                {
+                                    "end": 754,
+                                    "start": 701
+                                },
+                                {
+                                    "end": 812,
+                                    "start": 759
+                                },
+                                {
+                                    "end": 870,
+                                    "start": 817
+                                },
+                                {
+                                    "end": 928,
+                                    "start": 875
+                                },
+                                {
+                                    "end": 985,
+                                    "start": 933
+                                },
+                                {
+                                    "end": 1043,
+                                    "start": 990
+                                },
+                                {
+                                    "end": 1102,
+                                    "start": 1048
+                                },
+                                {
+                                    "end": 1162,
+                                    "start": 1107
+                                },
+                                {
+                                    "end": 1220,
+                                    "start": 1167
+                                },
+                                {
+                                    "end": 1278,
+                                    "start": 1225
+                                },
+                                {
+                                    "end": 1337,
+                                    "start": 1283
+                                },
+                                {
+                                    "end": 1394,
+                                    "start": 1342
+                                }
+                            ]
+                        }
+                    ],
+                    "end": 113190038,
+                    "exons": [
+                        {
+                            "end": 113128840,
+                            "start": 113127536
+                        },
+                        {
+                            "end": 113132296,
+                            "start": 113132203
+                        },
+                        {
+                            "end": 113137743,
+                            "start": 113137648
+                        },
+                        {
+                            "end": 113139646,
+                            "start": 113139551
+                        },
+                        {
+                            "end": 113141797,
+                            "start": 113141627
+                        },
+                        {
+                            "end": 113148354,
+                            "start": 113148178
+                        },
+                        {
+                            "end": 113149738,
+                            "start": 113149565
+                        },
+                        {
+                            "end": 113151867,
+                            "start": 113151804
+                        },
+                        {
+                            "end": 113163289,
+                            "start": 113163134
+                        },
+                        {
+                            "end": 113166832,
+                            "start": 113166607
+                        },
+                        {
+                            "end": 113171231,
+                            "start": 113168440
+                        },
+                        {
+                            "end": 113174015,
+                            "start": 113173343
+                        },
+                        {
+                            "end": 113190038,
+                            "start": 113189871
+                        }
+                    ],
+                    "is_best_transcript": false,
+                    "name": "ENST00000297826",
+                    "start": 113127536
+                },
+                {
+                    "cdna_coding_end": 10911,
+                    "cdna_coding_start": 265,
+                    "domains": [
+                        {
+                            "name": "SSF57535",
+                            "regions": [
+                                {
+                                    "end": 410,
+                                    "start": 351
+                                },
+                                {
+                                    "end": 470,
+                                    "start": 411
+                                },
+                                {
+                                    "end": 537,
+                                    "start": 471
+                                },
+                                {
+                                    "end": 767,
+                                    "start": 704
+                                },
+                                {
+                                    "end": 1723,
+                                    "start": 1603
+                                },
+                                {
+                                    "end": 1819,
+                                    "start": 1762
+                                },
+                                {
+                                    "end": 1877,
+                                    "start": 1820
+                                },
+                                {
+                                    "end": 1935,
+                                    "start": 1878
+                                },
+                                {
+                                    "end": 1993,
+                                    "start": 1936
+                                },
+                                {
+                                    "end": 2055,
+                                    "start": 1994
+                                },
+                                {
+                                    "end": 2176,
+                                    "start": 2058
+                                },
+                                {
+                                    "end": 2295,
+                                    "start": 2179
+                                },
+                                {
+                                    "end": 2354,
+                                    "start": 2298
+                                },
+                                {
+                                    "end": 2414,
+                                    "start": 2356
+                                },
+                                {
+                                    "end": 2528,
+                                    "start": 2415
+                                },
+                                {
+                                    "end": 2593,
+                                    "start": 2529
+                                },
+                                {
+                                    "end": 2689,
+                                    "start": 2620
+                                },
+                                {
+                                    "end": 2805,
+                                    "start": 2692
+                                },
+                                {
+                                    "end": 2863,
+                                    "start": 2806
+                                },
+                                {
+                                    "end": 2921,
+                                    "start": 2864
+                                },
+                                {
+                                    "end": 3094,
+                                    "start": 2922
+                                },
+                                {
+                                    "end": 3153,
+                                    "start": 3095
+                                },
+                                {
+                                    "end": 3206,
+                                    "start": 3154
+                                },
+                                {
+                                    "end": 3452,
+                                    "start": 3216
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SSF49899",
+                            "regions": [
+                                {
+                                    "end": 1609,
+                                    "start": 1398
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SM00159",
+                            "regions": [
+                                {
+                                    "end": 1604,
+                                    "start": 1397
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PF00354",
+                            "regions": [
+                                {
+                                    "end": 1597,
+                                    "start": 1419
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PR00895",
+                            "regions": [
+                                {
+                                    "end": 1507,
+                                    "start": 1489
+                                },
+                                {
+                                    "end": 1535,
+                                    "start": 1516
+                                },
+                                {
+                                    "end": 1569,
+                                    "start": 1536
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PF02494",
+                            "regions": [
+                                {
+                                    "end": 619,
+                                    "start": 538
+                                },
+                                {
+                                    "end": 698,
+                                    "start": 621
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SM00032",
+                            "regions": [
+                                {
+                                    "end": 410,
+                                    "start": 355
+                                },
+                                {
+                                    "end": 470,
+                                    "start": 415
+                                },
+                                {
+                                    "end": 536,
+                                    "start": 475
+                                },
+                                {
+                                    "end": 764,
+                                    "start": 704
+                                },
+                                {
+                                    "end": 1662,
+                                    "start": 1608
+                                },
+                                {
+                                    "end": 1720,
+                                    "start": 1667
+                                },
+                                {
+                                    "end": 1819,
+                                    "start": 1766
+                                },
+                                {
+                                    "end": 1877,
+                                    "start": 1824
+                                },
+                                {
+                                    "end": 1935,
+                                    "start": 1882
+                                },
+                                {
+                                    "end": 1993,
+                                    "start": 1940
+                                },
+                                {
+                                    "end": 2055,
+                                    "start": 1998
+                                },
+                                {
+                                    "end": 2118,
+                                    "start": 2060
+                                },
+                                {
+                                    "end": 2176,
+                                    "start": 2123
+                                },
+                                {
+                                    "end": 2236,
+                                    "start": 2181
+                                },
+                                {
+                                    "end": 2295,
+                                    "start": 2241
+                                },
+                                {
+                                    "end": 2353,
+                                    "start": 2300
+                                },
+                                {
+                                    "end": 2412,
+                                    "start": 2358
+                                },
+                                {
+                                    "end": 2470,
+                                    "start": 2417
+                                },
+                                {
+                                    "end": 2528,
+                                    "start": 2475
+                                },
+                                {
+                                    "end": 2585,
+                                    "start": 2533
+                                },
+                                {
+                                    "end": 2689,
+                                    "start": 2631
+                                },
+                                {
+                                    "end": 2747,
+                                    "start": 2694
+                                },
+                                {
+                                    "end": 2805,
+                                    "start": 2752
+                                },
+                                {
+                                    "end": 2863,
+                                    "start": 2810
+                                },
+                                {
+                                    "end": 2921,
+                                    "start": 2868
+                                },
+                                {
+                                    "end": 2979,
+                                    "start": 2926
+                                },
+                                {
+                                    "end": 3036,
+                                    "start": 2984
+                                },
+                                {
+                                    "end": 3094,
+                                    "start": 3041
+                                },
+                                {
+                                    "end": 3153,
+                                    "start": 3099
+                                },
+                                {
+                                    "end": 3213,
+                                    "start": 3158
+                                },
+                                {
+                                    "end": 3271,
+                                    "start": 3218
+                                },
+                                {
+                                    "end": 3329,
+                                    "start": 3276
+                                },
+                                {
+                                    "end": 3388,
+                                    "start": 3334
+                                },
+                                {
+                                    "end": 3445,
+                                    "start": 3393
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SM00179",
+                            "regions": [
+                                {
+                                    "end": 1206,
+                                    "start": 1173
+                                },
+                                {
+                                    "end": 1244,
+                                    "start": 1208
+                                },
+                                {
+                                    "end": 1282,
+                                    "start": 1246
+                                },
+                                {
+                                    "end": 1320,
+                                    "start": 1284
+                                },
+                                {
+                                    "end": 1358,
+                                    "start": 1322
+                                },
+                                {
+                                    "end": 1396,
+                                    "start": 1360
+                                },
+                                {
+                                    "end": 1761,
+                                    "start": 1722
+                                },
+                                {
+                                    "end": 3509,
+                                    "start": 3481
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SSF57184",
+                            "regions": [
+                                {
+                                    "end": 417,
+                                    "start": 246
+                                },
+                                {
+                                    "end": 1121,
+                                    "start": 965
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SSF57196",
+                            "regions": [
+                                {
+                                    "end": 1244,
+                                    "start": 1166
+                                },
+                                {
+                                    "end": 1282,
+                                    "start": 1245
+                                },
+                                {
+                                    "end": 1319,
+                                    "start": 1283
+                                },
+                                {
+                                    "end": 1400,
+                                    "start": 1321
+                                },
+                                {
+                                    "end": 1763,
+                                    "start": 1712
+                                },
+                                {
+                                    "end": 3483,
+                                    "start": 3440
+                                },
+                                {
+                                    "end": 3512,
+                                    "start": 3484
+                                },
+                                {
+                                    "end": 3547,
+                                    "start": 3514
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PS50026",
+                            "regions": [
+                                {
+                                    "end": 1206,
+                                    "start": 1170
+                                },
+                                {
+                                    "end": 1244,
+                                    "start": 1208
+                                },
+                                {
+                                    "end": 1282,
+                                    "start": 1246
+                                },
+                                {
+                                    "end": 1320,
+                                    "start": 1284
+                                },
+                                {
+                                    "end": 1358,
+                                    "start": 1322
+                                },
+                                {
+                                    "end": 1396,
+                                    "start": 1360
+                                },
+                                {
+                                    "end": 1761,
+                                    "start": 1722
+                                },
+                                {
+                                    "end": 3509,
+                                    "start": 3477
+                                },
+                                {
+                                    "end": 3541,
+                                    "start": 3510
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SM00181",
+                            "regions": [
+                                {
+                                    "end": 1206,
+                                    "start": 1173
+                                },
+                                {
+                                    "end": 1244,
+                                    "start": 1211
+                                },
+                                {
+                                    "end": 1282,
+                                    "start": 1249
+                                },
+                                {
+                                    "end": 1320,
+                                    "start": 1287
+                                },
+                                {
+                                    "end": 1358,
+                                    "start": 1325
+                                },
+                                {
+                                    "end": 1396,
+                                    "start": 1363
+                                },
+                                {
+                                    "end": 1761,
+                                    "start": 1725
+                                },
+                                {
+                                    "end": 3477,
+                                    "start": 3448
+                                },
+                                {
+                                    "end": 3509,
+                                    "start": 3480
+                                },
+                                {
+                                    "end": 3541,
+                                    "start": 3512
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PF00092",
+                            "regions": [
+                                {
+                                    "end": 229,
+                                    "start": 61
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PS50825",
+                            "regions": [
+                                {
+                                    "end": 619,
+                                    "start": 537
+                                },
+                                {
+                                    "end": 701,
+                                    "start": 620
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PS50311",
+                            "regions": [
+                                {
+                                    "end": 1386,
+                                    "start": 1174
+                                },
+                                {
+                                    "end": 3531,
+                                    "start": 3445
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PF07699",
+                            "regions": [
+                                {
+                                    "end": 337,
+                                    "start": 287
+                                },
+                                {
+                                    "end": 1029,
+                                    "start": 982
+                                },
+                                {
+                                    "end": 1083,
+                                    "start": 1036
+                                },
+                                {
+                                    "end": 1137,
+                                    "start": 1090
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PF00008",
+                            "regions": [
+                                {
+                                    "end": 1203,
+                                    "start": 1174
+                                },
+                                {
+                                    "end": 1242,
+                                    "start": 1212
+                                },
+                                {
+                                    "end": 1279,
+                                    "start": 1250
+                                },
+                                {
+                                    "end": 1356,
+                                    "start": 1326
+                                },
+                                {
+                                    "end": 1394,
+                                    "start": 1364
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SM00327",
+                            "regions": [
+                                {
+                                    "end": 237,
+                                    "start": 58
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PS50923",
+                            "regions": [
+                                {
+                                    "end": 412,
+                                    "start": 353
+                                },
+                                {
+                                    "end": 472,
+                                    "start": 413
+                                },
+                                {
+                                    "end": 538,
+                                    "start": 473
+                                },
+                                {
+                                    "end": 766,
+                                    "start": 702
+                                },
+                                {
+                                    "end": 1664,
+                                    "start": 1606
+                                },
+                                {
+                                    "end": 1722,
+                                    "start": 1665
+                                },
+                                {
+                                    "end": 1821,
+                                    "start": 1764
+                                },
+                                {
+                                    "end": 1879,
+                                    "start": 1822
+                                },
+                                {
+                                    "end": 1937,
+                                    "start": 1880
+                                },
+                                {
+                                    "end": 1995,
+                                    "start": 1938
+                                },
+                                {
+                                    "end": 2057,
+                                    "start": 1996
+                                },
+                                {
+                                    "end": 2120,
+                                    "start": 2058
+                                },
+                                {
+                                    "end": 2178,
+                                    "start": 2121
+                                },
+                                {
+                                    "end": 2238,
+                                    "start": 2179
+                                },
+                                {
+                                    "end": 2297,
+                                    "start": 2239
+                                },
+                                {
+                                    "end": 2355,
+                                    "start": 2298
+                                },
+                                {
+                                    "end": 2414,
+                                    "start": 2356
+                                },
+                                {
+                                    "end": 2472,
+                                    "start": 2415
+                                },
+                                {
+                                    "end": 2530,
+                                    "start": 2473
+                                },
+                                {
+                                    "end": 2587,
+                                    "start": 2531
+                                },
+                                {
+                                    "end": 2691,
+                                    "start": 2640
+                                },
+                                {
+                                    "end": 2749,
+                                    "start": 2692
+                                },
+                                {
+                                    "end": 2807,
+                                    "start": 2750
+                                },
+                                {
+                                    "end": 2865,
+                                    "start": 2808
+                                },
+                                {
+                                    "end": 2923,
+                                    "start": 2866
+                                },
+                                {
+                                    "end": 2981,
+                                    "start": 2924
+                                },
+                                {
+                                    "end": 3038,
+                                    "start": 2982
+                                },
+                                {
+                                    "end": 3096,
+                                    "start": 3039
+                                },
+                                {
+                                    "end": 3155,
+                                    "start": 3097
+                                },
+                                {
+                                    "end": 3215,
+                                    "start": 3156
+                                },
+                                {
+                                    "end": 3273,
+                                    "start": 3216
+                                },
+                                {
+                                    "end": 3331,
+                                    "start": 3274
+                                },
+                                {
+                                    "end": 3390,
+                                    "start": 3332
+                                },
+                                {
+                                    "end": 3447,
+                                    "start": 3391
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PF07645",
+                            "regions": [
+                                {
+                                    "end": 1760,
+                                    "start": 1722
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SSF53300",
+                            "regions": [
+                                {
+                                    "end": 239,
+                                    "start": 56
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PF00084",
+                            "regions": [
+                                {
+                                    "end": 407,
+                                    "start": 355
+                                },
+                                {
+                                    "end": 470,
+                                    "start": 415
+                                },
+                                {
+                                    "end": 1662,
+                                    "start": 1605
+                                },
+                                {
+                                    "end": 1720,
+                                    "start": 1667
+                                },
+                                {
+                                    "end": 1819,
+                                    "start": 1766
+                                },
+                                {
+                                    "end": 1877,
+                                    "start": 1824
+                                },
+                                {
+                                    "end": 1935,
+                                    "start": 1882
+                                },
+                                {
+                                    "end": 1993,
+                                    "start": 1940
+                                },
+                                {
+                                    "end": 2055,
+                                    "start": 1998
+                                },
+                                {
+                                    "end": 2113,
+                                    "start": 2060
+                                },
+                                {
+                                    "end": 2176,
+                                    "start": 2123
+                                },
+                                {
+                                    "end": 2236,
+                                    "start": 2181
+                                },
+                                {
+                                    "end": 2295,
+                                    "start": 2241
+                                },
+                                {
+                                    "end": 2353,
+                                    "start": 2300
+                                },
+                                {
+                                    "end": 2412,
+                                    "start": 2358
+                                },
+                                {
+                                    "end": 2470,
+                                    "start": 2417
+                                },
+                                {
+                                    "end": 2528,
+                                    "start": 2475
+                                },
+                                {
+                                    "end": 2585,
+                                    "start": 2533
+                                },
+                                {
+                                    "end": 2689,
+                                    "start": 2644
+                                },
+                                {
+                                    "end": 2747,
+                                    "start": 2694
+                                },
+                                {
+                                    "end": 2805,
+                                    "start": 2752
+                                },
+                                {
+                                    "end": 2863,
+                                    "start": 2810
+                                },
+                                {
+                                    "end": 2921,
+                                    "start": 2868
+                                },
+                                {
+                                    "end": 2979,
+                                    "start": 2926
+                                },
+                                {
+                                    "end": 3036,
+                                    "start": 2984
+                                },
+                                {
+                                    "end": 3094,
+                                    "start": 3061
+                                },
+                                {
+                                    "end": 3149,
+                                    "start": 3099
+                                },
+                                {
+                                    "end": 3213,
+                                    "start": 3158
+                                },
+                                {
+                                    "end": 3267,
+                                    "start": 3218
+                                },
+                                {
+                                    "end": 3329,
+                                    "start": 3276
+                                },
+                                {
+                                    "end": 3388,
+                                    "start": 3334
+                                },
+                                {
+                                    "end": 3445,
+                                    "start": 3393
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PF07974",
+                            "regions": [
+                                {
+                                    "end": 1243,
+                                    "start": 1212
+                                },
+                                {
+                                    "end": 3476,
+                                    "start": 3452
+                                },
+                                {
+                                    "end": 3508,
+                                    "start": 3484
+                                },
+                                {
+                                    "end": 3540,
+                                    "start": 3513
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PS50234",
+                            "regions": [
+                                {
+                                    "end": 241,
+                                    "start": 60
+                                }
+                            ]
+                        }
+                    ],
+                    "end": 113342018,
+                    "exons": [
+                        {
+                            "end": 113128840,
+                            "start": 113127536
+                        },
+                        {
+                            "end": 113132296,
+                            "start": 113132203
+                        },
+                        {
+                            "end": 113137743,
+                            "start": 113137648
+                        },
+                        {
+                            "end": 113139646,
+                            "start": 113139551
+                        },
+                        {
+                            "end": 113141797,
+                            "start": 113141627
+                        },
+                        {
+                            "end": 113148354,
+                            "start": 113148178
+                        },
+                        {
+                            "end": 113149738,
+                            "start": 113149565
+                        },
+                        {
+                            "end": 113151867,
+                            "start": 113151804
+                        },
+                        {
+                            "end": 113163289,
+                            "start": 113163134
+                        },
+                        {
+                            "end": 113166832,
+                            "start": 113166607
+                        },
+                        {
+                            "end": 113171231,
+                            "start": 113168440
+                        },
+                        {
+                            "end": 113174015,
+                            "start": 113173343
+                        },
+                        {
+                            "end": 113190038,
+                            "start": 113189871
+                        },
+                        {
+                            "end": 113191614,
+                            "start": 113191423
+                        },
+                        {
+                            "end": 113192284,
+                            "start": 113192200
+                        },
+                        {
+                            "end": 113192730,
+                            "start": 113192554
+                        },
+                        {
+                            "end": 113194314,
+                            "start": 113194195
+                        },
+                        {
+                            "end": 113194915,
+                            "start": 113194742
+                        },
+                        {
+                            "end": 113196786,
+                            "start": 113196616
+                        },
+                        {
+                            "end": 113197644,
+                            "start": 113197521
+                        },
+                        {
+                            "end": 113198784,
+                            "start": 113198660
+                        },
+                        {
+                            "end": 113206000,
+                            "start": 113205825
+                        },
+                        {
+                            "end": 113208318,
+                            "start": 113208117
+                        },
+                        {
+                            "end": 113209337,
+                            "start": 113209180
+                        },
+                        {
+                            "end": 113212540,
+                            "start": 113212339
+                        },
+                        {
+                            "end": 113213682,
+                            "start": 113213569
+                        },
+                        {
+                            "end": 113217983,
+                            "start": 113217870
+                        },
+                        {
+                            "end": 113219632,
+                            "start": 113219536
+                        },
+                        {
+                            "end": 113220842,
+                            "start": 113220751
+                        },
+                        {
+                            "end": 113221393,
+                            "start": 113221232
+                        },
+                        {
+                            "end": 113228306,
+                            "start": 113228145
+                        },
+                        {
+                            "end": 113231381,
+                            "start": 113231220
+                        },
+                        {
+                            "end": 113233877,
+                            "start": 113233644
+                        },
+                        {
+                            "end": 113234603,
+                            "start": 113234439
+                        },
+                        {
+                            "end": 113238595,
+                            "start": 113238484
+                        },
+                        {
+                            "end": 113242036,
+                            "start": 113241915
+                        },
+                        {
+                            "end": 113243716,
+                            "start": 113243522
+                        },
+                        {
+                            "end": 113244772,
+                            "start": 113244641
+                        },
+                        {
+                            "end": 113245973,
+                            "start": 113245866
+                        },
+                        {
+                            "end": 113252059,
+                            "start": 113251930
+                        },
+                        {
+                            "end": 113259213,
+                            "start": 113259095
+                        },
+                        {
+                            "end": 113261518,
+                            "start": 113261321
+                        },
+                        {
+                            "end": 113265497,
+                            "start": 113265318
+                        },
+                        {
+                            "end": 113275385,
+                            "start": 113275206
+                        },
+                        {
+                            "end": 113276386,
+                            "start": 113276228
+                        },
+                        {
+                            "end": 113308571,
+                            "start": 113308395
+                        },
+                        {
+                            "end": 113312384,
+                            "start": 113312129
+                        },
+                        {
+                            "end": 113342018,
+                            "start": 113341293
+                        }
+                    ],
+                    "is_best_transcript": false,
+                    "name": "ENST00000374469",
+                    "start": 113127536
+                },
+                {
+                    "cdna_coding_end": 4650,
+                    "cdna_coding_start": 1,
+                    "domains": [
+                        {
+                            "name": "PS50825",
+                            "regions": [
+                                {
+                                    "end": 642,
+                                    "start": 560
+                                },
+                                {
+                                    "end": 724,
+                                    "start": 643
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PF07699",
+                            "regions": [
+                                {
+                                    "end": 360,
+                                    "start": 310
+                                },
+                                {
+                                    "end": 1052,
+                                    "start": 1005
+                                },
+                                {
+                                    "end": 1106,
+                                    "start": 1059
+                                },
+                                {
+                                    "end": 1160,
+                                    "start": 1113
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PS50311",
+                            "regions": [
+                                {
+                                    "end": 1409,
+                                    "start": 1197
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SM00181",
+                            "regions": [
+                                {
+                                    "end": 1229,
+                                    "start": 1196
+                                },
+                                {
+                                    "end": 1267,
+                                    "start": 1234
+                                },
+                                {
+                                    "end": 1305,
+                                    "start": 1272
+                                },
+                                {
+                                    "end": 1343,
+                                    "start": 1310
+                                },
+                                {
+                                    "end": 1381,
+                                    "start": 1348
+                                },
+                                {
+                                    "end": 1419,
+                                    "start": 1386
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SSF57196",
+                            "regions": [
+                                {
+                                    "end": 1267,
+                                    "start": 1189
+                                },
+                                {
+                                    "end": 1305,
+                                    "start": 1268
+                                },
+                                {
+                                    "end": 1342,
+                                    "start": 1306
+                                },
+                                {
+                                    "end": 1423,
+                                    "start": 1344
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PS50026",
+                            "regions": [
+                                {
+                                    "end": 1229,
+                                    "start": 1193
+                                },
+                                {
+                                    "end": 1267,
+                                    "start": 1231
+                                },
+                                {
+                                    "end": 1305,
+                                    "start": 1269
+                                },
+                                {
+                                    "end": 1343,
+                                    "start": 1307
+                                },
+                                {
+                                    "end": 1381,
+                                    "start": 1345
+                                },
+                                {
+                                    "end": 1419,
+                                    "start": 1383
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SSF57184",
+                            "regions": [
+                                {
+                                    "end": 440,
+                                    "start": 269
+                                },
+                                {
+                                    "end": 1144,
+                                    "start": 988
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SM00179",
+                            "regions": [
+                                {
+                                    "end": 1229,
+                                    "start": 1196
+                                },
+                                {
+                                    "end": 1267,
+                                    "start": 1231
+                                },
+                                {
+                                    "end": 1305,
+                                    "start": 1269
+                                },
+                                {
+                                    "end": 1343,
+                                    "start": 1307
+                                },
+                                {
+                                    "end": 1381,
+                                    "start": 1345
+                                },
+                                {
+                                    "end": 1419,
+                                    "start": 1383
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PF00092",
+                            "regions": [
+                                {
+                                    "end": 252,
+                                    "start": 84
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SM00032",
+                            "regions": [
+                                {
+                                    "end": 433,
+                                    "start": 378
+                                },
+                                {
+                                    "end": 493,
+                                    "start": 438
+                                },
+                                {
+                                    "end": 559,
+                                    "start": 498
+                                },
+                                {
+                                    "end": 787,
+                                    "start": 727
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PF02494",
+                            "regions": [
+                                {
+                                    "end": 642,
+                                    "start": 561
+                                },
+                                {
+                                    "end": 721,
+                                    "start": 644
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PR00010",
+                            "regions": [
+                                {
+                                    "end": 1318,
+                                    "start": 1307
+                                },
+                                {
+                                    "end": 1364,
+                                    "start": 1357
+                                },
+                                {
+                                    "end": 1413,
+                                    "start": 1403
+                                },
+                                {
+                                    "end": 1420,
+                                    "start": 1414
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PF00354",
+                            "regions": [
+                                {
+                                    "end": 1532,
+                                    "start": 1442
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SSF57535",
+                            "regions": [
+                                {
+                                    "end": 433,
+                                    "start": 374
+                                },
+                                {
+                                    "end": 493,
+                                    "start": 434
+                                },
+                                {
+                                    "end": 560,
+                                    "start": 494
+                                },
+                                {
+                                    "end": 790,
+                                    "start": 727
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SSF49899",
+                            "regions": [
+                                {
+                                    "end": 1547,
+                                    "start": 1421
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PS50234",
+                            "regions": [
+                                {
+                                    "end": 264,
+                                    "start": 83
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SSF53300",
+                            "regions": [
+                                {
+                                    "end": 262,
+                                    "start": 79
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PF00084",
+                            "regions": [
+                                {
+                                    "end": 430,
+                                    "start": 378
+                                },
+                                {
+                                    "end": 493,
+                                    "start": 438
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PS50923",
+                            "regions": [
+                                {
+                                    "end": 435,
+                                    "start": 376
+                                },
+                                {
+                                    "end": 495,
+                                    "start": 436
+                                },
+                                {
+                                    "end": 561,
+                                    "start": 496
+                                },
+                                {
+                                    "end": 789,
+                                    "start": 725
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PF07645",
+                            "regions": [
+                                {
+                                    "end": 1262,
+                                    "start": 1231
+                                },
+                                {
+                                    "end": 1338,
+                                    "start": 1308
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PF00008",
+                            "regions": [
+                                {
+                                    "end": 1226,
+                                    "start": 1197
+                                },
+                                {
+                                    "end": 1265,
+                                    "start": 1235
+                                },
+                                {
+                                    "end": 1302,
+                                    "start": 1273
+                                },
+                                {
+                                    "end": 1337,
+                                    "start": 1311
+                                },
+                                {
+                                    "end": 1379,
+                                    "start": 1349
+                                },
+                                {
+                                    "end": 1417,
+                                    "start": 1387
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SM00327",
+                            "regions": [
+                                {
+                                    "end": 260,
+                                    "start": 81
+                                }
+                            ]
+                        }
+                    ],
+                    "end": 113341823,
+                    "exons": [
+                        {
+                            "end": 113206000,
+                            "start": 113204759
+                        },
+                        {
+                            "end": 113208318,
+                            "start": 113208117
+                        },
+                        {
+                            "end": 113209337,
+                            "start": 113209180
+                        },
+                        {
+                            "end": 113212540,
+                            "start": 113212339
+                        },
+                        {
+                            "end": 113213682,
+                            "start": 113213569
+                        },
+                        {
+                            "end": 113217983,
+                            "start": 113217870
+                        },
+                        {
+                            "end": 113219632,
+                            "start": 113219536
+                        },
+                        {
+                            "end": 113220399,
+                            "start": 113220395
+                        },
+                        {
+                            "end": 113220842,
+                            "start": 113220756
+                        },
+                        {
+                            "end": 113221393,
+                            "start": 113221232
+                        },
+                        {
+                            "end": 113228306,
+                            "start": 113228145
+                        },
+                        {
+                            "end": 113231381,
+                            "start": 113231220
+                        },
+                        {
+                            "end": 113233877,
+                            "start": 113233644
+                        },
+                        {
+                            "end": 113234603,
+                            "start": 113234439
+                        },
+                        {
+                            "end": 113238595,
+                            "start": 113238484
+                        },
+                        {
+                            "end": 113242036,
+                            "start": 113241915
+                        },
+                        {
+                            "end": 113243716,
+                            "start": 113243522
+                        },
+                        {
+                            "end": 113244772,
+                            "start": 113244641
+                        },
+                        {
+                            "end": 113245973,
+                            "start": 113245866
+                        },
+                        {
+                            "end": 113252059,
+                            "start": 113251930
+                        },
+                        {
+                            "end": 113259213,
+                            "start": 113259095
+                        },
+                        {
+                            "end": 113261518,
+                            "start": 113261321
+                        },
+                        {
+                            "end": 113265497,
+                            "start": 113265318
+                        },
+                        {
+                            "end": 113275385,
+                            "start": 113275206
+                        },
+                        {
+                            "end": 113276386,
+                            "start": 113276228
+                        },
+                        {
+                            "end": 113308571,
+                            "start": 113308395
+                        },
+                        {
+                            "end": 113312384,
+                            "start": 113312129
+                        },
+                        {
+                            "end": 113341823,
+                            "start": 113341293
+                        }
+                    ],
+                    "is_best_transcript": false,
+                    "name": "ENST00000302728",
+                    "start": 113204759
+                },
+                {
+                    "cdna_coding_end": 2944,
+                    "cdna_coding_start": 407,
+                    "domains": [
+                        {
+                            "name": "PF02494",
+                            "regions": [
+                                {
+                                    "end": 619,
+                                    "start": 538
+                                },
+                                {
+                                    "end": 698,
+                                    "start": 621
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SM00032",
+                            "regions": [
+                                {
+                                    "end": 410,
+                                    "start": 355
+                                },
+                                {
+                                    "end": 470,
+                                    "start": 415
+                                },
+                                {
+                                    "end": 536,
+                                    "start": 475
+                                },
+                                {
+                                    "end": 764,
+                                    "start": 704
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SSF57535",
+                            "regions": [
+                                {
+                                    "end": 410,
+                                    "start": 351
+                                },
+                                {
+                                    "end": 470,
+                                    "start": 411
+                                },
+                                {
+                                    "end": 537,
+                                    "start": 471
+                                },
+                                {
+                                    "end": 767,
+                                    "start": 704
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PF07699",
+                            "regions": [
+                                {
+                                    "end": 337,
+                                    "start": 287
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PS50825",
+                            "regions": [
+                                {
+                                    "end": 619,
+                                    "start": 537
+                                },
+                                {
+                                    "end": 701,
+                                    "start": 620
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PF00092",
+                            "regions": [
+                                {
+                                    "end": 229,
+                                    "start": 61
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SSF57184",
+                            "regions": [
+                                {
+                                    "end": 417,
+                                    "start": 246
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PS50923",
+                            "regions": [
+                                {
+                                    "end": 412,
+                                    "start": 353
+                                },
+                                {
+                                    "end": 472,
+                                    "start": 413
+                                },
+                                {
+                                    "end": 538,
+                                    "start": 473
+                                },
+                                {
+                                    "end": 766,
+                                    "start": 702
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SM00327",
+                            "regions": [
+                                {
+                                    "end": 237,
+                                    "start": 58
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PS50234",
+                            "regions": [
+                                {
+                                    "end": 241,
+                                    "start": 60
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SSF53300",
+                            "regions": [
+                                {
+                                    "end": 239,
+                                    "start": 56
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PF00084",
+                            "regions": [
+                                {
+                                    "end": 407,
+                                    "start": 355
+                                },
+                                {
+                                    "end": 470,
+                                    "start": 415
+                                }
+                            ]
+                        }
+                    ],
+                    "end": 113342160,
+                    "exons": [
+                        {
+                            "end": 113238595,
+                            "start": 113238163
+                        },
+                        {
+                            "end": 113242036,
+                            "start": 113241915
+                        },
+                        {
+                            "end": 113243716,
+                            "start": 113243522
+                        },
+                        {
+                            "end": 113244772,
+                            "start": 113244641
+                        },
+                        {
+                            "end": 113245973,
+                            "start": 113245866
+                        },
+                        {
+                            "end": 113252059,
+                            "start": 113251930
+                        },
+                        {
+                            "end": 113259213,
+                            "start": 113259095
+                        },
+                        {
+                            "end": 113261518,
+                            "start": 113261321
+                        },
+                        {
+                            "end": 113265497,
+                            "start": 113265318
+                        },
+                        {
+                            "end": 113275385,
+                            "start": 113275206
+                        },
+                        {
+                            "end": 113276386,
+                            "start": 113276228
+                        },
+                        {
+                            "end": 113308571,
+                            "start": 113308395
+                        },
+                        {
+                            "end": 113312384,
+                            "start": 113312129
+                        },
+                        {
+                            "end": 113342160,
+                            "start": 113341293
+                        }
+                    ],
+                    "is_best_transcript": false,
+                    "name": "ENST00000374461",
+                    "start": 113238163
+                }
+            ]
+        },
+        {
+            "aliases": [
+                "ARID1B"
+            ],
+            "chr": "6",
+            "end": 157530401,
+            "name": "ENSG00000049618",
+            "start": 157099063,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "cdna_coding_end": 6751,
+                    "cdna_coding_start": 2,
+                    "domains": [
+                        {
+                            "name": "PF12031",
+                            "regions": [
+                                {
+                                    "end": 2195,
+                                    "start": 1939
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PS50324",
+                            "regions": [
+                                {
+                                    "end": 57,
+                                    "start": 35
+                                },
+                                {
+                                    "end": 784,
+                                    "start": 697
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PF01388",
+                            "regions": [
+                                {
+                                    "end": 1153,
+                                    "start": 1065
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PS50099",
+                            "regions": [
+                                {
+                                    "end": 820,
+                                    "start": 715
+                                },
+                                {
+                                    "end": 1610,
+                                    "start": 1472
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SSF48371",
+                            "regions": [
+                                {
+                                    "end": 2220,
+                                    "start": 2075
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PS50316",
+                            "regions": [
+                                {
+                                    "end": 104,
+                                    "start": 81
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PS50322",
+                            "regions": [
+                                {
+                                    "end": 131,
+                                    "start": 107
+                                },
+                                {
+                                    "end": 646,
+                                    "start": 574
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PS51011",
+                            "regions": [
+                                {
+                                    "end": 1157,
+                                    "start": 1066
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PS50310",
+                            "regions": [
+                                {
+                                    "end": 47,
+                                    "start": 2
+                                },
+                                {
+                                    "end": 493,
+                                    "start": 329
+                                }
+                            ]
+                        },
+                        {
+                            "name": "PS50315",
+                            "regions": [
+                                {
+                                    "end": 401,
+                                    "start": 141
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SSF46774",
+                            "regions": [
+                                {
+                                    "end": 1168,
+                                    "start": 1049
+                                }
+                            ]
+                        },
+                        {
+                            "name": "SM00501",
+                            "regions": [
+                                {
+                                    "end": 1158,
+                                    "start": 1067
+                                }
+                            ]
+                        }
+                    ],
+                    "end": 157529495,
+                    "exons": [
+                        {
+                            "end": 157100605,
+                            "start": 157099063
+                        },
+                        {
+                            "end": 157150555,
+                            "start": 157150361
+                        },
+                        {
+                            "end": 157192786,
+                            "start": 157192748
+                        },
+                        {
+                            "end": 157222659,
+                            "start": 157222510
+                        },
+                        {
+                            "end": 157256710,
+                            "start": 157256600
+                        },
+                        {
+                            "end": 157406039,
+                            "start": 157405796
+                        },
+                        {
+                            "end": 157431695,
+                            "start": 157431606
+                        },
+                        {
+                            "end": 157454341,
+                            "start": 157454162
+                        },
+                        {
+                            "end": 157470085,
+                            "start": 157469758
+                        },
+                        {
+                            "end": 157488319,
+                            "start": 157488174
+                        },
+                        {
+                            "end": 157495251,
+                            "start": 157495142
+                        },
+                        {
+                            "end": 157502312,
+                            "start": 157502103
+                        },
+                        {
+                            "end": 157505569,
+                            "start": 157505365
+                        },
+                        {
+                            "end": 157510914,
+                            "start": 157510776
+                        },
+                        {
+                            "end": 157511344,
+                            "start": 157511172
+                        },
+                        {
+                            "end": 157517449,
+                            "start": 157517299
+                        },
+                        {
+                            "end": 157520041,
+                            "start": 157519945
+                        },
+                        {
+                            "end": 157522622,
+                            "start": 157521839
+                        },
+                        {
+                            "end": 157525130,
+                            "start": 157525000
+                        },
+                        {
+                            "end": 157529495,
+                            "start": 157527301
+                        }
+                    ],
+                    "is_best_transcript": true,
+                    "name": "ENST00000346085",
+                    "start": 157099063
+                }
+            ]
+        }
+    ]
+}
diff --git a/tests/tools/data/example_genes.v3.json b/tests/tools/data/example_genes.v3.json
new file mode 100644
index 00000000..6a590488
--- /dev/null
+++ b/tests/tools/data/example_genes.v3.json
@@ -0,0 +1 @@
+{"genes": [{"aliases": ["EGFR"], "chr": "7", "end": 55324313, "name": "ENSG00000146648", "start": 55086714, "strand": "+", "transcripts": [{"end": 55270769, "exons": [{"end": 55087058, "start": 55086714}, {"end": 55210130, "start": 55209979}, {"end": 55211181, "start": 55210998}, {"end": 55219055, "start": 55218987}, {"end": 55220357, "start": 55220239}, {"end": 55221845, "start": 55221704}, {"end": 55223639, "start": 55223523}, {"end": 55224352, "start": 55224226}, {"end": 55224525, "start": 55224452}, {"end": 55225446, "start": 55225356}, {"end": 55228031, "start": 55227832}, {"end": 55229324, "start": 55229192}, {"end": 55231516, "start": 55231426}, {"end": 55233130, "start": 55232973}, {"end": 55238906, "start": 55238868}, {"end": 55240817, "start": 55240676}, {"end": 55241736, "start": 55241614}, {"end": 55242513, "start": 55242415}, {"end": 55249171, "start": 55248986}, {"end": 55259567, "start": 55259412}, {"end": 55260534, "start": 55260459}, {"end": 55266556, "start": 55266410}, {"end": 55268106, "start": 55268009}, {"end": 55269048, "start": 55268881}, {"end": 55269475, "start": 55269428}, {"end": 55270769, "start": 55270210}], "is_best_transcript": false, "name": "ENST00000455089", "start": 55086714, "translations": [{"cdna_coding_end": 3533, "cdna_coding_start": 258, "domains": [{"name": "PIRSF000619", "regions": [{"end": 1090, "start": 1}]}, {"name": "PF07714", "regions": [{"end": 920, "start": 669}]}, {"name": "SSF52058", "regions": [{"end": 191, "start": 28}, {"end": 475, "start": 283}]}, {"name": "PF00757", "regions": [{"end": 293, "start": 141}]}, {"name": "PS50011", "regions": [{"end": 934, "start": 667}]}, {"name": "PS50311", "regions": [{"end": 219, "start": 145}]}, {"name": "SSF57184", "regions": [{"end": 290, "start": 142}, {"end": 593, "start": 460}]}, {"name": "PR00109", "regions": [{"end": 758, "start": 745}, {"end": 800, "start": 782}, {"end": 841, "start": 831}, {"end": 872, "start": 850}, {"end": 916, "start": 894}]}, {"name": 
"SSF56112", "regions": [{"end": 975, "start": 651}]}, {"name": "PF01030", "regions": [{"end": 141, "start": 57}, {"end": 435, "start": 316}]}, {"name": "SM00220", "regions": [{"end": 924, "start": 667}]}, {"name": "SM00261", "regions": [{"end": 225, "start": 183}, {"end": 502, "start": 451}, {"end": 556, "start": 507}]}, {"name": "SM00219", "regions": [{"end": 923, "start": 667}]}, {"name": "PF00069", "regions": [{"end": 919, "start": 667}]}]}]}, {"end": 55236328, "exons": [{"end": 55087058, "start": 55086725}, {"end": 55210130, "start": 55209979}, {"end": 55211181, "start": 55210998}, {"end": 55214433, "start": 55214299}, {"end": 55219055, "start": 55218987}, {"end": 55220357, "start": 55220239}, {"end": 55221845, "start": 55221704}, {"end": 55223639, "start": 55223523}, {"end": 55224352, "start": 55224226}, {"end": 55224525, "start": 55224452}, {"end": 55225446, "start": 55225356}, {"end": 55228031, "start": 55227832}, {"end": 55229324, "start": 55229192}, {"end": 55231516, "start": 55231426}, {"end": 55233130, "start": 55232973}, {"end": 55236328, "start": 55236216}], "is_best_transcript": false, "name": "ENST00000342916", "start": 55086725, "translations": [{"cdna_coding_end": 2133, "cdna_coding_start": 247, "domains": [{"name": "PS50311", "regions": [{"end": 264, "start": 187}]}, {"name": "SSF57184", "regions": [{"end": 339, "start": 182}, {"end": 624, "start": 505}]}, {"name": "SSF52058", "regions": [{"end": 211, "start": 29}, {"end": 520, "start": 328}]}, {"name": "PF00757", "regions": [{"end": 338, "start": 185}]}, {"name": "SM00261", "regions": [{"end": 270, "start": 228}, {"end": 547, "start": 496}, {"end": 601, "start": 552}]}, {"name": "PF01030", "regions": [{"end": 167, "start": 57}, {"end": 480, "start": 361}]}]}]}, {"end": 55238738, "exons": [{"end": 55087058, "start": 55086726}, {"end": 55210130, "start": 55209979}, {"end": 55211181, "start": 55210998}, {"end": 55214433, "start": 55214299}, {"end": 55219055, "start": 55218987}, {"end": 55220357, 
"start": 55220239}, {"end": 55221845, "start": 55221704}, {"end": 55223639, "start": 55223523}, {"end": 55224352, "start": 55224226}, {"end": 55224525, "start": 55224452}, {"end": 55225446, "start": 55225356}, {"end": 55228031, "start": 55227832}, {"end": 55229324, "start": 55229192}, {"end": 55231516, "start": 55231426}, {"end": 55233130, "start": 55232973}, {"end": 55238738, "start": 55238000}], "is_best_transcript": false, "name": "ENST00000344576", "start": 55086726, "translations": [{"cdna_coding_end": 2363, "cdna_coding_start": 246, "domains": [{"name": "SSF57184", "regions": [{"end": 339, "start": 182}, {"end": 624, "start": 505}]}, {"name": "PS50311", "regions": [{"end": 264, "start": 187}]}, {"name": "PF00757", "regions": [{"end": 338, "start": 185}]}, {"name": "SSF52058", "regions": [{"end": 211, "start": 29}, {"end": 520, "start": 328}]}, {"name": "SM00261", "regions": [{"end": 270, "start": 228}, {"end": 547, "start": 496}, {"end": 601, "start": 552}]}, {"name": "PF01030", "regions": [{"end": 167, "start": 57}, {"end": 480, "start": 361}]}]}]}, {"end": 55224644, "exons": [{"end": 55087058, "start": 55086727}, {"end": 55210130, "start": 55209979}, {"end": 55211181, "start": 55210998}, {"end": 55214433, "start": 55214299}, {"end": 55219055, "start": 55218987}, {"end": 55220357, "start": 55220239}, {"end": 55221845, "start": 55221704}, {"end": 55223639, "start": 55223523}, {"end": 55224352, "start": 55224226}, {"end": 55224644, "start": 55224452}], "is_best_transcript": false, "name": "ENST00000420316", "start": 55086727, "translations": [{"cdna_coding_end": 1462, "cdna_coding_start": 245, "domains": [{"name": "SSF57184", "regions": [{"end": 339, "start": 182}]}, {"name": "PS50311", "regions": [{"end": 264, "start": 187}]}, {"name": "PF00757", "regions": [{"end": 338, "start": 185}]}, {"name": "SSF52058", "regions": [{"end": 211, "start": 29}, {"end": 403, "start": 328}]}, {"name": "SM00261", "regions": [{"end": 270, "start": 228}]}, {"name": "PF01030", 
"regions": [{"end": 167, "start": 57}]}]}]}, {"end": 55279321, "exons": [{"end": 55087058, "start": 55086794}, {"end": 55210130, "start": 55209979}, {"end": 55211181, "start": 55210998}, {"end": 55214433, "start": 55214299}, {"end": 55219055, "start": 55218987}, {"end": 55220357, "start": 55220239}, {"end": 55221845, "start": 55221704}, {"end": 55223639, "start": 55223523}, {"end": 55224352, "start": 55224226}, {"end": 55224525, "start": 55224452}, {"end": 55225446, "start": 55225356}, {"end": 55228031, "start": 55227832}, {"end": 55229324, "start": 55229192}, {"end": 55231516, "start": 55231426}, {"end": 55233130, "start": 55232973}, {"end": 55238906, "start": 55238868}, {"end": 55240817, "start": 55240676}, {"end": 55241736, "start": 55241614}, {"end": 55242513, "start": 55242415}, {"end": 55249171, "start": 55248986}, {"end": 55259567, "start": 55259412}, {"end": 55260534, "start": 55260459}, {"end": 55266556, "start": 55266410}, {"end": 55268106, "start": 55268009}, {"end": 55269048, "start": 55268881}, {"end": 55269475, "start": 55269428}, {"end": 55270318, "start": 55270210}, {"end": 55279321, "start": 55272949}], "is_best_transcript": true, "name": "ENST00000275493", "start": 55086794, "translations": [{"cdna_coding_end": 3810, "cdna_coding_start": 178, "domains": [{"name": "SM00220", "regions": [{"end": 969, "start": 712}]}, {"name": "PF01030", "regions": [{"end": 167, "start": 57}, {"end": 480, "start": 361}]}, {"name": "SSF56112", "regions": [{"end": 1020, "start": 696}]}, {"name": "PF00069", "regions": [{"end": 964, "start": 712}]}, {"name": "SM00219", "regions": [{"end": 968, "start": 712}]}, {"name": "SM00261", "regions": [{"end": 270, "start": 228}, {"end": 547, "start": 496}, {"end": 601, "start": 552}]}, {"name": "PF00757", "regions": [{"end": 338, "start": 185}]}, {"name": "SSF52058", "regions": [{"end": 211, "start": 29}, {"end": 520, "start": 328}]}, {"name": "PF07714", "regions": [{"end": 965, "start": 714}]}, {"name": "PIRSF000619", "regions": 
[{"end": 1210, "start": 1}]}, {"name": "PR00109", "regions": [{"end": 803, "start": 790}, {"end": 845, "start": 827}, {"end": 886, "start": 876}, {"end": 917, "start": 895}, {"end": 961, "start": 939}]}, {"name": "SSF57184", "regions": [{"end": 339, "start": 182}, {"end": 638, "start": 505}]}, {"name": "PS50311", "regions": [{"end": 264, "start": 187}]}, {"name": "PS50011", "regions": [{"end": 979, "start": 712}]}]}]}, {"end": 55324313, "exons": [{"end": 55087058, "start": 55086811}, {"end": 55210130, "start": 55209979}, {"end": 55211181, "start": 55210998}, {"end": 55214433, "start": 55214299}, {"end": 55219055, "start": 55218987}, {"end": 55220357, "start": 55220239}, {"end": 55221845, "start": 55221704}, {"end": 55223639, "start": 55223523}, {"end": 55224352, "start": 55224226}, {"end": 55224525, "start": 55224452}, {"end": 55225446, "start": 55225356}, {"end": 55228031, "start": 55227832}, {"end": 55229324, "start": 55229192}, {"end": 55231516, "start": 55231426}, {"end": 55233130, "start": 55232973}, {"end": 55238906, "start": 55238868}, {"end": 55240621, "start": 55240539}, {"end": 55324313, "start": 55323947}], "is_best_transcript": false, "name": "ENST00000442591", "start": 55086811, "translations": [{"cdna_coding_end": 2134, "cdna_coding_start": 161, "domains": [{"name": "PF01030", "regions": [{"end": 167, "start": 57}, {"end": 480, "start": 361}]}, {"name": "SM00261", "regions": [{"end": 270, "start": 228}, {"end": 547, "start": 496}, {"end": 601, "start": 552}, {"end": 653, "start": 614}]}, {"name": "SSF52058", "regions": [{"end": 211, "start": 29}, {"end": 520, "start": 328}]}, {"name": "PF00757", "regions": [{"end": 338, "start": 185}]}, {"name": "PS50311", "regions": [{"end": 264, "start": 187}]}, {"name": "SSF57184", "regions": [{"end": 339, "start": 182}, {"end": 638, "start": 505}]}]}]}, {"end": 55214417, "exons": [{"end": 55177651, "start": 55177416}, {"end": 55210130, "start": 55209979}, {"end": 55211181, "start": 55210998}, {"end": 55214417, 
"start": 55214299}], "is_best_transcript": false, "name": "ENST00000450046", "start": 55177416, "translations": [{"cdna_coding_end": 691, "cdna_coding_start": 308, "domains": [{"name": "SSF52058", "regions": [{"end": 127, "start": 1}]}, {"name": "PF01030", "regions": [{"end": 114, "start": 4}]}]}]}, {"end": 55273591, "exons": [{"end": 55177651, "start": 55177540}, {"end": 55210130, "start": 55209979}, {"end": 55211181, "start": 55210998}, {"end": 55214433, "start": 55214299}, {"end": 55219055, "start": 55218987}, {"end": 55220357, "start": 55220239}, {"end": 55221845, "start": 55221704}, {"end": 55223639, "start": 55223523}, {"end": 55224352, "start": 55224226}, {"end": 55224525, "start": 55224452}, {"end": 55225446, "start": 55225356}, {"end": 55228031, "start": 55227832}, {"end": 55229324, "start": 55229192}, {"end": 55231516, "start": 55231426}, {"end": 55233130, "start": 55232973}, {"end": 55238906, "start": 55238868}, {"end": 55240817, "start": 55240676}, {"end": 55241736, "start": 55241614}, {"end": 55242513, "start": 55242415}, {"end": 55249171, "start": 55248986}, {"end": 55259567, "start": 55259412}, {"end": 55260534, "start": 55260459}, {"end": 55266556, "start": 55266410}, {"end": 55268106, "start": 55268009}, {"end": 55269048, "start": 55268881}, {"end": 55269475, "start": 55269428}, {"end": 55270318, "start": 55270210}, {"end": 55273591, "start": 55272949}], "is_best_transcript": false, "name": "ENST00000454757", "start": 55177540, "translations": [{"cdna_coding_end": 3657, "cdna_coding_start": 184, "domains": [{"name": "SM00261", "regions": [{"end": 217, "start": 175}, {"end": 494, "start": 443}, {"end": 548, "start": 499}]}, {"name": "PF00069", "regions": [{"end": 911, "start": 659}]}, {"name": "SM00219", "regions": [{"end": 915, "start": 659}]}, {"name": "SSF56112", "regions": [{"end": 967, "start": 643}]}, {"name": "SM00220", "regions": [{"end": 916, "start": 659}]}, {"name": "PF01030", "regions": [{"end": 114, "start": 4}, {"end": 427, "start": 
308}]}, {"name": "PS50311", "regions": [{"end": 211, "start": 134}]}, {"name": "PS50011", "regions": [{"end": 926, "start": 659}]}, {"name": "PR00109", "regions": [{"end": 750, "start": 737}, {"end": 792, "start": 774}, {"end": 833, "start": 823}, {"end": 864, "start": 842}, {"end": 908, "start": 886}]}, {"name": "SSF57184", "regions": [{"end": 286, "start": 129}, {"end": 585, "start": 452}]}, {"name": "PIRSF000619", "regions": [{"end": 1157, "start": 1}]}, {"name": "PF07714", "regions": [{"end": 912, "start": 661}]}, {"name": "SSF52058", "regions": [{"end": 158, "start": 1}, {"end": 467, "start": 275}]}, {"name": "PF00757", "regions": [{"end": 285, "start": 132}]}]}]}]}, {"aliases": ["DSTYK"], "chr": "1", "end": 205180727, "name": "ENSG00000133059", "start": 205111632, "strand": "-", "transcripts": [{"end": 205180727, "exons": [{"end": 205116873, "start": 205111632}, {"end": 205117467, "start": 205117333}, {"end": 205119898, "start": 205119808}, {"end": 205133083, "start": 205133055}, {"end": 205138960, "start": 205138291}, {"end": 205156934, "start": 205156546}, {"end": 205180727, "start": 205180399}], "is_best_transcript": false, "name": "ENST00000367160", "start": 205111632, "translations": [{"cdna_coding_end": 65, "cdna_coding_start": 1831, "domains": [{"name": "SM00220", "regions": [{"end": 565, "start": 337}]}, {"name": "SSF56112", "regions": [{"end": 585, "start": 452}]}, {"name": "PF00069", "regions": [{"end": 556, "start": 451}]}, {"name": "PF07714", "regions": [{"end": 558, "start": 471}]}, {"name": "PS50011", "regions": [{"end": 565, "start": 312}]}]}]}, {"end": 205180694, "exons": [{"end": 205116873, "start": 205111633}, {"end": 205119922, "start": 205119808}, {"end": 205126514, "start": 205126401}, {"end": 205128807, "start": 205128675}, {"end": 205129398, "start": 205129242}, {"end": 205130515, "start": 205130386}, {"end": 205131340, "start": 205131164}, {"end": 205132134, "start": 205132051}, {"end": 205133083, "start": 205132851}, {"end": 
205138960, "start": 205138291}, {"end": 205156934, "start": 205156546}, {"end": 205180694, "start": 205180399}], "is_best_transcript": false, "name": "ENST00000367161", "start": 205111633, "translations": [{"cdna_coding_end": 32, "cdna_coding_start": 2686, "domains": [{"name": "PF07714", "regions": [{"end": 820, "start": 654}]}, {"name": "PS50011", "regions": [{"end": 884, "start": 652}]}, {"name": "SSF56112", "regions": [{"end": 853, "start": 627}]}, {"name": "SM00220", "regions": [{"end": 861, "start": 652}]}, {"name": "PF00069", "regions": [{"end": 824, "start": 654}]}, {"name": "SM00219", "regions": [{"end": 861, "start": 652}]}]}]}, {"end": 205180694, "exons": [{"end": 205116873, "start": 205111633}, {"end": 205117467, "start": 205117333}, {"end": 205119922, "start": 205119808}, {"end": 205126514, "start": 205126401}, {"end": 205128807, "start": 205128675}, {"end": 205129398, "start": 205129242}, {"end": 205130515, "start": 205130386}, {"end": 205131340, "start": 205131164}, {"end": 205132134, "start": 205132051}, {"end": 205133083, "start": 205132851}, {"end": 205138960, "start": 205138291}, {"end": 205156934, "start": 205156546}, {"end": 205180694, "start": 205180399}], "is_best_transcript": true, "name": "ENST00000367162", "start": 205111633, "translations": [{"cdna_coding_end": 32, "cdna_coding_start": 2821, "domains": [{"name": "PF07714", "regions": [{"end": 899, "start": 654}]}, {"name": "PS50011", "regions": [{"end": 906, "start": 652}]}, {"name": "SSF56112", "regions": [{"end": 897, "start": 638}]}, {"name": "SM00220", "regions": [{"end": 906, "start": 652}]}, {"name": "SM00219", "regions": [{"end": 906, "start": 652}]}, {"name": "PF00069", "regions": [{"end": 897, "start": 654}]}]}]}]}, {"aliases": ["NDUFA12"], "chr": "12", "end": 95397546, "name": "ENSG00000184752", "start": 95290831, "strand": "-", "transcripts": [{"end": 95397436, "exons": [{"end": 95291086, "start": 95290831}, {"end": 95318582, "start": 95318422}, {"end": 95322039, "start": 
95321793}, {"end": 95396597, "start": 95396515}, {"end": 95397436, "start": 95397371}], "is_best_transcript": false, "name": "ENST00000552205", "start": 95290831}, {"end": 95397476, "exons": [{"end": 95365261, "start": 95365108}, {"end": 95396597, "start": 95396582}, {"end": 95397476, "start": 95397371}], "is_best_transcript": false, "name": "ENST00000547157", "start": 95365108, "translations": [{"cdna_coding_end": 21, "cdna_coding_start": 188}]}, {"end": 95397384, "exons": [{"end": 95365396, "start": 95365109}, {"end": 95388033, "start": 95387946}, {"end": 95390752, "start": 95390680}, {"end": 95396597, "start": 95396515}, {"end": 95397384, "start": 95397371}], "is_best_transcript": false, "name": "ENST00000551991", "start": 95365109, "translations": [{"cdna_coding_end": 1, "cdna_coding_start": 144, "domains": [{"name": "PF05071", "regions": [{"end": 33, "start": 12}]}]}]}, {"end": 95397546, "exons": [{"end": 95365396, "start": 95365109}, {"end": 95388033, "start": 95387946}, {"end": 95396597, "start": 95396515}, {"end": 95397546, "start": 95397371}], "is_best_transcript": true, "name": "ENST00000327772", "start": 95365109, "translations": [{"cdna_coding_end": 91, "cdna_coding_start": 528, "domains": [{"name": "PF05071", "regions": [{"end": 137, "start": 36}]}]}]}, {"end": 95397489, "exons": [{"end": 95365396, "start": 95365112}, {"end": 95396597, "start": 95396515}, {"end": 95397489, "start": 95397371}], "is_best_transcript": false, "name": "ENST00000547986", "start": 95365112, "translations": [{"cdna_coding_end": 34, "cdna_coding_start": 225, "domains": [{"name": "PF05071", "regions": [{"end": 53, "start": 36}]}]}]}, {"end": 95397524, "exons": [{"end": 95365396, "start": 95365254}, {"end": 95366265, "start": 95366171}, {"end": 95388033, "start": 95387946}, {"end": 95396597, "start": 95396515}, {"end": 95397524, "start": 95397371}], "is_best_transcript": false, "name": "ENST00000546788", "start": 95365254, "translations": [{"cdna_coding_end": 69, 
"cdna_coding_start": 368, "domains": [{"name": "PF05071", "regions": [{"end": 87, "start": 36}]}]}]}]}, {"aliases": ["FRMD6"], "chr": "14", "end": 52197445, "name": "ENSG00000139926", "start": 51955818, "strand": "+", "transcripts": [{"end": 52197177, "exons": [{"end": 51956138, "start": 51955855}, {"end": 52037128, "start": 52037066}, {"end": 52156653, "start": 52156409}, {"end": 52164950, "start": 52164860}, {"end": 52167853, "start": 52167774}, {"end": 52169306, "start": 52169230}, {"end": 52171653, "start": 52171467}, {"end": 52174951, "start": 52174796}, {"end": 52178314, "start": 52178249}, {"end": 52179269, "start": 52179201}, {"end": 52182217, "start": 52182043}, {"end": 52187108, "start": 52186773}, {"end": 52188798, "start": 52188667}, {"end": 52192588, "start": 52192497}, {"end": 52197177, "start": 52194463}], "is_best_transcript": false, "name": "ENST00000356218", "start": 51955855, "translations": [{"cdna_coding_end": 2338, "cdna_coding_start": 494, "domains": [{"name": "PF09379", "regions": [{"end": 109, "start": 20}]}, {"name": "PF09380", "regions": [{"end": 322, "start": 237}]}, {"name": "SSF50729", "regions": [{"end": 375, "start": 219}]}, {"name": "SM00295", "regions": [{"end": 226, "start": 12}]}, {"name": "PS50057", "regions": [{"end": 320, "start": 16}]}, {"name": "PF00373", "regions": [{"end": 226, "start": 115}]}, {"name": "SSF47031", "regions": [{"end": 218, "start": 110}]}, {"name": "SSF54236", "regions": [{"end": 110, "start": 14}]}]}]}, {"end": 52197445, "exons": [{"end": 52118714, "start": 52118576}, {"end": 52156653, "start": 52156409}, {"end": 52164950, "start": 52164860}, {"end": 52167853, "start": 52167774}, {"end": 52169306, "start": 52169230}, {"end": 52171653, "start": 52171467}, {"end": 52174951, "start": 52174796}, {"end": 52178314, "start": 52178249}, {"end": 52179269, "start": 52179201}, {"end": 52182217, "start": 52182043}, {"end": 52187108, "start": 52186773}, {"end": 52188798, "start": 52188667}, {"end": 52192588, "start": 
52192497}, {"end": 52197445, "start": 52194463}], "is_best_transcript": true, "name": "ENST00000395718", "start": 52118576, "translations": [{"cdna_coding_end": 2130, "cdna_coding_start": 286, "domains": [{"name": "PF00373", "regions": [{"end": 226, "start": 115}]}, {"name": "SSF47031", "regions": [{"end": 218, "start": 110}]}, {"name": "SSF54236", "regions": [{"end": 110, "start": 14}]}, {"name": "PS50057", "regions": [{"end": 320, "start": 16}]}, {"name": "SM00295", "regions": [{"end": 226, "start": 12}]}, {"name": "SSF50729", "regions": [{"end": 375, "start": 219}]}, {"name": "PF09380", "regions": [{"end": 322, "start": 237}]}, {"name": "PF09379", "regions": [{"end": 109, "start": 20}]}]}]}, {"end": 52195654, "exons": [{"end": 52118714, "start": 52118665}, {"end": 52156653, "start": 52156409}, {"end": 52164950, "start": 52164860}, {"end": 52167877, "start": 52167774}, {"end": 52169306, "start": 52169230}, {"end": 52171653, "start": 52171467}, {"end": 52174951, "start": 52174796}, {"end": 52178314, "start": 52178249}, {"end": 52179269, "start": 52179201}, {"end": 52182217, "start": 52182043}, {"end": 52187108, "start": 52186773}, {"end": 52188798, "start": 52188667}, {"end": 52192588, "start": 52192497}, {"end": 52195654, "start": 52194463}], "is_best_transcript": false, "name": "ENST00000344768", "start": 52118665, "translations": [{"cdna_coding_end": 2065, "cdna_coding_start": 197, "domains": [{"name": "PF09380", "regions": [{"end": 330, "start": 245}]}, {"name": "PF09379", "regions": [{"end": 117, "start": 20}]}, {"name": "SSF47031", "regions": [{"end": 226, "start": 118}]}, {"name": "PF00373", "regions": [{"end": 234, "start": 123}]}, {"name": "SSF54236", "regions": [{"end": 118, "start": 14}]}, {"name": "PS50057", "regions": [{"end": 328, "start": 16}]}, {"name": "SM00295", "regions": [{"end": 234, "start": 12}]}, {"name": "SSF50729", "regions": [{"end": 383, "start": 227}]}]}]}, {"end": 52164945, "exons": [{"end": 52118935, "start": 52118698}, {"end": 
52156653, "start": 52156409}, {"end": 52164945, "start": 52164860}], "is_best_transcript": false, "name": "ENST00000554778", "start": 52118698}, {"end": 52174806, "exons": [{"end": 52164950, "start": 52164706}, {"end": 52167877, "start": 52167774}, {"end": 52169306, "start": 52169230}, {"end": 52171653, "start": 52171467}, {"end": 52174806, "start": 52174796}], "is_best_transcript": false, "name": "ENST00000555936", "start": 52164706}, {"end": 52197148, "exons": [{"end": 52164950, "start": 52164831}, {"end": 52167853, "start": 52167774}, {"end": 52169306, "start": 52169230}, {"end": 52171653, "start": 52171467}, {"end": 52174951, "start": 52174796}, {"end": 52178314, "start": 52178249}, {"end": 52179269, "start": 52179201}, {"end": 52182217, "start": 52182043}, {"end": 52187108, "start": 52186773}, {"end": 52188798, "start": 52188667}, {"end": 52192588, "start": 52192497}, {"end": 52197148, "start": 52194463}], "is_best_transcript": false, "name": "ENST00000554167", "start": 52164831, "translations": [{"cdna_coding_end": 1775, "cdna_coding_start": 138, "domains": [{"name": "SSF50729", "regions": [{"end": 306, "start": 150}]}, {"name": "PS50057", "regions": [{"end": 251, "start": 1}]}, {"name": "SSF54236", "regions": [{"end": 41, "start": 1}]}, {"name": "SSF47031", "regions": [{"end": 149, "start": 41}]}, {"name": "PF00373", "regions": [{"end": 157, "start": 46}]}, {"name": "PF09380", "regions": [{"end": 253, "start": 168}]}]}]}, {"end": 52175062, "exons": [{"end": 52169306, "start": 52169266}, {"end": 52171653, "start": 52171467}, {"end": 52175062, "start": 52174796}], "is_best_transcript": false, "name": "ENST00000557405", "start": 52169266, "translations": [{"cdna_coding_end": 390, "cdna_coding_start": 1, "domains": [{"name": "PS50057", "regions": [{"end": 129, "start": 1}]}, {"name": "PF00373", "regions": [{"end": 124, "start": 13}]}, {"name": "SSF47031", "regions": [{"end": 116, "start": 8}]}]}]}, {"end": 52187243, "exons": [{"end": 52179269, "start": 
52179231}, {"end": 52182217, "start": 52182043}, {"end": 52187243, "start": 52186773}], "is_best_transcript": false, "name": "ENST00000555197", "start": 52179231, "translations": [{"cdna_coding_end": 618, "cdna_coding_start": 1, "domains": [{"name": "PF09380", "regions": [{"end": 60, "start": 2}]}, {"name": "PS50057", "regions": [{"end": 58, "start": 1}]}, {"name": "SSF50729", "regions": [{"end": 113, "start": 2}]}]}]}, {"end": 52192513, "exons": [{"end": 52184066, "start": 52183973}, {"end": 52187108, "start": 52186773}, {"end": 52188798, "start": 52188673}, {"end": 52192513, "start": 52192497}], "is_best_transcript": false, "name": "ENST00000555703", "start": 52183973, "translations": [{"cdna_coding_end": 573, "cdna_coding_start": 145}]}, {"end": 52195487, "exons": [{"end": 52184066, "start": 52183973}, {"end": 52187108, "start": 52186773}, {"end": 52188798, "start": 52188667}, {"end": 52192588, "start": 52192497}, {"end": 52195487, "start": 52194463}], "is_best_transcript": false, "name": "ENST00000553556", "start": 52183973, "translations": [{"cdna_coding_end": 939, "cdna_coding_start": 145}]}]}, {"aliases": ["PRKCB"], "chr": "16", "end": 24231932, "name": "ENSG00000166501", "start": 23847322, "strand": "+", "transcripts": [{"end": 24231932, "exons": [{"end": 23847669, "start": 23847322}, {"end": 23848727, "start": 23848696}, {"end": 23999911, "start": 23999829}, {"end": 24043568, "start": 24043457}, {"end": 24046868, "start": 24046740}, {"end": 24104268, "start": 24104112}, {"end": 24105618, "start": 24105484}, {"end": 24124390, "start": 24124294}, {"end": 24135302, "start": 24135156}, {"end": 24166178, "start": 24166005}, {"end": 24183682, "start": 24183591}, {"end": 24185901, "start": 24185839}, {"end": 24192249, "start": 24192111}, {"end": 24196512, "start": 24196432}, {"end": 24196888, "start": 24196781}, {"end": 24202551, "start": 24202411}, {"end": 24231932, "start": 24231282}], "is_best_transcript": true, "name": "ENST00000321728", "start": 23847322, 
"translations": [{"cdna_coding_end": 2191, "cdna_coding_start": 176, "domains": [{"name": "SM00239", "regions": [{"end": 275, "start": 172}]}, {"name": "PF07714", "regions": [{"end": 583, "start": 344}]}, {"name": "SSF49562", "regions": [{"end": 288, "start": 157}]}, {"name": "SM00109", "regions": [{"end": 86, "start": 37}, {"end": 151, "start": 102}]}, {"name": "PS50011", "regions": [{"end": 600, "start": 342}]}, {"name": "PR00008", "regions": [{"end": 48, "start": 34}, {"end": 59, "start": 50}, {"end": 74, "start": 63}, {"end": 152, "start": 140}]}, {"name": "PF00433", "regions": [{"end": 666, "start": 623}]}, {"name": "SM00220", "regions": [{"end": 600, "start": 342}]}, {"name": "PF00168", "regions": [{"end": 259, "start": 175}]}, {"name": "SSF57889", "regions": [{"end": 92, "start": 6}, {"end": 157, "start": 101}]}, {"name": "PF00130", "regions": [{"end": 87, "start": 37}, {"end": 153, "start": 102}]}, {"name": "PS50081", "regions": [{"end": 86, "start": 36}, {"end": 151, "start": 101}]}, {"name": "SSF56112", "regions": [{"end": 627, "start": 317}]}, {"name": "PF00069", "regions": [{"end": 586, "start": 343}]}, {"name": "SM00219", "regions": [{"end": 576, "start": 342}]}, {"name": "PR00360", "regions": [{"end": 200, "start": 188}, {"end": 230, "start": 217}, {"end": 248, "start": 240}]}, {"name": "SM00133", "regions": [{"end": 664, "start": 601}]}, {"name": "PS50004", "regions": [{"end": 260, "start": 173}]}, {"name": "PIRSF000550", "regions": [{"end": 671, "start": 1}]}]}]}, {"end": 24231932, "exons": [{"end": 23847669, "start": 23847345}, {"end": 23848727, "start": 23848696}, {"end": 23999911, "start": 23999829}, {"end": 24043568, "start": 24043457}, {"end": 24046868, "start": 24046740}, {"end": 24104268, "start": 24104112}, {"end": 24105618, "start": 24105484}, {"end": 24124390, "start": 24124294}, {"end": 24135302, "start": 24135156}, {"end": 24166178, "start": 24166005}, {"end": 24183682, "start": 24183591}, {"end": 24185901, "start": 24185839}, {"end": 
24192249, "start": 24192111}, {"end": 24196512, "start": 24196432}, {"end": 24196888, "start": 24196781}, {"end": 24202551, "start": 24202411}, {"end": 24231932, "start": 24225979}], "is_best_transcript": false, "name": "ENST00000303531", "start": 23847345, "translations": [{"cdna_coding_end": 2174, "cdna_coding_start": 153, "domains": [{"name": "SM00133", "regions": [{"end": 663, "start": 601}]}, {"name": "PS50004", "regions": [{"end": 260, "start": 173}]}, {"name": "PIRSF000550", "regions": [{"end": 672, "start": 1}]}, {"name": "PF00069", "regions": [{"end": 586, "start": 343}]}, {"name": "PR00360", "regions": [{"end": 200, "start": 188}, {"end": 230, "start": 217}, {"end": 248, "start": 240}]}, {"name": "SM00219", "regions": [{"end": 576, "start": 342}]}, {"name": "PS50081", "regions": [{"end": 86, "start": 36}, {"end": 151, "start": 101}]}, {"name": "SSF56112", "regions": [{"end": 627, "start": 317}]}, {"name": "SM00220", "regions": [{"end": 600, "start": 342}]}, {"name": "PF00433", "regions": [{"end": 664, "start": 627}]}, {"name": "PF00130", "regions": [{"end": 87, "start": 37}, {"end": 153, "start": 102}]}, {"name": "PF00168", "regions": [{"end": 259, "start": 175}]}, {"name": "SSF57889", "regions": [{"end": 92, "start": 6}, {"end": 157, "start": 101}]}, {"name": "PR00008", "regions": [{"end": 48, "start": 34}, {"end": 59, "start": 50}, {"end": 74, "start": 63}, {"end": 152, "start": 140}]}, {"name": "PS50011", "regions": [{"end": 600, "start": 342}]}, {"name": "SM00109", "regions": [{"end": 86, "start": 37}, {"end": 151, "start": 102}]}, {"name": "PF07714", "regions": [{"end": 583, "start": 344}]}, {"name": "SSF49562", "regions": [{"end": 288, "start": 157}]}, {"name": "SM00239", "regions": [{"end": 275, "start": 172}]}]}]}, {"end": 23880647, "exons": [{"end": 23847669, "start": 23847403}, {"end": 23880647, "start": 23880435}], "is_best_transcript": false, "name": "ENST00000498058", "start": 23847403, "translations": [{"cdna_coding_end": 268, 
"cdna_coding_start": 95, "domains": [{"name": "PR00008", "regions": [{"end": 48, "start": 34}, {"end": 57, "start": 50}]}, {"name": "PS50081", "regions": [{"end": 57, "start": 36}]}, {"name": "SSF57889", "regions": [{"end": 57, "start": 6}]}]}]}, {"end": 24124386, "exons": [{"end": 23848727, "start": 23848544}, {"end": 24104268, "start": 24104112}, {"end": 24105618, "start": 24105484}, {"end": 24124386, "start": 24124294}], "is_best_transcript": false, "name": "ENST00000498739", "start": 23848544}, {"end": 24192166, "exons": [{"end": 24163176, "start": 24163006}, {"end": 24166178, "start": 24166005}, {"end": 24183682, "start": 24183591}, {"end": 24185901, "start": 24185839}, {"end": 24192166, "start": 24192111}], "is_best_transcript": false, "name": "ENST00000472066", "start": 24163006}, {"end": 24202909, "exons": [{"end": 24196888, "start": 24196852}, {"end": 24202909, "start": 24202411}], "is_best_transcript": false, "name": "ENST00000466124", "start": 24196852}]}, {"aliases": ["GIMAP4"], "chr": "7", "end": 150271041, "name": "ENSG00000133574", "start": 150264365, "strand": "+", "transcripts": [{"end": 150271041, "exons": [{"end": 150264525, "start": 150264365}, {"end": 150267047, "start": 150266976}, {"end": 150271041, "start": 150269217}], "is_best_transcript": true, "name": "ENST00000255945", "start": 150264365, "translations": [{"cdna_coding_end": 1165, "cdna_coding_start": 176, "domains": [{"name": "PF04548", "regions": [{"end": 238, "start": 31}]}, {"name": "SSF52540", "regions": [{"end": 288, "start": 24}]}]}]}, {"end": 150270602, "exons": [{"end": 150264525, "start": 150264457}, {"end": 150267089, "start": 150266976}, {"end": 150270602, "start": 150269217}], "is_best_transcript": false, "name": "ENST00000461940", "start": 150264457, "translations": [{"cdna_coding_end": 1115, "cdna_coding_start": 84, "domains": [{"name": "PF04548", "regions": [{"end": 252, "start": 45}]}, {"name": "SSF52540", "regions": [{"end": 302, "start": 38}]}]}]}, {"end": 150269569, 
"exons": [{"end": 150264608, "start": 150264524}, {"end": 150267089, "start": 150266976}, {"end": 150269569, "start": 150269217}], "is_best_transcript": false, "name": "ENST00000479232", "start": 150264524, "translations": [{"cdna_coding_end": 552, "cdna_coding_start": 100, "domains": [{"name": "SSF52540", "regions": [{"end": 151, "start": 38}]}, {"name": "PF04548", "regions": [{"end": 151, "start": 45}]}]}]}]}, {"aliases": ["IL7"], "chr": "8", "end": 79717758, "name": "ENSG00000104432", "start": 79587978, "strand": "-", "transcripts": [{"end": 79717758, "exons": [{"end": 79646067, "start": 79645007}, {"end": 79648762, "start": 79648709}, {"end": 79650870, "start": 79650739}, {"end": 79652317, "start": 79652237}, {"end": 79710443, "start": 79710307}, {"end": 79717758, "start": 79717148}], "is_best_transcript": true, "name": "ENST00000263851", "start": 79645007, "translations": [{"cdna_coding_end": 602, "cdna_coding_start": 1135, "domains": [{"name": "PIRSF001942", "regions": [{"end": 177, "start": 1}]}, {"name": "PR00435", "regions": [{"end": 25, "start": 2}, {"end": 48, "start": 26}, {"end": 77, "start": 57}, {"end": 98, "start": 78}, {"end": 118, "start": 99}, {"end": 173, "start": 151}]}, {"name": "PF01415", "regions": [{"end": 173, "start": 28}]}, {"name": "SM00127", "regions": [{"end": 173, "start": 27}]}]}]}, {"end": 79717699, "exons": [{"end": 79646063, "start": 79645283}, {"end": 79648762, "start": 79648709}, {"end": 79650870, "start": 79650739}, {"end": 79652317, "start": 79652237}, {"end": 79659331, "start": 79659129}, {"end": 79710443, "start": 79710307}, {"end": 79717699, "start": 79717148}], "is_best_transcript": false, "name": "ENST00000518982", "start": 79645283, "translations": [{"cdna_coding_end": 543, "cdna_coding_start": 758, "domains": [{"name": "PR00435", "regions": [{"end": 25, "start": 2}, {"end": 48, "start": 26}]}, {"name": "PF01415", "regions": [{"end": 54, "start": 28}]}]}]}, {"end": 79717163, "exons": [{"end": 79646067, "start": 
79645900}, {"end": 79648762, "start": 79648709}, {"end": 79652317, "start": 79652237}, {"end": 79710443, "start": 79710307}, {"end": 79717163, "start": 79717148}], "is_best_transcript": false, "name": "ENST00000520269", "start": 79645900, "translations": [{"cdna_coding_end": 7, "cdna_coding_start": 408, "domains": [{"name": "PF01415", "regions": [{"end": 77, "start": 28}, {"end": 129, "start": 91}]}, {"name": "SM00127", "regions": [{"end": 129, "start": 27}]}, {"name": "PR00435", "regions": [{"end": 25, "start": 2}, {"end": 48, "start": 26}, {"end": 77, "start": 57}]}, {"name": "PIRSF001942", "regions": [{"end": 133, "start": 1}]}]}]}, {"end": 79717163, "exons": [{"end": 79646067, "start": 79645900}, {"end": 79648762, "start": 79648709}, {"end": 79652317, "start": 79652237}, {"end": 79710443, "start": 79710363}, {"end": 79717163, "start": 79717148}], "is_best_transcript": false, "name": "ENST00000520215", "start": 79645900, "translations": [{"cdna_coding_end": 7, "cdna_coding_start": 120, "domains": [{"name": "PR00435", "regions": [{"end": 25, "start": 2}, {"end": 37, "start": 26}]}]}]}, {"end": 79717686, "exons": [{"end": 79646067, "start": 79645900}, {"end": 79648762, "start": 79648709}, {"end": 79650870, "start": 79650739}, {"end": 79652317, "start": 79652237}, {"end": 79710443, "start": 79710363}, {"end": 79717686, "start": 79717148}], "is_best_transcript": false, "name": "ENST00000520317", "start": 79645900, "translations": [{"cdna_coding_end": 530, "cdna_coding_start": 643, "domains": [{"name": "PR00435", "regions": [{"end": 25, "start": 2}, {"end": 37, "start": 26}]}]}]}, {"end": 79652311, "exons": [{"end": 79646067, "start": 79645948}, {"end": 79652311, "start": 79652237}], "is_best_transcript": false, "name": "ENST00000541183", "start": 79645948, "translations": [{"cdna_coding_end": 1, "cdna_coding_start": 195, "domains": [{"name": "SM00127", "regions": [{"end": 60, "start": 1}]}, {"name": "PF01415", "regions": [{"end": 60, "start": 1}]}]}]}, {"end": 
79717758, "exons": [{"end": 79659331, "start": 79659263}, {"end": 79710443, "start": 79710307}, {"end": 79717758, "start": 79717148}], "is_best_transcript": false, "name": "ENST00000379113", "start": 79659263, "translations": [{"cdna_coding_end": 602, "cdna_coding_start": 817, "domains": [{"name": "PF01415", "regions": [{"end": 54, "start": 28}]}, {"name": "PR00435", "regions": [{"end": 25, "start": 2}, {"end": 48, "start": 26}]}]}]}]}, {"aliases": ["SVEP1"], "chr": "9", "end": 113342160, "name": "ENSG00000165124", "start": 113127531, "strand": "-", "transcripts": [{"end": 113342160, "exons": [{"end": 113128840, "start": 113127531}, {"end": 113132296, "start": 113132203}, {"end": 113137743, "start": 113137648}, {"end": 113139646, "start": 113139551}, {"end": 113141797, "start": 113141627}, {"end": 113148354, "start": 113148178}, {"end": 113149738, "start": 113149565}, {"end": 113151867, "start": 113151804}, {"end": 113163289, "start": 113163134}, {"end": 113166832, "start": 113166607}, {"end": 113171231, "start": 113168440}, {"end": 113174015, "start": 113173343}, {"end": 113190038, "start": 113189871}, {"end": 113191614, "start": 113191423}, {"end": 113192284, "start": 113192200}, {"end": 113192730, "start": 113192554}, {"end": 113194314, "start": 113194195}, {"end": 113194915, "start": 113194742}, {"end": 113196786, "start": 113196616}, {"end": 113197644, "start": 113197521}, {"end": 113198784, "start": 113198660}, {"end": 113206000, "start": 113205825}, {"end": 113208318, "start": 113208117}, {"end": 113209337, "start": 113209180}, {"end": 113212540, "start": 113212339}, {"end": 113213682, "start": 113213569}, {"end": 113217983, "start": 113217870}, {"end": 113219632, "start": 113219536}, {"end": 113220842, "start": 113220751}, {"end": 113221393, "start": 113221232}, {"end": 113228306, "start": 113228145}, {"end": 113231381, "start": 113231220}, {"end": 113233877, "start": 113233644}, {"end": 113234603, "start": 113234439}, {"end": 113238595, "start": 
113238484}, {"end": 113242036, "start": 113241915}, {"end": 113243716, "start": 113243522}, {"end": 113244772, "start": 113244641}, {"end": 113245973, "start": 113245866}, {"end": 113252059, "start": 113251930}, {"end": 113259213, "start": 113259095}, {"end": 113261518, "start": 113261321}, {"end": 113265497, "start": 113265318}, {"end": 113275385, "start": 113275206}, {"end": 113276386, "start": 113276228}, {"end": 113308571, "start": 113308395}, {"end": 113312384, "start": 113312129}, {"end": 113342160, "start": 113341293}], "is_best_transcript": true, "name": "ENST00000401783", "start": 113127531, "translations": [{"cdna_coding_end": 338, "cdna_coding_start": 11053, "domains": [{"name": "SM00032", "regions": [{"end": 433, "start": 378}, {"end": 493, "start": 438}, {"end": 559, "start": 498}, {"end": 787, "start": 727}, {"end": 1685, "start": 1631}, {"end": 1743, "start": 1690}, {"end": 1842, "start": 1789}, {"end": 1900, "start": 1847}, {"end": 1958, "start": 1905}, {"end": 2016, "start": 1963}, {"end": 2078, "start": 2021}, {"end": 2141, "start": 2083}, {"end": 2199, "start": 2146}, {"end": 2259, "start": 2204}, {"end": 2318, "start": 2264}, {"end": 2376, "start": 2323}, {"end": 2435, "start": 2381}, {"end": 2493, "start": 2440}, {"end": 2551, "start": 2498}, {"end": 2608, "start": 2556}, {"end": 2712, "start": 2654}, {"end": 2770, "start": 2717}, {"end": 2828, "start": 2775}, {"end": 2886, "start": 2833}, {"end": 2944, "start": 2891}, {"end": 3002, "start": 2949}, {"end": 3059, "start": 3007}, {"end": 3117, "start": 3064}, {"end": 3176, "start": 3122}, {"end": 3236, "start": 3181}, {"end": 3294, "start": 3241}, {"end": 3352, "start": 3299}, {"end": 3411, "start": 3357}, {"end": 3468, "start": 3416}]}, {"name": "PF02494", "regions": [{"end": 642, "start": 561}, {"end": 721, "start": 644}]}, {"name": "PR00895", "regions": [{"end": 1530, "start": 1512}, {"end": 1558, "start": 1539}, {"end": 1592, "start": 1559}]}, {"name": "SSF57535", "regions": [{"end": 433, 
"start": 374}, {"end": 493, "start": 434}, {"end": 560, "start": 494}, {"end": 790, "start": 727}, {"end": 1746, "start": 1626}, {"end": 1842, "start": 1785}, {"end": 1900, "start": 1843}, {"end": 1958, "start": 1901}, {"end": 2016, "start": 1959}, {"end": 2078, "start": 2017}, {"end": 2199, "start": 2081}, {"end": 2318, "start": 2202}, {"end": 2377, "start": 2321}, {"end": 2437, "start": 2379}, {"end": 2551, "start": 2438}, {"end": 2616, "start": 2552}, {"end": 2712, "start": 2643}, {"end": 2828, "start": 2715}, {"end": 2886, "start": 2829}, {"end": 2944, "start": 2887}, {"end": 3117, "start": 2945}, {"end": 3176, "start": 3118}, {"end": 3229, "start": 3177}, {"end": 3475, "start": 3239}]}, {"name": "SSF49899", "regions": [{"end": 1632, "start": 1421}]}, {"name": "SM00159", "regions": [{"end": 1627, "start": 1420}]}, {"name": "PF00354", "regions": [{"end": 1620, "start": 1442}]}, {"name": "PF07699", "regions": [{"end": 360, "start": 310}, {"end": 1052, "start": 1005}, {"end": 1106, "start": 1059}, {"end": 1160, "start": 1113}]}, {"name": "PS50311", "regions": [{"end": 1409, "start": 1197}, {"end": 3554, "start": 3468}]}, {"name": "PS50825", "regions": [{"end": 642, "start": 560}, {"end": 724, "start": 643}]}, {"name": "PF00092", "regions": [{"end": 252, "start": 84}]}, {"name": "SSF57196", "regions": [{"end": 1267, "start": 1189}, {"end": 1305, "start": 1268}, {"end": 1342, "start": 1306}, {"end": 1423, "start": 1344}, {"end": 1786, "start": 1735}, {"end": 3506, "start": 3463}, {"end": 3535, "start": 3507}, {"end": 3570, "start": 3537}]}, {"name": "PS50026", "regions": [{"end": 1229, "start": 1193}, {"end": 1267, "start": 1231}, {"end": 1305, "start": 1269}, {"end": 1343, "start": 1307}, {"end": 1381, "start": 1345}, {"end": 1419, "start": 1383}, {"end": 1784, "start": 1745}, {"end": 3532, "start": 3500}, {"end": 3564, "start": 3533}]}, {"name": "SM00181", "regions": [{"end": 1229, "start": 1196}, {"end": 1267, "start": 1234}, {"end": 1305, "start": 1272}, {"end": 
1343, "start": 1310}, {"end": 1381, "start": 1348}, {"end": 1419, "start": 1386}, {"end": 1784, "start": 1748}, {"end": 3500, "start": 3471}, {"end": 3532, "start": 3503}, {"end": 3564, "start": 3535}]}, {"name": "SM00179", "regions": [{"end": 1229, "start": 1196}, {"end": 1267, "start": 1231}, {"end": 1305, "start": 1269}, {"end": 1343, "start": 1307}, {"end": 1381, "start": 1345}, {"end": 1419, "start": 1383}, {"end": 1784, "start": 1745}, {"end": 3532, "start": 3504}]}, {"name": "SSF57184", "regions": [{"end": 440, "start": 269}, {"end": 1144, "start": 988}]}, {"name": "PF07645", "regions": [{"end": 1783, "start": 1745}]}, {"name": "PS50923", "regions": [{"end": 435, "start": 376}, {"end": 495, "start": 436}, {"end": 561, "start": 496}, {"end": 789, "start": 725}, {"end": 1687, "start": 1629}, {"end": 1745, "start": 1688}, {"end": 1844, "start": 1787}, {"end": 1902, "start": 1845}, {"end": 1960, "start": 1903}, {"end": 2018, "start": 1961}, {"end": 2080, "start": 2019}, {"end": 2143, "start": 2081}, {"end": 2201, "start": 2144}, {"end": 2261, "start": 2202}, {"end": 2320, "start": 2262}, {"end": 2378, "start": 2321}, {"end": 2437, "start": 2379}, {"end": 2495, "start": 2438}, {"end": 2553, "start": 2496}, {"end": 2610, "start": 2554}, {"end": 2714, "start": 2663}, {"end": 2772, "start": 2715}, {"end": 2830, "start": 2773}, {"end": 2888, "start": 2831}, {"end": 2946, "start": 2889}, {"end": 3004, "start": 2947}, {"end": 3061, "start": 3005}, {"end": 3119, "start": 3062}, {"end": 3178, "start": 3120}, {"end": 3238, "start": 3179}, {"end": 3296, "start": 3239}, {"end": 3354, "start": 3297}, {"end": 3413, "start": 3355}, {"end": 3470, "start": 3414}]}, {"name": "SM00327", "regions": [{"end": 260, "start": 81}]}, {"name": "PF00008", "regions": [{"end": 1226, "start": 1197}, {"end": 1265, "start": 1235}, {"end": 1302, "start": 1273}, {"end": 1379, "start": 1349}, {"end": 1417, "start": 1387}]}, {"name": "PS50234", "regions": [{"end": 264, "start": 83}]}, {"name": 
"PF07974", "regions": [{"end": 1266, "start": 1235}, {"end": 3499, "start": 3475}, {"end": 3531, "start": 3507}, {"end": 3563, "start": 3536}]}, {"name": "SSF53300", "regions": [{"end": 262, "start": 79}]}, {"name": "PF00084", "regions": [{"end": 430, "start": 378}, {"end": 493, "start": 438}, {"end": 1685, "start": 1628}, {"end": 1743, "start": 1690}, {"end": 1842, "start": 1789}, {"end": 1900, "start": 1847}, {"end": 1958, "start": 1905}, {"end": 2016, "start": 1963}, {"end": 2078, "start": 2021}, {"end": 2136, "start": 2083}, {"end": 2199, "start": 2146}, {"end": 2259, "start": 2204}, {"end": 2318, "start": 2264}, {"end": 2376, "start": 2323}, {"end": 2435, "start": 2381}, {"end": 2493, "start": 2440}, {"end": 2551, "start": 2498}, {"end": 2608, "start": 2556}, {"end": 2712, "start": 2667}, {"end": 2770, "start": 2717}, {"end": 2828, "start": 2775}, {"end": 2886, "start": 2833}, {"end": 2944, "start": 2891}, {"end": 3002, "start": 2949}, {"end": 3059, "start": 3007}, {"end": 3117, "start": 3084}, {"end": 3172, "start": 3122}, {"end": 3236, "start": 3181}, {"end": 3290, "start": 3241}, {"end": 3352, "start": 3299}, {"end": 3411, "start": 3357}, {"end": 3468, "start": 3416}]}]}]}, {"end": 113190038, "exons": [{"end": 113128840, "start": 113127536}, {"end": 113132296, "start": 113132203}, {"end": 113137743, "start": 113137648}, {"end": 113139646, "start": 113139551}, {"end": 113141797, "start": 113141627}, {"end": 113148354, "start": 113148178}, {"end": 113149738, "start": 113149565}, {"end": 113151867, "start": 113151804}, {"end": 113163289, "start": 113163134}, {"end": 113166832, "start": 113166607}, {"end": 113171231, "start": 113168440}, {"end": 113174015, "start": 113173343}, {"end": 113190038, "start": 113189871}], "is_best_transcript": false, "name": "ENST00000297826", "start": 113127536, "translations": [{"cdna_coding_end": 416, "cdna_coding_start": 4909, "domains": [{"name": "PF00084", "regions": [{"end": 62, "start": 9}, {"end": 125, "start": 72}, {"end": 
185, "start": 130}, {"end": 244, "start": 190}, {"end": 302, "start": 249}, {"end": 361, "start": 307}, {"end": 419, "start": 366}, {"end": 477, "start": 424}, {"end": 534, "start": 482}, {"end": 638, "start": 593}, {"end": 696, "start": 643}, {"end": 754, "start": 701}, {"end": 812, "start": 759}, {"end": 870, "start": 817}, {"end": 928, "start": 875}, {"end": 985, "start": 933}, {"end": 1043, "start": 1010}, {"end": 1098, "start": 1048}, {"end": 1162, "start": 1107}, {"end": 1216, "start": 1167}, {"end": 1278, "start": 1225}, {"end": 1337, "start": 1283}, {"end": 1394, "start": 1342}]}, {"name": "PF07974", "regions": [{"end": 1425, "start": 1401}, {"end": 1457, "start": 1433}, {"end": 1489, "start": 1462}]}, {"name": "PF00008", "regions": [{"end": 1456, "start": 1427}]}, {"name": "PS50923", "regions": [{"end": 69, "start": 7}, {"end": 127, "start": 70}, {"end": 187, "start": 128}, {"end": 246, "start": 188}, {"end": 304, "start": 247}, {"end": 363, "start": 305}, {"end": 421, "start": 364}, {"end": 479, "start": 422}, {"end": 536, "start": 480}, {"end": 640, "start": 589}, {"end": 698, "start": 641}, {"end": 756, "start": 699}, {"end": 814, "start": 757}, {"end": 872, "start": 815}, {"end": 930, "start": 873}, {"end": 987, "start": 931}, {"end": 1045, "start": 988}, {"end": 1104, "start": 1046}, {"end": 1164, "start": 1105}, {"end": 1222, "start": 1165}, {"end": 1280, "start": 1223}, {"end": 1339, "start": 1281}, {"end": 1396, "start": 1340}]}, {"name": "SM00181", "regions": [{"end": 1426, "start": 1397}, {"end": 1458, "start": 1429}, {"end": 1490, "start": 1461}]}, {"name": "SSF57196", "regions": [{"end": 1432, "start": 1389}, {"end": 1461, "start": 1433}, {"end": 1496, "start": 1463}]}, {"name": "PS50026", "regions": [{"end": 1458, "start": 1426}, {"end": 1490, "start": 1459}]}, {"name": "PS50311", "regions": [{"end": 1480, "start": 1394}]}, {"name": "SSF57535", "regions": [{"end": 125, "start": 7}, {"end": 244, "start": 128}, {"end": 303, "start": 247}, 
{"end": 363, "start": 305}, {"end": 477, "start": 364}, {"end": 542, "start": 478}, {"end": 638, "start": 569}, {"end": 754, "start": 641}, {"end": 812, "start": 755}, {"end": 870, "start": 813}, {"end": 1043, "start": 871}, {"end": 1102, "start": 1044}, {"end": 1155, "start": 1103}, {"end": 1401, "start": 1165}]}, {"name": "SM00032", "regions": [{"end": 67, "start": 9}, {"end": 125, "start": 72}, {"end": 185, "start": 130}, {"end": 244, "start": 190}, {"end": 302, "start": 249}, {"end": 361, "start": 307}, {"end": 419, "start": 366}, {"end": 477, "start": 424}, {"end": 534, "start": 482}, {"end": 638, "start": 580}, {"end": 696, "start": 643}, {"end": 754, "start": 701}, {"end": 812, "start": 759}, {"end": 870, "start": 817}, {"end": 928, "start": 875}, {"end": 985, "start": 933}, {"end": 1043, "start": 990}, {"end": 1102, "start": 1048}, {"end": 1162, "start": 1107}, {"end": 1220, "start": 1167}, {"end": 1278, "start": 1225}, {"end": 1337, "start": 1283}, {"end": 1394, "start": 1342}]}]}]}, {"end": 113342018, "exons": [{"end": 113128840, "start": 113127536}, {"end": 113132296, "start": 113132203}, {"end": 113137743, "start": 113137648}, {"end": 113139646, "start": 113139551}, {"end": 113141797, "start": 113141627}, {"end": 113148354, "start": 113148178}, {"end": 113149738, "start": 113149565}, {"end": 113151867, "start": 113151804}, {"end": 113163289, "start": 113163134}, {"end": 113166832, "start": 113166607}, {"end": 113171231, "start": 113168440}, {"end": 113174015, "start": 113173343}, {"end": 113190038, "start": 113189871}, {"end": 113191614, "start": 113191423}, {"end": 113192284, "start": 113192200}, {"end": 113192730, "start": 113192554}, {"end": 113194314, "start": 113194195}, {"end": 113194915, "start": 113194742}, {"end": 113196786, "start": 113196616}, {"end": 113197644, "start": 113197521}, {"end": 113198784, "start": 113198660}, {"end": 113206000, "start": 113205825}, {"end": 113208318, "start": 113208117}, {"end": 113209337, "start": 113209180}, 
{"end": 113212540, "start": 113212339}, {"end": 113213682, "start": 113213569}, {"end": 113217983, "start": 113217870}, {"end": 113219632, "start": 113219536}, {"end": 113220842, "start": 113220751}, {"end": 113221393, "start": 113221232}, {"end": 113228306, "start": 113228145}, {"end": 113231381, "start": 113231220}, {"end": 113233877, "start": 113233644}, {"end": 113234603, "start": 113234439}, {"end": 113238595, "start": 113238484}, {"end": 113242036, "start": 113241915}, {"end": 113243716, "start": 113243522}, {"end": 113244772, "start": 113244641}, {"end": 113245973, "start": 113245866}, {"end": 113252059, "start": 113251930}, {"end": 113259213, "start": 113259095}, {"end": 113261518, "start": 113261321}, {"end": 113265497, "start": 113265318}, {"end": 113275385, "start": 113275206}, {"end": 113276386, "start": 113276228}, {"end": 113308571, "start": 113308395}, {"end": 113312384, "start": 113312129}, {"end": 113342018, "start": 113341293}], "is_best_transcript": false, "name": "ENST00000374469", "start": 113127536, "translations": [{"cdna_coding_end": 265, "cdna_coding_start": 10911, "domains": [{"name": "SSF57535", "regions": [{"end": 410, "start": 351}, {"end": 470, "start": 411}, {"end": 537, "start": 471}, {"end": 767, "start": 704}, {"end": 1723, "start": 1603}, {"end": 1819, "start": 1762}, {"end": 1877, "start": 1820}, {"end": 1935, "start": 1878}, {"end": 1993, "start": 1936}, {"end": 2055, "start": 1994}, {"end": 2176, "start": 2058}, {"end": 2295, "start": 2179}, {"end": 2354, "start": 2298}, {"end": 2414, "start": 2356}, {"end": 2528, "start": 2415}, {"end": 2593, "start": 2529}, {"end": 2689, "start": 2620}, {"end": 2805, "start": 2692}, {"end": 2863, "start": 2806}, {"end": 2921, "start": 2864}, {"end": 3094, "start": 2922}, {"end": 3153, "start": 3095}, {"end": 3206, "start": 3154}, {"end": 3452, "start": 3216}]}, {"name": "SSF49899", "regions": [{"end": 1609, "start": 1398}]}, {"name": "SM00159", "regions": [{"end": 1604, "start": 1397}]}, 
{"name": "PF00354", "regions": [{"end": 1597, "start": 1419}]}, {"name": "PR00895", "regions": [{"end": 1507, "start": 1489}, {"end": 1535, "start": 1516}, {"end": 1569, "start": 1536}]}, {"name": "PF02494", "regions": [{"end": 619, "start": 538}, {"end": 698, "start": 621}]}, {"name": "SM00032", "regions": [{"end": 410, "start": 355}, {"end": 470, "start": 415}, {"end": 536, "start": 475}, {"end": 764, "start": 704}, {"end": 1662, "start": 1608}, {"end": 1720, "start": 1667}, {"end": 1819, "start": 1766}, {"end": 1877, "start": 1824}, {"end": 1935, "start": 1882}, {"end": 1993, "start": 1940}, {"end": 2055, "start": 1998}, {"end": 2118, "start": 2060}, {"end": 2176, "start": 2123}, {"end": 2236, "start": 2181}, {"end": 2295, "start": 2241}, {"end": 2353, "start": 2300}, {"end": 2412, "start": 2358}, {"end": 2470, "start": 2417}, {"end": 2528, "start": 2475}, {"end": 2585, "start": 2533}, {"end": 2689, "start": 2631}, {"end": 2747, "start": 2694}, {"end": 2805, "start": 2752}, {"end": 2863, "start": 2810}, {"end": 2921, "start": 2868}, {"end": 2979, "start": 2926}, {"end": 3036, "start": 2984}, {"end": 3094, "start": 3041}, {"end": 3153, "start": 3099}, {"end": 3213, "start": 3158}, {"end": 3271, "start": 3218}, {"end": 3329, "start": 3276}, {"end": 3388, "start": 3334}, {"end": 3445, "start": 3393}]}, {"name": "SM00179", "regions": [{"end": 1206, "start": 1173}, {"end": 1244, "start": 1208}, {"end": 1282, "start": 1246}, {"end": 1320, "start": 1284}, {"end": 1358, "start": 1322}, {"end": 1396, "start": 1360}, {"end": 1761, "start": 1722}, {"end": 3509, "start": 3481}]}, {"name": "SSF57184", "regions": [{"end": 417, "start": 246}, {"end": 1121, "start": 965}]}, {"name": "SSF57196", "regions": [{"end": 1244, "start": 1166}, {"end": 1282, "start": 1245}, {"end": 1319, "start": 1283}, {"end": 1400, "start": 1321}, {"end": 1763, "start": 1712}, {"end": 3483, "start": 3440}, {"end": 3512, "start": 3484}, {"end": 3547, "start": 3514}]}, {"name": "PS50026", "regions": 
[{"end": 1206, "start": 1170}, {"end": 1244, "start": 1208}, {"end": 1282, "start": 1246}, {"end": 1320, "start": 1284}, {"end": 1358, "start": 1322}, {"end": 1396, "start": 1360}, {"end": 1761, "start": 1722}, {"end": 3509, "start": 3477}, {"end": 3541, "start": 3510}]}, {"name": "SM00181", "regions": [{"end": 1206, "start": 1173}, {"end": 1244, "start": 1211}, {"end": 1282, "start": 1249}, {"end": 1320, "start": 1287}, {"end": 1358, "start": 1325}, {"end": 1396, "start": 1363}, {"end": 1761, "start": 1725}, {"end": 3477, "start": 3448}, {"end": 3509, "start": 3480}, {"end": 3541, "start": 3512}]}, {"name": "PF00092", "regions": [{"end": 229, "start": 61}]}, {"name": "PS50825", "regions": [{"end": 619, "start": 537}, {"end": 701, "start": 620}]}, {"name": "PS50311", "regions": [{"end": 1386, "start": 1174}, {"end": 3531, "start": 3445}]}, {"name": "PF07699", "regions": [{"end": 337, "start": 287}, {"end": 1029, "start": 982}, {"end": 1083, "start": 1036}, {"end": 1137, "start": 1090}]}, {"name": "PF00008", "regions": [{"end": 1203, "start": 1174}, {"end": 1242, "start": 1212}, {"end": 1279, "start": 1250}, {"end": 1356, "start": 1326}, {"end": 1394, "start": 1364}]}, {"name": "SM00327", "regions": [{"end": 237, "start": 58}]}, {"name": "PS50923", "regions": [{"end": 412, "start": 353}, {"end": 472, "start": 413}, {"end": 538, "start": 473}, {"end": 766, "start": 702}, {"end": 1664, "start": 1606}, {"end": 1722, "start": 1665}, {"end": 1821, "start": 1764}, {"end": 1879, "start": 1822}, {"end": 1937, "start": 1880}, {"end": 1995, "start": 1938}, {"end": 2057, "start": 1996}, {"end": 2120, "start": 2058}, {"end": 2178, "start": 2121}, {"end": 2238, "start": 2179}, {"end": 2297, "start": 2239}, {"end": 2355, "start": 2298}, {"end": 2414, "start": 2356}, {"end": 2472, "start": 2415}, {"end": 2530, "start": 2473}, {"end": 2587, "start": 2531}, {"end": 2691, "start": 2640}, {"end": 2749, "start": 2692}, {"end": 2807, "start": 2750}, {"end": 2865, "start": 2808}, {"end": 
2923, "start": 2866}, {"end": 2981, "start": 2924}, {"end": 3038, "start": 2982}, {"end": 3096, "start": 3039}, {"end": 3155, "start": 3097}, {"end": 3215, "start": 3156}, {"end": 3273, "start": 3216}, {"end": 3331, "start": 3274}, {"end": 3390, "start": 3332}, {"end": 3447, "start": 3391}]}, {"name": "PF07645", "regions": [{"end": 1760, "start": 1722}]}, {"name": "SSF53300", "regions": [{"end": 239, "start": 56}]}, {"name": "PF00084", "regions": [{"end": 407, "start": 355}, {"end": 470, "start": 415}, {"end": 1662, "start": 1605}, {"end": 1720, "start": 1667}, {"end": 1819, "start": 1766}, {"end": 1877, "start": 1824}, {"end": 1935, "start": 1882}, {"end": 1993, "start": 1940}, {"end": 2055, "start": 1998}, {"end": 2113, "start": 2060}, {"end": 2176, "start": 2123}, {"end": 2236, "start": 2181}, {"end": 2295, "start": 2241}, {"end": 2353, "start": 2300}, {"end": 2412, "start": 2358}, {"end": 2470, "start": 2417}, {"end": 2528, "start": 2475}, {"end": 2585, "start": 2533}, {"end": 2689, "start": 2644}, {"end": 2747, "start": 2694}, {"end": 2805, "start": 2752}, {"end": 2863, "start": 2810}, {"end": 2921, "start": 2868}, {"end": 2979, "start": 2926}, {"end": 3036, "start": 2984}, {"end": 3094, "start": 3061}, {"end": 3149, "start": 3099}, {"end": 3213, "start": 3158}, {"end": 3267, "start": 3218}, {"end": 3329, "start": 3276}, {"end": 3388, "start": 3334}, {"end": 3445, "start": 3393}]}, {"name": "PF07974", "regions": [{"end": 1243, "start": 1212}, {"end": 3476, "start": 3452}, {"end": 3508, "start": 3484}, {"end": 3540, "start": 3513}]}, {"name": "PS50234", "regions": [{"end": 241, "start": 60}]}]}]}, {"end": 113341823, "exons": [{"end": 113206000, "start": 113204759}, {"end": 113208318, "start": 113208117}, {"end": 113209337, "start": 113209180}, {"end": 113212540, "start": 113212339}, {"end": 113213682, "start": 113213569}, {"end": 113217983, "start": 113217870}, {"end": 113219632, "start": 113219536}, {"end": 113220399, "start": 113220395}, {"end": 113220842, 
"start": 113220756}, {"end": 113221393, "start": 113221232}, {"end": 113228306, "start": 113228145}, {"end": 113231381, "start": 113231220}, {"end": 113233877, "start": 113233644}, {"end": 113234603, "start": 113234439}, {"end": 113238595, "start": 113238484}, {"end": 113242036, "start": 113241915}, {"end": 113243716, "start": 113243522}, {"end": 113244772, "start": 113244641}, {"end": 113245973, "start": 113245866}, {"end": 113252059, "start": 113251930}, {"end": 113259213, "start": 113259095}, {"end": 113261518, "start": 113261321}, {"end": 113265497, "start": 113265318}, {"end": 113275385, "start": 113275206}, {"end": 113276386, "start": 113276228}, {"end": 113308571, "start": 113308395}, {"end": 113312384, "start": 113312129}, {"end": 113341823, "start": 113341293}], "is_best_transcript": false, "name": "ENST00000302728", "start": 113204759, "translations": [{"cdna_coding_end": 1, "cdna_coding_start": 4650, "domains": [{"name": "PS50825", "regions": [{"end": 642, "start": 560}, {"end": 724, "start": 643}]}, {"name": "PF07699", "regions": [{"end": 360, "start": 310}, {"end": 1052, "start": 1005}, {"end": 1106, "start": 1059}, {"end": 1160, "start": 1113}]}, {"name": "PS50311", "regions": [{"end": 1409, "start": 1197}]}, {"name": "SM00181", "regions": [{"end": 1229, "start": 1196}, {"end": 1267, "start": 1234}, {"end": 1305, "start": 1272}, {"end": 1343, "start": 1310}, {"end": 1381, "start": 1348}, {"end": 1419, "start": 1386}]}, {"name": "SSF57196", "regions": [{"end": 1267, "start": 1189}, {"end": 1305, "start": 1268}, {"end": 1342, "start": 1306}, {"end": 1423, "start": 1344}]}, {"name": "PS50026", "regions": [{"end": 1229, "start": 1193}, {"end": 1267, "start": 1231}, {"end": 1305, "start": 1269}, {"end": 1343, "start": 1307}, {"end": 1381, "start": 1345}, {"end": 1419, "start": 1383}]}, {"name": "SSF57184", "regions": [{"end": 440, "start": 269}, {"end": 1144, "start": 988}]}, {"name": "SM00179", "regions": [{"end": 1229, "start": 1196}, {"end": 1267, 
"start": 1231}, {"end": 1305, "start": 1269}, {"end": 1343, "start": 1307}, {"end": 1381, "start": 1345}, {"end": 1419, "start": 1383}]}, {"name": "PF00092", "regions": [{"end": 252, "start": 84}]}, {"name": "SM00032", "regions": [{"end": 433, "start": 378}, {"end": 493, "start": 438}, {"end": 559, "start": 498}, {"end": 787, "start": 727}]}, {"name": "PF02494", "regions": [{"end": 642, "start": 561}, {"end": 721, "start": 644}]}, {"name": "PR00010", "regions": [{"end": 1318, "start": 1307}, {"end": 1364, "start": 1357}, {"end": 1413, "start": 1403}, {"end": 1420, "start": 1414}]}, {"name": "PF00354", "regions": [{"end": 1532, "start": 1442}]}, {"name": "SSF57535", "regions": [{"end": 433, "start": 374}, {"end": 493, "start": 434}, {"end": 560, "start": 494}, {"end": 790, "start": 727}]}, {"name": "SSF49899", "regions": [{"end": 1547, "start": 1421}]}, {"name": "PS50234", "regions": [{"end": 264, "start": 83}]}, {"name": "SSF53300", "regions": [{"end": 262, "start": 79}]}, {"name": "PF00084", "regions": [{"end": 430, "start": 378}, {"end": 493, "start": 438}]}, {"name": "PS50923", "regions": [{"end": 435, "start": 376}, {"end": 495, "start": 436}, {"end": 561, "start": 496}, {"end": 789, "start": 725}]}, {"name": "PF07645", "regions": [{"end": 1262, "start": 1231}, {"end": 1338, "start": 1308}]}, {"name": "PF00008", "regions": [{"end": 1226, "start": 1197}, {"end": 1265, "start": 1235}, {"end": 1302, "start": 1273}, {"end": 1337, "start": 1311}, {"end": 1379, "start": 1349}, {"end": 1417, "start": 1387}]}, {"name": "SM00327", "regions": [{"end": 260, "start": 81}]}]}]}, {"end": 113342160, "exons": [{"end": 113238595, "start": 113238163}, {"end": 113242036, "start": 113241915}, {"end": 113243716, "start": 113243522}, {"end": 113244772, "start": 113244641}, {"end": 113245973, "start": 113245866}, {"end": 113252059, "start": 113251930}, {"end": 113259213, "start": 113259095}, {"end": 113261518, "start": 113261321}, {"end": 113265497, "start": 113265318}, {"end": 
113275385, "start": 113275206}, {"end": 113276386, "start": 113276228}, {"end": 113308571, "start": 113308395}, {"end": 113312384, "start": 113312129}, {"end": 113342160, "start": 113341293}], "is_best_transcript": false, "name": "ENST00000374461", "start": 113238163, "translations": [{"cdna_coding_end": 407, "cdna_coding_start": 2944, "domains": [{"name": "PF02494", "regions": [{"end": 619, "start": 538}, {"end": 698, "start": 621}]}, {"name": "SM00032", "regions": [{"end": 410, "start": 355}, {"end": 470, "start": 415}, {"end": 536, "start": 475}, {"end": 764, "start": 704}]}, {"name": "SSF57535", "regions": [{"end": 410, "start": 351}, {"end": 470, "start": 411}, {"end": 537, "start": 471}, {"end": 767, "start": 704}]}, {"name": "PF07699", "regions": [{"end": 337, "start": 287}]}, {"name": "PS50825", "regions": [{"end": 619, "start": 537}, {"end": 701, "start": 620}]}, {"name": "PF00092", "regions": [{"end": 229, "start": 61}]}, {"name": "SSF57184", "regions": [{"end": 417, "start": 246}]}, {"name": "PS50923", "regions": [{"end": 412, "start": 353}, {"end": 472, "start": 413}, {"end": 538, "start": 473}, {"end": 766, "start": 702}]}, {"name": "SM00327", "regions": [{"end": 237, "start": 58}]}, {"name": "PS50234", "regions": [{"end": 241, "start": 60}]}, {"name": "SSF53300", "regions": [{"end": 239, "start": 56}]}, {"name": "PF00084", "regions": [{"end": 407, "start": 355}, {"end": 470, "start": 415}]}]}]}]}, {"aliases": ["ARID1B"], "chr": "6", "end": 157530401, "name": "ENSG00000049618", "start": 157099063, "strand": "+", "transcripts": [{"end": 157529495, "exons": [{"end": 157100605, "start": 157099063}, {"end": 157150555, "start": 157150361}, {"end": 157192786, "start": 157192748}, {"end": 157222659, "start": 157222510}, {"end": 157256710, "start": 157256600}, {"end": 157406039, "start": 157405796}, {"end": 157431695, "start": 157431606}, {"end": 157454341, "start": 157454162}, {"end": 157470085, "start": 157469758}, {"end": 157488319, "start": 157488174}, 
{"end": 157495251, "start": 157495142}, {"end": 157502312, "start": 157502103}, {"end": 157505569, "start": 157505365}, {"end": 157510914, "start": 157510776}, {"end": 157511344, "start": 157511172}, {"end": 157517449, "start": 157517299}, {"end": 157520041, "start": 157519945}, {"end": 157522622, "start": 157521839}, {"end": 157525130, "start": 157525000}, {"end": 157529495, "start": 157527301}], "is_best_transcript": true, "name": "ENST00000346085", "start": 157099063, "translations": [{"cdna_coding_end": 6751, "cdna_coding_start": 2, "domains": [{"name": "PF12031", "regions": [{"end": 2195, "start": 1939}]}, {"name": "PS50324", "regions": [{"end": 57, "start": 35}, {"end": 784, "start": 697}]}, {"name": "PF01388", "regions": [{"end": 1153, "start": 1065}]}, {"name": "PS50099", "regions": [{"end": 820, "start": 715}, {"end": 1610, "start": 1472}]}, {"name": "SSF48371", "regions": [{"end": 2220, "start": 2075}]}, {"name": "PS50316", "regions": [{"end": 104, "start": 81}]}, {"name": "PS50322", "regions": [{"end": 131, "start": 107}, {"end": 646, "start": 574}]}, {"name": "PS51011", "regions": [{"end": 1157, "start": 1066}]}, {"name": "PS50310", "regions": [{"end": 47, "start": 2}, {"end": 493, "start": 329}]}, {"name": "PS50315", "regions": [{"end": 401, "start": 141}]}, {"name": "SSF46774", "regions": [{"end": 1168, "start": 1049}]}, {"name": "SM00501", "regions": [{"end": 1158, "start": 1067}]}]}]}]}]}
\ No newline at end of file
diff --git a/tests/tools/test_convert_annotations_format.py b/tests/tools/test_convert_annotations_format.py
index 0f837b30..a5530dd7 100644
--- a/tests/tools/test_convert_annotations_format.py
+++ b/tests/tools/test_convert_annotations_format.py
@@ -1,22 +1,61 @@
+import json
 import os
 
-from tools.convert_annotations_format import convert_gff2_to_mavis, convert_gff3_to_mavis
+import pytest
 
+from tools.convert_annotations_format import (
+    convert_gff2_to_mavis,
+    convert_gff3_to_mavis,
+    convert_mavis_json_2to3,
+)
 
-def test_load_gff3():
-    input = os.path.join(os.path.dirname(__file__), 'data', 'Homo_sapiens.GRCh38.105.chr.kras.gtf')
-    data = convert_gff2_to_mavis(input, False)
-    assert len(data['genes']) == 2
-    assert sum([len(g['transcripts']) for g in data['genes']]) == 15
-    exons = 0
-    for gene in data['genes']:
-        for transcript in gene['transcripts']:
-            exons += len(transcript['exons'])
-    assert exons == 62
+CONVERTERS = {
+    'gff3': convert_gff3_to_mavis,
+    'gtf': convert_gff2_to_mavis,
+    'v2-json': convert_mavis_json_2to3,
+}
 
 
-def test_load_gtf():
-    input = os.path.join(os.path.dirname(__file__), 'data', 'Homo_sapiens.GRCh38.105.kras.gff3')
-    data = convert_gff3_to_mavis(input, False)
-    assert len(data['genes']) == 4
-    assert sum([len(g['transcripts']) for g in data['genes']]) == 15
+def sort_elements(data):
+    """
+    Sort lists of exons, domains, genes, etc by position and name to facilitate comparison
+    """
+    if not isinstance(data, dict):
+        if isinstance(data, list):
+            items = [sort_elements(e) for e in data]
+
+            if all(isinstance(elem, dict) for elem in data):
+                return sorted(
+                    items, key=lambda elem: (elem.get('start'), elem.get('end'), elem.get('name'))
+                )
+            return items
+        else:
+            return data
+
+    for key, value in data.items():
+        data[key] = sort_elements(value)
+    return data
+
+
+@pytest.mark.parametrize(
+    'filename,expected_file,input_type',
+    [
+        ['K02718.1.gff3', 'K02718.1.gff3.json', 'gff3'],
+        ['K02718.1.gtf', 'K02718.1.gtf.json', 'gtf'],
+        ['Homo_sapiens.GRCh38.kras.gff3', 'Homo_sapiens.GRCh38.kras.gff3.json', 'gff3'],
+        ['Homo_sapiens.GRCh38.kras.gtf', 'Homo_sapiens.GRCh38.kras.gtf.json', 'gtf'],
+        ['example_genes.v2.json', 'example_genes.v3.json', 'v2-json'],
+    ],
+)
+def test_gff_examples(filename, expected_file, input_type):
+    data_dir = os.path.join(os.path.dirname(__file__), 'data')
+    input_file = os.path.join(data_dir, filename)
+    with open(os.path.join(data_dir, expected_file), 'r') as fh:
+        expected = json.load(fh)
+
+    # order doesn't matter
+    data = sort_elements(CONVERTERS[input_type](input_file))
+    expected = sort_elements(expected)
+
+    assert len(data['genes']) == len(expected['genes'])
+    assert data == expected

From 6368996d9e093b781784be3db684ae2c970cf58f Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Sat, 5 Feb 2022 20:56:14 -0800
Subject: [PATCH 111/137] Support polycistronic transcripts

- Nest translations under transcripts in annotations json instead of
  expecting one cds per transcript
- To support HPV genes

resolves: #302
---
 src/mavis/annotate/annotations_schema.json | 159 ++++++++++++++-------
 src/mavis/annotate/file_io.py              | 117 +++++++++------
 2 files changed, 179 insertions(+), 97 deletions(-)

diff --git a/src/mavis/annotate/annotations_schema.json b/src/mavis/annotate/annotations_schema.json
index 83f1b501..85adb748 100644
--- a/src/mavis/annotate/annotations_schema.json
+++ b/src/mavis/annotate/annotations_schema.json
@@ -62,69 +62,32 @@
                                     },
                                     "type": "array"
                                 },
-                                "cdna_coding_end": {
-                                    "default": null,
-                                    "minimum": 1,
-                                    "type": [
-                                        "integer",
-                                        "null"
-                                    ]
-                                },
-                                "cdna_coding_start": {
-                                    "default": null,
-                                    "minimum": 1,
-                                    "type": [
-                                        "integer",
-                                        "null"
-                                    ]
-                                },
-                                "domains": {
-                                    "default": [
-                                    ],
-                                    "items": {
-                                        "additionalProperties": true,
-                                        "properties": {
-                                            "name": {
-                                                "minLength": 1,
-                                                "type": "string"
-                                            },
-                                            "regions": {
-                                                "minItems": 1,
-                                                "properties": {
-                                                    "end": {
-                                                        "minimum": 1,
-                                                        "type": "integer"
-                                                    },
-                                                    "start": {
-                                                        "minimum": 1,
-                                                        "type": "integer"
-                                                    }
-                                                },
-                                                "type": "array"
-                                            }
-                                        },
-                                        "required": [
-                                            "name",
-                                            "regions"
-                                        ],
-                                        "type": "object"
-                                    },
-                                    "type": "array"
-                                },
                                 "end": {
                                     "minimum": 1,
                                     "type": "integer"
                                 },
                                 "exons": {
-                                    "defualt": [
+                                    "default": [
                                     ],
                                     "items": {
                                         "additionalProperties": true,
                                         "properties": {
+                                            "aliases": {
+                                                "default": [
+                                                ],
+                                                "items": {
+                                                    "minLength": 1,
+                                                    "type": "string"
+                                                },
+                                                "type": "array"
+                                            },
                                             "end": {
                                                 "minimum": 1,
                                                 "type": "integer"
                                             },
+                                            "name": {
+                                                "type": "string"
+                                            },
                                             "start": {
                                                 "minimum": 1,
                                                 "type": "integer"
@@ -149,6 +112,102 @@
                                 "start": {
                                     "minimum": 1,
                                     "type": "integer"
+                                },
+                                "translations": {
+                                    "default": [
+                                    ],
+                                    "items": {
+                                        "additionalProperties": true,
+                                        "anyOf": [
+                                            {
+                                                "required": [
+                                                    "start",
+                                                    "end"
+                                                ]
+                                            },
+                                            {
+                                                "required": [
+                                                    "cdna_coding_end",
+                                                    "cdna_coding_start"
+                                                ]
+                                            }
+                                        ],
+                                        "properties": {
+                                            "aliases": {
+                                                "default": [
+                                                ],
+                                                "items": {
+                                                    "minLength": 1,
+                                                    "type": "string"
+                                                },
+                                                "type": "array"
+                                            },
+                                            "cdna_coding_end": {
+                                                "description": "coding end position relative to its parent transcript sequence",
+                                                "minimum": 1,
+                                                "type": "integer"
+                                            },
+                                            "cdna_coding_start": {
+                                                "description": "coding start position relative to its parent transcript sequence",
+                                                "minimum": 1,
+                                                "type": "integer"
+                                            },
+                                            "domains": {
+                                                "default": [
+                                                ],
+                                                "items": {
+                                                    "additionalProperties": true,
+                                                    "properties": {
+                                                        "desc": {
+                                                            "type": "string"
+                                                        },
+                                                        "name": {
+                                                            "minLength": 1,
+                                                            "type": "string"
+                                                        },
+                                                        "regions": {
+                                                            "minItems": 1,
+                                                            "properties": {
+                                                                "end": {
+                                                                    "description": "end of the protein domain region in AA coordinates",
+                                                                    "minimum": 1,
+                                                                    "type": "integer"
+                                                                },
+                                                                "start": {
+                                                                    "description": "start of the protein domain region in AA coordinates",
+                                                                    "minimum": 1,
+                                                                    "type": "integer"
+                                                                }
+                                                            },
+                                                            "type": "array"
+                                                        }
+                                                    },
+                                                    "required": [
+                                                        "name",
+                                                        "regions"
+                                                    ],
+                                                    "type": "object"
+                                                },
+                                                "type": "array"
+                                            },
+                                            "end": {
+                                                "description": "coding end position in genomic coordinates",
+                                                "minimum": 1,
+                                                "type": "integer"
+                                            },
+                                            "name": {
+                                                "minLength": 1,
+                                                "type": "string"
+                                            },
+                                            "start": {
+                                                "description": "coding start position in genomic coordinates",
+                                                "minimum": 1,
+                                                "type": "integer"
+                                            }
+                                        },
+                                        "type": "object"
+                                    },
+                                    "type": "array"
                                 }
                             },
                             "required": [
diff --git a/src/mavis/annotate/file_io.py b/src/mavis/annotate/file_io.py
index 4b6ba264..a96200c9 100644
--- a/src/mavis/annotate/file_io.py
+++ b/src/mavis/annotate/file_io.py
@@ -10,7 +10,7 @@
 from Bio import SeqIO
 from snakemake.utils import validate as snakemake_validate
 
-from ..constants import CODON_SIZE, GIEMSA_STAIN, START_AA, STOP_AA, translate
+from ..constants import CODON_SIZE, GIEMSA_STAIN, START_AA, STOP_AA, STRAND, translate
 from ..interval import Interval
 from ..types import ReferenceGenome
 from ..util import logger
@@ -127,10 +127,13 @@ def parse_annotations_json(
 
         has_best = False
         for transcript in gene_dict['transcripts']:
-            transcript.setdefault('exons', [])
-            exons = [Exon(strand=gene.strand, **ex) for ex in transcript['exons']]
+            exons = []
+            for ex in transcript.get('exons', []):
+                exons.append(
+                    Exon(strand=gene.strand, start=ex['start'], end=ex['end'], name=ex.get('name'))
+                )
             if not exons:
-                exons = [(transcript['start'], transcript['end'])]
+                exons = [Exon(transcript['start'], transcript['end'], strand=gene.strand)]
             pre_transcript = PreTranscript(
                 name=transcript['name'],
                 gene=gene,
@@ -148,55 +151,75 @@ def parse_annotations_json(
                 spl_tx = Transcript(pre_transcript, spl_patt)
                 pre_transcript.spliced_transcripts.append(spl_tx)
 
-                if (
-                    transcript.get('cdna_coding_end', None) is None
-                    or transcript.get('cdna_coding_start', None) is None
-                ):
-                    continue
-                tx_length = transcript['cdna_coding_end'] - transcript['cdna_coding_start'] + 1
-                # check that the translation makes sense before including it
-                if tx_length % CODON_SIZE != 0:
-                    logger.warning(
-                        'Ignoring translation. The translated region is not a multiple of three'
-                    )
-                    continue
-                tx_length = tx_length // CODON_SIZE
-                domains = []
-                for dom in transcript['domains']:
+                for translation in transcript.get('translations', []):
                     try:
-                        regions = [Interval(r['start'], r['end']) for r in dom['regions']]
-                        regions = Interval.min_nonoverlapping(*regions)
-                        for region in regions:
-                            if region.start < 1 or region.end > tx_length:
-                                raise AssertionError(
-                                    'region cannot be outside the translated length'
-                                )
-                        domains.append(
-                            Domain(
-                                name=dom['name'],
-                                data={'desc': dom.get('desc', None)},
-                                regions=regions,
+                        if 'cdna_coding_end' not in translation:
+                            translation['cdna_coding_end'] = spl_tx.convert_genomic_to_cdna(
+                                translation['end']
+                            )
+                        if 'cdna_coding_start' not in translation:
+                            translation['cdna_coding_start'] = spl_tx.convert_genomic_to_cdna(
+                                translation['start']
                             )
+                    except IndexError as err:
+                        raise IndexError(
+                            f'Invalid specification of CDS ({translation["name"]}: {translation["start"]}-{translation["end"]}) '
+                            f'region on transcript ({transcript["name"]}: {transcript["start"]}-{transcript["end"]}): {err}'
                         )
-                    except AssertionError as err:
-                        logger.warning(repr(err))
-                translation = Translation(
-                    transcript['cdna_coding_start'],
-                    transcript['cdna_coding_end'],
-                    transcript=spl_tx,
-                    domains=domains,
-                )
-                if reference_genome and gene.chr in reference_genome:
-                    # get the sequence near here to see why these are wrong?
-                    seq = pre_transcript.get_cdna_seq(spl_tx.splicing_pattern, reference_genome)
-                    met = seq[translation.start - 1 : translation.start + 2]
-                    stop = seq[translation.end - CODON_SIZE : translation.end]
-                    if translate(met) != START_AA or translate(stop) != STOP_AA:
+
+                    if gene.strand == STRAND.NEG:
+                        translation['cdna_coding_start'], translation['cdna_coding_end'] = (
+                            translation['cdna_coding_end'],
+                            translation['cdna_coding_start'],
+                        )
+
+                    tx_length = (
+                        translation['cdna_coding_end'] - translation['cdna_coding_start'] + 1
+                    )
+                    # check that the translation makes sense before including it
+                    if tx_length % CODON_SIZE != 0:
                         logger.warning(
-                            'Sequence error. The sequence computed from the reference does look like a valid translation'
+                            f'Ignoring translation ({translation.get("name")}). The translated region is not a multiple of three (length={tx_length})'
                         )
                         continue
-                spl_tx.translations.append(translation)
+                    tx_length = tx_length // CODON_SIZE
+                    domains = []
+                    for dom in translation.get('domains', []):
+                        try:
+                            regions = [Interval(r['start'], r['end']) for r in dom['regions']]
+                            regions = Interval.min_nonoverlapping(*regions)
+                            for region in regions:
+                                if region.start < 1 or region.end > tx_length:
+                                    raise AssertionError(
+                                        'region cannot be outside the translated length'
+                                    )
+                            domains.append(
+                                Domain(
+                                    name=dom['name'],
+                                    data={'desc': dom.get('desc', None)},
+                                    regions=regions,
+                                )
+                            )
+                        except AssertionError as err:
+                            logger.warning(repr(err))
+                    translation = Translation(
+                        translation['cdna_coding_start'],
+                        translation['cdna_coding_end'],
+                        transcript=spl_tx,
+                        domains=domains,
+                        name=translation.get('name'),
+                    )
+                    if reference_genome and gene.chr in reference_genome:
+                        # get the sequence near here to see why these are wrong?
+                        seq = pre_transcript.get_cdna_seq(spl_tx.splicing_pattern, reference_genome)
+                        met = seq[translation.start - 1 : translation.start + 2]
+                        stop = seq[translation.end - CODON_SIZE : translation.end]
+                        if translate(met) != START_AA or translate(stop) != STOP_AA:
+                            logger.warning(
+                                'Sequence error. The sequence computed from the reference does look like a valid translation'
+                            )
+                            continue
+                    spl_tx.translations.append(translation)
         if not best_transcripts_only or has_best:
             genes_by_chr.setdefault(gene.chr, []).append(gene)
     return genes_by_chr

From 81abded25f3c9e068d650b5805138fef40d2e073 Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Sat, 5 Feb 2022 20:58:31 -0800
Subject: [PATCH 112/137] remove debugging code

---
 src/tools/convert_annotations_format.py | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/src/tools/convert_annotations_format.py b/src/tools/convert_annotations_format.py
index 28ca9dac..a01176f0 100644
--- a/src/tools/convert_annotations_format.py
+++ b/src/tools/convert_annotations_format.py
@@ -2,7 +2,6 @@
 import json
 import logging
 import re
-import traceback
 from typing import Dict, Tuple
 
 import pandas as pd
@@ -315,13 +314,6 @@ def split_col_into_rows(df, col, delimiter=',', new_col=None):
     return new_df.merge(s, left_index=True, right_index=True)
 
 
-def print_marker(df, links_df=None):
-    stack = traceback.extract_stack(limit=2)[0]
-    print(f'{stack.filename}:{stack.lineno} {stack.name}')
-    print(df.shape, links_df.shape if links_df is not None else '')
-    print(df.groupby(['type']).agg({'feature_id': 'count', 'feature_id': 'unique'}).reset_index())
-
-
 def fix_dangling_parent_reference(nodes_df, links_df):
     """
     Insert a pseudo element for any parents referenced by an element that do not already have their own line/definition

From 8ad2848f1872f002431911f61e99b992b85298b8 Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Sat, 5 Feb 2022 20:58:54 -0800
Subject: [PATCH 113/137] Improve type annotations and error messages

---
 src/mavis/annotate/file_io.py  |  2 +-
 src/mavis/annotate/genomic.py  |  2 +-
 src/mavis/annotate/protein.py  | 16 ++++++++--------
 src/mavis/annotate/splicing.py | 23 ++---------------------
 4 files changed, 12 insertions(+), 31 deletions(-)

diff --git a/src/mavis/annotate/file_io.py b/src/mavis/annotate/file_io.py
index a96200c9..ec0c5f48 100644
--- a/src/mavis/annotate/file_io.py
+++ b/src/mavis/annotate/file_io.py
@@ -346,7 +346,7 @@ def __init__(
         Args:
             *filepaths (str): list of paths to load
             file_type (str): Type of file to load
-            eager_load (bool=False): load the files immeadiately
+            eager_load (bool=False): load the files immediately
             assert_exists (bool=False): check that all files exist
             **opt: key word arguments to be passed to the load function and used as part of the file cache key
 
diff --git a/src/mavis/annotate/genomic.py b/src/mavis/annotate/genomic.py
index 22f6831d..5f2544c1 100644
--- a/src/mavis/annotate/genomic.py
+++ b/src/mavis/annotate/genomic.py
@@ -599,7 +599,7 @@ class Transcript(BioInterval):
     def __init__(
         self,
         pre_transcript: PreTranscript,
-        splicing_patt: List[int],
+        splicing_patt: List[SpliceSite],
         seq: Optional[str] = None,
         translations: Optional[List[Translation]] = None,
     ):
diff --git a/src/mavis/annotate/protein.py b/src/mavis/annotate/protein.py
index 50d31f33..2468953f 100644
--- a/src/mavis/annotate/protein.py
+++ b/src/mavis/annotate/protein.py
@@ -256,18 +256,18 @@ def __init__(
         end: int,
         transcript: Optional['Transcript'] = None,
         domains: Optional[List[Domain]] = None,
-        seq=None,
-        name=None,
+        seq: Optional[str] = None,
+        name: Optional[str] = None,
     ):
         """
         describes the splicing pattern and cds start and end with reference to a particular transcript
 
         Args:
-            start (int): start of the coding sequence (cds) relative to the start of the first exon in the transcript
-            end (int): end of the coding sequence (cds) relative to the start of the first exon in the transcript
-            transcript (Transcript): the transcript this is a Translation of
-            domains (List[Domain]): a list of the domains on this translation
-            sequence (str): the cds sequence
+            start: start of the coding sequence (cds) relative to the start of the first exon in the transcript
+            end: end of the coding sequence (cds) relative to the start of the first exon in the transcript
+            transcript: the transcript this is a Translation of
+            domains: a list of the domains on this translation
+            sequence: the cds sequence
         """
         domains = [] if domains is None else domains
         BioInterval.__init__(
@@ -279,7 +279,7 @@ def __init__(
             raise AttributeError('start must be a positive integer', start)
         if transcript and end > len(transcript):
             raise AttributeError(
-                'translation cannot be outside of related transcript range', end, len(transcript)
+                f'translation ({self.name}) cannot be outside of related transcript range ({end} > {len(transcript)})'
             )
 
         for domain in domains:
diff --git a/src/mavis/annotate/splicing.py b/src/mavis/annotate/splicing.py
index 9de8ce24..08fcd0d5 100644
--- a/src/mavis/annotate/splicing.py
+++ b/src/mavis/annotate/splicing.py
@@ -16,9 +16,9 @@ def __str__(self):
         temp = []
         for site in self:
             temp.append(
-                '{}{}{}'.format(
-                    'D' if site.type == SPLICE_SITE_TYPE.DONOR else 'A',
+                '{}:{}{}'.format(
                     site.pos,
+                    'D' if site.type == SPLICE_SITE_TYPE.DONOR else 'A',
                     '' if site.intact else '*',
                 )
             )
@@ -161,25 +161,6 @@ def __init__(
     def __or__(self, other):
         return Interval.__or__(self, other)
 
-    def __repr__(self):
-        cls = self.__class__.__name__
-        refname = self.reference_object
-        try:
-            refname = self.reference_object.name
-        except AttributeError:
-            pass
-        seq = '' if not self.seq else ', seq=' + self.seq
-        return '{}(type={}, {}:{}({}-{}){}, strand={})'.format(
-            cls,
-            SPLICE_SITE_TYPE.reverse(self.type),
-            refname,
-            self.pos,
-            self.start,
-            self.end,
-            seq,
-            self.get_strand(),
-        )
-
 
 def predict_splice_sites(input_sequence: str, is_reverse: bool = False) -> List[SpliceSite]:
     """

From 7e41fe38e07eadd42ef94e11cbe19b4124683880 Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Sat, 5 Feb 2022 21:06:10 -0800
Subject: [PATCH 114/137] Update docs

---
 docs/migrating.md | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/docs/migrating.md b/docs/migrating.md
index db9c76b2..29231381 100644
--- a/docs/migrating.md
+++ b/docs/migrating.md
@@ -22,11 +22,20 @@ MAVIS is now integrated with snakemake instead of handling its own scheduling
 
 ## Reference Annotation Files
 
-MAVIS no longer supports the previously deprecated tab-delimited format of the annotations file. If you are still using these files in your project we have provided a script to automatically convert them to the newer format in the tools directory
+MAVIS no longer supports the previously deprecated tab-delimited format of the annotations file. If you are still using these files in your project we have provided a script to automatically convert them to the newer format in the tools directory.
 
 ```bash
 python src/tools/convert_annotations_format.py \
     /path/to/tab/file.tab \
-    --input_type v2 \
+    --input_type v2-tab \
+    /path/to/new/json/file.json
+```
+
+In v3 the JSON files are slightly different to support multiple translations per transcript. Your old v2 JSON files can be automatically converted to the new format with the same script
+
+```bash
+python src/tools/convert_annotations_format.py \
+    /path/to/json/file.json \
+    --input_type v2-json \
     /path/to/new/json/file.json
 ```

From bc4c0f8f72b1e2278f17b4a9225f3c846a728979 Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Sat, 5 Feb 2022 21:06:35 -0800
Subject: [PATCH 115/137] Fix merge conflict

---
 src/mavis/annotate/file_io.py | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/src/mavis/annotate/file_io.py b/src/mavis/annotate/file_io.py
index 3e889870..bcd0db0f 100644
--- a/src/mavis/annotate/file_io.py
+++ b/src/mavis/annotate/file_io.py
@@ -344,17 +344,10 @@ def __init__(
     ):
         """
         Args:
-<<<<<<< HEAD
-            *filepaths (str): list of paths to load
-            file_type (str): Type of file to load
-            eager_load (bool=False): load the files immediately
-            assert_exists (bool=False): check that all files exist
-=======
             *filepaths: list of paths to load
             file_type: Type of file to load
-            eager_load: load the files immeadiately
+            eager_load: load the files immediately
             assert_exists: check that all files exist
->>>>>>> develop_v3
             **opt: key word arguments to be passed to the load function and used as part of the file cache key
 
         Raises

From 5e36b2d96d2b369ebd9c2188762bdbf37538b042 Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Tue, 8 Feb 2022 20:22:57 -0800
Subject: [PATCH 116/137] Add tests for tab conversion

---
 .github/workflows/build.yml                   |    1 +
 setup.cfg                                     |    5 +-
 .../data/ensembl69_hg19_annotations.kras.tab  |    9 +
 .../ensembl69_hg19_annotations.kras.tab.json  |  466 +
 tests/tools/data/example_genes.v3.json        | 7855 ++++++++++++++++-
 .../tools/test_convert_annotations_format.py  |    7 +
 6 files changed, 8339 insertions(+), 4 deletions(-)
 create mode 100644 tests/tools/data/ensembl69_hg19_annotations.kras.tab
 create mode 100644 tests/tools/data/ensembl69_hg19_annotations.kras.tab.json

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index b5809403..83ccec48 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -51,6 +51,7 @@ jobs:
         pytest tests -v \
           --junitxml=junit/test-results-${{ matrix.python-version }}.xml \
           --cov mavis \
+          --cov tools.convert_annotations_format \
           --cov-report term-missing \
           --cov-report xml \
           --durations=10 \
diff --git a/setup.cfg b/setup.cfg
index d0a4934c..371d7e35 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -23,9 +23,7 @@ ignore = E501
 statistics = True
 
 [flake8]
-ignore = E501
-    W503
-    E203
+ignore = E501,W503,E203
 
 [options]
 packages = find:
@@ -79,6 +77,7 @@ dev =
     pycodestyle>=2.3.1
     pytest
     pytest-cov
+    pytest-xdist
     mkdocs==1.1.2
     markdown-refdocs
     mkdocs-material==5.4.0
diff --git a/tests/tools/data/ensembl69_hg19_annotations.kras.tab b/tests/tools/data/ensembl69_hg19_annotations.kras.tab
new file mode 100644
index 00000000..32a4113f
--- /dev/null
+++ b/tests/tools/data/ensembl69_hg19_annotations.kras.tab
@@ -0,0 +1,9 @@
+## input file used to map hugo gene names: compiled_gene_drug_pathway.v1_2_5.tsv
+## input file for picking best transcript: ens69_best_transcript.txt
+## Ensembl Api version 69
+## generated at: Thu Aug  4 16:38:01 2016
+#ensembl_gene_id	hugo_names	chr	strand	gene_start	gene_end	best_ensembl_transcript_id	ensembl_transcript_id	refseq_equivalents	transcript_genomic_start	transcript_genomic_end	cdna_coding_start	cdna_coding_end	genomic_exon_ranges	AA_domain_ranges
+ENSG00000133703	KRAS	12	-1	25357723	25403870	ENST00000311936	ENST00000311936	NP_004976.2;NM_004985.3	25357723	25403865	193	759	25403685-25403865;25398208-25398329;25380168-25380346;25378548-25378707;25357723-25362845	PR00449:4-25,27-43,44-66,107-120,141-163;PF00025:3-162;SM00173:1-166;PF00009:45-163;PF08477:5-119;PS50318:165-184;SSF52540:3-184;TIGR00231:1-159;SM00175:4-166;PF00071:5-164;SM00174:6-166
+ENSG00000133703	KRAS	12	-1	25357723	25403870	ENST00000311936	ENST00000557334		25362102	25403870	198	425	25403685-25403870;25398208-25398329;25362102-25362845	PR00449:4-25,27-43;PS50318:52-71;SM00173:1-53;PF00071:5-44;SSF52540:3-37
+ENSG00000133703	KRAS	12	-1	25357723	25403870	ENST00000311936	ENST00000256078	NP_203524.1;NM_033360.2	25362365	25403737	65	634	25403685-25403737;25398208-25398329;25380168-25380346;25378548-25378707;25368371-25368494;25362365-25362845	SM00175:4-166;PF00071:5-164;SSF52540:3-185;SM00176:9-189;TIGR00231:1-159;SM00174:6-166;PR00449:4-25,27-43,44-66,107-120,141-163;PF00025:3-161;PF08477:5-119;PF00009:45-162;SM00173:1-166
+ENSG00000133703	KRAS	12	-1	25357723	25403870	ENST00000311936	ENST00000556131		25386753	25403863	178	309	25403698-25403863;25398208-25398329;25386753-25388160	PR00449:4-25,27-43;PF00071:5-37;SSF52540:3-38
diff --git a/tests/tools/data/ensembl69_hg19_annotations.kras.tab.json b/tests/tools/data/ensembl69_hg19_annotations.kras.tab.json
new file mode 100644
index 00000000..eeaab2bb
--- /dev/null
+++ b/tests/tools/data/ensembl69_hg19_annotations.kras.tab.json
@@ -0,0 +1,466 @@
+{
+    "genes": [
+        {
+            "aliases": [
+                "KRAS"
+            ],
+            "chr": "12",
+            "end": 25403870,
+            "name": "ENSG00000133703",
+            "start": 25357723,
+            "strand": "-",
+            "transcripts": [
+                {
+                    "aliases": [
+                    ],
+                    "end": 25403865,
+                    "exons": [
+                        {
+                            "end": 25403865,
+                            "start": 25403685
+                        },
+                        {
+                            "end": 25398329,
+                            "start": 25398208
+                        },
+                        {
+                            "end": 25380346,
+                            "start": 25380168
+                        },
+                        {
+                            "end": 25378707,
+                            "start": 25378548
+                        },
+                        {
+                            "end": 25362845,
+                            "start": 25357723
+                        }
+                    ],
+                    "is_best_transcript": true,
+                    "name": "ENST00000311936",
+                    "start": 25357723,
+                    "translations": [
+                        {
+                            "cdna_coding_end": 759,
+                            "cdna_coding_start": 193,
+                            "domains": [
+                                {
+                                    "name": "PR00449",
+                                    "regions": [
+                                        {
+                                            "end": 25,
+                                            "start": 4
+                                        },
+                                        {
+                                            "end": 43,
+                                            "start": 27
+                                        },
+                                        {
+                                            "end": 66,
+                                            "start": 44
+                                        },
+                                        {
+                                            "end": 120,
+                                            "start": 107
+                                        },
+                                        {
+                                            "end": 163,
+                                            "start": 141
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PF00025",
+                                    "regions": [
+                                        {
+                                            "end": 162,
+                                            "start": 3
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SM00173",
+                                    "regions": [
+                                        {
+                                            "end": 166,
+                                            "start": 1
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PF00009",
+                                    "regions": [
+                                        {
+                                            "end": 163,
+                                            "start": 45
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PF08477",
+                                    "regions": [
+                                        {
+                                            "end": 119,
+                                            "start": 5
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PS50318",
+                                    "regions": [
+                                        {
+                                            "end": 184,
+                                            "start": 165
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SSF52540",
+                                    "regions": [
+                                        {
+                                            "end": 184,
+                                            "start": 3
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "TIGR00231",
+                                    "regions": [
+                                        {
+                                            "end": 159,
+                                            "start": 1
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SM00175",
+                                    "regions": [
+                                        {
+                                            "end": 166,
+                                            "start": 4
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PF00071",
+                                    "regions": [
+                                        {
+                                            "end": 164,
+                                            "start": 5
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SM00174",
+                                    "regions": [
+                                        {
+                                            "end": 166,
+                                            "start": 6
+                                        }
+                                    ]
+                                }
+                            ]
+                        }
+                    ]
+                },
+                {
+                    "aliases": [
+                    ],
+                    "end": 25403870,
+                    "exons": [
+                        {
+                            "end": 25403870,
+                            "start": 25403685
+                        },
+                        {
+                            "end": 25398329,
+                            "start": 25398208
+                        },
+                        {
+                            "end": 25362845,
+                            "start": 25362102
+                        }
+                    ],
+                    "is_best_transcript": false,
+                    "name": "ENST00000557334",
+                    "start": 25362102,
+                    "translations": [
+                        {
+                            "cdna_coding_end": 425,
+                            "cdna_coding_start": 198,
+                            "domains": [
+                                {
+                                    "name": "PR00449",
+                                    "regions": [
+                                        {
+                                            "end": 25,
+                                            "start": 4
+                                        },
+                                        {
+                                            "end": 43,
+                                            "start": 27
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PS50318",
+                                    "regions": [
+                                        {
+                                            "end": 71,
+                                            "start": 52
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SM00173",
+                                    "regions": [
+                                        {
+                                            "end": 53,
+                                            "start": 1
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PF00071",
+                                    "regions": [
+                                        {
+                                            "end": 44,
+                                            "start": 5
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SSF52540",
+                                    "regions": [
+                                        {
+                                            "end": 37,
+                                            "start": 3
+                                        }
+                                    ]
+                                }
+                            ]
+                        }
+                    ]
+                },
+                {
+                    "aliases": [
+                    ],
+                    "end": 25403737,
+                    "exons": [
+                        {
+                            "end": 25403737,
+                            "start": 25403685
+                        },
+                        {
+                            "end": 25398329,
+                            "start": 25398208
+                        },
+                        {
+                            "end": 25380346,
+                            "start": 25380168
+                        },
+                        {
+                            "end": 25378707,
+                            "start": 25378548
+                        },
+                        {
+                            "end": 25368494,
+                            "start": 25368371
+                        },
+                        {
+                            "end": 25362845,
+                            "start": 25362365
+                        }
+                    ],
+                    "is_best_transcript": false,
+                    "name": "ENST00000256078",
+                    "start": 25362365,
+                    "translations": [
+                        {
+                            "cdna_coding_end": 634,
+                            "cdna_coding_start": 65,
+                            "domains": [
+                                {
+                                    "name": "SM00175",
+                                    "regions": [
+                                        {
+                                            "end": 166,
+                                            "start": 4
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PF00071",
+                                    "regions": [
+                                        {
+                                            "end": 164,
+                                            "start": 5
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SSF52540",
+                                    "regions": [
+                                        {
+                                            "end": 185,
+                                            "start": 3
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SM00176",
+                                    "regions": [
+                                        {
+                                            "end": 189,
+                                            "start": 9
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "TIGR00231",
+                                    "regions": [
+                                        {
+                                            "end": 159,
+                                            "start": 1
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SM00174",
+                                    "regions": [
+                                        {
+                                            "end": 166,
+                                            "start": 6
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PR00449",
+                                    "regions": [
+                                        {
+                                            "end": 25,
+                                            "start": 4
+                                        },
+                                        {
+                                            "end": 43,
+                                            "start": 27
+                                        },
+                                        {
+                                            "end": 66,
+                                            "start": 44
+                                        },
+                                        {
+                                            "end": 120,
+                                            "start": 107
+                                        },
+                                        {
+                                            "end": 163,
+                                            "start": 141
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PF00025",
+                                    "regions": [
+                                        {
+                                            "end": 161,
+                                            "start": 3
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PF08477",
+                                    "regions": [
+                                        {
+                                            "end": 119,
+                                            "start": 5
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PF00009",
+                                    "regions": [
+                                        {
+                                            "end": 162,
+                                            "start": 45
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SM00173",
+                                    "regions": [
+                                        {
+                                            "end": 166,
+                                            "start": 1
+                                        }
+                                    ]
+                                }
+                            ]
+                        }
+                    ]
+                },
+                {
+                    "aliases": [
+                    ],
+                    "end": 25403863,
+                    "exons": [
+                        {
+                            "end": 25403863,
+                            "start": 25403698
+                        },
+                        {
+                            "end": 25398329,
+                            "start": 25398208
+                        },
+                        {
+                            "end": 25388160,
+                            "start": 25386753
+                        }
+                    ],
+                    "is_best_transcript": false,
+                    "name": "ENST00000556131",
+                    "start": 25386753,
+                    "translations": [
+                        {
+                            "cdna_coding_end": 309,
+                            "cdna_coding_start": 178,
+                            "domains": [
+                                {
+                                    "name": "PR00449",
+                                    "regions": [
+                                        {
+                                            "end": 25,
+                                            "start": 4
+                                        },
+                                        {
+                                            "end": 43,
+                                            "start": 27
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PF00071",
+                                    "regions": [
+                                        {
+                                            "end": 37,
+                                            "start": 5
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SSF52540",
+                                    "regions": [
+                                        {
+                                            "end": 38,
+                                            "start": 3
+                                        }
+                                    ]
+                                }
+                            ]
+                        }
+                    ]
+                }
+            ]
+        }
+    ]
+}
diff --git a/tests/tools/data/example_genes.v3.json b/tests/tools/data/example_genes.v3.json
index 6a590488..7f77a887 100644
--- a/tests/tools/data/example_genes.v3.json
+++ b/tests/tools/data/example_genes.v3.json
@@ -1 +1,7854 @@
-{"genes": [{"aliases": ["EGFR"], "chr": "7", "end": 55324313, "name": "ENSG00000146648", "start": 55086714, "strand": "+", "transcripts": [{"end": 55270769, "exons": [{"end": 55087058, "start": 55086714}, {"end": 55210130, "start": 55209979}, {"end": 55211181, "start": 55210998}, {"end": 55219055, "start": 55218987}, {"end": 55220357, "start": 55220239}, {"end": 55221845, "start": 55221704}, {"end": 55223639, "start": 55223523}, {"end": 55224352, "start": 55224226}, {"end": 55224525, "start": 55224452}, {"end": 55225446, "start": 55225356}, {"end": 55228031, "start": 55227832}, {"end": 55229324, "start": 55229192}, {"end": 55231516, "start": 55231426}, {"end": 55233130, "start": 55232973}, {"end": 55238906, "start": 55238868}, {"end": 55240817, "start": 55240676}, {"end": 55241736, "start": 55241614}, {"end": 55242513, "start": 55242415}, {"end": 55249171, "start": 55248986}, {"end": 55259567, "start": 55259412}, {"end": 55260534, "start": 55260459}, {"end": 55266556, "start": 55266410}, {"end": 55268106, "start": 55268009}, {"end": 55269048, "start": 55268881}, {"end": 55269475, "start": 55269428}, {"end": 55270769, "start": 55270210}], "is_best_transcript": false, "name": "ENST00000455089", "start": 55086714, "translations": [{"cdna_coding_end": 3533, "cdna_coding_start": 258, "domains": [{"name": "PIRSF000619", "regions": [{"end": 1090, "start": 1}]}, {"name": "PF07714", "regions": [{"end": 920, "start": 669}]}, {"name": "SSF52058", "regions": [{"end": 191, "start": 28}, {"end": 475, "start": 283}]}, {"name": "PF00757", "regions": [{"end": 293, "start": 141}]}, {"name": "PS50011", "regions": [{"end": 934, "start": 667}]}, {"name": "PS50311", "regions": [{"end": 219, "start": 145}]}, {"name": "SSF57184", "regions": [{"end": 290, "start": 142}, {"end": 593, "start": 460}]}, {"name": "PR00109", "regions": [{"end": 758, "start": 745}, {"end": 800, "start": 782}, {"end": 841, "start": 831}, {"end": 872, "start": 850}, {"end": 916, "start": 894}]}, {"name": 
"SSF56112", "regions": [{"end": 975, "start": 651}]}, {"name": "PF01030", "regions": [{"end": 141, "start": 57}, {"end": 435, "start": 316}]}, {"name": "SM00220", "regions": [{"end": 924, "start": 667}]}, {"name": "SM00261", "regions": [{"end": 225, "start": 183}, {"end": 502, "start": 451}, {"end": 556, "start": 507}]}, {"name": "SM00219", "regions": [{"end": 923, "start": 667}]}, {"name": "PF00069", "regions": [{"end": 919, "start": 667}]}]}]}, {"end": 55236328, "exons": [{"end": 55087058, "start": 55086725}, {"end": 55210130, "start": 55209979}, {"end": 55211181, "start": 55210998}, {"end": 55214433, "start": 55214299}, {"end": 55219055, "start": 55218987}, {"end": 55220357, "start": 55220239}, {"end": 55221845, "start": 55221704}, {"end": 55223639, "start": 55223523}, {"end": 55224352, "start": 55224226}, {"end": 55224525, "start": 55224452}, {"end": 55225446, "start": 55225356}, {"end": 55228031, "start": 55227832}, {"end": 55229324, "start": 55229192}, {"end": 55231516, "start": 55231426}, {"end": 55233130, "start": 55232973}, {"end": 55236328, "start": 55236216}], "is_best_transcript": false, "name": "ENST00000342916", "start": 55086725, "translations": [{"cdna_coding_end": 2133, "cdna_coding_start": 247, "domains": [{"name": "PS50311", "regions": [{"end": 264, "start": 187}]}, {"name": "SSF57184", "regions": [{"end": 339, "start": 182}, {"end": 624, "start": 505}]}, {"name": "SSF52058", "regions": [{"end": 211, "start": 29}, {"end": 520, "start": 328}]}, {"name": "PF00757", "regions": [{"end": 338, "start": 185}]}, {"name": "SM00261", "regions": [{"end": 270, "start": 228}, {"end": 547, "start": 496}, {"end": 601, "start": 552}]}, {"name": "PF01030", "regions": [{"end": 167, "start": 57}, {"end": 480, "start": 361}]}]}]}, {"end": 55238738, "exons": [{"end": 55087058, "start": 55086726}, {"end": 55210130, "start": 55209979}, {"end": 55211181, "start": 55210998}, {"end": 55214433, "start": 55214299}, {"end": 55219055, "start": 55218987}, {"end": 55220357, 
"start": 55220239}, {"end": 55221845, "start": 55221704}, {"end": 55223639, "start": 55223523}, {"end": 55224352, "start": 55224226}, {"end": 55224525, "start": 55224452}, {"end": 55225446, "start": 55225356}, {"end": 55228031, "start": 55227832}, {"end": 55229324, "start": 55229192}, {"end": 55231516, "start": 55231426}, {"end": 55233130, "start": 55232973}, {"end": 55238738, "start": 55238000}], "is_best_transcript": false, "name": "ENST00000344576", "start": 55086726, "translations": [{"cdna_coding_end": 2363, "cdna_coding_start": 246, "domains": [{"name": "SSF57184", "regions": [{"end": 339, "start": 182}, {"end": 624, "start": 505}]}, {"name": "PS50311", "regions": [{"end": 264, "start": 187}]}, {"name": "PF00757", "regions": [{"end": 338, "start": 185}]}, {"name": "SSF52058", "regions": [{"end": 211, "start": 29}, {"end": 520, "start": 328}]}, {"name": "SM00261", "regions": [{"end": 270, "start": 228}, {"end": 547, "start": 496}, {"end": 601, "start": 552}]}, {"name": "PF01030", "regions": [{"end": 167, "start": 57}, {"end": 480, "start": 361}]}]}]}, {"end": 55224644, "exons": [{"end": 55087058, "start": 55086727}, {"end": 55210130, "start": 55209979}, {"end": 55211181, "start": 55210998}, {"end": 55214433, "start": 55214299}, {"end": 55219055, "start": 55218987}, {"end": 55220357, "start": 55220239}, {"end": 55221845, "start": 55221704}, {"end": 55223639, "start": 55223523}, {"end": 55224352, "start": 55224226}, {"end": 55224644, "start": 55224452}], "is_best_transcript": false, "name": "ENST00000420316", "start": 55086727, "translations": [{"cdna_coding_end": 1462, "cdna_coding_start": 245, "domains": [{"name": "SSF57184", "regions": [{"end": 339, "start": 182}]}, {"name": "PS50311", "regions": [{"end": 264, "start": 187}]}, {"name": "PF00757", "regions": [{"end": 338, "start": 185}]}, {"name": "SSF52058", "regions": [{"end": 211, "start": 29}, {"end": 403, "start": 328}]}, {"name": "SM00261", "regions": [{"end": 270, "start": 228}]}, {"name": "PF01030", 
"regions": [{"end": 167, "start": 57}]}]}]}, {"end": 55279321, "exons": [{"end": 55087058, "start": 55086794}, {"end": 55210130, "start": 55209979}, {"end": 55211181, "start": 55210998}, {"end": 55214433, "start": 55214299}, {"end": 55219055, "start": 55218987}, {"end": 55220357, "start": 55220239}, {"end": 55221845, "start": 55221704}, {"end": 55223639, "start": 55223523}, {"end": 55224352, "start": 55224226}, {"end": 55224525, "start": 55224452}, {"end": 55225446, "start": 55225356}, {"end": 55228031, "start": 55227832}, {"end": 55229324, "start": 55229192}, {"end": 55231516, "start": 55231426}, {"end": 55233130, "start": 55232973}, {"end": 55238906, "start": 55238868}, {"end": 55240817, "start": 55240676}, {"end": 55241736, "start": 55241614}, {"end": 55242513, "start": 55242415}, {"end": 55249171, "start": 55248986}, {"end": 55259567, "start": 55259412}, {"end": 55260534, "start": 55260459}, {"end": 55266556, "start": 55266410}, {"end": 55268106, "start": 55268009}, {"end": 55269048, "start": 55268881}, {"end": 55269475, "start": 55269428}, {"end": 55270318, "start": 55270210}, {"end": 55279321, "start": 55272949}], "is_best_transcript": true, "name": "ENST00000275493", "start": 55086794, "translations": [{"cdna_coding_end": 3810, "cdna_coding_start": 178, "domains": [{"name": "SM00220", "regions": [{"end": 969, "start": 712}]}, {"name": "PF01030", "regions": [{"end": 167, "start": 57}, {"end": 480, "start": 361}]}, {"name": "SSF56112", "regions": [{"end": 1020, "start": 696}]}, {"name": "PF00069", "regions": [{"end": 964, "start": 712}]}, {"name": "SM00219", "regions": [{"end": 968, "start": 712}]}, {"name": "SM00261", "regions": [{"end": 270, "start": 228}, {"end": 547, "start": 496}, {"end": 601, "start": 552}]}, {"name": "PF00757", "regions": [{"end": 338, "start": 185}]}, {"name": "SSF52058", "regions": [{"end": 211, "start": 29}, {"end": 520, "start": 328}]}, {"name": "PF07714", "regions": [{"end": 965, "start": 714}]}, {"name": "PIRSF000619", "regions": 
[{"end": 1210, "start": 1}]}, {"name": "PR00109", "regions": [{"end": 803, "start": 790}, {"end": 845, "start": 827}, {"end": 886, "start": 876}, {"end": 917, "start": 895}, {"end": 961, "start": 939}]}, {"name": "SSF57184", "regions": [{"end": 339, "start": 182}, {"end": 638, "start": 505}]}, {"name": "PS50311", "regions": [{"end": 264, "start": 187}]}, {"name": "PS50011", "regions": [{"end": 979, "start": 712}]}]}]}, {"end": 55324313, "exons": [{"end": 55087058, "start": 55086811}, {"end": 55210130, "start": 55209979}, {"end": 55211181, "start": 55210998}, {"end": 55214433, "start": 55214299}, {"end": 55219055, "start": 55218987}, {"end": 55220357, "start": 55220239}, {"end": 55221845, "start": 55221704}, {"end": 55223639, "start": 55223523}, {"end": 55224352, "start": 55224226}, {"end": 55224525, "start": 55224452}, {"end": 55225446, "start": 55225356}, {"end": 55228031, "start": 55227832}, {"end": 55229324, "start": 55229192}, {"end": 55231516, "start": 55231426}, {"end": 55233130, "start": 55232973}, {"end": 55238906, "start": 55238868}, {"end": 55240621, "start": 55240539}, {"end": 55324313, "start": 55323947}], "is_best_transcript": false, "name": "ENST00000442591", "start": 55086811, "translations": [{"cdna_coding_end": 2134, "cdna_coding_start": 161, "domains": [{"name": "PF01030", "regions": [{"end": 167, "start": 57}, {"end": 480, "start": 361}]}, {"name": "SM00261", "regions": [{"end": 270, "start": 228}, {"end": 547, "start": 496}, {"end": 601, "start": 552}, {"end": 653, "start": 614}]}, {"name": "SSF52058", "regions": [{"end": 211, "start": 29}, {"end": 520, "start": 328}]}, {"name": "PF00757", "regions": [{"end": 338, "start": 185}]}, {"name": "PS50311", "regions": [{"end": 264, "start": 187}]}, {"name": "SSF57184", "regions": [{"end": 339, "start": 182}, {"end": 638, "start": 505}]}]}]}, {"end": 55214417, "exons": [{"end": 55177651, "start": 55177416}, {"end": 55210130, "start": 55209979}, {"end": 55211181, "start": 55210998}, {"end": 55214417, 
"start": 55214299}], "is_best_transcript": false, "name": "ENST00000450046", "start": 55177416, "translations": [{"cdna_coding_end": 691, "cdna_coding_start": 308, "domains": [{"name": "SSF52058", "regions": [{"end": 127, "start": 1}]}, {"name": "PF01030", "regions": [{"end": 114, "start": 4}]}]}]}, {"end": 55273591, "exons": [{"end": 55177651, "start": 55177540}, {"end": 55210130, "start": 55209979}, {"end": 55211181, "start": 55210998}, {"end": 55214433, "start": 55214299}, {"end": 55219055, "start": 55218987}, {"end": 55220357, "start": 55220239}, {"end": 55221845, "start": 55221704}, {"end": 55223639, "start": 55223523}, {"end": 55224352, "start": 55224226}, {"end": 55224525, "start": 55224452}, {"end": 55225446, "start": 55225356}, {"end": 55228031, "start": 55227832}, {"end": 55229324, "start": 55229192}, {"end": 55231516, "start": 55231426}, {"end": 55233130, "start": 55232973}, {"end": 55238906, "start": 55238868}, {"end": 55240817, "start": 55240676}, {"end": 55241736, "start": 55241614}, {"end": 55242513, "start": 55242415}, {"end": 55249171, "start": 55248986}, {"end": 55259567, "start": 55259412}, {"end": 55260534, "start": 55260459}, {"end": 55266556, "start": 55266410}, {"end": 55268106, "start": 55268009}, {"end": 55269048, "start": 55268881}, {"end": 55269475, "start": 55269428}, {"end": 55270318, "start": 55270210}, {"end": 55273591, "start": 55272949}], "is_best_transcript": false, "name": "ENST00000454757", "start": 55177540, "translations": [{"cdna_coding_end": 3657, "cdna_coding_start": 184, "domains": [{"name": "SM00261", "regions": [{"end": 217, "start": 175}, {"end": 494, "start": 443}, {"end": 548, "start": 499}]}, {"name": "PF00069", "regions": [{"end": 911, "start": 659}]}, {"name": "SM00219", "regions": [{"end": 915, "start": 659}]}, {"name": "SSF56112", "regions": [{"end": 967, "start": 643}]}, {"name": "SM00220", "regions": [{"end": 916, "start": 659}]}, {"name": "PF01030", "regions": [{"end": 114, "start": 4}, {"end": 427, "start": 
308}]}, {"name": "PS50311", "regions": [{"end": 211, "start": 134}]}, {"name": "PS50011", "regions": [{"end": 926, "start": 659}]}, {"name": "PR00109", "regions": [{"end": 750, "start": 737}, {"end": 792, "start": 774}, {"end": 833, "start": 823}, {"end": 864, "start": 842}, {"end": 908, "start": 886}]}, {"name": "SSF57184", "regions": [{"end": 286, "start": 129}, {"end": 585, "start": 452}]}, {"name": "PIRSF000619", "regions": [{"end": 1157, "start": 1}]}, {"name": "PF07714", "regions": [{"end": 912, "start": 661}]}, {"name": "SSF52058", "regions": [{"end": 158, "start": 1}, {"end": 467, "start": 275}]}, {"name": "PF00757", "regions": [{"end": 285, "start": 132}]}]}]}]}, {"aliases": ["DSTYK"], "chr": "1", "end": 205180727, "name": "ENSG00000133059", "start": 205111632, "strand": "-", "transcripts": [{"end": 205180727, "exons": [{"end": 205116873, "start": 205111632}, {"end": 205117467, "start": 205117333}, {"end": 205119898, "start": 205119808}, {"end": 205133083, "start": 205133055}, {"end": 205138960, "start": 205138291}, {"end": 205156934, "start": 205156546}, {"end": 205180727, "start": 205180399}], "is_best_transcript": false, "name": "ENST00000367160", "start": 205111632, "translations": [{"cdna_coding_end": 65, "cdna_coding_start": 1831, "domains": [{"name": "SM00220", "regions": [{"end": 565, "start": 337}]}, {"name": "SSF56112", "regions": [{"end": 585, "start": 452}]}, {"name": "PF00069", "regions": [{"end": 556, "start": 451}]}, {"name": "PF07714", "regions": [{"end": 558, "start": 471}]}, {"name": "PS50011", "regions": [{"end": 565, "start": 312}]}]}]}, {"end": 205180694, "exons": [{"end": 205116873, "start": 205111633}, {"end": 205119922, "start": 205119808}, {"end": 205126514, "start": 205126401}, {"end": 205128807, "start": 205128675}, {"end": 205129398, "start": 205129242}, {"end": 205130515, "start": 205130386}, {"end": 205131340, "start": 205131164}, {"end": 205132134, "start": 205132051}, {"end": 205133083, "start": 205132851}, {"end": 
205138960, "start": 205138291}, {"end": 205156934, "start": 205156546}, {"end": 205180694, "start": 205180399}], "is_best_transcript": false, "name": "ENST00000367161", "start": 205111633, "translations": [{"cdna_coding_end": 32, "cdna_coding_start": 2686, "domains": [{"name": "PF07714", "regions": [{"end": 820, "start": 654}]}, {"name": "PS50011", "regions": [{"end": 884, "start": 652}]}, {"name": "SSF56112", "regions": [{"end": 853, "start": 627}]}, {"name": "SM00220", "regions": [{"end": 861, "start": 652}]}, {"name": "PF00069", "regions": [{"end": 824, "start": 654}]}, {"name": "SM00219", "regions": [{"end": 861, "start": 652}]}]}]}, {"end": 205180694, "exons": [{"end": 205116873, "start": 205111633}, {"end": 205117467, "start": 205117333}, {"end": 205119922, "start": 205119808}, {"end": 205126514, "start": 205126401}, {"end": 205128807, "start": 205128675}, {"end": 205129398, "start": 205129242}, {"end": 205130515, "start": 205130386}, {"end": 205131340, "start": 205131164}, {"end": 205132134, "start": 205132051}, {"end": 205133083, "start": 205132851}, {"end": 205138960, "start": 205138291}, {"end": 205156934, "start": 205156546}, {"end": 205180694, "start": 205180399}], "is_best_transcript": true, "name": "ENST00000367162", "start": 205111633, "translations": [{"cdna_coding_end": 32, "cdna_coding_start": 2821, "domains": [{"name": "PF07714", "regions": [{"end": 899, "start": 654}]}, {"name": "PS50011", "regions": [{"end": 906, "start": 652}]}, {"name": "SSF56112", "regions": [{"end": 897, "start": 638}]}, {"name": "SM00220", "regions": [{"end": 906, "start": 652}]}, {"name": "SM00219", "regions": [{"end": 906, "start": 652}]}, {"name": "PF00069", "regions": [{"end": 897, "start": 654}]}]}]}]}, {"aliases": ["NDUFA12"], "chr": "12", "end": 95397546, "name": "ENSG00000184752", "start": 95290831, "strand": "-", "transcripts": [{"end": 95397436, "exons": [{"end": 95291086, "start": 95290831}, {"end": 95318582, "start": 95318422}, {"end": 95322039, "start": 
95321793}, {"end": 95396597, "start": 95396515}, {"end": 95397436, "start": 95397371}], "is_best_transcript": false, "name": "ENST00000552205", "start": 95290831}, {"end": 95397476, "exons": [{"end": 95365261, "start": 95365108}, {"end": 95396597, "start": 95396582}, {"end": 95397476, "start": 95397371}], "is_best_transcript": false, "name": "ENST00000547157", "start": 95365108, "translations": [{"cdna_coding_end": 21, "cdna_coding_start": 188}]}, {"end": 95397384, "exons": [{"end": 95365396, "start": 95365109}, {"end": 95388033, "start": 95387946}, {"end": 95390752, "start": 95390680}, {"end": 95396597, "start": 95396515}, {"end": 95397384, "start": 95397371}], "is_best_transcript": false, "name": "ENST00000551991", "start": 95365109, "translations": [{"cdna_coding_end": 1, "cdna_coding_start": 144, "domains": [{"name": "PF05071", "regions": [{"end": 33, "start": 12}]}]}]}, {"end": 95397546, "exons": [{"end": 95365396, "start": 95365109}, {"end": 95388033, "start": 95387946}, {"end": 95396597, "start": 95396515}, {"end": 95397546, "start": 95397371}], "is_best_transcript": true, "name": "ENST00000327772", "start": 95365109, "translations": [{"cdna_coding_end": 91, "cdna_coding_start": 528, "domains": [{"name": "PF05071", "regions": [{"end": 137, "start": 36}]}]}]}, {"end": 95397489, "exons": [{"end": 95365396, "start": 95365112}, {"end": 95396597, "start": 95396515}, {"end": 95397489, "start": 95397371}], "is_best_transcript": false, "name": "ENST00000547986", "start": 95365112, "translations": [{"cdna_coding_end": 34, "cdna_coding_start": 225, "domains": [{"name": "PF05071", "regions": [{"end": 53, "start": 36}]}]}]}, {"end": 95397524, "exons": [{"end": 95365396, "start": 95365254}, {"end": 95366265, "start": 95366171}, {"end": 95388033, "start": 95387946}, {"end": 95396597, "start": 95396515}, {"end": 95397524, "start": 95397371}], "is_best_transcript": false, "name": "ENST00000546788", "start": 95365254, "translations": [{"cdna_coding_end": 69, 
"cdna_coding_start": 368, "domains": [{"name": "PF05071", "regions": [{"end": 87, "start": 36}]}]}]}]}, {"aliases": ["FRMD6"], "chr": "14", "end": 52197445, "name": "ENSG00000139926", "start": 51955818, "strand": "+", "transcripts": [{"end": 52197177, "exons": [{"end": 51956138, "start": 51955855}, {"end": 52037128, "start": 52037066}, {"end": 52156653, "start": 52156409}, {"end": 52164950, "start": 52164860}, {"end": 52167853, "start": 52167774}, {"end": 52169306, "start": 52169230}, {"end": 52171653, "start": 52171467}, {"end": 52174951, "start": 52174796}, {"end": 52178314, "start": 52178249}, {"end": 52179269, "start": 52179201}, {"end": 52182217, "start": 52182043}, {"end": 52187108, "start": 52186773}, {"end": 52188798, "start": 52188667}, {"end": 52192588, "start": 52192497}, {"end": 52197177, "start": 52194463}], "is_best_transcript": false, "name": "ENST00000356218", "start": 51955855, "translations": [{"cdna_coding_end": 2338, "cdna_coding_start": 494, "domains": [{"name": "PF09379", "regions": [{"end": 109, "start": 20}]}, {"name": "PF09380", "regions": [{"end": 322, "start": 237}]}, {"name": "SSF50729", "regions": [{"end": 375, "start": 219}]}, {"name": "SM00295", "regions": [{"end": 226, "start": 12}]}, {"name": "PS50057", "regions": [{"end": 320, "start": 16}]}, {"name": "PF00373", "regions": [{"end": 226, "start": 115}]}, {"name": "SSF47031", "regions": [{"end": 218, "start": 110}]}, {"name": "SSF54236", "regions": [{"end": 110, "start": 14}]}]}]}, {"end": 52197445, "exons": [{"end": 52118714, "start": 52118576}, {"end": 52156653, "start": 52156409}, {"end": 52164950, "start": 52164860}, {"end": 52167853, "start": 52167774}, {"end": 52169306, "start": 52169230}, {"end": 52171653, "start": 52171467}, {"end": 52174951, "start": 52174796}, {"end": 52178314, "start": 52178249}, {"end": 52179269, "start": 52179201}, {"end": 52182217, "start": 52182043}, {"end": 52187108, "start": 52186773}, {"end": 52188798, "start": 52188667}, {"end": 52192588, "start": 
52192497}, {"end": 52197445, "start": 52194463}], "is_best_transcript": true, "name": "ENST00000395718", "start": 52118576, "translations": [{"cdna_coding_end": 2130, "cdna_coding_start": 286, "domains": [{"name": "PF00373", "regions": [{"end": 226, "start": 115}]}, {"name": "SSF47031", "regions": [{"end": 218, "start": 110}]}, {"name": "SSF54236", "regions": [{"end": 110, "start": 14}]}, {"name": "PS50057", "regions": [{"end": 320, "start": 16}]}, {"name": "SM00295", "regions": [{"end": 226, "start": 12}]}, {"name": "SSF50729", "regions": [{"end": 375, "start": 219}]}, {"name": "PF09380", "regions": [{"end": 322, "start": 237}]}, {"name": "PF09379", "regions": [{"end": 109, "start": 20}]}]}]}, {"end": 52195654, "exons": [{"end": 52118714, "start": 52118665}, {"end": 52156653, "start": 52156409}, {"end": 52164950, "start": 52164860}, {"end": 52167877, "start": 52167774}, {"end": 52169306, "start": 52169230}, {"end": 52171653, "start": 52171467}, {"end": 52174951, "start": 52174796}, {"end": 52178314, "start": 52178249}, {"end": 52179269, "start": 52179201}, {"end": 52182217, "start": 52182043}, {"end": 52187108, "start": 52186773}, {"end": 52188798, "start": 52188667}, {"end": 52192588, "start": 52192497}, {"end": 52195654, "start": 52194463}], "is_best_transcript": false, "name": "ENST00000344768", "start": 52118665, "translations": [{"cdna_coding_end": 2065, "cdna_coding_start": 197, "domains": [{"name": "PF09380", "regions": [{"end": 330, "start": 245}]}, {"name": "PF09379", "regions": [{"end": 117, "start": 20}]}, {"name": "SSF47031", "regions": [{"end": 226, "start": 118}]}, {"name": "PF00373", "regions": [{"end": 234, "start": 123}]}, {"name": "SSF54236", "regions": [{"end": 118, "start": 14}]}, {"name": "PS50057", "regions": [{"end": 328, "start": 16}]}, {"name": "SM00295", "regions": [{"end": 234, "start": 12}]}, {"name": "SSF50729", "regions": [{"end": 383, "start": 227}]}]}]}, {"end": 52164945, "exons": [{"end": 52118935, "start": 52118698}, {"end": 
52156653, "start": 52156409}, {"end": 52164945, "start": 52164860}], "is_best_transcript": false, "name": "ENST00000554778", "start": 52118698}, {"end": 52174806, "exons": [{"end": 52164950, "start": 52164706}, {"end": 52167877, "start": 52167774}, {"end": 52169306, "start": 52169230}, {"end": 52171653, "start": 52171467}, {"end": 52174806, "start": 52174796}], "is_best_transcript": false, "name": "ENST00000555936", "start": 52164706}, {"end": 52197148, "exons": [{"end": 52164950, "start": 52164831}, {"end": 52167853, "start": 52167774}, {"end": 52169306, "start": 52169230}, {"end": 52171653, "start": 52171467}, {"end": 52174951, "start": 52174796}, {"end": 52178314, "start": 52178249}, {"end": 52179269, "start": 52179201}, {"end": 52182217, "start": 52182043}, {"end": 52187108, "start": 52186773}, {"end": 52188798, "start": 52188667}, {"end": 52192588, "start": 52192497}, {"end": 52197148, "start": 52194463}], "is_best_transcript": false, "name": "ENST00000554167", "start": 52164831, "translations": [{"cdna_coding_end": 1775, "cdna_coding_start": 138, "domains": [{"name": "SSF50729", "regions": [{"end": 306, "start": 150}]}, {"name": "PS50057", "regions": [{"end": 251, "start": 1}]}, {"name": "SSF54236", "regions": [{"end": 41, "start": 1}]}, {"name": "SSF47031", "regions": [{"end": 149, "start": 41}]}, {"name": "PF00373", "regions": [{"end": 157, "start": 46}]}, {"name": "PF09380", "regions": [{"end": 253, "start": 168}]}]}]}, {"end": 52175062, "exons": [{"end": 52169306, "start": 52169266}, {"end": 52171653, "start": 52171467}, {"end": 52175062, "start": 52174796}], "is_best_transcript": false, "name": "ENST00000557405", "start": 52169266, "translations": [{"cdna_coding_end": 390, "cdna_coding_start": 1, "domains": [{"name": "PS50057", "regions": [{"end": 129, "start": 1}]}, {"name": "PF00373", "regions": [{"end": 124, "start": 13}]}, {"name": "SSF47031", "regions": [{"end": 116, "start": 8}]}]}]}, {"end": 52187243, "exons": [{"end": 52179269, "start": 
52179231}, {"end": 52182217, "start": 52182043}, {"end": 52187243, "start": 52186773}], "is_best_transcript": false, "name": "ENST00000555197", "start": 52179231, "translations": [{"cdna_coding_end": 618, "cdna_coding_start": 1, "domains": [{"name": "PF09380", "regions": [{"end": 60, "start": 2}]}, {"name": "PS50057", "regions": [{"end": 58, "start": 1}]}, {"name": "SSF50729", "regions": [{"end": 113, "start": 2}]}]}]}, {"end": 52192513, "exons": [{"end": 52184066, "start": 52183973}, {"end": 52187108, "start": 52186773}, {"end": 52188798, "start": 52188673}, {"end": 52192513, "start": 52192497}], "is_best_transcript": false, "name": "ENST00000555703", "start": 52183973, "translations": [{"cdna_coding_end": 573, "cdna_coding_start": 145}]}, {"end": 52195487, "exons": [{"end": 52184066, "start": 52183973}, {"end": 52187108, "start": 52186773}, {"end": 52188798, "start": 52188667}, {"end": 52192588, "start": 52192497}, {"end": 52195487, "start": 52194463}], "is_best_transcript": false, "name": "ENST00000553556", "start": 52183973, "translations": [{"cdna_coding_end": 939, "cdna_coding_start": 145}]}]}, {"aliases": ["PRKCB"], "chr": "16", "end": 24231932, "name": "ENSG00000166501", "start": 23847322, "strand": "+", "transcripts": [{"end": 24231932, "exons": [{"end": 23847669, "start": 23847322}, {"end": 23848727, "start": 23848696}, {"end": 23999911, "start": 23999829}, {"end": 24043568, "start": 24043457}, {"end": 24046868, "start": 24046740}, {"end": 24104268, "start": 24104112}, {"end": 24105618, "start": 24105484}, {"end": 24124390, "start": 24124294}, {"end": 24135302, "start": 24135156}, {"end": 24166178, "start": 24166005}, {"end": 24183682, "start": 24183591}, {"end": 24185901, "start": 24185839}, {"end": 24192249, "start": 24192111}, {"end": 24196512, "start": 24196432}, {"end": 24196888, "start": 24196781}, {"end": 24202551, "start": 24202411}, {"end": 24231932, "start": 24231282}], "is_best_transcript": true, "name": "ENST00000321728", "start": 23847322, 
"translations": [{"cdna_coding_end": 2191, "cdna_coding_start": 176, "domains": [{"name": "SM00239", "regions": [{"end": 275, "start": 172}]}, {"name": "PF07714", "regions": [{"end": 583, "start": 344}]}, {"name": "SSF49562", "regions": [{"end": 288, "start": 157}]}, {"name": "SM00109", "regions": [{"end": 86, "start": 37}, {"end": 151, "start": 102}]}, {"name": "PS50011", "regions": [{"end": 600, "start": 342}]}, {"name": "PR00008", "regions": [{"end": 48, "start": 34}, {"end": 59, "start": 50}, {"end": 74, "start": 63}, {"end": 152, "start": 140}]}, {"name": "PF00433", "regions": [{"end": 666, "start": 623}]}, {"name": "SM00220", "regions": [{"end": 600, "start": 342}]}, {"name": "PF00168", "regions": [{"end": 259, "start": 175}]}, {"name": "SSF57889", "regions": [{"end": 92, "start": 6}, {"end": 157, "start": 101}]}, {"name": "PF00130", "regions": [{"end": 87, "start": 37}, {"end": 153, "start": 102}]}, {"name": "PS50081", "regions": [{"end": 86, "start": 36}, {"end": 151, "start": 101}]}, {"name": "SSF56112", "regions": [{"end": 627, "start": 317}]}, {"name": "PF00069", "regions": [{"end": 586, "start": 343}]}, {"name": "SM00219", "regions": [{"end": 576, "start": 342}]}, {"name": "PR00360", "regions": [{"end": 200, "start": 188}, {"end": 230, "start": 217}, {"end": 248, "start": 240}]}, {"name": "SM00133", "regions": [{"end": 664, "start": 601}]}, {"name": "PS50004", "regions": [{"end": 260, "start": 173}]}, {"name": "PIRSF000550", "regions": [{"end": 671, "start": 1}]}]}]}, {"end": 24231932, "exons": [{"end": 23847669, "start": 23847345}, {"end": 23848727, "start": 23848696}, {"end": 23999911, "start": 23999829}, {"end": 24043568, "start": 24043457}, {"end": 24046868, "start": 24046740}, {"end": 24104268, "start": 24104112}, {"end": 24105618, "start": 24105484}, {"end": 24124390, "start": 24124294}, {"end": 24135302, "start": 24135156}, {"end": 24166178, "start": 24166005}, {"end": 24183682, "start": 24183591}, {"end": 24185901, "start": 24185839}, {"end": 
24192249, "start": 24192111}, {"end": 24196512, "start": 24196432}, {"end": 24196888, "start": 24196781}, {"end": 24202551, "start": 24202411}, {"end": 24231932, "start": 24225979}], "is_best_transcript": false, "name": "ENST00000303531", "start": 23847345, "translations": [{"cdna_coding_end": 2174, "cdna_coding_start": 153, "domains": [{"name": "SM00133", "regions": [{"end": 663, "start": 601}]}, {"name": "PS50004", "regions": [{"end": 260, "start": 173}]}, {"name": "PIRSF000550", "regions": [{"end": 672, "start": 1}]}, {"name": "PF00069", "regions": [{"end": 586, "start": 343}]}, {"name": "PR00360", "regions": [{"end": 200, "start": 188}, {"end": 230, "start": 217}, {"end": 248, "start": 240}]}, {"name": "SM00219", "regions": [{"end": 576, "start": 342}]}, {"name": "PS50081", "regions": [{"end": 86, "start": 36}, {"end": 151, "start": 101}]}, {"name": "SSF56112", "regions": [{"end": 627, "start": 317}]}, {"name": "SM00220", "regions": [{"end": 600, "start": 342}]}, {"name": "PF00433", "regions": [{"end": 664, "start": 627}]}, {"name": "PF00130", "regions": [{"end": 87, "start": 37}, {"end": 153, "start": 102}]}, {"name": "PF00168", "regions": [{"end": 259, "start": 175}]}, {"name": "SSF57889", "regions": [{"end": 92, "start": 6}, {"end": 157, "start": 101}]}, {"name": "PR00008", "regions": [{"end": 48, "start": 34}, {"end": 59, "start": 50}, {"end": 74, "start": 63}, {"end": 152, "start": 140}]}, {"name": "PS50011", "regions": [{"end": 600, "start": 342}]}, {"name": "SM00109", "regions": [{"end": 86, "start": 37}, {"end": 151, "start": 102}]}, {"name": "PF07714", "regions": [{"end": 583, "start": 344}]}, {"name": "SSF49562", "regions": [{"end": 288, "start": 157}]}, {"name": "SM00239", "regions": [{"end": 275, "start": 172}]}]}]}, {"end": 23880647, "exons": [{"end": 23847669, "start": 23847403}, {"end": 23880647, "start": 23880435}], "is_best_transcript": false, "name": "ENST00000498058", "start": 23847403, "translations": [{"cdna_coding_end": 268, 
"cdna_coding_start": 95, "domains": [{"name": "PR00008", "regions": [{"end": 48, "start": 34}, {"end": 57, "start": 50}]}, {"name": "PS50081", "regions": [{"end": 57, "start": 36}]}, {"name": "SSF57889", "regions": [{"end": 57, "start": 6}]}]}]}, {"end": 24124386, "exons": [{"end": 23848727, "start": 23848544}, {"end": 24104268, "start": 24104112}, {"end": 24105618, "start": 24105484}, {"end": 24124386, "start": 24124294}], "is_best_transcript": false, "name": "ENST00000498739", "start": 23848544}, {"end": 24192166, "exons": [{"end": 24163176, "start": 24163006}, {"end": 24166178, "start": 24166005}, {"end": 24183682, "start": 24183591}, {"end": 24185901, "start": 24185839}, {"end": 24192166, "start": 24192111}], "is_best_transcript": false, "name": "ENST00000472066", "start": 24163006}, {"end": 24202909, "exons": [{"end": 24196888, "start": 24196852}, {"end": 24202909, "start": 24202411}], "is_best_transcript": false, "name": "ENST00000466124", "start": 24196852}]}, {"aliases": ["GIMAP4"], "chr": "7", "end": 150271041, "name": "ENSG00000133574", "start": 150264365, "strand": "+", "transcripts": [{"end": 150271041, "exons": [{"end": 150264525, "start": 150264365}, {"end": 150267047, "start": 150266976}, {"end": 150271041, "start": 150269217}], "is_best_transcript": true, "name": "ENST00000255945", "start": 150264365, "translations": [{"cdna_coding_end": 1165, "cdna_coding_start": 176, "domains": [{"name": "PF04548", "regions": [{"end": 238, "start": 31}]}, {"name": "SSF52540", "regions": [{"end": 288, "start": 24}]}]}]}, {"end": 150270602, "exons": [{"end": 150264525, "start": 150264457}, {"end": 150267089, "start": 150266976}, {"end": 150270602, "start": 150269217}], "is_best_transcript": false, "name": "ENST00000461940", "start": 150264457, "translations": [{"cdna_coding_end": 1115, "cdna_coding_start": 84, "domains": [{"name": "PF04548", "regions": [{"end": 252, "start": 45}]}, {"name": "SSF52540", "regions": [{"end": 302, "start": 38}]}]}]}, {"end": 150269569, 
"exons": [{"end": 150264608, "start": 150264524}, {"end": 150267089, "start": 150266976}, {"end": 150269569, "start": 150269217}], "is_best_transcript": false, "name": "ENST00000479232", "start": 150264524, "translations": [{"cdna_coding_end": 552, "cdna_coding_start": 100, "domains": [{"name": "SSF52540", "regions": [{"end": 151, "start": 38}]}, {"name": "PF04548", "regions": [{"end": 151, "start": 45}]}]}]}]}, {"aliases": ["IL7"], "chr": "8", "end": 79717758, "name": "ENSG00000104432", "start": 79587978, "strand": "-", "transcripts": [{"end": 79717758, "exons": [{"end": 79646067, "start": 79645007}, {"end": 79648762, "start": 79648709}, {"end": 79650870, "start": 79650739}, {"end": 79652317, "start": 79652237}, {"end": 79710443, "start": 79710307}, {"end": 79717758, "start": 79717148}], "is_best_transcript": true, "name": "ENST00000263851", "start": 79645007, "translations": [{"cdna_coding_end": 602, "cdna_coding_start": 1135, "domains": [{"name": "PIRSF001942", "regions": [{"end": 177, "start": 1}]}, {"name": "PR00435", "regions": [{"end": 25, "start": 2}, {"end": 48, "start": 26}, {"end": 77, "start": 57}, {"end": 98, "start": 78}, {"end": 118, "start": 99}, {"end": 173, "start": 151}]}, {"name": "PF01415", "regions": [{"end": 173, "start": 28}]}, {"name": "SM00127", "regions": [{"end": 173, "start": 27}]}]}]}, {"end": 79717699, "exons": [{"end": 79646063, "start": 79645283}, {"end": 79648762, "start": 79648709}, {"end": 79650870, "start": 79650739}, {"end": 79652317, "start": 79652237}, {"end": 79659331, "start": 79659129}, {"end": 79710443, "start": 79710307}, {"end": 79717699, "start": 79717148}], "is_best_transcript": false, "name": "ENST00000518982", "start": 79645283, "translations": [{"cdna_coding_end": 543, "cdna_coding_start": 758, "domains": [{"name": "PR00435", "regions": [{"end": 25, "start": 2}, {"end": 48, "start": 26}]}, {"name": "PF01415", "regions": [{"end": 54, "start": 28}]}]}]}, {"end": 79717163, "exons": [{"end": 79646067, "start": 
79645900}, {"end": 79648762, "start": 79648709}, {"end": 79652317, "start": 79652237}, {"end": 79710443, "start": 79710307}, {"end": 79717163, "start": 79717148}], "is_best_transcript": false, "name": "ENST00000520269", "start": 79645900, "translations": [{"cdna_coding_end": 7, "cdna_coding_start": 408, "domains": [{"name": "PF01415", "regions": [{"end": 77, "start": 28}, {"end": 129, "start": 91}]}, {"name": "SM00127", "regions": [{"end": 129, "start": 27}]}, {"name": "PR00435", "regions": [{"end": 25, "start": 2}, {"end": 48, "start": 26}, {"end": 77, "start": 57}]}, {"name": "PIRSF001942", "regions": [{"end": 133, "start": 1}]}]}]}, {"end": 79717163, "exons": [{"end": 79646067, "start": 79645900}, {"end": 79648762, "start": 79648709}, {"end": 79652317, "start": 79652237}, {"end": 79710443, "start": 79710363}, {"end": 79717163, "start": 79717148}], "is_best_transcript": false, "name": "ENST00000520215", "start": 79645900, "translations": [{"cdna_coding_end": 7, "cdna_coding_start": 120, "domains": [{"name": "PR00435", "regions": [{"end": 25, "start": 2}, {"end": 37, "start": 26}]}]}]}, {"end": 79717686, "exons": [{"end": 79646067, "start": 79645900}, {"end": 79648762, "start": 79648709}, {"end": 79650870, "start": 79650739}, {"end": 79652317, "start": 79652237}, {"end": 79710443, "start": 79710363}, {"end": 79717686, "start": 79717148}], "is_best_transcript": false, "name": "ENST00000520317", "start": 79645900, "translations": [{"cdna_coding_end": 530, "cdna_coding_start": 643, "domains": [{"name": "PR00435", "regions": [{"end": 25, "start": 2}, {"end": 37, "start": 26}]}]}]}, {"end": 79652311, "exons": [{"end": 79646067, "start": 79645948}, {"end": 79652311, "start": 79652237}], "is_best_transcript": false, "name": "ENST00000541183", "start": 79645948, "translations": [{"cdna_coding_end": 1, "cdna_coding_start": 195, "domains": [{"name": "SM00127", "regions": [{"end": 60, "start": 1}]}, {"name": "PF01415", "regions": [{"end": 60, "start": 1}]}]}]}, {"end": 
79717758, "exons": [{"end": 79659331, "start": 79659263}, {"end": 79710443, "start": 79710307}, {"end": 79717758, "start": 79717148}], "is_best_transcript": false, "name": "ENST00000379113", "start": 79659263, "translations": [{"cdna_coding_end": 602, "cdna_coding_start": 817, "domains": [{"name": "PF01415", "regions": [{"end": 54, "start": 28}]}, {"name": "PR00435", "regions": [{"end": 25, "start": 2}, {"end": 48, "start": 26}]}]}]}]}, {"aliases": ["SVEP1"], "chr": "9", "end": 113342160, "name": "ENSG00000165124", "start": 113127531, "strand": "-", "transcripts": [{"end": 113342160, "exons": [{"end": 113128840, "start": 113127531}, {"end": 113132296, "start": 113132203}, {"end": 113137743, "start": 113137648}, {"end": 113139646, "start": 113139551}, {"end": 113141797, "start": 113141627}, {"end": 113148354, "start": 113148178}, {"end": 113149738, "start": 113149565}, {"end": 113151867, "start": 113151804}, {"end": 113163289, "start": 113163134}, {"end": 113166832, "start": 113166607}, {"end": 113171231, "start": 113168440}, {"end": 113174015, "start": 113173343}, {"end": 113190038, "start": 113189871}, {"end": 113191614, "start": 113191423}, {"end": 113192284, "start": 113192200}, {"end": 113192730, "start": 113192554}, {"end": 113194314, "start": 113194195}, {"end": 113194915, "start": 113194742}, {"end": 113196786, "start": 113196616}, {"end": 113197644, "start": 113197521}, {"end": 113198784, "start": 113198660}, {"end": 113206000, "start": 113205825}, {"end": 113208318, "start": 113208117}, {"end": 113209337, "start": 113209180}, {"end": 113212540, "start": 113212339}, {"end": 113213682, "start": 113213569}, {"end": 113217983, "start": 113217870}, {"end": 113219632, "start": 113219536}, {"end": 113220842, "start": 113220751}, {"end": 113221393, "start": 113221232}, {"end": 113228306, "start": 113228145}, {"end": 113231381, "start": 113231220}, {"end": 113233877, "start": 113233644}, {"end": 113234603, "start": 113234439}, {"end": 113238595, "start": 
113238484}, {"end": 113242036, "start": 113241915}, {"end": 113243716, "start": 113243522}, {"end": 113244772, "start": 113244641}, {"end": 113245973, "start": 113245866}, {"end": 113252059, "start": 113251930}, {"end": 113259213, "start": 113259095}, {"end": 113261518, "start": 113261321}, {"end": 113265497, "start": 113265318}, {"end": 113275385, "start": 113275206}, {"end": 113276386, "start": 113276228}, {"end": 113308571, "start": 113308395}, {"end": 113312384, "start": 113312129}, {"end": 113342160, "start": 113341293}], "is_best_transcript": true, "name": "ENST00000401783", "start": 113127531, "translations": [{"cdna_coding_end": 338, "cdna_coding_start": 11053, "domains": [{"name": "SM00032", "regions": [{"end": 433, "start": 378}, {"end": 493, "start": 438}, {"end": 559, "start": 498}, {"end": 787, "start": 727}, {"end": 1685, "start": 1631}, {"end": 1743, "start": 1690}, {"end": 1842, "start": 1789}, {"end": 1900, "start": 1847}, {"end": 1958, "start": 1905}, {"end": 2016, "start": 1963}, {"end": 2078, "start": 2021}, {"end": 2141, "start": 2083}, {"end": 2199, "start": 2146}, {"end": 2259, "start": 2204}, {"end": 2318, "start": 2264}, {"end": 2376, "start": 2323}, {"end": 2435, "start": 2381}, {"end": 2493, "start": 2440}, {"end": 2551, "start": 2498}, {"end": 2608, "start": 2556}, {"end": 2712, "start": 2654}, {"end": 2770, "start": 2717}, {"end": 2828, "start": 2775}, {"end": 2886, "start": 2833}, {"end": 2944, "start": 2891}, {"end": 3002, "start": 2949}, {"end": 3059, "start": 3007}, {"end": 3117, "start": 3064}, {"end": 3176, "start": 3122}, {"end": 3236, "start": 3181}, {"end": 3294, "start": 3241}, {"end": 3352, "start": 3299}, {"end": 3411, "start": 3357}, {"end": 3468, "start": 3416}]}, {"name": "PF02494", "regions": [{"end": 642, "start": 561}, {"end": 721, "start": 644}]}, {"name": "PR00895", "regions": [{"end": 1530, "start": 1512}, {"end": 1558, "start": 1539}, {"end": 1592, "start": 1559}]}, {"name": "SSF57535", "regions": [{"end": 433, 
"start": 374}, {"end": 493, "start": 434}, {"end": 560, "start": 494}, {"end": 790, "start": 727}, {"end": 1746, "start": 1626}, {"end": 1842, "start": 1785}, {"end": 1900, "start": 1843}, {"end": 1958, "start": 1901}, {"end": 2016, "start": 1959}, {"end": 2078, "start": 2017}, {"end": 2199, "start": 2081}, {"end": 2318, "start": 2202}, {"end": 2377, "start": 2321}, {"end": 2437, "start": 2379}, {"end": 2551, "start": 2438}, {"end": 2616, "start": 2552}, {"end": 2712, "start": 2643}, {"end": 2828, "start": 2715}, {"end": 2886, "start": 2829}, {"end": 2944, "start": 2887}, {"end": 3117, "start": 2945}, {"end": 3176, "start": 3118}, {"end": 3229, "start": 3177}, {"end": 3475, "start": 3239}]}, {"name": "SSF49899", "regions": [{"end": 1632, "start": 1421}]}, {"name": "SM00159", "regions": [{"end": 1627, "start": 1420}]}, {"name": "PF00354", "regions": [{"end": 1620, "start": 1442}]}, {"name": "PF07699", "regions": [{"end": 360, "start": 310}, {"end": 1052, "start": 1005}, {"end": 1106, "start": 1059}, {"end": 1160, "start": 1113}]}, {"name": "PS50311", "regions": [{"end": 1409, "start": 1197}, {"end": 3554, "start": 3468}]}, {"name": "PS50825", "regions": [{"end": 642, "start": 560}, {"end": 724, "start": 643}]}, {"name": "PF00092", "regions": [{"end": 252, "start": 84}]}, {"name": "SSF57196", "regions": [{"end": 1267, "start": 1189}, {"end": 1305, "start": 1268}, {"end": 1342, "start": 1306}, {"end": 1423, "start": 1344}, {"end": 1786, "start": 1735}, {"end": 3506, "start": 3463}, {"end": 3535, "start": 3507}, {"end": 3570, "start": 3537}]}, {"name": "PS50026", "regions": [{"end": 1229, "start": 1193}, {"end": 1267, "start": 1231}, {"end": 1305, "start": 1269}, {"end": 1343, "start": 1307}, {"end": 1381, "start": 1345}, {"end": 1419, "start": 1383}, {"end": 1784, "start": 1745}, {"end": 3532, "start": 3500}, {"end": 3564, "start": 3533}]}, {"name": "SM00181", "regions": [{"end": 1229, "start": 1196}, {"end": 1267, "start": 1234}, {"end": 1305, "start": 1272}, {"end": 
1343, "start": 1310}, {"end": 1381, "start": 1348}, {"end": 1419, "start": 1386}, {"end": 1784, "start": 1748}, {"end": 3500, "start": 3471}, {"end": 3532, "start": 3503}, {"end": 3564, "start": 3535}]}, {"name": "SM00179", "regions": [{"end": 1229, "start": 1196}, {"end": 1267, "start": 1231}, {"end": 1305, "start": 1269}, {"end": 1343, "start": 1307}, {"end": 1381, "start": 1345}, {"end": 1419, "start": 1383}, {"end": 1784, "start": 1745}, {"end": 3532, "start": 3504}]}, {"name": "SSF57184", "regions": [{"end": 440, "start": 269}, {"end": 1144, "start": 988}]}, {"name": "PF07645", "regions": [{"end": 1783, "start": 1745}]}, {"name": "PS50923", "regions": [{"end": 435, "start": 376}, {"end": 495, "start": 436}, {"end": 561, "start": 496}, {"end": 789, "start": 725}, {"end": 1687, "start": 1629}, {"end": 1745, "start": 1688}, {"end": 1844, "start": 1787}, {"end": 1902, "start": 1845}, {"end": 1960, "start": 1903}, {"end": 2018, "start": 1961}, {"end": 2080, "start": 2019}, {"end": 2143, "start": 2081}, {"end": 2201, "start": 2144}, {"end": 2261, "start": 2202}, {"end": 2320, "start": 2262}, {"end": 2378, "start": 2321}, {"end": 2437, "start": 2379}, {"end": 2495, "start": 2438}, {"end": 2553, "start": 2496}, {"end": 2610, "start": 2554}, {"end": 2714, "start": 2663}, {"end": 2772, "start": 2715}, {"end": 2830, "start": 2773}, {"end": 2888, "start": 2831}, {"end": 2946, "start": 2889}, {"end": 3004, "start": 2947}, {"end": 3061, "start": 3005}, {"end": 3119, "start": 3062}, {"end": 3178, "start": 3120}, {"end": 3238, "start": 3179}, {"end": 3296, "start": 3239}, {"end": 3354, "start": 3297}, {"end": 3413, "start": 3355}, {"end": 3470, "start": 3414}]}, {"name": "SM00327", "regions": [{"end": 260, "start": 81}]}, {"name": "PF00008", "regions": [{"end": 1226, "start": 1197}, {"end": 1265, "start": 1235}, {"end": 1302, "start": 1273}, {"end": 1379, "start": 1349}, {"end": 1417, "start": 1387}]}, {"name": "PS50234", "regions": [{"end": 264, "start": 83}]}, {"name": 
"PF07974", "regions": [{"end": 1266, "start": 1235}, {"end": 3499, "start": 3475}, {"end": 3531, "start": 3507}, {"end": 3563, "start": 3536}]}, {"name": "SSF53300", "regions": [{"end": 262, "start": 79}]}, {"name": "PF00084", "regions": [{"end": 430, "start": 378}, {"end": 493, "start": 438}, {"end": 1685, "start": 1628}, {"end": 1743, "start": 1690}, {"end": 1842, "start": 1789}, {"end": 1900, "start": 1847}, {"end": 1958, "start": 1905}, {"end": 2016, "start": 1963}, {"end": 2078, "start": 2021}, {"end": 2136, "start": 2083}, {"end": 2199, "start": 2146}, {"end": 2259, "start": 2204}, {"end": 2318, "start": 2264}, {"end": 2376, "start": 2323}, {"end": 2435, "start": 2381}, {"end": 2493, "start": 2440}, {"end": 2551, "start": 2498}, {"end": 2608, "start": 2556}, {"end": 2712, "start": 2667}, {"end": 2770, "start": 2717}, {"end": 2828, "start": 2775}, {"end": 2886, "start": 2833}, {"end": 2944, "start": 2891}, {"end": 3002, "start": 2949}, {"end": 3059, "start": 3007}, {"end": 3117, "start": 3084}, {"end": 3172, "start": 3122}, {"end": 3236, "start": 3181}, {"end": 3290, "start": 3241}, {"end": 3352, "start": 3299}, {"end": 3411, "start": 3357}, {"end": 3468, "start": 3416}]}]}]}, {"end": 113190038, "exons": [{"end": 113128840, "start": 113127536}, {"end": 113132296, "start": 113132203}, {"end": 113137743, "start": 113137648}, {"end": 113139646, "start": 113139551}, {"end": 113141797, "start": 113141627}, {"end": 113148354, "start": 113148178}, {"end": 113149738, "start": 113149565}, {"end": 113151867, "start": 113151804}, {"end": 113163289, "start": 113163134}, {"end": 113166832, "start": 113166607}, {"end": 113171231, "start": 113168440}, {"end": 113174015, "start": 113173343}, {"end": 113190038, "start": 113189871}], "is_best_transcript": false, "name": "ENST00000297826", "start": 113127536, "translations": [{"cdna_coding_end": 416, "cdna_coding_start": 4909, "domains": [{"name": "PF00084", "regions": [{"end": 62, "start": 9}, {"end": 125, "start": 72}, {"end": 
185, "start": 130}, {"end": 244, "start": 190}, {"end": 302, "start": 249}, {"end": 361, "start": 307}, {"end": 419, "start": 366}, {"end": 477, "start": 424}, {"end": 534, "start": 482}, {"end": 638, "start": 593}, {"end": 696, "start": 643}, {"end": 754, "start": 701}, {"end": 812, "start": 759}, {"end": 870, "start": 817}, {"end": 928, "start": 875}, {"end": 985, "start": 933}, {"end": 1043, "start": 1010}, {"end": 1098, "start": 1048}, {"end": 1162, "start": 1107}, {"end": 1216, "start": 1167}, {"end": 1278, "start": 1225}, {"end": 1337, "start": 1283}, {"end": 1394, "start": 1342}]}, {"name": "PF07974", "regions": [{"end": 1425, "start": 1401}, {"end": 1457, "start": 1433}, {"end": 1489, "start": 1462}]}, {"name": "PF00008", "regions": [{"end": 1456, "start": 1427}]}, {"name": "PS50923", "regions": [{"end": 69, "start": 7}, {"end": 127, "start": 70}, {"end": 187, "start": 128}, {"end": 246, "start": 188}, {"end": 304, "start": 247}, {"end": 363, "start": 305}, {"end": 421, "start": 364}, {"end": 479, "start": 422}, {"end": 536, "start": 480}, {"end": 640, "start": 589}, {"end": 698, "start": 641}, {"end": 756, "start": 699}, {"end": 814, "start": 757}, {"end": 872, "start": 815}, {"end": 930, "start": 873}, {"end": 987, "start": 931}, {"end": 1045, "start": 988}, {"end": 1104, "start": 1046}, {"end": 1164, "start": 1105}, {"end": 1222, "start": 1165}, {"end": 1280, "start": 1223}, {"end": 1339, "start": 1281}, {"end": 1396, "start": 1340}]}, {"name": "SM00181", "regions": [{"end": 1426, "start": 1397}, {"end": 1458, "start": 1429}, {"end": 1490, "start": 1461}]}, {"name": "SSF57196", "regions": [{"end": 1432, "start": 1389}, {"end": 1461, "start": 1433}, {"end": 1496, "start": 1463}]}, {"name": "PS50026", "regions": [{"end": 1458, "start": 1426}, {"end": 1490, "start": 1459}]}, {"name": "PS50311", "regions": [{"end": 1480, "start": 1394}]}, {"name": "SSF57535", "regions": [{"end": 125, "start": 7}, {"end": 244, "start": 128}, {"end": 303, "start": 247}, 
{"end": 363, "start": 305}, {"end": 477, "start": 364}, {"end": 542, "start": 478}, {"end": 638, "start": 569}, {"end": 754, "start": 641}, {"end": 812, "start": 755}, {"end": 870, "start": 813}, {"end": 1043, "start": 871}, {"end": 1102, "start": 1044}, {"end": 1155, "start": 1103}, {"end": 1401, "start": 1165}]}, {"name": "SM00032", "regions": [{"end": 67, "start": 9}, {"end": 125, "start": 72}, {"end": 185, "start": 130}, {"end": 244, "start": 190}, {"end": 302, "start": 249}, {"end": 361, "start": 307}, {"end": 419, "start": 366}, {"end": 477, "start": 424}, {"end": 534, "start": 482}, {"end": 638, "start": 580}, {"end": 696, "start": 643}, {"end": 754, "start": 701}, {"end": 812, "start": 759}, {"end": 870, "start": 817}, {"end": 928, "start": 875}, {"end": 985, "start": 933}, {"end": 1043, "start": 990}, {"end": 1102, "start": 1048}, {"end": 1162, "start": 1107}, {"end": 1220, "start": 1167}, {"end": 1278, "start": 1225}, {"end": 1337, "start": 1283}, {"end": 1394, "start": 1342}]}]}]}, {"end": 113342018, "exons": [{"end": 113128840, "start": 113127536}, {"end": 113132296, "start": 113132203}, {"end": 113137743, "start": 113137648}, {"end": 113139646, "start": 113139551}, {"end": 113141797, "start": 113141627}, {"end": 113148354, "start": 113148178}, {"end": 113149738, "start": 113149565}, {"end": 113151867, "start": 113151804}, {"end": 113163289, "start": 113163134}, {"end": 113166832, "start": 113166607}, {"end": 113171231, "start": 113168440}, {"end": 113174015, "start": 113173343}, {"end": 113190038, "start": 113189871}, {"end": 113191614, "start": 113191423}, {"end": 113192284, "start": 113192200}, {"end": 113192730, "start": 113192554}, {"end": 113194314, "start": 113194195}, {"end": 113194915, "start": 113194742}, {"end": 113196786, "start": 113196616}, {"end": 113197644, "start": 113197521}, {"end": 113198784, "start": 113198660}, {"end": 113206000, "start": 113205825}, {"end": 113208318, "start": 113208117}, {"end": 113209337, "start": 113209180}, 
{"end": 113212540, "start": 113212339}, {"end": 113213682, "start": 113213569}, {"end": 113217983, "start": 113217870}, {"end": 113219632, "start": 113219536}, {"end": 113220842, "start": 113220751}, {"end": 113221393, "start": 113221232}, {"end": 113228306, "start": 113228145}, {"end": 113231381, "start": 113231220}, {"end": 113233877, "start": 113233644}, {"end": 113234603, "start": 113234439}, {"end": 113238595, "start": 113238484}, {"end": 113242036, "start": 113241915}, {"end": 113243716, "start": 113243522}, {"end": 113244772, "start": 113244641}, {"end": 113245973, "start": 113245866}, {"end": 113252059, "start": 113251930}, {"end": 113259213, "start": 113259095}, {"end": 113261518, "start": 113261321}, {"end": 113265497, "start": 113265318}, {"end": 113275385, "start": 113275206}, {"end": 113276386, "start": 113276228}, {"end": 113308571, "start": 113308395}, {"end": 113312384, "start": 113312129}, {"end": 113342018, "start": 113341293}], "is_best_transcript": false, "name": "ENST00000374469", "start": 113127536, "translations": [{"cdna_coding_end": 265, "cdna_coding_start": 10911, "domains": [{"name": "SSF57535", "regions": [{"end": 410, "start": 351}, {"end": 470, "start": 411}, {"end": 537, "start": 471}, {"end": 767, "start": 704}, {"end": 1723, "start": 1603}, {"end": 1819, "start": 1762}, {"end": 1877, "start": 1820}, {"end": 1935, "start": 1878}, {"end": 1993, "start": 1936}, {"end": 2055, "start": 1994}, {"end": 2176, "start": 2058}, {"end": 2295, "start": 2179}, {"end": 2354, "start": 2298}, {"end": 2414, "start": 2356}, {"end": 2528, "start": 2415}, {"end": 2593, "start": 2529}, {"end": 2689, "start": 2620}, {"end": 2805, "start": 2692}, {"end": 2863, "start": 2806}, {"end": 2921, "start": 2864}, {"end": 3094, "start": 2922}, {"end": 3153, "start": 3095}, {"end": 3206, "start": 3154}, {"end": 3452, "start": 3216}]}, {"name": "SSF49899", "regions": [{"end": 1609, "start": 1398}]}, {"name": "SM00159", "regions": [{"end": 1604, "start": 1397}]}, 
{"name": "PF00354", "regions": [{"end": 1597, "start": 1419}]}, {"name": "PR00895", "regions": [{"end": 1507, "start": 1489}, {"end": 1535, "start": 1516}, {"end": 1569, "start": 1536}]}, {"name": "PF02494", "regions": [{"end": 619, "start": 538}, {"end": 698, "start": 621}]}, {"name": "SM00032", "regions": [{"end": 410, "start": 355}, {"end": 470, "start": 415}, {"end": 536, "start": 475}, {"end": 764, "start": 704}, {"end": 1662, "start": 1608}, {"end": 1720, "start": 1667}, {"end": 1819, "start": 1766}, {"end": 1877, "start": 1824}, {"end": 1935, "start": 1882}, {"end": 1993, "start": 1940}, {"end": 2055, "start": 1998}, {"end": 2118, "start": 2060}, {"end": 2176, "start": 2123}, {"end": 2236, "start": 2181}, {"end": 2295, "start": 2241}, {"end": 2353, "start": 2300}, {"end": 2412, "start": 2358}, {"end": 2470, "start": 2417}, {"end": 2528, "start": 2475}, {"end": 2585, "start": 2533}, {"end": 2689, "start": 2631}, {"end": 2747, "start": 2694}, {"end": 2805, "start": 2752}, {"end": 2863, "start": 2810}, {"end": 2921, "start": 2868}, {"end": 2979, "start": 2926}, {"end": 3036, "start": 2984}, {"end": 3094, "start": 3041}, {"end": 3153, "start": 3099}, {"end": 3213, "start": 3158}, {"end": 3271, "start": 3218}, {"end": 3329, "start": 3276}, {"end": 3388, "start": 3334}, {"end": 3445, "start": 3393}]}, {"name": "SM00179", "regions": [{"end": 1206, "start": 1173}, {"end": 1244, "start": 1208}, {"end": 1282, "start": 1246}, {"end": 1320, "start": 1284}, {"end": 1358, "start": 1322}, {"end": 1396, "start": 1360}, {"end": 1761, "start": 1722}, {"end": 3509, "start": 3481}]}, {"name": "SSF57184", "regions": [{"end": 417, "start": 246}, {"end": 1121, "start": 965}]}, {"name": "SSF57196", "regions": [{"end": 1244, "start": 1166}, {"end": 1282, "start": 1245}, {"end": 1319, "start": 1283}, {"end": 1400, "start": 1321}, {"end": 1763, "start": 1712}, {"end": 3483, "start": 3440}, {"end": 3512, "start": 3484}, {"end": 3547, "start": 3514}]}, {"name": "PS50026", "regions": 
[{"end": 1206, "start": 1170}, {"end": 1244, "start": 1208}, {"end": 1282, "start": 1246}, {"end": 1320, "start": 1284}, {"end": 1358, "start": 1322}, {"end": 1396, "start": 1360}, {"end": 1761, "start": 1722}, {"end": 3509, "start": 3477}, {"end": 3541, "start": 3510}]}, {"name": "SM00181", "regions": [{"end": 1206, "start": 1173}, {"end": 1244, "start": 1211}, {"end": 1282, "start": 1249}, {"end": 1320, "start": 1287}, {"end": 1358, "start": 1325}, {"end": 1396, "start": 1363}, {"end": 1761, "start": 1725}, {"end": 3477, "start": 3448}, {"end": 3509, "start": 3480}, {"end": 3541, "start": 3512}]}, {"name": "PF00092", "regions": [{"end": 229, "start": 61}]}, {"name": "PS50825", "regions": [{"end": 619, "start": 537}, {"end": 701, "start": 620}]}, {"name": "PS50311", "regions": [{"end": 1386, "start": 1174}, {"end": 3531, "start": 3445}]}, {"name": "PF07699", "regions": [{"end": 337, "start": 287}, {"end": 1029, "start": 982}, {"end": 1083, "start": 1036}, {"end": 1137, "start": 1090}]}, {"name": "PF00008", "regions": [{"end": 1203, "start": 1174}, {"end": 1242, "start": 1212}, {"end": 1279, "start": 1250}, {"end": 1356, "start": 1326}, {"end": 1394, "start": 1364}]}, {"name": "SM00327", "regions": [{"end": 237, "start": 58}]}, {"name": "PS50923", "regions": [{"end": 412, "start": 353}, {"end": 472, "start": 413}, {"end": 538, "start": 473}, {"end": 766, "start": 702}, {"end": 1664, "start": 1606}, {"end": 1722, "start": 1665}, {"end": 1821, "start": 1764}, {"end": 1879, "start": 1822}, {"end": 1937, "start": 1880}, {"end": 1995, "start": 1938}, {"end": 2057, "start": 1996}, {"end": 2120, "start": 2058}, {"end": 2178, "start": 2121}, {"end": 2238, "start": 2179}, {"end": 2297, "start": 2239}, {"end": 2355, "start": 2298}, {"end": 2414, "start": 2356}, {"end": 2472, "start": 2415}, {"end": 2530, "start": 2473}, {"end": 2587, "start": 2531}, {"end": 2691, "start": 2640}, {"end": 2749, "start": 2692}, {"end": 2807, "start": 2750}, {"end": 2865, "start": 2808}, {"end": 
2923, "start": 2866}, {"end": 2981, "start": 2924}, {"end": 3038, "start": 2982}, {"end": 3096, "start": 3039}, {"end": 3155, "start": 3097}, {"end": 3215, "start": 3156}, {"end": 3273, "start": 3216}, {"end": 3331, "start": 3274}, {"end": 3390, "start": 3332}, {"end": 3447, "start": 3391}]}, {"name": "PF07645", "regions": [{"end": 1760, "start": 1722}]}, {"name": "SSF53300", "regions": [{"end": 239, "start": 56}]}, {"name": "PF00084", "regions": [{"end": 407, "start": 355}, {"end": 470, "start": 415}, {"end": 1662, "start": 1605}, {"end": 1720, "start": 1667}, {"end": 1819, "start": 1766}, {"end": 1877, "start": 1824}, {"end": 1935, "start": 1882}, {"end": 1993, "start": 1940}, {"end": 2055, "start": 1998}, {"end": 2113, "start": 2060}, {"end": 2176, "start": 2123}, {"end": 2236, "start": 2181}, {"end": 2295, "start": 2241}, {"end": 2353, "start": 2300}, {"end": 2412, "start": 2358}, {"end": 2470, "start": 2417}, {"end": 2528, "start": 2475}, {"end": 2585, "start": 2533}, {"end": 2689, "start": 2644}, {"end": 2747, "start": 2694}, {"end": 2805, "start": 2752}, {"end": 2863, "start": 2810}, {"end": 2921, "start": 2868}, {"end": 2979, "start": 2926}, {"end": 3036, "start": 2984}, {"end": 3094, "start": 3061}, {"end": 3149, "start": 3099}, {"end": 3213, "start": 3158}, {"end": 3267, "start": 3218}, {"end": 3329, "start": 3276}, {"end": 3388, "start": 3334}, {"end": 3445, "start": 3393}]}, {"name": "PF07974", "regions": [{"end": 1243, "start": 1212}, {"end": 3476, "start": 3452}, {"end": 3508, "start": 3484}, {"end": 3540, "start": 3513}]}, {"name": "PS50234", "regions": [{"end": 241, "start": 60}]}]}]}, {"end": 113341823, "exons": [{"end": 113206000, "start": 113204759}, {"end": 113208318, "start": 113208117}, {"end": 113209337, "start": 113209180}, {"end": 113212540, "start": 113212339}, {"end": 113213682, "start": 113213569}, {"end": 113217983, "start": 113217870}, {"end": 113219632, "start": 113219536}, {"end": 113220399, "start": 113220395}, {"end": 113220842, 
"start": 113220756}, {"end": 113221393, "start": 113221232}, {"end": 113228306, "start": 113228145}, {"end": 113231381, "start": 113231220}, {"end": 113233877, "start": 113233644}, {"end": 113234603, "start": 113234439}, {"end": 113238595, "start": 113238484}, {"end": 113242036, "start": 113241915}, {"end": 113243716, "start": 113243522}, {"end": 113244772, "start": 113244641}, {"end": 113245973, "start": 113245866}, {"end": 113252059, "start": 113251930}, {"end": 113259213, "start": 113259095}, {"end": 113261518, "start": 113261321}, {"end": 113265497, "start": 113265318}, {"end": 113275385, "start": 113275206}, {"end": 113276386, "start": 113276228}, {"end": 113308571, "start": 113308395}, {"end": 113312384, "start": 113312129}, {"end": 113341823, "start": 113341293}], "is_best_transcript": false, "name": "ENST00000302728", "start": 113204759, "translations": [{"cdna_coding_end": 1, "cdna_coding_start": 4650, "domains": [{"name": "PS50825", "regions": [{"end": 642, "start": 560}, {"end": 724, "start": 643}]}, {"name": "PF07699", "regions": [{"end": 360, "start": 310}, {"end": 1052, "start": 1005}, {"end": 1106, "start": 1059}, {"end": 1160, "start": 1113}]}, {"name": "PS50311", "regions": [{"end": 1409, "start": 1197}]}, {"name": "SM00181", "regions": [{"end": 1229, "start": 1196}, {"end": 1267, "start": 1234}, {"end": 1305, "start": 1272}, {"end": 1343, "start": 1310}, {"end": 1381, "start": 1348}, {"end": 1419, "start": 1386}]}, {"name": "SSF57196", "regions": [{"end": 1267, "start": 1189}, {"end": 1305, "start": 1268}, {"end": 1342, "start": 1306}, {"end": 1423, "start": 1344}]}, {"name": "PS50026", "regions": [{"end": 1229, "start": 1193}, {"end": 1267, "start": 1231}, {"end": 1305, "start": 1269}, {"end": 1343, "start": 1307}, {"end": 1381, "start": 1345}, {"end": 1419, "start": 1383}]}, {"name": "SSF57184", "regions": [{"end": 440, "start": 269}, {"end": 1144, "start": 988}]}, {"name": "SM00179", "regions": [{"end": 1229, "start": 1196}, {"end": 1267, 
"start": 1231}, {"end": 1305, "start": 1269}, {"end": 1343, "start": 1307}, {"end": 1381, "start": 1345}, {"end": 1419, "start": 1383}]}, {"name": "PF00092", "regions": [{"end": 252, "start": 84}]}, {"name": "SM00032", "regions": [{"end": 433, "start": 378}, {"end": 493, "start": 438}, {"end": 559, "start": 498}, {"end": 787, "start": 727}]}, {"name": "PF02494", "regions": [{"end": 642, "start": 561}, {"end": 721, "start": 644}]}, {"name": "PR00010", "regions": [{"end": 1318, "start": 1307}, {"end": 1364, "start": 1357}, {"end": 1413, "start": 1403}, {"end": 1420, "start": 1414}]}, {"name": "PF00354", "regions": [{"end": 1532, "start": 1442}]}, {"name": "SSF57535", "regions": [{"end": 433, "start": 374}, {"end": 493, "start": 434}, {"end": 560, "start": 494}, {"end": 790, "start": 727}]}, {"name": "SSF49899", "regions": [{"end": 1547, "start": 1421}]}, {"name": "PS50234", "regions": [{"end": 264, "start": 83}]}, {"name": "SSF53300", "regions": [{"end": 262, "start": 79}]}, {"name": "PF00084", "regions": [{"end": 430, "start": 378}, {"end": 493, "start": 438}]}, {"name": "PS50923", "regions": [{"end": 435, "start": 376}, {"end": 495, "start": 436}, {"end": 561, "start": 496}, {"end": 789, "start": 725}]}, {"name": "PF07645", "regions": [{"end": 1262, "start": 1231}, {"end": 1338, "start": 1308}]}, {"name": "PF00008", "regions": [{"end": 1226, "start": 1197}, {"end": 1265, "start": 1235}, {"end": 1302, "start": 1273}, {"end": 1337, "start": 1311}, {"end": 1379, "start": 1349}, {"end": 1417, "start": 1387}]}, {"name": "SM00327", "regions": [{"end": 260, "start": 81}]}]}]}, {"end": 113342160, "exons": [{"end": 113238595, "start": 113238163}, {"end": 113242036, "start": 113241915}, {"end": 113243716, "start": 113243522}, {"end": 113244772, "start": 113244641}, {"end": 113245973, "start": 113245866}, {"end": 113252059, "start": 113251930}, {"end": 113259213, "start": 113259095}, {"end": 113261518, "start": 113261321}, {"end": 113265497, "start": 113265318}, {"end": 
113275385, "start": 113275206}, {"end": 113276386, "start": 113276228}, {"end": 113308571, "start": 113308395}, {"end": 113312384, "start": 113312129}, {"end": 113342160, "start": 113341293}], "is_best_transcript": false, "name": "ENST00000374461", "start": 113238163, "translations": [{"cdna_coding_end": 407, "cdna_coding_start": 2944, "domains": [{"name": "PF02494", "regions": [{"end": 619, "start": 538}, {"end": 698, "start": 621}]}, {"name": "SM00032", "regions": [{"end": 410, "start": 355}, {"end": 470, "start": 415}, {"end": 536, "start": 475}, {"end": 764, "start": 704}]}, {"name": "SSF57535", "regions": [{"end": 410, "start": 351}, {"end": 470, "start": 411}, {"end": 537, "start": 471}, {"end": 767, "start": 704}]}, {"name": "PF07699", "regions": [{"end": 337, "start": 287}]}, {"name": "PS50825", "regions": [{"end": 619, "start": 537}, {"end": 701, "start": 620}]}, {"name": "PF00092", "regions": [{"end": 229, "start": 61}]}, {"name": "SSF57184", "regions": [{"end": 417, "start": 246}]}, {"name": "PS50923", "regions": [{"end": 412, "start": 353}, {"end": 472, "start": 413}, {"end": 538, "start": 473}, {"end": 766, "start": 702}]}, {"name": "SM00327", "regions": [{"end": 237, "start": 58}]}, {"name": "PS50234", "regions": [{"end": 241, "start": 60}]}, {"name": "SSF53300", "regions": [{"end": 239, "start": 56}]}, {"name": "PF00084", "regions": [{"end": 407, "start": 355}, {"end": 470, "start": 415}]}]}]}]}, {"aliases": ["ARID1B"], "chr": "6", "end": 157530401, "name": "ENSG00000049618", "start": 157099063, "strand": "+", "transcripts": [{"end": 157529495, "exons": [{"end": 157100605, "start": 157099063}, {"end": 157150555, "start": 157150361}, {"end": 157192786, "start": 157192748}, {"end": 157222659, "start": 157222510}, {"end": 157256710, "start": 157256600}, {"end": 157406039, "start": 157405796}, {"end": 157431695, "start": 157431606}, {"end": 157454341, "start": 157454162}, {"end": 157470085, "start": 157469758}, {"end": 157488319, "start": 157488174}, 
{"end": 157495251, "start": 157495142}, {"end": 157502312, "start": 157502103}, {"end": 157505569, "start": 157505365}, {"end": 157510914, "start": 157510776}, {"end": 157511344, "start": 157511172}, {"end": 157517449, "start": 157517299}, {"end": 157520041, "start": 157519945}, {"end": 157522622, "start": 157521839}, {"end": 157525130, "start": 157525000}, {"end": 157529495, "start": 157527301}], "is_best_transcript": true, "name": "ENST00000346085", "start": 157099063, "translations": [{"cdna_coding_end": 6751, "cdna_coding_start": 2, "domains": [{"name": "PF12031", "regions": [{"end": 2195, "start": 1939}]}, {"name": "PS50324", "regions": [{"end": 57, "start": 35}, {"end": 784, "start": 697}]}, {"name": "PF01388", "regions": [{"end": 1153, "start": 1065}]}, {"name": "PS50099", "regions": [{"end": 820, "start": 715}, {"end": 1610, "start": 1472}]}, {"name": "SSF48371", "regions": [{"end": 2220, "start": 2075}]}, {"name": "PS50316", "regions": [{"end": 104, "start": 81}]}, {"name": "PS50322", "regions": [{"end": 131, "start": 107}, {"end": 646, "start": 574}]}, {"name": "PS51011", "regions": [{"end": 1157, "start": 1066}]}, {"name": "PS50310", "regions": [{"end": 47, "start": 2}, {"end": 493, "start": 329}]}, {"name": "PS50315", "regions": [{"end": 401, "start": 141}]}, {"name": "SSF46774", "regions": [{"end": 1168, "start": 1049}]}, {"name": "SM00501", "regions": [{"end": 1158, "start": 1067}]}]}]}]}]}
\ No newline at end of file
+{
+    "genes": [
+        {
+            "aliases": [
+                "EGFR"
+            ],
+            "chr": "7",
+            "end": 55324313,
+            "name": "ENSG00000146648",
+            "start": 55086714,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 55270769,
+                    "exons": [
+                        {
+                            "end": 55087058,
+                            "start": 55086714
+                        },
+                        {
+                            "end": 55210130,
+                            "start": 55209979
+                        },
+                        {
+                            "end": 55211181,
+                            "start": 55210998
+                        },
+                        {
+                            "end": 55219055,
+                            "start": 55218987
+                        },
+                        {
+                            "end": 55220357,
+                            "start": 55220239
+                        },
+                        {
+                            "end": 55221845,
+                            "start": 55221704
+                        },
+                        {
+                            "end": 55223639,
+                            "start": 55223523
+                        },
+                        {
+                            "end": 55224352,
+                            "start": 55224226
+                        },
+                        {
+                            "end": 55224525,
+                            "start": 55224452
+                        },
+                        {
+                            "end": 55225446,
+                            "start": 55225356
+                        },
+                        {
+                            "end": 55228031,
+                            "start": 55227832
+                        },
+                        {
+                            "end": 55229324,
+                            "start": 55229192
+                        },
+                        {
+                            "end": 55231516,
+                            "start": 55231426
+                        },
+                        {
+                            "end": 55233130,
+                            "start": 55232973
+                        },
+                        {
+                            "end": 55238906,
+                            "start": 55238868
+                        },
+                        {
+                            "end": 55240817,
+                            "start": 55240676
+                        },
+                        {
+                            "end": 55241736,
+                            "start": 55241614
+                        },
+                        {
+                            "end": 55242513,
+                            "start": 55242415
+                        },
+                        {
+                            "end": 55249171,
+                            "start": 55248986
+                        },
+                        {
+                            "end": 55259567,
+                            "start": 55259412
+                        },
+                        {
+                            "end": 55260534,
+                            "start": 55260459
+                        },
+                        {
+                            "end": 55266556,
+                            "start": 55266410
+                        },
+                        {
+                            "end": 55268106,
+                            "start": 55268009
+                        },
+                        {
+                            "end": 55269048,
+                            "start": 55268881
+                        },
+                        {
+                            "end": 55269475,
+                            "start": 55269428
+                        },
+                        {
+                            "end": 55270769,
+                            "start": 55270210
+                        }
+                    ],
+                    "is_best_transcript": false,
+                    "name": "ENST00000455089",
+                    "start": 55086714,
+                    "translations": [
+                        {
+                            "cdna_coding_end": 3533,
+                            "cdna_coding_start": 258,
+                            "domains": [
+                                {
+                                    "name": "PIRSF000619",
+                                    "regions": [
+                                        {
+                                            "end": 1090,
+                                            "start": 1
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PF07714",
+                                    "regions": [
+                                        {
+                                            "end": 920,
+                                            "start": 669
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SSF52058",
+                                    "regions": [
+                                        {
+                                            "end": 191,
+                                            "start": 28
+                                        },
+                                        {
+                                            "end": 475,
+                                            "start": 283
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PF00757",
+                                    "regions": [
+                                        {
+                                            "end": 293,
+                                            "start": 141
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PS50011",
+                                    "regions": [
+                                        {
+                                            "end": 934,
+                                            "start": 667
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PS50311",
+                                    "regions": [
+                                        {
+                                            "end": 219,
+                                            "start": 145
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SSF57184",
+                                    "regions": [
+                                        {
+                                            "end": 290,
+                                            "start": 142
+                                        },
+                                        {
+                                            "end": 593,
+                                            "start": 460
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PR00109",
+                                    "regions": [
+                                        {
+                                            "end": 758,
+                                            "start": 745
+                                        },
+                                        {
+                                            "end": 800,
+                                            "start": 782
+                                        },
+                                        {
+                                            "end": 841,
+                                            "start": 831
+                                        },
+                                        {
+                                            "end": 872,
+                                            "start": 850
+                                        },
+                                        {
+                                            "end": 916,
+                                            "start": 894
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SSF56112",
+                                    "regions": [
+                                        {
+                                            "end": 975,
+                                            "start": 651
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PF01030",
+                                    "regions": [
+                                        {
+                                            "end": 141,
+                                            "start": 57
+                                        },
+                                        {
+                                            "end": 435,
+                                            "start": 316
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SM00220",
+                                    "regions": [
+                                        {
+                                            "end": 924,
+                                            "start": 667
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SM00261",
+                                    "regions": [
+                                        {
+                                            "end": 225,
+                                            "start": 183
+                                        },
+                                        {
+                                            "end": 502,
+                                            "start": 451
+                                        },
+                                        {
+                                            "end": 556,
+                                            "start": 507
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SM00219",
+                                    "regions": [
+                                        {
+                                            "end": 923,
+                                            "start": 667
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PF00069",
+                                    "regions": [
+                                        {
+                                            "end": 919,
+                                            "start": 667
+                                        }
+                                    ]
+                                }
+                            ]
+                        }
+                    ]
+                },
+                {
+                    "end": 55236328,
+                    "exons": [
+                        {
+                            "end": 55087058,
+                            "start": 55086725
+                        },
+                        {
+                            "end": 55210130,
+                            "start": 55209979
+                        },
+                        {
+                            "end": 55211181,
+                            "start": 55210998
+                        },
+                        {
+                            "end": 55214433,
+                            "start": 55214299
+                        },
+                        {
+                            "end": 55219055,
+                            "start": 55218987
+                        },
+                        {
+                            "end": 55220357,
+                            "start": 55220239
+                        },
+                        {
+                            "end": 55221845,
+                            "start": 55221704
+                        },
+                        {
+                            "end": 55223639,
+                            "start": 55223523
+                        },
+                        {
+                            "end": 55224352,
+                            "start": 55224226
+                        },
+                        {
+                            "end": 55224525,
+                            "start": 55224452
+                        },
+                        {
+                            "end": 55225446,
+                            "start": 55225356
+                        },
+                        {
+                            "end": 55228031,
+                            "start": 55227832
+                        },
+                        {
+                            "end": 55229324,
+                            "start": 55229192
+                        },
+                        {
+                            "end": 55231516,
+                            "start": 55231426
+                        },
+                        {
+                            "end": 55233130,
+                            "start": 55232973
+                        },
+                        {
+                            "end": 55236328,
+                            "start": 55236216
+                        }
+                    ],
+                    "is_best_transcript": false,
+                    "name": "ENST00000342916",
+                    "start": 55086725,
+                    "translations": [
+                        {
+                            "cdna_coding_end": 2133,
+                            "cdna_coding_start": 247,
+                            "domains": [
+                                {
+                                    "name": "PS50311",
+                                    "regions": [
+                                        {
+                                            "end": 264,
+                                            "start": 187
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SSF57184",
+                                    "regions": [
+                                        {
+                                            "end": 339,
+                                            "start": 182
+                                        },
+                                        {
+                                            "end": 624,
+                                            "start": 505
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SSF52058",
+                                    "regions": [
+                                        {
+                                            "end": 211,
+                                            "start": 29
+                                        },
+                                        {
+                                            "end": 520,
+                                            "start": 328
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PF00757",
+                                    "regions": [
+                                        {
+                                            "end": 338,
+                                            "start": 185
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SM00261",
+                                    "regions": [
+                                        {
+                                            "end": 270,
+                                            "start": 228
+                                        },
+                                        {
+                                            "end": 547,
+                                            "start": 496
+                                        },
+                                        {
+                                            "end": 601,
+                                            "start": 552
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PF01030",
+                                    "regions": [
+                                        {
+                                            "end": 167,
+                                            "start": 57
+                                        },
+                                        {
+                                            "end": 480,
+                                            "start": 361
+                                        }
+                                    ]
+                                }
+                            ]
+                        }
+                    ]
+                },
+                {
+                    "end": 55238738,
+                    "exons": [
+                        {
+                            "end": 55087058,
+                            "start": 55086726
+                        },
+                        {
+                            "end": 55210130,
+                            "start": 55209979
+                        },
+                        {
+                            "end": 55211181,
+                            "start": 55210998
+                        },
+                        {
+                            "end": 55214433,
+                            "start": 55214299
+                        },
+                        {
+                            "end": 55219055,
+                            "start": 55218987
+                        },
+                        {
+                            "end": 55220357,
+                            "start": 55220239
+                        },
+                        {
+                            "end": 55221845,
+                            "start": 55221704
+                        },
+                        {
+                            "end": 55223639,
+                            "start": 55223523
+                        },
+                        {
+                            "end": 55224352,
+                            "start": 55224226
+                        },
+                        {
+                            "end": 55224525,
+                            "start": 55224452
+                        },
+                        {
+                            "end": 55225446,
+                            "start": 55225356
+                        },
+                        {
+                            "end": 55228031,
+                            "start": 55227832
+                        },
+                        {
+                            "end": 55229324,
+                            "start": 55229192
+                        },
+                        {
+                            "end": 55231516,
+                            "start": 55231426
+                        },
+                        {
+                            "end": 55233130,
+                            "start": 55232973
+                        },
+                        {
+                            "end": 55238738,
+                            "start": 55238000
+                        }
+                    ],
+                    "is_best_transcript": false,
+                    "name": "ENST00000344576",
+                    "start": 55086726,
+                    "translations": [
+                        {
+                            "cdna_coding_end": 2363,
+                            "cdna_coding_start": 246,
+                            "domains": [
+                                {
+                                    "name": "SSF57184",
+                                    "regions": [
+                                        {
+                                            "end": 339,
+                                            "start": 182
+                                        },
+                                        {
+                                            "end": 624,
+                                            "start": 505
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PS50311",
+                                    "regions": [
+                                        {
+                                            "end": 264,
+                                            "start": 187
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PF00757",
+                                    "regions": [
+                                        {
+                                            "end": 338,
+                                            "start": 185
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SSF52058",
+                                    "regions": [
+                                        {
+                                            "end": 211,
+                                            "start": 29
+                                        },
+                                        {
+                                            "end": 520,
+                                            "start": 328
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SM00261",
+                                    "regions": [
+                                        {
+                                            "end": 270,
+                                            "start": 228
+                                        },
+                                        {
+                                            "end": 547,
+                                            "start": 496
+                                        },
+                                        {
+                                            "end": 601,
+                                            "start": 552
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PF01030",
+                                    "regions": [
+                                        {
+                                            "end": 167,
+                                            "start": 57
+                                        },
+                                        {
+                                            "end": 480,
+                                            "start": 361
+                                        }
+                                    ]
+                                }
+                            ]
+                        }
+                    ]
+                },
+                {
+                    "end": 55224644,
+                    "exons": [
+                        {
+                            "end": 55087058,
+                            "start": 55086727
+                        },
+                        {
+                            "end": 55210130,
+                            "start": 55209979
+                        },
+                        {
+                            "end": 55211181,
+                            "start": 55210998
+                        },
+                        {
+                            "end": 55214433,
+                            "start": 55214299
+                        },
+                        {
+                            "end": 55219055,
+                            "start": 55218987
+                        },
+                        {
+                            "end": 55220357,
+                            "start": 55220239
+                        },
+                        {
+                            "end": 55221845,
+                            "start": 55221704
+                        },
+                        {
+                            "end": 55223639,
+                            "start": 55223523
+                        },
+                        {
+                            "end": 55224352,
+                            "start": 55224226
+                        },
+                        {
+                            "end": 55224644,
+                            "start": 55224452
+                        }
+                    ],
+                    "is_best_transcript": false,
+                    "name": "ENST00000420316",
+                    "start": 55086727,
+                    "translations": [
+                        {
+                            "cdna_coding_end": 1462,
+                            "cdna_coding_start": 245,
+                            "domains": [
+                                {
+                                    "name": "SSF57184",
+                                    "regions": [
+                                        {
+                                            "end": 339,
+                                            "start": 182
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PS50311",
+                                    "regions": [
+                                        {
+                                            "end": 264,
+                                            "start": 187
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PF00757",
+                                    "regions": [
+                                        {
+                                            "end": 338,
+                                            "start": 185
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SSF52058",
+                                    "regions": [
+                                        {
+                                            "end": 211,
+                                            "start": 29
+                                        },
+                                        {
+                                            "end": 403,
+                                            "start": 328
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SM00261",
+                                    "regions": [
+                                        {
+                                            "end": 270,
+                                            "start": 228
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PF01030",
+                                    "regions": [
+                                        {
+                                            "end": 167,
+                                            "start": 57
+                                        }
+                                    ]
+                                }
+                            ]
+                        }
+                    ]
+                },
+                {
+                    "end": 55279321,
+                    "exons": [
+                        {
+                            "end": 55087058,
+                            "start": 55086794
+                        },
+                        {
+                            "end": 55210130,
+                            "start": 55209979
+                        },
+                        {
+                            "end": 55211181,
+                            "start": 55210998
+                        },
+                        {
+                            "end": 55214433,
+                            "start": 55214299
+                        },
+                        {
+                            "end": 55219055,
+                            "start": 55218987
+                        },
+                        {
+                            "end": 55220357,
+                            "start": 55220239
+                        },
+                        {
+                            "end": 55221845,
+                            "start": 55221704
+                        },
+                        {
+                            "end": 55223639,
+                            "start": 55223523
+                        },
+                        {
+                            "end": 55224352,
+                            "start": 55224226
+                        },
+                        {
+                            "end": 55224525,
+                            "start": 55224452
+                        },
+                        {
+                            "end": 55225446,
+                            "start": 55225356
+                        },
+                        {
+                            "end": 55228031,
+                            "start": 55227832
+                        },
+                        {
+                            "end": 55229324,
+                            "start": 55229192
+                        },
+                        {
+                            "end": 55231516,
+                            "start": 55231426
+                        },
+                        {
+                            "end": 55233130,
+                            "start": 55232973
+                        },
+                        {
+                            "end": 55238906,
+                            "start": 55238868
+                        },
+                        {
+                            "end": 55240817,
+                            "start": 55240676
+                        },
+                        {
+                            "end": 55241736,
+                            "start": 55241614
+                        },
+                        {
+                            "end": 55242513,
+                            "start": 55242415
+                        },
+                        {
+                            "end": 55249171,
+                            "start": 55248986
+                        },
+                        {
+                            "end": 55259567,
+                            "start": 55259412
+                        },
+                        {
+                            "end": 55260534,
+                            "start": 55260459
+                        },
+                        {
+                            "end": 55266556,
+                            "start": 55266410
+                        },
+                        {
+                            "end": 55268106,
+                            "start": 55268009
+                        },
+                        {
+                            "end": 55269048,
+                            "start": 55268881
+                        },
+                        {
+                            "end": 55269475,
+                            "start": 55269428
+                        },
+                        {
+                            "end": 55270318,
+                            "start": 55270210
+                        },
+                        {
+                            "end": 55279321,
+                            "start": 55272949
+                        }
+                    ],
+                    "is_best_transcript": true,
+                    "name": "ENST00000275493",
+                    "start": 55086794,
+                    "translations": [
+                        {
+                            "cdna_coding_end": 3810,
+                            "cdna_coding_start": 178,
+                            "domains": [
+                                {
+                                    "name": "SM00220",
+                                    "regions": [
+                                        {
+                                            "end": 969,
+                                            "start": 712
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PF01030",
+                                    "regions": [
+                                        {
+                                            "end": 167,
+                                            "start": 57
+                                        },
+                                        {
+                                            "end": 480,
+                                            "start": 361
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SSF56112",
+                                    "regions": [
+                                        {
+                                            "end": 1020,
+                                            "start": 696
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PF00069",
+                                    "regions": [
+                                        {
+                                            "end": 964,
+                                            "start": 712
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SM00219",
+                                    "regions": [
+                                        {
+                                            "end": 968,
+                                            "start": 712
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SM00261",
+                                    "regions": [
+                                        {
+                                            "end": 270,
+                                            "start": 228
+                                        },
+                                        {
+                                            "end": 547,
+                                            "start": 496
+                                        },
+                                        {
+                                            "end": 601,
+                                            "start": 552
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PF00757",
+                                    "regions": [
+                                        {
+                                            "end": 338,
+                                            "start": 185
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SSF52058",
+                                    "regions": [
+                                        {
+                                            "end": 211,
+                                            "start": 29
+                                        },
+                                        {
+                                            "end": 520,
+                                            "start": 328
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PF07714",
+                                    "regions": [
+                                        {
+                                            "end": 965,
+                                            "start": 714
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PIRSF000619",
+                                    "regions": [
+                                        {
+                                            "end": 1210,
+                                            "start": 1
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PR00109",
+                                    "regions": [
+                                        {
+                                            "end": 803,
+                                            "start": 790
+                                        },
+                                        {
+                                            "end": 845,
+                                            "start": 827
+                                        },
+                                        {
+                                            "end": 886,
+                                            "start": 876
+                                        },
+                                        {
+                                            "end": 917,
+                                            "start": 895
+                                        },
+                                        {
+                                            "end": 961,
+                                            "start": 939
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SSF57184",
+                                    "regions": [
+                                        {
+                                            "end": 339,
+                                            "start": 182
+                                        },
+                                        {
+                                            "end": 638,
+                                            "start": 505
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PS50311",
+                                    "regions": [
+                                        {
+                                            "end": 264,
+                                            "start": 187
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PS50011",
+                                    "regions": [
+                                        {
+                                            "end": 979,
+                                            "start": 712
+                                        }
+                                    ]
+                                }
+                            ]
+                        }
+                    ]
+                },
+                {
+                    "end": 55324313,
+                    "exons": [
+                        {
+                            "end": 55087058,
+                            "start": 55086811
+                        },
+                        {
+                            "end": 55210130,
+                            "start": 55209979
+                        },
+                        {
+                            "end": 55211181,
+                            "start": 55210998
+                        },
+                        {
+                            "end": 55214433,
+                            "start": 55214299
+                        },
+                        {
+                            "end": 55219055,
+                            "start": 55218987
+                        },
+                        {
+                            "end": 55220357,
+                            "start": 55220239
+                        },
+                        {
+                            "end": 55221845,
+                            "start": 55221704
+                        },
+                        {
+                            "end": 55223639,
+                            "start": 55223523
+                        },
+                        {
+                            "end": 55224352,
+                            "start": 55224226
+                        },
+                        {
+                            "end": 55224525,
+                            "start": 55224452
+                        },
+                        {
+                            "end": 55225446,
+                            "start": 55225356
+                        },
+                        {
+                            "end": 55228031,
+                            "start": 55227832
+                        },
+                        {
+                            "end": 55229324,
+                            "start": 55229192
+                        },
+                        {
+                            "end": 55231516,
+                            "start": 55231426
+                        },
+                        {
+                            "end": 55233130,
+                            "start": 55232973
+                        },
+                        {
+                            "end": 55238906,
+                            "start": 55238868
+                        },
+                        {
+                            "end": 55240621,
+                            "start": 55240539
+                        },
+                        {
+                            "end": 55324313,
+                            "start": 55323947
+                        }
+                    ],
+                    "is_best_transcript": false,
+                    "name": "ENST00000442591",
+                    "start": 55086811,
+                    "translations": [
+                        {
+                            "cdna_coding_end": 2134,
+                            "cdna_coding_start": 161,
+                            "domains": [
+                                {
+                                    "name": "PF01030",
+                                    "regions": [
+                                        {
+                                            "end": 167,
+                                            "start": 57
+                                        },
+                                        {
+                                            "end": 480,
+                                            "start": 361
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SM00261",
+                                    "regions": [
+                                        {
+                                            "end": 270,
+                                            "start": 228
+                                        },
+                                        {
+                                            "end": 547,
+                                            "start": 496
+                                        },
+                                        {
+                                            "end": 601,
+                                            "start": 552
+                                        },
+                                        {
+                                            "end": 653,
+                                            "start": 614
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SSF52058",
+                                    "regions": [
+                                        {
+                                            "end": 211,
+                                            "start": 29
+                                        },
+                                        {
+                                            "end": 520,
+                                            "start": 328
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PF00757",
+                                    "regions": [
+                                        {
+                                            "end": 338,
+                                            "start": 185
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PS50311",
+                                    "regions": [
+                                        {
+                                            "end": 264,
+                                            "start": 187
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SSF57184",
+                                    "regions": [
+                                        {
+                                            "end": 339,
+                                            "start": 182
+                                        },
+                                        {
+                                            "end": 638,
+                                            "start": 505
+                                        }
+                                    ]
+                                }
+                            ]
+                        }
+                    ]
+                },
+                {
+                    "end": 55214417,
+                    "exons": [
+                        {
+                            "end": 55177651,
+                            "start": 55177416
+                        },
+                        {
+                            "end": 55210130,
+                            "start": 55209979
+                        },
+                        {
+                            "end": 55211181,
+                            "start": 55210998
+                        },
+                        {
+                            "end": 55214417,
+                            "start": 55214299
+                        }
+                    ],
+                    "is_best_transcript": false,
+                    "name": "ENST00000450046",
+                    "start": 55177416,
+                    "translations": [
+                        {
+                            "cdna_coding_end": 691,
+                            "cdna_coding_start": 308,
+                            "domains": [
+                                {
+                                    "name": "SSF52058",
+                                    "regions": [
+                                        {
+                                            "end": 127,
+                                            "start": 1
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PF01030",
+                                    "regions": [
+                                        {
+                                            "end": 114,
+                                            "start": 4
+                                        }
+                                    ]
+                                }
+                            ]
+                        }
+                    ]
+                },
+                {
+                    "end": 55273591,
+                    "exons": [
+                        {
+                            "end": 55177651,
+                            "start": 55177540
+                        },
+                        {
+                            "end": 55210130,
+                            "start": 55209979
+                        },
+                        {
+                            "end": 55211181,
+                            "start": 55210998
+                        },
+                        {
+                            "end": 55214433,
+                            "start": 55214299
+                        },
+                        {
+                            "end": 55219055,
+                            "start": 55218987
+                        },
+                        {
+                            "end": 55220357,
+                            "start": 55220239
+                        },
+                        {
+                            "end": 55221845,
+                            "start": 55221704
+                        },
+                        {
+                            "end": 55223639,
+                            "start": 55223523
+                        },
+                        {
+                            "end": 55224352,
+                            "start": 55224226
+                        },
+                        {
+                            "end": 55224525,
+                            "start": 55224452
+                        },
+                        {
+                            "end": 55225446,
+                            "start": 55225356
+                        },
+                        {
+                            "end": 55228031,
+                            "start": 55227832
+                        },
+                        {
+                            "end": 55229324,
+                            "start": 55229192
+                        },
+                        {
+                            "end": 55231516,
+                            "start": 55231426
+                        },
+                        {
+                            "end": 55233130,
+                            "start": 55232973
+                        },
+                        {
+                            "end": 55238906,
+                            "start": 55238868
+                        },
+                        {
+                            "end": 55240817,
+                            "start": 55240676
+                        },
+                        {
+                            "end": 55241736,
+                            "start": 55241614
+                        },
+                        {
+                            "end": 55242513,
+                            "start": 55242415
+                        },
+                        {
+                            "end": 55249171,
+                            "start": 55248986
+                        },
+                        {
+                            "end": 55259567,
+                            "start": 55259412
+                        },
+                        {
+                            "end": 55260534,
+                            "start": 55260459
+                        },
+                        {
+                            "end": 55266556,
+                            "start": 55266410
+                        },
+                        {
+                            "end": 55268106,
+                            "start": 55268009
+                        },
+                        {
+                            "end": 55269048,
+                            "start": 55268881
+                        },
+                        {
+                            "end": 55269475,
+                            "start": 55269428
+                        },
+                        {
+                            "end": 55270318,
+                            "start": 55270210
+                        },
+                        {
+                            "end": 55273591,
+                            "start": 55272949
+                        }
+                    ],
+                    "is_best_transcript": false,
+                    "name": "ENST00000454757",
+                    "start": 55177540,
+                    "translations": [
+                        {
+                            "cdna_coding_end": 3657,
+                            "cdna_coding_start": 184,
+                            "domains": [
+                                {
+                                    "name": "SM00261",
+                                    "regions": [
+                                        {
+                                            "end": 217,
+                                            "start": 175
+                                        },
+                                        {
+                                            "end": 494,
+                                            "start": 443
+                                        },
+                                        {
+                                            "end": 548,
+                                            "start": 499
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PF00069",
+                                    "regions": [
+                                        {
+                                            "end": 911,
+                                            "start": 659
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SM00219",
+                                    "regions": [
+                                        {
+                                            "end": 915,
+                                            "start": 659
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SSF56112",
+                                    "regions": [
+                                        {
+                                            "end": 967,
+                                            "start": 643
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SM00220",
+                                    "regions": [
+                                        {
+                                            "end": 916,
+                                            "start": 659
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PF01030",
+                                    "regions": [
+                                        {
+                                            "end": 114,
+                                            "start": 4
+                                        },
+                                        {
+                                            "end": 427,
+                                            "start": 308
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PS50311",
+                                    "regions": [
+                                        {
+                                            "end": 211,
+                                            "start": 134
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PS50011",
+                                    "regions": [
+                                        {
+                                            "end": 926,
+                                            "start": 659
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PR00109",
+                                    "regions": [
+                                        {
+                                            "end": 750,
+                                            "start": 737
+                                        },
+                                        {
+                                            "end": 792,
+                                            "start": 774
+                                        },
+                                        {
+                                            "end": 833,
+                                            "start": 823
+                                        },
+                                        {
+                                            "end": 864,
+                                            "start": 842
+                                        },
+                                        {
+                                            "end": 908,
+                                            "start": 886
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SSF57184",
+                                    "regions": [
+                                        {
+                                            "end": 286,
+                                            "start": 129
+                                        },
+                                        {
+                                            "end": 585,
+                                            "start": 452
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PIRSF000619",
+                                    "regions": [
+                                        {
+                                            "end": 1157,
+                                            "start": 1
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PF07714",
+                                    "regions": [
+                                        {
+                                            "end": 912,
+                                            "start": 661
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SSF52058",
+                                    "regions": [
+                                        {
+                                            "end": 158,
+                                            "start": 1
+                                        },
+                                        {
+                                            "end": 467,
+                                            "start": 275
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PF00757",
+                                    "regions": [
+                                        {
+                                            "end": 285,
+                                            "start": 132
+                                        }
+                                    ]
+                                }
+                            ]
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "aliases": [
+                "DSTYK"
+            ],
+            "chr": "1",
+            "end": 205180727,
+            "name": "ENSG00000133059",
+            "start": 205111632,
+            "strand": "-",
+            "transcripts": [
+                {
+                    "end": 205180727,
+                    "exons": [
+                        {
+                            "end": 205116873,
+                            "start": 205111632
+                        },
+                        {
+                            "end": 205117467,
+                            "start": 205117333
+                        },
+                        {
+                            "end": 205119898,
+                            "start": 205119808
+                        },
+                        {
+                            "end": 205133083,
+                            "start": 205133055
+                        },
+                        {
+                            "end": 205138960,
+                            "start": 205138291
+                        },
+                        {
+                            "end": 205156934,
+                            "start": 205156546
+                        },
+                        {
+                            "end": 205180727,
+                            "start": 205180399
+                        }
+                    ],
+                    "is_best_transcript": false,
+                    "name": "ENST00000367160",
+                    "start": 205111632,
+                    "translations": [
+                        {
+                            "cdna_coding_end": 1831,
+                            "cdna_coding_start": 65,
+                            "domains": [
+                                {
+                                    "name": "SM00220",
+                                    "regions": [
+                                        {
+                                            "end": 565,
+                                            "start": 337
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SSF56112",
+                                    "regions": [
+                                        {
+                                            "end": 585,
+                                            "start": 452
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PF00069",
+                                    "regions": [
+                                        {
+                                            "end": 556,
+                                            "start": 451
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PF07714",
+                                    "regions": [
+                                        {
+                                            "end": 558,
+                                            "start": 471
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PS50011",
+                                    "regions": [
+                                        {
+                                            "end": 565,
+                                            "start": 312
+                                        }
+                                    ]
+                                }
+                            ]
+                        }
+                    ]
+                },
+                {
+                    "end": 205180694,
+                    "exons": [
+                        {
+                            "end": 205116873,
+                            "start": 205111633
+                        },
+                        {
+                            "end": 205119922,
+                            "start": 205119808
+                        },
+                        {
+                            "end": 205126514,
+                            "start": 205126401
+                        },
+                        {
+                            "end": 205128807,
+                            "start": 205128675
+                        },
+                        {
+                            "end": 205129398,
+                            "start": 205129242
+                        },
+                        {
+                            "end": 205130515,
+                            "start": 205130386
+                        },
+                        {
+                            "end": 205131340,
+                            "start": 205131164
+                        },
+                        {
+                            "end": 205132134,
+                            "start": 205132051
+                        },
+                        {
+                            "end": 205133083,
+                            "start": 205132851
+                        },
+                        {
+                            "end": 205138960,
+                            "start": 205138291
+                        },
+                        {
+                            "end": 205156934,
+                            "start": 205156546
+                        },
+                        {
+                            "end": 205180694,
+                            "start": 205180399
+                        }
+                    ],
+                    "is_best_transcript": false,
+                    "name": "ENST00000367161",
+                    "start": 205111633,
+                    "translations": [
+                        {
+                            "cdna_coding_end": 2686,
+                            "cdna_coding_start": 32,
+                            "domains": [
+                                {
+                                    "name": "PF07714",
+                                    "regions": [
+                                        {
+                                            "end": 820,
+                                            "start": 654
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PS50011",
+                                    "regions": [
+                                        {
+                                            "end": 884,
+                                            "start": 652
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SSF56112",
+                                    "regions": [
+                                        {
+                                            "end": 853,
+                                            "start": 627
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SM00220",
+                                    "regions": [
+                                        {
+                                            "end": 861,
+                                            "start": 652
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PF00069",
+                                    "regions": [
+                                        {
+                                            "end": 824,
+                                            "start": 654
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SM00219",
+                                    "regions": [
+                                        {
+                                            "end": 861,
+                                            "start": 652
+                                        }
+                                    ]
+                                }
+                            ]
+                        }
+                    ]
+                },
+                {
+                    "end": 205180694,
+                    "exons": [
+                        {
+                            "end": 205116873,
+                            "start": 205111633
+                        },
+                        {
+                            "end": 205117467,
+                            "start": 205117333
+                        },
+                        {
+                            "end": 205119922,
+                            "start": 205119808
+                        },
+                        {
+                            "end": 205126514,
+                            "start": 205126401
+                        },
+                        {
+                            "end": 205128807,
+                            "start": 205128675
+                        },
+                        {
+                            "end": 205129398,
+                            "start": 205129242
+                        },
+                        {
+                            "end": 205130515,
+                            "start": 205130386
+                        },
+                        {
+                            "end": 205131340,
+                            "start": 205131164
+                        },
+                        {
+                            "end": 205132134,
+                            "start": 205132051
+                        },
+                        {
+                            "end": 205133083,
+                            "start": 205132851
+                        },
+                        {
+                            "end": 205138960,
+                            "start": 205138291
+                        },
+                        {
+                            "end": 205156934,
+                            "start": 205156546
+                        },
+                        {
+                            "end": 205180694,
+                            "start": 205180399
+                        }
+                    ],
+                    "is_best_transcript": true,
+                    "name": "ENST00000367162",
+                    "start": 205111633,
+                    "translations": [
+                        {
+                            "cdna_coding_end": 2821,
+                            "cdna_coding_start": 32,
+                            "domains": [
+                                {
+                                    "name": "PF07714",
+                                    "regions": [
+                                        {
+                                            "end": 899,
+                                            "start": 654
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PS50011",
+                                    "regions": [
+                                        {
+                                            "end": 906,
+                                            "start": 652
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SSF56112",
+                                    "regions": [
+                                        {
+                                            "end": 897,
+                                            "start": 638
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SM00220",
+                                    "regions": [
+                                        {
+                                            "end": 906,
+                                            "start": 652
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SM00219",
+                                    "regions": [
+                                        {
+                                            "end": 906,
+                                            "start": 652
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PF00069",
+                                    "regions": [
+                                        {
+                                            "end": 897,
+                                            "start": 654
+                                        }
+                                    ]
+                                }
+                            ]
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "aliases": [
+                "NDUFA12"
+            ],
+            "chr": "12",
+            "end": 95397546,
+            "name": "ENSG00000184752",
+            "start": 95290831,
+            "strand": "-",
+            "transcripts": [
+                {
+                    "end": 95397436,
+                    "exons": [
+                        {
+                            "end": 95291086,
+                            "start": 95290831
+                        },
+                        {
+                            "end": 95318582,
+                            "start": 95318422
+                        },
+                        {
+                            "end": 95322039,
+                            "start": 95321793
+                        },
+                        {
+                            "end": 95396597,
+                            "start": 95396515
+                        },
+                        {
+                            "end": 95397436,
+                            "start": 95397371
+                        }
+                    ],
+                    "is_best_transcript": false,
+                    "name": "ENST00000552205",
+                    "start": 95290831
+                },
+                {
+                    "end": 95397476,
+                    "exons": [
+                        {
+                            "end": 95365261,
+                            "start": 95365108
+                        },
+                        {
+                            "end": 95396597,
+                            "start": 95396582
+                        },
+                        {
+                            "end": 95397476,
+                            "start": 95397371
+                        }
+                    ],
+                    "is_best_transcript": false,
+                    "name": "ENST00000547157",
+                    "start": 95365108,
+                    "translations": [
+                        {
+                            "cdna_coding_end": 188,
+                            "cdna_coding_start": 21
+                        }
+                    ]
+                },
+                {
+                    "end": 95397384,
+                    "exons": [
+                        {
+                            "end": 95365396,
+                            "start": 95365109
+                        },
+                        {
+                            "end": 95388033,
+                            "start": 95387946
+                        },
+                        {
+                            "end": 95390752,
+                            "start": 95390680
+                        },
+                        {
+                            "end": 95396597,
+                            "start": 95396515
+                        },
+                        {
+                            "end": 95397384,
+                            "start": 95397371
+                        }
+                    ],
+                    "is_best_transcript": false,
+                    "name": "ENST00000551991",
+                    "start": 95365109,
+                    "translations": [
+                        {
+                            "cdna_coding_end": 144,
+                            "cdna_coding_start": 1,
+                            "domains": [
+                                {
+                                    "name": "PF05071",
+                                    "regions": [
+                                        {
+                                            "end": 33,
+                                            "start": 12
+                                        }
+                                    ]
+                                }
+                            ]
+                        }
+                    ]
+                },
+                {
+                    "end": 95397546,
+                    "exons": [
+                        {
+                            "end": 95365396,
+                            "start": 95365109
+                        },
+                        {
+                            "end": 95388033,
+                            "start": 95387946
+                        },
+                        {
+                            "end": 95396597,
+                            "start": 95396515
+                        },
+                        {
+                            "end": 95397546,
+                            "start": 95397371
+                        }
+                    ],
+                    "is_best_transcript": true,
+                    "name": "ENST00000327772",
+                    "start": 95365109,
+                    "translations": [
+                        {
+                            "cdna_coding_end": 528,
+                            "cdna_coding_start": 91,
+                            "domains": [
+                                {
+                                    "name": "PF05071",
+                                    "regions": [
+                                        {
+                                            "end": 137,
+                                            "start": 36
+                                        }
+                                    ]
+                                }
+                            ]
+                        }
+                    ]
+                },
+                {
+                    "end": 95397489,
+                    "exons": [
+                        {
+                            "end": 95365396,
+                            "start": 95365112
+                        },
+                        {
+                            "end": 95396597,
+                            "start": 95396515
+                        },
+                        {
+                            "end": 95397489,
+                            "start": 95397371
+                        }
+                    ],
+                    "is_best_transcript": false,
+                    "name": "ENST00000547986",
+                    "start": 95365112,
+                    "translations": [
+                        {
+                            "cdna_coding_end": 225,
+                            "cdna_coding_start": 34,
+                            "domains": [
+                                {
+                                    "name": "PF05071",
+                                    "regions": [
+                                        {
+                                            "end": 53,
+                                            "start": 36
+                                        }
+                                    ]
+                                }
+                            ]
+                        }
+                    ]
+                },
+                {
+                    "end": 95397524,
+                    "exons": [
+                        {
+                            "end": 95365396,
+                            "start": 95365254
+                        },
+                        {
+                            "end": 95366265,
+                            "start": 95366171
+                        },
+                        {
+                            "end": 95388033,
+                            "start": 95387946
+                        },
+                        {
+                            "end": 95396597,
+                            "start": 95396515
+                        },
+                        {
+                            "end": 95397524,
+                            "start": 95397371
+                        }
+                    ],
+                    "is_best_transcript": false,
+                    "name": "ENST00000546788",
+                    "start": 95365254,
+                    "translations": [
+                        {
+                            "cdna_coding_end": 368,
+                            "cdna_coding_start": 69,
+                            "domains": [
+                                {
+                                    "name": "PF05071",
+                                    "regions": [
+                                        {
+                                            "end": 87,
+                                            "start": 36
+                                        }
+                                    ]
+                                }
+                            ]
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "aliases": [
+                "FRMD6"
+            ],
+            "chr": "14",
+            "end": 52197445,
+            "name": "ENSG00000139926",
+            "start": 51955818,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 52197177,
+                    "exons": [
+                        {
+                            "end": 51956138,
+                            "start": 51955855
+                        },
+                        {
+                            "end": 52037128,
+                            "start": 52037066
+                        },
+                        {
+                            "end": 52156653,
+                            "start": 52156409
+                        },
+                        {
+                            "end": 52164950,
+                            "start": 52164860
+                        },
+                        {
+                            "end": 52167853,
+                            "start": 52167774
+                        },
+                        {
+                            "end": 52169306,
+                            "start": 52169230
+                        },
+                        {
+                            "end": 52171653,
+                            "start": 52171467
+                        },
+                        {
+                            "end": 52174951,
+                            "start": 52174796
+                        },
+                        {
+                            "end": 52178314,
+                            "start": 52178249
+                        },
+                        {
+                            "end": 52179269,
+                            "start": 52179201
+                        },
+                        {
+                            "end": 52182217,
+                            "start": 52182043
+                        },
+                        {
+                            "end": 52187108,
+                            "start": 52186773
+                        },
+                        {
+                            "end": 52188798,
+                            "start": 52188667
+                        },
+                        {
+                            "end": 52192588,
+                            "start": 52192497
+                        },
+                        {
+                            "end": 52197177,
+                            "start": 52194463
+                        }
+                    ],
+                    "is_best_transcript": false,
+                    "name": "ENST00000356218",
+                    "start": 51955855,
+                    "translations": [
+                        {
+                            "cdna_coding_end": 2338,
+                            "cdna_coding_start": 494,
+                            "domains": [
+                                {
+                                    "name": "PF09379",
+                                    "regions": [
+                                        {
+                                            "end": 109,
+                                            "start": 20
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PF09380",
+                                    "regions": [
+                                        {
+                                            "end": 322,
+                                            "start": 237
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SSF50729",
+                                    "regions": [
+                                        {
+                                            "end": 375,
+                                            "start": 219
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SM00295",
+                                    "regions": [
+                                        {
+                                            "end": 226,
+                                            "start": 12
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PS50057",
+                                    "regions": [
+                                        {
+                                            "end": 320,
+                                            "start": 16
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PF00373",
+                                    "regions": [
+                                        {
+                                            "end": 226,
+                                            "start": 115
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SSF47031",
+                                    "regions": [
+                                        {
+                                            "end": 218,
+                                            "start": 110
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SSF54236",
+                                    "regions": [
+                                        {
+                                            "end": 110,
+                                            "start": 14
+                                        }
+                                    ]
+                                }
+                            ]
+                        }
+                    ]
+                },
+                {
+                    "end": 52197445,
+                    "exons": [
+                        {
+                            "end": 52118714,
+                            "start": 52118576
+                        },
+                        {
+                            "end": 52156653,
+                            "start": 52156409
+                        },
+                        {
+                            "end": 52164950,
+                            "start": 52164860
+                        },
+                        {
+                            "end": 52167853,
+                            "start": 52167774
+                        },
+                        {
+                            "end": 52169306,
+                            "start": 52169230
+                        },
+                        {
+                            "end": 52171653,
+                            "start": 52171467
+                        },
+                        {
+                            "end": 52174951,
+                            "start": 52174796
+                        },
+                        {
+                            "end": 52178314,
+                            "start": 52178249
+                        },
+                        {
+                            "end": 52179269,
+                            "start": 52179201
+                        },
+                        {
+                            "end": 52182217,
+                            "start": 52182043
+                        },
+                        {
+                            "end": 52187108,
+                            "start": 52186773
+                        },
+                        {
+                            "end": 52188798,
+                            "start": 52188667
+                        },
+                        {
+                            "end": 52192588,
+                            "start": 52192497
+                        },
+                        {
+                            "end": 52197445,
+                            "start": 52194463
+                        }
+                    ],
+                    "is_best_transcript": true,
+                    "name": "ENST00000395718",
+                    "start": 52118576,
+                    "translations": [
+                        {
+                            "cdna_coding_end": 2130,
+                            "cdna_coding_start": 286,
+                            "domains": [
+                                {
+                                    "name": "PF00373",
+                                    "regions": [
+                                        {
+                                            "end": 226,
+                                            "start": 115
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SSF47031",
+                                    "regions": [
+                                        {
+                                            "end": 218,
+                                            "start": 110
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SSF54236",
+                                    "regions": [
+                                        {
+                                            "end": 110,
+                                            "start": 14
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PS50057",
+                                    "regions": [
+                                        {
+                                            "end": 320,
+                                            "start": 16
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SM00295",
+                                    "regions": [
+                                        {
+                                            "end": 226,
+                                            "start": 12
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SSF50729",
+                                    "regions": [
+                                        {
+                                            "end": 375,
+                                            "start": 219
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PF09380",
+                                    "regions": [
+                                        {
+                                            "end": 322,
+                                            "start": 237
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PF09379",
+                                    "regions": [
+                                        {
+                                            "end": 109,
+                                            "start": 20
+                                        }
+                                    ]
+                                }
+                            ]
+                        }
+                    ]
+                },
+                {
+                    "end": 52195654,
+                    "exons": [
+                        {
+                            "end": 52118714,
+                            "start": 52118665
+                        },
+                        {
+                            "end": 52156653,
+                            "start": 52156409
+                        },
+                        {
+                            "end": 52164950,
+                            "start": 52164860
+                        },
+                        {
+                            "end": 52167877,
+                            "start": 52167774
+                        },
+                        {
+                            "end": 52169306,
+                            "start": 52169230
+                        },
+                        {
+                            "end": 52171653,
+                            "start": 52171467
+                        },
+                        {
+                            "end": 52174951,
+                            "start": 52174796
+                        },
+                        {
+                            "end": 52178314,
+                            "start": 52178249
+                        },
+                        {
+                            "end": 52179269,
+                            "start": 52179201
+                        },
+                        {
+                            "end": 52182217,
+                            "start": 52182043
+                        },
+                        {
+                            "end": 52187108,
+                            "start": 52186773
+                        },
+                        {
+                            "end": 52188798,
+                            "start": 52188667
+                        },
+                        {
+                            "end": 52192588,
+                            "start": 52192497
+                        },
+                        {
+                            "end": 52195654,
+                            "start": 52194463
+                        }
+                    ],
+                    "is_best_transcript": false,
+                    "name": "ENST00000344768",
+                    "start": 52118665,
+                    "translations": [
+                        {
+                            "cdna_coding_end": 2065,
+                            "cdna_coding_start": 197,
+                            "domains": [
+                                {
+                                    "name": "PF09380",
+                                    "regions": [
+                                        {
+                                            "end": 330,
+                                            "start": 245
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PF09379",
+                                    "regions": [
+                                        {
+                                            "end": 117,
+                                            "start": 20
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SSF47031",
+                                    "regions": [
+                                        {
+                                            "end": 226,
+                                            "start": 118
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PF00373",
+                                    "regions": [
+                                        {
+                                            "end": 234,
+                                            "start": 123
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SSF54236",
+                                    "regions": [
+                                        {
+                                            "end": 118,
+                                            "start": 14
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PS50057",
+                                    "regions": [
+                                        {
+                                            "end": 328,
+                                            "start": 16
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SM00295",
+                                    "regions": [
+                                        {
+                                            "end": 234,
+                                            "start": 12
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SSF50729",
+                                    "regions": [
+                                        {
+                                            "end": 383,
+                                            "start": 227
+                                        }
+                                    ]
+                                }
+                            ]
+                        }
+                    ]
+                },
+                {
+                    "end": 52164945,
+                    "exons": [
+                        {
+                            "end": 52118935,
+                            "start": 52118698
+                        },
+                        {
+                            "end": 52156653,
+                            "start": 52156409
+                        },
+                        {
+                            "end": 52164945,
+                            "start": 52164860
+                        }
+                    ],
+                    "is_best_transcript": false,
+                    "name": "ENST00000554778",
+                    "start": 52118698
+                },
+                {
+                    "end": 52174806,
+                    "exons": [
+                        {
+                            "end": 52164950,
+                            "start": 52164706
+                        },
+                        {
+                            "end": 52167877,
+                            "start": 52167774
+                        },
+                        {
+                            "end": 52169306,
+                            "start": 52169230
+                        },
+                        {
+                            "end": 52171653,
+                            "start": 52171467
+                        },
+                        {
+                            "end": 52174806,
+                            "start": 52174796
+                        }
+                    ],
+                    "is_best_transcript": false,
+                    "name": "ENST00000555936",
+                    "start": 52164706
+                },
+                {
+                    "end": 52197148,
+                    "exons": [
+                        {
+                            "end": 52164950,
+                            "start": 52164831
+                        },
+                        {
+                            "end": 52167853,
+                            "start": 52167774
+                        },
+                        {
+                            "end": 52169306,
+                            "start": 52169230
+                        },
+                        {
+                            "end": 52171653,
+                            "start": 52171467
+                        },
+                        {
+                            "end": 52174951,
+                            "start": 52174796
+                        },
+                        {
+                            "end": 52178314,
+                            "start": 52178249
+                        },
+                        {
+                            "end": 52179269,
+                            "start": 52179201
+                        },
+                        {
+                            "end": 52182217,
+                            "start": 52182043
+                        },
+                        {
+                            "end": 52187108,
+                            "start": 52186773
+                        },
+                        {
+                            "end": 52188798,
+                            "start": 52188667
+                        },
+                        {
+                            "end": 52192588,
+                            "start": 52192497
+                        },
+                        {
+                            "end": 52197148,
+                            "start": 52194463
+                        }
+                    ],
+                    "is_best_transcript": false,
+                    "name": "ENST00000554167",
+                    "start": 52164831,
+                    "translations": [
+                        {
+                            "cdna_coding_end": 1775,
+                            "cdna_coding_start": 138,
+                            "domains": [
+                                {
+                                    "name": "SSF50729",
+                                    "regions": [
+                                        {
+                                            "end": 306,
+                                            "start": 150
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PS50057",
+                                    "regions": [
+                                        {
+                                            "end": 251,
+                                            "start": 1
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SSF54236",
+                                    "regions": [
+                                        {
+                                            "end": 41,
+                                            "start": 1
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SSF47031",
+                                    "regions": [
+                                        {
+                                            "end": 149,
+                                            "start": 41
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PF00373",
+                                    "regions": [
+                                        {
+                                            "end": 157,
+                                            "start": 46
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PF09380",
+                                    "regions": [
+                                        {
+                                            "end": 253,
+                                            "start": 168
+                                        }
+                                    ]
+                                }
+                            ]
+                        }
+                    ]
+                },
+                {
+                    "end": 52175062,
+                    "exons": [
+                        {
+                            "end": 52169306,
+                            "start": 52169266
+                        },
+                        {
+                            "end": 52171653,
+                            "start": 52171467
+                        },
+                        {
+                            "end": 52175062,
+                            "start": 52174796
+                        }
+                    ],
+                    "is_best_transcript": false,
+                    "name": "ENST00000557405",
+                    "start": 52169266,
+                    "translations": [
+                        {
+                            "cdna_coding_end": 390,
+                            "cdna_coding_start": 1,
+                            "domains": [
+                                {
+                                    "name": "PS50057",
+                                    "regions": [
+                                        {
+                                            "end": 129,
+                                            "start": 1
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PF00373",
+                                    "regions": [
+                                        {
+                                            "end": 124,
+                                            "start": 13
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SSF47031",
+                                    "regions": [
+                                        {
+                                            "end": 116,
+                                            "start": 8
+                                        }
+                                    ]
+                                }
+                            ]
+                        }
+                    ]
+                },
+                {
+                    "end": 52187243,
+                    "exons": [
+                        {
+                            "end": 52179269,
+                            "start": 52179231
+                        },
+                        {
+                            "end": 52182217,
+                            "start": 52182043
+                        },
+                        {
+                            "end": 52187243,
+                            "start": 52186773
+                        }
+                    ],
+                    "is_best_transcript": false,
+                    "name": "ENST00000555197",
+                    "start": 52179231,
+                    "translations": [
+                        {
+                            "cdna_coding_end": 618,
+                            "cdna_coding_start": 1,
+                            "domains": [
+                                {
+                                    "name": "PF09380",
+                                    "regions": [
+                                        {
+                                            "end": 60,
+                                            "start": 2
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PS50057",
+                                    "regions": [
+                                        {
+                                            "end": 58,
+                                            "start": 1
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SSF50729",
+                                    "regions": [
+                                        {
+                                            "end": 113,
+                                            "start": 2
+                                        }
+                                    ]
+                                }
+                            ]
+                        }
+                    ]
+                },
+                {
+                    "end": 52192513,
+                    "exons": [
+                        {
+                            "end": 52184066,
+                            "start": 52183973
+                        },
+                        {
+                            "end": 52187108,
+                            "start": 52186773
+                        },
+                        {
+                            "end": 52188798,
+                            "start": 52188673
+                        },
+                        {
+                            "end": 52192513,
+                            "start": 52192497
+                        }
+                    ],
+                    "is_best_transcript": false,
+                    "name": "ENST00000555703",
+                    "start": 52183973,
+                    "translations": [
+                        {
+                            "cdna_coding_end": 573,
+                            "cdna_coding_start": 145
+                        }
+                    ]
+                },
+                {
+                    "end": 52195487,
+                    "exons": [
+                        {
+                            "end": 52184066,
+                            "start": 52183973
+                        },
+                        {
+                            "end": 52187108,
+                            "start": 52186773
+                        },
+                        {
+                            "end": 52188798,
+                            "start": 52188667
+                        },
+                        {
+                            "end": 52192588,
+                            "start": 52192497
+                        },
+                        {
+                            "end": 52195487,
+                            "start": 52194463
+                        }
+                    ],
+                    "is_best_transcript": false,
+                    "name": "ENST00000553556",
+                    "start": 52183973,
+                    "translations": [
+                        {
+                            "cdna_coding_end": 939,
+                            "cdna_coding_start": 145
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "aliases": [
+                "PRKCB"
+            ],
+            "chr": "16",
+            "end": 24231932,
+            "name": "ENSG00000166501",
+            "start": 23847322,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 24231932,
+                    "exons": [
+                        {
+                            "end": 23847669,
+                            "start": 23847322
+                        },
+                        {
+                            "end": 23848727,
+                            "start": 23848696
+                        },
+                        {
+                            "end": 23999911,
+                            "start": 23999829
+                        },
+                        {
+                            "end": 24043568,
+                            "start": 24043457
+                        },
+                        {
+                            "end": 24046868,
+                            "start": 24046740
+                        },
+                        {
+                            "end": 24104268,
+                            "start": 24104112
+                        },
+                        {
+                            "end": 24105618,
+                            "start": 24105484
+                        },
+                        {
+                            "end": 24124390,
+                            "start": 24124294
+                        },
+                        {
+                            "end": 24135302,
+                            "start": 24135156
+                        },
+                        {
+                            "end": 24166178,
+                            "start": 24166005
+                        },
+                        {
+                            "end": 24183682,
+                            "start": 24183591
+                        },
+                        {
+                            "end": 24185901,
+                            "start": 24185839
+                        },
+                        {
+                            "end": 24192249,
+                            "start": 24192111
+                        },
+                        {
+                            "end": 24196512,
+                            "start": 24196432
+                        },
+                        {
+                            "end": 24196888,
+                            "start": 24196781
+                        },
+                        {
+                            "end": 24202551,
+                            "start": 24202411
+                        },
+                        {
+                            "end": 24231932,
+                            "start": 24231282
+                        }
+                    ],
+                    "is_best_transcript": true,
+                    "name": "ENST00000321728",
+                    "start": 23847322,
+                    "translations": [
+                        {
+                            "cdna_coding_end": 2191,
+                            "cdna_coding_start": 176,
+                            "domains": [
+                                {
+                                    "name": "SM00239",
+                                    "regions": [
+                                        {
+                                            "end": 275,
+                                            "start": 172
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PF07714",
+                                    "regions": [
+                                        {
+                                            "end": 583,
+                                            "start": 344
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SSF49562",
+                                    "regions": [
+                                        {
+                                            "end": 288,
+                                            "start": 157
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SM00109",
+                                    "regions": [
+                                        {
+                                            "end": 86,
+                                            "start": 37
+                                        },
+                                        {
+                                            "end": 151,
+                                            "start": 102
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PS50011",
+                                    "regions": [
+                                        {
+                                            "end": 600,
+                                            "start": 342
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PR00008",
+                                    "regions": [
+                                        {
+                                            "end": 48,
+                                            "start": 34
+                                        },
+                                        {
+                                            "end": 59,
+                                            "start": 50
+                                        },
+                                        {
+                                            "end": 74,
+                                            "start": 63
+                                        },
+                                        {
+                                            "end": 152,
+                                            "start": 140
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PF00433",
+                                    "regions": [
+                                        {
+                                            "end": 666,
+                                            "start": 623
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SM00220",
+                                    "regions": [
+                                        {
+                                            "end": 600,
+                                            "start": 342
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PF00168",
+                                    "regions": [
+                                        {
+                                            "end": 259,
+                                            "start": 175
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SSF57889",
+                                    "regions": [
+                                        {
+                                            "end": 92,
+                                            "start": 6
+                                        },
+                                        {
+                                            "end": 157,
+                                            "start": 101
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PF00130",
+                                    "regions": [
+                                        {
+                                            "end": 87,
+                                            "start": 37
+                                        },
+                                        {
+                                            "end": 153,
+                                            "start": 102
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PS50081",
+                                    "regions": [
+                                        {
+                                            "end": 86,
+                                            "start": 36
+                                        },
+                                        {
+                                            "end": 151,
+                                            "start": 101
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SSF56112",
+                                    "regions": [
+                                        {
+                                            "end": 627,
+                                            "start": 317
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PF00069",
+                                    "regions": [
+                                        {
+                                            "end": 586,
+                                            "start": 343
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SM00219",
+                                    "regions": [
+                                        {
+                                            "end": 576,
+                                            "start": 342
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PR00360",
+                                    "regions": [
+                                        {
+                                            "end": 200,
+                                            "start": 188
+                                        },
+                                        {
+                                            "end": 230,
+                                            "start": 217
+                                        },
+                                        {
+                                            "end": 248,
+                                            "start": 240
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SM00133",
+                                    "regions": [
+                                        {
+                                            "end": 664,
+                                            "start": 601
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PS50004",
+                                    "regions": [
+                                        {
+                                            "end": 260,
+                                            "start": 173
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PIRSF000550",
+                                    "regions": [
+                                        {
+                                            "end": 671,
+                                            "start": 1
+                                        }
+                                    ]
+                                }
+                            ]
+                        }
+                    ]
+                },
+                {
+                    "end": 24231932,
+                    "exons": [
+                        {
+                            "end": 23847669,
+                            "start": 23847345
+                        },
+                        {
+                            "end": 23848727,
+                            "start": 23848696
+                        },
+                        {
+                            "end": 23999911,
+                            "start": 23999829
+                        },
+                        {
+                            "end": 24043568,
+                            "start": 24043457
+                        },
+                        {
+                            "end": 24046868,
+                            "start": 24046740
+                        },
+                        {
+                            "end": 24104268,
+                            "start": 24104112
+                        },
+                        {
+                            "end": 24105618,
+                            "start": 24105484
+                        },
+                        {
+                            "end": 24124390,
+                            "start": 24124294
+                        },
+                        {
+                            "end": 24135302,
+                            "start": 24135156
+                        },
+                        {
+                            "end": 24166178,
+                            "start": 24166005
+                        },
+                        {
+                            "end": 24183682,
+                            "start": 24183591
+                        },
+                        {
+                            "end": 24185901,
+                            "start": 24185839
+                        },
+                        {
+                            "end": 24192249,
+                            "start": 24192111
+                        },
+                        {
+                            "end": 24196512,
+                            "start": 24196432
+                        },
+                        {
+                            "end": 24196888,
+                            "start": 24196781
+                        },
+                        {
+                            "end": 24202551,
+                            "start": 24202411
+                        },
+                        {
+                            "end": 24231932,
+                            "start": 24225979
+                        }
+                    ],
+                    "is_best_transcript": false,
+                    "name": "ENST00000303531",
+                    "start": 23847345,
+                    "translations": [
+                        {
+                            "cdna_coding_end": 2174,
+                            "cdna_coding_start": 153,
+                            "domains": [
+                                {
+                                    "name": "SM00133",
+                                    "regions": [
+                                        {
+                                            "end": 663,
+                                            "start": 601
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PS50004",
+                                    "regions": [
+                                        {
+                                            "end": 260,
+                                            "start": 173
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PIRSF000550",
+                                    "regions": [
+                                        {
+                                            "end": 672,
+                                            "start": 1
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PF00069",
+                                    "regions": [
+                                        {
+                                            "end": 586,
+                                            "start": 343
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PR00360",
+                                    "regions": [
+                                        {
+                                            "end": 200,
+                                            "start": 188
+                                        },
+                                        {
+                                            "end": 230,
+                                            "start": 217
+                                        },
+                                        {
+                                            "end": 248,
+                                            "start": 240
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SM00219",
+                                    "regions": [
+                                        {
+                                            "end": 576,
+                                            "start": 342
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PS50081",
+                                    "regions": [
+                                        {
+                                            "end": 86,
+                                            "start": 36
+                                        },
+                                        {
+                                            "end": 151,
+                                            "start": 101
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SSF56112",
+                                    "regions": [
+                                        {
+                                            "end": 627,
+                                            "start": 317
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SM00220",
+                                    "regions": [
+                                        {
+                                            "end": 600,
+                                            "start": 342
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PF00433",
+                                    "regions": [
+                                        {
+                                            "end": 664,
+                                            "start": 627
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PF00130",
+                                    "regions": [
+                                        {
+                                            "end": 87,
+                                            "start": 37
+                                        },
+                                        {
+                                            "end": 153,
+                                            "start": 102
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PF00168",
+                                    "regions": [
+                                        {
+                                            "end": 259,
+                                            "start": 175
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SSF57889",
+                                    "regions": [
+                                        {
+                                            "end": 92,
+                                            "start": 6
+                                        },
+                                        {
+                                            "end": 157,
+                                            "start": 101
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PR00008",
+                                    "regions": [
+                                        {
+                                            "end": 48,
+                                            "start": 34
+                                        },
+                                        {
+                                            "end": 59,
+                                            "start": 50
+                                        },
+                                        {
+                                            "end": 74,
+                                            "start": 63
+                                        },
+                                        {
+                                            "end": 152,
+                                            "start": 140
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PS50011",
+                                    "regions": [
+                                        {
+                                            "end": 600,
+                                            "start": 342
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SM00109",
+                                    "regions": [
+                                        {
+                                            "end": 86,
+                                            "start": 37
+                                        },
+                                        {
+                                            "end": 151,
+                                            "start": 102
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PF07714",
+                                    "regions": [
+                                        {
+                                            "end": 583,
+                                            "start": 344
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SSF49562",
+                                    "regions": [
+                                        {
+                                            "end": 288,
+                                            "start": 157
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SM00239",
+                                    "regions": [
+                                        {
+                                            "end": 275,
+                                            "start": 172
+                                        }
+                                    ]
+                                }
+                            ]
+                        }
+                    ]
+                },
+                {
+                    "end": 23880647,
+                    "exons": [
+                        {
+                            "end": 23847669,
+                            "start": 23847403
+                        },
+                        {
+                            "end": 23880647,
+                            "start": 23880435
+                        }
+                    ],
+                    "is_best_transcript": false,
+                    "name": "ENST00000498058",
+                    "start": 23847403,
+                    "translations": [
+                        {
+                            "cdna_coding_end": 268,
+                            "cdna_coding_start": 95,
+                            "domains": [
+                                {
+                                    "name": "PR00008",
+                                    "regions": [
+                                        {
+                                            "end": 48,
+                                            "start": 34
+                                        },
+                                        {
+                                            "end": 57,
+                                            "start": 50
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PS50081",
+                                    "regions": [
+                                        {
+                                            "end": 57,
+                                            "start": 36
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SSF57889",
+                                    "regions": [
+                                        {
+                                            "end": 57,
+                                            "start": 6
+                                        }
+                                    ]
+                                }
+                            ]
+                        }
+                    ]
+                },
+                {
+                    "end": 24124386,
+                    "exons": [
+                        {
+                            "end": 23848727,
+                            "start": 23848544
+                        },
+                        {
+                            "end": 24104268,
+                            "start": 24104112
+                        },
+                        {
+                            "end": 24105618,
+                            "start": 24105484
+                        },
+                        {
+                            "end": 24124386,
+                            "start": 24124294
+                        }
+                    ],
+                    "is_best_transcript": false,
+                    "name": "ENST00000498739",
+                    "start": 23848544
+                },
+                {
+                    "end": 24192166,
+                    "exons": [
+                        {
+                            "end": 24163176,
+                            "start": 24163006
+                        },
+                        {
+                            "end": 24166178,
+                            "start": 24166005
+                        },
+                        {
+                            "end": 24183682,
+                            "start": 24183591
+                        },
+                        {
+                            "end": 24185901,
+                            "start": 24185839
+                        },
+                        {
+                            "end": 24192166,
+                            "start": 24192111
+                        }
+                    ],
+                    "is_best_transcript": false,
+                    "name": "ENST00000472066",
+                    "start": 24163006
+                },
+                {
+                    "end": 24202909,
+                    "exons": [
+                        {
+                            "end": 24196888,
+                            "start": 24196852
+                        },
+                        {
+                            "end": 24202909,
+                            "start": 24202411
+                        }
+                    ],
+                    "is_best_transcript": false,
+                    "name": "ENST00000466124",
+                    "start": 24196852
+                }
+            ]
+        },
+        {
+            "aliases": [
+                "GIMAP4"
+            ],
+            "chr": "7",
+            "end": 150271041,
+            "name": "ENSG00000133574",
+            "start": 150264365,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 150271041,
+                    "exons": [
+                        {
+                            "end": 150264525,
+                            "start": 150264365
+                        },
+                        {
+                            "end": 150267047,
+                            "start": 150266976
+                        },
+                        {
+                            "end": 150271041,
+                            "start": 150269217
+                        }
+                    ],
+                    "is_best_transcript": true,
+                    "name": "ENST00000255945",
+                    "start": 150264365,
+                    "translations": [
+                        {
+                            "cdna_coding_end": 1165,
+                            "cdna_coding_start": 176,
+                            "domains": [
+                                {
+                                    "name": "PF04548",
+                                    "regions": [
+                                        {
+                                            "end": 238,
+                                            "start": 31
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SSF52540",
+                                    "regions": [
+                                        {
+                                            "end": 288,
+                                            "start": 24
+                                        }
+                                    ]
+                                }
+                            ]
+                        }
+                    ]
+                },
+                {
+                    "end": 150270602,
+                    "exons": [
+                        {
+                            "end": 150264525,
+                            "start": 150264457
+                        },
+                        {
+                            "end": 150267089,
+                            "start": 150266976
+                        },
+                        {
+                            "end": 150270602,
+                            "start": 150269217
+                        }
+                    ],
+                    "is_best_transcript": false,
+                    "name": "ENST00000461940",
+                    "start": 150264457,
+                    "translations": [
+                        {
+                            "cdna_coding_end": 1115,
+                            "cdna_coding_start": 84,
+                            "domains": [
+                                {
+                                    "name": "PF04548",
+                                    "regions": [
+                                        {
+                                            "end": 252,
+                                            "start": 45
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SSF52540",
+                                    "regions": [
+                                        {
+                                            "end": 302,
+                                            "start": 38
+                                        }
+                                    ]
+                                }
+                            ]
+                        }
+                    ]
+                },
+                {
+                    "end": 150269569,
+                    "exons": [
+                        {
+                            "end": 150264608,
+                            "start": 150264524
+                        },
+                        {
+                            "end": 150267089,
+                            "start": 150266976
+                        },
+                        {
+                            "end": 150269569,
+                            "start": 150269217
+                        }
+                    ],
+                    "is_best_transcript": false,
+                    "name": "ENST00000479232",
+                    "start": 150264524,
+                    "translations": [
+                        {
+                            "cdna_coding_end": 552,
+                            "cdna_coding_start": 100,
+                            "domains": [
+                                {
+                                    "name": "SSF52540",
+                                    "regions": [
+                                        {
+                                            "end": 151,
+                                            "start": 38
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PF04548",
+                                    "regions": [
+                                        {
+                                            "end": 151,
+                                            "start": 45
+                                        }
+                                    ]
+                                }
+                            ]
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "aliases": [
+                "IL7"
+            ],
+            "chr": "8",
+            "end": 79717758,
+            "name": "ENSG00000104432",
+            "start": 79587978,
+            "strand": "-",
+            "transcripts": [
+                {
+                    "end": 79717758,
+                    "exons": [
+                        {
+                            "end": 79646067,
+                            "start": 79645007
+                        },
+                        {
+                            "end": 79648762,
+                            "start": 79648709
+                        },
+                        {
+                            "end": 79650870,
+                            "start": 79650739
+                        },
+                        {
+                            "end": 79652317,
+                            "start": 79652237
+                        },
+                        {
+                            "end": 79710443,
+                            "start": 79710307
+                        },
+                        {
+                            "end": 79717758,
+                            "start": 79717148
+                        }
+                    ],
+                    "is_best_transcript": true,
+                    "name": "ENST00000263851",
+                    "start": 79645007,
+                    "translations": [
+                        {
+                            "cdna_coding_end": 1135,
+                            "cdna_coding_start": 602,
+                            "domains": [
+                                {
+                                    "name": "PIRSF001942",
+                                    "regions": [
+                                        {
+                                            "end": 177,
+                                            "start": 1
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PR00435",
+                                    "regions": [
+                                        {
+                                            "end": 25,
+                                            "start": 2
+                                        },
+                                        {
+                                            "end": 48,
+                                            "start": 26
+                                        },
+                                        {
+                                            "end": 77,
+                                            "start": 57
+                                        },
+                                        {
+                                            "end": 98,
+                                            "start": 78
+                                        },
+                                        {
+                                            "end": 118,
+                                            "start": 99
+                                        },
+                                        {
+                                            "end": 173,
+                                            "start": 151
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PF01415",
+                                    "regions": [
+                                        {
+                                            "end": 173,
+                                            "start": 28
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SM00127",
+                                    "regions": [
+                                        {
+                                            "end": 173,
+                                            "start": 27
+                                        }
+                                    ]
+                                }
+                            ]
+                        }
+                    ]
+                },
+                {
+                    "end": 79717699,
+                    "exons": [
+                        {
+                            "end": 79646063,
+                            "start": 79645283
+                        },
+                        {
+                            "end": 79648762,
+                            "start": 79648709
+                        },
+                        {
+                            "end": 79650870,
+                            "start": 79650739
+                        },
+                        {
+                            "end": 79652317,
+                            "start": 79652237
+                        },
+                        {
+                            "end": 79659331,
+                            "start": 79659129
+                        },
+                        {
+                            "end": 79710443,
+                            "start": 79710307
+                        },
+                        {
+                            "end": 79717699,
+                            "start": 79717148
+                        }
+                    ],
+                    "is_best_transcript": false,
+                    "name": "ENST00000518982",
+                    "start": 79645283,
+                    "translations": [
+                        {
+                            "cdna_coding_end": 758,
+                            "cdna_coding_start": 543,
+                            "domains": [
+                                {
+                                    "name": "PR00435",
+                                    "regions": [
+                                        {
+                                            "end": 25,
+                                            "start": 2
+                                        },
+                                        {
+                                            "end": 48,
+                                            "start": 26
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PF01415",
+                                    "regions": [
+                                        {
+                                            "end": 54,
+                                            "start": 28
+                                        }
+                                    ]
+                                }
+                            ]
+                        }
+                    ]
+                },
+                {
+                    "end": 79717163,
+                    "exons": [
+                        {
+                            "end": 79646067,
+                            "start": 79645900
+                        },
+                        {
+                            "end": 79648762,
+                            "start": 79648709
+                        },
+                        {
+                            "end": 79652317,
+                            "start": 79652237
+                        },
+                        {
+                            "end": 79710443,
+                            "start": 79710307
+                        },
+                        {
+                            "end": 79717163,
+                            "start": 79717148
+                        }
+                    ],
+                    "is_best_transcript": false,
+                    "name": "ENST00000520269",
+                    "start": 79645900,
+                    "translations": [
+                        {
+                            "cdna_coding_end": 408,
+                            "cdna_coding_start": 7,
+                            "domains": [
+                                {
+                                    "name": "PF01415",
+                                    "regions": [
+                                        {
+                                            "end": 77,
+                                            "start": 28
+                                        },
+                                        {
+                                            "end": 129,
+                                            "start": 91
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SM00127",
+                                    "regions": [
+                                        {
+                                            "end": 129,
+                                            "start": 27
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PR00435",
+                                    "regions": [
+                                        {
+                                            "end": 25,
+                                            "start": 2
+                                        },
+                                        {
+                                            "end": 48,
+                                            "start": 26
+                                        },
+                                        {
+                                            "end": 77,
+                                            "start": 57
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PIRSF001942",
+                                    "regions": [
+                                        {
+                                            "end": 133,
+                                            "start": 1
+                                        }
+                                    ]
+                                }
+                            ]
+                        }
+                    ]
+                },
+                {
+                    "end": 79717163,
+                    "exons": [
+                        {
+                            "end": 79646067,
+                            "start": 79645900
+                        },
+                        {
+                            "end": 79648762,
+                            "start": 79648709
+                        },
+                        {
+                            "end": 79652317,
+                            "start": 79652237
+                        },
+                        {
+                            "end": 79710443,
+                            "start": 79710363
+                        },
+                        {
+                            "end": 79717163,
+                            "start": 79717148
+                        }
+                    ],
+                    "is_best_transcript": false,
+                    "name": "ENST00000520215",
+                    "start": 79645900,
+                    "translations": [
+                        {
+                            "cdna_coding_end": 120,
+                            "cdna_coding_start": 7,
+                            "domains": [
+                                {
+                                    "name": "PR00435",
+                                    "regions": [
+                                        {
+                                            "end": 25,
+                                            "start": 2
+                                        },
+                                        {
+                                            "end": 37,
+                                            "start": 26
+                                        }
+                                    ]
+                                }
+                            ]
+                        }
+                    ]
+                },
+                {
+                    "end": 79717686,
+                    "exons": [
+                        {
+                            "end": 79646067,
+                            "start": 79645900
+                        },
+                        {
+                            "end": 79648762,
+                            "start": 79648709
+                        },
+                        {
+                            "end": 79650870,
+                            "start": 79650739
+                        },
+                        {
+                            "end": 79652317,
+                            "start": 79652237
+                        },
+                        {
+                            "end": 79710443,
+                            "start": 79710363
+                        },
+                        {
+                            "end": 79717686,
+                            "start": 79717148
+                        }
+                    ],
+                    "is_best_transcript": false,
+                    "name": "ENST00000520317",
+                    "start": 79645900,
+                    "translations": [
+                        {
+                            "cdna_coding_end": 643,
+                            "cdna_coding_start": 530,
+                            "domains": [
+                                {
+                                    "name": "PR00435",
+                                    "regions": [
+                                        {
+                                            "end": 25,
+                                            "start": 2
+                                        },
+                                        {
+                                            "end": 37,
+                                            "start": 26
+                                        }
+                                    ]
+                                }
+                            ]
+                        }
+                    ]
+                },
+                {
+                    "end": 79652311,
+                    "exons": [
+                        {
+                            "end": 79646067,
+                            "start": 79645948
+                        },
+                        {
+                            "end": 79652311,
+                            "start": 79652237
+                        }
+                    ],
+                    "is_best_transcript": false,
+                    "name": "ENST00000541183",
+                    "start": 79645948,
+                    "translations": [
+                        {
+                            "cdna_coding_end": 195,
+                            "cdna_coding_start": 1,
+                            "domains": [
+                                {
+                                    "name": "SM00127",
+                                    "regions": [
+                                        {
+                                            "end": 60,
+                                            "start": 1
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PF01415",
+                                    "regions": [
+                                        {
+                                            "end": 60,
+                                            "start": 1
+                                        }
+                                    ]
+                                }
+                            ]
+                        }
+                    ]
+                },
+                {
+                    "end": 79717758,
+                    "exons": [
+                        {
+                            "end": 79659331,
+                            "start": 79659263
+                        },
+                        {
+                            "end": 79710443,
+                            "start": 79710307
+                        },
+                        {
+                            "end": 79717758,
+                            "start": 79717148
+                        }
+                    ],
+                    "is_best_transcript": false,
+                    "name": "ENST00000379113",
+                    "start": 79659263,
+                    "translations": [
+                        {
+                            "cdna_coding_end": 817,
+                            "cdna_coding_start": 602,
+                            "domains": [
+                                {
+                                    "name": "PF01415",
+                                    "regions": [
+                                        {
+                                            "end": 54,
+                                            "start": 28
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PR00435",
+                                    "regions": [
+                                        {
+                                            "end": 25,
+                                            "start": 2
+                                        },
+                                        {
+                                            "end": 48,
+                                            "start": 26
+                                        }
+                                    ]
+                                }
+                            ]
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "aliases": [
+                "SVEP1"
+            ],
+            "chr": "9",
+            "end": 113342160,
+            "name": "ENSG00000165124",
+            "start": 113127531,
+            "strand": "-",
+            "transcripts": [
+                {
+                    "end": 113342160,
+                    "exons": [
+                        {
+                            "end": 113128840,
+                            "start": 113127531
+                        },
+                        {
+                            "end": 113132296,
+                            "start": 113132203
+                        },
+                        {
+                            "end": 113137743,
+                            "start": 113137648
+                        },
+                        {
+                            "end": 113139646,
+                            "start": 113139551
+                        },
+                        {
+                            "end": 113141797,
+                            "start": 113141627
+                        },
+                        {
+                            "end": 113148354,
+                            "start": 113148178
+                        },
+                        {
+                            "end": 113149738,
+                            "start": 113149565
+                        },
+                        {
+                            "end": 113151867,
+                            "start": 113151804
+                        },
+                        {
+                            "end": 113163289,
+                            "start": 113163134
+                        },
+                        {
+                            "end": 113166832,
+                            "start": 113166607
+                        },
+                        {
+                            "end": 113171231,
+                            "start": 113168440
+                        },
+                        {
+                            "end": 113174015,
+                            "start": 113173343
+                        },
+                        {
+                            "end": 113190038,
+                            "start": 113189871
+                        },
+                        {
+                            "end": 113191614,
+                            "start": 113191423
+                        },
+                        {
+                            "end": 113192284,
+                            "start": 113192200
+                        },
+                        {
+                            "end": 113192730,
+                            "start": 113192554
+                        },
+                        {
+                            "end": 113194314,
+                            "start": 113194195
+                        },
+                        {
+                            "end": 113194915,
+                            "start": 113194742
+                        },
+                        {
+                            "end": 113196786,
+                            "start": 113196616
+                        },
+                        {
+                            "end": 113197644,
+                            "start": 113197521
+                        },
+                        {
+                            "end": 113198784,
+                            "start": 113198660
+                        },
+                        {
+                            "end": 113206000,
+                            "start": 113205825
+                        },
+                        {
+                            "end": 113208318,
+                            "start": 113208117
+                        },
+                        {
+                            "end": 113209337,
+                            "start": 113209180
+                        },
+                        {
+                            "end": 113212540,
+                            "start": 113212339
+                        },
+                        {
+                            "end": 113213682,
+                            "start": 113213569
+                        },
+                        {
+                            "end": 113217983,
+                            "start": 113217870
+                        },
+                        {
+                            "end": 113219632,
+                            "start": 113219536
+                        },
+                        {
+                            "end": 113220842,
+                            "start": 113220751
+                        },
+                        {
+                            "end": 113221393,
+                            "start": 113221232
+                        },
+                        {
+                            "end": 113228306,
+                            "start": 113228145
+                        },
+                        {
+                            "end": 113231381,
+                            "start": 113231220
+                        },
+                        {
+                            "end": 113233877,
+                            "start": 113233644
+                        },
+                        {
+                            "end": 113234603,
+                            "start": 113234439
+                        },
+                        {
+                            "end": 113238595,
+                            "start": 113238484
+                        },
+                        {
+                            "end": 113242036,
+                            "start": 113241915
+                        },
+                        {
+                            "end": 113243716,
+                            "start": 113243522
+                        },
+                        {
+                            "end": 113244772,
+                            "start": 113244641
+                        },
+                        {
+                            "end": 113245973,
+                            "start": 113245866
+                        },
+                        {
+                            "end": 113252059,
+                            "start": 113251930
+                        },
+                        {
+                            "end": 113259213,
+                            "start": 113259095
+                        },
+                        {
+                            "end": 113261518,
+                            "start": 113261321
+                        },
+                        {
+                            "end": 113265497,
+                            "start": 113265318
+                        },
+                        {
+                            "end": 113275385,
+                            "start": 113275206
+                        },
+                        {
+                            "end": 113276386,
+                            "start": 113276228
+                        },
+                        {
+                            "end": 113308571,
+                            "start": 113308395
+                        },
+                        {
+                            "end": 113312384,
+                            "start": 113312129
+                        },
+                        {
+                            "end": 113342160,
+                            "start": 113341293
+                        }
+                    ],
+                    "is_best_transcript": true,
+                    "name": "ENST00000401783",
+                    "start": 113127531,
+                    "translations": [
+                        {
+                            "cdna_coding_end": 11053,
+                            "cdna_coding_start": 338,
+                            "domains": [
+                                {
+                                    "name": "SM00032",
+                                    "regions": [
+                                        {
+                                            "end": 433,
+                                            "start": 378
+                                        },
+                                        {
+                                            "end": 493,
+                                            "start": 438
+                                        },
+                                        {
+                                            "end": 559,
+                                            "start": 498
+                                        },
+                                        {
+                                            "end": 787,
+                                            "start": 727
+                                        },
+                                        {
+                                            "end": 1685,
+                                            "start": 1631
+                                        },
+                                        {
+                                            "end": 1743,
+                                            "start": 1690
+                                        },
+                                        {
+                                            "end": 1842,
+                                            "start": 1789
+                                        },
+                                        {
+                                            "end": 1900,
+                                            "start": 1847
+                                        },
+                                        {
+                                            "end": 1958,
+                                            "start": 1905
+                                        },
+                                        {
+                                            "end": 2016,
+                                            "start": 1963
+                                        },
+                                        {
+                                            "end": 2078,
+                                            "start": 2021
+                                        },
+                                        {
+                                            "end": 2141,
+                                            "start": 2083
+                                        },
+                                        {
+                                            "end": 2199,
+                                            "start": 2146
+                                        },
+                                        {
+                                            "end": 2259,
+                                            "start": 2204
+                                        },
+                                        {
+                                            "end": 2318,
+                                            "start": 2264
+                                        },
+                                        {
+                                            "end": 2376,
+                                            "start": 2323
+                                        },
+                                        {
+                                            "end": 2435,
+                                            "start": 2381
+                                        },
+                                        {
+                                            "end": 2493,
+                                            "start": 2440
+                                        },
+                                        {
+                                            "end": 2551,
+                                            "start": 2498
+                                        },
+                                        {
+                                            "end": 2608,
+                                            "start": 2556
+                                        },
+                                        {
+                                            "end": 2712,
+                                            "start": 2654
+                                        },
+                                        {
+                                            "end": 2770,
+                                            "start": 2717
+                                        },
+                                        {
+                                            "end": 2828,
+                                            "start": 2775
+                                        },
+                                        {
+                                            "end": 2886,
+                                            "start": 2833
+                                        },
+                                        {
+                                            "end": 2944,
+                                            "start": 2891
+                                        },
+                                        {
+                                            "end": 3002,
+                                            "start": 2949
+                                        },
+                                        {
+                                            "end": 3059,
+                                            "start": 3007
+                                        },
+                                        {
+                                            "end": 3117,
+                                            "start": 3064
+                                        },
+                                        {
+                                            "end": 3176,
+                                            "start": 3122
+                                        },
+                                        {
+                                            "end": 3236,
+                                            "start": 3181
+                                        },
+                                        {
+                                            "end": 3294,
+                                            "start": 3241
+                                        },
+                                        {
+                                            "end": 3352,
+                                            "start": 3299
+                                        },
+                                        {
+                                            "end": 3411,
+                                            "start": 3357
+                                        },
+                                        {
+                                            "end": 3468,
+                                            "start": 3416
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PF02494",
+                                    "regions": [
+                                        {
+                                            "end": 642,
+                                            "start": 561
+                                        },
+                                        {
+                                            "end": 721,
+                                            "start": 644
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PR00895",
+                                    "regions": [
+                                        {
+                                            "end": 1530,
+                                            "start": 1512
+                                        },
+                                        {
+                                            "end": 1558,
+                                            "start": 1539
+                                        },
+                                        {
+                                            "end": 1592,
+                                            "start": 1559
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SSF57535",
+                                    "regions": [
+                                        {
+                                            "end": 433,
+                                            "start": 374
+                                        },
+                                        {
+                                            "end": 493,
+                                            "start": 434
+                                        },
+                                        {
+                                            "end": 560,
+                                            "start": 494
+                                        },
+                                        {
+                                            "end": 790,
+                                            "start": 727
+                                        },
+                                        {
+                                            "end": 1746,
+                                            "start": 1626
+                                        },
+                                        {
+                                            "end": 1842,
+                                            "start": 1785
+                                        },
+                                        {
+                                            "end": 1900,
+                                            "start": 1843
+                                        },
+                                        {
+                                            "end": 1958,
+                                            "start": 1901
+                                        },
+                                        {
+                                            "end": 2016,
+                                            "start": 1959
+                                        },
+                                        {
+                                            "end": 2078,
+                                            "start": 2017
+                                        },
+                                        {
+                                            "end": 2199,
+                                            "start": 2081
+                                        },
+                                        {
+                                            "end": 2318,
+                                            "start": 2202
+                                        },
+                                        {
+                                            "end": 2377,
+                                            "start": 2321
+                                        },
+                                        {
+                                            "end": 2437,
+                                            "start": 2379
+                                        },
+                                        {
+                                            "end": 2551,
+                                            "start": 2438
+                                        },
+                                        {
+                                            "end": 2616,
+                                            "start": 2552
+                                        },
+                                        {
+                                            "end": 2712,
+                                            "start": 2643
+                                        },
+                                        {
+                                            "end": 2828,
+                                            "start": 2715
+                                        },
+                                        {
+                                            "end": 2886,
+                                            "start": 2829
+                                        },
+                                        {
+                                            "end": 2944,
+                                            "start": 2887
+                                        },
+                                        {
+                                            "end": 3117,
+                                            "start": 2945
+                                        },
+                                        {
+                                            "end": 3176,
+                                            "start": 3118
+                                        },
+                                        {
+                                            "end": 3229,
+                                            "start": 3177
+                                        },
+                                        {
+                                            "end": 3475,
+                                            "start": 3239
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SSF49899",
+                                    "regions": [
+                                        {
+                                            "end": 1632,
+                                            "start": 1421
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SM00159",
+                                    "regions": [
+                                        {
+                                            "end": 1627,
+                                            "start": 1420
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PF00354",
+                                    "regions": [
+                                        {
+                                            "end": 1620,
+                                            "start": 1442
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PF07699",
+                                    "regions": [
+                                        {
+                                            "end": 360,
+                                            "start": 310
+                                        },
+                                        {
+                                            "end": 1052,
+                                            "start": 1005
+                                        },
+                                        {
+                                            "end": 1106,
+                                            "start": 1059
+                                        },
+                                        {
+                                            "end": 1160,
+                                            "start": 1113
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PS50311",
+                                    "regions": [
+                                        {
+                                            "end": 1409,
+                                            "start": 1197
+                                        },
+                                        {
+                                            "end": 3554,
+                                            "start": 3468
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PS50825",
+                                    "regions": [
+                                        {
+                                            "end": 642,
+                                            "start": 560
+                                        },
+                                        {
+                                            "end": 724,
+                                            "start": 643
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PF00092",
+                                    "regions": [
+                                        {
+                                            "end": 252,
+                                            "start": 84
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SSF57196",
+                                    "regions": [
+                                        {
+                                            "end": 1267,
+                                            "start": 1189
+                                        },
+                                        {
+                                            "end": 1305,
+                                            "start": 1268
+                                        },
+                                        {
+                                            "end": 1342,
+                                            "start": 1306
+                                        },
+                                        {
+                                            "end": 1423,
+                                            "start": 1344
+                                        },
+                                        {
+                                            "end": 1786,
+                                            "start": 1735
+                                        },
+                                        {
+                                            "end": 3506,
+                                            "start": 3463
+                                        },
+                                        {
+                                            "end": 3535,
+                                            "start": 3507
+                                        },
+                                        {
+                                            "end": 3570,
+                                            "start": 3537
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PS50026",
+                                    "regions": [
+                                        {
+                                            "end": 1229,
+                                            "start": 1193
+                                        },
+                                        {
+                                            "end": 1267,
+                                            "start": 1231
+                                        },
+                                        {
+                                            "end": 1305,
+                                            "start": 1269
+                                        },
+                                        {
+                                            "end": 1343,
+                                            "start": 1307
+                                        },
+                                        {
+                                            "end": 1381,
+                                            "start": 1345
+                                        },
+                                        {
+                                            "end": 1419,
+                                            "start": 1383
+                                        },
+                                        {
+                                            "end": 1784,
+                                            "start": 1745
+                                        },
+                                        {
+                                            "end": 3532,
+                                            "start": 3500
+                                        },
+                                        {
+                                            "end": 3564,
+                                            "start": 3533
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SM00181",
+                                    "regions": [
+                                        {
+                                            "end": 1229,
+                                            "start": 1196
+                                        },
+                                        {
+                                            "end": 1267,
+                                            "start": 1234
+                                        },
+                                        {
+                                            "end": 1305,
+                                            "start": 1272
+                                        },
+                                        {
+                                            "end": 1343,
+                                            "start": 1310
+                                        },
+                                        {
+                                            "end": 1381,
+                                            "start": 1348
+                                        },
+                                        {
+                                            "end": 1419,
+                                            "start": 1386
+                                        },
+                                        {
+                                            "end": 1784,
+                                            "start": 1748
+                                        },
+                                        {
+                                            "end": 3500,
+                                            "start": 3471
+                                        },
+                                        {
+                                            "end": 3532,
+                                            "start": 3503
+                                        },
+                                        {
+                                            "end": 3564,
+                                            "start": 3535
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SM00179",
+                                    "regions": [
+                                        {
+                                            "end": 1229,
+                                            "start": 1196
+                                        },
+                                        {
+                                            "end": 1267,
+                                            "start": 1231
+                                        },
+                                        {
+                                            "end": 1305,
+                                            "start": 1269
+                                        },
+                                        {
+                                            "end": 1343,
+                                            "start": 1307
+                                        },
+                                        {
+                                            "end": 1381,
+                                            "start": 1345
+                                        },
+                                        {
+                                            "end": 1419,
+                                            "start": 1383
+                                        },
+                                        {
+                                            "end": 1784,
+                                            "start": 1745
+                                        },
+                                        {
+                                            "end": 3532,
+                                            "start": 3504
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SSF57184",
+                                    "regions": [
+                                        {
+                                            "end": 440,
+                                            "start": 269
+                                        },
+                                        {
+                                            "end": 1144,
+                                            "start": 988
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PF07645",
+                                    "regions": [
+                                        {
+                                            "end": 1783,
+                                            "start": 1745
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PS50923",
+                                    "regions": [
+                                        {
+                                            "end": 435,
+                                            "start": 376
+                                        },
+                                        {
+                                            "end": 495,
+                                            "start": 436
+                                        },
+                                        {
+                                            "end": 561,
+                                            "start": 496
+                                        },
+                                        {
+                                            "end": 789,
+                                            "start": 725
+                                        },
+                                        {
+                                            "end": 1687,
+                                            "start": 1629
+                                        },
+                                        {
+                                            "end": 1745,
+                                            "start": 1688
+                                        },
+                                        {
+                                            "end": 1844,
+                                            "start": 1787
+                                        },
+                                        {
+                                            "end": 1902,
+                                            "start": 1845
+                                        },
+                                        {
+                                            "end": 1960,
+                                            "start": 1903
+                                        },
+                                        {
+                                            "end": 2018,
+                                            "start": 1961
+                                        },
+                                        {
+                                            "end": 2080,
+                                            "start": 2019
+                                        },
+                                        {
+                                            "end": 2143,
+                                            "start": 2081
+                                        },
+                                        {
+                                            "end": 2201,
+                                            "start": 2144
+                                        },
+                                        {
+                                            "end": 2261,
+                                            "start": 2202
+                                        },
+                                        {
+                                            "end": 2320,
+                                            "start": 2262
+                                        },
+                                        {
+                                            "end": 2378,
+                                            "start": 2321
+                                        },
+                                        {
+                                            "end": 2437,
+                                            "start": 2379
+                                        },
+                                        {
+                                            "end": 2495,
+                                            "start": 2438
+                                        },
+                                        {
+                                            "end": 2553,
+                                            "start": 2496
+                                        },
+                                        {
+                                            "end": 2610,
+                                            "start": 2554
+                                        },
+                                        {
+                                            "end": 2714,
+                                            "start": 2663
+                                        },
+                                        {
+                                            "end": 2772,
+                                            "start": 2715
+                                        },
+                                        {
+                                            "end": 2830,
+                                            "start": 2773
+                                        },
+                                        {
+                                            "end": 2888,
+                                            "start": 2831
+                                        },
+                                        {
+                                            "end": 2946,
+                                            "start": 2889
+                                        },
+                                        {
+                                            "end": 3004,
+                                            "start": 2947
+                                        },
+                                        {
+                                            "end": 3061,
+                                            "start": 3005
+                                        },
+                                        {
+                                            "end": 3119,
+                                            "start": 3062
+                                        },
+                                        {
+                                            "end": 3178,
+                                            "start": 3120
+                                        },
+                                        {
+                                            "end": 3238,
+                                            "start": 3179
+                                        },
+                                        {
+                                            "end": 3296,
+                                            "start": 3239
+                                        },
+                                        {
+                                            "end": 3354,
+                                            "start": 3297
+                                        },
+                                        {
+                                            "end": 3413,
+                                            "start": 3355
+                                        },
+                                        {
+                                            "end": 3470,
+                                            "start": 3414
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SM00327",
+                                    "regions": [
+                                        {
+                                            "end": 260,
+                                            "start": 81
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PF00008",
+                                    "regions": [
+                                        {
+                                            "end": 1226,
+                                            "start": 1197
+                                        },
+                                        {
+                                            "end": 1265,
+                                            "start": 1235
+                                        },
+                                        {
+                                            "end": 1302,
+                                            "start": 1273
+                                        },
+                                        {
+                                            "end": 1379,
+                                            "start": 1349
+                                        },
+                                        {
+                                            "end": 1417,
+                                            "start": 1387
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PS50234",
+                                    "regions": [
+                                        {
+                                            "end": 264,
+                                            "start": 83
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PF07974",
+                                    "regions": [
+                                        {
+                                            "end": 1266,
+                                            "start": 1235
+                                        },
+                                        {
+                                            "end": 3499,
+                                            "start": 3475
+                                        },
+                                        {
+                                            "end": 3531,
+                                            "start": 3507
+                                        },
+                                        {
+                                            "end": 3563,
+                                            "start": 3536
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SSF53300",
+                                    "regions": [
+                                        {
+                                            "end": 262,
+                                            "start": 79
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PF00084",
+                                    "regions": [
+                                        {
+                                            "end": 430,
+                                            "start": 378
+                                        },
+                                        {
+                                            "end": 493,
+                                            "start": 438
+                                        },
+                                        {
+                                            "end": 1685,
+                                            "start": 1628
+                                        },
+                                        {
+                                            "end": 1743,
+                                            "start": 1690
+                                        },
+                                        {
+                                            "end": 1842,
+                                            "start": 1789
+                                        },
+                                        {
+                                            "end": 1900,
+                                            "start": 1847
+                                        },
+                                        {
+                                            "end": 1958,
+                                            "start": 1905
+                                        },
+                                        {
+                                            "end": 2016,
+                                            "start": 1963
+                                        },
+                                        {
+                                            "end": 2078,
+                                            "start": 2021
+                                        },
+                                        {
+                                            "end": 2136,
+                                            "start": 2083
+                                        },
+                                        {
+                                            "end": 2199,
+                                            "start": 2146
+                                        },
+                                        {
+                                            "end": 2259,
+                                            "start": 2204
+                                        },
+                                        {
+                                            "end": 2318,
+                                            "start": 2264
+                                        },
+                                        {
+                                            "end": 2376,
+                                            "start": 2323
+                                        },
+                                        {
+                                            "end": 2435,
+                                            "start": 2381
+                                        },
+                                        {
+                                            "end": 2493,
+                                            "start": 2440
+                                        },
+                                        {
+                                            "end": 2551,
+                                            "start": 2498
+                                        },
+                                        {
+                                            "end": 2608,
+                                            "start": 2556
+                                        },
+                                        {
+                                            "end": 2712,
+                                            "start": 2667
+                                        },
+                                        {
+                                            "end": 2770,
+                                            "start": 2717
+                                        },
+                                        {
+                                            "end": 2828,
+                                            "start": 2775
+                                        },
+                                        {
+                                            "end": 2886,
+                                            "start": 2833
+                                        },
+                                        {
+                                            "end": 2944,
+                                            "start": 2891
+                                        },
+                                        {
+                                            "end": 3002,
+                                            "start": 2949
+                                        },
+                                        {
+                                            "end": 3059,
+                                            "start": 3007
+                                        },
+                                        {
+                                            "end": 3117,
+                                            "start": 3084
+                                        },
+                                        {
+                                            "end": 3172,
+                                            "start": 3122
+                                        },
+                                        {
+                                            "end": 3236,
+                                            "start": 3181
+                                        },
+                                        {
+                                            "end": 3290,
+                                            "start": 3241
+                                        },
+                                        {
+                                            "end": 3352,
+                                            "start": 3299
+                                        },
+                                        {
+                                            "end": 3411,
+                                            "start": 3357
+                                        },
+                                        {
+                                            "end": 3468,
+                                            "start": 3416
+                                        }
+                                    ]
+                                }
+                            ]
+                        }
+                    ]
+                },
+                {
+                    "end": 113190038,
+                    "exons": [
+                        {
+                            "end": 113128840,
+                            "start": 113127536
+                        },
+                        {
+                            "end": 113132296,
+                            "start": 113132203
+                        },
+                        {
+                            "end": 113137743,
+                            "start": 113137648
+                        },
+                        {
+                            "end": 113139646,
+                            "start": 113139551
+                        },
+                        {
+                            "end": 113141797,
+                            "start": 113141627
+                        },
+                        {
+                            "end": 113148354,
+                            "start": 113148178
+                        },
+                        {
+                            "end": 113149738,
+                            "start": 113149565
+                        },
+                        {
+                            "end": 113151867,
+                            "start": 113151804
+                        },
+                        {
+                            "end": 113163289,
+                            "start": 113163134
+                        },
+                        {
+                            "end": 113166832,
+                            "start": 113166607
+                        },
+                        {
+                            "end": 113171231,
+                            "start": 113168440
+                        },
+                        {
+                            "end": 113174015,
+                            "start": 113173343
+                        },
+                        {
+                            "end": 113190038,
+                            "start": 113189871
+                        }
+                    ],
+                    "is_best_transcript": false,
+                    "name": "ENST00000297826",
+                    "start": 113127536,
+                    "translations": [
+                        {
+                            "cdna_coding_end": 4909,
+                            "cdna_coding_start": 416,
+                            "domains": [
+                                {
+                                    "name": "PF00084",
+                                    "regions": [
+                                        {
+                                            "end": 62,
+                                            "start": 9
+                                        },
+                                        {
+                                            "end": 125,
+                                            "start": 72
+                                        },
+                                        {
+                                            "end": 185,
+                                            "start": 130
+                                        },
+                                        {
+                                            "end": 244,
+                                            "start": 190
+                                        },
+                                        {
+                                            "end": 302,
+                                            "start": 249
+                                        },
+                                        {
+                                            "end": 361,
+                                            "start": 307
+                                        },
+                                        {
+                                            "end": 419,
+                                            "start": 366
+                                        },
+                                        {
+                                            "end": 477,
+                                            "start": 424
+                                        },
+                                        {
+                                            "end": 534,
+                                            "start": 482
+                                        },
+                                        {
+                                            "end": 638,
+                                            "start": 593
+                                        },
+                                        {
+                                            "end": 696,
+                                            "start": 643
+                                        },
+                                        {
+                                            "end": 754,
+                                            "start": 701
+                                        },
+                                        {
+                                            "end": 812,
+                                            "start": 759
+                                        },
+                                        {
+                                            "end": 870,
+                                            "start": 817
+                                        },
+                                        {
+                                            "end": 928,
+                                            "start": 875
+                                        },
+                                        {
+                                            "end": 985,
+                                            "start": 933
+                                        },
+                                        {
+                                            "end": 1043,
+                                            "start": 1010
+                                        },
+                                        {
+                                            "end": 1098,
+                                            "start": 1048
+                                        },
+                                        {
+                                            "end": 1162,
+                                            "start": 1107
+                                        },
+                                        {
+                                            "end": 1216,
+                                            "start": 1167
+                                        },
+                                        {
+                                            "end": 1278,
+                                            "start": 1225
+                                        },
+                                        {
+                                            "end": 1337,
+                                            "start": 1283
+                                        },
+                                        {
+                                            "end": 1394,
+                                            "start": 1342
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PF07974",
+                                    "regions": [
+                                        {
+                                            "end": 1425,
+                                            "start": 1401
+                                        },
+                                        {
+                                            "end": 1457,
+                                            "start": 1433
+                                        },
+                                        {
+                                            "end": 1489,
+                                            "start": 1462
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PF00008",
+                                    "regions": [
+                                        {
+                                            "end": 1456,
+                                            "start": 1427
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PS50923",
+                                    "regions": [
+                                        {
+                                            "end": 69,
+                                            "start": 7
+                                        },
+                                        {
+                                            "end": 127,
+                                            "start": 70
+                                        },
+                                        {
+                                            "end": 187,
+                                            "start": 128
+                                        },
+                                        {
+                                            "end": 246,
+                                            "start": 188
+                                        },
+                                        {
+                                            "end": 304,
+                                            "start": 247
+                                        },
+                                        {
+                                            "end": 363,
+                                            "start": 305
+                                        },
+                                        {
+                                            "end": 421,
+                                            "start": 364
+                                        },
+                                        {
+                                            "end": 479,
+                                            "start": 422
+                                        },
+                                        {
+                                            "end": 536,
+                                            "start": 480
+                                        },
+                                        {
+                                            "end": 640,
+                                            "start": 589
+                                        },
+                                        {
+                                            "end": 698,
+                                            "start": 641
+                                        },
+                                        {
+                                            "end": 756,
+                                            "start": 699
+                                        },
+                                        {
+                                            "end": 814,
+                                            "start": 757
+                                        },
+                                        {
+                                            "end": 872,
+                                            "start": 815
+                                        },
+                                        {
+                                            "end": 930,
+                                            "start": 873
+                                        },
+                                        {
+                                            "end": 987,
+                                            "start": 931
+                                        },
+                                        {
+                                            "end": 1045,
+                                            "start": 988
+                                        },
+                                        {
+                                            "end": 1104,
+                                            "start": 1046
+                                        },
+                                        {
+                                            "end": 1164,
+                                            "start": 1105
+                                        },
+                                        {
+                                            "end": 1222,
+                                            "start": 1165
+                                        },
+                                        {
+                                            "end": 1280,
+                                            "start": 1223
+                                        },
+                                        {
+                                            "end": 1339,
+                                            "start": 1281
+                                        },
+                                        {
+                                            "end": 1396,
+                                            "start": 1340
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SM00181",
+                                    "regions": [
+                                        {
+                                            "end": 1426,
+                                            "start": 1397
+                                        },
+                                        {
+                                            "end": 1458,
+                                            "start": 1429
+                                        },
+                                        {
+                                            "end": 1490,
+                                            "start": 1461
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SSF57196",
+                                    "regions": [
+                                        {
+                                            "end": 1432,
+                                            "start": 1389
+                                        },
+                                        {
+                                            "end": 1461,
+                                            "start": 1433
+                                        },
+                                        {
+                                            "end": 1496,
+                                            "start": 1463
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PS50026",
+                                    "regions": [
+                                        {
+                                            "end": 1458,
+                                            "start": 1426
+                                        },
+                                        {
+                                            "end": 1490,
+                                            "start": 1459
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PS50311",
+                                    "regions": [
+                                        {
+                                            "end": 1480,
+                                            "start": 1394
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SSF57535",
+                                    "regions": [
+                                        {
+                                            "end": 125,
+                                            "start": 7
+                                        },
+                                        {
+                                            "end": 244,
+                                            "start": 128
+                                        },
+                                        {
+                                            "end": 303,
+                                            "start": 247
+                                        },
+                                        {
+                                            "end": 363,
+                                            "start": 305
+                                        },
+                                        {
+                                            "end": 477,
+                                            "start": 364
+                                        },
+                                        {
+                                            "end": 542,
+                                            "start": 478
+                                        },
+                                        {
+                                            "end": 638,
+                                            "start": 569
+                                        },
+                                        {
+                                            "end": 754,
+                                            "start": 641
+                                        },
+                                        {
+                                            "end": 812,
+                                            "start": 755
+                                        },
+                                        {
+                                            "end": 870,
+                                            "start": 813
+                                        },
+                                        {
+                                            "end": 1043,
+                                            "start": 871
+                                        },
+                                        {
+                                            "end": 1102,
+                                            "start": 1044
+                                        },
+                                        {
+                                            "end": 1155,
+                                            "start": 1103
+                                        },
+                                        {
+                                            "end": 1401,
+                                            "start": 1165
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SM00032",
+                                    "regions": [
+                                        {
+                                            "end": 67,
+                                            "start": 9
+                                        },
+                                        {
+                                            "end": 125,
+                                            "start": 72
+                                        },
+                                        {
+                                            "end": 185,
+                                            "start": 130
+                                        },
+                                        {
+                                            "end": 244,
+                                            "start": 190
+                                        },
+                                        {
+                                            "end": 302,
+                                            "start": 249
+                                        },
+                                        {
+                                            "end": 361,
+                                            "start": 307
+                                        },
+                                        {
+                                            "end": 419,
+                                            "start": 366
+                                        },
+                                        {
+                                            "end": 477,
+                                            "start": 424
+                                        },
+                                        {
+                                            "end": 534,
+                                            "start": 482
+                                        },
+                                        {
+                                            "end": 638,
+                                            "start": 580
+                                        },
+                                        {
+                                            "end": 696,
+                                            "start": 643
+                                        },
+                                        {
+                                            "end": 754,
+                                            "start": 701
+                                        },
+                                        {
+                                            "end": 812,
+                                            "start": 759
+                                        },
+                                        {
+                                            "end": 870,
+                                            "start": 817
+                                        },
+                                        {
+                                            "end": 928,
+                                            "start": 875
+                                        },
+                                        {
+                                            "end": 985,
+                                            "start": 933
+                                        },
+                                        {
+                                            "end": 1043,
+                                            "start": 990
+                                        },
+                                        {
+                                            "end": 1102,
+                                            "start": 1048
+                                        },
+                                        {
+                                            "end": 1162,
+                                            "start": 1107
+                                        },
+                                        {
+                                            "end": 1220,
+                                            "start": 1167
+                                        },
+                                        {
+                                            "end": 1278,
+                                            "start": 1225
+                                        },
+                                        {
+                                            "end": 1337,
+                                            "start": 1283
+                                        },
+                                        {
+                                            "end": 1394,
+                                            "start": 1342
+                                        }
+                                    ]
+                                }
+                            ]
+                        }
+                    ]
+                },
+                {
+                    "end": 113342018,
+                    "exons": [
+                        {
+                            "end": 113128840,
+                            "start": 113127536
+                        },
+                        {
+                            "end": 113132296,
+                            "start": 113132203
+                        },
+                        {
+                            "end": 113137743,
+                            "start": 113137648
+                        },
+                        {
+                            "end": 113139646,
+                            "start": 113139551
+                        },
+                        {
+                            "end": 113141797,
+                            "start": 113141627
+                        },
+                        {
+                            "end": 113148354,
+                            "start": 113148178
+                        },
+                        {
+                            "end": 113149738,
+                            "start": 113149565
+                        },
+                        {
+                            "end": 113151867,
+                            "start": 113151804
+                        },
+                        {
+                            "end": 113163289,
+                            "start": 113163134
+                        },
+                        {
+                            "end": 113166832,
+                            "start": 113166607
+                        },
+                        {
+                            "end": 113171231,
+                            "start": 113168440
+                        },
+                        {
+                            "end": 113174015,
+                            "start": 113173343
+                        },
+                        {
+                            "end": 113190038,
+                            "start": 113189871
+                        },
+                        {
+                            "end": 113191614,
+                            "start": 113191423
+                        },
+                        {
+                            "end": 113192284,
+                            "start": 113192200
+                        },
+                        {
+                            "end": 113192730,
+                            "start": 113192554
+                        },
+                        {
+                            "end": 113194314,
+                            "start": 113194195
+                        },
+                        {
+                            "end": 113194915,
+                            "start": 113194742
+                        },
+                        {
+                            "end": 113196786,
+                            "start": 113196616
+                        },
+                        {
+                            "end": 113197644,
+                            "start": 113197521
+                        },
+                        {
+                            "end": 113198784,
+                            "start": 113198660
+                        },
+                        {
+                            "end": 113206000,
+                            "start": 113205825
+                        },
+                        {
+                            "end": 113208318,
+                            "start": 113208117
+                        },
+                        {
+                            "end": 113209337,
+                            "start": 113209180
+                        },
+                        {
+                            "end": 113212540,
+                            "start": 113212339
+                        },
+                        {
+                            "end": 113213682,
+                            "start": 113213569
+                        },
+                        {
+                            "end": 113217983,
+                            "start": 113217870
+                        },
+                        {
+                            "end": 113219632,
+                            "start": 113219536
+                        },
+                        {
+                            "end": 113220842,
+                            "start": 113220751
+                        },
+                        {
+                            "end": 113221393,
+                            "start": 113221232
+                        },
+                        {
+                            "end": 113228306,
+                            "start": 113228145
+                        },
+                        {
+                            "end": 113231381,
+                            "start": 113231220
+                        },
+                        {
+                            "end": 113233877,
+                            "start": 113233644
+                        },
+                        {
+                            "end": 113234603,
+                            "start": 113234439
+                        },
+                        {
+                            "end": 113238595,
+                            "start": 113238484
+                        },
+                        {
+                            "end": 113242036,
+                            "start": 113241915
+                        },
+                        {
+                            "end": 113243716,
+                            "start": 113243522
+                        },
+                        {
+                            "end": 113244772,
+                            "start": 113244641
+                        },
+                        {
+                            "end": 113245973,
+                            "start": 113245866
+                        },
+                        {
+                            "end": 113252059,
+                            "start": 113251930
+                        },
+                        {
+                            "end": 113259213,
+                            "start": 113259095
+                        },
+                        {
+                            "end": 113261518,
+                            "start": 113261321
+                        },
+                        {
+                            "end": 113265497,
+                            "start": 113265318
+                        },
+                        {
+                            "end": 113275385,
+                            "start": 113275206
+                        },
+                        {
+                            "end": 113276386,
+                            "start": 113276228
+                        },
+                        {
+                            "end": 113308571,
+                            "start": 113308395
+                        },
+                        {
+                            "end": 113312384,
+                            "start": 113312129
+                        },
+                        {
+                            "end": 113342018,
+                            "start": 113341293
+                        }
+                    ],
+                    "is_best_transcript": false,
+                    "name": "ENST00000374469",
+                    "start": 113127536,
+                    "translations": [
+                        {
+                            "cdna_coding_end": 10911,
+                            "cdna_coding_start": 265,
+                            "domains": [
+                                {
+                                    "name": "SSF57535",
+                                    "regions": [
+                                        {
+                                            "end": 410,
+                                            "start": 351
+                                        },
+                                        {
+                                            "end": 470,
+                                            "start": 411
+                                        },
+                                        {
+                                            "end": 537,
+                                            "start": 471
+                                        },
+                                        {
+                                            "end": 767,
+                                            "start": 704
+                                        },
+                                        {
+                                            "end": 1723,
+                                            "start": 1603
+                                        },
+                                        {
+                                            "end": 1819,
+                                            "start": 1762
+                                        },
+                                        {
+                                            "end": 1877,
+                                            "start": 1820
+                                        },
+                                        {
+                                            "end": 1935,
+                                            "start": 1878
+                                        },
+                                        {
+                                            "end": 1993,
+                                            "start": 1936
+                                        },
+                                        {
+                                            "end": 2055,
+                                            "start": 1994
+                                        },
+                                        {
+                                            "end": 2176,
+                                            "start": 2058
+                                        },
+                                        {
+                                            "end": 2295,
+                                            "start": 2179
+                                        },
+                                        {
+                                            "end": 2354,
+                                            "start": 2298
+                                        },
+                                        {
+                                            "end": 2414,
+                                            "start": 2356
+                                        },
+                                        {
+                                            "end": 2528,
+                                            "start": 2415
+                                        },
+                                        {
+                                            "end": 2593,
+                                            "start": 2529
+                                        },
+                                        {
+                                            "end": 2689,
+                                            "start": 2620
+                                        },
+                                        {
+                                            "end": 2805,
+                                            "start": 2692
+                                        },
+                                        {
+                                            "end": 2863,
+                                            "start": 2806
+                                        },
+                                        {
+                                            "end": 2921,
+                                            "start": 2864
+                                        },
+                                        {
+                                            "end": 3094,
+                                            "start": 2922
+                                        },
+                                        {
+                                            "end": 3153,
+                                            "start": 3095
+                                        },
+                                        {
+                                            "end": 3206,
+                                            "start": 3154
+                                        },
+                                        {
+                                            "end": 3452,
+                                            "start": 3216
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SSF49899",
+                                    "regions": [
+                                        {
+                                            "end": 1609,
+                                            "start": 1398
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SM00159",
+                                    "regions": [
+                                        {
+                                            "end": 1604,
+                                            "start": 1397
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PF00354",
+                                    "regions": [
+                                        {
+                                            "end": 1597,
+                                            "start": 1419
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PR00895",
+                                    "regions": [
+                                        {
+                                            "end": 1507,
+                                            "start": 1489
+                                        },
+                                        {
+                                            "end": 1535,
+                                            "start": 1516
+                                        },
+                                        {
+                                            "end": 1569,
+                                            "start": 1536
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PF02494",
+                                    "regions": [
+                                        {
+                                            "end": 619,
+                                            "start": 538
+                                        },
+                                        {
+                                            "end": 698,
+                                            "start": 621
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SM00032",
+                                    "regions": [
+                                        {
+                                            "end": 410,
+                                            "start": 355
+                                        },
+                                        {
+                                            "end": 470,
+                                            "start": 415
+                                        },
+                                        {
+                                            "end": 536,
+                                            "start": 475
+                                        },
+                                        {
+                                            "end": 764,
+                                            "start": 704
+                                        },
+                                        {
+                                            "end": 1662,
+                                            "start": 1608
+                                        },
+                                        {
+                                            "end": 1720,
+                                            "start": 1667
+                                        },
+                                        {
+                                            "end": 1819,
+                                            "start": 1766
+                                        },
+                                        {
+                                            "end": 1877,
+                                            "start": 1824
+                                        },
+                                        {
+                                            "end": 1935,
+                                            "start": 1882
+                                        },
+                                        {
+                                            "end": 1993,
+                                            "start": 1940
+                                        },
+                                        {
+                                            "end": 2055,
+                                            "start": 1998
+                                        },
+                                        {
+                                            "end": 2118,
+                                            "start": 2060
+                                        },
+                                        {
+                                            "end": 2176,
+                                            "start": 2123
+                                        },
+                                        {
+                                            "end": 2236,
+                                            "start": 2181
+                                        },
+                                        {
+                                            "end": 2295,
+                                            "start": 2241
+                                        },
+                                        {
+                                            "end": 2353,
+                                            "start": 2300
+                                        },
+                                        {
+                                            "end": 2412,
+                                            "start": 2358
+                                        },
+                                        {
+                                            "end": 2470,
+                                            "start": 2417
+                                        },
+                                        {
+                                            "end": 2528,
+                                            "start": 2475
+                                        },
+                                        {
+                                            "end": 2585,
+                                            "start": 2533
+                                        },
+                                        {
+                                            "end": 2689,
+                                            "start": 2631
+                                        },
+                                        {
+                                            "end": 2747,
+                                            "start": 2694
+                                        },
+                                        {
+                                            "end": 2805,
+                                            "start": 2752
+                                        },
+                                        {
+                                            "end": 2863,
+                                            "start": 2810
+                                        },
+                                        {
+                                            "end": 2921,
+                                            "start": 2868
+                                        },
+                                        {
+                                            "end": 2979,
+                                            "start": 2926
+                                        },
+                                        {
+                                            "end": 3036,
+                                            "start": 2984
+                                        },
+                                        {
+                                            "end": 3094,
+                                            "start": 3041
+                                        },
+                                        {
+                                            "end": 3153,
+                                            "start": 3099
+                                        },
+                                        {
+                                            "end": 3213,
+                                            "start": 3158
+                                        },
+                                        {
+                                            "end": 3271,
+                                            "start": 3218
+                                        },
+                                        {
+                                            "end": 3329,
+                                            "start": 3276
+                                        },
+                                        {
+                                            "end": 3388,
+                                            "start": 3334
+                                        },
+                                        {
+                                            "end": 3445,
+                                            "start": 3393
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SM00179",
+                                    "regions": [
+                                        {
+                                            "end": 1206,
+                                            "start": 1173
+                                        },
+                                        {
+                                            "end": 1244,
+                                            "start": 1208
+                                        },
+                                        {
+                                            "end": 1282,
+                                            "start": 1246
+                                        },
+                                        {
+                                            "end": 1320,
+                                            "start": 1284
+                                        },
+                                        {
+                                            "end": 1358,
+                                            "start": 1322
+                                        },
+                                        {
+                                            "end": 1396,
+                                            "start": 1360
+                                        },
+                                        {
+                                            "end": 1761,
+                                            "start": 1722
+                                        },
+                                        {
+                                            "end": 3509,
+                                            "start": 3481
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SSF57184",
+                                    "regions": [
+                                        {
+                                            "end": 417,
+                                            "start": 246
+                                        },
+                                        {
+                                            "end": 1121,
+                                            "start": 965
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SSF57196",
+                                    "regions": [
+                                        {
+                                            "end": 1244,
+                                            "start": 1166
+                                        },
+                                        {
+                                            "end": 1282,
+                                            "start": 1245
+                                        },
+                                        {
+                                            "end": 1319,
+                                            "start": 1283
+                                        },
+                                        {
+                                            "end": 1400,
+                                            "start": 1321
+                                        },
+                                        {
+                                            "end": 1763,
+                                            "start": 1712
+                                        },
+                                        {
+                                            "end": 3483,
+                                            "start": 3440
+                                        },
+                                        {
+                                            "end": 3512,
+                                            "start": 3484
+                                        },
+                                        {
+                                            "end": 3547,
+                                            "start": 3514
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PS50026",
+                                    "regions": [
+                                        {
+                                            "end": 1206,
+                                            "start": 1170
+                                        },
+                                        {
+                                            "end": 1244,
+                                            "start": 1208
+                                        },
+                                        {
+                                            "end": 1282,
+                                            "start": 1246
+                                        },
+                                        {
+                                            "end": 1320,
+                                            "start": 1284
+                                        },
+                                        {
+                                            "end": 1358,
+                                            "start": 1322
+                                        },
+                                        {
+                                            "end": 1396,
+                                            "start": 1360
+                                        },
+                                        {
+                                            "end": 1761,
+                                            "start": 1722
+                                        },
+                                        {
+                                            "end": 3509,
+                                            "start": 3477
+                                        },
+                                        {
+                                            "end": 3541,
+                                            "start": 3510
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SM00181",
+                                    "regions": [
+                                        {
+                                            "end": 1206,
+                                            "start": 1173
+                                        },
+                                        {
+                                            "end": 1244,
+                                            "start": 1211
+                                        },
+                                        {
+                                            "end": 1282,
+                                            "start": 1249
+                                        },
+                                        {
+                                            "end": 1320,
+                                            "start": 1287
+                                        },
+                                        {
+                                            "end": 1358,
+                                            "start": 1325
+                                        },
+                                        {
+                                            "end": 1396,
+                                            "start": 1363
+                                        },
+                                        {
+                                            "end": 1761,
+                                            "start": 1725
+                                        },
+                                        {
+                                            "end": 3477,
+                                            "start": 3448
+                                        },
+                                        {
+                                            "end": 3509,
+                                            "start": 3480
+                                        },
+                                        {
+                                            "end": 3541,
+                                            "start": 3512
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PF00092",
+                                    "regions": [
+                                        {
+                                            "end": 229,
+                                            "start": 61
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PS50825",
+                                    "regions": [
+                                        {
+                                            "end": 619,
+                                            "start": 537
+                                        },
+                                        {
+                                            "end": 701,
+                                            "start": 620
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PS50311",
+                                    "regions": [
+                                        {
+                                            "end": 1386,
+                                            "start": 1174
+                                        },
+                                        {
+                                            "end": 3531,
+                                            "start": 3445
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PF07699",
+                                    "regions": [
+                                        {
+                                            "end": 337,
+                                            "start": 287
+                                        },
+                                        {
+                                            "end": 1029,
+                                            "start": 982
+                                        },
+                                        {
+                                            "end": 1083,
+                                            "start": 1036
+                                        },
+                                        {
+                                            "end": 1137,
+                                            "start": 1090
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PF00008",
+                                    "regions": [
+                                        {
+                                            "end": 1203,
+                                            "start": 1174
+                                        },
+                                        {
+                                            "end": 1242,
+                                            "start": 1212
+                                        },
+                                        {
+                                            "end": 1279,
+                                            "start": 1250
+                                        },
+                                        {
+                                            "end": 1356,
+                                            "start": 1326
+                                        },
+                                        {
+                                            "end": 1394,
+                                            "start": 1364
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SM00327",
+                                    "regions": [
+                                        {
+                                            "end": 237,
+                                            "start": 58
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PS50923",
+                                    "regions": [
+                                        {
+                                            "end": 412,
+                                            "start": 353
+                                        },
+                                        {
+                                            "end": 472,
+                                            "start": 413
+                                        },
+                                        {
+                                            "end": 538,
+                                            "start": 473
+                                        },
+                                        {
+                                            "end": 766,
+                                            "start": 702
+                                        },
+                                        {
+                                            "end": 1664,
+                                            "start": 1606
+                                        },
+                                        {
+                                            "end": 1722,
+                                            "start": 1665
+                                        },
+                                        {
+                                            "end": 1821,
+                                            "start": 1764
+                                        },
+                                        {
+                                            "end": 1879,
+                                            "start": 1822
+                                        },
+                                        {
+                                            "end": 1937,
+                                            "start": 1880
+                                        },
+                                        {
+                                            "end": 1995,
+                                            "start": 1938
+                                        },
+                                        {
+                                            "end": 2057,
+                                            "start": 1996
+                                        },
+                                        {
+                                            "end": 2120,
+                                            "start": 2058
+                                        },
+                                        {
+                                            "end": 2178,
+                                            "start": 2121
+                                        },
+                                        {
+                                            "end": 2238,
+                                            "start": 2179
+                                        },
+                                        {
+                                            "end": 2297,
+                                            "start": 2239
+                                        },
+                                        {
+                                            "end": 2355,
+                                            "start": 2298
+                                        },
+                                        {
+                                            "end": 2414,
+                                            "start": 2356
+                                        },
+                                        {
+                                            "end": 2472,
+                                            "start": 2415
+                                        },
+                                        {
+                                            "end": 2530,
+                                            "start": 2473
+                                        },
+                                        {
+                                            "end": 2587,
+                                            "start": 2531
+                                        },
+                                        {
+                                            "end": 2691,
+                                            "start": 2640
+                                        },
+                                        {
+                                            "end": 2749,
+                                            "start": 2692
+                                        },
+                                        {
+                                            "end": 2807,
+                                            "start": 2750
+                                        },
+                                        {
+                                            "end": 2865,
+                                            "start": 2808
+                                        },
+                                        {
+                                            "end": 2923,
+                                            "start": 2866
+                                        },
+                                        {
+                                            "end": 2981,
+                                            "start": 2924
+                                        },
+                                        {
+                                            "end": 3038,
+                                            "start": 2982
+                                        },
+                                        {
+                                            "end": 3096,
+                                            "start": 3039
+                                        },
+                                        {
+                                            "end": 3155,
+                                            "start": 3097
+                                        },
+                                        {
+                                            "end": 3215,
+                                            "start": 3156
+                                        },
+                                        {
+                                            "end": 3273,
+                                            "start": 3216
+                                        },
+                                        {
+                                            "end": 3331,
+                                            "start": 3274
+                                        },
+                                        {
+                                            "end": 3390,
+                                            "start": 3332
+                                        },
+                                        {
+                                            "end": 3447,
+                                            "start": 3391
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PF07645",
+                                    "regions": [
+                                        {
+                                            "end": 1760,
+                                            "start": 1722
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SSF53300",
+                                    "regions": [
+                                        {
+                                            "end": 239,
+                                            "start": 56
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PF00084",
+                                    "regions": [
+                                        {
+                                            "end": 407,
+                                            "start": 355
+                                        },
+                                        {
+                                            "end": 470,
+                                            "start": 415
+                                        },
+                                        {
+                                            "end": 1662,
+                                            "start": 1605
+                                        },
+                                        {
+                                            "end": 1720,
+                                            "start": 1667
+                                        },
+                                        {
+                                            "end": 1819,
+                                            "start": 1766
+                                        },
+                                        {
+                                            "end": 1877,
+                                            "start": 1824
+                                        },
+                                        {
+                                            "end": 1935,
+                                            "start": 1882
+                                        },
+                                        {
+                                            "end": 1993,
+                                            "start": 1940
+                                        },
+                                        {
+                                            "end": 2055,
+                                            "start": 1998
+                                        },
+                                        {
+                                            "end": 2113,
+                                            "start": 2060
+                                        },
+                                        {
+                                            "end": 2176,
+                                            "start": 2123
+                                        },
+                                        {
+                                            "end": 2236,
+                                            "start": 2181
+                                        },
+                                        {
+                                            "end": 2295,
+                                            "start": 2241
+                                        },
+                                        {
+                                            "end": 2353,
+                                            "start": 2300
+                                        },
+                                        {
+                                            "end": 2412,
+                                            "start": 2358
+                                        },
+                                        {
+                                            "end": 2470,
+                                            "start": 2417
+                                        },
+                                        {
+                                            "end": 2528,
+                                            "start": 2475
+                                        },
+                                        {
+                                            "end": 2585,
+                                            "start": 2533
+                                        },
+                                        {
+                                            "end": 2689,
+                                            "start": 2644
+                                        },
+                                        {
+                                            "end": 2747,
+                                            "start": 2694
+                                        },
+                                        {
+                                            "end": 2805,
+                                            "start": 2752
+                                        },
+                                        {
+                                            "end": 2863,
+                                            "start": 2810
+                                        },
+                                        {
+                                            "end": 2921,
+                                            "start": 2868
+                                        },
+                                        {
+                                            "end": 2979,
+                                            "start": 2926
+                                        },
+                                        {
+                                            "end": 3036,
+                                            "start": 2984
+                                        },
+                                        {
+                                            "end": 3094,
+                                            "start": 3061
+                                        },
+                                        {
+                                            "end": 3149,
+                                            "start": 3099
+                                        },
+                                        {
+                                            "end": 3213,
+                                            "start": 3158
+                                        },
+                                        {
+                                            "end": 3267,
+                                            "start": 3218
+                                        },
+                                        {
+                                            "end": 3329,
+                                            "start": 3276
+                                        },
+                                        {
+                                            "end": 3388,
+                                            "start": 3334
+                                        },
+                                        {
+                                            "end": 3445,
+                                            "start": 3393
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PF07974",
+                                    "regions": [
+                                        {
+                                            "end": 1243,
+                                            "start": 1212
+                                        },
+                                        {
+                                            "end": 3476,
+                                            "start": 3452
+                                        },
+                                        {
+                                            "end": 3508,
+                                            "start": 3484
+                                        },
+                                        {
+                                            "end": 3540,
+                                            "start": 3513
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PS50234",
+                                    "regions": [
+                                        {
+                                            "end": 241,
+                                            "start": 60
+                                        }
+                                    ]
+                                }
+                            ]
+                        }
+                    ]
+                },
+                {
+                    "end": 113341823,
+                    "exons": [
+                        {
+                            "end": 113206000,
+                            "start": 113204759
+                        },
+                        {
+                            "end": 113208318,
+                            "start": 113208117
+                        },
+                        {
+                            "end": 113209337,
+                            "start": 113209180
+                        },
+                        {
+                            "end": 113212540,
+                            "start": 113212339
+                        },
+                        {
+                            "end": 113213682,
+                            "start": 113213569
+                        },
+                        {
+                            "end": 113217983,
+                            "start": 113217870
+                        },
+                        {
+                            "end": 113219632,
+                            "start": 113219536
+                        },
+                        {
+                            "end": 113220399,
+                            "start": 113220395
+                        },
+                        {
+                            "end": 113220842,
+                            "start": 113220756
+                        },
+                        {
+                            "end": 113221393,
+                            "start": 113221232
+                        },
+                        {
+                            "end": 113228306,
+                            "start": 113228145
+                        },
+                        {
+                            "end": 113231381,
+                            "start": 113231220
+                        },
+                        {
+                            "end": 113233877,
+                            "start": 113233644
+                        },
+                        {
+                            "end": 113234603,
+                            "start": 113234439
+                        },
+                        {
+                            "end": 113238595,
+                            "start": 113238484
+                        },
+                        {
+                            "end": 113242036,
+                            "start": 113241915
+                        },
+                        {
+                            "end": 113243716,
+                            "start": 113243522
+                        },
+                        {
+                            "end": 113244772,
+                            "start": 113244641
+                        },
+                        {
+                            "end": 113245973,
+                            "start": 113245866
+                        },
+                        {
+                            "end": 113252059,
+                            "start": 113251930
+                        },
+                        {
+                            "end": 113259213,
+                            "start": 113259095
+                        },
+                        {
+                            "end": 113261518,
+                            "start": 113261321
+                        },
+                        {
+                            "end": 113265497,
+                            "start": 113265318
+                        },
+                        {
+                            "end": 113275385,
+                            "start": 113275206
+                        },
+                        {
+                            "end": 113276386,
+                            "start": 113276228
+                        },
+                        {
+                            "end": 113308571,
+                            "start": 113308395
+                        },
+                        {
+                            "end": 113312384,
+                            "start": 113312129
+                        },
+                        {
+                            "end": 113341823,
+                            "start": 113341293
+                        }
+                    ],
+                    "is_best_transcript": false,
+                    "name": "ENST00000302728",
+                    "start": 113204759,
+                    "translations": [
+                        {
+                            "cdna_coding_end": 4650,
+                            "cdna_coding_start": 1,
+                            "domains": [
+                                {
+                                    "name": "PS50825",
+                                    "regions": [
+                                        {
+                                            "end": 642,
+                                            "start": 560
+                                        },
+                                        {
+                                            "end": 724,
+                                            "start": 643
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PF07699",
+                                    "regions": [
+                                        {
+                                            "end": 360,
+                                            "start": 310
+                                        },
+                                        {
+                                            "end": 1052,
+                                            "start": 1005
+                                        },
+                                        {
+                                            "end": 1106,
+                                            "start": 1059
+                                        },
+                                        {
+                                            "end": 1160,
+                                            "start": 1113
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PS50311",
+                                    "regions": [
+                                        {
+                                            "end": 1409,
+                                            "start": 1197
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SM00181",
+                                    "regions": [
+                                        {
+                                            "end": 1229,
+                                            "start": 1196
+                                        },
+                                        {
+                                            "end": 1267,
+                                            "start": 1234
+                                        },
+                                        {
+                                            "end": 1305,
+                                            "start": 1272
+                                        },
+                                        {
+                                            "end": 1343,
+                                            "start": 1310
+                                        },
+                                        {
+                                            "end": 1381,
+                                            "start": 1348
+                                        },
+                                        {
+                                            "end": 1419,
+                                            "start": 1386
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SSF57196",
+                                    "regions": [
+                                        {
+                                            "end": 1267,
+                                            "start": 1189
+                                        },
+                                        {
+                                            "end": 1305,
+                                            "start": 1268
+                                        },
+                                        {
+                                            "end": 1342,
+                                            "start": 1306
+                                        },
+                                        {
+                                            "end": 1423,
+                                            "start": 1344
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PS50026",
+                                    "regions": [
+                                        {
+                                            "end": 1229,
+                                            "start": 1193
+                                        },
+                                        {
+                                            "end": 1267,
+                                            "start": 1231
+                                        },
+                                        {
+                                            "end": 1305,
+                                            "start": 1269
+                                        },
+                                        {
+                                            "end": 1343,
+                                            "start": 1307
+                                        },
+                                        {
+                                            "end": 1381,
+                                            "start": 1345
+                                        },
+                                        {
+                                            "end": 1419,
+                                            "start": 1383
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SSF57184",
+                                    "regions": [
+                                        {
+                                            "end": 440,
+                                            "start": 269
+                                        },
+                                        {
+                                            "end": 1144,
+                                            "start": 988
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SM00179",
+                                    "regions": [
+                                        {
+                                            "end": 1229,
+                                            "start": 1196
+                                        },
+                                        {
+                                            "end": 1267,
+                                            "start": 1231
+                                        },
+                                        {
+                                            "end": 1305,
+                                            "start": 1269
+                                        },
+                                        {
+                                            "end": 1343,
+                                            "start": 1307
+                                        },
+                                        {
+                                            "end": 1381,
+                                            "start": 1345
+                                        },
+                                        {
+                                            "end": 1419,
+                                            "start": 1383
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PF00092",
+                                    "regions": [
+                                        {
+                                            "end": 252,
+                                            "start": 84
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SM00032",
+                                    "regions": [
+                                        {
+                                            "end": 433,
+                                            "start": 378
+                                        },
+                                        {
+                                            "end": 493,
+                                            "start": 438
+                                        },
+                                        {
+                                            "end": 559,
+                                            "start": 498
+                                        },
+                                        {
+                                            "end": 787,
+                                            "start": 727
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PF02494",
+                                    "regions": [
+                                        {
+                                            "end": 642,
+                                            "start": 561
+                                        },
+                                        {
+                                            "end": 721,
+                                            "start": 644
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PR00010",
+                                    "regions": [
+                                        {
+                                            "end": 1318,
+                                            "start": 1307
+                                        },
+                                        {
+                                            "end": 1364,
+                                            "start": 1357
+                                        },
+                                        {
+                                            "end": 1413,
+                                            "start": 1403
+                                        },
+                                        {
+                                            "end": 1420,
+                                            "start": 1414
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PF00354",
+                                    "regions": [
+                                        {
+                                            "end": 1532,
+                                            "start": 1442
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SSF57535",
+                                    "regions": [
+                                        {
+                                            "end": 433,
+                                            "start": 374
+                                        },
+                                        {
+                                            "end": 493,
+                                            "start": 434
+                                        },
+                                        {
+                                            "end": 560,
+                                            "start": 494
+                                        },
+                                        {
+                                            "end": 790,
+                                            "start": 727
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SSF49899",
+                                    "regions": [
+                                        {
+                                            "end": 1547,
+                                            "start": 1421
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PS50234",
+                                    "regions": [
+                                        {
+                                            "end": 264,
+                                            "start": 83
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SSF53300",
+                                    "regions": [
+                                        {
+                                            "end": 262,
+                                            "start": 79
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PF00084",
+                                    "regions": [
+                                        {
+                                            "end": 430,
+                                            "start": 378
+                                        },
+                                        {
+                                            "end": 493,
+                                            "start": 438
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PS50923",
+                                    "regions": [
+                                        {
+                                            "end": 435,
+                                            "start": 376
+                                        },
+                                        {
+                                            "end": 495,
+                                            "start": 436
+                                        },
+                                        {
+                                            "end": 561,
+                                            "start": 496
+                                        },
+                                        {
+                                            "end": 789,
+                                            "start": 725
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PF07645",
+                                    "regions": [
+                                        {
+                                            "end": 1262,
+                                            "start": 1231
+                                        },
+                                        {
+                                            "end": 1338,
+                                            "start": 1308
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PF00008",
+                                    "regions": [
+                                        {
+                                            "end": 1226,
+                                            "start": 1197
+                                        },
+                                        {
+                                            "end": 1265,
+                                            "start": 1235
+                                        },
+                                        {
+                                            "end": 1302,
+                                            "start": 1273
+                                        },
+                                        {
+                                            "end": 1337,
+                                            "start": 1311
+                                        },
+                                        {
+                                            "end": 1379,
+                                            "start": 1349
+                                        },
+                                        {
+                                            "end": 1417,
+                                            "start": 1387
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SM00327",
+                                    "regions": [
+                                        {
+                                            "end": 260,
+                                            "start": 81
+                                        }
+                                    ]
+                                }
+                            ]
+                        }
+                    ]
+                },
+                {
+                    "end": 113342160,
+                    "exons": [
+                        {
+                            "end": 113238595,
+                            "start": 113238163
+                        },
+                        {
+                            "end": 113242036,
+                            "start": 113241915
+                        },
+                        {
+                            "end": 113243716,
+                            "start": 113243522
+                        },
+                        {
+                            "end": 113244772,
+                            "start": 113244641
+                        },
+                        {
+                            "end": 113245973,
+                            "start": 113245866
+                        },
+                        {
+                            "end": 113252059,
+                            "start": 113251930
+                        },
+                        {
+                            "end": 113259213,
+                            "start": 113259095
+                        },
+                        {
+                            "end": 113261518,
+                            "start": 113261321
+                        },
+                        {
+                            "end": 113265497,
+                            "start": 113265318
+                        },
+                        {
+                            "end": 113275385,
+                            "start": 113275206
+                        },
+                        {
+                            "end": 113276386,
+                            "start": 113276228
+                        },
+                        {
+                            "end": 113308571,
+                            "start": 113308395
+                        },
+                        {
+                            "end": 113312384,
+                            "start": 113312129
+                        },
+                        {
+                            "end": 113342160,
+                            "start": 113341293
+                        }
+                    ],
+                    "is_best_transcript": false,
+                    "name": "ENST00000374461",
+                    "start": 113238163,
+                    "translations": [
+                        {
+                            "cdna_coding_end": 2944,
+                            "cdna_coding_start": 407,
+                            "domains": [
+                                {
+                                    "name": "PF02494",
+                                    "regions": [
+                                        {
+                                            "end": 619,
+                                            "start": 538
+                                        },
+                                        {
+                                            "end": 698,
+                                            "start": 621
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SM00032",
+                                    "regions": [
+                                        {
+                                            "end": 410,
+                                            "start": 355
+                                        },
+                                        {
+                                            "end": 470,
+                                            "start": 415
+                                        },
+                                        {
+                                            "end": 536,
+                                            "start": 475
+                                        },
+                                        {
+                                            "end": 764,
+                                            "start": 704
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SSF57535",
+                                    "regions": [
+                                        {
+                                            "end": 410,
+                                            "start": 351
+                                        },
+                                        {
+                                            "end": 470,
+                                            "start": 411
+                                        },
+                                        {
+                                            "end": 537,
+                                            "start": 471
+                                        },
+                                        {
+                                            "end": 767,
+                                            "start": 704
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PF07699",
+                                    "regions": [
+                                        {
+                                            "end": 337,
+                                            "start": 287
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PS50825",
+                                    "regions": [
+                                        {
+                                            "end": 619,
+                                            "start": 537
+                                        },
+                                        {
+                                            "end": 701,
+                                            "start": 620
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PF00092",
+                                    "regions": [
+                                        {
+                                            "end": 229,
+                                            "start": 61
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SSF57184",
+                                    "regions": [
+                                        {
+                                            "end": 417,
+                                            "start": 246
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PS50923",
+                                    "regions": [
+                                        {
+                                            "end": 412,
+                                            "start": 353
+                                        },
+                                        {
+                                            "end": 472,
+                                            "start": 413
+                                        },
+                                        {
+                                            "end": 538,
+                                            "start": 473
+                                        },
+                                        {
+                                            "end": 766,
+                                            "start": 702
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SM00327",
+                                    "regions": [
+                                        {
+                                            "end": 237,
+                                            "start": 58
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PS50234",
+                                    "regions": [
+                                        {
+                                            "end": 241,
+                                            "start": 60
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SSF53300",
+                                    "regions": [
+                                        {
+                                            "end": 239,
+                                            "start": 56
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PF00084",
+                                    "regions": [
+                                        {
+                                            "end": 407,
+                                            "start": 355
+                                        },
+                                        {
+                                            "end": 470,
+                                            "start": 415
+                                        }
+                                    ]
+                                }
+                            ]
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "aliases": [
+                "ARID1B"
+            ],
+            "chr": "6",
+            "end": 157530401,
+            "name": "ENSG00000049618",
+            "start": 157099063,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 157529495,
+                    "exons": [
+                        {
+                            "end": 157100605,
+                            "start": 157099063
+                        },
+                        {
+                            "end": 157150555,
+                            "start": 157150361
+                        },
+                        {
+                            "end": 157192786,
+                            "start": 157192748
+                        },
+                        {
+                            "end": 157222659,
+                            "start": 157222510
+                        },
+                        {
+                            "end": 157256710,
+                            "start": 157256600
+                        },
+                        {
+                            "end": 157406039,
+                            "start": 157405796
+                        },
+                        {
+                            "end": 157431695,
+                            "start": 157431606
+                        },
+                        {
+                            "end": 157454341,
+                            "start": 157454162
+                        },
+                        {
+                            "end": 157470085,
+                            "start": 157469758
+                        },
+                        {
+                            "end": 157488319,
+                            "start": 157488174
+                        },
+                        {
+                            "end": 157495251,
+                            "start": 157495142
+                        },
+                        {
+                            "end": 157502312,
+                            "start": 157502103
+                        },
+                        {
+                            "end": 157505569,
+                            "start": 157505365
+                        },
+                        {
+                            "end": 157510914,
+                            "start": 157510776
+                        },
+                        {
+                            "end": 157511344,
+                            "start": 157511172
+                        },
+                        {
+                            "end": 157517449,
+                            "start": 157517299
+                        },
+                        {
+                            "end": 157520041,
+                            "start": 157519945
+                        },
+                        {
+                            "end": 157522622,
+                            "start": 157521839
+                        },
+                        {
+                            "end": 157525130,
+                            "start": 157525000
+                        },
+                        {
+                            "end": 157529495,
+                            "start": 157527301
+                        }
+                    ],
+                    "is_best_transcript": true,
+                    "name": "ENST00000346085",
+                    "start": 157099063,
+                    "translations": [
+                        {
+                            "cdna_coding_end": 6751,
+                            "cdna_coding_start": 2,
+                            "domains": [
+                                {
+                                    "name": "PF12031",
+                                    "regions": [
+                                        {
+                                            "end": 2195,
+                                            "start": 1939
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PS50324",
+                                    "regions": [
+                                        {
+                                            "end": 57,
+                                            "start": 35
+                                        },
+                                        {
+                                            "end": 784,
+                                            "start": 697
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PF01388",
+                                    "regions": [
+                                        {
+                                            "end": 1153,
+                                            "start": 1065
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PS50099",
+                                    "regions": [
+                                        {
+                                            "end": 820,
+                                            "start": 715
+                                        },
+                                        {
+                                            "end": 1610,
+                                            "start": 1472
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SSF48371",
+                                    "regions": [
+                                        {
+                                            "end": 2220,
+                                            "start": 2075
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PS50316",
+                                    "regions": [
+                                        {
+                                            "end": 104,
+                                            "start": 81
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PS50322",
+                                    "regions": [
+                                        {
+                                            "end": 131,
+                                            "start": 107
+                                        },
+                                        {
+                                            "end": 646,
+                                            "start": 574
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PS51011",
+                                    "regions": [
+                                        {
+                                            "end": 1157,
+                                            "start": 1066
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PS50310",
+                                    "regions": [
+                                        {
+                                            "end": 47,
+                                            "start": 2
+                                        },
+                                        {
+                                            "end": 493,
+                                            "start": 329
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "PS50315",
+                                    "regions": [
+                                        {
+                                            "end": 401,
+                                            "start": 141
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SSF46774",
+                                    "regions": [
+                                        {
+                                            "end": 1168,
+                                            "start": 1049
+                                        }
+                                    ]
+                                },
+                                {
+                                    "name": "SM00501",
+                                    "regions": [
+                                        {
+                                            "end": 1158,
+                                            "start": 1067
+                                        }
+                                    ]
+                                }
+                            ]
+                        }
+                    ]
+                }
+            ]
+        }
+    ]
+}
diff --git a/tests/tools/test_convert_annotations_format.py b/tests/tools/test_convert_annotations_format.py
index a5530dd7..a42daadb 100644
--- a/tests/tools/test_convert_annotations_format.py
+++ b/tests/tools/test_convert_annotations_format.py
@@ -7,12 +7,14 @@
     convert_gff2_to_mavis,
     convert_gff3_to_mavis,
     convert_mavis_json_2to3,
+    convert_tab_to_json,
 )
 
 CONVERTERS = {
     'gff3': convert_gff3_to_mavis,
     'gtf': convert_gff2_to_mavis,
     'v2-json': convert_mavis_json_2to3,
+    'v2-tab': convert_tab_to_json,
 }
 
 
@@ -45,6 +47,11 @@ def sort_elements(data):
         ['Homo_sapiens.GRCh38.kras.gff3', 'Homo_sapiens.GRCh38.kras.gff3.json', 'gff3'],
         ['Homo_sapiens.GRCh38.kras.gtf', 'Homo_sapiens.GRCh38.kras.gtf.json', 'gtf'],
         ['example_genes.v2.json', 'example_genes.v3.json', 'v2-json'],
+        [
+            'ensembl69_hg19_annotations.kras.tab',
+            'ensembl69_hg19_annotations.kras.tab.json',
+            'v2-tab',
+        ],
     ],
 )
 def test_gff_examples(filename, expected_file, input_type):

From af6f8230a6c3947af0c5abcc81824439ba5d0d1a Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Tue, 8 Feb 2022 20:23:46 -0800
Subject: [PATCH 117/137] Only swap order when generating cdna_coord

---
 src/mavis/annotate/file_io.py | 39 ++++++++++++++++++++---------------
 1 file changed, 22 insertions(+), 17 deletions(-)

diff --git a/src/mavis/annotate/file_io.py b/src/mavis/annotate/file_io.py
index bcd0db0f..0f8e9aef 100644
--- a/src/mavis/annotate/file_io.py
+++ b/src/mavis/annotate/file_io.py
@@ -153,26 +153,31 @@ def parse_annotations_json(
 
                 for translation in transcript.get('translations', []):
                     try:
-                        if 'cdna_coding_end' not in translation:
-                            translation['cdna_coding_end'] = spl_tx.convert_genomic_to_cdna(
-                                translation['end']
-                            )
-                        if 'cdna_coding_start' not in translation:
-                            translation['cdna_coding_start'] = spl_tx.convert_genomic_to_cdna(
-                                translation['start']
-                            )
+                        if (
+                            'cdna_coding_end' not in translation
+                            or 'cdna_coding_start' not in translation
+                        ):
+                            if 'cdna_coding_end' not in translation:
+                                translation['cdna_coding_end'] = spl_tx.convert_genomic_to_cdna(
+                                    translation['end']
+                                )
+                            if 'cdna_coding_start' not in translation:
+                                translation['cdna_coding_start'] = spl_tx.convert_genomic_to_cdna(
+                                    translation['start']
+                                )
+
+                            if gene.strand == STRAND.NEG:
+                                translation['cdna_coding_start'], translation['cdna_coding_end'] = (
+                                    translation['cdna_coding_end'],
+                                    translation['cdna_coding_start'],
+                                )
+
                     except IndexError as err:
                         raise IndexError(
                             f'Invalid specification of CDS ({translation["name"]}: {translation["start"]}-{translation["end"]}) '
                             f'region on transcript ({transcript["name"]}: {transcript["start"]}-{transcript["end"]}): {err}'
                         )
 
-                    if gene.strand == STRAND.NEG:
-                        translation['cdna_coding_start'], translation['cdna_coding_end'] = (
-                            translation['cdna_coding_end'],
-                            translation['cdna_coding_start'],
-                        )
-
                     tx_length = (
                         translation['cdna_coding_end'] - translation['cdna_coding_start'] + 1
                     )
@@ -191,7 +196,7 @@ def parse_annotations_json(
                             for region in regions:
                                 if region.start < 1 or region.end > tx_length:
                                     raise AssertionError(
-                                        'region cannot be outside the translated length'
+                                        f'region ({dom["name"]}:{region.start}-{region.end}) cannot be outside the translated length ({tx_length})'
                                     )
                             domains.append(
                                 Domain(
@@ -203,8 +208,8 @@ def parse_annotations_json(
                         except AssertionError as err:
                             logger.warning(repr(err))
                     translation = Translation(
-                        translation['cdna_coding_start'],
-                        translation['cdna_coding_end'],
+                        start=translation['cdna_coding_start'],
+                        end=translation['cdna_coding_end'],
                         transcript=spl_tx,
                         domains=domains,
                         name=translation.get('name'),

From 8edf65d453f7e9897b75abd20eeebd96acb3a6c0 Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Tue, 8 Feb 2022 20:24:41 -0800
Subject: [PATCH 118/137] Reformat json to nest translations in v3

---
 src/tools/convert_annotations_format.py | 63 ++++++++++++++++++-------
 1 file changed, 45 insertions(+), 18 deletions(-)

diff --git a/src/tools/convert_annotations_format.py b/src/tools/convert_annotations_format.py
index a01176f0..ac2b2671 100644
--- a/src/tools/convert_annotations_format.py
+++ b/src/tools/convert_annotations_format.py
@@ -7,9 +7,6 @@
 import pandas as pd
 from mavis.annotate.file_io import parse_annotations_json
 
-# pd.set_option('display.width', 250)
-pd.options.display.width = 0
-
 PANDAS_DEFAULT_NA_VALUES = [
     '-1.#IND',
     '1.#QNAN',
@@ -92,12 +89,12 @@ def agg_strings_unique(series):
 
 
 def strip_empty_fields(input_obj):
-    """Remove all empty string fields from some dictionary object to reduce the size"""
+    """Remove all empty string or null fields from some dictionary object to reduce the size"""
 
     if isinstance(input_obj, dict):
         result = {}
         for k, v in input_obj.items():
-            if v == '' or (isinstance(v, list) and not len(v)):
+            if v == '' or v is None or (isinstance(v, list) and not len(v)):
                 continue
             result[k] = strip_empty_fields(v)
         return result
@@ -106,6 +103,22 @@ def strip_empty_fields(input_obj):
     return input_obj
 
 
+def coerce_number_types(input_obj, fields=('start', 'end', 'cdna_coding_start', 'cdna_coding_end')):
+    """Recursively copy input_obj, casting str values stored under the `fields` keys to int.
+
+    Null-like strings ('', 'null', 'none'; case-insensitive) under those keys are dropped.
+    """
+    if isinstance(input_obj, dict):
+        return {
+            k: int(v) if k in fields and isinstance(v, str) else coerce_number_types(v, fields)
+            for k, v in input_obj.items()
+            if not (k in fields and isinstance(v, str) and v.lower() in {'', 'null', 'none'})
+        }
+    if isinstance(input_obj, list):
+        return [coerce_number_types(v, fields) for v in input_obj]
+    return input_obj
+
+
 def convert_tab_to_json(filepath: str) -> Dict:
     """
     given a file in the std input format (see below) reads and return a list of genes (and sub-objects)
@@ -167,10 +180,16 @@ def parse_domain_list(row):
                 logging.warning(f'error in domain: {domain}, {row}, {repr(err)}')
         return domains
 
+    skip_lines = 0
+    with open(filepath, 'r') as fh:
+        lines = fh.readlines()
+        skip_lines = len([l for l in lines if l.startswith('##')])
+
     df = pd.read_csv(
         filepath,
+        skiprows=skip_lines,
         dtype={
-            'ensembl_gene_id': str,
+            '#ensembl_gene_id': str,
             'ensembl_transcript_id': str,
             'chr': str,
             'cdna_coding_start': pd.Int64Dtype(),
@@ -185,8 +204,7 @@ def parse_domain_list(row):
             'gene_end': int,
         },
         sep='\t',
-        comment='#',
-    )
+    ).rename(columns={'#ensembl_gene_id': 'ensembl_gene_id'})
 
     for col in ['ensembl_gene_id', 'chr', 'ensembl_transcript_id', 'gene_start', 'gene_end']:
         if col not in df:
@@ -228,19 +246,20 @@ def parse_domain_list(row):
             'is_best_transcript': is_best_transcript,
             'name': row['ensembl_transcript_id'],
             'exons': row.get('genomic_exon_ranges', []),
-            'domains': row.get('AA_domain_ranges', []),
             'start': row.get('transcript_genomic_start'),
             'end': row.get('transcript_genomic_end'),
-            'cdna_coding_start': row.get('cdna_coding_start'),
-            'cdna_coding_end': row.get('cdna_coding_end'),
             'aliases': [],
+            'translations': [
+                {
+                    'domains': row.get('AA_domain_ranges', []),
+                    'cdna_coding_start': row.get('cdna_coding_start'),
+                    'cdna_coding_end': row.get('cdna_coding_end'),
+                }
+            ],
         }
-        for int_value in ['start', 'end', 'cdna_coding_start', 'cdna_coding_end']:
-            if transcript.get(int_value) is not None:
-                transcript[int_value] = int(transcript[int_value])
         gene['transcripts'].append(transcript)
 
-    return {'genes': list(genes.values())}
+    return coerce_number_types({'genes': list(genes.values())})
 
 
 def strip_id_field(feature_id) -> Tuple[str, str]:
@@ -620,8 +639,6 @@ def feature_key(row, parent=False):
         short_msg = '. '.join(
             [line for line in str(err).split('\n') if line.strip()][:3]
         )  # these can get super long
-        with open('tmp_out.json', 'w') as fh:
-            fh.write(json.dumps(result, sort_keys=True, indent='  '))
         raise AssertionError(short_msg)
     # re-strip (mavis adds defaults)
     result = strip_empty_fields({'genes': list(genes_by_id.values())})
@@ -799,6 +816,10 @@ def convert_mavis_json_2to3(filename):
 
     # move translations into sep object
     for gene in content['genes']:
+        if gene['strand'] == '1':
+            gene['strand'] = '+'
+        elif gene['strand'] == '-1':
+            gene['strand'] = '-'
         for transcript in gene.get('transcripts', []):
             if any(transcript.get(k) for k in ['cdna_coding_start', 'cdna_coding_end', 'domains']):
                 transcript['translations'] = [
@@ -811,12 +832,14 @@ def convert_mavis_json_2to3(filename):
                 del transcript['domains']
                 del transcript['cdna_coding_start']
                 del transcript['cdna_coding_end']
+    content = coerce_number_types(content)
+    content = strip_empty_fields(content)
     parse_annotations_json(content)
     content = strip_empty_fields(content)
     return content
 
 
-if __name__ == '__main__':
+def main():
     parser = argparse.ArgumentParser()
     parser.add_argument(
         'input', help='path to the tab-delimated mavis v2 style reference annotations file'
@@ -849,3 +872,7 @@ def convert_mavis_json_2to3(filename):
     logging.info(f'writing: {args.output}')
     with open(args.output, 'w') as fh:
         fh.write(json.dumps(annotations, sort_keys=True))
+
+
+if __name__ == '__main__':
+    main()

From 480c66e97f60dcbb1be37126e6b2cb9431ee07f6 Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Tue, 8 Feb 2022 20:25:13 -0800
Subject: [PATCH 119/137] Update ensembl script to generate nested translations

---
 src/tools/generate_ensembl_json.py | 36 +++++++++++++++++-------------
 1 file changed, 21 insertions(+), 15 deletions(-)

diff --git a/src/tools/generate_ensembl_json.py b/src/tools/generate_ensembl_json.py
index 9c44d2f1..91434eea 100755
--- a/src/tools/generate_ensembl_json.py
+++ b/src/tools/generate_ensembl_json.py
@@ -20,7 +20,6 @@
 import simplejson as json
 from pyensembl import EnsemblRelease
 
-
 VERSION = "1.0.0"
 SCRIPT = os.path.abspath(__file__)
 CACHE_DEFAULT = os.environ["HOME"] + "/.cache"
@@ -248,7 +247,7 @@ def __init__(
             self.alias = defaultdict(set)
 
         self.data = EnsemblRelease(release, species)
-        self.download_pyensembl_cache()
+        self.download_pyensembl_cache(self.data, self.custom_cache)
         self.get_domain_cache()
 
         if self.best_file:
@@ -311,35 +310,38 @@ def get_transcripts(self, eid):
         if not protein_id:
             return None
 
+        translation = {
+            'domains': [],
+            "name": protein_id,
+        }
         result = {
             "name": str(transcript.transcript_id),
             "start": int(transcript.start),
             "end": int(transcript.end),
             "aliases": [str(transcript.transcript_name)],
             "is_best_transcript": str(transcript.transcript_id) in self.best,
-            "protein_id": transcript.protein_id,
             "exons": [],
-            "domains": [],
+            "translations": [translation],
         }
 
         # start/end are absolute genomic positions, so calculate positions relative to the mRNA start
         cpos = transcript.coding_sequence_position_ranges
         if transcript.strand in ("+", "1"):
-            result["cdna_coding_start"] = transcript.spliced_offset(cpos[0][0]) + 1
-            result["cdna_coding_end"] = transcript.spliced_offset(cpos[-1][1]) + 1
+            translation["cdna_coding_start"] = transcript.spliced_offset(cpos[0][0]) + 1
+            translation["cdna_coding_end"] = transcript.spliced_offset(cpos[-1][1]) + 1
         elif transcript.strand in ("-", "-1"):
-            result["cdna_coding_start"] = transcript.spliced_offset(cpos[0][1]) + 1
-            result["cdna_coding_end"] = transcript.spliced_offset(cpos[-1][0]) + 1
+            translation["cdna_coding_start"] = transcript.spliced_offset(cpos[0][1]) + 1
+            translation["cdna_coding_end"] = transcript.spliced_offset(cpos[-1][0]) + 1
 
         return result
 
-    def get_exons(self, eid):
+    def get_exons(self, eid: str) -> dict:
         """
         Method parse exon info in the EnsemblRelease into json format.
         Args:
-            eid (str): Ensembl exon ID
+            eid: Ensembl exon ID
         Returns:
-            dict: exon info formatted for json
+            exon info formatted for json
         """
         exon = self.data.exon_by_id(eid)
         result = {"name": str(exon.exon_id), "start": int(exon.start), "end": int(exon.end)}
@@ -347,7 +349,7 @@ def get_exons(self, eid):
         return result
 
     @cached_domains
-    def get_domains(self, eid):
+    def get_domains(self, eid: str):
         """
         Method request domain info from Ensembl and parse into json format.
         Args:
@@ -404,8 +406,9 @@ def build_json(self):
                     for eid in self.data.exon_ids_of_transcript_id(tid):
                         exond = self.get_exons(eid)
                         transd["exons"].append(exond)
-                    domains = self.get_domains(transd["protein_id"])
-                    transd["domains"] = domains
+                    for translation in transd['translations']:
+                        domains = self.get_domains(translation["name"])
+                        translation["domains"] = domains  # domains are nested per translation
                     gened["transcripts"].append(transd)
                 else:
                     count["non_coding"] += 1
@@ -547,7 +550,10 @@ def main():
         help="a tab-separated file of Ensembl gene IDs and gene aliases (one ID and one alias per line)",
     )
     opt_parser.add_argument(
-        "-c", "--custom-cache", help="use a non-default path to cache ensembl data"
+        "-c",
+        "--custom-cache",
+        help="use a non-default path to cache ensembl data",
+        default=CACHE_DEFAULT,
     )
     opt_parser.add_argument(
         "-d",

From 42fa68249049313e9e9d6e9f40fd54538ea1642f Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Tue, 8 Feb 2022 22:26:38 -0800
Subject: [PATCH 120/137] Update references to pre-built annotations files

---
 docs/inputs/reference.md                |  4 +-
 docs/tutorials/full.md                  |  2 +-
 src/mavis/annotate/file_io.py           | 16 +++++++-
 src/tools/convert_annotations_format.py | 52 ++++++++++++++++++-------
 src/tools/generate_ensembl_json.py      |  5 +--
 src/tools/get_hg19_reference_files.sh   |  3 +-
 src/tools/get_hg38_reference_files.sh   |  3 +-
 7 files changed, 60 insertions(+), 25 deletions(-)

diff --git a/docs/inputs/reference.md b/docs/inputs/reference.md
index 854e1058..8c7f0f36 100644
--- a/docs/inputs/reference.md
+++ b/docs/inputs/reference.md
@@ -15,7 +15,7 @@ not available,
 | File Name (Type/Format)                                                                       | Environment Variable      | Download                                                                                                                                                                                                                                                      |
 | --------------------------------------------------------------------------------------------- | ------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
 | [reference genome](../../inputs/reference/#reference-genome) ([fasta](../../glossary/#fasta)) | `MAVIS_REFERENCE_GENOME`  | [![](../images/get_app-24px.svg) GRCh37/Hg19](http://hgdownload.cse.ucsc.edu/goldenPath/hg19/bigZips/chromFa.tar.gz) <br> [![](../images/get_app-24px.svg) GRCh38](http://hgdownload.cse.ucsc.edu/goldenPath/hg38/bigZips/hg38.tar.gz)                        |
-| [annotations](../../inputs/reference/#annotations) ([JSON](../../glossary/#json))             | `MAVIS_ANNOTATIONS`       | [![](../images/get_app-24px.svg) GRCh37/Hg19 + Ensembl69](http://www.bcgsc.ca/downloads/mavis/ensembl69_hg19_annotations.json) <br> [![](../images/get_app-24px.svg) GRCh38 + Ensembl79](http://www.bcgsc.ca/downloads/mavis/ensembl79_hg38_annotations.json) |
+| [annotations](../../inputs/reference/#annotations) ([JSON](../../glossary/#json))             | `MAVIS_ANNOTATIONS`       | [![](../images/get_app-24px.svg) GRCh37/Hg19 + Ensembl69](http://www.bcgsc.ca/downloads/mavis/v3/ensembl69_hg19_annotations.v3.json.gz) <br> [![](../images/get_app-24px.svg) GRCh38 + Ensembl79](http://www.bcgsc.ca/downloads/mavis/v3/ensembl79_hg38_annotations.v3.json.gz) |
 | [masking](../../inputs/reference/#masking-file) (text/tabbed)                                 | `MAVIS_MASKING`           | [![](../images/get_app-24px.svg) GRCh37/Hg19](http://www.bcgsc.ca/downloads/mavis/hg19_masking.tab)<br>[![](../images/get_app-24px.svg) GRCh38](http://www.bcgsc.ca/downloads/mavis/GRCh38_masking.tab)                                                       |
 | [template metadata](../../inputs/reference/#template-metadata) (text/tabbed)                  | `MAVIS_TEMPLATE_METADATA` | [![](../images/get_app-24px.svg) GRCh37/Hg19](http://hgdownload.cse.ucsc.edu/goldenPath/hg19/database/cytoBand.txt.gz)<br>[![](../images/get_app-24px.svg) GRCh38](http://hgdownload.cse.ucsc.edu/goldenPath/hg38/database/cytoBand.txt.gz)                   |
 | [DGV annotations](../../inputs/reference/#dgv-database-of-genomic-variants) (text/tabbed)     | `MAVIS_DGV_ANNOTATION`    | [![](../images/get_app-24px.svg) GRCh37/Hg19](http://www.bcgsc.ca/downloads/mavis/dgv_hg19_variants.tab)<br>[![](../images/get_app-24px.svg) GRCh38](http://www.bcgsc.ca/downloads/mavis/dgv_hg38_variants.tab)                                               |
@@ -82,7 +82,7 @@ the ensembl annotations file including non-coding transcripts below.
     annotations file. On our standard COLO829 we increased the default
     memory for the annotation step from 12G to 18G.
 
-[![](../images/get_app-24px.svg) GRCh37/Hg19 + Ensembl69 (includes non-coding genes)](http://www.bcgsc.ca/downloads/mavis/ensembl69_hg19_annotations_with_ncrna.json)
+[![](../images/get_app-24px.svg) GRCh37/Hg19 + Ensembl69 (includes non-coding genes)](http://www.bcgsc.ca/downloads/mavis/v3/ensembl69_hg19_annotations_with_ncrna.v3.json.gz)
 
 !!! warning
     the `mavis.annotate.file_io.load_reference_genes`{.interpreted-text
diff --git a/docs/tutorials/full.md b/docs/tutorials/full.md
index dc1828a9..20054def 100644
--- a/docs/tutorials/full.md
+++ b/docs/tutorials/full.md
@@ -155,7 +155,7 @@ Finally you will need to set output directory and the reference files
       "reference_inputs/hg19.2bit"
   ],
   "reference.annotations": [
-      "reference_inputs/ensembl69_hg19_annotations.json"
+      "reference_inputs/ensembl69_hg19_annotations.v3.json"
   ],
   "reference.dgv_annotation": [
       "reference_inputs/dgv_hg19_variants.tab"
diff --git a/src/mavis/annotate/file_io.py b/src/mavis/annotate/file_io.py
index 0f8e9aef..7220fc9c 100644
--- a/src/mavis/annotate/file_io.py
+++ b/src/mavis/annotate/file_io.py
@@ -113,6 +113,8 @@ def parse_annotations_json(
         raise AssertionError(short_msg)
 
     genes_by_chr: ReferenceAnnotations = {}
+    tx_skipped = 0
+    domain_errors = 0
 
     for gene_dict in data['genes']:
 
@@ -183,7 +185,8 @@ def parse_annotations_json(
                     )
                     # check that the translation makes sense before including it
                     if tx_length % CODON_SIZE != 0:
-                        logger.warning(
+                        tx_skipped += 1
+                        logger.debug(
                             f'Ignoring translation ({translation.get("name")}). The translated region is not a multiple of three (length={tx_length})'
                         )
                         continue
@@ -206,7 +209,8 @@ def parse_annotations_json(
                                 )
                             )
                         except AssertionError as err:
-                            logger.warning(repr(err))
+                            domain_errors += 1
+                            logger.debug(repr(err))
                     translation = Translation(
                         start=translation['cdna_coding_start'],
                         end=translation['cdna_coding_end'],
@@ -227,6 +231,14 @@ def parse_annotations_json(
                     spl_tx.translations.append(translation)
         if not best_transcripts_only or has_best:
             genes_by_chr.setdefault(gene.chr, []).append(gene)
+    if tx_skipped:
+        logger.warning(
+            f'Skipped {tx_skipped} translations where the CDS length was not a multiple of 3'
+        )
+    if domain_errors:
+        logger.warning(
+            f'Skipped {domain_errors} domains due to errors (coordinates defined outside the translated region)'
+        )
     return genes_by_chr
 
 
diff --git a/src/tools/convert_annotations_format.py b/src/tools/convert_annotations_format.py
index ac2b2671..775b6d1d 100644
--- a/src/tools/convert_annotations_format.py
+++ b/src/tools/convert_annotations_format.py
@@ -183,7 +183,7 @@ def parse_domain_list(row):
     skip_lines = 0
     with open(filepath, 'r') as fh:
         lines = fh.readlines()
-        skip_lines = len([l for l in lines if l.startswith('##')])
+        skip_lines = len([line for line in lines if line.startswith('##')])
 
     df = pd.read_csv(
         filepath,
@@ -815,26 +815,47 @@ def convert_mavis_json_2to3(filename):
         content = json.load(fh)
 
     # move translations into sep object
+    skipped_tx = 0
+    total_tx = 0
     for gene in content['genes']:
-        if gene['strand'] == '1':
+        if str(gene['strand']) == '1':
             gene['strand'] = '+'
-        elif gene['strand'] == '-1':
+        elif str(gene['strand']) == '-1':
             gene['strand'] = '-'
         for transcript in gene.get('transcripts', []):
-            if any(transcript.get(k) for k in ['cdna_coding_start', 'cdna_coding_end', 'domains']):
-                transcript['translations'] = [
-                    {
-                        'cdna_coding_start': transcript['cdna_coding_start'],
-                        'cdna_coding_end': transcript['cdna_coding_end'],
-                        'domains': transcript['domains'],
-                    }
-                ]
-                del transcript['domains']
+            if all(transcript.get(k) for k in ['cdna_coding_start', 'cdna_coding_end']):
+                total_tx += 1
+                translation = {
+                    'cdna_coding_start': transcript['cdna_coding_start'],
+                    'cdna_coding_end': transcript['cdna_coding_end'],
+                    'domains': transcript.get('domains', []),
+                }
+                translated_length = (
+                    1 + transcript['cdna_coding_end'] - transcript['cdna_coding_start']
+                )
+
+                if 'domains' in transcript:
+                    del transcript['domains']
+
                 del transcript['cdna_coding_start']
                 del transcript['cdna_coding_end']
+
+                if translated_length % 3 != 0:
+                    skipped_tx += 1
+                    logging.debug(
+                        f'Ignoring translation ({transcript.get("name")}). The translated region is not a multiple of three (length={translated_length})'
+                    )
+                    continue
+                transcript['translations'] = [translation]
+    if skipped_tx:
+        logging.warning(
+            f'dropped {skipped_tx} / {total_tx} translations for lengths that were not a multiple of 3'
+        )
     content = coerce_number_types(content)
     content = strip_empty_fields(content)
+    logging.info('testing new JSON with MAVIS loader')
     parse_annotations_json(content)
+    logging.info('removing unnecessary empty fields')
     content = strip_empty_fields(content)
     return content
 
@@ -857,8 +878,11 @@ def main():
     )
 
     args = parser.parse_args()
-
-    logging.basicConfig(format='{message}', style='{', level=logging.getLevelName(args.log_level))
+    logging.basicConfig(
+        format='{asctime} [{levelname}] {message}',
+        style='{',
+        level=logging.getLevelName(args.log_level),
+    )
 
     if args.input_type == 'v2-tab':
         annotations = convert_tab_to_json(args.input)
diff --git a/src/tools/generate_ensembl_json.py b/src/tools/generate_ensembl_json.py
index 91434eea..176caee1 100755
--- a/src/tools/generate_ensembl_json.py
+++ b/src/tools/generate_ensembl_json.py
@@ -247,7 +247,7 @@ def __init__(
             self.alias = defaultdict(set)
 
         self.data = EnsemblRelease(release, species)
-        self.download_pyensembl_cache(self.data, self.custom_cache)
+        self.download_pyensembl_cache()
         self.get_domain_cache()
 
         if self.best_file:
@@ -260,9 +260,6 @@ def __init__(
     def download_pyensembl_cache(self):
         """
         Method download the pyensembl cache files for this release if not already there.
-        Args:
-            data (EnsemblRelease): pyensembl object for the release info
-            custom_cache (str): path to cirectory to cache pyensembl files
         """
         if self.custom_cache:
             os.environ["PYENSEMBL_CACHE_DIR"] = self.custom_cache
diff --git a/src/tools/get_hg19_reference_files.sh b/src/tools/get_hg19_reference_files.sh
index 3fb40f46..eba597de 100644
--- a/src/tools/get_hg19_reference_files.sh
+++ b/src/tools/get_hg19_reference_files.sh
@@ -15,7 +15,8 @@ rm -f chr*.fa
 rm -f chromeFa.tar.gz
 
 echo "downloading the gene annotations file"
-wget http://www.bcgsc.ca/downloads/mavis/ensembl69_hg19_annotations.json
+wget http://www.bcgsc.ca/downloads/mavis/v3/ensembl69_hg19_annotations.v3.json.gz
+gunzip ensembl69_hg19_annotations.v3.json.gz
 
 echo "downloading the masking file"
 wget http://www.bcgsc.ca/downloads/mavis/hg19_masking.tab
diff --git a/src/tools/get_hg38_reference_files.sh b/src/tools/get_hg38_reference_files.sh
index 97c1face..c63bfb52 100644
--- a/src/tools/get_hg38_reference_files.sh
+++ b/src/tools/get_hg38_reference_files.sh
@@ -5,7 +5,8 @@ wget ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/001/405/GCA_000001405.15_GRC
 gunzip GCA_000001405.15_GRCh38_no_alt_analysis_set.fna.gz
 
 echo "downloading the gene annotations file"
-wget http://www.bcgsc.ca/downloads/mavis/ensembl79_hg38_annotations.json
+wget http://www.bcgsc.ca/downloads/mavis/v3/ensembl79_hg38_annotations.v3.json.gz
+gunzip ensembl79_hg38_annotations.v3.json.gz
 
 echo "downloading the masking file"
 wget http://www.bcgsc.ca/downloads/mavis/GRCh38_masking.tab

From 74a2bf0c189721b4f6ad8081b4622158f8e24c19 Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Wed, 9 Feb 2022 12:04:06 -0800
Subject: [PATCH 121/137] Support loading gzipped vcfs

---
 src/mavis/cluster/main.py       |  2 +-
 src/mavis/tools/__init__.py     |  2 +-
 src/mavis/tools/vcf.py          | 24 +++++++++++++++++-------
 tests/full-tutorial.config.json |  2 +-
 4 files changed, 20 insertions(+), 10 deletions(-)

diff --git a/src/mavis/cluster/main.py b/src/mavis/cluster/main.py
index 861a1aea..1e1b5ff9 100644
--- a/src/mavis/cluster/main.py
+++ b/src/mavis/cluster/main.py
@@ -158,7 +158,7 @@ def main(
             annotations.content, breakpoint_pairs, max_proximity=config[f'{SECTION}.max_proximity']
         )
         logger.info(
-            f'filtered from {len(breakpoint_pairs)} down to {len(pass_clusters)} (removed {uninformative_clusters})'
+            f'filtered from {len(breakpoint_pairs)} down to {len(pass_clusters)} (removed {len(uninformative_clusters)})'
         )
         breakpoint_pairs = pass_clusters
         for bpp in uninformative_clusters:
diff --git a/src/mavis/tools/__init__.py b/src/mavis/tools/__init__.py
index 8649b31f..2dd0bc72 100644
--- a/src/mavis/tools/__init__.py
+++ b/src/mavis/tools/__init__.py
@@ -294,7 +294,7 @@ def _convert_tool_output(
         df.columns = [c[1:] if c.startswith('#') else c for c in df.columns]
         rows = df.where(df.notnull(), None).to_dict('records')
     if rows:
-        logger.info('found', len(rows), 'rows')
+        logger.info(f'found {len(rows)} rows')
         for row in rows:
             try:
                 std_rows = _convert_tool_row(
diff --git a/src/mavis/tools/vcf.py b/src/mavis/tools/vcf.py
index f756df43..c1c07faf 100644
--- a/src/mavis/tools/vcf.py
+++ b/src/mavis/tools/vcf.py
@@ -1,3 +1,4 @@
+import gzip
 import logging
 import re
 from dataclasses import dataclass
@@ -269,13 +270,22 @@ def pandas_vcf(input_file: str) -> Tuple[List[str], pd.DataFrame]:
     Read a standard vcf file into a pandas dataframe
     """
     # read the comment/header information
-    header_lines = []
-    with open(input_file, 'r') as fh:
-        line = '##'
-        while line.startswith('##'):
-            header_lines.append(line)
-            line = fh.readline().strip()
-        header_lines = header_lines[1:]
+    try:
+        header_lines = []
+        with open(input_file, 'r') as fh:
+            line = '##'
+            while line.startswith('##'):
+                header_lines.append(line)
+                line = fh.readline().strip()
+            header_lines = header_lines[1:]
+    except UnicodeDecodeError:
+        header_lines = []
+        with gzip.open(input_file, 'rt') as fh:
+            line = '##'
+            while line.startswith('##'):
+                header_lines.append(line)
+                line = fh.readline().strip()
+            header_lines = header_lines[1:]
     # read the data
     df = pd.read_csv(
         input_file,
diff --git a/tests/full-tutorial.config.json b/tests/full-tutorial.config.json
index bf125870..69fd41f1 100644
--- a/tests/full-tutorial.config.json
+++ b/tests/full-tutorial.config.json
@@ -77,7 +77,7 @@
         "reference_inputs/hg19.2bit"
     ],
     "reference.annotations": [
-        "reference_inputs/ensembl69_hg19_annotations.json"
+        "reference_inputs/ensembl69_hg19_annotations.v3.json"
     ],
     "reference.dgv_annotation": [
         "reference_inputs/dgv_hg19_variants.tab"

From 4663dfd4bf1d29ed7bc66cfa13be47207e1fc1fe Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Wed, 9 Feb 2022 12:22:44 -0800
Subject: [PATCH 122/137] Gzip one of the test vcfs

---
 tests/data/pindel_events.vcf     |  50 -------------------------------
 tests/data/pindel_events.vcf.gz  | Bin 0 -> 1941 bytes
 tests/end_to_end/test_convert.py |   2 +-
 3 files changed, 1 insertion(+), 51 deletions(-)
 delete mode 100644 tests/data/pindel_events.vcf
 create mode 100644 tests/data/pindel_events.vcf.gz

diff --git a/tests/data/pindel_events.vcf b/tests/data/pindel_events.vcf
deleted file mode 100644
index b64481f8..00000000
--- a/tests/data/pindel_events.vcf
+++ /dev/null
@@ -1,50 +0,0 @@
-##fileformat=VCFv4.0
-##fileDate=april2017
-##source=pindel
-##reference=hg19
-##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the variant described in this record">
-##INFO=<ID=HOMLEN,Number=1,Type=Integer,Description="Length of base pair identical micro-homology at event breakpoints">
-##INFO=<ID=PF,Number=1,Type=Integer,Description="The number of samples carry the variant">
-##INFO=<ID=HOMSEQ,Number=.,Type=String,Description="Sequence of base pair identical micro-homology at event breakpoints">
-##INFO=<ID=SVLEN,Number=1,Type=Integer,Description="Difference in length between REF and ALT alleles">
-##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">
-##INFO=<ID=NTLEN,Number=.,Type=Integer,Description="Number of bases inserted in place of deleted code">
-##FORMAT=<ID=PL,Number=3,Type=Integer,Description="Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification">
-##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
-##FORMAT=<ID=RD,Number=1,Type=Integer,Description="Reference depth, how many reads support the reference">
-##FORMAT=<ID=AD,Number=2,Type=Integer,Description="Allele depth, how many reads support this allele">
-#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	tumour
-1	10097	.	TAACCCTAACCC	T	.	PASS	END=10108;HOMLEN=11;HOMSEQ=AACCCTAACCC;SVLEN=-11;SVTYPE=DEL	GT:AD	0/0:1,1
-1	10107	.	C	CA	.	PASS	END=10107;HOMLEN=0;SVLEN=1;SVTYPE=INS	GT:AD	0/0:1,1
-1	10108	.	C	CCTAACCCCTAACCCT	.	PASS	END=10108;HOMLEN=0;SVLEN=15;SVTYPE=INS	GT:AD	0/0:1,1
-1	10108	.	C	CAACCCTACCCCTACCCCTAACCCCTAACCCCTAACCCCAACCCCTACCCCTAACCCTAACCCTAAACCCT	.	PASS	END=10108;HOMLEN=7;HOMSEQ=AACCCTA;SVLEN=70;SVTYPE=INS	GT:AD	0/0:1,1
-1	10110	.	ACCCTAACCCTAACCCTAACCCTAACCCTA	AAACACAACCCCAAGCCTGAACTCCAGCCTCAACCAAATCCCATCCCCC	.	PASS	END=10139;HOMLEN=0;SVLEN=-29;SVTYPE=RPL;NTLEN=48	GT:AD	0/0:1,1
-1	10113	.	C	CTAACCCTACCCCTAACCCTAACCCTAACCCTAACCCCTAACCCTAACCA	.	PASS	END=10113;HOMLEN=8;HOMSEQ=TAACCCTA;SVLEN=49;SVTYPE=INS	GT:AD	0/0:1,1
-1	10128	.	ACCCTAACCCTAACCCTAACCCCTAACCCTAACCCTAACCCTAACCCTAACCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCCTAACCCTAACCCTAAACCCTAA	ACCCTAACCCTAACCCTAACCCCTAACCCTAACCCTAACCCTAACCCTAACCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCCTAACCCTAACCCTAAACCCTAACCCTAACCCTAACCCCTAACCCTAACCCTAACCCCTAACCCTAAGCCTAACCCCTAACCCTAAGCCTAACCCCTAACCCTAACCCTAACCCCTAACCCTAACCCTAACCCCTAACCCTAACCCTAACCCTAACCCTAACCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCCTAACCCTAACCCTAAACCCTAA	.	PASS	END=10256;HOMLEN=0;SVLEN=128;SVTYPE=DUP:TANDEM;NTLEN=89	GT:AD	0/0:1,4
-1	10172	.	CCCTAACCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCC	CCCTAACCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTTAACCTTAACCTTAACCTTAACCTTAACCTAACCCTAACCTAACCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCC	.	PASS	END=10233;HOMLEN=0;SVLEN=61;SVTYPE=DUP:TANDEM;NTLEN=63	GT:AD	0/0:0,1
-1	10212	.	ACCCTAACCCTAACCCTAACCCCTAACCCTAACCCTAAACCCTAA	ACCCTAACCCTAACCCTAACCCCTAACCCTAACCCTAAACCCTAACCCTAACCCTAACCCCTAACCCTAACCCTAACCCCTAACCCTAAGCCTAACCCCTAACCCTAAGCCTAACCCCTAACCCTAACCCTAACCCCTAACCCTAACCCTAACCCCTAACCCTAACCCTAAACCCTAA	.	PASS	END=10256;HOMLEN=0;SVLEN=44;SVTYPE=DUP:TANDEM;NTLEN=89	GT:AD	0/0:0,4
-1	10334	.	TAACCCTAACCCTAACCCTACCCTAACCCTAACCCTAA	TCGCCCTAACCTTAACCCCCCACCCTCACCCAAACCCCCACCCCTCACCCCCACCCCCAACCCTAACCCTAACCCCTAACCCTAACCCTAACCCCTAACCCTAACCCTAAC	.	PASS	END=10371;HOMLEN=0;SVLEN=-37;SVTYPE=RPL;NTLEN=110	GT:AD	0/0:0,1
-1	10387	.	TAACCCCTAACCCCTAACCCTAACCCTAACCCTAACCCTAACCC	TAACCCCTAACCCCTAACCCTAACCCTAACCCTAACCCTAACCCCTAACCCTAACCCTAACCCAAACCCTAACCCTAACCCTAACCCCAACCCTAACCCCAACCCAAACCCCAAACCCAACCCCCACCCATAACCAAACCGCAACACAACCCCTAACCCCTAACCCTAACCCTAACCCTAACCCTAACCC	.	PASS	END=10430;HOMLEN=0;SVLEN=43;SVTYPE=DUP:TANDEM;NTLEN=103	GT:AD	0/0:1,1
-1	10398	.	CCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCCTAACCCTAACCCTAACCCTAACCCTCGCGGTACCCTC	CCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCCTAACCCTAACCCTAACCCTAACCCTCGCGGTACCCTCCCCCGACCCTAGCCCTCACCCTTACCCTCTCCCTCTTTTTTACTGATACGGCGACCACCGAGATCTCCCCTCTTCCCCTCCACGCCGCTCTCCCGATCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCCTAACCCTAACCCTAACCCTAACCCTCGCGGTACCCTC	.	PASS	END=10480;HOMLEN=0;SVLEN=82;SVTYPE=DUP:TANDEM;NTLEN=97	GT:AD	0/0:18,1
-1	10399	.	CCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCCTAACCCTAACCCTAACCCTAACCCTCGCG	CCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCCTAACCCTAACCCTAACCCTAACCCTCGCGCTATCCCTAACCCTGGACCTCACACTTTTTTTCAAGCAGAAGCCGGCATACGCGATATTACAGTGACTGGAGTTCAGACGTGTGCTCTTCCGATCTCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCCTAACCCTAACCCTAACCCTAACCCTCGCG	.	PASS	END=10472;HOMLEN=0;SVLEN=73;SVTYPE=DUP:TANDEM;NTLEN=96	GT:AD	0/0:17,1
-1	10399	.	CCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCCTAACCCTAACCCTAACCCTAACCCTCGCGGTACCCTCAGCC	CCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCCTAACCCTAACCCTAACCCTAACCCTCGCGGTACCCTCAGCCTCACCCTCACACTCGCCCTACGCCTGACCCTATTTTTTCAAGCAGAAGACGGCATACGAGATATATAGTGACTGGAGTTCAGACGTGTGCTCTTCCGATCTCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCCTAACCCTAACCCTAACCCTAACCCTCGCGGTACCCTCAGCC	.	PASS	END=10484;HOMLEN=0;SVLEN=85;SVTYPE=DUP:TANDEM;NTLEN=101	GT:AD	0/0:18,1
-1	10403	.	ACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCCTAACCCTAACCCTAACCCTAACCCTCGC	ACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCCTAACCCTAACCCTAACCCTAACCCTCGCCCTAACCCTAACCCTAACCCCAACCCTACCCCTTCCCCTCACCCCTCGCCCTACCCCAAATCATAGCGCCTCCCGTTCCGACGCCCGCTCTCCCGCCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCCTAACCCTAACCCTAACCCTAACCCTCGC	.	PASS	END=10471;HOMLEN=0;SVLEN=68;SVTYPE=DUP:TANDEM;NTLEN=95	GT:AD	0/0:17,1
-1	10409	.	ACCCTAACCCTAACCCTAACCCTAACCCTAAC	A	.	PASS	END=10440;HOMLEN=28;HOMSEQ=CCCTAACCCTAACCCTAACCCTAACCCT;SVLEN=-31;SVTYPE=DEL	GT:AD	0/0:2,2
-1	10415	.	ACCCTAACCCTAACCCTAACCCTAAC	A	.	PASS	END=10440;HOMLEN=27;HOMSEQ=CCCTAACCCTAACCCTAACCCTAACCC;SVLEN=-25;SVTYPE=DEL	GT:AD	0/0:2,1
-1	10421	.	A	ACCCTAACCCTAACCCTAACCCCTAACCCTACCCCAACCCCTAC	.	PASS	END=10421;HOMLEN=30;HOMSEQ=CCCTAACCCTAACCCTAACCCCTAACCCTA;SVLEN=43;SVTYPE=INS	GT:AD	0/0:0,1
-1	10421	.	ACCCTAACCCTAACCCTAACCCCTAACCCTAACCCTAACCCTAACCCTCGCGGTACCCTCAGCCGGCCCGCCCGCC	ACCCTAACCCTAACCCTAACCCCTAACCCTAACCCTAACCCTAACCCTCGCGGTACCCTCAGCCGGCCCGCCCGCCGCCACCCGCCCCCCCGTTTTTTCATGCTACGGCGACCACCGAGACCTACACTCTTTCCCTACACCCCGCCCTTCCGCCCTACCCTAACCCTAACCCCAACCCTTACCCCTAACCCTAACCCTAACCCCTAACCCTAACCCTAACCCTAACCCTCGCGGTACCCTCAGCCGGCCCGCCCGCC	.	PASS	END=10496;HOMLEN=0;SVLEN=75;SVTYPE=DUP:TANDEM;NTLEN=106	GT:AD	0/0:18,1
-1	10439	.	ACCCCTAACCCTAACCCTAACCCTAACCCTC	AGCTCGACAGAGCACACATCTGAACCCCGGTCACTATAATATCCCCTAAGCCGTCTTCTGCTTCAAAAGCTGAGCCGCACGCCAGCCGTAGTCCCCGCCCCA	.	PASS	END=10469;HOMLEN=0;SVLEN=-30;SVTYPE=RPL;NTLEN=101	GT:AD	0/0:15,1
-1	10440	.	C	CCCCTAA	.	PASS	END=10440;HOMLEN=28;HOMSEQ=CCCTAACCCTAACCCTAACCCTAACCCT;SVLEN=6;SVTYPE=INS	GT:AD	0/0:3,1
-1	10440	.	C	CCCCTAACCCTAA	.	PASS	END=10440;HOMLEN=28;HOMSEQ=CCCTAACCCTAACCCTAACCCTAACCCT;SVLEN=12;SVTYPE=INS	GT:AD	0/0:3,2
-1	10440	.	C	CCCCTAACCCCTAACCCTAA	.	PASS	END=10440;HOMLEN=9;HOMSEQ=CCCTAACCC;SVLEN=19;SVTYPE=INS	GT:AD	0/0:3,3
-1	10440	.	C	CCCCTAACCCCTAACCCTAACCCTAA	.	PASS	END=10440;HOMLEN=9;HOMSEQ=CCCTAACCC;SVLEN=25;SVTYPE=INS	GT:AD	0/0:3,1
-1	10440	.	CCCCTAACCCTAACCCTAACCCTAACCCTCGCGGT	CGCTCACCAGAGCCCCGAGCAGAGCAAGAGCGTAGACCTCGGCGGTCGCCGTAACCTTAAAAAAAACCATAACCATAACCATATCCCTGCCCTACCACAA	.	PASS	END=10474;HOMLEN=0;SVLEN=-34;SVTYPE=RPL;NTLEN=99	GT:AD	0/0:18,1
-1	10440	.	CCCCTAACCCTAACCCTAACCCTAACCCTCGCGGTACCCTCAGCCGGCCCGCCCGCCCGGGTCTGACCT	CCCCTAACCCTAACCCTAACCCTAACCCTCGCGGTACCCTCAGCCGGCCCGCCCGCCCGGGTCTGACCTCCGCCAACTCTGCCGGCACCCCCGACCTCCCCCCCCTCCTTTTTTAATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCTCCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTCGCGGTACCCTCAGCCGGCCCGCCCGCCCGGGTCTGACCT	.	PASS	END=10508;HOMLEN=0;SVLEN=68;SVTYPE=DUP:TANDEM;NTLEN=115	GT:AD	0/0:18,2
-1	10440	.	C	CCCCTAACCCTAACTCTAGCACTCTAACCCTCTAACACTCTAACCCTAACCCTAACCCTAACCCCTAACCCCTAACCCTAA	.	PASS	END=10440;HOMLEN=13;HOMSEQ=CCCTAACCCTAAC;SVLEN=80;SVTYPE=INS	GT:AD	0/0:3,1
-1	10440	.	CCCCTAACCCTAACCCTAACCCTAACCCTCGCGGTACCCTCAGCCGGCCCGCCCGCCCGGGTCTGACCTGAGGAGAACTGTGCTCCGCC	CCCCTAACCCTAACCCTAACCCTAACCCTCGCGGTACCCTCAGCCGGCCCGCCCGCCCGGGTCTGACCTGAGGAGAACTGTGCTCCGCCCCACCGCACCCGACCTTCCCCGTGCCCGCGTCCACCCCCTCCTTTTTTAATGATACTGCCACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCTCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTCGCGGTACCCTCAGCCGGCCCGCCCGCCCGGGTCTGACCTGAGGAGAACTGTGCTCCGCC	.	PASS	END=10528;HOMLEN=0;SVLEN=88;SVTYPE=DUP:TANDEM;NTLEN=116	GT:AD	0/0:18,1
-1	10442	.	CCTAACCCTAACCCTAA	CTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCAAACCCAACCCTAACACAAAACCTCACCCCTGAACTCACGCCAC	.	PASS	END=10458;HOMLEN=0;SVLEN=-16;SVTYPE=RPL;NTLEN=92	GT:AD	0/0:10,1
-1	10442	.	C	CCCCTAACCCTAACCCTAA	.	PASS	END=10442;HOMLEN=1;HOMSEQ=C;SVLEN=18;SVTYPE=INS	GT:AD	0/0:4,1
-1	10442	.	CCTAACCCTAACCCTAACCCTAACCCTCGCGGTACCCTCAGCCGGCCCGCCCGCCCGGGTCTGACCTGAGGAGAACTGTGCTCC	CTAACGGGAACCCTAACCCTAACGCTCCCCCCACACTCATATCTCGTACCACGGCTGCAGCACGAAGGTTGGCCTCAGGTGGGACTGTCACCTTGAGT	.	PASS	END=10525;HOMLEN=0;SVLEN=-83;SVTYPE=RPL;NTLEN=97	GT:AD	0/0:18,1
-1	10447	.	C	CCCCTAACCCTAA	.	PASS	END=10447;HOMLEN=2;HOMSEQ=CC;SVLEN=12;SVTYPE=INS	GT:AD	0/0:5,2
-1	10452	.	ACCCTAACCCTAACCCTCGCGGTACCCTCAGCCGGCCCGCCCGCCCGGGTCTGACCTGAGGAGAACTGTGCTCCGCCTTCAGAGTACCACCGAAATCTGT	AGATCGGAAGACCACACTCCTGAACTCCAGTCACTATAATATCCCTTATCCCGTCTCCTCCTTGAAAAAAAAACCTCACACACAACCGCACCCACTTTGCAATCCAA	.	PASS	END=10551;HOMLEN=0;SVLEN=-99;SVTYPE=RPL;NTLEN=106	GT:AD	0/0:18,1
-1	10459	.	C	<INV>	.	PASS	END=249239947;HOMLEN=0;SVLEN=249229492;SVTYPE=INV;NTLEN=0,0	GT:AD	0/1:11,3
diff --git a/tests/data/pindel_events.vcf.gz b/tests/data/pindel_events.vcf.gz
new file mode 100644
index 0000000000000000000000000000000000000000..79b9c756b99ae71ea41b7f67bf3455cc0257cc2f
GIT binary patch
literal 1941
zcmV;G2Wt2qiwFp{-g#gE18`|>WMyn$Wp-t5baO6tV`c!&S?h1wND%+*{uNgHmE`;c
zJ1Lb`hHN7dl7?-h`{=+XSS2=RJFRkm{q5}7#!qlka(5st-iK#qfAiSc;nSywV8b7x
z_{mSlcizQM>s0&jNe)i@l#l&w9Bd3tABn*vdXATTybZz?--v$9A9&0|F|>ZvT`_Pu
zzqlTMzMPDw^T}ZT{B+Obu|8lgTRy%FQ~t=~!GtHvIM}8^6plYm!<D*?k^p*Y^q{6|
zuKx7nzz<V(B^TcFl^TR%B1qJjFQa(%@k^P}*Xyg<bl!*8jE9fu8o1p130Jp%5Uat8
zhiS0%H|kTcjH8ov^b~EP#~0O4RsK`VsrNDWe{7>5Op_|(n~Oe7nb=S$mjUC%f7)(%
zqAvY7ekpIZv8ToKTTcEI$uH742p_A|7yPehfd6QlTHN&na}qpcst4_C5ZZg5{^C4T
zZ>JZkFEmSLO!YS#F1B1}!~VRP*7&q-5GEvWY5crQpJTD7R>bD4B=_kax#xv00Ix&{
zpYS+E8{2Mtgi;tchp}a};*!tB_3ag5sP<XT#@xpS+-Vd1%~u2UW*zg@Nh0(iNN$23
zd=sprXqBkK=hR0YMyVi^sD2{&J_KQI?_3qms3u#!3?76sfo@$E%6dRZGY<8aZ+kiD
zZSLVhxNW)~sO#vL`s9Z%!Wn(Bhva#?jp9_wTSS98D^jpB_OK$-QV#MFaR6-!xOiW0
zudkGw>xFVTQH0qk!sL{1-^on5xSX-+tpXOJpeRcE{3PPn2VK!M%^fMH3M0hxJZu#v
z25!h=p@`5m)-_#o&M~%(b^RPYa$H(}E?s4OA|}zA$CK$yq3n!I6zvb~Odsfyldb_L
zPw_~d{wSxfWwZ)Pm-9s{B?l>G5@bK!au;;%o`kdQd8M}Hhcp%x?S6SiH5fDTM%n@K
zbWM;XO>U)D0p7&R1*Bq(iVhPUc$O1l34>W^gubg%nr<Dn6T{8ry1kj5%Lp^JoK^u%
z#E(?~Y?rm%shfmu=0Z3*RIG}X<sQJwa5}Nt10Ojs*=sk20TA-vOK_O;u0wkb^`rIv
z&{j;wu!jwAH=H6OeZM(lWImZ*W#;R+6`QfpW<~~>?{5kFo3D4aNVBW0LHtJ1tALtj
z9nhiPiRaKP;i+XlXy`_7AAFs^{4Z>~uN_)eKQq)aGc--Bb|mL*QwD{3v`~E}rU<7n
zBwLaVq5NK{<Gp3eH5)NUdgHJ#N6o_m4z8won9eTs=oLP^S1oQC>NK3jGs&MxK7z0j
zxkwIE7$@Fq#IJy7nOehA%uY)YaJ8Irrt6d<)|;Z~t`$3?6phS#51R0yI7bU>WZ5Lc
zR(=TNETW}Ut_Q4%7(f!R2!F){1vTIVEI2+4RgF0fjX6fA#zbOWQkYX{%#|8DmZV1{
zC7#)CgaY)SBa&%_$#haE0<<SoBQ*rD6VMo;LTB<V3M<947+^G07i#Zld#-9_WYo1X
z>h>ddSk}tuowSnM4?y)!WLXjAiI*P&9;W;9xvn!(=!{66iSNItvI-zIr?MJOW$y`t
zdYe~SntAYM)Kk>sP-%T3dzSBJF$_Do`n>FJ2D*{D6ay5MA(s@f?kF3rc~RpiA9knD
zuHEKEmge@*f>O(vEUSpBMseHQN2ACzItmWPz(8?yd#^bA1R3=Zq_`g2U2<fw7&^#t
zD77st1fFY>8HI8%Y3P)!=rl5l>NQ!b%J!QcO=)J7;N8b0MltJxo`=4GK@Fl*JwK4R
z5lkYytdsi+^L8%Q;*wp3iZ>!q)t%e85RCT4w_$61Gjq?{Ba%WOK2SJ|A?U-GVf6>(
zq7ZB#>JvQX;WZFkMti^$&I%Y}RE}eIg-gT%kwaN8)mX!3y~xyBt^n1*ZRZ=aw5*gT
z1v*Y-7`EHI+0OHroOPp%wb8-4hXigzfSg-L4a^*v2f4kKr_oVDt$};kcNBS=f%{5(
zlRlhTu57{Th+Hj~duWJ6dTaJ6u48#C{XTbexSG|^xJIgRVp>gGb=_7U?7Oq}s31C&
zVRpujLj}X5jCff_jzJRV@B?=uyi<{mJ5?iQ%3PzI10KpUUc32<PV8oteox+^>vmN&
zU2eq?7a0eOELQ%O`d#t9hksy!x)Bxf1h%7)|7s^WjHb{cC=&8`nYw#;<9H{^XgGK#
z6EDkRDZ{rZ%S%fQt7CYOzsBm0YICh9RqfDd*rBt}4%?4u%V@hsD*ZN7YD;w|v{?*y
z`F#QZ(ZK_{o^IHUeVpjSmKbN0H=tEF&}Kk&S12ys#eaguvE;N{zqKEwBWM+Y7AtBr
zX&w;K?Bq$oJc_3Tg_OsWkpk|?BFCh7y%vu^3aES1*x{mFw;H>Fwqv%S-C7H>tkK~_
zr1*2x*hQQJ*@m5mMZ2~B{yR>Cg@Oz@z)Pl>Op#tqvCfd8T>9g&mU=S$q4IEHi0tl6
z$C3sXRx#Ab3KZl8K&%FkZ8uBMZp;6bU5T|V3qQ)|%lX}xGG)VZ4bye4#(&c=VYs5%
bVSktL&<49Qw|=JUA`SZoA`BWhR3rcZJeR;i

literal 0
HcmV?d00001

diff --git a/tests/end_to_end/test_convert.py b/tests/end_to_end/test_convert.py
index 81f57c1b..0e23328d 100644
--- a/tests/end_to_end/test_convert.py
+++ b/tests/end_to_end/test_convert.py
@@ -96,7 +96,7 @@ def test_manta(self):
         assert somatic_event.data.get('SOMATIC', False) is True
 
     def test_pindel(self):
-        self.run_main(get_data('pindel_events.vcf'), SUPPORTED_TOOL.PINDEL, False)
+        self.run_main(get_data('pindel_events.vcf.gz'), SUPPORTED_TOOL.PINDEL, False)
 
     def test_transabyss(self):
         self.run_main(get_data('transabyss_indels_output.tab'), SUPPORTED_TOOL.TA, False)

From 8e90dc7e282a03ec21d69b7e732aa9e7d95363a6 Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Wed, 9 Feb 2022 12:23:00 -0800
Subject: [PATCH 123/137] Add requests to tools dependencies

---
 setup.cfg | 1 +
 1 file changed, 1 insertion(+)

diff --git a/setup.cfg b/setup.cfg
index 371d7e35..38a05509 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -90,6 +90,7 @@ deploy =
 tools =
     pyensembl
     simplejson
+    requests
 
 [options.entry_points]
 console_scripts =

From 777dc0a6ec7efe70717e55f29adb664d3825972e Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Wed, 9 Feb 2022 13:35:09 -0800
Subject: [PATCH 124/137] Rename tools submodule to convert to match step name

---
 src/mavis/{tools => convert}/__init__.py    | 0
 src/mavis/{tools => convert}/breakdancer.py | 0
 src/mavis/{tools => convert}/chimerascan.py | 0
 src/mavis/{tools => convert}/cnvnator.py    | 0
 src/mavis/{tools => convert}/constants.py   | 0
 src/mavis/{tools => convert}/starfusion.py  | 0
 src/mavis/{tools => convert}/transabyss.py  | 0
 src/mavis/{tools => convert}/vcf.py         | 0
 src/mavis/main.py                           | 2 +-
 9 files changed, 1 insertion(+), 1 deletion(-)
 rename src/mavis/{tools => convert}/__init__.py (100%)
 rename src/mavis/{tools => convert}/breakdancer.py (100%)
 rename src/mavis/{tools => convert}/chimerascan.py (100%)
 rename src/mavis/{tools => convert}/cnvnator.py (100%)
 rename src/mavis/{tools => convert}/constants.py (100%)
 rename src/mavis/{tools => convert}/starfusion.py (100%)
 rename src/mavis/{tools => convert}/transabyss.py (100%)
 rename src/mavis/{tools => convert}/vcf.py (100%)

diff --git a/src/mavis/tools/__init__.py b/src/mavis/convert/__init__.py
similarity index 100%
rename from src/mavis/tools/__init__.py
rename to src/mavis/convert/__init__.py
diff --git a/src/mavis/tools/breakdancer.py b/src/mavis/convert/breakdancer.py
similarity index 100%
rename from src/mavis/tools/breakdancer.py
rename to src/mavis/convert/breakdancer.py
diff --git a/src/mavis/tools/chimerascan.py b/src/mavis/convert/chimerascan.py
similarity index 100%
rename from src/mavis/tools/chimerascan.py
rename to src/mavis/convert/chimerascan.py
diff --git a/src/mavis/tools/cnvnator.py b/src/mavis/convert/cnvnator.py
similarity index 100%
rename from src/mavis/tools/cnvnator.py
rename to src/mavis/convert/cnvnator.py
diff --git a/src/mavis/tools/constants.py b/src/mavis/convert/constants.py
similarity index 100%
rename from src/mavis/tools/constants.py
rename to src/mavis/convert/constants.py
diff --git a/src/mavis/tools/starfusion.py b/src/mavis/convert/starfusion.py
similarity index 100%
rename from src/mavis/tools/starfusion.py
rename to src/mavis/convert/starfusion.py
diff --git a/src/mavis/tools/transabyss.py b/src/mavis/convert/transabyss.py
similarity index 100%
rename from src/mavis/tools/transabyss.py
rename to src/mavis/convert/transabyss.py
diff --git a/src/mavis/tools/vcf.py b/src/mavis/convert/vcf.py
similarity index 100%
rename from src/mavis/tools/vcf.py
rename to src/mavis/convert/vcf.py
diff --git a/src/mavis/main.py b/src/mavis/main.py
index c7e94bbe..fac18049 100644
--- a/src/mavis/main.py
+++ b/src/mavis/main.py
@@ -17,11 +17,11 @@
 from .align import get_aligner_version
 from .annotate import main as annotate_main
 from .cluster import main as cluster_main
+from .convert import SUPPORTED_TOOL, convert_tool_output
 from .overlay import check_overlay_args
 from .overlay import main as overlay_main
 from .pairing import main as pairing_main
 from .summary import main as summary_main
-from .tools import SUPPORTED_TOOL, convert_tool_output
 from .util import filepath
 from .validate import main as validate_main
 

From 428328b3fec59da426fee339009c67d956caf699 Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Wed, 9 Feb 2022 13:35:26 -0800
Subject: [PATCH 125/137] Do not split tests by type

- Make it easier to find tests for a particular part of MAVIS by
  mimicking the directory structure of the package itself.
- To avoid a large number of changes and an untraceable diff, add 2 as
  suffix to all conflicting filenames
---
 .../{unit => }/data/calc_orf_test_sequence.fa |  0
 tests/{unit => }/data/reference_sequences.fa  |  0
 .../data/test_assembly_sequences.txt          |  0
 tests/{end_to_end => test_mavis}/__init__.py  |  0
 .../{unit => test_mavis/annotate}/__init__.py |  0
 .../annotate}/test_annotate.py                |  5 +-
 .../annotate/test_annotate2.py}               | 23 +++---
 .../annotate}/test_annotate_examples.py       |  4 +-
 .../annotate}/test_annotate_fileio.py         |  0
 .../annotate/test_annotate_fileio2.py}        |  2 +-
 .../annotate}/test_call_indels.py             |  2 +-
 .../annotate}/test_splicing.py                |  5 +-
 .../config.py => test_mavis/bam/__init__.py}  |  0
 .../bam}/test_bam.py                          |  5 +-
 .../bam}/test_bam_cigar.py                    |  4 +-
 tests/test_mavis/cluster/__init__.py          |  0
 .../cluster}/test_cluster.py                  |  0
 .../cluster/test_cluster2.py}                 |  4 +-
 tests/test_mavis/convert/__init__.py          |  0
 .../convert}/test_convert.py                  |  4 +-
 .../{unit => test_mavis/convert}/test_tool.py |  8 +--
 .../convert}/test_tools_vcf.py                |  6 +-
 tests/test_mavis/illustrate/__init__.py       |  0
 .../illustrate}/test_illustrate.py            |  0
 .../illustrate/test_illustrate2.py}           |  4 +-
 .../__init__.py => test_mavis/mock.py}        | 71 +++++++++++++------
 tests/test_mavis/pairing/__init__.py          |  0
 .../pairing}/test_pairing.py                  |  0
 tests/test_mavis/summary/__init__.py          |  0
 .../summary}/test_summary.py                  |  2 +-
 .../{integration => test_mavis}/test_align.py |  3 +-
 .../{integration => test_mavis}/test_args.py  |  0
 tests/{unit => test_mavis}/test_assemble.py   |  7 +-
 .../test_assemble2.py}                        |  2 +-
 tests/{unit => test_mavis}/test_bam.py        |  0
 tests/{unit => test_mavis}/test_blat.py       |  0
 .../test_blat.py => test_mavis/test_blat2.py} |  2 +-
 tests/{unit => test_mavis}/test_breakpoint.py |  0
 .../test_breakpoint2.py}                      |  2 +-
 tests/{unit => test_mavis}/test_constants.py  |  0
 tests/{end_to_end => test_mavis}/test_help.py |  0
 tests/{unit => test_mavis}/test_interval.py   |  0
 .../test_overlay.py                           |  0
 tests/{unit => test_mavis}/test_util.py       |  0
 tests/test_mavis/validate/__init__.py         |  0
 .../validate/test_call.py}                    |  4 +-
 .../validate/test_evidence.py}                |  2 +-
 .../validate}/test_validate.py                |  2 +-
 .../validate/test_validate2.py}               |  4 +-
 tests/test_tools/__init__.py                  |  0
 .../data/Homo_sapiens.GRCh38.kras.gff3        |  0
 .../data/Homo_sapiens.GRCh38.kras.gff3.json   |  0
 .../data/Homo_sapiens.GRCh38.kras.gtf         |  0
 .../data/Homo_sapiens.GRCh38.kras.gtf.json    |  0
 .../{tools => test_tools}/data/K02718.1.gff3  |  0
 .../data/K02718.1.gff3.json                   |  0
 tests/{tools => test_tools}/data/K02718.1.gtf |  0
 .../data/K02718.1.gtf.json                    |  0
 .../data/ensembl69_hg19_annotations.kras.tab  |  0
 .../ensembl69_hg19_annotations.kras.tab.json  |  0
 .../data/example_genes.v2.json                |  0
 .../data/example_genes.v3.json                |  0
 .../test_convert_annotations_format.py        |  1 -
 .../test_ref_alt_count.py                     |  0
 tests/unit/mock.py                            | 45 ------------
 65 files changed, 98 insertions(+), 125 deletions(-)
 rename tests/{unit => }/data/calc_orf_test_sequence.fa (100%)
 rename tests/{unit => }/data/reference_sequences.fa (100%)
 rename tests/{unit => }/data/test_assembly_sequences.txt (100%)
 rename tests/{end_to_end => test_mavis}/__init__.py (100%)
 rename tests/{unit => test_mavis/annotate}/__init__.py (100%)
 rename tests/{unit => test_mavis/annotate}/test_annotate.py (99%)
 rename tests/{integration/test_annotate.py => test_mavis/annotate/test_annotate2.py} (99%)
 rename tests/{integration => test_mavis/annotate}/test_annotate_examples.py (98%)
 rename tests/{unit => test_mavis/annotate}/test_annotate_fileio.py (100%)
 rename tests/{integration/test_annotate_fileio.py => test_mavis/annotate/test_annotate_fileio2.py} (89%)
 rename tests/{unit => test_mavis/annotate}/test_call_indels.py (99%)
 rename tests/{integration => test_mavis/annotate}/test_splicing.py (98%)
 rename tests/{integration/config.py => test_mavis/bam/__init__.py} (100%)
 rename tests/{integration => test_mavis/bam}/test_bam.py (99%)
 rename tests/{integration => test_mavis/bam}/test_bam_cigar.py (99%)
 create mode 100644 tests/test_mavis/cluster/__init__.py
 rename tests/{unit => test_mavis/cluster}/test_cluster.py (100%)
 rename tests/{integration/test_cluster.py => test_mavis/cluster/test_cluster2.py} (99%)
 create mode 100644 tests/test_mavis/convert/__init__.py
 rename tests/{end_to_end => test_mavis/convert}/test_convert.py (98%)
 rename tests/{unit => test_mavis/convert}/test_tool.py (99%)
 rename tests/{unit => test_mavis/convert}/test_tools_vcf.py (90%)
 create mode 100644 tests/test_mavis/illustrate/__init__.py
 rename tests/{unit => test_mavis/illustrate}/test_illustrate.py (100%)
 rename tests/{integration/test_illustrate.py => test_mavis/illustrate/test_illustrate2.py} (99%)
 rename tests/{integration/__init__.py => test_mavis/mock.py} (87%)
 create mode 100644 tests/test_mavis/pairing/__init__.py
 rename tests/{integration => test_mavis/pairing}/test_pairing.py (100%)
 create mode 100644 tests/test_mavis/summary/__init__.py
 rename tests/{unit => test_mavis/summary}/test_summary.py (99%)
 rename tests/{integration => test_mavis}/test_align.py (99%)
 rename tests/{integration => test_mavis}/test_args.py (100%)
 rename tests/{unit => test_mavis}/test_assemble.py (97%)
 rename tests/{integration/test_assemble.py => test_mavis/test_assemble2.py} (99%)
 rename tests/{unit => test_mavis}/test_bam.py (100%)
 rename tests/{unit => test_mavis}/test_blat.py (100%)
 rename tests/{integration/test_blat.py => test_mavis/test_blat2.py} (99%)
 rename tests/{unit => test_mavis}/test_breakpoint.py (100%)
 rename tests/{integration/test_breakpoint.py => test_mavis/test_breakpoint2.py} (99%)
 rename tests/{unit => test_mavis}/test_constants.py (100%)
 rename tests/{end_to_end => test_mavis}/test_help.py (100%)
 rename tests/{unit => test_mavis}/test_interval.py (100%)
 rename tests/{end_to_end => test_mavis}/test_overlay.py (100%)
 rename tests/{unit => test_mavis}/test_util.py (100%)
 create mode 100644 tests/test_mavis/validate/__init__.py
 rename tests/{integration/test_validate_call.py => test_mavis/validate/test_call.py} (99%)
 rename tests/{integration/test_validate_evidence.py => test_mavis/validate/test_evidence.py} (99%)
 rename tests/{unit => test_mavis/validate}/test_validate.py (98%)
 rename tests/{integration/test_validate.py => test_mavis/validate/test_validate2.py} (99%)
 create mode 100644 tests/test_tools/__init__.py
 rename tests/{tools => test_tools}/data/Homo_sapiens.GRCh38.kras.gff3 (100%)
 rename tests/{tools => test_tools}/data/Homo_sapiens.GRCh38.kras.gff3.json (100%)
 rename tests/{tools => test_tools}/data/Homo_sapiens.GRCh38.kras.gtf (100%)
 rename tests/{tools => test_tools}/data/Homo_sapiens.GRCh38.kras.gtf.json (100%)
 rename tests/{tools => test_tools}/data/K02718.1.gff3 (100%)
 rename tests/{tools => test_tools}/data/K02718.1.gff3.json (100%)
 rename tests/{tools => test_tools}/data/K02718.1.gtf (100%)
 rename tests/{tools => test_tools}/data/K02718.1.gtf.json (100%)
 rename tests/{tools => test_tools}/data/ensembl69_hg19_annotations.kras.tab (100%)
 rename tests/{tools => test_tools}/data/ensembl69_hg19_annotations.kras.tab.json (100%)
 rename tests/{tools => test_tools}/data/example_genes.v2.json (100%)
 rename tests/{tools => test_tools}/data/example_genes.v3.json (100%)
 rename tests/{tools => test_tools}/test_convert_annotations_format.py (99%)
 rename tests/{end_to_end => test_tools}/test_ref_alt_count.py (100%)
 delete mode 100644 tests/unit/mock.py

diff --git a/tests/unit/data/calc_orf_test_sequence.fa b/tests/data/calc_orf_test_sequence.fa
similarity index 100%
rename from tests/unit/data/calc_orf_test_sequence.fa
rename to tests/data/calc_orf_test_sequence.fa
diff --git a/tests/unit/data/reference_sequences.fa b/tests/data/reference_sequences.fa
similarity index 100%
rename from tests/unit/data/reference_sequences.fa
rename to tests/data/reference_sequences.fa
diff --git a/tests/unit/data/test_assembly_sequences.txt b/tests/data/test_assembly_sequences.txt
similarity index 100%
rename from tests/unit/data/test_assembly_sequences.txt
rename to tests/data/test_assembly_sequences.txt
diff --git a/tests/end_to_end/__init__.py b/tests/test_mavis/__init__.py
similarity index 100%
rename from tests/end_to_end/__init__.py
rename to tests/test_mavis/__init__.py
diff --git a/tests/unit/__init__.py b/tests/test_mavis/annotate/__init__.py
similarity index 100%
rename from tests/unit/__init__.py
rename to tests/test_mavis/annotate/__init__.py
diff --git a/tests/unit/test_annotate.py b/tests/test_mavis/annotate/test_annotate.py
similarity index 99%
rename from tests/unit/test_annotate.py
rename to tests/test_mavis/annotate/test_annotate.py
index 339d86ed..950b1558 100644
--- a/tests/unit/test_annotate.py
+++ b/tests/test_mavis/annotate/test_annotate.py
@@ -1,5 +1,4 @@
 import itertools
-import os
 
 import pytest
 import timeout_decorator
@@ -7,7 +6,7 @@
 from mavis.annotate.protein import Domain, DomainRegion, calculate_orf
 from mavis.annotate.variant import IndelCall
 
-DATA_DIR = os.path.join(os.path.dirname(__file__), 'data')
+from ...util import get_data
 
 
 class TestDomainAlignSeq:
@@ -279,7 +278,7 @@ class TestCalculateORF:
     @timeout_decorator.timeout(20)
     def test_very_long(self):
         # load the sequence
-        with open(os.path.join(DATA_DIR, 'calc_orf_test_sequence.fa'), 'r') as fh:
+        with open(get_data('calc_orf_test_sequence.fa'), 'r') as fh:
             seq = fh.readlines()[0].strip()
         calculate_orf(seq, 300)
 
diff --git a/tests/integration/test_annotate.py b/tests/test_mavis/annotate/test_annotate2.py
similarity index 99%
rename from tests/integration/test_annotate.py
rename to tests/test_mavis/annotate/test_annotate2.py
index d6e5e2ed..0ab63ddc 100644
--- a/tests/integration/test_annotate.py
+++ b/tests/test_mavis/annotate/test_annotate2.py
@@ -5,22 +5,21 @@
 from mavis.annotate.base import BioInterval, ReferenceName
 from mavis.annotate.file_io import load_annotations, load_reference_genome
 from mavis.annotate.fusion import FusionTranscript, determine_prime
-from mavis.annotate.genomic import Exon, Gene, PreTranscript, Template, Transcript
-from mavis.annotate.protein import Domain, DomainRegion, Translation, calculate_orf, translate
-from mavis.annotate.variant import (
-    Annotation,
-    _gather_annotations,
-    _gather_breakpoint_annotations,
-    annotate_events,
-    overlapping_transcripts,
-)
+from mavis.annotate.genomic import (Exon, Gene, PreTranscript, Template,
+                                    Transcript)
+from mavis.annotate.protein import (Domain, DomainRegion, Translation,
+                                    calculate_orf, translate)
+from mavis.annotate.variant import (Annotation, _gather_annotations,
+                                    _gather_breakpoint_annotations,
+                                    annotate_events, overlapping_transcripts)
 from mavis.breakpoint import Breakpoint, BreakpointPair
-from mavis.constants import ORIENT, PRIME, PROTOCOL, STRAND, SVTYPE, reverse_complement
+from mavis.constants import (ORIENT, PRIME, PROTOCOL, STRAND, SVTYPE,
+                             reverse_complement)
 from mavis.error import NotSpecifiedError
 from mavis.interval import Interval
 
-from ..util import get_data
-from . import MockObject, get_example_genes
+from ...util import get_data
+from ..mock import MockObject, get_example_genes
 
 REFERENCE_ANNOTATIONS = None
 REFERENCE_GENOME = None
diff --git a/tests/integration/test_annotate_examples.py b/tests/test_mavis/annotate/test_annotate_examples.py
similarity index 98%
rename from tests/integration/test_annotate_examples.py
rename to tests/test_mavis/annotate/test_annotate_examples.py
index faf0297b..8e449dc0 100644
--- a/tests/integration/test_annotate_examples.py
+++ b/tests/test_mavis/annotate/test_annotate_examples.py
@@ -9,8 +9,8 @@
 from mavis.breakpoint import Breakpoint, BreakpointPair
 from mavis.constants import ORIENT, PROTOCOL, SPLICE_TYPE, STRAND, SVTYPE
 
-from ..util import long_running_test
-from . import MockLongString, MockObject, get_example_genes
+from ...util import long_running_test
+from ..mock import MockLongString, MockObject, get_example_genes
 
 
 def get_best(gene):
diff --git a/tests/unit/test_annotate_fileio.py b/tests/test_mavis/annotate/test_annotate_fileio.py
similarity index 100%
rename from tests/unit/test_annotate_fileio.py
rename to tests/test_mavis/annotate/test_annotate_fileio.py
diff --git a/tests/integration/test_annotate_fileio.py b/tests/test_mavis/annotate/test_annotate_fileio2.py
similarity index 89%
rename from tests/integration/test_annotate_fileio.py
rename to tests/test_mavis/annotate/test_annotate_fileio2.py
index 53572a15..86c5a780 100644
--- a/tests/integration/test_annotate_fileio.py
+++ b/tests/test_mavis/annotate/test_annotate_fileio2.py
@@ -1,6 +1,6 @@
 from mavis.annotate.file_io import load_annotations
 
-from ..util import get_data
+from ...util import get_data
 
 JSON = get_data('annotations_subsample.json')
 
diff --git a/tests/unit/test_call_indels.py b/tests/test_mavis/annotate/test_call_indels.py
similarity index 99%
rename from tests/unit/test_call_indels.py
rename to tests/test_mavis/annotate/test_call_indels.py
index 840947df..b809a2ab 100644
--- a/tests/unit/test_call_indels.py
+++ b/tests/test_mavis/annotate/test_call_indels.py
@@ -1,7 +1,7 @@
 import pytest
 from mavis.annotate.variant import IndelCall, call_protein_indel
 
-from .mock import Mock, MockFunction
+from ..mock import Mock, MockFunction
 
 
 class TestIndelCall:
diff --git a/tests/integration/test_splicing.py b/tests/test_mavis/annotate/test_splicing.py
similarity index 98%
rename from tests/integration/test_splicing.py
rename to tests/test_mavis/annotate/test_splicing.py
index 471c2bf1..734cbf70 100644
--- a/tests/integration/test_splicing.py
+++ b/tests/test_mavis/annotate/test_splicing.py
@@ -6,10 +6,11 @@
 from mavis.annotate.splicing import predict_splice_sites
 from mavis.annotate.variant import annotate_events
 from mavis.breakpoint import Breakpoint, BreakpointPair
-from mavis.constants import PROTOCOL, SPLICE_TYPE, STRAND, SVTYPE, reverse_complement
+from mavis.constants import (PROTOCOL, SPLICE_TYPE, STRAND, SVTYPE,
+                             reverse_complement)
 from mavis.interval import Interval
 
-from . import MockLongString, MockObject, get_example_genes
+from ..mock import MockLongString, MockObject, get_example_genes
 
 EXAMPLE_GENES = None
 
diff --git a/tests/integration/config.py b/tests/test_mavis/bam/__init__.py
similarity index 100%
rename from tests/integration/config.py
rename to tests/test_mavis/bam/__init__.py
diff --git a/tests/integration/test_bam.py b/tests/test_mavis/bam/test_bam.py
similarity index 99%
rename from tests/integration/test_bam.py
rename to tests/test_mavis/bam/test_bam.py
index 9ccbc09d..4f2132e7 100644
--- a/tests/integration/test_bam.py
+++ b/tests/test_mavis/bam/test_bam.py
@@ -1,5 +1,4 @@
 import argparse
-import logging
 import warnings
 from unittest import mock
 
@@ -19,8 +18,8 @@
 from mavis.constants import CIGAR, DNA_ALPHABET, ORIENT, READ_PAIR_TYPE, STRAND, SVTYPE
 from mavis.interval import Interval
 
-from ..util import get_data
-from . import MockBamFileHandle, MockRead
+from ...util import get_data
+from ..mock import MockBamFileHandle, MockRead
 
 REFERENCE_GENOME = None
 
diff --git a/tests/integration/test_bam_cigar.py b/tests/test_mavis/bam/test_bam_cigar.py
similarity index 99%
rename from tests/integration/test_bam_cigar.py
rename to tests/test_mavis/bam/test_bam_cigar.py
index 1d0c49d8..11fd7b6a 100644
--- a/tests/integration/test_bam_cigar.py
+++ b/tests/test_mavis/bam/test_bam_cigar.py
@@ -21,8 +21,8 @@
 from mavis.bam.read import SamRead
 from mavis.constants import CIGAR
 
-from ..util import get_data
-from . import MockObject, MockRead
+from ...util import get_data
+from ..mock import MockObject, MockRead
 
 REFERENCE_GENOME = None
 
diff --git a/tests/test_mavis/cluster/__init__.py b/tests/test_mavis/cluster/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/unit/test_cluster.py b/tests/test_mavis/cluster/test_cluster.py
similarity index 100%
rename from tests/unit/test_cluster.py
rename to tests/test_mavis/cluster/test_cluster.py
diff --git a/tests/integration/test_cluster.py b/tests/test_mavis/cluster/test_cluster2.py
similarity index 99%
rename from tests/integration/test_cluster.py
rename to tests/test_mavis/cluster/test_cluster2.py
index 3434c62b..0e8aa0a0 100644
--- a/tests/integration/test_cluster.py
+++ b/tests/test_mavis/cluster/test_cluster2.py
@@ -6,9 +6,7 @@
 from mavis.interval import Interval
 from mavis.util import read_bpp_from_input_file
 
-
-from ..util import get_data
-
+from ...util import get_data
 
 FULL_BASE_EVENTS = get_data('mock_sv_events.tsv')
 CLUSTERED_EVENTS = get_data('clustering_input.tab')
diff --git a/tests/test_mavis/convert/__init__.py b/tests/test_mavis/convert/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/end_to_end/test_convert.py b/tests/test_mavis/convert/test_convert.py
similarity index 98%
rename from tests/end_to_end/test_convert.py
rename to tests/test_mavis/convert/test_convert.py
index 0e23328d..4470bc66 100644
--- a/tests/end_to_end/test_convert.py
+++ b/tests/test_mavis/convert/test_convert.py
@@ -6,12 +6,12 @@
 from unittest.mock import patch
 
 from mavis.constants import ORIENT, SVTYPE
+from mavis.convert import SUPPORTED_TOOL
 from mavis.main import main
-from mavis.tools import SUPPORTED_TOOL
 from mavis.util import read_bpp_from_input_file
 from mavis_config.constants import SUBCOMMAND
 
-from ..util import get_data, glob_exists
+from ...util import get_data, glob_exists
 
 TEMP_OUTPUT = None
 
diff --git a/tests/unit/test_tool.py b/tests/test_mavis/convert/test_tool.py
similarity index 99%
rename from tests/unit/test_tool.py
rename to tests/test_mavis/convert/test_tool.py
index 96531588..a6371452 100644
--- a/tests/unit/test_tool.py
+++ b/tests/test_mavis/convert/test_tool.py
@@ -2,11 +2,11 @@
 
 import pytest
 from mavis.constants import COLUMNS, ORIENT, STRAND, SVTYPE
-from mavis.tools import SUPPORTED_TOOL, _convert_tool_row, _parse_transabyss
-from mavis.tools.vcf import convert_record as _parse_vcf_record
-from mavis.tools.vcf import parse_bnd_alt as _parse_bnd_alt
+from mavis.convert import SUPPORTED_TOOL, _convert_tool_row, _parse_transabyss
+from mavis.convert.vcf import convert_record as _parse_vcf_record
+from mavis.convert.vcf import parse_bnd_alt as _parse_bnd_alt
 
-from .mock import Mock
+from ..mock import Mock
 
 
 class TestDelly:
diff --git a/tests/unit/test_tools_vcf.py b/tests/test_mavis/convert/test_tools_vcf.py
similarity index 90%
rename from tests/unit/test_tools_vcf.py
rename to tests/test_mavis/convert/test_tools_vcf.py
index 2036e656..f846d41a 100644
--- a/tests/unit/test_tools_vcf.py
+++ b/tests/test_mavis/convert/test_tools_vcf.py
@@ -1,7 +1,7 @@
-from mavis.tools import SUPPORTED_TOOL, _convert_tool_row
-from mavis.tools.vcf import VcfInfoType, VcfRecordType, convert_record, pandas_vcf
+from mavis.convert import SUPPORTED_TOOL, _convert_tool_row
+from mavis.convert.vcf import VcfInfoType, VcfRecordType, convert_record, pandas_vcf
 
-from ..util import get_data
+from ...util import get_data
 
 
 def test_read_vcf():
diff --git a/tests/test_mavis/illustrate/__init__.py b/tests/test_mavis/illustrate/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/unit/test_illustrate.py b/tests/test_mavis/illustrate/test_illustrate.py
similarity index 100%
rename from tests/unit/test_illustrate.py
rename to tests/test_mavis/illustrate/test_illustrate.py
diff --git a/tests/integration/test_illustrate.py b/tests/test_mavis/illustrate/test_illustrate2.py
similarity index 99%
rename from tests/integration/test_illustrate.py
rename to tests/test_mavis/illustrate/test_illustrate2.py
index 0ce2bdae..d6bd1bbf 100644
--- a/tests/integration/test_illustrate.py
+++ b/tests/test_mavis/illustrate/test_illustrate2.py
@@ -20,8 +20,8 @@
 from mavis.interval import Interval
 from svgwrite import Drawing
 
-from ..util import get_data
-from . import OUTPUT_SVG, MockObject, MockString, build_transcript
+from ...util import get_data
+from ..mock import OUTPUT_SVG, MockObject, MockString, build_transcript
 
 TEMPLATE_METADATA = None
 DEFAULTS.domain_name_regex_filter = r'.*'
diff --git a/tests/integration/__init__.py b/tests/test_mavis/mock.py
similarity index 87%
rename from tests/integration/__init__.py
rename to tests/test_mavis/mock.py
index b71134a1..6e91f96d 100644
--- a/tests/integration/__init__.py
+++ b/tests/test_mavis/mock.py
@@ -1,13 +1,13 @@
 import os
+import types
 
 from mavis.align import query_coverage_interval
-from mavis.annotate.genomic import Transcript, PreTranscript
 from mavis.annotate.file_io import load_annotations, load_reference_genome
+from mavis.annotate.genomic import PreTranscript, Transcript
 from mavis.annotate.protein import Translation
 from mavis.constants import CIGAR, NA_MAPPING_QUALITY
 
-from ..util import DATA_DIR
-
+from ..util import get_data
 
 ARGUMENT_ERROR = 2
 
@@ -16,6 +16,50 @@
 _EXAMPLE_GENES = None
 
 
+class Mock:
+    def __init__(self, **kwargs):
+        for attr, val in kwargs.items():
+            setattr(self, attr, val)
+
+    def bind_method(self, **kwargs):
+        for attr, val in kwargs.items():
+            val = types.MethodType(val, self)  # bind the method to self
+            setattr(self, attr, val)
+
+    def add_attr(self, attr, val):
+        setattr(self, attr, val)
+
+    def __contains__(self, item):
+        if hasattr(self, item):
+            return True
+        return False
+
+
+class MockFunction:
+    def __init__(self, return_value):
+        self.return_value = return_value
+
+    def __call__(self, *pos, **kwargs):
+        return self.return_value
+
+
+class MockLongString:
+    def __init__(self, string, offset):
+        self.string = string
+        self.offset = offset
+
+    def __len__(self):
+        return len(self.string) + self.offset
+
+    def __getitem__(self, index):
+        if not isinstance(index, slice):
+            index = slice(index, index + 1)
+        index = slice(index.start - self.offset, index.stop - self.offset, index.step)
+        if index.start < 0:
+            raise NotImplementedError('string portion not given')
+        return self.string[index]
+
+
 def get_example_genes():
     global _EXAMPLE_GENES
     if _EXAMPLE_GENES is None:
@@ -25,8 +69,8 @@ def get_example_genes():
 
 def set_example_genes():
     result = {}
-    genes = load_annotations(os.path.join(DATA_DIR, 'example_genes.json'))
-    seqs = load_reference_genome(os.path.join(DATA_DIR, 'example_genes.fa'))
+    genes = load_annotations(get_data('example_genes.json'))
+    seqs = load_reference_genome(get_data('example_genes.fa'))
     for chr_genes in genes.values():
         for gene in chr_genes:
             if gene.name in seqs:
@@ -223,23 +267,6 @@ def __getitem__(self, index):
             return self.char
 
 
-class MockLongString(str):
-    def __new__(cls, *args, offset=0, **kw):
-        s = str.__new__(cls, *args, **kw)
-        setattr(s, 'offset', offset)
-        return s
-
-    def __getitem__(self, index):
-        if isinstance(index, slice):
-            index = slice(index.start - self.offset, index.stop - self.offset, index.step)
-        else:
-            index -= self.offset
-        return str.__getitem__(self, index)
-
-    def __len__(self):
-        return self.offset + str.__len__(self)
-
-
 def mock_read_pair(mock1, mock2):
     if mock1.reference_id != mock2.reference_id:
         mock1.template_length = 0
diff --git a/tests/test_mavis/pairing/__init__.py b/tests/test_mavis/pairing/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/integration/test_pairing.py b/tests/test_mavis/pairing/test_pairing.py
similarity index 100%
rename from tests/integration/test_pairing.py
rename to tests/test_mavis/pairing/test_pairing.py
diff --git a/tests/test_mavis/summary/__init__.py b/tests/test_mavis/summary/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/unit/test_summary.py b/tests/test_mavis/summary/test_summary.py
similarity index 99%
rename from tests/unit/test_summary.py
rename to tests/test_mavis/summary/test_summary.py
index 3e2a9efc..b66a8134 100644
--- a/tests/unit/test_summary.py
+++ b/tests/test_mavis/summary/test_summary.py
@@ -3,7 +3,7 @@
 from mavis.constants import CALL_METHOD, COLUMNS, PROTOCOL, STRAND, SVTYPE
 from mavis.summary.summary import filter_by_annotations
 
-from ..util import todo
+from ...util import todo
 
 
 @pytest.fixture
diff --git a/tests/integration/test_align.py b/tests/test_mavis/test_align.py
similarity index 99%
rename from tests/integration/test_align.py
rename to tests/test_mavis/test_align.py
index 0b3f556e..790a3d09 100644
--- a/tests/integration/test_align.py
+++ b/tests/test_mavis/test_align.py
@@ -1,4 +1,3 @@
-import shutil
 from unittest import mock
 
 import mavis.bam.cigar as _cigar
@@ -14,7 +13,7 @@
 from mavis_config import DEFAULTS
 
 from ..util import blat_only, bwa_only, get_data
-from . import MockLongString, MockObject, MockRead
+from .mock import MockLongString, MockObject, MockRead
 
 REFERENCE_GENOME = None
 
diff --git a/tests/integration/test_args.py b/tests/test_mavis/test_args.py
similarity index 100%
rename from tests/integration/test_args.py
rename to tests/test_mavis/test_args.py
diff --git a/tests/unit/test_assemble.py b/tests/test_mavis/test_assemble.py
similarity index 97%
rename from tests/unit/test_assemble.py
rename to tests/test_mavis/test_assemble.py
index 73b3c6bf..52711a6c 100644
--- a/tests/unit/test_assemble.py
+++ b/tests/test_mavis/test_assemble.py
@@ -1,14 +1,11 @@
 import itertools
-import os
 import random
 
 import pytest
 from mavis.assemble import Contig, DeBruijnGraph, assemble, filter_contigs, kmers
 from mavis.constants import DNA_ALPHABET
 
-from ..util import long_running_test
-
-DATA_DIR = os.path.join(os.path.dirname(__file__), 'data')
+from ..util import get_data, long_running_test
 
 
 class TestModule:
@@ -184,7 +181,7 @@ def test_trim_noncutting_paths_by_freq_degree_stop(self):
 @pytest.fixture
 def assembly_sequences():
     # load the sequences
-    with open(os.path.join(DATA_DIR, 'test_assembly_sequences.txt')) as fh:
+    with open(get_data('test_assembly_sequences.txt')) as fh:
         seq = [i.strip() for i in fh.readlines()]
     return seq
 
diff --git a/tests/integration/test_assemble.py b/tests/test_mavis/test_assemble2.py
similarity index 99%
rename from tests/integration/test_assemble.py
rename to tests/test_mavis/test_assemble2.py
index 07e22b64..8baa718c 100644
--- a/tests/integration/test_assemble.py
+++ b/tests/test_mavis/test_assemble2.py
@@ -8,7 +8,7 @@
 from mavis_config import DEFAULTS
 
 from ..util import get_data, long_running_test
-from . import MockObject
+from .mock import MockObject
 
 
 class TestFilterContigs:
diff --git a/tests/unit/test_bam.py b/tests/test_mavis/test_bam.py
similarity index 100%
rename from tests/unit/test_bam.py
rename to tests/test_mavis/test_bam.py
diff --git a/tests/unit/test_blat.py b/tests/test_mavis/test_blat.py
similarity index 100%
rename from tests/unit/test_blat.py
rename to tests/test_mavis/test_blat.py
diff --git a/tests/integration/test_blat.py b/tests/test_mavis/test_blat2.py
similarity index 99%
rename from tests/integration/test_blat.py
rename to tests/test_mavis/test_blat2.py
index 10fe8320..254f6a5d 100644
--- a/tests/integration/test_blat.py
+++ b/tests/test_mavis/test_blat2.py
@@ -9,7 +9,7 @@
 from mavis.interval import Interval
 
 from ..util import get_data
-from . import MockBamFileHandle, MockLongString, MockObject
+from .mock import MockBamFileHandle, MockLongString, MockObject
 
 REFERENCE_GENOME = None
 
diff --git a/tests/unit/test_breakpoint.py b/tests/test_mavis/test_breakpoint.py
similarity index 100%
rename from tests/unit/test_breakpoint.py
rename to tests/test_mavis/test_breakpoint.py
diff --git a/tests/integration/test_breakpoint.py b/tests/test_mavis/test_breakpoint2.py
similarity index 99%
rename from tests/integration/test_breakpoint.py
rename to tests/test_mavis/test_breakpoint2.py
index f6e0b3bb..134a8455 100644
--- a/tests/integration/test_breakpoint.py
+++ b/tests/test_mavis/test_breakpoint2.py
@@ -8,7 +8,7 @@
 from mavis.validate.evidence import TranscriptomeEvidence
 
 from ..util import get_data
-from . import MockObject, get_example_genes
+from .mock import MockObject, get_example_genes
 
 REFERENCE_GENOME = None
 REF_CHR = 'fake'
diff --git a/tests/unit/test_constants.py b/tests/test_mavis/test_constants.py
similarity index 100%
rename from tests/unit/test_constants.py
rename to tests/test_mavis/test_constants.py
diff --git a/tests/end_to_end/test_help.py b/tests/test_mavis/test_help.py
similarity index 100%
rename from tests/end_to_end/test_help.py
rename to tests/test_mavis/test_help.py
diff --git a/tests/unit/test_interval.py b/tests/test_mavis/test_interval.py
similarity index 100%
rename from tests/unit/test_interval.py
rename to tests/test_mavis/test_interval.py
diff --git a/tests/end_to_end/test_overlay.py b/tests/test_mavis/test_overlay.py
similarity index 100%
rename from tests/end_to_end/test_overlay.py
rename to tests/test_mavis/test_overlay.py
diff --git a/tests/unit/test_util.py b/tests/test_mavis/test_util.py
similarity index 100%
rename from tests/unit/test_util.py
rename to tests/test_mavis/test_util.py
diff --git a/tests/test_mavis/validate/__init__.py b/tests/test_mavis/validate/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/integration/test_validate_call.py b/tests/test_mavis/validate/test_call.py
similarity index 99%
rename from tests/integration/test_validate_call.py
rename to tests/test_mavis/validate/test_call.py
index 23d53e37..b959b6d5 100644
--- a/tests/integration/test_validate_call.py
+++ b/tests/test_mavis/validate/test_call.py
@@ -15,8 +15,8 @@
 from mavis.validate.base import Evidence
 from mavis.validate.evidence import GenomeEvidence, TranscriptomeEvidence
 
-from ..util import get_data, todo
-from . import MockBamFileHandle, MockLongString, MockRead, get_example_genes, mock_read_pair
+from ...util import get_data, todo
+from ..mock import MockBamFileHandle, MockLongString, MockRead, get_example_genes, mock_read_pair
 
 REFERENCE_GENOME = None
 
diff --git a/tests/integration/test_validate_evidence.py b/tests/test_mavis/validate/test_evidence.py
similarity index 99%
rename from tests/integration/test_validate_evidence.py
rename to tests/test_mavis/validate/test_evidence.py
index 1cab995a..0c6c1420 100644
--- a/tests/integration/test_validate_evidence.py
+++ b/tests/test_mavis/validate/test_evidence.py
@@ -13,7 +13,7 @@
 from mavis.validate.evidence import GenomeEvidence, TranscriptomeEvidence
 from mavis_config import DEFAULTS
 
-from . import MockBamFileHandle, MockObject, MockRead, mock_read_pair
+from ..mock import MockBamFileHandle, MockObject, MockRead, mock_read_pair
 
 REFERENCE_GENOME = None
 
diff --git a/tests/unit/test_validate.py b/tests/test_mavis/validate/test_validate.py
similarity index 98%
rename from tests/unit/test_validate.py
rename to tests/test_mavis/validate/test_validate.py
index 560ca909..f26fde4a 100644
--- a/tests/unit/test_validate.py
+++ b/tests/test_mavis/validate/test_validate.py
@@ -3,7 +3,7 @@
 from mavis.validate.base import Evidence
 from mavis.validate.call import _call_interval_by_flanking_coverage
 
-from .mock import Mock
+from ..mock import Mock
 
 
 class CallIntervalByFlankingCoverage:
diff --git a/tests/integration/test_validate.py b/tests/test_mavis/validate/test_validate2.py
similarity index 99%
rename from tests/integration/test_validate.py
rename to tests/test_mavis/validate/test_validate2.py
index e29a063f..0ec4278d 100644
--- a/tests/integration/test_validate.py
+++ b/tests/test_mavis/validate/test_validate2.py
@@ -9,8 +9,8 @@
 from mavis.validate.evidence import GenomeEvidence
 from mavis_config import DEFAULTS
 
-from ..util import get_data, long_running_test
-from . import MockLongString, MockObject, MockRead, mock_read_pair
+from ...util import get_data, long_running_test
+from ..mock import MockLongString, MockObject, MockRead, mock_read_pair
 
 REFERENCE_GENOME = None
 
diff --git a/tests/test_tools/__init__.py b/tests/test_tools/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/tools/data/Homo_sapiens.GRCh38.kras.gff3 b/tests/test_tools/data/Homo_sapiens.GRCh38.kras.gff3
similarity index 100%
rename from tests/tools/data/Homo_sapiens.GRCh38.kras.gff3
rename to tests/test_tools/data/Homo_sapiens.GRCh38.kras.gff3
diff --git a/tests/tools/data/Homo_sapiens.GRCh38.kras.gff3.json b/tests/test_tools/data/Homo_sapiens.GRCh38.kras.gff3.json
similarity index 100%
rename from tests/tools/data/Homo_sapiens.GRCh38.kras.gff3.json
rename to tests/test_tools/data/Homo_sapiens.GRCh38.kras.gff3.json
diff --git a/tests/tools/data/Homo_sapiens.GRCh38.kras.gtf b/tests/test_tools/data/Homo_sapiens.GRCh38.kras.gtf
similarity index 100%
rename from tests/tools/data/Homo_sapiens.GRCh38.kras.gtf
rename to tests/test_tools/data/Homo_sapiens.GRCh38.kras.gtf
diff --git a/tests/tools/data/Homo_sapiens.GRCh38.kras.gtf.json b/tests/test_tools/data/Homo_sapiens.GRCh38.kras.gtf.json
similarity index 100%
rename from tests/tools/data/Homo_sapiens.GRCh38.kras.gtf.json
rename to tests/test_tools/data/Homo_sapiens.GRCh38.kras.gtf.json
diff --git a/tests/tools/data/K02718.1.gff3 b/tests/test_tools/data/K02718.1.gff3
similarity index 100%
rename from tests/tools/data/K02718.1.gff3
rename to tests/test_tools/data/K02718.1.gff3
diff --git a/tests/tools/data/K02718.1.gff3.json b/tests/test_tools/data/K02718.1.gff3.json
similarity index 100%
rename from tests/tools/data/K02718.1.gff3.json
rename to tests/test_tools/data/K02718.1.gff3.json
diff --git a/tests/tools/data/K02718.1.gtf b/tests/test_tools/data/K02718.1.gtf
similarity index 100%
rename from tests/tools/data/K02718.1.gtf
rename to tests/test_tools/data/K02718.1.gtf
diff --git a/tests/tools/data/K02718.1.gtf.json b/tests/test_tools/data/K02718.1.gtf.json
similarity index 100%
rename from tests/tools/data/K02718.1.gtf.json
rename to tests/test_tools/data/K02718.1.gtf.json
diff --git a/tests/tools/data/ensembl69_hg19_annotations.kras.tab b/tests/test_tools/data/ensembl69_hg19_annotations.kras.tab
similarity index 100%
rename from tests/tools/data/ensembl69_hg19_annotations.kras.tab
rename to tests/test_tools/data/ensembl69_hg19_annotations.kras.tab
diff --git a/tests/tools/data/ensembl69_hg19_annotations.kras.tab.json b/tests/test_tools/data/ensembl69_hg19_annotations.kras.tab.json
similarity index 100%
rename from tests/tools/data/ensembl69_hg19_annotations.kras.tab.json
rename to tests/test_tools/data/ensembl69_hg19_annotations.kras.tab.json
diff --git a/tests/tools/data/example_genes.v2.json b/tests/test_tools/data/example_genes.v2.json
similarity index 100%
rename from tests/tools/data/example_genes.v2.json
rename to tests/test_tools/data/example_genes.v2.json
diff --git a/tests/tools/data/example_genes.v3.json b/tests/test_tools/data/example_genes.v3.json
similarity index 100%
rename from tests/tools/data/example_genes.v3.json
rename to tests/test_tools/data/example_genes.v3.json
diff --git a/tests/tools/test_convert_annotations_format.py b/tests/test_tools/test_convert_annotations_format.py
similarity index 99%
rename from tests/tools/test_convert_annotations_format.py
rename to tests/test_tools/test_convert_annotations_format.py
index a42daadb..7e637ae9 100644
--- a/tests/tools/test_convert_annotations_format.py
+++ b/tests/test_tools/test_convert_annotations_format.py
@@ -2,7 +2,6 @@
 import os
 
 import pytest
-
 from tools.convert_annotations_format import (
     convert_gff2_to_mavis,
     convert_gff3_to_mavis,
diff --git a/tests/end_to_end/test_ref_alt_count.py b/tests/test_tools/test_ref_alt_count.py
similarity index 100%
rename from tests/end_to_end/test_ref_alt_count.py
rename to tests/test_tools/test_ref_alt_count.py
diff --git a/tests/unit/mock.py b/tests/unit/mock.py
deleted file mode 100644
index a1311acb..00000000
--- a/tests/unit/mock.py
+++ /dev/null
@@ -1,45 +0,0 @@
-import types
-
-
-class Mock:
-    def __init__(self, **kwargs):
-        for attr, val in kwargs.items():
-            setattr(self, attr, val)
-
-    def bind_method(self, **kwargs):
-        for attr, val in kwargs.items():
-            val = types.MethodType(val, self)  # bind the method to self
-            setattr(self, attr, val)
-
-    def add_attr(self, attr, val):
-        setattr(self, attr, val)
-
-    def __contains__(self, item):
-        if hasattr(self, item):
-            return True
-        return False
-
-
-class MockFunction:
-    def __init__(self, return_value):
-        self.return_value = return_value
-
-    def __call__(self, *pos, **kwargs):
-        return self.return_value
-
-
-class MockLongString:
-    def __init__(self, string, offset):
-        self.string = string
-        self.offset = offset
-
-    def __len__(self):
-        return len(self.string) + self.offset
-
-    def __getitem__(self, index):
-        if not isinstance(index, slice):
-            index = slice(index, index + 1)
-        index = slice(index.start - self.offset, index.stop - self.offset, index.step)
-        if index.start < 0:
-            raise NotImplementedError('string portion not given')
-        return self.string[index]

From fc0a236e2204461ee023804ba99598c1b59f473b Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Wed, 9 Feb 2022 20:30:09 -0800
Subject: [PATCH 126/137] Specify page order for docs

---
 docs/background/.pages        |  3 +++
 docs/configuration/general.md |  2 +-
 docs/tutorials/.pages         |  4 ++++
 mkdocs.yml                    | 14 ++++++++++++++
 setup.cfg                     |  1 +
 5 files changed, 23 insertions(+), 1 deletion(-)
 create mode 100644 docs/background/.pages
 create mode 100644 docs/tutorials/.pages

diff --git a/docs/background/.pages b/docs/background/.pages
new file mode 100644
index 00000000..278d1410
--- /dev/null
+++ b/docs/background/.pages
@@ -0,0 +1,3 @@
+nav:
+  - theory.md
+  - citations.md
diff --git a/docs/configuration/general.md b/docs/configuration/general.md
index 176aef2e..dea8801f 100644
--- a/docs/configuration/general.md
+++ b/docs/configuration/general.md
@@ -1,6 +1,6 @@
 # Getting Started
 
-An exhaustive list of the various configurable settings can be found [here](../settings)
+An exhaustive list of the various configurable settings can be found [here](../settings). Alternatively, you can view them through the [online schema explorer](https://json-schema.app/view?url=https://raw.githubusercontent.com/bcgsc/mavis_config/master/src/mavis_config/config.json).
 
 ## Pipeline Configuration File
 
diff --git a/docs/tutorials/.pages b/docs/tutorials/.pages
new file mode 100644
index 00000000..b9f03d9d
--- /dev/null
+++ b/docs/tutorials/.pages
@@ -0,0 +1,4 @@
+nav:
+  - mini.md
+  - full.md
+  - ...
diff --git a/mkdocs.yml b/mkdocs.yml
index 736658d8..79900dff 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -12,7 +12,21 @@ markdown_extensions:
   - markdown_include.include:
       base_path: docs
 extra_css: [extra.css]
+nav:
+  - index.md
+  - install.md
+  - migrating.md
+  - ... | background/**.md
+  - ... | inputs/**.md
+  - ... | outputs/**.md
+  - ... | configuration/**.md
+  - ... | tutorials/**.md
+  - development.md
+  - ...
+  - glossary.md
+
 plugins:
+  - awesome-pages
   - mkdocs-simple-hooks:
       hooks:
         on_pre_build: "docs.hooks:build_package_docs"
diff --git a/setup.cfg b/setup.cfg
index 38a05509..c6adbdf0 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -61,6 +61,7 @@ doc =
     mkdocs-material==5.4.0
     markdown-include
     mkdocs-simple-hooks==0.1.2
+    mkdocs-awesome-pages-plugin==22.0.3
 test =
     timeout-decorator>=0.3.3
     coverage>=4.2

From 192c267e9cca6dbf8caabdf4bdbd45ccf104c689 Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Thu, 10 Feb 2022 11:45:06 -0800
Subject: [PATCH 127/137] Fix weird import linting

---
 tests/test_mavis/annotate/test_annotate2.py | 19 ++++++++++---------
 tests/test_mavis/annotate/test_splicing.py  |  3 +--
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/tests/test_mavis/annotate/test_annotate2.py b/tests/test_mavis/annotate/test_annotate2.py
index 0ab63ddc..df45e307 100644
--- a/tests/test_mavis/annotate/test_annotate2.py
+++ b/tests/test_mavis/annotate/test_annotate2.py
@@ -5,16 +5,17 @@
 from mavis.annotate.base import BioInterval, ReferenceName
 from mavis.annotate.file_io import load_annotations, load_reference_genome
 from mavis.annotate.fusion import FusionTranscript, determine_prime
-from mavis.annotate.genomic import (Exon, Gene, PreTranscript, Template,
-                                    Transcript)
-from mavis.annotate.protein import (Domain, DomainRegion, Translation,
-                                    calculate_orf, translate)
-from mavis.annotate.variant import (Annotation, _gather_annotations,
-                                    _gather_breakpoint_annotations,
-                                    annotate_events, overlapping_transcripts)
+from mavis.annotate.genomic import Exon, Gene, PreTranscript, Template, Transcript
+from mavis.annotate.protein import Domain, DomainRegion, Translation, calculate_orf, translate
+from mavis.annotate.variant import (
+    Annotation,
+    _gather_annotations,
+    _gather_breakpoint_annotations,
+    annotate_events,
+    overlapping_transcripts,
+)
 from mavis.breakpoint import Breakpoint, BreakpointPair
-from mavis.constants import (ORIENT, PRIME, PROTOCOL, STRAND, SVTYPE,
-                             reverse_complement)
+from mavis.constants import ORIENT, PRIME, PROTOCOL, STRAND, SVTYPE, reverse_complement
 from mavis.error import NotSpecifiedError
 from mavis.interval import Interval
 
diff --git a/tests/test_mavis/annotate/test_splicing.py b/tests/test_mavis/annotate/test_splicing.py
index 734cbf70..79978d6c 100644
--- a/tests/test_mavis/annotate/test_splicing.py
+++ b/tests/test_mavis/annotate/test_splicing.py
@@ -6,8 +6,7 @@
 from mavis.annotate.splicing import predict_splice_sites
 from mavis.annotate.variant import annotate_events
 from mavis.breakpoint import Breakpoint, BreakpointPair
-from mavis.constants import (PROTOCOL, SPLICE_TYPE, STRAND, SVTYPE,
-                             reverse_complement)
+from mavis.constants import PROTOCOL, SPLICE_TYPE, STRAND, SVTYPE, reverse_complement
 from mavis.interval import Interval
 
 from ..mock import MockLongString, MockObject, get_example_genes

From 08aed1068abe379fbda70e13a716148985f357e0 Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Thu, 10 Feb 2022 13:21:36 -0800
Subject: [PATCH 128/137] Fix name of extras in rtd config

---
 .readthedocs.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.readthedocs.yml b/.readthedocs.yml
index aa35ae55..12ffe7f3 100644
--- a/.readthedocs.yml
+++ b/.readthedocs.yml
@@ -20,4 +20,4 @@ python:
     -   method: pip
         path: .
         extra_requirements:
-            - docs
+            - doc

From 8c252b2ea2f06782216ee33e0647d9f818af2c0d Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Thu, 10 Feb 2022 13:24:29 -0800
Subject: [PATCH 129/137] Relax version requirement

---
 setup.cfg | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.cfg b/setup.cfg
index c6adbdf0..905b1bae 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -61,7 +61,7 @@ doc =
     mkdocs-material==5.4.0
     markdown-include
     mkdocs-simple-hooks==0.1.2
-    mkdocs-awesome-pages-plugin==22.0.3
+    mkdocs-awesome-pages-plugin
 test =
     timeout-decorator>=0.3.3
     coverage>=4.2

From addc8470ec8b000b5aa58a1964953a397f67c05d Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Thu, 10 Feb 2022 13:26:52 -0800
Subject: [PATCH 130/137] Relax more versions

---
 setup.cfg | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/setup.cfg b/setup.cfg
index 905b1bae..ed47bfd5 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -56,11 +56,11 @@ where = src
 
 [options.extras_require]
 doc =
-    mkdocs==1.1.2
+    mkdocs>=1.1.2
     markdown-refdocs
-    mkdocs-material==5.4.0
+    mkdocs-material>=5.4.0
     markdown-include
-    mkdocs-simple-hooks==0.1.2
+    mkdocs-simple-hooks>=0.1.2
     mkdocs-awesome-pages-plugin
 test =
     timeout-decorator>=0.3.3
@@ -79,11 +79,11 @@ dev =
     pytest
     pytest-cov
     pytest-xdist
-    mkdocs==1.1.2
+    mkdocs>=1.1.2,<2
     markdown-refdocs
-    mkdocs-material==5.4.0
+    mkdocs-material>=5.4.0
     markdown-include
-    mkdocs-simple-hooks==0.1.2
+    mkdocs-simple-hooks>=0.1.2
     types-setuptools>=57.4.7, <58
 deploy =
     twine

From e5c6a59ae7e42b57dd73b90f72783ec62cb609f9 Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Wed, 16 Feb 2022 15:11:39 -0800
Subject: [PATCH 131/137] Make template metadata file optional

resolves: #310
---
 src/mavis/annotate/main.py                    | 11 +++++++++--
 tests/mini-tutorial.annotate_only.config.json |  3 ---
 2 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/src/mavis/annotate/main.py b/src/mavis/annotate/main.py
index ed426a04..1d6fd503 100644
--- a/src/mavis/annotate/main.py
+++ b/src/mavis/annotate/main.py
@@ -10,11 +10,12 @@
 from ..error import DrawingFitError, NotSpecifiedError
 from ..illustrate.constants import DiagramSettings
 from ..illustrate.diagram import draw_sv_summary_diagram
+from ..types import ReferenceGenome
 from ..util import generate_complete_stamp, logger, mkdirp, read_inputs
 from .constants import PASS_FILENAME
 from .file_io import ReferenceFile
 from .fusion import determine_prime
-from .genomic import PreTranscript
+from .genomic import PreTranscript, Template
 from .variant import (
     annotate_events,
     call_protein_indel,
@@ -30,7 +31,13 @@
 }
 
 
-def draw(drawing_config, ann, reference_genome, template_metadata, drawings_directory):
+def draw(
+    drawing_config: DiagramSettings,
+    ann,
+    reference_genome: ReferenceGenome,
+    template_metadata: Dict[str, Template],
+    drawings_directory: str,
+):
     """
     produces the svg diagram and json legend for a given annotation
     """
diff --git a/tests/mini-tutorial.annotate_only.config.json b/tests/mini-tutorial.annotate_only.config.json
index b270c7dc..545b8b78 100644
--- a/tests/mini-tutorial.annotate_only.config.json
+++ b/tests/mini-tutorial.annotate_only.config.json
@@ -45,8 +45,5 @@
     ],
     "reference.reference_genome": [
         "tests/data/mock_reference_genome.fa"
-    ],
-    "reference.template_metadata": [
-        "tests/data/cytoBand.txt"
     ]
 }

From 05bc7723c3483698f2e26c926d26ef4289c15c76 Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Wed, 16 Feb 2022 15:45:17 -0800
Subject: [PATCH 132/137] default to an empty list when a reference file is absent from config

---
 src/mavis/annotate/file_io.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mavis/annotate/file_io.py b/src/mavis/annotate/file_io.py
index 7220fc9c..b5dcc9e5 100644
--- a/src/mavis/annotate/file_io.py
+++ b/src/mavis/annotate/file_io.py
@@ -428,4 +428,4 @@ def load(self, ignore_cache=False, verbose=True):
 
     @classmethod
     def load_from_config(cls, config, file_type: str, **kwargs):
-        return ReferenceFile(file_type, *config[f'reference.{file_type}'], **kwargs)
+        return ReferenceFile(file_type, *config.get(f'reference.{file_type}', []), **kwargs)

From 0ccd6ef671d1109101170ee90da319597159c610 Mon Sep 17 00:00:00 2001
From: Jeremy Fan <jfan@bcgsc.ca>
Date: Wed, 16 Feb 2022 21:13:08 -0800
Subject: [PATCH 133/137] add wheel upgrade to install documentation

---
 docs/install.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/install.md b/docs/install.md
index b3468f7a..b9343c48 100644
--- a/docs/install.md
+++ b/docs/install.md
@@ -16,7 +16,7 @@ The simplest way to use MAVIS is via Singularity. The MAVIS docker container use
 by singularity will take care of installing the aligner as well.
 
 ```bash
-pip install -U setuptools pip
+pip install -U setuptools pip wheel
 pip install mavis_config  # also installs snakemake
 ```
 

From 6a3d15dc34340fcc29699a8bd5c2787d95ac2be7 Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Sat, 19 Feb 2022 12:16:27 -0800
Subject: [PATCH 134/137] Enforce unique transcript IDs

---
 src/tools/convert_annotations_format.py       |   37 +-
 tests/data/mock_reference_annotations2.json   |  155 +-
 tests/test_tools/data/viral.gtf               |  447 +++
 tests/test_tools/data/viral.gtf.json          | 2683 +++++++++++++++++
 .../test_convert_annotations_format.py        |    1 +
 5 files changed, 3317 insertions(+), 6 deletions(-)
 create mode 100644 tests/test_tools/data/viral.gtf
 create mode 100644 tests/test_tools/data/viral.gtf.json

diff --git a/src/tools/convert_annotations_format.py b/src/tools/convert_annotations_format.py
index 775b6d1d..eceba67a 100644
--- a/src/tools/convert_annotations_format.py
+++ b/src/tools/convert_annotations_format.py
@@ -464,6 +464,26 @@ def validate_gff_coordinates(nodes_df, links_df):
         raise ValueError(f'{errors.shape[0]} entries with impossible coordinates')
 
 
+def enforce_uniq_transcript_ids(input_df) -> pd.DataFrame:
+    df = input_df.copy()
+    duplicates = df[df.type == 'transcript'].drop_duplicates(['seqid', 'parent_id', 'feature_id'])
+
+    if duplicates.shape[0] == duplicates.feature_id.nunique():
+        return df
+
+    # there are some non-unique transcript IDs, make them all pre-pend the seqid
+    df.loc[df.type == 'transcript', 'feature_id'] = df.seqid + GFF_ID_DELIMITER + df.feature_id
+    df.loc[df.parent_type == 'transcript', 'parent_id'] = df.seqid + GFF_ID_DELIMITER + df.parent_id
+    duplicates = df[df.type == 'transcript'].drop_duplicates(['seqid', 'parent_id', 'feature_id'])
+
+    if duplicates.shape[0] == duplicates.feature_id.nunique():
+        return df.copy()
+
+    raise ValueError(
+        f'Unable to enforce unique transcript IDs: ({duplicates.shape[0]},{duplicates.feature_id.nunique()})'
+    )
+
+
 def convert_pandas_gff_to_mavis(df) -> Dict:
     df['error'] = ''
     df.loc[~df.type.isin(GFF_ALL_FEATURES), 'error'] = 'unrecognized type ' + df.type
@@ -530,8 +550,13 @@ def simplify_type(t):
     nodes_df, links_df = fix_orphan_elements(nodes_df, links_df)
     nodes_df, links_df = insert_missing_transcripts(nodes_df, links_df)
     validate_gff_coordinates(nodes_df, links_df)
+    df = nodes_df.merge(
+        links_df[GFF_KEY_COLS + ['parent_type', 'parent_id']].drop_duplicates(),
+        how='outer',
+        on=GFF_KEY_COLS,
+    ).fillna('')
 
-    df = nodes_df.merge(links_df, how='outer', on=GFF_KEY_COLS).fillna('')
+    df = enforce_uniq_transcript_ids(df)
 
     def feature_key(row, parent=False):
         if not parent:
@@ -783,6 +808,8 @@ def split_attributes(row):
     df.loc[df.type == 'gene', 'Name'] = df.gene_name
     df.loc[df.type == 'transcript', 'Name'] = df.transcript_name
     df['strand'] = df.strand.fillna('')
+    df['gene_id'] = df.gene_id.astype(str)
+    df.loc[df.gene_id.str.startswith('unassigned_gene_'), 'gene_id'] = ''
 
     df['Parent'] = ''
     df.loc[(df.type == 'transcript') & (df.gene_id != ''), 'Parent'] = 'gene:' + df.gene_id
@@ -868,8 +895,8 @@ def main():
     parser.add_argument('--input_type', default='v2', choices=['v2-tab', 'v2-json', 'gff3', 'gtf'])
     parser.add_argument('output', help='path to the JSON output file')
     parser.add_argument(
-        '--keep_alt',
-        help='do not filter out chromosome/seqid names starting with GL or KI',
+        '--filter_alt',
+        help='filter out chromosome/seqid names starting with GL or KI',
         action='store_true',
         default=False,
     )
@@ -889,9 +916,9 @@ def main():
     elif args.input_type == 'v2-json':
         annotations = convert_mavis_json_2to3(args.input)
     elif args.input_type == 'gtf':
-        annotations = convert_gff2_to_mavis(args.input, not args.keep_alt)
+        annotations = convert_gff2_to_mavis(args.input, args.filter_alt)
     else:
-        annotations = convert_gff3_to_mavis(args.input, not args.keep_alt)
+        annotations = convert_gff3_to_mavis(args.input, args.filter_alt)
 
     logging.info(f'writing: {args.output}')
     with open(args.output, 'w') as fh:
diff --git a/tests/data/mock_reference_annotations2.json b/tests/data/mock_reference_annotations2.json
index f1ef1c50..065b1221 100644
--- a/tests/data/mock_reference_annotations2.json
+++ b/tests/data/mock_reference_annotations2.json
@@ -1 +1,154 @@
-{"genes": [{"aliases": [], "chr": "fake", "end": 200, "name": "GENE-A", "start": 100, "strand": "+", "transcripts": [{"aliases": [], "cdna_coding_end": null, "cdna_coding_start": null, "domains": [], "end": 200, "exons": [], "is_best_transcript": true, "name": "TRANSCRIPT-A", "start": 100}]}, {"aliases": [], "chr": "fake", "end": 350, "name": "GENE-B", "start": 250, "strand": "-", "transcripts": [{"aliases": [], "cdna_coding_end": null, "cdna_coding_start": null, "domains": [], "end": 350, "exons": [], "is_best_transcript": true, "name": "TRANSCRIPT-B", "start": 250}]}, {"aliases": [], "chr": "fake", "end": 400, "name": "GENE-C", "start": 300, "strand": "+", "transcripts": [{"aliases": [], "cdna_coding_end": null, "cdna_coding_start": null, "domains": [], "end": 400, "exons": [], "is_best_transcript": true, "name": "TRANSCRIPT-C", "start": 300}]}, {"aliases": [], "chr": "fake", "end": 550, "name": "GENE-D", "start": 450, "strand": "-", "transcripts": [{"aliases": [], "cdna_coding_end": null, "cdna_coding_start": null, "domains": [], "end": 550, "exons": [], "is_best_transcript": true, "name": "TRANSCRIPT-D", "start": 450}]}, {"aliases": [], "chr": "fake", "end": 600, "name": "GENE-E", "start": 500, "strand": "+", "transcripts": [{"aliases": [], "cdna_coding_end": null, "cdna_coding_start": null, "domains": [], "end": 600, "exons": [], "is_best_transcript": true, "name": "TRANSCRIPT-E", "start": 500}]}, {"aliases": [], "chr": "fake", "end": 650, "name": "GENE-F", "start": 550, "strand": "+", "transcripts": [{"aliases": [], "cdna_coding_end": null, "cdna_coding_start": null, "domains": [], "end": 650, "exons": [], "is_best_transcript": true, "name": "TRANSCRIPT-E", "start": 550}]}]}
\ No newline at end of file
+{
+    "genes": [
+        {
+            "aliases": [
+            ],
+            "chr": "fake",
+            "end": 200,
+            "name": "GENE-A",
+            "start": 100,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "aliases": [
+                    ],
+                    "cdna_coding_end": null,
+                    "cdna_coding_start": null,
+                    "domains": [
+                    ],
+                    "end": 200,
+                    "exons": [
+                    ],
+                    "is_best_transcript": true,
+                    "name": "TRANSCRIPT-A",
+                    "start": 100
+                }
+            ]
+        },
+        {
+            "aliases": [
+            ],
+            "chr": "fake",
+            "end": 350,
+            "name": "GENE-B",
+            "start": 250,
+            "strand": "-",
+            "transcripts": [
+                {
+                    "aliases": [
+                    ],
+                    "cdna_coding_end": null,
+                    "cdna_coding_start": null,
+                    "domains": [
+                    ],
+                    "end": 350,
+                    "exons": [
+                    ],
+                    "is_best_transcript": true,
+                    "name": "TRANSCRIPT-B",
+                    "start": 250
+                }
+            ]
+        },
+        {
+            "aliases": [
+            ],
+            "chr": "fake",
+            "end": 400,
+            "name": "GENE-C",
+            "start": 300,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "aliases": [
+                    ],
+                    "cdna_coding_end": null,
+                    "cdna_coding_start": null,
+                    "domains": [
+                    ],
+                    "end": 400,
+                    "exons": [
+                    ],
+                    "is_best_transcript": true,
+                    "name": "TRANSCRIPT-C",
+                    "start": 300
+                }
+            ]
+        },
+        {
+            "aliases": [
+            ],
+            "chr": "fake",
+            "end": 550,
+            "name": "GENE-D",
+            "start": 450,
+            "strand": "-",
+            "transcripts": [
+                {
+                    "aliases": [
+                    ],
+                    "cdna_coding_end": null,
+                    "cdna_coding_start": null,
+                    "domains": [
+                    ],
+                    "end": 550,
+                    "exons": [
+                    ],
+                    "is_best_transcript": true,
+                    "name": "TRANSCRIPT-D",
+                    "start": 450
+                }
+            ]
+        },
+        {
+            "aliases": [
+            ],
+            "chr": "fake",
+            "end": 600,
+            "name": "GENE-E",
+            "start": 500,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "aliases": [
+                    ],
+                    "cdna_coding_end": null,
+                    "cdna_coding_start": null,
+                    "domains": [
+                    ],
+                    "end": 600,
+                    "exons": [
+                    ],
+                    "is_best_transcript": true,
+                    "name": "TRANSCRIPT-E",
+                    "start": 500
+                }
+            ]
+        },
+        {
+            "aliases": [
+            ],
+            "chr": "fake",
+            "end": 650,
+            "name": "GENE-F",
+            "start": 550,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "aliases": [
+                    ],
+                    "cdna_coding_end": null,
+                    "cdna_coding_start": null,
+                    "domains": [
+                    ],
+                    "end": 650,
+                    "exons": [
+                    ],
+                    "is_best_transcript": true,
+                    "name": "TRANSCRIPT-F",
+                    "start": 550
+                }
+            ]
+        }
+    ]
+}
diff --git a/tests/test_tools/data/viral.gtf b/tests/test_tools/data/viral.gtf
new file mode 100644
index 00000000..57c45450
--- /dev/null
+++ b/tests/test_tools/data/viral.gtf
@@ -0,0 +1,447 @@
+X74464.1	EMBL	gene	200	646	.	+	.	gene_id "E6"; transcript_id ""; gbkey "Gene"; gene "E6"; gene_biotype "protein_coding"; 
+X74464.1	EMBL	CDS	200	643	.	+	0	gene_id "E6"; transcript_id "unassigned_transcript_1"; db_xref "GOA:P36801"; db_xref "InterPro:IPR001334"; db_xref "UniProtKB/Swiss-Prot:P36801"; gbkey "CDS"; gene "E6"; note "alternative"; product "early protein"; protein_id "CAA52482.1"; exon_number "1"; 
+X74464.1	EMBL	start_codon	200	202	.	+	0	gene_id "E6"; transcript_id "unassigned_transcript_1"; db_xref "GOA:P36801"; db_xref "InterPro:IPR001334"; db_xref "UniProtKB/Swiss-Prot:P36801"; gbkey "CDS"; gene "E6"; note "alternative"; product "early protein"; protein_id "CAA52482.1"; exon_number "1"; 
+X74464.1	EMBL	stop_codon	644	646	.	+	0	gene_id "E6"; transcript_id "unassigned_transcript_1"; db_xref "GOA:P36801"; db_xref "InterPro:IPR001334"; db_xref "UniProtKB/Swiss-Prot:P36801"; gbkey "CDS"; gene "E6"; note "alternative"; product "early protein"; protein_id "CAA52482.1"; exon_number "1"; 
+X74464.1	EMBL	CDS	221	643	.	+	0	gene_id "E6"; transcript_id "unassigned_transcript_2"; db_xref "GOA:P36801"; db_xref "InterPro:IPR001334"; db_xref "UniProtKB/Swiss-Prot:P36801"; gbkey "CDS"; gene "E6"; product "early protein"; protein_id "CAA52483.1"; exon_number "1"; 
+X74464.1	EMBL	start_codon	221	223	.	+	0	gene_id "E6"; transcript_id "unassigned_transcript_2"; db_xref "GOA:P36801"; db_xref "InterPro:IPR001334"; db_xref "UniProtKB/Swiss-Prot:P36801"; gbkey "CDS"; gene "E6"; product "early protein"; protein_id "CAA52483.1"; exon_number "1"; 
+X74464.1	EMBL	stop_codon	644	646	.	+	0	gene_id "E6"; transcript_id "unassigned_transcript_2"; db_xref "GOA:P36801"; db_xref "InterPro:IPR001334"; db_xref "UniProtKB/Swiss-Prot:P36801"; gbkey "CDS"; gene "E6"; product "early protein"; protein_id "CAA52483.1"; exon_number "1"; 
+X74464.1	EMBL	gene	643	924	.	+	.	gene_id "E7"; transcript_id ""; gbkey "Gene"; gene "E7"; gene_biotype "protein_coding"; 
+X74464.1	EMBL	CDS	643	921	.	+	0	gene_id "E7"; transcript_id "unassigned_transcript_3"; db_xref "GOA:P36817"; db_xref "InterPro:IPR000148"; db_xref "UniProtKB/Swiss-Prot:P36817"; gbkey "CDS"; gene "E7"; product "early protein"; protein_id "CAA52484.1"; exon_number "1"; 
+X74464.1	EMBL	start_codon	643	645	.	+	0	gene_id "E7"; transcript_id "unassigned_transcript_3"; db_xref "GOA:P36817"; db_xref "InterPro:IPR000148"; db_xref "UniProtKB/Swiss-Prot:P36817"; gbkey "CDS"; gene "E7"; product "early protein"; protein_id "CAA52484.1"; exon_number "1"; 
+X74464.1	EMBL	stop_codon	922	924	.	+	0	gene_id "E7"; transcript_id "unassigned_transcript_3"; db_xref "GOA:P36817"; db_xref "InterPro:IPR000148"; db_xref "UniProtKB/Swiss-Prot:P36817"; gbkey "CDS"; gene "E7"; product "early protein"; protein_id "CAA52484.1"; exon_number "1"; 
+X74464.1	EMBL	gene	917	2734	.	+	.	gene_id "E1"; transcript_id ""; gbkey "Gene"; gene "E1"; gene_biotype "protein_coding"; 
+X74464.1	EMBL	CDS	917	2731	.	+	0	gene_id "E1"; transcript_id "unassigned_transcript_4"; db_xref "GOA:Q05111"; db_xref "InterPro:IPR001177"; db_xref "InterPro:IPR014000"; db_xref "InterPro:IPR014015"; db_xref "InterPro:IPR016393"; db_xref "UniProtKB/Swiss-Prot:Q05111"; gbkey "CDS"; gene "E1"; product "early protein"; protein_id "CAA52485.1"; exon_number "1"; 
+X74464.1	EMBL	start_codon	917	919	.	+	0	gene_id "E1"; transcript_id "unassigned_transcript_4"; db_xref "GOA:Q05111"; db_xref "InterPro:IPR001177"; db_xref "InterPro:IPR014000"; db_xref "InterPro:IPR014015"; db_xref "InterPro:IPR016393"; db_xref "UniProtKB/Swiss-Prot:Q05111"; gbkey "CDS"; gene "E1"; product "early protein"; protein_id "CAA52485.1"; exon_number "1"; 
+X74464.1	EMBL	stop_codon	2732	2734	.	+	0	gene_id "E1"; transcript_id "unassigned_transcript_4"; db_xref "GOA:Q05111"; db_xref "InterPro:IPR001177"; db_xref "InterPro:IPR014000"; db_xref "InterPro:IPR014015"; db_xref "InterPro:IPR016393"; db_xref "UniProtKB/Swiss-Prot:Q05111"; gbkey "CDS"; gene "E1"; product "early protein"; protein_id "CAA52485.1"; exon_number "1"; 
+X74464.1	EMBL	gene	2676	4061	.	+	.	gene_id "E2"; transcript_id ""; gbkey "Gene"; gene "E2"; gene_biotype "protein_coding"; 
+X74464.1	EMBL	CDS	2676	4058	.	+	0	gene_id "E2"; transcript_id "unassigned_transcript_5"; db_xref "GOA:P36780"; db_xref "InterPro:IPR000427"; db_xref "InterPro:IPR001866"; db_xref "InterPro:IPR009021"; db_xref "InterPro:IPR012677"; db_xref "UniProtKB/Swiss-Prot:P36780"; gbkey "CDS"; gene "E2"; product "early protein"; protein_id "CAA52486.1"; exon_number "1"; 
+X74464.1	EMBL	start_codon	2676	2678	.	+	0	gene_id "E2"; transcript_id "unassigned_transcript_5"; db_xref "GOA:P36780"; db_xref "InterPro:IPR000427"; db_xref "InterPro:IPR001866"; db_xref "InterPro:IPR009021"; db_xref "InterPro:IPR012677"; db_xref "UniProtKB/Swiss-Prot:P36780"; gbkey "CDS"; gene "E2"; product "early protein"; protein_id "CAA52486.1"; exon_number "1"; 
+X74464.1	EMBL	stop_codon	4059	4061	.	+	0	gene_id "E2"; transcript_id "unassigned_transcript_5"; db_xref "GOA:P36780"; db_xref "InterPro:IPR000427"; db_xref "InterPro:IPR001866"; db_xref "InterPro:IPR009021"; db_xref "InterPro:IPR012677"; db_xref "UniProtKB/Swiss-Prot:P36780"; gbkey "CDS"; gene "E2"; product "early protein"; protein_id "CAA52486.1"; exon_number "1"; 
+X74464.1	EMBL	gene	4129	5730	.	+	.	gene_id "L2"; transcript_id ""; gbkey "Gene"; gene "L2"; gene_biotype "protein_coding"; 
+X74464.1	EMBL	CDS	4129	5727	.	+	0	gene_id "L2"; transcript_id "unassigned_transcript_6"; db_xref "GOA:P36746"; db_xref "InterPro:IPR000784"; db_xref "UniProtKB/Swiss-Prot:P36746"; gbkey "CDS"; gene "L2"; product "late protein"; protein_id "CAA52487.1"; exon_number "1"; 
+X74464.1	EMBL	start_codon	4129	4131	.	+	0	gene_id "L2"; transcript_id "unassigned_transcript_6"; db_xref "GOA:P36746"; db_xref "InterPro:IPR000784"; db_xref "UniProtKB/Swiss-Prot:P36746"; gbkey "CDS"; gene "L2"; product "late protein"; protein_id "CAA52487.1"; exon_number "1"; 
+X74464.1	EMBL	stop_codon	5728	5730	.	+	0	gene_id "L2"; transcript_id "unassigned_transcript_6"; db_xref "GOA:P36746"; db_xref "InterPro:IPR000784"; db_xref "UniProtKB/Swiss-Prot:P36746"; gbkey "CDS"; gene "L2"; product "late protein"; protein_id "CAA52487.1"; exon_number "1"; 
+X74464.1	EMBL	gene	5745	7268	.	+	.	gene_id "L1"; transcript_id ""; gbkey "Gene"; gene "L1"; gene_biotype "protein_coding"; 
+X74464.1	EMBL	CDS	5745	7265	.	+	0	gene_id "L1"; transcript_id "unassigned_transcript_7"; db_xref "GOA:Q02480"; db_xref "InterPro:IPR002210"; db_xref "InterPro:IPR011222"; db_xref "UniProtKB/Swiss-Prot:Q02480"; gbkey "CDS"; gene "L1"; product "late protein"; protein_id "CAA52488.1"; exon_number "1"; 
+X74464.1	EMBL	start_codon	5745	5747	.	+	0	gene_id "L1"; transcript_id "unassigned_transcript_7"; db_xref "GOA:Q02480"; db_xref "InterPro:IPR002210"; db_xref "InterPro:IPR011222"; db_xref "UniProtKB/Swiss-Prot:Q02480"; gbkey "CDS"; gene "L1"; product "late protein"; protein_id "CAA52488.1"; exon_number "1"; 
+X74464.1	EMBL	stop_codon	7266	7268	.	+	0	gene_id "L1"; transcript_id "unassigned_transcript_7"; db_xref "GOA:Q02480"; db_xref "InterPro:IPR002210"; db_xref "InterPro:IPR011222"; db_xref "UniProtKB/Swiss-Prot:Q02480"; gbkey "CDS"; gene "L1"; product "late protein"; protein_id "CAA52488.1"; exon_number "1"; 
+K02718.1	Genbank	gene	83	559	.	+	.	gene_id "E6"; transcript_id ""; gbkey "Gene"; gene "E6"; gene_biotype "protein_coding"; 
+K02718.1	Genbank	CDS	83	556	.	+	0	gene_id "E6"; transcript_id "unassigned_transcript_1"; gbkey "CDS"; gene "E6"; note "E6 ORF from 65 to 559; putative"; product "transforming protein"; protein_id "AAA46939.1"; exon_number "1"; 
+K02718.1	Genbank	start_codon	83	85	.	+	0	gene_id "E6"; transcript_id "unassigned_transcript_1"; gbkey "CDS"; gene "E6"; note "E6 ORF from 65 to 559; putative"; product "transforming protein"; protein_id "AAA46939.1"; exon_number "1"; 
+K02718.1	Genbank	stop_codon	557	559	.	+	0	gene_id "E6"; transcript_id "unassigned_transcript_1"; gbkey "CDS"; gene "E6"; note "E6 ORF from 65 to 559; putative"; product "transforming protein"; protein_id "AAA46939.1"; exon_number "1"; 
+K02718.1	Genbank	gene	562	858	.	+	.	gene_id "E7"; transcript_id ""; gbkey "Gene"; gene "E7"; gene_biotype "protein_coding"; 
+K02718.1	Genbank	CDS	562	855	.	+	0	gene_id "E7"; transcript_id "unassigned_transcript_2"; gbkey "CDS"; gene "E7"; note "E7 ORF from 544 to 858; putative"; product "transforming protein"; protein_id "AAA46940.1"; exon_number "1"; 
+K02718.1	Genbank	start_codon	562	564	.	+	0	gene_id "E7"; transcript_id "unassigned_transcript_2"; gbkey "CDS"; gene "E7"; note "E7 ORF from 544 to 858; putative"; product "transforming protein"; protein_id "AAA46940.1"; exon_number "1"; 
+K02718.1	Genbank	stop_codon	856	858	.	+	0	gene_id "E7"; transcript_id "unassigned_transcript_2"; gbkey "CDS"; gene "E7"; note "E7 ORF from 544 to 858; putative"; product "transforming protein"; protein_id "AAA46940.1"; exon_number "1"; 
+K02718.1	Genbank	gene	865	1140	.	+	.	gene_id "E1"; transcript_id ""; gbkey "Gene"; gene "E1"; gene_biotype "protein_coding"; part "1"; 
+K02718.1	Genbank	gene	1140	2813	.	+	.	gene_id "E1"; transcript_id ""; gbkey "Gene"; gene "E1"; gene_biotype "protein_coding"; part "2"; 
+K02718.1	Genbank	CDS	865	1140	.	+	0	gene_id "E1"; transcript_id "unassigned_transcript_3"; gbkey "CDS"; gene "E1"; note "E1 interrupted ORF from 859 to 2813; putative"; product "replication protein"; protein_id "AAA46936.1"; exon_number "1"; 
+K02718.1	Genbank	CDS	1140	2810	.	+	0	gene_id "E1"; transcript_id "unassigned_transcript_3"; gbkey "CDS"; gene "E1"; note "E1 interrupted ORF from 859 to 2813; putative"; product "replication protein"; protein_id "AAA46936.1"; exon_number "2"; 
+K02718.1	Genbank	start_codon	865	867	.	+	0	gene_id "E1"; transcript_id "unassigned_transcript_3"; gbkey "CDS"; gene "E1"; note "E1 interrupted ORF from 859 to 2813; putative"; product "replication protein"; protein_id "AAA46936.1"; exon_number "1"; 
+K02718.1	Genbank	stop_codon	2811	2813	.	+	0	gene_id "E1"; transcript_id "unassigned_transcript_3"; gbkey "CDS"; gene "E1"; note "E1 interrupted ORF from 859 to 2813; putative"; product "replication protein"; protein_id "AAA46936.1"; exon_number "2"; 
+K02718.1	Genbank	gene	2755	3852	.	+	.	gene_id "E2"; transcript_id ""; gbkey "Gene"; gene "E2"; gene_biotype "protein_coding"; 
+K02718.1	Genbank	CDS	2755	3849	.	+	0	gene_id "E2"; transcript_id "unassigned_transcript_4"; gbkey "CDS"; gene "E2"; note "E2 ORF from 2725 to 3852; putative"; product "regulatory protein"; protein_id "AAA46941.1"; exon_number "1"; 
+K02718.1	Genbank	start_codon	2755	2757	.	+	0	gene_id "E2"; transcript_id "unassigned_transcript_4"; gbkey "CDS"; gene "E2"; note "E2 ORF from 2725 to 3852; putative"; product "regulatory protein"; protein_id "AAA46941.1"; exon_number "1"; 
+K02718.1	Genbank	stop_codon	3850	3852	.	+	0	gene_id "E2"; transcript_id "unassigned_transcript_4"; gbkey "CDS"; gene "E2"; note "E2 ORF from 2725 to 3852; putative"; product "regulatory protein"; protein_id "AAA46941.1"; exon_number "1"; 
+K02718.1	Genbank	gene	3332	3619	.	+	.	gene_id "E4"; transcript_id ""; gbkey "Gene"; gene "E4"; gene_biotype "protein_coding"; 
+K02718.1	Genbank	CDS	3332	3616	.	+	0	gene_id "E4"; transcript_id "unassigned_transcript_5"; gbkey "CDS"; gene "E4"; partial "true"; product "AAA46937.1"; protein_id "AAA46937.1"; exon_number "1"; 
+K02718.1	Genbank	stop_codon	3617	3619	.	+	0	gene_id "E4"; transcript_id "unassigned_transcript_5"; gbkey "CDS"; gene "E4"; partial "true"; product "AAA46937.1"; protein_id "AAA46937.1"; exon_number "1"; 
+K02718.1	Genbank	gene	3863	4099	.	+	.	gene_id "E5"; transcript_id ""; gbkey "Gene"; gene "E5"; gene_biotype "protein_coding"; 
+K02718.1	Genbank	CDS	3863	4096	.	+	0	gene_id "E5"; transcript_id "unassigned_transcript_6"; gbkey "CDS"; gene "E5"; partial "true"; product "AAA46938.1"; protein_id "AAA46938.1"; exon_number "1"; 
+K02718.1	Genbank	stop_codon	4097	4099	.	+	0	gene_id "E5"; transcript_id "unassigned_transcript_6"; gbkey "CDS"; gene "E5"; partial "true"; product "AAA46938.1"; protein_id "AAA46938.1"; exon_number "1"; 
+K02718.1	Genbank	gene	4235	5656	.	+	.	gene_id "L2"; transcript_id ""; gbkey "Gene"; gene "L2"; gene_biotype "protein_coding"; 
+K02718.1	Genbank	CDS	4235	5653	.	+	0	gene_id "L2"; transcript_id "unassigned_transcript_7"; gbkey "CDS"; gene "L2"; note "L2 ORF from 4133 to 5656; putative"; product "minor capsid protein"; protein_id "AAA46942.1"; exon_number "1"; 
+K02718.1	Genbank	start_codon	4235	4237	.	+	0	gene_id "L2"; transcript_id "unassigned_transcript_7"; gbkey "CDS"; gene "L2"; note "L2 ORF from 4133 to 5656; putative"; product "minor capsid protein"; protein_id "AAA46942.1"; exon_number "1"; 
+K02718.1	Genbank	stop_codon	5654	5656	.	+	0	gene_id "L2"; transcript_id "unassigned_transcript_7"; gbkey "CDS"; gene "L2"; note "L2 ORF from 4133 to 5656; putative"; product "minor capsid protein"; protein_id "AAA46942.1"; exon_number "1"; 
+K02718.1	Genbank	gene	5559	7154	.	+	.	gene_id "L1"; transcript_id ""; gbkey "Gene"; gene "L1"; gene_biotype "protein_coding"; 
+K02718.1	Genbank	CDS	5559	7151	.	+	0	gene_id "L1"; transcript_id "unassigned_transcript_8"; gbkey "CDS"; gene "L1"; note "L1 ORF from 5526 to 7154; putative"; product "major capsid protein"; protein_id "AAA46943.1"; exon_number "1"; 
+K02718.1	Genbank	start_codon	5559	5561	.	+	0	gene_id "L1"; transcript_id "unassigned_transcript_8"; gbkey "CDS"; gene "L1"; note "L1 ORF from 5526 to 7154; putative"; product "major capsid protein"; protein_id "AAA46943.1"; exon_number "1"; 
+K02718.1	Genbank	stop_codon	7152	7154	.	+	0	gene_id "L1"; transcript_id "unassigned_transcript_8"; gbkey "CDS"; gene "L1"; note "L1 ORF from 5526 to 7154; putative"; product "major capsid protein"; protein_id "AAA46943.1"; exon_number "1"; 
+X05015.1	EMBL	gene	105	581	.	+	.	gene_id "E6"; transcript_id ""; gbkey "Gene"; gene "E6"; gene_biotype "protein_coding"; 
+X05015.1	EMBL	CDS	105	578	.	+	0	gene_id "E6"; transcript_id "unassigned_transcript_1"; db_xref "GOA:P06463"; db_xref "InterPro:IPR001334"; db_xref "PDB:2I04"; db_xref "PDB:2I0I"; db_xref "PDB:2I0L"; db_xref "UniProtKB/Swiss-Prot:P06463"; gbkey "CDS"; gene "E6"; product "E6 protein"; protein_id "CAA28664.1"; exon_number "1"; 
+X05015.1	EMBL	start_codon	105	107	.	+	0	gene_id "E6"; transcript_id "unassigned_transcript_1"; db_xref "GOA:P06463"; db_xref "InterPro:IPR001334"; db_xref "PDB:2I04"; db_xref "PDB:2I0I"; db_xref "PDB:2I0L"; db_xref "UniProtKB/Swiss-Prot:P06463"; gbkey "CDS"; gene "E6"; product "E6 protein"; protein_id "CAA28664.1"; exon_number "1"; 
+X05015.1	EMBL	stop_codon	579	581	.	+	0	gene_id "E6"; transcript_id "unassigned_transcript_1"; db_xref "GOA:P06463"; db_xref "InterPro:IPR001334"; db_xref "PDB:2I04"; db_xref "PDB:2I0I"; db_xref "PDB:2I0L"; db_xref "UniProtKB/Swiss-Prot:P06463"; gbkey "CDS"; gene "E6"; product "E6 protein"; protein_id "CAA28664.1"; exon_number "1"; 
+X05015.1	EMBL	gene	590	907	.	+	.	gene_id "E7"; transcript_id ""; gbkey "Gene"; gene "E7"; gene_biotype "protein_coding"; 
+X05015.1	EMBL	CDS	590	904	.	+	0	gene_id "E7"; transcript_id "unassigned_transcript_2"; db_xref "GOA:P06788"; db_xref "InterPro:IPR000148"; db_xref "UniProtKB/Swiss-Prot:P06788"; gbkey "CDS"; gene "E7"; product "E7 protein"; protein_id "CAA28665.1"; exon_number "1"; 
+X05015.1	EMBL	start_codon	590	592	.	+	0	gene_id "E7"; transcript_id "unassigned_transcript_2"; db_xref "GOA:P06788"; db_xref "InterPro:IPR000148"; db_xref "UniProtKB/Swiss-Prot:P06788"; gbkey "CDS"; gene "E7"; product "E7 protein"; protein_id "CAA28665.1"; exon_number "1"; 
+X05015.1	EMBL	stop_codon	905	907	.	+	0	gene_id "E7"; transcript_id "unassigned_transcript_2"; db_xref "GOA:P06788"; db_xref "InterPro:IPR000148"; db_xref "UniProtKB/Swiss-Prot:P06788"; gbkey "CDS"; gene "E7"; product "E7 protein"; protein_id "CAA28665.1"; exon_number "1"; 
+X05015.1	EMBL	gene	914	2887	.	+	.	gene_id "E1"; transcript_id ""; gbkey "Gene"; gene "E1"; gene_biotype "protein_coding"; 
+X05015.1	EMBL	CDS	914	2884	.	+	0	gene_id "E1"; transcript_id "unassigned_transcript_3"; db_xref "GOA:P06789"; db_xref "InterPro:IPR001177"; db_xref "InterPro:IPR014000"; db_xref "InterPro:IPR014015"; db_xref "InterPro:IPR016393"; db_xref "PDB:1R9W"; db_xref "PDB:1TUE"; db_xref "UniProtKB/Swiss-Prot:P06789"; gbkey "CDS"; gene "E1"; product "E1 protein"; protein_id "CAA28666.1"; exon_number "1"; 
+X05015.1	EMBL	start_codon	914	916	.	+	0	gene_id "E1"; transcript_id "unassigned_transcript_3"; db_xref "GOA:P06789"; db_xref "InterPro:IPR001177"; db_xref "InterPro:IPR014000"; db_xref "InterPro:IPR014015"; db_xref "InterPro:IPR016393"; db_xref "PDB:1R9W"; db_xref "PDB:1TUE"; db_xref "UniProtKB/Swiss-Prot:P06789"; gbkey "CDS"; gene "E1"; product "E1 protein"; protein_id "CAA28666.1"; exon_number "1"; 
+X05015.1	EMBL	stop_codon	2885	2887	.	+	0	gene_id "E1"; transcript_id "unassigned_transcript_3"; db_xref "GOA:P06789"; db_xref "InterPro:IPR001177"; db_xref "InterPro:IPR014000"; db_xref "InterPro:IPR014015"; db_xref "InterPro:IPR016393"; db_xref "PDB:1R9W"; db_xref "PDB:1TUE"; db_xref "UniProtKB/Swiss-Prot:P06789"; gbkey "CDS"; gene "E1"; product "E1 protein"; protein_id "CAA28666.1"; exon_number "1"; 
+X05015.1	EMBL	gene	2817	3914	.	+	.	gene_id "E2"; transcript_id ""; gbkey "Gene"; gene "E2"; gene_biotype "protein_coding"; 
+X05015.1	EMBL	CDS	2817	3911	.	+	0	gene_id "E2"; transcript_id "unassigned_transcript_4"; db_xref "GOA:P06790"; db_xref "InterPro:IPR000427"; db_xref "InterPro:IPR001866"; db_xref "InterPro:IPR009021"; db_xref "InterPro:IPR012677"; db_xref "PDB:1F9F"; db_xref "PDB:1JJ4"; db_xref "PDB:1QQH"; db_xref "PDB:1TUE"; db_xref "UniProtKB/Swiss-Prot:P06790"; gbkey "CDS"; gene "E2"; product "E2 protein"; protein_id "CAA28667.1"; exon_number "1"; 
+X05015.1	EMBL	start_codon	2817	2819	.	+	0	gene_id "E2"; transcript_id "unassigned_transcript_4"; db_xref "GOA:P06790"; db_xref "InterPro:IPR000427"; db_xref "InterPro:IPR001866"; db_xref "InterPro:IPR009021"; db_xref "InterPro:IPR012677"; db_xref "PDB:1F9F"; db_xref "PDB:1JJ4"; db_xref "PDB:1QQH"; db_xref "PDB:1TUE"; db_xref "UniProtKB/Swiss-Prot:P06790"; gbkey "CDS"; gene "E2"; product "E2 protein"; protein_id "CAA28667.1"; exon_number "1"; 
+X05015.1	EMBL	stop_codon	3912	3914	.	+	0	gene_id "E2"; transcript_id "unassigned_transcript_4"; db_xref "GOA:P06790"; db_xref "InterPro:IPR000427"; db_xref "InterPro:IPR001866"; db_xref "InterPro:IPR009021"; db_xref "InterPro:IPR012677"; db_xref "PDB:1F9F"; db_xref "PDB:1JJ4"; db_xref "PDB:1QQH"; db_xref "PDB:1TUE"; db_xref "UniProtKB/Swiss-Prot:P06790"; gbkey "CDS"; gene "E2"; product "E2 protein"; protein_id "CAA28667.1"; exon_number "1"; 
+X05015.1	EMBL	gene	3418	3684	.	+	.	gene_id "E4"; transcript_id ""; gbkey "Gene"; gene "E4"; gene_biotype "protein_coding"; 
+X05015.1	EMBL	CDS	3418	3681	.	+	0	gene_id "E4"; transcript_id "unassigned_transcript_5"; db_xref "InterPro:IPR003861"; db_xref "UniProtKB/Swiss-Prot:P06791"; gbkey "CDS"; gene "E4"; product "E4 protein"; protein_id "CAA28668.1"; exon_number "1"; 
+X05015.1	EMBL	start_codon	3418	3420	.	+	0	gene_id "E4"; transcript_id "unassigned_transcript_5"; db_xref "InterPro:IPR003861"; db_xref "UniProtKB/Swiss-Prot:P06791"; gbkey "CDS"; gene "E4"; product "E4 protein"; protein_id "CAA28668.1"; exon_number "1"; 
+X05015.1	EMBL	stop_codon	3682	3684	.	+	0	gene_id "E4"; transcript_id "unassigned_transcript_5"; db_xref "InterPro:IPR003861"; db_xref "UniProtKB/Swiss-Prot:P06791"; gbkey "CDS"; gene "E4"; product "E4 protein"; protein_id "CAA28668.1"; exon_number "1"; 
+X05015.1	EMBL	gene	3936	4157	.	+	.	gene_id "E5"; transcript_id ""; gbkey "Gene"; gene "E5"; gene_biotype "protein_coding"; 
+X05015.1	EMBL	CDS	3936	4154	.	+	0	gene_id "E5"; transcript_id "unassigned_transcript_6"; db_xref "InterPro:IPR004270"; db_xref "UniProtKB/Swiss-Prot:P06792"; gbkey "CDS"; gene "E5"; product "E5 protein"; protein_id "CAA28669.1"; exon_number "1"; 
+X05015.1	EMBL	start_codon	3936	3938	.	+	0	gene_id "E5"; transcript_id "unassigned_transcript_6"; db_xref "InterPro:IPR004270"; db_xref "UniProtKB/Swiss-Prot:P06792"; gbkey "CDS"; gene "E5"; product "E5 protein"; protein_id "CAA28669.1"; exon_number "1"; 
+X05015.1	EMBL	stop_codon	4155	4157	.	+	0	gene_id "E5"; transcript_id "unassigned_transcript_6"; db_xref "InterPro:IPR004270"; db_xref "UniProtKB/Swiss-Prot:P06792"; gbkey "CDS"; gene "E5"; product "E5 protein"; protein_id "CAA28669.1"; exon_number "1"; 
+X05015.1	EMBL	gene	4244	5632	.	+	.	gene_id "L2"; transcript_id ""; gbkey "Gene"; gene "L2"; gene_biotype "protein_coding"; 
+X05015.1	EMBL	CDS	4244	5629	.	+	0	gene_id "L2"; transcript_id "unassigned_transcript_7"; db_xref "GOA:P06793"; db_xref "InterPro:IPR000784"; db_xref "UniProtKB/Swiss-Prot:P06793"; gbkey "CDS"; gene "L2"; product "L2 protein"; protein_id "CAA28670.1"; exon_number "1"; 
+X05015.1	EMBL	start_codon	4244	4246	.	+	0	gene_id "L2"; transcript_id "unassigned_transcript_7"; db_xref "GOA:P06793"; db_xref "InterPro:IPR000784"; db_xref "UniProtKB/Swiss-Prot:P06793"; gbkey "CDS"; gene "L2"; product "L2 protein"; protein_id "CAA28670.1"; exon_number "1"; 
+X05015.1	EMBL	stop_codon	5630	5632	.	+	0	gene_id "L2"; transcript_id "unassigned_transcript_7"; db_xref "GOA:P06793"; db_xref "InterPro:IPR000784"; db_xref "UniProtKB/Swiss-Prot:P06793"; gbkey "CDS"; gene "L2"; product "L2 protein"; protein_id "CAA28670.1"; exon_number "1"; 
+X05015.1	EMBL	gene	5430	7136	.	+	.	gene_id "L1"; transcript_id ""; gbkey "Gene"; gene "L1"; gene_biotype "protein_coding"; 
+X05015.1	EMBL	CDS	5430	7133	.	+	0	gene_id "L1"; transcript_id "unassigned_transcript_8"; db_xref "GOA:P06794"; db_xref "InterPro:IPR002210"; db_xref "InterPro:IPR011222"; db_xref "PDB:2R5I"; db_xref "UniProtKB/Swiss-Prot:P06794"; gbkey "CDS"; gene "L1"; product "L1 protein"; protein_id "CAA28671.1"; exon_number "1"; 
+X05015.1	EMBL	start_codon	5430	5432	.	+	0	gene_id "L1"; transcript_id "unassigned_transcript_8"; db_xref "GOA:P06794"; db_xref "InterPro:IPR002210"; db_xref "InterPro:IPR011222"; db_xref "PDB:2R5I"; db_xref "UniProtKB/Swiss-Prot:P06794"; gbkey "CDS"; gene "L1"; product "L1 protein"; protein_id "CAA28671.1"; exon_number "1"; 
+X05015.1	EMBL	stop_codon	7134	7136	.	+	0	gene_id "L1"; transcript_id "unassigned_transcript_8"; db_xref "GOA:P06794"; db_xref "InterPro:IPR002210"; db_xref "InterPro:IPR011222"; db_xref "PDB:2R5I"; db_xref "UniProtKB/Swiss-Prot:P06794"; gbkey "CDS"; gene "L1"; product "L1 protein"; protein_id "CAA28671.1"; exon_number "1"; 
+X74474.1	EMBL	gene	102	563	.	+	.	gene_id "E6"; transcript_id ""; gbkey "Gene"; gene "E6"; gene_biotype "protein_coding"; 
+X74474.1	EMBL	CDS	102	560	.	+	0	gene_id "E6"; transcript_id "unassigned_transcript_1"; db_xref "GOA:P36809"; db_xref "InterPro:IPR001334"; db_xref "UniProtKB/Swiss-Prot:P36809"; gbkey "CDS"; gene "E6"; product "early protein"; protein_id "CAA52543.1"; exon_number "1"; 
+X74474.1	EMBL	start_codon	102	104	.	+	0	gene_id "E6"; transcript_id "unassigned_transcript_1"; db_xref "GOA:P36809"; db_xref "InterPro:IPR001334"; db_xref "UniProtKB/Swiss-Prot:P36809"; gbkey "CDS"; gene "E6"; product "early protein"; protein_id "CAA52543.1"; exon_number "1"; 
+X74474.1	EMBL	stop_codon	561	563	.	+	0	gene_id "E6"; transcript_id "unassigned_transcript_1"; db_xref "GOA:P36809"; db_xref "InterPro:IPR001334"; db_xref "UniProtKB/Swiss-Prot:P36809"; gbkey "CDS"; gene "E6"; product "early protein"; protein_id "CAA52543.1"; exon_number "1"; 
+X74474.1	EMBL	gene	566	883	.	+	.	gene_id "E7"; transcript_id ""; gbkey "Gene"; gene "E7"; gene_biotype "protein_coding"; 
+X74474.1	EMBL	CDS	566	880	.	+	0	gene_id "E7"; transcript_id "unassigned_transcript_2"; db_xref "GOA:P36826"; db_xref "InterPro:IPR000148"; db_xref "UniProtKB/Swiss-Prot:P36826"; gbkey "CDS"; gene "E7"; product "early protein"; protein_id "CAA52544.1"; exon_number "1"; 
+X74474.1	EMBL	start_codon	566	568	.	+	0	gene_id "E7"; transcript_id "unassigned_transcript_2"; db_xref "GOA:P36826"; db_xref "InterPro:IPR000148"; db_xref "UniProtKB/Swiss-Prot:P36826"; gbkey "CDS"; gene "E7"; product "early protein"; protein_id "CAA52544.1"; exon_number "1"; 
+X74474.1	EMBL	stop_codon	881	883	.	+	0	gene_id "E7"; transcript_id "unassigned_transcript_2"; db_xref "GOA:P36826"; db_xref "InterPro:IPR000148"; db_xref "UniProtKB/Swiss-Prot:P36826"; gbkey "CDS"; gene "E7"; product "early protein"; protein_id "CAA52544.1"; exon_number "1"; 
+X74474.1	EMBL	gene	890	2785	.	+	.	gene_id "E1"; transcript_id ""; gbkey "Gene"; gene "E1"; gene_biotype "protein_coding"; 
+X74474.1	EMBL	CDS	890	2782	.	+	0	gene_id "E1"; transcript_id "unassigned_transcript_3"; db_xref "GOA:Q05112"; db_xref "InterPro:IPR001177"; db_xref "InterPro:IPR014000"; db_xref "InterPro:IPR014015"; db_xref "InterPro:IPR016393"; db_xref "UniProtKB/Swiss-Prot:Q05112"; gbkey "CDS"; gene "E1"; product "early protein"; protein_id "CAA52545.1"; exon_number "1"; 
+X74474.1	EMBL	start_codon	890	892	.	+	0	gene_id "E1"; transcript_id "unassigned_transcript_3"; db_xref "GOA:Q05112"; db_xref "InterPro:IPR001177"; db_xref "InterPro:IPR014000"; db_xref "InterPro:IPR014015"; db_xref "InterPro:IPR016393"; db_xref "UniProtKB/Swiss-Prot:Q05112"; gbkey "CDS"; gene "E1"; product "early protein"; protein_id "CAA52545.1"; exon_number "1"; 
+X74474.1	EMBL	stop_codon	2783	2785	.	+	0	gene_id "E1"; transcript_id "unassigned_transcript_3"; db_xref "GOA:Q05112"; db_xref "InterPro:IPR001177"; db_xref "InterPro:IPR014000"; db_xref "InterPro:IPR014015"; db_xref "InterPro:IPR016393"; db_xref "UniProtKB/Swiss-Prot:Q05112"; gbkey "CDS"; gene "E1"; product "early protein"; protein_id "CAA52545.1"; exon_number "1"; 
+X74474.1	EMBL	gene	2727	3863	.	+	.	gene_id "E2"; transcript_id ""; gbkey "Gene"; gene "E2"; gene_biotype "protein_coding"; 
+X74474.1	EMBL	CDS	2727	3860	.	+	0	gene_id "E2"; transcript_id "unassigned_transcript_4"; db_xref "GOA:P36790"; db_xref "InterPro:IPR000427"; db_xref "InterPro:IPR001866"; db_xref "InterPro:IPR009021"; db_xref "InterPro:IPR012677"; db_xref "UniProtKB/Swiss-Prot:P36790"; gbkey "CDS"; gene "E2"; product "early protein"; protein_id "CAA52546.1"; exon_number "1"; 
+X74474.1	EMBL	start_codon	2727	2729	.	+	0	gene_id "E2"; transcript_id "unassigned_transcript_4"; db_xref "GOA:P36790"; db_xref "InterPro:IPR000427"; db_xref "InterPro:IPR001866"; db_xref "InterPro:IPR009021"; db_xref "InterPro:IPR012677"; db_xref "UniProtKB/Swiss-Prot:P36790"; gbkey "CDS"; gene "E2"; product "early protein"; protein_id "CAA52546.1"; exon_number "1"; 
+X74474.1	EMBL	stop_codon	3861	3863	.	+	0	gene_id "E2"; transcript_id "unassigned_transcript_4"; db_xref "GOA:P36790"; db_xref "InterPro:IPR000427"; db_xref "InterPro:IPR001866"; db_xref "InterPro:IPR009021"; db_xref "InterPro:IPR012677"; db_xref "UniProtKB/Swiss-Prot:P36790"; gbkey "CDS"; gene "E2"; product "early protein"; protein_id "CAA52546.1"; exon_number "1"; 
+X74474.1	EMBL	gene	4280	5671	.	+	.	gene_id "L2"; transcript_id ""; gbkey "Gene"; gene "L2"; gene_biotype "protein_coding"; 
+X74474.1	EMBL	CDS	4280	5668	.	+	0	gene_id "L2"; transcript_id "unassigned_transcript_5"; db_xref "GOA:P36756"; db_xref "InterPro:IPR000784"; db_xref "UniProtKB/Swiss-Prot:P36756"; gbkey "CDS"; gene "L2"; product "late protein"; protein_id "CAA52547.1"; exon_number "1"; 
+X74474.1	EMBL	start_codon	4280	4282	.	+	0	gene_id "L2"; transcript_id "unassigned_transcript_5"; db_xref "GOA:P36756"; db_xref "InterPro:IPR000784"; db_xref "UniProtKB/Swiss-Prot:P36756"; gbkey "CDS"; gene "L2"; product "late protein"; protein_id "CAA52547.1"; exon_number "1"; 
+X74474.1	EMBL	stop_codon	5669	5671	.	+	0	gene_id "L2"; transcript_id "unassigned_transcript_5"; db_xref "GOA:P36756"; db_xref "InterPro:IPR000784"; db_xref "UniProtKB/Swiss-Prot:P36756"; gbkey "CDS"; gene "L2"; product "late protein"; protein_id "CAA52547.1"; exon_number "1"; 
+X74474.1	EMBL	gene	5631	7157	.	+	.	gene_id "L1"; transcript_id ""; gbkey "Gene"; gene "L1"; gene_biotype "protein_coding"; 
+X74474.1	EMBL	CDS	5631	7154	.	+	0	gene_id "L1"; transcript_id "unassigned_transcript_6"; db_xref "GOA:Q02515"; db_xref "InterPro:IPR002210"; db_xref "InterPro:IPR011222"; db_xref "UniProtKB/Swiss-Prot:Q02515"; gbkey "CDS"; gene "L1"; product "late protein"; protein_id "CAA52548.1"; exon_number "1"; 
+X74474.1	EMBL	start_codon	5631	5633	.	+	0	gene_id "L1"; transcript_id "unassigned_transcript_6"; db_xref "GOA:Q02515"; db_xref "InterPro:IPR002210"; db_xref "InterPro:IPR011222"; db_xref "UniProtKB/Swiss-Prot:Q02515"; gbkey "CDS"; gene "L1"; product "late protein"; protein_id "CAA52548.1"; exon_number "1"; 
+X74474.1	EMBL	stop_codon	7155	7157	.	+	0	gene_id "L1"; transcript_id "unassigned_transcript_6"; db_xref "GOA:Q02515"; db_xref "InterPro:IPR002210"; db_xref "InterPro:IPR011222"; db_xref "UniProtKB/Swiss-Prot:Q02515"; gbkey "CDS"; gene "L1"; product "late protein"; protein_id "CAA52548.1"; exon_number "1"; 
+AB027020.1	DDBJ	gene	102	557	.	+	.	gene_id "E6"; transcript_id ""; gbkey "Gene"; gene "E6"; gene_biotype "protein_coding"; 
+AB027020.1	DDBJ	CDS	102	554	.	+	0	gene_id "E6"; transcript_id "unassigned_transcript_1"; gbkey "CDS"; gene "E6"; product "BAA90727.1"; protein_id "BAA90727.1"; exon_number "1"; 
+AB027020.1	DDBJ	start_codon	102	104	.	+	0	gene_id "E6"; transcript_id "unassigned_transcript_1"; gbkey "CDS"; gene "E6"; product "BAA90727.1"; protein_id "BAA90727.1"; exon_number "1"; 
+AB027020.1	DDBJ	stop_codon	555	557	.	+	0	gene_id "E6"; transcript_id "unassigned_transcript_1"; gbkey "CDS"; gene "E6"; product "BAA90727.1"; protein_id "BAA90727.1"; exon_number "1"; 
+AB027020.1	DDBJ	gene	564	878	.	+	.	gene_id "E7"; transcript_id ""; gbkey "Gene"; gene "E7"; gene_biotype "protein_coding"; 
+AB027020.1	DDBJ	CDS	564	875	.	+	0	gene_id "E7"; transcript_id "unassigned_transcript_2"; gbkey "CDS"; gene "E7"; product "BAA90728.1"; protein_id "BAA90728.1"; exon_number "1"; 
+AB027020.1	DDBJ	start_codon	564	566	.	+	0	gene_id "E7"; transcript_id "unassigned_transcript_2"; gbkey "CDS"; gene "E7"; product "BAA90728.1"; protein_id "BAA90728.1"; exon_number "1"; 
+AB027020.1	DDBJ	stop_codon	876	878	.	+	0	gene_id "E7"; transcript_id "unassigned_transcript_2"; gbkey "CDS"; gene "E7"; product "BAA90728.1"; protein_id "BAA90728.1"; exon_number "1"; 
+AB027020.1	DDBJ	gene	886	2790	.	+	.	gene_id "E1"; transcript_id ""; gbkey "Gene"; gene "E1"; gene_biotype "protein_coding"; 
+AB027020.1	DDBJ	CDS	886	2787	.	+	0	gene_id "E1"; transcript_id "unassigned_transcript_3"; gbkey "CDS"; gene "E1"; product "BAA90729.1"; protein_id "BAA90729.1"; exon_number "1"; 
+AB027020.1	DDBJ	start_codon	886	888	.	+	0	gene_id "E1"; transcript_id "unassigned_transcript_3"; gbkey "CDS"; gene "E1"; product "BAA90729.1"; protein_id "BAA90729.1"; exon_number "1"; 
+AB027020.1	DDBJ	stop_codon	2788	2790	.	+	0	gene_id "E1"; transcript_id "unassigned_transcript_3"; gbkey "CDS"; gene "E1"; product "BAA90729.1"; protein_id "BAA90729.1"; exon_number "1"; 
+AB027020.1	DDBJ	gene	2732	3838	.	+	.	gene_id "E2"; transcript_id ""; gbkey "Gene"; gene "E2"; gene_biotype "protein_coding"; 
+AB027020.1	DDBJ	CDS	2732	3835	.	+	0	gene_id "E2"; transcript_id "unassigned_transcript_4"; gbkey "CDS"; gene "E2"; product "BAA90730.1"; protein_id "BAA90730.1"; exon_number "1"; 
+AB027020.1	DDBJ	start_codon	2732	2734	.	+	0	gene_id "E2"; transcript_id "unassigned_transcript_4"; gbkey "CDS"; gene "E2"; product "BAA90730.1"; protein_id "BAA90730.1"; exon_number "1"; 
+AB027020.1	DDBJ	stop_codon	3836	3838	.	+	0	gene_id "E2"; transcript_id "unassigned_transcript_4"; gbkey "CDS"; gene "E2"; product "BAA90730.1"; protein_id "BAA90730.1"; exon_number "1"; 
+AB027020.1	DDBJ	gene	3309	3614	.	+	.	gene_id "E4"; transcript_id ""; gbkey "Gene"; gene "E4"; gene_biotype "protein_coding"; partial "true"; 
+AB027020.1	DDBJ	CDS	3309	3611	.	+	0	gene_id "E4"; transcript_id "unassigned_transcript_5"; gbkey "CDS"; gene "E4"; note "start codon is not identified"; partial "true"; product "BAA90731.1"; protein_id "BAA90731.1"; exon_number "1"; 
+AB027020.1	DDBJ	stop_codon	3612	3614	.	+	0	gene_id "E4"; transcript_id "unassigned_transcript_5"; gbkey "CDS"; gene "E4"; note "start codon is not identified"; partial "true"; product "BAA90731.1"; protein_id "BAA90731.1"; exon_number "1"; 
+AB027020.1	DDBJ	gene	3846	4142	.	+	.	gene_id "E5"; transcript_id ""; gbkey "Gene"; gene "E5"; gene_biotype "protein_coding"; partial "true"; 
+AB027020.1	DDBJ	CDS	3846	4139	.	+	0	gene_id "E5"; transcript_id "unassigned_transcript_6"; gbkey "CDS"; gene "E5"; note "start codon is not identified"; partial "true"; product "BAA90732.1"; protein_id "BAA90732.1"; exon_number "1"; 
+AB027020.1	DDBJ	stop_codon	4140	4142	.	+	0	gene_id "E5"; transcript_id "unassigned_transcript_6"; gbkey "CDS"; gene "E5"; note "start codon is not identified"; partial "true"; product "BAA90732.1"; protein_id "BAA90732.1"; exon_number "1"; 
+AB027020.1	DDBJ	gene	4157	5560	.	+	.	gene_id "L2"; transcript_id ""; gbkey "Gene"; gene "L2"; gene_biotype "protein_coding"; 
+AB027020.1	DDBJ	CDS	4157	5557	.	+	0	gene_id "L2"; transcript_id "unassigned_transcript_7"; gbkey "CDS"; gene "L2"; product "BAA90733.1"; protein_id "BAA90733.1"; exon_number "1"; 
+AB027020.1	DDBJ	start_codon	4157	4159	.	+	0	gene_id "L2"; transcript_id "unassigned_transcript_7"; gbkey "CDS"; gene "L2"; product "BAA90733.1"; protein_id "BAA90733.1"; exon_number "1"; 
+AB027020.1	DDBJ	stop_codon	5558	5560	.	+	0	gene_id "L2"; transcript_id "unassigned_transcript_7"; gbkey "CDS"; gene "L2"; product "BAA90733.1"; protein_id "BAA90733.1"; exon_number "1"; 
+AB027020.1	DDBJ	gene	5541	7064	.	+	.	gene_id "L1"; transcript_id ""; gbkey "Gene"; gene "L1"; gene_biotype "protein_coding"; 
+AB027020.1	DDBJ	CDS	5541	7061	.	+	0	gene_id "L1"; transcript_id "unassigned_transcript_8"; gbkey "CDS"; gene "L1"; product "BAA90734.1"; protein_id "BAA90734.1"; exon_number "1"; 
+AB027020.1	DDBJ	start_codon	5541	5543	.	+	0	gene_id "L1"; transcript_id "unassigned_transcript_8"; gbkey "CDS"; gene "L1"; product "BAA90734.1"; protein_id "BAA90734.1"; exon_number "1"; 
+AB027020.1	DDBJ	stop_codon	7062	7064	.	+	0	gene_id "L1"; transcript_id "unassigned_transcript_8"; gbkey "CDS"; gene "L1"; product "BAA90734.1"; protein_id "BAA90734.1"; exon_number "1"; 
+D90400.1	DDBJ	CDS	110	556	.	+	0	gene_id "unassigned_gene_1"; transcript_id "unassigned_transcript_1"; gbkey "CDS"; note "open reading frame E6"; product "BAA31845.1"; protein_id "BAA31845.1"; exon_number "1"; 
+D90400.1	DDBJ	start_codon	110	112	.	+	0	gene_id "unassigned_gene_1"; transcript_id "unassigned_transcript_1"; gbkey "CDS"; note "open reading frame E6"; product "BAA31845.1"; protein_id "BAA31845.1"; exon_number "1"; 
+D90400.1	DDBJ	stop_codon	557	559	.	+	0	gene_id "unassigned_gene_1"; transcript_id "unassigned_transcript_1"; gbkey "CDS"; note "open reading frame E6"; product "BAA31845.1"; protein_id "BAA31845.1"; exon_number "1"; 
+D90400.1	DDBJ	CDS	574	867	.	+	0	gene_id "unassigned_gene_2"; transcript_id "unassigned_transcript_2"; gbkey "CDS"; note "open reading frame E7"; product "BAA31846.1"; protein_id "BAA31846.1"; exon_number "1"; 
+D90400.1	DDBJ	start_codon	574	576	.	+	0	gene_id "unassigned_gene_2"; transcript_id "unassigned_transcript_2"; gbkey "CDS"; note "open reading frame E7"; product "BAA31846.1"; protein_id "BAA31846.1"; exon_number "1"; 
+D90400.1	DDBJ	stop_codon	868	870	.	+	0	gene_id "unassigned_gene_2"; transcript_id "unassigned_transcript_2"; gbkey "CDS"; note "open reading frame E7"; product "BAA31846.1"; protein_id "BAA31846.1"; exon_number "1"; 
+D90400.1	DDBJ	CDS	883	2814	.	+	0	gene_id "unassigned_gene_3"; transcript_id "unassigned_transcript_3"; gbkey "CDS"; note "open reading frame E1"; product "BAA31847.1"; protein_id "BAA31847.1"; exon_number "1"; 
+D90400.1	DDBJ	start_codon	883	885	.	+	0	gene_id "unassigned_gene_3"; transcript_id "unassigned_transcript_3"; gbkey "CDS"; note "open reading frame E1"; product "BAA31847.1"; protein_id "BAA31847.1"; exon_number "1"; 
+D90400.1	DDBJ	stop_codon	2815	2817	.	+	0	gene_id "unassigned_gene_3"; transcript_id "unassigned_transcript_3"; gbkey "CDS"; note "open reading frame E1"; product "BAA31847.1"; protein_id "BAA31847.1"; exon_number "1"; 
+D90400.1	DDBJ	CDS	2753	3826	.	+	0	gene_id "unassigned_gene_4"; transcript_id "unassigned_transcript_4"; gbkey "CDS"; note "open reading frame E2"; product "BAA31848.1"; protein_id "BAA31848.1"; exon_number "1"; 
+D90400.1	DDBJ	start_codon	2753	2755	.	+	0	gene_id "unassigned_gene_4"; transcript_id "unassigned_transcript_4"; gbkey "CDS"; note "open reading frame E2"; product "BAA31848.1"; protein_id "BAA31848.1"; exon_number "1"; 
+D90400.1	DDBJ	stop_codon	3827	3829	.	+	0	gene_id "unassigned_gene_4"; transcript_id "unassigned_transcript_4"; gbkey "CDS"; note "open reading frame E2"; product "BAA31848.1"; protein_id "BAA31848.1"; exon_number "1"; 
+D90400.1	DDBJ	CDS	3330	3602	.	+	0	gene_id "unassigned_gene_5"; transcript_id "unassigned_transcript_5"; gbkey "CDS"; note "no ATG start codon~open reading frame E4"; partial "true"; product "BAA14396.1"; protein_id "BAA14396.1"; exon_number "1"; 
+D90400.1	DDBJ	stop_codon	3603	3605	.	+	0	gene_id "unassigned_gene_5"; transcript_id "unassigned_transcript_5"; gbkey "CDS"; note "no ATG start codon~open reading frame E4"; partial "true"; product "BAA14396.1"; protein_id "BAA14396.1"; exon_number "1"; 
+D90400.1	DDBJ	CDS	3892	4119	.	+	0	gene_id "unassigned_gene_6"; transcript_id "unassigned_transcript_6"; gbkey "CDS"; note "open reading frame E5"; product "BAA31849.1"; protein_id "BAA31849.1"; exon_number "1"; 
+D90400.1	DDBJ	start_codon	3892	3894	.	+	0	gene_id "unassigned_gene_6"; transcript_id "unassigned_transcript_6"; gbkey "CDS"; note "open reading frame E5"; product "BAA31849.1"; protein_id "BAA31849.1"; exon_number "1"; 
+D90400.1	DDBJ	stop_codon	4120	4122	.	+	0	gene_id "unassigned_gene_6"; transcript_id "unassigned_transcript_6"; gbkey "CDS"; note "open reading frame E5"; product "BAA31849.1"; protein_id "BAA31849.1"; exon_number "1"; 
+D90400.1	DDBJ	CDS	4244	5659	.	+	0	gene_id "unassigned_gene_7"; transcript_id "unassigned_transcript_7"; gbkey "CDS"; note "open reading frame L2"; product "BAA31850.1"; protein_id "BAA31850.1"; exon_number "1"; 
+D90400.1	DDBJ	start_codon	4244	4246	.	+	0	gene_id "unassigned_gene_7"; transcript_id "unassigned_transcript_7"; gbkey "CDS"; note "open reading frame L2"; product "BAA31850.1"; protein_id "BAA31850.1"; exon_number "1"; 
+D90400.1	DDBJ	stop_codon	5660	5662	.	+	0	gene_id "unassigned_gene_7"; transcript_id "unassigned_transcript_7"; gbkey "CDS"; note "open reading frame L2"; product "BAA31850.1"; protein_id "BAA31850.1"; exon_number "1"; 
+D90400.1	DDBJ	CDS	5565	7136	.	+	0	gene_id "unassigned_gene_8"; transcript_id "unassigned_transcript_8"; gbkey "CDS"; note "open reading frame L1"; product "BAA31851.1"; protein_id "BAA31851.1"; exon_number "1"; 
+D90400.1	DDBJ	start_codon	5565	5567	.	+	0	gene_id "unassigned_gene_8"; transcript_id "unassigned_transcript_8"; gbkey "CDS"; note "open reading frame L1"; product "BAA31851.1"; protein_id "BAA31851.1"; exon_number "1"; 
+D90400.1	DDBJ	stop_codon	7137	7139	.	+	0	gene_id "unassigned_gene_8"; transcript_id "unassigned_transcript_8"; gbkey "CDS"; note "open reading frame L1"; product "BAA31851.1"; protein_id "BAA31851.1"; exon_number "1"; 
+DQ080079.1	Genbank	CDS	1	474	.	+	0	gene_id "unassigned_gene_1"; transcript_id "unassigned_transcript_1"; gbkey "CDS"; product "E6 protein"; protein_id "AAZ39491.1"; exon_number "1"; 
+DQ080079.1	Genbank	start_codon	1	3	.	+	0	gene_id "unassigned_gene_1"; transcript_id "unassigned_transcript_1"; gbkey "CDS"; product "E6 protein"; protein_id "AAZ39491.1"; exon_number "1"; 
+DQ080079.1	Genbank	stop_codon	475	477	.	+	0	gene_id "unassigned_gene_1"; transcript_id "unassigned_transcript_1"; gbkey "CDS"; product "E6 protein"; protein_id "AAZ39491.1"; exon_number "1"; 
+DQ080079.1	Genbank	CDS	484	813	.	+	0	gene_id "unassigned_gene_2"; transcript_id "unassigned_transcript_2"; gbkey "CDS"; product "E7 protein"; protein_id "AAZ39492.1"; exon_number "1"; 
+DQ080079.1	Genbank	start_codon	484	486	.	+	0	gene_id "unassigned_gene_2"; transcript_id "unassigned_transcript_2"; gbkey "CDS"; product "E7 protein"; protein_id "AAZ39492.1"; exon_number "1"; 
+DQ080079.1	Genbank	stop_codon	814	816	.	+	0	gene_id "unassigned_gene_2"; transcript_id "unassigned_transcript_2"; gbkey "CDS"; product "E7 protein"; protein_id "AAZ39492.1"; exon_number "1"; 
+DQ080079.1	Genbank	CDS	823	2742	.	+	0	gene_id "unassigned_gene_3"; transcript_id "unassigned_transcript_3"; gbkey "CDS"; product "E1 protein"; protein_id "AAZ39493.1"; exon_number "1"; 
+DQ080079.1	Genbank	start_codon	823	825	.	+	0	gene_id "unassigned_gene_3"; transcript_id "unassigned_transcript_3"; gbkey "CDS"; product "E1 protein"; protein_id "AAZ39493.1"; exon_number "1"; 
+DQ080079.1	Genbank	stop_codon	2743	2745	.	+	0	gene_id "unassigned_gene_3"; transcript_id "unassigned_transcript_3"; gbkey "CDS"; product "E1 protein"; protein_id "AAZ39493.1"; exon_number "1"; 
+DQ080079.1	Genbank	CDS	2672	3781	.	+	0	gene_id "unassigned_gene_4"; transcript_id "unassigned_transcript_4"; gbkey "CDS"; product "E2 protein"; protein_id "AAZ39494.1"; exon_number "1"; 
+DQ080079.1	Genbank	start_codon	2672	2674	.	+	0	gene_id "unassigned_gene_4"; transcript_id "unassigned_transcript_4"; gbkey "CDS"; product "E2 protein"; protein_id "AAZ39494.1"; exon_number "1"; 
+DQ080079.1	Genbank	stop_codon	3782	3784	.	+	0	gene_id "unassigned_gene_4"; transcript_id "unassigned_transcript_4"; gbkey "CDS"; product "E2 protein"; protein_id "AAZ39494.1"; exon_number "1"; 
+DQ080079.1	Genbank	CDS	3267	3548	.	+	0	gene_id "unassigned_gene_5"; transcript_id "unassigned_transcript_5"; gbkey "CDS"; note "lacks traditional start codon"; partial "true"; product "E4 protein"; protein_id "AAZ39495.1"; exon_number "1"; 
+DQ080079.1	Genbank	stop_codon	3549	3551	.	+	0	gene_id "unassigned_gene_5"; transcript_id "unassigned_transcript_5"; gbkey "CDS"; note "lacks traditional start codon"; partial "true"; product "E4 protein"; protein_id "AAZ39495.1"; exon_number "1"; 
+DQ080079.1	Genbank	CDS	3830	4048	.	+	0	gene_id "unassigned_gene_6"; transcript_id "unassigned_transcript_6"; gbkey "CDS"; product "E5 protein"; protein_id "AAZ39496.1"; exon_number "1"; 
+DQ080079.1	Genbank	start_codon	3830	3832	.	+	0	gene_id "unassigned_gene_6"; transcript_id "unassigned_transcript_6"; gbkey "CDS"; product "E5 protein"; protein_id "AAZ39496.1"; exon_number "1"; 
+DQ080079.1	Genbank	stop_codon	4049	4051	.	+	0	gene_id "unassigned_gene_6"; transcript_id "unassigned_transcript_6"; gbkey "CDS"; product "E5 protein"; protein_id "AAZ39496.1"; exon_number "1"; 
+DQ080079.1	Genbank	CDS	4098	5504	.	+	0	gene_id "unassigned_gene_7"; transcript_id "unassigned_transcript_7"; gbkey "CDS"; product "L2 protein"; protein_id "AAZ39497.1"; exon_number "1"; 
+DQ080079.1	Genbank	start_codon	4098	4100	.	+	0	gene_id "unassigned_gene_7"; transcript_id "unassigned_transcript_7"; gbkey "CDS"; product "L2 protein"; protein_id "AAZ39497.1"; exon_number "1"; 
+DQ080079.1	Genbank	stop_codon	5505	5507	.	+	0	gene_id "unassigned_gene_7"; transcript_id "unassigned_transcript_7"; gbkey "CDS"; product "L2 protein"; protein_id "AAZ39497.1"; exon_number "1"; 
+DQ080079.1	Genbank	CDS	5488	7002	.	+	0	gene_id "unassigned_gene_8"; transcript_id "unassigned_transcript_8"; gbkey "CDS"; product "L1 protein"; protein_id "AAZ39498.1"; exon_number "1"; 
+DQ080079.1	Genbank	start_codon	5488	5490	.	+	0	gene_id "unassigned_gene_8"; transcript_id "unassigned_transcript_8"; gbkey "CDS"; product "L1 protein"; protein_id "AAZ39498.1"; exon_number "1"; 
+DQ080079.1	Genbank	stop_codon	7003	7005	.	+	0	gene_id "unassigned_gene_8"; transcript_id "unassigned_transcript_8"; gbkey "CDS"; product "L1 protein"; protein_id "AAZ39498.1"; exon_number "1"; 
+J04353.1	Genbank	gene	108	557	.	+	.	gene_id "E6"; transcript_id ""; gbkey "Gene"; gene "E6"; gene_biotype "protein_coding"; 
+J04353.1	Genbank	CDS	108	554	.	+	0	gene_id "E6"; transcript_id "unassigned_transcript_1"; gbkey "CDS"; gene "E6"; note "ORF E6 from bp 39 to 557"; product "transforming protein"; protein_id "AAA46950.1"; exon_number "1"; 
+J04353.1	Genbank	start_codon	108	110	.	+	0	gene_id "E6"; transcript_id "unassigned_transcript_1"; gbkey "CDS"; gene "E6"; note "ORF E6 from bp 39 to 557"; product "transforming protein"; protein_id "AAA46950.1"; exon_number "1"; 
+J04353.1	Genbank	stop_codon	555	557	.	+	0	gene_id "E6"; transcript_id "unassigned_transcript_1"; gbkey "CDS"; gene "E6"; note "ORF E6 from bp 39 to 557"; product "transforming protein"; protein_id "AAA46950.1"; exon_number "1"; 
+J04353.1	Genbank	gene	560	856	.	+	.	gene_id "E7"; transcript_id ""; gbkey "Gene"; gene "E7"; gene_biotype "protein_coding"; 
+J04353.1	Genbank	CDS	560	853	.	+	0	gene_id "E7"; transcript_id "unassigned_transcript_2"; gbkey "CDS"; gene "E7"; note "ORF E7 from bp 545 to 856"; product "transforming protein"; protein_id "AAA46951.1"; exon_number "1"; 
+J04353.1	Genbank	start_codon	560	562	.	+	0	gene_id "E7"; transcript_id "unassigned_transcript_2"; gbkey "CDS"; gene "E7"; note "ORF E7 from bp 545 to 856"; product "transforming protein"; protein_id "AAA46951.1"; exon_number "1"; 
+J04353.1	Genbank	stop_codon	854	856	.	+	0	gene_id "E7"; transcript_id "unassigned_transcript_2"; gbkey "CDS"; gene "E7"; note "ORF E7 from bp 545 to 856"; product "transforming protein"; protein_id "AAA46951.1"; exon_number "1"; 
+J04353.1	Genbank	gene	862	2751	.	+	.	gene_id "E1"; transcript_id ""; gbkey "Gene"; gene "E1"; gene_biotype "protein_coding"; 
+J04353.1	Genbank	CDS	862	2748	.	+	0	gene_id "E1"; transcript_id "unassigned_transcript_3"; gbkey "CDS"; gene "E1"; note "ORF E1 from bp 850 to bp 2751"; product "replication protein"; protein_id "AAA46952.1"; exon_number "1"; 
+J04353.1	Genbank	start_codon	862	864	.	+	0	gene_id "E1"; transcript_id "unassigned_transcript_3"; gbkey "CDS"; gene "E1"; note "ORF E1 from bp 850 to bp 2751"; product "replication protein"; protein_id "AAA46952.1"; exon_number "1"; 
+J04353.1	Genbank	stop_codon	2749	2751	.	+	0	gene_id "E1"; transcript_id "unassigned_transcript_3"; gbkey "CDS"; gene "E1"; note "ORF E1 from bp 850 to bp 2751"; product "replication protein"; protein_id "AAA46952.1"; exon_number "1"; 
+J04353.1	Genbank	gene	2693	3811	.	+	.	gene_id "E2"; transcript_id ""; gbkey "Gene"; gene "E2"; gene_biotype "protein_coding"; 
+J04353.1	Genbank	CDS	2693	3808	.	+	0	gene_id "E2"; transcript_id "unassigned_transcript_4"; gbkey "CDS"; gene "E2"; note "ORF E2 from bp 2663 to 3811"; product "regulatory protein"; protein_id "AAA46953.1"; exon_number "1"; 
+J04353.1	Genbank	start_codon	2693	2695	.	+	0	gene_id "E2"; transcript_id "unassigned_transcript_4"; gbkey "CDS"; gene "E2"; note "ORF E2 from bp 2663 to 3811"; product "regulatory protein"; protein_id "AAA46953.1"; exon_number "1"; 
+J04353.1	Genbank	stop_codon	3809	3811	.	+	0	gene_id "E2"; transcript_id "unassigned_transcript_4"; gbkey "CDS"; gene "E2"; note "ORF E2 from bp 2663 to 3811"; product "regulatory protein"; protein_id "AAA46953.1"; exon_number "1"; 
+J04353.1	Genbank	gene	3270	3578	.	+	.	gene_id "E4"; transcript_id ""; gbkey "Gene"; gene "E4"; gene_biotype "protein_coding"; 
+J04353.1	Genbank	CDS	3270	3575	.	+	0	gene_id "E4"; transcript_id "unassigned_transcript_5"; gbkey "CDS"; gene "E4"; partial "true"; product "AAA46949.1"; protein_id "AAA46949.1"; exon_number "1"; 
+J04353.1	Genbank	stop_codon	3576	3578	.	+	0	gene_id "E4"; transcript_id "unassigned_transcript_5"; gbkey "CDS"; gene "E4"; partial "true"; product "AAA46949.1"; protein_id "AAA46949.1"; exon_number "1"; 
+J04353.1	Genbank	gene	3816	4070	.	+	.	gene_id "E5"; transcript_id ""; gbkey "Gene"; gene "E5"; gene_biotype "protein_coding"; 
+J04353.1	Genbank	CDS	3816	4067	.	+	0	gene_id "E5"; transcript_id "unassigned_transcript_6"; gbkey "CDS"; gene "E5"; product "AAA46954.1"; protein_id "AAA46954.1"; exon_number "1"; 
+J04353.1	Genbank	start_codon	3816	3818	.	+	0	gene_id "E5"; transcript_id "unassigned_transcript_6"; gbkey "CDS"; gene "E5"; product "AAA46954.1"; protein_id "AAA46954.1"; exon_number "1"; 
+J04353.1	Genbank	stop_codon	4068	4070	.	+	0	gene_id "E5"; transcript_id "unassigned_transcript_6"; gbkey "CDS"; gene "E5"; product "AAA46954.1"; protein_id "AAA46954.1"; exon_number "1"; 
+J04353.1	Genbank	gene	4171	5571	.	+	.	gene_id "L2"; transcript_id ""; gbkey "Gene"; gene "L2"; gene_biotype "protein_coding"; 
+J04353.1	Genbank	CDS	4171	5568	.	+	0	gene_id "L2"; transcript_id "unassigned_transcript_7"; gbkey "CDS"; gene "L2"; note "ORF L2 from bp 4060 to bp 5571"; product "minor capsid protein"; protein_id "AAA46955.1"; exon_number "1"; 
+J04353.1	Genbank	start_codon	4171	4173	.	+	0	gene_id "L2"; transcript_id "unassigned_transcript_7"; gbkey "CDS"; gene "L2"; note "ORF L2 from bp 4060 to bp 5571"; product "minor capsid protein"; protein_id "AAA46955.1"; exon_number "1"; 
+J04353.1	Genbank	stop_codon	5569	5571	.	+	0	gene_id "L2"; transcript_id "unassigned_transcript_7"; gbkey "CDS"; gene "L2"; note "ORF L2 from bp 4060 to bp 5571"; product "minor capsid protein"; protein_id "AAA46955.1"; exon_number "1"; 
+J04353.1	Genbank	gene	5552	7066	.	+	.	gene_id "L1"; transcript_id ""; gbkey "Gene"; gene "L1"; gene_biotype "protein_coding"; 
+J04353.1	Genbank	CDS	5552	7063	.	+	0	gene_id "L1"; transcript_id "unassigned_transcript_8"; gbkey "CDS"; gene "L1"; note "ORF L1 from bp 5516 to  7066"; product "major capsid protein"; protein_id "AAA46956.1"; exon_number "1"; 
+J04353.1	Genbank	start_codon	5552	5554	.	+	0	gene_id "L1"; transcript_id "unassigned_transcript_8"; gbkey "CDS"; gene "L1"; note "ORF L1 from bp 5516 to  7066"; product "major capsid protein"; protein_id "AAA46956.1"; exon_number "1"; 
+J04353.1	Genbank	stop_codon	7064	7066	.	+	0	gene_id "L1"; transcript_id "unassigned_transcript_8"; gbkey "CDS"; gene "L1"; note "ORF L1 from bp 5516 to  7066"; product "major capsid protein"; protein_id "AAA46956.1"; exon_number "1"; 
+M12732.1	Genbank	gene	109	558	.	+	.	gene_id "E6"; transcript_id ""; gbkey "Gene"; gene "E6"; gene_biotype "protein_coding"; 
+M12732.1	Genbank	CDS	109	555	.	+	0	gene_id "E6"; transcript_id "unassigned_transcript_1"; gbkey "CDS"; gene "E6"; note "E6 ORF from 76 to 558; putative"; product "transforming protein"; protein_id "AAA46958.1"; exon_number "1"; 
+M12732.1	Genbank	start_codon	109	111	.	+	0	gene_id "E6"; transcript_id "unassigned_transcript_1"; gbkey "CDS"; gene "E6"; note "E6 ORF from 76 to 558; putative"; product "transforming protein"; protein_id "AAA46958.1"; exon_number "1"; 
+M12732.1	Genbank	stop_codon	556	558	.	+	0	gene_id "E6"; transcript_id "unassigned_transcript_1"; gbkey "CDS"; gene "E6"; note "E6 ORF from 76 to 558; putative"; product "transforming protein"; protein_id "AAA46958.1"; exon_number "1"; 
+M12732.1	Genbank	gene	573	866	.	+	.	gene_id "E7"; transcript_id ""; gbkey "Gene"; gene "E7"; gene_biotype "protein_coding"; 
+M12732.1	Genbank	CDS	573	863	.	+	0	gene_id "E7"; transcript_id "unassigned_transcript_2"; gbkey "CDS"; gene "E7"; note "E7 ORF from bp 543 to 866; putative"; product "transforming protein"; protein_id "AAA46959.1"; exon_number "1"; 
+M12732.1	Genbank	start_codon	573	575	.	+	0	gene_id "E7"; transcript_id "unassigned_transcript_2"; gbkey "CDS"; gene "E7"; note "E7 ORF from bp 543 to 866; putative"; product "transforming protein"; protein_id "AAA46959.1"; exon_number "1"; 
+M12732.1	Genbank	stop_codon	864	866	.	+	0	gene_id "E7"; transcript_id "unassigned_transcript_2"; gbkey "CDS"; gene "E7"; note "E7 ORF from bp 543 to 866; putative"; product "transforming protein"; protein_id "AAA46959.1"; exon_number "1"; 
+M12732.1	Genbank	gene	879	2813	.	+	.	gene_id "E1"; transcript_id ""; gbkey "Gene"; gene "E1"; gene_biotype "protein_coding"; 
+M12732.1	Genbank	CDS	879	2810	.	+	0	gene_id "E1"; transcript_id "unassigned_transcript_3"; gbkey "CDS"; gene "E1"; note "E1 ORF from bp 867 to 2813; putative"; product "replication protein"; protein_id "AAA46960.1"; exon_number "1"; 
+M12732.1	Genbank	start_codon	879	881	.	+	0	gene_id "E1"; transcript_id "unassigned_transcript_3"; gbkey "CDS"; gene "E1"; note "E1 ORF from bp 867 to 2813; putative"; product "replication protein"; protein_id "AAA46960.1"; exon_number "1"; 
+M12732.1	Genbank	stop_codon	2811	2813	.	+	0	gene_id "E1"; transcript_id "unassigned_transcript_3"; gbkey "CDS"; gene "E1"; note "E1 ORF from bp 867 to 2813; putative"; product "replication protein"; protein_id "AAA46960.1"; exon_number "1"; 
+M12732.1	Genbank	gene	2749	3810	.	+	.	gene_id "E2"; transcript_id ""; gbkey "Gene"; gene "E2"; gene_biotype "protein_coding"; 
+M12732.1	Genbank	CDS	2749	3807	.	+	0	gene_id "E2"; transcript_id "unassigned_transcript_4"; gbkey "CDS"; gene "E2"; note "E2 ORF from bp 2728 to 3810; putative"; product "regulatory protein"; protein_id "AAA46961.1"; exon_number "1"; 
+M12732.1	Genbank	start_codon	2749	2751	.	+	0	gene_id "E2"; transcript_id "unassigned_transcript_4"; gbkey "CDS"; gene "E2"; note "E2 ORF from bp 2728 to 3810; putative"; product "regulatory protein"; protein_id "AAA46961.1"; exon_number "1"; 
+M12732.1	Genbank	stop_codon	3808	3810	.	+	0	gene_id "E2"; transcript_id "unassigned_transcript_4"; gbkey "CDS"; gene "E2"; note "E2 ORF from bp 2728 to 3810; putative"; product "regulatory protein"; protein_id "AAA46961.1"; exon_number "1"; 
+M12732.1	Genbank	gene	3326	3577	.	+	.	gene_id "E4"; transcript_id ""; gbkey "Gene"; gene "E4"; gene_biotype "protein_coding"; 
+M12732.1	Genbank	CDS	3326	3574	.	+	0	gene_id "E4"; transcript_id "unassigned_transcript_5"; gbkey "CDS"; gene "E4"; partial "true"; product "AAA46957.1"; protein_id "AAA46957.1"; exon_number "1"; 
+M12732.1	Genbank	stop_codon	3575	3577	.	+	0	gene_id "E4"; transcript_id "unassigned_transcript_5"; gbkey "CDS"; gene "E4"; partial "true"; product "AAA46957.1"; protein_id "AAA46957.1"; exon_number "1"; 
+M12732.1	Genbank	gene	3854	4081	.	+	.	gene_id "E5"; transcript_id ""; gbkey "Gene"; gene "E5"; gene_biotype "protein_coding"; 
+M12732.1	Genbank	CDS	3854	4078	.	+	0	gene_id "E5"; transcript_id "unassigned_transcript_6"; gbkey "CDS"; gene "E5"; product "AAA46962.1"; protein_id "AAA46962.1"; exon_number "1"; 
+M12732.1	Genbank	start_codon	3854	3856	.	+	0	gene_id "E5"; transcript_id "unassigned_transcript_6"; gbkey "CDS"; gene "E5"; product "AAA46962.1"; protein_id "AAA46962.1"; exon_number "1"; 
+M12732.1	Genbank	stop_codon	4079	4081	.	+	0	gene_id "E5"; transcript_id "unassigned_transcript_6"; gbkey "CDS"; gene "E5"; product "AAA46962.1"; protein_id "AAA46962.1"; exon_number "1"; 
+M12732.1	Genbank	gene	4210	5613	.	+	.	gene_id "L2"; transcript_id ""; gbkey "Gene"; gene "L2"; gene_biotype "protein_coding"; 
+M12732.1	Genbank	CDS	4210	5610	.	+	0	gene_id "L2"; transcript_id "unassigned_transcript_7"; gbkey "CDS"; gene "L2"; note "L2 ORF from bp 4198 to 5613; putative"; product "minor capsid protein"; protein_id "AAA46963.1"; exon_number "1"; 
+M12732.1	Genbank	start_codon	4210	4212	.	+	0	gene_id "L2"; transcript_id "unassigned_transcript_7"; gbkey "CDS"; gene "L2"; note "L2 ORF from bp 4198 to 5613; putative"; product "minor capsid protein"; protein_id "AAA46963.1"; exon_number "1"; 
+M12732.1	Genbank	stop_codon	5611	5613	.	+	0	gene_id "L2"; transcript_id "unassigned_transcript_7"; gbkey "CDS"; gene "L2"; note "L2 ORF from bp 4198 to 5613; putative"; product "minor capsid protein"; protein_id "AAA46963.1"; exon_number "1"; 
+M12732.1	Genbank	gene	5594	7093	.	+	.	gene_id "L1"; transcript_id ""; gbkey "Gene"; gene "L1"; gene_biotype "protein_coding"; 
+M12732.1	Genbank	CDS	5594	7090	.	+	0	gene_id "L1"; transcript_id "unassigned_transcript_8"; gbkey "CDS"; gene "L1"; note "L1 ORF from bp 5516 to 7093; putative"; product "major capsid protein"; protein_id "AAA46964.1"; exon_number "1"; 
+M12732.1	Genbank	start_codon	5594	5596	.	+	0	gene_id "L1"; transcript_id "unassigned_transcript_8"; gbkey "CDS"; gene "L1"; note "L1 ORF from bp 5516 to 7093; putative"; product "major capsid protein"; protein_id "AAA46964.1"; exon_number "1"; 
+M12732.1	Genbank	stop_codon	7091	7093	.	+	0	gene_id "L1"; transcript_id "unassigned_transcript_8"; gbkey "CDS"; gene "L1"; note "L1 ORF from bp 5516 to 7093; putative"; product "major capsid protein"; protein_id "AAA46964.1"; exon_number "1"; 
+M62849.1	Genbank	gene	107	583	.	+	.	gene_id "E6"; transcript_id ""; gbkey "Gene"; gene "E6"; gene_biotype "protein_coding"; 
+M62849.1	Genbank	CDS	107	580	.	+	0	gene_id "E6"; transcript_id "unassigned_transcript_1"; gbkey "CDS"; gene "E6"; note "putative"; product "transforming protein"; protein_id "AAA47050.1"; exon_number "1"; 
+M62849.1	Genbank	start_codon	107	109	.	+	0	gene_id "E6"; transcript_id "unassigned_transcript_1"; gbkey "CDS"; gene "E6"; note "putative"; product "transforming protein"; protein_id "AAA47050.1"; exon_number "1"; 
+M62849.1	Genbank	stop_codon	581	583	.	+	0	gene_id "E6"; transcript_id "unassigned_transcript_1"; gbkey "CDS"; gene "E6"; note "putative"; product "transforming protein"; protein_id "AAA47050.1"; exon_number "1"; 
+M62849.1	Genbank	gene	592	921	.	+	.	gene_id "E7"; transcript_id ""; gbkey "Gene"; gene "E7"; gene_biotype "protein_coding"; 
+M62849.1	Genbank	CDS	592	918	.	+	0	gene_id "E7"; transcript_id "unassigned_transcript_2"; gbkey "CDS"; gene "E7"; note "putative"; product "transforming protein"; protein_id "AAA47051.1"; exon_number "1"; 
+M62849.1	Genbank	start_codon	592	594	.	+	0	gene_id "E7"; transcript_id "unassigned_transcript_2"; gbkey "CDS"; gene "E7"; note "putative"; product "transforming protein"; protein_id "AAA47051.1"; exon_number "1"; 
+M62849.1	Genbank	stop_codon	919	921	.	+	0	gene_id "E7"; transcript_id "unassigned_transcript_2"; gbkey "CDS"; gene "E7"; note "putative"; product "transforming protein"; protein_id "AAA47051.1"; exon_number "1"; 
+M62849.1	Genbank	gene	928	2871	.	+	.	gene_id "E1"; transcript_id ""; gbkey "Gene"; gene "E1"; gene_biotype "protein_coding"; 
+M62849.1	Genbank	CDS	928	2868	.	+	0	gene_id "E1"; transcript_id "unassigned_transcript_3"; gbkey "CDS"; gene "E1"; note "putative"; product "replication protein"; protein_id "AAA47052.1"; exon_number "1"; 
+M62849.1	Genbank	start_codon	928	930	.	+	0	gene_id "E1"; transcript_id "unassigned_transcript_3"; gbkey "CDS"; gene "E1"; note "putative"; product "replication protein"; protein_id "AAA47052.1"; exon_number "1"; 
+M62849.1	Genbank	stop_codon	2869	2871	.	+	0	gene_id "E1"; transcript_id "unassigned_transcript_3"; gbkey "CDS"; gene "E1"; note "putative"; product "replication protein"; protein_id "AAA47052.1"; exon_number "1"; 
+M62849.1	Genbank	gene	2798	3910	.	+	.	gene_id "E2"; transcript_id ""; gbkey "Gene"; gene "E2"; gene_biotype "protein_coding"; 
+M62849.1	Genbank	CDS	2798	3907	.	+	0	gene_id "E2"; transcript_id "unassigned_transcript_4"; gbkey "CDS"; gene "E2"; note "putative"; product "regulatory protein"; protein_id "AAA47053.1"; exon_number "1"; 
+M62849.1	Genbank	start_codon	2798	2800	.	+	0	gene_id "E2"; transcript_id "unassigned_transcript_4"; gbkey "CDS"; gene "E2"; note "putative"; product "regulatory protein"; protein_id "AAA47053.1"; exon_number "1"; 
+M62849.1	Genbank	stop_codon	3908	3910	.	+	0	gene_id "E2"; transcript_id "unassigned_transcript_4"; gbkey "CDS"; gene "E2"; note "putative"; product "regulatory protein"; protein_id "AAA47053.1"; exon_number "1"; 
+M62849.1	Genbank	gene	3393	3677	.	+	.	gene_id "E4"; transcript_id ""; gbkey "Gene"; gene "E4"; gene_biotype "protein_coding"; 
+M62849.1	Genbank	CDS	3393	3674	.	+	0	gene_id "E4"; transcript_id "unassigned_transcript_5"; gbkey "CDS"; gene "E4"; partial "true"; product "E4 ORF"; protein_id "AAA47049.1"; exon_number "1"; 
+M62849.1	Genbank	stop_codon	3675	3677	.	+	0	gene_id "E4"; transcript_id "unassigned_transcript_5"; gbkey "CDS"; gene "E4"; partial "true"; product "E4 ORF"; protein_id "AAA47049.1"; exon_number "1"; 
+M62849.1	Genbank	gene	3958	4176	.	+	.	gene_id "E5"; transcript_id ""; gbkey "Gene"; gene "E5"; gene_biotype "protein_coding"; 
+M62849.1	Genbank	CDS	3958	4173	.	+	0	gene_id "E5"; transcript_id "unassigned_transcript_6"; gbkey "CDS"; gene "E5"; product "E5 ORF"; protein_id "AAA47054.1"; exon_number "1"; 
+M62849.1	Genbank	start_codon	3958	3960	.	+	0	gene_id "E5"; transcript_id "unassigned_transcript_6"; gbkey "CDS"; gene "E5"; product "E5 ORF"; protein_id "AAA47054.1"; exon_number "1"; 
+M62849.1	Genbank	stop_codon	4174	4176	.	+	0	gene_id "E5"; transcript_id "unassigned_transcript_6"; gbkey "CDS"; gene "E5"; product "E5 ORF"; protein_id "AAA47054.1"; exon_number "1"; 
+M62849.1	Genbank	gene	4250	5662	.	+	.	gene_id "L2"; transcript_id ""; gbkey "Gene"; gene "L2"; gene_biotype "protein_coding"; 
+M62849.1	Genbank	CDS	4250	5659	.	+	0	gene_id "L2"; transcript_id "unassigned_transcript_7"; gbkey "CDS"; gene "L2"; note "putative"; product "minor capsid protein"; protein_id "AAA47055.1"; exon_number "1"; 
+M62849.1	Genbank	start_codon	4250	4252	.	+	0	gene_id "L2"; transcript_id "unassigned_transcript_7"; gbkey "CDS"; gene "L2"; note "putative"; product "minor capsid protein"; protein_id "AAA47055.1"; exon_number "1"; 
+M62849.1	Genbank	stop_codon	5660	5662	.	+	0	gene_id "L2"; transcript_id "unassigned_transcript_7"; gbkey "CDS"; gene "L2"; note "putative"; product "minor capsid protein"; protein_id "AAA47055.1"; exon_number "1"; 
+M62849.1	Genbank	gene	5643	7160	.	+	.	gene_id "L1"; transcript_id ""; gbkey "Gene"; gene "L1"; gene_biotype "protein_coding"; 
+M62849.1	Genbank	CDS	5643	7157	.	+	0	gene_id "L1"; transcript_id "unassigned_transcript_8"; gbkey "CDS"; gene "L1"; note "L1 ORF from bp 5610 to 7160; putative"; product "major capsid protein"; protein_id "AAA47056.1"; exon_number "1"; 
+M62849.1	Genbank	start_codon	5643	5645	.	+	0	gene_id "L1"; transcript_id "unassigned_transcript_8"; gbkey "CDS"; gene "L1"; note "L1 ORF from bp 5610 to 7160; putative"; product "major capsid protein"; protein_id "AAA47056.1"; exon_number "1"; 
+M62849.1	Genbank	stop_codon	7158	7160	.	+	0	gene_id "L1"; transcript_id "unassigned_transcript_8"; gbkey "CDS"; gene "L1"; note "L1 ORF from bp 5610 to 7160; putative"; product "major capsid protein"; protein_id "AAA47056.1"; exon_number "1"; 
+U21941.1	Genbank	gene	107	583	.	+	.	gene_id "E6"; transcript_id ""; gbkey "Gene"; gene "E6"; gene_biotype "protein_coding"; 
+U21941.1	Genbank	CDS	107	580	.	+	0	gene_id "E6"; transcript_id "unassigned_transcript_1"; gbkey "CDS"; gene "E6"; product "AAC54850.1"; protein_id "AAC54850.1"; exon_number "1"; 
+U21941.1	Genbank	start_codon	107	109	.	+	0	gene_id "E6"; transcript_id "unassigned_transcript_1"; gbkey "CDS"; gene "E6"; product "AAC54850.1"; protein_id "AAC54850.1"; exon_number "1"; 
+U21941.1	Genbank	stop_codon	581	583	.	+	0	gene_id "E6"; transcript_id "unassigned_transcript_1"; gbkey "CDS"; gene "E6"; product "AAC54850.1"; protein_id "AAC54850.1"; exon_number "1"; 
+U21941.1	Genbank	gene	592	921	.	+	.	gene_id "E7"; transcript_id ""; gbkey "Gene"; gene "E7"; gene_biotype "protein_coding"; 
+U21941.1	Genbank	CDS	592	918	.	+	0	gene_id "E7"; transcript_id "unassigned_transcript_2"; gbkey "CDS"; gene "E7"; product "AAC54851.1"; protein_id "AAC54851.1"; exon_number "1"; 
+U21941.1	Genbank	start_codon	592	594	.	+	0	gene_id "E7"; transcript_id "unassigned_transcript_2"; gbkey "CDS"; gene "E7"; product "AAC54851.1"; protein_id "AAC54851.1"; exon_number "1"; 
+U21941.1	Genbank	stop_codon	919	921	.	+	0	gene_id "E7"; transcript_id "unassigned_transcript_2"; gbkey "CDS"; gene "E7"; product "AAC54851.1"; protein_id "AAC54851.1"; exon_number "1"; 
+U21941.1	Genbank	CDS	928	2883	.	+	0	gene_id "unassigned_gene_1"; transcript_id "unassigned_transcript_3"; gbkey "CDS"; product "AAC54852.1"; protein_id "AAC54852.1"; exon_number "1"; 
+U21941.1	Genbank	start_codon	928	930	.	+	0	gene_id "unassigned_gene_1"; transcript_id "unassigned_transcript_3"; gbkey "CDS"; product "AAC54852.1"; protein_id "AAC54852.1"; exon_number "1"; 
+U21941.1	Genbank	stop_codon	2884	2886	.	+	0	gene_id "unassigned_gene_1"; transcript_id "unassigned_transcript_3"; gbkey "CDS"; product "AAC54852.1"; protein_id "AAC54852.1"; exon_number "1"; 
+U21941.1	Genbank	gene	928	2746	.	+	.	gene_id "E1"; transcript_id ""; gbkey "Gene"; gene "E1"; gene_biotype "other"; part "1"; 
+U21941.1	Genbank	gene	2748	2886	.	+	.	gene_id "E1"; transcript_id ""; gbkey "Gene"; gene "E1"; gene_biotype "other"; part "2"; 
+U21941.1	Genbank	CDS	2813	3892	.	+	0	gene_id "unassigned_gene_2"; transcript_id "unassigned_transcript_4"; gbkey "CDS"; product "AAC54853.1"; protein_id "AAC54853.1"; exon_number "1"; 
+U21941.1	Genbank	start_codon	2813	2815	.	+	0	gene_id "unassigned_gene_2"; transcript_id "unassigned_transcript_4"; gbkey "CDS"; product "AAC54853.1"; protein_id "AAC54853.1"; exon_number "1"; 
+U21941.1	Genbank	stop_codon	3893	3895	.	+	0	gene_id "unassigned_gene_2"; transcript_id "unassigned_transcript_4"; gbkey "CDS"; product "AAC54853.1"; protein_id "AAC54853.1"; exon_number "1"; 
+U21941.1	Genbank	gene	2813	3079	.	+	.	gene_id "E2"; transcript_id ""; gbkey "Gene"; gene "E2"; gene_biotype "other"; part "1"; 
+U21941.1	Genbank	gene	3081	3260	.	+	.	gene_id "E2"; transcript_id ""; gbkey "Gene"; gene "E2"; gene_biotype "other"; part "2"; 
+U21941.1	Genbank	gene	3262	3895	.	+	.	gene_id "E2"; transcript_id ""; gbkey "Gene"; gene "E2"; gene_biotype "other"; part "3"; 
+U21941.1	Genbank	gene	3408	3662	.	+	.	gene_id "E4"; transcript_id ""; gbkey "Gene"; gene "E4"; gene_biotype "protein_coding"; 
+U21941.1	Genbank	CDS	3408	3659	.	+	0	gene_id "E4"; transcript_id "unassigned_transcript_5"; gbkey "CDS"; gene "E4"; partial "true"; product "AAC54854.1"; protein_id "AAC54854.1"; exon_number "1"; 
+U21941.1	Genbank	stop_codon	3660	3662	.	+	0	gene_id "E4"; transcript_id "unassigned_transcript_5"; gbkey "CDS"; gene "E4"; partial "true"; product "AAC54854.1"; protein_id "AAC54854.1"; exon_number "1"; 
+U21941.1	Genbank	gene	3909	4145	.	+	.	gene_id "E5"; transcript_id ""; gbkey "Gene"; gene "E5"; gene_biotype "protein_coding"; 
+U21941.1	Genbank	CDS	3909	4142	.	+	0	gene_id "E5"; transcript_id "unassigned_transcript_6"; gbkey "CDS"; gene "E5"; product "AAC54855.1"; protein_id "AAC54855.1"; exon_number "1"; 
+U21941.1	Genbank	start_codon	3909	3911	.	+	0	gene_id "E5"; transcript_id "unassigned_transcript_6"; gbkey "CDS"; gene "E5"; product "AAC54855.1"; protein_id "AAC54855.1"; exon_number "1"; 
+U21941.1	Genbank	stop_codon	4143	4145	.	+	0	gene_id "E5"; transcript_id "unassigned_transcript_6"; gbkey "CDS"; gene "E5"; product "AAC54855.1"; protein_id "AAC54855.1"; exon_number "1"; 
+U21941.1	Genbank	CDS	4209	5606	.	+	0	gene_id "unassigned_gene_3"; transcript_id "unassigned_transcript_7"; gbkey "CDS"; product "AAC54856.1"; protein_id "AAC54856.1"; exon_number "1"; 
+U21941.1	Genbank	start_codon	4209	4211	.	+	0	gene_id "unassigned_gene_3"; transcript_id "unassigned_transcript_7"; gbkey "CDS"; product "AAC54856.1"; protein_id "AAC54856.1"; exon_number "1"; 
+U21941.1	Genbank	stop_codon	5607	5609	.	+	0	gene_id "unassigned_gene_3"; transcript_id "unassigned_transcript_7"; gbkey "CDS"; product "AAC54856.1"; protein_id "AAC54856.1"; exon_number "1"; 
+U21941.1	Genbank	gene	4209	4752	.	+	.	gene_id "L2"; transcript_id ""; gbkey "Gene"; gene "L2"; gene_biotype "other"; part "1"; 
+U21941.1	Genbank	gene	4754	5174	.	+	.	gene_id "L2"; transcript_id ""; gbkey "Gene"; gene "L2"; gene_biotype "other"; part "2"; 
+U21941.1	Genbank	gene	5176	5560	.	+	.	gene_id "L2"; transcript_id ""; gbkey "Gene"; gene "L2"; gene_biotype "other"; part "3"; 
+U21941.1	Genbank	gene	5562	5609	.	+	.	gene_id "L2"; transcript_id ""; gbkey "Gene"; gene "L2"; gene_biotype "other"; part "4"; 
+U21941.1	Genbank	CDS	5590	7101	.	+	0	gene_id "unassigned_gene_4"; transcript_id "unassigned_transcript_8"; gbkey "CDS"; product "AAC54857.1"; protein_id "AAC54857.1"; exon_number "1"; 
+U21941.1	Genbank	start_codon	5590	5592	.	+	0	gene_id "unassigned_gene_4"; transcript_id "unassigned_transcript_8"; gbkey "CDS"; product "AAC54857.1"; protein_id "AAC54857.1"; exon_number "1"; 
+U21941.1	Genbank	stop_codon	7102	7104	.	+	0	gene_id "unassigned_gene_4"; transcript_id "unassigned_transcript_8"; gbkey "CDS"; product "AAC54857.1"; protein_id "AAC54857.1"; exon_number "1"; 
+U21941.1	Genbank	gene	5590	6784	.	+	.	gene_id "L1"; transcript_id ""; gbkey "Gene"; gene "L1"; gene_biotype "other"; part "1"; 
+U21941.1	Genbank	gene	6786	7104	.	+	.	gene_id "L1"; transcript_id ""; gbkey "Gene"; gene "L1"; gene_biotype "other"; part "2"; 
+X74477.1	EMBL	gene	110	559	.	+	.	gene_id "E6"; transcript_id ""; gbkey "Gene"; gene "E6"; gene_biotype "protein_coding"; 
+X74477.1	EMBL	CDS	110	556	.	+	0	gene_id "E6"; transcript_id "unassigned_transcript_1"; db_xref "GOA:P27228"; db_xref "InterPro:IPR001334"; db_xref "UniProtKB/Swiss-Prot:P27228"; gbkey "CDS"; gene "E6"; product "early protein"; protein_id "CAA52561.1"; exon_number "1"; 
+X74477.1	EMBL	start_codon	110	112	.	+	0	gene_id "E6"; transcript_id "unassigned_transcript_1"; db_xref "GOA:P27228"; db_xref "InterPro:IPR001334"; db_xref "UniProtKB/Swiss-Prot:P27228"; gbkey "CDS"; gene "E6"; product "early protein"; protein_id "CAA52561.1"; exon_number "1"; 
+X74477.1	EMBL	stop_codon	557	559	.	+	0	gene_id "E6"; transcript_id "unassigned_transcript_1"; db_xref "GOA:P27228"; db_xref "InterPro:IPR001334"; db_xref "UniProtKB/Swiss-Prot:P27228"; gbkey "CDS"; gene "E6"; product "early protein"; protein_id "CAA52561.1"; exon_number "1"; 
+X74477.1	EMBL	gene	562	861	.	+	.	gene_id "E7"; transcript_id ""; gbkey "Gene"; gene "E7"; gene_biotype "protein_coding"; 
+X74477.1	EMBL	CDS	562	858	.	+	0	gene_id "E7"; transcript_id "unassigned_transcript_2"; db_xref "GOA:P27230"; db_xref "InterPro:IPR000148"; db_xref "UniProtKB/Swiss-Prot:P27230"; gbkey "CDS"; gene "E7"; product "early protein"; protein_id "CAA52562.1"; exon_number "1"; 
+X74477.1	EMBL	start_codon	562	564	.	+	0	gene_id "E7"; transcript_id "unassigned_transcript_2"; db_xref "GOA:P27230"; db_xref "InterPro:IPR000148"; db_xref "UniProtKB/Swiss-Prot:P27230"; gbkey "CDS"; gene "E7"; product "early protein"; protein_id "CAA52562.1"; exon_number "1"; 
+X74477.1	EMBL	stop_codon	859	861	.	+	0	gene_id "E7"; transcript_id "unassigned_transcript_2"; db_xref "GOA:P27230"; db_xref "InterPro:IPR000148"; db_xref "UniProtKB/Swiss-Prot:P27230"; gbkey "CDS"; gene "E7"; product "early protein"; protein_id "CAA52562.1"; exon_number "1"; 
+X74477.1	EMBL	gene	868	2781	.	+	.	gene_id "E1"; transcript_id ""; gbkey "Gene"; gene "E1"; gene_biotype "protein_coding"; 
+X74477.1	EMBL	CDS	868	2778	.	+	0	gene_id "E1"; transcript_id "unassigned_transcript_3"; db_xref "GOA:P27220"; db_xref "InterPro:IPR001177"; db_xref "InterPro:IPR014000"; db_xref "InterPro:IPR014015"; db_xref "InterPro:IPR016393"; db_xref "UniProtKB/Swiss-Prot:P27220"; gbkey "CDS"; gene "E1"; product "early protein"; protein_id "CAA52563.1"; exon_number "1"; 
+X74477.1	EMBL	start_codon	868	870	.	+	0	gene_id "E1"; transcript_id "unassigned_transcript_3"; db_xref "GOA:P27220"; db_xref "InterPro:IPR001177"; db_xref "InterPro:IPR014000"; db_xref "InterPro:IPR014015"; db_xref "InterPro:IPR016393"; db_xref "UniProtKB/Swiss-Prot:P27220"; gbkey "CDS"; gene "E1"; product "early protein"; protein_id "CAA52563.1"; exon_number "1"; 
+X74477.1	EMBL	stop_codon	2779	2781	.	+	0	gene_id "E1"; transcript_id "unassigned_transcript_3"; db_xref "GOA:P27220"; db_xref "InterPro:IPR001177"; db_xref "InterPro:IPR014000"; db_xref "InterPro:IPR014015"; db_xref "InterPro:IPR016393"; db_xref "UniProtKB/Swiss-Prot:P27220"; gbkey "CDS"; gene "E1"; product "early protein"; protein_id "CAA52563.1"; exon_number "1"; 
+X74477.1	EMBL	gene	2714	2717	.	+	.	gene_id "E2"; transcript_id ""; gbkey "Gene"; gene "E2"; gene_biotype "protein_coding"; partial "true"; 
+X74477.1	EMBL	CDS	2714	2714	.	+	0	gene_id "E2"; transcript_id "unassigned_transcript_4"; db_xref "GOA:P27222"; db_xref "InterPro:IPR000427"; db_xref "InterPro:IPR001866"; db_xref "InterPro:IPR009021"; db_xref "InterPro:IPR012677"; db_xref "UniProtKB/Swiss-Prot:P27222"; gbkey "CDS"; gene "E2"; partial "true"; product "early protein"; protein_id "CAA52564.1"; exon_number "1"; 
+X74477.1	EMBL	start_codon	2714	2716	.	+	0	gene_id "E2"; transcript_id "unassigned_transcript_4"; db_xref "GOA:P27222"; db_xref "InterPro:IPR000427"; db_xref "InterPro:IPR001866"; db_xref "InterPro:IPR009021"; db_xref "InterPro:IPR012677"; db_xref "UniProtKB/Swiss-Prot:P27222"; gbkey "CDS"; gene "E2"; partial "true"; product "early protein"; protein_id "CAA52564.1"; exon_number "1"; 
+X74477.1	EMBL	gene	4211	5620	.	+	.	gene_id "L2"; transcript_id ""; gbkey "Gene"; gene "L2"; gene_biotype "protein_coding"; 
+X74477.1	EMBL	CDS	4211	5617	.	+	0	gene_id "L2"; transcript_id "unassigned_transcript_5"; db_xref "GOA:P27234"; db_xref "InterPro:IPR000784"; db_xref "UniProtKB/Swiss-Prot:P27234"; gbkey "CDS"; gene "L2"; product "late protein"; protein_id "CAA52565.1"; exon_number "1"; 
+X74477.1	EMBL	start_codon	4211	4213	.	+	0	gene_id "L2"; transcript_id "unassigned_transcript_5"; db_xref "GOA:P27234"; db_xref "InterPro:IPR000784"; db_xref "UniProtKB/Swiss-Prot:P27234"; gbkey "CDS"; gene "L2"; product "late protein"; protein_id "CAA52565.1"; exon_number "1"; 
+X74477.1	EMBL	stop_codon	5618	5620	.	+	0	gene_id "L2"; transcript_id "unassigned_transcript_5"; db_xref "GOA:P27234"; db_xref "InterPro:IPR000784"; db_xref "UniProtKB/Swiss-Prot:P27234"; gbkey "CDS"; gene "L2"; product "late protein"; protein_id "CAA52565.1"; exon_number "1"; 
+X74477.1	EMBL	gene	5601	7109	.	+	.	gene_id "L1"; transcript_id ""; gbkey "Gene"; gene "L1"; gene_biotype "protein_coding"; 
+X74477.1	EMBL	CDS	5601	7106	.	+	0	gene_id "L1"; transcript_id "unassigned_transcript_6"; db_xref "GOA:P27232"; db_xref "InterPro:IPR002210"; db_xref "InterPro:IPR011222"; db_xref "PDB:2R5J"; db_xref "UniProtKB/Swiss-Prot:P27232"; gbkey "CDS"; gene "L1"; product "late protein"; protein_id "CAA52566.1"; exon_number "1"; 
+X74477.1	EMBL	start_codon	5601	5603	.	+	0	gene_id "L1"; transcript_id "unassigned_transcript_6"; db_xref "GOA:P27232"; db_xref "InterPro:IPR002210"; db_xref "InterPro:IPR011222"; db_xref "PDB:2R5J"; db_xref "UniProtKB/Swiss-Prot:P27232"; gbkey "CDS"; gene "L1"; product "late protein"; protein_id "CAA52566.1"; exon_number "1"; 
+X74477.1	EMBL	stop_codon	7107	7109	.	+	0	gene_id "L1"; transcript_id "unassigned_transcript_6"; db_xref "GOA:P27232"; db_xref "InterPro:IPR002210"; db_xref "InterPro:IPR011222"; db_xref "PDB:2R5J"; db_xref "UniProtKB/Swiss-Prot:P27232"; gbkey "CDS"; gene "L1"; product "late protein"; protein_id "CAA52566.1"; exon_number "1"; 
+X74481.1	EMBL	gene	102	548	.	+	.	gene_id "E6"; transcript_id ""; gbkey "Gene"; gene "E6"; gene_biotype "protein_coding"; 
+X74481.1	EMBL	CDS	102	545	.	+	0	gene_id "E6"; transcript_id "unassigned_transcript_1"; db_xref "GOA:P36814"; db_xref "InterPro:IPR001334"; db_xref "UniProtKB/Swiss-Prot:P36814"; gbkey "CDS"; gene "E6"; product "early protein"; protein_id "CAA52585.1"; exon_number "1"; 
+X74481.1	EMBL	start_codon	102	104	.	+	0	gene_id "E6"; transcript_id "unassigned_transcript_1"; db_xref "GOA:P36814"; db_xref "InterPro:IPR001334"; db_xref "UniProtKB/Swiss-Prot:P36814"; gbkey "CDS"; gene "E6"; product "early protein"; protein_id "CAA52585.1"; exon_number "1"; 
+X74481.1	EMBL	stop_codon	546	548	.	+	0	gene_id "E6"; transcript_id "unassigned_transcript_1"; db_xref "GOA:P36814"; db_xref "InterPro:IPR001334"; db_xref "UniProtKB/Swiss-Prot:P36814"; gbkey "CDS"; gene "E6"; product "early protein"; protein_id "CAA52585.1"; exon_number "1"; 
+X74481.1	EMBL	gene	553	852	.	+	.	gene_id "E7"; transcript_id ""; gbkey "Gene"; gene "E7"; gene_biotype "protein_coding"; 
+X74481.1	EMBL	CDS	553	849	.	+	0	gene_id "E7"; transcript_id "unassigned_transcript_2"; db_xref "GOA:P36831"; db_xref "InterPro:IPR000148"; db_xref "UniProtKB/Swiss-Prot:P36831"; gbkey "CDS"; gene "E7"; product "early protein"; protein_id "CAA52586.1"; exon_number "1"; 
+X74481.1	EMBL	start_codon	553	555	.	+	0	gene_id "E7"; transcript_id "unassigned_transcript_2"; db_xref "GOA:P36831"; db_xref "InterPro:IPR000148"; db_xref "UniProtKB/Swiss-Prot:P36831"; gbkey "CDS"; gene "E7"; product "early protein"; protein_id "CAA52586.1"; exon_number "1"; 
+X74481.1	EMBL	stop_codon	850	852	.	+	0	gene_id "E7"; transcript_id "unassigned_transcript_2"; db_xref "GOA:P36831"; db_xref "InterPro:IPR000148"; db_xref "UniProtKB/Swiss-Prot:P36831"; gbkey "CDS"; gene "E7"; product "early protein"; protein_id "CAA52586.1"; exon_number "1"; 
+X74481.1	EMBL	gene	864	2807	.	+	.	gene_id "E1"; transcript_id ""; gbkey "Gene"; gene "E1"; gene_biotype "protein_coding"; 
+X74481.1	EMBL	CDS	864	2804	.	+	0	gene_id "E1"; transcript_id "unassigned_transcript_3"; db_xref "GOA:P36730"; db_xref "InterPro:IPR001177"; db_xref "InterPro:IPR014000"; db_xref "InterPro:IPR014015"; db_xref "InterPro:IPR016393"; db_xref "UniProtKB/Swiss-Prot:P36730"; gbkey "CDS"; gene "E1"; product "early protein"; protein_id "CAA52587.1"; exon_number "1"; 
+X74481.1	EMBL	start_codon	864	866	.	+	0	gene_id "E1"; transcript_id "unassigned_transcript_3"; db_xref "GOA:P36730"; db_xref "InterPro:IPR001177"; db_xref "InterPro:IPR014000"; db_xref "InterPro:IPR014015"; db_xref "InterPro:IPR016393"; db_xref "UniProtKB/Swiss-Prot:P36730"; gbkey "CDS"; gene "E1"; product "early protein"; protein_id "CAA52587.1"; exon_number "1"; 
+X74481.1	EMBL	stop_codon	2805	2807	.	+	0	gene_id "E1"; transcript_id "unassigned_transcript_3"; db_xref "GOA:P36730"; db_xref "InterPro:IPR001177"; db_xref "InterPro:IPR014000"; db_xref "InterPro:IPR014015"; db_xref "InterPro:IPR016393"; db_xref "UniProtKB/Swiss-Prot:P36730"; gbkey "CDS"; gene "E1"; product "early protein"; protein_id "CAA52587.1"; exon_number "1"; 
+X74481.1	EMBL	gene	2743	3849	.	+	.	gene_id "E2"; transcript_id ""; gbkey "Gene"; gene "E2"; gene_biotype "protein_coding"; 
+X74481.1	EMBL	CDS	2743	3846	.	+	0	gene_id "E2"; transcript_id "unassigned_transcript_4"; db_xref "GOA:P36796"; db_xref "InterPro:IPR000427"; db_xref "InterPro:IPR001866"; db_xref "InterPro:IPR009021"; db_xref "InterPro:IPR012677"; db_xref "UniProtKB/Swiss-Prot:P36796"; gbkey "CDS"; gene "E2"; product "early protein"; protein_id "CAA52588.1"; exon_number "1"; 
+X74481.1	EMBL	start_codon	2743	2745	.	+	0	gene_id "E2"; transcript_id "unassigned_transcript_4"; db_xref "GOA:P36796"; db_xref "InterPro:IPR000427"; db_xref "InterPro:IPR001866"; db_xref "InterPro:IPR009021"; db_xref "InterPro:IPR012677"; db_xref "UniProtKB/Swiss-Prot:P36796"; gbkey "CDS"; gene "E2"; product "early protein"; protein_id "CAA52588.1"; exon_number "1"; 
+X74481.1	EMBL	stop_codon	3847	3849	.	+	0	gene_id "E2"; transcript_id "unassigned_transcript_4"; db_xref "GOA:P36796"; db_xref "InterPro:IPR000427"; db_xref "InterPro:IPR001866"; db_xref "InterPro:IPR009021"; db_xref "InterPro:IPR012677"; db_xref "UniProtKB/Swiss-Prot:P36796"; gbkey "CDS"; gene "E2"; product "early protein"; protein_id "CAA52588.1"; exon_number "1"; 
+X74481.1	EMBL	gene	4262	5662	.	+	.	gene_id "L2"; transcript_id ""; gbkey "Gene"; gene "L2"; gene_biotype "protein_coding"; 
+X74481.1	EMBL	CDS	4262	5659	.	+	0	gene_id "L2"; transcript_id "unassigned_transcript_5"; db_xref "GOA:P36763"; db_xref "InterPro:IPR000784"; db_xref "UniProtKB/Swiss-Prot:P36763"; gbkey "CDS"; gene "L2"; product "late protein"; protein_id "CAA52589.1"; exon_number "1"; 
+X74481.1	EMBL	start_codon	4262	4264	.	+	0	gene_id "L2"; transcript_id "unassigned_transcript_5"; db_xref "GOA:P36763"; db_xref "InterPro:IPR000784"; db_xref "UniProtKB/Swiss-Prot:P36763"; gbkey "CDS"; gene "L2"; product "late protein"; protein_id "CAA52589.1"; exon_number "1"; 
+X74481.1	EMBL	stop_codon	5660	5662	.	+	0	gene_id "L2"; transcript_id "unassigned_transcript_5"; db_xref "GOA:P36763"; db_xref "InterPro:IPR000784"; db_xref "UniProtKB/Swiss-Prot:P36763"; gbkey "CDS"; gene "L2"; product "late protein"; protein_id "CAA52589.1"; exon_number "1"; 
+X74481.1	EMBL	gene	5565	7154	.	+	.	gene_id "L1"; transcript_id ""; gbkey "Gene"; gene "L1"; gene_biotype "protein_coding"; 
+X74481.1	EMBL	CDS	5565	7151	.	+	0	gene_id "L1"; transcript_id "unassigned_transcript_6"; db_xref "GOA:Q05138"; db_xref "InterPro:IPR002210"; db_xref "InterPro:IPR011222"; db_xref "UniProtKB/Swiss-Prot:Q05138"; gbkey "CDS"; gene "L1"; product "late protein"; protein_id "CAA52590.1"; exon_number "1"; 
+X74481.1	EMBL	start_codon	5565	5567	.	+	0	gene_id "L1"; transcript_id "unassigned_transcript_6"; db_xref "GOA:Q05138"; db_xref "InterPro:IPR002210"; db_xref "InterPro:IPR011222"; db_xref "UniProtKB/Swiss-Prot:Q05138"; gbkey "CDS"; gene "L1"; product "late protein"; protein_id "CAA52590.1"; exon_number "1"; 
+X74481.1	EMBL	stop_codon	7152	7154	.	+	0	gene_id "L1"; transcript_id "unassigned_transcript_6"; db_xref "GOA:Q05138"; db_xref "InterPro:IPR002210"; db_xref "InterPro:IPR011222"; db_xref "UniProtKB/Swiss-Prot:Q05138"; gbkey "CDS"; gene "L1"; product "late protein"; protein_id "CAA52590.1"; exon_number "1"; 
+X74483.1	EMBL	gene	102	566	.	+	.	gene_id "E6"; transcript_id ""; gbkey "Gene"; gene "E6"; gene_biotype "protein_coding"; partial "true"; 
+X74483.1	EMBL	CDS	102	563	.	+	0	gene_id "E6"; transcript_id "unassigned_transcript_1"; db_xref "GOA:P24836"; db_xref "InterPro:IPR001334"; db_xref "UniProtKB/Swiss-Prot:P24836"; gbkey "CDS"; gene "E6"; partial "true"; product "envelope protein"; protein_id "CAA52596.1"; exon_number "1"; 
+X74483.1	EMBL	start_codon	102	104	.	+	0	gene_id "E6"; transcript_id "unassigned_transcript_1"; db_xref "GOA:P24836"; db_xref "InterPro:IPR001334"; db_xref "UniProtKB/Swiss-Prot:P24836"; gbkey "CDS"; gene "E6"; partial "true"; product "envelope protein"; protein_id "CAA52596.1"; exon_number "1"; 
+X74483.1	EMBL	gene	572	889	.	+	.	gene_id "E7"; transcript_id ""; gbkey "Gene"; gene "E7"; gene_biotype "protein_coding"; 
+X74483.1	EMBL	CDS	572	886	.	+	0	gene_id "E7"; transcript_id "unassigned_transcript_2"; db_xref "GOA:P36833"; db_xref "InterPro:IPR000148"; db_xref "UniProtKB/Swiss-Prot:P36833"; gbkey "CDS"; gene "E7"; product "envelope protein"; protein_id "CAA52597.1"; exon_number "1"; 
+X74483.1	EMBL	start_codon	572	574	.	+	0	gene_id "E7"; transcript_id "unassigned_transcript_2"; db_xref "GOA:P36833"; db_xref "InterPro:IPR000148"; db_xref "UniProtKB/Swiss-Prot:P36833"; gbkey "CDS"; gene "E7"; product "envelope protein"; protein_id "CAA52597.1"; exon_number "1"; 
+X74483.1	EMBL	stop_codon	887	889	.	+	0	gene_id "E7"; transcript_id "unassigned_transcript_2"; db_xref "GOA:P36833"; db_xref "InterPro:IPR000148"; db_xref "UniProtKB/Swiss-Prot:P36833"; gbkey "CDS"; gene "E7"; product "envelope protein"; protein_id "CAA52597.1"; exon_number "1"; 
+X74483.1	EMBL	gene	2918	3850	.	+	.	gene_id "E2"; transcript_id ""; gbkey "Gene"; gene "E2"; gene_biotype "protein_coding"; 
+X74483.1	EMBL	CDS	2918	3847	.	+	0	gene_id "E2"; transcript_id "unassigned_transcript_3"; db_xref "GOA:P36798"; db_xref "InterPro:IPR000427"; db_xref "InterPro:IPR001866"; db_xref "InterPro:IPR009021"; db_xref "InterPro:IPR012677"; db_xref "UniProtKB/Swiss-Prot:P36798"; gbkey "CDS"; gene "E2"; product "envelope protein"; protein_id "CAA52598.1"; exon_number "1"; 
+X74483.1	EMBL	start_codon	2918	2920	.	+	0	gene_id "E2"; transcript_id "unassigned_transcript_3"; db_xref "GOA:P36798"; db_xref "InterPro:IPR000427"; db_xref "InterPro:IPR001866"; db_xref "InterPro:IPR009021"; db_xref "InterPro:IPR012677"; db_xref "UniProtKB/Swiss-Prot:P36798"; gbkey "CDS"; gene "E2"; product "envelope protein"; protein_id "CAA52598.1"; exon_number "1"; 
+X74483.1	EMBL	stop_codon	3848	3850	.	+	0	gene_id "E2"; transcript_id "unassigned_transcript_3"; db_xref "GOA:P36798"; db_xref "InterPro:IPR000427"; db_xref "InterPro:IPR001866"; db_xref "InterPro:IPR009021"; db_xref "InterPro:IPR012677"; db_xref "UniProtKB/Swiss-Prot:P36798"; gbkey "CDS"; gene "E2"; product "envelope protein"; protein_id "CAA52598.1"; exon_number "1"; 
+X74483.1	EMBL	gene	4222	5616	.	+	.	gene_id "L2"; transcript_id ""; gbkey "Gene"; gene "L2"; gene_biotype "protein_coding"; 
+X74483.1	EMBL	CDS	4222	5613	.	+	0	gene_id "L2"; transcript_id "unassigned_transcript_4"; db_xref "GOA:P36765"; db_xref "InterPro:IPR000784"; db_xref "UniProtKB/Swiss-Prot:P36765"; gbkey "CDS"; gene "L2"; product "late protein"; protein_id "CAA52599.1"; exon_number "1"; 
+X74483.1	EMBL	start_codon	4222	4224	.	+	0	gene_id "L2"; transcript_id "unassigned_transcript_4"; db_xref "GOA:P36765"; db_xref "InterPro:IPR000784"; db_xref "UniProtKB/Swiss-Prot:P36765"; gbkey "CDS"; gene "L2"; product "late protein"; protein_id "CAA52599.1"; exon_number "1"; 
+X74483.1	EMBL	stop_codon	5614	5616	.	+	0	gene_id "L2"; transcript_id "unassigned_transcript_4"; db_xref "GOA:P36765"; db_xref "InterPro:IPR000784"; db_xref "UniProtKB/Swiss-Prot:P36765"; gbkey "CDS"; gene "L2"; product "late protein"; protein_id "CAA52599.1"; exon_number "1"; 
+X74483.1	EMBL	gene	5492	7096	.	+	.	gene_id "L1"; transcript_id ""; gbkey "Gene"; gene "L1"; gene_biotype "protein_coding"; 
+X74483.1	EMBL	CDS	5492	7093	.	+	0	gene_id "L1"; transcript_id "unassigned_transcript_5"; db_xref "GOA:P36743"; db_xref "InterPro:IPR002210"; db_xref "InterPro:IPR011222"; db_xref "UniProtKB/Swiss-Prot:P36743"; gbkey "CDS"; gene "L1"; product "late protein"; protein_id "CAA52600.1"; exon_number "1"; 
+X74483.1	EMBL	start_codon	5492	5494	.	+	0	gene_id "L1"; transcript_id "unassigned_transcript_5"; db_xref "GOA:P36743"; db_xref "InterPro:IPR002210"; db_xref "InterPro:IPR011222"; db_xref "UniProtKB/Swiss-Prot:P36743"; gbkey "CDS"; gene "L1"; product "late protein"; protein_id "CAA52600.1"; exon_number "1"; 
+X74483.1	EMBL	stop_codon	7094	7096	.	+	0	gene_id "L1"; transcript_id "unassigned_transcript_5"; db_xref "GOA:P36743"; db_xref "InterPro:IPR002210"; db_xref "InterPro:IPR011222"; db_xref "UniProtKB/Swiss-Prot:P36743"; gbkey "CDS"; gene "L1"; product "late protein"; protein_id "CAA52600.1"; exon_number "1"; 
+X77858.1	EMBL	gene	55	537	.	+	.	gene_id "ORF putative E6"; transcript_id ""; gbkey "Gene"; gene "ORF putative E6"; gene_biotype "protein_coding"; 
+X77858.1	EMBL	CDS	55	534	.	+	0	gene_id "ORF putative E6"; transcript_id "unassigned_transcript_1"; db_xref "GOA:Q81964"; db_xref "InterPro:IPR001334"; db_xref "UniProtKB/TrEMBL:Q81964"; gbkey "CDS"; gene "ORF putative E6"; product "CAA54849.1"; protein_id "CAA54849.1"; exon_number "1"; 
+X77858.1	EMBL	start_codon	55	57	.	+	0	gene_id "ORF putative E6"; transcript_id "unassigned_transcript_1"; db_xref "GOA:Q81964"; db_xref "InterPro:IPR001334"; db_xref "UniProtKB/TrEMBL:Q81964"; gbkey "CDS"; gene "ORF putative E6"; product "CAA54849.1"; protein_id "CAA54849.1"; exon_number "1"; 
+X77858.1	EMBL	stop_codon	535	537	.	+	0	gene_id "ORF putative E6"; transcript_id "unassigned_transcript_1"; db_xref "GOA:Q81964"; db_xref "InterPro:IPR001334"; db_xref "UniProtKB/TrEMBL:Q81964"; gbkey "CDS"; gene "ORF putative E6"; product "CAA54849.1"; protein_id "CAA54849.1"; exon_number "1"; 
+X77858.1	EMBL	gene	542	865	.	+	.	gene_id "ORF putative E7"; transcript_id ""; gbkey "Gene"; gene "ORF putative E7"; gene_biotype "protein_coding"; 
+X77858.1	EMBL	CDS	542	862	.	+	0	gene_id "ORF putative E7"; transcript_id "unassigned_transcript_2"; db_xref "GOA:Q81965"; db_xref "InterPro:IPR000148"; db_xref "UniProtKB/TrEMBL:Q81965"; gbkey "CDS"; gene "ORF putative E7"; product "CAA54850.1"; protein_id "CAA54850.1"; exon_number "1"; 
+X77858.1	EMBL	start_codon	542	544	.	+	0	gene_id "ORF putative E7"; transcript_id "unassigned_transcript_2"; db_xref "GOA:Q81965"; db_xref "InterPro:IPR000148"; db_xref "UniProtKB/TrEMBL:Q81965"; gbkey "CDS"; gene "ORF putative E7"; product "CAA54850.1"; protein_id "CAA54850.1"; exon_number "1"; 
+X77858.1	EMBL	stop_codon	863	865	.	+	0	gene_id "ORF putative E7"; transcript_id "unassigned_transcript_2"; db_xref "GOA:Q81965"; db_xref "InterPro:IPR000148"; db_xref "UniProtKB/TrEMBL:Q81965"; gbkey "CDS"; gene "ORF putative E7"; product "CAA54850.1"; protein_id "CAA54850.1"; exon_number "1"; 
+X77858.1	EMBL	gene	872	2806	.	+	.	gene_id "ORF putative E1"; transcript_id ""; gbkey "Gene"; gene "ORF putative E1"; gene_biotype "protein_coding"; 
+X77858.1	EMBL	CDS	872	2803	.	+	0	gene_id "ORF putative E1"; transcript_id "unassigned_transcript_3"; db_xref "GOA:Q81966"; db_xref "InterPro:IPR001177"; db_xref "InterPro:IPR014000"; db_xref "InterPro:IPR014015"; db_xref "InterPro:IPR016393"; db_xref "UniProtKB/Swiss-Prot:Q81966"; gbkey "CDS"; gene "ORF putative E1"; product "CAA54851.1"; protein_id "CAA54851.1"; exon_number "1"; 
+X77858.1	EMBL	start_codon	872	874	.	+	0	gene_id "ORF putative E1"; transcript_id "unassigned_transcript_3"; db_xref "GOA:Q81966"; db_xref "InterPro:IPR001177"; db_xref "InterPro:IPR014000"; db_xref "InterPro:IPR014015"; db_xref "InterPro:IPR016393"; db_xref "UniProtKB/Swiss-Prot:Q81966"; gbkey "CDS"; gene "ORF putative E1"; product "CAA54851.1"; protein_id "CAA54851.1"; exon_number "1"; 
+X77858.1	EMBL	stop_codon	2804	2806	.	+	0	gene_id "ORF putative E1"; transcript_id "unassigned_transcript_3"; db_xref "GOA:Q81966"; db_xref "InterPro:IPR001177"; db_xref "InterPro:IPR014000"; db_xref "InterPro:IPR014015"; db_xref "InterPro:IPR016393"; db_xref "UniProtKB/Swiss-Prot:Q81966"; gbkey "CDS"; gene "ORF putative E1"; product "CAA54851.1"; protein_id "CAA54851.1"; exon_number "1"; 
+X77858.1	EMBL	gene	2736	3848	.	+	.	gene_id "ORF putative E2"; transcript_id ""; gbkey "Gene"; gene "ORF putative E2"; gene_biotype "protein_coding"; 
+X77858.1	EMBL	CDS	2736	3845	.	+	0	gene_id "ORF putative E2"; transcript_id "unassigned_transcript_4"; db_xref "GOA:Q81967"; db_xref "HSSP:1JJ4"; db_xref "InterPro:IPR000427"; db_xref "InterPro:IPR001866"; db_xref "InterPro:IPR009021"; db_xref "InterPro:IPR012677"; db_xref "UniProtKB/TrEMBL:Q81967"; gbkey "CDS"; gene "ORF putative E2"; product "CAA54852.1"; protein_id "CAA54852.1"; exon_number "1"; 
+X77858.1	EMBL	start_codon	2736	2738	.	+	0	gene_id "ORF putative E2"; transcript_id "unassigned_transcript_4"; db_xref "GOA:Q81967"; db_xref "HSSP:1JJ4"; db_xref "InterPro:IPR000427"; db_xref "InterPro:IPR001866"; db_xref "InterPro:IPR009021"; db_xref "InterPro:IPR012677"; db_xref "UniProtKB/TrEMBL:Q81967"; gbkey "CDS"; gene "ORF putative E2"; product "CAA54852.1"; protein_id "CAA54852.1"; exon_number "1"; 
+X77858.1	EMBL	stop_codon	3846	3848	.	+	0	gene_id "ORF putative E2"; transcript_id "unassigned_transcript_4"; db_xref "GOA:Q81967"; db_xref "HSSP:1JJ4"; db_xref "InterPro:IPR000427"; db_xref "InterPro:IPR001866"; db_xref "InterPro:IPR009021"; db_xref "InterPro:IPR012677"; db_xref "UniProtKB/TrEMBL:Q81967"; gbkey "CDS"; gene "ORF putative E2"; product "CAA54852.1"; protein_id "CAA54852.1"; exon_number "1"; 
+X77858.1	EMBL	gene	3268	3615	.	+	.	gene_id "ORF putative E4"; transcript_id ""; gbkey "Gene"; gene "ORF putative E4"; gene_biotype "protein_coding"; 
+X77858.1	EMBL	CDS	3268	3612	.	+	0	gene_id "ORF putative E4"; transcript_id "unassigned_transcript_5"; db_xref "InterPro:IPR003861"; db_xref "UniProtKB/TrEMBL:Q76QV7"; gbkey "CDS"; gene "ORF putative E4"; product "CAA54853.1"; protein_id "CAA54853.1"; exon_number "1"; 
+X77858.1	EMBL	start_codon	3268	3270	.	+	0	gene_id "ORF putative E4"; transcript_id "unassigned_transcript_5"; db_xref "InterPro:IPR003861"; db_xref "UniProtKB/TrEMBL:Q76QV7"; gbkey "CDS"; gene "ORF putative E4"; product "CAA54853.1"; protein_id "CAA54853.1"; exon_number "1"; 
+X77858.1	EMBL	stop_codon	3613	3615	.	+	0	gene_id "ORF putative E4"; transcript_id "unassigned_transcript_5"; db_xref "InterPro:IPR003861"; db_xref "UniProtKB/TrEMBL:Q76QV7"; gbkey "CDS"; gene "ORF putative E4"; product "CAA54853.1"; protein_id "CAA54853.1"; exon_number "1"; 
+X77858.1	EMBL	gene	3908	4129	.	+	.	gene_id "ORF putative E5"; transcript_id ""; gbkey "Gene"; gene "ORF putative E5"; gene_biotype "protein_coding"; 
+X77858.1	EMBL	CDS	3908	4126	.	+	0	gene_id "ORF putative E5"; transcript_id "unassigned_transcript_6"; db_xref "InterPro:IPR004270"; db_xref "UniProtKB/TrEMBL:Q81969"; gbkey "CDS"; gene "ORF putative E5"; product "CAA54854.1"; protein_id "CAA54854.1"; exon_number "1"; 
+X77858.1	EMBL	start_codon	3908	3910	.	+	0	gene_id "ORF putative E5"; transcript_id "unassigned_transcript_6"; db_xref "InterPro:IPR004270"; db_xref "UniProtKB/TrEMBL:Q81969"; gbkey "CDS"; gene "ORF putative E5"; product "CAA54854.1"; protein_id "CAA54854.1"; exon_number "1"; 
+X77858.1	EMBL	stop_codon	4127	4129	.	+	0	gene_id "ORF putative E5"; transcript_id "unassigned_transcript_6"; db_xref "InterPro:IPR004270"; db_xref "UniProtKB/TrEMBL:Q81969"; gbkey "CDS"; gene "ORF putative E5"; product "CAA54854.1"; protein_id "CAA54854.1"; exon_number "1"; 
+X77858.1	EMBL	gene	4231	5625	.	+	.	gene_id "ORF putative L2"; transcript_id ""; gbkey "Gene"; gene "ORF putative L2"; gene_biotype "protein_coding"; 
+X77858.1	EMBL	CDS	4231	5622	.	+	0	gene_id "ORF putative L2"; transcript_id "unassigned_transcript_7"; db_xref "GOA:Q81970"; db_xref "InterPro:IPR000784"; db_xref "UniProtKB/TrEMBL:Q81970"; gbkey "CDS"; gene "ORF putative L2"; product "CAA54855.1"; protein_id "CAA54855.1"; exon_number "1"; 
+X77858.1	EMBL	start_codon	4231	4233	.	+	0	gene_id "ORF putative L2"; transcript_id "unassigned_transcript_7"; db_xref "GOA:Q81970"; db_xref "InterPro:IPR000784"; db_xref "UniProtKB/TrEMBL:Q81970"; gbkey "CDS"; gene "ORF putative L2"; product "CAA54855.1"; protein_id "CAA54855.1"; exon_number "1"; 
+X77858.1	EMBL	stop_codon	5623	5625	.	+	0	gene_id "ORF putative L2"; transcript_id "unassigned_transcript_7"; db_xref "GOA:Q81970"; db_xref "InterPro:IPR000784"; db_xref "UniProtKB/TrEMBL:Q81970"; gbkey "CDS"; gene "ORF putative L2"; product "CAA54855.1"; protein_id "CAA54855.1"; exon_number "1"; 
+X77858.1	EMBL	gene	5606	7132	.	+	.	gene_id "ORF putative L1"; transcript_id ""; gbkey "Gene"; gene "ORF putative L1"; gene_biotype "protein_coding"; 
+X77858.1	EMBL	CDS	5606	7129	.	+	0	gene_id "ORF putative L1"; transcript_id "unassigned_transcript_8"; db_xref "GOA:Q81971"; db_xref "HSSP:1DZL"; db_xref "InterPro:IPR002210"; db_xref "InterPro:IPR011222"; db_xref "UniProtKB/TrEMBL:Q81971"; gbkey "CDS"; gene "ORF putative L1"; product "CAA54856.1"; protein_id "CAA54856.1"; exon_number "1"; 
+X77858.1	EMBL	start_codon	5606	5608	.	+	0	gene_id "ORF putative L1"; transcript_id "unassigned_transcript_8"; db_xref "GOA:Q81971"; db_xref "HSSP:1DZL"; db_xref "InterPro:IPR002210"; db_xref "InterPro:IPR011222"; db_xref "UniProtKB/TrEMBL:Q81971"; gbkey "CDS"; gene "ORF putative L1"; product "CAA54856.1"; protein_id "CAA54856.1"; exon_number "1"; 
+X77858.1	EMBL	stop_codon	7130	7132	.	+	0	gene_id "ORF putative L1"; transcript_id "unassigned_transcript_8"; db_xref "GOA:Q81971"; db_xref "HSSP:1DZL"; db_xref "InterPro:IPR002210"; db_xref "InterPro:IPR011222"; db_xref "UniProtKB/TrEMBL:Q81971"; gbkey "CDS"; gene "ORF putative L1"; product "CAA54856.1"; protein_id "CAA54856.1"; exon_number "1"; 
+X94165.1	EMBL	gene	102	548	.	+	.	gene_id "E6"; transcript_id ""; gbkey "Gene"; gene "E6"; gene_biotype "protein_coding"; 
+X94165.1	EMBL	CDS	102	545	.	+	0	gene_id "E6"; transcript_id "unassigned_transcript_1"; db_xref "GOA:Q82005"; db_xref "InterPro:IPR001334"; db_xref "UniProtKB/TrEMBL:Q82005"; gbkey "CDS"; gene "E6"; note "early gene, putative"; product "CAA63882.1"; protein_id "CAA63882.1"; exon_number "1"; 
+X94165.1	EMBL	start_codon	102	104	.	+	0	gene_id "E6"; transcript_id "unassigned_transcript_1"; db_xref "GOA:Q82005"; db_xref "InterPro:IPR001334"; db_xref "UniProtKB/TrEMBL:Q82005"; gbkey "CDS"; gene "E6"; note "early gene, putative"; product "CAA63882.1"; protein_id "CAA63882.1"; exon_number "1"; 
+X94165.1	EMBL	stop_codon	546	548	.	+	0	gene_id "E6"; transcript_id "unassigned_transcript_1"; db_xref "GOA:Q82005"; db_xref "InterPro:IPR001334"; db_xref "UniProtKB/TrEMBL:Q82005"; gbkey "CDS"; gene "E6"; note "early gene, putative"; product "CAA63882.1"; protein_id "CAA63882.1"; exon_number "1"; 
+X94165.1	EMBL	gene	550	843	.	+	.	gene_id "E7"; transcript_id ""; gbkey "Gene"; gene "E7"; gene_biotype "protein_coding"; 
+X94165.1	EMBL	CDS	550	840	.	+	0	gene_id "E7"; transcript_id "unassigned_transcript_2"; db_xref "GOA:Q82006"; db_xref "InterPro:IPR000148"; db_xref "UniProtKB/TrEMBL:Q82006"; gbkey "CDS"; gene "E7"; note "putative"; product "CAA63883.1"; protein_id "CAA63883.1"; exon_number "1"; 
+X94165.1	EMBL	start_codon	550	552	.	+	0	gene_id "E7"; transcript_id "unassigned_transcript_2"; db_xref "GOA:Q82006"; db_xref "InterPro:IPR000148"; db_xref "UniProtKB/TrEMBL:Q82006"; gbkey "CDS"; gene "E7"; note "putative"; product "CAA63883.1"; protein_id "CAA63883.1"; exon_number "1"; 
+X94165.1	EMBL	stop_codon	841	843	.	+	0	gene_id "E7"; transcript_id "unassigned_transcript_2"; db_xref "GOA:Q82006"; db_xref "InterPro:IPR000148"; db_xref "UniProtKB/TrEMBL:Q82006"; gbkey "CDS"; gene "E7"; note "putative"; product "CAA63883.1"; protein_id "CAA63883.1"; exon_number "1"; 
+X94165.1	EMBL	gene	850	2802	.	+	.	gene_id "E1"; transcript_id ""; gbkey "Gene"; gene "E1"; gene_biotype "protein_coding"; 
+X94165.1	EMBL	CDS	850	2799	.	+	0	gene_id "E1"; transcript_id "unassigned_transcript_3"; db_xref "GOA:Q82007"; db_xref "InterPro:IPR001177"; db_xref "InterPro:IPR014000"; db_xref "InterPro:IPR014015"; db_xref "InterPro:IPR016393"; db_xref "UniProtKB/Swiss-Prot:Q82007"; gbkey "CDS"; gene "E1"; note "putative"; product "CAA63884.1"; protein_id "CAA63884.1"; exon_number "1"; 
+X94165.1	EMBL	start_codon	850	852	.	+	0	gene_id "E1"; transcript_id "unassigned_transcript_3"; db_xref "GOA:Q82007"; db_xref "InterPro:IPR001177"; db_xref "InterPro:IPR014000"; db_xref "InterPro:IPR014015"; db_xref "InterPro:IPR016393"; db_xref "UniProtKB/Swiss-Prot:Q82007"; gbkey "CDS"; gene "E1"; note "putative"; product "CAA63884.1"; protein_id "CAA63884.1"; exon_number "1"; 
+X94165.1	EMBL	stop_codon	2800	2802	.	+	0	gene_id "E1"; transcript_id "unassigned_transcript_3"; db_xref "GOA:Q82007"; db_xref "InterPro:IPR001177"; db_xref "InterPro:IPR014000"; db_xref "InterPro:IPR014015"; db_xref "InterPro:IPR016393"; db_xref "UniProtKB/Swiss-Prot:Q82007"; gbkey "CDS"; gene "E1"; note "putative"; product "CAA63884.1"; protein_id "CAA63884.1"; exon_number "1"; 
+X94165.1	EMBL	gene	2741	3793	.	+	.	gene_id "E2"; transcript_id ""; gbkey "Gene"; gene "E2"; gene_biotype "protein_coding"; 
+X94165.1	EMBL	CDS	2741	3790	.	+	0	gene_id "E2"; transcript_id "unassigned_transcript_4"; db_xref "GOA:Q82008"; db_xref "HSSP:1JJ4"; db_xref "InterPro:IPR000427"; db_xref "InterPro:IPR001866"; db_xref "InterPro:IPR009021"; db_xref "InterPro:IPR012677"; db_xref "UniProtKB/TrEMBL:Q82008"; gbkey "CDS"; gene "E2"; note "putative"; product "CAA63885.1"; protein_id "CAA63885.1"; exon_number "1"; 
+X94165.1	EMBL	start_codon	2741	2743	.	+	0	gene_id "E2"; transcript_id "unassigned_transcript_4"; db_xref "GOA:Q82008"; db_xref "HSSP:1JJ4"; db_xref "InterPro:IPR000427"; db_xref "InterPro:IPR001866"; db_xref "InterPro:IPR009021"; db_xref "InterPro:IPR012677"; db_xref "UniProtKB/TrEMBL:Q82008"; gbkey "CDS"; gene "E2"; note "putative"; product "CAA63885.1"; protein_id "CAA63885.1"; exon_number "1"; 
+X94165.1	EMBL	stop_codon	3791	3793	.	+	0	gene_id "E2"; transcript_id "unassigned_transcript_4"; db_xref "GOA:Q82008"; db_xref "HSSP:1JJ4"; db_xref "InterPro:IPR000427"; db_xref "InterPro:IPR001866"; db_xref "InterPro:IPR009021"; db_xref "InterPro:IPR012677"; db_xref "UniProtKB/TrEMBL:Q82008"; gbkey "CDS"; gene "E2"; note "putative"; product "CAA63885.1"; protein_id "CAA63885.1"; exon_number "1"; 
+X94165.1	EMBL	gene	3324	3560	.	+	.	gene_id "E4"; transcript_id ""; gbkey "Gene"; gene "E4"; gene_biotype "other"; 
+X94165.1	EMBL	transcript	3324	3560	.	+	.	gene_id "E4"; transcript_id "unassigned_transcript_5"; gbkey "mRNA"; gene "E4"; note "putative"; transcript_biotype "mRNA"; 
+X94165.1	EMBL	exon	3324	3560	.	+	.	gene_id "E4"; transcript_id "unassigned_transcript_5"; gene "E4"; note "putative"; transcript_biotype "mRNA"; exon_number "1"; 
+X94165.1	EMBL	gene	4083	5510	.	+	.	gene_id "L2"; transcript_id ""; gbkey "Gene"; gene "L2"; gene_biotype "protein_coding"; 
+X94165.1	EMBL	CDS	4083	5507	.	+	0	gene_id "L2"; transcript_id "unassigned_transcript_6"; db_xref "GOA:Q82009"; db_xref "InterPro:IPR000784"; db_xref "UniProtKB/TrEMBL:Q82009"; gbkey "CDS"; gene "L2"; note "late gene, putative"; product "CAA63886.1"; protein_id "CAA63886.1"; exon_number "1"; 
+X94165.1	EMBL	start_codon	4083	4085	.	+	0	gene_id "L2"; transcript_id "unassigned_transcript_6"; db_xref "GOA:Q82009"; db_xref "InterPro:IPR000784"; db_xref "UniProtKB/TrEMBL:Q82009"; gbkey "CDS"; gene "L2"; note "late gene, putative"; product "CAA63886.1"; protein_id "CAA63886.1"; exon_number "1"; 
+X94165.1	EMBL	stop_codon	5508	5510	.	+	0	gene_id "L2"; transcript_id "unassigned_transcript_6"; db_xref "GOA:Q82009"; db_xref "InterPro:IPR000784"; db_xref "UniProtKB/TrEMBL:Q82009"; gbkey "CDS"; gene "L2"; note "late gene, putative"; product "CAA63886.1"; protein_id "CAA63886.1"; exon_number "1"; 
+X94165.1	EMBL	gene	5494	7005	.	+	.	gene_id "L1"; transcript_id ""; gbkey "Gene"; gene "L1"; gene_biotype "protein_coding"; 
+X94165.1	EMBL	CDS	5494	7002	.	+	0	gene_id "L1"; transcript_id "unassigned_transcript_7"; db_xref "GOA:Q82010"; db_xref "HSSP:1DZL"; db_xref "InterPro:IPR002210"; db_xref "InterPro:IPR011222"; db_xref "UniProtKB/TrEMBL:Q82010"; gbkey "CDS"; gene "L1"; note "putative"; product "CAA63887.1"; protein_id "CAA63887.1"; exon_number "1"; 
+X94165.1	EMBL	start_codon	5494	5496	.	+	0	gene_id "L1"; transcript_id "unassigned_transcript_7"; db_xref "GOA:Q82010"; db_xref "HSSP:1DZL"; db_xref "InterPro:IPR002210"; db_xref "InterPro:IPR011222"; db_xref "UniProtKB/TrEMBL:Q82010"; gbkey "CDS"; gene "L1"; note "putative"; product "CAA63887.1"; protein_id "CAA63887.1"; exon_number "1"; 
+X94165.1	EMBL	stop_codon	7003	7005	.	+	0	gene_id "L1"; transcript_id "unassigned_transcript_7"; db_xref "GOA:Q82010"; db_xref "HSSP:1DZL"; db_xref "InterPro:IPR002210"; db_xref "InterPro:IPR011222"; db_xref "UniProtKB/TrEMBL:Q82010"; gbkey "CDS"; gene "L1"; note "putative"; product "CAA63887.1"; protein_id "CAA63887.1"; exon_number "1"; 
diff --git a/tests/test_tools/data/viral.gtf.json b/tests/test_tools/data/viral.gtf.json
new file mode 100644
index 00000000..710f2e0d
--- /dev/null
+++ b/tests/test_tools/data/viral.gtf.json
@@ -0,0 +1,2683 @@
+{
+    "genes": [
+        {
+            "biotype": "gene",
+            "chr": "AB027020.1",
+            "end": 2790,
+            "name": "E1",
+            "start": 886,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 2787,
+                    "name": "AB027020.1_E1_T",
+                    "start": 886,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 2787,
+                            "name": "BAA90729.1",
+                            "start": 886
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "J04353.1",
+            "end": 2751,
+            "name": "E1",
+            "start": 862,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 2748,
+                    "name": "J04353.1_E1_T",
+                    "start": 862,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 2748,
+                            "name": "AAA46952.1",
+                            "start": 862
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "K02718.1",
+            "end": 2813,
+            "name": "E1",
+            "start": 865,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 2810,
+                    "name": "K02718.1_E1_T",
+                    "start": 865,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 2810,
+                            "name": "AAA46936.1",
+                            "start": 865
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "M12732.1",
+            "end": 2813,
+            "name": "E1",
+            "start": 879,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 2810,
+                    "name": "M12732.1_E1_T",
+                    "start": 879,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 2810,
+                            "name": "AAA46960.1",
+                            "start": 879
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "M62849.1",
+            "end": 2871,
+            "name": "E1",
+            "start": 928,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 2868,
+                    "name": "M62849.1_E1_T",
+                    "start": 928,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 2868,
+                            "name": "AAA47052.1",
+                            "start": 928
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "U21941.1",
+            "end": 2886,
+            "name": "E1",
+            "start": 928,
+            "strand": "+"
+        },
+        {
+            "biotype": "gene",
+            "chr": "X05015.1",
+            "end": 2887,
+            "name": "E1",
+            "start": 914,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 2884,
+                    "name": "X05015.1_E1_T",
+                    "start": 914,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 2884,
+                            "name": "CAA28666.1",
+                            "start": 914
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "X74464.1",
+            "end": 2734,
+            "name": "E1",
+            "start": 917,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 2731,
+                    "name": "X74464.1_E1_T",
+                    "start": 917,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 2731,
+                            "name": "CAA52485.1",
+                            "start": 917
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "X74474.1",
+            "end": 2785,
+            "name": "E1",
+            "start": 890,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 2782,
+                    "name": "X74474.1_E1_T",
+                    "start": 890,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 2782,
+                            "name": "CAA52545.1",
+                            "start": 890
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "X74477.1",
+            "end": 2781,
+            "name": "E1",
+            "start": 868,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 2778,
+                    "name": "X74477.1_E1_T",
+                    "start": 868,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 2778,
+                            "name": "CAA52563.1",
+                            "start": 868
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "X74481.1",
+            "end": 2807,
+            "name": "E1",
+            "start": 864,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 2804,
+                    "name": "X74481.1_E1_T",
+                    "start": 864,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 2804,
+                            "name": "CAA52587.1",
+                            "start": 864
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "X94165.1",
+            "end": 2802,
+            "name": "E1",
+            "start": 850,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 2799,
+                    "name": "X94165.1_E1_T",
+                    "start": 850,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 2799,
+                            "name": "CAA63884.1",
+                            "start": 850
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "AB027020.1",
+            "end": 3838,
+            "name": "E2",
+            "start": 2732,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 3835,
+                    "name": "AB027020.1_E2_T",
+                    "start": 2732,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 3835,
+                            "name": "BAA90730.1",
+                            "start": 2732
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "J04353.1",
+            "end": 3811,
+            "name": "E2",
+            "start": 2693,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 3808,
+                    "name": "J04353.1_E2_T",
+                    "start": 2693,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 3808,
+                            "name": "AAA46953.1",
+                            "start": 2693
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "K02718.1",
+            "end": 3852,
+            "name": "E2",
+            "start": 2755,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 3849,
+                    "name": "K02718.1_E2_T",
+                    "start": 2755,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 3849,
+                            "name": "AAA46941.1",
+                            "start": 2755
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "M12732.1",
+            "end": 3810,
+            "name": "E2",
+            "start": 2749,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 3807,
+                    "name": "M12732.1_E2_T",
+                    "start": 2749,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 3807,
+                            "name": "AAA46961.1",
+                            "start": 2749
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "M62849.1",
+            "end": 3910,
+            "name": "E2",
+            "start": 2798,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 3907,
+                    "name": "M62849.1_E2_T",
+                    "start": 2798,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 3907,
+                            "name": "AAA47053.1",
+                            "start": 2798
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "U21941.1",
+            "end": 3895,
+            "name": "E2",
+            "start": 2813,
+            "strand": "+"
+        },
+        {
+            "biotype": "gene",
+            "chr": "X05015.1",
+            "end": 3914,
+            "name": "E2",
+            "start": 2817,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 3911,
+                    "name": "X05015.1_E2_T",
+                    "start": 2817,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 3911,
+                            "name": "CAA28667.1",
+                            "start": 2817
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "X74464.1",
+            "end": 4061,
+            "name": "E2",
+            "start": 2676,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 4058,
+                    "name": "X74464.1_E2_T",
+                    "start": 2676,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 4058,
+                            "name": "CAA52486.1",
+                            "start": 2676
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "X74474.1",
+            "end": 3863,
+            "name": "E2",
+            "start": 2727,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 3860,
+                    "name": "X74474.1_E2_T",
+                    "start": 2727,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 3860,
+                            "name": "CAA52546.1",
+                            "start": 2727
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "X74477.1",
+            "end": 2717,
+            "name": "E2",
+            "start": 2714,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 2714,
+                    "name": "X74477.1_E2_T",
+                    "start": 2714,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 2714,
+                            "name": "CAA52564.1",
+                            "start": 2714
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "X74481.1",
+            "end": 3849,
+            "name": "E2",
+            "start": 2743,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 3846,
+                    "name": "X74481.1_E2_T",
+                    "start": 2743,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 3846,
+                            "name": "CAA52588.1",
+                            "start": 2743
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "X74483.1",
+            "end": 3850,
+            "name": "E2",
+            "start": 2918,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 3847,
+                    "name": "X74483.1_E2_T",
+                    "start": 2918,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 3847,
+                            "name": "CAA52598.1",
+                            "start": 2918
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "X94165.1",
+            "end": 3793,
+            "name": "E2",
+            "start": 2741,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 3790,
+                    "name": "X94165.1_E2_T",
+                    "start": 2741,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 3790,
+                            "name": "CAA63885.1",
+                            "start": 2741
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "AB027020.1",
+            "end": 3614,
+            "name": "E4",
+            "start": 3309,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 3611,
+                    "name": "AB027020.1_E4_T",
+                    "start": 3309,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 3611,
+                            "name": "BAA90731.1",
+                            "start": 3309
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "J04353.1",
+            "end": 3578,
+            "name": "E4",
+            "start": 3270,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 3575,
+                    "name": "J04353.1_E4_T",
+                    "start": 3270,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 3575,
+                            "name": "AAA46949.1",
+                            "start": 3270
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "K02718.1",
+            "end": 3619,
+            "name": "E4",
+            "start": 3332,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 3616,
+                    "name": "K02718.1_E4_T",
+                    "start": 3332,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 3616,
+                            "name": "AAA46937.1",
+                            "start": 3332
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "M12732.1",
+            "end": 3577,
+            "name": "E4",
+            "start": 3326,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 3574,
+                    "name": "M12732.1_E4_T",
+                    "start": 3326,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 3574,
+                            "name": "AAA46957.1",
+                            "start": 3326
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "M62849.1",
+            "end": 3677,
+            "name": "E4",
+            "start": 3393,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 3674,
+                    "name": "M62849.1_E4_T",
+                    "start": 3393,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 3674,
+                            "name": "AAA47049.1",
+                            "start": 3393
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "U21941.1",
+            "end": 3662,
+            "name": "E4",
+            "start": 3408,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 3659,
+                    "name": "U21941.1_E4_T",
+                    "start": 3408,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 3659,
+                            "name": "AAC54854.1",
+                            "start": 3408
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "X05015.1",
+            "end": 3684,
+            "name": "E4",
+            "start": 3418,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 3681,
+                    "name": "X05015.1_E4_T",
+                    "start": 3418,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 3681,
+                            "name": "CAA28668.1",
+                            "start": 3418
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "X94165.1",
+            "end": 3560,
+            "name": "E4",
+            "start": 3324,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "biotype": "transcript",
+                    "end": 3560,
+                    "name": "X94165.1_unassigned_transcript_5",
+                    "start": 3324
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "AB027020.1",
+            "end": 4142,
+            "name": "E5",
+            "start": 3846,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 4139,
+                    "name": "AB027020.1_E5_T",
+                    "start": 3846,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 4139,
+                            "name": "BAA90732.1",
+                            "start": 3846
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "J04353.1",
+            "end": 4070,
+            "name": "E5",
+            "start": 3816,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 4067,
+                    "name": "J04353.1_E5_T",
+                    "start": 3816,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 4067,
+                            "name": "AAA46954.1",
+                            "start": 3816
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "K02718.1",
+            "end": 4099,
+            "name": "E5",
+            "start": 3863,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 4096,
+                    "name": "K02718.1_E5_T",
+                    "start": 3863,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 4096,
+                            "name": "AAA46938.1",
+                            "start": 3863
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "M12732.1",
+            "end": 4081,
+            "name": "E5",
+            "start": 3854,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 4078,
+                    "name": "M12732.1_E5_T",
+                    "start": 3854,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 4078,
+                            "name": "AAA46962.1",
+                            "start": 3854
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "M62849.1",
+            "end": 4176,
+            "name": "E5",
+            "start": 3958,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 4173,
+                    "name": "M62849.1_E5_T",
+                    "start": 3958,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 4173,
+                            "name": "AAA47054.1",
+                            "start": 3958
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "U21941.1",
+            "end": 4145,
+            "name": "E5",
+            "start": 3909,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 4142,
+                    "name": "U21941.1_E5_T",
+                    "start": 3909,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 4142,
+                            "name": "AAC54855.1",
+                            "start": 3909
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "X05015.1",
+            "end": 4157,
+            "name": "E5",
+            "start": 3936,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 4154,
+                    "name": "X05015.1_E5_T",
+                    "start": 3936,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 4154,
+                            "name": "CAA28669.1",
+                            "start": 3936
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "AB027020.1",
+            "end": 557,
+            "name": "E6",
+            "start": 102,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 554,
+                    "name": "AB027020.1_E6_T",
+                    "start": 102,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 554,
+                            "name": "BAA90727.1",
+                            "start": 102
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "J04353.1",
+            "end": 557,
+            "name": "E6",
+            "start": 108,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 554,
+                    "name": "J04353.1_E6_T",
+                    "start": 108,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 554,
+                            "name": "AAA46950.1",
+                            "start": 108
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "K02718.1",
+            "end": 559,
+            "name": "E6",
+            "start": 83,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 556,
+                    "name": "K02718.1_E6_T",
+                    "start": 83,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 556,
+                            "name": "AAA46939.1",
+                            "start": 83
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "M12732.1",
+            "end": 558,
+            "name": "E6",
+            "start": 109,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 555,
+                    "name": "M12732.1_E6_T",
+                    "start": 109,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 555,
+                            "name": "AAA46958.1",
+                            "start": 109
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "M62849.1",
+            "end": 583,
+            "name": "E6",
+            "start": 107,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 580,
+                    "name": "M62849.1_E6_T",
+                    "start": 107,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 580,
+                            "name": "AAA47050.1",
+                            "start": 107
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "U21941.1",
+            "end": 583,
+            "name": "E6",
+            "start": 107,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 580,
+                    "name": "U21941.1_E6_T",
+                    "start": 107,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 580,
+                            "name": "AAC54850.1",
+                            "start": 107
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "X05015.1",
+            "end": 581,
+            "name": "E6",
+            "start": 105,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 578,
+                    "name": "X05015.1_E6_T",
+                    "start": 105,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 578,
+                            "name": "CAA28664.1",
+                            "start": 105
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "X74464.1",
+            "end": 646,
+            "name": "E6",
+            "start": 200,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 643,
+                    "name": "X74464.1_E6_T",
+                    "start": 200,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 643,
+                            "name": "CAA52482.1",
+                            "start": 200
+                        },
+                        {
+                            "biotype": "CDS",
+                            "end": 643,
+                            "name": "CAA52483.1",
+                            "start": 221
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "X74474.1",
+            "end": 563,
+            "name": "E6",
+            "start": 102,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 560,
+                    "name": "X74474.1_E6_T",
+                    "start": 102,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 560,
+                            "name": "CAA52543.1",
+                            "start": 102
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "X74477.1",
+            "end": 559,
+            "name": "E6",
+            "start": 110,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 556,
+                    "name": "X74477.1_E6_T",
+                    "start": 110,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 556,
+                            "name": "CAA52561.1",
+                            "start": 110
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "X74481.1",
+            "end": 548,
+            "name": "E6",
+            "start": 102,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 545,
+                    "name": "X74481.1_E6_T",
+                    "start": 102,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 545,
+                            "name": "CAA52585.1",
+                            "start": 102
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "X74483.1",
+            "end": 566,
+            "name": "E6",
+            "start": 102,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 563,
+                    "name": "X74483.1_E6_T",
+                    "start": 102,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 563,
+                            "name": "CAA52596.1",
+                            "start": 102
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "X94165.1",
+            "end": 548,
+            "name": "E6",
+            "start": 102,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 545,
+                    "name": "X94165.1_E6_T",
+                    "start": 102,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 545,
+                            "name": "CAA63882.1",
+                            "start": 102
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "AB027020.1",
+            "end": 878,
+            "name": "E7",
+            "start": 564,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 875,
+                    "name": "AB027020.1_E7_T",
+                    "start": 564,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 875,
+                            "name": "BAA90728.1",
+                            "start": 564
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "J04353.1",
+            "end": 856,
+            "name": "E7",
+            "start": 560,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 853,
+                    "name": "J04353.1_E7_T",
+                    "start": 560,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 853,
+                            "name": "AAA46951.1",
+                            "start": 560
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "K02718.1",
+            "end": 858,
+            "name": "E7",
+            "start": 562,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 855,
+                    "name": "K02718.1_E7_T",
+                    "start": 562,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 855,
+                            "name": "AAA46940.1",
+                            "start": 562
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "M12732.1",
+            "end": 866,
+            "name": "E7",
+            "start": 573,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 863,
+                    "name": "M12732.1_E7_T",
+                    "start": 573,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 863,
+                            "name": "AAA46959.1",
+                            "start": 573
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "M62849.1",
+            "end": 921,
+            "name": "E7",
+            "start": 592,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 918,
+                    "name": "M62849.1_E7_T",
+                    "start": 592,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 918,
+                            "name": "AAA47051.1",
+                            "start": 592
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "U21941.1",
+            "end": 921,
+            "name": "E7",
+            "start": 592,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 918,
+                    "name": "U21941.1_E7_T",
+                    "start": 592,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 918,
+                            "name": "AAC54851.1",
+                            "start": 592
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "X05015.1",
+            "end": 907,
+            "name": "E7",
+            "start": 590,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 904,
+                    "name": "X05015.1_E7_T",
+                    "start": 590,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 904,
+                            "name": "CAA28665.1",
+                            "start": 590
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "X74464.1",
+            "end": 924,
+            "name": "E7",
+            "start": 643,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 921,
+                    "name": "X74464.1_E7_T",
+                    "start": 643,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 921,
+                            "name": "CAA52484.1",
+                            "start": 643
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "X74474.1",
+            "end": 883,
+            "name": "E7",
+            "start": 566,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 880,
+                    "name": "X74474.1_E7_T",
+                    "start": 566,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 880,
+                            "name": "CAA52544.1",
+                            "start": 566
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "X74477.1",
+            "end": 861,
+            "name": "E7",
+            "start": 562,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 858,
+                    "name": "X74477.1_E7_T",
+                    "start": 562,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 858,
+                            "name": "CAA52562.1",
+                            "start": 562
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "X74481.1",
+            "end": 852,
+            "name": "E7",
+            "start": 553,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 849,
+                    "name": "X74481.1_E7_T",
+                    "start": 553,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 849,
+                            "name": "CAA52586.1",
+                            "start": 553
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "X74483.1",
+            "end": 889,
+            "name": "E7",
+            "start": 572,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 886,
+                    "name": "X74483.1_E7_T",
+                    "start": 572,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 886,
+                            "name": "CAA52597.1",
+                            "start": 572
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "X94165.1",
+            "end": 843,
+            "name": "E7",
+            "start": 550,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 840,
+                    "name": "X94165.1_E7_T",
+                    "start": 550,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 840,
+                            "name": "CAA63883.1",
+                            "start": 550
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "AB027020.1",
+            "end": 7064,
+            "name": "L1",
+            "start": 5541,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 7061,
+                    "name": "AB027020.1_L1_T",
+                    "start": 5541,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 7061,
+                            "name": "BAA90734.1",
+                            "start": 5541
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "J04353.1",
+            "end": 7066,
+            "name": "L1",
+            "start": 5552,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 7063,
+                    "name": "J04353.1_L1_T",
+                    "start": 5552,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 7063,
+                            "name": "AAA46956.1",
+                            "start": 5552
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "K02718.1",
+            "end": 7154,
+            "name": "L1",
+            "start": 5559,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 7151,
+                    "name": "K02718.1_L1_T",
+                    "start": 5559,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 7151,
+                            "name": "AAA46943.1",
+                            "start": 5559
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "M12732.1",
+            "end": 7093,
+            "name": "L1",
+            "start": 5594,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 7090,
+                    "name": "M12732.1_L1_T",
+                    "start": 5594,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 7090,
+                            "name": "AAA46964.1",
+                            "start": 5594
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "M62849.1",
+            "end": 7160,
+            "name": "L1",
+            "start": 5643,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 7157,
+                    "name": "M62849.1_L1_T",
+                    "start": 5643,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 7157,
+                            "name": "AAA47056.1",
+                            "start": 5643
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "U21941.1",
+            "end": 7104,
+            "name": "L1",
+            "start": 5590,
+            "strand": "+"
+        },
+        {
+            "biotype": "gene",
+            "chr": "X05015.1",
+            "end": 7136,
+            "name": "L1",
+            "start": 5430,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 7133,
+                    "name": "X05015.1_L1_T",
+                    "start": 5430,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 7133,
+                            "name": "CAA28671.1",
+                            "start": 5430
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "X74464.1",
+            "end": 7268,
+            "name": "L1",
+            "start": 5745,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 7265,
+                    "name": "X74464.1_L1_T",
+                    "start": 5745,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 7265,
+                            "name": "CAA52488.1",
+                            "start": 5745
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "X74474.1",
+            "end": 7157,
+            "name": "L1",
+            "start": 5631,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 7154,
+                    "name": "X74474.1_L1_T",
+                    "start": 5631,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 7154,
+                            "name": "CAA52548.1",
+                            "start": 5631
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "X74477.1",
+            "end": 7109,
+            "name": "L1",
+            "start": 5601,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 7106,
+                    "name": "X74477.1_L1_T",
+                    "start": 5601,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 7106,
+                            "name": "CAA52566.1",
+                            "start": 5601
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "X74481.1",
+            "end": 7154,
+            "name": "L1",
+            "start": 5565,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 7151,
+                    "name": "X74481.1_L1_T",
+                    "start": 5565,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 7151,
+                            "name": "CAA52590.1",
+                            "start": 5565
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "X74483.1",
+            "end": 7096,
+            "name": "L1",
+            "start": 5492,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 7093,
+                    "name": "X74483.1_L1_T",
+                    "start": 5492,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 7093,
+                            "name": "CAA52600.1",
+                            "start": 5492
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "X94165.1",
+            "end": 7005,
+            "name": "L1",
+            "start": 5494,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 7002,
+                    "name": "X94165.1_L1_T",
+                    "start": 5494,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 7002,
+                            "name": "CAA63887.1",
+                            "start": 5494
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "AB027020.1",
+            "end": 5560,
+            "name": "L2",
+            "start": 4157,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 5557,
+                    "name": "AB027020.1_L2_T",
+                    "start": 4157,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 5557,
+                            "name": "BAA90733.1",
+                            "start": 4157
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "J04353.1",
+            "end": 5571,
+            "name": "L2",
+            "start": 4171,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 5568,
+                    "name": "J04353.1_L2_T",
+                    "start": 4171,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 5568,
+                            "name": "AAA46955.1",
+                            "start": 4171
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "K02718.1",
+            "end": 5656,
+            "name": "L2",
+            "start": 4235,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 5653,
+                    "name": "K02718.1_L2_T",
+                    "start": 4235,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 5653,
+                            "name": "AAA46942.1",
+                            "start": 4235
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "M12732.1",
+            "end": 5613,
+            "name": "L2",
+            "start": 4210,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 5610,
+                    "name": "M12732.1_L2_T",
+                    "start": 4210,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 5610,
+                            "name": "AAA46963.1",
+                            "start": 4210
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "M62849.1",
+            "end": 5662,
+            "name": "L2",
+            "start": 4250,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 5659,
+                    "name": "M62849.1_L2_T",
+                    "start": 4250,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 5659,
+                            "name": "AAA47055.1",
+                            "start": 4250
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "U21941.1",
+            "end": 5609,
+            "name": "L2",
+            "start": 4209,
+            "strand": "+"
+        },
+        {
+            "biotype": "gene",
+            "chr": "X05015.1",
+            "end": 5632,
+            "name": "L2",
+            "start": 4244,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 5629,
+                    "name": "X05015.1_L2_T",
+                    "start": 4244,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 5629,
+                            "name": "CAA28670.1",
+                            "start": 4244
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "X74464.1",
+            "end": 5730,
+            "name": "L2",
+            "start": 4129,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 5727,
+                    "name": "X74464.1_L2_T",
+                    "start": 4129,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 5727,
+                            "name": "CAA52487.1",
+                            "start": 4129
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "X74474.1",
+            "end": 5671,
+            "name": "L2",
+            "start": 4280,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 5668,
+                    "name": "X74474.1_L2_T",
+                    "start": 4280,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 5668,
+                            "name": "CAA52547.1",
+                            "start": 4280
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "X74477.1",
+            "end": 5620,
+            "name": "L2",
+            "start": 4211,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 5617,
+                    "name": "X74477.1_L2_T",
+                    "start": 4211,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 5617,
+                            "name": "CAA52565.1",
+                            "start": 4211
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "X74481.1",
+            "end": 5662,
+            "name": "L2",
+            "start": 4262,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 5659,
+                    "name": "X74481.1_L2_T",
+                    "start": 4262,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 5659,
+                            "name": "CAA52589.1",
+                            "start": 4262
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "X74483.1",
+            "end": 5616,
+            "name": "L2",
+            "start": 4222,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 5613,
+                    "name": "X74483.1_L2_T",
+                    "start": 4222,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 5613,
+                            "name": "CAA52599.1",
+                            "start": 4222
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "X94165.1",
+            "end": 5510,
+            "name": "L2",
+            "start": 4083,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 5507,
+                    "name": "X94165.1_L2_T",
+                    "start": 4083,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 5507,
+                            "name": "CAA63886.1",
+                            "start": 4083
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "X77858.1",
+            "end": 2806,
+            "name": "ORF putative E1",
+            "start": 872,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 2803,
+                    "name": "X77858.1_ORF putative E1_T",
+                    "start": 872,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 2803,
+                            "name": "CAA54851.1",
+                            "start": 872
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "X77858.1",
+            "end": 3848,
+            "name": "ORF putative E2",
+            "start": 2736,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 3845,
+                    "name": "X77858.1_ORF putative E2_T",
+                    "start": 2736,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 3845,
+                            "name": "CAA54852.1",
+                            "start": 2736
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "X77858.1",
+            "end": 3615,
+            "name": "ORF putative E4",
+            "start": 3268,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 3612,
+                    "name": "X77858.1_ORF putative E4_T",
+                    "start": 3268,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 3612,
+                            "name": "CAA54853.1",
+                            "start": 3268
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "X77858.1",
+            "end": 4129,
+            "name": "ORF putative E5",
+            "start": 3908,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 4126,
+                    "name": "X77858.1_ORF putative E5_T",
+                    "start": 3908,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 4126,
+                            "name": "CAA54854.1",
+                            "start": 3908
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "X77858.1",
+            "end": 537,
+            "name": "ORF putative E6",
+            "start": 55,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 534,
+                    "name": "X77858.1_ORF putative E6_T",
+                    "start": 55,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 534,
+                            "name": "CAA54849.1",
+                            "start": 55
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "X77858.1",
+            "end": 865,
+            "name": "ORF putative E7",
+            "start": 542,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 862,
+                    "name": "X77858.1_ORF putative E7_T",
+                    "start": 542,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 862,
+                            "name": "CAA54850.1",
+                            "start": 542
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "X77858.1",
+            "end": 7132,
+            "name": "ORF putative L1",
+            "start": 5606,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 7129,
+                    "name": "X77858.1_ORF putative L1_T",
+                    "start": 5606,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 7129,
+                            "name": "CAA54856.1",
+                            "start": 5606
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "biotype": "gene",
+            "chr": "X77858.1",
+            "end": 5625,
+            "name": "ORF putative L2",
+            "start": 4231,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 5622,
+                    "name": "X77858.1_ORF putative L2_T",
+                    "start": 4231,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 5622,
+                            "name": "CAA54855.1",
+                            "start": 4231
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "chr": "U21941.1",
+            "end": 2883,
+            "name": "G_AAC54852.1",
+            "start": 928,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 2883,
+                    "name": "U21941.1_G_AAC54852.1_T",
+                    "start": 928,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 2883,
+                            "name": "AAC54852.1",
+                            "start": 928
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "chr": "U21941.1",
+            "end": 3892,
+            "name": "G_AAC54853.1",
+            "start": 2813,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 3892,
+                    "name": "U21941.1_G_AAC54853.1_T",
+                    "start": 2813,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 3892,
+                            "name": "AAC54853.1",
+                            "start": 2813
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "chr": "U21941.1",
+            "end": 5606,
+            "name": "G_AAC54856.1",
+            "start": 4209,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 5606,
+                    "name": "U21941.1_G_AAC54856.1_T",
+                    "start": 4209,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 5606,
+                            "name": "AAC54856.1",
+                            "start": 4209
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "chr": "U21941.1",
+            "end": 7101,
+            "name": "G_AAC54857.1",
+            "start": 5590,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 7101,
+                    "name": "U21941.1_G_AAC54857.1_T",
+                    "start": 5590,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 7101,
+                            "name": "AAC54857.1",
+                            "start": 5590
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "chr": "DQ080079.1",
+            "end": 474,
+            "name": "G_AAZ39491.1",
+            "start": 1,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 474,
+                    "name": "DQ080079.1_G_AAZ39491.1_T",
+                    "start": 1,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 474,
+                            "name": "AAZ39491.1",
+                            "start": 1
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "chr": "DQ080079.1",
+            "end": 813,
+            "name": "G_AAZ39492.1",
+            "start": 484,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 813,
+                    "name": "DQ080079.1_G_AAZ39492.1_T",
+                    "start": 484,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 813,
+                            "name": "AAZ39492.1",
+                            "start": 484
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "chr": "DQ080079.1",
+            "end": 2742,
+            "name": "G_AAZ39493.1",
+            "start": 823,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 2742,
+                    "name": "DQ080079.1_G_AAZ39493.1_T",
+                    "start": 823,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 2742,
+                            "name": "AAZ39493.1",
+                            "start": 823
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "chr": "DQ080079.1",
+            "end": 3781,
+            "name": "G_AAZ39494.1",
+            "start": 2672,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 3781,
+                    "name": "DQ080079.1_G_AAZ39494.1_T",
+                    "start": 2672,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 3781,
+                            "name": "AAZ39494.1",
+                            "start": 2672
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "chr": "DQ080079.1",
+            "end": 3548,
+            "name": "G_AAZ39495.1",
+            "start": 3267,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 3548,
+                    "name": "DQ080079.1_G_AAZ39495.1_T",
+                    "start": 3267,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 3548,
+                            "name": "AAZ39495.1",
+                            "start": 3267
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "chr": "DQ080079.1",
+            "end": 4048,
+            "name": "G_AAZ39496.1",
+            "start": 3830,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 4048,
+                    "name": "DQ080079.1_G_AAZ39496.1_T",
+                    "start": 3830,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 4048,
+                            "name": "AAZ39496.1",
+                            "start": 3830
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "chr": "DQ080079.1",
+            "end": 5504,
+            "name": "G_AAZ39497.1",
+            "start": 4098,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 5504,
+                    "name": "DQ080079.1_G_AAZ39497.1_T",
+                    "start": 4098,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 5504,
+                            "name": "AAZ39497.1",
+                            "start": 4098
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "chr": "DQ080079.1",
+            "end": 7002,
+            "name": "G_AAZ39498.1",
+            "start": 5488,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 7002,
+                    "name": "DQ080079.1_G_AAZ39498.1_T",
+                    "start": 5488,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 7002,
+                            "name": "AAZ39498.1",
+                            "start": 5488
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "chr": "D90400.1",
+            "end": 3602,
+            "name": "G_BAA14396.1",
+            "start": 3330,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 3602,
+                    "name": "D90400.1_G_BAA14396.1_T",
+                    "start": 3330,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 3602,
+                            "name": "BAA14396.1",
+                            "start": 3330
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "chr": "D90400.1",
+            "end": 556,
+            "name": "G_BAA31845.1",
+            "start": 110,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 556,
+                    "name": "D90400.1_G_BAA31845.1_T",
+                    "start": 110,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 556,
+                            "name": "BAA31845.1",
+                            "start": 110
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "chr": "D90400.1",
+            "end": 867,
+            "name": "G_BAA31846.1",
+            "start": 574,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 867,
+                    "name": "D90400.1_G_BAA31846.1_T",
+                    "start": 574,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 867,
+                            "name": "BAA31846.1",
+                            "start": 574
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "chr": "D90400.1",
+            "end": 2814,
+            "name": "G_BAA31847.1",
+            "start": 883,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 2814,
+                    "name": "D90400.1_G_BAA31847.1_T",
+                    "start": 883,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 2814,
+                            "name": "BAA31847.1",
+                            "start": 883
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "chr": "D90400.1",
+            "end": 3826,
+            "name": "G_BAA31848.1",
+            "start": 2753,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 3826,
+                    "name": "D90400.1_G_BAA31848.1_T",
+                    "start": 2753,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 3826,
+                            "name": "BAA31848.1",
+                            "start": 2753
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "chr": "D90400.1",
+            "end": 4119,
+            "name": "G_BAA31849.1",
+            "start": 3892,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 4119,
+                    "name": "D90400.1_G_BAA31849.1_T",
+                    "start": 3892,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 4119,
+                            "name": "BAA31849.1",
+                            "start": 3892
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "chr": "D90400.1",
+            "end": 5659,
+            "name": "G_BAA31850.1",
+            "start": 4244,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 5659,
+                    "name": "D90400.1_G_BAA31850.1_T",
+                    "start": 4244,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 5659,
+                            "name": "BAA31850.1",
+                            "start": 4244
+                        }
+                    ]
+                }
+            ]
+        },
+        {
+            "chr": "D90400.1",
+            "end": 7136,
+            "name": "G_BAA31851.1",
+            "start": 5565,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "end": 7136,
+                    "name": "D90400.1_G_BAA31851.1_T",
+                    "start": 5565,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 7136,
+                            "name": "BAA31851.1",
+                            "start": 5565
+                        }
+                    ]
+                }
+            ]
+        }
+    ]
+}
diff --git a/tests/test_tools/test_convert_annotations_format.py b/tests/test_tools/test_convert_annotations_format.py
index 7e637ae9..2516af84 100644
--- a/tests/test_tools/test_convert_annotations_format.py
+++ b/tests/test_tools/test_convert_annotations_format.py
@@ -51,6 +51,7 @@ def sort_elements(data):
             'ensembl69_hg19_annotations.kras.tab.json',
             'v2-tab',
         ],
+        ['viral.gtf', 'viral.gtf.json', 'gtf'],
     ],
 )
 def test_gff_examples(filename, expected_file, input_type):

From 5aefd884d621d71f915165848441cddee8fe8ca0 Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Sat, 19 Feb 2022 12:40:28 -0800
Subject: [PATCH 135/137] Include mix of viral/human transcripts

---
 src/tools/convert_annotations_format.py |   9 +-
 tests/test_tools/data/viral.gtf         |  58 ++++++
 tests/test_tools/data/viral.gtf.json    | 250 ++++++++++++++++++++++++
 3 files changed, 315 insertions(+), 2 deletions(-)

diff --git a/src/tools/convert_annotations_format.py b/src/tools/convert_annotations_format.py
index eceba67a..ca0f0cc3 100644
--- a/src/tools/convert_annotations_format.py
+++ b/src/tools/convert_annotations_format.py
@@ -472,8 +472,13 @@ def enforce_uniq_transcript_ids(input_df) -> pd.DataFrame:
         return df
 
     # there are some non-unique transcript IDs, make them all pre-pend the seqid
-    df.loc[df.type == 'transcript', 'feature_id'] = df.seqid + GFF_ID_DELIMITER + df.feature_id
-    df.loc[df.parent_type == 'transcript', 'parent_id'] = df.seqid + GFF_ID_DELIMITER + df.parent_id
+    # do not change ensembl transcript IDs since they should already be unique
+    df.loc[(df.type == 'transcript') & (~df.feature_id.str.startswith('ENST')), 'feature_id'] = (
+        df.seqid + GFF_ID_DELIMITER + df.feature_id
+    )
+    df.loc[
+        (df.parent_type == 'transcript') & (~df.parent_id.str.startswith('ENST')), 'parent_id'
+    ] = (df.seqid + GFF_ID_DELIMITER + df.parent_id)
     duplicates = df[df.type == 'transcript'].drop_duplicates(['seqid', 'parent_id', 'feature_id'])
 
     if duplicates.shape[0] == duplicates.feature_id.nunique():
diff --git a/tests/test_tools/data/viral.gtf b/tests/test_tools/data/viral.gtf
index 57c45450..cafb8d74 100644
--- a/tests/test_tools/data/viral.gtf
+++ b/tests/test_tools/data/viral.gtf
@@ -445,3 +445,61 @@ X94165.1	EMBL	gene	5494	7005	.	+	.	gene_id "L1"; transcript_id ""; gbkey "Gene";
 X94165.1	EMBL	CDS	5494	7002	.	+	0	gene_id "L1"; transcript_id "unassigned_transcript_7"; db_xref "GOA:Q82010"; db_xref "HSSP:1DZL"; db_xref "InterPro:IPR002210"; db_xref "InterPro:IPR011222"; db_xref "UniProtKB/TrEMBL:Q82010"; gbkey "CDS"; gene "L1"; note "putative"; product "CAA63887.1"; protein_id "CAA63887.1"; exon_number "1"; 
 X94165.1	EMBL	start_codon	5494	5496	.	+	0	gene_id "L1"; transcript_id "unassigned_transcript_7"; db_xref "GOA:Q82010"; db_xref "HSSP:1DZL"; db_xref "InterPro:IPR002210"; db_xref "InterPro:IPR011222"; db_xref "UniProtKB/TrEMBL:Q82010"; gbkey "CDS"; gene "L1"; note "putative"; product "CAA63887.1"; protein_id "CAA63887.1"; exon_number "1"; 
 X94165.1	EMBL	stop_codon	7003	7005	.	+	0	gene_id "L1"; transcript_id "unassigned_transcript_7"; db_xref "GOA:Q82010"; db_xref "HSSP:1DZL"; db_xref "InterPro:IPR002210"; db_xref "InterPro:IPR011222"; db_xref "UniProtKB/TrEMBL:Q82010"; gbkey "CDS"; gene "L1"; note "putative"; product "CAA63887.1"; protein_id "CAA63887.1"; exon_number "1"; 
+chr6	havana	gene	54770583	54771134	.	+	.	gene_id "ENSG00000220635"; gene_version "2"; gene_name "KRASP1"; gene_source "havana"; gene_biotype "processed_pseudogene"; gene_type "processed_pseudogene";
+chr6	havana	transcript	54770583	54771134	.	+	.	gene_id "ENSG00000220635"; gene_version "2"; transcript_id "ENST00000407852"; transcript_version "2"; gene_name "KRASP1"; gene_source "havana"; gene_biotype "processed_pseudogene"; transcript_name "KRASP1-201"; transcript_source "havana"; transcript_biotype "processed_pseudogene"; tag "basic"; transcript_support_level "NA"; gene_type "processed_pseudogene";
+chr6	havana	exon	54770583	54771134	.	+	.	gene_id "ENSG00000220635"; gene_version "2"; transcript_id "ENST00000407852"; transcript_version "2"; exon_number "1"; gene_name "KRASP1"; gene_source "havana"; gene_biotype "processed_pseudogene"; transcript_name "KRASP1-201"; transcript_source "havana"; transcript_biotype "processed_pseudogene"; exon_id "ENSE00001550689"; exon_version "2"; tag "basic"; transcript_support_level "NA"; gene_type "processed_pseudogene";
+chr12	ensembl_havana	gene	25205246	25250936	.	-	.	gene_id "ENSG00000133703"; gene_version "13"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; gene_type "protein_coding";
+chr12	ensembl_havana	transcript	25205246	25250929	.	-	.	gene_id "ENSG00000133703"; gene_version "13"; transcript_id "ENST00000311936"; transcript_version "8"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-202"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8702"; tag "basic"; transcript_support_level "1 (assigned to previous version 7)"; gene_type "protein_coding";
+chr12	ensembl_havana	exon	25250751	25250929	.	-	.	gene_id "ENSG00000133703"; gene_version "13"; transcript_id "ENST00000311936"; transcript_version "8"; exon_number "1"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-202"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8702"; exon_id "ENSE00001189804"; exon_version "5"; tag "basic"; transcript_support_level "1 (assigned to previous version 7)"; gene_type "protein_coding";
+chr12	ensembl_havana	exon	25245274	25245395	.	-	.	gene_id "ENSG00000133703"; gene_version "13"; transcript_id "ENST00000311936"; transcript_version "8"; exon_number "2"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-202"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8702"; exon_id "ENSE00000936617"; exon_version "1"; tag "basic"; transcript_support_level "1 (assigned to previous version 7)"; gene_type "protein_coding";
+chr12	ensembl_havana	CDS	25245274	25245384	.	-	0	gene_id "ENSG00000133703"; gene_version "13"; transcript_id "ENST00000311936"; transcript_version "8"; exon_number "2"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-202"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8702"; protein_id "ENSP00000308495"; protein_version "3"; tag "basic"; transcript_support_level "1 (assigned to previous version 7)"; gene_type "protein_coding";
+chr12	ensembl_havana	start_codon	25245382	25245384	.	-	0	gene_id "ENSG00000133703"; gene_version "13"; transcript_id "ENST00000311936"; transcript_version "8"; exon_number "2"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-202"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8702"; tag "basic"; transcript_support_level "1 (assigned to previous version 7)"; gene_type "protein_coding";
+chr12	ensembl_havana	exon	25227234	25227412	.	-	.	gene_id "ENSG00000133703"; gene_version "13"; transcript_id "ENST00000311936"; transcript_version "8"; exon_number "3"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-202"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8702"; exon_id "ENSE00001719809"; exon_version "1"; tag "basic"; transcript_support_level "1 (assigned to previous version 7)"; gene_type "protein_coding";
+chr12	ensembl_havana	CDS	25227234	25227412	.	-	0	gene_id "ENSG00000133703"; gene_version "13"; transcript_id "ENST00000311936"; transcript_version "8"; exon_number "3"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-202"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8702"; protein_id "ENSP00000308495"; protein_version "3"; tag "basic"; transcript_support_level "1 (assigned to previous version 7)"; gene_type "protein_coding";
+chr12	ensembl_havana	exon	25225614	25225773	.	-	.	gene_id "ENSG00000133703"; gene_version "13"; transcript_id "ENST00000311936"; transcript_version "8"; exon_number "4"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-202"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8702"; exon_id "ENSE00001644818"; exon_version "1"; tag "basic"; transcript_support_level "1 (assigned to previous version 7)"; gene_type "protein_coding";
+chr12	ensembl_havana	CDS	25225614	25225773	.	-	1	gene_id "ENSG00000133703"; gene_version "13"; transcript_id "ENST00000311936"; transcript_version "8"; exon_number "4"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-202"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8702"; protein_id "ENSP00000308495"; protein_version "3"; tag "basic"; transcript_support_level "1 (assigned to previous version 7)"; gene_type "protein_coding";
+chr12	ensembl_havana	exon	25205246	25209911	.	-	.	gene_id "ENSG00000133703"; gene_version "13"; transcript_id "ENST00000311936"; transcript_version "8"; exon_number "5"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-202"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8702"; exon_id "ENSE00002456976"; exon_version "2"; tag "basic"; transcript_support_level "1 (assigned to previous version 7)"; gene_type "protein_coding";
+chr12	ensembl_havana	CDS	25209798	25209911	.	-	0	gene_id "ENSG00000133703"; gene_version "13"; transcript_id "ENST00000311936"; transcript_version "8"; exon_number "5"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-202"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8702"; protein_id "ENSP00000308495"; protein_version "3"; tag "basic"; transcript_support_level "1 (assigned to previous version 7)"; gene_type "protein_coding";
+chr12	ensembl_havana	stop_codon	25209795	25209797	.	-	0	gene_id "ENSG00000133703"; gene_version "13"; transcript_id "ENST00000311936"; transcript_version "8"; exon_number "5"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-202"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8702"; tag "basic"; transcript_support_level "1 (assigned to previous version 7)"; gene_type "protein_coding";
+chr12	ensembl_havana	five_prime_utr	25250751	25250929	.	-	.	gene_id "ENSG00000133703"; gene_version "13"; transcript_id "ENST00000311936"; transcript_version "8"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-202"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8702"; tag "basic"; transcript_support_level "1 (assigned to previous version 7)"; gene_type "protein_coding";
+chr12	ensembl_havana	five_prime_utr	25245385	25245395	.	-	.	gene_id "ENSG00000133703"; gene_version "13"; transcript_id "ENST00000311936"; transcript_version "8"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-202"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8702"; tag "basic"; transcript_support_level "1 (assigned to previous version 7)"; gene_type "protein_coding";
+chr12	ensembl_havana	three_prime_utr	25205246	25209794	.	-	.	gene_id "ENSG00000133703"; gene_version "13"; transcript_id "ENST00000311936"; transcript_version "8"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-202"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8702"; tag "basic"; transcript_support_level "1 (assigned to previous version 7)"; gene_type "protein_coding";
+chr12	ensembl_havana	transcript	25205246	25250929	.	-	.	gene_id "ENSG00000133703"; gene_version "13"; transcript_id "ENST00000256078"; transcript_version "10"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-201"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8703"; tag "basic"; transcript_support_level "1 (assigned to previous version 8)"; gene_type "protein_coding";
+chr12	ensembl_havana	exon	25250751	25250929	.	-	.	gene_id "ENSG00000133703"; gene_version "13"; transcript_id "ENST00000256078"; transcript_version "10"; exon_number "1"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-201"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8703"; exon_id "ENSE00000000028"; exon_version "2"; tag "basic"; transcript_support_level "1 (assigned to previous version 8)"; gene_type "protein_coding";
+chr12	ensembl_havana	exon	25245274	25245395	.	-	.	gene_id "ENSG00000133703"; gene_version "13"; transcript_id "ENST00000256078"; transcript_version "10"; exon_number "2"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-201"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8703"; exon_id "ENSE00000936617"; exon_version "1"; tag "basic"; transcript_support_level "1 (assigned to previous version 8)"; gene_type "protein_coding";
+chr12	ensembl_havana	CDS	25245274	25245384	.	-	0	gene_id "ENSG00000133703"; gene_version "13"; transcript_id "ENST00000256078"; transcript_version "10"; exon_number "2"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-201"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8703"; protein_id "ENSP00000256078"; protein_version "5"; tag "basic"; transcript_support_level "1 (assigned to previous version 8)"; gene_type "protein_coding";
+chr12	ensembl_havana	start_codon	25245382	25245384	.	-	0	gene_id "ENSG00000133703"; gene_version "13"; transcript_id "ENST00000256078"; transcript_version "10"; exon_number "2"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-201"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8703"; tag "basic"; transcript_support_level "1 (assigned to previous version 8)"; gene_type "protein_coding";
+chr12	ensembl_havana	exon	25227234	25227412	.	-	.	gene_id "ENSG00000133703"; gene_version "13"; transcript_id "ENST00000256078"; transcript_version "10"; exon_number "3"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-201"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8703"; exon_id "ENSE00001719809"; exon_version "1"; tag "basic"; transcript_support_level "1 (assigned to previous version 8)"; gene_type "protein_coding";
+chr12	ensembl_havana	CDS	25227234	25227412	.	-	0	gene_id "ENSG00000133703"; gene_version "13"; transcript_id "ENST00000256078"; transcript_version "10"; exon_number "3"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-201"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8703"; protein_id "ENSP00000256078"; protein_version "5"; tag "basic"; transcript_support_level "1 (assigned to previous version 8)"; gene_type "protein_coding";
+chr12	ensembl_havana	exon	25225614	25225773	.	-	.	gene_id "ENSG00000133703"; gene_version "13"; transcript_id "ENST00000256078"; transcript_version "10"; exon_number "4"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-201"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8703"; exon_id "ENSE00001644818"; exon_version "1"; tag "basic"; transcript_support_level "1 (assigned to previous version 8)"; gene_type "protein_coding";
+chr12	ensembl_havana	CDS	25225614	25225773	.	-	1	gene_id "ENSG00000133703"; gene_version "13"; transcript_id "ENST00000256078"; transcript_version "10"; exon_number "4"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-201"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8703"; protein_id "ENSP00000256078"; protein_version "5"; tag "basic"; transcript_support_level "1 (assigned to previous version 8)"; gene_type "protein_coding";
+chr12	ensembl_havana	exon	25215437	25215560	.	-	.	gene_id "ENSG00000133703"; gene_version "13"; transcript_id "ENST00000256078"; transcript_version "10"; exon_number "5"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-201"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8703"; exon_id "ENSE00001189807"; exon_version "5"; tag "basic"; transcript_support_level "1 (assigned to previous version 8)"; gene_type "protein_coding";
+chr12	ensembl_havana	CDS	25215444	25215560	.	-	0	gene_id "ENSG00000133703"; gene_version "13"; transcript_id "ENST00000256078"; transcript_version "10"; exon_number "5"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-201"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8703"; protein_id "ENSP00000256078"; protein_version "5"; tag "basic"; transcript_support_level "1 (assigned to previous version 8)"; gene_type "protein_coding";
+chr12	ensembl_havana	stop_codon	25215441	25215443	.	-	0	gene_id "ENSG00000133703"; gene_version "13"; transcript_id "ENST00000256078"; transcript_version "10"; exon_number "5"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-201"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8703"; tag "basic"; transcript_support_level "1 (assigned to previous version 8)"; gene_type "protein_coding";
+chr12	ensembl_havana	exon	25205246	25209911	.	-	.	gene_id "ENSG00000133703"; gene_version "13"; transcript_id "ENST00000256078"; transcript_version "10"; exon_number "6"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-201"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8703"; exon_id "ENSE00002477035"; exon_version "3"; tag "basic"; transcript_support_level "1 (assigned to previous version 8)"; gene_type "protein_coding";
+chr12	ensembl_havana	five_prime_utr	25250751	25250929	.	-	.	gene_id "ENSG00000133703"; gene_version "13"; transcript_id "ENST00000256078"; transcript_version "10"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-201"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8703"; tag "basic"; transcript_support_level "1 (assigned to previous version 8)"; gene_type "protein_coding";
+chr12	ensembl_havana	five_prime_utr	25245385	25245395	.	-	.	gene_id "ENSG00000133703"; gene_version "13"; transcript_id "ENST00000256078"; transcript_version "10"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-201"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8703"; tag "basic"; transcript_support_level "1 (assigned to previous version 8)"; gene_type "protein_coding";
+chr12	ensembl_havana	three_prime_utr	25215437	25215440	.	-	.	gene_id "ENSG00000133703"; gene_version "13"; transcript_id "ENST00000256078"; transcript_version "10"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-201"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8703"; tag "basic"; transcript_support_level "1 (assigned to previous version 8)"; gene_type "protein_coding";
+chr12	ensembl_havana	three_prime_utr	25205246	25209911	.	-	.	gene_id "ENSG00000133703"; gene_version "13"; transcript_id "ENST00000256078"; transcript_version "10"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-201"; transcript_source "ensembl_havana"; transcript_biotype "protein_coding"; tag "CCDS"; ccds_id "CCDS8703"; tag "basic"; transcript_support_level "1 (assigned to previous version 8)"; gene_type "protein_coding";
+chr12	havana	transcript	25209168	25250936	.	-	.	gene_id "ENSG00000133703"; gene_version "13"; transcript_id "ENST00000557334"; transcript_version "5"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-204"; transcript_source "havana"; transcript_biotype "protein_coding"; tag "basic"; transcript_support_level "5"; gene_type "protein_coding";
+chr12	havana	exon	25250751	25250936	.	-	.	gene_id "ENSG00000133703"; gene_version "13"; transcript_id "ENST00000557334"; transcript_version "5"; exon_number "1"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-204"; transcript_source "havana"; transcript_biotype "protein_coding"; exon_id "ENSE00002446502"; exon_version "1"; tag "basic"; transcript_support_level "5"; gene_type "protein_coding";
+chr12	havana	exon	25245274	25245395	.	-	.	gene_id "ENSG00000133703"; gene_version "13"; transcript_id "ENST00000557334"; transcript_version "5"; exon_number "2"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-204"; transcript_source "havana"; transcript_biotype "protein_coding"; exon_id "ENSE00000936617"; exon_version "1"; tag "basic"; transcript_support_level "5"; gene_type "protein_coding";
+chr12	havana	CDS	25245274	25245384	.	-	0	gene_id "ENSG00000133703"; gene_version "13"; transcript_id "ENST00000557334"; transcript_version "5"; exon_number "2"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-204"; transcript_source "havana"; transcript_biotype "protein_coding"; protein_id "ENSP00000452512"; protein_version "1"; tag "basic"; transcript_support_level "5"; gene_type "protein_coding";
+chr12	havana	start_codon	25245382	25245384	.	-	0	gene_id "ENSG00000133703"; gene_version "13"; transcript_id "ENST00000557334"; transcript_version "5"; exon_number "2"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-204"; transcript_source "havana"; transcript_biotype "protein_coding"; tag "basic"; transcript_support_level "5"; gene_type "protein_coding";
+chr12	havana	exon	25209168	25209911	.	-	.	gene_id "ENSG00000133703"; gene_version "13"; transcript_id "ENST00000557334"; transcript_version "5"; exon_number "3"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-204"; transcript_source "havana"; transcript_biotype "protein_coding"; exon_id "ENSE00002464674"; exon_version "1"; tag "basic"; transcript_support_level "5"; gene_type "protein_coding";
+chr12	havana	CDS	25209798	25209911	.	-	0	gene_id "ENSG00000133703"; gene_version "13"; transcript_id "ENST00000557334"; transcript_version "5"; exon_number "3"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-204"; transcript_source "havana"; transcript_biotype "protein_coding"; protein_id "ENSP00000452512"; protein_version "1"; tag "basic"; transcript_support_level "5"; gene_type "protein_coding";
+chr12	havana	stop_codon	25209795	25209797	.	-	0	gene_id "ENSG00000133703"; gene_version "13"; transcript_id "ENST00000557334"; transcript_version "5"; exon_number "3"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-204"; transcript_source "havana"; transcript_biotype "protein_coding"; tag "basic"; transcript_support_level "5"; gene_type "protein_coding";
+chr12	havana	five_prime_utr	25250751	25250936	.	-	.	gene_id "ENSG00000133703"; gene_version "13"; transcript_id "ENST00000557334"; transcript_version "5"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-204"; transcript_source "havana"; transcript_biotype "protein_coding"; tag "basic"; transcript_support_level "5"; gene_type "protein_coding";
+chr12	havana	five_prime_utr	25245385	25245395	.	-	.	gene_id "ENSG00000133703"; gene_version "13"; transcript_id "ENST00000557334"; transcript_version "5"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-204"; transcript_source "havana"; transcript_biotype "protein_coding"; tag "basic"; transcript_support_level "5"; gene_type "protein_coding";
+chr12	havana	three_prime_utr	25209168	25209794	.	-	.	gene_id "ENSG00000133703"; gene_version "13"; transcript_id "ENST00000557334"; transcript_version "5"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-204"; transcript_source "havana"; transcript_biotype "protein_coding"; tag "basic"; transcript_support_level "5"; gene_type "protein_coding";
+chr12	havana	transcript	25233819	25250929	.	-	.	gene_id "ENSG00000133703"; gene_version "13"; transcript_id "ENST00000556131"; transcript_version "1"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-203"; transcript_source "havana"; transcript_biotype "protein_coding"; tag "basic"; transcript_support_level "1"; gene_type "protein_coding";
+chr12	havana	exon	25250764	25250929	.	-	.	gene_id "ENSG00000133703"; gene_version "13"; transcript_id "ENST00000556131"; transcript_version "1"; exon_number "1"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-203"; transcript_source "havana"; transcript_biotype "protein_coding"; exon_id "ENSE00002530521"; exon_version "1"; tag "basic"; transcript_support_level "1"; gene_type "protein_coding";
+chr12	havana	exon	25245274	25245395	.	-	.	gene_id "ENSG00000133703"; gene_version "13"; transcript_id "ENST00000556131"; transcript_version "1"; exon_number "2"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-203"; transcript_source "havana"; transcript_biotype "protein_coding"; exon_id "ENSE00000936617"; exon_version "1"; tag "basic"; transcript_support_level "1"; gene_type "protein_coding";
+chr12	havana	CDS	25245274	25245384	.	-	0	gene_id "ENSG00000133703"; gene_version "13"; transcript_id "ENST00000556131"; transcript_version "1"; exon_number "2"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-203"; transcript_source "havana"; transcript_biotype "protein_coding"; protein_id "ENSP00000451856"; protein_version "1"; tag "basic"; transcript_support_level "1"; gene_type "protein_coding";
+chr12	havana	start_codon	25245382	25245384	.	-	0	gene_id "ENSG00000133703"; gene_version "13"; transcript_id "ENST00000556131"; transcript_version "1"; exon_number "2"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-203"; transcript_source "havana"; transcript_biotype "protein_coding"; tag "basic"; transcript_support_level "1"; gene_type "protein_coding";
+chr12	havana	exon	25233819	25235226	.	-	.	gene_id "ENSG00000133703"; gene_version "13"; transcript_id "ENST00000556131"; transcript_version "1"; exon_number "3"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-203"; transcript_source "havana"; transcript_biotype "protein_coding"; exon_id "ENSE00002478081"; exon_version "1"; tag "basic"; transcript_support_level "1"; gene_type "protein_coding";
+chr12	havana	CDS	25235209	25235226	.	-	0	gene_id "ENSG00000133703"; gene_version "13"; transcript_id "ENST00000556131"; transcript_version "1"; exon_number "3"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-203"; transcript_source "havana"; transcript_biotype "protein_coding"; protein_id "ENSP00000451856"; protein_version "1"; tag "basic"; transcript_support_level "1"; gene_type "protein_coding";
+chr12	havana	stop_codon	25235206	25235208	.	-	0	gene_id "ENSG00000133703"; gene_version "13"; transcript_id "ENST00000556131"; transcript_version "1"; exon_number "3"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-203"; transcript_source "havana"; transcript_biotype "protein_coding"; tag "basic"; transcript_support_level "1"; gene_type "protein_coding";
+chr12	havana	five_prime_utr	25250764	25250929	.	-	.	gene_id "ENSG00000133703"; gene_version "13"; transcript_id "ENST00000556131"; transcript_version "1"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-203"; transcript_source "havana"; transcript_biotype "protein_coding"; tag "basic"; transcript_support_level "1"; gene_type "protein_coding";
+chr12	havana	five_prime_utr	25245385	25245395	.	-	.	gene_id "ENSG00000133703"; gene_version "13"; transcript_id "ENST00000556131"; transcript_version "1"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-203"; transcript_source "havana"; transcript_biotype "protein_coding"; tag "basic"; transcript_support_level "1"; gene_type "protein_coding";
+chr12	havana	three_prime_utr	25233819	25235205	.	-	.	gene_id "ENSG00000133703"; gene_version "13"; transcript_id "ENST00000556131"; transcript_version "1"; gene_name "KRAS"; gene_source "ensembl_havana"; gene_biotype "protein_coding"; transcript_name "KRAS-203"; transcript_source "havana"; transcript_biotype "protein_coding"; tag "basic"; transcript_support_level "1"; gene_type "protein_coding";
diff --git a/tests/test_tools/data/viral.gtf.json b/tests/test_tools/data/viral.gtf.json
index 710f2e0d..db29832e 100644
--- a/tests/test_tools/data/viral.gtf.json
+++ b/tests/test_tools/data/viral.gtf.json
@@ -2678,6 +2678,256 @@
                     ]
                 }
             ]
+        },
+        {
+            "aliases": [
+                "KRAS"
+            ],
+            "biotype": "gene",
+            "chr": "chr12",
+            "end": 25250936,
+            "name": "ENSG00000133703",
+            "start": 25205246,
+            "strand": "-",
+            "transcripts": [
+                {
+                    "aliases": [
+                        "KRAS-201"
+                    ],
+                    "biotype": "transcript",
+                    "end": 25250929,
+                    "exons": [
+                        {
+                            "end": 25250929,
+                            "name": "ENSE00000000028",
+                            "number": "1",
+                            "start": 25250751,
+                            "version": "2"
+                        },
+                        {
+                            "end": 25245395,
+                            "name": "ENSE00000936617",
+                            "number": "2",
+                            "start": 25245274,
+                            "version": "1"
+                        },
+                        {
+                            "end": 25215560,
+                            "name": "ENSE00001189807",
+                            "number": "5",
+                            "start": 25215437,
+                            "version": "5"
+                        },
+                        {
+                            "end": 25225773,
+                            "name": "ENSE00001644818",
+                            "number": "4",
+                            "start": 25225614,
+                            "version": "1"
+                        },
+                        {
+                            "end": 25227412,
+                            "name": "ENSE00001719809",
+                            "number": "3",
+                            "start": 25227234,
+                            "version": "1"
+                        },
+                        {
+                            "end": 25209911,
+                            "name": "ENSE00002477035",
+                            "number": "6",
+                            "start": 25205246,
+                            "version": "3"
+                        }
+                    ],
+                    "name": "ENST00000256078",
+                    "start": 25205246,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 25245384,
+                            "name": "ENSP00000256078",
+                            "start": 25215444,
+                            "version": "5"
+                        }
+                    ],
+                    "version": "10"
+                },
+                {
+                    "aliases": [
+                        "KRAS-202"
+                    ],
+                    "biotype": "transcript",
+                    "end": 25250929,
+                    "exons": [
+                        {
+                            "end": 25245395,
+                            "name": "ENSE00000936617",
+                            "number": "2",
+                            "start": 25245274,
+                            "version": "1"
+                        },
+                        {
+                            "end": 25250929,
+                            "name": "ENSE00001189804",
+                            "number": "1",
+                            "start": 25250751,
+                            "version": "5"
+                        },
+                        {
+                            "end": 25225773,
+                            "name": "ENSE00001644818",
+                            "number": "4",
+                            "start": 25225614,
+                            "version": "1"
+                        },
+                        {
+                            "end": 25227412,
+                            "name": "ENSE00001719809",
+                            "number": "3",
+                            "start": 25227234,
+                            "version": "1"
+                        },
+                        {
+                            "end": 25209911,
+                            "name": "ENSE00002456976",
+                            "number": "5",
+                            "start": 25205246,
+                            "version": "2"
+                        }
+                    ],
+                    "name": "ENST00000311936",
+                    "start": 25205246,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 25245384,
+                            "name": "ENSP00000308495",
+                            "start": 25209798,
+                            "version": "3"
+                        }
+                    ],
+                    "version": "8"
+                },
+                {
+                    "aliases": [
+                        "KRAS-203"
+                    ],
+                    "biotype": "transcript",
+                    "end": 25250929,
+                    "exons": [
+                        {
+                            "end": 25245395,
+                            "name": "ENSE00000936617",
+                            "number": "2",
+                            "start": 25245274,
+                            "version": "1"
+                        },
+                        {
+                            "end": 25235226,
+                            "name": "ENSE00002478081",
+                            "number": "3",
+                            "start": 25233819,
+                            "version": "1"
+                        },
+                        {
+                            "end": 25250929,
+                            "name": "ENSE00002530521",
+                            "number": "1",
+                            "start": 25250764,
+                            "version": "1"
+                        }
+                    ],
+                    "name": "ENST00000556131",
+                    "start": 25233819,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 25245384,
+                            "name": "ENSP00000451856",
+                            "start": 25235209,
+                            "version": "1"
+                        }
+                    ],
+                    "version": "1"
+                },
+                {
+                    "aliases": [
+                        "KRAS-204"
+                    ],
+                    "biotype": "transcript",
+                    "end": 25250936,
+                    "exons": [
+                        {
+                            "end": 25245395,
+                            "name": "ENSE00000936617",
+                            "number": "2",
+                            "start": 25245274,
+                            "version": "1"
+                        },
+                        {
+                            "end": 25250936,
+                            "name": "ENSE00002446502",
+                            "number": "1",
+                            "start": 25250751,
+                            "version": "1"
+                        },
+                        {
+                            "end": 25209911,
+                            "name": "ENSE00002464674",
+                            "number": "3",
+                            "start": 25209168,
+                            "version": "1"
+                        }
+                    ],
+                    "name": "ENST00000557334",
+                    "start": 25209168,
+                    "translations": [
+                        {
+                            "biotype": "CDS",
+                            "end": 25245384,
+                            "name": "ENSP00000452512",
+                            "start": 25209798,
+                            "version": "1"
+                        }
+                    ],
+                    "version": "5"
+                }
+            ],
+            "version": "13"
+        },
+        {
+            "aliases": [
+                "KRASP1"
+            ],
+            "biotype": "gene",
+            "chr": "chr6",
+            "end": 54771134,
+            "name": "ENSG00000220635",
+            "start": 54770583,
+            "strand": "+",
+            "transcripts": [
+                {
+                    "aliases": [
+                        "KRASP1-201"
+                    ],
+                    "biotype": "transcript",
+                    "end": 54771134,
+                    "exons": [
+                        {
+                            "end": 54771134,
+                            "name": "ENSE00001550689",
+                            "number": "1",
+                            "start": 54770583,
+                            "version": "2"
+                        }
+                    ],
+                    "name": "ENST00000407852",
+                    "start": 54770583,
+                    "version": "2"
+                }
+            ],
+            "version": "2"
         }
     ]
 }

From 84abab90401164a6590e03677bbbc6410e5d4f9c Mon Sep 17 00:00:00 2001
From: Caralyn Reisle <creisle@bcgsc.ca>
Date: Tue, 22 Feb 2022 11:26:12 -0800
Subject: [PATCH 136/137] Bump version number to 3.0.0

---
 Snakefile | 2 +-
 setup.cfg | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/Snakefile b/Snakefile
index 4c8233f9..c361c753 100644
--- a/Snakefile
+++ b/Snakefile
@@ -11,7 +11,7 @@ from mavis_config import (
 from mavis_config.constants import SUBCOMMAND
 
 # env variable mainly for CI/CD
-CONTAINER = os.environ.get('SNAKEMAKE_CONTAINER', 'docker://bcgsc/mavis:latest')
+CONTAINER = os.environ.get('SNAKEMAKE_CONTAINER', 'docker://bcgsc/mavis:v3.0.0')
 MAX_TIME = 57600
 DEFAULT_MEMORY_MB = 16000
 
diff --git a/setup.cfg b/setup.cfg
index ed47bfd5..4149c69c 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,6 +1,6 @@
 [metadata]
 name = mavis
-version = 2.2.10
+version = 3.0.0
 url = https://github.com/bcgsc/mavis.git
 download_url = https://github.com/bcgsc/mavis/archive/v2.2.10.tar.gz
 description = A Structural Variant Post-Processing Package

From c2221fee6f8ab3767a040c12c9d024ea67e56fff Mon Sep 17 00:00:00 2001
From: zhemingfan <43304373+zhemingfan@users.noreply.github.com>
Date: Tue, 22 Feb 2022 15:03:31 -0800
Subject: [PATCH 137/137] Update mkdocs.yml

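As per the instructions at https://github.com/lukasgeiter/mkdocs-awesome-pages-plugin,
the built-in `search` plugin must now be listed explicitly, because defining a `plugins` section in mkdocs.yml disables the MkDocs default plugins.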
---
 mkdocs.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/mkdocs.yml b/mkdocs.yml
index 79900dff..2bf2bbfc 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -26,6 +26,7 @@ nav:
   - glossary.md
 
 plugins:
+  - search
   - awesome-pages
   - mkdocs-simple-hooks:
       hooks: