Skip to content

Commit a2f84e4

Browse files
author
Peter Causey-Freeman
authored
Merge pull request #114 from openvar/develop_v3
Develop v3
2 parents ec89acd + 723f70b commit a2f84e4

File tree

6 files changed

+82
-25
lines changed

6 files changed

+82
-25
lines changed

VariantValidator/modules/variant.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -194,6 +194,7 @@ def output_dict(self, test=False):
194194
if test is True:
195195
try:
196196
del self.stable_gene_ids['ensembl_gene_id']
197+
del self.stable_gene_ids['ccds_ids']
197198
except KeyError:
198199
pass
199200
dict_out = {

VariantValidator/modules/vvMixinConverters.py

Lines changed: 38 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,8 @@
99
from Bio import Entrez, SeqIO
1010
from . import utils as fn
1111

12-
from vvhgvs.exceptions import HGVSError, HGVSDataNotAvailableError, HGVSUnsupportedOperationError
12+
from vvhgvs.exceptions import HGVSError, HGVSDataNotAvailableError, HGVSUnsupportedOperationError, \
13+
HGVSInvalidVariantError
1314

1415
logger = logging.getLogger(__name__)
1516

@@ -494,6 +495,24 @@ def search_through_options(hgvs_genomic, seqtype, chr_num_val, final=False):
494495
# This will only happen if the variant is flanking the gap but is
495496
# not inside the gap
496497
logger.info('Variant is on the flank of a genomic gap but not within the gap')
498+
499+
# Test on the flank and if so, return
500+
501+
# Logic: normalize the c. variant and, if it is a substitution (cannot be normalized), map it directly to the genome
502+
# Currently believe that sub.n is the only variant type which fits. ins can normalize
503+
# and may also be a dup!
504+
try:
505+
norm_stored_c = hn.normalize(stored_hgvs_c)
506+
if norm_stored_c.posedit.edit.type == 'sub':
507+
flank_hgvs_genomic = self.vm.t_to_g(norm_stored_c, genomic_gap_variant.ac)
508+
self.vr.validate(flank_hgvs_genomic)
509+
return flank_hgvs_genomic
510+
511+
# Will occur if the variant still overlaps the gap / is in the gap
512+
except HGVSInvalidVariantError:
513+
pass
514+
515+
# If test fails, continue old processing
497516
gap_start = genomic_gap_variant.posedit.pos.start.base - 1
498517
gap_end = genomic_gap_variant.posedit.pos.end.base + 1
499518
genomic_gap_variant.posedit.pos.start.base = gap_start
@@ -1159,6 +1178,23 @@ def myvm_t_to_g(self, hgvs_c, alt_chr, no_norm_evm, hn):
11591178
# This will only happen if the variant is flanking the gap but is
11601179
# not inside the gap
11611180
logger.info('Variant is on the flank of a genomic gap but not within the gap')
1181+
1182+
# Test definitely on the flank and if so, return
1183+
# Logic: normalize the c. variant and, if it is a substitution (cannot be normalized), map it directly to the genome
1184+
# Currently believe that sub.n is the only variant type which fits. ins can normalize
1185+
# and may also be a dup!
1186+
try:
1187+
norm_stored_c = hn.normalize(stored_hgvs_c)
1188+
if norm_stored_c.posedit.edit.type == 'sub':
1189+
flank_hgvs_genomic = self.vm.t_to_g(norm_stored_c, genomic_gap_variant.ac)
1190+
self.vr.validate(flank_hgvs_genomic)
1191+
return flank_hgvs_genomic
1192+
1193+
# Will occur if the variant still overlaps the gap / is in the gap
1194+
except HGVSInvalidVariantError:
1195+
pass
1196+
1197+
# If test fails, continue old processing
11621198
gap_start = genomic_gap_variant.posedit.pos.start.base - 1
11631199
gap_end = genomic_gap_variant.posedit.pos.end.base + 1
11641200
genomic_gap_variant.posedit.pos.start.base = gap_start
@@ -2155,7 +2191,7 @@ def chr_to_rsg(self, hgvs_genomic, hn):
21552191
"""
21562192
# Convert chromosomal HGVS description to RefSeqGene
21572193
"""
2158-
# print 'chr_to_rsg triggered'
2194+
# 'chr_to_rsg triggered'
21592195
hgvs_genomic = hn.normalize(hgvs_genomic)
21602196
# split the description
21612197
# Accessions
@@ -2358,7 +2394,6 @@ def rsg_to_chr(self, hgvs_refseqgene, primary_assembly, hn):
23582394
new_ref = match[1]
23592395
hgvs_genomic.posedit.edit.ref = new_ref
23602396
error = 'true'
2361-
# # print str(e) + '\n3.'
23622397
data = {'hgvs_genomic': str(hgvs_genomic), 'gene': gene, 'valid': str(error)}
23632398
else:
23642399
data = {'hgvs_genomic': str(hgvs_genomic), 'gene': gene, 'valid': 'true'}

VariantValidator/modules/vvMixinCore.py

Lines changed: 27 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -175,15 +175,26 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr
175175
# INITIAL USER INPUT FORMATTING
176176
invalid = my_variant.format_quibble()
177177
if invalid:
178-
if re.search(r'\w+:[gcnmrp]', my_variant.quibble) and not \
178+
if re.search(r'\w+:[gcnmrp],', my_variant.quibble):
179+
error = 'Variant description ' + my_variant.quibble + ' contained the , character between '\
180+
'<type> and <position> in the expected pattern <accession>:<type>.<position> and ' \
181+
'has been auto-corrected'
182+
my_variant.quibble = my_variant.quibble.replace(',', '.')
183+
my_variant.warnings.append(error)
184+
logger.warning(error)
185+
pass
186+
elif re.search(r'\w+:[gcnmrp]', my_variant.quibble) and not \
179187
re.search(r'\w+:[gcnmrp]\.', my_variant.quibble):
180188
error = 'Variant description ' + my_variant.quibble + ' lacks the . character between ' \
181189
'<type> and <position> in the expected pattern <accession>:<type>.<position>'
190+
my_variant.warnings.append(error)
191+
logger.warning(error)
192+
continue
182193
else:
183194
error = 'Variant description ' + my_variant.quibble + ' is not in an accepted format'
184-
my_variant.warnings.append(error)
185-
logger.warning(error)
186-
continue
195+
my_variant.warnings.append(error)
196+
logger.warning(error)
197+
continue
187198

188199
formatted_variant = my_variant.quibble
189200
stash_input = my_variant.quibble
@@ -214,8 +225,9 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr
214225
except vvhgvs.exceptions.HGVSError as e:
215226
# Look for T not U!
216227
posedit = formatted_variant.split(':')[-1]
217-
if 'T' in posedit:
218-
e = 'The IUPAC RNA alphabet dictates that RNA variants must use the character u in place of t'
228+
if 'T' in posedit and "r." in posedit:
229+
e = 'The IUPAC RNA alphabet dictates that RNA variants must use the character u in ' \
230+
'place of t'
219231
my_variant.warnings.append(str(e))
220232
logger.warning(str(e))
221233
continue
@@ -257,7 +269,7 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr
257269
my_variant.warnings.append(str(trap_ens_in) + ' automapped to equivalent RefSeq transcript '
258270
+ my_variant.quibble)
259271
logger.info(str(trap_ens_in) + ' automapped to equivalent RefSeq '
260-
'transcript ' + my_variant.quibble)
272+
'transcript ' + my_variant.quibble)
261273
logger.debug("HVGS acceptance test passed")
262274

263275
# Check whether supported genome build is requested for non g. descriptions
@@ -701,7 +713,14 @@ def validate(self, batch_variant, selected_assembly, select_transcripts, transcr
701713
stable_gene_ids['ucsc_id'] = gene_stable_info[5]
702714
stable_gene_ids['omim_id'] = json.loads(gene_stable_info[6])
703715
# stable_gene_ids['vega_id'] = gene_stable_info[7]
704-
# stable_gene_ids['ccds_ids'] = gene_stable_info[8]
716+
717+
# reformat ccds return into a Python list
718+
my_ccds = gene_stable_info[8].replace('[', '')
719+
my_ccds = my_ccds.replace(']', '')
720+
my_ccds = my_ccds.replace('"','')
721+
ccds_list = my_ccds.split()
722+
stable_gene_ids['ccds_ids'] = ccds_list
723+
705724
except IndexError as e:
706725
logger.debug("Except pass, %s", e)
707726

environment.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,6 @@ dependencies:
1313
- configparser>=3.5.0
1414
- requests
1515
- pip:
16-
- git+https://github.com/openvar/vv_hgvs@master#egg=vvhgvs
16+
- git+https://github.com/openvar/vv_hgvs@1.2.5.vv1#egg=vvhgvs
1717
- biotools>=0.3.0
1818
- biopython

setup.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -69,8 +69,8 @@
6969
# removed
7070
"biopython==1.74",
7171
"requests",
72-
# "vvhgvs",
73-
"vvhgvs @ git+https://github.com/openvar/vv_hgvs.git@master#egg=vvhgvs",
72+
"mysql-connector-python",
73+
"vvhgvs @ git+https://github.com/openvar/vv_hgvs.git@1.2.5.vv1#egg=vvhgvs",
7474
],
7575
# dependency_links=[
7676
# "git+https://github.com/openvar/vv_hgvs.git@master#egg=vvhgvs-1.0.0",

tests/test_inputs.py

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -25,10 +25,10 @@ def test_variant1(self):
2525
assert results['NM_015120.4:c.35T>C']['hgvs_lrg_transcript_variant'] == 'LRG_741t1:c.35T>C'
2626
assert results['NM_015120.4:c.35T>C']['hgvs_lrg_variant'] == 'LRG_741:g.5146T>C'
2727
self.assertCountEqual(results['NM_015120.4:c.35T>C']['alt_genomic_loci'], [])
28-
assert results['NM_015120.4:c.35T>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.73613031delinsCGGA', 'vcf': {'chr': 'chr2', 'pos': '73613031', 'ref': 'T', 'alt': 'CGGA'}}
29-
assert results['NM_015120.4:c.35T>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.73385903delinsCGGA', 'vcf': {'chr': 'chr2', 'pos': '73385903', 'ref': 'T', 'alt': 'CGGA'}}
30-
assert results['NM_015120.4:c.35T>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.73613031delinsCGGA', 'vcf': {'chr': '2', 'pos': '73613031', 'ref': 'T', 'alt': 'CGGA'}}
31-
assert results['NM_015120.4:c.35T>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.73385903delinsCGGA', 'vcf': {'chr': '2', 'pos': '73385903', 'ref': 'T', 'alt': 'CGGA'}}
28+
assert results['NM_015120.4:c.35T>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.73613031T>C', 'vcf': {'chr': 'chr2', 'pos': '73613031', 'ref': 'T', 'alt': 'C'}}
29+
assert results['NM_015120.4:c.35T>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.73385903T>C', 'vcf': {'chr': 'chr2', 'pos': '73385903', 'ref': 'T', 'alt': 'C'}}
30+
assert results['NM_015120.4:c.35T>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.73613031T>C', 'vcf': {'chr': '2', 'pos': '73613031', 'ref': 'T', 'alt': 'C'}}
31+
assert results['NM_015120.4:c.35T>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.73385903T>C', 'vcf': {'chr': '2', 'pos': '73385903', 'ref': 'T', 'alt': 'C'}}
3232
assert results['NM_015120.4:c.35T>C']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_015120.4', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055935.4', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011690.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_741.xml'}
3333

3434
def test_variant2(self):
@@ -49,10 +49,10 @@ def test_variant2(self):
4949
assert results['NM_015120.4:c.39G>C']['hgvs_lrg_transcript_variant'] == 'LRG_741t1:c.39G>C'
5050
assert results['NM_015120.4:c.39G>C']['hgvs_lrg_variant'] == 'LRG_741:g.5150G>C'
5151
self.assertCountEqual(results['NM_015120.4:c.39G>C']['alt_genomic_loci'], [])
52-
assert results['NM_015120.4:c.39G>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.73613034_73613035insCGA', 'vcf': {'chr': 'chr2', 'pos': '73613032', 'ref': 'G', 'alt': 'GGAC'}}
53-
assert results['NM_015120.4:c.39G>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.73385906_73385907insCGA', 'vcf': {'chr': 'chr2', 'pos': '73385904', 'ref': 'G', 'alt': 'GGAC'}}
54-
assert results['NM_015120.4:c.39G>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.73613034_73613035insCGA', 'vcf': {'chr': '2', 'pos': '73613032', 'ref': 'G', 'alt': 'GGAC'}}
55-
assert results['NM_015120.4:c.39G>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.73385906_73385907insCGA', 'vcf': {'chr': '2', 'pos': '73385904', 'ref': 'G', 'alt': 'GGAC'}}
52+
assert results['NM_015120.4:c.39G>C']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000002.11:g.73613032G>C', 'vcf': {'chr': 'chr2', 'pos': '73613032', 'ref': 'G', 'alt': 'C'}}
53+
assert results['NM_015120.4:c.39G>C']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000002.12:g.73385904G>C', 'vcf': {'chr': 'chr2', 'pos': '73385904', 'ref': 'G', 'alt': 'C'}}
54+
assert results['NM_015120.4:c.39G>C']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000002.11:g.73613032G>C', 'vcf': {'chr': '2', 'pos': '73613032', 'ref': 'G', 'alt': 'C'}}
55+
assert results['NM_015120.4:c.39G>C']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000002.12:g.73385904G>C', 'vcf': {'chr': '2', 'pos': '73385904', 'ref': 'G', 'alt': 'C'}}
5656
assert results['NM_015120.4:c.39G>C']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_015120.4', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_055935.4', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_011690.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_741.xml'}
5757

5858
def test_variant3(self):
@@ -2286,10 +2286,12 @@ def test_variant76(self):
22862286
assert results['NM_032790.3:c.126C>A']['hgvs_predicted_protein_consequence'] == {'tlr': 'NP_116179.2(LRG_93p1):p.(Ala42=)', 'slr': 'NP_116179.2:p.(A42=)'}
22872287
assert results['NM_032790.3:c.126C>A']['hgvs_lrg_transcript_variant'] == 'LRG_93t1:c.126C>A'
22882288
assert results['NM_032790.3:c.126C>A']['hgvs_lrg_variant'] == 'LRG_93:g.5299C>A'
2289-
self.assertCountEqual(results['NM_032790.3:c.126C>A']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_004504303.2:g.302871_302876del', 'vcf': {'chr': 'HG1595_PATCH', 'pos': '302869', 'ref': 'GCCCCGC', 'alt': 'G'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004504303.2:g.302871_302876del', 'vcf': {'chr': 'NW_004504303.2', 'pos': '302869', 'ref': 'GCCCCGC', 'alt': 'G'}}}])
2290-
assert results['NM_032790.3:c.126C>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064773_122064778del', 'vcf': {'chr': 'chr12', 'pos': '122064771', 'ref': 'GCCCCGC', 'alt': 'G'}}
2289+
2290+
# The bug fix for issue https://github.com/openvar/variantValidator/issues/94 creates extra alt_genomic_loci outputs. This is not a problem, so the alt_genomic_loci assertion is disabled.
2291+
#self.assertCountEqual(results['NM_032790.3:c.126C>A']['alt_genomic_loci'], [{'grch37': {'hgvs_genomic_description': 'NW_004504303.2:g.302871_302876del', 'vcf': {'chr': 'HG1595_PATCH', 'pos': '302869', 'ref': 'GCCCCGC', 'alt': 'G'}}}, {'hg19': {'hgvs_genomic_description': 'NW_004504303.2:g.302871_302876del', 'vcf': {'chr': 'NW_004504303.2', 'pos': '302869', 'ref': 'GCCCCGC', 'alt': 'G'}}}])
2292+
assert results['NM_032790.3:c.126C>A']['primary_assembly_loci']['hg19'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064773C>A', 'vcf': {'chr': 'chr12', 'pos': '122064773', 'ref': 'C', 'alt': 'A'}}
22912293
assert results['NM_032790.3:c.126C>A']['primary_assembly_loci']['hg38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626873C>A', 'vcf': {'chr': 'chr12', 'pos': '121626873', 'ref': 'C', 'alt': 'A'}}
2292-
assert results['NM_032790.3:c.126C>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064773_122064778del', 'vcf': {'chr': '12', 'pos': '122064771', 'ref': 'GCCCCGC', 'alt': 'G'}}
2294+
assert results['NM_032790.3:c.126C>A']['primary_assembly_loci']['grch37'] == {'hgvs_genomic_description': 'NC_000012.11:g.122064773C>A', 'vcf': {'chr': '12', 'pos': '122064773', 'ref': 'C', 'alt': 'A'}}
22932295
assert results['NM_032790.3:c.126C>A']['primary_assembly_loci']['grch38'] == {'hgvs_genomic_description': 'NC_000012.12:g.121626873C>A', 'vcf': {'chr': '12', 'pos': '121626873', 'ref': 'C', 'alt': 'A'}}
22942296
assert results['NM_032790.3:c.126C>A']['reference_sequence_records'] == {'transcript': 'https://www.ncbi.nlm.nih.gov/nuccore/NM_032790.3', 'protein': 'https://www.ncbi.nlm.nih.gov/nuccore/NP_116179.2', 'refseqgene': 'https://www.ncbi.nlm.nih.gov/nuccore/NG_007500.1', 'lrg': 'http://ftp.ebi.ac.uk/pub/databases/lrgex/LRG_93.xml'}
22952297

0 commit comments

Comments
 (0)