Skip to content

Commit d0b98ee

Browse files
Merge pull request #114 from griffithlab/intron_variants
Handle variants with multiple stops in the amino acid change
2 parents e2877c2 + 964bb04 commit d0b98ee

File tree

5 files changed

+72
-10
lines changed

5 files changed

+72
-10
lines changed

.travis.yml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,9 @@ before_install:
1010
- sudo apt-get install -y ghostscript
1111
install:
1212
#Remove the following line when pvacseq-client is first pushed to the live pypi site
13-
- pip install -i https://testpypi.python.org/pypi pvacseq-client
13+
#This has been updated to pull from the live pypi site, instead of test pypi.
14+
#Not sure if it can or needs to be deleted altogether.
15+
- pip install pvacseq-client
1416
- pip install -e .[API]
1517
before_script:
1618
- "export DISPLAY=:99.0"

lib/fasta_generator.py

Lines changed: 23 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -104,10 +104,15 @@ def execute(self):
104104
position = int(line['protein_position'].split('-', 1)[0]) - 1
105105
elif variant_type == 'missense' or variant_type == 'inframe_ins':
106106
wildtype_amino_acid, mutant_amino_acid = line['amino_acid_change'].split('/')
107-
if wildtype_amino_acid.endswith('*'):
108-
wildtype_amino_acid = wildtype_amino_acid.replace('*', '')
109-
if mutant_amino_acid.endswith('*'):
110-
mutant_amino_acid = mutant_amino_acid.replace('*', '')
107+
if '*' in wildtype_amino_acid:
108+
wildtype_amino_acid = wildtype_amino_acid.split('*')[0]
109+
elif 'X' in wildtype_amino_acid:
110+
wildtype_amino_acid = wildtype_amino_acid.split('X')[0]
111+
if '*' in mutant_amino_acid:
112+
mutant_amino_acid = mutant_amino_acid.split('*')[0]
113+
stop_codon_added = True
114+
elif 'X' in mutant_amino_acid:
115+
mutant_amino_acid = mutant_amino_acid.split('X')[0]
111116
stop_codon_added = True
112117
else:
113118
stop_codon_added = False
@@ -124,10 +129,15 @@ def execute(self):
124129
elif variant_type == 'inframe_del':
125130
variant_type = 'inframe_del'
126131
wildtype_amino_acid, mutant_amino_acid = line['amino_acid_change'].split('/')
127-
if wildtype_amino_acid.endswith('*'):
128-
wildtype_amino_acid = wildtype_amino_acid.replace('*', '')
129-
if mutant_amino_acid.endswith('*'):
130-
mutant_amino_acid = mutant_amino_acid.replace('*', '')
132+
if '*' in wildtype_amino_acid:
133+
wildtype_amino_acid = wildtype_amino_acid.split('*')[0]
134+
elif 'X' in wildtype_amino_acid:
135+
wildtype_amino_acid = wildtype_amino_acid.split('X')[0]
136+
if '*' in mutant_amino_acid:
137+
mutant_amino_acid = mutant_amino_acid.split('*')[0]
138+
stop_codon_added = True
139+
elif 'X' in mutant_amino_acid:
140+
mutant_amino_acid = mutant_amino_acid.split('X')[0]
131141
stop_codon_added = True
132142
else:
133143
stop_codon_added = False
@@ -151,7 +161,11 @@ def execute(self):
151161
mutation_start_position, wildtype_subsequence = self.get_wildtype_subsequence(position, full_wildtype_sequence, wildtype_amino_acid_length, peptide_sequence_length, line)
152162
mutation_end_position = mutation_start_position + wildtype_amino_acid_length
153163
if wildtype_amino_acid != '-' and wildtype_amino_acid != wildtype_subsequence[mutation_start_position:mutation_end_position]:
154-
sys.exit("ERROR: There was a mismatch between the actual wildtype amino acid and the expected amino acid. Did you use the same reference build version for VEP that you used for creating the VCF?\n%s" % line)
164+
if line['amino_acid_change'].split('/')[0].count('*') > 1:
165+
print("Warning: Amino acid change is not sane - contains multiple stops. Skipping entry {}".format(line['index']))
166+
continue
167+
else:
168+
sys.exit("ERROR: There was a mismatch between the actual wildtype amino acid sequence ({}) and the expected amino acid sequence ({}). Did you use the same reference build version for VEP that you used for creating the VCF?\n{}".format(wildtype_subsequence[mutation_start_position:mutation_end_position], wildtype_amino_acid, line))
155169
if stop_codon_added:
156170
mutant_subsequence = wildtype_subsequence[:mutation_start_position] + mutant_amino_acid
157171
else:
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
chromosome_name start stop reference variant gene_name transcript_name amino_acid_change ensembl_gene_id wildtype_amino_acid_sequence downstream_amino_acid_sequence fusion_amino_acid_sequence variant_type protein_position transcript_expression gene_expression normal_depth normal_vaf tdna_depth tdna_vaf trna_depth trna_vaf index protein_length_change
2+
12 96617457 96617460 CAGA C ELK3 ENST00000547249 AX/X ENSG00000111145 MESAITLWQFLLQLLLDQKHEHLICWTSNDGEFKLLKA inframe_del 38-39 NA NA NA NA NA NA NA NA 39805.ELK3.ENST00000547249.inframe_del.38-39AX/X
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
chromosome_name start stop reference variant gene_name transcript_name amino_acid_change ensembl_gene_id wildtype_amino_acid_sequence downstream_amino_acid_sequence fusion_amino_acid_sequence variant_type protein_position transcript_expression gene_expression normal_depth normal_vaf tdna_depth tdna_vaf trna_depth trna_vaf index protein_length_change
2+
3 41265565 41266581 TACTCAAGGTTTGTGTCATTAAATCTTTAGTTACTGAATTGGGGCTCTGCTTCGTTGCCATTAAGCCAGTCTGGCTGAGATCCCCCTGCTTTCCTCTCTCCCTGCTTACTTGTCAGGCTACCTTTTGCTCCATTTTCTGCTCACTCCTCCTAATGGCTTGGTGAAATAGCAAACAAGCCACCAGCAGGAATCTAGTCTGGATGACTGCTTCTGGAGCCTGGATGCAGTACCATTCTTCCACTGATTCAGTGAGTAACTGTTAGGTGGTTCCCTAAGGGATTAGGTATTTCATCACTGAGCTAACCCTGGCTATCATTCTGCTTTTCTTGGCTGTCTTTCAGATTTGACTTTATTTCTAAAAATATTTCAATGGGTCATATCACAGATTCTTTTTTTTTAAATTAAAGTAACATTTCCAATCTACTAATGCTAATACTGTTTCGTATTTATAGCTGATTTGATGGAGTTGGACATGGCCATGGAACCAGACAGAAAAGCGGCTGTTAGTCACTGGCAGCAACAGTCTTACCTGGACTCTGGAATCCATTCTGGTGCCACTACCACAGCTCCTTCTCTGAGTGGTAAAGGCAATCCTGAGGAAGAGGATGTGGATACCTCCCAAGTCCTGTATGAGTGGGAACAGGGATTTTCTCAGTCCTTCACTCAAGAACAAGTAGCTGGTAAGAGTATTATTTTTCATTGCCTTACTGAAAGTCAGAATGCAGTTTTGAGAACTAAAAAGTTAGTGTATAATAGTTTAAATAAAATGTTGTGGTGAAGAAAAGAGAGTAATAGCAATGTCACTTTTACCATTTAGGATAGCAAATACTTAGGTAAATGCTGAACTGTGGATAGTGAGTGTTGAATTAACCTTTTCCAGATATTGATGGACAGTATGCAATGACTCGAGCTCAGAGGGTACGAGCTGCTATGTTCCCTGAGACATTAGATGAGGGCATGCAGATCCCATCTACACAGTTTGATGCTGCTCATCCCACTAATGTCCAGCGTTTGGCT T CTNNB1 ENST00000349496 TQGLCH*IFSY*IGALLRCH*ASLAEIPLLSSLPAYLSGYLLLHFLLTPPNGLVK*QTSHQQESSLDDCFWSLDAVPFFH*FSE*LLGGSLRD*VFHH*ANPGYHSAFLGCLSDLTLFLKIFQWVISQILFF*IKVTFPIY*C*YCFVFIADLMELDMAMEPDRKAAVSHWQQQSYLDSGIHSGATTTAPSLSGKGNPEEEDVDTSQVLYEWEQGFSQSFTQEQVAGKSIIFHCLTESQNAVLRTKKLVYNSLNKMLW*RKESNSNVTFTI*DSKYLGKC*TVDSEC*INLFQILMDSMQ*LELRGYELLCSLRH*MRACRSHLHSLMLLIPLMSSVWX/- ENSG00000168036 
MATQADLMELDMAMEPDRKAAVSHWQQQSYLDSGIHSGATTTAPSLSGKGNPEEEDVDTSQVLYEWEQGFSQSFTQEQVADIDGQYAMTRAQRVRAAMFPETLDEGMQIPSTQFDAAHPTNVQRLAEPSQMLKHAVVNLINYQDDAELATRAIPELTKLLNDEDQVVVNKAAVMVHQLSKKEASRHAIMRSPQMVSAIVRTMQNTNDVETARCTAGTLHNLSHHREGLLAIFKSGGIPALVKMLGSPVDSVLFYAITTLHNLLLHQEGAKMAVRLAGGLQKMVALLNKTNVKFLAITTDCLQILAYGNQESKLIILASGGPQALVNIMRTYTYEKLLWTTSRVLKVLSVCSSNKPAIVEAGGMQALGLHLTDPSQRLVQNCLWTLRNLSDAATKQEGMEGLLGTLVQLLGSDDINVVTCAAGILSNLTCNNYKNKMMVCQVGGIEALVRTVLRAGDREDITEPAICALRHLTSRHQEAEMAQNAVRLHYGLPVVVKLLHPPSHWPLIKATVGLIRNLALCPANHAPLREQGAIPRLVQLLVRAHQDTQRRTSMGGTQQQFVEGVRMEEIVEGCTGALHILARDVHNRIVIRGLNTIPLFVQLLYSPIENIQRVAAGVLCELAQDKEAAEAIEAEGATAPLTELLHSRNEGVATYAAAVLFRMSEDKPQDYKKRLSVELTSSLFRTEPMAWNETADLGLDIGAQGEPLGYRQDDPSYRSFHSGGYGQDALGMDPMMEHEMGGHHPGADYPVDGLPDLGHAQDLMDGLPPGDSNQLAWFDTDL inframe_del 3-126 NA NA NA NA NA NA NA NA 1.CTNNB1.ENST00000349496.inframe_del.3-126TQGLCH*IFSY*IGALLRCH*ASLAEIPLLSSLPAYLSGYLLLHFLLTPPNGLVK*QTSHQQESSLDDCFWSLDAVPFFH*FSE*LLGGSLRD*VFHH*ANPGYHSAFLGCLSDLTLFLKIFQWVISQILFF*IKVTFPIY*C*YCFVFIADLMELDMAMEPDRKAAVSHWQQQSYLDSGIHSGATTTAPSLSGKGNPEEEDVDTSQVLYEWEQGFSQSFTQEQVAGKSIIFHCLTESQNAVLRTKKLVYNSLNKMLW*RKESNSNVTFTI*DSKYLGKC*TVDSEC*INLFQILMDSMQ*LELRGYELLCSLRH*MRACRSHLHSLMLLIPLMSSVWX/-

tests/test_fasta_generator.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -564,6 +564,48 @@ def test_protein_change_with_asterisk_in_wildtype_and_mutant(self):
564564
self.assertEqual(os.path.getsize(generate_fasta_output_file.name), 0)
565565
self.assertEqual(os.path.getsize(generate_fasta_key_output_file.name), 0)
566566

567+
def test_protein_change_with_X_in_wildtype_and_mutatnt(self):
    """Check that the 'X in wildtype and mutant' fixture produces no output.

    Runs FastaGenerator on the ``input.tsv`` file found in the
    ``protein_change_with_X_in_wildtype_and_mutant`` test-data directory and
    asserts that ``execute()`` returns a falsy value and that both the FASTA
    output file and the key output file stay empty.
    """
    # NOTE(review): "mutatnt" in the method name is a typo for "mutant"; the
    # name is left untouched so the test id stays stable.
    test_data_dir = os.path.join(self.test_data_dir, 'protein_change_with_X_in_wildtype_and_mutant')
    generate_fasta_input_file = os.path.join(test_data_dir, 'input.tsv')

    # Context managers make sure both temporary files are closed (and thereby
    # removed) even when one of the assertions below fails.
    with tempfile.NamedTemporaryFile() as generate_fasta_output_file, \
            tempfile.NamedTemporaryFile() as generate_fasta_key_output_file:
        generate_fasta_params = {
            'input_file'                : generate_fasta_input_file,
            'peptide_sequence_length'   : self.peptide_sequence_length,
            'epitope_length'            : self.epitope_length,
            'output_file'               : generate_fasta_output_file.name,
            'output_key_file'           : generate_fasta_key_output_file.name,
            'downstream_sequence_length': None,
        }
        generator = FastaGenerator(**generate_fasta_params)

        self.assertFalse(generator.execute())
        # Nothing may have been written for this entry.
        self.assertEqual(os.path.getsize(generate_fasta_output_file.name), 0)
        self.assertEqual(os.path.getsize(generate_fasta_key_output_file.name), 0)
587+
588+
def test_protein_change_with_multiple_asterisks(self):
    """Check that the 'multiple asterisks' fixture produces no output.

    Runs FastaGenerator on the ``input.tsv`` file found in the
    ``protein_change_with_multiple_asterisks`` test-data directory and
    asserts that ``execute()`` returns a falsy value and that both the FASTA
    output file and the key output file stay empty.
    """
    # NOTE(review): presumably the fixture entry is dropped by the generator's
    # "contains multiple stops" warning path - confirm against
    # lib/fasta_generator.py.
    test_data_dir = os.path.join(self.test_data_dir, 'protein_change_with_multiple_asterisks')
    generate_fasta_input_file = os.path.join(test_data_dir, 'input.tsv')

    # Context managers make sure both temporary files are closed (and thereby
    # removed) even when one of the assertions below fails.
    with tempfile.NamedTemporaryFile() as generate_fasta_output_file, \
            tempfile.NamedTemporaryFile() as generate_fasta_key_output_file:
        generate_fasta_params = {
            'input_file'                : generate_fasta_input_file,
            'peptide_sequence_length'   : self.peptide_sequence_length,
            'epitope_length'            : self.epitope_length,
            'output_file'               : generate_fasta_output_file.name,
            'output_key_file'           : generate_fasta_key_output_file.name,
            'downstream_sequence_length': None,
        }
        generator = FastaGenerator(**generate_fasta_params)

        self.assertFalse(generator.execute())
        # Nothing may have been written for this entry.
        self.assertEqual(os.path.getsize(generate_fasta_output_file.name), 0)
        self.assertEqual(os.path.getsize(generate_fasta_key_output_file.name), 0)
608+
567609
def test_distance_from_start_works_as_expected(self):
568610
generate_fasta_input_file = tempfile.NamedTemporaryFile()
569611
generate_fasta_output_file = tempfile.NamedTemporaryFile()

0 commit comments

Comments
 (0)