Skip to content

Commit

Permalink
Merge pull request #1017 from griffithlab/ref_seq_mouse
Browse files Browse the repository at this point in the history
Update reference proteome similarity step to work with nonhuman data
  • Loading branch information
susannasiebert authored Aug 9, 2023
2 parents af74bab + b95f8c0 commit 2dea4a7
Show file tree
Hide file tree
Showing 8 changed files with 281 additions and 27 deletions.
4 changes: 2 additions & 2 deletions pvactools/lib/calculate_reference_proteome_similarity.py
Original file line number Diff line number Diff line change
Expand Up @@ -269,7 +269,7 @@ def _input_tsv_type(self, line):
def _get_full_peptide(self, line, mt_records_dict, wt_records_dict):
for record_id in mt_records_dict.keys():
(rest_record_id, variant_type, aa_change) = record_id.rsplit(".", 2)
transcript_regex = '^.*(ENST[0-9|.]+)$'
transcript_regex = '^.*(ENS[0-9|A-Z|.]+)$'
transcript_p = re.compile(transcript_regex)
m = transcript_p.match(rest_record_id)
if m:
Expand Down Expand Up @@ -522,7 +522,7 @@ def _write_outputs(self, processed_peptides, mt_records_dict, wt_records_dict):
for query_window, hit_reference_matches in groupby(metric_lines,key=lambda x:x['Match Window']):
hit_reference_matches = list(hit_reference_matches)
gene_regex = '^.*gene_symbol:([0-9|A-Z]+).*$'
transcript_regex = '^.*transcript:(ENST[0-9|.]+).*$'
transcript_regex = '^.*transcript:(ENS[0-9|A-Z|.]+).*$'
gene_p = re.compile(gene_regex)
transcript_p = re.compile(transcript_regex)
genes = []
Expand Down
23 changes: 23 additions & 0 deletions tests/test_calculate_reference_proteome_similarity.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,29 @@ def test_calculate_self_similarity_with_aggregated_tsv_and_peptide_fasta(self):
))
os.remove(metric_file)

def test_calculate_self_similarity_with_aggregated_tsv_and_peptide_fasta_mouse(self):
    """Run the similarity step on the aggregated mouse fixture with a peptide FASTA.

    Feeds the ``*.mouse.tsv`` aggregated report, its metrics JSON and the
    mouse FASTA to ``CalculateReferenceProteomeSimilarity`` and checks that
    ``execute()`` returns a falsy value and that the produced TSV is
    identical to ``output.aggregated.peptide_fasta.mouse.tsv``.
    """
    # Kept function-local: the module's top-level imports are not visible here.
    import shutil

    input_file = os.path.join(self.test_data_dir, 'Test.all_epitopes.aggregated.mouse.tsv')
    input_aggregated_metrics_file = os.path.join(self.test_data_dir, 'Test.all_epitopes.aggregated.mouse.tsv.metrics.json')
    input_fasta = os.path.join(self.test_data_dir, 'Test.mouse.fasta')

    # The tool is handed a temporary copy of the metrics JSON instead of the
    # checked-in fixture (presumably so the fixture is never modified --
    # TODO confirm against the tool's handling of aggregate_metrics_file).
    # The NamedTemporaryFile objects must stay referenced until the end of
    # the test: their backing files are deleted as soon as they are closed.
    tmp_aggregated_metrics_file = tempfile.NamedTemporaryFile()
    shutil.copy(input_aggregated_metrics_file, tmp_aggregated_metrics_file.name)

    output_file = tempfile.NamedTemporaryFile(suffix='.tsv')
    metric_file = "{}.reference_matches".format(output_file.name)

    try:
        self.assertFalse(CalculateReferenceProteomeSimilarity(
            input_file,
            input_fasta,
            output_file.name,
            peptide_fasta=self.peptide_fasta,
            aggregate_metrics_file=tmp_aggregated_metrics_file.name,
        ).execute())
        self.assertTrue(cmp(
            output_file.name,
            os.path.join(self.test_data_dir, "output.aggregated.peptide_fasta.mouse.tsv"),
        ))
        # The unconditional os.remove() used previously failed the test when
        # the side file was missing; keep that check as an explicit assertion.
        self.assertTrue(os.path.exists(metric_file))
    finally:
        # Remove the side file even when an assertion above fails, so a
        # failing run does not leave stale files in the temp directory.
        if os.path.exists(metric_file):
            os.remove(metric_file)

def test_wt_peptide_fully_in_mt_peptide(self):
input_file = os.path.join(self.test_data_dir, 'input_wt_in_mt.tsv')
input_fasta = os.path.join(self.test_data_dir, 'input_wt_in_mt.fasta')
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
ID E*01:01 G*01:09 Gene AA Change Num Passing Transcripts Best Peptide Best Transcript TSL Allele Pos Prob Pos Num Passing Peptides IC50 MT IC50 WT %ile MT %ile WT RNA Expr RNA VAF Allele Expr RNA Depth DNA VAF Tier Evaluation
22-41920894-41920895-G-C 2 1 ACO2 N1453S 1 KFNPQTDYL ENSMUST00000027032 Not Supported HLA-G*01:09 5 None 3 1262.760 1318.61 0.500 0.6 NA NA NA NA 0.250 Poor Pending
Original file line number Diff line number Diff line change
@@ -0,0 +1,247 @@
{
"tumor_purity": null,
"vaf_clonal": 0.571,
"vaf_subclonal": 0.286,
"binding_threshold": 500,
"aggregate_inclusion_binding_threshold": 5000,
"trna_vaf": 0.25,
"trna_cov": 10,
"allele_expr_threshold": 2.5,
"maximum_transcript_support_level": 1,
"percentile_threshold": null,
"allele_specific_binding_thresholds": false,
"mt_top_score_metric": "Best",
"wt_top_score_metric": "Corresponding",
"binding_cutoffs": {
"HLA-E*01:01": 500,
"HLA-G*01:09": 500
},
"is_allele_specific_binding_cutoff": {
"HLA-E*01:01": false,
"HLA-G*01:09": false
},
"allele_specific_anchors": false,
"anchor_contribution_threshold": 0.8,
"22-41920894-41920895-G-C": {
"good_binders": {
"Transcript Set 1": {
"peptides": {
"LLPLLPLLL": {
"ic50s_MT": [
2551.25,
4778.52001953125
],
"percentiles_MT": [
0.1700439453125,
6.80078125
],
"ic50s_WT": [
3099.81,
4830.5
],
"percentiles_WT": [
0.33,
6.9
],
"hla_types": [
"HLA-E*01:01",
"HLA-G*01:09"
],
"mutation_position": "6-7",
"problematic_positions": "None",
"individual_ic50_calls": {
"algorithms": [
"NetMHC",
"PickPocket"
],
"MT": {
"HLA-E*01:01": [
6891.60986328125,
2551.25
],
"HLA-G*01:09": [
"NA",
4778.52001953125
]
},
"WT": {
"HLA-E*01:01": [
9234.71,
3099.81
],
"HLA-G*01:09": [
"NA",
4830.5
]
}
},
"individual_percentile_calls": {
"algorithms": [
"NetMHC",
"PickPocket"
],
"MT": {
"HLA-E*01:01": [
0.1700439453125,
1.400390625
],
"HLA-G*01:09": [
"NA",
6.80078125
]
},
"WT": {
"HLA-E*01:01": [
0.33,
2.2
],
"HLA-G*01:09": [
"NA",
6.9
]
}
},
"individual_el_calls": {
"algorithms": [],
"MT": {
"HLA-E*01:01": [],
"HLA-G*01:09": []
},
"WT": {
"HLA-E*01:01": [],
"HLA-G*01:09": []
}
},
"individual_el_percentile_calls": {
"algorithms": [],
"MT": {
"HLA-E*01:01": [],
"HLA-G*01:09": []
},
"WT": {
"HLA-E*01:01": [],
"HLA-G*01:09": []
}
},
"wt_peptide": "LLPLLLLLL"
},
"LLPLLLLLG": {
"ic50s_MT": [
4676.22998046875,
"X"
],
"percentiles_MT": [
5.8984375,
"X"
],
"ic50s_WT": [
38565.12,
"X"
],
"percentiles_WT": [
81.0,
"X"
],
"hla_types": [
"HLA-E*01:01",
"HLA-G*01:09"
],
"mutation_position": "3-4",
"problematic_positions": "None",
"individual_ic50_calls": {
"algorithms": [
"NetMHC",
"PickPocket"
],
"MT": {
"HLA-E*01:01": [
31981.0703125,
4676.22998046875
]
},
"WT": {
"HLA-E*01:01": [
42768.73,
38565.12
]
}
},
"individual_percentile_calls": {
"algorithms": [
"NetMHC",
"PickPocket"
],
"MT": {
"HLA-E*01:01": [
14.0,
5.8984375
]
},
"WT": {
"HLA-E*01:01": [
60.0,
81.0
]
}
},
"individual_el_calls": {
"algorithms": [],
"MT": {
"HLA-E*01:01": []
},
"WT": {
"HLA-E*01:01": []
}
},
"individual_el_percentile_calls": {
"algorithms": [],
"MT": {
"HLA-E*01:01": []
},
"WT": {
"HLA-E*01:01": []
}
},
"wt_peptide": "LPLLLLLLG"
}
},
"transcripts": [
"ENST00000233809.4-IGFBP2-L/LLP-20"
],
"transcript_expr": [
"NA"
],
"tsl": [
"Not Supported"
],
"biotype": [
"protein_coding"
],
"transcript_length": [
325
],
"transcript_count": 1,
"peptide_count": 2,
"total_expr": 0
}
},
"sets": [
"Transcript Set 1"
],
"transcript_counts": [
1
],
"peptide_counts": [
2
],
"set_expr": [
0
],
"DNA VAF": 0.891,
"RNA VAF": "NA",
"gene_expr": "NA",
"best_peptide_mt": "LLPLLPLLL",
"best_peptide_wt": "LLPLLLLLL",
"best_hla_allele": "HLA-E*01:01"
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -3275,4 +3275,4 @@
"best_peptide_wt": "ATLSRTLLA",
"best_hla_allele": "HLA-E*01:01"
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
>WT.Rp1.ENSMUST00000027032.missense.1453N/S
IAGTLKFNPETDYLTGTDG
>MT.Rp1.ENSMUST00000027032.missense.1453N/S
IAGTLKFNPQTDYLTGTDG

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
ID E*01:01 G*01:09 Gene AA Change Num Passing Transcripts Best Peptide Best Transcript TSL Allele Pos Prob Pos Num Passing Peptides IC50 MT IC50 WT %ile MT %ile WT RNA Expr RNA VAF Allele Expr RNA Depth DNA VAF Tier Ref Match Evaluation
22-41920894-41920895-G-C 2 1 ACO2 N1453S 1 KFNPQTDYL ENSMUST00000027032 Not Supported HLA-G*01:09 5 None 3 1262.760 1318.61 0.500 0.6 NA NA NA NA 0.250 Poor False Pending

0 comments on commit 2dea4a7

Please sign in to comment.