Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/hotfix'
Browse files Browse the repository at this point in the history
  • Loading branch information
susannasiebert committed Aug 9, 2023
2 parents 1307ab0 + 12fa00e commit 3317d2c
Show file tree
Hide file tree
Showing 12 changed files with 293 additions and 46 deletions.
2 changes: 1 addition & 1 deletion docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@
# The short X.Y version.
version = '4.0'
# The full version, including alpha/beta/rc tags.
release = '4.0.2'
release = '4.0.3'


# The language for content autogenerated by Sphinx. Refer to documentation
Expand Down
19 changes: 2 additions & 17 deletions docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -56,23 +56,8 @@ New in Release |release|

This is a bugfix release. It fixes the following problem(s):

- Arriba annotated fusion sequences may contain characters that aren't
supported. This update skips such sequences.
- The ``--aggregate-report-evaluation`` parameter in the standalone ``pvacseq
generate_protein_fasta`` command was previously set up with
nargs in order to allow specifying multiple values. However, this
conflicts with required positional parameters. The parameter definition was
updated so that multiple values are now specified as a comma-separated list.
- pVACfuse would previously fail in an odd way when none of the fusions in the
input were processable. This update now exits pVACfuse more gracefully in
this case.
- The reference proteome similarity step would previously fail when an epitope's
full peptide sequence wasn't found in the input fasta. It now skips such
epitopes and marks the Reference Match column as ``Not Run``.
- There was a mismatch in how proximal variants were incorporated into the
n-mer fasta files vs the "master" fasta file which had the potential of
epitopes not being present in the "master" fasta file. This update brings
both file creation steps in sync.
- This release fixes an issue in the reference proteome similarity step in pVACseq
where running with non-human data would cause an error.

New in Version |version|
------------------------
Expand Down
8 changes: 8 additions & 0 deletions docs/releases/4_0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -98,3 +98,11 @@ This is a bugfix release. It fixes the following problem(s):
n-mer fasta files vs the "master" fasta file which had the potential of
epitopes not being present in the "master" fasta file. This update brings
both file creation steps in sync.

New in Version 4.0.3
--------------------

This is a bugfix release. It fixes the following problem(s):

- This release fixes an issue in the reference proteome similarity step in pVACseq
where running with non-human data would cause an error.
4 changes: 2 additions & 2 deletions pvactools/lib/calculate_reference_proteome_similarity.py
Original file line number Diff line number Diff line change
Expand Up @@ -269,7 +269,7 @@ def _input_tsv_type(self, line):
def _get_full_peptide(self, line, mt_records_dict, wt_records_dict):
for record_id in mt_records_dict.keys():
(rest_record_id, variant_type, aa_change) = record_id.rsplit(".", 2)
transcript_regex = '^.*(ENST[0-9|.]+)$'
transcript_regex = '^.*(ENS[0-9|A-Z|.]+)$'
transcript_p = re.compile(transcript_regex)
m = transcript_p.match(rest_record_id)
if m:
Expand Down Expand Up @@ -522,7 +522,7 @@ def _write_outputs(self, processed_peptides, mt_records_dict, wt_records_dict):
for query_window, hit_reference_matches in groupby(metric_lines,key=lambda x:x['Match Window']):
hit_reference_matches = list(hit_reference_matches)
gene_regex = '^.*gene_symbol:([0-9|A-Z]+).*$'
transcript_regex = '^.*transcript:(ENST[0-9|.]+).*$'
transcript_regex = '^.*transcript:(ENS[0-9|A-Z|.]+).*$'
gene_p = re.compile(gene_regex)
transcript_p = re.compile(transcript_regex)
genes = []
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@

setup(
name="pvactools",
version="4.0.2",
version="4.0.3",
packages=[
"pvactools.tools",
"pvactools.tools.pvacbind",
Expand Down
23 changes: 23 additions & 0 deletions tests/test_calculate_reference_proteome_similarity.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,29 @@ def test_calculate_self_similarity_with_aggregated_tsv_and_peptide_fasta(self):
))
os.remove(metric_file)

def test_calculate_self_similarity_with_aggregated_tsv_and_peptide_fasta_mouse(self):
    # Runs CalculateReferenceProteomeSimilarity on the "mouse" aggregated TSV,
    # metrics JSON and FASTA fixtures, then compares the written TSV against
    # the expected "output.aggregated.peptide_fasta.mouse.tsv" fixture.
    import shutil

    input_file = os.path.join(self.test_data_dir, 'Test.all_epitopes.aggregated.mouse.tsv')
    input_aggregated_metrics_file = os.path.join(self.test_data_dir, 'Test.all_epitopes.aggregated.mouse.tsv.metrics.json')
    input_fasta = os.path.join(self.test_data_dir, 'Test.mouse.fasta')

    # The tool is handed a temporary copy of the metrics JSON instead of the
    # fixture itself.
    # NOTE(review): presumably so a run cannot modify the checked-in fixture -- confirm.
    tmp_aggregated_metrics_file = tempfile.NamedTemporaryFile()
    shutil.copy(input_aggregated_metrics_file, tmp_aggregated_metrics_file.name)

    output_file = tempfile.NamedTemporaryFile(suffix='.tsv')
    # Side file named after the output path; it is removed explicitly at the
    # end of the test because it is not one of the NamedTemporaryFile objects.
    metric_file = "{}.reference_matches".format(output_file.name)

    # execute() is expected to return a falsy value.
    self.assertFalse(CalculateReferenceProteomeSimilarity(
        input_file,
        input_fasta,
        output_file.name,
        peptide_fasta=self.peptide_fasta,
        aggregate_metrics_file=tmp_aggregated_metrics_file.name,
    ).execute())
    # NOTE(review): cmp is presumably filecmp.cmp imported at file level -- confirm.
    self.assertTrue(cmp(
        output_file.name,
        os.path.join(self.test_data_dir, "output.aggregated.peptide_fasta.mouse.tsv"),
    ))
    os.remove(metric_file)

def test_wt_peptide_fully_in_mt_peptide(self):
input_file = os.path.join(self.test_data_dir, 'input_wt_in_mt.tsv')
input_fasta = os.path.join(self.test_data_dir, 'input_wt_in_mt.fasta')
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
ID E*01:01 G*01:09 Gene AA Change Num Passing Transcripts Best Peptide Best Transcript TSL Allele Pos Prob Pos Num Passing Peptides IC50 MT IC50 WT %ile MT %ile WT RNA Expr RNA VAF Allele Expr RNA Depth DNA VAF Tier Evaluation
22-41920894-41920895-G-C 2 1 ACO2 N1453S 1 KFNPQTDYL ENSMUST00000027032 Not Supported HLA-G*01:09 5 None 3 1262.760 1318.61 0.500 0.6 NA NA NA NA 0.250 Poor Pending
Original file line number Diff line number Diff line change
@@ -0,0 +1,247 @@
{
"tumor_purity": null,
"vaf_clonal": 0.571,
"vaf_subclonal": 0.286,
"binding_threshold": 500,
"aggregate_inclusion_binding_threshold": 5000,
"trna_vaf": 0.25,
"trna_cov": 10,
"allele_expr_threshold": 2.5,
"maximum_transcript_support_level": 1,
"percentile_threshold": null,
"allele_specific_binding_thresholds": false,
"mt_top_score_metric": "Best",
"wt_top_score_metric": "Corresponding",
"binding_cutoffs": {
"HLA-E*01:01": 500,
"HLA-G*01:09": 500
},
"is_allele_specific_binding_cutoff": {
"HLA-E*01:01": false,
"HLA-G*01:09": false
},
"allele_specific_anchors": false,
"anchor_contribution_threshold": 0.8,
"22-41920894-41920895-G-C": {
"good_binders": {
"Transcript Set 1": {
"peptides": {
"LLPLLPLLL": {
"ic50s_MT": [
2551.25,
4778.52001953125
],
"percentiles_MT": [
0.1700439453125,
6.80078125
],
"ic50s_WT": [
3099.81,
4830.5
],
"percentiles_WT": [
0.33,
6.9
],
"hla_types": [
"HLA-E*01:01",
"HLA-G*01:09"
],
"mutation_position": "6-7",
"problematic_positions": "None",
"individual_ic50_calls": {
"algorithms": [
"NetMHC",
"PickPocket"
],
"MT": {
"HLA-E*01:01": [
6891.60986328125,
2551.25
],
"HLA-G*01:09": [
"NA",
4778.52001953125
]
},
"WT": {
"HLA-E*01:01": [
9234.71,
3099.81
],
"HLA-G*01:09": [
"NA",
4830.5
]
}
},
"individual_percentile_calls": {
"algorithms": [
"NetMHC",
"PickPocket"
],
"MT": {
"HLA-E*01:01": [
0.1700439453125,
1.400390625
],
"HLA-G*01:09": [
"NA",
6.80078125
]
},
"WT": {
"HLA-E*01:01": [
0.33,
2.2
],
"HLA-G*01:09": [
"NA",
6.9
]
}
},
"individual_el_calls": {
"algorithms": [],
"MT": {
"HLA-E*01:01": [],
"HLA-G*01:09": []
},
"WT": {
"HLA-E*01:01": [],
"HLA-G*01:09": []
}
},
"individual_el_percentile_calls": {
"algorithms": [],
"MT": {
"HLA-E*01:01": [],
"HLA-G*01:09": []
},
"WT": {
"HLA-E*01:01": [],
"HLA-G*01:09": []
}
},
"wt_peptide": "LLPLLLLLL"
},
"LLPLLLLLG": {
"ic50s_MT": [
4676.22998046875,
"X"
],
"percentiles_MT": [
5.8984375,
"X"
],
"ic50s_WT": [
38565.12,
"X"
],
"percentiles_WT": [
81.0,
"X"
],
"hla_types": [
"HLA-E*01:01",
"HLA-G*01:09"
],
"mutation_position": "3-4",
"problematic_positions": "None",
"individual_ic50_calls": {
"algorithms": [
"NetMHC",
"PickPocket"
],
"MT": {
"HLA-E*01:01": [
31981.0703125,
4676.22998046875
]
},
"WT": {
"HLA-E*01:01": [
42768.73,
38565.12
]
}
},
"individual_percentile_calls": {
"algorithms": [
"NetMHC",
"PickPocket"
],
"MT": {
"HLA-E*01:01": [
14.0,
5.8984375
]
},
"WT": {
"HLA-E*01:01": [
60.0,
81.0
]
}
},
"individual_el_calls": {
"algorithms": [],
"MT": {
"HLA-E*01:01": []
},
"WT": {
"HLA-E*01:01": []
}
},
"individual_el_percentile_calls": {
"algorithms": [],
"MT": {
"HLA-E*01:01": []
},
"WT": {
"HLA-E*01:01": []
}
},
"wt_peptide": "LPLLLLLLG"
}
},
"transcripts": [
"ENST00000233809.4-IGFBP2-L/LLP-20"
],
"transcript_expr": [
"NA"
],
"tsl": [
"Not Supported"
],
"biotype": [
"protein_coding"
],
"transcript_length": [
325
],
"transcript_count": 1,
"peptide_count": 2,
"total_expr": 0
}
},
"sets": [
"Transcript Set 1"
],
"transcript_counts": [
1
],
"peptide_counts": [
2
],
"set_expr": [
0
],
"DNA VAF": 0.891,
"RNA VAF": "NA",
"gene_expr": "NA",
"best_peptide_mt": "LLPLLPLLL",
"best_peptide_wt": "LLPLLLLLL",
"best_hla_allele": "HLA-E*01:01"
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -3275,4 +3275,4 @@
"best_peptide_wt": "ATLSRTLLA",
"best_hla_allele": "HLA-E*01:01"
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
>WT.Rp1.ENSMUST00000027032.missense.1453N/S
IAGTLKFNPETDYLTGTDG
>MT.Rp1.ENSMUST00000027032.missense.1453N/S
IAGTLKFNPQTDYLTGTDG

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
ID E*01:01 G*01:09 Gene AA Change Num Passing Transcripts Best Peptide Best Transcript TSL Allele Pos Prob Pos Num Passing Peptides IC50 MT IC50 WT %ile MT %ile WT RNA Expr RNA VAF Allele Expr RNA Depth DNA VAF Tier Ref Match Evaluation
22-41920894-41920895-G-C 2 1 ACO2 N1453S 1 KFNPQTDYL ENSMUST00000027032 Not Supported HLA-G*01:09 5 None 3 1262.760 1318.61 0.500 0.6 NA NA NA NA 0.250 Poor False Pending

0 comments on commit 3317d2c

Please sign in to comment.