Skip to content

Commit

Permalink
test: fix schema
Browse files Browse the repository at this point in the history
  • Loading branch information
d0choa committed Dec 7, 2023
1 parent 0d1b387 commit de6f083
Showing 1 changed file with 0 additions and 61 deletions.
61 changes: 0 additions & 61 deletions src/otg/dataset/variant_annotation.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,67 +136,6 @@ def get_most_severe_vep_v2g(
_schema=V2G.get_schema(),
)

def get_polyphen_v2g(
    self: VariantAnnotation, gene_index: GeneIndex | None = None
) -> V2G:
    """Build variant-to-gene assignments scored with PolyPhen's transcript-level prediction.

    PolyPhen informs about the probability that a substitution is damaging.
    The score can be interpreted as follows:
        - 0.0 to 0.15 -- Predicted to be benign.
        - 0.15 to 1.0 -- Possibly damaging.
        - 0.85 to 1.0 -- Predicted to be damaging.

    NOTE(review): as written, the "possibly damaging" range overlaps the
    "damaging" one -- confirm the intended upper bound.

    Only transcript consequences with a non-null ``polyphenScore`` are kept.
    Every output row is tagged with ``datatypeId`` "vep" and ``datasourceId``
    "polyphen".

    Args:
        gene_index (GeneIndex | None): A gene index to filter by; forwarded to
            ``get_transcript_consequence_df``. Defaults to None.

    Returns:
        V2G: variant to gene assignments with their polyphen scores
    """
    # Start from the per-transcript consequences, then drop unscored rows.
    transcript_consequences = self.get_transcript_consequence_df(gene_index)
    polyphen_score = f.col("transcriptConsequence.polyphenScore")
    scored_consequences = transcript_consequences.filter(polyphen_score.isNotNull())

    # Project onto the V2G layout; the PolyPhen score is used unchanged.
    v2g_df = scored_consequences.select(
        "variantId",
        "chromosome",
        "geneId",
        polyphen_score.alias("score"),
        f.lit("vep").alias("datatypeId"),
        f.lit("polyphen").alias("datasourceId"),
    )
    return V2G(_df=v2g_df, _schema=V2G.get_schema())

def get_sift_v2g(self: VariantAnnotation, gene_index: GeneIndex) -> V2G:
    """Build variant-to-gene assignments scored with SIFT's transcript-level prediction.

    SIFT informs about the probability that a substitution is tolerated.
    The raw SIFT score can be interpreted as follows:
        - 0.0 to 0.05 -- Likely to be deleterious.
        - 0.05 to 1.0 -- Likely to be tolerated.

    Only transcript consequences with a non-null ``siftScore`` are kept. The
    emitted ``score`` is ``1 - siftScore``, not the raw SIFT value. Every
    output row is tagged with ``datatypeId`` "vep" and ``datasourceId`` "sift".

    Args:
        gene_index (GeneIndex): A gene index to filter by; forwarded to
            ``get_transcript_consequence_df``.

    Returns:
        V2G: variant to gene assignments with their SIFT scores
    """
    # Start from the per-transcript consequences, then drop unscored rows.
    transcript_consequences = self.get_transcript_consequence_df(gene_index)
    has_sift_score = f.col("transcriptConsequence.siftScore").isNotNull()
    scored_consequences = transcript_consequences.filter(has_sift_score)

    # Invert the SIFT score (1 - siftScore) when projecting onto the V2G layout.
    inverted_sift = f.expr("1 - transcriptConsequence.siftScore")
    v2g_df = scored_consequences.select(
        "variantId",
        "chromosome",
        "geneId",
        inverted_sift.alias("score"),
        f.lit("vep").alias("datatypeId"),
        f.lit("sift").alias("datasourceId"),
    )
    return V2G(_df=v2g_df, _schema=V2G.get_schema())

def get_plof_v2g(self: VariantAnnotation, gene_index: GeneIndex) -> V2G:
"""Creates a dataset with variant to gene assignments with a flag indicating if the variant is predicted to be a loss-of-function variant by the LOFTEE algorithm.
Expand Down

0 comments on commit de6f083

Please sign in to comment.