Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: 99% credible set validation during study_locus_validation #765

Merged
merged 9 commits into from
Sep 24, 2024
6 changes: 2 additions & 4 deletions src/gentropy/colocalisation.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from pyspark.sql.functions import col

from gentropy.common.session import Session
from gentropy.dataset.study_locus import CredibleInterval, StudyLocus
from gentropy.dataset.study_locus import StudyLocus
from gentropy.method.colocalisation import Coloc


Expand Down Expand Up @@ -46,9 +46,7 @@ def __init__(
)

# Transform
overlaps = credible_set.filter_credible_set(
CredibleInterval.IS95
).find_overlaps()
overlaps = credible_set.find_overlaps()
colocalisation_results = colocalisation_class.colocalise(overlaps) # type: ignore

# Load
Expand Down
4 changes: 2 additions & 2 deletions src/gentropy/dataset/study_locus.py
Original file line number Diff line number Diff line change
Expand Up @@ -553,7 +553,7 @@ def filter_credible_set(
self: StudyLocus,
credible_interval: CredibleInterval,
) -> StudyLocus:
"""Filter study-locus tag variants based on given credible interval.
"""Annotate and filter study-locus tag variants based on given credible interval.

Args:
credible_interval (CredibleInterval): Credible interval to filter for.
Expand All @@ -562,7 +562,7 @@ def filter_credible_set(
StudyLocus: Filtered study-locus dataset.
"""
return StudyLocus(
_df=self._df.withColumn(
_df=self.annotate_credible_sets().df.withColumn(
"locus",
f.filter(
f.col("locus"),
Expand Down
6 changes: 2 additions & 4 deletions src/gentropy/pics.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,8 @@ def __init__(
session, study_locus_ld_annotated_in
)
# PICS
picsed_sl = (
PICS.finemap(study_locus_ld_annotated)
.annotate_credible_sets()
.filter_credible_set(credible_interval=CredibleInterval.IS99)
picsed_sl = PICS.finemap(study_locus_ld_annotated).filter_credible_set(
credible_interval=CredibleInterval.IS99
)
# Write
picsed_sl.df.write.mode(session.write_mode).parquet(picsed_study_locus_out)
4 changes: 3 additions & 1 deletion src/gentropy/study_locus_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

from gentropy.common.session import Session
from gentropy.dataset.study_index import StudyIndex
from gentropy.dataset.study_locus import StudyLocus
from gentropy.dataset.study_locus import CredibleInterval, StudyLocus


class StudyLocusValidationStep:
Expand Down Expand Up @@ -49,6 +49,8 @@ def __init__(
.annotate_study_type(study_index) # Add study type to study locus
.qc_redundant_top_hits_from_PICS() # Flagging top hits from studies with PICS summary statistics
.validate_unique_study_locus_id() # Flagging duplicated study locus ids
# Annotate credible intervals and filter to keep only 99% credible sets
.filter_credible_set(credible_interval=CredibleInterval.IS99)
).persist() # we will need this for 2 types of outputs

study_locus_with_qc.valid_rows(
Expand Down
Loading