Skip to content

Commit

Permalink
feat: changing studylocus validation to 95 percent credible sets (#921)
Browse files Browse the repository at this point in the history
* feat: changing studylocus validation to 95 percent credible sets

* fix: updating comment in code to reflect 95% credset

* fix: removing credset number of partitions

* fix: flag name

---------

Co-authored-by: Yakov Tsepilov <yt4@sanger.ac.uk>
  • Loading branch information
Daniel-Considine and addramir authored Nov 21, 2024
1 parent b6303d5 commit 05e47a3
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 12 deletions.
2 changes: 1 addition & 1 deletion src/gentropy/dataset/study_locus.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ class StudyLocusQualityCheck(Enum):
EXPLAINED_BY_SUSIE = "Study locus in region explained by a SuSiE credible set"
OUT_OF_SAMPLE_LD = "Study locus finemapped without in-sample LD reference"
ABNORMAL_PIPS = (
"Study locus with a sum of PIPs that not in the expected range [0.99,1]"
"Study locus with a sum of PIPs that not in the expected range [0.95,1]"
)
INVALID_CHROMOSOME = "Chromosome not in 1:22, X, Y, XY or MT"
TOP_HIT_AND_SUMMARY_STATS = (
Expand Down
22 changes: 11 additions & 11 deletions src/gentropy/study_locus_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,24 +46,24 @@ def __init__(
.annotate_study_type(study_index) # Add study type to study locus
.qc_redundant_top_hits_from_PICS() # Flagging top hits from studies with PICS summary statistics
.qc_explained_by_SuSiE() # Flagging credible sets in regions explained by SuSiE
# Flagging credible sets with PIP > 1 or PIP < 0.99
# Annotates credible intervals and filters to keep only 95% credible sets
.filter_credible_set(credible_interval=CredibleInterval.IS95)
# Flagging credible sets whose sum of PIPs is outside the expected range (below 0.95 or above 1.0001)
.qc_abnormal_pips(
sum_pips_lower_threshold=0.99, sum_pips_upper_threshold=1.0001
sum_pips_lower_threshold=0.95, sum_pips_upper_threshold=1.0001
)
# Annotates credible intervals and filter to only keep 99% credible sets
.filter_credible_set(credible_interval=CredibleInterval.IS99)
# Annotate credible set confidence:
.assign_confidence()
).persist() # we will need this for 2 types of outputs

# Valid study locus partitioned to simplify the finding of overlaps
study_locus_with_qc.valid_rows(
invalid_qc_reasons, invalid=True
).df.repartitionByRange("chromosome", "position").sortWithinPartitions(
study_locus_with_qc.valid_rows(invalid_qc_reasons).df.repartitionByRange(
"chromosome", "position"
).write.mode(session.write_mode).parquet(invalid_study_locus_path)

# Infalid study locus
study_locus_with_qc.valid_rows(invalid_qc_reasons).df.write.mode(
).sortWithinPartitions("chromosome", "position").write.mode(
session.write_mode
).parquet(valid_study_locus_path)

# Invalid study locus
study_locus_with_qc.valid_rows(invalid_qc_reasons, invalid=True).df.write.mode(
session.write_mode
).parquet(invalid_study_locus_path)

0 comments on commit 05e47a3

Please sign in to comment.