-
Notifications
You must be signed in to change notification settings - Fork 9
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
fix: adding data specific p-value filters #788
Changes from 5 commits
73b97a3
0077370
0f5b669
f629e63
9d66127
88e4dd6
996fe3a
5c57ab4
3b95cfd
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,6 +3,7 @@ | |
from __future__ import annotations | ||
|
||
from gentropy.common.session import Session | ||
from gentropy.config import EqtlCatalogueConfig | ||
from gentropy.datasource.eqtl_catalogue.finemapping import EqtlCatalogueFinemapping | ||
from gentropy.datasource.eqtl_catalogue.study_index import EqtlCatalogueStudyIndex | ||
|
||
|
@@ -58,13 +59,21 @@ def __init__( | |
processed_susie_df = EqtlCatalogueFinemapping.parse_susie_results( | ||
credible_sets_df, lbf_df, studies_metadata | ||
) | ||
credible_sets = EqtlCatalogueFinemapping.from_susie_results(processed_susie_df) | ||
study_index = EqtlCatalogueStudyIndex.from_susie_results(processed_susie_df) | ||
|
||
# Load | ||
study_index.df.write.mode(session.write_mode).parquet( | ||
eqtl_catalogue_study_index_out | ||
( | ||
EqtlCatalogueStudyIndex.from_susie_results(processed_susie_df) | ||
# Writing the output: | ||
.df.write.mode(session.write_mode) | ||
.parquet(eqtl_catalogue_study_index_out) | ||
) | ||
credible_sets.df.write.mode(session.write_mode).parquet( | ||
eqtl_catalogue_credible_sets_out | ||
|
||
( | ||
EqtlCatalogueFinemapping.from_susie_results(processed_susie_df) | ||
# Flagging sub-significant loci: | ||
.validate_lead_pvalue( | ||
pvalue_cutoff=EqtlCatalogueConfig().eqtl_lead_pvalue_threshold | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same comment: |
||
) | ||
# Writing the output: | ||
.df.write.mode(session.write_mode) | ||
.parquet(eqtl_catalogue_credible_sets_out) | ||
) |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -31,13 +31,17 @@ def __init__( | |
""" | ||
# Read finemapping outputs from the input paths. | ||
|
||
finngen_finemapping_df = FinnGenFinemapping.from_finngen_susie_finemapping( | ||
spark=session.spark, | ||
finngen_susie_finemapping_snp_files=finngen_susie_finemapping_snp_files, | ||
finngen_susie_finemapping_cs_summary_files=finngen_susie_finemapping_cs_summary_files, | ||
) | ||
|
||
# Write the output. | ||
finngen_finemapping_df.df.write.mode(session.write_mode).parquet( | ||
finngen_finemapping_out | ||
( | ||
FinnGenFinemapping.from_finngen_susie_finemapping( | ||
spark=session.spark, | ||
finngen_susie_finemapping_snp_files=finngen_susie_finemapping_snp_files, | ||
finngen_susie_finemapping_cs_summary_files=finngen_susie_finemapping_cs_summary_files, | ||
) | ||
# Flagging sub-significant loci: | ||
.validate_lead_pvalue( | ||
pvalue_cutoff=FinngenFinemappingConfig().finngen_finemapping_lead_pvalue_threshold | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same comment: |
||
) | ||
# Writing the output: | ||
.df.write.mode(session.write_mode) | ||
.parquet(finngen_finemapping_out) | ||
) |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,6 +3,7 @@ | |
from __future__ import annotations | ||
|
||
from gentropy.common.session import Session | ||
from gentropy.config import WindowBasedClumpingStepConfig | ||
from gentropy.dataset.study_locus import CredibleInterval, StudyLocus | ||
from gentropy.method.pics import PICS | ||
|
||
|
@@ -28,8 +29,14 @@ def __init__( | |
session, study_locus_ld_annotated_in | ||
) | ||
# PICS | ||
picsed_sl = PICS.finemap(study_locus_ld_annotated).filter_credible_set( | ||
credible_interval=CredibleInterval.IS99 | ||
( | ||
PICS.finemap(study_locus_ld_annotated) | ||
.filter_credible_set(credible_interval=CredibleInterval.IS99) | ||
# Flagging sub-significant loci: | ||
.validate_lead_pvalue( | ||
pvalue_cutoff=WindowBasedClumpingStepConfig().gwas_significance | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same comment. |
||
) | ||
# Writing the output: | ||
.df.write.mode(session.write_mode) | ||
.parquet(picsed_study_locus_out) | ||
) | ||
# Write | ||
picsed_sl.df.write.mode(session.write_mode).parquet(picsed_study_locus_out) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There's one thing I'm not sure about. You have added `finngen_finemapping_lead_pvalue_threshold` to the relevant config, and refer to it as `pvalue_cutoff=FinngenFinemappingConfig().finngen_finemapping_lead_pvalue_threshold` in the step. However, `finngen_finemapping_lead_pvalue_threshold` is not an argument for `FinnGenFinemappingIngestionStep`. Is it OK? Would all parameters in the config be passed to the step? Would that cause any problem? @project-defiant, what do you think?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
OK, double-checked with @project-defiant and @d0choa: all parameters in the StepConfig classes need to be parameters in the `__init__` function of the step.