Skip to content

Commit

Permalink
Merge branch 'ds_3548_gnomad_variants' of https://github.com/opentargets/gentropy into ds_3548_gnomad_variants

Browse files Browse the repository at this point in the history
  • Loading branch information
DSuveges committed Oct 2, 2024
2 parents 0461967 + 4b25702 commit c476d97
Show file tree
Hide file tree
Showing 5 changed files with 145 additions and 94 deletions.
4 changes: 2 additions & 2 deletions src/gentropy/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -417,8 +417,7 @@ class FinemapperConfig(StepConfig):
study_locus_manifest_path: str = MISSING
study_locus_index: int = MISSING
max_causal_snps: int = MISSING
primary_signal_pval_threshold: float = MISSING
secondary_signal_pval_threshold: float = MISSING
lead_pval_threshold: float = MISSING
purity_mean_r2_threshold: float = MISSING
purity_min_r2_threshold: float = MISSING
cs_lbf_thr: float = MISSING
Expand All @@ -430,6 +429,7 @@ class FinemapperConfig(StepConfig):
carma_time_limit: int = MISSING
imputed_r2_threshold: float = MISSING
ld_score_threshold: float = MISSING
ld_min_r2: float = MISSING
_target_: str = "gentropy.susie_finemapper.SusieFineMapperStep"


Expand Down
65 changes: 36 additions & 29 deletions src/gentropy/finemapping_simulations.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,40 +139,47 @@ def SimulationLoop(
imputed_r2_threshold=0.9,
ld_score_threshold=5,
sum_pips=0.99,
primary_signal_pval_threshold=1e-2,
secondary_signal_pval_threshold=1e-2,
lead_pval_threshold=1,
purity_mean_r2_threshold=0,
purity_min_r2_threshold=0,
cs_lbf_thr=2,
ld_min_r2=0.9,
locusStart=1,
locusEnd=2,
)
cred_set = CS_sim["study_locus"].df

X = ld_index_pd["variantId"][x_cycle["indexes"]].tolist()

cred_set = cred_set.withColumn("exploded_locus", col("locus.variantId"))
# Create a condition for each element in X
conditions = [array_contains(col("exploded_locus"), x) for x in X]
# Combine the conditions using the | operator
combined_condition = conditions[0]
for condition in conditions[1:]:
combined_condition = combined_condition | condition
# Create a new column that is True if any condition is True and False otherwise
cred_set = cred_set.withColumn("is_in_X", combined_condition)

cred_set = cred_set.withColumn(
"is_in_lead", when(col("variantId").isin(X), 1).otherwise(0)
)

cred_set = cred_set.toPandas()
cred_set = cred_set[column_list]

if counter == 1:
cred_sets = cred_set
else:
# cred_sets = cred_sets.unionByName(cred_set)
cred_sets = pd.concat([cred_sets, cred_set], axis=0)
# cred_sets=cred_sets.merge(cred_set)
counter = counter + 1
if CS_sim is not None:
cs_sl = CS_sim["study_locus"]
cred_set = cs_sl.df

X = ld_index_pd["variantId"][x_cycle["indexes"]].tolist()

cred_set = cred_set.withColumn(
"exploded_locus", col("locus.variantId")
)
# Create a condition for each element in X
conditions = [array_contains(col("exploded_locus"), x) for x in X]
# Combine the conditions using the | operator
combined_condition = conditions[0]
for condition in conditions[1:]:
combined_condition = combined_condition | condition
# Create a new column that is True if any condition is True and False otherwise
cred_set = cred_set.withColumn("is_in_X", combined_condition)

cred_set = cred_set.withColumn(
"is_in_lead", when(col("variantId").isin(X), 1).otherwise(0)
)

cred_set = cred_set.toPandas()
cred_set = cred_set[column_list]

if counter == 1:
cred_sets = cred_set
else:
# cred_sets = cred_sets.unionByName(cred_set)
cred_sets = pd.concat([cred_sets, cred_set], axis=0)
# cred_sets=cred_sets.merge(cred_set)
counter = counter + 1

return cred_sets

Expand Down
5 changes: 3 additions & 2 deletions src/gentropy/locus_breaker_clumping.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""Step to apply linkageg based clumping on study-locus dataset."""
"""Step to apply linkage based clumping on study-locus dataset."""

from __future__ import annotations

Expand Down Expand Up @@ -47,7 +47,8 @@ def __init__(
remove_mhc (bool, optional): If true will use exclude_region() to remove the MHC region.
"""
sum_stats = SummaryStatistics.from_parquet(
session, summary_statistics_input_path, recursiveFileLookup=True
session,
summary_statistics_input_path,
)
lbc = sum_stats.locus_breaker_clumping(
lbc_baseline_pvalue,
Expand Down
Loading

0 comments on commit c476d97

Please sign in to comment.