
Commit 174f8f9

feat: adding filtering to susie finemapper (#796)
* feat: adding filtering to susie fine_mapper
* fix: correct options
* fix: fix simulations
* fix: correct clumping
* fix: fix options
* fix: fix for catching None
* fix: adding LD and gwas_df as output
* chore: changing defaults
* chore: v1
* fix: spelling in susie_finemapper.py

Co-authored-by: Daniel-Considine <113430683+Daniel-Considine@users.noreply.github.com>
1 parent: 9177dd4 · commit: 174f8f9

4 files changed (+142, -92 lines)

src/gentropy/config.py

Lines changed: 2 additions & 2 deletions
@@ -417,8 +417,7 @@ class FinemapperConfig(StepConfig):
     study_locus_manifest_path: str = MISSING
     study_locus_index: int = MISSING
     max_causal_snps: int = MISSING
-    primary_signal_pval_threshold: float = MISSING
-    secondary_signal_pval_threshold: float = MISSING
+    lead_pval_threshold: float = MISSING
     purity_mean_r2_threshold: float = MISSING
     purity_min_r2_threshold: float = MISSING
     cs_lbf_thr: float = MISSING
@@ -430,6 +429,7 @@ class FinemapperConfig(StepConfig):
     carma_time_limit: int = MISSING
     imputed_r2_threshold: float = MISSING
     ld_score_threshold: float = MISSING
+    ld_min_r2: float = MISSING
     _target_: str = "gentropy.susie_finemapper.SusieFineMapperStep"

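For orientation, here is a minimal sketch of the option change, using a plain Python dataclass as a stand-in for the real FinemapperConfig (which lives in gentropy.config and uses Hydra's MISSING sentinel). The field names follow the diff above; the default values and the class name are illustrative only, not the library's API.

from dataclasses import dataclass

# Hypothetical stand-in for the FinemapperConfig fields touched by this commit.
@dataclass
class FinemapperFilterOptions:
    # Single lead p-value filter replacing primary/secondary_signal_pval_threshold.
    lead_pval_threshold: float = 1.0
    purity_mean_r2_threshold: float = 0.0
    purity_min_r2_threshold: float = 0.0
    cs_lbf_thr: float = 2.0
    # New in this commit; presumably an r2 cut-off used by the added LD-based filtering.
    ld_min_r2: float = 0.9

# Example override of the two changed options (values are made up).
opts = FinemapperFilterOptions(lead_pval_threshold=1e-5, ld_min_r2=0.8)
print(opts)
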
src/gentropy/finemapping_simulations.py

Lines changed: 36 additions & 29 deletions
@@ -139,40 +139,47 @@ def SimulationLoop(
             imputed_r2_threshold=0.9,
             ld_score_threshold=5,
             sum_pips=0.99,
-            primary_signal_pval_threshold=1e-2,
-            secondary_signal_pval_threshold=1e-2,
+            lead_pval_threshold=1,
             purity_mean_r2_threshold=0,
             purity_min_r2_threshold=0,
             cs_lbf_thr=2,
+            ld_min_r2=0.9,
+            locusStart=1,
+            locusEnd=2,
         )
-        cred_set = CS_sim["study_locus"].df
-
-        X = ld_index_pd["variantId"][x_cycle["indexes"]].tolist()
-
-        cred_set = cred_set.withColumn("exploded_locus", col("locus.variantId"))
-        # Create a condition for each element in X
-        conditions = [array_contains(col("exploded_locus"), x) for x in X]
-        # Combine the conditions using the | operator
-        combined_condition = conditions[0]
-        for condition in conditions[1:]:
-            combined_condition = combined_condition | condition
-        # Create a new column that is True if any condition is True and False otherwise
-        cred_set = cred_set.withColumn("is_in_X", combined_condition)
-
-        cred_set = cred_set.withColumn(
-            "is_in_lead", when(col("variantId").isin(X), 1).otherwise(0)
-        )
-
-        cred_set = cred_set.toPandas()
-        cred_set = cred_set[column_list]

-        if counter == 1:
-            cred_sets = cred_set
-        else:
-            # cred_sets = cred_sets.unionByName(cred_set)
-            cred_sets = pd.concat([cred_sets, cred_set], axis=0)
-        # cred_sets=cred_sets.merge(cred_set)
-        counter = counter + 1
+        if CS_sim is not None:
+            cs_sl = CS_sim["study_locus"]
+            cred_set = cs_sl.df
+
+            X = ld_index_pd["variantId"][x_cycle["indexes"]].tolist()
+
+            cred_set = cred_set.withColumn(
+                "exploded_locus", col("locus.variantId")
+            )
+            # Create a condition for each element in X
+            conditions = [array_contains(col("exploded_locus"), x) for x in X]
+            # Combine the conditions using the | operator
+            combined_condition = conditions[0]
+            for condition in conditions[1:]:
+                combined_condition = combined_condition | condition
+            # Create a new column that is True if any condition is True and False otherwise
+            cred_set = cred_set.withColumn("is_in_X", combined_condition)
+
+            cred_set = cred_set.withColumn(
+                "is_in_lead", when(col("variantId").isin(X), 1).otherwise(0)
+            )
+
+            cred_set = cred_set.toPandas()
+            cred_set = cred_set[column_list]
+
+            if counter == 1:
+                cred_sets = cred_set
+            else:
+                # cred_sets = cred_sets.unionByName(cred_set)
+                cred_sets = pd.concat([cred_sets, cred_set], axis=0)
+            # cred_sets=cred_sets.merge(cred_set)
+            counter = counter + 1

     return cred_sets

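To make the new post-processing easier to follow, here is a self-contained sketch of the same pattern on toy data: guard against a fine-mapper run that returned nothing, pull the variant IDs out of the nested locus column, and build the is_in_X / is_in_lead flags by OR-ing one array_contains condition per simulated causal variant. Column names mirror the diff; the toy DataFrame, the functools.reduce shortcut, and the local Spark session are illustrative only.

from functools import reduce
from pyspark.sql import SparkSession
from pyspark.sql.functions import array_contains, col, when

spark = SparkSession.builder.master("local[1]").getOrCreate()

# Toy credible-set DataFrame: one row per credible set, with a nested locus column.
cred_set = spark.createDataFrame(
    [("v1", [("v1",), ("v3",)]),
     ("v2", [("v4",)])],
    "variantId string, locus array<struct<variantId:string>>",
)

X = ["v1", "v3"]  # simulated causal variants for this cycle

if cred_set is not None:  # mirrors the `if CS_sim is not None` guard added above
    # Extract the variant IDs from the nested locus structs into a flat array column.
    cred_set = cred_set.withColumn("exploded_locus", col("locus.variantId"))
    # One condition per causal variant, combined with | (equivalent to the explicit loop above).
    combined = reduce(
        lambda a, b: a | b,
        [array_contains(col("exploded_locus"), x) for x in X],
    )
    cred_set = (
        cred_set.withColumn("is_in_X", combined)
        .withColumn("is_in_lead", when(col("variantId").isin(X), 1).otherwise(0))
    )
    cred_set.show(truncate=False)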