Skip to content

Commit 9295d58

Browse files
feat: add effect size direction to coloc output (#854)
* feat(coloc effect sizes): add calculate beta ratio method
* feat(coloc effect sizes): update schema and coloc code
* fix(coloc effect sizes): tweak beta ratio return
* chore: tweak colocalisation schema
* feat(coloc_effect_sizes): add simple test
* fix(coloc_effect_sizes): change variable to camel case
* fix: test broken
* feat: remove zero betas too
* chore: stylistic changes
---------
Co-authored-by: Daniel Suveges <daniel.suveges@protonmail.com>
1 parent c599856 commit 9295d58

File tree

4 files changed

+133
-3
lines changed

4 files changed

+133
-3
lines changed

src/gentropy/assets/schemas/colocalisation.json

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,12 @@
7272
"type": "double",
7373
"nullable": true,
7474
"metadata": {}
75+
},
76+
{
77+
"name": "betaRatioSignAverage",
78+
"type": "double",
79+
"nullable": true,
80+
"metadata": {}
7581
}
7682
]
7783
}

src/gentropy/dataset/study_locus_overlap.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,13 @@
55
from dataclasses import dataclass
66
from typing import TYPE_CHECKING
77

8+
import pyspark.sql.functions as f
9+
810
from gentropy.common.schemas import parse_spark_schema
911
from gentropy.dataset.dataset import Dataset
1012

1113
if TYPE_CHECKING:
14+
from pyspark.sql import DataFrame
1215
from pyspark.sql.types import StructType
1316

1417
from gentropy.dataset.study_locus import StudyLocus
@@ -48,6 +51,37 @@ def from_associations(
4851
"""
4952
return study_locus.find_overlaps()
5053

54+
55+
def calculate_beta_ratio(self: StudyLocusOverlap) -> DataFrame:
    """Summarise effect-size direction agreement for each pair of overlapping loci.

    The `statistics` struct is flattened into top-level columns, rows whose
    left or right beta is null or zero are discarded, and the sign of
    `left_beta / right_beta` is averaged over the remaining rows of each
    locus pair. Locus pairs with no usable row are absent from the result.

    Returns:
        DataFrame: One row per (leftStudyLocusId, rightStudyLocusId, chromosome)
            with the mean ratio sign stored in `betaRatioSignAverage`.
    """
    left_beta = f.col("left_beta")
    right_beta = f.col("right_beta")

    # A ratio sign can only be derived when both betas are present and non-zero.
    has_usable_betas = (
        left_beta.isNotNull()
        & right_beta.isNotNull()
        & (left_beta != 0)
        & (right_beta != 0)
    )

    # Promote the fields of the nested statistics struct to top-level columns.
    flattened = self.df.select("*", "statistics.*").drop("statistics")

    # +1 when both effects point the same way, -1 when they are opposed.
    signed = flattened.filter(has_usable_betas).withColumn(
        "betaRatioSign",
        f.signum(left_beta / right_beta),
    )

    # Average the per-row signs within each pair of overlapping loci.
    locus_pair_keys = ["leftStudyLocusId", "rightStudyLocusId", "chromosome"]
    return signed.groupBy(*locus_pair_keys).agg(
        f.avg("betaRatioSign").alias("betaRatioSignAverage")
    )
84+
5185
def _convert_to_square_matrix(self: StudyLocusOverlap) -> StudyLocusOverlap:
5286
"""Convert the dataset to a square matrix.
5387

src/gentropy/method/colocalisation.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,11 @@ def colocalise(
179179
f.sum(f.col("clpp")).alias("clpp"),
180180
)
181181
.withColumn("colocalisationMethod", f.lit(cls.METHOD_NAME))
182+
.join(
183+
overlapping_signals.calculate_beta_ratio(),
184+
on=["leftStudyLocusId", "rightStudyLocusId","chromosome"],
185+
how="left"
186+
)
182187
),
183188
_schema=Colocalisation.get_schema(),
184189
)
@@ -379,6 +384,11 @@ def colocalise(
379384
"lH4bf",
380385
)
381386
.withColumn("colocalisationMethod", f.lit(cls.METHOD_NAME))
387+
.join(
388+
overlapping_signals.calculate_beta_ratio(),
389+
on=["leftStudyLocusId", "rightStudyLocusId","chromosome"],
390+
how="left"
391+
)
382392
),
383393
_schema=Colocalisation.get_schema(),
384394
)

tests/gentropy/method/test_colocalisation_method.py

Lines changed: 83 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,12 @@ def test_coloc(mock_study_locus_overlap: StudyLocusOverlap) -> None:
3838
"rightStudyType": "eqtl",
3939
"chromosome": "1",
4040
"tagVariantId": "snp",
41-
"statistics": {"left_logBF": 10.3, "right_logBF": 10.5},
41+
"statistics": {
42+
"left_logBF": 10.3,
43+
"right_logBF": 10.5,
44+
"left_beta": 0.1,
45+
"right_beta": 0.2,
46+
},
4247
},
4348
],
4449
# expected coloc
@@ -62,15 +67,25 @@ def test_coloc(mock_study_locus_overlap: StudyLocusOverlap) -> None:
6267
"rightStudyType": "eqtl",
6368
"chromosome": "1",
6469
"tagVariantId": "snp1",
65-
"statistics": {"left_logBF": 10.3, "right_logBF": 10.5},
70+
"statistics": {
71+
"left_logBF": 10.3,
72+
"right_logBF": 10.5,
73+
"left_beta": 0.1,
74+
"right_beta": 0.2,
75+
},
6676
},
6777
{
6878
"leftStudyLocusId": "1",
6979
"rightStudyLocusId": "2",
7080
"rightStudyType": "eqtl",
7181
"chromosome": "1",
7282
"tagVariantId": "snp2",
73-
"statistics": {"left_logBF": 10.3, "right_logBF": 10.5},
83+
"statistics": {
84+
"left_logBF": 10.3,
85+
"right_logBF": 10.5,
86+
"left_beta": 0.3,
87+
"right_beta": 0.5,
88+
},
7489
},
7590
],
7691
# expected coloc
@@ -134,6 +149,8 @@ def test_coloc_no_logbf(
134149
"statistics": {
135150
"left_logBF": None,
136151
"right_logBF": None,
152+
"left_beta": 0.1,
153+
"right_beta": 0.2,
137154
"left_posteriorProbability": None,
138155
"right_posteriorProbability": None,
139156
}, # irrelevant for COLOC
@@ -152,6 +169,8 @@ def test_coloc_no_logbf(
152169
[
153170
StructField("left_logBF", DoubleType(), True),
154171
StructField("right_logBF", DoubleType(), True),
172+
StructField("left_beta", DoubleType(), False),
173+
StructField("right_beta", DoubleType(), False),
155174
StructField(
156175
"left_posteriorProbability", DoubleType(), True
157176
),
@@ -176,6 +195,67 @@ def test_coloc_no_logbf(
176195
), "COLOC should return a low h4 (traits are associated) when the input data has irrelevant logBF."
177196

178197

198+
def test_coloc_no_betas(spark: SparkSession) -> None:
    """Check that COLOC reports a null betaRatioSignAverage when both betas are missing."""
    # Betas are declared nullable so the single input row can carry None for both.
    statistics_schema = StructType(
        [
            StructField("left_logBF", DoubleType(), False),
            StructField("right_logBF", DoubleType(), False),
            StructField("left_beta", DoubleType(), True),
            StructField("right_beta", DoubleType(), True),
            StructField("left_posteriorProbability", DoubleType(), True),
            StructField("right_posteriorProbability", DoubleType(), True),
        ]
    )
    overlap_schema = StructType(
        [
            StructField("leftStudyLocusId", StringType(), False),
            StructField("rightStudyLocusId", StringType(), False),
            StructField("rightStudyType", StringType(), False),
            StructField("chromosome", StringType(), False),
            StructField("tagVariantId", StringType(), False),
            StructField("statistics", statistics_schema),
        ]
    )

    # One overlapping tag variant with log Bayes factors but no effect sizes.
    overlap_rows = [
        {
            "leftStudyLocusId": "1",
            "rightStudyLocusId": "2",
            "rightStudyType": "eqtl",
            "chromosome": "1",
            "tagVariantId": "snp",
            "statistics": {
                "left_logBF": 10.5,
                "right_logBF": 10.3,
                "left_beta": None,
                "right_beta": None,
                # Posterior probabilities are irrelevant for COLOC.
                "left_posteriorProbability": None,
                "right_posteriorProbability": None,
            },
        }
    ]

    observed_overlap = StudyLocusOverlap(
        spark.createDataFrame(overlap_rows, schema=overlap_schema),
        StudyLocusOverlap.get_schema(),
    )
    observed_coloc_df = Coloc.colocalise(observed_overlap).df

    first_row = observed_coloc_df.select("betaRatioSignAverage").collect()[0]
    assert (
        first_row["betaRatioSignAverage"] is None
    ), "No betas results in None type."
257+
258+
179259
def test_ecaviar(mock_study_locus_overlap: StudyLocusOverlap) -> None:
180260
"""Test eCAVIAR."""
181261
assert isinstance(ECaviar.colocalise(mock_study_locus_overlap), Colocalisation)

0 commit comments

Comments
 (0)