Skip to content

Commit da9fa7c

Browse files
authored
Merge branch 'main' into do_docs_imgs
2 parents fde9d3a + dc5d2b6 commit da9fa7c

File tree

14 files changed

+64
-66
lines changed

14 files changed

+64
-66
lines changed

config/step/my_v2g.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,5 +10,5 @@ liftover_chain_file_path: ${datasets.chain_37_38}
1010
anderson_path: ${datasets.anderson}
1111
javierre_path: ${datasets.javierre}
1212
jung_path: ${datasets.jung}
13-
thurnman_path: ${datasets.thurnman}
13+
thurman_path: ${datasets.thurman}
1414
v2g_path: ${datasets.v2g}
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
::: otg.datasource.intervals.thurman.IntervalsThurman

src/otg/config.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -149,7 +149,7 @@ class V2GStepConfig:
149149
anderson_path (str): Anderson intervals path.
150150
javierre_path (str): Javierre intervals path.
151151
jung_path (str): Jung intervals path.
152-
thurnman_path (str): Thurnman intervals path.
152+
thurman_path (str): Thurman intervals path.
153153
liftover_max_length_difference (int): Maximum length difference for liftover.
154154
max_distance (int): Maximum distance to consider.
155155
output_path (str): Output V2G path.
@@ -164,7 +164,7 @@ class V2GStepConfig:
164164
anderson_path: str = MISSING
165165
javierre_path: str = MISSING
166166
jung_path: str = MISSING
167-
thurnman_path: str = MISSING
167+
thurman_path: str = MISSING
168168
liftover_max_length_difference: int = 100
169169
max_distance: int = 500_000
170170
v2g_path: str = MISSING

src/otg/dataset/summary_statistics.py

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,6 @@ def window_based_clumping(
5555
self: SummaryStatistics,
5656
distance: int,
5757
gwas_significance: float = 5e-8,
58-
with_locus: bool = False,
5958
baseline_significance: float = 0.05,
6059
locus_collect_distance: int | None = None,
6160
) -> StudyLocus:
@@ -70,10 +69,8 @@ def window_based_clumping(
7069
Returns:
7170
StudyLocus: Clumped study-locus containing variants based on window.
7271
"""
73-
if locus_collect_distance is None:
74-
locus_collect_distance = distance
75-
# Based on if we want to get the locus different clumping function is called:
76-
if with_locus:
72+
# If locus collect distance is present, collect locus with the provided distance:
73+
if locus_collect_distance:
7774
clumped_df = WindowBasedClumping.clump_with_locus(
7875
self,
7976
window_length=distance,

src/otg/datasource/finngen/study_index.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,8 @@ def from_source(
3737
Args:
3838
finngen_studies (DataFrame): FinnGen raw study table
3939
finngen_release_prefix (str): Release prefix pattern.
40-
finngen_sumstat_url_prefix (str): URL prefix for summary statistics location.
41-
finngen_sumstat_url_suffix (str): URL prefix suffix for summary statistics location.
40+
finngen_summary_stats_url_prefix (str): URL prefix for summary statistics location.
41+
finngen_summary_stats_url_suffix (str): URL suffix for summary statistics location.
4242
4343
Returns:
4444
FinnGenStudyIndex: Parsed and annotated FinnGen study table.

src/otg/datasource/intervals/andersson.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,8 +47,7 @@ def parse(
4747
"""Parse Andersson et al. 2014 dataset.
4848
4949
Args:
50-
session (Session): session
51-
path (str): Path to dataset
50+
raw_anderson_df (DataFrame): Raw Andersson et al. dataset
5251
gene_index (GeneIndex): Gene index
5352
lift (LiftOverSpark): LiftOverSpark instance
5453

src/otg/datasource/intervals/javierre.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,15 +19,15 @@ class IntervalsJavierre(Intervals):
1919
"""Interval dataset from Javierre et al. 2016."""
2020

2121
@staticmethod
22-
def read_javierre(spark: SparkSession, path: str):
22+
def read_javierre(spark: SparkSession, path: str) -> DataFrame:
2323
"""Read Javierre dataset.
2424
2525
Args:
2626
spark (SparkSession): Spark session
2727
path (str): Path to dataset
2828
2929
Returns:
30-
DataFrame: DataFrame with raw Javierre data
30+
DataFrame: Raw Javierre dataset
3131
"""
3232
return spark.read.parquet(path)
3333

@@ -41,8 +41,7 @@ def parse(
4141
"""Parse Javierre et al. 2016 dataset.
4242
4343
Args:
44-
session (Session): session
45-
path (str): Path to dataset
44+
javierre_raw (DataFrame): Raw Javierre data
4645
gene_index (GeneIndex): Gene index
4746
lift (LiftOverSpark): LiftOverSpark instance
4847

src/otg/datasource/intervals/jung.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ class IntervalsJung(Intervals):
1919
"""Interval dataset from Jung et al. 2019."""
2020

2121
@staticmethod
22-
def read_jung(spark: SparkSession, path: str):
22+
def read_jung(spark: SparkSession, path: str) -> DataFrame:
2323
"""Read jung dataset.
2424
2525
Args:

src/otg/datasource/intervals/thurnman.py renamed to src/otg/datasource/intervals/thurman.py

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -15,19 +15,19 @@
1515
from otg.dataset.gene_index import GeneIndex
1616

1717

18-
class IntervalsThurnman(Intervals):
18+
class IntervalsThurman(Intervals):
1919
"""Interval dataset from Thurman et al. 2012."""
2020

2121
@staticmethod
22-
def read_thurnman(spark: SparkSession, path: str):
23-
"""Read thurnman dataset.
22+
def read_thurman(spark: SparkSession, path: str) -> DataFrame:
23+
"""Read thurman dataset.
2424
2525
Args:
2626
spark (SparkSession): Spark session
2727
path (str): Path to dataset
2828
2929
Returns:
30-
DataFrame: DataFrame with raw thurnman data
30+
DataFrame: DataFrame with raw thurman data
3131
"""
3232
thurman_schema = t.StructType(
3333
[
@@ -45,28 +45,28 @@ def read_thurnman(spark: SparkSession, path: str):
4545

4646
@classmethod
4747
def parse(
48-
cls: type[IntervalsThurnman],
49-
thurnman_raw: DataFrame,
48+
cls: type[IntervalsThurman],
49+
thurman_raw: DataFrame,
5050
gene_index: GeneIndex,
5151
lift: LiftOverSpark,
5252
) -> Intervals:
5353
"""Parse the Thurman et al. 2012 dataset.
5454
5555
Args:
56-
thurnman_raw (DataFrame): raw Thurman et al. 2019 dataset
56+
thurman_raw (DataFrame): raw Thurman et al. 2012 dataset
5757
gene_index (GeneIndex): gene index
5858
lift (LiftOverSpark): LiftOverSpark instance
5959
6060
Returns:
61-
Intervals: Interval dataset containing Thurnman et al. 2012 data
61+
Intervals: Interval dataset containing Thurman et al. 2012 data
6262
"""
6363
dataset_name = "thurman2012"
6464
experiment_type = "dhscor"
6565
pmid = "22955617"
6666

6767
return cls(
6868
_df=(
69-
thurnman_raw.select(
69+
thurman_raw.select(
7070
f.regexp_replace(f.col("chrom"), "chr", "").alias("chrom"),
7171
"start",
7272
"end",

src/otg/v2g.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
from otg.datasource.intervals.andersson import IntervalsAndersson
1515
from otg.datasource.intervals.javierre import IntervalsJavierre
1616
from otg.datasource.intervals.jung import IntervalsJung
17-
from otg.datasource.intervals.thurnman import IntervalsThurnman
17+
from otg.datasource.intervals.thurman import IntervalsThurman
1818

1919

2020
@dataclass
@@ -76,8 +76,8 @@ def run(self: V2GStep) -> None:
7676
gene_index_filtered,
7777
lift,
7878
).v2g(vi),
79-
IntervalsThurnman.parse(
80-
IntervalsThurnman.read_thurnman(self.session, self.thurnman_path),
79+
IntervalsThurman.parse(
80+
IntervalsThurman.read_thurman(self.session, self.thurman_path),
8181
gene_index_filtered,
8282
lift,
8383
).v2g(vi),
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
"""Test Thurman."""
2+
from __future__ import annotations
3+
4+
import pytest
5+
from pyspark.sql import DataFrame, SparkSession
6+
7+
from otg.common.Liftover import LiftOverSpark
8+
from otg.dataset.gene_index import GeneIndex
9+
from otg.datasource.intervals.thurman import IntervalsThurman
10+
11+
12+
@pytest.fixture(scope="module")
13+
def sample_intervals_thurman(spark: SparkSession) -> DataFrame:
14+
"""Sample Thurman intervals."""
15+
return IntervalsThurman.read_thurman(
16+
spark, "tests/data_samples/thurman_sample.bed8"
17+
)
18+
19+
20+
def test_read_thurman(sample_intervals_thurman: DataFrame) -> None:
21+
"""Test read_thurman."""
22+
assert isinstance(sample_intervals_thurman, DataFrame)
23+
24+
25+
def test_thurman_intervals_from_source(
26+
sample_intervals_thurman: DataFrame,
27+
mock_gene_index: GeneIndex,
28+
liftover_chain_37_to_38: LiftOverSpark,
29+
) -> None:
30+
"""Test IntervalsThurman creation with mock data."""
31+
assert isinstance(
32+
IntervalsThurman.parse(
33+
sample_intervals_thurman, mock_gene_index, liftover_chain_37_to_38
34+
),
35+
IntervalsThurman,
36+
)

tests/datasource/intervals/test_thurnman.py

Lines changed: 0 additions & 36 deletions
This file was deleted.

tests/method/test_window_based_clumping.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,9 @@ def test_window_based_clump_with_locus__correctness(
4444
sample_summary_satistics: SummaryStatistics,
4545
) -> None:
4646
"""Test window-based clumping."""
47-
clumped = sample_summary_satistics.window_based_clumping(250_000, with_locus=True)
47+
clumped = sample_summary_satistics.window_based_clumping(
48+
distance=250_000, locus_collect_distance=250_000
49+
)
4850

4951
# Asserting the presence of locus key:
5052
assert "locus" in clumped.df.columns

0 commit comments

Comments
 (0)