From f4f955d21c7a26fab3c54577c0cb29ee0308064f Mon Sep 17 00:00:00 2001 From: David Ochoa Date: Fri, 13 Oct 2023 14:51:28 +0100 Subject: [PATCH 1/3] docs: several fixes in docstrings --- src/otg/datasource/finngen/study_index.py | 4 ++-- src/otg/datasource/intervals/andersson.py | 3 +-- src/otg/datasource/intervals/javierre.py | 7 +++---- src/otg/datasource/intervals/jung.py | 2 +- src/otg/datasource/intervals/thurnman.py | 2 +- 5 files changed, 8 insertions(+), 10 deletions(-) diff --git a/src/otg/datasource/finngen/study_index.py b/src/otg/datasource/finngen/study_index.py index 48829a267..16603e2af 100644 --- a/src/otg/datasource/finngen/study_index.py +++ b/src/otg/datasource/finngen/study_index.py @@ -37,8 +37,8 @@ def from_source( Args: finngen_studies (DataFrame): FinnGen raw study table finngen_release_prefix (str): Release prefix pattern. - finngen_sumstat_url_prefix (str): URL prefix for summary statistics location. - finngen_sumstat_url_suffix (str): URL prefix suffix for summary statistics location. + finngen_summary_stats_url_prefix (str): URL prefix for summary statistics location. + finngen_summary_stats_url_suffix (str): URL suffix for summary statistics location. Returns: FinnGenStudyIndex: Parsed and annotated FinnGen study table. diff --git a/src/otg/datasource/intervals/andersson.py b/src/otg/datasource/intervals/andersson.py index 67a3f01d7..88d38cf98 100644 --- a/src/otg/datasource/intervals/andersson.py +++ b/src/otg/datasource/intervals/andersson.py @@ -47,8 +47,7 @@ def parse( """Parse Andersson et al. 2014 dataset. Args: - session (Session): session - path (str): Path to dataset + raw_anderson_df (DataFrame): Raw Andersson et al. 
dataset gene_index (GeneIndex): Gene index lift (LiftOverSpark): LiftOverSpark instance diff --git a/src/otg/datasource/intervals/javierre.py b/src/otg/datasource/intervals/javierre.py index 7fc7e858e..4c50e5e36 100644 --- a/src/otg/datasource/intervals/javierre.py +++ b/src/otg/datasource/intervals/javierre.py @@ -19,7 +19,7 @@ class IntervalsJavierre(Intervals): """Interval dataset from Javierre et al. 2016.""" @staticmethod - def read_javierre(spark: SparkSession, path: str): + def read_javierre(spark: SparkSession, path: str) -> DataFrame: """Read Javierre dataset. Args: @@ -27,7 +27,7 @@ def read_javierre(spark: SparkSession, path: str): path (str): Path to dataset Returns: - DataFrame: DataFrame with raw Javierre data + DataFrame: Raw Javierre dataset """ return spark.read.parquet(path) @@ -41,8 +41,7 @@ def parse( """Parse Javierre et al. 2016 dataset. Args: - session (Session): session - path (str): Path to dataset + javierre_raw (DataFrame): Raw Javierre data gene_index (GeneIndex): Gene index lift (LiftOverSpark): LiftOverSpark instance diff --git a/src/otg/datasource/intervals/jung.py b/src/otg/datasource/intervals/jung.py index 3ca26f026..9ab5cf99f 100644 --- a/src/otg/datasource/intervals/jung.py +++ b/src/otg/datasource/intervals/jung.py @@ -19,7 +19,7 @@ class IntervalsJung(Intervals): """Interval dataset from Jung et al. 2019.""" @staticmethod - def read_jung(spark: SparkSession, path: str): + def read_jung(spark: SparkSession, path: str) -> DataFrame: """Read jung dataset. Args: diff --git a/src/otg/datasource/intervals/thurnman.py b/src/otg/datasource/intervals/thurnman.py index 18587066d..fefd14a20 100644 --- a/src/otg/datasource/intervals/thurnman.py +++ b/src/otg/datasource/intervals/thurnman.py @@ -19,7 +19,7 @@ class IntervalsThurnman(Intervals): """Interval dataset from Thurman et al. 
2012.""" @staticmethod - def read_thurnman(spark: SparkSession, path: str): + def read_thurnman(spark: SparkSession, path: str) -> DataFrame: """Read thurnman dataset. Args: From 172f10acbea809c1eea84bbfdc6e2232354576b0 Mon Sep 17 00:00:00 2001 From: David Ochoa Date: Mon, 16 Oct 2023 09:41:53 +0100 Subject: [PATCH 2/3] docs: rename all instances of Thurnman to Thurman --- config/step/my_v2g.yaml | 2 +- .../datasource/intervals/thurman.md | 1 + .../datasource/intervals/thurnman.md | 1 - src/otg/config.py | 4 +-- .../intervals/{thurnman.py => thurman.py} | 18 +++++----- src/otg/v2g.py | 6 ++-- tests/datasource/intervals/test_thurman.py | 36 +++++++++++++++++++ tests/datasource/intervals/test_thurnman.py | 36 ------------------- 8 files changed, 52 insertions(+), 52 deletions(-) create mode 100644 docs/components/datasource/intervals/thurman.md delete mode 100644 docs/components/datasource/intervals/thurnman.md rename src/otg/datasource/intervals/{thurnman.py => thurman.py} (87%) create mode 100644 tests/datasource/intervals/test_thurman.py delete mode 100644 tests/datasource/intervals/test_thurnman.py diff --git a/config/step/my_v2g.yaml b/config/step/my_v2g.yaml index 6e506c5a9..709f41784 100644 --- a/config/step/my_v2g.yaml +++ b/config/step/my_v2g.yaml @@ -10,5 +10,5 @@ liftover_chain_file_path: ${datasets.chain_37_38} anderson_path: ${datasets.anderson} javierre_path: ${datasets.javierre} jung_path: ${datasets.jung} -thurnman_path: ${datasets.thurnman} +thurman_path: ${datasets.thurman} v2g_path: ${datasets.v2g} diff --git a/docs/components/datasource/intervals/thurman.md b/docs/components/datasource/intervals/thurman.md new file mode 100644 index 000000000..98cacabc4 --- /dev/null +++ b/docs/components/datasource/intervals/thurman.md @@ -0,0 +1 @@ +::: otg.datasource.intervals.thurman.IntervalsThurman diff --git a/docs/components/datasource/intervals/thurnman.md b/docs/components/datasource/intervals/thurnman.md deleted file mode 100644 index 
bc8cab313..000000000 --- a/docs/components/datasource/intervals/thurnman.md +++ /dev/null @@ -1 +0,0 @@ -::: otg.datasource.intervals.thurnman.IntervalsThurnman diff --git a/src/otg/config.py b/src/otg/config.py index 163a6bd34..c87505f8f 100644 --- a/src/otg/config.py +++ b/src/otg/config.py @@ -149,7 +149,7 @@ class V2GStepConfig: anderson_path (str): Anderson intervals path. javierre_path (str): Javierre intervals path. jung_path (str): Jung intervals path. - thurnman_path (str): Thurnman intervals path. + thurman_path (str): Thurman intervals path. liftover_max_length_difference (int): Maximum length difference for liftover. max_distance (int): Maximum distance to consider. output_path (str): Output V2G path. @@ -164,7 +164,7 @@ class V2GStepConfig: anderson_path: str = MISSING javierre_path: str = MISSING jung_path: str = MISSING - thurnman_path: str = MISSING + thurman_path: str = MISSING liftover_max_length_difference: int = 100 max_distance: int = 500_000 v2g_path: str = MISSING diff --git a/src/otg/datasource/intervals/thurnman.py b/src/otg/datasource/intervals/thurman.py similarity index 87% rename from src/otg/datasource/intervals/thurnman.py rename to src/otg/datasource/intervals/thurman.py index fefd14a20..3cac112d3 100644 --- a/src/otg/datasource/intervals/thurnman.py +++ b/src/otg/datasource/intervals/thurman.py @@ -15,19 +15,19 @@ from otg.dataset.gene_index import GeneIndex -class IntervalsThurnman(Intervals): +class IntervalsThurman(Intervals): """Interval dataset from Thurman et al. 2012.""" @staticmethod - def read_thurnman(spark: SparkSession, path: str) -> DataFrame: - """Read thurnman dataset. + def read_thurman(spark: SparkSession, path: str) -> DataFrame: + """Read thurman dataset. 
Args: spark (SparkSession): Spark session path (str): Path to dataset Returns: - DataFrame: DataFrame with raw thurnman data + DataFrame: DataFrame with raw thurman data """ thurman_schema = t.StructType( [ @@ -45,20 +45,20 @@ def read_thurnman(spark: SparkSession, path: str) -> DataFrame: @classmethod def parse( - cls: type[IntervalsThurnman], - thurnman_raw: DataFrame, + cls: type[IntervalsThurman], + thurman_raw: DataFrame, gene_index: GeneIndex, lift: LiftOverSpark, ) -> Intervals: """Parse the Thurman et al. 2012 dataset. Args: - thurnman_raw (DataFrame): raw Thurman et al. 2019 dataset + thurman_raw (DataFrame): raw Thurman et al. 2012 dataset gene_index (GeneIndex): gene index lift (LiftOverSpark): LiftOverSpark instance Returns: - Intervals: Interval dataset containing Thurnman et al. 2012 data + Intervals: Interval dataset containing Thurman et al. 2012 data """ dataset_name = "thurman2012" experiment_type = "dhscor" @@ -66,7 +66,7 @@ def parse( return cls( _df=( - thurnman_raw.select( + thurman_raw.select( f.regexp_replace(f.col("chrom"), "chr", "").alias("chrom"), "start", "end", diff --git a/src/otg/v2g.py b/src/otg/v2g.py index ce0c1541b..d62a0539d 100644 --- a/src/otg/v2g.py +++ b/src/otg/v2g.py @@ -14,7 +14,7 @@ from otg.datasource.intervals.andersson import IntervalsAndersson from otg.datasource.intervals.javierre import IntervalsJavierre from otg.datasource.intervals.jung import IntervalsJung -from otg.datasource.intervals.thurnman import IntervalsThurnman +from otg.datasource.intervals.thurman import IntervalsThurman @dataclass @@ -76,8 +76,8 @@ def run(self: V2GStep) -> None: gene_index_filtered, lift, ).v2g(vi), - IntervalsThurnman.parse( - IntervalsThurnman.read_thurnman(self.session, self.thurnman_path), + IntervalsThurman.parse( + IntervalsThurman.read_thurman(self.session, self.thurman_path), gene_index_filtered, lift, ).v2g(vi), diff --git a/tests/datasource/intervals/test_thurman.py b/tests/datasource/intervals/test_thurman.py new file 
mode 100644 index 000000000..3ef287a5c --- /dev/null +++ b/tests/datasource/intervals/test_thurman.py @@ -0,0 +1,36 @@ +"""Test Thurman.""" +from __future__ import annotations + +import pytest +from pyspark.sql import DataFrame, SparkSession + +from otg.common.Liftover import LiftOverSpark +from otg.dataset.gene_index import GeneIndex +from otg.datasource.intervals.thurman import IntervalsThurman + + +@pytest.fixture(scope="module") +def sample_intervals_thurman(spark: SparkSession) -> DataFrame: + """Sample Thurman intervals.""" + return IntervalsThurman.read_thurman( + spark, "tests/data_samples/thurman_sample.bed8" + ) + + +def test_read_thurman(sample_intervals_thurman: DataFrame) -> None: + """Test read_thurman.""" + assert isinstance(sample_intervals_thurman, DataFrame) + + +def test_thurman_intervals_from_source( + sample_intervals_thurman: DataFrame, + mock_gene_index: GeneIndex, + liftover_chain_37_to_38: LiftOverSpark, +) -> None: + """Test IntervalsThurman creation with mock data.""" + assert isinstance( + IntervalsThurman.parse( + sample_intervals_thurman, mock_gene_index, liftover_chain_37_to_38 + ), + IntervalsThurman, + ) diff --git a/tests/datasource/intervals/test_thurnman.py b/tests/datasource/intervals/test_thurnman.py deleted file mode 100644 index 7b4f03995..000000000 --- a/tests/datasource/intervals/test_thurnman.py +++ /dev/null @@ -1,36 +0,0 @@ -"""Test Thurman.""" -from __future__ import annotations - -import pytest -from pyspark.sql import DataFrame, SparkSession - -from otg.common.Liftover import LiftOverSpark -from otg.dataset.gene_index import GeneIndex -from otg.datasource.intervals.thurnman import IntervalsThurnman - - -@pytest.fixture(scope="module") -def sample_intervals_thurnman(spark: SparkSession) -> DataFrame: - """Sample Andersson intervals.""" - return IntervalsThurnman.read_thurnman( - spark, "tests/data_samples/thurnman_sample.bed8" - ) - - -def test_read_thurnman(sample_intervals_thurnman: DataFrame) -> None: - """Test 
read_jung.""" - assert isinstance(sample_intervals_thurnman, DataFrame) - - -def test_thurnman_intervals_from_source( - sample_intervals_thurnman: DataFrame, - mock_gene_index: GeneIndex, - liftover_chain_37_to_38: LiftOverSpark, -) -> None: - """Test IntervalsThurnman creation with mock data.""" - assert isinstance( - IntervalsThurnman.parse( - sample_intervals_thurnman, mock_gene_index, liftover_chain_37_to_38 - ), - IntervalsThurnman, - ) From 84e8de5565fb3a24556511f8d68ab0079a87b764 Mon Sep 17 00:00:00 2001 From: David Ochoa Date: Mon, 16 Oct 2023 09:47:54 +0100 Subject: [PATCH 3/3] fix: incorrect filename --- tests/data_samples/{thurnman_sample.bed8 => thurman_sample.bed8} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tests/data_samples/{thurnman_sample.bed8 => thurman_sample.bed8} (100%) diff --git a/tests/data_samples/thurnman_sample.bed8 b/tests/data_samples/thurman_sample.bed8 similarity index 100% rename from tests/data_samples/thurnman_sample.bed8 rename to tests/data_samples/thurman_sample.bed8