Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore: update poetry to version 2 #969

Draft
wants to merge 8 commits into
base: dev
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -103,8 +103,7 @@ repos:
- id: pydoclint

- repo: https://github.com/python-poetry/poetry
rev: "1.8.0"
rev: "2.0.0"
hooks:
- id: poetry-check
- id: poetry-lock
args: ["--no-update"]
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ ENV POETRY_NO_INTERACTION=1 \
POETRY_CACHE_DIR=/tmp/poetry_cache \
JAVA_HOME=/usr

RUN pip install poetry==1.7.1
# Quote the requirement specifier: in shell-form RUN the command is passed to
# /bin/sh -c, where an unquoted ">" is an output redirect. Without the quotes
# pip would install an unconstrained "poetry" and write its output to a file
# named "=2.0.0", silently dropping the minimum-version constraint.
RUN pip install "poetry>=2.0.0"
WORKDIR /app

COPY pyproject.toml poetry.lock ./
Expand Down
244 changes: 241 additions & 3 deletions poetry.lock

Large diffs are not rendered by default.

56 changes: 29 additions & 27 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,43 +1,45 @@
[tool.poetry]
[project]
name = "gentropy"
# !! version is managed by semantic_release
version = "0.0.0"
description = "Open Targets python framework for post-GWAS analysis"
authors = ["Open Targets core team"]
authors = [
{ name = "Open Targets core team", email = "data@opentargets.org" }
]
license = "Apache-2.0"
readme = "README.md"
documentation = "https://opentargets.github.io/gentropy/"
repository = "https://github.com/opentargets/gentropy"
packages = [{ include = "gentropy", from = "src" }]
requires-poetry = ">=2.0"
requires-python = ">=3.10, <3.11"
dependencies = [
"pyspark (==3.3.4)",
"scipy (>=1.11.4, <1.12.0)",
"hydra-core (>=1.3.2, <1.4.0)",
"pyliftover (>=0.4, <0.5)",
"numpy (>=1.26.2, <1.27.0)",
"hail (==0.2.127)",
"wandb (>=0.19.0, <0.20.0)",
"google (>=3.0.0, <3.1.0)",
"omegaconf (>=2.3.0, <2.4.0)",
"typing-extensions (>=4.9.0, <4.13.0)",
"scikit-learn (>=1.6.0, <1.7.0)",
"pandas[gcp,parquet] (>=2.2.2, <2.3.0)",
"skops (>=0.11, <0.12)",
"google-cloud-secret-manager (>=2.20.0, <2.22.0)",
"shap (>=0.46.0, <0.47.0)",
"matplotlib (>=3.7.3, <3.8.0)"
]

[tool.poetry.urls]
[project.urls]
Source = "https://github.com/opentargets/gentropy"
"Bug Tracker" = "http://github.com/opentargets/issues"
"Funding" = "https://www.opentargets.org"
"Documentation" = "https://opentargets.github.io/gentropy/"
Funding = "https://www.opentargets.org"
Documentation = "https://opentargets.github.io/gentropy/"

[tool.poetry.scripts]
[project.scripts]
gentropy = "gentropy.cli:main"

[tool.poetry.dependencies]
python = ">=3.10, <3.11"
pyspark = "3.3.4"
scipy = ">=1.11.4, <1.12.0"
hydra-core = ">=1.3.2, <1.4.0"
pyliftover = ">=0.4, <0.5"
numpy = ">=1.26.2, <1.27.0"
hail = "0.2.127"
wandb = ">=0.19.0, <0.20.0"
google = ">=3.0.0, <3.1.0"
omegaconf = ">=2.3.0, <2.4.0"
typing-extensions = ">=4.9.0, <4.13.0"
scikit-learn = ">=1.6.0, <1.7.0"
pandas = { extras = ["gcp", "parquet"], version = ">=2.2.2, <2.3.0" }
skops = ">=0.11, <0.12"
google-cloud-secret-manager = ">=2.20.0, <2.22.0"
shap = ">=0.46.0, <0.47.0"
matplotlib = ">=3.7.3, <3.8.0"


[tool.poetry.group.docs.dependencies]
mkdocs = ">=1.5.3, <1.6.0"
mkdocstrings-python = ">=1.12.2, <1.13.0"
Expand Down
9 changes: 4 additions & 5 deletions src/gentropy/colocalisation.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from __future__ import annotations

from functools import partial
from typing import Any, Type
from typing import Any

from pyspark.sql.functions import col

Expand Down Expand Up @@ -77,14 +77,14 @@ def __init__(
@classmethod
def _get_colocalisation_class(
cls, method: str
) -> Type[ColocalisationMethodInterface]:
) -> type[ColocalisationMethodInterface]:
"""Get colocalisation class.

Args:
method (str): Colocalisation method.

Returns:
Type[ColocalisationMethodInterface]: Class that implements the ColocalisationMethodInterface.
type[ColocalisationMethodInterface]: Class that implements the ColocalisationMethodInterface.

Raises:
ValueError: if method not available.
Expand All @@ -96,5 +96,4 @@ def _get_colocalisation_class(
method = method.lower()
if method not in cls.__coloc_methods__:
raise ValueError(f"Colocalisation method {method} not available.")
coloc_method = cls.__coloc_methods__[method]
return coloc_method
return cls.__coloc_methods__[method]
3 changes: 2 additions & 1 deletion src/gentropy/common/spark_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,10 @@

import re
import sys
from collections.abc import Callable, Iterable
from functools import reduce, wraps
from itertools import chain
from typing import TYPE_CHECKING, Any, Callable, Iterable, Optional, TypeVar
from typing import TYPE_CHECKING, Any, Optional, TypeVar

import pyspark.sql.functions as f
import pyspark.sql.types as t
Expand Down
28 changes: 10 additions & 18 deletions src/gentropy/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import os
from dataclasses import dataclass, field
from typing import Any, ClassVar, List, TypedDict
from typing import Any, ClassVar, TypedDict

from hail import __file__ as hail_location
from hydra.core.config_store import ConfigStore
Expand All @@ -17,8 +17,7 @@ class SessionConfig:
write_mode: str = "errorifexists"
spark_uri: str = "local[*]"
hail_home: str = os.path.dirname(hail_location)
extended_spark_conf: dict[str, str] | None = field(
default_factory=dict[str, str])
extended_spark_conf: dict[str, str] | None = field(default_factory=dict[str, str])
output_partitions: int = 200
_target_: str = "gentropy.common.session.Session"

Expand All @@ -28,7 +27,7 @@ class StepConfig:
"""Base step configuration."""

session: SessionConfig
defaults: List[Any] = field(
defaults: list[Any] = field(
default_factory=lambda: [{"session": "base_session"}, "_self_"]
)

Expand All @@ -40,8 +39,7 @@ class ColocalisationConfig(StepConfig):
credible_set_path: str = MISSING
coloc_path: str = MISSING
colocalisation_method: str = MISSING
colocalisation_method_params: dict[str, Any] = field(
default_factory=dict[str, Any])
colocalisation_method_params: dict[str, Any] = field(default_factory=dict[str, Any])
_target_: str = "gentropy.colocalisation.ColocalisationStep"


Expand Down Expand Up @@ -126,8 +124,7 @@ class EqtlCatalogueConfig(StepConfig):
eqtl_catalogue_paths_imported: str = MISSING
eqtl_catalogue_study_index_out: str = MISSING
eqtl_catalogue_credible_sets_out: str = MISSING
mqtl_quantification_methods_blacklist: list[str] = field(
default_factory=lambda: [])
mqtl_quantification_methods_blacklist: list[str] = field(default_factory=lambda: [])
eqtl_lead_pvalue_threshold: float = 1e-3
_target_: str = "gentropy.eqtl_catalogue.EqtlCatalogueStep"

Expand Down Expand Up @@ -681,8 +678,7 @@ class Config:
"""Application configuration."""

# this is unfortunately verbose due to @dataclass limitations
defaults: List[Any] = field(default_factory=lambda: [
"_self_", {"step": MISSING}])
defaults: list[Any] = field(default_factory=lambda: ["_self_", {"step": MISSING}])
step: StepConfig = MISSING
datasets: dict[str, str] = field(default_factory=dict)

Expand Down Expand Up @@ -716,8 +712,7 @@ def register_config() -> None:
name="gwas_catalog_top_hit_ingestion",
node=GWASCatalogTopHitIngestionConfig,
)
cs.store(group="step", name="ld_based_clumping",
node=LDBasedClumpingConfig)
cs.store(group="step", name="ld_based_clumping", node=LDBasedClumpingConfig)
cs.store(group="step", name="ld_index", node=LDIndexConfig)
cs.store(group="step", name="locus_to_gene", node=LocusToGeneConfig)
cs.store(
Expand All @@ -735,8 +730,7 @@ def register_config() -> None:

cs.store(group="step", name="pics", node=PICSConfig)
cs.store(group="step", name="gnomad_variants", node=GnomadVariantConfig)
cs.store(group="step", name="ukb_ppp_eur_sumstat_preprocess",
node=UkbPppEurConfig)
cs.store(group="step", name="ukb_ppp_eur_sumstat_preprocess", node=UkbPppEurConfig)
cs.store(group="step", name="variant_index", node=VariantIndexConfig)
cs.store(group="step", name="variant_to_vcf", node=ConvertToVcfStepConfig)
cs.store(
Expand Down Expand Up @@ -769,7 +763,5 @@ def register_config() -> None:
name="locus_to_gene_associations",
node=LocusToGeneAssociationsStepConfig,
)
cs.store(group="step", name="finngen_ukb_meta_ingestion",
node=FinngenUkbMetaConfig)
cs.store(group="step", name="credible_set_qc",
node=CredibleSetQCStepConfig)
cs.store(group="step", name="finngen_ukb_meta_ingestion", node=FinngenUkbMetaConfig)
cs.store(group="step", name="credible_set_qc", node=CredibleSetQCStepConfig)
4 changes: 2 additions & 2 deletions src/gentropy/dataset/l2g_feature_matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from __future__ import annotations

from functools import reduce
from typing import TYPE_CHECKING, Type
from typing import TYPE_CHECKING

import pyspark.sql.functions as f
from pyspark.sql import Window
Expand Down Expand Up @@ -55,7 +55,7 @@ def __init__(

@classmethod
def from_features_list(
cls: Type[L2GFeatureMatrix],
cls: type[L2GFeatureMatrix],
study_loci_to_annotate: StudyLocus | L2GGoldStandard,
features_list: list[str],
features_input_loader: L2GFeatureInputLoader,
Expand Down
4 changes: 2 additions & 2 deletions src/gentropy/dataset/l2g_gold_standard.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from __future__ import annotations

from dataclasses import dataclass
from typing import TYPE_CHECKING, Type
from typing import TYPE_CHECKING

import pyspark.sql.functions as f
from pyspark.sql import Window
Expand Down Expand Up @@ -74,7 +74,7 @@ def get_schema(cls: type[L2GGoldStandard]) -> StructType:

@classmethod
def process_gene_interactions(
cls: Type[L2GGoldStandard], interactions: DataFrame
cls: type[L2GGoldStandard], interactions: DataFrame
) -> DataFrame:
"""Extract top scoring gene-gene interaction from the interactions dataset of the Platform.

Expand Down
4 changes: 2 additions & 2 deletions src/gentropy/dataset/l2g_prediction.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from __future__ import annotations

from dataclasses import dataclass
from typing import TYPE_CHECKING, Type
from typing import TYPE_CHECKING

import pyspark.sql.functions as f
from pyspark.sql import DataFrame
Expand Down Expand Up @@ -40,7 +40,7 @@ def get_schema(cls: type[L2GPrediction]) -> StructType:

@classmethod
def from_credible_set(
cls: Type[L2GPrediction],
cls: type[L2GPrediction],
session: Session,
credible_set: StudyLocus,
feature_matrix: L2GFeatureMatrix,
Expand Down
6 changes: 3 additions & 3 deletions src/gentropy/datasource/gwas_catalog/study_splitter.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from __future__ import annotations

from typing import TYPE_CHECKING, Tuple
from typing import TYPE_CHECKING

import pyspark.sql.functions as f
from pyspark.sql.window import Window
Expand Down Expand Up @@ -84,7 +84,7 @@ def split(
cls: type[GWASCatalogStudySplitter],
studies: StudyIndexGWASCatalog,
associations: StudyLocusGWASCatalog,
) -> Tuple[StudyIndexGWASCatalog, StudyLocusGWASCatalog]:
) -> tuple[StudyIndexGWASCatalog, StudyLocusGWASCatalog]:
"""Splitting multi-trait GWAS Catalog studies.

If assigned disease of the study and the association don't agree, we assume the study needs to be split.
Expand All @@ -95,7 +95,7 @@ def split(
associations (StudyLocusGWASCatalog): GWAS Catalog associations.

Returns:
Tuple[StudyIndexGWASCatalog, StudyLocusGWASCatalog]: Split studies and associations.
tuple[StudyIndexGWASCatalog, StudyLocusGWASCatalog]: Split studies and associations.
"""
# Composite of studies and associations to resolve scattered information
st_ass = (
Expand Down
6 changes: 2 additions & 4 deletions src/gentropy/datasource/open_targets/l2g_gold_standard.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@

from __future__ import annotations

from typing import Type

import pyspark.sql.functions as f
from pyspark.sql import DataFrame

Expand All @@ -24,7 +22,7 @@ class OpenTargetsL2GGoldStandard:

@classmethod
def parse_positive_curation(
cls: Type[OpenTargetsL2GGoldStandard], gold_standard_curation: DataFrame
cls: type[OpenTargetsL2GGoldStandard], gold_standard_curation: DataFrame
) -> DataFrame:
"""Parse positive set from gold standard curation.

Expand Down Expand Up @@ -60,7 +58,7 @@ def parse_positive_curation(

@classmethod
def expand_gold_standard_with_negatives(
cls: Type[OpenTargetsL2GGoldStandard],
cls: type[OpenTargetsL2GGoldStandard],
positive_set: DataFrame,
variant_index: VariantIndex,
) -> DataFrame:
Expand Down
3 changes: 2 additions & 1 deletion src/gentropy/method/l2g/feature_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@

from __future__ import annotations

from typing import Any, Iterator, Mapping
from typing import Any
from collections.abc import Iterator, Mapping

from gentropy.dataset.l2g_features.colocalisation import (
EQtlColocClppMaximumFeature,
Expand Down
6 changes: 3 additions & 3 deletions src/gentropy/method/l2g/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import json
from dataclasses import dataclass, field
from pathlib import Path
from typing import TYPE_CHECKING, Any, Type
from typing import TYPE_CHECKING, Any

import pandas as pd
import skops.io as sio
Expand Down Expand Up @@ -51,7 +51,7 @@ def __post_init__(self: LocusToGeneModel) -> None:
self.model.set_params(**self.hyperparameters_dict)

@classmethod
def load_from_disk(cls: Type[LocusToGeneModel], path: str) -> LocusToGeneModel:
def load_from_disk(cls: type[LocusToGeneModel], path: str) -> LocusToGeneModel:
"""Load a fitted model from disk.

Args:
Expand Down Expand Up @@ -83,7 +83,7 @@ def load_from_disk(cls: Type[LocusToGeneModel], path: str) -> LocusToGeneModel:

@classmethod
def load_from_hub(
cls: Type[LocusToGeneModel],
cls: type[LocusToGeneModel],
model_id: str,
hf_token: str | None = None,
model_name: str = "classifier.skops",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@
from gentropy.finngen_studies import FinnGenStudiesStep

if TYPE_CHECKING:
from collections.abc import Callable
from pathlib import Path
from typing import Callable

from pyspark.sql import SparkSession

Expand Down
3 changes: 1 addition & 2 deletions tests/gentropy/step/test_colocalisation_step.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
"""Test colocalisation step."""

from pathlib import Path
from typing import Type

import pytest

Expand Down Expand Up @@ -208,7 +207,7 @@ def _setup(self, session: Session, tmp_path: Path) -> None:
],
)
def test_get_colocalisation_class(
self, label: str, expected_method: Type[ColocalisationMethodInterface]
self, label: str, expected_method: type[ColocalisationMethodInterface]
) -> None:
"""Test _get_colocalisation_class method on ColocalisationStep."""
method = ColocalisationStep._get_colocalisation_class(label)
Expand Down
Loading
Loading